Skip to content

Commit

Permalink
Implement accumulator refresh table
Browse files Browse the repository at this point in the history
For each thread, persist an accumulator cache for the network, where each
cache contains multiple entries for each of the possible king squares.
When the accumulator needs to be refreshed, the cached entry is used to
update the accumulator more efficiently, instead of rebuilding it from scratch.
This idea was first described by Luecx (author of Koivisto) and
is commonly referred to as "Finny Tables".

When the accumulator needs to be refreshed, instead of filling it with
biases and adding every piece from scratch, we:

1. Take the `AccumulatorRefreshEntry` associated with the new king bucket
2. Calculate the features to activate and deactivate (from differences
   between bitboards in the entry and bitboards of the actual position)
3. Apply the updates on the refresh entry
4. Copy the content of the refresh entry accumulator to the accumulator
   we were refreshing
5. Copy the bitboards from the position to the refresh entry, to match
   the newly updated accumulator

No functional change
  • Loading branch information
PikaCat-OuO committed Apr 25, 2024
1 parent 4de1161 commit cc56cab
Show file tree
Hide file tree
Showing 13 changed files with 266 additions and 135 deletions.
18 changes: 13 additions & 5 deletions src/evaluate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,14 @@
#include <iomanip>
#include <iostream>
#include <sstream>
#include <memory>

#include "nnue/network.h"
#include "nnue/nnue_misc.h"
#include "position.h"
#include "types.h"
#include "uci.h"
#include "nnue/nnue_accumulator.h"

namespace Stockfish {

Expand All @@ -46,7 +48,10 @@ int Eval::simple_eval(const Position& pos, Color c) {

// Evaluate is the evaluator for the outer world. It returns a static evaluation
// of the position from the point of view of the side to move.
Value Eval::evaluate(const Eval::NNUE::Network& network, const Position& pos, int optimism) {
Value Eval::evaluate(const Eval::NNUE::Network& network,
const Position& pos,
NNUE::AccumulatorCaches& caches,
int optimism) {

assert(!pos.checkers());

Expand All @@ -56,7 +61,7 @@ Value Eval::evaluate(const Eval::NNUE::Network& network, const Position& pos, in
int simpleEval = simple_eval(pos, stm);

int nnueComplexity;
Value nnue = network.evaluate(pos, true, &nnueComplexity);
Value nnue = network.evaluate(pos, &caches.cache, true, &nnueComplexity);

// Blend optimism and eval with nnue complexity and material imbalance
optimism += optimism * (nnueComplexity + std::abs(simpleEval - nnue)) / 729;
Expand All @@ -80,21 +85,24 @@ Value Eval::evaluate(const Eval::NNUE::Network& network, const Position& pos, in
// Trace scores are from white's point of view
std::string Eval::trace(Position& pos, const Eval::NNUE::Network& network) {

auto caches = std::make_unique<Eval::NNUE::AccumulatorCaches>();
caches->clear(network);

if (pos.checkers())
return "Final evaluation: none (in check)";

std::stringstream ss;
ss << std::showpoint << std::noshowpos << std::fixed << std::setprecision(2);

ss << '\n' << NNUE::trace(pos, network) << '\n';
ss << '\n' << NNUE::trace(pos, network, *caches) << '\n';

ss << std::showpoint << std::showpos << std::fixed << std::setprecision(2) << std::setw(15);

Value v = network.evaluate(pos);
Value v = network.evaluate(pos, &caches->cache);
v = pos.side_to_move() == WHITE ? v : -v;
ss << "NNUE evaluation " << 0.01 * UCIEngine::to_cp(v, pos) << " (white side)\n";

v = evaluate(network, pos, VALUE_ZERO);
v = evaluate(network, pos, *caches, VALUE_ZERO);
v = pos.side_to_move() == WHITE ? v : -v;
ss << "Final evaluation " << 0.01 * UCIEngine::to_cp(v, pos) << " (white side)";
ss << " [with scaled NNUE, ...]";
Expand Down
6 changes: 5 additions & 1 deletion src/evaluate.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,16 @@ namespace Eval {

namespace NNUE {
class Network;
struct AccumulatorCaches;
}

std::string trace(Position& pos, const Eval::NNUE::Network& network);

int simple_eval(const Position& pos, Color c);
Value evaluate(const NNUE::Network& network, const Position& pos, int optimism);
Value evaluate(const NNUE::Network& network,
const Position& pos,
Eval::NNUE::AccumulatorCaches& caches,
int optimism);

} // namespace Eval

Expand Down
19 changes: 3 additions & 16 deletions src/nnue/features/half_ka_v2_hm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
#include "../../bitboard.h"
#include "../../position.h"
#include "../../types.h"
#include "../nnue_common.h"
#include "../nnue_accumulator.h"

namespace Stockfish::Eval::NNUE::Features {

Expand All @@ -36,22 +36,9 @@ inline IndexType HalfKAv2_hm::make_index(Square s, Piece pc, Square ksq, int ab)
+ PS_NB * ((KingBuckets[ksq] & 0x7) * 9 + ab));
}

// Get a list of indices for active features
template<Color Perspective>
void HalfKAv2_hm::append_active_indices(const Position& pos, IndexList& active) {
Square ksq = pos.square<KING>(Perspective);
int ab = pos.count<ADVISOR>(Perspective) * 3 + pos.count<BISHOP>(Perspective);
Bitboard bb = pos.pieces();
while (bb)
{
Square s = pop_lsb(bb);
active.push_back(make_index<Perspective>(s, pos.piece_on(s), ksq, ab));
}
}

// Explicit template instantiations
template void HalfKAv2_hm::append_active_indices<WHITE>(const Position& pos, IndexList& active);
template void HalfKAv2_hm::append_active_indices<BLACK>(const Position& pos, IndexList& active);
template IndexType HalfKAv2_hm::make_index<WHITE>(Square s, Piece pc, Square ksq, int ab);
template IndexType HalfKAv2_hm::make_index<BLACK>(Square s, Piece pc, Square ksq, int ab);

// Get a list of indices for recently changed features
template<Color Perspective>
Expand Down
21 changes: 15 additions & 6 deletions src/nnue/features/half_ka_v2_hm.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,6 @@ class HalfKAv2_hm {
};
// clang-format on

// Index of a feature for a given king position and another piece on some square
template<Color Perspective>
static IndexType make_index(Square s, Piece pc, Square ksq, int ab);

public:
// Feature name
static constexpr const char* Name = "HalfKAv2_hm";
Expand Down Expand Up @@ -95,6 +91,19 @@ class HalfKAv2_hm {
};
#undef M

// Per-square lookup table with SQUARE_NB entries, written out below as
// 10 rows of 9 values. Only the three centre columns of the first three
// and the last three rows hold non-zero values (1..8); every other square
// maps to 0. The last three rows repeat the first three in reverse order.
// NOTE(review): presumably this selects the king-dependent part of the
// accumulator cache entry index - confirm against the code that indexes
// AccumulatorCaches::Cache.
static constexpr uint8_t KingCacheMaps[SQUARE_NB] = {
0, 0, 0, 0, 1, 2, 0, 0, 0,
0, 0, 0, 5, 4, 3, 0, 0, 0,
0, 0, 0, 6, 7, 8, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 6, 7, 8, 0, 0, 0,
0, 0, 0, 5, 4, 3, 0, 0, 0,
0, 0, 0, 0, 1, 2, 0, 0, 0,
};

// Map advisor and bishop location into White King plane
static constexpr uint8_t ABMap[SQUARE_NB] = {
0, 0, 0, 1, 0, 2, 5, 0, 0,
Expand Down Expand Up @@ -133,9 +142,9 @@ class HalfKAv2_hm {
static constexpr IndexType MaxActiveDimensions = 32;
using IndexList = ValueList<IndexType, MaxActiveDimensions>;

// Get a list of indices for active features
// Index of a feature for a given king position and another piece on some square
template<Color Perspective>
static void append_active_indices(const Position& pos, IndexList& active);
static IndexType make_index(Square s, Piece pc, Square ksq, int ab);

// Get a list of indices for recently changed features
template<Color Perspective>
Expand Down
18 changes: 11 additions & 7 deletions src/nnue/network.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,10 @@ bool Network::save(const std::optional<std::string>& filename) const {
}


Value Network::evaluate(const Position& pos, bool adjusted, int* complexity) const {
Value Network::evaluate(const Position& pos,
AccumulatorCaches::Cache* cache,
bool adjusted,
int* complexity) const {
// We manually align the arrays on the stack because with gcc < 9.3
// overaligning stack variables with alignas() doesn't work correctly.

Expand All @@ -145,7 +148,7 @@ Value Network::evaluate(const Position& pos, bool adjusted, int* complexity) con
ASSERT_ALIGNED(transformedFeatures, alignment);

const int bucket = (pos.count<ALL_PIECES>() - 1) / 4;
const auto psqt = featureTransformer->transform(pos, transformedFeatures, bucket);
const auto psqt = featureTransformer->transform(pos, cache, transformedFeatures, bucket);
const auto positional = network[bucket]->propagate(transformedFeatures);

if (complexity)
Expand Down Expand Up @@ -188,12 +191,12 @@ void Network::verify(std::string evalfilePath) const {
}


void Network::hint_common_access(const Position& pos) const {
featureTransformer->hint_common_access(pos);
void Network::hint_common_access(const Position& pos, AccumulatorCaches::Cache* cache) const {
featureTransformer->hint_common_access(pos, cache);
}


NnueEvalTrace Network::trace_evaluate(const Position& pos) const {
NnueEvalTrace Network::trace_evaluate(const Position& pos, AccumulatorCaches::Cache* cache) const {
// We manually align the arrays on the stack because with gcc < 9.3
// overaligning stack variables with alignas() doesn't work correctly.
constexpr uint64_t alignment = CacheLineSize;
Expand All @@ -214,8 +217,9 @@ NnueEvalTrace Network::trace_evaluate(const Position& pos) const {
t.correctBucket = (pos.count<ALL_PIECES>() - 1) / 4;
for (IndexType bucket = 0; bucket < LayerStacks; ++bucket)
{
const auto materialist = featureTransformer->transform(pos, transformedFeatures, bucket);
const auto positional = network[bucket]->propagate(transformedFeatures);
const auto materialist =
featureTransformer->transform(pos, cache, transformedFeatures, bucket);
const auto positional = network[bucket]->propagate(transformedFeatures);

t.psqt[bucket] = static_cast<Value>(materialist / OutputScale);
t.positional[bucket] = static_cast<Value>(positional / OutputScale);
Expand Down
12 changes: 9 additions & 3 deletions src/nnue/network.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "nnue_architecture.h"
#include "nnue_feature_transformer.h"
#include "nnue_misc.h"
#include "nnue_accumulator.h"

namespace Stockfish {

Expand All @@ -42,13 +43,16 @@ class Network {
bool save(const std::optional<std::string>& filename) const;


Value evaluate(const Position& pos, bool adjusted = false, int* complexity = nullptr) const;
Value evaluate(const Position& pos,
AccumulatorCaches::Cache* cache,
bool adjusted = false,
int* complexity = nullptr) const;


void hint_common_access(const Position& pos) const;
void hint_common_access(const Position& pos, AccumulatorCaches::Cache* cache) const;

void verify(std::string evalfilePath) const;
NnueEvalTrace trace_evaluate(const Position& pos) const;
NnueEvalTrace trace_evaluate(const Position& pos, AccumulatorCaches::Cache* cache) const;

private:
void load_user_net(const std::string&, const std::string&);
Expand All @@ -75,6 +79,8 @@ class Network {
// Hash value of evaluation function structure
static constexpr std::uint32_t hash =
FeatureTransformer::get_hash_value() ^ NetworkArchitecture::get_hash_value();

friend struct AccumulatorCaches::Cache;
};

} // namespace Stockfish::Eval::NNUE
Expand Down
62 changes: 59 additions & 3 deletions src/nnue/nnue_accumulator.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,67 @@

namespace Stockfish::Eval::NNUE {

using BiasType = std::int16_t;
using PSQTWeightType = std::int32_t;
using IndexType = std::uint32_t;

// Class that holds the result of affine transformation of input features
struct alignas(CacheLineSize) Accumulator {
std::int16_t accumulation[2][TransformedFeatureDimensions];
std::int32_t psqtAccumulation[2][PSQTBuckets];
bool computed[2];
std::int16_t accumulation[COLOR_NB][TransformedFeatureDimensions];
std::int32_t psqtAccumulation[COLOR_NB][PSQTBuckets];
bool computed[COLOR_NB];
};


// AccumulatorCaches holds the per-thread accumulator refresh cache, with
// multiple entries for each of the possible king squares. Every entry keeps
// an accumulator together with the piece bitboards that accumulator was
// computed for, so that a refresh can bring the cached entry up to date
// instead of rebuilding the accumulator from scratch.
// The idea was first described by Luecx (author of Koivisto) and is
// commonly referred to as "Finny Tables".
struct AccumulatorCaches {

    struct alignas(CacheLineSize) Cache {

        // One cached accumulator plus the board state it corresponds to.
        struct alignas(CacheLineSize) Entry {
            BiasType       accumulation[COLOR_NB][TransformedFeatureDimensions];
            PSQTWeightType psqtAccumulation[COLOR_NB][PSQTBuckets];
            Bitboard       byColorBB[COLOR_NB][COLOR_NB];
            Bitboard       byTypeBB[COLOR_NB][PIECE_TYPE_NB];

            // Put the entry into the state that matches an empty board:
            // both perspectives hold just the biases (no weights added),
            // the PSQT sums are zero and all bitboards are empty.
            void clear(const BiasType* biases) {

                std::memcpy(accumulation[WHITE], biases, sizeof(accumulation[WHITE]));
                std::memcpy(accumulation[BLACK], biases, sizeof(accumulation[BLACK]));
                std::memset(psqtAccumulation, 0, sizeof(psqtAccumulation));

                std::memset(byTypeBB, 0, sizeof(byTypeBB));
                std::memset(byColorBB, 0, sizeof(byColorBB));
            }
        };

        std::array<Entry, 9 * 3 * 3> entries;

        // Access a single entry by its index
        Entry& operator[](int index) { return entries[index]; }

        // Reset every entry using the biases of the network's feature transformer
        template<typename Network>
        void clear(const Network& network) {
            const auto* biases = network.featureTransformer->biases;

            for (Entry& slot : entries)
                slot.clear(biases);
        }
    };

    Cache cache;

    // Reset the whole cache for the given network
    template<typename Network>
    void clear(const Network& network) {
        cache.clear(network);
    }
};

} // namespace Stockfish::Eval::NNUE
Expand Down
Loading

0 comments on commit cc56cab

Please sign in to comment.