Skip to content

Commit

Permalink
Optimize update_accumulator_refresh_cache()
Browse files (browse the repository at this point in the history)
No functional change
  • Loading branch information
mstembera authored and PikaCat-OuO committed May 14, 2024
1 parent 78d2402 commit 300bf7f
Showing 1 changed file with 16 additions and 6 deletions.
22 changes: 16 additions & 6 deletions src/nnue/nnue_feature_transformer.h
Original file line number | Diff line number | Diff line change
Expand Up @@ -697,7 +697,11 @@ class FeatureTransformer {

for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j)
{
auto entryTile = reinterpret_cast<vec_t*>(&entry.accumulation[j * TileHeight]);
auto accTile =
reinterpret_cast<vec_t*>(&accumulator.accumulation[Perspective][j * TileHeight]);
auto entryTile =
reinterpret_cast<vec_t*>(&entry.accumulation[j * TileHeight]);

for (IndexType k = 0; k < NumRegs; ++k)
acc[k] = entryTile[k];

Expand All @@ -712,7 +716,7 @@ class FeatureTransformer {
auto columnA = reinterpret_cast<const vec_t*>(&weights[offsetA]);

for (unsigned k = 0; k < NumRegs; ++k)
acc[k] = vec_add_16(vec_sub_16(acc[k], columnR[k]), columnA[k]);
acc[k] = vec_add_16(acc[k], vec_sub_16(columnA[k], columnR[k]));
}
for (; i < int(removed.size()); ++i)
{
Expand All @@ -735,12 +739,17 @@ class FeatureTransformer {

for (IndexType k = 0; k < NumRegs; k++)
vec_store(&entryTile[k], acc[k]);
for (IndexType k = 0; k < NumRegs; k++)
vec_store(&accTile[k], acc[k]);
}

for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j)
{
auto entryTilePsqt =
reinterpret_cast<psqt_vec_t*>(&entry.psqtAccumulation[j * PsqtTileHeight]);
auto accTilePsqt = reinterpret_cast<psqt_vec_t*>(
&accumulator.psqtAccumulation[Perspective][j * PsqtTileHeight]);
auto entryTilePsqt = reinterpret_cast<psqt_vec_t*>(
&entry.psqtAccumulation[j * PsqtTileHeight]);

for (std::size_t k = 0; k < NumPsqtRegs; ++k)
psqt[k] = entryTilePsqt[k];

Expand All @@ -765,6 +774,8 @@ class FeatureTransformer {

for (std::size_t k = 0; k < NumPsqtRegs; ++k)
vec_store_psqt(&entryTilePsqt[k], psqt[k]);
for (std::size_t k = 0; k < NumPsqtRegs; ++k)
vec_store_psqt(&accTilePsqt[k], psqt[k]);
}

#else
Expand All @@ -788,8 +799,6 @@ class FeatureTransformer {
entry.psqtAccumulation[k] += psqtWeights[index * PSQTBuckets + k];
}

#endif

// The accumulator of the refresh entry has been updated.
// Now copy its content to the actual accumulator we were refreshing

Expand All @@ -798,6 +807,7 @@ class FeatureTransformer {

std::memcpy(accumulator.psqtAccumulation[Perspective], entry.psqtAccumulation,
sizeof(int32_t) * PSQTBuckets);
#endif

for (Color c : {WHITE, BLACK})
entry.byColorBB[c] = pos.pieces(c);
Expand Down

0 comments on commit 300bf7f

Please sign in to comment.