Path: blob/master/src/nnue/nnue_accumulator.cpp
375 views
/*1Stockfish, a UCI chess playing engine derived from Glaurung 2.12Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)34Stockfish is free software: you can redistribute it and/or modify5it under the terms of the GNU General Public License as published by6the Free Software Foundation, either version 3 of the License, or7(at your option) any later version.89Stockfish is distributed in the hope that it will be useful,10but WITHOUT ANY WARRANTY; without even the implied warranty of11MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the12GNU General Public License for more details.1314You should have received a copy of the GNU General Public License15along with this program. If not, see <http://www.gnu.org/licenses/>.16*/1718#include "nnue_accumulator.h"1920#include <cassert>21#include <cstdint>22#include <initializer_list>23#include <type_traits>2425#include "../bitboard.h"26#include "../misc.h"27#include "../position.h"28#include "../types.h"29#include "nnue_architecture.h"30#include "nnue_feature_transformer.h" // IWYU pragma: keep31#include "simd.h"3233namespace Stockfish::Eval::NNUE {3435using namespace SIMD;3637namespace {3839template<Color Perspective, IndexType TransformedFeatureDimensions>40void double_inc_update(const FeatureTransformer<TransformedFeatureDimensions>& featureTransformer,41const Square ksq,42AccumulatorState& middle_state,43AccumulatorState& target_state,44const AccumulatorState& computed);4546template<Color Perspective, bool Forward, IndexType TransformedFeatureDimensions>47void update_accumulator_incremental(48const FeatureTransformer<TransformedFeatureDimensions>& featureTransformer,49const Square ksq,50AccumulatorState& target_state,51const AccumulatorState& computed);5253template<Color Perspective, IndexType Dimensions>54void update_accumulator_refresh_cache(const FeatureTransformer<Dimensions>& featureTransformer,55const Position& pos,56AccumulatorState& accumulatorState,57AccumulatorCaches::Cache<Dimensions>& cache);5859}6061void AccumulatorState::reset(const DirtyPiece& dp) noexcept {62dirtyPiece = dp;63accumulatorBig.computed.fill(false);64accumulatorSmall.computed.fill(false);65}6667const AccumulatorState& AccumulatorStack::latest() const noexcept { return accumulators[size - 1]; }6869AccumulatorState& AccumulatorStack::mut_latest() noexcept { return accumulators[size - 1]; }7071void AccumulatorStack::reset() noexcept {72accumulators[0].reset({});73size = 1;74}7576void AccumulatorStack::push(const DirtyPiece& dirtyPiece) noexcept {77assert(size + 1 < accumulators.size());78accumulators[size].reset(dirtyPiece);79size++;80}8182void AccumulatorStack::pop() noexcept {83assert(size > 1);84size--;85}8687template<IndexType Dimensions>88void AccumulatorStack::evaluate(const Position& pos,89const FeatureTransformer<Dimensions>& featureTransformer,90AccumulatorCaches::Cache<Dimensions>& cache) noexcept {9192evaluate_side<WHITE>(pos, featureTransformer, cache);93evaluate_side<BLACK>(pos, featureTransformer, cache);94}9596template<Color Perspective, IndexType Dimensions>97void AccumulatorStack::evaluate_side(const Position& pos,98const FeatureTransformer<Dimensions>& featureTransformer,99AccumulatorCaches::Cache<Dimensions>& cache) noexcept {100101const auto last_usable_accum = find_last_usable_accumulator<Perspective, Dimensions>();102103if ((accumulators[last_usable_accum].template acc<Dimensions>()).computed[Perspective])104forward_update_incremental<Perspective>(pos, featureTransformer, last_usable_accum);105106else107{108update_accumulator_refresh_cache<Perspective>(featureTransformer, pos, mut_latest(), cache);109backward_update_incremental<Perspective>(pos, featureTransformer, last_usable_accum);110}111}112113// Find the earliest usable accumulator, this can either be a computed accumulator or the accumulator114// state just before a change that requires full refresh.115template<Color Perspective, IndexType Dimensions>116std::size_t AccumulatorStack::find_last_usable_accumulator() const noexcept {117118for (std::size_t curr_idx = size - 1; curr_idx > 0; curr_idx--)119{120if ((accumulators[curr_idx].template acc<Dimensions>()).computed[Perspective])121return curr_idx;122123if (FeatureSet::requires_refresh(accumulators[curr_idx].dirtyPiece, Perspective))124return curr_idx;125}126127return 0;128}129130template<Color Perspective, IndexType Dimensions>131void AccumulatorStack::forward_update_incremental(132const Position& pos,133const FeatureTransformer<Dimensions>& featureTransformer,134const std::size_t begin) noexcept {135136assert(begin < accumulators.size());137assert((accumulators[begin].acc<Dimensions>()).computed[Perspective]);138139const Square ksq = pos.square<KING>(Perspective);140141for (std::size_t next = begin + 1; next < size; next++)142{143if (next + 1 < size)144{145DirtyPiece& dp1 = accumulators[next].dirtyPiece;146DirtyPiece& dp2 = accumulators[next + 1].dirtyPiece;147148if (dp1.to != SQ_NONE && dp1.to == dp2.remove_sq)149{150const Square captureSq = dp1.to;151dp1.to = dp2.remove_sq = SQ_NONE;152double_inc_update<Perspective>(featureTransformer, ksq, accumulators[next],153accumulators[next + 1], accumulators[next - 1]);154dp1.to = dp2.remove_sq = captureSq;155156next++;157continue;158}159}160update_accumulator_incremental<Perspective, true>(161featureTransformer, ksq, accumulators[next], accumulators[next - 1]);162}163164assert((latest().acc<Dimensions>()).computed[Perspective]);165}166167template<Color Perspective, IndexType Dimensions>168void AccumulatorStack::backward_update_incremental(169const Position& pos,170const FeatureTransformer<Dimensions>& featureTransformer,171const std::size_t end) noexcept {172173assert(end < accumulators.size());174assert(end < size);175assert((latest().acc<Dimensions>()).computed[Perspective]);176177const Square ksq = pos.square<KING>(Perspective);178179for (std::int64_t next = std::int64_t(size) - 2; next >= std::int64_t(end); next--)180update_accumulator_incremental<Perspective, false>(181featureTransformer, ksq, accumulators[next], accumulators[next + 1]);182183assert((accumulators[end].acc<Dimensions>()).computed[Perspective]);184}185186// Explicit template instantiations187template void AccumulatorStack::evaluate<TransformedFeatureDimensionsBig>(188const Position& pos,189const FeatureTransformer<TransformedFeatureDimensionsBig>& featureTransformer,190AccumulatorCaches::Cache<TransformedFeatureDimensionsBig>& cache) noexcept;191template void AccumulatorStack::evaluate<TransformedFeatureDimensionsSmall>(192const Position& pos,193const FeatureTransformer<TransformedFeatureDimensionsSmall>& featureTransformer,194AccumulatorCaches::Cache<TransformedFeatureDimensionsSmall>& cache) noexcept;195196197namespace {198199template<typename VectorWrapper,200IndexType Width,201UpdateOperation... ops,202typename ElementType,203typename... Ts,204std::enable_if_t<is_all_same_v<ElementType, Ts...>, bool> = true>205void fused_row_reduce(const ElementType* in, ElementType* out, const Ts* const... rows) {206constexpr IndexType size = Width * sizeof(ElementType) / sizeof(typename VectorWrapper::type);207208auto* vecIn = reinterpret_cast<const typename VectorWrapper::type*>(in);209auto* vecOut = reinterpret_cast<typename VectorWrapper::type*>(out);210211for (IndexType i = 0; i < size; ++i)212vecOut[i] = fused<VectorWrapper, ops...>(213vecIn[i], reinterpret_cast<const typename VectorWrapper::type*>(rows)[i]...);214}215216template<Color Perspective, IndexType Dimensions>217struct AccumulatorUpdateContext {218const FeatureTransformer<Dimensions>& featureTransformer;219const AccumulatorState& from;220AccumulatorState& to;221222AccumulatorUpdateContext(const FeatureTransformer<Dimensions>& ft,223const AccumulatorState& accF,224AccumulatorState& accT) noexcept :225featureTransformer{ft},226from{accF},227to{accT} {}228229template<UpdateOperation... ops,230typename... Ts,231std::enable_if_t<is_all_same_v<IndexType, Ts...>, bool> = true>232void apply(const Ts... indices) {233auto to_weight_vector = [&](const IndexType index) {234return &featureTransformer.weights[index * Dimensions];235};236237auto to_psqt_weight_vector = [&](const IndexType index) {238return &featureTransformer.psqtWeights[index * PSQTBuckets];239};240241fused_row_reduce<Vec16Wrapper, Dimensions, ops...>(242(from.acc<Dimensions>()).accumulation[Perspective],243(to.acc<Dimensions>()).accumulation[Perspective], to_weight_vector(indices)...);244245fused_row_reduce<Vec32Wrapper, PSQTBuckets, ops...>(246(from.acc<Dimensions>()).psqtAccumulation[Perspective],247(to.acc<Dimensions>()).psqtAccumulation[Perspective], to_psqt_weight_vector(indices)...);248}249};250251template<Color Perspective, IndexType Dimensions>252auto make_accumulator_update_context(const FeatureTransformer<Dimensions>& featureTransformer,253const AccumulatorState& accumulatorFrom,254AccumulatorState& accumulatorTo) noexcept {255return AccumulatorUpdateContext<Perspective, Dimensions>{featureTransformer, accumulatorFrom,256accumulatorTo};257}258259template<Color Perspective, IndexType TransformedFeatureDimensions>260void double_inc_update(const FeatureTransformer<TransformedFeatureDimensions>& featureTransformer,261const Square ksq,262AccumulatorState& middle_state,263AccumulatorState& target_state,264const AccumulatorState& computed) {265266assert(computed.acc<TransformedFeatureDimensions>().computed[Perspective]);267assert(!middle_state.acc<TransformedFeatureDimensions>().computed[Perspective]);268assert(!target_state.acc<TransformedFeatureDimensions>().computed[Perspective]);269270FeatureSet::IndexList removed, added;271FeatureSet::append_changed_indices<Perspective>(ksq, middle_state.dirtyPiece, removed, added);272// you can't capture a piece that was just involved in castling since the rook ends up273// in a square that the king passed274assert(added.size() < 2);275FeatureSet::append_changed_indices<Perspective>(ksq, target_state.dirtyPiece, removed, added);276277assert(added.size() == 1);278assert(removed.size() == 2 || removed.size() == 3);279280// Workaround compiler warning for uninitialized variables, replicated on281// profile builds on windows with gcc 14.2.0.282// TODO remove once unneeded283sf_assume(added.size() == 1);284sf_assume(removed.size() == 2 || removed.size() == 3);285286auto updateContext =287make_accumulator_update_context<Perspective>(featureTransformer, computed, target_state);288289if (removed.size() == 2)290{291updateContext.template apply<Add, Sub, Sub>(added[0], removed[0], removed[1]);292}293else294{295updateContext.template apply<Add, Sub, Sub, Sub>(added[0], removed[0], removed[1],296removed[2]);297}298299target_state.acc<TransformedFeatureDimensions>().computed[Perspective] = true;300}301302template<Color Perspective, bool Forward, IndexType TransformedFeatureDimensions>303void update_accumulator_incremental(304const FeatureTransformer<TransformedFeatureDimensions>& featureTransformer,305const Square ksq,306AccumulatorState& target_state,307const AccumulatorState& computed) {308309assert((computed.acc<TransformedFeatureDimensions>()).computed[Perspective]);310assert(!(target_state.acc<TransformedFeatureDimensions>()).computed[Perspective]);311312// The size must be enough to contain the largest possible update.313// That might depend on the feature set and generally relies on the314// feature set's update cost calculation to be correct and never allow315// updates with more added/removed features than MaxActiveDimensions.316// In this case, the maximum size of both feature addition and removal317// is 2, since we are incrementally updating one move at a time.318FeatureSet::IndexList removed, added;319if constexpr (Forward)320FeatureSet::append_changed_indices<Perspective>(ksq, target_state.dirtyPiece, removed,321added);322else323FeatureSet::append_changed_indices<Perspective>(ksq, computed.dirtyPiece, added, removed);324325assert(added.size() == 1 || added.size() == 2);326assert(removed.size() == 1 || removed.size() == 2);327assert((Forward && added.size() <= removed.size())328|| (!Forward && added.size() >= removed.size()));329330// Workaround compiler warning for uninitialized variables, replicated on331// profile builds on windows with gcc 14.2.0.332// TODO remove once unneeded333sf_assume(added.size() == 1 || added.size() == 2);334sf_assume(removed.size() == 1 || removed.size() == 2);335336auto updateContext =337make_accumulator_update_context<Perspective>(featureTransformer, computed, target_state);338339if ((Forward && removed.size() == 1) || (!Forward && added.size() == 1))340{341assert(added.size() == 1 && removed.size() == 1);342updateContext.template apply<Add, Sub>(added[0], removed[0]);343}344else if (Forward && added.size() == 1)345{346assert(removed.size() == 2);347updateContext.template apply<Add, Sub, Sub>(added[0], removed[0], removed[1]);348}349else if (!Forward && removed.size() == 1)350{351assert(added.size() == 2);352updateContext.template apply<Add, Add, Sub>(added[0], added[1], removed[0]);353}354else355{356assert(added.size() == 2 && removed.size() == 2);357updateContext.template apply<Add, Add, Sub, Sub>(added[0], added[1], removed[0],358removed[1]);359}360361(target_state.acc<TransformedFeatureDimensions>()).computed[Perspective] = true;362}363364template<Color Perspective, IndexType Dimensions>365void update_accumulator_refresh_cache(const FeatureTransformer<Dimensions>& featureTransformer,366const Position& pos,367AccumulatorState& accumulatorState,368AccumulatorCaches::Cache<Dimensions>& cache) {369370using Tiling [[maybe_unused]] = SIMDTiling<Dimensions, Dimensions, PSQTBuckets>;371372const Square ksq = pos.square<KING>(Perspective);373auto& entry = cache[ksq][Perspective];374FeatureSet::IndexList removed, added;375376for (Color c : {WHITE, BLACK})377{378for (PieceType pt = PAWN; pt <= KING; ++pt)379{380const Piece piece = make_piece(c, pt);381const Bitboard oldBB = entry.byColorBB[c] & entry.byTypeBB[pt];382const Bitboard newBB = pos.pieces(c, pt);383Bitboard toRemove = oldBB & ~newBB;384Bitboard toAdd = newBB & ~oldBB;385386while (toRemove)387{388Square sq = pop_lsb(toRemove);389removed.push_back(FeatureSet::make_index<Perspective>(sq, piece, ksq));390}391while (toAdd)392{393Square sq = pop_lsb(toAdd);394added.push_back(FeatureSet::make_index<Perspective>(sq, piece, ksq));395}396}397}398399auto& accumulator = accumulatorState.acc<Dimensions>();400accumulator.computed[Perspective] = true;401402#ifdef VECTOR403vec_t acc[Tiling::NumRegs];404psqt_vec_t psqt[Tiling::NumPsqtRegs];405406for (IndexType j = 0; j < Dimensions / Tiling::TileHeight; ++j)407{408auto* accTile =409reinterpret_cast<vec_t*>(&accumulator.accumulation[Perspective][j * Tiling::TileHeight]);410auto* entryTile = reinterpret_cast<vec_t*>(&entry.accumulation[j * Tiling::TileHeight]);411412for (IndexType k = 0; k < Tiling::NumRegs; ++k)413acc[k] = entryTile[k];414415IndexType i = 0;416for (; i < std::min(removed.size(), added.size()); ++i)417{418IndexType indexR = removed[i];419const IndexType offsetR = Dimensions * indexR + j * Tiling::TileHeight;420auto* columnR = reinterpret_cast<const vec_t*>(&featureTransformer.weights[offsetR]);421IndexType indexA = added[i];422const IndexType offsetA = Dimensions * indexA + j * Tiling::TileHeight;423auto* columnA = reinterpret_cast<const vec_t*>(&featureTransformer.weights[offsetA]);424425for (IndexType k = 0; k < Tiling::NumRegs; ++k)426acc[k] = fused<Vec16Wrapper, Add, Sub>(acc[k], columnA[k], columnR[k]);427}428for (; i < removed.size(); ++i)429{430IndexType index = removed[i];431const IndexType offset = Dimensions * index + j * Tiling::TileHeight;432auto* column = reinterpret_cast<const vec_t*>(&featureTransformer.weights[offset]);433434for (IndexType k = 0; k < Tiling::NumRegs; ++k)435acc[k] = vec_sub_16(acc[k], column[k]);436}437for (; i < added.size(); ++i)438{439IndexType index = added[i];440const IndexType offset = Dimensions * index + j * Tiling::TileHeight;441auto* column = reinterpret_cast<const vec_t*>(&featureTransformer.weights[offset]);442443for (IndexType k = 0; k < Tiling::NumRegs; ++k)444acc[k] = vec_add_16(acc[k], column[k]);445}446447for (IndexType k = 0; k < Tiling::NumRegs; k++)448vec_store(&entryTile[k], acc[k]);449for (IndexType k = 0; k < Tiling::NumRegs; k++)450vec_store(&accTile[k], acc[k]);451}452453for (IndexType j = 0; j < PSQTBuckets / Tiling::PsqtTileHeight; ++j)454{455auto* accTilePsqt = reinterpret_cast<psqt_vec_t*>(456&accumulator.psqtAccumulation[Perspective][j * Tiling::PsqtTileHeight]);457auto* entryTilePsqt =458reinterpret_cast<psqt_vec_t*>(&entry.psqtAccumulation[j * Tiling::PsqtTileHeight]);459460for (IndexType k = 0; k < Tiling::NumPsqtRegs; ++k)461psqt[k] = entryTilePsqt[k];462463for (IndexType i = 0; i < removed.size(); ++i)464{465IndexType index = removed[i];466const IndexType offset = PSQTBuckets * index + j * Tiling::PsqtTileHeight;467auto* columnPsqt =468reinterpret_cast<const psqt_vec_t*>(&featureTransformer.psqtWeights[offset]);469470for (std::size_t k = 0; k < Tiling::NumPsqtRegs; ++k)471psqt[k] = vec_sub_psqt_32(psqt[k], columnPsqt[k]);472}473for (IndexType i = 0; i < added.size(); ++i)474{475IndexType index = added[i];476const IndexType offset = PSQTBuckets * index + j * Tiling::PsqtTileHeight;477auto* columnPsqt =478reinterpret_cast<const psqt_vec_t*>(&featureTransformer.psqtWeights[offset]);479480for (std::size_t k = 0; k < Tiling::NumPsqtRegs; ++k)481psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]);482}483484for (IndexType k = 0; k < Tiling::NumPsqtRegs; ++k)485vec_store_psqt(&entryTilePsqt[k], psqt[k]);486for (IndexType k = 0; k < Tiling::NumPsqtRegs; ++k)487vec_store_psqt(&accTilePsqt[k], psqt[k]);488}489490#else491492for (const auto index : removed)493{494const IndexType offset = Dimensions * index;495for (IndexType j = 0; j < Dimensions; ++j)496entry.accumulation[j] -= featureTransformer.weights[offset + j];497498for (std::size_t k = 0; k < PSQTBuckets; ++k)499entry.psqtAccumulation[k] -= featureTransformer.psqtWeights[index * PSQTBuckets + k];500}501for (const auto index : added)502{503const IndexType offset = Dimensions * index;504for (IndexType j = 0; j < Dimensions; ++j)505entry.accumulation[j] += featureTransformer.weights[offset + j];506507for (std::size_t k = 0; k < PSQTBuckets; ++k)508entry.psqtAccumulation[k] += featureTransformer.psqtWeights[index * PSQTBuckets + k];509}510511// The accumulator of the refresh entry has been updated.512// Now copy its content to the actual accumulator we were refreshing.513514std::memcpy(accumulator.accumulation[Perspective], entry.accumulation,515sizeof(BiasType) * Dimensions);516517std::memcpy(accumulator.psqtAccumulation[Perspective], entry.psqtAccumulation,518sizeof(int32_t) * PSQTBuckets);519#endif520521for (Color c : {WHITE, BLACK})522entry.byColorBB[c] = pos.pieces(c);523524for (PieceType pt = PAWN; pt <= KING; ++pt)525entry.byTypeBB[pt] = pos.pieces(pt);526}527528}529530}531532533