Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
official-stockfish
GitHub Repository: official-stockfish/Stockfish
Path: blob/master/src/nnue/nnue_accumulator.cpp
375 views
1
/*
2
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
3
Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
4
5
Stockfish is free software: you can redistribute it and/or modify
6
it under the terms of the GNU General Public License as published by
7
the Free Software Foundation, either version 3 of the License, or
8
(at your option) any later version.
9
10
Stockfish is distributed in the hope that it will be useful,
11
but WITHOUT ANY WARRANTY; without even the implied warranty of
12
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
GNU General Public License for more details.
14
15
You should have received a copy of the GNU General Public License
16
along with this program. If not, see <http://www.gnu.org/licenses/>.
17
*/
18
19
#include "nnue_accumulator.h"

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <initializer_list>
#include <type_traits>

#include "../bitboard.h"
#include "../misc.h"
#include "../position.h"
#include "../types.h"
#include "nnue_architecture.h"
#include "nnue_feature_transformer.h"  // IWYU pragma: keep
#include "simd.h"
33
34
namespace Stockfish::Eval::NNUE {
35
36
using namespace SIMD;
37
38
namespace {
39
40
template<Color Perspective, IndexType TransformedFeatureDimensions>
41
void double_inc_update(const FeatureTransformer<TransformedFeatureDimensions>& featureTransformer,
42
const Square ksq,
43
AccumulatorState& middle_state,
44
AccumulatorState& target_state,
45
const AccumulatorState& computed);
46
47
template<Color Perspective, bool Forward, IndexType TransformedFeatureDimensions>
48
void update_accumulator_incremental(
49
const FeatureTransformer<TransformedFeatureDimensions>& featureTransformer,
50
const Square ksq,
51
AccumulatorState& target_state,
52
const AccumulatorState& computed);
53
54
template<Color Perspective, IndexType Dimensions>
55
void update_accumulator_refresh_cache(const FeatureTransformer<Dimensions>& featureTransformer,
56
const Position& pos,
57
AccumulatorState& accumulatorState,
58
AccumulatorCaches::Cache<Dimensions>& cache);
59
60
}
61
62
void AccumulatorState::reset(const DirtyPiece& dp) noexcept {
63
dirtyPiece = dp;
64
accumulatorBig.computed.fill(false);
65
accumulatorSmall.computed.fill(false);
66
}
67
68
// Read-only access to the state at the top of the stack (index size - 1).
const AccumulatorState& AccumulatorStack::latest() const noexcept {
    const auto top = size - 1;
    return accumulators[top];
}
69
70
// Mutable access to the state at the top of the stack (index size - 1).
AccumulatorState& AccumulatorStack::mut_latest() noexcept {
    const auto top = size - 1;
    return accumulators[top];
}
71
72
void AccumulatorStack::reset() noexcept {
73
accumulators[0].reset({});
74
size = 1;
75
}
76
77
// Pushes a new state onto the stack: the slot just past the current top is
// reset with the given dirty piece and then becomes the new top.
void AccumulatorStack::push(const DirtyPiece& dirtyPiece) noexcept {
    assert(size + 1 < accumulators.size());
    accumulators[size++].reset(dirtyPiece);
}
82
83
// Drops the top state. The bottom state (index 0) must never be popped.
void AccumulatorStack::pop() noexcept {
    assert(size > 1);
    --size;
}
87
88
template<IndexType Dimensions>
89
void AccumulatorStack::evaluate(const Position& pos,
90
const FeatureTransformer<Dimensions>& featureTransformer,
91
AccumulatorCaches::Cache<Dimensions>& cache) noexcept {
92
93
evaluate_side<WHITE>(pos, featureTransformer, cache);
94
evaluate_side<BLACK>(pos, featureTransformer, cache);
95
}
96
97
template<Color Perspective, IndexType Dimensions>
98
void AccumulatorStack::evaluate_side(const Position& pos,
99
const FeatureTransformer<Dimensions>& featureTransformer,
100
AccumulatorCaches::Cache<Dimensions>& cache) noexcept {
101
102
const auto last_usable_accum = find_last_usable_accumulator<Perspective, Dimensions>();
103
104
if ((accumulators[last_usable_accum].template acc<Dimensions>()).computed[Perspective])
105
forward_update_incremental<Perspective>(pos, featureTransformer, last_usable_accum);
106
107
else
108
{
109
update_accumulator_refresh_cache<Perspective>(featureTransformer, pos, mut_latest(), cache);
110
backward_update_incremental<Perspective>(pos, featureTransformer, last_usable_accum);
111
}
112
}
113
114
// Find the earliest usable accumulator, this can either be a computed accumulator or the accumulator
115
// state just before a change that requires full refresh.
116
template<Color Perspective, IndexType Dimensions>
117
std::size_t AccumulatorStack::find_last_usable_accumulator() const noexcept {
118
119
for (std::size_t curr_idx = size - 1; curr_idx > 0; curr_idx--)
120
{
121
if ((accumulators[curr_idx].template acc<Dimensions>()).computed[Perspective])
122
return curr_idx;
123
124
if (FeatureSet::requires_refresh(accumulators[curr_idx].dirtyPiece, Perspective))
125
return curr_idx;
126
}
127
128
return 0;
129
}
130
131
template<Color Perspective, IndexType Dimensions>
132
void AccumulatorStack::forward_update_incremental(
133
const Position& pos,
134
const FeatureTransformer<Dimensions>& featureTransformer,
135
const std::size_t begin) noexcept {
136
137
assert(begin < accumulators.size());
138
assert((accumulators[begin].acc<Dimensions>()).computed[Perspective]);
139
140
const Square ksq = pos.square<KING>(Perspective);
141
142
for (std::size_t next = begin + 1; next < size; next++)
143
{
144
if (next + 1 < size)
145
{
146
DirtyPiece& dp1 = accumulators[next].dirtyPiece;
147
DirtyPiece& dp2 = accumulators[next + 1].dirtyPiece;
148
149
if (dp1.to != SQ_NONE && dp1.to == dp2.remove_sq)
150
{
151
const Square captureSq = dp1.to;
152
dp1.to = dp2.remove_sq = SQ_NONE;
153
double_inc_update<Perspective>(featureTransformer, ksq, accumulators[next],
154
accumulators[next + 1], accumulators[next - 1]);
155
dp1.to = dp2.remove_sq = captureSq;
156
157
next++;
158
continue;
159
}
160
}
161
update_accumulator_incremental<Perspective, true>(
162
featureTransformer, ksq, accumulators[next], accumulators[next - 1]);
163
}
164
165
assert((latest().acc<Dimensions>()).computed[Perspective]);
166
}
167
168
template<Color Perspective, IndexType Dimensions>
169
void AccumulatorStack::backward_update_incremental(
170
const Position& pos,
171
const FeatureTransformer<Dimensions>& featureTransformer,
172
const std::size_t end) noexcept {
173
174
assert(end < accumulators.size());
175
assert(end < size);
176
assert((latest().acc<Dimensions>()).computed[Perspective]);
177
178
const Square ksq = pos.square<KING>(Perspective);
179
180
for (std::int64_t next = std::int64_t(size) - 2; next >= std::int64_t(end); next--)
181
update_accumulator_incremental<Perspective, false>(
182
featureTransformer, ksq, accumulators[next], accumulators[next + 1]);
183
184
assert((accumulators[end].acc<Dimensions>()).computed[Perspective]);
185
}
186
187
// Explicit template instantiations
188
template void AccumulatorStack::evaluate<TransformedFeatureDimensionsBig>(
189
const Position& pos,
190
const FeatureTransformer<TransformedFeatureDimensionsBig>& featureTransformer,
191
AccumulatorCaches::Cache<TransformedFeatureDimensionsBig>& cache) noexcept;
192
template void AccumulatorStack::evaluate<TransformedFeatureDimensionsSmall>(
193
const Position& pos,
194
const FeatureTransformer<TransformedFeatureDimensionsSmall>& featureTransformer,
195
AccumulatorCaches::Cache<TransformedFeatureDimensionsSmall>& cache) noexcept;
196
197
198
namespace {
199
200
template<typename VectorWrapper,
201
IndexType Width,
202
UpdateOperation... ops,
203
typename ElementType,
204
typename... Ts,
205
std::enable_if_t<is_all_same_v<ElementType, Ts...>, bool> = true>
206
void fused_row_reduce(const ElementType* in, ElementType* out, const Ts* const... rows) {
207
constexpr IndexType size = Width * sizeof(ElementType) / sizeof(typename VectorWrapper::type);
208
209
auto* vecIn = reinterpret_cast<const typename VectorWrapper::type*>(in);
210
auto* vecOut = reinterpret_cast<typename VectorWrapper::type*>(out);
211
212
for (IndexType i = 0; i < size; ++i)
213
vecOut[i] = fused<VectorWrapper, ops...>(
214
vecIn[i], reinterpret_cast<const typename VectorWrapper::type*>(rows)[i]...);
215
}
216
217
template<Color Perspective, IndexType Dimensions>
218
struct AccumulatorUpdateContext {
219
const FeatureTransformer<Dimensions>& featureTransformer;
220
const AccumulatorState& from;
221
AccumulatorState& to;
222
223
AccumulatorUpdateContext(const FeatureTransformer<Dimensions>& ft,
224
const AccumulatorState& accF,
225
AccumulatorState& accT) noexcept :
226
featureTransformer{ft},
227
from{accF},
228
to{accT} {}
229
230
template<UpdateOperation... ops,
231
typename... Ts,
232
std::enable_if_t<is_all_same_v<IndexType, Ts...>, bool> = true>
233
void apply(const Ts... indices) {
234
auto to_weight_vector = [&](const IndexType index) {
235
return &featureTransformer.weights[index * Dimensions];
236
};
237
238
auto to_psqt_weight_vector = [&](const IndexType index) {
239
return &featureTransformer.psqtWeights[index * PSQTBuckets];
240
};
241
242
fused_row_reduce<Vec16Wrapper, Dimensions, ops...>(
243
(from.acc<Dimensions>()).accumulation[Perspective],
244
(to.acc<Dimensions>()).accumulation[Perspective], to_weight_vector(indices)...);
245
246
fused_row_reduce<Vec32Wrapper, PSQTBuckets, ops...>(
247
(from.acc<Dimensions>()).psqtAccumulation[Perspective],
248
(to.acc<Dimensions>()).psqtAccumulation[Perspective], to_psqt_weight_vector(indices)...);
249
}
250
};
251
252
template<Color Perspective, IndexType Dimensions>
253
auto make_accumulator_update_context(const FeatureTransformer<Dimensions>& featureTransformer,
254
const AccumulatorState& accumulatorFrom,
255
AccumulatorState& accumulatorTo) noexcept {
256
return AccumulatorUpdateContext<Perspective, Dimensions>{featureTransformer, accumulatorFrom,
257
accumulatorTo};
258
}
259
260
template<Color Perspective, IndexType TransformedFeatureDimensions>
261
void double_inc_update(const FeatureTransformer<TransformedFeatureDimensions>& featureTransformer,
262
const Square ksq,
263
AccumulatorState& middle_state,
264
AccumulatorState& target_state,
265
const AccumulatorState& computed) {
266
267
assert(computed.acc<TransformedFeatureDimensions>().computed[Perspective]);
268
assert(!middle_state.acc<TransformedFeatureDimensions>().computed[Perspective]);
269
assert(!target_state.acc<TransformedFeatureDimensions>().computed[Perspective]);
270
271
FeatureSet::IndexList removed, added;
272
FeatureSet::append_changed_indices<Perspective>(ksq, middle_state.dirtyPiece, removed, added);
273
// you can't capture a piece that was just involved in castling since the rook ends up
274
// in a square that the king passed
275
assert(added.size() < 2);
276
FeatureSet::append_changed_indices<Perspective>(ksq, target_state.dirtyPiece, removed, added);
277
278
assert(added.size() == 1);
279
assert(removed.size() == 2 || removed.size() == 3);
280
281
// Workaround compiler warning for uninitialized variables, replicated on
282
// profile builds on windows with gcc 14.2.0.
283
// TODO remove once unneeded
284
sf_assume(added.size() == 1);
285
sf_assume(removed.size() == 2 || removed.size() == 3);
286
287
auto updateContext =
288
make_accumulator_update_context<Perspective>(featureTransformer, computed, target_state);
289
290
if (removed.size() == 2)
291
{
292
updateContext.template apply<Add, Sub, Sub>(added[0], removed[0], removed[1]);
293
}
294
else
295
{
296
updateContext.template apply<Add, Sub, Sub, Sub>(added[0], removed[0], removed[1],
297
removed[2]);
298
}
299
300
target_state.acc<TransformedFeatureDimensions>().computed[Perspective] = true;
301
}
302
303
template<Color Perspective, bool Forward, IndexType TransformedFeatureDimensions>
304
void update_accumulator_incremental(
305
const FeatureTransformer<TransformedFeatureDimensions>& featureTransformer,
306
const Square ksq,
307
AccumulatorState& target_state,
308
const AccumulatorState& computed) {
309
310
assert((computed.acc<TransformedFeatureDimensions>()).computed[Perspective]);
311
assert(!(target_state.acc<TransformedFeatureDimensions>()).computed[Perspective]);
312
313
// The size must be enough to contain the largest possible update.
314
// That might depend on the feature set and generally relies on the
315
// feature set's update cost calculation to be correct and never allow
316
// updates with more added/removed features than MaxActiveDimensions.
317
// In this case, the maximum size of both feature addition and removal
318
// is 2, since we are incrementally updating one move at a time.
319
FeatureSet::IndexList removed, added;
320
if constexpr (Forward)
321
FeatureSet::append_changed_indices<Perspective>(ksq, target_state.dirtyPiece, removed,
322
added);
323
else
324
FeatureSet::append_changed_indices<Perspective>(ksq, computed.dirtyPiece, added, removed);
325
326
assert(added.size() == 1 || added.size() == 2);
327
assert(removed.size() == 1 || removed.size() == 2);
328
assert((Forward && added.size() <= removed.size())
329
|| (!Forward && added.size() >= removed.size()));
330
331
// Workaround compiler warning for uninitialized variables, replicated on
332
// profile builds on windows with gcc 14.2.0.
333
// TODO remove once unneeded
334
sf_assume(added.size() == 1 || added.size() == 2);
335
sf_assume(removed.size() == 1 || removed.size() == 2);
336
337
auto updateContext =
338
make_accumulator_update_context<Perspective>(featureTransformer, computed, target_state);
339
340
if ((Forward && removed.size() == 1) || (!Forward && added.size() == 1))
341
{
342
assert(added.size() == 1 && removed.size() == 1);
343
updateContext.template apply<Add, Sub>(added[0], removed[0]);
344
}
345
else if (Forward && added.size() == 1)
346
{
347
assert(removed.size() == 2);
348
updateContext.template apply<Add, Sub, Sub>(added[0], removed[0], removed[1]);
349
}
350
else if (!Forward && removed.size() == 1)
351
{
352
assert(added.size() == 2);
353
updateContext.template apply<Add, Add, Sub>(added[0], added[1], removed[0]);
354
}
355
else
356
{
357
assert(added.size() == 2 && removed.size() == 2);
358
updateContext.template apply<Add, Add, Sub, Sub>(added[0], added[1], removed[0],
359
removed[1]);
360
}
361
362
(target_state.acc<TransformedFeatureDimensions>()).computed[Perspective] = true;
363
}
364
365
template<Color Perspective, IndexType Dimensions>
366
void update_accumulator_refresh_cache(const FeatureTransformer<Dimensions>& featureTransformer,
367
const Position& pos,
368
AccumulatorState& accumulatorState,
369
AccumulatorCaches::Cache<Dimensions>& cache) {
370
371
using Tiling [[maybe_unused]] = SIMDTiling<Dimensions, Dimensions, PSQTBuckets>;
372
373
const Square ksq = pos.square<KING>(Perspective);
374
auto& entry = cache[ksq][Perspective];
375
FeatureSet::IndexList removed, added;
376
377
for (Color c : {WHITE, BLACK})
378
{
379
for (PieceType pt = PAWN; pt <= KING; ++pt)
380
{
381
const Piece piece = make_piece(c, pt);
382
const Bitboard oldBB = entry.byColorBB[c] & entry.byTypeBB[pt];
383
const Bitboard newBB = pos.pieces(c, pt);
384
Bitboard toRemove = oldBB & ~newBB;
385
Bitboard toAdd = newBB & ~oldBB;
386
387
while (toRemove)
388
{
389
Square sq = pop_lsb(toRemove);
390
removed.push_back(FeatureSet::make_index<Perspective>(sq, piece, ksq));
391
}
392
while (toAdd)
393
{
394
Square sq = pop_lsb(toAdd);
395
added.push_back(FeatureSet::make_index<Perspective>(sq, piece, ksq));
396
}
397
}
398
}
399
400
auto& accumulator = accumulatorState.acc<Dimensions>();
401
accumulator.computed[Perspective] = true;
402
403
#ifdef VECTOR
404
vec_t acc[Tiling::NumRegs];
405
psqt_vec_t psqt[Tiling::NumPsqtRegs];
406
407
for (IndexType j = 0; j < Dimensions / Tiling::TileHeight; ++j)
408
{
409
auto* accTile =
410
reinterpret_cast<vec_t*>(&accumulator.accumulation[Perspective][j * Tiling::TileHeight]);
411
auto* entryTile = reinterpret_cast<vec_t*>(&entry.accumulation[j * Tiling::TileHeight]);
412
413
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
414
acc[k] = entryTile[k];
415
416
IndexType i = 0;
417
for (; i < std::min(removed.size(), added.size()); ++i)
418
{
419
IndexType indexR = removed[i];
420
const IndexType offsetR = Dimensions * indexR + j * Tiling::TileHeight;
421
auto* columnR = reinterpret_cast<const vec_t*>(&featureTransformer.weights[offsetR]);
422
IndexType indexA = added[i];
423
const IndexType offsetA = Dimensions * indexA + j * Tiling::TileHeight;
424
auto* columnA = reinterpret_cast<const vec_t*>(&featureTransformer.weights[offsetA]);
425
426
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
427
acc[k] = fused<Vec16Wrapper, Add, Sub>(acc[k], columnA[k], columnR[k]);
428
}
429
for (; i < removed.size(); ++i)
430
{
431
IndexType index = removed[i];
432
const IndexType offset = Dimensions * index + j * Tiling::TileHeight;
433
auto* column = reinterpret_cast<const vec_t*>(&featureTransformer.weights[offset]);
434
435
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
436
acc[k] = vec_sub_16(acc[k], column[k]);
437
}
438
for (; i < added.size(); ++i)
439
{
440
IndexType index = added[i];
441
const IndexType offset = Dimensions * index + j * Tiling::TileHeight;
442
auto* column = reinterpret_cast<const vec_t*>(&featureTransformer.weights[offset]);
443
444
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
445
acc[k] = vec_add_16(acc[k], column[k]);
446
}
447
448
for (IndexType k = 0; k < Tiling::NumRegs; k++)
449
vec_store(&entryTile[k], acc[k]);
450
for (IndexType k = 0; k < Tiling::NumRegs; k++)
451
vec_store(&accTile[k], acc[k]);
452
}
453
454
for (IndexType j = 0; j < PSQTBuckets / Tiling::PsqtTileHeight; ++j)
455
{
456
auto* accTilePsqt = reinterpret_cast<psqt_vec_t*>(
457
&accumulator.psqtAccumulation[Perspective][j * Tiling::PsqtTileHeight]);
458
auto* entryTilePsqt =
459
reinterpret_cast<psqt_vec_t*>(&entry.psqtAccumulation[j * Tiling::PsqtTileHeight]);
460
461
for (IndexType k = 0; k < Tiling::NumPsqtRegs; ++k)
462
psqt[k] = entryTilePsqt[k];
463
464
for (IndexType i = 0; i < removed.size(); ++i)
465
{
466
IndexType index = removed[i];
467
const IndexType offset = PSQTBuckets * index + j * Tiling::PsqtTileHeight;
468
auto* columnPsqt =
469
reinterpret_cast<const psqt_vec_t*>(&featureTransformer.psqtWeights[offset]);
470
471
for (std::size_t k = 0; k < Tiling::NumPsqtRegs; ++k)
472
psqt[k] = vec_sub_psqt_32(psqt[k], columnPsqt[k]);
473
}
474
for (IndexType i = 0; i < added.size(); ++i)
475
{
476
IndexType index = added[i];
477
const IndexType offset = PSQTBuckets * index + j * Tiling::PsqtTileHeight;
478
auto* columnPsqt =
479
reinterpret_cast<const psqt_vec_t*>(&featureTransformer.psqtWeights[offset]);
480
481
for (std::size_t k = 0; k < Tiling::NumPsqtRegs; ++k)
482
psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]);
483
}
484
485
for (IndexType k = 0; k < Tiling::NumPsqtRegs; ++k)
486
vec_store_psqt(&entryTilePsqt[k], psqt[k]);
487
for (IndexType k = 0; k < Tiling::NumPsqtRegs; ++k)
488
vec_store_psqt(&accTilePsqt[k], psqt[k]);
489
}
490
491
#else
492
493
for (const auto index : removed)
494
{
495
const IndexType offset = Dimensions * index;
496
for (IndexType j = 0; j < Dimensions; ++j)
497
entry.accumulation[j] -= featureTransformer.weights[offset + j];
498
499
for (std::size_t k = 0; k < PSQTBuckets; ++k)
500
entry.psqtAccumulation[k] -= featureTransformer.psqtWeights[index * PSQTBuckets + k];
501
}
502
for (const auto index : added)
503
{
504
const IndexType offset = Dimensions * index;
505
for (IndexType j = 0; j < Dimensions; ++j)
506
entry.accumulation[j] += featureTransformer.weights[offset + j];
507
508
for (std::size_t k = 0; k < PSQTBuckets; ++k)
509
entry.psqtAccumulation[k] += featureTransformer.psqtWeights[index * PSQTBuckets + k];
510
}
511
512
// The accumulator of the refresh entry has been updated.
513
// Now copy its content to the actual accumulator we were refreshing.
514
515
std::memcpy(accumulator.accumulation[Perspective], entry.accumulation,
516
sizeof(BiasType) * Dimensions);
517
518
std::memcpy(accumulator.psqtAccumulation[Perspective], entry.psqtAccumulation,
519
sizeof(int32_t) * PSQTBuckets);
520
#endif
521
522
for (Color c : {WHITE, BLACK})
523
entry.byColorBB[c] = pos.pieces(c);
524
525
for (PieceType pt = PAWN; pt <= KING; ++pt)
526
entry.byTypeBB[pt] = pos.pieces(pt);
527
}
528
529
}
530
531
}
532
533