Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
official-stockfish
GitHub Repository: official-stockfish/Stockfish
Path: blob/master/src/nnue/nnue_accumulator.cpp
636 views
1
/*
2
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
3
Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)
4
5
Stockfish is free software: you can redistribute it and/or modify
6
it under the terms of the GNU General Public License as published by
7
the Free Software Foundation, either version 3 of the License, or
8
(at your option) any later version.
9
10
Stockfish is distributed in the hope that it will be useful,
11
but WITHOUT ANY WARRANTY; without even the implied warranty of
12
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
GNU General Public License for more details.
14
15
You should have received a copy of the GNU General Public License
16
along with this program. If not, see <http://www.gnu.org/licenses/>.
17
*/
18
19
#include "nnue_accumulator.h"
20
21
#include <cassert>
22
#include <cstdint>
23
#include <new>
24
#include <type_traits>
25
26
#include "../bitboard.h"
27
#include "../misc.h"
28
#include "../position.h"
29
#include "../types.h"
30
#include "features/half_ka_v2_hm.h"
31
#include "nnue_architecture.h"
32
#include "nnue_common.h"
33
#include "nnue_feature_transformer.h" // IWYU pragma: keep
34
#include "simd.h"
35
36
namespace Stockfish::Eval::NNUE {
37
38
using namespace SIMD;
39
40
namespace {
41
42
template<IndexType TransformedFeatureDimensions>
43
void double_inc_update(Color perspective,
44
const FeatureTransformer<TransformedFeatureDimensions>& featureTransformer,
45
const Square ksq,
46
AccumulatorState<PSQFeatureSet>& middle_state,
47
AccumulatorState<PSQFeatureSet>& target_state,
48
const AccumulatorState<PSQFeatureSet>& computed);
49
50
template<IndexType TransformedFeatureDimensions>
51
void double_inc_update(Color perspective,
52
const FeatureTransformer<TransformedFeatureDimensions>& featureTransformer,
53
const Square ksq,
54
AccumulatorState<ThreatFeatureSet>& middle_state,
55
AccumulatorState<ThreatFeatureSet>& target_state,
56
const AccumulatorState<ThreatFeatureSet>& computed,
57
const DirtyPiece& dp2);
58
59
template<bool Forward, typename FeatureSet, IndexType TransformedFeatureDimensions>
60
void update_accumulator_incremental(
61
Color perspective,
62
const FeatureTransformer<TransformedFeatureDimensions>& featureTransformer,
63
const Square ksq,
64
AccumulatorState<FeatureSet>& target_state,
65
const AccumulatorState<FeatureSet>& computed);
66
67
template<IndexType Dimensions>
68
void update_accumulator_refresh_cache(Color perspective,
69
const FeatureTransformer<Dimensions>& featureTransformer,
70
const Position& pos,
71
AccumulatorState<PSQFeatureSet>& accumulatorState,
72
AccumulatorCaches::Cache<Dimensions>& cache);
73
74
template<IndexType Dimensions>
75
void update_threats_accumulator_full(Color perspective,
76
const FeatureTransformer<Dimensions>& featureTransformer,
77
const Position& pos,
78
AccumulatorState<ThreatFeatureSet>& accumulatorState);
79
}
80
81
template<typename T>
82
const AccumulatorState<T>& AccumulatorStack::latest() const noexcept {
83
return accumulators<T>()[size - 1];
84
}
85
86
// Explicit template instantiations
87
template const AccumulatorState<PSQFeatureSet>& AccumulatorStack::latest() const noexcept;
88
template const AccumulatorState<ThreatFeatureSet>& AccumulatorStack::latest() const noexcept;
89
90
template<typename T>
91
AccumulatorState<T>& AccumulatorStack::mut_latest() noexcept {
92
return mut_accumulators<T>()[size - 1];
93
}
94
95
template<typename T>
96
const std::array<AccumulatorState<T>, AccumulatorStack::MaxSize>&
97
AccumulatorStack::accumulators() const noexcept {
98
static_assert(std::is_same_v<T, PSQFeatureSet> || std::is_same_v<T, ThreatFeatureSet>,
99
"Invalid Feature Set Type");
100
101
if constexpr (std::is_same_v<T, PSQFeatureSet>)
102
return psq_accumulators;
103
104
if constexpr (std::is_same_v<T, ThreatFeatureSet>)
105
return threat_accumulators;
106
}
107
108
template<typename T>
109
std::array<AccumulatorState<T>, AccumulatorStack::MaxSize>&
110
AccumulatorStack::mut_accumulators() noexcept {
111
static_assert(std::is_same_v<T, PSQFeatureSet> || std::is_same_v<T, ThreatFeatureSet>,
112
"Invalid Feature Set Type");
113
114
if constexpr (std::is_same_v<T, PSQFeatureSet>)
115
return psq_accumulators;
116
117
if constexpr (std::is_same_v<T, ThreatFeatureSet>)
118
return threat_accumulators;
119
}
120
121
void AccumulatorStack::reset() noexcept {
122
psq_accumulators[0].reset({});
123
threat_accumulators[0].reset({});
124
size = 1;
125
}
126
127
std::pair<DirtyPiece&, DirtyThreats&> AccumulatorStack::push() noexcept {
128
assert(size < MaxSize);
129
auto& dp = psq_accumulators[size].reset();
130
auto& dts = threat_accumulators[size].reset();
131
new (&dts) DirtyThreats;
132
size++;
133
return {dp, dts};
134
}
135
136
// Drops the top entry. The bottom entry must always remain, hence the
// size > 1 precondition.
void AccumulatorStack::pop() noexcept {
    assert(size > 1);
    --size;
}
140
141
template<IndexType Dimensions>
142
void AccumulatorStack::evaluate(const Position& pos,
143
const FeatureTransformer<Dimensions>& featureTransformer,
144
AccumulatorCaches::Cache<Dimensions>& cache) noexcept {
145
constexpr bool UseThreats = (Dimensions == TransformedFeatureDimensionsBig);
146
147
evaluate_side<PSQFeatureSet>(WHITE, pos, featureTransformer, cache);
148
149
if (UseThreats)
150
evaluate_side<ThreatFeatureSet>(WHITE, pos, featureTransformer, cache);
151
152
evaluate_side<PSQFeatureSet>(BLACK, pos, featureTransformer, cache);
153
154
if (UseThreats)
155
evaluate_side<ThreatFeatureSet>(BLACK, pos, featureTransformer, cache);
156
}
157
158
template<typename FeatureSet, IndexType Dimensions>
159
void AccumulatorStack::evaluate_side(Color perspective,
160
const Position& pos,
161
const FeatureTransformer<Dimensions>& featureTransformer,
162
AccumulatorCaches::Cache<Dimensions>& cache) noexcept {
163
164
const auto last_usable_accum =
165
find_last_usable_accumulator<FeatureSet, Dimensions>(perspective);
166
167
if ((accumulators<FeatureSet>()[last_usable_accum].template acc<Dimensions>())
168
.computed[perspective])
169
forward_update_incremental<FeatureSet>(perspective, pos, featureTransformer,
170
last_usable_accum);
171
172
else
173
{
174
if constexpr (std::is_same_v<FeatureSet, PSQFeatureSet>)
175
update_accumulator_refresh_cache(perspective, featureTransformer, pos,
176
mut_latest<PSQFeatureSet>(), cache);
177
else
178
update_threats_accumulator_full(perspective, featureTransformer, pos,
179
mut_latest<ThreatFeatureSet>());
180
181
backward_update_incremental<FeatureSet>(perspective, pos, featureTransformer,
182
last_usable_accum);
183
}
184
}
185
186
// Find the last (most recent) usable accumulator. This is either an accumulator that is
// already computed or the accumulator state whose diff requires a full refresh.
188
template<typename FeatureSet, IndexType Dimensions>
189
std::size_t AccumulatorStack::find_last_usable_accumulator(Color perspective) const noexcept {
190
191
for (std::size_t curr_idx = size - 1; curr_idx > 0; curr_idx--)
192
{
193
if ((accumulators<FeatureSet>()[curr_idx].template acc<Dimensions>()).computed[perspective])
194
return curr_idx;
195
196
if (FeatureSet::requires_refresh(accumulators<FeatureSet>()[curr_idx].diff, perspective))
197
return curr_idx;
198
}
199
200
return 0;
201
}
202
203
template<typename FeatureSet, IndexType Dimensions>
204
void AccumulatorStack::forward_update_incremental(
205
Color perspective,
206
const Position& pos,
207
const FeatureTransformer<Dimensions>& featureTransformer,
208
const std::size_t begin) noexcept {
209
210
assert(begin < accumulators<FeatureSet>().size());
211
assert((accumulators<FeatureSet>()[begin].template acc<Dimensions>()).computed[perspective]);
212
213
const Square ksq = pos.square<KING>(perspective);
214
215
for (std::size_t next = begin + 1; next < size; next++)
216
{
217
if (next + 1 < size)
218
{
219
DirtyPiece& dp1 = mut_accumulators<PSQFeatureSet>()[next].diff;
220
DirtyPiece& dp2 = mut_accumulators<PSQFeatureSet>()[next + 1].diff;
221
222
auto& accumulators = mut_accumulators<FeatureSet>();
223
224
if constexpr (std::is_same_v<FeatureSet, ThreatFeatureSet>)
225
{
226
if (dp2.remove_sq != SQ_NONE
227
&& (accumulators[next].diff.threateningSqs & square_bb(dp2.remove_sq)))
228
{
229
double_inc_update(perspective, featureTransformer, ksq, accumulators[next],
230
accumulators[next + 1], accumulators[next - 1], dp2);
231
next++;
232
continue;
233
}
234
}
235
236
if constexpr (std::is_same_v<FeatureSet, PSQFeatureSet>)
237
{
238
if (dp1.to != SQ_NONE && dp1.to == dp2.remove_sq)
239
{
240
const Square captureSq = dp1.to;
241
dp1.to = dp2.remove_sq = SQ_NONE;
242
double_inc_update(perspective, featureTransformer, ksq, accumulators[next],
243
accumulators[next + 1], accumulators[next - 1]);
244
dp1.to = dp2.remove_sq = captureSq;
245
next++;
246
continue;
247
}
248
}
249
}
250
251
update_accumulator_incremental<true>(perspective, featureTransformer, ksq,
252
mut_accumulators<FeatureSet>()[next],
253
accumulators<FeatureSet>()[next - 1]);
254
}
255
256
assert((latest<PSQFeatureSet>().acc<Dimensions>()).computed[perspective]);
257
}
258
259
template<typename FeatureSet, IndexType Dimensions>
260
void AccumulatorStack::backward_update_incremental(
261
Color perspective,
262
263
const Position& pos,
264
const FeatureTransformer<Dimensions>& featureTransformer,
265
const std::size_t end) noexcept {
266
267
assert(end < accumulators<FeatureSet>().size());
268
assert(end < size);
269
assert((latest<FeatureSet>().template acc<Dimensions>()).computed[perspective]);
270
271
const Square ksq = pos.square<KING>(perspective);
272
273
for (std::int64_t next = std::int64_t(size) - 2; next >= std::int64_t(end); next--)
274
update_accumulator_incremental<false>(perspective, featureTransformer, ksq,
275
mut_accumulators<FeatureSet>()[next],
276
accumulators<FeatureSet>()[next + 1]);
277
278
assert((accumulators<FeatureSet>()[end].template acc<Dimensions>()).computed[perspective]);
279
}
280
281
// Explicit template instantiations
282
template void AccumulatorStack::evaluate<TransformedFeatureDimensionsBig>(
283
const Position& pos,
284
const FeatureTransformer<TransformedFeatureDimensionsBig>& featureTransformer,
285
AccumulatorCaches::Cache<TransformedFeatureDimensionsBig>& cache) noexcept;
286
template void AccumulatorStack::evaluate<TransformedFeatureDimensionsSmall>(
287
const Position& pos,
288
const FeatureTransformer<TransformedFeatureDimensionsSmall>& featureTransformer,
289
AccumulatorCaches::Cache<TransformedFeatureDimensionsSmall>& cache) noexcept;
290
291
292
namespace {
293
294
template<typename VectorWrapper,
295
IndexType Width,
296
UpdateOperation... ops,
297
typename ElementType,
298
typename... Ts,
299
std::enable_if_t<is_all_same_v<ElementType, Ts...>, bool> = true>
300
void fused_row_reduce(const ElementType* in, ElementType* out, const Ts* const... rows) {
301
constexpr IndexType size = Width * sizeof(ElementType) / sizeof(typename VectorWrapper::type);
302
303
auto* vecIn = reinterpret_cast<const typename VectorWrapper::type*>(in);
304
auto* vecOut = reinterpret_cast<typename VectorWrapper::type*>(out);
305
306
for (IndexType i = 0; i < size; ++i)
307
vecOut[i] = fused<VectorWrapper, ops...>(
308
vecIn[i], reinterpret_cast<const typename VectorWrapper::type*>(rows)[i]...);
309
}
310
311
template<typename FeatureSet, IndexType Dimensions>
312
struct AccumulatorUpdateContext {
313
Color perspective;
314
const FeatureTransformer<Dimensions>& featureTransformer;
315
const AccumulatorState<FeatureSet>& from;
316
AccumulatorState<FeatureSet>& to;
317
318
AccumulatorUpdateContext(Color persp,
319
const FeatureTransformer<Dimensions>& ft,
320
const AccumulatorState<FeatureSet>& accF,
321
AccumulatorState<FeatureSet>& accT) noexcept :
322
perspective{persp},
323
featureTransformer{ft},
324
from{accF},
325
to{accT} {}
326
327
template<UpdateOperation... ops,
328
typename... Ts,
329
std::enable_if_t<is_all_same_v<IndexType, Ts...>, bool> = true>
330
void apply(const Ts... indices) {
331
auto to_weight_vector = [&](const IndexType index) {
332
return &featureTransformer.weights[index * Dimensions];
333
};
334
335
auto to_psqt_weight_vector = [&](const IndexType index) {
336
return &featureTransformer.psqtWeights[index * PSQTBuckets];
337
};
338
339
fused_row_reduce<Vec16Wrapper, Dimensions, ops...>(
340
(from.template acc<Dimensions>()).accumulation[perspective].data(),
341
(to.template acc<Dimensions>()).accumulation[perspective].data(),
342
to_weight_vector(indices)...);
343
344
fused_row_reduce<Vec32Wrapper, PSQTBuckets, ops...>(
345
(from.template acc<Dimensions>()).psqtAccumulation[perspective].data(),
346
(to.template acc<Dimensions>()).psqtAccumulation[perspective].data(),
347
to_psqt_weight_vector(indices)...);
348
}
349
350
void apply(const typename FeatureSet::IndexList& added,
351
const typename FeatureSet::IndexList& removed) {
352
const auto& fromAcc = from.template acc<Dimensions>().accumulation[perspective];
353
auto& toAcc = to.template acc<Dimensions>().accumulation[perspective];
354
355
const auto& fromPsqtAcc = from.template acc<Dimensions>().psqtAccumulation[perspective];
356
auto& toPsqtAcc = to.template acc<Dimensions>().psqtAccumulation[perspective];
357
358
#ifdef VECTOR
359
using Tiling = SIMDTiling<Dimensions, Dimensions, PSQTBuckets>;
360
vec_t acc[Tiling::NumRegs];
361
psqt_vec_t psqt[Tiling::NumPsqtRegs];
362
363
const auto* threatWeights = &featureTransformer.threatWeights[0];
364
365
for (IndexType j = 0; j < Dimensions / Tiling::TileHeight; ++j)
366
{
367
auto* fromTile = reinterpret_cast<const vec_t*>(&fromAcc[j * Tiling::TileHeight]);
368
auto* toTile = reinterpret_cast<vec_t*>(&toAcc[j * Tiling::TileHeight]);
369
370
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
371
acc[k] = fromTile[k];
372
373
for (int i = 0; i < removed.ssize(); ++i)
374
{
375
size_t index = removed[i];
376
const size_t offset = Dimensions * index;
377
auto* column = reinterpret_cast<const vec_i8_t*>(&threatWeights[offset]);
378
379
#ifdef USE_NEON
380
for (IndexType k = 0; k < Tiling::NumRegs; k += 2)
381
{
382
acc[k] = vec_sub_16(acc[k], vmovl_s8(vget_low_s8(column[k / 2])));
383
acc[k + 1] = vec_sub_16(acc[k + 1], vmovl_high_s8(column[k / 2]));
384
}
385
#else
386
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
387
acc[k] = vec_sub_16(acc[k], vec_convert_8_16(column[k]));
388
#endif
389
}
390
391
for (int i = 0; i < added.ssize(); ++i)
392
{
393
size_t index = added[i];
394
const size_t offset = Dimensions * index;
395
auto* column = reinterpret_cast<const vec_i8_t*>(&threatWeights[offset]);
396
397
#ifdef USE_NEON
398
for (IndexType k = 0; k < Tiling::NumRegs; k += 2)
399
{
400
acc[k] = vec_add_16(acc[k], vmovl_s8(vget_low_s8(column[k / 2])));
401
acc[k + 1] = vec_add_16(acc[k + 1], vmovl_high_s8(column[k / 2]));
402
}
403
#else
404
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
405
acc[k] = vec_add_16(acc[k], vec_convert_8_16(column[k]));
406
#endif
407
}
408
409
for (IndexType k = 0; k < Tiling::NumRegs; k++)
410
vec_store(&toTile[k], acc[k]);
411
412
threatWeights += Tiling::TileHeight;
413
}
414
415
for (IndexType j = 0; j < PSQTBuckets / Tiling::PsqtTileHeight; ++j)
416
{
417
auto* fromTilePsqt =
418
reinterpret_cast<const psqt_vec_t*>(&fromPsqtAcc[j * Tiling::PsqtTileHeight]);
419
auto* toTilePsqt =
420
reinterpret_cast<psqt_vec_t*>(&toPsqtAcc[j * Tiling::PsqtTileHeight]);
421
422
for (IndexType k = 0; k < Tiling::NumPsqtRegs; ++k)
423
psqt[k] = fromTilePsqt[k];
424
425
for (int i = 0; i < removed.ssize(); ++i)
426
{
427
size_t index = removed[i];
428
const size_t offset = PSQTBuckets * index + j * Tiling::PsqtTileHeight;
429
auto* columnPsqt = reinterpret_cast<const psqt_vec_t*>(
430
&featureTransformer.threatPsqtWeights[offset]);
431
432
for (std::size_t k = 0; k < Tiling::NumPsqtRegs; ++k)
433
psqt[k] = vec_sub_psqt_32(psqt[k], columnPsqt[k]);
434
}
435
436
for (int i = 0; i < added.ssize(); ++i)
437
{
438
size_t index = added[i];
439
const size_t offset = PSQTBuckets * index + j * Tiling::PsqtTileHeight;
440
auto* columnPsqt = reinterpret_cast<const psqt_vec_t*>(
441
&featureTransformer.threatPsqtWeights[offset]);
442
443
for (std::size_t k = 0; k < Tiling::NumPsqtRegs; ++k)
444
psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]);
445
}
446
447
for (IndexType k = 0; k < Tiling::NumPsqtRegs; ++k)
448
vec_store_psqt(&toTilePsqt[k], psqt[k]);
449
}
450
451
#else
452
453
toAcc = fromAcc;
454
toPsqtAcc = fromPsqtAcc;
455
456
for (const auto index : removed)
457
{
458
const IndexType offset = Dimensions * index;
459
460
for (IndexType j = 0; j < Dimensions; ++j)
461
toAcc[j] -= featureTransformer.threatWeights[offset + j];
462
463
for (std::size_t k = 0; k < PSQTBuckets; ++k)
464
toPsqtAcc[k] -= featureTransformer.threatPsqtWeights[index * PSQTBuckets + k];
465
}
466
467
for (const auto index : added)
468
{
469
const IndexType offset = Dimensions * index;
470
471
for (IndexType j = 0; j < Dimensions; ++j)
472
toAcc[j] += featureTransformer.threatWeights[offset + j];
473
474
for (std::size_t k = 0; k < PSQTBuckets; ++k)
475
toPsqtAcc[k] += featureTransformer.threatPsqtWeights[index * PSQTBuckets + k];
476
}
477
478
#endif
479
}
480
};
481
482
template<typename FeatureSet, IndexType Dimensions>
483
auto make_accumulator_update_context(Color perspective,
484
const FeatureTransformer<Dimensions>& featureTransformer,
485
const AccumulatorState<FeatureSet>& accumulatorFrom,
486
AccumulatorState<FeatureSet>& accumulatorTo) noexcept {
487
return AccumulatorUpdateContext<FeatureSet, Dimensions>{perspective, featureTransformer,
488
accumulatorFrom, accumulatorTo};
489
}
490
491
template<IndexType TransformedFeatureDimensions>
492
void double_inc_update(Color perspective,
493
const FeatureTransformer<TransformedFeatureDimensions>& featureTransformer,
494
const Square ksq,
495
AccumulatorState<PSQFeatureSet>& middle_state,
496
AccumulatorState<PSQFeatureSet>& target_state,
497
const AccumulatorState<PSQFeatureSet>& computed) {
498
499
assert(computed.acc<TransformedFeatureDimensions>().computed[perspective]);
500
assert(!middle_state.acc<TransformedFeatureDimensions>().computed[perspective]);
501
assert(!target_state.acc<TransformedFeatureDimensions>().computed[perspective]);
502
503
PSQFeatureSet::IndexList removed, added;
504
PSQFeatureSet::append_changed_indices(perspective, ksq, middle_state.diff, removed, added);
505
// you can't capture a piece that was just involved in castling since the rook ends up
506
// in a square that the king passed
507
assert(added.size() < 2);
508
PSQFeatureSet::append_changed_indices(perspective, ksq, target_state.diff, removed, added);
509
510
[[maybe_unused]] const int addedSize = added.ssize();
511
[[maybe_unused]] const int removedSize = removed.ssize();
512
513
assert(addedSize == 1);
514
assert(removedSize == 2 || removedSize == 3);
515
516
// Workaround compiler warning for uninitialized variables, replicated on
517
// profile builds on windows with gcc 14.2.0.
518
// Also helps with optimizations on some compilers.
519
520
sf_assume(addedSize == 1);
521
sf_assume(removedSize == 2 || removedSize == 3);
522
523
auto updateContext =
524
make_accumulator_update_context(perspective, featureTransformer, computed, target_state);
525
526
if (removedSize == 2)
527
{
528
updateContext.template apply<Add, Sub, Sub>(added[0], removed[0], removed[1]);
529
}
530
else
531
{
532
updateContext.template apply<Add, Sub, Sub, Sub>(added[0], removed[0], removed[1],
533
removed[2]);
534
}
535
536
target_state.acc<TransformedFeatureDimensions>().computed[perspective] = true;
537
}
538
539
template<IndexType TransformedFeatureDimensions>
540
void double_inc_update(Color perspective,
541
const FeatureTransformer<TransformedFeatureDimensions>& featureTransformer,
542
const Square ksq,
543
AccumulatorState<ThreatFeatureSet>& middle_state,
544
AccumulatorState<ThreatFeatureSet>& target_state,
545
const AccumulatorState<ThreatFeatureSet>& computed,
546
const DirtyPiece& dp2) {
547
548
assert(computed.acc<TransformedFeatureDimensions>().computed[perspective]);
549
assert(!middle_state.acc<TransformedFeatureDimensions>().computed[perspective]);
550
assert(!target_state.acc<TransformedFeatureDimensions>().computed[perspective]);
551
552
ThreatFeatureSet::FusedUpdateData fusedData;
553
554
fusedData.dp2removed = dp2.remove_sq;
555
556
ThreatFeatureSet::IndexList removed, added;
557
const auto* pfBase = &featureTransformer.threatWeights[0];
558
auto pfStride = static_cast<IndexType>(TransformedFeatureDimensions);
559
ThreatFeatureSet::append_changed_indices(perspective, ksq, middle_state.diff, removed, added,
560
&fusedData, true, pfBase, pfStride);
561
ThreatFeatureSet::append_changed_indices(perspective, ksq, target_state.diff, removed, added,
562
&fusedData, false, pfBase, pfStride);
563
564
auto updateContext =
565
make_accumulator_update_context(perspective, featureTransformer, computed, target_state);
566
567
updateContext.apply(added, removed);
568
569
target_state.acc<TransformedFeatureDimensions>().computed[perspective] = true;
570
}
571
572
template<bool Forward, typename FeatureSet, IndexType TransformedFeatureDimensions>
573
void update_accumulator_incremental(
574
Color perspective,
575
const FeatureTransformer<TransformedFeatureDimensions>& featureTransformer,
576
const Square ksq,
577
AccumulatorState<FeatureSet>& target_state,
578
const AccumulatorState<FeatureSet>& computed) {
579
580
assert((computed.template acc<TransformedFeatureDimensions>()).computed[perspective]);
581
assert(!(target_state.template acc<TransformedFeatureDimensions>()).computed[perspective]);
582
583
// The size must be enough to contain the largest possible update.
584
// That might depend on the feature set and generally relies on the
585
// feature set's update cost calculation to be correct and never allow
586
// updates with more added/removed features than MaxActiveDimensions.
587
// In this case, the maximum size of both feature addition and removal
588
// is 2, since we are incrementally updating one move at a time.
589
typename FeatureSet::IndexList removed, added;
590
if constexpr (std::is_same_v<FeatureSet, ThreatFeatureSet>)
591
{
592
const auto* pfBase = &featureTransformer.threatWeights[0];
593
auto pfStride = static_cast<IndexType>(TransformedFeatureDimensions);
594
if constexpr (Forward)
595
FeatureSet::append_changed_indices(perspective, ksq, target_state.diff, removed, added,
596
nullptr, false, pfBase, pfStride);
597
else
598
FeatureSet::append_changed_indices(perspective, ksq, computed.diff, added, removed,
599
nullptr, false, pfBase, pfStride);
600
}
601
else
602
{
603
if constexpr (Forward)
604
FeatureSet::append_changed_indices(perspective, ksq, target_state.diff, removed, added);
605
else
606
FeatureSet::append_changed_indices(perspective, ksq, computed.diff, added, removed);
607
}
608
609
auto updateContext =
610
make_accumulator_update_context(perspective, featureTransformer, computed, target_state);
611
612
if constexpr (std::is_same_v<FeatureSet, ThreatFeatureSet>)
613
updateContext.apply(added, removed);
614
else
615
{
616
[[maybe_unused]] const int addedSize = added.ssize();
617
[[maybe_unused]] const int removedSize = removed.ssize();
618
619
assert(addedSize == 1 || addedSize == 2);
620
assert(removedSize == 1 || removedSize == 2);
621
assert((Forward && addedSize <= removedSize) || (!Forward && addedSize >= removedSize));
622
623
// Workaround compiler warning for uninitialized variables, replicated
624
// on profile builds on windows with gcc 14.2.0.
625
// Also helps with optimizations on some compilers.
626
627
sf_assume(addedSize == 1 || addedSize == 2);
628
sf_assume(removedSize == 1 || removedSize == 2);
629
630
if (!(removedSize == 1 || removedSize == 2) || !(addedSize == 1 || addedSize == 2))
631
sf_unreachable();
632
633
if ((Forward && removedSize == 1) || (!Forward && addedSize == 1))
634
{
635
assert(addedSize == 1 && removedSize == 1);
636
updateContext.template apply<Add, Sub>(added[0], removed[0]);
637
}
638
else if (Forward && addedSize == 1)
639
{
640
assert(removedSize == 2);
641
updateContext.template apply<Add, Sub, Sub>(added[0], removed[0], removed[1]);
642
}
643
else if (!Forward && removedSize == 1)
644
{
645
assert(addedSize == 2);
646
updateContext.template apply<Add, Add, Sub>(added[0], added[1], removed[0]);
647
}
648
else
649
{
650
assert(addedSize == 2 && removedSize == 2);
651
updateContext.template apply<Add, Add, Sub, Sub>(added[0], added[1], removed[0],
652
removed[1]);
653
}
654
}
655
656
(target_state.template acc<TransformedFeatureDimensions>()).computed[perspective] = true;
657
}
658
659
// Returns a bitboard with a bit set for every square on which oldPieces and
// newPieces hold a different piece.
Bitboard get_changed_pieces(const std::array<Piece, SQUARE_NB>& oldPieces,
                            const std::array<Piece, SQUARE_NB>& newPieces) {
#if defined(USE_AVX512) || defined(USE_AVX2)
    static_assert(sizeof(Piece) == 1);

    // Compare 32 squares at a time and collect the "equal" bits
    Bitboard unchanged = 0;

    for (int half = 0; half < 2; ++half)
    {
        const int base = half * 32;

        const __m256i before =
          _mm256_loadu_si256(reinterpret_cast<const __m256i*>(&oldPieces[base]));
        const __m256i after =
          _mm256_loadu_si256(reinterpret_cast<const __m256i*>(&newPieces[base]));

        const std::uint32_t equalBits = _mm256_movemask_epi8(_mm256_cmpeq_epi8(before, after));

        unchanged |= static_cast<Bitboard>(equalBits) << base;
    }

    return ~unchanged;
#elif defined(USE_NEON)
    uint8x16x4_t before = vld4q_u8(reinterpret_cast<const uint8_t*>(oldPieces.data()));
    uint8x16x4_t after  = vld4q_u8(reinterpret_cast<const uint8_t*>(newPieces.data()));

    auto equalLane = [=](const int lane) { return vceqq_u8(before.val[lane], after.val[lane]); };

    // Fold the four comparison masks into one 64-bit value with
    // shift-right-and-insert steps.
    uint8x16_t lanes01 = vsriq_n_u8(equalLane(1), equalLane(0), 1);
    uint8x16_t lanes23 = vsriq_n_u8(equalLane(3), equalLane(2), 1);
    uint8x16_t folded  = vsriq_n_u8(lanes23, lanes01, 2);
    folded             = vsriq_n_u8(folded, folded, 4);

    uint8x8_t unchanged = vshrn_n_u16(vreinterpretq_u16_u8(folded), 4);

    return ~vget_lane_u64(vreinterpret_u64_u8(unchanged), 0);
#else
    Bitboard differing = 0;

    for (Square s = SQUARE_ZERO; s < SQUARE_NB; ++s)
    {
        const bool isDifferent = oldPieces[s] != newPieces[s];
        differing |= static_cast<Bitboard>(isDifferent) << s;
    }

    return differing;
#endif
}
695
696
template<IndexType Dimensions>
697
void update_accumulator_refresh_cache(Color perspective,
698
const FeatureTransformer<Dimensions>& featureTransformer,
699
const Position& pos,
700
AccumulatorState<PSQFeatureSet>& accumulatorState,
701
AccumulatorCaches::Cache<Dimensions>& cache) {
702
703
using Tiling [[maybe_unused]] = SIMDTiling<Dimensions, Dimensions, PSQTBuckets>;
704
705
const Square ksq = pos.square<KING>(perspective);
706
auto& entry = cache[ksq][perspective];
707
PSQFeatureSet::IndexList removed, added;
708
709
const Bitboard changedBB = get_changed_pieces(entry.pieces, pos.piece_array());
710
Bitboard removedBB = changedBB & entry.pieceBB;
711
Bitboard addedBB = changedBB & pos.pieces();
712
713
while (removedBB)
714
{
715
Square sq = pop_lsb(removedBB);
716
removed.push_back(PSQFeatureSet::make_index(perspective, sq, entry.pieces[sq], ksq));
717
}
718
while (addedBB)
719
{
720
Square sq = pop_lsb(addedBB);
721
added.push_back(PSQFeatureSet::make_index(perspective, sq, pos.piece_on(sq), ksq));
722
}
723
724
entry.pieceBB = pos.pieces();
725
entry.pieces = pos.piece_array();
726
727
auto& accumulator = accumulatorState.acc<Dimensions>();
728
accumulator.computed[perspective] = true;
729
730
#ifdef VECTOR
731
vec_t acc[Tiling::NumRegs];
732
psqt_vec_t psqt[Tiling::NumPsqtRegs];
733
734
const auto* weights = &featureTransformer.weights[0];
735
736
for (IndexType j = 0; j < Dimensions / Tiling::TileHeight; ++j)
737
{
738
auto* accTile =
739
reinterpret_cast<vec_t*>(&accumulator.accumulation[perspective][j * Tiling::TileHeight]);
740
auto* entryTile = reinterpret_cast<vec_t*>(&entry.accumulation[j * Tiling::TileHeight]);
741
742
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
743
acc[k] = entryTile[k];
744
745
int i = 0;
746
for (; i < std::min(removed.ssize(), added.ssize()); ++i)
747
{
748
size_t indexR = removed[i];
749
const size_t offsetR = Dimensions * indexR;
750
auto* columnR = reinterpret_cast<const vec_t*>(&weights[offsetR]);
751
size_t indexA = added[i];
752
const size_t offsetA = Dimensions * indexA;
753
auto* columnA = reinterpret_cast<const vec_t*>(&weights[offsetA]);
754
755
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
756
acc[k] = fused<Vec16Wrapper, Add, Sub>(acc[k], columnA[k], columnR[k]);
757
}
758
for (; i < removed.ssize(); ++i)
759
{
760
size_t index = removed[i];
761
const size_t offset = Dimensions * index;
762
auto* column = reinterpret_cast<const vec_t*>(&weights[offset]);
763
764
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
765
acc[k] = vec_sub_16(acc[k], column[k]);
766
}
767
for (; i < added.ssize(); ++i)
768
{
769
size_t index = added[i];
770
const size_t offset = Dimensions * index;
771
auto* column = reinterpret_cast<const vec_t*>(&weights[offset]);
772
773
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
774
acc[k] = vec_add_16(acc[k], column[k]);
775
}
776
777
for (IndexType k = 0; k < Tiling::NumRegs; k++)
778
vec_store(&entryTile[k], acc[k]);
779
for (IndexType k = 0; k < Tiling::NumRegs; k++)
780
vec_store(&accTile[k], acc[k]);
781
782
weights += Tiling::TileHeight;
783
}
784
785
for (IndexType j = 0; j < PSQTBuckets / Tiling::PsqtTileHeight; ++j)
786
{
787
auto* accTilePsqt = reinterpret_cast<psqt_vec_t*>(
788
&accumulator.psqtAccumulation[perspective][j * Tiling::PsqtTileHeight]);
789
auto* entryTilePsqt =
790
reinterpret_cast<psqt_vec_t*>(&entry.psqtAccumulation[j * Tiling::PsqtTileHeight]);
791
792
for (IndexType k = 0; k < Tiling::NumPsqtRegs; ++k)
793
psqt[k] = entryTilePsqt[k];
794
795
for (int i = 0; i < removed.ssize(); ++i)
796
{
797
size_t index = removed[i];
798
const size_t offset = PSQTBuckets * index + j * Tiling::PsqtTileHeight;
799
auto* columnPsqt =
800
reinterpret_cast<const psqt_vec_t*>(&featureTransformer.psqtWeights[offset]);
801
802
for (std::size_t k = 0; k < Tiling::NumPsqtRegs; ++k)
803
psqt[k] = vec_sub_psqt_32(psqt[k], columnPsqt[k]);
804
}
805
for (int i = 0; i < added.ssize(); ++i)
806
{
807
size_t index = added[i];
808
const size_t offset = PSQTBuckets * index + j * Tiling::PsqtTileHeight;
809
auto* columnPsqt =
810
reinterpret_cast<const psqt_vec_t*>(&featureTransformer.psqtWeights[offset]);
811
812
for (std::size_t k = 0; k < Tiling::NumPsqtRegs; ++k)
813
psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]);
814
}
815
816
for (IndexType k = 0; k < Tiling::NumPsqtRegs; ++k)
817
vec_store_psqt(&entryTilePsqt[k], psqt[k]);
818
for (IndexType k = 0; k < Tiling::NumPsqtRegs; ++k)
819
vec_store_psqt(&accTilePsqt[k], psqt[k]);
820
}
821
822
#else
823
824
for (const auto index : removed)
825
{
826
const IndexType offset = Dimensions * index;
827
for (IndexType j = 0; j < Dimensions; ++j)
828
entry.accumulation[j] -= featureTransformer.weights[offset + j];
829
830
for (std::size_t k = 0; k < PSQTBuckets; ++k)
831
entry.psqtAccumulation[k] -= featureTransformer.psqtWeights[index * PSQTBuckets + k];
832
}
833
for (const auto index : added)
834
{
835
const IndexType offset = Dimensions * index;
836
for (IndexType j = 0; j < Dimensions; ++j)
837
entry.accumulation[j] += featureTransformer.weights[offset + j];
838
839
for (std::size_t k = 0; k < PSQTBuckets; ++k)
840
entry.psqtAccumulation[k] += featureTransformer.psqtWeights[index * PSQTBuckets + k];
841
}
842
843
// The accumulator of the refresh entry has been updated.
844
// Now copy its content to the actual accumulator we were refreshing.
845
accumulator.accumulation[perspective] = entry.accumulation;
846
accumulator.psqtAccumulation[perspective] = entry.psqtAccumulation;
847
#endif
848
}
849
850
template<IndexType Dimensions>
851
void update_threats_accumulator_full(Color perspective,
852
const FeatureTransformer<Dimensions>& featureTransformer,
853
const Position& pos,
854
AccumulatorState<ThreatFeatureSet>& accumulatorState) {
855
using Tiling [[maybe_unused]] = SIMDTiling<Dimensions, Dimensions, PSQTBuckets>;
856
857
ThreatFeatureSet::IndexList active;
858
ThreatFeatureSet::append_active_indices(perspective, pos, active);
859
860
auto& accumulator = accumulatorState.acc<Dimensions>();
861
accumulator.computed[perspective] = true;
862
863
#ifdef VECTOR
864
vec_t acc[Tiling::NumRegs];
865
psqt_vec_t psqt[Tiling::NumPsqtRegs];
866
867
const auto* threatWeights = &featureTransformer.threatWeights[0];
868
869
for (IndexType j = 0; j < Dimensions / Tiling::TileHeight; ++j)
870
{
871
auto* accTile =
872
reinterpret_cast<vec_t*>(&accumulator.accumulation[perspective][j * Tiling::TileHeight]);
873
874
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
875
acc[k] = vec_zero();
876
877
int i = 0;
878
879
for (; i < active.ssize(); ++i)
880
{
881
size_t index = active[i];
882
const size_t offset = Dimensions * index;
883
auto* column = reinterpret_cast<const vec_i8_t*>(&threatWeights[offset]);
884
885
#ifdef USE_NEON
886
for (IndexType k = 0; k < Tiling::NumRegs; k += 2)
887
{
888
acc[k] = vec_add_16(acc[k], vmovl_s8(vget_low_s8(column[k / 2])));
889
acc[k + 1] = vec_add_16(acc[k + 1], vmovl_high_s8(column[k / 2]));
890
}
891
#else
892
for (IndexType k = 0; k < Tiling::NumRegs; ++k)
893
acc[k] = vec_add_16(acc[k], vec_convert_8_16(column[k]));
894
#endif
895
}
896
897
for (IndexType k = 0; k < Tiling::NumRegs; k++)
898
vec_store(&accTile[k], acc[k]);
899
900
threatWeights += Tiling::TileHeight;
901
}
902
903
for (IndexType j = 0; j < PSQTBuckets / Tiling::PsqtTileHeight; ++j)
904
{
905
auto* accTilePsqt = reinterpret_cast<psqt_vec_t*>(
906
&accumulator.psqtAccumulation[perspective][j * Tiling::PsqtTileHeight]);
907
908
for (IndexType k = 0; k < Tiling::NumPsqtRegs; ++k)
909
psqt[k] = vec_zero_psqt();
910
911
for (int i = 0; i < active.ssize(); ++i)
912
{
913
size_t index = active[i];
914
const size_t offset = PSQTBuckets * index + j * Tiling::PsqtTileHeight;
915
auto* columnPsqt =
916
reinterpret_cast<const psqt_vec_t*>(&featureTransformer.threatPsqtWeights[offset]);
917
918
for (std::size_t k = 0; k < Tiling::NumPsqtRegs; ++k)
919
psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]);
920
}
921
922
for (IndexType k = 0; k < Tiling::NumPsqtRegs; ++k)
923
vec_store_psqt(&accTilePsqt[k], psqt[k]);
924
}
925
926
#else
927
928
for (IndexType j = 0; j < Dimensions; ++j)
929
accumulator.accumulation[perspective][j] = 0;
930
931
for (std::size_t k = 0; k < PSQTBuckets; ++k)
932
accumulator.psqtAccumulation[perspective][k] = 0;
933
934
for (const auto index : active)
935
{
936
const IndexType offset = Dimensions * index;
937
938
for (IndexType j = 0; j < Dimensions; ++j)
939
accumulator.accumulation[perspective][j] +=
940
featureTransformer.threatWeights[offset + j];
941
942
for (std::size_t k = 0; k < PSQTBuckets; ++k)
943
accumulator.psqtAccumulation[perspective][k] +=
944
featureTransformer.threatPsqtWeights[index * PSQTBuckets + k];
945
}
946
947
#endif
948
}
949
950
}
951
952
}
953
954