Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
official-stockfish
GitHub Repository: official-stockfish/stockfish
Path: blob/master/src/thread.cpp
504 views
1
/*
2
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
3
Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)
4
5
Stockfish is free software: you can redistribute it and/or modify
6
it under the terms of the GNU General Public License as published by
7
the Free Software Foundation, either version 3 of the License, or
8
(at your option) any later version.
9
10
Stockfish is distributed in the hope that it will be useful,
11
but WITHOUT ANY WARRANTY; without even the implied warranty of
12
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
GNU General Public License for more details.
14
15
You should have received a copy of the GNU General Public License
16
along with this program. If not, see <http://www.gnu.org/licenses/>.
17
*/
18
19
#include "thread.h"
20
21
#include <algorithm>
22
#include <cassert>
23
#include <deque>
24
#include <map>
25
#include <memory>
26
#include <string>
27
#include <unordered_map>
28
#include <utility>
29
30
#include "bitboard.h"
31
#include "history.h"
32
#include "memory.h"
33
#include "movegen.h"
34
#include "search.h"
35
#include "syzygy/tbprobe.h"
36
#include "timeman.h"
37
#include "types.h"
38
#include "uci.h"
39
#include "ucioption.h"
40
41
namespace Stockfish {
42
43
// Constructor launches the thread and waits until it goes to sleep
// in idle_loop(). Note that 'searching' and 'exit' should be already set.
Thread::Thread(Search::SharedState& sharedState,
               std::unique_ptr<Search::ISearchManager> sm,
               size_t n,
               size_t numaN,
               size_t totalNumaCount,
               OptionalThreadToNumaNodeBinder binder) :
    idx(n),
    idxInNuma(numaN),
    totalNuma(totalNumaCount),
    nthreads(sharedState.options["Threads"]),
    stdThread(&Thread::idle_loop, this) {

    // Wait until the freshly spawned OS thread has parked itself in
    // idle_loop() before handing it any work.
    wait_for_search_finished();

    // Run the Worker allocation on the spawned thread itself, so that it
    // happens while that thread is [maybe] bound to its NUMA node.
    run_custom_job([this, &binder, &sharedState, &sm, n]() {
        // Use the binder to [maybe] bind the threads to a NUMA node before doing
        // the Worker allocation. Ideally we would also allocate the SearchManager
        // here, but that's minor.
        this->numaAccessToken = binder();
        this->worker          = make_unique_large_page<Search::Worker>(
          sharedState, std::move(sm), n, idxInNuma, totalNuma, this->numaAccessToken);
    });

    // Block until the allocation job above has completed, so 'worker' is
    // valid by the time the constructor returns.
    wait_for_search_finished();
}
70
71
72
// Destructor wakes up the thread in idle_loop() and waits
// for its termination. Thread should be already waiting.
Thread::~Thread() {

    assert(!searching);

    // Set 'exit' before the wake-up so idle_loop() returns right after
    // being unblocked; then join the underlying OS thread.
    exit = true;
    start_searching();
    stdThread.join();
}
82
83
// Wakes up the thread that will start the search
84
void Thread::start_searching() {
85
assert(worker != nullptr);
86
run_custom_job([this]() { worker->start_searching(); });
87
}
88
89
// Clears the histories for the thread worker (usually before a new game)
90
void Thread::clear_worker() {
91
assert(worker != nullptr);
92
run_custom_job([this]() { worker->clear(); });
93
}
94
95
// Blocks on the condition variable until the thread has finished searching
96
void Thread::wait_for_search_finished() {
97
98
std::unique_lock<std::mutex> lk(mutex);
99
cv.wait(lk, [&] { return !searching; });
100
}
101
102
// Launching a function in the thread
void Thread::run_custom_job(std::function<void()> f) {
    {
        std::unique_lock<std::mutex> lk(mutex);
        // Wait until any previously queued job has finished before
        // installing the new one (there is a single jobFunc slot).
        cv.wait(lk, [&] { return !searching; });
        jobFunc = std::move(f);
        // 'searching' doubles as the "work pending" flag that idle_loop()
        // waits on.
        searching = true;
    }
    // Notify outside the critical section so the woken thread can take the
    // mutex without immediately blocking on us.
    cv.notify_one();
}
112
113
// Delegates network replication to this thread's worker.
void Thread::ensure_network_replicated() { worker->ensure_network_replicated(); }
114
115
// Thread gets parked here, blocked on the condition variable
// when the thread has no work to do.

void Thread::idle_loop() {
    while (true)
    {
        std::unique_lock<std::mutex> lk(mutex);
        searching = false;
        cv.notify_one();  // Wake up anyone waiting for search finished
        cv.wait(lk, [&] { return searching; });

        // 'exit' is set (by ~Thread) before a wake-up, so checking it here,
        // after the wait, is sufficient.
        if (exit)
            return;

        // Take ownership of the pending job while still holding the lock...
        std::function<void()> job = std::move(jobFunc);
        jobFunc                   = nullptr;

        // ...then release it before running the job, so that other threads
        // can use the mutex/condvar (e.g. wait_for_search_finished) while
        // the job is executing.
        lk.unlock();

        if (job)
            job();
    }
}
138
139
// Returns the SearchManager of the main thread's worker.
Search::SearchManager* ThreadPool::main_manager() { return main_thread()->worker->main_manager(); }
140
141
// Sum of nodes searched over all workers.
uint64_t ThreadPool::nodes_searched() const { return accumulate(&Search::Worker::nodes); }
142
// Sum of tablebase hits over all workers.
uint64_t ThreadPool::tb_hits() const { return accumulate(&Search::Worker::tbHits); }
143
144
// Smallest power of two >= count (1 for count <= 1); relies on msb() from bitboard.h.
static size_t next_power_of_two(uint64_t count) { return count > 1 ? (2ULL << msb(count - 1)) : 1; }
145
146
// Creates/destroys threads to match the requested number.
// Created and launched threads will immediately go to sleep in idle_loop.
// Upon resizing, threads are recreated to allow for binding if necessary.
void ThreadPool::set(const NumaConfig&                           numaConfig,
                     Search::SharedState                         sharedState,
                     const Search::SearchManager::UpdateContext& updateContext) {

    if (threads.size() > 0)  // destroy any existing thread(s)
    {
        main_thread()->wait_for_search_finished();

        threads.clear();

        boundThreadToNumaNode.clear();
    }

    const size_t requested = sharedState.options["Threads"];

    if (requested > 0)  // create new thread(s)
    {
        // Binding threads may be problematic when there's multiple NUMA nodes and
        // multiple Stockfish instances running. In particular, if each instance
        // runs a single thread then they would all be mapped to the first NUMA node.
        // This is undesirable, and so the default behaviour (i.e. when the user does not
        // change the NumaConfig UCI setting) is to not bind the threads to processors
        // unless we know for sure that we span NUMA nodes and replication is required.
        const std::string numaPolicy(sharedState.options["NumaPolicy"]);
        const bool        doBindThreads = [&]() {
            if (numaPolicy == "none")
                return false;

            if (numaPolicy == "auto")
                return numaConfig.suggests_binding_threads(requested);

            // numaPolicy == "system", or explicitly set by the user
            return true;
        }();

        // counts: how many threads end up on each NUMA node.
        std::map<NumaIndex, size_t> counts;
        boundThreadToNumaNode = doBindThreads
                                ? numaConfig.distribute_threads_among_numa_nodes(requested)
                                : std::vector<NumaIndex>{};

        if (boundThreadToNumaNode.empty())
            counts[0] = requested;  // Pretend all threads are part of numa node 0
        else
        {
            for (size_t i = 0; i < boundThreadToNumaNode.size(); ++i)
                counts[boundThreadToNumaNode[i]]++;
        }

        // Create one shared-history table per NUMA node, sized by the next
        // power of two of that node's thread count. When binding, run the
        // allocation on the node itself (so the memory ends up node-local).
        sharedState.sharedHistories.clear();
        for (auto pair : counts)
        {
            NumaIndex numaIndex = pair.first;
            uint64_t  count     = pair.second;
            auto      f         = [&]() {
                sharedState.sharedHistories.try_emplace(numaIndex, next_power_of_two(count));
            };
            if (doBindThreads)
                numaConfig.execute_on_numa_node(numaIndex, f);
            else
                f();
        }

        auto threadsPerNode = counts;
        // Reuse 'counts' below as a running per-node index for the created threads.
        counts.clear();

        while (threads.size() < requested)
        {
            const size_t    threadId = threads.size();
            const NumaIndex numaId   = doBindThreads ? boundThreadToNumaNode[threadId] : 0;
            // Only thread 0 (the main thread) gets a full SearchManager; all
            // other threads get a NullSearchManager.
            auto create_thread = [&]() {
                auto manager = threadId == 0
                               ? std::unique_ptr<Search::ISearchManager>(
                                   std::make_unique<Search::SearchManager>(updateContext))
                               : std::make_unique<Search::NullSearchManager>();

                // When not binding threads we want to force all access to happen
                // from the same NUMA node, because in case of NUMA replicated memory
                // accesses we don't want to trash cache in case the threads get scheduled
                // on the same NUMA node.
                auto binder = doBindThreads ? OptionalThreadToNumaNodeBinder(numaConfig, numaId)
                                            : OptionalThreadToNumaNodeBinder(numaId);

                threads.emplace_back(std::make_unique<Thread>(sharedState, std::move(manager),
                                                              threadId, counts[numaId]++,
                                                              threadsPerNode[numaId], binder));
            };

            // Ensure the worker thread inherits the intended NUMA affinity at creation.
            if (doBindThreads)
                numaConfig.execute_on_numa_node(numaId, create_thread);
            else
                create_thread();
        }

        clear();

        main_thread()->wait_for_search_finished();
    }
}
248
249
250
// Sets threadPool data to initial values
251
void ThreadPool::clear() {
252
if (threads.size() == 0)
253
return;
254
255
for (auto&& th : threads)
256
th->clear_worker();
257
258
for (auto&& th : threads)
259
th->wait_for_search_finished();
260
261
// These two affect the time taken on the first move of a game:
262
main_manager()->bestPreviousAverageScore = VALUE_INFINITE;
263
main_manager()->previousTimeReduction = 0.85;
264
265
main_manager()->callsCnt = 0;
266
main_manager()->bestPreviousScore = VALUE_INFINITE;
267
main_manager()->originalTimeAdjust = -1;
268
main_manager()->tm.clear();
269
}
270
271
// Queues a custom function to run on the given thread.
void ThreadPool::run_on_thread(size_t threadId, std::function<void()> f) {
    assert(threadId < threads.size());

    auto& target = threads[threadId];
    target->run_custom_job(std::move(f));
}
275
276
// Blocks until the given thread has finished its current job.
void ThreadPool::wait_on_thread(size_t threadId) {
    assert(threadId < threads.size());

    auto& target = threads[threadId];
    target->wait_for_search_finished();
}
280
281
// Number of threads currently in the pool.
size_t ThreadPool::num_threads() const { return threads.size(); }
282
283
284
// Wakes up main thread waiting in idle_loop() and returns immediately.
// Main thread will wake up other threads and start the search.
void ThreadPool::start_thinking(const OptionsMap&  options,
                                Position&          pos,
                                StateListPtr&      states,
                                Search::LimitsType limits) {

    main_thread()->wait_for_search_finished();

    main_manager()->stopOnPonderhit = stop = abortedSearch = false;
    main_manager()->ponder                                 = limits.ponderMode;

    increaseDepth = true;

    // Build the list of root moves: the user-restricted 'searchmoves'
    // (filtered for legality), or all legal moves by default.
    Search::RootMoves rootMoves;
    const auto        legalmoves = MoveList<LEGAL>(pos);

    for (const auto& uciMove : limits.searchmoves)
    {
        auto move = UCIEngine::to_move(pos, uciMove);

        if (std::find(legalmoves.begin(), legalmoves.end(), move) != legalmoves.end())
            rootMoves.emplace_back(move);
    }

    if (rootMoves.empty())
        for (const auto& m : legalmoves)
            rootMoves.emplace_back(m);

    Tablebases::Config tbConfig = Tablebases::rank_root_moves(options, pos, rootMoves);

    // After ownership transfer 'states' becomes empty, so if we stop the search
    // and call 'go' again without setting a new position states.get() == nullptr.
    assert(states.get() || setupStates.get());

    if (states.get())
        setupStates = std::move(states);  // Ownership transfer, states is now empty

    // We use Position::set() to set root position across threads. But there are
    // some StateInfo fields (previous, pliesFromNull, capturedPiece) that cannot
    // be deduced from a fen string, so set() clears them and they are set from
    // setupStates->back() later. The rootState is per thread, earlier states are
    // shared since they are read-only.
    for (auto&& th : threads)
    {
        th->run_custom_job([&]() {
            th->worker->limits = limits;
            th->worker->nodes = th->worker->tbHits = th->worker->bestMoveChanges = 0;
            th->worker->nmpMinPly                                                = 0;
            th->worker->rootDepth = th->worker->completedDepth = 0;
            th->worker->rootMoves                              = rootMoves;
            th->worker->rootPos.set(pos.fen(), pos.is_chess960(), &th->worker->rootState);
            th->worker->rootState = setupStates->back();
            th->worker->tbConfig  = tbConfig;
        });
    }

    // The setup jobs above capture locals by reference, so all of them must
    // finish before this function returns.
    for (auto&& th : threads)
        th->wait_for_search_finished();

    main_thread()->start_searching();
}
346
347
// Selects the thread with the best result: per-root-move vote tallies
// (weighted by score above the minimum and by completed depth), with
// special handling for proven wins/losses.
Thread* ThreadPool::get_best_thread() const {

    Thread* bestThread = threads.front().get();
    // NOTE(review): assumes VALUE_NONE compares above every real score so the
    // min-scan below is correct — confirm against the Value definitions.
    Value minScore = VALUE_NONE;

    std::unordered_map<Move, int64_t, Move::MoveHash> votes(
      2 * std::min(size(), bestThread->worker->rootMoves.size()));

    // Find the minimum score of all threads
    for (auto&& th : threads)
        minScore = std::min(minScore, th->worker->rootMoves[0].score);

    // Vote according to score and depth, and select the best thread
    auto thread_voting_value = [minScore](Thread* th) {
        return (th->worker->rootMoves[0].score - minScore + 14) * int(th->worker->completedDepth);
    };

    // Tally votes per best root move across all threads.
    for (auto&& th : threads)
        votes[th->worker->rootMoves[0].pv[0]] += thread_voting_value(th.get());

    for (auto&& th : threads)
    {
        const auto bestThreadScore = bestThread->worker->rootMoves[0].score;
        const auto newThreadScore  = th->worker->rootMoves[0].score;

        const auto& bestThreadPV = bestThread->worker->rootMoves[0].pv;
        const auto& newThreadPV  = th->worker->rootMoves[0].pv;

        const auto bestThreadMoveVote = votes[bestThreadPV[0]];
        const auto newThreadMoveVote  = votes[newThreadPV[0]];

        const bool bestThreadInProvenWin = is_win(bestThreadScore);
        const bool newThreadInProvenWin  = is_win(newThreadScore);

        // -VALUE_INFINITE marks a thread with no usable score; exclude it
        // from the proven-loss preference below.
        const bool bestThreadInProvenLoss =
          bestThreadScore != -VALUE_INFINITE && is_loss(bestThreadScore);
        const bool newThreadInProvenLoss =
          newThreadScore != -VALUE_INFINITE && is_loss(newThreadScore);

        // We make sure not to pick a thread with truncated principal variation
        const bool betterVotingValue =
          thread_voting_value(th.get()) * int(newThreadPV.size() > 2)
          > thread_voting_value(bestThread) * int(bestThreadPV.size() > 2);

        if (bestThreadInProvenWin)
        {
            // Make sure we pick the shortest mate / TB conversion
            if (newThreadScore > bestThreadScore)
                bestThread = th.get();
        }
        else if (bestThreadInProvenLoss)
        {
            // Make sure we pick the shortest mated / TB conversion
            if (newThreadInProvenLoss && newThreadScore < bestThreadScore)
                bestThread = th.get();
        }
        else if (newThreadInProvenWin || newThreadInProvenLoss
                 || (!is_loss(newThreadScore)
                     && (newThreadMoveVote > bestThreadMoveVote
                         || (newThreadMoveVote == bestThreadMoveVote && betterVotingValue))))
            bestThread = th.get();
    }

    return bestThread;
}
412
413
414
// Start non-main threads.
415
// Will be invoked by main thread after it has started searching.
416
void ThreadPool::start_searching() {
417
418
for (auto&& th : threads)
419
if (th != threads.front())
420
th->start_searching();
421
}
422
423
424
// Wait for non-main threads
425
void ThreadPool::wait_for_search_finished() const {
426
427
for (auto&& th : threads)
428
if (th != threads.front())
429
th->wait_for_search_finished();
430
}
431
432
std::vector<size_t> ThreadPool::get_bound_thread_count_by_numa_node() const {
433
std::vector<size_t> counts;
434
435
if (!boundThreadToNumaNode.empty())
436
{
437
NumaIndex highestNumaNode = 0;
438
for (NumaIndex n : boundThreadToNumaNode)
439
if (n > highestNumaNode)
440
highestNumaNode = n;
441
442
counts.resize(highestNumaNode + 1, 0);
443
444
for (NumaIndex n : boundThreadToNumaNode)
445
counts[n] += 1;
446
}
447
448
return counts;
449
}
450
451
void ThreadPool::ensure_network_replicated() {
452
for (auto&& th : threads)
453
th->ensure_network_replicated();
454
}
455
456
} // namespace Stockfish
457
458