Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/tools/perf/bench/futex-wake-parallel.c
26285 views
1
// SPDX-License-Identifier: GPL-2.0
2
/*
3
* Copyright (C) 2015 Davidlohr Bueso.
4
*
5
* Block a bunch of threads and let parallel waker threads wakeup an
6
* equal amount of them. The program output reflects the avg latency
7
* for each individual thread to service its share of work. Ultimately
8
* it can be used to measure futex_wake() changes.
9
*/
10
#include "bench.h"
11
#include <linux/compiler.h>
12
#include "../util/debug.h"
13
#include "../util/mutex.h"
14
15
#ifndef HAVE_PTHREAD_BARRIER
16
/*
 * Fallback entry point used when HAVE_PTHREAD_BARRIER is not defined.
 *
 * The benchmark cannot run without pthread barriers, so this only reports
 * that through pr_err() and returns 0; both arguments are ignored.
 */
int bench_futex_wake_parallel(int argc __maybe_unused, const char **argv __maybe_unused)
{
	pr_err("%s: pthread_barrier_t unavailable, disabling this test...\n", __func__);
	return 0;
}
21
#else /* HAVE_PTHREAD_BARRIER */
22
/* For the CLR_() macros */
23
#include <string.h>
24
#include <pthread.h>
25
26
#include <signal.h>
27
#include "../util/stat.h"
28
#include <subcmd/parse-options.h>
29
#include <linux/kernel.h>
30
#include <linux/time64.h>
31
#include <errno.h>
32
#include "futex.h"
33
#include <perf/cpumap.h>
34
35
#include <err.h>
36
#include <stdlib.h>
37
#include <sys/time.h>
38
#include <sys/mman.h>
39
40
/* Bookkeeping for one waker thread; the run loop allocates params.nwakes of these. */
struct thread_data {
	/* Handle filled in by pthread_create() and later passed to pthread_join(). */
	pthread_t worker;
	/* Value returned by this waker's futex_wake() call. */
	unsigned int nwoken;
	/* Elapsed time between the two gettimeofday() samples taken by the waker. */
	struct timeval runtime;
};
45
46
static unsigned int nwakes = 1;
47
48
/* all threads will block on the same futex -- hash bucket chaos ;) */
49
static u_int32_t futex = 0;
50
51
static pthread_t *blocked_worker;
52
static bool done = false;
53
static struct mutex thread_lock;
54
static struct cond thread_parent, thread_worker;
55
static pthread_barrier_t barrier;
56
static struct stats waketime_stats, wakeup_stats;
57
static unsigned int threads_starting;
58
static int futex_flag = 0;
59
60
static struct bench_futex_parameters params = {
61
.nbuckets = -1,
62
};
63
64
static const struct option options[] = {
65
OPT_INTEGER( 'b', "buckets", &params.nbuckets, "Specify amount of hash buckets"),
66
OPT_UINTEGER('t', "threads", &params.nthreads, "Specify amount of threads"),
67
OPT_UINTEGER('w', "nwakers", &params.nwakes, "Specify amount of waking threads"),
68
OPT_BOOLEAN( 's', "silent", &params.silent, "Silent mode: do not display data/details"),
69
OPT_BOOLEAN( 'S', "shared", &params.fshared, "Use shared futexes instead of private ones"),
70
OPT_BOOLEAN( 'm', "mlockall", &params.mlockall, "Lock all current and future memory"),
71
72
OPT_END()
73
};
74
75
static const char * const bench_futex_wake_parallel_usage[] = {
76
"perf bench futex wake-parallel <options>",
77
NULL
78
};
79
80
static void *waking_workerfn(void *arg)
81
{
82
struct thread_data *waker = (struct thread_data *) arg;
83
struct timeval start, end;
84
85
pthread_barrier_wait(&barrier);
86
87
gettimeofday(&start, NULL);
88
89
waker->nwoken = futex_wake(&futex, nwakes, futex_flag);
90
if (waker->nwoken != nwakes)
91
warnx("couldn't wakeup all tasks (%d/%d)",
92
waker->nwoken, nwakes);
93
94
gettimeofday(&end, NULL);
95
timersub(&end, &start, &waker->runtime);
96
97
pthread_exit(NULL);
98
return NULL;
99
}
100
101
static void wakeup_threads(struct thread_data *td)
102
{
103
unsigned int i;
104
pthread_attr_t thread_attr;
105
106
pthread_attr_init(&thread_attr);
107
pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_JOINABLE);
108
109
pthread_barrier_init(&barrier, NULL, params.nwakes + 1);
110
111
/* create and block all threads */
112
for (i = 0; i < params.nwakes; i++) {
113
/*
114
* Thread creation order will impact per-thread latency
115
* as it will affect the order to acquire the hb spinlock.
116
* For now let the scheduler decide.
117
*/
118
if (pthread_create(&td[i].worker, &thread_attr,
119
waking_workerfn, (void *)&td[i]))
120
err(EXIT_FAILURE, "pthread_create");
121
}
122
123
pthread_barrier_wait(&barrier);
124
125
for (i = 0; i < params.nwakes; i++)
126
if (pthread_join(td[i].worker, NULL))
127
err(EXIT_FAILURE, "pthread_join");
128
129
pthread_barrier_destroy(&barrier);
130
pthread_attr_destroy(&thread_attr);
131
}
132
133
/*
 * Thread body of a blocked (to-be-woken) worker; the argument is unused.
 *
 * Checks in under thread_lock by decrementing threads_starting (the last
 * one to do so signals thread_parent), waits on thread_worker, and then
 * sleeps on the shared futex.
 */
static void *blocked_workerfn(void *arg __maybe_unused)
{
	mutex_lock(&thread_lock);
	if (--threads_starting == 0)
		cond_signal(&thread_parent);
	cond_wait(&thread_worker, &thread_lock);
	mutex_unlock(&thread_lock);

	/*
	 * handle spurious wakeups
	 *
	 * NOTE(review): the wait is retried only while futex_wait() itself
	 * returns EINTR. If the wrapper reports failure as -1 with errno set
	 * (its definition is not visible here), this retry never triggers --
	 * confirm against futex.h.
	 */
	while (futex_wait(&futex, 0, NULL, futex_flag) == EINTR)
		;

	pthread_exit(NULL);
	return NULL;
}
150
151
static void block_threads(pthread_t *w, struct perf_cpu_map *cpu)
152
{
153
cpu_set_t *cpuset;
154
unsigned int i;
155
int nrcpus = cpu__max_cpu().cpu;
156
size_t size;
157
158
threads_starting = params.nthreads;
159
160
cpuset = CPU_ALLOC(nrcpus);
161
BUG_ON(!cpuset);
162
size = CPU_ALLOC_SIZE(nrcpus);
163
164
/* create and block all threads */
165
for (i = 0; i < params.nthreads; i++) {
166
pthread_attr_t thread_attr;
167
168
pthread_attr_init(&thread_attr);
169
CPU_ZERO_S(size, cpuset);
170
CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset);
171
172
if (pthread_attr_setaffinity_np(&thread_attr, size, cpuset)) {
173
CPU_FREE(cpuset);
174
err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
175
}
176
177
if (pthread_create(&w[i], &thread_attr, blocked_workerfn, NULL)) {
178
CPU_FREE(cpuset);
179
err(EXIT_FAILURE, "pthread_create");
180
}
181
pthread_attr_destroy(&thread_attr);
182
}
183
CPU_FREE(cpuset);
184
}
185
186
static void print_run(struct thread_data *waking_worker, unsigned int run_num)
187
{
188
unsigned int i, wakeup_avg;
189
double waketime_avg, waketime_stddev;
190
struct stats __waketime_stats, __wakeup_stats;
191
192
init_stats(&__wakeup_stats);
193
init_stats(&__waketime_stats);
194
195
for (i = 0; i < params.nwakes; i++) {
196
update_stats(&__waketime_stats, waking_worker[i].runtime.tv_usec);
197
update_stats(&__wakeup_stats, waking_worker[i].nwoken);
198
}
199
200
waketime_avg = avg_stats(&__waketime_stats);
201
waketime_stddev = stddev_stats(&__waketime_stats);
202
wakeup_avg = avg_stats(&__wakeup_stats);
203
204
printf("[Run %d]: Avg per-thread latency (waking %d/%d threads) "
205
"in %.4f ms (+-%.2f%%)\n", run_num + 1, wakeup_avg,
206
params.nthreads, waketime_avg / USEC_PER_MSEC,
207
rel_stddev_stats(waketime_stddev, waketime_avg));
208
}
209
210
static void print_summary(void)
211
{
212
unsigned int wakeup_avg;
213
double waketime_avg, waketime_stddev;
214
215
waketime_avg = avg_stats(&waketime_stats);
216
waketime_stddev = stddev_stats(&waketime_stats);
217
wakeup_avg = avg_stats(&wakeup_stats);
218
219
printf("Avg per-thread latency (waking %d/%d threads) in %.4f ms (+-%.2f%%)\n",
220
wakeup_avg,
221
params.nthreads,
222
waketime_avg / USEC_PER_MSEC,
223
rel_stddev_stats(waketime_stddev, waketime_avg));
224
futex_print_nbuckets(&params);
225
}
226
227
228
static void do_run_stats(struct thread_data *waking_worker)
229
{
230
unsigned int i;
231
232
for (i = 0; i < params.nwakes; i++) {
233
update_stats(&waketime_stats, waking_worker[i].runtime.tv_usec);
234
update_stats(&wakeup_stats, waking_worker[i].nwoken);
235
}
236
237
}
238
239
/*
 * Signal handler: flags the benchmark as finished so the run loop stops
 * before starting another iteration. All three arguments are ignored.
 */
static void toggle_done(int sig __maybe_unused,
			siginfo_t *info __maybe_unused,
			void *uc __maybe_unused)
{
	done = true;
}
245
246
int bench_futex_wake_parallel(int argc, const char **argv)
247
{
248
int ret = 0;
249
unsigned int i, j;
250
struct sigaction act;
251
struct thread_data *waking_worker;
252
struct perf_cpu_map *cpu;
253
254
argc = parse_options(argc, argv, options,
255
bench_futex_wake_parallel_usage, 0);
256
if (argc) {
257
usage_with_options(bench_futex_wake_parallel_usage, options);
258
exit(EXIT_FAILURE);
259
}
260
261
memset(&act, 0, sizeof(act));
262
sigfillset(&act.sa_mask);
263
act.sa_sigaction = toggle_done;
264
sigaction(SIGINT, &act, NULL);
265
266
if (params.mlockall) {
267
if (mlockall(MCL_CURRENT | MCL_FUTURE))
268
err(EXIT_FAILURE, "mlockall");
269
}
270
271
cpu = perf_cpu_map__new_online_cpus();
272
if (!cpu)
273
err(EXIT_FAILURE, "calloc");
274
275
if (!params.nthreads)
276
params.nthreads = perf_cpu_map__nr(cpu);
277
278
/* some sanity checks */
279
if (params.nwakes > params.nthreads ||
280
!params.nwakes)
281
params.nwakes = params.nthreads;
282
283
if (params.nthreads % params.nwakes)
284
errx(EXIT_FAILURE, "Must be perfectly divisible");
285
/*
286
* Each thread will wakeup nwakes tasks in
287
* a single futex_wait call.
288
*/
289
nwakes = params.nthreads/params.nwakes;
290
291
blocked_worker = calloc(params.nthreads, sizeof(*blocked_worker));
292
if (!blocked_worker)
293
err(EXIT_FAILURE, "calloc");
294
295
if (!params.fshared)
296
futex_flag = FUTEX_PRIVATE_FLAG;
297
298
futex_set_nbuckets_param(&params);
299
300
printf("Run summary [PID %d]: blocking on %d threads (at [%s] "
301
"futex %p), %d threads waking up %d at a time.\n\n",
302
getpid(), params.nthreads, params.fshared ? "shared":"private",
303
&futex, params.nwakes, nwakes);
304
305
init_stats(&wakeup_stats);
306
init_stats(&waketime_stats);
307
308
mutex_init(&thread_lock);
309
cond_init(&thread_parent);
310
cond_init(&thread_worker);
311
312
for (j = 0; j < bench_repeat && !done; j++) {
313
waking_worker = calloc(params.nwakes, sizeof(*waking_worker));
314
if (!waking_worker)
315
err(EXIT_FAILURE, "calloc");
316
317
/* create, launch & block all threads */
318
block_threads(blocked_worker, cpu);
319
320
/* make sure all threads are already blocked */
321
mutex_lock(&thread_lock);
322
while (threads_starting)
323
cond_wait(&thread_parent, &thread_lock);
324
cond_broadcast(&thread_worker);
325
mutex_unlock(&thread_lock);
326
327
usleep(200000);
328
329
/* Ok, all threads are patiently blocked, start waking folks up */
330
wakeup_threads(waking_worker);
331
332
for (i = 0; i < params.nthreads; i++) {
333
ret = pthread_join(blocked_worker[i], NULL);
334
if (ret)
335
err(EXIT_FAILURE, "pthread_join");
336
}
337
338
do_run_stats(waking_worker);
339
if (!params.silent)
340
print_run(waking_worker, j);
341
342
free(waking_worker);
343
}
344
345
/* cleanup & report results */
346
cond_destroy(&thread_parent);
347
cond_destroy(&thread_worker);
348
mutex_destroy(&thread_lock);
349
350
print_summary();
351
352
free(blocked_worker);
353
perf_cpu_map__put(cpu);
354
return ret;
355
}
356
#endif /* HAVE_PTHREAD_BARRIER */
357
358