Path: blob/master/tools/perf/bench/futex-wake-parallel.c
26285 views
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2015 Davidlohr Bueso.
 *
 * Block a bunch of threads and let parallel waker threads wakeup an
 * equal amount of them. The program output reflects the avg latency
 * for each individual thread to service its share of work. Ultimately
 * it can be used to measure futex_wake() changes.
 */
#include "bench.h"
#include <linux/compiler.h>
#include "../util/debug.h"
#include "../util/mutex.h"

#ifndef HAVE_PTHREAD_BARRIER
/*
 * Stub used when the libc lacks pthread_barrier_t: report that the
 * benchmark is unavailable and exit successfully so the bench suite
 * keeps going.
 */
int bench_futex_wake_parallel(int argc __maybe_unused, const char **argv __maybe_unused)
{
	pr_err("%s: pthread_barrier_t unavailable, disabling this test...\n", __func__);
	return 0;
}
#else /* HAVE_PTHREAD_BARRIER */
/* For the CLR_() macros */
#include <string.h>
#include <pthread.h>

#include <signal.h>
#include "../util/stat.h"
#include <subcmd/parse-options.h>
#include <linux/kernel.h>
#include <linux/time64.h>
#include <errno.h>
#include "futex.h"
#include <perf/cpumap.h>

#include <err.h>
#include <stdlib.h>
#include <sys/time.h>
#include <sys/mman.h>

/* Per-waker-thread bookkeeping: how many tasks it woke and how long it took. */
struct thread_data {
	pthread_t worker;
	unsigned int nwoken;		/* tasks actually woken by this waker */
	struct timeval runtime;		/* wall time spent in futex_wake() */
};

/* number of tasks each waker thread wakes per futex_wake() call */
static unsigned int nwakes = 1;

/* all threads will block on the same futex -- hash bucket chaos ;) */
static u_int32_t futex = 0;

static pthread_t *blocked_worker;
static bool done = false;		/* set by SIGINT handler to stop the run loop */
static struct mutex thread_lock;
static struct cond thread_parent, thread_worker;
static pthread_barrier_t barrier;	/* releases all wakers simultaneously */
static struct stats waketime_stats, wakeup_stats;
static unsigned int threads_starting;	/* countdown of blocked threads still starting up */
static int futex_flag = 0;

static struct bench_futex_parameters params = {
	.nbuckets = -1,
};

static const struct option options[] = {
	OPT_INTEGER( 'b', "buckets", &params.nbuckets, "Specify amount of hash buckets"),
	OPT_UINTEGER('t', "threads", &params.nthreads, "Specify amount of threads"),
	OPT_UINTEGER('w', "nwakers", &params.nwakes, "Specify amount of waking threads"),
	OPT_BOOLEAN( 's', "silent", &params.silent, "Silent mode: do not display data/details"),
	OPT_BOOLEAN( 'S', "shared", &params.fshared, "Use shared futexes instead of private ones"),
	OPT_BOOLEAN( 'm', "mlockall", &params.mlockall, "Lock all current and future memory"),

	OPT_END()
};

static const char * const bench_futex_wake_parallel_usage[] = {
	"perf bench futex wake-parallel <options>",
	NULL
};

/*
 * Waker thread body: wait on the shared barrier so all wakers fire at
 * once, then time a single futex_wake() of this thread's share (nwakes)
 * of the blocked tasks. Results land in the per-thread thread_data.
 */
static void *waking_workerfn(void *arg)
{
	struct thread_data *waker = (struct thread_data *) arg;
	struct timeval start, end;

	/* rendezvous with the other wakers and the main thread */
	pthread_barrier_wait(&barrier);

	gettimeofday(&start, NULL);

	waker->nwoken = futex_wake(&futex, nwakes, futex_flag);
	if (waker->nwoken != nwakes)
		warnx("couldn't wakeup all tasks (%d/%d)",
		      waker->nwoken, nwakes);

	gettimeofday(&end, NULL);
	timersub(&end, &start, &waker->runtime);

	pthread_exit(NULL);
	return NULL;
}

/*
 * Spawn params.nwakes joinable waker threads, release them all at once
 * via a barrier (sized nwakes + 1 so this caller is the final party),
 * then join them. Each td[i] is filled in by waking_workerfn().
 */
static void wakeup_threads(struct thread_data *td)
{
	unsigned int i;
	pthread_attr_t thread_attr;

	pthread_attr_init(&thread_attr);
	pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_JOINABLE);

	pthread_barrier_init(&barrier, NULL, params.nwakes + 1);

	/* create and block all threads */
	for (i = 0; i < params.nwakes; i++) {
		/*
		 * Thread creation order will impact per-thread latency
		 * as it will affect the order to acquire the hb spinlock.
		 * For now let the scheduler decide.
		 */
		if (pthread_create(&td[i].worker, &thread_attr,
				   waking_workerfn, (void *)&td[i]))
			err(EXIT_FAILURE, "pthread_create");
	}

	/* last party in: releases every waker simultaneously */
	pthread_barrier_wait(&barrier);

	for (i = 0; i < params.nwakes; i++)
		if (pthread_join(td[i].worker, NULL))
			err(EXIT_FAILURE, "pthread_join");

	pthread_barrier_destroy(&barrier);
	pthread_attr_destroy(&thread_attr);
}

/*
 * Blocked thread body: report startup via threads_starting (signalling
 * the parent when the last one arrives), wait for the parent's
 * broadcast, then block on the shared futex until woken.
 */
static void *blocked_workerfn(void *arg __maybe_unused)
{
	mutex_lock(&thread_lock);
	threads_starting--;
	if (!threads_starting)
		cond_signal(&thread_parent);
	cond_wait(&thread_worker, &thread_lock);
	mutex_unlock(&thread_lock);

	while (1) { /* handle spurious wakeups */
		if (futex_wait(&futex, 0, NULL, futex_flag) != EINTR)
			break;
	}

	pthread_exit(NULL);
	return NULL;
}

/*
 * Create params.nthreads blocked threads, pinning each one round-robin
 * across the CPUs in @cpu via a per-thread affinity mask.
 */
static void block_threads(pthread_t *w, struct perf_cpu_map *cpu)
{
	cpu_set_t *cpuset;
	unsigned int i;
	int nrcpus = cpu__max_cpu().cpu;
	size_t size;

	threads_starting = params.nthreads;

	cpuset = CPU_ALLOC(nrcpus);
	BUG_ON(!cpuset);
	size = CPU_ALLOC_SIZE(nrcpus);

	/* create and block all threads */
	for (i = 0; i < params.nthreads; i++) {
		pthread_attr_t thread_attr;

		pthread_attr_init(&thread_attr);
		CPU_ZERO_S(size, cpuset);
		CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset);

		if (pthread_attr_setaffinity_np(&thread_attr, size, cpuset)) {
			CPU_FREE(cpuset);
			err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
		}

		if (pthread_create(&w[i], &thread_attr, blocked_workerfn, NULL)) {
			CPU_FREE(cpuset);
			err(EXIT_FAILURE, "pthread_create");
		}
		pthread_attr_destroy(&thread_attr);
	}
	CPU_FREE(cpuset);
}

/*
 * Print the per-run average waker latency and wakeup count.
 *
 * NOTE(review): only runtime.tv_usec is folded into the stats (here and
 * in do_run_stats()) -- this presumably assumes each futex_wake() batch
 * completes in well under one second; confirm against upstream intent.
 */
static void print_run(struct thread_data *waking_worker, unsigned int run_num)
{
	unsigned int i, wakeup_avg;
	double waketime_avg, waketime_stddev;
	struct stats __waketime_stats, __wakeup_stats;

	init_stats(&__wakeup_stats);
	init_stats(&__waketime_stats);

	for (i = 0; i < params.nwakes; i++) {
		update_stats(&__waketime_stats, waking_worker[i].runtime.tv_usec);
		update_stats(&__wakeup_stats, waking_worker[i].nwoken);
	}

	waketime_avg = avg_stats(&__waketime_stats);
	waketime_stddev = stddev_stats(&__waketime_stats);
	wakeup_avg = avg_stats(&__wakeup_stats);

	printf("[Run %d]: Avg per-thread latency (waking %d/%d threads) "
	       "in %.4f ms (+-%.2f%%)\n", run_num + 1, wakeup_avg,
	       params.nthreads, waketime_avg / USEC_PER_MSEC,
	       rel_stddev_stats(waketime_stddev, waketime_avg));
}

/* Print averages accumulated across all runs, plus the bucket info. */
static void print_summary(void)
{
	unsigned int wakeup_avg;
	double waketime_avg, waketime_stddev;

	waketime_avg = avg_stats(&waketime_stats);
	waketime_stddev = stddev_stats(&waketime_stats);
	wakeup_avg = avg_stats(&wakeup_stats);

	printf("Avg per-thread latency (waking %d/%d threads) in %.4f ms (+-%.2f%%)\n",
	       wakeup_avg,
	       params.nthreads,
	       waketime_avg / USEC_PER_MSEC,
	       rel_stddev_stats(waketime_stddev, waketime_avg));
	futex_print_nbuckets(&params);
}


/* Fold one run's per-waker results into the global cross-run stats. */
static void do_run_stats(struct thread_data *waking_worker)
{
	unsigned int i;

	for (i = 0; i < params.nwakes; i++) {
		update_stats(&waketime_stats, waking_worker[i].runtime.tv_usec);
		update_stats(&wakeup_stats, waking_worker[i].nwoken);
	}

}

/* SIGINT handler: ask the main run loop to stop after the current run. */
static void toggle_done(int sig __maybe_unused,
			siginfo_t *info __maybe_unused,
			void *uc __maybe_unused)
{
	done = true;
}

/*
 * Benchmark entry point. For each of bench_repeat runs: block
 * params.nthreads threads on one futex, wait until they are all
 * parked, then have params.nwakes waker threads wake them up in
 * parallel (nwakes = nthreads/nwakes tasks each) and record the
 * per-waker futex_wake() latency.
 */
int bench_futex_wake_parallel(int argc, const char **argv)
{
	int ret = 0;
	unsigned int i, j;
	struct sigaction act;
	struct thread_data *waking_worker;
	struct perf_cpu_map *cpu;

	argc = parse_options(argc, argv, options,
			     bench_futex_wake_parallel_usage, 0);
	if (argc) {
		usage_with_options(bench_futex_wake_parallel_usage, options);
		exit(EXIT_FAILURE);
	}

	memset(&act, 0, sizeof(act));
	sigfillset(&act.sa_mask);
	act.sa_sigaction = toggle_done;
	sigaction(SIGINT, &act, NULL);

	if (params.mlockall) {
		if (mlockall(MCL_CURRENT | MCL_FUTURE))
			err(EXIT_FAILURE, "mlockall");
	}

	cpu = perf_cpu_map__new_online_cpus();
	if (!cpu)
		err(EXIT_FAILURE, "calloc");

	/* default: one blocked thread per online CPU */
	if (!params.nthreads)
		params.nthreads = perf_cpu_map__nr(cpu);

	/* some sanity checks */
	if (params.nwakes > params.nthreads ||
	    !params.nwakes)
		params.nwakes = params.nthreads;

	if (params.nthreads % params.nwakes)
		errx(EXIT_FAILURE, "Must be perfectly divisible");
	/*
	 * Each thread will wakeup nwakes tasks in
	 * a single futex_wait call.
	 */
	nwakes = params.nthreads/params.nwakes;

	blocked_worker = calloc(params.nthreads, sizeof(*blocked_worker));
	if (!blocked_worker)
		err(EXIT_FAILURE, "calloc");

	if (!params.fshared)
		futex_flag = FUTEX_PRIVATE_FLAG;

	futex_set_nbuckets_param(&params);

	printf("Run summary [PID %d]: blocking on %d threads (at [%s] "
	       "futex %p), %d threads waking up %d at a time.\n\n",
	       getpid(), params.nthreads, params.fshared ? "shared":"private",
	       &futex, params.nwakes, nwakes);

	init_stats(&wakeup_stats);
	init_stats(&waketime_stats);

	mutex_init(&thread_lock);
	cond_init(&thread_parent);
	cond_init(&thread_worker);

	for (j = 0; j < bench_repeat && !done; j++) {
		waking_worker = calloc(params.nwakes, sizeof(*waking_worker));
		if (!waking_worker)
			err(EXIT_FAILURE, "calloc");

		/* create, launch & block all threads */
		block_threads(blocked_worker, cpu);

		/* make sure all threads are already blocked */
		mutex_lock(&thread_lock);
		while (threads_starting)
			cond_wait(&thread_parent, &thread_lock);
		cond_broadcast(&thread_worker);
		mutex_unlock(&thread_lock);

		/*
		 * Settle: give the released threads time to reach the
		 * futex_wait() and actually park in the kernel.
		 */
		usleep(200000);

		/* Ok, all threads are patiently blocked, start waking folks up */
		wakeup_threads(waking_worker);

		for (i = 0; i < params.nthreads; i++) {
			ret = pthread_join(blocked_worker[i], NULL);
			if (ret)
				err(EXIT_FAILURE, "pthread_join");
		}

		do_run_stats(waking_worker);
		if (!params.silent)
			print_run(waking_worker, j);

		free(waking_worker);
	}

	/* cleanup & report results */
	cond_destroy(&thread_parent);
	cond_destroy(&thread_worker);
	mutex_destroy(&thread_lock);

	print_summary();

	free(blocked_worker);
	perf_cpu_map__put(cpu);
	return ret;
}
#endif /* HAVE_PTHREAD_BARRIER */