GitHub Repository: torvalds/linux
Path: blob/master/tools/perf/arch/x86/util/pmu.c

// SPDX-License-Identifier: GPL-2.0
#include <string.h>
#include <stdio.h>
#include <sys/types.h>
#include <dirent.h>
#include <fcntl.h>
#include <linux/stddef.h>
#include <linux/perf_event.h>
#include <linux/zalloc.h>
#include <api/fs/fs.h>
#include <api/io_dir.h>
#include <internal/cpumap.h>
#include <errno.h>

#include "../../../util/intel-pt.h"
#include "../../../util/intel-bts.h"
#include "../../../util/pmu.h"
#include "../../../util/fncache.h"
#include "../../../util/pmus.h"
#include "mem-events.h"
#include "util/debug.h"
#include "util/env.h"
#include "util/header.h"

static bool x86__is_intel_graniterapids(void)
{
	static bool checked_if_graniterapids;
	static bool is_graniterapids;

	if (!checked_if_graniterapids) {
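		/* Matches family 6, models 0xAD and 0xAE (the graniterapids parts). */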
		const char *graniterapids_cpuid = "GenuineIntel-6-A[DE]";
		char *cpuid = get_cpuid_str((struct perf_cpu){0});

		is_graniterapids = cpuid && strcmp_cpuid_str(graniterapids_cpuid, cpuid) == 0;
		free(cpuid);
		checked_if_graniterapids = true;
	}
	return is_graniterapids;
}

static struct perf_cpu_map *read_sysfs_cpu_map(const char *sysfs_path)
{
	struct perf_cpu_map *cpus;
	char *buf = NULL;
	size_t buf_len;

	if (sysfs__read_str(sysfs_path, &buf, &buf_len) < 0)
		return NULL;

	cpus = perf_cpu_map__new(buf);
	free(buf);
	return cpus;
}

static int snc_nodes_per_l3_cache(void)
{
	static bool checked_snc;
	static int snc_nodes;

	if (!checked_snc) {
		struct perf_cpu_map *node_cpus =
			read_sysfs_cpu_map("devices/system/node/node0/cpulist");
		struct perf_cpu_map *cache_cpus =
			read_sysfs_cpu_map("devices/system/cpu/cpu0/cache/index3/shared_cpu_list");

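		/*
		 * e.g. a 120 CPU socket with 3-way SNC: the L3 is shared by all
		 * 120 CPUs but node0 holds only 40 of them, so 120 / 40 = 3
		 * nodes per L3 cache (illustrative numbers).
		 */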
		snc_nodes = perf_cpu_map__nr(cache_cpus) / perf_cpu_map__nr(node_cpus);
		perf_cpu_map__put(cache_cpus);
		perf_cpu_map__put(node_cpus);
		checked_snc = true;
	}
	return snc_nodes;
}

static bool starts_with(const char *str, const char *prefix)
{
	return !strncmp(prefix, str, strlen(prefix));
}

static int num_chas(void)
{
	static bool checked_chas;
	static int num_chas;

	if (!checked_chas) {
		int fd = perf_pmu__event_source_devices_fd();
		struct io_dir dir;
		struct io_dirent64 *dent;

		if (fd < 0)
			return -1;

		io_dir__init(&dir, fd);

		while ((dent = io_dir__readdir(&dir)) != NULL) {
			/* Note, dent->d_type will be DT_LNK and so isn't a useful filter. */
			if (starts_with(dent->d_name, "uncore_cha_"))
				num_chas++;
		}
		close(fd);
		checked_chas = true;
	}
	return num_chas;
}

#define MAX_SNCS 6

static int uncore_cha_snc(struct perf_pmu *pmu)
{
	// CHA SNC numbers are ordered so they correspond to the CHA number.
	unsigned int cha_num;
	int num_cha, chas_per_node, cha_snc;
	int snc_nodes = snc_nodes_per_l3_cache();

	if (snc_nodes <= 1)
		return 0;

	num_cha = num_chas();
	if (num_cha <= 0) {
		pr_warning("Unexpected: no CHAs found\n");
		return 0;
	}

	/* Compute SNC for PMU. */
	if (sscanf(pmu->name, "uncore_cha_%u", &cha_num) != 1) {
		pr_warning("Unexpected: unable to compute CHA number '%s'\n", pmu->name);
		return 0;
	}
	chas_per_node = num_cha / snc_nodes;
	cha_snc = cha_num / chas_per_node;
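
	/*
	 * e.g. assuming 120 CHAs per socket and 3-way SNC, chas_per_node is
	 * 40 and uncore_cha_60 falls in SNC 60 / 40 = 1 (illustrative CHA
	 * count).
	 */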

	/* Range check cha_snc in case it is unexpectedly out of bounds. */
	return cha_snc >= MAX_SNCS ? 0 : cha_snc;
}

static int uncore_imc_snc(struct perf_pmu *pmu)
{
	// Compute the IMC SNC using lookup tables.
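	// e.g. with 3-way SNC, uncore_imc_4 maps to snc3_map[4], i.e. SNC 2.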
	unsigned int imc_num;
	int snc_nodes = snc_nodes_per_l3_cache();
	const u8 snc2_map[] = {1, 1, 0, 0, 1, 1, 0, 0};
	const u8 snc3_map[] = {1, 1, 0, 0, 2, 2, 1, 1, 0, 0, 2, 2};
	const u8 *snc_map;
	size_t snc_map_len;

	switch (snc_nodes) {
	case 2:
		snc_map = snc2_map;
		snc_map_len = ARRAY_SIZE(snc2_map);
		break;
	case 3:
		snc_map = snc3_map;
		snc_map_len = ARRAY_SIZE(snc3_map);
		break;
	default:
		/* Error or no lookup support for SNC with >3 nodes. */
		return 0;
	}

	/* Compute SNC for PMU. */
	if (sscanf(pmu->name, "uncore_imc_%u", &imc_num) != 1) {
		pr_warning("Unexpected: unable to compute IMC number '%s'\n", pmu->name);
		return 0;
	}
	if (imc_num >= snc_map_len) {
		pr_warning("Unexpected IMC %d for SNC%d mapping\n", imc_num, snc_nodes);
		return 0;
	}
	return snc_map[imc_num];
}

static int uncore_cha_imc_compute_cpu_adjust(int pmu_snc)
{
	static bool checked_cpu_adjust[MAX_SNCS];
	static int cpu_adjust[MAX_SNCS];
	struct perf_cpu_map *node_cpus;
	char node_path[] = "devices/system/node/node0/cpulist";

	/* Was adjust already computed? */
	if (checked_cpu_adjust[pmu_snc])
		return cpu_adjust[pmu_snc];

	/* SNC0 doesn't need an adjust. */
	if (pmu_snc == 0) {
		cpu_adjust[0] = 0;
		checked_cpu_adjust[0] = true;
		return 0;
	}

	/*
	 * Use the NUMA topology to compute the first CPU of the NUMA node; we
	 * want to adjust CPU 0 to be this, and similarly for the other CPUs
	 * if there is more than one socket.
	 */
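	/*
	 * e.g. node1 of a 120 CPU, 3-way SNC socket starts at CPU 40, giving
	 * an adjust of 40. Only single digit node numbers can be patched
	 * into node_path below.
	 */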
	assert(pmu_snc >= 0 && pmu_snc <= 9);
	node_path[24] += pmu_snc; // Shift node0 to be node<pmu_snc>.
	node_cpus = read_sysfs_cpu_map(node_path);
	cpu_adjust[pmu_snc] = perf_cpu_map__cpu(node_cpus, 0).cpu;
	if (cpu_adjust[pmu_snc] < 0) {
		pr_debug("Failed to read valid CPU list from <sysfs>/%s\n", node_path);
		cpu_adjust[pmu_snc] = 0;
	} else {
		checked_cpu_adjust[pmu_snc] = true;
	}
	perf_cpu_map__put(node_cpus);
	return cpu_adjust[pmu_snc];
}

static void gnr_uncore_cha_imc_adjust_cpumask_for_snc(struct perf_pmu *pmu, bool cha)
{
	// With sub-NUMA clustering (SNC) there is a NUMA node per SNC in the
	// topology. For example, a two socket graniterapids machine may be set
	// up with 3-way SNC, meaning there are 6 NUMA nodes that should be
	// displayed with --per-node. The cpumask of the CHA and IMC PMUs
	// reflects per-socket information, meaning, for example, uncore_cha_60
	// on a two socket graniterapids machine with 120 cores per socket will
	// have a cpumask of "0,120". This cpumask needs adjusting to "40,160"
	// to reflect that uncore_cha_60 is used for the 2nd SNC of each
	// socket. Without the adjustment, events on uncore_cha_60 will appear
	// in node 0 and node 3 (in our example 2 socket 3-way set up), but
	// with the adjustment they will appear in node 1 and node 4. The
	// number of CHAs is typically larger than the number of cores. The
	// CHA numbers are assumed to split evenly and in order with respect
	// to the core numbers. There are fewer memory IMC PMUs than cores and
	// their mapping is handled using lookup tables.
	static struct perf_cpu_map *cha_adjusted[MAX_SNCS];
	static struct perf_cpu_map *imc_adjusted[MAX_SNCS];
	struct perf_cpu_map **adjusted = cha ? cha_adjusted : imc_adjusted;
	int idx, pmu_snc, cpu_adjust;
	struct perf_cpu cpu;
	bool alloc;

	// The cpumask from the kernel holds the first CPU of each socket, e.g. 0,120.
	if (perf_cpu_map__cpu(pmu->cpus, 0).cpu != 0) {
		pr_debug("Ignoring cpumask adjust for %s as unexpected first CPU\n", pmu->name);
		return;
	}

	pmu_snc = cha ? uncore_cha_snc(pmu) : uncore_imc_snc(pmu);
	if (pmu_snc == 0) {
		// No adjustment necessary for the first SNC.
		return;
	}

	alloc = adjusted[pmu_snc] == NULL;
	if (alloc) {
		// Hold onto the perf_cpu_map globally to avoid recomputation.
		cpu_adjust = uncore_cha_imc_compute_cpu_adjust(pmu_snc);
		adjusted[pmu_snc] = perf_cpu_map__empty_new(perf_cpu_map__nr(pmu->cpus));
		if (!adjusted[pmu_snc])
			return;
	}

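	/* e.g. with a cpu_adjust of 40, the kernel cpumask "0,120" becomes "40,160". */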
	perf_cpu_map__for_each_cpu(cpu, idx, pmu->cpus) {
		// Compute the new cpu map values or, if not allocating, assert
		// that they match expectations. The asserts are compiled out in
		// NDEBUG builds to avoid overhead.
		if (alloc) {
			RC_CHK_ACCESS(adjusted[pmu_snc])->map[idx].cpu = cpu.cpu + cpu_adjust;
		} else if (idx == 0) {
			cpu_adjust = perf_cpu_map__cpu(adjusted[pmu_snc], idx).cpu - cpu.cpu;
			assert(uncore_cha_imc_compute_cpu_adjust(pmu_snc) == cpu_adjust);
		} else {
			assert(perf_cpu_map__cpu(adjusted[pmu_snc], idx).cpu ==
			       cpu.cpu + cpu_adjust);
		}
	}

	perf_cpu_map__put(pmu->cpus);
	pmu->cpus = perf_cpu_map__get(adjusted[pmu_snc]);
}

void perf_pmu__arch_init(struct perf_pmu *pmu)
{
	struct perf_pmu_caps *ldlat_cap;

#ifdef HAVE_AUXTRACE_SUPPORT
	if (!strcmp(pmu->name, INTEL_PT_PMU_NAME)) {
		pmu->auxtrace = true;
		pmu->selectable = true;
		pmu->perf_event_attr_init_default = intel_pt_pmu_default_config;
	}
	if (!strcmp(pmu->name, INTEL_BTS_PMU_NAME)) {
		pmu->auxtrace = true;
		pmu->selectable = true;
	}
#endif

	if (x86__is_amd_cpu()) {
		if (strcmp(pmu->name, "ibs_op"))
			return;

		pmu->mem_events = perf_mem_events_amd;

		if (!perf_pmu__caps_parse(pmu))
			return;

		ldlat_cap = perf_pmu__get_cap(pmu, "ldlat");
		if (!ldlat_cap || strcmp(ldlat_cap->value, "1"))
			return;

		perf_mem_events__loads_ldlat = 0;
		pmu->mem_events = perf_mem_events_amd_ldlat;
	} else {
		if (pmu->is_core) {
			if (perf_pmu__have_event(pmu, "mem-loads-aux"))
				pmu->mem_events = perf_mem_events_intel_aux;
			else
				pmu->mem_events = perf_mem_events_intel;
		} else if (x86__is_intel_graniterapids()) {
			if (starts_with(pmu->name, "uncore_cha_"))
				gnr_uncore_cha_imc_adjust_cpumask_for_snc(pmu, /*cha=*/true);
			else if (starts_with(pmu->name, "uncore_imc_"))
				gnr_uncore_cha_imc_adjust_cpumask_for_snc(pmu, /*cha=*/false);
		}
	}
}