Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/tools/power/x86/turbostat/turbostat.c
54337 views
1
// SPDX-License-Identifier: GPL-2.0-only
2
/*
3
* turbostat -- show CPU frequency and C-state residency
4
* on modern Intel and AMD processors.
5
*
6
* Copyright (c) 2010 - 2026 Intel Corporation
7
* Len Brown <len.brown@intel.com>
8
*/
9
10
#define _GNU_SOURCE
11
#include MSRHEADER
12
13
// copied from arch/x86/include/asm/cpu_device_id.h
14
#define VFM_MODEL_BIT 0
15
#define VFM_FAMILY_BIT 8
16
#define VFM_VENDOR_BIT 16
17
#define VFM_RSVD_BIT 24
18
19
#define VFM_MODEL_MASK GENMASK(VFM_FAMILY_BIT - 1, VFM_MODEL_BIT)
20
#define VFM_FAMILY_MASK GENMASK(VFM_VENDOR_BIT - 1, VFM_FAMILY_BIT)
21
#define VFM_VENDOR_MASK GENMASK(VFM_RSVD_BIT - 1, VFM_VENDOR_BIT)
22
23
#define VFM_MODEL(vfm) (((vfm) & VFM_MODEL_MASK) >> VFM_MODEL_BIT)
24
#define VFM_FAMILY(vfm) (((vfm) & VFM_FAMILY_MASK) >> VFM_FAMILY_BIT)
25
#define VFM_VENDOR(vfm) (((vfm) & VFM_VENDOR_MASK) >> VFM_VENDOR_BIT)
26
27
#define VFM_MAKE(_vendor, _family, _model) ( \
28
((_model) << VFM_MODEL_BIT) | \
29
((_family) << VFM_FAMILY_BIT) | \
30
((_vendor) << VFM_VENDOR_BIT) \
31
)
32
// end copied section
33
34
#define CPUID_LEAF_MODEL_ID 0x1A
35
#define CPUID_LEAF_MODEL_ID_CORE_TYPE_SHIFT 24
36
37
#define X86_VENDOR_INTEL 0
38
39
#include INTEL_FAMILY_HEADER
40
#include BUILD_BUG_HEADER
41
#include <stdarg.h>
42
#include <stdio.h>
43
#include <err.h>
44
#include <unistd.h>
45
#include <sys/types.h>
46
#include <sys/wait.h>
47
#include <sys/stat.h>
48
#include <sys/select.h>
49
#include <sys/resource.h>
50
#include <sys/mman.h>
51
#include <fcntl.h>
52
#include <signal.h>
53
#include <sys/time.h>
54
#include <stdlib.h>
55
#include <getopt.h>
56
#include <dirent.h>
57
#include <string.h>
58
#include <ctype.h>
59
#include <sched.h>
60
#include <time.h>
61
#include <cpuid.h>
62
#include <sys/capability.h>
63
#include <errno.h>
64
#include <math.h>
65
#include <linux/perf_event.h>
66
#include <asm/unistd.h>
67
#include <stdbool.h>
68
#include <assert.h>
69
#include <linux/kernel.h>
70
#include <limits.h>
71
72
#define UNUSED(x) (void)(x)
73
74
/*
75
* This list matches the column headers, except
76
* 1. built-in only, the sysfs counters are not here -- we learn of those at run-time
77
* 2. Core and CPU are moved to the end, we can't have strings that contain them
78
* matching on them for --show and --hide.
79
*/
80
81
/*
82
* buffer size used by sscanf() for added column names
83
* Usually truncated to 7 characters, but also handles 18 columns for raw 64-bit counters
84
*/
85
#define NAME_BYTES 20
86
#define PATH_BYTES 128
87
#define PERF_NAME_BYTES 128
88
89
#define MAX_NOFILE 0x8000
90
91
#define COUNTER_KIND_PERF_PREFIX "perf/"
92
#define COUNTER_KIND_PERF_PREFIX_LEN strlen(COUNTER_KIND_PERF_PREFIX)
93
#define PERF_DEV_NAME_BYTES 32
94
#define PERF_EVT_NAME_BYTES 32
95
96
#define INTEL_ECORE_TYPE 0x20
97
#define INTEL_PCORE_TYPE 0x40
98
99
#define ROUND_UP_TO_PAGE_SIZE(n) (((n) + 0x1000UL-1UL) & ~(0x1000UL-1UL))
100
101
enum counter_scope { SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE };
102
enum counter_type { COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC, COUNTER_K2M };
103
enum counter_format { FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT, FORMAT_AVERAGE };
104
enum counter_source { COUNTER_SOURCE_NONE, COUNTER_SOURCE_PERF, COUNTER_SOURCE_MSR };
105
106
struct perf_counter_info {
107
struct perf_counter_info *next;
108
109
/* How to open the counter / What counter it is. */
110
char device[PERF_DEV_NAME_BYTES];
111
char event[PERF_EVT_NAME_BYTES];
112
113
/* How to show/format the counter. */
114
char name[PERF_NAME_BYTES];
115
unsigned int width;
116
enum counter_scope scope;
117
enum counter_type type;
118
enum counter_format format;
119
double scale;
120
121
/* For reading the counter. */
122
int *fd_perf_per_domain;
123
size_t num_domains;
124
};
125
126
struct sysfs_path {
127
char path[PATH_BYTES];
128
int id;
129
struct sysfs_path *next;
130
};
131
132
struct msr_counter {
133
unsigned int msr_num;
134
char name[NAME_BYTES];
135
struct sysfs_path *sp;
136
unsigned int width;
137
enum counter_type type;
138
enum counter_format format;
139
struct msr_counter *next;
140
unsigned int flags;
141
#define FLAGS_HIDE (1 << 0)
142
#define FLAGS_SHOW (1 << 1)
143
#define SYSFS_PERCPU (1 << 1)
144
};
145
static int use_android_msr_path;
146
147
struct msr_counter bic[] = {
148
{ 0x0, "usec", NULL, 0, 0, 0, NULL, 0 },
149
{ 0x0, "Time_Of_Day_Seconds", NULL, 0, 0, 0, NULL, 0 },
150
{ 0x0, "Package", NULL, 0, 0, 0, NULL, 0 },
151
{ 0x0, "Node", NULL, 0, 0, 0, NULL, 0 },
152
{ 0x0, "Avg_MHz", NULL, 0, 0, 0, NULL, 0 },
153
{ 0x0, "Busy%", NULL, 0, 0, 0, NULL, 0 },
154
{ 0x0, "Bzy_MHz", NULL, 0, 0, 0, NULL, 0 },
155
{ 0x0, "TSC_MHz", NULL, 0, 0, 0, NULL, 0 },
156
{ 0x0, "IRQ", NULL, 0, 0, 0, NULL, 0 },
157
{ 0x0, "SMI", NULL, 32, 0, FORMAT_DELTA, NULL, 0 },
158
{ 0x0, "cpuidle", NULL, 0, 0, 0, NULL, 0 },
159
{ 0x0, "CPU%c1", NULL, 0, 0, 0, NULL, 0 },
160
{ 0x0, "CPU%c3", NULL, 0, 0, 0, NULL, 0 },
161
{ 0x0, "CPU%c6", NULL, 0, 0, 0, NULL, 0 },
162
{ 0x0, "CPU%c7", NULL, 0, 0, 0, NULL, 0 },
163
{ 0x0, "ThreadC", NULL, 0, 0, 0, NULL, 0 },
164
{ 0x0, "CoreTmp", NULL, 0, 0, 0, NULL, 0 },
165
{ 0x0, "CoreCnt", NULL, 0, 0, 0, NULL, 0 },
166
{ 0x0, "PkgTmp", NULL, 0, 0, 0, NULL, 0 },
167
{ 0x0, "GFX%rc6", NULL, 0, 0, 0, NULL, 0 },
168
{ 0x0, "GFXMHz", NULL, 0, 0, 0, NULL, 0 },
169
{ 0x0, "Pkg%pc2", NULL, 0, 0, 0, NULL, 0 },
170
{ 0x0, "Pkg%pc3", NULL, 0, 0, 0, NULL, 0 },
171
{ 0x0, "Pkg%pc6", NULL, 0, 0, 0, NULL, 0 },
172
{ 0x0, "Pkg%pc7", NULL, 0, 0, 0, NULL, 0 },
173
{ 0x0, "Pkg%pc8", NULL, 0, 0, 0, NULL, 0 },
174
{ 0x0, "Pkg%pc9", NULL, 0, 0, 0, NULL, 0 },
175
{ 0x0, "Pk%pc10", NULL, 0, 0, 0, NULL, 0 },
176
{ 0x0, "CPU%LPI", NULL, 0, 0, 0, NULL, 0 },
177
{ 0x0, "SYS%LPI", NULL, 0, 0, 0, NULL, 0 },
178
{ 0x0, "PkgWatt", NULL, 0, 0, 0, NULL, 0 },
179
{ 0x0, "CorWatt", NULL, 0, 0, 0, NULL, 0 },
180
{ 0x0, "GFXWatt", NULL, 0, 0, 0, NULL, 0 },
181
{ 0x0, "PkgCnt", NULL, 0, 0, 0, NULL, 0 },
182
{ 0x0, "RAMWatt", NULL, 0, 0, 0, NULL, 0 },
183
{ 0x0, "PKG_%", NULL, 0, 0, 0, NULL, 0 },
184
{ 0x0, "RAM_%", NULL, 0, 0, 0, NULL, 0 },
185
{ 0x0, "Pkg_J", NULL, 0, 0, 0, NULL, 0 },
186
{ 0x0, "Cor_J", NULL, 0, 0, 0, NULL, 0 },
187
{ 0x0, "GFX_J", NULL, 0, 0, 0, NULL, 0 },
188
{ 0x0, "RAM_J", NULL, 0, 0, 0, NULL, 0 },
189
{ 0x0, "Mod%c6", NULL, 0, 0, 0, NULL, 0 },
190
{ 0x0, "Totl%C0", NULL, 0, 0, 0, NULL, 0 },
191
{ 0x0, "Any%C0", NULL, 0, 0, 0, NULL, 0 },
192
{ 0x0, "GFX%C0", NULL, 0, 0, 0, NULL, 0 },
193
{ 0x0, "CPUGFX%", NULL, 0, 0, 0, NULL, 0 },
194
{ 0x0, "Core", NULL, 0, 0, 0, NULL, 0 },
195
{ 0x0, "CPU", NULL, 0, 0, 0, NULL, 0 },
196
{ 0x0, "APIC", NULL, 0, 0, 0, NULL, 0 },
197
{ 0x0, "X2APIC", NULL, 0, 0, 0, NULL, 0 },
198
{ 0x0, "Die", NULL, 0, 0, 0, NULL, 0 },
199
{ 0x0, "L3", NULL, 0, 0, 0, NULL, 0 },
200
{ 0x0, "GFXAMHz", NULL, 0, 0, 0, NULL, 0 },
201
{ 0x0, "IPC", NULL, 0, 0, 0, NULL, 0 },
202
{ 0x0, "CoreThr", NULL, 0, 0, 0, NULL, 0 },
203
{ 0x0, "UncMHz", NULL, 0, 0, 0, NULL, 0 },
204
{ 0x0, "SAM%mc6", NULL, 0, 0, 0, NULL, 0 },
205
{ 0x0, "SAMMHz", NULL, 0, 0, 0, NULL, 0 },
206
{ 0x0, "SAMAMHz", NULL, 0, 0, 0, NULL, 0 },
207
{ 0x0, "Die%c6", NULL, 0, 0, 0, NULL, 0 },
208
{ 0x0, "SysWatt", NULL, 0, 0, 0, NULL, 0 },
209
{ 0x0, "Sys_J", NULL, 0, 0, 0, NULL, 0 },
210
{ 0x0, "NMI", NULL, 0, 0, 0, NULL, 0 },
211
{ 0x0, "CPU%c1e", NULL, 0, 0, 0, NULL, 0 },
212
{ 0x0, "pct_idle", NULL, 0, 0, 0, NULL, 0 },
213
{ 0x0, "LLCMRPS", NULL, 0, 0, 0, NULL, 0 },
214
{ 0x0, "LLC%hit", NULL, 0, 0, 0, NULL, 0 },
215
{ 0x0, "L2MRPS", NULL, 0, 0, 0, NULL, 0 },
216
{ 0x0, "L2%hit", NULL, 0, 0, 0, NULL, 0 },
217
};
218
219
/* n.b. bic_names must match the order in bic[], above */
220
enum bic_names {
221
BIC_USEC,
222
BIC_TOD,
223
BIC_Package,
224
BIC_Node,
225
BIC_Avg_MHz,
226
BIC_Busy,
227
BIC_Bzy_MHz,
228
BIC_TSC_MHz,
229
BIC_IRQ,
230
BIC_SMI,
231
BIC_cpuidle,
232
BIC_CPU_c1,
233
BIC_CPU_c3,
234
BIC_CPU_c6,
235
BIC_CPU_c7,
236
BIC_ThreadC,
237
BIC_CoreTmp,
238
BIC_CoreCnt,
239
BIC_PkgTmp,
240
BIC_GFX_rc6,
241
BIC_GFXMHz,
242
BIC_Pkgpc2,
243
BIC_Pkgpc3,
244
BIC_Pkgpc6,
245
BIC_Pkgpc7,
246
BIC_Pkgpc8,
247
BIC_Pkgpc9,
248
BIC_Pkgpc10,
249
BIC_CPU_LPI,
250
BIC_SYS_LPI,
251
BIC_PkgWatt,
252
BIC_CorWatt,
253
BIC_GFXWatt,
254
BIC_PkgCnt,
255
BIC_RAMWatt,
256
BIC_PKG__,
257
BIC_RAM__,
258
BIC_Pkg_J,
259
BIC_Cor_J,
260
BIC_GFX_J,
261
BIC_RAM_J,
262
BIC_Mod_c6,
263
BIC_Totl_c0,
264
BIC_Any_c0,
265
BIC_GFX_c0,
266
BIC_CPUGFX,
267
BIC_Core,
268
BIC_CPU,
269
BIC_APIC,
270
BIC_X2APIC,
271
BIC_Die,
272
BIC_L3,
273
BIC_GFXACTMHz,
274
BIC_IPC,
275
BIC_CORE_THROT_CNT,
276
BIC_UNCORE_MHZ,
277
BIC_SAM_mc6,
278
BIC_SAMMHz,
279
BIC_SAMACTMHz,
280
BIC_Diec6,
281
BIC_SysWatt,
282
BIC_Sys_J,
283
BIC_NMI,
284
BIC_CPU_c1e,
285
BIC_pct_idle,
286
BIC_LLC_MRPS,
287
BIC_LLC_HIT,
288
BIC_L2_MRPS,
289
BIC_L2_HIT,
290
MAX_BIC
291
};
292
293
void print_bic_set(char *s, cpu_set_t *set)
294
{
295
int i;
296
297
assert(MAX_BIC < CPU_SETSIZE);
298
299
printf("%s:", s);
300
301
for (i = 0; i < MAX_BIC; ++i) {
302
303
if (CPU_ISSET(i, set))
304
printf(" %s", bic[i].name);
305
}
306
putchar('\n');
307
}
308
309
static cpu_set_t bic_group_topology;
310
static cpu_set_t bic_group_thermal_pwr;
311
static cpu_set_t bic_group_frequency;
312
static cpu_set_t bic_group_hw_idle;
313
static cpu_set_t bic_group_sw_idle;
314
static cpu_set_t bic_group_idle;
315
static cpu_set_t bic_group_cache;
316
static cpu_set_t bic_group_other;
317
static cpu_set_t bic_group_disabled_by_default;
318
static cpu_set_t bic_enabled;
319
static cpu_set_t bic_present;
320
321
/* modify */
322
#define BIC_INIT(set) CPU_ZERO(set)
323
324
#define SET_BIC(COUNTER_NUMBER, set) CPU_SET(COUNTER_NUMBER, set)
325
#define CLR_BIC(COUNTER_NUMBER, set) CPU_CLR(COUNTER_NUMBER, set)
326
327
#define BIC_PRESENT(COUNTER_NUMBER) SET_BIC(COUNTER_NUMBER, &bic_present)
328
#define BIC_NOT_PRESENT(COUNTER_NUMBER) CPU_CLR(COUNTER_NUMBER, &bic_present)
329
330
/* test */
331
#define BIC_IS_ENABLED(COUNTER_NUMBER) CPU_ISSET(COUNTER_NUMBER, &bic_enabled)
332
#define DO_BIC_READ(COUNTER_NUMBER) CPU_ISSET(COUNTER_NUMBER, &bic_present)
333
#define DO_BIC(COUNTER_NUMBER) (CPU_ISSET(COUNTER_NUMBER, &bic_enabled) && CPU_ISSET(COUNTER_NUMBER, &bic_present))
334
335
static void bic_set_all(cpu_set_t *set)
336
{
337
int i;
338
339
assert(MAX_BIC < CPU_SETSIZE);
340
341
for (i = 0; i < MAX_BIC; ++i)
342
SET_BIC(i, set);
343
}
344
345
/*
346
* bic_clear_bits()
347
* clear all the bits from "clr" in "dst"
348
*/
349
static void bic_clear_bits(cpu_set_t *dst, cpu_set_t *clr)
350
{
351
int i;
352
353
assert(MAX_BIC < CPU_SETSIZE);
354
355
for (i = 0; i < MAX_BIC; ++i)
356
if (CPU_ISSET(i, clr))
357
CLR_BIC(i, dst);
358
}
359
360
static void bic_groups_init(void)
361
{
362
BIC_INIT(&bic_group_topology);
363
SET_BIC(BIC_Package, &bic_group_topology);
364
SET_BIC(BIC_Node, &bic_group_topology);
365
SET_BIC(BIC_CoreCnt, &bic_group_topology);
366
SET_BIC(BIC_PkgCnt, &bic_group_topology);
367
SET_BIC(BIC_Core, &bic_group_topology);
368
SET_BIC(BIC_CPU, &bic_group_topology);
369
SET_BIC(BIC_Die, &bic_group_topology);
370
SET_BIC(BIC_L3, &bic_group_topology);
371
372
BIC_INIT(&bic_group_thermal_pwr);
373
SET_BIC(BIC_CoreTmp, &bic_group_thermal_pwr);
374
SET_BIC(BIC_PkgTmp, &bic_group_thermal_pwr);
375
SET_BIC(BIC_PkgWatt, &bic_group_thermal_pwr);
376
SET_BIC(BIC_CorWatt, &bic_group_thermal_pwr);
377
SET_BIC(BIC_GFXWatt, &bic_group_thermal_pwr);
378
SET_BIC(BIC_RAMWatt, &bic_group_thermal_pwr);
379
SET_BIC(BIC_PKG__, &bic_group_thermal_pwr);
380
SET_BIC(BIC_RAM__, &bic_group_thermal_pwr);
381
SET_BIC(BIC_SysWatt, &bic_group_thermal_pwr);
382
383
BIC_INIT(&bic_group_frequency);
384
SET_BIC(BIC_Avg_MHz, &bic_group_frequency);
385
SET_BIC(BIC_Busy, &bic_group_frequency);
386
SET_BIC(BIC_Bzy_MHz, &bic_group_frequency);
387
SET_BIC(BIC_TSC_MHz, &bic_group_frequency);
388
SET_BIC(BIC_GFXMHz, &bic_group_frequency);
389
SET_BIC(BIC_GFXACTMHz, &bic_group_frequency);
390
SET_BIC(BIC_SAMMHz, &bic_group_frequency);
391
SET_BIC(BIC_SAMACTMHz, &bic_group_frequency);
392
SET_BIC(BIC_UNCORE_MHZ, &bic_group_frequency);
393
394
BIC_INIT(&bic_group_hw_idle);
395
SET_BIC(BIC_Busy, &bic_group_hw_idle);
396
SET_BIC(BIC_CPU_c1, &bic_group_hw_idle);
397
SET_BIC(BIC_CPU_c3, &bic_group_hw_idle);
398
SET_BIC(BIC_CPU_c6, &bic_group_hw_idle);
399
SET_BIC(BIC_CPU_c7, &bic_group_hw_idle);
400
SET_BIC(BIC_GFX_rc6, &bic_group_hw_idle);
401
SET_BIC(BIC_Pkgpc2, &bic_group_hw_idle);
402
SET_BIC(BIC_Pkgpc3, &bic_group_hw_idle);
403
SET_BIC(BIC_Pkgpc6, &bic_group_hw_idle);
404
SET_BIC(BIC_Pkgpc7, &bic_group_hw_idle);
405
SET_BIC(BIC_Pkgpc8, &bic_group_hw_idle);
406
SET_BIC(BIC_Pkgpc9, &bic_group_hw_idle);
407
SET_BIC(BIC_Pkgpc10, &bic_group_hw_idle);
408
SET_BIC(BIC_CPU_LPI, &bic_group_hw_idle);
409
SET_BIC(BIC_SYS_LPI, &bic_group_hw_idle);
410
SET_BIC(BIC_Mod_c6, &bic_group_hw_idle);
411
SET_BIC(BIC_Totl_c0, &bic_group_hw_idle);
412
SET_BIC(BIC_Any_c0, &bic_group_hw_idle);
413
SET_BIC(BIC_GFX_c0, &bic_group_hw_idle);
414
SET_BIC(BIC_CPUGFX, &bic_group_hw_idle);
415
SET_BIC(BIC_SAM_mc6, &bic_group_hw_idle);
416
SET_BIC(BIC_Diec6, &bic_group_hw_idle);
417
418
BIC_INIT(&bic_group_sw_idle);
419
SET_BIC(BIC_Busy, &bic_group_sw_idle);
420
SET_BIC(BIC_cpuidle, &bic_group_sw_idle);
421
SET_BIC(BIC_pct_idle, &bic_group_sw_idle);
422
423
BIC_INIT(&bic_group_idle);
424
425
CPU_OR(&bic_group_idle, &bic_group_idle, &bic_group_hw_idle);
426
SET_BIC(BIC_pct_idle, &bic_group_idle);
427
428
BIC_INIT(&bic_group_cache);
429
SET_BIC(BIC_LLC_MRPS, &bic_group_cache);
430
SET_BIC(BIC_LLC_HIT, &bic_group_cache);
431
SET_BIC(BIC_L2_MRPS, &bic_group_cache);
432
SET_BIC(BIC_L2_HIT, &bic_group_cache);
433
434
BIC_INIT(&bic_group_other);
435
SET_BIC(BIC_IRQ, &bic_group_other);
436
SET_BIC(BIC_NMI, &bic_group_other);
437
SET_BIC(BIC_SMI, &bic_group_other);
438
SET_BIC(BIC_ThreadC, &bic_group_other);
439
SET_BIC(BIC_CoreTmp, &bic_group_other);
440
SET_BIC(BIC_IPC, &bic_group_other);
441
442
BIC_INIT(&bic_group_disabled_by_default);
443
SET_BIC(BIC_USEC, &bic_group_disabled_by_default);
444
SET_BIC(BIC_TOD, &bic_group_disabled_by_default);
445
SET_BIC(BIC_cpuidle, &bic_group_disabled_by_default);
446
SET_BIC(BIC_APIC, &bic_group_disabled_by_default);
447
SET_BIC(BIC_X2APIC, &bic_group_disabled_by_default);
448
449
BIC_INIT(&bic_enabled);
450
bic_set_all(&bic_enabled);
451
bic_clear_bits(&bic_enabled, &bic_group_disabled_by_default);
452
453
BIC_INIT(&bic_present);
454
SET_BIC(BIC_USEC, &bic_present);
455
SET_BIC(BIC_TOD, &bic_present);
456
SET_BIC(BIC_cpuidle, &bic_present);
457
SET_BIC(BIC_APIC, &bic_present);
458
SET_BIC(BIC_X2APIC, &bic_present);
459
SET_BIC(BIC_pct_idle, &bic_present);
460
}
461
462
/*
463
* MSR_PKG_CST_CONFIG_CONTROL decoding for pkg_cstate_limit:
464
* If you change the values, note they are used both in comparisons
465
* (>= PCL__7) and to index pkg_cstate_limit_strings[].
466
*/
467
#define PCLUKN 0 /* Unknown */
468
#define PCLRSV 1 /* Reserved */
469
#define PCL__0 2 /* PC0 */
470
#define PCL__1 3 /* PC1 */
471
#define PCL__2 4 /* PC2 */
472
#define PCL__3 5 /* PC3 */
473
#define PCL__4 6 /* PC4 */
474
#define PCL__6 7 /* PC6 */
475
#define PCL_6N 8 /* PC6 No Retention */
476
#define PCL_6R 9 /* PC6 Retention */
477
#define PCL__7 10 /* PC7 */
478
#define PCL_7S 11 /* PC7 Shrink */
479
#define PCL__8 12 /* PC8 */
480
#define PCL__9 13 /* PC9 */
481
#define PCL_10 14 /* PC10 */
482
#define PCLUNL 15 /* Unlimited */
483
484
char *proc_stat = "/proc/stat";
485
FILE *outf;
486
int *fd_percpu;
487
int *fd_instr_count_percpu;
488
int *fd_llc_percpu;
489
int *fd_l2_percpu;
490
struct timeval interval_tv = { 5, 0 };
491
struct timespec interval_ts = { 5, 0 };
492
493
unsigned int num_iterations;
494
unsigned int header_iterations;
495
unsigned int debug;
496
unsigned int quiet;
497
unsigned int shown;
498
unsigned int sums_need_wide_columns;
499
unsigned int rapl_joules;
500
unsigned int valid_rapl_msrs;
501
unsigned int summary_only;
502
unsigned int list_header_only;
503
unsigned int dump_only;
504
unsigned int force_load;
505
unsigned int cpuid_has_aperf_mperf;
506
unsigned int cpuid_has_hv;
507
unsigned int has_aperf_access;
508
unsigned int has_epb;
509
unsigned int has_turbo;
510
unsigned int is_hybrid;
511
unsigned int units = 1000000; /* MHz etc */
512
unsigned int genuine_intel;
513
unsigned int authentic_amd;
514
unsigned int hygon_genuine;
515
unsigned int max_level, max_extended_level;
516
unsigned int has_invariant_tsc;
517
unsigned int aperf_mperf_multiplier = 1;
518
double bclk;
519
double base_hz;
520
unsigned int has_base_hz;
521
double tsc_tweak = 1.0;
522
unsigned int show_pkg_only;
523
unsigned int show_core_only;
524
char *output_buffer, *outp;
525
unsigned int do_dts;
526
unsigned int do_ptm;
527
unsigned int do_ipc;
528
unsigned long long cpuidle_cur_cpu_lpi_us;
529
unsigned long long cpuidle_cur_sys_lpi_us;
530
unsigned int tj_max;
531
unsigned int tj_max_override;
532
double rapl_power_units, rapl_time_units;
533
double rapl_dram_energy_units, rapl_energy_units, rapl_psys_energy_units;
534
double rapl_joule_counter_range;
535
unsigned int crystal_hz;
536
unsigned long long tsc_hz;
537
int master_cpu;
538
unsigned int has_hwp; /* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */
539
/* IA32_HWP_REQUEST, IA32_HWP_STATUS */
540
unsigned int has_hwp_notify; /* IA32_HWP_INTERRUPT */
541
unsigned int has_hwp_activity_window; /* IA32_HWP_REQUEST[bits 41:32] */
542
unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */
543
unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */
544
unsigned int first_counter_read = 1;
545
546
static struct timeval procsysfs_tv_begin;
547
548
int ignore_stdin;
549
bool no_msr;
550
bool no_perf;
551
552
enum gfx_sysfs_idx {
553
GFX_rc6,
554
GFX_MHz,
555
GFX_ACTMHz,
556
SAM_mc6,
557
SAM_MHz,
558
SAM_ACTMHz,
559
GFX_MAX
560
};
561
562
struct gfx_sysfs_info {
563
FILE *fp;
564
unsigned int val;
565
unsigned long long val_ull;
566
};
567
568
static struct gfx_sysfs_info gfx_info[GFX_MAX];
569
570
int get_msr(int cpu, off_t offset, unsigned long long *msr);
571
int add_counter(unsigned int msr_num, char *path, char *name,
572
unsigned int width, enum counter_scope scope, enum counter_type type, enum counter_format format, int flags, int package_num);
573
574
/* Model specific support Start */
575
576
/* List of features that may diverge among different platforms */
577
struct platform_features {
578
bool has_msr_misc_feature_control; /* MSR_MISC_FEATURE_CONTROL */
579
bool has_msr_misc_pwr_mgmt; /* MSR_MISC_PWR_MGMT */
580
bool has_nhm_msrs; /* MSR_PLATFORM_INFO, MSR_IA32_TEMPERATURE_TARGET, MSR_SMI_COUNT, MSR_PKG_CST_CONFIG_CONTROL, MSR_IA32_POWER_CTL, TRL MSRs */
581
bool has_config_tdp; /* MSR_CONFIG_TDP_NOMINAL/LEVEL_1/LEVEL_2/CONTROL, MSR_TURBO_ACTIVATION_RATIO */
582
int bclk_freq; /* CPU base clock */
583
int crystal_freq; /* Crystal clock to use when not available from CPUID.15 */
584
int supported_cstates; /* Core cstates and Package cstates supported */
585
int cst_limit; /* MSR_PKG_CST_CONFIG_CONTROL */
586
bool has_cst_auto_convension; /* AUTOMATIC_CSTATE_CONVERSION bit in MSR_PKG_CST_CONFIG_CONTROL */
587
bool has_irtl_msrs; /* MSR_PKGC3/PKGC6/PKGC7/PKGC8/PKGC9/PKGC10_IRTL */
588
bool has_msr_core_c1_res; /* MSR_CORE_C1_RES */
589
bool has_msr_module_c6_res_ms; /* MSR_MODULE_C6_RES_MS */
590
bool has_msr_c6_demotion_policy_config; /* MSR_CC6_DEMOTION_POLICY_CONFIG/MSR_MC6_DEMOTION_POLICY_CONFIG */
591
bool has_msr_atom_pkg_c6_residency; /* MSR_ATOM_PKG_C6_RESIDENCY */
592
bool has_msr_knl_core_c6_residency; /* MSR_KNL_CORE_C6_RESIDENCY */
593
bool has_ext_cst_msrs; /* MSR_PKG_WEIGHTED_CORE_C0_RES/MSR_PKG_ANY_CORE_C0_RES/MSR_PKG_ANY_GFXE_C0_RES/MSR_PKG_BOTH_CORE_GFXE_C0_RES */
594
bool has_cst_prewake_bit; /* Cstate prewake bit in MSR_IA32_POWER_CTL */
595
int trl_msrs; /* MSR_TURBO_RATIO_LIMIT/LIMIT1/LIMIT2/SECONDARY, Atom TRL MSRs */
596
int plr_msrs; /* MSR_CORE/GFX/RING_PERF_LIMIT_REASONS */
597
int plat_rapl_msrs; /* RAPL PKG/DRAM/CORE/GFX MSRs, AMD RAPL MSRs */
598
bool has_per_core_rapl; /* Indicates cores energy collection is per-core, not per-package. AMD specific for now */
599
bool has_rapl_divisor; /* Divisor for Energy unit raw value from MSR_RAPL_POWER_UNIT */
600
bool has_fixed_rapl_unit; /* Fixed Energy Unit used for DRAM RAPL Domain */
601
bool has_fixed_rapl_psys_unit; /* Fixed Energy Unit used for PSYS RAPL Domain */
602
int rapl_quirk_tdp; /* Hardcoded TDP value when cannot be retrieved from hardware */
603
int tcc_offset_bits; /* TCC Offset bits in MSR_IA32_TEMPERATURE_TARGET */
604
bool enable_tsc_tweak; /* Use CPU Base freq instead of TSC freq for aperf/mperf counter */
605
bool need_perf_multiplier; /* mperf/aperf multiplier */
606
};
607
608
struct platform_data {
609
unsigned int vfm;
610
const struct platform_features *features;
611
};
612
613
/* For BCLK */
614
enum bclk_freq {
615
BCLK_100MHZ = 1,
616
BCLK_133MHZ,
617
BCLK_SLV,
618
};
619
620
#define SLM_BCLK_FREQS 5
621
double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0 };
622
623
double slm_bclk(void)
624
{
625
unsigned long long msr = 3;
626
unsigned int i;
627
double freq;
628
629
if (get_msr(master_cpu, MSR_FSB_FREQ, &msr))
630
fprintf(outf, "SLM BCLK: unknown\n");
631
632
i = msr & 0xf;
633
if (i >= SLM_BCLK_FREQS) {
634
fprintf(outf, "SLM BCLK[%d] invalid\n", i);
635
i = 3;
636
}
637
freq = slm_freq_table[i];
638
639
if (!quiet)
640
fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq);
641
642
return freq;
643
}
644
645
/* For Package cstate limit */
646
enum package_cstate_limit {
647
CST_LIMIT_NHM = 1,
648
CST_LIMIT_SNB,
649
CST_LIMIT_HSW,
650
CST_LIMIT_SKX,
651
CST_LIMIT_ICX,
652
CST_LIMIT_SLV,
653
CST_LIMIT_AMT,
654
CST_LIMIT_KNL,
655
CST_LIMIT_GMT,
656
};
657
658
/* For Turbo Ratio Limit MSRs */
659
enum turbo_ratio_limit_msrs {
660
TRL_BASE = BIT(0),
661
TRL_LIMIT1 = BIT(1),
662
TRL_LIMIT2 = BIT(2),
663
TRL_ATOM = BIT(3),
664
TRL_KNL = BIT(4),
665
TRL_CORECOUNT = BIT(5),
666
};
667
668
/* For Perf Limit Reason MSRs */
669
enum perf_limit_reason_msrs {
670
PLR_CORE = BIT(0),
671
PLR_GFX = BIT(1),
672
PLR_RING = BIT(2),
673
};
674
675
/* For RAPL MSRs */
676
enum rapl_msrs {
677
RAPL_PKG_POWER_LIMIT = BIT(0), /* 0x610 MSR_PKG_POWER_LIMIT */
678
RAPL_PKG_ENERGY_STATUS = BIT(1), /* 0x611 MSR_PKG_ENERGY_STATUS */
679
RAPL_PKG_PERF_STATUS = BIT(2), /* 0x613 MSR_PKG_PERF_STATUS */
680
RAPL_PKG_POWER_INFO = BIT(3), /* 0x614 MSR_PKG_POWER_INFO */
681
RAPL_DRAM_POWER_LIMIT = BIT(4), /* 0x618 MSR_DRAM_POWER_LIMIT */
682
RAPL_DRAM_ENERGY_STATUS = BIT(5), /* 0x619 MSR_DRAM_ENERGY_STATUS */
683
RAPL_DRAM_PERF_STATUS = BIT(6), /* 0x61b MSR_DRAM_PERF_STATUS */
684
RAPL_DRAM_POWER_INFO = BIT(7), /* 0x61c MSR_DRAM_POWER_INFO */
685
RAPL_CORE_POWER_LIMIT = BIT(8), /* 0x638 MSR_PP0_POWER_LIMIT */
686
RAPL_CORE_ENERGY_STATUS = BIT(9), /* 0x639 MSR_PP0_ENERGY_STATUS */
687
RAPL_CORE_POLICY = BIT(10), /* 0x63a MSR_PP0_POLICY */
688
RAPL_GFX_POWER_LIMIT = BIT(11), /* 0x640 MSR_PP1_POWER_LIMIT */
689
RAPL_GFX_ENERGY_STATUS = BIT(12), /* 0x641 MSR_PP1_ENERGY_STATUS */
690
RAPL_GFX_POLICY = BIT(13), /* 0x642 MSR_PP1_POLICY */
691
RAPL_AMD_PWR_UNIT = BIT(14), /* 0xc0010299 MSR_AMD_RAPL_POWER_UNIT */
692
RAPL_AMD_CORE_ENERGY_STAT = BIT(15), /* 0xc001029a MSR_AMD_CORE_ENERGY_STATUS */
693
RAPL_AMD_PKG_ENERGY_STAT = BIT(16), /* 0xc001029b MSR_AMD_PKG_ENERGY_STATUS */
694
RAPL_PLATFORM_ENERGY_LIMIT = BIT(17), /* 0x64c MSR_PLATFORM_ENERGY_LIMIT */
695
RAPL_PLATFORM_ENERGY_STATUS = BIT(18), /* 0x64d MSR_PLATFORM_ENERGY_STATUS */
696
};
697
698
#define RAPL_PKG (RAPL_PKG_ENERGY_STATUS | RAPL_PKG_POWER_LIMIT)
699
#define RAPL_DRAM (RAPL_DRAM_ENERGY_STATUS | RAPL_DRAM_POWER_LIMIT)
700
#define RAPL_CORE (RAPL_CORE_ENERGY_STATUS | RAPL_CORE_POWER_LIMIT)
701
#define RAPL_GFX (RAPL_GFX_POWER_LIMIT | RAPL_GFX_ENERGY_STATUS)
702
#define RAPL_PSYS (RAPL_PLATFORM_ENERGY_STATUS | RAPL_PLATFORM_ENERGY_LIMIT)
703
704
#define RAPL_PKG_ALL (RAPL_PKG | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO)
705
#define RAPL_DRAM_ALL (RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_DRAM_POWER_INFO)
706
#define RAPL_CORE_ALL (RAPL_CORE | RAPL_CORE_POLICY)
707
#define RAPL_GFX_ALL (RAPL_GFX | RAPL_GFX_POLICY)
708
709
#define RAPL_AMD_F17H (RAPL_AMD_PWR_UNIT | RAPL_AMD_CORE_ENERGY_STAT | RAPL_AMD_PKG_ENERGY_STAT)
710
711
/* For Cstates */
712
enum cstates {
713
CC1 = BIT(0),
714
CC3 = BIT(1),
715
CC6 = BIT(2),
716
CC7 = BIT(3),
717
PC2 = BIT(4),
718
PC3 = BIT(5),
719
PC6 = BIT(6),
720
PC7 = BIT(7),
721
PC8 = BIT(8),
722
PC9 = BIT(9),
723
PC10 = BIT(10),
724
};
725
726
static const struct platform_features nhm_features = {
727
.has_msr_misc_pwr_mgmt = 1,
728
.has_nhm_msrs = 1,
729
.bclk_freq = BCLK_133MHZ,
730
.supported_cstates = CC1 | CC3 | CC6 | PC3 | PC6,
731
.cst_limit = CST_LIMIT_NHM,
732
.trl_msrs = TRL_BASE,
733
};
734
735
static const struct platform_features nhx_features = {
736
.has_msr_misc_pwr_mgmt = 1,
737
.has_nhm_msrs = 1,
738
.bclk_freq = BCLK_133MHZ,
739
.supported_cstates = CC1 | CC3 | CC6 | PC3 | PC6,
740
.cst_limit = CST_LIMIT_NHM,
741
};
742
743
static const struct platform_features snb_features = {
744
.has_msr_misc_feature_control = 1,
745
.has_msr_misc_pwr_mgmt = 1,
746
.has_nhm_msrs = 1,
747
.bclk_freq = BCLK_100MHZ,
748
.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
749
.cst_limit = CST_LIMIT_SNB,
750
.has_irtl_msrs = 1,
751
.trl_msrs = TRL_BASE,
752
.plat_rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
753
};
754
755
static const struct platform_features snx_features = {
756
.has_msr_misc_feature_control = 1,
757
.has_msr_misc_pwr_mgmt = 1,
758
.has_nhm_msrs = 1,
759
.bclk_freq = BCLK_100MHZ,
760
.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
761
.cst_limit = CST_LIMIT_SNB,
762
.has_irtl_msrs = 1,
763
.trl_msrs = TRL_BASE,
764
.plat_rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL,
765
};
766
767
static const struct platform_features ivb_features = {
768
.has_msr_misc_feature_control = 1,
769
.has_msr_misc_pwr_mgmt = 1,
770
.has_nhm_msrs = 1,
771
.has_config_tdp = 1,
772
.bclk_freq = BCLK_100MHZ,
773
.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
774
.cst_limit = CST_LIMIT_SNB,
775
.has_irtl_msrs = 1,
776
.trl_msrs = TRL_BASE,
777
.plat_rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
778
};
779
780
static const struct platform_features ivx_features = {
781
.has_msr_misc_feature_control = 1,
782
.has_msr_misc_pwr_mgmt = 1,
783
.has_nhm_msrs = 1,
784
.bclk_freq = BCLK_100MHZ,
785
.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
786
.cst_limit = CST_LIMIT_SNB,
787
.has_irtl_msrs = 1,
788
.trl_msrs = TRL_BASE | TRL_LIMIT1,
789
.plat_rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL,
790
};
791
792
static const struct platform_features hsw_features = {
793
.has_msr_misc_feature_control = 1,
794
.has_msr_misc_pwr_mgmt = 1,
795
.has_nhm_msrs = 1,
796
.has_config_tdp = 1,
797
.bclk_freq = BCLK_100MHZ,
798
.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
799
.cst_limit = CST_LIMIT_HSW,
800
.has_irtl_msrs = 1,
801
.trl_msrs = TRL_BASE,
802
.plr_msrs = PLR_CORE | PLR_GFX | PLR_RING,
803
.plat_rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
804
};
805
806
static const struct platform_features hsx_features = {
807
.has_msr_misc_feature_control = 1,
808
.has_msr_misc_pwr_mgmt = 1,
809
.has_nhm_msrs = 1,
810
.has_config_tdp = 1,
811
.bclk_freq = BCLK_100MHZ,
812
.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
813
.cst_limit = CST_LIMIT_HSW,
814
.has_irtl_msrs = 1,
815
.trl_msrs = TRL_BASE | TRL_LIMIT1 | TRL_LIMIT2,
816
.plr_msrs = PLR_CORE | PLR_RING,
817
.plat_rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
818
.has_fixed_rapl_unit = 1,
819
};
820
821
static const struct platform_features hswl_features = {
822
.has_msr_misc_feature_control = 1,
823
.has_msr_misc_pwr_mgmt = 1,
824
.has_nhm_msrs = 1,
825
.has_config_tdp = 1,
826
.bclk_freq = BCLK_100MHZ,
827
.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
828
.cst_limit = CST_LIMIT_HSW,
829
.has_irtl_msrs = 1,
830
.trl_msrs = TRL_BASE,
831
.plr_msrs = PLR_CORE | PLR_GFX | PLR_RING,
832
.plat_rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
833
};
834
835
static const struct platform_features hswg_features = {
836
.has_msr_misc_feature_control = 1,
837
.has_msr_misc_pwr_mgmt = 1,
838
.has_nhm_msrs = 1,
839
.has_config_tdp = 1,
840
.bclk_freq = BCLK_100MHZ,
841
.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
842
.cst_limit = CST_LIMIT_HSW,
843
.has_irtl_msrs = 1,
844
.trl_msrs = TRL_BASE,
845
.plr_msrs = PLR_CORE | PLR_GFX | PLR_RING,
846
.plat_rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
847
};
848
849
static const struct platform_features bdw_features = {
850
.has_msr_misc_feature_control = 1,
851
.has_msr_misc_pwr_mgmt = 1,
852
.has_nhm_msrs = 1,
853
.has_config_tdp = 1,
854
.bclk_freq = BCLK_100MHZ,
855
.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
856
.cst_limit = CST_LIMIT_HSW,
857
.has_irtl_msrs = 1,
858
.trl_msrs = TRL_BASE,
859
.plat_rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
860
};
861
862
static const struct platform_features bdwg_features = {
863
.has_msr_misc_feature_control = 1,
864
.has_msr_misc_pwr_mgmt = 1,
865
.has_nhm_msrs = 1,
866
.has_config_tdp = 1,
867
.bclk_freq = BCLK_100MHZ,
868
.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
869
.cst_limit = CST_LIMIT_HSW,
870
.has_irtl_msrs = 1,
871
.trl_msrs = TRL_BASE,
872
.plat_rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
873
};
874
875
static const struct platform_features bdx_features = {
876
.has_msr_misc_feature_control = 1,
877
.has_msr_misc_pwr_mgmt = 1,
878
.has_nhm_msrs = 1,
879
.has_config_tdp = 1,
880
.bclk_freq = BCLK_100MHZ,
881
.supported_cstates = CC1 | CC3 | CC6 | PC2 | PC3 | PC6,
882
.cst_limit = CST_LIMIT_HSW,
883
.has_irtl_msrs = 1,
884
.has_cst_auto_convension = 1,
885
.trl_msrs = TRL_BASE,
886
.plat_rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
887
.has_fixed_rapl_unit = 1,
888
};
889
890
static const struct platform_features skl_features = {
891
.has_msr_misc_feature_control = 1,
892
.has_msr_misc_pwr_mgmt = 1,
893
.has_nhm_msrs = 1,
894
.has_config_tdp = 1,
895
.bclk_freq = BCLK_100MHZ,
896
.crystal_freq = 24000000,
897
.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
898
.cst_limit = CST_LIMIT_HSW,
899
.has_irtl_msrs = 1,
900
.has_ext_cst_msrs = 1,
901
.trl_msrs = TRL_BASE,
902
.tcc_offset_bits = 6,
903
.plat_rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX | RAPL_PSYS,
904
.enable_tsc_tweak = 1,
905
};
906
907
static const struct platform_features cnl_features = {
908
.has_msr_misc_feature_control = 1,
909
.has_msr_misc_pwr_mgmt = 1,
910
.has_nhm_msrs = 1,
911
.has_config_tdp = 1,
912
.bclk_freq = BCLK_100MHZ,
913
.supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
914
.cst_limit = CST_LIMIT_HSW,
915
.has_irtl_msrs = 1,
916
.has_msr_core_c1_res = 1,
917
.has_ext_cst_msrs = 1,
918
.trl_msrs = TRL_BASE,
919
.tcc_offset_bits = 6,
920
.plat_rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX | RAPL_PSYS,
921
.enable_tsc_tweak = 1,
922
};
923
924
/* Copied from cnl_features, with PC7/PC9 removed */
925
static const struct platform_features adl_features = {
926
.has_msr_misc_feature_control = cnl_features.has_msr_misc_feature_control,
927
.has_msr_misc_pwr_mgmt = cnl_features.has_msr_misc_pwr_mgmt,
928
.has_nhm_msrs = cnl_features.has_nhm_msrs,
929
.has_config_tdp = cnl_features.has_config_tdp,
930
.bclk_freq = cnl_features.bclk_freq,
931
.supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC8 | PC10,
932
.cst_limit = cnl_features.cst_limit,
933
.has_irtl_msrs = cnl_features.has_irtl_msrs,
934
.has_msr_core_c1_res = cnl_features.has_msr_core_c1_res,
935
.has_ext_cst_msrs = cnl_features.has_ext_cst_msrs,
936
.trl_msrs = cnl_features.trl_msrs,
937
.tcc_offset_bits = cnl_features.tcc_offset_bits,
938
.plat_rapl_msrs = cnl_features.plat_rapl_msrs,
939
.enable_tsc_tweak = cnl_features.enable_tsc_tweak,
940
};
941
942
/* Copied from adl_features, with PC3/PC8 removed */
943
static const struct platform_features lnl_features = {
944
.has_msr_misc_feature_control = adl_features.has_msr_misc_feature_control,
945
.has_msr_misc_pwr_mgmt = adl_features.has_msr_misc_pwr_mgmt,
946
.has_nhm_msrs = adl_features.has_nhm_msrs,
947
.has_config_tdp = adl_features.has_config_tdp,
948
.bclk_freq = adl_features.bclk_freq,
949
.supported_cstates = CC1 | CC6 | CC7 | PC2 | PC6 | PC10,
950
.cst_limit = adl_features.cst_limit,
951
.has_irtl_msrs = adl_features.has_irtl_msrs,
952
.has_msr_core_c1_res = adl_features.has_msr_core_c1_res,
953
.has_ext_cst_msrs = adl_features.has_ext_cst_msrs,
954
.trl_msrs = adl_features.trl_msrs,
955
.tcc_offset_bits = adl_features.tcc_offset_bits,
956
.plat_rapl_msrs = adl_features.plat_rapl_msrs,
957
.enable_tsc_tweak = adl_features.enable_tsc_tweak,
958
};
959
960
static const struct platform_features skx_features = {
961
.has_msr_misc_feature_control = 1,
962
.has_msr_misc_pwr_mgmt = 1,
963
.has_nhm_msrs = 1,
964
.has_config_tdp = 1,
965
.bclk_freq = BCLK_100MHZ,
966
.supported_cstates = CC1 | CC6 | PC2 | PC6,
967
.cst_limit = CST_LIMIT_SKX,
968
.has_irtl_msrs = 1,
969
.has_cst_auto_convension = 1,
970
.trl_msrs = TRL_BASE | TRL_CORECOUNT,
971
.plat_rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
972
.has_fixed_rapl_unit = 1,
973
};
974
975
static const struct platform_features icx_features = {
976
.has_msr_misc_feature_control = 1,
977
.has_msr_misc_pwr_mgmt = 1,
978
.has_nhm_msrs = 1,
979
.has_config_tdp = 1,
980
.bclk_freq = BCLK_100MHZ,
981
.supported_cstates = CC1 | CC6 | PC2 | PC6,
982
.cst_limit = CST_LIMIT_ICX,
983
.has_msr_core_c1_res = 1,
984
.has_irtl_msrs = 1,
985
.has_cst_prewake_bit = 1,
986
.trl_msrs = TRL_BASE | TRL_CORECOUNT,
987
.plat_rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS,
988
.has_fixed_rapl_unit = 1,
989
};
990
991
static const struct platform_features spr_features = {
992
.has_msr_misc_feature_control = 1,
993
.has_msr_misc_pwr_mgmt = 1,
994
.has_nhm_msrs = 1,
995
.has_config_tdp = 1,
996
.bclk_freq = BCLK_100MHZ,
997
.supported_cstates = CC1 | CC6 | PC2 | PC6,
998
.cst_limit = CST_LIMIT_SKX,
999
.has_msr_core_c1_res = 1,
1000
.has_irtl_msrs = 1,
1001
.has_cst_prewake_bit = 1,
1002
.has_fixed_rapl_psys_unit = 1,
1003
.trl_msrs = TRL_BASE | TRL_CORECOUNT,
1004
.plat_rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS,
1005
};
1006
1007
static const struct platform_features dmr_features = {
1008
.has_msr_misc_feature_control = spr_features.has_msr_misc_feature_control,
1009
.has_msr_misc_pwr_mgmt = spr_features.has_msr_misc_pwr_mgmt,
1010
.has_nhm_msrs = spr_features.has_nhm_msrs,
1011
.bclk_freq = spr_features.bclk_freq,
1012
.supported_cstates = spr_features.supported_cstates,
1013
.cst_limit = spr_features.cst_limit,
1014
.has_msr_core_c1_res = spr_features.has_msr_core_c1_res,
1015
.has_cst_prewake_bit = spr_features.has_cst_prewake_bit,
1016
.has_fixed_rapl_psys_unit = spr_features.has_fixed_rapl_psys_unit,
1017
.trl_msrs = spr_features.trl_msrs,
1018
.has_msr_module_c6_res_ms = 1, /* DMR has Dual-Core-Module and MC6 MSR */
1019
.plat_rapl_msrs = 0, /* DMR does not have RAPL MSRs */
1020
.plr_msrs = 0, /* DMR does not have PLR MSRs */
1021
.has_irtl_msrs = 0, /* DMR does not have IRTL MSRs */
1022
.has_config_tdp = 0, /* DMR does not have CTDP MSRs */
1023
};
1024
1025
static const struct platform_features srf_features = {
1026
.has_msr_misc_feature_control = 1,
1027
.has_msr_misc_pwr_mgmt = 1,
1028
.has_nhm_msrs = 1,
1029
.has_config_tdp = 1,
1030
.bclk_freq = BCLK_100MHZ,
1031
.supported_cstates = CC1 | CC6 | PC2 | PC6,
1032
.cst_limit = CST_LIMIT_SKX,
1033
.has_msr_core_c1_res = 1,
1034
.has_msr_module_c6_res_ms = 1,
1035
.has_irtl_msrs = 1,
1036
.has_cst_prewake_bit = 1,
1037
.trl_msrs = TRL_BASE | TRL_CORECOUNT,
1038
.plat_rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS,
1039
};
1040
1041
static const struct platform_features grr_features = {
1042
.has_msr_misc_feature_control = 1,
1043
.has_msr_misc_pwr_mgmt = 1,
1044
.has_nhm_msrs = 1,
1045
.has_config_tdp = 1,
1046
.bclk_freq = BCLK_100MHZ,
1047
.supported_cstates = CC1 | CC6,
1048
.cst_limit = CST_LIMIT_SKX,
1049
.has_msr_core_c1_res = 1,
1050
.has_msr_module_c6_res_ms = 1,
1051
.has_irtl_msrs = 1,
1052
.has_cst_prewake_bit = 1,
1053
.trl_msrs = TRL_BASE | TRL_CORECOUNT,
1054
.plat_rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS,
1055
};
1056
1057
static const struct platform_features slv_features = {
1058
.has_nhm_msrs = 1,
1059
.bclk_freq = BCLK_SLV,
1060
.supported_cstates = CC1 | CC6 | PC6,
1061
.cst_limit = CST_LIMIT_SLV,
1062
.has_msr_core_c1_res = 1,
1063
.has_msr_module_c6_res_ms = 1,
1064
.has_msr_c6_demotion_policy_config = 1,
1065
.has_msr_atom_pkg_c6_residency = 1,
1066
.trl_msrs = TRL_ATOM,
1067
.plat_rapl_msrs = RAPL_PKG | RAPL_CORE,
1068
.has_rapl_divisor = 1,
1069
.rapl_quirk_tdp = 30,
1070
};
1071
1072
static const struct platform_features slvd_features = {
1073
.has_msr_misc_pwr_mgmt = 1,
1074
.has_nhm_msrs = 1,
1075
.bclk_freq = BCLK_SLV,
1076
.supported_cstates = CC1 | CC6 | PC3 | PC6,
1077
.cst_limit = CST_LIMIT_SLV,
1078
.has_msr_atom_pkg_c6_residency = 1,
1079
.trl_msrs = TRL_BASE,
1080
.plat_rapl_msrs = RAPL_PKG | RAPL_CORE,
1081
.rapl_quirk_tdp = 30,
1082
};
1083
1084
static const struct platform_features amt_features = {
1085
.has_nhm_msrs = 1,
1086
.bclk_freq = BCLK_133MHZ,
1087
.supported_cstates = CC1 | CC3 | CC6 | PC3 | PC6,
1088
.cst_limit = CST_LIMIT_AMT,
1089
.trl_msrs = TRL_BASE,
1090
};
1091
1092
static const struct platform_features gmt_features = {
1093
.has_msr_misc_pwr_mgmt = 1,
1094
.has_nhm_msrs = 1,
1095
.bclk_freq = BCLK_100MHZ,
1096
.crystal_freq = 19200000,
1097
.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
1098
.cst_limit = CST_LIMIT_GMT,
1099
.has_irtl_msrs = 1,
1100
.trl_msrs = TRL_BASE | TRL_CORECOUNT,
1101
.plat_rapl_msrs = RAPL_PKG | RAPL_PKG_POWER_INFO,
1102
};
1103
1104
static const struct platform_features gmtd_features = {
1105
.has_msr_misc_pwr_mgmt = 1,
1106
.has_nhm_msrs = 1,
1107
.bclk_freq = BCLK_100MHZ,
1108
.crystal_freq = 25000000,
1109
.supported_cstates = CC1 | CC6 | PC2 | PC6,
1110
.cst_limit = CST_LIMIT_GMT,
1111
.has_irtl_msrs = 1,
1112
.has_msr_core_c1_res = 1,
1113
.trl_msrs = TRL_BASE | TRL_CORECOUNT,
1114
.plat_rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_CORE_ENERGY_STATUS,
1115
};
1116
1117
static const struct platform_features gmtp_features = {
1118
.has_msr_misc_pwr_mgmt = 1,
1119
.has_nhm_msrs = 1,
1120
.bclk_freq = BCLK_100MHZ,
1121
.crystal_freq = 19200000,
1122
.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
1123
.cst_limit = CST_LIMIT_GMT,
1124
.has_irtl_msrs = 1,
1125
.trl_msrs = TRL_BASE,
1126
.plat_rapl_msrs = RAPL_PKG | RAPL_PKG_POWER_INFO,
1127
};
1128
1129
static const struct platform_features tmt_features = {
1130
.has_msr_misc_pwr_mgmt = 1,
1131
.has_nhm_msrs = 1,
1132
.bclk_freq = BCLK_100MHZ,
1133
.supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
1134
.cst_limit = CST_LIMIT_GMT,
1135
.has_irtl_msrs = 1,
1136
.trl_msrs = TRL_BASE,
1137
.plat_rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX,
1138
.enable_tsc_tweak = 1,
1139
};
1140
1141
static const struct platform_features tmtd_features = {
1142
.has_msr_misc_pwr_mgmt = 1,
1143
.has_nhm_msrs = 1,
1144
.bclk_freq = BCLK_100MHZ,
1145
.supported_cstates = CC1 | CC6,
1146
.cst_limit = CST_LIMIT_GMT,
1147
.has_irtl_msrs = 1,
1148
.trl_msrs = TRL_BASE | TRL_CORECOUNT,
1149
.plat_rapl_msrs = RAPL_PKG_ALL,
1150
};
1151
1152
static const struct platform_features knl_features = {
1153
.has_msr_misc_pwr_mgmt = 1,
1154
.has_nhm_msrs = 1,
1155
.has_config_tdp = 1,
1156
.bclk_freq = BCLK_100MHZ,
1157
.supported_cstates = CC1 | CC6 | PC3 | PC6,
1158
.cst_limit = CST_LIMIT_KNL,
1159
.has_msr_knl_core_c6_residency = 1,
1160
.trl_msrs = TRL_KNL,
1161
.plat_rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
1162
.has_fixed_rapl_unit = 1,
1163
.need_perf_multiplier = 1,
1164
};
1165
1166
/*
 * All-zero feature set.  probe_platform_features() falls back to it when
 * force_load is set and no model-specific table was selected.
 */
static const struct platform_features default_features = {
};
static const struct platform_features amd_features_with_rapl = {
1170
.plat_rapl_msrs = RAPL_AMD_F17H,
1171
.has_per_core_rapl = 1,
1172
.rapl_quirk_tdp = 280, /* This is the max stock TDP of HEDT/Server Fam17h+ chips */
1173
};
1174
1175
static const struct platform_data turbostat_pdata[] = {
1176
{ INTEL_NEHALEM, &nhm_features },
1177
{ INTEL_NEHALEM_G, &nhm_features },
1178
{ INTEL_NEHALEM_EP, &nhm_features },
1179
{ INTEL_NEHALEM_EX, &nhx_features },
1180
{ INTEL_WESTMERE, &nhm_features },
1181
{ INTEL_WESTMERE_EP, &nhm_features },
1182
{ INTEL_WESTMERE_EX, &nhx_features },
1183
{ INTEL_SANDYBRIDGE, &snb_features },
1184
{ INTEL_SANDYBRIDGE_X, &snx_features },
1185
{ INTEL_IVYBRIDGE, &ivb_features },
1186
{ INTEL_IVYBRIDGE_X, &ivx_features },
1187
{ INTEL_HASWELL, &hsw_features },
1188
{ INTEL_HASWELL_X, &hsx_features },
1189
{ INTEL_HASWELL_L, &hswl_features },
1190
{ INTEL_HASWELL_G, &hswg_features },
1191
{ INTEL_BROADWELL, &bdw_features },
1192
{ INTEL_BROADWELL_G, &bdwg_features },
1193
{ INTEL_BROADWELL_X, &bdx_features },
1194
{ INTEL_BROADWELL_D, &bdx_features },
1195
{ INTEL_SKYLAKE_L, &skl_features },
1196
{ INTEL_SKYLAKE, &skl_features },
1197
{ INTEL_SKYLAKE_X, &skx_features },
1198
{ INTEL_KABYLAKE_L, &skl_features },
1199
{ INTEL_KABYLAKE, &skl_features },
1200
{ INTEL_COMETLAKE, &skl_features },
1201
{ INTEL_COMETLAKE_L, &skl_features },
1202
{ INTEL_CANNONLAKE_L, &cnl_features },
1203
{ INTEL_ICELAKE_X, &icx_features },
1204
{ INTEL_ICELAKE_D, &icx_features },
1205
{ INTEL_ICELAKE_L, &cnl_features },
1206
{ INTEL_ICELAKE_NNPI, &cnl_features },
1207
{ INTEL_ROCKETLAKE, &cnl_features },
1208
{ INTEL_TIGERLAKE_L, &cnl_features },
1209
{ INTEL_TIGERLAKE, &cnl_features },
1210
{ INTEL_SAPPHIRERAPIDS_X, &spr_features },
1211
{ INTEL_EMERALDRAPIDS_X, &spr_features },
1212
{ INTEL_GRANITERAPIDS_X, &spr_features },
1213
{ INTEL_GRANITERAPIDS_D, &spr_features },
1214
{ INTEL_DIAMONDRAPIDS_X, &dmr_features },
1215
{ INTEL_LAKEFIELD, &cnl_features },
1216
{ INTEL_ALDERLAKE, &adl_features },
1217
{ INTEL_ALDERLAKE_L, &adl_features },
1218
{ INTEL_RAPTORLAKE, &adl_features },
1219
{ INTEL_RAPTORLAKE_P, &adl_features },
1220
{ INTEL_RAPTORLAKE_S, &adl_features },
1221
{ INTEL_BARTLETTLAKE, &adl_features },
1222
{ INTEL_METEORLAKE, &adl_features },
1223
{ INTEL_METEORLAKE_L, &adl_features },
1224
{ INTEL_ARROWLAKE_H, &adl_features },
1225
{ INTEL_ARROWLAKE_U, &adl_features },
1226
{ INTEL_ARROWLAKE, &adl_features },
1227
{ INTEL_LUNARLAKE_M, &lnl_features },
1228
{ INTEL_PANTHERLAKE_L, &lnl_features },
1229
{ INTEL_NOVALAKE, &lnl_features },
1230
{ INTEL_NOVALAKE_L, &lnl_features },
1231
{ INTEL_WILDCATLAKE_L, &lnl_features },
1232
{ INTEL_ATOM_SILVERMONT, &slv_features },
1233
{ INTEL_ATOM_SILVERMONT_D, &slvd_features },
1234
{ INTEL_ATOM_AIRMONT, &amt_features },
1235
{ INTEL_ATOM_GOLDMONT, &gmt_features },
1236
{ INTEL_ATOM_GOLDMONT_D, &gmtd_features },
1237
{ INTEL_ATOM_GOLDMONT_PLUS, &gmtp_features },
1238
{ INTEL_ATOM_TREMONT_D, &tmtd_features },
1239
{ INTEL_ATOM_TREMONT, &tmt_features },
1240
{ INTEL_ATOM_TREMONT_L, &tmt_features },
1241
{ INTEL_ATOM_GRACEMONT, &adl_features },
1242
{ INTEL_ATOM_CRESTMONT_X, &srf_features },
1243
{ INTEL_ATOM_CRESTMONT, &grr_features },
1244
{ INTEL_ATOM_DARKMONT_X, &srf_features },
1245
{ INTEL_XEON_PHI_KNL, &knl_features },
1246
{ INTEL_XEON_PHI_KNM, &knl_features },
1247
/*
1248
* Missing support for
1249
* INTEL_ICELAKE
1250
* INTEL_ATOM_SILVERMONT_MID
1251
* INTEL_ATOM_SILVERMONT_MID2
1252
* INTEL_ATOM_AIRMONT_NP
1253
*/
1254
{ 0, NULL },
1255
};
1256
1257
/*
 * One unsigned value per CPU PMU flavour.
 * NOTE(review): presumably these hold the perf PMU "type" ids read from
 * sysfs, as mentioned in the comment above turbostat_perf_model_support[];
 * confirm against the code that fills them in.
 */
struct {
	unsigned int uniform;
	unsigned int pcore;
	unsigned int ecore;
	unsigned int lcore;
} perf_pmu_types;
/*
 * Events are enumerated in https://github.com/intel/perfmon
 * and tools/perf/pmu-events/arch/x86/.../cache.json
 *
 * Pair of perf event encodings (see PCE()) for one PMU: total L2 requests
 * and the subset of them that hit.
 */
struct perf_l2_events {
	unsigned long long refs;	/* L2_REQUEST.ALL */
	unsigned long long hits;	/* L2_REQUEST.HIT */
};
struct perf_model_support {
1274
unsigned int vfm;
1275
struct perf_l2_events first;
1276
struct perf_l2_events second;
1277
struct perf_l2_events third;
1278
} *perf_model_support;
1279
1280
/*
 * Perf Cache Events
 *
 * Build a 64-bit event encoding: constant 0x24 in bits 7:0, @umask shifted
 * to bit 8 and @ext_umask shifted to bit 40.  Both arguments are
 * parenthesized and widened before shifting, so expression arguments such
 * as PCE(0, A | B) expand as intended.
 */
#define PCE(ext_umask, umask) \
	((((unsigned long long)(ext_umask)) << 40) | (((unsigned long long)(umask)) << 8) | 0x24)
/*
1284
* Enumerate up to three perf CPU PMU's in a system.
1285
* The first, second, and third columns are populated without skipping, describing
1286
* pcore, ecore, lcore PMUs, in order, if present. (The associated PMU "type" field is
1287
* read from sysfs in all cases.) Eg.
1288
*
1289
* non-hybrid:
1290
* GNR: pcore, {}, {}
1291
* ADL-N: ecore, {}, {}
1292
* hybrid:
1293
* MTL: pcore, ecore, {}%
1294
* ARL-H: pcore, ecore, lcore
1295
* LNL: ecore, ecore%%, {}
1296
*
1297
* % MTL physical lcore share architecture and PMU with ecore, and are thus not enumerated separately.
1298
* %% LNL physical lcore is enumerated by perf as ecore
1299
*/
1300
static struct perf_model_support turbostat_perf_model_support[] = {
1301
{ INTEL_SAPPHIRERAPIDS_X, { PCE(0x00, 0xFF), PCE(0x00, 0xDF)}, {}, {} },
1302
{ INTEL_EMERALDRAPIDS_X, { PCE(0x00, 0xFF), PCE(0x00, 0xDF)}, {}, {} },
1303
{ INTEL_GRANITERAPIDS_X, { PCE(0x00, 0xFF), PCE(0x00, 0xDF)}, {}, {} },
1304
{ INTEL_GRANITERAPIDS_D, { PCE(0x00, 0xFF), PCE(0x00, 0xDF)}, {}, {} },
1305
{ INTEL_DIAMONDRAPIDS_X, { PCE(0x00, 0xFF), PCE(0x00, 0x5F)}, {}, {} },
1306
1307
{ INTEL_ATOM_GRACEMONT, { PCE(0x00, 0x00), PCE(0x00, 0x02)}, {}, {} }, /* ADL-N */
1308
{ INTEL_ATOM_CRESTMONT_X, { PCE(0x00, 0x00), PCE(0x00, 0x02)}, {}, {} }, /* SRF */
1309
{ INTEL_ATOM_CRESTMONT, { PCE(0x00, 0x00), PCE(0x00, 0x02)}, {}, {} }, /* GRR */
1310
{ INTEL_ATOM_DARKMONT_X, { PCE(0x01, 0xFF), PCE(0x01, 0xBF)}, {}, {} }, /* CWF */
1311
1312
{ INTEL_ALDERLAKE, { PCE(0x00, 0xFF), PCE(0x00, 0xDF)}, { PCE(0x00, 0x00), PCE(0x00, 0x02)}, {} },
1313
{ INTEL_ALDERLAKE, { PCE(0x00, 0xFF), PCE(0x00, 0xDF)}, { PCE(0x00, 0x00), PCE(0x00, 0x02)}, {} },
1314
{ INTEL_ALDERLAKE_L, { PCE(0x00, 0xFF), PCE(0x00, 0xDF)}, { PCE(0x00, 0x00), PCE(0x00, 0x02)}, {} },
1315
{ INTEL_RAPTORLAKE, { PCE(0x00, 0xFF), PCE(0x00, 0xDF)}, { PCE(0x00, 0x00), PCE(0x00, 0x02)}, {} },
1316
{ INTEL_RAPTORLAKE_P, { PCE(0x00, 0xFF), PCE(0x00, 0xDF)}, { PCE(0x00, 0x00), PCE(0x00, 0x02)}, {} },
1317
{ INTEL_RAPTORLAKE_S, { PCE(0x00, 0xFF), PCE(0x00, 0xDF)}, { PCE(0x00, 0x00), PCE(0x00, 0x02)}, {} },
1318
{ INTEL_METEORLAKE_L, { PCE(0x00, 0xFF), PCE(0x00, 0xDF)}, { PCE(0x00, 0x00), PCE(0x00, 0x02)}, {} },
1319
{ INTEL_METEORLAKE, { PCE(0x00, 0xFF), PCE(0x00, 0xDF)}, { PCE(0x00, 0x00), PCE(0x00, 0x02)}, {} },
1320
{ INTEL_ARROWLAKE_U, { PCE(0x00, 0xFF), PCE(0x00, 0xDF)}, { PCE(0x00, 0x00), PCE(0x00, 0x02)}, {} },
1321
1322
{ INTEL_LUNARLAKE_M, { PCE(0x00, 0xFF), PCE(0x00, 0x5F)}, { PCE(0x00, 0x07), PCE(0x00, 0x02)}, {} },
1323
{ INTEL_ARROWLAKE_H, { PCE(0x00, 0xFF), PCE(0x00, 0x5F)}, { PCE(0x00, 0x07), PCE(0x00, 0x02)}, { PCE(0x00, 0x00), PCE(0x00, 0x02)} },
1324
{ INTEL_ARROWLAKE, { PCE(0x00, 0xFF), PCE(0x00, 0x5F)}, { PCE(0x00, 0x07), PCE(0x00, 0x02)}, {} },
1325
1326
{ INTEL_PANTHERLAKE_L, { PCE(0x00, 0xFF), PCE(0x00, 0x5F)}, { PCE(0x01, 0xFF), PCE(0x01, 0xBF)}, {} },
1327
{ INTEL_WILDCATLAKE_L, { PCE(0x00, 0xFF), PCE(0x00, 0x5F)}, { PCE(0x01, 0xFF), PCE(0x01, 0xBF)}, {} },
1328
1329
{ INTEL_NOVALAKE, { PCE(0x00, 0xFF), PCE(0x00, 0x5F)}, { PCE(0x01, 0xFF), PCE(0x01, 0xBF)}, {} },
1330
{ INTEL_NOVALAKE_L, { PCE(0x00, 0xFF), PCE(0x00, 0x5F)}, { PCE(0x01, 0xFF), PCE(0x01, 0xBF)}, {} },
1331
1332
{ 0, {}, {}, {} }
1333
};
1334
1335
/*
 * Feature table of the running CPU.  NULL until probe_platform_features()
 * has run; that function either sets it or exits the program.
 */
static const struct platform_features *platform;
void probe_platform_features(unsigned int family, unsigned int model)
1338
{
1339
int i;
1340
1341
if (authentic_amd || hygon_genuine) {
1342
/* fallback to default features on unsupported models */
1343
force_load++;
1344
if (max_extended_level >= 0x80000007) {
1345
unsigned int eax, ebx, ecx, edx;
1346
1347
__cpuid(0x80000007, eax, ebx, ecx, edx);
1348
/* RAPL (Fam 17h+) */
1349
if ((edx & (1 << 14)) && family >= 0x17)
1350
platform = &amd_features_with_rapl;
1351
}
1352
goto end;
1353
}
1354
1355
if (!genuine_intel)
1356
goto end;
1357
1358
for (i = 0; turbostat_pdata[i].features; i++) {
1359
if (VFM_FAMILY(turbostat_pdata[i].vfm) == family && VFM_MODEL(turbostat_pdata[i].vfm) == model) {
1360
platform = turbostat_pdata[i].features;
1361
return;
1362
}
1363
}
1364
1365
end:
1366
if (force_load && !platform) {
1367
fprintf(outf, "Forced to run on unsupported platform!\n");
1368
platform = &default_features;
1369
}
1370
1371
if (platform)
1372
return;
1373
1374
fprintf(stderr, "Unsupported platform detected.\n\tSee RUN THE LATEST VERSION on turbostat(8)\n");
1375
exit(1);
1376
}
1377
1378
void init_perf_model_support(unsigned int family, unsigned int model)
1379
{
1380
int i;
1381
1382
if (!genuine_intel)
1383
return;
1384
1385
for (i = 0; turbostat_perf_model_support[i].vfm; i++) {
1386
if (VFM_FAMILY(turbostat_perf_model_support[i].vfm) == family && VFM_MODEL(turbostat_perf_model_support[i].vfm) == model) {
1387
perf_model_support = &turbostat_perf_model_support[i];
1388
return;
1389
}
1390
}
1391
}
1392
1393
/* Model specific support End */

#define TJMAX_DEFAULT	100

/* MSRs that are not yet in the kernel-provided header. */
#define MSR_RAPL_PWR_UNIT	0xc0010299
#define MSR_CORE_ENERGY_STAT	0xc001029a
#define MSR_PKG_ENERGY_STAT	0xc001029b

/* NOTE: evaluates the larger argument twice; do not pass expressions with side effects. */
#define MAX(a, b) ((a) > (b) ? (a) : (b))
int backwards_count;
char *progname;

#define CPU_SUBSET_MAXCPUS	8192	/* need to use before probe... */

/* CPU sets and, below, their sizes as size_t values. */
cpu_set_t *cpu_present_set, *cpu_possible_set, *cpu_effective_set, *cpu_allowed_set, *cpu_affinity_set, *cpu_subset;
cpu_set_t *perf_pcore_set, *perf_ecore_set, *perf_lcore_set;
size_t cpu_present_setsize, cpu_possible_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affinity_setsize, cpu_subset_size;
/* Upper bounds for added counters, per scope. */
#define MAX_ADDED_THREAD_COUNTERS	24
#define MAX_ADDED_CORE_COUNTERS		8
#define MAX_ADDED_PACKAGE_COUNTERS	16
#define PMT_MAX_ADDED_THREAD_COUNTERS	24
#define PMT_MAX_ADDED_CORE_COUNTERS	8
#define PMT_MAX_ADDED_PACKAGE_COUNTERS	16
#define BITMASK_SIZE			32

/*
 * Zero a whole array.  sizeof(arr) is only the full size for a true array;
 * NOTE(review): __must_be_array() is defined elsewhere and presumably turns
 * a pointer argument into a build error - confirm in its header.
 */
#define ZERO_ARRAY(arr) (memset(arr, 0, sizeof(arr)) + __must_be_array(arr))
/*
 * Indexes used to map data read from perf and MSRs into global variables.
 * They index the per-counter arrays of struct rapl_counter_info_t;
 * NUM_RAPL_COUNTERS must stay last because it sizes those arrays.
 */
enum rapl_rci_index {
	RAPL_RCI_INDEX_ENERGY_PKG = 0,
	RAPL_RCI_INDEX_ENERGY_CORES = 1,
	RAPL_RCI_INDEX_DRAM = 2,
	RAPL_RCI_INDEX_GFX = 3,
	RAPL_RCI_INDEX_PKG_PERF_STATUS = 4,
	RAPL_RCI_INDEX_DRAM_PERF_STATUS = 5,
	RAPL_RCI_INDEX_CORE_ENERGY = 6,
	RAPL_RCI_INDEX_ENERGY_PLATFORM = 7,
	NUM_RAPL_COUNTERS,
};
/*
 * Unit tag stored next to a RAPL value.  RAPL_UNIT_INVALID is 0, so
 * zero-initialized storage starts out as "invalid".
 */
enum rapl_unit {
	RAPL_UNIT_INVALID,
	RAPL_UNIT_JOULES,
	RAPL_UNIT_WATTS,
};
struct rapl_counter_info_t {
1441
unsigned long long data[NUM_RAPL_COUNTERS];
1442
enum counter_source source[NUM_RAPL_COUNTERS];
1443
unsigned long long flags[NUM_RAPL_COUNTERS];
1444
double scale[NUM_RAPL_COUNTERS];
1445
enum rapl_unit unit[NUM_RAPL_COUNTERS];
1446
unsigned long long msr[NUM_RAPL_COUNTERS];
1447
unsigned long long msr_mask[NUM_RAPL_COUNTERS];
1448
int msr_shift[NUM_RAPL_COUNTERS];
1449
1450
int fd_perf;
1451
};
1452
1453
/* struct rapl_counter_info_t for each RAPL domain */
struct rapl_counter_info_t *rapl_counter_info_perdomain;
unsigned int rapl_counter_info_perdomain_size;

/* Bits for the .flags member of struct rapl_counter_arch_info. */
#define RAPL_COUNTER_FLAG_PLATFORM_COUNTER	(1u << 0)
#define RAPL_COUNTER_FLAG_USE_MSR_SUM		(1u << 1)
/*
 * Static description of one RAPL counter: how to find it through perf
 * (subsystem and event name, NULL when there is none) or through an MSR,
 * and where its value goes.
 */
struct rapl_counter_arch_info {
	int feature_mask;	/* Mask for testing if the counter is supported on host */
	const char *perf_subsys;
	const char *perf_name;
	unsigned long long msr;
	unsigned long long msr_mask;
	int msr_shift;		/* Positive means shift right, negative means shift left */
	double *platform_rapl_msr_scale;	/* Scale applied to values read by MSR (platform dependent, filled at runtime) */
	unsigned int rci_index;	/* Maps data from perf counters to global variables */
	unsigned int bic_number;
	double compat_scale;	/* Some counters require constant scaling to be in the same range as other, similar ones */
	unsigned long long flags;
};
static const struct rapl_counter_arch_info rapl_counter_arch_infos[] = {
1475
{
1476
.feature_mask = RAPL_PKG,
1477
.perf_subsys = "power",
1478
.perf_name = "energy-pkg",
1479
.msr = MSR_PKG_ENERGY_STATUS,
1480
.msr_mask = 0xFFFFFFFFFFFFFFFF,
1481
.msr_shift = 0,
1482
.platform_rapl_msr_scale = &rapl_energy_units,
1483
.rci_index = RAPL_RCI_INDEX_ENERGY_PKG,
1484
.bic_number = BIC_PkgWatt,
1485
.compat_scale = 1.0,
1486
.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
1487
},
1488
{
1489
.feature_mask = RAPL_PKG,
1490
.perf_subsys = "power",
1491
.perf_name = "energy-pkg",
1492
.msr = MSR_PKG_ENERGY_STATUS,
1493
.msr_mask = 0xFFFFFFFFFFFFFFFF,
1494
.msr_shift = 0,
1495
.platform_rapl_msr_scale = &rapl_energy_units,
1496
.rci_index = RAPL_RCI_INDEX_ENERGY_PKG,
1497
.bic_number = BIC_Pkg_J,
1498
.compat_scale = 1.0,
1499
.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
1500
},
1501
{
1502
.feature_mask = RAPL_AMD_F17H,
1503
.perf_subsys = "power",
1504
.perf_name = "energy-pkg",
1505
.msr = MSR_PKG_ENERGY_STAT,
1506
.msr_mask = 0xFFFFFFFFFFFFFFFF,
1507
.msr_shift = 0,
1508
.platform_rapl_msr_scale = &rapl_energy_units,
1509
.rci_index = RAPL_RCI_INDEX_ENERGY_PKG,
1510
.bic_number = BIC_PkgWatt,
1511
.compat_scale = 1.0,
1512
.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
1513
},
1514
{
1515
.feature_mask = RAPL_AMD_F17H,
1516
.perf_subsys = "power",
1517
.perf_name = "energy-pkg",
1518
.msr = MSR_PKG_ENERGY_STAT,
1519
.msr_mask = 0xFFFFFFFFFFFFFFFF,
1520
.msr_shift = 0,
1521
.platform_rapl_msr_scale = &rapl_energy_units,
1522
.rci_index = RAPL_RCI_INDEX_ENERGY_PKG,
1523
.bic_number = BIC_Pkg_J,
1524
.compat_scale = 1.0,
1525
.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
1526
},
1527
{
1528
.feature_mask = RAPL_CORE_ENERGY_STATUS,
1529
.perf_subsys = "power",
1530
.perf_name = "energy-cores",
1531
.msr = MSR_PP0_ENERGY_STATUS,
1532
.msr_mask = 0xFFFFFFFFFFFFFFFF,
1533
.msr_shift = 0,
1534
.platform_rapl_msr_scale = &rapl_energy_units,
1535
.rci_index = RAPL_RCI_INDEX_ENERGY_CORES,
1536
.bic_number = BIC_CorWatt,
1537
.compat_scale = 1.0,
1538
.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
1539
},
1540
{
1541
.feature_mask = RAPL_CORE_ENERGY_STATUS,
1542
.perf_subsys = "power",
1543
.perf_name = "energy-cores",
1544
.msr = MSR_PP0_ENERGY_STATUS,
1545
.msr_mask = 0xFFFFFFFFFFFFFFFF,
1546
.msr_shift = 0,
1547
.platform_rapl_msr_scale = &rapl_energy_units,
1548
.rci_index = RAPL_RCI_INDEX_ENERGY_CORES,
1549
.bic_number = BIC_Cor_J,
1550
.compat_scale = 1.0,
1551
.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
1552
},
1553
{
1554
.feature_mask = RAPL_DRAM,
1555
.perf_subsys = "power",
1556
.perf_name = "energy-ram",
1557
.msr = MSR_DRAM_ENERGY_STATUS,
1558
.msr_mask = 0xFFFFFFFFFFFFFFFF,
1559
.msr_shift = 0,
1560
.platform_rapl_msr_scale = &rapl_dram_energy_units,
1561
.rci_index = RAPL_RCI_INDEX_DRAM,
1562
.bic_number = BIC_RAMWatt,
1563
.compat_scale = 1.0,
1564
.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
1565
},
1566
{
1567
.feature_mask = RAPL_DRAM,
1568
.perf_subsys = "power",
1569
.perf_name = "energy-ram",
1570
.msr = MSR_DRAM_ENERGY_STATUS,
1571
.msr_mask = 0xFFFFFFFFFFFFFFFF,
1572
.msr_shift = 0,
1573
.platform_rapl_msr_scale = &rapl_dram_energy_units,
1574
.rci_index = RAPL_RCI_INDEX_DRAM,
1575
.bic_number = BIC_RAM_J,
1576
.compat_scale = 1.0,
1577
.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
1578
},
1579
{
1580
.feature_mask = RAPL_GFX,
1581
.perf_subsys = "power",
1582
.perf_name = "energy-gpu",
1583
.msr = MSR_PP1_ENERGY_STATUS,
1584
.msr_mask = 0xFFFFFFFFFFFFFFFF,
1585
.msr_shift = 0,
1586
.platform_rapl_msr_scale = &rapl_energy_units,
1587
.rci_index = RAPL_RCI_INDEX_GFX,
1588
.bic_number = BIC_GFXWatt,
1589
.compat_scale = 1.0,
1590
.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
1591
},
1592
{
1593
.feature_mask = RAPL_GFX,
1594
.perf_subsys = "power",
1595
.perf_name = "energy-gpu",
1596
.msr = MSR_PP1_ENERGY_STATUS,
1597
.msr_mask = 0xFFFFFFFFFFFFFFFF,
1598
.msr_shift = 0,
1599
.platform_rapl_msr_scale = &rapl_energy_units,
1600
.rci_index = RAPL_RCI_INDEX_GFX,
1601
.bic_number = BIC_GFX_J,
1602
.compat_scale = 1.0,
1603
.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
1604
},
1605
{
1606
.feature_mask = RAPL_PKG_PERF_STATUS,
1607
.perf_subsys = NULL,
1608
.perf_name = NULL,
1609
.msr = MSR_PKG_PERF_STATUS,
1610
.msr_mask = 0xFFFFFFFFFFFFFFFF,
1611
.msr_shift = 0,
1612
.platform_rapl_msr_scale = &rapl_time_units,
1613
.rci_index = RAPL_RCI_INDEX_PKG_PERF_STATUS,
1614
.bic_number = BIC_PKG__,
1615
.compat_scale = 100.0,
1616
.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
1617
},
1618
{
1619
.feature_mask = RAPL_DRAM_PERF_STATUS,
1620
.perf_subsys = NULL,
1621
.perf_name = NULL,
1622
.msr = MSR_DRAM_PERF_STATUS,
1623
.msr_mask = 0xFFFFFFFFFFFFFFFF,
1624
.msr_shift = 0,
1625
.platform_rapl_msr_scale = &rapl_time_units,
1626
.rci_index = RAPL_RCI_INDEX_DRAM_PERF_STATUS,
1627
.bic_number = BIC_RAM__,
1628
.compat_scale = 100.0,
1629
.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
1630
},
1631
{
1632
.feature_mask = RAPL_AMD_F17H,
1633
.perf_subsys = NULL,
1634
.perf_name = NULL,
1635
.msr = MSR_CORE_ENERGY_STAT,
1636
.msr_mask = 0xFFFFFFFF,
1637
.msr_shift = 0,
1638
.platform_rapl_msr_scale = &rapl_energy_units,
1639
.rci_index = RAPL_RCI_INDEX_CORE_ENERGY,
1640
.bic_number = BIC_CorWatt,
1641
.compat_scale = 1.0,
1642
.flags = 0,
1643
},
1644
{
1645
.feature_mask = RAPL_AMD_F17H,
1646
.perf_subsys = NULL,
1647
.perf_name = NULL,
1648
.msr = MSR_CORE_ENERGY_STAT,
1649
.msr_mask = 0xFFFFFFFF,
1650
.msr_shift = 0,
1651
.platform_rapl_msr_scale = &rapl_energy_units,
1652
.rci_index = RAPL_RCI_INDEX_CORE_ENERGY,
1653
.bic_number = BIC_Cor_J,
1654
.compat_scale = 1.0,
1655
.flags = 0,
1656
},
1657
{
1658
.feature_mask = RAPL_PSYS,
1659
.perf_subsys = "power",
1660
.perf_name = "energy-psys",
1661
.msr = MSR_PLATFORM_ENERGY_STATUS,
1662
.msr_mask = 0x00000000FFFFFFFF,
1663
.msr_shift = 0,
1664
.platform_rapl_msr_scale = &rapl_psys_energy_units,
1665
.rci_index = RAPL_RCI_INDEX_ENERGY_PLATFORM,
1666
.bic_number = BIC_SysWatt,
1667
.compat_scale = 1.0,
1668
.flags = RAPL_COUNTER_FLAG_PLATFORM_COUNTER | RAPL_COUNTER_FLAG_USE_MSR_SUM,
1669
},
1670
{
1671
.feature_mask = RAPL_PSYS,
1672
.perf_subsys = "power",
1673
.perf_name = "energy-psys",
1674
.msr = MSR_PLATFORM_ENERGY_STATUS,
1675
.msr_mask = 0x00000000FFFFFFFF,
1676
.msr_shift = 0,
1677
.platform_rapl_msr_scale = &rapl_psys_energy_units,
1678
.rci_index = RAPL_RCI_INDEX_ENERGY_PLATFORM,
1679
.bic_number = BIC_Sys_J,
1680
.compat_scale = 1.0,
1681
.flags = RAPL_COUNTER_FLAG_PLATFORM_COUNTER | RAPL_COUNTER_FLAG_USE_MSR_SUM,
1682
},
1683
};
1684
1685
struct rapl_counter {
1686
unsigned long long raw_value;
1687
enum rapl_unit unit;
1688
double scale;
1689
};
1690
1691
/*
 * Indexes used to map data read from perf and MSRs into global variables.
 * Core C-state (CCSTATE_*) and package C-state (PCSTATE_*) residencies
 * share one index space; NUM_CSTATE_COUNTERS must stay last because it
 * sizes the arrays in struct cstate_counter_info_t.
 */
enum ccstate_rci_index {
	CCSTATE_RCI_INDEX_C1_RESIDENCY = 0,
	CCSTATE_RCI_INDEX_C3_RESIDENCY = 1,
	CCSTATE_RCI_INDEX_C6_RESIDENCY = 2,
	CCSTATE_RCI_INDEX_C7_RESIDENCY = 3,
	PCSTATE_RCI_INDEX_C2_RESIDENCY = 4,
	PCSTATE_RCI_INDEX_C3_RESIDENCY = 5,
	PCSTATE_RCI_INDEX_C6_RESIDENCY = 6,
	PCSTATE_RCI_INDEX_C7_RESIDENCY = 7,
	PCSTATE_RCI_INDEX_C8_RESIDENCY = 8,
	PCSTATE_RCI_INDEX_C9_RESIDENCY = 9,
	PCSTATE_RCI_INDEX_C10_RESIDENCY = 10,
	NUM_CSTATE_COUNTERS,
};
struct cstate_counter_info_t {
1708
unsigned long long data[NUM_CSTATE_COUNTERS];
1709
enum counter_source source[NUM_CSTATE_COUNTERS];
1710
unsigned long long msr[NUM_CSTATE_COUNTERS];
1711
int fd_perf_core;
1712
int fd_perf_pkg;
1713
};
1714
1715
struct cstate_counter_info_t *ccstate_counter_info;
unsigned int ccstate_counter_info_size;

/*
 * Bits for the .flags member of struct cstate_counter_arch_info.
 * COLLECT_PER_THREAD includes the COLLECT_PER_CORE bit, so testing for
 * COLLECT_PER_CORE is also true for per-thread counters.
 */
#define CSTATE_COUNTER_FLAG_COLLECT_PER_CORE	(1u << 0)
#define CSTATE_COUNTER_FLAG_COLLECT_PER_THREAD	((1u << 1) | CSTATE_COUNTER_FLAG_COLLECT_PER_CORE)
#define CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY	(1u << 2)
/*
 * Static description of one C-state residency counter: how to find it
 * through perf (subsystem and event name) or through an MSR, and where its
 * value goes.
 */
struct cstate_counter_arch_info {
	int feature_mask;	/* Mask for testing if the counter is supported on host */
	const char *perf_subsys;
	const char *perf_name;
	unsigned long long msr;
	unsigned int rci_index;	/* Maps data from perf counters to global variables */
	unsigned int bic_number;
	unsigned long long flags;	/* CSTATE_COUNTER_FLAG_* bits */
	int pkg_cstate_limit;
};
static struct cstate_counter_arch_info ccstate_counter_arch_infos[] = {
1734
{
1735
.feature_mask = CC1,
1736
.perf_subsys = "cstate_core",
1737
.perf_name = "c1-residency",
1738
.msr = MSR_CORE_C1_RES,
1739
.rci_index = CCSTATE_RCI_INDEX_C1_RESIDENCY,
1740
.bic_number = BIC_CPU_c1,
1741
.flags = CSTATE_COUNTER_FLAG_COLLECT_PER_THREAD,
1742
.pkg_cstate_limit = 0,
1743
},
1744
{
1745
.feature_mask = CC3,
1746
.perf_subsys = "cstate_core",
1747
.perf_name = "c3-residency",
1748
.msr = MSR_CORE_C3_RESIDENCY,
1749
.rci_index = CCSTATE_RCI_INDEX_C3_RESIDENCY,
1750
.bic_number = BIC_CPU_c3,
1751
.flags = CSTATE_COUNTER_FLAG_COLLECT_PER_CORE | CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY,
1752
.pkg_cstate_limit = 0,
1753
},
1754
{
1755
.feature_mask = CC6,
1756
.perf_subsys = "cstate_core",
1757
.perf_name = "c6-residency",
1758
.msr = MSR_CORE_C6_RESIDENCY,
1759
.rci_index = CCSTATE_RCI_INDEX_C6_RESIDENCY,
1760
.bic_number = BIC_CPU_c6,
1761
.flags = CSTATE_COUNTER_FLAG_COLLECT_PER_CORE | CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY,
1762
.pkg_cstate_limit = 0,
1763
},
1764
{
1765
.feature_mask = CC7,
1766
.perf_subsys = "cstate_core",
1767
.perf_name = "c7-residency",
1768
.msr = MSR_CORE_C7_RESIDENCY,
1769
.rci_index = CCSTATE_RCI_INDEX_C7_RESIDENCY,
1770
.bic_number = BIC_CPU_c7,
1771
.flags = CSTATE_COUNTER_FLAG_COLLECT_PER_CORE | CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY,
1772
.pkg_cstate_limit = 0,
1773
},
1774
{
1775
.feature_mask = PC2,
1776
.perf_subsys = "cstate_pkg",
1777
.perf_name = "c2-residency",
1778
.msr = MSR_PKG_C2_RESIDENCY,
1779
.rci_index = PCSTATE_RCI_INDEX_C2_RESIDENCY,
1780
.bic_number = BIC_Pkgpc2,
1781
.flags = 0,
1782
.pkg_cstate_limit = PCL__2,
1783
},
1784
{
1785
.feature_mask = PC3,
1786
.perf_subsys = "cstate_pkg",
1787
.perf_name = "c3-residency",
1788
.msr = MSR_PKG_C3_RESIDENCY,
1789
.rci_index = PCSTATE_RCI_INDEX_C3_RESIDENCY,
1790
.bic_number = BIC_Pkgpc3,
1791
.flags = 0,
1792
.pkg_cstate_limit = PCL__3,
1793
},
1794
{
1795
.feature_mask = PC6,
1796
.perf_subsys = "cstate_pkg",
1797
.perf_name = "c6-residency",
1798
.msr = MSR_PKG_C6_RESIDENCY,
1799
.rci_index = PCSTATE_RCI_INDEX_C6_RESIDENCY,
1800
.bic_number = BIC_Pkgpc6,
1801
.flags = 0,
1802
.pkg_cstate_limit = PCL__6,
1803
},
1804
{
1805
.feature_mask = PC7,
1806
.perf_subsys = "cstate_pkg",
1807
.perf_name = "c7-residency",
1808
.msr = MSR_PKG_C7_RESIDENCY,
1809
.rci_index = PCSTATE_RCI_INDEX_C7_RESIDENCY,
1810
.bic_number = BIC_Pkgpc7,
1811
.flags = 0,
1812
.pkg_cstate_limit = PCL__7,
1813
},
1814
{
1815
.feature_mask = PC8,
1816
.perf_subsys = "cstate_pkg",
1817
.perf_name = "c8-residency",
1818
.msr = MSR_PKG_C8_RESIDENCY,
1819
.rci_index = PCSTATE_RCI_INDEX_C8_RESIDENCY,
1820
.bic_number = BIC_Pkgpc8,
1821
.flags = 0,
1822
.pkg_cstate_limit = PCL__8,
1823
},
1824
{
1825
.feature_mask = PC9,
1826
.perf_subsys = "cstate_pkg",
1827
.perf_name = "c9-residency",
1828
.msr = MSR_PKG_C9_RESIDENCY,
1829
.rci_index = PCSTATE_RCI_INDEX_C9_RESIDENCY,
1830
.bic_number = BIC_Pkgpc9,
1831
.flags = 0,
1832
.pkg_cstate_limit = PCL__9,
1833
},
1834
{
1835
.feature_mask = PC10,
1836
.perf_subsys = "cstate_pkg",
1837
.perf_name = "c10-residency",
1838
.msr = MSR_PKG_C10_RESIDENCY,
1839
.rci_index = PCSTATE_RCI_INDEX_C10_RESIDENCY,
1840
.bic_number = BIC_Pkgpc10,
1841
.flags = 0,
1842
.pkg_cstate_limit = PCL_10,
1843
},
1844
};
1845
1846
/*
 * Indexes used to map data read from perf and MSRs into global variables.
 * They index the arrays of struct msr_counter_info_t; NUM_MSR_COUNTERS
 * must stay last because it sizes those arrays.
 */
enum msr_rci_index {
	MSR_RCI_INDEX_APERF = 0,
	MSR_RCI_INDEX_MPERF = 1,
	MSR_RCI_INDEX_SMI = 2,
	NUM_MSR_COUNTERS,
};
/*
 * State for the counters enumerated by enum msr_rci_index.
 * Every array holds one slot per index (NUM_MSR_COUNTERS slots).
 */
struct msr_counter_info_t {
	unsigned long long data[NUM_MSR_COUNTERS];	/* NOTE(review): presumably the last value read - confirm */
	enum counter_source source[NUM_MSR_COUNTERS];	/* where each counter is read from */
	unsigned long long msr[NUM_MSR_COUNTERS];	/* MSR address per counter */
	unsigned long long msr_mask[NUM_MSR_COUNTERS];	/* mask per counter, see msr_counter_arch_infos[] */
	int fd_perf;	/* NOTE(review): presumably a perf file descriptor - confirm against users */
};

/* Array of msr_counter_info_t and its element count; allocated elsewhere in the file. */
struct msr_counter_info_t *msr_counter_info;
unsigned int msr_counter_info_size;
1864
1865
/*
 * Static description of one counter that can be read either through perf
 * (perf_subsys/perf_name) or directly from an MSR (msr/msr_mask).
 */
struct msr_counter_arch_info {
	const char *perf_subsys;	/* perf subsystem name, e.g. "msr" */
	const char *perf_name;		/* perf event name within the subsystem */
	unsigned long long msr;		/* MSR address */
	unsigned long long msr_mask;	/* mask for the MSR value; NOTE(review): applied by readers outside this chunk */
	unsigned int rci_index;	/* Maps data from perf counters to global variables */
	bool needed;	/* NOTE(review): set outside this chunk - semantics to be confirmed */
	bool present;	/* NOTE(review): set outside this chunk - semantics to be confirmed */
};
1874
1875
/* Positions of the entries in msr_counter_arch_infos[]. */
enum msr_arch_info_index {
	MSR_ARCH_INFO_APERF_INDEX = 0,
	MSR_ARCH_INFO_MPERF_INDEX = 1,
	MSR_ARCH_INFO_SMI_INDEX = 2,
};

/*
 * Descriptions of the APERF, MPERF and SMI counters, indexed by
 * enum msr_arch_info_index. All three use the "msr" perf subsystem.
 * APERF and MPERF keep all 64 bits; SMI keeps only the low 32 bits.
 * The needed/present members are left zero-initialized here.
 */
static struct msr_counter_arch_info msr_counter_arch_infos[] = {
	[MSR_ARCH_INFO_APERF_INDEX] = {
		.perf_subsys = "msr",
		.perf_name = "aperf",
		.msr = MSR_IA32_APERF,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.rci_index = MSR_RCI_INDEX_APERF,
	},

	[MSR_ARCH_INFO_MPERF_INDEX] = {
		.perf_subsys = "msr",
		.perf_name = "mperf",
		.msr = MSR_IA32_MPERF,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.rci_index = MSR_RCI_INDEX_MPERF,
	},

	[MSR_ARCH_INFO_SMI_INDEX] = {
		.perf_subsys = "msr",
		.perf_name = "smi",
		.msr = MSR_SMI_COUNT,
		.msr_mask = 0xFFFFFFFF,
		.rci_index = MSR_RCI_INDEX_SMI,
	},
};
1906
1907
/* Can be redefined when compiling, useful for testing. */
1908
#ifndef SYSFS_TELEM_PATH
1909
#define SYSFS_TELEM_PATH "/sys/class/intel_pmt"
1910
#endif
1911
1912
#define PMT_COUNTER_MTL_DC6_OFFSET 120
1913
#define PMT_COUNTER_MTL_DC6_LSB 0
1914
#define PMT_COUNTER_MTL_DC6_MSB 63
1915
#define PMT_MTL_DC6_GUID 0x1a067102
1916
#define PMT_MTL_DC6_SEQ 0
1917
1918
#define PMT_COUNTER_CWF_MC1E_OFFSET_BASE 20936
1919
#define PMT_COUNTER_CWF_MC1E_OFFSET_INCREMENT 24
1920
#define PMT_COUNTER_CWF_MC1E_NUM_MODULES_PER_FILE 12
1921
#define PMT_COUNTER_CWF_CPUS_PER_MODULE 4
1922
#define PMT_COUNTER_CWF_MC1E_LSB 0
1923
#define PMT_COUNTER_CWF_MC1E_MSB 63
1924
#define PMT_CWF_MC1E_GUID 0x14421519
1925
1926
unsigned long long tcore_clock_freq_hz = 800000000;
1927
1928
#define PMT_COUNTER_NAME_SIZE_BYTES 16
1929
#define PMT_COUNTER_TYPE_NAME_SIZE_BYTES 32
1930
1931
/*
 * One mapped PMT telemetry region, kept on a singly linked list whose head
 * is pmt_mmios.
 */
struct pmt_mmio {
	struct pmt_mmio *next;	/* next region on the list */

	unsigned int guid;	/* GUID of the region */
	unsigned int size;	/* NOTE(review): presumably the mapping size in bytes - confirm */

	/* Base pointer to the mmaped memory. */
	void *mmio_base;

	/*
	 * Offset to be applied to the mmio_base
	 * to get the beginning of the PMT counters for given GUID.
	 */
	unsigned long pmt_offset;
} *pmt_mmios;
1946
1947
/*
 * Data type of a PMT counter (stored in struct pmt_counter.type).
 * NOTE(review): XTAL_TIME and TCORE_CLOCK presumably denote values counted in
 * crystal-clock and tcore-clock ticks - confirm against the code that
 * converts them.
 */
enum pmt_datatype {
	PMT_TYPE_RAW,
	PMT_TYPE_XTAL_TIME,
	PMT_TYPE_TCORE_CLOCK,
};
1952
1953
/* Per-domain state of a PMT counter; see struct pmt_counter.domains. */
struct pmt_domain_info {
	/*
	 * Pointer to the MMIO obtained by applying a counter offset
	 * to the mmio_base of the mmaped region for the given GUID.
	 *
	 * This is where to read the raw value of the counter from.
	 * pmt_counter_resize_() zero-fills new entries, so this starts as NULL.
	 */
	unsigned long *pcounter;
};
1962
1963
/*
 * Description of one PMT counter, kept on a singly linked list.
 * The domains array is grown by pmt_counter_resize().
 */
struct pmt_counter {
	struct pmt_counter *next;	/* next counter on the list */

	/* PMT metadata */
	char name[PMT_COUNTER_NAME_SIZE_BYTES];
	enum pmt_datatype type;
	enum counter_scope scope;
	unsigned int lsb;	/* lowest bit of the counter field */
	unsigned int msb;	/* highest bit of the field; width is msb - lsb + 1 */

	/* BIC-like metadata */
	enum counter_format format;

	unsigned int num_domains;	/* number of entries allocated in domains */
	struct pmt_domain_info *domains;
};
1979
1980
/*
 * PMT telemetry directory iterator.
 * Used to iterate telemetry files in sysfs in correct order.
 *
 * Set up with pmt_diriter_init(), started with pmt_diriter_begin(),
 * advanced with pmt_diriter_next() and released with pmt_diriter_remove().
 */
struct pmt_diriter_t {
	DIR *dir;			/* non-NULL once the directory has been opened */
	struct dirent **namelist;	/* sorted entries allocated by scandir() */
	unsigned int num_names;		/* number of entries in namelist */
	unsigned int current_name_idx;	/* index of the next entry to return */
};
1990
1991
/*
 * scandir() filter: keep only directory entries named "telem<N>".
 *
 * sscanf() returns EOF, which is non-zero, when the name ends before the
 * number could be converted (for example a bare "telem"). The result is
 * therefore compared against the expected conversion count rather than
 * returned as-is.
 *
 * Returns 1 when the entry should be kept, 0 otherwise.
 */
int pmt_telemdir_filter(const struct dirent *e)
{
	unsigned int dummy;

	return sscanf(e->d_name, "telem%u", &dummy) == 1;
}
1997
1998
/*
 * scandir() comparator: order "telem<N>" entries by ascending N.
 * A name whose number cannot be parsed compares as index 0.
 *
 * Returns -1, 0 or 1 in the usual comparator convention.
 */
int pmt_telemdir_sort(const struct dirent **a, const struct dirent **b)
{
	unsigned int lhs = 0;
	unsigned int rhs = 0;

	sscanf((*a)->d_name, "telem%u", &lhs);
	sscanf((*b)->d_name, "telem%u", &rhs);

	if (lhs < rhs)
		return -1;
	if (lhs > rhs)
		return 1;

	return 0;
}
2007
2008
const struct dirent *pmt_diriter_next(struct pmt_diriter_t *iter)
2009
{
2010
const struct dirent *ret = NULL;
2011
2012
if (!iter->dir)
2013
return NULL;
2014
2015
if (iter->current_name_idx >= iter->num_names)
2016
return NULL;
2017
2018
ret = iter->namelist[iter->current_name_idx];
2019
++iter->current_name_idx;
2020
2021
return ret;
2022
}
2023
2024
const struct dirent *pmt_diriter_begin(struct pmt_diriter_t *iter, const char *pmt_root_path)
2025
{
2026
int num_names = iter->num_names;
2027
2028
if (!iter->dir) {
2029
iter->dir = opendir(pmt_root_path);
2030
if (iter->dir == NULL)
2031
return NULL;
2032
2033
num_names = scandir(pmt_root_path, &iter->namelist, pmt_telemdir_filter, pmt_telemdir_sort);
2034
if (num_names == -1)
2035
return NULL;
2036
}
2037
2038
iter->current_name_idx = 0;
2039
iter->num_names = num_names;
2040
2041
return pmt_diriter_next(iter);
2042
}
2043
2044
void pmt_diriter_init(struct pmt_diriter_t *iter)
2045
{
2046
memset(iter, 0, sizeof(*iter));
2047
}
2048
2049
void pmt_diriter_remove(struct pmt_diriter_t *iter)
2050
{
2051
if (iter->namelist) {
2052
for (unsigned int i = 0; i < iter->num_names; i++) {
2053
free(iter->namelist[i]);
2054
iter->namelist[i] = NULL;
2055
}
2056
}
2057
2058
free(iter->namelist);
2059
iter->namelist = NULL;
2060
iter->num_names = 0;
2061
iter->current_name_idx = 0;
2062
2063
closedir(iter->dir);
2064
iter->dir = NULL;
2065
}
2066
2067
unsigned int pmt_counter_get_width(const struct pmt_counter *p)
2068
{
2069
return (p->msb - p->lsb) + 1;
2070
}
2071
2072
void pmt_counter_resize_(struct pmt_counter *pcounter, unsigned int new_size)
2073
{
2074
struct pmt_domain_info *new_mem;
2075
2076
new_mem = (struct pmt_domain_info *)reallocarray(pcounter->domains, new_size, sizeof(*pcounter->domains));
2077
if (!new_mem) {
2078
fprintf(stderr, "%s: failed to allocate memory for PMT counters\n", __func__);
2079
exit(1);
2080
}
2081
2082
/* Zero initialize just allocated memory. */
2083
const size_t num_new_domains = new_size - pcounter->num_domains;
2084
2085
memset(&new_mem[pcounter->num_domains], 0, num_new_domains * sizeof(*pcounter->domains));
2086
2087
pcounter->num_domains = new_size;
2088
pcounter->domains = new_mem;
2089
}
2090
2091
void pmt_counter_resize(struct pmt_counter *pcounter, unsigned int new_size)
2092
{
2093
/*
2094
* Allocate more memory ahead of time.
2095
*
2096
* Always allocate space for at least 8 elements
2097
* and double the size when growing.
2098
*/
2099
if (new_size < 8)
2100
new_size = 8;
2101
new_size = MAX(new_size, pcounter->num_domains * 2);
2102
2103
pmt_counter_resize_(pcounter, new_size);
2104
}
2105
2106
/* Reference and miss counts for the LLC; embedded in struct thread_data. */
struct llc_stats {
	unsigned long long references;
	unsigned long long misses;
};

/* Reference and hit counts for the L2; embedded in struct thread_data. */
struct l2_stats {
	unsigned long long references;
	unsigned long long hits;
};
2114
/*
 * Counter values collected for one logical CPU.
 * for_all_cpus() indexes arrays of this struct by CPU number.
 */
struct thread_data {
	struct timeval tv_begin;
	struct timeval tv_end;
	struct timeval tv_delta;
	unsigned long long tsc;
	unsigned long long aperf;
	unsigned long long mperf;
	unsigned long long c1;
	unsigned long long instr_count;
	unsigned long long irq_count;
	unsigned long long nmi_count;
	unsigned int smi_count;
	struct llc_stats llc;
	struct l2_stats l2;
	unsigned int cpu_id;	/* compared with first_cpu by the is_cpu_first_*() helpers */
	unsigned int apic_id;
	unsigned int x2apic_id;
	unsigned int flags;
	bool is_atom;
	/* Values of user-added counters, one array per source (MSR, perf, PMT). */
	unsigned long long counter[MAX_ADDED_THREAD_COUNTERS];
	unsigned long long perf_counter[MAX_ADDED_THREAD_COUNTERS];
	unsigned long long pmt_counter[PMT_MAX_ADDED_THREAD_COUNTERS];
};
2137
2138
/*
 * Counter values collected for one core.
 * for_all_cpus() indexes arrays of this struct by
 * GLOBAL_CORE_ID(core_id, package_id).
 */
struct core_data {
	/*
	 * CPU number matched by is_cpu_first_thread_in_core();
	 * a negative value makes that test succeed for any CPU.
	 */
	int first_cpu;
	unsigned long long c3;
	unsigned long long c6;
	unsigned long long c7;
	unsigned long long mc6_us;	/* duplicate as per-core for now, even though per module */
	unsigned int core_temp_c;
	struct rapl_counter core_energy;	/* MSR_CORE_ENERGY_STAT */
	unsigned long long core_throt_cnt;
	/* Values of user-added counters, one array per source (MSR, perf, PMT). */
	unsigned long long counter[MAX_ADDED_CORE_COUNTERS];
	unsigned long long perf_counter[MAX_ADDED_CORE_COUNTERS];
	unsigned long long pmt_counter[PMT_MAX_ADDED_CORE_COUNTERS];
};
2151
2152
/*
 * Counter values collected for one package.
 * for_all_cpus() indexes arrays of this struct by package id.
 */
struct pkg_data {
	/*
	 * CPU number matched by is_cpu_first_core_in_package();
	 * a negative value makes that test succeed for any CPU.
	 */
	int first_cpu;
	unsigned long long pc2;
	unsigned long long pc3;
	unsigned long long pc6;
	unsigned long long pc7;
	unsigned long long pc8;
	unsigned long long pc9;
	unsigned long long pc10;
	long long cpu_lpi;
	long long sys_lpi;
	unsigned long long pkg_wtd_core_c0;
	unsigned long long pkg_any_core_c0;
	unsigned long long pkg_any_gfxe_c0;
	unsigned long long pkg_both_core_gfxe_c0;
	long long gfx_rc6_ms;
	unsigned int gfx_mhz;
	unsigned int gfx_act_mhz;
	long long sam_mc6_ms;
	unsigned int sam_mhz;
	unsigned int sam_act_mhz;
	struct rapl_counter energy_pkg;	/* MSR_PKG_ENERGY_STATUS */
	struct rapl_counter energy_dram;	/* MSR_DRAM_ENERGY_STATUS */
	struct rapl_counter energy_cores;	/* MSR_PP0_ENERGY_STATUS */
	struct rapl_counter energy_gfx;	/* MSR_PP1_ENERGY_STATUS */
	struct rapl_counter rapl_pkg_perf_status;	/* MSR_PKG_PERF_STATUS */
	struct rapl_counter rapl_dram_perf_status;	/* MSR_DRAM_PERF_STATUS */
	unsigned int pkg_temp_c;
	unsigned int uncore_mhz;
	unsigned long long die_c6;
	/* Values of user-added counters, one array per source (MSR, perf, PMT). */
	unsigned long long counter[MAX_ADDED_PACKAGE_COUNTERS];
	unsigned long long perf_counter[MAX_ADDED_PACKAGE_COUNTERS];
	unsigned long long pmt_counter[PMT_MAX_ADDED_PACKAGE_COUNTERS];
};
2186
2187
/*
 * Expand to the (threads, cores, packages) base pointers of the odd or even
 * counter set, in the order of the trailing three arguments of for_all_cpus().
 */
#define ODD_COUNTERS odd.threads, odd.cores, odd.packages
#define EVEN_COUNTERS even.threads, even.cores, even.packages
2189
2190
/*
 * Indexes of the MSRs whose values are accumulated into a running sum
 * (see struct msr_sum_array).
 *
 * These are monotonically increasing MSRs; they are accumulated
 * periodically, regardless of the register's bit width.
 *
 * IDX_COUNT is the number of indexes.
 */
enum {
	IDX_PKG_ENERGY,
	IDX_DRAM_ENERGY,
	IDX_PP0_ENERGY,
	IDX_PP1_ENERGY,
	IDX_PKG_PERF,
	IDX_DRAM_PERF,
	IDX_PSYS_ENERGY,
	IDX_COUNT,
};
2205
2206
int get_msr_sum(int cpu, off_t offset, unsigned long long *msr);
2207
2208
/* Accumulation state for every IDX_* MSR, one entry per index. */
struct msr_sum_array {
	/* get_msr_sum() = sum + (get_msr() - last) */
	struct {
		/* The accumulated MSR value, updated by the timer. */
		unsigned long long sum;
		/* The MSR value recorded by the last timer run. */
		unsigned long long last;
	} entries[IDX_COUNT];
};

/* The per-CPU MSR sum array. */
struct msr_sum_array *per_cpu_msr_sum;
2220
2221
off_t idx_to_offset(int idx)
2222
{
2223
off_t offset;
2224
2225
switch (idx) {
2226
case IDX_PKG_ENERGY:
2227
if (platform->plat_rapl_msrs & RAPL_AMD_F17H)
2228
offset = MSR_PKG_ENERGY_STAT;
2229
else
2230
offset = MSR_PKG_ENERGY_STATUS;
2231
break;
2232
case IDX_DRAM_ENERGY:
2233
offset = MSR_DRAM_ENERGY_STATUS;
2234
break;
2235
case IDX_PP0_ENERGY:
2236
offset = MSR_PP0_ENERGY_STATUS;
2237
break;
2238
case IDX_PP1_ENERGY:
2239
offset = MSR_PP1_ENERGY_STATUS;
2240
break;
2241
case IDX_PKG_PERF:
2242
offset = MSR_PKG_PERF_STATUS;
2243
break;
2244
case IDX_DRAM_PERF:
2245
offset = MSR_DRAM_PERF_STATUS;
2246
break;
2247
case IDX_PSYS_ENERGY:
2248
offset = MSR_PLATFORM_ENERGY_STATUS;
2249
break;
2250
default:
2251
offset = -1;
2252
}
2253
return offset;
2254
}
2255
2256
int offset_to_idx(off_t offset)
2257
{
2258
int idx;
2259
2260
switch (offset) {
2261
case MSR_PKG_ENERGY_STATUS:
2262
case MSR_PKG_ENERGY_STAT:
2263
idx = IDX_PKG_ENERGY;
2264
break;
2265
case MSR_DRAM_ENERGY_STATUS:
2266
idx = IDX_DRAM_ENERGY;
2267
break;
2268
case MSR_PP0_ENERGY_STATUS:
2269
idx = IDX_PP0_ENERGY;
2270
break;
2271
case MSR_PP1_ENERGY_STATUS:
2272
idx = IDX_PP1_ENERGY;
2273
break;
2274
case MSR_PKG_PERF_STATUS:
2275
idx = IDX_PKG_PERF;
2276
break;
2277
case MSR_DRAM_PERF_STATUS:
2278
idx = IDX_DRAM_PERF;
2279
break;
2280
case MSR_PLATFORM_ENERGY_STATUS:
2281
idx = IDX_PSYS_ENERGY;
2282
break;
2283
default:
2284
idx = -1;
2285
}
2286
return idx;
2287
}
2288
2289
int idx_valid(int idx)
2290
{
2291
switch (idx) {
2292
case IDX_PKG_ENERGY:
2293
return valid_rapl_msrs & (RAPL_PKG | RAPL_AMD_F17H);
2294
case IDX_DRAM_ENERGY:
2295
return valid_rapl_msrs & RAPL_DRAM;
2296
case IDX_PP0_ENERGY:
2297
return valid_rapl_msrs & RAPL_CORE_ENERGY_STATUS;
2298
case IDX_PP1_ENERGY:
2299
return valid_rapl_msrs & RAPL_GFX;
2300
case IDX_PKG_PERF:
2301
return valid_rapl_msrs & RAPL_PKG_PERF_STATUS;
2302
case IDX_DRAM_PERF:
2303
return valid_rapl_msrs & RAPL_DRAM_PERF_STATUS;
2304
case IDX_PSYS_ENERGY:
2305
return valid_rapl_msrs & RAPL_PSYS;
2306
default:
2307
return 0;
2308
}
2309
}
2310
2311
/*
 * User-added counters, grouped by source (MSR, perf, PMT).
 * Within each group the *tp, *cp and *pp members are the heads of the
 * thread, core and package counter lists respectively.
 */
struct sys_counters {
	/* MSR added counters */
	unsigned int added_thread_counters;
	unsigned int added_core_counters;
	unsigned int added_package_counters;
	struct msr_counter *tp;
	struct msr_counter *cp;
	struct msr_counter *pp;

	/* perf added counters */
	unsigned int added_thread_perf_counters;
	unsigned int added_core_perf_counters;
	unsigned int added_package_perf_counters;
	struct perf_counter_info *perf_tp;
	struct perf_counter_info *perf_cp;
	struct perf_counter_info *perf_pp;

	/* PMT added counters */
	struct pmt_counter *pmt_tp;
	struct pmt_counter *pmt_cp;
	struct pmt_counter *pmt_pp;
} sys;
2332
2333
static size_t free_msr_counters_(struct msr_counter **pp)
2334
{
2335
struct msr_counter *p = NULL;
2336
size_t num_freed = 0;
2337
2338
while (*pp) {
2339
p = *pp;
2340
2341
if (p->msr_num != 0) {
2342
*pp = p->next;
2343
2344
free(p);
2345
++num_freed;
2346
2347
continue;
2348
}
2349
2350
pp = &p->next;
2351
}
2352
2353
return num_freed;
2354
}
2355
2356
/*
2357
* Free all added counters accessed via msr.
2358
*/
2359
static void free_sys_msr_counters(void)
2360
{
2361
/* Thread counters */
2362
sys.added_thread_counters -= free_msr_counters_(&sys.tp);
2363
2364
/* Core counters */
2365
sys.added_core_counters -= free_msr_counters_(&sys.cp);
2366
2367
/* Package counters */
2368
sys.added_package_counters -= free_msr_counters_(&sys.pp);
2369
}
2370
2371
/*
 * One complete set of counter arrays: per thread, per core and per package.
 * Three sets exist: average, even and odd (see ODD_COUNTERS / EVEN_COUNTERS).
 */
struct counters {
	struct thread_data *threads;
	struct core_data *cores;
	struct pkg_data *packages;
} average, even, odd;

/* Platform-wide counters, kept as an odd and an even copy. */
struct platform_counters {
	struct rapl_counter energy_psys;	/* MSR_PLATFORM_ENERGY_STATUS */
} platform_counters_odd, platform_counters_even;
2380
2381
#define MAX_HT_ID 3 /* support SMT-4 */
2382
2383
/*
 * Topology record of one CPU. The global cpus array is indexed by CPU
 * number (see for_all_cpus()).
 */
struct cpu_topology {
	int cpu_id;
	int core_id;		/* unique within a package */
	int package_id;
	int die_id;
	int l3_id;
	int physical_node_id;
	int logical_node_id;	/* 0-based count within the package */
	int ht_id;		/* unique within a core */
	/* Indexed by ht_id; for_all_cpus() skips entries that are <= 0. */
	int ht_sibling_cpu_id[MAX_HT_ID + 1];
	int type;
	cpu_set_t *put_ids;	/* Processing Unit/Thread IDs */
} *cpus;
2396
2397
/*
 * System-wide topology totals and maximum ids.
 * for_all_cpus() iterates CPUs 0..max_cpu_num, and GLOBAL_CORE_ID() uses
 * max_core_id + 1 as the per-package stride.
 */
struct topo_params {
	int num_packages;
	int num_die;
	int num_cpus;
	int num_cores;		/* system wide */
	int allowed_packages;
	int allowed_cpus;
	int allowed_cores;
	int max_cpu_num;
	int max_core_id;	/* within a package */
	int max_package_id;
	int max_die_id;
	int max_l3_id;
	int max_node_num;
	int nodes_per_pkg;
	int cores_per_node;
	int threads_per_core;
} topo;
2415
2416
struct timeval tv_even, tv_odd, tv_delta;
2417
2418
int *irq_column_2_cpu; /* /proc/interrupts column numbers */
2419
int *irqs_per_cpu; /* indexed by cpu_num */
2420
int *nmi_per_cpu; /* indexed by cpu_num */
2421
2422
void setup_all_buffers(bool startup);
2423
2424
char *sys_lpi_file;
2425
char *sys_lpi_file_sysfs = "/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us";
2426
char *sys_lpi_file_debugfs = "/sys/kernel/debug/pmc_core/slp_s0_residency_usec";
2427
2428
int cpu_is_not_present(int cpu)
2429
{
2430
return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set);
2431
}
2432
2433
int cpu_is_not_allowed(int cpu)
2434
{
2435
return !CPU_ISSET_S(cpu, cpu_allowed_setsize, cpu_allowed_set);
2436
}
2437
2438
/*
 * System-wide core index: core ids are only unique within a package, so each
 * package gets its own range of (topo.max_core_id + 1) slots.
 * Arguments are parenthesized so that expression arguments expand correctly.
 */
#define GLOBAL_CORE_ID(core_id, pkg_id) ((core_id) + (pkg_id) * (topo.max_core_id + 1))
2439
/*
2440
* run func(thread, core, package) in topology order
2441
* skip non-present cpus
2442
*/
2443
2444
#define PER_THREAD_PARAMS struct thread_data *t, struct core_data *c, struct pkg_data *p
2445
2446
int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pkg_data *),
2447
struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base)
2448
{
2449
int cpu, retval;
2450
2451
retval = 0;
2452
2453
for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) {
2454
struct thread_data *t;
2455
struct core_data *c;
2456
struct pkg_data *p;
2457
2458
int pkg_id = cpus[cpu].package_id;
2459
2460
if (cpu_is_not_allowed(cpu))
2461
continue;
2462
2463
if (cpus[cpu].ht_id > 0) /* skip HT sibling */
2464
continue;
2465
2466
t = &thread_base[cpu];
2467
c = &core_base[GLOBAL_CORE_ID(cpus[cpu].core_id, pkg_id)];
2468
p = &pkg_base[pkg_id];
2469
2470
retval |= func(t, c, p);
2471
2472
/* Handle HT sibling now */
2473
int i;
2474
2475
for (i = MAX_HT_ID; i > 0; --i) { /* ht_id 0 is self */
2476
if (cpus[cpu].ht_sibling_cpu_id[i] <= 0)
2477
continue;
2478
t = &thread_base[cpus[cpu].ht_sibling_cpu_id[i]];
2479
2480
retval |= func(t, c, p);
2481
}
2482
}
2483
return retval;
2484
}
2485
2486
int is_cpu_first_thread_in_core(struct thread_data *t, struct core_data *c)
2487
{
2488
return ((int)t->cpu_id == c->first_cpu || c->first_cpu < 0);
2489
}
2490
2491
int is_cpu_first_core_in_package(struct thread_data *t, struct pkg_data *p)
2492
{
2493
return ((int)t->cpu_id == p->first_cpu || p->first_cpu < 0);
2494
}
2495
2496
/* Return 1 when t passes both the first-in-core and first-in-package tests. */
int is_cpu_first_thread_in_package(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	if (!is_cpu_first_thread_in_core(t, c))
		return 0;

	return is_cpu_first_core_in_package(t, p);
}
2500
2501
int cpu_migrate(int cpu)
2502
{
2503
CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
2504
CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set);
2505
if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1)
2506
return -1;
2507
else
2508
return 0;
2509
}
2510
2511
int get_msr_fd(int cpu)
2512
{
2513
char pathname[32];
2514
int fd;
2515
2516
fd = fd_percpu[cpu];
2517
2518
if (fd)
2519
return fd;
2520
sprintf(pathname, use_android_msr_path ? "/dev/msr%d" : "/dev/cpu/%d/msr", cpu);
2521
fd = open(pathname, O_RDONLY);
2522
if (fd < 0)
2523
err(-1, "%s open failed, try chown or chmod +r %s, "
2524
"or run with --no-msr, or run as root", pathname, use_android_msr_path ? "/dev/msr*" : "/dev/cpu/*/msr");
2525
fd_percpu[cpu] = fd;
2526
2527
return fd;
2528
}
2529
2530
static void bic_disable_msr_access(void)
2531
{
2532
CLR_BIC(BIC_Mod_c6, &bic_enabled);
2533
CLR_BIC(BIC_CoreTmp, &bic_enabled);
2534
CLR_BIC(BIC_Totl_c0, &bic_enabled);
2535
CLR_BIC(BIC_Any_c0, &bic_enabled);
2536
CLR_BIC(BIC_GFX_c0, &bic_enabled);
2537
CLR_BIC(BIC_CPUGFX, &bic_enabled);
2538
CLR_BIC(BIC_PkgTmp, &bic_enabled);
2539
2540
free_sys_msr_counters();
2541
}
2542
2543
static void bic_disable_perf_access(void)
2544
{
2545
CLR_BIC(BIC_IPC, &bic_enabled);
2546
CLR_BIC(BIC_LLC_MRPS, &bic_enabled);
2547
CLR_BIC(BIC_LLC_HIT, &bic_enabled);
2548
CLR_BIC(BIC_L2_MRPS, &bic_enabled);
2549
CLR_BIC(BIC_L2_HIT, &bic_enabled);
2550
}
2551
2552
static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags)
2553
{
2554
assert(!no_perf);
2555
2556
return syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags);
2557
}
2558
2559
/*
 * Open an enabled perf counter of the given type/config on one CPU.
 *
 * The event is opened with pid -1 and flags 0, and may join the group
 * identified by group_fd. Must not be called when perf use is disabled
 * (asserts !no_perf).
 *
 * Returns the result of perf_event_open(), narrowed to int.
 */
static long open_perf_counter(int cpu, unsigned int type, unsigned int config, int group_fd, __u64 read_format)
{
	struct perf_event_attr attr;
	int fd;

	assert(!no_perf);

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = type;
	attr.config = config;
	attr.read_format = read_format;
	attr.sample_type = PERF_SAMPLE_IDENTIFIER;
	attr.disabled = 0;

	fd = perf_event_open(&attr, -1, cpu, group_fd, 0);

	return fd;
}
2580
2581
int get_instr_count_fd(int cpu)
2582
{
2583
if (fd_instr_count_percpu[cpu])
2584
return fd_instr_count_percpu[cpu];
2585
2586
fd_instr_count_percpu[cpu] = open_perf_counter(cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, -1, 0);
2587
2588
return fd_instr_count_percpu[cpu];
2589
}
2590
2591
int get_msr(int cpu, off_t offset, unsigned long long *msr)
2592
{
2593
ssize_t retval;
2594
2595
assert(!no_msr);
2596
2597
retval = pread(get_msr_fd(cpu), msr, sizeof(*msr), offset);
2598
2599
if (retval != sizeof *msr)
2600
err(-1, "cpu%d: msr offset 0x%llx read failed", cpu, (unsigned long long)offset);
2601
2602
return 0;
2603
}
2604
2605
int add_msr_counter(int cpu, off_t offset)
2606
{
2607
ssize_t retval;
2608
unsigned long long value;
2609
2610
if (no_msr)
2611
return -1;
2612
2613
if (!offset)
2614
return -1;
2615
2616
retval = pread(get_msr_fd(cpu), &value, sizeof(value), offset);
2617
2618
/* if the read failed, the probe fails */
2619
if (retval != sizeof(value))
2620
return -1;
2621
2622
if (value == 0)
2623
return 0;
2624
2625
return 1;
2626
}
2627
2628
int add_rapl_msr_counter(int cpu, const struct rapl_counter_arch_info *cai)
2629
{
2630
int ret;
2631
2632
if (!(valid_rapl_msrs & cai->feature_mask))
2633
return -1;
2634
2635
ret = add_msr_counter(cpu, cai->msr);
2636
if (ret < 0)
2637
return -1;
2638
2639
switch (cai->rci_index) {
2640
case RAPL_RCI_INDEX_ENERGY_PKG:
2641
case RAPL_RCI_INDEX_ENERGY_CORES:
2642
case RAPL_RCI_INDEX_DRAM:
2643
case RAPL_RCI_INDEX_GFX:
2644
case RAPL_RCI_INDEX_ENERGY_PLATFORM:
2645
if (ret == 0)
2646
return 1;
2647
}
2648
2649
/* PKG,DRAM_PERF_STATUS MSRs, can return any value */
2650
return 1;
2651
}
2652
2653
/* Convert CPU ID to domain ID for given added perf counter. */
2654
unsigned int cpu_to_domain(const struct perf_counter_info *pc, int cpu)
2655
{
2656
switch (pc->scope) {
2657
case SCOPE_CPU:
2658
return cpu;
2659
2660
case SCOPE_CORE:
2661
return cpus[cpu].core_id;
2662
2663
case SCOPE_PACKAGE:
2664
return cpus[cpu].package_id;
2665
}
2666
2667
__builtin_unreachable();
2668
}
2669
2670
#define MAX_DEFERRED 16
2671
char *deferred_add_names[MAX_DEFERRED];
2672
char *deferred_skip_names[MAX_DEFERRED];
2673
int deferred_add_index;
2674
int deferred_skip_index;
2675
unsigned int deferred_add_consumed;
2676
unsigned int deferred_skip_consumed;
2677
2678
/*
2679
* HIDE_LIST - hide this list of counters, show the rest [default]
2680
* SHOW_LIST - show this list of counters, hide the rest
2681
*/
2682
enum show_hide_mode { SHOW_LIST, HIDE_LIST } global_show_hide_mode = HIDE_LIST;
2683
2684
/*
 * Print the usage summary and the list of command-line options to outf.
 * The whole text is one string literal emitted by a single fprintf() call.
 */
void help(void)
{
	fprintf(outf,
		"Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n"
		"\n"
		"Turbostat forks the specified COMMAND and prints statistics\n"
		"when COMMAND completes.\n"
		"If no COMMAND is specified, turbostat wakes every 5-seconds\n"
		"to print statistics, until interrupted.\n"
		" -a, --add counter\n"
		" add a counter\n"
		" eg. --add msr0x10,u64,cpu,delta,MY_TSC\n"
		" eg. --add perf/cstate_pkg/c2-residency,package,delta,percent,perfPC2\n"
		" eg. --add pmt,name=XTAL,type=raw,domain=package0,offset=0,lsb=0,msb=63,guid=0x1a067102\n"
		" -c, --cpu cpu-set\n"
		" limit output to summary plus cpu-set:\n"
		" {core | package | j,k,l..m,n-p }\n"
		" -d, --debug\n"
		" displays usec, Time_Of_Day_Seconds and more debugging\n"
		" debug messages are printed to stderr\n"
		" -D, --Dump\n"
		" displays the raw counter values\n"
		" -e, --enable [all | column]\n"
		" shows all or the specified disabled column\n"
		" -f, --force\n"
		" force load turbostat with minimum default features on unsupported platforms.\n"
		" -H, --hide [column | column,column,...]\n"
		" hide the specified column(s)\n"
		" -i, --interval sec.subsec\n"
		" override default 5-second measurement interval\n"
		" -J, --Joules\n"
		" displays energy in Joules instead of Watts\n"
		" -l, --list\n"
		" list column headers only\n"
		" -M, --no-msr\n"
		" disable all uses of the MSR driver\n"
		" -P, --no-perf\n"
		" disable all uses of the perf API\n"
		" -n, --num_iterations num\n"
		" number of the measurement iterations\n"
		" -N, --header_iterations num\n"
		" print header every num iterations\n"
		" -o, --out file\n"
		" create or truncate \"file\" for all output\n"
		" -q, --quiet\n"
		" skip decoding system configuration header\n"
		" -s, --show [column | column,column,...]\n"
		" show only the specified column(s)\n"
		" -S, --Summary\n"
		" limits output to 1-line system summary per interval\n"
		" -T, --TCC temperature\n"
		" sets the Thermal Control Circuit temperature in\n"
		" degrees Celsius\n"
		" -h, --help\n"
		/* The final literal also carries the --version entry and the man page hint. */
		" print this help message\n -v, --version\n\t\tprint version information\n\nFor more help, run \"man turbostat\"\n");
}
2740
2741
/*
2742
* bic_lookup
2743
* for all the strings in comma separate name_list,
2744
* set the approprate bit in return value.
2745
*/
2746
void bic_lookup(cpu_set_t *ret_set, char *name_list, enum show_hide_mode mode)
2747
{
2748
unsigned int i;
2749
2750
while (name_list) {
2751
char *comma;
2752
2753
comma = strchr(name_list, ',');
2754
2755
if (comma)
2756
*comma = '\0';
2757
2758
for (i = 0; i < MAX_BIC; ++i) {
2759
if (!strcmp(name_list, bic[i].name)) {
2760
SET_BIC(i, ret_set);
2761
break;
2762
}
2763
if (!strcmp(name_list, "all")) {
2764
bic_set_all(ret_set);
2765
break;
2766
} else if (!strcmp(name_list, "topology")) {
2767
CPU_OR(ret_set, ret_set, &bic_group_topology);
2768
break;
2769
} else if (!strcmp(name_list, "power")) {
2770
CPU_OR(ret_set, ret_set, &bic_group_thermal_pwr);
2771
break;
2772
} else if (!strcmp(name_list, "idle")) {
2773
CPU_OR(ret_set, ret_set, &bic_group_idle);
2774
break;
2775
} else if (!strcmp(name_list, "cache")) {
2776
CPU_OR(ret_set, ret_set, &bic_group_cache);
2777
break;
2778
} else if (!strcmp(name_list, "llc")) {
2779
CPU_OR(ret_set, ret_set, &bic_group_cache);
2780
break;
2781
} else if (!strcmp(name_list, "swidle")) {
2782
CPU_OR(ret_set, ret_set, &bic_group_sw_idle);
2783
break;
2784
} else if (!strcmp(name_list, "sysfs")) { /* legacy compatibility */
2785
CPU_OR(ret_set, ret_set, &bic_group_sw_idle);
2786
break;
2787
} else if (!strcmp(name_list, "hwidle")) {
2788
CPU_OR(ret_set, ret_set, &bic_group_hw_idle);
2789
break;
2790
} else if (!strcmp(name_list, "frequency")) {
2791
CPU_OR(ret_set, ret_set, &bic_group_frequency);
2792
break;
2793
} else if (!strcmp(name_list, "other")) {
2794
CPU_OR(ret_set, ret_set, &bic_group_other);
2795
break;
2796
}
2797
}
2798
if (i == MAX_BIC) {
2799
if (mode == SHOW_LIST) {
2800
deferred_add_names[deferred_add_index++] = name_list;
2801
if (deferred_add_index >= MAX_DEFERRED) {
2802
fprintf(stderr, "More than max %d un-recognized --add options '%s'\n", MAX_DEFERRED, name_list);
2803
help();
2804
exit(1);
2805
}
2806
} else {
2807
deferred_skip_names[deferred_skip_index++] = name_list;
2808
if (debug)
2809
fprintf(stderr, "deferred \"%s\"\n", name_list);
2810
if (deferred_skip_index >= MAX_DEFERRED) {
2811
fprintf(stderr, "More than max %d un-recognized --skip options '%s'\n", MAX_DEFERRED, name_list);
2812
help();
2813
exit(1);
2814
}
2815
}
2816
}
2817
2818
name_list = comma;
2819
if (name_list)
2820
name_list++;
2821
2822
}
2823
}
2824
2825
/*
2826
* print_name()
2827
* Print column header name for raw 64-bit counter in 16 columns (at least 8-char plus a tab)
2828
* Otherwise, allow the name + tab to fit within 8-coumn tab-stop.
2829
* In both cases, left justififed, just like other turbostat columns,
2830
* to allow the column values to consume the tab.
2831
*
2832
* Yes, 32-bit counters can overflow 8-columns, and
2833
* 64-bit counters can overflow 16-columns, but that is uncommon.
2834
*/
2835
static inline int print_name(int width, int *printed, char *delim, char *name, enum counter_type type, enum counter_format format)
2836
{
2837
UNUSED(type);
2838
2839
if (format == FORMAT_RAW && width >= 64)
2840
return (sprintf(outp, "%s%-8s", (*printed++ ? delim : ""), name));
2841
else
2842
return (sprintf(outp, "%s%s", (*printed++ ? delim : ""), name));
2843
}
2844
2845
static inline int print_hex_value(int width, int *printed, char *delim, unsigned long long value)
2846
{
2847
if (width <= 32)
2848
return (sprintf(outp, "%s%08x", (*printed++ ? delim : ""), (unsigned int)value));
2849
else
2850
return (sprintf(outp, "%s%016llx", (*printed++ ? delim : ""), value));
2851
}
2852
2853
static inline int print_decimal_value(int width, int *printed, char *delim, unsigned long long value)
2854
{
2855
if (width <= 32)
2856
return (sprintf(outp, "%s%d", (*printed++ ? delim : ""), (unsigned int)value));
2857
else
2858
return (sprintf(outp, "%s%-8lld", (*printed++ ? delim : ""), value));
2859
}
2860
2861
static inline int print_float_value(int *printed, char *delim, double value)
2862
{
2863
return (sprintf(outp, "%s%0.2f", (*printed++ ? delim : ""), value));
2864
}
2865
2866
void print_header(char *delim)
2867
{
2868
struct msr_counter *mp;
2869
struct perf_counter_info *pp;
2870
struct pmt_counter *ppmt;
2871
int printed = 0;
2872
2873
if (DO_BIC(BIC_USEC))
2874
outp += sprintf(outp, "%susec", (printed++ ? delim : ""));
2875
if (DO_BIC(BIC_TOD))
2876
outp += sprintf(outp, "%sTime_Of_Day_Seconds", (printed++ ? delim : ""));
2877
if (DO_BIC(BIC_Package))
2878
outp += sprintf(outp, "%sPackage", (printed++ ? delim : ""));
2879
if (DO_BIC(BIC_Die))
2880
outp += sprintf(outp, "%sDie", (printed++ ? delim : ""));
2881
if (DO_BIC(BIC_L3))
2882
outp += sprintf(outp, "%sL3", (printed++ ? delim : ""));
2883
if (DO_BIC(BIC_Node))
2884
outp += sprintf(outp, "%sNode", (printed++ ? delim : ""));
2885
if (DO_BIC(BIC_Core))
2886
outp += sprintf(outp, "%sCore", (printed++ ? delim : ""));
2887
if (DO_BIC(BIC_CPU))
2888
outp += sprintf(outp, "%sCPU", (printed++ ? delim : ""));
2889
if (DO_BIC(BIC_APIC))
2890
outp += sprintf(outp, "%sAPIC", (printed++ ? delim : ""));
2891
if (DO_BIC(BIC_X2APIC))
2892
outp += sprintf(outp, "%sX2APIC", (printed++ ? delim : ""));
2893
if (DO_BIC(BIC_Avg_MHz))
2894
outp += sprintf(outp, "%sAvg_MHz", (printed++ ? delim : ""));
2895
if (DO_BIC(BIC_Busy))
2896
outp += sprintf(outp, "%sBusy%%", (printed++ ? delim : ""));
2897
if (DO_BIC(BIC_Bzy_MHz))
2898
outp += sprintf(outp, "%sBzy_MHz", (printed++ ? delim : ""));
2899
if (DO_BIC(BIC_TSC_MHz))
2900
outp += sprintf(outp, "%sTSC_MHz", (printed++ ? delim : ""));
2901
2902
if (DO_BIC(BIC_IPC))
2903
outp += sprintf(outp, "%sIPC", (printed++ ? delim : ""));
2904
2905
if (DO_BIC(BIC_IRQ)) {
2906
if (sums_need_wide_columns)
2907
outp += sprintf(outp, "%s IRQ", (printed++ ? delim : ""));
2908
else
2909
outp += sprintf(outp, "%sIRQ", (printed++ ? delim : ""));
2910
}
2911
if (DO_BIC(BIC_NMI)) {
2912
if (sums_need_wide_columns)
2913
outp += sprintf(outp, "%s NMI", (printed++ ? delim : ""));
2914
else
2915
outp += sprintf(outp, "%sNMI", (printed++ ? delim : ""));
2916
}
2917
2918
if (DO_BIC(BIC_SMI))
2919
outp += sprintf(outp, "%sSMI", (printed++ ? delim : ""));
2920
2921
if (DO_BIC(BIC_LLC_MRPS))
2922
outp += sprintf(outp, "%sLLCMRPS", (printed++ ? delim : ""));
2923
2924
if (DO_BIC(BIC_LLC_HIT))
2925
outp += sprintf(outp, "%sLLC%%hit", (printed++ ? delim : ""));
2926
2927
if (DO_BIC(BIC_L2_MRPS))
2928
outp += sprintf(outp, "%sL2MRPS", (printed++ ? delim : ""));
2929
2930
if (DO_BIC(BIC_L2_HIT))
2931
outp += sprintf(outp, "%sL2%%hit", (printed++ ? delim : ""));
2932
2933
for (mp = sys.tp; mp; mp = mp->next)
2934
outp += print_name(mp->width, &printed, delim, mp->name, mp->type, mp->format);
2935
2936
for (pp = sys.perf_tp; pp; pp = pp->next)
2937
outp += print_name(pp->width, &printed, delim, pp->name, pp->type, pp->format);
2938
2939
ppmt = sys.pmt_tp;
2940
while (ppmt) {
2941
switch (ppmt->type) {
2942
case PMT_TYPE_RAW:
2943
outp += print_name(pmt_counter_get_width(ppmt), &printed, delim, ppmt->name, COUNTER_ITEMS, ppmt->format);
2944
break;
2945
2946
case PMT_TYPE_XTAL_TIME:
2947
case PMT_TYPE_TCORE_CLOCK:
2948
outp += print_name(32, &printed, delim, ppmt->name, COUNTER_ITEMS, ppmt->format);
2949
break;
2950
}
2951
2952
ppmt = ppmt->next;
2953
}
2954
2955
if (DO_BIC(BIC_CPU_c1))
2956
outp += sprintf(outp, "%sCPU%%c1", (printed++ ? delim : ""));
2957
if (DO_BIC(BIC_CPU_c3))
2958
outp += sprintf(outp, "%sCPU%%c3", (printed++ ? delim : ""));
2959
if (DO_BIC(BIC_CPU_c6))
2960
outp += sprintf(outp, "%sCPU%%c6", (printed++ ? delim : ""));
2961
if (DO_BIC(BIC_CPU_c7))
2962
outp += sprintf(outp, "%sCPU%%c7", (printed++ ? delim : ""));
2963
2964
if (DO_BIC(BIC_Mod_c6))
2965
outp += sprintf(outp, "%sMod%%c6", (printed++ ? delim : ""));
2966
2967
if (DO_BIC(BIC_CoreTmp))
2968
outp += sprintf(outp, "%sCoreTmp", (printed++ ? delim : ""));
2969
2970
if (DO_BIC(BIC_CORE_THROT_CNT))
2971
outp += sprintf(outp, "%sCoreThr", (printed++ ? delim : ""));
2972
2973
if (valid_rapl_msrs && !rapl_joules) {
2974
if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl)
2975
outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : ""));
2976
} else if (valid_rapl_msrs && rapl_joules) {
2977
if (DO_BIC(BIC_Cor_J) && platform->has_per_core_rapl)
2978
outp += sprintf(outp, "%sCor_J", (printed++ ? delim : ""));
2979
}
2980
2981
for (mp = sys.cp; mp; mp = mp->next)
2982
outp += print_name(mp->width, &printed, delim, mp->name, mp->type, mp->format);
2983
2984
for (pp = sys.perf_cp; pp; pp = pp->next)
2985
outp += print_name(pp->width, &printed, delim, pp->name, pp->type, pp->format);
2986
2987
ppmt = sys.pmt_cp;
2988
while (ppmt) {
2989
switch (ppmt->type) {
2990
case PMT_TYPE_RAW:
2991
outp += print_name(pmt_counter_get_width(ppmt), &printed, delim, ppmt->name, COUNTER_ITEMS, ppmt->format);
2992
2993
break;
2994
2995
case PMT_TYPE_XTAL_TIME:
2996
case PMT_TYPE_TCORE_CLOCK:
2997
outp += print_name(32, &printed, delim, ppmt->name, COUNTER_ITEMS, ppmt->format);
2998
break;
2999
}
3000
3001
ppmt = ppmt->next;
3002
}
3003
if (DO_BIC(BIC_PkgTmp))
3004
outp += sprintf(outp, "%sPkgTmp", (printed++ ? delim : ""));
3005
3006
if (DO_BIC(BIC_GFX_rc6))
3007
outp += sprintf(outp, "%sGFX%%rc6", (printed++ ? delim : ""));
3008
3009
if (DO_BIC(BIC_GFXMHz))
3010
outp += sprintf(outp, "%sGFXMHz", (printed++ ? delim : ""));
3011
3012
if (DO_BIC(BIC_GFXACTMHz))
3013
outp += sprintf(outp, "%sGFXAMHz", (printed++ ? delim : ""));
3014
3015
if (DO_BIC(BIC_SAM_mc6))
3016
outp += sprintf(outp, "%sSAM%%mc6", (printed++ ? delim : ""));
3017
3018
if (DO_BIC(BIC_SAMMHz))
3019
outp += sprintf(outp, "%sSAMMHz", (printed++ ? delim : ""));
3020
3021
if (DO_BIC(BIC_SAMACTMHz))
3022
outp += sprintf(outp, "%sSAMAMHz", (printed++ ? delim : ""));
3023
3024
if (DO_BIC(BIC_Totl_c0))
3025
outp += sprintf(outp, "%sTotl%%C0", (printed++ ? delim : ""));
3026
if (DO_BIC(BIC_Any_c0))
3027
outp += sprintf(outp, "%sAny%%C0", (printed++ ? delim : ""));
3028
if (DO_BIC(BIC_GFX_c0))
3029
outp += sprintf(outp, "%sGFX%%C0", (printed++ ? delim : ""));
3030
if (DO_BIC(BIC_CPUGFX))
3031
outp += sprintf(outp, "%sCPUGFX%%", (printed++ ? delim : ""));
3032
3033
if (DO_BIC(BIC_Pkgpc2))
3034
outp += sprintf(outp, "%sPkg%%pc2", (printed++ ? delim : ""));
3035
if (DO_BIC(BIC_Pkgpc3))
3036
outp += sprintf(outp, "%sPkg%%pc3", (printed++ ? delim : ""));
3037
if (DO_BIC(BIC_Pkgpc6))
3038
outp += sprintf(outp, "%sPkg%%pc6", (printed++ ? delim : ""));
3039
if (DO_BIC(BIC_Pkgpc7))
3040
outp += sprintf(outp, "%sPkg%%pc7", (printed++ ? delim : ""));
3041
if (DO_BIC(BIC_Pkgpc8))
3042
outp += sprintf(outp, "%sPkg%%pc8", (printed++ ? delim : ""));
3043
if (DO_BIC(BIC_Pkgpc9))
3044
outp += sprintf(outp, "%sPkg%%pc9", (printed++ ? delim : ""));
3045
if (DO_BIC(BIC_Pkgpc10))
3046
outp += sprintf(outp, "%sPk%%pc10", (printed++ ? delim : ""));
3047
if (DO_BIC(BIC_Diec6))
3048
outp += sprintf(outp, "%sDie%%c6", (printed++ ? delim : ""));
3049
if (DO_BIC(BIC_CPU_LPI))
3050
outp += sprintf(outp, "%sCPU%%LPI", (printed++ ? delim : ""));
3051
if (DO_BIC(BIC_SYS_LPI))
3052
outp += sprintf(outp, "%sSYS%%LPI", (printed++ ? delim : ""));
3053
3054
if (!rapl_joules) {
3055
if (DO_BIC(BIC_PkgWatt))
3056
outp += sprintf(outp, "%sPkgWatt", (printed++ ? delim : ""));
3057
if (DO_BIC(BIC_CorWatt) && !platform->has_per_core_rapl)
3058
outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : ""));
3059
if (DO_BIC(BIC_GFXWatt))
3060
outp += sprintf(outp, "%sGFXWatt", (printed++ ? delim : ""));
3061
if (DO_BIC(BIC_RAMWatt))
3062
outp += sprintf(outp, "%sRAMWatt", (printed++ ? delim : ""));
3063
if (DO_BIC(BIC_PKG__))
3064
outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : ""));
3065
if (DO_BIC(BIC_RAM__))
3066
outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
3067
} else {
3068
if (DO_BIC(BIC_Pkg_J))
3069
outp += sprintf(outp, "%sPkg_J", (printed++ ? delim : ""));
3070
if (DO_BIC(BIC_Cor_J) && !platform->has_per_core_rapl)
3071
outp += sprintf(outp, "%sCor_J", (printed++ ? delim : ""));
3072
if (DO_BIC(BIC_GFX_J))
3073
outp += sprintf(outp, "%sGFX_J", (printed++ ? delim : ""));
3074
if (DO_BIC(BIC_RAM_J))
3075
outp += sprintf(outp, "%sRAM_J", (printed++ ? delim : ""));
3076
if (DO_BIC(BIC_PKG__))
3077
outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : ""));
3078
if (DO_BIC(BIC_RAM__))
3079
outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
3080
}
3081
if (DO_BIC(BIC_UNCORE_MHZ))
3082
outp += sprintf(outp, "%sUncMHz", (printed++ ? delim : ""));
3083
3084
for (mp = sys.pp; mp; mp = mp->next)
3085
outp += print_name(mp->width, &printed, delim, mp->name, mp->type, mp->format);
3086
3087
for (pp = sys.perf_pp; pp; pp = pp->next)
3088
outp += print_name(pp->width, &printed, delim, pp->name, pp->type, pp->format);
3089
3090
ppmt = sys.pmt_pp;
3091
while (ppmt) {
3092
switch (ppmt->type) {
3093
case PMT_TYPE_RAW:
3094
outp += print_name(pmt_counter_get_width(ppmt), &printed, delim, ppmt->name, COUNTER_ITEMS, ppmt->format);
3095
break;
3096
3097
case PMT_TYPE_XTAL_TIME:
3098
case PMT_TYPE_TCORE_CLOCK:
3099
outp += print_name(32, &printed, delim, ppmt->name, COUNTER_ITEMS, ppmt->format);
3100
break;
3101
}
3102
3103
ppmt = ppmt->next;
3104
}
3105
3106
if (DO_BIC(BIC_SysWatt))
3107
outp += sprintf(outp, "%sSysWatt", (printed++ ? delim : ""));
3108
if (DO_BIC(BIC_Sys_J))
3109
outp += sprintf(outp, "%sSys_J", (printed++ ? delim : ""));
3110
3111
outp += sprintf(outp, "\n");
3112
}
3113
3114
/*
 * pct(numerator, denominator)
 *
 * Return sanity checked percentage (100.0 * numerator/denominator)
 *
 * n < 0: nan
 * d <= 0: nan
 * n/d > 1.1: nan
 */
double pct(double numerator, double denominator)
{
	double percent;

	/* reject negative numerators and non-positive denominators up front */
	if (numerator < 0 || denominator <= 0)
		return nan("");

	percent = 100.0 * numerator / denominator;

	/* anything beyond 110% is treated as bogus */
	return (percent > 110.0) ? nan("") : percent;
}
3140
3141
int dump_counters(PER_THREAD_PARAMS)
3142
{
3143
int i;
3144
struct msr_counter *mp;
3145
struct platform_counters *pplat_cnt = p == odd.packages ? &platform_counters_odd : &platform_counters_even;
3146
3147
outp += sprintf(outp, "t %p, c %p, p %p\n", t, c, p);
3148
3149
if (t) {
3150
outp += sprintf(outp, "CPU: %d flags 0x%x\n", t->cpu_id, t->flags);
3151
outp += sprintf(outp, "TSC: %016llX\n", t->tsc);
3152
outp += sprintf(outp, "aperf: %016llX\n", t->aperf);
3153
outp += sprintf(outp, "mperf: %016llX\n", t->mperf);
3154
outp += sprintf(outp, "c1: %016llX\n", t->c1);
3155
3156
if (DO_BIC(BIC_IPC))
3157
outp += sprintf(outp, "IPC: %lld\n", t->instr_count);
3158
3159
if (DO_BIC(BIC_IRQ))
3160
outp += sprintf(outp, "IRQ: %lld\n", t->irq_count);
3161
if (DO_BIC(BIC_NMI))
3162
outp += sprintf(outp, "IRQ: %lld\n", t->nmi_count);
3163
if (DO_BIC(BIC_SMI))
3164
outp += sprintf(outp, "SMI: %d\n", t->smi_count);
3165
3166
outp += sprintf(outp, "LLC refs: %lld", t->llc.references);
3167
outp += sprintf(outp, "LLC miss: %lld", t->llc.misses);
3168
outp += sprintf(outp, "LLC Hit%%: %.2f", pct((t->llc.references - t->llc.misses), t->llc.references));
3169
3170
outp += sprintf(outp, "L2 refs: %lld", t->l2.references);
3171
outp += sprintf(outp, "L2 hits: %lld", t->l2.hits);
3172
outp += sprintf(outp, "L2 Hit%%: %.2f", pct(t->l2.hits, t->l2.references));
3173
3174
for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
3175
outp += sprintf(outp, "tADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num, t->counter[i], mp->sp->path);
3176
}
3177
}
3178
3179
if (c && is_cpu_first_thread_in_core(t, c)) {
3180
outp += sprintf(outp, "core: %d\n", cpus[t->cpu_id].core_id);
3181
outp += sprintf(outp, "c3: %016llX\n", c->c3);
3182
outp += sprintf(outp, "c6: %016llX\n", c->c6);
3183
outp += sprintf(outp, "c7: %016llX\n", c->c7);
3184
outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c);
3185
outp += sprintf(outp, "cpu_throt_count: %016llX\n", c->core_throt_cnt);
3186
3187
const unsigned long long energy_value = c->core_energy.raw_value * c->core_energy.scale;
3188
const double energy_scale = c->core_energy.scale;
3189
3190
if (c->core_energy.unit == RAPL_UNIT_JOULES)
3191
outp += sprintf(outp, "Joules: %0llX (scale: %lf)\n", energy_value, energy_scale);
3192
3193
for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
3194
outp += sprintf(outp, "cADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num, c->counter[i], mp->sp->path);
3195
}
3196
outp += sprintf(outp, "mc6_us: %016llX\n", c->mc6_us);
3197
}
3198
3199
if (p && is_cpu_first_core_in_package(t, p)) {
3200
outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0);
3201
outp += sprintf(outp, "Any cores: %016llX\n", p->pkg_any_core_c0);
3202
outp += sprintf(outp, "Any GFX: %016llX\n", p->pkg_any_gfxe_c0);
3203
outp += sprintf(outp, "CPU + GFX: %016llX\n", p->pkg_both_core_gfxe_c0);
3204
3205
outp += sprintf(outp, "pc2: %016llX\n", p->pc2);
3206
if (DO_BIC(BIC_Pkgpc3))
3207
outp += sprintf(outp, "pc3: %016llX\n", p->pc3);
3208
if (DO_BIC(BIC_Pkgpc6))
3209
outp += sprintf(outp, "pc6: %016llX\n", p->pc6);
3210
if (DO_BIC(BIC_Pkgpc7))
3211
outp += sprintf(outp, "pc7: %016llX\n", p->pc7);
3212
outp += sprintf(outp, "pc8: %016llX\n", p->pc8);
3213
outp += sprintf(outp, "pc9: %016llX\n", p->pc9);
3214
outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
3215
outp += sprintf(outp, "cpu_lpi: %016llX\n", p->cpu_lpi);
3216
outp += sprintf(outp, "sys_lpi: %016llX\n", p->sys_lpi);
3217
outp += sprintf(outp, "Joules PKG: %0llX\n", p->energy_pkg.raw_value);
3218
outp += sprintf(outp, "Joules COR: %0llX\n", p->energy_cores.raw_value);
3219
outp += sprintf(outp, "Joules GFX: %0llX\n", p->energy_gfx.raw_value);
3220
outp += sprintf(outp, "Joules RAM: %0llX\n", p->energy_dram.raw_value);
3221
outp += sprintf(outp, "Joules PSYS: %0llX\n", pplat_cnt->energy_psys.raw_value);
3222
outp += sprintf(outp, "Throttle PKG: %0llX\n", p->rapl_pkg_perf_status.raw_value);
3223
outp += sprintf(outp, "Throttle RAM: %0llX\n", p->rapl_dram_perf_status.raw_value);
3224
outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c);
3225
3226
for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
3227
outp += sprintf(outp, "pADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num, p->counter[i], mp->sp->path);
3228
}
3229
}
3230
3231
outp += sprintf(outp, "\n");
3232
3233
return 0;
3234
}
3235
3236
double rapl_counter_get_value(const struct rapl_counter *c, enum rapl_unit desired_unit, double interval)
3237
{
3238
assert(desired_unit != RAPL_UNIT_INVALID);
3239
3240
/*
3241
* For now we don't expect anything other than joules,
3242
* so just simplify the logic.
3243
*/
3244
assert(c->unit == RAPL_UNIT_JOULES);
3245
3246
const double scaled = c->raw_value * c->scale;
3247
3248
if (desired_unit == RAPL_UNIT_WATTS)
3249
return scaled / interval;
3250
return scaled;
3251
}
3252
3253
void get_perf_llc_stats(int cpu, struct llc_stats *llc)
3254
{
3255
struct read_format {
3256
unsigned long long num_read;
3257
struct llc_stats llc;
3258
} r;
3259
const ssize_t expected_read_size = sizeof(r);
3260
ssize_t actual_read_size;
3261
3262
actual_read_size = read(fd_llc_percpu[cpu], &r, expected_read_size);
3263
3264
if (actual_read_size == -1)
3265
err(-1, "%s(cpu%d,) %d,,%ld", __func__, cpu, fd_llc_percpu[cpu], expected_read_size);
3266
3267
llc->references = r.llc.references;
3268
llc->misses = r.llc.misses;
3269
if (actual_read_size != expected_read_size)
3270
warn("%s: failed to read perf_data (req %zu act %zu)", __func__, expected_read_size, actual_read_size);
3271
}
3272
3273
void get_perf_l2_stats(int cpu, struct l2_stats *l2)
3274
{
3275
struct read_format {
3276
unsigned long long num_read;
3277
struct l2_stats l2;
3278
} r;
3279
const ssize_t expected_read_size = sizeof(r);
3280
ssize_t actual_read_size;
3281
3282
actual_read_size = read(fd_l2_percpu[cpu], &r, expected_read_size);
3283
3284
if (actual_read_size == -1)
3285
err(-1, "%s(cpu%d,) %d,,%ld", __func__, cpu, fd_l2_percpu[cpu], expected_read_size);
3286
3287
l2->references = r.l2.references;
3288
l2->hits = r.l2.hits;
3289
if (actual_read_size != expected_read_size)
3290
warn("%s: cpu%d: failed to read(%d) perf_data (req %zu act %zu)", __func__, cpu, fd_l2_percpu[cpu], expected_read_size, actual_read_size);
3291
}
3292
3293
/*
3294
* column formatting convention & formats
3295
*/
3296
int format_counters(PER_THREAD_PARAMS)
3297
{
3298
static int count;
3299
3300
struct platform_counters *pplat_cnt = NULL;
3301
double interval_float, tsc;
3302
char *fmt8 = "%s%.2f";
3303
3304
int i;
3305
struct msr_counter *mp;
3306
struct perf_counter_info *pp;
3307
struct pmt_counter *ppmt;
3308
char *delim = "\t";
3309
int printed = 0;
3310
3311
if (t == average.threads) {
3312
pplat_cnt = count & 1 ? &platform_counters_odd : &platform_counters_even;
3313
++count;
3314
}
3315
3316
/* if showing only 1st thread in core and this isn't one, bail out */
3317
if (show_core_only && !is_cpu_first_thread_in_core(t, c))
3318
return 0;
3319
3320
/* if showing only 1st thread in pkg and this isn't one, bail out */
3321
if (show_pkg_only && !is_cpu_first_core_in_package(t, p))
3322
return 0;
3323
3324
/*if not summary line and --cpu is used */
3325
if ((t != average.threads) && (cpu_subset && !CPU_ISSET_S(t->cpu_id, cpu_subset_size, cpu_subset)))
3326
return 0;
3327
3328
if (DO_BIC(BIC_USEC)) {
3329
/* on each row, print how many usec each timestamp took to gather */
3330
struct timeval tv;
3331
3332
timersub(&t->tv_end, &t->tv_begin, &tv);
3333
outp += sprintf(outp, "%5ld\t", tv.tv_sec * 1000000 + tv.tv_usec);
3334
}
3335
3336
/* Time_Of_Day_Seconds: on each row, print sec.usec last timestamp taken */
3337
if (DO_BIC(BIC_TOD))
3338
outp += sprintf(outp, "%10ld.%06ld\t", t->tv_end.tv_sec, t->tv_end.tv_usec);
3339
3340
interval_float = t->tv_delta.tv_sec + t->tv_delta.tv_usec / 1000000.0;
3341
3342
tsc = t->tsc * tsc_tweak;
3343
3344
/* topo columns, print blanks on 1st (average) line */
3345
if (t == average.threads) {
3346
if (DO_BIC(BIC_Package))
3347
outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
3348
if (DO_BIC(BIC_Die))
3349
outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
3350
if (DO_BIC(BIC_L3))
3351
outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
3352
if (DO_BIC(BIC_Node))
3353
outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
3354
if (DO_BIC(BIC_Core))
3355
outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
3356
if (DO_BIC(BIC_CPU))
3357
outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
3358
if (DO_BIC(BIC_APIC))
3359
outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
3360
if (DO_BIC(BIC_X2APIC))
3361
outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
3362
} else {
3363
if (DO_BIC(BIC_Package)) {
3364
if (p)
3365
outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), cpus[t->cpu_id].package_id);
3366
else
3367
outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
3368
}
3369
if (DO_BIC(BIC_Die)) {
3370
if (c)
3371
outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), cpus[t->cpu_id].die_id);
3372
else
3373
outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
3374
}
3375
if (DO_BIC(BIC_L3)) {
3376
if (c)
3377
outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), cpus[t->cpu_id].l3_id);
3378
else
3379
outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
3380
}
3381
if (DO_BIC(BIC_Node)) {
3382
if (t)
3383
outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), cpus[t->cpu_id].physical_node_id);
3384
else
3385
outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
3386
}
3387
if (DO_BIC(BIC_Core)) {
3388
if (c)
3389
outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), cpus[t->cpu_id].core_id);
3390
else
3391
outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
3392
}
3393
if (DO_BIC(BIC_CPU))
3394
outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->cpu_id);
3395
if (DO_BIC(BIC_APIC))
3396
outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->apic_id);
3397
if (DO_BIC(BIC_X2APIC))
3398
outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->x2apic_id);
3399
}
3400
3401
if (DO_BIC(BIC_Avg_MHz))
3402
outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 / units * t->aperf / interval_float);
3403
3404
if (DO_BIC(BIC_Busy))
3405
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(t->mperf, tsc));
3406
3407
if (DO_BIC(BIC_Bzy_MHz)) {
3408
if (has_base_hz)
3409
outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), base_hz / units * t->aperf / t->mperf);
3410
else
3411
outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), tsc / units * t->aperf / t->mperf / interval_float);
3412
}
3413
3414
if (DO_BIC(BIC_TSC_MHz))
3415
outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 * t->tsc / units / interval_float);
3416
3417
if (DO_BIC(BIC_IPC))
3418
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 1.0 * t->instr_count / t->aperf);
3419
3420
/* IRQ */
3421
if (DO_BIC(BIC_IRQ)) {
3422
if (sums_need_wide_columns)
3423
outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->irq_count);
3424
else
3425
outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->irq_count);
3426
}
3427
3428
/* NMI */
3429
if (DO_BIC(BIC_NMI)) {
3430
if (sums_need_wide_columns)
3431
outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->nmi_count);
3432
else
3433
outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->nmi_count);
3434
}
3435
3436
/* SMI */
3437
if (DO_BIC(BIC_SMI))
3438
outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->smi_count);
3439
3440
/* LLC Stats */
3441
if (DO_BIC(BIC_LLC_MRPS))
3442
outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), t->llc.references / interval_float / 1000000);
3443
3444
if (DO_BIC(BIC_LLC_HIT))
3445
outp += sprintf(outp, fmt8, (printed++ ? delim : ""), pct((t->llc.references - t->llc.misses), t->llc.references));
3446
3447
/* L2 Stats */
3448
if (DO_BIC(BIC_L2_MRPS))
3449
outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), t->l2.references / interval_float / 1000000);
3450
3451
if (DO_BIC(BIC_L2_HIT))
3452
outp += sprintf(outp, fmt8, (printed++ ? delim : ""), pct(t->l2.hits, t->l2.references));
3453
3454
/* Added Thread Counters */
3455
for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
3456
if (mp->format == FORMAT_RAW)
3457
outp += print_hex_value(mp->width, &printed, delim, t->counter[i]);
3458
else if (mp->format == FORMAT_DELTA || mp->format == FORMAT_AVERAGE)
3459
outp += print_decimal_value(mp->width, &printed, delim, t->counter[i]);
3460
else if (mp->format == FORMAT_PERCENT) {
3461
if (mp->type == COUNTER_USEC)
3462
outp += print_float_value(&printed, delim, t->counter[i] / interval_float / 10000);
3463
else
3464
outp += print_float_value(&printed, delim, pct(t->counter[i], tsc));
3465
}
3466
}
3467
3468
/* Added perf Thread Counters */
3469
for (i = 0, pp = sys.perf_tp; pp; ++i, pp = pp->next) {
3470
if (pp->format == FORMAT_RAW)
3471
outp += print_hex_value(pp->width, &printed, delim, t->perf_counter[i]);
3472
else if (pp->format == FORMAT_DELTA || mp->format == FORMAT_AVERAGE)
3473
outp += print_decimal_value(pp->width, &printed, delim, t->perf_counter[i]);
3474
else if (pp->format == FORMAT_PERCENT) {
3475
if (pp->type == COUNTER_USEC)
3476
outp += print_float_value(&printed, delim, t->perf_counter[i] / interval_float / 10000);
3477
else
3478
outp += print_float_value(&printed, delim, pct(t->perf_counter[i], tsc));
3479
}
3480
}
3481
3482
/* Added PMT Thread Counters */
3483
for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) {
3484
const unsigned long value_raw = t->pmt_counter[i];
3485
double value_converted;
3486
switch (ppmt->type) {
3487
case PMT_TYPE_RAW:
3488
outp += print_hex_value(pmt_counter_get_width(ppmt), &printed, delim, t->pmt_counter[i]);
3489
break;
3490
3491
case PMT_TYPE_XTAL_TIME:
3492
value_converted = pct(value_raw / crystal_hz, interval_float);
3493
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted);
3494
break;
3495
3496
case PMT_TYPE_TCORE_CLOCK:
3497
value_converted = pct(value_raw / tcore_clock_freq_hz, interval_float);
3498
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted);
3499
}
3500
}
3501
3502
/* C1 */
3503
if (DO_BIC(BIC_CPU_c1))
3504
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(t->c1, tsc));
3505
3506
/* print per-core data only for 1st thread in core */
3507
if (!is_cpu_first_thread_in_core(t, c))
3508
goto done;
3509
3510
if (DO_BIC(BIC_CPU_c3))
3511
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(c->c3, tsc));
3512
if (DO_BIC(BIC_CPU_c6))
3513
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(c->c6, tsc));
3514
if (DO_BIC(BIC_CPU_c7))
3515
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(c->c7, tsc));
3516
3517
/* Mod%c6 */
3518
if (DO_BIC(BIC_Mod_c6))
3519
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(c->mc6_us, tsc));
3520
3521
if (DO_BIC(BIC_CoreTmp))
3522
outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_temp_c);
3523
3524
/* Core throttle count */
3525
if (DO_BIC(BIC_CORE_THROT_CNT))
3526
outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->core_throt_cnt);
3527
3528
/* Added Core Counters */
3529
for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
3530
if (mp->format == FORMAT_RAW)
3531
outp += print_hex_value(mp->width, &printed, delim, c->counter[i]);
3532
else if (mp->format == FORMAT_DELTA || mp->format == FORMAT_AVERAGE)
3533
outp += print_decimal_value(mp->width, &printed, delim, c->counter[i]);
3534
else if (mp->format == FORMAT_PERCENT)
3535
outp += print_float_value(&printed, delim, pct(c->counter[i], tsc));
3536
}
3537
3538
/* Added perf Core counters */
3539
for (i = 0, pp = sys.perf_cp; pp; i++, pp = pp->next) {
3540
if (pp->format == FORMAT_RAW)
3541
outp += print_hex_value(pp->width, &printed, delim, c->perf_counter[i]);
3542
else if (pp->format == FORMAT_DELTA || mp->format == FORMAT_AVERAGE)
3543
outp += print_decimal_value(pp->width, &printed, delim, c->perf_counter[i]);
3544
else if (pp->format == FORMAT_PERCENT)
3545
outp += print_float_value(&printed, delim, pct(c->perf_counter[i], tsc));
3546
}
3547
3548
/* Added PMT Core counters */
3549
for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) {
3550
const unsigned long value_raw = c->pmt_counter[i];
3551
double value_converted;
3552
switch (ppmt->type) {
3553
case PMT_TYPE_RAW:
3554
outp += print_hex_value(pmt_counter_get_width(ppmt), &printed, delim, c->pmt_counter[i]);
3555
break;
3556
3557
case PMT_TYPE_XTAL_TIME:
3558
value_converted = pct(value_raw / crystal_hz, interval_float);
3559
outp += print_float_value(&printed, delim, value_converted);
3560
break;
3561
3562
case PMT_TYPE_TCORE_CLOCK:
3563
value_converted = pct(value_raw / tcore_clock_freq_hz, interval_float);
3564
outp += print_float_value(&printed, delim, value_converted);
3565
}
3566
}
3567
3568
if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl)
3569
outp += sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&c->core_energy, RAPL_UNIT_WATTS, interval_float));
3570
if (DO_BIC(BIC_Cor_J) && platform->has_per_core_rapl)
3571
outp += sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&c->core_energy, RAPL_UNIT_JOULES, interval_float));
3572
3573
/* print per-package data only for 1st core in package */
3574
if (!is_cpu_first_core_in_package(t, p))
3575
goto done;
3576
3577
/* PkgTmp */
3578
if (DO_BIC(BIC_PkgTmp))
3579
outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->pkg_temp_c);
3580
3581
/* GFXrc6 */
3582
if (DO_BIC(BIC_GFX_rc6)) {
3583
if (p->gfx_rc6_ms == -1) { /* detect GFX counter reset */
3584
outp += sprintf(outp, "%s**.**", (printed++ ? delim : ""));
3585
} else {
3586
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), p->gfx_rc6_ms / 10.0 / interval_float);
3587
}
3588
}
3589
3590
/* GFXMHz */
3591
if (DO_BIC(BIC_GFXMHz))
3592
outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_mhz);
3593
3594
/* GFXACTMHz */
3595
if (DO_BIC(BIC_GFXACTMHz))
3596
outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_act_mhz);
3597
3598
/* SAMmc6 */
3599
if (DO_BIC(BIC_SAM_mc6)) {
3600
if (p->sam_mc6_ms == -1) { /* detect GFX counter reset */
3601
outp += sprintf(outp, "%s**.**", (printed++ ? delim : ""));
3602
} else {
3603
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), p->sam_mc6_ms / 10.0 / interval_float);
3604
}
3605
}
3606
3607
/* SAMMHz */
3608
if (DO_BIC(BIC_SAMMHz))
3609
outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->sam_mhz);
3610
3611
/* SAMACTMHz */
3612
if (DO_BIC(BIC_SAMACTMHz))
3613
outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->sam_act_mhz);
3614
3615
/* Totl%C0, Any%C0 GFX%C0 CPUGFX% */
3616
if (DO_BIC(BIC_Totl_c0))
3617
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100 * p->pkg_wtd_core_c0 / tsc); /* can exceed 100% */
3618
if (DO_BIC(BIC_Any_c0))
3619
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->pkg_any_core_c0, tsc));
3620
if (DO_BIC(BIC_GFX_c0))
3621
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->pkg_any_gfxe_c0, tsc));
3622
if (DO_BIC(BIC_CPUGFX))
3623
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->pkg_both_core_gfxe_c0, tsc));
3624
3625
if (DO_BIC(BIC_Pkgpc2))
3626
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->pc2, tsc));
3627
if (DO_BIC(BIC_Pkgpc3))
3628
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->pc3, tsc));
3629
if (DO_BIC(BIC_Pkgpc6))
3630
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->pc6, tsc));
3631
if (DO_BIC(BIC_Pkgpc7))
3632
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->pc7, tsc));
3633
if (DO_BIC(BIC_Pkgpc8))
3634
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->pc8, tsc));
3635
if (DO_BIC(BIC_Pkgpc9))
3636
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->pc9, tsc));
3637
if (DO_BIC(BIC_Pkgpc10))
3638
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->pc10, tsc));
3639
3640
if (DO_BIC(BIC_Diec6))
3641
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->die_c6 / crystal_hz, interval_float));
3642
3643
if (DO_BIC(BIC_CPU_LPI)) {
3644
if (p->cpu_lpi >= 0)
3645
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->cpu_lpi / 1000000.0, interval_float));
3646
else
3647
outp += sprintf(outp, "%s(neg)", (printed++ ? delim : ""));
3648
}
3649
if (DO_BIC(BIC_SYS_LPI)) {
3650
if (p->sys_lpi >= 0)
3651
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->sys_lpi / 1000000.0, interval_float));
3652
else
3653
outp += sprintf(outp, "%s(neg)", (printed++ ? delim : ""));
3654
}
3655
3656
if (DO_BIC(BIC_PkgWatt))
3657
outp += sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&p->energy_pkg, RAPL_UNIT_WATTS, interval_float));
3658
if (DO_BIC(BIC_CorWatt) && !platform->has_per_core_rapl)
3659
outp += sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&p->energy_cores, RAPL_UNIT_WATTS, interval_float));
3660
if (DO_BIC(BIC_GFXWatt))
3661
outp += sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&p->energy_gfx, RAPL_UNIT_WATTS, interval_float));
3662
if (DO_BIC(BIC_RAMWatt))
3663
outp += sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&p->energy_dram, RAPL_UNIT_WATTS, interval_float));
3664
if (DO_BIC(BIC_Pkg_J))
3665
outp += sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&p->energy_pkg, RAPL_UNIT_JOULES, interval_float));
3666
if (DO_BIC(BIC_Cor_J) && !platform->has_per_core_rapl)
3667
outp += sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&p->energy_cores, RAPL_UNIT_JOULES, interval_float));
3668
if (DO_BIC(BIC_GFX_J))
3669
outp += sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&p->energy_gfx, RAPL_UNIT_JOULES, interval_float));
3670
if (DO_BIC(BIC_RAM_J))
3671
outp += sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&p->energy_dram, RAPL_UNIT_JOULES, interval_float));
3672
if (DO_BIC(BIC_PKG__))
3673
outp += sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&p->rapl_pkg_perf_status, RAPL_UNIT_WATTS, interval_float));
3674
if (DO_BIC(BIC_RAM__))
3675
outp += sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&p->rapl_dram_perf_status, RAPL_UNIT_WATTS, interval_float));
3676
/* UncMHz */
3677
if (DO_BIC(BIC_UNCORE_MHZ))
3678
outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->uncore_mhz);
3679
3680
/* Added Package Counters */
3681
for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
3682
if (mp->format == FORMAT_RAW)
3683
outp += print_hex_value(mp->width, &printed, delim, p->counter[i]);
3684
else if (mp->type == COUNTER_K2M)
3685
outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), (unsigned int)p->counter[i] / 1000);
3686
else if (mp->format == FORMAT_DELTA || mp->format == FORMAT_AVERAGE)
3687
outp += print_decimal_value(mp->width, &printed, delim, p->counter[i]);
3688
else if (mp->format == FORMAT_PERCENT)
3689
outp += print_float_value(&printed, delim, pct(p->counter[i], tsc));
3690
}
3691
3692
/* Added perf Package Counters */
3693
for (i = 0, pp = sys.perf_pp; pp; i++, pp = pp->next) {
3694
if (pp->format == FORMAT_RAW)
3695
outp += print_hex_value(pp->width, &printed, delim, p->perf_counter[i]);
3696
else if (pp->type == COUNTER_K2M)
3697
outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), (unsigned int)p->perf_counter[i] / 1000);
3698
else if (pp->format == FORMAT_DELTA || mp->format == FORMAT_AVERAGE)
3699
outp += print_decimal_value(pp->width, &printed, delim, p->perf_counter[i]);
3700
else if (pp->format == FORMAT_PERCENT)
3701
outp += print_float_value(&printed, delim, pct(p->perf_counter[i], tsc));
3702
}
3703
3704
/* Added PMT Package Counters */
3705
for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) {
3706
const unsigned long value_raw = p->pmt_counter[i];
3707
double value_converted;
3708
switch (ppmt->type) {
3709
case PMT_TYPE_RAW:
3710
outp += print_hex_value(pmt_counter_get_width(ppmt), &printed, delim, p->pmt_counter[i]);
3711
break;
3712
3713
case PMT_TYPE_XTAL_TIME:
3714
value_converted = pct(value_raw / crystal_hz, interval_float);
3715
outp += print_float_value(&printed, delim, value_converted);
3716
break;
3717
3718
case PMT_TYPE_TCORE_CLOCK:
3719
value_converted = pct(value_raw / tcore_clock_freq_hz, interval_float);
3720
outp += print_float_value(&printed, delim, value_converted);
3721
}
3722
}
3723
3724
if (DO_BIC(BIC_SysWatt) && (t == average.threads))
3725
outp += sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&pplat_cnt->energy_psys, RAPL_UNIT_WATTS, interval_float));
3726
if (DO_BIC(BIC_Sys_J) && (t == average.threads))
3727
outp += sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&pplat_cnt->energy_psys, RAPL_UNIT_JOULES, interval_float));
3728
3729
done:
3730
if (*(outp - 1) != '\n')
3731
outp += sprintf(outp, "\n");
3732
3733
return 0;
3734
}
3735
3736
void flush_output_stdout(void)
3737
{
3738
FILE *filep;
3739
3740
if (outf == stderr)
3741
filep = stdout;
3742
else
3743
filep = outf;
3744
3745
fputs(output_buffer, filep);
3746
fflush(filep);
3747
3748
outp = output_buffer;
3749
}
3750
3751
void flush_output_stderr(void)
3752
{
3753
fputs(output_buffer, outf);
3754
fflush(outf);
3755
outp = output_buffer;
3756
}
3757
3758
void format_all_counters(PER_THREAD_PARAMS)
3759
{
3760
static int count;
3761
3762
if ((!count || (header_iterations && !(count % header_iterations))) || !summary_only)
3763
print_header("\t");
3764
3765
format_counters(average.threads, average.cores, average.packages);
3766
3767
count++;
3768
3769
if (summary_only)
3770
return;
3771
3772
for_all_cpus(format_counters, t, c, p);
3773
}
3774
3775
/*
 * DELTA_WRAP32(new, old): old = new - old, computed modulo 2^32.
 *
 * Both operands are shifted into the upper 32 bits of an unsigned long long
 * before subtracting, so only their low 32 bits take part and a wrap of a
 * 32-bit counter yields the correct (small) delta.
 *
 * "old" must be an lvalue. Arguments are parenthesized and the body is
 * wrapped in do { } while (0) so the macro behaves as a single statement
 * (e.g. inside an unbraced if/else) and accepts arbitrary expressions.
 */
#define DELTA_WRAP32(new, old)							\
	do {									\
		(old) = ((((unsigned long long)(new) << 32) -			\
			  ((unsigned long long)(old) << 32)) >> 32);		\
	} while (0)
3777
3778
int delta_package(struct pkg_data *new, struct pkg_data *old)
3779
{
3780
int i;
3781
struct msr_counter *mp;
3782
struct perf_counter_info *pp;
3783
struct pmt_counter *ppmt;
3784
3785
if (DO_BIC(BIC_Totl_c0))
3786
old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0;
3787
if (DO_BIC(BIC_Any_c0))
3788
old->pkg_any_core_c0 = new->pkg_any_core_c0 - old->pkg_any_core_c0;
3789
if (DO_BIC(BIC_GFX_c0))
3790
old->pkg_any_gfxe_c0 = new->pkg_any_gfxe_c0 - old->pkg_any_gfxe_c0;
3791
if (DO_BIC(BIC_CPUGFX))
3792
old->pkg_both_core_gfxe_c0 = new->pkg_both_core_gfxe_c0 - old->pkg_both_core_gfxe_c0;
3793
3794
old->pc2 = new->pc2 - old->pc2;
3795
if (DO_BIC(BIC_Pkgpc3))
3796
old->pc3 = new->pc3 - old->pc3;
3797
if (DO_BIC(BIC_Pkgpc6))
3798
old->pc6 = new->pc6 - old->pc6;
3799
if (DO_BIC(BIC_Pkgpc7))
3800
old->pc7 = new->pc7 - old->pc7;
3801
old->pc8 = new->pc8 - old->pc8;
3802
old->pc9 = new->pc9 - old->pc9;
3803
old->pc10 = new->pc10 - old->pc10;
3804
old->die_c6 = new->die_c6 - old->die_c6;
3805
old->cpu_lpi = new->cpu_lpi - old->cpu_lpi;
3806
old->sys_lpi = new->sys_lpi - old->sys_lpi;
3807
old->pkg_temp_c = new->pkg_temp_c;
3808
3809
/* flag an error when rc6 counter resets/wraps */
3810
if (old->gfx_rc6_ms > new->gfx_rc6_ms)
3811
old->gfx_rc6_ms = -1;
3812
else
3813
old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms;
3814
3815
old->uncore_mhz = new->uncore_mhz;
3816
old->gfx_mhz = new->gfx_mhz;
3817
old->gfx_act_mhz = new->gfx_act_mhz;
3818
3819
/* flag an error when mc6 counter resets/wraps */
3820
if (old->sam_mc6_ms > new->sam_mc6_ms)
3821
old->sam_mc6_ms = -1;
3822
else
3823
old->sam_mc6_ms = new->sam_mc6_ms - old->sam_mc6_ms;
3824
3825
old->sam_mhz = new->sam_mhz;
3826
old->sam_act_mhz = new->sam_act_mhz;
3827
3828
old->energy_pkg.raw_value = new->energy_pkg.raw_value - old->energy_pkg.raw_value;
3829
old->energy_cores.raw_value = new->energy_cores.raw_value - old->energy_cores.raw_value;
3830
old->energy_gfx.raw_value = new->energy_gfx.raw_value - old->energy_gfx.raw_value;
3831
old->energy_dram.raw_value = new->energy_dram.raw_value - old->energy_dram.raw_value;
3832
old->rapl_pkg_perf_status.raw_value = new->rapl_pkg_perf_status.raw_value - old->rapl_pkg_perf_status.raw_value;
3833
old->rapl_dram_perf_status.raw_value = new->rapl_dram_perf_status.raw_value - old->rapl_dram_perf_status.raw_value;
3834
3835
for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
3836
if (mp->format == FORMAT_RAW)
3837
old->counter[i] = new->counter[i];
3838
else if (mp->format == FORMAT_AVERAGE)
3839
old->counter[i] = new->counter[i];
3840
else
3841
old->counter[i] = new->counter[i] - old->counter[i];
3842
}
3843
3844
for (i = 0, pp = sys.perf_pp; pp; i++, pp = pp->next) {
3845
if (pp->format == FORMAT_RAW)
3846
old->perf_counter[i] = new->perf_counter[i];
3847
else if (pp->format == FORMAT_AVERAGE)
3848
old->perf_counter[i] = new->perf_counter[i];
3849
else
3850
old->perf_counter[i] = new->perf_counter[i] - old->perf_counter[i];
3851
}
3852
3853
for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) {
3854
if (ppmt->format == FORMAT_RAW)
3855
old->pmt_counter[i] = new->pmt_counter[i];
3856
else
3857
old->pmt_counter[i] = new->pmt_counter[i] - old->pmt_counter[i];
3858
}
3859
3860
return 0;
3861
}
3862
3863
void delta_core(struct core_data *new, struct core_data *old)
3864
{
3865
int i;
3866
struct msr_counter *mp;
3867
struct perf_counter_info *pp;
3868
struct pmt_counter *ppmt;
3869
3870
old->c3 = new->c3 - old->c3;
3871
old->c6 = new->c6 - old->c6;
3872
old->c7 = new->c7 - old->c7;
3873
old->core_temp_c = new->core_temp_c;
3874
old->core_throt_cnt = new->core_throt_cnt - old->core_throt_cnt;
3875
old->mc6_us = new->mc6_us - old->mc6_us;
3876
3877
DELTA_WRAP32(new->core_energy.raw_value, old->core_energy.raw_value);
3878
3879
for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
3880
if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE)
3881
old->counter[i] = new->counter[i];
3882
else
3883
old->counter[i] = new->counter[i] - old->counter[i];
3884
}
3885
3886
for (i = 0, pp = sys.perf_cp; pp; i++, pp = pp->next) {
3887
if (pp->format == FORMAT_RAW)
3888
old->perf_counter[i] = new->perf_counter[i];
3889
else
3890
old->perf_counter[i] = new->perf_counter[i] - old->perf_counter[i];
3891
}
3892
3893
for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) {
3894
if (ppmt->format == FORMAT_RAW)
3895
old->pmt_counter[i] = new->pmt_counter[i];
3896
else
3897
old->pmt_counter[i] = new->pmt_counter[i] - old->pmt_counter[i];
3898
}
3899
}
3900
3901
int soft_c1_residency_display(int bic)
3902
{
3903
if (!DO_BIC(BIC_CPU_c1) || platform->has_msr_core_c1_res)
3904
return 0;
3905
3906
return DO_BIC_READ(bic);
3907
}
3908
3909
/*
3910
* old = new - old
3911
*/
3912
int delta_thread(struct thread_data *new, struct thread_data *old, struct core_data *core_delta)
3913
{
3914
int i;
3915
struct msr_counter *mp;
3916
struct perf_counter_info *pp;
3917
struct pmt_counter *ppmt;
3918
3919
/* we run cpuid just the 1st time, copy the results */
3920
if (DO_BIC(BIC_APIC))
3921
new->apic_id = old->apic_id;
3922
if (DO_BIC(BIC_X2APIC))
3923
new->x2apic_id = old->x2apic_id;
3924
3925
/*
3926
* the timestamps from start of measurement interval are in "old"
3927
* the timestamp from end of measurement interval are in "new"
3928
* over-write old w/ new so we can print end of interval values
3929
*/
3930
3931
timersub(&new->tv_begin, &old->tv_begin, &old->tv_delta);
3932
old->tv_begin = new->tv_begin;
3933
old->tv_end = new->tv_end;
3934
3935
old->tsc = new->tsc - old->tsc;
3936
3937
/* check for TSC < 1 Mcycles over interval */
3938
if (old->tsc < (1000 * 1000))
3939
errx(-3, "Insanely slow TSC rate, TSC stops in idle?\n"
3940
"You can disable all c-states by booting with \"idle=poll\"\nor just the deep ones with \"processor.max_cstate=1\"");
3941
3942
old->c1 = new->c1 - old->c1;
3943
3944
if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || DO_BIC(BIC_IPC)
3945
|| soft_c1_residency_display(BIC_Avg_MHz)) {
3946
if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) {
3947
old->aperf = new->aperf - old->aperf;
3948
old->mperf = new->mperf - old->mperf;
3949
} else {
3950
return -1;
3951
}
3952
}
3953
3954
if (platform->has_msr_core_c1_res) {
3955
/*
3956
* Some models have a dedicated C1 residency MSR,
3957
* which should be more accurate than the derivation below.
3958
*/
3959
} else {
3960
/*
3961
* As counter collection is not atomic,
3962
* it is possible for mperf's non-halted cycles + idle states
3963
* to exceed TSC's all cycles: show c1 = 0% in that case.
3964
*/
3965
if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > (old->tsc * tsc_tweak))
3966
old->c1 = 0;
3967
else {
3968
/* normal case, derive c1 */
3969
old->c1 = (old->tsc * tsc_tweak) - old->mperf - core_delta->c3 - core_delta->c6 - core_delta->c7;
3970
}
3971
}
3972
3973
if (old->mperf == 0) {
3974
if (debug > 1)
3975
fprintf(outf, "cpu%d MPERF 0!\n", old->cpu_id);
3976
old->mperf = 1; /* divide by 0 protection */
3977
}
3978
3979
if (DO_BIC(BIC_IPC))
3980
old->instr_count = new->instr_count - old->instr_count;
3981
3982
if (DO_BIC(BIC_IRQ))
3983
old->irq_count = new->irq_count - old->irq_count;
3984
3985
if (DO_BIC(BIC_NMI))
3986
old->nmi_count = new->nmi_count - old->nmi_count;
3987
3988
if (DO_BIC(BIC_SMI))
3989
old->smi_count = new->smi_count - old->smi_count;
3990
3991
if (DO_BIC(BIC_LLC_MRPS) || DO_BIC(BIC_LLC_HIT))
3992
old->llc.references = new->llc.references - old->llc.references;
3993
3994
if (DO_BIC(BIC_LLC_HIT))
3995
old->llc.misses = new->llc.misses - old->llc.misses;
3996
3997
if (DO_BIC(BIC_L2_MRPS) || DO_BIC(BIC_L2_HIT))
3998
old->l2.references = new->l2.references - old->l2.references;
3999
4000
if (DO_BIC(BIC_L2_HIT))
4001
old->l2.hits = new->l2.hits - old->l2.hits;
4002
4003
for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
4004
if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE)
4005
old->counter[i] = new->counter[i];
4006
else
4007
old->counter[i] = new->counter[i] - old->counter[i];
4008
}
4009
4010
for (i = 0, pp = sys.perf_tp; pp; i++, pp = pp->next) {
4011
if (pp->format == FORMAT_RAW)
4012
old->perf_counter[i] = new->perf_counter[i];
4013
else
4014
old->perf_counter[i] = new->perf_counter[i] - old->perf_counter[i];
4015
}
4016
4017
for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) {
4018
if (ppmt->format == FORMAT_RAW)
4019
old->pmt_counter[i] = new->pmt_counter[i];
4020
else
4021
old->pmt_counter[i] = new->pmt_counter[i] - old->pmt_counter[i];
4022
}
4023
4024
return 0;
4025
}
4026
4027
/*
 * delta_cpu()
 *
 * Compute the deltas for one CPU: (t, c, p) is the end-of-interval snapshot,
 * (t2, c2, p2) the start-of-interval snapshot that receives the deltas.
 *
 * Returns the delta_thread() status OR-ed with the delta_package() status
 * (the latter only when this CPU is the first core in its package).
 */
int delta_cpu(struct thread_data *t, struct core_data *c, struct pkg_data *p, struct thread_data *t2, struct core_data *c2, struct pkg_data *p2)
{
	int status;

	/* calculate core delta only for 1st thread in core */
	if (is_cpu_first_thread_in_core(t, c))
		delta_core(c, c2);

	/* always calculate thread delta; c2 now holds the core delta */
	status = delta_thread(t, t2, c2);

	/* calculate package delta only for 1st core in package */
	if (!is_cpu_first_core_in_package(t, p))
		return status;

	return status | delta_package(p, p2);
}
4044
4045
void delta_platform(struct platform_counters *new, struct platform_counters *old)
4046
{
4047
old->energy_psys.raw_value = new->energy_psys.raw_value - old->energy_psys.raw_value;
4048
}
4049
4050
void rapl_counter_clear(struct rapl_counter *c)
4051
{
4052
c->raw_value = 0;
4053
c->scale = 0.0;
4054
c->unit = RAPL_UNIT_INVALID;
4055
}
4056
4057
void clear_counters(PER_THREAD_PARAMS)
4058
{
4059
int i;
4060
struct msr_counter *mp;
4061
4062
t->tv_begin.tv_sec = 0;
4063
t->tv_begin.tv_usec = 0;
4064
t->tv_end.tv_sec = 0;
4065
t->tv_end.tv_usec = 0;
4066
t->tv_delta.tv_sec = 0;
4067
t->tv_delta.tv_usec = 0;
4068
4069
t->tsc = 0;
4070
t->aperf = 0;
4071
t->mperf = 0;
4072
t->c1 = 0;
4073
4074
t->instr_count = 0;
4075
4076
t->irq_count = 0;
4077
t->nmi_count = 0;
4078
t->smi_count = 0;
4079
4080
t->llc.references = 0;
4081
t->llc.misses = 0;
4082
4083
t->l2.references = 0;
4084
t->l2.hits = 0;
4085
4086
c->c3 = 0;
4087
c->c6 = 0;
4088
c->c7 = 0;
4089
c->mc6_us = 0;
4090
c->core_temp_c = 0;
4091
rapl_counter_clear(&c->core_energy);
4092
c->core_throt_cnt = 0;
4093
4094
p->pkg_wtd_core_c0 = 0;
4095
p->pkg_any_core_c0 = 0;
4096
p->pkg_any_gfxe_c0 = 0;
4097
p->pkg_both_core_gfxe_c0 = 0;
4098
4099
p->pc2 = 0;
4100
if (DO_BIC(BIC_Pkgpc3))
4101
p->pc3 = 0;
4102
if (DO_BIC(BIC_Pkgpc6))
4103
p->pc6 = 0;
4104
if (DO_BIC(BIC_Pkgpc7))
4105
p->pc7 = 0;
4106
p->pc8 = 0;
4107
p->pc9 = 0;
4108
p->pc10 = 0;
4109
p->die_c6 = 0;
4110
p->cpu_lpi = 0;
4111
p->sys_lpi = 0;
4112
4113
rapl_counter_clear(&p->energy_pkg);
4114
rapl_counter_clear(&p->energy_dram);
4115
rapl_counter_clear(&p->energy_cores);
4116
rapl_counter_clear(&p->energy_gfx);
4117
rapl_counter_clear(&p->rapl_pkg_perf_status);
4118
rapl_counter_clear(&p->rapl_dram_perf_status);
4119
p->pkg_temp_c = 0;
4120
4121
p->gfx_rc6_ms = 0;
4122
p->uncore_mhz = 0;
4123
p->gfx_mhz = 0;
4124
p->gfx_act_mhz = 0;
4125
p->sam_mc6_ms = 0;
4126
p->sam_mhz = 0;
4127
p->sam_act_mhz = 0;
4128
for (i = 0, mp = sys.tp; mp; i++, mp = mp->next)
4129
t->counter[i] = 0;
4130
4131
for (i = 0, mp = sys.cp; mp; i++, mp = mp->next)
4132
c->counter[i] = 0;
4133
4134
for (i = 0, mp = sys.pp; mp; i++, mp = mp->next)
4135
p->counter[i] = 0;
4136
4137
memset(&t->perf_counter[0], 0, sizeof(t->perf_counter));
4138
memset(&c->perf_counter[0], 0, sizeof(c->perf_counter));
4139
memset(&p->perf_counter[0], 0, sizeof(p->perf_counter));
4140
4141
memset(&t->pmt_counter[0], 0, ARRAY_SIZE(t->pmt_counter));
4142
memset(&c->pmt_counter[0], 0, ARRAY_SIZE(c->pmt_counter));
4143
memset(&p->pmt_counter[0], 0, ARRAY_SIZE(p->pmt_counter));
4144
}
4145
4146
void rapl_counter_accumulate(struct rapl_counter *dst, const struct rapl_counter *src)
4147
{
4148
/* Copy unit and scale from src if dst is not initialized */
4149
if (dst->unit == RAPL_UNIT_INVALID) {
4150
dst->unit = src->unit;
4151
dst->scale = src->scale;
4152
}
4153
4154
assert(dst->unit == src->unit);
4155
assert(dst->scale == src->scale);
4156
4157
dst->raw_value += src->raw_value;
4158
}
4159
4160
int sum_counters(PER_THREAD_PARAMS)
4161
{
4162
int i;
4163
struct msr_counter *mp;
4164
struct perf_counter_info *pp;
4165
struct pmt_counter *ppmt;
4166
4167
/* copy un-changing apic_id's */
4168
if (DO_BIC(BIC_APIC))
4169
average.threads->apic_id = t->apic_id;
4170
if (DO_BIC(BIC_X2APIC))
4171
average.threads->x2apic_id = t->x2apic_id;
4172
4173
/* remember first tv_begin */
4174
if (average.threads->tv_begin.tv_sec == 0)
4175
average.threads->tv_begin = procsysfs_tv_begin;
4176
4177
/* remember last tv_end */
4178
average.threads->tv_end = t->tv_end;
4179
4180
average.threads->tsc += t->tsc;
4181
average.threads->aperf += t->aperf;
4182
average.threads->mperf += t->mperf;
4183
average.threads->c1 += t->c1;
4184
4185
average.threads->instr_count += t->instr_count;
4186
4187
average.threads->irq_count += t->irq_count;
4188
average.threads->nmi_count += t->nmi_count;
4189
average.threads->smi_count += t->smi_count;
4190
4191
average.threads->llc.references += t->llc.references;
4192
average.threads->llc.misses += t->llc.misses;
4193
4194
average.threads->l2.references += t->l2.references;
4195
average.threads->l2.hits += t->l2.hits;
4196
4197
for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
4198
if (mp->format == FORMAT_RAW)
4199
continue;
4200
average.threads->counter[i] += t->counter[i];
4201
}
4202
4203
for (i = 0, pp = sys.perf_tp; pp; i++, pp = pp->next) {
4204
if (pp->format == FORMAT_RAW)
4205
continue;
4206
average.threads->perf_counter[i] += t->perf_counter[i];
4207
}
4208
4209
for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) {
4210
average.threads->pmt_counter[i] += t->pmt_counter[i];
4211
}
4212
4213
/* sum per-core values only for 1st thread in core */
4214
if (!is_cpu_first_thread_in_core(t, c))
4215
return 0;
4216
4217
average.cores->c3 += c->c3;
4218
average.cores->c6 += c->c6;
4219
average.cores->c7 += c->c7;
4220
average.cores->mc6_us += c->mc6_us;
4221
4222
average.cores->core_temp_c = MAX(average.cores->core_temp_c, c->core_temp_c);
4223
average.cores->core_throt_cnt = MAX(average.cores->core_throt_cnt, c->core_throt_cnt);
4224
4225
rapl_counter_accumulate(&average.cores->core_energy, &c->core_energy);
4226
4227
for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
4228
if (mp->format == FORMAT_RAW)
4229
continue;
4230
average.cores->counter[i] += c->counter[i];
4231
}
4232
4233
for (i = 0, pp = sys.perf_cp; pp; i++, pp = pp->next) {
4234
if (pp->format == FORMAT_RAW)
4235
continue;
4236
average.cores->perf_counter[i] += c->perf_counter[i];
4237
}
4238
4239
for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) {
4240
average.cores->pmt_counter[i] += c->pmt_counter[i];
4241
}
4242
4243
/* sum per-pkg values only for 1st core in pkg */
4244
if (!is_cpu_first_core_in_package(t, p))
4245
return 0;
4246
4247
if (DO_BIC(BIC_Totl_c0))
4248
average.packages->pkg_wtd_core_c0 += p->pkg_wtd_core_c0;
4249
if (DO_BIC(BIC_Any_c0))
4250
average.packages->pkg_any_core_c0 += p->pkg_any_core_c0;
4251
if (DO_BIC(BIC_GFX_c0))
4252
average.packages->pkg_any_gfxe_c0 += p->pkg_any_gfxe_c0;
4253
if (DO_BIC(BIC_CPUGFX))
4254
average.packages->pkg_both_core_gfxe_c0 += p->pkg_both_core_gfxe_c0;
4255
4256
average.packages->pc2 += p->pc2;
4257
if (DO_BIC(BIC_Pkgpc3))
4258
average.packages->pc3 += p->pc3;
4259
if (DO_BIC(BIC_Pkgpc6))
4260
average.packages->pc6 += p->pc6;
4261
if (DO_BIC(BIC_Pkgpc7))
4262
average.packages->pc7 += p->pc7;
4263
average.packages->pc8 += p->pc8;
4264
average.packages->pc9 += p->pc9;
4265
average.packages->pc10 += p->pc10;
4266
average.packages->die_c6 += p->die_c6;
4267
4268
average.packages->cpu_lpi = p->cpu_lpi;
4269
average.packages->sys_lpi = p->sys_lpi;
4270
4271
rapl_counter_accumulate(&average.packages->energy_pkg, &p->energy_pkg);
4272
rapl_counter_accumulate(&average.packages->energy_dram, &p->energy_dram);
4273
rapl_counter_accumulate(&average.packages->energy_cores, &p->energy_cores);
4274
rapl_counter_accumulate(&average.packages->energy_gfx, &p->energy_gfx);
4275
4276
average.packages->gfx_rc6_ms = p->gfx_rc6_ms;
4277
average.packages->uncore_mhz = p->uncore_mhz;
4278
average.packages->gfx_mhz = p->gfx_mhz;
4279
average.packages->gfx_act_mhz = p->gfx_act_mhz;
4280
average.packages->sam_mc6_ms = p->sam_mc6_ms;
4281
average.packages->sam_mhz = p->sam_mhz;
4282
average.packages->sam_act_mhz = p->sam_act_mhz;
4283
4284
average.packages->pkg_temp_c = MAX(average.packages->pkg_temp_c, p->pkg_temp_c);
4285
4286
rapl_counter_accumulate(&average.packages->rapl_pkg_perf_status, &p->rapl_pkg_perf_status);
4287
rapl_counter_accumulate(&average.packages->rapl_dram_perf_status, &p->rapl_dram_perf_status);
4288
4289
for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
4290
if ((mp->format == FORMAT_RAW) && (topo.num_packages == 0))
4291
average.packages->counter[i] = p->counter[i];
4292
else
4293
average.packages->counter[i] += p->counter[i];
4294
}
4295
4296
for (i = 0, pp = sys.perf_pp; pp; i++, pp = pp->next) {
4297
if ((pp->format == FORMAT_RAW) && (topo.num_packages == 0))
4298
average.packages->perf_counter[i] = p->perf_counter[i];
4299
else
4300
average.packages->perf_counter[i] += p->perf_counter[i];
4301
}
4302
4303
for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) {
4304
average.packages->pmt_counter[i] += p->pmt_counter[i];
4305
}
4306
4307
return 0;
4308
}
4309
4310
/*
4311
* sum the counters for all cpus in the system
4312
* compute the weighted average
4313
*/
4314
void compute_average(PER_THREAD_PARAMS)
4315
{
4316
int i;
4317
struct msr_counter *mp;
4318
struct perf_counter_info *pp;
4319
struct pmt_counter *ppmt;
4320
4321
clear_counters(average.threads, average.cores, average.packages);
4322
4323
for_all_cpus(sum_counters, t, c, p);
4324
4325
/* Use the global time delta for the average. */
4326
average.threads->tv_delta = tv_delta;
4327
4328
average.threads->tsc /= topo.allowed_cpus;
4329
average.threads->aperf /= topo.allowed_cpus;
4330
average.threads->mperf /= topo.allowed_cpus;
4331
average.threads->instr_count /= topo.allowed_cpus;
4332
average.threads->c1 /= topo.allowed_cpus;
4333
4334
if (average.threads->irq_count > 9999999)
4335
sums_need_wide_columns = 1;
4336
if (average.threads->nmi_count > 9999999)
4337
sums_need_wide_columns = 1;
4338
4339
average.cores->c3 /= topo.allowed_cores;
4340
average.cores->c6 /= topo.allowed_cores;
4341
average.cores->c7 /= topo.allowed_cores;
4342
average.cores->mc6_us /= topo.allowed_cores;
4343
4344
if (DO_BIC(BIC_Totl_c0))
4345
average.packages->pkg_wtd_core_c0 /= topo.allowed_packages;
4346
if (DO_BIC(BIC_Any_c0))
4347
average.packages->pkg_any_core_c0 /= topo.allowed_packages;
4348
if (DO_BIC(BIC_GFX_c0))
4349
average.packages->pkg_any_gfxe_c0 /= topo.allowed_packages;
4350
if (DO_BIC(BIC_CPUGFX))
4351
average.packages->pkg_both_core_gfxe_c0 /= topo.allowed_packages;
4352
4353
average.packages->pc2 /= topo.allowed_packages;
4354
if (DO_BIC(BIC_Pkgpc3))
4355
average.packages->pc3 /= topo.allowed_packages;
4356
if (DO_BIC(BIC_Pkgpc6))
4357
average.packages->pc6 /= topo.allowed_packages;
4358
if (DO_BIC(BIC_Pkgpc7))
4359
average.packages->pc7 /= topo.allowed_packages;
4360
4361
average.packages->pc8 /= topo.allowed_packages;
4362
average.packages->pc9 /= topo.allowed_packages;
4363
average.packages->pc10 /= topo.allowed_packages;
4364
average.packages->die_c6 /= topo.allowed_packages;
4365
4366
for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
4367
if (mp->format == FORMAT_RAW)
4368
continue;
4369
if (mp->type == COUNTER_ITEMS) {
4370
if (average.threads->counter[i] > 9999999)
4371
sums_need_wide_columns = 1;
4372
continue;
4373
}
4374
average.threads->counter[i] /= topo.allowed_cpus;
4375
}
4376
for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
4377
if (mp->format == FORMAT_RAW)
4378
continue;
4379
if (mp->type == COUNTER_ITEMS) {
4380
if (average.cores->counter[i] > 9999999)
4381
sums_need_wide_columns = 1;
4382
}
4383
average.cores->counter[i] /= topo.allowed_cores;
4384
}
4385
for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
4386
if (mp->format == FORMAT_RAW)
4387
continue;
4388
if (mp->type == COUNTER_ITEMS) {
4389
if (average.packages->counter[i] > 9999999)
4390
sums_need_wide_columns = 1;
4391
}
4392
average.packages->counter[i] /= topo.allowed_packages;
4393
}
4394
4395
for (i = 0, pp = sys.perf_tp; pp; i++, pp = pp->next) {
4396
if (pp->format == FORMAT_RAW)
4397
continue;
4398
if (pp->type == COUNTER_ITEMS) {
4399
if (average.threads->perf_counter[i] > 9999999)
4400
sums_need_wide_columns = 1;
4401
continue;
4402
}
4403
average.threads->perf_counter[i] /= topo.allowed_cpus;
4404
}
4405
for (i = 0, pp = sys.perf_cp; pp; i++, pp = pp->next) {
4406
if (pp->format == FORMAT_RAW)
4407
continue;
4408
if (pp->type == COUNTER_ITEMS) {
4409
if (average.cores->perf_counter[i] > 9999999)
4410
sums_need_wide_columns = 1;
4411
}
4412
average.cores->perf_counter[i] /= topo.allowed_cores;
4413
}
4414
for (i = 0, pp = sys.perf_pp; pp; i++, pp = pp->next) {
4415
if (pp->format == FORMAT_RAW)
4416
continue;
4417
if (pp->type == COUNTER_ITEMS) {
4418
if (average.packages->perf_counter[i] > 9999999)
4419
sums_need_wide_columns = 1;
4420
}
4421
average.packages->perf_counter[i] /= topo.allowed_packages;
4422
}
4423
4424
for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) {
4425
average.threads->pmt_counter[i] /= topo.allowed_cpus;
4426
}
4427
for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) {
4428
average.cores->pmt_counter[i] /= topo.allowed_cores;
4429
}
4430
for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) {
4431
average.packages->pmt_counter[i] /= topo.allowed_packages;
4432
}
4433
}
4434
4435
/*
 * rdtsc()
 *
 * Execute the RDTSC instruction and return its 64-bit result,
 * assembled from the low half (EAX) and the high half (EDX).
 */
static unsigned long long rdtsc(void)
{
	unsigned int lo, hi;
	unsigned long long tsc;

	__asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi));

	tsc = (unsigned long long)hi << 32;
	tsc |= lo;

	return tsc;
}
4443
4444
/*
 * fopen_or_die()
 *
 * Open "path" with the given fopen() mode and return the stream.
 * On failure the program exits through err() with status 1.
 */
FILE *fopen_or_die(const char *path, const char *mode)
{
	FILE *stream;

	stream = fopen(path, mode);
	if (stream == NULL)
		err(1, "%s: open failed", path);

	return stream;
}
4455
4456
/*
 * snapshot_sysfs_counter()
 *
 * return snapshot of given counter
 *
 * Opens "path", parses one integer from it and returns the value.
 * Exits through err() when the file cannot be opened (fopen_or_die)
 * or when no number can be parsed.
 */
unsigned long long snapshot_sysfs_counter(char *path)
{
	FILE *fp;
	int retval;
	unsigned long long counter;

	fp = fopen_or_die(path, "r");

	/* %llu matches the unsigned long long destination */
	retval = fscanf(fp, "%llu", &counter);
	if (retval != 1)
		err(1, "snapshot_sysfs_counter(%s)", path);

	fclose(fp);

	return counter;
}
4477
4478
int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp, char *counter_path)
4479
{
4480
if (mp->msr_num != 0) {
4481
assert(!no_msr);
4482
if (get_msr(cpu, mp->msr_num, counterp))
4483
return -1;
4484
} else {
4485
char path[128 + PATH_BYTES];
4486
4487
if (mp->flags & SYSFS_PERCPU) {
4488
sprintf(path, "/sys/devices/system/cpu/cpu%d/%s", cpu, mp->sp->path);
4489
4490
*counterp = snapshot_sysfs_counter(path);
4491
} else {
4492
*counterp = snapshot_sysfs_counter(counter_path);
4493
}
4494
}
4495
4496
return 0;
4497
}
4498
4499
unsigned long long get_legacy_uncore_mhz(int package)
4500
{
4501
char path[128];
4502
int die;
4503
static int warn_once;
4504
4505
/*
4506
* for this package, use the first die_id that exists
4507
*/
4508
for (die = 0; die <= topo.max_die_id; ++die) {
4509
4510
sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d/current_freq_khz", package, die);
4511
4512
if (access(path, R_OK) == 0)
4513
return (snapshot_sysfs_counter(path) / 1000);
4514
}
4515
if (!warn_once) {
4516
warnx("BUG: %s: No %s", __func__, path);
4517
warn_once = 1;
4518
}
4519
4520
return 0;
4521
}
4522
4523
int get_epb(int cpu)
4524
{
4525
char path[128 + PATH_BYTES];
4526
unsigned long long msr;
4527
int ret, epb = -1;
4528
FILE *fp;
4529
4530
sprintf(path, "/sys/devices/system/cpu/cpu%d/power/energy_perf_bias", cpu);
4531
4532
fp = fopen(path, "r");
4533
if (!fp)
4534
goto msr_fallback;
4535
4536
ret = fscanf(fp, "%d", &epb);
4537
if (ret != 1)
4538
err(1, "%s(%s)", __func__, path);
4539
4540
fclose(fp);
4541
4542
return epb;
4543
4544
msr_fallback:
4545
if (no_msr)
4546
return -1;
4547
4548
get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr);
4549
4550
return msr & 0xf;
4551
}
4552
4553
void get_apic_id(struct thread_data *t)
4554
{
4555
unsigned int eax, ebx, ecx, edx;
4556
4557
if (DO_BIC(BIC_APIC)) {
4558
eax = ebx = ecx = edx = 0;
4559
__cpuid(1, eax, ebx, ecx, edx);
4560
4561
t->apic_id = (ebx >> 24) & 0xff;
4562
}
4563
4564
if (!DO_BIC(BIC_X2APIC))
4565
return;
4566
4567
if (authentic_amd || hygon_genuine) {
4568
unsigned int topology_extensions;
4569
4570
if (max_extended_level < 0x8000001e)
4571
return;
4572
4573
eax = ebx = ecx = edx = 0;
4574
__cpuid(0x80000001, eax, ebx, ecx, edx);
4575
topology_extensions = ecx & (1 << 22);
4576
4577
if (topology_extensions == 0)
4578
return;
4579
4580
eax = ebx = ecx = edx = 0;
4581
__cpuid(0x8000001e, eax, ebx, ecx, edx);
4582
4583
t->x2apic_id = eax;
4584
return;
4585
}
4586
4587
if (!genuine_intel)
4588
return;
4589
4590
if (max_level < 0xb)
4591
return;
4592
4593
ecx = 0;
4594
__cpuid(0xb, eax, ebx, ecx, edx);
4595
t->x2apic_id = edx;
4596
4597
if (debug && (t->apic_id != (t->x2apic_id & 0xff)))
4598
fprintf(outf, "cpu%d: BIOS BUG: apic 0x%x x2apic 0x%x\n", t->cpu_id, t->apic_id, t->x2apic_id);
4599
}
4600
4601
int get_core_throt_cnt(int cpu, unsigned long long *cnt)
4602
{
4603
char path[128 + PATH_BYTES];
4604
unsigned long long tmp;
4605
FILE *fp;
4606
int ret;
4607
4608
sprintf(path, "/sys/devices/system/cpu/cpu%d/thermal_throttle/core_throttle_count", cpu);
4609
fp = fopen(path, "r");
4610
if (!fp)
4611
return -1;
4612
ret = fscanf(fp, "%lld", &tmp);
4613
fclose(fp);
4614
if (ret != 1)
4615
return -1;
4616
*cnt = tmp;
4617
4618
return 0;
4619
}
4620
4621
static int read_perf_counter_info(const char *const path, const char *const parse_format, void *value_ptr)
4622
{
4623
int fdmt;
4624
int bytes_read;
4625
char buf[64];
4626
int ret = -1;
4627
4628
fdmt = open(path, O_RDONLY, 0);
4629
if (fdmt == -1) {
4630
if (debug)
4631
fprintf(stderr, "Failed to parse perf counter info %s\n", path);
4632
ret = -1;
4633
goto cleanup_and_exit;
4634
}
4635
4636
bytes_read = read(fdmt, buf, sizeof(buf) - 1);
4637
if (bytes_read <= 0 || bytes_read >= (int)sizeof(buf)) {
4638
if (debug)
4639
fprintf(stderr, "Failed to parse perf counter info %s\n", path);
4640
ret = -1;
4641
goto cleanup_and_exit;
4642
}
4643
4644
buf[bytes_read] = '\0';
4645
4646
if (sscanf(buf, parse_format, value_ptr) != 1) {
4647
if (debug)
4648
fprintf(stderr, "Failed to parse perf counter info %s\n", path);
4649
ret = -1;
4650
goto cleanup_and_exit;
4651
}
4652
4653
ret = 0;
4654
4655
cleanup_and_exit:
4656
close(fdmt);
4657
return ret;
4658
}
4659
4660
/*
 * read_perf_counter_info_n()
 *
 * Read one unsigned int from the file at "path" using "parse_format".
 * Returns the parsed value, or (unsigned int)-1 when
 * read_perf_counter_info() reports a failure.
 */
static unsigned int read_perf_counter_info_n(const char *const path, const char *const parse_format)
{
	unsigned int value;

	if (read_perf_counter_info(path, parse_format, &value) != 0)
		return -1;

	return value;
}
4671
4672
/*
 * read_perf_type()
 *
 * Return the number stored in the "type" file of the perf event source
 * "subsys" under /sys/bus/event_source/devices, or (unsigned int)-1
 * when it cannot be read.
 */
static unsigned int read_perf_type(const char *subsys)
{
	char type_path[128];

	snprintf(type_path, sizeof(type_path), "/sys/bus/event_source/devices/%s/type", subsys);

	return read_perf_counter_info_n(type_path, "%u");
}
4682
4683
/*
 * read_perf_config()
 *
 * Read the sysfs description of perf event "event_name" of event source
 * "subsys" and build a config value from it.
 *
 * The first line of the file is treated as a comma-separated list;
 * "event=<hex>" and "umask=<hex>" entries are recognized.
 *
 * Returns (umask << 8) | event, with umask taken as 0 when absent.
 * Returns (unsigned int)-1 when the file cannot be opened or read,
 * or when it has no "event=" entry.
 */
static unsigned int read_perf_config(const char *subsys, const char *event_name)
{
	char path[128];
	char config_str[64];
	unsigned int config;
	unsigned int umask;
	bool has_config = false;
	bool has_umask = false;
	unsigned int ret = -1;
	FILE *fconfig;

	snprintf(path, sizeof(path), "/sys/bus/event_source/devices/%s/events/%s", subsys, event_name);

	fconfig = fopen(path, "r");
	if (!fconfig)
		return -1;

	if (fgets(config_str, sizeof(config_str), fconfig) == config_str) {
		char *cursor = config_str;

		/* walk the comma-separated entries one at a time */
		while (cursor) {
			if (sscanf(cursor, "event=%x", &config) == 1)
				has_config = true;
			else if (sscanf(cursor, "umask=%x", &umask) == 1)
				has_umask = true;

			/* cut the list at the next comma and step past it */
			cursor = strchr(cursor, ',');
			if (cursor)
				*cursor++ = '\0';
		}

		if (has_config)
			ret = ((has_umask ? umask : 0) << 8) | config;
	}

	fclose(fconfig);
	return ret;
}
4733
4734
static unsigned int read_perf_rapl_unit(const char *subsys, const char *event_name)
4735
{
4736
const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s.unit";
4737
const char *const format = "%s";
4738
char path[128];
4739
char unit_buffer[16];
4740
4741
snprintf(path, sizeof(path), path_format, subsys, event_name);
4742
4743
read_perf_counter_info(path, format, &unit_buffer);
4744
if (strcmp("Joules", unit_buffer) == 0)
4745
return RAPL_UNIT_JOULES;
4746
4747
return RAPL_UNIT_INVALID;
4748
}
4749
4750
/*
 * read_perf_scale()
 *
 * Read the "<event_name>.scale" sysfs file of perf event source "subsys"
 * as a floating-point number.
 *
 * Returns the parsed value, or 0.0 when the file cannot be read or parsed.
 */
static double read_perf_scale(const char *subsys, const char *event_name)
{
	char scale_path[128];
	double value;

	snprintf(scale_path, sizeof(scale_path), "/sys/bus/event_source/devices/%s/events/%s.scale", subsys, event_name);

	return read_perf_counter_info(scale_path, "%lf", &value) ? 0.0 : value;
}
4764
4765
size_t rapl_counter_info_count_perf(const struct rapl_counter_info_t *rci)
4766
{
4767
size_t ret = 0;
4768
4769
for (int i = 0; i < NUM_RAPL_COUNTERS; ++i)
4770
if (rci->source[i] == COUNTER_SOURCE_PERF)
4771
++ret;
4772
4773
return ret;
4774
}
4775
4776
static size_t cstate_counter_info_count_perf(const struct cstate_counter_info_t *cci)
4777
{
4778
size_t ret = 0;
4779
4780
for (int i = 0; i < NUM_CSTATE_COUNTERS; ++i)
4781
if (cci->source[i] == COUNTER_SOURCE_PERF)
4782
++ret;
4783
4784
return ret;
4785
}
4786
4787
void write_rapl_counter(struct rapl_counter *rc, struct rapl_counter_info_t *rci, unsigned int idx)
4788
{
4789
if (rci->source[idx] == COUNTER_SOURCE_NONE)
4790
return;
4791
4792
rc->raw_value = rci->data[idx];
4793
rc->unit = rci->unit[idx];
4794
rc->scale = rci->scale[idx];
4795
}
4796
4797
int get_rapl_counters(int cpu, unsigned int domain, struct core_data *c, struct pkg_data *p)
4798
{
4799
struct platform_counters *pplat_cnt = p == odd.packages ? &platform_counters_odd : &platform_counters_even;
4800
unsigned long long perf_data[NUM_RAPL_COUNTERS + 1];
4801
struct rapl_counter_info_t *rci;
4802
4803
if (debug >= 2)
4804
fprintf(stderr, "%s: cpu%d domain%d\n", __func__, cpu, domain);
4805
4806
assert(rapl_counter_info_perdomain);
4807
assert(domain < rapl_counter_info_perdomain_size);
4808
4809
rci = &rapl_counter_info_perdomain[domain];
4810
4811
/*
4812
* If we have any perf counters to read, read them all now, in bulk
4813
*/
4814
if (rci->fd_perf != -1) {
4815
size_t num_perf_counters = rapl_counter_info_count_perf(rci);
4816
const ssize_t expected_read_size = (num_perf_counters + 1) * sizeof(unsigned long long);
4817
const ssize_t actual_read_size = read(rci->fd_perf, &perf_data[0], sizeof(perf_data));
4818
4819
if (actual_read_size != expected_read_size)
4820
err(-1, "%s: failed to read perf_data (%zu %zu)", __func__, expected_read_size, actual_read_size);
4821
}
4822
4823
for (unsigned int i = 0, pi = 1; i < NUM_RAPL_COUNTERS; ++i) {
4824
switch (rci->source[i]) {
4825
case COUNTER_SOURCE_NONE:
4826
rci->data[i] = 0;
4827
break;
4828
4829
case COUNTER_SOURCE_PERF:
4830
assert(pi < ARRAY_SIZE(perf_data));
4831
assert(rci->fd_perf != -1);
4832
4833
if (debug >= 2)
4834
fprintf(stderr, "Reading rapl counter via perf at %u (%llu %e %lf)\n",
4835
i, perf_data[pi], rci->scale[i], perf_data[pi] * rci->scale[i]);
4836
4837
rci->data[i] = perf_data[pi];
4838
4839
++pi;
4840
break;
4841
4842
case COUNTER_SOURCE_MSR:
4843
if (debug >= 2)
4844
fprintf(stderr, "Reading rapl counter via msr at %u\n", i);
4845
4846
assert(!no_msr);
4847
if (rci->flags[i] & RAPL_COUNTER_FLAG_USE_MSR_SUM) {
4848
if (get_msr_sum(cpu, rci->msr[i], &rci->data[i]))
4849
return -13 - i;
4850
} else {
4851
if (get_msr(cpu, rci->msr[i], &rci->data[i]))
4852
return -13 - i;
4853
}
4854
4855
rci->data[i] &= rci->msr_mask[i];
4856
if (rci->msr_shift[i] >= 0)
4857
rci->data[i] >>= abs(rci->msr_shift[i]);
4858
else
4859
rci->data[i] <<= abs(rci->msr_shift[i]);
4860
4861
break;
4862
}
4863
}
4864
4865
BUILD_BUG_ON(NUM_RAPL_COUNTERS != 8);
4866
write_rapl_counter(&p->energy_pkg, rci, RAPL_RCI_INDEX_ENERGY_PKG);
4867
write_rapl_counter(&p->energy_cores, rci, RAPL_RCI_INDEX_ENERGY_CORES);
4868
write_rapl_counter(&p->energy_dram, rci, RAPL_RCI_INDEX_DRAM);
4869
write_rapl_counter(&p->energy_gfx, rci, RAPL_RCI_INDEX_GFX);
4870
write_rapl_counter(&p->rapl_pkg_perf_status, rci, RAPL_RCI_INDEX_PKG_PERF_STATUS);
4871
write_rapl_counter(&p->rapl_dram_perf_status, rci, RAPL_RCI_INDEX_DRAM_PERF_STATUS);
4872
write_rapl_counter(&c->core_energy, rci, RAPL_RCI_INDEX_CORE_ENERGY);
4873
write_rapl_counter(&pplat_cnt->energy_psys, rci, RAPL_RCI_INDEX_ENERGY_PLATFORM);
4874
4875
return 0;
4876
}
4877
4878
char *find_sysfs_path_by_id(struct sysfs_path *sp, int id)
4879
{
4880
while (sp) {
4881
if (sp->id == id)
4882
return (sp->path);
4883
sp = sp->next;
4884
}
4885
if (debug)
4886
warnx("%s: id%d not found", __func__, id);
4887
return NULL;
4888
}
4889
4890
/*
 * get_cstate_counters - sample the core and package C-state counters of one CPU.
 * @cpu: index into ccstate_counter_info[]; also the CPU used for MSR reads
 *
 * Perf-sourced counters come from up to two descriptors (fd_perf_core and
 * fd_perf_pkg), each read once; MSR-sourced counters are read individually.
 * Values are stored into t/c/p only for counters whose source is not NONE.
 *
 * Return: 0 on success, or (-13 - i) when the MSR read behind counter slot i
 * fails. Failed or unexpectedly sized perf reads terminate via err().
 */
int get_cstate_counters(unsigned int cpu, PER_THREAD_PARAMS)
{
	/*
	 * Overcommit memory a little bit here,
	 * but skip calculating exact sizes for the buffers.
	 */
	unsigned long long perf_data[NUM_CSTATE_COUNTERS];
	unsigned long long perf_data_core[NUM_CSTATE_COUNTERS + 1];
	unsigned long long perf_data_pkg[NUM_CSTATE_COUNTERS + 1];

	struct cstate_counter_info_t *cci;

	if (debug >= 2)
		fprintf(stderr, "%s: cpu%u\n", __func__, cpu);

	assert(ccstate_counter_info);
	/* cpu indexes the array below, so it must be strictly less than its size. */
	assert(cpu < ccstate_counter_info_size);

	ZERO_ARRAY(perf_data);
	ZERO_ARRAY(perf_data_core);
	ZERO_ARRAY(perf_data_pkg);

	cci = &ccstate_counter_info[cpu];

	/*
	 * If we have any perf counters to read, read them all now, in bulk
	 */
	const size_t num_perf_counters = cstate_counter_info_count_perf(cci);
	ssize_t expected_read_size = num_perf_counters * sizeof(unsigned long long);
	ssize_t actual_read_size_core = 0, actual_read_size_pkg = 0;

	if (cci->fd_perf_core != -1) {
		/* Each descriptor read begins with number of counters read. */
		expected_read_size += sizeof(unsigned long long);

		actual_read_size_core = read(cci->fd_perf_core, &perf_data_core[0], sizeof(perf_data_core));

		if (actual_read_size_core <= 0)
			err(-1, "%s: read perf %s: %zd", __func__, "core", actual_read_size_core);
	}

	if (cci->fd_perf_pkg != -1) {
		/* Each descriptor read begins with number of counters read. */
		expected_read_size += sizeof(unsigned long long);

		actual_read_size_pkg = read(cci->fd_perf_pkg, &perf_data_pkg[0], sizeof(perf_data_pkg));

		if (actual_read_size_pkg <= 0)
			err(-1, "%s: read perf %s: %zd", __func__, "pkg", actual_read_size_pkg);
	}

	const ssize_t actual_read_size_total = actual_read_size_core + actual_read_size_pkg;

	if (actual_read_size_total != expected_read_size)
		err(-1, "%s: failed to read perf_data (%zd %zd)", __func__, expected_read_size, actual_read_size_total);

	/*
	 * Copy ccstate and pcstate data into unified buffer.
	 *
	 * Skip first element from core and pkg buffers.
	 * Kernel puts there how many counters were read.
	 */
	const size_t num_core_counters = perf_data_core[0];
	const size_t num_pkg_counters = perf_data_pkg[0];

	assert(num_perf_counters == num_core_counters + num_pkg_counters);

	/* Copy ccstate perf data */
	memcpy(&perf_data[0], &perf_data_core[1], num_core_counters * sizeof(unsigned long long));

	/* Copy pcstate perf data */
	memcpy(&perf_data[num_core_counters], &perf_data_pkg[1], num_pkg_counters * sizeof(unsigned long long));

	/* pi walks the unified perf buffer; i walks the counter slots. */
	for (unsigned int i = 0, pi = 0; i < NUM_CSTATE_COUNTERS; ++i) {
		switch (cci->source[i]) {
		case COUNTER_SOURCE_NONE:
			break;

		case COUNTER_SOURCE_PERF:
			assert(pi < ARRAY_SIZE(perf_data));
			assert(cci->fd_perf_core != -1 || cci->fd_perf_pkg != -1);

			if (debug >= 2)
				fprintf(stderr, "cstate via %s %u: %llu\n", "perf", i, perf_data[pi]);

			cci->data[i] = perf_data[pi];

			++pi;
			break;

		case COUNTER_SOURCE_MSR:
			assert(!no_msr);
			if (get_msr(cpu, cci->msr[i], &cci->data[i]))
				return -13 - i;

			if (debug >= 2)
				fprintf(stderr, "cstate via %s0x%llx %u: %llu\n", "msr", cci->msr[i], i, cci->data[i]);

			break;
		}
	}

	/*
	 * Helper to write the data only if the source of
	 * the counter for the current cpu is not none.
	 *
	 * Otherwise we would overwrite core data with 0 (default value),
	 * when invoked for the thread sibling.
	 */
#define PERF_COUNTER_WRITE_DATA(out_counter, index) do {	\
	if (cci->source[index] != COUNTER_SOURCE_NONE)		\
		out_counter = cci->data[index];			\
} while (0)

	/* The list below must be extended whenever a C-state counter is added. */
	BUILD_BUG_ON(NUM_CSTATE_COUNTERS != 11);

	PERF_COUNTER_WRITE_DATA(t->c1, CCSTATE_RCI_INDEX_C1_RESIDENCY);
	PERF_COUNTER_WRITE_DATA(c->c3, CCSTATE_RCI_INDEX_C3_RESIDENCY);
	PERF_COUNTER_WRITE_DATA(c->c6, CCSTATE_RCI_INDEX_C6_RESIDENCY);
	PERF_COUNTER_WRITE_DATA(c->c7, CCSTATE_RCI_INDEX_C7_RESIDENCY);

	PERF_COUNTER_WRITE_DATA(p->pc2, PCSTATE_RCI_INDEX_C2_RESIDENCY);
	PERF_COUNTER_WRITE_DATA(p->pc3, PCSTATE_RCI_INDEX_C3_RESIDENCY);
	PERF_COUNTER_WRITE_DATA(p->pc6, PCSTATE_RCI_INDEX_C6_RESIDENCY);
	PERF_COUNTER_WRITE_DATA(p->pc7, PCSTATE_RCI_INDEX_C7_RESIDENCY);
	PERF_COUNTER_WRITE_DATA(p->pc8, PCSTATE_RCI_INDEX_C8_RESIDENCY);
	PERF_COUNTER_WRITE_DATA(p->pc9, PCSTATE_RCI_INDEX_C9_RESIDENCY);
	PERF_COUNTER_WRITE_DATA(p->pc10, PCSTATE_RCI_INDEX_C10_RESIDENCY);

#undef PERF_COUNTER_WRITE_DATA

	return 0;
}
5023
5024
size_t msr_counter_info_count_perf(const struct msr_counter_info_t *mci)
5025
{
5026
size_t ret = 0;
5027
5028
for (int i = 0; i < NUM_MSR_COUNTERS; ++i)
5029
if (mci->source[i] == COUNTER_SOURCE_PERF)
5030
++ret;
5031
5032
return ret;
5033
}
5034
5035
int get_smi_aperf_mperf(unsigned int cpu, struct thread_data *t)
5036
{
5037
unsigned long long perf_data[NUM_MSR_COUNTERS + 1];
5038
5039
struct msr_counter_info_t *mci;
5040
5041
if (debug >= 2)
5042
fprintf(stderr, "%s: cpu%d\n", __func__, cpu);
5043
5044
assert(msr_counter_info);
5045
assert(cpu <= msr_counter_info_size);
5046
5047
mci = &msr_counter_info[cpu];
5048
5049
ZERO_ARRAY(perf_data);
5050
ZERO_ARRAY(mci->data);
5051
5052
if (mci->fd_perf != -1) {
5053
const size_t num_perf_counters = msr_counter_info_count_perf(mci);
5054
const ssize_t expected_read_size = (num_perf_counters + 1) * sizeof(unsigned long long);
5055
const ssize_t actual_read_size = read(mci->fd_perf, &perf_data[0], sizeof(perf_data));
5056
5057
if (actual_read_size != expected_read_size)
5058
err(-1, "%s: failed to read perf_data (%zu %zu)", __func__, expected_read_size, actual_read_size);
5059
}
5060
5061
for (unsigned int i = 0, pi = 1; i < NUM_MSR_COUNTERS; ++i) {
5062
switch (mci->source[i]) {
5063
case COUNTER_SOURCE_NONE:
5064
break;
5065
5066
case COUNTER_SOURCE_PERF:
5067
assert(pi < ARRAY_SIZE(perf_data));
5068
assert(mci->fd_perf != -1);
5069
5070
if (debug >= 2)
5071
fprintf(stderr, "Reading msr counter via perf at %u: %llu\n", i, perf_data[pi]);
5072
5073
mci->data[i] = perf_data[pi];
5074
5075
++pi;
5076
break;
5077
5078
case COUNTER_SOURCE_MSR:
5079
assert(!no_msr);
5080
5081
if (get_msr(cpu, mci->msr[i], &mci->data[i]))
5082
return -2 - i;
5083
5084
mci->data[i] &= mci->msr_mask[i];
5085
5086
if (debug >= 2)
5087
fprintf(stderr, "Reading msr counter via msr at %u: %llu\n", i, mci->data[i]);
5088
5089
break;
5090
}
5091
}
5092
5093
BUILD_BUG_ON(NUM_MSR_COUNTERS != 3);
5094
t->aperf = mci->data[MSR_RCI_INDEX_APERF];
5095
t->mperf = mci->data[MSR_RCI_INDEX_MPERF];
5096
t->smi_count = mci->data[MSR_RCI_INDEX_SMI];
5097
5098
return 0;
5099
}
5100
5101
int perf_counter_info_read_values(struct perf_counter_info *pp, int cpu, unsigned long long *out, size_t out_size)
5102
{
5103
unsigned int domain;
5104
unsigned long long value;
5105
int fd_counter;
5106
5107
for (size_t i = 0; pp; ++i, pp = pp->next) {
5108
domain = cpu_to_domain(pp, cpu);
5109
assert(domain < pp->num_domains);
5110
5111
fd_counter = pp->fd_perf_per_domain[domain];
5112
5113
if (fd_counter == -1)
5114
continue;
5115
5116
if (read(fd_counter, &value, sizeof(value)) != sizeof(value))
5117
return 1;
5118
5119
assert(i < out_size);
5120
out[i] = value * pp->scale;
5121
}
5122
5123
return 0;
5124
}
5125
5126
/*
 * pmt_gen_value_mask - build a mask with bits @lsb..@msb (inclusive) set.
 * @lsb: lowest bit of the field, expected to be <= @msb and <= 63
 * @msb: highest bit of the field; 63 or more selects every bit up to the top
 *
 * The shifts operate on unsigned long constants: shifting a plain int 1
 * would be undefined for bit positions of 31 and above, producing wrong
 * masks for fields that reach into the upper half of the word.
 *
 * Return: the mask as an unsigned long.
 */
unsigned long pmt_gen_value_mask(unsigned int lsb, unsigned int msb)
{
	unsigned long mask;

	/* (1UL << 64) is not representable, so the top bit is special-cased. */
	if (msb >= 63)
		mask = ~0UL;
	else
		mask = (1UL << (msb + 1)) - 1;

	/* Drop the bits below lsb. */
	mask -= (1UL << lsb) - 1;

	return mask;
}
5139
5140
unsigned long pmt_read_counter(struct pmt_counter *ppmt, unsigned int domain_id)
5141
{
5142
if (domain_id >= ppmt->num_domains)
5143
return 0;
5144
5145
const unsigned long *pmmio = ppmt->domains[domain_id].pcounter;
5146
const unsigned long value = pmmio ? *pmmio : 0;
5147
const unsigned long value_mask = pmt_gen_value_mask(ppmt->lsb, ppmt->msb);
5148
const unsigned long value_shift = ppmt->lsb;
5149
5150
return (value & value_mask) >> value_shift;
5151
}
5152
5153
/* Rapl domain enumeration helpers */
5154
static inline int get_rapl_num_domains(void)
5155
{
5156
if (!platform->has_per_core_rapl)
5157
return topo.num_packages;
5158
5159
return topo.num_cores;
5160
}
5161
5162
static inline int get_rapl_domain_id(int cpu)
5163
{
5164
if (!platform->has_per_core_rapl)
5165
return cpus[cpu].package_id;
5166
5167
return GLOBAL_CORE_ID(cpus[cpu].core_id, cpus[cpu].package_id);
5168
}
5169
5170
/*
5171
* get_counters(...)
5172
* migrate to cpu
5173
* acquire and record local counters for that cpu
5174
*/
5175
int get_counters(PER_THREAD_PARAMS)
5176
{
5177
int cpu = t->cpu_id;
5178
unsigned long long msr;
5179
struct msr_counter *mp;
5180
struct pmt_counter *pp;
5181
int i;
5182
int status;
5183
5184
if (cpu_migrate(cpu)) {
5185
fprintf(outf, "%s: Could not migrate to CPU %d\n", __func__, cpu);
5186
return -1;
5187
}
5188
5189
gettimeofday(&t->tv_begin, (struct timezone *)NULL);
5190
5191
if (first_counter_read)
5192
get_apic_id(t);
5193
5194
t->tsc = rdtsc(); /* we are running on local CPU of interest */
5195
5196
get_smi_aperf_mperf(cpu, t);
5197
5198
if (DO_BIC(BIC_LLC_MRPS) || DO_BIC(BIC_LLC_HIT))
5199
get_perf_llc_stats(cpu, &t->llc);
5200
5201
if (DO_BIC(BIC_L2_MRPS) || DO_BIC(BIC_L2_HIT))
5202
get_perf_l2_stats(cpu, &t->l2);
5203
5204
if (DO_BIC(BIC_IPC))
5205
if (read(get_instr_count_fd(cpu), &t->instr_count, sizeof(long long)) != sizeof(long long))
5206
return -4;
5207
5208
if (DO_BIC(BIC_IRQ))
5209
t->irq_count = irqs_per_cpu[cpu];
5210
if (DO_BIC(BIC_NMI))
5211
t->nmi_count = nmi_per_cpu[cpu];
5212
5213
get_cstate_counters(cpu, t, c, p);
5214
5215
for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
5216
if (get_mp(cpu, mp, &t->counter[i], mp->sp->path))
5217
return -10;
5218
}
5219
5220
if (perf_counter_info_read_values(sys.perf_tp, cpu, t->perf_counter, MAX_ADDED_THREAD_COUNTERS))
5221
return -10;
5222
5223
for (i = 0, pp = sys.pmt_tp; pp; i++, pp = pp->next)
5224
t->pmt_counter[i] = pmt_read_counter(pp, t->cpu_id);
5225
5226
/* collect core counters only for 1st thread in core */
5227
if (!is_cpu_first_thread_in_core(t, c))
5228
goto done;
5229
5230
if (platform->has_per_core_rapl) {
5231
status = get_rapl_counters(cpu, get_rapl_domain_id(cpu), c, p);
5232
if (status != 0)
5233
return status;
5234
}
5235
5236
if (DO_BIC(BIC_CPU_c7) && t->is_atom) {
5237
/*
5238
* For Atom CPUs that has core cstate deeper than c6,
5239
* MSR_CORE_C6_RESIDENCY returns residency of cc6 and deeper.
5240
* Minus CC7 (and deeper cstates) residency to get
5241
* accturate cc6 residency.
5242
*/
5243
c->c6 -= c->c7;
5244
}
5245
5246
if (DO_BIC(BIC_Mod_c6))
5247
if (get_msr(cpu, MSR_MODULE_C6_RES_MS, &c->mc6_us))
5248
return -8;
5249
5250
if (DO_BIC(BIC_CoreTmp)) {
5251
if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
5252
return -9;
5253
c->core_temp_c = tj_max - ((msr >> 16) & 0x7F);
5254
}
5255
5256
if (DO_BIC(BIC_CORE_THROT_CNT))
5257
get_core_throt_cnt(cpu, &c->core_throt_cnt);
5258
5259
for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
5260
if (get_mp(cpu, mp, &c->counter[i], mp->sp->path))
5261
return -10;
5262
}
5263
5264
if (perf_counter_info_read_values(sys.perf_cp, cpu, c->perf_counter, MAX_ADDED_CORE_COUNTERS))
5265
return -10;
5266
5267
for (i = 0, pp = sys.pmt_cp; pp; i++, pp = pp->next)
5268
c->pmt_counter[i] = pmt_read_counter(pp, cpus[t->cpu_id].core_id);
5269
5270
/* collect package counters only for 1st core in package */
5271
if (!is_cpu_first_core_in_package(t, p))
5272
goto done;
5273
5274
if (DO_BIC(BIC_Totl_c0)) {
5275
if (get_msr(cpu, MSR_PKG_WEIGHTED_CORE_C0_RES, &p->pkg_wtd_core_c0))
5276
return -10;
5277
}
5278
if (DO_BIC(BIC_Any_c0)) {
5279
if (get_msr(cpu, MSR_PKG_ANY_CORE_C0_RES, &p->pkg_any_core_c0))
5280
return -11;
5281
}
5282
if (DO_BIC(BIC_GFX_c0)) {
5283
if (get_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES, &p->pkg_any_gfxe_c0))
5284
return -12;
5285
}
5286
if (DO_BIC(BIC_CPUGFX)) {
5287
if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0))
5288
return -13;
5289
}
5290
5291
if (DO_BIC(BIC_CPU_LPI))
5292
p->cpu_lpi = cpuidle_cur_cpu_lpi_us;
5293
if (DO_BIC(BIC_SYS_LPI))
5294
p->sys_lpi = cpuidle_cur_sys_lpi_us;
5295
5296
if (!platform->has_per_core_rapl) {
5297
status = get_rapl_counters(cpu, get_rapl_domain_id(cpu), c, p);
5298
if (status != 0)
5299
return status;
5300
}
5301
5302
if (DO_BIC(BIC_PkgTmp)) {
5303
if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
5304
return -17;
5305
p->pkg_temp_c = tj_max - ((msr >> 16) & 0x7F);
5306
}
5307
5308
if (DO_BIC(BIC_UNCORE_MHZ))
5309
p->uncore_mhz = get_legacy_uncore_mhz(cpus[t->cpu_id].package_id);
5310
5311
if (DO_BIC(BIC_GFX_rc6))
5312
p->gfx_rc6_ms = gfx_info[GFX_rc6].val_ull;
5313
5314
if (DO_BIC(BIC_GFXMHz))
5315
p->gfx_mhz = gfx_info[GFX_MHz].val;
5316
5317
if (DO_BIC(BIC_GFXACTMHz))
5318
p->gfx_act_mhz = gfx_info[GFX_ACTMHz].val;
5319
5320
if (DO_BIC(BIC_SAM_mc6))
5321
p->sam_mc6_ms = gfx_info[SAM_mc6].val_ull;
5322
5323
if (DO_BIC(BIC_SAMMHz))
5324
p->sam_mhz = gfx_info[SAM_MHz].val;
5325
5326
if (DO_BIC(BIC_SAMACTMHz))
5327
p->sam_act_mhz = gfx_info[SAM_ACTMHz].val;
5328
5329
for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
5330
char *path = NULL;
5331
5332
if (mp->msr_num == 0) {
5333
path = find_sysfs_path_by_id(mp->sp, cpus[t->cpu_id].package_id);
5334
if (path == NULL) {
5335
warnx("%s: package_id %d not found", __func__, cpus[t->cpu_id].package_id);
5336
return -10;
5337
}
5338
}
5339
if (get_mp(cpu, mp, &p->counter[i], path))
5340
return -10;
5341
}
5342
5343
if (perf_counter_info_read_values(sys.perf_pp, cpu, p->perf_counter, MAX_ADDED_PACKAGE_COUNTERS))
5344
return -10;
5345
5346
for (i = 0, pp = sys.pmt_pp; pp; i++, pp = pp->next)
5347
p->pmt_counter[i] = pmt_read_counter(pp, cpus[t->cpu_id].package_id);
5348
5349
done:
5350
gettimeofday(&t->tv_end, (struct timezone *)NULL);
5351
5352
return 0;
5353
}
5354
5355
int pkg_cstate_limit = PCLUKN;
5356
char *pkg_cstate_limit_strings[] = { "unknown", "reserved", "pc0", "pc1", "pc2",
5357
"pc3", "pc4", "pc6", "pc6n", "pc6r", "pc7", "pc7s", "pc8", "pc9", "pc10", "unlimited"
5358
};
5359
5360
int nhm_pkg_cstate_limits[16] = { PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
5361
PCLRSV, PCLRSV
5362
};
5363
5364
int snb_pkg_cstate_limits[16] = { PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
5365
PCLRSV, PCLRSV
5366
};
5367
5368
int hsw_pkg_cstate_limits[16] = { PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
5369
PCLRSV, PCLRSV
5370
};
5371
5372
int slv_pkg_cstate_limits[16] = { PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
5373
PCL__6, PCL__7
5374
};
5375
5376
int amt_pkg_cstate_limits[16] = { PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
5377
PCLRSV, PCLRSV
5378
};
5379
5380
int phi_pkg_cstate_limits[16] = { PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
5381
PCLRSV, PCLRSV
5382
};
5383
5384
int glm_pkg_cstate_limits[16] = { PCLUNL, PCL__1, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCL_10, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
5385
PCLRSV, PCLRSV
5386
};
5387
5388
int skx_pkg_cstate_limits[16] = { PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
5389
PCLRSV, PCLRSV
5390
};
5391
5392
int icx_pkg_cstate_limits[16] = { PCL__0, PCL__2, PCL__6, PCL__6, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
5393
PCLRSV, PCLRSV
5394
};
5395
5396
void probe_cst_limit(void)
5397
{
5398
unsigned long long msr;
5399
int *pkg_cstate_limits;
5400
5401
if (!platform->has_nhm_msrs || no_msr)
5402
return;
5403
5404
switch (platform->cst_limit) {
5405
case CST_LIMIT_NHM:
5406
pkg_cstate_limits = nhm_pkg_cstate_limits;
5407
break;
5408
case CST_LIMIT_SNB:
5409
pkg_cstate_limits = snb_pkg_cstate_limits;
5410
break;
5411
case CST_LIMIT_HSW:
5412
pkg_cstate_limits = hsw_pkg_cstate_limits;
5413
break;
5414
case CST_LIMIT_SKX:
5415
pkg_cstate_limits = skx_pkg_cstate_limits;
5416
break;
5417
case CST_LIMIT_ICX:
5418
pkg_cstate_limits = icx_pkg_cstate_limits;
5419
break;
5420
case CST_LIMIT_SLV:
5421
pkg_cstate_limits = slv_pkg_cstate_limits;
5422
break;
5423
case CST_LIMIT_AMT:
5424
pkg_cstate_limits = amt_pkg_cstate_limits;
5425
break;
5426
case CST_LIMIT_KNL:
5427
pkg_cstate_limits = phi_pkg_cstate_limits;
5428
break;
5429
case CST_LIMIT_GMT:
5430
pkg_cstate_limits = glm_pkg_cstate_limits;
5431
break;
5432
default:
5433
return;
5434
}
5435
5436
get_msr(master_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
5437
pkg_cstate_limit = pkg_cstate_limits[msr & 0xF];
5438
}
5439
5440
static void dump_platform_info(void)
5441
{
5442
unsigned long long msr;
5443
unsigned int ratio;
5444
5445
if (!platform->has_nhm_msrs || no_msr)
5446
return;
5447
5448
get_msr(master_cpu, MSR_PLATFORM_INFO, &msr);
5449
5450
fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", master_cpu, msr);
5451
5452
ratio = (msr >> 40) & 0xFF;
5453
fprintf(outf, "%d * %.1f = %.1f MHz max efficiency frequency\n", ratio, bclk, ratio * bclk);
5454
5455
ratio = (msr >> 8) & 0xFF;
5456
fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n", ratio, bclk, ratio * bclk);
5457
}
5458
5459
static void dump_power_ctl(void)
5460
{
5461
unsigned long long msr;
5462
5463
if (!platform->has_nhm_msrs || no_msr)
5464
return;
5465
5466
get_msr(master_cpu, MSR_IA32_POWER_CTL, &msr);
5467
fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n", master_cpu, msr, msr & 0x2 ? "EN" : "DIS");
5468
5469
/* C-state Pre-wake Disable (CSTATE_PREWAKE_DISABLE) */
5470
if (platform->has_cst_prewake_bit)
5471
fprintf(outf, "C-state Pre-wake: %sabled\n", msr & 0x40000000 ? "DIS" : "EN");
5472
5473
return;
5474
}
5475
5476
static void dump_turbo_ratio_limit2(void)
5477
{
5478
unsigned long long msr;
5479
unsigned int ratio;
5480
5481
get_msr(master_cpu, MSR_TURBO_RATIO_LIMIT2, &msr);
5482
5483
fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", master_cpu, msr);
5484
5485
ratio = (msr >> 8) & 0xFF;
5486
if (ratio)
5487
fprintf(outf, "%d * %.1f = %.1f MHz max turbo 18 active cores\n", ratio, bclk, ratio * bclk);
5488
5489
ratio = (msr >> 0) & 0xFF;
5490
if (ratio)
5491
fprintf(outf, "%d * %.1f = %.1f MHz max turbo 17 active cores\n", ratio, bclk, ratio * bclk);
5492
return;
5493
}
5494
5495
static void dump_turbo_ratio_limit1(void)
5496
{
5497
unsigned long long msr;
5498
unsigned int ratio;
5499
5500
get_msr(master_cpu, MSR_TURBO_RATIO_LIMIT1, &msr);
5501
5502
fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", master_cpu, msr);
5503
5504
ratio = (msr >> 56) & 0xFF;
5505
if (ratio)
5506
fprintf(outf, "%d * %.1f = %.1f MHz max turbo 16 active cores\n", ratio, bclk, ratio * bclk);
5507
5508
ratio = (msr >> 48) & 0xFF;
5509
if (ratio)
5510
fprintf(outf, "%d * %.1f = %.1f MHz max turbo 15 active cores\n", ratio, bclk, ratio * bclk);
5511
5512
ratio = (msr >> 40) & 0xFF;
5513
if (ratio)
5514
fprintf(outf, "%d * %.1f = %.1f MHz max turbo 14 active cores\n", ratio, bclk, ratio * bclk);
5515
5516
ratio = (msr >> 32) & 0xFF;
5517
if (ratio)
5518
fprintf(outf, "%d * %.1f = %.1f MHz max turbo 13 active cores\n", ratio, bclk, ratio * bclk);
5519
5520
ratio = (msr >> 24) & 0xFF;
5521
if (ratio)
5522
fprintf(outf, "%d * %.1f = %.1f MHz max turbo 12 active cores\n", ratio, bclk, ratio * bclk);
5523
5524
ratio = (msr >> 16) & 0xFF;
5525
if (ratio)
5526
fprintf(outf, "%d * %.1f = %.1f MHz max turbo 11 active cores\n", ratio, bclk, ratio * bclk);
5527
5528
ratio = (msr >> 8) & 0xFF;
5529
if (ratio)
5530
fprintf(outf, "%d * %.1f = %.1f MHz max turbo 10 active cores\n", ratio, bclk, ratio * bclk);
5531
5532
ratio = (msr >> 0) & 0xFF;
5533
if (ratio)
5534
fprintf(outf, "%d * %.1f = %.1f MHz max turbo 9 active cores\n", ratio, bclk, ratio * bclk);
5535
return;
5536
}
5537
5538
static void dump_turbo_ratio_limits(int trl_msr_offset)
5539
{
5540
unsigned long long msr, core_counts;
5541
int shift;
5542
5543
get_msr(master_cpu, trl_msr_offset, &msr);
5544
fprintf(outf, "cpu%d: MSR_%sTURBO_RATIO_LIMIT: 0x%08llx\n", master_cpu, trl_msr_offset == MSR_SECONDARY_TURBO_RATIO_LIMIT ? "SECONDARY_" : "", msr);
5545
5546
if (platform->trl_msrs & TRL_CORECOUNT) {
5547
get_msr(master_cpu, MSR_TURBO_RATIO_LIMIT1, &core_counts);
5548
fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", master_cpu, core_counts);
5549
} else {
5550
core_counts = 0x0807060504030201;
5551
}
5552
5553
for (shift = 56; shift >= 0; shift -= 8) {
5554
unsigned int ratio, group_size;
5555
5556
ratio = (msr >> shift) & 0xFF;
5557
group_size = (core_counts >> shift) & 0xFF;
5558
if (ratio)
5559
fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n", ratio, bclk, ratio * bclk, group_size);
5560
}
5561
5562
return;
5563
}
5564
5565
static void dump_atom_turbo_ratio_limits(void)
5566
{
5567
unsigned long long msr;
5568
unsigned int ratio;
5569
5570
get_msr(master_cpu, MSR_ATOM_CORE_RATIOS, &msr);
5571
fprintf(outf, "cpu%d: MSR_ATOM_CORE_RATIOS: 0x%08llx\n", master_cpu, msr & 0xFFFFFFFF);
5572
5573
ratio = (msr >> 0) & 0x3F;
5574
if (ratio)
5575
fprintf(outf, "%d * %.1f = %.1f MHz minimum operating frequency\n", ratio, bclk, ratio * bclk);
5576
5577
ratio = (msr >> 8) & 0x3F;
5578
if (ratio)
5579
fprintf(outf, "%d * %.1f = %.1f MHz low frequency mode (LFM)\n", ratio, bclk, ratio * bclk);
5580
5581
ratio = (msr >> 16) & 0x3F;
5582
if (ratio)
5583
fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n", ratio, bclk, ratio * bclk);
5584
5585
get_msr(master_cpu, MSR_ATOM_CORE_TURBO_RATIOS, &msr);
5586
fprintf(outf, "cpu%d: MSR_ATOM_CORE_TURBO_RATIOS: 0x%08llx\n", master_cpu, msr & 0xFFFFFFFF);
5587
5588
ratio = (msr >> 24) & 0x3F;
5589
if (ratio)
5590
fprintf(outf, "%d * %.1f = %.1f MHz max turbo 4 active cores\n", ratio, bclk, ratio * bclk);
5591
5592
ratio = (msr >> 16) & 0x3F;
5593
if (ratio)
5594
fprintf(outf, "%d * %.1f = %.1f MHz max turbo 3 active cores\n", ratio, bclk, ratio * bclk);
5595
5596
ratio = (msr >> 8) & 0x3F;
5597
if (ratio)
5598
fprintf(outf, "%d * %.1f = %.1f MHz max turbo 2 active cores\n", ratio, bclk, ratio * bclk);
5599
5600
ratio = (msr >> 0) & 0x3F;
5601
if (ratio)
5602
fprintf(outf, "%d * %.1f = %.1f MHz max turbo 1 active core\n", ratio, bclk, ratio * bclk);
5603
}
5604
5605
static void dump_knl_turbo_ratio_limits(void)
5606
{
5607
const unsigned int buckets_no = 7;
5608
5609
unsigned long long msr;
5610
int delta_cores, delta_ratio;
5611
int i, b_nr;
5612
unsigned int cores[buckets_no];
5613
unsigned int ratio[buckets_no];
5614
5615
get_msr(master_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
5616
5617
fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", master_cpu, msr);
5618
5619
/*
5620
* Turbo encoding in KNL is as follows:
5621
* [0] -- Reserved
5622
* [7:1] -- Base value of number of active cores of bucket 1.
5623
* [15:8] -- Base value of freq ratio of bucket 1.
5624
* [20:16] -- +ve delta of number of active cores of bucket 2.
5625
* i.e. active cores of bucket 2 =
5626
* active cores of bucket 1 + delta
5627
* [23:21] -- Negative delta of freq ratio of bucket 2.
5628
* i.e. freq ratio of bucket 2 =
5629
* freq ratio of bucket 1 - delta
5630
* [28:24]-- +ve delta of number of active cores of bucket 3.
5631
* [31:29]-- -ve delta of freq ratio of bucket 3.
5632
* [36:32]-- +ve delta of number of active cores of bucket 4.
5633
* [39:37]-- -ve delta of freq ratio of bucket 4.
5634
* [44:40]-- +ve delta of number of active cores of bucket 5.
5635
* [47:45]-- -ve delta of freq ratio of bucket 5.
5636
* [52:48]-- +ve delta of number of active cores of bucket 6.
5637
* [55:53]-- -ve delta of freq ratio of bucket 6.
5638
* [60:56]-- +ve delta of number of active cores of bucket 7.
5639
* [63:61]-- -ve delta of freq ratio of bucket 7.
5640
*/
5641
5642
b_nr = 0;
5643
cores[b_nr] = (msr & 0xFF) >> 1;
5644
ratio[b_nr] = (msr >> 8) & 0xFF;
5645
5646
for (i = 16; i < 64; i += 8) {
5647
delta_cores = (msr >> i) & 0x1F;
5648
delta_ratio = (msr >> (i + 5)) & 0x7;
5649
5650
cores[b_nr + 1] = cores[b_nr] + delta_cores;
5651
ratio[b_nr + 1] = ratio[b_nr] - delta_ratio;
5652
b_nr++;
5653
}
5654
5655
for (i = buckets_no - 1; i >= 0; i--)
5656
if (i > 0 ? ratio[i] != ratio[i - 1] : 1)
5657
fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n", ratio[i], bclk, ratio[i] * bclk, cores[i]);
5658
}
5659
5660
static void dump_cst_cfg(void)
5661
{
5662
unsigned long long msr;
5663
5664
if (!platform->has_nhm_msrs || no_msr)
5665
return;
5666
5667
get_msr(master_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
5668
5669
fprintf(outf, "cpu%d: MSR_PKG_CST_CONFIG_CONTROL: 0x%08llx", master_cpu, msr);
5670
5671
fprintf(outf, " (%s%s%s%s%slocked, pkg-cstate-limit=%d (%s)",
5672
(msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "",
5673
(msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "",
5674
(msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "",
5675
(msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "",
5676
(msr & (1 << 15)) ? "" : "UN", (unsigned int)msr & 0xF, pkg_cstate_limit_strings[pkg_cstate_limit]);
5677
5678
#define AUTOMATIC_CSTATE_CONVERSION (1UL << 16)
5679
if (platform->has_cst_auto_convension) {
5680
fprintf(outf, ", automatic c-state conversion=%s", (msr & AUTOMATIC_CSTATE_CONVERSION) ? "on" : "off");
5681
}
5682
5683
fprintf(outf, ")\n");
5684
5685
return;
5686
}
5687
5688
static void dump_config_tdp(void)
5689
{
5690
unsigned long long msr;
5691
5692
get_msr(master_cpu, MSR_CONFIG_TDP_NOMINAL, &msr);
5693
fprintf(outf, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", master_cpu, msr);
5694
fprintf(outf, " (base_ratio=%d)\n", (unsigned int)msr & 0xFF);
5695
5696
get_msr(master_cpu, MSR_CONFIG_TDP_LEVEL_1, &msr);
5697
fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", master_cpu, msr);
5698
if (msr) {
5699
fprintf(outf, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
5700
fprintf(outf, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
5701
fprintf(outf, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
5702
fprintf(outf, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0x7FFF);
5703
}
5704
fprintf(outf, ")\n");
5705
5706
get_msr(master_cpu, MSR_CONFIG_TDP_LEVEL_2, &msr);
5707
fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", master_cpu, msr);
5708
if (msr) {
5709
fprintf(outf, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
5710
fprintf(outf, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
5711
fprintf(outf, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
5712
fprintf(outf, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0x7FFF);
5713
}
5714
fprintf(outf, ")\n");
5715
5716
get_msr(master_cpu, MSR_CONFIG_TDP_CONTROL, &msr);
5717
fprintf(outf, "cpu%d: MSR_CONFIG_TDP_CONTROL: 0x%08llx (", master_cpu, msr);
5718
if ((msr) & 0x3)
5719
fprintf(outf, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3);
5720
fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
5721
fprintf(outf, ")\n");
5722
5723
get_msr(master_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr);
5724
fprintf(outf, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", master_cpu, msr);
5725
fprintf(outf, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0xFF);
5726
fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
5727
fprintf(outf, ")\n");
5728
}
5729
5730
unsigned int irtl_time_units[] = { 1, 32, 1024, 32768, 1048576, 33554432, 0, 0 };
5731
5732
void print_irtl(void)
5733
{
5734
unsigned long long msr;
5735
5736
if (!platform->has_irtl_msrs || no_msr)
5737
return;
5738
5739
if (platform->supported_cstates & PC3) {
5740
get_msr(master_cpu, MSR_PKGC3_IRTL, &msr);
5741
fprintf(outf, "cpu%d: MSR_PKGC3_IRTL: 0x%08llx (", master_cpu, msr);
5742
fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
5743
}
5744
5745
if (platform->supported_cstates & PC6) {
5746
get_msr(master_cpu, MSR_PKGC6_IRTL, &msr);
5747
fprintf(outf, "cpu%d: MSR_PKGC6_IRTL: 0x%08llx (", master_cpu, msr);
5748
fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
5749
}
5750
5751
if (platform->supported_cstates & PC7) {
5752
get_msr(master_cpu, MSR_PKGC7_IRTL, &msr);
5753
fprintf(outf, "cpu%d: MSR_PKGC7_IRTL: 0x%08llx (", master_cpu, msr);
5754
fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
5755
}
5756
5757
if (platform->supported_cstates & PC8) {
5758
get_msr(master_cpu, MSR_PKGC8_IRTL, &msr);
5759
fprintf(outf, "cpu%d: MSR_PKGC8_IRTL: 0x%08llx (", master_cpu, msr);
5760
fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
5761
}
5762
5763
if (platform->supported_cstates & PC9) {
5764
get_msr(master_cpu, MSR_PKGC9_IRTL, &msr);
5765
fprintf(outf, "cpu%d: MSR_PKGC9_IRTL: 0x%08llx (", master_cpu, msr);
5766
fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
5767
}
5768
5769
if (platform->supported_cstates & PC10) {
5770
get_msr(master_cpu, MSR_PKGC10_IRTL, &msr);
5771
fprintf(outf, "cpu%d: MSR_PKGC10_IRTL: 0x%08llx (", master_cpu, msr);
5772
fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
5773
}
5774
}
5775
5776
void free_fd_percpu(void)
5777
{
5778
int i;
5779
5780
if (!fd_percpu)
5781
return;
5782
5783
for (i = 0; i < topo.max_cpu_num + 1; ++i) {
5784
if (fd_percpu[i] != 0)
5785
close(fd_percpu[i]);
5786
}
5787
5788
free(fd_percpu);
5789
fd_percpu = NULL;
5790
}
5791
5792
void free_fd_instr_count_percpu(void)
5793
{
5794
if (!fd_instr_count_percpu)
5795
return;
5796
5797
for (int i = 0; i < topo.max_cpu_num + 1; ++i) {
5798
if (fd_instr_count_percpu[i] != 0)
5799
close(fd_instr_count_percpu[i]);
5800
}
5801
5802
free(fd_instr_count_percpu);
5803
fd_instr_count_percpu = NULL;
5804
}
5805
5806
void free_fd_llc_percpu(void)
5807
{
5808
if (!fd_llc_percpu)
5809
return;
5810
5811
for (int i = 0; i < topo.max_cpu_num + 1; ++i) {
5812
if (fd_llc_percpu[i] != 0)
5813
close(fd_llc_percpu[i]);
5814
}
5815
5816
free(fd_llc_percpu);
5817
fd_llc_percpu = NULL;
5818
5819
BIC_NOT_PRESENT(BIC_LLC_MRPS);
5820
BIC_NOT_PRESENT(BIC_LLC_HIT);
5821
}
5822
5823
void free_fd_l2_percpu(void)
5824
{
5825
if (!fd_l2_percpu)
5826
return;
5827
5828
for (int i = 0; i < topo.max_cpu_num + 1; ++i) {
5829
if (fd_l2_percpu[i] != 0)
5830
close(fd_l2_percpu[i]);
5831
}
5832
5833
free(fd_l2_percpu);
5834
fd_l2_percpu = NULL;
5835
5836
BIC_NOT_PRESENT(BIC_L2_MRPS);
5837
BIC_NOT_PRESENT(BIC_L2_HIT);
5838
}
5839
5840
void free_fd_cstate(void)
5841
{
5842
if (!ccstate_counter_info)
5843
return;
5844
5845
const int counter_info_num = ccstate_counter_info_size;
5846
5847
for (int counter_id = 0; counter_id < counter_info_num; ++counter_id) {
5848
if (ccstate_counter_info[counter_id].fd_perf_core != -1)
5849
close(ccstate_counter_info[counter_id].fd_perf_core);
5850
5851
if (ccstate_counter_info[counter_id].fd_perf_pkg != -1)
5852
close(ccstate_counter_info[counter_id].fd_perf_pkg);
5853
}
5854
5855
free(ccstate_counter_info);
5856
ccstate_counter_info = NULL;
5857
ccstate_counter_info_size = 0;
5858
}
5859
5860
void free_fd_msr(void)
5861
{
5862
if (!msr_counter_info)
5863
return;
5864
5865
for (int cpu = 0; cpu < topo.max_cpu_num; ++cpu) {
5866
if (msr_counter_info[cpu].fd_perf != -1)
5867
close(msr_counter_info[cpu].fd_perf);
5868
}
5869
5870
free(msr_counter_info);
5871
msr_counter_info = NULL;
5872
msr_counter_info_size = 0;
5873
}
5874
5875
void free_fd_rapl_percpu(void)
5876
{
5877
if (!rapl_counter_info_perdomain)
5878
return;
5879
5880
const int num_domains = rapl_counter_info_perdomain_size;
5881
5882
for (int domain_id = 0; domain_id < num_domains; ++domain_id) {
5883
if (rapl_counter_info_perdomain[domain_id].fd_perf != -1)
5884
close(rapl_counter_info_perdomain[domain_id].fd_perf);
5885
}
5886
5887
free(rapl_counter_info_perdomain);
5888
rapl_counter_info_perdomain = NULL;
5889
rapl_counter_info_perdomain_size = 0;
5890
}
5891
5892
void free_fd_added_perf_counters_(struct perf_counter_info *pp)
5893
{
5894
if (!pp)
5895
return;
5896
5897
if (!pp->fd_perf_per_domain)
5898
return;
5899
5900
while (pp) {
5901
for (size_t domain = 0; domain < pp->num_domains; ++domain) {
5902
if (pp->fd_perf_per_domain[domain] != -1) {
5903
close(pp->fd_perf_per_domain[domain]);
5904
pp->fd_perf_per_domain[domain] = -1;
5905
}
5906
}
5907
5908
free(pp->fd_perf_per_domain);
5909
pp->fd_perf_per_domain = NULL;
5910
5911
pp = pp->next;
5912
}
5913
}
5914
5915
void free_fd_added_perf_counters(void)
5916
{
5917
free_fd_added_perf_counters_(sys.perf_tp);
5918
free_fd_added_perf_counters_(sys.perf_cp);
5919
free_fd_added_perf_counters_(sys.perf_pp);
5920
}
5921
5922
void free_all_buffers(void)
5923
{
5924
int i;
5925
5926
CPU_FREE(cpu_present_set);
5927
cpu_present_set = NULL;
5928
cpu_present_setsize = 0;
5929
5930
CPU_FREE(cpu_effective_set);
5931
cpu_effective_set = NULL;
5932
cpu_effective_setsize = 0;
5933
5934
CPU_FREE(cpu_allowed_set);
5935
cpu_allowed_set = NULL;
5936
cpu_allowed_setsize = 0;
5937
5938
CPU_FREE(cpu_affinity_set);
5939
cpu_affinity_set = NULL;
5940
cpu_affinity_setsize = 0;
5941
5942
if (perf_pcore_set) {
5943
CPU_FREE(perf_pcore_set);
5944
perf_pcore_set = NULL;
5945
}
5946
5947
if (perf_ecore_set) {
5948
CPU_FREE(perf_ecore_set);
5949
perf_ecore_set = NULL;
5950
}
5951
5952
if (perf_lcore_set) {
5953
CPU_FREE(perf_lcore_set);
5954
perf_lcore_set = NULL;
5955
}
5956
5957
free(even.threads);
5958
free(even.cores);
5959
free(even.packages);
5960
5961
even.threads = NULL;
5962
even.cores = NULL;
5963
even.packages = NULL;
5964
5965
free(odd.threads);
5966
free(odd.cores);
5967
free(odd.packages);
5968
5969
odd.threads = NULL;
5970
odd.cores = NULL;
5971
odd.packages = NULL;
5972
5973
free(output_buffer);
5974
output_buffer = NULL;
5975
outp = NULL;
5976
5977
free_fd_percpu();
5978
free_fd_instr_count_percpu();
5979
free_fd_llc_percpu();
5980
free_fd_l2_percpu();
5981
free_fd_msr();
5982
free_fd_rapl_percpu();
5983
free_fd_cstate();
5984
free_fd_added_perf_counters();
5985
5986
free(irq_column_2_cpu);
5987
free(irqs_per_cpu);
5988
free(nmi_per_cpu);
5989
5990
for (i = 0; i <= topo.max_cpu_num; ++i) {
5991
if (cpus[i].put_ids)
5992
CPU_FREE(cpus[i].put_ids);
5993
}
5994
free(cpus);
5995
}
5996
5997
/*
 * Read one decimal int from the file whose path is built printf-style
 * from fmt and the following arguments.
 *
 * Returns the parsed value, or 0 when the file cannot be opened.
 * Terminates the program via err() when the file opens but does not start
 * with a number.
 */
int parse_int_file(const char *fmt, ...)
{
	char path[PATH_MAX];
	va_list ap;
	FILE *fp;
	int parsed;

	va_start(ap, fmt);
	vsnprintf(path, sizeof(path), fmt, ap);
	va_end(ap);

	fp = fopen(path, "r");
	if (fp == NULL)
		return 0;

	if (fscanf(fp, "%d", &parsed) != 1)
		err(1, "%s: failed to parse number from file", path);

	fclose(fp);

	return parsed;
}
6020
6021
/*
 * cpu_is_first_core_in_package(cpu)
 * return 1 if the first number in this CPU's core_siblings_list is the CPU
 * itself, else 0
 */
int cpu_is_first_core_in_package(int cpu)
{
	/* parse_int_file() reads a single int, i.e. only the head of the list */
	const int first_sibling = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_siblings_list", cpu);

	return first_sibling == cpu;
}
6029
6030
/* Return the physical_package_id sysfs value of the given CPU (0 if the file cannot be opened). */
int get_package_id(int cpu)
{
	const int package_id = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu);

	return package_id;
}
6034
6035
/* Return the die_id sysfs value of the given CPU (0 if the file cannot be opened). */
int get_die_id(int cpu)
{
	const int die_id = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/die_id", cpu);

	return die_id;
}
6039
6040
/* Return the id of the given CPU's cache index3 sysfs entry (0 if the file cannot be opened). */
int get_l3_id(int cpu)
{
	const int l3_id = parse_int_file("/sys/devices/system/cpu/cpu%d/cache/index3/id", cpu);

	return l3_id;
}
6044
6045
/* Return the core_id sysfs value of the given CPU (0 if the file cannot be opened). */
int get_core_id(int cpu)
{
	const int core_id = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", cpu);

	return core_id;
}
6049
6050
void set_node_data(void)
6051
{
6052
int pkg, node, lnode, cpu, cpux;
6053
int cpu_count;
6054
6055
/* initialize logical_node_id */
6056
for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu)
6057
cpus[cpu].logical_node_id = -1;
6058
6059
cpu_count = 0;
6060
for (pkg = 0; pkg < topo.num_packages; pkg++) {
6061
lnode = 0;
6062
for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) {
6063
if (cpus[cpu].package_id != pkg)
6064
continue;
6065
/* find a cpu with an unset logical_node_id */
6066
if (cpus[cpu].logical_node_id != -1)
6067
continue;
6068
cpus[cpu].logical_node_id = lnode;
6069
node = cpus[cpu].physical_node_id;
6070
cpu_count++;
6071
/*
6072
* find all matching cpus on this pkg and set
6073
* the logical_node_id
6074
*/
6075
for (cpux = cpu; cpux <= topo.max_cpu_num; cpux++) {
6076
if ((cpus[cpux].package_id == pkg) && (cpus[cpux].physical_node_id == node)) {
6077
cpus[cpux].logical_node_id = lnode;
6078
cpu_count++;
6079
}
6080
}
6081
lnode++;
6082
if (lnode > topo.nodes_per_pkg)
6083
topo.nodes_per_pkg = lnode;
6084
}
6085
if (cpu_count >= topo.max_cpu_num)
6086
break;
6087
}
6088
}
6089
6090
int get_physical_node_id(struct cpu_topology *thiscpu)
6091
{
6092
char path[80];
6093
FILE *filep;
6094
int i;
6095
int cpu = thiscpu->cpu_id;
6096
6097
for (i = 0; i <= topo.max_cpu_num; i++) {
6098
sprintf(path, "/sys/devices/system/cpu/cpu%d/node%i/cpulist", cpu, i);
6099
filep = fopen(path, "r");
6100
if (!filep)
6101
continue;
6102
fclose(filep);
6103
return i;
6104
}
6105
return -1;
6106
}
6107
6108
static int parse_cpu_str(char *cpu_str, cpu_set_t *cpu_set, int cpu_set_size)
6109
{
6110
unsigned int start, end;
6111
char *next = cpu_str;
6112
6113
while (next && *next) {
6114
6115
if (*next == '-') /* no negative cpu numbers */
6116
return 1;
6117
6118
if (*next == '\0' || *next == '\n')
6119
break;
6120
6121
start = strtoul(next, &next, 10);
6122
6123
if (start >= CPU_SUBSET_MAXCPUS)
6124
return 1;
6125
CPU_SET_S(start, cpu_set_size, cpu_set);
6126
6127
if (*next == '\0' || *next == '\n')
6128
break;
6129
6130
if (*next == ',') {
6131
next += 1;
6132
continue;
6133
}
6134
6135
if (*next == '-') {
6136
next += 1; /* start range */
6137
} else if (*next == '.') {
6138
next += 1;
6139
if (*next == '.')
6140
next += 1; /* start range */
6141
else
6142
return 1;
6143
}
6144
6145
end = strtoul(next, &next, 10);
6146
if (end <= start)
6147
return 1;
6148
6149
while (++start <= end) {
6150
if (start >= CPU_SUBSET_MAXCPUS)
6151
return 1;
6152
CPU_SET_S(start, cpu_set_size, cpu_set);
6153
}
6154
6155
if (*next == ',')
6156
next += 1;
6157
else if (*next != '\0' && *next != '\n')
6158
return 1;
6159
}
6160
6161
return 0;
6162
}
6163
6164
int set_thread_siblings(struct cpu_topology *thiscpu)
6165
{
6166
char path[80], character;
6167
FILE *filep;
6168
unsigned long map;
6169
int so, shift, sib_core;
6170
int cpu = thiscpu->cpu_id;
6171
int offset = topo.max_cpu_num + 1;
6172
size_t size;
6173
int thread_id = 0;
6174
6175
thiscpu->put_ids = CPU_ALLOC((topo.max_cpu_num + 1));
6176
if (thiscpu->ht_id < 0)
6177
thiscpu->ht_id = thread_id++;
6178
if (!thiscpu->put_ids)
6179
return -1;
6180
6181
size = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
6182
CPU_ZERO_S(size, thiscpu->put_ids);
6183
6184
sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu);
6185
filep = fopen(path, "r");
6186
6187
if (!filep) {
6188
warnx("%s: open failed", path);
6189
return -1;
6190
}
6191
do {
6192
offset -= BITMASK_SIZE;
6193
if (fscanf(filep, "%lx%c", &map, &character) != 2)
6194
err(1, "%s: failed to parse file", path);
6195
for (shift = 0; shift < BITMASK_SIZE; shift++) {
6196
if ((map >> shift) & 0x1) {
6197
so = shift + offset;
6198
sib_core = get_core_id(so);
6199
if (sib_core == thiscpu->core_id) {
6200
CPU_SET_S(so, size, thiscpu->put_ids);
6201
if ((so != cpu) && (cpus[so].ht_id < 0)) {
6202
cpus[so].ht_id = thread_id;
6203
cpus[cpu].ht_sibling_cpu_id[thread_id] = so;
6204
if (debug)
6205
fprintf(stderr, "%s: cpu%d.ht_sibling_cpu_id[%d] = %d\n", __func__, cpu, thread_id, so);
6206
thread_id += 1;
6207
}
6208
}
6209
}
6210
}
6211
} while (character == ',');
6212
fclose(filep);
6213
6214
return CPU_COUNT_S(size, thiscpu->put_ids);
6215
}
6216
6217
/*
6218
* run func(thread, core, package) in topology order
6219
* skip non-present cpus
6220
*/
6221
6222
int for_all_cpus_2(int (func) (struct thread_data *, struct core_data *,
6223
struct pkg_data *, struct thread_data *, struct core_data *,
6224
struct pkg_data *), struct thread_data *thread_base,
6225
struct core_data *core_base, struct pkg_data *pkg_base,
6226
struct thread_data *thread_base2, struct core_data *core_base2, struct pkg_data *pkg_base2)
6227
{
6228
int cpu, retval;
6229
6230
retval = 0;
6231
6232
for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) {
6233
struct thread_data *t, *t2;
6234
struct core_data *c, *c2;
6235
struct pkg_data *p, *p2;
6236
6237
if (cpu_is_not_allowed(cpu))
6238
continue;
6239
6240
if (cpus[cpu].ht_id > 0) /* skip HT sibling */
6241
continue;
6242
6243
t = &thread_base[cpu];
6244
t2 = &thread_base2[cpu];
6245
c = &core_base[GLOBAL_CORE_ID(cpus[cpu].core_id, cpus[cpu].package_id)];
6246
c2 = &core_base2[GLOBAL_CORE_ID(cpus[cpu].core_id, cpus[cpu].package_id)];
6247
p = &pkg_base[cpus[cpu].package_id];
6248
p2 = &pkg_base2[cpus[cpu].package_id];
6249
6250
retval |= func(t, c, p, t2, c2, p2);
6251
6252
/* Handle HT sibling now */
6253
int i;
6254
6255
for (i = MAX_HT_ID; i > 0; --i) { /* ht_id 0 is self */
6256
if (cpus[cpu].ht_sibling_cpu_id[i] <= 0)
6257
continue;
6258
t = &thread_base[cpus[cpu].ht_sibling_cpu_id[i]];
6259
t2 = &thread_base2[cpus[cpu].ht_sibling_cpu_id[i]];
6260
6261
retval |= func(t, c, p, t2, c2, p2);
6262
}
6263
}
6264
return retval;
6265
}
6266
6267
/*
6268
* run func(cpu) on every cpu in /proc/stat
6269
* return max_cpu number
6270
*/
6271
int for_all_proc_cpus(int (func) (int))
6272
{
6273
FILE *fp;
6274
int cpu_num;
6275
int retval;
6276
6277
fp = fopen_or_die(proc_stat, "r");
6278
6279
retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n");
6280
if (retval != 0)
6281
err(1, "%s: failed to parse format", proc_stat);
6282
6283
while (1) {
6284
retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num);
6285
if (retval != 1)
6286
break;
6287
6288
retval = func(cpu_num);
6289
if (retval) {
6290
fclose(fp);
6291
return (retval);
6292
}
6293
}
6294
fclose(fp);
6295
return 0;
6296
}
6297
6298
#define PATH_EFFECTIVE_CPUS	"/sys/fs/cgroup/cpuset.cpus.effective"

/* last line read from PATH_EFFECTIVE_CPUS; empty until first read successfully */
static char cpu_effective_str[1024];

/*
 * Re-read the first line of PATH_EFFECTIVE_CPUS into cpu_effective_str.
 *
 * Outside of startup the file is only consulted when a previous read
 * stored something.
 *
 * Returns 1 if the stored string was replaced by different contents,
 * 0 if nothing changed, the check was skipped, or the file cannot be
 * opened.  Exits via err() if the file opens but no line can be read.
 */
static int update_effective_str(bool startup)
{
	char line[sizeof(cpu_effective_str)];
	FILE *fp;

	if (!startup && cpu_effective_str[0] == '\0')
		return 0;

	fp = fopen(PATH_EFFECTIVE_CPUS, "r");
	if (fp == NULL)
		return 0;

	if (fgets(line, sizeof(line), fp) == NULL)
		err(1, "%s: file read failed", PATH_EFFECTIVE_CPUS);

	fclose(fp);

	if (strncmp(cpu_effective_str, line, sizeof(cpu_effective_str)) == 0)
		return 0;

	strncpy(cpu_effective_str, line, sizeof(cpu_effective_str));
	return 1;
}
6329
6330
static void update_effective_set(bool startup)
6331
{
6332
update_effective_str(startup);
6333
6334
if (parse_cpu_str(cpu_effective_str, cpu_effective_set, cpu_effective_setsize))
6335
err(1, "%s: cpu str malformat %s", PATH_EFFECTIVE_CPUS, cpu_effective_str);
6336
}
6337
6338
void linux_perf_init(void);
6339
void msr_perf_init(void);
6340
void rapl_perf_init(void);
6341
void cstate_perf_init(void);
6342
void perf_llc_init(void);
6343
void perf_l2_init(void);
6344
void added_perf_counters_init(void);
6345
void pmt_init(void);
6346
6347
void re_initialize(void)
6348
{
6349
free_all_buffers();
6350
setup_all_buffers(false);
6351
linux_perf_init();
6352
msr_perf_init();
6353
rapl_perf_init();
6354
cstate_perf_init();
6355
perf_llc_init();
6356
perf_l2_init();
6357
added_perf_counters_init();
6358
pmt_init();
6359
fprintf(outf, "turbostat: re-initialized with num_cpus %d, allowed_cpus %d\n", topo.num_cpus, topo.allowed_cpus);
6360
}
6361
6362
void set_max_cpu_num(void)
6363
{
6364
FILE *filep;
6365
int current_cpu;
6366
unsigned long dummy;
6367
char pathname[64];
6368
6369
current_cpu = sched_getcpu();
6370
if (current_cpu < 0)
6371
err(1, "cannot find calling cpu ID");
6372
sprintf(pathname, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", current_cpu);
6373
6374
filep = fopen_or_die(pathname, "r");
6375
topo.max_cpu_num = 0;
6376
while (fscanf(filep, "%lx,", &dummy) == 1)
6377
topo.max_cpu_num += BITMASK_SIZE;
6378
fclose(filep);
6379
topo.max_cpu_num--; /* 0 based */
6380
}
6381
6382
/*
6383
* count_cpus()
6384
* remember the last one seen, it will be the max
6385
*/
6386
int count_cpus(int cpu)
6387
{
6388
UNUSED(cpu);
6389
6390
topo.num_cpus++;
6391
return 0;
6392
}
6393
6394
int mark_cpu_present(int cpu)
6395
{
6396
CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set);
6397
return 0;
6398
}
6399
6400
int clear_ht_id(int cpu)
6401
{
6402
int i;
6403
6404
cpus[cpu].ht_id = -1;
6405
for (i = 0; i <= MAX_HT_ID; ++i)
6406
cpus[cpu].ht_sibling_cpu_id[i] = -1;
6407
return 0;
6408
}
6409
6410
int set_my_cpu_type(void)
6411
{
6412
unsigned int eax, ebx, ecx, edx;
6413
unsigned int max_level;
6414
6415
__cpuid(0, max_level, ebx, ecx, edx);
6416
6417
if (max_level < CPUID_LEAF_MODEL_ID)
6418
return 0;
6419
6420
__cpuid(CPUID_LEAF_MODEL_ID, eax, ebx, ecx, edx);
6421
6422
return (eax >> CPUID_LEAF_MODEL_ID_CORE_TYPE_SHIFT);
6423
}
6424
6425
int set_cpu_hybrid_type(int cpu)
6426
{
6427
if (cpu_migrate(cpu))
6428
return -1;
6429
6430
int type = set_my_cpu_type();
6431
6432
cpus[cpu].type = type;
6433
return 0;
6434
}
6435
6436
/*
6437
* snapshot_proc_interrupts()
6438
*
6439
* read and record summary of /proc/interrupts
6440
*
6441
* return 1 if config change requires a restart, else return 0
6442
*/
6443
int snapshot_proc_interrupts(void)
6444
{
6445
static FILE *fp;
6446
int column, retval;
6447
6448
if (fp == NULL)
6449
fp = fopen_or_die("/proc/interrupts", "r");
6450
else
6451
rewind(fp);
6452
6453
/* read 1st line of /proc/interrupts to get cpu* name for each column */
6454
for (column = 0; column < topo.num_cpus; ++column) {
6455
int cpu_number;
6456
6457
retval = fscanf(fp, " CPU%d", &cpu_number);
6458
if (retval != 1)
6459
break;
6460
6461
if (cpu_number > topo.max_cpu_num) {
6462
warn("/proc/interrupts: cpu%d: > %d", cpu_number, topo.max_cpu_num);
6463
return 1;
6464
}
6465
6466
irq_column_2_cpu[column] = cpu_number;
6467
irqs_per_cpu[cpu_number] = 0;
6468
nmi_per_cpu[cpu_number] = 0;
6469
}
6470
6471
/* read /proc/interrupt count lines and sum up irqs per cpu */
6472
while (1) {
6473
int column;
6474
char buf[64];
6475
int this_row_is_nmi = 0;
6476
6477
retval = fscanf(fp, " %s:", buf); /* irq# "N:" */
6478
if (retval != 1)
6479
break;
6480
6481
if (strncmp(buf, "NMI", strlen("NMI")) == 0)
6482
this_row_is_nmi = 1;
6483
6484
/* read the count per cpu */
6485
for (column = 0; column < topo.num_cpus; ++column) {
6486
6487
int cpu_number, irq_count;
6488
6489
retval = fscanf(fp, " %d", &irq_count);
6490
6491
if (retval != 1)
6492
break;
6493
6494
cpu_number = irq_column_2_cpu[column];
6495
irqs_per_cpu[cpu_number] += irq_count;
6496
if (this_row_is_nmi)
6497
nmi_per_cpu[cpu_number] += irq_count;
6498
}
6499
while (getc(fp) != '\n') ; /* flush interrupt description */
6500
6501
}
6502
return 0;
6503
}
6504
6505
/*
6506
* snapshot_graphics()
6507
*
6508
* record snapshot of specified graphics sysfs knob
6509
*
6510
* return 1 if config change requires a restart, else return 0
6511
*/
6512
int snapshot_graphics(int idx)
6513
{
6514
int retval;
6515
6516
rewind(gfx_info[idx].fp);
6517
fflush(gfx_info[idx].fp);
6518
6519
switch (idx) {
6520
case GFX_rc6:
6521
case SAM_mc6:
6522
retval = fscanf(gfx_info[idx].fp, "%lld", &gfx_info[idx].val_ull);
6523
if (retval != 1)
6524
err(1, "rc6");
6525
return 0;
6526
case GFX_MHz:
6527
case GFX_ACTMHz:
6528
case SAM_MHz:
6529
case SAM_ACTMHz:
6530
retval = fscanf(gfx_info[idx].fp, "%d", &gfx_info[idx].val);
6531
if (retval != 1)
6532
err(1, "MHz");
6533
return 0;
6534
default:
6535
return -EINVAL;
6536
}
6537
}
6538
6539
/*
6540
* snapshot_cpu_lpi()
6541
*
6542
* record snapshot of
6543
* /sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us
6544
*/
6545
int snapshot_cpu_lpi_us(void)
6546
{
6547
FILE *fp;
6548
int retval;
6549
6550
fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", "r");
6551
6552
retval = fscanf(fp, "%lld", &cpuidle_cur_cpu_lpi_us);
6553
if (retval != 1) {
6554
fprintf(stderr, "Disabling Low Power Idle CPU output\n");
6555
BIC_NOT_PRESENT(BIC_CPU_LPI);
6556
fclose(fp);
6557
return -1;
6558
}
6559
6560
fclose(fp);
6561
6562
return 0;
6563
}
6564
6565
/*
6566
* snapshot_sys_lpi()
6567
*
6568
* record snapshot of sys_lpi_file
6569
*/
6570
int snapshot_sys_lpi_us(void)
6571
{
6572
FILE *fp;
6573
int retval;
6574
6575
fp = fopen_or_die(sys_lpi_file, "r");
6576
6577
retval = fscanf(fp, "%lld", &cpuidle_cur_sys_lpi_us);
6578
if (retval != 1) {
6579
fprintf(stderr, "Disabling Low Power Idle System output\n");
6580
BIC_NOT_PRESENT(BIC_SYS_LPI);
6581
fclose(fp);
6582
return -1;
6583
}
6584
fclose(fp);
6585
6586
return 0;
6587
}
6588
6589
/*
6590
* snapshot /proc and /sys files
6591
*
6592
* return 1 if configuration restart needed, else return 0
6593
*/
6594
int snapshot_proc_sysfs_files(void)
6595
{
6596
gettimeofday(&procsysfs_tv_begin, (struct timezone *)NULL);
6597
6598
if (DO_BIC(BIC_IRQ) || DO_BIC(BIC_NMI))
6599
if (snapshot_proc_interrupts())
6600
return 1;
6601
6602
if (DO_BIC(BIC_GFX_rc6))
6603
snapshot_graphics(GFX_rc6);
6604
6605
if (DO_BIC(BIC_GFXMHz))
6606
snapshot_graphics(GFX_MHz);
6607
6608
if (DO_BIC(BIC_GFXACTMHz))
6609
snapshot_graphics(GFX_ACTMHz);
6610
6611
if (DO_BIC(BIC_SAM_mc6))
6612
snapshot_graphics(SAM_mc6);
6613
6614
if (DO_BIC(BIC_SAMMHz))
6615
snapshot_graphics(SAM_MHz);
6616
6617
if (DO_BIC(BIC_SAMACTMHz))
6618
snapshot_graphics(SAM_ACTMHz);
6619
6620
if (DO_BIC(BIC_CPU_LPI))
6621
snapshot_cpu_lpi_us();
6622
6623
if (DO_BIC(BIC_SYS_LPI))
6624
snapshot_sys_lpi_us();
6625
6626
return 0;
6627
}
6628
6629
int exit_requested;
6630
6631
static void signal_handler(int signal)
6632
{
6633
switch (signal) {
6634
case SIGINT:
6635
exit_requested = 1;
6636
if (debug)
6637
fprintf(stderr, " SIGINT\n");
6638
break;
6639
case SIGUSR1:
6640
if (debug > 1)
6641
fprintf(stderr, "SIGUSR1\n");
6642
break;
6643
}
6644
}
6645
6646
void setup_signal_handler(void)
6647
{
6648
struct sigaction sa;
6649
6650
memset(&sa, 0, sizeof(sa));
6651
6652
sa.sa_handler = &signal_handler;
6653
6654
if (sigaction(SIGINT, &sa, NULL) < 0)
6655
err(1, "sigaction SIGINT");
6656
if (sigaction(SIGUSR1, &sa, NULL) < 0)
6657
err(1, "sigaction SIGUSR1");
6658
}
6659
6660
void do_sleep(void)
6661
{
6662
struct timeval tout;
6663
struct timespec rest;
6664
fd_set readfds;
6665
int retval;
6666
6667
FD_ZERO(&readfds);
6668
FD_SET(0, &readfds);
6669
6670
if (ignore_stdin) {
6671
nanosleep(&interval_ts, NULL);
6672
return;
6673
}
6674
6675
tout = interval_tv;
6676
retval = select(1, &readfds, NULL, NULL, &tout);
6677
6678
if (retval == 1) {
6679
switch (getc(stdin)) {
6680
case 'q':
6681
exit_requested = 1;
6682
break;
6683
case EOF:
6684
/*
6685
* 'stdin' is a pipe closed on the other end. There
6686
* won't be any further input.
6687
*/
6688
ignore_stdin = 1;
6689
/* Sleep the rest of the time */
6690
rest.tv_sec = (tout.tv_sec + tout.tv_usec / 1000000);
6691
rest.tv_nsec = (tout.tv_usec % 1000000) * 1000;
6692
nanosleep(&rest, NULL);
6693
}
6694
}
6695
}
6696
6697
int get_msr_sum(int cpu, off_t offset, unsigned long long *msr)
6698
{
6699
int ret, idx;
6700
unsigned long long msr_cur, msr_last;
6701
6702
assert(!no_msr);
6703
6704
if (!per_cpu_msr_sum)
6705
return 1;
6706
6707
idx = offset_to_idx(offset);
6708
if (idx < 0)
6709
return idx;
6710
/* get_msr_sum() = sum + (get_msr() - last) */
6711
ret = get_msr(cpu, offset, &msr_cur);
6712
if (ret)
6713
return ret;
6714
msr_last = per_cpu_msr_sum[cpu].entries[idx].last;
6715
DELTA_WRAP32(msr_cur, msr_last);
6716
*msr = msr_last + per_cpu_msr_sum[cpu].entries[idx].sum;
6717
6718
return 0;
6719
}
6720
6721
timer_t timerid;
6722
6723
/* Timer callback, update the sum of MSRs periodically. */
6724
static int update_msr_sum(PER_THREAD_PARAMS)
6725
{
6726
int i, ret;
6727
int cpu = t->cpu_id;
6728
6729
UNUSED(c);
6730
UNUSED(p);
6731
6732
assert(!no_msr);
6733
6734
for (i = IDX_PKG_ENERGY; i < IDX_COUNT; i++) {
6735
unsigned long long msr_cur, msr_last;
6736
off_t offset;
6737
6738
if (!idx_valid(i))
6739
continue;
6740
offset = idx_to_offset(i);
6741
if (offset < 0)
6742
continue;
6743
ret = get_msr(cpu, offset, &msr_cur);
6744
if (ret) {
6745
fprintf(outf, "Can not update msr(0x%llx)\n", (unsigned long long)offset);
6746
continue;
6747
}
6748
6749
msr_last = per_cpu_msr_sum[cpu].entries[i].last;
6750
per_cpu_msr_sum[cpu].entries[i].last = msr_cur & 0xffffffff;
6751
6752
DELTA_WRAP32(msr_cur, msr_last);
6753
per_cpu_msr_sum[cpu].entries[i].sum += msr_last;
6754
}
6755
return 0;
6756
}
6757
6758
static void msr_record_handler(union sigval v)
6759
{
6760
UNUSED(v);
6761
6762
for_all_cpus(update_msr_sum, EVEN_COUNTERS);
6763
}
6764
6765
void msr_sum_record(void)
6766
{
6767
struct itimerspec its;
6768
struct sigevent sev;
6769
6770
per_cpu_msr_sum = calloc(topo.max_cpu_num + 1, sizeof(struct msr_sum_array));
6771
if (!per_cpu_msr_sum) {
6772
fprintf(outf, "Can not allocate memory for long time MSR.\n");
6773
return;
6774
}
6775
/*
6776
* Signal handler might be restricted, so use thread notifier instead.
6777
*/
6778
memset(&sev, 0, sizeof(struct sigevent));
6779
sev.sigev_notify = SIGEV_THREAD;
6780
sev.sigev_notify_function = msr_record_handler;
6781
6782
sev.sigev_value.sival_ptr = &timerid;
6783
if (timer_create(CLOCK_REALTIME, &sev, &timerid) == -1) {
6784
fprintf(outf, "Can not create timer.\n");
6785
goto release_msr;
6786
}
6787
6788
its.it_value.tv_sec = 0;
6789
its.it_value.tv_nsec = 1;
6790
/*
6791
* A wraparound time has been calculated early.
6792
* Some sources state that the peak power for a
6793
* microprocessor is usually 1.5 times the TDP rating,
6794
* use 2 * TDP for safety.
6795
*/
6796
its.it_interval.tv_sec = rapl_joule_counter_range / 2;
6797
its.it_interval.tv_nsec = 0;
6798
6799
if (timer_settime(timerid, 0, &its, NULL) == -1) {
6800
fprintf(outf, "Can not set timer.\n");
6801
goto release_timer;
6802
}
6803
return;
6804
6805
release_timer:
6806
timer_delete(timerid);
6807
release_msr:
6808
free(per_cpu_msr_sum);
6809
per_cpu_msr_sum = NULL;
6810
}
6811
6812
/*
 * set_my_sched_priority(pri)
 * set this process's priority (nice value) to pri and read it back
 * to confirm
 *
 * return previous priority on success
 * return -21 (a value below any valid priority) on failure
 */
int set_my_sched_priority(int priority)
{
	int previous, confirmed;

	/* -1 is a legitimate priority, so errno has to tell failure apart */
	errno = 0;
	previous = getpriority(PRIO_PROCESS, 0);
	if (previous == -1 && errno)
		return -21;

	if (setpriority(PRIO_PROCESS, 0, priority))
		return -21;

	errno = 0;
	confirmed = getpriority(PRIO_PROCESS, 0);
	if (confirmed != priority)
		return -21;

	return previous;
}
6838
6839
void turbostat_loop()
6840
{
6841
int retval;
6842
int restarted = 0;
6843
unsigned int done_iters = 0;
6844
6845
setup_signal_handler();
6846
6847
/*
6848
* elevate own priority for interval mode
6849
*
6850
* ignore on error - we probably don't have permission to set it, but
6851
* it's not a big deal
6852
*/
6853
set_my_sched_priority(-20);
6854
6855
restart:
6856
restarted++;
6857
6858
snapshot_proc_sysfs_files();
6859
retval = for_all_cpus(get_counters, EVEN_COUNTERS);
6860
first_counter_read = 0;
6861
if (retval < -1) {
6862
exit(retval);
6863
} else if (retval == -1) {
6864
if (restarted > 10) {
6865
exit(retval);
6866
}
6867
re_initialize();
6868
goto restart;
6869
}
6870
restarted = 0;
6871
done_iters = 0;
6872
gettimeofday(&tv_even, (struct timezone *)NULL);
6873
6874
while (1) {
6875
if (for_all_proc_cpus(cpu_is_not_present)) {
6876
re_initialize();
6877
goto restart;
6878
}
6879
if (update_effective_str(false)) {
6880
re_initialize();
6881
goto restart;
6882
}
6883
do_sleep();
6884
if (snapshot_proc_sysfs_files())
6885
goto restart;
6886
retval = for_all_cpus(get_counters, ODD_COUNTERS);
6887
if (retval < -1) {
6888
exit(retval);
6889
} else if (retval == -1) {
6890
re_initialize();
6891
goto restart;
6892
}
6893
gettimeofday(&tv_odd, (struct timezone *)NULL);
6894
timersub(&tv_odd, &tv_even, &tv_delta);
6895
if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS)) {
6896
re_initialize();
6897
goto restart;
6898
}
6899
delta_platform(&platform_counters_odd, &platform_counters_even);
6900
compute_average(EVEN_COUNTERS);
6901
format_all_counters(EVEN_COUNTERS);
6902
flush_output_stdout();
6903
if (exit_requested)
6904
break;
6905
if (num_iterations && ++done_iters >= num_iterations)
6906
break;
6907
do_sleep();
6908
if (snapshot_proc_sysfs_files())
6909
goto restart;
6910
retval = for_all_cpus(get_counters, EVEN_COUNTERS);
6911
if (retval < -1) {
6912
exit(retval);
6913
} else if (retval == -1) {
6914
re_initialize();
6915
goto restart;
6916
}
6917
gettimeofday(&tv_even, (struct timezone *)NULL);
6918
timersub(&tv_even, &tv_odd, &tv_delta);
6919
if (for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS)) {
6920
re_initialize();
6921
goto restart;
6922
}
6923
delta_platform(&platform_counters_even, &platform_counters_odd);
6924
compute_average(ODD_COUNTERS);
6925
format_all_counters(ODD_COUNTERS);
6926
flush_output_stdout();
6927
if (exit_requested)
6928
break;
6929
if (num_iterations && ++done_iters >= num_iterations)
6930
break;
6931
}
6932
}
6933
6934
int probe_dev_msr(void)
6935
{
6936
struct stat sb;
6937
char pathname[32];
6938
6939
sprintf(pathname, "/dev/msr%d", master_cpu);
6940
return !stat(pathname, &sb);
6941
}
6942
6943
int probe_dev_cpu_msr(void)
6944
{
6945
struct stat sb;
6946
char pathname[32];
6947
6948
sprintf(pathname, "/dev/cpu/%d/msr", master_cpu);
6949
return !stat(pathname, &sb);
6950
}
6951
6952
int probe_msr_driver(void)
6953
{
6954
if (probe_dev_msr()) {
6955
use_android_msr_path = 1;
6956
return 1;
6957
}
6958
return probe_dev_cpu_msr();
6959
}
6960
6961
void check_msr_driver(void)
6962
{
6963
if (probe_msr_driver())
6964
return;
6965
6966
if (system("/sbin/modprobe msr > /dev/null 2>&1"))
6967
no_msr = 1;
6968
6969
if (!probe_msr_driver())
6970
no_msr = 1;
6971
}
6972
6973
/*
6974
* check for CAP_SYS_RAWIO
6975
* return 0 on success
6976
* return 1 on fail
6977
*/
6978
int check_for_cap_sys_rawio(void)
6979
{
6980
cap_t caps;
6981
cap_flag_value_t cap_flag_value;
6982
int ret = 0;
6983
6984
caps = cap_get_proc();
6985
if (caps == NULL) {
6986
/*
6987
* CONFIG_MULTIUSER=n kernels have no cap_get_proc()
6988
* Allow them to continue and attempt to access MSRs
6989
*/
6990
if (errno == ENOSYS)
6991
return 0;
6992
6993
return 1;
6994
}
6995
6996
if (cap_get_flag(caps, CAP_SYS_RAWIO, CAP_EFFECTIVE, &cap_flag_value)) {
6997
ret = 1;
6998
goto free_and_exit;
6999
}
7000
7001
if (cap_flag_value != CAP_SET) {
7002
ret = 1;
7003
goto free_and_exit;
7004
}
7005
7006
free_and_exit:
7007
if (cap_free(caps) == -1)
7008
err(-6, "cap_free");
7009
7010
return ret;
7011
}
7012
7013
void check_msr_permission(void)
7014
{
7015
int failed = 0;
7016
char pathname[32];
7017
7018
if (no_msr)
7019
return;
7020
7021
/* check for CAP_SYS_RAWIO */
7022
failed += check_for_cap_sys_rawio();
7023
7024
/* test file permissions */
7025
sprintf(pathname, use_android_msr_path ? "/dev/msr%d" : "/dev/cpu/%d/msr", master_cpu);
7026
if (euidaccess(pathname, R_OK)) {
7027
failed++;
7028
}
7029
7030
if (failed) {
7031
warnx("Failed to access %s. Some of the counters may not be available\n"
7032
"\tRun as root to enable them or use %s to disable the access explicitly", pathname, "--no-msr");
7033
no_msr = 1;
7034
}
7035
}
7036
7037
void probe_bclk(void)
7038
{
7039
unsigned long long msr;
7040
unsigned int base_ratio;
7041
7042
if (!platform->has_nhm_msrs || no_msr)
7043
return;
7044
7045
if (platform->bclk_freq == BCLK_100MHZ)
7046
bclk = 100.00;
7047
else if (platform->bclk_freq == BCLK_133MHZ)
7048
bclk = 133.33;
7049
else if (platform->bclk_freq == BCLK_SLV)
7050
bclk = slm_bclk();
7051
else
7052
return;
7053
7054
get_msr(master_cpu, MSR_PLATFORM_INFO, &msr);
7055
base_ratio = (msr >> 8) & 0xFF;
7056
7057
base_hz = base_ratio * bclk * 1000000;
7058
has_base_hz = 1;
7059
7060
if (platform->enable_tsc_tweak)
7061
tsc_tweak = base_hz / tsc_hz;
7062
}
7063
7064
/*
 * remove_underbar()
 * Delete every '_' from the NUL-terminated string s, in place.
 */
static void remove_underbar(char *s)
{
	char *dst = s;
	const char *src;

	for (src = s; *src != '\0'; src++) {
		if (*src == '_')
			continue;
		*dst++ = *src;
	}

	*dst = '\0';
}
7076
7077
static void dump_turbo_ratio_info(void)
7078
{
7079
if (!has_turbo)
7080
return;
7081
7082
if (!platform->has_nhm_msrs || no_msr)
7083
return;
7084
7085
if (platform->trl_msrs & TRL_LIMIT2)
7086
dump_turbo_ratio_limit2();
7087
7088
if (platform->trl_msrs & TRL_LIMIT1)
7089
dump_turbo_ratio_limit1();
7090
7091
if (platform->trl_msrs & TRL_BASE) {
7092
dump_turbo_ratio_limits(MSR_TURBO_RATIO_LIMIT);
7093
7094
if (is_hybrid)
7095
dump_turbo_ratio_limits(MSR_SECONDARY_TURBO_RATIO_LIMIT);
7096
}
7097
7098
if (platform->trl_msrs & TRL_ATOM)
7099
dump_atom_turbo_ratio_limits();
7100
7101
if (platform->trl_msrs & TRL_KNL)
7102
dump_knl_turbo_ratio_limits();
7103
7104
if (platform->has_config_tdp)
7105
dump_config_tdp();
7106
}
7107
7108
static int read_sysfs_int(char *path)
7109
{
7110
FILE *input;
7111
int retval = -1;
7112
7113
input = fopen(path, "r");
7114
if (input == NULL) {
7115
if (debug)
7116
fprintf(outf, "NSFOD %s\n", path);
7117
return (-1);
7118
}
7119
if (fscanf(input, "%d", &retval) != 1)
7120
err(1, "%s: failed to read int from file", path);
7121
fclose(input);
7122
7123
return (retval);
7124
}
7125
7126
static void dump_sysfs_file(char *path)
7127
{
7128
FILE *input;
7129
char cpuidle_buf[64];
7130
7131
input = fopen(path, "r");
7132
if (input == NULL) {
7133
if (debug)
7134
fprintf(outf, "NSFOD %s\n", path);
7135
return;
7136
}
7137
if (!fgets(cpuidle_buf, sizeof(cpuidle_buf), input))
7138
err(1, "%s: failed to read file", path);
7139
fclose(input);
7140
7141
fprintf(outf, "%s: %s", strrchr(path, '/') + 1, cpuidle_buf);
7142
}
7143
7144
static void probe_intel_uncore_frequency_legacy(void)
7145
{
7146
int i, j;
7147
char path[256];
7148
7149
for (i = 0; i < topo.num_packages; ++i) {
7150
for (j = 0; j <= topo.max_die_id; ++j) {
7151
int k, l;
7152
char path_base[128];
7153
7154
sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d", i, j);
7155
7156
sprintf(path, "%s/current_freq_khz", path_base);
7157
if (access(path, R_OK))
7158
continue;
7159
7160
BIC_PRESENT(BIC_UNCORE_MHZ);
7161
7162
if (quiet)
7163
return;
7164
7165
sprintf(path, "%s/min_freq_khz", path_base);
7166
k = read_sysfs_int(path);
7167
sprintf(path, "%s/max_freq_khz", path_base);
7168
l = read_sysfs_int(path);
7169
fprintf(outf, "Uncore Frequency package%d die%d: %d - %d MHz ", i, j, k / 1000, l / 1000);
7170
7171
sprintf(path, "%s/initial_min_freq_khz", path_base);
7172
k = read_sysfs_int(path);
7173
sprintf(path, "%s/initial_max_freq_khz", path_base);
7174
l = read_sysfs_int(path);
7175
fprintf(outf, "(%d - %d MHz)", k / 1000, l / 1000);
7176
7177
sprintf(path, "%s/current_freq_khz", path_base);
7178
k = read_sysfs_int(path);
7179
fprintf(outf, " %d MHz\n", k / 1000);
7180
}
7181
}
7182
}
7183
7184
static void probe_intel_uncore_frequency_cluster(void)
7185
{
7186
int i, uncore_max_id;
7187
char path[256];
7188
char path_base[128];
7189
7190
if (access("/sys/devices/system/cpu/intel_uncore_frequency/uncore00/current_freq_khz", R_OK))
7191
return;
7192
7193
for (uncore_max_id = 0;; ++uncore_max_id) {
7194
7195
sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/uncore%02d", uncore_max_id);
7196
7197
/* uncore## start at 00 and skips no numbers, so stop upon first missing */
7198
if (access(path_base, R_OK)) {
7199
uncore_max_id -= 1;
7200
break;
7201
}
7202
}
7203
for (i = uncore_max_id; i >= 0; --i) {
7204
int k, l;
7205
int unc_pkg_id, domain_id, cluster_id;
7206
char name_buf[16];
7207
7208
sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/uncore%02d", i);
7209
7210
if (access(path_base, R_OK))
7211
err(1, "%s: %s", __func__, path_base);
7212
7213
sprintf(path, "%s/package_id", path_base);
7214
unc_pkg_id = read_sysfs_int(path);
7215
7216
sprintf(path, "%s/domain_id", path_base);
7217
domain_id = read_sysfs_int(path);
7218
7219
sprintf(path, "%s/fabric_cluster_id", path_base);
7220
cluster_id = read_sysfs_int(path);
7221
7222
sprintf(path, "%s/current_freq_khz", path_base);
7223
sprintf(name_buf, "UMHz%d.%d", domain_id, cluster_id);
7224
7225
/*
7226
* Once add_couter() is called, that counter is always read
7227
* and reported -- So it is effectively (enabled & present).
7228
* Only call add_counter() here if legacy BIC_UNCORE_MHZ (UncMHz)
7229
* is (enabled). Since we are in this routine, we
7230
* know we will not probe and set (present) the legacy counter.
7231
*
7232
* This allows "--show/--hide UncMHz" to be effective for
7233
* the clustered MHz counters, as a group.
7234
*/
7235
if BIC_IS_ENABLED
7236
(BIC_UNCORE_MHZ)
7237
add_counter(0, path, name_buf, 0, SCOPE_PACKAGE, COUNTER_K2M, FORMAT_AVERAGE, 0, unc_pkg_id);
7238
7239
if (quiet)
7240
continue;
7241
7242
sprintf(path, "%s/min_freq_khz", path_base);
7243
k = read_sysfs_int(path);
7244
sprintf(path, "%s/max_freq_khz", path_base);
7245
l = read_sysfs_int(path);
7246
fprintf(outf, "Uncore Frequency package%d domain%d cluster%d: %d - %d MHz ", unc_pkg_id, domain_id, cluster_id, k / 1000, l / 1000);
7247
7248
sprintf(path, "%s/initial_min_freq_khz", path_base);
7249
k = read_sysfs_int(path);
7250
sprintf(path, "%s/initial_max_freq_khz", path_base);
7251
l = read_sysfs_int(path);
7252
fprintf(outf, "(%d - %d MHz)", k / 1000, l / 1000);
7253
7254
sprintf(path, "%s/current_freq_khz", path_base);
7255
k = read_sysfs_int(path);
7256
fprintf(outf, " %d MHz\n", k / 1000);
7257
}
7258
}
7259
7260
static void probe_intel_uncore_frequency(void)
7261
{
7262
if (!genuine_intel)
7263
return;
7264
7265
if (access("/sys/devices/system/cpu/intel_uncore_frequency/uncore00", R_OK) == 0)
7266
probe_intel_uncore_frequency_cluster();
7267
else
7268
probe_intel_uncore_frequency_legacy();
7269
}
7270
7271
static void set_graphics_fp(char *path, int idx)
7272
{
7273
if (!access(path, R_OK))
7274
gfx_info[idx].fp = fopen_or_die(path, "r");
7275
}
7276
7277
/* Enlarge this if there are /sys/class/drm/card2 ... */
7278
#define GFX_MAX_CARDS 2
7279
7280
static void probe_graphics(void)
7281
{
7282
char path[PATH_MAX];
7283
int i;
7284
7285
/* Xe graphics sysfs knobs */
7286
if (!access("/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms", R_OK)) {
7287
FILE *fp;
7288
char buf[8];
7289
bool gt0_is_gt;
7290
7291
fp = fopen("/sys/class/drm/card0/device/tile0/gt0/gtidle/name", "r");
7292
if (!fp)
7293
goto next;
7294
7295
if (!fread(buf, sizeof(char), 7, fp)) {
7296
fclose(fp);
7297
goto next;
7298
}
7299
fclose(fp);
7300
7301
if (!strncmp(buf, "gt0-rc", strlen("gt0-rc")))
7302
gt0_is_gt = true;
7303
else if (!strncmp(buf, "gt0-mc", strlen("gt0-mc")))
7304
gt0_is_gt = false;
7305
else
7306
goto next;
7307
7308
set_graphics_fp("/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms", gt0_is_gt ? GFX_rc6 : SAM_mc6);
7309
7310
set_graphics_fp("/sys/class/drm/card0/device/tile0/gt0/freq0/cur_freq", gt0_is_gt ? GFX_MHz : SAM_MHz);
7311
7312
set_graphics_fp("/sys/class/drm/card0/device/tile0/gt0/freq0/act_freq", gt0_is_gt ? GFX_ACTMHz : SAM_ACTMHz);
7313
7314
set_graphics_fp("/sys/class/drm/card0/device/tile0/gt1/gtidle/idle_residency_ms", gt0_is_gt ? SAM_mc6 : GFX_rc6);
7315
7316
set_graphics_fp("/sys/class/drm/card0/device/tile0/gt1/freq0/cur_freq", gt0_is_gt ? SAM_MHz : GFX_MHz);
7317
7318
set_graphics_fp("/sys/class/drm/card0/device/tile0/gt1/freq0/act_freq", gt0_is_gt ? SAM_ACTMHz : GFX_ACTMHz);
7319
7320
goto end;
7321
}
7322
7323
next:
7324
/* New i915 graphics sysfs knobs */
7325
for (i = 0; i < GFX_MAX_CARDS; i++) {
7326
snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt0/rc6_residency_ms", i);
7327
if (!access(path, R_OK))
7328
break;
7329
}
7330
7331
if (i == GFX_MAX_CARDS)
7332
goto legacy_i915;
7333
7334
snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt0/rc6_residency_ms", i);
7335
set_graphics_fp(path, GFX_rc6);
7336
7337
snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt0/rps_cur_freq_mhz", i);
7338
set_graphics_fp(path, GFX_MHz);
7339
7340
snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt0/rps_act_freq_mhz", i);
7341
set_graphics_fp(path, GFX_ACTMHz);
7342
7343
snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt1/rc6_residency_ms", i);
7344
set_graphics_fp(path, SAM_mc6);
7345
7346
snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt1/rps_cur_freq_mhz", i);
7347
set_graphics_fp(path, SAM_MHz);
7348
7349
snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt1/rps_act_freq_mhz", i);
7350
set_graphics_fp(path, SAM_ACTMHz);
7351
7352
goto end;
7353
7354
legacy_i915:
7355
/* Fall back to traditional i915 graphics sysfs knobs */
7356
set_graphics_fp("/sys/class/drm/card0/power/rc6_residency_ms", GFX_rc6);
7357
7358
set_graphics_fp("/sys/class/drm/card0/gt_cur_freq_mhz", GFX_MHz);
7359
if (!gfx_info[GFX_MHz].fp)
7360
set_graphics_fp("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", GFX_MHz);
7361
7362
set_graphics_fp("/sys/class/drm/card0/gt_act_freq_mhz", GFX_ACTMHz);
7363
if (!gfx_info[GFX_ACTMHz].fp)
7364
set_graphics_fp("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", GFX_ACTMHz);
7365
7366
end:
7367
if (gfx_info[GFX_rc6].fp)
7368
BIC_PRESENT(BIC_GFX_rc6);
7369
if (gfx_info[GFX_MHz].fp)
7370
BIC_PRESENT(BIC_GFXMHz);
7371
if (gfx_info[GFX_ACTMHz].fp)
7372
BIC_PRESENT(BIC_GFXACTMHz);
7373
if (gfx_info[SAM_mc6].fp)
7374
BIC_PRESENT(BIC_SAM_mc6);
7375
if (gfx_info[SAM_MHz].fp)
7376
BIC_PRESENT(BIC_SAMMHz);
7377
if (gfx_info[SAM_ACTMHz].fp)
7378
BIC_PRESENT(BIC_SAMACTMHz);
7379
}
7380
7381
static void dump_sysfs_cstate_config(void)
7382
{
7383
char path[64];
7384
char name_buf[16];
7385
char desc[64];
7386
FILE *input;
7387
int state;
7388
char *sp;
7389
7390
if (access("/sys/devices/system/cpu/cpuidle", R_OK)) {
7391
fprintf(outf, "cpuidle not loaded\n");
7392
return;
7393
}
7394
7395
dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_driver");
7396
dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_governor");
7397
dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_governor_ro");
7398
7399
for (state = 0; state < 10; ++state) {
7400
7401
sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", master_cpu, state);
7402
input = fopen(path, "r");
7403
if (input == NULL)
7404
continue;
7405
if (!fgets(name_buf, sizeof(name_buf), input))
7406
err(1, "%s: failed to read file", path);
7407
7408
/* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
7409
sp = strchr(name_buf, '-');
7410
if (!sp)
7411
sp = strchrnul(name_buf, '\n');
7412
*sp = '\0';
7413
fclose(input);
7414
7415
remove_underbar(name_buf);
7416
7417
sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/desc", master_cpu, state);
7418
input = fopen(path, "r");
7419
if (input == NULL)
7420
continue;
7421
if (!fgets(desc, sizeof(desc), input))
7422
err(1, "%s: failed to read file", path);
7423
7424
fprintf(outf, "cpu%d: %s: %s", master_cpu, name_buf, desc);
7425
fclose(input);
7426
}
7427
}
7428
7429
static void dump_sysfs_pstate_config(void)
7430
{
7431
char path[64];
7432
char driver_buf[64];
7433
char governor_buf[64];
7434
FILE *input;
7435
int turbo;
7436
7437
sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_driver", master_cpu);
7438
input = fopen(path, "r");
7439
if (input == NULL) {
7440
fprintf(outf, "NSFOD %s\n", path);
7441
return;
7442
}
7443
if (!fgets(driver_buf, sizeof(driver_buf), input))
7444
err(1, "%s: failed to read file", path);
7445
fclose(input);
7446
7447
sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor", master_cpu);
7448
input = fopen(path, "r");
7449
if (input == NULL) {
7450
fprintf(outf, "NSFOD %s\n", path);
7451
return;
7452
}
7453
if (!fgets(governor_buf, sizeof(governor_buf), input))
7454
err(1, "%s: failed to read file", path);
7455
fclose(input);
7456
7457
fprintf(outf, "cpu%d: cpufreq driver: %s", master_cpu, driver_buf);
7458
fprintf(outf, "cpu%d: cpufreq governor: %s", master_cpu, governor_buf);
7459
7460
sprintf(path, "/sys/devices/system/cpu/cpufreq/boost");
7461
input = fopen(path, "r");
7462
if (input != NULL) {
7463
if (fscanf(input, "%d", &turbo) != 1)
7464
err(1, "%s: failed to parse number from file", path);
7465
fprintf(outf, "cpufreq boost: %d\n", turbo);
7466
fclose(input);
7467
}
7468
7469
sprintf(path, "/sys/devices/system/cpu/intel_pstate/no_turbo");
7470
input = fopen(path, "r");
7471
if (input != NULL) {
7472
if (fscanf(input, "%d", &turbo) != 1)
7473
err(1, "%s: failed to parse number from file", path);
7474
fprintf(outf, "cpufreq intel_pstate no_turbo: %d\n", turbo);
7475
fclose(input);
7476
}
7477
}
7478
7479
/*
7480
* print_epb()
7481
* Decode the ENERGY_PERF_BIAS MSR
7482
*/
7483
int print_epb(PER_THREAD_PARAMS)
7484
{
7485
char *epb_string;
7486
int cpu, epb;
7487
7488
UNUSED(c);
7489
UNUSED(p);
7490
7491
if (!has_epb)
7492
return 0;
7493
7494
cpu = t->cpu_id;
7495
7496
/* EPB is per-package */
7497
if (!is_cpu_first_thread_in_package(t, c, p))
7498
return 0;
7499
7500
if (cpu_migrate(cpu)) {
7501
fprintf(outf, "print_epb: Could not migrate to CPU %d\n", cpu);
7502
return -1;
7503
}
7504
7505
epb = get_epb(cpu);
7506
if (epb < 0)
7507
return 0;
7508
7509
switch (epb) {
7510
case ENERGY_PERF_BIAS_PERFORMANCE:
7511
epb_string = "performance";
7512
break;
7513
case ENERGY_PERF_BIAS_NORMAL:
7514
epb_string = "balanced";
7515
break;
7516
case ENERGY_PERF_BIAS_POWERSAVE:
7517
epb_string = "powersave";
7518
break;
7519
default:
7520
epb_string = "custom";
7521
break;
7522
}
7523
fprintf(outf, "cpu%d: EPB: %d (%s)\n", cpu, epb, epb_string);
7524
7525
return 0;
7526
}
7527
7528
/*
7529
* print_hwp()
7530
* Decode the MSR_HWP_CAPABILITIES
7531
*/
7532
int print_hwp(PER_THREAD_PARAMS)
7533
{
7534
unsigned long long msr;
7535
int cpu;
7536
7537
UNUSED(c);
7538
UNUSED(p);
7539
7540
if (no_msr)
7541
return 0;
7542
7543
if (!has_hwp)
7544
return 0;
7545
7546
cpu = t->cpu_id;
7547
7548
/* MSR_HWP_CAPABILITIES is per-package */
7549
if (!is_cpu_first_thread_in_package(t, c, p))
7550
return 0;
7551
7552
if (cpu_migrate(cpu)) {
7553
fprintf(outf, "print_hwp: Could not migrate to CPU %d\n", cpu);
7554
return -1;
7555
}
7556
7557
if (get_msr(cpu, MSR_PM_ENABLE, &msr))
7558
return 0;
7559
7560
fprintf(outf, "cpu%d: MSR_PM_ENABLE: 0x%08llx (%sHWP)\n", cpu, msr, (msr & (1 << 0)) ? "" : "No-");
7561
7562
/* MSR_PM_ENABLE[1] == 1 if HWP is enabled and MSRs visible */
7563
if ((msr & (1 << 0)) == 0)
7564
return 0;
7565
7566
if (get_msr(cpu, MSR_HWP_CAPABILITIES, &msr))
7567
return 0;
7568
7569
fprintf(outf, "cpu%d: MSR_HWP_CAPABILITIES: 0x%08llx "
7570
"(high %d guar %d eff %d low %d)\n",
7571
cpu, msr,
7572
(unsigned int)HWP_HIGHEST_PERF(msr),
7573
(unsigned int)HWP_GUARANTEED_PERF(msr), (unsigned int)HWP_MOSTEFFICIENT_PERF(msr), (unsigned int)HWP_LOWEST_PERF(msr));
7574
7575
if (get_msr(cpu, MSR_HWP_REQUEST, &msr))
7576
return 0;
7577
7578
fprintf(outf, "cpu%d: MSR_HWP_REQUEST: 0x%08llx "
7579
"(min %d max %d des %d epp 0x%x window 0x%x pkg 0x%x)\n",
7580
cpu, msr,
7581
(unsigned int)(((msr) >> 0) & 0xff),
7582
(unsigned int)(((msr) >> 8) & 0xff),
7583
(unsigned int)(((msr) >> 16) & 0xff),
7584
(unsigned int)(((msr) >> 24) & 0xff), (unsigned int)(((msr) >> 32) & 0xff3), (unsigned int)(((msr) >> 42) & 0x1));
7585
7586
if (has_hwp_pkg) {
7587
if (get_msr(cpu, MSR_HWP_REQUEST_PKG, &msr))
7588
return 0;
7589
7590
fprintf(outf, "cpu%d: MSR_HWP_REQUEST_PKG: 0x%08llx "
7591
"(min %d max %d des %d epp 0x%x window 0x%x)\n",
7592
cpu, msr,
7593
(unsigned int)(((msr) >> 0) & 0xff),
7594
(unsigned int)(((msr) >> 8) & 0xff),
7595
(unsigned int)(((msr) >> 16) & 0xff), (unsigned int)(((msr) >> 24) & 0xff), (unsigned int)(((msr) >> 32) & 0xff3));
7596
}
7597
if (has_hwp_notify) {
7598
if (get_msr(cpu, MSR_HWP_INTERRUPT, &msr))
7599
return 0;
7600
7601
fprintf(outf, "cpu%d: MSR_HWP_INTERRUPT: 0x%08llx "
7602
"(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n", cpu, msr, ((msr) & 0x1) ? "EN" : "Dis", ((msr) & 0x2) ? "EN" : "Dis");
7603
}
7604
if (get_msr(cpu, MSR_HWP_STATUS, &msr))
7605
return 0;
7606
7607
fprintf(outf, "cpu%d: MSR_HWP_STATUS: 0x%08llx "
7608
"(%sGuaranteed_Perf_Change, %sExcursion_Min)\n", cpu, msr, ((msr) & 0x1) ? "" : "No-", ((msr) & 0x4) ? "" : "No-");
7609
7610
return 0;
7611
}
7612
7613
/*
7614
* print_perf_limit()
7615
*/
7616
int print_perf_limit(PER_THREAD_PARAMS)
7617
{
7618
unsigned long long msr;
7619
int cpu;
7620
7621
UNUSED(c);
7622
UNUSED(p);
7623
7624
if (no_msr)
7625
return 0;
7626
7627
cpu = t->cpu_id;
7628
7629
/* per-package */
7630
if (!is_cpu_first_thread_in_package(t, c, p))
7631
return 0;
7632
7633
if (cpu_migrate(cpu)) {
7634
fprintf(outf, "print_perf_limit: Could not migrate to CPU %d\n", cpu);
7635
return -1;
7636
}
7637
7638
if (platform->plr_msrs & PLR_CORE) {
7639
get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr);
7640
fprintf(outf, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
7641
fprintf(outf, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)",
7642
(msr & 1 << 15) ? "bit15, " : "",
7643
(msr & 1 << 14) ? "bit14, " : "",
7644
(msr & 1 << 13) ? "Transitions, " : "",
7645
(msr & 1 << 12) ? "MultiCoreTurbo, " : "",
7646
(msr & 1 << 11) ? "PkgPwrL2, " : "",
7647
(msr & 1 << 10) ? "PkgPwrL1, " : "",
7648
(msr & 1 << 9) ? "CorePwr, " : "",
7649
(msr & 1 << 8) ? "Amps, " : "",
7650
(msr & 1 << 6) ? "VR-Therm, " : "",
7651
(msr & 1 << 5) ? "Auto-HWP, " : "",
7652
(msr & 1 << 4) ? "Graphics, " : "",
7653
(msr & 1 << 2) ? "bit2, " : "", (msr & 1 << 1) ? "ThermStatus, " : "", (msr & 1 << 0) ? "PROCHOT, " : "");
7654
fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n",
7655
(msr & 1 << 31) ? "bit31, " : "",
7656
(msr & 1 << 30) ? "bit30, " : "",
7657
(msr & 1 << 29) ? "Transitions, " : "",
7658
(msr & 1 << 28) ? "MultiCoreTurbo, " : "",
7659
(msr & 1 << 27) ? "PkgPwrL2, " : "",
7660
(msr & 1 << 26) ? "PkgPwrL1, " : "",
7661
(msr & 1 << 25) ? "CorePwr, " : "",
7662
(msr & 1 << 24) ? "Amps, " : "",
7663
(msr & 1 << 22) ? "VR-Therm, " : "",
7664
(msr & 1 << 21) ? "Auto-HWP, " : "",
7665
(msr & 1 << 20) ? "Graphics, " : "",
7666
(msr & 1 << 18) ? "bit18, " : "", (msr & 1 << 17) ? "ThermStatus, " : "", (msr & 1 << 16) ? "PROCHOT, " : "");
7667
7668
}
7669
if (platform->plr_msrs & PLR_GFX) {
7670
get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr);
7671
fprintf(outf, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
7672
fprintf(outf, " (Active: %s%s%s%s%s%s%s%s)",
7673
(msr & 1 << 0) ? "PROCHOT, " : "",
7674
(msr & 1 << 1) ? "ThermStatus, " : "",
7675
(msr & 1 << 4) ? "Graphics, " : "",
7676
(msr & 1 << 6) ? "VR-Therm, " : "",
7677
(msr & 1 << 8) ? "Amps, " : "",
7678
(msr & 1 << 9) ? "GFXPwr, " : "", (msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : "");
7679
fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s)\n",
7680
(msr & 1 << 16) ? "PROCHOT, " : "",
7681
(msr & 1 << 17) ? "ThermStatus, " : "",
7682
(msr & 1 << 20) ? "Graphics, " : "",
7683
(msr & 1 << 22) ? "VR-Therm, " : "",
7684
(msr & 1 << 24) ? "Amps, " : "",
7685
(msr & 1 << 25) ? "GFXPwr, " : "", (msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? "PkgPwrL2, " : "");
7686
}
7687
if (platform->plr_msrs & PLR_RING) {
7688
get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr);
7689
fprintf(outf, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
7690
fprintf(outf, " (Active: %s%s%s%s%s%s)",
7691
(msr & 1 << 0) ? "PROCHOT, " : "",
7692
(msr & 1 << 1) ? "ThermStatus, " : "",
7693
(msr & 1 << 6) ? "VR-Therm, " : "",
7694
(msr & 1 << 8) ? "Amps, " : "", (msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : "");
7695
fprintf(outf, " (Logged: %s%s%s%s%s%s)\n",
7696
(msr & 1 << 16) ? "PROCHOT, " : "",
7697
(msr & 1 << 17) ? "ThermStatus, " : "",
7698
(msr & 1 << 22) ? "VR-Therm, " : "",
7699
(msr & 1 << 24) ? "Amps, " : "", (msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? "PkgPwrL2, " : "");
7700
}
7701
return 0;
7702
}
7703
7704
#define RAPL_POWER_GRANULARITY 0x7FFF /* 15 bit power granularity */
7705
#define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */
7706
7707
double get_quirk_tdp(void)
7708
{
7709
if (platform->rapl_quirk_tdp)
7710
return platform->rapl_quirk_tdp;
7711
7712
return 135.0;
7713
}
7714
7715
double get_tdp_intel(void)
7716
{
7717
unsigned long long msr;
7718
7719
if (valid_rapl_msrs & RAPL_PKG_POWER_INFO)
7720
if (!get_msr(master_cpu, MSR_PKG_POWER_INFO, &msr))
7721
return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
7722
return get_quirk_tdp();
7723
}
7724
7725
/*
 * get_tdp_amd()
 * AMD TDP is always taken from get_quirk_tdp().
 */
double get_tdp_amd(void)
{
	double tdp = get_quirk_tdp();

	return tdp;
}
7729
7730
void rapl_probe_intel(void)
7731
{
7732
unsigned long long msr;
7733
unsigned int time_unit;
7734
double tdp;
7735
7736
if (rapl_joules) {
7737
CLR_BIC(BIC_SysWatt, &bic_enabled);
7738
CLR_BIC(BIC_PkgWatt, &bic_enabled);
7739
CLR_BIC(BIC_CorWatt, &bic_enabled);
7740
CLR_BIC(BIC_RAMWatt, &bic_enabled);
7741
CLR_BIC(BIC_GFXWatt, &bic_enabled);
7742
} else {
7743
CLR_BIC(BIC_Sys_J, &bic_enabled);
7744
CLR_BIC(BIC_Pkg_J, &bic_enabled);
7745
CLR_BIC(BIC_Cor_J, &bic_enabled);
7746
CLR_BIC(BIC_RAM_J, &bic_enabled);
7747
CLR_BIC(BIC_GFX_J, &bic_enabled);
7748
}
7749
7750
if (!valid_rapl_msrs || no_msr)
7751
return;
7752
7753
if (!(valid_rapl_msrs & RAPL_PKG_PERF_STATUS))
7754
CLR_BIC(BIC_PKG__, &bic_enabled);
7755
if (!(valid_rapl_msrs & RAPL_DRAM_PERF_STATUS))
7756
CLR_BIC(BIC_RAM__, &bic_enabled);
7757
7758
/* units on package 0, verify later other packages match */
7759
if (get_msr(master_cpu, MSR_RAPL_POWER_UNIT, &msr))
7760
return;
7761
7762
rapl_power_units = 1.0 / (1 << (msr & 0xF));
7763
if (platform->has_rapl_divisor)
7764
rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000;
7765
else
7766
rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
7767
7768
if (platform->has_fixed_rapl_unit)
7769
rapl_dram_energy_units = (15.3 / 1000000);
7770
else
7771
rapl_dram_energy_units = rapl_energy_units;
7772
7773
if (platform->has_fixed_rapl_psys_unit)
7774
rapl_psys_energy_units = 1.0;
7775
else
7776
rapl_psys_energy_units = rapl_energy_units;
7777
7778
time_unit = msr >> 16 & 0xF;
7779
if (time_unit == 0)
7780
time_unit = 0xA;
7781
7782
rapl_time_units = 1.0 / (1 << (time_unit));
7783
7784
tdp = get_tdp_intel();
7785
7786
rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
7787
if (!quiet)
7788
fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
7789
}
7790
7791
void rapl_probe_amd(void)
7792
{
7793
unsigned long long msr;
7794
double tdp;
7795
7796
if (rapl_joules) {
7797
CLR_BIC(BIC_SysWatt, &bic_enabled);
7798
CLR_BIC(BIC_CorWatt, &bic_enabled);
7799
} else {
7800
CLR_BIC(BIC_Pkg_J, &bic_enabled);
7801
CLR_BIC(BIC_Cor_J, &bic_enabled);
7802
}
7803
7804
if (!valid_rapl_msrs || no_msr)
7805
return;
7806
7807
if (get_msr(master_cpu, MSR_RAPL_PWR_UNIT, &msr))
7808
return;
7809
7810
rapl_time_units = ldexp(1.0, -(msr >> 16 & 0xf));
7811
rapl_energy_units = ldexp(1.0, -(msr >> 8 & 0x1f));
7812
rapl_power_units = ldexp(1.0, -(msr & 0xf));
7813
7814
tdp = get_tdp_amd();
7815
7816
rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
7817
if (!quiet)
7818
fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
7819
}
7820
7821
void print_power_limit_msr(int cpu, unsigned long long msr, char *label)
7822
{
7823
fprintf(outf, "cpu%d: %s: %sabled (%0.3f Watts, %f sec, clamp %sabled)\n",
7824
cpu, label,
7825
((msr >> 15) & 1) ? "EN" : "DIS",
7826
((msr >> 0) & 0x7FFF) * rapl_power_units,
7827
(1.0 + (((msr >> 22) & 0x3) / 4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units, (((msr >> 16) & 1) ? "EN" : "DIS"));
7828
7829
return;
7830
}
7831
7832
/*
 * fread_int()
 * Parse one decimal int from the file at path into *val.
 * return -1 if the file cannot be opened
 * otherwise return the fscanf() result (1 when a value was stored)
 */
static int fread_int(char *path, int *val)
{
	int matched;
	FILE *fp = fopen(path, "r");

	if (fp == NULL)
		return -1;

	matched = fscanf(fp, "%d", val);
	fclose(fp);

	return matched;
}
7845
7846
/*
 * fread_ull()
 * Parse one decimal unsigned long long from the file at path into *val.
 * return -1 if the file cannot be opened
 * otherwise return the fscanf() result (1 when a value was stored)
 */
static int fread_ull(char *path, unsigned long long *val)
{
	int matched;
	FILE *fp = fopen(path, "r");

	if (fp == NULL)
		return -1;

	matched = fscanf(fp, "%llu", val);
	fclose(fp);

	return matched;
}
7859
7860
/*
 * fread_str()
 * Read up to size - 1 bytes of the file at path into buf and
 * NUL-terminate the result; the string is cut at the first '\n'.
 * buf is set to the empty string before the file is opened, so it is
 * a valid string even when the call fails.
 * return the number of bytes read (0 for an empty file)
 * return -1 if buf is NULL, size is not positive, or the file cannot
 * be opened
 */
static int fread_str(char *path, char *buf, int size)
{
	FILE *filep;
	size_t nread;
	char *cp;

	if (buf == NULL || size <= 0)
		return -1;

	buf[0] = '\0';

	filep = fopen(path, "r");
	if (!filep)
		return -1;

	/* leave room for the terminator */
	nread = fread(buf, 1, (size_t)size - 1, filep);
	fclose(filep);
	buf[nread] = '\0';

	/* replace '\n' with '\0' */
	cp = strchr(buf, '\n');
	if (cp != NULL)
		*cp = '\0';

	return (int)nread;
}
7880
7881
#define PATH_RAPL_SYSFS "/sys/class/powercap"
7882
7883
static int dump_one_domain(char *domain_path)
7884
{
7885
char path[PATH_MAX];
7886
char str[PATH_MAX];
7887
unsigned long long val;
7888
int constraint;
7889
int enable;
7890
int ret;
7891
7892
snprintf(path, PATH_MAX, "%s/name", domain_path);
7893
ret = fread_str(path, str, PATH_MAX);
7894
if (ret <= 0)
7895
return -1;
7896
7897
fprintf(outf, "%s: %s", domain_path + strlen(PATH_RAPL_SYSFS) + 1, str);
7898
7899
snprintf(path, PATH_MAX, "%s/enabled", domain_path);
7900
ret = fread_int(path, &enable);
7901
if (ret <= 0)
7902
return -1;
7903
7904
if (!enable) {
7905
fputs(" disabled\n", outf);
7906
return 0;
7907
}
7908
7909
for (constraint = 0;; constraint++) {
7910
snprintf(path, PATH_MAX, "%s/constraint_%d_time_window_us", domain_path, constraint);
7911
ret = fread_ull(path, &val);
7912
if (ret <= 0)
7913
break;
7914
7915
if (val > 1000000)
7916
fprintf(outf, " %0.1fs", (double)val / 1000000);
7917
else if (val > 1000)
7918
fprintf(outf, " %0.1fms", (double)val / 1000);
7919
else
7920
fprintf(outf, " %0.1fus", (double)val);
7921
7922
snprintf(path, PATH_MAX, "%s/constraint_%d_power_limit_uw", domain_path, constraint);
7923
ret = fread_ull(path, &val);
7924
if (ret > 0 && val)
7925
fprintf(outf, ":%lluW", val / 1000000);
7926
7927
snprintf(path, PATH_MAX, "%s/constraint_%d_max_power_uw", domain_path, constraint);
7928
ret = fread_ull(path, &val);
7929
if (ret > 0 && val)
7930
fprintf(outf, ",max:%lluW", val / 1000000);
7931
}
7932
fputc('\n', outf);
7933
7934
return 0;
7935
}
7936
7937
static int print_rapl_sysfs(void)
7938
{
7939
DIR *dir, *cdir;
7940
struct dirent *entry, *centry;
7941
char path[PATH_MAX];
7942
char str[PATH_MAX];
7943
7944
if ((dir = opendir(PATH_RAPL_SYSFS)) == NULL) {
7945
warn("open %s failed", PATH_RAPL_SYSFS);
7946
return 1;
7947
}
7948
7949
while ((entry = readdir(dir)) != NULL) {
7950
if (strlen(entry->d_name) > 100)
7951
continue;
7952
7953
if (strncmp(entry->d_name, "intel-rapl", strlen("intel-rapl")))
7954
continue;
7955
7956
snprintf(path, PATH_MAX, "%s/%s/name", PATH_RAPL_SYSFS, entry->d_name);
7957
7958
/* Parse top level domains first, including package and psys */
7959
fread_str(path, str, PATH_MAX);
7960
if (strncmp(str, "package", strlen("package")) && strncmp(str, "psys", strlen("psys")))
7961
continue;
7962
7963
snprintf(path, PATH_MAX, "%s/%s", PATH_RAPL_SYSFS, entry->d_name);
7964
if ((cdir = opendir(path)) == NULL) {
7965
perror("opendir() error");
7966
return 1;
7967
}
7968
7969
dump_one_domain(path);
7970
7971
while ((centry = readdir(cdir)) != NULL) {
7972
if (strncmp(centry->d_name, "intel-rapl", strlen("intel-rapl")))
7973
continue;
7974
snprintf(path, PATH_MAX, "%s/%s/%s", PATH_RAPL_SYSFS, entry->d_name, centry->d_name);
7975
dump_one_domain(path);
7976
}
7977
closedir(cdir);
7978
}
7979
7980
closedir(dir);
7981
return 0;
7982
}
7983
7984
int print_rapl(PER_THREAD_PARAMS)
7985
{
7986
unsigned long long msr;
7987
const char *msr_name;
7988
int cpu;
7989
7990
UNUSED(c);
7991
UNUSED(p);
7992
7993
if (!valid_rapl_msrs)
7994
return 0;
7995
7996
/* RAPL counters are per package, so print only for 1st thread/package */
7997
if (!is_cpu_first_thread_in_package(t, c, p))
7998
return 0;
7999
8000
cpu = t->cpu_id;
8001
if (cpu_migrate(cpu)) {
8002
fprintf(outf, "print_rapl: Could not migrate to CPU %d\n", cpu);
8003
return -1;
8004
}
8005
8006
if (valid_rapl_msrs & RAPL_AMD_F17H) {
8007
msr_name = "MSR_RAPL_PWR_UNIT";
8008
if (get_msr(cpu, MSR_RAPL_PWR_UNIT, &msr))
8009
return -1;
8010
} else {
8011
msr_name = "MSR_RAPL_POWER_UNIT";
8012
if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr))
8013
return -1;
8014
}
8015
8016
fprintf(outf, "cpu%d: %s: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr_name, msr, rapl_power_units, rapl_energy_units, rapl_time_units);
8017
8018
if (valid_rapl_msrs & RAPL_PKG_POWER_INFO) {
8019
8020
if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr))
8021
return -5;
8022
8023
fprintf(outf, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
8024
cpu, msr,
8025
((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
8026
((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
8027
((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units, ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
8028
8029
}
8030
if (valid_rapl_msrs & RAPL_PKG) {
8031
8032
if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr))
8033
return -9;
8034
8035
fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n", cpu, msr, (msr >> 63) & 1 ? "" : "UN");
8036
8037
print_power_limit_msr(cpu, msr, "PKG Limit #1");
8038
fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%0.3f Watts, %f* sec, clamp %sabled)\n",
8039
cpu,
8040
((msr >> 47) & 1) ? "EN" : "DIS",
8041
((msr >> 32) & 0x7FFF) * rapl_power_units,
8042
(1.0 + (((msr >> 54) & 0x3) / 4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units, ((msr >> 48) & 1) ? "EN" : "DIS");
8043
8044
if (get_msr(cpu, MSR_VR_CURRENT_CONFIG, &msr))
8045
return -9;
8046
8047
fprintf(outf, "cpu%d: MSR_VR_CURRENT_CONFIG: 0x%08llx\n", cpu, msr);
8048
fprintf(outf, "cpu%d: PKG Limit #4: %f Watts (%slocked)\n", cpu, ((msr >> 0) & 0x1FFF) * rapl_power_units, (msr >> 31) & 1 ? "" : "UN");
8049
}
8050
8051
if (valid_rapl_msrs & RAPL_DRAM_POWER_INFO) {
8052
if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr))
8053
return -6;
8054
8055
fprintf(outf, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
8056
cpu, msr,
8057
((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
8058
((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
8059
((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units, ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
8060
}
8061
if (valid_rapl_msrs & RAPL_DRAM) {
8062
if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr))
8063
return -9;
8064
fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n", cpu, msr, (msr >> 31) & 1 ? "" : "UN");
8065
8066
print_power_limit_msr(cpu, msr, "DRAM Limit");
8067
}
8068
if (valid_rapl_msrs & RAPL_CORE_POLICY) {
8069
if (get_msr(cpu, MSR_PP0_POLICY, &msr))
8070
return -7;
8071
8072
fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
8073
}
8074
if (valid_rapl_msrs & RAPL_CORE_POWER_LIMIT) {
8075
if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
8076
return -9;
8077
fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n", cpu, msr, (msr >> 31) & 1 ? "" : "UN");
8078
print_power_limit_msr(cpu, msr, "Cores Limit");
8079
}
8080
if (valid_rapl_msrs & RAPL_GFX) {
8081
if (get_msr(cpu, MSR_PP1_POLICY, &msr))
8082
return -8;
8083
8084
fprintf(outf, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF);
8085
8086
if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr))
8087
return -9;
8088
fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n", cpu, msr, (msr >> 31) & 1 ? "" : "UN");
8089
print_power_limit_msr(cpu, msr, "GFX Limit");
8090
}
8091
return 0;
8092
}
8093
8094
/*
8095
* probe_rapl_msrs
8096
*
8097
* initialize global valid_rapl_msrs to platform->plat_rapl_msrs
8098
* only if PKG_ENERGY counter is enumerated and reads non-zero
8099
*/
8100
void probe_rapl_msrs(void)
8101
{
8102
int ret;
8103
off_t offset;
8104
unsigned long long msr_value;
8105
8106
if (no_msr)
8107
return;
8108
8109
if ((platform->plat_rapl_msrs & (RAPL_PKG | RAPL_AMD_F17H)) == 0)
8110
return;
8111
8112
offset = idx_to_offset(IDX_PKG_ENERGY);
8113
if (offset < 0)
8114
return;
8115
8116
ret = get_msr(master_cpu, offset, &msr_value);
8117
if (ret) {
8118
if (debug)
8119
fprintf(outf, "Can not read RAPL_PKG_ENERGY MSR(0x%llx)\n", (unsigned long long)offset);
8120
return;
8121
}
8122
if (msr_value == 0) {
8123
if (debug)
8124
fprintf(outf, "RAPL_PKG_ENERGY MSR(0x%llx) == ZERO: disabling all RAPL MSRs\n", (unsigned long long)offset);
8125
return;
8126
}
8127
8128
valid_rapl_msrs = platform->plat_rapl_msrs; /* success */
8129
}
8130
8131
/*
8132
* probe_rapl()
8133
*
8134
* sets rapl_power_units, rapl_energy_units, rapl_time_units
8135
*/
8136
void probe_rapl(void)
8137
{
8138
probe_rapl_msrs();
8139
8140
if (genuine_intel)
8141
rapl_probe_intel();
8142
if (authentic_amd || hygon_genuine)
8143
rapl_probe_amd();
8144
8145
if (quiet)
8146
return;
8147
8148
print_rapl_sysfs();
8149
8150
if (!valid_rapl_msrs || no_msr)
8151
return;
8152
8153
for_all_cpus(print_rapl, ODD_COUNTERS);
8154
}
8155
8156
/*
8157
* MSR_IA32_TEMPERATURE_TARGET indicates the temperature where
8158
* the Thermal Control Circuit (TCC) activates.
8159
* This is usually equal to tjMax.
8160
*
8161
* Older processors do not have this MSR, so there we guess,
8162
* but also allow cmdline over-ride with -T.
8163
*
8164
* Several MSR temperature values are in units of degrees-C
8165
* below this value, including the Digital Thermal Sensor (DTS),
8166
* Package Thermal Management Sensor (PTM), and thermal event thresholds.
8167
*/
8168
int set_temperature_target(PER_THREAD_PARAMS)
8169
{
8170
unsigned long long msr;
8171
unsigned int tcc_default, tcc_offset;
8172
int cpu;
8173
8174
UNUSED(c);
8175
UNUSED(p);
8176
8177
/* tj_max is used only for dts or ptm */
8178
if (!(do_dts || do_ptm))
8179
return 0;
8180
8181
/* this is a per-package concept */
8182
if (!is_cpu_first_thread_in_package(t, c, p))
8183
return 0;
8184
8185
cpu = t->cpu_id;
8186
if (cpu_migrate(cpu)) {
8187
fprintf(outf, "Could not migrate to CPU %d\n", cpu);
8188
return -1;
8189
}
8190
8191
if (tj_max_override != 0) {
8192
tj_max = tj_max_override;
8193
fprintf(outf, "cpu%d: Using cmdline TCC Target (%d C)\n", cpu, tj_max);
8194
return 0;
8195
}
8196
8197
/* Temperature Target MSR is Nehalem and newer only */
8198
if (!platform->has_nhm_msrs || no_msr)
8199
goto guess;
8200
8201
if (get_msr(master_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr))
8202
goto guess;
8203
8204
tcc_default = (msr >> 16) & 0xFF;
8205
8206
if (!quiet) {
8207
int bits = platform->tcc_offset_bits;
8208
unsigned long long enabled = 0;
8209
8210
if (bits && !get_msr(master_cpu, MSR_PLATFORM_INFO, &enabled))
8211
enabled = (enabled >> 30) & 1;
8212
8213
if (bits && enabled) {
8214
tcc_offset = (msr >> 24) & GENMASK(bits - 1, 0);
8215
fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n",
8216
cpu, msr, tcc_default - tcc_offset, tcc_default, tcc_offset);
8217
} else {
8218
fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", cpu, msr, tcc_default);
8219
}
8220
}
8221
8222
if (!tcc_default)
8223
goto guess;
8224
8225
tj_max = tcc_default;
8226
8227
return 0;
8228
8229
guess:
8230
tj_max = TJMAX_DEFAULT;
8231
fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n", cpu, tj_max);
8232
8233
return 0;
8234
}
8235
8236
int print_thermal(PER_THREAD_PARAMS)
8237
{
8238
unsigned long long msr;
8239
unsigned int dts, dts2;
8240
int cpu;
8241
8242
UNUSED(c);
8243
UNUSED(p);
8244
8245
if (no_msr)
8246
return 0;
8247
8248
if (!(do_dts || do_ptm))
8249
return 0;
8250
8251
cpu = t->cpu_id;
8252
8253
/* DTS is per-core, no need to print for each thread */
8254
if (!is_cpu_first_thread_in_core(t, c))
8255
return 0;
8256
8257
if (cpu_migrate(cpu)) {
8258
fprintf(outf, "print_thermal: Could not migrate to CPU %d\n", cpu);
8259
return -1;
8260
}
8261
8262
if (do_ptm && is_cpu_first_core_in_package(t, p)) {
8263
if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
8264
return 0;
8265
8266
dts = (msr >> 16) & 0x7F;
8267
fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n", cpu, msr, tj_max - dts);
8268
8269
if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr))
8270
return 0;
8271
8272
dts = (msr >> 16) & 0x7F;
8273
dts2 = (msr >> 8) & 0x7F;
8274
fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", cpu, msr, tj_max - dts, tj_max - dts2);
8275
}
8276
8277
if (do_dts && debug) {
8278
unsigned int resolution;
8279
8280
if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
8281
return 0;
8282
8283
dts = (msr >> 16) & 0x7F;
8284
resolution = (msr >> 27) & 0xF;
8285
fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n", cpu, msr, tj_max - dts, resolution);
8286
8287
if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr))
8288
return 0;
8289
8290
dts = (msr >> 16) & 0x7F;
8291
dts2 = (msr >> 8) & 0x7F;
8292
fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", cpu, msr, tj_max - dts, tj_max - dts2);
8293
}
8294
8295
return 0;
8296
}
8297
8298
void probe_thermal(void)
8299
{
8300
if (!access("/sys/devices/system/cpu/cpu0/thermal_throttle/core_throttle_count", R_OK))
8301
BIC_PRESENT(BIC_CORE_THROT_CNT);
8302
else
8303
BIC_NOT_PRESENT(BIC_CORE_THROT_CNT);
8304
8305
for_all_cpus(set_temperature_target, ODD_COUNTERS);
8306
8307
if (quiet)
8308
return;
8309
8310
for_all_cpus(print_thermal, ODD_COUNTERS);
8311
}
8312
8313
int get_cpu_type(PER_THREAD_PARAMS)
8314
{
8315
unsigned int eax, ebx, ecx, edx;
8316
8317
UNUSED(c);
8318
UNUSED(p);
8319
8320
if (!genuine_intel)
8321
return 0;
8322
8323
if (cpu_migrate(t->cpu_id)) {
8324
fprintf(outf, "Could not migrate to CPU %d\n", t->cpu_id);
8325
return -1;
8326
}
8327
8328
if (max_level < 0x1a)
8329
return 0;
8330
8331
__cpuid(0x1a, eax, ebx, ecx, edx);
8332
eax = (eax >> 24) & 0xFF;
8333
if (eax == 0x20)
8334
t->is_atom = true;
8335
return 0;
8336
}
8337
8338
void decode_feature_control_msr(void)
8339
{
8340
unsigned long long msr;
8341
8342
if (no_msr)
8343
return;
8344
8345
if (quiet)
8346
return;
8347
8348
if (!get_msr(master_cpu, MSR_IA32_FEAT_CTL, &msr))
8349
fprintf(outf, "cpu%d: MSR_IA32_FEATURE_CONTROL: 0x%08llx (%sLocked %s)\n",
8350
master_cpu, msr, msr & FEAT_CTL_LOCKED ? "" : "UN-", msr & (1 << 18) ? "SGX" : "");
8351
}
8352
8353
void decode_misc_enable_msr(void)
8354
{
8355
unsigned long long msr;
8356
8357
if (no_msr)
8358
return;
8359
8360
if (!genuine_intel)
8361
return;
8362
8363
if (!get_msr(master_cpu, MSR_IA32_MISC_ENABLE, &msr))
8364
fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%sTCC %sEIST %sMWAIT %sPREFETCH %sTURBO)\n",
8365
master_cpu, msr,
8366
msr & MSR_IA32_MISC_ENABLE_TM1 ? "" : "No-",
8367
msr & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP ? "" : "No-",
8368
msr & MSR_IA32_MISC_ENABLE_MWAIT ? "" : "No-",
8369
msr & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE ? "No-" : "", msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ? "No-" : "");
8370
}
8371
8372
void decode_misc_feature_control(void)
8373
{
8374
unsigned long long msr;
8375
8376
if (no_msr)
8377
return;
8378
8379
if (!platform->has_msr_misc_feature_control)
8380
return;
8381
8382
if (!get_msr(master_cpu, MSR_MISC_FEATURE_CONTROL, &msr))
8383
fprintf(outf,
8384
"cpu%d: MSR_MISC_FEATURE_CONTROL: 0x%08llx (%sL2-Prefetch %sL2-Prefetch-pair %sL1-Prefetch %sL1-IP-Prefetch)\n",
8385
master_cpu, msr, msr & (0 << 0) ? "No-" : "", msr & (1 << 0) ? "No-" : "", msr & (2 << 0) ? "No-" : "", msr & (3 << 0) ? "No-" : "");
8386
}
8387
8388
/*
8389
* Decode MSR_MISC_PWR_MGMT
8390
*
8391
* Decode the bits according to the Nehalem documentation
8392
* bit[0] seems to continue to have same meaning going forward
8393
* bit[1] less so...
8394
*/
8395
void decode_misc_pwr_mgmt_msr(void)
8396
{
8397
unsigned long long msr;
8398
8399
if (no_msr)
8400
return;
8401
8402
if (!platform->has_msr_misc_pwr_mgmt)
8403
return;
8404
8405
if (!get_msr(master_cpu, MSR_MISC_PWR_MGMT, &msr))
8406
fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB %sable-OOB)\n",
8407
master_cpu, msr, msr & (1 << 0) ? "DIS" : "EN", msr & (1 << 1) ? "EN" : "DIS", msr & (1 << 8) ? "EN" : "DIS");
8408
}
8409
8410
/*
8411
* Decode MSR_CC6_DEMOTION_POLICY_CONFIG, MSR_MC6_DEMOTION_POLICY_CONFIG
8412
*
8413
* This MSRs are present on Silvermont processors,
8414
* Intel Atom processor E3000 series (Baytrail), and friends.
8415
*/
8416
void decode_c6_demotion_policy_msr(void)
8417
{
8418
unsigned long long msr;
8419
8420
if (no_msr)
8421
return;
8422
8423
if (!platform->has_msr_c6_demotion_policy_config)
8424
return;
8425
8426
if (!get_msr(master_cpu, MSR_CC6_DEMOTION_POLICY_CONFIG, &msr))
8427
fprintf(outf, "cpu%d: MSR_CC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-CC6-Demotion)\n", master_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");
8428
8429
if (!get_msr(master_cpu, MSR_MC6_DEMOTION_POLICY_CONFIG, &msr))
8430
fprintf(outf, "cpu%d: MSR_MC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-MC6-Demotion)\n", master_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");
8431
}
8432
8433
void print_dev_latency(void)
8434
{
8435
char *path = "/dev/cpu_dma_latency";
8436
int fd;
8437
int value;
8438
int retval;
8439
8440
fd = open(path, O_RDONLY);
8441
if (fd < 0) {
8442
if (debug)
8443
warnx("Read %s failed", path);
8444
return;
8445
}
8446
8447
retval = read(fd, (void *)&value, sizeof(int));
8448
if (retval != sizeof(int)) {
8449
warn("read failed %s", path);
8450
close(fd);
8451
return;
8452
}
8453
fprintf(outf, "/dev/cpu_dma_latency: %d usec (%s)\n", value, value == 2000000000 ? "default" : "constrained");
8454
8455
close(fd);
8456
}
8457
8458
static int has_perf_instr_count_access(void)
8459
{
8460
int fd;
8461
8462
if (no_perf)
8463
return 0;
8464
8465
fd = open_perf_counter(master_cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, -1, 0);
8466
if (fd != -1)
8467
close(fd);
8468
8469
if (fd == -1)
8470
warnx("Failed to access %s. Some of the counters may not be available\n"
8471
"\tRun as root to enable them or use %s to disable the access explicitly", "perf instructions retired counter",
8472
"'--hide IPC' or '--no-perf'");
8473
8474
return (fd != -1);
8475
}
8476
8477
int add_rapl_perf_counter(int cpu, struct rapl_counter_info_t *rci, const struct rapl_counter_arch_info *cai, double *scale_, enum rapl_unit *unit_)
8478
{
8479
int ret = -1;
8480
8481
if (no_perf)
8482
return -1;
8483
8484
if (!cai->perf_name)
8485
return -1;
8486
8487
const double scale = read_perf_scale(cai->perf_subsys, cai->perf_name);
8488
8489
if (scale == 0.0)
8490
goto end;
8491
8492
const enum rapl_unit unit = read_perf_rapl_unit(cai->perf_subsys, cai->perf_name);
8493
8494
if (unit == RAPL_UNIT_INVALID)
8495
goto end;
8496
8497
const unsigned int rapl_type = read_perf_type(cai->perf_subsys);
8498
const unsigned int rapl_energy_pkg_config = read_perf_config(cai->perf_subsys, cai->perf_name);
8499
8500
ret = open_perf_counter(cpu, rapl_type, rapl_energy_pkg_config, rci->fd_perf, PERF_FORMAT_GROUP);
8501
if (ret == -1)
8502
goto end;
8503
8504
/* If it's the first counter opened, make it a group descriptor */
8505
if (rci->fd_perf == -1)
8506
rci->fd_perf = ret;
8507
8508
*scale_ = scale;
8509
*unit_ = unit;
8510
8511
end:
8512
if (debug >= 2)
8513
fprintf(stderr, "%s: %d (cpu: %d)\n", __func__, ret, cpu);
8514
8515
return ret;
8516
}
8517
8518
char cpuset_buf[1024];
8519
int initialize_cpu_set_from_sysfs(cpu_set_t *cpu_set, char *sysfs_path, char *sysfs_file)
8520
{
8521
FILE *fp;
8522
char path[128];
8523
8524
if (snprintf(path, 128, "%s/%s", sysfs_path, sysfs_file) > 128)
8525
err(-1, "%s %s", sysfs_path, sysfs_file);
8526
8527
fp = fopen(path, "r");
8528
if (!fp) {
8529
warn("open %s", path);
8530
return -1;
8531
}
8532
if (fread(cpuset_buf, sizeof(char), 1024, fp) == 0) {
8533
warn("read %s", sysfs_path);
8534
goto err;
8535
}
8536
if (parse_cpu_str(cpuset_buf, cpu_set, cpu_possible_setsize)) {
8537
warnx("%s: cpu str malformat %s\n", sysfs_path, cpu_effective_str);
8538
goto err;
8539
}
8540
return 0;
8541
8542
err:
8543
fclose(fp);
8544
return -1;
8545
}
8546
8547
void print_cpu_set(char *s, cpu_set_t *set)
8548
{
8549
int i;
8550
8551
assert(MAX_BIC < CPU_SETSIZE);
8552
8553
printf("%s:", s);
8554
8555
for (i = 0; i <= topo.max_cpu_num; ++i)
8556
if (CPU_ISSET(i, set))
8557
printf(" %d", i);
8558
putchar('\n');
8559
}
8560
8561
void linux_perf_init_hybrid_cpus(void)
8562
{
8563
char *perf_cpu_pcore_path = "/sys/devices/cpu_core";
8564
char *perf_cpu_ecore_path = "/sys/devices/cpu_atom";
8565
char *perf_cpu_lcore_path = "/sys/devices/cpu_lowpower";
8566
char path[128];
8567
8568
if (!access(perf_cpu_pcore_path, F_OK)) {
8569
perf_pcore_set = CPU_ALLOC((topo.max_cpu_num + 1));
8570
if (perf_pcore_set == NULL)
8571
err(3, "CPU_ALLOC");
8572
CPU_ZERO_S(cpu_possible_setsize, perf_pcore_set);
8573
initialize_cpu_set_from_sysfs(perf_pcore_set, perf_cpu_pcore_path, "cpus");
8574
if (debug)
8575
print_cpu_set("perf pcores", perf_pcore_set);
8576
sprintf(path, "%s/%s", perf_cpu_pcore_path, "type");
8577
perf_pmu_types.pcore = snapshot_sysfs_counter(path);
8578
}
8579
8580
if (!access(perf_cpu_ecore_path, F_OK)) {
8581
perf_ecore_set = CPU_ALLOC((topo.max_cpu_num + 1));
8582
if (perf_ecore_set == NULL)
8583
err(3, "CPU_ALLOC");
8584
CPU_ZERO_S(cpu_possible_setsize, perf_ecore_set);
8585
initialize_cpu_set_from_sysfs(perf_ecore_set, perf_cpu_ecore_path, "cpus");
8586
if (debug)
8587
print_cpu_set("perf ecores", perf_ecore_set);
8588
sprintf(path, "%s/%s", perf_cpu_ecore_path, "type");
8589
perf_pmu_types.ecore = snapshot_sysfs_counter(path);
8590
}
8591
8592
if (!access(perf_cpu_lcore_path, F_OK)) {
8593
perf_lcore_set = CPU_ALLOC((topo.max_cpu_num + 1));
8594
if (perf_lcore_set == NULL)
8595
err(3, "CPU_ALLOC");
8596
CPU_ZERO_S(cpu_possible_setsize, perf_lcore_set);
8597
initialize_cpu_set_from_sysfs(perf_lcore_set, perf_cpu_lcore_path, "cpus");
8598
if (debug)
8599
print_cpu_set("perf lcores", perf_lcore_set);
8600
sprintf(path, "%s/%s", perf_cpu_lcore_path, "type");
8601
perf_pmu_types.lcore = snapshot_sysfs_counter(path);
8602
}
8603
}
8604
8605
/*
8606
* Linux-perf related initialization
8607
*/
8608
void linux_perf_init(void)
8609
{
8610
char path[128];
8611
char *perf_cpu_path = "/sys/devices/cpu";
8612
8613
if (access("/proc/sys/kernel/perf_event_paranoid", F_OK))
8614
return;
8615
8616
if (!access(perf_cpu_path, F_OK)) {
8617
sprintf(path, "%s/%s", perf_cpu_path, "type");
8618
perf_pmu_types.uniform = snapshot_sysfs_counter(path);
8619
} else {
8620
linux_perf_init_hybrid_cpus();
8621
}
8622
8623
if (BIC_IS_ENABLED(BIC_IPC) && cpuid_has_aperf_mperf) {
8624
fd_instr_count_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
8625
if (fd_instr_count_percpu == NULL)
8626
err(-1, "calloc fd_instr_count_percpu");
8627
}
8628
if (BIC_IS_ENABLED(BIC_LLC_MRPS) || BIC_IS_ENABLED(BIC_LLC_HIT)) {
8629
fd_llc_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
8630
if (fd_llc_percpu == NULL)
8631
err(-1, "calloc fd_llc_percpu");
8632
}
8633
if (BIC_IS_ENABLED(BIC_L2_MRPS) || BIC_IS_ENABLED(BIC_L2_HIT)) {
8634
fd_l2_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
8635
if (fd_l2_percpu == NULL)
8636
err(-1, "calloc fd_l2_percpu");
8637
}
8638
}
8639
8640
void rapl_perf_init(void)
8641
{
8642
const unsigned int num_domains = get_rapl_num_domains();
8643
bool *domain_visited = calloc(num_domains, sizeof(bool));
8644
8645
rapl_counter_info_perdomain = calloc(num_domains, sizeof(*rapl_counter_info_perdomain));
8646
if (rapl_counter_info_perdomain == NULL)
8647
err(-1, "calloc rapl_counter_info_percpu");
8648
rapl_counter_info_perdomain_size = num_domains;
8649
8650
/*
8651
* Initialize rapl_counter_info_percpu
8652
*/
8653
for (unsigned int domain_id = 0; domain_id < num_domains; ++domain_id) {
8654
struct rapl_counter_info_t *rci = &rapl_counter_info_perdomain[domain_id];
8655
8656
rci->fd_perf = -1;
8657
for (size_t i = 0; i < NUM_RAPL_COUNTERS; ++i) {
8658
rci->data[i] = 0;
8659
rci->source[i] = COUNTER_SOURCE_NONE;
8660
}
8661
}
8662
8663
/*
8664
* Open/probe the counters
8665
* If can't get it via perf, fallback to MSR
8666
*/
8667
for (size_t i = 0; i < ARRAY_SIZE(rapl_counter_arch_infos); ++i) {
8668
8669
const struct rapl_counter_arch_info *const cai = &rapl_counter_arch_infos[i];
8670
bool has_counter = 0;
8671
double scale;
8672
enum rapl_unit unit;
8673
unsigned int next_domain;
8674
8675
if (!BIC_IS_ENABLED(cai->bic_number))
8676
continue;
8677
8678
memset(domain_visited, 0, num_domains * sizeof(*domain_visited));
8679
8680
for (int cpu = 0; cpu < topo.max_cpu_num + 1; ++cpu) {
8681
8682
if (cpu_is_not_allowed(cpu))
8683
continue;
8684
8685
/* Skip already seen and handled RAPL domains */
8686
next_domain = get_rapl_domain_id(cpu);
8687
8688
assert(next_domain < num_domains);
8689
8690
if (domain_visited[next_domain])
8691
continue;
8692
8693
domain_visited[next_domain] = 1;
8694
8695
if ((cai->flags & RAPL_COUNTER_FLAG_PLATFORM_COUNTER) && (cpu != master_cpu))
8696
continue;
8697
8698
struct rapl_counter_info_t *rci = &rapl_counter_info_perdomain[next_domain];
8699
8700
/*
8701
* rapl_counter_arch_infos[] can have multiple entries describing the same
8702
* counter, due to the difference from different platforms/Vendors.
8703
* E.g. rapl_counter_arch_infos[0] and rapl_counter_arch_infos[1] share the
8704
* same perf_subsys and perf_name, but with different MSR address.
8705
* rapl_counter_arch_infos[0] is for Intel and rapl_counter_arch_infos[1]
8706
* is for AMD.
8707
* In this case, it is possible that multiple rapl_counter_arch_infos[]
8708
* entries are probed just because their perf/msr is duplicate and valid.
8709
*
8710
* Thus need a check to avoid re-probe the same counters.
8711
*/
8712
if (rci->source[cai->rci_index] != COUNTER_SOURCE_NONE)
8713
break;
8714
8715
/* Use perf API for this counter */
8716
if (add_rapl_perf_counter(cpu, rci, cai, &scale, &unit) != -1) {
8717
rci->source[cai->rci_index] = COUNTER_SOURCE_PERF;
8718
rci->scale[cai->rci_index] = scale * cai->compat_scale;
8719
rci->unit[cai->rci_index] = unit;
8720
rci->flags[cai->rci_index] = cai->flags;
8721
8722
/* Use MSR for this counter */
8723
} else if (add_rapl_msr_counter(cpu, cai) >= 0) {
8724
rci->source[cai->rci_index] = COUNTER_SOURCE_MSR;
8725
rci->msr[cai->rci_index] = cai->msr;
8726
rci->msr_mask[cai->rci_index] = cai->msr_mask;
8727
rci->msr_shift[cai->rci_index] = cai->msr_shift;
8728
rci->unit[cai->rci_index] = RAPL_UNIT_JOULES;
8729
rci->scale[cai->rci_index] = *cai->platform_rapl_msr_scale * cai->compat_scale;
8730
rci->flags[cai->rci_index] = cai->flags;
8731
}
8732
8733
if (rci->source[cai->rci_index] != COUNTER_SOURCE_NONE)
8734
has_counter = 1;
8735
}
8736
8737
/* If any CPU has access to the counter, make it present */
8738
if (has_counter)
8739
BIC_PRESENT(cai->bic_number);
8740
}
8741
8742
free(domain_visited);
8743
}
8744
8745
/* Assumes msr_counter_info is populated */
8746
static int has_amperf_access(void)
8747
{
8748
return cpuid_has_aperf_mperf && msr_counter_arch_infos[MSR_ARCH_INFO_APERF_INDEX].present && msr_counter_arch_infos[MSR_ARCH_INFO_MPERF_INDEX].present;
8749
}
8750
8751
int *get_cstate_perf_group_fd(struct cstate_counter_info_t *cci, const char *group_name)
8752
{
8753
if (strcmp(group_name, "cstate_core") == 0)
8754
return &cci->fd_perf_core;
8755
8756
if (strcmp(group_name, "cstate_pkg") == 0)
8757
return &cci->fd_perf_pkg;
8758
8759
return NULL;
8760
}
8761
8762
int add_cstate_perf_counter(int cpu, struct cstate_counter_info_t *cci, const struct cstate_counter_arch_info *cai)
8763
{
8764
int ret = -1;
8765
8766
if (no_perf)
8767
return -1;
8768
8769
if (!cai->perf_name)
8770
return -1;
8771
8772
int *pfd_group = get_cstate_perf_group_fd(cci, cai->perf_subsys);
8773
8774
if (pfd_group == NULL)
8775
goto end;
8776
8777
const unsigned int type = read_perf_type(cai->perf_subsys);
8778
const unsigned int config = read_perf_config(cai->perf_subsys, cai->perf_name);
8779
8780
ret = open_perf_counter(cpu, type, config, *pfd_group, PERF_FORMAT_GROUP);
8781
8782
if (ret == -1)
8783
goto end;
8784
8785
/* If it's the first counter opened, make it a group descriptor */
8786
if (*pfd_group == -1)
8787
*pfd_group = ret;
8788
8789
end:
8790
if (debug >= 2)
8791
fprintf(stderr, "%s: %d (cpu: %d)\n", __func__, ret, cpu);
8792
8793
return ret;
8794
}
8795
8796
int add_msr_perf_counter(int cpu, struct msr_counter_info_t *cci, const struct msr_counter_arch_info *cai)
8797
{
8798
int ret = -1;
8799
8800
if (no_perf)
8801
return -1;
8802
8803
if (!cai->perf_name)
8804
return -1;
8805
8806
const unsigned int type = read_perf_type(cai->perf_subsys);
8807
const unsigned int config = read_perf_config(cai->perf_subsys, cai->perf_name);
8808
8809
ret = open_perf_counter(cpu, type, config, cci->fd_perf, PERF_FORMAT_GROUP);
8810
8811
if (ret == -1)
8812
goto end;
8813
8814
/* If it's the first counter opened, make it a group descriptor */
8815
if (cci->fd_perf == -1)
8816
cci->fd_perf = ret;
8817
8818
end:
8819
if (debug)
8820
fprintf(stderr, "%s: %s/%s: %d (cpu: %d)\n", __func__, cai->perf_subsys, cai->perf_name, ret, cpu);
8821
8822
return ret;
8823
}
8824
8825
void msr_perf_init_(void)
8826
{
8827
const int mci_num = topo.max_cpu_num + 1;
8828
8829
msr_counter_info = calloc(mci_num, sizeof(*msr_counter_info));
8830
if (!msr_counter_info)
8831
err(1, "calloc msr_counter_info");
8832
msr_counter_info_size = mci_num;
8833
8834
for (int cpu = 0; cpu < mci_num; ++cpu)
8835
msr_counter_info[cpu].fd_perf = -1;
8836
8837
for (int cidx = 0; cidx < NUM_MSR_COUNTERS; ++cidx) {
8838
8839
struct msr_counter_arch_info *cai = &msr_counter_arch_infos[cidx];
8840
8841
cai->present = false;
8842
8843
for (int cpu = 0; cpu < mci_num; ++cpu) {
8844
8845
struct msr_counter_info_t *const cci = &msr_counter_info[cpu];
8846
8847
if (cpu_is_not_allowed(cpu))
8848
continue;
8849
8850
if (cai->needed) {
8851
/* Use perf API for this counter */
8852
if (add_msr_perf_counter(cpu, cci, cai) != -1) {
8853
cci->source[cai->rci_index] = COUNTER_SOURCE_PERF;
8854
cai->present = true;
8855
8856
/* User MSR for this counter */
8857
} else if (add_msr_counter(cpu, cai->msr) >= 0) {
8858
cci->source[cai->rci_index] = COUNTER_SOURCE_MSR;
8859
cci->msr[cai->rci_index] = cai->msr;
8860
cci->msr_mask[cai->rci_index] = cai->msr_mask;
8861
cai->present = true;
8862
}
8863
}
8864
}
8865
}
8866
}
8867
8868
/* Initialize data for reading perf counters from the MSR group. */
8869
void msr_perf_init(void)
8870
{
8871
bool need_amperf = false, need_smi = false;
8872
const bool need_soft_c1 = (!platform->has_msr_core_c1_res) && (platform->supported_cstates & CC1);
8873
8874
need_amperf = BIC_IS_ENABLED(BIC_Avg_MHz) || BIC_IS_ENABLED(BIC_Busy) || BIC_IS_ENABLED(BIC_Bzy_MHz)
8875
|| BIC_IS_ENABLED(BIC_IPC) || need_soft_c1;
8876
8877
if (BIC_IS_ENABLED(BIC_SMI))
8878
need_smi = true;
8879
8880
/* Enable needed counters */
8881
msr_counter_arch_infos[MSR_ARCH_INFO_APERF_INDEX].needed = need_amperf;
8882
msr_counter_arch_infos[MSR_ARCH_INFO_MPERF_INDEX].needed = need_amperf;
8883
msr_counter_arch_infos[MSR_ARCH_INFO_SMI_INDEX].needed = need_smi;
8884
8885
msr_perf_init_();
8886
8887
const bool has_amperf = has_amperf_access();
8888
const bool has_smi = msr_counter_arch_infos[MSR_ARCH_INFO_SMI_INDEX].present;
8889
8890
has_aperf_access = has_amperf;
8891
8892
if (has_amperf) {
8893
BIC_PRESENT(BIC_Avg_MHz);
8894
BIC_PRESENT(BIC_Busy);
8895
BIC_PRESENT(BIC_Bzy_MHz);
8896
BIC_PRESENT(BIC_SMI);
8897
}
8898
8899
if (has_smi)
8900
BIC_PRESENT(BIC_SMI);
8901
}
8902
8903
void cstate_perf_init_(bool soft_c1)
8904
{
8905
bool has_counter;
8906
bool *cores_visited = NULL, *pkg_visited = NULL;
8907
const int cores_visited_elems = topo.max_core_id + 1;
8908
const int pkg_visited_elems = topo.max_package_id + 1;
8909
const int cci_num = topo.max_cpu_num + 1;
8910
8911
ccstate_counter_info = calloc(cci_num, sizeof(*ccstate_counter_info));
8912
if (!ccstate_counter_info)
8913
err(1, "calloc ccstate_counter_arch_info");
8914
ccstate_counter_info_size = cci_num;
8915
8916
cores_visited = calloc(cores_visited_elems, sizeof(*cores_visited));
8917
if (!cores_visited)
8918
err(1, "calloc cores_visited");
8919
8920
pkg_visited = calloc(pkg_visited_elems, sizeof(*pkg_visited));
8921
if (!pkg_visited)
8922
err(1, "calloc pkg_visited");
8923
8924
/* Initialize cstate_counter_info_percpu */
8925
for (int cpu = 0; cpu < cci_num; ++cpu) {
8926
ccstate_counter_info[cpu].fd_perf_core = -1;
8927
ccstate_counter_info[cpu].fd_perf_pkg = -1;
8928
}
8929
8930
for (int cidx = 0; cidx < NUM_CSTATE_COUNTERS; ++cidx) {
8931
has_counter = false;
8932
memset(cores_visited, 0, cores_visited_elems * sizeof(*cores_visited));
8933
memset(pkg_visited, 0, pkg_visited_elems * sizeof(*pkg_visited));
8934
8935
const struct cstate_counter_arch_info *cai = &ccstate_counter_arch_infos[cidx];
8936
8937
for (int cpu = 0; cpu < cci_num; ++cpu) {
8938
8939
struct cstate_counter_info_t *const cci = &ccstate_counter_info[cpu];
8940
8941
if (cpu_is_not_allowed(cpu))
8942
continue;
8943
8944
const int core_id = cpus[cpu].core_id;
8945
const int pkg_id = cpus[cpu].package_id;
8946
8947
assert(core_id < cores_visited_elems);
8948
assert(pkg_id < pkg_visited_elems);
8949
8950
const bool per_thread = cai->flags & CSTATE_COUNTER_FLAG_COLLECT_PER_THREAD;
8951
const bool per_core = cai->flags & CSTATE_COUNTER_FLAG_COLLECT_PER_CORE;
8952
8953
if (!per_thread && cores_visited[core_id])
8954
continue;
8955
8956
if (!per_core && pkg_visited[pkg_id])
8957
continue;
8958
8959
const bool counter_needed = BIC_IS_ENABLED(cai->bic_number) || (soft_c1 && (cai->flags & CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY));
8960
const bool counter_supported = (platform->supported_cstates & cai->feature_mask);
8961
8962
if (counter_needed && counter_supported) {
8963
/* Use perf API for this counter */
8964
if (add_cstate_perf_counter(cpu, cci, cai) != -1) {
8965
8966
cci->source[cai->rci_index] = COUNTER_SOURCE_PERF;
8967
8968
/* User MSR for this counter */
8969
} else if (pkg_cstate_limit >= cai->pkg_cstate_limit && add_msr_counter(cpu, cai->msr) >= 0) {
8970
cci->source[cai->rci_index] = COUNTER_SOURCE_MSR;
8971
cci->msr[cai->rci_index] = cai->msr;
8972
}
8973
}
8974
8975
if (cci->source[cai->rci_index] != COUNTER_SOURCE_NONE) {
8976
has_counter = true;
8977
cores_visited[core_id] = true;
8978
pkg_visited[pkg_id] = true;
8979
}
8980
}
8981
8982
/* If any CPU has access to the counter, make it present */
8983
if (has_counter)
8984
BIC_PRESENT(cai->bic_number);
8985
}
8986
8987
free(cores_visited);
8988
free(pkg_visited);
8989
}
8990
8991
void cstate_perf_init(void)
8992
{
8993
/*
8994
* If we don't have a C1 residency MSR, we calculate it "in software",
8995
* but we need APERF, MPERF too.
8996
*/
8997
const bool soft_c1 = !platform->has_msr_core_c1_res && has_amperf_access()
8998
&& platform->supported_cstates & CC1;
8999
9000
if (soft_c1)
9001
BIC_PRESENT(BIC_CPU_c1);
9002
9003
cstate_perf_init_(soft_c1);
9004
}
9005
9006
void probe_cstates(void)
9007
{
9008
probe_cst_limit();
9009
9010
if (platform->has_msr_module_c6_res_ms)
9011
BIC_PRESENT(BIC_Mod_c6);
9012
9013
if (platform->has_ext_cst_msrs && !no_msr) {
9014
BIC_PRESENT(BIC_Totl_c0);
9015
BIC_PRESENT(BIC_Any_c0);
9016
BIC_PRESENT(BIC_GFX_c0);
9017
BIC_PRESENT(BIC_CPUGFX);
9018
}
9019
9020
if (quiet)
9021
return;
9022
9023
dump_power_ctl();
9024
dump_cst_cfg();
9025
decode_c6_demotion_policy_msr();
9026
print_dev_latency();
9027
dump_sysfs_cstate_config();
9028
print_irtl();
9029
}
9030
9031
void probe_lpi(void)
9032
{
9033
if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", R_OK))
9034
BIC_PRESENT(BIC_CPU_LPI);
9035
else
9036
BIC_NOT_PRESENT(BIC_CPU_LPI);
9037
9038
if (!access(sys_lpi_file_sysfs, R_OK)) {
9039
sys_lpi_file = sys_lpi_file_sysfs;
9040
BIC_PRESENT(BIC_SYS_LPI);
9041
} else if (!access(sys_lpi_file_debugfs, R_OK)) {
9042
sys_lpi_file = sys_lpi_file_debugfs;
9043
BIC_PRESENT(BIC_SYS_LPI);
9044
} else {
9045
sys_lpi_file_sysfs = NULL;
9046
BIC_NOT_PRESENT(BIC_SYS_LPI);
9047
}
9048
9049
}
9050
9051
void probe_pstates(void)
9052
{
9053
probe_bclk();
9054
9055
if (quiet)
9056
return;
9057
9058
dump_platform_info();
9059
dump_turbo_ratio_info();
9060
dump_sysfs_pstate_config();
9061
decode_misc_pwr_mgmt_msr();
9062
9063
for_all_cpus(print_hwp, ODD_COUNTERS);
9064
for_all_cpus(print_epb, ODD_COUNTERS);
9065
for_all_cpus(print_perf_limit, ODD_COUNTERS);
9066
}
9067
9068
void dump_word_chars(unsigned int word)
9069
{
9070
int i;
9071
9072
for (i = 0; i < 4; ++i)
9073
fprintf(outf, "%c", (word >> (i * 8)) & 0xFF);
9074
}
9075
9076
void dump_cpuid_hypervisor(void)
9077
{
9078
unsigned int ebx = 0;
9079
unsigned int ecx = 0;
9080
unsigned int edx = 0;
9081
9082
__cpuid(0x40000000, max_extended_level, ebx, ecx, edx);
9083
9084
fprintf(outf, "Hypervisor: ");
9085
dump_word_chars(ebx);
9086
dump_word_chars(ecx);
9087
dump_word_chars(edx);
9088
fprintf(outf, "\n");
9089
}
9090
9091
void process_cpuid()
9092
{
9093
unsigned int eax, ebx, ecx, edx;
9094
unsigned int fms, family, model, stepping, ecx_flags, edx_flags;
9095
unsigned long long ucode_patch = 0;
9096
bool ucode_patch_valid = false;
9097
9098
eax = ebx = ecx = edx = 0;
9099
9100
__cpuid(0, max_level, ebx, ecx, edx);
9101
9102
if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
9103
genuine_intel = 1;
9104
else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
9105
authentic_amd = 1;
9106
else if (ebx == 0x6f677948 && ecx == 0x656e6975 && edx == 0x6e65476e)
9107
hygon_genuine = 1;
9108
9109
if (!quiet)
9110
fprintf(outf, "CPUID(0): %.4s%.4s%.4s 0x%x CPUID levels\n", (char *)&ebx, (char *)&edx, (char *)&ecx, max_level);
9111
9112
__cpuid(1, fms, ebx, ecx, edx);
9113
family = (fms >> 8) & 0xf;
9114
model = (fms >> 4) & 0xf;
9115
stepping = fms & 0xf;
9116
if (family == 0xf)
9117
family += (fms >> 20) & 0xff;
9118
if (family >= 6)
9119
model += ((fms >> 16) & 0xf) << 4;
9120
ecx_flags = ecx;
9121
edx_flags = edx;
9122
cpuid_has_hv = ecx_flags & (1 << 31);
9123
9124
if (!no_msr) {
9125
if (get_msr(sched_getcpu(), MSR_IA32_UCODE_REV, &ucode_patch))
9126
warnx("get_msr(UCODE)");
9127
else
9128
ucode_patch_valid = true;
9129
}
9130
9131
/*
9132
* check max extended function levels of CPUID.
9133
* This is needed to check for invariant TSC.
9134
* This check is valid for both Intel and AMD.
9135
*/
9136
ebx = ecx = edx = 0;
9137
__cpuid(0x80000000, max_extended_level, ebx, ecx, edx);
9138
9139
if (!quiet) {
9140
fprintf(outf, "CPUID(1): family:model:stepping 0x%x:%x:%x (%d:%d:%d)", family, model, stepping, family, model, stepping);
9141
if (ucode_patch_valid)
9142
fprintf(outf, " microcode 0x%x", (unsigned int)((ucode_patch >> 32) & 0xFFFFFFFF));
9143
fputc('\n', outf);
9144
9145
fprintf(outf, "CPUID(0x80000000): max_extended_levels: 0x%x\n", max_extended_level);
9146
fprintf(outf, "CPUID(1): %sSSE3 %sMONITOR %sSMX %sEIST %sTM2 %sHV %sTSC %sMSR %sACPI-TM %sHT %sTM\n",
9147
ecx_flags & (1 << 0) ? "" : "No-",
9148
ecx_flags & (1 << 3) ? "" : "No-",
9149
ecx_flags & (1 << 6) ? "" : "No-",
9150
ecx_flags & (1 << 7) ? "" : "No-",
9151
ecx_flags & (1 << 8) ? "" : "No-",
9152
cpuid_has_hv ? "" : "No-",
9153
edx_flags & (1 << 4) ? "" : "No-",
9154
edx_flags & (1 << 5) ? "" : "No-",
9155
edx_flags & (1 << 22) ? "" : "No-", edx_flags & (1 << 28) ? "" : "No-", edx_flags & (1 << 29) ? "" : "No-");
9156
}
9157
if (!quiet && cpuid_has_hv)
9158
dump_cpuid_hypervisor();
9159
9160
probe_platform_features(family, model);
9161
init_perf_model_support(family, model);
9162
9163
if (!(edx_flags & (1 << 5)))
9164
errx(1, "CPUID: no MSR");
9165
9166
if (max_extended_level >= 0x80000007) {
9167
9168
/*
9169
* Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8
9170
* this check is valid for both Intel and AMD
9171
*/
9172
__cpuid(0x80000007, eax, ebx, ecx, edx);
9173
has_invariant_tsc = edx & (1 << 8);
9174
}
9175
9176
/*
9177
* APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0
9178
* this check is valid for both Intel and AMD
9179
*/
9180
9181
__cpuid(0x6, eax, ebx, ecx, edx);
9182
cpuid_has_aperf_mperf = ecx & (1 << 0);
9183
do_dts = eax & (1 << 0);
9184
if (do_dts)
9185
BIC_PRESENT(BIC_CoreTmp);
9186
has_turbo = eax & (1 << 1);
9187
do_ptm = eax & (1 << 6);
9188
if (do_ptm)
9189
BIC_PRESENT(BIC_PkgTmp);
9190
has_hwp = eax & (1 << 7);
9191
has_hwp_notify = eax & (1 << 8);
9192
has_hwp_activity_window = eax & (1 << 9);
9193
has_hwp_epp = eax & (1 << 10);
9194
has_hwp_pkg = eax & (1 << 11);
9195
has_epb = ecx & (1 << 3);
9196
9197
if (!quiet)
9198
fprintf(outf, "CPUID(6): %sAPERF, %sTURBO, %sDTS, %sPTM, %sHWP, "
9199
"%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n",
9200
cpuid_has_aperf_mperf ? "" : "No-",
9201
has_turbo ? "" : "No-",
9202
do_dts ? "" : "No-",
9203
do_ptm ? "" : "No-",
9204
has_hwp ? "" : "No-",
9205
has_hwp_notify ? "" : "No-",
9206
has_hwp_activity_window ? "" : "No-", has_hwp_epp ? "" : "No-", has_hwp_pkg ? "" : "No-", has_epb ? "" : "No-");
9207
9208
if (!quiet)
9209
decode_misc_enable_msr();
9210
9211
if (max_level >= 0x7) {
9212
int has_sgx;
9213
9214
ecx = 0;
9215
9216
__cpuid_count(0x7, 0, eax, ebx, ecx, edx);
9217
9218
has_sgx = ebx & (1 << 2);
9219
9220
is_hybrid = !!(edx & (1 << 15));
9221
9222
if (!quiet)
9223
fprintf(outf, "CPUID(7): %sSGX %sHybrid\n", has_sgx ? "" : "No-", is_hybrid ? "" : "No-");
9224
9225
if (has_sgx)
9226
decode_feature_control_msr();
9227
}
9228
9229
if (max_level >= 0x15) {
9230
unsigned int eax_crystal;
9231
unsigned int ebx_tsc;
9232
9233
/*
9234
* CPUID 15H TSC/Crystal ratio, possibly Crystal Hz
9235
*/
9236
eax_crystal = ebx_tsc = crystal_hz = edx = 0;
9237
__cpuid(0x15, eax_crystal, ebx_tsc, crystal_hz, edx);
9238
9239
if (ebx_tsc != 0) {
9240
if (!quiet && (ebx != 0))
9241
fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n", eax_crystal, ebx_tsc, crystal_hz);
9242
9243
if (crystal_hz == 0)
9244
crystal_hz = platform->crystal_freq;
9245
9246
if (crystal_hz) {
9247
tsc_hz = (unsigned long long)crystal_hz *ebx_tsc / eax_crystal;
9248
if (!quiet)
9249
fprintf(outf, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n", tsc_hz / 1000000, crystal_hz, ebx_tsc, eax_crystal);
9250
}
9251
}
9252
}
9253
if (max_level >= 0x16) {
9254
unsigned int base_mhz, max_mhz, bus_mhz, edx;
9255
9256
/*
9257
* CPUID 16H Base MHz, Max MHz, Bus MHz
9258
*/
9259
base_mhz = max_mhz = bus_mhz = edx = 0;
9260
9261
__cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx);
9262
9263
bclk = bus_mhz;
9264
9265
base_hz = base_mhz * 1000000;
9266
has_base_hz = 1;
9267
9268
if (platform->enable_tsc_tweak)
9269
tsc_tweak = base_hz / tsc_hz;
9270
9271
if (!quiet)
9272
fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n", base_mhz, max_mhz, bus_mhz);
9273
}
9274
9275
if (cpuid_has_aperf_mperf)
9276
aperf_mperf_multiplier = platform->need_perf_multiplier ? 1024 : 1;
9277
9278
BIC_PRESENT(BIC_IRQ);
9279
BIC_PRESENT(BIC_NMI);
9280
BIC_PRESENT(BIC_TSC_MHz);
9281
}
9282
9283
static void counter_info_init(void)
9284
{
9285
for (int i = 0; i < NUM_CSTATE_COUNTERS; ++i) {
9286
struct cstate_counter_arch_info *const cai = &ccstate_counter_arch_infos[i];
9287
9288
if (platform->has_msr_knl_core_c6_residency && cai->msr == MSR_CORE_C6_RESIDENCY)
9289
cai->msr = MSR_KNL_CORE_C6_RESIDENCY;
9290
9291
if (!platform->has_msr_core_c1_res && cai->msr == MSR_CORE_C1_RES)
9292
cai->msr = 0;
9293
9294
if (platform->has_msr_atom_pkg_c6_residency && cai->msr == MSR_PKG_C6_RESIDENCY)
9295
cai->msr = MSR_ATOM_PKG_C6_RESIDENCY;
9296
}
9297
9298
for (int i = 0; i < NUM_MSR_COUNTERS; ++i) {
9299
msr_counter_arch_infos[i].present = false;
9300
msr_counter_arch_infos[i].needed = false;
9301
}
9302
}
9303
9304
void probe_pm_features(void)
9305
{
9306
probe_pstates();
9307
9308
probe_cstates();
9309
9310
probe_lpi();
9311
9312
probe_intel_uncore_frequency();
9313
9314
probe_graphics();
9315
9316
probe_rapl();
9317
9318
probe_thermal();
9319
9320
if (platform->has_nhm_msrs && !no_msr)
9321
BIC_PRESENT(BIC_SMI);
9322
9323
if (!quiet)
9324
decode_misc_feature_control();
9325
}
9326
9327
/*
9328
* has_perf_llc_access()
9329
*
9330
* return 1 on success, else 0
9331
*/
9332
int has_perf_llc_access(void)
9333
{
9334
int fd;
9335
9336
if (no_perf)
9337
return 0;
9338
9339
fd = open_perf_counter(master_cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES, -1, PERF_FORMAT_GROUP);
9340
if (fd != -1)
9341
close(fd);
9342
9343
if (fd == -1)
9344
warnx("Failed to access %s. Some of the counters may not be available\n"
9345
"\tRun as root to enable them or use %s to disable the access explicitly", "perf LLC counters", "'--hide LLC' or '--no-perf'");
9346
9347
return (fd != -1);
9348
}
9349
9350
void perf_llc_init(void)
9351
{
9352
int cpu;
9353
int retval;
9354
9355
if (no_perf)
9356
return;
9357
if (!(BIC_IS_ENABLED(BIC_LLC_MRPS) || BIC_IS_ENABLED(BIC_LLC_HIT)))
9358
return;
9359
9360
assert(fd_llc_percpu != 0);
9361
9362
for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) {
9363
9364
if (cpu_is_not_allowed(cpu))
9365
continue;
9366
9367
fd_llc_percpu[cpu] = open_perf_counter(cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES, -1, PERF_FORMAT_GROUP);
9368
if (fd_llc_percpu[cpu] == -1) {
9369
warnx("%s: perf REFS: failed to open counter on cpu%d", __func__, cpu);
9370
free_fd_llc_percpu();
9371
return;
9372
}
9373
retval = open_perf_counter(cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES, fd_llc_percpu[cpu], PERF_FORMAT_GROUP);
9374
if (retval == -1) {
9375
warnx("%s: perf MISS: failed to open counter on cpu%d", __func__, cpu);
9376
free_fd_llc_percpu();
9377
return;
9378
}
9379
}
9380
BIC_PRESENT(BIC_LLC_MRPS);
9381
BIC_PRESENT(BIC_LLC_HIT);
9382
}
9383
9384
void perf_l2_init(void)
9385
{
9386
int cpu;
9387
int retval;
9388
9389
if (no_perf)
9390
return;
9391
if (!(BIC_IS_ENABLED(BIC_L2_MRPS) || BIC_IS_ENABLED(BIC_L2_HIT)))
9392
return;
9393
if (perf_model_support == NULL)
9394
return;
9395
9396
assert(fd_l2_percpu != 0);
9397
9398
for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) {
9399
9400
if (cpu_is_not_allowed(cpu))
9401
continue;
9402
9403
if (!is_hybrid) {
9404
fd_l2_percpu[cpu] = open_perf_counter(cpu, perf_pmu_types.uniform, perf_model_support->first.refs, -1, PERF_FORMAT_GROUP);
9405
if (fd_l2_percpu[cpu] == -1) {
9406
err(-1, "%s(cpu%d, 0x%x, 0x%llx) REFS", __func__, cpu, perf_pmu_types.uniform, perf_model_support->first.refs);
9407
free_fd_l2_percpu();
9408
return;
9409
}
9410
retval = open_perf_counter(cpu, perf_pmu_types.uniform, perf_model_support->first.hits, fd_l2_percpu[cpu], PERF_FORMAT_GROUP);
9411
if (retval == -1) {
9412
err(-1, "%s(cpu%d, 0x%x, 0x%llx) HITS", __func__, cpu, perf_pmu_types.uniform, perf_model_support->first.hits);
9413
free_fd_l2_percpu();
9414
return;
9415
}
9416
continue;
9417
}
9418
if (perf_pcore_set && CPU_ISSET_S(cpu, cpu_possible_setsize, perf_pcore_set)) {
9419
fd_l2_percpu[cpu] = open_perf_counter(cpu, perf_pmu_types.pcore, perf_model_support->first.refs, -1, PERF_FORMAT_GROUP);
9420
if (fd_l2_percpu[cpu] == -1) {
9421
err(-1, "%s(cpu%d, 0x%x, 0x%llx) REFS", __func__, cpu, perf_pmu_types.pcore, perf_model_support->first.refs);
9422
free_fd_l2_percpu();
9423
return;
9424
}
9425
retval = open_perf_counter(cpu, perf_pmu_types.pcore, perf_model_support->first.hits, fd_l2_percpu[cpu], PERF_FORMAT_GROUP);
9426
if (retval == -1) {
9427
err(-1, "%s(cpu%d, 0x%x, 0x%llx) HITS", __func__, cpu, perf_pmu_types.pcore, perf_model_support->first.hits);
9428
free_fd_l2_percpu();
9429
return;
9430
}
9431
} else if (perf_ecore_set && CPU_ISSET_S(cpu, cpu_possible_setsize, perf_ecore_set)) {
9432
fd_l2_percpu[cpu] = open_perf_counter(cpu, perf_pmu_types.ecore, perf_model_support->second.refs, -1, PERF_FORMAT_GROUP);
9433
if (fd_l2_percpu[cpu] == -1) {
9434
err(-1, "%s(cpu%d, 0x%x, 0x%llx) REFS", __func__, cpu, perf_pmu_types.pcore, perf_model_support->second.refs);
9435
free_fd_l2_percpu();
9436
return;
9437
}
9438
retval = open_perf_counter(cpu, perf_pmu_types.ecore, perf_model_support->second.hits, fd_l2_percpu[cpu], PERF_FORMAT_GROUP);
9439
if (retval == -1) {
9440
err(-1, "%s(cpu%d, 0x%x, 0x%llx) HITS", __func__, cpu, perf_pmu_types.pcore, perf_model_support->second.hits);
9441
free_fd_l2_percpu();
9442
return;
9443
}
9444
} else if (perf_lcore_set && CPU_ISSET_S(cpu, cpu_possible_setsize, perf_lcore_set)) {
9445
fd_l2_percpu[cpu] = open_perf_counter(cpu, perf_pmu_types.lcore, perf_model_support->third.refs, -1, PERF_FORMAT_GROUP);
9446
if (fd_l2_percpu[cpu] == -1) {
9447
err(-1, "%s(cpu%d, 0x%x, 0x%llx) REFS", __func__, cpu, perf_pmu_types.pcore, perf_model_support->third.refs);
9448
free_fd_l2_percpu();
9449
return;
9450
}
9451
retval = open_perf_counter(cpu, perf_pmu_types.lcore, perf_model_support->third.hits, fd_l2_percpu[cpu], PERF_FORMAT_GROUP);
9452
if (retval == -1) {
9453
err(-1, "%s(cpu%d, 0x%x, 0x%llx) HITS", __func__, cpu, perf_pmu_types.pcore, perf_model_support->third.hits);
9454
free_fd_l2_percpu();
9455
return;
9456
}
9457
} else
9458
err(-1, "%s: cpu%d: type %d", __func__, cpu, cpus[cpu].type);
9459
}
9460
BIC_PRESENT(BIC_L2_MRPS);
9461
BIC_PRESENT(BIC_L2_HIT);
9462
}
9463
9464
/*
 * in /dev/cpu/ return success for names that are numbers
 * ie. filter out ".", "..", "microcode".
 *
 * Only the first character of the name is examined.
 * It is converted to unsigned char before the isdigit() call, because
 * passing a negative char value to a <ctype.h> function is undefined.
 *
 * return 1 when the name starts with a decimal digit, else 0
 */
int dir_filter(const struct dirent *dirp)
{
	return isdigit((unsigned char)dirp->d_name[0]) ? 1 : 0;
}
9475
9476
void topology_probe(bool startup)
9477
{
9478
int i;
9479
int max_core_id = 0;
9480
int max_package_id = 0;
9481
int max_siblings = 0;
9482
9483
/* Initialize num_cpus, max_cpu_num */
9484
set_max_cpu_num();
9485
topo.num_cpus = 0;
9486
for_all_proc_cpus(count_cpus);
9487
if (!summary_only)
9488
BIC_PRESENT(BIC_CPU);
9489
9490
if (debug > 1)
9491
fprintf(outf, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num);
9492
9493
cpus = calloc(1, (topo.max_cpu_num + 1) * sizeof(struct cpu_topology));
9494
if (cpus == NULL)
9495
err(1, "calloc cpus");
9496
9497
/*
9498
* Allocate and initialize cpu_present_set
9499
*/
9500
cpu_present_set = CPU_ALLOC((topo.max_cpu_num + 1));
9501
if (cpu_present_set == NULL)
9502
err(3, "CPU_ALLOC");
9503
cpu_present_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
9504
CPU_ZERO_S(cpu_present_setsize, cpu_present_set);
9505
for_all_proc_cpus(mark_cpu_present);
9506
9507
/*
9508
* Allocate and initialize cpu_possible_set
9509
*/
9510
cpu_possible_set = CPU_ALLOC((topo.max_cpu_num + 1));
9511
if (cpu_possible_set == NULL)
9512
err(3, "CPU_ALLOC");
9513
cpu_possible_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
9514
CPU_ZERO_S(cpu_possible_setsize, cpu_possible_set);
9515
initialize_cpu_set_from_sysfs(cpu_possible_set, "/sys/devices/system/cpu", "possible");
9516
9517
/*
9518
* Allocate and initialize cpu_effective_set
9519
*/
9520
cpu_effective_set = CPU_ALLOC((topo.max_cpu_num + 1));
9521
if (cpu_effective_set == NULL)
9522
err(3, "CPU_ALLOC");
9523
cpu_effective_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
9524
CPU_ZERO_S(cpu_effective_setsize, cpu_effective_set);
9525
update_effective_set(startup);
9526
9527
/*
9528
* Allocate and initialize cpu_allowed_set
9529
*/
9530
cpu_allowed_set = CPU_ALLOC((topo.max_cpu_num + 1));
9531
if (cpu_allowed_set == NULL)
9532
err(3, "CPU_ALLOC");
9533
cpu_allowed_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
9534
CPU_ZERO_S(cpu_allowed_setsize, cpu_allowed_set);
9535
9536
/*
9537
* Validate and update cpu_allowed_set.
9538
*
9539
* Make sure all cpus in cpu_subset are also in cpu_present_set during startup.
9540
* Give a warning when cpus in cpu_subset become unavailable at runtime.
9541
* Give a warning when cpus are not effective because of cgroup setting.
9542
*
9543
* cpu_allowed_set is the intersection of cpu_present_set/cpu_effective_set/cpu_subset.
9544
*/
9545
for (i = 0; i < CPU_SUBSET_MAXCPUS; ++i) {
9546
if (cpu_subset && !CPU_ISSET_S(i, cpu_subset_size, cpu_subset))
9547
continue;
9548
9549
if (!CPU_ISSET_S(i, cpu_present_setsize, cpu_present_set)) {
9550
if (cpu_subset) {
9551
/* cpus in cpu_subset must be in cpu_present_set during startup */
9552
if (startup)
9553
err(1, "cpu%d not present", i);
9554
else
9555
fprintf(stderr, "cpu%d not present\n", i);
9556
}
9557
continue;
9558
}
9559
9560
if (CPU_COUNT_S(cpu_effective_setsize, cpu_effective_set)) {
9561
if (!CPU_ISSET_S(i, cpu_effective_setsize, cpu_effective_set)) {
9562
fprintf(stderr, "cpu%d not effective\n", i);
9563
continue;
9564
}
9565
}
9566
9567
CPU_SET_S(i, cpu_allowed_setsize, cpu_allowed_set);
9568
}
9569
9570
if (!CPU_COUNT_S(cpu_allowed_setsize, cpu_allowed_set))
9571
err(-ENODEV, "No valid cpus found");
9572
sched_setaffinity(0, cpu_allowed_setsize, cpu_allowed_set);
9573
9574
/*
9575
* Allocate and initialize cpu_affinity_set
9576
*/
9577
cpu_affinity_set = CPU_ALLOC((topo.max_cpu_num + 1));
9578
if (cpu_affinity_set == NULL)
9579
err(3, "CPU_ALLOC");
9580
cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
9581
CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
9582
9583
for_all_proc_cpus(clear_ht_id);
9584
9585
for_all_proc_cpus(set_cpu_hybrid_type);
9586
9587
/*
9588
* For online cpus
9589
* find max_core_id, max_package_id, num_cores (per system)
9590
*/
9591
for (i = 0; i <= topo.max_cpu_num; ++i) {
9592
int siblings;
9593
9594
if (cpu_is_not_present(i)) {
9595
if (debug > 1)
9596
fprintf(outf, "cpu%d NOT PRESENT\n", i);
9597
continue;
9598
}
9599
9600
cpus[i].cpu_id = i;
9601
9602
/* get package information */
9603
cpus[i].package_id = get_package_id(i);
9604
if (cpus[i].package_id > max_package_id)
9605
max_package_id = cpus[i].package_id;
9606
9607
/* get die information */
9608
cpus[i].die_id = get_die_id(i);
9609
if (cpus[i].die_id > topo.max_die_id)
9610
topo.max_die_id = cpus[i].die_id;
9611
9612
/* get l3 information */
9613
cpus[i].l3_id = get_l3_id(i);
9614
if (cpus[i].l3_id > topo.max_l3_id)
9615
topo.max_l3_id = cpus[i].l3_id;
9616
9617
/* get numa node information */
9618
cpus[i].physical_node_id = get_physical_node_id(&cpus[i]);
9619
if (cpus[i].physical_node_id > topo.max_node_num)
9620
topo.max_node_num = cpus[i].physical_node_id;
9621
9622
/* get core information */
9623
cpus[i].core_id = get_core_id(i);
9624
if (cpus[i].core_id > max_core_id)
9625
max_core_id = cpus[i].core_id;
9626
9627
/* get thread information */
9628
siblings = set_thread_siblings(&cpus[i]);
9629
if (siblings > max_siblings)
9630
max_siblings = siblings;
9631
if (cpus[i].ht_id == 0)
9632
topo.num_cores++;
9633
}
9634
topo.max_core_id = max_core_id; /* within a package */
9635
topo.max_package_id = max_package_id;
9636
9637
topo.cores_per_node = max_core_id + 1;
9638
if (debug > 1)
9639
fprintf(outf, "max_core_id %d, sizing for %d cores per package\n", max_core_id, topo.cores_per_node);
9640
if (!summary_only)
9641
BIC_PRESENT(BIC_Core);
9642
9643
topo.num_die = topo.max_die_id + 1;
9644
if (debug > 1)
9645
fprintf(outf, "max_die_id %d, sizing for %d die\n", topo.max_die_id, topo.num_die);
9646
if (!summary_only && topo.num_die > 1)
9647
BIC_PRESENT(BIC_Die);
9648
9649
if (!summary_only && topo.max_l3_id > 0)
9650
BIC_PRESENT(BIC_L3);
9651
9652
topo.num_packages = max_package_id + 1;
9653
if (debug > 1)
9654
fprintf(outf, "max_package_id %d, sizing for %d packages\n", max_package_id, topo.num_packages);
9655
if (!summary_only && topo.num_packages > 1)
9656
BIC_PRESENT(BIC_Package);
9657
9658
set_node_data();
9659
if (debug > 1)
9660
fprintf(outf, "nodes_per_pkg %d\n", topo.nodes_per_pkg);
9661
if (!summary_only && topo.nodes_per_pkg > 1)
9662
BIC_PRESENT(BIC_Node);
9663
9664
topo.threads_per_core = max_siblings;
9665
if (debug > 1)
9666
fprintf(outf, "max_siblings %d\n", max_siblings);
9667
9668
if (debug < 1)
9669
return;
9670
9671
for (i = 0; i <= topo.max_cpu_num; ++i) {
9672
if (cpu_is_not_present(i))
9673
continue;
9674
fprintf(outf,
9675
"cpu %d pkg %d die %d l3 %d node %d lnode %d core %d thread %d\n",
9676
i, cpus[i].package_id, cpus[i].die_id, cpus[i].l3_id,
9677
cpus[i].physical_node_id, cpus[i].logical_node_id, cpus[i].core_id, cpus[i].ht_id);
9678
}
9679
9680
}
9681
9682
void allocate_counters_1(struct counters *counters)
9683
{
9684
counters->threads = calloc(1, sizeof(struct thread_data));
9685
if (counters->threads == NULL)
9686
goto error;
9687
9688
counters->cores = calloc(1, sizeof(struct core_data));
9689
if (counters->cores == NULL)
9690
goto error;
9691
9692
counters->packages = calloc(1, sizeof(struct pkg_data));
9693
if (counters->packages == NULL)
9694
goto error;
9695
9696
return;
9697
error:
9698
err(1, "calloc counters_1");
9699
}
9700
9701
void allocate_counters(struct counters *counters)
9702
{
9703
int i;
9704
int num_cores = topo.cores_per_node * topo.nodes_per_pkg * topo.num_packages;
9705
int num_threads = topo.threads_per_core * num_cores;
9706
9707
counters->threads = calloc(num_threads, sizeof(struct thread_data));
9708
if (counters->threads == NULL)
9709
goto error;
9710
9711
for (i = 0; i < num_threads; i++)
9712
(counters->threads)[i].cpu_id = -1;
9713
9714
counters->cores = calloc(num_cores, sizeof(struct core_data));
9715
if (counters->cores == NULL)
9716
goto error;
9717
9718
for (i = 0; i < num_cores; i++)
9719
(counters->cores)[i].first_cpu = -1;
9720
9721
counters->packages = calloc(topo.num_packages, sizeof(struct pkg_data));
9722
if (counters->packages == NULL)
9723
goto error;
9724
9725
for (i = 0; i < topo.num_packages; i++)
9726
(counters->packages)[i].first_cpu = -1;
9727
9728
return;
9729
error:
9730
err(1, "calloc counters");
9731
}
9732
9733
/*
9734
* init_counter()
9735
*
9736
* set t->cpu_id, FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE
9737
*/
9738
void init_counter(struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base, int cpu_id)
9739
{
9740
int pkg_id = cpus[cpu_id].package_id;
9741
int node_id = cpus[cpu_id].logical_node_id;
9742
int core_id = cpus[cpu_id].core_id;
9743
struct thread_data *t;
9744
struct core_data *c;
9745
9746
/* Workaround for systems where physical_node_id==-1
9747
* and logical_node_id==(-1 - topo.num_cpus)
9748
*/
9749
if (node_id < 0)
9750
node_id = 0;
9751
9752
t = &thread_base[cpu_id];
9753
c = &core_base[GLOBAL_CORE_ID(core_id, pkg_id)];
9754
9755
t->cpu_id = cpu_id;
9756
if (!cpu_is_not_allowed(cpu_id)) {
9757
9758
if (c->first_cpu < 0)
9759
c->first_cpu = t->cpu_id;
9760
if (pkg_base[pkg_id].first_cpu < 0)
9761
pkg_base[pkg_id].first_cpu = t->cpu_id;
9762
}
9763
}
9764
9765
int initialize_counters(int cpu_id)
9766
{
9767
init_counter(EVEN_COUNTERS, cpu_id);
9768
init_counter(ODD_COUNTERS, cpu_id);
9769
return 0;
9770
}
9771
9772
void allocate_output_buffer()
9773
{
9774
output_buffer = calloc(1, (1 + topo.num_cpus) * 2048);
9775
outp = output_buffer;
9776
if (outp == NULL)
9777
err(-1, "calloc output buffer");
9778
}
9779
9780
void allocate_fd_percpu(void)
9781
{
9782
fd_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
9783
if (fd_percpu == NULL)
9784
err(-1, "calloc fd_percpu");
9785
}
9786
9787
void allocate_irq_buffers(void)
9788
{
9789
irq_column_2_cpu = calloc(topo.num_cpus, sizeof(int));
9790
if (irq_column_2_cpu == NULL)
9791
err(-1, "calloc %d", topo.num_cpus);
9792
9793
irqs_per_cpu = calloc(topo.max_cpu_num + 1, sizeof(int));
9794
if (irqs_per_cpu == NULL)
9795
err(-1, "calloc %d IRQ", topo.max_cpu_num + 1);
9796
9797
nmi_per_cpu = calloc(topo.max_cpu_num + 1, sizeof(int));
9798
if (nmi_per_cpu == NULL)
9799
err(-1, "calloc %d NMI", topo.max_cpu_num + 1);
9800
}
9801
9802
int update_topo(PER_THREAD_PARAMS)
9803
{
9804
topo.allowed_cpus++;
9805
if ((int)t->cpu_id == c->first_cpu)
9806
topo.allowed_cores++;
9807
if ((int)t->cpu_id == p->first_cpu)
9808
topo.allowed_packages++;
9809
9810
return 0;
9811
}
9812
9813
void topology_update(void)
9814
{
9815
topo.allowed_cpus = 0;
9816
topo.allowed_cores = 0;
9817
topo.allowed_packages = 0;
9818
for_all_cpus(update_topo, ODD_COUNTERS);
9819
}
9820
9821
void setup_all_buffers(bool startup)
9822
{
9823
topology_probe(startup);
9824
allocate_irq_buffers();
9825
allocate_fd_percpu();
9826
allocate_counters_1(&average);
9827
allocate_counters(&even);
9828
allocate_counters(&odd);
9829
allocate_output_buffer();
9830
for_all_proc_cpus(initialize_counters);
9831
topology_update();
9832
}
9833
9834
void set_master_cpu(void)
9835
{
9836
int i;
9837
9838
for (i = 0; i < topo.max_cpu_num + 1; ++i) {
9839
if (cpu_is_not_allowed(i))
9840
continue;
9841
master_cpu = i;
9842
if (debug > 1)
9843
fprintf(outf, "master_cpu = %d\n", master_cpu);
9844
return;
9845
}
9846
err(-ENODEV, "No valid cpus found");
9847
}
9848
9849
bool has_added_counters(void)
9850
{
9851
/*
9852
* It only makes sense to call this after the command line is parsed,
9853
* otherwise sys structure is not populated.
9854
*/
9855
9856
return sys.added_core_counters | sys.added_thread_counters | sys.added_package_counters;
9857
}
9858
9859
void check_msr_access(void)
9860
{
9861
check_msr_driver();
9862
check_msr_permission();
9863
9864
if (no_msr)
9865
bic_disable_msr_access();
9866
}
9867
9868
void check_perf_access(void)
9869
{
9870
if (BIC_IS_ENABLED(BIC_IPC))
9871
if (!has_perf_instr_count_access())
9872
no_perf = 1;
9873
9874
if (BIC_IS_ENABLED(BIC_LLC_MRPS) || BIC_IS_ENABLED(BIC_LLC_HIT))
9875
if (!has_perf_llc_access())
9876
no_perf = 1;
9877
9878
if (no_perf)
9879
bic_disable_perf_access();
9880
}
9881
9882
/*
 * perf_has_hybrid_devices()
 *
 * Return true when both the "cpu_core" and "cpu_atom" perf event source
 * devices exist in sysfs. The answer is computed once and cached.
 */
bool perf_has_hybrid_devices(void)
{
	/*
	 *  0: unknown
	 *  1: has separate perf device for p and e core
	 * -1: doesn't have separate perf device for p and e core
	 */
	static int cached;

	if (cached == 0) {
		/* access() returns 0 when the path exists */
		const bool have_both = access("/sys/bus/event_source/devices/cpu_core", F_OK) == 0
		    && access("/sys/bus/event_source/devices/cpu_atom", F_OK) == 0;

		cached = have_both ? 1 : -1;
	}

	return cached > 0;
}
9910
9911
int added_perf_counters_init_(struct perf_counter_info *pinfo)
9912
{
9913
size_t num_domains = 0;
9914
unsigned int next_domain;
9915
bool *domain_visited;
9916
unsigned int perf_type, perf_config;
9917
double perf_scale;
9918
int fd_perf;
9919
9920
if (!pinfo)
9921
return 0;
9922
9923
const size_t max_num_domains = MAX(topo.max_cpu_num + 1, MAX(topo.max_core_id + 1, topo.max_package_id + 1));
9924
9925
domain_visited = calloc(max_num_domains, sizeof(*domain_visited));
9926
9927
while (pinfo) {
9928
switch (pinfo->scope) {
9929
case SCOPE_CPU:
9930
num_domains = topo.max_cpu_num + 1;
9931
break;
9932
9933
case SCOPE_CORE:
9934
num_domains = topo.max_core_id + 1;
9935
break;
9936
9937
case SCOPE_PACKAGE:
9938
num_domains = topo.max_package_id + 1;
9939
break;
9940
}
9941
9942
/* Allocate buffer for file descriptor for each domain. */
9943
pinfo->fd_perf_per_domain = calloc(num_domains, sizeof(*pinfo->fd_perf_per_domain));
9944
if (!pinfo->fd_perf_per_domain)
9945
errx(1, "%s: alloc %s", __func__, "fd_perf_per_domain");
9946
9947
for (size_t i = 0; i < num_domains; ++i)
9948
pinfo->fd_perf_per_domain[i] = -1;
9949
9950
pinfo->num_domains = num_domains;
9951
pinfo->scale = 1.0;
9952
9953
memset(domain_visited, 0, max_num_domains * sizeof(*domain_visited));
9954
9955
for (int cpu = 0; cpu < topo.max_cpu_num + 1; ++cpu) {
9956
9957
next_domain = cpu_to_domain(pinfo, cpu);
9958
9959
assert(next_domain < num_domains);
9960
9961
if (cpu_is_not_allowed(cpu))
9962
continue;
9963
9964
if (domain_visited[next_domain])
9965
continue;
9966
9967
/*
9968
* Intel hybrid platforms expose different perf devices for P and E cores.
9969
* Instead of one, "/sys/bus/event_source/devices/cpu" device, there are
9970
* "/sys/bus/event_source/devices/{cpu_core,cpu_atom}".
9971
*
9972
* This makes it more complicated to the user, because most of the counters
9973
* are available on both and have to be handled manually, otherwise.
9974
*
9975
* Code below, allow user to use the old "cpu" name, which is translated accordingly.
9976
*/
9977
const char *perf_device = pinfo->device;
9978
9979
if (strcmp(perf_device, "cpu") == 0 && perf_has_hybrid_devices()) {
9980
switch (cpus[cpu].type) {
9981
case INTEL_PCORE_TYPE:
9982
perf_device = "cpu_core";
9983
break;
9984
9985
case INTEL_ECORE_TYPE:
9986
perf_device = "cpu_atom";
9987
break;
9988
9989
default: /* Don't change, we will probably fail and report a problem soon. */
9990
break;
9991
}
9992
}
9993
9994
perf_type = read_perf_type(perf_device);
9995
if (perf_type == (unsigned int)-1) {
9996
warnx("%s: perf/%s/%s: failed to read %s", __func__, perf_device, pinfo->event, "type");
9997
continue;
9998
}
9999
10000
perf_config = read_perf_config(perf_device, pinfo->event);
10001
if (perf_config == (unsigned int)-1) {
10002
warnx("%s: perf/%s/%s: failed to read %s", __func__, perf_device, pinfo->event, "config");
10003
continue;
10004
}
10005
10006
/* Scale is not required, some counters just don't have it. */
10007
perf_scale = read_perf_scale(perf_device, pinfo->event);
10008
if (perf_scale == 0.0)
10009
perf_scale = 1.0;
10010
10011
fd_perf = open_perf_counter(cpu, perf_type, perf_config, -1, 0);
10012
if (fd_perf == -1) {
10013
warnx("%s: perf/%s/%s: failed to open counter on cpu%d", __func__, perf_device, pinfo->event, cpu);
10014
continue;
10015
}
10016
10017
domain_visited[next_domain] = 1;
10018
pinfo->fd_perf_per_domain[next_domain] = fd_perf;
10019
pinfo->scale = perf_scale;
10020
10021
if (debug)
10022
fprintf(stderr, "Add perf/%s/%s cpu%d: %d\n", perf_device, pinfo->event, cpu, pinfo->fd_perf_per_domain[next_domain]);
10023
}
10024
10025
pinfo = pinfo->next;
10026
}
10027
10028
free(domain_visited);
10029
10030
return 0;
10031
}
10032
10033
void added_perf_counters_init(void)
10034
{
10035
if (added_perf_counters_init_(sys.perf_tp))
10036
errx(1, "%s: %s", __func__, "thread");
10037
10038
if (added_perf_counters_init_(sys.perf_cp))
10039
errx(1, "%s: %s", __func__, "core");
10040
10041
if (added_perf_counters_init_(sys.perf_pp))
10042
errx(1, "%s: %s", __func__, "package");
10043
}
10044
10045
/*
 * parse_telem_info_file()
 *
 * Open info_filename relative to the directory fd_dir and parse a single
 * unsigned long from it using the scanf-style format.
 *
 * On success the value is stored in *output and 0 is returned.
 * On any failure (open, fdopen, or parse) -1 is returned and *output
 * is left untouched.
 */
int parse_telem_info_file(int fd_dir, const char *info_filename, const char *format, unsigned long *output)
{
	unsigned long parsed;
	int items;
	FILE *stream;
	const int fd = openat(fd_dir, info_filename, O_RDONLY);

	if (fd == -1)
		return -1;

	stream = fdopen(fd, "r");
	if (!stream) {
		close(fd);
		return -1;
	}

	/* fclose() also closes the underlying descriptor */
	items = fscanf(stream, format, &parsed);
	fclose(stream);

	if (items != 1)
		return -1;

	*output = parsed;

	return 0;
}
10072
10073
struct pmt_mmio *pmt_mmio_open(unsigned int target_guid)
10074
{
10075
struct pmt_diriter_t pmt_iter;
10076
const struct dirent *entry;
10077
struct stat st;
10078
int fd_telem_dir, fd_pmt;
10079
unsigned long guid, size, offset;
10080
size_t mmap_size;
10081
void *mmio;
10082
struct pmt_mmio *head = NULL, *last = NULL;
10083
struct pmt_mmio *new_pmt = NULL;
10084
10085
if (stat(SYSFS_TELEM_PATH, &st) == -1)
10086
return NULL;
10087
10088
pmt_diriter_init(&pmt_iter);
10089
entry = pmt_diriter_begin(&pmt_iter, SYSFS_TELEM_PATH);
10090
if (!entry) {
10091
pmt_diriter_remove(&pmt_iter);
10092
return NULL;
10093
}
10094
10095
for (; entry != NULL; entry = pmt_diriter_next(&pmt_iter)) {
10096
if (fstatat(dirfd(pmt_iter.dir), entry->d_name, &st, 0) == -1)
10097
break;
10098
10099
if (!S_ISDIR(st.st_mode))
10100
continue;
10101
10102
fd_telem_dir = openat(dirfd(pmt_iter.dir), entry->d_name, O_RDONLY);
10103
if (fd_telem_dir == -1)
10104
break;
10105
10106
if (parse_telem_info_file(fd_telem_dir, "guid", "%lx", &guid)) {
10107
close(fd_telem_dir);
10108
break;
10109
}
10110
10111
if (parse_telem_info_file(fd_telem_dir, "size", "%lu", &size)) {
10112
close(fd_telem_dir);
10113
break;
10114
}
10115
10116
if (guid != target_guid) {
10117
close(fd_telem_dir);
10118
continue;
10119
}
10120
10121
if (parse_telem_info_file(fd_telem_dir, "offset", "%lu", &offset)) {
10122
close(fd_telem_dir);
10123
break;
10124
}
10125
10126
assert(offset == 0);
10127
10128
fd_pmt = openat(fd_telem_dir, "telem", O_RDONLY);
10129
if (fd_pmt == -1)
10130
goto loop_cleanup_and_break;
10131
10132
mmap_size = ROUND_UP_TO_PAGE_SIZE(size);
10133
mmio = mmap(0, mmap_size, PROT_READ, MAP_SHARED, fd_pmt, 0);
10134
if (mmio != MAP_FAILED) {
10135
if (debug)
10136
fprintf(stderr, "%s: 0x%lx mmaped at: %p\n", __func__, guid, mmio);
10137
10138
new_pmt = calloc(1, sizeof(*new_pmt));
10139
10140
if (!new_pmt) {
10141
fprintf(stderr, "%s: Failed to allocate pmt_mmio\n", __func__);
10142
exit(1);
10143
}
10144
10145
/*
10146
* Create linked list of mmaped regions,
10147
* but preserve the ordering from sysfs.
10148
* Ordering is important for the user to
10149
* use the seq=%u parameter when adding a counter.
10150
*/
10151
new_pmt->guid = guid;
10152
new_pmt->mmio_base = mmio;
10153
new_pmt->pmt_offset = offset;
10154
new_pmt->size = size;
10155
new_pmt->next = pmt_mmios;
10156
10157
if (last)
10158
last->next = new_pmt;
10159
else
10160
head = new_pmt;
10161
10162
last = new_pmt;
10163
}
10164
10165
loop_cleanup_and_break:
10166
close(fd_pmt);
10167
close(fd_telem_dir);
10168
}
10169
10170
pmt_diriter_remove(&pmt_iter);
10171
10172
/*
10173
* If we found something, stick just
10174
* created linked list to the front.
10175
*/
10176
if (head)
10177
pmt_mmios = head;
10178
10179
return head;
10180
}
10181
10182
struct pmt_mmio *pmt_mmio_find(unsigned int guid)
10183
{
10184
struct pmt_mmio *pmmio = pmt_mmios;
10185
10186
while (pmmio) {
10187
if (pmmio->guid == guid)
10188
return pmmio;
10189
10190
pmmio = pmmio->next;
10191
}
10192
10193
return NULL;
10194
}
10195
10196
void *pmt_get_counter_pointer(struct pmt_mmio *pmmio, unsigned long counter_offset)
10197
{
10198
char *ret;
10199
10200
/* Get base of mmaped PMT file. */
10201
ret = (char *)pmmio->mmio_base;
10202
10203
/*
10204
* Apply PMT MMIO offset to obtain beginning of the mmaped telemetry data.
10205
* It's not guaranteed that the mmaped memory begins with the telemetry data
10206
* - we might have to apply the offset first.
10207
*/
10208
ret += pmmio->pmt_offset;
10209
10210
/* Apply the counter offset to get the address to the mmaped counter. */
10211
ret += counter_offset;
10212
10213
return ret;
10214
}
10215
10216
struct pmt_mmio *pmt_add_guid(unsigned int guid, unsigned int seq)
10217
{
10218
struct pmt_mmio *ret;
10219
10220
ret = pmt_mmio_find(guid);
10221
if (!ret)
10222
ret = pmt_mmio_open(guid);
10223
10224
while (ret && seq) {
10225
ret = ret->next;
10226
--seq;
10227
}
10228
10229
return ret;
10230
}
10231
10232
/* How a failure to open/map a PMT region is treated when adding a counter. */
enum pmt_open_mode {
	PMT_OPEN_TRY = 0,	/* Best effort: a failed open is tolerated. */
	PMT_OPEN_REQUIRED = 1,	/* Mandatory: a failed open is fatal. */
};
10236
10237
struct pmt_counter *pmt_find_counter(struct pmt_counter *pcounter, const char *name)
10238
{
10239
while (pcounter) {
10240
if (strcmp(pcounter->name, name) == 0)
10241
break;
10242
10243
pcounter = pcounter->next;
10244
}
10245
10246
return pcounter;
10247
}
10248
10249
struct pmt_counter **pmt_get_scope_root(enum counter_scope scope)
10250
{
10251
switch (scope) {
10252
case SCOPE_CPU:
10253
return &sys.pmt_tp;
10254
case SCOPE_CORE:
10255
return &sys.pmt_cp;
10256
case SCOPE_PACKAGE:
10257
return &sys.pmt_pp;
10258
}
10259
10260
__builtin_unreachable();
10261
}
10262
10263
void pmt_counter_add_domain(struct pmt_counter *pcounter, unsigned long *pmmio, unsigned int domain_id)
10264
{
10265
/* Make sure the new domain fits. */
10266
if (domain_id >= pcounter->num_domains)
10267
pmt_counter_resize(pcounter, domain_id + 1);
10268
10269
assert(pcounter->domains);
10270
assert(domain_id < pcounter->num_domains);
10271
10272
pcounter->domains[domain_id].pcounter = pmmio;
10273
}
10274
10275
int pmt_add_counter(unsigned int guid, unsigned int seq, const char *name, enum pmt_datatype type,
10276
unsigned int lsb, unsigned int msb, unsigned int offset, enum counter_scope scope,
10277
enum counter_format format, unsigned int domain_id, enum pmt_open_mode mode)
10278
{
10279
struct pmt_mmio *mmio;
10280
struct pmt_counter *pcounter;
10281
struct pmt_counter **const pmt_root = pmt_get_scope_root(scope);
10282
bool new_counter = false;
10283
int conflict = 0;
10284
10285
if (lsb > msb) {
10286
fprintf(stderr, "%s: %s: `%s` must be satisfied\n", __func__, "lsb <= msb", name);
10287
exit(1);
10288
}
10289
10290
if (msb >= 64) {
10291
fprintf(stderr, "%s: %s: `%s` must be satisfied\n", __func__, "msb < 64", name);
10292
exit(1);
10293
}
10294
10295
mmio = pmt_add_guid(guid, seq);
10296
if (!mmio) {
10297
if (mode != PMT_OPEN_TRY) {
10298
fprintf(stderr, "%s: failed to map PMT MMIO for guid %x, seq %u\n", __func__, guid, seq);
10299
exit(1);
10300
}
10301
10302
return 1;
10303
}
10304
10305
if (offset >= mmio->size) {
10306
if (mode != PMT_OPEN_TRY) {
10307
fprintf(stderr, "%s: offset %u outside of PMT MMIO size %u\n", __func__, offset, mmio->size);
10308
exit(1);
10309
}
10310
10311
return 1;
10312
}
10313
10314
pcounter = pmt_find_counter(*pmt_root, name);
10315
if (!pcounter) {
10316
pcounter = calloc(1, sizeof(*pcounter));
10317
new_counter = true;
10318
}
10319
10320
if (new_counter) {
10321
strncpy(pcounter->name, name, ARRAY_SIZE(pcounter->name) - 1);
10322
pcounter->type = type;
10323
pcounter->scope = scope;
10324
pcounter->lsb = lsb;
10325
pcounter->msb = msb;
10326
pcounter->format = format;
10327
} else {
10328
conflict += pcounter->type != type;
10329
conflict += pcounter->scope != scope;
10330
conflict += pcounter->lsb != lsb;
10331
conflict += pcounter->msb != msb;
10332
conflict += pcounter->format != format;
10333
}
10334
10335
if (conflict) {
10336
fprintf(stderr, "%s: conflicting parameters for the PMT counter with the same name %s\n", __func__, name);
10337
exit(1);
10338
}
10339
10340
pmt_counter_add_domain(pcounter, pmt_get_counter_pointer(mmio, offset), domain_id);
10341
10342
if (new_counter) {
10343
pcounter->next = *pmt_root;
10344
*pmt_root = pcounter;
10345
}
10346
10347
return 0;
10348
}
10349
10350
void pmt_init(void)
10351
{
10352
int cpu_num;
10353
unsigned long seq, offset, mod_num;
10354
10355
if (BIC_IS_ENABLED(BIC_Diec6)) {
10356
pmt_add_counter(PMT_MTL_DC6_GUID, PMT_MTL_DC6_SEQ, "Die%c6", PMT_TYPE_XTAL_TIME,
10357
PMT_COUNTER_MTL_DC6_LSB, PMT_COUNTER_MTL_DC6_MSB, PMT_COUNTER_MTL_DC6_OFFSET, SCOPE_PACKAGE, FORMAT_DELTA, 0, PMT_OPEN_TRY);
10358
}
10359
10360
if (BIC_IS_ENABLED(BIC_CPU_c1e)) {
10361
seq = 0;
10362
offset = PMT_COUNTER_CWF_MC1E_OFFSET_BASE;
10363
mod_num = 0; /* Relative module number for current PMT file. */
10364
10365
/* Open the counter for each CPU. */
10366
for (cpu_num = 0; cpu_num < topo.max_cpu_num;) {
10367
10368
if (cpu_is_not_allowed(cpu_num))
10369
goto next_loop_iter;
10370
10371
/*
10372
* Set the scope to CPU, even though CWF report the counter per module.
10373
* CPUs inside the same module will read from the same location, instead of reporting zeros.
10374
*
10375
* CWF with newer firmware might require a PMT_TYPE_XTAL_TIME intead of PMT_TYPE_TCORE_CLOCK.
10376
*/
10377
pmt_add_counter(PMT_CWF_MC1E_GUID, seq, "CPU%c1e", PMT_TYPE_TCORE_CLOCK,
10378
PMT_COUNTER_CWF_MC1E_LSB, PMT_COUNTER_CWF_MC1E_MSB, offset, SCOPE_CPU, FORMAT_DELTA, cpu_num, PMT_OPEN_TRY);
10379
10380
/*
10381
* Rather complex logic for each time we go to the next loop iteration,
10382
* so keep it as a label.
10383
*/
10384
next_loop_iter:
10385
/*
10386
* Advance the cpu number and check if we should also advance offset to
10387
* the next counter inside the PMT file.
10388
*
10389
* On Clearwater Forest platform, the counter is reported per module,
10390
* so open the same counter for all of the CPUs inside the module.
10391
* That way, reported table show the correct value for all of the CPUs inside the module,
10392
* instead of zeros.
10393
*/
10394
++cpu_num;
10395
if (cpu_num % PMT_COUNTER_CWF_CPUS_PER_MODULE == 0) {
10396
offset += PMT_COUNTER_CWF_MC1E_OFFSET_INCREMENT;
10397
++mod_num;
10398
}
10399
10400
/*
10401
* There are PMT_COUNTER_CWF_MC1E_NUM_MODULES_PER_FILE in each PMT file.
10402
*
10403
* If that number is reached, seq must be incremented to advance to the next file in a sequence.
10404
* Offset inside that file and a module counter has to be reset.
10405
*/
10406
if (mod_num == PMT_COUNTER_CWF_MC1E_NUM_MODULES_PER_FILE) {
10407
++seq;
10408
offset = PMT_COUNTER_CWF_MC1E_OFFSET_BASE;
10409
mod_num = 0;
10410
}
10411
}
10412
}
10413
}
10414
10415
void turbostat_init()
10416
{
10417
setup_all_buffers(true);
10418
set_master_cpu();
10419
check_msr_access();
10420
check_perf_access();
10421
process_cpuid();
10422
counter_info_init();
10423
probe_pm_features();
10424
msr_perf_init();
10425
linux_perf_init();
10426
rapl_perf_init();
10427
cstate_perf_init();
10428
perf_llc_init();
10429
perf_l2_init();
10430
added_perf_counters_init();
10431
pmt_init();
10432
10433
for_all_cpus(get_cpu_type, ODD_COUNTERS);
10434
for_all_cpus(get_cpu_type, EVEN_COUNTERS);
10435
10436
if (BIC_IS_ENABLED(BIC_IPC) && has_aperf_access && get_instr_count_fd(master_cpu) != -1)
10437
BIC_PRESENT(BIC_IPC);
10438
10439
/*
10440
* If TSC tweak is needed, but couldn't get it,
10441
* disable more BICs, since it can't be reported accurately.
10442
*/
10443
if (platform->enable_tsc_tweak && !has_base_hz) {
10444
CLR_BIC(BIC_Busy, &bic_enabled);
10445
CLR_BIC(BIC_Bzy_MHz, &bic_enabled);
10446
}
10447
}
10448
10449
void affinitize_child(void)
10450
{
10451
/* Prefer cpu_possible_set, if available */
10452
if (sched_setaffinity(0, cpu_possible_setsize, cpu_possible_set)) {
10453
warn("sched_setaffinity cpu_possible_set");
10454
10455
/* Otherwise, allow child to run on same cpu set as turbostat */
10456
if (sched_setaffinity(0, cpu_allowed_setsize, cpu_allowed_set))
10457
warn("sched_setaffinity cpu_allowed_set");
10458
}
10459
}
10460
10461
int fork_it(char **argv)
10462
{
10463
pid_t child_pid;
10464
int status;
10465
10466
snapshot_proc_sysfs_files();
10467
status = for_all_cpus(get_counters, EVEN_COUNTERS);
10468
first_counter_read = 0;
10469
if (status)
10470
exit(status);
10471
gettimeofday(&tv_even, (struct timezone *)NULL);
10472
10473
child_pid = fork();
10474
if (!child_pid) {
10475
/* child */
10476
affinitize_child();
10477
execvp(argv[0], argv);
10478
err(errno, "exec %s", argv[0]);
10479
} else {
10480
10481
/* parent */
10482
if (child_pid == -1)
10483
err(1, "fork");
10484
10485
signal(SIGINT, SIG_IGN);
10486
signal(SIGQUIT, SIG_IGN);
10487
if (waitpid(child_pid, &status, 0) == -1)
10488
err(status, "waitpid");
10489
10490
if (WIFEXITED(status))
10491
status = WEXITSTATUS(status);
10492
}
10493
/*
10494
* n.b. fork_it() does not check for errors from for_all_cpus()
10495
* because re-starting is problematic when forking
10496
*/
10497
snapshot_proc_sysfs_files();
10498
for_all_cpus(get_counters, ODD_COUNTERS);
10499
gettimeofday(&tv_odd, (struct timezone *)NULL);
10500
timersub(&tv_odd, &tv_even, &tv_delta);
10501
if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS))
10502
fprintf(outf, "%s: Counter reset detected\n", progname);
10503
delta_platform(&platform_counters_odd, &platform_counters_even);
10504
10505
compute_average(EVEN_COUNTERS);
10506
format_all_counters(EVEN_COUNTERS);
10507
10508
fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec / 1000000.0);
10509
10510
flush_output_stderr();
10511
10512
return status;
10513
}
10514
10515
int get_and_dump_counters(void)
10516
{
10517
int status;
10518
10519
snapshot_proc_sysfs_files();
10520
status = for_all_cpus(get_counters, ODD_COUNTERS);
10521
if (status)
10522
return status;
10523
10524
status = for_all_cpus(dump_counters, ODD_COUNTERS);
10525
if (status)
10526
return status;
10527
10528
flush_output_stdout();
10529
10530
return status;
10531
}
10532
10533
void print_version()
10534
{
10535
fprintf(outf, "turbostat version 2026.02.14 - Len Brown <[email protected]>\n");
10536
}
10537
10538
#define COMMAND_LINE_SIZE 2048
10539
10540
void print_bootcmd(void)
10541
{
10542
char bootcmd[COMMAND_LINE_SIZE];
10543
FILE *fp;
10544
int ret;
10545
10546
memset(bootcmd, 0, COMMAND_LINE_SIZE);
10547
fp = fopen("/proc/cmdline", "r");
10548
if (!fp)
10549
return;
10550
10551
ret = fread(bootcmd, sizeof(char), COMMAND_LINE_SIZE - 1, fp);
10552
if (ret) {
10553
bootcmd[ret] = '\0';
10554
/* the last character is already '\n' */
10555
fprintf(outf, "Kernel command line: %s", bootcmd);
10556
}
10557
10558
fclose(fp);
10559
}
10560
10561
struct msr_counter *find_msrp_by_name(struct msr_counter *head, char *name)
10562
{
10563
struct msr_counter *mp;
10564
10565
for (mp = head; mp; mp = mp->next) {
10566
if (debug)
10567
fprintf(stderr, "%s: %s %s\n", __func__, name, mp->name);
10568
if (!strcmp(name, mp->name))
10569
return mp;
10570
}
10571
return NULL;
10572
}
10573
10574
int add_counter(unsigned int msr_num, char *path, char *name,
10575
unsigned int width, enum counter_scope scope, enum counter_type type, enum counter_format format, int flags, int id)
10576
{
10577
struct msr_counter *msrp;
10578
10579
if (no_msr && msr_num)
10580
errx(1, "Requested MSR counter 0x%x, but in --no-msr mode", msr_num);
10581
10582
if (debug)
10583
fprintf(stderr, "%s(msr%d, %s, %s, width%d, scope%d, type%d, format%d, flags%x, id%d)\n",
10584
__func__, msr_num, path, name, width, scope, type, format, flags, id);
10585
10586
switch (scope) {
10587
10588
case SCOPE_CPU:
10589
msrp = find_msrp_by_name(sys.tp, name);
10590
if (msrp) {
10591
if (debug)
10592
fprintf(stderr, "%s: %s FOUND\n", __func__, name);
10593
break;
10594
}
10595
if (sys.added_thread_counters++ >= MAX_ADDED_THREAD_COUNTERS) {
10596
warnx("ignoring thread counter %s", name);
10597
return -1;
10598
}
10599
break;
10600
case SCOPE_CORE:
10601
msrp = find_msrp_by_name(sys.cp, name);
10602
if (msrp) {
10603
if (debug)
10604
fprintf(stderr, "%s: %s FOUND\n", __func__, name);
10605
break;
10606
}
10607
if (sys.added_core_counters++ >= MAX_ADDED_CORE_COUNTERS) {
10608
warnx("ignoring core counter %s", name);
10609
return -1;
10610
}
10611
break;
10612
case SCOPE_PACKAGE:
10613
msrp = find_msrp_by_name(sys.pp, name);
10614
if (msrp) {
10615
if (debug)
10616
fprintf(stderr, "%s: %s FOUND\n", __func__, name);
10617
break;
10618
}
10619
if (sys.added_package_counters++ >= MAX_ADDED_PACKAGE_COUNTERS) {
10620
warnx("ignoring package counter %s", name);
10621
return -1;
10622
}
10623
break;
10624
default:
10625
warnx("ignoring counter %s with unknown scope", name);
10626
return -1;
10627
}
10628
10629
if (msrp == NULL) {
10630
msrp = calloc(1, sizeof(struct msr_counter));
10631
if (msrp == NULL)
10632
err(-1, "calloc msr_counter");
10633
10634
msrp->msr_num = msr_num;
10635
strncpy(msrp->name, name, NAME_BYTES - 1);
10636
msrp->width = width;
10637
msrp->type = type;
10638
msrp->format = format;
10639
msrp->flags = flags;
10640
10641
switch (scope) {
10642
case SCOPE_CPU:
10643
msrp->next = sys.tp;
10644
sys.tp = msrp;
10645
break;
10646
case SCOPE_CORE:
10647
msrp->next = sys.cp;
10648
sys.cp = msrp;
10649
break;
10650
case SCOPE_PACKAGE:
10651
msrp->next = sys.pp;
10652
sys.pp = msrp;
10653
break;
10654
}
10655
}
10656
10657
if (path) {
10658
struct sysfs_path *sp;
10659
10660
sp = calloc(1, sizeof(struct sysfs_path));
10661
if (sp == NULL) {
10662
perror("calloc");
10663
exit(1);
10664
}
10665
strncpy(sp->path, path, PATH_BYTES - 1);
10666
sp->id = id;
10667
sp->next = msrp->sp;
10668
msrp->sp = sp;
10669
}
10670
10671
return 0;
10672
}
10673
10674
/*
10675
* Initialize the fields used for identifying and opening the counter.
10676
*
10677
* Defer the initialization of any runtime buffers for actually reading
10678
* the counters for when we initialize all perf counters, so we can later
10679
* easily call re_initialize().
10680
*/
10681
struct perf_counter_info *make_perf_counter_info(const char *perf_device,
10682
const char *perf_event,
10683
const char *name,
10684
unsigned int width, enum counter_scope scope, enum counter_type type, enum counter_format format)
10685
{
10686
struct perf_counter_info *pinfo;
10687
10688
pinfo = calloc(1, sizeof(*pinfo));
10689
if (!pinfo)
10690
errx(1, "%s: Failed to allocate %s/%s\n", __func__, perf_device, perf_event);
10691
10692
strncpy(pinfo->device, perf_device, ARRAY_SIZE(pinfo->device) - 1);
10693
strncpy(pinfo->event, perf_event, ARRAY_SIZE(pinfo->event) - 1);
10694
10695
strncpy(pinfo->name, name, ARRAY_SIZE(pinfo->name) - 1);
10696
pinfo->width = width;
10697
pinfo->scope = scope;
10698
pinfo->type = type;
10699
pinfo->format = format;
10700
10701
return pinfo;
10702
}
10703
10704
int add_perf_counter(const char *perf_device, const char *perf_event, const char *name_buffer, unsigned int width,
10705
enum counter_scope scope, enum counter_type type, enum counter_format format)
10706
{
10707
struct perf_counter_info *pinfo;
10708
10709
switch (scope) {
10710
case SCOPE_CPU:
10711
if (sys.added_thread_perf_counters >= MAX_ADDED_THREAD_COUNTERS) {
10712
warnx("ignoring thread counter perf/%s/%s", perf_device, perf_event);
10713
return -1;
10714
}
10715
break;
10716
10717
case SCOPE_CORE:
10718
if (sys.added_core_perf_counters >= MAX_ADDED_CORE_COUNTERS) {
10719
warnx("ignoring core counter perf/%s/%s", perf_device, perf_event);
10720
return -1;
10721
}
10722
break;
10723
10724
case SCOPE_PACKAGE:
10725
if (sys.added_package_perf_counters >= MAX_ADDED_PACKAGE_COUNTERS) {
10726
warnx("ignoring package counter perf/%s/%s", perf_device, perf_event);
10727
return -1;
10728
}
10729
break;
10730
}
10731
10732
pinfo = make_perf_counter_info(perf_device, perf_event, name_buffer, width, scope, type, format);
10733
10734
if (!pinfo)
10735
return -1;
10736
10737
switch (scope) {
10738
case SCOPE_CPU:
10739
pinfo->next = sys.perf_tp;
10740
sys.perf_tp = pinfo;
10741
++sys.added_thread_perf_counters;
10742
break;
10743
10744
case SCOPE_CORE:
10745
pinfo->next = sys.perf_cp;
10746
sys.perf_cp = pinfo;
10747
++sys.added_core_perf_counters;
10748
break;
10749
10750
case SCOPE_PACKAGE:
10751
pinfo->next = sys.perf_pp;
10752
sys.perf_pp = pinfo;
10753
++sys.added_package_perf_counters;
10754
break;
10755
}
10756
10757
// FIXME: we might not have debug here yet
10758
if (debug)
10759
fprintf(stderr, "%s: %s/%s, name: %s, scope%d\n", __func__, pinfo->device, pinfo->event, pinfo->name, pinfo->scope);
10760
10761
return 0;
10762
}
10763
10764
void parse_add_command_msr(char *add_command)
10765
{
10766
int msr_num = 0;
10767
char *path = NULL;
10768
char perf_device[PERF_DEV_NAME_BYTES] = "";
10769
char perf_event[PERF_EVT_NAME_BYTES] = "";
10770
char name_buffer[PERF_NAME_BYTES] = "";
10771
int width = 64;
10772
int fail = 0;
10773
enum counter_scope scope = SCOPE_CPU;
10774
enum counter_type type = COUNTER_CYCLES;
10775
enum counter_format format = FORMAT_DELTA;
10776
10777
while (add_command) {
10778
10779
if (sscanf(add_command, "msr0x%x", &msr_num) == 1)
10780
goto next;
10781
10782
if (sscanf(add_command, "msr%d", &msr_num) == 1)
10783
goto next;
10784
10785
BUILD_BUG_ON(ARRAY_SIZE(perf_device) <= 31);
10786
BUILD_BUG_ON(ARRAY_SIZE(perf_event) <= 31);
10787
if (sscanf(add_command, "perf/%31[^/]/%31[^,]", &perf_device[0], &perf_event[0]) == 2)
10788
goto next;
10789
10790
if (*add_command == '/') {
10791
path = add_command;
10792
goto next;
10793
}
10794
10795
if (sscanf(add_command, "u%d", &width) == 1) {
10796
if ((width == 32) || (width == 64))
10797
goto next;
10798
width = 64;
10799
}
10800
if (!strncmp(add_command, "cpu", strlen("cpu"))) {
10801
scope = SCOPE_CPU;
10802
goto next;
10803
}
10804
if (!strncmp(add_command, "core", strlen("core"))) {
10805
scope = SCOPE_CORE;
10806
goto next;
10807
}
10808
if (!strncmp(add_command, "package", strlen("package"))) {
10809
scope = SCOPE_PACKAGE;
10810
goto next;
10811
}
10812
if (!strncmp(add_command, "cycles", strlen("cycles"))) {
10813
type = COUNTER_CYCLES;
10814
goto next;
10815
}
10816
if (!strncmp(add_command, "seconds", strlen("seconds"))) {
10817
type = COUNTER_SECONDS;
10818
goto next;
10819
}
10820
if (!strncmp(add_command, "usec", strlen("usec"))) {
10821
type = COUNTER_USEC;
10822
goto next;
10823
}
10824
if (!strncmp(add_command, "raw", strlen("raw"))) {
10825
format = FORMAT_RAW;
10826
goto next;
10827
}
10828
if (!strncmp(add_command, "average", strlen("average"))) {
10829
format = FORMAT_AVERAGE;
10830
goto next;
10831
}
10832
if (!strncmp(add_command, "delta", strlen("delta"))) {
10833
format = FORMAT_DELTA;
10834
goto next;
10835
}
10836
if (!strncmp(add_command, "percent", strlen("percent"))) {
10837
format = FORMAT_PERCENT;
10838
goto next;
10839
}
10840
10841
BUILD_BUG_ON(ARRAY_SIZE(name_buffer) <= 18);
10842
if (sscanf(add_command, "%18s,%*s", name_buffer) == 1) {
10843
char *eos;
10844
10845
eos = strchr(name_buffer, ',');
10846
if (eos)
10847
*eos = '\0';
10848
goto next;
10849
}
10850
10851
next:
10852
add_command = strchr(add_command, ',');
10853
if (add_command) {
10854
*add_command = '\0';
10855
add_command++;
10856
}
10857
10858
}
10859
if ((msr_num == 0) && (path == NULL) && (perf_device[0] == '\0' || perf_event[0] == '\0')) {
10860
fprintf(stderr, "--add: (msrDDD | msr0xXXX | /path_to_counter | perf/device/event) required\n");
10861
fail++;
10862
}
10863
10864
/* Test for non-empty perf_device and perf_event */
10865
const bool is_perf_counter = perf_device[0] && perf_event[0];
10866
10867
/* generate default column header */
10868
if (*name_buffer == '\0') {
10869
if (is_perf_counter) {
10870
snprintf(name_buffer, ARRAY_SIZE(name_buffer), "perf/%s", perf_event);
10871
} else {
10872
if (width == 32)
10873
sprintf(name_buffer, "M0x%x%s", msr_num, format == FORMAT_PERCENT ? "%" : "");
10874
else
10875
sprintf(name_buffer, "M0X%x%s", msr_num, format == FORMAT_PERCENT ? "%" : "");
10876
}
10877
}
10878
10879
if (is_perf_counter) {
10880
if (add_perf_counter(perf_device, perf_event, name_buffer, width, scope, type, format))
10881
fail++;
10882
} else {
10883
if (add_counter(msr_num, path, name_buffer, width, scope, type, format, 0, 0))
10884
fail++;
10885
}
10886
10887
if (fail) {
10888
help();
10889
exit(1);
10890
}
10891
}
10892
10893
/*
 * Return true when @str begins with @prefix.  An empty @prefix matches
 * every string.
 */
bool starts_with(const char *str, const char *prefix)
{
	/* A mismatch also covers @str ending before @prefix does. */
	while (*prefix != '\0') {
		if (*str != *prefix)
			return false;
		++str;
		++prefix;
	}

	return true;
}
10897
10898
int pmt_parse_from_path(const char *target_path, unsigned int *out_guid, unsigned int *out_seq)
10899
{
10900
struct pmt_diriter_t pmt_iter;
10901
const struct dirent *dirname;
10902
struct stat stat, target_stat;
10903
int fd_telem_dir = -1;
10904
int fd_target_dir;
10905
unsigned int seq = 0;
10906
unsigned long guid, target_guid;
10907
int ret = -1;
10908
10909
fd_target_dir = open(target_path, O_RDONLY | O_DIRECTORY);
10910
if (fd_target_dir == -1) {
10911
return -1;
10912
}
10913
10914
if (fstat(fd_target_dir, &target_stat) == -1) {
10915
fprintf(stderr, "%s: Failed to stat the target: %s", __func__, strerror(errno));
10916
exit(1);
10917
}
10918
10919
if (parse_telem_info_file(fd_target_dir, "guid", "%lx", &target_guid)) {
10920
fprintf(stderr, "%s: Failed to parse the target guid file: %s", __func__, strerror(errno));
10921
exit(1);
10922
}
10923
10924
close(fd_target_dir);
10925
10926
pmt_diriter_init(&pmt_iter);
10927
10928
for (dirname = pmt_diriter_begin(&pmt_iter, SYSFS_TELEM_PATH); dirname != NULL; dirname = pmt_diriter_next(&pmt_iter)) {
10929
10930
fd_telem_dir = openat(dirfd(pmt_iter.dir), dirname->d_name, O_RDONLY | O_DIRECTORY);
10931
if (fd_telem_dir == -1)
10932
continue;
10933
10934
if (parse_telem_info_file(fd_telem_dir, "guid", "%lx", &guid)) {
10935
fprintf(stderr, "%s: Failed to parse the guid file: %s", __func__, strerror(errno));
10936
continue;
10937
}
10938
10939
if (fstat(fd_telem_dir, &stat) == -1) {
10940
fprintf(stderr, "%s: Failed to stat %s directory: %s", __func__, dirname->d_name, strerror(errno));
10941
continue;
10942
}
10943
10944
/*
10945
* If reached the same directory as target, exit the loop.
10946
* Seq has the correct value now.
10947
*/
10948
if (stat.st_dev == target_stat.st_dev && stat.st_ino == target_stat.st_ino) {
10949
ret = 0;
10950
break;
10951
}
10952
10953
/*
10954
* If reached directory with the same guid,
10955
* but it's not the target directory yet,
10956
* increment seq and continue the search.
10957
*/
10958
if (guid == target_guid)
10959
++seq;
10960
10961
close(fd_telem_dir);
10962
fd_telem_dir = -1;
10963
}
10964
10965
pmt_diriter_remove(&pmt_iter);
10966
10967
if (fd_telem_dir != -1)
10968
close(fd_telem_dir);
10969
10970
if (!ret) {
10971
*out_guid = target_guid;
10972
*out_seq = seq;
10973
}
10974
10975
return ret;
10976
}
10977
10978
void parse_add_command_pmt(char *add_command)
10979
{
10980
char *name = NULL;
10981
char *type_name = NULL;
10982
char *format_name = NULL;
10983
char *direct_path = NULL;
10984
static const char direct_path_prefix[] = "path=";
10985
unsigned int offset;
10986
unsigned int lsb;
10987
unsigned int msb;
10988
unsigned int guid;
10989
unsigned int seq = 0; /* By default, pick first file in a sequence with a given GUID. */
10990
unsigned int domain_id;
10991
enum counter_scope scope = 0;
10992
enum pmt_datatype type = PMT_TYPE_RAW;
10993
enum counter_format format = FORMAT_RAW;
10994
bool has_offset = false;
10995
bool has_lsb = false;
10996
bool has_msb = false;
10997
bool has_format = true; /* Format has a default value. */
10998
bool has_guid = false;
10999
bool has_scope = false;
11000
bool has_type = true; /* Type has a default value. */
11001
11002
/* Consume the "pmt," prefix. */
11003
add_command = strchr(add_command, ',');
11004
if (!add_command) {
11005
help();
11006
exit(1);
11007
}
11008
++add_command;
11009
11010
while (add_command) {
11011
if (starts_with(add_command, "name=")) {
11012
name = add_command + strlen("name=");
11013
goto next;
11014
}
11015
11016
if (starts_with(add_command, "type=")) {
11017
type_name = add_command + strlen("type=");
11018
goto next;
11019
}
11020
11021
if (starts_with(add_command, "domain=")) {
11022
const size_t prefix_len = strlen("domain=");
11023
11024
if (sscanf(add_command + prefix_len, "cpu%u", &domain_id) == 1) {
11025
scope = SCOPE_CPU;
11026
has_scope = true;
11027
} else if (sscanf(add_command + prefix_len, "core%u", &domain_id) == 1) {
11028
scope = SCOPE_CORE;
11029
has_scope = true;
11030
} else if (sscanf(add_command + prefix_len, "package%u", &domain_id) == 1) {
11031
scope = SCOPE_PACKAGE;
11032
has_scope = true;
11033
}
11034
11035
if (!has_scope) {
11036
printf("%s: invalid value for scope. Expected cpu%%u, core%%u or package%%u.\n", __func__);
11037
exit(1);
11038
}
11039
11040
goto next;
11041
}
11042
11043
if (starts_with(add_command, "format=")) {
11044
format_name = add_command + strlen("format=");
11045
goto next;
11046
}
11047
11048
if (sscanf(add_command, "offset=%u", &offset) == 1) {
11049
has_offset = true;
11050
goto next;
11051
}
11052
11053
if (sscanf(add_command, "lsb=%u", &lsb) == 1) {
11054
has_lsb = true;
11055
goto next;
11056
}
11057
11058
if (sscanf(add_command, "msb=%u", &msb) == 1) {
11059
has_msb = true;
11060
goto next;
11061
}
11062
11063
if (sscanf(add_command, "guid=%x", &guid) == 1) {
11064
has_guid = true;
11065
goto next;
11066
}
11067
11068
if (sscanf(add_command, "seq=%x", &seq) == 1)
11069
goto next;
11070
11071
if (strncmp(add_command, direct_path_prefix, strlen(direct_path_prefix)) == 0) {
11072
direct_path = add_command + strlen(direct_path_prefix);
11073
goto next;
11074
}
11075
next:
11076
add_command = strchr(add_command, ',');
11077
if (add_command) {
11078
*add_command = '\0';
11079
add_command++;
11080
}
11081
}
11082
11083
if (!name) {
11084
printf("%s: missing %s\n", __func__, "name");
11085
exit(1);
11086
}
11087
11088
if (strlen(name) >= PMT_COUNTER_NAME_SIZE_BYTES) {
11089
printf("%s: name has to be at most %d characters long\n", __func__, PMT_COUNTER_NAME_SIZE_BYTES);
11090
exit(1);
11091
}
11092
11093
if (format_name) {
11094
has_format = false;
11095
11096
if (strcmp("raw", format_name) == 0) {
11097
format = FORMAT_RAW;
11098
has_format = true;
11099
}
11100
11101
if (strcmp("average", format_name) == 0) {
11102
format = FORMAT_AVERAGE;
11103
has_format = true;
11104
}
11105
11106
if (strcmp("delta", format_name) == 0) {
11107
format = FORMAT_DELTA;
11108
has_format = true;
11109
}
11110
11111
if (!has_format) {
11112
fprintf(stderr, "%s: Invalid format %s. Expected raw, average or delta\n", __func__, format_name);
11113
exit(1);
11114
}
11115
}
11116
11117
if (type_name) {
11118
has_type = false;
11119
11120
if (strcmp("raw", type_name) == 0) {
11121
type = PMT_TYPE_RAW;
11122
has_type = true;
11123
}
11124
11125
if (strcmp("txtal_time", type_name) == 0) {
11126
type = PMT_TYPE_XTAL_TIME;
11127
has_type = true;
11128
}
11129
11130
if (strcmp("tcore_clock", type_name) == 0) {
11131
type = PMT_TYPE_TCORE_CLOCK;
11132
has_type = true;
11133
}
11134
11135
if (!has_type) {
11136
printf("%s: invalid %s: %s\n", __func__, "type", type_name);
11137
exit(1);
11138
}
11139
}
11140
11141
if (!has_offset) {
11142
printf("%s : missing %s\n", __func__, "offset");
11143
exit(1);
11144
}
11145
11146
if (!has_lsb) {
11147
printf("%s: missing %s\n", __func__, "lsb");
11148
exit(1);
11149
}
11150
11151
if (!has_msb) {
11152
printf("%s: missing %s\n", __func__, "msb");
11153
exit(1);
11154
}
11155
11156
if (direct_path && has_guid) {
11157
printf("%s: path and guid+seq parameters are mutually exclusive\nnotice: passed guid=0x%x and path=%s\n", __func__, guid, direct_path);
11158
exit(1);
11159
}
11160
11161
if (direct_path) {
11162
if (pmt_parse_from_path(direct_path, &guid, &seq)) {
11163
printf("%s: failed to parse PMT file from %s\n", __func__, direct_path);
11164
exit(1);
11165
}
11166
11167
/* GUID was just infered from the direct path. */
11168
has_guid = true;
11169
}
11170
11171
if (!has_guid) {
11172
printf("%s: missing %s\n", __func__, "guid or path");
11173
exit(1);
11174
}
11175
11176
if (!has_scope) {
11177
printf("%s: missing %s\n", __func__, "scope");
11178
exit(1);
11179
}
11180
11181
if (lsb > msb) {
11182
printf("%s: lsb > msb doesn't make sense\n", __func__);
11183
exit(1);
11184
}
11185
11186
pmt_add_counter(guid, seq, name, type, lsb, msb, offset, scope, format, domain_id, PMT_OPEN_REQUIRED);
11187
}
11188
11189
/*
 * Dispatch an "--add" argument: anything starting with "pmt" goes to the
 * PMT parser, everything else to the MSR/sysfs/perf parser.
 */
void parse_add_command(char *add_command)
{
	const bool is_pmt = strncmp(add_command, "pmt", strlen("pmt")) == 0;

	if (is_pmt)
		parse_add_command_pmt(add_command);
	else
		parse_add_command_msr(add_command);
}
11195
11196
int is_deferred_add(char *name)
11197
{
11198
int i;
11199
11200
for (i = 0; i < deferred_add_index; ++i)
11201
if (!strcmp(name, deferred_add_names[i])) {
11202
deferred_add_consumed |= (1 << i);
11203
return 1;
11204
}
11205
return 0;
11206
}
11207
11208
int is_deferred_skip(char *name)
11209
{
11210
int i;
11211
11212
for (i = 0; i < deferred_skip_index; ++i)
11213
if (!strcmp(name, deferred_skip_names[i])) {
11214
deferred_skip_consumed |= (1 << i);
11215
return 1;
11216
}
11217
return 0;
11218
}
11219
11220
void verify_deferred_consumed(void)
11221
{
11222
int i;
11223
int fail = 0;
11224
11225
for (i = 0; i < deferred_add_index; ++i) {
11226
if (!(deferred_add_consumed & (1 << i))) {
11227
warnx("Counter '%s' can not be added.", deferred_add_names[i]);
11228
fail++;
11229
}
11230
}
11231
for (i = 0; i < deferred_skip_index; ++i) {
11232
if (!(deferred_skip_consumed & (1 << i))) {
11233
warnx("Counter '%s' can not be skipped.", deferred_skip_names[i]);
11234
fail++;
11235
}
11236
}
11237
if (fail)
11238
exit(-EINVAL);
11239
}
11240
11241
void probe_cpuidle_residency(void)
11242
{
11243
char path[64];
11244
char name_buf[16];
11245
FILE *input;
11246
int state;
11247
int min_state = 1024, max_state = 0;
11248
char *sp;
11249
11250
for (state = 10; state >= 0; --state) {
11251
11252
sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", master_cpu, state);
11253
input = fopen(path, "r");
11254
if (input == NULL)
11255
continue;
11256
if (!fgets(name_buf, sizeof(name_buf), input))
11257
err(1, "%s: failed to read file", path);
11258
11259
/* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
11260
sp = strchr(name_buf, '-');
11261
if (!sp)
11262
sp = strchrnul(name_buf, '\n');
11263
*sp = '%';
11264
*(sp + 1) = '\0';
11265
11266
remove_underbar(name_buf);
11267
11268
fclose(input);
11269
11270
sprintf(path, "cpuidle/state%d/time", state);
11271
11272
if (!DO_BIC(BIC_pct_idle) && !is_deferred_add(name_buf))
11273
continue;
11274
11275
if (is_deferred_skip(name_buf))
11276
continue;
11277
11278
add_counter(0, path, name_buf, 32, SCOPE_CPU, COUNTER_USEC, FORMAT_PERCENT, SYSFS_PERCPU, 0);
11279
11280
if (state > max_state)
11281
max_state = state;
11282
if (state < min_state)
11283
min_state = state;
11284
}
11285
}
11286
11287
void probe_cpuidle_counts(void)
11288
{
11289
char path[64];
11290
char name_buf[16];
11291
FILE *input;
11292
int state;
11293
int min_state = 1024, max_state = 0;
11294
char *sp;
11295
11296
if (!DO_BIC(BIC_cpuidle))
11297
return;
11298
11299
for (state = 10; state >= 0; --state) {
11300
11301
sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", master_cpu, state);
11302
input = fopen(path, "r");
11303
if (input == NULL)
11304
continue;
11305
if (!fgets(name_buf, sizeof(name_buf), input))
11306
err(1, "%s: failed to read file", path);
11307
fclose(input);
11308
11309
remove_underbar(name_buf);
11310
11311
if (!DO_BIC(BIC_cpuidle) && !is_deferred_add(name_buf))
11312
continue;
11313
11314
if (is_deferred_skip(name_buf))
11315
continue;
11316
11317
/* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
11318
sp = strchr(name_buf, '-');
11319
if (!sp)
11320
sp = strchrnul(name_buf, '\n');
11321
11322
/*
11323
* The 'below' sysfs file always contains 0 for the deepest state (largest index),
11324
* do not add it.
11325
*/
11326
if (state != max_state) {
11327
/*
11328
* Add 'C1+' for C1, and so on. The 'below' sysfs file always contains 0 for
11329
* the last state, so do not add it.
11330
*/
11331
11332
*sp = '+';
11333
*(sp + 1) = '\0';
11334
sprintf(path, "cpuidle/state%d/below", state);
11335
add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, FORMAT_DELTA, SYSFS_PERCPU, 0);
11336
}
11337
11338
*sp = '\0';
11339
sprintf(path, "cpuidle/state%d/usage", state);
11340
add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, FORMAT_DELTA, SYSFS_PERCPU, 0);
11341
11342
/*
11343
* The 'above' sysfs file always contains 0 for the shallowest state (smallest
11344
* index), do not add it.
11345
*/
11346
if (state != min_state) {
11347
*sp = '-';
11348
*(sp + 1) = '\0';
11349
sprintf(path, "cpuidle/state%d/above", state);
11350
add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, FORMAT_DELTA, SYSFS_PERCPU, 0);
11351
}
11352
}
11353
}
11354
11355
/*
11356
* parse cpuset with following syntax
11357
* 1,2,4..6,8-10 and set bits in cpu_subset
11358
*/
11359
void parse_cpu_command(char *optarg)
11360
{
11361
if (!strcmp(optarg, "core")) {
11362
if (cpu_subset)
11363
goto error;
11364
show_core_only++;
11365
return;
11366
}
11367
if (!strcmp(optarg, "package")) {
11368
if (cpu_subset)
11369
goto error;
11370
show_pkg_only++;
11371
return;
11372
}
11373
if (show_core_only || show_pkg_only)
11374
goto error;
11375
11376
cpu_subset = CPU_ALLOC(CPU_SUBSET_MAXCPUS);
11377
if (cpu_subset == NULL)
11378
err(3, "CPU_ALLOC");
11379
cpu_subset_size = CPU_ALLOC_SIZE(CPU_SUBSET_MAXCPUS);
11380
11381
CPU_ZERO_S(cpu_subset_size, cpu_subset);
11382
11383
if (parse_cpu_str(optarg, cpu_subset, cpu_subset_size))
11384
goto error;
11385
11386
return;
11387
11388
error:
11389
fprintf(stderr, "\"--cpu %s\" malformed\n", optarg);
11390
help();
11391
exit(-1);
11392
}
11393
11394
void cmdline(int argc, char **argv)
11395
{
11396
int opt;
11397
int option_index = 0;
11398
static struct option long_options[] = {
11399
{ "add", required_argument, 0, 'a' },
11400
{ "cpu", required_argument, 0, 'c' },
11401
{ "Dump", no_argument, 0, 'D' },
11402
{ "debug", no_argument, 0, 'd' }, /* internal, not documented */
11403
{ "enable", required_argument, 0, 'e' },
11404
{ "force", no_argument, 0, 'f' },
11405
{ "interval", required_argument, 0, 'i' },
11406
{ "IPC", no_argument, 0, 'I' },
11407
{ "num_iterations", required_argument, 0, 'n' },
11408
{ "header_iterations", required_argument, 0, 'N' },
11409
{ "help", no_argument, 0, 'h' },
11410
{ "hide", required_argument, 0, 'H' }, // meh, -h taken by --help
11411
{ "Joules", no_argument, 0, 'J' },
11412
{ "list", no_argument, 0, 'l' },
11413
{ "out", required_argument, 0, 'o' },
11414
{ "quiet", no_argument, 0, 'q' },
11415
{ "no-msr", no_argument, 0, 'M' },
11416
{ "no-perf", no_argument, 0, 'P' },
11417
{ "show", required_argument, 0, 's' },
11418
{ "Summary", no_argument, 0, 'S' },
11419
{ "TCC", required_argument, 0, 'T' },
11420
{ "version", no_argument, 0, 'v' },
11421
{ 0, 0, 0, 0 }
11422
};
11423
11424
progname = argv[0];
11425
11426
/*
11427
* Parse some options early, because they may make other options invalid,
11428
* like adding the MSR counter with --add and at the same time using --no-msr.
11429
*/
11430
while ((opt = getopt_long_only(argc, argv, "+:MP", long_options, &option_index)) != -1) {
11431
switch (opt) {
11432
case 'M':
11433
no_msr = 1;
11434
break;
11435
case 'P':
11436
no_perf = 1;
11437
break;
11438
default:
11439
break;
11440
}
11441
}
11442
optind = 0;
11443
11444
while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:N:o:qMST:v", long_options, &option_index)) != -1) {
11445
switch (opt) {
11446
case 'a':
11447
parse_add_command(optarg);
11448
break;
11449
case 'c':
11450
parse_cpu_command(optarg);
11451
break;
11452
case 'D':
11453
dump_only++;
11454
/*
11455
* Force the no_perf early to prevent using it as a source.
11456
* User asks for raw values, but perf returns them relative
11457
* to the opening of the file descriptor.
11458
*/
11459
no_perf = 1;
11460
break;
11461
case 'e':
11462
/* --enable specified counter, without clearning existing list */
11463
bic_lookup(&bic_enabled, optarg, SHOW_LIST);
11464
break;
11465
case 'f':
11466
force_load++;
11467
break;
11468
case 'd':
11469
debug++;
11470
bic_set_all(&bic_enabled);
11471
break;
11472
case 'H':
11473
/*
11474
* --hide: do not show those specified
11475
* multiple invocations simply clear more bits in enabled mask
11476
*/
11477
{
11478
cpu_set_t bic_group_hide;
11479
11480
BIC_INIT(&bic_group_hide);
11481
11482
bic_lookup(&bic_group_hide, optarg, HIDE_LIST);
11483
bic_clear_bits(&bic_enabled, &bic_group_hide);
11484
}
11485
break;
11486
case 'h':
11487
help();
11488
exit(1);
11489
case 'i':
11490
{
11491
double interval = strtod(optarg, NULL);
11492
11493
if (interval < 0.001) {
11494
fprintf(outf, "interval %f seconds is too small\n", interval);
11495
exit(2);
11496
}
11497
11498
interval_tv.tv_sec = interval_ts.tv_sec = interval;
11499
interval_tv.tv_usec = (interval - interval_tv.tv_sec) * 1000000;
11500
interval_ts.tv_nsec = (interval - interval_ts.tv_sec) * 1000000000;
11501
}
11502
break;
11503
case 'J':
11504
rapl_joules++;
11505
break;
11506
case 'l':
11507
bic_set_all(&bic_enabled);
11508
list_header_only++;
11509
quiet++;
11510
break;
11511
case 'o':
11512
outf = fopen_or_die(optarg, "w");
11513
break;
11514
case 'q':
11515
quiet = 1;
11516
break;
11517
case 'M':
11518
case 'P':
11519
/* Parsed earlier */
11520
break;
11521
case 'n':
11522
num_iterations = strtoul(optarg, NULL, 0);
11523
errno = 0;
11524
11525
if (errno || num_iterations == 0)
11526
errx(-1, "invalid iteration count: %s", optarg);
11527
break;
11528
case 'N':
11529
header_iterations = strtoul(optarg, NULL, 0);
11530
errno = 0;
11531
11532
if (errno || header_iterations == 0)
11533
errx(-1, "invalid header iteration count: %s", optarg);
11534
break;
11535
case 's':
11536
/*
11537
* --show: show only those specified
11538
* The 1st invocation will clear and replace the enabled mask
11539
* subsequent invocations can add to it.
11540
*/
11541
if (shown == 0)
11542
BIC_INIT(&bic_enabled);
11543
bic_lookup(&bic_enabled, optarg, SHOW_LIST);
11544
shown = 1;
11545
break;
11546
case 'S':
11547
summary_only++;
11548
break;
11549
case 'T':
11550
tj_max_override = atoi(optarg);
11551
break;
11552
case 'v':
11553
print_version();
11554
exit(0);
11555
break;
11556
default:
11557
help();
11558
exit(1);
11559
}
11560
}
11561
}
11562
11563
void set_rlimit(void)
11564
{
11565
struct rlimit limit;
11566
11567
if (getrlimit(RLIMIT_NOFILE, &limit) < 0)
11568
err(1, "Failed to get rlimit");
11569
11570
if (limit.rlim_max < MAX_NOFILE)
11571
limit.rlim_max = MAX_NOFILE;
11572
if (limit.rlim_cur < MAX_NOFILE)
11573
limit.rlim_cur = MAX_NOFILE;
11574
11575
if (setrlimit(RLIMIT_NOFILE, &limit) < 0)
11576
err(1, "Failed to set rlimit");
11577
}
11578
11579
int main(int argc, char **argv)
11580
{
11581
int fd, ret;
11582
11583
bic_groups_init();
11584
11585
fd = open("/sys/fs/cgroup/cgroup.procs", O_WRONLY);
11586
if (fd < 0)
11587
goto skip_cgroup_setting;
11588
11589
ret = write(fd, "0\n", 2);
11590
if (ret == -1)
11591
perror("Can't update cgroup\n");
11592
11593
close(fd);
11594
11595
skip_cgroup_setting:
11596
outf = stderr;
11597
cmdline(argc, argv);
11598
11599
if (!quiet) {
11600
print_version();
11601
print_bootcmd();
11602
}
11603
11604
probe_cpuidle_residency();
11605
probe_cpuidle_counts();
11606
11607
verify_deferred_consumed();
11608
11609
if (!getuid())
11610
set_rlimit();
11611
11612
turbostat_init();
11613
11614
if (!no_msr)
11615
msr_sum_record();
11616
11617
/* dump counters and exit */
11618
if (dump_only)
11619
return get_and_dump_counters();
11620
11621
/* list header and exit */
11622
if (list_header_only) {
11623
print_header(",");
11624
flush_output_stdout();
11625
return 0;
11626
}
11627
11628
/*
11629
* if any params left, it must be a command to fork
11630
*/
11631
if (argc - optind)
11632
return fork_it(argv + optind);
11633
else
11634
turbostat_loop();
11635
11636
return 0;
11637
}
11638
11639