GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/kernel/cpu/resctrl/intel_aet.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Resource Director Technology (RDT)
 * - Intel Application Energy Telemetry
 *
 * Copyright (C) 2025 Intel Corporation
 *
 * Author:
 *	Tony Luck <[email protected]>
 */

#define pr_fmt(fmt)	"resctrl: " fmt

#include <linux/bits.h>
#include <linux/compiler_types.h>
#include <linux/container_of.h>
#include <linux/cpumask.h>
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/gfp_types.h>
#include <linux/init.h>
#include <linux/intel_pmt_features.h>
#include <linux/intel_vsec.h>
#include <linux/io.h>
#include <linux/minmax.h>
#include <linux/printk.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/resctrl.h>
#include <linux/resctrl_types.h>
#include <linux/slab.h>
#include <linux/stddef.h>
#include <linux/topology.h>
#include <linux/types.h>

#include "internal.h"

/**
 * struct pmt_event - Telemetry event.
 * @id:		Resctrl event id.
 * @idx:	Counter index within each per-RMID block of counters.
 * @bin_bits:	Zero for integer valued events, else the number of bits in
 *		the fraction part of the fixed-point value.
 */
struct pmt_event {
	enum resctrl_event_id	id;
	unsigned int		idx;
	unsigned int		bin_bits;
};

#define EVT(_id, _idx, _bits) { .id = _id, .idx = _idx, .bin_bits = _bits }
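
/*
 * Worked example (illustrative, based on the event tables below): with
 * EVT(PMT_EVENT_ENERGY, 0, 18), bin_bits = 18, so a raw counter value R
 * encodes the fixed-point quantity R / 2^18 (the low 18 bits are the
 * fraction). Events declared with bin_bits = 0 are plain integers and
 * need no scaling.
 */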

/**
 * struct event_group - Events with the same feature type ("energy" or "perf") and GUID.
 * @pfname:	PMT feature name ("energy" or "perf") of this event group.
 *		Used by the boot "rdt=" option.
 * @pfg:	Points to the aggregated telemetry space information
 *		returned by the intel_pmt_get_regions_by_feature()
 *		call to the INTEL_PMT_TELEMETRY driver that contains
 *		data for all telemetry regions of type @pfname.
 *		Valid if the system supports the event group,
 *		NULL otherwise.
 * @force_off:	True when the "rdt" command line or architecture code
 *		disables this event group due to insufficient RMIDs.
 * @force_on:	True when the "rdt" command line overrides disabling of
 *		this event group.
 * @guid:	Unique number per XML description file.
 * @num_rmid:	Number of RMIDs supported by this group. May be
 *		adjusted downwards if enumeration from
 *		intel_pmt_get_regions_by_feature() indicates fewer
 *		RMIDs can be tracked simultaneously.
 * @mmio_size:	Number of bytes of MMIO registers for this group.
 * @num_events:	Number of events in this group.
 * @evts:	Array of event descriptors.
 */
struct event_group {
	/* Data fields for additional structures to manage this group. */
	const char		*pfname;
	struct pmt_feature_group *pfg;
	bool			force_off, force_on;

	/* Remaining fields initialized from XML file. */
	u32			guid;
	u32			num_rmid;
	size_t			mmio_size;
	unsigned int		num_events;
	struct pmt_event	evts[] __counted_by(num_events);
};

#define XML_MMIO_SIZE(num_rmids, num_events, num_extra_status) \
	(((num_rmids) * (num_events) + (num_extra_status)) * sizeof(u64))
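
/*
 * Worked example (illustrative): for the "energy" group below,
 * XML_MMIO_SIZE(576, 2, 3) = (576 * 2 + 3) * sizeof(u64) = 9240 bytes,
 * i.e. one 64-bit counter per event per RMID plus three extra status
 * registers.
 */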
92
93
/*
94
* Link: https://github.com/intel/Intel-PMT/blob/main/xml/CWF/OOBMSM/RMID-ENERGY/cwf_aggregator.xml
95
*/
96
static struct event_group energy_0x26696143 = {
97
.pfname = "energy",
98
.guid = 0x26696143,
99
.num_rmid = 576,
100
.mmio_size = XML_MMIO_SIZE(576, 2, 3),
101
.num_events = 2,
102
.evts = {
103
EVT(PMT_EVENT_ENERGY, 0, 18),
104
EVT(PMT_EVENT_ACTIVITY, 1, 18),
105
}
106
};
107
108
/*
109
* Link: https://github.com/intel/Intel-PMT/blob/main/xml/CWF/OOBMSM/RMID-PERF/cwf_aggregator.xml
110
*/
111
static struct event_group perf_0x26557651 = {
112
.pfname = "perf",
113
.guid = 0x26557651,
114
.num_rmid = 576,
115
.mmio_size = XML_MMIO_SIZE(576, 7, 3),
116
.num_events = 7,
117
.evts = {
118
EVT(PMT_EVENT_STALLS_LLC_HIT, 0, 0),
119
EVT(PMT_EVENT_C1_RES, 1, 0),
120
EVT(PMT_EVENT_UNHALTED_CORE_CYCLES, 2, 0),
121
EVT(PMT_EVENT_STALLS_LLC_MISS, 3, 0),
122
EVT(PMT_EVENT_AUTO_C6_RES, 4, 0),
123
EVT(PMT_EVENT_UNHALTED_REF_CYCLES, 5, 0),
124
EVT(PMT_EVENT_UOPS_RETIRED, 6, 0),
125
}
126
};
127
128
static struct event_group *known_event_groups[] = {
129
&energy_0x26696143,
130
&perf_0x26557651,
131
};
132
133
#define for_each_event_group(_peg) \
134
for (_peg = known_event_groups; \
135
_peg < &known_event_groups[ARRAY_SIZE(known_event_groups)]; \
136
_peg++)
137
138
bool intel_handle_aet_option(bool force_off, char *tok)
139
{
140
struct event_group **peg;
141
bool ret = false;
142
u32 guid = 0;
143
char *name;
144
145
if (!tok)
146
return false;
147
148
name = strsep(&tok, ":");
149
if (tok && kstrtou32(tok, 16, &guid))
150
return false;
151
152
for_each_event_group(peg) {
153
if (strcmp(name, (*peg)->pfname))
154
continue;
155
if (guid && (*peg)->guid != guid)
156
continue;
157
if (force_off)
158
(*peg)->force_off = true;
159
else
160
(*peg)->force_on = true;
161
ret = true;
162
}
163
164
return ret;
165
}
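
/*
 * Usage sketch (assuming tokens come from the "rdt=" boot option, as the
 * @pfname documentation above states): a token of "energy" matches every
 * event group whose pfname is "energy"; "energy:26696143" (GUID parsed as
 * hex by kstrtou32()) matches only the group with .guid = 0x26696143.
 * @force_off selects whether matching groups are forced off or forced on.
 */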

static bool skip_telem_region(struct telemetry_region *tr, struct event_group *e)
{
	if (tr->guid != e->guid)
		return true;
	if (tr->plat_info.package_id >= topology_max_packages()) {
		pr_warn("Bad package %u in guid 0x%x\n", tr->plat_info.package_id,
			tr->guid);
		return true;
	}
	if (tr->size != e->mmio_size) {
		pr_warn("MMIO space wrong size (%zu bytes) for guid 0x%x. Expected %zu bytes.\n",
			tr->size, e->guid, e->mmio_size);
		return true;
	}

	return false;
}

static bool group_has_usable_regions(struct event_group *e, struct pmt_feature_group *p)
{
	bool usable_regions = false;

	for (int i = 0; i < p->count; i++) {
		if (skip_telem_region(&p->regions[i], e)) {
			/*
			 * Clear the address field of regions that did not pass the checks in
			 * skip_telem_region() so they will not be used by intel_aet_read_event().
			 * This is safe to do because intel_pmt_get_regions_by_feature() allocates
			 * a new pmt_feature_group structure to return to each caller and only makes
			 * use of the pmt_feature_group::kref field when intel_pmt_put_feature_group()
			 * returns the structure.
			 */
			p->regions[i].addr = NULL;

			continue;
		}
		usable_regions = true;
	}

	return usable_regions;
}

static bool all_regions_have_sufficient_rmid(struct event_group *e, struct pmt_feature_group *p)
{
	struct telemetry_region *tr;

	for (int i = 0; i < p->count; i++) {
		if (!p->regions[i].addr)
			continue;
		tr = &p->regions[i];
		if (tr->num_rmids < e->num_rmid) {
			e->force_off = true;
			return false;
		}
	}

	return true;
}

static bool enable_events(struct event_group *e, struct pmt_feature_group *p)
{
	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_PERF_PKG].r_resctrl;
	int skipped_events = 0;

	if (e->force_off)
		return false;

	if (!group_has_usable_regions(e, p))
		return false;

	/*
	 * Only enable an event group with insufficient RMIDs if the user
	 * requested it from the kernel command line.
	 */
	if (!all_regions_have_sufficient_rmid(e, p) && !e->force_on) {
		pr_info("%s %s:0x%x monitoring not enabled due to insufficient RMIDs\n",
			r->name, e->pfname, e->guid);
		return false;
	}

	for (int i = 0; i < p->count; i++) {
		if (!p->regions[i].addr)
			continue;
		/*
		 * e->num_rmid is only adjusted lower if the user (via the
		 * "rdt=" kernel parameter) forces an event group with
		 * insufficient RMIDs to be enabled.
		 */
		e->num_rmid = min(e->num_rmid, p->regions[i].num_rmids);
	}

	for (int j = 0; j < e->num_events; j++) {
		if (!resctrl_enable_mon_event(e->evts[j].id, true,
					      e->evts[j].bin_bits, &e->evts[j]))
			skipped_events++;
	}
	if (e->num_events == skipped_events) {
		pr_info("No events enabled in %s %s:0x%x\n", r->name, e->pfname, e->guid);
		return false;
	}

	if (r->mon.num_rmid)
		r->mon.num_rmid = min(r->mon.num_rmid, e->num_rmid);
	else
		r->mon.num_rmid = e->num_rmid;

	if (skipped_events)
		pr_info("%s %s:0x%x monitoring detected (skipped %d events)\n", r->name,
			e->pfname, e->guid, skipped_events);
	else
		pr_info("%s %s:0x%x monitoring detected\n", r->name, e->pfname, e->guid);

	return true;
}

static enum pmt_feature_id lookup_pfid(const char *pfname)
{
	if (!strcmp(pfname, "energy"))
		return FEATURE_PER_RMID_ENERGY_TELEM;
	else if (!strcmp(pfname, "perf"))
		return FEATURE_PER_RMID_PERF_TELEM;

	pr_warn("Unknown PMT feature name '%s'\n", pfname);

	return FEATURE_INVALID;
}

/*
 * Request a copy of struct pmt_feature_group for each event group. If there
 * is one, the returned structure has an array of telemetry_region structures;
 * each element of the array describes one telemetry aggregator. The telemetry
 * aggregators may have different GUIDs, so obtain a duplicate struct
 * pmt_feature_group for event groups with the same feature type but a
 * different GUID. Post-processing ensures an event group can only use the
 * telemetry aggregators that match its GUID. An event group keeps a pointer
 * to its struct pmt_feature_group to indicate that its events are
 * successfully enabled.
 */
bool intel_aet_get_events(void)
{
	struct pmt_feature_group *p;
	enum pmt_feature_id pfid;
	struct event_group **peg;
	bool ret = false;

	for_each_event_group(peg) {
		pfid = lookup_pfid((*peg)->pfname);
		p = intel_pmt_get_regions_by_feature(pfid);
		if (IS_ERR_OR_NULL(p))
			continue;
		if (enable_events(*peg, p)) {
			(*peg)->pfg = p;
			ret = true;
		} else {
			intel_pmt_put_feature_group(p);
		}
	}

	return ret;
}

void __exit intel_aet_exit(void)
{
	struct event_group **peg;

	for_each_event_group(peg) {
		if ((*peg)->pfg) {
			intel_pmt_put_feature_group((*peg)->pfg);
			(*peg)->pfg = NULL;
		}
	}
}

#define DATA_VALID	BIT_ULL(63)
#define DATA_BITS	GENMASK_ULL(62, 0)
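
/*
 * Decode example (hypothetical raw value): a reading of
 * 0x8000000000001234 has DATA_VALID (bit 63) set, so the counter
 * contributes 0x8000000000001234 & DATA_BITS = 0x1234. A reading with
 * bit 63 clear carries no data for that RMID and is skipped.
 */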

/*
 * Read counter for an event on a domain (summing all aggregators on the
 * domain). If an aggregator hasn't received any data for a specific RMID,
 * the MMIO read indicates that data is not valid. Return success if at
 * least one aggregator has valid data.
 */
int intel_aet_read_event(int domid, u32 rmid, void *arch_priv, u64 *val)
{
	struct pmt_event *pevt = arch_priv;
	struct event_group *e;
	bool valid = false;
	u64 total = 0;
	u64 evtcount;
	void *pevt0;
	u32 idx;

	pevt0 = pevt - pevt->idx;
	e = container_of(pevt0, struct event_group, evts);
	idx = rmid * e->num_events;
	idx += pevt->idx;

	if (idx * sizeof(u64) + sizeof(u64) > e->mmio_size) {
		pr_warn_once("MMIO index %u out of range\n", idx);
		return -EIO;
	}

	for (int i = 0; i < e->pfg->count; i++) {
		if (!e->pfg->regions[i].addr)
			continue;
		if (e->pfg->regions[i].plat_info.package_id != domid)
			continue;
		evtcount = readq(e->pfg->regions[i].addr + idx * sizeof(u64));
		if (!(evtcount & DATA_VALID))
			continue;
		total += evtcount & DATA_BITS;
		valid = true;
	}

	if (valid)
		*val = total;

	return valid ? 0 : -EINVAL;
}
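
/*
 * Offset example (illustrative, using the "perf" group above): with
 * num_events = 7, the counter for RMID 2, event idx 3 lives at byte offset
 * (2 * 7 + 3) * sizeof(u64) = 136 from each matching region's base.
 */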

void intel_aet_mon_domain_setup(int cpu, int id, struct rdt_resource *r,
				struct list_head *add_pos)
{
	struct rdt_perf_pkg_mon_domain *d;
	int err;

	d = kzalloc_node(sizeof(*d), GFP_KERNEL, cpu_to_node(cpu));
	if (!d)
		return;

	d->hdr.id = id;
	d->hdr.type = RESCTRL_MON_DOMAIN;
	d->hdr.rid = RDT_RESOURCE_PERF_PKG;
	cpumask_set_cpu(cpu, &d->hdr.cpu_mask);
	list_add_tail_rcu(&d->hdr.list, add_pos);

	err = resctrl_online_mon_domain(r, &d->hdr);
	if (err) {
		list_del_rcu(&d->hdr.list);
		synchronize_rcu();
		kfree(d);
	}
}