/* Path: blob/master/arch/x86/kernel/cpu/resctrl/intel_aet.c (121848 views) */
// SPDX-License-Identifier: GPL-2.0-only1/*2* Resource Director Technology(RDT)3* - Intel Application Energy Telemetry4*5* Copyright (C) 2025 Intel Corporation6*7* Author:8* Tony Luck <[email protected]>9*/1011#define pr_fmt(fmt) "resctrl: " fmt1213#include <linux/bits.h>14#include <linux/compiler_types.h>15#include <linux/container_of.h>16#include <linux/cpumask.h>17#include <linux/err.h>18#include <linux/errno.h>19#include <linux/gfp_types.h>20#include <linux/init.h>21#include <linux/intel_pmt_features.h>22#include <linux/intel_vsec.h>23#include <linux/io.h>24#include <linux/minmax.h>25#include <linux/printk.h>26#include <linux/rculist.h>27#include <linux/rcupdate.h>28#include <linux/resctrl.h>29#include <linux/resctrl_types.h>30#include <linux/slab.h>31#include <linux/stddef.h>32#include <linux/topology.h>33#include <linux/types.h>3435#include "internal.h"3637/**38* struct pmt_event - Telemetry event.39* @id: Resctrl event id.40* @idx: Counter index within each per-RMID block of counters.41* @bin_bits: Zero for integer valued events, else number bits in fraction42* part of fixed-point.43*/44struct pmt_event {45enum resctrl_event_id id;46unsigned int idx;47unsigned int bin_bits;48};4950#define EVT(_id, _idx, _bits) { .id = _id, .idx = _idx, .bin_bits = _bits }5152/**53* struct event_group - Events with the same feature type ("energy" or "perf") and GUID.54* @pfname: PMT feature name ("energy" or "perf") of this event group.55* Used by boot rdt= option.56* @pfg: Points to the aggregated telemetry space information57* returned by the intel_pmt_get_regions_by_feature()58* call to the INTEL_PMT_TELEMETRY driver that contains59* data for all telemetry regions of type @pfname.60* Valid if the system supports the event group,61* NULL otherwise.62* @force_off: True when "rdt" command line or architecture code disables63* this event group due to insufficient RMIDs.64* @force_on: True when "rdt" command line overrides disable of this65* event group.66* @guid: Unique number 
per XML description file.67* @num_rmid: Number of RMIDs supported by this group. May be68* adjusted downwards if enumeration from69* intel_pmt_get_regions_by_feature() indicates fewer70* RMIDs can be tracked simultaneously.71* @mmio_size: Number of bytes of MMIO registers for this group.72* @num_events: Number of events in this group.73* @evts: Array of event descriptors.74*/75struct event_group {76/* Data fields for additional structures to manage this group. */77const char *pfname;78struct pmt_feature_group *pfg;79bool force_off, force_on;8081/* Remaining fields initialized from XML file. */82u32 guid;83u32 num_rmid;84size_t mmio_size;85unsigned int num_events;86struct pmt_event evts[] __counted_by(num_events);87};8889#define XML_MMIO_SIZE(num_rmids, num_events, num_extra_status) \90(((num_rmids) * (num_events) + (num_extra_status)) * sizeof(u64))9192/*93* Link: https://github.com/intel/Intel-PMT/blob/main/xml/CWF/OOBMSM/RMID-ENERGY/cwf_aggregator.xml94*/95static struct event_group energy_0x26696143 = {96.pfname = "energy",97.guid = 0x26696143,98.num_rmid = 576,99.mmio_size = XML_MMIO_SIZE(576, 2, 3),100.num_events = 2,101.evts = {102EVT(PMT_EVENT_ENERGY, 0, 18),103EVT(PMT_EVENT_ACTIVITY, 1, 18),104}105};106107/*108* Link: https://github.com/intel/Intel-PMT/blob/main/xml/CWF/OOBMSM/RMID-PERF/cwf_aggregator.xml109*/110static struct event_group perf_0x26557651 = {111.pfname = "perf",112.guid = 0x26557651,113.num_rmid = 576,114.mmio_size = XML_MMIO_SIZE(576, 7, 3),115.num_events = 7,116.evts = {117EVT(PMT_EVENT_STALLS_LLC_HIT, 0, 0),118EVT(PMT_EVENT_C1_RES, 1, 0),119EVT(PMT_EVENT_UNHALTED_CORE_CYCLES, 2, 0),120EVT(PMT_EVENT_STALLS_LLC_MISS, 3, 0),121EVT(PMT_EVENT_AUTO_C6_RES, 4, 0),122EVT(PMT_EVENT_UNHALTED_REF_CYCLES, 5, 0),123EVT(PMT_EVENT_UOPS_RETIRED, 6, 0),124}125};126127static struct event_group *known_event_groups[] = {128&energy_0x26696143,129&perf_0x26557651,130};131132#define for_each_event_group(_peg) \133for (_peg = known_event_groups; \134_peg < 
&known_event_groups[ARRAY_SIZE(known_event_groups)]; \135_peg++)136137bool intel_handle_aet_option(bool force_off, char *tok)138{139struct event_group **peg;140bool ret = false;141u32 guid = 0;142char *name;143144if (!tok)145return false;146147name = strsep(&tok, ":");148if (tok && kstrtou32(tok, 16, &guid))149return false;150151for_each_event_group(peg) {152if (strcmp(name, (*peg)->pfname))153continue;154if (guid && (*peg)->guid != guid)155continue;156if (force_off)157(*peg)->force_off = true;158else159(*peg)->force_on = true;160ret = true;161}162163return ret;164}165166static bool skip_telem_region(struct telemetry_region *tr, struct event_group *e)167{168if (tr->guid != e->guid)169return true;170if (tr->plat_info.package_id >= topology_max_packages()) {171pr_warn("Bad package %u in guid 0x%x\n", tr->plat_info.package_id,172tr->guid);173return true;174}175if (tr->size != e->mmio_size) {176pr_warn("MMIO space wrong size (%zu bytes) for guid 0x%x. Expected %zu bytes.\n",177tr->size, e->guid, e->mmio_size);178return true;179}180181return false;182}183184static bool group_has_usable_regions(struct event_group *e, struct pmt_feature_group *p)185{186bool usable_regions = false;187188for (int i = 0; i < p->count; i++) {189if (skip_telem_region(&p->regions[i], e)) {190/*191* Clear the address field of regions that did not pass the checks in192* skip_telem_region() so they will not be used by intel_aet_read_event().193* This is safe to do because intel_pmt_get_regions_by_feature() allocates194* a new pmt_feature_group structure to return to each caller and only makes195* use of the pmt_feature_group::kref field when intel_pmt_put_feature_group()196* returns the structure.197*/198p->regions[i].addr = NULL;199200continue;201}202usable_regions = true;203}204205return usable_regions;206}207208static bool all_regions_have_sufficient_rmid(struct event_group *e, struct pmt_feature_group *p)209{210struct telemetry_region *tr;211212for (int i = 0; i < p->count; i++) {213if 
(!p->regions[i].addr)214continue;215tr = &p->regions[i];216if (tr->num_rmids < e->num_rmid) {217e->force_off = true;218return false;219}220}221222return true;223}224225static bool enable_events(struct event_group *e, struct pmt_feature_group *p)226{227struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_PERF_PKG].r_resctrl;228int skipped_events = 0;229230if (e->force_off)231return false;232233if (!group_has_usable_regions(e, p))234return false;235236/*237* Only enable event group with insufficient RMIDs if the user requested238* it from the kernel command line.239*/240if (!all_regions_have_sufficient_rmid(e, p) && !e->force_on) {241pr_info("%s %s:0x%x monitoring not enabled due to insufficient RMIDs\n",242r->name, e->pfname, e->guid);243return false;244}245246for (int i = 0; i < p->count; i++) {247if (!p->regions[i].addr)248continue;249/*250* e->num_rmid only adjusted lower if user (via rdt= kernel251* parameter) forces an event group with insufficient RMID252* to be enabled.253*/254e->num_rmid = min(e->num_rmid, p->regions[i].num_rmids);255}256257for (int j = 0; j < e->num_events; j++) {258if (!resctrl_enable_mon_event(e->evts[j].id, true,259e->evts[j].bin_bits, &e->evts[j]))260skipped_events++;261}262if (e->num_events == skipped_events) {263pr_info("No events enabled in %s %s:0x%x\n", r->name, e->pfname, e->guid);264return false;265}266267if (r->mon.num_rmid)268r->mon.num_rmid = min(r->mon.num_rmid, e->num_rmid);269else270r->mon.num_rmid = e->num_rmid;271272if (skipped_events)273pr_info("%s %s:0x%x monitoring detected (skipped %d events)\n", r->name,274e->pfname, e->guid, skipped_events);275else276pr_info("%s %s:0x%x monitoring detected\n", r->name, e->pfname, e->guid);277278return true;279}280281static enum pmt_feature_id lookup_pfid(const char *pfname)282{283if (!strcmp(pfname, "energy"))284return FEATURE_PER_RMID_ENERGY_TELEM;285else if (!strcmp(pfname, "perf"))286return FEATURE_PER_RMID_PERF_TELEM;287288pr_warn("Unknown PMT feature name '%s'\n", 
pfname);289290return FEATURE_INVALID;291}292293/*294* Request a copy of struct pmt_feature_group for each event group. If there is295* one, the returned structure has an array of telemetry_region structures,296* each element of the array describes one telemetry aggregator. The297* telemetry aggregators may have different GUIDs so obtain duplicate struct298* pmt_feature_group for event groups with same feature type but different299* GUID. Post-processing ensures an event group can only use the telemetry300* aggregators that match its GUID. An event group keeps a pointer to its301* struct pmt_feature_group to indicate that its events are successfully302* enabled.303*/304bool intel_aet_get_events(void)305{306struct pmt_feature_group *p;307enum pmt_feature_id pfid;308struct event_group **peg;309bool ret = false;310311for_each_event_group(peg) {312pfid = lookup_pfid((*peg)->pfname);313p = intel_pmt_get_regions_by_feature(pfid);314if (IS_ERR_OR_NULL(p))315continue;316if (enable_events(*peg, p)) {317(*peg)->pfg = p;318ret = true;319} else {320intel_pmt_put_feature_group(p);321}322}323324return ret;325}326327void __exit intel_aet_exit(void)328{329struct event_group **peg;330331for_each_event_group(peg) {332if ((*peg)->pfg) {333intel_pmt_put_feature_group((*peg)->pfg);334(*peg)->pfg = NULL;335}336}337}338339#define DATA_VALID BIT_ULL(63)340#define DATA_BITS GENMASK_ULL(62, 0)341342/*343* Read counter for an event on a domain (summing all aggregators on the344* domain). If an aggregator hasn't received any data for a specific RMID,345* the MMIO read indicates that data is not valid. 
Return success if at346* least one aggregator has valid data.347*/348int intel_aet_read_event(int domid, u32 rmid, void *arch_priv, u64 *val)349{350struct pmt_event *pevt = arch_priv;351struct event_group *e;352bool valid = false;353u64 total = 0;354u64 evtcount;355void *pevt0;356u32 idx;357358pevt0 = pevt - pevt->idx;359e = container_of(pevt0, struct event_group, evts);360idx = rmid * e->num_events;361idx += pevt->idx;362363if (idx * sizeof(u64) + sizeof(u64) > e->mmio_size) {364pr_warn_once("MMIO index %u out of range\n", idx);365return -EIO;366}367368for (int i = 0; i < e->pfg->count; i++) {369if (!e->pfg->regions[i].addr)370continue;371if (e->pfg->regions[i].plat_info.package_id != domid)372continue;373evtcount = readq(e->pfg->regions[i].addr + idx * sizeof(u64));374if (!(evtcount & DATA_VALID))375continue;376total += evtcount & DATA_BITS;377valid = true;378}379380if (valid)381*val = total;382383return valid ? 0 : -EINVAL;384}385386void intel_aet_mon_domain_setup(int cpu, int id, struct rdt_resource *r,387struct list_head *add_pos)388{389struct rdt_perf_pkg_mon_domain *d;390int err;391392d = kzalloc_node(sizeof(*d), GFP_KERNEL, cpu_to_node(cpu));393if (!d)394return;395396d->hdr.id = id;397d->hdr.type = RESCTRL_MON_DOMAIN;398d->hdr.rid = RDT_RESOURCE_PERF_PKG;399cpumask_set_cpu(cpu, &d->hdr.cpu_mask);400list_add_tail_rcu(&d->hdr.list, add_pos);401402err = resctrl_online_mon_domain(r, &d->hdr);403if (err) {404list_del_rcu(&d->hdr.list);405synchronize_rcu();406kfree(d);407}408}409410411