Path: blob/master/arch/x86/kernel/cpu/perf_event.c
/*
 * Performance events x86 architecture code
 *
 * Copyright (C) 2008 Thomas Gleixner <[email protected]>
 * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
 * Copyright (C) 2009 Jaswinder Singh Rajput
 * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
 * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <[email protected]>
 * Copyright (C) 2009 Intel Corporation, <[email protected]>
 * Copyright (C) 2009 Google, Inc., Stephane Eranian
 *
 * For licencing details see kernel-base/COPYING
 */

#include <linux/perf_event.h>
#include <linux/capability.h>
#include <linux/notifier.h>
#include <linux/hardirq.h>
#include <linux/kprobes.h>
#include <linux/module.h>
#include <linux/kdebug.h>
#include <linux/sched.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/cpu.h>
#include <linux/bitops.h>

#include <asm/apic.h>
#include <asm/stacktrace.h>
#include <asm/nmi.h>
#include <asm/compat.h>
#include <asm/smp.h>
#include <asm/alternative.h>

#if 0
#undef wrmsrl
#define wrmsrl(msr, val)					\
do {								\
	trace_printk("wrmsrl(%lx, %lx)\n", (unsigned long)(msr),\
			(unsigned long)(val));			\
	native_write_msr((msr), (u32)((u64)(val)),		\
			(u32)((u64)(val) >> 32));		\
} while (0)
#endif

/*
 * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
 */
static unsigned long
copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
{
	unsigned long offset, addr = (unsigned long)from;
	unsigned long size, len = 0;
	struct page *page;
	void *map;
	int ret;

	do {
		ret = __get_user_pages_fast(addr, 1, 0, &page);
		if (!ret)
			break;

		offset = addr & (PAGE_SIZE - 1);
		size = min(PAGE_SIZE - offset, n - len);

		map = kmap_atomic(page);
		memcpy(to, map+offset, size);
		kunmap_atomic(map);
		put_page(page);

		len  += size;
		to   += size;
		addr += size;

	} while (len < n);

	return len;
}

struct event_constraint {
	union {
		unsigned long	idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
		u64		idxmsk64;
	};
	u64	code;
	u64	cmask;
	int	weight;
};

struct amd_nb {
	int nb_id;  /* NorthBridge id */
	int refcnt; /* reference count */
	struct perf_event *owners[X86_PMC_IDX_MAX];
	struct event_constraint event_constraints[X86_PMC_IDX_MAX];
};

struct intel_percore;

#define MAX_LBR_ENTRIES		16

struct cpu_hw_events {
	/*
	 * Generic x86 PMC bits
	 */
	struct perf_event	*events[X86_PMC_IDX_MAX]; /* in counter order */
	unsigned long		active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
	unsigned long		running[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
	int			enabled;

	int			n_events;
	int			n_added;
	int			n_txn;
	int			assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
	u64			tags[X86_PMC_IDX_MAX];
	struct perf_event	*event_list[X86_PMC_IDX_MAX]; /* in enabled order */

	unsigned int		group_flag;

	/*
	 * Intel DebugStore bits
	 */
	struct debug_store	*ds;
	u64			pebs_enabled;

	/*
	 * Intel LBR bits
	 */
	int				lbr_users;
	void				*lbr_context;
	struct perf_branch_stack	lbr_stack;
	struct perf_branch_entry	lbr_entries[MAX_LBR_ENTRIES];

	/*
	 * Intel percore register state.
	 * Coordinate shared resources between HT threads.
	 */
	int				percore_used; /* Used by this CPU? */
	struct intel_percore		*per_core;

	/*
	 * AMD specific bits
	 */
	struct amd_nb		*amd_nb;
};

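/*
 * Note on the constraint macros below: EVENT_CONSTRAINT(c, n, m) builds a
 * struct event_constraint whose idxmsk is the counter bitmask 'n' and whose
 * weight is HWEIGHT(n), the number of counters the event may use.  For
 * example, a constraint with idxmsk 0x3 restricts the event to generic
 * counters 0 and 1 and gets weight 2; x86_schedule_events() assigns
 * low-weight (more constrained) events before less constrained ones.
 */
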
#define __EVENT_CONSTRAINT(c, n, m, w) {\
	{ .idxmsk64 = (n) },		\
	.code = (c),			\
	.cmask = (m),			\
	.weight = (w),			\
}

#define EVENT_CONSTRAINT(c, n, m)	\
	__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n))

/*
 * Constraint on the Event code.
 */
#define INTEL_EVENT_CONSTRAINT(c, n)	\
	EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)

/*
 * Constraint on the Event code + UMask + fixed-mask
 *
 * filter mask to validate fixed counter events.
 * the following filters disqualify for fixed counters:
 *  - inv
 *  - edge
 *  - cnt-mask
 *  The other filters are supported by fixed counters.
 *  The any-thread option is supported starting with v3.
 */
#define FIXED_EVENT_CONSTRAINT(c, n)	\
	EVENT_CONSTRAINT(c, (1ULL << (32+n)), X86_RAW_EVENT_MASK)

/*
 * Constraint on the Event code + UMask
 */
#define INTEL_UEVENT_CONSTRAINT(c, n)	\
	EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)

#define EVENT_CONSTRAINT_END		\
	EVENT_CONSTRAINT(0, 0, 0)

#define for_each_event_constraint(e, c)	\
	for ((e) = (c); (e)->weight; (e)++)

/*
 * Extra registers for specific events.
 * Some events need large masks and require external MSRs.
 * Define a mapping to these extra registers.
 */
struct extra_reg {
	unsigned int	event;
	unsigned int	msr;
	u64		config_mask;
	u64		valid_mask;
};

#define EVENT_EXTRA_REG(e, ms, m, vm) {	\
	.event = (e),		\
	.msr = (ms),		\
	.config_mask = (m),	\
	.valid_mask = (vm),	\
	}
#define INTEL_EVENT_EXTRA_REG(event, msr, vm)	\
	EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm)
#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0)

union perf_capabilities {
	struct {
		u64	lbr_format    : 6;
		u64	pebs_trap     : 1;
		u64	pebs_arch_reg : 1;
		u64	pebs_format   : 4;
		u64	smm_freeze    : 1;
	};
	u64	capabilities;
};

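/*
 * The bitfields above follow the layout of the IA32_PERF_CAPABILITIES MSR
 * (architectural perfmon v2+); the Intel-specific setup code included
 * further below reads that MSR into x86_pmu.intel_cap so the capability
 * bits can be tested by name.
 */
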
/*
 * struct x86_pmu - generic x86 pmu
 */
struct x86_pmu {
	/*
	 * Generic x86 PMC bits
	 */
	const char	*name;
	int		version;
	int		(*handle_irq)(struct pt_regs *);
	void		(*disable_all)(void);
	void		(*enable_all)(int added);
	void		(*enable)(struct perf_event *);
	void		(*disable)(struct perf_event *);
	int		(*hw_config)(struct perf_event *event);
	int		(*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
	unsigned	eventsel;
	unsigned	perfctr;
	u64		(*event_map)(int);
	int		max_events;
	int		num_counters;
	int		num_counters_fixed;
	int		cntval_bits;
	u64		cntval_mask;
	int		apic;
	u64		max_period;
	struct event_constraint *
			(*get_event_constraints)(struct cpu_hw_events *cpuc,
						 struct perf_event *event);

	void		(*put_event_constraints)(struct cpu_hw_events *cpuc,
						 struct perf_event *event);
	struct event_constraint *event_constraints;
	struct event_constraint *percore_constraints;
	void		(*quirks)(void);
	int		perfctr_second_write;

	int		(*cpu_prepare)(int cpu);
	void		(*cpu_starting)(int cpu);
	void		(*cpu_dying)(int cpu);
	void		(*cpu_dead)(int cpu);

	/*
	 * Intel Arch Perfmon v2+
	 */
	u64			intel_ctrl;
	union perf_capabilities intel_cap;

	/*
	 * Intel DebugStore bits
	 */
	int		bts, pebs;
	int		bts_active, pebs_active;
	int		pebs_record_size;
	void		(*drain_pebs)(struct pt_regs *regs);
	struct event_constraint *pebs_constraints;

	/*
	 * Intel LBR
	 */
	unsigned long	lbr_tos, lbr_from, lbr_to; /* MSR base regs       */
	int		lbr_nr;			   /* hardware stack size */

	/*
	 * Extra registers for events
	 */
	struct extra_reg *extra_regs;
};

static struct x86_pmu x86_pmu __read_mostly;

static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
	.enabled = 1,
};

static int x86_perf_event_set_period(struct perf_event *event);

/*
 * Generalized hw caching related hw_event table, filled
 * in on a per model basis. A value of 0 means
 * 'not supported', -1 means 'hw_event makes no sense on
 * this CPU', any other value means the raw hw_event
 * ID.
 */

#define C(x) PERF_COUNT_HW_CACHE_##x

static u64 __read_mostly hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX];
static u64 __read_mostly hw_cache_extra_regs
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX];

/*
 * Propagate event elapsed time into the generic event.
 * Can only be executed on the CPU where the event is active.
 * Returns the delta events processed.
 */
static u64
x86_perf_event_update(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	int shift = 64 - x86_pmu.cntval_bits;
	u64 prev_raw_count, new_raw_count;
	int idx = hwc->idx;
	s64 delta;

	if (idx == X86_PMC_IDX_FIXED_BTS)
		return 0;

	/*
	 * Careful: an NMI might modify the previous event value.
	 *
	 * Our tactic to handle this is to first atomically read and
	 * exchange a new raw count - then add that new-prev delta
	 * count to the generic event atomically:
	 */
again:
	prev_raw_count = local64_read(&hwc->prev_count);
	rdmsrl(hwc->event_base, new_raw_count);

	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
					new_raw_count) != prev_raw_count)
		goto again;

	/*
	 * Now we have the new raw value and have updated the prev
	 * timestamp already. We can now calculate the elapsed delta
	 * (event-)time and add that to the generic event.
	 *
	 * Careful, not all hw sign-extends above the physical width
	 * of the count.
	 */
	delta = (new_raw_count << shift) - (prev_raw_count << shift);
	delta >>= shift;

	local64_add(delta, &event->count);
	local64_sub(delta, &hwc->period_left);

	return new_raw_count;
}

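/*
 * Example of the shift trick above: with 48-bit counters cntval_bits is 48,
 * so shift is 16.  Shifting both raw values left by 16 discards whatever the
 * hardware left in the upper 16 bits, and the arithmetic right shift of the
 * signed difference brings the delta back down, so only the low 48 counter
 * bits contribute and the delta stays correct across a counter wrap.
 */
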
static inline int x86_pmu_addr_offset(int index)
{
	int offset;

	/* offset = X86_FEATURE_PERFCTR_CORE ? index << 1 : index */
	alternative_io(ASM_NOP2,
		       "shll $1, %%eax",
		       X86_FEATURE_PERFCTR_CORE,
		       "=a" (offset),
		       "a" (index));

	return offset;
}

static inline unsigned int x86_pmu_config_addr(int index)
{
	return x86_pmu.eventsel + x86_pmu_addr_offset(index);
}

static inline unsigned int x86_pmu_event_addr(int index)
{
	return x86_pmu.perfctr + x86_pmu_addr_offset(index);
}

/*
 * Find and validate any extra registers to set up.
 */
static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
{
	struct extra_reg *er;

	event->hw.extra_reg = 0;
	event->hw.extra_config = 0;

	if (!x86_pmu.extra_regs)
		return 0;

	for (er = x86_pmu.extra_regs; er->msr; er++) {
		if (er->event != (config & er->config_mask))
			continue;
		if (event->attr.config1 & ~er->valid_mask)
			return -EINVAL;
		event->hw.extra_reg = er->msr;
		event->hw.extra_config = event->attr.config1;
		break;
	}
	return 0;
}

static atomic_t active_events;
static DEFINE_MUTEX(pmc_reserve_mutex);

#ifdef CONFIG_X86_LOCAL_APIC

static bool reserve_pmc_hardware(void)
{
	int i;

	for (i = 0; i < x86_pmu.num_counters; i++) {
		if (!reserve_perfctr_nmi(x86_pmu_event_addr(i)))
			goto perfctr_fail;
	}

	for (i = 0; i < x86_pmu.num_counters; i++) {
		if (!reserve_evntsel_nmi(x86_pmu_config_addr(i)))
			goto eventsel_fail;
	}

	return true;

eventsel_fail:
	for (i--; i >= 0; i--)
		release_evntsel_nmi(x86_pmu_config_addr(i));

	i = x86_pmu.num_counters;

perfctr_fail:
	for (i--; i >= 0; i--)
		release_perfctr_nmi(x86_pmu_event_addr(i));

	return false;
}

static void release_pmc_hardware(void)
{
	int i;

	for (i = 0; i < x86_pmu.num_counters; i++) {
		release_perfctr_nmi(x86_pmu_event_addr(i));
		release_evntsel_nmi(x86_pmu_config_addr(i));
	}
}

#else

static bool reserve_pmc_hardware(void) { return true; }
static void release_pmc_hardware(void) {}

#endif

static bool check_hw_exists(void)
{
	u64 val, val_new = 0;
	int i, reg, ret = 0;

	/*
	 * Check to see if the BIOS enabled any of the counters, if so
	 * complain and bail.
	 */
	for (i = 0; i < x86_pmu.num_counters; i++) {
		reg = x86_pmu_config_addr(i);
		ret = rdmsrl_safe(reg, &val);
		if (ret)
			goto msr_fail;
		if (val & ARCH_PERFMON_EVENTSEL_ENABLE)
			goto bios_fail;
	}

	if (x86_pmu.num_counters_fixed) {
		reg = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
		ret = rdmsrl_safe(reg, &val);
		if (ret)
			goto msr_fail;
		for (i = 0; i < x86_pmu.num_counters_fixed; i++) {
			if (val & (0x03 << i*4))
				goto bios_fail;
		}
	}

	/*
	 * Now write a value and read it back to see if it matches,
	 * this is needed to detect certain hardware emulators (qemu/kvm)
	 * that don't trap on the MSR access and always return 0s.
	 */
	val = 0xabcdUL;
	ret = checking_wrmsrl(x86_pmu_event_addr(0), val);
	ret |= rdmsrl_safe(x86_pmu_event_addr(0), &val_new);
	if (ret || val != val_new)
		goto msr_fail;

	return true;

bios_fail:
	/*
	 * We still allow the PMU driver to operate:
	 */
	printk(KERN_CONT "Broken BIOS detected, complain to your hardware vendor.\n");
	printk(KERN_ERR FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", reg, val);

	return true;

msr_fail:
	printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n");

	return false;
}

static void reserve_ds_buffers(void);
static void release_ds_buffers(void);
static void hw_perf_event_destroy(struct perf_event *event)
{
	if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) {
		release_pmc_hardware();
		release_ds_buffers();
		mutex_unlock(&pmc_reserve_mutex);
	}
}

static inline int x86_pmu_initialized(void)
{
	return x86_pmu.handle_irq != NULL;
}

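/*
 * For PERF_TYPE_HW_CACHE events, attr->config packs three fields: bits 0-7
 * select the cache (e.g. L1D), bits 8-15 the operation (read/write/prefetch)
 * and bits 16-23 the result (access/miss).  set_ext_hw_attr() below uses
 * these indices to look up the model-specific raw event code in
 * hw_cache_event_ids[][][].
 */
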
static inline int
set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event)
{
	struct perf_event_attr *attr = &event->attr;
	unsigned int cache_type, cache_op, cache_result;
	u64 config, val;

	config = attr->config;

	cache_type = (config >> 0) & 0xff;
	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
		return -EINVAL;

	cache_op = (config >> 8) & 0xff;
	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
		return -EINVAL;

	cache_result = (config >> 16) & 0xff;
	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
		return -EINVAL;

	val = hw_cache_event_ids[cache_type][cache_op][cache_result];

	if (val == 0)
		return -ENOENT;

	if (val == -1)
		return -EINVAL;

	hwc->config |= val;
	attr->config1 = hw_cache_extra_regs[cache_type][cache_op][cache_result];
	return x86_pmu_extra_regs(val, event);
}

static int x86_setup_perfctr(struct perf_event *event)
{
	struct perf_event_attr *attr = &event->attr;
	struct hw_perf_event *hwc = &event->hw;
	u64 config;

	if (!is_sampling_event(event)) {
		hwc->sample_period = x86_pmu.max_period;
		hwc->last_period = hwc->sample_period;
		local64_set(&hwc->period_left, hwc->sample_period);
	} else {
		/*
		 * If we have a PMU initialized but no APIC
		 * interrupts, we cannot sample hardware
		 * events (user-space has to fall back and
		 * sample via a hrtimer based software event):
		 */
		if (!x86_pmu.apic)
			return -EOPNOTSUPP;
	}

	/*
	 * Do not allow config1 (extended registers) to propagate,
	 * there's no sane user-space generalization yet:
	 */
	if (attr->type == PERF_TYPE_RAW)
		return 0;

	if (attr->type == PERF_TYPE_HW_CACHE)
		return set_ext_hw_attr(hwc, event);

	if (attr->config >= x86_pmu.max_events)
		return -EINVAL;

	/*
	 * The generic map:
	 */
	config = x86_pmu.event_map(attr->config);

	if (config == 0)
		return -ENOENT;

	if (config == -1LL)
		return -EINVAL;

	/*
	 * Branch tracing:
	 */
	if (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
	    !attr->freq && hwc->sample_period == 1) {
		/* BTS is not supported by this architecture. */
		if (!x86_pmu.bts_active)
			return -EOPNOTSUPP;

		/* BTS is currently only allowed for user-mode. */
		if (!attr->exclude_kernel)
			return -EOPNOTSUPP;
	}

	hwc->config |= config;

	return 0;
}

static int x86_pmu_hw_config(struct perf_event *event)
{
	if (event->attr.precise_ip) {
		int precise = 0;

		/* Support for constant skid */
		if (x86_pmu.pebs_active) {
			precise++;

			/* Support for IP fixup */
			if (x86_pmu.lbr_nr)
				precise++;
		}

		if (event->attr.precise_ip > precise)
			return -EOPNOTSUPP;
	}

	/*
	 * Generate PMC IRQs:
	 * (keep 'enabled' bit clear for now)
	 */
	event->hw.config = ARCH_PERFMON_EVENTSEL_INT;

	/*
	 * Count user and OS events unless requested not to
	 */
	if (!event->attr.exclude_user)
		event->hw.config |= ARCH_PERFMON_EVENTSEL_USR;
	if (!event->attr.exclude_kernel)
		event->hw.config |= ARCH_PERFMON_EVENTSEL_OS;

	if (event->attr.type == PERF_TYPE_RAW)
		event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK;

	return x86_setup_perfctr(event);
}

/*
 * Setup the hardware configuration for a given attr_type
 */
static int __x86_pmu_event_init(struct perf_event *event)
{
	int err;

	if (!x86_pmu_initialized())
		return -ENODEV;

	err = 0;
	if (!atomic_inc_not_zero(&active_events)) {
		mutex_lock(&pmc_reserve_mutex);
		if (atomic_read(&active_events) == 0) {
			if (!reserve_pmc_hardware())
				err = -EBUSY;
			else
				reserve_ds_buffers();
		}
		if (!err)
			atomic_inc(&active_events);
		mutex_unlock(&pmc_reserve_mutex);
	}
	if (err)
		return err;

	event->destroy = hw_perf_event_destroy;

	event->hw.idx = -1;
	event->hw.last_cpu = -1;
	event->hw.last_tag = ~0ULL;

	return x86_pmu.hw_config(event);
}

static void x86_pmu_disable_all(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	int idx;

	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
		u64 val;

		if (!test_bit(idx, cpuc->active_mask))
			continue;
		rdmsrl(x86_pmu_config_addr(idx), val);
		if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE))
			continue;
		val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
		wrmsrl(x86_pmu_config_addr(idx), val);
	}
}

static void x86_pmu_disable(struct pmu *pmu)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

	if (!x86_pmu_initialized())
		return;

	if (!cpuc->enabled)
		return;

	cpuc->n_added = 0;
	cpuc->enabled = 0;
	barrier();

	x86_pmu.disable_all();
}

static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
					  u64 enable_mask)
{
	if (hwc->extra_reg)
		wrmsrl(hwc->extra_reg, hwc->extra_config);
	wrmsrl(hwc->config_base, hwc->config | enable_mask);
}

static void x86_pmu_enable_all(int added)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	int idx;

	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
		struct hw_perf_event *hwc = &cpuc->events[idx]->hw;

		if (!test_bit(idx, cpuc->active_mask))
			continue;

		__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
	}
}

static struct pmu pmu;

static inline int is_x86_event(struct perf_event *event)
{
	return event->pmu == &pmu;
}

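/*
 * Assign the collected events to hardware counters.  The fast path keeps
 * previous assignments while they remain legal; otherwise events are
 * (re)assigned greedily in order of increasing constraint weight, so an
 * event that can live on only one counter is placed before an event that
 * can use any of them.
 */
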
static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
{
	struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
	unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
	int i, j, w, wmax, num = 0;
	struct hw_perf_event *hwc;

	bitmap_zero(used_mask, X86_PMC_IDX_MAX);

	for (i = 0; i < n; i++) {
		c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
		constraints[i] = c;
	}

	/*
	 * fastpath, try to reuse previous register
	 */
	for (i = 0; i < n; i++) {
		hwc = &cpuc->event_list[i]->hw;
		c = constraints[i];

		/* never assigned */
		if (hwc->idx == -1)
			break;

		/* constraint still honored */
		if (!test_bit(hwc->idx, c->idxmsk))
			break;

		/* not already used */
		if (test_bit(hwc->idx, used_mask))
			break;

		__set_bit(hwc->idx, used_mask);
		if (assign)
			assign[i] = hwc->idx;
	}
	if (i == n)
		goto done;

	/*
	 * begin slow path
	 */

	bitmap_zero(used_mask, X86_PMC_IDX_MAX);

	/*
	 * weight = number of possible counters
	 *
	 * 1    = most constrained, only works on one counter
	 * wmax = least constrained, works on any counter
	 *
	 * assign events to counters starting with most
	 * constrained events.
	 */
	wmax = x86_pmu.num_counters;

	/*
	 * when fixed event counters are present,
	 * wmax is incremented by 1 to account
	 * for one more choice
	 */
	if (x86_pmu.num_counters_fixed)
		wmax++;

	for (w = 1, num = n; num && w <= wmax; w++) {
		/* for each event */
		for (i = 0; num && i < n; i++) {
			c = constraints[i];
			hwc = &cpuc->event_list[i]->hw;

			if (c->weight != w)
				continue;

			for_each_set_bit(j, c->idxmsk, X86_PMC_IDX_MAX) {
				if (!test_bit(j, used_mask))
					break;
			}

			if (j == X86_PMC_IDX_MAX)
				break;

			__set_bit(j, used_mask);

			if (assign)
				assign[i] = j;
			num--;
		}
	}
done:
	/*
	 * scheduling failed or is just a simulation,
	 * free resources if necessary
	 */
	if (!assign || num) {
		for (i = 0; i < n; i++) {
			if (x86_pmu.put_event_constraints)
				x86_pmu.put_event_constraints(cpuc, cpuc->event_list[i]);
		}
	}
	return num ? -ENOSPC : 0;
}

/*
 * dogrp: true if we must collect sibling events (group)
 * returns total number of events and error code
 */
static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, bool dogrp)
{
	struct perf_event *event;
	int n, max_count;

	max_count = x86_pmu.num_counters + x86_pmu.num_counters_fixed;

	/* current number of events already accepted */
	n = cpuc->n_events;

	if (is_x86_event(leader)) {
		if (n >= max_count)
			return -ENOSPC;
		cpuc->event_list[n] = leader;
		n++;
	}
	if (!dogrp)
		return n;

	list_for_each_entry(event, &leader->sibling_list, group_entry) {
		if (!is_x86_event(event) ||
		    event->state <= PERF_EVENT_STATE_OFF)
			continue;

		if (n >= max_count)
			return -ENOSPC;

		cpuc->event_list[n] = event;
		n++;
	}
	return n;
}

static inline void x86_assign_hw_event(struct perf_event *event,
				struct cpu_hw_events *cpuc, int i)
{
	struct hw_perf_event *hwc = &event->hw;

	hwc->idx = cpuc->assign[i];
	hwc->last_cpu = smp_processor_id();
	hwc->last_tag = ++cpuc->tags[i];

	if (hwc->idx == X86_PMC_IDX_FIXED_BTS) {
		hwc->config_base = 0;
		hwc->event_base	= 0;
	} else if (hwc->idx >= X86_PMC_IDX_FIXED) {
		hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
		hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + (hwc->idx - X86_PMC_IDX_FIXED);
	} else {
		hwc->config_base = x86_pmu_config_addr(hwc->idx);
		hwc->event_base  = x86_pmu_event_addr(hwc->idx);
	}
}

static inline int match_prev_assignment(struct hw_perf_event *hwc,
					struct cpu_hw_events *cpuc,
					int i)
{
	return hwc->idx == cpuc->assign[i] &&
		hwc->last_cpu == smp_processor_id() &&
		hwc->last_tag == cpuc->tags[i];
}

static void x86_pmu_start(struct perf_event *event, int flags);
static void x86_pmu_stop(struct perf_event *event, int flags);
static void x86_pmu_enable(struct pmu *pmu)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct perf_event *event;
	struct hw_perf_event *hwc;
	int i, added = cpuc->n_added;

	if (!x86_pmu_initialized())
		return;

	if (cpuc->enabled)
		return;

	if (cpuc->n_added) {
		int n_running = cpuc->n_events - cpuc->n_added;
		/*
		 * apply assignment obtained either from
		 * hw_perf_group_sched_in() or x86_pmu_enable()
		 *
		 * step1: save events moving to new counters
		 * step2: reprogram moved events into new counters
		 */
		for (i = 0; i < n_running; i++) {
			event = cpuc->event_list[i];
			hwc = &event->hw;

			/*
			 * we can avoid reprogramming counter if:
			 * - assigned same counter as last time
			 * - running on same CPU as last time
			 * - no other event has used the counter since
			 */
			if (hwc->idx == -1 ||
			    match_prev_assignment(hwc, cpuc, i))
				continue;

			/*
			 * Ensure we don't accidentally enable a stopped
			 * counter simply because we rescheduled.
			 */
			if (hwc->state & PERF_HES_STOPPED)
				hwc->state |= PERF_HES_ARCH;

			x86_pmu_stop(event, PERF_EF_UPDATE);
		}

		for (i = 0; i < cpuc->n_events; i++) {
			event = cpuc->event_list[i];
			hwc = &event->hw;

			if (!match_prev_assignment(hwc, cpuc, i))
				x86_assign_hw_event(event, cpuc, i);
			else if (i < n_running)
				continue;

			if (hwc->state & PERF_HES_ARCH)
				continue;

			x86_pmu_start(event, PERF_EF_RELOAD);
		}
		cpuc->n_added = 0;
		perf_events_lapic_init();
	}

	cpuc->enabled = 1;
	barrier();

	x86_pmu.enable_all(added);
}

static inline void x86_pmu_disable_event(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;

	wrmsrl(hwc->config_base, hwc->config);
}

static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);

/*
 * Set the next IRQ period, based on the hwc->period_left value.
 * To be called with the event disabled in hw:
 */
static int
x86_perf_event_set_period(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	s64 left = local64_read(&hwc->period_left);
	s64 period = hwc->sample_period;
	int ret = 0, idx = hwc->idx;

	if (idx == X86_PMC_IDX_FIXED_BTS)
		return 0;

	/*
	 * If we are way outside a reasonable range then just skip forward:
	 */
	if (unlikely(left <= -period)) {
		left = period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}

	if (unlikely(left <= 0)) {
		left += period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}
	/*
	 * Quirk: certain CPUs don't like it if just 1 hw_event is left:
	 */
	if (unlikely(left < 2))
		left = 2;

	if (left > x86_pmu.max_period)
		left = x86_pmu.max_period;

	per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;

	/*
	 * The hw event starts counting from this event offset,
	 * mark it to be able to extract future deltas:
	 */
	local64_set(&hwc->prev_count, (u64)-left);

	wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);

	/*
	 * Due to an erratum on certain CPUs we need
	 * a second write to be sure the register
	 * is updated properly
	 */
	if (x86_pmu.perfctr_second_write) {
		wrmsrl(hwc->event_base,
			(u64)(-left) & x86_pmu.cntval_mask);
	}

	perf_event_update_userpage(event);

	return ret;
}

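/*
 * Example: with a sample_period of 100000 on a 48-bit counter, the code
 * above programs the counter with (u64)(-100000) & cntval_mask, i.e.
 * 100000 counts below the overflow point, so the PMI fires after 100000
 * increments; hwc->prev_count is primed with the same value for the next
 * delta computation in x86_perf_event_update().
 */
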
static void x86_pmu_enable_event(struct perf_event *event)
{
	if (__this_cpu_read(cpu_hw_events.enabled))
		__x86_pmu_enable_event(&event->hw,
				       ARCH_PERFMON_EVENTSEL_ENABLE);
}

/*
 * Add a single event to the PMU.
 *
 * The event is added to the group of enabled events
 * but only if it can be scheduled with existing events.
 */
static int x86_pmu_add(struct perf_event *event, int flags)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct hw_perf_event *hwc;
	int assign[X86_PMC_IDX_MAX];
	int n, n0, ret;

	hwc = &event->hw;

	perf_pmu_disable(event->pmu);
	n0 = cpuc->n_events;
	ret = n = collect_events(cpuc, event, false);
	if (ret < 0)
		goto out;

	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
	if (!(flags & PERF_EF_START))
		hwc->state |= PERF_HES_ARCH;

	/*
	 * If group events scheduling transaction was started,
	 * skip the schedulability test here, it will be performed
	 * at commit time (->commit_txn) as a whole
	 */
	if (cpuc->group_flag & PERF_EVENT_TXN)
		goto done_collect;

	ret = x86_pmu.schedule_events(cpuc, n, assign);
	if (ret)
		goto out;
	/*
	 * copy new assignment, now we know it is possible
	 * will be used by hw_perf_enable()
	 */
	memcpy(cpuc->assign, assign, n*sizeof(int));

done_collect:
	cpuc->n_events = n;
	cpuc->n_added += n - n0;
	cpuc->n_txn += n - n0;

	ret = 0;
out:
	perf_pmu_enable(event->pmu);
	return ret;
}

static void x86_pmu_start(struct perf_event *event, int flags)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	int idx = event->hw.idx;

	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
		return;

	if (WARN_ON_ONCE(idx == -1))
		return;

	if (flags & PERF_EF_RELOAD) {
		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
		x86_perf_event_set_period(event);
	}

	event->hw.state = 0;

	cpuc->events[idx] = event;
	__set_bit(idx, cpuc->active_mask);
	__set_bit(idx, cpuc->running);
	x86_pmu.enable(event);
	perf_event_update_userpage(event);
}

void perf_event_print_debug(void)
{
	u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
	u64 pebs;
	struct cpu_hw_events *cpuc;
	unsigned long flags;
	int cpu, idx;

	if (!x86_pmu.num_counters)
		return;

	local_irq_save(flags);

	cpu = smp_processor_id();
	cpuc = &per_cpu(cpu_hw_events, cpu);

	if (x86_pmu.version >= 2) {
		rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
		rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
		rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
		rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
		rdmsrl(MSR_IA32_PEBS_ENABLE, pebs);

		pr_info("\n");
		pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl);
		pr_info("CPU#%d: status: %016llx\n", cpu, status);
		pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow);
		pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed);
		pr_info("CPU#%d: pebs: %016llx\n", cpu, pebs);
	}
	pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask);

	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
		rdmsrl(x86_pmu_config_addr(idx), pmc_ctrl);
		rdmsrl(x86_pmu_event_addr(idx), pmc_count);

		prev_left = per_cpu(pmc_prev_left[idx], cpu);

		pr_info("CPU#%d: gen-PMC%d ctrl: %016llx\n",
			cpu, idx, pmc_ctrl);
		pr_info("CPU#%d: gen-PMC%d count: %016llx\n",
			cpu, idx, pmc_count);
		pr_info("CPU#%d: gen-PMC%d left: %016llx\n",
			cpu, idx, prev_left);
	}
	for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
		rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);

		pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
			cpu, idx, pmc_count);
	}
	local_irq_restore(flags);
}

static void x86_pmu_stop(struct perf_event *event, int flags)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;

	if (__test_and_clear_bit(hwc->idx, cpuc->active_mask)) {
		x86_pmu.disable(event);
		cpuc->events[hwc->idx] = NULL;
		WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
		hwc->state |= PERF_HES_STOPPED;
	}

	if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
		/*
		 * Drain the remaining delta count out of an event
		 * that we are disabling:
		 */
		x86_perf_event_update(event);
		hwc->state |= PERF_HES_UPTODATE;
	}
}

static void x86_pmu_del(struct perf_event *event, int flags)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	int i;

	/*
	 * If we're called during a txn, we don't need to do anything.
	 * The events never got scheduled and ->cancel_txn will truncate
	 * the event_list.
	 */
	if (cpuc->group_flag & PERF_EVENT_TXN)
		return;

	x86_pmu_stop(event, PERF_EF_UPDATE);

	for (i = 0; i < cpuc->n_events; i++) {
		if (event == cpuc->event_list[i]) {

			if (x86_pmu.put_event_constraints)
				x86_pmu.put_event_constraints(cpuc, event);

			while (++i < cpuc->n_events)
				cpuc->event_list[i-1] = cpuc->event_list[i];

			--cpuc->n_events;
			break;
		}
	}
	perf_event_update_userpage(event);
}

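/*
 * PMI handler.  For every active counter, x86_perf_event_update() folds the
 * current hardware value into the generic event; a counter whose sign bit
 * (bit cntval_bits - 1) is still set has not yet overflowed and is skipped,
 * otherwise the period is re-armed and the overflow is reported to the
 * generic layer.
 */
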
static int x86_pmu_handle_irq(struct pt_regs *regs)
{
	struct perf_sample_data data;
	struct cpu_hw_events *cpuc;
	struct perf_event *event;
	int idx, handled = 0;
	u64 val;

	perf_sample_data_init(&data, 0);

	cpuc = &__get_cpu_var(cpu_hw_events);

	/*
	 * Some chipsets need to unmask the LVTPC in a particular spot
	 * inside the nmi handler. As a result, the unmasking was pushed
	 * into all the nmi handlers.
	 *
	 * This generic handler doesn't seem to have any issues where the
	 * unmasking occurs so it was left at the top.
	 */
	apic_write(APIC_LVTPC, APIC_DM_NMI);

	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
		if (!test_bit(idx, cpuc->active_mask)) {
			/*
			 * Though we deactivated the counter, some CPUs
			 * might still deliver spurious interrupts that
			 * were already in flight. Catch them:
			 */
			if (__test_and_clear_bit(idx, cpuc->running))
				handled++;
			continue;
		}

		event = cpuc->events[idx];

		val = x86_perf_event_update(event);
		if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
			continue;

		/*
		 * event overflow
		 */
		handled++;
		data.period = event->hw.last_period;

		if (!x86_perf_event_set_period(event))
			continue;

		if (perf_event_overflow(event, 1, &data, regs))
			x86_pmu_stop(event, 0);
	}

	if (handled)
		inc_irq_stat(apic_perf_irqs);

	return handled;
}

void perf_events_lapic_init(void)
{
	if (!x86_pmu.apic || !x86_pmu_initialized())
		return;

	/*
	 * Always use NMI for PMU
	 */
	apic_write(APIC_LVTPC, APIC_DM_NMI);
}

struct pmu_nmi_state {
	unsigned int	marked;
	int		handled;
};

static DEFINE_PER_CPU(struct pmu_nmi_state, pmu_nmi);

static int __kprobes
perf_event_nmi_handler(struct notifier_block *self,
			 unsigned long cmd, void *__args)
{
	struct die_args *args = __args;
	unsigned int this_nmi;
	int handled;

	if (!atomic_read(&active_events))
		return NOTIFY_DONE;

	switch (cmd) {
	case DIE_NMI:
		break;
	case DIE_NMIUNKNOWN:
		this_nmi = percpu_read(irq_stat.__nmi_count);
		if (this_nmi != __this_cpu_read(pmu_nmi.marked))
			/* let the kernel handle the unknown nmi */
			return NOTIFY_DONE;
		/*
		 * This one is a PMU back-to-back nmi. Two events
		 * trigger 'simultaneously' raising two back-to-back
		 * NMIs. If the first NMI handles both, the latter
		 * will be empty and daze the CPU. So, we drop it to
		 * avoid false-positive 'unknown nmi' messages.
		 */
		return NOTIFY_STOP;
	default:
		return NOTIFY_DONE;
	}

	handled = x86_pmu.handle_irq(args->regs);
	if (!handled)
		return NOTIFY_DONE;

	this_nmi = percpu_read(irq_stat.__nmi_count);
	if ((handled > 1) ||
		/* the next nmi could be a back-to-back nmi */
	    ((__this_cpu_read(pmu_nmi.marked) == this_nmi) &&
	     (__this_cpu_read(pmu_nmi.handled) > 1))) {
		/*
		 * We could have two subsequent back-to-back nmis: The
		 * first handles more than one counter, the 2nd
		 * handles only one counter and the 3rd handles no
		 * counter.
		 *
		 * This is the 2nd nmi because the previous was
		 * handling more than one counter. We will mark the
		 * next (3rd) and then drop it if unhandled.
		 */
		__this_cpu_write(pmu_nmi.marked, this_nmi + 1);
		__this_cpu_write(pmu_nmi.handled, handled);
	}

	return NOTIFY_STOP;
}

static __read_mostly struct notifier_block perf_event_nmi_notifier = {
	.notifier_call		= perf_event_nmi_handler,
	.next			= NULL,
	.priority		= NMI_LOCAL_LOW_PRIOR,
};

static struct event_constraint unconstrained;
static struct event_constraint emptyconstraint;

static struct event_constraint *
x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
{
	struct event_constraint *c;

	if (x86_pmu.event_constraints) {
		for_each_event_constraint(c, x86_pmu.event_constraints) {
			if ((event->hw.config & c->cmask) == c->code)
				return c;
		}
	}

	return &unconstrained;
}

#include "perf_event_amd.c"
#include "perf_event_p6.c"
#include "perf_event_p4.c"
#include "perf_event_intel_lbr.c"
#include "perf_event_intel_ds.c"
#include "perf_event_intel.c"

static int __cpuinit
x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
{
	unsigned int cpu = (long)hcpu;
	int ret = NOTIFY_OK;

	switch (action & ~CPU_TASKS_FROZEN) {
	case CPU_UP_PREPARE:
		if (x86_pmu.cpu_prepare)
			ret = x86_pmu.cpu_prepare(cpu);
		break;

	case CPU_STARTING:
		if (x86_pmu.cpu_starting)
			x86_pmu.cpu_starting(cpu);
		break;

	case CPU_DYING:
		if (x86_pmu.cpu_dying)
			x86_pmu.cpu_dying(cpu);
		break;

	case CPU_UP_CANCELED:
	case CPU_DEAD:
		if (x86_pmu.cpu_dead)
			x86_pmu.cpu_dead(cpu);
		break;

	default:
		break;
	}

	return ret;
}

static void __init pmu_check_apic(void)
{
	if (cpu_has_apic)
		return;

	x86_pmu.apic = 0;
	pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
	pr_info("no hardware sampling interrupt available.\n");
}
static int __init init_hw_perf_events(void)
{
	struct event_constraint *c;
	int err;

	pr_info("Performance Events: ");

	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_INTEL:
		err = intel_pmu_init();
		break;
	case X86_VENDOR_AMD:
		err = amd_pmu_init();
		break;
	default:
		return 0;
	}
	if (err != 0) {
		pr_cont("no PMU driver, software events only.\n");
		return 0;
	}

	pmu_check_apic();

	/* sanity check that the hardware exists or is emulated */
	if (!check_hw_exists())
		return 0;

	pr_cont("%s PMU driver.\n", x86_pmu.name);

	if (x86_pmu.quirks)
		x86_pmu.quirks();

	if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) {
		WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
		     x86_pmu.num_counters, X86_PMC_MAX_GENERIC);
		x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
	}
	x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;

	if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
		WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
		     x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED);
		x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED;
	}

	x86_pmu.intel_ctrl |=
		((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED;

	perf_events_lapic_init();
	register_die_notifier(&perf_event_nmi_notifier);

	unconstrained = (struct event_constraint)
		__EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
				   0, x86_pmu.num_counters);

	if (x86_pmu.event_constraints) {
		for_each_event_constraint(c, x86_pmu.event_constraints) {
			if (c->cmask != X86_RAW_EVENT_MASK)
				continue;

			c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
			c->weight += x86_pmu.num_counters;
		}
	}

	pr_info("... version: %d\n", x86_pmu.version);
	pr_info("... bit width: %d\n", x86_pmu.cntval_bits);
	pr_info("... generic registers: %d\n", x86_pmu.num_counters);
	pr_info("... value mask: %016Lx\n", x86_pmu.cntval_mask);
	pr_info("... max period: %016Lx\n", x86_pmu.max_period);
	pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed);
	pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl);

	perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
	perf_cpu_notifier(x86_pmu_notifier);

	return 0;
}
early_initcall(init_hw_perf_events);

static inline void x86_pmu_read(struct perf_event *event)
{
	x86_perf_event_update(event);
}

/*
 * Start group events scheduling transaction
 * Set the flag to make pmu::enable() not perform the
 * schedulability test, it will be performed at commit time
 */
static void x86_pmu_start_txn(struct pmu *pmu)
{
	perf_pmu_disable(pmu);
	__this_cpu_or(cpu_hw_events.group_flag, PERF_EVENT_TXN);
	__this_cpu_write(cpu_hw_events.n_txn, 0);
}

/*
 * Stop group events scheduling transaction
 * Clear the flag and pmu::enable() will perform the
 * schedulability test.
 */
static void x86_pmu_cancel_txn(struct pmu *pmu)
{
	__this_cpu_and(cpu_hw_events.group_flag, ~PERF_EVENT_TXN);
	/*
	 * Truncate the collected events.
	 */
	__this_cpu_sub(cpu_hw_events.n_added, __this_cpu_read(cpu_hw_events.n_txn));
	__this_cpu_sub(cpu_hw_events.n_events, __this_cpu_read(cpu_hw_events.n_txn));
	perf_pmu_enable(pmu);
}

/*
 * Commit group events scheduling transaction
 * Perform the group schedulability test as a whole
 * Return 0 if success
 */
static int x86_pmu_commit_txn(struct pmu *pmu)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	int assign[X86_PMC_IDX_MAX];
	int n, ret;

	n = cpuc->n_events;

	if (!x86_pmu_initialized())
		return -EAGAIN;

	ret = x86_pmu.schedule_events(cpuc, n, assign);
	if (ret)
		return ret;

	/*
	 * copy new assignment, now we know it is possible
	 * will be used by hw_perf_enable()
	 */
	memcpy(cpuc->assign, assign, n*sizeof(int));

	cpuc->group_flag &= ~PERF_EVENT_TXN;
	perf_pmu_enable(pmu);
	return 0;
}

/*
 * validate that we can schedule this event
 */
static int validate_event(struct perf_event *event)
{
	struct cpu_hw_events *fake_cpuc;
	struct event_constraint *c;
	int ret = 0;

	fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO);
	if (!fake_cpuc)
		return -ENOMEM;

	c = x86_pmu.get_event_constraints(fake_cpuc, event);

	if (!c || !c->weight)
		ret = -ENOSPC;

	if (x86_pmu.put_event_constraints)
		x86_pmu.put_event_constraints(fake_cpuc, event);

	kfree(fake_cpuc);

	return ret;
}
/*
 * validate a single event group
 *
 * validation includes:
 *	- check events are compatible with each other
 *	- events do not compete for the same counter
 *	- number of events <= number of counters
 *
 * validation ensures the group can be loaded onto the
 * PMU if it was the only group available.
 */
static int validate_group(struct perf_event *event)
{
	struct perf_event *leader = event->group_leader;
	struct cpu_hw_events *fake_cpuc;
	int ret, n;

	ret = -ENOMEM;
	fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO);
	if (!fake_cpuc)
		goto out;

	/*
	 * the event is not yet connected with its
	 * siblings therefore we must first collect
	 * existing siblings, then add the new event
	 * before we can simulate the scheduling
	 */
	ret = -ENOSPC;
	n = collect_events(fake_cpuc, leader, true);
	if (n < 0)
		goto out_free;

	fake_cpuc->n_events = n;
	n = collect_events(fake_cpuc, event, false);
	if (n < 0)
		goto out_free;

	fake_cpuc->n_events = n;

	ret = x86_pmu.schedule_events(fake_cpuc, n, NULL);

out_free:
	kfree(fake_cpuc);
out:
	return ret;
}

static int x86_pmu_event_init(struct perf_event *event)
{
	struct pmu *tmp;
	int err;

	switch (event->attr.type) {
	case PERF_TYPE_RAW:
	case PERF_TYPE_HARDWARE:
	case PERF_TYPE_HW_CACHE:
		break;

	default:
		return -ENOENT;
	}

	err = __x86_pmu_event_init(event);
	if (!err) {
		/*
		 * we temporarily connect event to its pmu
		 * such that validate_group() can classify
		 * it as an x86 event using is_x86_event()
		 */
		tmp = event->pmu;
		event->pmu = &pmu;

		if (event->group_leader != event)
			err = validate_group(event);
		else
			err = validate_event(event);

		event->pmu = tmp;
	}
	if (err) {
		if (event->destroy)
			event->destroy(event);
	}

	return err;
}

static struct pmu pmu = {
	.pmu_enable	= x86_pmu_enable,
	.pmu_disable	= x86_pmu_disable,

	.event_init	= x86_pmu_event_init,

	.add		= x86_pmu_add,
	.del		= x86_pmu_del,
	.start		= x86_pmu_start,
	.stop		= x86_pmu_stop,
	.read		= x86_pmu_read,

	.start_txn	= x86_pmu_start_txn,
	.cancel_txn	= x86_pmu_cancel_txn,
	.commit_txn	= x86_pmu_commit_txn,
};

/*
 * callchain support
 */

static int backtrace_stack(void *data, char *name)
{
	return 0;
}

static void backtrace_address(void *data, unsigned long addr, int reliable)
{
	struct perf_callchain_entry *entry = data;

	perf_callchain_store(entry, addr);
}

static const struct stacktrace_ops backtrace_ops = {
	.stack			= backtrace_stack,
	.address		= backtrace_address,
	.walk_stack		= print_context_stack_bp,
};

void
perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
{
	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
		/* TODO: We don't support guest os callchain now */
		return;
	}

	perf_callchain_store(entry, regs->ip);

	dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry);
}

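/*
 * The user-space callchain helpers below walk the frame-pointer chain:
 * starting from regs->bp they copy one struct stack_frame at a time with
 * copy_from_user_nmi(), record frame.return_address and follow
 * frame.next_frame until the frame pointer falls below the user stack
 * pointer or PERF_MAX_STACK_DEPTH is reached.  This only works for user
 * code compiled with frame pointers.
 */
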
#ifdef CONFIG_COMPAT
static inline int
perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
{
	/* 32-bit process in 64-bit kernel. */
	struct stack_frame_ia32 frame;
	const void __user *fp;

	if (!test_thread_flag(TIF_IA32))
		return 0;

	fp = compat_ptr(regs->bp);
	while (entry->nr < PERF_MAX_STACK_DEPTH) {
		unsigned long bytes;
		frame.next_frame     = 0;
		frame.return_address = 0;

		bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
		if (bytes != sizeof(frame))
			break;

		if (fp < compat_ptr(regs->sp))
			break;

		perf_callchain_store(entry, frame.return_address);
		fp = compat_ptr(frame.next_frame);
	}
	return 1;
}
#else
static inline int
perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
{
	return 0;
}
#endif

void
perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
{
	struct stack_frame frame;
	const void __user *fp;

	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
		/* TODO: We don't support guest os callchain now */
		return;
	}

	fp = (void __user *)regs->bp;

	perf_callchain_store(entry, regs->ip);

	if (perf_callchain_user32(regs, entry))
		return;

	while (entry->nr < PERF_MAX_STACK_DEPTH) {
		unsigned long bytes;
		frame.next_frame	     = NULL;
		frame.return_address = 0;

		bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
		if (bytes != sizeof(frame))
			break;

		if ((unsigned long)fp < regs->sp)
			break;

		perf_callchain_store(entry, frame.return_address);
		fp = frame.next_frame;
	}
}

unsigned long perf_instruction_pointer(struct pt_regs *regs)
{
	unsigned long ip;

	if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
		ip = perf_guest_cbs->get_guest_ip();
	else
		ip = instruction_pointer(regs);

	return ip;
}

unsigned long perf_misc_flags(struct pt_regs *regs)
{
	int misc = 0;

	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
		if (perf_guest_cbs->is_user_mode())
			misc |= PERF_RECORD_MISC_GUEST_USER;
		else
			misc |= PERF_RECORD_MISC_GUEST_KERNEL;
	} else {
		if (user_mode(regs))
			misc |= PERF_RECORD_MISC_USER;
		else
			misc |= PERF_RECORD_MISC_KERNEL;
	}

	if (regs->flags & PERF_EFLAGS_EXACT)
		misc |= PERF_RECORD_MISC_EXACT_IP;

	return misc;
}