/* Path: blob/master/arch/x86/oprofile/op_model_amd.c */
/* 10817 views */
/*1* @file op_model_amd.c2* athlon / K7 / K8 / Family 10h model-specific MSR operations3*4* @remark Copyright 2002-2009 OProfile authors5* @remark Read the file COPYING6*7* @author John Levon8* @author Philippe Elie9* @author Graydon Hoare10* @author Robert Richter <[email protected]>11* @author Barry Kasindorf <[email protected]>12* @author Jason Yeh <[email protected]>13* @author Suravee Suthikulpanit <[email protected]>14*/1516#include <linux/oprofile.h>17#include <linux/device.h>18#include <linux/pci.h>19#include <linux/percpu.h>2021#include <asm/ptrace.h>22#include <asm/msr.h>23#include <asm/nmi.h>24#include <asm/apic.h>25#include <asm/processor.h>26#include <asm/cpufeature.h>2728#include "op_x86_model.h"29#include "op_counter.h"3031#define NUM_COUNTERS 432#define NUM_COUNTERS_F15H 633#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX34#define NUM_VIRT_COUNTERS 3235#else36#define NUM_VIRT_COUNTERS 037#endif3839#define OP_EVENT_MASK 0x0FFF40#define OP_CTR_OVERFLOW (1ULL<<31)4142#define MSR_AMD_EVENTSEL_RESERVED ((0xFFFFFCF0ULL<<32)|(1ULL<<21))4344static int num_counters;45static unsigned long reset_value[OP_MAX_COUNTER];4647#define IBS_FETCH_SIZE 648#define IBS_OP_SIZE 124950static u32 ibs_caps;5152struct ibs_config {53unsigned long op_enabled;54unsigned long fetch_enabled;55unsigned long max_cnt_fetch;56unsigned long max_cnt_op;57unsigned long rand_en;58unsigned long dispatched_ops;59unsigned long branch_target;60};6162struct ibs_state {63u64 ibs_op_ctl;64int branch_target;65unsigned long sample_size;66};6768static struct ibs_config ibs_config;69static struct ibs_state ibs_state;7071/*72* IBS cpuid feature detection73*/7475#define IBS_CPUID_FEATURES 0x8000001b7677/*78* Same bit mask as for IBS cpuid feature flags (Fn8000_001B_EAX), but79* bit 0 is used to indicate the existence of IBS.80*/81#define IBS_CAPS_AVAIL (1U<<0)82#define IBS_CAPS_FETCHSAM (1U<<1)83#define IBS_CAPS_OPSAM (1U<<2)84#define IBS_CAPS_RDWROPCNT (1U<<3)85#define IBS_CAPS_OPCNT (1U<<4)86#define 
IBS_CAPS_BRNTRGT (1U<<5)87#define IBS_CAPS_OPCNTEXT (1U<<6)8889#define IBS_CAPS_DEFAULT (IBS_CAPS_AVAIL \90| IBS_CAPS_FETCHSAM \91| IBS_CAPS_OPSAM)9293/*94* IBS APIC setup95*/96#define IBSCTL 0x1cc97#define IBSCTL_LVT_OFFSET_VALID (1ULL<<8)98#define IBSCTL_LVT_OFFSET_MASK 0x0F99100/*101* IBS randomization macros102*/103#define IBS_RANDOM_BITS 12104#define IBS_RANDOM_MASK ((1ULL << IBS_RANDOM_BITS) - 1)105#define IBS_RANDOM_MAXCNT_OFFSET (1ULL << (IBS_RANDOM_BITS - 5))106107static u32 get_ibs_caps(void)108{109u32 ibs_caps;110unsigned int max_level;111112if (!boot_cpu_has(X86_FEATURE_IBS))113return 0;114115/* check IBS cpuid feature flags */116max_level = cpuid_eax(0x80000000);117if (max_level < IBS_CPUID_FEATURES)118return IBS_CAPS_DEFAULT;119120ibs_caps = cpuid_eax(IBS_CPUID_FEATURES);121if (!(ibs_caps & IBS_CAPS_AVAIL))122/* cpuid flags not valid */123return IBS_CAPS_DEFAULT;124125return ibs_caps;126}127128/*129* 16-bit Linear Feedback Shift Register (LFSR)130*131* 16 14 13 11132* Feedback polynomial = X + X + X + X + 1133*/134static unsigned int lfsr_random(void)135{136static unsigned int lfsr_value = 0xF00D;137unsigned int bit;138139/* Compute next bit to shift in */140bit = ((lfsr_value >> 0) ^141(lfsr_value >> 2) ^142(lfsr_value >> 3) ^143(lfsr_value >> 5)) & 0x0001;144145/* Advance to next register value */146lfsr_value = (lfsr_value >> 1) | (bit << 15);147148return lfsr_value;149}150151/*152* IBS software randomization153*154* The IBS periodic op counter is randomized in software. The lower 12155* bits of the 20 bit counter are randomized. 
IbsOpCurCnt is156* initialized with a 12 bit random value.157*/158static inline u64 op_amd_randomize_ibs_op(u64 val)159{160unsigned int random = lfsr_random();161162if (!(ibs_caps & IBS_CAPS_RDWROPCNT))163/*164* Work around if the hw can not write to IbsOpCurCnt165*166* Randomize the lower 8 bits of the 16 bit167* IbsOpMaxCnt [15:0] value in the range of -128 to168* +127 by adding/subtracting an offset to the169* maximum count (IbsOpMaxCnt).170*171* To avoid over or underflows and protect upper bits172* starting at bit 16, the initial value for173* IbsOpMaxCnt must fit in the range from 0x0081 to174* 0xff80.175*/176val += (s8)(random >> 4);177else178val |= (u64)(random & IBS_RANDOM_MASK) << 32;179180return val;181}182183static inline void184op_amd_handle_ibs(struct pt_regs * const regs,185struct op_msrs const * const msrs)186{187u64 val, ctl;188struct op_entry entry;189190if (!ibs_caps)191return;192193if (ibs_config.fetch_enabled) {194rdmsrl(MSR_AMD64_IBSFETCHCTL, ctl);195if (ctl & IBS_FETCH_VAL) {196rdmsrl(MSR_AMD64_IBSFETCHLINAD, val);197oprofile_write_reserve(&entry, regs, val,198IBS_FETCH_CODE, IBS_FETCH_SIZE);199oprofile_add_data64(&entry, val);200oprofile_add_data64(&entry, ctl);201rdmsrl(MSR_AMD64_IBSFETCHPHYSAD, val);202oprofile_add_data64(&entry, val);203oprofile_write_commit(&entry);204205/* reenable the IRQ */206ctl &= ~(IBS_FETCH_VAL | IBS_FETCH_CNT);207ctl |= IBS_FETCH_ENABLE;208wrmsrl(MSR_AMD64_IBSFETCHCTL, ctl);209}210}211212if (ibs_config.op_enabled) {213rdmsrl(MSR_AMD64_IBSOPCTL, ctl);214if (ctl & IBS_OP_VAL) {215rdmsrl(MSR_AMD64_IBSOPRIP, val);216oprofile_write_reserve(&entry, regs, val, IBS_OP_CODE,217ibs_state.sample_size);218oprofile_add_data64(&entry, val);219rdmsrl(MSR_AMD64_IBSOPDATA, val);220oprofile_add_data64(&entry, val);221rdmsrl(MSR_AMD64_IBSOPDATA2, val);222oprofile_add_data64(&entry, val);223rdmsrl(MSR_AMD64_IBSOPDATA3, val);224oprofile_add_data64(&entry, val);225rdmsrl(MSR_AMD64_IBSDCLINAD, val);226oprofile_add_data64(&entry, 
val);227rdmsrl(MSR_AMD64_IBSDCPHYSAD, val);228oprofile_add_data64(&entry, val);229if (ibs_state.branch_target) {230rdmsrl(MSR_AMD64_IBSBRTARGET, val);231oprofile_add_data(&entry, (unsigned long)val);232}233oprofile_write_commit(&entry);234235/* reenable the IRQ */236ctl = op_amd_randomize_ibs_op(ibs_state.ibs_op_ctl);237wrmsrl(MSR_AMD64_IBSOPCTL, ctl);238}239}240}241242static inline void op_amd_start_ibs(void)243{244u64 val;245246if (!ibs_caps)247return;248249memset(&ibs_state, 0, sizeof(ibs_state));250251/*252* Note: Since the max count settings may out of range we253* write back the actual used values so that userland can read254* it.255*/256257if (ibs_config.fetch_enabled) {258val = ibs_config.max_cnt_fetch >> 4;259val = min(val, IBS_FETCH_MAX_CNT);260ibs_config.max_cnt_fetch = val << 4;261val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0;262val |= IBS_FETCH_ENABLE;263wrmsrl(MSR_AMD64_IBSFETCHCTL, val);264}265266if (ibs_config.op_enabled) {267val = ibs_config.max_cnt_op >> 4;268if (!(ibs_caps & IBS_CAPS_RDWROPCNT)) {269/*270* IbsOpCurCnt not supported. See271* op_amd_randomize_ibs_op() for details.272*/273val = clamp(val, 0x0081ULL, 0xFF80ULL);274ibs_config.max_cnt_op = val << 4;275} else {276/*277* The start value is randomized with a278* positive offset, we need to compensate it279* with the half of the randomized range. Also280* avoid underflows.281*/282val += IBS_RANDOM_MAXCNT_OFFSET;283if (ibs_caps & IBS_CAPS_OPCNTEXT)284val = min(val, IBS_OP_MAX_CNT_EXT);285else286val = min(val, IBS_OP_MAX_CNT);287ibs_config.max_cnt_op =288(val - IBS_RANDOM_MAXCNT_OFFSET) << 4;289}290val = ((val & ~IBS_OP_MAX_CNT) << 4) | (val & IBS_OP_MAX_CNT);291val |= ibs_config.dispatched_ops ? 
IBS_OP_CNT_CTL : 0;292val |= IBS_OP_ENABLE;293ibs_state.ibs_op_ctl = val;294ibs_state.sample_size = IBS_OP_SIZE;295if (ibs_config.branch_target) {296ibs_state.branch_target = 1;297ibs_state.sample_size++;298}299val = op_amd_randomize_ibs_op(ibs_state.ibs_op_ctl);300wrmsrl(MSR_AMD64_IBSOPCTL, val);301}302}303304static void op_amd_stop_ibs(void)305{306if (!ibs_caps)307return;308309if (ibs_config.fetch_enabled)310/* clear max count and enable */311wrmsrl(MSR_AMD64_IBSFETCHCTL, 0);312313if (ibs_config.op_enabled)314/* clear max count and enable */315wrmsrl(MSR_AMD64_IBSOPCTL, 0);316}317318static inline int get_eilvt(int offset)319{320return !setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 1);321}322323static inline int put_eilvt(int offset)324{325return !setup_APIC_eilvt(offset, 0, 0, 1);326}327328static inline int ibs_eilvt_valid(void)329{330int offset;331u64 val;332int valid = 0;333334preempt_disable();335336rdmsrl(MSR_AMD64_IBSCTL, val);337offset = val & IBSCTL_LVT_OFFSET_MASK;338339if (!(val & IBSCTL_LVT_OFFSET_VALID)) {340pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n",341smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);342goto out;343}344345if (!get_eilvt(offset)) {346pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n",347smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);348goto out;349}350351valid = 1;352out:353preempt_enable();354355return valid;356}357358static inline int get_ibs_offset(void)359{360u64 val;361362rdmsrl(MSR_AMD64_IBSCTL, val);363if (!(val & IBSCTL_LVT_OFFSET_VALID))364return -EINVAL;365366return val & IBSCTL_LVT_OFFSET_MASK;367}368369static void setup_APIC_ibs(void)370{371int offset;372373offset = get_ibs_offset();374if (offset < 0)375goto failed;376377if (!setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 0))378return;379failed:380pr_warn("oprofile: IBS APIC setup failed on cpu #%d\n",381smp_processor_id());382}383384static void clear_APIC_ibs(void)385{386int offset;387388offset = 
get_ibs_offset();389if (offset >= 0)390setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1);391}392393#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX394395static void op_mux_switch_ctrl(struct op_x86_model_spec const *model,396struct op_msrs const * const msrs)397{398u64 val;399int i;400401/* enable active counters */402for (i = 0; i < num_counters; ++i) {403int virt = op_x86_phys_to_virt(i);404if (!reset_value[virt])405continue;406rdmsrl(msrs->controls[i].addr, val);407val &= model->reserved;408val |= op_x86_get_ctrl(model, &counter_config[virt]);409wrmsrl(msrs->controls[i].addr, val);410}411}412413#endif414415/* functions for op_amd_spec */416417static void op_amd_shutdown(struct op_msrs const * const msrs)418{419int i;420421for (i = 0; i < num_counters; ++i) {422if (!msrs->counters[i].addr)423continue;424release_perfctr_nmi(MSR_K7_PERFCTR0 + i);425release_evntsel_nmi(MSR_K7_EVNTSEL0 + i);426}427}428429static int op_amd_fill_in_addresses(struct op_msrs * const msrs)430{431int i;432433for (i = 0; i < num_counters; i++) {434if (!reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i))435goto fail;436if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) {437release_perfctr_nmi(MSR_K7_PERFCTR0 + i);438goto fail;439}440/* both registers must be reserved */441if (num_counters == NUM_COUNTERS_F15H) {442msrs->counters[i].addr = MSR_F15H_PERF_CTR + (i << 1);443msrs->controls[i].addr = MSR_F15H_PERF_CTL + (i << 1);444} else {445msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i;446msrs->counters[i].addr = MSR_K7_PERFCTR0 + i;447}448continue;449fail:450if (!counter_config[i].enabled)451continue;452op_x86_warn_reserved(i);453op_amd_shutdown(msrs);454return -EBUSY;455}456457return 0;458}459460static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,461struct op_msrs const * const msrs)462{463u64 val;464int i;465466/* setup reset_value */467for (i = 0; i < OP_MAX_COUNTER; ++i) {468if (counter_config[i].enabled469&& msrs->counters[op_x86_virt_to_phys(i)].addr)470reset_value[i] = 
counter_config[i].count;471else472reset_value[i] = 0;473}474475/* clear all counters */476for (i = 0; i < num_counters; ++i) {477if (!msrs->controls[i].addr)478continue;479rdmsrl(msrs->controls[i].addr, val);480if (val & ARCH_PERFMON_EVENTSEL_ENABLE)481op_x86_warn_in_use(i);482val &= model->reserved;483wrmsrl(msrs->controls[i].addr, val);484/*485* avoid a false detection of ctr overflows in NMI486* handler487*/488wrmsrl(msrs->counters[i].addr, -1LL);489}490491/* enable active counters */492for (i = 0; i < num_counters; ++i) {493int virt = op_x86_phys_to_virt(i);494if (!reset_value[virt])495continue;496497/* setup counter registers */498wrmsrl(msrs->counters[i].addr, -(u64)reset_value[virt]);499500/* setup control registers */501rdmsrl(msrs->controls[i].addr, val);502val &= model->reserved;503val |= op_x86_get_ctrl(model, &counter_config[virt]);504wrmsrl(msrs->controls[i].addr, val);505}506507if (ibs_caps)508setup_APIC_ibs();509}510511static void op_amd_cpu_shutdown(void)512{513if (ibs_caps)514clear_APIC_ibs();515}516517static int op_amd_check_ctrs(struct pt_regs * const regs,518struct op_msrs const * const msrs)519{520u64 val;521int i;522523for (i = 0; i < num_counters; ++i) {524int virt = op_x86_phys_to_virt(i);525if (!reset_value[virt])526continue;527rdmsrl(msrs->counters[i].addr, val);528/* bit is clear if overflowed: */529if (val & OP_CTR_OVERFLOW)530continue;531oprofile_add_sample(regs, virt);532wrmsrl(msrs->counters[i].addr, -(u64)reset_value[virt]);533}534535op_amd_handle_ibs(regs, msrs);536537/* See op_model_ppro.c */538return 1;539}540541static void op_amd_start(struct op_msrs const * const msrs)542{543u64 val;544int i;545546for (i = 0; i < num_counters; ++i) {547if (!reset_value[op_x86_phys_to_virt(i)])548continue;549rdmsrl(msrs->controls[i].addr, val);550val |= ARCH_PERFMON_EVENTSEL_ENABLE;551wrmsrl(msrs->controls[i].addr, val);552}553554op_amd_start_ibs();555}556557static void op_amd_stop(struct op_msrs const * const msrs)558{559u64 val;560int 
i;561562/*563* Subtle: stop on all counters to avoid race with setting our564* pm callback565*/566for (i = 0; i < num_counters; ++i) {567if (!reset_value[op_x86_phys_to_virt(i)])568continue;569rdmsrl(msrs->controls[i].addr, val);570val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;571wrmsrl(msrs->controls[i].addr, val);572}573574op_amd_stop_ibs();575}576577static int setup_ibs_ctl(int ibs_eilvt_off)578{579struct pci_dev *cpu_cfg;580int nodes;581u32 value = 0;582583nodes = 0;584cpu_cfg = NULL;585do {586cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD,587PCI_DEVICE_ID_AMD_10H_NB_MISC,588cpu_cfg);589if (!cpu_cfg)590break;591++nodes;592pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off593| IBSCTL_LVT_OFFSET_VALID);594pci_read_config_dword(cpu_cfg, IBSCTL, &value);595if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID)) {596pci_dev_put(cpu_cfg);597printk(KERN_DEBUG "Failed to setup IBS LVT offset, "598"IBSCTL = 0x%08x\n", value);599return -EINVAL;600}601} while (1);602603if (!nodes) {604printk(KERN_DEBUG "No CPU node configured for IBS\n");605return -ENODEV;606}607608return 0;609}610611/*612* This runs only on the current cpu. We try to find an LVT offset and613* setup the local APIC. For this we must disable preemption. On614* success we initialize all nodes with this offset. This updates then615* the offset in the IBS_CTL per-node msr. 
The per-core APIC setup of616* the IBS interrupt vector is called from op_amd_setup_ctrs()/op_-617* amd_cpu_shutdown() using the new offset.618*/619static int force_ibs_eilvt_setup(void)620{621int offset;622int ret;623624preempt_disable();625/* find the next free available EILVT entry, skip offset 0 */626for (offset = 1; offset < APIC_EILVT_NR_MAX; offset++) {627if (get_eilvt(offset))628break;629}630preempt_enable();631632if (offset == APIC_EILVT_NR_MAX) {633printk(KERN_DEBUG "No EILVT entry available\n");634return -EBUSY;635}636637ret = setup_ibs_ctl(offset);638if (ret)639goto out;640641if (!ibs_eilvt_valid()) {642ret = -EFAULT;643goto out;644}645646pr_err(FW_BUG "using offset %d for IBS interrupts\n", offset);647pr_err(FW_BUG "workaround enabled for IBS LVT offset\n");648649return 0;650out:651preempt_disable();652put_eilvt(offset);653preempt_enable();654return ret;655}656657/*658* check and reserve APIC extended interrupt LVT offset for IBS if659* available660*/661662static void init_ibs(void)663{664ibs_caps = get_ibs_caps();665666if (!ibs_caps)667return;668669if (ibs_eilvt_valid())670goto out;671672if (!force_ibs_eilvt_setup())673goto out;674675/* Failed to setup ibs */676ibs_caps = 0;677return;678679out:680printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n", ibs_caps);681}682683static int (*create_arch_files)(struct super_block *sb, struct dentry *root);684685static int setup_ibs_files(struct super_block *sb, struct dentry *root)686{687struct dentry *dir;688int ret = 0;689690/* architecture specific files */691if (create_arch_files)692ret = create_arch_files(sb, root);693694if (ret)695return ret;696697if (!ibs_caps)698return ret;699700/* model specific files */701702/* setup some reasonable defaults */703memset(&ibs_config, 0, sizeof(ibs_config));704ibs_config.max_cnt_fetch = 250000;705ibs_config.max_cnt_op = 250000;706707if (ibs_caps & IBS_CAPS_FETCHSAM) {708dir = oprofilefs_mkdir(sb, root, "ibs_fetch");709oprofilefs_create_ulong(sb, dir, 
"enable",710&ibs_config.fetch_enabled);711oprofilefs_create_ulong(sb, dir, "max_count",712&ibs_config.max_cnt_fetch);713oprofilefs_create_ulong(sb, dir, "rand_enable",714&ibs_config.rand_en);715}716717if (ibs_caps & IBS_CAPS_OPSAM) {718dir = oprofilefs_mkdir(sb, root, "ibs_op");719oprofilefs_create_ulong(sb, dir, "enable",720&ibs_config.op_enabled);721oprofilefs_create_ulong(sb, dir, "max_count",722&ibs_config.max_cnt_op);723if (ibs_caps & IBS_CAPS_OPCNT)724oprofilefs_create_ulong(sb, dir, "dispatched_ops",725&ibs_config.dispatched_ops);726if (ibs_caps & IBS_CAPS_BRNTRGT)727oprofilefs_create_ulong(sb, dir, "branch_target",728&ibs_config.branch_target);729}730731return 0;732}733734struct op_x86_model_spec op_amd_spec;735736static int op_amd_init(struct oprofile_operations *ops)737{738init_ibs();739create_arch_files = ops->create_files;740ops->create_files = setup_ibs_files;741742if (boot_cpu_data.x86 == 0x15) {743num_counters = NUM_COUNTERS_F15H;744} else {745num_counters = NUM_COUNTERS;746}747748op_amd_spec.num_counters = num_counters;749op_amd_spec.num_controls = num_counters;750op_amd_spec.num_virt_counters = max(num_counters, NUM_VIRT_COUNTERS);751752return 0;753}754755struct op_x86_model_spec op_amd_spec = {756/* num_counters/num_controls filled in at runtime */757.reserved = MSR_AMD_EVENTSEL_RESERVED,758.event_mask = OP_EVENT_MASK,759.init = op_amd_init,760.fill_in_addresses = &op_amd_fill_in_addresses,761.setup_ctrs = &op_amd_setup_ctrs,762.cpu_down = &op_amd_cpu_shutdown,763.check_ctrs = &op_amd_check_ctrs,764.start = &op_amd_start,765.stop = &op_amd_stop,766.shutdown = &op_amd_shutdown,767#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX768.switch_ctrl = &op_mux_switch_ctrl,769#endif770};771772773