Path: blob/master/arch/x86/kernel/cpu/mcheck/mce_intel.c
/*
 * Intel specific MCE features.
 * Copyright 2004 Zwane Mwaikambo <[email protected]>
 * Copyright (C) 2008, 2009 Intel Corporation
 * Author: Andi Kleen
 */

#include <linux/gfp.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <asm/apic.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/mce.h>

/*
 * Support for Intel Corrected Machine Check Interrupts. This allows
 * the CPU to raise an interrupt when a corrected machine check happened.
 * Normally we pick those up using a regular polling timer.
 * Also supports reliable discovery of shared banks.
 */

static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);

/*
 * cmci_discover_lock protects against parallel discovery attempts
 * which could race against each other.
 */
static DEFINE_SPINLOCK(cmci_discover_lock);

#define CMCI_THRESHOLD 1

static int cmci_supported(int *banks)
{
	u64 cap;

	if (mce_cmci_disabled || mce_ignore_ce)
		return 0;

	/*
	 * Vendor check is not strictly needed, but the early
	 * initialization is vendor keyed and this
	 * makes sure none of the backdoors are entered otherwise.
	 */
	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
		return 0;
	if (!cpu_has_apic || lapic_get_maxlvt() < 6)
		return 0;
	rdmsrl(MSR_IA32_MCG_CAP, cap);
	/* MCG_CAP[7:0] is the bank count; MCG_CMCI_P signals CMCI support. */
	*banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);
	return !!(cap & MCG_CMCI_P);
}

/*
 * The interrupt handler. This is called on every event.
 * Just call the poller directly to log any events.
 * This could in theory increase the threshold under high load,
 * but doesn't for now.
 */
static void intel_threshold_interrupt(void)
{
	machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
	mce_notify_irq();
}

static void print_update(char *type, int *hdr, int num)
{
	if (*hdr == 0)
		printk(KERN_INFO "CPU %d MCA banks", smp_processor_id());
	*hdr = 1;
	printk(KERN_CONT " %s:%d", type, num);
}

/*
 * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
 * on this CPU. Use the algorithm recommended in the SDM to discover shared
 * banks.
 */
static void cmci_discover(int banks, int boot)
{
	unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned);
	unsigned long flags;
	int hdr = 0;
	int i;

	spin_lock_irqsave(&cmci_discover_lock, flags);
	for (i = 0; i < banks; i++) {
		u64 val;

		if (test_bit(i, owned))
			continue;

		rdmsrl(MSR_IA32_MCx_CTL2(i), val);

		/* Already owned by someone else? */
		if (val & MCI_CTL2_CMCI_EN) {
			if (test_and_clear_bit(i, owned) && !boot)
				print_update("SHD", &hdr, i);
			__clear_bit(i, __get_cpu_var(mce_poll_banks));
			continue;
		}
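		/*
		 * Nobody claimed this bank yet: try to take it by setting
		 * the enable bit along with our threshold, then read the
		 * register back to see whether the write stuck.
		 */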
		val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
		val |= MCI_CTL2_CMCI_EN | CMCI_THRESHOLD;
		wrmsrl(MSR_IA32_MCx_CTL2(i), val);
		rdmsrl(MSR_IA32_MCx_CTL2(i), val);

		/* Did the enable bit stick? -- the bank supports CMCI */
		if (val & MCI_CTL2_CMCI_EN) {
			if (!test_and_set_bit(i, owned) && !boot)
				print_update("CMCI", &hdr, i);
			__clear_bit(i, __get_cpu_var(mce_poll_banks));
		} else {
			WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks)));
		}
	}
	spin_unlock_irqrestore(&cmci_discover_lock, flags);
	if (hdr)
		printk(KERN_CONT "\n");
}

/*
 * Just in case we missed an event during initialization, check
 * all the CMCI-owned banks.
 */
void cmci_recheck(void)
{
	unsigned long flags;
	int banks;

	if (!mce_available(__this_cpu_ptr(&cpu_info)) || !cmci_supported(&banks))
		return;
	local_irq_save(flags);
	machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
	local_irq_restore(flags);
}

/*
 * Disable CMCI on this CPU for all banks it owns when it goes down.
 * This allows other CPUs to claim the banks on rediscovery.
 */
void cmci_clear(void)
{
	unsigned long flags;
	int i;
	int banks;
	u64 val;

	if (!cmci_supported(&banks))
		return;
	spin_lock_irqsave(&cmci_discover_lock, flags);
	for (i = 0; i < banks; i++) {
		if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
			continue;
		/* Disable CMCI */
		rdmsrl(MSR_IA32_MCx_CTL2(i), val);
		val &= ~(MCI_CTL2_CMCI_EN|MCI_CTL2_CMCI_THRESHOLD_MASK);
		wrmsrl(MSR_IA32_MCx_CTL2(i), val);
		__clear_bit(i, __get_cpu_var(mce_banks_owned));
	}
	spin_unlock_irqrestore(&cmci_discover_lock, flags);
}

/*
 * After a CPU went down, cycle through all the others and rediscover
 * the banks it owned. Must run in process context.
 */
void cmci_rediscover(int dying)
{
	int banks;
	int cpu;
	cpumask_var_t old;

	if (!cmci_supported(&banks))
		return;
	if (!alloc_cpumask_var(&old, GFP_KERNEL))
		return;
	cpumask_copy(old, &current->cpus_allowed);

	for_each_online_cpu(cpu) {
		if (cpu == dying)
			continue;
		if (set_cpus_allowed_ptr(current, cpumask_of(cpu)))
			continue;
		/* Recheck banks in case CPUs don't all have the same number */
		if (cmci_supported(&banks))
			cmci_discover(banks, 0);
	}

	set_cpus_allowed_ptr(current, old);
	free_cpumask_var(old);
}

/*
 * Reenable CMCI on this CPU in case a CPU down failed.
 */
void cmci_reenable(void)
{
	int banks;
	if (cmci_supported(&banks))
		cmci_discover(banks, 0);
}

static void intel_init_cmci(void)
{
	int banks;

	if (!cmci_supported(&banks))
		return;

	mce_threshold_vector = intel_threshold_interrupt;
	cmci_discover(banks, 1);
	/*
	 * For CPU #0 this runs with the APIC still disabled, but that's
	 * OK because only the vector is set up. We still do another
	 * check of the banks for CPU #0 later, just to make sure
	 * we don't miss any events.
	 */
	apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED);
	cmci_recheck();
}

void mce_intel_feature_init(struct cpuinfo_x86 *c)
{
	intel_init_thermal(c);
	intel_init_cmci();
}
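
The discovery protocol in cmci_discover() reduces to a read/modify/write/read-back handshake on MCi_CTL2. Below is a minimal, stand-alone user-space sketch of that handshake for a single bank; the rd()/wr() helpers, the plain ctl2 variable standing in for the MSR, and the inlined constant values are all illustrative assumptions, not kernel code:

#include <stdint.h>
#include <stdio.h>
#include <stdbool.h>

/* Illustrative stand-ins for MCI_CTL2_CMCI_EN, MCI_CTL2_CMCI_THRESHOLD_MASK
 * and CMCI_THRESHOLD; values assumed for this sketch. */
#define CMCI_EN		(1ULL << 30)
#define THRESHOLD_MASK	0x7fffULL
#define THRESHOLD	1ULL

/* Stand-in for one bank's MCi_CTL2 MSR, shared by all "CPUs". */
static uint64_t ctl2;

/* Flip to false to model a bank without CMCI: the enable bit won't stick. */
static bool bank_has_cmci = true;

static uint64_t rd(void)
{
	return ctl2;
}

static void wr(uint64_t v)
{
	if (!bank_has_cmci)
		v &= ~CMCI_EN;	/* hardware ignores the enable bit */
	ctl2 = v;
}

/* One CPU's attempt to claim the bank, with the same three outcomes as
 * cmci_discover(): shared, owned, or left to the poller. */
static const char *claim(void)
{
	uint64_t val = rd();

	if (val & CMCI_EN)
		return "SHD";	/* already owned by another CPU */

	val &= ~THRESHOLD_MASK;
	val |= CMCI_EN | THRESHOLD;
	wr(val);

	if (rd() & CMCI_EN)
		return "CMCI";	/* enable bit stuck: this CPU owns it */
	return "poll";		/* no CMCI: keep polling this bank */
}

int main(void)
{
	printf("cpu0: %s\n", claim());	/* CMCI: first claimant wins */
	printf("cpu1: %s\n", claim());	/* SHD: bank already enabled */
	return 0;
}

With bank_has_cmci flipped to false, the same claim() path falls through to the polling case, matching the WARN_ON branch in cmci_discover() that insists an unclaimable bank stays in mce_poll_banks.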