// SPDX-License-Identifier: GPL-2.0
/*
 * Intel specific MCE features.
 * Copyright 2004 Zwane Mwaikambo <[email protected]>
 * Copyright (C) 2008, 2009 Intel Corporation
 * Author: Andi Kleen
 */

#include <linux/gfp.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/cpumask.h>
#include <asm/apic.h>
#include <asm/cpufeature.h>
#include <asm/cpu_device_id.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/mce.h>

#include "internal.h"

/*
 * Support for Intel Corrected Machine Check Interrupts (CMCI). This allows
 * the CPU to raise an interrupt when a corrected machine check happens.
 * Normally we pick those up using a regular polling timer.
 * Also supports reliable discovery of shared banks.
 */

/*
 * CMCI can be delivered to multiple CPUs that share a machine check bank,
 * so we need to designate a single CPU to process errors logged in each bank
 * in the interrupt handler (otherwise we would have many races and potential
 * double reporting of the same error).
 * Note that this can change when a CPU is offlined or brought online, since
 * some MCA banks are shared across CPUs. When a CPU is offlined, cmci_clear()
 * disables CMCI on all banks owned by the CPU and clears this bitfield. At
 * this point, cmci_rediscover() kicks in and a different CPU may end up
 * taking ownership of some of the shared MCA banks that were previously
 * owned by the offlined CPU.
 */
static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);

/*
 * cmci_discover_lock protects against parallel discovery attempts
 * which could race against each other.
 */
static DEFINE_RAW_SPINLOCK(cmci_discover_lock);

/*
 * On systems that support CMCI but have it disabled, polling for MCEs can
 * cause the same event to be reported multiple times because IA32_MCi_STATUS
 * is shared by all CPUs in the same package.
 */
static DEFINE_SPINLOCK(cmci_poll_lock);

/* Linux non-storm CMCI threshold (may be overridden by BIOS) */
#define CMCI_THRESHOLD		1

/*
 * MCi_CTL2 threshold for each bank when there is no storm.
 * Default value for each bank may have been set by BIOS.
 */
static u16 cmci_threshold[MAX_NR_BANKS];

/*
 * High threshold to limit CMCI rate during storms. Max supported is
 * 0x7FFF. Use this slightly smaller value so it has a distinctive
 * signature when someone asks "Why am I not seeing all corrected errors?"
 * A high threshold is used instead of just disabling CMCI for a
 * bank because both corrected and uncorrected errors may be logged
 * in the same bank and signalled with CMCI. The threshold only applies
 * to corrected errors, so keeping CMCI enabled means that uncorrected
 * errors will still be processed in a timely fashion.
 */
#define CMCI_STORM_THRESHOLD	32749
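
/*
 * A minimal illustrative sketch (this helper is hypothetical and not
 * referenced elsewhere in this file): the low 15 bits of IA32_MCx_CTL2
 * (MCI_CTL2_CMCI_THRESHOLD_MASK == 0x7FFF) hold the corrected error
 * count that triggers a CMCI. CMCI_STORM_THRESHOLD is 32749 (0x7FED),
 * just below the 0x7FFF maximum, so reading the threshold back is
 * enough to tell whether a bank was left in storm mode.
 */
static __maybe_unused bool cmci_ctl2_in_storm_mode(u64 ctl2)
{
	/* Storm mode is flagged purely by the distinctive threshold value. */
	return (ctl2 & MCI_CTL2_CMCI_THRESHOLD_MASK) == CMCI_STORM_THRESHOLD;
}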
static bool cmci_supported(int *banks)
{
	u64 cap;

	if (mca_cfg.cmci_disabled || mca_cfg.ignore_ce)
		return false;

	/*
	 * Vendor check is not strictly needed, but the initialization
	 * is vendor keyed and this makes sure none of the backdoors
	 * are entered otherwise.
	 */
	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL &&
	    boot_cpu_data.x86_vendor != X86_VENDOR_ZHAOXIN)
		return false;

	if (!boot_cpu_has(X86_FEATURE_APIC) || lapic_get_maxlvt() < 6)
		return false;

	rdmsrq(MSR_IA32_MCG_CAP, cap);
	*banks = min_t(unsigned, MAX_NR_BANKS, cap & MCG_BANKCNT_MASK);
	return !!(cap & MCG_CMCI_P);
}

static bool lmce_supported(void)
{
	u64 tmp;

	if (mca_cfg.lmce_disabled)
		return false;

	rdmsrq(MSR_IA32_MCG_CAP, tmp);

	/*
	 * LMCE depends on recovery support in the processor. Hence both
	 * MCG_SER_P and MCG_LMCE_P should be present in MCG_CAP.
	 */
	if ((tmp & (MCG_SER_P | MCG_LMCE_P)) !=
		   (MCG_SER_P | MCG_LMCE_P))
		return false;

	/*
	 * BIOS should indicate support for LMCE by setting bit 20 in
	 * IA32_FEAT_CTL without which touching MCG_EXT_CTL will generate a #GP
	 * fault. The MSR must also be locked for LMCE_ENABLED to take effect.
	 * WARN if the MSR isn't locked as init_ia32_feat_ctl() unconditionally
	 * locks the MSR in the event that it wasn't already locked by BIOS.
	 */
	rdmsrq(MSR_IA32_FEAT_CTL, tmp);
	if (WARN_ON_ONCE(!(tmp & FEAT_CTL_LOCKED)))
		return false;

	return tmp & FEAT_CTL_LMCE_ENABLED;
}

/*
 * Set a new CMCI threshold value. Preserve the state of the
 * MCI_CTL2_CMCI_EN bit in case this happens during a
 * cmci_rediscover() operation.
 */
static void cmci_set_threshold(int bank, int thresh)
{
	unsigned long flags;
	u64 val;

	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
	rdmsrq(MSR_IA32_MCx_CTL2(bank), val);
	val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
	wrmsrq(MSR_IA32_MCx_CTL2(bank), val | thresh);
	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}

void mce_intel_handle_storm(int bank, bool on)
{
	if (on)
		cmci_set_threshold(bank, CMCI_STORM_THRESHOLD);
	else
		cmci_set_threshold(bank, cmci_threshold[bank]);
}

/*
 * The interrupt handler. This is called on every event.
 * Just call the poller directly to log any events.
 * This could in theory increase the threshold under high load,
 * but doesn't for now.
 */
static void intel_threshold_interrupt(void)
{
	machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
}

/*
 * Check all the reasons why the current CPU cannot claim
 * ownership of a bank:
 * 1: CPU already owns this bank
 * 2: BIOS owns this bank
 * 3: Some other CPU owns this bank
 */
static bool cmci_skip_bank(int bank, u64 *val)
{
	unsigned long *owned = (void *)this_cpu_ptr(&mce_banks_owned);

	if (test_bit(bank, owned))
		return true;

	/* Skip banks in firmware first mode */
	if (test_bit(bank, mce_banks_ce_disabled))
		return true;

	rdmsrq(MSR_IA32_MCx_CTL2(bank), *val);

	/* Already owned by someone else? */
	if (*val & MCI_CTL2_CMCI_EN) {
		clear_bit(bank, owned);
		__clear_bit(bank, this_cpu_ptr(mce_poll_banks));
		return true;
	}

	return false;
}
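
/*
 * A minimal sketch of the ownership test used throughout this file (a
 * hypothetical helper, not called by the code here): a CPU may log a
 * bank's corrected errors from its CMCI handler only when the bank is
 * set in its per-CPU mce_banks_owned bitmap.
 */
static __maybe_unused bool cmci_bank_owned_by_this_cpu(int bank)
{
	unsigned long *owned = (void *)this_cpu_ptr(&mce_banks_owned);

	return test_bit(bank, owned);
}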
/*
 * Decide which CMCI interrupt threshold to use:
 * 1: If this bank is in storm mode from whichever CPU was
 *    the previous owner, stay in storm mode.
 * 2: If ignoring any threshold set by BIOS, set Linux default
 * 3: Try to honor BIOS threshold (unless buggy BIOS set it at zero).
 */
static u64 cmci_pick_threshold(u64 val, int *bios_zero_thresh)
{
	if ((val & MCI_CTL2_CMCI_THRESHOLD_MASK) == CMCI_STORM_THRESHOLD)
		return val;

	if (!mca_cfg.bios_cmci_threshold) {
		val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
		val |= CMCI_THRESHOLD;
	} else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) {
		/*
		 * If the bios_cmci_threshold boot option was specified
		 * but the threshold is zero, we'll try to initialize
		 * it to 1.
		 */
		*bios_zero_thresh = 1;
		val |= CMCI_THRESHOLD;
	}

	return val;
}

/*
 * Try to claim ownership of a bank.
 */
static void cmci_claim_bank(int bank, u64 val, int bios_zero_thresh, int *bios_wrong_thresh)
{
	struct mca_storm_desc *storm = this_cpu_ptr(&storm_desc);

	val |= MCI_CTL2_CMCI_EN;
	wrmsrq(MSR_IA32_MCx_CTL2(bank), val);
	rdmsrq(MSR_IA32_MCx_CTL2(bank), val);

	/* If the enable bit did not stick, this bank should be polled. */
	if (!(val & MCI_CTL2_CMCI_EN)) {
		WARN_ON(!test_bit(bank, this_cpu_ptr(mce_poll_banks)));
		storm->banks[bank].poll_only = true;
		return;
	}

	/* This CPU successfully set the enable bit. */
	set_bit(bank, (void *)this_cpu_ptr(&mce_banks_owned));

	if ((val & MCI_CTL2_CMCI_THRESHOLD_MASK) == CMCI_STORM_THRESHOLD) {
		pr_notice("CPU%d BANK%d CMCI inherited storm\n", smp_processor_id(), bank);
		mce_inherit_storm(bank);
		cmci_storm_begin(bank);
	} else {
		__clear_bit(bank, this_cpu_ptr(mce_poll_banks));
	}

	/*
	 * We are able to set thresholds for some banks that
	 * had a threshold of 0. This means the BIOS has not
	 * set the thresholds properly or does not work with
	 * this boot option. Note down now and report later.
	 */
	if (mca_cfg.bios_cmci_threshold && bios_zero_thresh &&
	    (val & MCI_CTL2_CMCI_THRESHOLD_MASK))
		*bios_wrong_thresh = 1;

	/* Save default threshold for each bank */
	if (cmci_threshold[bank] == 0)
		cmci_threshold[bank] = val & MCI_CTL2_CMCI_THRESHOLD_MASK;
}
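
/*
 * A minimal sketch of the claim handshake done in cmci_claim_bank() (a
 * hypothetical helper, not used above): set MCI_CTL2_CMCI_EN and read it
 * back. If the bit does not stick, the bank cannot signal CMCI and must
 * stay in the polling set. Arbitration between CPUs sharing a bank
 * happens earlier: a CPU that finds MCI_CTL2_CMCI_EN already set in
 * cmci_skip_bank() treats the bank as owned by someone else.
 */
static __maybe_unused bool cmci_enable_sticks(int bank, u64 val)
{
	val |= MCI_CTL2_CMCI_EN;
	wrmsrq(MSR_IA32_MCx_CTL2(bank), val);
	rdmsrq(MSR_IA32_MCx_CTL2(bank), val);

	/* Read back: did the hardware accept the enable bit? */
	return !!(val & MCI_CTL2_CMCI_EN);
}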
/*
 * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
 * on this CPU. Use the algorithm recommended in the SDM to discover shared
 * banks. Called during initial bootstrap, and also for hotplug CPU operations
 * to rediscover/reassign machine check banks.
 */
static void cmci_discover(int banks)
{
	int bios_wrong_thresh = 0;
	unsigned long flags;
	int i;

	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
	for (i = 0; i < banks; i++) {
		u64 val;
		int bios_zero_thresh = 0;

		if (cmci_skip_bank(i, &val))
			continue;

		val = cmci_pick_threshold(val, &bios_zero_thresh);
		cmci_claim_bank(i, val, bios_zero_thresh, &bios_wrong_thresh);
	}
	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
	if (mca_cfg.bios_cmci_threshold && bios_wrong_thresh) {
		pr_info_once(
			"bios_cmci_threshold: Some banks do not have valid thresholds set\n");
		pr_info_once(
			"bios_cmci_threshold: Make sure your BIOS supports this boot option\n");
	}
}

/*
 * Just in case we missed an event during initialization, check
 * all the CMCI owned banks.
 */
void cmci_recheck(void)
{
	unsigned long flags;
	int banks;

	if (!mce_available(raw_cpu_ptr(&cpu_info)) || !cmci_supported(&banks))
		return;

	local_irq_save(flags);
	machine_check_poll(0, this_cpu_ptr(&mce_banks_owned));
	local_irq_restore(flags);
}

/* Caller must hold the lock on cmci_discover_lock */
static void __cmci_disable_bank(int bank)
{
	u64 val;

	if (!test_bit(bank, this_cpu_ptr(mce_banks_owned)))
		return;
	rdmsrq(MSR_IA32_MCx_CTL2(bank), val);
	val &= ~MCI_CTL2_CMCI_EN;
	wrmsrq(MSR_IA32_MCx_CTL2(bank), val);
	__clear_bit(bank, this_cpu_ptr(mce_banks_owned));

	if ((val & MCI_CTL2_CMCI_THRESHOLD_MASK) == CMCI_STORM_THRESHOLD)
		cmci_storm_end(bank);
}

/*
 * Disable CMCI on this CPU for all banks it owns when it goes down.
 * This allows other CPUs to claim the banks on rediscovery.
 */
void cmci_clear(void)
{
	unsigned long flags;
	int i;
	int banks;

	if (!cmci_supported(&banks))
		return;
	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
	for (i = 0; i < banks; i++)
		__cmci_disable_bank(i);
	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}

static void cmci_rediscover_work_func(void *arg)
{
	int banks;

	/* Recheck banks in case CPUs don't all have the same ones */
	if (cmci_supported(&banks))
		cmci_discover(banks);
}

/* After a CPU went down, cycle through all the others and rediscover */
void cmci_rediscover(void)
{
	int banks;

	if (!cmci_supported(&banks))
		return;

	on_each_cpu(cmci_rediscover_work_func, NULL, 1);
}

/*
 * Reenable CMCI on this CPU in case a CPU down failed.
 */
void cmci_reenable(void)
{
	int banks;

	if (cmci_supported(&banks))
		cmci_discover(banks);
}

void cmci_disable_bank(int bank)
{
	int banks;
	unsigned long flags;

	if (!cmci_supported(&banks))
		return;

	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
	__cmci_disable_bank(bank);
	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}

/* Bank polling function when CMCI is disabled. */
static void cmci_mc_poll_banks(void)
{
	spin_lock(&cmci_poll_lock);
	machine_check_poll(0, this_cpu_ptr(&mce_poll_banks));
	spin_unlock(&cmci_poll_lock);
}

void intel_init_cmci(void)
{
	int banks;

	if (!cmci_supported(&banks)) {
		mc_poll_banks = cmci_mc_poll_banks;
		return;
	}

	mce_threshold_vector = intel_threshold_interrupt;
	cmci_discover(banks);
	/*
	 * For CPU #0 this runs with a still disabled APIC, but that's
	 * ok because only the vector is set up. We still do another
	 * check for the banks later for CPU #0 just to make sure
	 * to not miss any events.
	 */
	apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED);
	cmci_recheck();
}
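
/*
 * A minimal sketch of the LVT programming done in intel_init_cmci() (a
 * hypothetical helper, not used above): THRESHOLD_APIC_VECTOR routes the
 * interrupt to intel_threshold_interrupt() via mce_threshold_vector and
 * APIC_DM_FIXED selects fixed delivery mode. Setting APIC_LVT_MASKED
 * instead would suppress CMCI delivery on this CPU entirely.
 */
static __maybe_unused void cmci_program_lvt(bool masked)
{
	u32 lvt = THRESHOLD_APIC_VECTOR | APIC_DM_FIXED;

	if (masked)
		lvt |= APIC_LVT_MASKED;
	apic_write(APIC_LVTCMCI, lvt);
}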
void intel_init_lmce(void)
{
	u64 val;

	if (!lmce_supported())
		return;

	rdmsrq(MSR_IA32_MCG_EXT_CTL, val);

	if (!(val & MCG_EXT_CTL_LMCE_EN))
		wrmsrq(MSR_IA32_MCG_EXT_CTL, val | MCG_EXT_CTL_LMCE_EN);
}

void intel_clear_lmce(void)
{
	u64 val;

	if (!lmce_supported())
		return;

	rdmsrq(MSR_IA32_MCG_EXT_CTL, val);
	val &= ~MCG_EXT_CTL_LMCE_EN;
	wrmsrq(MSR_IA32_MCG_EXT_CTL, val);
}

/*
 * Enable additional error logs from the integrated
 * memory controller on processors that support this.
 */
static void intel_imc_init(struct cpuinfo_x86 *c)
{
	u64 error_control;

	switch (c->x86_vfm) {
	case INTEL_SANDYBRIDGE_X:
	case INTEL_IVYBRIDGE_X:
	case INTEL_HASWELL_X:
		if (rdmsrq_safe(MSR_ERROR_CONTROL, &error_control))
			return;
		/* Bit 1 requests the additional IMC error log */
		error_control |= 2;
		wrmsrq_safe(MSR_ERROR_CONTROL, error_control);
		break;
	}
}

void mce_intel_feature_init(struct cpuinfo_x86 *c)
{
	intel_init_cmci();
	intel_init_lmce();
	intel_imc_init(c);
}

void mce_intel_feature_clear(struct cpuinfo_x86 *c)
{
	intel_clear_lmce();
	cmci_clear();
}

bool intel_filter_mce(struct mce *m)
{
	struct cpuinfo_x86 *c = &boot_cpu_data;

	/* MCE errata HSD131, HSM142, HSW131, BDM48 and SKX37 */
	if ((c->x86_vfm == INTEL_HASWELL ||
	     c->x86_vfm == INTEL_HASWELL_L ||
	     c->x86_vfm == INTEL_BROADWELL ||
	     c->x86_vfm == INTEL_HASWELL_G ||
	     c->x86_vfm == INTEL_SKYLAKE_X) &&
	    (m->bank == 0) &&
	    ((m->status & 0xa0000000ffffffff) == 0x80000000000f0005))
		return true;

	return false;
}

/*
 * Check if the address reported by the CPU is in a format we can parse.
 * It would be possible to add code for most other cases, but all would
 * be somewhat complicated (e.g. segment offset would require an instruction
 * parser). So only support physical addresses up to page granularity for now.
 */
bool intel_mce_usable_address(struct mce *m)
{
	if (!(m->status & MCI_STATUS_MISCV))
		return false;

	if (MCI_MISC_ADDR_LSB(m->misc) > PAGE_SHIFT)
		return false;

	if (MCI_MISC_ADDR_MODE(m->misc) != MCI_MISC_ADDR_PHYS)
		return false;

	return true;
}
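
/*
 * A minimal sketch of how a caller can use an address that passed
 * intel_mce_usable_address() (a hypothetical helper, not used above):
 * MCI_MISC_ADDR_LSB() gives the least significant valid bit of
 * MCi_ADDR, so clearing the bits below it recovers an address that is
 * precise to at least page granularity.
 */
static __maybe_unused u64 mce_usable_phys_addr(struct mce *m)
{
	/* Mask off the undefined low bits reported in MCi_MISC. */
	return m->addr & ~((1ULL << MCI_MISC_ADDR_LSB(m->misc)) - 1);
}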