GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/kernel/cpu/mce/intel.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Intel specific MCE features.
 * Copyright 2004 Zwane Mwaikambo <[email protected]>
 * Copyright (C) 2008, 2009 Intel Corporation
 * Author: Andi Kleen
 */

#include <linux/gfp.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/cpumask.h>
#include <asm/apic.h>
#include <asm/cpufeature.h>
#include <asm/cpu_device_id.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/mce.h>

#include "internal.h"

/*
 * Support for Intel Corrected Machine Check Interrupts. This allows
 * the CPU to raise an interrupt when a corrected machine check happens.
 * Normally we pick those up using a regular polling timer.
 * Also supports reliable discovery of shared banks.
 */

/*
 * CMCI can be delivered to multiple cpus that share a machine check bank
 * so we need to designate a single cpu to process errors logged in each bank
 * in the interrupt handler (otherwise we would have many races and potential
 * double reporting of the same error).
 * Note that this can change when a cpu is offlined or brought online since
 * some MCA banks are shared across cpus. When a cpu is offlined, cmci_clear()
 * disables CMCI on all banks owned by the cpu and clears this bitfield. At
 * this point, cmci_rediscover() kicks in and a different cpu may end up
 * taking ownership of some of the shared MCA banks that were previously
 * owned by the offlined cpu.
 */
static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);

/*
 * cmci_discover_lock protects against parallel discovery attempts
 * which could race against each other.
 */
static DEFINE_RAW_SPINLOCK(cmci_discover_lock);

/*
 * On systems that support CMCI but have it disabled, polling for MCEs can
 * cause the same event to be reported multiple times because IA32_MCi_STATUS
 * is shared by CPUs in the same package.
 */
static DEFINE_SPINLOCK(cmci_poll_lock);

/* Linux non-storm CMCI threshold (may be overridden by BIOS) */
#define CMCI_THRESHOLD 1

/*
 * MCi_CTL2 threshold for each bank when there is no storm.
 * Default value for each bank may have been set by BIOS.
 */
static u16 cmci_threshold[MAX_NR_BANKS];

/*
 * High threshold to limit CMCI rate during storms. Max supported is
 * 0x7FFF. Use this slightly smaller value so it has a distinctive
 * signature when someone asks "Why am I not seeing all corrected errors?"
 * A high threshold is used instead of just disabling CMCI for a
 * bank because both corrected and uncorrected errors may be logged
 * in the same bank and signalled with CMCI. The threshold only applies
 * to corrected errors, so keeping CMCI enabled means that uncorrected
 * errors will still be processed in a timely fashion.
 */
#define CMCI_STORM_THRESHOLD 32749
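
/*
 * Check whether CMCI can be used on this CPU and report the number of
 * available MCA banks via *banks.
 */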
static bool cmci_supported(int *banks)
{
        u64 cap;

        if (mca_cfg.cmci_disabled || mca_cfg.ignore_ce)
                return false;

        /*
         * The vendor check is not strictly needed, but the MCE
         * initialization is vendor-keyed and this makes sure none of
         * the backdoors are entered otherwise.
         */
        if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL &&
            boot_cpu_data.x86_vendor != X86_VENDOR_ZHAOXIN)
                return false;

        if (!boot_cpu_has(X86_FEATURE_APIC) || lapic_get_maxlvt() < 6)
                return false;

        rdmsrq(MSR_IA32_MCG_CAP, cap);
        *banks = min_t(unsigned, MAX_NR_BANKS, cap & MCG_BANKCNT_MASK);
        return !!(cap & MCG_CMCI_P);
}
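
/*
 * Check whether Local MCE (LMCE) delivery can be used: the CPU must
 * advertise both recovery support (MCG_SER_P) and LMCE (MCG_LMCE_P),
 * and the BIOS must have enabled LMCE in the locked IA32_FEAT_CTL MSR.
 */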
static bool lmce_supported(void)
{
        u64 tmp;

        if (mca_cfg.lmce_disabled)
                return false;

        rdmsrq(MSR_IA32_MCG_CAP, tmp);

        /*
         * LMCE depends on recovery support in the processor. Hence both
         * MCG_SER_P and MCG_LMCE_P should be present in MCG_CAP.
         */
        if ((tmp & (MCG_SER_P | MCG_LMCE_P)) !=
            (MCG_SER_P | MCG_LMCE_P))
                return false;

        /*
         * BIOS should indicate support for LMCE by setting bit 20 in
         * IA32_FEAT_CTL without which touching MCG_EXT_CTL will generate a #GP
         * fault. The MSR must also be locked for LMCE_ENABLED to take effect.
         * WARN if the MSR isn't locked as init_ia32_feat_ctl() unconditionally
         * locks the MSR in the event that it wasn't already locked by BIOS.
         */
        rdmsrq(MSR_IA32_FEAT_CTL, tmp);
        if (WARN_ON_ONCE(!(tmp & FEAT_CTL_LOCKED)))
                return false;

        return tmp & FEAT_CTL_LMCE_ENABLED;
}

/*
 * Set a new CMCI threshold value. Preserve the state of the
 * MCI_CTL2_CMCI_EN bit in case this happens during a
 * cmci_rediscover() operation.
 */
static void cmci_set_threshold(int bank, int thresh)
{
        unsigned long flags;
        u64 val;

        raw_spin_lock_irqsave(&cmci_discover_lock, flags);
        rdmsrq(MSR_IA32_MCx_CTL2(bank), val);
        val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
        wrmsrq(MSR_IA32_MCx_CTL2(bank), val | thresh);
        raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}
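
/*
 * Switch a bank between the high storm threshold and its saved default
 * threshold when a CMCI storm starts or subsides.
 */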
void mce_intel_handle_storm(int bank, bool on)
{
        if (on)
                cmci_set_threshold(bank, CMCI_STORM_THRESHOLD);
        else
                cmci_set_threshold(bank, cmci_threshold[bank]);
}

/*
 * The interrupt handler. This is called on every event.
 * Just call the poller directly to log any events.
 * This could in theory increase the threshold under high load,
 * but doesn't for now.
 */
static void intel_threshold_interrupt(void)
{
        machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
}

/*
 * Check all the reasons why the current CPU cannot claim
 * ownership of a bank.
 * 1: CPU already owns this bank
 * 2: BIOS owns this bank
 * 3: Some other CPU owns this bank
 */
static bool cmci_skip_bank(int bank, u64 *val)
{
        unsigned long *owned = (void *)this_cpu_ptr(&mce_banks_owned);

        if (test_bit(bank, owned))
                return true;

        /* Skip banks in firmware first mode */
        if (test_bit(bank, mce_banks_ce_disabled))
                return true;

        rdmsrq(MSR_IA32_MCx_CTL2(bank), *val);

        /* Already owned by someone else? */
        if (*val & MCI_CTL2_CMCI_EN) {
                clear_bit(bank, owned);
                __clear_bit(bank, this_cpu_ptr(mce_poll_banks));
                return true;
        }

        return false;
}

/*
 * Decide which CMCI interrupt threshold to use:
 * 1: If this bank is in storm mode from whichever CPU was
 *    the previous owner, stay in storm mode.
 * 2: If ignoring any threshold set by BIOS, set Linux default
 * 3: Try to honor BIOS threshold (unless buggy BIOS set it at zero).
 */
static u64 cmci_pick_threshold(u64 val, int *bios_zero_thresh)
{
        if ((val & MCI_CTL2_CMCI_THRESHOLD_MASK) == CMCI_STORM_THRESHOLD)
                return val;

        if (!mca_cfg.bios_cmci_threshold) {
                val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
                val |= CMCI_THRESHOLD;
        } else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) {
                /*
                 * If bios_cmci_threshold boot option was specified
                 * but the threshold is zero, we'll try to initialize
                 * it to 1.
                 */
                *bios_zero_thresh = 1;
                val |= CMCI_THRESHOLD;
        }

        return val;
}

/*
 * Try to claim ownership of a bank.
 */
static void cmci_claim_bank(int bank, u64 val, int bios_zero_thresh, int *bios_wrong_thresh)
{
        struct mca_storm_desc *storm = this_cpu_ptr(&storm_desc);

        val |= MCI_CTL2_CMCI_EN;
        wrmsrq(MSR_IA32_MCx_CTL2(bank), val);
        rdmsrq(MSR_IA32_MCx_CTL2(bank), val);

        /* If the enable bit did not stick, this bank should be polled. */
        if (!(val & MCI_CTL2_CMCI_EN)) {
                WARN_ON(!test_bit(bank, this_cpu_ptr(mce_poll_banks)));
                storm->banks[bank].poll_only = true;
                return;
        }

        /* This CPU successfully set the enable bit. */
        set_bit(bank, (void *)this_cpu_ptr(&mce_banks_owned));

        if ((val & MCI_CTL2_CMCI_THRESHOLD_MASK) == CMCI_STORM_THRESHOLD) {
                pr_notice("CPU%d BANK%d CMCI inherited storm\n", smp_processor_id(), bank);
                mce_inherit_storm(bank);
                cmci_storm_begin(bank);
        } else {
                __clear_bit(bank, this_cpu_ptr(mce_poll_banks));
        }

        /*
         * We are able to set thresholds for some banks that
         * had a threshold of 0. This means the BIOS has not
         * set the thresholds properly or does not work with
         * this boot option. Note down now and report later.
         */
        if (mca_cfg.bios_cmci_threshold && bios_zero_thresh &&
            (val & MCI_CTL2_CMCI_THRESHOLD_MASK))
                *bios_wrong_thresh = 1;

        /* Save default threshold for each bank */
        if (cmci_threshold[bank] == 0)
                cmci_threshold[bank] = val & MCI_CTL2_CMCI_THRESHOLD_MASK;
}

/*
 * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
 * on this CPU. Use the algorithm recommended in the SDM to discover shared
 * banks. Called during initial bootstrap, and also for hotplug CPU operations
 * to rediscover/reassign machine check banks.
 */
static void cmci_discover(int banks)
{
        int bios_wrong_thresh = 0;
        unsigned long flags;
        int i;

        raw_spin_lock_irqsave(&cmci_discover_lock, flags);
        for (i = 0; i < banks; i++) {
                u64 val;
                int bios_zero_thresh = 0;

                if (cmci_skip_bank(i, &val))
                        continue;

                val = cmci_pick_threshold(val, &bios_zero_thresh);
                cmci_claim_bank(i, val, bios_zero_thresh, &bios_wrong_thresh);
        }
        raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
        if (mca_cfg.bios_cmci_threshold && bios_wrong_thresh) {
                pr_info_once(
                        "bios_cmci_threshold: Some banks do not have valid thresholds set\n");
                pr_info_once(
                        "bios_cmci_threshold: Make sure your BIOS supports this boot option\n");
        }
}

/*
 * Just in case we missed an event during initialization, check
 * all the CMCI-owned banks.
 */
void cmci_recheck(void)
{
        unsigned long flags;
        int banks;

        if (!mce_available(raw_cpu_ptr(&cpu_info)) || !cmci_supported(&banks))
                return;

        local_irq_save(flags);
        machine_check_poll(0, this_cpu_ptr(&mce_banks_owned));
        local_irq_restore(flags);
}

/* Caller must hold cmci_discover_lock. */
static void __cmci_disable_bank(int bank)
{
        u64 val;

        if (!test_bit(bank, this_cpu_ptr(mce_banks_owned)))
                return;
        rdmsrq(MSR_IA32_MCx_CTL2(bank), val);
        val &= ~MCI_CTL2_CMCI_EN;
        wrmsrq(MSR_IA32_MCx_CTL2(bank), val);
        __clear_bit(bank, this_cpu_ptr(mce_banks_owned));

        if ((val & MCI_CTL2_CMCI_THRESHOLD_MASK) == CMCI_STORM_THRESHOLD)
                cmci_storm_end(bank);
}

/*
 * Disable CMCI on this CPU for all banks it owns when it goes down.
 * This allows other CPUs to claim the banks on rediscovery.
 */
void cmci_clear(void)
{
        unsigned long flags;
        int i;
        int banks;

        if (!cmci_supported(&banks))
                return;
        raw_spin_lock_irqsave(&cmci_discover_lock, flags);
        for (i = 0; i < banks; i++)
                __cmci_disable_bank(i);
        raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}

static void cmci_rediscover_work_func(void *arg)
{
        int banks;

        /* Recheck banks in case CPUs don't all have the same set of banks */
        if (cmci_supported(&banks))
                cmci_discover(banks);
}

/* After a CPU went down, cycle through all the others and rediscover. */
void cmci_rediscover(void)
{
        int banks;

        if (!cmci_supported(&banks))
                return;

        on_each_cpu(cmci_rediscover_work_func, NULL, 1);
}

/*
 * Reenable CMCI on this CPU in case a CPU down failed.
 */
void cmci_reenable(void)
{
        int banks;
        if (cmci_supported(&banks))
                cmci_discover(banks);
}
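
/* Disable CMCI for a single bank on this CPU. */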
void cmci_disable_bank(int bank)
{
        int banks;
        unsigned long flags;

        if (!cmci_supported(&banks))
                return;

        raw_spin_lock_irqsave(&cmci_discover_lock, flags);
        __cmci_disable_bank(bank);
        raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}

/* Bank polling function when CMCI is disabled. */
static void cmci_mc_poll_banks(void)
{
        spin_lock(&cmci_poll_lock);
        machine_check_poll(0, this_cpu_ptr(&mce_poll_banks));
        spin_unlock(&cmci_poll_lock);
}

void intel_init_cmci(void)
{
        int banks;

        if (!cmci_supported(&banks)) {
                mc_poll_banks = cmci_mc_poll_banks;
                return;
        }

        mce_threshold_vector = intel_threshold_interrupt;
        cmci_discover(banks);
        /*
         * For CPU #0 this runs with the APIC still disabled, but that's
         * OK because only the vector is set up here. We still do another
         * check of the banks for CPU #0 later just to make sure we don't
         * miss any events.
         */
        apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED);
        cmci_recheck();
}
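
/* Enable LMCE delivery on this CPU if the CPU and BIOS support it. */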
void intel_init_lmce(void)
{
        u64 val;

        if (!lmce_supported())
                return;

        rdmsrq(MSR_IA32_MCG_EXT_CTL, val);

        if (!(val & MCG_EXT_CTL_LMCE_EN))
                wrmsrq(MSR_IA32_MCG_EXT_CTL, val | MCG_EXT_CTL_LMCE_EN);
}
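
/* Disable LMCE delivery on this CPU. */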
void intel_clear_lmce(void)
{
        u64 val;

        if (!lmce_supported())
                return;

        rdmsrq(MSR_IA32_MCG_EXT_CTL, val);
        val &= ~MCG_EXT_CTL_LMCE_EN;
        wrmsrq(MSR_IA32_MCG_EXT_CTL, val);
}

/*
 * Enable additional error logs from the integrated
 * memory controller on processors that support this.
 */
static void intel_imc_init(struct cpuinfo_x86 *c)
{
        u64 error_control;

        switch (c->x86_vfm) {
        case INTEL_SANDYBRIDGE_X:
        case INTEL_IVYBRIDGE_X:
        case INTEL_HASWELL_X:
                if (rdmsrq_safe(MSR_ERROR_CONTROL, &error_control))
                        return;
                error_control |= 2;
                wrmsrq_safe(MSR_ERROR_CONTROL, error_control);
                break;
        }
}
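
/* Set up all Intel-specific MCE features for this CPU. */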
void mce_intel_feature_init(struct cpuinfo_x86 *c)
{
        intel_init_cmci();
        intel_init_lmce();
        intel_imc_init(c);
}
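
/* Disable Intel-specific MCE features (LMCE and CMCI) on this CPU. */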
void mce_intel_feature_clear(struct cpuinfo_x86 *c)
{
        intel_clear_lmce();
        cmci_clear();
}
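
/*
 * Filter out spurious corrected-error reports in bank 0 caused by the
 * CPU errata listed below.
 */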
bool intel_filter_mce(struct mce *m)
{
        struct cpuinfo_x86 *c = &boot_cpu_data;

        /* MCE errata HSD131, HSM142, HSW131, BDM48 and SKX37 */
        if ((c->x86_vfm == INTEL_HASWELL ||
             c->x86_vfm == INTEL_HASWELL_L ||
             c->x86_vfm == INTEL_BROADWELL ||
             c->x86_vfm == INTEL_HASWELL_G ||
             c->x86_vfm == INTEL_SKYLAKE_X) &&
            (m->bank == 0) &&
            ((m->status & 0xa0000000ffffffff) == 0x80000000000f0005))
                return true;

        return false;
}

/*
 * Check if the address reported by the CPU is in a format we can parse.
 * It would be possible to add code for most other cases, but all would
 * be somewhat complicated (e.g. segment offset would require an instruction
 * parser). So only support physical addresses up to page granularity for now.
 */
bool intel_mce_usable_address(struct mce *m)
{
        if (!(m->status & MCI_STATUS_MISCV))
                return false;

        if (MCI_MISC_ADDR_LSB(m->misc) > PAGE_SHIFT)
                return false;

        if (MCI_MISC_ADDR_MODE(m->misc) != MCI_MISC_ADDR_PHYS)
                return false;

        return true;
}