GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/kernel/cpu/mce/intel.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Intel specific MCE features.
 * Copyright 2004 Zwane Mwaikambo <[email protected]>
 * Copyright (C) 2008, 2009 Intel Corporation
 * Author: Andi Kleen
 */

#include <linux/gfp.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/cpumask.h>
#include <asm/apic.h>
#include <asm/cpufeature.h>
#include <asm/cpu_device_id.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/mce.h>

#include "internal.h"

/*
 * Support for Intel Corrected Machine Check Interrupts. This allows
 * the CPU to raise an interrupt when a corrected machine check happens.
 * Normally we pick those up using a regular polling timer.
 * Also supports reliable discovery of shared banks.
 */

/*
 * CMCI can be delivered to multiple cpus that share a machine check bank
 * so we need to designate a single cpu to process errors logged in each bank
 * in the interrupt handler (otherwise we would have many races and potential
 * double reporting of the same error).
 * Note that this can change when a cpu is offlined or brought online since
 * some MCA banks are shared across cpus. When a cpu is offlined, cmci_clear()
 * disables CMCI on all banks owned by the cpu and clears this bitfield. At
 * this point, cmci_rediscover() kicks in and a different cpu may end up
 * taking ownership of some of the shared MCA banks that were previously
 * owned by the offlined cpu.
 */
static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);

/*
 * cmci_discover_lock protects against parallel discovery attempts
 * which could race against each other.
 */
static DEFINE_RAW_SPINLOCK(cmci_discover_lock);

/*
 * On systems that support CMCI but have it disabled, polling for MCEs can
 * cause the same event to be reported multiple times because IA32_MCi_STATUS
 * is shared by CPUs in the same package.
 */
static DEFINE_SPINLOCK(cmci_poll_lock);

/* Linux non-storm CMCI threshold (may be overridden by BIOS) */
#define CMCI_THRESHOLD 1

/*
 * MCi_CTL2 threshold for each bank when there is no storm.
 * Default value for each bank may have been set by BIOS.
 */
static u16 cmci_threshold[MAX_NR_BANKS];

/*
 * High threshold to limit CMCI rate during storms. Max supported is
 * 0x7FFF. Use this slightly smaller value so it has a distinctive
 * signature when someone asks "Why am I not seeing all corrected errors?"
 * A high threshold is used instead of just disabling CMCI for a
 * bank because both corrected and uncorrected errors may be logged
 * in the same bank and signalled with CMCI. The threshold only applies
 * to corrected errors, so keeping CMCI enabled means that uncorrected
 * errors will still be processed in a timely fashion.
 */
#define CMCI_STORM_THRESHOLD 32749
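
/*
 * Check whether CMCI can be used on this CPU and report the number of
 * available MCA banks via *banks.
 */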
static bool cmci_supported(int *banks)
{
        u64 cap;

        if (mca_cfg.cmci_disabled || mca_cfg.ignore_ce)
                return false;

        /*
         * The vendor check is not strictly needed, but the MCE
         * initialization is vendor-keyed and this makes sure none of
         * the backdoors are entered otherwise.
         */
        if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL &&
            boot_cpu_data.x86_vendor != X86_VENDOR_ZHAOXIN)
                return false;

        if (!boot_cpu_has(X86_FEATURE_APIC) || lapic_get_maxlvt() < 6)
                return false;

        rdmsrq(MSR_IA32_MCG_CAP, cap);
        *banks = min_t(unsigned, MAX_NR_BANKS, cap & MCG_BANKCNT_MASK);
        return !!(cap & MCG_CMCI_P);
}
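
/*
 * Check whether Local MCE (LMCE) delivery can be used: the CPU must
 * advertise both recovery support (MCG_SER_P) and LMCE (MCG_LMCE_P),
 * and the BIOS must have enabled LMCE in the locked IA32_FEAT_CTL MSR.
 */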
static bool lmce_supported(void)
{
        u64 tmp;

        if (mca_cfg.lmce_disabled)
                return false;

        rdmsrq(MSR_IA32_MCG_CAP, tmp);

        /*
         * LMCE depends on recovery support in the processor. Hence both
         * MCG_SER_P and MCG_LMCE_P should be present in MCG_CAP.
         */
        if ((tmp & (MCG_SER_P | MCG_LMCE_P)) !=
            (MCG_SER_P | MCG_LMCE_P))
                return false;

        /*
         * BIOS should indicate support for LMCE by setting bit 20 in
         * IA32_FEAT_CTL without which touching MCG_EXT_CTL will generate a #GP
         * fault. The MSR must also be locked for LMCE_ENABLED to take effect.
         * WARN if the MSR isn't locked as init_ia32_feat_ctl() unconditionally
         * locks the MSR in the event that it wasn't already locked by BIOS.
         */
        rdmsrq(MSR_IA32_FEAT_CTL, tmp);
        if (WARN_ON_ONCE(!(tmp & FEAT_CTL_LOCKED)))
                return false;

        return tmp & FEAT_CTL_LMCE_ENABLED;
}

/*
 * Set a new CMCI threshold value. Preserve the state of the
 * MCI_CTL2_CMCI_EN bit in case this happens during a
 * cmci_rediscover() operation.
 */
static void cmci_set_threshold(int bank, int thresh)
{
        unsigned long flags;
        u64 val;

        raw_spin_lock_irqsave(&cmci_discover_lock, flags);
        rdmsrq(MSR_IA32_MCx_CTL2(bank), val);
        val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
        wrmsrq(MSR_IA32_MCx_CTL2(bank), val | thresh);
        raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}
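
/*
 * Switch a bank between the high storm threshold and its saved default
 * threshold when a CMCI storm starts or subsides.
 */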
void mce_intel_handle_storm(int bank, bool on)
{
        if (on)
                cmci_set_threshold(bank, CMCI_STORM_THRESHOLD);
        else
                cmci_set_threshold(bank, cmci_threshold[bank]);
}

/*
 * The interrupt handler. This is called on every event.
 * Just call the poller directly to log any events.
 * This could in theory increase the threshold under high load,
 * but doesn't for now.
 */
static void intel_threshold_interrupt(void)
{
        machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
}

/*
 * Check all the reasons why the current CPU cannot claim
 * ownership of a bank.
 * 1: CPU already owns this bank
 * 2: BIOS owns this bank
 * 3: Some other CPU owns this bank
 */
static bool cmci_skip_bank(int bank, u64 *val)
{
        unsigned long *owned = (void *)this_cpu_ptr(&mce_banks_owned);

        if (test_bit(bank, owned))
                return true;

        /* Skip banks in firmware first mode */
        if (test_bit(bank, mce_banks_ce_disabled))
                return true;

        rdmsrq(MSR_IA32_MCx_CTL2(bank), *val);

        /* Already owned by someone else? */
        if (*val & MCI_CTL2_CMCI_EN) {
                clear_bit(bank, owned);
                __clear_bit(bank, this_cpu_ptr(mce_poll_banks));
                return true;
        }

        return false;
}

/*
 * Decide which CMCI interrupt threshold to use:
 * 1: If this bank is in storm mode from whichever CPU was
 *    the previous owner, stay in storm mode.
 * 2: If ignoring any threshold set by BIOS, set Linux default
 * 3: Try to honor BIOS threshold (unless buggy BIOS set it at zero).
 */
static u64 cmci_pick_threshold(u64 val, int *bios_zero_thresh)
{
        if ((val & MCI_CTL2_CMCI_THRESHOLD_MASK) == CMCI_STORM_THRESHOLD)
                return val;

        if (!mca_cfg.bios_cmci_threshold) {
                val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
                val |= CMCI_THRESHOLD;
        } else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) {
                /*
                 * If bios_cmci_threshold boot option was specified
                 * but the threshold is zero, we'll try to initialize
                 * it to 1.
                 */
                *bios_zero_thresh = 1;
                val |= CMCI_THRESHOLD;
        }

        return val;
}

/*
 * Try to claim ownership of a bank.
 */
static void cmci_claim_bank(int bank, u64 val, int bios_zero_thresh, int *bios_wrong_thresh)
{
        struct mca_storm_desc *storm = this_cpu_ptr(&storm_desc);

        val |= MCI_CTL2_CMCI_EN;
        wrmsrq(MSR_IA32_MCx_CTL2(bank), val);
        rdmsrq(MSR_IA32_MCx_CTL2(bank), val);

        /* If the enable bit did not stick, this bank should be polled. */
        if (!(val & MCI_CTL2_CMCI_EN)) {
                WARN_ON(!test_bit(bank, this_cpu_ptr(mce_poll_banks)));
                storm->banks[bank].poll_only = true;
                return;
        }

        /* This CPU successfully set the enable bit. */
        set_bit(bank, (void *)this_cpu_ptr(&mce_banks_owned));

        if ((val & MCI_CTL2_CMCI_THRESHOLD_MASK) == CMCI_STORM_THRESHOLD) {
                pr_notice("CPU%d BANK%d CMCI inherited storm\n", smp_processor_id(), bank);
                mce_inherit_storm(bank);
                cmci_storm_begin(bank);
        } else {
                __clear_bit(bank, this_cpu_ptr(mce_poll_banks));
        }

        /*
         * We are able to set thresholds for some banks that
         * had a threshold of 0. This means the BIOS has not
         * set the thresholds properly or does not work with
         * this boot option. Note down now and report later.
         */
        if (mca_cfg.bios_cmci_threshold && bios_zero_thresh &&
            (val & MCI_CTL2_CMCI_THRESHOLD_MASK))
                *bios_wrong_thresh = 1;

        /* Save default threshold for each bank */
        if (cmci_threshold[bank] == 0)
                cmci_threshold[bank] = val & MCI_CTL2_CMCI_THRESHOLD_MASK;
}

/*
 * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
 * on this CPU. Use the algorithm recommended in the SDM to discover shared
 * banks. Called during initial bootstrap, and also for hotplug CPU operations
 * to rediscover/reassign machine check banks.
 */
static void cmci_discover(int banks)
{
        int bios_wrong_thresh = 0;
        unsigned long flags;
        int i;

        raw_spin_lock_irqsave(&cmci_discover_lock, flags);
        for (i = 0; i < banks; i++) {
                u64 val;
                int bios_zero_thresh = 0;

                if (cmci_skip_bank(i, &val))
                        continue;

                val = cmci_pick_threshold(val, &bios_zero_thresh);
                cmci_claim_bank(i, val, bios_zero_thresh, &bios_wrong_thresh);
        }
        raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
        if (mca_cfg.bios_cmci_threshold && bios_wrong_thresh) {
                pr_info_once(
                        "bios_cmci_threshold: Some banks do not have valid thresholds set\n");
                pr_info_once(
                        "bios_cmci_threshold: Make sure your BIOS supports this boot option\n");
        }
}

/*
 * Just in case we missed an event during initialization, check
 * all the CMCI-owned banks.
 */
void cmci_recheck(void)
{
        unsigned long flags;
        int banks;

        if (!mce_available(raw_cpu_ptr(&cpu_info)) || !cmci_supported(&banks))
                return;

        local_irq_save(flags);
        machine_check_poll(0, this_cpu_ptr(&mce_banks_owned));
        local_irq_restore(flags);
}

/* Caller must hold cmci_discover_lock. */
static void __cmci_disable_bank(int bank)
{
        u64 val;

        if (!test_bit(bank, this_cpu_ptr(mce_banks_owned)))
                return;
        rdmsrq(MSR_IA32_MCx_CTL2(bank), val);
        val &= ~MCI_CTL2_CMCI_EN;
        wrmsrq(MSR_IA32_MCx_CTL2(bank), val);
        __clear_bit(bank, this_cpu_ptr(mce_banks_owned));

        if ((val & MCI_CTL2_CMCI_THRESHOLD_MASK) == CMCI_STORM_THRESHOLD)
                cmci_storm_end(bank);
}

/*
 * Disable CMCI on this CPU for all banks it owns when it goes down.
 * This allows other CPUs to claim the banks on rediscovery.
 */
void cmci_clear(void)
{
        unsigned long flags;
        int i;
        int banks;

        if (!cmci_supported(&banks))
                return;
        raw_spin_lock_irqsave(&cmci_discover_lock, flags);
        for (i = 0; i < banks; i++)
                __cmci_disable_bank(i);
        raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}

static void cmci_rediscover_work_func(void *arg)
{
        int banks;

        /* Recheck banks in case CPUs don't all have the same set of banks */
        if (cmci_supported(&banks))
                cmci_discover(banks);
}

/* After a CPU went down, cycle through all the others and rediscover. */
void cmci_rediscover(void)
{
        int banks;

        if (!cmci_supported(&banks))
                return;

        on_each_cpu(cmci_rediscover_work_func, NULL, 1);
}

/*
 * Reenable CMCI on this CPU in case a CPU down failed.
 */
void cmci_reenable(void)
{
        int banks;
        if (cmci_supported(&banks))
                cmci_discover(banks);
}
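
/* Disable CMCI for a single bank on this CPU. */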
void cmci_disable_bank(int bank)
{
        int banks;
        unsigned long flags;

        if (!cmci_supported(&banks))
                return;

        raw_spin_lock_irqsave(&cmci_discover_lock, flags);
        __cmci_disable_bank(bank);
        raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}

/* Bank polling function when CMCI is disabled. */
static void cmci_mc_poll_banks(void)
{
        spin_lock(&cmci_poll_lock);
        machine_check_poll(0, this_cpu_ptr(&mce_poll_banks));
        spin_unlock(&cmci_poll_lock);
}

void intel_init_cmci(void)
{
        int banks;

        if (!cmci_supported(&banks)) {
                mc_poll_banks = cmci_mc_poll_banks;
                return;
        }

        mce_threshold_vector = intel_threshold_interrupt;
        cmci_discover(banks);
        /*
         * For CPU #0 this runs with the APIC still disabled, but that's
         * OK because only the vector is set up here. We still do another
         * check of the banks for CPU #0 later just to make sure we don't
         * miss any events.
         */
        apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED);
        cmci_recheck();
}
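
/* Enable LMCE delivery on this CPU if the CPU and BIOS support it. */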
void intel_init_lmce(void)
{
        u64 val;

        if (!lmce_supported())
                return;

        rdmsrq(MSR_IA32_MCG_EXT_CTL, val);

        if (!(val & MCG_EXT_CTL_LMCE_EN))
                wrmsrq(MSR_IA32_MCG_EXT_CTL, val | MCG_EXT_CTL_LMCE_EN);
}
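
/* Disable LMCE delivery on this CPU. */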
void intel_clear_lmce(void)
{
        u64 val;

        if (!lmce_supported())
                return;

        rdmsrq(MSR_IA32_MCG_EXT_CTL, val);
        val &= ~MCG_EXT_CTL_LMCE_EN;
        wrmsrq(MSR_IA32_MCG_EXT_CTL, val);
}

/*
 * Enable additional error logs from the integrated
 * memory controller on processors that support this.
 */
static void intel_imc_init(struct cpuinfo_x86 *c)
{
        u64 error_control;

        switch (c->x86_vfm) {
        case INTEL_SANDYBRIDGE_X:
        case INTEL_IVYBRIDGE_X:
        case INTEL_HASWELL_X:
                if (rdmsrq_safe(MSR_ERROR_CONTROL, &error_control))
                        return;
                error_control |= 2;
                wrmsrq_safe(MSR_ERROR_CONTROL, error_control);
                break;
        }
}
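
/* Set up all Intel-specific MCE features for this CPU. */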
void mce_intel_feature_init(struct cpuinfo_x86 *c)
{
        intel_init_cmci();
        intel_init_lmce();
        intel_imc_init(c);
}
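
/* Disable Intel-specific MCE features (LMCE and CMCI) on this CPU. */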
void mce_intel_feature_clear(struct cpuinfo_x86 *c)
{
        intel_clear_lmce();
        cmci_clear();
}
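
/*
 * Filter out spurious corrected-error reports in bank 0 caused by the
 * CPU errata listed below.
 */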
bool intel_filter_mce(struct mce *m)
{
        struct cpuinfo_x86 *c = &boot_cpu_data;

        /* MCE errata HSD131, HSM142, HSW131, BDM48 and SKX37 */
        if ((c->x86_vfm == INTEL_HASWELL ||
             c->x86_vfm == INTEL_HASWELL_L ||
             c->x86_vfm == INTEL_BROADWELL ||
             c->x86_vfm == INTEL_HASWELL_G ||
             c->x86_vfm == INTEL_SKYLAKE_X) &&
            (m->bank == 0) &&
            ((m->status & 0xa0000000ffffffff) == 0x80000000000f0005))
                return true;

        return false;
}

/*
 * Check if the address reported by the CPU is in a format we can parse.
 * It would be possible to add code for most other cases, but all would
 * be somewhat complicated (e.g. segment offset would require an instruction
 * parser). So only support physical addresses up to page granularity for now.
 */
bool intel_mce_usable_address(struct mce *m)
{
        if (!(m->status & MCI_STATUS_MISCV))
                return false;

        if (MCI_MISC_ADDR_LSB(m->misc) > PAGE_SHIFT)
                return false;

        if (MCI_MISC_ADDR_MODE(m->misc) != MCI_MISC_ADDR_PHYS)
                return false;

        return true;
}