GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/kernel/cpu/mce/amd.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * (c) 2005-2016 Advanced Micro Devices, Inc.
 *
 * Written by Jacob Shin - AMD, Inc.
 * Maintained by: Borislav Petkov <[email protected]>
 */
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/kobject.h>
#include <linux/percpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/sysfs.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/cpu.h>
#include <linux/smp.h>
#include <linux/string.h>

#include <asm/traps.h>
#include <asm/apic.h>
#include <asm/mce.h>
#include <asm/msr.h>
#include <asm/trace/irq_vectors.h>

#include "internal.h"

#define NR_BLOCKS         5
#define THRESHOLD_MAX     0xFFF
#define INT_TYPE_APIC     0x00020000
#define MASK_VALID_HI     0x80000000
#define MASK_CNTP_HI      0x40000000
#define MASK_LOCKED_HI    0x20000000
#define MASK_LVTOFF_HI    0x00F00000
#define MASK_COUNT_EN_HI  0x00080000
#define MASK_INT_TYPE_HI  0x00060000
#define MASK_OVERFLOW_HI  0x00010000
#define MASK_ERR_COUNT_HI 0x00000FFF
#define MASK_BLKPTR_LO    0xFF000000
#define MCG_XBLK_ADDR     0xC0000400

/* Deferred error settings */
#define MSR_CU_DEF_ERR    0xC0000410
#define MASK_DEF_LVTOFF   0x000000F0

/* Scalable MCA: */

/* Threshold LVT offset is at MSR0xC0000410[15:12] */
#define SMCA_THR_LVT_OFF  0xF000

static bool thresholding_irq_en;

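/*
 * Per-CPU bookkeeping for AMD MCE interrupt handling: bitmaps of the banks
 * whose thresholding/deferred error interrupts have been enabled, plus flags
 * recording whether the corresponding APIC LVT vectors were set up on this CPU.
 */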
struct mce_amd_cpu_data {
	mce_banks_t thr_intr_banks;
	mce_banks_t dfr_intr_banks;

	u32 thr_intr_en: 1,
	    dfr_intr_en: 1,
	    __resv: 30;
};

static DEFINE_PER_CPU_READ_MOSTLY(struct mce_amd_cpu_data, mce_amd_data);

static const char * const th_names[] = {
	"load_store",
	"insn_fetch",
	"combined_unit",
	"decode_unit",
	"northbridge",
	"execution_unit",
};

static const char * const smca_umc_block_names[] = {
	"dram_ecc",
	"misc_umc"
};

#define HWID_MCATYPE(hwid, mcatype) (((hwid) << 16) | (mcatype))

struct smca_hwid {
	unsigned int bank_type; /* Use with smca_bank_types for easy indexing. */
	u32 hwid_mcatype; /* (hwid,mcatype) tuple */
};

struct smca_bank {
	const struct smca_hwid *hwid;
	u32 id; /* Value of MCA_IPID[InstanceId]. */
	u8 sysfs_id; /* Value used for sysfs name. */
	u64 paddrv :1, /* Physical Address Valid bit in MCA_CONFIG */
	    __reserved :63;
};

static DEFINE_PER_CPU_READ_MOSTLY(struct smca_bank[MAX_NR_BANKS], smca_banks);
static DEFINE_PER_CPU_READ_MOSTLY(u8[N_SMCA_BANK_TYPES], smca_bank_counts);

static const char * const smca_names[] = {
	[SMCA_LS ... SMCA_LS_V2] = "load_store",
	[SMCA_IF] = "insn_fetch",
	[SMCA_L2_CACHE] = "l2_cache",
	[SMCA_DE] = "decode_unit",
	[SMCA_RESERVED] = "reserved",
	[SMCA_EX] = "execution_unit",
	[SMCA_FP] = "floating_point",
	[SMCA_L3_CACHE] = "l3_cache",
	[SMCA_CS ... SMCA_CS_V2] = "coherent_slave",
	[SMCA_PIE] = "pie",

	/* UMC v2 is separate because both of them can exist in a single system. */
	[SMCA_UMC] = "umc",
	[SMCA_UMC_V2] = "umc_v2",
	[SMCA_MA_LLC] = "ma_llc",
	[SMCA_PB] = "param_block",
	[SMCA_PSP ... SMCA_PSP_V2] = "psp",
	[SMCA_SMU ... SMCA_SMU_V2] = "smu",
	[SMCA_MP5] = "mp5",
	[SMCA_MPDMA] = "mpdma",
	[SMCA_NBIO] = "nbio",
	[SMCA_PCIE ... SMCA_PCIE_V2] = "pcie",
	[SMCA_XGMI_PCS] = "xgmi_pcs",
	[SMCA_NBIF] = "nbif",
	[SMCA_SHUB] = "shub",
	[SMCA_SATA] = "sata",
	[SMCA_USB] = "usb",
	[SMCA_USR_DP] = "usr_dp",
	[SMCA_USR_CP] = "usr_cp",
	[SMCA_GMI_PCS] = "gmi_pcs",
	[SMCA_XGMI_PHY] = "xgmi_phy",
	[SMCA_WAFL_PHY] = "wafl_phy",
	[SMCA_GMI_PHY] = "gmi_phy",
};

static const char *smca_get_name(enum smca_bank_types t)
{
	if (t >= N_SMCA_BANK_TYPES)
		return NULL;

	return smca_names[t];
}

enum smca_bank_types smca_get_bank_type(unsigned int cpu, unsigned int bank)
{
	struct smca_bank *b;

	if (bank >= MAX_NR_BANKS)
		return N_SMCA_BANK_TYPES;

	b = &per_cpu(smca_banks, cpu)[bank];
	if (!b->hwid)
		return N_SMCA_BANK_TYPES;

	return b->hwid->bank_type;
}
EXPORT_SYMBOL_GPL(smca_get_bank_type);

static const struct smca_hwid smca_hwid_mcatypes[] = {
	/* { bank_type, hwid_mcatype } */

	/* Reserved type */
	{ SMCA_RESERVED, HWID_MCATYPE(0x00, 0x0) },

	/* ZN Core (HWID=0xB0) MCA types */
	{ SMCA_LS, HWID_MCATYPE(0xB0, 0x0) },
	{ SMCA_LS_V2, HWID_MCATYPE(0xB0, 0x10) },
	{ SMCA_IF, HWID_MCATYPE(0xB0, 0x1) },
	{ SMCA_L2_CACHE, HWID_MCATYPE(0xB0, 0x2) },
	{ SMCA_DE, HWID_MCATYPE(0xB0, 0x3) },
	/* HWID 0xB0 MCATYPE 0x4 is Reserved */
	{ SMCA_EX, HWID_MCATYPE(0xB0, 0x5) },
	{ SMCA_FP, HWID_MCATYPE(0xB0, 0x6) },
	{ SMCA_L3_CACHE, HWID_MCATYPE(0xB0, 0x7) },

	/* Data Fabric MCA types */
	{ SMCA_CS, HWID_MCATYPE(0x2E, 0x0) },
	{ SMCA_PIE, HWID_MCATYPE(0x2E, 0x1) },
	{ SMCA_CS_V2, HWID_MCATYPE(0x2E, 0x2) },
	{ SMCA_MA_LLC, HWID_MCATYPE(0x2E, 0x4) },

	/* Unified Memory Controller MCA type */
	{ SMCA_UMC, HWID_MCATYPE(0x96, 0x0) },
	{ SMCA_UMC_V2, HWID_MCATYPE(0x96, 0x1) },

	/* Parameter Block MCA type */
	{ SMCA_PB, HWID_MCATYPE(0x05, 0x0) },

	/* Platform Security Processor MCA type */
	{ SMCA_PSP, HWID_MCATYPE(0xFF, 0x0) },
	{ SMCA_PSP_V2, HWID_MCATYPE(0xFF, 0x1) },

	/* System Management Unit MCA type */
	{ SMCA_SMU, HWID_MCATYPE(0x01, 0x0) },
	{ SMCA_SMU_V2, HWID_MCATYPE(0x01, 0x1) },

	/* Microprocessor 5 Unit MCA type */
	{ SMCA_MP5, HWID_MCATYPE(0x01, 0x2) },

	/* MPDMA MCA type */
	{ SMCA_MPDMA, HWID_MCATYPE(0x01, 0x3) },

	/* Northbridge IO Unit MCA type */
	{ SMCA_NBIO, HWID_MCATYPE(0x18, 0x0) },

	/* PCI Express Unit MCA type */
	{ SMCA_PCIE, HWID_MCATYPE(0x46, 0x0) },
	{ SMCA_PCIE_V2, HWID_MCATYPE(0x46, 0x1) },

	{ SMCA_XGMI_PCS, HWID_MCATYPE(0x50, 0x0) },
	{ SMCA_NBIF, HWID_MCATYPE(0x6C, 0x0) },
	{ SMCA_SHUB, HWID_MCATYPE(0x80, 0x0) },
	{ SMCA_SATA, HWID_MCATYPE(0xA8, 0x0) },
	{ SMCA_USB, HWID_MCATYPE(0xAA, 0x0) },
	{ SMCA_USR_DP, HWID_MCATYPE(0x170, 0x0) },
	{ SMCA_USR_CP, HWID_MCATYPE(0x180, 0x0) },
	{ SMCA_GMI_PCS, HWID_MCATYPE(0x241, 0x0) },
	{ SMCA_XGMI_PHY, HWID_MCATYPE(0x259, 0x0) },
	{ SMCA_WAFL_PHY, HWID_MCATYPE(0x267, 0x0) },
	{ SMCA_GMI_PHY, HWID_MCATYPE(0x269, 0x0) },
};

/*
 * In SMCA enabled processors, we can have multiple banks for a given IP type.
 * So to define a unique name for each bank, we use a temp c-string to append
 * the MCA_IPID[InstanceId] to type's name in get_name().
 *
 * InstanceId is 32 bits which is 8 characters. Make sure MAX_MCATYPE_NAME_LEN
 * is greater than 8 plus 1 (for underscore) plus length of longest type name.
 */
#define MAX_MCATYPE_NAME_LEN 30
static char buf_mcatype[MAX_MCATYPE_NAME_LEN];

struct threshold_block {
	/* This block's number within its bank. */
	unsigned int block;
	/* MCA bank number that contains this block. */
	unsigned int bank;
	/* CPU which controls this block's MCA bank. */
	unsigned int cpu;
	/* MCA_MISC MSR address for this block. */
	u32 address;
	/* Enable/Disable APIC interrupt. */
	bool interrupt_enable;
	/* Bank can generate an interrupt. */
	bool interrupt_capable;
	/* Value upon which threshold interrupt is generated. */
	u16 threshold_limit;
	/* sysfs object */
	struct kobject kobj;
	/* List of threshold blocks within this block's MCA bank. */
	struct list_head miscj;
};

struct threshold_bank {
	struct kobject *kobj;
	/* List of threshold blocks within this MCA bank. */
	struct list_head miscj;
};

static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);

/*
 * A list of the banks enabled on each logical CPU. Controls which respective
 * descriptors to initialize later in mce_threshold_create_device().
 */
static DEFINE_PER_CPU(u64, bank_map);

static void amd_threshold_interrupt(void);
static void amd_deferred_error_interrupt(void);

static void default_deferred_error_interrupt(void)
{
	pr_err("Unexpected deferred interrupt at vector %x\n", DEFERRED_ERROR_VECTOR);
}
void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt;

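/*
 * Set up an SMCA bank on this CPU: acknowledge the MCAX register space, opt in
 * to thresholding and deferred error interrupts where the bank supports them,
 * and cache the bank's HWID/McaType so it can be identified later.
 */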
static void smca_configure(unsigned int bank, unsigned int cpu)
{
	struct mce_amd_cpu_data *data = this_cpu_ptr(&mce_amd_data);
	u8 *bank_counts = this_cpu_ptr(smca_bank_counts);
	const struct smca_hwid *s_hwid;
	unsigned int i, hwid_mcatype;
	u32 high, low;
	u32 smca_config = MSR_AMD64_SMCA_MCx_CONFIG(bank);

	/* Set appropriate bits in MCA_CONFIG */
	if (!rdmsr_safe(smca_config, &low, &high)) {
		/*
		 * OS is required to set the MCAX bit to acknowledge that it is
		 * now using the new MSR ranges and new registers under each
		 * bank. It also means that the OS will configure deferred
		 * errors in the new MCx_CONFIG register. If the bit is not set,
		 * uncorrectable errors will cause a system panic.
		 *
		 * MCA_CONFIG[MCAX] is bit 32 (0 in the high portion of the MSR.)
		 */
		high |= BIT(0);

		/*
		 * SMCA sets the Deferred Error Interrupt type per bank.
		 *
		 * MCA_CONFIG[DeferredIntTypeSupported] is bit 5, and tells us
		 * if the DeferredIntType bit field is available.
		 *
		 * MCA_CONFIG[DeferredIntType] is bits [38:37] ([6:5] in the
		 * high portion of the MSR). OS should set this to 0x1 to enable
		 * APIC based interrupt. First, check that no interrupt has been
		 * set.
		 */
		if ((low & BIT(5)) && !((high >> 5) & 0x3) && data->dfr_intr_en) {
			__set_bit(bank, data->dfr_intr_banks);
			high |= BIT(5);
		}

		/*
		 * SMCA Corrected Error Interrupt
		 *
		 * MCA_CONFIG[IntPresent] is bit 10, and tells us if the bank can
		 * send an MCA Thresholding interrupt without the OS initializing
		 * this feature. This can be used if the threshold limit is managed
		 * by the platform.
		 *
		 * MCA_CONFIG[IntEn] is bit 40 (8 in the high portion of the MSR).
		 * The OS should set this to inform the platform that the OS is ready
		 * to handle the MCA Thresholding interrupt.
		 */
		if ((low & BIT(10)) && data->thr_intr_en) {
			__set_bit(bank, data->thr_intr_banks);
			high |= BIT(8);
		}

		this_cpu_ptr(mce_banks_array)[bank].lsb_in_status = !!(low & BIT(8));

		if (low & MCI_CONFIG_PADDRV)
			this_cpu_ptr(smca_banks)[bank].paddrv = 1;

		wrmsr(smca_config, low, high);
	}

	if (rdmsr_safe(MSR_AMD64_SMCA_MCx_IPID(bank), &low, &high)) {
		pr_warn("Failed to read MCA_IPID for bank %d\n", bank);
		return;
	}

	hwid_mcatype = HWID_MCATYPE(high & MCI_IPID_HWID,
				    (high & MCI_IPID_MCATYPE) >> 16);

	for (i = 0; i < ARRAY_SIZE(smca_hwid_mcatypes); i++) {
		s_hwid = &smca_hwid_mcatypes[i];

		if (hwid_mcatype == s_hwid->hwid_mcatype) {
			this_cpu_ptr(smca_banks)[bank].hwid = s_hwid;
			this_cpu_ptr(smca_banks)[bank].id = low;
			this_cpu_ptr(smca_banks)[bank].sysfs_id = bank_counts[s_hwid->bank_type]++;
			break;
		}
	}
}

struct thresh_restart {
	struct threshold_block *b;
	int set_lvt_off;
	int lvt_off;
	u16 old_limit;
};

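/* sysfs names for the MC4 (northbridge) threshold blocks on legacy parts. */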
static const char *bank4_names(const struct threshold_block *b)
{
	switch (b->address) {
	/* MSR4_MISC0 */
	case 0x00000413:
		return "dram";

	case 0xc0000408:
		return "ht_links";

	case 0xc0000409:
		return "l3_cache";

	default:
		WARN(1, "Funny MSR: 0x%08x\n", b->address);
		return "";
	}
};


static bool lvt_interrupt_supported(unsigned int bank, u32 msr_high_bits)
{
	/*
	 * bank 4 supports APIC LVT interrupts implicitly since forever.
	 */
	if (bank == 4)
		return true;

	/*
	 * IntP: interrupt present; if this bit is set, the thresholding
	 * bank can generate APIC LVT interrupts
	 */
	return msr_high_bits & BIT(28);
}

static bool lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi)
{
	int msr = (hi & MASK_LVTOFF_HI) >> 20;

	/*
	 * On SMCA CPUs, LVT offset is programmed at a different MSR, and
	 * the BIOS provides the value. The original field where LVT offset
	 * was set is reserved. Return early here:
	 */
	if (mce_flags.smca)
		return false;

	if (apic < 0) {
		pr_err(FW_BUG "cpu %d, failed to setup threshold interrupt "
		       "for bank %d, block %d (MSR%08X=0x%x%08x)\n", b->cpu,
		       b->bank, b->block, b->address, hi, lo);
		return false;
	}

	if (apic != msr) {
		pr_err(FW_BUG "cpu %d, invalid threshold interrupt offset %d "
		       "for bank %d, block %d (MSR%08X=0x%x%08x)\n",
		       b->cpu, apic, b->bank, b->block, b->address, hi, lo);
		return false;
	}

	return true;
};

/* Reprogram MCx_MISC MSR behind this threshold block. */
static void threshold_restart_block(void *_tr)
{
	struct thresh_restart *tr = _tr;
	u32 hi, lo;

	/* sysfs write might race against an offline operation */
	if (!this_cpu_read(threshold_banks) && !tr->set_lvt_off)
		return;

	rdmsr(tr->b->address, lo, hi);

	/*
	 * Reset error count and overflow bit.
	 * This is done during init or after handling an interrupt.
	 */
	if (hi & MASK_OVERFLOW_HI || tr->set_lvt_off) {
		hi &= ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI);
		hi |= THRESHOLD_MAX - tr->b->threshold_limit;
	} else if (tr->old_limit) { /* change limit w/o reset */
		int new_count = (hi & THRESHOLD_MAX) +
				(tr->old_limit - tr->b->threshold_limit);

		hi = (hi & ~MASK_ERR_COUNT_HI) |
		     (new_count & THRESHOLD_MAX);
	}

	/* clear IntType */
	hi &= ~MASK_INT_TYPE_HI;

	if (!tr->b->interrupt_capable)
		goto done;

	if (tr->set_lvt_off) {
		if (lvt_off_valid(tr->b, tr->lvt_off, lo, hi)) {
			/* set new lvt offset */
			hi &= ~MASK_LVTOFF_HI;
			hi |= tr->lvt_off << 20;
		}
	}

	if (tr->b->interrupt_enable)
		hi |= INT_TYPE_APIC;

done:

	hi |= MASK_COUNT_EN_HI;
	wrmsr(tr->b->address, lo, hi);
}

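/* Reprogram all blocks of @bank with interrupts forced to @intr_en. */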
static void threshold_restart_bank(unsigned int bank, bool intr_en)
{
	struct threshold_bank **thr_banks = this_cpu_read(threshold_banks);
	struct threshold_block *block, *tmp;
	struct thresh_restart tr;

	if (!thr_banks || !thr_banks[bank])
		return;

	memset(&tr, 0, sizeof(tr));

	list_for_each_entry_safe(block, tmp, &thr_banks[bank]->miscj, miscj) {
		tr.b = block;
		tr.b->interrupt_enable = intr_en;
		threshold_restart_block(&tr);
	}
}

/* Try to use the threshold limit reported through APEI. */
static u16 get_thr_limit(void)
{
	u32 thr_limit = mce_get_apei_thr_limit();

	/* Fallback to old default if APEI limit is not available. */
	if (!thr_limit)
		return THRESHOLD_MAX;

	return min(thr_limit, THRESHOLD_MAX);
}

static void mce_threshold_block_init(struct threshold_block *b, int offset)
{
	struct thresh_restart tr = {
		.b = b,
		.set_lvt_off = 1,
		.lvt_off = offset,
	};

	b->threshold_limit = get_thr_limit();
	threshold_restart_block(&tr);
};

static int setup_APIC_mce_threshold(int reserved, int new)
{
	if (reserved < 0 && !setup_APIC_eilvt(new, THRESHOLD_APIC_VECTOR,
					      APIC_EILVT_MSG_FIX, 0))
		return new;

	return reserved;
}

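/*
 * Return the MCA_MISC MSR address of the given threshold block, or 0 if the
 * block does not exist. SMCA banks report extra blocks through the
 * MCA_MISC[1-4] registers; older parts chain them via the BlkPtr field.
 */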
static u32 get_block_address(u32 current_addr, u32 low, u32 high,
			     unsigned int bank, unsigned int block,
			     unsigned int cpu)
{
	u32 addr = 0, offset = 0;

	if ((bank >= per_cpu(mce_num_banks, cpu)) || (block >= NR_BLOCKS))
		return addr;

	if (mce_flags.smca) {
		if (!block)
			return MSR_AMD64_SMCA_MCx_MISC(bank);

		if (!(low & MASK_BLKPTR_LO))
			return 0;

		return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
	}

	/* Fall back to method we used for older processors: */
	switch (block) {
	case 0:
		addr = mca_msr_reg(bank, MCA_MISC);
		break;
	case 1:
		offset = ((low & MASK_BLKPTR_LO) >> 21);
		if (offset)
			addr = MCG_XBLK_ADDR + offset;
		break;
	default:
		addr = ++current_addr;
	}
	return addr;
}

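/*
 * Initialize a single threshold block during CPU init: record its bank in
 * bank_map, enable the threshold interrupt if the bank is capable of one and,
 * on non-SMCA parts, program the APIC LVT offset.
 */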
static int prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
				   int offset, u32 misc_high)
{
	unsigned int cpu = smp_processor_id();
	struct threshold_block b;
	int new;

	if (!block)
		per_cpu(bank_map, cpu) |= BIT_ULL(bank);

	memset(&b, 0, sizeof(b));
	b.cpu = cpu;
	b.bank = bank;
	b.block = block;
	b.address = addr;
	b.interrupt_capable = lvt_interrupt_supported(bank, misc_high);

	if (!b.interrupt_capable)
		goto done;

	__set_bit(bank, this_cpu_ptr(&mce_amd_data)->thr_intr_banks);
	b.interrupt_enable = 1;

	if (mce_flags.smca)
		goto done;

	new = (misc_high & MASK_LVTOFF_HI) >> 20;
	offset = setup_APIC_mce_threshold(offset, new);
	if (offset == new)
		thresholding_irq_en = true;

done:
	mce_threshold_block_init(&b, offset);

	return offset;
}

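/* Return true for errors that should be ignored: known-spurious reports on affected families. */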
bool amd_filter_mce(struct mce *m)
{
	enum smca_bank_types bank_type = smca_get_bank_type(m->extcpu, m->bank);
	struct cpuinfo_x86 *c = &boot_cpu_data;

	/* See Family 17h Models 10h-2Fh Erratum #1114. */
	if (c->x86 == 0x17 &&
	    c->x86_model >= 0x10 && c->x86_model <= 0x2F &&
	    bank_type == SMCA_IF && XEC(m->status, 0x3f) == 10)
		return true;

	/* NB GART TLB error reporting is disabled by default. */
	if (c->x86 < 0x17) {
		if (m->bank == 4 && XEC(m->status, 0x1f) == 0x5)
			return true;
	}

	return false;
}

/*
 * Turn off thresholding banks for the following conditions:
 * - MC4_MISC thresholding is not supported on Family 0x15.
 * - Prevent possible spurious interrupts from the IF bank on Family 0x17
 *   Models 0x10-0x2F due to Erratum #1114.
 */
static void disable_err_thresholding(struct cpuinfo_x86 *c, unsigned int bank)
{
	int i, num_msrs;
	u64 hwcr;
	bool need_toggle;
	u32 msrs[NR_BLOCKS];

	if (c->x86 == 0x15 && bank == 4) {
		msrs[0] = 0x00000413; /* MC4_MISC0 */
		msrs[1] = 0xc0000408; /* MC4_MISC1 */
		num_msrs = 2;
	} else if (c->x86 == 0x17 &&
		   (c->x86_model >= 0x10 && c->x86_model <= 0x2F)) {

		if (smca_get_bank_type(smp_processor_id(), bank) != SMCA_IF)
			return;

		msrs[0] = MSR_AMD64_SMCA_MCx_MISC(bank);
		num_msrs = 1;
	} else {
		return;
	}

	rdmsrq(MSR_K7_HWCR, hwcr);

	/* McStatusWrEn has to be set */
	need_toggle = !(hwcr & BIT(18));
	if (need_toggle)
		wrmsrq(MSR_K7_HWCR, hwcr | BIT(18));

	/* Clear CntP bit safely */
	for (i = 0; i < num_msrs; i++)
		msr_clear_bit(msrs[i], 62);

	/* restore old settings */
	if (need_toggle)
		wrmsrq(MSR_K7_HWCR, hwcr);
}

static void amd_apply_cpu_quirks(struct cpuinfo_x86 *c)
{
	struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);

	/* This should be disabled by the BIOS, but isn't always */
	if (c->x86 == 15 && this_cpu_read(mce_num_banks) > 4) {
		/*
		 * disable GART TBL walk error reporting, which
		 * trips off incorrectly with the IOMMU & 3ware
		 * & Cerberus:
		 */
		clear_bit(10, (unsigned long *)&mce_banks[4].ctl);
	}

	/*
	 * Various K7s with broken bank 0 around. Always disable
	 * by default.
	 */
	if (c->x86 == 6 && this_cpu_read(mce_num_banks))
		mce_banks[0].ctl = 0;
}

/*
 * Enable the APIC LVT interrupt vectors once per-CPU. This should be done before hardware is
 * ready to send interrupts.
 *
 * Individual error sources are enabled later during per-bank init.
 */
static void smca_enable_interrupt_vectors(void)
{
	struct mce_amd_cpu_data *data = this_cpu_ptr(&mce_amd_data);
	u64 mca_intr_cfg, offset;

	if (!mce_flags.smca || !mce_flags.succor)
		return;

	if (rdmsrq_safe(MSR_CU_DEF_ERR, &mca_intr_cfg))
		return;

	offset = (mca_intr_cfg & SMCA_THR_LVT_OFF) >> 12;
	if (!setup_APIC_eilvt(offset, THRESHOLD_APIC_VECTOR, APIC_EILVT_MSG_FIX, 0))
		data->thr_intr_en = 1;

	offset = (mca_intr_cfg & MASK_DEF_LVTOFF) >> 4;
	if (!setup_APIC_eilvt(offset, DEFERRED_ERROR_VECTOR, APIC_EILVT_MSG_FIX, 0))
		data->dfr_intr_en = 1;
}

/* cpu init entry point, called from mce.c with preempt off */
void mce_amd_feature_init(struct cpuinfo_x86 *c)
{
	unsigned int bank, block, cpu = smp_processor_id();
	u32 low = 0, high = 0, address = 0;
	int offset = -1;

	amd_apply_cpu_quirks(c);

	mce_flags.amd_threshold = 1;

	smca_enable_interrupt_vectors();

	for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) {
		if (mce_flags.smca) {
			smca_configure(bank, cpu);

			if (!this_cpu_ptr(&mce_amd_data)->thr_intr_en)
				continue;
		}

		disable_err_thresholding(c, bank);

		for (block = 0; block < NR_BLOCKS; ++block) {
			address = get_block_address(address, low, high, bank, block, cpu);
			if (!address)
				break;

			if (rdmsr_safe(address, &low, &high))
				break;

			if (!(high & MASK_VALID_HI))
				continue;

			if (!(high & MASK_CNTP_HI) ||
			    (high & MASK_LOCKED_HI))
				continue;

			offset = prepare_threshold_block(bank, block, address, offset, high);
		}
	}
}

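/* Boot CPU setup for SMCA systems: install the AMD thresholding and deferred error interrupt handlers. */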
void smca_bsp_init(void)
{
	mce_threshold_vector = amd_threshold_interrupt;
	deferred_error_int_vector = amd_deferred_error_interrupt;
}

/*
 * DRAM ECC errors are reported in the Northbridge (bank 4) with
 * Extended Error Code 8.
 */
static bool legacy_mce_is_memory_error(struct mce *m)
{
	return m->bank == 4 && XEC(m->status, 0x1f) == 8;
}

/*
 * DRAM ECC errors are reported in Unified Memory Controllers with
 * Extended Error Code 0.
 */
static bool smca_mce_is_memory_error(struct mce *m)
{
	enum smca_bank_types bank_type;

	if (XEC(m->status, 0x3f))
		return false;

	bank_type = smca_get_bank_type(m->extcpu, m->bank);

	return bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2;
}

bool amd_mce_is_memory_error(struct mce *m)
{
	if (mce_flags.smca)
		return smca_mce_is_memory_error(m);
	else
		return legacy_mce_is_memory_error(m);
}

/*
 * Some AMD systems have an explicit indicator that the value in MCA_ADDR is a
 * system physical address. Individual cases though, need to be detected for
 * other systems. Future cases will be added as needed.
 *
 * 1) General case
 *	a) Assume address is not usable.
 * 2) Poison errors
 *	a) Indicated by MCA_STATUS[43]: poison. Defined for all banks except legacy
 *	   northbridge (bank 4).
 *	b) Refers to poison consumption in the core. Does not include "no action",
 *	   "action optional", or "deferred" error severities.
 *	c) Will include a usable address so that immediate action can be taken.
 * 3) Northbridge DRAM ECC errors
 *	a) Reported in legacy bank 4 with extended error code (XEC) 8.
 *	b) MCA_STATUS[43] is *not* defined as poison in legacy bank 4. Therefore,
 *	   this bit should not be checked.
 * 4) MCI_STATUS_PADDRVAL is set
 *	a) Will provide a valid system physical address.
 *
 * NOTE: SMCA UMC memory errors fall into case #1.
 */
bool amd_mce_usable_address(struct mce *m)
{
	/* Check special northbridge case 3) first. */
	if (!mce_flags.smca) {
		if (legacy_mce_is_memory_error(m))
			return true;
		else if (m->bank == 4)
			return false;
	}

	if (this_cpu_ptr(smca_banks)[m->bank].paddrv)
		return m->status & MCI_STATUS_PADDRV;

	/* Check poison bit for all other bank types. */
	if (m->status & MCI_STATUS_POISON)
		return true;

	/* Assume address is not usable for all others. */
	return false;
}

DEFINE_IDTENTRY_SYSVEC(sysvec_deferred_error)
{
	trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR);
	inc_irq_stat(irq_deferred_error_count);
	deferred_error_int_vector();
	trace_deferred_error_apic_exit(DEFERRED_ERROR_VECTOR);
	apic_eoi();
}

/* APIC interrupt handler for deferred errors */
static void amd_deferred_error_interrupt(void)
{
	machine_check_poll(MCP_TIMESTAMP, &this_cpu_ptr(&mce_amd_data)->dfr_intr_banks);
}

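/* Storm handling: (re)program all blocks of @bank with their thresholding interrupts enabled or disabled as requested by the core storm code. */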
void mce_amd_handle_storm(unsigned int bank, bool on)
{
	threshold_restart_bank(bank, on);
}

static void amd_reset_thr_limit(unsigned int bank)
{
	threshold_restart_bank(bank, true);
}

/*
 * Threshold interrupt handler will service THRESHOLD_APIC_VECTOR. The interrupt
 * goes off when error_count reaches threshold_limit.
 */
static void amd_threshold_interrupt(void)
{
	machine_check_poll(MCP_TIMESTAMP, &this_cpu_ptr(&mce_amd_data)->thr_intr_banks);
}

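/*
 * Reset the bank's threshold counter and clear its status register(s) once an
 * error has been handled. Deferred errors are also cleared from MCA_DESTAT.
 */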
void amd_clear_bank(struct mce *m)
{
	amd_reset_thr_limit(m->bank);

	/* Clear MCA_DESTAT for all deferred errors even those logged in MCA_STATUS. */
	if (m->status & MCI_STATUS_DEFERRED)
		mce_wrmsrq(MSR_AMD64_SMCA_MCx_DESTAT(m->bank), 0);

	/* Don't clear MCA_STATUS if MCA_DESTAT was used exclusively. */
	if (m->kflags & MCE_CHECK_DFR_REGS)
		return;

	mce_wrmsrq(mca_msr_reg(m->bank, MCA_STATUS), 0);
}

/*
 * Sysfs Interface
 */
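/*
 * The attributes below typically show up under the per-CPU MCE device, e.g.:
 *
 *   /sys/devices/system/machinecheck/machinecheck<cpu>/<bank>/<block>/error_count
 *   /sys/devices/system/machinecheck/machinecheck<cpu>/<bank>/<block>/threshold_limit
 *   /sys/devices/system/machinecheck/machinecheck<cpu>/<bank>/<block>/interrupt_enable
 *
 * interrupt_enable is only exposed for blocks that can generate an APIC interrupt.
 */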

struct threshold_attr {
	struct attribute attr;
	ssize_t (*show) (struct threshold_block *, char *);
	ssize_t (*store) (struct threshold_block *, const char *, size_t count);
};

#define SHOW_FIELDS(name) \
static ssize_t show_ ## name(struct threshold_block *b, char *buf) \
{ \
	return sprintf(buf, "%lu\n", (unsigned long) b->name); \
}
SHOW_FIELDS(interrupt_enable)
SHOW_FIELDS(threshold_limit)

static ssize_t
store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size)
{
	struct thresh_restart tr;
	unsigned long new;

	if (!b->interrupt_capable)
		return -EINVAL;

	if (kstrtoul(buf, 0, &new) < 0)
		return -EINVAL;

	b->interrupt_enable = !!new;

	memset(&tr, 0, sizeof(tr));
	tr.b = b;

	if (smp_call_function_single(b->cpu, threshold_restart_block, &tr, 1))
		return -ENODEV;

	return size;
}

static ssize_t
store_threshold_limit(struct threshold_block *b, const char *buf, size_t size)
{
	struct thresh_restart tr;
	unsigned long new;

	if (kstrtoul(buf, 0, &new) < 0)
		return -EINVAL;

	if (new > THRESHOLD_MAX)
		new = THRESHOLD_MAX;
	if (new < 1)
		new = 1;

	memset(&tr, 0, sizeof(tr));
	tr.old_limit = b->threshold_limit;
	b->threshold_limit = new;
	tr.b = b;

	if (smp_call_function_single(b->cpu, threshold_restart_block, &tr, 1))
		return -ENODEV;

	return size;
}

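/*
 * The hardware counter starts at THRESHOLD_MAX - threshold_limit and the
 * overflow bit fires once it passes THRESHOLD_MAX, so the value shown here is
 * the number of errors seen since the counter was last reset.
 */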
static ssize_t show_error_count(struct threshold_block *b, char *buf)
{
	u32 lo, hi;

	/* CPU might be offline by now */
	if (rdmsr_on_cpu(b->cpu, b->address, &lo, &hi))
		return -ENODEV;

	return sprintf(buf, "%u\n", ((hi & THRESHOLD_MAX) -
				     (THRESHOLD_MAX - b->threshold_limit)));
}

static struct threshold_attr error_count = {
	.attr = {.name = __stringify(error_count), .mode = 0444 },
	.show = show_error_count,
};

#define RW_ATTR(val) \
static struct threshold_attr val = { \
	.attr = {.name = __stringify(val), .mode = 0644 }, \
	.show = show_## val, \
	.store = store_## val, \
};

RW_ATTR(interrupt_enable);
RW_ATTR(threshold_limit);

static struct attribute *default_attrs[] = {
	&threshold_limit.attr,
	&error_count.attr,
	NULL, /* possibly interrupt_enable if supported, see below */
	NULL,
};
ATTRIBUTE_GROUPS(default);

#define to_block(k) container_of(k, struct threshold_block, kobj)
#define to_attr(a) container_of(a, struct threshold_attr, attr)

static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
{
	struct threshold_block *b = to_block(kobj);
	struct threshold_attr *a = to_attr(attr);
	ssize_t ret;

	ret = a->show ? a->show(b, buf) : -EIO;

	return ret;
}

static ssize_t store(struct kobject *kobj, struct attribute *attr,
		     const char *buf, size_t count)
{
	struct threshold_block *b = to_block(kobj);
	struct threshold_attr *a = to_attr(attr);
	ssize_t ret;

	ret = a->store ? a->store(b, buf, count) : -EIO;

	return ret;
}

static const struct sysfs_ops threshold_ops = {
	.show = show,
	.store = store,
};

static void threshold_block_release(struct kobject *kobj);

static const struct kobj_type threshold_ktype = {
	.sysfs_ops = &threshold_ops,
	.default_groups = default_groups,
	.release = threshold_block_release,
};

static const char *get_name(unsigned int cpu, unsigned int bank, struct threshold_block *b)
{
	enum smca_bank_types bank_type;

	if (!mce_flags.smca) {
		if (b && bank == 4)
			return bank4_names(b);

		return th_names[bank];
	}

	bank_type = smca_get_bank_type(cpu, bank);

	if (b && (bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2)) {
		if (b->block < ARRAY_SIZE(smca_umc_block_names))
			return smca_umc_block_names[b->block];
	}

	if (b && b->block) {
		snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN, "th_block_%u", b->block);
		return buf_mcatype;
	}

	if (bank_type >= N_SMCA_BANK_TYPES) {
		snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN, "th_bank_%u", bank);
		return buf_mcatype;
	}

	if (per_cpu(smca_bank_counts, cpu)[bank_type] == 1)
		return smca_get_name(bank_type);

	snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN,
		 "%s_%u", smca_get_name(bank_type),
		 per_cpu(smca_banks, cpu)[bank].sysfs_id);
	return buf_mcatype;
}

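/*
 * Walk the blocks of one bank recursively, allocating a threshold_block for
 * each valid one and registering it in sysfs under the bank's kobject.
 */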
static int allocate_threshold_blocks(unsigned int cpu, struct threshold_bank *tb,
				     unsigned int bank, unsigned int block,
				     u32 address)
{
	struct threshold_block *b = NULL;
	u32 low, high;
	int err;

	if ((bank >= this_cpu_read(mce_num_banks)) || (block >= NR_BLOCKS))
		return 0;

	if (rdmsr_safe(address, &low, &high))
		return 0;

	if (!(high & MASK_VALID_HI)) {
		if (block)
			goto recurse;
		else
			return 0;
	}

	if (!(high & MASK_CNTP_HI) ||
	    (high & MASK_LOCKED_HI))
		goto recurse;

	b = kzalloc(sizeof(struct threshold_block), GFP_KERNEL);
	if (!b)
		return -ENOMEM;

	b->block = block;
	b->bank = bank;
	b->cpu = cpu;
	b->address = address;
	b->interrupt_enable = 0;
	b->interrupt_capable = lvt_interrupt_supported(bank, high);
	b->threshold_limit = get_thr_limit();

	if (b->interrupt_capable) {
		default_attrs[2] = &interrupt_enable.attr;
		b->interrupt_enable = 1;
	} else {
		default_attrs[2] = NULL;
	}

	list_add(&b->miscj, &tb->miscj);

	mce_threshold_block_init(b, (high & MASK_LVTOFF_HI) >> 20);

	err = kobject_init_and_add(&b->kobj, &threshold_ktype, tb->kobj, get_name(cpu, bank, b));
	if (err)
		goto out_free;
recurse:
	address = get_block_address(address, low, high, bank, ++block, cpu);
	if (!address)
		return 0;

	err = allocate_threshold_blocks(cpu, tb, bank, block, address);
	if (err)
		goto out_free;

	if (b)
		kobject_uevent(&b->kobj, KOBJ_ADD);

	return 0;

out_free:
	if (b) {
		list_del(&b->miscj);
		kobject_put(&b->kobj);
	}
	return err;
}

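/* Create the sysfs directory for one bank and populate it with its threshold blocks. */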
static int threshold_create_bank(struct threshold_bank **bp, unsigned int cpu,
				 unsigned int bank)
{
	struct device *dev = this_cpu_read(mce_device);
	struct threshold_bank *b = NULL;
	const char *name = get_name(cpu, bank, NULL);
	int err = 0;

	if (!dev)
		return -ENODEV;

	b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL);
	if (!b) {
		err = -ENOMEM;
		goto out;
	}

	/* Associate the bank with the per-CPU MCE device */
	b->kobj = kobject_create_and_add(name, &dev->kobj);
	if (!b->kobj) {
		err = -EINVAL;
		goto out_free;
	}

	INIT_LIST_HEAD(&b->miscj);

	err = allocate_threshold_blocks(cpu, b, bank, 0, mca_msr_reg(bank, MCA_MISC));
	if (err)
		goto out_kobj;

	bp[bank] = b;
	return 0;

out_kobj:
	kobject_put(b->kobj);
out_free:
	kfree(b);
out:
	return err;
}

static void threshold_block_release(struct kobject *kobj)
{
	kfree(to_block(kobj));
}

static void threshold_remove_bank(struct threshold_bank *bank)
{
	struct threshold_block *pos, *tmp;

	list_for_each_entry_safe(pos, tmp, &bank->miscj, miscj) {
		list_del(&pos->miscj);
		kobject_put(&pos->kobj);
	}

	kobject_put(bank->kobj);
	kfree(bank);
}

static void __threshold_remove_device(struct threshold_bank **bp)
{
	unsigned int bank, numbanks = this_cpu_read(mce_num_banks);

	for (bank = 0; bank < numbanks; bank++) {
		if (!bp[bank])
			continue;

		threshold_remove_bank(bp[bank]);
		bp[bank] = NULL;
	}
	kfree(bp);
}

void mce_threshold_remove_device(unsigned int cpu)
{
	struct threshold_bank **bp = this_cpu_read(threshold_banks);

	if (!bp)
		return;

	/*
	 * Clear the pointer before cleaning up, so that the interrupt won't
	 * touch anything of this.
	 */
	this_cpu_write(threshold_banks, NULL);

	__threshold_remove_device(bp);
	return;
}

/**
 * mce_threshold_create_device - Create the per-CPU MCE threshold device
 * @cpu: The plugged in CPU
 *
 * Create directories and files for all valid threshold banks.
 *
 * This is invoked from the CPU hotplug callback which was installed in
 * mcheck_init_device(). The invocation happens in context of the hotplug
 * thread running on @cpu. The callback is invoked on all CPUs which are
 * online when the callback is installed or during a real hotplug event.
 */
void mce_threshold_create_device(unsigned int cpu)
{
	unsigned int numbanks, bank;
	struct threshold_bank **bp;

	if (!mce_flags.amd_threshold)
		return;

	bp = this_cpu_read(threshold_banks);
	if (bp)
		return;

	numbanks = this_cpu_read(mce_num_banks);
	bp = kcalloc(numbanks, sizeof(*bp), GFP_KERNEL);
	if (!bp)
		return;

	for (bank = 0; bank < numbanks; ++bank) {
		if (!(this_cpu_read(bank_map) & BIT_ULL(bank)))
			continue;
		if (threshold_create_bank(bp, cpu, bank)) {
			__threshold_remove_device(bp);
			return;
		}
	}
	this_cpu_write(threshold_banks, bp);

	if (thresholding_irq_en)
		mce_threshold_vector = amd_threshold_interrupt;
	return;
}