CoCalc -- severity.c

GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/kernel/cpu/mce/severity.c
⁵¹⁴⁸¹ views
1
// SPDX-License-Identifier: GPL-2.0-only
2
/*
3
 * MCE grading rules.
4
 * Copyright 2008, 2009 Intel Corporation.
5
 *
6
 * Author: Andi Kleen
7
 */
8
#include <linux/kernel.h>
9
#include <linux/seq_file.h>
10
#include <linux/init.h>
11
#include <linux/debugfs.h>
12
#include <linux/uaccess.h>
13

14
#include <asm/mce.h>
15
#include <asm/cpu_device_id.h>
16
#include <asm/traps.h>
17
#include <asm/insn.h>
18
#include <asm/insn-eval.h>
19

20
#include "internal.h"
21

22
/*
23
 * Grade an mce by severity. In general the most severe ones are processed
24
 * first. Since there are quite a lot of combinations test the bits in a
25
 * table-driven way. The rules are simply processed in order, first
26
 * match wins.
27
 *
28
 * Note this is only used for machine check exceptions, the corrected
29
 * errors use much simpler rules. The exceptions still check for the corrected
30
 * errors, but only to leave them alone for the CMCI handler (except for
31
 * panic situations)
32
 */
33

34
/*
 * Selector values for the rule table below. All of them start at 1 so
 * that a rule leaving the corresponding field at its zero default means
 * "don't care" for that criterion.
 */

/* Where the machine check was taken, as classified by error_context(). */
enum context {
	IN_KERNEL	= 1,
	IN_USER		= 2,
	IN_KERNEL_RECOV	= 3,
};

/* Whether a rule applies only with, or only without, mca_cfg.ser set. */
enum ser {
	SER_REQUIRED	= 1,
	NO_SER		= 2,
};

/* Whether a rule applies only in, or only outside of, exception context. */
enum exception {
	EXCP_CONTEXT	= 1,
	NO_EXCP		= 2,
};
37

38
static struct severity {
39
	u64 mask;
40
	u64 result;
41
	unsigned char sev;
42
	unsigned short mcgmask;
43
	unsigned short mcgres;
44
	unsigned char ser;
45
	unsigned char context;
46
	unsigned char excp;
47
	unsigned char covered;
48
	unsigned int cpu_vfm;
49
	unsigned char cpu_minstepping;
50
	unsigned char bank_lo, bank_hi;
51
	char *msg;
52
} severities[] = {
53
#define MCESEV(s, m, c...) { .sev = MCE_ ## s ## _SEVERITY, .msg = m, ## c }
54
#define BANK_RANGE(l, h) .bank_lo = l, .bank_hi = h
55
#define VFM_STEPPING(m, s) .cpu_vfm = m, .cpu_minstepping = s
56
#define  KERNEL		.context = IN_KERNEL
57
#define  USER		.context = IN_USER
58
#define  KERNEL_RECOV	.context = IN_KERNEL_RECOV
59
#define  SER		.ser = SER_REQUIRED
60
#define  NOSER		.ser = NO_SER
61
#define  EXCP		.excp = EXCP_CONTEXT
62
#define  NOEXCP		.excp = NO_EXCP
63
#define  BITCLR(x)	.mask = x, .result = 0
64
#define  BITSET(x)	.mask = x, .result = x
65
#define  MCGMASK(x, y)	.mcgmask = x, .mcgres = y
66
#define  MASK(x, y)	.mask = x, .result = y
67
#define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S)
68
#define MCI_UC_AR (MCI_STATUS_UC|MCI_STATUS_AR)
69
#define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR)
70
#define	MCI_ADDR (MCI_STATUS_ADDRV|MCI_STATUS_MISCV)
71

72
	MCESEV(
73
		NO, "Invalid",
74
		BITCLR(MCI_STATUS_VAL)
75
		),
76
	MCESEV(
77
		NO, "Not enabled",
78
		EXCP, BITCLR(MCI_STATUS_EN)
79
		),
80
	MCESEV(
81
		PANIC, "Processor context corrupt",
82
		BITSET(MCI_STATUS_PCC)
83
		),
84
	/* When MCIP is not set something is very confused */
85
	MCESEV(
86
		PANIC, "MCIP not set in MCA handler",
87
		EXCP, MCGMASK(MCG_STATUS_MCIP, 0)
88
		),
89
	/* Neither return not error IP -- no chance to recover -> PANIC */
90
	MCESEV(
91
		PANIC, "Neither restart nor error IP",
92
		EXCP, MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0)
93
		),
94
	MCESEV(
95
		PANIC, "In kernel and no restart IP",
96
		EXCP, KERNEL, MCGMASK(MCG_STATUS_RIPV, 0)
97
		),
98
	MCESEV(
99
		PANIC, "In kernel and no restart IP",
100
		EXCP, KERNEL_RECOV, MCGMASK(MCG_STATUS_RIPV, 0)
101
		),
102
	MCESEV(
103
		KEEP, "Corrected error",
104
		NOSER, BITCLR(MCI_STATUS_UC)
105
		),
106
	/*
107
	 * known AO MCACODs reported via MCE or CMC:
108
	 *
109
	 * SRAO could be signaled either via a machine check exception or
110
	 * CMCI with the corresponding bit S 1 or 0. So we don't need to
111
	 * check bit S for SRAO.
112
	 */
113
	MCESEV(
114
		AO, "Action optional: memory scrubbing error",
115
		SER, MASK(MCI_UC_AR|MCACOD_SCRUBMSK, MCI_STATUS_UC|MCACOD_SCRUB)
116
		),
117
	MCESEV(
118
		AO, "Action optional: last level cache writeback error",
119
		SER, MASK(MCI_UC_AR|MCACOD, MCI_STATUS_UC|MCACOD_L3WB)
120
		),
121
	/*
122
	 * Quirk for Skylake/Cascade Lake. Patrol scrubber may be configured
123
	 * to report uncorrected errors using CMCI with a special signature.
124
	 * UC=0, MSCOD=0x0010, MCACOD=binary(000X 0000 1100 XXXX) reported
125
	 * in one of the memory controller banks.
126
	 * Set severity to "AO" for same action as normal patrol scrub error.
127
	 */
128
	MCESEV(
129
		AO, "Uncorrected Patrol Scrub Error",
130
		SER, MASK(MCI_STATUS_UC|MCI_ADDR|0xffffeff0, MCI_ADDR|0x001000c0),
131
		VFM_STEPPING(INTEL_SKYLAKE_X, 4), BANK_RANGE(13, 18)
132
	),
133

134
	/* ignore OVER for UCNA */
135
	MCESEV(
136
		UCNA, "Uncorrected no action required",
137
		SER, MASK(MCI_UC_SAR, MCI_STATUS_UC)
138
		),
139
	MCESEV(
140
		PANIC, "Illegal combination (UCNA with AR=1)",
141
		SER,
142
		MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_UC|MCI_STATUS_AR)
143
		),
144
	MCESEV(
145
		KEEP, "Non signaled machine check",
146
		SER, BITCLR(MCI_STATUS_S)
147
		),
148

149
	MCESEV(
150
		PANIC, "Action required with lost events",
151
		SER, BITSET(MCI_STATUS_OVER|MCI_UC_SAR)
152
		),
153

154
	/* known AR MCACODs: */
155
#ifdef	CONFIG_MEMORY_FAILURE
156
	MCESEV(
157
		KEEP, "Action required but unaffected thread is continuable",
158
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR, MCI_UC_SAR|MCI_ADDR),
159
		MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, MCG_STATUS_RIPV)
160
		),
161
	MCESEV(
162
		AR, "Action required: data load in error recoverable area of kernel",
163
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
164
		KERNEL_RECOV
165
		),
166
	MCESEV(
167
		AR, "Action required: data load error in a user process",
168
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
169
		USER
170
		),
171
	MCESEV(
172
		AR, "Action required: instruction fetch error in a user process",
173
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
174
		USER
175
		),
176
	MCESEV(
177
		AR, "Data load error in SEAM non-root mode",
178
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
179
		MCGMASK(MCG_STATUS_SEAM_NR, MCG_STATUS_SEAM_NR),
180
		KERNEL
181
		),
182
	MCESEV(
183
		AR, "Instruction fetch error in SEAM non-root mode",
184
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
185
		MCGMASK(MCG_STATUS_SEAM_NR, MCG_STATUS_SEAM_NR),
186
		KERNEL
187
		),
188
	MCESEV(
189
		PANIC, "Data load in unrecoverable area of kernel",
190
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
191
		KERNEL
192
		),
193
	MCESEV(
194
		PANIC, "Instruction fetch error in kernel",
195
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
196
		KERNEL
197
		),
198
#endif
199
	MCESEV(
200
		PANIC, "Action required: unknown MCACOD",
201
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_SAR)
202
		),
203

204
	MCESEV(
205
		SOME, "Action optional: unknown MCACOD",
206
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S)
207
		),
208
	MCESEV(
209
		SOME, "Action optional with lost events",
210
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_OVER|MCI_UC_S)
211
		),
212

213
	MCESEV(
214
		PANIC, "Overflowed uncorrected",
215
		BITSET(MCI_STATUS_OVER|MCI_STATUS_UC)
216
		),
217
	MCESEV(
218
		PANIC, "Uncorrected in kernel",
219
		BITSET(MCI_STATUS_UC),
220
		KERNEL
221
		),
222
	MCESEV(
223
		UC, "Uncorrected",
224
		BITSET(MCI_STATUS_UC)
225
		),
226
	MCESEV(
227
		SOME, "No match",
228
		BITSET(0)
229
		)	/* always matches. keep at end */
230
};
231

232
/*
 * True only when both the RIPV and the EIPV bit are set in the given
 * MCG status value.
 */
#define mc_recoverable(mcg)						\
	(((mcg) & (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) ==			\
	 (MCG_STATUS_RIPV|MCG_STATUS_EIPV))
234

235
static bool is_copy_from_user(struct pt_regs *regs)
236
{
237
	u8 insn_buf[MAX_INSN_SIZE];
238
	unsigned long addr;
239
	struct insn insn;
240
	int ret;
241

242
	if (!regs)
243
		return false;
244

245
	if (copy_from_kernel_nofault(insn_buf, (void *)regs->ip, MAX_INSN_SIZE))
246
		return false;
247

248
	ret = insn_decode_kernel(&insn, insn_buf);
249
	if (ret < 0)
250
		return false;
251

252
	switch (insn.opcode.value) {
253
	/* MOV mem,reg */
254
	case 0x8A: case 0x8B:
255
	/* MOVZ mem,reg */
256
	case 0xB60F: case 0xB70F:
257
		addr = (unsigned long)insn_get_addr_ref(&insn, regs);
258
		break;
259
	/* REP MOVS */
260
	case 0xA4: case 0xA5:
261
		addr = regs->si;
262
		break;
263
	default:
264
		return false;
265
	}
266

267
	if (fault_in_kernel_space(addr))
268
		return false;
269

270
	current->mce_vaddr = (void __user *)addr;
271

272
	return true;
273
}
274

275
/*
276
 * If mcgstatus indicated that ip/cs on the stack were
277
 * no good, then "m->cs" will be zero and we will have
278
 * to assume the worst case (IN_KERNEL) as we actually
279
 * have no idea what we were executing when the machine
280
 * check hit.
281
 * If we do have a good "m->cs" (or a faked one in the
282
 * case we were executing in VM86 mode) we can use it to
283
 * distinguish an exception taken in user from from one
284
 * taken in the kernel.
285
 */
286
static noinstr int error_context(struct mce *m, struct pt_regs *regs)
287
{
288
	int fixup_type;
289
	bool copy_user;
290

291
	if ((m->cs & 3) == 3)
292
		return IN_USER;
293

294
	if (!mc_recoverable(m->mcgstatus))
295
		return IN_KERNEL;
296

297
	/* Allow instrumentation around external facilities usage. */
298
	instrumentation_begin();
299
	fixup_type = ex_get_fixup_type(m->ip);
300
	copy_user  = is_copy_from_user(regs);
301
	instrumentation_end();
302

303
	if (copy_user) {
304
		m->kflags |= MCE_IN_KERNEL_COPYIN | MCE_IN_KERNEL_RECOV;
305
		return IN_KERNEL_RECOV;
306
	}
307

308
	switch (fixup_type) {
309
	case EX_TYPE_FAULT_MCE_SAFE:
310
	case EX_TYPE_DEFAULT_MCE_SAFE:
311
		m->kflags |= MCE_IN_KERNEL_RECOV;
312
		return IN_KERNEL_RECOV;
313

314
	default:
315
		return IN_KERNEL;
316
	}
317
}
318

319
/* See AMD PPR(s) section Machine Check Error Handling. */
320
static noinstr int mce_severity_amd(struct mce *m, struct pt_regs *regs, char **msg, bool is_excp)
321
{
322
	char *panic_msg = NULL;
323
	int ret;
324

325
	/*
326
	 * Default return value: Action required, the error must be handled
327
	 * immediately.
328
	 */
329
	ret = MCE_AR_SEVERITY;
330

331
	/* Processor Context Corrupt, no need to fumble too much, die! */
332
	if (m->status & MCI_STATUS_PCC) {
333
		panic_msg = "Processor Context Corrupt";
334
		ret = MCE_PANIC_SEVERITY;
335
		goto out;
336
	}
337

338
	if (m->status & MCI_STATUS_DEFERRED) {
339
		ret = MCE_DEFERRED_SEVERITY;
340
		goto out;
341
	}
342

343
	/*
344
	 * If the UC bit is not set, the system either corrected or deferred
345
	 * the error. No action will be required after logging the error.
346
	 */
347
	if (!(m->status & MCI_STATUS_UC)) {
348
		ret = MCE_KEEP_SEVERITY;
349
		goto out;
350
	}
351

352
	/*
353
	 * On MCA overflow, without the MCA overflow recovery feature the
354
	 * system will not be able to recover, panic.
355
	 */
356
	if ((m->status & MCI_STATUS_OVER) && !mce_flags.overflow_recov) {
357
		panic_msg = "Overflowed uncorrected error without MCA Overflow Recovery";
358
		ret = MCE_PANIC_SEVERITY;
359
		goto out;
360
	}
361

362
	if (!mce_flags.succor) {
363
		panic_msg = "Uncorrected error without MCA Recovery";
364
		ret = MCE_PANIC_SEVERITY;
365
		goto out;
366
	}
367

368
	if (error_context(m, regs) == IN_KERNEL) {
369
		panic_msg = "Uncorrected unrecoverable error in kernel context";
370
		ret = MCE_PANIC_SEVERITY;
371
	}
372

373
out:
374
	if (msg && panic_msg)
375
		*msg = panic_msg;
376

377
	return ret;
378
}
379

380
static noinstr int mce_severity_intel(struct mce *m, struct pt_regs *regs, char **msg, bool is_excp)
381
{
382
	enum exception excp = (is_excp ? EXCP_CONTEXT : NO_EXCP);
383
	enum context ctx = error_context(m, regs);
384
	struct severity *s;
385

386
	for (s = severities;; s++) {
387
		if ((m->status & s->mask) != s->result)
388
			continue;
389
		if ((m->mcgstatus & s->mcgmask) != s->mcgres)
390
			continue;
391
		if (s->ser == SER_REQUIRED && !mca_cfg.ser)
392
			continue;
393
		if (s->ser == NO_SER && mca_cfg.ser)
394
			continue;
395
		if (s->context && ctx != s->context)
396
			continue;
397
		if (s->excp && excp != s->excp)
398
			continue;
399
		if (s->cpu_vfm && boot_cpu_data.x86_vfm != s->cpu_vfm)
400
			continue;
401
		if (s->cpu_minstepping && boot_cpu_data.x86_stepping < s->cpu_minstepping)
402
			continue;
403
		if (s->bank_lo && (m->bank < s->bank_lo || m->bank > s->bank_hi))
404
			continue;
405
		if (msg)
406
			*msg = s->msg;
407
		s->covered = 1;
408

409
		return s->sev;
410
	}
411
}
412

413
/*
 * Grade a machine check: dispatch to the AMD grading function on AMD and
 * Hygon CPUs and to the table-driven Intel one on everything else.
 */
int noinstr mce_severity(struct mce *m, struct pt_regs *regs, char **msg, bool is_excp)
{
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
	case X86_VENDOR_HYGON:
		return mce_severity_amd(m, regs, msg, is_excp);
	default:
		return mce_severity_intel(m, regs, msg, is_excp);
	}
}
421

422
#ifdef CONFIG_DEBUG_FS
423
static void *s_start(struct seq_file *f, loff_t *pos)
424
{
425
	if (*pos >= ARRAY_SIZE(severities))
426
		return NULL;
427
	return &severities[*pos];
428
}
429

430
static void *s_next(struct seq_file *f, void *data, loff_t *pos)
431
{
432
	if (++(*pos) >= ARRAY_SIZE(severities))
433
		return NULL;
434
	return &severities[*pos];
435
}
436

437
static void s_stop(struct seq_file *f, void *data)
438
{
439
}
440

441
static int s_show(struct seq_file *f, void *data)
442
{
443
	struct severity *ser = data;
444
	seq_printf(f, "%d\t%s\n", ser->covered, ser->msg);
445
	return 0;
446
}
447

448
static const struct seq_operations severities_seq_ops = {
449
	.start	= s_start,
450
	.next	= s_next,
451
	.stop	= s_stop,
452
	.show	= s_show,
453
};
454

455
static int severities_coverage_open(struct inode *inode, struct file *file)
456
{
457
	return seq_open(file, &severities_seq_ops);
458
}
459

460
static ssize_t severities_coverage_write(struct file *file,
461
					 const char __user *ubuf,
462
					 size_t count, loff_t *ppos)
463
{
464
	int i;
465
	for (i = 0; i < ARRAY_SIZE(severities); i++)
466
		severities[i].covered = 0;
467
	return count;
468
}
469

470
static const struct file_operations severities_coverage_fops = {
471
	.open		= severities_coverage_open,
472
	.release	= seq_release,
473
	.read		= seq_read,
474
	.write		= severities_coverage_write,
475
	.llseek		= seq_lseek,
476
};
477

478
/*
 * Register the "severities-coverage" file in the MCE debugfs directory.
 * Always returns 0; the result of debugfs_create_file() is not checked.
 */
static int __init severities_debugfs_init(void)
{
	debugfs_create_file("severities-coverage", 0444, mce_get_debugfs_dir(),
			    NULL, &severities_coverage_fops);

	return 0;
}
late_initcall(severities_debugfs_init);
489
#endif /* CONFIG_DEBUG_FS */
490

491
Product

Resources

Company