Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/kernel/cpu/mce/apei.c
26516 views
1
// SPDX-License-Identifier: GPL-2.0-only
2
/*
3
* Bridge between MCE and APEI
4
*
5
* On some machines, corrected memory errors are reported via APEI
6
* generic hardware error source (GHES) instead of corrected Machine
7
* Check. These corrected memory errors can be reported to user space
8
* through /dev/mcelog via faking a corrected Machine Check, so that
9
* the error memory page can be offlined by /sbin/mcelog if the error
10
* count for one page is beyond the threshold.
11
*
12
* For fatal MCE, save MCE record into persistent storage via ERST, so
13
* that the MCE record can be logged after reboot via ERST.
14
*
15
* Copyright 2010 Intel Corp.
16
* Author: Huang Ying <[email protected]>
17
*/
18
19
#include <linux/export.h>
20
#include <linux/kernel.h>
21
#include <linux/acpi.h>
22
#include <linux/cper.h>
23
#include <acpi/apei.h>
24
#include <acpi/ghes.h>
25
#include <asm/mce.h>
26
27
#include "internal.h"
28
29
void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
30
{
31
struct mce_hw_err err;
32
struct mce *m;
33
int lsb;
34
35
if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
36
return;
37
38
/*
39
* Even if the ->validation_bits are set for address mask,
40
* to be extra safe, check and reject an error radius '0',
41
* and fall back to the default page size.
42
*/
43
if (mem_err->validation_bits & CPER_MEM_VALID_PA_MASK)
44
lsb = find_first_bit((void *)&mem_err->physical_addr_mask, PAGE_SHIFT);
45
else
46
lsb = PAGE_SHIFT;
47
48
mce_prep_record(&err);
49
m = &err.m;
50
m->bank = -1;
51
/* Fake a memory read error with unknown channel */
52
m->status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | MCI_STATUS_MISCV | 0x9f;
53
m->misc = (MCI_MISC_ADDR_PHYS << 6) | lsb;
54
55
if (severity >= GHES_SEV_RECOVERABLE)
56
m->status |= MCI_STATUS_UC;
57
58
if (severity >= GHES_SEV_PANIC) {
59
m->status |= MCI_STATUS_PCC;
60
m->tsc = rdtsc();
61
}
62
63
m->addr = mem_err->physical_addr;
64
mce_log(&err);
65
}
66
EXPORT_SYMBOL_GPL(apei_mce_report_mem_error);
67
68
/**
 * apei_smca_report_x86_error - log an SMCA register dump from a CPER record
 * @ctx_info: IA processor context header; the register array is read from
 *            the memory immediately following it
 * @lapic_id: APIC ID used to find the CPU the error is attributed to
 *
 * Return: 0 once the record has been handed to mce_log(); -EINVAL if the
 * boot CPU lacks SMCA, the context does not start at an MCA_STATUS register,
 * the register array holds less than one 8-byte register, or no possible CPU
 * has an initial APIC ID equal to @lapic_id.
 */
int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, u64 lapic_id)
{
	const u64 *regs = (const u64 *)(ctx_info + 1);
	unsigned int cpu, nregs;
	bool matched = false;
	struct mce_hw_err err;
	struct mce *m = &err.m;

	if (!boot_cpu_has(X86_FEATURE_SMCA))
		return -EINVAL;

	/*
	 * The starting address of the register array extracted from BERT must
	 * match with the first expected register in the register layout of
	 * SMCA address space. This address corresponds to the bank's
	 * MCA_STATUS register.
	 *
	 * Match any MCi_STATUS register by turning off bank numbers.
	 */
	if ((ctx_info->msr_addr & MSR_AMD64_SMCA_MC0_STATUS) !=
	    MSR_AMD64_SMCA_MC0_STATUS)
		return -EINVAL;

	/*
	 * The number of registers in the register array is determined by
	 * Register Array Size/8 as defined in UEFI spec v2.8, sec N.2.4.2.2.
	 * Sanity-check registers array size.
	 */
	nregs = ctx_info->reg_arr_size >> 3;
	if (!nregs)
		return -EINVAL;

	/* Find the CPU whose initial APIC ID matches the reported one. */
	for_each_possible_cpu(cpu) {
		if (cpu_data(cpu).topo.initial_apicid == lapic_id) {
			matched = true;
			break;
		}
	}

	if (!matched)
		return -EINVAL;

	memset(&err, 0, sizeof(err));
	mce_prep_record_common(m);
	mce_prep_record_per_cpu(cpu, m);

	m->bank = (ctx_info->msr_addr >> 4) & 0xFF;

	/*
	 * The SMCA register layout is fixed and includes 16 registers.
	 * The end of the array may be variable, but the beginning is known.
	 * Cap the number of registers to expected max (15).
	 */
	if (nregs > 15)
		nregs = 15;

	/*
	 * Copy out every register of interest that the array is long enough
	 * to contain.  Array index N holds the (N + 1)th register, so a
	 * register at index N is present only when nregs > N.
	 */

	/* MCA_STATUS: always present, nregs is at least 1 at this point. */
	m->status = regs[0];

	/* MCA_ADDR */
	if (nregs >= 2)
		m->addr = regs[1];

	/* MCA_MISC0 */
	if (nregs >= 3)
		m->misc = regs[2];

	/* Index 3 (MCA_CONFIG) is not recorded. */

	/* MCA_IPID */
	if (nregs >= 5)
		m->ipid = regs[4];

	/* MCA_SYND */
	if (nregs >= 6)
		m->synd = regs[5];

	/*
	 * Indices 6-12 (reserved, MCA_DESTAT, MCA_DEADDR, MCA_MISC1-4) are
	 * not recorded.
	 */

	/* MCA_SYND1 */
	if (nregs >= 14)
		err.vendor.amd.synd1 = regs[13];

	/* MCA_SYND2 */
	if (nregs >= 15)
		err.vendor.amd.synd2 = regs[14];

	mce_log(&err);

	return 0;
}
175
176
/*
 * Creator ID written into the header of the CPER records built by
 * apei_write_mce() and passed to erst_read_record() by apei_read_mce().
 */
#define CPER_CREATOR_MCE						\
	GUID_INIT(0x75a574e3, 0x5052, 0x4b29,				\
		  0x8a, 0x8e, 0xbe, 0x2c, 0x64, 0x90, 0xb8, 0x9d)

/* Section type GUID used for the section that carries a struct mce. */
#define CPER_SECTION_TYPE_MCE						\
	GUID_INIT(0xfe08ffbe, 0x95e4, 0x4be7,				\
		  0xbc, 0x73, 0x40, 0x96, 0x04, 0x4a, 0x38, 0xfc)
182
183
/*
184
* CPER specification (in UEFI specification 2.3 appendix N) requires
185
* byte-packed.
186
*/
187
struct cper_mce_record {
188
struct cper_record_header hdr;
189
struct cper_section_descriptor sec_hdr;
190
struct mce mce;
191
} __packed;
192
193
int apei_write_mce(struct mce *m)
194
{
195
struct cper_mce_record rcd;
196
197
memset(&rcd, 0, sizeof(rcd));
198
memcpy(rcd.hdr.signature, CPER_SIG_RECORD, CPER_SIG_SIZE);
199
rcd.hdr.revision = CPER_RECORD_REV;
200
rcd.hdr.signature_end = CPER_SIG_END;
201
rcd.hdr.section_count = 1;
202
rcd.hdr.error_severity = CPER_SEV_FATAL;
203
/* timestamp, platform_id, partition_id are all invalid */
204
rcd.hdr.validation_bits = 0;
205
rcd.hdr.record_length = sizeof(rcd);
206
rcd.hdr.creator_id = CPER_CREATOR_MCE;
207
rcd.hdr.notification_type = CPER_NOTIFY_MCE;
208
rcd.hdr.record_id = cper_next_record_id();
209
rcd.hdr.flags = CPER_HW_ERROR_FLAGS_PREVERR;
210
211
rcd.sec_hdr.section_offset = (void *)&rcd.mce - (void *)&rcd;
212
rcd.sec_hdr.section_length = sizeof(rcd.mce);
213
rcd.sec_hdr.revision = CPER_SEC_REV;
214
/* fru_id and fru_text is invalid */
215
rcd.sec_hdr.validation_bits = 0;
216
rcd.sec_hdr.flags = CPER_SEC_PRIMARY;
217
rcd.sec_hdr.section_type = CPER_SECTION_TYPE_MCE;
218
rcd.sec_hdr.section_severity = CPER_SEV_FATAL;
219
220
memcpy(&rcd.mce, m, sizeof(*m));
221
222
return erst_write(&rcd.hdr);
223
}
224
225
/**
 * apei_read_mce - fetch one MCE record stored in ERST
 * @m:         destination for the struct mce carried by the record
 * @record_id: receives the ID of the last record looked up
 *
 * Walks the ERST record IDs and reads the first record that is still
 * present, skipping IDs whose read fails with -ENOENT.
 *
 * Return: sizeof(*@m) when a record was copied into @m, 0 when there is no
 * more record (*@record_id is then APEI_ERST_INVALID_RECORD_ID), otherwise
 * the error returned by the failing ERST call.
 */
ssize_t apei_read_mce(struct mce *m, u64 *record_id)
{
	struct cper_mce_record rcd;
	int rc, pos;

	rc = erst_get_record_id_begin(&pos);
	if (rc)
		return rc;

	for (;;) {
		rc = erst_get_record_id_next(&pos, record_id);
		if (rc)
			break;

		/* no more record; rc is 0 here */
		if (*record_id == APEI_ERST_INVALID_RECORD_ID)
			break;

		rc = erst_read_record(*record_id, &rcd.hdr, sizeof(rcd),
				      sizeof(rcd), &CPER_CREATOR_MCE);

		/* someone else has cleared the record, try next one */
		if (rc == -ENOENT)
			continue;

		/* Any other failure is returned to the caller unchanged. */
		if (rc >= 0) {
			memcpy(m, &rcd.mce, sizeof(*m));
			rc = sizeof(*m);
		}
		break;
	}

	erst_get_record_id_end();

	return rc;
}
255
256
/**
 * apei_check_mce - check whether there is any record in ERST
 *
 * Return: the value of erst_get_record_count(), converted to int.
 */
int apei_check_mce(void)
{
	return erst_get_record_count();
}
261
262
/**
 * apei_clear_mce - clear one record from ERST
 * @record_id: ID of the record to clear
 *
 * Return: whatever erst_clear() returns for @record_id.
 */
int apei_clear_mce(u64 record_id)
{
	return erst_clear(record_id);
}
266
267