Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/arch/parisc/kernel/pdt.c
26292 views
1
// SPDX-License-Identifier: GPL-2.0
2
/*
3
* Page Deallocation Table (PDT) support
4
*
5
* The Page Deallocation Table (PDT) is maintained by firmware and holds a
6
* list of memory addresses in which memory errors were detected.
7
* The list contains both single-bit (correctable) and double-bit
8
* (uncorrectable) errors.
9
*
10
* Copyright 2017 by Helge Deller <[email protected]>
11
*
12
* possible future enhancements:
13
* - add userspace interface via procfs or sysfs to clear PDT
14
*/
15
16
#include <linux/memblock.h>
17
#include <linux/seq_file.h>
18
#include <linux/kthread.h>
19
#include <linux/proc_fs.h>
20
#include <linux/initrd.h>
21
#include <linux/pgtable.h>
22
#include <linux/mm.h>
23
24
#include <asm/pdc.h>
25
#include <asm/pdcpat.h>
26
#include <asm/sections.h>
27
#include <asm/pgtable.h>
28
29
/* Firmware interface used to obtain the PDT; chosen in pdc_pdt_init(). */
enum pdt_access_type {
	PDT_NONE,	/* no usable PDT information available */
	PDT_PDC,	/* standard PDC call, used on non-PAT machines */
	PDT_PAT_NEW,	/* newer PAT machines like C8000: info for all cells */
	PDT_PAT_CELL,	/* older PAT machines like rp5470: cell info only */
};
35
36
static enum pdt_access_type pdt_type;
37
38
/* PDT poll interval: 1 minute if errors, 5 minutes if everything OK. */
39
#define PDT_POLL_INTERVAL_DEFAULT (5*60*HZ)
40
#define PDT_POLL_INTERVAL_SHORT (1*60*HZ)
41
static unsigned long pdt_poll_interval = PDT_POLL_INTERVAL_DEFAULT;
42
43
/* global PDT status information */
44
static struct pdc_mem_retinfo pdt_status;
45
46
#define MAX_PDT_TABLE_SIZE PAGE_SIZE
47
#define MAX_PDT_ENTRIES (MAX_PDT_TABLE_SIZE / sizeof(unsigned long))
48
static unsigned long pdt_entry[MAX_PDT_ENTRIES] __page_aligned_bss;
49
50
/*
51
* Constants for the pdt_entry format:
52
* A pdt_entry holds the physical address in bits 0-57, bits 58-61 are
53
* reserved, bit 62 is the perm bit and bit 63 is the error_type bit.
54
* The perm bit indicates whether the error has been verified as a permanent
55
* error (value of 1) or has not been verified, and may be transient (value
56
* of 0). The error_type bit indicates whether the error is a single bit error
57
* (value of 1) or a multiple bit error.
58
* On non-PAT machines phys_addr is encoded in bits 0-59 and error_type in bit
59
* 63. Those machines don't provide the perm bit.
60
*/
61
62
/*
 * Entry decoding helpers.  Non-PAT (PDT_PDC) entries keep flags in the low
 * four bits and have no perm bit; all other formats use the low six bits.
 */
#define PDT_ADDR_PHYS_MASK	(pdt_type == PDT_PDC ? ~0x0f : ~0x3f)
#define PDT_ADDR_PERM_ERR	(pdt_type == PDT_PDC ? 0UL : 2UL)
#define PDT_ADDR_SINGLE_ERR	1UL
65
66
#ifdef CONFIG_PROC_FS
67
/* report PDT entries via /proc/meminfo */
68
void arch_report_meminfo(struct seq_file *m)
69
{
70
if (pdt_type == PDT_NONE)
71
return;
72
73
seq_printf(m, "PDT_max_entries: %7lu\n",
74
pdt_status.pdt_size);
75
seq_printf(m, "PDT_cur_entries: %7lu\n",
76
pdt_status.pdt_entries);
77
}
78
#endif
79
80
static int get_info_pat_new(void)
81
{
82
struct pdc_pat_mem_retinfo pat_rinfo;
83
int ret;
84
85
/* newer PAT machines like C8000 report info for all cells */
86
if (is_pdc_pat())
87
ret = pdc_pat_mem_pdt_info(&pat_rinfo);
88
else
89
return PDC_BAD_PROC;
90
91
pdt_status.pdt_size = pat_rinfo.max_pdt_entries;
92
pdt_status.pdt_entries = pat_rinfo.current_pdt_entries;
93
pdt_status.pdt_status = 0;
94
pdt_status.first_dbe_loc = pat_rinfo.first_dbe_loc;
95
pdt_status.good_mem = pat_rinfo.good_mem;
96
97
return ret;
98
}
99
100
static int get_info_pat_cell(void)
101
{
102
struct pdc_pat_mem_cell_pdt_retinfo cell_rinfo;
103
int ret;
104
105
/* older PAT machines like rp5470 report cell info only */
106
if (is_pdc_pat())
107
ret = pdc_pat_mem_pdt_cell_info(&cell_rinfo, parisc_cell_num);
108
else
109
return PDC_BAD_PROC;
110
111
pdt_status.pdt_size = cell_rinfo.max_pdt_entries;
112
pdt_status.pdt_entries = cell_rinfo.current_pdt_entries;
113
pdt_status.pdt_status = 0;
114
pdt_status.first_dbe_loc = cell_rinfo.first_dbe_loc;
115
pdt_status.good_mem = cell_rinfo.good_mem;
116
117
return ret;
118
}
119
120
static void report_mem_err(unsigned long pde)
121
{
122
struct pdc_pat_mem_phys_mem_location loc;
123
unsigned long addr;
124
char dimm_txt[32];
125
126
addr = pde & PDT_ADDR_PHYS_MASK;
127
128
/* show DIMM slot description on PAT machines */
129
if (is_pdc_pat()) {
130
pdc_pat_mem_get_dimm_phys_location(&loc, addr);
131
sprintf(dimm_txt, "DIMM slot %02x, ", loc.dimm_slot);
132
} else
133
dimm_txt[0] = 0;
134
135
pr_warn("PDT: BAD MEMORY at 0x%08lx, %s%s%s-bit error.\n",
136
addr, dimm_txt,
137
pde & PDT_ADDR_PERM_ERR ? "permanent ":"",
138
pde & PDT_ADDR_SINGLE_ERR ? "single":"multi");
139
}
140
141
142
/*
143
* pdc_pdt_init()
144
*
145
* Initialize kernel PDT structures, read initial PDT table from firmware,
146
* report all current PDT entries and mark bad memory with memblock_reserve()
147
* to avoid that the kernel will use broken memory areas.
148
*
149
*/
150
void __init pdc_pdt_init(void)
151
{
152
int ret, i;
153
unsigned long entries;
154
struct pdc_mem_read_pdt pdt_read_ret;
155
156
pdt_type = PDT_PAT_NEW;
157
ret = get_info_pat_new();
158
159
if (ret != PDC_OK) {
160
pdt_type = PDT_PAT_CELL;
161
ret = get_info_pat_cell();
162
}
163
164
if (ret != PDC_OK) {
165
pdt_type = PDT_PDC;
166
/* non-PAT machines provide the standard PDC call */
167
ret = pdc_mem_pdt_info(&pdt_status);
168
}
169
170
if (ret != PDC_OK) {
171
pdt_type = PDT_NONE;
172
pr_info("PDT: Firmware does not provide any page deallocation"
173
" information.\n");
174
return;
175
}
176
177
entries = pdt_status.pdt_entries;
178
if (WARN_ON(entries > MAX_PDT_ENTRIES))
179
entries = pdt_status.pdt_entries = MAX_PDT_ENTRIES;
180
181
pr_info("PDT: type %s, size %lu, entries %lu, status %lu, dbe_loc 0x%lx,"
182
" good_mem %lu MB\n",
183
pdt_type == PDT_PDC ? __stringify(PDT_PDC) :
184
pdt_type == PDT_PAT_CELL ? __stringify(PDT_PAT_CELL)
185
: __stringify(PDT_PAT_NEW),
186
pdt_status.pdt_size, pdt_status.pdt_entries,
187
pdt_status.pdt_status, pdt_status.first_dbe_loc,
188
pdt_status.good_mem / 1024 / 1024);
189
190
if (entries == 0) {
191
pr_info("PDT: Firmware reports all memory OK.\n");
192
return;
193
}
194
195
if (pdt_status.first_dbe_loc &&
196
pdt_status.first_dbe_loc <= __pa((unsigned long)&_end))
197
pr_crit("CRITICAL: Bad memory inside kernel image memory area!\n");
198
199
pr_warn("PDT: Firmware reports %lu entries of faulty memory:\n",
200
entries);
201
202
if (pdt_type == PDT_PDC)
203
ret = pdc_mem_pdt_read_entries(&pdt_read_ret, pdt_entry);
204
else {
205
#ifdef CONFIG_64BIT
206
struct pdc_pat_mem_read_pd_retinfo pat_pret;
207
208
if (pdt_type == PDT_PAT_CELL)
209
ret = pdc_pat_mem_read_cell_pdt(&pat_pret, pdt_entry,
210
MAX_PDT_ENTRIES);
211
else
212
ret = pdc_pat_mem_read_pd_pdt(&pat_pret, pdt_entry,
213
MAX_PDT_TABLE_SIZE, 0);
214
#else
215
ret = PDC_BAD_PROC;
216
#endif
217
}
218
219
if (ret != PDC_OK) {
220
pdt_type = PDT_NONE;
221
pr_warn("PDT: Get PDT entries failed with %d\n", ret);
222
return;
223
}
224
225
for (i = 0; i < pdt_status.pdt_entries; i++) {
226
unsigned long addr;
227
228
report_mem_err(pdt_entry[i]);
229
230
addr = pdt_entry[i] & PDT_ADDR_PHYS_MASK;
231
if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) &&
232
addr >= initrd_start && addr < initrd_end)
233
pr_crit("CRITICAL: initrd possibly broken "
234
"due to bad memory!\n");
235
236
/* mark memory page bad */
237
memblock_reserve(pdt_entry[i] & PAGE_MASK, PAGE_SIZE);
238
num_poisoned_pages_inc(addr >> PAGE_SHIFT);
239
}
240
}
241
242
243
/*
244
* This is the PDT kernel thread main loop.
245
*/
246
247
static int pdt_mainloop(void *unused)
248
{
249
struct pdc_mem_read_pdt pdt_read_ret;
250
struct pdc_pat_mem_read_pd_retinfo pat_pret __maybe_unused;
251
unsigned long old_num_entries;
252
unsigned long *bad_mem_ptr;
253
int num, ret;
254
255
for (;;) {
256
set_current_state(TASK_INTERRUPTIBLE);
257
258
old_num_entries = pdt_status.pdt_entries;
259
260
schedule_timeout(pdt_poll_interval);
261
if (kthread_should_stop())
262
break;
263
264
/* Do we have new PDT entries? */
265
switch (pdt_type) {
266
case PDT_PAT_NEW:
267
ret = get_info_pat_new();
268
break;
269
case PDT_PAT_CELL:
270
ret = get_info_pat_cell();
271
break;
272
default:
273
ret = pdc_mem_pdt_info(&pdt_status);
274
break;
275
}
276
277
if (ret != PDC_OK) {
278
pr_warn("PDT: unexpected failure %d\n", ret);
279
return -EINVAL;
280
}
281
282
/* if no new PDT entries, just wait again */
283
num = pdt_status.pdt_entries - old_num_entries;
284
if (num <= 0)
285
continue;
286
287
/* decrease poll interval in case we found memory errors */
288
if (pdt_status.pdt_entries &&
289
pdt_poll_interval == PDT_POLL_INTERVAL_DEFAULT)
290
pdt_poll_interval = PDT_POLL_INTERVAL_SHORT;
291
292
/* limit entries to get */
293
if (num > MAX_PDT_ENTRIES) {
294
num = MAX_PDT_ENTRIES;
295
pdt_status.pdt_entries = old_num_entries + num;
296
}
297
298
/* get new entries */
299
switch (pdt_type) {
300
#ifdef CONFIG_64BIT
301
case PDT_PAT_CELL:
302
if (pdt_status.pdt_entries > MAX_PDT_ENTRIES) {
303
pr_crit("PDT: too many entries.\n");
304
return -ENOMEM;
305
}
306
ret = pdc_pat_mem_read_cell_pdt(&pat_pret, pdt_entry,
307
MAX_PDT_ENTRIES);
308
bad_mem_ptr = &pdt_entry[old_num_entries];
309
break;
310
case PDT_PAT_NEW:
311
ret = pdc_pat_mem_read_pd_pdt(&pat_pret,
312
pdt_entry,
313
num * sizeof(unsigned long),
314
old_num_entries * sizeof(unsigned long));
315
bad_mem_ptr = &pdt_entry[0];
316
break;
317
#endif
318
default:
319
ret = pdc_mem_pdt_read_entries(&pdt_read_ret,
320
pdt_entry);
321
bad_mem_ptr = &pdt_entry[old_num_entries];
322
break;
323
}
324
325
/* report and mark memory broken */
326
while (num--) {
327
unsigned long pde = *bad_mem_ptr++;
328
329
report_mem_err(pde);
330
331
#ifdef CONFIG_MEMORY_FAILURE
332
if ((pde & PDT_ADDR_PERM_ERR) ||
333
((pde & PDT_ADDR_SINGLE_ERR) == 0))
334
memory_failure(pde >> PAGE_SHIFT, 0);
335
else
336
soft_offline_page(pde >> PAGE_SHIFT, 0);
337
#else
338
pr_crit("PDT: memory error at 0x%lx ignored.\n"
339
"Rebuild kernel with CONFIG_MEMORY_FAILURE=y "
340
"for real handling.\n",
341
pde & PDT_ADDR_PHYS_MASK);
342
#endif
343
344
}
345
}
346
347
return 0;
348
}
349
350
351
/*
 * pdt_initcall() - start the PDT polling thread
 *
 * Return: -ENODEV when no PDT access method is active, otherwise 0 or the
 * error encoded in the pointer returned by kthread_run().
 */
static int __init pdt_initcall(void)
{
	struct task_struct *task;

	if (pdt_type != PDT_NONE) {
		task = kthread_run(pdt_mainloop, NULL, "kpdtd");
		return PTR_ERR_OR_ZERO(task);
	}

	return -ENODEV;
}

late_initcall(pdt_initcall);
364
365