Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/drivers/edac/igen6_edac.c
26278 views
1
// SPDX-License-Identifier: GPL-2.0
2
/*
3
* Driver for Intel client SoC with integrated memory controller using IBECC
4
*
5
* Copyright (C) 2020 Intel Corporation
6
*
7
* The In-Band ECC (IBECC) IP provides ECC protection to all or specific
8
* regions of the physical memory space. It's used for memory controllers
9
* that don't support the out-of-band ECC which often needs an additional
10
* storage device to each channel for storing ECC data.
11
*/
12
13
#include <linux/module.h>
14
#include <linux/init.h>
15
#include <linux/pci.h>
16
#include <linux/slab.h>
17
#include <linux/irq_work.h>
18
#include <linux/llist.h>
19
#include <linux/genalloc.h>
20
#include <linux/edac.h>
21
#include <linux/bits.h>
22
#include <linux/io.h>
23
#include <asm/mach_traps.h>
24
#include <asm/nmi.h>
25
#include <asm/mce.h>
26
27
#include "edac_mc.h"
28
#include "edac_module.h"
29
30
#define IGEN6_REVISION "v2.5.1"
31
32
#define EDAC_MOD_STR "igen6_edac"
33
#define IGEN6_NMI_NAME "igen6_ibecc"
34
35
/* Logging wrappers that tag every message with the "igen6" prefix */
#define igen6_printk(level, fmt, arg...)		\
	edac_printk(level, "igen6", fmt, ##arg)

#define igen6_mc_printk(mci, level, fmt, arg...)	\
	edac_mc_chipset_printk(mci, level, "igen6", fmt, ##arg)

/* Extract bits [hi:lo] of v and right-justify them */
#define GET_BITFIELD(v, lo, hi) (((v) & GENMASK_ULL(hi, lo)) >> (lo))

/* Upper bounds used to size the driver's arrays */
#define NUM_IMC				2 /* Max memory controllers */
#define NUM_CHANNELS			2 /* Max channels */
#define NUM_DIMMS			2 /* Max DIMMs per channel */

#define _4GB				BIT_ULL(32)

/* Register holding the size of physical memory */
#define TOM_OFFSET			0xa0
/* Register holding the top of low usable DRAM */
#define TOLUD_OFFSET			0xbc

/* Capability register C */
#define CAPID_C_OFFSET			0xec
#define CAPID_C_IBECC			BIT(15)

/* Capability register E */
#define CAPID_E_OFFSET			0xf0
#define CAPID_E_IBECC			BIT(12)
#define CAPID_E_IBECC_BIT18		BIT(18)

/* Error Status register and its CE/UE flags */
#define ERRSTS_OFFSET			0xc8
#define ERRSTS_CE			BIT_ULL(6)
#define ERRSTS_UE			BIT_ULL(7)

/* Error Command register and its CE/UE enables */
#define ERRCMD_OFFSET			0xca
#define ERRCMD_CE			BIT_ULL(6)
#define ERRCMD_UE			BIT_ULL(7)

/* IBECC MMIO base address (taken from the active res_config) */
#define IBECC_BASE			(res_cfg->ibecc_base)
#define IBECC_ACTIVATE_OFFSET		IBECC_BASE
#define IBECC_ACTIVATE_EN		BIT(0)

/* IBECC error log: CE/UE flags, error address and syndrome fields */
#define ECC_ERROR_LOG_OFFSET		(IBECC_BASE + res_cfg->ibecc_error_log_offset)
#define ECC_ERROR_LOG_CE		BIT_ULL(62)
#define ECC_ERROR_LOG_UE		BIT_ULL(63)
#define ECC_ERROR_LOG_ADDR_SHIFT	5
#define ECC_ERROR_LOG_ADDR(v)		GET_BITFIELD(v, 5, 38)
#define ECC_ERROR_LOG_ADDR45(v)		GET_BITFIELD(v, 5, 45)
#define ECC_ERROR_LOG_SYND(v)		GET_BITFIELD(v, 46, 61)

/* Host MMIO base address register */
#define MCHBAR_OFFSET			0x48
#define MCHBAR_EN			BIT_ULL(0)
#define MCHBAR_BASE(v)			(GET_BITFIELD(v, 16, 38) << 16)
#define MCHBAR_SIZE			0x10000

/* Channel decode stage parameters (relative to the IMC register base) */
#define IMC_BASE			(res_cfg->imc_base)
#define MAD_INTER_CHANNEL_OFFSET	IMC_BASE
#define MAD_INTER_CHANNEL_DDR_TYPE(v)	GET_BITFIELD(v, 0, 2)
#define MAD_INTER_CHANNEL_ECHM(v)	GET_BITFIELD(v, 3, 3)
#define MAD_INTER_CHANNEL_CH_L_MAP(v)	GET_BITFIELD(v, 4, 4)
#define MAD_INTER_CHANNEL_CH_S_SIZE(v)	((u64)GET_BITFIELD(v, 12, 19) << 29)

/* DRAM decode stage parameters */
#define MAD_INTRA_CH0_OFFSET		(IMC_BASE + 4)
#define MAD_INTRA_CH_DIMM_L_MAP(v)	GET_BITFIELD(v, 0, 0)

/* DIMM characteristics: sizes (in units of 1 << 29 bytes) and widths */
#define MAD_DIMM_CH0_OFFSET		(IMC_BASE + 0xc)
#define MAD_DIMM_CH_DIMM_L_SIZE(v)	((u64)GET_BITFIELD(v, 0, 6) << 29)
#define MAD_DIMM_CH_DLW(v)		GET_BITFIELD(v, 7, 8)
#define MAD_DIMM_CH_DIMM_S_SIZE(v)	((u64)GET_BITFIELD(v, 16, 22) << 29)
#define MAD_DIMM_CH_DSW(v)		GET_BITFIELD(v, 24, 25)

/* Hash used for memory controller selection */
#define MAD_MC_HASH_OFFSET		(IMC_BASE + 0x1b8)
#define MAC_MC_HASH_LSB(v)		GET_BITFIELD(v, 1, 3)

/* Hash used for channel selection */
#define CHANNEL_HASH_OFFSET		(IMC_BASE + 0x24)
/* Hash used for enhanced channel selection */
#define CHANNEL_EHASH_OFFSET		(IMC_BASE + 0x28)
#define CHANNEL_HASH_MASK(v)		(GET_BITFIELD(v, 6, 19) << 6)
#define CHANNEL_HASH_LSB_MASK_BIT(v)	GET_BITFIELD(v, 24, 26)
#define CHANNEL_HASH_MODE(v)		GET_BITFIELD(v, 28, 28)

/* Memory slice decode stage parameters */
#define MEM_SLICE_HASH_MASK(v)		(GET_BITFIELD(v, 6, 19) << 6)
#define MEM_SLICE_HASH_LSB_MASK_BIT(v)	GET_BITFIELD(v, 24, 26)
128
static struct res_config {
129
bool machine_check;
130
/* The number of present memory controllers. */
131
int num_imc;
132
u32 imc_base;
133
u32 cmf_base;
134
u32 cmf_size;
135
u32 ms_hash_offset;
136
u32 ibecc_base;
137
u32 ibecc_error_log_offset;
138
bool (*ibecc_available)(struct pci_dev *pdev);
139
/* Extract error address logged in IBECC */
140
u64 (*err_addr)(u64 ecclog);
141
/* Convert error address logged in IBECC to system physical address */
142
u64 (*err_addr_to_sys_addr)(u64 eaddr, int mc);
143
/* Convert error address logged in IBECC to integrated memory controller address */
144
u64 (*err_addr_to_imc_addr)(u64 eaddr, int mc);
145
} *res_cfg;
146
147
struct igen6_imc {
148
int mc;
149
struct mem_ctl_info *mci;
150
struct pci_dev *pdev;
151
struct device dev;
152
void __iomem *window;
153
u64 size;
154
u64 ch_s_size;
155
int ch_l_map;
156
u64 dimm_s_size[NUM_CHANNELS];
157
u64 dimm_l_size[NUM_CHANNELS];
158
int dimm_l_map[NUM_CHANNELS];
159
};
160
161
static struct igen6_pvt {
162
struct igen6_imc imc[NUM_IMC];
163
u64 ms_hash;
164
u64 ms_s_size;
165
int ms_l_map;
166
} *igen6_pvt;
167
168
/* The top of low usable DRAM */
169
static u32 igen6_tolud;
170
/* The size of physical memory */
171
static u64 igen6_tom;
172
173
struct decoded_addr {
174
int mc;
175
u64 imc_addr;
176
u64 sys_addr;
177
int channel_idx;
178
u64 channel_addr;
179
int sub_channel_idx;
180
u64 sub_channel_addr;
181
};
182
183
struct ecclog_node {
184
struct llist_node llnode;
185
int mc;
186
u64 ecclog;
187
};
188
189
/*
190
* In the NMI handler, the driver uses the lock-less memory allocator
191
* to allocate memory to store the IBECC error logs and links the logs
192
* to the lock-less list. Delay printk() and the work of error reporting
193
* to EDAC core in a worker.
194
*/
195
#define ECCLOG_POOL_SIZE PAGE_SIZE
196
static LLIST_HEAD(ecclog_llist);
197
static struct gen_pool *ecclog_pool;
198
static char ecclog_buf[ECCLOG_POOL_SIZE];
199
static struct irq_work ecclog_irq_work;
200
static struct work_struct ecclog_work;
201
202
/* Elkhart Lake compute dies with IBECC */
#define DID_EHL_SKU5	0x4514
#define DID_EHL_SKU6	0x4528
#define DID_EHL_SKU7	0x452a
#define DID_EHL_SKU8	0x4516
#define DID_EHL_SKU9	0x452c
#define DID_EHL_SKU10	0x452e
#define DID_EHL_SKU11	0x4532
#define DID_EHL_SKU12	0x4518
#define DID_EHL_SKU13	0x451a
#define DID_EHL_SKU14	0x4534
#define DID_EHL_SKU15	0x4536

/* ICL-NNPI compute dies with IBECC */
#define DID_ICL_SKU8	0x4581
#define DID_ICL_SKU10	0x4585
#define DID_ICL_SKU11	0x4589
#define DID_ICL_SKU12	0x458d

/* Tiger Lake compute dies with IBECC */
#define DID_TGL_SKU	0x9a14

/* Alder Lake compute dies with IBECC */
#define DID_ADL_SKU1	0x4601
#define DID_ADL_SKU2	0x4602
#define DID_ADL_SKU3	0x4621
#define DID_ADL_SKU4	0x4641

/* Alder Lake-N compute dies with IBECC */
#define DID_ADL_N_SKU1	0x4614
#define DID_ADL_N_SKU2	0x4617
#define DID_ADL_N_SKU3	0x461b
#define DID_ADL_N_SKU4	0x461c
#define DID_ADL_N_SKU5	0x4673
#define DID_ADL_N_SKU6	0x4674
#define DID_ADL_N_SKU7	0x4675
#define DID_ADL_N_SKU8	0x4677
#define DID_ADL_N_SKU9	0x4678
#define DID_ADL_N_SKU10	0x4679
#define DID_ADL_N_SKU11	0x467c
#define DID_ADL_N_SKU12	0x4632

/* Arizona Beach compute dies with IBECC */
#define DID_AZB_SKU1	0x4676

/* Amston Lake compute dies with IBECC */
#define DID_ASL_SKU1	0x464a

/* Raptor Lake-P compute dies with IBECC */
#define DID_RPL_P_SKU1	0xa706
#define DID_RPL_P_SKU2	0xa707
#define DID_RPL_P_SKU3	0xa708
#define DID_RPL_P_SKU4	0xa716
#define DID_RPL_P_SKU5	0xa718

/* Meteor Lake-PS compute dies with IBECC */
#define DID_MTL_PS_SKU1	0x7d21
#define DID_MTL_PS_SKU2	0x7d22
#define DID_MTL_PS_SKU3	0x7d23
#define DID_MTL_PS_SKU4	0x7d24

/* Meteor Lake-P compute dies with IBECC */
#define DID_MTL_P_SKU1	0x7d01
#define DID_MTL_P_SKU2	0x7d02
#define DID_MTL_P_SKU3	0x7d14

/* Arrow Lake-UH compute dies with IBECC */
#define DID_ARL_UH_SKU1	0x7d06
#define DID_ARL_UH_SKU2	0x7d20
#define DID_ARL_UH_SKU3	0x7d30

/* Panther Lake-H compute dies with IBECC */
#define DID_PTL_H_SKU1	0xb000
#define DID_PTL_H_SKU2	0xb001
#define DID_PTL_H_SKU3	0xb002

/* Wildcat Lake compute dies with IBECC */
#define DID_WCL_SKU1	0xfd00
280
281
/*
 * Fetch the host MMIO base address (MCHBAR) from PCI config space.
 *
 * @pdev:   device whose config space holds the MCHBAR register
 * @mchbar: receives the decoded base address on success
 *
 * Returns 0 on success, or -ENODEV when either half of the register
 * cannot be read or the MCHBAR enable bit is clear.
 */
static int get_mchbar(struct pci_dev *pdev, u64 *mchbar)
{
	u32 lo, hi;
	u64 raw;

	if (pci_read_config_dword(pdev, MCHBAR_OFFSET, &lo)) {
		igen6_printk(KERN_ERR, "Failed to read lower MCHBAR\n");
		return -ENODEV;
	}

	if (pci_read_config_dword(pdev, MCHBAR_OFFSET + 4, &hi)) {
		igen6_printk(KERN_ERR, "Failed to read upper MCHBAR\n");
		return -ENODEV;
	}

	/* The first dword supplies bits 31:0, the second bits 63:32 */
	raw = ((u64)hi << 32) | lo;

	if (!(raw & MCHBAR_EN)) {
		igen6_printk(KERN_ERR, "MCHBAR is disabled\n");
		return -ENODEV;
	}

	*mchbar = MCHBAR_BASE(raw);

	return 0;
}
310
311
static bool ehl_ibecc_available(struct pci_dev *pdev)
312
{
313
u32 v;
314
315
if (pci_read_config_dword(pdev, CAPID_C_OFFSET, &v))
316
return false;
317
318
return !!(CAPID_C_IBECC & v);
319
}
320
321
/* The logged error address is returned unchanged; @mc is unused. */
static u64 ehl_err_addr_to_sys_addr(u64 eaddr, int mc)
{
	u64 sys_addr = eaddr;

	return sys_addr;
}
325
326
/*
 * Convert a logged error address to a memory controller address.
 *
 * Addresses below TOLUD are returned unchanged. Above that, the address
 * is rebased by (TOLUD - 4GB) when TOM is at most 4GB, or by (TOLUD - TOM)
 * when the address is at or beyond TOM. @mc is unused.
 */
static u64 ehl_err_addr_to_imc_addr(u64 eaddr, int mc)
{
	u64 imc_addr = eaddr;

	if (eaddr >= igen6_tolud) {
		if (igen6_tom <= _4GB)
			imc_addr = eaddr + igen6_tolud - _4GB;
		else if (eaddr >= igen6_tom)
			imc_addr = eaddr + igen6_tolud - igen6_tom;
	}

	return imc_addr;
}
339
340
static bool icl_ibecc_available(struct pci_dev *pdev)
341
{
342
u32 v;
343
344
if (pci_read_config_dword(pdev, CAPID_C_OFFSET, &v))
345
return false;
346
347
return !(CAPID_C_IBECC & v) &&
348
(boot_cpu_data.x86_stepping >= 1);
349
}
350
351
static bool tgl_ibecc_available(struct pci_dev *pdev)
352
{
353
u32 v;
354
355
if (pci_read_config_dword(pdev, CAPID_E_OFFSET, &v))
356
return false;
357
358
return !(CAPID_E_IBECC & v);
359
}
360
361
static bool mtl_p_ibecc_available(struct pci_dev *pdev)
362
{
363
u32 v;
364
365
if (pci_read_config_dword(pdev, CAPID_E_OFFSET, &v))
366
return false;
367
368
return !(CAPID_E_IBECC_BIT18 & v);
369
}
370
371
static bool mtl_ps_ibecc_available(struct pci_dev *pdev)
372
{
373
#define MCHBAR_MEMSS_IBECCDIS 0x13c00
374
void __iomem *window;
375
u64 mchbar;
376
u32 val;
377
378
if (get_mchbar(pdev, &mchbar))
379
return false;
380
381
window = ioremap(mchbar, MCHBAR_SIZE * 2);
382
if (!window) {
383
igen6_printk(KERN_ERR, "Failed to ioremap 0x%llx\n", mchbar);
384
return false;
385
}
386
387
val = readl(window + MCHBAR_MEMSS_IBECCDIS);
388
iounmap(window);
389
390
/* Bit6: 1 - IBECC is disabled, 0 - IBECC isn't disabled */
391
return !GET_BITFIELD(val, 6, 6);
392
}
393
394
/*
 * Convert a memory address to a system physical address.
 *
 * Addresses below TOLUD are returned unchanged. Above that, the address
 * is rebased by (4GB - TOLUD) when TOM is at most 4GB, or by (TOM - TOLUD)
 * when the address is still below 4GB.
 */
static u64 mem_addr_to_sys_addr(u64 maddr)
{
	u64 sys_addr = maddr;

	if (maddr >= igen6_tolud) {
		if (igen6_tom <= _4GB)
			sys_addr = maddr - igen6_tolud + _4GB;
		else if (maddr < _4GB)
			sys_addr = maddr - igen6_tolud + igen6_tom;
	}

	return sys_addr;
}
407
408
/*
 * XOR together @hash_init, bits 6..19 of (@addr & @mask) and bit
 * @intlv_bit of @addr, and return the result.
 */
static u64 mem_slice_hash(u64 addr, u64 mask, u64 hash_init, int intlv_bit)
{
	u64 selected = addr & mask;
	u64 result = hash_init ^ ((addr >> intlv_bit) & 1);
	int bit = 6;

	while (bit < 20) {
		result ^= (selected >> bit) & 1;
		bit++;
	}

	return result;
}
419
420
/*
 * Convert a logged error address to a memory address.
 *
 * At or above ms_s_size the address is simply offset by ms_s_size.
 * Below it, a one-bit gap is opened at the interleave bit position and
 * filled with the memory slice hash seeded with @mc.
 */
static u64 tgl_err_addr_to_mem_addr(u64 eaddr, int mc)
{
	u64 slice_small = igen6_pvt->ms_s_size;
	u64 upper, lower, expanded, sel;
	u32 ms_hash;
	int bit;

	if (eaddr >= slice_small)
		return eaddr + slice_small;

	ms_hash = igen6_pvt->ms_hash;
	bit = MEM_SLICE_HASH_LSB_MASK_BIT(ms_hash) + 6;

	/* Shift everything from the interleave bit upwards by one position */
	upper = GET_BITFIELD(eaddr, bit, 63) << (bit + 1);
	lower = GET_BITFIELD(eaddr, 0, bit - 1);
	expanded = upper | lower;

	sel = mem_slice_hash(expanded, MEM_SLICE_HASH_MASK(ms_hash), mc, bit);

	return expanded | (sel << bit);
}
442
443
/* Logged error address -> memory address -> system physical address */
static u64 tgl_err_addr_to_sys_addr(u64 eaddr, int mc)
{
	return mem_addr_to_sys_addr(tgl_err_addr_to_mem_addr(eaddr, mc));
}
449
450
/* The logged error address is returned unchanged; @mc is unused. */
static u64 tgl_err_addr_to_imc_addr(u64 eaddr, int mc)
{
	u64 imc_addr = eaddr;

	return imc_addr;
}
454
455
/* The logged error address is treated as a memory address; @mc is unused. */
static u64 adl_err_addr_to_sys_addr(u64 eaddr, int mc)
{
	u64 sys_addr = mem_addr_to_sys_addr(eaddr);

	return sys_addr;
}
459
460
/*
 * Convert a logged error address to a memory controller address.
 *
 * At or above 2 * ms_s_size the address is offset down by ms_s_size.
 * Below that, the interleave bit named by the controller's MC hash
 * register is removed and the upper bits are shifted down over it.
 */
static u64 adl_err_addr_to_imc_addr(u64 eaddr, int mc)
{
	u64 slice_small = igen6_pvt->ms_s_size;
	u64 upper, lower;
	u32 mc_hash;
	int bit;

	if (eaddr >= 2 * slice_small)
		return eaddr - slice_small;

	mc_hash = readl(igen6_pvt->imc[mc].window + MAD_MC_HASH_OFFSET);
	bit = MAC_MC_HASH_LSB(mc_hash) + 6;

	upper = GET_BITFIELD(eaddr, bit + 1, 63) << bit;
	lower = GET_BITFIELD(eaddr, 0, bit - 1);

	return upper | lower;
}
479
480
/*
 * Extract the error address field (bits 5..45) from an IBECC error log.
 *
 * NOTE(review): the value is returned right-justified, i.e. without the
 * ECC_ERROR_LOG_ADDR_SHIFT that the default extraction applies - confirm
 * this is intended.
 */
static u64 rpl_p_err_addr(u64 ecclog)
{
	u64 addr = ECC_ERROR_LOG_ADDR45(ecclog);

	return addr;
}
484
485
static struct res_config ehl_cfg = {
486
.num_imc = 1,
487
.imc_base = 0x5000,
488
.ibecc_base = 0xdc00,
489
.ibecc_available = ehl_ibecc_available,
490
.ibecc_error_log_offset = 0x170,
491
.err_addr_to_sys_addr = ehl_err_addr_to_sys_addr,
492
.err_addr_to_imc_addr = ehl_err_addr_to_imc_addr,
493
};
494
495
static struct res_config icl_cfg = {
496
.num_imc = 1,
497
.imc_base = 0x5000,
498
.ibecc_base = 0xd800,
499
.ibecc_error_log_offset = 0x170,
500
.ibecc_available = icl_ibecc_available,
501
.err_addr_to_sys_addr = ehl_err_addr_to_sys_addr,
502
.err_addr_to_imc_addr = ehl_err_addr_to_imc_addr,
503
};
504
505
static struct res_config tgl_cfg = {
506
.machine_check = true,
507
.num_imc = 2,
508
.imc_base = 0x5000,
509
.cmf_base = 0x11000,
510
.cmf_size = 0x800,
511
.ms_hash_offset = 0xac,
512
.ibecc_base = 0xd400,
513
.ibecc_error_log_offset = 0x170,
514
.ibecc_available = tgl_ibecc_available,
515
.err_addr_to_sys_addr = tgl_err_addr_to_sys_addr,
516
.err_addr_to_imc_addr = tgl_err_addr_to_imc_addr,
517
};
518
519
static struct res_config adl_cfg = {
520
.machine_check = true,
521
.num_imc = 2,
522
.imc_base = 0xd800,
523
.ibecc_base = 0xd400,
524
.ibecc_error_log_offset = 0x68,
525
.ibecc_available = tgl_ibecc_available,
526
.err_addr_to_sys_addr = adl_err_addr_to_sys_addr,
527
.err_addr_to_imc_addr = adl_err_addr_to_imc_addr,
528
};
529
530
static struct res_config adl_n_cfg = {
531
.machine_check = true,
532
.num_imc = 1,
533
.imc_base = 0xd800,
534
.ibecc_base = 0xd400,
535
.ibecc_error_log_offset = 0x68,
536
.ibecc_available = tgl_ibecc_available,
537
.err_addr_to_sys_addr = adl_err_addr_to_sys_addr,
538
.err_addr_to_imc_addr = adl_err_addr_to_imc_addr,
539
};
540
541
static struct res_config rpl_p_cfg = {
542
.machine_check = true,
543
.num_imc = 2,
544
.imc_base = 0xd800,
545
.ibecc_base = 0xd400,
546
.ibecc_error_log_offset = 0x68,
547
.ibecc_available = tgl_ibecc_available,
548
.err_addr = rpl_p_err_addr,
549
.err_addr_to_sys_addr = adl_err_addr_to_sys_addr,
550
.err_addr_to_imc_addr = adl_err_addr_to_imc_addr,
551
};
552
553
static struct res_config mtl_ps_cfg = {
554
.machine_check = true,
555
.num_imc = 2,
556
.imc_base = 0xd800,
557
.ibecc_base = 0xd400,
558
.ibecc_error_log_offset = 0x170,
559
.ibecc_available = mtl_ps_ibecc_available,
560
.err_addr_to_sys_addr = adl_err_addr_to_sys_addr,
561
.err_addr_to_imc_addr = adl_err_addr_to_imc_addr,
562
};
563
564
static struct res_config mtl_p_cfg = {
565
.machine_check = true,
566
.num_imc = 2,
567
.imc_base = 0xd800,
568
.ibecc_base = 0xd400,
569
.ibecc_error_log_offset = 0x170,
570
.ibecc_available = mtl_p_ibecc_available,
571
.err_addr_to_sys_addr = adl_err_addr_to_sys_addr,
572
.err_addr_to_imc_addr = adl_err_addr_to_imc_addr,
573
};
574
575
static struct res_config wcl_cfg = {
576
.machine_check = true,
577
.num_imc = 1,
578
.imc_base = 0xd800,
579
.ibecc_base = 0xd400,
580
.ibecc_error_log_offset = 0x170,
581
.ibecc_available = mtl_p_ibecc_available,
582
.err_addr_to_sys_addr = adl_err_addr_to_sys_addr,
583
.err_addr_to_imc_addr = adl_err_addr_to_imc_addr,
584
};
585
586
static struct pci_device_id igen6_pci_tbl[] = {
587
{ PCI_VDEVICE(INTEL, DID_EHL_SKU5), (kernel_ulong_t)&ehl_cfg },
588
{ PCI_VDEVICE(INTEL, DID_EHL_SKU6), (kernel_ulong_t)&ehl_cfg },
589
{ PCI_VDEVICE(INTEL, DID_EHL_SKU7), (kernel_ulong_t)&ehl_cfg },
590
{ PCI_VDEVICE(INTEL, DID_EHL_SKU8), (kernel_ulong_t)&ehl_cfg },
591
{ PCI_VDEVICE(INTEL, DID_EHL_SKU9), (kernel_ulong_t)&ehl_cfg },
592
{ PCI_VDEVICE(INTEL, DID_EHL_SKU10), (kernel_ulong_t)&ehl_cfg },
593
{ PCI_VDEVICE(INTEL, DID_EHL_SKU11), (kernel_ulong_t)&ehl_cfg },
594
{ PCI_VDEVICE(INTEL, DID_EHL_SKU12), (kernel_ulong_t)&ehl_cfg },
595
{ PCI_VDEVICE(INTEL, DID_EHL_SKU13), (kernel_ulong_t)&ehl_cfg },
596
{ PCI_VDEVICE(INTEL, DID_EHL_SKU14), (kernel_ulong_t)&ehl_cfg },
597
{ PCI_VDEVICE(INTEL, DID_EHL_SKU15), (kernel_ulong_t)&ehl_cfg },
598
{ PCI_VDEVICE(INTEL, DID_ICL_SKU8), (kernel_ulong_t)&icl_cfg },
599
{ PCI_VDEVICE(INTEL, DID_ICL_SKU10), (kernel_ulong_t)&icl_cfg },
600
{ PCI_VDEVICE(INTEL, DID_ICL_SKU11), (kernel_ulong_t)&icl_cfg },
601
{ PCI_VDEVICE(INTEL, DID_ICL_SKU12), (kernel_ulong_t)&icl_cfg },
602
{ PCI_VDEVICE(INTEL, DID_TGL_SKU), (kernel_ulong_t)&tgl_cfg },
603
{ PCI_VDEVICE(INTEL, DID_ADL_SKU1), (kernel_ulong_t)&adl_cfg },
604
{ PCI_VDEVICE(INTEL, DID_ADL_SKU2), (kernel_ulong_t)&adl_cfg },
605
{ PCI_VDEVICE(INTEL, DID_ADL_SKU3), (kernel_ulong_t)&adl_cfg },
606
{ PCI_VDEVICE(INTEL, DID_ADL_SKU4), (kernel_ulong_t)&adl_cfg },
607
{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU1), (kernel_ulong_t)&adl_n_cfg },
608
{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU2), (kernel_ulong_t)&adl_n_cfg },
609
{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU3), (kernel_ulong_t)&adl_n_cfg },
610
{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU4), (kernel_ulong_t)&adl_n_cfg },
611
{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU5), (kernel_ulong_t)&adl_n_cfg },
612
{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU6), (kernel_ulong_t)&adl_n_cfg },
613
{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU7), (kernel_ulong_t)&adl_n_cfg },
614
{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU8), (kernel_ulong_t)&adl_n_cfg },
615
{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU9), (kernel_ulong_t)&adl_n_cfg },
616
{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU10), (kernel_ulong_t)&adl_n_cfg },
617
{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU11), (kernel_ulong_t)&adl_n_cfg },
618
{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU12), (kernel_ulong_t)&adl_n_cfg },
619
{ PCI_VDEVICE(INTEL, DID_AZB_SKU1), (kernel_ulong_t)&adl_n_cfg },
620
{ PCI_VDEVICE(INTEL, DID_ASL_SKU1), (kernel_ulong_t)&adl_n_cfg },
621
{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU1), (kernel_ulong_t)&rpl_p_cfg },
622
{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU2), (kernel_ulong_t)&rpl_p_cfg },
623
{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU3), (kernel_ulong_t)&rpl_p_cfg },
624
{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU4), (kernel_ulong_t)&rpl_p_cfg },
625
{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU5), (kernel_ulong_t)&rpl_p_cfg },
626
{ PCI_VDEVICE(INTEL, DID_MTL_PS_SKU1), (kernel_ulong_t)&mtl_ps_cfg },
627
{ PCI_VDEVICE(INTEL, DID_MTL_PS_SKU2), (kernel_ulong_t)&mtl_ps_cfg },
628
{ PCI_VDEVICE(INTEL, DID_MTL_PS_SKU3), (kernel_ulong_t)&mtl_ps_cfg },
629
{ PCI_VDEVICE(INTEL, DID_MTL_PS_SKU4), (kernel_ulong_t)&mtl_ps_cfg },
630
{ PCI_VDEVICE(INTEL, DID_MTL_P_SKU1), (kernel_ulong_t)&mtl_p_cfg },
631
{ PCI_VDEVICE(INTEL, DID_MTL_P_SKU2), (kernel_ulong_t)&mtl_p_cfg },
632
{ PCI_VDEVICE(INTEL, DID_MTL_P_SKU3), (kernel_ulong_t)&mtl_p_cfg },
633
{ PCI_VDEVICE(INTEL, DID_ARL_UH_SKU1), (kernel_ulong_t)&mtl_p_cfg },
634
{ PCI_VDEVICE(INTEL, DID_ARL_UH_SKU2), (kernel_ulong_t)&mtl_p_cfg },
635
{ PCI_VDEVICE(INTEL, DID_ARL_UH_SKU3), (kernel_ulong_t)&mtl_p_cfg },
636
{ PCI_VDEVICE(INTEL, DID_PTL_H_SKU1), (kernel_ulong_t)&mtl_p_cfg },
637
{ PCI_VDEVICE(INTEL, DID_PTL_H_SKU2), (kernel_ulong_t)&mtl_p_cfg },
638
{ PCI_VDEVICE(INTEL, DID_PTL_H_SKU3), (kernel_ulong_t)&mtl_p_cfg },
639
{ PCI_VDEVICE(INTEL, DID_WCL_SKU1), (kernel_ulong_t)&wcl_cfg },
640
{ },
641
};
642
MODULE_DEVICE_TABLE(pci, igen6_pci_tbl);
643
644
/*
 * Translate the width field of a MAD_DIMM register value into an EDAC
 * device type. A non-zero @dimm_l selects the DIMM L width field (DLW),
 * zero selects the DIMM S width field (DSW).
 */
static enum dev_type get_width(int dimm_l, u32 mad_dimm)
{
	u32 width;

	if (dimm_l)
		width = MAD_DIMM_CH_DLW(mad_dimm);
	else
		width = MAD_DIMM_CH_DSW(mad_dimm);

	if (width == 0)
		return DEV_X8;
	if (width == 1)
		return DEV_X16;
	if (width == 2)
		return DEV_X32;

	return DEV_UNKNOWN;
}
660
661
/*
 * Translate the DDR type field of a MAD_INTER_CHANNEL register value into
 * an EDAC memory type; unlisted encodings yield MEM_UNKNOWN.
 */
static enum mem_type get_memory_type(u32 mad_inter)
{
	/* Indexed by the raw DDR type encoding */
	static const enum mem_type ddr_types[] = {
		MEM_DDR4,
		MEM_DDR3,
		MEM_LPDDR3,
		MEM_LPDDR4,
		MEM_WIO2,
	};
	u32 encoding = MAD_INTER_CHANNEL_DDR_TYPE(mad_inter);

	if (encoding < sizeof(ddr_types) / sizeof(ddr_types[0]))
		return ddr_types[encoding];

	return MEM_UNKNOWN;
}
680
681
/*
 * Compute the hashed index: the XOR of bits 6..19 of (@addr & @mask)
 * and bit @intlv_bit of @addr.
 */
static int decode_chan_idx(u64 addr, u64 mask, int intlv_bit)
{
	u64 selected = addr & mask;
	u64 parity = (addr >> intlv_bit) & 1;
	int bit;

	for (bit = 19; bit >= 6; bit--)
		parity ^= (selected >> bit) & 1;

	return (int)parity;
}
692
693
/*
 * Drop bit @intlv_bit from @addr: the bits above it move down by one
 * position, the bits below it stay in place.
 */
static u64 decode_channel_addr(u64 addr, int intlv_bit)
{
	u64 above = GET_BITFIELD(addr, intlv_bit + 1, 63);
	u64 below = GET_BITFIELD(addr, 0, intlv_bit - 1);

	return (above << intlv_bit) | below;
}
703
704
/*
 * Decode one interleave stage (channel or sub-channel/DIMM).
 *
 * @addr:     address entering this stage
 * @hash:     hash register value for this stage
 * @s_size:   size of the smaller ("S") of the two targets
 * @l_map:    index of the larger ("L") target
 * @idx:      receives the selected target index
 * @sub_addr: receives the address within the selected target
 *
 * Addresses in [0, 2 * s_size) are interleaved across both targets;
 * everything from 2 * s_size upwards belongs solely to the L target.
 */
static void decode_addr(u64 addr, u32 hash, u64 s_size, int l_map,
			int *idx, u64 *sub_addr)
{
	int intlv_bit = CHANNEL_HASH_LSB_MASK_BIT(hash) + 6;

	/*
	 * The boundary itself is already outside the interleaved zone, so
	 * use >= (this also covers address 0 when s_size is 0). This matches
	 * the boundary test in adl_err_addr_to_imc_addr().
	 */
	if (addr >= 2 * s_size) {
		*sub_addr = addr - s_size;
		*idx = l_map;
		return;
	}

	if (CHANNEL_HASH_MODE(hash)) {
		/* Hash mode: interleave bit and mask come from the register */
		*sub_addr = decode_channel_addr(addr, intlv_bit);
		*idx = decode_chan_idx(addr, CHANNEL_HASH_MASK(hash), intlv_bit);
	} else {
		/* No hash: plain interleave on address bit 6 */
		*sub_addr = decode_channel_addr(addr, 6);
		*idx = GET_BITFIELD(addr, 6, 6);
	}
}
723
724
static int igen6_decode(struct decoded_addr *res)
725
{
726
struct igen6_imc *imc = &igen6_pvt->imc[res->mc];
727
u64 addr = res->imc_addr, sub_addr, s_size;
728
int idx, l_map;
729
u32 hash;
730
731
if (addr >= igen6_tom) {
732
edac_dbg(0, "Address 0x%llx out of range\n", addr);
733
return -EINVAL;
734
}
735
736
/* Decode channel */
737
hash = readl(imc->window + CHANNEL_HASH_OFFSET);
738
s_size = imc->ch_s_size;
739
l_map = imc->ch_l_map;
740
decode_addr(addr, hash, s_size, l_map, &idx, &sub_addr);
741
res->channel_idx = idx;
742
res->channel_addr = sub_addr;
743
744
/* Decode sub-channel/DIMM */
745
hash = readl(imc->window + CHANNEL_EHASH_OFFSET);
746
s_size = imc->dimm_s_size[idx];
747
l_map = imc->dimm_l_map[idx];
748
decode_addr(res->channel_addr, hash, s_size, l_map, &idx, &sub_addr);
749
res->sub_channel_idx = idx;
750
res->sub_channel_addr = sub_addr;
751
752
return 0;
753
}
754
755
/*
 * Hand one decoded error to the EDAC core. The error is reported as
 * uncorrected when the UE bit is set in @ecclog and corrected otherwise.
 */
static void igen6_output_error(struct decoded_addr *res,
			       struct mem_ctl_info *mci, u64 ecclog)
{
	enum hw_event_mc_err_type type = HW_EVENT_ERR_CORRECTED;
	u64 pfn = res->sys_addr >> PAGE_SHIFT;
	u64 offset = res->sys_addr & ~PAGE_MASK;

	if (ecclog & ECC_ERROR_LOG_UE)
		type = HW_EVENT_ERR_UNCORRECTED;

	edac_mc_handle_error(type, mci, 1, pfn, offset,
			     ECC_ERROR_LOG_SYND(ecclog),
			     res->channel_idx, res->sub_channel_idx,
			     -1, "", "");
}
769
770
static struct gen_pool *ecclog_gen_pool_create(void)
771
{
772
struct gen_pool *pool;
773
774
pool = gen_pool_create(ilog2(sizeof(struct ecclog_node)), -1);
775
if (!pool)
776
return NULL;
777
778
if (gen_pool_add(pool, (unsigned long)ecclog_buf, ECCLOG_POOL_SIZE, -1)) {
779
gen_pool_destroy(pool);
780
return NULL;
781
}
782
783
return pool;
784
}
785
786
/*
 * Allocate a node from ecclog_pool, record @mc and @ecclog in it and push
 * it onto ecclog_llist. Returns 0 on success, -ENOMEM when the pool is
 * exhausted.
 */
static int ecclog_gen_pool_add(int mc, u64 ecclog)
{
	unsigned long mem = gen_pool_alloc(ecclog_pool, sizeof(struct ecclog_node));
	struct ecclog_node *entry = (void *)mem;

	if (!entry)
		return -ENOMEM;

	entry->ecclog = ecclog;
	entry->mc = mc;
	llist_add(&entry->llnode, &ecclog_llist);

	return 0;
}
800
801
/*
802
* Either the memory-mapped I/O status register ECC_ERROR_LOG or the PCI
803
* configuration space status register ERRSTS can indicate whether a
804
* correctable error or an uncorrectable error occurred. We only use the
805
* ECC_ERROR_LOG register to check error type, but need to clear both
806
* registers to enable future error events.
807
*/
808
static u64 ecclog_read_and_clear(struct igen6_imc *imc)
809
{
810
u64 ecclog = readq(imc->window + ECC_ERROR_LOG_OFFSET);
811
812
/*
813
* Quirk: The ECC_ERROR_LOG register of certain SoCs may contain
814
* the invalid value ~0. This will result in a flood of invalid
815
* error reports in polling mode. Skip it.
816
*/
817
if (ecclog == ~0)
818
return 0;
819
820
/* Neither a CE nor a UE. Skip it.*/
821
if (!(ecclog & (ECC_ERROR_LOG_CE | ECC_ERROR_LOG_UE)))
822
return 0;
823
824
/* Clear CE/UE bits by writing 1s */
825
writeq(ecclog, imc->window + ECC_ERROR_LOG_OFFSET);
826
827
return ecclog;
828
}
829
830
static void errsts_clear(struct igen6_imc *imc)
831
{
832
u16 errsts;
833
834
if (pci_read_config_word(imc->pdev, ERRSTS_OFFSET, &errsts)) {
835
igen6_printk(KERN_ERR, "Failed to read ERRSTS\n");
836
return;
837
}
838
839
/* Clear CE/UE bits by writing 1s */
840
if (errsts & (ERRSTS_CE | ERRSTS_UE))
841
pci_write_config_word(imc->pdev, ERRSTS_OFFSET, errsts);
842
}
843
844
/*
 * Set or clear the CE and UE enable bits in the ERRCMD config register,
 * accessed through the PCI device of memory controller 0.
 *
 * @enable: true to set both bits, false to clear them
 *
 * Returns 0 on success or a negative errno converted from the PCI
 * config access error.
 */
static int errcmd_enable_error_reporting(bool enable)
{
	struct igen6_imc *imc = &igen6_pvt->imc[0];
	u16 errcmd;
	int rc;

	rc = pci_read_config_word(imc->pdev, ERRCMD_OFFSET, &errcmd);
	if (rc)
		return pcibios_err_to_errno(rc);

	/*
	 * Use the ERRCMD_* bit names for the ERRCMD register. ERRSTS_UE has
	 * the same value but names a bit of a different register.
	 */
	if (enable)
		errcmd |= ERRCMD_CE | ERRCMD_UE;
	else
		errcmd &= ~(ERRCMD_CE | ERRCMD_UE);

	rc = pci_write_config_word(imc->pdev, ERRCMD_OFFSET, errcmd);
	if (rc)
		return pcibios_err_to_errno(rc);

	return 0;
}
865
866
static int ecclog_handler(void)
867
{
868
struct igen6_imc *imc;
869
int i, n = 0;
870
u64 ecclog;
871
872
for (i = 0; i < res_cfg->num_imc; i++) {
873
imc = &igen6_pvt->imc[i];
874
875
/* errsts_clear() isn't NMI-safe. Delay it in the IRQ context */
876
877
ecclog = ecclog_read_and_clear(imc);
878
if (!ecclog)
879
continue;
880
881
if (!ecclog_gen_pool_add(i, ecclog))
882
irq_work_queue(&ecclog_irq_work);
883
884
n++;
885
}
886
887
return n;
888
}
889
890
static void ecclog_work_cb(struct work_struct *work)
891
{
892
struct ecclog_node *node, *tmp;
893
struct mem_ctl_info *mci;
894
struct llist_node *head;
895
struct decoded_addr res;
896
u64 eaddr;
897
898
head = llist_del_all(&ecclog_llist);
899
if (!head)
900
return;
901
902
llist_for_each_entry_safe(node, tmp, head, llnode) {
903
memset(&res, 0, sizeof(res));
904
if (res_cfg->err_addr)
905
eaddr = res_cfg->err_addr(node->ecclog);
906
else
907
eaddr = ECC_ERROR_LOG_ADDR(node->ecclog) <<
908
ECC_ERROR_LOG_ADDR_SHIFT;
909
res.mc = node->mc;
910
res.sys_addr = res_cfg->err_addr_to_sys_addr(eaddr, res.mc);
911
res.imc_addr = res_cfg->err_addr_to_imc_addr(eaddr, res.mc);
912
913
mci = igen6_pvt->imc[res.mc].mci;
914
915
edac_dbg(2, "MC %d, ecclog = 0x%llx\n", node->mc, node->ecclog);
916
igen6_mc_printk(mci, KERN_DEBUG, "HANDLING IBECC MEMORY ERROR\n");
917
igen6_mc_printk(mci, KERN_DEBUG, "ADDR 0x%llx ", res.sys_addr);
918
919
if (!igen6_decode(&res))
920
igen6_output_error(&res, mci, node->ecclog);
921
922
gen_pool_free(ecclog_pool, (unsigned long)node, sizeof(*node));
923
}
924
}
925
926
static void ecclog_irq_work_cb(struct irq_work *irq_work)
927
{
928
int i;
929
930
for (i = 0; i < res_cfg->num_imc; i++)
931
errsts_clear(&igen6_pvt->imc[i]);
932
933
if (!llist_empty(&ecclog_llist))
934
schedule_work(&ecclog_work);
935
}
936
937
static int ecclog_nmi_handler(unsigned int cmd, struct pt_regs *regs)
938
{
939
unsigned char reason;
940
941
if (!ecclog_handler())
942
return NMI_DONE;
943
944
/*
945
* Both In-Band ECC correctable error and uncorrectable error are
946
* reported by SERR# NMI. The NMI generic code (see pci_serr_error())
947
* doesn't clear the bit NMI_REASON_CLEAR_SERR (in port 0x61) to
948
* re-enable the SERR# NMI after NMI handling. So clear this bit here
949
* to re-enable SERR# NMI for receiving future In-Band ECC errors.
950
*/
951
reason = x86_platform.get_nmi_reason() & NMI_REASON_CLEAR_MASK;
952
reason |= NMI_REASON_CLEAR_SERR;
953
outb(reason, NMI_REASON_PORT);
954
reason &= ~NMI_REASON_CLEAR_SERR;
955
outb(reason, NMI_REASON_PORT);
956
957
return NMI_HANDLED;
958
}
959
960
static int ecclog_mce_handler(struct notifier_block *nb, unsigned long val,
961
void *data)
962
{
963
struct mce *mce = (struct mce *)data;
964
char *type;
965
966
if (mce->kflags & MCE_HANDLED_CEC)
967
return NOTIFY_DONE;
968
969
/*
970
* Ignore unless this is a memory related error.
971
* We don't check the bit MCI_STATUS_ADDRV of MCi_STATUS here,
972
* since this bit isn't set on some CPU (e.g., Tiger Lake UP3).
973
*/
974
if ((mce->status & 0xefff) >> 7 != 1)
975
return NOTIFY_DONE;
976
977
if (mce->mcgstatus & MCG_STATUS_MCIP)
978
type = "Exception";
979
else
980
type = "Event";
981
982
edac_dbg(0, "CPU %d: Machine Check %s: 0x%llx Bank %d: 0x%llx\n",
983
mce->extcpu, type, mce->mcgstatus,
984
mce->bank, mce->status);
985
edac_dbg(0, "TSC 0x%llx\n", mce->tsc);
986
edac_dbg(0, "ADDR 0x%llx\n", mce->addr);
987
edac_dbg(0, "MISC 0x%llx\n", mce->misc);
988
edac_dbg(0, "PROCESSOR %u:0x%x TIME %llu SOCKET %u APIC 0x%x\n",
989
mce->cpuvendor, mce->cpuid, mce->time,
990
mce->socketid, mce->apicid);
991
/*
992
* We just use the Machine Check for the memory error notification.
993
* Each memory controller is associated with an IBECC instance.
994
* Directly read and clear the error information(error address and
995
* error type) on all the IBECC instances so that we know on which
996
* memory controller the memory error(s) occurred.
997
*/
998
if (!ecclog_handler())
999
return NOTIFY_DONE;
1000
1001
mce->kflags |= MCE_HANDLED_EDAC;
1002
1003
return NOTIFY_DONE;
1004
}
1005
1006
static struct notifier_block ecclog_mce_dec = {
1007
.notifier_call = ecclog_mce_handler,
1008
.priority = MCE_PRIO_EDAC,
1009
};
1010
1011
static bool igen6_check_ecc(struct igen6_imc *imc)
1012
{
1013
u32 activate = readl(imc->window + IBECC_ACTIVATE_OFFSET);
1014
1015
return !!(activate & IBECC_ACTIVATE_EN);
1016
}
1017
1018
static int igen6_get_dimm_config(struct mem_ctl_info *mci)
1019
{
1020
struct igen6_imc *imc = mci->pvt_info;
1021
u32 mad_inter, mad_intra, mad_dimm;
1022
int i, j, ndimms, mc = imc->mc;
1023
struct dimm_info *dimm;
1024
enum mem_type mtype;
1025
enum dev_type dtype;
1026
u64 dsize;
1027
bool ecc;
1028
1029
edac_dbg(2, "\n");
1030
1031
mad_inter = readl(imc->window + MAD_INTER_CHANNEL_OFFSET);
1032
mtype = get_memory_type(mad_inter);
1033
ecc = igen6_check_ecc(imc);
1034
imc->ch_s_size = MAD_INTER_CHANNEL_CH_S_SIZE(mad_inter);
1035
imc->ch_l_map = MAD_INTER_CHANNEL_CH_L_MAP(mad_inter);
1036
1037
for (i = 0; i < NUM_CHANNELS; i++) {
1038
mad_intra = readl(imc->window + MAD_INTRA_CH0_OFFSET + i * 4);
1039
mad_dimm = readl(imc->window + MAD_DIMM_CH0_OFFSET + i * 4);
1040
1041
imc->dimm_l_size[i] = MAD_DIMM_CH_DIMM_L_SIZE(mad_dimm);
1042
imc->dimm_s_size[i] = MAD_DIMM_CH_DIMM_S_SIZE(mad_dimm);
1043
imc->dimm_l_map[i] = MAD_INTRA_CH_DIMM_L_MAP(mad_intra);
1044
imc->size += imc->dimm_s_size[i];
1045
imc->size += imc->dimm_l_size[i];
1046
ndimms = 0;
1047
1048
for (j = 0; j < NUM_DIMMS; j++) {
1049
dimm = edac_get_dimm(mci, i, j, 0);
1050
1051
if (j ^ imc->dimm_l_map[i]) {
1052
dtype = get_width(0, mad_dimm);
1053
dsize = imc->dimm_s_size[i];
1054
} else {
1055
dtype = get_width(1, mad_dimm);
1056
dsize = imc->dimm_l_size[i];
1057
}
1058
1059
if (!dsize)
1060
continue;
1061
1062
dimm->grain = 64;
1063
dimm->mtype = mtype;
1064
dimm->dtype = dtype;
1065
dimm->nr_pages = MiB_TO_PAGES(dsize >> 20);
1066
dimm->edac_mode = EDAC_SECDED;
1067
snprintf(dimm->label, sizeof(dimm->label),
1068
"MC#%d_Chan#%d_DIMM#%d", mc, i, j);
1069
edac_dbg(0, "MC %d, Channel %d, DIMM %d, Size %llu MiB (%u pages)\n",
1070
mc, i, j, dsize >> 20, dimm->nr_pages);
1071
1072
ndimms++;
1073
}
1074
1075
if (ndimms && !ecc) {
1076
igen6_printk(KERN_ERR, "MC%d In-Band ECC is disabled\n", mc);
1077
return -ENODEV;
1078
}
1079
}
1080
1081
edac_dbg(0, "MC %d, total size %llu MiB\n", mc, imc->size >> 20);
1082
1083
return 0;
1084
}
1085
1086
#ifdef CONFIG_EDAC_DEBUG
1087
/* Top of upper usable DRAM */
1088
static u64 igen6_touud;
1089
#define TOUUD_OFFSET 0xa8
1090
1091
static void igen6_reg_dump(struct igen6_imc *imc)
1092
{
1093
int i;
1094
1095
edac_dbg(2, "CHANNEL_HASH : 0x%x\n",
1096
readl(imc->window + CHANNEL_HASH_OFFSET));
1097
edac_dbg(2, "CHANNEL_EHASH : 0x%x\n",
1098
readl(imc->window + CHANNEL_EHASH_OFFSET));
1099
edac_dbg(2, "MAD_INTER_CHANNEL: 0x%x\n",
1100
readl(imc->window + MAD_INTER_CHANNEL_OFFSET));
1101
edac_dbg(2, "ECC_ERROR_LOG : 0x%llx\n",
1102
readq(imc->window + ECC_ERROR_LOG_OFFSET));
1103
1104
for (i = 0; i < NUM_CHANNELS; i++) {
1105
edac_dbg(2, "MAD_INTRA_CH%d : 0x%x\n", i,
1106
readl(imc->window + MAD_INTRA_CH0_OFFSET + i * 4));
1107
edac_dbg(2, "MAD_DIMM_CH%d : 0x%x\n", i,
1108
readl(imc->window + MAD_DIMM_CH0_OFFSET + i * 4));
1109
}
1110
edac_dbg(2, "TOLUD : 0x%x", igen6_tolud);
1111
edac_dbg(2, "TOUUD : 0x%llx", igen6_touud);
1112
edac_dbg(2, "TOM : 0x%llx", igen6_tom);
1113
}
1114
1115
static struct dentry *igen6_test;
1116
1117
static int debugfs_u64_set(void *data, u64 val)
1118
{
1119
u64 ecclog;
1120
1121
if ((val >= igen6_tolud && val < _4GB) || val >= igen6_touud) {
1122
edac_dbg(0, "Address 0x%llx out of range\n", val);
1123
return 0;
1124
}
1125
1126
pr_warn_once("Fake error to 0x%llx injected via debugfs\n", val);
1127
1128
val >>= ECC_ERROR_LOG_ADDR_SHIFT;
1129
ecclog = (val << ECC_ERROR_LOG_ADDR_SHIFT) | ECC_ERROR_LOG_CE;
1130
1131
if (!ecclog_gen_pool_add(0, ecclog))
1132
irq_work_queue(&ecclog_irq_work);
1133
1134
return 0;
1135
}
1136
DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n");
1137
1138
static void igen6_debug_setup(void)
1139
{
1140
igen6_test = edac_debugfs_create_dir("igen6_test");
1141
if (!igen6_test)
1142
return;
1143
1144
if (!edac_debugfs_create_file("addr", 0200, igen6_test,
1145
NULL, &fops_u64_wo)) {
1146
debugfs_remove(igen6_test);
1147
igen6_test = NULL;
1148
}
1149
}
1150
1151
static void igen6_debug_teardown(void)
1152
{
1153
debugfs_remove_recursive(igen6_test);
1154
}
1155
#else
1156
/* No-op replacements used when CONFIG_EDAC_DEBUG is not set. */
static void igen6_reg_dump(struct igen6_imc *imc)
{
}

static void igen6_debug_setup(void)
{
}

static void igen6_debug_teardown(void)
{
}
1159
#endif
1160
1161
/*
 * Check that the device provides the In-Band ECC IP and read the
 * address-map registers from PCI config space: TOLUD and TOM into
 * igen6_tolud/igen6_tom, the MCHBAR base into @mchbar and, with
 * CONFIG_EDAC_DEBUG, TOUUD into igen6_touud.
 *
 * A failure to read TOUUD is only reported as a debug message.
 *
 * Return: 0 on success, -ENODEV on any other failure.
 */
static int igen6_pci_setup(struct pci_dev *pdev, u64 *mchbar)
{
	/* Assemble a 64-bit register value from two 32-bit config reads. */
	union {
		u64 val;
		struct {
			u32 lo;
			u32 hi;
		};
	} reg;

	edac_dbg(2, "\n");

	if (!res_cfg->ibecc_available(pdev)) {
		edac_dbg(2, "No In-Band ECC IP\n");
		return -ENODEV;
	}

	if (pci_read_config_dword(pdev, TOLUD_OFFSET, &igen6_tolud)) {
		igen6_printk(KERN_ERR, "Failed to read TOLUD\n");
		return -ENODEV;
	}

	/* Only bits 31:20 are kept. */
	igen6_tolud &= GENMASK(31, 20);

	if (pci_read_config_dword(pdev, TOM_OFFSET, &reg.lo)) {
		igen6_printk(KERN_ERR, "Failed to read lower TOM\n");
		return -ENODEV;
	}

	if (pci_read_config_dword(pdev, TOM_OFFSET + 4, &reg.hi)) {
		igen6_printk(KERN_ERR, "Failed to read upper TOM\n");
		return -ENODEV;
	}

	/* Only bits 38:20 are kept. */
	igen6_tom = reg.val & GENMASK_ULL(38, 20);

	if (get_mchbar(pdev, mchbar))
		return -ENODEV;

#ifdef CONFIG_EDAC_DEBUG
	if (pci_read_config_dword(pdev, TOUUD_OFFSET, &reg.lo))
		edac_dbg(2, "Failed to read lower TOUUD\n");
	else if (pci_read_config_dword(pdev, TOUUD_OFFSET + 4, &reg.hi))
		edac_dbg(2, "Failed to read upper TOUUD\n");
	else
		igen6_touud = reg.val & GENMASK_ULL(38, 20);
#endif

	return 0;
}
1213
1214
static void igen6_check(struct mem_ctl_info *mci)
1215
{
1216
struct igen6_imc *imc = mci->pvt_info;
1217
u64 ecclog;
1218
1219
/* errsts_clear() isn't NMI-safe. Delay it in the IRQ context */
1220
ecclog = ecclog_read_and_clear(imc);
1221
if (!ecclog)
1222
return;
1223
1224
if (!ecclog_gen_pool_add(imc->mc, ecclog))
1225
irq_work_queue(&ecclog_irq_work);
1226
}
1227
1228
/* Check whether the memory controller is absent. */
1229
static bool igen6_imc_absent(void __iomem *window)
1230
{
1231
return readl(window + MAD_INTER_CHANNEL_OFFSET) == ~0;
1232
}
1233
1234
/*
 * Allocate, populate and register the EDAC memory controller number @mc,
 * whose registers are mapped at @window and which belongs to @pdev.
 *
 * On success the new mci is recorded in igen6_pvt->imc[@mc].mci. @window
 * is only stored here; it is not unmapped on failure.
 *
 * Return: 0 on success, -ENOMEM on allocation failure, or the error
 * returned by igen6_get_dimm_config() / edac_mc_add_mc().
 */
static int igen6_register_mci(int mc, void __iomem *window, struct pci_dev *pdev)
{
	struct edac_mc_layer layers[2] = {
		{
			.type		= EDAC_MC_LAYER_CHANNEL,
			.size		= NUM_CHANNELS,
			.is_virt_csrow	= false,
		},
		{
			.type		= EDAC_MC_LAYER_SLOT,
			.size		= NUM_DIMMS,
			.is_virt_csrow	= true,
		},
	};
	struct igen6_imc *imc = &igen6_pvt->imc[mc];
	struct mem_ctl_info *mci;
	int rc;

	edac_dbg(2, "\n");

	mci = edac_mc_alloc(mc, ARRAY_SIZE(layers), layers, 0);
	if (!mci)
		return -ENOMEM;

	mci->ctl_name = kasprintf(GFP_KERNEL, "Intel_client_SoC MC#%d", mc);
	if (!mci->ctl_name) {
		rc = -ENOMEM;
		goto free_mci;
	}

	mci->mtype_cap = MEM_FLAG_LPDDR4 | MEM_FLAG_DDR4;
	mci->edac_ctl_cap = EDAC_FLAG_SECDED;
	mci->edac_cap = EDAC_FLAG_SECDED;
	mci->mod_name = EDAC_MOD_STR;
	mci->dev_name = pci_name(pdev);
	if (edac_op_state == EDAC_OPSTATE_POLL)
		mci->edac_check = igen6_check;
	mci->pvt_info = imc;

	device_initialize(&imc->dev);
	/*
	 * EDAC core uses mci->pdev(pointer of structure device) as
	 * memory controller ID. The client SoCs attach one or more
	 * memory controllers to single pci_dev (single pci_dev->dev
	 * can be for multiple memory controllers).
	 *
	 * To make mci->pdev unique, assign pci_dev->dev to mci->pdev
	 * for the first memory controller and assign a unique imc->dev
	 * to mci->pdev for each non-first memory controller.
	 */
	if (mc)
		mci->pdev = &imc->dev;
	else
		mci->pdev = &pdev->dev;

	imc->mc = mc;
	imc->pdev = pdev;
	imc->window = window;

	igen6_reg_dump(imc);

	rc = igen6_get_dimm_config(mci);
	if (rc)
		goto free_name;

	rc = edac_mc_add_mc(mci);
	if (rc) {
		igen6_printk(KERN_ERR, "Failed to register mci#%d\n", mc);
		goto free_name;
	}

	imc->mci = mci;
	return 0;

free_name:
	/* pvt_info points into igen6_pvt, which is not owned by the mci. */
	mci->pvt_info = NULL;
	kfree(mci->ctl_name);
free_mci:
	edac_mc_free(mci);
	return rc;
}
1310
1311
static void igen6_unregister_mcis(void)
1312
{
1313
struct mem_ctl_info *mci;
1314
struct igen6_imc *imc;
1315
int i;
1316
1317
edac_dbg(2, "\n");
1318
1319
for (i = 0; i < res_cfg->num_imc; i++) {
1320
imc = &igen6_pvt->imc[i];
1321
mci = imc->mci;
1322
if (!mci)
1323
continue;
1324
1325
edac_mc_del_mc(mci->pdev);
1326
kfree(mci->ctl_name);
1327
mci->pvt_info = NULL;
1328
edac_mc_free(mci);
1329
iounmap(imc->window);
1330
}
1331
}
1332
1333
/*
 * Probe up to NUM_IMC physical memory controllers located at consecutive
 * MCHBAR_SIZE windows starting at @mchbar and register each present one
 * under the next logical index.
 *
 * Absent controllers (see igen6_imc_absent()) are unmapped and skipped.
 * When fewer controllers than res_cfg->num_imc are found, num_imc is
 * lowered to the detected count.
 *
 * Return: 0 on success, -ENODEV if no controller is present, -ENOMEM if
 * a window cannot be mapped, or the error from igen6_register_mci(). On
 * error all controllers registered so far are unregistered again.
 */
static int igen6_register_mcis(struct pci_dev *pdev, u64 mchbar)
{
	void __iomem *window;
	int lmc, pmc, rc;
	u64 base;

	/* lmc: logical (registered) index, pmc: physical controller index */
	for (lmc = 0, pmc = 0; pmc < NUM_IMC; pmc++) {
		base = mchbar + pmc * MCHBAR_SIZE;
		window = ioremap(base, MCHBAR_SIZE);
		if (!window) {
			igen6_printk(KERN_ERR, "Failed to ioremap 0x%llx for mc%d\n", base, pmc);
			rc = -ENOMEM;
			goto out_unregister_mcis;
		}

		if (igen6_imc_absent(window)) {
			iounmap(window);
			edac_dbg(2, "Skip absent mc%d\n", pmc);
			continue;
		}

		rc = igen6_register_mci(lmc, window, pdev);
		if (rc)
			goto out_iounmap;

		/* Done, if all present MCs are detected and registered. */
		if (++lmc >= res_cfg->num_imc)
			break;
	}

	if (!lmc) {
		igen6_printk(KERN_ERR, "No mc found.\n");
		return -ENODEV;
	}

	if (lmc < res_cfg->num_imc) {
		/* Newline-terminated so the message is a complete log line. */
		igen6_printk(KERN_DEBUG, "Expected %d mcs, but only %d detected.\n",
			     res_cfg->num_imc, lmc);
		res_cfg->num_imc = lmc;
	}

	return 0;

out_iounmap:
	/* The window of the controller that failed to register. */
	iounmap(window);

out_unregister_mcis:
	igen6_unregister_mcis();

	return rc;
}
1384
1385
/*
 * Record the memory-slice parameters for a two-controller configuration:
 * ms_s_size is the smaller of the two controller sizes, ms_l_map the
 * index of the other controller (0 when both sizes are equal). When
 * res_cfg->cmf_size is non-zero, the 64-bit value at
 * res_cfg->ms_hash_offset inside the CMF window (mapped at
 * @mchbar + res_cfg->cmf_base) is cached in igen6_pvt->ms_hash.
 *
 * Return: 0 on success, -ENODEV if the CMF window cannot be mapped.
 */
static int igen6_mem_slice_setup(u64 mchbar)
{
	struct igen6_imc *mc0 = &igen6_pvt->imc[0];
	struct igen6_imc *mc1 = &igen6_pvt->imc[1];
	u64 cmf_base = mchbar + res_cfg->cmf_base;
	u32 hash_off = res_cfg->ms_hash_offset;
	u32 cmf_size = res_cfg->cmf_size;
	u64 small_size, hash;
	void __iomem *cmf;
	int large_idx;

	edac_dbg(2, "\n");

	large_idx = mc0->size < mc1->size ? 1 : 0;
	small_size = large_idx ? mc0->size : mc1->size;

	igen6_pvt->ms_s_size = small_size;
	igen6_pvt->ms_l_map = large_idx;

	edac_dbg(0, "ms_s_size: %llu MiB, ms_l_map %d\n",
		 small_size >> 20, large_idx);

	/* No CMF window configured: nothing more to read. */
	if (!cmf_size)
		return 0;

	cmf = ioremap(cmf_base, cmf_size);
	if (!cmf) {
		igen6_printk(KERN_ERR, "Failed to ioremap cmf 0x%llx\n", cmf_base);
		return -ENODEV;
	}

	hash = readq(cmf + hash_off);
	igen6_pvt->ms_hash = hash;

	edac_dbg(0, "MEM_SLICE_HASH: 0x%llx\n", hash);

	iounmap(cmf);

	return 0;
}
1429
1430
static int register_err_handler(void)
1431
{
1432
int rc;
1433
1434
if (res_cfg->machine_check) {
1435
mce_register_decode_chain(&ecclog_mce_dec);
1436
return 0;
1437
}
1438
1439
rc = register_nmi_handler(NMI_SERR, ecclog_nmi_handler,
1440
0, IGEN6_NMI_NAME);
1441
if (rc) {
1442
igen6_printk(KERN_ERR, "Failed to register NMI handler\n");
1443
return rc;
1444
}
1445
1446
return 0;
1447
}
1448
1449
static void unregister_err_handler(void)
1450
{
1451
if (res_cfg->machine_check) {
1452
mce_unregister_decode_chain(&ecclog_mce_dec);
1453
return;
1454
}
1455
1456
unregister_nmi_handler(NMI_SERR, IGEN6_NMI_NAME);
1457
}
1458
1459
static void opstate_set(const struct res_config *cfg, const struct pci_device_id *ent)
1460
{
1461
/*
1462
* Quirk: Certain SoCs' error reporting interrupts don't work.
1463
* Force polling mode for them to ensure that memory error
1464
* events can be handled.
1465
*/
1466
if (ent->device == DID_ADL_N_SKU4) {
1467
edac_op_state = EDAC_OPSTATE_POLL;
1468
return;
1469
}
1470
1471
/* Set the mode according to the configuration data. */
1472
if (cfg->machine_check)
1473
edac_op_state = EDAC_OPSTATE_INT;
1474
else
1475
edac_op_state = EDAC_OPSTATE_NMI;
1476
}
1477
1478
static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
1479
{
1480
u64 mchbar;
1481
int rc;
1482
1483
edac_dbg(2, "\n");
1484
1485
igen6_pvt = kzalloc(sizeof(*igen6_pvt), GFP_KERNEL);
1486
if (!igen6_pvt)
1487
return -ENOMEM;
1488
1489
res_cfg = (struct res_config *)ent->driver_data;
1490
1491
rc = igen6_pci_setup(pdev, &mchbar);
1492
if (rc)
1493
goto fail;
1494
1495
opstate_set(res_cfg, ent);
1496
1497
rc = igen6_register_mcis(pdev, mchbar);
1498
if (rc)
1499
goto fail;
1500
1501
if (res_cfg->num_imc > 1) {
1502
rc = igen6_mem_slice_setup(mchbar);
1503
if (rc)
1504
goto fail2;
1505
}
1506
1507
ecclog_pool = ecclog_gen_pool_create();
1508
if (!ecclog_pool) {
1509
rc = -ENOMEM;
1510
goto fail2;
1511
}
1512
1513
INIT_WORK(&ecclog_work, ecclog_work_cb);
1514
init_irq_work(&ecclog_irq_work, ecclog_irq_work_cb);
1515
1516
rc = register_err_handler();
1517
if (rc)
1518
goto fail3;
1519
1520
/* Enable error reporting */
1521
rc = errcmd_enable_error_reporting(true);
1522
if (rc) {
1523
igen6_printk(KERN_ERR, "Failed to enable error reporting\n");
1524
goto fail4;
1525
}
1526
1527
/* Check if any pending errors before/during the registration of the error handler */
1528
ecclog_handler();
1529
1530
igen6_debug_setup();
1531
return 0;
1532
fail4:
1533
unregister_nmi_handler(NMI_SERR, IGEN6_NMI_NAME);
1534
fail3:
1535
gen_pool_destroy(ecclog_pool);
1536
fail2:
1537
igen6_unregister_mcis();
1538
fail:
1539
kfree(igen6_pvt);
1540
return rc;
1541
}
1542
1543
static void igen6_remove(struct pci_dev *pdev)
1544
{
1545
edac_dbg(2, "\n");
1546
1547
igen6_debug_teardown();
1548
errcmd_enable_error_reporting(false);
1549
unregister_err_handler();
1550
irq_work_sync(&ecclog_irq_work);
1551
flush_work(&ecclog_work);
1552
gen_pool_destroy(ecclog_pool);
1553
igen6_unregister_mcis();
1554
kfree(igen6_pvt);
1555
}
1556
1557
static struct pci_driver igen6_driver = {
1558
.name = EDAC_MOD_STR,
1559
.probe = igen6_probe,
1560
.remove = igen6_remove,
1561
.id_table = igen6_pci_tbl,
1562
};
1563
1564
/*
 * Module entry point: register the PCI driver and print the driver
 * revision.
 *
 * Return: 0 on success, -EBUSY if ghes_get_devices() returns non-zero or
 * a different EDAC owner is already set, or the error from
 * pci_register_driver().
 */
static int __init igen6_init(void)
{
	const char *owner;
	int rc;

	edac_dbg(2, "\n");

	if (ghes_get_devices())
		return -EBUSY;

	owner = edac_get_owner();
	if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR)))
		return -EBUSY;

	rc = pci_register_driver(&igen6_driver);
	if (!rc)
		igen6_printk(KERN_INFO, "%s\n", IGEN6_REVISION);

	return rc;
}
1586
1587
/* Module exit point: unregister the PCI driver. */
static void __exit igen6_exit(void)
{
	edac_dbg(2, "\n");
	pci_unregister_driver(&igen6_driver);
}
1593
1594
module_init(igen6_init);
1595
module_exit(igen6_exit);
1596
1597
MODULE_LICENSE("GPL v2");
1598
MODULE_AUTHOR("Qiuxu Zhuo");
1599
MODULE_DESCRIPTION("MC Driver for Intel client SoC using In-Band ECC");
1600
1601