GitHub Repository: awilliam/linux-vfio
Path: blob/master/drivers/edac/i7core_edac.c
/* Intel i7 core/Nehalem Memory Controller kernel module
 *
 * This driver supports the memory controllers found on the Intel
 * processor families i7core, i7core 7xx/8xx, i5core, Xeon 35xx,
 * Xeon 55xx and Xeon 56xx also known as Nehalem, Nehalem-EP, Lynnfield
 * and Westmere-EP.
 *
 * This file may be distributed under the terms of the
 * GNU General Public License version 2 only.
 *
 * Copyright (c) 2009-2010 by:
 *	 Mauro Carvalho Chehab <[email protected]>
 *
 * Red Hat Inc. http://www.redhat.com
 *
 * Forked and adapted from the i5400_edac driver
 *
 * Based on the following public Intel datasheets:
 * Intel Core i7 Processor Extreme Edition and Intel Core i7 Processor
 * Datasheet, Volume 2:
 *	http://download.intel.com/design/processor/datashts/320835.pdf
 * Intel Xeon Processor 5500 Series Datasheet Volume 2
 *	http://www.intel.com/Assets/PDF/datasheet/321322.pdf
 * also available at:
 *	http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
 */
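
/*
 * Illustrative usage sketch, based on the module parameters declared below
 * (defaults may vary by kernel version):
 *
 *	modprobe i7core_edac			# poll mode (edac_op_state=0)
 *	modprobe i7core_edac edac_op_state=1	# report errors via NMI instead
 *	modprobe i7core_edac use_pci_fixup=1	# scan for hidden non-core buses
 */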

#include <linux/module.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/pci_ids.h>
#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/edac.h>
#include <linux/mmzone.h>
#include <linux/edac_mce.h>
#include <linux/smp.h>
#include <asm/processor.h>

#include "edac_core.h"

/* Static vars */
static LIST_HEAD(i7core_edac_list);
static DEFINE_MUTEX(i7core_edac_lock);
static int probed;

static int use_pci_fixup;
module_param(use_pci_fixup, int, 0444);
MODULE_PARM_DESC(use_pci_fixup, "Enable PCI fixup to seek for hidden devices");
/*
 * This is used for Nehalem-EP and Nehalem-EX devices, where the non-core
 * registers start at bus 255, and are not reported by the BIOS.
 * We currently only find devices with 2 sockets. In order to support more
 * QPI (Quick Path Interconnect) sockets, just increment this number.
 */
#define MAX_SOCKET_BUSES	2


/*
 * Alter this version for the module when modifications are made
 */
#define I7CORE_REVISION    " Ver: 1.0.0"
#define EDAC_MOD_STR      "i7core_edac"

/*
 * Debug macros
 */
#define i7core_printk(level, fmt, arg...)			\
	edac_printk(level, "i7core", fmt, ##arg)

#define i7core_mc_printk(mci, level, fmt, arg...)		\
	edac_mc_chipset_printk(mci, level, "i7core", fmt, ##arg)

/*
 * i7core Memory Controller Registers
 */

	/* OFFSETS for Device 0 Function 0 */

#define MC_CFG_CONTROL	0x90

	/* OFFSETS for Device 3 Function 0 */

#define MC_CONTROL	0x48
#define MC_STATUS	0x4c
#define MC_MAX_DOD	0x64

/*
 * OFFSETS for Device 3 Function 4, as indicated on Xeon 5500 datasheet:
 *	http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
 */

#define MC_TEST_ERR_RCV1	0x60
  #define DIMM2_COR_ERR(r)		((r) & 0x7fff)

#define MC_TEST_ERR_RCV0	0x64
  #define DIMM1_COR_ERR(r)		(((r) >> 16) & 0x7fff)
  #define DIMM0_COR_ERR(r)		((r) & 0x7fff)

/* OFFSETS for Device 3 Function 2, as indicated on Xeon 5500 datasheet */
#define MC_COR_ECC_CNT_0	0x80
#define MC_COR_ECC_CNT_1	0x84
#define MC_COR_ECC_CNT_2	0x88
#define MC_COR_ECC_CNT_3	0x8c
#define MC_COR_ECC_CNT_4	0x90
#define MC_COR_ECC_CNT_5	0x94

#define DIMM_TOP_COR_ERR(r)	(((r) >> 16) & 0x7fff)
#define DIMM_BOT_COR_ERR(r)	((r) & 0x7fff)


	/* OFFSETS for Devices 4,5 and 6 Function 0 */

#define MC_CHANNEL_DIMM_INIT_PARAMS	0x58
  #define THREE_DIMMS_PRESENT		(1 << 24)
  #define SINGLE_QUAD_RANK_PRESENT	(1 << 23)
  #define QUAD_RANK_PRESENT		(1 << 22)
  #define REGISTERED_DIMM		(1 << 15)

#define MC_CHANNEL_MAPPER	0x60
  #define RDLCH(r, ch)		((((r) >> (3 + (ch * 6))) & 0x07) - 1)
  #define WRLCH(r, ch)		((((r) >> (ch * 6)) & 0x07) - 1)

#define MC_CHANNEL_RANK_PRESENT	0x7c
  #define RANK_PRESENT_MASK	0xffff

#define MC_CHANNEL_ADDR_MATCH	0xf0
#define MC_CHANNEL_ERROR_MASK	0xf8
#define MC_CHANNEL_ERROR_INJECT	0xfc
  #define INJECT_ADDR_PARITY	0x10
  #define INJECT_ECC		0x08
  #define MASK_CACHELINE	0x06
  #define MASK_FULL_CACHELINE	0x06
  #define MASK_MSB32_CACHELINE	0x04
  #define MASK_LSB32_CACHELINE	0x02
  #define NO_MASK_CACHELINE	0x00
  #define REPEAT_EN		0x01

	/* OFFSETS for Devices 4,5 and 6 Function 1 */

#define MC_DOD_CH_DIMM0		0x48
#define MC_DOD_CH_DIMM1		0x4c
#define MC_DOD_CH_DIMM2		0x50
  #define RANKOFFSET_MASK	((1 << 12) | (1 << 11) | (1 << 10))
  #define RANKOFFSET(x)		((x & RANKOFFSET_MASK) >> 10)
  #define DIMM_PRESENT_MASK	(1 << 9)
  #define DIMM_PRESENT(x)	(((x) & DIMM_PRESENT_MASK) >> 9)
  #define MC_DOD_NUMBANK_MASK	((1 << 8) | (1 << 7))
  #define MC_DOD_NUMBANK(x)	(((x) & MC_DOD_NUMBANK_MASK) >> 7)
  #define MC_DOD_NUMRANK_MASK	((1 << 6) | (1 << 5))
  #define MC_DOD_NUMRANK(x)	(((x) & MC_DOD_NUMRANK_MASK) >> 5)
  #define MC_DOD_NUMROW_MASK	((1 << 4) | (1 << 3) | (1 << 2))
  #define MC_DOD_NUMROW(x)	(((x) & MC_DOD_NUMROW_MASK) >> 2)
  #define MC_DOD_NUMCOL_MASK	3
  #define MC_DOD_NUMCOL(x)	((x) & MC_DOD_NUMCOL_MASK)

#define MC_RANK_PRESENT		0x7c

#define MC_SAG_CH_0	0x80
#define MC_SAG_CH_1	0x84
#define MC_SAG_CH_2	0x88
#define MC_SAG_CH_3	0x8c
#define MC_SAG_CH_4	0x90
#define MC_SAG_CH_5	0x94
#define MC_SAG_CH_6	0x98
#define MC_SAG_CH_7	0x9c

#define MC_RIR_LIMIT_CH_0	0x40
#define MC_RIR_LIMIT_CH_1	0x44
#define MC_RIR_LIMIT_CH_2	0x48
#define MC_RIR_LIMIT_CH_3	0x4C
#define MC_RIR_LIMIT_CH_4	0x50
#define MC_RIR_LIMIT_CH_5	0x54
#define MC_RIR_LIMIT_CH_6	0x58
#define MC_RIR_LIMIT_CH_7	0x5C
#define MC_RIR_LIMIT_MASK	((1 << 10) - 1)

#define MC_RIR_WAY_CH		0x80
  #define MC_RIR_WAY_OFFSET_MASK	(((1 << 14) - 1) & ~0x7)
  #define MC_RIR_WAY_RANK_MASK		0x7

/*
 * i7core structs
 */

#define NUM_CHANS 3
#define MAX_DIMMS 3		/* Max DIMMS per channel */
#define MAX_MCR_FUNC  4
#define MAX_CHAN_FUNC 3

struct i7core_info {
	u32	mc_control;
	u32	mc_status;
	u32	max_dod;
	u32	ch_map;
};


struct i7core_inject {
	int	enable;

	u32	section;
	u32	type;
	u32	eccmask;

	/* Error address mask */
	int channel, dimm, rank, bank, page, col;
};

struct i7core_channel {
	u32	ranks;
	u32	dimms;
};

struct pci_id_descr {
	int	dev;
	int	func;
	int	dev_id;
	int	optional;
};

struct pci_id_table {
	const struct pci_id_descr	*descr;
	int				n_devs;
};

struct i7core_dev {
	struct list_head	list;
	u8			socket;
	struct pci_dev		**pdev;
	int			n_devs;
	struct mem_ctl_info	*mci;
};

struct i7core_pvt {
	struct pci_dev	*pci_noncore;
	struct pci_dev	*pci_mcr[MAX_MCR_FUNC + 1];
	struct pci_dev	*pci_ch[NUM_CHANS][MAX_CHAN_FUNC + 1];

	struct i7core_dev *i7core_dev;

	struct i7core_info	info;
	struct i7core_inject	inject;
	struct i7core_channel	channel[NUM_CHANS];

	int		ce_count_available;
	int		csrow_map[NUM_CHANS][MAX_DIMMS];

	/* ECC corrected errors counts per udimm */
	unsigned long	udimm_ce_count[MAX_DIMMS];
	int		udimm_last_ce_count[MAX_DIMMS];
	/* ECC corrected errors counts per rdimm */
	unsigned long	rdimm_ce_count[NUM_CHANS][MAX_DIMMS];
	int		rdimm_last_ce_count[NUM_CHANS][MAX_DIMMS];

	unsigned int	is_registered;

	/* mcelog glue */
	struct edac_mce		edac_mce;

	/* Fifo double buffers */
	struct mce		mce_entry[MCE_LOG_LEN];
	struct mce		mce_outentry[MCE_LOG_LEN];

	/* Fifo in/out counters */
	unsigned		mce_in, mce_out;

	/* Count indicator to show errors that were not retrieved */
	unsigned		mce_overrun;

	/* Struct to control EDAC polling */
	struct edac_pci_ctl_info *i7core_pci;
};

#define PCI_DESCR(device, function, device_id)	\
	.dev = (device),			\
	.func = (function),			\
	.dev_id = (device_id)

static const struct pci_id_descr pci_dev_descr_i7core_nehalem[] = {
		/* Memory controller */
	{ PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR) },
	{ PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD) },

		/* Exists only for RDIMM */
	{ PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS), .optional = 1 },
	{ PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) },

		/* Channel 0 */
	{ PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH0_CTRL) },
	{ PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH0_ADDR) },
	{ PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH0_RANK) },
	{ PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH0_TC) },

		/* Channel 1 */
	{ PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH1_CTRL) },
	{ PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH1_ADDR) },
	{ PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH1_RANK) },
	{ PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH1_TC) },

		/* Channel 2 */
	{ PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH2_CTRL) },
	{ PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) },
	{ PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) },
	{ PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC) },
};

static const struct pci_id_descr pci_dev_descr_lynnfield[] = {
	{ PCI_DESCR( 3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR) },
	{ PCI_DESCR( 3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD) },
	{ PCI_DESCR( 3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST) },

	{ PCI_DESCR( 4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL) },
	{ PCI_DESCR( 4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR) },
	{ PCI_DESCR( 4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK) },
	{ PCI_DESCR( 4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC) },

	{ PCI_DESCR( 5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL) },
	{ PCI_DESCR( 5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR) },
	{ PCI_DESCR( 5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK) },
	{ PCI_DESCR( 5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC) },
};

static const struct pci_id_descr pci_dev_descr_i7core_westmere[] = {
		/* Memory controller */
	{ PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR_REV2) },
	{ PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD_REV2) },
		/* Exists only for RDIMM */
	{ PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_RAS_REV2), .optional = 1 },
	{ PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST_REV2) },

		/* Channel 0 */
	{ PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL_REV2) },
	{ PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR_REV2) },
	{ PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK_REV2) },
	{ PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC_REV2) },

		/* Channel 1 */
	{ PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL_REV2) },
	{ PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR_REV2) },
	{ PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK_REV2) },
	{ PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC_REV2) },

		/* Channel 2 */
	{ PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_CTRL_REV2) },
	{ PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_ADDR_REV2) },
	{ PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_RANK_REV2) },
	{ PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_TC_REV2) },
};

#define PCI_ID_TABLE_ENTRY(A) { .descr=A, .n_devs = ARRAY_SIZE(A) }
static const struct pci_id_table pci_dev_table[] = {
	PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_nehalem),
	PCI_ID_TABLE_ENTRY(pci_dev_descr_lynnfield),
	PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_westmere),
	{0,}			/* 0 terminated list. */
};

/*
 *	pci_device_id	table for which devices we are looking for
 */
static const struct pci_device_id i7core_pci_tbl[] __devinitdata = {
	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_X58_HUB_MGMT)},
	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LYNNFIELD_QPI_LINK0)},
	{0,}			/* 0 terminated list. */
};

/****************************************************************************
			Ancillary status routines
 ****************************************************************************/

/* MC_CONTROL bits */
#define CH_ACTIVE(pvt, ch)	((pvt)->info.mc_control & (1 << (8 + ch)))
#define ECCx8(pvt)		((pvt)->info.mc_control & (1 << 1))

/* MC_STATUS bits */
#define ECC_ENABLED(pvt)	((pvt)->info.mc_status & (1 << 4))
#define CH_DISABLED(pvt, ch)	((pvt)->info.mc_status & (1 << ch))

/* MC_MAX_DOD read functions */
static inline int numdimms(u32 dimms)
{
	return (dimms & 0x3) + 1;
}

static inline int numrank(u32 rank)
{
	static int ranks[4] = { 1, 2, 4, -EINVAL };

	return ranks[rank & 0x3];
}

static inline int numbank(u32 bank)
{
	static int banks[4] = { 4, 8, 16, -EINVAL };

	return banks[bank & 0x3];
}

static inline int numrow(u32 row)
{
	static int rows[8] = {
		1 << 12, 1 << 13, 1 << 14, 1 << 15,
		1 << 16, -EINVAL, -EINVAL, -EINVAL,
	};

	return rows[row & 0x7];
}

static inline int numcol(u32 col)
{
	static int cols[4] = {
		1 << 10, 1 << 11, 1 << 12, -EINVAL,
	};
	return cols[col & 0x3];
}

static struct i7core_dev *get_i7core_dev(u8 socket)
{
	struct i7core_dev *i7core_dev;

	list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
		if (i7core_dev->socket == socket)
			return i7core_dev;
	}

	return NULL;
}

static struct i7core_dev *alloc_i7core_dev(u8 socket,
					   const struct pci_id_table *table)
{
	struct i7core_dev *i7core_dev;

	i7core_dev = kzalloc(sizeof(*i7core_dev), GFP_KERNEL);
	if (!i7core_dev)
		return NULL;

	i7core_dev->pdev = kzalloc(sizeof(*i7core_dev->pdev) * table->n_devs,
				   GFP_KERNEL);
	if (!i7core_dev->pdev) {
		kfree(i7core_dev);
		return NULL;
	}

	i7core_dev->socket = socket;
	i7core_dev->n_devs = table->n_devs;
	list_add_tail(&i7core_dev->list, &i7core_edac_list);

	return i7core_dev;
}

static void free_i7core_dev(struct i7core_dev *i7core_dev)
{
	list_del(&i7core_dev->list);
	kfree(i7core_dev->pdev);
	kfree(i7core_dev);
}

/****************************************************************************
			Memory check routines
 ****************************************************************************/
static struct pci_dev *get_pdev_slot_func(u8 socket, unsigned slot,
					  unsigned func)
{
	struct i7core_dev *i7core_dev = get_i7core_dev(socket);
	int i;

	if (!i7core_dev)
		return NULL;

	for (i = 0; i < i7core_dev->n_devs; i++) {
		if (!i7core_dev->pdev[i])
			continue;

		if (PCI_SLOT(i7core_dev->pdev[i]->devfn) == slot &&
		    PCI_FUNC(i7core_dev->pdev[i]->devfn) == func) {
			return i7core_dev->pdev[i];
		}
	}

	return NULL;
}

/**
 * i7core_get_active_channels() - gets the number of channels and csrows
 * @socket:	Quick Path Interconnect socket
 * @channels:	Number of channels that will be returned
 * @csrows:	Number of csrows found
 *
 * Since the EDAC core needs to know in advance the number of available
 * channels and csrows in order to allocate memory for them, two similar
 * steps are needed. The first step, implemented by this function, counts
 * the csrows/channels present on one socket, so that the mci components
 * can be allocated with the proper size.
 *
 * It should be noted that none of the currently available datasheets explain
 * or even mention how csrows are seen by the memory controller, so we need
 * to fake a description for them: this driver maps one DIMM to one csrow.
 */
static int i7core_get_active_channels(const u8 socket, unsigned *channels,
				      unsigned *csrows)
{
	struct pci_dev *pdev = NULL;
	int i, j;
	u32 status, control;

	*channels = 0;
	*csrows = 0;

	pdev = get_pdev_slot_func(socket, 3, 0);
	if (!pdev) {
		i7core_printk(KERN_ERR, "Couldn't find socket %d fn 3.0!!!\n",
			      socket);
		return -ENODEV;
	}

	/* Device 3 function 0 reads */
	pci_read_config_dword(pdev, MC_STATUS, &status);
	pci_read_config_dword(pdev, MC_CONTROL, &control);

	for (i = 0; i < NUM_CHANS; i++) {
		u32 dimm_dod[3];
		/* Check if the channel is active */
		if (!(control & (1 << (8 + i))))
			continue;

		/* Check if the channel is disabled */
		if (status & (1 << i))
			continue;

		pdev = get_pdev_slot_func(socket, i + 4, 1);
		if (!pdev) {
			i7core_printk(KERN_ERR, "Couldn't find socket %d "
						"fn %d.%d!!!\n",
						socket, i + 4, 1);
			return -ENODEV;
		}
		/* Devices 4-6 function 1 */
		pci_read_config_dword(pdev,
				MC_DOD_CH_DIMM0, &dimm_dod[0]);
		pci_read_config_dword(pdev,
				MC_DOD_CH_DIMM1, &dimm_dod[1]);
		pci_read_config_dword(pdev,
				MC_DOD_CH_DIMM2, &dimm_dod[2]);

		(*channels)++;

		for (j = 0; j < 3; j++) {
			if (!DIMM_PRESENT(dimm_dod[j]))
				continue;
			(*csrows)++;
		}
	}

	debugf0("Number of active channels on socket %d: %d\n",
		socket, *channels);

	return 0;
}

static int get_dimm_config(const struct mem_ctl_info *mci)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	struct csrow_info *csr;
	struct pci_dev *pdev;
	int i, j;
	int csrow = 0;
	unsigned long last_page = 0;
	enum edac_type mode;
	enum mem_type mtype;

	/* Get data from the MC register, function 0 */
	pdev = pvt->pci_mcr[0];
	if (!pdev)
		return -ENODEV;

	/* Device 3 function 0 reads */
	pci_read_config_dword(pdev, MC_CONTROL, &pvt->info.mc_control);
	pci_read_config_dword(pdev, MC_STATUS, &pvt->info.mc_status);
	pci_read_config_dword(pdev, MC_MAX_DOD, &pvt->info.max_dod);
	pci_read_config_dword(pdev, MC_CHANNEL_MAPPER, &pvt->info.ch_map);

	debugf0("QPI %d control=0x%08x status=0x%08x dod=0x%08x map=0x%08x\n",
		pvt->i7core_dev->socket, pvt->info.mc_control, pvt->info.mc_status,
		pvt->info.max_dod, pvt->info.ch_map);

	if (ECC_ENABLED(pvt)) {
		debugf0("ECC enabled with x%d SDCC\n", ECCx8(pvt) ? 8 : 4);
		if (ECCx8(pvt))
			mode = EDAC_S8ECD8ED;
		else
			mode = EDAC_S4ECD4ED;
	} else {
		debugf0("ECC disabled\n");
		mode = EDAC_NONE;
	}

	/* FIXME: need to handle the error codes */
	debugf0("DOD Max limits: DIMMS: %d, %d-ranked, %d-banked "
		"x%x x 0x%x\n",
		numdimms(pvt->info.max_dod),
		numrank(pvt->info.max_dod >> 2),
		numbank(pvt->info.max_dod >> 4),
		numrow(pvt->info.max_dod >> 6),
		numcol(pvt->info.max_dod >> 9));

	for (i = 0; i < NUM_CHANS; i++) {
		u32 data, dimm_dod[3], value[8];

		if (!pvt->pci_ch[i][0])
			continue;

		if (!CH_ACTIVE(pvt, i)) {
			debugf0("Channel %i is not active\n", i);
			continue;
		}
		if (CH_DISABLED(pvt, i)) {
			debugf0("Channel %i is disabled\n", i);
			continue;
		}

		/* Devices 4-6 function 0 */
		pci_read_config_dword(pvt->pci_ch[i][0],
				MC_CHANNEL_DIMM_INIT_PARAMS, &data);

		pvt->channel[i].ranks = (data & QUAD_RANK_PRESENT) ?
					4 : 2;

		if (data & REGISTERED_DIMM)
			mtype = MEM_RDDR3;
		else
			mtype = MEM_DDR3;
#if 0
		if (data & THREE_DIMMS_PRESENT)
			pvt->channel[i].dimms = 3;
		else if (data & SINGLE_QUAD_RANK_PRESENT)
			pvt->channel[i].dimms = 1;
		else
			pvt->channel[i].dimms = 2;
#endif

		/* Devices 4-6 function 1 */
		pci_read_config_dword(pvt->pci_ch[i][1],
				MC_DOD_CH_DIMM0, &dimm_dod[0]);
		pci_read_config_dword(pvt->pci_ch[i][1],
				MC_DOD_CH_DIMM1, &dimm_dod[1]);
		pci_read_config_dword(pvt->pci_ch[i][1],
				MC_DOD_CH_DIMM2, &dimm_dod[2]);

		debugf0("Ch%d phy rd%d, wr%d (0x%08x): "
			"%d ranks, %cDIMMs\n",
			i,
			RDLCH(pvt->info.ch_map, i), WRLCH(pvt->info.ch_map, i),
			data,
			pvt->channel[i].ranks,
			(data & REGISTERED_DIMM) ? 'R' : 'U');

		for (j = 0; j < 3; j++) {
			u32 banks, ranks, rows, cols;
			u32 size, npages;

			if (!DIMM_PRESENT(dimm_dod[j]))
				continue;

			banks = numbank(MC_DOD_NUMBANK(dimm_dod[j]));
			ranks = numrank(MC_DOD_NUMRANK(dimm_dod[j]));
			rows = numrow(MC_DOD_NUMROW(dimm_dod[j]));
			cols = numcol(MC_DOD_NUMCOL(dimm_dod[j]));

			/* DDR3 has 8 I/O banks */
			size = (rows * cols * banks * ranks) >> (20 - 3);

			pvt->channel[i].dimms++;

			debugf0("\tdimm %d %d Mb offset: %x, "
				"bank: %d, rank: %d, row: %#x, col: %#x\n",
				j, size,
				RANKOFFSET(dimm_dod[j]),
				banks, ranks, rows, cols);

			npages = MiB_TO_PAGES(size);

			csr = &mci->csrows[csrow];
			csr->first_page = last_page + 1;
			last_page += npages;
			csr->last_page = last_page;
			csr->nr_pages = npages;

			csr->page_mask = 0;
			csr->grain = 8;
			csr->csrow_idx = csrow;
			csr->nr_channels = 1;

			csr->channels[0].chan_idx = i;
			csr->channels[0].ce_count = 0;

			pvt->csrow_map[i][j] = csrow;

			switch (banks) {
			case 4:
				csr->dtype = DEV_X4;
				break;
			case 8:
				csr->dtype = DEV_X8;
				break;
			case 16:
				csr->dtype = DEV_X16;
				break;
			default:
				csr->dtype = DEV_UNKNOWN;
			}

			csr->edac_mode = mode;
			csr->mtype = mtype;

			csrow++;
		}

		pci_read_config_dword(pdev, MC_SAG_CH_0, &value[0]);
		pci_read_config_dword(pdev, MC_SAG_CH_1, &value[1]);
		pci_read_config_dword(pdev, MC_SAG_CH_2, &value[2]);
		pci_read_config_dword(pdev, MC_SAG_CH_3, &value[3]);
		pci_read_config_dword(pdev, MC_SAG_CH_4, &value[4]);
		pci_read_config_dword(pdev, MC_SAG_CH_5, &value[5]);
		pci_read_config_dword(pdev, MC_SAG_CH_6, &value[6]);
		pci_read_config_dword(pdev, MC_SAG_CH_7, &value[7]);
		debugf1("\t[%i] DIVBY3\tREMOVED\tOFFSET\n", i);
		for (j = 0; j < 8; j++)
			debugf1("\t\t%#x\t%#x\t%#x\n",
				(value[j] >> 27) & 0x1,
				(value[j] >> 24) & 0x7,
				(value[j] & ((1 << 24) - 1)));
	}

	return 0;
}

/****************************************************************************
			Error insertion routines
 ****************************************************************************/

/* The i7core has independent error injection features per channel.
   However, to keep the code simpler, we don't allow enabling error injection
   on more than one channel.
   Also, since a change to an inject parameter is only applied at enable time,
   we disable error injection on all write calls to the sysfs nodes that
   control error code injection.
 */
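
/*
 * Illustrative sysfs walkthrough (the node names are the attributes declared
 * later in this file; the mc0 path assumes the first memory controller):
 *
 *	echo 3 > /sys/devices/system/edac/mc/mc0/inject_section
 *	echo 3 > /sys/devices/system/edac/mc/mc0/inject_type
 *	echo 1 > /sys/devices/system/edac/mc/mc0/inject_eccmask
 *	echo any > /sys/devices/system/edac/mc/mc0/inject_addrmatch/dimm
 *	echo 1 > /sys/devices/system/edac/mc/mc0/inject_enable
 */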
static int disable_inject(const struct mem_ctl_info *mci)
{
	struct i7core_pvt *pvt = mci->pvt_info;

	pvt->inject.enable = 0;

	if (!pvt->pci_ch[pvt->inject.channel][0])
		return -ENODEV;

	pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0],
				MC_CHANNEL_ERROR_INJECT, 0);

	return 0;
}

/*
 * i7core inject.section
 *
 * accept and store error injection inject.section value
 * bit 0 - refers to the lower 32-byte half cacheline
 * bit 1 - refers to the upper 32-byte half cacheline
 */
static ssize_t i7core_inject_section_store(struct mem_ctl_info *mci,
					   const char *data, size_t count)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	unsigned long value;
	int rc;

	if (pvt->inject.enable)
		disable_inject(mci);

	rc = strict_strtoul(data, 10, &value);
	if ((rc < 0) || (value > 3))
		return -EIO;

	pvt->inject.section = (u32) value;
	return count;
}

static ssize_t i7core_inject_section_show(struct mem_ctl_info *mci,
					  char *data)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	return sprintf(data, "0x%08x\n", pvt->inject.section);
}

/*
 * i7core inject.type
 *
 * accept and store error injection inject.type value
 * bit 0 - repeat enable - Enable error repetition
 * bit 1 - inject ECC error
 * bit 2 - inject parity error
 */
static ssize_t i7core_inject_type_store(struct mem_ctl_info *mci,
					const char *data, size_t count)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	unsigned long value;
	int rc;

	if (pvt->inject.enable)
		disable_inject(mci);

	rc = strict_strtoul(data, 10, &value);
	if ((rc < 0) || (value > 7))
		return -EIO;

	pvt->inject.type = (u32) value;
	return count;
}

static ssize_t i7core_inject_type_show(struct mem_ctl_info *mci,
				       char *data)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	return sprintf(data, "0x%08x\n", pvt->inject.type);
}

/*
 * i7core inject.eccmask
 *
 * The type of error (UE/CE) will depend on the inject.eccmask value:
 * Any bits set to a 1 will flip the corresponding ECC bit
 * Correctable errors can be injected by flipping 1 bit or the bits within
 * a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
 * 23:16 and 31:24). Flipping bits in two symbol pairs will cause an
 * uncorrectable error to be injected.
 */
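
/*
 * Worked example of the rule above: eccmask = 0x00000101 flips bit 0 and
 * bit 8, both inside the same symbol pair (7:0 and 15:8), so the injected
 * error is correctable; eccmask = 0x00010001 flips bit 0 and bit 16, which
 * touches two different symbol pairs, so the injected error is uncorrectable.
 */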
static ssize_t i7core_inject_eccmask_store(struct mem_ctl_info *mci,
					   const char *data, size_t count)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	unsigned long value;
	int rc;

	if (pvt->inject.enable)
		disable_inject(mci);

	rc = strict_strtoul(data, 10, &value);
	if (rc < 0)
		return -EIO;

	pvt->inject.eccmask = (u32) value;
	return count;
}

static ssize_t i7core_inject_eccmask_show(struct mem_ctl_info *mci,
					  char *data)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	return sprintf(data, "0x%08x\n", pvt->inject.eccmask);
}

/*
 * i7core_addrmatch
 *
 * Accept and store the address-match criteria (channel, dimm, rank, bank,
 * page, col) used to select where an error will be injected. Each parameter
 * accepts either a number below its limit or the string "any", which makes
 * the memory controller ignore that field when matching the address.
 */

#define DECLARE_ADDR_MATCH(param, limit)			\
static ssize_t i7core_inject_store_##param(			\
		struct mem_ctl_info *mci,			\
		const char *data, size_t count)			\
{								\
	struct i7core_pvt *pvt;					\
	long value;						\
	int rc;							\
								\
	debugf1("%s()\n", __func__);				\
	pvt = mci->pvt_info;					\
								\
	if (pvt->inject.enable)					\
		disable_inject(mci);				\
								\
	if (!strcasecmp(data, "any") || !strcasecmp(data, "any\n"))\
		value = -1;					\
	else {							\
		rc = strict_strtoul(data, 10, &value);		\
		if ((rc < 0) || (value >= limit))		\
			return -EIO;				\
	}							\
								\
	pvt->inject.param = value;				\
								\
	return count;						\
}								\
								\
static ssize_t i7core_inject_show_##param(			\
		struct mem_ctl_info *mci,			\
		char *data)					\
{								\
	struct i7core_pvt *pvt;					\
								\
	pvt = mci->pvt_info;					\
	debugf1("%s() pvt=%p\n", __func__, pvt);		\
	if (pvt->inject.param < 0)				\
		return sprintf(data, "any\n");			\
	else							\
		return sprintf(data, "%d\n", pvt->inject.param);\
}

#define ATTR_ADDR_MATCH(param)					\
	{							\
		.attr = {					\
			.name = #param,				\
			.mode = (S_IRUGO | S_IWUSR)		\
		},						\
		.show  = i7core_inject_show_##param,		\
		.store = i7core_inject_store_##param,		\
	}

DECLARE_ADDR_MATCH(channel, 3);
DECLARE_ADDR_MATCH(dimm, 3);
DECLARE_ADDR_MATCH(rank, 4);
DECLARE_ADDR_MATCH(bank, 32);
DECLARE_ADDR_MATCH(page, 0x10000);
DECLARE_ADDR_MATCH(col, 0x4000);

static int write_and_test(struct pci_dev *dev, const int where, const u32 val)
{
	u32 read;
	int count;

	debugf0("setting pci %02x:%02x.%x reg=%02x value=%08x\n",
		dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
		where, val);

	for (count = 0; count < 10; count++) {
		if (count)
			msleep(100);
		pci_write_config_dword(dev, where, val);
		pci_read_config_dword(dev, where, &read);

		if (read == val)
			return 0;
	}

	i7core_printk(KERN_ERR, "Error during set pci %02x:%02x.%x reg=%02x "
		"write=%08x. Read=%08x\n",
		dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
		where, val, read);

	return -EINVAL;
}

/*
 * This routine prepares the Memory Controller for error injection.
 * The error will be injected when some process tries to write to the
 * memory that matches the given criteria.
 * The criteria can be set in terms of a mask where dimm, rank, bank, page
 * and col can be specified.
 * A -1 value for any of the mask items will make the MCU ignore
 * that matching criterion for error injection.
 *
 * It should be noted that the error will only happen after a write operation
 * on a memory address that matches the condition. If REPEAT_EN is not enabled
 * in the inject mask, then it will produce just one error. Otherwise, it will
 * repeat until the inject mask is cleared.
 *
 * FIXME: This routine assumes that the MAXNUMDIMMS value of MC_MAX_DOD
 * is reliable enough to check if the MC is using the
 * three channels. However, this is not clear in the datasheet.
 */
static ssize_t i7core_inject_enable_store(struct mem_ctl_info *mci,
					  const char *data, size_t count)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	u32 injectmask;
	u64 mask = 0;
	int  rc;
	long enable;

	if (!pvt->pci_ch[pvt->inject.channel][0])
		return 0;

	rc = strict_strtoul(data, 10, &enable);
	if ((rc < 0))
		return 0;

	if (enable) {
		pvt->inject.enable = 1;
	} else {
		disable_inject(mci);
		return count;
	}

	/* Sets pvt->inject.dimm mask */
	if (pvt->inject.dimm < 0)
		mask |= 1LL << 41;
	else {
		if (pvt->channel[pvt->inject.channel].dimms > 2)
			mask |= (pvt->inject.dimm & 0x3LL) << 35;
		else
			mask |= (pvt->inject.dimm & 0x1LL) << 36;
	}

	/* Sets pvt->inject.rank mask */
	if (pvt->inject.rank < 0)
		mask |= 1LL << 40;
	else {
		if (pvt->channel[pvt->inject.channel].dimms > 2)
			mask |= (pvt->inject.rank & 0x1LL) << 34;
		else
			mask |= (pvt->inject.rank & 0x3LL) << 34;
	}

	/* Sets pvt->inject.bank mask */
	if (pvt->inject.bank < 0)
		mask |= 1LL << 39;
	else
		mask |= (pvt->inject.bank & 0x15LL) << 30;

	/* Sets pvt->inject.page mask */
	if (pvt->inject.page < 0)
		mask |= 1LL << 38;
	else
		mask |= (pvt->inject.page & 0xffff) << 14;

	/* Sets pvt->inject.column mask */
	if (pvt->inject.col < 0)
		mask |= 1LL << 37;
	else
		mask |= (pvt->inject.col & 0x3fff);

	/*
	 * bit    0: REPEAT_EN
	 * bits 1-2: MASK_HALF_CACHELINE
	 * bit    3: INJECT_ECC
	 * bit    4: INJECT_ADDR_PARITY
	 */
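	/*
	 * Worked example of the encoding below: with inject.type = 3 (repeat +
	 * ECC) and inject.section = 3 (both half cachelines), injectmask is
	 * (3 & 1) | ((3 & 0x3) << 1) | ((3 & 0x6) << 2) = 0x1 | 0x6 | 0x8 = 0xf,
	 * i.e. REPEAT_EN, both MASK_HALF_CACHELINE bits and INJECT_ECC set.
	 */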

	injectmask = (pvt->inject.type & 1) |
		     (pvt->inject.section & 0x3) << 1 |
		     (pvt->inject.type & 0x6) << (3 - 1);

	/* Unlock writes to registers - this register is write only */
	pci_write_config_dword(pvt->pci_noncore,
			       MC_CFG_CONTROL, 0x2);

	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
		       MC_CHANNEL_ADDR_MATCH, mask);
	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
		       MC_CHANNEL_ADDR_MATCH + 4, mask >> 32L);

	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
		       MC_CHANNEL_ERROR_MASK, pvt->inject.eccmask);

	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
		       MC_CHANNEL_ERROR_INJECT, injectmask);

	/*
	 * This is something undocumented, based on my tests
	 * Without writing 8 to this register, errors aren't injected. Not sure
	 * why.
	 */
	pci_write_config_dword(pvt->pci_noncore,
			       MC_CFG_CONTROL, 8);

	debugf0("Error inject addr match 0x%016llx, ecc 0x%08x,"
		" inject 0x%08x\n",
		mask, pvt->inject.eccmask, injectmask);


	return count;
}

static ssize_t i7core_inject_enable_show(struct mem_ctl_info *mci,
					 char *data)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	u32 injectmask;

	if (!pvt->pci_ch[pvt->inject.channel][0])
		return 0;

	pci_read_config_dword(pvt->pci_ch[pvt->inject.channel][0],
			      MC_CHANNEL_ERROR_INJECT, &injectmask);

	debugf0("Inject error read: 0x%018x\n", injectmask);

	if (injectmask & 0x0c)
		pvt->inject.enable = 1;

	return sprintf(data, "%d\n", pvt->inject.enable);
}

#define DECLARE_COUNTER(param)					\
static ssize_t i7core_show_counter_##param(			\
		struct mem_ctl_info *mci,			\
		char *data)					\
{								\
	struct i7core_pvt *pvt = mci->pvt_info;		\
								\
	debugf1("%s() \n", __func__);				\
	if (!pvt->ce_count_available || (pvt->is_registered))	\
		return sprintf(data, "data unavailable\n");	\
	return sprintf(data, "%lu\n",				\
			pvt->udimm_ce_count[param]);		\
}

#define ATTR_COUNTER(param)					\
	{							\
		.attr = {					\
			.name = __stringify(udimm##param),	\
			.mode = (S_IRUGO | S_IWUSR)		\
		},						\
		.show  = i7core_show_counter_##param		\
	}

DECLARE_COUNTER(0);
DECLARE_COUNTER(1);
DECLARE_COUNTER(2);

/*
 * Sysfs struct
 */

static const struct mcidev_sysfs_attribute i7core_addrmatch_attrs[] = {
	ATTR_ADDR_MATCH(channel),
	ATTR_ADDR_MATCH(dimm),
	ATTR_ADDR_MATCH(rank),
	ATTR_ADDR_MATCH(bank),
	ATTR_ADDR_MATCH(page),
	ATTR_ADDR_MATCH(col),
	{ }	/* End of list */
};

static const struct mcidev_sysfs_group i7core_inject_addrmatch = {
	.name  = "inject_addrmatch",
	.mcidev_attr = i7core_addrmatch_attrs,
};

static const struct mcidev_sysfs_attribute i7core_udimm_counters_attrs[] = {
	ATTR_COUNTER(0),
	ATTR_COUNTER(1),
	ATTR_COUNTER(2),
	{ .attr = { .name = NULL } }
};

static const struct mcidev_sysfs_group i7core_udimm_counters = {
	.name  = "all_channel_counts",
	.mcidev_attr = i7core_udimm_counters_attrs,
};

static const struct mcidev_sysfs_attribute i7core_sysfs_rdimm_attrs[] = {
	{
		.attr = {
			.name = "inject_section",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_section_show,
		.store = i7core_inject_section_store,
	}, {
		.attr = {
			.name = "inject_type",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_type_show,
		.store = i7core_inject_type_store,
	}, {
		.attr = {
			.name = "inject_eccmask",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_eccmask_show,
		.store = i7core_inject_eccmask_store,
	}, {
		.grp = &i7core_inject_addrmatch,
	}, {
		.attr = {
			.name = "inject_enable",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_enable_show,
		.store = i7core_inject_enable_store,
	},
	{ }	/* End of list */
};

static const struct mcidev_sysfs_attribute i7core_sysfs_udimm_attrs[] = {
	{
		.attr = {
			.name = "inject_section",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_section_show,
		.store = i7core_inject_section_store,
	}, {
		.attr = {
			.name = "inject_type",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_type_show,
		.store = i7core_inject_type_store,
	}, {
		.attr = {
			.name = "inject_eccmask",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_eccmask_show,
		.store = i7core_inject_eccmask_store,
	}, {
		.grp = &i7core_inject_addrmatch,
	}, {
		.attr = {
			.name = "inject_enable",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_enable_show,
		.store = i7core_inject_enable_store,
	}, {
		.grp = &i7core_udimm_counters,
	},
	{ }	/* End of list */
};

/****************************************************************************
	Device initialization routines: put/get, init/exit
 ****************************************************************************/

/*
 *	i7core_put_all_devices	'put' all the devices that we have
 *				reserved via 'get'
 */
static void i7core_put_devices(struct i7core_dev *i7core_dev)
{
	int i;

	debugf0(__FILE__ ": %s()\n", __func__);
	for (i = 0; i < i7core_dev->n_devs; i++) {
		struct pci_dev *pdev = i7core_dev->pdev[i];
		if (!pdev)
			continue;
		debugf0("Removing dev %02x:%02x.%d\n",
			pdev->bus->number,
			PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
		pci_dev_put(pdev);
	}
}

static void i7core_put_all_devices(void)
{
	struct i7core_dev *i7core_dev, *tmp;

	list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list) {
		i7core_put_devices(i7core_dev);
		free_i7core_dev(i7core_dev);
	}
}

static void __init i7core_xeon_pci_fixup(const struct pci_id_table *table)
{
	struct pci_dev *pdev = NULL;
	int i;

	/*
	 * On Xeon 55xx, the Intel Quick Path Arch Generic Non-core pci buses
	 * aren't announced by ACPI. So, we need to use a legacy scan probing
	 * to detect them
	 */
	while (table && table->descr) {
		pdev = pci_get_device(PCI_VENDOR_ID_INTEL, table->descr[0].dev_id, NULL);
		if (unlikely(!pdev)) {
			for (i = 0; i < MAX_SOCKET_BUSES; i++)
				pcibios_scan_specific_bus(255-i);
		}
		pci_dev_put(pdev);
		table++;
	}
}

static unsigned i7core_pci_lastbus(void)
{
	int last_bus = 0, bus;
	struct pci_bus *b = NULL;

	while ((b = pci_find_next_bus(b)) != NULL) {
		bus = b->number;
		debugf0("Found bus %d\n", bus);
		if (bus > last_bus)
			last_bus = bus;
	}

	debugf0("Last bus %d\n", last_bus);

	return last_bus;
}

/*
 *	i7core_get_all_devices	Find and perform 'get' operation on the MCH's
 *			device/functions we want to reference for this driver
 *
 *			Need to 'get' device 16 func 1 and func 2
 */
static int i7core_get_onedevice(struct pci_dev **prev,
				const struct pci_id_table *table,
				const unsigned devno,
				const unsigned last_bus)
{
	struct i7core_dev *i7core_dev;
	const struct pci_id_descr *dev_descr = &table->descr[devno];

	struct pci_dev *pdev = NULL;
	u8 bus = 0;
	u8 socket = 0;

	pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
			      dev_descr->dev_id, *prev);

	if (!pdev) {
		if (*prev) {
			*prev = pdev;
			return 0;
		}

		if (dev_descr->optional)
			return 0;

		if (devno == 0)
			return -ENODEV;

		i7core_printk(KERN_INFO,
			"Device not found: dev %02x.%d PCI ID %04x:%04x\n",
			dev_descr->dev, dev_descr->func,
			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);

		/* End of list, leave */
		return -ENODEV;
	}
	bus = pdev->bus->number;

	socket = last_bus - bus;

	i7core_dev = get_i7core_dev(socket);
	if (!i7core_dev) {
		i7core_dev = alloc_i7core_dev(socket, table);
		if (!i7core_dev) {
			pci_dev_put(pdev);
			return -ENOMEM;
		}
	}

	if (i7core_dev->pdev[devno]) {
		i7core_printk(KERN_ERR,
			"Duplicated device for "
			"dev %02x:%02x.%d PCI ID %04x:%04x\n",
			bus, dev_descr->dev, dev_descr->func,
			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
		pci_dev_put(pdev);
		return -ENODEV;
	}

	i7core_dev->pdev[devno] = pdev;

	/* Sanity check */
	if (unlikely(PCI_SLOT(pdev->devfn) != dev_descr->dev ||
			PCI_FUNC(pdev->devfn) != dev_descr->func)) {
		i7core_printk(KERN_ERR,
			"Device PCI ID %04x:%04x "
			"has dev %02x:%02x.%d instead of dev %02x:%02x.%d\n",
			PCI_VENDOR_ID_INTEL, dev_descr->dev_id,
			bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
			bus, dev_descr->dev, dev_descr->func);
		return -ENODEV;
	}

	/* Be sure that the device is enabled */
	if (unlikely(pci_enable_device(pdev) < 0)) {
		i7core_printk(KERN_ERR,
			"Couldn't enable "
			"dev %02x:%02x.%d PCI ID %04x:%04x\n",
			bus, dev_descr->dev, dev_descr->func,
			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
		return -ENODEV;
	}

	debugf0("Detected socket %d dev %02x:%02x.%d PCI ID %04x:%04x\n",
		socket, bus, dev_descr->dev,
		dev_descr->func,
		PCI_VENDOR_ID_INTEL, dev_descr->dev_id);

	/*
	 * As stated on drivers/pci/search.c, the reference count for
	 * @from is always decremented if it is not %NULL. So, as we need
	 * to get all devices up to null, we need to do a get for the device
	 */
	pci_dev_get(pdev);

	*prev = pdev;

	return 0;
}

static int i7core_get_all_devices(void)
{
	int i, rc, last_bus;
	struct pci_dev *pdev = NULL;
	const struct pci_id_table *table = pci_dev_table;

	last_bus = i7core_pci_lastbus();

	while (table && table->descr) {
		for (i = 0; i < table->n_devs; i++) {
			pdev = NULL;
			do {
				rc = i7core_get_onedevice(&pdev, table, i,
							  last_bus);
				if (rc < 0) {
					if (i == 0) {
						i = table->n_devs;
						break;
					}
					i7core_put_all_devices();
					return -ENODEV;
				}
			} while (pdev);
		}
		table++;
	}

	return 0;
}

static int mci_bind_devs(struct mem_ctl_info *mci,
			 struct i7core_dev *i7core_dev)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	struct pci_dev *pdev;
	int i, func, slot;

	pvt->is_registered = 0;
	for (i = 0; i < i7core_dev->n_devs; i++) {
		pdev = i7core_dev->pdev[i];
		if (!pdev)
			continue;

		func = PCI_FUNC(pdev->devfn);
		slot = PCI_SLOT(pdev->devfn);
		if (slot == 3) {
			if (unlikely(func > MAX_MCR_FUNC))
				goto error;
			pvt->pci_mcr[func] = pdev;
		} else if (likely(slot >= 4 && slot < 4 + NUM_CHANS)) {
			if (unlikely(func > MAX_CHAN_FUNC))
				goto error;
			pvt->pci_ch[slot - 4][func] = pdev;
		} else if (!slot && !func)
			pvt->pci_noncore = pdev;
		else
			goto error;

		debugf0("Associated fn %d.%d, dev = %p, socket %d\n",
			PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
			pdev, i7core_dev->socket);

		if (PCI_SLOT(pdev->devfn) == 3 &&
			PCI_FUNC(pdev->devfn) == 2)
			pvt->is_registered = 1;
	}

	return 0;

error:
	i7core_printk(KERN_ERR, "Device %d, function %d "
		      "is out of the expected range\n",
		      slot, func);
	return -EINVAL;
}

/****************************************************************************
			Error check routines
 ****************************************************************************/
static void i7core_rdimm_update_csrow(struct mem_ctl_info *mci,
				      const int chan,
				      const int dimm,
				      const int add)
{
	char *msg;
	struct i7core_pvt *pvt = mci->pvt_info;
	int row = pvt->csrow_map[chan][dimm], i;

	for (i = 0; i < add; i++) {
		msg = kasprintf(GFP_KERNEL, "Corrected error "
				"(Socket=%d channel=%d dimm=%d)",
				pvt->i7core_dev->socket, chan, dimm);

		edac_mc_handle_fbd_ce(mci, row, 0, msg);
		kfree(msg);
	}
}

static void i7core_rdimm_update_ce_count(struct mem_ctl_info *mci,
					 const int chan,
					 const int new0,
					 const int new1,
					 const int new2)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	int add0 = 0, add1 = 0, add2 = 0;
	/* Updates CE counters if it is not the first time here */
	if (pvt->ce_count_available) {
		/* Updates CE counters */

		add2 = new2 - pvt->rdimm_last_ce_count[chan][2];
		add1 = new1 - pvt->rdimm_last_ce_count[chan][1];
		add0 = new0 - pvt->rdimm_last_ce_count[chan][0];

		if (add2 < 0)
			add2 += 0x7fff;
		pvt->rdimm_ce_count[chan][2] += add2;

		if (add1 < 0)
			add1 += 0x7fff;
		pvt->rdimm_ce_count[chan][1] += add1;

		if (add0 < 0)
			add0 += 0x7fff;
		pvt->rdimm_ce_count[chan][0] += add0;
	} else
		pvt->ce_count_available = 1;

	/* Store the new values */
	pvt->rdimm_last_ce_count[chan][2] = new2;
	pvt->rdimm_last_ce_count[chan][1] = new1;
	pvt->rdimm_last_ce_count[chan][0] = new0;

	/* Update the EDAC core */
	if (add0 != 0)
		i7core_rdimm_update_csrow(mci, chan, 0, add0);
	if (add1 != 0)
		i7core_rdimm_update_csrow(mci, chan, 1, add1);
	if (add2 != 0)
		i7core_rdimm_update_csrow(mci, chan, 2, add2);

}

static void i7core_rdimm_check_mc_ecc_err(struct mem_ctl_info *mci)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	u32 rcv[3][2];
	int i, new0, new1, new2;

	/* Read DEV 3: FUN 2: MC_COR_ECC_CNT regs directly */
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_0,
			      &rcv[0][0]);
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_1,
			      &rcv[0][1]);
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_2,
			      &rcv[1][0]);
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_3,
			      &rcv[1][1]);
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_4,
			      &rcv[2][0]);
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_5,
			      &rcv[2][1]);
	for (i = 0 ; i < 3; i++) {
		debugf3("MC_COR_ECC_CNT%d = 0x%x; MC_COR_ECC_CNT%d = 0x%x\n",
			(i * 2), rcv[i][0], (i * 2) + 1, rcv[i][1]);
		/* if the channel has 3 dimms */
		if (pvt->channel[i].dimms > 2) {
			new0 = DIMM_BOT_COR_ERR(rcv[i][0]);
			new1 = DIMM_TOP_COR_ERR(rcv[i][0]);
			new2 = DIMM_BOT_COR_ERR(rcv[i][1]);
		} else {
			new0 = DIMM_TOP_COR_ERR(rcv[i][0]) +
				DIMM_BOT_COR_ERR(rcv[i][0]);
			new1 = DIMM_TOP_COR_ERR(rcv[i][1]) +
				DIMM_BOT_COR_ERR(rcv[i][1]);
			new2 = 0;
		}

		i7core_rdimm_update_ce_count(mci, i, new0, new1, new2);
	}
}

/* This function is based on the device 3 function 4 registers as described on:
 * Intel Xeon Processor 5500 Series Datasheet Volume 2
 *	http://www.intel.com/Assets/PDF/datasheet/321322.pdf
 * also available at:
 *	http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
 */
static void i7core_udimm_check_mc_ecc_err(struct mem_ctl_info *mci)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	u32 rcv1, rcv0;
	int new0, new1, new2;

	if (!pvt->pci_mcr[4]) {
		debugf0("%s MCR registers not found\n", __func__);
		return;
	}

	/* Corrected test errors */
	pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV1, &rcv1);
	pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV0, &rcv0);

	/* Store the new values */
	new2 = DIMM2_COR_ERR(rcv1);
	new1 = DIMM1_COR_ERR(rcv0);
	new0 = DIMM0_COR_ERR(rcv0);

	/* Updates CE counters if it is not the first time here */
	if (pvt->ce_count_available) {
		/* Updates CE counters */
		int add0, add1, add2;

		add2 = new2 - pvt->udimm_last_ce_count[2];
		add1 = new1 - pvt->udimm_last_ce_count[1];
		add0 = new0 - pvt->udimm_last_ce_count[0];

		if (add2 < 0)
			add2 += 0x7fff;
		pvt->udimm_ce_count[2] += add2;

		if (add1 < 0)
			add1 += 0x7fff;
		pvt->udimm_ce_count[1] += add1;

		if (add0 < 0)
			add0 += 0x7fff;
		pvt->udimm_ce_count[0] += add0;

		if (add0 | add1 | add2)
			i7core_printk(KERN_ERR, "New Corrected error(s): "
				      "dimm0: +%d, dimm1: +%d, dimm2 +%d\n",
				      add0, add1, add2);
	} else
		pvt->ce_count_available = 1;

	/* Store the new values */
	pvt->udimm_last_ce_count[2] = new2;
	pvt->udimm_last_ce_count[1] = new1;
	pvt->udimm_last_ce_count[0] = new0;
}

/*
 * According to tables E-11 and E-12 of chapter E.3.3 of the Intel 64 and
 * IA-32 Architectures Software Developer's Manual Volume 3B,
 * Nehalem is defined as family 0x06, model 0x1a.
 *
 * The MCA registers used here are the following ones:
 *     struct mce field	MCA Register
 *     m->status	MSR_IA32_MC8_STATUS
 *     m->addr		MSR_IA32_MC8_ADDR
 *     m->misc		MSR_IA32_MC8_MISC
 * In the case of Nehalem, the error information is masked at the .status
 * and .misc fields.
 */
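
/*
 * Illustrative decode, following the bit extraction done below: for
 * m->misc = 0x00090000, dimm = (0x9 & 0x3) = 1 and channel =
 * ((0x9 >> 2) & 0x3) = 2, i.e. the error hit DIMM 1 on channel 2.
 */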
static void i7core_mce_output_error(struct mem_ctl_info *mci,
				    const struct mce *m)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	char *type, *optype, *err, *msg;
	unsigned long error = m->status & 0x1ff0000l;
	u32 optypenum = (m->status >> 4) & 0x07;
	u32 core_err_cnt = (m->status >> 38) & 0x7fff;
	u32 dimm = (m->misc >> 16) & 0x3;
	u32 channel = (m->misc >> 18) & 0x3;
	u32 syndrome = m->misc >> 32;
	u32 errnum = find_first_bit(&error, 32);
	int csrow;

	if (m->mcgstatus & 1)
		type = "FATAL";
	else
		type = "NON_FATAL";

	switch (optypenum) {
	case 0:
		optype = "generic undef request";
		break;
	case 1:
		optype = "read error";
		break;
	case 2:
		optype = "write error";
		break;
	case 3:
		optype = "addr/cmd error";
		break;
	case 4:
		optype = "scrubbing error";
		break;
	default:
		optype = "reserved";
		break;
	}

	switch (errnum) {
	case 16:
		err = "read ECC error";
		break;
	case 17:
		err = "RAS ECC error";
		break;
	case 18:
		err = "write parity error";
		break;
	case 19:
		err = "redundancy loss";
		break;
	case 20:
		err = "reserved";
		break;
	case 21:
		err = "memory range error";
		break;
	case 22:
		err = "RTID out of range";
		break;
	case 23:
		err = "address parity error";
		break;
	case 24:
		err = "byte enable parity error";
		break;
	default:
		err = "unknown";
	}

	/* FIXME: should convert addr into bank and rank information */
	msg = kasprintf(GFP_ATOMIC,
		"%s (addr = 0x%08llx, cpu=%d, Dimm=%d, Channel=%d, "
		"syndrome=0x%08x, count=%d, Err=%08llx:%08llx (%s: %s))\n",
		type, (long long) m->addr, m->cpu, dimm, channel,
		syndrome, core_err_cnt, (long long)m->status,
		(long long)m->misc, optype, err);

	debugf0("%s", msg);

	csrow = pvt->csrow_map[channel][dimm];

	/* Call the helper to output message */
	if (m->mcgstatus & 1)
		edac_mc_handle_fbd_ue(mci, csrow, 0,
				0 /* FIXME: should be channel here */, msg);
	else if (!pvt->is_registered)
		edac_mc_handle_fbd_ce(mci, csrow,
				0 /* FIXME: should be channel here */, msg);

	kfree(msg);
}

/*
 * i7core_check_error	Retrieve and process errors reported by the
 *			hardware. Called by the Core module.
 */
static void i7core_check_error(struct mem_ctl_info *mci)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	int i;
	unsigned count = 0;
	struct mce *m;

	/*
	 * MCE first step: Copy all mce errors into a temporary buffer
	 * We use a double buffering here, to reduce the risk of
	 * losing an error.
	 */
	smp_rmb();
	count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
		% MCE_LOG_LEN;
	if (!count)
		goto check_ce_error;

	m = pvt->mce_outentry;
	if (pvt->mce_in + count > MCE_LOG_LEN) {
		unsigned l = MCE_LOG_LEN - pvt->mce_in;

		memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
		smp_wmb();
		pvt->mce_in = 0;
		count -= l;
		m += l;
	}
	memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
	smp_wmb();
	pvt->mce_in += count;

	smp_rmb();
	if (pvt->mce_overrun) {
		i7core_printk(KERN_ERR, "Lost %d memory errors\n",
			      pvt->mce_overrun);
		smp_wmb();
		pvt->mce_overrun = 0;
	}

	/*
	 * MCE second step: parse errors and display
	 */
	for (i = 0; i < count; i++)
		i7core_mce_output_error(mci, &pvt->mce_outentry[i]);

	/*
	 * Now, let's increment CE error counts
	 */
check_ce_error:
	if (!pvt->is_registered)
		i7core_udimm_check_mc_ecc_err(mci);
	else
		i7core_rdimm_check_mc_ecc_err(mci);
}

/*
 * i7core_mce_check_error	Replicates mcelog routine to get errors
 *				This routine simply queues mcelog errors, and
 *				returns. The error itself should be handled
 *				later by i7core_check_error.
 * WARNING: As this routine should be called at NMI time, extra care should
 * be taken to avoid deadlocks, and to be as fast as possible.
 */
static int i7core_mce_check_error(void *priv, struct mce *mce)
{
	struct mem_ctl_info *mci = priv;
	struct i7core_pvt *pvt = mci->pvt_info;

	/*
	 * Just let mcelog handle it if the error is
	 * outside the memory controller
	 */
	if (((mce->status & 0xffff) >> 7) != 1)
		return 0;

	/* Bank 8 registers are the only ones that we know how to handle */
	if (mce->bank != 8)
		return 0;

#ifdef CONFIG_SMP
	/* Only handle if it is the right mc controller */
	if (cpu_data(mce->cpu).phys_proc_id != pvt->i7core_dev->socket)
		return 0;
#endif

	smp_rmb();
	if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
		smp_wmb();
		pvt->mce_overrun++;
		return 0;
	}

	/* Copy memory error at the ringbuffer */
	memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));
	smp_wmb();
	pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN;

	/* Handle fatal errors immediately */
	if (mce->mcgstatus & 1)
		i7core_check_error(mci);

	/* Advise mcelog that the errors were handled */
	return 1;
}

static void i7core_pci_ctl_create(struct i7core_pvt *pvt)
{
	pvt->i7core_pci = edac_pci_create_generic_ctl(
						&pvt->i7core_dev->pdev[0]->dev,
						EDAC_MOD_STR);
	if (unlikely(!pvt->i7core_pci))
		pr_warn("Unable to setup PCI error report via EDAC\n");
}

static void i7core_pci_ctl_release(struct i7core_pvt *pvt)
{
	if (likely(pvt->i7core_pci))
		edac_pci_release_generic_ctl(pvt->i7core_pci);
	else
		i7core_printk(KERN_ERR,
				"Couldn't find mem_ctl_info for socket %d\n",
				pvt->i7core_dev->socket);
	pvt->i7core_pci = NULL;
}

static void i7core_unregister_mci(struct i7core_dev *i7core_dev)
{
	struct mem_ctl_info *mci = i7core_dev->mci;
	struct i7core_pvt *pvt;

	if (unlikely(!mci || !mci->pvt_info)) {
		debugf0("MC: " __FILE__ ": %s(): dev = %p\n",
			__func__, &i7core_dev->pdev[0]->dev);

		i7core_printk(KERN_ERR, "Couldn't find mci handler\n");
		return;
	}

	pvt = mci->pvt_info;

	debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n",
		__func__, mci, &i7core_dev->pdev[0]->dev);

	/* Disable MCE NMI handler */
	edac_mce_unregister(&pvt->edac_mce);

	/* Disable EDAC polling */
	i7core_pci_ctl_release(pvt);

	/* Remove MC sysfs nodes */
	edac_mc_del_mc(mci->dev);

	debugf1("%s: free mci struct\n", mci->ctl_name);
	kfree(mci->ctl_name);
	edac_mc_free(mci);
	i7core_dev->mci = NULL;
}

static int i7core_register_mci(struct i7core_dev *i7core_dev)
{
	struct mem_ctl_info *mci;
	struct i7core_pvt *pvt;
	int rc, channels, csrows;

	/* Check the number of active and not disabled channels */
	rc = i7core_get_active_channels(i7core_dev->socket, &channels, &csrows);
	if (unlikely(rc < 0))
		return rc;

	/* allocate a new MC control structure */
	mci = edac_mc_alloc(sizeof(*pvt), csrows, channels, i7core_dev->socket);
	if (unlikely(!mci))
		return -ENOMEM;

	debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n",
		__func__, mci, &i7core_dev->pdev[0]->dev);

	pvt = mci->pvt_info;
	memset(pvt, 0, sizeof(*pvt));

	/* Associates i7core_dev and mci for future usage */
	pvt->i7core_dev = i7core_dev;
	i7core_dev->mci = mci;

	/*
	 * FIXME: how to handle RDDR3 at MCI level? It is possible to have
	 * Mixed RDDR3/UDDR3 with Nehalem, provided that they are on different
	 * memory channels
	 */
	mci->mtype_cap = MEM_FLAG_DDR3;
	mci->edac_ctl_cap = EDAC_FLAG_NONE;
	mci->edac_cap = EDAC_FLAG_NONE;
	mci->mod_name = "i7core_edac.c";
	mci->mod_ver = I7CORE_REVISION;
	mci->ctl_name = kasprintf(GFP_KERNEL, "i7 core #%d",
				  i7core_dev->socket);
	mci->dev_name = pci_name(i7core_dev->pdev[0]);
	mci->ctl_page_to_phys = NULL;

	/* Store pci devices at mci for faster access */
	rc = mci_bind_devs(mci, i7core_dev);
	if (unlikely(rc < 0))
		goto fail0;

	if (pvt->is_registered)
		mci->mc_driver_sysfs_attributes = i7core_sysfs_rdimm_attrs;
	else
		mci->mc_driver_sysfs_attributes = i7core_sysfs_udimm_attrs;

	/* Get dimm basic config */
	get_dimm_config(mci);
	/* record ptr to the generic device */
	mci->dev = &i7core_dev->pdev[0]->dev;
	/* Set the function pointer to an actual operation function */
	mci->edac_check = i7core_check_error;

	/* add this new MC control structure to EDAC's list of MCs */
	if (unlikely(edac_mc_add_mc(mci))) {
		debugf0("MC: " __FILE__
			": %s(): failed edac_mc_add_mc()\n", __func__);
		/* FIXME: perhaps some code should go here that disables error
		 * reporting if we just enabled it
		 */

		rc = -EINVAL;
		goto fail0;
	}

	/* Default error mask is any memory */
	pvt->inject.channel = 0;
	pvt->inject.dimm = -1;
	pvt->inject.rank = -1;
	pvt->inject.bank = -1;
	pvt->inject.page = -1;
	pvt->inject.col = -1;

	/* allocating generic PCI control info */
	i7core_pci_ctl_create(pvt);

	/* Registers on edac_mce in order to receive memory errors */
	pvt->edac_mce.priv = mci;
	pvt->edac_mce.check_error = i7core_mce_check_error;
	rc = edac_mce_register(&pvt->edac_mce);
	if (unlikely(rc < 0)) {
		debugf0("MC: " __FILE__
			": %s(): failed edac_mce_register()\n", __func__);
		goto fail1;
	}

	return 0;

fail1:
	i7core_pci_ctl_release(pvt);
	edac_mc_del_mc(mci->dev);
fail0:
	kfree(mci->ctl_name);
	edac_mc_free(mci);
	i7core_dev->mci = NULL;
	return rc;
}

/*
 *	i7core_probe	Probe for ONE instance of device to see if it is
 *			present.
 *	return:
 *		0 for FOUND a device
 *		< 0 for error code
 */

static int __devinit i7core_probe(struct pci_dev *pdev,
				  const struct pci_device_id *id)
{
	int rc;
	struct i7core_dev *i7core_dev;

	/* get the pci devices we want to reserve for our use */
	mutex_lock(&i7core_edac_lock);

	/*
	 * All memory controllers are allocated at the first pass.
	 */
	if (unlikely(probed >= 1)) {
		mutex_unlock(&i7core_edac_lock);
		return -ENODEV;
	}
	probed++;

	rc = i7core_get_all_devices();
	if (unlikely(rc < 0))
		goto fail0;

	list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
		rc = i7core_register_mci(i7core_dev);
		if (unlikely(rc < 0))
			goto fail1;
	}

	i7core_printk(KERN_INFO, "Driver loaded.\n");

	mutex_unlock(&i7core_edac_lock);
	return 0;

fail1:
	list_for_each_entry(i7core_dev, &i7core_edac_list, list)
		i7core_unregister_mci(i7core_dev);

	i7core_put_all_devices();
fail0:
	mutex_unlock(&i7core_edac_lock);
	return rc;
}

/*
 *	i7core_remove	destructor for one instance of device
 *
 */
static void __devexit i7core_remove(struct pci_dev *pdev)
{
	struct i7core_dev *i7core_dev;

	debugf0(__FILE__ ": %s()\n", __func__);

	/*
	 * There is a problem here: the pdev value for removal will be wrong,
	 * since it will point to the X58 register used to detect that the
	 * machine is a Nehalem or newer design. However, due to the way
	 * several PCI devices are grouped together to provide MC
	 * functionality, we need to use a different method for releasing
	 * the devices.
	 */

	mutex_lock(&i7core_edac_lock);

	if (unlikely(!probed)) {
		mutex_unlock(&i7core_edac_lock);
		return;
	}

	list_for_each_entry(i7core_dev, &i7core_edac_list, list)
		i7core_unregister_mci(i7core_dev);

	/* Release PCI resources */
	i7core_put_all_devices();

	probed--;

	mutex_unlock(&i7core_edac_lock);
}

MODULE_DEVICE_TABLE(pci, i7core_pci_tbl);

/*
 *	i7core_driver	pci_driver structure for this module
 *
 */
static struct pci_driver i7core_driver = {
	.name     = "i7core_edac",
	.probe    = i7core_probe,
	.remove   = __devexit_p(i7core_remove),
	.id_table = i7core_pci_tbl,
};

/*
 *	i7core_init		Module entry function
 *			Try to initialize this module for its devices
 */
static int __init i7core_init(void)
{
	int pci_rc;

	debugf2("MC: " __FILE__ ": %s()\n", __func__);

	/* Ensure that the OPSTATE is set correctly for POLL or NMI */
	opstate_init();

	if (use_pci_fixup)
		i7core_xeon_pci_fixup(pci_dev_table);

	pci_rc = pci_register_driver(&i7core_driver);

	if (pci_rc >= 0)
		return 0;

	i7core_printk(KERN_ERR, "Failed to register device with error %d.\n",
		      pci_rc);

	return pci_rc;
}

/*
 *	i7core_exit()	Module exit function
 *			Unregister the driver
 */
static void __exit i7core_exit(void)
{
	debugf2("MC: " __FILE__ ": %s()\n", __func__);
	pci_unregister_driver(&i7core_driver);
}

module_init(i7core_init);
module_exit(i7core_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Mauro Carvalho Chehab <[email protected]>");
MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
MODULE_DESCRIPTION("MC Driver for Intel i7 Core memory controllers - "
		   I7CORE_REVISION);

module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");