GitHub Repository: awilliam/linux-vfio
Path: blob/master/drivers/edac/edac_mc.c
/*
 * edac_mc kernel module
 * (C) 2005, 2006 Linux Networx (http://lnxi.com)
 * This file may be distributed under the terms of the
 * GNU General Public License.
 *
 * Written by Thayne Harbaugh
 * Based on work by Dan Hollis <goemon at anime dot net> and others.
 *	http://www.anime.net/~goemon/linux-ecc/
 *
 * Modified by Dave Peterson and Doug Thompson
 *
 */

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/highmem.h>
#include <linux/timer.h>
#include <linux/slab.h>
#include <linux/jiffies.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/sysdev.h>
#include <linux/ctype.h>
#include <linux/edac.h>
#include <asm/uaccess.h>
#include <asm/page.h>
#include <asm/edac.h>
#include "edac_core.h"
#include "edac_module.h"

/* lock to memory controller's control array */
static DEFINE_MUTEX(mem_ctls_mutex);
static LIST_HEAD(mc_devices);

#ifdef CONFIG_EDAC_DEBUG

static void edac_mc_dump_channel(struct channel_info *chan)
{
	debugf4("\tchannel = %p\n", chan);
	debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx);
	debugf4("\tchannel->ce_count = %d\n", chan->ce_count);
	debugf4("\tchannel->label = '%s'\n", chan->label);
	debugf4("\tchannel->csrow = %p\n\n", chan->csrow);
}

static void edac_mc_dump_csrow(struct csrow_info *csrow)
{
	debugf4("\tcsrow = %p\n", csrow);
	debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx);
	debugf4("\tcsrow->first_page = 0x%lx\n", csrow->first_page);
	debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page);
	debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask);
	debugf4("\tcsrow->nr_pages = 0x%x\n", csrow->nr_pages);
	debugf4("\tcsrow->nr_channels = %d\n", csrow->nr_channels);
	debugf4("\tcsrow->channels = %p\n", csrow->channels);
	debugf4("\tcsrow->mci = %p\n\n", csrow->mci);
}

static void edac_mc_dump_mci(struct mem_ctl_info *mci)
{
	debugf3("\tmci = %p\n", mci);
	debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap);
	debugf3("\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
	debugf3("\tmci->edac_cap = %lx\n", mci->edac_cap);
	debugf4("\tmci->edac_check = %p\n", mci->edac_check);
	debugf3("\tmci->nr_csrows = %d, csrows = %p\n",
		mci->nr_csrows, mci->csrows);
	debugf3("\tdev = %p\n", mci->dev);
	debugf3("\tmod_name:ctl_name = %s:%s\n", mci->mod_name, mci->ctl_name);
	debugf3("\tpvt_info = %p\n\n", mci->pvt_info);
}

#endif /* CONFIG_EDAC_DEBUG */

/*
 * keep those in sync with the enum mem_type
 */
const char *edac_mem_types[] = {
	"Empty csrow",
	"Reserved csrow type",
	"Unknown csrow type",
	"Fast page mode RAM",
	"Extended data out RAM",
	"Burst Extended data out RAM",
	"Single data rate SDRAM",
	"Registered single data rate SDRAM",
	"Double data rate SDRAM",
	"Registered Double data rate SDRAM",
	"Rambus DRAM",
	"Unbuffered DDR2 RAM",
	"Fully buffered DDR2",
	"Registered DDR2 RAM",
	"Rambus XDR",
	"Unbuffered DDR3 RAM",
	"Registered DDR3 RAM",
};
EXPORT_SYMBOL_GPL(edac_mem_types);

/* 'ptr' points to a possibly unaligned item X such that sizeof(X) is 'size'.
 * Adjust 'ptr' so that its alignment is at least as stringent as what the
 * compiler would provide for X and return the aligned result.
 *
 * If 'size' is a constant, the compiler will optimize this whole function
 * down to either a no-op or the addition of a constant to the value of 'ptr'.
 */
void *edac_align_ptr(void *ptr, unsigned size)
{
	unsigned align, r;

	/* Here we assume that the alignment of a "long long" is the most
	 * stringent alignment that the compiler will ever provide by default.
	 * As far as I know, this is a reasonable assumption.
	 */
	if (size > sizeof(long))
		align = sizeof(long long);
	else if (size > sizeof(int))
		align = sizeof(long);
	else if (size > sizeof(short))
		align = sizeof(int);
	else if (size > sizeof(char))
		align = sizeof(short);
	else
		return (char *)ptr;

	r = size % align;

	if (r == 0)
		return (char *)ptr;

	return (void *)(((unsigned long)ptr) + align - r);
}

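/*
 * Worked example of the arithmetic above (illustrative only, not part of
 * the original file): with 64-bit longs,
 *
 *	edac_align_ptr((void *)0x1006, 12)
 *
 * selects align = sizeof(long long) = 8, computes r = 12 % 8 = 4, and
 * returns 0x1006 + (8 - 4) = 0x100a.  edac_mc_alloc() below relies on
 * this, starting from a NULL base pointer, to lay out the csrow, channel
 * and private-data arrays one after another in a single allocation.
 */
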
/**
 * edac_mc_alloc: Allocate a struct mem_ctl_info structure
 * @sz_pvt:	size of private storage needed
 * @nr_csrows:	number of CSROWs needed for this MC
 * @nr_chans:	number of channels for the MC
 * @edac_index:	unique index to assign to this MC instance
 *
 * Everything is kmalloc'ed as one big chunk - more efficient.
 * Only can be used if all structures have the same lifetime - otherwise
 * you have to allocate and initialize your own structures.
 *
 * Use edac_mc_free() to free mc structures allocated by this function.
 *
 * Returns:
 *	NULL	allocation failed
 *	struct mem_ctl_info pointer
 */
struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
				unsigned nr_chans, int edac_index)
{
	struct mem_ctl_info *mci;
	struct csrow_info *csi, *csrow;
	struct channel_info *chi, *chp, *chan;
	void *pvt;
	unsigned size;
	int row, chn;
	int err;

	/* Figure out the offsets of the various items from the start of an mc
	 * structure.  We want the alignment of each item to be at least as
	 * stringent as what the compiler would provide if we could simply
	 * hardcode everything into a single struct.
	 */
	mci = (struct mem_ctl_info *)0;
	csi = edac_align_ptr(&mci[1], sizeof(*csi));
	chi = edac_align_ptr(&csi[nr_csrows], sizeof(*chi));
	pvt = edac_align_ptr(&chi[nr_chans * nr_csrows], sz_pvt);
	size = ((unsigned long)pvt) + sz_pvt;

	mci = kzalloc(size, GFP_KERNEL);
	if (mci == NULL)
		return NULL;

	/* Adjust pointers so they point within the memory we just allocated
	 * rather than an imaginary chunk of memory located at address 0.
	 */
	csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi));
	chi = (struct channel_info *)(((char *)mci) + ((unsigned long)chi));
	pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;

	/* setup index and various internal pointers */
	mci->mc_idx = edac_index;
	mci->csrows = csi;
	mci->pvt_info = pvt;
	mci->nr_csrows = nr_csrows;

	for (row = 0; row < nr_csrows; row++) {
		csrow = &csi[row];
		csrow->csrow_idx = row;
		csrow->mci = mci;
		csrow->nr_channels = nr_chans;
		chp = &chi[row * nr_chans];
		csrow->channels = chp;

		for (chn = 0; chn < nr_chans; chn++) {
			chan = &chp[chn];
			chan->chan_idx = chn;
			chan->csrow = csrow;
		}
	}

	mci->op_state = OP_ALLOC;
	INIT_LIST_HEAD(&mci->grp_kobj_list);

	/*
	 * Initialize the 'root' kobj for the edac_mc controller
	 */
	err = edac_mc_register_sysfs_main_kobj(mci);
	if (err) {
		kfree(mci);
		return NULL;
	}

	/* At this point the root kobj is valid; to 'free' the object,
	 * edac_mc_unregister_sysfs_main_kobj() must be called, which
	 * performs the kobj unregistration; the actual free then occurs
	 * during the kobject callback operation.
	 */
	return mci;
}
EXPORT_SYMBOL_GPL(edac_mc_alloc);

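/*
 * Usage sketch (illustrative only; 'struct my_pvt' and the csrow/channel
 * counts below are hypothetical, not defined in this file):
 *
 *	struct mem_ctl_info *mci;
 *
 *	mci = edac_mc_alloc(sizeof(struct my_pvt), 8, 2, 0);
 *	if (mci == NULL)
 *		return -ENOMEM;
 *
 * mci->csrows[], each csrow's channels[] and mci->pvt_info all live
 * inside the single allocation and are released together by
 * edac_mc_free().
 */
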
/**
 * edac_mc_free
 *	'Free' a previously allocated 'mci' structure
 * @mci: pointer to a struct mem_ctl_info structure
 */
void edac_mc_free(struct mem_ctl_info *mci)
{
	debugf1("%s()\n", __func__);

	edac_mc_unregister_sysfs_main_kobj(mci);

	/* free the mci instance memory here */
	kfree(mci);
}
EXPORT_SYMBOL_GPL(edac_mc_free);

/**
 * find_mci_by_dev
 *	scan list of controllers looking for the one that manages
 *	the 'dev' device
 * @dev: pointer to a struct device associated with the MCI
 */
struct mem_ctl_info *find_mci_by_dev(struct device *dev)
{
	struct mem_ctl_info *mci;
	struct list_head *item;

	debugf3("%s()\n", __func__);

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->dev == dev)
			return mci;
	}

	return NULL;
}
EXPORT_SYMBOL_GPL(find_mci_by_dev);

/*
 * handler for EDAC to check if NMI type handler has asserted interrupt
 */
static int edac_mc_assert_error_check_and_clear(void)
{
	int old_state;

	if (edac_op_state == EDAC_OPSTATE_POLL)
		return 1;

	old_state = edac_err_assert;
	edac_err_assert = 0;

	return old_state;
}

/*
 * edac_mc_workq_function
 *	performs the operation scheduled by a workq request
 */
static void edac_mc_workq_function(struct work_struct *work_req)
{
	struct delayed_work *d_work = to_delayed_work(work_req);
	struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);

	mutex_lock(&mem_ctls_mutex);

	/* if this control struct has moved to offline state, we are done */
	if (mci->op_state == OP_OFFLINE) {
		mutex_unlock(&mem_ctls_mutex);
		return;
	}

	/* Only poll controllers that are running polled and have a check */
	if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
		mci->edac_check(mci);

	mutex_unlock(&mem_ctls_mutex);

	/* Reschedule */
	queue_delayed_work(edac_workqueue, &mci->work,
			msecs_to_jiffies(edac_mc_get_poll_msec()));
}

/*
 * edac_mc_workq_setup
 *	initialize a workq item for this mci
 *	passing in the new delay period in msec
 *
 * locking model:
 *	called with the mem_ctls_mutex held
 */
static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
{
	debugf0("%s()\n", __func__);

	/* if this instance is not in the POLL state, then simply return */
	if (mci->op_state != OP_RUNNING_POLL)
		return;

	INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
	queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
}

/*
 * edac_mc_workq_teardown
 *	stop the workq processing on this mci
 *
 * locking model:
 *	called WITHOUT lock held
 */
static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
{
	int status;

	if (mci->op_state != OP_RUNNING_POLL)
		return;

	status = cancel_delayed_work(&mci->work);
	if (status == 0) {
		debugf0("%s() not canceled, flush the queue\n", __func__);

		/* workq instance might be running, wait for it */
		flush_workqueue(edac_workqueue);
	}
}

/*
 * edac_mc_reset_delay_period(int value)
 *
 *	user space has updated our poll period value, need to
 *	reset our workq delays
 */
void edac_mc_reset_delay_period(int value)
{
	struct mem_ctl_info *mci;
	struct list_head *item;

	mutex_lock(&mem_ctls_mutex);

	/* scan the list and turn off all workq timers, doing so under lock */
	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->op_state == OP_RUNNING_POLL)
			cancel_delayed_work(&mci->work);
	}

	mutex_unlock(&mem_ctls_mutex);

	/* re-walk the list, and reset the poll delay */
	mutex_lock(&mem_ctls_mutex);

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		edac_mc_workq_setup(mci, (unsigned long) value);
	}

	mutex_unlock(&mem_ctls_mutex);
}

/* Return 0 on success, 1 on failure.
 * Before calling this function, caller must
 * assign a unique value to mci->mc_idx.
 *
 * locking model:
 *	called with the mem_ctls_mutex lock held
 */
static int add_mc_to_global_list(struct mem_ctl_info *mci)
{
	struct list_head *item, *insert_before;
	struct mem_ctl_info *p;

	insert_before = &mc_devices;

	p = find_mci_by_dev(mci->dev);
	if (unlikely(p != NULL))
		goto fail0;

	list_for_each(item, &mc_devices) {
		p = list_entry(item, struct mem_ctl_info, link);

		if (p->mc_idx >= mci->mc_idx) {
			if (unlikely(p->mc_idx == mci->mc_idx))
				goto fail1;

			insert_before = item;
			break;
		}
	}

	list_add_tail_rcu(&mci->link, insert_before);
	atomic_inc(&edac_handlers);
	return 0;

fail0:
	edac_printk(KERN_WARNING, EDAC_MC,
		"%s (%s) %s %s already assigned %d\n", dev_name(p->dev),
		edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
	return 1;

fail1:
	edac_printk(KERN_WARNING, EDAC_MC,
		"bug in low-level driver: attempt to assign\n"
		"    duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
	return 1;
}

static void del_mc_from_global_list(struct mem_ctl_info *mci)
{
	atomic_dec(&edac_handlers);
	list_del_rcu(&mci->link);

	/* these are for safe removal of devices from global list while
	 * NMI handlers may be traversing list
	 */
	synchronize_rcu();
	INIT_LIST_HEAD(&mci->link);
}

/**
 * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
 *
 * If found, return a pointer to the structure.
 * Else return NULL.
 *
 * Caller must hold mem_ctls_mutex.
 */
struct mem_ctl_info *edac_mc_find(int idx)
{
	struct list_head *item;
	struct mem_ctl_info *mci;

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->mc_idx >= idx) {
			if (mci->mc_idx == idx)
				return mci;

			break;
		}
	}

	return NULL;
}
EXPORT_SYMBOL(edac_mc_find);

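/*
 * Usage sketch (illustrative only): because add_mc_to_global_list() keeps
 * mc_devices sorted by mc_idx, the walk above can stop early.  A caller
 * already holding mem_ctls_mutex would do, e.g.:
 *
 *	struct mem_ctl_info *mci = edac_mc_find(0);
 *
 *	if (mci)
 *		edac_mc_printk(mci, KERN_INFO, "found MC0\n");
 */
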
/**
 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
 *	create sysfs entries associated with mci structure
 * @mci: pointer to the mci structure to be added to the list; the caller
 *	must already have assigned a unique value to mci->mc_idx
 *
 * Return:
 *	0	Success
 *	!0	Failure
 */

/* FIXME - should a warning be printed if no error detection? correction? */
int edac_mc_add_mc(struct mem_ctl_info *mci)
{
	debugf0("%s()\n", __func__);

#ifdef CONFIG_EDAC_DEBUG
	if (edac_debug_level >= 3)
		edac_mc_dump_mci(mci);

	if (edac_debug_level >= 4) {
		int i;

		for (i = 0; i < mci->nr_csrows; i++) {
			int j;

			edac_mc_dump_csrow(&mci->csrows[i]);
			for (j = 0; j < mci->csrows[i].nr_channels; j++)
				edac_mc_dump_channel(&mci->csrows[i].
						channels[j]);
		}
	}
#endif
	mutex_lock(&mem_ctls_mutex);

	if (add_mc_to_global_list(mci))
		goto fail0;

	/* set load time so that error rate can be tracked */
	mci->start_time = jiffies;

	if (edac_create_sysfs_mci_device(mci)) {
		edac_mc_printk(mci, KERN_WARNING,
			"failed to create sysfs device\n");
		goto fail1;
	}

	/* If there IS a check routine, then we are running POLLED */
	if (mci->edac_check != NULL) {
		/* This instance is NOW RUNNING */
		mci->op_state = OP_RUNNING_POLL;

		edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
	} else {
		mci->op_state = OP_RUNNING_INTERRUPT;
	}

	/* Report action taken */
	edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
		" DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));

	mutex_unlock(&mem_ctls_mutex);
	return 0;

fail1:
	del_mc_from_global_list(mci);

fail0:
	mutex_unlock(&mem_ctls_mutex);
	return 1;
}
EXPORT_SYMBOL_GPL(edac_mc_add_mc);

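/*
 * Registration sketch (illustrative only; my_probe(), my_check() and the
 * csrow/channel counts are hypothetical, not part of this file):
 *
 *	static void my_check(struct mem_ctl_info *mci)
 *	{
 *		// decode the controller's error registers here and call
 *		// the edac_mc_handle_*() helpers below as appropriate
 *	}
 *
 *	static int my_probe(struct pci_dev *pdev)
 *	{
 *		struct mem_ctl_info *mci;
 *
 *		mci = edac_mc_alloc(0, 8, 2, 0);
 *		if (!mci)
 *			return -ENOMEM;
 *		mci->dev = &pdev->dev;
 *		mci->edac_check = my_check;	// selects OP_RUNNING_POLL
 *		if (edac_mc_add_mc(mci)) {
 *			edac_mc_free(mci);
 *			return -ENODEV;
 *		}
 *		return 0;
 *	}
 */
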
/**
 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
 *	remove mci structure from global list
 * @dev: Pointer to 'struct device' representing mci structure to remove.
 *
 * Return pointer to removed mci structure, or NULL if device not found.
 */
struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
{
	struct mem_ctl_info *mci;

	debugf0("%s()\n", __func__);

	mutex_lock(&mem_ctls_mutex);

	/* find the requested mci struct in the global list */
	mci = find_mci_by_dev(dev);
	if (mci == NULL) {
		mutex_unlock(&mem_ctls_mutex);
		return NULL;
	}

	del_mc_from_global_list(mci);
	mutex_unlock(&mem_ctls_mutex);

	/* flush workq processes */
	edac_mc_workq_teardown(mci);

	/* marking MCI offline */
	mci->op_state = OP_OFFLINE;

	/* remove from sysfs */
	edac_remove_sysfs_mci_device(mci);

	edac_printk(KERN_INFO, EDAC_MC,
		"Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
		mci->mod_name, mci->ctl_name, edac_dev_name(mci));

	return mci;
}
EXPORT_SYMBOL_GPL(edac_mc_del_mc);

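/*
 * Teardown sketch (illustrative only; my_remove() is hypothetical) - the
 * mirror image of the registration sketch above:
 *
 *	static void my_remove(struct pci_dev *pdev)
 *	{
 *		struct mem_ctl_info *mci;
 *
 *		mci = edac_mc_del_mc(&pdev->dev);
 *		if (mci)
 *			edac_mc_free(mci);
 *	}
 */
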
static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
				u32 size)
{
	struct page *pg;
	void *virt_addr;
	unsigned long flags = 0;

	debugf3("%s()\n", __func__);

	/* ECC error page was not in our memory. Ignore it. */
	if (!pfn_valid(page))
		return;

	/* Find the actual page structure then map it and fix */
	pg = pfn_to_page(page);

	if (PageHighMem(pg))
		local_irq_save(flags);

	virt_addr = kmap_atomic(pg, KM_BOUNCE_READ);

	/* Perform architecture specific atomic scrub operation */
	atomic_scrub(virt_addr + offset, size);

	/* Unmap and complete */
	kunmap_atomic(virt_addr, KM_BOUNCE_READ);

	if (PageHighMem(pg))
		local_irq_restore(flags);
}

/* FIXME - should return -1 */
int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
{
	struct csrow_info *csrows = mci->csrows;
	int row, i;

	debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page);
	row = -1;

	for (i = 0; i < mci->nr_csrows; i++) {
		struct csrow_info *csrow = &csrows[i];

		if (csrow->nr_pages == 0)
			continue;

		debugf3("MC%d: %s(): first(0x%lx) page(0x%lx) last(0x%lx) "
			"mask(0x%lx)\n", mci->mc_idx, __func__,
			csrow->first_page, page, csrow->last_page,
			csrow->page_mask);

		if ((page >= csrow->first_page) &&
		    (page <= csrow->last_page) &&
		    ((page & csrow->page_mask) ==
		     (csrow->first_page & csrow->page_mask))) {
			row = i;
			break;
		}
	}

	if (row == -1)
		edac_mc_printk(mci, KERN_ERR,
			"could not look up page error address %lx\n",
			(unsigned long)page);

	return row;
}
EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);

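/*
 * Usage sketch (illustrative only; 'error_addr' and 'syndrome' are
 * hypothetical values decoded from a controller's error registers):
 *
 *	unsigned long pfn = error_addr >> PAGE_SHIFT;
 *	unsigned long offset = error_addr & ~PAGE_MASK;
 *	int row = edac_mc_find_csrow_by_page(mci, pfn);
 *
 *	if (row >= 0)
 *		edac_mc_handle_ce(mci, pfn, offset, syndrome, row, 0, "CE");
 */
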
/* FIXME - settable log (warning/emerg) levels */
/* FIXME - integrate with evlog: http://evlog.sourceforge.net/ */
void edac_mc_handle_ce(struct mem_ctl_info *mci,
		unsigned long page_frame_number,
		unsigned long offset_in_page, unsigned long syndrome,
		int row, int channel, const char *msg)
{
	unsigned long remapped_page;

	debugf3("MC%d: %s()\n", mci->mc_idx, __func__);

	/* FIXME - maybe make panic on INTERNAL ERROR an option */
	if (row >= mci->nr_csrows || row < 0) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: row out of range "
			"(%d >= %d)\n", row, mci->nr_csrows);
		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
		return;
	}

	if (channel >= mci->csrows[row].nr_channels || channel < 0) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: channel out of range "
			"(%d >= %d)\n", channel,
			mci->csrows[row].nr_channels);
		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
		return;
	}

	if (edac_mc_get_log_ce())
		/* FIXME - put in DIMM location */
		edac_mc_printk(mci, KERN_WARNING,
			"CE page 0x%lx, offset 0x%lx, grain %d, syndrome "
			"0x%lx, row %d, channel %d, label \"%s\": %s\n",
			page_frame_number, offset_in_page,
			mci->csrows[row].grain, syndrome, row, channel,
			mci->csrows[row].channels[channel].label, msg);

	mci->ce_count++;
	mci->csrows[row].ce_count++;
	mci->csrows[row].channels[channel].ce_count++;

	if (mci->scrub_mode & SCRUB_SW_SRC) {
		/*
		 * Some MC's can remap memory so that it is still available
		 * at a different address when PCI devices map into memory.
		 * MC's that can't do this lose the memory where PCI devices
		 * are mapped.  This mapping is MC dependent and so we call
		 * back into the MC driver for it to map the MC page to
		 * a physical (CPU) page which can then be mapped to a virtual
		 * page - which can then be scrubbed.
		 */
		remapped_page = mci->ctl_page_to_phys ?
			mci->ctl_page_to_phys(mci, page_frame_number) :
			page_frame_number;

		edac_mc_scrub_block(remapped_page, offset_in_page,
					mci->csrows[row].grain);
	}
}
EXPORT_SYMBOL_GPL(edac_mc_handle_ce);

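/*
 * Reporting sketch (illustrative only): a driver's edac_check routine,
 * after decoding its error registers into hypothetical 'pfn', 'offset',
 * 'syndrome', 'row' and 'chan' values, would report a corrected error
 * with:
 *
 *	edac_mc_handle_ce(mci, pfn, offset, syndrome, row, chan, "CE");
 *
 * or, for an uncorrectable error, use edac_mc_handle_ue() below:
 *
 *	edac_mc_handle_ue(mci, pfn, offset, row, "UE");
 */
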
void edac_mc_handle_ce_no_info(struct mem_ctl_info *mci, const char *msg)
{
	if (edac_mc_get_log_ce())
		edac_mc_printk(mci, KERN_WARNING,
			"CE - no information available: %s\n", msg);

	mci->ce_noinfo_count++;
	mci->ce_count++;
}
EXPORT_SYMBOL_GPL(edac_mc_handle_ce_no_info);

void edac_mc_handle_ue(struct mem_ctl_info *mci,
		unsigned long page_frame_number,
		unsigned long offset_in_page, int row, const char *msg)
{
	int len = EDAC_MC_LABEL_LEN * 4;
	char labels[len + 1];
	char *pos = labels;
	int chan;
	int chars;

	debugf3("MC%d: %s()\n", mci->mc_idx, __func__);

	/* FIXME - maybe make panic on INTERNAL ERROR an option */
	if (row >= mci->nr_csrows || row < 0) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: row out of range "
			"(%d >= %d)\n", row, mci->nr_csrows);
		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
		return;
	}

	chars = snprintf(pos, len + 1, "%s",
			 mci->csrows[row].channels[0].label);
	len -= chars;
	pos += chars;

	for (chan = 1; (chan < mci->csrows[row].nr_channels) && (len > 0);
	     chan++) {
		chars = snprintf(pos, len + 1, ":%s",
				 mci->csrows[row].channels[chan].label);
		len -= chars;
		pos += chars;
	}

	if (edac_mc_get_log_ue())
		edac_mc_printk(mci, KERN_EMERG,
			"UE page 0x%lx, offset 0x%lx, grain %d, row %d, "
			"labels \"%s\": %s\n", page_frame_number,
			offset_in_page, mci->csrows[row].grain, row,
			labels, msg);

	if (edac_mc_get_panic_on_ue())
		panic("EDAC MC%d: UE page 0x%lx, offset 0x%lx, grain %d, "
			"row %d, labels \"%s\": %s\n", mci->mc_idx,
			page_frame_number, offset_in_page,
			mci->csrows[row].grain, row, labels, msg);

	mci->ue_count++;
	mci->csrows[row].ue_count++;
}
EXPORT_SYMBOL_GPL(edac_mc_handle_ue);

void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci, const char *msg)
{
	if (edac_mc_get_panic_on_ue())
		panic("EDAC MC%d: Uncorrected Error", mci->mc_idx);

	if (edac_mc_get_log_ue())
		edac_mc_printk(mci, KERN_WARNING,
			"UE - no information available: %s\n", msg);

	mci->ue_noinfo_count++;
	mci->ue_count++;
}
EXPORT_SYMBOL_GPL(edac_mc_handle_ue_no_info);

/*************************************************************
 * On Fully Buffered DIMM modules, this helper function is
 * called to process UE events
 */
void edac_mc_handle_fbd_ue(struct mem_ctl_info *mci,
			unsigned int csrow,
			unsigned int channela,
			unsigned int channelb, char *msg)
{
	int len = EDAC_MC_LABEL_LEN * 4;
	char labels[len + 1];
	char *pos = labels;
	int chars;

	if (csrow >= mci->nr_csrows) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: row out of range (%d >= %d)\n",
			csrow, mci->nr_csrows);
		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
		return;
	}

	if (channela >= mci->csrows[csrow].nr_channels) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: channel-a out of range "
			"(%d >= %d)\n",
			channela, mci->csrows[csrow].nr_channels);
		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
		return;
	}

	if (channelb >= mci->csrows[csrow].nr_channels) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: channel-b out of range "
			"(%d >= %d)\n",
			channelb, mci->csrows[csrow].nr_channels);
		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
		return;
	}

	mci->ue_count++;
	mci->csrows[csrow].ue_count++;

	/* Generate the DIMM labels from the specified channels */
	chars = snprintf(pos, len + 1, "%s",
			 mci->csrows[csrow].channels[channela].label);
	len -= chars;
	pos += chars;
	chars = snprintf(pos, len + 1, "-%s",
			 mci->csrows[csrow].channels[channelb].label);

	if (edac_mc_get_log_ue())
		edac_mc_printk(mci, KERN_EMERG,
			"UE row %d, channel-a= %d channel-b= %d "
			"labels \"%s\": %s\n", csrow, channela, channelb,
			labels, msg);

	if (edac_mc_get_panic_on_ue())
		panic("UE row %d, channel-a= %d channel-b= %d "
			"labels \"%s\": %s\n", csrow, channela,
			channelb, labels, msg);
}
EXPORT_SYMBOL(edac_mc_handle_fbd_ue);

/*************************************************************
 * On Fully Buffered DIMM modules, this helper function is
 * called to process CE events
 */
void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci,
			unsigned int csrow, unsigned int channel, char *msg)
{

	/* Ensure boundary values */
	if (csrow >= mci->nr_csrows) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: row out of range (%d >= %d)\n",
			csrow, mci->nr_csrows);
		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
		return;
	}
	if (channel >= mci->csrows[csrow].nr_channels) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: channel out of range (%d >= %d)\n",
			channel, mci->csrows[csrow].nr_channels);
		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
		return;
	}

	if (edac_mc_get_log_ce())
		/* FIXME - put in DIMM location */
		edac_mc_printk(mci, KERN_WARNING,
			"CE row %d, channel %d, label \"%s\": %s\n",
			csrow, channel,
			mci->csrows[csrow].channels[channel].label, msg);

	mci->ce_count++;
	mci->csrows[csrow].ce_count++;
	mci->csrows[csrow].channels[channel].ce_count++;
}
EXPORT_SYMBOL(edac_mc_handle_fbd_ce);