GitHub Repository: awilliam/linux-vfio
Path: blob/master/drivers/edac/edac_mc.c
/*
 * edac_mc kernel module
 * (C) 2005, 2006 Linux Networx (http://lnxi.com)
 * This file may be distributed under the terms of the
 * GNU General Public License.
 *
 * Written by Thayne Harbaugh
 * Based on work by Dan Hollis <goemon at anime dot net> and others.
 *	http://www.anime.net/~goemon/linux-ecc/
 *
 * Modified by Dave Peterson and Doug Thompson
 *
 */

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/highmem.h>
#include <linux/timer.h>
#include <linux/slab.h>
#include <linux/jiffies.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/sysdev.h>
#include <linux/ctype.h>
#include <linux/edac.h>
#include <asm/uaccess.h>
#include <asm/page.h>
#include <asm/edac.h>
#include "edac_core.h"
#include "edac_module.h"

/* lock to memory controller's control array */
static DEFINE_MUTEX(mem_ctls_mutex);
static LIST_HEAD(mc_devices);

#ifdef CONFIG_EDAC_DEBUG

static void edac_mc_dump_channel(struct channel_info *chan)
{
	debugf4("\tchannel = %p\n", chan);
	debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx);
	debugf4("\tchannel->ce_count = %d\n", chan->ce_count);
	debugf4("\tchannel->label = '%s'\n", chan->label);
	debugf4("\tchannel->csrow = %p\n\n", chan->csrow);
}

static void edac_mc_dump_csrow(struct csrow_info *csrow)
{
	debugf4("\tcsrow = %p\n", csrow);
	debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx);
	debugf4("\tcsrow->first_page = 0x%lx\n", csrow->first_page);
	debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page);
	debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask);
	debugf4("\tcsrow->nr_pages = 0x%x\n", csrow->nr_pages);
	debugf4("\tcsrow->nr_channels = %d\n", csrow->nr_channels);
	debugf4("\tcsrow->channels = %p\n", csrow->channels);
	debugf4("\tcsrow->mci = %p\n\n", csrow->mci);
}

static void edac_mc_dump_mci(struct mem_ctl_info *mci)
{
	debugf3("\tmci = %p\n", mci);
	debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap);
	debugf3("\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
	debugf3("\tmci->edac_cap = %lx\n", mci->edac_cap);
	debugf4("\tmci->edac_check = %p\n", mci->edac_check);
	debugf3("\tmci->nr_csrows = %d, csrows = %p\n",
		mci->nr_csrows, mci->csrows);
	debugf3("\tdev = %p\n", mci->dev);
	debugf3("\tmod_name:ctl_name = %s:%s\n", mci->mod_name, mci->ctl_name);
	debugf3("\tpvt_info = %p\n\n", mci->pvt_info);
}

#endif /* CONFIG_EDAC_DEBUG */

/*
 * keep those in sync with the enum mem_type
 */
const char *edac_mem_types[] = {
	"Empty csrow",
	"Reserved csrow type",
	"Unknown csrow type",
	"Fast page mode RAM",
	"Extended data out RAM",
	"Burst Extended data out RAM",
	"Single data rate SDRAM",
	"Registered single data rate SDRAM",
	"Double data rate SDRAM",
	"Registered Double data rate SDRAM",
	"Rambus DRAM",
	"Unbuffered DDR2 RAM",
	"Fully buffered DDR2",
	"Registered DDR2 RAM",
	"Rambus XDR",
	"Unbuffered DDR3 RAM",
	"Registered DDR3 RAM",
};
EXPORT_SYMBOL_GPL(edac_mem_types);

/* 'ptr' points to a possibly unaligned item X such that sizeof(X) is 'size'.
 * Adjust 'ptr' so that its alignment is at least as stringent as what the
 * compiler would provide for X and return the aligned result.
 *
 * If 'size' is a constant, the compiler will optimize this whole function
 * down to either a no-op or the addition of a constant to the value of 'ptr'.
 */
void *edac_align_ptr(void *ptr, unsigned size)
{
	unsigned align, r;

	/* Here we assume that the alignment of a "long long" is the most
	 * stringent alignment that the compiler will ever provide by default.
	 * As far as I know, this is a reasonable assumption.
	 */
	if (size > sizeof(long))
		align = sizeof(long long);
	else if (size > sizeof(int))
		align = sizeof(long);
	else if (size > sizeof(short))
		align = sizeof(int);
	else if (size > sizeof(char))
		align = sizeof(short);
	else
		return (char *)ptr;

	r = size % align;

	if (r == 0)
		return (char *)ptr;

	return (void *)(((unsigned long)ptr) + align - r);
}

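/*
 * Worked example of the arithmetic above (illustrative only, not part of
 * the original file): with 64-bit longs,
 *
 *	edac_align_ptr((void *)0x1006, 12)
 *
 * selects align = sizeof(long long) = 8, computes r = 12 % 8 = 4, and
 * returns 0x1006 + (8 - 4) = 0x100a.  edac_mc_alloc() below relies on
 * this, starting from a NULL base pointer, to lay out the csrow, channel
 * and private-data arrays one after another in a single allocation.
 */
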
/**
 * edac_mc_alloc: Allocate a struct mem_ctl_info structure
 * @sz_pvt:	size of private storage needed
 * @nr_csrows:	number of CSROWs needed for this MC
 * @nr_chans:	number of channels for the MC
 * @edac_index:	unique index to assign to this MC instance
 *
 * Everything is kmalloc'ed as one big chunk - more efficient.
 * Only can be used if all structures have the same lifetime - otherwise
 * you have to allocate and initialize your own structures.
 *
 * Use edac_mc_free() to free mc structures allocated by this function.
 *
 * Returns:
 *	NULL	allocation failed
 *	struct mem_ctl_info pointer
 */
struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
				unsigned nr_chans, int edac_index)
{
	struct mem_ctl_info *mci;
	struct csrow_info *csi, *csrow;
	struct channel_info *chi, *chp, *chan;
	void *pvt;
	unsigned size;
	int row, chn;
	int err;

	/* Figure out the offsets of the various items from the start of an mc
	 * structure.  We want the alignment of each item to be at least as
	 * stringent as what the compiler would provide if we could simply
	 * hardcode everything into a single struct.
	 */
	mci = (struct mem_ctl_info *)0;
	csi = edac_align_ptr(&mci[1], sizeof(*csi));
	chi = edac_align_ptr(&csi[nr_csrows], sizeof(*chi));
	pvt = edac_align_ptr(&chi[nr_chans * nr_csrows], sz_pvt);
	size = ((unsigned long)pvt) + sz_pvt;

	mci = kzalloc(size, GFP_KERNEL);
	if (mci == NULL)
		return NULL;

	/* Adjust pointers so they point within the memory we just allocated
	 * rather than an imaginary chunk of memory located at address 0.
	 */
	csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi));
	chi = (struct channel_info *)(((char *)mci) + ((unsigned long)chi));
	pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;

	/* setup index and various internal pointers */
	mci->mc_idx = edac_index;
	mci->csrows = csi;
	mci->pvt_info = pvt;
	mci->nr_csrows = nr_csrows;

	for (row = 0; row < nr_csrows; row++) {
		csrow = &csi[row];
		csrow->csrow_idx = row;
		csrow->mci = mci;
		csrow->nr_channels = nr_chans;
		chp = &chi[row * nr_chans];
		csrow->channels = chp;

		for (chn = 0; chn < nr_chans; chn++) {
			chan = &chp[chn];
			chan->chan_idx = chn;
			chan->csrow = csrow;
		}
	}

	mci->op_state = OP_ALLOC;
	INIT_LIST_HEAD(&mci->grp_kobj_list);

	/*
	 * Initialize the 'root' kobj for the edac_mc controller
	 */
	err = edac_mc_register_sysfs_main_kobj(mci);
	if (err) {
		kfree(mci);
		return NULL;
	}

	/* At this point the root kobj is valid; to 'free' the object,
	 * edac_mc_unregister_sysfs_main_kobj() must be called, which
	 * performs the kobj unregistration; the actual free then occurs
	 * during the kobject callback operation.
	 */
	return mci;
}
EXPORT_SYMBOL_GPL(edac_mc_alloc);

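/*
 * Usage sketch (illustrative only; 'struct my_pvt' and the csrow/channel
 * counts below are hypothetical, not defined in this file):
 *
 *	struct mem_ctl_info *mci;
 *
 *	mci = edac_mc_alloc(sizeof(struct my_pvt), 8, 2, 0);
 *	if (mci == NULL)
 *		return -ENOMEM;
 *
 * mci->csrows[], each csrow's channels[] and mci->pvt_info all live
 * inside the single allocation and are released together by
 * edac_mc_free().
 */
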
/**
 * edac_mc_free
 *	'Free' a previously allocated 'mci' structure
 * @mci: pointer to a struct mem_ctl_info structure
 */
void edac_mc_free(struct mem_ctl_info *mci)
{
	debugf1("%s()\n", __func__);

	edac_mc_unregister_sysfs_main_kobj(mci);

	/* free the mci instance memory here */
	kfree(mci);
}
EXPORT_SYMBOL_GPL(edac_mc_free);

/**
 * find_mci_by_dev
 *	scan list of controllers looking for the one that manages
 *	the 'dev' device
 * @dev: pointer to a struct device associated with the MCI
 */
struct mem_ctl_info *find_mci_by_dev(struct device *dev)
{
	struct mem_ctl_info *mci;
	struct list_head *item;

	debugf3("%s()\n", __func__);

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->dev == dev)
			return mci;
	}

	return NULL;
}
EXPORT_SYMBOL_GPL(find_mci_by_dev);

/*
 * handler for EDAC to check if NMI type handler has asserted interrupt
 */
static int edac_mc_assert_error_check_and_clear(void)
{
	int old_state;

	if (edac_op_state == EDAC_OPSTATE_POLL)
		return 1;

	old_state = edac_err_assert;
	edac_err_assert = 0;

	return old_state;
}

/*
 * edac_mc_workq_function
 *	performs the operation scheduled by a workq request
 */
static void edac_mc_workq_function(struct work_struct *work_req)
{
	struct delayed_work *d_work = to_delayed_work(work_req);
	struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);

	mutex_lock(&mem_ctls_mutex);

	/* if this control struct has moved to offline state, we are done */
	if (mci->op_state == OP_OFFLINE) {
		mutex_unlock(&mem_ctls_mutex);
		return;
	}

	/* Only poll controllers that are running polled and have a check */
	if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
		mci->edac_check(mci);

	mutex_unlock(&mem_ctls_mutex);

	/* Reschedule */
	queue_delayed_work(edac_workqueue, &mci->work,
			msecs_to_jiffies(edac_mc_get_poll_msec()));
}

/*
 * edac_mc_workq_setup
 *	initialize a workq item for this mci
 *	passing in the new delay period in msec
 *
 * locking model:
 *	called with the mem_ctls_mutex held
 */
static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
{
	debugf0("%s()\n", __func__);

	/* if this instance is not in the POLL state, then simply return */
	if (mci->op_state != OP_RUNNING_POLL)
		return;

	INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
	queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
}

/*
 * edac_mc_workq_teardown
 *	stop the workq processing on this mci
 *
 * locking model:
 *	called WITHOUT lock held
 */
static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
{
	int status;

	if (mci->op_state != OP_RUNNING_POLL)
		return;

	status = cancel_delayed_work(&mci->work);
	if (status == 0) {
		debugf0("%s() not canceled, flush the queue\n", __func__);

		/* workq instance might be running, wait for it */
		flush_workqueue(edac_workqueue);
	}
}

/*
 * edac_mc_reset_delay_period(int value)
 *
 *	user space has updated our poll period value, need to
 *	reset our workq delays
 */
void edac_mc_reset_delay_period(int value)
{
	struct mem_ctl_info *mci;
	struct list_head *item;

	mutex_lock(&mem_ctls_mutex);

	/* scan the list and turn off all workq timers, doing so under lock */
	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->op_state == OP_RUNNING_POLL)
			cancel_delayed_work(&mci->work);
	}

	mutex_unlock(&mem_ctls_mutex);

	/* re-walk the list, and reset the poll delay */
	mutex_lock(&mem_ctls_mutex);

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		edac_mc_workq_setup(mci, (unsigned long) value);
	}

	mutex_unlock(&mem_ctls_mutex);
}

/* Return 0 on success, 1 on failure.
 * Before calling this function, caller must
 * assign a unique value to mci->mc_idx.
 *
 * locking model:
 *	called with the mem_ctls_mutex lock held
 */
static int add_mc_to_global_list(struct mem_ctl_info *mci)
{
	struct list_head *item, *insert_before;
	struct mem_ctl_info *p;

	insert_before = &mc_devices;

	p = find_mci_by_dev(mci->dev);
	if (unlikely(p != NULL))
		goto fail0;

	list_for_each(item, &mc_devices) {
		p = list_entry(item, struct mem_ctl_info, link);

		if (p->mc_idx >= mci->mc_idx) {
			if (unlikely(p->mc_idx == mci->mc_idx))
				goto fail1;

			insert_before = item;
			break;
		}
	}

	list_add_tail_rcu(&mci->link, insert_before);
	atomic_inc(&edac_handlers);
	return 0;

fail0:
	edac_printk(KERN_WARNING, EDAC_MC,
		"%s (%s) %s %s already assigned %d\n", dev_name(p->dev),
		edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
	return 1;

fail1:
	edac_printk(KERN_WARNING, EDAC_MC,
		"bug in low-level driver: attempt to assign\n"
		"    duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
	return 1;
}

static void del_mc_from_global_list(struct mem_ctl_info *mci)
{
	atomic_dec(&edac_handlers);
	list_del_rcu(&mci->link);

	/* these are for safe removal of devices from global list while
	 * NMI handlers may be traversing list
	 */
	synchronize_rcu();
	INIT_LIST_HEAD(&mci->link);
}

/**
 * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
 *
 * If found, return a pointer to the structure.
 * Else return NULL.
 *
 * Caller must hold mem_ctls_mutex.
 */
struct mem_ctl_info *edac_mc_find(int idx)
{
	struct list_head *item;
	struct mem_ctl_info *mci;

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->mc_idx >= idx) {
			if (mci->mc_idx == idx)
				return mci;

			break;
		}
	}

	return NULL;
}
EXPORT_SYMBOL(edac_mc_find);

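/*
 * Usage sketch (illustrative only): because add_mc_to_global_list() keeps
 * mc_devices sorted by mc_idx, the walk above can stop early.  A caller
 * already holding mem_ctls_mutex would do, e.g.:
 *
 *	struct mem_ctl_info *mci = edac_mc_find(0);
 *
 *	if (mci)
 *		edac_mc_printk(mci, KERN_INFO, "found MC0\n");
 */
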
/**
 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
 *	create sysfs entries associated with mci structure
 * @mci: pointer to the mci structure to be added to the list; the caller
 *	must already have assigned a unique value to mci->mc_idx
 *
 * Return:
 *	0	Success
 *	!0	Failure
 */

/* FIXME - should a warning be printed if no error detection? correction? */
int edac_mc_add_mc(struct mem_ctl_info *mci)
{
	debugf0("%s()\n", __func__);

#ifdef CONFIG_EDAC_DEBUG
	if (edac_debug_level >= 3)
		edac_mc_dump_mci(mci);

	if (edac_debug_level >= 4) {
		int i;

		for (i = 0; i < mci->nr_csrows; i++) {
			int j;

			edac_mc_dump_csrow(&mci->csrows[i]);
			for (j = 0; j < mci->csrows[i].nr_channels; j++)
				edac_mc_dump_channel(&mci->csrows[i].
						channels[j]);
		}
	}
#endif
	mutex_lock(&mem_ctls_mutex);

	if (add_mc_to_global_list(mci))
		goto fail0;

	/* set load time so that error rate can be tracked */
	mci->start_time = jiffies;

	if (edac_create_sysfs_mci_device(mci)) {
		edac_mc_printk(mci, KERN_WARNING,
			"failed to create sysfs device\n");
		goto fail1;
	}

	/* If there IS a check routine, then we are running POLLED */
	if (mci->edac_check != NULL) {
		/* This instance is NOW RUNNING */
		mci->op_state = OP_RUNNING_POLL;

		edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
	} else {
		mci->op_state = OP_RUNNING_INTERRUPT;
	}

	/* Report action taken */
	edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
		" DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));

	mutex_unlock(&mem_ctls_mutex);
	return 0;

fail1:
	del_mc_from_global_list(mci);

fail0:
	mutex_unlock(&mem_ctls_mutex);
	return 1;
}
EXPORT_SYMBOL_GPL(edac_mc_add_mc);

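/*
 * Registration sketch (illustrative only; my_probe(), my_check() and the
 * csrow/channel counts are hypothetical, not part of this file):
 *
 *	static void my_check(struct mem_ctl_info *mci)
 *	{
 *		// decode the controller's error registers here and call
 *		// the edac_mc_handle_*() helpers below as appropriate
 *	}
 *
 *	static int my_probe(struct pci_dev *pdev)
 *	{
 *		struct mem_ctl_info *mci;
 *
 *		mci = edac_mc_alloc(0, 8, 2, 0);
 *		if (!mci)
 *			return -ENOMEM;
 *		mci->dev = &pdev->dev;
 *		mci->edac_check = my_check;	// selects OP_RUNNING_POLL
 *		if (edac_mc_add_mc(mci)) {
 *			edac_mc_free(mci);
 *			return -ENODEV;
 *		}
 *		return 0;
 *	}
 */
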
/**
 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
 *	remove mci structure from global list
 * @dev: Pointer to 'struct device' representing mci structure to remove.
 *
 * Return pointer to removed mci structure, or NULL if device not found.
 */
struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
{
	struct mem_ctl_info *mci;

	debugf0("%s()\n", __func__);

	mutex_lock(&mem_ctls_mutex);

	/* find the requested mci struct in the global list */
	mci = find_mci_by_dev(dev);
	if (mci == NULL) {
		mutex_unlock(&mem_ctls_mutex);
		return NULL;
	}

	del_mc_from_global_list(mci);
	mutex_unlock(&mem_ctls_mutex);

	/* flush workq processes */
	edac_mc_workq_teardown(mci);

	/* marking MCI offline */
	mci->op_state = OP_OFFLINE;

	/* remove from sysfs */
	edac_remove_sysfs_mci_device(mci);

	edac_printk(KERN_INFO, EDAC_MC,
		"Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
		mci->mod_name, mci->ctl_name, edac_dev_name(mci));

	return mci;
}
EXPORT_SYMBOL_GPL(edac_mc_del_mc);

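/*
 * Teardown sketch (illustrative only; my_remove() is hypothetical) - the
 * mirror image of the registration sketch above:
 *
 *	static void my_remove(struct pci_dev *pdev)
 *	{
 *		struct mem_ctl_info *mci;
 *
 *		mci = edac_mc_del_mc(&pdev->dev);
 *		if (mci)
 *			edac_mc_free(mci);
 *	}
 */
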
static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
				u32 size)
{
	struct page *pg;
	void *virt_addr;
	unsigned long flags = 0;

	debugf3("%s()\n", __func__);

	/* ECC error page was not in our memory. Ignore it. */
	if (!pfn_valid(page))
		return;

	/* Find the actual page structure then map it and fix */
	pg = pfn_to_page(page);

	if (PageHighMem(pg))
		local_irq_save(flags);

	virt_addr = kmap_atomic(pg, KM_BOUNCE_READ);

	/* Perform architecture specific atomic scrub operation */
	atomic_scrub(virt_addr + offset, size);

	/* Unmap and complete */
	kunmap_atomic(virt_addr, KM_BOUNCE_READ);

	if (PageHighMem(pg))
		local_irq_restore(flags);
}

/* FIXME - should return -1 */
int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
{
	struct csrow_info *csrows = mci->csrows;
	int row, i;

	debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page);
	row = -1;

	for (i = 0; i < mci->nr_csrows; i++) {
		struct csrow_info *csrow = &csrows[i];

		if (csrow->nr_pages == 0)
			continue;

		debugf3("MC%d: %s(): first(0x%lx) page(0x%lx) last(0x%lx) "
			"mask(0x%lx)\n", mci->mc_idx, __func__,
			csrow->first_page, page, csrow->last_page,
			csrow->page_mask);

		if ((page >= csrow->first_page) &&
		    (page <= csrow->last_page) &&
		    ((page & csrow->page_mask) ==
		     (csrow->first_page & csrow->page_mask))) {
			row = i;
			break;
		}
	}

	if (row == -1)
		edac_mc_printk(mci, KERN_ERR,
			"could not look up page error address %lx\n",
			(unsigned long)page);

	return row;
}
EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);

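/*
 * Usage sketch (illustrative only; 'error_addr' and 'syndrome' are
 * hypothetical values decoded from a controller's error registers):
 *
 *	unsigned long pfn = error_addr >> PAGE_SHIFT;
 *	unsigned long offset = error_addr & ~PAGE_MASK;
 *	int row = edac_mc_find_csrow_by_page(mci, pfn);
 *
 *	if (row >= 0)
 *		edac_mc_handle_ce(mci, pfn, offset, syndrome, row, 0, "CE");
 */
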
/* FIXME - settable log (warning/emerg) levels */
/* FIXME - integrate with evlog: http://evlog.sourceforge.net/ */
void edac_mc_handle_ce(struct mem_ctl_info *mci,
		unsigned long page_frame_number,
		unsigned long offset_in_page, unsigned long syndrome,
		int row, int channel, const char *msg)
{
	unsigned long remapped_page;

	debugf3("MC%d: %s()\n", mci->mc_idx, __func__);

	/* FIXME - maybe make panic on INTERNAL ERROR an option */
	if (row >= mci->nr_csrows || row < 0) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: row out of range "
			"(%d >= %d)\n", row, mci->nr_csrows);
		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
		return;
	}

	if (channel >= mci->csrows[row].nr_channels || channel < 0) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: channel out of range "
			"(%d >= %d)\n", channel,
			mci->csrows[row].nr_channels);
		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
		return;
	}

	if (edac_mc_get_log_ce())
		/* FIXME - put in DIMM location */
		edac_mc_printk(mci, KERN_WARNING,
			"CE page 0x%lx, offset 0x%lx, grain %d, syndrome "
			"0x%lx, row %d, channel %d, label \"%s\": %s\n",
			page_frame_number, offset_in_page,
			mci->csrows[row].grain, syndrome, row, channel,
			mci->csrows[row].channels[channel].label, msg);

	mci->ce_count++;
	mci->csrows[row].ce_count++;
	mci->csrows[row].channels[channel].ce_count++;

	if (mci->scrub_mode & SCRUB_SW_SRC) {
		/*
		 * Some MC's can remap memory so that it is still available
		 * at a different address when PCI devices map into memory.
		 * MC's that can't do this lose the memory where PCI devices
		 * are mapped.  This mapping is MC dependent and so we call
		 * back into the MC driver for it to map the MC page to
		 * a physical (CPU) page which can then be mapped to a virtual
		 * page - which can then be scrubbed.
		 */
		remapped_page = mci->ctl_page_to_phys ?
			mci->ctl_page_to_phys(mci, page_frame_number) :
			page_frame_number;

		edac_mc_scrub_block(remapped_page, offset_in_page,
					mci->csrows[row].grain);
	}
}
EXPORT_SYMBOL_GPL(edac_mc_handle_ce);

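/*
 * Reporting sketch (illustrative only): a driver's edac_check routine,
 * after decoding its error registers into hypothetical 'pfn', 'offset',
 * 'syndrome', 'row' and 'chan' values, would report a corrected error
 * with:
 *
 *	edac_mc_handle_ce(mci, pfn, offset, syndrome, row, chan, "CE");
 *
 * or, for an uncorrectable error, use edac_mc_handle_ue() below:
 *
 *	edac_mc_handle_ue(mci, pfn, offset, row, "UE");
 */
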
void edac_mc_handle_ce_no_info(struct mem_ctl_info *mci, const char *msg)
{
	if (edac_mc_get_log_ce())
		edac_mc_printk(mci, KERN_WARNING,
			"CE - no information available: %s\n", msg);

	mci->ce_noinfo_count++;
	mci->ce_count++;
}
EXPORT_SYMBOL_GPL(edac_mc_handle_ce_no_info);

void edac_mc_handle_ue(struct mem_ctl_info *mci,
		unsigned long page_frame_number,
		unsigned long offset_in_page, int row, const char *msg)
{
	int len = EDAC_MC_LABEL_LEN * 4;
	char labels[len + 1];
	char *pos = labels;
	int chan;
	int chars;

	debugf3("MC%d: %s()\n", mci->mc_idx, __func__);

	/* FIXME - maybe make panic on INTERNAL ERROR an option */
	if (row >= mci->nr_csrows || row < 0) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: row out of range "
			"(%d >= %d)\n", row, mci->nr_csrows);
		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
		return;
	}

	chars = snprintf(pos, len + 1, "%s",
			 mci->csrows[row].channels[0].label);
	len -= chars;
	pos += chars;

	for (chan = 1; (chan < mci->csrows[row].nr_channels) && (len > 0);
	     chan++) {
		chars = snprintf(pos, len + 1, ":%s",
				 mci->csrows[row].channels[chan].label);
		len -= chars;
		pos += chars;
	}

	if (edac_mc_get_log_ue())
		edac_mc_printk(mci, KERN_EMERG,
			"UE page 0x%lx, offset 0x%lx, grain %d, row %d, "
			"labels \"%s\": %s\n", page_frame_number,
			offset_in_page, mci->csrows[row].grain, row,
			labels, msg);

	if (edac_mc_get_panic_on_ue())
		panic("EDAC MC%d: UE page 0x%lx, offset 0x%lx, grain %d, "
			"row %d, labels \"%s\": %s\n", mci->mc_idx,
			page_frame_number, offset_in_page,
			mci->csrows[row].grain, row, labels, msg);

	mci->ue_count++;
	mci->csrows[row].ue_count++;
}
EXPORT_SYMBOL_GPL(edac_mc_handle_ue);

void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci, const char *msg)
{
	if (edac_mc_get_panic_on_ue())
		panic("EDAC MC%d: Uncorrected Error", mci->mc_idx);

	if (edac_mc_get_log_ue())
		edac_mc_printk(mci, KERN_WARNING,
			"UE - no information available: %s\n", msg);

	mci->ue_noinfo_count++;
	mci->ue_count++;
}
EXPORT_SYMBOL_GPL(edac_mc_handle_ue_no_info);

/*************************************************************
 * On Fully Buffered DIMM modules, this helper function is
 * called to process UE events
 */
void edac_mc_handle_fbd_ue(struct mem_ctl_info *mci,
			unsigned int csrow,
			unsigned int channela,
			unsigned int channelb, char *msg)
{
	int len = EDAC_MC_LABEL_LEN * 4;
	char labels[len + 1];
	char *pos = labels;
	int chars;

	if (csrow >= mci->nr_csrows) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: row out of range (%d >= %d)\n",
			csrow, mci->nr_csrows);
		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
		return;
	}

	if (channela >= mci->csrows[csrow].nr_channels) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: channel-a out of range "
			"(%d >= %d)\n",
			channela, mci->csrows[csrow].nr_channels);
		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
		return;
	}

	if (channelb >= mci->csrows[csrow].nr_channels) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: channel-b out of range "
			"(%d >= %d)\n",
			channelb, mci->csrows[csrow].nr_channels);
		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
		return;
	}

	mci->ue_count++;
	mci->csrows[csrow].ue_count++;

	/* Generate the DIMM labels from the specified channels */
	chars = snprintf(pos, len + 1, "%s",
			 mci->csrows[csrow].channels[channela].label);
	len -= chars;
	pos += chars;
	chars = snprintf(pos, len + 1, "-%s",
			 mci->csrows[csrow].channels[channelb].label);

	if (edac_mc_get_log_ue())
		edac_mc_printk(mci, KERN_EMERG,
			"UE row %d, channel-a= %d channel-b= %d "
			"labels \"%s\": %s\n", csrow, channela, channelb,
			labels, msg);

	if (edac_mc_get_panic_on_ue())
		panic("UE row %d, channel-a= %d channel-b= %d "
			"labels \"%s\": %s\n", csrow, channela,
			channelb, labels, msg);
}
EXPORT_SYMBOL(edac_mc_handle_fbd_ue);

/*************************************************************
 * On Fully Buffered DIMM modules, this helper function is
 * called to process CE events
 */
void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci,
			unsigned int csrow, unsigned int channel, char *msg)
{

	/* Ensure boundary values */
	if (csrow >= mci->nr_csrows) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: row out of range (%d >= %d)\n",
			csrow, mci->nr_csrows);
		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
		return;
	}
	if (channel >= mci->csrows[csrow].nr_channels) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: channel out of range (%d >= %d)\n",
			channel, mci->csrows[csrow].nr_channels);
		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
		return;
	}

	if (edac_mc_get_log_ce())
		/* FIXME - put in DIMM location */
		edac_mc_printk(mci, KERN_WARNING,
			"CE row %d, channel %d, label \"%s\": %s\n",
			csrow, channel,
			mci->csrows[csrow].channels[channel].label, msg);

	mci->ce_count++;
	mci->csrows[csrow].ce_count++;
	mci->csrows[csrow].channels[channel].ce_count++;
}
EXPORT_SYMBOL(edac_mc_handle_fbd_ce);