Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/drivers/edac/al_mc_edac.c
26278 views
1
// SPDX-License-Identifier: GPL-2.0
2
/*
3
* Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
4
*/
5
#include <linux/bitfield.h>
6
#include <linux/bitops.h>
7
#include <linux/edac.h>
8
#include <linux/of_irq.h>
9
#include <linux/platform_device.h>
10
#include <linux/spinlock.h>
11
#include "edac_module.h"
12
13
/* Register offsets, added to the mapped MMIO base of the controller */
#define AL_MC_ECC_CFG			0x70
#define AL_MC_ECC_CLEAR			0x7c
#define AL_MC_ECC_ERR_COUNT		0x80
#define AL_MC_ECC_CE_ADDR0		0x84
#define AL_MC_ECC_CE_ADDR1		0x88
#define AL_MC_ECC_CE_SYND0		0x8c
#define AL_MC_ECC_CE_SYND1		0x90
#define AL_MC_ECC_CE_SYND2		0x94
#define AL_MC_ECC_UE_ADDR0		0xa4
#define AL_MC_ECC_UE_ADDR1		0xa8
#define AL_MC_ECC_UE_SYND0		0xac
#define AL_MC_ECC_UE_SYND1		0xb0
#define AL_MC_ECC_UE_SYND2		0xb4

/* AL_MC_ECC_CFG fields */
#define AL_MC_ECC_CFG_SCRUB_DISABLED	BIT(4)

/* AL_MC_ECC_CLEAR fields */
#define AL_MC_ECC_CLEAR_UE_COUNT	BIT(3)
#define AL_MC_ECC_CLEAR_CE_COUNT	BIT(2)
#define AL_MC_ECC_CLEAR_UE_ERR		BIT(1)
#define AL_MC_ECC_CLEAR_CE_ERR		BIT(0)

/* AL_MC_ECC_ERR_COUNT fields */
#define AL_MC_ECC_ERR_COUNT_UE		GENMASK(31, 16)
#define AL_MC_ECC_ERR_COUNT_CE		GENMASK(15, 0)

/* AL_MC_ECC_CE_ADDR0 fields */
#define AL_MC_ECC_CE_ADDR0_RANK		GENMASK(25, 24)
#define AL_MC_ECC_CE_ADDR0_ROW		GENMASK(17, 0)

/* AL_MC_ECC_CE_ADDR1 fields */
#define AL_MC_ECC_CE_ADDR1_BG		GENMASK(25, 24)
#define AL_MC_ECC_CE_ADDR1_BANK		GENMASK(18, 16)
#define AL_MC_ECC_CE_ADDR1_COLUMN	GENMASK(11, 0)

/* AL_MC_ECC_UE_ADDR0 fields */
#define AL_MC_ECC_UE_ADDR0_RANK		GENMASK(25, 24)
#define AL_MC_ECC_UE_ADDR0_ROW		GENMASK(17, 0)

/* AL_MC_ECC_UE_ADDR1 fields */
#define AL_MC_ECC_UE_ADDR1_BG		GENMASK(25, 24)
#define AL_MC_ECC_UE_ADDR1_BANK		GENMASK(18, 16)
#define AL_MC_ECC_UE_ADDR1_COLUMN	GENMASK(11, 0)

#define DRV_NAME			"al_mc_edac"

/* Size of the on-stack buffer used to format one error message */
#define AL_MC_EDAC_MSG_MAX		256
55
56
struct al_mc_edac {
57
void __iomem *mmio_base;
58
spinlock_t lock;
59
int irq_ce;
60
int irq_ue;
61
};
62
63
/*
 * prepare_msg() - format the human-readable description of one ECC event.
 * @message:     destination buffer
 * @buffer_size: size of @message; used as the snprintf() bound
 * @type:        HW_EVENT_ERR_UNCORRECTED yields a "UE" prefix, any other
 *               value yields "CE"
 * @rank, @row, @bg, @bank, @column: decoded address fields, printed in hex
 * @syn0, @syn1, @syn2:              raw syndrome register values
 */
static void prepare_msg(char *message, size_t buffer_size,
			enum hw_event_mc_err_type type,
			u8 rank, u32 row, u8 bg, u8 bank, u16 column,
			u32 syn0, u32 syn1, u32 syn2)
{
	const char *prefix;

	if (type == HW_EVENT_ERR_UNCORRECTED)
		prefix = "UE";
	else
		prefix = "CE";

	snprintf(message, buffer_size,
		 "%s rank=0x%x row=0x%x bg=0x%x bank=0x%x col=0x%x syn0: 0x%x syn1: 0x%x syn2: 0x%x",
		 prefix, rank, row, bg, bank, column, syn0, syn1, syn2);
}
73
74
static int handle_ce(struct mem_ctl_info *mci)
75
{
76
u32 eccerrcnt, ecccaddr0, ecccaddr1, ecccsyn0, ecccsyn1, ecccsyn2, row;
77
struct al_mc_edac *al_mc = mci->pvt_info;
78
char msg[AL_MC_EDAC_MSG_MAX];
79
u16 ce_count, column;
80
unsigned long flags;
81
u8 rank, bg, bank;
82
83
eccerrcnt = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_ERR_COUNT);
84
ce_count = FIELD_GET(AL_MC_ECC_ERR_COUNT_CE, eccerrcnt);
85
if (!ce_count)
86
return 0;
87
88
ecccaddr0 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_ADDR0);
89
ecccaddr1 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_ADDR1);
90
ecccsyn0 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_SYND0);
91
ecccsyn1 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_SYND1);
92
ecccsyn2 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_SYND2);
93
94
writel_relaxed(AL_MC_ECC_CLEAR_CE_COUNT | AL_MC_ECC_CLEAR_CE_ERR,
95
al_mc->mmio_base + AL_MC_ECC_CLEAR);
96
97
dev_dbg(mci->pdev, "eccuaddr0=0x%08x eccuaddr1=0x%08x\n",
98
ecccaddr0, ecccaddr1);
99
100
rank = FIELD_GET(AL_MC_ECC_CE_ADDR0_RANK, ecccaddr0);
101
row = FIELD_GET(AL_MC_ECC_CE_ADDR0_ROW, ecccaddr0);
102
103
bg = FIELD_GET(AL_MC_ECC_CE_ADDR1_BG, ecccaddr1);
104
bank = FIELD_GET(AL_MC_ECC_CE_ADDR1_BANK, ecccaddr1);
105
column = FIELD_GET(AL_MC_ECC_CE_ADDR1_COLUMN, ecccaddr1);
106
107
prepare_msg(msg, sizeof(msg), HW_EVENT_ERR_CORRECTED,
108
rank, row, bg, bank, column,
109
ecccsyn0, ecccsyn1, ecccsyn2);
110
111
spin_lock_irqsave(&al_mc->lock, flags);
112
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
113
ce_count, 0, 0, 0, 0, 0, -1, mci->ctl_name, msg);
114
spin_unlock_irqrestore(&al_mc->lock, flags);
115
116
return ce_count;
117
}
118
119
static int handle_ue(struct mem_ctl_info *mci)
120
{
121
u32 eccerrcnt, eccuaddr0, eccuaddr1, eccusyn0, eccusyn1, eccusyn2, row;
122
struct al_mc_edac *al_mc = mci->pvt_info;
123
char msg[AL_MC_EDAC_MSG_MAX];
124
u16 ue_count, column;
125
unsigned long flags;
126
u8 rank, bg, bank;
127
128
eccerrcnt = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_ERR_COUNT);
129
ue_count = FIELD_GET(AL_MC_ECC_ERR_COUNT_UE, eccerrcnt);
130
if (!ue_count)
131
return 0;
132
133
eccuaddr0 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_ADDR0);
134
eccuaddr1 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_ADDR1);
135
eccusyn0 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_SYND0);
136
eccusyn1 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_SYND1);
137
eccusyn2 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_SYND2);
138
139
writel_relaxed(AL_MC_ECC_CLEAR_UE_COUNT | AL_MC_ECC_CLEAR_UE_ERR,
140
al_mc->mmio_base + AL_MC_ECC_CLEAR);
141
142
dev_dbg(mci->pdev, "eccuaddr0=0x%08x eccuaddr1=0x%08x\n",
143
eccuaddr0, eccuaddr1);
144
145
rank = FIELD_GET(AL_MC_ECC_UE_ADDR0_RANK, eccuaddr0);
146
row = FIELD_GET(AL_MC_ECC_UE_ADDR0_ROW, eccuaddr0);
147
148
bg = FIELD_GET(AL_MC_ECC_UE_ADDR1_BG, eccuaddr1);
149
bank = FIELD_GET(AL_MC_ECC_UE_ADDR1_BANK, eccuaddr1);
150
column = FIELD_GET(AL_MC_ECC_UE_ADDR1_COLUMN, eccuaddr1);
151
152
prepare_msg(msg, sizeof(msg), HW_EVENT_ERR_UNCORRECTED,
153
rank, row, bg, bank, column,
154
eccusyn0, eccusyn1, eccusyn2);
155
156
spin_lock_irqsave(&al_mc->lock, flags);
157
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
158
ue_count, 0, 0, 0, 0, 0, -1, mci->ctl_name, msg);
159
spin_unlock_irqrestore(&al_mc->lock, flags);
160
161
return ue_count;
162
}
163
164
static void al_mc_edac_check(struct mem_ctl_info *mci)
165
{
166
struct al_mc_edac *al_mc = mci->pvt_info;
167
168
if (al_mc->irq_ue <= 0)
169
handle_ue(mci);
170
171
if (al_mc->irq_ce <= 0)
172
handle_ce(mci);
173
}
174
175
static irqreturn_t al_mc_edac_irq_handler_ue(int irq, void *info)
176
{
177
struct platform_device *pdev = info;
178
struct mem_ctl_info *mci = platform_get_drvdata(pdev);
179
180
if (handle_ue(mci))
181
return IRQ_HANDLED;
182
return IRQ_NONE;
183
}
184
185
static irqreturn_t al_mc_edac_irq_handler_ce(int irq, void *info)
186
{
187
struct platform_device *pdev = info;
188
struct mem_ctl_info *mci = platform_get_drvdata(pdev);
189
190
if (handle_ce(mci))
191
return IRQ_HANDLED;
192
return IRQ_NONE;
193
}
194
195
/*
 * get_scrub_mode() - derive the EDAC scrub mode from the ECC config register.
 * @mmio_base: mapped base of the controller registers
 *
 * Return: SCRUB_NONE when the SCRUB_DISABLED bit is set in AL_MC_ECC_CFG,
 * SCRUB_HW_SRC otherwise.
 */
static enum scrub_type get_scrub_mode(void __iomem *mmio_base)
{
	u32 cfg = readl(mmio_base + AL_MC_ECC_CFG);

	if (cfg & AL_MC_ECC_CFG_SCRUB_DISABLED)
		return SCRUB_NONE;

	return SCRUB_HW_SRC;
}
206
207
/*
 * devm_al_mc_edac_free() - devres action that frees the mem_ctl_info.
 * @data: the struct mem_ctl_info registered with the action in probe
 */
static void devm_al_mc_edac_free(void *data)
{
	struct mem_ctl_info *mci = data;

	edac_mc_free(mci);
}
211
212
/*
 * devm_al_mc_edac_del() - devres action that removes the controller from
 * the EDAC core.
 * @data: the struct device registered with the action in probe
 */
static void devm_al_mc_edac_del(void *data)
{
	struct device *dev = data;

	edac_mc_del_mc(dev);
}
216
217
static int al_mc_edac_probe(struct platform_device *pdev)
218
{
219
struct edac_mc_layer layers[1];
220
struct mem_ctl_info *mci;
221
struct al_mc_edac *al_mc;
222
void __iomem *mmio_base;
223
struct dimm_info *dimm;
224
int ret;
225
226
mmio_base = devm_platform_ioremap_resource(pdev, 0);
227
if (IS_ERR(mmio_base)) {
228
dev_err(&pdev->dev, "failed to ioremap memory (%ld)\n",
229
PTR_ERR(mmio_base));
230
return PTR_ERR(mmio_base);
231
}
232
233
layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
234
layers[0].size = 1;
235
layers[0].is_virt_csrow = false;
236
mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
237
sizeof(struct al_mc_edac));
238
if (!mci)
239
return -ENOMEM;
240
241
ret = devm_add_action_or_reset(&pdev->dev, devm_al_mc_edac_free, mci);
242
if (ret)
243
return ret;
244
245
platform_set_drvdata(pdev, mci);
246
al_mc = mci->pvt_info;
247
248
al_mc->mmio_base = mmio_base;
249
250
al_mc->irq_ue = of_irq_get_byname(pdev->dev.of_node, "ue");
251
if (al_mc->irq_ue <= 0)
252
dev_dbg(&pdev->dev,
253
"no IRQ defined for UE - falling back to polling\n");
254
255
al_mc->irq_ce = of_irq_get_byname(pdev->dev.of_node, "ce");
256
if (al_mc->irq_ce <= 0)
257
dev_dbg(&pdev->dev,
258
"no IRQ defined for CE - falling back to polling\n");
259
260
/*
261
* In case both interrupts (ue/ce) are to be found, use interrupt mode.
262
* In case none of the interrupt are foud, use polling mode.
263
* In case only one interrupt is found, use interrupt mode for it but
264
* keep polling mode enable for the other.
265
*/
266
if (al_mc->irq_ue <= 0 || al_mc->irq_ce <= 0) {
267
edac_op_state = EDAC_OPSTATE_POLL;
268
mci->edac_check = al_mc_edac_check;
269
} else {
270
edac_op_state = EDAC_OPSTATE_INT;
271
}
272
273
spin_lock_init(&al_mc->lock);
274
275
mci->mtype_cap = MEM_FLAG_DDR3 | MEM_FLAG_DDR4;
276
mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
277
mci->edac_cap = EDAC_FLAG_SECDED;
278
mci->mod_name = DRV_NAME;
279
mci->ctl_name = "al_mc";
280
mci->pdev = &pdev->dev;
281
mci->scrub_mode = get_scrub_mode(mmio_base);
282
283
dimm = *mci->dimms;
284
dimm->grain = 1;
285
286
ret = edac_mc_add_mc(mci);
287
if (ret < 0) {
288
dev_err(&pdev->dev,
289
"fail to add memory controller device (%d)\n",
290
ret);
291
return ret;
292
}
293
294
ret = devm_add_action_or_reset(&pdev->dev, devm_al_mc_edac_del, &pdev->dev);
295
if (ret)
296
return ret;
297
298
if (al_mc->irq_ue > 0) {
299
ret = devm_request_irq(&pdev->dev,
300
al_mc->irq_ue,
301
al_mc_edac_irq_handler_ue,
302
IRQF_SHARED,
303
pdev->name,
304
pdev);
305
if (ret != 0) {
306
dev_err(&pdev->dev,
307
"failed to request UE IRQ %d (%d)\n",
308
al_mc->irq_ue, ret);
309
return ret;
310
}
311
}
312
313
if (al_mc->irq_ce > 0) {
314
ret = devm_request_irq(&pdev->dev,
315
al_mc->irq_ce,
316
al_mc_edac_irq_handler_ce,
317
IRQF_SHARED,
318
pdev->name,
319
pdev);
320
if (ret != 0) {
321
dev_err(&pdev->dev,
322
"failed to request CE IRQ %d (%d)\n",
323
al_mc->irq_ce, ret);
324
return ret;
325
}
326
}
327
328
return 0;
329
}
330
331
/* Device-tree compatible strings handled by this driver */
static const struct of_device_id al_mc_edac_of_match[] = {
	{ .compatible = "amazon,al-mc-edac" },
	{ /* sentinel */ }
};
MODULE_DEVICE_TABLE(of, al_mc_edac_of_match);
337
338
static struct platform_driver al_mc_edac_driver = {
339
.probe = al_mc_edac_probe,
340
.driver = {
341
.name = DRV_NAME,
342
.of_match_table = al_mc_edac_of_match,
343
},
344
};
345
346
module_platform_driver(al_mc_edac_driver);
347
348
MODULE_LICENSE("GPL v2");
349
MODULE_AUTHOR("Talel Shenhar");
350
MODULE_DESCRIPTION("Amazon's Annapurna Lab's Memory Controller EDAC Driver");
351
352