Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/drivers/edac/bluefield_edac.c
26278 views
1
// SPDX-License-Identifier: GPL-2.0
2
/*
3
* Bluefield-specific EDAC driver.
4
*
5
* Copyright (c) 2019 Mellanox Technologies.
6
*/
7
8
#include <linux/acpi.h>
9
#include <linux/arm-smccc.h>
10
#include <linux/bitfield.h>
11
#include <linux/edac.h>
12
#include <linux/io.h>
13
#include <linux/module.h>
14
#include <linux/platform_device.h>
15
16
#include "edac_module.h"
17
18
#define DRIVER_NAME			"bluefield-edac"

/*
 * Mellanox BlueField EMI (External Memory Interface) register map.
 * Each register offset is followed by the bit fields this driver uses.
 */

/* ECC counters: single-error count in [15:0], double-error count in [31:16]. */
#define MLXBF_ECC_CNT			0x340
#define MLXBF_ECC_CNT__SERR_CNT		GENMASK(15, 0)
#define MLXBF_ECC_CNT__DERR_CNT		GENMASK(31, 16)

/* ECC error status; the driver writes these bits to clear reported errors. */
#define MLXBF_ECC_ERR			0x348
#define MLXBF_ECC_ERR__SECC		BIT(0)
#define MLXBF_ECC_ERR__DECC		BIT(16)

/* Writing START asks the EMI to latch details of the last ECC error. */
#define MLXBF_ECC_LATCH_SEL		0x354
#define MLXBF_ECC_LATCH_SEL__START	BIT(24)

/* Low 32 bits of the latched error address. */
#define MLXBF_ERR_ADDR_0		0x358

/* High 32 bits of the latched error address. */
#define MLXBF_ERR_ADDR_1		0x37c

/* Syndrome of the latched error plus flags telling which type was latched. */
#define MLXBF_SYNDROM			0x35c
#define MLXBF_SYNDROM__DERR		BIT(0)
#define MLXBF_SYNDROM__SERR		BIT(1)
#define MLXBF_SYNDROM__SYN		GENMASK(25, 16)

/* Additional info; ERR_PRANK is the rank field used to pick the DIMM. */
#define MLXBF_ADD_INFO			0x364
#define MLXBF_ADD_INFO__ERR_PRANK	GENMASK(9, 8)

/* Upper bound on DIMM slots per memory controller, and the error grain. */
#define MLXBF_EDAC_MAX_DIMM_PER_MC	2
#define MLXBF_EDAC_ERROR_GRAIN		8

/* SMC function IDs for register access and the SiP service version query. */
#define MLXBF_WRITE_REG_32		(0x82000009)
#define MLXBF_READ_REG_32		(0x8200000A)
#define MLXBF_SIP_SVC_VERSION		(0x8200ff03)

/* Status returned in a0 when firmware refuses a register access. */
#define MLXBF_SMCCC_ACCESS_VIOLATION	(-4)

/* SiP service version checked before SMC-based register access is used. */
#define MLXBF_SVC_REQ_MAJOR		0
#define MLXBF_SVC_REQ_MINOR		3

/*
 * Request MLXBF_SIP_GET_DIMM_INFO
 *
 * Retrieve information about the DIMM in a given slot.
 *
 * Call register usage:
 * a0: MLXBF_SIP_GET_DIMM_INFO
 * a1: (Memory controller index) << 16 | (DIMM index in memory controller)
 * a2-7: not used.
 *
 * Return status:
 * a0: MLXBF_DIMM_INFO word, laid out as defined below, describing the DIMM.
 * a1-3: not used.
 */
#define MLXBF_SIP_GET_DIMM_INFO		0x82000008

/* Layout of the SMC response describing a DIMM. */
#define MLXBF_DIMM_INFO__SIZE_GB	GENMASK_ULL(15, 0)
#define MLXBF_DIMM_INFO__IS_RDIMM	BIT(16)
#define MLXBF_DIMM_INFO__IS_LRDIMM	BIT(17)
#define MLXBF_DIMM_INFO__IS_NVDIMM	BIT(18)
#define MLXBF_DIMM_INFO__RANKS		GENMASK_ULL(23, 21)
#define MLXBF_DIMM_INFO__PACKAGE_X	GENMASK_ULL(31, 24)
82
83
struct bluefield_edac_priv {
84
/* pointer to device structure */
85
struct device *dev;
86
int dimm_ranks[MLXBF_EDAC_MAX_DIMM_PER_MC];
87
void __iomem *emi_base;
88
int dimm_per_mc;
89
/* access to secure regs supported */
90
bool svc_sreg_support;
91
/* SMC table# for secure regs access */
92
u32 sreg_tbl;
93
};
94
95
/*
 * Issue an SMC carrying a single argument; all remaining argument registers
 * are passed as zero.
 *
 * Returns the first result register (a0) of the call.
 */
static u64 smc_call1(u64 smc_op, u64 smc_arg)
{
	struct arm_smccc_res smc_res;

	arm_smccc_smc(smc_op, smc_arg, 0, 0, 0, 0, 0, 0, &smc_res);

	return smc_res.a0;
}
103
104
/*
 * Read a 32-bit register through the MLXBF_READ_REG_32 SMC.
 *
 * @addr:     register address handed to firmware
 * @result:   filled with the low 32 bits of a1 on success, untouched on failure
 * @sreg_tbl: SMC table number for secure register access
 *
 * Returns 0 on success, -1 when firmware answers "not supported" or
 * "access violation".
 */
static int secure_readl(void __iomem *addr, u32 *result, u32 sreg_tbl)
{
	struct arm_smccc_res smc_res;
	int rc;

	arm_smccc_smc(MLXBF_READ_REG_32, sreg_tbl, (uintptr_t)addr,
		      0, 0, 0, 0, 0, &smc_res);

	/* a0 carries a signed status code; narrow it so it compares as one. */
	rc = smc_res.a0;

	switch (rc) {
	case SMCCC_RET_NOT_SUPPORTED:
	case MLXBF_SMCCC_ACCESS_VIOLATION:
		return -1;
	default:
		break;
	}

	*result = (u32)smc_res.a1;

	return 0;
}
121
122
/*
 * Write a 32-bit register through the MLXBF_WRITE_REG_32 SMC.
 *
 * @addr:     register address handed to firmware
 * @data:     value to write
 * @sreg_tbl: SMC table number for secure register access
 *
 * Returns 0 on success, -1 when firmware answers "not supported" or
 * "access violation".
 */
static int secure_writel(void __iomem *addr, u32 data, u32 sreg_tbl)
{
	struct arm_smccc_res smc_res;
	bool rejected;
	int rc;

	arm_smccc_smc(MLXBF_WRITE_REG_32, sreg_tbl, data, (uintptr_t)addr,
		      0, 0, 0, 0, &smc_res);

	/* a0 carries a signed status code; narrow it so it compares as one. */
	rc = smc_res.a0;

	rejected = rc == SMCCC_RET_NOT_SUPPORTED ||
		   rc == MLXBF_SMCCC_ACCESS_VIOLATION;

	return rejected ? -1 : 0;
}
138
139
/*
 * Read the EMI register at @offset into @result, going through firmware
 * (SMC) when secure register access is in use and through readl() otherwise.
 *
 * Returns 0 on success or the non-zero status of the SMC-based read.
 */
static int bluefield_edac_readl(struct bluefield_edac_priv *priv, u32 offset, u32 *result)
{
	void __iomem *reg = priv->emi_base + offset;

	if (priv->svc_sreg_support)
		return secure_readl(reg, result, priv->sreg_tbl);

	*result = readl(reg);

	return 0;
}
153
154
/*
 * Write @data to the EMI register at @offset, going through firmware (SMC)
 * when secure register access is in use and through writel() otherwise.
 *
 * Returns 0 on success or the non-zero status of the SMC-based write.
 */
static int bluefield_edac_writel(struct bluefield_edac_priv *priv, u32 offset, u32 data)
{
	void __iomem *reg = priv->emi_base + offset;

	if (priv->svc_sreg_support)
		return secure_writel(reg, data, priv->sreg_tbl);

	writel(data, reg);

	return 0;
}
168
169
/*
170
* Gather the ECC information from the External Memory Interface registers
171
* and report it to the edac handler.
172
*/
173
static void bluefield_gather_report_ecc(struct mem_ctl_info *mci,
174
int error_cnt,
175
int is_single_ecc)
176
{
177
struct bluefield_edac_priv *priv = mci->pvt_info;
178
u32 dram_additional_info, err_prank, edea0, edea1;
179
u32 ecc_latch_select, dram_syndrom, serr, derr, syndrom;
180
enum hw_event_mc_err_type ecc_type;
181
u64 ecc_dimm_addr;
182
int ecc_dimm, err;
183
184
ecc_type = is_single_ecc ? HW_EVENT_ERR_CORRECTED :
185
HW_EVENT_ERR_UNCORRECTED;
186
187
/*
188
* Tell the External Memory Interface to populate the relevant
189
* registers with information about the last ECC error occurrence.
190
*/
191
ecc_latch_select = MLXBF_ECC_LATCH_SEL__START;
192
err = bluefield_edac_writel(priv, MLXBF_ECC_LATCH_SEL, ecc_latch_select);
193
if (err)
194
dev_err(priv->dev, "ECC latch select write failed.\n");
195
196
/*
197
* Verify that the ECC reported info in the registers is of the
198
* same type as the one asked to report. If not, just report the
199
* error without the detailed information.
200
*/
201
err = bluefield_edac_readl(priv, MLXBF_SYNDROM, &dram_syndrom);
202
if (err) {
203
dev_err(priv->dev, "DRAM syndrom read failed.\n");
204
return;
205
}
206
207
serr = FIELD_GET(MLXBF_SYNDROM__SERR, dram_syndrom);
208
derr = FIELD_GET(MLXBF_SYNDROM__DERR, dram_syndrom);
209
syndrom = FIELD_GET(MLXBF_SYNDROM__SYN, dram_syndrom);
210
211
if ((is_single_ecc && !serr) || (!is_single_ecc && !derr)) {
212
edac_mc_handle_error(ecc_type, mci, error_cnt, 0, 0, 0,
213
0, 0, -1, mci->ctl_name, "");
214
return;
215
}
216
217
err = bluefield_edac_readl(priv, MLXBF_ADD_INFO, &dram_additional_info);
218
if (err) {
219
dev_err(priv->dev, "DRAM additional info read failed.\n");
220
return;
221
}
222
223
err_prank = FIELD_GET(MLXBF_ADD_INFO__ERR_PRANK, dram_additional_info);
224
225
ecc_dimm = (err_prank >= 2 && priv->dimm_ranks[0] <= 2) ? 1 : 0;
226
227
err = bluefield_edac_readl(priv, MLXBF_ERR_ADDR_0, &edea0);
228
if (err) {
229
dev_err(priv->dev, "Error addr 0 read failed.\n");
230
return;
231
}
232
233
err = bluefield_edac_readl(priv, MLXBF_ERR_ADDR_1, &edea1);
234
if (err) {
235
dev_err(priv->dev, "Error addr 1 read failed.\n");
236
return;
237
}
238
239
ecc_dimm_addr = ((u64)edea1 << 32) | edea0;
240
241
edac_mc_handle_error(ecc_type, mci, error_cnt,
242
PFN_DOWN(ecc_dimm_addr),
243
offset_in_page(ecc_dimm_addr),
244
syndrom, ecc_dimm, 0, 0, mci->ctl_name, "");
245
}
246
247
static void bluefield_edac_check(struct mem_ctl_info *mci)
248
{
249
struct bluefield_edac_priv *priv = mci->pvt_info;
250
u32 ecc_count, single_error_count, double_error_count, ecc_error = 0;
251
int err;
252
253
/*
254
* The memory controller might not be initialized by the firmware
255
* when there isn't memory, which may lead to bad register readings.
256
*/
257
if (mci->edac_cap == EDAC_FLAG_NONE)
258
return;
259
260
err = bluefield_edac_readl(priv, MLXBF_ECC_CNT, &ecc_count);
261
if (err) {
262
dev_err(priv->dev, "ECC count read failed.\n");
263
return;
264
}
265
266
single_error_count = FIELD_GET(MLXBF_ECC_CNT__SERR_CNT, ecc_count);
267
double_error_count = FIELD_GET(MLXBF_ECC_CNT__DERR_CNT, ecc_count);
268
269
if (single_error_count) {
270
ecc_error |= MLXBF_ECC_ERR__SECC;
271
272
bluefield_gather_report_ecc(mci, single_error_count, 1);
273
}
274
275
if (double_error_count) {
276
ecc_error |= MLXBF_ECC_ERR__DECC;
277
278
bluefield_gather_report_ecc(mci, double_error_count, 0);
279
}
280
281
/* Write to clear reported errors. */
282
if (ecc_count) {
283
err = bluefield_edac_writel(priv, MLXBF_ECC_ERR, ecc_error);
284
if (err)
285
dev_err(priv->dev, "ECC Error write failed.\n");
286
}
287
}
288
289
/* Initialize the DIMMs information for the given memory controller. */
290
static void bluefield_edac_init_dimms(struct mem_ctl_info *mci)
291
{
292
struct bluefield_edac_priv *priv = mci->pvt_info;
293
u64 mem_ctrl_idx = mci->mc_idx;
294
struct dimm_info *dimm;
295
u64 smc_info, smc_arg;
296
int is_empty = 1, i;
297
298
for (i = 0; i < priv->dimm_per_mc; i++) {
299
dimm = mci->dimms[i];
300
301
smc_arg = mem_ctrl_idx << 16 | i;
302
smc_info = smc_call1(MLXBF_SIP_GET_DIMM_INFO, smc_arg);
303
304
if (!FIELD_GET(MLXBF_DIMM_INFO__SIZE_GB, smc_info)) {
305
dimm->mtype = MEM_EMPTY;
306
continue;
307
}
308
309
is_empty = 0;
310
311
dimm->edac_mode = EDAC_SECDED;
312
313
if (FIELD_GET(MLXBF_DIMM_INFO__IS_NVDIMM, smc_info))
314
dimm->mtype = MEM_NVDIMM;
315
else if (FIELD_GET(MLXBF_DIMM_INFO__IS_LRDIMM, smc_info))
316
dimm->mtype = MEM_LRDDR4;
317
else if (FIELD_GET(MLXBF_DIMM_INFO__IS_RDIMM, smc_info))
318
dimm->mtype = MEM_RDDR4;
319
else
320
dimm->mtype = MEM_DDR4;
321
322
dimm->nr_pages =
323
FIELD_GET(MLXBF_DIMM_INFO__SIZE_GB, smc_info) *
324
(SZ_1G / PAGE_SIZE);
325
dimm->grain = MLXBF_EDAC_ERROR_GRAIN;
326
327
/* Mem controller for BlueField only supports x4, x8 and x16 */
328
switch (FIELD_GET(MLXBF_DIMM_INFO__PACKAGE_X, smc_info)) {
329
case 4:
330
dimm->dtype = DEV_X4;
331
break;
332
case 8:
333
dimm->dtype = DEV_X8;
334
break;
335
case 16:
336
dimm->dtype = DEV_X16;
337
break;
338
default:
339
dimm->dtype = DEV_UNKNOWN;
340
}
341
342
priv->dimm_ranks[i] =
343
FIELD_GET(MLXBF_DIMM_INFO__RANKS, smc_info);
344
}
345
346
if (is_empty)
347
mci->edac_cap = EDAC_FLAG_NONE;
348
else
349
mci->edac_cap = EDAC_FLAG_SECDED;
350
}
351
352
static int bluefield_edac_mc_probe(struct platform_device *pdev)
353
{
354
struct bluefield_edac_priv *priv;
355
struct device *dev = &pdev->dev;
356
struct edac_mc_layer layers[1];
357
struct arm_smccc_res res;
358
struct mem_ctl_info *mci;
359
struct resource *emi_res;
360
unsigned int mc_idx, dimm_count;
361
int rc, ret;
362
363
/* Read the MSS (Memory SubSystem) index from ACPI table. */
364
if (device_property_read_u32(dev, "mss_number", &mc_idx)) {
365
dev_warn(dev, "bf_edac: MSS number unknown\n");
366
return -EINVAL;
367
}
368
369
/* Read the DIMMs per MC from ACPI table. */
370
if (device_property_read_u32(dev, "dimm_per_mc", &dimm_count)) {
371
dev_warn(dev, "bf_edac: DIMMs per MC unknown\n");
372
return -EINVAL;
373
}
374
375
if (dimm_count > MLXBF_EDAC_MAX_DIMM_PER_MC) {
376
dev_warn(dev, "bf_edac: DIMMs per MC not valid\n");
377
return -EINVAL;
378
}
379
380
emi_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
381
if (!emi_res)
382
return -EINVAL;
383
384
layers[0].type = EDAC_MC_LAYER_SLOT;
385
layers[0].size = dimm_count;
386
layers[0].is_virt_csrow = true;
387
388
mci = edac_mc_alloc(mc_idx, ARRAY_SIZE(layers), layers, sizeof(*priv));
389
if (!mci)
390
return -ENOMEM;
391
392
priv = mci->pvt_info;
393
priv->dev = dev;
394
395
/*
396
* The "sec_reg_block" property in the ACPI table determines the method
397
* the driver uses to access the EMI registers:
398
* a) property is not present - directly access registers via readl/writel
399
* b) property is present - indirectly access registers via SMC calls
400
* (assuming required Silicon Provider service version found)
401
*/
402
if (device_property_read_u32(dev, "sec_reg_block", &priv->sreg_tbl)) {
403
priv->svc_sreg_support = false;
404
} else {
405
/*
406
* Check for minimum required Arm Silicon Provider (SiP) service
407
* version, ensuring support of required SMC function IDs.
408
*/
409
arm_smccc_smc(MLXBF_SIP_SVC_VERSION, 0, 0, 0, 0, 0, 0, 0, &res);
410
if (res.a0 == MLXBF_SVC_REQ_MAJOR &&
411
res.a1 >= MLXBF_SVC_REQ_MINOR) {
412
priv->svc_sreg_support = true;
413
} else {
414
dev_err(dev, "Required SMCs are not supported.\n");
415
ret = -EINVAL;
416
goto err;
417
}
418
}
419
420
priv->dimm_per_mc = dimm_count;
421
if (!priv->svc_sreg_support) {
422
priv->emi_base = devm_ioremap_resource(dev, emi_res);
423
if (IS_ERR(priv->emi_base)) {
424
dev_err(dev, "failed to map EMI IO resource\n");
425
ret = PTR_ERR(priv->emi_base);
426
goto err;
427
}
428
} else {
429
priv->emi_base = (void __iomem *)emi_res->start;
430
}
431
432
mci->pdev = dev;
433
mci->mtype_cap = MEM_FLAG_DDR4 | MEM_FLAG_RDDR4 |
434
MEM_FLAG_LRDDR4 | MEM_FLAG_NVDIMM;
435
mci->edac_ctl_cap = EDAC_FLAG_SECDED;
436
437
mci->mod_name = DRIVER_NAME;
438
mci->ctl_name = "BlueField_Memory_Controller";
439
mci->dev_name = dev_name(dev);
440
mci->edac_check = bluefield_edac_check;
441
442
/* Initialize mci with the actual populated DIMM information. */
443
bluefield_edac_init_dimms(mci);
444
445
platform_set_drvdata(pdev, mci);
446
447
/* Register with EDAC core */
448
rc = edac_mc_add_mc(mci);
449
if (rc) {
450
dev_err(dev, "failed to register with EDAC core\n");
451
ret = rc;
452
goto err;
453
}
454
455
/* Only POLL mode supported so far. */
456
edac_op_state = EDAC_OPSTATE_POLL;
457
458
return 0;
459
460
err:
461
edac_mc_free(mci);
462
463
return ret;
464
}
465
466
static void bluefield_edac_mc_remove(struct platform_device *pdev)
467
{
468
struct mem_ctl_info *mci = platform_get_drvdata(pdev);
469
470
edac_mc_del_mc(&pdev->dev);
471
edac_mc_free(mci);
472
}
473
474
/* ACPI IDs this driver binds to; the empty entry terminates the table. */
static const struct acpi_device_id bluefield_mc_acpi_ids[] = {
	{ "MLNXBF08", 0 },
	{ }
};
478
479
MODULE_DEVICE_TABLE(acpi, bluefield_mc_acpi_ids);
480
481
static struct platform_driver bluefield_edac_mc_driver = {
482
.driver = {
483
.name = DRIVER_NAME,
484
.acpi_match_table = bluefield_mc_acpi_ids,
485
},
486
.probe = bluefield_edac_mc_probe,
487
.remove = bluefield_edac_mc_remove,
488
};
489
490
module_platform_driver(bluefield_edac_mc_driver);
491
492
MODULE_DESCRIPTION("Mellanox BlueField memory edac driver");
493
MODULE_AUTHOR("Mellanox Technologies");
494
MODULE_LICENSE("GPL v2");
495
496