GitHub Repository: torvalds/linux
Path: blob/master/drivers/cxl/core/edac.c

// SPDX-License-Identifier: GPL-2.0-only
/*
 * CXL EDAC memory feature driver.
 *
 * Copyright (c) 2024-2025 HiSilicon Limited.
 *
 * - Supports functions to configure the EDAC features of
 *   CXL memory devices.
 * - Registers with the EDAC device subsystem driver to expose
 *   the feature sysfs attributes to the user for configuring
 *   CXL memory RAS features.
 */

#include <linux/cleanup.h>
#include <linux/edac.h>
#include <linux/limits.h>
#include <linux/unaligned.h>
#include <linux/xarray.h>
#include <cxl/features.h>
#include <cxl.h>
#include <cxlmem.h>
#include "core.h"
#include "trace.h"

#define CXL_NR_EDAC_DEV_FEATURES 7

#define CXL_SCRUB_NO_REGION -1

struct cxl_patrol_scrub_context {
30
u8 instance;
31
u16 get_feat_size;
32
u16 set_feat_size;
33
u8 get_version;
34
u8 set_version;
35
u16 effects;
36
struct cxl_memdev *cxlmd;
37
struct cxl_region *cxlr;
38
};
39
40
/*
41
* See CXL spec rev 3.2 @8.2.10.9.11.1 Table 8-222 Device Patrol Scrub Control
42
* Feature Readable Attributes.
43
*/
44
struct cxl_scrub_rd_attrbs {
45
u8 scrub_cycle_cap;
46
__le16 scrub_cycle_hours;
47
u8 scrub_flags;
48
} __packed;
49
50
/*
51
* See CXL spec rev 3.2 @8.2.10.9.11.1 Table 8-223 Device Patrol Scrub Control
52
* Feature Writable Attributes.
53
*/
54
struct cxl_scrub_wr_attrbs {
55
u8 scrub_cycle_hours;
56
u8 scrub_flags;
57
} __packed;
58
59
#define CXL_SCRUB_CONTROL_CHANGEABLE BIT(0)
60
#define CXL_SCRUB_CONTROL_REALTIME BIT(1)
61
#define CXL_SCRUB_CONTROL_CYCLE_MASK GENMASK(7, 0)
62
#define CXL_SCRUB_CONTROL_MIN_CYCLE_MASK GENMASK(15, 8)
63
#define CXL_SCRUB_CONTROL_ENABLE BIT(0)
64
65
#define CXL_GET_SCRUB_CYCLE_CHANGEABLE(cap) \
66
FIELD_GET(CXL_SCRUB_CONTROL_CHANGEABLE, cap)
67
#define CXL_GET_SCRUB_CYCLE(cycle) \
68
FIELD_GET(CXL_SCRUB_CONTROL_CYCLE_MASK, cycle)
69
#define CXL_GET_SCRUB_MIN_CYCLE(cycle) \
70
FIELD_GET(CXL_SCRUB_CONTROL_MIN_CYCLE_MASK, cycle)
71
#define CXL_GET_SCRUB_EN_STS(flags) FIELD_GET(CXL_SCRUB_CONTROL_ENABLE, flags)
72
73
#define CXL_SET_SCRUB_CYCLE(cycle) \
74
FIELD_PREP(CXL_SCRUB_CONTROL_CYCLE_MASK, cycle)
75
#define CXL_SET_SCRUB_EN(en) FIELD_PREP(CXL_SCRUB_CONTROL_ENABLE, en)
76
77
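/*
 * The 16-bit scrub_cycle_hours field returned by Get Feature packs the
 * current scrub cycle in the low byte and the minimum supported cycle
 * in the high byte, hence the CYCLE/MIN_CYCLE masks above.
 */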
static int cxl_mem_scrub_get_attrbs(struct cxl_mailbox *cxl_mbox, u8 *cap,
78
u16 *cycle, u8 *flags, u8 *min_cycle)
79
{
80
size_t rd_data_size = sizeof(struct cxl_scrub_rd_attrbs);
81
size_t data_size;
82
struct cxl_scrub_rd_attrbs *rd_attrbs __free(kfree) =
83
kzalloc(rd_data_size, GFP_KERNEL);
84
if (!rd_attrbs)
85
return -ENOMEM;
86
87
data_size = cxl_get_feature(cxl_mbox, &CXL_FEAT_PATROL_SCRUB_UUID,
88
CXL_GET_FEAT_SEL_CURRENT_VALUE, rd_attrbs,
89
rd_data_size, 0, NULL);
90
if (!data_size)
91
return -EIO;
92
93
*cap = rd_attrbs->scrub_cycle_cap;
94
*cycle = le16_to_cpu(rd_attrbs->scrub_cycle_hours);
95
*flags = rd_attrbs->scrub_flags;
96
if (min_cycle)
97
*min_cycle = CXL_GET_SCRUB_MIN_CYCLE(*cycle);
98
99
return 0;
100
}
101
102
static int cxl_scrub_get_attrbs(struct cxl_patrol_scrub_context *cxl_ps_ctx,
103
u8 *cap, u16 *cycle, u8 *flags, u8 *min_cycle)
104
{
105
struct cxl_mailbox *cxl_mbox;
106
struct cxl_region_params *p;
107
struct cxl_memdev *cxlmd;
108
struct cxl_region *cxlr;
109
u8 min_scrub_cycle = 0;
110
int i, ret;
111
112
if (!cxl_ps_ctx->cxlr) {
113
cxl_mbox = &cxl_ps_ctx->cxlmd->cxlds->cxl_mbox;
114
return cxl_mem_scrub_get_attrbs(cxl_mbox, cap, cycle,
115
flags, min_cycle);
116
}
117
118
ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region);
119
if ((ret = ACQUIRE_ERR(rwsem_read_intr, &rwsem)))
120
return ret;
121
122
cxlr = cxl_ps_ctx->cxlr;
123
p = &cxlr->params;
124
125
for (i = 0; i < p->nr_targets; i++) {
126
struct cxl_endpoint_decoder *cxled = p->targets[i];
127
128
cxlmd = cxled_to_memdev(cxled);
129
cxl_mbox = &cxlmd->cxlds->cxl_mbox;
130
ret = cxl_mem_scrub_get_attrbs(cxl_mbox, cap, cycle, flags,
131
min_cycle);
132
if (ret)
133
return ret;
134
135
/*
136
* The min_scrub_cycle of a region is the max of minimum scrub
137
* cycles supported by memdevs that back the region.
138
*/
139
if (min_cycle)
140
min_scrub_cycle = max(*min_cycle, min_scrub_cycle);
141
}
142
143
if (min_cycle)
144
*min_cycle = min_scrub_cycle;
145
146
return 0;
147
}
148
149
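/*
 * Setting scrub attributes for a region applies the same cycle and flags
 * to every memdev backing the region and records which region last
 * programmed each device's scrub rate.
 */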
static int cxl_scrub_set_attrbs_region(struct device *dev,
150
struct cxl_patrol_scrub_context *cxl_ps_ctx,
151
u8 cycle, u8 flags)
152
{
153
struct cxl_scrub_wr_attrbs wr_attrbs;
154
struct cxl_mailbox *cxl_mbox;
155
struct cxl_region_params *p;
156
struct cxl_memdev *cxlmd;
157
struct cxl_region *cxlr;
158
int ret, i;
159
160
ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region);
161
if ((ret = ACQUIRE_ERR(rwsem_read_intr, &rwsem)))
162
return ret;
163
164
cxlr = cxl_ps_ctx->cxlr;
165
p = &cxlr->params;
166
wr_attrbs.scrub_cycle_hours = cycle;
167
wr_attrbs.scrub_flags = flags;
168
169
for (i = 0; i < p->nr_targets; i++) {
170
struct cxl_endpoint_decoder *cxled = p->targets[i];
171
172
cxlmd = cxled_to_memdev(cxled);
173
cxl_mbox = &cxlmd->cxlds->cxl_mbox;
174
ret = cxl_set_feature(cxl_mbox, &CXL_FEAT_PATROL_SCRUB_UUID,
175
cxl_ps_ctx->set_version, &wr_attrbs,
176
sizeof(wr_attrbs),
177
CXL_SET_FEAT_FLAG_DATA_SAVED_ACROSS_RESET,
178
0, NULL);
179
if (ret)
180
return ret;
181
182
if (cycle != cxlmd->scrub_cycle) {
183
if (cxlmd->scrub_region_id != CXL_SCRUB_NO_REGION)
184
dev_info(dev,
185
"Device scrub rate(%d hours) set by region%d rate overwritten by region%d scrub rate(%d hours)\n",
186
cxlmd->scrub_cycle,
187
cxlmd->scrub_region_id, cxlr->id,
188
cycle);
189
190
cxlmd->scrub_cycle = cycle;
191
cxlmd->scrub_region_id = cxlr->id;
192
}
193
}
194
195
return 0;
196
}
197
198
static int cxl_scrub_set_attrbs_device(struct device *dev,
199
struct cxl_patrol_scrub_context *cxl_ps_ctx,
200
u8 cycle, u8 flags)
201
{
202
struct cxl_scrub_wr_attrbs wr_attrbs;
203
struct cxl_mailbox *cxl_mbox;
204
struct cxl_memdev *cxlmd;
205
int ret;
206
207
wr_attrbs.scrub_cycle_hours = cycle;
208
wr_attrbs.scrub_flags = flags;
209
210
cxlmd = cxl_ps_ctx->cxlmd;
211
cxl_mbox = &cxlmd->cxlds->cxl_mbox;
212
ret = cxl_set_feature(cxl_mbox, &CXL_FEAT_PATROL_SCRUB_UUID,
213
cxl_ps_ctx->set_version, &wr_attrbs,
214
sizeof(wr_attrbs),
215
CXL_SET_FEAT_FLAG_DATA_SAVED_ACROSS_RESET, 0,
216
NULL);
217
if (ret)
218
return ret;
219
220
if (cycle != cxlmd->scrub_cycle) {
221
if (cxlmd->scrub_region_id != CXL_SCRUB_NO_REGION)
222
dev_info(dev,
223
"Device scrub rate(%d hours) set by region%d rate overwritten with device local scrub rate(%d hours)\n",
224
cxlmd->scrub_cycle, cxlmd->scrub_region_id,
225
cycle);
226
227
cxlmd->scrub_cycle = cycle;
228
cxlmd->scrub_region_id = CXL_SCRUB_NO_REGION;
229
}
230
231
return 0;
232
}
233
234
static int cxl_scrub_set_attrbs(struct device *dev,
235
struct cxl_patrol_scrub_context *cxl_ps_ctx,
236
u8 cycle, u8 flags)
237
{
238
if (cxl_ps_ctx->cxlr)
239
return cxl_scrub_set_attrbs_region(dev, cxl_ps_ctx, cycle, flags);
240
241
return cxl_scrub_set_attrbs_device(dev, cxl_ps_ctx, cycle, flags);
242
}
243
244
static int cxl_patrol_scrub_get_enabled_bg(struct device *dev, void *drv_data,
245
bool *enabled)
246
{
247
struct cxl_patrol_scrub_context *ctx = drv_data;
248
u8 cap, flags;
249
u16 cycle;
250
int ret;
251
252
ret = cxl_scrub_get_attrbs(ctx, &cap, &cycle, &flags, NULL);
253
if (ret)
254
return ret;
255
256
*enabled = CXL_GET_SCRUB_EN_STS(flags);
257
258
return 0;
259
}
260
261
static int cxl_patrol_scrub_set_enabled_bg(struct device *dev, void *drv_data,
262
bool enable)
263
{
264
struct cxl_patrol_scrub_context *ctx = drv_data;
265
u8 cap, flags, wr_cycle;
266
u16 rd_cycle;
267
int ret;
268
269
if (!capable(CAP_SYS_RAWIO))
270
return -EPERM;
271
272
ret = cxl_scrub_get_attrbs(ctx, &cap, &rd_cycle, &flags, NULL);
273
if (ret)
274
return ret;
275
276
wr_cycle = CXL_GET_SCRUB_CYCLE(rd_cycle);
277
flags = CXL_SET_SCRUB_EN(enable);
278
279
return cxl_scrub_set_attrbs(dev, ctx, wr_cycle, flags);
280
}
281
282
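/* The EDAC scrub interface uses seconds; the CXL feature encodes hours. */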
static int cxl_patrol_scrub_get_min_scrub_cycle(struct device *dev,
283
void *drv_data, u32 *min)
284
{
285
struct cxl_patrol_scrub_context *ctx = drv_data;
286
u8 cap, flags, min_cycle;
287
u16 cycle;
288
int ret;
289
290
ret = cxl_scrub_get_attrbs(ctx, &cap, &cycle, &flags, &min_cycle);
291
if (ret)
292
return ret;
293
294
*min = min_cycle * 3600;
295
296
return 0;
297
}
298
299
static int cxl_patrol_scrub_get_max_scrub_cycle(struct device *dev,
300
void *drv_data, u32 *max)
301
{
302
*max = U8_MAX * 3600; /* Max set by register size */
303
304
return 0;
305
}
306
307
static int cxl_patrol_scrub_get_scrub_cycle(struct device *dev, void *drv_data,
308
u32 *scrub_cycle_secs)
309
{
310
struct cxl_patrol_scrub_context *ctx = drv_data;
311
u8 cap, flags;
312
u16 cycle;
313
int ret;
314
315
ret = cxl_scrub_get_attrbs(ctx, &cap, &cycle, &flags, NULL);
316
if (ret)
317
return ret;
318
319
*scrub_cycle_secs = CXL_GET_SCRUB_CYCLE(cycle) * 3600;
320
321
return 0;
322
}
323
324
static int cxl_patrol_scrub_set_scrub_cycle(struct device *dev, void *drv_data,
325
u32 scrub_cycle_secs)
326
{
327
struct cxl_patrol_scrub_context *ctx = drv_data;
328
u8 scrub_cycle_hours = scrub_cycle_secs / 3600;
329
u8 cap, wr_cycle, flags, min_cycle;
330
u16 rd_cycle;
331
int ret;
332
333
if (!capable(CAP_SYS_RAWIO))
334
return -EPERM;
335
336
ret = cxl_scrub_get_attrbs(ctx, &cap, &rd_cycle, &flags, &min_cycle);
337
if (ret)
338
return ret;
339
340
if (!CXL_GET_SCRUB_CYCLE_CHANGEABLE(cap))
341
return -EOPNOTSUPP;
342
343
if (scrub_cycle_hours < min_cycle) {
344
dev_dbg(dev, "Invalid CXL patrol scrub cycle(%d) to set\n",
345
scrub_cycle_hours);
346
dev_dbg(dev,
347
"Minimum supported CXL patrol scrub cycle in hour %d\n",
348
min_cycle);
349
return -EINVAL;
350
}
351
wr_cycle = CXL_SET_SCRUB_CYCLE(scrub_cycle_hours);
352
353
return cxl_scrub_set_attrbs(dev, ctx, wr_cycle, flags);
354
}
355
356
static const struct edac_scrub_ops cxl_ps_scrub_ops = {
357
.get_enabled_bg = cxl_patrol_scrub_get_enabled_bg,
358
.set_enabled_bg = cxl_patrol_scrub_set_enabled_bg,
359
.get_min_cycle = cxl_patrol_scrub_get_min_scrub_cycle,
360
.get_max_cycle = cxl_patrol_scrub_get_max_scrub_cycle,
361
.get_cycle_duration = cxl_patrol_scrub_get_scrub_cycle,
362
.set_cycle_duration = cxl_patrol_scrub_set_scrub_cycle,
363
};
364
365
static int cxl_memdev_scrub_init(struct cxl_memdev *cxlmd,
366
struct edac_dev_feature *ras_feature,
367
u8 scrub_inst)
368
{
369
struct cxl_patrol_scrub_context *cxl_ps_ctx;
370
struct cxl_feat_entry *feat_entry;
371
u8 cap, flags;
372
u16 cycle;
373
int rc;
374
375
feat_entry = cxl_feature_info(to_cxlfs(cxlmd->cxlds),
376
&CXL_FEAT_PATROL_SCRUB_UUID);
377
if (IS_ERR(feat_entry))
378
return -EOPNOTSUPP;
379
380
if (!(le32_to_cpu(feat_entry->flags) & CXL_FEATURE_F_CHANGEABLE))
381
return -EOPNOTSUPP;
382
383
cxl_ps_ctx = devm_kzalloc(&cxlmd->dev, sizeof(*cxl_ps_ctx), GFP_KERNEL);
384
if (!cxl_ps_ctx)
385
return -ENOMEM;
386
387
*cxl_ps_ctx = (struct cxl_patrol_scrub_context){
388
.get_feat_size = le16_to_cpu(feat_entry->get_feat_size),
389
.set_feat_size = le16_to_cpu(feat_entry->set_feat_size),
390
.get_version = feat_entry->get_feat_ver,
391
.set_version = feat_entry->set_feat_ver,
392
.effects = le16_to_cpu(feat_entry->effects),
393
.instance = scrub_inst,
394
.cxlmd = cxlmd,
395
};
396
397
rc = cxl_mem_scrub_get_attrbs(&cxlmd->cxlds->cxl_mbox, &cap, &cycle,
398
&flags, NULL);
399
if (rc)
400
return rc;
401
402
cxlmd->scrub_cycle = CXL_GET_SCRUB_CYCLE(cycle);
403
cxlmd->scrub_region_id = CXL_SCRUB_NO_REGION;
404
405
ras_feature->ft_type = RAS_FEAT_SCRUB;
406
ras_feature->instance = cxl_ps_ctx->instance;
407
ras_feature->scrub_ops = &cxl_ps_scrub_ops;
408
ras_feature->ctx = cxl_ps_ctx;
409
410
return 0;
411
}
412
413
static int cxl_region_scrub_init(struct cxl_region *cxlr,
414
struct edac_dev_feature *ras_feature,
415
u8 scrub_inst)
416
{
417
struct cxl_patrol_scrub_context *cxl_ps_ctx;
418
struct cxl_region_params *p = &cxlr->params;
419
struct cxl_feat_entry *feat_entry = NULL;
420
struct cxl_memdev *cxlmd;
421
u8 cap, flags;
422
u16 cycle;
423
int i, rc;
424
425
/*
 * The cxl_region_rwsem must be held if the code below is used in any
 * context other than region probe, which is the context here.
 */
429
for (i = 0; i < p->nr_targets; i++) {
430
struct cxl_endpoint_decoder *cxled = p->targets[i];
431
432
cxlmd = cxled_to_memdev(cxled);
433
feat_entry = cxl_feature_info(to_cxlfs(cxlmd->cxlds),
434
&CXL_FEAT_PATROL_SCRUB_UUID);
435
if (IS_ERR(feat_entry))
436
return -EOPNOTSUPP;
437
438
if (!(le32_to_cpu(feat_entry->flags) &
439
CXL_FEATURE_F_CHANGEABLE))
440
return -EOPNOTSUPP;
441
442
rc = cxl_mem_scrub_get_attrbs(&cxlmd->cxlds->cxl_mbox, &cap,
443
&cycle, &flags, NULL);
444
if (rc)
445
return rc;
446
447
cxlmd->scrub_cycle = CXL_GET_SCRUB_CYCLE(cycle);
448
cxlmd->scrub_region_id = CXL_SCRUB_NO_REGION;
449
}
450
451
cxl_ps_ctx = devm_kzalloc(&cxlr->dev, sizeof(*cxl_ps_ctx), GFP_KERNEL);
452
if (!cxl_ps_ctx)
453
return -ENOMEM;
454
455
*cxl_ps_ctx = (struct cxl_patrol_scrub_context){
456
.get_feat_size = le16_to_cpu(feat_entry->get_feat_size),
457
.set_feat_size = le16_to_cpu(feat_entry->set_feat_size),
458
.get_version = feat_entry->get_feat_ver,
459
.set_version = feat_entry->set_feat_ver,
460
.effects = le16_to_cpu(feat_entry->effects),
461
.instance = scrub_inst,
462
.cxlr = cxlr,
463
};
464
465
ras_feature->ft_type = RAS_FEAT_SCRUB;
466
ras_feature->instance = cxl_ps_ctx->instance;
467
ras_feature->scrub_ops = &cxl_ps_scrub_ops;
468
ras_feature->ctx = cxl_ps_ctx;
469
470
return 0;
471
}
472
473
struct cxl_ecs_context {
474
u16 num_media_frus;
475
u16 get_feat_size;
476
u16 set_feat_size;
477
u8 get_version;
478
u8 set_version;
479
u16 effects;
480
struct cxl_memdev *cxlmd;
481
};
482
483
/*
484
* See CXL spec rev 3.2 @8.2.10.9.11.2 Table 8-225 DDR5 ECS Control Feature
485
* Readable Attributes.
486
*/
487
struct cxl_ecs_fru_rd_attrbs {
488
u8 ecs_cap;
489
__le16 ecs_config;
490
u8 ecs_flags;
491
} __packed;
492
493
struct cxl_ecs_rd_attrbs {
494
u8 ecs_log_cap;
495
struct cxl_ecs_fru_rd_attrbs fru_attrbs[];
496
} __packed;
497
498
/*
499
* See CXL spec rev 3.2 @8.2.10.9.11.2 Table 8-226 DDR5 ECS Control Feature
500
* Writable Attributes.
501
*/
502
struct cxl_ecs_fru_wr_attrbs {
503
__le16 ecs_config;
504
} __packed;
505
506
struct cxl_ecs_wr_attrbs {
507
u8 ecs_log_cap;
508
struct cxl_ecs_fru_wr_attrbs fru_attrbs[];
509
} __packed;
510
511
#define CXL_ECS_LOG_ENTRY_TYPE_MASK GENMASK(1, 0)
512
#define CXL_ECS_REALTIME_REPORT_CAP_MASK BIT(0)
513
#define CXL_ECS_THRESHOLD_COUNT_MASK GENMASK(2, 0)
514
#define CXL_ECS_COUNT_MODE_MASK BIT(3)
515
#define CXL_ECS_RESET_COUNTER_MASK BIT(4)
516
#define CXL_ECS_RESET_COUNTER 1
517
518
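/*
 * ECS threshold counts are encoded as index values in the config field;
 * ecs_supp_threshold[] maps an index back to the corresponding count.
 */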
enum {
519
ECS_THRESHOLD_256 = 256,
520
ECS_THRESHOLD_1024 = 1024,
521
ECS_THRESHOLD_4096 = 4096,
522
};
523
524
enum {
525
ECS_THRESHOLD_IDX_256 = 3,
526
ECS_THRESHOLD_IDX_1024 = 4,
527
ECS_THRESHOLD_IDX_4096 = 5,
528
};
529
530
static const u16 ecs_supp_threshold[] = {
531
[ECS_THRESHOLD_IDX_256] = 256,
532
[ECS_THRESHOLD_IDX_1024] = 1024,
533
[ECS_THRESHOLD_IDX_4096] = 4096,
534
};
535
536
enum {
537
ECS_LOG_ENTRY_TYPE_DRAM = 0x0,
538
ECS_LOG_ENTRY_TYPE_MEM_MEDIA_FRU = 0x1,
539
};
540
541
enum cxl_ecs_count_mode {
542
ECS_MODE_COUNTS_ROWS = 0,
543
ECS_MODE_COUNTS_CODEWORDS = 1,
544
};
545
546
static int cxl_mem_ecs_get_attrbs(struct device *dev,
547
struct cxl_ecs_context *cxl_ecs_ctx,
548
int fru_id, u8 *log_cap, u16 *config)
549
{
550
struct cxl_memdev *cxlmd = cxl_ecs_ctx->cxlmd;
551
struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox;
552
struct cxl_ecs_fru_rd_attrbs *fru_rd_attrbs;
553
size_t rd_data_size;
554
size_t data_size;
555
556
rd_data_size = cxl_ecs_ctx->get_feat_size;
557
558
struct cxl_ecs_rd_attrbs *rd_attrbs __free(kvfree) =
559
kvzalloc(rd_data_size, GFP_KERNEL);
560
if (!rd_attrbs)
561
return -ENOMEM;
562
563
data_size = cxl_get_feature(cxl_mbox, &CXL_FEAT_ECS_UUID,
564
CXL_GET_FEAT_SEL_CURRENT_VALUE, rd_attrbs,
565
rd_data_size, 0, NULL);
566
if (!data_size)
567
return -EIO;
568
569
fru_rd_attrbs = rd_attrbs->fru_attrbs;
570
*log_cap = rd_attrbs->ecs_log_cap;
571
*config = le16_to_cpu(fru_rd_attrbs[fru_id].ecs_config);
572
573
return 0;
574
}
575
576
static int cxl_mem_ecs_set_attrbs(struct device *dev,
577
struct cxl_ecs_context *cxl_ecs_ctx,
578
int fru_id, u8 log_cap, u16 config)
579
{
580
struct cxl_memdev *cxlmd = cxl_ecs_ctx->cxlmd;
581
struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox;
582
struct cxl_ecs_fru_rd_attrbs *fru_rd_attrbs;
583
struct cxl_ecs_fru_wr_attrbs *fru_wr_attrbs;
584
size_t rd_data_size, wr_data_size;
585
u16 num_media_frus, count;
586
size_t data_size;
587
588
num_media_frus = cxl_ecs_ctx->num_media_frus;
589
rd_data_size = cxl_ecs_ctx->get_feat_size;
590
wr_data_size = cxl_ecs_ctx->set_feat_size;
591
struct cxl_ecs_rd_attrbs *rd_attrbs __free(kvfree) =
592
kvzalloc(rd_data_size, GFP_KERNEL);
593
if (!rd_attrbs)
594
return -ENOMEM;
595
596
data_size = cxl_get_feature(cxl_mbox, &CXL_FEAT_ECS_UUID,
597
CXL_GET_FEAT_SEL_CURRENT_VALUE, rd_attrbs,
598
rd_data_size, 0, NULL);
599
if (!data_size)
600
return -EIO;
601
602
struct cxl_ecs_wr_attrbs *wr_attrbs __free(kvfree) =
603
kvzalloc(wr_data_size, GFP_KERNEL);
604
if (!wr_attrbs)
605
return -ENOMEM;
606
607
/*
608
* Fill writable attributes from the current attributes read
609
* for all the media FRUs.
610
*/
611
fru_rd_attrbs = rd_attrbs->fru_attrbs;
612
fru_wr_attrbs = wr_attrbs->fru_attrbs;
613
wr_attrbs->ecs_log_cap = log_cap;
614
for (count = 0; count < num_media_frus; count++)
615
fru_wr_attrbs[count].ecs_config =
616
fru_rd_attrbs[count].ecs_config;
617
618
fru_wr_attrbs[fru_id].ecs_config = cpu_to_le16(config);
619
620
return cxl_set_feature(cxl_mbox, &CXL_FEAT_ECS_UUID,
621
cxl_ecs_ctx->set_version, wr_attrbs,
622
wr_data_size,
623
CXL_SET_FEAT_FLAG_DATA_SAVED_ACROSS_RESET,
624
0, NULL);
625
}
626
627
static u8 cxl_get_ecs_log_entry_type(u8 log_cap, u16 config)
628
{
629
return FIELD_GET(CXL_ECS_LOG_ENTRY_TYPE_MASK, log_cap);
630
}
631
632
static u16 cxl_get_ecs_threshold(u8 log_cap, u16 config)
633
{
634
u8 index = FIELD_GET(CXL_ECS_THRESHOLD_COUNT_MASK, config);
635
636
return ecs_supp_threshold[index];
637
}
638
639
static u8 cxl_get_ecs_count_mode(u8 log_cap, u16 config)
640
{
641
return FIELD_GET(CXL_ECS_COUNT_MODE_MASK, config);
642
}
643
644
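/*
 * Generate the EDAC get callbacks: read the current ECS attributes for
 * the given FRU and extract the requested field.
 */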
#define CXL_ECS_GET_ATTR(attrb) \
645
static int cxl_ecs_get_##attrb(struct device *dev, void *drv_data, \
646
int fru_id, u32 *val) \
647
{ \
648
struct cxl_ecs_context *ctx = drv_data; \
649
u8 log_cap; \
650
u16 config; \
651
int ret; \
652
\
653
ret = cxl_mem_ecs_get_attrbs(dev, ctx, fru_id, &log_cap, \
654
&config); \
655
if (ret) \
656
return ret; \
657
\
658
*val = cxl_get_ecs_##attrb(log_cap, config); \
659
\
660
return 0; \
661
}
662
663
CXL_ECS_GET_ATTR(log_entry_type)
664
CXL_ECS_GET_ATTR(count_mode)
665
CXL_ECS_GET_ATTR(threshold)
666
667
static int cxl_set_ecs_log_entry_type(struct device *dev, u8 *log_cap,
668
u16 *config, u32 val)
669
{
670
if (val != ECS_LOG_ENTRY_TYPE_DRAM &&
671
val != ECS_LOG_ENTRY_TYPE_MEM_MEDIA_FRU)
672
return -EINVAL;
673
674
*log_cap = FIELD_PREP(CXL_ECS_LOG_ENTRY_TYPE_MASK, val);
675
676
return 0;
677
}
678
679
static int cxl_set_ecs_threshold(struct device *dev, u8 *log_cap, u16 *config,
680
u32 val)
681
{
682
*config &= ~CXL_ECS_THRESHOLD_COUNT_MASK;
683
684
switch (val) {
685
case ECS_THRESHOLD_256:
686
*config |= FIELD_PREP(CXL_ECS_THRESHOLD_COUNT_MASK,
687
ECS_THRESHOLD_IDX_256);
688
break;
689
case ECS_THRESHOLD_1024:
690
*config |= FIELD_PREP(CXL_ECS_THRESHOLD_COUNT_MASK,
691
ECS_THRESHOLD_IDX_1024);
692
break;
693
case ECS_THRESHOLD_4096:
694
*config |= FIELD_PREP(CXL_ECS_THRESHOLD_COUNT_MASK,
695
ECS_THRESHOLD_IDX_4096);
696
break;
697
default:
698
dev_dbg(dev, "Invalid CXL ECS threshold count(%u) to set\n",
699
val);
700
dev_dbg(dev, "Supported ECS threshold counts: %u, %u, %u\n",
701
ECS_THRESHOLD_256, ECS_THRESHOLD_1024,
702
ECS_THRESHOLD_4096);
703
return -EINVAL;
704
}
705
706
return 0;
707
}
708
709
static int cxl_set_ecs_count_mode(struct device *dev, u8 *log_cap, u16 *config,
710
u32 val)
711
{
712
if (val != ECS_MODE_COUNTS_ROWS && val != ECS_MODE_COUNTS_CODEWORDS) {
713
dev_dbg(dev, "Invalid CXL ECS scrub mode(%d) to set\n", val);
714
dev_dbg(dev,
715
"Supported ECS Modes: 0: ECS counts rows with errors,"
716
" 1: ECS counts codewords with errors\n");
717
return -EINVAL;
718
}
719
720
*config &= ~CXL_ECS_COUNT_MODE_MASK;
721
*config |= FIELD_PREP(CXL_ECS_COUNT_MODE_MASK, val);
722
723
return 0;
724
}
725
726
static int cxl_set_ecs_reset_counter(struct device *dev, u8 *log_cap,
727
u16 *config, u32 val)
728
{
729
if (val != CXL_ECS_RESET_COUNTER)
730
return -EINVAL;
731
732
*config &= ~CXL_ECS_RESET_COUNTER_MASK;
733
*config |= FIELD_PREP(CXL_ECS_RESET_COUNTER_MASK, val);
734
735
return 0;
736
}
737
738
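/*
 * Generate the EDAC set callbacks: read the current ECS attributes,
 * update the requested field and write the result back.
 */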
#define CXL_ECS_SET_ATTR(attrb) \
739
static int cxl_ecs_set_##attrb(struct device *dev, void *drv_data, \
740
int fru_id, u32 val) \
741
{ \
742
struct cxl_ecs_context *ctx = drv_data; \
743
u8 log_cap; \
744
u16 config; \
745
int ret; \
746
\
747
if (!capable(CAP_SYS_RAWIO)) \
748
return -EPERM; \
749
\
750
ret = cxl_mem_ecs_get_attrbs(dev, ctx, fru_id, &log_cap, \
751
&config); \
752
if (ret) \
753
return ret; \
754
\
755
ret = cxl_set_ecs_##attrb(dev, &log_cap, &config, val); \
756
if (ret) \
757
return ret; \
758
\
759
return cxl_mem_ecs_set_attrbs(dev, ctx, fru_id, log_cap, \
760
config); \
761
}
762
CXL_ECS_SET_ATTR(log_entry_type)
763
CXL_ECS_SET_ATTR(count_mode)
764
CXL_ECS_SET_ATTR(reset_counter)
765
CXL_ECS_SET_ATTR(threshold)
766
767
static const struct edac_ecs_ops cxl_ecs_ops = {
768
.get_log_entry_type = cxl_ecs_get_log_entry_type,
769
.set_log_entry_type = cxl_ecs_set_log_entry_type,
770
.get_mode = cxl_ecs_get_count_mode,
771
.set_mode = cxl_ecs_set_count_mode,
772
.reset = cxl_ecs_set_reset_counter,
773
.get_threshold = cxl_ecs_get_threshold,
774
.set_threshold = cxl_ecs_set_threshold,
775
};
776
777
static int cxl_memdev_ecs_init(struct cxl_memdev *cxlmd,
778
struct edac_dev_feature *ras_feature)
779
{
780
struct cxl_ecs_context *cxl_ecs_ctx;
781
struct cxl_feat_entry *feat_entry;
782
int num_media_frus;
783
784
feat_entry =
785
cxl_feature_info(to_cxlfs(cxlmd->cxlds), &CXL_FEAT_ECS_UUID);
786
if (IS_ERR(feat_entry))
787
return -EOPNOTSUPP;
788
789
if (!(le32_to_cpu(feat_entry->flags) & CXL_FEATURE_F_CHANGEABLE))
790
return -EOPNOTSUPP;
791
792
num_media_frus = (le16_to_cpu(feat_entry->get_feat_size) -
793
sizeof(struct cxl_ecs_rd_attrbs)) /
794
sizeof(struct cxl_ecs_fru_rd_attrbs);
795
if (!num_media_frus)
796
return -EOPNOTSUPP;
797
798
cxl_ecs_ctx =
799
devm_kzalloc(&cxlmd->dev, sizeof(*cxl_ecs_ctx), GFP_KERNEL);
800
if (!cxl_ecs_ctx)
801
return -ENOMEM;
802
803
*cxl_ecs_ctx = (struct cxl_ecs_context){
804
.get_feat_size = le16_to_cpu(feat_entry->get_feat_size),
805
.set_feat_size = le16_to_cpu(feat_entry->set_feat_size),
806
.get_version = feat_entry->get_feat_ver,
807
.set_version = feat_entry->set_feat_ver,
808
.effects = le16_to_cpu(feat_entry->effects),
809
.num_media_frus = num_media_frus,
810
.cxlmd = cxlmd,
811
};
812
813
ras_feature->ft_type = RAS_FEAT_ECS;
814
ras_feature->ecs_ops = &cxl_ecs_ops;
815
ras_feature->ctx = cxl_ecs_ctx;
816
ras_feature->ecs_info.num_media_frus = num_media_frus;
817
818
return 0;
819
}
820
821
/*
822
* Perform Maintenance CXL 3.2 Spec 8.2.10.7.1
823
*/
824
825
/*
826
* Perform Maintenance input payload
827
* CXL rev 3.2 section 8.2.10.7.1 Table 8-117
828
*/
829
struct cxl_mbox_maintenance_hdr {
830
u8 op_class;
831
u8 op_subclass;
832
} __packed;
833
834
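/*
 * Build the Perform Maintenance input payload: a class/subclass header
 * followed by the operation-specific data, bounded by the mailbox
 * payload size.
 */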
static int cxl_perform_maintenance(struct cxl_mailbox *cxl_mbox, u8 class,
835
u8 subclass, void *data_in,
836
size_t data_in_size)
837
{
838
struct cxl_memdev_maintenance_pi {
839
struct cxl_mbox_maintenance_hdr hdr;
840
u8 data[];
841
} __packed;
842
struct cxl_mbox_cmd mbox_cmd;
843
size_t hdr_size;
844
845
struct cxl_memdev_maintenance_pi *pi __free(kvfree) =
846
kvzalloc(cxl_mbox->payload_size, GFP_KERNEL);
847
if (!pi)
848
return -ENOMEM;
849
850
pi->hdr.op_class = class;
851
pi->hdr.op_subclass = subclass;
852
hdr_size = sizeof(pi->hdr);
853
/*
 * Check that the mailbox payload is large enough for the
 * maintenance data transfer.
 */
857
if (hdr_size + data_in_size > cxl_mbox->payload_size)
858
return -ENOMEM;
859
860
memcpy(pi->data, data_in, data_in_size);
861
mbox_cmd = (struct cxl_mbox_cmd){
862
.opcode = CXL_MBOX_OP_DO_MAINTENANCE,
863
.size_in = hdr_size + data_in_size,
864
.payload_in = pi,
865
};
866
867
return cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
868
}
869
870
/*
871
* Support for finding a memory operation attributes
872
* are from the current boot or not.
873
*/
874
875
struct cxl_mem_err_rec {
876
struct xarray rec_gen_media;
877
struct xarray rec_dram;
878
};
879
880
enum cxl_mem_repair_type {
881
CXL_PPR,
882
CXL_CACHELINE_SPARING,
883
CXL_ROW_SPARING,
884
CXL_BANK_SPARING,
885
CXL_RANK_SPARING,
886
CXL_REPAIR_MAX,
887
};
888
889
/**
890
* struct cxl_mem_repair_attrbs - CXL memory repair attributes
891
* @dpa: DPA of memory to repair
892
* @nibble_mask: nibble mask, identifies one or more nibbles on the memory bus
893
* @row: row of memory to repair
894
* @column: column of memory to repair
895
* @channel: channel of memory to repair
896
* @sub_channel: sub channel of memory to repair
897
* @rank: rank of memory to repair
898
* @bank_group: bank group of memory to repair
899
* @bank: bank of memory to repair
900
* @repair_type: repair type, e.g. PPR or memory sparing.
901
*/
902
struct cxl_mem_repair_attrbs {
903
u64 dpa;
904
u32 nibble_mask;
905
u32 row;
906
u16 column;
907
u8 channel;
908
u8 sub_channel;
909
u8 rank;
910
u8 bank_group;
911
u8 bank;
912
enum cxl_mem_repair_type repair_type;
913
};
914
915
static struct cxl_event_gen_media *
916
cxl_find_rec_gen_media(struct cxl_memdev *cxlmd,
917
struct cxl_mem_repair_attrbs *attrbs)
918
{
919
struct cxl_mem_err_rec *array_rec = cxlmd->err_rec_array;
920
struct cxl_event_gen_media *rec;
921
922
if (!array_rec)
923
return NULL;
924
925
rec = xa_load(&array_rec->rec_gen_media, attrbs->dpa);
926
if (!rec)
927
return NULL;
928
929
if (attrbs->repair_type == CXL_PPR)
930
return rec;
931
932
return NULL;
933
}
934
935
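/*
 * Look up a cached DRAM event record at the given DPA and check that it
 * matches the requested repair attributes. Fields are only compared when
 * the record's validity flags mark them as valid.
 */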
static struct cxl_event_dram *
936
cxl_find_rec_dram(struct cxl_memdev *cxlmd,
937
struct cxl_mem_repair_attrbs *attrbs)
938
{
939
struct cxl_mem_err_rec *array_rec = cxlmd->err_rec_array;
940
struct cxl_event_dram *rec;
941
u16 validity_flags;
942
943
if (!array_rec)
944
return NULL;
945
946
rec = xa_load(&array_rec->rec_dram, attrbs->dpa);
947
if (!rec)
948
return NULL;
949
950
validity_flags = get_unaligned_le16(rec->media_hdr.validity_flags);
951
if (!(validity_flags & CXL_DER_VALID_CHANNEL) ||
952
!(validity_flags & CXL_DER_VALID_RANK))
953
return NULL;
954
955
switch (attrbs->repair_type) {
956
case CXL_PPR:
957
if (!(validity_flags & CXL_DER_VALID_NIBBLE) ||
958
get_unaligned_le24(rec->nibble_mask) == attrbs->nibble_mask)
959
return rec;
960
break;
961
case CXL_CACHELINE_SPARING:
962
if (!(validity_flags & CXL_DER_VALID_BANK_GROUP) ||
963
!(validity_flags & CXL_DER_VALID_BANK) ||
964
!(validity_flags & CXL_DER_VALID_ROW) ||
965
!(validity_flags & CXL_DER_VALID_COLUMN))
966
return NULL;
967
968
if (rec->media_hdr.channel == attrbs->channel &&
969
rec->media_hdr.rank == attrbs->rank &&
970
rec->bank_group == attrbs->bank_group &&
971
rec->bank == attrbs->bank &&
972
get_unaligned_le24(rec->row) == attrbs->row &&
973
get_unaligned_le16(rec->column) == attrbs->column &&
974
(!(validity_flags & CXL_DER_VALID_NIBBLE) ||
975
get_unaligned_le24(rec->nibble_mask) ==
976
attrbs->nibble_mask) &&
977
(!(validity_flags & CXL_DER_VALID_SUB_CHANNEL) ||
978
rec->sub_channel == attrbs->sub_channel))
979
return rec;
980
break;
981
case CXL_ROW_SPARING:
982
if (!(validity_flags & CXL_DER_VALID_BANK_GROUP) ||
983
!(validity_flags & CXL_DER_VALID_BANK) ||
984
!(validity_flags & CXL_DER_VALID_ROW))
985
return NULL;
986
987
if (rec->media_hdr.channel == attrbs->channel &&
988
rec->media_hdr.rank == attrbs->rank &&
989
rec->bank_group == attrbs->bank_group &&
990
rec->bank == attrbs->bank &&
991
get_unaligned_le24(rec->row) == attrbs->row &&
992
(!(validity_flags & CXL_DER_VALID_NIBBLE) ||
993
get_unaligned_le24(rec->nibble_mask) ==
994
attrbs->nibble_mask))
995
return rec;
996
break;
997
case CXL_BANK_SPARING:
998
if (!(validity_flags & CXL_DER_VALID_BANK_GROUP) ||
999
!(validity_flags & CXL_DER_VALID_BANK))
1000
return NULL;
1001
1002
if (rec->media_hdr.channel == attrbs->channel &&
1003
rec->media_hdr.rank == attrbs->rank &&
1004
rec->bank_group == attrbs->bank_group &&
1005
rec->bank == attrbs->bank &&
1006
(!(validity_flags & CXL_DER_VALID_NIBBLE) ||
1007
get_unaligned_le24(rec->nibble_mask) ==
1008
attrbs->nibble_mask))
1009
return rec;
1010
break;
1011
case CXL_RANK_SPARING:
1012
if (rec->media_hdr.channel == attrbs->channel &&
1013
rec->media_hdr.rank == attrbs->rank &&
1014
(!(validity_flags & CXL_DER_VALID_NIBBLE) ||
1015
get_unaligned_le24(rec->nibble_mask) ==
1016
attrbs->nibble_mask))
1017
return rec;
1018
break;
1019
default:
1020
return NULL;
1021
}
1022
1023
return NULL;
1024
}
1025
1026
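/*
 * Cached event records are aged out relative to the newest record's
 * timestamp and the total number of records is capped; see the helpers
 * below.
 */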
#define CXL_MAX_STORAGE_DAYS 10
1027
#define CXL_MAX_STORAGE_TIME_SECS (CXL_MAX_STORAGE_DAYS * 24 * 60 * 60)
1028
1029
static void cxl_del_expired_gmedia_recs(struct xarray *rec_xarray,
1030
struct cxl_event_gen_media *cur_rec)
1031
{
1032
u64 cur_ts = le64_to_cpu(cur_rec->media_hdr.hdr.timestamp);
1033
struct cxl_event_gen_media *rec;
1034
unsigned long index;
1035
u64 delta_ts_secs;
1036
1037
xa_for_each(rec_xarray, index, rec) {
1038
delta_ts_secs = (cur_ts -
1039
le64_to_cpu(rec->media_hdr.hdr.timestamp)) / 1000000000ULL;
1040
if (delta_ts_secs >= CXL_MAX_STORAGE_TIME_SECS) {
1041
xa_erase(rec_xarray, index);
1042
kfree(rec);
1043
}
1044
}
1045
}
1046
1047
static void cxl_del_expired_dram_recs(struct xarray *rec_xarray,
1048
struct cxl_event_dram *cur_rec)
1049
{
1050
u64 cur_ts = le64_to_cpu(cur_rec->media_hdr.hdr.timestamp);
1051
struct cxl_event_dram *rec;
1052
unsigned long index;
1053
u64 delta_secs;
1054
1055
xa_for_each(rec_xarray, index, rec) {
1056
delta_secs = (cur_ts -
1057
le64_to_cpu(rec->media_hdr.hdr.timestamp)) / 1000000000ULL;
1058
if (delta_secs >= CXL_MAX_STORAGE_TIME_SECS) {
1059
xa_erase(rec_xarray, index);
1060
kfree(rec);
1061
}
1062
}
1063
}
1064
1065
#define CXL_MAX_REC_STORAGE_COUNT 200
1066
1067
static void cxl_del_overflow_old_recs(struct xarray *rec_xarray)
1068
{
1069
void *err_rec;
1070
unsigned long index, count = 0;
1071
1072
xa_for_each(rec_xarray, index, err_rec)
1073
count++;
1074
1075
if (count <= CXL_MAX_REC_STORAGE_COUNT)
1076
return;
1077
1078
count -= CXL_MAX_REC_STORAGE_COUNT;
1079
xa_for_each(rec_xarray, index, err_rec) {
1080
xa_erase(rec_xarray, index);
1081
kfree(err_rec);
1082
count--;
1083
if (!count)
1084
break;
1085
}
1086
}
1087
1088
int cxl_store_rec_gen_media(struct cxl_memdev *cxlmd, union cxl_event *evt)
1089
{
1090
struct cxl_mem_err_rec *array_rec = cxlmd->err_rec_array;
1091
struct cxl_event_gen_media *rec;
1092
void *old_rec;
1093
1094
if (!IS_ENABLED(CONFIG_CXL_EDAC_MEM_REPAIR) || !array_rec)
1095
return 0;
1096
1097
rec = kmemdup(&evt->gen_media, sizeof(*rec), GFP_KERNEL);
1098
if (!rec)
1099
return -ENOMEM;
1100
1101
old_rec = xa_store(&array_rec->rec_gen_media,
1102
le64_to_cpu(rec->media_hdr.phys_addr), rec,
1103
GFP_KERNEL);
1104
if (xa_is_err(old_rec)) {
1105
kfree(rec);
1106
return xa_err(old_rec);
1107
}
1108
1109
kfree(old_rec);
1110
1111
cxl_del_expired_gmedia_recs(&array_rec->rec_gen_media, rec);
1112
cxl_del_overflow_old_recs(&array_rec->rec_gen_media);
1113
1114
return 0;
1115
}
1116
EXPORT_SYMBOL_NS_GPL(cxl_store_rec_gen_media, "CXL");
1117
1118
int cxl_store_rec_dram(struct cxl_memdev *cxlmd, union cxl_event *evt)
1119
{
1120
struct cxl_mem_err_rec *array_rec = cxlmd->err_rec_array;
1121
struct cxl_event_dram *rec;
1122
void *old_rec;
1123
1124
if (!IS_ENABLED(CONFIG_CXL_EDAC_MEM_REPAIR) || !array_rec)
1125
return 0;
1126
1127
rec = kmemdup(&evt->dram, sizeof(*rec), GFP_KERNEL);
1128
if (!rec)
1129
return -ENOMEM;
1130
1131
old_rec = xa_store(&array_rec->rec_dram,
1132
le64_to_cpu(rec->media_hdr.phys_addr), rec,
1133
GFP_KERNEL);
1134
if (xa_is_err(old_rec)) {
1135
kfree(rec);
1136
return xa_err(old_rec);
1137
}
1138
1139
kfree(old_rec);
1140
1141
cxl_del_expired_dram_recs(&array_rec->rec_dram, rec);
1142
cxl_del_overflow_old_recs(&array_rec->rec_dram);
1143
1144
return 0;
1145
}
1146
EXPORT_SYMBOL_NS_GPL(cxl_store_rec_dram, "CXL");
1147
1148
static bool cxl_is_memdev_memory_online(const struct cxl_memdev *cxlmd)
1149
{
1150
struct cxl_port *port = cxlmd->endpoint;
1151
1152
if (port && cxl_num_decoders_committed(port))
1153
return true;
1154
1155
return false;
1156
}
1157
1158
/*
1159
* CXL memory sparing control
1160
*/
1161
enum cxl_mem_sparing_granularity {
1162
CXL_MEM_SPARING_CACHELINE,
1163
CXL_MEM_SPARING_ROW,
1164
CXL_MEM_SPARING_BANK,
1165
CXL_MEM_SPARING_RANK,
1166
CXL_MEM_SPARING_MAX
1167
};
1168
1169
struct cxl_mem_sparing_context {
1170
struct cxl_memdev *cxlmd;
1171
uuid_t repair_uuid;
1172
u16 get_feat_size;
1173
u16 set_feat_size;
1174
u16 effects;
1175
u8 instance;
1176
u8 get_version;
1177
u8 set_version;
1178
u8 op_class;
1179
u8 op_subclass;
1180
bool cap_safe_when_in_use;
1181
bool cap_hard_sparing;
1182
bool cap_soft_sparing;
1183
u8 channel;
1184
u8 rank;
1185
u8 bank_group;
1186
u32 nibble_mask;
1187
u64 dpa;
1188
u32 row;
1189
u16 column;
1190
u8 bank;
1191
u8 sub_channel;
1192
enum edac_mem_repair_type repair_type;
1193
bool persist_mode;
1194
};
1195
1196
#define CXL_SPARING_RD_CAP_SAFE_IN_USE_MASK BIT(0)
1197
#define CXL_SPARING_RD_CAP_HARD_SPARING_MASK BIT(1)
1198
#define CXL_SPARING_RD_CAP_SOFT_SPARING_MASK BIT(2)
1199
1200
#define CXL_SPARING_WR_DEVICE_INITIATED_MASK BIT(0)
1201
1202
#define CXL_SPARING_QUERY_RESOURCE_FLAG BIT(0)
1203
#define CXL_SET_HARD_SPARING_FLAG BIT(1)
1204
#define CXL_SPARING_SUB_CHNL_VALID_FLAG BIT(2)
1205
#define CXL_SPARING_NIB_MASK_VALID_FLAG BIT(3)
1206
1207
#define CXL_GET_SPARING_SAFE_IN_USE(flags) \
1208
(FIELD_GET(CXL_SPARING_RD_CAP_SAFE_IN_USE_MASK, \
1209
flags) ^ 1)
1210
#define CXL_GET_CAP_HARD_SPARING(flags) \
1211
FIELD_GET(CXL_SPARING_RD_CAP_HARD_SPARING_MASK, \
1212
flags)
1213
#define CXL_GET_CAP_SOFT_SPARING(flags) \
1214
FIELD_GET(CXL_SPARING_RD_CAP_SOFT_SPARING_MASK, \
1215
flags)
1216
1217
#define CXL_SET_SPARING_QUERY_RESOURCE(val) \
1218
FIELD_PREP(CXL_SPARING_QUERY_RESOURCE_FLAG, val)
1219
#define CXL_SET_HARD_SPARING(val) \
1220
FIELD_PREP(CXL_SET_HARD_SPARING_FLAG, val)
1221
#define CXL_SET_SPARING_SUB_CHNL_VALID(val) \
1222
FIELD_PREP(CXL_SPARING_SUB_CHNL_VALID_FLAG, val)
1223
#define CXL_SET_SPARING_NIB_MASK_VALID(val) \
1224
FIELD_PREP(CXL_SPARING_NIB_MASK_VALID_FLAG, val)
1225
1226
/*
1227
* See CXL spec rev 3.2 @8.2.10.7.2.3 Table 8-134 Memory Sparing Feature
1228
* Readable Attributes.
1229
*/
1230
struct cxl_memdev_repair_rd_attrbs_hdr {
1231
u8 max_op_latency;
1232
__le16 op_cap;
1233
__le16 op_mode;
1234
u8 op_class;
1235
u8 op_subclass;
1236
u8 rsvd[9];
1237
} __packed;
1238
1239
struct cxl_memdev_sparing_rd_attrbs {
1240
struct cxl_memdev_repair_rd_attrbs_hdr hdr;
1241
u8 rsvd;
1242
__le16 restriction_flags;
1243
} __packed;
1244
1245
/*
1246
* See CXL spec rev 3.2 @8.2.10.7.1.4 Table 8-120 Memory Sparing Input Payload.
1247
*/
1248
struct cxl_memdev_sparing_in_payload {
1249
u8 flags;
1250
u8 channel;
1251
u8 rank;
1252
u8 nibble_mask[3];
1253
u8 bank_group;
1254
u8 bank;
1255
u8 row[3];
1256
__le16 column;
1257
u8 sub_channel;
1258
} __packed;
1259
1260
static int
1261
cxl_mem_sparing_get_attrbs(struct cxl_mem_sparing_context *cxl_sparing_ctx)
1262
{
1263
size_t rd_data_size = sizeof(struct cxl_memdev_sparing_rd_attrbs);
1264
struct cxl_memdev *cxlmd = cxl_sparing_ctx->cxlmd;
1265
struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox;
1266
u16 restriction_flags;
1267
size_t data_size;
1268
u16 return_code;
1269
struct cxl_memdev_sparing_rd_attrbs *rd_attrbs __free(kfree) =
1270
kzalloc(rd_data_size, GFP_KERNEL);
1271
if (!rd_attrbs)
1272
return -ENOMEM;
1273
1274
data_size = cxl_get_feature(cxl_mbox, &cxl_sparing_ctx->repair_uuid,
1275
CXL_GET_FEAT_SEL_CURRENT_VALUE, rd_attrbs,
1276
rd_data_size, 0, &return_code);
1277
if (!data_size)
1278
return -EIO;
1279
1280
cxl_sparing_ctx->op_class = rd_attrbs->hdr.op_class;
1281
cxl_sparing_ctx->op_subclass = rd_attrbs->hdr.op_subclass;
1282
restriction_flags = le16_to_cpu(rd_attrbs->restriction_flags);
1283
cxl_sparing_ctx->cap_safe_when_in_use =
1284
CXL_GET_SPARING_SAFE_IN_USE(restriction_flags);
1285
cxl_sparing_ctx->cap_hard_sparing =
1286
CXL_GET_CAP_HARD_SPARING(restriction_flags);
1287
cxl_sparing_ctx->cap_soft_sparing =
1288
CXL_GET_CAP_SOFT_SPARING(restriction_flags);
1289
1290
return 0;
1291
}
1292
1293
static struct cxl_event_dram *
1294
cxl_mem_get_rec_dram(struct cxl_memdev *cxlmd,
1295
struct cxl_mem_sparing_context *ctx)
1296
{
1297
struct cxl_mem_repair_attrbs attrbs = { 0 };
1298
1299
attrbs.dpa = ctx->dpa;
1300
attrbs.channel = ctx->channel;
1301
attrbs.rank = ctx->rank;
1302
attrbs.nibble_mask = ctx->nibble_mask;
1303
switch (ctx->repair_type) {
1304
case EDAC_REPAIR_CACHELINE_SPARING:
1305
attrbs.repair_type = CXL_CACHELINE_SPARING;
1306
attrbs.bank_group = ctx->bank_group;
1307
attrbs.bank = ctx->bank;
1308
attrbs.row = ctx->row;
1309
attrbs.column = ctx->column;
1310
attrbs.sub_channel = ctx->sub_channel;
1311
break;
1312
case EDAC_REPAIR_ROW_SPARING:
1313
attrbs.repair_type = CXL_ROW_SPARING;
1314
attrbs.bank_group = ctx->bank_group;
1315
attrbs.bank = ctx->bank;
1316
attrbs.row = ctx->row;
1317
break;
1318
case EDAC_REPAIR_BANK_SPARING:
1319
attrbs.repair_type = CXL_BANK_SPARING;
1320
attrbs.bank_group = ctx->bank_group;
1321
attrbs.bank = ctx->bank;
1322
break;
1323
case EDAC_REPAIR_RANK_SPARING:
1324
attrbs.repair_type = CXL_RANK_SPARING;
1325
break;
1326
default:
1327
return NULL;
1328
}
1329
1330
return cxl_find_rec_dram(cxlmd, &attrbs);
1331
}
1332
1333
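/*
 * The repair-type switch below intentionally falls through: the finer the
 * sparing granularity, the more fields the payload needs, so each case
 * fills in its own fields and then picks up those of the coarser
 * granularities.
 */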
static int
1334
cxl_mem_perform_sparing(struct device *dev,
1335
struct cxl_mem_sparing_context *cxl_sparing_ctx)
1336
{
1337
struct cxl_memdev *cxlmd = cxl_sparing_ctx->cxlmd;
1338
struct cxl_memdev_sparing_in_payload sparing_pi;
1339
struct cxl_event_dram *rec = NULL;
1340
u16 validity_flags = 0;
1341
int ret;
1342
1343
ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region);
1344
if ((ret = ACQUIRE_ERR(rwsem_read_intr, &region_rwsem)))
1345
return ret;
1346
1347
ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa);
1348
if ((ret = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem)))
1349
return ret;
1350
1351
if (!cxl_sparing_ctx->cap_safe_when_in_use) {
1352
/* Memory to repair must be offline */
1353
if (cxl_is_memdev_memory_online(cxlmd))
1354
return -EBUSY;
1355
} else {
1356
if (cxl_is_memdev_memory_online(cxlmd)) {
1357
rec = cxl_mem_get_rec_dram(cxlmd, cxl_sparing_ctx);
1358
if (!rec)
1359
return -EINVAL;
1360
1361
if (!get_unaligned_le16(rec->media_hdr.validity_flags))
1362
return -EINVAL;
1363
}
1364
}
1365
1366
memset(&sparing_pi, 0, sizeof(sparing_pi));
1367
sparing_pi.flags = CXL_SET_SPARING_QUERY_RESOURCE(0);
1368
if (cxl_sparing_ctx->persist_mode)
1369
sparing_pi.flags |= CXL_SET_HARD_SPARING(1);
1370
1371
if (rec)
1372
validity_flags = get_unaligned_le16(rec->media_hdr.validity_flags);
1373
1374
switch (cxl_sparing_ctx->repair_type) {
1375
case EDAC_REPAIR_CACHELINE_SPARING:
1376
sparing_pi.column = cpu_to_le16(cxl_sparing_ctx->column);
1377
if (!rec || (validity_flags & CXL_DER_VALID_SUB_CHANNEL)) {
1378
sparing_pi.flags |= CXL_SET_SPARING_SUB_CHNL_VALID(1);
1379
sparing_pi.sub_channel = cxl_sparing_ctx->sub_channel;
1380
}
1381
fallthrough;
1382
case EDAC_REPAIR_ROW_SPARING:
1383
put_unaligned_le24(cxl_sparing_ctx->row, sparing_pi.row);
1384
fallthrough;
1385
case EDAC_REPAIR_BANK_SPARING:
1386
sparing_pi.bank_group = cxl_sparing_ctx->bank_group;
1387
sparing_pi.bank = cxl_sparing_ctx->bank;
1388
fallthrough;
1389
case EDAC_REPAIR_RANK_SPARING:
1390
sparing_pi.rank = cxl_sparing_ctx->rank;
1391
fallthrough;
1392
default:
1393
sparing_pi.channel = cxl_sparing_ctx->channel;
1394
if ((rec && (validity_flags & CXL_DER_VALID_NIBBLE)) ||
1395
(!rec && (!cxl_sparing_ctx->nibble_mask ||
1396
(cxl_sparing_ctx->nibble_mask & 0xFFFFFF)))) {
1397
sparing_pi.flags |= CXL_SET_SPARING_NIB_MASK_VALID(1);
1398
put_unaligned_le24(cxl_sparing_ctx->nibble_mask,
1399
sparing_pi.nibble_mask);
1400
}
1401
break;
1402
}
1403
1404
return cxl_perform_maintenance(&cxlmd->cxlds->cxl_mbox,
1405
cxl_sparing_ctx->op_class,
1406
cxl_sparing_ctx->op_subclass,
1407
&sparing_pi, sizeof(sparing_pi));
1408
}
1409
1410
static int cxl_mem_sparing_get_repair_type(struct device *dev, void *drv_data,
1411
const char **repair_type)
1412
{
1413
struct cxl_mem_sparing_context *ctx = drv_data;
1414
1415
switch (ctx->repair_type) {
1416
case EDAC_REPAIR_CACHELINE_SPARING:
1417
case EDAC_REPAIR_ROW_SPARING:
1418
case EDAC_REPAIR_BANK_SPARING:
1419
case EDAC_REPAIR_RANK_SPARING:
1420
*repair_type = edac_repair_type[ctx->repair_type];
1421
break;
1422
default:
1423
return -EINVAL;
1424
}
1425
1426
return 0;
1427
}
1428
1429
#define CXL_SPARING_GET_ATTR(attrb, data_type) \
1430
static int cxl_mem_sparing_get_##attrb( \
1431
struct device *dev, void *drv_data, data_type *val) \
1432
{ \
1433
struct cxl_mem_sparing_context *ctx = drv_data; \
1434
\
1435
*val = ctx->attrb; \
1436
\
1437
return 0; \
1438
}
1439
CXL_SPARING_GET_ATTR(persist_mode, bool)
1440
CXL_SPARING_GET_ATTR(dpa, u64)
1441
CXL_SPARING_GET_ATTR(nibble_mask, u32)
1442
CXL_SPARING_GET_ATTR(bank_group, u32)
1443
CXL_SPARING_GET_ATTR(bank, u32)
1444
CXL_SPARING_GET_ATTR(rank, u32)
1445
CXL_SPARING_GET_ATTR(row, u32)
1446
CXL_SPARING_GET_ATTR(column, u32)
1447
CXL_SPARING_GET_ATTR(channel, u32)
1448
CXL_SPARING_GET_ATTR(sub_channel, u32)
1449
1450
#define CXL_SPARING_SET_ATTR(attrb, data_type) \
1451
static int cxl_mem_sparing_set_##attrb(struct device *dev, \
1452
void *drv_data, data_type val) \
1453
{ \
1454
struct cxl_mem_sparing_context *ctx = drv_data; \
1455
\
1456
ctx->attrb = val; \
1457
\
1458
return 0; \
1459
}
1460
CXL_SPARING_SET_ATTR(nibble_mask, u32)
1461
CXL_SPARING_SET_ATTR(bank_group, u32)
1462
CXL_SPARING_SET_ATTR(bank, u32)
1463
CXL_SPARING_SET_ATTR(rank, u32)
1464
CXL_SPARING_SET_ATTR(row, u32)
1465
CXL_SPARING_SET_ATTR(column, u32)
1466
CXL_SPARING_SET_ATTR(channel, u32)
1467
CXL_SPARING_SET_ATTR(sub_channel, u32)
1468
1469
static int cxl_mem_sparing_set_persist_mode(struct device *dev, void *drv_data,
1470
bool persist_mode)
1471
{
1472
struct cxl_mem_sparing_context *ctx = drv_data;
1473
1474
if ((persist_mode && ctx->cap_hard_sparing) ||
1475
(!persist_mode && ctx->cap_soft_sparing))
1476
ctx->persist_mode = persist_mode;
1477
else
1478
return -EOPNOTSUPP;
1479
1480
return 0;
1481
}
1482
1483
static int cxl_get_mem_sparing_safe_when_in_use(struct device *dev,
1484
void *drv_data, bool *safe)
1485
{
1486
struct cxl_mem_sparing_context *ctx = drv_data;
1487
1488
*safe = ctx->cap_safe_when_in_use;
1489
1490
return 0;
1491
}
1492
1493
static int cxl_mem_sparing_get_min_dpa(struct device *dev, void *drv_data,
1494
u64 *min_dpa)
1495
{
1496
struct cxl_mem_sparing_context *ctx = drv_data;
1497
struct cxl_memdev *cxlmd = ctx->cxlmd;
1498
struct cxl_dev_state *cxlds = cxlmd->cxlds;
1499
1500
*min_dpa = cxlds->dpa_res.start;
1501
1502
return 0;
1503
}
1504
1505
static int cxl_mem_sparing_get_max_dpa(struct device *dev, void *drv_data,
1506
u64 *max_dpa)
1507
{
1508
struct cxl_mem_sparing_context *ctx = drv_data;
1509
struct cxl_memdev *cxlmd = ctx->cxlmd;
1510
struct cxl_dev_state *cxlds = cxlmd->cxlds;
1511
1512
*max_dpa = cxlds->dpa_res.end;
1513
1514
return 0;
1515
}
1516
1517
static int cxl_mem_sparing_set_dpa(struct device *dev, void *drv_data, u64 dpa)
1518
{
1519
struct cxl_mem_sparing_context *ctx = drv_data;
1520
struct cxl_memdev *cxlmd = ctx->cxlmd;
1521
struct cxl_dev_state *cxlds = cxlmd->cxlds;
1522
1523
if (!cxl_resource_contains_addr(&cxlds->dpa_res, dpa))
1524
return -EINVAL;
1525
1526
ctx->dpa = dpa;
1527
1528
return 0;
1529
}
1530
1531
static int cxl_do_mem_sparing(struct device *dev, void *drv_data, u32 val)
1532
{
1533
struct cxl_mem_sparing_context *ctx = drv_data;
1534
1535
if (val != EDAC_DO_MEM_REPAIR)
1536
return -EINVAL;
1537
1538
return cxl_mem_perform_sparing(dev, ctx);
1539
}
1540
1541
#define RANK_OPS \
1542
.get_repair_type = cxl_mem_sparing_get_repair_type, \
1543
.get_persist_mode = cxl_mem_sparing_get_persist_mode, \
1544
.set_persist_mode = cxl_mem_sparing_set_persist_mode, \
1545
.get_repair_safe_when_in_use = cxl_get_mem_sparing_safe_when_in_use, \
1546
.get_min_dpa = cxl_mem_sparing_get_min_dpa, \
1547
.get_max_dpa = cxl_mem_sparing_get_max_dpa, \
1548
.get_dpa = cxl_mem_sparing_get_dpa, \
1549
.set_dpa = cxl_mem_sparing_set_dpa, \
1550
.get_nibble_mask = cxl_mem_sparing_get_nibble_mask, \
1551
.set_nibble_mask = cxl_mem_sparing_set_nibble_mask, \
1552
.get_rank = cxl_mem_sparing_get_rank, \
1553
.set_rank = cxl_mem_sparing_set_rank, \
1554
.get_channel = cxl_mem_sparing_get_channel, \
1555
.set_channel = cxl_mem_sparing_set_channel, \
1556
.do_repair = cxl_do_mem_sparing
1557
1558
#define BANK_OPS \
1559
RANK_OPS, .get_bank_group = cxl_mem_sparing_get_bank_group, \
1560
.set_bank_group = cxl_mem_sparing_set_bank_group, \
1561
.get_bank = cxl_mem_sparing_get_bank, \
1562
.set_bank = cxl_mem_sparing_set_bank
1563
1564
#define ROW_OPS \
1565
BANK_OPS, .get_row = cxl_mem_sparing_get_row, \
1566
.set_row = cxl_mem_sparing_set_row
1567
1568
#define CACHELINE_OPS \
1569
ROW_OPS, .get_column = cxl_mem_sparing_get_column, \
1570
.set_column = cxl_mem_sparing_set_column, \
1571
.get_sub_channel = cxl_mem_sparing_get_sub_channel, \
1572
.set_sub_channel = cxl_mem_sparing_set_sub_channel
1573
1574
static const struct edac_mem_repair_ops cxl_rank_sparing_ops = {
1575
RANK_OPS,
1576
};
1577
1578
static const struct edac_mem_repair_ops cxl_bank_sparing_ops = {
1579
BANK_OPS,
1580
};
1581
1582
static const struct edac_mem_repair_ops cxl_row_sparing_ops = {
1583
ROW_OPS,
1584
};
1585
1586
static const struct edac_mem_repair_ops cxl_cacheline_sparing_ops = {
1587
CACHELINE_OPS,
1588
};
1589
1590
struct cxl_mem_sparing_desc {
1591
const uuid_t repair_uuid;
1592
enum edac_mem_repair_type repair_type;
1593
const struct edac_mem_repair_ops *repair_ops;
1594
};
1595
1596
static const struct cxl_mem_sparing_desc mem_sparing_desc[] = {
1597
{
1598
.repair_uuid = CXL_FEAT_CACHELINE_SPARING_UUID,
1599
.repair_type = EDAC_REPAIR_CACHELINE_SPARING,
1600
.repair_ops = &cxl_cacheline_sparing_ops,
1601
},
1602
{
1603
.repair_uuid = CXL_FEAT_ROW_SPARING_UUID,
1604
.repair_type = EDAC_REPAIR_ROW_SPARING,
1605
.repair_ops = &cxl_row_sparing_ops,
1606
},
1607
{
1608
.repair_uuid = CXL_FEAT_BANK_SPARING_UUID,
1609
.repair_type = EDAC_REPAIR_BANK_SPARING,
1610
.repair_ops = &cxl_bank_sparing_ops,
1611
},
1612
{
1613
.repair_uuid = CXL_FEAT_RANK_SPARING_UUID,
1614
.repair_type = EDAC_REPAIR_RANK_SPARING,
1615
.repair_ops = &cxl_rank_sparing_ops,
1616
},
1617
};
1618
1619
static int cxl_memdev_sparing_init(struct cxl_memdev *cxlmd,
1620
struct edac_dev_feature *ras_feature,
1621
const struct cxl_mem_sparing_desc *desc,
1622
u8 repair_inst)
1623
{
1624
struct cxl_mem_sparing_context *cxl_sparing_ctx;
1625
struct cxl_feat_entry *feat_entry;
1626
int ret;
1627
1628
feat_entry = cxl_feature_info(to_cxlfs(cxlmd->cxlds),
1629
&desc->repair_uuid);
1630
if (IS_ERR(feat_entry))
1631
return -EOPNOTSUPP;
1632
1633
if (!(le32_to_cpu(feat_entry->flags) & CXL_FEATURE_F_CHANGEABLE))
1634
return -EOPNOTSUPP;
1635
1636
cxl_sparing_ctx = devm_kzalloc(&cxlmd->dev, sizeof(*cxl_sparing_ctx),
1637
GFP_KERNEL);
1638
if (!cxl_sparing_ctx)
1639
return -ENOMEM;
1640
1641
*cxl_sparing_ctx = (struct cxl_mem_sparing_context){
1642
.get_feat_size = le16_to_cpu(feat_entry->get_feat_size),
1643
.set_feat_size = le16_to_cpu(feat_entry->set_feat_size),
1644
.get_version = feat_entry->get_feat_ver,
1645
.set_version = feat_entry->set_feat_ver,
1646
.effects = le16_to_cpu(feat_entry->effects),
1647
.cxlmd = cxlmd,
1648
.repair_type = desc->repair_type,
1649
.instance = repair_inst++,
1650
};
1651
uuid_copy(&cxl_sparing_ctx->repair_uuid, &desc->repair_uuid);
1652
1653
ret = cxl_mem_sparing_get_attrbs(cxl_sparing_ctx);
1654
if (ret)
1655
return ret;
1656
1657
if ((cxl_sparing_ctx->cap_soft_sparing &&
1658
cxl_sparing_ctx->cap_hard_sparing) ||
1659
cxl_sparing_ctx->cap_soft_sparing)
1660
cxl_sparing_ctx->persist_mode = 0;
1661
else if (cxl_sparing_ctx->cap_hard_sparing)
1662
cxl_sparing_ctx->persist_mode = 1;
1663
else
1664
return -EOPNOTSUPP;
1665
1666
ras_feature->ft_type = RAS_FEAT_MEM_REPAIR;
1667
ras_feature->instance = cxl_sparing_ctx->instance;
1668
ras_feature->mem_repair_ops = desc->repair_ops;
1669
ras_feature->ctx = cxl_sparing_ctx;
1670
1671
return 0;
1672
}
1673
1674
/*
1675
* CXL memory soft PPR & hard PPR control
1676
*/
1677
struct cxl_ppr_context {
1678
uuid_t repair_uuid;
1679
u8 instance;
1680
u16 get_feat_size;
1681
u16 set_feat_size;
1682
u8 get_version;
1683
u8 set_version;
1684
u16 effects;
1685
u8 op_class;
1686
u8 op_subclass;
1687
bool cap_dpa;
1688
bool cap_nib_mask;
1689
bool media_accessible;
1690
bool data_retained;
1691
struct cxl_memdev *cxlmd;
1692
enum edac_mem_repair_type repair_type;
1693
bool persist_mode;
1694
u64 dpa;
1695
u32 nibble_mask;
1696
};
1697
1698
/*
1699
* See CXL rev 3.2 @8.2.10.7.2.1 Table 8-128 sPPR Feature Readable Attributes
1700
*
1701
* See CXL rev 3.2 @8.2.10.7.2.2 Table 8-131 hPPR Feature Readable Attributes
1702
*/
1703
1704
#define CXL_PPR_OP_CAP_DEVICE_INITIATED BIT(0)
1705
#define CXL_PPR_OP_MODE_DEV_INITIATED BIT(0)
1706
1707
#define CXL_PPR_FLAG_DPA_SUPPORT_MASK BIT(0)
1708
#define CXL_PPR_FLAG_NIB_SUPPORT_MASK BIT(1)
1709
#define CXL_PPR_FLAG_MEM_SPARING_EV_REC_SUPPORT_MASK BIT(2)
1710
#define CXL_PPR_FLAG_DEV_INITED_PPR_AT_BOOT_CAP_MASK BIT(3)
1711
1712
#define CXL_PPR_RESTRICTION_FLAG_MEDIA_ACCESSIBLE_MASK BIT(0)
1713
#define CXL_PPR_RESTRICTION_FLAG_DATA_RETAINED_MASK BIT(2)
1714
1715
#define CXL_PPR_SPARING_EV_REC_EN_MASK BIT(0)
1716
#define CXL_PPR_DEV_INITED_PPR_AT_BOOT_EN_MASK BIT(1)
1717
1718
#define CXL_PPR_GET_CAP_DPA(flags) \
1719
FIELD_GET(CXL_PPR_FLAG_DPA_SUPPORT_MASK, flags)
1720
#define CXL_PPR_GET_CAP_NIB_MASK(flags) \
1721
FIELD_GET(CXL_PPR_FLAG_NIB_SUPPORT_MASK, flags)
1722
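/*
 * The restriction flag bits are inverted (^ 1) so the helpers below
 * return positive capabilities: media accessible and data retained.
 */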
#define CXL_PPR_GET_MEDIA_ACCESSIBLE(restriction_flags) \
1723
(FIELD_GET(CXL_PPR_RESTRICTION_FLAG_MEDIA_ACCESSIBLE_MASK, \
1724
restriction_flags) ^ 1)
1725
#define CXL_PPR_GET_DATA_RETAINED(restriction_flags) \
1726
(FIELD_GET(CXL_PPR_RESTRICTION_FLAG_DATA_RETAINED_MASK, \
1727
restriction_flags) ^ 1)
1728
1729
struct cxl_memdev_ppr_rd_attrbs {
1730
struct cxl_memdev_repair_rd_attrbs_hdr hdr;
1731
u8 ppr_flags;
1732
__le16 restriction_flags;
1733
u8 ppr_op_mode;
1734
} __packed;
1735
1736
/*
1737
* See CXL rev 3.2 @8.2.10.7.1.2 Table 8-118 sPPR Maintenance Input Payload
1738
*
1739
* See CXL rev 3.2 @8.2.10.7.1.3 Table 8-119 hPPR Maintenance Input Payload
1740
*/
1741
struct cxl_memdev_ppr_maintenance_attrbs {
1742
u8 flags;
1743
__le64 dpa;
1744
u8 nibble_mask[3];
1745
} __packed;
1746
1747
static int cxl_mem_ppr_get_attrbs(struct cxl_ppr_context *cxl_ppr_ctx)
1748
{
1749
size_t rd_data_size = sizeof(struct cxl_memdev_ppr_rd_attrbs);
1750
struct cxl_memdev *cxlmd = cxl_ppr_ctx->cxlmd;
1751
struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox;
1752
u16 restriction_flags;
1753
size_t data_size;
1754
u16 return_code;
1755
1756
struct cxl_memdev_ppr_rd_attrbs *rd_attrbs __free(kfree) =
1757
kmalloc(rd_data_size, GFP_KERNEL);
1758
if (!rd_attrbs)
1759
return -ENOMEM;
1760
1761
data_size = cxl_get_feature(cxl_mbox, &cxl_ppr_ctx->repair_uuid,
1762
CXL_GET_FEAT_SEL_CURRENT_VALUE, rd_attrbs,
1763
rd_data_size, 0, &return_code);
1764
if (!data_size)
1765
return -EIO;
1766
1767
cxl_ppr_ctx->op_class = rd_attrbs->hdr.op_class;
1768
cxl_ppr_ctx->op_subclass = rd_attrbs->hdr.op_subclass;
1769
cxl_ppr_ctx->cap_dpa = CXL_PPR_GET_CAP_DPA(rd_attrbs->ppr_flags);
1770
cxl_ppr_ctx->cap_nib_mask =
1771
CXL_PPR_GET_CAP_NIB_MASK(rd_attrbs->ppr_flags);
1772
1773
restriction_flags = le16_to_cpu(rd_attrbs->restriction_flags);
1774
cxl_ppr_ctx->media_accessible =
1775
CXL_PPR_GET_MEDIA_ACCESSIBLE(restriction_flags);
1776
cxl_ppr_ctx->data_retained =
1777
CXL_PPR_GET_DATA_RETAINED(restriction_flags);
1778
1779
return 0;
1780
}
1781
1782
static int cxl_mem_perform_ppr(struct cxl_ppr_context *cxl_ppr_ctx)
1783
{
1784
struct cxl_memdev_ppr_maintenance_attrbs maintenance_attrbs;
1785
struct cxl_memdev *cxlmd = cxl_ppr_ctx->cxlmd;
1786
struct cxl_mem_repair_attrbs attrbs = { 0 };
1787
int ret;
1788
1789
ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region);
1790
if ((ret = ACQUIRE_ERR(rwsem_read_intr, &region_rwsem)))
1791
return ret;
1792
1793
ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa);
1794
if ((ret = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem)))
1795
return ret;
1796
1797
if (!cxl_ppr_ctx->media_accessible || !cxl_ppr_ctx->data_retained) {
1798
/* Memory to repair must be offline */
1799
if (cxl_is_memdev_memory_online(cxlmd))
1800
return -EBUSY;
1801
} else {
1802
if (cxl_is_memdev_memory_online(cxlmd)) {
1803
/* Check memory to repair is from the current boot */
1804
attrbs.repair_type = CXL_PPR;
1805
attrbs.dpa = cxl_ppr_ctx->dpa;
1806
attrbs.nibble_mask = cxl_ppr_ctx->nibble_mask;
1807
if (!cxl_find_rec_dram(cxlmd, &attrbs) &&
1808
!cxl_find_rec_gen_media(cxlmd, &attrbs))
1809
return -EINVAL;
1810
}
1811
}
1812
1813
memset(&maintenance_attrbs, 0, sizeof(maintenance_attrbs));
1814
maintenance_attrbs.flags = 0;
1815
maintenance_attrbs.dpa = cpu_to_le64(cxl_ppr_ctx->dpa);
1816
put_unaligned_le24(cxl_ppr_ctx->nibble_mask,
1817
maintenance_attrbs.nibble_mask);
1818
1819
return cxl_perform_maintenance(&cxlmd->cxlds->cxl_mbox,
1820
cxl_ppr_ctx->op_class,
1821
cxl_ppr_ctx->op_subclass,
1822
&maintenance_attrbs,
1823
sizeof(maintenance_attrbs));
1824
}
1825
1826
static int cxl_ppr_get_repair_type(struct device *dev, void *drv_data,
1827
const char **repair_type)
1828
{
1829
*repair_type = edac_repair_type[EDAC_REPAIR_PPR];
1830
1831
return 0;
1832
}
1833
1834
static int cxl_ppr_get_persist_mode(struct device *dev, void *drv_data,
1835
bool *persist_mode)
1836
{
1837
struct cxl_ppr_context *cxl_ppr_ctx = drv_data;
1838
1839
*persist_mode = cxl_ppr_ctx->persist_mode;
1840
1841
return 0;
1842
}
1843
1844
static int cxl_get_ppr_safe_when_in_use(struct device *dev, void *drv_data,
1845
bool *safe)
1846
{
1847
struct cxl_ppr_context *cxl_ppr_ctx = drv_data;
1848
1849
*safe = cxl_ppr_ctx->media_accessible & cxl_ppr_ctx->data_retained;
1850
1851
return 0;
1852
}
1853
1854
static int cxl_ppr_get_min_dpa(struct device *dev, void *drv_data, u64 *min_dpa)
1855
{
1856
struct cxl_ppr_context *cxl_ppr_ctx = drv_data;
1857
struct cxl_memdev *cxlmd = cxl_ppr_ctx->cxlmd;
1858
struct cxl_dev_state *cxlds = cxlmd->cxlds;
1859
1860
*min_dpa = cxlds->dpa_res.start;
1861
1862
return 0;
1863
}
1864
1865
static int cxl_ppr_get_max_dpa(struct device *dev, void *drv_data, u64 *max_dpa)
1866
{
1867
struct cxl_ppr_context *cxl_ppr_ctx = drv_data;
1868
struct cxl_memdev *cxlmd = cxl_ppr_ctx->cxlmd;
1869
struct cxl_dev_state *cxlds = cxlmd->cxlds;
1870
1871
*max_dpa = cxlds->dpa_res.end;
1872
1873
return 0;
1874
}
1875
1876
static int cxl_ppr_get_dpa(struct device *dev, void *drv_data, u64 *dpa)
1877
{
1878
struct cxl_ppr_context *cxl_ppr_ctx = drv_data;
1879
1880
*dpa = cxl_ppr_ctx->dpa;
1881
1882
return 0;
1883
}
1884
1885
static int cxl_ppr_set_dpa(struct device *dev, void *drv_data, u64 dpa)
1886
{
1887
struct cxl_ppr_context *cxl_ppr_ctx = drv_data;
1888
struct cxl_memdev *cxlmd = cxl_ppr_ctx->cxlmd;
1889
struct cxl_dev_state *cxlds = cxlmd->cxlds;
1890
1891
if (!cxl_resource_contains_addr(&cxlds->dpa_res, dpa))
1892
return -EINVAL;
1893
1894
cxl_ppr_ctx->dpa = dpa;
1895
1896
return 0;
1897
}
1898
1899
static int cxl_ppr_get_nibble_mask(struct device *dev, void *drv_data,
1900
u32 *nibble_mask)
1901
{
1902
struct cxl_ppr_context *cxl_ppr_ctx = drv_data;
1903
1904
*nibble_mask = cxl_ppr_ctx->nibble_mask;
1905
1906
return 0;
1907
}
1908
1909
static int cxl_ppr_set_nibble_mask(struct device *dev, void *drv_data,
1910
u32 nibble_mask)
1911
{
1912
struct cxl_ppr_context *cxl_ppr_ctx = drv_data;
1913
1914
cxl_ppr_ctx->nibble_mask = nibble_mask;
1915
1916
return 0;
1917
}
1918
1919
static int cxl_do_ppr(struct device *dev, void *drv_data, u32 val)
1920
{
1921
struct cxl_ppr_context *cxl_ppr_ctx = drv_data;
1922
struct cxl_memdev *cxlmd = cxl_ppr_ctx->cxlmd;
1923
struct cxl_dev_state *cxlds = cxlmd->cxlds;
1924
1925
if (val != EDAC_DO_MEM_REPAIR ||
1926
!cxl_resource_contains_addr(&cxlds->dpa_res, cxl_ppr_ctx->dpa))
1927
return -EINVAL;
1928
1929
return cxl_mem_perform_ppr(cxl_ppr_ctx);
1930
}
1931
1932
static const struct edac_mem_repair_ops cxl_sppr_ops = {
1933
.get_repair_type = cxl_ppr_get_repair_type,
1934
.get_persist_mode = cxl_ppr_get_persist_mode,
1935
.get_repair_safe_when_in_use = cxl_get_ppr_safe_when_in_use,
1936
.get_min_dpa = cxl_ppr_get_min_dpa,
1937
.get_max_dpa = cxl_ppr_get_max_dpa,
1938
.get_dpa = cxl_ppr_get_dpa,
1939
.set_dpa = cxl_ppr_set_dpa,
1940
.get_nibble_mask = cxl_ppr_get_nibble_mask,
1941
.set_nibble_mask = cxl_ppr_set_nibble_mask,
1942
.do_repair = cxl_do_ppr,
1943
};
1944
1945
static int cxl_memdev_soft_ppr_init(struct cxl_memdev *cxlmd,
1946
struct edac_dev_feature *ras_feature,
1947
u8 repair_inst)
1948
{
1949
struct cxl_ppr_context *cxl_sppr_ctx;
1950
struct cxl_feat_entry *feat_entry;
1951
int ret;
1952
1953
feat_entry = cxl_feature_info(to_cxlfs(cxlmd->cxlds),
1954
&CXL_FEAT_SPPR_UUID);
1955
if (IS_ERR(feat_entry))
1956
return -EOPNOTSUPP;
1957
1958
if (!(le32_to_cpu(feat_entry->flags) & CXL_FEATURE_F_CHANGEABLE))
1959
return -EOPNOTSUPP;
1960
1961
cxl_sppr_ctx =
1962
devm_kzalloc(&cxlmd->dev, sizeof(*cxl_sppr_ctx), GFP_KERNEL);
1963
if (!cxl_sppr_ctx)
1964
return -ENOMEM;
1965
1966
*cxl_sppr_ctx = (struct cxl_ppr_context){
1967
.get_feat_size = le16_to_cpu(feat_entry->get_feat_size),
1968
.set_feat_size = le16_to_cpu(feat_entry->set_feat_size),
1969
.get_version = feat_entry->get_feat_ver,
1970
.set_version = feat_entry->set_feat_ver,
1971
.effects = le16_to_cpu(feat_entry->effects),
1972
.cxlmd = cxlmd,
1973
.repair_type = EDAC_REPAIR_PPR,
1974
.persist_mode = 0,
1975
.instance = repair_inst,
1976
};
1977
uuid_copy(&cxl_sppr_ctx->repair_uuid, &CXL_FEAT_SPPR_UUID);
1978
1979
ret = cxl_mem_ppr_get_attrbs(cxl_sppr_ctx);
1980
if (ret)
1981
return ret;
1982
1983
ras_feature->ft_type = RAS_FEAT_MEM_REPAIR;
1984
ras_feature->instance = cxl_sppr_ctx->instance;
1985
ras_feature->mem_repair_ops = &cxl_sppr_ops;
1986
ras_feature->ctx = cxl_sppr_ctx;
1987
1988
return 0;
1989
}
1990
1991
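/*
 * Register the CXL memdev with the EDAC device subsystem: up to
 * CXL_NR_EDAC_DEV_FEATURES features (patrol scrub, ECS, four memory
 * sparing granularities and soft PPR), depending on Kconfig options and
 * what the device supports.
 */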
int devm_cxl_memdev_edac_register(struct cxl_memdev *cxlmd)
1992
{
1993
struct edac_dev_feature ras_features[CXL_NR_EDAC_DEV_FEATURES];
1994
int num_ras_features = 0;
1995
u8 repair_inst = 0;
1996
int rc;
1997
1998
if (IS_ENABLED(CONFIG_CXL_EDAC_SCRUB)) {
1999
rc = cxl_memdev_scrub_init(cxlmd, &ras_features[num_ras_features], 0);
2000
if (rc < 0 && rc != -EOPNOTSUPP)
2001
return rc;
2002
2003
if (rc != -EOPNOTSUPP)
2004
num_ras_features++;
2005
}
2006
2007
if (IS_ENABLED(CONFIG_CXL_EDAC_ECS)) {
2008
rc = cxl_memdev_ecs_init(cxlmd, &ras_features[num_ras_features]);
2009
if (rc < 0 && rc != -EOPNOTSUPP)
2010
return rc;
2011
2012
if (rc != -EOPNOTSUPP)
2013
num_ras_features++;
2014
}
2015
2016
if (IS_ENABLED(CONFIG_CXL_EDAC_MEM_REPAIR)) {
2017
for (int i = 0; i < CXL_MEM_SPARING_MAX; i++) {
2018
rc = cxl_memdev_sparing_init(cxlmd,
2019
&ras_features[num_ras_features],
2020
&mem_sparing_desc[i], repair_inst);
2021
if (rc == -EOPNOTSUPP)
2022
continue;
2023
if (rc < 0)
2024
return rc;
2025
2026
repair_inst++;
2027
num_ras_features++;
2028
}
2029
2030
rc = cxl_memdev_soft_ppr_init(cxlmd, &ras_features[num_ras_features],
2031
repair_inst);
2032
if (rc < 0 && rc != -EOPNOTSUPP)
2033
return rc;
2034
2035
if (rc != -EOPNOTSUPP) {
2036
repair_inst++;
2037
num_ras_features++;
2038
}
2039
2040
if (repair_inst) {
2041
struct cxl_mem_err_rec *array_rec =
2042
devm_kzalloc(&cxlmd->dev, sizeof(*array_rec),
2043
GFP_KERNEL);
2044
if (!array_rec)
2045
return -ENOMEM;
2046
2047
xa_init(&array_rec->rec_gen_media);
2048
xa_init(&array_rec->rec_dram);
2049
cxlmd->err_rec_array = array_rec;
2050
}
2051
}
2052
2053
if (!num_ras_features)
2054
return -EINVAL;
2055
2056
char *cxl_dev_name __free(kfree) =
2057
kasprintf(GFP_KERNEL, "cxl_%s", dev_name(&cxlmd->dev));
2058
if (!cxl_dev_name)
2059
return -ENOMEM;
2060
2061
return edac_dev_register(&cxlmd->dev, cxl_dev_name, NULL,
2062
num_ras_features, ras_features);
2063
}
2064
EXPORT_SYMBOL_NS_GPL(devm_cxl_memdev_edac_register, "CXL");
2065
2066
int devm_cxl_region_edac_register(struct cxl_region *cxlr)
2067
{
2068
struct edac_dev_feature ras_features[CXL_NR_EDAC_DEV_FEATURES];
2069
int num_ras_features = 0;
2070
int rc;
2071
2072
if (!IS_ENABLED(CONFIG_CXL_EDAC_SCRUB))
2073
return 0;
2074
2075
rc = cxl_region_scrub_init(cxlr, &ras_features[num_ras_features], 0);
2076
if (rc < 0)
2077
return rc;
2078
2079
num_ras_features++;
2080
2081
char *cxl_dev_name __free(kfree) =
2082
kasprintf(GFP_KERNEL, "cxl_%s", dev_name(&cxlr->dev));
2083
if (!cxl_dev_name)
2084
return -ENOMEM;
2085
2086
return edac_dev_register(&cxlr->dev, cxl_dev_name, NULL,
2087
num_ras_features, ras_features);
2088
}
2089
EXPORT_SYMBOL_NS_GPL(devm_cxl_region_edac_register, "CXL");
2090
2091
void devm_cxl_memdev_edac_release(struct cxl_memdev *cxlmd)
2092
{
2093
struct cxl_mem_err_rec *array_rec = cxlmd->err_rec_array;
2094
struct cxl_event_gen_media *rec_gen_media;
2095
struct cxl_event_dram *rec_dram;
2096
unsigned long index;
2097
2098
if (!IS_ENABLED(CONFIG_CXL_EDAC_MEM_REPAIR) || !array_rec)
2099
return;
2100
2101
xa_for_each(&array_rec->rec_dram, index, rec_dram)
2102
kfree(rec_dram);
2103
xa_destroy(&array_rec->rec_dram);
2104
2105
xa_for_each(&array_rec->rec_gen_media, index, rec_gen_media)
2106
kfree(rec_gen_media);
2107
xa_destroy(&array_rec->rec_gen_media);
2108
}
2109
EXPORT_SYMBOL_NS_GPL(devm_cxl_memdev_edac_release, "CXL");
2110
2111