Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/drivers/cxl/core/region.c
26282 views
1
// SPDX-License-Identifier: GPL-2.0-only
2
/* Copyright(c) 2022 Intel Corporation. All rights reserved. */
3
#include <linux/memregion.h>
4
#include <linux/genalloc.h>
5
#include <linux/device.h>
6
#include <linux/module.h>
7
#include <linux/memory.h>
8
#include <linux/slab.h>
9
#include <linux/uuid.h>
10
#include <linux/sort.h>
11
#include <linux/idr.h>
12
#include <linux/memory-tiers.h>
13
#include <cxlmem.h>
14
#include <cxl.h>
15
#include "core.h"
16
17
/**
18
* DOC: cxl core region
19
*
20
* CXL Regions represent mapped memory capacity in system physical address
21
* space. Whereas the CXL Root Decoders identify the bounds of potential CXL
22
* Memory ranges, Regions represent the active mapped capacity by the HDM
23
* Decoder Capability structures throughout the Host Bridges, Switches, and
24
* Endpoints in the topology.
25
*
26
* Region configuration has ordering constraints. UUID may be set at any time
27
* but is only visible for persistent regions.
28
* 1. Interleave granularity
29
* 2. Interleave size
30
* 3. Decoder targets
31
*/
32
33
static struct cxl_region *to_cxl_region(struct device *dev);
34
35
#define __ACCESS_ATTR_RO(_level, _name) { \
36
.attr = { .name = __stringify(_name), .mode = 0444 }, \
37
.show = _name##_access##_level##_show, \
38
}
39
40
#define ACCESS_DEVICE_ATTR_RO(level, name) \
41
struct device_attribute dev_attr_access##level##_##name = __ACCESS_ATTR_RO(level, name)
42
43
#define ACCESS_ATTR_RO(level, attrib) \
44
static ssize_t attrib##_access##level##_show(struct device *dev, \
45
struct device_attribute *attr, \
46
char *buf) \
47
{ \
48
struct cxl_region *cxlr = to_cxl_region(dev); \
49
\
50
if (cxlr->coord[level].attrib == 0) \
51
return -ENOENT; \
52
\
53
return sysfs_emit(buf, "%u\n", cxlr->coord[level].attrib); \
54
} \
55
static ACCESS_DEVICE_ATTR_RO(level, attrib)
56
57
ACCESS_ATTR_RO(0, read_bandwidth);
58
ACCESS_ATTR_RO(0, read_latency);
59
ACCESS_ATTR_RO(0, write_bandwidth);
60
ACCESS_ATTR_RO(0, write_latency);
61
62
#define ACCESS_ATTR_DECLARE(level, attrib) \
63
(&dev_attr_access##level##_##attrib.attr)
64
65
static struct attribute *access0_coordinate_attrs[] = {
66
ACCESS_ATTR_DECLARE(0, read_bandwidth),
67
ACCESS_ATTR_DECLARE(0, write_bandwidth),
68
ACCESS_ATTR_DECLARE(0, read_latency),
69
ACCESS_ATTR_DECLARE(0, write_latency),
70
NULL
71
};
72
73
ACCESS_ATTR_RO(1, read_bandwidth);
74
ACCESS_ATTR_RO(1, read_latency);
75
ACCESS_ATTR_RO(1, write_bandwidth);
76
ACCESS_ATTR_RO(1, write_latency);
77
78
static struct attribute *access1_coordinate_attrs[] = {
79
ACCESS_ATTR_DECLARE(1, read_bandwidth),
80
ACCESS_ATTR_DECLARE(1, write_bandwidth),
81
ACCESS_ATTR_DECLARE(1, read_latency),
82
ACCESS_ATTR_DECLARE(1, write_latency),
83
NULL
84
};
85
86
#define ACCESS_VISIBLE(level) \
87
static umode_t cxl_region_access##level##_coordinate_visible( \
88
struct kobject *kobj, struct attribute *a, int n) \
89
{ \
90
struct device *dev = kobj_to_dev(kobj); \
91
struct cxl_region *cxlr = to_cxl_region(dev); \
92
\
93
if (a == &dev_attr_access##level##_read_latency.attr && \
94
cxlr->coord[level].read_latency == 0) \
95
return 0; \
96
\
97
if (a == &dev_attr_access##level##_write_latency.attr && \
98
cxlr->coord[level].write_latency == 0) \
99
return 0; \
100
\
101
if (a == &dev_attr_access##level##_read_bandwidth.attr && \
102
cxlr->coord[level].read_bandwidth == 0) \
103
return 0; \
104
\
105
if (a == &dev_attr_access##level##_write_bandwidth.attr && \
106
cxlr->coord[level].write_bandwidth == 0) \
107
return 0; \
108
\
109
return a->mode; \
110
}
111
112
ACCESS_VISIBLE(0);
113
ACCESS_VISIBLE(1);
114
115
static const struct attribute_group cxl_region_access0_coordinate_group = {
116
.name = "access0",
117
.attrs = access0_coordinate_attrs,
118
.is_visible = cxl_region_access0_coordinate_visible,
119
};
120
121
static const struct attribute_group *get_cxl_region_access0_group(void)
122
{
123
return &cxl_region_access0_coordinate_group;
124
}
125
126
static const struct attribute_group cxl_region_access1_coordinate_group = {
127
.name = "access1",
128
.attrs = access1_coordinate_attrs,
129
.is_visible = cxl_region_access1_coordinate_visible,
130
};
131
132
static const struct attribute_group *get_cxl_region_access1_group(void)
133
{
134
return &cxl_region_access1_coordinate_group;
135
}
136
137
static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
138
char *buf)
139
{
140
struct cxl_region *cxlr = to_cxl_region(dev);
141
struct cxl_region_params *p = &cxlr->params;
142
ssize_t rc;
143
144
ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region);
145
if ((rc = ACQUIRE_ERR(rwsem_read_intr, &region_rwsem)))
146
return rc;
147
if (cxlr->mode != CXL_PARTMODE_PMEM)
148
return sysfs_emit(buf, "\n");
149
return sysfs_emit(buf, "%pUb\n", &p->uuid);
150
}
151
152
static int is_dup(struct device *match, void *data)
153
{
154
struct cxl_region_params *p;
155
struct cxl_region *cxlr;
156
uuid_t *uuid = data;
157
158
if (!is_cxl_region(match))
159
return 0;
160
161
lockdep_assert_held(&cxl_rwsem.region);
162
cxlr = to_cxl_region(match);
163
p = &cxlr->params;
164
165
if (uuid_equal(&p->uuid, uuid)) {
166
dev_dbg(match, "already has uuid: %pUb\n", uuid);
167
return -EBUSY;
168
}
169
170
return 0;
171
}
172
173
static ssize_t uuid_store(struct device *dev, struct device_attribute *attr,
174
const char *buf, size_t len)
175
{
176
struct cxl_region *cxlr = to_cxl_region(dev);
177
struct cxl_region_params *p = &cxlr->params;
178
uuid_t temp;
179
ssize_t rc;
180
181
if (len != UUID_STRING_LEN + 1)
182
return -EINVAL;
183
184
rc = uuid_parse(buf, &temp);
185
if (rc)
186
return rc;
187
188
if (uuid_is_null(&temp))
189
return -EINVAL;
190
191
ACQUIRE(rwsem_write_kill, region_rwsem)(&cxl_rwsem.region);
192
if ((rc = ACQUIRE_ERR(rwsem_write_kill, &region_rwsem)))
193
return rc;
194
195
if (uuid_equal(&p->uuid, &temp))
196
return len;
197
198
if (p->state >= CXL_CONFIG_ACTIVE)
199
return -EBUSY;
200
201
rc = bus_for_each_dev(&cxl_bus_type, NULL, &temp, is_dup);
202
if (rc < 0)
203
return rc;
204
205
uuid_copy(&p->uuid, &temp);
206
207
return len;
208
}
209
static DEVICE_ATTR_RW(uuid);
210
211
static struct cxl_region_ref *cxl_rr_load(struct cxl_port *port,
212
struct cxl_region *cxlr)
213
{
214
return xa_load(&port->regions, (unsigned long)cxlr);
215
}
216
217
static int cxl_region_invalidate_memregion(struct cxl_region *cxlr)
218
{
219
if (!cpu_cache_has_invalidate_memregion()) {
220
if (IS_ENABLED(CONFIG_CXL_REGION_INVALIDATION_TEST)) {
221
dev_info_once(
222
&cxlr->dev,
223
"Bypassing cpu_cache_invalidate_memregion() for testing!\n");
224
return 0;
225
}
226
dev_WARN(&cxlr->dev,
227
"Failed to synchronize CPU cache state\n");
228
return -ENXIO;
229
}
230
231
cpu_cache_invalidate_memregion(IORES_DESC_CXL);
232
return 0;
233
}
234
235
static void cxl_region_decode_reset(struct cxl_region *cxlr, int count)
236
{
237
struct cxl_region_params *p = &cxlr->params;
238
int i;
239
240
/*
241
* Before region teardown attempt to flush, evict any data cached for
242
* this region, or scream loudly about missing arch / platform support
243
* for CXL teardown.
244
*/
245
cxl_region_invalidate_memregion(cxlr);
246
247
for (i = count - 1; i >= 0; i--) {
248
struct cxl_endpoint_decoder *cxled = p->targets[i];
249
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
250
struct cxl_port *iter = cxled_to_port(cxled);
251
struct cxl_dev_state *cxlds = cxlmd->cxlds;
252
struct cxl_ep *ep;
253
254
if (cxlds->rcd)
255
goto endpoint_reset;
256
257
while (!is_cxl_root(to_cxl_port(iter->dev.parent)))
258
iter = to_cxl_port(iter->dev.parent);
259
260
for (ep = cxl_ep_load(iter, cxlmd); iter;
261
iter = ep->next, ep = cxl_ep_load(iter, cxlmd)) {
262
struct cxl_region_ref *cxl_rr;
263
struct cxl_decoder *cxld;
264
265
cxl_rr = cxl_rr_load(iter, cxlr);
266
cxld = cxl_rr->decoder;
267
if (cxld->reset)
268
cxld->reset(cxld);
269
set_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags);
270
}
271
272
endpoint_reset:
273
cxled->cxld.reset(&cxled->cxld);
274
set_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags);
275
}
276
277
/* all decoders associated with this region have been torn down */
278
clear_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags);
279
}
280
281
static int commit_decoder(struct cxl_decoder *cxld)
282
{
283
struct cxl_switch_decoder *cxlsd = NULL;
284
285
if (cxld->commit)
286
return cxld->commit(cxld);
287
288
if (is_switch_decoder(&cxld->dev))
289
cxlsd = to_cxl_switch_decoder(&cxld->dev);
290
291
if (dev_WARN_ONCE(&cxld->dev, !cxlsd || cxlsd->nr_targets > 1,
292
"->commit() is required\n"))
293
return -ENXIO;
294
return 0;
295
}
296
297
static int cxl_region_decode_commit(struct cxl_region *cxlr)
298
{
299
struct cxl_region_params *p = &cxlr->params;
300
int i, rc = 0;
301
302
for (i = 0; i < p->nr_targets; i++) {
303
struct cxl_endpoint_decoder *cxled = p->targets[i];
304
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
305
struct cxl_region_ref *cxl_rr;
306
struct cxl_decoder *cxld;
307
struct cxl_port *iter;
308
struct cxl_ep *ep;
309
310
/* commit bottom up */
311
for (iter = cxled_to_port(cxled); !is_cxl_root(iter);
312
iter = to_cxl_port(iter->dev.parent)) {
313
cxl_rr = cxl_rr_load(iter, cxlr);
314
cxld = cxl_rr->decoder;
315
rc = commit_decoder(cxld);
316
if (rc)
317
break;
318
}
319
320
if (rc) {
321
/* programming @iter failed, teardown */
322
for (ep = cxl_ep_load(iter, cxlmd); ep && iter;
323
iter = ep->next, ep = cxl_ep_load(iter, cxlmd)) {
324
cxl_rr = cxl_rr_load(iter, cxlr);
325
cxld = cxl_rr->decoder;
326
if (cxld->reset)
327
cxld->reset(cxld);
328
}
329
330
cxled->cxld.reset(&cxled->cxld);
331
goto err;
332
}
333
}
334
335
return 0;
336
337
err:
338
/* undo the targets that were successfully committed */
339
cxl_region_decode_reset(cxlr, i);
340
return rc;
341
}
342
343
static int queue_reset(struct cxl_region *cxlr)
344
{
345
struct cxl_region_params *p = &cxlr->params;
346
int rc;
347
348
ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region);
349
if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem)))
350
return rc;
351
352
/* Already in the requested state? */
353
if (p->state < CXL_CONFIG_COMMIT)
354
return 0;
355
356
p->state = CXL_CONFIG_RESET_PENDING;
357
358
return 0;
359
}
360
361
static int __commit(struct cxl_region *cxlr)
362
{
363
struct cxl_region_params *p = &cxlr->params;
364
int rc;
365
366
ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region);
367
if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem)))
368
return rc;
369
370
/* Already in the requested state? */
371
if (p->state >= CXL_CONFIG_COMMIT)
372
return 0;
373
374
/* Not ready to commit? */
375
if (p->state < CXL_CONFIG_ACTIVE)
376
return -ENXIO;
377
378
/*
379
* Invalidate caches before region setup to drop any speculative
380
* consumption of this address space
381
*/
382
rc = cxl_region_invalidate_memregion(cxlr);
383
if (rc)
384
return rc;
385
386
rc = cxl_region_decode_commit(cxlr);
387
if (rc)
388
return rc;
389
390
p->state = CXL_CONFIG_COMMIT;
391
392
return 0;
393
}
394
395
static ssize_t commit_store(struct device *dev, struct device_attribute *attr,
396
const char *buf, size_t len)
397
{
398
struct cxl_region *cxlr = to_cxl_region(dev);
399
struct cxl_region_params *p = &cxlr->params;
400
bool commit;
401
ssize_t rc;
402
403
rc = kstrtobool(buf, &commit);
404
if (rc)
405
return rc;
406
407
if (commit) {
408
rc = __commit(cxlr);
409
if (rc)
410
return rc;
411
return len;
412
}
413
414
rc = queue_reset(cxlr);
415
if (rc)
416
return rc;
417
418
/*
419
* Unmap the region and depend the reset-pending state to ensure
420
* it does not go active again until post reset
421
*/
422
device_release_driver(&cxlr->dev);
423
424
/*
425
* With the reset pending take cxl_rwsem.region unconditionally
426
* to ensure the reset gets handled before returning.
427
*/
428
guard(rwsem_write)(&cxl_rwsem.region);
429
430
/*
431
* Revalidate that the reset is still pending in case another
432
* thread already handled this reset.
433
*/
434
if (p->state == CXL_CONFIG_RESET_PENDING) {
435
cxl_region_decode_reset(cxlr, p->interleave_ways);
436
p->state = CXL_CONFIG_ACTIVE;
437
}
438
439
return len;
440
}
441
442
static ssize_t commit_show(struct device *dev, struct device_attribute *attr,
443
char *buf)
444
{
445
struct cxl_region *cxlr = to_cxl_region(dev);
446
struct cxl_region_params *p = &cxlr->params;
447
ssize_t rc;
448
449
ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region);
450
if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem)))
451
return rc;
452
return sysfs_emit(buf, "%d\n", p->state >= CXL_CONFIG_COMMIT);
453
}
454
static DEVICE_ATTR_RW(commit);
455
456
static umode_t cxl_region_visible(struct kobject *kobj, struct attribute *a,
457
int n)
458
{
459
struct device *dev = kobj_to_dev(kobj);
460
struct cxl_region *cxlr = to_cxl_region(dev);
461
462
/*
463
* Support tooling that expects to find a 'uuid' attribute for all
464
* regions regardless of mode.
465
*/
466
if (a == &dev_attr_uuid.attr && cxlr->mode != CXL_PARTMODE_PMEM)
467
return 0444;
468
return a->mode;
469
}
470
471
static ssize_t interleave_ways_show(struct device *dev,
472
struct device_attribute *attr, char *buf)
473
{
474
struct cxl_region *cxlr = to_cxl_region(dev);
475
struct cxl_region_params *p = &cxlr->params;
476
int rc;
477
478
ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region);
479
if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem)))
480
return rc;
481
return sysfs_emit(buf, "%d\n", p->interleave_ways);
482
}
483
484
static const struct attribute_group *get_cxl_region_target_group(void);
485
486
static ssize_t interleave_ways_store(struct device *dev,
487
struct device_attribute *attr,
488
const char *buf, size_t len)
489
{
490
struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev->parent);
491
struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
492
struct cxl_region *cxlr = to_cxl_region(dev);
493
struct cxl_region_params *p = &cxlr->params;
494
unsigned int val, save;
495
int rc;
496
u8 iw;
497
498
rc = kstrtouint(buf, 0, &val);
499
if (rc)
500
return rc;
501
502
rc = ways_to_eiw(val, &iw);
503
if (rc)
504
return rc;
505
506
/*
507
* Even for x3, x6, and x12 interleaves the region interleave must be a
508
* power of 2 multiple of the host bridge interleave.
509
*/
510
if (!is_power_of_2(val / cxld->interleave_ways) ||
511
(val % cxld->interleave_ways)) {
512
dev_dbg(&cxlr->dev, "invalid interleave: %d\n", val);
513
return -EINVAL;
514
}
515
516
ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region);
517
if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem)))
518
return rc;
519
520
if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE)
521
return -EBUSY;
522
523
save = p->interleave_ways;
524
p->interleave_ways = val;
525
rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_target_group());
526
if (rc) {
527
p->interleave_ways = save;
528
return rc;
529
}
530
531
return len;
532
}
533
static DEVICE_ATTR_RW(interleave_ways);
534
535
static ssize_t interleave_granularity_show(struct device *dev,
536
struct device_attribute *attr,
537
char *buf)
538
{
539
struct cxl_region *cxlr = to_cxl_region(dev);
540
struct cxl_region_params *p = &cxlr->params;
541
int rc;
542
543
ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region);
544
if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem)))
545
return rc;
546
return sysfs_emit(buf, "%d\n", p->interleave_granularity);
547
}
548
549
static ssize_t interleave_granularity_store(struct device *dev,
550
struct device_attribute *attr,
551
const char *buf, size_t len)
552
{
553
struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev->parent);
554
struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
555
struct cxl_region *cxlr = to_cxl_region(dev);
556
struct cxl_region_params *p = &cxlr->params;
557
int rc, val;
558
u16 ig;
559
560
rc = kstrtoint(buf, 0, &val);
561
if (rc)
562
return rc;
563
564
rc = granularity_to_eig(val, &ig);
565
if (rc)
566
return rc;
567
568
/*
569
* When the host-bridge is interleaved, disallow region granularity !=
570
* root granularity. Regions with a granularity less than the root
571
* interleave result in needing multiple endpoints to support a single
572
* slot in the interleave (possible to support in the future). Regions
573
* with a granularity greater than the root interleave result in invalid
574
* DPA translations (invalid to support).
575
*/
576
if (cxld->interleave_ways > 1 && val != cxld->interleave_granularity)
577
return -EINVAL;
578
579
ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region);
580
if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem)))
581
return rc;
582
583
if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE)
584
return -EBUSY;
585
586
p->interleave_granularity = val;
587
588
return len;
589
}
590
static DEVICE_ATTR_RW(interleave_granularity);
591
592
static ssize_t resource_show(struct device *dev, struct device_attribute *attr,
593
char *buf)
594
{
595
struct cxl_region *cxlr = to_cxl_region(dev);
596
struct cxl_region_params *p = &cxlr->params;
597
u64 resource = -1ULL;
598
int rc;
599
600
ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region);
601
if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem)))
602
return rc;
603
604
if (p->res)
605
resource = p->res->start;
606
return sysfs_emit(buf, "%#llx\n", resource);
607
}
608
static DEVICE_ATTR_RO(resource);
609
610
static ssize_t mode_show(struct device *dev, struct device_attribute *attr,
611
char *buf)
612
{
613
struct cxl_region *cxlr = to_cxl_region(dev);
614
const char *desc;
615
616
if (cxlr->mode == CXL_PARTMODE_RAM)
617
desc = "ram";
618
else if (cxlr->mode == CXL_PARTMODE_PMEM)
619
desc = "pmem";
620
else
621
desc = "";
622
623
return sysfs_emit(buf, "%s\n", desc);
624
}
625
static DEVICE_ATTR_RO(mode);
626
627
static int alloc_hpa(struct cxl_region *cxlr, resource_size_t size)
628
{
629
struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
630
struct cxl_region_params *p = &cxlr->params;
631
struct resource *res;
632
u64 remainder = 0;
633
634
lockdep_assert_held_write(&cxl_rwsem.region);
635
636
/* Nothing to do... */
637
if (p->res && resource_size(p->res) == size)
638
return 0;
639
640
/* To change size the old size must be freed first */
641
if (p->res)
642
return -EBUSY;
643
644
if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE)
645
return -EBUSY;
646
647
/* ways, granularity and uuid (if PMEM) need to be set before HPA */
648
if (!p->interleave_ways || !p->interleave_granularity ||
649
(cxlr->mode == CXL_PARTMODE_PMEM && uuid_is_null(&p->uuid)))
650
return -ENXIO;
651
652
div64_u64_rem(size, (u64)SZ_256M * p->interleave_ways, &remainder);
653
if (remainder)
654
return -EINVAL;
655
656
res = alloc_free_mem_region(cxlrd->res, size, SZ_256M,
657
dev_name(&cxlr->dev));
658
if (IS_ERR(res)) {
659
dev_dbg(&cxlr->dev,
660
"HPA allocation error (%ld) for size:%pap in %s %pr\n",
661
PTR_ERR(res), &size, cxlrd->res->name, cxlrd->res);
662
return PTR_ERR(res);
663
}
664
665
p->res = res;
666
p->state = CXL_CONFIG_INTERLEAVE_ACTIVE;
667
668
return 0;
669
}
670
671
static void cxl_region_iomem_release(struct cxl_region *cxlr)
672
{
673
struct cxl_region_params *p = &cxlr->params;
674
675
if (device_is_registered(&cxlr->dev))
676
lockdep_assert_held_write(&cxl_rwsem.region);
677
if (p->res) {
678
/*
679
* Autodiscovered regions may not have been able to insert their
680
* resource.
681
*/
682
if (p->res->parent)
683
remove_resource(p->res);
684
kfree(p->res);
685
p->res = NULL;
686
}
687
}
688
689
static int free_hpa(struct cxl_region *cxlr)
690
{
691
struct cxl_region_params *p = &cxlr->params;
692
693
lockdep_assert_held_write(&cxl_rwsem.region);
694
695
if (!p->res)
696
return 0;
697
698
if (p->state >= CXL_CONFIG_ACTIVE)
699
return -EBUSY;
700
701
cxl_region_iomem_release(cxlr);
702
p->state = CXL_CONFIG_IDLE;
703
return 0;
704
}
705
706
static ssize_t size_store(struct device *dev, struct device_attribute *attr,
707
const char *buf, size_t len)
708
{
709
struct cxl_region *cxlr = to_cxl_region(dev);
710
u64 val;
711
int rc;
712
713
rc = kstrtou64(buf, 0, &val);
714
if (rc)
715
return rc;
716
717
ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region);
718
if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem)))
719
return rc;
720
721
if (val)
722
rc = alloc_hpa(cxlr, val);
723
else
724
rc = free_hpa(cxlr);
725
726
if (rc)
727
return rc;
728
729
return len;
730
}
731
732
static ssize_t size_show(struct device *dev, struct device_attribute *attr,
733
char *buf)
734
{
735
struct cxl_region *cxlr = to_cxl_region(dev);
736
struct cxl_region_params *p = &cxlr->params;
737
u64 size = 0;
738
ssize_t rc;
739
740
ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region);
741
if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem)))
742
return rc;
743
if (p->res)
744
size = resource_size(p->res);
745
return sysfs_emit(buf, "%#llx\n", size);
746
}
747
static DEVICE_ATTR_RW(size);
748
749
static struct attribute *cxl_region_attrs[] = {
750
&dev_attr_uuid.attr,
751
&dev_attr_commit.attr,
752
&dev_attr_interleave_ways.attr,
753
&dev_attr_interleave_granularity.attr,
754
&dev_attr_resource.attr,
755
&dev_attr_size.attr,
756
&dev_attr_mode.attr,
757
NULL,
758
};
759
760
static const struct attribute_group cxl_region_group = {
761
.attrs = cxl_region_attrs,
762
.is_visible = cxl_region_visible,
763
};
764
765
static size_t show_targetN(struct cxl_region *cxlr, char *buf, int pos)
766
{
767
struct cxl_region_params *p = &cxlr->params;
768
struct cxl_endpoint_decoder *cxled;
769
int rc;
770
771
ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region);
772
if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem)))
773
return rc;
774
775
if (pos >= p->interleave_ways) {
776
dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos,
777
p->interleave_ways);
778
return -ENXIO;
779
}
780
781
cxled = p->targets[pos];
782
if (!cxled)
783
return sysfs_emit(buf, "\n");
784
return sysfs_emit(buf, "%s\n", dev_name(&cxled->cxld.dev));
785
}
786
787
static int check_commit_order(struct device *dev, void *data)
788
{
789
struct cxl_decoder *cxld = to_cxl_decoder(dev);
790
791
/*
792
* if port->commit_end is not the only free decoder, then out of
793
* order shutdown has occurred, block further allocations until
794
* that is resolved
795
*/
796
if (((cxld->flags & CXL_DECODER_F_ENABLE) == 0))
797
return -EBUSY;
798
return 0;
799
}
800
801
static int match_free_decoder(struct device *dev, const void *data)
802
{
803
struct cxl_port *port = to_cxl_port(dev->parent);
804
struct cxl_decoder *cxld;
805
int rc;
806
807
if (!is_switch_decoder(dev))
808
return 0;
809
810
cxld = to_cxl_decoder(dev);
811
812
if (cxld->id != port->commit_end + 1)
813
return 0;
814
815
if (cxld->region) {
816
dev_dbg(dev->parent,
817
"next decoder to commit (%s) is already reserved (%s)\n",
818
dev_name(dev), dev_name(&cxld->region->dev));
819
return 0;
820
}
821
822
rc = device_for_each_child_reverse_from(dev->parent, dev, NULL,
823
check_commit_order);
824
if (rc) {
825
dev_dbg(dev->parent,
826
"unable to allocate %s due to out of order shutdown\n",
827
dev_name(dev));
828
return 0;
829
}
830
return 1;
831
}
832
833
static bool region_res_match_cxl_range(const struct cxl_region_params *p,
834
struct range *range)
835
{
836
if (!p->res)
837
return false;
838
839
/*
840
* If an extended linear cache region then the CXL range is assumed
841
* to be fronted by the DRAM range in current known implementation.
842
* This assumption will be made until a variant implementation exists.
843
*/
844
return p->res->start + p->cache_size == range->start &&
845
p->res->end == range->end;
846
}
847
848
static int match_auto_decoder(struct device *dev, const void *data)
849
{
850
const struct cxl_region_params *p = data;
851
struct cxl_decoder *cxld;
852
struct range *r;
853
854
if (!is_switch_decoder(dev))
855
return 0;
856
857
cxld = to_cxl_decoder(dev);
858
r = &cxld->hpa_range;
859
860
if (region_res_match_cxl_range(p, r))
861
return 1;
862
863
return 0;
864
}
865
866
/**
867
* cxl_port_pick_region_decoder() - assign or lookup a decoder for a region
868
* @port: a port in the ancestry of the endpoint implied by @cxled
869
* @cxled: endpoint decoder to be, or currently, mapped by @port
870
* @cxlr: region to establish, or validate, decode @port
871
*
872
* In the region creation path cxl_port_pick_region_decoder() is an
873
* allocator to find a free port. In the region assembly path, it is
874
* recalling the decoder that platform firmware picked for validation
875
* purposes.
876
*
877
* The result is recorded in a 'struct cxl_region_ref' in @port.
878
*/
879
static struct cxl_decoder *
880
cxl_port_pick_region_decoder(struct cxl_port *port,
881
struct cxl_endpoint_decoder *cxled,
882
struct cxl_region *cxlr)
883
{
884
struct device *dev;
885
886
if (port == cxled_to_port(cxled))
887
return &cxled->cxld;
888
889
if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags))
890
dev = device_find_child(&port->dev, &cxlr->params,
891
match_auto_decoder);
892
else
893
dev = device_find_child(&port->dev, NULL, match_free_decoder);
894
if (!dev)
895
return NULL;
896
/*
897
* This decoder is pinned registered as long as the endpoint decoder is
898
* registered, and endpoint decoder unregistration holds the
899
* cxl_rwsem.region over unregister events, so no need to hold on to
900
* this extra reference.
901
*/
902
put_device(dev);
903
return to_cxl_decoder(dev);
904
}
905
906
static bool auto_order_ok(struct cxl_port *port, struct cxl_region *cxlr_iter,
907
struct cxl_decoder *cxld)
908
{
909
struct cxl_region_ref *rr = cxl_rr_load(port, cxlr_iter);
910
struct cxl_decoder *cxld_iter = rr->decoder;
911
912
/*
913
* Allow the out of order assembly of auto-discovered regions.
914
* Per CXL Spec 3.1 8.2.4.20.12 software must commit decoders
915
* in HPA order. Confirm that the decoder with the lesser HPA
916
* starting address has the lesser id.
917
*/
918
dev_dbg(&cxld->dev, "check for HPA violation %s:%d < %s:%d\n",
919
dev_name(&cxld->dev), cxld->id,
920
dev_name(&cxld_iter->dev), cxld_iter->id);
921
922
if (cxld_iter->id > cxld->id)
923
return true;
924
925
return false;
926
}
927
928
static struct cxl_region_ref *
929
alloc_region_ref(struct cxl_port *port, struct cxl_region *cxlr,
930
struct cxl_endpoint_decoder *cxled,
931
struct cxl_decoder *cxld)
932
{
933
struct cxl_region_params *p = &cxlr->params;
934
struct cxl_region_ref *cxl_rr, *iter;
935
unsigned long index;
936
int rc;
937
938
xa_for_each(&port->regions, index, iter) {
939
struct cxl_region_params *ip = &iter->region->params;
940
941
if (!ip->res || ip->res->start < p->res->start)
942
continue;
943
944
if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
945
if (auto_order_ok(port, iter->region, cxld))
946
continue;
947
}
948
dev_dbg(&cxlr->dev, "%s: HPA order violation %s:%pr vs %pr\n",
949
dev_name(&port->dev),
950
dev_name(&iter->region->dev), ip->res, p->res);
951
952
return ERR_PTR(-EBUSY);
953
}
954
955
cxl_rr = kzalloc(sizeof(*cxl_rr), GFP_KERNEL);
956
if (!cxl_rr)
957
return ERR_PTR(-ENOMEM);
958
cxl_rr->port = port;
959
cxl_rr->region = cxlr;
960
cxl_rr->nr_targets = 1;
961
xa_init(&cxl_rr->endpoints);
962
963
rc = xa_insert(&port->regions, (unsigned long)cxlr, cxl_rr, GFP_KERNEL);
964
if (rc) {
965
dev_dbg(&cxlr->dev,
966
"%s: failed to track region reference: %d\n",
967
dev_name(&port->dev), rc);
968
kfree(cxl_rr);
969
return ERR_PTR(rc);
970
}
971
972
return cxl_rr;
973
}
974
975
static void cxl_rr_free_decoder(struct cxl_region_ref *cxl_rr)
976
{
977
struct cxl_region *cxlr = cxl_rr->region;
978
struct cxl_decoder *cxld = cxl_rr->decoder;
979
980
if (!cxld)
981
return;
982
983
dev_WARN_ONCE(&cxlr->dev, cxld->region != cxlr, "region mismatch\n");
984
if (cxld->region == cxlr) {
985
cxld->region = NULL;
986
put_device(&cxlr->dev);
987
}
988
}
989
990
static void free_region_ref(struct cxl_region_ref *cxl_rr)
991
{
992
struct cxl_port *port = cxl_rr->port;
993
struct cxl_region *cxlr = cxl_rr->region;
994
995
cxl_rr_free_decoder(cxl_rr);
996
xa_erase(&port->regions, (unsigned long)cxlr);
997
xa_destroy(&cxl_rr->endpoints);
998
kfree(cxl_rr);
999
}
1000
1001
static int cxl_rr_ep_add(struct cxl_region_ref *cxl_rr,
1002
struct cxl_endpoint_decoder *cxled)
1003
{
1004
int rc;
1005
struct cxl_port *port = cxl_rr->port;
1006
struct cxl_region *cxlr = cxl_rr->region;
1007
struct cxl_decoder *cxld = cxl_rr->decoder;
1008
struct cxl_ep *ep = cxl_ep_load(port, cxled_to_memdev(cxled));
1009
1010
if (ep) {
1011
rc = xa_insert(&cxl_rr->endpoints, (unsigned long)cxled, ep,
1012
GFP_KERNEL);
1013
if (rc)
1014
return rc;
1015
}
1016
cxl_rr->nr_eps++;
1017
1018
if (!cxld->region) {
1019
cxld->region = cxlr;
1020
get_device(&cxlr->dev);
1021
}
1022
1023
return 0;
1024
}
1025
1026
static int cxl_rr_assign_decoder(struct cxl_port *port, struct cxl_region *cxlr,
1027
struct cxl_endpoint_decoder *cxled,
1028
struct cxl_region_ref *cxl_rr,
1029
struct cxl_decoder *cxld)
1030
{
1031
if (cxld->region) {
1032
dev_dbg(&cxlr->dev, "%s: %s already attached to %s\n",
1033
dev_name(&port->dev), dev_name(&cxld->dev),
1034
dev_name(&cxld->region->dev));
1035
return -EBUSY;
1036
}
1037
1038
/*
1039
* Endpoints should already match the region type, but backstop that
1040
* assumption with an assertion. Switch-decoders change mapping-type
1041
* based on what is mapped when they are assigned to a region.
1042
*/
1043
dev_WARN_ONCE(&cxlr->dev,
1044
port == cxled_to_port(cxled) &&
1045
cxld->target_type != cxlr->type,
1046
"%s:%s mismatch decoder type %d -> %d\n",
1047
dev_name(&cxled_to_memdev(cxled)->dev),
1048
dev_name(&cxld->dev), cxld->target_type, cxlr->type);
1049
cxld->target_type = cxlr->type;
1050
cxl_rr->decoder = cxld;
1051
return 0;
1052
}
1053
1054
/**
1055
* cxl_port_attach_region() - track a region's interest in a port by endpoint
1056
* @port: port to add a new region reference 'struct cxl_region_ref'
1057
* @cxlr: region to attach to @port
1058
* @cxled: endpoint decoder used to create or further pin a region reference
1059
* @pos: interleave position of @cxled in @cxlr
1060
*
1061
* The attach event is an opportunity to validate CXL decode setup
1062
* constraints and record metadata needed for programming HDM decoders,
1063
* in particular decoder target lists.
1064
*
1065
* The steps are:
1066
*
1067
* - validate that there are no other regions with a higher HPA already
1068
* associated with @port
1069
* - establish a region reference if one is not already present
1070
*
1071
* - additionally allocate a decoder instance that will host @cxlr on
1072
* @port
1073
*
1074
* - pin the region reference by the endpoint
1075
* - account for how many entries in @port's target list are needed to
1076
* cover all of the added endpoints.
1077
*/
1078
static int cxl_port_attach_region(struct cxl_port *port,
1079
struct cxl_region *cxlr,
1080
struct cxl_endpoint_decoder *cxled, int pos)
1081
{
1082
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
1083
struct cxl_ep *ep = cxl_ep_load(port, cxlmd);
1084
struct cxl_region_ref *cxl_rr;
1085
bool nr_targets_inc = false;
1086
struct cxl_decoder *cxld;
1087
unsigned long index;
1088
int rc = -EBUSY;
1089
1090
lockdep_assert_held_write(&cxl_rwsem.region);
1091
1092
cxl_rr = cxl_rr_load(port, cxlr);
1093
if (cxl_rr) {
1094
struct cxl_ep *ep_iter;
1095
int found = 0;
1096
1097
/*
1098
* Walk the existing endpoints that have been attached to
1099
* @cxlr at @port and see if they share the same 'next' port
1100
* in the downstream direction. I.e. endpoints that share common
1101
* upstream switch.
1102
*/
1103
xa_for_each(&cxl_rr->endpoints, index, ep_iter) {
1104
if (ep_iter == ep)
1105
continue;
1106
if (ep_iter->next == ep->next) {
1107
found++;
1108
break;
1109
}
1110
}
1111
1112
/*
1113
* New target port, or @port is an endpoint port that always
1114
* accounts its own local decode as a target.
1115
*/
1116
if (!found || !ep->next) {
1117
cxl_rr->nr_targets++;
1118
nr_targets_inc = true;
1119
}
1120
} else {
1121
struct cxl_decoder *cxld;
1122
1123
cxld = cxl_port_pick_region_decoder(port, cxled, cxlr);
1124
if (!cxld) {
1125
dev_dbg(&cxlr->dev, "%s: no decoder available\n",
1126
dev_name(&port->dev));
1127
return -EBUSY;
1128
}
1129
1130
cxl_rr = alloc_region_ref(port, cxlr, cxled, cxld);
1131
if (IS_ERR(cxl_rr)) {
1132
dev_dbg(&cxlr->dev,
1133
"%s: failed to allocate region reference\n",
1134
dev_name(&port->dev));
1135
return PTR_ERR(cxl_rr);
1136
}
1137
nr_targets_inc = true;
1138
1139
rc = cxl_rr_assign_decoder(port, cxlr, cxled, cxl_rr, cxld);
1140
if (rc)
1141
goto out_erase;
1142
}
1143
cxld = cxl_rr->decoder;
1144
1145
/*
1146
* the number of targets should not exceed the target_count
1147
* of the decoder
1148
*/
1149
if (is_switch_decoder(&cxld->dev)) {
1150
struct cxl_switch_decoder *cxlsd;
1151
1152
cxlsd = to_cxl_switch_decoder(&cxld->dev);
1153
if (cxl_rr->nr_targets > cxlsd->nr_targets) {
1154
dev_dbg(&cxlr->dev,
1155
"%s:%s %s add: %s:%s @ %d overflows targets: %d\n",
1156
dev_name(port->uport_dev), dev_name(&port->dev),
1157
dev_name(&cxld->dev), dev_name(&cxlmd->dev),
1158
dev_name(&cxled->cxld.dev), pos,
1159
cxlsd->nr_targets);
1160
rc = -ENXIO;
1161
goto out_erase;
1162
}
1163
}
1164
1165
rc = cxl_rr_ep_add(cxl_rr, cxled);
1166
if (rc) {
1167
dev_dbg(&cxlr->dev,
1168
"%s: failed to track endpoint %s:%s reference\n",
1169
dev_name(&port->dev), dev_name(&cxlmd->dev),
1170
dev_name(&cxld->dev));
1171
goto out_erase;
1172
}
1173
1174
dev_dbg(&cxlr->dev,
1175
"%s:%s %s add: %s:%s @ %d next: %s nr_eps: %d nr_targets: %d\n",
1176
dev_name(port->uport_dev), dev_name(&port->dev),
1177
dev_name(&cxld->dev), dev_name(&cxlmd->dev),
1178
dev_name(&cxled->cxld.dev), pos,
1179
ep ? ep->next ? dev_name(ep->next->uport_dev) :
1180
dev_name(&cxlmd->dev) :
1181
"none",
1182
cxl_rr->nr_eps, cxl_rr->nr_targets);
1183
1184
return 0;
1185
out_erase:
1186
if (nr_targets_inc)
1187
cxl_rr->nr_targets--;
1188
if (cxl_rr->nr_eps == 0)
1189
free_region_ref(cxl_rr);
1190
return rc;
1191
}
1192
1193
static void cxl_port_detach_region(struct cxl_port *port,
1194
struct cxl_region *cxlr,
1195
struct cxl_endpoint_decoder *cxled)
1196
{
1197
struct cxl_region_ref *cxl_rr;
1198
struct cxl_ep *ep = NULL;
1199
1200
lockdep_assert_held_write(&cxl_rwsem.region);
1201
1202
cxl_rr = cxl_rr_load(port, cxlr);
1203
if (!cxl_rr)
1204
return;
1205
1206
/*
1207
* Endpoint ports do not carry cxl_ep references, and they
1208
* never target more than one endpoint by definition
1209
*/
1210
if (cxl_rr->decoder == &cxled->cxld)
1211
cxl_rr->nr_eps--;
1212
else
1213
ep = xa_erase(&cxl_rr->endpoints, (unsigned long)cxled);
1214
if (ep) {
1215
struct cxl_ep *ep_iter;
1216
unsigned long index;
1217
int found = 0;
1218
1219
cxl_rr->nr_eps--;
1220
xa_for_each(&cxl_rr->endpoints, index, ep_iter) {
1221
if (ep_iter->next == ep->next) {
1222
found++;
1223
break;
1224
}
1225
}
1226
if (!found)
1227
cxl_rr->nr_targets--;
1228
}
1229
1230
if (cxl_rr->nr_eps == 0)
1231
free_region_ref(cxl_rr);
1232
}
1233
1234
static int check_last_peer(struct cxl_endpoint_decoder *cxled,
1235
struct cxl_ep *ep, struct cxl_region_ref *cxl_rr,
1236
int distance)
1237
{
1238
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
1239
struct cxl_region *cxlr = cxl_rr->region;
1240
struct cxl_region_params *p = &cxlr->params;
1241
struct cxl_endpoint_decoder *cxled_peer;
1242
struct cxl_port *port = cxl_rr->port;
1243
struct cxl_memdev *cxlmd_peer;
1244
struct cxl_ep *ep_peer;
1245
int pos = cxled->pos;
1246
1247
/*
1248
* If this position wants to share a dport with the last endpoint mapped
1249
* then that endpoint, at index 'position - distance', must also be
1250
* mapped by this dport.
1251
*/
1252
if (pos < distance) {
1253
dev_dbg(&cxlr->dev, "%s:%s: cannot host %s:%s at %d\n",
1254
dev_name(port->uport_dev), dev_name(&port->dev),
1255
dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos);
1256
return -ENXIO;
1257
}
1258
cxled_peer = p->targets[pos - distance];
1259
cxlmd_peer = cxled_to_memdev(cxled_peer);
1260
ep_peer = cxl_ep_load(port, cxlmd_peer);
1261
if (ep->dport != ep_peer->dport) {
1262
dev_dbg(&cxlr->dev,
1263
"%s:%s: %s:%s pos %d mismatched peer %s:%s\n",
1264
dev_name(port->uport_dev), dev_name(&port->dev),
1265
dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos,
1266
dev_name(&cxlmd_peer->dev),
1267
dev_name(&cxled_peer->cxld.dev));
1268
return -ENXIO;
1269
}
1270
1271
return 0;
1272
}
1273
1274
static int check_interleave_cap(struct cxl_decoder *cxld, int iw, int ig)
1275
{
1276
struct cxl_port *port = to_cxl_port(cxld->dev.parent);
1277
struct cxl_hdm *cxlhdm = dev_get_drvdata(&port->dev);
1278
unsigned int interleave_mask;
1279
u8 eiw;
1280
u16 eig;
1281
int high_pos, low_pos;
1282
1283
if (!test_bit(iw, &cxlhdm->iw_cap_mask))
1284
return -ENXIO;
1285
/*
1286
* Per CXL specification r3.1(8.2.4.20.13 Decoder Protection),
1287
* if eiw < 8:
1288
* DPAOFFSET[51: eig + 8] = HPAOFFSET[51: eig + 8 + eiw]
1289
* DPAOFFSET[eig + 7: 0] = HPAOFFSET[eig + 7: 0]
1290
*
1291
* when the eiw is 0, all the bits of HPAOFFSET[51: 0] are used, the
1292
* interleave bits are none.
1293
*
1294
* if eiw >= 8:
1295
* DPAOFFSET[51: eig + 8] = HPAOFFSET[51: eig + eiw] / 3
1296
* DPAOFFSET[eig + 7: 0] = HPAOFFSET[eig + 7: 0]
1297
*
1298
* when the eiw is 8, all the bits of HPAOFFSET[51: 0] are used, the
1299
* interleave bits are none.
1300
*/
1301
ways_to_eiw(iw, &eiw);
1302
if (eiw == 0 || eiw == 8)
1303
return 0;
1304
1305
granularity_to_eig(ig, &eig);
1306
if (eiw > 8)
1307
high_pos = eiw + eig - 1;
1308
else
1309
high_pos = eiw + eig + 7;
1310
low_pos = eig + 8;
1311
interleave_mask = GENMASK(high_pos, low_pos);
1312
if (interleave_mask & ~cxlhdm->interleave_mask)
1313
return -ENXIO;
1314
1315
return 0;
1316
}
1317
1318
static int cxl_port_setup_targets(struct cxl_port *port,
1319
struct cxl_region *cxlr,
1320
struct cxl_endpoint_decoder *cxled)
1321
{
1322
struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
1323
int parent_iw, parent_ig, ig, iw, rc, inc = 0, pos = cxled->pos;
1324
struct cxl_port *parent_port = to_cxl_port(port->dev.parent);
1325
struct cxl_region_ref *cxl_rr = cxl_rr_load(port, cxlr);
1326
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
1327
struct cxl_ep *ep = cxl_ep_load(port, cxlmd);
1328
struct cxl_region_params *p = &cxlr->params;
1329
struct cxl_decoder *cxld = cxl_rr->decoder;
1330
struct cxl_switch_decoder *cxlsd;
1331
struct cxl_port *iter = port;
1332
u16 eig, peig;
1333
u8 eiw, peiw;
1334
1335
/*
1336
* While root level decoders support x3, x6, x12, switch level
1337
* decoders only support powers of 2 up to x16.
1338
*/
1339
if (!is_power_of_2(cxl_rr->nr_targets)) {
1340
dev_dbg(&cxlr->dev, "%s:%s: invalid target count %d\n",
1341
dev_name(port->uport_dev), dev_name(&port->dev),
1342
cxl_rr->nr_targets);
1343
return -EINVAL;
1344
}
1345
1346
cxlsd = to_cxl_switch_decoder(&cxld->dev);
1347
if (cxl_rr->nr_targets_set) {
1348
int i, distance = 1;
1349
struct cxl_region_ref *cxl_rr_iter;
1350
1351
/*
1352
* The "distance" between peer downstream ports represents which
1353
* endpoint positions in the region interleave a given port can
1354
* host.
1355
*
1356
* For example, at the root of a hierarchy the distance is
1357
* always 1 as every index targets a different host-bridge. At
1358
* each subsequent switch level those ports map every Nth region
1359
* position where N is the width of the switch == distance.
1360
*/
1361
do {
1362
cxl_rr_iter = cxl_rr_load(iter, cxlr);
1363
distance *= cxl_rr_iter->nr_targets;
1364
iter = to_cxl_port(iter->dev.parent);
1365
} while (!is_cxl_root(iter));
1366
distance *= cxlrd->cxlsd.cxld.interleave_ways;
1367
1368
for (i = 0; i < cxl_rr->nr_targets_set; i++)
1369
if (ep->dport == cxlsd->target[i]) {
1370
rc = check_last_peer(cxled, ep, cxl_rr,
1371
distance);
1372
if (rc)
1373
return rc;
1374
goto out_target_set;
1375
}
1376
goto add_target;
1377
}
1378
1379
if (is_cxl_root(parent_port)) {
1380
/*
1381
* Root decoder IG is always set to value in CFMWS which
1382
* may be different than this region's IG. We can use the
1383
* region's IG here since interleave_granularity_store()
1384
* does not allow interleaved host-bridges with
1385
* root IG != region IG.
1386
*/
1387
parent_ig = p->interleave_granularity;
1388
parent_iw = cxlrd->cxlsd.cxld.interleave_ways;
1389
/*
1390
* For purposes of address bit routing, use power-of-2 math for
1391
* switch ports.
1392
*/
1393
if (!is_power_of_2(parent_iw))
1394
parent_iw /= 3;
1395
} else {
1396
struct cxl_region_ref *parent_rr;
1397
struct cxl_decoder *parent_cxld;
1398
1399
parent_rr = cxl_rr_load(parent_port, cxlr);
1400
parent_cxld = parent_rr->decoder;
1401
parent_ig = parent_cxld->interleave_granularity;
1402
parent_iw = parent_cxld->interleave_ways;
1403
}
1404
1405
rc = granularity_to_eig(parent_ig, &peig);
1406
if (rc) {
1407
dev_dbg(&cxlr->dev, "%s:%s: invalid parent granularity: %d\n",
1408
dev_name(parent_port->uport_dev),
1409
dev_name(&parent_port->dev), parent_ig);
1410
return rc;
1411
}
1412
1413
rc = ways_to_eiw(parent_iw, &peiw);
1414
if (rc) {
1415
dev_dbg(&cxlr->dev, "%s:%s: invalid parent interleave: %d\n",
1416
dev_name(parent_port->uport_dev),
1417
dev_name(&parent_port->dev), parent_iw);
1418
return rc;
1419
}
1420
1421
iw = cxl_rr->nr_targets;
1422
rc = ways_to_eiw(iw, &eiw);
1423
if (rc) {
1424
dev_dbg(&cxlr->dev, "%s:%s: invalid port interleave: %d\n",
1425
dev_name(port->uport_dev), dev_name(&port->dev), iw);
1426
return rc;
1427
}
1428
1429
/*
1430
* Interleave granularity is a multiple of @parent_port granularity.
1431
* Multiplier is the parent port interleave ways.
1432
*/
1433
rc = granularity_to_eig(parent_ig * parent_iw, &eig);
1434
if (rc) {
1435
dev_dbg(&cxlr->dev,
1436
"%s: invalid granularity calculation (%d * %d)\n",
1437
dev_name(&parent_port->dev), parent_ig, parent_iw);
1438
return rc;
1439
}
1440
1441
rc = eig_to_granularity(eig, &ig);
1442
if (rc) {
1443
dev_dbg(&cxlr->dev, "%s:%s: invalid interleave: %d\n",
1444
dev_name(port->uport_dev), dev_name(&port->dev),
1445
256 << eig);
1446
return rc;
1447
}
1448
1449
if (iw > 8 || iw > cxlsd->nr_targets) {
1450
dev_dbg(&cxlr->dev,
1451
"%s:%s:%s: ways: %d overflows targets: %d\n",
1452
dev_name(port->uport_dev), dev_name(&port->dev),
1453
dev_name(&cxld->dev), iw, cxlsd->nr_targets);
1454
return -ENXIO;
1455
}
1456
1457
if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
1458
if (cxld->interleave_ways != iw ||
1459
(iw > 1 && cxld->interleave_granularity != ig) ||
1460
!region_res_match_cxl_range(p, &cxld->hpa_range) ||
1461
((cxld->flags & CXL_DECODER_F_ENABLE) == 0)) {
1462
dev_err(&cxlr->dev,
1463
"%s:%s %s expected iw: %d ig: %d %pr\n",
1464
dev_name(port->uport_dev), dev_name(&port->dev),
1465
__func__, iw, ig, p->res);
1466
dev_err(&cxlr->dev,
1467
"%s:%s %s got iw: %d ig: %d state: %s %#llx:%#llx\n",
1468
dev_name(port->uport_dev), dev_name(&port->dev),
1469
__func__, cxld->interleave_ways,
1470
cxld->interleave_granularity,
1471
(cxld->flags & CXL_DECODER_F_ENABLE) ?
1472
"enabled" :
1473
"disabled",
1474
cxld->hpa_range.start, cxld->hpa_range.end);
1475
return -ENXIO;
1476
}
1477
} else {
1478
rc = check_interleave_cap(cxld, iw, ig);
1479
if (rc) {
1480
dev_dbg(&cxlr->dev,
1481
"%s:%s iw: %d ig: %d is not supported\n",
1482
dev_name(port->uport_dev),
1483
dev_name(&port->dev), iw, ig);
1484
return rc;
1485
}
1486
1487
cxld->interleave_ways = iw;
1488
cxld->interleave_granularity = ig;
1489
cxld->hpa_range = (struct range) {
1490
.start = p->res->start,
1491
.end = p->res->end,
1492
};
1493
}
1494
dev_dbg(&cxlr->dev, "%s:%s iw: %d ig: %d\n", dev_name(port->uport_dev),
1495
dev_name(&port->dev), iw, ig);
1496
add_target:
1497
if (cxl_rr->nr_targets_set == cxl_rr->nr_targets) {
1498
dev_dbg(&cxlr->dev,
1499
"%s:%s: targets full trying to add %s:%s at %d\n",
1500
dev_name(port->uport_dev), dev_name(&port->dev),
1501
dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos);
1502
return -ENXIO;
1503
}
1504
if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
1505
if (cxlsd->target[cxl_rr->nr_targets_set] != ep->dport) {
1506
dev_dbg(&cxlr->dev, "%s:%s: %s expected %s at %d\n",
1507
dev_name(port->uport_dev), dev_name(&port->dev),
1508
dev_name(&cxlsd->cxld.dev),
1509
dev_name(ep->dport->dport_dev),
1510
cxl_rr->nr_targets_set);
1511
return -ENXIO;
1512
}
1513
} else
1514
cxlsd->target[cxl_rr->nr_targets_set] = ep->dport;
1515
inc = 1;
1516
out_target_set:
1517
cxl_rr->nr_targets_set += inc;
1518
dev_dbg(&cxlr->dev, "%s:%s target[%d] = %s for %s:%s @ %d\n",
1519
dev_name(port->uport_dev), dev_name(&port->dev),
1520
cxl_rr->nr_targets_set - 1, dev_name(ep->dport->dport_dev),
1521
dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos);
1522
1523
return 0;
1524
}
1525
1526
static void cxl_port_reset_targets(struct cxl_port *port,
1527
struct cxl_region *cxlr)
1528
{
1529
struct cxl_region_ref *cxl_rr = cxl_rr_load(port, cxlr);
1530
struct cxl_decoder *cxld;
1531
1532
/*
1533
* After the last endpoint has been detached the entire cxl_rr may now
1534
* be gone.
1535
*/
1536
if (!cxl_rr)
1537
return;
1538
cxl_rr->nr_targets_set = 0;
1539
1540
cxld = cxl_rr->decoder;
1541
cxld->hpa_range = (struct range) {
1542
.start = 0,
1543
.end = -1,
1544
};
1545
}
1546
1547
static void cxl_region_teardown_targets(struct cxl_region *cxlr)
1548
{
1549
struct cxl_region_params *p = &cxlr->params;
1550
struct cxl_endpoint_decoder *cxled;
1551
struct cxl_dev_state *cxlds;
1552
struct cxl_memdev *cxlmd;
1553
struct cxl_port *iter;
1554
struct cxl_ep *ep;
1555
int i;
1556
1557
/*
1558
* In the auto-discovery case skip automatic teardown since the
1559
* address space is already active
1560
*/
1561
if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags))
1562
return;
1563
1564
for (i = 0; i < p->nr_targets; i++) {
1565
cxled = p->targets[i];
1566
cxlmd = cxled_to_memdev(cxled);
1567
cxlds = cxlmd->cxlds;
1568
1569
if (cxlds->rcd)
1570
continue;
1571
1572
iter = cxled_to_port(cxled);
1573
while (!is_cxl_root(to_cxl_port(iter->dev.parent)))
1574
iter = to_cxl_port(iter->dev.parent);
1575
1576
for (ep = cxl_ep_load(iter, cxlmd); iter;
1577
iter = ep->next, ep = cxl_ep_load(iter, cxlmd))
1578
cxl_port_reset_targets(iter, cxlr);
1579
}
1580
}
1581
1582
static int cxl_region_setup_targets(struct cxl_region *cxlr)
1583
{
1584
struct cxl_region_params *p = &cxlr->params;
1585
struct cxl_endpoint_decoder *cxled;
1586
struct cxl_dev_state *cxlds;
1587
int i, rc, rch = 0, vh = 0;
1588
struct cxl_memdev *cxlmd;
1589
struct cxl_port *iter;
1590
struct cxl_ep *ep;
1591
1592
for (i = 0; i < p->nr_targets; i++) {
1593
cxled = p->targets[i];
1594
cxlmd = cxled_to_memdev(cxled);
1595
cxlds = cxlmd->cxlds;
1596
1597
/* validate that all targets agree on topology */
1598
if (!cxlds->rcd) {
1599
vh++;
1600
} else {
1601
rch++;
1602
continue;
1603
}
1604
1605
iter = cxled_to_port(cxled);
1606
while (!is_cxl_root(to_cxl_port(iter->dev.parent)))
1607
iter = to_cxl_port(iter->dev.parent);
1608
1609
/*
1610
* Descend the topology tree programming / validating
1611
* targets while looking for conflicts.
1612
*/
1613
for (ep = cxl_ep_load(iter, cxlmd); iter;
1614
iter = ep->next, ep = cxl_ep_load(iter, cxlmd)) {
1615
rc = cxl_port_setup_targets(iter, cxlr, cxled);
1616
if (rc) {
1617
cxl_region_teardown_targets(cxlr);
1618
return rc;
1619
}
1620
}
1621
}
1622
1623
if (rch && vh) {
1624
dev_err(&cxlr->dev, "mismatched CXL topologies detected\n");
1625
cxl_region_teardown_targets(cxlr);
1626
return -ENXIO;
1627
}
1628
1629
return 0;
1630
}
1631
1632
static int cxl_region_validate_position(struct cxl_region *cxlr,
1633
struct cxl_endpoint_decoder *cxled,
1634
int pos)
1635
{
1636
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
1637
struct cxl_region_params *p = &cxlr->params;
1638
int i;
1639
1640
if (pos < 0 || pos >= p->interleave_ways) {
1641
dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos,
1642
p->interleave_ways);
1643
return -ENXIO;
1644
}
1645
1646
if (p->targets[pos] == cxled)
1647
return 0;
1648
1649
if (p->targets[pos]) {
1650
struct cxl_endpoint_decoder *cxled_target = p->targets[pos];
1651
struct cxl_memdev *cxlmd_target = cxled_to_memdev(cxled_target);
1652
1653
dev_dbg(&cxlr->dev, "position %d already assigned to %s:%s\n",
1654
pos, dev_name(&cxlmd_target->dev),
1655
dev_name(&cxled_target->cxld.dev));
1656
return -EBUSY;
1657
}
1658
1659
for (i = 0; i < p->interleave_ways; i++) {
1660
struct cxl_endpoint_decoder *cxled_target;
1661
struct cxl_memdev *cxlmd_target;
1662
1663
cxled_target = p->targets[i];
1664
if (!cxled_target)
1665
continue;
1666
1667
cxlmd_target = cxled_to_memdev(cxled_target);
1668
if (cxlmd_target == cxlmd) {
1669
dev_dbg(&cxlr->dev,
1670
"%s already specified at position %d via: %s\n",
1671
dev_name(&cxlmd->dev), pos,
1672
dev_name(&cxled_target->cxld.dev));
1673
return -EBUSY;
1674
}
1675
}
1676
1677
return 0;
1678
}
1679
1680
static int cxl_region_attach_position(struct cxl_region *cxlr,
1681
struct cxl_root_decoder *cxlrd,
1682
struct cxl_endpoint_decoder *cxled,
1683
const struct cxl_dport *dport, int pos)
1684
{
1685
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
1686
struct cxl_switch_decoder *cxlsd = &cxlrd->cxlsd;
1687
struct cxl_decoder *cxld = &cxlsd->cxld;
1688
int iw = cxld->interleave_ways;
1689
struct cxl_port *iter;
1690
int rc;
1691
1692
if (dport != cxlrd->cxlsd.target[pos % iw]) {
1693
dev_dbg(&cxlr->dev, "%s:%s invalid target position for %s\n",
1694
dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
1695
dev_name(&cxlrd->cxlsd.cxld.dev));
1696
return -ENXIO;
1697
}
1698
1699
for (iter = cxled_to_port(cxled); !is_cxl_root(iter);
1700
iter = to_cxl_port(iter->dev.parent)) {
1701
rc = cxl_port_attach_region(iter, cxlr, cxled, pos);
1702
if (rc)
1703
goto err;
1704
}
1705
1706
return 0;
1707
1708
err:
1709
for (iter = cxled_to_port(cxled); !is_cxl_root(iter);
1710
iter = to_cxl_port(iter->dev.parent))
1711
cxl_port_detach_region(iter, cxlr, cxled);
1712
return rc;
1713
}
1714
1715
static int cxl_region_attach_auto(struct cxl_region *cxlr,
1716
struct cxl_endpoint_decoder *cxled, int pos)
1717
{
1718
struct cxl_region_params *p = &cxlr->params;
1719
1720
if (cxled->state != CXL_DECODER_STATE_AUTO) {
1721
dev_err(&cxlr->dev,
1722
"%s: unable to add decoder to autodetected region\n",
1723
dev_name(&cxled->cxld.dev));
1724
return -EINVAL;
1725
}
1726
1727
if (pos >= 0) {
1728
dev_dbg(&cxlr->dev, "%s: expected auto position, not %d\n",
1729
dev_name(&cxled->cxld.dev), pos);
1730
return -EINVAL;
1731
}
1732
1733
if (p->nr_targets >= p->interleave_ways) {
1734
dev_err(&cxlr->dev, "%s: no more target slots available\n",
1735
dev_name(&cxled->cxld.dev));
1736
return -ENXIO;
1737
}
1738
1739
/*
1740
* Temporarily record the endpoint decoder into the target array. Yes,
1741
* this means that userspace can view devices in the wrong position
1742
* before the region activates, and must be careful to understand when
1743
* it might be racing region autodiscovery.
1744
*/
1745
pos = p->nr_targets;
1746
p->targets[pos] = cxled;
1747
cxled->pos = pos;
1748
p->nr_targets++;
1749
1750
return 0;
1751
}
1752
1753
static int cmp_interleave_pos(const void *a, const void *b)
1754
{
1755
struct cxl_endpoint_decoder *cxled_a = *(typeof(cxled_a) *)a;
1756
struct cxl_endpoint_decoder *cxled_b = *(typeof(cxled_b) *)b;
1757
1758
return cxled_a->pos - cxled_b->pos;
1759
}
1760
1761
static int match_switch_decoder_by_range(struct device *dev,
1762
const void *data)
1763
{
1764
struct cxl_switch_decoder *cxlsd;
1765
const struct range *r1, *r2 = data;
1766
1767
1768
if (!is_switch_decoder(dev))
1769
return 0;
1770
1771
cxlsd = to_cxl_switch_decoder(dev);
1772
r1 = &cxlsd->cxld.hpa_range;
1773
1774
if (is_root_decoder(dev))
1775
return range_contains(r1, r2);
1776
return (r1->start == r2->start && r1->end == r2->end);
1777
}
1778
1779
static int find_pos_and_ways(struct cxl_port *port, struct range *range,
1780
int *pos, int *ways)
1781
{
1782
struct cxl_switch_decoder *cxlsd;
1783
struct cxl_port *parent;
1784
struct device *dev;
1785
int rc = -ENXIO;
1786
1787
parent = parent_port_of(port);
1788
if (!parent)
1789
return rc;
1790
1791
dev = device_find_child(&parent->dev, range,
1792
match_switch_decoder_by_range);
1793
if (!dev) {
1794
dev_err(port->uport_dev,
1795
"failed to find decoder mapping %#llx-%#llx\n",
1796
range->start, range->end);
1797
return rc;
1798
}
1799
cxlsd = to_cxl_switch_decoder(dev);
1800
*ways = cxlsd->cxld.interleave_ways;
1801
1802
for (int i = 0; i < *ways; i++) {
1803
if (cxlsd->target[i] == port->parent_dport) {
1804
*pos = i;
1805
rc = 0;
1806
break;
1807
}
1808
}
1809
put_device(dev);
1810
1811
if (rc)
1812
dev_err(port->uport_dev,
1813
"failed to find %s:%s in target list of %s\n",
1814
dev_name(&port->dev),
1815
dev_name(port->parent_dport->dport_dev),
1816
dev_name(&cxlsd->cxld.dev));
1817
1818
return rc;
1819
}
1820
1821
/**
1822
* cxl_calc_interleave_pos() - calculate an endpoint position in a region
1823
* @cxled: endpoint decoder member of given region
1824
*
1825
* The endpoint position is calculated by traversing the topology from
1826
* the endpoint to the root decoder and iteratively applying this
1827
* calculation:
1828
*
1829
* position = position * parent_ways + parent_pos;
1830
*
1831
* ...where @position is inferred from switch and root decoder target lists.
1832
*
1833
* Return: position >= 0 on success
1834
* -ENXIO on failure
1835
*/
1836
static int cxl_calc_interleave_pos(struct cxl_endpoint_decoder *cxled)
1837
{
1838
struct cxl_port *iter, *port = cxled_to_port(cxled);
1839
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
1840
struct range *range = &cxled->cxld.hpa_range;
1841
int parent_ways = 0, parent_pos = 0, pos = 0;
1842
int rc;
1843
1844
/*
1845
* Example: the expected interleave order of the 4-way region shown
1846
* below is: mem0, mem2, mem1, mem3
1847
*
1848
* root_port
1849
* / \
1850
* host_bridge_0 host_bridge_1
1851
* | | | |
1852
* mem0 mem1 mem2 mem3
1853
*
1854
* In the example the calculator will iterate twice. The first iteration
1855
* uses the mem position in the host-bridge and the ways of the host-
1856
* bridge to generate the first, or local, position. The second
1857
* iteration uses the host-bridge position in the root_port and the ways
1858
* of the root_port to refine the position.
1859
*
1860
* A trace of the calculation per endpoint looks like this:
1861
* mem0: pos = 0 * 2 + 0 mem2: pos = 0 * 2 + 0
1862
* pos = 0 * 2 + 0 pos = 0 * 2 + 1
1863
* pos: 0 pos: 1
1864
*
1865
* mem1: pos = 0 * 2 + 1 mem3: pos = 0 * 2 + 1
1866
* pos = 1 * 2 + 0 pos = 1 * 2 + 1
1867
* pos: 2 pos = 3
1868
*
1869
* Note that while this example is simple, the method applies to more
1870
* complex topologies, including those with switches.
1871
*/
1872
1873
/* Iterate from endpoint to root_port refining the position */
1874
for (iter = port; iter; iter = parent_port_of(iter)) {
1875
if (is_cxl_root(iter))
1876
break;
1877
1878
rc = find_pos_and_ways(iter, range, &parent_pos, &parent_ways);
1879
if (rc)
1880
return rc;
1881
1882
pos = pos * parent_ways + parent_pos;
1883
}
1884
1885
dev_dbg(&cxlmd->dev,
1886
"decoder:%s parent:%s port:%s range:%#llx-%#llx pos:%d\n",
1887
dev_name(&cxled->cxld.dev), dev_name(cxlmd->dev.parent),
1888
dev_name(&port->dev), range->start, range->end, pos);
1889
1890
return pos;
1891
}
1892
1893
static int cxl_region_sort_targets(struct cxl_region *cxlr)
1894
{
1895
struct cxl_region_params *p = &cxlr->params;
1896
int i, rc = 0;
1897
1898
for (i = 0; i < p->nr_targets; i++) {
1899
struct cxl_endpoint_decoder *cxled = p->targets[i];
1900
1901
cxled->pos = cxl_calc_interleave_pos(cxled);
1902
/*
1903
* Record that sorting failed, but still continue to calc
1904
* cxled->pos so that follow-on code paths can reliably
1905
* do p->targets[cxled->pos] to self-reference their entry.
1906
*/
1907
if (cxled->pos < 0)
1908
rc = -ENXIO;
1909
}
1910
/* Keep the cxlr target list in interleave position order */
1911
sort(p->targets, p->nr_targets, sizeof(p->targets[0]),
1912
cmp_interleave_pos, NULL);
1913
1914
dev_dbg(&cxlr->dev, "region sort %s\n", rc ? "failed" : "successful");
1915
return rc;
1916
}
1917
1918
static int cxl_region_attach(struct cxl_region *cxlr,
1919
struct cxl_endpoint_decoder *cxled, int pos)
1920
{
1921
struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
1922
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
1923
struct cxl_dev_state *cxlds = cxlmd->cxlds;
1924
struct cxl_region_params *p = &cxlr->params;
1925
struct cxl_port *ep_port, *root_port;
1926
struct cxl_dport *dport;
1927
int rc = -ENXIO;
1928
1929
rc = check_interleave_cap(&cxled->cxld, p->interleave_ways,
1930
p->interleave_granularity);
1931
if (rc) {
1932
dev_dbg(&cxlr->dev, "%s iw: %d ig: %d is not supported\n",
1933
dev_name(&cxled->cxld.dev), p->interleave_ways,
1934
p->interleave_granularity);
1935
return rc;
1936
}
1937
1938
if (cxled->part < 0) {
1939
dev_dbg(&cxlr->dev, "%s dead\n", dev_name(&cxled->cxld.dev));
1940
return -ENODEV;
1941
}
1942
1943
if (cxlds->part[cxled->part].mode != cxlr->mode) {
1944
dev_dbg(&cxlr->dev, "%s region mode: %d mismatch\n",
1945
dev_name(&cxled->cxld.dev), cxlr->mode);
1946
return -EINVAL;
1947
}
1948
1949
/* all full of members, or interleave config not established? */
1950
if (p->state > CXL_CONFIG_INTERLEAVE_ACTIVE) {
1951
dev_dbg(&cxlr->dev, "region already active\n");
1952
return -EBUSY;
1953
}
1954
1955
if (p->state < CXL_CONFIG_INTERLEAVE_ACTIVE) {
1956
dev_dbg(&cxlr->dev, "interleave config missing\n");
1957
return -ENXIO;
1958
}
1959
1960
if (p->nr_targets >= p->interleave_ways) {
1961
dev_dbg(&cxlr->dev, "region already has %d endpoints\n",
1962
p->nr_targets);
1963
return -EINVAL;
1964
}
1965
1966
ep_port = cxled_to_port(cxled);
1967
root_port = cxlrd_to_port(cxlrd);
1968
dport = cxl_find_dport_by_dev(root_port, ep_port->host_bridge);
1969
if (!dport) {
1970
dev_dbg(&cxlr->dev, "%s:%s invalid target for %s\n",
1971
dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
1972
dev_name(cxlr->dev.parent));
1973
return -ENXIO;
1974
}
1975
1976
if (cxled->cxld.target_type != cxlr->type) {
1977
dev_dbg(&cxlr->dev, "%s:%s type mismatch: %d vs %d\n",
1978
dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
1979
cxled->cxld.target_type, cxlr->type);
1980
return -ENXIO;
1981
}
1982
1983
if (!cxled->dpa_res) {
1984
dev_dbg(&cxlr->dev, "%s:%s: missing DPA allocation.\n",
1985
dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev));
1986
return -ENXIO;
1987
}
1988
1989
if (resource_size(cxled->dpa_res) * p->interleave_ways + p->cache_size !=
1990
resource_size(p->res)) {
1991
dev_dbg(&cxlr->dev,
1992
"%s:%s-size-%#llx * ways-%d + cache-%#llx != region-size-%#llx\n",
1993
dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
1994
(u64)resource_size(cxled->dpa_res), p->interleave_ways,
1995
(u64)p->cache_size, (u64)resource_size(p->res));
1996
return -EINVAL;
1997
}
1998
1999
cxl_region_perf_data_calculate(cxlr, cxled);
2000
2001
if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
2002
int i;
2003
2004
rc = cxl_region_attach_auto(cxlr, cxled, pos);
2005
if (rc)
2006
return rc;
2007
2008
/* await more targets to arrive... */
2009
if (p->nr_targets < p->interleave_ways)
2010
return 0;
2011
2012
/*
2013
* All targets are here, which implies all PCI enumeration that
2014
* affects this region has been completed. Walk the topology to
2015
* sort the devices into their relative region decode position.
2016
*/
2017
rc = cxl_region_sort_targets(cxlr);
2018
if (rc)
2019
return rc;
2020
2021
for (i = 0; i < p->nr_targets; i++) {
2022
cxled = p->targets[i];
2023
ep_port = cxled_to_port(cxled);
2024
dport = cxl_find_dport_by_dev(root_port,
2025
ep_port->host_bridge);
2026
rc = cxl_region_attach_position(cxlr, cxlrd, cxled,
2027
dport, i);
2028
if (rc)
2029
return rc;
2030
}
2031
2032
rc = cxl_region_setup_targets(cxlr);
2033
if (rc)
2034
return rc;
2035
2036
/*
2037
* If target setup succeeds in the autodiscovery case
2038
* then the region is already committed.
2039
*/
2040
p->state = CXL_CONFIG_COMMIT;
2041
cxl_region_shared_upstream_bandwidth_update(cxlr);
2042
2043
return 0;
2044
}
2045
2046
rc = cxl_region_validate_position(cxlr, cxled, pos);
2047
if (rc)
2048
return rc;
2049
2050
rc = cxl_region_attach_position(cxlr, cxlrd, cxled, dport, pos);
2051
if (rc)
2052
return rc;
2053
2054
p->targets[pos] = cxled;
2055
cxled->pos = pos;
2056
p->nr_targets++;
2057
2058
if (p->nr_targets == p->interleave_ways) {
2059
rc = cxl_region_setup_targets(cxlr);
2060
if (rc)
2061
return rc;
2062
p->state = CXL_CONFIG_ACTIVE;
2063
cxl_region_shared_upstream_bandwidth_update(cxlr);
2064
}
2065
2066
cxled->cxld.interleave_ways = p->interleave_ways;
2067
cxled->cxld.interleave_granularity = p->interleave_granularity;
2068
cxled->cxld.hpa_range = (struct range) {
2069
.start = p->res->start,
2070
.end = p->res->end,
2071
};
2072
2073
if (p->nr_targets != p->interleave_ways)
2074
return 0;
2075
2076
/*
2077
* Test the auto-discovery position calculator function
2078
* against this successfully created user-defined region.
2079
* A fail message here means that this interleave config
2080
* will fail when presented as CXL_REGION_F_AUTO.
2081
*/
2082
for (int i = 0; i < p->nr_targets; i++) {
2083
struct cxl_endpoint_decoder *cxled = p->targets[i];
2084
int test_pos;
2085
2086
test_pos = cxl_calc_interleave_pos(cxled);
2087
dev_dbg(&cxled->cxld.dev,
2088
"Test cxl_calc_interleave_pos(): %s test_pos:%d cxled->pos:%d\n",
2089
(test_pos == cxled->pos) ? "success" : "fail",
2090
test_pos, cxled->pos);
2091
}
2092
2093
return 0;
2094
}
2095
2096
static struct cxl_region *
2097
__cxl_decoder_detach(struct cxl_region *cxlr,
2098
struct cxl_endpoint_decoder *cxled, int pos,
2099
enum cxl_detach_mode mode)
2100
{
2101
struct cxl_region_params *p;
2102
2103
lockdep_assert_held_write(&cxl_rwsem.region);
2104
2105
if (!cxled) {
2106
p = &cxlr->params;
2107
2108
if (pos >= p->interleave_ways) {
2109
dev_dbg(&cxlr->dev, "position %d out of range %d\n",
2110
pos, p->interleave_ways);
2111
return NULL;
2112
}
2113
2114
if (!p->targets[pos])
2115
return NULL;
2116
cxled = p->targets[pos];
2117
} else {
2118
cxlr = cxled->cxld.region;
2119
if (!cxlr)
2120
return NULL;
2121
p = &cxlr->params;
2122
}
2123
2124
if (mode == DETACH_INVALIDATE)
2125
cxled->part = -1;
2126
2127
if (p->state > CXL_CONFIG_ACTIVE) {
2128
cxl_region_decode_reset(cxlr, p->interleave_ways);
2129
p->state = CXL_CONFIG_ACTIVE;
2130
}
2131
2132
for (struct cxl_port *iter = cxled_to_port(cxled); !is_cxl_root(iter);
2133
iter = to_cxl_port(iter->dev.parent))
2134
cxl_port_detach_region(iter, cxlr, cxled);
2135
2136
if (cxled->pos < 0 || cxled->pos >= p->interleave_ways ||
2137
p->targets[cxled->pos] != cxled) {
2138
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
2139
2140
dev_WARN_ONCE(&cxlr->dev, 1, "expected %s:%s at position %d\n",
2141
dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
2142
cxled->pos);
2143
return NULL;
2144
}
2145
2146
if (p->state == CXL_CONFIG_ACTIVE) {
2147
p->state = CXL_CONFIG_INTERLEAVE_ACTIVE;
2148
cxl_region_teardown_targets(cxlr);
2149
}
2150
p->targets[cxled->pos] = NULL;
2151
p->nr_targets--;
2152
cxled->cxld.hpa_range = (struct range) {
2153
.start = 0,
2154
.end = -1,
2155
};
2156
2157
get_device(&cxlr->dev);
2158
return cxlr;
2159
}
2160
2161
/*
2162
* Cleanup a decoder's interest in a region. There are 2 cases to
2163
* handle, removing an unknown @cxled from a known position in a region
2164
* (detach_target()) or removing a known @cxled from an unknown @cxlr
2165
* (cxld_unregister())
2166
*
2167
* When the detachment finds a region, release the region driver.
2168
*/
2169
int cxl_decoder_detach(struct cxl_region *cxlr,
2170
struct cxl_endpoint_decoder *cxled, int pos,
2171
enum cxl_detach_mode mode)
2172
{
2173
struct cxl_region *detach;
2174
2175
/* when the decoder is being destroyed lock unconditionally */
2176
if (mode == DETACH_INVALIDATE) {
2177
guard(rwsem_write)(&cxl_rwsem.region);
2178
detach = __cxl_decoder_detach(cxlr, cxled, pos, mode);
2179
} else {
2180
int rc;
2181
2182
ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region);
2183
if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem)))
2184
return rc;
2185
detach = __cxl_decoder_detach(cxlr, cxled, pos, mode);
2186
}
2187
2188
if (detach) {
2189
device_release_driver(&detach->dev);
2190
put_device(&detach->dev);
2191
}
2192
return 0;
2193
}
2194
2195
static int __attach_target(struct cxl_region *cxlr,
2196
struct cxl_endpoint_decoder *cxled, int pos,
2197
unsigned int state)
2198
{
2199
int rc;
2200
2201
if (state == TASK_INTERRUPTIBLE) {
2202
ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region);
2203
if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem)))
2204
return rc;
2205
guard(rwsem_read)(&cxl_rwsem.dpa);
2206
return cxl_region_attach(cxlr, cxled, pos);
2207
}
2208
guard(rwsem_write)(&cxl_rwsem.region);
2209
guard(rwsem_read)(&cxl_rwsem.dpa);
2210
return cxl_region_attach(cxlr, cxled, pos);
2211
}
2212
2213
static int attach_target(struct cxl_region *cxlr,
2214
struct cxl_endpoint_decoder *cxled, int pos,
2215
unsigned int state)
2216
{
2217
int rc = __attach_target(cxlr, cxled, pos, state);
2218
2219
if (rc == 0)
2220
return 0;
2221
2222
dev_warn(cxled->cxld.dev.parent, "failed to attach %s to %s: %d\n",
2223
dev_name(&cxled->cxld.dev), dev_name(&cxlr->dev), rc);
2224
return rc;
2225
}
2226
2227
static int detach_target(struct cxl_region *cxlr, int pos)
2228
{
2229
return cxl_decoder_detach(cxlr, NULL, pos, DETACH_ONLY);
2230
}
2231
2232
static size_t store_targetN(struct cxl_region *cxlr, const char *buf, int pos,
2233
size_t len)
2234
{
2235
int rc;
2236
2237
if (sysfs_streq(buf, "\n"))
2238
rc = detach_target(cxlr, pos);
2239
else {
2240
struct device *dev;
2241
2242
dev = bus_find_device_by_name(&cxl_bus_type, NULL, buf);
2243
if (!dev)
2244
return -ENODEV;
2245
2246
if (!is_endpoint_decoder(dev)) {
2247
rc = -EINVAL;
2248
goto out;
2249
}
2250
2251
rc = attach_target(cxlr, to_cxl_endpoint_decoder(dev), pos,
2252
TASK_INTERRUPTIBLE);
2253
out:
2254
put_device(dev);
2255
}
2256
2257
if (rc < 0)
2258
return rc;
2259
return len;
2260
}
2261
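/*
 * Illustrative userspace sketch (not part of the driver): the targetN
 * attributes above accept an endpoint decoder name to attach, or a bare
 * newline to detach. The sysfs path and decoder name in the example call
 * are hypothetical, e.g.:
 *
 *   set_region_target_sketch("/sys/bus/cxl/devices/region0/target0",
 *			      "decoder3.0");	// attach
 *   set_region_target_sketch("/sys/bus/cxl/devices/region0/target0",
 *			      "\n");		// detach
 */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static int set_region_target_sketch(const char *target_attr, const char *value)
{
	int fd = open(target_attr, O_WRONLY);
	ssize_t rc;

	if (fd < 0)
		return -1;
	/* the kernel side parses the written string in store_targetN() */
	rc = write(fd, value, strlen(value));
	close(fd);
	return rc < 0 ? -1 : 0;
}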
2262
#define TARGET_ATTR_RW(n) \
2263
static ssize_t target##n##_show( \
2264
struct device *dev, struct device_attribute *attr, char *buf) \
2265
{ \
2266
return show_targetN(to_cxl_region(dev), buf, (n)); \
2267
} \
2268
static ssize_t target##n##_store(struct device *dev, \
2269
struct device_attribute *attr, \
2270
const char *buf, size_t len) \
2271
{ \
2272
return store_targetN(to_cxl_region(dev), buf, (n), len); \
2273
} \
2274
static DEVICE_ATTR_RW(target##n)
2275
2276
TARGET_ATTR_RW(0);
2277
TARGET_ATTR_RW(1);
2278
TARGET_ATTR_RW(2);
2279
TARGET_ATTR_RW(3);
2280
TARGET_ATTR_RW(4);
2281
TARGET_ATTR_RW(5);
2282
TARGET_ATTR_RW(6);
2283
TARGET_ATTR_RW(7);
2284
TARGET_ATTR_RW(8);
2285
TARGET_ATTR_RW(9);
2286
TARGET_ATTR_RW(10);
2287
TARGET_ATTR_RW(11);
2288
TARGET_ATTR_RW(12);
2289
TARGET_ATTR_RW(13);
2290
TARGET_ATTR_RW(14);
2291
TARGET_ATTR_RW(15);
2292
2293
static struct attribute *target_attrs[] = {
2294
&dev_attr_target0.attr,
2295
&dev_attr_target1.attr,
2296
&dev_attr_target2.attr,
2297
&dev_attr_target3.attr,
2298
&dev_attr_target4.attr,
2299
&dev_attr_target5.attr,
2300
&dev_attr_target6.attr,
2301
&dev_attr_target7.attr,
2302
&dev_attr_target8.attr,
2303
&dev_attr_target9.attr,
2304
&dev_attr_target10.attr,
2305
&dev_attr_target11.attr,
2306
&dev_attr_target12.attr,
2307
&dev_attr_target13.attr,
2308
&dev_attr_target14.attr,
2309
&dev_attr_target15.attr,
2310
NULL,
2311
};
2312
2313
static umode_t cxl_region_target_visible(struct kobject *kobj,
2314
struct attribute *a, int n)
2315
{
2316
struct device *dev = kobj_to_dev(kobj);
2317
struct cxl_region *cxlr = to_cxl_region(dev);
2318
struct cxl_region_params *p = &cxlr->params;
2319
2320
if (n < p->interleave_ways)
2321
return a->mode;
2322
return 0;
2323
}
2324
2325
static const struct attribute_group cxl_region_target_group = {
2326
.attrs = target_attrs,
2327
.is_visible = cxl_region_target_visible,
2328
};
2329
2330
static const struct attribute_group *get_cxl_region_target_group(void)
2331
{
2332
return &cxl_region_target_group;
2333
}
2334
2335
static const struct attribute_group *region_groups[] = {
2336
&cxl_base_attribute_group,
2337
&cxl_region_group,
2338
&cxl_region_target_group,
2339
&cxl_region_access0_coordinate_group,
2340
&cxl_region_access1_coordinate_group,
2341
NULL,
2342
};
2343
2344
static void cxl_region_release(struct device *dev)
2345
{
2346
struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev->parent);
2347
struct cxl_region *cxlr = to_cxl_region(dev);
2348
int id = atomic_read(&cxlrd->region_id);
2349
2350
/*
2351
* Try to reuse the recently idled id rather than the cached
2352
* next id to prevent the region id space from increasing
2353
* unnecessarily.
2354
*/
2355
if (cxlr->id < id)
2356
if (atomic_try_cmpxchg(&cxlrd->region_id, &id, cxlr->id)) {
2357
memregion_free(id);
2358
goto out;
2359
}
2360
2361
memregion_free(cxlr->id);
2362
out:
2363
put_device(dev->parent);
2364
kfree(cxlr);
2365
}
2366
2367
const struct device_type cxl_region_type = {
2368
.name = "cxl_region",
2369
.release = cxl_region_release,
2370
.groups = region_groups
2371
};
2372
2373
bool is_cxl_region(struct device *dev)
2374
{
2375
return dev->type == &cxl_region_type;
2376
}
2377
EXPORT_SYMBOL_NS_GPL(is_cxl_region, "CXL");
2378
2379
static struct cxl_region *to_cxl_region(struct device *dev)
2380
{
2381
if (dev_WARN_ONCE(dev, dev->type != &cxl_region_type,
2382
"not a cxl_region device\n"))
2383
return NULL;
2384
2385
return container_of(dev, struct cxl_region, dev);
2386
}
2387
2388
static void unregister_region(void *_cxlr)
2389
{
2390
struct cxl_region *cxlr = _cxlr;
2391
struct cxl_region_params *p = &cxlr->params;
2392
int i;
2393
2394
device_del(&cxlr->dev);
2395
2396
/*
2397
* Now that region sysfs is shut down, the parameter block is
* read-only, so there is no need to hold the region rwsem to
* access the region parameters.
2400
*/
2401
for (i = 0; i < p->interleave_ways; i++)
2402
detach_target(cxlr, i);
2403
2404
cxl_region_iomem_release(cxlr);
2405
put_device(&cxlr->dev);
2406
}
2407
2408
static struct lock_class_key cxl_region_key;
2409
2410
static struct cxl_region *cxl_region_alloc(struct cxl_root_decoder *cxlrd, int id)
2411
{
2412
struct cxl_region *cxlr;
2413
struct device *dev;
2414
2415
cxlr = kzalloc(sizeof(*cxlr), GFP_KERNEL);
2416
if (!cxlr) {
2417
memregion_free(id);
2418
return ERR_PTR(-ENOMEM);
2419
}
2420
2421
dev = &cxlr->dev;
2422
device_initialize(dev);
2423
lockdep_set_class(&dev->mutex, &cxl_region_key);
2424
dev->parent = &cxlrd->cxlsd.cxld.dev;
2425
/*
2426
* Keep root decoder pinned through cxl_region_release to fixup
2427
* region id allocations
2428
*/
2429
get_device(dev->parent);
2430
device_set_pm_not_required(dev);
2431
dev->bus = &cxl_bus_type;
2432
dev->type = &cxl_region_type;
2433
cxlr->id = id;
2434
2435
return cxlr;
2436
}
2437
2438
static bool cxl_region_update_coordinates(struct cxl_region *cxlr, int nid)
2439
{
2440
int cset = 0;
2441
int rc;
2442
2443
for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
2444
if (cxlr->coord[i].read_bandwidth) {
2445
rc = 0;
2446
if (cxl_need_node_perf_attrs_update(nid))
2447
node_set_perf_attrs(nid, &cxlr->coord[i], i);
2448
else
2449
rc = cxl_update_hmat_access_coordinates(nid, cxlr, i);
2450
2451
if (rc == 0)
2452
cset++;
2453
}
2454
}
2455
2456
if (!cset)
2457
return false;
2458
2459
rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_access0_group());
2460
if (rc)
2461
dev_dbg(&cxlr->dev, "Failed to update access0 group\n");
2462
2463
rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_access1_group());
2464
if (rc)
2465
dev_dbg(&cxlr->dev, "Failed to update access1 group\n");
2466
2467
return true;
2468
}
2469
2470
static int cxl_region_perf_attrs_callback(struct notifier_block *nb,
2471
unsigned long action, void *arg)
2472
{
2473
struct cxl_region *cxlr = container_of(nb, struct cxl_region,
2474
node_notifier);
2475
struct node_notify *nn = arg;
2476
int nid = nn->nid;
2477
int region_nid;
2478
2479
if (action != NODE_ADDED_FIRST_MEMORY)
2480
return NOTIFY_DONE;
2481
2482
/*
2483
* No need to hold cxl_rwsem.region; region parameters are stable
2484
* within the cxl_region driver.
2485
*/
2486
region_nid = phys_to_target_node(cxlr->params.res->start);
2487
if (nid != region_nid)
2488
return NOTIFY_DONE;
2489
2490
if (!cxl_region_update_coordinates(cxlr, nid))
2491
return NOTIFY_DONE;
2492
2493
return NOTIFY_OK;
2494
}
2495
2496
static int cxl_region_calculate_adistance(struct notifier_block *nb,
2497
unsigned long nid, void *data)
2498
{
2499
struct cxl_region *cxlr = container_of(nb, struct cxl_region,
2500
adist_notifier);
2501
struct access_coordinate *perf;
2502
int *adist = data;
2503
int region_nid;
2504
2505
/*
2506
* No need to hold cxl_rwsem.region; region parameters are stable
2507
* within the cxl_region driver.
2508
*/
2509
region_nid = phys_to_target_node(cxlr->params.res->start);
2510
if (nid != region_nid)
2511
return NOTIFY_OK;
2512
2513
perf = &cxlr->coord[ACCESS_COORDINATE_CPU];
2514
2515
if (mt_perf_to_adistance(perf, adist))
2516
return NOTIFY_OK;
2517
2518
return NOTIFY_STOP;
2519
}
2520
2521
/**
2522
* devm_cxl_add_region - Adds a region to a decoder
2523
* @cxlrd: root decoder
2524
* @id: memregion id to create, or memregion_free() on failure
2525
* @mode: mode for the endpoint decoders of this region
2526
* @type: select whether this is an expander or accelerator (type-2 or type-3)
2527
*
2528
* This is the second step of region initialization. Regions exist within an
2529
* address space which is mapped by a @cxlrd.
2530
*
2531
* Return: 0 if the region was added to the @cxlrd, else returns negative error
2532
* code. The region will be named "regionZ" where Z is the unique region number.
2533
*/
2534
static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd,
2535
int id,
2536
enum cxl_partition_mode mode,
2537
enum cxl_decoder_type type)
2538
{
2539
struct cxl_port *port = to_cxl_port(cxlrd->cxlsd.cxld.dev.parent);
2540
struct cxl_region *cxlr;
2541
struct device *dev;
2542
int rc;
2543
2544
cxlr = cxl_region_alloc(cxlrd, id);
2545
if (IS_ERR(cxlr))
2546
return cxlr;
2547
cxlr->mode = mode;
2548
cxlr->type = type;
2549
2550
dev = &cxlr->dev;
2551
rc = dev_set_name(dev, "region%d", id);
2552
if (rc)
2553
goto err;
2554
2555
rc = device_add(dev);
2556
if (rc)
2557
goto err;
2558
2559
rc = devm_add_action_or_reset(port->uport_dev, unregister_region, cxlr);
2560
if (rc)
2561
return ERR_PTR(rc);
2562
2563
dev_dbg(port->uport_dev, "%s: created %s\n",
2564
dev_name(&cxlrd->cxlsd.cxld.dev), dev_name(dev));
2565
return cxlr;
2566
2567
err:
2568
put_device(dev);
2569
return ERR_PTR(rc);
2570
}
2571
2572
static ssize_t __create_region_show(struct cxl_root_decoder *cxlrd, char *buf)
2573
{
2574
return sysfs_emit(buf, "region%u\n", atomic_read(&cxlrd->region_id));
2575
}
2576
2577
static ssize_t create_pmem_region_show(struct device *dev,
2578
struct device_attribute *attr, char *buf)
2579
{
2580
return __create_region_show(to_cxl_root_decoder(dev), buf);
2581
}
2582
2583
static ssize_t create_ram_region_show(struct device *dev,
2584
struct device_attribute *attr, char *buf)
2585
{
2586
return __create_region_show(to_cxl_root_decoder(dev), buf);
2587
}
2588
2589
static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd,
2590
enum cxl_partition_mode mode, int id)
2591
{
2592
int rc;
2593
2594
switch (mode) {
2595
case CXL_PARTMODE_RAM:
2596
case CXL_PARTMODE_PMEM:
2597
break;
2598
default:
2599
dev_err(&cxlrd->cxlsd.cxld.dev, "unsupported mode %d\n", mode);
2600
return ERR_PTR(-EINVAL);
2601
}
2602
2603
rc = memregion_alloc(GFP_KERNEL);
2604
if (rc < 0)
2605
return ERR_PTR(rc);
2606
2607
if (atomic_cmpxchg(&cxlrd->region_id, id, rc) != id) {
2608
memregion_free(rc);
2609
return ERR_PTR(-EBUSY);
2610
}
2611
2612
return devm_cxl_add_region(cxlrd, id, mode, CXL_DECODER_HOSTONLYMEM);
2613
}
2614
2615
static ssize_t create_region_store(struct device *dev, const char *buf,
2616
size_t len, enum cxl_partition_mode mode)
2617
{
2618
struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
2619
struct cxl_region *cxlr;
2620
int rc, id;
2621
2622
rc = sscanf(buf, "region%d\n", &id);
2623
if (rc != 1)
2624
return -EINVAL;
2625
2626
cxlr = __create_region(cxlrd, mode, id);
2627
if (IS_ERR(cxlr))
2628
return PTR_ERR(cxlr);
2629
2630
return len;
2631
}
2632
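/*
 * Illustrative userspace sketch (not part of the driver): region creation
 * is a read-verify-write handshake on the root decoder. Reading
 * create_pmem_region (or create_ram_region) reports the next region name,
 * and writing that same name back claims the id and creates the region.
 * The attribute path is hypothetical and error handling is elided.
 */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static int create_region_sketch(const char *create_attr, char *name, size_t len)
{
	int fd = open(create_attr, O_RDWR);
	ssize_t n;

	if (fd < 0)
		return -1;
	n = read(fd, name, len - 1);		/* e.g. "region0\n" */
	if (n > 0) {
		name[n] = '\0';
		lseek(fd, 0, SEEK_SET);
		n = write(fd, name, strlen(name));	/* claim that id */
	}
	close(fd);
	return n < 0 ? -1 : 0;
}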
2633
static ssize_t create_pmem_region_store(struct device *dev,
2634
struct device_attribute *attr,
2635
const char *buf, size_t len)
2636
{
2637
return create_region_store(dev, buf, len, CXL_PARTMODE_PMEM);
2638
}
2639
DEVICE_ATTR_RW(create_pmem_region);
2640
2641
static ssize_t create_ram_region_store(struct device *dev,
2642
struct device_attribute *attr,
2643
const char *buf, size_t len)
2644
{
2645
return create_region_store(dev, buf, len, CXL_PARTMODE_RAM);
2646
}
2647
DEVICE_ATTR_RW(create_ram_region);
2648
2649
static ssize_t region_show(struct device *dev, struct device_attribute *attr,
2650
char *buf)
2651
{
2652
struct cxl_decoder *cxld = to_cxl_decoder(dev);
2653
ssize_t rc;
2654
2655
ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region);
2656
if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem)))
2657
return rc;
2658
2659
if (cxld->region)
2660
return sysfs_emit(buf, "%s\n", dev_name(&cxld->region->dev));
2661
return sysfs_emit(buf, "\n");
2662
}
2663
DEVICE_ATTR_RO(region);
2664
2665
static struct cxl_region *
2666
cxl_find_region_by_name(struct cxl_root_decoder *cxlrd, const char *name)
2667
{
2668
struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
2669
struct device *region_dev;
2670
2671
region_dev = device_find_child_by_name(&cxld->dev, name);
2672
if (!region_dev)
2673
return ERR_PTR(-ENODEV);
2674
2675
return to_cxl_region(region_dev);
2676
}
2677
2678
static ssize_t delete_region_store(struct device *dev,
2679
struct device_attribute *attr,
2680
const char *buf, size_t len)
2681
{
2682
struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
2683
struct cxl_port *port = to_cxl_port(dev->parent);
2684
struct cxl_region *cxlr;
2685
2686
cxlr = cxl_find_region_by_name(cxlrd, buf);
2687
if (IS_ERR(cxlr))
2688
return PTR_ERR(cxlr);
2689
2690
devm_release_action(port->uport_dev, unregister_region, cxlr);
2691
put_device(&cxlr->dev);
2692
2693
return len;
2694
}
2695
DEVICE_ATTR_WO(delete_region);
2696
2697
static void cxl_pmem_region_release(struct device *dev)
2698
{
2699
struct cxl_pmem_region *cxlr_pmem = to_cxl_pmem_region(dev);
2700
int i;
2701
2702
for (i = 0; i < cxlr_pmem->nr_mappings; i++) {
2703
struct cxl_memdev *cxlmd = cxlr_pmem->mapping[i].cxlmd;
2704
2705
put_device(&cxlmd->dev);
2706
}
2707
2708
kfree(cxlr_pmem);
2709
}
2710
2711
static const struct attribute_group *cxl_pmem_region_attribute_groups[] = {
2712
&cxl_base_attribute_group,
2713
NULL,
2714
};
2715
2716
const struct device_type cxl_pmem_region_type = {
2717
.name = "cxl_pmem_region",
2718
.release = cxl_pmem_region_release,
2719
.groups = cxl_pmem_region_attribute_groups,
2720
};
2721
2722
bool is_cxl_pmem_region(struct device *dev)
2723
{
2724
return dev->type == &cxl_pmem_region_type;
2725
}
2726
EXPORT_SYMBOL_NS_GPL(is_cxl_pmem_region, "CXL");
2727
2728
struct cxl_pmem_region *to_cxl_pmem_region(struct device *dev)
2729
{
2730
if (dev_WARN_ONCE(dev, !is_cxl_pmem_region(dev),
2731
"not a cxl_pmem_region device\n"))
2732
return NULL;
2733
return container_of(dev, struct cxl_pmem_region, dev);
2734
}
2735
EXPORT_SYMBOL_NS_GPL(to_cxl_pmem_region, "CXL");
2736
2737
struct cxl_poison_context {
2738
struct cxl_port *port;
2739
int part;
2740
u64 offset;
2741
};
2742
2743
static int cxl_get_poison_unmapped(struct cxl_memdev *cxlmd,
2744
struct cxl_poison_context *ctx)
2745
{
2746
struct cxl_dev_state *cxlds = cxlmd->cxlds;
2747
const struct resource *res;
2748
struct resource *p, *last;
2749
u64 offset, length;
2750
int rc = 0;
2751
2752
if (ctx->part < 0)
2753
return 0;
2754
2755
/*
2756
* Collect poison for the remaining unmapped resources after
2757
* poison is collected by committed endpoint decoders.
2758
*/
2759
for (int i = ctx->part; i < cxlds->nr_partitions; i++) {
2760
res = &cxlds->part[i].res;
2761
for (p = res->child, last = NULL; p; p = p->sibling)
2762
last = p;
2763
if (last)
2764
offset = last->end + 1;
2765
else
2766
offset = res->start;
2767
length = res->end - offset + 1;
2768
if (!length)
2769
break;
2770
rc = cxl_mem_get_poison(cxlmd, offset, length, NULL);
2771
if (rc == -EFAULT && cxlds->part[i].mode == CXL_PARTMODE_RAM)
2772
continue;
2773
if (rc)
2774
break;
2775
}
2776
2777
return rc;
2778
}
2779
2780
static int poison_by_decoder(struct device *dev, void *arg)
2781
{
2782
struct cxl_poison_context *ctx = arg;
2783
struct cxl_endpoint_decoder *cxled;
2784
enum cxl_partition_mode mode;
2785
struct cxl_dev_state *cxlds;
2786
struct cxl_memdev *cxlmd;
2787
u64 offset, length;
2788
int rc = 0;
2789
2790
if (!is_endpoint_decoder(dev))
2791
return rc;
2792
2793
cxled = to_cxl_endpoint_decoder(dev);
2794
if (!cxled->dpa_res)
2795
return rc;
2796
2797
cxlmd = cxled_to_memdev(cxled);
2798
cxlds = cxlmd->cxlds;
2799
mode = cxlds->part[cxled->part].mode;
2800
2801
if (cxled->skip) {
2802
offset = cxled->dpa_res->start - cxled->skip;
2803
length = cxled->skip;
2804
rc = cxl_mem_get_poison(cxlmd, offset, length, NULL);
2805
if (rc == -EFAULT && mode == CXL_PARTMODE_RAM)
2806
rc = 0;
2807
if (rc)
2808
return rc;
2809
}
2810
2811
offset = cxled->dpa_res->start;
2812
length = cxled->dpa_res->end - offset + 1;
2813
rc = cxl_mem_get_poison(cxlmd, offset, length, cxled->cxld.region);
2814
if (rc == -EFAULT && mode == CXL_PARTMODE_RAM)
2815
rc = 0;
2816
if (rc)
2817
return rc;
2818
2819
/* Iterate until commit_end is reached */
2820
if (cxled->cxld.id == ctx->port->commit_end) {
2821
ctx->offset = cxled->dpa_res->end + 1;
2822
ctx->part = cxled->part;
2823
return 1;
2824
}
2825
2826
return 0;
2827
}
2828
2829
int cxl_get_poison_by_endpoint(struct cxl_port *port)
2830
{
2831
struct cxl_poison_context ctx;
2832
int rc = 0;
2833
2834
ctx = (struct cxl_poison_context) {
2835
.port = port,
2836
.part = -1,
2837
};
2838
2839
rc = device_for_each_child(&port->dev, &ctx, poison_by_decoder);
2840
if (rc == 1)
2841
rc = cxl_get_poison_unmapped(to_cxl_memdev(port->uport_dev),
2842
&ctx);
2843
2844
return rc;
2845
}
2846
2847
struct cxl_dpa_to_region_context {
2848
struct cxl_region *cxlr;
2849
u64 dpa;
2850
};
2851
2852
static int __cxl_dpa_to_region(struct device *dev, void *arg)
2853
{
2854
struct cxl_dpa_to_region_context *ctx = arg;
2855
struct cxl_endpoint_decoder *cxled;
2856
struct cxl_region *cxlr;
2857
u64 dpa = ctx->dpa;
2858
2859
if (!is_endpoint_decoder(dev))
2860
return 0;
2861
2862
cxled = to_cxl_endpoint_decoder(dev);
2863
if (!cxled || !cxled->dpa_res || !resource_size(cxled->dpa_res))
2864
return 0;
2865
2866
if (!cxl_resource_contains_addr(cxled->dpa_res, dpa))
2867
return 0;
2868
2869
/*
2870
* Stop the region search (return 1) when an endpoint mapping is
2871
* found. The region may not be fully constructed, so a valid
* cxlr in the context structure is not guaranteed.
2873
*/
2874
cxlr = cxled->cxld.region;
2875
if (cxlr)
2876
dev_dbg(dev, "dpa:0x%llx mapped in region:%s\n", dpa,
2877
dev_name(&cxlr->dev));
2878
else
2879
dev_dbg(dev, "dpa:0x%llx mapped in endpoint:%s\n", dpa,
2880
dev_name(dev));
2881
2882
ctx->cxlr = cxlr;
2883
2884
return 1;
2885
}
2886
2887
struct cxl_region *cxl_dpa_to_region(const struct cxl_memdev *cxlmd, u64 dpa)
2888
{
2889
struct cxl_dpa_to_region_context ctx;
2890
struct cxl_port *port;
2891
2892
ctx = (struct cxl_dpa_to_region_context) {
2893
.dpa = dpa,
2894
};
2895
port = cxlmd->endpoint;
2896
if (port && is_cxl_endpoint(port) && cxl_num_decoders_committed(port))
2897
device_for_each_child(&port->dev, &ctx, __cxl_dpa_to_region);
2898
2899
return ctx.cxlr;
2900
}
2901
2902
static bool cxl_is_hpa_in_chunk(u64 hpa, struct cxl_region *cxlr, int pos)
2903
{
2904
struct cxl_region_params *p = &cxlr->params;
2905
int gran = p->interleave_granularity;
2906
int ways = p->interleave_ways;
2907
u64 offset;
2908
2909
/* Is the hpa in an expected chunk for its pos(-ition) */
2910
offset = hpa - p->res->start;
2911
offset = do_div(offset, gran * ways);
2912
if ((offset >= pos * gran) && (offset < (pos + 1) * gran))
2913
return true;
2914
2915
dev_dbg(&cxlr->dev,
2916
"Addr trans fail: hpa 0x%llx not in expected chunk\n", hpa);
2917
2918
return false;
2919
}
2920
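/*
 * Illustrative sketch (not part of the driver): for a plain modulo decode,
 * an HPA belongs to position 'pos' iff its offset into the
 * (granularity * ways) sized super-chunk falls inside that position's
 * granularity-sized slot. The standalone helper below is hypothetical and
 * uses plain C types for clarity.
 */
static int hpa_in_chunk_sketch(unsigned long long hpa,
			       unsigned long long region_start,
			       int gran, int ways, int pos)
{
	/* offset within one full round of the interleave */
	unsigned long long offset = (hpa - region_start) % (gran * ways);

	/* the pos'th granularity-sized slot of that round */
	return offset >= (unsigned long long)pos * gran &&
	       offset < (unsigned long long)(pos + 1) * gran;
}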
2921
u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
2922
u64 dpa)
2923
{
2924
struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
2925
u64 dpa_offset, hpa_offset, bits_upper, mask_upper, hpa;
2926
struct cxl_region_params *p = &cxlr->params;
2927
struct cxl_endpoint_decoder *cxled = NULL;
2928
u16 eig = 0;
2929
u8 eiw = 0;
2930
int pos;
2931
2932
for (int i = 0; i < p->nr_targets; i++) {
2933
cxled = p->targets[i];
2934
if (cxlmd == cxled_to_memdev(cxled))
2935
break;
2936
}
2937
if (!cxled || cxlmd != cxled_to_memdev(cxled))
2938
return ULLONG_MAX;
2939
2940
pos = cxled->pos;
2941
ways_to_eiw(p->interleave_ways, &eiw);
2942
granularity_to_eig(p->interleave_granularity, &eig);
2943
2944
/*
2945
* The device position in the region interleave set was removed
2946
* from the offset at HPA->DPA translation. To reconstruct the
2947
* HPA, place the 'pos' in the offset.
2948
*
2949
* The placement of 'pos' in the HPA is determined by interleave
2950
* ways and granularity and is defined in the CXL Spec 3.0 Section
2951
* 8.2.4.19.13 Implementation Note: Device Decode Logic
2952
*/
2953
2954
/* Remove the dpa base */
2955
dpa_offset = dpa - cxl_dpa_resource_start(cxled);
2956
2957
mask_upper = GENMASK_ULL(51, eig + 8);
2958
2959
if (eiw < 8) {
2960
hpa_offset = (dpa_offset & mask_upper) << eiw;
2961
hpa_offset |= pos << (eig + 8);
2962
} else {
2963
bits_upper = (dpa_offset & mask_upper) >> (eig + 8);
2964
bits_upper = bits_upper * 3;
2965
hpa_offset = ((bits_upper << (eiw - 8)) + pos) << (eig + 8);
2966
}
2967
2968
/* The lower bits remain unchanged */
2969
hpa_offset |= dpa_offset & GENMASK_ULL(eig + 7, 0);
2970
2971
/* Apply the hpa_offset to the region base address */
2972
hpa = hpa_offset + p->res->start + p->cache_size;
2973
2974
/* Root decoder translation overrides typical modulo decode */
2975
if (cxlrd->hpa_to_spa)
2976
hpa = cxlrd->hpa_to_spa(cxlrd, hpa);
2977
2978
if (!cxl_resource_contains_addr(p->res, hpa)) {
2979
dev_dbg(&cxlr->dev,
2980
"Addr trans fail: hpa 0x%llx not in region\n", hpa);
2981
return ULLONG_MAX;
2982
}
2983
2984
/* Simple chunk check, by pos & gran, only applies to modulo decodes */
2985
if (!cxlrd->hpa_to_spa && (!cxl_is_hpa_in_chunk(hpa, cxlr, pos)))
2986
return ULLONG_MAX;
2987
2988
return hpa;
2989
}
2990
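/*
 * Illustrative sketch (not part of the driver): reconstructing an HPA
 * offset from a DPA offset for the common power-of-2 interleave case
 * (eiw < 8). The upper DPA bits shift up by 'eiw' to make room for the
 * device position, 'pos' is re-inserted at bit (eig + 8), and the low
 * (eig + 8) bits pass through unchanged. The standalone helper and its
 * mask construction are hypothetical simplifications of the code above.
 */
static unsigned long long dpa_to_hpa_offset_sketch(unsigned long long dpa_offset,
						   int pos, int eiw, int eig)
{
	unsigned long long low_mask = (1ULL << (eig + 8)) - 1;
	unsigned long long hpa_offset;

	/* upper bits shift up to make room for the position field */
	hpa_offset = (dpa_offset & ~low_mask) << eiw;
	/* the device position lands at bit (eig + 8) */
	hpa_offset |= (unsigned long long)pos << (eig + 8);
	/* bits below the interleave granularity are unchanged */
	hpa_offset |= dpa_offset & low_mask;

	return hpa_offset;
}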
2991
static struct lock_class_key cxl_pmem_region_key;
2992
2993
static int cxl_pmem_region_alloc(struct cxl_region *cxlr)
2994
{
2995
struct cxl_region_params *p = &cxlr->params;
2996
struct cxl_nvdimm_bridge *cxl_nvb;
2997
struct device *dev;
2998
int i;
2999
3000
guard(rwsem_read)(&cxl_rwsem.region);
3001
if (p->state != CXL_CONFIG_COMMIT)
3002
return -ENXIO;
3003
3004
struct cxl_pmem_region *cxlr_pmem __free(kfree) =
3005
kzalloc(struct_size(cxlr_pmem, mapping, p->nr_targets), GFP_KERNEL);
3006
if (!cxlr_pmem)
3007
return -ENOMEM;
3008
3009
cxlr_pmem->hpa_range.start = p->res->start;
3010
cxlr_pmem->hpa_range.end = p->res->end;
3011
3012
/* Snapshot the region configuration underneath the cxl_rwsem.region */
3013
cxlr_pmem->nr_mappings = p->nr_targets;
3014
for (i = 0; i < p->nr_targets; i++) {
3015
struct cxl_endpoint_decoder *cxled = p->targets[i];
3016
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
3017
struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i];
3018
3019
/*
3020
* Regions never span CXL root devices, so by definition the
3021
* bridge for one device is the same for all.
3022
*/
3023
if (i == 0) {
3024
cxl_nvb = cxl_find_nvdimm_bridge(cxlmd->endpoint);
3025
if (!cxl_nvb)
3026
return -ENODEV;
3027
cxlr->cxl_nvb = cxl_nvb;
3028
}
3029
m->cxlmd = cxlmd;
3030
get_device(&cxlmd->dev);
3031
m->start = cxled->dpa_res->start;
3032
m->size = resource_size(cxled->dpa_res);
3033
m->position = i;
3034
}
3035
3036
dev = &cxlr_pmem->dev;
3037
device_initialize(dev);
3038
lockdep_set_class(&dev->mutex, &cxl_pmem_region_key);
3039
device_set_pm_not_required(dev);
3040
dev->parent = &cxlr->dev;
3041
dev->bus = &cxl_bus_type;
3042
dev->type = &cxl_pmem_region_type;
3043
cxlr_pmem->cxlr = cxlr;
3044
cxlr->cxlr_pmem = no_free_ptr(cxlr_pmem);
3045
3046
return 0;
3047
}
3048
3049
static void cxl_dax_region_release(struct device *dev)
3050
{
3051
struct cxl_dax_region *cxlr_dax = to_cxl_dax_region(dev);
3052
3053
kfree(cxlr_dax);
3054
}
3055
3056
static const struct attribute_group *cxl_dax_region_attribute_groups[] = {
3057
&cxl_base_attribute_group,
3058
NULL,
3059
};
3060
3061
const struct device_type cxl_dax_region_type = {
3062
.name = "cxl_dax_region",
3063
.release = cxl_dax_region_release,
3064
.groups = cxl_dax_region_attribute_groups,
3065
};
3066
3067
static bool is_cxl_dax_region(struct device *dev)
3068
{
3069
return dev->type == &cxl_dax_region_type;
3070
}
3071
3072
struct cxl_dax_region *to_cxl_dax_region(struct device *dev)
3073
{
3074
if (dev_WARN_ONCE(dev, !is_cxl_dax_region(dev),
3075
"not a cxl_dax_region device\n"))
3076
return NULL;
3077
return container_of(dev, struct cxl_dax_region, dev);
3078
}
3079
EXPORT_SYMBOL_NS_GPL(to_cxl_dax_region, "CXL");
3080
3081
static struct lock_class_key cxl_dax_region_key;
3082
3083
static struct cxl_dax_region *cxl_dax_region_alloc(struct cxl_region *cxlr)
3084
{
3085
struct cxl_region_params *p = &cxlr->params;
3086
struct cxl_dax_region *cxlr_dax;
3087
struct device *dev;
3088
3089
guard(rwsem_read)(&cxl_rwsem.region);
3090
if (p->state != CXL_CONFIG_COMMIT)
3091
return ERR_PTR(-ENXIO);
3092
3093
cxlr_dax = kzalloc(sizeof(*cxlr_dax), GFP_KERNEL);
3094
if (!cxlr_dax)
3095
return ERR_PTR(-ENOMEM);
3096
3097
cxlr_dax->hpa_range.start = p->res->start;
3098
cxlr_dax->hpa_range.end = p->res->end;
3099
3100
dev = &cxlr_dax->dev;
3101
cxlr_dax->cxlr = cxlr;
3102
device_initialize(dev);
3103
lockdep_set_class(&dev->mutex, &cxl_dax_region_key);
3104
device_set_pm_not_required(dev);
3105
dev->parent = &cxlr->dev;
3106
dev->bus = &cxl_bus_type;
3107
dev->type = &cxl_dax_region_type;
3108
3109
return cxlr_dax;
3110
}
3111
3112
static void cxlr_pmem_unregister(void *_cxlr_pmem)
3113
{
3114
struct cxl_pmem_region *cxlr_pmem = _cxlr_pmem;
3115
struct cxl_region *cxlr = cxlr_pmem->cxlr;
3116
struct cxl_nvdimm_bridge *cxl_nvb = cxlr->cxl_nvb;
3117
3118
/*
3119
* Either the bridge is in ->remove() context under the device_lock(),
3120
* or cxlr_release_nvdimm() is cancelling the bridge's release action
3121
* for @cxlr_pmem and doing it itself (while manually holding the bridge
3122
* lock).
3123
*/
3124
device_lock_assert(&cxl_nvb->dev);
3125
cxlr->cxlr_pmem = NULL;
3126
cxlr_pmem->cxlr = NULL;
3127
device_unregister(&cxlr_pmem->dev);
3128
}
3129
3130
static void cxlr_release_nvdimm(void *_cxlr)
3131
{
3132
struct cxl_region *cxlr = _cxlr;
3133
struct cxl_nvdimm_bridge *cxl_nvb = cxlr->cxl_nvb;
3134
3135
scoped_guard(device, &cxl_nvb->dev) {
3136
if (cxlr->cxlr_pmem)
3137
devm_release_action(&cxl_nvb->dev, cxlr_pmem_unregister,
3138
cxlr->cxlr_pmem);
3139
}
3140
cxlr->cxl_nvb = NULL;
3141
put_device(&cxl_nvb->dev);
3142
}
3143
3144
/**
3145
* devm_cxl_add_pmem_region() - add a cxl_region-to-nd_region bridge
3146
* @cxlr: parent CXL region for this pmem region bridge device
3147
*
3148
* Return: 0 on success, negative error code on failure.
3149
*/
3150
static int devm_cxl_add_pmem_region(struct cxl_region *cxlr)
3151
{
3152
struct cxl_pmem_region *cxlr_pmem;
3153
struct cxl_nvdimm_bridge *cxl_nvb;
3154
struct device *dev;
3155
int rc;
3156
3157
rc = cxl_pmem_region_alloc(cxlr);
3158
if (rc)
3159
return rc;
3160
cxlr_pmem = cxlr->cxlr_pmem;
3161
cxl_nvb = cxlr->cxl_nvb;
3162
3163
dev = &cxlr_pmem->dev;
3164
rc = dev_set_name(dev, "pmem_region%d", cxlr->id);
3165
if (rc)
3166
goto err;
3167
3168
rc = device_add(dev);
3169
if (rc)
3170
goto err;
3171
3172
dev_dbg(&cxlr->dev, "%s: register %s\n", dev_name(dev->parent),
3173
dev_name(dev));
3174
3175
scoped_guard(device, &cxl_nvb->dev) {
3176
if (cxl_nvb->dev.driver)
3177
rc = devm_add_action_or_reset(&cxl_nvb->dev,
3178
cxlr_pmem_unregister,
3179
cxlr_pmem);
3180
else
3181
rc = -ENXIO;
3182
}
3183
3184
if (rc)
3185
goto err_bridge;
3186
3187
/* @cxlr carries a reference on @cxl_nvb until cxlr_release_nvdimm */
3188
return devm_add_action_or_reset(&cxlr->dev, cxlr_release_nvdimm, cxlr);
3189
3190
err:
3191
put_device(dev);
3192
err_bridge:
3193
put_device(&cxl_nvb->dev);
3194
cxlr->cxl_nvb = NULL;
3195
return rc;
3196
}
3197
3198
static void cxlr_dax_unregister(void *_cxlr_dax)
3199
{
3200
struct cxl_dax_region *cxlr_dax = _cxlr_dax;
3201
3202
device_unregister(&cxlr_dax->dev);
3203
}
3204
3205
static int devm_cxl_add_dax_region(struct cxl_region *cxlr)
3206
{
3207
struct cxl_dax_region *cxlr_dax;
3208
struct device *dev;
3209
int rc;
3210
3211
cxlr_dax = cxl_dax_region_alloc(cxlr);
3212
if (IS_ERR(cxlr_dax))
3213
return PTR_ERR(cxlr_dax);
3214
3215
dev = &cxlr_dax->dev;
3216
rc = dev_set_name(dev, "dax_region%d", cxlr->id);
3217
if (rc)
3218
goto err;
3219
3220
rc = device_add(dev);
3221
if (rc)
3222
goto err;
3223
3224
dev_dbg(&cxlr->dev, "%s: register %s\n", dev_name(dev->parent),
3225
dev_name(dev));
3226
3227
return devm_add_action_or_reset(&cxlr->dev, cxlr_dax_unregister,
3228
cxlr_dax);
3229
err:
3230
put_device(dev);
3231
return rc;
3232
}
3233
3234
static int match_decoder_by_range(struct device *dev, const void *data)
3235
{
3236
const struct range *r1, *r2 = data;
3237
struct cxl_decoder *cxld;
3238
3239
if (!is_switch_decoder(dev))
3240
return 0;
3241
3242
cxld = to_cxl_decoder(dev);
3243
r1 = &cxld->hpa_range;
3244
return range_contains(r1, r2);
3245
}
3246
3247
static struct cxl_decoder *
3248
cxl_port_find_switch_decoder(struct cxl_port *port, struct range *hpa)
3249
{
3250
struct device *cxld_dev = device_find_child(&port->dev, hpa,
3251
match_decoder_by_range);
3252
3253
return cxld_dev ? to_cxl_decoder(cxld_dev) : NULL;
3254
}
3255
3256
static struct cxl_root_decoder *
3257
cxl_find_root_decoder(struct cxl_endpoint_decoder *cxled)
3258
{
3259
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
3260
struct cxl_port *port = cxled_to_port(cxled);
3261
struct cxl_root *cxl_root __free(put_cxl_root) = find_cxl_root(port);
3262
struct cxl_decoder *root, *cxld = &cxled->cxld;
3263
struct range *hpa = &cxld->hpa_range;
3264
3265
root = cxl_port_find_switch_decoder(&cxl_root->port, hpa);
3266
if (!root) {
3267
dev_err(cxlmd->dev.parent,
3268
"%s:%s no CXL window for range %#llx:%#llx\n",
3269
dev_name(&cxlmd->dev), dev_name(&cxld->dev),
3270
cxld->hpa_range.start, cxld->hpa_range.end);
3271
return NULL;
3272
}
3273
3274
return to_cxl_root_decoder(&root->dev);
3275
}
3276
3277
static int match_region_by_range(struct device *dev, const void *data)
3278
{
3279
struct cxl_region_params *p;
3280
struct cxl_region *cxlr;
3281
const struct range *r = data;
3282
3283
if (!is_cxl_region(dev))
3284
return 0;
3285
3286
cxlr = to_cxl_region(dev);
3287
p = &cxlr->params;
3288
3289
guard(rwsem_read)(&cxl_rwsem.region);
3290
if (p->res && p->res->start == r->start && p->res->end == r->end)
3291
return 1;
3292
3293
return 0;
3294
}
3295
3296
static int cxl_extended_linear_cache_resize(struct cxl_region *cxlr,
3297
struct resource *res)
3298
{
3299
struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
3300
struct cxl_region_params *p = &cxlr->params;
3301
resource_size_t size = resource_size(res);
3302
resource_size_t cache_size, start;
3303
3304
cache_size = cxlrd->cache_size;
3305
if (!cache_size)
3306
return 0;
3307
3308
if (size != cache_size) {
3309
dev_warn(&cxlr->dev,
3310
"Extended Linear Cache size %pa != CXL size %pa. No Support!",
3311
&cache_size, &size);
3312
return -ENXIO;
3313
}
3314
3315
/*
3316
* Move the start of the range to where the cache range starts. The
3317
* implementation assumes that the cache range is in front of the
3318
* CXL range. This is not dictated by the HMAT spec but is how the
3319
* current known implementation is configured.
3320
*
3321
* The cache range is expected to be within the CFMWS. The adjusted
3322
* res->start should not be less than cxlrd->res->start.
3323
*/
3324
start = res->start - cache_size;
3325
if (start < cxlrd->res->start)
3326
return -ENXIO;
3327
3328
res->start = start;
3329
p->cache_size = cache_size;
3330
3331
return 0;
3332
}
3333
3334
static int __construct_region(struct cxl_region *cxlr,
3335
struct cxl_root_decoder *cxlrd,
3336
struct cxl_endpoint_decoder *cxled)
3337
{
3338
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
3339
struct range *hpa = &cxled->cxld.hpa_range;
3340
struct cxl_region_params *p;
3341
struct resource *res;
3342
int rc;
3343
3344
guard(rwsem_write)(&cxl_rwsem.region);
3345
p = &cxlr->params;
3346
if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
3347
dev_err(cxlmd->dev.parent,
3348
"%s:%s: %s autodiscovery interrupted\n",
3349
dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
3350
__func__);
3351
return -EBUSY;
3352
}
3353
3354
set_bit(CXL_REGION_F_AUTO, &cxlr->flags);
3355
3356
res = kmalloc(sizeof(*res), GFP_KERNEL);
3357
if (!res)
3358
return -ENOMEM;
3359
3360
*res = DEFINE_RES_MEM_NAMED(hpa->start, range_len(hpa),
3361
dev_name(&cxlr->dev));
3362
3363
rc = cxl_extended_linear_cache_resize(cxlr, res);
3364
if (rc && rc != -EOPNOTSUPP) {
3365
/*
3366
* Failing to support extended linear cache region resize does not
3367
* prevent the region from functioning. It only causes 'cxl list'
* to show an incorrect region size.
3369
*/
3370
dev_warn(cxlmd->dev.parent,
3371
"Extended linear cache calculation failed rc:%d\n", rc);
3372
}
3373
3374
rc = insert_resource(cxlrd->res, res);
3375
if (rc) {
3376
/*
3377
* Platform-firmware may not have split resources like "System
3378
* RAM" on CXL window boundaries see cxl_region_iomem_release()
3379
*/
3380
dev_warn(cxlmd->dev.parent,
3381
"%s:%s: %s %s cannot insert resource\n",
3382
dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
3383
__func__, dev_name(&cxlr->dev));
3384
}
3385
3386
p->res = res;
3387
p->interleave_ways = cxled->cxld.interleave_ways;
3388
p->interleave_granularity = cxled->cxld.interleave_granularity;
3389
p->state = CXL_CONFIG_INTERLEAVE_ACTIVE;
3390
3391
rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_target_group());
3392
if (rc)
3393
return rc;
3394
3395
dev_dbg(cxlmd->dev.parent, "%s:%s: %s %s res: %pr iw: %d ig: %d\n",
3396
dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), __func__,
3397
dev_name(&cxlr->dev), p->res, p->interleave_ways,
3398
p->interleave_granularity);
3399
3400
/* ...to match put_device() in cxl_add_to_region() */
3401
get_device(&cxlr->dev);
3402
3403
return 0;
3404
}
3405
3406
/* Establish an empty region covering the given HPA range */
3407
static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
3408
struct cxl_endpoint_decoder *cxled)
3409
{
3410
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
3411
struct cxl_port *port = cxlrd_to_port(cxlrd);
3412
struct cxl_dev_state *cxlds = cxlmd->cxlds;
3413
int rc, part = READ_ONCE(cxled->part);
3414
struct cxl_region *cxlr;
3415
3416
do {
3417
cxlr = __create_region(cxlrd, cxlds->part[part].mode,
3418
atomic_read(&cxlrd->region_id));
3419
} while (IS_ERR(cxlr) && PTR_ERR(cxlr) == -EBUSY);
3420
3421
if (IS_ERR(cxlr)) {
3422
dev_err(cxlmd->dev.parent,
3423
"%s:%s: %s failed assign region: %ld\n",
3424
dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
3425
__func__, PTR_ERR(cxlr));
3426
return cxlr;
3427
}
3428
3429
rc = __construct_region(cxlr, cxlrd, cxled);
3430
if (rc) {
3431
devm_release_action(port->uport_dev, unregister_region, cxlr);
3432
return ERR_PTR(rc);
3433
}
3434
3435
return cxlr;
3436
}
3437
3438
static struct cxl_region *
3439
cxl_find_region_by_range(struct cxl_root_decoder *cxlrd, struct range *hpa)
3440
{
3441
struct device *region_dev;
3442
3443
region_dev = device_find_child(&cxlrd->cxlsd.cxld.dev, hpa,
3444
match_region_by_range);
3445
if (!region_dev)
3446
return NULL;
3447
3448
return to_cxl_region(region_dev);
3449
}
3450
3451
int cxl_add_to_region(struct cxl_endpoint_decoder *cxled)
3452
{
3453
struct range *hpa = &cxled->cxld.hpa_range;
3454
struct cxl_region_params *p;
3455
bool attach = false;
3456
int rc;
3457
3458
struct cxl_root_decoder *cxlrd __free(put_cxl_root_decoder) =
3459
cxl_find_root_decoder(cxled);
3460
if (!cxlrd)
3461
return -ENXIO;
3462
3463
/*
3464
* Ensure that if multiple threads race to construct_region() for @hpa,
* one does the construction and the others attach to that region.
3466
*/
3467
mutex_lock(&cxlrd->range_lock);
3468
struct cxl_region *cxlr __free(put_cxl_region) =
3469
cxl_find_region_by_range(cxlrd, hpa);
3470
if (!cxlr)
3471
cxlr = construct_region(cxlrd, cxled);
3472
mutex_unlock(&cxlrd->range_lock);
3473
3474
rc = PTR_ERR_OR_ZERO(cxlr);
3475
if (rc)
3476
return rc;
3477
3478
attach_target(cxlr, cxled, -1, TASK_UNINTERRUPTIBLE);
3479
3480
scoped_guard(rwsem_read, &cxl_rwsem.region) {
3481
p = &cxlr->params;
3482
attach = p->state == CXL_CONFIG_COMMIT;
3483
}
3484
3485
if (attach) {
3486
/*
3487
* If device_attach() fails, the range may still be active via
* the platform-firmware memory map; otherwise the driver for
3489
* regions is local to this file, so driver matching can't fail.
3490
*/
3491
if (device_attach(&cxlr->dev) < 0)
3492
dev_err(&cxlr->dev, "failed to enable, range: %pr\n",
3493
p->res);
3494
}
3495
3496
return rc;
3497
}
3498
EXPORT_SYMBOL_NS_GPL(cxl_add_to_region, "CXL");
3499
3500
u64 cxl_port_get_spa_cache_alias(struct cxl_port *endpoint, u64 spa)
3501
{
3502
struct cxl_region_ref *iter;
3503
unsigned long index;
3504
3505
if (!endpoint)
3506
return ~0ULL;
3507
3508
guard(rwsem_write)(&cxl_rwsem.region);
3509
3510
xa_for_each(&endpoint->regions, index, iter) {
3511
struct cxl_region_params *p = &iter->region->params;
3512
3513
if (cxl_resource_contains_addr(p->res, spa)) {
3514
if (!p->cache_size)
3515
return ~0ULL;
3516
3517
if (spa >= p->res->start + p->cache_size)
3518
return spa - p->cache_size;
3519
3520
return spa + p->cache_size;
3521
}
3522
}
3523
3524
return ~0ULL;
3525
}
3526
EXPORT_SYMBOL_NS_GPL(cxl_port_get_spa_cache_alias, "CXL");
3527
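/*
 * Illustrative sketch (not part of the driver): with an extended linear
 * cache, the first 'cache_size' bytes of the region alias the CXL-backed
 * half that follows, so an address in either half maps to its peer by
 * adding or subtracting 'cache_size'. The standalone helper below is a
 * hypothetical, plain-C restatement of the lookup above.
 */
static unsigned long long spa_cache_alias_sketch(unsigned long long spa,
						 unsigned long long region_start,
						 unsigned long long cache_size)
{
	if (!cache_size)
		return ~0ULL;	/* no aliasing without a cache range */

	if (spa >= region_start + cache_size)
		return spa - cache_size;	/* CXL half -> cached half */

	return spa + cache_size;		/* cached half -> CXL half */
}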
3528
static int is_system_ram(struct resource *res, void *arg)
3529
{
3530
struct cxl_region *cxlr = arg;
3531
struct cxl_region_params *p = &cxlr->params;
3532
3533
dev_dbg(&cxlr->dev, "%pr has System RAM: %pr\n", p->res, res);
3534
return 1;
3535
}
3536
3537
static void shutdown_notifiers(void *_cxlr)
3538
{
3539
struct cxl_region *cxlr = _cxlr;
3540
3541
unregister_node_notifier(&cxlr->node_notifier);
3542
unregister_mt_adistance_algorithm(&cxlr->adist_notifier);
3543
}
3544
3545
static int cxl_region_can_probe(struct cxl_region *cxlr)
3546
{
3547
struct cxl_region_params *p = &cxlr->params;
3548
int rc;
3549
3550
ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region);
3551
if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem))) {
3552
dev_dbg(&cxlr->dev, "probe interrupted\n");
3553
return rc;
3554
}
3555
3556
if (p->state < CXL_CONFIG_COMMIT) {
3557
dev_dbg(&cxlr->dev, "config state: %d\n", p->state);
3558
return -ENXIO;
3559
}
3560
3561
if (test_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags)) {
3562
dev_err(&cxlr->dev,
3563
"failed to activate, re-commit region and retry\n");
3564
return -ENXIO;
3565
}
3566
3567
return 0;
3568
}
3569
3570
static int cxl_region_probe(struct device *dev)
3571
{
3572
struct cxl_region *cxlr = to_cxl_region(dev);
3573
struct cxl_region_params *p = &cxlr->params;
3574
int rc;
3575
3576
rc = cxl_region_can_probe(cxlr);
3577
if (rc)
3578
return rc;
3579
3580
/*
3581
* From this point on any path that changes the region's state away from
3582
* CXL_CONFIG_COMMIT is also responsible for releasing the driver.
3583
*/
3584
3585
cxlr->node_notifier.notifier_call = cxl_region_perf_attrs_callback;
3586
cxlr->node_notifier.priority = CXL_CALLBACK_PRI;
3587
register_node_notifier(&cxlr->node_notifier);
3588
3589
cxlr->adist_notifier.notifier_call = cxl_region_calculate_adistance;
3590
cxlr->adist_notifier.priority = 100;
3591
register_mt_adistance_algorithm(&cxlr->adist_notifier);
3592
3593
rc = devm_add_action_or_reset(&cxlr->dev, shutdown_notifiers, cxlr);
3594
if (rc)
3595
return rc;
3596
3597
switch (cxlr->mode) {
3598
case CXL_PARTMODE_PMEM:
3599
rc = devm_cxl_region_edac_register(cxlr);
3600
if (rc)
3601
dev_dbg(&cxlr->dev, "CXL EDAC registration for region_id=%d failed\n",
3602
cxlr->id);
3603
3604
return devm_cxl_add_pmem_region(cxlr);
3605
case CXL_PARTMODE_RAM:
3606
rc = devm_cxl_region_edac_register(cxlr);
3607
if (rc)
3608
dev_dbg(&cxlr->dev, "CXL EDAC registration for region_id=%d failed\n",
3609
cxlr->id);
3610
3611
/*
3612
* The region cannot be managed by CXL if any portion of
3613
* it is already online as 'System RAM'
3614
*/
3615
if (walk_iomem_res_desc(IORES_DESC_NONE,
3616
IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY,
3617
p->res->start, p->res->end, cxlr,
3618
is_system_ram) > 0)
3619
return 0;
3620
return devm_cxl_add_dax_region(cxlr);
3621
default:
3622
dev_dbg(&cxlr->dev, "unsupported region mode: %d\n",
3623
cxlr->mode);
3624
return -ENXIO;
3625
}
3626
}
3627
3628
static struct cxl_driver cxl_region_driver = {
3629
.name = "cxl_region",
3630
.probe = cxl_region_probe,
3631
.id = CXL_DEVICE_REGION,
3632
};
3633
3634
int cxl_region_init(void)
3635
{
3636
return cxl_driver_register(&cxl_region_driver);
3637
}
3638
3639
void cxl_region_exit(void)
3640
{
3641
cxl_driver_unregister(&cxl_region_driver);
3642
}
3643
3644
MODULE_IMPORT_NS("CXL");
3645
MODULE_IMPORT_NS("DEVMEM");
3646
MODULE_ALIAS_CXL(CXL_DEVICE_REGION);
3647
3648