GitHub repository torvalds/linux, path drivers/cxl/core/pci.c
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2021 Intel Corporation. All rights reserved. */
#include <linux/units.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/device.h>
#include <linux/delay.h>
#include <linux/pci.h>
#include <linux/pci-doe.h>
#include <linux/aer.h>
#include <cxlpci.h>
#include <cxlmem.h>
#include <cxl.h>
#include "core.h"
#include "trace.h"

/**
 * DOC: cxl core pci
 *
 * Compute Express Link protocols are layered on top of PCIe. CXL core provides
 * a set of helpers for CXL interactions which occur via PCIe.
 */

static unsigned short media_ready_timeout = 60;
module_param(media_ready_timeout, ushort, 0644);
MODULE_PARM_DESC(media_ready_timeout, "seconds to wait for media ready");
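
/*
 * Illustrative note, not from the original source: as a ushort module
 * parameter with 0644 permissions, the timeout can be overridden at load
 * time (assuming the module is built as cxl_core), e.g.
 *
 *	modprobe cxl_core media_ready_timeout=120
 *
 * or adjusted at runtime via
 * /sys/module/cxl_core/parameters/media_ready_timeout.
 */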

struct cxl_walk_context {
	struct pci_bus *bus;
	struct cxl_port *port;
	int type;
	int error;
	int count;
};

static int match_add_dports(struct pci_dev *pdev, void *data)
{
	struct cxl_walk_context *ctx = data;
	struct cxl_port *port = ctx->port;
	int type = pci_pcie_type(pdev);
	struct cxl_register_map map;
	struct cxl_dport *dport;
	u32 lnkcap, port_num;
	int rc;

	if (pdev->bus != ctx->bus)
		return 0;
	if (!pci_is_pcie(pdev))
		return 0;
	if (type != ctx->type)
		return 0;
	if (pci_read_config_dword(pdev, pci_pcie_cap(pdev) + PCI_EXP_LNKCAP,
				  &lnkcap))
		return 0;

	rc = cxl_find_regblock(pdev, CXL_REGLOC_RBI_COMPONENT, &map);
	if (rc)
		dev_dbg(&port->dev, "failed to find component registers\n");

	port_num = FIELD_GET(PCI_EXP_LNKCAP_PN, lnkcap);
	dport = devm_cxl_add_dport(port, &pdev->dev, port_num, map.resource);
	if (IS_ERR(dport)) {
		ctx->error = PTR_ERR(dport);
		return PTR_ERR(dport);
	}
	ctx->count++;

	return 0;
}

/**
 * devm_cxl_port_enumerate_dports - enumerate downstream ports of the upstream port
 * @port: cxl_port whose ->uport_dev is the upstream of dports to be enumerated
 *
 * Returns a positive number of dports enumerated or a negative error
 * code.
 */
int devm_cxl_port_enumerate_dports(struct cxl_port *port)
{
	struct pci_bus *bus = cxl_port_to_pci_bus(port);
	struct cxl_walk_context ctx;
	int type;

	if (!bus)
		return -ENXIO;

	if (pci_is_root_bus(bus))
		type = PCI_EXP_TYPE_ROOT_PORT;
	else
		type = PCI_EXP_TYPE_DOWNSTREAM;

	ctx = (struct cxl_walk_context) {
		.port = port,
		.bus = bus,
		.type = type,
	};
	pci_walk_bus(bus, match_add_dports, &ctx);

	if (ctx.count == 0)
		return -ENODEV;
	if (ctx.error)
		return ctx.error;
	return ctx.count;
}
EXPORT_SYMBOL_NS_GPL(devm_cxl_port_enumerate_dports, "CXL");
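
/*
 * Illustrative usage sketch (hypothetical caller, not part of this file):
 * a port driver that requires at least one downstream port could do
 *
 *	rc = devm_cxl_port_enumerate_dports(port);
 *	if (rc < 0)
 *		return rc;
 *
 * A return of -ENODEV means no dports were found; a positive return is the
 * number of dports registered above via devm_cxl_add_dport().
 */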

static int cxl_dvsec_mem_range_valid(struct cxl_dev_state *cxlds, int id)
{
	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
	int d = cxlds->cxl_dvsec;
	bool valid = false;
	int rc, i;
	u32 temp;

	if (id > CXL_DVSEC_RANGE_MAX)
		return -EINVAL;

	/* Check MEM INFO VALID bit first, give up after 1s */
	i = 1;
	do {
		rc = pci_read_config_dword(pdev,
					   d + CXL_DVSEC_RANGE_SIZE_LOW(id),
					   &temp);
		if (rc)
			return rc;

		valid = FIELD_GET(CXL_DVSEC_MEM_INFO_VALID, temp);
		if (valid)
			break;
		msleep(1000);
	} while (i--);

	if (!valid) {
		dev_err(&pdev->dev,
			"Timeout awaiting memory range %d valid after 1s.\n",
			id);
		return -ETIMEDOUT;
	}

	return 0;
}

static int cxl_dvsec_mem_range_active(struct cxl_dev_state *cxlds, int id)
{
	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
	int d = cxlds->cxl_dvsec;
	bool active = false;
	int rc, i;
	u32 temp;

	if (id > CXL_DVSEC_RANGE_MAX)
		return -EINVAL;

	/* Check MEM ACTIVE bit, up to 60s timeout by default */
	for (i = media_ready_timeout; i; i--) {
		rc = pci_read_config_dword(
			pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(id), &temp);
		if (rc)
			return rc;

		active = FIELD_GET(CXL_DVSEC_MEM_ACTIVE, temp);
		if (active)
			break;
		msleep(1000);
	}

	if (!active) {
		dev_err(&pdev->dev,
			"timeout awaiting memory active after %d seconds\n",
			media_ready_timeout);
		return -ETIMEDOUT;
	}

	return 0;
}

/*
 * Wait up to @media_ready_timeout for the device to report memory
 * active.
 */
int cxl_await_media_ready(struct cxl_dev_state *cxlds)
{
	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
	int d = cxlds->cxl_dvsec;
	int rc, i, hdm_count;
	u64 md_status;
	u16 cap;

	rc = pci_read_config_word(pdev,
				  d + CXL_DVSEC_CAP_OFFSET, &cap);
	if (rc)
		return rc;

	hdm_count = FIELD_GET(CXL_DVSEC_HDM_COUNT_MASK, cap);
	for (i = 0; i < hdm_count; i++) {
		rc = cxl_dvsec_mem_range_valid(cxlds, i);
		if (rc)
			return rc;
	}

	for (i = 0; i < hdm_count; i++) {
		rc = cxl_dvsec_mem_range_active(cxlds, i);
		if (rc)
			return rc;
	}

	md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);
	if (!CXLMDEV_READY(md_status))
		return -EIO;

	return 0;
}
EXPORT_SYMBOL_NS_GPL(cxl_await_media_ready, "CXL");
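
/*
 * Illustrative caller sketch (hypothetical, not part of this file): an
 * endpoint driver probe path would typically treat a nonzero return as
 * "CXL.mem is not usable yet", e.g.
 *
 *	rc = cxl_await_media_ready(cxlds);
 *	if (rc)
 *		dev_warn(cxlds->dev, "media not active (%d)\n", rc);
 *
 * How the caller reacts (warn, defer, or fail probe) is policy that lives
 * in the endpoint driver, not in this helper.
 */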

static int cxl_set_mem_enable(struct cxl_dev_state *cxlds, u16 val)
{
	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
	int d = cxlds->cxl_dvsec;
	u16 ctrl;
	int rc;

	rc = pci_read_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, &ctrl);
	if (rc < 0)
		return rc;

	if ((ctrl & CXL_DVSEC_MEM_ENABLE) == val)
		return 1;
	ctrl &= ~CXL_DVSEC_MEM_ENABLE;
	ctrl |= val;

	rc = pci_write_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, ctrl);
	if (rc < 0)
		return rc;

	return 0;
}

static void clear_mem_enable(void *cxlds)
{
	cxl_set_mem_enable(cxlds, 0);
}

static int devm_cxl_enable_mem(struct device *host, struct cxl_dev_state *cxlds)
{
	int rc;

	rc = cxl_set_mem_enable(cxlds, CXL_DVSEC_MEM_ENABLE);
	if (rc < 0)
		return rc;
	if (rc > 0)
		return 0;
	return devm_add_action_or_reset(host, clear_mem_enable, cxlds);
}

/* require dvsec ranges to be covered by a locked platform window */
static int dvsec_range_allowed(struct device *dev, const void *arg)
{
	const struct range *dev_range = arg;
	struct cxl_decoder *cxld;

	if (!is_root_decoder(dev))
		return 0;

	cxld = to_cxl_decoder(dev);

	if (!(cxld->flags & CXL_DECODER_F_RAM))
		return 0;

	return range_contains(&cxld->hpa_range, dev_range);
}

static void disable_hdm(void *_cxlhdm)
{
	u32 global_ctrl;
	struct cxl_hdm *cxlhdm = _cxlhdm;
	void __iomem *hdm = cxlhdm->regs.hdm_decoder;

	global_ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET);
	writel(global_ctrl & ~CXL_HDM_DECODER_ENABLE,
	       hdm + CXL_HDM_DECODER_CTRL_OFFSET);
}

static int devm_cxl_enable_hdm(struct device *host, struct cxl_hdm *cxlhdm)
{
	void __iomem *hdm = cxlhdm->regs.hdm_decoder;
	u32 global_ctrl;

	global_ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET);
	writel(global_ctrl | CXL_HDM_DECODER_ENABLE,
	       hdm + CXL_HDM_DECODER_CTRL_OFFSET);

	return devm_add_action_or_reset(host, disable_hdm, cxlhdm);
}
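
/*
 * Added commentary on the devm pattern used by the two devm_cxl_enable_*
 * helpers above: devm_add_action_or_reset() registers the matching teardown
 * (clear_mem_enable() or disable_hdm()) to run when @host is unbound, and
 * runs it immediately if the registration itself fails. A successful return
 * therefore keeps the capability enabled for the lifetime of the host driver
 * binding without any explicit cleanup in the callers.
 */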

int cxl_dvsec_rr_decode(struct cxl_dev_state *cxlds,
			struct cxl_endpoint_dvsec_info *info)
{
	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
	struct device *dev = cxlds->dev;
	int hdm_count, rc, i, ranges = 0;
	int d = cxlds->cxl_dvsec;
	u16 cap, ctrl;

	if (!d) {
		dev_dbg(dev, "No DVSEC Capability\n");
		return -ENXIO;
	}

	rc = pci_read_config_word(pdev, d + CXL_DVSEC_CAP_OFFSET, &cap);
	if (rc)
		return rc;

	if (!(cap & CXL_DVSEC_MEM_CAPABLE)) {
		dev_dbg(dev, "Not MEM Capable\n");
		return -ENXIO;
	}

	/*
	 * It is not allowed by spec for MEM.capable to be set and have 0 legacy
	 * HDM decoders (values > 2 are also undefined as of CXL 2.0). As this
	 * driver is for a spec defined class code which must be CXL.mem
	 * capable, there is no point in continuing to enable CXL.mem.
	 */
	hdm_count = FIELD_GET(CXL_DVSEC_HDM_COUNT_MASK, cap);
	if (!hdm_count || hdm_count > 2)
		return -EINVAL;

	/*
	 * The current DVSEC values are moot if the memory capability is
	 * disabled, and they will remain moot after the HDM Decoder
	 * capability is enabled.
	 */
	rc = pci_read_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, &ctrl);
	if (rc)
		return rc;

	info->mem_enabled = FIELD_GET(CXL_DVSEC_MEM_ENABLE, ctrl);
	if (!info->mem_enabled)
		return 0;

	for (i = 0; i < hdm_count; i++) {
		u64 base, size;
		u32 temp;

		rc = cxl_dvsec_mem_range_valid(cxlds, i);
		if (rc)
			return rc;

		rc = pci_read_config_dword(
			pdev, d + CXL_DVSEC_RANGE_SIZE_HIGH(i), &temp);
		if (rc)
			return rc;

		size = (u64)temp << 32;

		rc = pci_read_config_dword(
			pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(i), &temp);
		if (rc)
			return rc;

		size |= temp & CXL_DVSEC_MEM_SIZE_LOW_MASK;
		if (!size)
			continue;

		rc = pci_read_config_dword(
			pdev, d + CXL_DVSEC_RANGE_BASE_HIGH(i), &temp);
		if (rc)
			return rc;

		base = (u64)temp << 32;

		rc = pci_read_config_dword(
			pdev, d + CXL_DVSEC_RANGE_BASE_LOW(i), &temp);
		if (rc)
			return rc;

		base |= temp & CXL_DVSEC_MEM_BASE_LOW_MASK;

		info->dvsec_range[ranges++] = (struct range) {
			.start = base,
			.end = base + size - 1
		};
	}

	info->ranges = ranges;

	return 0;
}
EXPORT_SYMBOL_NS_GPL(cxl_dvsec_rr_decode, "CXL");

/**
 * cxl_hdm_decode_init() - Set up HDM decoding for the endpoint
 * @cxlds: Device state
 * @cxlhdm: Mapped HDM decoder Capability
 * @info: Cached DVSEC range registers info
 *
 * Try to enable the endpoint's HDM Decoder Capability
 */
int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm,
			struct cxl_endpoint_dvsec_info *info)
{
	void __iomem *hdm = cxlhdm->regs.hdm_decoder;
	struct cxl_port *port = cxlhdm->port;
	struct device *dev = cxlds->dev;
	struct cxl_port *root;
	int i, rc, allowed;
	u32 global_ctrl = 0;

	if (hdm)
		global_ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET);

	/*
	 * If the HDM Decoder Capability is already enabled then assume
	 * that some other agent like platform firmware set it up.
	 */
	if (global_ctrl & CXL_HDM_DECODER_ENABLE || (!hdm && info->mem_enabled))
		return devm_cxl_enable_mem(&port->dev, cxlds);

	/*
	 * If the HDM Decoder Capability does not exist and DVSEC was
	 * not set up, the DVSEC based emulation cannot be used.
	 */
	if (!hdm)
		return -ENODEV;

	/* The HDM Decoder Capability exists but is globally disabled. */

	/*
	 * If the DVSEC CXL Range registers are not enabled, just
	 * enable and use the HDM Decoder Capability registers.
	 */
	if (!info->mem_enabled) {
		rc = devm_cxl_enable_hdm(&port->dev, cxlhdm);
		if (rc)
			return rc;

		return devm_cxl_enable_mem(&port->dev, cxlds);
	}

	/*
	 * Per CXL 2.0 Sections 8.1.3.8.3 and 8.1.3.8.4 (DVSEC CXL Range 1 Base
	 * [High,Low]), the range register values are ignored by the device
	 * when HDM operation is enabled, but the spec also recommends matching
	 * DVSEC Range 1,2 to HDM Decoder Range 0,1. So, non-zero info->ranges
	 * are expected even though Linux does not require or maintain that
	 * match. Check that at least one DVSEC range is enabled and allowed by
	 * the platform. That is, the DVSEC range must be covered by a locked
	 * platform window (CFMWS). Fail otherwise as the endpoint's decoders
	 * cannot be used.
	 */

	root = to_cxl_port(port->dev.parent);
	while (!is_cxl_root(root) && is_cxl_port(root->dev.parent))
		root = to_cxl_port(root->dev.parent);
	if (!is_cxl_root(root)) {
		dev_err(dev, "Failed to acquire root port for HDM enable\n");
		return -ENODEV;
	}

	for (i = 0, allowed = 0; i < info->ranges; i++) {
		struct device *cxld_dev;

		cxld_dev = device_find_child(&root->dev, &info->dvsec_range[i],
					     dvsec_range_allowed);
		if (!cxld_dev) {
			dev_dbg(dev, "DVSEC Range%d denied by platform\n", i);
			continue;
		}
		dev_dbg(dev, "DVSEC Range%d allowed by platform\n", i);
		put_device(cxld_dev);
		allowed++;
	}

	if (!allowed) {
		dev_err(dev, "Range register decodes outside platform defined CXL ranges.\n");
		return -ENXIO;
	}

	return 0;
}
EXPORT_SYMBOL_NS_GPL(cxl_hdm_decode_init, "CXL");

#define CXL_DOE_TABLE_ACCESS_REQ_CODE		0x000000ff
#define   CXL_DOE_TABLE_ACCESS_REQ_CODE_READ	0
#define CXL_DOE_TABLE_ACCESS_TABLE_TYPE		0x0000ff00
#define   CXL_DOE_TABLE_ACCESS_TABLE_TYPE_CDATA	0
#define CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE	0xffff0000
#define CXL_DOE_TABLE_ACCESS_LAST_ENTRY		0xffff
#define CXL_DOE_PROTOCOL_TABLE_ACCESS 2

#define CDAT_DOE_REQ(entry_handle) cpu_to_le32				\
	(FIELD_PREP(CXL_DOE_TABLE_ACCESS_REQ_CODE,			\
		    CXL_DOE_TABLE_ACCESS_REQ_CODE_READ) |		\
	 FIELD_PREP(CXL_DOE_TABLE_ACCESS_TABLE_TYPE,			\
		    CXL_DOE_TABLE_ACCESS_TABLE_TYPE_CDATA) |		\
	 FIELD_PREP(CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE, (entry_handle)))
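
/*
 * Worked example of the request encoding (derived from the field masks above):
 * the read request code and the CDAT table type are both 0, so only the entry
 * handle in bits [31:16] varies, e.g.
 *
 *	CDAT_DOE_REQ(0) == cpu_to_le32(0x00000000)
 *	CDAT_DOE_REQ(1) == cpu_to_le32(0x00010000)
 */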

static int cxl_cdat_get_length(struct device *dev,
			       struct pci_doe_mb *doe_mb,
			       size_t *length)
{
	__le32 request = CDAT_DOE_REQ(0);
	__le32 response[2];
	int rc;

	rc = pci_doe(doe_mb, PCI_VENDOR_ID_CXL,
		     CXL_DOE_PROTOCOL_TABLE_ACCESS,
		     &request, sizeof(request),
		     &response, sizeof(response));
	if (rc < 0) {
		dev_err(dev, "DOE failed: %d", rc);
		return rc;
	}
	if (rc < sizeof(response))
		return -EIO;

	*length = le32_to_cpu(response[1]);
	dev_dbg(dev, "CDAT length %zu\n", *length);

	return 0;
}

static int cxl_cdat_read_table(struct device *dev,
			       struct pci_doe_mb *doe_mb,
			       struct cdat_doe_rsp *rsp, size_t *length)
{
	size_t received, remaining = *length;
	unsigned int entry_handle = 0;
	union cdat_data *data;
	__le32 saved_dw = 0;

	do {
		__le32 request = CDAT_DOE_REQ(entry_handle);
		int rc;

		rc = pci_doe(doe_mb, PCI_VENDOR_ID_CXL,
			     CXL_DOE_PROTOCOL_TABLE_ACCESS,
			     &request, sizeof(request),
			     rsp, sizeof(*rsp) + remaining);
		if (rc < 0) {
			dev_err(dev, "DOE failed: %d", rc);
			return rc;
		}

		if (rc < sizeof(*rsp))
			return -EIO;

		data = (union cdat_data *)rsp->data;
		received = rc - sizeof(*rsp);

		if (entry_handle == 0) {
			if (received != sizeof(data->header))
				return -EIO;
		} else {
			if (received < sizeof(data->entry) ||
			    received != le16_to_cpu(data->entry.length))
				return -EIO;
		}

		/* Get the CXL table access header entry handle */
		entry_handle = FIELD_GET(CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE,
					 le32_to_cpu(rsp->doe_header));

		/*
		 * Table Access Response Header overwrote the last DW of
		 * previous entry, so restore that DW
		 */
		rsp->doe_header = saved_dw;
		remaining -= received;
		rsp = (void *)rsp + received;
		saved_dw = rsp->doe_header;
	} while (entry_handle != CXL_DOE_TABLE_ACCESS_LAST_ENTRY);

	/* Length in CDAT header may exceed concatenation of CDAT entries */
	*length -= remaining;

	return 0;
}

static unsigned char cdat_checksum(void *buf, size_t size)
{
	unsigned char sum, *data = buf;
	size_t i;

	for (sum = 0, i = 0; i < size; i++)
		sum += data[i];
	return sum;
}
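
/*
 * Added note: this is the usual byte-wise table checksum; a well-formed CDAT
 * table is generated so that all of its bytes, including the checksum field
 * in the header, sum to zero modulo 256. A nonzero return therefore indicates
 * a corrupt or truncated table, which the caller below discards.
 */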

/**
 * read_cdat_data - Read the CDAT data on this port
 * @port: Port to read data from
 *
 * This call will sleep waiting for responses from the DOE mailbox.
 */
void read_cdat_data(struct cxl_port *port)
{
	struct device *uport = port->uport_dev;
	struct device *dev = &port->dev;
	struct pci_doe_mb *doe_mb;
	struct pci_dev *pdev = NULL;
	struct cxl_memdev *cxlmd;
	struct cdat_doe_rsp *buf;
	size_t table_length, length;
	int rc;

	if (is_cxl_memdev(uport)) {
		struct device *host;

		cxlmd = to_cxl_memdev(uport);
		host = cxlmd->dev.parent;
		if (dev_is_pci(host))
			pdev = to_pci_dev(host);
	} else if (dev_is_pci(uport)) {
		pdev = to_pci_dev(uport);
	}

	if (!pdev)
		return;

	doe_mb = pci_find_doe_mailbox(pdev, PCI_VENDOR_ID_CXL,
				      CXL_DOE_PROTOCOL_TABLE_ACCESS);
	if (!doe_mb) {
		dev_dbg(dev, "No CDAT mailbox\n");
		return;
	}

	port->cdat_available = true;

	if (cxl_cdat_get_length(dev, doe_mb, &length)) {
		dev_dbg(dev, "No CDAT length\n");
		return;
	}

	/*
	 * The beginning of the CDAT buffer needs space for an additional 4
	 * bytes for the DOE header. Table data starts afterwards.
	 */
	buf = devm_kzalloc(dev, sizeof(*buf) + length, GFP_KERNEL);
	if (!buf)
		goto err;

	table_length = length;

	rc = cxl_cdat_read_table(dev, doe_mb, buf, &length);
	if (rc)
		goto err;

	if (table_length != length)
		dev_warn(dev, "Malformed CDAT table length (%zu:%zu), discarding trailing data\n",
			 table_length, length);

	if (cdat_checksum(buf->data, length))
		goto err;

	port->cdat.table = buf->data;
	port->cdat.length = length;

	return;
err:
	/* Don't leave table data allocated on error */
	devm_kfree(dev, buf);
	dev_err(dev, "Failed to read/validate CDAT.\n");
}
EXPORT_SYMBOL_NS_GPL(read_cdat_data, "CXL");

static void __cxl_handle_cor_ras(struct cxl_dev_state *cxlds,
				 void __iomem *ras_base)
{
	void __iomem *addr;
	u32 status;

	if (!ras_base)
		return;

	addr = ras_base + CXL_RAS_CORRECTABLE_STATUS_OFFSET;
	status = readl(addr);
	if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) {
		writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr);
		trace_cxl_aer_correctable_error(cxlds->cxlmd, status);
	}
}

static void cxl_handle_endpoint_cor_ras(struct cxl_dev_state *cxlds)
{
	return __cxl_handle_cor_ras(cxlds, cxlds->regs.ras);
}

/* CXL spec rev3.0 8.2.4.16.1 */
static void header_log_copy(void __iomem *ras_base, u32 *log)
{
	void __iomem *addr;
	u32 *log_addr;
	int i, log_u32_size = CXL_HEADERLOG_SIZE / sizeof(u32);

	addr = ras_base + CXL_RAS_HEADER_LOG_OFFSET;
	log_addr = log;

	for (i = 0; i < log_u32_size; i++) {
		*log_addr = readl(addr);
		log_addr++;
		addr += sizeof(u32);
	}
}

/*
 * Log the state of the RAS status registers and prepare them to log the
 * next error status. Return true if a reset is needed.
 */
static bool __cxl_handle_ras(struct cxl_dev_state *cxlds,
			     void __iomem *ras_base)
{
	u32 hl[CXL_HEADERLOG_SIZE_U32];
	void __iomem *addr;
	u32 status;
	u32 fe;

	if (!ras_base)
		return false;

	addr = ras_base + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET;
	status = readl(addr);
	if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK))
		return false;

	/* If multiple errors, log header points to first error from ctrl reg */
	if (hweight32(status) > 1) {
		void __iomem *rcc_addr =
			ras_base + CXL_RAS_CAP_CONTROL_OFFSET;

		fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK,
				   readl(rcc_addr)));
	} else {
		fe = status;
	}

	header_log_copy(ras_base, hl);
	trace_cxl_aer_uncorrectable_error(cxlds->cxlmd, status, fe, hl);
	writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr);

	return true;
}

static bool cxl_handle_endpoint_ras(struct cxl_dev_state *cxlds)
{
	return __cxl_handle_ras(cxlds, cxlds->regs.ras);
}

#ifdef CONFIG_PCIEAER_CXL

static void cxl_dport_map_rch_aer(struct cxl_dport *dport)
{
	resource_size_t aer_phys;
	struct device *host;
	u16 aer_cap;

	aer_cap = cxl_rcrb_to_aer(dport->dport_dev, dport->rcrb.base);
	if (aer_cap) {
		host = dport->reg_map.host;
		aer_phys = aer_cap + dport->rcrb.base;
		dport->regs.dport_aer = devm_cxl_iomap_block(host, aer_phys,
				sizeof(struct aer_capability_regs));
	}
}

static void cxl_dport_map_ras(struct cxl_dport *dport)
{
	struct cxl_register_map *map = &dport->reg_map;
	struct device *dev = dport->dport_dev;

	if (!map->component_map.ras.valid)
		dev_dbg(dev, "RAS registers not found\n");
	else if (cxl_map_component_regs(map, &dport->regs.component,
					BIT(CXL_CM_CAP_CAP_ID_RAS)))
		dev_dbg(dev, "Failed to map RAS capability.\n");
}

static void cxl_disable_rch_root_ints(struct cxl_dport *dport)
{
	void __iomem *aer_base = dport->regs.dport_aer;
	u32 aer_cmd_mask, aer_cmd;

	if (!aer_base)
		return;

	/*
	 * Disable RCH root port command interrupts.
	 * CXL 3.0 12.2.1.1 - RCH Downstream Port-detected Errors
	 *
	 * This sequence may not be necessary: the CXL spec states that
	 * disabling the root command register's interrupts is required,
	 * but the PCI spec shows they are disabled by default on reset.
	 */
	aer_cmd_mask = (PCI_ERR_ROOT_CMD_COR_EN |
			PCI_ERR_ROOT_CMD_NONFATAL_EN |
			PCI_ERR_ROOT_CMD_FATAL_EN);
	aer_cmd = readl(aer_base + PCI_ERR_ROOT_COMMAND);
	aer_cmd &= ~aer_cmd_mask;
	writel(aer_cmd, aer_base + PCI_ERR_ROOT_COMMAND);
}

/**
 * cxl_dport_init_ras_reporting - Set up CXL RAS reporting on this dport
 * @dport: the cxl_dport that needs to be initialized
 * @host: host device for devm operations
 */
void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host)
{
	dport->reg_map.host = host;
	cxl_dport_map_ras(dport);

	if (dport->rch) {
		struct pci_host_bridge *host_bridge = to_pci_host_bridge(dport->dport_dev);

		if (!host_bridge->native_aer)
			return;

		cxl_dport_map_rch_aer(dport);
		cxl_disable_rch_root_ints(dport);
	}
}
EXPORT_SYMBOL_NS_GPL(cxl_dport_init_ras_reporting, "CXL");

static void cxl_handle_rdport_cor_ras(struct cxl_dev_state *cxlds,
				      struct cxl_dport *dport)
{
	return __cxl_handle_cor_ras(cxlds, dport->regs.ras);
}

static bool cxl_handle_rdport_ras(struct cxl_dev_state *cxlds,
				  struct cxl_dport *dport)
{
	return __cxl_handle_ras(cxlds, dport->regs.ras);
}

/*
 * Copy the AER capability registers using 32 bit read accesses.
 * This is necessary because RCRB AER capability is MMIO mapped. Clear the
 * status after copying.
 *
 * @aer_base: base address of AER capability block in RCRB
 * @aer_regs: destination for copying AER capability
 */
static bool cxl_rch_get_aer_info(void __iomem *aer_base,
				 struct aer_capability_regs *aer_regs)
{
	int read_cnt = sizeof(struct aer_capability_regs) / sizeof(u32);
	u32 *aer_regs_buf = (u32 *)aer_regs;
	int n;

	if (!aer_base)
		return false;

	/* Use readl() to guarantee 32-bit accesses */
	for (n = 0; n < read_cnt; n++)
		aer_regs_buf[n] = readl(aer_base + n * sizeof(u32));

	writel(aer_regs->uncor_status, aer_base + PCI_ERR_UNCOR_STATUS);
	writel(aer_regs->cor_status, aer_base + PCI_ERR_COR_STATUS);

	return true;
}

/* Get AER severity. Return false if there is no error. */
static bool cxl_rch_get_aer_severity(struct aer_capability_regs *aer_regs,
				     int *severity)
{
	if (aer_regs->uncor_status & ~aer_regs->uncor_mask) {
		if (aer_regs->uncor_status & PCI_ERR_ROOT_FATAL_RCV)
			*severity = AER_FATAL;
		else
			*severity = AER_NONFATAL;
		return true;
	}

	if (aer_regs->cor_status & ~aer_regs->cor_mask) {
		*severity = AER_CORRECTABLE;
		return true;
	}

	return false;
}

static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds)
{
	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
	struct aer_capability_regs aer_regs;
	struct cxl_dport *dport;
	int severity;

	struct cxl_port *port __free(put_cxl_port) =
		cxl_pci_find_port(pdev, &dport);
	if (!port)
		return;

	if (!cxl_rch_get_aer_info(dport->regs.dport_aer, &aer_regs))
		return;

	if (!cxl_rch_get_aer_severity(&aer_regs, &severity))
		return;

	pci_print_aer(pdev, severity, &aer_regs);

	if (severity == AER_CORRECTABLE)
		cxl_handle_rdport_cor_ras(cxlds, dport);
	else
		cxl_handle_rdport_ras(cxlds, dport);
}

#else
static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) { }
#endif

void cxl_cor_error_detected(struct pci_dev *pdev)
{
	struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
	struct device *dev = &cxlds->cxlmd->dev;

	scoped_guard(device, dev) {
		if (!dev->driver) {
			dev_warn(&pdev->dev,
				 "%s: memdev disabled, abort error handling\n",
				 dev_name(dev));
			return;
		}

		if (cxlds->rcd)
			cxl_handle_rdport_errors(cxlds);

		cxl_handle_endpoint_cor_ras(cxlds);
	}
}
EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, "CXL");

pci_ers_result_t cxl_error_detected(struct pci_dev *pdev,
				    pci_channel_state_t state)
{
	struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
	struct cxl_memdev *cxlmd = cxlds->cxlmd;
	struct device *dev = &cxlmd->dev;
	bool ue;

	scoped_guard(device, dev) {
		if (!dev->driver) {
			dev_warn(&pdev->dev,
				 "%s: memdev disabled, abort error handling\n",
				 dev_name(dev));
			return PCI_ERS_RESULT_DISCONNECT;
		}

		if (cxlds->rcd)
			cxl_handle_rdport_errors(cxlds);
		/*
		 * A frozen channel indicates an impending reset which is fatal to
		 * CXL.mem operation, and will likely crash the system. On the off
		 * chance the situation is recoverable, dump the status of the RAS
		 * capability registers and bounce the active state of the memdev.
		 */
		ue = cxl_handle_endpoint_ras(cxlds);
	}

	switch (state) {
	case pci_channel_io_normal:
		if (ue) {
			device_release_driver(dev);
			return PCI_ERS_RESULT_NEED_RESET;
		}
		return PCI_ERS_RESULT_CAN_RECOVER;
	case pci_channel_io_frozen:
		dev_warn(&pdev->dev,
			 "%s: frozen state error detected, disable CXL.mem\n",
			 dev_name(dev));
		device_release_driver(dev);
		return PCI_ERS_RESULT_NEED_RESET;
	case pci_channel_io_perm_failure:
		dev_warn(&pdev->dev,
			 "failure state error detected, request disconnect\n");
		return PCI_ERS_RESULT_DISCONNECT;
	}
	return PCI_ERS_RESULT_NEED_RESET;
}
EXPORT_SYMBOL_NS_GPL(cxl_error_detected, "CXL");

static int cxl_flit_size(struct pci_dev *pdev)
{
	if (cxl_pci_flit_256(pdev))
		return 256;

	return 68;
}

/**
 * cxl_pci_get_latency - calculate the link latency for the PCIe link
 * @pdev: PCI device
 *
 * return: calculated latency or 0 for no latency
 *
 * CXL Memory Device SW Guide v1.0 2.11.4 Link latency calculation
 * Link latency = LinkPropagationLatency + FlitLatency + RetimerLatency
 * LinkPropagationLatency is negligible, so 0 will be used
 * RetimerLatency is assumed to be negligible and 0 will be used
 * FlitLatency = FlitSize / LinkBandwidth
 * FlitSize is defined by spec. CXL rev3.0 4.2.1.
 * A 68B flit is used up to 32GT/s; above 32GT/s a 256B flit size is used.
 * The FlitLatency is converted to picoseconds.
 */
long cxl_pci_get_latency(struct pci_dev *pdev)
{
	long bw;

	bw = pcie_link_speed_mbps(pdev);
	if (bw < 0)
		return 0;
	bw /= BITS_PER_BYTE;

	return cxl_flit_size(pdev) * MEGA / bw;
}
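
/*
 * Worked example with illustrative numbers: at 32 GT/s pcie_link_speed_mbps()
 * reports 32000 Mb/s, so bw = 32000 / 8 = 4000 MB/s, and with the 68B flit
 * the result is 68 * MEGA / 4000 = 17000 ps (17 ns). At 64 GT/s the 256B flit
 * applies: 256 * MEGA / 8000 = 32000 ps (32 ns). Encoding overhead is ignored.
 */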

static int __cxl_endpoint_decoder_reset_detected(struct device *dev, void *data)
{
	struct cxl_port *port = data;
	struct cxl_decoder *cxld;
	struct cxl_hdm *cxlhdm;
	void __iomem *hdm;
	u32 ctrl;

	if (!is_endpoint_decoder(dev))
		return 0;

	cxld = to_cxl_decoder(dev);
	if ((cxld->flags & CXL_DECODER_F_ENABLE) == 0)
		return 0;

	cxlhdm = dev_get_drvdata(&port->dev);
	hdm = cxlhdm->regs.hdm_decoder;
	ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(cxld->id));

	return !FIELD_GET(CXL_HDM_DECODER0_CTRL_COMMITTED, ctrl);
}

bool cxl_endpoint_decoder_reset_detected(struct cxl_port *port)
{
	return device_for_each_child(&port->dev, port,
				     __cxl_endpoint_decoder_reset_detected);
}
EXPORT_SYMBOL_NS_GPL(cxl_endpoint_decoder_reset_detected, "CXL");

int cxl_pci_get_bandwidth(struct pci_dev *pdev, struct access_coordinate *c)
{
	int speed, bw;
	u16 lnksta;
	u32 width;

	speed = pcie_link_speed_mbps(pdev);
	if (speed < 0)
		return speed;
	speed /= BITS_PER_BYTE;

	pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnksta);
	width = FIELD_GET(PCI_EXP_LNKSTA_NLW, lnksta);
	bw = speed * width;

	for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
		c[i].read_bandwidth = bw;
		c[i].write_bandwidth = bw;
	}

	return 0;
}
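
/*
 * Worked example with illustrative numbers: a x8 link at 32 GT/s gives
 * speed = 32000 / 8 = 4000 MB/s per lane and width = 8, so every access
 * coordinate is populated with read and write bandwidth of 32000 MB/s.
 * As above, encoding overhead is not factored in.
 */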

/*
 * Set max timeout such that platforms will optimize GPF flow to avoid
 * the implied worst-case scenario delays. On a sane platform, all
 * devices should always complete GPF within the energy budget of
 * the GPF flow. The kernel does not have enough information to pick
 * anything better than "maximize timeouts and hope it works".
 *
 * A misbehaving device could block forward progress of GPF for all
 * the other devices, exhausting the energy budget of the platform.
 * However, the spec seems to assume that moving on from slow-to-respond
 * devices is a virtue. It is not possible to know whether, in actuality,
 * the slow-to-respond device is *the* most critical device in the
 * system to wait for.
 */
#define GPF_TIMEOUT_BASE_MAX 2
#define GPF_TIMEOUT_SCALE_MAX 7 /* 10 seconds */
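
/*
 * Added note: assuming the usual base-times-unit encoding, with scale 7
 * selecting a 10 second unit (per the comment above), the programmed worst
 * case is 2 * 10s = 20 seconds per phase, which matches the "%d0 secs"
 * debug message emitted by update_gpf_port_dvsec() below.
 */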

u16 cxl_gpf_get_dvsec(struct device *dev)
{
	struct pci_dev *pdev;
	bool is_port = true;
	u16 dvsec;

	if (!dev_is_pci(dev))
		return 0;

	pdev = to_pci_dev(dev);
	if (pci_pcie_type(pdev) == PCI_EXP_TYPE_ENDPOINT)
		is_port = false;

	dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL,
			is_port ? CXL_DVSEC_PORT_GPF : CXL_DVSEC_DEVICE_GPF);
	if (!dvsec)
		dev_warn(dev, "%s GPF DVSEC not present\n",
			 is_port ? "Port" : "Device");
	return dvsec;
}
EXPORT_SYMBOL_NS_GPL(cxl_gpf_get_dvsec, "CXL");

static int update_gpf_port_dvsec(struct pci_dev *pdev, int dvsec, int phase)
{
	u64 base, scale;
	int rc, offset;
	u16 ctrl;

	switch (phase) {
	case 1:
		offset = CXL_DVSEC_PORT_GPF_PHASE_1_CONTROL_OFFSET;
		base = CXL_DVSEC_PORT_GPF_PHASE_1_TMO_BASE_MASK;
		scale = CXL_DVSEC_PORT_GPF_PHASE_1_TMO_SCALE_MASK;
		break;
	case 2:
		offset = CXL_DVSEC_PORT_GPF_PHASE_2_CONTROL_OFFSET;
		base = CXL_DVSEC_PORT_GPF_PHASE_2_TMO_BASE_MASK;
		scale = CXL_DVSEC_PORT_GPF_PHASE_2_TMO_SCALE_MASK;
		break;
	default:
		return -EINVAL;
	}

	rc = pci_read_config_word(pdev, dvsec + offset, &ctrl);
	if (rc)
		return rc;

	if (FIELD_GET(base, ctrl) == GPF_TIMEOUT_BASE_MAX &&
	    FIELD_GET(scale, ctrl) == GPF_TIMEOUT_SCALE_MAX)
		return 0;

	ctrl = FIELD_PREP(base, GPF_TIMEOUT_BASE_MAX);
	ctrl |= FIELD_PREP(scale, GPF_TIMEOUT_SCALE_MAX);

	rc = pci_write_config_word(pdev, dvsec + offset, ctrl);
	if (!rc)
		pci_dbg(pdev, "Port GPF phase %d timeout: %d0 secs\n",
			phase, GPF_TIMEOUT_BASE_MAX);

	return rc;
}

int cxl_gpf_port_setup(struct cxl_dport *dport)
{
	if (!dport)
		return -EINVAL;

	if (!dport->gpf_dvsec) {
		struct pci_dev *pdev;
		int dvsec;

		dvsec = cxl_gpf_get_dvsec(dport->dport_dev);
		if (!dvsec)
			return -EINVAL;

		dport->gpf_dvsec = dvsec;
		pdev = to_pci_dev(dport->dport_dev);
		update_gpf_port_dvsec(pdev, dport->gpf_dvsec, 1);
		update_gpf_port_dvsec(pdev, dport->gpf_dvsec, 2);
	}

	return 0;
}