Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/drivers/cxl/core/pci.c
50682 views
1
// SPDX-License-Identifier: GPL-2.0-only
2
/* Copyright(c) 2021 Intel Corporation. All rights reserved. */
3
#include <linux/units.h>
4
#include <linux/io-64-nonatomic-lo-hi.h>
5
#include <linux/device.h>
6
#include <linux/delay.h>
7
#include <linux/pci.h>
8
#include <linux/pci-doe.h>
9
#include <linux/aer.h>
10
#include <cxlpci.h>
11
#include <cxlmem.h>
12
#include <cxl.h>
13
#include "core.h"
14
#include "trace.h"
15
16
/**
17
* DOC: cxl core pci
18
*
19
* Compute Express Link protocols are layered on top of PCIe. CXL core provides
20
* a set of helpers for CXL interactions which occur via PCIe.
21
*/
22
23
static unsigned short media_ready_timeout = 60;
24
module_param(media_ready_timeout, ushort, 0644);
25
MODULE_PARM_DESC(media_ready_timeout, "seconds to wait for media ready");
26
27
static int pci_get_port_num(struct pci_dev *pdev)
28
{
29
u32 lnkcap;
30
int type;
31
32
type = pci_pcie_type(pdev);
33
if (type != PCI_EXP_TYPE_DOWNSTREAM && type != PCI_EXP_TYPE_ROOT_PORT)
34
return -EINVAL;
35
36
if (pci_read_config_dword(pdev, pci_pcie_cap(pdev) + PCI_EXP_LNKCAP,
37
&lnkcap))
38
return -ENXIO;
39
40
return FIELD_GET(PCI_EXP_LNKCAP_PN, lnkcap);
41
}
42
43
/**
44
* __devm_cxl_add_dport_by_dev - allocate a dport by dport device
45
* @port: cxl_port that hosts the dport
46
* @dport_dev: 'struct device' of the dport
47
*
48
* Returns the allocated dport on success or ERR_PTR() of -errno on error
49
*/
50
struct cxl_dport *__devm_cxl_add_dport_by_dev(struct cxl_port *port,
51
struct device *dport_dev)
52
{
53
struct cxl_register_map map;
54
struct pci_dev *pdev;
55
int port_num, rc;
56
57
if (!dev_is_pci(dport_dev))
58
return ERR_PTR(-EINVAL);
59
60
pdev = to_pci_dev(dport_dev);
61
port_num = pci_get_port_num(pdev);
62
if (port_num < 0)
63
return ERR_PTR(port_num);
64
65
rc = cxl_find_regblock(pdev, CXL_REGLOC_RBI_COMPONENT, &map);
66
if (rc)
67
return ERR_PTR(rc);
68
69
device_lock_assert(&port->dev);
70
return devm_cxl_add_dport(port, dport_dev, port_num, map.resource);
71
}
72
EXPORT_SYMBOL_NS_GPL(__devm_cxl_add_dport_by_dev, "CXL");
73
74
static int cxl_dvsec_mem_range_valid(struct cxl_dev_state *cxlds, int id)
75
{
76
struct pci_dev *pdev = to_pci_dev(cxlds->dev);
77
int d = cxlds->cxl_dvsec;
78
bool valid = false;
79
int rc, i;
80
u32 temp;
81
82
if (id > CXL_DVSEC_RANGE_MAX)
83
return -EINVAL;
84
85
/* Check MEM INFO VALID bit first, give up after 1s */
86
i = 1;
87
do {
88
rc = pci_read_config_dword(pdev,
89
d + CXL_DVSEC_RANGE_SIZE_LOW(id),
90
&temp);
91
if (rc)
92
return rc;
93
94
valid = FIELD_GET(CXL_DVSEC_MEM_INFO_VALID, temp);
95
if (valid)
96
break;
97
msleep(1000);
98
} while (i--);
99
100
if (!valid) {
101
dev_err(&pdev->dev,
102
"Timeout awaiting memory range %d valid after 1s.\n",
103
id);
104
return -ETIMEDOUT;
105
}
106
107
return 0;
108
}
109
110
static int cxl_dvsec_mem_range_active(struct cxl_dev_state *cxlds, int id)
111
{
112
struct pci_dev *pdev = to_pci_dev(cxlds->dev);
113
int d = cxlds->cxl_dvsec;
114
bool active = false;
115
int rc, i;
116
u32 temp;
117
118
if (id > CXL_DVSEC_RANGE_MAX)
119
return -EINVAL;
120
121
/* Check MEM ACTIVE bit, up to 60s timeout by default */
122
for (i = media_ready_timeout; i; i--) {
123
rc = pci_read_config_dword(
124
pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(id), &temp);
125
if (rc)
126
return rc;
127
128
active = FIELD_GET(CXL_DVSEC_MEM_ACTIVE, temp);
129
if (active)
130
break;
131
msleep(1000);
132
}
133
134
if (!active) {
135
dev_err(&pdev->dev,
136
"timeout awaiting memory active after %d seconds\n",
137
media_ready_timeout);
138
return -ETIMEDOUT;
139
}
140
141
return 0;
142
}
143
144
/*
145
* Wait up to @media_ready_timeout for the device to report memory
146
* active.
147
*/
148
int cxl_await_media_ready(struct cxl_dev_state *cxlds)
149
{
150
struct pci_dev *pdev = to_pci_dev(cxlds->dev);
151
int d = cxlds->cxl_dvsec;
152
int rc, i, hdm_count;
153
u64 md_status;
154
u16 cap;
155
156
rc = pci_read_config_word(pdev,
157
d + CXL_DVSEC_CAP_OFFSET, &cap);
158
if (rc)
159
return rc;
160
161
hdm_count = FIELD_GET(CXL_DVSEC_HDM_COUNT_MASK, cap);
162
for (i = 0; i < hdm_count; i++) {
163
rc = cxl_dvsec_mem_range_valid(cxlds, i);
164
if (rc)
165
return rc;
166
}
167
168
for (i = 0; i < hdm_count; i++) {
169
rc = cxl_dvsec_mem_range_active(cxlds, i);
170
if (rc)
171
return rc;
172
}
173
174
md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);
175
if (!CXLMDEV_READY(md_status))
176
return -EIO;
177
178
return 0;
179
}
180
EXPORT_SYMBOL_NS_GPL(cxl_await_media_ready, "CXL");
181
182
/*
 * Set the MEM ENABLE bit of the DVSEC control register to @val.
 *
 * @val is expected to be either 0 or CXL_DVSEC_MEM_ENABLE.
 *
 * Returns 1 when the register already holds the requested state (nothing
 * is written), 0 after a write, or the negative value returned by the
 * config accessor. Only negative accessor results are treated as errors.
 */
static int cxl_set_mem_enable(struct cxl_dev_state *cxlds, u16 val)
{
	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
	int ctrl_pos = cxlds->cxl_dvsec + CXL_DVSEC_CTRL_OFFSET;
	u16 reg;
	int err;

	err = pci_read_config_word(pdev, ctrl_pos, &reg);
	if (err < 0)
		return err;

	if ((reg & CXL_DVSEC_MEM_ENABLE) == val)
		return 1;

	reg = (reg & ~CXL_DVSEC_MEM_ENABLE) | val;

	err = pci_write_config_word(pdev, ctrl_pos, reg);

	return err < 0 ? err : 0;
}
204
205
/* devm action callback: clear the DVSEC MEM ENABLE bit again. */
static void clear_mem_enable(void *cxlds)
{
	struct cxl_dev_state *state = cxlds;

	cxl_set_mem_enable(state, 0);
}
209
210
static int devm_cxl_enable_mem(struct device *host, struct cxl_dev_state *cxlds)
211
{
212
int rc;
213
214
rc = cxl_set_mem_enable(cxlds, CXL_DVSEC_MEM_ENABLE);
215
if (rc < 0)
216
return rc;
217
if (rc > 0)
218
return 0;
219
return devm_add_action_or_reset(host, clear_mem_enable, cxlds);
220
}
221
222
/* require dvsec ranges to be covered by a locked platform window */
223
static int dvsec_range_allowed(struct device *dev, const void *arg)
224
{
225
const struct range *dev_range = arg;
226
struct cxl_decoder *cxld;
227
228
if (!is_root_decoder(dev))
229
return 0;
230
231
cxld = to_cxl_decoder(dev);
232
233
if (!(cxld->flags & CXL_DECODER_F_RAM))
234
return 0;
235
236
return range_contains(&cxld->hpa_range, dev_range);
237
}
238
239
static void disable_hdm(void *_cxlhdm)
240
{
241
u32 global_ctrl;
242
struct cxl_hdm *cxlhdm = _cxlhdm;
243
void __iomem *hdm = cxlhdm->regs.hdm_decoder;
244
245
global_ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET);
246
writel(global_ctrl & ~CXL_HDM_DECODER_ENABLE,
247
hdm + CXL_HDM_DECODER_CTRL_OFFSET);
248
}
249
250
static int devm_cxl_enable_hdm(struct device *host, struct cxl_hdm *cxlhdm)
251
{
252
void __iomem *hdm = cxlhdm->regs.hdm_decoder;
253
u32 global_ctrl;
254
255
global_ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET);
256
writel(global_ctrl | CXL_HDM_DECODER_ENABLE,
257
hdm + CXL_HDM_DECODER_CTRL_OFFSET);
258
259
return devm_add_action_or_reset(host, disable_hdm, cxlhdm);
260
}
261
262
int cxl_dvsec_rr_decode(struct cxl_dev_state *cxlds,
263
struct cxl_endpoint_dvsec_info *info)
264
{
265
struct pci_dev *pdev = to_pci_dev(cxlds->dev);
266
struct device *dev = cxlds->dev;
267
int hdm_count, rc, i, ranges = 0;
268
int d = cxlds->cxl_dvsec;
269
u16 cap, ctrl;
270
271
if (!d) {
272
dev_dbg(dev, "No DVSEC Capability\n");
273
return -ENXIO;
274
}
275
276
rc = pci_read_config_word(pdev, d + CXL_DVSEC_CAP_OFFSET, &cap);
277
if (rc)
278
return rc;
279
280
if (!(cap & CXL_DVSEC_MEM_CAPABLE)) {
281
dev_dbg(dev, "Not MEM Capable\n");
282
return -ENXIO;
283
}
284
285
/*
286
* It is not allowed by spec for MEM.capable to be set and have 0 legacy
287
* HDM decoders (values > 2 are also undefined as of CXL 2.0). As this
288
* driver is for a spec defined class code which must be CXL.mem
289
* capable, there is no point in continuing to enable CXL.mem.
290
*/
291
hdm_count = FIELD_GET(CXL_DVSEC_HDM_COUNT_MASK, cap);
292
if (!hdm_count || hdm_count > 2)
293
return -EINVAL;
294
295
/*
296
* The current DVSEC values are moot if the memory capability is
297
* disabled, and they will remain moot after the HDM Decoder
298
* capability is enabled.
299
*/
300
rc = pci_read_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, &ctrl);
301
if (rc)
302
return rc;
303
304
info->mem_enabled = FIELD_GET(CXL_DVSEC_MEM_ENABLE, ctrl);
305
if (!info->mem_enabled)
306
return 0;
307
308
for (i = 0; i < hdm_count; i++) {
309
u64 base, size;
310
u32 temp;
311
312
rc = cxl_dvsec_mem_range_valid(cxlds, i);
313
if (rc)
314
return rc;
315
316
rc = pci_read_config_dword(
317
pdev, d + CXL_DVSEC_RANGE_SIZE_HIGH(i), &temp);
318
if (rc)
319
return rc;
320
321
size = (u64)temp << 32;
322
323
rc = pci_read_config_dword(
324
pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(i), &temp);
325
if (rc)
326
return rc;
327
328
size |= temp & CXL_DVSEC_MEM_SIZE_LOW_MASK;
329
if (!size) {
330
continue;
331
}
332
333
rc = pci_read_config_dword(
334
pdev, d + CXL_DVSEC_RANGE_BASE_HIGH(i), &temp);
335
if (rc)
336
return rc;
337
338
base = (u64)temp << 32;
339
340
rc = pci_read_config_dword(
341
pdev, d + CXL_DVSEC_RANGE_BASE_LOW(i), &temp);
342
if (rc)
343
return rc;
344
345
base |= temp & CXL_DVSEC_MEM_BASE_LOW_MASK;
346
347
info->dvsec_range[ranges++] = (struct range) {
348
.start = base,
349
.end = base + size - 1
350
};
351
}
352
353
info->ranges = ranges;
354
355
return 0;
356
}
357
EXPORT_SYMBOL_NS_GPL(cxl_dvsec_rr_decode, "CXL");
358
359
/**
360
* cxl_hdm_decode_init() - Setup HDM decoding for the endpoint
361
* @cxlds: Device state
362
* @cxlhdm: Mapped HDM decoder Capability
363
* @info: Cached DVSEC range registers info
364
*
365
* Try to enable the endpoint's HDM Decoder Capability
366
*/
367
int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm,
368
struct cxl_endpoint_dvsec_info *info)
369
{
370
void __iomem *hdm = cxlhdm->regs.hdm_decoder;
371
struct cxl_port *port = cxlhdm->port;
372
struct device *dev = cxlds->dev;
373
struct cxl_port *root;
374
int i, rc, allowed;
375
u32 global_ctrl = 0;
376
377
if (hdm)
378
global_ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET);
379
380
/*
381
* If the HDM Decoder Capability is already enabled then assume
382
* that some other agent like platform firmware set it up.
383
*/
384
if (global_ctrl & CXL_HDM_DECODER_ENABLE || (!hdm && info->mem_enabled))
385
return devm_cxl_enable_mem(&port->dev, cxlds);
386
387
/*
388
* If the HDM Decoder Capability does not exist and DVSEC was
389
* not setup, the DVSEC based emulation cannot be used.
390
*/
391
if (!hdm)
392
return -ENODEV;
393
394
/* The HDM Decoder Capability exists but is globally disabled. */
395
396
/*
397
* If the DVSEC CXL Range registers are not enabled, just
398
* enable and use the HDM Decoder Capability registers.
399
*/
400
if (!info->mem_enabled) {
401
rc = devm_cxl_enable_hdm(&port->dev, cxlhdm);
402
if (rc)
403
return rc;
404
405
return devm_cxl_enable_mem(&port->dev, cxlds);
406
}
407
408
/*
409
* Per CXL 2.0 Section 8.1.3.8.3 and 8.1.3.8.4 DVSEC CXL Range 1 Base
410
* [High,Low] when HDM operation is enabled the range register values
411
* are ignored by the device, but the spec also recommends matching the
412
* DVSEC Range 1,2 to HDM Decoder Range 0,1. So, non-zero info->ranges
413
* are expected even though Linux does not require or maintain that
414
* match. Check if at least one DVSEC range is enabled and allowed by
415
* the platform. That is, the DVSEC range must be covered by a locked
416
* platform window (CFMWS). Fail otherwise as the endpoint's decoders
417
* cannot be used.
418
*/
419
420
root = to_cxl_port(port->dev.parent);
421
while (!is_cxl_root(root) && is_cxl_port(root->dev.parent))
422
root = to_cxl_port(root->dev.parent);
423
if (!is_cxl_root(root)) {
424
dev_err(dev, "Failed to acquire root port for HDM enable\n");
425
return -ENODEV;
426
}
427
428
for (i = 0, allowed = 0; i < info->ranges; i++) {
429
struct device *cxld_dev;
430
431
cxld_dev = device_find_child(&root->dev, &info->dvsec_range[i],
432
dvsec_range_allowed);
433
if (!cxld_dev) {
434
dev_dbg(dev, "DVSEC Range%d denied by platform\n", i);
435
continue;
436
}
437
dev_dbg(dev, "DVSEC Range%d allowed by platform\n", i);
438
put_device(cxld_dev);
439
allowed++;
440
}
441
442
if (!allowed) {
443
dev_err(dev, "Range register decodes outside platform defined CXL ranges.\n");
444
return -ENXIO;
445
}
446
447
return 0;
448
}
449
EXPORT_SYMBOL_NS_GPL(cxl_hdm_decode_init, "CXL");
450
451
/* Field layout of the CXL DOE Table Access request/response header dword */
#define CXL_DOE_TABLE_ACCESS_REQ_CODE		0x000000ff
#define   CXL_DOE_TABLE_ACCESS_REQ_CODE_READ	0
#define CXL_DOE_TABLE_ACCESS_TABLE_TYPE		0x0000ff00
#define   CXL_DOE_TABLE_ACCESS_TABLE_TYPE_CDATA	0
#define CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE	0xffff0000
/* Entry handle value that terminates the table walk */
#define CXL_DOE_TABLE_ACCESS_LAST_ENTRY		0xffff
/* DOE protocol number passed to pci_doe() for table access */
#define CXL_DOE_PROTOCOL_TABLE_ACCESS 2

/* Build a little-endian "read CDAT entry @entry_handle" request dword */
#define CDAT_DOE_REQ(entry_handle) cpu_to_le32				\
	(FIELD_PREP(CXL_DOE_TABLE_ACCESS_REQ_CODE,			\
		    CXL_DOE_TABLE_ACCESS_REQ_CODE_READ) |		\
	 FIELD_PREP(CXL_DOE_TABLE_ACCESS_TABLE_TYPE,			\
		    CXL_DOE_TABLE_ACCESS_TABLE_TYPE_CDATA) |		\
	 FIELD_PREP(CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE, (entry_handle)))
465
466
static int cxl_cdat_get_length(struct device *dev,
467
struct pci_doe_mb *doe_mb,
468
size_t *length)
469
{
470
__le32 request = CDAT_DOE_REQ(0);
471
__le32 response[2];
472
int rc;
473
474
rc = pci_doe(doe_mb, PCI_VENDOR_ID_CXL,
475
CXL_DOE_PROTOCOL_TABLE_ACCESS,
476
&request, sizeof(request),
477
&response, sizeof(response));
478
if (rc < 0) {
479
dev_err(dev, "DOE failed: %d", rc);
480
return rc;
481
}
482
if (rc < sizeof(response))
483
return -EIO;
484
485
*length = le32_to_cpu(response[1]);
486
dev_dbg(dev, "CDAT length %zu\n", *length);
487
488
return 0;
489
}
490
491
/*
 * Read the CDAT table entry by entry into the buffer starting at @rsp.
 *
 * On entry *@length is the number of table bytes the buffer can hold after
 * the leading DOE header dword. Each response is received directly at the
 * current write position, so its header dword lands on the last dword of
 * the previous entry; that dword is saved beforehand and restored
 * afterwards. On success *@length is reduced to the number of table bytes
 * actually received.
 *
 * Returns 0 on success, the negative pci_doe() error, or -EIO when a
 * response is truncated or its size disagrees with the entry it carries.
 */
static int cxl_cdat_read_table(struct device *dev,
			       struct pci_doe_mb *doe_mb,
			       struct cdat_doe_rsp *rsp, size_t *length)
{
	size_t received, remaining = *length;
	unsigned int entry_handle = 0;
	union cdat_data *data;
	__le32 saved_dw = 0;

	do {
		__le32 request = CDAT_DOE_REQ(entry_handle);
		int rc;

		rc = pci_doe(doe_mb, PCI_VENDOR_ID_CXL,
			     CXL_DOE_PROTOCOL_TABLE_ACCESS,
			     &request, sizeof(request),
			     rsp, sizeof(*rsp) + remaining);
		if (rc < 0) {
			/* Terminate the message so it is not merged with later logs */
			dev_err(dev, "DOE failed: %d\n", rc);
			return rc;
		}

		if (rc < sizeof(*rsp))
			return -EIO;

		data = (union cdat_data *)rsp->data;
		received = rc - sizeof(*rsp);

		if (entry_handle == 0) {
			/* Handle 0 must deliver exactly the CDAT header */
			if (received != sizeof(data->header))
				return -EIO;
		} else {
			/* Payload must match the entry's own length field */
			if (received < sizeof(data->entry) ||
			    received != le16_to_cpu(data->entry.length))
				return -EIO;
		}

		/* Get the CXL table access header entry handle */
		entry_handle = FIELD_GET(CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE,
					 le32_to_cpu(rsp->doe_header));

		/*
		 * Table Access Response Header overwrote the last DW of
		 * previous entry, so restore that DW
		 */
		rsp->doe_header = saved_dw;
		remaining -= received;
		rsp = (void *)rsp + received;
		saved_dw = rsp->doe_header;
	} while (entry_handle != CXL_DOE_TABLE_ACCESS_LAST_ENTRY);

	/* Length in CDAT header may exceed concatenation of CDAT entries */
	*length -= remaining;

	return 0;
}
547
548
/*
 * Sum all @size bytes of @buf modulo 256.
 *
 * Returns the 8-bit sum; 0 for an empty buffer.
 */
static unsigned char cdat_checksum(void *buf, size_t size)
{
	const unsigned char *byte = buf;
	unsigned char total = 0;

	while (size--)
		total += *byte++;

	return total;
}
557
558
/**
559
* read_cdat_data - Read the CDAT data on this port
560
* @port: Port to read data from
561
*
562
* This call will sleep waiting for responses from the DOE mailbox.
563
*/
564
void read_cdat_data(struct cxl_port *port)
565
{
566
struct device *uport = port->uport_dev;
567
struct device *dev = &port->dev;
568
struct pci_doe_mb *doe_mb;
569
struct pci_dev *pdev = NULL;
570
struct cxl_memdev *cxlmd;
571
struct cdat_doe_rsp *buf;
572
size_t table_length, length;
573
int rc;
574
575
if (is_cxl_memdev(uport)) {
576
struct device *host;
577
578
cxlmd = to_cxl_memdev(uport);
579
host = cxlmd->dev.parent;
580
if (dev_is_pci(host))
581
pdev = to_pci_dev(host);
582
} else if (dev_is_pci(uport)) {
583
pdev = to_pci_dev(uport);
584
}
585
586
if (!pdev)
587
return;
588
589
doe_mb = pci_find_doe_mailbox(pdev, PCI_VENDOR_ID_CXL,
590
CXL_DOE_PROTOCOL_TABLE_ACCESS);
591
if (!doe_mb) {
592
dev_dbg(dev, "No CDAT mailbox\n");
593
return;
594
}
595
596
port->cdat_available = true;
597
598
if (cxl_cdat_get_length(dev, doe_mb, &length)) {
599
dev_dbg(dev, "No CDAT length\n");
600
return;
601
}
602
603
/*
604
* The begin of the CDAT buffer needs space for additional 4
605
* bytes for the DOE header. Table data starts afterwards.
606
*/
607
buf = devm_kzalloc(dev, sizeof(*buf) + length, GFP_KERNEL);
608
if (!buf)
609
goto err;
610
611
table_length = length;
612
613
rc = cxl_cdat_read_table(dev, doe_mb, buf, &length);
614
if (rc)
615
goto err;
616
617
if (table_length != length)
618
dev_warn(dev, "Malformed CDAT table length (%zu:%zu), discarding trailing data\n",
619
table_length, length);
620
621
if (cdat_checksum(buf->data, length))
622
goto err;
623
624
port->cdat.table = buf->data;
625
port->cdat.length = length;
626
627
return;
628
err:
629
/* Don't leave table data allocated on error */
630
devm_kfree(dev, buf);
631
dev_err(dev, "Failed to read/validate CDAT.\n");
632
}
633
EXPORT_SYMBOL_NS_GPL(read_cdat_data, "CXL");
634
635
static void __cxl_handle_cor_ras(struct cxl_dev_state *cxlds,
636
void __iomem *ras_base)
637
{
638
void __iomem *addr;
639
u32 status;
640
641
if (!ras_base)
642
return;
643
644
addr = ras_base + CXL_RAS_CORRECTABLE_STATUS_OFFSET;
645
status = readl(addr);
646
if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) {
647
writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr);
648
trace_cxl_aer_correctable_error(cxlds->cxlmd, status);
649
}
650
}
651
652
static void cxl_handle_endpoint_cor_ras(struct cxl_dev_state *cxlds)
653
{
654
return __cxl_handle_cor_ras(cxlds, cxlds->regs.ras);
655
}
656
657
/* CXL spec rev3.0 8.2.4.16.1 */
658
static void header_log_copy(void __iomem *ras_base, u32 *log)
659
{
660
void __iomem *addr;
661
u32 *log_addr;
662
int i, log_u32_size = CXL_HEADERLOG_SIZE / sizeof(u32);
663
664
addr = ras_base + CXL_RAS_HEADER_LOG_OFFSET;
665
log_addr = log;
666
667
for (i = 0; i < log_u32_size; i++) {
668
*log_addr = readl(addr);
669
log_addr++;
670
addr += sizeof(u32);
671
}
672
}
673
674
/*
675
* Log the state of the RAS status registers and prepare them to log the
676
* next error status. Return 1 if reset needed.
677
*/
678
static bool __cxl_handle_ras(struct cxl_dev_state *cxlds,
679
void __iomem *ras_base)
680
{
681
u32 hl[CXL_HEADERLOG_SIZE_U32];
682
void __iomem *addr;
683
u32 status;
684
u32 fe;
685
686
if (!ras_base)
687
return false;
688
689
addr = ras_base + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET;
690
status = readl(addr);
691
if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK))
692
return false;
693
694
/* If multiple errors, log header points to first error from ctrl reg */
695
if (hweight32(status) > 1) {
696
void __iomem *rcc_addr =
697
ras_base + CXL_RAS_CAP_CONTROL_OFFSET;
698
699
fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK,
700
readl(rcc_addr)));
701
} else {
702
fe = status;
703
}
704
705
header_log_copy(ras_base, hl);
706
trace_cxl_aer_uncorrectable_error(cxlds->cxlmd, status, fe, hl);
707
writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr);
708
709
return true;
710
}
711
712
static bool cxl_handle_endpoint_ras(struct cxl_dev_state *cxlds)
713
{
714
return __cxl_handle_ras(cxlds, cxlds->regs.ras);
715
}
716
717
#ifdef CONFIG_PCIEAER_CXL
718
719
static void cxl_dport_map_rch_aer(struct cxl_dport *dport)
720
{
721
resource_size_t aer_phys;
722
struct device *host;
723
u16 aer_cap;
724
725
aer_cap = cxl_rcrb_to_aer(dport->dport_dev, dport->rcrb.base);
726
if (aer_cap) {
727
host = dport->reg_map.host;
728
aer_phys = aer_cap + dport->rcrb.base;
729
dport->regs.dport_aer = devm_cxl_iomap_block(host, aer_phys,
730
sizeof(struct aer_capability_regs));
731
}
732
}
733
734
static void cxl_dport_map_ras(struct cxl_dport *dport)
735
{
736
struct cxl_register_map *map = &dport->reg_map;
737
struct device *dev = dport->dport_dev;
738
739
if (!map->component_map.ras.valid)
740
dev_dbg(dev, "RAS registers not found\n");
741
else if (cxl_map_component_regs(map, &dport->regs.component,
742
BIT(CXL_CM_CAP_CAP_ID_RAS)))
743
dev_dbg(dev, "Failed to map RAS capability.\n");
744
}
745
746
static void cxl_disable_rch_root_ints(struct cxl_dport *dport)
747
{
748
void __iomem *aer_base = dport->regs.dport_aer;
749
u32 aer_cmd_mask, aer_cmd;
750
751
if (!aer_base)
752
return;
753
754
/*
755
* Disable RCH root port command interrupts.
756
* CXL 3.0 12.2.1.1 - RCH Downstream Port-detected Errors
757
*
758
* This sequence may not be necessary. CXL spec states disabling
759
* the root cmd register's interrupts is required. But, PCI spec
760
* shows these are disabled by default on reset.
761
*/
762
aer_cmd_mask = (PCI_ERR_ROOT_CMD_COR_EN |
763
PCI_ERR_ROOT_CMD_NONFATAL_EN |
764
PCI_ERR_ROOT_CMD_FATAL_EN);
765
aer_cmd = readl(aer_base + PCI_ERR_ROOT_COMMAND);
766
aer_cmd &= ~aer_cmd_mask;
767
writel(aer_cmd, aer_base + PCI_ERR_ROOT_COMMAND);
768
}
769
770
/**
771
* cxl_dport_init_ras_reporting - Setup CXL RAS report on this dport
772
* @dport: the cxl_dport that needs to be initialized
773
* @host: host device for devm operations
774
*/
775
void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host)
776
{
777
dport->reg_map.host = host;
778
cxl_dport_map_ras(dport);
779
780
if (dport->rch) {
781
struct pci_host_bridge *host_bridge = to_pci_host_bridge(dport->dport_dev);
782
783
if (!host_bridge->native_aer)
784
return;
785
786
cxl_dport_map_rch_aer(dport);
787
cxl_disable_rch_root_ints(dport);
788
}
789
}
790
EXPORT_SYMBOL_NS_GPL(cxl_dport_init_ras_reporting, "CXL");
791
792
static void cxl_handle_rdport_cor_ras(struct cxl_dev_state *cxlds,
793
struct cxl_dport *dport)
794
{
795
return __cxl_handle_cor_ras(cxlds, dport->regs.ras);
796
}
797
798
static bool cxl_handle_rdport_ras(struct cxl_dev_state *cxlds,
799
struct cxl_dport *dport)
800
{
801
return __cxl_handle_ras(cxlds, dport->regs.ras);
802
}
803
804
/*
805
* Copy the AER capability registers using 32 bit read accesses.
806
* This is necessary because RCRB AER capability is MMIO mapped. Clear the
807
* status after copying.
808
*
809
* @aer_base: base address of AER capability block in RCRB
810
* @aer_regs: destination for copying AER capability
811
*/
812
static bool cxl_rch_get_aer_info(void __iomem *aer_base,
813
struct aer_capability_regs *aer_regs)
814
{
815
int read_cnt = sizeof(struct aer_capability_regs) / sizeof(u32);
816
u32 *aer_regs_buf = (u32 *)aer_regs;
817
int n;
818
819
if (!aer_base)
820
return false;
821
822
/* Use readl() to guarantee 32-bit accesses */
823
for (n = 0; n < read_cnt; n++)
824
aer_regs_buf[n] = readl(aer_base + n * sizeof(u32));
825
826
writel(aer_regs->uncor_status, aer_base + PCI_ERR_UNCOR_STATUS);
827
writel(aer_regs->cor_status, aer_base + PCI_ERR_COR_STATUS);
828
829
return true;
830
}
831
832
/* Get AER severity. Return false if there is no error. */
833
static bool cxl_rch_get_aer_severity(struct aer_capability_regs *aer_regs,
834
int *severity)
835
{
836
if (aer_regs->uncor_status & ~aer_regs->uncor_mask) {
837
if (aer_regs->uncor_status & PCI_ERR_ROOT_FATAL_RCV)
838
*severity = AER_FATAL;
839
else
840
*severity = AER_NONFATAL;
841
return true;
842
}
843
844
if (aer_regs->cor_status & ~aer_regs->cor_mask) {
845
*severity = AER_CORRECTABLE;
846
return true;
847
}
848
849
return false;
850
}
851
852
static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds)
853
{
854
struct pci_dev *pdev = to_pci_dev(cxlds->dev);
855
struct aer_capability_regs aer_regs;
856
struct cxl_dport *dport;
857
int severity;
858
859
struct cxl_port *port __free(put_cxl_port) =
860
cxl_pci_find_port(pdev, &dport);
861
if (!port)
862
return;
863
864
if (!cxl_rch_get_aer_info(dport->regs.dport_aer, &aer_regs))
865
return;
866
867
if (!cxl_rch_get_aer_severity(&aer_regs, &severity))
868
return;
869
870
pci_print_aer(pdev, severity, &aer_regs);
871
872
if (severity == AER_CORRECTABLE)
873
cxl_handle_rdport_cor_ras(cxlds, dport);
874
else
875
cxl_handle_rdport_ras(cxlds, dport);
876
}
877
878
#else
879
/* Without CONFIG_PCIEAER_CXL there is no RCH dport error handling. */
static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds)
{
}
880
#endif
881
882
void cxl_cor_error_detected(struct pci_dev *pdev)
883
{
884
struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
885
struct device *dev = &cxlds->cxlmd->dev;
886
887
scoped_guard(device, dev) {
888
if (!dev->driver) {
889
dev_warn(&pdev->dev,
890
"%s: memdev disabled, abort error handling\n",
891
dev_name(dev));
892
return;
893
}
894
895
if (cxlds->rcd)
896
cxl_handle_rdport_errors(cxlds);
897
898
cxl_handle_endpoint_cor_ras(cxlds);
899
}
900
}
901
EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, "CXL");
902
903
pci_ers_result_t cxl_error_detected(struct pci_dev *pdev,
904
pci_channel_state_t state)
905
{
906
struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
907
struct cxl_memdev *cxlmd = cxlds->cxlmd;
908
struct device *dev = &cxlmd->dev;
909
bool ue;
910
911
scoped_guard(device, dev) {
912
if (!dev->driver) {
913
dev_warn(&pdev->dev,
914
"%s: memdev disabled, abort error handling\n",
915
dev_name(dev));
916
return PCI_ERS_RESULT_DISCONNECT;
917
}
918
919
if (cxlds->rcd)
920
cxl_handle_rdport_errors(cxlds);
921
/*
922
* A frozen channel indicates an impending reset which is fatal to
923
* CXL.mem operation, and will likely crash the system. On the off
924
* chance the situation is recoverable dump the status of the RAS
925
* capability registers and bounce the active state of the memdev.
926
*/
927
ue = cxl_handle_endpoint_ras(cxlds);
928
}
929
930
931
switch (state) {
932
case pci_channel_io_normal:
933
if (ue) {
934
device_release_driver(dev);
935
return PCI_ERS_RESULT_NEED_RESET;
936
}
937
return PCI_ERS_RESULT_CAN_RECOVER;
938
case pci_channel_io_frozen:
939
dev_warn(&pdev->dev,
940
"%s: frozen state error detected, disable CXL.mem\n",
941
dev_name(dev));
942
device_release_driver(dev);
943
return PCI_ERS_RESULT_NEED_RESET;
944
case pci_channel_io_perm_failure:
945
dev_warn(&pdev->dev,
946
"failure state error detected, request disconnect\n");
947
return PCI_ERS_RESULT_DISCONNECT;
948
}
949
return PCI_ERS_RESULT_NEED_RESET;
950
}
951
EXPORT_SYMBOL_NS_GPL(cxl_error_detected, "CXL");
952
953
/* Flit size in bytes: 256 when cxl_pci_flit_256() says so, else 68. */
static int cxl_flit_size(struct pci_dev *pdev)
{
	return cxl_pci_flit_256(pdev) ? 256 : 68;
}
960
961
/**
962
* cxl_pci_get_latency - calculate the link latency for the PCIe link
963
* @pdev: PCI device
964
*
965
* return: calculated latency or 0 for no latency
966
*
967
* CXL Memory Device SW Guide v1.0 2.11.4 Link latency calculation
968
* Link latency = LinkPropagationLatency + FlitLatency + RetimerLatency
969
* LinkProgationLatency is negligible, so 0 will be used
970
* RetimerLatency is assumed to be negligible and 0 will be used
971
* FlitLatency = FlitSize / LinkBandwidth
972
* FlitSize is defined by spec. CXL rev3.0 4.2.1.
973
* 68B flit is used up to 32GT/s. >32GT/s, 256B flit size is used.
974
* The FlitLatency is converted to picoseconds.
975
*/
976
long cxl_pci_get_latency(struct pci_dev *pdev)
977
{
978
long bw;
979
980
bw = pcie_link_speed_mbps(pdev);
981
if (bw < 0)
982
return 0;
983
bw /= BITS_PER_BYTE;
984
985
return cxl_flit_size(pdev) * MEGA / bw;
986
}
987
988
static int __cxl_endpoint_decoder_reset_detected(struct device *dev, void *data)
989
{
990
struct cxl_port *port = data;
991
struct cxl_decoder *cxld;
992
struct cxl_hdm *cxlhdm;
993
void __iomem *hdm;
994
u32 ctrl;
995
996
if (!is_endpoint_decoder(dev))
997
return 0;
998
999
cxld = to_cxl_decoder(dev);
1000
if ((cxld->flags & CXL_DECODER_F_ENABLE) == 0)
1001
return 0;
1002
1003
cxlhdm = dev_get_drvdata(&port->dev);
1004
hdm = cxlhdm->regs.hdm_decoder;
1005
ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(cxld->id));
1006
1007
return !FIELD_GET(CXL_HDM_DECODER0_CTRL_COMMITTED, ctrl);
1008
}
1009
1010
bool cxl_endpoint_decoder_reset_detected(struct cxl_port *port)
1011
{
1012
return device_for_each_child(&port->dev, port,
1013
__cxl_endpoint_decoder_reset_detected);
1014
}
1015
EXPORT_SYMBOL_NS_GPL(cxl_endpoint_decoder_reset_detected, "CXL");
1016
1017
int cxl_pci_get_bandwidth(struct pci_dev *pdev, struct access_coordinate *c)
1018
{
1019
int speed, bw;
1020
u16 lnksta;
1021
u32 width;
1022
1023
speed = pcie_link_speed_mbps(pdev);
1024
if (speed < 0)
1025
return speed;
1026
speed /= BITS_PER_BYTE;
1027
1028
pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnksta);
1029
width = FIELD_GET(PCI_EXP_LNKSTA_NLW, lnksta);
1030
bw = speed * width;
1031
1032
for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
1033
c[i].read_bandwidth = bw;
1034
c[i].write_bandwidth = bw;
1035
}
1036
1037
return 0;
1038
}
1039
1040
/*
 * Program the largest timeout so that platforms optimize their GPF flow
 * to avoid the implied worst-case delays. On a sane platform every device
 * completes GPF within the energy budget of the GPF flow. The kernel lacks
 * the information to choose anything better than "maximize timeouts and
 * hope it works".
 *
 * A misbehaving device could stall GPF for all other devices and exhaust
 * the platform's energy budget. The spec, however, seems to treat moving
 * on from slow devices as a virtue, while it is impossible to know whether
 * the slow device is in fact *the* most critical device in the system to
 * wait for.
 */
#define GPF_TIMEOUT_BASE_MAX 2
#define GPF_TIMEOUT_SCALE_MAX 7 /* 10 seconds */
1056
1057
u16 cxl_gpf_get_dvsec(struct device *dev)
1058
{
1059
struct pci_dev *pdev;
1060
bool is_port = true;
1061
u16 dvsec;
1062
1063
if (!dev_is_pci(dev))
1064
return 0;
1065
1066
pdev = to_pci_dev(dev);
1067
if (pci_pcie_type(pdev) == PCI_EXP_TYPE_ENDPOINT)
1068
is_port = false;
1069
1070
dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL,
1071
is_port ? CXL_DVSEC_PORT_GPF : CXL_DVSEC_DEVICE_GPF);
1072
if (!dvsec)
1073
dev_warn(dev, "%s GPF DVSEC not present\n",
1074
is_port ? "Port" : "Device");
1075
return dvsec;
1076
}
1077
EXPORT_SYMBOL_NS_GPL(cxl_gpf_get_dvsec, "CXL");
1078
1079
static int update_gpf_port_dvsec(struct pci_dev *pdev, int dvsec, int phase)
1080
{
1081
u64 base, scale;
1082
int rc, offset;
1083
u16 ctrl;
1084
1085
switch (phase) {
1086
case 1:
1087
offset = CXL_DVSEC_PORT_GPF_PHASE_1_CONTROL_OFFSET;
1088
base = CXL_DVSEC_PORT_GPF_PHASE_1_TMO_BASE_MASK;
1089
scale = CXL_DVSEC_PORT_GPF_PHASE_1_TMO_SCALE_MASK;
1090
break;
1091
case 2:
1092
offset = CXL_DVSEC_PORT_GPF_PHASE_2_CONTROL_OFFSET;
1093
base = CXL_DVSEC_PORT_GPF_PHASE_2_TMO_BASE_MASK;
1094
scale = CXL_DVSEC_PORT_GPF_PHASE_2_TMO_SCALE_MASK;
1095
break;
1096
default:
1097
return -EINVAL;
1098
}
1099
1100
rc = pci_read_config_word(pdev, dvsec + offset, &ctrl);
1101
if (rc)
1102
return rc;
1103
1104
if (FIELD_GET(base, ctrl) == GPF_TIMEOUT_BASE_MAX &&
1105
FIELD_GET(scale, ctrl) == GPF_TIMEOUT_SCALE_MAX)
1106
return 0;
1107
1108
ctrl = FIELD_PREP(base, GPF_TIMEOUT_BASE_MAX);
1109
ctrl |= FIELD_PREP(scale, GPF_TIMEOUT_SCALE_MAX);
1110
1111
rc = pci_write_config_word(pdev, dvsec + offset, ctrl);
1112
if (!rc)
1113
pci_dbg(pdev, "Port GPF phase %d timeout: %d0 secs\n",
1114
phase, GPF_TIMEOUT_BASE_MAX);
1115
1116
return rc;
1117
}
1118
1119
int cxl_gpf_port_setup(struct cxl_dport *dport)
1120
{
1121
if (!dport)
1122
return -EINVAL;
1123
1124
if (!dport->gpf_dvsec) {
1125
struct pci_dev *pdev;
1126
int dvsec;
1127
1128
dvsec = cxl_gpf_get_dvsec(dport->dport_dev);
1129
if (!dvsec)
1130
return -EINVAL;
1131
1132
dport->gpf_dvsec = dvsec;
1133
pdev = to_pci_dev(dport->dport_dev);
1134
update_gpf_port_dvsec(pdev, dport->gpf_dvsec, 1);
1135
update_gpf_port_dvsec(pdev, dport->gpf_dvsec, 2);
1136
}
1137
1138
return 0;
1139
}
1140
1141
/* State shared with the callback during a pci_walk_bus() traversal */
struct cxl_walk_context {
	struct pci_bus *bus;	/* only devices directly on this bus count */
	struct cxl_port *port;
	int type;		/* PCIe port type to match */
	int error;
	int count;		/* number of matching devices seen */
};
1148
1149
static int count_dports(struct pci_dev *pdev, void *data)
1150
{
1151
struct cxl_walk_context *ctx = data;
1152
int type = pci_pcie_type(pdev);
1153
1154
if (pdev->bus != ctx->bus)
1155
return 0;
1156
if (!pci_is_pcie(pdev))
1157
return 0;
1158
if (type != ctx->type)
1159
return 0;
1160
1161
ctx->count++;
1162
return 0;
1163
}
1164
1165
int cxl_port_get_possible_dports(struct cxl_port *port)
1166
{
1167
struct pci_bus *bus = cxl_port_to_pci_bus(port);
1168
struct cxl_walk_context ctx;
1169
int type;
1170
1171
if (!bus) {
1172
dev_err(&port->dev, "No PCI bus found for port %s\n",
1173
dev_name(&port->dev));
1174
return -ENXIO;
1175
}
1176
1177
if (pci_is_root_bus(bus))
1178
type = PCI_EXP_TYPE_ROOT_PORT;
1179
else
1180
type = PCI_EXP_TYPE_DOWNSTREAM;
1181
1182
ctx = (struct cxl_walk_context) {
1183
.bus = bus,
1184
.type = type,
1185
};
1186
pci_walk_bus(bus, count_dports, &ctx);
1187
1188
return ctx.count;
1189
}
1190
1191