Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/arch/powerpc/platforms/powernv/ocxl.c
26481 views
1
// SPDX-License-Identifier: GPL-2.0+
2
// Copyright 2017 IBM Corp.
3
#include <asm/pnv-ocxl.h>
4
#include <asm/opal.h>
5
#include <misc/ocxl-config.h>
6
#include "pci.h"
7
8
/* Value reported through the 'cap' output of pnv_ocxl_get_tl_cap() */
#define PNV_OCXL_TL_P9_RECV_CAP		0x000000000000000Full
/* Number of actags the hardware supports per link */
#define PNV_OCXL_ACTAG_MAX		64
/* PASIDs are 20-bit, but on P9, NPU can only handle 15 bits */
#define PNV_OCXL_PASID_BITS		15
#define PNV_OCXL_PASID_MAX		((1 << PNV_OCXL_PASID_BITS) - 1)

/*
 * Fields of the dword read at OCXL_DVSEC_FUNC_OFF_INDEX.
 *
 * AFU_PRESENT uses an unsigned constant: shifting a signed 1 into
 * bit 31 is undefined behaviour and would give a negative value.
 */
#define AFU_PRESENT			(1U << 31)
#define AFU_INDEX_MASK			0x3F000000
#define AFU_INDEX_SHIFT			24
/* Mask applied to the word read at OCXL_DVSEC_AFU_CTRL_ACTAG_SUP */
#define ACTAG_MASK			0xFFF
18
19
20
struct actag_range {
21
u16 start;
22
u16 count;
23
};
24
25
struct npu_link {
26
struct list_head list;
27
int domain;
28
int bus;
29
int dev;
30
u16 fn_desired_actags[8];
31
struct actag_range fn_actags[8];
32
bool assignment_done;
33
};
34
/* List of npu_link structures, searched and extended by find_link() */
static LIST_HEAD(links_list);
/* Held by every caller of find_link() */
static DEFINE_MUTEX(links_list_lock);
36
37
38
/*
39
* opencapi actags handling:
40
*
41
* When sending commands, the opencapi device references the memory
42
* context it's targeting with an 'actag', which is really an alias
43
* for a (BDF, pasid) combination. When it receives a command, the NPU
44
* must do a lookup of the actag to identify the memory context. The
45
* hardware supports a finite number of actags per link (64 for
46
* POWER9).
47
*
48
* The device can carry multiple functions, and each function can have
49
* multiple AFUs. Each AFU advertises in its config space the number
50
* of desired actags. The host must configure in the config space of
51
* the AFU how many actags the AFU is really allowed to use (which can
52
* be less than what the AFU desires).
53
*
54
* When a PCI function is probed by the driver, it has no visibility
55
* about the other PCI functions and how many actags they'd like,
56
* which makes it impossible to distribute actags fairly among AFUs.
57
*
58
* Unfortunately, the only way to know how many actags a function
59
* desires is by looking at the data for each AFU in the config space
60
* and adding them up. Similarly, the only way to know how many actags
61
* all the functions of the physical device desire is by adding the
62
* previously computed function counts. Then we can match that against
63
* what the hardware supports.
64
*
65
* To get a comprehensive view, we use a 'pci fixup': at the end of
66
* PCI enumeration, each function counts how many actags its AFUs
67
* desire and we save it in a 'npu_link' structure, shared between all
68
* the PCI functions of the same device. Therefore, when the first
69
* function is probed by the driver, we can get an idea of the total
70
* count of desired actags for the device, and assign the actags to
71
* the AFUs, by pro-rating if needed.
72
*/
73
74
static int find_dvsec_from_pos(struct pci_dev *dev, int dvsec_id, int pos)
75
{
76
int vsec = pos;
77
u16 vendor, id;
78
79
while ((vsec = pci_find_next_ext_capability(dev, vsec,
80
OCXL_EXT_CAP_ID_DVSEC))) {
81
pci_read_config_word(dev, vsec + OCXL_DVSEC_VENDOR_OFFSET,
82
&vendor);
83
pci_read_config_word(dev, vsec + OCXL_DVSEC_ID_OFFSET, &id);
84
if (vendor == PCI_VENDOR_ID_IBM && id == dvsec_id)
85
return vsec;
86
}
87
return 0;
88
}
89
90
/*
 * Return the offset of the AFU control DVSEC of @dev whose AFU index
 * byte equals @afu_idx, or 0 if there is none.
 */
static int find_dvsec_afu_ctrl(struct pci_dev *dev, u8 afu_idx)
{
	int pos = 0;
	u8 cur_idx;

	for (;;) {
		pos = find_dvsec_from_pos(dev, OCXL_DVSEC_AFU_CTRL_ID, pos);
		if (!pos)
			return 0;

		pci_read_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_AFU_IDX,
				     &cur_idx);
		if (cur_idx == afu_idx)
			return pos;
	}
}
104
105
static int get_max_afu_index(struct pci_dev *dev, int *afu_idx)
106
{
107
int pos;
108
u32 val;
109
110
pos = pci_find_dvsec_capability(dev, PCI_VENDOR_ID_IBM,
111
OCXL_DVSEC_FUNC_ID);
112
if (!pos)
113
return -ESRCH;
114
115
pci_read_config_dword(dev, pos + OCXL_DVSEC_FUNC_OFF_INDEX, &val);
116
if (val & AFU_PRESENT)
117
*afu_idx = (val & AFU_INDEX_MASK) >> AFU_INDEX_SHIFT;
118
else
119
*afu_idx = -1;
120
return 0;
121
}
122
123
static int get_actag_count(struct pci_dev *dev, int afu_idx, int *actag)
124
{
125
int pos;
126
u16 actag_sup;
127
128
pos = find_dvsec_afu_ctrl(dev, afu_idx);
129
if (!pos)
130
return -ESRCH;
131
132
pci_read_config_word(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_SUP,
133
&actag_sup);
134
*actag = actag_sup & ACTAG_MASK;
135
return 0;
136
}
137
138
static struct npu_link *find_link(struct pci_dev *dev)
139
{
140
struct npu_link *link;
141
142
list_for_each_entry(link, &links_list, list) {
143
/* The functions of a device all share the same link */
144
if (link->domain == pci_domain_nr(dev->bus) &&
145
link->bus == dev->bus->number &&
146
link->dev == PCI_SLOT(dev->devfn)) {
147
return link;
148
}
149
}
150
151
/* link doesn't exist yet. Allocate one */
152
link = kzalloc(sizeof(struct npu_link), GFP_KERNEL);
153
if (!link)
154
return NULL;
155
link->domain = pci_domain_nr(dev->bus);
156
link->bus = dev->bus->number;
157
link->dev = PCI_SLOT(dev->devfn);
158
list_add(&link->list, &links_list);
159
return link;
160
}
161
162
static void pnv_ocxl_fixup_actag(struct pci_dev *dev)
163
{
164
struct pci_controller *hose = pci_bus_to_host(dev->bus);
165
struct pnv_phb *phb = hose->private_data;
166
struct npu_link *link;
167
int rc, afu_idx = -1, i, actag;
168
169
if (!machine_is(powernv))
170
return;
171
172
if (phb->type != PNV_PHB_NPU_OCAPI)
173
return;
174
175
guard(mutex)(&links_list_lock);
176
177
link = find_link(dev);
178
if (!link) {
179
dev_warn(&dev->dev, "couldn't update actag information\n");
180
return;
181
}
182
183
/*
184
* Check how many actags are desired for the AFUs under that
185
* function and add it to the count for the link
186
*/
187
rc = get_max_afu_index(dev, &afu_idx);
188
if (rc) {
189
/* Most likely an invalid config space */
190
dev_dbg(&dev->dev, "couldn't find AFU information\n");
191
afu_idx = -1;
192
}
193
194
link->fn_desired_actags[PCI_FUNC(dev->devfn)] = 0;
195
for (i = 0; i <= afu_idx; i++) {
196
/*
197
* AFU index 'holes' are allowed. So don't fail if we
198
* can't read the actag info for an index
199
*/
200
rc = get_actag_count(dev, i, &actag);
201
if (rc)
202
continue;
203
link->fn_desired_actags[PCI_FUNC(dev->devfn)] += actag;
204
}
205
dev_dbg(&dev->dev, "total actags for function: %d\n",
206
link->fn_desired_actags[PCI_FUNC(dev->devfn)]);
207
208
}
209
DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pnv_ocxl_fixup_actag);
210
211
/*
 * Compute how many actags a function gets. @desired is what the
 * function asks for, @total what all the functions ask for together.
 * The request is granted in full when @total fits within
 * PNV_OCXL_ACTAG_MAX, otherwise it is pro-rated (rounding down).
 */
static u16 assign_fn_actags(u16 desired, u16 total)
{
	if (total > PNV_OCXL_ACTAG_MAX)
		return PNV_OCXL_ACTAG_MAX * desired / total;

	return desired;
}
222
223
static void assign_actags(struct npu_link *link)
224
{
225
u16 actag_count, range_start = 0, total_desired = 0;
226
int i;
227
228
for (i = 0; i < 8; i++)
229
total_desired += link->fn_desired_actags[i];
230
231
for (i = 0; i < 8; i++) {
232
if (link->fn_desired_actags[i]) {
233
actag_count = assign_fn_actags(
234
link->fn_desired_actags[i],
235
total_desired);
236
link->fn_actags[i].start = range_start;
237
link->fn_actags[i].count = actag_count;
238
range_start += actag_count;
239
WARN_ON(range_start >= PNV_OCXL_ACTAG_MAX);
240
}
241
pr_debug("link %x:%x:%x fct %d actags: start=%d count=%d (desired=%d)\n",
242
link->domain, link->bus, link->dev, i,
243
link->fn_actags[i].start, link->fn_actags[i].count,
244
link->fn_desired_actags[i]);
245
}
246
link->assignment_done = true;
247
}
248
249
/*
 * Report the actag range of the function @dev.
 *
 * @base:      first actag assigned to the function
 * @enabled:   number of actags assigned to the function
 * @supported: number of actags the function desires
 *
 * Returns 0 on success, -ENODEV if no link could be obtained.
 */
int pnv_ocxl_get_actag(struct pci_dev *dev, u16 *base, u16 *enabled,
		       u16 *supported)
{
	const struct actag_range *range;
	struct npu_link *link;
	int fn;

	guard(mutex)(&links_list_lock);

	link = find_link(dev);
	if (!link) {
		dev_err(&dev->dev, "actag information not found\n");
		return -ENODEV;
	}

	/*
	 * On p9, the 64 actags of a link are shared by all the
	 * functions of the adapter. The desired counts were gathered
	 * during PCI enumeration; the pro-rated assignment is computed
	 * the first time any function asks for its range.
	 */
	if (!link->assignment_done)
		assign_actags(link);

	fn = PCI_FUNC(dev->devfn);
	range = &link->fn_actags[fn];

	*base = range->start;
	*enabled = range->count;
	*supported = link->fn_desired_actags[fn];

	return 0;
}
EXPORT_SYMBOL_GPL(pnv_ocxl_get_actag);
277
278
int pnv_ocxl_get_pasid_count(struct pci_dev *dev, int *count)
279
{
280
struct npu_link *link;
281
int i, rc = -EINVAL;
282
283
/*
284
* The number of PASIDs (process address space ID) which can
285
* be used by a function depends on how many functions exist
286
* on the device. The NPU needs to be configured to know how
287
* many bits are available to PASIDs and how many are to be
288
* used by the function BDF identifier.
289
*
290
* We only support one AFU-carrying function for now.
291
*/
292
guard(mutex)(&links_list_lock);
293
294
link = find_link(dev);
295
if (!link) {
296
dev_err(&dev->dev, "actag information not found\n");
297
return -ENODEV;
298
}
299
300
for (i = 0; i < 8; i++)
301
if (link->fn_desired_actags[i] && (i == PCI_FUNC(dev->devfn))) {
302
*count = PNV_OCXL_PASID_MAX;
303
rc = 0;
304
break;
305
}
306
307
dev_dbg(&dev->dev, "%d PASIDs available for function\n",
308
rc ? 0 : *count);
309
return rc;
310
}
311
EXPORT_SYMBOL_GPL(pnv_ocxl_get_pasid_count);
312
313
static void set_templ_rate(unsigned int templ, unsigned int rate, char *buf)
314
{
315
int shift, idx;
316
317
WARN_ON(templ > PNV_OCXL_TL_MAX_TEMPLATE);
318
idx = (PNV_OCXL_TL_MAX_TEMPLATE - templ) / 2;
319
shift = 4 * (1 - ((PNV_OCXL_TL_MAX_TEMPLATE - templ) % 2));
320
buf[idx] |= rate << shift;
321
}
322
323
int pnv_ocxl_get_tl_cap(struct pci_dev *dev, long *cap,
324
char *rate_buf, int rate_buf_size)
325
{
326
if (rate_buf_size != PNV_OCXL_TL_RATE_BUF_SIZE)
327
return -EINVAL;
328
/*
329
* The TL capabilities are a characteristic of the NPU, so
330
* we go with hard-coded values.
331
*
332
* The receiving rate of each template is encoded on 4 bits.
333
*
334
* On P9:
335
* - templates 0 -> 3 are supported
336
* - templates 0, 1 and 3 have a 0 receiving rate
337
* - template 2 has receiving rate of 1 (extra cycle)
338
*/
339
memset(rate_buf, 0, rate_buf_size);
340
set_templ_rate(2, 1, rate_buf);
341
*cap = PNV_OCXL_TL_P9_RECV_CAP;
342
return 0;
343
}
344
EXPORT_SYMBOL_GPL(pnv_ocxl_get_tl_cap);
345
346
int pnv_ocxl_set_tl_conf(struct pci_dev *dev, long cap,
347
uint64_t rate_buf_phys, int rate_buf_size)
348
{
349
struct pci_controller *hose = pci_bus_to_host(dev->bus);
350
struct pnv_phb *phb = hose->private_data;
351
int rc;
352
353
if (rate_buf_size != PNV_OCXL_TL_RATE_BUF_SIZE)
354
return -EINVAL;
355
356
rc = opal_npu_tl_set(phb->opal_id, dev->devfn, cap,
357
rate_buf_phys, rate_buf_size);
358
if (rc) {
359
dev_err(&dev->dev, "Can't configure host TL: %d\n", rc);
360
return -EINVAL;
361
}
362
return 0;
363
}
364
EXPORT_SYMBOL_GPL(pnv_ocxl_set_tl_conf);
365
366
int pnv_ocxl_get_xsl_irq(struct pci_dev *dev, int *hwirq)
367
{
368
int rc;
369
370
rc = of_property_read_u32(dev->dev.of_node, "ibm,opal-xsl-irq", hwirq);
371
if (rc) {
372
dev_err(&dev->dev,
373
"Can't get translation interrupt for device\n");
374
return rc;
375
}
376
return 0;
377
}
378
EXPORT_SYMBOL_GPL(pnv_ocxl_get_xsl_irq);
379
380
/* Unmap the four register mappings set up by pnv_ocxl_map_xsl_regs() */
void pnv_ocxl_unmap_xsl_regs(void __iomem *dsisr, void __iomem *dar,
			     void __iomem *tfc, void __iomem *pe_handle)
{
	void __iomem *regs[4] = { dsisr, dar, tfc, pe_handle };
	int i;

	for (i = 0; i < 4; i++)
		iounmap(regs[i]);
}
EXPORT_SYMBOL_GPL(pnv_ocxl_unmap_xsl_regs);
389
390
/*
 * Map the DSISR, DAR, TFC and PE_HANDLE translation registers of @dev
 * (8 bytes each) and return the mappings through the output pointers.
 *
 * On failure nothing stays mapped and the outputs are left untouched.
 *
 * Returns 0 on success, the property read error, or -EINVAL if an
 * ioremap() fails.
 */
int pnv_ocxl_map_xsl_regs(struct pci_dev *dev, void __iomem **dsisr,
			  void __iomem **dar, void __iomem **tfc,
			  void __iomem **pe_handle)
{
	void __iomem *mapped[4];
	u64 addr;
	int n, rc;

	/*
	 * opal stores the mmio addresses of the DSISR, DAR, TFC and
	 * PE_HANDLE registers in a device tree property, in that
	 * order
	 */
	for (n = 0; n < 4; n++) {
		rc = of_property_read_u64_index(dev->dev.of_node,
						"ibm,opal-xsl-mmio", n, &addr);
		if (rc)
			goto err_unmap;

		mapped[n] = ioremap(addr, 8);
		if (!mapped[n]) {
			rc = -EINVAL;
			goto err_unmap;
		}
	}

	*dsisr = mapped[0];
	*dar = mapped[1];
	*tfc = mapped[2];
	*pe_handle = mapped[3];
	return 0;

err_unmap:
	dev_err(&dev->dev, "Can't map translation mmio registers\n");
	/* Undo, most recent first, the mappings made before the failure */
	while (--n >= 0)
		iounmap(mapped[n]);
	return rc;
}
EXPORT_SYMBOL_GPL(pnv_ocxl_map_xsl_regs);
427
428
struct spa_data {
429
u64 phb_opal_id;
430
u32 bdfn;
431
};
432
433
int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask,
434
void **platform_data)
435
{
436
struct pci_controller *hose = pci_bus_to_host(dev->bus);
437
struct pnv_phb *phb = hose->private_data;
438
struct spa_data *data;
439
u32 bdfn;
440
int rc;
441
442
data = kzalloc(sizeof(*data), GFP_KERNEL);
443
if (!data)
444
return -ENOMEM;
445
446
bdfn = pci_dev_id(dev);
447
rc = opal_npu_spa_setup(phb->opal_id, bdfn, virt_to_phys(spa_mem),
448
PE_mask);
449
if (rc) {
450
dev_err(&dev->dev, "Can't setup Shared Process Area: %d\n", rc);
451
kfree(data);
452
return rc;
453
}
454
data->phb_opal_id = phb->opal_id;
455
data->bdfn = bdfn;
456
*platform_data = (void *) data;
457
return 0;
458
}
459
EXPORT_SYMBOL_GPL(pnv_ocxl_spa_setup);
460
461
void pnv_ocxl_spa_release(void *platform_data)
462
{
463
struct spa_data *data = (struct spa_data *) platform_data;
464
int rc;
465
466
rc = opal_npu_spa_setup(data->phb_opal_id, data->bdfn, 0, 0);
467
WARN_ON(rc);
468
kfree(data);
469
}
470
EXPORT_SYMBOL_GPL(pnv_ocxl_spa_release);
471
472
int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle)
473
{
474
struct spa_data *data = (struct spa_data *) platform_data;
475
476
return opal_npu_spa_clear_cache(data->phb_opal_id, data->bdfn, pe_handle);
477
}
478
EXPORT_SYMBOL_GPL(pnv_ocxl_spa_remove_pe_from_cache);
479
480
/*
 * Map @dev to the logical partition @lparid through opal and map the
 * ATSD registers.
 *
 * On success *@arva receives a 24-byte mapping of the first address
 * listed in the "ibm,mmio-atsd" property of the host bridge node.
 *
 * Returns 0 on success, the property read error, the opal error, or
 * -ENOMEM if ioremap() fails (*@arva is then NULL).
 */
int pnv_ocxl_map_lpar(struct pci_dev *dev, uint64_t lparid,
		      uint64_t lpcr, void __iomem **arva)
{
	struct pci_controller *hose = pci_bus_to_host(dev->bus);
	struct pnv_phb *phb = hose->private_data;
	u64 atsd_addr;
	int rc;

	/*
	 * ATSD physical address.
	 * ATSD LAUNCH register: write access initiates a shoot down to
	 * initiate the TLB Invalidate command.
	 */
	rc = of_property_read_u64_index(hose->dn, "ibm,mmio-atsd",
					0, &atsd_addr);
	if (rc) {
		dev_info(&dev->dev, "No available ATSD found\n");
		return rc;
	}

	/*
	 * Assign a register set to a Logical Partition and MMIO ATSD
	 * LPARID register to the required value.
	 */
	rc = opal_npu_map_lpar(phb->opal_id, pci_dev_id(dev),
			       lparid, lpcr);
	if (rc) {
		dev_err(&dev->dev, "Error mapping device to LPAR: %d\n", rc);
		return rc;
	}

	*arva = ioremap(atsd_addr, 24);
	if (*arva)
		return 0;

	dev_warn(&dev->dev, "ioremap failed - mmio_atsd: %#llx\n", atsd_addr);
	return -ENOMEM;
}
EXPORT_SYMBOL_GPL(pnv_ocxl_map_lpar);
518
519
/* Release the ATSD register mapping obtained from pnv_ocxl_map_lpar() */
void pnv_ocxl_unmap_lpar(void __iomem *arva)
{
	iounmap(arva);
}
EXPORT_SYMBOL_GPL(pnv_ocxl_unmap_lpar);
524
525
/*
 * Launch a TLB invalidation through the ATSD registers mapped at
 * @arva and poll until it completes or PNV_OCXL_ATSD_TIMEOUT seconds
 * have elapsed.
 *
 * @arva:      ATSD register mapping; nothing is done if NULL
 * @pid:       value placed in the PID field of the launch register
 * @addr:      address to invalidate; when 0 the AVA register is not
 *             written and a different IS value is used
 * @page_size: page size in bytes; sizes other than 4KB, 2MB and 1GB
 *             are encoded as 64KB
 */
void pnv_ocxl_tlb_invalidate(void __iomem *arva,
			     unsigned long pid,
			     unsigned long addr,
			     unsigned long page_size)
{
	unsigned long timeout = jiffies + (HZ * PNV_OCXL_ATSD_TIMEOUT);
	u64 val;
	u8 size;

	if (!arva)
		return;

	if (addr) {
		/*
		 * Load the Abbreviated Virtual Address register with
		 * the necessary value
		 */
		val = FIELD_PREP(PNV_OCXL_ATSD_AVA_AVA, addr >> (63-51));
		out_be64(arva + PNV_OCXL_ATSD_AVA, val);
	}

	/*
	 * Actual Page Size to be invalidated
	 * 000 4KB
	 * 101 64KB
	 * 001 2MB
	 * 010 1GB
	 */
	switch (page_size) {
	case 0x1000:
		size = 0b000;
		break;
	case 0x200000:
		size = 0b001;
		break;
	case 0x40000000:
		size = 0b010;
		break;
	default:
		size = 0b101;
		break;
	}

	/*
	 * Build the launch value: the write access below initiates a
	 * shoot down to initiate the TLB Invalidate command
	 */
	val = PNV_OCXL_ATSD_LNCH_R;
	val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_RIC, 0b10);
	if (addr) {
		val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_IS, 0b00);
	} else {
		val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_IS, 0b01);
		val |= PNV_OCXL_ATSD_LNCH_OCAPI_SINGLETON;
	}
	val |= PNV_OCXL_ATSD_LNCH_PRS;
	val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_AP, size);
	val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_PID, pid);
	out_be64(arva + PNV_OCXL_ATSD_LNCH, val);

	/*
	 * Poll the ATSD status register: the invalidation is complete
	 * once the top bit of the status reads as 0
	 */
	for (;;) {
		val = in_be64(arva + PNV_OCXL_ATSD_STAT);
		if (!(val >> 63))
			return;

		if (time_after_eq(jiffies, timeout)) {
			pr_err("%s - Timeout while reading XTS MMIO ATSD status register (val=%#llx, pidr=0x%lx)\n",
			       __func__, val, pid);
			return;
		}
		cpu_relax();
	}
}
EXPORT_SYMBOL_GPL(pnv_ocxl_tlb_invalidate);
593
594