/*
 * Path: blob/master/arch/powerpc/platforms/powernv/ocxl.c
 * 26481 views
 */
// SPDX-License-Identifier: GPL-2.0+1// Copyright 2017 IBM Corp.2#include <asm/pnv-ocxl.h>3#include <asm/opal.h>4#include <misc/ocxl-config.h>5#include "pci.h"67#define PNV_OCXL_TL_P9_RECV_CAP 0x000000000000000Full8#define PNV_OCXL_ACTAG_MAX 649/* PASIDs are 20-bit, but on P9, NPU can only handle 15 bits */10#define PNV_OCXL_PASID_BITS 1511#define PNV_OCXL_PASID_MAX ((1 << PNV_OCXL_PASID_BITS) - 1)1213#define AFU_PRESENT (1 << 31)14#define AFU_INDEX_MASK 0x3F00000015#define AFU_INDEX_SHIFT 2416#define ACTAG_MASK 0xFFF171819struct actag_range {20u16 start;21u16 count;22};2324struct npu_link {25struct list_head list;26int domain;27int bus;28int dev;29u16 fn_desired_actags[8];30struct actag_range fn_actags[8];31bool assignment_done;32};33static struct list_head links_list = LIST_HEAD_INIT(links_list);34static DEFINE_MUTEX(links_list_lock);353637/*38* opencapi actags handling:39*40* When sending commands, the opencapi device references the memory41* context it's targeting with an 'actag', which is really an alias42* for a (BDF, pasid) combination. When it receives a command, the NPU43* must do a lookup of the actag to identify the memory context. The44* hardware supports a finite number of actags per link (64 for45* POWER9).46*47* The device can carry multiple functions, and each function can have48* multiple AFUs. Each AFU advertises in its config space the number49* of desired actags. The host must configure in the config space of50* the AFU how many actags the AFU is really allowed to use (which can51* be less than what the AFU desires).52*53* When a PCI function is probed by the driver, it has no visibility54* about the other PCI functions and how many actags they'd like,55* which makes it impossible to distribute actags fairly among AFUs.56*57* Unfortunately, the only way to know how many actags a function58* desires is by looking at the data for each AFU in the config space59* and add them up. 
Similarly, the only way to know how many actags60* all the functions of the physical device desire is by adding the61* previously computed function counts. Then we can match that against62* what the hardware supports.63*64* To get a comprehensive view, we use a 'pci fixup': at the end of65* PCI enumeration, each function counts how many actags its AFUs66* desire and we save it in a 'npu_link' structure, shared between all67* the PCI functions of a same device. Therefore, when the first68* function is probed by the driver, we can get an idea of the total69* count of desired actags for the device, and assign the actags to70* the AFUs, by pro-rating if needed.71*/7273static int find_dvsec_from_pos(struct pci_dev *dev, int dvsec_id, int pos)74{75int vsec = pos;76u16 vendor, id;7778while ((vsec = pci_find_next_ext_capability(dev, vsec,79OCXL_EXT_CAP_ID_DVSEC))) {80pci_read_config_word(dev, vsec + OCXL_DVSEC_VENDOR_OFFSET,81&vendor);82pci_read_config_word(dev, vsec + OCXL_DVSEC_ID_OFFSET, &id);83if (vendor == PCI_VENDOR_ID_IBM && id == dvsec_id)84return vsec;85}86return 0;87}8889static int find_dvsec_afu_ctrl(struct pci_dev *dev, u8 afu_idx)90{91int vsec = 0;92u8 idx;9394while ((vsec = find_dvsec_from_pos(dev, OCXL_DVSEC_AFU_CTRL_ID,95vsec))) {96pci_read_config_byte(dev, vsec + OCXL_DVSEC_AFU_CTRL_AFU_IDX,97&idx);98if (idx == afu_idx)99return vsec;100}101return 0;102}103104static int get_max_afu_index(struct pci_dev *dev, int *afu_idx)105{106int pos;107u32 val;108109pos = pci_find_dvsec_capability(dev, PCI_VENDOR_ID_IBM,110OCXL_DVSEC_FUNC_ID);111if (!pos)112return -ESRCH;113114pci_read_config_dword(dev, pos + OCXL_DVSEC_FUNC_OFF_INDEX, &val);115if (val & AFU_PRESENT)116*afu_idx = (val & AFU_INDEX_MASK) >> AFU_INDEX_SHIFT;117else118*afu_idx = -1;119return 0;120}121122static int get_actag_count(struct pci_dev *dev, int afu_idx, int *actag)123{124int pos;125u16 actag_sup;126127pos = find_dvsec_afu_ctrl(dev, afu_idx);128if (!pos)129return 
-ESRCH;130131pci_read_config_word(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_SUP,132&actag_sup);133*actag = actag_sup & ACTAG_MASK;134return 0;135}136137static struct npu_link *find_link(struct pci_dev *dev)138{139struct npu_link *link;140141list_for_each_entry(link, &links_list, list) {142/* The functions of a device all share the same link */143if (link->domain == pci_domain_nr(dev->bus) &&144link->bus == dev->bus->number &&145link->dev == PCI_SLOT(dev->devfn)) {146return link;147}148}149150/* link doesn't exist yet. Allocate one */151link = kzalloc(sizeof(struct npu_link), GFP_KERNEL);152if (!link)153return NULL;154link->domain = pci_domain_nr(dev->bus);155link->bus = dev->bus->number;156link->dev = PCI_SLOT(dev->devfn);157list_add(&link->list, &links_list);158return link;159}160161static void pnv_ocxl_fixup_actag(struct pci_dev *dev)162{163struct pci_controller *hose = pci_bus_to_host(dev->bus);164struct pnv_phb *phb = hose->private_data;165struct npu_link *link;166int rc, afu_idx = -1, i, actag;167168if (!machine_is(powernv))169return;170171if (phb->type != PNV_PHB_NPU_OCAPI)172return;173174guard(mutex)(&links_list_lock);175176link = find_link(dev);177if (!link) {178dev_warn(&dev->dev, "couldn't update actag information\n");179return;180}181182/*183* Check how many actags are desired for the AFUs under that184* function and add it to the count for the link185*/186rc = get_max_afu_index(dev, &afu_idx);187if (rc) {188/* Most likely an invalid config space */189dev_dbg(&dev->dev, "couldn't find AFU information\n");190afu_idx = -1;191}192193link->fn_desired_actags[PCI_FUNC(dev->devfn)] = 0;194for (i = 0; i <= afu_idx; i++) {195/*196* AFU index 'holes' are allowed. 
So don't fail if we197* can't read the actag info for an index198*/199rc = get_actag_count(dev, i, &actag);200if (rc)201continue;202link->fn_desired_actags[PCI_FUNC(dev->devfn)] += actag;203}204dev_dbg(&dev->dev, "total actags for function: %d\n",205link->fn_desired_actags[PCI_FUNC(dev->devfn)]);206207}208DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pnv_ocxl_fixup_actag);209210static u16 assign_fn_actags(u16 desired, u16 total)211{212u16 count;213214if (total <= PNV_OCXL_ACTAG_MAX)215count = desired;216else217count = PNV_OCXL_ACTAG_MAX * desired / total;218219return count;220}221222static void assign_actags(struct npu_link *link)223{224u16 actag_count, range_start = 0, total_desired = 0;225int i;226227for (i = 0; i < 8; i++)228total_desired += link->fn_desired_actags[i];229230for (i = 0; i < 8; i++) {231if (link->fn_desired_actags[i]) {232actag_count = assign_fn_actags(233link->fn_desired_actags[i],234total_desired);235link->fn_actags[i].start = range_start;236link->fn_actags[i].count = actag_count;237range_start += actag_count;238WARN_ON(range_start >= PNV_OCXL_ACTAG_MAX);239}240pr_debug("link %x:%x:%x fct %d actags: start=%d count=%d (desired=%d)\n",241link->domain, link->bus, link->dev, i,242link->fn_actags[i].start, link->fn_actags[i].count,243link->fn_desired_actags[i]);244}245link->assignment_done = true;246}247248int pnv_ocxl_get_actag(struct pci_dev *dev, u16 *base, u16 *enabled,249u16 *supported)250{251struct npu_link *link;252253guard(mutex)(&links_list_lock);254255link = find_link(dev);256if (!link) {257dev_err(&dev->dev, "actag information not found\n");258return -ENODEV;259}260/*261* On p9, we only have 64 actags per link, so they must be262* shared by all the functions of the same adapter. 
We counted263* the desired actag counts during PCI enumeration, so that we264* can allocate a pro-rated number of actags to each function.265*/266if (!link->assignment_done)267assign_actags(link);268269*base = link->fn_actags[PCI_FUNC(dev->devfn)].start;270*enabled = link->fn_actags[PCI_FUNC(dev->devfn)].count;271*supported = link->fn_desired_actags[PCI_FUNC(dev->devfn)];272273return 0;274}275EXPORT_SYMBOL_GPL(pnv_ocxl_get_actag);276277int pnv_ocxl_get_pasid_count(struct pci_dev *dev, int *count)278{279struct npu_link *link;280int i, rc = -EINVAL;281282/*283* The number of PASIDs (process address space ID) which can284* be used by a function depends on how many functions exist285* on the device. The NPU needs to be configured to know how286* many bits are available to PASIDs and how many are to be287* used by the function BDF identifier.288*289* We only support one AFU-carrying function for now.290*/291guard(mutex)(&links_list_lock);292293link = find_link(dev);294if (!link) {295dev_err(&dev->dev, "actag information not found\n");296return -ENODEV;297}298299for (i = 0; i < 8; i++)300if (link->fn_desired_actags[i] && (i == PCI_FUNC(dev->devfn))) {301*count = PNV_OCXL_PASID_MAX;302rc = 0;303break;304}305306dev_dbg(&dev->dev, "%d PASIDs available for function\n",307rc ? 
0 : *count);308return rc;309}310EXPORT_SYMBOL_GPL(pnv_ocxl_get_pasid_count);311312static void set_templ_rate(unsigned int templ, unsigned int rate, char *buf)313{314int shift, idx;315316WARN_ON(templ > PNV_OCXL_TL_MAX_TEMPLATE);317idx = (PNV_OCXL_TL_MAX_TEMPLATE - templ) / 2;318shift = 4 * (1 - ((PNV_OCXL_TL_MAX_TEMPLATE - templ) % 2));319buf[idx] |= rate << shift;320}321322int pnv_ocxl_get_tl_cap(struct pci_dev *dev, long *cap,323char *rate_buf, int rate_buf_size)324{325if (rate_buf_size != PNV_OCXL_TL_RATE_BUF_SIZE)326return -EINVAL;327/*328* The TL capabilities are a characteristic of the NPU, so329* we go with hard-coded values.330*331* The receiving rate of each template is encoded on 4 bits.332*333* On P9:334* - templates 0 -> 3 are supported335* - templates 0, 1 and 3 have a 0 receiving rate336* - template 2 has receiving rate of 1 (extra cycle)337*/338memset(rate_buf, 0, rate_buf_size);339set_templ_rate(2, 1, rate_buf);340*cap = PNV_OCXL_TL_P9_RECV_CAP;341return 0;342}343EXPORT_SYMBOL_GPL(pnv_ocxl_get_tl_cap);344345int pnv_ocxl_set_tl_conf(struct pci_dev *dev, long cap,346uint64_t rate_buf_phys, int rate_buf_size)347{348struct pci_controller *hose = pci_bus_to_host(dev->bus);349struct pnv_phb *phb = hose->private_data;350int rc;351352if (rate_buf_size != PNV_OCXL_TL_RATE_BUF_SIZE)353return -EINVAL;354355rc = opal_npu_tl_set(phb->opal_id, dev->devfn, cap,356rate_buf_phys, rate_buf_size);357if (rc) {358dev_err(&dev->dev, "Can't configure host TL: %d\n", rc);359return -EINVAL;360}361return 0;362}363EXPORT_SYMBOL_GPL(pnv_ocxl_set_tl_conf);364365int pnv_ocxl_get_xsl_irq(struct pci_dev *dev, int *hwirq)366{367int rc;368369rc = of_property_read_u32(dev->dev.of_node, "ibm,opal-xsl-irq", hwirq);370if (rc) {371dev_err(&dev->dev,372"Can't get translation interrupt for device\n");373return rc;374}375return 0;376}377EXPORT_SYMBOL_GPL(pnv_ocxl_get_xsl_irq);378379void pnv_ocxl_unmap_xsl_regs(void __iomem *dsisr, void __iomem *dar,380void __iomem *tfc, void __iomem 
*pe_handle)381{382iounmap(dsisr);383iounmap(dar);384iounmap(tfc);385iounmap(pe_handle);386}387EXPORT_SYMBOL_GPL(pnv_ocxl_unmap_xsl_regs);388389int pnv_ocxl_map_xsl_regs(struct pci_dev *dev, void __iomem **dsisr,390void __iomem **dar, void __iomem **tfc,391void __iomem **pe_handle)392{393u64 reg;394int i, j, rc = 0;395void __iomem *regs[4];396397/*398* opal stores the mmio addresses of the DSISR, DAR, TFC and399* PE_HANDLE registers in a device tree property, in that400* order401*/402for (i = 0; i < 4; i++) {403rc = of_property_read_u64_index(dev->dev.of_node,404"ibm,opal-xsl-mmio", i, ®);405if (rc)406break;407regs[i] = ioremap(reg, 8);408if (!regs[i]) {409rc = -EINVAL;410break;411}412}413if (rc) {414dev_err(&dev->dev, "Can't map translation mmio registers\n");415for (j = i - 1; j >= 0; j--)416iounmap(regs[j]);417} else {418*dsisr = regs[0];419*dar = regs[1];420*tfc = regs[2];421*pe_handle = regs[3];422}423return rc;424}425EXPORT_SYMBOL_GPL(pnv_ocxl_map_xsl_regs);426427struct spa_data {428u64 phb_opal_id;429u32 bdfn;430};431432int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask,433void **platform_data)434{435struct pci_controller *hose = pci_bus_to_host(dev->bus);436struct pnv_phb *phb = hose->private_data;437struct spa_data *data;438u32 bdfn;439int rc;440441data = kzalloc(sizeof(*data), GFP_KERNEL);442if (!data)443return -ENOMEM;444445bdfn = pci_dev_id(dev);446rc = opal_npu_spa_setup(phb->opal_id, bdfn, virt_to_phys(spa_mem),447PE_mask);448if (rc) {449dev_err(&dev->dev, "Can't setup Shared Process Area: %d\n", rc);450kfree(data);451return rc;452}453data->phb_opal_id = phb->opal_id;454data->bdfn = bdfn;455*platform_data = (void *) data;456return 0;457}458EXPORT_SYMBOL_GPL(pnv_ocxl_spa_setup);459460void pnv_ocxl_spa_release(void *platform_data)461{462struct spa_data *data = (struct spa_data *) platform_data;463int rc;464465rc = opal_npu_spa_setup(data->phb_opal_id, data->bdfn, 0, 
0);466WARN_ON(rc);467kfree(data);468}469EXPORT_SYMBOL_GPL(pnv_ocxl_spa_release);470471int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle)472{473struct spa_data *data = (struct spa_data *) platform_data;474475return opal_npu_spa_clear_cache(data->phb_opal_id, data->bdfn, pe_handle);476}477EXPORT_SYMBOL_GPL(pnv_ocxl_spa_remove_pe_from_cache);478479int pnv_ocxl_map_lpar(struct pci_dev *dev, uint64_t lparid,480uint64_t lpcr, void __iomem **arva)481{482struct pci_controller *hose = pci_bus_to_host(dev->bus);483struct pnv_phb *phb = hose->private_data;484u64 mmio_atsd;485int rc;486487/* ATSD physical address.488* ATSD LAUNCH register: write access initiates a shoot down to489* initiate the TLB Invalidate command.490*/491rc = of_property_read_u64_index(hose->dn, "ibm,mmio-atsd",4920, &mmio_atsd);493if (rc) {494dev_info(&dev->dev, "No available ATSD found\n");495return rc;496}497498/* Assign a register set to a Logical Partition and MMIO ATSD499* LPARID register to the required value.500*/501rc = opal_npu_map_lpar(phb->opal_id, pci_dev_id(dev),502lparid, lpcr);503if (rc) {504dev_err(&dev->dev, "Error mapping device to LPAR: %d\n", rc);505return rc;506}507508*arva = ioremap(mmio_atsd, 24);509if (!(*arva)) {510dev_warn(&dev->dev, "ioremap failed - mmio_atsd: %#llx\n", mmio_atsd);511rc = -ENOMEM;512}513514return rc;515}516EXPORT_SYMBOL_GPL(pnv_ocxl_map_lpar);517518void pnv_ocxl_unmap_lpar(void __iomem *arva)519{520iounmap(arva);521}522EXPORT_SYMBOL_GPL(pnv_ocxl_unmap_lpar);523524void pnv_ocxl_tlb_invalidate(void __iomem *arva,525unsigned long pid,526unsigned long addr,527unsigned long page_size)528{529unsigned long timeout = jiffies + (HZ * PNV_OCXL_ATSD_TIMEOUT);530u64 val = 0ull;531int pend;532u8 size;533534if (!(arva))535return;536537if (addr) {538/* load Abbreviated Virtual Address register with539* the necessary value540*/541val |= FIELD_PREP(PNV_OCXL_ATSD_AVA_AVA, addr >> (63-51));542out_be64(arva + PNV_OCXL_ATSD_AVA, val);543}544545/* Write access 
initiates a shoot down to initiate the546* TLB Invalidate command547*/548val = PNV_OCXL_ATSD_LNCH_R;549val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_RIC, 0b10);550if (addr)551val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_IS, 0b00);552else {553val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_IS, 0b01);554val |= PNV_OCXL_ATSD_LNCH_OCAPI_SINGLETON;555}556val |= PNV_OCXL_ATSD_LNCH_PRS;557/* Actual Page Size to be invalidated558* 000 4KB559* 101 64KB560* 001 2MB561* 010 1GB562*/563size = 0b101;564if (page_size == 0x1000)565size = 0b000;566if (page_size == 0x200000)567size = 0b001;568if (page_size == 0x40000000)569size = 0b010;570val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_AP, size);571val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_PID, pid);572out_be64(arva + PNV_OCXL_ATSD_LNCH, val);573574/* Poll the ATSD status register to determine when the575* TLB Invalidate has been completed.576*/577val = in_be64(arva + PNV_OCXL_ATSD_STAT);578pend = val >> 63;579580while (pend) {581if (time_after_eq(jiffies, timeout)) {582pr_err("%s - Timeout while reading XTS MMIO ATSD status register (val=%#llx, pidr=0x%lx)\n",583__func__, val, pid);584return;585}586cpu_relax();587val = in_be64(arva + PNV_OCXL_ATSD_STAT);588pend = val >> 63;589}590}591EXPORT_SYMBOL_GPL(pnv_ocxl_tlb_invalidate);592593594