Path: blob/master/arch/powerpc/platforms/powernv/eeh-powernv.c
26481 views
// SPDX-License-Identifier: GPL-2.0-or-later1/*2* PowerNV Platform dependent EEH operations3*4* Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2013.5*/67#include <linux/atomic.h>8#include <linux/debugfs.h>9#include <linux/delay.h>10#include <linux/export.h>11#include <linux/init.h>12#include <linux/interrupt.h>13#include <linux/irqdomain.h>14#include <linux/list.h>15#include <linux/msi.h>16#include <linux/of.h>17#include <linux/pci.h>18#include <linux/proc_fs.h>19#include <linux/rbtree.h>20#include <linux/sched.h>21#include <linux/seq_file.h>22#include <linux/spinlock.h>2324#include <asm/eeh.h>25#include <asm/eeh_event.h>26#include <asm/firmware.h>27#include <asm/io.h>28#include <asm/iommu.h>29#include <asm/machdep.h>30#include <asm/msi_bitmap.h>31#include <asm/opal.h>32#include <asm/ppc-pci.h>33#include <asm/pnv-pci.h>3435#include "powernv.h"36#include "pci.h"37#include "../../../../drivers/pci/pci.h"3839static int eeh_event_irq = -EINVAL;4041static void pnv_pcibios_bus_add_device(struct pci_dev *pdev)42{43dev_dbg(&pdev->dev, "EEH: Setting up device\n");44eeh_probe_device(pdev);45}4647static irqreturn_t pnv_eeh_event(int irq, void *data)48{49/*50* We simply send a special EEH event if EEH has been51* enabled. We don't care about EEH events until we've52* finished processing the outstanding ones. Event processing53* gets unmasked in next_error() if EEH is enabled.54*/55disable_irq_nosync(irq);5657if (eeh_enabled())58eeh_send_failure_event(NULL);5960return IRQ_HANDLED;61}6263#ifdef CONFIG_DEBUG_FS64static ssize_t pnv_eeh_ei_write(struct file *filp,65const char __user *user_buf,66size_t count, loff_t *ppos)67{68struct pci_controller *hose = filp->private_data;69struct eeh_pe *pe;70int pe_no, type, func;71unsigned long addr, mask;72char buf[50];73int ret;7475if (!eeh_ops || !eeh_ops->err_inject)76return -ENXIO;7778/* Copy over argument buffer */79ret = simple_write_to_buffer(buf, sizeof(buf), ppos, user_buf, count);80if (!ret)81return -EFAULT;8283/* Retrieve parameters */84ret = sscanf(buf, "%x:%x:%x:%lx:%lx",85&pe_no, &type, &func, &addr, &mask);86if (ret != 5)87return -EINVAL;8889/* Retrieve PE */90pe = eeh_pe_get(hose, pe_no);91if (!pe)92return -ENODEV;9394/* Do error injection */95ret = eeh_ops->err_inject(pe, type, func, addr, mask);96return ret < 0 ? ret : count;97}9899static const struct file_operations pnv_eeh_ei_fops = {100.open = simple_open,101.write = pnv_eeh_ei_write,102};103104static int pnv_eeh_dbgfs_set(void *data, int offset, u64 val)105{106struct pci_controller *hose = data;107struct pnv_phb *phb = hose->private_data;108109out_be64(phb->regs + offset, val);110return 0;111}112113static int pnv_eeh_dbgfs_get(void *data, int offset, u64 *val)114{115struct pci_controller *hose = data;116struct pnv_phb *phb = hose->private_data;117118*val = in_be64(phb->regs + offset);119return 0;120}121122#define PNV_EEH_DBGFS_ENTRY(name, reg) \123static int pnv_eeh_dbgfs_set_##name(void *data, u64 val) \124{ \125return pnv_eeh_dbgfs_set(data, reg, val); \126} \127\128static int pnv_eeh_dbgfs_get_##name(void *data, u64 *val) \129{ \130return pnv_eeh_dbgfs_get(data, reg, val); \131} \132\133DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_dbgfs_ops_##name, \134pnv_eeh_dbgfs_get_##name, \135pnv_eeh_dbgfs_set_##name, \136"0x%llx\n")137138PNV_EEH_DBGFS_ENTRY(outb, 0xD10);139PNV_EEH_DBGFS_ENTRY(inbA, 0xD90);140PNV_EEH_DBGFS_ENTRY(inbB, 0xE10);141142#endif /* CONFIG_DEBUG_FS */143144static void pnv_eeh_enable_phbs(void)145{146struct pci_controller *hose;147struct pnv_phb *phb;148149list_for_each_entry(hose, &hose_list, list_node) {150phb = hose->private_data;151/*152* If EEH is enabled, we're going to rely on that.153* Otherwise, we restore to conventional mechanism154* to clear frozen PE during PCI config access.155*/156if (eeh_enabled())157phb->flags |= PNV_PHB_FLAG_EEH;158else159phb->flags &= ~PNV_PHB_FLAG_EEH;160}161}162163/**164* pnv_eeh_post_init - EEH platform dependent post initialization165*166* EEH platform dependent post initialization on powernv. When167* the function is called, the EEH PEs and devices should have168* been built. If the I/O cache staff has been built, EEH is169* ready to supply service.170*/171int pnv_eeh_post_init(void)172{173struct pci_controller *hose;174struct pnv_phb *phb;175int ret = 0;176177eeh_show_enabled();178179/* Register OPAL event notifier */180eeh_event_irq = opal_event_request(ilog2(OPAL_EVENT_PCI_ERROR));181if (eeh_event_irq < 0) {182pr_err("%s: Can't register OPAL event interrupt (%d)\n",183__func__, eeh_event_irq);184return eeh_event_irq;185}186187ret = request_irq(eeh_event_irq, pnv_eeh_event,188IRQ_TYPE_LEVEL_HIGH, "opal-eeh", NULL);189if (ret < 0) {190irq_dispose_mapping(eeh_event_irq);191pr_err("%s: Can't request OPAL event interrupt (%d)\n",192__func__, eeh_event_irq);193return ret;194}195196if (!eeh_enabled())197disable_irq(eeh_event_irq);198199pnv_eeh_enable_phbs();200201list_for_each_entry(hose, &hose_list, list_node) {202phb = hose->private_data;203204/* Create debugfs entries */205#ifdef CONFIG_DEBUG_FS206if (phb->has_dbgfs || !phb->dbgfs)207continue;208209phb->has_dbgfs = 1;210debugfs_create_file("err_injct", 0200,211phb->dbgfs, hose,212&pnv_eeh_ei_fops);213214debugfs_create_file("err_injct_outbound", 0600,215phb->dbgfs, hose,216&pnv_eeh_dbgfs_ops_outb);217debugfs_create_file("err_injct_inboundA", 0600,218phb->dbgfs, hose,219&pnv_eeh_dbgfs_ops_inbA);220debugfs_create_file("err_injct_inboundB", 0600,221phb->dbgfs, hose,222&pnv_eeh_dbgfs_ops_inbB);223#endif /* CONFIG_DEBUG_FS */224}225226return ret;227}228229static int pnv_eeh_find_cap(struct pci_dn *pdn, int cap)230{231int pos = PCI_CAPABILITY_LIST;232int cnt = 48; /* Maximal number of capabilities */233u32 status, id;234235if (!pdn)236return 0;237238/* Check if the device supports capabilities */239pnv_pci_cfg_read(pdn, PCI_STATUS, 2, &status);240if (!(status & PCI_STATUS_CAP_LIST))241return 0;242243while (cnt--) {244pnv_pci_cfg_read(pdn, pos, 1, &pos);245if (pos < 0x40)246break;247248pos &= ~3;249pnv_pci_cfg_read(pdn, pos + PCI_CAP_LIST_ID, 1, &id);250if (id == 0xff)251break;252253/* Found */254if (id == cap)255return pos;256257/* Next one */258pos += PCI_CAP_LIST_NEXT;259}260261return 0;262}263264static int pnv_eeh_find_ecap(struct pci_dn *pdn, int cap)265{266struct eeh_dev *edev = pdn_to_eeh_dev(pdn);267u32 header;268int pos = 256, ttl = (4096 - 256) / 8;269270if (!edev || !edev->pcie_cap)271return 0;272if (pnv_pci_cfg_read(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL)273return 0;274else if (!header)275return 0;276277while (ttl-- > 0) {278if (PCI_EXT_CAP_ID(header) == cap && pos)279return pos;280281pos = PCI_EXT_CAP_NEXT(header);282if (pos < 256)283break;284285if (pnv_pci_cfg_read(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL)286break;287}288289return 0;290}291292static struct eeh_pe *pnv_eeh_get_upstream_pe(struct pci_dev *pdev)293{294struct pci_controller *hose = pdev->bus->sysdata;295struct pnv_phb *phb = hose->private_data;296struct pci_dev *parent = pdev->bus->self;297298#ifdef CONFIG_PCI_IOV299/* for VFs we use the PF's PE as the upstream PE */300if (pdev->is_virtfn)301parent = pdev->physfn;302#endif303304/* otherwise use the PE of our parent bridge */305if (parent) {306struct pnv_ioda_pe *ioda_pe = pnv_ioda_get_pe(parent);307308return eeh_pe_get(phb->hose, ioda_pe->pe_number);309}310311return NULL;312}313314/**315* pnv_eeh_probe - Do probe on PCI device316* @pdev: pci_dev to probe317*318* Create, or find the existing, eeh_dev for this pci_dev.319*/320static struct eeh_dev *pnv_eeh_probe(struct pci_dev *pdev)321{322struct pci_dn *pdn = pci_get_pdn(pdev);323struct pci_controller *hose = pdn->phb;324struct pnv_phb *phb = hose->private_data;325struct eeh_dev *edev = pdn_to_eeh_dev(pdn);326struct eeh_pe *upstream_pe;327uint32_t pcie_flags;328int ret;329int config_addr = (pdn->busno << 8) | (pdn->devfn);330331/*332* When probing the root bridge, which doesn't have any333* subordinate PCI devices. We don't have OF node for334* the root bridge. So it's not reasonable to continue335* the probing.336*/337if (!edev || edev->pe)338return NULL;339340/* already configured? */341if (edev->pdev) {342pr_debug("%s: found existing edev for %04x:%02x:%02x.%01x\n",343__func__, hose->global_number, config_addr >> 8,344PCI_SLOT(config_addr), PCI_FUNC(config_addr));345return edev;346}347348/* Skip for PCI-ISA bridge */349if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)350return NULL;351352eeh_edev_dbg(edev, "Probing device\n");353354/* Initialize eeh device */355edev->mode &= 0xFFFFFF00;356edev->pcix_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_PCIX);357edev->pcie_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_EXP);358edev->af_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_AF);359edev->aer_cap = pnv_eeh_find_ecap(pdn, PCI_EXT_CAP_ID_ERR);360if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_PCI) {361edev->mode |= EEH_DEV_BRIDGE;362if (edev->pcie_cap) {363pnv_pci_cfg_read(pdn, edev->pcie_cap + PCI_EXP_FLAGS,3642, &pcie_flags);365pcie_flags = (pcie_flags & PCI_EXP_FLAGS_TYPE) >> 4;366if (pcie_flags == PCI_EXP_TYPE_ROOT_PORT)367edev->mode |= EEH_DEV_ROOT_PORT;368else if (pcie_flags == PCI_EXP_TYPE_DOWNSTREAM)369edev->mode |= EEH_DEV_DS_PORT;370}371}372373edev->pe_config_addr = phb->ioda.pe_rmap[config_addr];374375upstream_pe = pnv_eeh_get_upstream_pe(pdev);376377/* Create PE */378ret = eeh_pe_tree_insert(edev, upstream_pe);379if (ret) {380eeh_edev_warn(edev, "Failed to add device to PE (code %d)\n", ret);381return NULL;382}383384/*385* If the PE contains any one of following adapters, the386* PCI config space can't be accessed when dumping EEH log.387* Otherwise, we will run into fenced PHB caused by shortage388* of outbound credits in the adapter. The PCI config access389* should be blocked until PE reset. MMIO access is dropped390* by hardware certainly. In order to drop PCI config requests,391* one more flag (EEH_PE_CFG_RESTRICTED) is introduced, which392* will be checked in the backend for PE state retrieval. If393* the PE becomes frozen for the first time and the flag has394* been set for the PE, we will set EEH_PE_CFG_BLOCKED for395* that PE to block its config space.396*397* Broadcom BCM5718 2-ports NICs (14e4:1656)398* Broadcom Austin 4-ports NICs (14e4:1657)399* Broadcom Shiner 4-ports 1G NICs (14e4:168a)400* Broadcom Shiner 2-ports 10G NICs (14e4:168e)401*/402if ((pdn->vendor_id == PCI_VENDOR_ID_BROADCOM &&403pdn->device_id == 0x1656) ||404(pdn->vendor_id == PCI_VENDOR_ID_BROADCOM &&405pdn->device_id == 0x1657) ||406(pdn->vendor_id == PCI_VENDOR_ID_BROADCOM &&407pdn->device_id == 0x168a) ||408(pdn->vendor_id == PCI_VENDOR_ID_BROADCOM &&409pdn->device_id == 0x168e))410edev->pe->state |= EEH_PE_CFG_RESTRICTED;411412/*413* Cache the PE primary bus, which can't be fetched when414* full hotplug is in progress. In that case, all child415* PCI devices of the PE are expected to be removed prior416* to PE reset.417*/418if (!(edev->pe->state & EEH_PE_PRI_BUS)) {419edev->pe->bus = pci_find_bus(hose->global_number,420pdn->busno);421if (edev->pe->bus)422edev->pe->state |= EEH_PE_PRI_BUS;423}424425/*426* Enable EEH explicitly so that we will do EEH check427* while accessing I/O stuff428*/429if (!eeh_has_flag(EEH_ENABLED)) {430enable_irq(eeh_event_irq);431pnv_eeh_enable_phbs();432eeh_add_flag(EEH_ENABLED);433}434435/* Save memory bars */436eeh_save_bars(edev);437438eeh_edev_dbg(edev, "EEH enabled on device\n");439440return edev;441}442443/**444* pnv_eeh_set_option - Initialize EEH or MMIO/DMA reenable445* @pe: EEH PE446* @option: operation to be issued447*448* The function is used to control the EEH functionality globally.449* Currently, following options are support according to PAPR:450* Enable EEH, Disable EEH, Enable MMIO and Enable DMA451*/452static int pnv_eeh_set_option(struct eeh_pe *pe, int option)453{454struct pci_controller *hose = pe->phb;455struct pnv_phb *phb = hose->private_data;456bool freeze_pe = false;457int opt;458s64 rc;459460switch (option) {461case EEH_OPT_DISABLE:462return -EPERM;463case EEH_OPT_ENABLE:464return 0;465case EEH_OPT_THAW_MMIO:466opt = OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO;467break;468case EEH_OPT_THAW_DMA:469opt = OPAL_EEH_ACTION_CLEAR_FREEZE_DMA;470break;471case EEH_OPT_FREEZE_PE:472freeze_pe = true;473opt = OPAL_EEH_ACTION_SET_FREEZE_ALL;474break;475default:476pr_warn("%s: Invalid option %d\n", __func__, option);477return -EINVAL;478}479480/* Freeze master and slave PEs if PHB supports compound PEs */481if (freeze_pe) {482if (phb->freeze_pe) {483phb->freeze_pe(phb, pe->addr);484return 0;485}486487rc = opal_pci_eeh_freeze_set(phb->opal_id, pe->addr, opt);488if (rc != OPAL_SUCCESS) {489pr_warn("%s: Failure %lld freezing PHB#%x-PE#%x\n",490__func__, rc, phb->hose->global_number,491pe->addr);492return -EIO;493}494495return 0;496}497498/* Unfreeze master and slave PEs if PHB supports */499if (phb->unfreeze_pe)500return phb->unfreeze_pe(phb, pe->addr, opt);501502rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe->addr, opt);503if (rc != OPAL_SUCCESS) {504pr_warn("%s: Failure %lld enable %d for PHB#%x-PE#%x\n",505__func__, rc, option, phb->hose->global_number,506pe->addr);507return -EIO;508}509510return 0;511}512513static void pnv_eeh_get_phb_diag(struct eeh_pe *pe)514{515struct pnv_phb *phb = pe->phb->private_data;516s64 rc;517518rc = opal_pci_get_phb_diag_data2(phb->opal_id, pe->data,519phb->diag_data_size);520if (rc != OPAL_SUCCESS)521pr_warn("%s: Failure %lld getting PHB#%x diag-data\n",522__func__, rc, pe->phb->global_number);523}524525static int pnv_eeh_get_phb_state(struct eeh_pe *pe)526{527struct pnv_phb *phb = pe->phb->private_data;528u8 fstate = 0;529__be16 pcierr = 0;530s64 rc;531int result = 0;532533rc = opal_pci_eeh_freeze_status(phb->opal_id,534pe->addr,535&fstate,536&pcierr,537NULL);538if (rc != OPAL_SUCCESS) {539pr_warn("%s: Failure %lld getting PHB#%x state\n",540__func__, rc, phb->hose->global_number);541return EEH_STATE_NOT_SUPPORT;542}543544/*545* Check PHB state. If the PHB is frozen for the546* first time, to dump the PHB diag-data.547*/548if (be16_to_cpu(pcierr) != OPAL_EEH_PHB_ERROR) {549result = (EEH_STATE_MMIO_ACTIVE |550EEH_STATE_DMA_ACTIVE |551EEH_STATE_MMIO_ENABLED |552EEH_STATE_DMA_ENABLED);553} else if (!(pe->state & EEH_PE_ISOLATED)) {554eeh_pe_mark_isolated(pe);555pnv_eeh_get_phb_diag(pe);556557if (eeh_has_flag(EEH_EARLY_DUMP_LOG))558pnv_pci_dump_phb_diag_data(pe->phb, pe->data);559}560561return result;562}563564static int pnv_eeh_get_pe_state(struct eeh_pe *pe)565{566struct pnv_phb *phb = pe->phb->private_data;567u8 fstate = 0;568__be16 pcierr = 0;569s64 rc;570int result;571572/*573* We don't clobber hardware frozen state until PE574* reset is completed. In order to keep EEH core575* moving forward, we have to return operational576* state during PE reset.577*/578if (pe->state & EEH_PE_RESET) {579result = (EEH_STATE_MMIO_ACTIVE |580EEH_STATE_DMA_ACTIVE |581EEH_STATE_MMIO_ENABLED |582EEH_STATE_DMA_ENABLED);583return result;584}585586/*587* Fetch PE state from hardware. If the PHB588* supports compound PE, let it handle that.589*/590if (phb->get_pe_state) {591fstate = phb->get_pe_state(phb, pe->addr);592} else {593rc = opal_pci_eeh_freeze_status(phb->opal_id,594pe->addr,595&fstate,596&pcierr,597NULL);598if (rc != OPAL_SUCCESS) {599pr_warn("%s: Failure %lld getting PHB#%x-PE%x state\n",600__func__, rc, phb->hose->global_number,601pe->addr);602return EEH_STATE_NOT_SUPPORT;603}604}605606/* Figure out state */607switch (fstate) {608case OPAL_EEH_STOPPED_NOT_FROZEN:609result = (EEH_STATE_MMIO_ACTIVE |610EEH_STATE_DMA_ACTIVE |611EEH_STATE_MMIO_ENABLED |612EEH_STATE_DMA_ENABLED);613break;614case OPAL_EEH_STOPPED_MMIO_FREEZE:615result = (EEH_STATE_DMA_ACTIVE |616EEH_STATE_DMA_ENABLED);617break;618case OPAL_EEH_STOPPED_DMA_FREEZE:619result = (EEH_STATE_MMIO_ACTIVE |620EEH_STATE_MMIO_ENABLED);621break;622case OPAL_EEH_STOPPED_MMIO_DMA_FREEZE:623result = 0;624break;625case OPAL_EEH_STOPPED_RESET:626result = EEH_STATE_RESET_ACTIVE;627break;628case OPAL_EEH_STOPPED_TEMP_UNAVAIL:629result = EEH_STATE_UNAVAILABLE;630break;631case OPAL_EEH_STOPPED_PERM_UNAVAIL:632result = EEH_STATE_NOT_SUPPORT;633break;634default:635result = EEH_STATE_NOT_SUPPORT;636pr_warn("%s: Invalid PHB#%x-PE#%x state %x\n",637__func__, phb->hose->global_number,638pe->addr, fstate);639}640641/*642* If PHB supports compound PE, to freeze all643* slave PEs for consistency.644*645* If the PE is switching to frozen state for the646* first time, to dump the PHB diag-data.647*/648if (!(result & EEH_STATE_NOT_SUPPORT) &&649!(result & EEH_STATE_UNAVAILABLE) &&650!(result & EEH_STATE_MMIO_ACTIVE) &&651!(result & EEH_STATE_DMA_ACTIVE) &&652!(pe->state & EEH_PE_ISOLATED)) {653if (phb->freeze_pe)654phb->freeze_pe(phb, pe->addr);655656eeh_pe_mark_isolated(pe);657pnv_eeh_get_phb_diag(pe);658659if (eeh_has_flag(EEH_EARLY_DUMP_LOG))660pnv_pci_dump_phb_diag_data(pe->phb, pe->data);661}662663return result;664}665666/**667* pnv_eeh_get_state - Retrieve PE state668* @pe: EEH PE669* @delay: delay while PE state is temporarily unavailable670*671* Retrieve the state of the specified PE. For IODA-compitable672* platform, it should be retrieved from IODA table. Therefore,673* we prefer passing down to hardware implementation to handle674* it.675*/676static int pnv_eeh_get_state(struct eeh_pe *pe, int *delay)677{678int ret;679680if (pe->type & EEH_PE_PHB)681ret = pnv_eeh_get_phb_state(pe);682else683ret = pnv_eeh_get_pe_state(pe);684685if (!delay)686return ret;687688/*689* If the PE state is temporarily unavailable,690* to inform the EEH core delay for default691* period (1 second)692*/693*delay = 0;694if (ret & EEH_STATE_UNAVAILABLE)695*delay = 1000;696697return ret;698}699700static s64 pnv_eeh_poll(unsigned long id)701{702s64 rc = OPAL_HARDWARE;703704while (1) {705rc = opal_pci_poll(id);706if (rc <= 0)707break;708709if (system_state < SYSTEM_RUNNING)710udelay(1000 * rc);711else712msleep(rc);713}714715return rc;716}717718int pnv_eeh_phb_reset(struct pci_controller *hose, int option)719{720struct pnv_phb *phb = hose->private_data;721s64 rc = OPAL_HARDWARE;722723pr_debug("%s: Reset PHB#%x, option=%d\n",724__func__, hose->global_number, option);725726/* Issue PHB complete reset request */727if (option == EEH_RESET_FUNDAMENTAL ||728option == EEH_RESET_HOT)729rc = opal_pci_reset(phb->opal_id,730OPAL_RESET_PHB_COMPLETE,731OPAL_ASSERT_RESET);732else if (option == EEH_RESET_DEACTIVATE)733rc = opal_pci_reset(phb->opal_id,734OPAL_RESET_PHB_COMPLETE,735OPAL_DEASSERT_RESET);736if (rc < 0)737goto out;738739/*740* Poll state of the PHB until the request is done741* successfully. The PHB reset is usually PHB complete742* reset followed by hot reset on root bus. So we also743* need the PCI bus settlement delay.744*/745if (rc > 0)746rc = pnv_eeh_poll(phb->opal_id);747if (option == EEH_RESET_DEACTIVATE) {748if (system_state < SYSTEM_RUNNING)749udelay(1000 * EEH_PE_RST_SETTLE_TIME);750else751msleep(EEH_PE_RST_SETTLE_TIME);752}753out:754if (rc != OPAL_SUCCESS)755return -EIO;756757return 0;758}759760static int pnv_eeh_root_reset(struct pci_controller *hose, int option)761{762struct pnv_phb *phb = hose->private_data;763s64 rc = OPAL_HARDWARE;764765pr_debug("%s: Reset PHB#%x, option=%d\n",766__func__, hose->global_number, option);767768/*769* During the reset deassert time, we needn't care770* the reset scope because the firmware does nothing771* for fundamental or hot reset during deassert phase.772*/773if (option == EEH_RESET_FUNDAMENTAL)774rc = opal_pci_reset(phb->opal_id,775OPAL_RESET_PCI_FUNDAMENTAL,776OPAL_ASSERT_RESET);777else if (option == EEH_RESET_HOT)778rc = opal_pci_reset(phb->opal_id,779OPAL_RESET_PCI_HOT,780OPAL_ASSERT_RESET);781else if (option == EEH_RESET_DEACTIVATE)782rc = opal_pci_reset(phb->opal_id,783OPAL_RESET_PCI_HOT,784OPAL_DEASSERT_RESET);785if (rc < 0)786goto out;787788/* Poll state of the PHB until the request is done */789if (rc > 0)790rc = pnv_eeh_poll(phb->opal_id);791if (option == EEH_RESET_DEACTIVATE)792msleep(EEH_PE_RST_SETTLE_TIME);793out:794if (rc != OPAL_SUCCESS)795return -EIO;796797return 0;798}799800static int __pnv_eeh_bridge_reset(struct pci_dev *dev, int option)801{802struct pci_dn *pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn);803struct eeh_dev *edev = pdn_to_eeh_dev(pdn);804int aer = edev ? edev->aer_cap : 0;805u32 ctrl;806807pr_debug("%s: Secondary Reset PCI bus %04x:%02x with option %d\n",808__func__, pci_domain_nr(dev->bus),809dev->bus->number, option);810811switch (option) {812case EEH_RESET_FUNDAMENTAL:813case EEH_RESET_HOT:814/* Don't report linkDown event */815if (aer) {816eeh_ops->read_config(edev, aer + PCI_ERR_UNCOR_MASK,8174, &ctrl);818ctrl |= PCI_ERR_UNC_SURPDN;819eeh_ops->write_config(edev, aer + PCI_ERR_UNCOR_MASK,8204, ctrl);821}822823eeh_ops->read_config(edev, PCI_BRIDGE_CONTROL, 2, &ctrl);824ctrl |= PCI_BRIDGE_CTL_BUS_RESET;825eeh_ops->write_config(edev, PCI_BRIDGE_CONTROL, 2, ctrl);826827msleep(EEH_PE_RST_HOLD_TIME);828break;829case EEH_RESET_DEACTIVATE:830eeh_ops->read_config(edev, PCI_BRIDGE_CONTROL, 2, &ctrl);831ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET;832eeh_ops->write_config(edev, PCI_BRIDGE_CONTROL, 2, ctrl);833834msleep(EEH_PE_RST_SETTLE_TIME);835836/* Continue reporting linkDown event */837if (aer) {838eeh_ops->read_config(edev, aer + PCI_ERR_UNCOR_MASK,8394, &ctrl);840ctrl &= ~PCI_ERR_UNC_SURPDN;841eeh_ops->write_config(edev, aer + PCI_ERR_UNCOR_MASK,8424, ctrl);843}844845break;846}847848return 0;849}850851static int pnv_eeh_bridge_reset(struct pci_dev *pdev, int option)852{853struct pci_controller *hose = pci_bus_to_host(pdev->bus);854struct pnv_phb *phb = hose->private_data;855struct device_node *dn = pci_device_to_OF_node(pdev);856uint64_t id = PCI_SLOT_ID(phb->opal_id, pci_dev_id(pdev));857uint8_t scope;858int64_t rc;859860/* Hot reset to the bus if firmware cannot handle */861if (!dn || !of_property_present(dn, "ibm,reset-by-firmware"))862return __pnv_eeh_bridge_reset(pdev, option);863864pr_debug("%s: FW reset PCI bus %04x:%02x with option %d\n",865__func__, pci_domain_nr(pdev->bus),866pdev->bus->number, option);867868switch (option) {869case EEH_RESET_FUNDAMENTAL:870scope = OPAL_RESET_PCI_FUNDAMENTAL;871break;872case EEH_RESET_HOT:873scope = OPAL_RESET_PCI_HOT;874break;875case EEH_RESET_DEACTIVATE:876return 0;877default:878dev_dbg(&pdev->dev, "%s: Unsupported reset %d\n",879__func__, option);880return -EINVAL;881}882883rc = opal_pci_reset(id, scope, OPAL_ASSERT_RESET);884if (rc <= OPAL_SUCCESS)885goto out;886887rc = pnv_eeh_poll(id);888out:889return (rc == OPAL_SUCCESS) ? 0 : -EIO;890}891892void pnv_pci_reset_secondary_bus(struct pci_dev *dev)893{894struct pci_controller *hose;895896if (pci_is_root_bus(dev->bus)) {897hose = pci_bus_to_host(dev->bus);898pnv_eeh_root_reset(hose, EEH_RESET_HOT);899pnv_eeh_root_reset(hose, EEH_RESET_DEACTIVATE);900} else {901pnv_eeh_bridge_reset(dev, EEH_RESET_HOT);902pnv_eeh_bridge_reset(dev, EEH_RESET_DEACTIVATE);903}904}905906static void pnv_eeh_wait_for_pending(struct pci_dn *pdn, const char *type,907int pos, u16 mask)908{909struct eeh_dev *edev = pdn->edev;910int i, status = 0;911912/* Wait for Transaction Pending bit to be cleared */913for (i = 0; i < 4; i++) {914eeh_ops->read_config(edev, pos, 2, &status);915if (!(status & mask))916return;917918msleep((1 << i) * 100);919}920921pr_warn("%s: Pending transaction while issuing %sFLR to %04x:%02x:%02x.%01x\n",922__func__, type,923pdn->phb->global_number, pdn->busno,924PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));925}926927static int pnv_eeh_do_flr(struct pci_dn *pdn, int option)928{929struct eeh_dev *edev = pdn_to_eeh_dev(pdn);930u32 reg = 0;931932if (WARN_ON(!edev->pcie_cap))933return -ENOTTY;934935eeh_ops->read_config(edev, edev->pcie_cap + PCI_EXP_DEVCAP, 4, ®);936if (!(reg & PCI_EXP_DEVCAP_FLR))937return -ENOTTY;938939switch (option) {940case EEH_RESET_HOT:941case EEH_RESET_FUNDAMENTAL:942pnv_eeh_wait_for_pending(pdn, "",943edev->pcie_cap + PCI_EXP_DEVSTA,944PCI_EXP_DEVSTA_TRPND);945eeh_ops->read_config(edev, edev->pcie_cap + PCI_EXP_DEVCTL,9464, ®);947reg |= PCI_EXP_DEVCTL_BCR_FLR;948eeh_ops->write_config(edev, edev->pcie_cap + PCI_EXP_DEVCTL,9494, reg);950msleep(EEH_PE_RST_HOLD_TIME);951break;952case EEH_RESET_DEACTIVATE:953eeh_ops->read_config(edev, edev->pcie_cap + PCI_EXP_DEVCTL,9544, ®);955reg &= ~PCI_EXP_DEVCTL_BCR_FLR;956eeh_ops->write_config(edev, edev->pcie_cap + PCI_EXP_DEVCTL,9574, reg);958msleep(EEH_PE_RST_SETTLE_TIME);959break;960}961962return 0;963}964965static int pnv_eeh_do_af_flr(struct pci_dn *pdn, int option)966{967struct eeh_dev *edev = pdn_to_eeh_dev(pdn);968u32 cap = 0;969970if (WARN_ON(!edev->af_cap))971return -ENOTTY;972973eeh_ops->read_config(edev, edev->af_cap + PCI_AF_CAP, 1, &cap);974if (!(cap & PCI_AF_CAP_TP) || !(cap & PCI_AF_CAP_FLR))975return -ENOTTY;976977switch (option) {978case EEH_RESET_HOT:979case EEH_RESET_FUNDAMENTAL:980/*981* Wait for Transaction Pending bit to clear. A word-aligned982* test is used, so we use the control offset rather than status983* and shift the test bit to match.984*/985pnv_eeh_wait_for_pending(pdn, "AF",986edev->af_cap + PCI_AF_CTRL,987PCI_AF_STATUS_TP << 8);988eeh_ops->write_config(edev, edev->af_cap + PCI_AF_CTRL,9891, PCI_AF_CTRL_FLR);990msleep(EEH_PE_RST_HOLD_TIME);991break;992case EEH_RESET_DEACTIVATE:993eeh_ops->write_config(edev, edev->af_cap + PCI_AF_CTRL, 1, 0);994msleep(EEH_PE_RST_SETTLE_TIME);995break;996}997998return 0;999}10001001static int pnv_eeh_reset_vf_pe(struct eeh_pe *pe, int option)1002{1003struct eeh_dev *edev;1004struct pci_dn *pdn;1005int ret;10061007/* The VF PE should have only one child device */1008edev = list_first_entry_or_null(&pe->edevs, struct eeh_dev, entry);1009pdn = eeh_dev_to_pdn(edev);1010if (!pdn)1011return -ENXIO;10121013ret = pnv_eeh_do_flr(pdn, option);1014if (!ret)1015return ret;10161017return pnv_eeh_do_af_flr(pdn, option);1018}10191020/**1021* pnv_eeh_reset - Reset the specified PE1022* @pe: EEH PE1023* @option: reset option1024*1025* Do reset on the indicated PE. For PCI bus sensitive PE,1026* we need to reset the parent p2p bridge. The PHB has to1027* be reinitialized if the p2p bridge is root bridge. For1028* PCI device sensitive PE, we will try to reset the device1029* through FLR. For now, we don't have OPAL APIs to do HARD1030* reset yet, so all reset would be SOFT (HOT) reset.1031*/1032static int pnv_eeh_reset(struct eeh_pe *pe, int option)1033{1034struct pci_controller *hose = pe->phb;1035struct pnv_phb *phb;1036struct pci_bus *bus;1037int64_t rc;10381039/*1040* For PHB reset, we always have complete reset. For those PEs whose1041* primary bus derived from root complex (root bus) or root port1042* (usually bus#1), we apply hot or fundamental reset on the root port.1043* For other PEs, we always have hot reset on the PE primary bus.1044*1045* Here, we have different design to pHyp, which always clear the1046* frozen state during PE reset. However, the good idea here from1047* benh is to keep frozen state before we get PE reset done completely1048* (until BAR restore). With the frozen state, HW drops illegal IO1049* or MMIO access, which can incur recursive frozen PE during PE1050* reset. The side effect is that EEH core has to clear the frozen1051* state explicitly after BAR restore.1052*/1053if (pe->type & EEH_PE_PHB)1054return pnv_eeh_phb_reset(hose, option);10551056/*1057* The frozen PE might be caused by PAPR error injection1058* registers, which are expected to be cleared after hitting1059* frozen PE as stated in the hardware spec. Unfortunately,1060* that's not true on P7IOC. So we have to clear it manually1061* to avoid recursive EEH errors during recovery.1062*/1063phb = hose->private_data;1064if (phb->model == PNV_PHB_MODEL_P7IOC &&1065(option == EEH_RESET_HOT ||1066option == EEH_RESET_FUNDAMENTAL)) {1067rc = opal_pci_reset(phb->opal_id,1068OPAL_RESET_PHB_ERROR,1069OPAL_ASSERT_RESET);1070if (rc != OPAL_SUCCESS) {1071pr_warn("%s: Failure %lld clearing error injection registers\n",1072__func__, rc);1073return -EIO;1074}1075}10761077if (pe->type & EEH_PE_VF)1078return pnv_eeh_reset_vf_pe(pe, option);10791080bus = eeh_pe_bus_get(pe);1081if (!bus) {1082pr_err("%s: Cannot find PCI bus for PHB#%x-PE#%x\n",1083__func__, pe->phb->global_number, pe->addr);1084return -EIO;1085}10861087if (pci_is_root_bus(bus))1088return pnv_eeh_root_reset(hose, option);10891090/*1091* For hot resets try use the generic PCI error recovery reset1092* functions. These correctly handles the case where the secondary1093* bus is behind a hotplug slot and it will use the slot provided1094* reset methods to prevent spurious hotplug events during the reset.1095*1096* Fundamental resets need to be handled internally to EEH since the1097* PCI core doesn't really have a concept of a fundamental reset,1098* mainly because there's no standard way to generate one. Only a1099* few devices require an FRESET so it should be fine.1100*/1101if (option != EEH_RESET_FUNDAMENTAL) {1102/*1103* NB: Skiboot and pnv_eeh_bridge_reset() also no-op the1104* de-assert step. It's like the OPAL reset API was1105* poorly designed or something...1106*/1107if (option == EEH_RESET_DEACTIVATE)1108return 0;11091110rc = pci_bus_error_reset(bus->self);1111if (!rc)1112return 0;1113}11141115/* otherwise, use the generic bridge reset. this might call into FW */1116if (pci_is_root_bus(bus->parent))1117return pnv_eeh_root_reset(hose, option);1118return pnv_eeh_bridge_reset(bus->self, option);1119}11201121/**1122* pnv_eeh_get_log - Retrieve error log1123* @pe: EEH PE1124* @severity: temporary or permanent error log1125* @drv_log: driver log to be combined with retrieved error log1126* @len: length of driver log1127*1128* Retrieve the temporary or permanent error from the PE.1129*/1130static int pnv_eeh_get_log(struct eeh_pe *pe, int severity,1131char *drv_log, unsigned long len)1132{1133if (!eeh_has_flag(EEH_EARLY_DUMP_LOG))1134pnv_pci_dump_phb_diag_data(pe->phb, pe->data);11351136return 0;1137}11381139/**1140* pnv_eeh_configure_bridge - Configure PCI bridges in the indicated PE1141* @pe: EEH PE1142*1143* The function will be called to reconfigure the bridges included1144* in the specified PE so that the mulfunctional PE would be recovered1145* again.1146*/1147static int pnv_eeh_configure_bridge(struct eeh_pe *pe)1148{1149return 0;1150}11511152/**1153* pnv_pe_err_inject - Inject specified error to the indicated PE1154* @pe: the indicated PE1155* @type: error type1156* @func: specific error type1157* @addr: address1158* @mask: address mask1159*1160* The routine is called to inject specified error, which is1161* determined by @type and @func, to the indicated PE for1162* testing purpose.1163*/1164static int pnv_eeh_err_inject(struct eeh_pe *pe, int type, int func,1165unsigned long addr, unsigned long mask)1166{1167struct pci_controller *hose = pe->phb;1168struct pnv_phb *phb = hose->private_data;1169s64 rc;11701171if (type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR &&1172type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64) {1173pr_warn("%s: Invalid error type %d\n",1174__func__, type);1175return -ERANGE;1176}11771178if (func < OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_ADDR ||1179func > OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_TARGET) {1180pr_warn("%s: Invalid error function %d\n",1181__func__, func);1182return -ERANGE;1183}11841185/* Firmware supports error injection ? */1186if (!opal_check_token(OPAL_PCI_ERR_INJECT)) {1187pr_warn("%s: Firmware doesn't support error injection\n",1188__func__);1189return -ENXIO;1190}11911192/* Do error injection */1193rc = opal_pci_err_inject(phb->opal_id, pe->addr,1194type, func, addr, mask);1195if (rc != OPAL_SUCCESS) {1196pr_warn("%s: Failure %lld injecting error "1197"%d-%d to PHB#%x-PE#%x\n",1198__func__, rc, type, func,1199hose->global_number, pe->addr);1200return -EIO;1201}12021203return 0;1204}12051206static inline bool pnv_eeh_cfg_blocked(struct pci_dn *pdn)1207{1208struct eeh_dev *edev = pdn_to_eeh_dev(pdn);12091210if (!edev || !edev->pe)1211return false;12121213/*1214* We will issue FLR or AF FLR to all VFs, which are contained1215* in VF PE. It relies on the EEH PCI config accessors. So we1216* can't block them during the window.1217*/1218if (edev->physfn && (edev->pe->state & EEH_PE_RESET))1219return false;12201221if (edev->pe->state & EEH_PE_CFG_BLOCKED)1222return true;12231224return false;1225}12261227static int pnv_eeh_read_config(struct eeh_dev *edev,1228int where, int size, u32 *val)1229{1230struct pci_dn *pdn = eeh_dev_to_pdn(edev);12311232if (!pdn)1233return PCIBIOS_DEVICE_NOT_FOUND;12341235if (pnv_eeh_cfg_blocked(pdn)) {1236*val = 0xFFFFFFFF;1237return PCIBIOS_SET_FAILED;1238}12391240return pnv_pci_cfg_read(pdn, where, size, val);1241}12421243static int pnv_eeh_write_config(struct eeh_dev *edev,1244int where, int size, u32 val)1245{1246struct pci_dn *pdn = eeh_dev_to_pdn(edev);12471248if (!pdn)1249return PCIBIOS_DEVICE_NOT_FOUND;12501251if (pnv_eeh_cfg_blocked(pdn))1252return PCIBIOS_SET_FAILED;12531254return pnv_pci_cfg_write(pdn, where, size, val);1255}12561257static void pnv_eeh_dump_hub_diag_common(struct OpalIoP7IOCErrorData *data)1258{1259/* GEM */1260if (data->gemXfir || data->gemRfir ||1261data->gemRirqfir || data->gemMask || data->gemRwof)1262pr_info(" GEM: %016llx %016llx %016llx %016llx %016llx\n",1263be64_to_cpu(data->gemXfir),1264be64_to_cpu(data->gemRfir),1265be64_to_cpu(data->gemRirqfir),1266be64_to_cpu(data->gemMask),1267be64_to_cpu(data->gemRwof));12681269/* LEM */1270if (data->lemFir || data->lemErrMask ||1271data->lemAction0 || data->lemAction1 || data->lemWof)1272pr_info(" LEM: %016llx %016llx %016llx %016llx %016llx\n",1273be64_to_cpu(data->lemFir),1274be64_to_cpu(data->lemErrMask),1275be64_to_cpu(data->lemAction0),1276be64_to_cpu(data->lemAction1),1277be64_to_cpu(data->lemWof));1278}12791280static void pnv_eeh_get_and_dump_hub_diag(struct pci_controller *hose)1281{1282struct pnv_phb *phb = hose->private_data;1283struct OpalIoP7IOCErrorData *data =1284(struct OpalIoP7IOCErrorData*)phb->diag_data;1285long rc;12861287rc = opal_pci_get_hub_diag_data(phb->hub_id, data, sizeof(*data));1288if (rc != OPAL_SUCCESS) {1289pr_warn("%s: Failed to get HUB#%llx diag-data (%ld)\n",1290__func__, phb->hub_id, rc);1291return;1292}12931294switch (be16_to_cpu(data->type)) {1295case OPAL_P7IOC_DIAG_TYPE_RGC:1296pr_info("P7IOC diag-data for RGC\n\n");1297pnv_eeh_dump_hub_diag_common(data);1298if (data->rgc.rgcStatus || data->rgc.rgcLdcp)1299pr_info(" RGC: %016llx %016llx\n",1300be64_to_cpu(data->rgc.rgcStatus),1301be64_to_cpu(data->rgc.rgcLdcp));1302break;1303case OPAL_P7IOC_DIAG_TYPE_BI:1304pr_info("P7IOC diag-data for BI %s\n\n",1305data->bi.biDownbound ? "Downbound" : "Upbound");1306pnv_eeh_dump_hub_diag_common(data);1307if (data->bi.biLdcp0 || data->bi.biLdcp1 ||1308data->bi.biLdcp2 || data->bi.biFenceStatus)1309pr_info(" BI: %016llx %016llx %016llx %016llx\n",1310be64_to_cpu(data->bi.biLdcp0),1311be64_to_cpu(data->bi.biLdcp1),1312be64_to_cpu(data->bi.biLdcp2),1313be64_to_cpu(data->bi.biFenceStatus));1314break;1315case OPAL_P7IOC_DIAG_TYPE_CI:1316pr_info("P7IOC diag-data for CI Port %d\n\n",1317data->ci.ciPort);1318pnv_eeh_dump_hub_diag_common(data);1319if (data->ci.ciPortStatus || data->ci.ciPortLdcp)1320pr_info(" CI: %016llx %016llx\n",1321be64_to_cpu(data->ci.ciPortStatus),1322be64_to_cpu(data->ci.ciPortLdcp));1323break;1324case OPAL_P7IOC_DIAG_TYPE_MISC:1325pr_info("P7IOC diag-data for MISC\n\n");1326pnv_eeh_dump_hub_diag_common(data);1327break;1328case OPAL_P7IOC_DIAG_TYPE_I2C:1329pr_info("P7IOC diag-data for I2C\n\n");1330pnv_eeh_dump_hub_diag_common(data);1331break;1332default:1333pr_warn("%s: Invalid type of HUB#%llx diag-data (%d)\n",1334__func__, phb->hub_id, data->type);1335}1336}13371338static int pnv_eeh_get_pe(struct pci_controller *hose,1339u16 pe_no, struct eeh_pe **pe)1340{1341struct pnv_phb *phb = hose->private_data;1342struct pnv_ioda_pe *pnv_pe;1343struct eeh_pe *dev_pe;13441345/*1346* If PHB supports compound PE, to fetch1347* the master PE because slave PE is invisible1348* to EEH core.1349*/1350pnv_pe = &phb->ioda.pe_array[pe_no];1351if (pnv_pe->flags & PNV_IODA_PE_SLAVE) {1352pnv_pe = pnv_pe->master;1353WARN_ON(!pnv_pe ||1354!(pnv_pe->flags & PNV_IODA_PE_MASTER));1355pe_no = pnv_pe->pe_number;1356}13571358/* Find the PE according to PE# */1359dev_pe = eeh_pe_get(hose, pe_no);1360if (!dev_pe)1361return -EEXIST;13621363/* Freeze the (compound) PE */1364*pe = dev_pe;1365if (!(dev_pe->state & EEH_PE_ISOLATED))1366phb->freeze_pe(phb, pe_no);13671368/*1369* At this point, we're sure the (compound) PE should1370* have been frozen. However, we still need poke until1371* hitting the frozen PE on top level.1372*/1373dev_pe = dev_pe->parent;1374while (dev_pe && !(dev_pe->type & EEH_PE_PHB)) {1375int ret;1376ret = eeh_ops->get_state(dev_pe, NULL);1377if (ret <= 0 || eeh_state_active(ret)) {1378dev_pe = dev_pe->parent;1379continue;1380}13811382/* Frozen parent PE */1383*pe = dev_pe;1384if (!(dev_pe->state & EEH_PE_ISOLATED))1385phb->freeze_pe(phb, dev_pe->addr);13861387/* Next one */1388dev_pe = dev_pe->parent;1389}13901391return 0;1392}13931394/**1395* pnv_eeh_next_error - Retrieve next EEH error to handle1396* @pe: Affected PE1397*1398* The function is expected to be called by EEH core while it gets1399* special EEH event (without binding PE). The function calls to1400* OPAL APIs for next error to handle. The informational error is1401* handled internally by platform. However, the dead IOC, dead PHB,1402* fenced PHB and frozen PE should be handled by EEH core eventually.1403*/1404static int pnv_eeh_next_error(struct eeh_pe **pe)1405{1406struct pci_controller *hose;1407struct pnv_phb *phb;1408struct eeh_pe *phb_pe, *parent_pe;1409__be64 frozen_pe_no;1410__be16 err_type, severity;1411long rc;1412int state, ret = EEH_NEXT_ERR_NONE;14131414/*1415* While running here, it's safe to purge the event queue. The1416* event should still be masked.1417*/1418eeh_remove_event(NULL, false);14191420list_for_each_entry(hose, &hose_list, list_node) {1421/*1422* If the subordinate PCI buses of the PHB has been1423* removed or is exactly under error recovery, we1424* needn't take care of it any more.1425*/1426phb = hose->private_data;1427phb_pe = eeh_phb_pe_get(hose);1428if (!phb_pe || (phb_pe->state & EEH_PE_ISOLATED))1429continue;14301431rc = opal_pci_next_error(phb->opal_id,1432&frozen_pe_no, &err_type, &severity);1433if (rc != OPAL_SUCCESS) {1434pr_devel("%s: Invalid return value on "1435"PHB#%x (0x%lx) from opal_pci_next_error",1436__func__, hose->global_number, rc);1437continue;1438}14391440/* If the PHB doesn't have error, stop processing */1441if (be16_to_cpu(err_type) == OPAL_EEH_NO_ERROR ||1442be16_to_cpu(severity) == OPAL_EEH_SEV_NO_ERROR) {1443pr_devel("%s: No error found on PHB#%x\n",1444__func__, hose->global_number);1445continue;1446}14471448/*1449* Processing the error. We're expecting the error with1450* highest priority reported upon multiple errors on the1451* specific PHB.1452*/1453pr_devel("%s: Error (%d, %d, %llu) on PHB#%x\n",1454__func__, be16_to_cpu(err_type),1455be16_to_cpu(severity), be64_to_cpu(frozen_pe_no),1456hose->global_number);1457switch (be16_to_cpu(err_type)) {1458case OPAL_EEH_IOC_ERROR:1459if (be16_to_cpu(severity) == OPAL_EEH_SEV_IOC_DEAD) {1460pr_err("EEH: dead IOC detected\n");1461ret = EEH_NEXT_ERR_DEAD_IOC;1462} else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) {1463pr_info("EEH: IOC informative error "1464"detected\n");1465pnv_eeh_get_and_dump_hub_diag(hose);1466ret = EEH_NEXT_ERR_NONE;1467}14681469break;1470case OPAL_EEH_PHB_ERROR:1471if (be16_to_cpu(severity) == OPAL_EEH_SEV_PHB_DEAD) {1472*pe = phb_pe;1473pr_err("EEH: dead PHB#%x detected, "1474"location: %s\n",1475hose->global_number,1476eeh_pe_loc_get(phb_pe));1477ret = EEH_NEXT_ERR_DEAD_PHB;1478} else if (be16_to_cpu(severity) ==1479OPAL_EEH_SEV_PHB_FENCED) {1480*pe = phb_pe;1481pr_err("EEH: Fenced PHB#%x detected, "1482"location: %s\n",1483hose->global_number,1484eeh_pe_loc_get(phb_pe));1485ret = EEH_NEXT_ERR_FENCED_PHB;1486} else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) {1487pr_info("EEH: PHB#%x informative error "1488"detected, location: %s\n",1489hose->global_number,1490eeh_pe_loc_get(phb_pe));1491pnv_eeh_get_phb_diag(phb_pe);1492pnv_pci_dump_phb_diag_data(hose, phb_pe->data);1493ret = EEH_NEXT_ERR_NONE;1494}14951496break;1497case OPAL_EEH_PE_ERROR:1498/*1499* If we can't find the corresponding PE, we1500* just try to unfreeze.1501*/1502if (pnv_eeh_get_pe(hose,1503be64_to_cpu(frozen_pe_no), pe)) {1504pr_info("EEH: Clear non-existing PHB#%x-PE#%llx\n",1505hose->global_number, be64_to_cpu(frozen_pe_no));1506pr_info("EEH: PHB location: %s\n",1507eeh_pe_loc_get(phb_pe));15081509/* Dump PHB diag-data */1510rc = opal_pci_get_phb_diag_data2(phb->opal_id,1511phb->diag_data, phb->diag_data_size);1512if (rc == OPAL_SUCCESS)1513pnv_pci_dump_phb_diag_data(hose,1514phb->diag_data);15151516/* Try best to clear it */1517opal_pci_eeh_freeze_clear(phb->opal_id,1518be64_to_cpu(frozen_pe_no),1519OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);1520ret = EEH_NEXT_ERR_NONE;1521} else if ((*pe)->state & EEH_PE_ISOLATED ||1522eeh_pe_passed(*pe)) {1523ret = EEH_NEXT_ERR_NONE;1524} else {1525pr_err("EEH: Frozen PE#%x "1526"on PHB#%x detected\n",1527(*pe)->addr,1528(*pe)->phb->global_number);1529pr_err("EEH: PE location: %s, "1530"PHB location: %s\n",1531eeh_pe_loc_get(*pe),1532eeh_pe_loc_get(phb_pe));1533ret = EEH_NEXT_ERR_FROZEN_PE;1534}15351536break;1537default:1538pr_warn("%s: Unexpected error type %d\n",1539__func__, be16_to_cpu(err_type));1540}15411542/*1543* EEH core will try recover from fenced PHB or1544* frozen PE. In the time for frozen PE, EEH core1545* enable IO path for that before collecting logs,1546* but it ruins the site. So we have to dump the1547* log in advance here.1548*/1549if ((ret == EEH_NEXT_ERR_FROZEN_PE ||1550ret == EEH_NEXT_ERR_FENCED_PHB) &&1551!((*pe)->state & EEH_PE_ISOLATED)) {1552eeh_pe_mark_isolated(*pe);1553pnv_eeh_get_phb_diag(*pe);15541555if (eeh_has_flag(EEH_EARLY_DUMP_LOG))1556pnv_pci_dump_phb_diag_data((*pe)->phb,1557(*pe)->data);1558}15591560/*1561* We probably have the frozen parent PE out there and1562* we need have to handle frozen parent PE firstly.1563*/1564if (ret == EEH_NEXT_ERR_FROZEN_PE) {1565parent_pe = (*pe)->parent;1566while (parent_pe) {1567/* Hit the ceiling ? */1568if (parent_pe->type & EEH_PE_PHB)1569break;15701571/* Frozen parent PE ? */1572state = eeh_ops->get_state(parent_pe, NULL);1573if (state > 0 && !eeh_state_active(state))1574*pe = parent_pe;15751576/* Next parent level */1577parent_pe = parent_pe->parent;1578}15791580/* We possibly migrate to another PE */1581eeh_pe_mark_isolated(*pe);1582}15831584/*1585* If we have no errors on the specific PHB or only1586* informative error there, we continue poking it.1587* Otherwise, we need actions to be taken by upper1588* layer.1589*/1590if (ret > EEH_NEXT_ERR_INF)1591break;1592}15931594/* Unmask the event */1595if (ret == EEH_NEXT_ERR_NONE && eeh_enabled())1596enable_irq(eeh_event_irq);15971598return ret;1599}16001601static int pnv_eeh_restore_config(struct eeh_dev *edev)1602{1603struct pnv_phb *phb;1604s64 ret = 0;16051606if (!edev)1607return -EEXIST;16081609if (edev->physfn)1610return 0;16111612phb = edev->controller->private_data;1613ret = opal_pci_reinit(phb->opal_id,1614OPAL_REINIT_PCI_DEV, edev->bdfn);16151616if (ret) {1617pr_warn("%s: Can't reinit PCI dev 0x%x (%lld)\n",1618__func__, edev->bdfn, ret);1619return -EIO;1620}16211622return ret;1623}16241625static struct eeh_ops pnv_eeh_ops = {1626.name = "powernv",1627.probe = pnv_eeh_probe,1628.set_option = pnv_eeh_set_option,1629.get_state = pnv_eeh_get_state,1630.reset = pnv_eeh_reset,1631.get_log = pnv_eeh_get_log,1632.configure_bridge = pnv_eeh_configure_bridge,1633.err_inject = pnv_eeh_err_inject,1634.read_config = pnv_eeh_read_config,1635.write_config = pnv_eeh_write_config,1636.next_error = pnv_eeh_next_error,1637.restore_config = pnv_eeh_restore_config,1638.notify_resume = NULL1639};16401641/**1642* eeh_powernv_init - Register platform dependent EEH operations1643*1644* EEH initialization on powernv platform. This function should be1645* called before any EEH related functions.1646*/1647static int __init eeh_powernv_init(void)1648{1649int max_diag_size = PNV_PCI_DIAG_BUF_SIZE;1650struct pci_controller *hose;1651struct pnv_phb *phb;1652int ret = -EINVAL;16531654if (!firmware_has_feature(FW_FEATURE_OPAL)) {1655pr_warn("%s: OPAL is required !\n", __func__);1656return -EINVAL;1657}16581659/* Set probe mode */1660eeh_add_flag(EEH_PROBE_MODE_DEV);16611662/*1663* P7IOC blocks PCI config access to frozen PE, but PHB31664* doesn't do that. So we have to selectively enable I/O1665* prior to collecting error log.1666*/1667list_for_each_entry(hose, &hose_list, list_node) {1668phb = hose->private_data;16691670if (phb->model == PNV_PHB_MODEL_P7IOC)1671eeh_add_flag(EEH_ENABLE_IO_FOR_LOG);16721673if (phb->diag_data_size > max_diag_size)1674max_diag_size = phb->diag_data_size;16751676break;1677}16781679/*1680* eeh_init() allocates the eeh_pe and its aux data buf so the1681* size needs to be set before calling eeh_init().1682*/1683eeh_set_pe_aux_size(max_diag_size);1684ppc_md.pcibios_bus_add_device = pnv_pcibios_bus_add_device;16851686ret = eeh_init(&pnv_eeh_ops);1687if (!ret)1688pr_info("EEH: PowerNV platform initialized\n");1689else1690pr_info("EEH: Failed to initialize PowerNV platform (%d)\n", ret);16911692return ret;1693}1694machine_arch_initcall(powernv, eeh_powernv_init);169516961697