Path: blob/master/drivers/infiniband/hw/ipath/ipath_driver.c
15112 views
/*1* Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.2* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.3*4* This software is available to you under a choice of one of two5* licenses. You may choose to be licensed under the terms of the GNU6* General Public License (GPL) Version 2, available from the file7* COPYING in the main directory of this source tree, or the8* OpenIB.org BSD license below:9*10* Redistribution and use in source and binary forms, with or11* without modification, are permitted provided that the following12* conditions are met:13*14* - Redistributions of source code must retain the above15* copyright notice, this list of conditions and the following16* disclaimer.17*18* - Redistributions in binary form must reproduce the above19* copyright notice, this list of conditions and the following20* disclaimer in the documentation and/or other materials21* provided with the distribution.22*23* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,24* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF25* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND26* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS27* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN28* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN29* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE30* SOFTWARE.31*/3233#include <linux/sched.h>34#include <linux/spinlock.h>35#include <linux/idr.h>36#include <linux/pci.h>37#include <linux/io.h>38#include <linux/delay.h>39#include <linux/netdevice.h>40#include <linux/vmalloc.h>41#include <linux/bitmap.h>42#include <linux/slab.h>4344#include "ipath_kernel.h"45#include "ipath_verbs.h"4647static void ipath_update_pio_bufs(struct ipath_devdata *);4849const char *ipath_get_unit_name(int unit)50{51static char iname[16];52snprintf(iname, sizeof iname, "infinipath%u", unit);53return iname;54}5556#define DRIVER_LOAD_MSG "QLogic " IPATH_DRV_NAME " loaded: "57#define PFX IPATH_DRV_NAME ": "5859/*60* The size has to be longer than this string, so we can append61* board/chip information to it in the init code.62*/63const char ib_ipath_version[] = IPATH_IDSTR "\n";6465static struct idr unit_table;66DEFINE_SPINLOCK(ipath_devs_lock);67LIST_HEAD(ipath_dev_list);6869wait_queue_head_t ipath_state_wait;7071unsigned ipath_debug = __IPATH_INFO;7273module_param_named(debug, ipath_debug, uint, S_IWUSR | S_IRUGO);74MODULE_PARM_DESC(debug, "mask for debug prints");75EXPORT_SYMBOL_GPL(ipath_debug);7677unsigned ipath_mtu4096 = 1; /* max 4KB IB mtu by default, if supported */78module_param_named(mtu4096, ipath_mtu4096, uint, S_IRUGO);79MODULE_PARM_DESC(mtu4096, "enable MTU of 4096 bytes, if supported");8081static unsigned ipath_hol_timeout_ms = 13000;82module_param_named(hol_timeout_ms, ipath_hol_timeout_ms, uint, S_IRUGO);83MODULE_PARM_DESC(hol_timeout_ms,84"duration of user app suspension after link failure");8586unsigned ipath_linkrecovery = 1;87module_param_named(linkrecovery, ipath_linkrecovery, uint, S_IWUSR | S_IRUGO);88MODULE_PARM_DESC(linkrecovery, "enable workaround for link recovery issue");8990MODULE_LICENSE("GPL");91MODULE_AUTHOR("QLogic <[email protected]>");92MODULE_DESCRIPTION("QLogic InfiniPath driver");9394/*95* Table to translate the LINKTRAININGSTATE portion of96* IBCStatus to a human-readable form.97*/98const char *ipath_ibcstatus_str[] = {99"Disabled",100"LinkUp",101"PollActive",102"PollQuiet",103"SleepDelay",104"SleepQuiet",105"LState6", /* unused */106"LState7", /* unused */107"CfgDebounce",108"CfgRcvfCfg",109"CfgWaitRmt",110"CfgIdle",111"RecovRetrain",112"CfgTxRevLane", /* unused before IBA7220 */113"RecovWaitRmt",114"RecovIdle",115/* below were added for IBA7220 */116"CfgEnhanced",117"CfgTest",118"CfgWaitRmtTest",119"CfgWaitCfgEnhanced",120"SendTS_T",121"SendTstIdles",122"RcvTS_T",123"SendTst_TS1s",124"LTState18", "LTState19", "LTState1A", "LTState1B",125"LTState1C", "LTState1D", "LTState1E", "LTState1F"126};127128static void __devexit ipath_remove_one(struct pci_dev *);129static int __devinit ipath_init_one(struct pci_dev *,130const struct pci_device_id *);131132/* Only needed for registration, nothing else needs this info */133#define PCI_VENDOR_ID_PATHSCALE 0x1fc1134#define PCI_DEVICE_ID_INFINIPATH_HT 0xd135136/* Number of seconds before our card status check... */137#define STATUS_TIMEOUT 60138139static const struct pci_device_id ipath_pci_tbl[] = {140{ PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_HT) },141{ 0, }142};143144MODULE_DEVICE_TABLE(pci, ipath_pci_tbl);145146static struct pci_driver ipath_driver = {147.name = IPATH_DRV_NAME,148.probe = ipath_init_one,149.remove = __devexit_p(ipath_remove_one),150.id_table = ipath_pci_tbl,151.driver = {152.groups = ipath_driver_attr_groups,153},154};155156static inline void read_bars(struct ipath_devdata *dd, struct pci_dev *dev,157u32 *bar0, u32 *bar1)158{159int ret;160161ret = pci_read_config_dword(dev, PCI_BASE_ADDRESS_0, bar0);162if (ret)163ipath_dev_err(dd, "failed to read bar0 before enable: "164"error %d\n", -ret);165166ret = pci_read_config_dword(dev, PCI_BASE_ADDRESS_1, bar1);167if (ret)168ipath_dev_err(dd, "failed to read bar1 before enable: "169"error %d\n", -ret);170171ipath_dbg("Read bar0 %x bar1 %x\n", *bar0, *bar1);172}173174static void ipath_free_devdata(struct pci_dev *pdev,175struct ipath_devdata *dd)176{177unsigned long flags;178179pci_set_drvdata(pdev, NULL);180181if (dd->ipath_unit != -1) {182spin_lock_irqsave(&ipath_devs_lock, flags);183idr_remove(&unit_table, dd->ipath_unit);184list_del(&dd->ipath_list);185spin_unlock_irqrestore(&ipath_devs_lock, flags);186}187vfree(dd);188}189190static struct ipath_devdata *ipath_alloc_devdata(struct pci_dev *pdev)191{192unsigned long flags;193struct ipath_devdata *dd;194int ret;195196if (!idr_pre_get(&unit_table, GFP_KERNEL)) {197dd = ERR_PTR(-ENOMEM);198goto bail;199}200201dd = vzalloc(sizeof(*dd));202if (!dd) {203dd = ERR_PTR(-ENOMEM);204goto bail;205}206dd->ipath_unit = -1;207208spin_lock_irqsave(&ipath_devs_lock, flags);209210ret = idr_get_new(&unit_table, dd, &dd->ipath_unit);211if (ret < 0) {212printk(KERN_ERR IPATH_DRV_NAME213": Could not allocate unit ID: error %d\n", -ret);214ipath_free_devdata(pdev, dd);215dd = ERR_PTR(ret);216goto bail_unlock;217}218219dd->pcidev = pdev;220pci_set_drvdata(pdev, dd);221222list_add(&dd->ipath_list, &ipath_dev_list);223224bail_unlock:225spin_unlock_irqrestore(&ipath_devs_lock, flags);226227bail:228return dd;229}230231static inline struct ipath_devdata *__ipath_lookup(int unit)232{233return idr_find(&unit_table, unit);234}235236struct ipath_devdata *ipath_lookup(int unit)237{238struct ipath_devdata *dd;239unsigned long flags;240241spin_lock_irqsave(&ipath_devs_lock, flags);242dd = __ipath_lookup(unit);243spin_unlock_irqrestore(&ipath_devs_lock, flags);244245return dd;246}247248int ipath_count_units(int *npresentp, int *nupp, int *maxportsp)249{250int nunits, npresent, nup;251struct ipath_devdata *dd;252unsigned long flags;253int maxports;254255nunits = npresent = nup = maxports = 0;256257spin_lock_irqsave(&ipath_devs_lock, flags);258259list_for_each_entry(dd, &ipath_dev_list, ipath_list) {260nunits++;261if ((dd->ipath_flags & IPATH_PRESENT) && dd->ipath_kregbase)262npresent++;263if (dd->ipath_lid &&264!(dd->ipath_flags & (IPATH_DISABLED | IPATH_LINKDOWN265| IPATH_LINKUNK)))266nup++;267if (dd->ipath_cfgports > maxports)268maxports = dd->ipath_cfgports;269}270271spin_unlock_irqrestore(&ipath_devs_lock, flags);272273if (npresentp)274*npresentp = npresent;275if (nupp)276*nupp = nup;277if (maxportsp)278*maxportsp = maxports;279280return nunits;281}282283/*284* These next two routines are placeholders in case we don't have per-arch285* code for controlling write combining. If explicit control of write286* combining is not available, performance will probably be awful.287*/288289int __attribute__((weak)) ipath_enable_wc(struct ipath_devdata *dd)290{291return -EOPNOTSUPP;292}293294void __attribute__((weak)) ipath_disable_wc(struct ipath_devdata *dd)295{296}297298/*299* Perform a PIO buffer bandwidth write test, to verify proper system300* configuration. Even when all the setup calls work, occasionally301* BIOS or other issues can prevent write combining from working, or302* can cause other bandwidth problems to the chip.303*304* This test simply writes the same buffer over and over again, and305* measures close to the peak bandwidth to the chip (not testing306* data bandwidth to the wire). On chips that use an address-based307* trigger to send packets to the wire, this is easy. On chips that308* use a count to trigger, we want to make sure that the packet doesn't309* go out on the wire, or trigger flow control checks.310*/311static void ipath_verify_pioperf(struct ipath_devdata *dd)312{313u32 pbnum, cnt, lcnt;314u32 __iomem *piobuf;315u32 *addr;316u64 msecs, emsecs;317318piobuf = ipath_getpiobuf(dd, 0, &pbnum);319if (!piobuf) {320dev_info(&dd->pcidev->dev,321"No PIObufs for checking perf, skipping\n");322return;323}324325/*326* Enough to give us a reasonable test, less than piobuf size, and327* likely multiple of store buffer length.328*/329cnt = 1024;330331addr = vmalloc(cnt);332if (!addr) {333dev_info(&dd->pcidev->dev,334"Couldn't get memory for checking PIO perf,"335" skipping\n");336goto done;337}338339preempt_disable(); /* we want reasonably accurate elapsed time */340msecs = 1 + jiffies_to_msecs(jiffies);341for (lcnt = 0; lcnt < 10000U; lcnt++) {342/* wait until we cross msec boundary */343if (jiffies_to_msecs(jiffies) >= msecs)344break;345udelay(1);346}347348ipath_disable_armlaunch(dd);349350/*351* length 0, no dwords actually sent, and mark as VL15352* on chips where that may matter (due to IB flowcontrol)353*/354if ((dd->ipath_flags & IPATH_HAS_PBC_CNT))355writeq(1UL << 63, piobuf);356else357writeq(0, piobuf);358ipath_flush_wc();359360/*361* this is only roughly accurate, since even with preempt we362* still take interrupts that could take a while. Running for363* >= 5 msec seems to get us "close enough" to accurate values364*/365msecs = jiffies_to_msecs(jiffies);366for (emsecs = lcnt = 0; emsecs <= 5UL; lcnt++) {367__iowrite32_copy(piobuf + 64, addr, cnt >> 2);368emsecs = jiffies_to_msecs(jiffies) - msecs;369}370371/* 1 GiB/sec, slightly over IB SDR line rate */372if (lcnt < (emsecs * 1024U))373ipath_dev_err(dd,374"Performance problem: bandwidth to PIO buffers is "375"only %u MiB/sec\n",376lcnt / (u32) emsecs);377else378ipath_dbg("PIO buffer bandwidth %u MiB/sec is OK\n",379lcnt / (u32) emsecs);380381preempt_enable();382383vfree(addr);384385done:386/* disarm piobuf, so it's available again */387ipath_disarm_piobufs(dd, pbnum, 1);388ipath_enable_armlaunch(dd);389}390391static void cleanup_device(struct ipath_devdata *dd);392393static int __devinit ipath_init_one(struct pci_dev *pdev,394const struct pci_device_id *ent)395{396int ret, len, j;397struct ipath_devdata *dd;398unsigned long long addr;399u32 bar0 = 0, bar1 = 0;400401dd = ipath_alloc_devdata(pdev);402if (IS_ERR(dd)) {403ret = PTR_ERR(dd);404printk(KERN_ERR IPATH_DRV_NAME405": Could not allocate devdata: error %d\n", -ret);406goto bail;407}408409ipath_cdbg(VERBOSE, "initializing unit #%u\n", dd->ipath_unit);410411ret = pci_enable_device(pdev);412if (ret) {413/* This can happen iff:414*415* We did a chip reset, and then failed to reprogram the416* BAR, or the chip reset due to an internal error. We then417* unloaded the driver and reloaded it.418*419* Both reset cases set the BAR back to initial state. For420* the latter case, the AER sticky error bit at offset 0x718421* should be set, but the Linux kernel doesn't yet know422* about that, it appears. If the original BAR was retained423* in the kernel data structures, this may be OK.424*/425ipath_dev_err(dd, "enable unit %d failed: error %d\n",426dd->ipath_unit, -ret);427goto bail_devdata;428}429addr = pci_resource_start(pdev, 0);430len = pci_resource_len(pdev, 0);431ipath_cdbg(VERBOSE, "regbase (0) %llx len %d irq %d, vend %x/%x "432"driver_data %lx\n", addr, len, pdev->irq, ent->vendor,433ent->device, ent->driver_data);434435read_bars(dd, pdev, &bar0, &bar1);436437if (!bar1 && !(bar0 & ~0xf)) {438if (addr) {439dev_info(&pdev->dev, "BAR is 0 (probable RESET), "440"rewriting as %llx\n", addr);441ret = pci_write_config_dword(442pdev, PCI_BASE_ADDRESS_0, addr);443if (ret) {444ipath_dev_err(dd, "rewrite of BAR0 "445"failed: err %d\n", -ret);446goto bail_disable;447}448ret = pci_write_config_dword(449pdev, PCI_BASE_ADDRESS_1, addr >> 32);450if (ret) {451ipath_dev_err(dd, "rewrite of BAR1 "452"failed: err %d\n", -ret);453goto bail_disable;454}455} else {456ipath_dev_err(dd, "BAR is 0 (probable RESET), "457"not usable until reboot\n");458ret = -ENODEV;459goto bail_disable;460}461}462463ret = pci_request_regions(pdev, IPATH_DRV_NAME);464if (ret) {465dev_info(&pdev->dev, "pci_request_regions unit %u fails: "466"err %d\n", dd->ipath_unit, -ret);467goto bail_disable;468}469470ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));471if (ret) {472/*473* if the 64 bit setup fails, try 32 bit. Some systems474* do not setup 64 bit maps on systems with 2GB or less475* memory installed.476*/477ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));478if (ret) {479dev_info(&pdev->dev,480"Unable to set DMA mask for unit %u: %d\n",481dd->ipath_unit, ret);482goto bail_regions;483}484else {485ipath_dbg("No 64bit DMA mask, used 32 bit mask\n");486ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));487if (ret)488dev_info(&pdev->dev,489"Unable to set DMA consistent mask "490"for unit %u: %d\n",491dd->ipath_unit, ret);492493}494}495else {496ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));497if (ret)498dev_info(&pdev->dev,499"Unable to set DMA consistent mask "500"for unit %u: %d\n",501dd->ipath_unit, ret);502}503504pci_set_master(pdev);505506/*507* Save BARs to rewrite after device reset. Save all 64 bits of508* BAR, just in case.509*/510dd->ipath_pcibar0 = addr;511dd->ipath_pcibar1 = addr >> 32;512dd->ipath_deviceid = ent->device; /* save for later use */513dd->ipath_vendorid = ent->vendor;514515/* setup the chip-specific functions, as early as possible. */516switch (ent->device) {517case PCI_DEVICE_ID_INFINIPATH_HT:518ipath_init_iba6110_funcs(dd);519break;520521default:522ipath_dev_err(dd, "Found unknown QLogic deviceid 0x%x, "523"failing\n", ent->device);524return -ENODEV;525}526527for (j = 0; j < 6; j++) {528if (!pdev->resource[j].start)529continue;530ipath_cdbg(VERBOSE, "BAR %d %pR, len %llx\n",531j, &pdev->resource[j],532(unsigned long long)pci_resource_len(pdev, j));533}534535if (!addr) {536ipath_dev_err(dd, "No valid address in BAR 0!\n");537ret = -ENODEV;538goto bail_regions;539}540541dd->ipath_pcirev = pdev->revision;542543#if defined(__powerpc__)544/* There isn't a generic way to specify writethrough mappings */545dd->ipath_kregbase = __ioremap(addr, len,546(_PAGE_NO_CACHE|_PAGE_WRITETHRU));547#else548dd->ipath_kregbase = ioremap_nocache(addr, len);549#endif550551if (!dd->ipath_kregbase) {552ipath_dbg("Unable to map io addr %llx to kvirt, failing\n",553addr);554ret = -ENOMEM;555goto bail_iounmap;556}557dd->ipath_kregend = (u64 __iomem *)558((void __iomem *)dd->ipath_kregbase + len);559dd->ipath_physaddr = addr; /* used for io_remap, etc. */560/* for user mmap */561ipath_cdbg(VERBOSE, "mapped io addr %llx to kregbase %p\n",562addr, dd->ipath_kregbase);563564if (dd->ipath_f_bus(dd, pdev))565ipath_dev_err(dd, "Failed to setup config space; "566"continuing anyway\n");567568/*569* set up our interrupt handler; IRQF_SHARED probably not needed,570* since MSI interrupts shouldn't be shared but won't hurt for now.571* check 0 irq after we return from chip-specific bus setup, since572* that can affect this due to setup573*/574if (!dd->ipath_irq)575ipath_dev_err(dd, "irq is 0, BIOS error? Interrupts won't "576"work\n");577else {578ret = request_irq(dd->ipath_irq, ipath_intr, IRQF_SHARED,579IPATH_DRV_NAME, dd);580if (ret) {581ipath_dev_err(dd, "Couldn't setup irq handler, "582"irq=%d: %d\n", dd->ipath_irq, ret);583goto bail_iounmap;584}585}586587ret = ipath_init_chip(dd, 0); /* do the chip-specific init */588if (ret)589goto bail_irqsetup;590591ret = ipath_enable_wc(dd);592593if (ret) {594ipath_dev_err(dd, "Write combining not enabled "595"(err %d): performance may be poor\n",596-ret);597ret = 0;598}599600ipath_verify_pioperf(dd);601602ipath_device_create_group(&pdev->dev, dd);603ipathfs_add_device(dd);604ipath_user_add(dd);605ipath_diag_add(dd);606ipath_register_ib_device(dd);607608goto bail;609610bail_irqsetup:611cleanup_device(dd);612613if (dd->ipath_irq)614dd->ipath_f_free_irq(dd);615616if (dd->ipath_f_cleanup)617dd->ipath_f_cleanup(dd);618619bail_iounmap:620iounmap((volatile void __iomem *) dd->ipath_kregbase);621622bail_regions:623pci_release_regions(pdev);624625bail_disable:626pci_disable_device(pdev);627628bail_devdata:629ipath_free_devdata(pdev, dd);630631bail:632return ret;633}634635static void cleanup_device(struct ipath_devdata *dd)636{637int port;638struct ipath_portdata **tmp;639unsigned long flags;640641if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) {642/* can't do anything more with chip; needs re-init */643*dd->ipath_statusp &= ~IPATH_STATUS_CHIP_PRESENT;644if (dd->ipath_kregbase) {645/*646* if we haven't already cleaned up before these are647* to ensure any register reads/writes "fail" until648* re-init649*/650dd->ipath_kregbase = NULL;651dd->ipath_uregbase = 0;652dd->ipath_sregbase = 0;653dd->ipath_cregbase = 0;654dd->ipath_kregsize = 0;655}656ipath_disable_wc(dd);657}658659if (dd->ipath_spectriggerhit)660dev_info(&dd->pcidev->dev, "%lu special trigger hits\n",661dd->ipath_spectriggerhit);662663if (dd->ipath_pioavailregs_dma) {664dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,665(void *) dd->ipath_pioavailregs_dma,666dd->ipath_pioavailregs_phys);667dd->ipath_pioavailregs_dma = NULL;668}669if (dd->ipath_dummy_hdrq) {670dma_free_coherent(&dd->pcidev->dev,671dd->ipath_pd[0]->port_rcvhdrq_size,672dd->ipath_dummy_hdrq, dd->ipath_dummy_hdrq_phys);673dd->ipath_dummy_hdrq = NULL;674}675676if (dd->ipath_pageshadow) {677struct page **tmpp = dd->ipath_pageshadow;678dma_addr_t *tmpd = dd->ipath_physshadow;679int i, cnt = 0;680681ipath_cdbg(VERBOSE, "Unlocking any expTID pages still "682"locked\n");683for (port = 0; port < dd->ipath_cfgports; port++) {684int port_tidbase = port * dd->ipath_rcvtidcnt;685int maxtid = port_tidbase + dd->ipath_rcvtidcnt;686for (i = port_tidbase; i < maxtid; i++) {687if (!tmpp[i])688continue;689pci_unmap_page(dd->pcidev, tmpd[i],690PAGE_SIZE, PCI_DMA_FROMDEVICE);691ipath_release_user_pages(&tmpp[i], 1);692tmpp[i] = NULL;693cnt++;694}695}696if (cnt) {697ipath_stats.sps_pageunlocks += cnt;698ipath_cdbg(VERBOSE, "There were still %u expTID "699"entries locked\n", cnt);700}701if (ipath_stats.sps_pagelocks ||702ipath_stats.sps_pageunlocks)703ipath_cdbg(VERBOSE, "%llu pages locked, %llu "704"unlocked via ipath_m{un}lock\n",705(unsigned long long)706ipath_stats.sps_pagelocks,707(unsigned long long)708ipath_stats.sps_pageunlocks);709710ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n",711dd->ipath_pageshadow);712tmpp = dd->ipath_pageshadow;713dd->ipath_pageshadow = NULL;714vfree(tmpp);715716dd->ipath_egrtidbase = NULL;717}718719/*720* free any resources still in use (usually just kernel ports)721* at unload; we do for portcnt, because that's what we allocate.722* We acquire lock to be really paranoid that ipath_pd isn't being723* accessed from some interrupt-related code (that should not happen,724* but best to be sure).725*/726spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);727tmp = dd->ipath_pd;728dd->ipath_pd = NULL;729spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);730for (port = 0; port < dd->ipath_portcnt; port++) {731struct ipath_portdata *pd = tmp[port];732tmp[port] = NULL; /* debugging paranoia */733ipath_free_pddata(dd, pd);734}735kfree(tmp);736}737738static void __devexit ipath_remove_one(struct pci_dev *pdev)739{740struct ipath_devdata *dd = pci_get_drvdata(pdev);741742ipath_cdbg(VERBOSE, "removing, pdev=%p, dd=%p\n", pdev, dd);743744/*745* disable the IB link early, to be sure no new packets arrive, which746* complicates the shutdown process747*/748ipath_shutdown_device(dd);749750flush_workqueue(ib_wq);751752if (dd->verbs_dev)753ipath_unregister_ib_device(dd->verbs_dev);754755ipath_diag_remove(dd);756ipath_user_remove(dd);757ipathfs_remove_device(dd);758ipath_device_remove_group(&pdev->dev, dd);759760ipath_cdbg(VERBOSE, "Releasing pci memory regions, dd %p, "761"unit %u\n", dd, (u32) dd->ipath_unit);762763cleanup_device(dd);764765/*766* turn off rcv, send, and interrupts for all ports, all drivers767* should also hard reset the chip here?768* free up port 0 (kernel) rcvhdr, egr bufs, and eventually tid bufs769* for all versions of the driver, if they were allocated770*/771if (dd->ipath_irq) {772ipath_cdbg(VERBOSE, "unit %u free irq %d\n",773dd->ipath_unit, dd->ipath_irq);774dd->ipath_f_free_irq(dd);775} else776ipath_dbg("irq is 0, not doing free_irq "777"for unit %u\n", dd->ipath_unit);778/*779* we check for NULL here, because it's outside780* the kregbase check, and we need to call it781* after the free_irq. Thus it's possible that782* the function pointers were never initialized.783*/784if (dd->ipath_f_cleanup)785/* clean up chip-specific stuff */786dd->ipath_f_cleanup(dd);787788ipath_cdbg(VERBOSE, "Unmapping kregbase %p\n", dd->ipath_kregbase);789iounmap((volatile void __iomem *) dd->ipath_kregbase);790pci_release_regions(pdev);791ipath_cdbg(VERBOSE, "calling pci_disable_device\n");792pci_disable_device(pdev);793794ipath_free_devdata(pdev, dd);795}796797/* general driver use */798DEFINE_MUTEX(ipath_mutex);799800static DEFINE_SPINLOCK(ipath_pioavail_lock);801802/**803* ipath_disarm_piobufs - cancel a range of PIO buffers804* @dd: the infinipath device805* @first: the first PIO buffer to cancel806* @cnt: the number of PIO buffers to cancel807*808* cancel a range of PIO buffers, used when they might be armed, but809* not triggered. Used at init to ensure buffer state, and also user810* process close, in case it died while writing to a PIO buffer811* Also after errors.812*/813void ipath_disarm_piobufs(struct ipath_devdata *dd, unsigned first,814unsigned cnt)815{816unsigned i, last = first + cnt;817unsigned long flags;818819ipath_cdbg(PKT, "disarm %u PIObufs first=%u\n", cnt, first);820for (i = first; i < last; i++) {821spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);822/*823* The disarm-related bits are write-only, so it824* is ok to OR them in with our copy of sendctrl825* while we hold the lock.826*/827ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,828dd->ipath_sendctrl | INFINIPATH_S_DISARM |829(i << INFINIPATH_S_DISARMPIOBUF_SHIFT));830/* can't disarm bufs back-to-back per iba7220 spec */831ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);832spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);833}834/* on some older chips, update may not happen after cancel */835ipath_force_pio_avail_update(dd);836}837838/**839* ipath_wait_linkstate - wait for an IB link state change to occur840* @dd: the infinipath device841* @state: the state to wait for842* @msecs: the number of milliseconds to wait843*844* wait up to msecs milliseconds for IB link state change to occur for845* now, take the easy polling route. Currently used only by846* ipath_set_linkstate. Returns 0 if state reached, otherwise847* -ETIMEDOUT state can have multiple states set, for any of several848* transitions.849*/850int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state, int msecs)851{852dd->ipath_state_wanted = state;853wait_event_interruptible_timeout(ipath_state_wait,854(dd->ipath_flags & state),855msecs_to_jiffies(msecs));856dd->ipath_state_wanted = 0;857858if (!(dd->ipath_flags & state)) {859u64 val;860ipath_cdbg(VERBOSE, "Didn't reach linkstate %s within %u"861" ms\n",862/* test INIT ahead of DOWN, both can be set */863(state & IPATH_LINKINIT) ? "INIT" :864((state & IPATH_LINKDOWN) ? "DOWN" :865((state & IPATH_LINKARMED) ? "ARM" : "ACTIVE")),866msecs);867val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);868ipath_cdbg(VERBOSE, "ibcc=%llx ibcstatus=%llx (%s)\n",869(unsigned long long) ipath_read_kreg64(870dd, dd->ipath_kregs->kr_ibcctrl),871(unsigned long long) val,872ipath_ibcstatus_str[val & dd->ibcs_lts_mask]);873}874return (dd->ipath_flags & state) ? 0 : -ETIMEDOUT;875}876877static void decode_sdma_errs(struct ipath_devdata *dd, ipath_err_t err,878char *buf, size_t blen)879{880static const struct {881ipath_err_t err;882const char *msg;883} errs[] = {884{ INFINIPATH_E_SDMAGENMISMATCH, "SDmaGenMismatch" },885{ INFINIPATH_E_SDMAOUTOFBOUND, "SDmaOutOfBound" },886{ INFINIPATH_E_SDMATAILOUTOFBOUND, "SDmaTailOutOfBound" },887{ INFINIPATH_E_SDMABASE, "SDmaBase" },888{ INFINIPATH_E_SDMA1STDESC, "SDma1stDesc" },889{ INFINIPATH_E_SDMARPYTAG, "SDmaRpyTag" },890{ INFINIPATH_E_SDMADWEN, "SDmaDwEn" },891{ INFINIPATH_E_SDMAMISSINGDW, "SDmaMissingDw" },892{ INFINIPATH_E_SDMAUNEXPDATA, "SDmaUnexpData" },893{ INFINIPATH_E_SDMADESCADDRMISALIGN, "SDmaDescAddrMisalign" },894{ INFINIPATH_E_SENDBUFMISUSE, "SendBufMisuse" },895{ INFINIPATH_E_SDMADISABLED, "SDmaDisabled" },896};897int i;898int expected;899size_t bidx = 0;900901for (i = 0; i < ARRAY_SIZE(errs); i++) {902expected = (errs[i].err != INFINIPATH_E_SDMADISABLED) ? 0 :903test_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status);904if ((err & errs[i].err) && !expected)905bidx += snprintf(buf + bidx, blen - bidx,906"%s ", errs[i].msg);907}908}909910/*911* Decode the error status into strings, deciding whether to always912* print * it or not depending on "normal packet errors" vs everything913* else. Return 1 if "real" errors, otherwise 0 if only packet914* errors, so caller can decide what to print with the string.915*/916int ipath_decode_err(struct ipath_devdata *dd, char *buf, size_t blen,917ipath_err_t err)918{919int iserr = 1;920*buf = '\0';921if (err & INFINIPATH_E_PKTERRS) {922if (!(err & ~INFINIPATH_E_PKTERRS))923iserr = 0; // if only packet errors.924if (ipath_debug & __IPATH_ERRPKTDBG) {925if (err & INFINIPATH_E_REBP)926strlcat(buf, "EBP ", blen);927if (err & INFINIPATH_E_RVCRC)928strlcat(buf, "VCRC ", blen);929if (err & INFINIPATH_E_RICRC) {930strlcat(buf, "CRC ", blen);931// clear for check below, so only once932err &= INFINIPATH_E_RICRC;933}934if (err & INFINIPATH_E_RSHORTPKTLEN)935strlcat(buf, "rshortpktlen ", blen);936if (err & INFINIPATH_E_SDROPPEDDATAPKT)937strlcat(buf, "sdroppeddatapkt ", blen);938if (err & INFINIPATH_E_SPKTLEN)939strlcat(buf, "spktlen ", blen);940}941if ((err & INFINIPATH_E_RICRC) &&942!(err&(INFINIPATH_E_RVCRC|INFINIPATH_E_REBP)))943strlcat(buf, "CRC ", blen);944if (!iserr)945goto done;946}947if (err & INFINIPATH_E_RHDRLEN)948strlcat(buf, "rhdrlen ", blen);949if (err & INFINIPATH_E_RBADTID)950strlcat(buf, "rbadtid ", blen);951if (err & INFINIPATH_E_RBADVERSION)952strlcat(buf, "rbadversion ", blen);953if (err & INFINIPATH_E_RHDR)954strlcat(buf, "rhdr ", blen);955if (err & INFINIPATH_E_SENDSPECIALTRIGGER)956strlcat(buf, "sendspecialtrigger ", blen);957if (err & INFINIPATH_E_RLONGPKTLEN)958strlcat(buf, "rlongpktlen ", blen);959if (err & INFINIPATH_E_RMAXPKTLEN)960strlcat(buf, "rmaxpktlen ", blen);961if (err & INFINIPATH_E_RMINPKTLEN)962strlcat(buf, "rminpktlen ", blen);963if (err & INFINIPATH_E_SMINPKTLEN)964strlcat(buf, "sminpktlen ", blen);965if (err & INFINIPATH_E_RFORMATERR)966strlcat(buf, "rformaterr ", blen);967if (err & INFINIPATH_E_RUNSUPVL)968strlcat(buf, "runsupvl ", blen);969if (err & INFINIPATH_E_RUNEXPCHAR)970strlcat(buf, "runexpchar ", blen);971if (err & INFINIPATH_E_RIBFLOW)972strlcat(buf, "ribflow ", blen);973if (err & INFINIPATH_E_SUNDERRUN)974strlcat(buf, "sunderrun ", blen);975if (err & INFINIPATH_E_SPIOARMLAUNCH)976strlcat(buf, "spioarmlaunch ", blen);977if (err & INFINIPATH_E_SUNEXPERRPKTNUM)978strlcat(buf, "sunexperrpktnum ", blen);979if (err & INFINIPATH_E_SDROPPEDSMPPKT)980strlcat(buf, "sdroppedsmppkt ", blen);981if (err & INFINIPATH_E_SMAXPKTLEN)982strlcat(buf, "smaxpktlen ", blen);983if (err & INFINIPATH_E_SUNSUPVL)984strlcat(buf, "sunsupVL ", blen);985if (err & INFINIPATH_E_INVALIDADDR)986strlcat(buf, "invalidaddr ", blen);987if (err & INFINIPATH_E_RRCVEGRFULL)988strlcat(buf, "rcvegrfull ", blen);989if (err & INFINIPATH_E_RRCVHDRFULL)990strlcat(buf, "rcvhdrfull ", blen);991if (err & INFINIPATH_E_IBSTATUSCHANGED)992strlcat(buf, "ibcstatuschg ", blen);993if (err & INFINIPATH_E_RIBLOSTLINK)994strlcat(buf, "riblostlink ", blen);995if (err & INFINIPATH_E_HARDWARE)996strlcat(buf, "hardware ", blen);997if (err & INFINIPATH_E_RESET)998strlcat(buf, "reset ", blen);999if (err & INFINIPATH_E_SDMAERRS)1000decode_sdma_errs(dd, err, buf, blen);1001if (err & INFINIPATH_E_INVALIDEEPCMD)1002strlcat(buf, "invalideepromcmd ", blen);1003done:1004return iserr;1005}10061007/**1008* get_rhf_errstring - decode RHF errors1009* @err: the err number1010* @msg: the output buffer1011* @len: the length of the output buffer1012*1013* only used one place now, may want more later1014*/1015static void get_rhf_errstring(u32 err, char *msg, size_t len)1016{1017/* if no errors, and so don't need to check what's first */1018*msg = '\0';10191020if (err & INFINIPATH_RHF_H_ICRCERR)1021strlcat(msg, "icrcerr ", len);1022if (err & INFINIPATH_RHF_H_VCRCERR)1023strlcat(msg, "vcrcerr ", len);1024if (err & INFINIPATH_RHF_H_PARITYERR)1025strlcat(msg, "parityerr ", len);1026if (err & INFINIPATH_RHF_H_LENERR)1027strlcat(msg, "lenerr ", len);1028if (err & INFINIPATH_RHF_H_MTUERR)1029strlcat(msg, "mtuerr ", len);1030if (err & INFINIPATH_RHF_H_IHDRERR)1031/* infinipath hdr checksum error */1032strlcat(msg, "ipathhdrerr ", len);1033if (err & INFINIPATH_RHF_H_TIDERR)1034strlcat(msg, "tiderr ", len);1035if (err & INFINIPATH_RHF_H_MKERR)1036/* bad port, offset, etc. */1037strlcat(msg, "invalid ipathhdr ", len);1038if (err & INFINIPATH_RHF_H_IBERR)1039strlcat(msg, "iberr ", len);1040if (err & INFINIPATH_RHF_L_SWA)1041strlcat(msg, "swA ", len);1042if (err & INFINIPATH_RHF_L_SWB)1043strlcat(msg, "swB ", len);1044}10451046/**1047* ipath_get_egrbuf - get an eager buffer1048* @dd: the infinipath device1049* @bufnum: the eager buffer to get1050*1051* must only be called if ipath_pd[port] is known to be allocated1052*/1053static inline void *ipath_get_egrbuf(struct ipath_devdata *dd, u32 bufnum)1054{1055return dd->ipath_port0_skbinfo ?1056(void *) dd->ipath_port0_skbinfo[bufnum].skb->data : NULL;1057}10581059/**1060* ipath_alloc_skb - allocate an skb and buffer with possible constraints1061* @dd: the infinipath device1062* @gfp_mask: the sk_buff SFP mask1063*/1064struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd,1065gfp_t gfp_mask)1066{1067struct sk_buff *skb;1068u32 len;10691070/*1071* Only fully supported way to handle this is to allocate lots1072* extra, align as needed, and then do skb_reserve(). That wastes1073* a lot of memory... I'll have to hack this into infinipath_copy1074* also.1075*/10761077/*1078* We need 2 extra bytes for ipath_ether data sent in the1079* key header. In order to keep everything dword aligned,1080* we'll reserve 4 bytes.1081*/1082len = dd->ipath_ibmaxlen + 4;10831084if (dd->ipath_flags & IPATH_4BYTE_TID) {1085/* We need a 2KB multiple alignment, and there is no way1086* to do it except to allocate extra and then skb_reserve1087* enough to bring it up to the right alignment.1088*/1089len += 2047;1090}10911092skb = __dev_alloc_skb(len, gfp_mask);1093if (!skb) {1094ipath_dev_err(dd, "Failed to allocate skbuff, length %u\n",1095len);1096goto bail;1097}10981099skb_reserve(skb, 4);11001101if (dd->ipath_flags & IPATH_4BYTE_TID) {1102u32 una = (unsigned long)skb->data & 2047;1103if (una)1104skb_reserve(skb, 2048 - una);1105}11061107bail:1108return skb;1109}11101111static void ipath_rcv_hdrerr(struct ipath_devdata *dd,1112u32 eflags,1113u32 l,1114u32 etail,1115__le32 *rhf_addr,1116struct ipath_message_header *hdr)1117{1118char emsg[128];11191120get_rhf_errstring(eflags, emsg, sizeof emsg);1121ipath_cdbg(PKT, "RHFerrs %x hdrqtail=%x typ=%u "1122"tlen=%x opcode=%x egridx=%x: %s\n",1123eflags, l,1124ipath_hdrget_rcv_type(rhf_addr),1125ipath_hdrget_length_in_bytes(rhf_addr),1126be32_to_cpu(hdr->bth[0]) >> 24,1127etail, emsg);11281129/* Count local link integrity errors. */1130if (eflags & (INFINIPATH_RHF_H_ICRCERR | INFINIPATH_RHF_H_VCRCERR)) {1131u8 n = (dd->ipath_ibcctrl >>1132INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &1133INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;11341135if (++dd->ipath_lli_counter > n) {1136dd->ipath_lli_counter = 0;1137dd->ipath_lli_errors++;1138}1139}1140}11411142/*1143* ipath_kreceive - receive a packet1144* @pd: the infinipath port1145*1146* called from interrupt handler for errors or receive interrupt1147*/1148void ipath_kreceive(struct ipath_portdata *pd)1149{1150struct ipath_devdata *dd = pd->port_dd;1151__le32 *rhf_addr;1152void *ebuf;1153const u32 rsize = dd->ipath_rcvhdrentsize; /* words */1154const u32 maxcnt = dd->ipath_rcvhdrcnt * rsize; /* words */1155u32 etail = -1, l, hdrqtail;1156struct ipath_message_header *hdr;1157u32 eflags, i, etype, tlen, pkttot = 0, updegr = 0, reloop = 0;1158static u64 totcalls; /* stats, may eventually remove */1159int last;11601161l = pd->port_head;1162rhf_addr = (__le32 *) pd->port_rcvhdrq + l + dd->ipath_rhf_offset;1163if (dd->ipath_flags & IPATH_NODMA_RTAIL) {1164u32 seq = ipath_hdrget_seq(rhf_addr);11651166if (seq != pd->port_seq_cnt)1167goto bail;1168hdrqtail = 0;1169} else {1170hdrqtail = ipath_get_rcvhdrtail(pd);1171if (l == hdrqtail)1172goto bail;1173smp_rmb();1174}11751176reloop:1177for (last = 0, i = 1; !last; i += !last) {1178hdr = dd->ipath_f_get_msgheader(dd, rhf_addr);1179eflags = ipath_hdrget_err_flags(rhf_addr);1180etype = ipath_hdrget_rcv_type(rhf_addr);1181/* total length */1182tlen = ipath_hdrget_length_in_bytes(rhf_addr);1183ebuf = NULL;1184if ((dd->ipath_flags & IPATH_NODMA_RTAIL) ?1185ipath_hdrget_use_egr_buf(rhf_addr) :1186(etype != RCVHQ_RCV_TYPE_EXPECTED)) {1187/*1188* It turns out that the chip uses an eager buffer1189* for all non-expected packets, whether it "needs"1190* one or not. So always get the index, but don't1191* set ebuf (so we try to copy data) unless the1192* length requires it.1193*/1194etail = ipath_hdrget_index(rhf_addr);1195updegr = 1;1196if (tlen > sizeof(*hdr) ||1197etype == RCVHQ_RCV_TYPE_NON_KD)1198ebuf = ipath_get_egrbuf(dd, etail);1199}12001201/*1202* both tiderr and ipathhdrerr are set for all plain IB1203* packets; only ipathhdrerr should be set.1204*/12051206if (etype != RCVHQ_RCV_TYPE_NON_KD &&1207etype != RCVHQ_RCV_TYPE_ERROR &&1208ipath_hdrget_ipath_ver(hdr->iph.ver_port_tid_offset) !=1209IPS_PROTO_VERSION)1210ipath_cdbg(PKT, "Bad InfiniPath protocol version "1211"%x\n", etype);12121213if (unlikely(eflags))1214ipath_rcv_hdrerr(dd, eflags, l, etail, rhf_addr, hdr);1215else if (etype == RCVHQ_RCV_TYPE_NON_KD) {1216ipath_ib_rcv(dd->verbs_dev, (u32 *)hdr, ebuf, tlen);1217if (dd->ipath_lli_counter)1218dd->ipath_lli_counter--;1219} else if (etype == RCVHQ_RCV_TYPE_EAGER) {1220u8 opcode = be32_to_cpu(hdr->bth[0]) >> 24;1221u32 qp = be32_to_cpu(hdr->bth[1]) & 0xffffff;1222ipath_cdbg(PKT, "typ %x, opcode %x (eager, "1223"qp=%x), len %x; ignored\n",1224etype, opcode, qp, tlen);1225}1226else if (etype == RCVHQ_RCV_TYPE_EXPECTED)1227ipath_dbg("Bug: Expected TID, opcode %x; ignored\n",1228be32_to_cpu(hdr->bth[0]) >> 24);1229else {1230/*1231* error packet, type of error unknown.1232* Probably type 3, but we don't know, so don't1233* even try to print the opcode, etc.1234* Usually caused by a "bad packet", that has no1235* BTH, when the LRH says it should.1236*/1237ipath_cdbg(ERRPKT, "Error Pkt, but no eflags! egrbuf"1238" %x, len %x hdrq+%x rhf: %Lx\n",1239etail, tlen, l, (unsigned long long)1240le64_to_cpu(*(__le64 *) rhf_addr));1241if (ipath_debug & __IPATH_ERRPKTDBG) {1242u32 j, *d, dw = rsize-2;1243if (rsize > (tlen>>2))1244dw = tlen>>2;1245d = (u32 *)hdr;1246printk(KERN_DEBUG "EPkt rcvhdr(%x dw):\n",1247dw);1248for (j = 0; j < dw; j++)1249printk(KERN_DEBUG "%8x%s", d[j],1250(j%8) == 7 ? "\n" : " ");1251printk(KERN_DEBUG ".\n");1252}1253}1254l += rsize;1255if (l >= maxcnt)1256l = 0;1257rhf_addr = (__le32 *) pd->port_rcvhdrq +1258l + dd->ipath_rhf_offset;1259if (dd->ipath_flags & IPATH_NODMA_RTAIL) {1260u32 seq = ipath_hdrget_seq(rhf_addr);12611262if (++pd->port_seq_cnt > 13)1263pd->port_seq_cnt = 1;1264if (seq != pd->port_seq_cnt)1265last = 1;1266} else if (l == hdrqtail)1267last = 1;1268/*1269* update head regs on last packet, and every 16 packets.1270* Reduce bus traffic, while still trying to prevent1271* rcvhdrq overflows, for when the queue is nearly full1272*/1273if (last || !(i & 0xf)) {1274u64 lval = l;12751276/* request IBA6120 and 7220 interrupt only on last */1277if (last)1278lval |= dd->ipath_rhdrhead_intr_off;1279ipath_write_ureg(dd, ur_rcvhdrhead, lval,1280pd->port_port);1281if (updegr) {1282ipath_write_ureg(dd, ur_rcvegrindexhead,1283etail, pd->port_port);1284updegr = 0;1285}1286}1287}12881289if (!dd->ipath_rhdrhead_intr_off && !reloop &&1290!(dd->ipath_flags & IPATH_NODMA_RTAIL)) {1291/* IBA6110 workaround; we can have a race clearing chip1292* interrupt with another interrupt about to be delivered,1293* and can clear it before it is delivered on the GPIO1294* workaround. By doing the extra check here for the1295* in-memory tail register updating while we were doing1296* earlier packets, we "almost" guarantee we have covered1297* that case.1298*/1299u32 hqtail = ipath_get_rcvhdrtail(pd);1300if (hqtail != hdrqtail) {1301hdrqtail = hqtail;1302reloop = 1; /* loop 1 extra time at most */1303goto reloop;1304}1305}13061307pkttot += i;13081309pd->port_head = l;13101311if (pkttot > ipath_stats.sps_maxpkts_call)1312ipath_stats.sps_maxpkts_call = pkttot;1313ipath_stats.sps_port0pkts += pkttot;1314ipath_stats.sps_avgpkts_call =1315ipath_stats.sps_port0pkts / ++totcalls;13161317bail:;1318}13191320/**1321* ipath_update_pio_bufs - update shadow copy of the PIO availability map1322* @dd: the infinipath device1323*1324* called whenever our local copy indicates we have run out of send buffers1325* NOTE: This can be called from interrupt context by some code1326* and from non-interrupt context by ipath_getpiobuf().1327*/13281329static void ipath_update_pio_bufs(struct ipath_devdata *dd)1330{1331unsigned long flags;1332int i;1333const unsigned piobregs = (unsigned)dd->ipath_pioavregs;13341335/* If the generation (check) bits have changed, then we update the1336* busy bit for the corresponding PIO buffer. This algorithm will1337* modify positions to the value they already have in some cases1338* (i.e., no change), but it's faster than changing only the bits1339* that have changed.1340*1341* We would like to do this atomicly, to avoid spinlocks in the1342* critical send path, but that's not really possible, given the1343* type of changes, and that this routine could be called on1344* multiple cpu's simultaneously, so we lock in this routine only,1345* to avoid conflicting updates; all we change is the shadow, and1346* it's a single 64 bit memory location, so by definition the update1347* is atomic in terms of what other cpu's can see in testing the1348* bits. The spin_lock overhead isn't too bad, since it only1349* happens when all buffers are in use, so only cpu overhead, not1350* latency or bandwidth is affected.1351*/1352if (!dd->ipath_pioavailregs_dma) {1353ipath_dbg("Update shadow pioavail, but regs_dma NULL!\n");1354return;1355}1356if (ipath_debug & __IPATH_VERBDBG) {1357/* only if packet debug and verbose */1358volatile __le64 *dma = dd->ipath_pioavailregs_dma;1359unsigned long *shadow = dd->ipath_pioavailshadow;13601361ipath_cdbg(PKT, "Refill avail, dma0=%llx shad0=%lx, "1362"d1=%llx s1=%lx, d2=%llx s2=%lx, d3=%llx "1363"s3=%lx\n",1364(unsigned long long) le64_to_cpu(dma[0]),1365shadow[0],1366(unsigned long long) le64_to_cpu(dma[1]),1367shadow[1],1368(unsigned long long) le64_to_cpu(dma[2]),1369shadow[2],1370(unsigned long long) le64_to_cpu(dma[3]),1371shadow[3]);1372if (piobregs > 4)1373ipath_cdbg(1374PKT, "2nd group, dma4=%llx shad4=%lx, "1375"d5=%llx s5=%lx, d6=%llx s6=%lx, "1376"d7=%llx s7=%lx\n",1377(unsigned long long) le64_to_cpu(dma[4]),1378shadow[4],1379(unsigned long long) le64_to_cpu(dma[5]),1380shadow[5],1381(unsigned long long) le64_to_cpu(dma[6]),1382shadow[6],1383(unsigned long long) le64_to_cpu(dma[7]),1384shadow[7]);1385}1386spin_lock_irqsave(&ipath_pioavail_lock, flags);1387for (i = 0; i < piobregs; i++) {1388u64 pchbusy, pchg, piov, pnew;1389/*1390* Chip Errata: bug 6641; even and odd qwords>3 are swapped1391*/1392if (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS))1393piov = le64_to_cpu(dd->ipath_pioavailregs_dma[i ^ 1]);1394else1395piov = le64_to_cpu(dd->ipath_pioavailregs_dma[i]);1396pchg = dd->ipath_pioavailkernel[i] &1397~(dd->ipath_pioavailshadow[i] ^ piov);1398pchbusy = pchg << INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT;1399if (pchg && (pchbusy & dd->ipath_pioavailshadow[i])) {1400pnew = dd->ipath_pioavailshadow[i] & ~pchbusy;1401pnew |= piov & pchbusy;1402dd->ipath_pioavailshadow[i] = pnew;1403}1404}1405spin_unlock_irqrestore(&ipath_pioavail_lock, flags);1406}14071408/*1409* used to force update of pioavailshadow if we can't get a pio buffer.1410* Needed primarily due to exitting freeze mode after recovering1411* from errors. Done lazily, because it's safer (known to not1412* be writing pio buffers).1413*/1414static void ipath_reset_availshadow(struct ipath_devdata *dd)1415{1416int i, im;1417unsigned long flags;14181419spin_lock_irqsave(&ipath_pioavail_lock, flags);1420for (i = 0; i < dd->ipath_pioavregs; i++) {1421u64 val, oldval;1422/* deal with 6110 chip bug on high register #s */1423im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ?1424i ^ 1 : i;1425val = le64_to_cpu(dd->ipath_pioavailregs_dma[im]);1426/*1427* busy out the buffers not in the kernel avail list,1428* without changing the generation bits.1429*/1430oldval = dd->ipath_pioavailshadow[i];1431dd->ipath_pioavailshadow[i] = val |1432((~dd->ipath_pioavailkernel[i] <<1433INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT) &14340xaaaaaaaaaaaaaaaaULL); /* All BUSY bits in qword */1435if (oldval != dd->ipath_pioavailshadow[i])1436ipath_dbg("shadow[%d] was %Lx, now %lx\n",1437i, (unsigned long long) oldval,1438dd->ipath_pioavailshadow[i]);1439}1440spin_unlock_irqrestore(&ipath_pioavail_lock, flags);1441}14421443/**1444* ipath_setrcvhdrsize - set the receive header size1445* @dd: the infinipath device1446* @rhdrsize: the receive header size1447*1448* called from user init code, and also layered driver init1449*/1450int ipath_setrcvhdrsize(struct ipath_devdata *dd, unsigned rhdrsize)1451{1452int ret = 0;14531454if (dd->ipath_flags & IPATH_RCVHDRSZ_SET) {1455if (dd->ipath_rcvhdrsize != rhdrsize) {1456dev_info(&dd->pcidev->dev,1457"Error: can't set protocol header "1458"size %u, already %u\n",1459rhdrsize, dd->ipath_rcvhdrsize);1460ret = -EAGAIN;1461} else1462ipath_cdbg(VERBOSE, "Reuse same protocol header "1463"size %u\n", dd->ipath_rcvhdrsize);1464} else if (rhdrsize > (dd->ipath_rcvhdrentsize -1465(sizeof(u64) / sizeof(u32)))) {1466ipath_dbg("Error: can't set protocol header size %u "1467"(> max %u)\n", rhdrsize,1468dd->ipath_rcvhdrentsize -1469(u32) (sizeof(u64) / sizeof(u32)));1470ret = -EOVERFLOW;1471} else {1472dd->ipath_flags |= IPATH_RCVHDRSZ_SET;1473dd->ipath_rcvhdrsize = rhdrsize;1474ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrsize,1475dd->ipath_rcvhdrsize);1476ipath_cdbg(VERBOSE, "Set protocol header size to %u\n",1477dd->ipath_rcvhdrsize);1478}1479return ret;1480}14811482/*1483* debugging code and stats updates if no pio buffers available.1484*/1485static noinline void no_pio_bufs(struct ipath_devdata *dd)1486{1487unsigned long *shadow = dd->ipath_pioavailshadow;1488__le64 *dma = (__le64 *)dd->ipath_pioavailregs_dma;14891490dd->ipath_upd_pio_shadow = 1;14911492/*1493* not atomic, but if we lose a stat count in a while, that's OK1494*/1495ipath_stats.sps_nopiobufs++;1496if (!(++dd->ipath_consec_nopiobuf % 100000)) {1497ipath_force_pio_avail_update(dd); /* at start */1498ipath_dbg("%u tries no piobufavail ts%lx; dmacopy: "1499"%llx %llx %llx %llx\n"1500"ipath shadow: %lx %lx %lx %lx\n",1501dd->ipath_consec_nopiobuf,1502(unsigned long)get_cycles(),1503(unsigned long long) le64_to_cpu(dma[0]),1504(unsigned long long) le64_to_cpu(dma[1]),1505(unsigned long long) le64_to_cpu(dma[2]),1506(unsigned long long) le64_to_cpu(dma[3]),1507shadow[0], shadow[1], shadow[2], shadow[3]);1508/*1509* 4 buffers per byte, 4 registers above, cover rest1510* below1511*/1512if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) >1513(sizeof(shadow[0]) * 4 * 4))1514ipath_dbg("2nd group: dmacopy: "1515"%llx %llx %llx %llx\n"1516"ipath shadow: %lx %lx %lx %lx\n",1517(unsigned long long)le64_to_cpu(dma[4]),1518(unsigned long long)le64_to_cpu(dma[5]),1519(unsigned long long)le64_to_cpu(dma[6]),1520(unsigned long long)le64_to_cpu(dma[7]),1521shadow[4], shadow[5], shadow[6], shadow[7]);15221523/* at end, so update likely happened */1524ipath_reset_availshadow(dd);1525}1526}15271528/*1529* common code for normal driver pio buffer allocation, and reserved1530* allocation.1531*1532* do appropriate marking as busy, etc.1533* returns buffer number if one found (>=0), negative number is error.1534*/1535static u32 __iomem *ipath_getpiobuf_range(struct ipath_devdata *dd,1536u32 *pbufnum, u32 first, u32 last, u32 firsti)1537{1538int i, j, updated = 0;1539unsigned piobcnt;1540unsigned long flags;1541unsigned long *shadow = dd->ipath_pioavailshadow;1542u32 __iomem *buf;15431544piobcnt = last - first;1545if (dd->ipath_upd_pio_shadow) {1546/*1547* Minor optimization. If we had no buffers on last call,1548* start out by doing the update; continue and do scan even1549* if no buffers were updated, to be paranoid1550*/1551ipath_update_pio_bufs(dd);1552updated++;1553i = first;1554} else1555i = firsti;1556rescan:1557/*1558* while test_and_set_bit() is atomic, we do that and then the1559* change_bit(), and the pair is not. See if this is the cause1560* of the remaining armlaunch errors.1561*/1562spin_lock_irqsave(&ipath_pioavail_lock, flags);1563for (j = 0; j < piobcnt; j++, i++) {1564if (i >= last)1565i = first;1566if (__test_and_set_bit((2 * i) + 1, shadow))1567continue;1568/* flip generation bit */1569__change_bit(2 * i, shadow);1570break;1571}1572spin_unlock_irqrestore(&ipath_pioavail_lock, flags);15731574if (j == piobcnt) {1575if (!updated) {1576/*1577* first time through; shadow exhausted, but may be1578* buffers available, try an update and then rescan.1579*/1580ipath_update_pio_bufs(dd);1581updated++;1582i = first;1583goto rescan;1584} else if (updated == 1 && piobcnt <=1585((dd->ipath_sendctrl1586>> INFINIPATH_S_UPDTHRESH_SHIFT) &1587INFINIPATH_S_UPDTHRESH_MASK)) {1588/*1589* for chips supporting and using the update1590* threshold we need to force an update of the1591* in-memory copy if the count is less than the1592* thershold, then check one more time.1593*/1594ipath_force_pio_avail_update(dd);1595ipath_update_pio_bufs(dd);1596updated++;1597i = first;1598goto rescan;1599}16001601no_pio_bufs(dd);1602buf = NULL;1603} else {1604if (i < dd->ipath_piobcnt2k)1605buf = (u32 __iomem *) (dd->ipath_pio2kbase +1606i * dd->ipath_palign);1607else1608buf = (u32 __iomem *)1609(dd->ipath_pio4kbase +1610(i - dd->ipath_piobcnt2k) * dd->ipath_4kalign);1611if (pbufnum)1612*pbufnum = i;1613}16141615return buf;1616}16171618/**1619* ipath_getpiobuf - find an available pio buffer1620* @dd: the infinipath device1621* @plen: the size of the PIO buffer needed in 32-bit words1622* @pbufnum: the buffer number is placed here1623*/1624u32 __iomem *ipath_getpiobuf(struct ipath_devdata *dd, u32 plen, u32 *pbufnum)1625{1626u32 __iomem *buf;1627u32 pnum, nbufs;1628u32 first, lasti;16291630if (plen + 1 >= IPATH_SMALLBUF_DWORDS) {1631first = dd->ipath_piobcnt2k;1632lasti = dd->ipath_lastpioindexl;1633} else {1634first = 0;1635lasti = dd->ipath_lastpioindex;1636}1637nbufs = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;1638buf = ipath_getpiobuf_range(dd, &pnum, first, nbufs, lasti);16391640if (buf) {1641/*1642* Set next starting place. It's just an optimization,1643* it doesn't matter who wins on this, so no locking1644*/1645if (plen + 1 >= IPATH_SMALLBUF_DWORDS)1646dd->ipath_lastpioindexl = pnum + 1;1647else1648dd->ipath_lastpioindex = pnum + 1;1649if (dd->ipath_upd_pio_shadow)1650dd->ipath_upd_pio_shadow = 0;1651if (dd->ipath_consec_nopiobuf)1652dd->ipath_consec_nopiobuf = 0;1653ipath_cdbg(VERBOSE, "Return piobuf%u %uk @ %p\n",1654pnum, (pnum < dd->ipath_piobcnt2k) ? 2 : 4, buf);1655if (pbufnum)1656*pbufnum = pnum;16571658}1659return buf;1660}16611662/**1663* ipath_chg_pioavailkernel - change which send buffers are available for kernel1664* @dd: the infinipath device1665* @start: the starting send buffer number1666* @len: the number of send buffers1667* @avail: true if the buffers are available for kernel use, false otherwise1668*/1669void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start,1670unsigned len, int avail)1671{1672unsigned long flags;1673unsigned end, cnt = 0;16741675/* There are two bits per send buffer (busy and generation) */1676start *= 2;1677end = start + len * 2;16781679spin_lock_irqsave(&ipath_pioavail_lock, flags);1680/* Set or clear the busy bit in the shadow. */1681while (start < end) {1682if (avail) {1683unsigned long dma;1684int i, im;1685/*1686* the BUSY bit will never be set, because we disarm1687* the user buffers before we hand them back to the1688* kernel. We do have to make sure the generation1689* bit is set correctly in shadow, since it could1690* have changed many times while allocated to user.1691* We can't use the bitmap functions on the full1692* dma array because it is always little-endian, so1693* we have to flip to host-order first.1694* BITS_PER_LONG is slightly wrong, since it's1695* always 64 bits per register in chip...1696* We only work on 64 bit kernels, so that's OK.1697*/1698/* deal with 6110 chip bug on high register #s */1699i = start / BITS_PER_LONG;1700im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ?1701i ^ 1 : i;1702__clear_bit(INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT1703+ start, dd->ipath_pioavailshadow);1704dma = (unsigned long) le64_to_cpu(1705dd->ipath_pioavailregs_dma[im]);1706if (test_bit((INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT1707+ start) % BITS_PER_LONG, &dma))1708__set_bit(INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT1709+ start, dd->ipath_pioavailshadow);1710else1711__clear_bit(INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT1712+ start, dd->ipath_pioavailshadow);1713__set_bit(start, dd->ipath_pioavailkernel);1714} else {1715__set_bit(start + INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT,1716dd->ipath_pioavailshadow);1717__clear_bit(start, dd->ipath_pioavailkernel);1718}1719start += 2;1720}17211722if (dd->ipath_pioupd_thresh) {1723end = 2 * (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k);1724cnt = bitmap_weight(dd->ipath_pioavailkernel, end);1725}1726spin_unlock_irqrestore(&ipath_pioavail_lock, flags);17271728/*1729* When moving buffers from kernel to user, if number assigned to1730* the user is less than the pio update threshold, and threshold1731* is supported (cnt was computed > 0), drop the update threshold1732* so we update at least once per allocated number of buffers.1733* In any case, if the kernel buffers are less than the threshold,1734* drop the threshold. We don't bother increasing it, having once1735* decreased it, since it would typically just cycle back and forth.1736* If we don't decrease below buffers in use, we can wait a long1737* time for an update, until some other context uses PIO buffers.1738*/1739if (!avail && len < cnt)1740cnt = len;1741if (cnt < dd->ipath_pioupd_thresh) {1742dd->ipath_pioupd_thresh = cnt;1743ipath_dbg("Decreased pio update threshold to %u\n",1744dd->ipath_pioupd_thresh);1745spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);1746dd->ipath_sendctrl &= ~(INFINIPATH_S_UPDTHRESH_MASK1747<< INFINIPATH_S_UPDTHRESH_SHIFT);1748dd->ipath_sendctrl |= dd->ipath_pioupd_thresh1749<< INFINIPATH_S_UPDTHRESH_SHIFT;1750ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,1751dd->ipath_sendctrl);1752spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);1753}1754}17551756/**1757* ipath_create_rcvhdrq - create a receive header queue1758* @dd: the infinipath device1759* @pd: the port data1760*1761* this must be contiguous memory (from an i/o perspective), and must be1762* DMA'able (which means for some systems, it will go through an IOMMU,1763* or be forced into a low address range).1764*/1765int ipath_create_rcvhdrq(struct ipath_devdata *dd,1766struct ipath_portdata *pd)1767{1768int ret = 0;17691770if (!pd->port_rcvhdrq) {1771dma_addr_t phys_hdrqtail;1772gfp_t gfp_flags = GFP_USER | __GFP_COMP;1773int amt = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize *1774sizeof(u32), PAGE_SIZE);17751776pd->port_rcvhdrq = dma_alloc_coherent(1777&dd->pcidev->dev, amt, &pd->port_rcvhdrq_phys,1778gfp_flags);17791780if (!pd->port_rcvhdrq) {1781ipath_dev_err(dd, "attempt to allocate %d bytes "1782"for port %u rcvhdrq failed\n",1783amt, pd->port_port);1784ret = -ENOMEM;1785goto bail;1786}17871788if (!(dd->ipath_flags & IPATH_NODMA_RTAIL)) {1789pd->port_rcvhdrtail_kvaddr = dma_alloc_coherent(1790&dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail,1791GFP_KERNEL);1792if (!pd->port_rcvhdrtail_kvaddr) {1793ipath_dev_err(dd, "attempt to allocate 1 page "1794"for port %u rcvhdrqtailaddr "1795"failed\n", pd->port_port);1796ret = -ENOMEM;1797dma_free_coherent(&dd->pcidev->dev, amt,1798pd->port_rcvhdrq,1799pd->port_rcvhdrq_phys);1800pd->port_rcvhdrq = NULL;1801goto bail;1802}1803pd->port_rcvhdrqtailaddr_phys = phys_hdrqtail;1804ipath_cdbg(VERBOSE, "port %d hdrtailaddr, %llx "1805"physical\n", pd->port_port,1806(unsigned long long) phys_hdrqtail);1807}18081809pd->port_rcvhdrq_size = amt;18101811ipath_cdbg(VERBOSE, "%d pages at %p (phys %lx) size=%lu "1812"for port %u rcvhdr Q\n",1813amt >> PAGE_SHIFT, pd->port_rcvhdrq,1814(unsigned long) pd->port_rcvhdrq_phys,1815(unsigned long) pd->port_rcvhdrq_size,1816pd->port_port);1817}1818else1819ipath_cdbg(VERBOSE, "reuse port %d rcvhdrq @%p %llx phys; "1820"hdrtailaddr@%p %llx physical\n",1821pd->port_port, pd->port_rcvhdrq,1822(unsigned long long) pd->port_rcvhdrq_phys,1823pd->port_rcvhdrtail_kvaddr, (unsigned long long)1824pd->port_rcvhdrqtailaddr_phys);18251826/* clear for security and sanity on each use */1827memset(pd->port_rcvhdrq, 0, pd->port_rcvhdrq_size);1828if (pd->port_rcvhdrtail_kvaddr)1829memset(pd->port_rcvhdrtail_kvaddr, 0, PAGE_SIZE);18301831/*1832* tell chip each time we init it, even if we are re-using previous1833* memory (we zero the register at process close)1834*/1835ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdrtailaddr,1836pd->port_port, pd->port_rcvhdrqtailaddr_phys);1837ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr,1838pd->port_port, pd->port_rcvhdrq_phys);18391840bail:1841return ret;1842}184318441845/*1846* Flush all sends that might be in the ready to send state, as well as any1847* that are in the process of being sent. Used whenever we need to be1848* sure the send side is idle. Cleans up all buffer state by canceling1849* all pio buffers, and issuing an abort, which cleans up anything in the1850* launch fifo. The cancel is superfluous on some chip versions, but1851* it's safer to always do it.1852* PIOAvail bits are updated by the chip as if normal send had happened.1853*/1854void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl)1855{1856unsigned long flags;18571858if (dd->ipath_flags & IPATH_IB_AUTONEG_INPROG) {1859ipath_cdbg(VERBOSE, "Ignore while in autonegotiation\n");1860goto bail;1861}1862/*1863* If we have SDMA, and it's not disabled, we have to kick off the1864* abort state machine, provided we aren't already aborting.1865* If we are in the process of aborting SDMA (!DISABLED, but ABORTING),1866* we skip the rest of this routine. It is already "in progress"1867*/1868if (dd->ipath_flags & IPATH_HAS_SEND_DMA) {1869int skip_cancel;1870unsigned long *statp = &dd->ipath_sdma_status;18711872spin_lock_irqsave(&dd->ipath_sdma_lock, flags);1873skip_cancel =1874test_and_set_bit(IPATH_SDMA_ABORTING, statp)1875&& !test_bit(IPATH_SDMA_DISABLED, statp);1876spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);1877if (skip_cancel)1878goto bail;1879}18801881ipath_dbg("Cancelling all in-progress send buffers\n");18821883/* skip armlaunch errs for a while */1884dd->ipath_lastcancel = jiffies + HZ / 2;18851886/*1887* The abort bit is auto-clearing. We also don't want pioavail1888* update happening during this, and we don't want any other1889* sends going out, so turn those off for the duration. We read1890* the scratch register to be sure that cancels and the abort1891* have taken effect in the chip. Otherwise two parts are same1892* as ipath_force_pio_avail_update()1893*/1894spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);1895dd->ipath_sendctrl &= ~(INFINIPATH_S_PIOBUFAVAILUPD1896| INFINIPATH_S_PIOENABLE);1897ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,1898dd->ipath_sendctrl | INFINIPATH_S_ABORT);1899ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);1900spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);19011902/* disarm all send buffers */1903ipath_disarm_piobufs(dd, 0,1904dd->ipath_piobcnt2k + dd->ipath_piobcnt4k);19051906if (dd->ipath_flags & IPATH_HAS_SEND_DMA)1907set_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status);19081909if (restore_sendctrl) {1910/* else done by caller later if needed */1911spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);1912dd->ipath_sendctrl |= INFINIPATH_S_PIOBUFAVAILUPD |1913INFINIPATH_S_PIOENABLE;1914ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,1915dd->ipath_sendctrl);1916/* and again, be sure all have hit the chip */1917ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);1918spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);1919}19201921if ((dd->ipath_flags & IPATH_HAS_SEND_DMA) &&1922!test_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status) &&1923test_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status)) {1924spin_lock_irqsave(&dd->ipath_sdma_lock, flags);1925/* only wait so long for intr */1926dd->ipath_sdma_abort_intr_timeout = jiffies + HZ;1927dd->ipath_sdma_reset_wait = 200;1928if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))1929tasklet_hi_schedule(&dd->ipath_sdma_abort_task);1930spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);1931}1932bail:;1933}19341935/*1936* Force an update of in-memory copy of the pioavail registers, when1937* needed for any of a variety of reasons. We read the scratch register1938* to make it highly likely that the update will have happened by the1939* time we return. If already off (as in cancel_sends above), this1940* routine is a nop, on the assumption that the caller will "do the1941* right thing".1942*/1943void ipath_force_pio_avail_update(struct ipath_devdata *dd)1944{1945unsigned long flags;19461947spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);1948if (dd->ipath_sendctrl & INFINIPATH_S_PIOBUFAVAILUPD) {1949ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,1950dd->ipath_sendctrl & ~INFINIPATH_S_PIOBUFAVAILUPD);1951ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);1952ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,1953dd->ipath_sendctrl);1954ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);1955}1956spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);1957}19581959static void ipath_set_ib_lstate(struct ipath_devdata *dd, int linkcmd,1960int linitcmd)1961{1962u64 mod_wd;1963static const char *what[4] = {1964[0] = "NOP",1965[INFINIPATH_IBCC_LINKCMD_DOWN] = "DOWN",1966[INFINIPATH_IBCC_LINKCMD_ARMED] = "ARMED",1967[INFINIPATH_IBCC_LINKCMD_ACTIVE] = "ACTIVE"1968};19691970if (linitcmd == INFINIPATH_IBCC_LINKINITCMD_DISABLE) {1971/*1972* If we are told to disable, note that so link-recovery1973* code does not attempt to bring us back up.1974*/1975preempt_disable();1976dd->ipath_flags |= IPATH_IB_LINK_DISABLED;1977preempt_enable();1978} else if (linitcmd) {1979/*1980* Any other linkinitcmd will lead to LINKDOWN and then1981* to INIT (if all is well), so clear flag to let1982* link-recovery code attempt to bring us back up.1983*/1984preempt_disable();1985dd->ipath_flags &= ~IPATH_IB_LINK_DISABLED;1986preempt_enable();1987}19881989mod_wd = (linkcmd << dd->ibcc_lc_shift) |1990(linitcmd << INFINIPATH_IBCC_LINKINITCMD_SHIFT);1991ipath_cdbg(VERBOSE,1992"Moving unit %u to %s (initcmd=0x%x), current ltstate is %s\n",1993dd->ipath_unit, what[linkcmd], linitcmd,1994ipath_ibcstatus_str[ipath_ib_linktrstate(dd,1995ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus))]);19961997ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,1998dd->ipath_ibcctrl | mod_wd);1999/* read from chip so write is flushed */2000(void) ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);2001}20022003int ipath_set_linkstate(struct ipath_devdata *dd, u8 newstate)2004{2005u32 lstate;2006int ret;20072008switch (newstate) {2009case IPATH_IB_LINKDOWN_ONLY:2010ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN, 0);2011/* don't wait */2012ret = 0;2013goto bail;20142015case IPATH_IB_LINKDOWN:2016ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN,2017INFINIPATH_IBCC_LINKINITCMD_POLL);2018/* don't wait */2019ret = 0;2020goto bail;20212022case IPATH_IB_LINKDOWN_SLEEP:2023ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN,2024INFINIPATH_IBCC_LINKINITCMD_SLEEP);2025/* don't wait */2026ret = 0;2027goto bail;20282029case IPATH_IB_LINKDOWN_DISABLE:2030ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN,2031INFINIPATH_IBCC_LINKINITCMD_DISABLE);2032/* don't wait */2033ret = 0;2034goto bail;20352036case IPATH_IB_LINKARM:2037if (dd->ipath_flags & IPATH_LINKARMED) {2038ret = 0;2039goto bail;2040}2041if (!(dd->ipath_flags &2042(IPATH_LINKINIT | IPATH_LINKACTIVE))) {2043ret = -EINVAL;2044goto bail;2045}2046ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ARMED, 0);20472048/*2049* Since the port can transition to ACTIVE by receiving2050* a non VL 15 packet, wait for either state.2051*/2052lstate = IPATH_LINKARMED | IPATH_LINKACTIVE;2053break;20542055case IPATH_IB_LINKACTIVE:2056if (dd->ipath_flags & IPATH_LINKACTIVE) {2057ret = 0;2058goto bail;2059}2060if (!(dd->ipath_flags & IPATH_LINKARMED)) {2061ret = -EINVAL;2062goto bail;2063}2064ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ACTIVE, 0);2065lstate = IPATH_LINKACTIVE;2066break;20672068case IPATH_IB_LINK_LOOPBACK:2069dev_info(&dd->pcidev->dev, "Enabling IB local loopback\n");2070dd->ipath_ibcctrl |= INFINIPATH_IBCC_LOOPBACK;2071ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,2072dd->ipath_ibcctrl);20732074/* turn heartbeat off, as it causes loopback to fail */2075dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT,2076IPATH_IB_HRTBT_OFF);2077/* don't wait */2078ret = 0;2079goto bail;20802081case IPATH_IB_LINK_EXTERNAL:2082dev_info(&dd->pcidev->dev,2083"Disabling IB local loopback (normal)\n");2084dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT,2085IPATH_IB_HRTBT_ON);2086dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LOOPBACK;2087ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,2088dd->ipath_ibcctrl);2089/* don't wait */2090ret = 0;2091goto bail;20922093/*2094* Heartbeat can be explicitly enabled by the user via2095* "hrtbt_enable" "file", and if disabled, trying to enable here2096* will have no effect. Implicit changes (heartbeat off when2097* loopback on, and vice versa) are included to ease testing.2098*/2099case IPATH_IB_LINK_HRTBT:2100ret = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT,2101IPATH_IB_HRTBT_ON);2102goto bail;21032104case IPATH_IB_LINK_NO_HRTBT:2105ret = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT,2106IPATH_IB_HRTBT_OFF);2107goto bail;21082109default:2110ipath_dbg("Invalid linkstate 0x%x requested\n", newstate);2111ret = -EINVAL;2112goto bail;2113}2114ret = ipath_wait_linkstate(dd, lstate, 2000);21152116bail:2117return ret;2118}21192120/**2121* ipath_set_mtu - set the MTU2122* @dd: the infinipath device2123* @arg: the new MTU2124*2125* we can handle "any" incoming size, the issue here is whether we2126* need to restrict our outgoing size. For now, we don't do any2127* sanity checking on this, and we don't deal with what happens to2128* programs that are already running when the size changes.2129* NOTE: changing the MTU will usually cause the IBC to go back to2130* link INIT state...2131*/2132int ipath_set_mtu(struct ipath_devdata *dd, u16 arg)2133{2134u32 piosize;2135int changed = 0;2136int ret;21372138/*2139* mtu is IB data payload max. It's the largest power of 2 less2140* than piosize (or even larger, since it only really controls the2141* largest we can receive; we can send the max of the mtu and2142* piosize). We check that it's one of the valid IB sizes.2143*/2144if (arg != 256 && arg != 512 && arg != 1024 && arg != 2048 &&2145(arg != 4096 || !ipath_mtu4096)) {2146ipath_dbg("Trying to set invalid mtu %u, failing\n", arg);2147ret = -EINVAL;2148goto bail;2149}2150if (dd->ipath_ibmtu == arg) {2151ret = 0; /* same as current */2152goto bail;2153}21542155piosize = dd->ipath_ibmaxlen;2156dd->ipath_ibmtu = arg;21572158if (arg >= (piosize - IPATH_PIO_MAXIBHDR)) {2159/* Only if it's not the initial value (or reset to it) */2160if (piosize != dd->ipath_init_ibmaxlen) {2161if (arg > piosize && arg <= dd->ipath_init_ibmaxlen)2162piosize = dd->ipath_init_ibmaxlen;2163dd->ipath_ibmaxlen = piosize;2164changed = 1;2165}2166} else if ((arg + IPATH_PIO_MAXIBHDR) != dd->ipath_ibmaxlen) {2167piosize = arg + IPATH_PIO_MAXIBHDR;2168ipath_cdbg(VERBOSE, "ibmaxlen was 0x%x, setting to 0x%x "2169"(mtu 0x%x)\n", dd->ipath_ibmaxlen, piosize,2170arg);2171dd->ipath_ibmaxlen = piosize;2172changed = 1;2173}21742175if (changed) {2176u64 ibc = dd->ipath_ibcctrl, ibdw;2177/*2178* update our housekeeping variables, and set IBC max2179* size, same as init code; max IBC is max we allow in2180* buffer, less the qword pbc, plus 1 for ICRC, in dwords2181*/2182dd->ipath_ibmaxlen = piosize - 2 * sizeof(u32);2183ibdw = (dd->ipath_ibmaxlen >> 2) + 1;2184ibc &= ~(INFINIPATH_IBCC_MAXPKTLEN_MASK <<2185dd->ibcc_mpl_shift);2186ibc |= ibdw << dd->ibcc_mpl_shift;2187dd->ipath_ibcctrl = ibc;2188ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,2189dd->ipath_ibcctrl);2190dd->ipath_f_tidtemplate(dd);2191}21922193ret = 0;21942195bail:2196return ret;2197}21982199int ipath_set_lid(struct ipath_devdata *dd, u32 lid, u8 lmc)2200{2201dd->ipath_lid = lid;2202dd->ipath_lmc = lmc;22032204dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LIDLMC, lid |2205(~((1U << lmc) - 1)) << 16);22062207dev_info(&dd->pcidev->dev, "We got a lid: 0x%x\n", lid);22082209return 0;2210}221122122213/**2214* ipath_write_kreg_port - write a device's per-port 64-bit kernel register2215* @dd: the infinipath device2216* @regno: the register number to write2217* @port: the port containing the register2218* @value: the value to write2219*2220* Registers that vary with the chip implementation constants (port)2221* use this routine.2222*/2223void ipath_write_kreg_port(const struct ipath_devdata *dd, ipath_kreg regno,2224unsigned port, u64 value)2225{2226u16 where;22272228if (port < dd->ipath_portcnt &&2229(regno == dd->ipath_kregs->kr_rcvhdraddr ||2230regno == dd->ipath_kregs->kr_rcvhdrtailaddr))2231where = regno + port;2232else2233where = -1;22342235ipath_write_kreg(dd, where, value);2236}22372238/*2239* Following deal with the "obviously simple" task of overriding the state2240* of the LEDS, which normally indicate link physical and logical status.2241* The complications arise in dealing with different hardware mappings2242* and the board-dependent routine being called from interrupts.2243* and then there's the requirement to _flash_ them.2244*/2245#define LED_OVER_FREQ_SHIFT 82246#define LED_OVER_FREQ_MASK (0xFF<<LED_OVER_FREQ_SHIFT)2247/* Below is "non-zero" to force override, but both actual LEDs are off */2248#define LED_OVER_BOTH_OFF (8)22492250static void ipath_run_led_override(unsigned long opaque)2251{2252struct ipath_devdata *dd = (struct ipath_devdata *)opaque;2253int timeoff;2254int pidx;2255u64 lstate, ltstate, val;22562257if (!(dd->ipath_flags & IPATH_INITTED))2258return;22592260pidx = dd->ipath_led_override_phase++ & 1;2261dd->ipath_led_override = dd->ipath_led_override_vals[pidx];2262timeoff = dd->ipath_led_override_timeoff;22632264/*2265* below potentially restores the LED values per current status,2266* should also possibly setup the traffic-blink register,2267* but leave that to per-chip functions.2268*/2269val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);2270ltstate = ipath_ib_linktrstate(dd, val);2271lstate = ipath_ib_linkstate(dd, val);22722273dd->ipath_f_setextled(dd, lstate, ltstate);2274mod_timer(&dd->ipath_led_override_timer, jiffies + timeoff);2275}22762277void ipath_set_led_override(struct ipath_devdata *dd, unsigned int val)2278{2279int timeoff, freq;22802281if (!(dd->ipath_flags & IPATH_INITTED))2282return;22832284/* First check if we are blinking. If not, use 1HZ polling */2285timeoff = HZ;2286freq = (val & LED_OVER_FREQ_MASK) >> LED_OVER_FREQ_SHIFT;22872288if (freq) {2289/* For blink, set each phase from one nybble of val */2290dd->ipath_led_override_vals[0] = val & 0xF;2291dd->ipath_led_override_vals[1] = (val >> 4) & 0xF;2292timeoff = (HZ << 4)/freq;2293} else {2294/* Non-blink set both phases the same. */2295dd->ipath_led_override_vals[0] = val & 0xF;2296dd->ipath_led_override_vals[1] = val & 0xF;2297}2298dd->ipath_led_override_timeoff = timeoff;22992300/*2301* If the timer has not already been started, do so. Use a "quick"2302* timeout so the function will be called soon, to look at our request.2303*/2304if (atomic_inc_return(&dd->ipath_led_override_timer_active) == 1) {2305/* Need to start timer */2306init_timer(&dd->ipath_led_override_timer);2307dd->ipath_led_override_timer.function =2308ipath_run_led_override;2309dd->ipath_led_override_timer.data = (unsigned long) dd;2310dd->ipath_led_override_timer.expires = jiffies + 1;2311add_timer(&dd->ipath_led_override_timer);2312} else2313atomic_dec(&dd->ipath_led_override_timer_active);2314}23152316/**2317* ipath_shutdown_device - shut down a device2318* @dd: the infinipath device2319*2320* This is called to make the device quiet when we are about to2321* unload the driver, and also when the device is administratively2322* disabled. It does not free any data structures.2323* Everything it does has to be setup again by ipath_init_chip(dd,1)2324*/2325void ipath_shutdown_device(struct ipath_devdata *dd)2326{2327unsigned long flags;23282329ipath_dbg("Shutting down the device\n");23302331ipath_hol_up(dd); /* make sure user processes aren't suspended */23322333dd->ipath_flags |= IPATH_LINKUNK;2334dd->ipath_flags &= ~(IPATH_INITTED | IPATH_LINKDOWN |2335IPATH_LINKINIT | IPATH_LINKARMED |2336IPATH_LINKACTIVE);2337*dd->ipath_statusp &= ~(IPATH_STATUS_IB_CONF |2338IPATH_STATUS_IB_READY);23392340/* mask interrupts, but not errors */2341ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);23422343dd->ipath_rcvctrl = 0;2344ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,2345dd->ipath_rcvctrl);23462347if (dd->ipath_flags & IPATH_HAS_SEND_DMA)2348teardown_sdma(dd);23492350/*2351* gracefully stop all sends allowing any in progress to trickle out2352* first.2353*/2354spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);2355dd->ipath_sendctrl = 0;2356ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);2357/* flush it */2358ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);2359spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);23602361/*2362* enough for anything that's going to trickle out to have actually2363* done so.2364*/2365udelay(5);23662367dd->ipath_f_setextled(dd, 0, 0); /* make sure LEDs are off */23682369ipath_set_ib_lstate(dd, 0, INFINIPATH_IBCC_LINKINITCMD_DISABLE);2370ipath_cancel_sends(dd, 0);23712372/*2373* we are shutting down, so tell components that care. We don't do2374* this on just a link state change, much like ethernet, a cable2375* unplug, etc. doesn't change driver state2376*/2377signal_ib_event(dd, IB_EVENT_PORT_ERR);23782379/* disable IBC */2380dd->ipath_control &= ~INFINIPATH_C_LINKENABLE;2381ipath_write_kreg(dd, dd->ipath_kregs->kr_control,2382dd->ipath_control | INFINIPATH_C_FREEZEMODE);23832384/*2385* clear SerdesEnable and turn the leds off; do this here because2386* we are unloading, so don't count on interrupts to move along2387* Turn the LEDs off explicitly for the same reason.2388*/2389dd->ipath_f_quiet_serdes(dd);23902391/* stop all the timers that might still be running */2392del_timer_sync(&dd->ipath_hol_timer);2393if (dd->ipath_stats_timer_active) {2394del_timer_sync(&dd->ipath_stats_timer);2395dd->ipath_stats_timer_active = 0;2396}2397if (dd->ipath_intrchk_timer.data) {2398del_timer_sync(&dd->ipath_intrchk_timer);2399dd->ipath_intrchk_timer.data = 0;2400}2401if (atomic_read(&dd->ipath_led_override_timer_active)) {2402del_timer_sync(&dd->ipath_led_override_timer);2403atomic_set(&dd->ipath_led_override_timer_active, 0);2404}24052406/*2407* clear all interrupts and errors, so that the next time the driver2408* is loaded or device is enabled, we know that whatever is set2409* happened while we were unloaded2410*/2411ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,2412~0ULL & ~INFINIPATH_HWE_MEMBISTFAILED);2413ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL);2414ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL);24152416ipath_cdbg(VERBOSE, "Flush time and errors to EEPROM\n");2417ipath_update_eeprom_log(dd);2418}24192420/**2421* ipath_free_pddata - free a port's allocated data2422* @dd: the infinipath device2423* @pd: the portdata structure2424*2425* free up any allocated data for a port2426* This should not touch anything that would affect a simultaneous2427* re-allocation of port data, because it is called after ipath_mutex2428* is released (and can be called from reinit as well).2429* It should never change any chip state, or global driver state.2430* (The only exception to global state is freeing the port0 port0_skbs.)2431*/2432void ipath_free_pddata(struct ipath_devdata *dd, struct ipath_portdata *pd)2433{2434if (!pd)2435return;24362437if (pd->port_rcvhdrq) {2438ipath_cdbg(VERBOSE, "free closed port %d rcvhdrq @ %p "2439"(size=%lu)\n", pd->port_port, pd->port_rcvhdrq,2440(unsigned long) pd->port_rcvhdrq_size);2441dma_free_coherent(&dd->pcidev->dev, pd->port_rcvhdrq_size,2442pd->port_rcvhdrq, pd->port_rcvhdrq_phys);2443pd->port_rcvhdrq = NULL;2444if (pd->port_rcvhdrtail_kvaddr) {2445dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,2446pd->port_rcvhdrtail_kvaddr,2447pd->port_rcvhdrqtailaddr_phys);2448pd->port_rcvhdrtail_kvaddr = NULL;2449}2450}2451if (pd->port_port && pd->port_rcvegrbuf) {2452unsigned e;24532454for (e = 0; e < pd->port_rcvegrbuf_chunks; e++) {2455void *base = pd->port_rcvegrbuf[e];2456size_t size = pd->port_rcvegrbuf_size;24572458ipath_cdbg(VERBOSE, "egrbuf free(%p, %lu), "2459"chunk %u/%u\n", base,2460(unsigned long) size,2461e, pd->port_rcvegrbuf_chunks);2462dma_free_coherent(&dd->pcidev->dev, size,2463base, pd->port_rcvegrbuf_phys[e]);2464}2465kfree(pd->port_rcvegrbuf);2466pd->port_rcvegrbuf = NULL;2467kfree(pd->port_rcvegrbuf_phys);2468pd->port_rcvegrbuf_phys = NULL;2469pd->port_rcvegrbuf_chunks = 0;2470} else if (pd->port_port == 0 && dd->ipath_port0_skbinfo) {2471unsigned e;2472struct ipath_skbinfo *skbinfo = dd->ipath_port0_skbinfo;24732474dd->ipath_port0_skbinfo = NULL;2475ipath_cdbg(VERBOSE, "free closed port %d "2476"ipath_port0_skbinfo @ %p\n", pd->port_port,2477skbinfo);2478for (e = 0; e < dd->ipath_p0_rcvegrcnt; e++)2479if (skbinfo[e].skb) {2480pci_unmap_single(dd->pcidev, skbinfo[e].phys,2481dd->ipath_ibmaxlen,2482PCI_DMA_FROMDEVICE);2483dev_kfree_skb(skbinfo[e].skb);2484}2485vfree(skbinfo);2486}2487kfree(pd->port_tid_pg_list);2488vfree(pd->subport_uregbase);2489vfree(pd->subport_rcvegrbuf);2490vfree(pd->subport_rcvhdr_base);2491kfree(pd);2492}24932494static int __init infinipath_init(void)2495{2496int ret;24972498if (ipath_debug & __IPATH_DBG)2499printk(KERN_INFO DRIVER_LOAD_MSG "%s", ib_ipath_version);25002501/*2502* These must be called before the driver is registered with2503* the PCI subsystem.2504*/2505idr_init(&unit_table);2506if (!idr_pre_get(&unit_table, GFP_KERNEL)) {2507printk(KERN_ERR IPATH_DRV_NAME ": idr_pre_get() failed\n");2508ret = -ENOMEM;2509goto bail;2510}25112512ret = pci_register_driver(&ipath_driver);2513if (ret < 0) {2514printk(KERN_ERR IPATH_DRV_NAME2515": Unable to register driver: error %d\n", -ret);2516goto bail_unit;2517}25182519ret = ipath_init_ipathfs();2520if (ret < 0) {2521printk(KERN_ERR IPATH_DRV_NAME ": Unable to create "2522"ipathfs: error %d\n", -ret);2523goto bail_pci;2524}25252526goto bail;25272528bail_pci:2529pci_unregister_driver(&ipath_driver);25302531bail_unit:2532idr_destroy(&unit_table);25332534bail:2535return ret;2536}25372538static void __exit infinipath_cleanup(void)2539{2540ipath_exit_ipathfs();25412542ipath_cdbg(VERBOSE, "Unregistering pci driver\n");2543pci_unregister_driver(&ipath_driver);25442545idr_destroy(&unit_table);2546}25472548/**2549* ipath_reset_device - reset the chip if possible2550* @unit: the device to reset2551*2552* Whether or not reset is successful, we attempt to re-initialize the chip2553* (that is, much like a driver unload/reload). We clear the INITTED flag2554* so that the various entry points will fail until we reinitialize. For2555* now, we only allow this if no user ports are open that use chip resources2556*/2557int ipath_reset_device(int unit)2558{2559int ret, i;2560struct ipath_devdata *dd = ipath_lookup(unit);2561unsigned long flags;25622563if (!dd) {2564ret = -ENODEV;2565goto bail;2566}25672568if (atomic_read(&dd->ipath_led_override_timer_active)) {2569/* Need to stop LED timer, _then_ shut off LEDs */2570del_timer_sync(&dd->ipath_led_override_timer);2571atomic_set(&dd->ipath_led_override_timer_active, 0);2572}25732574/* Shut off LEDs after we are sure timer is not running */2575dd->ipath_led_override = LED_OVER_BOTH_OFF;2576dd->ipath_f_setextled(dd, 0, 0);25772578dev_info(&dd->pcidev->dev, "Reset on unit %u requested\n", unit);25792580if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT)) {2581dev_info(&dd->pcidev->dev, "Invalid unit number %u or "2582"not initialized or not present\n", unit);2583ret = -ENXIO;2584goto bail;2585}25862587spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);2588if (dd->ipath_pd)2589for (i = 1; i < dd->ipath_cfgports; i++) {2590if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt)2591continue;2592spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);2593ipath_dbg("unit %u port %d is in use "2594"(PID %u cmd %s), can't reset\n",2595unit, i,2596pid_nr(dd->ipath_pd[i]->port_pid),2597dd->ipath_pd[i]->port_comm);2598ret = -EBUSY;2599goto bail;2600}2601spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);26022603if (dd->ipath_flags & IPATH_HAS_SEND_DMA)2604teardown_sdma(dd);26052606dd->ipath_flags &= ~IPATH_INITTED;2607ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);2608ret = dd->ipath_f_reset(dd);2609if (ret == 1) {2610ipath_dbg("Reinitializing unit %u after reset attempt\n",2611unit);2612ret = ipath_init_chip(dd, 1);2613} else2614ret = -EAGAIN;2615if (ret)2616ipath_dev_err(dd, "Reinitialize unit %u after "2617"reset failed with %d\n", unit, ret);2618else2619dev_info(&dd->pcidev->dev, "Reinitialized unit %u after "2620"resetting\n", unit);26212622bail:2623return ret;2624}26252626/*2627* send a signal to all the processes that have the driver open2628* through the normal interfaces (i.e., everything other than diags2629* interface). Returns number of signalled processes.2630*/2631static int ipath_signal_procs(struct ipath_devdata *dd, int sig)2632{2633int i, sub, any = 0;2634struct pid *pid;2635unsigned long flags;26362637if (!dd->ipath_pd)2638return 0;26392640spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);2641for (i = 1; i < dd->ipath_cfgports; i++) {2642if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt)2643continue;2644pid = dd->ipath_pd[i]->port_pid;2645if (!pid)2646continue;26472648dev_info(&dd->pcidev->dev, "context %d in use "2649"(PID %u), sending signal %d\n",2650i, pid_nr(pid), sig);2651kill_pid(pid, sig, 1);2652any++;2653for (sub = 0; sub < INFINIPATH_MAX_SUBPORT; sub++) {2654pid = dd->ipath_pd[i]->port_subpid[sub];2655if (!pid)2656continue;2657dev_info(&dd->pcidev->dev, "sub-context "2658"%d:%d in use (PID %u), sending "2659"signal %d\n", i, sub, pid_nr(pid), sig);2660kill_pid(pid, sig, 1);2661any++;2662}2663}2664spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);2665return any;2666}26672668static void ipath_hol_signal_down(struct ipath_devdata *dd)2669{2670if (ipath_signal_procs(dd, SIGSTOP))2671ipath_dbg("Stopped some processes\n");2672ipath_cancel_sends(dd, 1);2673}267426752676static void ipath_hol_signal_up(struct ipath_devdata *dd)2677{2678if (ipath_signal_procs(dd, SIGCONT))2679ipath_dbg("Continued some processes\n");2680}26812682/*2683* link is down, stop any users processes, and flush pending sends2684* to prevent HoL blocking, then start the HoL timer that2685* periodically continues, then stop procs, so they can detect2686* link down if they want, and do something about it.2687* Timer may already be running, so use mod_timer, not add_timer.2688*/2689void ipath_hol_down(struct ipath_devdata *dd)2690{2691dd->ipath_hol_state = IPATH_HOL_DOWN;2692ipath_hol_signal_down(dd);2693dd->ipath_hol_next = IPATH_HOL_DOWNCONT;2694dd->ipath_hol_timer.expires = jiffies +2695msecs_to_jiffies(ipath_hol_timeout_ms);2696mod_timer(&dd->ipath_hol_timer, dd->ipath_hol_timer.expires);2697}26982699/*2700* link is up, continue any user processes, and ensure timer2701* is a nop, if running. Let timer keep running, if set; it2702* will nop when it sees the link is up2703*/2704void ipath_hol_up(struct ipath_devdata *dd)2705{2706ipath_hol_signal_up(dd);2707dd->ipath_hol_state = IPATH_HOL_UP;2708}27092710/*2711* toggle the running/not running state of user proceses2712* to prevent HoL blocking on chip resources, but still allow2713* user processes to do link down special case handling.2714* Should only be called via the timer2715*/2716void ipath_hol_event(unsigned long opaque)2717{2718struct ipath_devdata *dd = (struct ipath_devdata *)opaque;27192720if (dd->ipath_hol_next == IPATH_HOL_DOWNSTOP2721&& dd->ipath_hol_state != IPATH_HOL_UP) {2722dd->ipath_hol_next = IPATH_HOL_DOWNCONT;2723ipath_dbg("Stopping processes\n");2724ipath_hol_signal_down(dd);2725} else { /* may do "extra" if also in ipath_hol_up() */2726dd->ipath_hol_next = IPATH_HOL_DOWNSTOP;2727ipath_dbg("Continuing processes\n");2728ipath_hol_signal_up(dd);2729}2730if (dd->ipath_hol_state == IPATH_HOL_UP)2731ipath_dbg("link's up, don't resched timer\n");2732else {2733dd->ipath_hol_timer.expires = jiffies +2734msecs_to_jiffies(ipath_hol_timeout_ms);2735mod_timer(&dd->ipath_hol_timer,2736dd->ipath_hol_timer.expires);2737}2738}27392740int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv)2741{2742u64 val;27432744if (new_pol_inv > INFINIPATH_XGXS_RX_POL_MASK)2745return -1;2746if (dd->ipath_rx_pol_inv != new_pol_inv) {2747dd->ipath_rx_pol_inv = new_pol_inv;2748val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);2749val &= ~(INFINIPATH_XGXS_RX_POL_MASK <<2750INFINIPATH_XGXS_RX_POL_SHIFT);2751val |= ((u64)dd->ipath_rx_pol_inv) <<2752INFINIPATH_XGXS_RX_POL_SHIFT;2753ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);2754}2755return 0;2756}27572758/*2759* Disable and enable the armlaunch error. Used for PIO bandwidth testing on2760* the 7220, which is count-based, rather than trigger-based. Safe for the2761* driver check, since it's at init. Not completely safe when used for2762* user-mode checking, since some error checking can be lost, but not2763* particularly risky, and only has problematic side-effects in the face of2764* very buggy user code. There is no reference counting, but that's also2765* fine, given the intended use.2766*/2767void ipath_enable_armlaunch(struct ipath_devdata *dd)2768{2769dd->ipath_lasterror &= ~INFINIPATH_E_SPIOARMLAUNCH;2770ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear,2771INFINIPATH_E_SPIOARMLAUNCH);2772dd->ipath_errormask |= INFINIPATH_E_SPIOARMLAUNCH;2773ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,2774dd->ipath_errormask);2775}27762777void ipath_disable_armlaunch(struct ipath_devdata *dd)2778{2779/* so don't re-enable if already set */2780dd->ipath_maskederrs &= ~INFINIPATH_E_SPIOARMLAUNCH;2781dd->ipath_errormask &= ~INFINIPATH_E_SPIOARMLAUNCH;2782ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,2783dd->ipath_errormask);2784}27852786module_init(infinipath_init);2787module_exit(infinipath_cleanup);278827892790