Path: blob/master/arch/powerpc/sysdev/dart_iommu.c
10817 views
/*1* arch/powerpc/sysdev/dart_iommu.c2*3* Copyright (C) 2004 Olof Johansson <[email protected]>, IBM Corporation4* Copyright (C) 2005 Benjamin Herrenschmidt <[email protected]>,5* IBM Corporation6*7* Based on pSeries_iommu.c:8* Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation9* Copyright (C) 2004 Olof Johansson <[email protected]>, IBM Corporation10*11* Dynamic DMA mapping support, Apple U3, U4 & IBM CPC925 "DART" iommu.12*13*14* This program is free software; you can redistribute it and/or modify15* it under the terms of the GNU General Public License as published by16* the Free Software Foundation; either version 2 of the License, or17* (at your option) any later version.18*19* This program is distributed in the hope that it will be useful,20* but WITHOUT ANY WARRANTY; without even the implied warranty of21* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the22* GNU General Public License for more details.23*24* You should have received a copy of the GNU General Public License25* along with this program; if not, write to the Free Software26* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA27*/2829#include <linux/init.h>30#include <linux/types.h>31#include <linux/mm.h>32#include <linux/spinlock.h>33#include <linux/string.h>34#include <linux/pci.h>35#include <linux/dma-mapping.h>36#include <linux/vmalloc.h>37#include <linux/suspend.h>38#include <linux/memblock.h>39#include <linux/gfp.h>40#include <asm/io.h>41#include <asm/prom.h>42#include <asm/iommu.h>43#include <asm/pci-bridge.h>44#include <asm/machdep.h>45#include <asm/abs_addr.h>46#include <asm/cacheflush.h>47#include <asm/ppc-pci.h>4849#include "dart.h"5051/* Physical base address and size of the DART table */52unsigned long dart_tablebase; /* exported to htab_initialize */53static unsigned long dart_tablesize;5455/* Virtual base address of the DART table */56static u32 *dart_vbase;57#ifdef CONFIG_PM58static u32 *dart_copy;59#endif6061/* Mapped base address for the dart */62static unsigned int __iomem *dart;6364/* Dummy val that entries are set to when unused */65static unsigned int dart_emptyval;6667static struct iommu_table iommu_table_dart;68static int iommu_table_dart_inited;69static int dart_dirty;70static int dart_is_u4;7172#define DART_U4_BYPASS_BASE 0x8000000000ull7374#define DBG(...)7576static inline void dart_tlb_invalidate_all(void)77{78unsigned long l = 0;79unsigned int reg, inv_bit;80unsigned long limit;8182DBG("dart: flush\n");8384/* To invalidate the DART, set the DARTCNTL_FLUSHTLB bit in the85* control register and wait for it to clear.86*87* Gotcha: Sometimes, the DART won't detect that the bit gets88* set. If so, clear it and set it again.89*/9091limit = 0;9293inv_bit = dart_is_u4 ? DART_CNTL_U4_FLUSHTLB : DART_CNTL_U3_FLUSHTLB;94retry:95l = 0;96reg = DART_IN(DART_CNTL);97reg |= inv_bit;98DART_OUT(DART_CNTL, reg);99100while ((DART_IN(DART_CNTL) & inv_bit) && l < (1L << limit))101l++;102if (l == (1L << limit)) {103if (limit < 4) {104limit++;105reg = DART_IN(DART_CNTL);106reg &= ~inv_bit;107DART_OUT(DART_CNTL, reg);108goto retry;109} else110panic("DART: TLB did not flush after waiting a long "111"time. Buggy U3 ?");112}113}114115static inline void dart_tlb_invalidate_one(unsigned long bus_rpn)116{117unsigned int reg;118unsigned int l, limit;119120reg = DART_CNTL_U4_ENABLE | DART_CNTL_U4_IONE |121(bus_rpn & DART_CNTL_U4_IONE_MASK);122DART_OUT(DART_CNTL, reg);123124limit = 0;125wait_more:126l = 0;127while ((DART_IN(DART_CNTL) & DART_CNTL_U4_IONE) && l < (1L << limit)) {128rmb();129l++;130}131132if (l == (1L << limit)) {133if (limit < 4) {134limit++;135goto wait_more;136} else137panic("DART: TLB did not flush after waiting a long "138"time. Buggy U4 ?");139}140}141142static void dart_flush(struct iommu_table *tbl)143{144mb();145if (dart_dirty) {146dart_tlb_invalidate_all();147dart_dirty = 0;148}149}150151static int dart_build(struct iommu_table *tbl, long index,152long npages, unsigned long uaddr,153enum dma_data_direction direction,154struct dma_attrs *attrs)155{156unsigned int *dp;157unsigned int rpn;158long l;159160DBG("dart: build at: %lx, %lx, addr: %x\n", index, npages, uaddr);161162dp = ((unsigned int*)tbl->it_base) + index;163164/* On U3, all memory is contiguous, so we can move this165* out of the loop.166*/167l = npages;168while (l--) {169rpn = virt_to_abs(uaddr) >> DART_PAGE_SHIFT;170171*(dp++) = DARTMAP_VALID | (rpn & DARTMAP_RPNMASK);172173uaddr += DART_PAGE_SIZE;174}175176/* make sure all updates have reached memory */177mb();178in_be32((unsigned __iomem *)dp);179mb();180181if (dart_is_u4) {182rpn = index;183while (npages--)184dart_tlb_invalidate_one(rpn++);185} else {186dart_dirty = 1;187}188return 0;189}190191192static void dart_free(struct iommu_table *tbl, long index, long npages)193{194unsigned int *dp;195196/* We don't worry about flushing the TLB cache. The only drawback of197* not doing it is that we won't catch buggy device drivers doing198* bad DMAs, but then no 32-bit architecture ever does either.199*/200201DBG("dart: free at: %lx, %lx\n", index, npages);202203dp = ((unsigned int *)tbl->it_base) + index;204205while (npages--)206*(dp++) = dart_emptyval;207}208209210static int __init dart_init(struct device_node *dart_node)211{212unsigned int i;213unsigned long tmp, base, size;214struct resource r;215216if (dart_tablebase == 0 || dart_tablesize == 0) {217printk(KERN_INFO "DART: table not allocated, using "218"direct DMA\n");219return -ENODEV;220}221222if (of_address_to_resource(dart_node, 0, &r))223panic("DART: can't get register base ! ");224225/* Make sure nothing from the DART range remains in the CPU cache226* from a previous mapping that existed before the kernel took227* over228*/229flush_dcache_phys_range(dart_tablebase,230dart_tablebase + dart_tablesize);231232/* Allocate a spare page to map all invalid DART pages. We need to do233* that to work around what looks like a problem with the HT bridge234* prefetching into invalid pages and corrupting data235*/236tmp = memblock_alloc(DART_PAGE_SIZE, DART_PAGE_SIZE);237dart_emptyval = DARTMAP_VALID | ((tmp >> DART_PAGE_SHIFT) &238DARTMAP_RPNMASK);239240/* Map in DART registers */241dart = ioremap(r.start, r.end - r.start + 1);242if (dart == NULL)243panic("DART: Cannot map registers!");244245/* Map in DART table */246dart_vbase = ioremap(virt_to_abs(dart_tablebase), dart_tablesize);247248/* Fill initial table */249for (i = 0; i < dart_tablesize/4; i++)250dart_vbase[i] = dart_emptyval;251252/* Initialize DART with table base and enable it. */253base = dart_tablebase >> DART_PAGE_SHIFT;254size = dart_tablesize >> DART_PAGE_SHIFT;255if (dart_is_u4) {256size &= DART_SIZE_U4_SIZE_MASK;257DART_OUT(DART_BASE_U4, base);258DART_OUT(DART_SIZE_U4, size);259DART_OUT(DART_CNTL, DART_CNTL_U4_ENABLE);260} else {261size &= DART_CNTL_U3_SIZE_MASK;262DART_OUT(DART_CNTL,263DART_CNTL_U3_ENABLE |264(base << DART_CNTL_U3_BASE_SHIFT) |265(size << DART_CNTL_U3_SIZE_SHIFT));266}267268/* Invalidate DART to get rid of possible stale TLBs */269dart_tlb_invalidate_all();270271printk(KERN_INFO "DART IOMMU initialized for %s type chipset\n",272dart_is_u4 ? "U4" : "U3");273274return 0;275}276277static void iommu_table_dart_setup(void)278{279iommu_table_dart.it_busno = 0;280iommu_table_dart.it_offset = 0;281/* it_size is in number of entries */282iommu_table_dart.it_size = dart_tablesize / sizeof(u32);283284/* Initialize the common IOMMU code */285iommu_table_dart.it_base = (unsigned long)dart_vbase;286iommu_table_dart.it_index = 0;287iommu_table_dart.it_blocksize = 1;288iommu_init_table(&iommu_table_dart, -1);289290/* Reserve the last page of the DART to avoid possible prefetch291* past the DART mapped area292*/293set_bit(iommu_table_dart.it_size - 1, iommu_table_dart.it_map);294}295296static void dma_dev_setup_dart(struct device *dev)297{298/* We only have one iommu table on the mac for now, which makes299* things simple. Setup all PCI devices to point to this table300*/301if (get_dma_ops(dev) == &dma_direct_ops)302set_dma_offset(dev, DART_U4_BYPASS_BASE);303else304set_iommu_table_base(dev, &iommu_table_dart);305}306307static void pci_dma_dev_setup_dart(struct pci_dev *dev)308{309dma_dev_setup_dart(&dev->dev);310}311312static void pci_dma_bus_setup_dart(struct pci_bus *bus)313{314if (!iommu_table_dart_inited) {315iommu_table_dart_inited = 1;316iommu_table_dart_setup();317}318}319320static bool dart_device_on_pcie(struct device *dev)321{322struct device_node *np = of_node_get(dev->of_node);323324while(np) {325if (of_device_is_compatible(np, "U4-pcie") ||326of_device_is_compatible(np, "u4-pcie")) {327of_node_put(np);328return true;329}330np = of_get_next_parent(np);331}332return false;333}334335static int dart_dma_set_mask(struct device *dev, u64 dma_mask)336{337if (!dev->dma_mask || !dma_supported(dev, dma_mask))338return -EIO;339340/* U4 supports a DART bypass, we use it for 64-bit capable341* devices to improve performances. However, that only works342* for devices connected to U4 own PCIe interface, not bridged343* through hypertransport. We need the device to support at344* least 40 bits of addresses.345*/346if (dart_device_on_pcie(dev) && dma_mask >= DMA_BIT_MASK(40)) {347dev_info(dev, "Using 64-bit DMA iommu bypass\n");348set_dma_ops(dev, &dma_direct_ops);349} else {350dev_info(dev, "Using 32-bit DMA via iommu\n");351set_dma_ops(dev, &dma_iommu_ops);352}353dma_dev_setup_dart(dev);354355*dev->dma_mask = dma_mask;356return 0;357}358359void __init iommu_init_early_dart(void)360{361struct device_node *dn;362363/* Find the DART in the device-tree */364dn = of_find_compatible_node(NULL, "dart", "u3-dart");365if (dn == NULL) {366dn = of_find_compatible_node(NULL, "dart", "u4-dart");367if (dn == NULL)368return; /* use default direct_dma_ops */369dart_is_u4 = 1;370}371372/* Initialize the DART HW */373if (dart_init(dn) != 0)374goto bail;375376/* Setup low level TCE operations for the core IOMMU code */377ppc_md.tce_build = dart_build;378ppc_md.tce_free = dart_free;379ppc_md.tce_flush = dart_flush;380381/* Setup bypass if supported */382if (dart_is_u4)383ppc_md.dma_set_mask = dart_dma_set_mask;384385ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_dart;386ppc_md.pci_dma_bus_setup = pci_dma_bus_setup_dart;387388/* Setup pci_dma ops */389set_pci_dma_ops(&dma_iommu_ops);390return;391392bail:393/* If init failed, use direct iommu and null setup functions */394ppc_md.pci_dma_dev_setup = NULL;395ppc_md.pci_dma_bus_setup = NULL;396397/* Setup pci_dma ops */398set_pci_dma_ops(&dma_direct_ops);399}400401#ifdef CONFIG_PM402static void iommu_dart_save(void)403{404memcpy(dart_copy, dart_vbase, 2*1024*1024);405}406407static void iommu_dart_restore(void)408{409memcpy(dart_vbase, dart_copy, 2*1024*1024);410dart_tlb_invalidate_all();411}412413static int __init iommu_init_late_dart(void)414{415unsigned long tbasepfn;416struct page *p;417418/* if no dart table exists then we won't need to save it419* and the area has also not been reserved */420if (!dart_tablebase)421return 0;422423tbasepfn = __pa(dart_tablebase) >> PAGE_SHIFT;424register_nosave_region_late(tbasepfn,425tbasepfn + ((1<<24) >> PAGE_SHIFT));426427/* For suspend we need to copy the dart contents because428* it is not part of the regular mapping (see above) and429* thus not saved automatically. The memory for this copy430* must be allocated early because we need 2 MB. */431p = alloc_pages(GFP_KERNEL, 21 - PAGE_SHIFT);432BUG_ON(!p);433dart_copy = page_address(p);434435ppc_md.iommu_save = iommu_dart_save;436ppc_md.iommu_restore = iommu_dart_restore;437438return 0;439}440441late_initcall(iommu_init_late_dart);442#endif443444void __init alloc_dart_table(void)445{446/* Only reserve DART space if machine has more than 1GB of RAM447* or if requested with iommu=on on cmdline.448*449* 1GB of RAM is picked as limit because some default devices450* (i.e. Airport Extreme) have 30 bit address range limits.451*/452453if (iommu_is_off)454return;455456if (!iommu_force_on && memblock_end_of_DRAM() <= 0x40000000ull)457return;458459/* 512 pages (2MB) is max DART tablesize. */460dart_tablesize = 1UL << 21;461/* 16MB (1 << 24) alignment. We allocate a full 16Mb chuck since we462* will blow up an entire large page anyway in the kernel mapping463*/464dart_tablebase = (unsigned long)465abs_to_virt(memblock_alloc_base(1UL<<24, 1UL<<24, 0x80000000L));466467printk(KERN_INFO "DART table allocated at: %lx\n", dart_tablebase);468}469470471