/* Path: blob/master/arch/powerpc/platforms/cell/spu_base.c (26481 views) */
// SPDX-License-Identifier: GPL-2.0-or-later1/*2* Low-level SPU handling3*4* (C) Copyright IBM Deutschland Entwicklung GmbH 20055*6* Author: Arnd Bergmann <[email protected]>7*/89#undef DEBUG1011#include <linux/interrupt.h>12#include <linux/list.h>13#include <linux/init.h>14#include <linux/ptrace.h>15#include <linux/slab.h>16#include <linux/wait.h>17#include <linux/mm.h>18#include <linux/io.h>19#include <linux/mutex.h>20#include <linux/linux_logo.h>21#include <linux/syscore_ops.h>22#include <asm/spu.h>23#include <asm/spu_priv1.h>24#include <asm/spu_csa.h>25#include <asm/kexec.h>2627const struct spu_management_ops *spu_management_ops;28EXPORT_SYMBOL_GPL(spu_management_ops);2930const struct spu_priv1_ops *spu_priv1_ops;31EXPORT_SYMBOL_GPL(spu_priv1_ops);3233struct cbe_spu_info cbe_spu_info[MAX_NUMNODES];34EXPORT_SYMBOL_GPL(cbe_spu_info);3536/*37* The spufs fault-handling code needs to call force_sig_fault to raise signals38* on DMA errors. Export it here to avoid general kernel-wide access to this39* function40*/41EXPORT_SYMBOL_GPL(force_sig_fault);4243/*44* Protects cbe_spu_info and spu->number.45*/46static DEFINE_SPINLOCK(spu_lock);4748/*49* List of all spus in the system.50*51* This list is iterated by callers from irq context and callers that52* want to sleep. 
Thus modifications need to be done with both53* spu_full_list_lock and spu_full_list_mutex held, while iterating54* through it requires either of these locks.55*56* In addition spu_full_list_lock protects all assignments to57* spu->mm.58*/59static LIST_HEAD(spu_full_list);60static DEFINE_SPINLOCK(spu_full_list_lock);61static DEFINE_MUTEX(spu_full_list_mutex);6263void spu_invalidate_slbs(struct spu *spu)64{65struct spu_priv2 __iomem *priv2 = spu->priv2;66unsigned long flags;6768spin_lock_irqsave(&spu->register_lock, flags);69if (spu_mfc_sr1_get(spu) & MFC_STATE1_RELOCATE_MASK)70out_be64(&priv2->slb_invalidate_all_W, 0UL);71spin_unlock_irqrestore(&spu->register_lock, flags);72}73EXPORT_SYMBOL_GPL(spu_invalidate_slbs);7475/* This is called by the MM core when a segment size is changed, to76* request a flush of all the SPEs using a given mm77*/78void spu_flush_all_slbs(struct mm_struct *mm)79{80struct spu *spu;81unsigned long flags;8283spin_lock_irqsave(&spu_full_list_lock, flags);84list_for_each_entry(spu, &spu_full_list, full_list) {85if (spu->mm == mm)86spu_invalidate_slbs(spu);87}88spin_unlock_irqrestore(&spu_full_list_lock, flags);89}9091/* The hack below stinks... try to do something better one of92* these days... Does it even work properly with NR_CPUS == 1 ?93*/94static inline void mm_needs_global_tlbie(struct mm_struct *mm)95{96int nr = (NR_CPUS > 1) ? NR_CPUS : NR_CPUS + 1;9798/* Global TLBIE broadcast required with SPEs. 
*/99bitmap_fill(cpumask_bits(mm_cpumask(mm)), nr);100}101102void spu_associate_mm(struct spu *spu, struct mm_struct *mm)103{104unsigned long flags;105106spin_lock_irqsave(&spu_full_list_lock, flags);107spu->mm = mm;108spin_unlock_irqrestore(&spu_full_list_lock, flags);109if (mm)110mm_needs_global_tlbie(mm);111}112EXPORT_SYMBOL_GPL(spu_associate_mm);113114int spu_64k_pages_available(void)115{116return mmu_psize_defs[MMU_PAGE_64K].shift != 0;117}118EXPORT_SYMBOL_GPL(spu_64k_pages_available);119120static void spu_restart_dma(struct spu *spu)121{122struct spu_priv2 __iomem *priv2 = spu->priv2;123124if (!test_bit(SPU_CONTEXT_SWITCH_PENDING, &spu->flags))125out_be64(&priv2->mfc_control_RW, MFC_CNTL_RESTART_DMA_COMMAND);126else {127set_bit(SPU_CONTEXT_FAULT_PENDING, &spu->flags);128mb();129}130}131132static inline void spu_load_slb(struct spu *spu, int slbe, struct copro_slb *slb)133{134struct spu_priv2 __iomem *priv2 = spu->priv2;135136pr_debug("%s: adding SLB[%d] 0x%016llx 0x%016llx\n",137__func__, slbe, slb->vsid, slb->esid);138139out_be64(&priv2->slb_index_W, slbe);140/* set invalid before writing vsid */141out_be64(&priv2->slb_esid_RW, 0);142/* now it's safe to write the vsid */143out_be64(&priv2->slb_vsid_RW, slb->vsid);144/* setting the new esid makes the entry valid again */145out_be64(&priv2->slb_esid_RW, slb->esid);146}147148static int __spu_trap_data_seg(struct spu *spu, unsigned long ea)149{150struct copro_slb slb;151int ret;152153ret = copro_calculate_slb(spu->mm, ea, &slb);154if (ret)155return ret;156157spu_load_slb(spu, spu->slb_replace, &slb);158159spu->slb_replace++;160if (spu->slb_replace >= 8)161spu->slb_replace = 0;162163spu_restart_dma(spu);164spu->stats.slb_flt++;165return 0;166}167168extern int hash_page(unsigned long ea, unsigned long access,169unsigned long trap, unsigned long dsisr); //XXX170static int __spu_trap_data_map(struct spu *spu, unsigned long ea, u64 dsisr)171{172int ret;173174pr_debug("%s, %llx, %lx\n", __func__, dsisr, 
ea);175176/*177* Handle kernel space hash faults immediately. User hash178* faults need to be deferred to process context.179*/180if ((dsisr & MFC_DSISR_PTE_NOT_FOUND) &&181(get_region_id(ea) != USER_REGION_ID)) {182183spin_unlock(&spu->register_lock);184ret = hash_page(ea,185_PAGE_PRESENT | _PAGE_READ | _PAGE_PRIVILEGED,1860x300, dsisr);187spin_lock(&spu->register_lock);188189if (!ret) {190spu_restart_dma(spu);191return 0;192}193}194195spu->class_1_dar = ea;196spu->class_1_dsisr = dsisr;197198spu->stop_callback(spu, 1);199200spu->class_1_dar = 0;201spu->class_1_dsisr = 0;202203return 0;204}205206static void __spu_kernel_slb(void *addr, struct copro_slb *slb)207{208unsigned long ea = (unsigned long)addr;209u64 llp;210211if (get_region_id(ea) == LINEAR_MAP_REGION_ID)212llp = mmu_psize_defs[mmu_linear_psize].sllp;213else214llp = mmu_psize_defs[mmu_virtual_psize].sllp;215216slb->vsid = (get_kernel_vsid(ea, MMU_SEGSIZE_256M) << SLB_VSID_SHIFT) |217SLB_VSID_KERNEL | llp;218slb->esid = (ea & ESID_MASK) | SLB_ESID_V;219}220221/**222* Given an array of @nr_slbs SLB entries, @slbs, return non-zero if the223* address @new_addr is present.224*/225static inline int __slb_present(struct copro_slb *slbs, int nr_slbs,226void *new_addr)227{228unsigned long ea = (unsigned long)new_addr;229int i;230231for (i = 0; i < nr_slbs; i++)232if (!((slbs[i].esid ^ ea) & ESID_MASK))233return 1;234235return 0;236}237238/**239* Setup the SPU kernel SLBs, in preparation for a context save/restore. We240* need to map both the context save area, and the save/restore code.241*242* Because the lscsa and code may cross segment boundaries, we check to see243* if mappings are required for the start and end of each range. 
We currently244* assume that the mappings are smaller that one segment - if not, something245* is seriously wrong.246*/247void spu_setup_kernel_slbs(struct spu *spu, struct spu_lscsa *lscsa,248void *code, int code_size)249{250struct copro_slb slbs[4];251int i, nr_slbs = 0;252/* start and end addresses of both mappings */253void *addrs[] = {254lscsa, (void *)lscsa + sizeof(*lscsa) - 1,255code, code + code_size - 1256};257258/* check the set of addresses, and create a new entry in the slbs array259* if there isn't already a SLB for that address */260for (i = 0; i < ARRAY_SIZE(addrs); i++) {261if (__slb_present(slbs, nr_slbs, addrs[i]))262continue;263264__spu_kernel_slb(addrs[i], &slbs[nr_slbs]);265nr_slbs++;266}267268spin_lock_irq(&spu->register_lock);269/* Add the set of SLBs */270for (i = 0; i < nr_slbs; i++)271spu_load_slb(spu, i, &slbs[i]);272spin_unlock_irq(&spu->register_lock);273}274EXPORT_SYMBOL_GPL(spu_setup_kernel_slbs);275276static irqreturn_t277spu_irq_class_0(int irq, void *data)278{279struct spu *spu;280unsigned long stat, mask;281282spu = data;283284spin_lock(&spu->register_lock);285mask = spu_int_mask_get(spu, 0);286stat = spu_int_stat_get(spu, 0) & mask;287288spu->class_0_pending |= stat;289spu->class_0_dar = spu_mfc_dar_get(spu);290spu->stop_callback(spu, 0);291spu->class_0_pending = 0;292spu->class_0_dar = 0;293294spu_int_stat_clear(spu, 0, stat);295spin_unlock(&spu->register_lock);296297return IRQ_HANDLED;298}299300static irqreturn_t301spu_irq_class_1(int irq, void *data)302{303struct spu *spu;304unsigned long stat, mask, dar, dsisr;305306spu = data;307308/* atomically read & clear class1 status. 
*/309spin_lock(&spu->register_lock);310mask = spu_int_mask_get(spu, 1);311stat = spu_int_stat_get(spu, 1) & mask;312dar = spu_mfc_dar_get(spu);313dsisr = spu_mfc_dsisr_get(spu);314if (stat & CLASS1_STORAGE_FAULT_INTR)315spu_mfc_dsisr_set(spu, 0ul);316spu_int_stat_clear(spu, 1, stat);317318pr_debug("%s: %lx %lx %lx %lx\n", __func__, mask, stat,319dar, dsisr);320321if (stat & CLASS1_SEGMENT_FAULT_INTR)322__spu_trap_data_seg(spu, dar);323324if (stat & CLASS1_STORAGE_FAULT_INTR)325__spu_trap_data_map(spu, dar, dsisr);326327spu->class_1_dsisr = 0;328spu->class_1_dar = 0;329330spin_unlock(&spu->register_lock);331332return stat ? IRQ_HANDLED : IRQ_NONE;333}334335static irqreturn_t336spu_irq_class_2(int irq, void *data)337{338struct spu *spu;339unsigned long stat;340unsigned long mask;341const int mailbox_intrs =342CLASS2_MAILBOX_THRESHOLD_INTR | CLASS2_MAILBOX_INTR;343344spu = data;345spin_lock(&spu->register_lock);346stat = spu_int_stat_get(spu, 2);347mask = spu_int_mask_get(spu, 2);348/* ignore interrupts we're not waiting for */349stat &= mask;350/* mailbox interrupts are level triggered. mask them now before351* acknowledging */352if (stat & mailbox_intrs)353spu_int_mask_and(spu, 2, ~(stat & mailbox_intrs));354/* acknowledge all interrupts before the callbacks */355spu_int_stat_clear(spu, 2, stat);356357pr_debug("class 2 interrupt %d, %lx, %lx\n", irq, stat, mask);358359if (stat & CLASS2_MAILBOX_INTR)360spu->ibox_callback(spu);361362if (stat & CLASS2_SPU_STOP_INTR)363spu->stop_callback(spu, 2);364365if (stat & CLASS2_SPU_HALT_INTR)366spu->stop_callback(spu, 2);367368if (stat & CLASS2_SPU_DMA_TAG_GROUP_COMPLETE_INTR)369spu->mfc_callback(spu);370371if (stat & CLASS2_MAILBOX_THRESHOLD_INTR)372spu->wbox_callback(spu);373374spu->stats.class2_intr++;375376spin_unlock(&spu->register_lock);377378return stat ? 
IRQ_HANDLED : IRQ_NONE;379}380381static int __init spu_request_irqs(struct spu *spu)382{383int ret = 0;384385if (spu->irqs[0]) {386snprintf(spu->irq_c0, sizeof (spu->irq_c0), "spe%02d.0",387spu->number);388ret = request_irq(spu->irqs[0], spu_irq_class_0,3890, spu->irq_c0, spu);390if (ret)391goto bail0;392}393if (spu->irqs[1]) {394snprintf(spu->irq_c1, sizeof (spu->irq_c1), "spe%02d.1",395spu->number);396ret = request_irq(spu->irqs[1], spu_irq_class_1,3970, spu->irq_c1, spu);398if (ret)399goto bail1;400}401if (spu->irqs[2]) {402snprintf(spu->irq_c2, sizeof (spu->irq_c2), "spe%02d.2",403spu->number);404ret = request_irq(spu->irqs[2], spu_irq_class_2,4050, spu->irq_c2, spu);406if (ret)407goto bail2;408}409return 0;410411bail2:412if (spu->irqs[1])413free_irq(spu->irqs[1], spu);414bail1:415if (spu->irqs[0])416free_irq(spu->irqs[0], spu);417bail0:418return ret;419}420421static void spu_free_irqs(struct spu *spu)422{423if (spu->irqs[0])424free_irq(spu->irqs[0], spu);425if (spu->irqs[1])426free_irq(spu->irqs[1], spu);427if (spu->irqs[2])428free_irq(spu->irqs[2], spu);429}430431void spu_init_channels(struct spu *spu)432{433static const struct {434unsigned channel;435unsigned count;436} zero_list[] = {437{ 0x00, 1, }, { 0x01, 1, }, { 0x03, 1, }, { 0x04, 1, },438{ 0x18, 1, }, { 0x19, 1, }, { 0x1b, 1, }, { 0x1d, 1, },439}, count_list[] = {440{ 0x00, 0, }, { 0x03, 0, }, { 0x04, 0, }, { 0x15, 16, },441{ 0x17, 1, }, { 0x18, 0, }, { 0x19, 0, }, { 0x1b, 0, },442{ 0x1c, 1, }, { 0x1d, 0, }, { 0x1e, 1, },443};444struct spu_priv2 __iomem *priv2;445int i;446447priv2 = spu->priv2;448449/* initialize all channel data to zero */450for (i = 0; i < ARRAY_SIZE(zero_list); i++) {451int count;452453out_be64(&priv2->spu_chnlcntptr_RW, zero_list[i].channel);454for (count = 0; count < zero_list[i].count; count++)455out_be64(&priv2->spu_chnldata_RW, 0);456}457458/* initialize channel counts to meaningful values */459for (i = 0; i < ARRAY_SIZE(count_list); i++) 
{460out_be64(&priv2->spu_chnlcntptr_RW, count_list[i].channel);461out_be64(&priv2->spu_chnlcnt_RW, count_list[i].count);462}463}464EXPORT_SYMBOL_GPL(spu_init_channels);465466static struct bus_type spu_subsys = {467.name = "spu",468.dev_name = "spu",469};470471int spu_add_dev_attr(struct device_attribute *attr)472{473struct spu *spu;474475mutex_lock(&spu_full_list_mutex);476list_for_each_entry(spu, &spu_full_list, full_list)477device_create_file(&spu->dev, attr);478mutex_unlock(&spu_full_list_mutex);479480return 0;481}482EXPORT_SYMBOL_GPL(spu_add_dev_attr);483484int spu_add_dev_attr_group(const struct attribute_group *attrs)485{486struct spu *spu;487int rc = 0;488489mutex_lock(&spu_full_list_mutex);490list_for_each_entry(spu, &spu_full_list, full_list) {491rc = sysfs_create_group(&spu->dev.kobj, attrs);492493/* we're in trouble here, but try unwinding anyway */494if (rc) {495printk(KERN_ERR "%s: can't create sysfs group '%s'\n",496__func__, attrs->name);497498list_for_each_entry_continue_reverse(spu,499&spu_full_list, full_list)500sysfs_remove_group(&spu->dev.kobj, attrs);501break;502}503}504505mutex_unlock(&spu_full_list_mutex);506507return rc;508}509EXPORT_SYMBOL_GPL(spu_add_dev_attr_group);510511512void spu_remove_dev_attr(struct device_attribute *attr)513{514struct spu *spu;515516mutex_lock(&spu_full_list_mutex);517list_for_each_entry(spu, &spu_full_list, full_list)518device_remove_file(&spu->dev, attr);519mutex_unlock(&spu_full_list_mutex);520}521EXPORT_SYMBOL_GPL(spu_remove_dev_attr);522523void spu_remove_dev_attr_group(const struct attribute_group *attrs)524{525struct spu *spu;526527mutex_lock(&spu_full_list_mutex);528list_for_each_entry(spu, &spu_full_list, full_list)529sysfs_remove_group(&spu->dev.kobj, attrs);530mutex_unlock(&spu_full_list_mutex);531}532EXPORT_SYMBOL_GPL(spu_remove_dev_attr_group);533534static int __init spu_create_dev(struct spu *spu)535{536int ret;537538spu->dev.id = spu->number;539spu->dev.bus = &spu_subsys;540ret = 
device_register(&spu->dev);541if (ret) {542printk(KERN_ERR "Can't register SPU %d with sysfs\n",543spu->number);544return ret;545}546547sysfs_add_device_to_node(&spu->dev, spu->node);548549return 0;550}551552static int __init create_spu(void *data)553{554struct spu *spu;555int ret;556static int number;557unsigned long flags;558559ret = -ENOMEM;560spu = kzalloc(sizeof (*spu), GFP_KERNEL);561if (!spu)562goto out;563564spu->alloc_state = SPU_FREE;565566spin_lock_init(&spu->register_lock);567spin_lock(&spu_lock);568spu->number = number++;569spin_unlock(&spu_lock);570571ret = spu_create_spu(spu, data);572573if (ret)574goto out_free;575576spu_mfc_sdr_setup(spu);577spu_mfc_sr1_set(spu, 0x33);578ret = spu_request_irqs(spu);579if (ret)580goto out_destroy;581582ret = spu_create_dev(spu);583if (ret)584goto out_free_irqs;585586mutex_lock(&cbe_spu_info[spu->node].list_mutex);587list_add(&spu->cbe_list, &cbe_spu_info[spu->node].spus);588cbe_spu_info[spu->node].n_spus++;589mutex_unlock(&cbe_spu_info[spu->node].list_mutex);590591mutex_lock(&spu_full_list_mutex);592spin_lock_irqsave(&spu_full_list_lock, flags);593list_add(&spu->full_list, &spu_full_list);594spin_unlock_irqrestore(&spu_full_list_lock, flags);595mutex_unlock(&spu_full_list_mutex);596597spu->stats.util_state = SPU_UTIL_IDLE_LOADED;598spu->stats.tstamp = ktime_get_ns();599600INIT_LIST_HEAD(&spu->aff_list);601602goto out;603604out_free_irqs:605spu_free_irqs(spu);606out_destroy:607spu_destroy_spu(spu);608out_free:609kfree(spu);610out:611return ret;612}613614static const char *spu_state_names[] = {615"user", "system", "iowait", "idle"616};617618static unsigned long long spu_acct_time(struct spu *spu,619enum spu_utilization_state state)620{621unsigned long long time = spu->stats.times[state];622623/*624* If the spu is idle or the context is stopped, utilization625* statistics are not updated. 
Apply the time delta from the626* last recorded state of the spu.627*/628if (spu->stats.util_state == state)629time += ktime_get_ns() - spu->stats.tstamp;630631return time / NSEC_PER_MSEC;632}633634635static ssize_t spu_stat_show(struct device *dev,636struct device_attribute *attr, char *buf)637{638struct spu *spu = container_of(dev, struct spu, dev);639640return sprintf(buf, "%s %llu %llu %llu %llu "641"%llu %llu %llu %llu %llu %llu %llu %llu\n",642spu_state_names[spu->stats.util_state],643spu_acct_time(spu, SPU_UTIL_USER),644spu_acct_time(spu, SPU_UTIL_SYSTEM),645spu_acct_time(spu, SPU_UTIL_IOWAIT),646spu_acct_time(spu, SPU_UTIL_IDLE_LOADED),647spu->stats.vol_ctx_switch,648spu->stats.invol_ctx_switch,649spu->stats.slb_flt,650spu->stats.hash_flt,651spu->stats.min_flt,652spu->stats.maj_flt,653spu->stats.class2_intr,654spu->stats.libassist);655}656657static DEVICE_ATTR(stat, 0444, spu_stat_show, NULL);658659#ifdef CONFIG_KEXEC_CORE660661struct crash_spu_info {662struct spu *spu;663u32 saved_spu_runcntl_RW;664u32 saved_spu_status_R;665u32 saved_spu_npc_RW;666u64 saved_mfc_sr1_RW;667u64 saved_mfc_dar;668u64 saved_mfc_dsisr;669};670671#define CRASH_NUM_SPUS 16 /* Enough for current hardware */672static struct crash_spu_info crash_spu_info[CRASH_NUM_SPUS];673674static void crash_kexec_stop_spus(void)675{676struct spu *spu;677int i;678u64 tmp;679680for (i = 0; i < CRASH_NUM_SPUS; i++) {681if (!crash_spu_info[i].spu)682continue;683684spu = crash_spu_info[i].spu;685686crash_spu_info[i].saved_spu_runcntl_RW =687in_be32(&spu->problem->spu_runcntl_RW);688crash_spu_info[i].saved_spu_status_R =689in_be32(&spu->problem->spu_status_R);690crash_spu_info[i].saved_spu_npc_RW =691in_be32(&spu->problem->spu_npc_RW);692693crash_spu_info[i].saved_mfc_dar = spu_mfc_dar_get(spu);694crash_spu_info[i].saved_mfc_dsisr = spu_mfc_dsisr_get(spu);695tmp = spu_mfc_sr1_get(spu);696crash_spu_info[i].saved_mfc_sr1_RW = tmp;697698tmp &= ~MFC_STATE1_MASTER_RUN_CONTROL_MASK;699spu_mfc_sr1_set(spu, 
tmp);700701__delay(200);702}703}704705static void __init crash_register_spus(struct list_head *list)706{707struct spu *spu;708int ret;709710list_for_each_entry(spu, list, full_list) {711if (WARN_ON(spu->number >= CRASH_NUM_SPUS))712continue;713714crash_spu_info[spu->number].spu = spu;715}716717ret = crash_shutdown_register(&crash_kexec_stop_spus);718if (ret)719printk(KERN_ERR "Could not register SPU crash handler");720}721722#else723static inline void crash_register_spus(struct list_head *list)724{725}726#endif727728static void spu_shutdown(void)729{730struct spu *spu;731732mutex_lock(&spu_full_list_mutex);733list_for_each_entry(spu, &spu_full_list, full_list) {734spu_free_irqs(spu);735spu_destroy_spu(spu);736}737mutex_unlock(&spu_full_list_mutex);738}739740static struct syscore_ops spu_syscore_ops = {741.shutdown = spu_shutdown,742};743744static int __init init_spu_base(void)745{746int i, ret = 0;747748for (i = 0; i < MAX_NUMNODES; i++) {749mutex_init(&cbe_spu_info[i].list_mutex);750INIT_LIST_HEAD(&cbe_spu_info[i].spus);751}752753if (!spu_management_ops)754goto out;755756/* create system subsystem for spus */757ret = subsys_system_register(&spu_subsys, NULL);758if (ret)759goto out;760761ret = spu_enumerate_spus(create_spu);762763if (ret < 0) {764printk(KERN_WARNING "%s: Error initializing spus\n",765__func__);766goto out_unregister_subsys;767}768769if (ret > 0)770fb_append_extra_logo(&logo_spe_clut224, ret);771772mutex_lock(&spu_full_list_mutex);773crash_register_spus(&spu_full_list);774mutex_unlock(&spu_full_list_mutex);775spu_add_dev_attr(&dev_attr_stat);776register_syscore_ops(&spu_syscore_ops);777778spu_init_affinity();779780return 0;781782out_unregister_subsys:783bus_unregister(&spu_subsys);784out:785return ret;786}787device_initcall(init_spu_base);788789790