/* Path: blob/master/arch/cris/arch-v32/mach-a3/arbiter.c */
/* 15125 views */
/*1* Memory arbiter functions. Allocates bandwidth through the2* arbiter and sets up arbiter breakpoints.3*4* The algorithm first assigns slots to the clients that has specified5* bandwidth (e.g. ethernet) and then the remaining slots are divided6* on all the active clients.7*8* Copyright (c) 2004-2007 Axis Communications AB.9*10* The artpec-3 has two arbiters. The memory hierarchy looks like this:11*12*13* CPU DMAs14* | |15* | |16* -------------- ------------------17* | foo arbiter|----| Internal memory|18* -------------- ------------------19* |20* --------------21* | L2 cache |22* --------------23* |24* h264 etc |25* | |26* | |27* --------------28* | bar arbiter|29* --------------30* |31* ---------32* | SDRAM |33* ---------34*35*/3637#include <hwregs/reg_map.h>38#include <hwregs/reg_rdwr.h>39#include <hwregs/marb_foo_defs.h>40#include <hwregs/marb_bar_defs.h>41#include <arbiter.h>42#include <hwregs/intr_vect.h>43#include <linux/interrupt.h>44#include <linux/irq.h>45#include <linux/signal.h>46#include <linux/errno.h>47#include <linux/spinlock.h>48#include <asm/io.h>49#include <asm/irq_regs.h>5051#define D(x)5253struct crisv32_watch_entry {54unsigned long instance;55watch_callback *cb;56unsigned long start;57unsigned long end;58int used;59};6061#define NUMBER_OF_BP 462#define SDRAM_BANDWIDTH 40000000063#define INTMEM_BANDWIDTH 40000000064#define NBR_OF_SLOTS 6465#define NBR_OF_REGIONS 266#define NBR_OF_CLIENTS 1567#define ARBITERS 268#define UNASSIGNED 1006970struct arbiter {71unsigned long instance;72int nbr_regions;73int nbr_clients;74int requested_slots[NBR_OF_REGIONS][NBR_OF_CLIENTS];75int active_clients[NBR_OF_REGIONS][NBR_OF_CLIENTS];76};7778static struct crisv32_watch_entry watches[ARBITERS][NUMBER_OF_BP] =79{80{81{regi_marb_foo_bp0},82{regi_marb_foo_bp1},83{regi_marb_foo_bp2},84{regi_marb_foo_bp3}85},86{87{regi_marb_bar_bp0},88{regi_marb_bar_bp1},89{regi_marb_bar_bp2},90{regi_marb_bar_bp3}91}92};9394struct arbiter arbiters[ARBITERS] =95{96{ /* L2 cache 
arbiter */97.instance = regi_marb_foo,98.nbr_regions = 2,99.nbr_clients = 15100},101{ /* DDR2 arbiter */102.instance = regi_marb_bar,103.nbr_regions = 1,104.nbr_clients = 9105}106};107108static int max_bandwidth[NBR_OF_REGIONS] = {SDRAM_BANDWIDTH, INTMEM_BANDWIDTH};109110DEFINE_SPINLOCK(arbiter_lock);111112static irqreturn_t113crisv32_foo_arbiter_irq(int irq, void *dev_id);114static irqreturn_t115crisv32_bar_arbiter_irq(int irq, void *dev_id);116117/*118* "I'm the arbiter, I know the score.119* From square one I'll be watching all 64."120* (memory arbiter slots, that is)121*122* Or in other words:123* Program the memory arbiter slots for "region" according to what's124* in requested_slots[] and active_clients[], while minimizing125* latency. A caller may pass a non-zero positive amount for126* "unused_slots", which must then be the unallocated, remaining127* number of slots, free to hand out to any client.128*/129130static void crisv32_arbiter_config(int arbiter, int region, int unused_slots)131{132int slot;133int client;134int interval = 0;135136/*137* This vector corresponds to the hardware arbiter slots (see138* the hardware documentation for semantics). We initialize139* each slot with a suitable sentinel value outside the valid140* range {0 .. NBR_OF_CLIENTS - 1} and replace them with141* client indexes. Then it's fed to the hardware.142*/143s8 val[NBR_OF_SLOTS];144145for (slot = 0; slot < NBR_OF_SLOTS; slot++)146val[slot] = -1;147148for (client = 0; client < arbiters[arbiter].nbr_clients; client++) {149int pos;150/* Allocate the requested non-zero number of slots, but151* also give clients with zero-requests one slot each152* while stocks last. We do the latter here, in client153* order. This makes sure zero-request clients are the154* first to get to any spare slots, else those slots155* could, when bandwidth is allocated close to the limit,156* all be allocated to low-index non-zero-request clients157* in the default-fill loop below. 
Another positive but158* secondary effect is a somewhat better spread of the159* zero-bandwidth clients in the vector, avoiding some of160* the latency that could otherwise be caused by the161* partitioning of non-zero-bandwidth clients at low162* indexes and zero-bandwidth clients at high163* indexes. (Note that this spreading can only affect the164* unallocated bandwidth.) All the above only matters for165* memory-intensive situations, of course.166*/167if (!arbiters[arbiter].requested_slots[region][client]) {168/*169* Skip inactive clients. Also skip zero-slot170* allocations in this pass when there are no known171* free slots.172*/173if (!arbiters[arbiter].active_clients[region][client] ||174unused_slots <= 0)175continue;176177unused_slots--;178179/* Only allocate one slot for this client. */180interval = NBR_OF_SLOTS;181} else182interval = NBR_OF_SLOTS /183arbiters[arbiter].requested_slots[region][client];184185pos = 0;186while (pos < NBR_OF_SLOTS) {187if (val[pos] >= 0)188pos++;189else {190val[pos] = client;191pos += interval;192}193}194}195196client = 0;197for (slot = 0; slot < NBR_OF_SLOTS; slot++) {198/*199* Allocate remaining slots in round-robin200* client-number order for active clients. 
For this201* pass, we ignore requested bandwidth and previous202* allocations.203*/204if (val[slot] < 0) {205int first = client;206while (!arbiters[arbiter].active_clients[region][client]) {207client = (client + 1) %208arbiters[arbiter].nbr_clients;209if (client == first)210break;211}212val[slot] = client;213client = (client + 1) % arbiters[arbiter].nbr_clients;214}215if (arbiter == 0) {216if (region == EXT_REGION)217REG_WR_INT_VECT(marb_foo, regi_marb_foo,218rw_l2_slots, slot, val[slot]);219else if (region == INT_REGION)220REG_WR_INT_VECT(marb_foo, regi_marb_foo,221rw_intm_slots, slot, val[slot]);222} else {223REG_WR_INT_VECT(marb_bar, regi_marb_bar,224rw_ddr2_slots, slot, val[slot]);225}226}227}228229extern char _stext, _etext;230231static void crisv32_arbiter_init(void)232{233static int initialized;234235if (initialized)236return;237238initialized = 1;239240/*241* CPU caches are always set to active, but with zero242* bandwidth allocated. It should be ok to allocate zero243* bandwidth for the caches, because DMA for other channels244* will supposedly finish, once their programmed amount is245* done, and then the caches will get access according to the246* "fixed scheme" for unclaimed slots. Though, if for some247* use-case somewhere, there's a maximum CPU latency for248* e.g. 
some interrupt, we have to start allocating specific249* bandwidth for the CPU caches too.250*/251arbiters[0].active_clients[EXT_REGION][11] = 1;252arbiters[0].active_clients[EXT_REGION][12] = 1;253crisv32_arbiter_config(0, EXT_REGION, 0);254crisv32_arbiter_config(0, INT_REGION, 0);255crisv32_arbiter_config(1, EXT_REGION, 0);256257if (request_irq(MEMARB_FOO_INTR_VECT, crisv32_foo_arbiter_irq,258IRQF_DISABLED, "arbiter", NULL))259printk(KERN_ERR "Couldn't allocate arbiter IRQ\n");260261if (request_irq(MEMARB_BAR_INTR_VECT, crisv32_bar_arbiter_irq,262IRQF_DISABLED, "arbiter", NULL))263printk(KERN_ERR "Couldn't allocate arbiter IRQ\n");264265#ifndef CONFIG_ETRAX_KGDB266/* Global watch for writes to kernel text segment. */267crisv32_arbiter_watch(virt_to_phys(&_stext), &_etext - &_stext,268MARB_CLIENTS(arbiter_all_clients, arbiter_bar_all_clients),269arbiter_all_write, NULL);270#endif271272/* Set up max burst sizes by default */273REG_WR_INT(marb_bar, regi_marb_bar, rw_h264_rd_burst, 3);274REG_WR_INT(marb_bar, regi_marb_bar, rw_h264_wr_burst, 3);275REG_WR_INT(marb_bar, regi_marb_bar, rw_ccd_burst, 3);276REG_WR_INT(marb_bar, regi_marb_bar, rw_vin_wr_burst, 3);277REG_WR_INT(marb_bar, regi_marb_bar, rw_vin_rd_burst, 3);278REG_WR_INT(marb_bar, regi_marb_bar, rw_sclr_rd_burst, 3);279REG_WR_INT(marb_bar, regi_marb_bar, rw_vout_burst, 3);280REG_WR_INT(marb_bar, regi_marb_bar, rw_sclr_fifo_burst, 3);281REG_WR_INT(marb_bar, regi_marb_bar, rw_l2cache_burst, 3);282}283284int crisv32_arbiter_allocate_bandwidth(int client, int region,285unsigned long bandwidth)286{287int i;288int total_assigned = 0;289int total_clients = 0;290int req;291int arbiter = 0;292293crisv32_arbiter_init();294295if (client & 0xffff0000) {296arbiter = 1;297client >>= 16;298}299300for (i = 0; i < arbiters[arbiter].nbr_clients; i++) {301total_assigned += arbiters[arbiter].requested_slots[region][i];302total_clients += arbiters[arbiter].active_clients[region][i];303}304305/* Avoid division by 0 for 0-bandwidth 
requests. */306req = bandwidth == 0307? 0 : NBR_OF_SLOTS / (max_bandwidth[region] / bandwidth);308309/*310* We make sure that there are enough slots only for non-zero311* requests. Requesting 0 bandwidth *may* allocate slots,312* though if all bandwidth is allocated, such a client won't313* get any and will have to rely on getting memory access314* according to the fixed scheme that's the default when one315* of the slot-allocated clients doesn't claim their slot.316*/317if (total_assigned + req > NBR_OF_SLOTS)318return -ENOMEM;319320arbiters[arbiter].active_clients[region][client] = 1;321arbiters[arbiter].requested_slots[region][client] = req;322crisv32_arbiter_config(arbiter, region, NBR_OF_SLOTS - total_assigned);323324/* Propagate allocation from foo to bar */325if (arbiter == 0)326crisv32_arbiter_allocate_bandwidth(8 << 16,327EXT_REGION, bandwidth);328return 0;329}330331/*332* Main entry for bandwidth deallocation.333*334* Strictly speaking, for a somewhat constant set of clients where335* each client gets a constant bandwidth and is just enabled or336* disabled (somewhat dynamically), no action is necessary here to337* avoid starvation for non-zero-allocation clients, as the allocated338* slots will just be unused. 
However, handing out those unused slots339* to active clients avoids needless latency if the "fixed scheme"340* would give unclaimed slots to an eager low-index client.341*/342343void crisv32_arbiter_deallocate_bandwidth(int client, int region)344{345int i;346int total_assigned = 0;347int arbiter = 0;348349if (client & 0xffff0000)350arbiter = 1;351352arbiters[arbiter].requested_slots[region][client] = 0;353arbiters[arbiter].active_clients[region][client] = 0;354355for (i = 0; i < arbiters[arbiter].nbr_clients; i++)356total_assigned += arbiters[arbiter].requested_slots[region][i];357358crisv32_arbiter_config(arbiter, region, NBR_OF_SLOTS - total_assigned);359}360361int crisv32_arbiter_watch(unsigned long start, unsigned long size,362unsigned long clients, unsigned long accesses,363watch_callback *cb)364{365int i;366int arbiter;367int used[2];368int ret = 0;369370crisv32_arbiter_init();371372if (start > 0x80000000) {373printk(KERN_ERR "Arbiter: %lX doesn't look like a "374"physical address", start);375return -EFAULT;376}377378spin_lock(&arbiter_lock);379380if (clients & 0xffff)381used[0] = 1;382if (clients & 0xffff0000)383used[1] = 1;384385for (arbiter = 0; arbiter < ARBITERS; arbiter++) {386if (!used[arbiter])387continue;388389for (i = 0; i < NUMBER_OF_BP; i++) {390if (!watches[arbiter][i].used) {391unsigned intr_mask;392if (arbiter)393intr_mask = REG_RD_INT(marb_bar,394regi_marb_bar, rw_intr_mask);395else396intr_mask = REG_RD_INT(marb_foo,397regi_marb_foo, rw_intr_mask);398399watches[arbiter][i].used = 1;400watches[arbiter][i].start = start;401watches[arbiter][i].end = start + size;402watches[arbiter][i].cb = cb;403404ret |= (i + 1) << (arbiter + 8);405if (arbiter) {406REG_WR_INT(marb_bar_bp,407watches[arbiter][i].instance,408rw_first_addr,409watches[arbiter][i].start);410REG_WR_INT(marb_bar_bp,411watches[arbiter][i].instance,412rw_last_addr,413watches[arbiter][i].end);414REG_WR_INT(marb_bar_bp,415watches[arbiter][i].instance,416rw_op, 
accesses);417REG_WR_INT(marb_bar_bp,418watches[arbiter][i].instance,419rw_clients,420clients & 0xffff);421} else {422REG_WR_INT(marb_foo_bp,423watches[arbiter][i].instance,424rw_first_addr,425watches[arbiter][i].start);426REG_WR_INT(marb_foo_bp,427watches[arbiter][i].instance,428rw_last_addr,429watches[arbiter][i].end);430REG_WR_INT(marb_foo_bp,431watches[arbiter][i].instance,432rw_op, accesses);433REG_WR_INT(marb_foo_bp,434watches[arbiter][i].instance,435rw_clients, clients >> 16);436}437438if (i == 0)439intr_mask |= 1;440else if (i == 1)441intr_mask |= 2;442else if (i == 2)443intr_mask |= 4;444else if (i == 3)445intr_mask |= 8;446447if (arbiter)448REG_WR_INT(marb_bar, regi_marb_bar,449rw_intr_mask, intr_mask);450else451REG_WR_INT(marb_foo, regi_marb_foo,452rw_intr_mask, intr_mask);453454spin_unlock(&arbiter_lock);455456break;457}458}459}460spin_unlock(&arbiter_lock);461if (ret)462return ret;463else464return -ENOMEM;465}466467int crisv32_arbiter_unwatch(int id)468{469int arbiter;470int intr_mask;471472crisv32_arbiter_init();473474spin_lock(&arbiter_lock);475476for (arbiter = 0; arbiter < ARBITERS; arbiter++) {477int id2;478479if (arbiter)480intr_mask = REG_RD_INT(marb_bar, regi_marb_bar,481rw_intr_mask);482else483intr_mask = REG_RD_INT(marb_foo, regi_marb_foo,484rw_intr_mask);485486id2 = (id & (0xff << (arbiter + 8))) >> (arbiter + 8);487if (id2 == 0)488continue;489id2--;490if ((id2 >= NUMBER_OF_BP) || (!watches[arbiter][id2].used)) {491spin_unlock(&arbiter_lock);492return -EINVAL;493}494495memset(&watches[arbiter][id2], 0,496sizeof(struct crisv32_watch_entry));497498if (id2 == 0)499intr_mask &= ~1;500else if (id2 == 1)501intr_mask &= ~2;502else if (id2 == 2)503intr_mask &= ~4;504else if (id2 == 3)505intr_mask &= ~8;506507if (arbiter)508REG_WR_INT(marb_bar, regi_marb_bar, rw_intr_mask,509intr_mask);510else511REG_WR_INT(marb_foo, regi_marb_foo, rw_intr_mask,512intr_mask);513}514515spin_unlock(&arbiter_lock);516return 0;517}518519extern void show_registers(struct 
pt_regs *regs);520521522static irqreturn_t523crisv32_foo_arbiter_irq(int irq, void *dev_id)524{525reg_marb_foo_r_masked_intr masked_intr =526REG_RD(marb_foo, regi_marb_foo, r_masked_intr);527reg_marb_foo_bp_r_brk_clients r_clients;528reg_marb_foo_bp_r_brk_addr r_addr;529reg_marb_foo_bp_r_brk_op r_op;530reg_marb_foo_bp_r_brk_first_client r_first;531reg_marb_foo_bp_r_brk_size r_size;532reg_marb_foo_bp_rw_ack ack = {0};533reg_marb_foo_rw_ack_intr ack_intr = {534.bp0 = 1, .bp1 = 1, .bp2 = 1, .bp3 = 1535};536struct crisv32_watch_entry *watch;537unsigned arbiter = (unsigned)dev_id;538539masked_intr = REG_RD(marb_foo, regi_marb_foo, r_masked_intr);540541if (masked_intr.bp0)542watch = &watches[arbiter][0];543else if (masked_intr.bp1)544watch = &watches[arbiter][1];545else if (masked_intr.bp2)546watch = &watches[arbiter][2];547else if (masked_intr.bp3)548watch = &watches[arbiter][3];549else550return IRQ_NONE;551552/* Retrieve all useful information and print it. */553r_clients = REG_RD(marb_foo_bp, watch->instance, r_brk_clients);554r_addr = REG_RD(marb_foo_bp, watch->instance, r_brk_addr);555r_op = REG_RD(marb_foo_bp, watch->instance, r_brk_op);556r_first = REG_RD(marb_foo_bp, watch->instance, r_brk_first_client);557r_size = REG_RD(marb_foo_bp, watch->instance, r_brk_size);558559printk(KERN_DEBUG "Arbiter IRQ\n");560printk(KERN_DEBUG "Clients %X addr %X op %X first %X size %X\n",561REG_TYPE_CONV(int, reg_marb_foo_bp_r_brk_clients, r_clients),562REG_TYPE_CONV(int, reg_marb_foo_bp_r_brk_addr, r_addr),563REG_TYPE_CONV(int, reg_marb_foo_bp_r_brk_op, r_op),564REG_TYPE_CONV(int, reg_marb_foo_bp_r_brk_first_client, r_first),565REG_TYPE_CONV(int, reg_marb_foo_bp_r_brk_size, r_size));566567REG_WR(marb_foo_bp, watch->instance, rw_ack, ack);568REG_WR(marb_foo, regi_marb_foo, rw_ack_intr, ack_intr);569570printk(KERN_DEBUG "IRQ occurred at %X\n", (unsigned)get_irq_regs());571572if (watch->cb)573watch->cb();574575return IRQ_HANDLED;576}577578static 
irqreturn_t579crisv32_bar_arbiter_irq(int irq, void *dev_id)580{581reg_marb_bar_r_masked_intr masked_intr =582REG_RD(marb_bar, regi_marb_bar, r_masked_intr);583reg_marb_bar_bp_r_brk_clients r_clients;584reg_marb_bar_bp_r_brk_addr r_addr;585reg_marb_bar_bp_r_brk_op r_op;586reg_marb_bar_bp_r_brk_first_client r_first;587reg_marb_bar_bp_r_brk_size r_size;588reg_marb_bar_bp_rw_ack ack = {0};589reg_marb_bar_rw_ack_intr ack_intr = {590.bp0 = 1, .bp1 = 1, .bp2 = 1, .bp3 = 1591};592struct crisv32_watch_entry *watch;593unsigned arbiter = (unsigned)dev_id;594595masked_intr = REG_RD(marb_bar, regi_marb_bar, r_masked_intr);596597if (masked_intr.bp0)598watch = &watches[arbiter][0];599else if (masked_intr.bp1)600watch = &watches[arbiter][1];601else if (masked_intr.bp2)602watch = &watches[arbiter][2];603else if (masked_intr.bp3)604watch = &watches[arbiter][3];605else606return IRQ_NONE;607608/* Retrieve all useful information and print it. */609r_clients = REG_RD(marb_bar_bp, watch->instance, r_brk_clients);610r_addr = REG_RD(marb_bar_bp, watch->instance, r_brk_addr);611r_op = REG_RD(marb_bar_bp, watch->instance, r_brk_op);612r_first = REG_RD(marb_bar_bp, watch->instance, r_brk_first_client);613r_size = REG_RD(marb_bar_bp, watch->instance, r_brk_size);614615printk(KERN_DEBUG "Arbiter IRQ\n");616printk(KERN_DEBUG "Clients %X addr %X op %X first %X size %X\n",617REG_TYPE_CONV(int, reg_marb_bar_bp_r_brk_clients, r_clients),618REG_TYPE_CONV(int, reg_marb_bar_bp_r_brk_addr, r_addr),619REG_TYPE_CONV(int, reg_marb_bar_bp_r_brk_op, r_op),620REG_TYPE_CONV(int, reg_marb_bar_bp_r_brk_first_client, r_first),621REG_TYPE_CONV(int, reg_marb_bar_bp_r_brk_size, r_size));622623REG_WR(marb_bar_bp, watch->instance, rw_ack, ack);624REG_WR(marb_bar, regi_marb_bar, rw_ack_intr, ack_intr);625626printk(KERN_DEBUG "IRQ occurred at %X\n", (unsigned)get_irq_regs()->erp);627628if (watch->cb)629watch->cb();630631return IRQ_HANDLED;632}633634635636