GitHub Repository: awilliam/linux-vfio
Path: blob/master/arch/cris/arch-v32/mach-a3/arbiter.c
/*
 * Memory arbiter functions. Allocates bandwidth through the
 * arbiter and sets up arbiter breakpoints.
 *
 * The algorithm first assigns slots to the clients that have specified
 * bandwidth (e.g. ethernet) and then the remaining slots are divided
 * among all the active clients.
 *
 * Copyright (c) 2004-2007 Axis Communications AB.
 *
 * The ARTPEC-3 has two arbiters. The memory hierarchy looks like this:
 *
 *
 *            CPU     DMAs
 *             |       |
 *             |       |
 *     --------------    ------------------
 *     | foo arbiter|----| Internal memory|
 *     --------------    ------------------
 *            |
 *     --------------
 *     |  L2 cache  |
 *     --------------
 *            |
 *  h264 etc  |
 *     |      |
 *     |      |
 *     --------------
 *     | bar arbiter|
 *     --------------
 *            |
 *       ---------
 *       | SDRAM |
 *       ---------
 *
 */
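
/*
 * Illustrative numbers (not from the original source): with 64 slots,
 * a client that asked for half the bandwidth is placed in every other
 * slot in the first pass, and the remaining 32 slots are then divided
 * round-robin among all active clients in the second pass.
 */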

#include <hwregs/reg_map.h>
#include <hwregs/reg_rdwr.h>
#include <hwregs/marb_foo_defs.h>
#include <hwregs/marb_bar_defs.h>
#include <arbiter.h>
#include <hwregs/intr_vect.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/signal.h>
#include <linux/errno.h>
#include <linux/spinlock.h>
#include <asm/io.h>
#include <asm/irq_regs.h>

/* Debug printout macro; expands to nothing in normal builds. */
#define D(x)

/* One hardware watch (breakpoint) on one arbiter instance. */
struct crisv32_watch_entry {
	unsigned long instance;
	watch_callback *cb;
	unsigned long start;
	unsigned long end;
	int used;
};

#define NUMBER_OF_BP 4
#define SDRAM_BANDWIDTH 400000000
#define INTMEM_BANDWIDTH 400000000
#define NBR_OF_SLOTS 64
#define NBR_OF_REGIONS 2
#define NBR_OF_CLIENTS 15
#define ARBITERS 2
#define UNASSIGNED 100
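
/*
 * For scale: with NBR_OF_SLOTS = 64 shared over a 400000000 byte/s
 * interface, one slot corresponds to 400000000 / 64 = 6250000 bytes/s,
 * so requests are effectively granted in units of that size (see the
 * integer math in crisv32_arbiter_allocate_bandwidth() below).
 */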

struct arbiter {
	unsigned long instance;
	int nbr_regions;
	int nbr_clients;
	int requested_slots[NBR_OF_REGIONS][NBR_OF_CLIENTS];
	int active_clients[NBR_OF_REGIONS][NBR_OF_CLIENTS];
};
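
/*
 * requested_slots[][] holds the number of arbiter slots granted to
 * each client per region; active_clients[][] flags the clients that
 * should share whatever slots remain unallocated.
 */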

static struct crisv32_watch_entry watches[ARBITERS][NUMBER_OF_BP] =
{
	{
		{regi_marb_foo_bp0},
		{regi_marb_foo_bp1},
		{regi_marb_foo_bp2},
		{regi_marb_foo_bp3}
	},
	{
		{regi_marb_bar_bp0},
		{regi_marb_bar_bp1},
		{regi_marb_bar_bp2},
		{regi_marb_bar_bp3}
	}
};

struct arbiter arbiters[ARBITERS] =
{
	{ /* L2 cache arbiter */
		.instance = regi_marb_foo,
		.nbr_regions = 2,
		.nbr_clients = 15
	},
	{ /* DDR2 arbiter */
		.instance = regi_marb_bar,
		.nbr_regions = 1,
		.nbr_clients = 9
	}
};

static int max_bandwidth[NBR_OF_REGIONS] = {SDRAM_BANDWIDTH, INTMEM_BANDWIDTH};

DEFINE_SPINLOCK(arbiter_lock);

static irqreturn_t
crisv32_foo_arbiter_irq(int irq, void *dev_id);
static irqreturn_t
crisv32_bar_arbiter_irq(int irq, void *dev_id);

/*
 * "I'm the arbiter, I know the score.
 * From square one I'll be watching all 64."
 * (memory arbiter slots, that is)
 *
 * Or in other words:
 * Program the memory arbiter slots for "region" according to what's
 * in requested_slots[] and active_clients[], while minimizing
 * latency. A caller may pass a non-zero positive amount for
 * "unused_slots", which must then be the unallocated, remaining
 * number of slots, free to hand out to any client.
 */
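
/*
 * For example, a client granted 16 of the 64 slots is placed at
 * roughly every 64/16 = 4th slot, so its accesses are spread evenly
 * across the schedule instead of bunched together, keeping its
 * worst-case wait for a slot low.
 */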

static void crisv32_arbiter_config(int arbiter, int region, int unused_slots)
{
	int slot;
	int client;
	int interval = 0;

	/*
	 * This vector corresponds to the hardware arbiter slots (see
	 * the hardware documentation for semantics). We initialize
	 * each slot with a suitable sentinel value outside the valid
	 * range {0 .. NBR_OF_CLIENTS - 1} and replace them with
	 * client indexes. Then it's fed to the hardware.
	 */
	s8 val[NBR_OF_SLOTS];

	for (slot = 0; slot < NBR_OF_SLOTS; slot++)
		val[slot] = -1;

	for (client = 0; client < arbiters[arbiter].nbr_clients; client++) {
		int pos;
		/* Allocate the requested non-zero number of slots, but
		 * also give clients with zero-requests one slot each
		 * while stocks last. We do the latter here, in client
		 * order. This makes sure zero-request clients are the
		 * first to get to any spare slots, else those slots
		 * could, when bandwidth is allocated close to the limit,
		 * all be allocated to low-index non-zero-request clients
		 * in the default-fill loop below. Another positive but
		 * secondary effect is a somewhat better spread of the
		 * zero-bandwidth clients in the vector, avoiding some of
		 * the latency that could otherwise be caused by the
		 * partitioning of non-zero-bandwidth clients at low
		 * indexes and zero-bandwidth clients at high
		 * indexes. (Note that this spreading can only affect the
		 * unallocated bandwidth.) All the above only matters for
		 * memory-intensive situations, of course.
		 */
		if (!arbiters[arbiter].requested_slots[region][client]) {
			/*
			 * Skip inactive clients. Also skip zero-slot
			 * allocations in this pass when there are no known
			 * free slots.
			 */
			if (!arbiters[arbiter].active_clients[region][client] ||
			    unused_slots <= 0)
				continue;

			unused_slots--;

			/* Only allocate one slot for this client. */
			interval = NBR_OF_SLOTS;
		} else
			interval = NBR_OF_SLOTS /
				arbiters[arbiter].requested_slots[region][client];

		pos = 0;
		while (pos < NBR_OF_SLOTS) {
			if (val[pos] >= 0)
				pos++;
			else {
				val[pos] = client;
				pos += interval;
			}
		}
	}

	client = 0;
	for (slot = 0; slot < NBR_OF_SLOTS; slot++) {
		/*
		 * Allocate remaining slots in round-robin
		 * client-number order for active clients. For this
		 * pass, we ignore requested bandwidth and previous
		 * allocations.
		 */
		if (val[slot] < 0) {
			int first = client;
			while (!arbiters[arbiter].active_clients[region][client]) {
				client = (client + 1) %
					arbiters[arbiter].nbr_clients;
				if (client == first)
					break;
			}
			val[slot] = client;
			client = (client + 1) % arbiters[arbiter].nbr_clients;
		}
		if (arbiter == 0) {
			if (region == EXT_REGION)
				REG_WR_INT_VECT(marb_foo, regi_marb_foo,
					rw_l2_slots, slot, val[slot]);
			else if (region == INT_REGION)
				REG_WR_INT_VECT(marb_foo, regi_marb_foo,
					rw_intm_slots, slot, val[slot]);
		} else {
			REG_WR_INT_VECT(marb_bar, regi_marb_bar,
				rw_ddr2_slots, slot, val[slot]);
		}
	}
}

extern char _stext, _etext;

static void crisv32_arbiter_init(void)
{
	static int initialized;

	if (initialized)
		return;

	initialized = 1;

	/*
	 * CPU caches are always set to active, but with zero
	 * bandwidth allocated. It should be ok to allocate zero
	 * bandwidth for the caches, because DMA for other channels
	 * will supposedly finish, once their programmed amount is
	 * done, and then the caches will get access according to the
	 * "fixed scheme" for unclaimed slots. Though, if for some
	 * use-case somewhere, there's a maximum CPU latency for
	 * e.g. some interrupt, we have to start allocating specific
	 * bandwidth for the CPU caches too.
	 */
	arbiters[0].active_clients[EXT_REGION][11] = 1;
	arbiters[0].active_clients[EXT_REGION][12] = 1;
	crisv32_arbiter_config(0, EXT_REGION, 0);
	crisv32_arbiter_config(0, INT_REGION, 0);
	crisv32_arbiter_config(1, EXT_REGION, 0);

	/*
	 * Pass the arbiter index as dev_id so each interrupt handler
	 * indexes the right row of watches[].
	 */
	if (request_irq(MEMARB_FOO_INTR_VECT, crisv32_foo_arbiter_irq,
			IRQF_DISABLED, "arbiter", (void *)0))
		printk(KERN_ERR "Couldn't allocate arbiter IRQ\n");

	if (request_irq(MEMARB_BAR_INTR_VECT, crisv32_bar_arbiter_irq,
			IRQF_DISABLED, "arbiter", (void *)1))
		printk(KERN_ERR "Couldn't allocate arbiter IRQ\n");

#ifndef CONFIG_ETRAX_KGDB
	/* Global watch for writes to kernel text segment. */
	crisv32_arbiter_watch(virt_to_phys(&_stext), &_etext - &_stext,
		MARB_CLIENTS(arbiter_all_clients, arbiter_bar_all_clients),
		arbiter_all_write, NULL);
#endif

	/* Set up max burst sizes by default */
	REG_WR_INT(marb_bar, regi_marb_bar, rw_h264_rd_burst, 3);
	REG_WR_INT(marb_bar, regi_marb_bar, rw_h264_wr_burst, 3);
	REG_WR_INT(marb_bar, regi_marb_bar, rw_ccd_burst, 3);
	REG_WR_INT(marb_bar, regi_marb_bar, rw_vin_wr_burst, 3);
	REG_WR_INT(marb_bar, regi_marb_bar, rw_vin_rd_burst, 3);
	REG_WR_INT(marb_bar, regi_marb_bar, rw_sclr_rd_burst, 3);
	REG_WR_INT(marb_bar, regi_marb_bar, rw_vout_burst, 3);
	REG_WR_INT(marb_bar, regi_marb_bar, rw_sclr_fifo_burst, 3);
	REG_WR_INT(marb_bar, regi_marb_bar, rw_l2cache_burst, 3);
}

int crisv32_arbiter_allocate_bandwidth(int client, int region,
				       unsigned long bandwidth)
{
	int i;
	int total_assigned = 0;
	int total_clients = 0;
	int req;
	int arbiter = 0;

	crisv32_arbiter_init();

	if (client & 0xffff0000) {
		arbiter = 1;
		client >>= 16;
	}

	for (i = 0; i < arbiters[arbiter].nbr_clients; i++) {
		total_assigned += arbiters[arbiter].requested_slots[region][i];
		total_clients += arbiters[arbiter].active_clients[region][i];
	}

	/* Avoid division by 0 for 0-bandwidth requests. */
	req = bandwidth == 0
		? 0 : NBR_OF_SLOTS / (max_bandwidth[region] / bandwidth);

	/*
	 * We make sure that there are enough slots only for non-zero
	 * requests. Requesting 0 bandwidth *may* allocate slots,
	 * though if all bandwidth is allocated, such a client won't
	 * get any and will have to rely on getting memory access
	 * according to the fixed scheme that's the default when one
	 * of the slot-allocated clients doesn't claim their slot.
	 */
	if (total_assigned + req > NBR_OF_SLOTS)
		return -ENOMEM;

	arbiters[arbiter].active_clients[region][client] = 1;
	arbiters[arbiter].requested_slots[region][client] = req;
	crisv32_arbiter_config(arbiter, region, NBR_OF_SLOTS - total_assigned);

	/* Propagate allocation from foo to bar */
	if (arbiter == 0)
		crisv32_arbiter_allocate_bandwidth(8 << 16,
						   EXT_REGION, bandwidth);
	return 0;
}
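
/*
 * Sketch of typical use (illustrative values, not from this file):
 * reserving 12500000 bytes/s of SDRAM bandwidth for client 5 yields
 * 64 / (400000000 / 12500000) = 2 slots.
 *
 *	if (crisv32_arbiter_allocate_bandwidth(5, EXT_REGION, 12500000))
 *		return -ENOMEM;
 *	...
 *	crisv32_arbiter_deallocate_bandwidth(5, EXT_REGION);
 */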

/*
 * Main entry for bandwidth deallocation.
 *
 * Strictly speaking, for a somewhat constant set of clients where
 * each client gets a constant bandwidth and is just enabled or
 * disabled (somewhat dynamically), no action is necessary here to
 * avoid starvation for non-zero-allocation clients, as the allocated
 * slots will just be unused. However, handing out those unused slots
 * to active clients avoids needless latency if the "fixed scheme"
 * would give unclaimed slots to an eager low-index client.
 */

void crisv32_arbiter_deallocate_bandwidth(int client, int region)
{
	int i;
	int total_assigned = 0;
	int arbiter = 0;

	if (client & 0xffff0000) {
		arbiter = 1;
		/* Shift down to a per-arbiter client index, as in
		 * crisv32_arbiter_allocate_bandwidth(). */
		client >>= 16;
	}

	arbiters[arbiter].requested_slots[region][client] = 0;
	arbiters[arbiter].active_clients[region][client] = 0;

	for (i = 0; i < arbiters[arbiter].nbr_clients; i++)
		total_assigned += arbiters[arbiter].requested_slots[region][i];

	crisv32_arbiter_config(arbiter, region, NBR_OF_SLOTS - total_assigned);
}

int crisv32_arbiter_watch(unsigned long start, unsigned long size,
			  unsigned long clients, unsigned long accesses,
			  watch_callback *cb)
{
	int i;
	int arbiter;
	int used[2] = { 0, 0 };
	int ret = 0;

	crisv32_arbiter_init();

	if (start > 0x80000000) {
		printk(KERN_ERR "Arbiter: %lX doesn't look like a "
			"physical address\n", start);
		return -EFAULT;
	}

	spin_lock(&arbiter_lock);

	if (clients & 0xffff)
		used[0] = 1;
	if (clients & 0xffff0000)
		used[1] = 1;

	for (arbiter = 0; arbiter < ARBITERS; arbiter++) {
		if (!used[arbiter])
			continue;

		for (i = 0; i < NUMBER_OF_BP; i++) {
			if (!watches[arbiter][i].used) {
				unsigned intr_mask;
				if (arbiter)
					intr_mask = REG_RD_INT(marb_bar,
						regi_marb_bar, rw_intr_mask);
				else
					intr_mask = REG_RD_INT(marb_foo,
						regi_marb_foo, rw_intr_mask);

				watches[arbiter][i].used = 1;
				watches[arbiter][i].start = start;
				watches[arbiter][i].end = start + size;
				watches[arbiter][i].cb = cb;

				ret |= (i + 1) << (arbiter + 8);
				if (arbiter) {
					REG_WR_INT(marb_bar_bp,
						watches[arbiter][i].instance,
						rw_first_addr,
						watches[arbiter][i].start);
					REG_WR_INT(marb_bar_bp,
						watches[arbiter][i].instance,
						rw_last_addr,
						watches[arbiter][i].end);
					REG_WR_INT(marb_bar_bp,
						watches[arbiter][i].instance,
						rw_op, accesses);
					REG_WR_INT(marb_bar_bp,
						watches[arbiter][i].instance,
						rw_clients,
						clients & 0xffff);
				} else {
					REG_WR_INT(marb_foo_bp,
						watches[arbiter][i].instance,
						rw_first_addr,
						watches[arbiter][i].start);
					REG_WR_INT(marb_foo_bp,
						watches[arbiter][i].instance,
						rw_last_addr,
						watches[arbiter][i].end);
					REG_WR_INT(marb_foo_bp,
						watches[arbiter][i].instance,
						rw_op, accesses);
					REG_WR_INT(marb_foo_bp,
						watches[arbiter][i].instance,
						rw_clients, clients >> 16);
				}

				/* Unmask the interrupt for breakpoint i. */
				intr_mask |= 1 << i;

				if (arbiter)
					REG_WR_INT(marb_bar, regi_marb_bar,
						rw_intr_mask, intr_mask);
				else
					REG_WR_INT(marb_foo, regi_marb_foo,
						rw_intr_mask, intr_mask);

				break;
			}
		}
	}
	spin_unlock(&arbiter_lock);
	if (ret)
		return ret;
	else
		return -ENOMEM;
}
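
/*
 * The id returned above packs (breakpoint index + 1) shifted by
 * (arbiter + 8): e.g. taking breakpoint 0 on arbiter 0 alone yields
 * 1 << 8 = 0x100. crisv32_arbiter_unwatch() undoes this per arbiter.
 */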

int crisv32_arbiter_unwatch(int id)
{
	int arbiter;
	int intr_mask;

	crisv32_arbiter_init();

	spin_lock(&arbiter_lock);

	for (arbiter = 0; arbiter < ARBITERS; arbiter++) {
		int id2;

		if (arbiter)
			intr_mask = REG_RD_INT(marb_bar, regi_marb_bar,
				rw_intr_mask);
		else
			intr_mask = REG_RD_INT(marb_foo, regi_marb_foo,
				rw_intr_mask);

		id2 = (id & (0xff << (arbiter + 8))) >> (arbiter + 8);
		if (id2 == 0)
			continue;
		id2--;
		if ((id2 >= NUMBER_OF_BP) || (!watches[arbiter][id2].used)) {
			spin_unlock(&arbiter_lock);
			return -EINVAL;
		}

		memset(&watches[arbiter][id2], 0,
		       sizeof(struct crisv32_watch_entry));

		/* Mask the interrupt for breakpoint id2 again. */
		intr_mask &= ~(1 << id2);

		if (arbiter)
			REG_WR_INT(marb_bar, regi_marb_bar, rw_intr_mask,
				   intr_mask);
		else
			REG_WR_INT(marb_foo, regi_marb_foo, rw_intr_mask,
				   intr_mask);
	}

	spin_unlock(&arbiter_lock);
	return 0;
}

extern void show_registers(struct pt_regs *regs);

static irqreturn_t
crisv32_foo_arbiter_irq(int irq, void *dev_id)
{
	reg_marb_foo_r_masked_intr masked_intr =
		REG_RD(marb_foo, regi_marb_foo, r_masked_intr);
	reg_marb_foo_bp_r_brk_clients r_clients;
	reg_marb_foo_bp_r_brk_addr r_addr;
	reg_marb_foo_bp_r_brk_op r_op;
	reg_marb_foo_bp_r_brk_first_client r_first;
	reg_marb_foo_bp_r_brk_size r_size;
	reg_marb_foo_bp_rw_ack ack = {0};
	reg_marb_foo_rw_ack_intr ack_intr = {
		.bp0 = 1, .bp1 = 1, .bp2 = 1, .bp3 = 1
	};
	struct crisv32_watch_entry *watch;
	unsigned arbiter = (unsigned long)dev_id;

	if (masked_intr.bp0)
		watch = &watches[arbiter][0];
	else if (masked_intr.bp1)
		watch = &watches[arbiter][1];
	else if (masked_intr.bp2)
		watch = &watches[arbiter][2];
	else if (masked_intr.bp3)
		watch = &watches[arbiter][3];
	else
		return IRQ_NONE;

	/* Retrieve all useful information and print it. */
	r_clients = REG_RD(marb_foo_bp, watch->instance, r_brk_clients);
	r_addr = REG_RD(marb_foo_bp, watch->instance, r_brk_addr);
	r_op = REG_RD(marb_foo_bp, watch->instance, r_brk_op);
	r_first = REG_RD(marb_foo_bp, watch->instance, r_brk_first_client);
	r_size = REG_RD(marb_foo_bp, watch->instance, r_brk_size);

	printk(KERN_DEBUG "Arbiter IRQ\n");
	printk(KERN_DEBUG "Clients %X addr %X op %X first %X size %X\n",
	       REG_TYPE_CONV(int, reg_marb_foo_bp_r_brk_clients, r_clients),
	       REG_TYPE_CONV(int, reg_marb_foo_bp_r_brk_addr, r_addr),
	       REG_TYPE_CONV(int, reg_marb_foo_bp_r_brk_op, r_op),
	       REG_TYPE_CONV(int, reg_marb_foo_bp_r_brk_first_client, r_first),
	       REG_TYPE_CONV(int, reg_marb_foo_bp_r_brk_size, r_size));

	/* Ack both the breakpoint unit and the arbiter's top-level intr. */
	REG_WR(marb_foo_bp, watch->instance, rw_ack, ack);
	REG_WR(marb_foo, regi_marb_foo, rw_ack_intr, ack_intr);

	printk(KERN_DEBUG "IRQ occurred at %X\n",
	       (unsigned)get_irq_regs()->erp);

	if (watch->cb)
		watch->cb();

	return IRQ_HANDLED;
}

static irqreturn_t
crisv32_bar_arbiter_irq(int irq, void *dev_id)
{
	reg_marb_bar_r_masked_intr masked_intr =
		REG_RD(marb_bar, regi_marb_bar, r_masked_intr);
	reg_marb_bar_bp_r_brk_clients r_clients;
	reg_marb_bar_bp_r_brk_addr r_addr;
	reg_marb_bar_bp_r_brk_op r_op;
	reg_marb_bar_bp_r_brk_first_client r_first;
	reg_marb_bar_bp_r_brk_size r_size;
	reg_marb_bar_bp_rw_ack ack = {0};
	reg_marb_bar_rw_ack_intr ack_intr = {
		.bp0 = 1, .bp1 = 1, .bp2 = 1, .bp3 = 1
	};
	struct crisv32_watch_entry *watch;
	unsigned arbiter = (unsigned long)dev_id;

	if (masked_intr.bp0)
		watch = &watches[arbiter][0];
	else if (masked_intr.bp1)
		watch = &watches[arbiter][1];
	else if (masked_intr.bp2)
		watch = &watches[arbiter][2];
	else if (masked_intr.bp3)
		watch = &watches[arbiter][3];
	else
		return IRQ_NONE;

	/* Retrieve all useful information and print it. */
	r_clients = REG_RD(marb_bar_bp, watch->instance, r_brk_clients);
	r_addr = REG_RD(marb_bar_bp, watch->instance, r_brk_addr);
	r_op = REG_RD(marb_bar_bp, watch->instance, r_brk_op);
	r_first = REG_RD(marb_bar_bp, watch->instance, r_brk_first_client);
	r_size = REG_RD(marb_bar_bp, watch->instance, r_brk_size);

	printk(KERN_DEBUG "Arbiter IRQ\n");
	printk(KERN_DEBUG "Clients %X addr %X op %X first %X size %X\n",
	       REG_TYPE_CONV(int, reg_marb_bar_bp_r_brk_clients, r_clients),
	       REG_TYPE_CONV(int, reg_marb_bar_bp_r_brk_addr, r_addr),
	       REG_TYPE_CONV(int, reg_marb_bar_bp_r_brk_op, r_op),
	       REG_TYPE_CONV(int, reg_marb_bar_bp_r_brk_first_client, r_first),
	       REG_TYPE_CONV(int, reg_marb_bar_bp_r_brk_size, r_size));

	/* Ack both the breakpoint unit and the arbiter's top-level intr. */
	REG_WR(marb_bar_bp, watch->instance, rw_ack, ack);
	REG_WR(marb_bar, regi_marb_bar, rw_ack_intr, ack_intr);

	printk(KERN_DEBUG "IRQ occurred at %X\n",
	       (unsigned)get_irq_regs()->erp);

	if (watch->cb)
		watch->cb();

	return IRQ_HANDLED;
}