GitHub Repository: awilliam/linux-vfio
Path: blob/master/arch/ia64/mm/tlb.c
/*
 * TLB support routines.
 *
 * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co
 *	David Mosberger-Tang <[email protected]>
 *
 * 08/02/00 A. Mallick <[email protected]>
 *		Modified RID allocation for SMP
 *	    Goutham Rao <[email protected]>
 *		IPI based ptc implementation and A-step IPI implementation.
 * Rohit Seth <[email protected]>
 * Ken Chen <[email protected]>
 * Christophe de Dinechin <[email protected]>: Avoid ptc.e on memory allocation
 * Copyright (C) 2007 Intel Corp
 *	Fenghua Yu <[email protected]>
 *	Add multiple ptc.g/ptc.ga instruction support in global tlb purge.
 */
#include <linux/module.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/mm.h>
#include <linux/bootmem.h>
#include <linux/slab.h>

#include <asm/delay.h>
#include <asm/mmu_context.h>
#include <asm/pgalloc.h>
#include <asm/pal.h>
#include <asm/tlbflush.h>
#include <asm/dma.h>
#include <asm/processor.h>
#include <asm/sal.h>
#include <asm/tlb.h>

static struct {
	u64 mask;		/* mask of supported purge page-sizes */
	unsigned long max_bits;	/* log2 of largest supported purge page-size */
} purge;

struct ia64_ctx ia64_ctx = {
	.lock = __SPIN_LOCK_UNLOCKED(ia64_ctx.lock),
	.next = 1,
	.max_ctx = ~0U
};

DEFINE_PER_CPU(u8, ia64_need_tlb_flush);
DEFINE_PER_CPU(u8, ia64_tr_num);  /* Number of TR slots in current processor */
DEFINE_PER_CPU(u8, ia64_tr_used); /* Max slot number used by kernel */

struct ia64_tr_entry *ia64_idtrs[NR_CPUS];

/*
 * Initializes the ia64_ctx.bitmap and ia64_ctx.flushmap arrays based on
 * max_ctx+1.  Called after cpu_init() has set up ia64_ctx.max_ctx based
 * on the maximum RID supported by the boot CPU.
 */
void __init
mmu_context_init (void)
{
	ia64_ctx.bitmap = alloc_bootmem((ia64_ctx.max_ctx+1)>>3);
	ia64_ctx.flushmap = alloc_bootmem((ia64_ctx.max_ctx+1)>>3);
}
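
/*
 * Worked example (illustrative figures only): with 18 implemented region-ID
 * bits, max_ctx would be 2^18 - 1, so each of the two bitmaps above takes
 * (max_ctx + 1) / 8 = 2^18 / 8 = 32 KiB, i.e. one bit per context.
 */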

/*
 * Acquire the ia64_ctx.lock before calling this function!
 */
void
wrap_mmu_context (struct mm_struct *mm)
{
	int i, cpu;
	unsigned long flush_bit;

	for (i=0; i <= ia64_ctx.max_ctx / BITS_PER_LONG; i++) {
		flush_bit = xchg(&ia64_ctx.flushmap[i], 0);
		ia64_ctx.bitmap[i] ^= flush_bit;
	}

	/* use offset at 300 to skip daemons */
	ia64_ctx.next = find_next_zero_bit(ia64_ctx.bitmap,
				ia64_ctx.max_ctx, 300);
	ia64_ctx.limit = find_next_bit(ia64_ctx.bitmap,
				ia64_ctx.max_ctx, ia64_ctx.next);

	/*
	 * can't call flush_tlb_all() here because of race condition
	 * with O(1) scheduler [EF]
	 */
	cpu = get_cpu(); /* prevent preemption/migration */
	for_each_online_cpu(i)
		if (i != cpu)
			per_cpu(ia64_need_tlb_flush, i) = 1;
	put_cpu();
	local_flush_tlb_all();
}

/*
 * Implement "spinaphores" ... like counting semaphores, but they
 * spin instead of sleeping.  If there are ever any other users for
 * this primitive it can be moved up to a spinaphore.h header.
 */
struct spinaphore {
	unsigned long ticket;
	unsigned long serve;
};

static inline void spinaphore_init(struct spinaphore *ss, int val)
{
	ss->ticket = 0;
	ss->serve = val;
}

static inline void down_spin(struct spinaphore *ss)
{
	unsigned long t = ia64_fetchadd(1, &ss->ticket, acq), serve;

	if (time_before(t, ss->serve))
		return;

	ia64_invala();

	for (;;) {
		asm volatile ("ld8.c.nc %0=[%1]" : "=r"(serve) : "r"(&ss->serve) : "memory");
		if (time_before(t, serve))
			return;
		cpu_relax();
	}
}

static inline void up_spin(struct spinaphore *ss)
{
	ia64_fetchadd(1, &ss->serve, rel);
}
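
/*
 * Minimal usage sketch of the spinaphore primitive above (illustrative only;
 * the variable name and the limit of 2 are hypothetical).  The real user is
 * the ptc.g path below, which bounds concurrent global purges by nptcg.
 */
static struct spinaphore example_sem;

static void __maybe_unused spinaphore_example(void)
{
	spinaphore_init(&example_sem, 2);	/* done once: allow two concurrent holders */

	down_spin(&example_sem);		/* take a ticket, spin until it is served */
	/* ... perform the operation that only two CPUs may do at once ... */
	up_spin(&example_sem);			/* advance "serve" for the next waiter */
}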

static struct spinaphore ptcg_sem;
static u16 nptcg = 1;
static int need_ptcg_sem = 1;
static int toolatetochangeptcgsem = 0;

/*
 * Kernel parameter "nptcg=" overrides the max number of concurrent global
 * TLB purges reported by either PAL or the SAL PALO table.
 *
 * There is no sanity checking of the nptcg value; it is the user's
 * responsibility to supply a value that is valid for the platform.
 * Otherwise the kernel may hang in some cases.
 */
static int __init
set_nptcg(char *str)
{
	int value = 0;

	get_option(&str, &value);
	setup_ptcg_sem(value, NPTCG_FROM_KERNEL_PARAMETER);

	return 1;
}

__setup("nptcg=", set_nptcg);
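
/*
 * Example (hypothetical command line): booting with "nptcg=2" appended to
 * the kernel arguments caps the number of concurrent ptc.g purges at 2 and
 * takes precedence over whatever PAL or the PALO table report.
 */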

/*
 * Maximum number of simultaneous ptc.g purges in the system can
 * be defined by PAL_VM_SUMMARY (in which case we should take
 * the smallest value for any cpu in the system) or by the PAL
 * override table (in which case we should ignore the value from
 * PAL_VM_SUMMARY).
 *
 * Kernel parameter "nptcg=" overrides the maximum number of simultaneous
 * ptc.g purges defined in either PAL_VM_SUMMARY or the PAL override table.
 * In this case, we should ignore the value from either PAL_VM_SUMMARY or
 * the PAL override table.
 *
 * Complicating the logic here is the fact that num_possible_cpus()
 * isn't fully set up until we start bringing cpus online.
 */
void
setup_ptcg_sem(int max_purges, int nptcg_from)
{
	static int kp_override;
	static int palo_override;
	static int firstcpu = 1;

	if (toolatetochangeptcgsem) {
		if (nptcg_from == NPTCG_FROM_PAL && max_purges == 0)
			BUG_ON(1 < nptcg);
		else
			BUG_ON(max_purges < nptcg);
		return;
	}

	if (nptcg_from == NPTCG_FROM_KERNEL_PARAMETER) {
		kp_override = 1;
		nptcg = max_purges;
		goto resetsema;
	}
	if (kp_override) {
		need_ptcg_sem = num_possible_cpus() > nptcg;
		return;
	}

	if (nptcg_from == NPTCG_FROM_PALO) {
		palo_override = 1;

		/* In PALO max_purges == 0 really means it! */
		if (max_purges == 0)
			panic("Whoa! Platform does not support global TLB purges.\n");
		nptcg = max_purges;
		if (nptcg == PALO_MAX_TLB_PURGES) {
			need_ptcg_sem = 0;
			return;
		}
		goto resetsema;
	}
	if (palo_override) {
		if (nptcg != PALO_MAX_TLB_PURGES)
			need_ptcg_sem = (num_possible_cpus() > nptcg);
		return;
	}

	/* In PAL_VM_SUMMARY max_purges == 0 actually means 1 */
	if (max_purges == 0) max_purges = 1;

	if (firstcpu) {
		nptcg = max_purges;
		firstcpu = 0;
	}
	if (max_purges < nptcg)
		nptcg = max_purges;
	if (nptcg == PAL_MAX_PURGES) {
		need_ptcg_sem = 0;
		return;
	} else
		need_ptcg_sem = (num_possible_cpus() > nptcg);

resetsema:
	spinaphore_init(&ptcg_sem, max_purges);
}
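
/*
 * Worked example (hypothetical platform): on a 16-CPU system whose CPUs all
 * report 4 concurrent purges via PAL_VM_SUMMARY, the first CPU sets nptcg = 4
 * and later CPUs can only lower it.  Assuming 4 is not the PAL_MAX_PURGES
 * sentinel, num_possible_cpus() (16) > nptcg (4), so need_ptcg_sem stays set
 * and ptcg_sem is initialized with 4 tickets.
 */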

void
ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start,
		       unsigned long end, unsigned long nbits)
{
	struct mm_struct *active_mm = current->active_mm;

	toolatetochangeptcgsem = 1;

	if (mm != active_mm) {
		/* Restore region IDs for mm */
		if (mm && active_mm) {
			activate_context(mm);
		} else {
			flush_tlb_all();
			return;
		}
	}

	if (need_ptcg_sem)
		down_spin(&ptcg_sem);

	do {
		/*
		 * Flush ALAT entries also.
		 */
		ia64_ptcga(start, (nbits << 2));
		ia64_srlz_i();
		start += (1UL << nbits);
	} while (start < end);

	if (need_ptcg_sem)
		up_spin(&ptcg_sem);

	if (mm != active_mm) {
		activate_context(active_mm);
	}
}

void
local_flush_tlb_all (void)
{
	unsigned long i, j, flags, count0, count1, stride0, stride1, addr;

	addr    = local_cpu_data->ptce_base;
	count0  = local_cpu_data->ptce_count[0];
	count1  = local_cpu_data->ptce_count[1];
	stride0 = local_cpu_data->ptce_stride[0];
	stride1 = local_cpu_data->ptce_stride[1];

	local_irq_save(flags);
	for (i = 0; i < count0; ++i) {
		for (j = 0; j < count1; ++j) {
			ia64_ptce(addr);
			addr += stride1;
		}
		addr += stride0;
	}
	local_irq_restore(flags);
	ia64_srlz_i();			/* srlz.i implies srlz.d */
}
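
/*
 * The nested loop above issues exactly count0 * count1 ptc.e operations.
 * For example (hypothetical PAL values), ptce_count = {4, 8} with the
 * matching strides walks a 4 x 8 grid of addresses, i.e. 32 ptc.e
 * instructions to flush the whole local translation cache.
 */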

void
flush_tlb_range (struct vm_area_struct *vma, unsigned long start,
		 unsigned long end)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long size = end - start;
	unsigned long nbits;

#ifndef CONFIG_SMP
	if (mm != current->active_mm) {
		mm->context = 0;
		return;
	}
#endif

	nbits = ia64_fls(size + 0xfff);
	while (unlikely (((1UL << nbits) & purge.mask) == 0) &&
			(nbits < purge.max_bits))
		++nbits;
	if (nbits > purge.max_bits)
		nbits = purge.max_bits;
	start &= ~((1UL << nbits) - 1);

	preempt_disable();
#ifdef CONFIG_SMP
	if (mm != current->active_mm || cpumask_weight(mm_cpumask(mm)) != 1) {
		platform_global_tlb_purge(mm, start, end, nbits);
		preempt_enable();
		return;
	}
#endif
	do {
		ia64_ptcl(start, (nbits<<2));
		start += (1UL << nbits);
	} while (start < end);
	preempt_enable();
	ia64_srlz_i();			/* srlz.i implies srlz.d */
}
EXPORT_SYMBOL(flush_tlb_range);
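
/*
 * Worked example for the nbits computation above (illustrative numbers):
 * for an 80 KiB range, size = 0x14000 and ia64_fls(size + 0xfff) =
 * ia64_fls(0x14fff) = 16, i.e. a 64 KiB purge granule.  If bit 16 is not
 * set in purge.mask, nbits is bumped to the next supported purge size
 * (capped at purge.max_bits); the do/while loop then steps through the
 * range in 1UL << nbits chunks.
 */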

void __devinit
ia64_tlb_init (void)
{
	ia64_ptce_info_t uninitialized_var(ptce_info); /* GCC be quiet */
	u64 tr_pgbits;
	long status;
	pal_vm_info_1_u_t vm_info_1;
	pal_vm_info_2_u_t vm_info_2;
	int cpu = smp_processor_id();

	if ((status = ia64_pal_vm_page_size(&tr_pgbits, &purge.mask)) != 0) {
		printk(KERN_ERR "PAL_VM_PAGE_SIZE failed with status=%ld; "
		       "defaulting to architected purge page-sizes.\n", status);
		purge.mask = 0x115557000UL;
	}
	purge.max_bits = ia64_fls(purge.mask);

	ia64_get_ptce(&ptce_info);
	local_cpu_data->ptce_base = ptce_info.base;
	local_cpu_data->ptce_count[0] = ptce_info.count[0];
	local_cpu_data->ptce_count[1] = ptce_info.count[1];
	local_cpu_data->ptce_stride[0] = ptce_info.stride[0];
	local_cpu_data->ptce_stride[1] = ptce_info.stride[1];

	local_flush_tlb_all();	/* nuke left overs from bootstrapping... */
	status = ia64_pal_vm_summary(&vm_info_1, &vm_info_2);

	if (status) {
		printk(KERN_ERR "ia64_pal_vm_summary=%ld\n", status);
		per_cpu(ia64_tr_num, cpu) = 8;
		return;
	}
	per_cpu(ia64_tr_num, cpu) = vm_info_1.pal_vm_info_1_s.max_itr_entry+1;
	if (per_cpu(ia64_tr_num, cpu) >
				(vm_info_1.pal_vm_info_1_s.max_dtr_entry+1))
		per_cpu(ia64_tr_num, cpu) =
				vm_info_1.pal_vm_info_1_s.max_dtr_entry+1;
	if (per_cpu(ia64_tr_num, cpu) > IA64_TR_ALLOC_MAX) {
		static int justonce = 1;
		per_cpu(ia64_tr_num, cpu) = IA64_TR_ALLOC_MAX;
		if (justonce) {
			justonce = 0;
			printk(KERN_DEBUG "TR register number exceeds "
			       "IA64_TR_ALLOC_MAX!\n");
		}
	}
}

/*
 * is_tr_overlap
 *
 * Check overlap with inserted TRs.
 */
static int is_tr_overlap(struct ia64_tr_entry *p, u64 va, u64 log_size)
{
	u64 tr_log_size;
	u64 tr_end;
	u64 va_rr = ia64_get_rr(va);
	u64 va_rid = RR_TO_RID(va_rr);
	u64 va_end = va + (1<<log_size) - 1;

	if (va_rid != RR_TO_RID(p->rr))
		return 0;
	tr_log_size = (p->itir & 0xff) >> 2;
	tr_end = p->ifa + (1<<tr_log_size) - 1;

	if (va > tr_end || p->ifa > va_end)
		return 0;
	return 1;
}

/*
 * ia64_insert_tr in virtual mode.  Allocate a TR slot
 *
 * target_mask : 0x1 : itr, 0x2 : dtr, 0x3 : idtr
 *
 * va	: virtual address.
 * pte	: pte entries inserted.
 * log_size: range to be covered.
 *
 * Return value:  <0  : negative error number.
 *
 *		  >=0 : slot number allocated for TR.
 * Must be called with preemption disabled.
 */
int ia64_itr_entry(u64 target_mask, u64 va, u64 pte, u64 log_size)
{
	int i, r;
	unsigned long psr;
	struct ia64_tr_entry *p;
	int cpu = smp_processor_id();

	if (!ia64_idtrs[cpu]) {
		ia64_idtrs[cpu] = kmalloc(2 * IA64_TR_ALLOC_MAX *
					  sizeof (struct ia64_tr_entry), GFP_KERNEL);
		if (!ia64_idtrs[cpu])
			return -ENOMEM;
	}
	r = -EINVAL;
	/* Check overlap with existing TR entries */
	if (target_mask & 0x1) {
		p = ia64_idtrs[cpu];
		for (i = IA64_TR_ALLOC_BASE; i <= per_cpu(ia64_tr_used, cpu);
								i++, p++) {
			if (p->pte & 0x1)
				if (is_tr_overlap(p, va, log_size)) {
					printk(KERN_DEBUG "Overlapped Entry "
					       "Inserted for TR Register!!\n");
					goto out;
				}
		}
	}
	if (target_mask & 0x2) {
		p = ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX;
		for (i = IA64_TR_ALLOC_BASE; i <= per_cpu(ia64_tr_used, cpu);
								i++, p++) {
			if (p->pte & 0x1)
				if (is_tr_overlap(p, va, log_size)) {
					printk(KERN_DEBUG "Overlapped Entry "
					       "Inserted for TR Register!!\n");
					goto out;
				}
		}
	}

	for (i = IA64_TR_ALLOC_BASE; i < per_cpu(ia64_tr_num, cpu); i++) {
		switch (target_mask & 0x3) {
		case 1:
			if (!((ia64_idtrs[cpu] + i)->pte & 0x1))
				goto found;
			continue;
		case 2:
			if (!((ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i)->pte & 0x1))
				goto found;
			continue;
		case 3:
			if (!((ia64_idtrs[cpu] + i)->pte & 0x1) &&
			    !((ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i)->pte & 0x1))
				goto found;
			continue;
		default:
			r = -EINVAL;
			goto out;
		}
	}
found:
	if (i >= per_cpu(ia64_tr_num, cpu))
		return -EBUSY;

	/* Record tr info for mca handler use! */
	if (i > per_cpu(ia64_tr_used, cpu))
		per_cpu(ia64_tr_used, cpu) = i;

	psr = ia64_clear_ic();
	if (target_mask & 0x1) {
		ia64_itr(0x1, i, va, pte, log_size);
		ia64_srlz_i();
		p = ia64_idtrs[cpu] + i;
		p->ifa = va;
		p->pte = pte;
		p->itir = log_size << 2;
		p->rr = ia64_get_rr(va);
	}
	if (target_mask & 0x2) {
		ia64_itr(0x2, i, va, pte, log_size);
		ia64_srlz_i();
		p = ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i;
		p->ifa = va;
		p->pte = pte;
		p->itir = log_size << 2;
		p->rr = ia64_get_rr(va);
	}
	ia64_set_psr(psr);
	r = i;
out:
	return r;
}
EXPORT_SYMBOL_GPL(ia64_itr_entry);

/*
 * ia64_purge_tr
 *
 * target_mask: 0x1 : purge itr, 0x2 : purge dtr, 0x3 : purge idtr.
 * slot: slot number to be freed.
 *
 * Must be called with preemption disabled.
 */
void ia64_ptr_entry(u64 target_mask, int slot)
{
	int cpu = smp_processor_id();
	int i;
	struct ia64_tr_entry *p;

	if (slot < IA64_TR_ALLOC_BASE || slot >= per_cpu(ia64_tr_num, cpu))
		return;

	if (target_mask & 0x1) {
		p = ia64_idtrs[cpu] + slot;
		if ((p->pte&0x1) && is_tr_overlap(p, p->ifa, p->itir>>2)) {
			p->pte = 0;
			ia64_ptr(0x1, p->ifa, p->itir>>2);
			ia64_srlz_i();
		}
	}

	if (target_mask & 0x2) {
		p = ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + slot;
		if ((p->pte & 0x1) && is_tr_overlap(p, p->ifa, p->itir>>2)) {
			p->pte = 0;
			ia64_ptr(0x2, p->ifa, p->itir>>2);
			ia64_srlz_i();
		}
	}

	for (i = per_cpu(ia64_tr_used, cpu); i >= IA64_TR_ALLOC_BASE; i--) {
		if (((ia64_idtrs[cpu] + i)->pte & 0x1) ||
		    ((ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i)->pte & 0x1))
			break;
	}
	per_cpu(ia64_tr_used, cpu) = i;
}
EXPORT_SYMBOL_GPL(ia64_ptr_entry);
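
/*
 * Minimal usage sketch for the two TR helpers above (illustrative only; the
 * va/pte values and the use of PAGE_SHIFT as the mapping size are
 * hypothetical).  Both helpers must run with preemption disabled, and a slot
 * should be freed on the CPU that allocated it, since the bookkeeping is
 * kept in per-CPU tables.
 */
static void __maybe_unused tr_pin_example(u64 va, u64 pte)
{
	int slot;

	preempt_disable();
	slot = ia64_itr_entry(0x2, va, pte, PAGE_SHIFT);	/* 0x2: dtr only */
	if (slot >= 0) {
		/* ... the mapping is now pinned in a data TR ... */
		ia64_ptr_entry(0x2, slot);			/* release the slot */
	}
	preempt_enable();
}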