GitHub Repository: awilliam/linux-vfio
Path: blob/master/arch/ia64/mm/tlb.c
/*
 * TLB support routines.
 *
 * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co
 *	David Mosberger-Tang <[email protected]>
 *
 * 08/02/00 A. Mallick <[email protected]>
 *		Modified RID allocation for SMP
 *	    Goutham Rao <[email protected]>
 *		IPI based ptc implementation and A-step IPI implementation.
 * Rohit Seth <[email protected]>
 * Ken Chen <[email protected]>
 * Christophe de Dinechin <[email protected]>: Avoid ptc.e on memory allocation
 * Copyright (C) 2007 Intel Corp
 *	Fenghua Yu <[email protected]>
 *	Add multiple ptc.g/ptc.ga instruction support in global tlb purge.
 */
#include <linux/module.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/mm.h>
#include <linux/bootmem.h>
#include <linux/slab.h>

#include <asm/delay.h>
#include <asm/mmu_context.h>
#include <asm/pgalloc.h>
#include <asm/pal.h>
#include <asm/tlbflush.h>
#include <asm/dma.h>
#include <asm/processor.h>
#include <asm/sal.h>
#include <asm/tlb.h>

static struct {
	u64 mask;		/* mask of supported purge page-sizes */
	unsigned long max_bits;	/* log2 of largest supported purge page-size */
} purge;

struct ia64_ctx ia64_ctx = {
	.lock = __SPIN_LOCK_UNLOCKED(ia64_ctx.lock),
	.next = 1,
	.max_ctx = ~0U
};

DEFINE_PER_CPU(u8, ia64_need_tlb_flush);
DEFINE_PER_CPU(u8, ia64_tr_num);  /* Number of TR slots in current processor */
DEFINE_PER_CPU(u8, ia64_tr_used); /* Max slot number used by kernel */

struct ia64_tr_entry *ia64_idtrs[NR_CPUS];

/*
 * Initializes the ia64_ctx.bitmap and ia64_ctx.flushmap arrays based on
 * max_ctx+1.  Called after cpu_init() has set up ia64_ctx.max_ctx based
 * on the maximum RID supported by the boot CPU.
 */
void __init
mmu_context_init (void)
{
	ia64_ctx.bitmap = alloc_bootmem((ia64_ctx.max_ctx+1)>>3);
	ia64_ctx.flushmap = alloc_bootmem((ia64_ctx.max_ctx+1)>>3);
}
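
/*
 * Worked example (illustrative figures only): with 18 implemented region-ID
 * bits, max_ctx would be 2^18 - 1, so each of the two bitmaps above takes
 * (max_ctx + 1) / 8 = 2^18 / 8 = 32 KiB, i.e. one bit per context.
 */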

/*
 * Acquire the ia64_ctx.lock before calling this function!
 */
void
wrap_mmu_context (struct mm_struct *mm)
{
	int i, cpu;
	unsigned long flush_bit;

	for (i=0; i <= ia64_ctx.max_ctx / BITS_PER_LONG; i++) {
		flush_bit = xchg(&ia64_ctx.flushmap[i], 0);
		ia64_ctx.bitmap[i] ^= flush_bit;
	}

	/* use offset at 300 to skip daemons */
	ia64_ctx.next = find_next_zero_bit(ia64_ctx.bitmap,
				ia64_ctx.max_ctx, 300);
	ia64_ctx.limit = find_next_bit(ia64_ctx.bitmap,
				ia64_ctx.max_ctx, ia64_ctx.next);

	/*
	 * can't call flush_tlb_all() here because of race condition
	 * with O(1) scheduler [EF]
	 */
	cpu = get_cpu(); /* prevent preemption/migration */
	for_each_online_cpu(i)
		if (i != cpu)
			per_cpu(ia64_need_tlb_flush, i) = 1;
	put_cpu();
	local_flush_tlb_all();
}

/*
 * Implement "spinaphores" ... like counting semaphores, but they
 * spin instead of sleeping.  If there are ever any other users for
 * this primitive it can be moved up to a spinaphore.h header.
 */
struct spinaphore {
	unsigned long ticket;
	unsigned long serve;
};

static inline void spinaphore_init(struct spinaphore *ss, int val)
{
	ss->ticket = 0;
	ss->serve = val;
}

static inline void down_spin(struct spinaphore *ss)
{
	unsigned long t = ia64_fetchadd(1, &ss->ticket, acq), serve;

	if (time_before(t, ss->serve))
		return;

	ia64_invala();

	for (;;) {
		asm volatile ("ld8.c.nc %0=[%1]" : "=r"(serve) : "r"(&ss->serve) : "memory");
		if (time_before(t, serve))
			return;
		cpu_relax();
	}
}

static inline void up_spin(struct spinaphore *ss)
{
	ia64_fetchadd(1, &ss->serve, rel);
}
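
/*
 * Minimal usage sketch of the spinaphore primitive above (illustrative only;
 * the variable name and the limit of 2 are hypothetical).  The real user is
 * the ptc.g path below, which bounds concurrent global purges by nptcg.
 */
static struct spinaphore example_sem;

static void __maybe_unused spinaphore_example(void)
{
	spinaphore_init(&example_sem, 2);	/* done once: allow two concurrent holders */

	down_spin(&example_sem);		/* take a ticket, spin until it is served */
	/* ... perform the operation that only two CPUs may do at once ... */
	up_spin(&example_sem);			/* advance "serve" for the next waiter */
}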

static struct spinaphore ptcg_sem;
static u16 nptcg = 1;
static int need_ptcg_sem = 1;
static int toolatetochangeptcgsem = 0;

/*
 * Kernel parameter "nptcg=" overrides the max number of concurrent global
 * TLB purges reported by either PAL or the SAL PALO table.
 *
 * There is no sanity checking of the nptcg value; it is the user's
 * responsibility to supply a value that is valid for the platform.
 * Otherwise the kernel may hang in some cases.
 */
static int __init
set_nptcg(char *str)
{
	int value = 0;

	get_option(&str, &value);
	setup_ptcg_sem(value, NPTCG_FROM_KERNEL_PARAMETER);

	return 1;
}

__setup("nptcg=", set_nptcg);
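
/*
 * Example (hypothetical command line): booting with "nptcg=2" appended to
 * the kernel arguments caps the number of concurrent ptc.g purges at 2 and
 * takes precedence over whatever PAL or the PALO table report.
 */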

/*
 * Maximum number of simultaneous ptc.g purges in the system can
 * be defined by PAL_VM_SUMMARY (in which case we should take
 * the smallest value for any cpu in the system) or by the PAL
 * override table (in which case we should ignore the value from
 * PAL_VM_SUMMARY).
 *
 * Kernel parameter "nptcg=" overrides the maximum number of simultaneous
 * ptc.g purges defined in either PAL_VM_SUMMARY or the PAL override table.
 * In this case, we should ignore the value from either PAL_VM_SUMMARY or
 * the PAL override table.
 *
 * Complicating the logic here is the fact that num_possible_cpus()
 * isn't fully set up until we start bringing cpus online.
 */
void
setup_ptcg_sem(int max_purges, int nptcg_from)
{
	static int kp_override;
	static int palo_override;
	static int firstcpu = 1;

	if (toolatetochangeptcgsem) {
		if (nptcg_from == NPTCG_FROM_PAL && max_purges == 0)
			BUG_ON(1 < nptcg);
		else
			BUG_ON(max_purges < nptcg);
		return;
	}

	if (nptcg_from == NPTCG_FROM_KERNEL_PARAMETER) {
		kp_override = 1;
		nptcg = max_purges;
		goto resetsema;
	}
	if (kp_override) {
		need_ptcg_sem = num_possible_cpus() > nptcg;
		return;
	}

	if (nptcg_from == NPTCG_FROM_PALO) {
		palo_override = 1;

		/* In PALO max_purges == 0 really means it! */
		if (max_purges == 0)
			panic("Whoa! Platform does not support global TLB purges.\n");
		nptcg = max_purges;
		if (nptcg == PALO_MAX_TLB_PURGES) {
			need_ptcg_sem = 0;
			return;
		}
		goto resetsema;
	}
	if (palo_override) {
		if (nptcg != PALO_MAX_TLB_PURGES)
			need_ptcg_sem = (num_possible_cpus() > nptcg);
		return;
	}

	/* In PAL_VM_SUMMARY max_purges == 0 actually means 1 */
	if (max_purges == 0) max_purges = 1;

	if (firstcpu) {
		nptcg = max_purges;
		firstcpu = 0;
	}
	if (max_purges < nptcg)
		nptcg = max_purges;
	if (nptcg == PAL_MAX_PURGES) {
		need_ptcg_sem = 0;
		return;
	} else
		need_ptcg_sem = (num_possible_cpus() > nptcg);

resetsema:
	spinaphore_init(&ptcg_sem, max_purges);
}
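
/*
 * Worked example (hypothetical platform): on a 16-CPU system whose CPUs all
 * report 4 concurrent purges via PAL_VM_SUMMARY, the first CPU sets nptcg = 4
 * and later CPUs can only lower it.  Assuming 4 is not the PAL_MAX_PURGES
 * sentinel, num_possible_cpus() (16) > nptcg (4), so need_ptcg_sem stays set
 * and ptcg_sem is initialized with 4 tickets.
 */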

void
ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start,
		       unsigned long end, unsigned long nbits)
{
	struct mm_struct *active_mm = current->active_mm;

	toolatetochangeptcgsem = 1;

	if (mm != active_mm) {
		/* Restore region IDs for mm */
		if (mm && active_mm) {
			activate_context(mm);
		} else {
			flush_tlb_all();
			return;
		}
	}

	if (need_ptcg_sem)
		down_spin(&ptcg_sem);

	do {
		/*
		 * Flush ALAT entries also.
		 */
		ia64_ptcga(start, (nbits << 2));
		ia64_srlz_i();
		start += (1UL << nbits);
	} while (start < end);

	if (need_ptcg_sem)
		up_spin(&ptcg_sem);

	if (mm != active_mm) {
		activate_context(active_mm);
	}
}

void
local_flush_tlb_all (void)
{
	unsigned long i, j, flags, count0, count1, stride0, stride1, addr;

	addr    = local_cpu_data->ptce_base;
	count0  = local_cpu_data->ptce_count[0];
	count1  = local_cpu_data->ptce_count[1];
	stride0 = local_cpu_data->ptce_stride[0];
	stride1 = local_cpu_data->ptce_stride[1];

	local_irq_save(flags);
	for (i = 0; i < count0; ++i) {
		for (j = 0; j < count1; ++j) {
			ia64_ptce(addr);
			addr += stride1;
		}
		addr += stride0;
	}
	local_irq_restore(flags);
	ia64_srlz_i();			/* srlz.i implies srlz.d */
}
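
/*
 * The nested loop above issues exactly count0 * count1 ptc.e operations.
 * For example (hypothetical PAL values), ptce_count = {4, 8} with the
 * matching strides walks a 4 x 8 grid of addresses, i.e. 32 ptc.e
 * instructions to flush the whole local translation cache.
 */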

void
flush_tlb_range (struct vm_area_struct *vma, unsigned long start,
		 unsigned long end)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long size = end - start;
	unsigned long nbits;

#ifndef CONFIG_SMP
	if (mm != current->active_mm) {
		mm->context = 0;
		return;
	}
#endif

	nbits = ia64_fls(size + 0xfff);
	while (unlikely (((1UL << nbits) & purge.mask) == 0) &&
			(nbits < purge.max_bits))
		++nbits;
	if (nbits > purge.max_bits)
		nbits = purge.max_bits;
	start &= ~((1UL << nbits) - 1);

	preempt_disable();
#ifdef CONFIG_SMP
	if (mm != current->active_mm || cpumask_weight(mm_cpumask(mm)) != 1) {
		platform_global_tlb_purge(mm, start, end, nbits);
		preempt_enable();
		return;
	}
#endif
	do {
		ia64_ptcl(start, (nbits<<2));
		start += (1UL << nbits);
	} while (start < end);
	preempt_enable();
	ia64_srlz_i();			/* srlz.i implies srlz.d */
}
EXPORT_SYMBOL(flush_tlb_range);
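
/*
 * Worked example for the nbits computation above (illustrative numbers):
 * for an 80 KiB range, size = 0x14000 and ia64_fls(size + 0xfff) =
 * ia64_fls(0x14fff) = 16, i.e. a 64 KiB purge granule.  If bit 16 is not
 * set in purge.mask, nbits is bumped to the next supported purge size
 * (capped at purge.max_bits); the do/while loop then steps through the
 * range in 1UL << nbits chunks.
 */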

void __devinit
ia64_tlb_init (void)
{
	ia64_ptce_info_t uninitialized_var(ptce_info); /* GCC be quiet */
	u64 tr_pgbits;
	long status;
	pal_vm_info_1_u_t vm_info_1;
	pal_vm_info_2_u_t vm_info_2;
	int cpu = smp_processor_id();

	if ((status = ia64_pal_vm_page_size(&tr_pgbits, &purge.mask)) != 0) {
		printk(KERN_ERR "PAL_VM_PAGE_SIZE failed with status=%ld; "
		       "defaulting to architected purge page-sizes.\n", status);
		purge.mask = 0x115557000UL;
	}
	purge.max_bits = ia64_fls(purge.mask);

	ia64_get_ptce(&ptce_info);
	local_cpu_data->ptce_base = ptce_info.base;
	local_cpu_data->ptce_count[0] = ptce_info.count[0];
	local_cpu_data->ptce_count[1] = ptce_info.count[1];
	local_cpu_data->ptce_stride[0] = ptce_info.stride[0];
	local_cpu_data->ptce_stride[1] = ptce_info.stride[1];

	local_flush_tlb_all();	/* nuke left overs from bootstrapping... */
	status = ia64_pal_vm_summary(&vm_info_1, &vm_info_2);

	if (status) {
		printk(KERN_ERR "ia64_pal_vm_summary=%ld\n", status);
		per_cpu(ia64_tr_num, cpu) = 8;
		return;
	}
	per_cpu(ia64_tr_num, cpu) = vm_info_1.pal_vm_info_1_s.max_itr_entry+1;
	if (per_cpu(ia64_tr_num, cpu) >
				(vm_info_1.pal_vm_info_1_s.max_dtr_entry+1))
		per_cpu(ia64_tr_num, cpu) =
				vm_info_1.pal_vm_info_1_s.max_dtr_entry+1;
	if (per_cpu(ia64_tr_num, cpu) > IA64_TR_ALLOC_MAX) {
		static int justonce = 1;
		per_cpu(ia64_tr_num, cpu) = IA64_TR_ALLOC_MAX;
		if (justonce) {
			justonce = 0;
			printk(KERN_DEBUG "TR register number exceeds "
			       "IA64_TR_ALLOC_MAX!\n");
		}
	}
}

/*
 * is_tr_overlap
 *
 * Check overlap with inserted TRs.
 */
static int is_tr_overlap(struct ia64_tr_entry *p, u64 va, u64 log_size)
{
	u64 tr_log_size;
	u64 tr_end;
	u64 va_rr = ia64_get_rr(va);
	u64 va_rid = RR_TO_RID(va_rr);
	u64 va_end = va + (1<<log_size) - 1;

	if (va_rid != RR_TO_RID(p->rr))
		return 0;
	tr_log_size = (p->itir & 0xff) >> 2;
	tr_end = p->ifa + (1<<tr_log_size) - 1;

	if (va > tr_end || p->ifa > va_end)
		return 0;
	return 1;
}

/*
 * ia64_insert_tr in virtual mode.  Allocate a TR slot
 *
 * target_mask : 0x1 : itr, 0x2 : dtr, 0x3 : idtr
 *
 * va	: virtual address.
 * pte	: pte entries inserted.
 * log_size: range to be covered.
 *
 * Return value:  <0  : negative error number.
 *
 *		  >=0 : slot number allocated for TR.
 * Must be called with preemption disabled.
 */
int ia64_itr_entry(u64 target_mask, u64 va, u64 pte, u64 log_size)
{
	int i, r;
	unsigned long psr;
	struct ia64_tr_entry *p;
	int cpu = smp_processor_id();

	if (!ia64_idtrs[cpu]) {
		ia64_idtrs[cpu] = kmalloc(2 * IA64_TR_ALLOC_MAX *
					  sizeof (struct ia64_tr_entry), GFP_KERNEL);
		if (!ia64_idtrs[cpu])
			return -ENOMEM;
	}
	r = -EINVAL;
	/* Check overlap with existing TR entries */
	if (target_mask & 0x1) {
		p = ia64_idtrs[cpu];
		for (i = IA64_TR_ALLOC_BASE; i <= per_cpu(ia64_tr_used, cpu);
								i++, p++) {
			if (p->pte & 0x1)
				if (is_tr_overlap(p, va, log_size)) {
					printk(KERN_DEBUG "Overlapped Entry "
					       "Inserted for TR Register!!\n");
					goto out;
				}
		}
	}
	if (target_mask & 0x2) {
		p = ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX;
		for (i = IA64_TR_ALLOC_BASE; i <= per_cpu(ia64_tr_used, cpu);
								i++, p++) {
			if (p->pte & 0x1)
				if (is_tr_overlap(p, va, log_size)) {
					printk(KERN_DEBUG "Overlapped Entry "
					       "Inserted for TR Register!!\n");
					goto out;
				}
		}
	}

	for (i = IA64_TR_ALLOC_BASE; i < per_cpu(ia64_tr_num, cpu); i++) {
		switch (target_mask & 0x3) {
		case 1:
			if (!((ia64_idtrs[cpu] + i)->pte & 0x1))
				goto found;
			continue;
		case 2:
			if (!((ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i)->pte & 0x1))
				goto found;
			continue;
		case 3:
			if (!((ia64_idtrs[cpu] + i)->pte & 0x1) &&
			    !((ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i)->pte & 0x1))
				goto found;
			continue;
		default:
			r = -EINVAL;
			goto out;
		}
	}
found:
	if (i >= per_cpu(ia64_tr_num, cpu))
		return -EBUSY;

	/* Record tr info for mca handler use! */
	if (i > per_cpu(ia64_tr_used, cpu))
		per_cpu(ia64_tr_used, cpu) = i;

	psr = ia64_clear_ic();
	if (target_mask & 0x1) {
		ia64_itr(0x1, i, va, pte, log_size);
		ia64_srlz_i();
		p = ia64_idtrs[cpu] + i;
		p->ifa = va;
		p->pte = pte;
		p->itir = log_size << 2;
		p->rr = ia64_get_rr(va);
	}
	if (target_mask & 0x2) {
		ia64_itr(0x2, i, va, pte, log_size);
		ia64_srlz_i();
		p = ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i;
		p->ifa = va;
		p->pte = pte;
		p->itir = log_size << 2;
		p->rr = ia64_get_rr(va);
	}
	ia64_set_psr(psr);
	r = i;
out:
	return r;
}
EXPORT_SYMBOL_GPL(ia64_itr_entry);

/*
 * ia64_purge_tr
 *
 * target_mask: 0x1 : purge itr, 0x2 : purge dtr, 0x3 : purge idtr.
 * slot: slot number to be freed.
 *
 * Must be called with preemption disabled.
 */
void ia64_ptr_entry(u64 target_mask, int slot)
{
	int cpu = smp_processor_id();
	int i;
	struct ia64_tr_entry *p;

	if (slot < IA64_TR_ALLOC_BASE || slot >= per_cpu(ia64_tr_num, cpu))
		return;

	if (target_mask & 0x1) {
		p = ia64_idtrs[cpu] + slot;
		if ((p->pte&0x1) && is_tr_overlap(p, p->ifa, p->itir>>2)) {
			p->pte = 0;
			ia64_ptr(0x1, p->ifa, p->itir>>2);
			ia64_srlz_i();
		}
	}

	if (target_mask & 0x2) {
		p = ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + slot;
		if ((p->pte & 0x1) && is_tr_overlap(p, p->ifa, p->itir>>2)) {
			p->pte = 0;
			ia64_ptr(0x2, p->ifa, p->itir>>2);
			ia64_srlz_i();
		}
	}

	for (i = per_cpu(ia64_tr_used, cpu); i >= IA64_TR_ALLOC_BASE; i--) {
		if (((ia64_idtrs[cpu] + i)->pte & 0x1) ||
		    ((ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i)->pte & 0x1))
			break;
	}
	per_cpu(ia64_tr_used, cpu) = i;
}
EXPORT_SYMBOL_GPL(ia64_ptr_entry);
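
/*
 * Minimal usage sketch for the two TR helpers above (illustrative only; the
 * va/pte values and the use of PAGE_SHIFT as the mapping size are
 * hypothetical).  Both helpers must run with preemption disabled, and a slot
 * should be freed on the CPU that allocated it, since the bookkeeping is
 * kept in per-CPU tables.
 */
static void __maybe_unused tr_pin_example(u64 va, u64 pte)
{
	int slot;

	preempt_disable();
	slot = ia64_itr_entry(0x2, va, pte, PAGE_SHIFT);	/* 0x2: dtr only */
	if (slot >= 0) {
		/* ... the mapping is now pinned in a data TR ... */
		ia64_ptr_entry(0x2, slot);			/* release the slot */
	}
	preempt_enable();
}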