Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
awilliam
GitHub Repository: awilliam/linux-vfio
Path: blob/master/arch/sparc/mm/tsb.c
10817 views
1
/* arch/sparc64/mm/tsb.c
2
*
3
* Copyright (C) 2006, 2008 David S. Miller <[email protected]>
4
*/
5
6
#include <linux/kernel.h>
7
#include <linux/preempt.h>
8
#include <linux/slab.h>
9
#include <asm/system.h>
10
#include <asm/page.h>
11
#include <asm/tlbflush.h>
12
#include <asm/tlb.h>
13
#include <asm/mmu_context.h>
14
#include <asm/pgtable.h>
15
#include <asm/tsb.h>
16
#include <asm/oplib.h>
17
18
extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES];
19
20
/* Compute the TSB slot index for a virtual address.
 *
 * @vaddr:      virtual address being looked up
 * @hash_shift: number of low address bits to discard before indexing
 * @nentries:   number of slots in the TSB
 *
 * The index is formed by masking with (nentries - 1), so the result only
 * covers every slot when nentries is a power of two.
 */
static inline unsigned long tsb_hash(unsigned long vaddr, unsigned long hash_shift, unsigned long nentries)
{
	unsigned long slot_mask = nentries - 1;

	return (vaddr >> hash_shift) & slot_mask;
}
25
26
/* Return non-zero when @tag equals the tag derived from @vaddr, which is
 * the address shifted right by 22 bits.
 */
static inline int tag_compare(unsigned long tag, unsigned long vaddr)
{
	unsigned long vaddr_tag = vaddr >> 22;

	return tag == vaddr_tag;
}
30
31
/* TSB flushes need only occur on the processor initiating the address
32
* space modification, not on each cpu the address space has run on.
33
* Only the TLB flush needs that treatment.
34
*/
35
36
void flush_tsb_kernel_range(unsigned long start, unsigned long end)
37
{
38
unsigned long v;
39
40
for (v = start; v < end; v += PAGE_SIZE) {
41
unsigned long hash = tsb_hash(v, PAGE_SHIFT,
42
KERNEL_TSB_NENTRIES);
43
struct tsb *ent = &swapper_tsb[hash];
44
45
if (tag_compare(ent->tag, v))
46
ent->tag = (1UL << TSB_TAG_INVALID_BIT);
47
}
48
}
49
50
static void __flush_tsb_one(struct tlb_batch *tb, unsigned long hash_shift,
51
unsigned long tsb, unsigned long nentries)
52
{
53
unsigned long i;
54
55
for (i = 0; i < tb->tlb_nr; i++) {
56
unsigned long v = tb->vaddrs[i];
57
unsigned long tag, ent, hash;
58
59
v &= ~0x1UL;
60
61
hash = tsb_hash(v, hash_shift, nentries);
62
ent = tsb + (hash * sizeof(struct tsb));
63
tag = (v >> 22UL);
64
65
tsb_flush(ent, tag);
66
}
67
}
68
69
void flush_tsb_user(struct tlb_batch *tb)
70
{
71
struct mm_struct *mm = tb->mm;
72
unsigned long nentries, base, flags;
73
74
spin_lock_irqsave(&mm->context.lock, flags);
75
76
base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb;
77
nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries;
78
if (tlb_type == cheetah_plus || tlb_type == hypervisor)
79
base = __pa(base);
80
__flush_tsb_one(tb, PAGE_SHIFT, base, nentries);
81
82
#ifdef CONFIG_HUGETLB_PAGE
83
if (mm->context.tsb_block[MM_TSB_HUGE].tsb) {
84
base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb;
85
nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries;
86
if (tlb_type == cheetah_plus || tlb_type == hypervisor)
87
base = __pa(base);
88
__flush_tsb_one(tb, HPAGE_SHIFT, base, nentries);
89
}
90
#endif
91
spin_unlock_irqrestore(&mm->context.lock, flags);
92
}
93
94
#if defined(CONFIG_SPARC64_PAGE_SIZE_8KB)
95
#define HV_PGSZ_IDX_BASE HV_PGSZ_IDX_8K
96
#define HV_PGSZ_MASK_BASE HV_PGSZ_MASK_8K
97
#elif defined(CONFIG_SPARC64_PAGE_SIZE_64KB)
98
#define HV_PGSZ_IDX_BASE HV_PGSZ_IDX_64K
99
#define HV_PGSZ_MASK_BASE HV_PGSZ_MASK_64K
100
#else
101
#error Broken base page size setting...
102
#endif
103
104
#ifdef CONFIG_HUGETLB_PAGE
105
#if defined(CONFIG_HUGETLB_PAGE_SIZE_64K)
106
#define HV_PGSZ_IDX_HUGE HV_PGSZ_IDX_64K
107
#define HV_PGSZ_MASK_HUGE HV_PGSZ_MASK_64K
108
#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K)
109
#define HV_PGSZ_IDX_HUGE HV_PGSZ_IDX_512K
110
#define HV_PGSZ_MASK_HUGE HV_PGSZ_MASK_512K
111
#elif defined(CONFIG_HUGETLB_PAGE_SIZE_4MB)
112
#define HV_PGSZ_IDX_HUGE HV_PGSZ_IDX_4MB
113
#define HV_PGSZ_MASK_HUGE HV_PGSZ_MASK_4MB
114
#else
115
#error Broken huge page size setting...
116
#endif
117
#endif
118
119
static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsigned long tsb_bytes)
120
{
121
unsigned long tsb_reg, base, tsb_paddr;
122
unsigned long page_sz, tte;
123
124
mm->context.tsb_block[tsb_idx].tsb_nentries =
125
tsb_bytes / sizeof(struct tsb);
126
127
base = TSBMAP_BASE;
128
tte = pgprot_val(PAGE_KERNEL_LOCKED);
129
tsb_paddr = __pa(mm->context.tsb_block[tsb_idx].tsb);
130
BUG_ON(tsb_paddr & (tsb_bytes - 1UL));
131
132
/* Use the smallest page size that can map the whole TSB
133
* in one TLB entry.
134
*/
135
switch (tsb_bytes) {
136
case 8192 << 0:
137
tsb_reg = 0x0UL;
138
#ifdef DCACHE_ALIASING_POSSIBLE
139
base += (tsb_paddr & 8192);
140
#endif
141
page_sz = 8192;
142
break;
143
144
case 8192 << 1:
145
tsb_reg = 0x1UL;
146
page_sz = 64 * 1024;
147
break;
148
149
case 8192 << 2:
150
tsb_reg = 0x2UL;
151
page_sz = 64 * 1024;
152
break;
153
154
case 8192 << 3:
155
tsb_reg = 0x3UL;
156
page_sz = 64 * 1024;
157
break;
158
159
case 8192 << 4:
160
tsb_reg = 0x4UL;
161
page_sz = 512 * 1024;
162
break;
163
164
case 8192 << 5:
165
tsb_reg = 0x5UL;
166
page_sz = 512 * 1024;
167
break;
168
169
case 8192 << 6:
170
tsb_reg = 0x6UL;
171
page_sz = 512 * 1024;
172
break;
173
174
case 8192 << 7:
175
tsb_reg = 0x7UL;
176
page_sz = 4 * 1024 * 1024;
177
break;
178
179
default:
180
printk(KERN_ERR "TSB[%s:%d]: Impossible TSB size %lu, killing process.\n",
181
current->comm, current->pid, tsb_bytes);
182
do_exit(SIGSEGV);
183
}
184
tte |= pte_sz_bits(page_sz);
185
186
if (tlb_type == cheetah_plus || tlb_type == hypervisor) {
187
/* Physical mapping, no locked TLB entry for TSB. */
188
tsb_reg |= tsb_paddr;
189
190
mm->context.tsb_block[tsb_idx].tsb_reg_val = tsb_reg;
191
mm->context.tsb_block[tsb_idx].tsb_map_vaddr = 0;
192
mm->context.tsb_block[tsb_idx].tsb_map_pte = 0;
193
} else {
194
tsb_reg |= base;
195
tsb_reg |= (tsb_paddr & (page_sz - 1UL));
196
tte |= (tsb_paddr & ~(page_sz - 1UL));
197
198
mm->context.tsb_block[tsb_idx].tsb_reg_val = tsb_reg;
199
mm->context.tsb_block[tsb_idx].tsb_map_vaddr = base;
200
mm->context.tsb_block[tsb_idx].tsb_map_pte = tte;
201
}
202
203
/* Setup the Hypervisor TSB descriptor. */
204
if (tlb_type == hypervisor) {
205
struct hv_tsb_descr *hp = &mm->context.tsb_descr[tsb_idx];
206
207
switch (tsb_idx) {
208
case MM_TSB_BASE:
209
hp->pgsz_idx = HV_PGSZ_IDX_BASE;
210
break;
211
#ifdef CONFIG_HUGETLB_PAGE
212
case MM_TSB_HUGE:
213
hp->pgsz_idx = HV_PGSZ_IDX_HUGE;
214
break;
215
#endif
216
default:
217
BUG();
218
}
219
hp->assoc = 1;
220
hp->num_ttes = tsb_bytes / 16;
221
hp->ctx_idx = 0;
222
switch (tsb_idx) {
223
case MM_TSB_BASE:
224
hp->pgsz_mask = HV_PGSZ_MASK_BASE;
225
break;
226
#ifdef CONFIG_HUGETLB_PAGE
227
case MM_TSB_HUGE:
228
hp->pgsz_mask = HV_PGSZ_MASK_HUGE;
229
break;
230
#endif
231
default:
232
BUG();
233
}
234
hp->tsb_base = tsb_paddr;
235
hp->resv = 0;
236
}
237
}
238
239
/* Slab caches backing TSB allocations, one per supported TSB size:
 * slot i is created by pgtable_cache_init() for objects of 8192 << i
 * bytes.
 */
static struct kmem_cache *tsb_caches[8] __read_mostly;
240
241
/* Names given to the TSB slab caches; the array index is the same one
 * used for tsb_caches[] (index i <-> 8192 << i bytes).
 */
static const char *tsb_cache_names[8] = {
	[0] = "tsb_8KB",
	[1] = "tsb_16KB",
	[2] = "tsb_32KB",
	[3] = "tsb_64KB",
	[4] = "tsb_128KB",
	[5] = "tsb_256KB",
	[6] = "tsb_512KB",
	[7] = "tsb_1MB",
};
251
252
/* Create the eight TSB slab caches (8KB up to 1MB, doubling each step).
 *
 * Each cache is created with its alignment equal to its object size.
 * If any cache cannot be created, a message is printed through the PROM
 * and the machine is halted.
 */
void __init pgtable_cache_init(void)
{
	unsigned long idx = 0;

	while (idx < 8) {
		const char *cache_name = tsb_cache_names[idx];
		unsigned long bytes = 8192 << idx;

		tsb_caches[idx] = kmem_cache_create(cache_name,
						    bytes, bytes,
						    0, NULL);
		if (!tsb_caches[idx]) {
			prom_printf("Could not create %s cache\n", cache_name);
			prom_halt();
		}

		idx++;
	}
}
269
270
/* Tunable read by tsb_size_to_rss_limit() to turn a TSB's entry count
 * into the RSS limit for that size.  A negative value -k yields
 * entries - (entries >> k); a non-negative value k yields
 * entries + (entries >> k).  The default of -2 therefore gives 3/4 of
 * the entry count.
 */
int sysctl_tsb_ratio = -2;
271
272
static unsigned long tsb_size_to_rss_limit(unsigned long new_size)
273
{
274
unsigned long num_ents = (new_size / sizeof(struct tsb));
275
276
if (sysctl_tsb_ratio < 0)
277
return num_ents - (num_ents >> -sysctl_tsb_ratio);
278
else
279
return num_ents + (num_ents >> sysctl_tsb_ratio);
280
}
281
282
/* When the RSS of an address space exceeds tsb_rss_limit for a TSB,
283
* do_sparc64_fault() invokes this routine to try and grow it.
284
*
285
* When we reach the maximum TSB size supported, we stick ~0UL into
286
* tsb_rss_limit for that TSB so the grow checks in do_sparc64_fault()
287
* will not trigger any longer.
288
*
289
* The TSB can be anywhere from 8K to 1MB in size, in increasing powers
290
* of two. The TSB must be aligned to it's size, so f.e. a 512K TSB
291
* must be 512K aligned. It also must be physically contiguous, so we
292
* cannot use vmalloc().
293
*
294
* The idea here is to grow the TSB when the RSS of the process approaches
295
* the number of entries that the current TSB can hold at once. Currently,
296
* we trigger when the RSS hits 3/4 of the TSB capacity.
297
*/
298
void tsb_grow(struct mm_struct *mm, unsigned long tsb_index, unsigned long rss)
299
{
300
unsigned long max_tsb_size = 1 * 1024 * 1024;
301
unsigned long new_size, old_size, flags;
302
struct tsb *old_tsb, *new_tsb;
303
unsigned long new_cache_index, old_cache_index;
304
unsigned long new_rss_limit;
305
gfp_t gfp_flags;
306
307
if (max_tsb_size > (PAGE_SIZE << MAX_ORDER))
308
max_tsb_size = (PAGE_SIZE << MAX_ORDER);
309
310
new_cache_index = 0;
311
for (new_size = 8192; new_size < max_tsb_size; new_size <<= 1UL) {
312
new_rss_limit = tsb_size_to_rss_limit(new_size);
313
if (new_rss_limit > rss)
314
break;
315
new_cache_index++;
316
}
317
318
if (new_size == max_tsb_size)
319
new_rss_limit = ~0UL;
320
321
retry_tsb_alloc:
322
gfp_flags = GFP_KERNEL;
323
if (new_size > (PAGE_SIZE * 2))
324
gfp_flags = __GFP_NOWARN | __GFP_NORETRY;
325
326
new_tsb = kmem_cache_alloc_node(tsb_caches[new_cache_index],
327
gfp_flags, numa_node_id());
328
if (unlikely(!new_tsb)) {
329
/* Not being able to fork due to a high-order TSB
330
* allocation failure is very bad behavior. Just back
331
* down to a 0-order allocation and force no TSB
332
* growing for this address space.
333
*/
334
if (mm->context.tsb_block[tsb_index].tsb == NULL &&
335
new_cache_index > 0) {
336
new_cache_index = 0;
337
new_size = 8192;
338
new_rss_limit = ~0UL;
339
goto retry_tsb_alloc;
340
}
341
342
/* If we failed on a TSB grow, we are under serious
343
* memory pressure so don't try to grow any more.
344
*/
345
if (mm->context.tsb_block[tsb_index].tsb != NULL)
346
mm->context.tsb_block[tsb_index].tsb_rss_limit = ~0UL;
347
return;
348
}
349
350
/* Mark all tags as invalid. */
351
tsb_init(new_tsb, new_size);
352
353
/* Ok, we are about to commit the changes. If we are
354
* growing an existing TSB the locking is very tricky,
355
* so WATCH OUT!
356
*
357
* We have to hold mm->context.lock while committing to the
358
* new TSB, this synchronizes us with processors in
359
* flush_tsb_user() and switch_mm() for this address space.
360
*
361
* But even with that lock held, processors run asynchronously
362
* accessing the old TSB via TLB miss handling. This is OK
363
* because those actions are just propagating state from the
364
* Linux page tables into the TSB, page table mappings are not
365
* being changed. If a real fault occurs, the processor will
366
* synchronize with us when it hits flush_tsb_user(), this is
367
* also true for the case where vmscan is modifying the page
368
* tables. The only thing we need to be careful with is to
369
* skip any locked TSB entries during copy_tsb().
370
*
371
* When we finish committing to the new TSB, we have to drop
372
* the lock and ask all other cpus running this address space
373
* to run tsb_context_switch() to see the new TSB table.
374
*/
375
spin_lock_irqsave(&mm->context.lock, flags);
376
377
old_tsb = mm->context.tsb_block[tsb_index].tsb;
378
old_cache_index =
379
(mm->context.tsb_block[tsb_index].tsb_reg_val & 0x7UL);
380
old_size = (mm->context.tsb_block[tsb_index].tsb_nentries *
381
sizeof(struct tsb));
382
383
384
/* Handle multiple threads trying to grow the TSB at the same time.
385
* One will get in here first, and bump the size and the RSS limit.
386
* The others will get in here next and hit this check.
387
*/
388
if (unlikely(old_tsb &&
389
(rss < mm->context.tsb_block[tsb_index].tsb_rss_limit))) {
390
spin_unlock_irqrestore(&mm->context.lock, flags);
391
392
kmem_cache_free(tsb_caches[new_cache_index], new_tsb);
393
return;
394
}
395
396
mm->context.tsb_block[tsb_index].tsb_rss_limit = new_rss_limit;
397
398
if (old_tsb) {
399
extern void copy_tsb(unsigned long old_tsb_base,
400
unsigned long old_tsb_size,
401
unsigned long new_tsb_base,
402
unsigned long new_tsb_size);
403
unsigned long old_tsb_base = (unsigned long) old_tsb;
404
unsigned long new_tsb_base = (unsigned long) new_tsb;
405
406
if (tlb_type == cheetah_plus || tlb_type == hypervisor) {
407
old_tsb_base = __pa(old_tsb_base);
408
new_tsb_base = __pa(new_tsb_base);
409
}
410
copy_tsb(old_tsb_base, old_size, new_tsb_base, new_size);
411
}
412
413
mm->context.tsb_block[tsb_index].tsb = new_tsb;
414
setup_tsb_params(mm, tsb_index, new_size);
415
416
spin_unlock_irqrestore(&mm->context.lock, flags);
417
418
/* If old_tsb is NULL, we're being invoked for the first time
419
* from init_new_context().
420
*/
421
if (old_tsb) {
422
/* Reload it on the local cpu. */
423
tsb_context_switch(mm);
424
425
/* Now force other processors to do the same. */
426
preempt_disable();
427
smp_tsb_sync(mm);
428
preempt_enable();
429
430
/* Now it is safe to free the old tsb. */
431
kmem_cache_free(tsb_caches[old_cache_index], old_tsb);
432
}
433
}
434
435
int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
436
{
437
#ifdef CONFIG_HUGETLB_PAGE
438
unsigned long huge_pte_count;
439
#endif
440
unsigned int i;
441
442
spin_lock_init(&mm->context.lock);
443
444
mm->context.sparc64_ctx_val = 0UL;
445
446
#ifdef CONFIG_HUGETLB_PAGE
447
/* We reset it to zero because the fork() page copying
448
* will re-increment the counters as the parent PTEs are
449
* copied into the child address space.
450
*/
451
huge_pte_count = mm->context.huge_pte_count;
452
mm->context.huge_pte_count = 0;
453
#endif
454
455
/* copy_mm() copies over the parent's mm_struct before calling
456
* us, so we need to zero out the TSB pointer or else tsb_grow()
457
* will be confused and think there is an older TSB to free up.
458
*/
459
for (i = 0; i < MM_NUM_TSBS; i++)
460
mm->context.tsb_block[i].tsb = NULL;
461
462
/* If this is fork, inherit the parent's TSB size. We would
463
* grow it to that size on the first page fault anyways.
464
*/
465
tsb_grow(mm, MM_TSB_BASE, get_mm_rss(mm));
466
467
#ifdef CONFIG_HUGETLB_PAGE
468
if (unlikely(huge_pte_count))
469
tsb_grow(mm, MM_TSB_HUGE, huge_pte_count);
470
#endif
471
472
if (unlikely(!mm->context.tsb_block[MM_TSB_BASE].tsb))
473
return -ENOMEM;
474
475
return 0;
476
}
477
478
static void tsb_destroy_one(struct tsb_config *tp)
479
{
480
unsigned long cache_index;
481
482
if (!tp->tsb)
483
return;
484
cache_index = tp->tsb_reg_val & 0x7UL;
485
kmem_cache_free(tsb_caches[cache_index], tp->tsb);
486
tp->tsb = NULL;
487
tp->tsb_reg_val = 0UL;
488
}
489
490
void destroy_context(struct mm_struct *mm)
491
{
492
unsigned long flags, i;
493
494
for (i = 0; i < MM_NUM_TSBS; i++)
495
tsb_destroy_one(&mm->context.tsb_block[i]);
496
497
spin_lock_irqsave(&ctx_alloc_lock, flags);
498
499
if (CTX_VALID(mm->context)) {
500
unsigned long nr = CTX_NRBITS(mm->context);
501
mmu_context_bmap[nr>>6] &= ~(1UL << (nr & 63));
502
}
503
504
spin_unlock_irqrestore(&ctx_alloc_lock, flags);
505
}
506
507