Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/arch/arm64/mm/hugetlbpage.c
52065 views
1
// SPDX-License-Identifier: GPL-2.0-only
2
/*
3
* arch/arm64/mm/hugetlbpage.c
4
*
5
* Copyright (C) 2013 Linaro Ltd.
6
*
7
* Based on arch/x86/mm/hugetlbpage.c.
8
*/
9
10
#include <linux/init.h>
11
#include <linux/fs.h>
12
#include <linux/mm.h>
13
#include <linux/hugetlb.h>
14
#include <linux/pagemap.h>
15
#include <linux/err.h>
16
#include <linux/sysctl.h>
17
#include <asm/mman.h>
18
#include <asm/tlb.h>
19
#include <asm/tlbflush.h>
20
21
/*
22
* HugeTLB Support Matrix
23
*
24
* ---------------------------------------------------
25
* | Page Size | CONT PTE | PMD | CONT PMD | PUD |
26
* ---------------------------------------------------
27
* | 4K | 64K | 2M | 32M | 1G |
28
* | 16K | 2M | 32M | 1G | |
29
* | 64K | 2M | 512M | 16G | |
30
* ---------------------------------------------------
31
*/
32
33
/*
34
* Reserve CMA areas for the largest supported gigantic
35
* huge page when requested. Any other smaller gigantic
36
* huge pages could still be served from those areas.
37
*/
38
#ifdef CONFIG_CMA
/*
 * Report the page order of the largest gigantic huge page this
 * platform supports, so that the hugetlb CMA area is sized for it.
 * Smaller gigantic pages can still be allocated from the same area.
 */
unsigned int arch_hugetlb_cma_order(void)
{
	unsigned int order = CONT_PMD_SHIFT - PAGE_SHIFT;

	/* A PUD-sized (e.g. 1G on 4K pages) leaf beats CONT_PMD when usable. */
	if (pud_sect_supported())
		order = PUD_SHIFT - PAGE_SHIFT;

	return order;
}
#endif /* CONFIG_CMA */
47
48
static bool __hugetlb_valid_size(unsigned long size)
49
{
50
switch (size) {
51
#ifndef __PAGETABLE_PMD_FOLDED
52
case PUD_SIZE:
53
return pud_sect_supported();
54
#endif
55
case CONT_PMD_SIZE:
56
case PMD_SIZE:
57
case CONT_PTE_SIZE:
58
return true;
59
}
60
61
return false;
62
}
63
64
#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
/*
 * Migration is supported for exactly the huge page sizes the
 * architecture recognises; warn (once per call) on anything else.
 */
bool arch_hugetlb_migration_supported(struct hstate *h)
{
	size_t pagesize = huge_page_size(h);

	if (__hugetlb_valid_size(pagesize))
		return true;

	pr_warn("%s: unrecognized huge page size 0x%lx\n",
		__func__, pagesize);
	return false;
}
#endif
77
78
/*
 * Work out how many contiguous entries back the given huge mapping,
 * and the size each entry covers, by walking the page table for
 * @addr and comparing the PMD slot against @ptep.
 *
 * If @ptep is the PMD entry itself, this is a CONT_PMD mapping
 * (CONT_PMDS entries of PMD_SIZE); otherwise it is assumed to be a
 * CONT_PTE mapping (CONT_PTES entries of PAGE_SIZE). Callers only
 * invoke this for ptes with the contiguous bit set.
 *
 * Returns the number of entries; *pgsize is set to the per-entry size.
 */
static int find_num_contig(struct mm_struct *mm, unsigned long addr,
			   pte_t *ptep, size_t *pgsize)
{
	pgd_t *pgdp = pgd_offset(mm, addr);
	p4d_t *p4dp;
	pud_t *pudp;
	pmd_t *pmdp;

	*pgsize = PAGE_SIZE;
	p4dp = p4d_offset(pgdp, addr);
	pudp = pud_offset(p4dp, addr);
	pmdp = pmd_offset(pudp, addr);
	if ((pte_t *)pmdp == ptep) {
		*pgsize = PMD_SIZE;
		return CONT_PMDS;
	}
	return CONT_PTES;
}
96
97
/*
 * Translate a huge page size into the number of page table entries
 * that map it and the size covered by each entry.
 *
 * CONT_PMD -> CONT_PMDS entries of PMD_SIZE; CONT_PTE -> CONT_PTES
 * entries of PAGE_SIZE. Any other size maps with a single entry of
 * that size (PUD/PMD leaf); unsupported sizes trigger a WARN but
 * still report one entry.
 */
static inline int num_contig_ptes(unsigned long size, size_t *pgsize)
{
	*pgsize = size;

	if (size == CONT_PMD_SIZE) {
		*pgsize = PMD_SIZE;
		return CONT_PMDS;
	}

	if (size == CONT_PTE_SIZE) {
		*pgsize = PAGE_SIZE;
		return CONT_PTES;
	}

	WARN_ON(!__hugetlb_valid_size(size));
	return 1;
}
118
119
/*
 * Read a huge pte, folding in state spread across a contiguous set.
 *
 * For non-contiguous (or non-present) mappings the first entry is
 * authoritative and returned as-is. For a present contiguous mapping
 * the hardware may have set dirty/young on any individual entry, so
 * scan the whole set and OR those bits into the returned pte.
 */
pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
	int ncontig, i;
	size_t pgsize;
	pte_t orig_pte = __ptep_get(ptep);

	if (!pte_present(orig_pte) || !pte_cont(orig_pte))
		return orig_pte;

	ncontig = find_num_contig(mm, addr, ptep, &pgsize);
	for (i = 0; i < ncontig; i++, ptep++) {
		pte_t pte = __ptep_get(ptep);

		if (pte_dirty(pte))
			orig_pte = pte_mkdirty(orig_pte);

		if (pte_young(pte))
			orig_pte = pte_mkyoung(orig_pte);
	}
	return orig_pte;
}
140
141
/*
 * Changing some bits of contiguous entries requires us to follow a
 * Break-Before-Make approach, breaking the whole contiguous set
 * before we can change any entries. See ARM DDI 0487A.k_iss10775,
 * "Misprogramming of the Contiguous bit", page D4-1762.
 *
 * This helper performs the break step: it clears all @ncontig entries
 * starting at @ptep/@addr (each covering @pgsize bytes) and returns
 * the first entry with dirty/young accumulated from the whole set,
 * since hardware may have updated those bits on any member entry.
 * No TLB flush is done here; see get_clear_contig_flush().
 */
static pte_t get_clear_contig(struct mm_struct *mm,
			      unsigned long addr,
			      pte_t *ptep,
			      unsigned long pgsize,
			      unsigned long ncontig)
{
	pte_t pte, tmp_pte;
	bool present;

	pte = __ptep_get_and_clear_anysz(mm, addr, ptep, pgsize);
	present = pte_present(pte);
	while (--ncontig) {
		ptep++;
		addr += pgsize;
		tmp_pte = __ptep_get_and_clear_anysz(mm, addr, ptep, pgsize);
		/* Only fold HW-managed bits for present (valid) mappings. */
		if (present) {
			if (pte_dirty(tmp_pte))
				pte = pte_mkdirty(pte);
			if (pte_young(tmp_pte))
				pte = pte_mkyoung(pte);
		}
	}
	return pte;
}
173
174
/*
 * Break a contiguous set as get_clear_contig() does, then flush the
 * TLB for the covered range. A stack-local dummy VMA is enough for
 * the range-flush helper, which only needs the mm.
 */
static pte_t get_clear_contig_flush(struct mm_struct *mm,
				    unsigned long addr,
				    pte_t *ptep,
				    unsigned long pgsize,
				    unsigned long ncontig)
{
	pte_t orig_pte = get_clear_contig(mm, addr, ptep, pgsize, ncontig);
	struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
	unsigned long end = addr + (pgsize * ncontig);

	__flush_hugetlb_tlb_range(&vma, addr, end, pgsize, true);
	return orig_pte;
}
187
188
/*
 * Changing some bits of contiguous entries requires us to follow a
 * Break-Before-Make approach, breaking the whole contiguous set
 * before we can change any entries. See ARM DDI 0487A.k_iss10775,
 * "Misprogramming of the Contiguous bit", page D4-1762.
 *
 * This helper performs the break step for use cases where the
 * original pte is not needed: it clears all @ncontig entries and
 * flushes the TLB over the cleared range, using the kernel-range
 * flush for init_mm and the hugetlb range flush otherwise.
 */
static void clear_flush(struct mm_struct *mm,
			unsigned long addr,
			pte_t *ptep,
			unsigned long pgsize,
			unsigned long ncontig)
{
	struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
	unsigned long i, saddr = addr;

	for (i = 0; i < ncontig; i++, addr += pgsize, ptep++)
		__ptep_get_and_clear_anysz(mm, addr, ptep, pgsize);

	/* addr has been advanced to the end of the range by the loop. */
	if (mm == &init_mm)
		flush_tlb_kernel_range(saddr, addr);
	else
		__flush_hugetlb_tlb_range(&vma, saddr, addr, pgsize, true);
}
214
215
/*
 * Install a huge pte of size @sz at @ptep, writing one entry per
 * member of the contiguous set where applicable.
 *
 * Non-present entries (e.g. swap/migration entries) are written one
 * at a time with no break step, since there is no valid mapping to
 * misprogram. For present contiguous entries, if the slot currently
 * holds a valid mapping we must Break-Before-Make: clear and flush
 * the whole set before writing the new entries.
 */
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t pte, unsigned long sz)
{
	size_t pgsize;
	int i;
	int ncontig;

	ncontig = num_contig_ptes(sz, &pgsize);

	if (!pte_present(pte)) {
		for (i = 0; i < ncontig; i++, ptep++, addr += pgsize)
			__set_ptes_anysz(mm, addr, ptep, pte, 1, pgsize);
		return;
	}

	/* Only need to "break" if transitioning valid -> valid. */
	if (pte_cont(pte) && pte_valid(__ptep_get(ptep)))
		clear_flush(mm, addr, ptep, pgsize, ncontig);

	__set_ptes_anysz(mm, addr, ptep, pte, ncontig, pgsize);
}
236
237
/*
 * Allocate page table entries down to the level appropriate for a
 * huge page of size @sz at @addr, returning a pointer to the slot
 * where the huge pte will be installed (cast to pte_t * regardless
 * of the actual level), or NULL on allocation failure.
 *
 *   PUD_SIZE:      the pud slot itself is the huge pte.
 *   CONT_PTE_SIZE: allocate down to a pte page; @addr must be
 *                  CONT_PTE-aligned (warned otherwise).
 *   PMD_SIZE:      try to share the pmd page with other mappings of
 *                  the same file region when permitted, else allocate.
 *   CONT_PMD_SIZE: allocate the pmd page; @addr must be aligned.
 *
 * Any unrecognised size falls through and returns NULL.
 */
pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
		      unsigned long addr, unsigned long sz)
{
	pgd_t *pgdp;
	p4d_t *p4dp;
	pud_t *pudp;
	pmd_t *pmdp;
	pte_t *ptep = NULL;

	pgdp = pgd_offset(mm, addr);
	p4dp = p4d_alloc(mm, pgdp, addr);
	if (!p4dp)
		return NULL;

	pudp = pud_alloc(mm, p4dp, addr);
	if (!pudp)
		return NULL;

	if (sz == PUD_SIZE) {
		ptep = (pte_t *)pudp;
	} else if (sz == (CONT_PTE_SIZE)) {
		pmdp = pmd_alloc(mm, pudp, addr);
		if (!pmdp)
			return NULL;

		WARN_ON(addr & (sz - 1));
		ptep = pte_alloc_huge(mm, pmdp, addr);
	} else if (sz == PMD_SIZE) {
		if (want_pmd_share(vma, addr) && pud_none(READ_ONCE(*pudp)))
			ptep = huge_pmd_share(mm, vma, addr, pudp);
		else
			ptep = (pte_t *)pmd_alloc(mm, pudp, addr);
	} else if (sz == (CONT_PMD_SIZE)) {
		pmdp = pmd_alloc(mm, pudp, addr);
		WARN_ON(addr & (sz - 1));
		/* pmdp may be NULL here; callers treat NULL as ENOMEM. */
		return (pte_t *)pmdp;
	}

	return ptep;
}
277
278
/*
 * Walk the page tables looking for the entry mapping a huge page of
 * size @sz at @addr. Returns a pointer to the entry (which may be a
 * leaf, a swap/migration entry, or - for some none cases - an empty
 * slot the caller can inspect), or NULL when no entry of the
 * requested size can exist here.
 *
 * Note the asymmetric none-handling: a none pud/pmd is only
 * returned when @sz says the entry would live at exactly that level,
 * so callers probing for a larger size see the empty slot rather
 * than NULL.
 */
pte_t *huge_pte_offset(struct mm_struct *mm,
		       unsigned long addr, unsigned long sz)
{
	pgd_t *pgdp;
	p4d_t *p4dp;
	pud_t *pudp, pud;
	pmd_t *pmdp, pmd;

	pgdp = pgd_offset(mm, addr);
	if (!pgd_present(READ_ONCE(*pgdp)))
		return NULL;

	p4dp = p4d_offset(pgdp, addr);
	if (!p4d_present(READ_ONCE(*p4dp)))
		return NULL;

	pudp = pud_offset(p4dp, addr);
	pud = READ_ONCE(*pudp);
	if (sz != PUD_SIZE && pud_none(pud))
		return NULL;
	/* hugepage or swap? */
	if (pud_leaf(pud) || !pud_present(pud))
		return (pte_t *)pudp;
	/* table; check the next level */

	/* CONT_PMD entries are found via the first pmd of the set. */
	if (sz == CONT_PMD_SIZE)
		addr &= CONT_PMD_MASK;

	pmdp = pmd_offset(pudp, addr);
	pmd = READ_ONCE(*pmdp);
	if (!(sz == PMD_SIZE || sz == CONT_PMD_SIZE) &&
	    pmd_none(pmd))
		return NULL;
	if (pmd_leaf(pmd) || !pmd_present(pmd))
		return (pte_t *)pmdp;

	/* CONT_PTE entries are found via the first pte of the set. */
	if (sz == CONT_PTE_SIZE)
		return pte_offset_huge(pmdp, (addr & CONT_PTE_MASK));

	return NULL;
}
319
320
/*
 * Return the address mask covering the span from the last huge page
 * of size huge_page_size(h) up to the end of the region mapped by
 * the next-higher page table level (e.g. PGDIR for PUD pages). The
 * generic hugetlb walker uses this to skip ahead efficiently.
 * Returns 0 for unrecognised sizes (or PUD without section support),
 * which disables the optimisation.
 */
unsigned long hugetlb_mask_last_page(struct hstate *h)
{
	unsigned long hp_size = huge_page_size(h);

	switch (hp_size) {
#ifndef __PAGETABLE_PMD_FOLDED
	case PUD_SIZE:
		if (pud_sect_supported())
			return PGDIR_SIZE - PUD_SIZE;
		break;
#endif
	case CONT_PMD_SIZE:
		return PUD_SIZE - CONT_PMD_SIZE;
	case PMD_SIZE:
		return PUD_SIZE - PMD_SIZE;
	case CONT_PTE_SIZE:
		return PMD_SIZE - CONT_PTE_SIZE;
	default:
		break;
	}

	return 0UL;
}
343
344
/*
 * Convert a freshly built pte into the hardware form required for a
 * huge mapping of order @shift: a block (section) entry at PUD or
 * PMD level, with the contiguous bit added for CONT_PMD/CONT_PTE
 * sets. @flags is unused on arm64. Unrecognised sizes warn and
 * return the entry unchanged.
 */
pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags)
{
	size_t pagesize = 1UL << shift;

	switch (pagesize) {
#ifndef __PAGETABLE_PMD_FOLDED
	case PUD_SIZE:
		if (pud_sect_supported())
			return pud_pte(pud_mkhuge(pte_pud(entry)));
		break;
#endif
	case CONT_PMD_SIZE:
		return pmd_pte(pmd_mkhuge(pmd_mkcont(pte_pmd(entry))));
	case PMD_SIZE:
		return pmd_pte(pmd_mkhuge(pte_pmd(entry)));
	case CONT_PTE_SIZE:
		return pte_mkcont(entry);
	default:
		break;
	}
	pr_warn("%s: unrecognized huge page size 0x%lx\n",
		__func__, pagesize);
	return entry;
}
368
369
/*
 * Clear every entry backing a huge mapping of size @sz starting at
 * @ptep. No TLB flush is performed here; the generic hugetlb code
 * is responsible for flushing where required.
 */
void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
		    pte_t *ptep, unsigned long sz)
{
	int i, ncontig;
	size_t pgsize;

	ncontig = num_contig_ptes(sz, &pgsize);

	for (i = 0; i < ncontig; i++, addr += pgsize, ptep++)
		__pte_clear(mm, addr, ptep);
}
380
381
/*
 * Atomically-per-entry clear a huge mapping of size @sz and return
 * the original pte with dirty/young folded in from the whole
 * contiguous set. No TLB flush (caller's responsibility).
 */
pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
			      pte_t *ptep, unsigned long sz)
{
	int ncontig;
	size_t pgsize;

	ncontig = num_contig_ptes(sz, &pgsize);
	return get_clear_contig(mm, addr, ptep, pgsize, ncontig);
}
390
391
/*
 * huge_ptep_set_access_flags will update access flags (dirty, accessed)
 * and write permission.
 *
 * For a contiguous huge pte range we need to check whether or not write
 * permission has to change only on the first pte in the set. Then for
 * all the contiguous ptes we need to check whether or not there is a
 * discrepancy between dirty or young.
 *
 * Returns 1 if any of write/dirty/young differ between @pte and the
 * current contents of the @ncontig entries at @ptep, else 0.
 */
static int __cont_access_flags_changed(pte_t *ptep, pte_t pte, int ncontig)
{
	int i;

	/* Write permission is uniform across the set; first entry suffices. */
	if (pte_write(pte) != pte_write(__ptep_get(ptep)))
		return 1;

	for (i = 0; i < ncontig; i++) {
		pte_t orig_pte = __ptep_get(ptep + i);

		if (pte_dirty(pte) != pte_dirty(orig_pte))
			return 1;

		if (pte_young(pte) != pte_young(orig_pte))
			return 1;
	}

	return 0;
}
419
420
/*
 * Update access flags (dirty/young) and write permission on a huge
 * pte, following Break-Before-Make for contiguous sets.
 *
 * Returns 1 if the entries were changed (caller must have flushed /
 * will see the new state), 0 if nothing needed to change.
 */
int huge_ptep_set_access_flags(struct vm_area_struct *vma,
			       unsigned long addr, pte_t *ptep,
			       pte_t pte, int dirty)
{
	int ncontig;
	size_t pgsize = 0;
	struct mm_struct *mm = vma->vm_mm;
	pte_t orig_pte;

	VM_WARN_ON(!pte_present(pte));

	/* Non-contiguous entries can be updated in place, no BBM needed. */
	if (!pte_cont(pte))
		return __ptep_set_access_flags(vma, addr, ptep, pte, dirty);

	ncontig = num_contig_ptes(huge_page_size(hstate_vma(vma)), &pgsize);

	/* Avoid the expensive break+flush when nothing would change. */
	if (!__cont_access_flags_changed(ptep, pte, ncontig))
		return 0;

	orig_pte = get_clear_contig_flush(mm, addr, ptep, pgsize, ncontig);
	VM_WARN_ON(!pte_present(orig_pte));

	/* Make sure we don't lose the dirty or young state */
	if (pte_dirty(orig_pte))
		pte = pte_mkdirty(pte);

	if (pte_young(orig_pte))
		pte = pte_mkyoung(pte);

	__set_ptes_anysz(mm, addr, ptep, pte, ncontig, pgsize);
	return 1;
}
452
453
/*
 * Write-protect a huge pte. Plain entries are wrprotected in place;
 * contiguous sets require Break-Before-Make: clear+flush the set,
 * wrprotect the accumulated pte, then rewrite all entries.
 */
void huge_ptep_set_wrprotect(struct mm_struct *mm,
			     unsigned long addr, pte_t *ptep)
{
	int ncontig;
	size_t pgsize;
	pte_t pte;

	pte = __ptep_get(ptep);
	VM_WARN_ON(!pte_present(pte));

	if (!pte_cont(pte)) {
		__ptep_set_wrprotect(mm, addr, ptep);
		return;
	}

	ncontig = find_num_contig(mm, addr, ptep, &pgsize);

	/* BBM: break the whole set (keeping dirty/young), then remake it. */
	pte = get_clear_contig_flush(mm, addr, ptep, pgsize, ncontig);
	pte = pte_wrprotect(pte);

	__set_ptes_anysz(mm, addr, ptep, pte, ncontig, pgsize);
}
475
476
/*
 * Clear a huge pte (the whole contiguous set where applicable),
 * flush the TLB for the covered range, and return the original pte
 * with dirty/young accumulated across the set.
 */
pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
			    unsigned long addr, pte_t *ptep)
{
	struct mm_struct *mm = vma->vm_mm;
	size_t pgsize;
	int ncontig;

	ncontig = num_contig_ptes(huge_page_size(hstate_vma(vma)), &pgsize);
	return get_clear_contig_flush(mm, addr, ptep, pgsize, ncontig);
}
486
487
/*
 * Register every huge page size this platform supports with the
 * generic hugetlb layer at boot.
 */
static int __init hugetlbpage_init(void)
{
	/*
	 * HugeTLB pages are supported on maximum four page table
	 * levels (PUD, CONT PMD, PMD, CONT PTE) for a given base
	 * page size, corresponding to hugetlb_add_hstate() calls
	 * here.
	 *
	 * HUGE_MAX_HSTATE should at least match maximum supported
	 * HugeTLB page sizes on the platform. Any new addition to
	 * supported HugeTLB page sizes will also require changing
	 * HUGE_MAX_HSTATE as well.
	 */
	BUILD_BUG_ON(HUGE_MAX_HSTATE < 4);
	/* PUD-sized pages only when section mappings at PUD level exist. */
	if (pud_sect_supported())
		hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);

	hugetlb_add_hstate(CONT_PMD_SHIFT - PAGE_SHIFT);
	hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
	hugetlb_add_hstate(CONT_PTE_SHIFT - PAGE_SHIFT);

	return 0;
}
arch_initcall(hugetlbpage_init);
511
512
/* Boot-time hook: validate a hugepagesz= command line value. */
bool __init arch_hugetlb_valid_size(unsigned long size)
{
	return __hugetlb_valid_size(size);
}
516
517
/*
 * Start a protection change on a huge pte: clear the entry (or set)
 * and return the old value for huge_ptep_modify_prot_commit().
 * On CPUs affected by erratum #2645198 a full clear+flush (BBM) is
 * required when the old mapping was user-executable.
 */
pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
{
	unsigned long psize = huge_page_size(hstate_vma(vma));

	if (alternative_has_cap_unlikely(ARM64_WORKAROUND_2645198)) {
		/*
		 * Break-before-make (BBM) is required for all user space mappings
		 * when the permission changes from executable to non-executable
		 * in cases where cpu is affected with errata #2645198.
		 */
		if (pte_user_exec(__ptep_get(ptep)))
			return huge_ptep_clear_flush(vma, addr, ptep);
	}
	return huge_ptep_get_and_clear(vma->vm_mm, addr, ptep, psize);
}
532
533
/*
 * Finish a protection change started by huge_ptep_modify_prot_start()
 * by installing the new pte. @old_pte is unused on arm64.
 */
void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep,
				  pte_t old_pte, pte_t pte)
{
	unsigned long psize = huge_page_size(hstate_vma(vma));

	set_huge_pte_at(vma->vm_mm, addr, ptep, pte, psize);
}
540
541