GitHub Repository: torvalds/linux
Path: blob/master/arch/s390/mm/pgtable.c

// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright IBM Corp. 2007, 2011
 * Author(s): Martin Schwidefsky <[email protected]>
 */

#include <linux/cpufeature.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/spinlock.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/swapops.h>
#include <linux/sysctl.h>
#include <linux/ksm.h>
#include <linux/mman.h>

#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/page-states.h>
#include <asm/machine.h>

pgprot_t pgprot_writecombine(pgprot_t prot)
{
	/*
	 * mio_wb_bit_mask may be set on a different CPU, but it is only set
	 * once at init and only read afterwards.
	 */
	return __pgprot(pgprot_val(prot) | mio_wb_bit_mask);
}
EXPORT_SYMBOL_GPL(pgprot_writecombine);
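
/*
 * Write-combining on s390 is a property of the PTE: mio_wb_bit_mask
 * selects the write-back behaviour for PCI MIO mappings and is
 * established once during PCI initialization, which is why the
 * unsynchronized read above is safe.
 */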

static inline void ptep_ipte_local(struct mm_struct *mm, unsigned long addr,
				   pte_t *ptep, int nodat)
{
	unsigned long opt, asce;

	if (machine_has_tlb_guest()) {
		opt = 0;
		asce = READ_ONCE(mm->context.gmap_asce);
		if (asce == 0UL || nodat)
			opt |= IPTE_NODAT;
		if (asce != -1UL) {
			asce = asce ? : mm->context.asce;
			opt |= IPTE_GUEST_ASCE;
		}
		__ptep_ipte(addr, ptep, opt, asce, IPTE_LOCAL);
	} else {
		__ptep_ipte(addr, ptep, 0, 0, IPTE_LOCAL);
	}
}

static inline void ptep_ipte_global(struct mm_struct *mm, unsigned long addr,
				    pte_t *ptep, int nodat)
{
	unsigned long opt, asce;

	if (machine_has_tlb_guest()) {
		opt = 0;
		asce = READ_ONCE(mm->context.gmap_asce);
		if (asce == 0UL || nodat)
			opt |= IPTE_NODAT;
		if (asce != -1UL) {
			asce = asce ? : mm->context.asce;
			opt |= IPTE_GUEST_ASCE;
		}
		__ptep_ipte(addr, ptep, opt, asce, IPTE_GLOBAL);
	} else {
		__ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL);
	}
}
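
/*
 * The two helpers above differ only in the IPTE_LOCAL vs IPTE_GLOBAL
 * scope passed to __ptep_ipte(). When the machine provides TLB guest
 * support, gmap_asce selects the guest address space the invalidation
 * applies to, and IPTE_NODAT indicates that the page is guaranteed not
 * to be used for DAT tables, permitting a cheaper invalidation.
 */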

static inline pte_t ptep_flush_direct(struct mm_struct *mm,
				      unsigned long addr, pte_t *ptep,
				      int nodat)
{
	pte_t old;

	old = *ptep;
	if (unlikely(pte_val(old) & _PAGE_INVALID))
		return old;
	atomic_inc(&mm->context.flush_count);
	if (cpu_has_tlb_lc() &&
	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
		ptep_ipte_local(mm, addr, ptep, nodat);
	else
		ptep_ipte_global(mm, addr, ptep, nodat);
	atomic_dec(&mm->context.flush_count);
	return old;
}

static inline pte_t ptep_flush_lazy(struct mm_struct *mm,
				    unsigned long addr, pte_t *ptep,
				    int nodat)
{
	pte_t old;

	old = *ptep;
	if (unlikely(pte_val(old) & _PAGE_INVALID))
		return old;
	atomic_inc(&mm->context.flush_count);
	if (cpumask_equal(&mm->context.cpu_attach_mask,
			  cpumask_of(smp_processor_id()))) {
		set_pte(ptep, set_pte_bit(*ptep, __pgprot(_PAGE_INVALID)));
		mm->context.flush_mm = 1;
	} else
		ptep_ipte_global(mm, addr, ptep, nodat);
	atomic_dec(&mm->context.flush_count);
	return old;
}
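
/*
 * ptep_flush_direct() invalidates the TLB entry immediately; the lazy
 * variant only marks the PTE invalid and records a pending flush in
 * flush_mm when the mm is attached to this CPU alone, deferring the
 * expensive IPTE to a later batched flush.
 */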

static inline pgste_t pgste_get_lock(pte_t *ptep)
{
	unsigned long value = 0;
#ifdef CONFIG_PGSTE
	unsigned long *ptr = (unsigned long *)(ptep + PTRS_PER_PTE);

	do {
		value = __atomic64_or_barrier(PGSTE_PCL_BIT, ptr);
	} while (value & PGSTE_PCL_BIT);
	value |= PGSTE_PCL_BIT;
#endif
	return __pgste(value);
}

static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
	barrier();
	WRITE_ONCE(*(unsigned long *)(ptep + PTRS_PER_PTE), pgste_val(pgste) & ~PGSTE_PCL_BIT);
#endif
}
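
/*
 * PGSTE locking: each PGSTE lives at ptep + PTRS_PER_PTE, directly after
 * the page table it annotates. pgste_get_lock() spins until the atomic
 * or-with-barrier observes PGSTE_PCL_BIT clear; pgste_set_unlock()
 * publishes the new value with the bit removed. A typical caller pairs
 * them around PTE and PGSTE updates:
 *
 *	pgste = pgste_get_lock(ptep);
 *	... update *ptep and pgste ...
 *	pgste_set_unlock(ptep, pgste);
 */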

static inline pgste_t pgste_get(pte_t *ptep)
{
	unsigned long pgste = 0;
#ifdef CONFIG_PGSTE
	pgste = *(unsigned long *)(ptep + PTRS_PER_PTE);
#endif
	return __pgste(pgste);
}

static inline void pgste_set(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
	*(pgste_t *)(ptep + PTRS_PER_PTE) = pgste;
#endif
}

static inline pgste_t pgste_update_all(pte_t pte, pgste_t pgste,
				       struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
	unsigned long address, bits, skey;

	if (!mm_uses_skeys(mm) || pte_val(pte) & _PAGE_INVALID)
		return pgste;
	address = pte_val(pte) & PAGE_MASK;
	skey = (unsigned long) page_get_storage_key(address);
	bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
	/* Transfer page changed & referenced bit to guest bits in pgste */
	pgste = set_pgste_bit(pgste, bits << 48); /* GR bit & GC bit */
	/* Copy page access key and fetch protection bit to pgste */
	pgste = clear_pgste_bit(pgste, PGSTE_ACC_BITS | PGSTE_FP_BIT);
	pgste = set_pgste_bit(pgste, (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56);
#endif
	return pgste;
}

static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry,
				 struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
	unsigned long address;
	unsigned long nkey;

	if (!mm_uses_skeys(mm) || pte_val(entry) & _PAGE_INVALID)
		return;
	VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID));
	address = pte_val(entry) & PAGE_MASK;
	/*
	 * Set page access key and fetch protection bit from pgste.
	 * The guest C/R information is still in the PGSTE, set real
	 * key C/R to 0.
	 */
	nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
	nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
	page_set_storage_key(address, nkey, 0);
#endif
}
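
/*
 * The shift constants above line up the storage key byte with the PGSTE
 * layout: ACC/FP shifted left by 56 land on PGSTE_ACC_BITS/PGSTE_FP_BIT,
 * while the R/C bits shifted left by 48 land on PGSTE_GR_BIT/PGSTE_GC_BIT
 * (the guest view of referenced/changed); the reverse shifts in
 * pgste_set_key() decode them again.
 */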

static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
{
#ifdef CONFIG_PGSTE
	if ((pte_val(entry) & _PAGE_PRESENT) &&
	    (pte_val(entry) & _PAGE_WRITE) &&
	    !(pte_val(entry) & _PAGE_INVALID)) {
		if (!machine_has_esop()) {
			/*
			 * Without enhanced suppression-on-protection force
			 * the dirty bit on for all writable ptes.
			 */
			entry = set_pte_bit(entry, __pgprot(_PAGE_DIRTY));
			entry = clear_pte_bit(entry, __pgprot(_PAGE_PROTECT));
		}
		if (!(pte_val(entry) & _PAGE_PROTECT))
			/* This pte allows write access, set user-dirty */
			pgste = set_pgste_bit(pgste, PGSTE_UC_BIT);
	}
#endif
	set_pte(ptep, entry);
	return pgste;
}

static inline pgste_t pgste_pte_notify(struct mm_struct *mm,
				       unsigned long addr,
				       pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
	unsigned long bits;

	bits = pgste_val(pgste) & (PGSTE_IN_BIT | PGSTE_VSIE_BIT);
	if (bits) {
		pgste = __pgste(pgste_val(pgste) ^ bits);
		ptep_notify(mm, addr, ptep, bits);
	}
#endif
	return pgste;
}

static inline pgste_t ptep_xchg_start(struct mm_struct *mm,
				      unsigned long addr, pte_t *ptep)
{
	pgste_t pgste = __pgste(0);

	if (mm_has_pgste(mm)) {
		pgste = pgste_get_lock(ptep);
		pgste = pgste_pte_notify(mm, addr, ptep, pgste);
	}
	return pgste;
}

static inline pte_t ptep_xchg_commit(struct mm_struct *mm,
				     unsigned long addr, pte_t *ptep,
				     pgste_t pgste, pte_t old, pte_t new)
{
	if (mm_has_pgste(mm)) {
		if (pte_val(old) & _PAGE_INVALID)
			pgste_set_key(ptep, pgste, new, mm);
		if (pte_val(new) & _PAGE_INVALID) {
			pgste = pgste_update_all(old, pgste, mm);
			if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) ==
			    _PGSTE_GPS_USAGE_UNUSED)
				old = set_pte_bit(old, __pgprot(_PAGE_UNUSED));
		}
		pgste = pgste_set_pte(ptep, pgste, new);
		pgste_set_unlock(ptep, pgste);
	} else {
		set_pte(ptep, new);
	}
	return old;
}
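
/*
 * All ptep_xchg_* entry points below follow the same three-step shape:
 *
 *	preempt_disable();
 *	pgste = ptep_xchg_start(mm, addr, ptep);	(lock PGSTE, notify)
 *	old = ptep_flush_direct/lazy(mm, addr, ptep, nodat);
 *	old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
 *	preempt_enable();
 */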

pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr,
		       pte_t *ptep, pte_t new)
{
	pgste_t pgste;
	pte_t old;
	int nodat;

	preempt_disable();
	pgste = ptep_xchg_start(mm, addr, ptep);
	nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
	old = ptep_flush_direct(mm, addr, ptep, nodat);
	old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
	preempt_enable();
	return old;
}
EXPORT_SYMBOL(ptep_xchg_direct);

/*
 * Caller must check that new PTE only differs in _PAGE_PROTECT HW bit, so that
 * RDP can be used instead of IPTE. See also comments at pte_allow_rdp().
 */
void ptep_reset_dat_prot(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
			 pte_t new)
{
	preempt_disable();
	atomic_inc(&mm->context.flush_count);
	if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
		__ptep_rdp(addr, ptep, 0, 0, 1);
	else
		__ptep_rdp(addr, ptep, 0, 0, 0);
	/*
	 * PTE is not invalidated by RDP, only _PAGE_PROTECT is cleared. That
	 * means it is still valid and active, and must not be changed according
	 * to the architecture. But writing a new value that only differs in SW
	 * bits is allowed.
	 */
	set_pte(ptep, new);
	atomic_dec(&mm->context.flush_count);
	preempt_enable();
}
EXPORT_SYMBOL(ptep_reset_dat_prot);
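
/*
 * RDP (Reset DAT Protection) removes only the protection bit from a
 * still-valid PTE and its TLB copies, which is considerably cheaper than
 * a full IPTE invalidate/re-create cycle; it is therefore preferred when
 * the old and new PTE differ in nothing but _PAGE_PROTECT.
 */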

pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t new)
{
	pgste_t pgste;
	pte_t old;
	int nodat;

	preempt_disable();
	pgste = ptep_xchg_start(mm, addr, ptep);
	nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
	old = ptep_flush_lazy(mm, addr, ptep, nodat);
	old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
	preempt_enable();
	return old;
}
EXPORT_SYMBOL(ptep_xchg_lazy);

pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr,
			     pte_t *ptep)
{
	pgste_t pgste;
	pte_t old;
	int nodat;
	struct mm_struct *mm = vma->vm_mm;

	preempt_disable();
	pgste = ptep_xchg_start(mm, addr, ptep);
	nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
	old = ptep_flush_lazy(mm, addr, ptep, nodat);
	if (mm_has_pgste(mm)) {
		pgste = pgste_update_all(old, pgste, mm);
		pgste_set(ptep, pgste);
	}
	return old;
}

void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
			     pte_t *ptep, pte_t old_pte, pte_t pte)
{
	pgste_t pgste;
	struct mm_struct *mm = vma->vm_mm;

	if (mm_has_pgste(mm)) {
		pgste = pgste_get(ptep);
		pgste_set_key(ptep, pgste, pte, mm);
		pgste = pgste_set_pte(ptep, pgste, pte);
		pgste_set_unlock(ptep, pgste);
	} else {
		set_pte(ptep, pte);
	}
	preempt_enable();
}

static inline void pmdp_idte_local(struct mm_struct *mm,
				   unsigned long addr, pmd_t *pmdp)
{
	if (machine_has_tlb_guest())
		__pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE,
			    mm->context.asce, IDTE_LOCAL);
	else
		__pmdp_idte(addr, pmdp, 0, 0, IDTE_LOCAL);
	if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
		gmap_pmdp_idte_local(mm, addr);
}

static inline void pmdp_idte_global(struct mm_struct *mm,
				    unsigned long addr, pmd_t *pmdp)
{
	if (machine_has_tlb_guest()) {
		__pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE,
			    mm->context.asce, IDTE_GLOBAL);
		if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
			gmap_pmdp_idte_global(mm, addr);
	} else if (cpu_has_idte()) {
		__pmdp_idte(addr, pmdp, 0, 0, IDTE_GLOBAL);
		if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
			gmap_pmdp_idte_global(mm, addr);
	} else {
		__pmdp_csp(pmdp);
		if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
			gmap_pmdp_csp(mm, addr);
	}
}

static inline pmd_t pmdp_flush_direct(struct mm_struct *mm,
				      unsigned long addr, pmd_t *pmdp)
{
	pmd_t old;

	old = *pmdp;
	if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
		return old;
	atomic_inc(&mm->context.flush_count);
	if (cpu_has_tlb_lc() &&
	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
		pmdp_idte_local(mm, addr, pmdp);
	else
		pmdp_idte_global(mm, addr, pmdp);
	atomic_dec(&mm->context.flush_count);
	return old;
}

static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm,
				    unsigned long addr, pmd_t *pmdp)
{
	pmd_t old;

	old = *pmdp;
	if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
		return old;
	atomic_inc(&mm->context.flush_count);
	if (cpumask_equal(&mm->context.cpu_attach_mask,
			  cpumask_of(smp_processor_id()))) {
		set_pmd(pmdp, set_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_INVALID)));
		mm->context.flush_mm = 1;
		if (mm_has_pgste(mm))
			gmap_pmdp_invalidate(mm, addr);
	} else {
		pmdp_idte_global(mm, addr, pmdp);
	}
	atomic_dec(&mm->context.flush_count);
	return old;
}

#ifdef CONFIG_PGSTE
static int pmd_lookup(struct mm_struct *mm, unsigned long addr, pmd_t **pmdp)
{
	struct vm_area_struct *vma;
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;

	/* We need a valid VMA, otherwise this is clearly a fault. */
	vma = vma_lookup(mm, addr);
	if (!vma)
		return -EFAULT;

	pgd = pgd_offset(mm, addr);
	if (!pgd_present(*pgd))
		return -ENOENT;

	p4d = p4d_offset(pgd, addr);
	if (!p4d_present(*p4d))
		return -ENOENT;

	pud = pud_offset(p4d, addr);
	if (!pud_present(*pud))
		return -ENOENT;

	/* Large PUDs are not supported yet. */
	if (pud_leaf(*pud))
		return -EFAULT;

	*pmdp = pmd_offset(pud, addr);
	return 0;
}
#endif
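
/*
 * pmd_lookup() return convention, relied upon by the storage-key
 * helpers further below: 0 means *pmdp is valid (possibly a huge pmd),
 * -ENOENT means some table level is not populated (so the storage key
 * is implicitly 0), and -EFAULT means the address is unmapped or backed
 * by an unsupported large PUD.
 */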

pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr,
		       pmd_t *pmdp, pmd_t new)
{
	pmd_t old;

	preempt_disable();
	old = pmdp_flush_direct(mm, addr, pmdp);
	set_pmd(pmdp, new);
	preempt_enable();
	return old;
}
EXPORT_SYMBOL(pmdp_xchg_direct);

pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr,
		     pmd_t *pmdp, pmd_t new)
{
	pmd_t old;

	preempt_disable();
	old = pmdp_flush_lazy(mm, addr, pmdp);
	set_pmd(pmdp, new);
	preempt_enable();
	return old;
}
EXPORT_SYMBOL(pmdp_xchg_lazy);

static inline void pudp_idte_local(struct mm_struct *mm,
				   unsigned long addr, pud_t *pudp)
{
	if (machine_has_tlb_guest())
		__pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE,
			    mm->context.asce, IDTE_LOCAL);
	else
		__pudp_idte(addr, pudp, 0, 0, IDTE_LOCAL);
}

static inline void pudp_idte_global(struct mm_struct *mm,
				    unsigned long addr, pud_t *pudp)
{
	if (machine_has_tlb_guest())
		__pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE,
			    mm->context.asce, IDTE_GLOBAL);
	else if (cpu_has_idte())
		__pudp_idte(addr, pudp, 0, 0, IDTE_GLOBAL);
	else
		/*
		 * Invalid bit position is the same for pmd and pud, so we can
		 * reuse _pmd_csp() here
		 */
		__pmdp_csp((pmd_t *) pudp);
}

static inline pud_t pudp_flush_direct(struct mm_struct *mm,
				      unsigned long addr, pud_t *pudp)
{
	pud_t old;

	old = *pudp;
	if (pud_val(old) & _REGION_ENTRY_INVALID)
		return old;
	atomic_inc(&mm->context.flush_count);
	if (cpu_has_tlb_lc() &&
	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
		pudp_idte_local(mm, addr, pudp);
	else
		pudp_idte_global(mm, addr, pudp);
	atomic_dec(&mm->context.flush_count);
	return old;
}

pud_t pudp_xchg_direct(struct mm_struct *mm, unsigned long addr,
		       pud_t *pudp, pud_t new)
{
	pud_t old;

	preempt_disable();
	old = pudp_flush_direct(mm, addr, pudp);
	set_pud(pudp, new);
	preempt_enable();
	return old;
}
EXPORT_SYMBOL(pudp_xchg_direct);

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
				pgtable_t pgtable)
{
	struct list_head *lh = (struct list_head *) pgtable;

	assert_spin_locked(pmd_lockptr(mm, pmdp));

	/* FIFO */
	if (!pmd_huge_pte(mm, pmdp))
		INIT_LIST_HEAD(lh);
	else
		list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
	pmd_huge_pte(mm, pmdp) = pgtable;
}

pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
{
	struct list_head *lh;
	pgtable_t pgtable;
	pte_t *ptep;

	assert_spin_locked(pmd_lockptr(mm, pmdp));

	/* FIFO */
	pgtable = pmd_huge_pte(mm, pmdp);
	lh = (struct list_head *) pgtable;
	if (list_empty(lh))
		pmd_huge_pte(mm, pmdp) = NULL;
	else {
		pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
		list_del(lh);
	}
	ptep = (pte_t *) pgtable;
	set_pte(ptep, __pte(_PAGE_INVALID));
	ptep++;
	set_pte(ptep, __pte(_PAGE_INVALID));
	return pgtable;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
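
/*
 * The deposit/withdraw pair above parks pre-allocated PTE tables on a
 * list anchored at pmd_huge_pte(); the list_head is stored inside the
 * first 16 bytes of the deposited table itself, which is why withdraw
 * resets exactly two PTE slots to _PAGE_INVALID before handing the
 * table back.
 */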

#ifdef CONFIG_PGSTE
void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t entry)
{
	pgste_t pgste;

	/* the mm_has_pgste() check is done in set_pte_at() */
	preempt_disable();
	pgste = pgste_get_lock(ptep);
	pgste = clear_pgste_bit(pgste, _PGSTE_GPS_ZERO);
	pgste_set_key(ptep, pgste, entry, mm);
	pgste = pgste_set_pte(ptep, pgste, entry);
	pgste_set_unlock(ptep, pgste);
	preempt_enable();
}

void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
	pgste_t pgste;

	preempt_disable();
	pgste = pgste_get_lock(ptep);
	pgste = set_pgste_bit(pgste, PGSTE_IN_BIT);
	pgste_set_unlock(ptep, pgste);
	preempt_enable();
}

/**
 * ptep_force_prot - change access rights of a locked pte
 * @mm: pointer to the process mm_struct
 * @addr: virtual address in the guest address space
 * @ptep: pointer to the page table entry
 * @prot: indicates guest access rights: PROT_NONE, PROT_READ or PROT_WRITE
 * @bit: pgste bit to set (e.g. for notification)
 *
 * Returns 0 if the access rights were changed and -EAGAIN if the current
 * and requested access rights are incompatible.
 */
int ptep_force_prot(struct mm_struct *mm, unsigned long addr,
		    pte_t *ptep, int prot, unsigned long bit)
{
	pte_t entry;
	pgste_t pgste;
	int pte_i, pte_p, nodat;

	pgste = pgste_get_lock(ptep);
	entry = *ptep;
	/* Check pte entry after all locks have been acquired */
	pte_i = pte_val(entry) & _PAGE_INVALID;
	pte_p = pte_val(entry) & _PAGE_PROTECT;
	if ((pte_i && (prot != PROT_NONE)) ||
	    (pte_p && (prot & PROT_WRITE))) {
		pgste_set_unlock(ptep, pgste);
		return -EAGAIN;
	}
	/* Change access rights and set pgste bit */
	nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
	if (prot == PROT_NONE && !pte_i) {
		ptep_flush_direct(mm, addr, ptep, nodat);
		pgste = pgste_update_all(entry, pgste, mm);
		entry = set_pte_bit(entry, __pgprot(_PAGE_INVALID));
	}
	if (prot == PROT_READ && !pte_p) {
		ptep_flush_direct(mm, addr, ptep, nodat);
		entry = clear_pte_bit(entry, __pgprot(_PAGE_INVALID));
		entry = set_pte_bit(entry, __pgprot(_PAGE_PROTECT));
	}
	pgste = set_pgste_bit(pgste, bit);
	pgste = pgste_set_pte(ptep, pgste, entry);
	pgste_set_unlock(ptep, pgste);
	return 0;
}

int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr,
		    pte_t *sptep, pte_t *tptep, pte_t pte)
{
	pgste_t spgste, tpgste;
	pte_t spte, tpte;
	int rc = -EAGAIN;

	if (!(pte_val(*tptep) & _PAGE_INVALID))
		return 0;	/* already shadowed */
	spgste = pgste_get_lock(sptep);
	spte = *sptep;
	if (!(pte_val(spte) & _PAGE_INVALID) &&
	    !((pte_val(spte) & _PAGE_PROTECT) &&
	      !(pte_val(pte) & _PAGE_PROTECT))) {
		spgste = set_pgste_bit(spgste, PGSTE_VSIE_BIT);
		tpgste = pgste_get_lock(tptep);
		tpte = __pte((pte_val(spte) & PAGE_MASK) |
			     (pte_val(pte) & _PAGE_PROTECT));
		/* don't touch the storage key - it belongs to parent pgste */
		tpgste = pgste_set_pte(tptep, tpgste, tpte);
		pgste_set_unlock(tptep, tpgste);
		rc = 1;
	}
	pgste_set_unlock(sptep, spgste);
	return rc;
}

void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep)
{
	pgste_t pgste;
	int nodat;

	pgste = pgste_get_lock(ptep);
	/* notifier is called by the caller */
	nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
	ptep_flush_direct(mm, saddr, ptep, nodat);
	/* don't touch the storage key - it belongs to parent pgste */
	pgste = pgste_set_pte(ptep, pgste, __pte(_PAGE_INVALID));
	pgste_set_unlock(ptep, pgste);
}

static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
{
	if (!non_swap_entry(entry))
		dec_mm_counter(mm, MM_SWAPENTS);
	else if (is_migration_entry(entry)) {
		struct folio *folio = pfn_swap_entry_folio(entry);

		dec_mm_counter(mm, mm_counter(folio));
	}
	free_swap_and_cache(entry);
}

void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, int reset)
{
	unsigned long pgstev;
	pgste_t pgste;
	pte_t pte;

	/* Zap unused and logically-zero pages */
	preempt_disable();
	pgste = pgste_get_lock(ptep);
	pgstev = pgste_val(pgste);
	pte = *ptep;
	if (!reset && pte_swap(pte) &&
	    ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED ||
	     (pgstev & _PGSTE_GPS_ZERO))) {
		ptep_zap_swap_entry(mm, pte_to_swp_entry(pte));
		pte_clear(mm, addr, ptep);
	}
	if (reset)
		pgste = clear_pgste_bit(pgste, _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT);
	pgste_set_unlock(ptep, pgste);
	preempt_enable();
}

void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
	unsigned long ptev;
	pgste_t pgste;

	/* Clear storage key ACC and F, but set R/C */
	preempt_disable();
	pgste = pgste_get_lock(ptep);
	pgste = clear_pgste_bit(pgste, PGSTE_ACC_BITS | PGSTE_FP_BIT);
	pgste = set_pgste_bit(pgste, PGSTE_GR_BIT | PGSTE_GC_BIT);
	ptev = pte_val(*ptep);
	if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE))
		page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 0);
	pgste_set_unlock(ptep, pgste);
	preempt_enable();
}

/*
 * Test and reset if a guest page is dirty
 */
bool ptep_test_and_clear_uc(struct mm_struct *mm, unsigned long addr,
			    pte_t *ptep)
{
	pgste_t pgste;
	pte_t pte;
	bool dirty;
	int nodat;

	pgste = pgste_get_lock(ptep);
	dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
	pgste = clear_pgste_bit(pgste, PGSTE_UC_BIT);
	pte = *ptep;
	if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
		pgste = pgste_pte_notify(mm, addr, ptep, pgste);
		nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
		ptep_ipte_global(mm, addr, ptep, nodat);
		if (machine_has_esop() || !(pte_val(pte) & _PAGE_WRITE))
			pte = set_pte_bit(pte, __pgprot(_PAGE_PROTECT));
		else
			pte = set_pte_bit(pte, __pgprot(_PAGE_INVALID));
		set_pte(ptep, pte);
	}
	pgste_set_unlock(ptep, pgste);
	return dirty;
}
EXPORT_SYMBOL_GPL(ptep_test_and_clear_uc);
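
/*
 * PGSTE_UC_BIT is the software dirty bit for guest pages: pgste_set_pte()
 * sets it whenever a writable PTE is installed, and the helper above
 * clears it while write-protecting (or invalidating, without ESOP) the
 * page so that the next guest write dirties it again, e.g. for dirty
 * logging.
 */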

int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
			  unsigned char key, bool nq)
{
	unsigned long keyul, paddr;
	spinlock_t *ptl;
	pgste_t old, new;
	pmd_t *pmdp;
	pte_t *ptep;

	/*
	 * If we don't have a PTE table and if there is no huge page mapped,
	 * we can ignore attempts to set the key to 0, because it already is 0.
	 */
	switch (pmd_lookup(mm, addr, &pmdp)) {
	case -ENOENT:
		return key ? -EFAULT : 0;
	case 0:
		break;
	default:
		return -EFAULT;
	}
again:
	ptl = pmd_lock(mm, pmdp);
	if (!pmd_present(*pmdp)) {
		spin_unlock(ptl);
		return key ? -EFAULT : 0;
	}

	if (pmd_leaf(*pmdp)) {
		paddr = pmd_val(*pmdp) & HPAGE_MASK;
		paddr |= addr & ~HPAGE_MASK;
		/*
		 * Huge pmds need quiescing operations, they are
		 * always mapped.
		 */
		page_set_storage_key(paddr, key, 1);
		spin_unlock(ptl);
		return 0;
	}
	spin_unlock(ptl);

	ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
	if (!ptep)
		goto again;
	new = old = pgste_get_lock(ptep);
	new = clear_pgste_bit(new, PGSTE_GR_BIT | PGSTE_GC_BIT |
			      PGSTE_ACC_BITS | PGSTE_FP_BIT);
	keyul = (unsigned long) key;
	new = set_pgste_bit(new, (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48);
	new = set_pgste_bit(new, (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56);
	if (!(pte_val(*ptep) & _PAGE_INVALID)) {
		unsigned long bits, skey;

		paddr = pte_val(*ptep) & PAGE_MASK;
		skey = (unsigned long) page_get_storage_key(paddr);
		bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
		skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT);
		/* Set storage key ACC and FP */
		page_set_storage_key(paddr, skey, !nq);
		/* Merge host changed & referenced into pgste */
		new = set_pgste_bit(new, bits << 52);
	}
	/* changing the guest storage key is considered a change of the page */
	if ((pgste_val(new) ^ pgste_val(old)) &
	    (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT))
		new = set_pgste_bit(new, PGSTE_UC_BIT);

	pgste_set_unlock(ptep, new);
	pte_unmap_unlock(ptep, ptl);
	return 0;
}
EXPORT_SYMBOL(set_guest_storage_key);
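
/*
 * Note the third shift above: while << 48 and << 56 file the guest R/C
 * and ACC/FP bits as in pgste_update_all(), bits << 52 merges the host
 * referenced/changed bits into PGSTE_HR_BIT/PGSTE_HC_BIT, keeping the
 * host view separate from the guest view of the same storage key.
 */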

/*
 * Conditionally set a guest storage key (handling csske).
 * oldkey will be updated when either mr or mc is set and a pointer is given.
 *
 * Returns 0 if a guest's storage key update wasn't necessary, 1 if the guest
 * storage key was updated and -EFAULT on access errors.
 */
int cond_set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
			       unsigned char key, unsigned char *oldkey,
			       bool nq, bool mr, bool mc)
{
	unsigned char tmp, mask = _PAGE_ACC_BITS | _PAGE_FP_BIT;
	int rc;

	/* we can drop the pgste lock between getting and setting the key */
	if (mr | mc) {
		rc = get_guest_storage_key(current->mm, addr, &tmp);
		if (rc)
			return rc;
		if (oldkey)
			*oldkey = tmp;
		if (!mr)
			mask |= _PAGE_REFERENCED;
		if (!mc)
			mask |= _PAGE_CHANGED;
		if (!((tmp ^ key) & mask))
			return 0;
	}
	rc = set_guest_storage_key(current->mm, addr, key, nq);
	return rc < 0 ? rc : 1;
}
EXPORT_SYMBOL(cond_set_guest_storage_key);

/*
 * Reset a guest reference bit (rrbe), returning the reference and changed bit.
 *
 * Returns < 0 in case of error, otherwise the cc to be reported to the guest.
 */
int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr)
{
	spinlock_t *ptl;
	unsigned long paddr;
	pgste_t old, new;
	pmd_t *pmdp;
	pte_t *ptep;
	int cc = 0;

	/*
	 * If we don't have a PTE table and if there is no huge page mapped,
	 * the storage key is 0 and there is nothing for us to do.
	 */
	switch (pmd_lookup(mm, addr, &pmdp)) {
	case -ENOENT:
		return 0;
	case 0:
		break;
	default:
		return -EFAULT;
	}
again:
	ptl = pmd_lock(mm, pmdp);
	if (!pmd_present(*pmdp)) {
		spin_unlock(ptl);
		return 0;
	}

	if (pmd_leaf(*pmdp)) {
		paddr = pmd_val(*pmdp) & HPAGE_MASK;
		paddr |= addr & ~HPAGE_MASK;
		cc = page_reset_referenced(paddr);
		spin_unlock(ptl);
		return cc;
	}
	spin_unlock(ptl);

	ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
	if (!ptep)
		goto again;
	new = old = pgste_get_lock(ptep);
	/* Reset guest reference bit only */
	new = clear_pgste_bit(new, PGSTE_GR_BIT);

	if (!(pte_val(*ptep) & _PAGE_INVALID)) {
		paddr = pte_val(*ptep) & PAGE_MASK;
		cc = page_reset_referenced(paddr);
		/* Merge real referenced bit into host-set */
		new = set_pgste_bit(new, ((unsigned long)cc << 53) & PGSTE_HR_BIT);
	}
	/* Reflect guest's logical view, not physical */
	cc |= (pgste_val(old) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 49;
	/* Changing the guest storage key is considered a change of the page */
	if ((pgste_val(new) ^ pgste_val(old)) & PGSTE_GR_BIT)
		new = set_pgste_bit(new, PGSTE_UC_BIT);

	pgste_set_unlock(ptep, new);
	pte_unmap_unlock(ptep, ptl);
	return cc;
}
EXPORT_SYMBOL(reset_guest_reference_bit);

int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,
			  unsigned char *key)
{
	unsigned long paddr;
	spinlock_t *ptl;
	pgste_t pgste;
	pmd_t *pmdp;
	pte_t *ptep;

	/*
	 * If we don't have a PTE table and if there is no huge page mapped,
	 * the storage key is 0.
	 */
	*key = 0;

	switch (pmd_lookup(mm, addr, &pmdp)) {
	case -ENOENT:
		return 0;
	case 0:
		break;
	default:
		return -EFAULT;
	}
again:
	ptl = pmd_lock(mm, pmdp);
	if (!pmd_present(*pmdp)) {
		spin_unlock(ptl);
		return 0;
	}

	if (pmd_leaf(*pmdp)) {
		paddr = pmd_val(*pmdp) & HPAGE_MASK;
		paddr |= addr & ~HPAGE_MASK;
		*key = page_get_storage_key(paddr);
		spin_unlock(ptl);
		return 0;
	}
	spin_unlock(ptl);

	ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
	if (!ptep)
		goto again;
	pgste = pgste_get_lock(ptep);
	*key = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
	paddr = pte_val(*ptep) & PAGE_MASK;
	if (!(pte_val(*ptep) & _PAGE_INVALID))
		*key = page_get_storage_key(paddr);
	/* Reflect guest's logical view, not physical */
	*key |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
	pgste_set_unlock(ptep, pgste);
	pte_unmap_unlock(ptep, ptl);
	return 0;
}
EXPORT_SYMBOL(get_guest_storage_key);
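
/*
 * The storage-key helpers above back the key-manipulating instructions
 * (iske/rrbe/sske and friends) when they must be handled by the host;
 * a caller would typically do something like (hypothetical sketch):
 *
 *	rc = get_guest_storage_key(current->mm, hva, &key);
 *	if (!rc)
 *		rc = cond_set_guest_storage_key(current->mm, hva, newkey,
 *						&oldkey, nq, mr, mc);
 */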

/**
 * pgste_perform_essa - perform ESSA actions on the PGSTE.
 * @mm: the memory context. It must have PGSTEs, no check is performed here!
 * @hva: the host virtual address of the page whose PGSTE is to be processed
 * @orc: the specific action to perform, see the ESSA_SET_* macros.
 * @oldpte: the PTE will be saved there if the pointer is not NULL.
 * @oldpgste: the old PGSTE will be saved there if the pointer is not NULL.
 *
 * Return: 1 if the page is to be added to the CBRL, otherwise 0,
 *	   or < 0 in case of error. -EINVAL is returned for invalid values
 *	   of orc, -EFAULT for invalid addresses.
 */
int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
		       unsigned long *oldpte, unsigned long *oldpgste)
{
	struct vm_area_struct *vma;
	unsigned long pgstev;
	spinlock_t *ptl;
	pgste_t pgste;
	pte_t *ptep;
	int res = 0;

	WARN_ON_ONCE(orc > ESSA_MAX);
	if (unlikely(orc > ESSA_MAX))
		return -EINVAL;

	vma = vma_lookup(mm, hva);
	if (!vma || is_vm_hugetlb_page(vma))
		return -EFAULT;
	ptep = get_locked_pte(mm, hva, &ptl);
	if (unlikely(!ptep))
		return -EFAULT;
	pgste = pgste_get_lock(ptep);
	pgstev = pgste_val(pgste);
	if (oldpte)
		*oldpte = pte_val(*ptep);
	if (oldpgste)
		*oldpgste = pgstev;

	switch (orc) {
	case ESSA_GET_STATE:
		break;
	case ESSA_SET_STABLE:
		pgstev &= ~(_PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT);
		pgstev |= _PGSTE_GPS_USAGE_STABLE;
		break;
	case ESSA_SET_UNUSED:
		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
		pgstev |= _PGSTE_GPS_USAGE_UNUSED;
		if (pte_val(*ptep) & _PAGE_INVALID)
			res = 1;
		break;
	case ESSA_SET_VOLATILE:
		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
		pgstev |= _PGSTE_GPS_USAGE_VOLATILE;
		if (pte_val(*ptep) & _PAGE_INVALID)
			res = 1;
		break;
	case ESSA_SET_POT_VOLATILE:
		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
		if (!(pte_val(*ptep) & _PAGE_INVALID)) {
			pgstev |= _PGSTE_GPS_USAGE_POT_VOLATILE;
			break;
		}
		if (pgstev & _PGSTE_GPS_ZERO) {
			pgstev |= _PGSTE_GPS_USAGE_VOLATILE;
			break;
		}
		if (!(pgstev & PGSTE_GC_BIT)) {
			pgstev |= _PGSTE_GPS_USAGE_VOLATILE;
			res = 1;
			break;
		}
		break;
	case ESSA_SET_STABLE_RESIDENT:
		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
		pgstev |= _PGSTE_GPS_USAGE_STABLE;
		/*
		 * Since the resident state can go away any time after this
		 * call, we will not make this page resident. We can revisit
		 * this decision if a guest will ever start using this.
		 */
		break;
	case ESSA_SET_STABLE_IF_RESIDENT:
		if (!(pte_val(*ptep) & _PAGE_INVALID)) {
			pgstev &= ~_PGSTE_GPS_USAGE_MASK;
			pgstev |= _PGSTE_GPS_USAGE_STABLE;
		}
		break;
	case ESSA_SET_STABLE_NODAT:
		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
		pgstev |= _PGSTE_GPS_USAGE_STABLE | _PGSTE_GPS_NODAT;
		break;
	default:
		/* we should never get here! */
		break;
	}
	/* If we are discarding a page, set it to logical zero */
	if (res)
		pgstev |= _PGSTE_GPS_ZERO;

	pgste = __pgste(pgstev);
	pgste_set_unlock(ptep, pgste);
	pte_unmap_unlock(ptep, ptl);
	return res;
}
EXPORT_SYMBOL(pgste_perform_essa);
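
/*
 * ESSA (Extract and Set Storage Attributes) is the instruction behind
 * collaborative memory management: the guest declares page usage states
 * (stable, unused, volatile, ...) and the host may discard pages
 * accordingly. A result of 1 asks the caller to add the page to the
 * CBRL so it can be reclaimed later.
 */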

/**
 * set_pgste_bits - set specific PGSTE bits.
 * @mm: the memory context. It must have PGSTEs, no check is performed here!
 * @hva: the host virtual address of the page whose PGSTE is to be processed
 * @bits: a bitmask representing the bits that will be touched
 * @value: the values of the bits to be written. Only the bits in the mask
 *	   will be written.
 *
 * Return: 0 on success, < 0 in case of error.
 */
int set_pgste_bits(struct mm_struct *mm, unsigned long hva,
		   unsigned long bits, unsigned long value)
{
	struct vm_area_struct *vma;
	spinlock_t *ptl;
	pgste_t new;
	pte_t *ptep;

	vma = vma_lookup(mm, hva);
	if (!vma || is_vm_hugetlb_page(vma))
		return -EFAULT;
	ptep = get_locked_pte(mm, hva, &ptl);
	if (unlikely(!ptep))
		return -EFAULT;
	new = pgste_get_lock(ptep);

	new = clear_pgste_bit(new, bits);
	new = set_pgste_bit(new, value & bits);

	pgste_set_unlock(ptep, new);
	pte_unmap_unlock(ptep, ptl);
	return 0;
}
EXPORT_SYMBOL(set_pgste_bits);

/**
 * get_pgste - get the current PGSTE for the given address.
 * @mm: the memory context. It must have PGSTEs, no check is performed here!
 * @hva: the host virtual address of the page whose PGSTE is to be processed
 * @pgstep: will be written with the current PGSTE for the given address.
 *
 * Return: 0 on success, < 0 in case of error.
 */
int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep)
{
	struct vm_area_struct *vma;
	spinlock_t *ptl;
	pte_t *ptep;

	vma = vma_lookup(mm, hva);
	if (!vma || is_vm_hugetlb_page(vma))
		return -EFAULT;
	ptep = get_locked_pte(mm, hva, &ptl);
	if (unlikely(!ptep))
		return -EFAULT;
	*pgstep = pgste_val(pgste_get(ptep));
	pte_unmap_unlock(ptep, ptl);
	return 0;
}
EXPORT_SYMBOL(get_pgste);
#endif