GitHub Repository: torvalds/linux
Path: blob/master/arch/s390/mm/gmap.c
// SPDX-License-Identifier: GPL-2.0
/*
 * KVM guest address space mapping code
 *
 * Copyright IBM Corp. 2007, 2020
 * Author(s): Martin Schwidefsky <[email protected]>
 *	      David Hildenbrand <[email protected]>
 *	      Janosch Frank <[email protected]>
 */

#include <linux/cpufeature.h>
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/pagewalk.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/swapops.h>
#include <linux/ksm.h>
#include <linux/mman.h>
#include <linux/pgtable.h>
#include <asm/page-states.h>
#include <asm/pgalloc.h>
#include <asm/machine.h>
#include <asm/gmap_helpers.h>
#include <asm/gmap.h>
#include <asm/page.h>

/*
 * The address is saved in a radix tree directly; NULL would be ambiguous,
 * since 0 is a valid address, and NULL is returned when nothing was found.
 * The lower bits are ignored by all users of the macro, so it can be used
 * to distinguish a valid address 0 from a NULL.
 */
#define VALID_GADDR_FLAG 1
#define IS_GADDR_VALID(gaddr) ((gaddr) & VALID_GADDR_FLAG)
#define MAKE_VALID_GADDR(gaddr) (((gaddr) & HPAGE_MASK) | VALID_GADDR_FLAG)

#define GMAP_SHADOW_FAKE_TABLE 1ULL

static struct page *gmap_alloc_crst(void)
{
	struct page *page;

	page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER);
	if (!page)
		return NULL;
	__arch_set_page_dat(page_to_virt(page), 1UL << CRST_ALLOC_ORDER);
	return page;
}

/**
 * gmap_alloc - allocate and initialize a guest address space
 * @limit: maximum address of the gmap address space
 *
 * Returns a guest address space structure.
 */
struct gmap *gmap_alloc(unsigned long limit)
{
	struct gmap *gmap;
	struct page *page;
	unsigned long *table;
	unsigned long etype, atype;

	if (limit < _REGION3_SIZE) {
		limit = _REGION3_SIZE - 1;
		atype = _ASCE_TYPE_SEGMENT;
		etype = _SEGMENT_ENTRY_EMPTY;
	} else if (limit < _REGION2_SIZE) {
		limit = _REGION2_SIZE - 1;
		atype = _ASCE_TYPE_REGION3;
		etype = _REGION3_ENTRY_EMPTY;
	} else if (limit < _REGION1_SIZE) {
		limit = _REGION1_SIZE - 1;
		atype = _ASCE_TYPE_REGION2;
		etype = _REGION2_ENTRY_EMPTY;
	} else {
		limit = -1UL;
		atype = _ASCE_TYPE_REGION1;
		etype = _REGION1_ENTRY_EMPTY;
	}
	gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL_ACCOUNT);
	if (!gmap)
		goto out;
	INIT_LIST_HEAD(&gmap->children);
	INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL_ACCOUNT);
	INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC | __GFP_ACCOUNT);
	INIT_RADIX_TREE(&gmap->host_to_rmap, GFP_ATOMIC | __GFP_ACCOUNT);
	spin_lock_init(&gmap->guest_table_lock);
	spin_lock_init(&gmap->shadow_lock);
	refcount_set(&gmap->ref_count, 1);
	page = gmap_alloc_crst();
	if (!page)
		goto out_free;
	table = page_to_virt(page);
	crst_table_init(table, etype);
	gmap->table = table;
	gmap->asce = atype | _ASCE_TABLE_LENGTH |
		_ASCE_USER_BITS | __pa(table);
	gmap->asce_end = limit;
	return gmap;

out_free:
	kfree(gmap);
out:
	return NULL;
}
EXPORT_SYMBOL_GPL(gmap_alloc);

/**
 * gmap_create - create a guest address space
 * @mm: pointer to the parent mm_struct
 * @limit: maximum size of the gmap address space
 *
 * Returns a guest address space structure.
 */
struct gmap *gmap_create(struct mm_struct *mm, unsigned long limit)
{
	struct gmap *gmap;
	unsigned long gmap_asce;

	gmap = gmap_alloc(limit);
	if (!gmap)
		return NULL;
	gmap->mm = mm;
	spin_lock(&mm->context.lock);
	list_add_rcu(&gmap->list, &mm->context.gmap_list);
	if (list_is_singular(&mm->context.gmap_list))
		gmap_asce = gmap->asce;
	else
		gmap_asce = -1UL;
	WRITE_ONCE(mm->context.gmap_asce, gmap_asce);
	spin_unlock(&mm->context.lock);
	return gmap;
}
EXPORT_SYMBOL_GPL(gmap_create);
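
/*
 * Usage sketch: the typical lifecycle of a guest address space, as a
 * KVM-like caller might drive it. A minimal illustration only: the limit
 * value is an arbitrary example and error handling beyond the allocation
 * check is omitted. gmap_remove() unlinks the gmap from the mm and drops
 * the initial reference; gmap_free() runs once the last gmap_put() is gone.
 *
 *	struct gmap *gmap;
 *
 *	gmap = gmap_create(current->mm, (1UL << 42) - 1);
 *	if (!gmap)
 *		return -ENOMEM;
 *	...
 *	gmap_remove(gmap);
 */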

static void gmap_flush_tlb(struct gmap *gmap)
{
	if (cpu_has_idte())
		__tlb_flush_idte(gmap->asce);
	else
		__tlb_flush_global();
}

static void gmap_radix_tree_free(struct radix_tree_root *root)
{
	struct radix_tree_iter iter;
	unsigned long indices[16];
	unsigned long index;
	void __rcu **slot;
	int i, nr;

	/* A radix tree is freed by deleting all of its entries */
	index = 0;
	do {
		nr = 0;
		radix_tree_for_each_slot(slot, root, &iter, index) {
			indices[nr] = iter.index;
			if (++nr == 16)
				break;
		}
		for (i = 0; i < nr; i++) {
			index = indices[i];
			radix_tree_delete(root, index);
		}
	} while (nr > 0);
}

static void gmap_rmap_radix_tree_free(struct radix_tree_root *root)
{
	struct gmap_rmap *rmap, *rnext, *head;
	struct radix_tree_iter iter;
	unsigned long indices[16];
	unsigned long index;
	void __rcu **slot;
	int i, nr;

	/* A radix tree is freed by deleting all of its entries */
	index = 0;
	do {
		nr = 0;
		radix_tree_for_each_slot(slot, root, &iter, index) {
			indices[nr] = iter.index;
			if (++nr == 16)
				break;
		}
		for (i = 0; i < nr; i++) {
			index = indices[i];
			head = radix_tree_delete(root, index);
			gmap_for_each_rmap_safe(rmap, rnext, head)
				kfree(rmap);
		}
	} while (nr > 0);
}

static void gmap_free_crst(unsigned long *table, bool free_ptes)
{
	bool is_segment = (table[0] & _SEGMENT_ENTRY_TYPE_MASK) == 0;
	int i;

	if (is_segment) {
		if (!free_ptes)
			goto out;
		for (i = 0; i < _CRST_ENTRIES; i++)
			if (!(table[i] & _SEGMENT_ENTRY_INVALID))
				page_table_free_pgste(page_ptdesc(phys_to_page(table[i])));
	} else {
		for (i = 0; i < _CRST_ENTRIES; i++)
			if (!(table[i] & _REGION_ENTRY_INVALID))
				gmap_free_crst(__va(table[i] & PAGE_MASK), free_ptes);
	}

out:
	free_pages((unsigned long)table, CRST_ALLOC_ORDER);
}

/**
 * gmap_free - free a guest address space
 * @gmap: pointer to the guest address space structure
 *
 * No locks required. There are no references to this gmap anymore.
 */
void gmap_free(struct gmap *gmap)
{
	/* Flush tlb of all gmaps (if not already done for shadows) */
	if (!(gmap_is_shadow(gmap) && gmap->removed))
		gmap_flush_tlb(gmap);
	/* Free all segment & region tables. */
	gmap_free_crst(gmap->table, gmap_is_shadow(gmap));

	gmap_radix_tree_free(&gmap->guest_to_host);
	gmap_radix_tree_free(&gmap->host_to_guest);

	/* Free additional data for a shadow gmap */
	if (gmap_is_shadow(gmap)) {
		gmap_rmap_radix_tree_free(&gmap->host_to_rmap);
		/* Release reference to the parent */
		gmap_put(gmap->parent);
	}

	kfree(gmap);
}
EXPORT_SYMBOL_GPL(gmap_free);

/**
 * gmap_get - increase reference counter for guest address space
 * @gmap: pointer to the guest address space structure
 *
 * Returns the gmap pointer
 */
struct gmap *gmap_get(struct gmap *gmap)
{
	refcount_inc(&gmap->ref_count);
	return gmap;
}
EXPORT_SYMBOL_GPL(gmap_get);

/**
 * gmap_put - decrease reference counter for guest address space
 * @gmap: pointer to the guest address space structure
 *
 * If the reference counter reaches zero the guest address space is freed.
 */
void gmap_put(struct gmap *gmap)
{
	if (refcount_dec_and_test(&gmap->ref_count))
		gmap_free(gmap);
}
EXPORT_SYMBOL_GPL(gmap_put);

/**
 * gmap_remove - remove a guest address space but do not free it yet
 * @gmap: pointer to the guest address space structure
 */
void gmap_remove(struct gmap *gmap)
{
	struct gmap *sg, *next;
	unsigned long gmap_asce;

	/* Remove all shadow gmaps linked to this gmap */
	if (!list_empty(&gmap->children)) {
		spin_lock(&gmap->shadow_lock);
		list_for_each_entry_safe(sg, next, &gmap->children, list) {
			list_del(&sg->list);
			gmap_put(sg);
		}
		spin_unlock(&gmap->shadow_lock);
	}
	/* Remove gmap from the per-mm list */
	spin_lock(&gmap->mm->context.lock);
	list_del_rcu(&gmap->list);
	if (list_empty(&gmap->mm->context.gmap_list))
		gmap_asce = 0;
	else if (list_is_singular(&gmap->mm->context.gmap_list))
		gmap_asce = list_first_entry(&gmap->mm->context.gmap_list,
					     struct gmap, list)->asce;
	else
		gmap_asce = -1UL;
	WRITE_ONCE(gmap->mm->context.gmap_asce, gmap_asce);
	spin_unlock(&gmap->mm->context.lock);
	synchronize_rcu();
	/* Put reference */
	gmap_put(gmap);
}
EXPORT_SYMBOL_GPL(gmap_remove);

/*
 * gmap_alloc_table is assumed to be called with mmap_lock held
 */
static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
			    unsigned long init, unsigned long gaddr)
{
	struct page *page;
	unsigned long *new;

	/* since we don't free the gmap table until gmap_free we can unlock */
	page = gmap_alloc_crst();
	if (!page)
		return -ENOMEM;
	new = page_to_virt(page);
	crst_table_init(new, init);
	spin_lock(&gmap->guest_table_lock);
	if (*table & _REGION_ENTRY_INVALID) {
		*table = __pa(new) | _REGION_ENTRY_LENGTH |
			(*table & _REGION_ENTRY_TYPE_MASK);
		page = NULL;
	}
	spin_unlock(&gmap->guest_table_lock);
	if (page)
		__free_pages(page, CRST_ALLOC_ORDER);
	return 0;
}

static unsigned long host_to_guest_lookup(struct gmap *gmap, unsigned long vmaddr)
{
	return (unsigned long)radix_tree_lookup(&gmap->host_to_guest, vmaddr >> PMD_SHIFT);
}

static unsigned long host_to_guest_delete(struct gmap *gmap, unsigned long vmaddr)
{
	return (unsigned long)radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT);
}

static pmd_t *host_to_guest_pmd_delete(struct gmap *gmap, unsigned long vmaddr,
				       unsigned long *gaddr)
{
	*gaddr = host_to_guest_delete(gmap, vmaddr);
	if (IS_GADDR_VALID(*gaddr))
		return (pmd_t *)gmap_table_walk(gmap, *gaddr, 1);
	return NULL;
}

/**
 * __gmap_unlink_by_vmaddr - unlink a single segment via a host address
 * @gmap: pointer to the guest address space structure
 * @vmaddr: address in the host process address space
 *
 * Returns 1 if a TLB flush is required
 */
static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr)
{
	unsigned long gaddr;
	int flush = 0;
	pmd_t *pmdp;

	BUG_ON(gmap_is_shadow(gmap));
	spin_lock(&gmap->guest_table_lock);

	pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr);
	if (pmdp) {
		flush = (pmd_val(*pmdp) != _SEGMENT_ENTRY_EMPTY);
		*pmdp = __pmd(_SEGMENT_ENTRY_EMPTY);
	}

	spin_unlock(&gmap->guest_table_lock);
	return flush;
}

/**
 * __gmap_unmap_by_gaddr - unmap a single segment via a guest address
 * @gmap: pointer to the guest address space structure
 * @gaddr: address in the guest address space
 *
 * Returns 1 if a TLB flush is required
 */
static int __gmap_unmap_by_gaddr(struct gmap *gmap, unsigned long gaddr)
{
	unsigned long vmaddr;

	vmaddr = (unsigned long) radix_tree_delete(&gmap->guest_to_host,
						   gaddr >> PMD_SHIFT);
	return vmaddr ? __gmap_unlink_by_vmaddr(gmap, vmaddr) : 0;
}

/**
 * gmap_unmap_segment - unmap segment from the guest address space
 * @gmap: pointer to the guest address space structure
 * @to: address in the guest address space
 * @len: length of the memory area to unmap
 *
 * Returns 0 if the unmap succeeded, -EINVAL if not.
 */
int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
{
	unsigned long off;
	int flush;

	BUG_ON(gmap_is_shadow(gmap));
	if ((to | len) & (PMD_SIZE - 1))
		return -EINVAL;
	if (len == 0 || to + len < to)
		return -EINVAL;

	flush = 0;
	mmap_write_lock(gmap->mm);
	for (off = 0; off < len; off += PMD_SIZE)
		flush |= __gmap_unmap_by_gaddr(gmap, to + off);
	mmap_write_unlock(gmap->mm);
	if (flush)
		gmap_flush_tlb(gmap);
	return 0;
}
EXPORT_SYMBOL_GPL(gmap_unmap_segment);

/**
 * gmap_map_segment - map a segment to the guest address space
 * @gmap: pointer to the guest address space structure
 * @from: source address in the parent address space
 * @to: target address in the guest address space
 * @len: length of the memory area to map
 *
 * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not.
 */
int gmap_map_segment(struct gmap *gmap, unsigned long from,
		     unsigned long to, unsigned long len)
{
	unsigned long off;
	int flush;

	BUG_ON(gmap_is_shadow(gmap));
	if ((from | to | len) & (PMD_SIZE - 1))
		return -EINVAL;
	if (len == 0 || from + len < from || to + len < to ||
	    from + len - 1 > TASK_SIZE_MAX || to + len - 1 > gmap->asce_end)
		return -EINVAL;

	flush = 0;
	mmap_write_lock(gmap->mm);
	for (off = 0; off < len; off += PMD_SIZE) {
		/* Remove old translation */
		flush |= __gmap_unmap_by_gaddr(gmap, to + off);
		/* Store new translation */
		if (radix_tree_insert(&gmap->guest_to_host,
				      (to + off) >> PMD_SHIFT,
				      (void *) from + off))
			break;
	}
	mmap_write_unlock(gmap->mm);
	if (flush)
		gmap_flush_tlb(gmap);
	if (off >= len)
		return 0;
	gmap_unmap_segment(gmap, to, len);
	return -ENOMEM;
}
EXPORT_SYMBOL_GPL(gmap_map_segment);
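
/*
 * Usage sketch: backing the first 16 MB of guest real memory with a
 * segment-aligned region of the host address space. Illustrative values
 * only; "userspace_base" stands for any PMD_SIZE (1 MB) aligned host
 * address owned by the caller, and from/to/len must all be segment
 * aligned or -EINVAL is returned.
 *
 *	rc = gmap_map_segment(gmap, userspace_base, 0, 16UL << 20);
 *	if (rc)
 *		return rc;
 *	...
 *	gmap_unmap_segment(gmap, 0, 16UL << 20);
 */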

/**
 * __gmap_translate - translate a guest address to a user space address
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: guest address
 *
 * Returns user space address which corresponds to the guest address or
 * -EFAULT if no such mapping exists.
 * This function does not establish potentially missing page table entries.
 * The mmap_lock of the mm that belongs to the address space must be held
 * when this function gets called.
 *
 * Note: Can also be called for shadow gmaps.
 */
unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr)
{
	unsigned long vmaddr;

	vmaddr = (unsigned long)
		radix_tree_lookup(&gmap->guest_to_host, gaddr >> PMD_SHIFT);
	/* Note: guest_to_host is empty for a shadow gmap */
	return vmaddr ? (vmaddr | (gaddr & ~PMD_MASK)) : -EFAULT;
}
EXPORT_SYMBOL_GPL(__gmap_translate);

/**
 * gmap_unlink - disconnect a page table from the gmap shadow tables
 * @mm: pointer to the parent mm_struct
 * @table: pointer to the host page table
 * @vmaddr: vm address associated with the host page table
 */
void gmap_unlink(struct mm_struct *mm, unsigned long *table,
		 unsigned long vmaddr)
{
	struct gmap *gmap;
	int flush;

	rcu_read_lock();
	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
		flush = __gmap_unlink_by_vmaddr(gmap, vmaddr);
		if (flush)
			gmap_flush_tlb(gmap);
	}
	rcu_read_unlock();
}

static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *old, pmd_t new,
			   unsigned long gaddr);

/**
 * __gmap_link - set up shadow page tables to connect a host to a guest address
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: guest address
 * @vmaddr: vm address
 *
 * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
 * if the vm address is already mapped to a different guest segment.
 * The mmap_lock of the mm that belongs to the address space must be held
 * when this function gets called.
 */
int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
{
	struct mm_struct *mm;
	unsigned long *table;
	spinlock_t *ptl;
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;
	u64 unprot;
	int rc;

	BUG_ON(gmap_is_shadow(gmap));
	/* Create higher level tables in the gmap page table */
	table = gmap->table;
	if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) {
		table += (gaddr & _REGION1_INDEX) >> _REGION1_SHIFT;
		if ((*table & _REGION_ENTRY_INVALID) &&
		    gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY,
				     gaddr & _REGION1_MASK))
			return -ENOMEM;
		table = __va(*table & _REGION_ENTRY_ORIGIN);
	}
	if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) {
		table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT;
		if ((*table & _REGION_ENTRY_INVALID) &&
		    gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY,
				     gaddr & _REGION2_MASK))
			return -ENOMEM;
		table = __va(*table & _REGION_ENTRY_ORIGIN);
	}
	if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) {
		table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT;
		if ((*table & _REGION_ENTRY_INVALID) &&
		    gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY,
				     gaddr & _REGION3_MASK))
			return -ENOMEM;
		table = __va(*table & _REGION_ENTRY_ORIGIN);
	}
	table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
	/* Walk the parent mm page table */
	mm = gmap->mm;
	pgd = pgd_offset(mm, vmaddr);
	VM_BUG_ON(pgd_none(*pgd));
	p4d = p4d_offset(pgd, vmaddr);
	VM_BUG_ON(p4d_none(*p4d));
	pud = pud_offset(p4d, vmaddr);
	VM_BUG_ON(pud_none(*pud));
	/* large puds cannot yet be handled */
	if (pud_leaf(*pud))
		return -EFAULT;
	pmd = pmd_offset(pud, vmaddr);
	VM_BUG_ON(pmd_none(*pmd));
	/* Are we allowed to use huge pages? */
	if (pmd_leaf(*pmd) && !gmap->mm->context.allow_gmap_hpage_1m)
		return -EFAULT;
	/* Link gmap segment table entry location to page table. */
	rc = radix_tree_preload(GFP_KERNEL_ACCOUNT);
	if (rc)
		return rc;
	ptl = pmd_lock(mm, pmd);
	spin_lock(&gmap->guest_table_lock);
	if (*table == _SEGMENT_ENTRY_EMPTY) {
		rc = radix_tree_insert(&gmap->host_to_guest,
				       vmaddr >> PMD_SHIFT,
				       (void *)MAKE_VALID_GADDR(gaddr));
		if (!rc) {
			if (pmd_leaf(*pmd)) {
				*table = (pmd_val(*pmd) &
					  _SEGMENT_ENTRY_HARDWARE_BITS_LARGE)
					 | _SEGMENT_ENTRY_GMAP_UC
					 | _SEGMENT_ENTRY;
			} else
				*table = pmd_val(*pmd) &
					 _SEGMENT_ENTRY_HARDWARE_BITS;
		}
	} else if (*table & _SEGMENT_ENTRY_PROTECT &&
		   !(pmd_val(*pmd) & _SEGMENT_ENTRY_PROTECT)) {
		unprot = (u64)*table;
		unprot &= ~_SEGMENT_ENTRY_PROTECT;
		unprot |= _SEGMENT_ENTRY_GMAP_UC;
		gmap_pmdp_xchg(gmap, (pmd_t *)table, __pmd(unprot), gaddr);
	}
	spin_unlock(&gmap->guest_table_lock);
	spin_unlock(ptl);
	radix_tree_preload_end();
	return rc;
}
EXPORT_SYMBOL(__gmap_link);
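
/*
 * Usage sketch: how a fault handler typically resolves a guest address
 * with the two helpers above (a simplified outline of the pattern used by
 * the callers in KVM; gmap and gaddr are assumed to be supplied by the
 * caller and the mmap_lock is taken in read mode).
 *
 *	mmap_read_lock(gmap->mm);
 *	vmaddr = __gmap_translate(gmap, gaddr);
 *	if (IS_ERR_VALUE(vmaddr))
 *		rc = vmaddr;
 *	else
 *		rc = __gmap_link(gmap, gaddr, vmaddr);
 *	mmap_read_unlock(gmap->mm);
 */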

/*
 * this function is assumed to be called with mmap_lock held
 */
void __gmap_zap(struct gmap *gmap, unsigned long gaddr)
{
	unsigned long vmaddr;

	mmap_assert_locked(gmap->mm);

	/* Find the vm address for the guest address */
	vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host,
						   gaddr >> PMD_SHIFT);
	if (vmaddr) {
		vmaddr |= gaddr & ~PMD_MASK;
		gmap_helper_zap_one_page(gmap->mm, vmaddr);
	}
}
EXPORT_SYMBOL_GPL(__gmap_zap);

static LIST_HEAD(gmap_notifier_list);
static DEFINE_SPINLOCK(gmap_notifier_lock);

/**
 * gmap_register_pte_notifier - register a pte invalidation callback
 * @nb: pointer to the gmap notifier block
 */
void gmap_register_pte_notifier(struct gmap_notifier *nb)
{
	spin_lock(&gmap_notifier_lock);
	list_add_rcu(&nb->list, &gmap_notifier_list);
	spin_unlock(&gmap_notifier_lock);
}
EXPORT_SYMBOL_GPL(gmap_register_pte_notifier);

/**
 * gmap_unregister_pte_notifier - remove a pte invalidation callback
 * @nb: pointer to the gmap notifier block
 */
void gmap_unregister_pte_notifier(struct gmap_notifier *nb)
{
	spin_lock(&gmap_notifier_lock);
	list_del_rcu(&nb->list);
	spin_unlock(&gmap_notifier_lock);
	synchronize_rcu();
}
EXPORT_SYMBOL_GPL(gmap_unregister_pte_notifier);

/**
 * gmap_call_notifier - call all registered invalidation callbacks
 * @gmap: pointer to guest mapping meta data structure
 * @start: start virtual address in the guest address space
 * @end: end virtual address in the guest address space
 */
static void gmap_call_notifier(struct gmap *gmap, unsigned long start,
			       unsigned long end)
{
	struct gmap_notifier *nb;

	list_for_each_entry(nb, &gmap_notifier_list, list)
		nb->notifier_call(gmap, start, end);
}

/**
 * gmap_table_walk - walk the gmap page tables
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @level: page table level to stop at
 *
 * Returns a table entry pointer for the given guest address and @level
 * @level=0 : returns a pointer to a page table entry (or NULL)
 * @level=1 : returns a pointer to a segment table entry (or NULL)
 * @level=2 : returns a pointer to a region-3 table entry (or NULL)
 * @level=3 : returns a pointer to a region-2 table entry (or NULL)
 * @level=4 : returns a pointer to a region-1 table entry (or NULL)
 *
 * Returns NULL if the gmap page tables could not be walked to the
 * requested level.
 *
 * Note: Can also be called for shadow gmaps.
 */
unsigned long *gmap_table_walk(struct gmap *gmap, unsigned long gaddr, int level)
{
	const int asce_type = gmap->asce & _ASCE_TYPE_MASK;
	unsigned long *table = gmap->table;

	if (gmap_is_shadow(gmap) && gmap->removed)
		return NULL;

	if (WARN_ON_ONCE(level > (asce_type >> 2) + 1))
		return NULL;

	if (asce_type != _ASCE_TYPE_REGION1 &&
	    gaddr & (-1UL << (31 + (asce_type >> 2) * 11)))
		return NULL;

	switch (asce_type) {
	case _ASCE_TYPE_REGION1:
		table += (gaddr & _REGION1_INDEX) >> _REGION1_SHIFT;
		if (level == 4)
			break;
		if (*table & _REGION_ENTRY_INVALID)
			return NULL;
		table = __va(*table & _REGION_ENTRY_ORIGIN);
		fallthrough;
	case _ASCE_TYPE_REGION2:
		table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT;
		if (level == 3)
			break;
		if (*table & _REGION_ENTRY_INVALID)
			return NULL;
		table = __va(*table & _REGION_ENTRY_ORIGIN);
		fallthrough;
	case _ASCE_TYPE_REGION3:
		table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT;
		if (level == 2)
			break;
		if (*table & _REGION_ENTRY_INVALID)
			return NULL;
		table = __va(*table & _REGION_ENTRY_ORIGIN);
		fallthrough;
	case _ASCE_TYPE_SEGMENT:
		table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
		if (level == 1)
			break;
		if (*table & _REGION_ENTRY_INVALID)
			return NULL;
		table = __va(*table & _SEGMENT_ENTRY_ORIGIN);
		table += (gaddr & _PAGE_INDEX) >> PAGE_SHIFT;
	}
	return table;
}
EXPORT_SYMBOL(gmap_table_walk);
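
/*
 * Usage sketch: the @level argument selects how deep gmap_table_walk()
 * descends. For example, the segment table entry and the page table entry
 * for the same guest address can be fetched as follows (both results may
 * be NULL and must be checked by the caller):
 *
 *	unsigned long *ste = gmap_table_walk(gmap, gaddr, 1);
 *	unsigned long *pte = gmap_table_walk(gmap, gaddr, 0);
 */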

/**
 * gmap_pte_op_walk - walk the gmap page table, get the page table lock
 *		      and return the pte pointer
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @ptl: pointer to the spinlock pointer
 *
 * Returns a pointer to the locked pte for a guest address, or NULL
 */
static pte_t *gmap_pte_op_walk(struct gmap *gmap, unsigned long gaddr,
			       spinlock_t **ptl)
{
	unsigned long *table;

	BUG_ON(gmap_is_shadow(gmap));
	/* Walk the gmap page table, lock and get pte pointer */
	table = gmap_table_walk(gmap, gaddr, 1); /* get segment pointer */
	if (!table || *table & _SEGMENT_ENTRY_INVALID)
		return NULL;
	return pte_alloc_map_lock(gmap->mm, (pmd_t *) table, gaddr, ptl);
}

/**
 * gmap_pte_op_fixup - force a page in and connect the gmap page table
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @vmaddr: address in the host process address space
 * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
 *
 * Returns 0 if the caller can retry __gmap_translate (might fail again),
 * -ENOMEM if out of memory and -EFAULT if anything goes wrong while fixing
 * up or connecting the gmap page table.
 */
static int gmap_pte_op_fixup(struct gmap *gmap, unsigned long gaddr,
			     unsigned long vmaddr, int prot)
{
	struct mm_struct *mm = gmap->mm;
	unsigned int fault_flags;
	bool unlocked = false;

	BUG_ON(gmap_is_shadow(gmap));
	fault_flags = (prot == PROT_WRITE) ? FAULT_FLAG_WRITE : 0;
	if (fixup_user_fault(mm, vmaddr, fault_flags, &unlocked))
		return -EFAULT;
	if (unlocked)
		/* lost mmap_lock, caller has to retry __gmap_translate */
		return 0;
	/* Connect the page tables */
	return __gmap_link(gmap, gaddr, vmaddr);
}

/**
 * gmap_pte_op_end - release the page table lock
 * @ptep: pointer to the locked pte
 * @ptl: pointer to the page table spinlock
 */
static void gmap_pte_op_end(pte_t *ptep, spinlock_t *ptl)
{
	pte_unmap_unlock(ptep, ptl);
}

/**
 * gmap_pmd_op_walk - walk the gmap tables, get the guest table lock
 *		      and return the pmd pointer
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 *
 * Returns a pointer to the pmd for a guest address, or NULL
 */
static inline pmd_t *gmap_pmd_op_walk(struct gmap *gmap, unsigned long gaddr)
{
	pmd_t *pmdp;

	BUG_ON(gmap_is_shadow(gmap));
	pmdp = (pmd_t *) gmap_table_walk(gmap, gaddr, 1);
	if (!pmdp)
		return NULL;

	/* without huge pages, there is no need to take the table lock */
	if (!gmap->mm->context.allow_gmap_hpage_1m)
		return pmd_none(*pmdp) ? NULL : pmdp;

	spin_lock(&gmap->guest_table_lock);
	if (pmd_none(*pmdp)) {
		spin_unlock(&gmap->guest_table_lock);
		return NULL;
	}

	/* 4k page table entries are locked via the pte (pte_alloc_map_lock). */
	if (!pmd_leaf(*pmdp))
		spin_unlock(&gmap->guest_table_lock);
	return pmdp;
}

/**
 * gmap_pmd_op_end - release the guest_table_lock if needed
 * @gmap: pointer to the guest mapping meta data structure
 * @pmdp: pointer to the pmd
 */
static inline void gmap_pmd_op_end(struct gmap *gmap, pmd_t *pmdp)
{
	if (pmd_leaf(*pmdp))
		spin_unlock(&gmap->guest_table_lock);
}

/*
 * gmap_protect_pmd - remove access rights to memory and set pmd notification bits
 * @pmdp: pointer to the pmd to be protected
 * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
 * @bits: notification bits to set
 *
 * Returns:
 * 0 if successfully protected
 * -EAGAIN if a fixup is needed
 * -EINVAL if unsupported notifier bits have been specified
 *
 * Expected to be called with sg->mm->mmap_lock in read and
 * guest_table_lock held.
 */
static int gmap_protect_pmd(struct gmap *gmap, unsigned long gaddr,
			    pmd_t *pmdp, int prot, unsigned long bits)
{
	int pmd_i = pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID;
	int pmd_p = pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT;
	pmd_t new = *pmdp;

	/* Fixup needed */
	if ((pmd_i && (prot != PROT_NONE)) || (pmd_p && (prot == PROT_WRITE)))
		return -EAGAIN;

	if (prot == PROT_NONE && !pmd_i) {
		new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_INVALID));
		gmap_pmdp_xchg(gmap, pmdp, new, gaddr);
	}

	if (prot == PROT_READ && !pmd_p) {
		new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_INVALID));
		new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_PROTECT));
		gmap_pmdp_xchg(gmap, pmdp, new, gaddr);
	}

	if (bits & GMAP_NOTIFY_MPROT)
		set_pmd(pmdp, set_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_GMAP_IN)));

	/* Shadow GMAP protection needs split PMDs */
	if (bits & GMAP_NOTIFY_SHADOW)
		return -EINVAL;

	return 0;
}

/*
 * gmap_protect_pte - remove access rights to memory and set pgste bits
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @pmdp: pointer to the pmd associated with the pte
 * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
 * @bits: notification bits to set
 *
 * Returns 0 if successfully protected, -ENOMEM if out of memory and
 * -EAGAIN if a fixup is needed.
 *
 * Expected to be called with sg->mm->mmap_lock in read
 */
static int gmap_protect_pte(struct gmap *gmap, unsigned long gaddr,
			    pmd_t *pmdp, int prot, unsigned long bits)
{
	int rc;
	pte_t *ptep;
	spinlock_t *ptl;
	unsigned long pbits = 0;

	if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
		return -EAGAIN;

	ptep = pte_alloc_map_lock(gmap->mm, pmdp, gaddr, &ptl);
	if (!ptep)
		return -ENOMEM;

	pbits |= (bits & GMAP_NOTIFY_MPROT) ? PGSTE_IN_BIT : 0;
	pbits |= (bits & GMAP_NOTIFY_SHADOW) ? PGSTE_VSIE_BIT : 0;
	/* Protect and unlock. */
	rc = ptep_force_prot(gmap->mm, gaddr, ptep, prot, pbits);
	gmap_pte_op_end(ptep, ptl);
	return rc;
}

/*
 * gmap_protect_one - remove access rights to memory and set pgste bits
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
 * @bits: pgste notification bits to set
 *
 * Returns:
 *   PAGE_SIZE if a small page was successfully protected;
 *   HPAGE_SIZE if a large page was successfully protected;
 *   -ENOMEM if out of memory;
 *   -EFAULT if gaddr is invalid (or mapping for shadows is missing);
 *   -EAGAIN if the guest mapping is missing and should be fixed by the caller.
 *
 * Context: Called with sg->mm->mmap_lock in read.
 */
int gmap_protect_one(struct gmap *gmap, unsigned long gaddr, int prot, unsigned long bits)
{
	pmd_t *pmdp;
	int rc = 0;

	BUG_ON(gmap_is_shadow(gmap));

	pmdp = gmap_pmd_op_walk(gmap, gaddr);
	if (!pmdp)
		return -EAGAIN;

	if (!pmd_leaf(*pmdp)) {
		rc = gmap_protect_pte(gmap, gaddr, pmdp, prot, bits);
		if (!rc)
			rc = PAGE_SIZE;
	} else {
		rc = gmap_protect_pmd(gmap, gaddr, pmdp, prot, bits);
		if (!rc)
			rc = HPAGE_SIZE;
	}
	gmap_pmd_op_end(gmap, pmdp);

	return rc;
}
EXPORT_SYMBOL_GPL(gmap_protect_one);
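
/*
 * Usage sketch: protecting a range by looping over gmap_protect_one() and
 * retrying after a fixup on -EAGAIN. This is a simplified outline of the
 * loop that callers build on top of this helper; gaddr and end are assumed
 * to come from the caller, and the elided fixup step stands for faulting
 * the page in and re-linking it (as gmap_pte_op_fixup() does).
 *
 *	while (gaddr < end) {
 *		rc = gmap_protect_one(gmap, gaddr, PROT_READ, GMAP_NOTIFY_MPROT);
 *		if (rc > 0) {
 *			gaddr += rc;
 *			continue;
 *		}
 *		if (rc != -EAGAIN)
 *			break;
 *		... fix up the missing mapping and retry ...
 *	}
 */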

/**
 * gmap_read_table - get an unsigned long value from a guest page table using
 *                   absolute addressing, without marking the page referenced.
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @val: pointer to the unsigned long value to return
 *
 * Returns 0 if the value was read, -ENOMEM if out of memory and -EFAULT
 * if reading using the virtual address failed. -EINVAL if called on a gmap
 * shadow.
 *
 * Called with gmap->mm->mmap_lock in read.
 */
int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val)
{
	unsigned long address, vmaddr;
	spinlock_t *ptl;
	pte_t *ptep, pte;
	int rc;

	if (gmap_is_shadow(gmap))
		return -EINVAL;

	while (1) {
		rc = -EAGAIN;
		ptep = gmap_pte_op_walk(gmap, gaddr, &ptl);
		if (ptep) {
			pte = *ptep;
			if (pte_present(pte) && (pte_val(pte) & _PAGE_READ)) {
				address = pte_val(pte) & PAGE_MASK;
				address += gaddr & ~PAGE_MASK;
				*val = *(unsigned long *)__va(address);
				set_pte(ptep, set_pte_bit(*ptep, __pgprot(_PAGE_YOUNG)));
				/* Do *NOT* clear the _PAGE_INVALID bit! */
				rc = 0;
			}
			gmap_pte_op_end(ptep, ptl);
		}
		if (!rc)
			break;
		vmaddr = __gmap_translate(gmap, gaddr);
		if (IS_ERR_VALUE(vmaddr)) {
			rc = vmaddr;
			break;
		}
		rc = gmap_pte_op_fixup(gmap, gaddr, vmaddr, PROT_READ);
		if (rc)
			break;
	}
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_read_table);

/**
 * gmap_insert_rmap - add a rmap to the host_to_rmap radix tree
 * @sg: pointer to the shadow guest address space structure
 * @vmaddr: vm address associated with the rmap
 * @rmap: pointer to the rmap structure
 *
 * Called with the sg->guest_table_lock
 */
static inline void gmap_insert_rmap(struct gmap *sg, unsigned long vmaddr,
				    struct gmap_rmap *rmap)
{
	struct gmap_rmap *temp;
	void __rcu **slot;

	BUG_ON(!gmap_is_shadow(sg));
	slot = radix_tree_lookup_slot(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT);
	if (slot) {
		rmap->next = radix_tree_deref_slot_protected(slot,
							&sg->guest_table_lock);
		for (temp = rmap->next; temp; temp = temp->next) {
			if (temp->raddr == rmap->raddr) {
				kfree(rmap);
				return;
			}
		}
		radix_tree_replace_slot(&sg->host_to_rmap, slot, rmap);
	} else {
		rmap->next = NULL;
		radix_tree_insert(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT,
				  rmap);
	}
}

/**
 * gmap_protect_rmap - restrict access rights to memory (RO) and create an rmap
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow gmap
 * @paddr: address in the parent guest address space
 * @len: length of the memory area to protect
 *
 * Returns 0 if successfully protected and the rmap was created, -ENOMEM
 * if out of memory and -EFAULT if paddr is invalid.
 */
static int gmap_protect_rmap(struct gmap *sg, unsigned long raddr,
			     unsigned long paddr, unsigned long len)
{
	struct gmap *parent;
	struct gmap_rmap *rmap;
	unsigned long vmaddr;
	spinlock_t *ptl;
	pte_t *ptep;
	int rc;

	BUG_ON(!gmap_is_shadow(sg));
	parent = sg->parent;
	while (len) {
		vmaddr = __gmap_translate(parent, paddr);
		if (IS_ERR_VALUE(vmaddr))
			return vmaddr;
		rmap = kzalloc(sizeof(*rmap), GFP_KERNEL_ACCOUNT);
		if (!rmap)
			return -ENOMEM;
		rmap->raddr = raddr;
		rc = radix_tree_preload(GFP_KERNEL_ACCOUNT);
		if (rc) {
			kfree(rmap);
			return rc;
		}
		rc = -EAGAIN;
		ptep = gmap_pte_op_walk(parent, paddr, &ptl);
		if (ptep) {
			spin_lock(&sg->guest_table_lock);
			rc = ptep_force_prot(parent->mm, paddr, ptep, PROT_READ,
					     PGSTE_VSIE_BIT);
			if (!rc)
				gmap_insert_rmap(sg, vmaddr, rmap);
			spin_unlock(&sg->guest_table_lock);
			gmap_pte_op_end(ptep, ptl);
		}
		radix_tree_preload_end();
		if (rc) {
			kfree(rmap);
			rc = gmap_pte_op_fixup(parent, paddr, vmaddr, PROT_READ);
			if (rc)
				return rc;
			continue;
		}
		paddr += PAGE_SIZE;
		len -= PAGE_SIZE;
	}
	return 0;
}

#define _SHADOW_RMAP_MASK	0x7
#define _SHADOW_RMAP_REGION1	0x5
#define _SHADOW_RMAP_REGION2	0x4
#define _SHADOW_RMAP_REGION3	0x3
#define _SHADOW_RMAP_SEGMENT	0x2
#define _SHADOW_RMAP_PGTABLE	0x1
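
/*
 * The low bits of an rmap address encode which level of the shadow table
 * hierarchy the rmap refers to, so one value carries both the shadow
 * address and the level. For example, the segment-level rmap created by
 * gmap_shadow_pgt() is built as:
 *
 *	rmap->raddr = (saddr & _SEGMENT_MASK) | _SHADOW_RMAP_SEGMENT;
 */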

/**
 * gmap_idte_one - invalidate a single region or segment table entry
 * @asce: region or segment table *origin* + table-type bits
 * @vaddr: virtual address to identify the table entry to flush
 *
 * The invalid bit of a single region or segment table entry is set
 * and the associated TLB entries depending on the entry are flushed.
 * The table-type of the @asce identifies the portion of the @vaddr
 * that is used as the invalidation index.
 */
static inline void gmap_idte_one(unsigned long asce, unsigned long vaddr)
{
	asm volatile(
		"	idte	%0,0,%1"
		: : "a" (asce), "a" (vaddr) : "cc", "memory");
}

/**
 * gmap_unshadow_page - remove a page from a shadow page table
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 *
 * Called with the sg->guest_table_lock
 */
static void gmap_unshadow_page(struct gmap *sg, unsigned long raddr)
{
	unsigned long *table;

	BUG_ON(!gmap_is_shadow(sg));
	table = gmap_table_walk(sg, raddr, 0); /* get page table pointer */
	if (!table || *table & _PAGE_INVALID)
		return;
	gmap_call_notifier(sg, raddr, raddr + PAGE_SIZE - 1);
	ptep_unshadow_pte(sg->mm, raddr, (pte_t *) table);
}

/**
 * __gmap_unshadow_pgt - remove all entries from a shadow page table
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 * @pgt: pointer to the start of a shadow page table
 *
 * Called with the sg->guest_table_lock
 */
static void __gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr,
				unsigned long *pgt)
{
	int i;

	BUG_ON(!gmap_is_shadow(sg));
	for (i = 0; i < _PAGE_ENTRIES; i++, raddr += PAGE_SIZE)
		pgt[i] = _PAGE_INVALID;
}

/**
 * gmap_unshadow_pgt - remove a shadow page table from a segment entry
 * @sg: pointer to the shadow guest address space structure
 * @raddr: address in the shadow guest address space
 *
 * Called with the sg->guest_table_lock
 */
static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr)
{
	unsigned long *ste;
	phys_addr_t sto, pgt;
	struct ptdesc *ptdesc;

	BUG_ON(!gmap_is_shadow(sg));
	ste = gmap_table_walk(sg, raddr, 1); /* get segment pointer */
	if (!ste || !(*ste & _SEGMENT_ENTRY_ORIGIN))
		return;
	gmap_call_notifier(sg, raddr, raddr + _SEGMENT_SIZE - 1);
	sto = __pa(ste - ((raddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT));
	gmap_idte_one(sto | _ASCE_TYPE_SEGMENT, raddr);
	pgt = *ste & _SEGMENT_ENTRY_ORIGIN;
	*ste = _SEGMENT_ENTRY_EMPTY;
	__gmap_unshadow_pgt(sg, raddr, __va(pgt));
	/* Free page table */
	ptdesc = page_ptdesc(phys_to_page(pgt));
	page_table_free_pgste(ptdesc);
}

/**
 * __gmap_unshadow_sgt - remove all entries from a shadow segment table
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 * @sgt: pointer to the start of a shadow segment table
 *
 * Called with the sg->guest_table_lock
 */
static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr,
				unsigned long *sgt)
{
	struct ptdesc *ptdesc;
	phys_addr_t pgt;
	int i;

	BUG_ON(!gmap_is_shadow(sg));
	for (i = 0; i < _CRST_ENTRIES; i++, raddr += _SEGMENT_SIZE) {
		if (!(sgt[i] & _SEGMENT_ENTRY_ORIGIN))
			continue;
		pgt = sgt[i] & _REGION_ENTRY_ORIGIN;
		sgt[i] = _SEGMENT_ENTRY_EMPTY;
		__gmap_unshadow_pgt(sg, raddr, __va(pgt));
		/* Free page table */
		ptdesc = page_ptdesc(phys_to_page(pgt));
		page_table_free_pgste(ptdesc);
	}
}

/**
 * gmap_unshadow_sgt - remove a shadow segment table from a region-3 entry
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 *
 * Called with the shadow->guest_table_lock
 */
static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr)
{
	unsigned long r3o, *r3e;
	phys_addr_t sgt;
	struct page *page;

	BUG_ON(!gmap_is_shadow(sg));
	r3e = gmap_table_walk(sg, raddr, 2); /* get region-3 pointer */
	if (!r3e || !(*r3e & _REGION_ENTRY_ORIGIN))
		return;
	gmap_call_notifier(sg, raddr, raddr + _REGION3_SIZE - 1);
	r3o = (unsigned long) (r3e - ((raddr & _REGION3_INDEX) >> _REGION3_SHIFT));
	gmap_idte_one(__pa(r3o) | _ASCE_TYPE_REGION3, raddr);
	sgt = *r3e & _REGION_ENTRY_ORIGIN;
	*r3e = _REGION3_ENTRY_EMPTY;
	__gmap_unshadow_sgt(sg, raddr, __va(sgt));
	/* Free segment table */
	page = phys_to_page(sgt);
	__free_pages(page, CRST_ALLOC_ORDER);
}

/**
 * __gmap_unshadow_r3t - remove all entries from a shadow region-3 table
 * @sg: pointer to the shadow guest address space structure
 * @raddr: address in the shadow guest address space
 * @r3t: pointer to the start of a shadow region-3 table
 *
 * Called with the sg->guest_table_lock
 */
static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr,
				unsigned long *r3t)
{
	struct page *page;
	phys_addr_t sgt;
	int i;

	BUG_ON(!gmap_is_shadow(sg));
	for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION3_SIZE) {
		if (!(r3t[i] & _REGION_ENTRY_ORIGIN))
			continue;
		sgt = r3t[i] & _REGION_ENTRY_ORIGIN;
		r3t[i] = _REGION3_ENTRY_EMPTY;
		__gmap_unshadow_sgt(sg, raddr, __va(sgt));
		/* Free segment table */
		page = phys_to_page(sgt);
		__free_pages(page, CRST_ALLOC_ORDER);
	}
}

/**
 * gmap_unshadow_r3t - remove a shadow region-3 table from a region-2 entry
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 *
 * Called with the sg->guest_table_lock
 */
static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr)
{
	unsigned long r2o, *r2e;
	phys_addr_t r3t;
	struct page *page;

	BUG_ON(!gmap_is_shadow(sg));
	r2e = gmap_table_walk(sg, raddr, 3); /* get region-2 pointer */
	if (!r2e || !(*r2e & _REGION_ENTRY_ORIGIN))
		return;
	gmap_call_notifier(sg, raddr, raddr + _REGION2_SIZE - 1);
	r2o = (unsigned long) (r2e - ((raddr & _REGION2_INDEX) >> _REGION2_SHIFT));
	gmap_idte_one(__pa(r2o) | _ASCE_TYPE_REGION2, raddr);
	r3t = *r2e & _REGION_ENTRY_ORIGIN;
	*r2e = _REGION2_ENTRY_EMPTY;
	__gmap_unshadow_r3t(sg, raddr, __va(r3t));
	/* Free region 3 table */
	page = phys_to_page(r3t);
	__free_pages(page, CRST_ALLOC_ORDER);
}

/**
 * __gmap_unshadow_r2t - remove all entries from a shadow region-2 table
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 * @r2t: pointer to the start of a shadow region-2 table
 *
 * Called with the sg->guest_table_lock
 */
static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr,
				unsigned long *r2t)
{
	phys_addr_t r3t;
	struct page *page;
	int i;

	BUG_ON(!gmap_is_shadow(sg));
	for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION2_SIZE) {
		if (!(r2t[i] & _REGION_ENTRY_ORIGIN))
			continue;
		r3t = r2t[i] & _REGION_ENTRY_ORIGIN;
		r2t[i] = _REGION2_ENTRY_EMPTY;
		__gmap_unshadow_r3t(sg, raddr, __va(r3t));
		/* Free region 3 table */
		page = phys_to_page(r3t);
		__free_pages(page, CRST_ALLOC_ORDER);
	}
}

/**
 * gmap_unshadow_r2t - remove a shadow region-2 table from a region-1 entry
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 *
 * Called with the sg->guest_table_lock
 */
static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr)
{
	unsigned long r1o, *r1e;
	struct page *page;
	phys_addr_t r2t;

	BUG_ON(!gmap_is_shadow(sg));
	r1e = gmap_table_walk(sg, raddr, 4); /* get region-1 pointer */
	if (!r1e || !(*r1e & _REGION_ENTRY_ORIGIN))
		return;
	gmap_call_notifier(sg, raddr, raddr + _REGION1_SIZE - 1);
	r1o = (unsigned long) (r1e - ((raddr & _REGION1_INDEX) >> _REGION1_SHIFT));
	gmap_idte_one(__pa(r1o) | _ASCE_TYPE_REGION1, raddr);
	r2t = *r1e & _REGION_ENTRY_ORIGIN;
	*r1e = _REGION1_ENTRY_EMPTY;
	__gmap_unshadow_r2t(sg, raddr, __va(r2t));
	/* Free region 2 table */
	page = phys_to_page(r2t);
	__free_pages(page, CRST_ALLOC_ORDER);
}

/**
 * __gmap_unshadow_r1t - remove all entries from a shadow region-1 table
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 * @r1t: pointer to the start of a shadow region-1 table
 *
 * Called with the shadow->guest_table_lock
 */
static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr,
				unsigned long *r1t)
{
	unsigned long asce;
	struct page *page;
	phys_addr_t r2t;
	int i;

	BUG_ON(!gmap_is_shadow(sg));
	asce = __pa(r1t) | _ASCE_TYPE_REGION1;
	for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION1_SIZE) {
		if (!(r1t[i] & _REGION_ENTRY_ORIGIN))
			continue;
		r2t = r1t[i] & _REGION_ENTRY_ORIGIN;
		__gmap_unshadow_r2t(sg, raddr, __va(r2t));
		/* Clear entry and flush translation r1t -> r2t */
		gmap_idte_one(asce, raddr);
		r1t[i] = _REGION1_ENTRY_EMPTY;
		/* Free region 2 table */
		page = phys_to_page(r2t);
		__free_pages(page, CRST_ALLOC_ORDER);
	}
}

/**
 * gmap_unshadow - remove a shadow page table completely
 * @sg: pointer to the shadow guest address space structure
 *
 * Called with sg->guest_table_lock
 */
void gmap_unshadow(struct gmap *sg)
{
	unsigned long *table;

	BUG_ON(!gmap_is_shadow(sg));
	if (sg->removed)
		return;
	sg->removed = 1;
	gmap_call_notifier(sg, 0, -1UL);
	gmap_flush_tlb(sg);
	table = __va(sg->asce & _ASCE_ORIGIN);
	switch (sg->asce & _ASCE_TYPE_MASK) {
	case _ASCE_TYPE_REGION1:
		__gmap_unshadow_r1t(sg, 0, table);
		break;
	case _ASCE_TYPE_REGION2:
		__gmap_unshadow_r2t(sg, 0, table);
		break;
	case _ASCE_TYPE_REGION3:
		__gmap_unshadow_r3t(sg, 0, table);
		break;
	case _ASCE_TYPE_SEGMENT:
		__gmap_unshadow_sgt(sg, 0, table);
		break;
	}
}
EXPORT_SYMBOL(gmap_unshadow);

/**
 * gmap_shadow_r2t - create an empty shadow region 2 table
 * @sg: pointer to the shadow guest address space structure
 * @saddr: faulting address in the shadow gmap
 * @r2t: parent gmap address of the region 2 table to get shadowed
 * @fake: r2t references contiguous guest memory block, not a r2t
 *
 * The r2t parameter specifies the address of the source table. The
 * four pages of the source table are made read-only in the parent gmap
 * address space. A write to the source table area @r2t will automatically
 * remove the shadow r2 table and all of its descendants.
 *
 * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the
 * shadow table structure is incomplete, -ENOMEM if out of memory and
 * -EFAULT if an address in the parent gmap could not be resolved.
 *
 * Called with sg->mm->mmap_lock in read.
 */
int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
		    int fake)
{
	unsigned long raddr, origin, offset, len;
	unsigned long *table;
	phys_addr_t s_r2t;
	struct page *page;
	int rc;

	BUG_ON(!gmap_is_shadow(sg));
	/* Allocate a shadow region second table */
	page = gmap_alloc_crst();
	if (!page)
		return -ENOMEM;
	s_r2t = page_to_phys(page);
	/* Install shadow region second table */
	spin_lock(&sg->guest_table_lock);
	table = gmap_table_walk(sg, saddr, 4); /* get region-1 pointer */
	if (!table) {
		rc = -EAGAIN;		/* Race with unshadow */
		goto out_free;
	}
	if (!(*table & _REGION_ENTRY_INVALID)) {
		rc = 0;			/* Already established */
		goto out_free;
	} else if (*table & _REGION_ENTRY_ORIGIN) {
		rc = -EAGAIN;		/* Race with shadow */
		goto out_free;
	}
	crst_table_init(__va(s_r2t), _REGION2_ENTRY_EMPTY);
	/* mark as invalid as long as the parent table is not protected */
	*table = s_r2t | _REGION_ENTRY_LENGTH |
		 _REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID;
	if (sg->edat_level >= 1)
		*table |= (r2t & _REGION_ENTRY_PROTECT);
	if (fake) {
		/* nothing to protect for fake tables */
		*table &= ~_REGION_ENTRY_INVALID;
		spin_unlock(&sg->guest_table_lock);
		return 0;
	}
	spin_unlock(&sg->guest_table_lock);
	/* Make r2t read-only in parent gmap page table */
	raddr = (saddr & _REGION1_MASK) | _SHADOW_RMAP_REGION1;
	origin = r2t & _REGION_ENTRY_ORIGIN;
	offset = ((r2t & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
	len = ((r2t & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
	rc = gmap_protect_rmap(sg, raddr, origin + offset, len);
	spin_lock(&sg->guest_table_lock);
	if (!rc) {
		table = gmap_table_walk(sg, saddr, 4);
		if (!table || (*table & _REGION_ENTRY_ORIGIN) != s_r2t)
			rc = -EAGAIN;		/* Race with unshadow */
		else
			*table &= ~_REGION_ENTRY_INVALID;
	} else {
		gmap_unshadow_r2t(sg, raddr);
	}
	spin_unlock(&sg->guest_table_lock);
	return rc;
out_free:
	spin_unlock(&sg->guest_table_lock);
	__free_pages(page, CRST_ALLOC_ORDER);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_shadow_r2t);

/**
 * gmap_shadow_r3t - create a shadow region 3 table
 * @sg: pointer to the shadow guest address space structure
 * @saddr: faulting address in the shadow gmap
 * @r3t: parent gmap address of the region 3 table to get shadowed
 * @fake: r3t references contiguous guest memory block, not a r3t
 *
 * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the
 * shadow table structure is incomplete, -ENOMEM if out of memory and
 * -EFAULT if an address in the parent gmap could not be resolved.
 *
 * Called with sg->mm->mmap_lock in read.
 */
int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
		    int fake)
{
	unsigned long raddr, origin, offset, len;
	unsigned long *table;
	phys_addr_t s_r3t;
	struct page *page;
	int rc;

	BUG_ON(!gmap_is_shadow(sg));
	/* Allocate a shadow region third table */
	page = gmap_alloc_crst();
	if (!page)
		return -ENOMEM;
	s_r3t = page_to_phys(page);
	/* Install shadow region third table */
	spin_lock(&sg->guest_table_lock);
	table = gmap_table_walk(sg, saddr, 3); /* get region-2 pointer */
	if (!table) {
		rc = -EAGAIN;		/* Race with unshadow */
		goto out_free;
	}
	if (!(*table & _REGION_ENTRY_INVALID)) {
		rc = 0;			/* Already established */
		goto out_free;
	} else if (*table & _REGION_ENTRY_ORIGIN) {
		rc = -EAGAIN;		/* Race with shadow */
		goto out_free;
	}
	crst_table_init(__va(s_r3t), _REGION3_ENTRY_EMPTY);
	/* mark as invalid as long as the parent table is not protected */
	*table = s_r3t | _REGION_ENTRY_LENGTH |
		 _REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID;
	if (sg->edat_level >= 1)
		*table |= (r3t & _REGION_ENTRY_PROTECT);
	if (fake) {
		/* nothing to protect for fake tables */
		*table &= ~_REGION_ENTRY_INVALID;
		spin_unlock(&sg->guest_table_lock);
		return 0;
	}
	spin_unlock(&sg->guest_table_lock);
	/* Make r3t read-only in parent gmap page table */
	raddr = (saddr & _REGION2_MASK) | _SHADOW_RMAP_REGION2;
	origin = r3t & _REGION_ENTRY_ORIGIN;
	offset = ((r3t & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
	len = ((r3t & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
	rc = gmap_protect_rmap(sg, raddr, origin + offset, len);
	spin_lock(&sg->guest_table_lock);
	if (!rc) {
		table = gmap_table_walk(sg, saddr, 3);
		if (!table || (*table & _REGION_ENTRY_ORIGIN) != s_r3t)
			rc = -EAGAIN;		/* Race with unshadow */
		else
			*table &= ~_REGION_ENTRY_INVALID;
	} else {
		gmap_unshadow_r3t(sg, raddr);
	}
	spin_unlock(&sg->guest_table_lock);
	return rc;
out_free:
	spin_unlock(&sg->guest_table_lock);
	__free_pages(page, CRST_ALLOC_ORDER);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_shadow_r3t);

/**
 * gmap_shadow_sgt - create a shadow segment table
 * @sg: pointer to the shadow guest address space structure
 * @saddr: faulting address in the shadow gmap
 * @sgt: parent gmap address of the segment table to get shadowed
 * @fake: sgt references contiguous guest memory block, not a sgt
 *
 * Returns: 0 if successfully shadowed or already shadowed, -EAGAIN if the
 * shadow table structure is incomplete, -ENOMEM if out of memory and
 * -EFAULT if an address in the parent gmap could not be resolved.
 *
 * Called with sg->mm->mmap_lock in read.
 */
int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
		    int fake)
{
	unsigned long raddr, origin, offset, len;
	unsigned long *table;
	phys_addr_t s_sgt;
	struct page *page;
	int rc;

	BUG_ON(!gmap_is_shadow(sg) || (sgt & _REGION3_ENTRY_LARGE));
	/* Allocate a shadow segment table */
	page = gmap_alloc_crst();
	if (!page)
		return -ENOMEM;
	s_sgt = page_to_phys(page);
	/* Install shadow segment table */
	spin_lock(&sg->guest_table_lock);
	table = gmap_table_walk(sg, saddr, 2); /* get region-3 pointer */
	if (!table) {
		rc = -EAGAIN;		/* Race with unshadow */
		goto out_free;
	}
	if (!(*table & _REGION_ENTRY_INVALID)) {
		rc = 0;			/* Already established */
		goto out_free;
	} else if (*table & _REGION_ENTRY_ORIGIN) {
		rc = -EAGAIN;		/* Race with shadow */
		goto out_free;
	}
	crst_table_init(__va(s_sgt), _SEGMENT_ENTRY_EMPTY);
	/* mark as invalid as long as the parent table is not protected */
	*table = s_sgt | _REGION_ENTRY_LENGTH |
		 _REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID;
	if (sg->edat_level >= 1)
		*table |= sgt & _REGION_ENTRY_PROTECT;
	if (fake) {
		/* nothing to protect for fake tables */
		*table &= ~_REGION_ENTRY_INVALID;
		spin_unlock(&sg->guest_table_lock);
		return 0;
	}
	spin_unlock(&sg->guest_table_lock);
	/* Make sgt read-only in parent gmap page table */
	raddr = (saddr & _REGION3_MASK) | _SHADOW_RMAP_REGION3;
	origin = sgt & _REGION_ENTRY_ORIGIN;
	offset = ((sgt & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
	len = ((sgt & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
	rc = gmap_protect_rmap(sg, raddr, origin + offset, len);
	spin_lock(&sg->guest_table_lock);
	if (!rc) {
		table = gmap_table_walk(sg, saddr, 2);
		if (!table || (*table & _REGION_ENTRY_ORIGIN) != s_sgt)
			rc = -EAGAIN;		/* Race with unshadow */
		else
			*table &= ~_REGION_ENTRY_INVALID;
	} else {
		gmap_unshadow_sgt(sg, raddr);
	}
	spin_unlock(&sg->guest_table_lock);
	return rc;
out_free:
	spin_unlock(&sg->guest_table_lock);
	__free_pages(page, CRST_ALLOC_ORDER);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_shadow_sgt);
1695
1696
static void gmap_pgste_set_pgt_addr(struct ptdesc *ptdesc, unsigned long pgt_addr)
1697
{
1698
unsigned long *pgstes = page_to_virt(ptdesc_page(ptdesc));
1699
1700
pgstes += _PAGE_ENTRIES;
1701
1702
pgstes[0] &= ~PGSTE_ST2_MASK;
1703
pgstes[1] &= ~PGSTE_ST2_MASK;
1704
pgstes[2] &= ~PGSTE_ST2_MASK;
1705
pgstes[3] &= ~PGSTE_ST2_MASK;
1706
1707
pgstes[0] |= (pgt_addr >> 16) & PGSTE_ST2_MASK;
1708
pgstes[1] |= pgt_addr & PGSTE_ST2_MASK;
1709
pgstes[2] |= (pgt_addr << 16) & PGSTE_ST2_MASK;
1710
pgstes[3] |= (pgt_addr << 32) & PGSTE_ST2_MASK;
1711
}

/**
 * gmap_shadow_pgt - instantiate a shadow page table
 * @sg: pointer to the shadow guest address space structure
 * @saddr: faulting address in the shadow gmap
 * @pgt: parent gmap address of the page table to get shadowed
 * @fake: pgt references contiguous guest memory block, not a pgtable
 *
 * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the
 * shadow table structure is incomplete, -ENOMEM if out of memory and
 * -EFAULT if an address in the parent gmap could not be resolved.
 *
 * Called with sg->mm->mmap_lock in read.
 */
int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
                    int fake)
{
        unsigned long raddr, origin;
        unsigned long *table;
        struct ptdesc *ptdesc;
        phys_addr_t s_pgt;
        int rc;

        BUG_ON(!gmap_is_shadow(sg) || (pgt & _SEGMENT_ENTRY_LARGE));
        /* Allocate a shadow page table */
        ptdesc = page_table_alloc_pgste(sg->mm);
        if (!ptdesc)
                return -ENOMEM;
        origin = pgt & _SEGMENT_ENTRY_ORIGIN;
        if (fake)
                origin |= GMAP_SHADOW_FAKE_TABLE;
        gmap_pgste_set_pgt_addr(ptdesc, origin);
        s_pgt = page_to_phys(ptdesc_page(ptdesc));
        /* Install shadow page table */
        spin_lock(&sg->guest_table_lock);
        table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */
        if (!table) {
                rc = -EAGAIN;           /* Race with unshadow */
                goto out_free;
        }
        if (!(*table & _SEGMENT_ENTRY_INVALID)) {
                rc = 0;                 /* Already established */
                goto out_free;
        } else if (*table & _SEGMENT_ENTRY_ORIGIN) {
                rc = -EAGAIN;           /* Race with shadow */
                goto out_free;
        }
        /* mark as invalid as long as the parent table is not protected */
        *table = (unsigned long) s_pgt | _SEGMENT_ENTRY |
                 (pgt & _SEGMENT_ENTRY_PROTECT) | _SEGMENT_ENTRY_INVALID;
        if (fake) {
                /* nothing to protect for fake tables */
                *table &= ~_SEGMENT_ENTRY_INVALID;
                spin_unlock(&sg->guest_table_lock);
                return 0;
        }
        spin_unlock(&sg->guest_table_lock);
        /* Make pgt read-only in parent gmap page table (not the pgste) */
        raddr = (saddr & _SEGMENT_MASK) | _SHADOW_RMAP_SEGMENT;
        origin = pgt & _SEGMENT_ENTRY_ORIGIN & PAGE_MASK;
        rc = gmap_protect_rmap(sg, raddr, origin, PAGE_SIZE);
        spin_lock(&sg->guest_table_lock);
        if (!rc) {
                table = gmap_table_walk(sg, saddr, 1);
                if (!table || (*table & _SEGMENT_ENTRY_ORIGIN) != s_pgt)
                        rc = -EAGAIN;           /* Race with unshadow */
                else
                        *table &= ~_SEGMENT_ENTRY_INVALID;
        } else {
                gmap_unshadow_pgt(sg, raddr);
        }
        spin_unlock(&sg->guest_table_lock);
        return rc;
out_free:
        spin_unlock(&sg->guest_table_lock);
        page_table_free_pgste(ptdesc);
        return rc;
}
EXPORT_SYMBOL_GPL(gmap_shadow_pgt);
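
/*
 * Illustrative sketch, not part of the original file: a minimal wrapper a
 * caller could use around gmap_shadow_pgt(). It assumes sg->mm->mmap_lock is
 * already held in read mode and that "pgt" and "fake" were taken from the
 * parent's segment table entry; the helper name is hypothetical. -EAGAIN
 * means the call raced with a shadow or unshadow operation and the whole
 * shadow fault should simply be retried by the caller.
 *
 *      static int example_instantiate_shadow_pgt(struct gmap *sg,
 *                                                unsigned long saddr,
 *                                                unsigned long pgt, int fake)
 *      {
 *              int rc;
 *
 *              rc = gmap_shadow_pgt(sg, saddr, pgt, fake);
 *              if (rc == -EAGAIN)
 *                      pr_debug("gmap shadow pgt raced, retrying fault\n");
 *              return rc;
 *      }
 */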

/**
 * gmap_shadow_page - create a shadow page mapping
 * @sg: pointer to the shadow guest address space structure
 * @saddr: faulting address in the shadow gmap
 * @pte: pte in parent gmap address space to get shadowed
 *
 * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the
 * shadow table structure is incomplete, -ENOMEM if out of memory and
 * -EFAULT if an address in the parent gmap could not be resolved.
 *
 * Called with sg->mm->mmap_lock in read.
 */
int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte)
{
        struct gmap *parent;
        struct gmap_rmap *rmap;
        unsigned long vmaddr, paddr;
        spinlock_t *ptl;
        pte_t *sptep, *tptep;
        int prot;
        int rc;

        BUG_ON(!gmap_is_shadow(sg));
        parent = sg->parent;
        prot = (pte_val(pte) & _PAGE_PROTECT) ? PROT_READ : PROT_WRITE;

        rmap = kzalloc(sizeof(*rmap), GFP_KERNEL_ACCOUNT);
        if (!rmap)
                return -ENOMEM;
        rmap->raddr = (saddr & PAGE_MASK) | _SHADOW_RMAP_PGTABLE;

        while (1) {
                paddr = pte_val(pte) & PAGE_MASK;
                vmaddr = __gmap_translate(parent, paddr);
                if (IS_ERR_VALUE(vmaddr)) {
                        rc = vmaddr;
                        break;
                }
                rc = radix_tree_preload(GFP_KERNEL_ACCOUNT);
                if (rc)
                        break;
                rc = -EAGAIN;
                sptep = gmap_pte_op_walk(parent, paddr, &ptl);
                if (sptep) {
                        spin_lock(&sg->guest_table_lock);
                        /* Get page table pointer */
                        tptep = (pte_t *) gmap_table_walk(sg, saddr, 0);
                        if (!tptep) {
                                spin_unlock(&sg->guest_table_lock);
                                gmap_pte_op_end(sptep, ptl);
                                radix_tree_preload_end();
                                break;
                        }
                        rc = ptep_shadow_pte(sg->mm, saddr, sptep, tptep, pte);
                        if (rc > 0) {
                                /* Success and a new mapping */
                                gmap_insert_rmap(sg, vmaddr, rmap);
                                rmap = NULL;
                                rc = 0;
                        }
                        gmap_pte_op_end(sptep, ptl);
                        spin_unlock(&sg->guest_table_lock);
                }
                radix_tree_preload_end();
                if (!rc)
                        break;
                rc = gmap_pte_op_fixup(parent, paddr, vmaddr, prot);
                if (rc)
                        break;
        }
        kfree(rmap);
        return rc;
}
EXPORT_SYMBOL_GPL(gmap_shadow_page);
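
/*
 * Illustrative sketch, not part of the original file: resolving a fault in a
 * shadow gmap typically means first making sure a shadow page table backs the
 * segment (gmap_shadow_pgt()) and then shadowing the individual pte
 * (gmap_shadow_page()), with the whole sequence repeated by the caller when
 * either step reports -EAGAIN. The helper name and the way the parent values
 * "pgt", "fake" and "pte" are obtained are hypothetical.
 *
 *      static int example_shadow_fault(struct gmap *sg, unsigned long saddr,
 *                                      unsigned long pgt, int fake, pte_t pte)
 *      {
 *              int rc;
 *
 *              rc = gmap_shadow_pgt(sg, saddr, pgt, fake);
 *              if (rc)
 *                      return rc;
 *              return gmap_shadow_page(sg, saddr, pte);
 *      }
 */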

/*
 * gmap_shadow_notify - handle notifications for shadow gmap
 *
 * Called with sg->parent->shadow_lock.
 */
static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr,
                               unsigned long gaddr)
{
        struct gmap_rmap *rmap, *rnext, *head;
        unsigned long start, end, bits, raddr;

        BUG_ON(!gmap_is_shadow(sg));

        spin_lock(&sg->guest_table_lock);
        if (sg->removed) {
                spin_unlock(&sg->guest_table_lock);
                return;
        }
        /* Check for top level table */
        start = sg->orig_asce & _ASCE_ORIGIN;
        end = start + ((sg->orig_asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE;
        if (!(sg->orig_asce & _ASCE_REAL_SPACE) && gaddr >= start &&
            gaddr < end) {
                /* The complete shadow table has to go */
                gmap_unshadow(sg);
                spin_unlock(&sg->guest_table_lock);
                list_del(&sg->list);
                gmap_put(sg);
                return;
        }
        /* Remove the page table tree for one specific entry */
        head = radix_tree_delete(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT);
        gmap_for_each_rmap_safe(rmap, rnext, head) {
                bits = rmap->raddr & _SHADOW_RMAP_MASK;
                raddr = rmap->raddr ^ bits;
                switch (bits) {
                case _SHADOW_RMAP_REGION1:
                        gmap_unshadow_r2t(sg, raddr);
                        break;
                case _SHADOW_RMAP_REGION2:
                        gmap_unshadow_r3t(sg, raddr);
                        break;
                case _SHADOW_RMAP_REGION3:
                        gmap_unshadow_sgt(sg, raddr);
                        break;
                case _SHADOW_RMAP_SEGMENT:
                        gmap_unshadow_pgt(sg, raddr);
                        break;
                case _SHADOW_RMAP_PGTABLE:
                        gmap_unshadow_page(sg, raddr);
                        break;
                }
                kfree(rmap);
        }
        spin_unlock(&sg->guest_table_lock);
}

/**
 * ptep_notify - call all invalidation callbacks for a specific pte.
 * @mm: pointer to the process mm_struct
 * @vmaddr: virtual address in the process address space
 * @pte: pointer to the page table entry
 * @bits: bits from the pgste that caused the notify call
 *
 * This function is assumed to be called with the page table lock held
 * for the pte to notify.
 */
void ptep_notify(struct mm_struct *mm, unsigned long vmaddr,
                 pte_t *pte, unsigned long bits)
{
        unsigned long offset, gaddr = 0;
        struct gmap *gmap, *sg, *next;

        /*
         * Convert the pte's byte offset within its page table (256 entries)
         * into the corresponding guest address offset within the segment.
         */
        offset = ((unsigned long) pte) & (255 * sizeof(pte_t));
        offset = offset * (PAGE_SIZE / sizeof(pte_t));
        rcu_read_lock();
        list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
                spin_lock(&gmap->guest_table_lock);
                gaddr = host_to_guest_lookup(gmap, vmaddr) + offset;
                spin_unlock(&gmap->guest_table_lock);
                if (!IS_GADDR_VALID(gaddr))
                        continue;

                if (!list_empty(&gmap->children) && (bits & PGSTE_VSIE_BIT)) {
                        spin_lock(&gmap->shadow_lock);
                        list_for_each_entry_safe(sg, next,
                                                 &gmap->children, list)
                                gmap_shadow_notify(sg, vmaddr, gaddr);
                        spin_unlock(&gmap->shadow_lock);
                }
                if (bits & PGSTE_IN_BIT)
                        gmap_call_notifier(gmap, gaddr, gaddr + PAGE_SIZE - 1);
        }
        rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(ptep_notify);

static void pmdp_notify_gmap(struct gmap *gmap, pmd_t *pmdp,
                             unsigned long gaddr)
{
        /* Clear the notification bit and notify for the whole huge page */
        set_pmd(pmdp, clear_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_GMAP_IN)));
        gmap_call_notifier(gmap, gaddr, gaddr + HPAGE_SIZE - 1);
}

/**
 * gmap_pmdp_xchg - exchange a gmap pmd with another
 * @gmap: pointer to the guest address space structure
 * @pmdp: pointer to the pmd entry
 * @new: replacement entry
 * @gaddr: the affected guest address
 *
 * This function is assumed to be called with the guest_table_lock
 * held.
 */
static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *pmdp, pmd_t new,
                           unsigned long gaddr)
{
        gaddr &= HPAGE_MASK;
        pmdp_notify_gmap(gmap, pmdp, gaddr);
        new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_GMAP_IN));
        if (machine_has_tlb_guest())
                __pmdp_idte(gaddr, (pmd_t *)pmdp, IDTE_GUEST_ASCE, gmap->asce,
                            IDTE_GLOBAL);
        else if (cpu_has_idte())
                __pmdp_idte(gaddr, (pmd_t *)pmdp, 0, 0, IDTE_GLOBAL);
        else
                __pmdp_csp(pmdp);
        set_pmd(pmdp, new);
}

/*
 * Clear the gmap segment table entry mapping @vmaddr in every gmap attached
 * to @mm and notify; with @purge set the entry is additionally flushed with
 * CSP before being emptied.
 */
static void gmap_pmdp_clear(struct mm_struct *mm, unsigned long vmaddr,
                            int purge)
{
        pmd_t *pmdp;
        struct gmap *gmap;
        unsigned long gaddr;

        rcu_read_lock();
        list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
                spin_lock(&gmap->guest_table_lock);
                pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr);
                if (pmdp) {
                        pmdp_notify_gmap(gmap, pmdp, gaddr);
                        WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
                                                   _SEGMENT_ENTRY_GMAP_UC |
                                                   _SEGMENT_ENTRY));
                        if (purge)
                                __pmdp_csp(pmdp);
                        set_pmd(pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
                }
                spin_unlock(&gmap->guest_table_lock);
        }
        rcu_read_unlock();
}

/**
 * gmap_pmdp_invalidate - invalidate all affected guest pmd entries without
 * flushing
 * @mm: pointer to the process mm_struct
 * @vmaddr: virtual address in the process address space
 */
void gmap_pmdp_invalidate(struct mm_struct *mm, unsigned long vmaddr)
{
        gmap_pmdp_clear(mm, vmaddr, 0);
}
EXPORT_SYMBOL_GPL(gmap_pmdp_invalidate);

/**
 * gmap_pmdp_csp - csp all affected guest pmd entries
 * @mm: pointer to the process mm_struct
 * @vmaddr: virtual address in the process address space
 */
void gmap_pmdp_csp(struct mm_struct *mm, unsigned long vmaddr)
{
        gmap_pmdp_clear(mm, vmaddr, 1);
}
EXPORT_SYMBOL_GPL(gmap_pmdp_csp);

/**
 * gmap_pmdp_idte_local - invalidate and clear a guest pmd entry
 * @mm: pointer to the process mm_struct
 * @vmaddr: virtual address in the process address space
 */
void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr)
{
        unsigned long gaddr;
        struct gmap *gmap;
        pmd_t *pmdp;

        rcu_read_lock();
        list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
                spin_lock(&gmap->guest_table_lock);
                pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr);
                if (pmdp) {
                        pmdp_notify_gmap(gmap, pmdp, gaddr);
                        WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
                                                   _SEGMENT_ENTRY_GMAP_UC |
                                                   _SEGMENT_ENTRY));
                        if (machine_has_tlb_guest())
                                __pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE,
                                            gmap->asce, IDTE_LOCAL);
                        else if (cpu_has_idte())
                                __pmdp_idte(gaddr, pmdp, 0, 0, IDTE_LOCAL);
                        *pmdp = __pmd(_SEGMENT_ENTRY_EMPTY);
                }
                spin_unlock(&gmap->guest_table_lock);
        }
        rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(gmap_pmdp_idte_local);

/**
 * gmap_pmdp_idte_global - invalidate and clear a guest pmd entry
 * @mm: pointer to the process mm_struct
 * @vmaddr: virtual address in the process address space
 */
void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr)
{
        unsigned long gaddr;
        struct gmap *gmap;
        pmd_t *pmdp;

        rcu_read_lock();
        list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
                spin_lock(&gmap->guest_table_lock);
                pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr);
                if (pmdp) {
                        pmdp_notify_gmap(gmap, pmdp, gaddr);
                        WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
                                                   _SEGMENT_ENTRY_GMAP_UC |
                                                   _SEGMENT_ENTRY));
                        if (machine_has_tlb_guest())
                                __pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE,
                                            gmap->asce, IDTE_GLOBAL);
                        else if (cpu_has_idte())
                                __pmdp_idte(gaddr, pmdp, 0, 0, IDTE_GLOBAL);
                        else
                                __pmdp_csp(pmdp);
                        *pmdp = __pmd(_SEGMENT_ENTRY_EMPTY);
                }
                spin_unlock(&gmap->guest_table_lock);
        }
        rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(gmap_pmdp_idte_global);

/**
 * gmap_test_and_clear_dirty_pmd - test and reset segment dirty status
 * @gmap: pointer to guest address space
 * @pmdp: pointer to the pmd to be tested
 * @gaddr: virtual address in the guest address space
 *
 * This function is assumed to be called with the guest_table_lock
 * held.
 */
static bool gmap_test_and_clear_dirty_pmd(struct gmap *gmap, pmd_t *pmdp,
                                          unsigned long gaddr)
{
        if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
                return false;

        /* Memory that is already protected and did not change is clean */
        if (pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT &&
            !(pmd_val(*pmdp) & _SEGMENT_ENTRY_GMAP_UC))
                return false;

        /* Clear UC indication and reset protection */
        set_pmd(pmdp, clear_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_GMAP_UC)));
        gmap_protect_pmd(gmap, gaddr, pmdp, PROT_READ, 0);
        return true;
}

/**
 * gmap_sync_dirty_log_pmd - set bitmap based on dirty status of segment
 * @gmap: pointer to guest address space
 * @bitmap: dirty bitmap for this pmd
 * @gaddr: virtual address in the guest address space
 * @vmaddr: virtual address in the host address space
 *
 * This function is assumed to be called with the guest_table_lock
 * held.
 */
void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long bitmap[4],
                             unsigned long gaddr, unsigned long vmaddr)
{
        int i;
        pmd_t *pmdp;
        pte_t *ptep;
        spinlock_t *ptl;

        pmdp = gmap_pmd_op_walk(gmap, gaddr);
        if (!pmdp)
                return;

        if (pmd_leaf(*pmdp)) {
                if (gmap_test_and_clear_dirty_pmd(gmap, pmdp, gaddr))
                        bitmap_fill(bitmap, _PAGE_ENTRIES);
        } else {
                for (i = 0; i < _PAGE_ENTRIES; i++, vmaddr += PAGE_SIZE) {
                        ptep = pte_alloc_map_lock(gmap->mm, pmdp, vmaddr, &ptl);
                        if (!ptep)
                                continue;
                        if (ptep_test_and_clear_uc(gmap->mm, vmaddr, ptep))
                                set_bit(i, bitmap);
                        pte_unmap_unlock(ptep, ptl);
                }
        }
        gmap_pmd_op_end(gmap, pmdp);
}
EXPORT_SYMBOL_GPL(gmap_sync_dirty_log_pmd);
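
/*
 * Illustrative sketch, not part of the original file: a dirty-log consumer
 * could collect the per-segment bitmap like this and then visit each dirty
 * 4K page. The caller is assumed to hold the locks required by
 * gmap_sync_dirty_log_pmd(); the helper name and the pr_debug() reporting
 * are hypothetical stand-ins for real dirty-log bookkeeping.
 *
 *      static void example_collect_dirty(struct gmap *gmap, unsigned long gaddr,
 *                                        unsigned long vmaddr)
 *      {
 *              unsigned long bitmap[4] = {};
 *              int i;
 *
 *              gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
 *              for_each_set_bit(i, bitmap, _PAGE_ENTRIES)
 *                      pr_debug("guest page at %lx is dirty\n",
 *                               gaddr + i * PAGE_SIZE);
 *      }
 */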
2178
2179
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
2180
static int thp_split_walk_pmd_entry(pmd_t *pmd, unsigned long addr,
2181
unsigned long end, struct mm_walk *walk)
2182
{
2183
struct vm_area_struct *vma = walk->vma;
2184
2185
split_huge_pmd(vma, pmd, addr);
2186
return 0;
2187
}
2188
2189
static const struct mm_walk_ops thp_split_walk_ops = {
2190
.pmd_entry = thp_split_walk_pmd_entry,
2191
.walk_lock = PGWALK_WRLOCK_VERIFY,
2192
};
2193
2194
static inline void thp_split_mm(struct mm_struct *mm)
2195
{
2196
struct vm_area_struct *vma;
2197
VMA_ITERATOR(vmi, mm, 0);
2198
2199
for_each_vma(vmi, vma) {
2200
vm_flags_mod(vma, VM_NOHUGEPAGE, VM_HUGEPAGE);
2201
walk_page_vma(vma, &thp_split_walk_ops, NULL);
2202
}
2203
mm->def_flags |= VM_NOHUGEPAGE;
2204
}
2205
#else
2206
static inline void thp_split_mm(struct mm_struct *mm)
2207
{
2208
}
2209
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

/*
 * Switch on pgstes for the current userspace process (for kvm).
 */
int s390_enable_sie(void)
{
        struct mm_struct *mm = current->mm;

        /* Do we have pgstes? If yes, we are done */
        if (mm_has_pgste(mm))
                return 0;
        mmap_write_lock(mm);
        mm->context.has_pgste = 1;
        /* split thp mappings and disable thp for future mappings */
        thp_split_mm(mm);
        mmap_write_unlock(mm);
        return 0;
}
EXPORT_SYMBOL_GPL(s390_enable_sie);
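
/*
 * Illustrative sketch, not part of the original file: s390_enable_sie() is
 * expected to be called once for the current process before it runs guests
 * under SIE; as a side effect it splits existing THP mappings and disables
 * THP for future mappings of this mm. The helper name and calling context
 * below are hypothetical.
 *
 *      static int example_prepare_current_mm_for_sie(void)
 *      {
 *              return s390_enable_sie();
 *      }
 */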

/*
 * Enable storage key handling from now on and initialize the storage
 * keys with the default key.
 */
static int __s390_enable_skey_pte(pte_t *pte, unsigned long addr,
                                  unsigned long next, struct mm_walk *walk)
{
        /* Clear storage key */
        ptep_zap_key(walk->mm, addr, pte);
        return 0;
}

/*
 * Give a chance to schedule after setting the storage keys of 256 pages.
 * We only hold the mm lock, which is a rwsem, and the kvm srcu.
 * Both can sleep.
 */
static int __s390_enable_skey_pmd(pmd_t *pmd, unsigned long addr,
                                  unsigned long next, struct mm_walk *walk)
{
        cond_resched();
        return 0;
}

static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr,
                                      unsigned long hmask, unsigned long next,
                                      struct mm_walk *walk)
{
        pmd_t *pmd = (pmd_t *)pte;
        unsigned long start, end;
        struct folio *folio = page_folio(pmd_page(*pmd));

        /*
         * The write check makes sure we do not set a key on shared
         * memory. This is needed as the walker does not differentiate
         * between actual guest memory and the process executable or
         * shared libraries.
         */
        if (pmd_val(*pmd) & _SEGMENT_ENTRY_INVALID ||
            !(pmd_val(*pmd) & _SEGMENT_ENTRY_WRITE))
                return 0;

        start = pmd_val(*pmd) & HPAGE_MASK;
        end = start + HPAGE_SIZE;
        __storage_key_init_range(start, end);
        set_bit(PG_arch_1, &folio->flags);
        cond_resched();
        return 0;
}

static const struct mm_walk_ops enable_skey_walk_ops = {
        .hugetlb_entry = __s390_enable_skey_hugetlb,
        .pte_entry = __s390_enable_skey_pte,
        .pmd_entry = __s390_enable_skey_pmd,
        .walk_lock = PGWALK_WRLOCK,
};

int s390_enable_skey(void)
{
        struct mm_struct *mm = current->mm;
        int rc = 0;

        mmap_write_lock(mm);
        if (mm_uses_skeys(mm))
                goto out_up;

        mm->context.uses_skeys = 1;
        rc = gmap_helper_disable_cow_sharing();
        if (rc) {
                mm->context.uses_skeys = 0;
                goto out_up;
        }
        walk_page_range(mm, 0, TASK_SIZE, &enable_skey_walk_ops, NULL);

out_up:
        mmap_write_unlock(mm);
        return rc;
}
EXPORT_SYMBOL_GPL(s390_enable_skey);
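
/*
 * Illustrative sketch, not part of the original file: storage keys are
 * typically switched on lazily, e.g. the first time a guest touches a storage
 * key. A hypothetical intercept handler would do little more than:
 *
 *      static int example_handle_skey_use(void)
 *      {
 *              return s390_enable_skey();
 *      }
 *
 * Note that s390_enable_skey() fails when copy-on-write sharing cannot be
 * disabled for the mm, in which case storage keys stay off.
 */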

/*
 * Reset CMMA state, make all pages stable again.
 */
static int __s390_reset_cmma(pte_t *pte, unsigned long addr,
                             unsigned long next, struct mm_walk *walk)
{
        ptep_zap_unused(walk->mm, addr, pte, 1);
        return 0;
}

static const struct mm_walk_ops reset_cmma_walk_ops = {
        .pte_entry = __s390_reset_cmma,
        .walk_lock = PGWALK_WRLOCK,
};

void s390_reset_cmma(struct mm_struct *mm)
{
        mmap_write_lock(mm);
        walk_page_range(mm, 0, TASK_SIZE, &reset_cmma_walk_ops, NULL);
        mmap_write_unlock(mm);
}
EXPORT_SYMBOL_GPL(s390_reset_cmma);

#define GATHER_GET_PAGES 32

struct reset_walk_state {
        unsigned long next;
        unsigned long count;
        unsigned long pfns[GATHER_GET_PAGES];
};

static int s390_gather_pages(pte_t *ptep, unsigned long addr,
                             unsigned long next, struct mm_walk *walk)
{
        struct reset_walk_state *p = walk->private;
        pte_t pte = READ_ONCE(*ptep);

        if (pte_present(pte)) {
                /* we have a reference from the mapping, take an extra one */
                get_page(phys_to_page(pte_val(pte)));
                p->pfns[p->count] = phys_to_pfn(pte_val(pte));
                p->next = next;
                p->count++;
        }
        return p->count >= GATHER_GET_PAGES;
}

static const struct mm_walk_ops gather_pages_ops = {
        .pte_entry = s390_gather_pages,
        .walk_lock = PGWALK_RDLOCK,
};

/*
 * Call the Destroy secure page UVC on each page in the given array of PFNs.
 * Each page needs to have an extra reference, which will be released here.
 */
void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns)
{
        struct folio *folio;
        unsigned long i;

        for (i = 0; i < count; i++) {
                folio = pfn_folio(pfns[i]);
                /* we always have an extra reference */
                uv_destroy_folio(folio);
                /* get rid of the extra reference */
                folio_put(folio);
                cond_resched();
        }
}
EXPORT_SYMBOL_GPL(s390_uv_destroy_pfns);

/**
 * __s390_uv_destroy_range - Call the destroy secure page UVC on each page
 * in the given range of the given address space.
 * @mm: the mm to operate on
 * @start: the start of the range
 * @end: the end of the range
 * @interruptible: if not 0, stop when a fatal signal is received
 *
 * Walk the given range of the given address space and call the destroy
 * secure page UVC on each page. Optionally exit early if a fatal signal is
 * pending.
 *
 * Return: 0 on success, -EINTR if the function stopped before completing
 */
int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start,
                            unsigned long end, bool interruptible)
{
        struct reset_walk_state state = { .next = start };
        int r = 1;

        while (r > 0) {
                state.count = 0;
                mmap_read_lock(mm);
                r = walk_page_range(mm, state.next, end, &gather_pages_ops, &state);
                mmap_read_unlock(mm);
                cond_resched();
                s390_uv_destroy_pfns(state.count, state.pfns);
                if (interruptible && fatal_signal_pending(current))
                        return -EINTR;
        }
        return 0;
}
EXPORT_SYMBOL_GPL(__s390_uv_destroy_range);
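
/*
 * Illustrative sketch, not part of the original file: tearing down all secure
 * (protected virtualization) pages of an address space could look like this;
 * the range and the choice to allow interruption are hypothetical.
 *
 *      static int example_destroy_all_secure_pages(struct mm_struct *mm)
 *      {
 *              return __s390_uv_destroy_range(mm, 0, TASK_SIZE, true);
 *      }
 */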

/**
 * s390_replace_asce - Try to replace the current ASCE of a gmap with a copy
 * @gmap: the gmap whose ASCE needs to be replaced
 *
 * If the ASCE is a SEGMENT type then this function will return -EINVAL;
 * replacing a segment type ASCE would leave the pointers in the host_to_guest
 * radix tree pointing to the wrong pages, causing use-after-free and memory
 * corruption.
 * If the allocation of the new top level page table fails, the ASCE is not
 * replaced.
 * In any case, the old top level table is always detached from the gmap.
 * Therefore the caller has to make sure to save a pointer to it
 * beforehand, unless a leak is actually intended.
 */
int s390_replace_asce(struct gmap *gmap)
{
        unsigned long asce;
        struct page *page;
        void *table;

        /* Replacing segment type ASCEs would cause serious issues */
        if ((gmap->asce & _ASCE_TYPE_MASK) == _ASCE_TYPE_SEGMENT)
                return -EINVAL;

        page = gmap_alloc_crst();
        if (!page)
                return -ENOMEM;
        table = page_to_virt(page);
        memcpy(table, gmap->table, 1UL << (CRST_ALLOC_ORDER + PAGE_SHIFT));

        /* Set new table origin while preserving existing ASCE control bits */
        asce = (gmap->asce & ~_ASCE_ORIGIN) | __pa(table);
        WRITE_ONCE(gmap->asce, asce);
        WRITE_ONCE(gmap->mm->context.gmap_asce, asce);
        WRITE_ONCE(gmap->table, table);

        return 0;
}
EXPORT_SYMBOL_GPL(s390_replace_asce);
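
/*
 * Illustrative sketch, not part of the original file: as described above, the
 * caller is responsible for keeping track of the old top level table if it
 * must not be leaked. The helper name and bookkeeping are hypothetical.
 *
 *      static int example_replace_asce(struct gmap *gmap, unsigned long *old_asce)
 *      {
 *              *old_asce = gmap->asce;
 *              return s390_replace_asce(gmap);
 *      }
 */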