Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/xen/mmu_pv.c
26424 views
1
// SPDX-License-Identifier: GPL-2.0
2
3
/*
4
* Xen mmu operations
5
*
6
* This file contains the various mmu fetch and update operations.
7
* The most important job they must perform is the mapping between the
8
* domain's pfn and the overall machine mfns.
9
*
10
* Xen allows guests to directly update the pagetable, in a controlled
11
* fashion. In other words, the guest modifies the same pagetable
12
* that the CPU actually uses, which eliminates the overhead of having
13
* a separate shadow pagetable.
14
*
15
* In order to allow this, it falls on the guest domain to map its
16
* notion of a "physical" pfn - which is just a domain-local linear
17
* address - into a real "machine address" which the CPU's MMU can
18
* use.
19
*
20
* A pgd_t/pmd_t/pte_t will typically contain an mfn, and so can be
21
* inserted directly into the pagetable. When creating a new
22
* pte/pmd/pgd, it converts the passed pfn into an mfn. Conversely,
23
* when reading the content back with __(pgd|pmd|pte)_val, it converts
24
* the mfn back into a pfn.
25
*
26
* The other constraint is that all pages which make up a pagetable
27
* must be mapped read-only in the guest. This prevents uncontrolled
28
* guest updates to the pagetable. Xen strictly enforces this, and
29
* will disallow any pagetable update which will end up mapping a
30
* pagetable page RW, and will disallow using any writable page as a
31
* pagetable.
32
*
33
* Naively, when loading %cr3 with the base of a new pagetable, Xen
34
* would need to validate the whole pagetable before going on.
35
* Naturally, this is quite slow. The solution is to "pin" a
36
* pagetable, which enforces all the constraints on the pagetable even
37
* when it is not actively in use. This means that Xen can be assured
38
* that it is still valid when you do load it into %cr3, and doesn't
39
* need to revalidate it.
40
*
41
* Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
42
*/
43
#include <linux/sched/mm.h>
44
#include <linux/debugfs.h>
45
#include <linux/bug.h>
46
#include <linux/vmalloc.h>
47
#include <linux/export.h>
48
#include <linux/init.h>
49
#include <linux/gfp.h>
50
#include <linux/memblock.h>
51
#include <linux/seq_file.h>
52
#include <linux/crash_dump.h>
53
#include <linux/pgtable.h>
54
#ifdef CONFIG_KEXEC_CORE
55
#include <linux/kexec.h>
56
#endif
57
58
#include <trace/events/xen.h>
59
60
#include <asm/tlbflush.h>
61
#include <asm/fixmap.h>
62
#include <asm/mmu_context.h>
63
#include <asm/setup.h>
64
#include <asm/paravirt.h>
65
#include <asm/e820/api.h>
66
#include <asm/linkage.h>
67
#include <asm/page.h>
68
#include <asm/init.h>
69
#include <asm/memtype.h>
70
#include <asm/smp.h>
71
#include <asm/tlb.h>
72
73
#include <asm/xen/hypercall.h>
74
#include <asm/xen/hypervisor.h>
75
76
#include <xen/xen.h>
77
#include <xen/page.h>
78
#include <xen/interface/xen.h>
79
#include <xen/interface/hvm/hvm_op.h>
80
#include <xen/interface/version.h>
81
#include <xen/interface/memory.h>
82
#include <xen/hvc-console.h>
83
#include <xen/swiotlb-xen.h>
84
85
#include "xen-ops.h"
86
87
/*
88
* Prototypes for functions called via PV_CALLEE_SAVE_REGS_THUNK() in order
89
* to avoid warnings with "-Wmissing-prototypes".
90
*/
91
pteval_t xen_pte_val(pte_t pte);
92
pgdval_t xen_pgd_val(pgd_t pgd);
93
pmdval_t xen_pmd_val(pmd_t pmd);
94
pudval_t xen_pud_val(pud_t pud);
95
p4dval_t xen_p4d_val(p4d_t p4d);
96
pte_t xen_make_pte(pteval_t pte);
97
pgd_t xen_make_pgd(pgdval_t pgd);
98
pmd_t xen_make_pmd(pmdval_t pmd);
99
pud_t xen_make_pud(pudval_t pud);
100
p4d_t xen_make_p4d(p4dval_t p4d);
101
pte_t xen_make_pte_init(pteval_t pte);
102
103
#ifdef CONFIG_X86_VSYSCALL_EMULATION
104
/* l3 pud for userspace vsyscall mapping */
105
static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss;
106
#endif
107
108
/*
109
* Protects atomic reservation decrease/increase against concurrent increases.
110
* Also protects non-atomic updates of current_pages and balloon lists.
111
*/
112
static DEFINE_SPINLOCK(xen_reservation_lock);
113
114
/* Protected by xen_reservation_lock. */
115
#define MIN_CONTIG_ORDER 9 /* 2MB */
116
static unsigned int discontig_frames_order = MIN_CONTIG_ORDER;
117
static unsigned long discontig_frames_early[1UL << MIN_CONTIG_ORDER] __initdata;
118
static unsigned long *discontig_frames __refdata = discontig_frames_early;
119
static bool discontig_frames_dyn;
120
121
static int alloc_discontig_frames(unsigned int order)
122
{
123
unsigned long *new_array, *old_array;
124
unsigned int old_order;
125
unsigned long flags;
126
127
BUG_ON(order < MIN_CONTIG_ORDER);
128
BUILD_BUG_ON(sizeof(discontig_frames_early) != PAGE_SIZE);
129
130
new_array = (unsigned long *)__get_free_pages(GFP_KERNEL,
131
order - MIN_CONTIG_ORDER);
132
if (!new_array)
133
return -ENOMEM;
134
135
spin_lock_irqsave(&xen_reservation_lock, flags);
136
137
old_order = discontig_frames_order;
138
139
if (order > discontig_frames_order || !discontig_frames_dyn) {
140
if (!discontig_frames_dyn)
141
old_array = NULL;
142
else
143
old_array = discontig_frames;
144
145
discontig_frames = new_array;
146
discontig_frames_order = order;
147
discontig_frames_dyn = true;
148
} else {
149
old_array = new_array;
150
}
151
152
spin_unlock_irqrestore(&xen_reservation_lock, flags);
153
154
free_pages((unsigned long)old_array, old_order - MIN_CONTIG_ORDER);
155
156
return 0;
157
}
158
159
/*
160
* Note about cr3 (pagetable base) values:
161
*
162
* xen_cr3 contains the current logical cr3 value; it contains the
163
* last set cr3. This may not be the current effective cr3, because
164
* its update may be being lazily deferred. However, a vcpu looking
165
* at its own cr3 can use this value knowing that everything will
166
* be self-consistent.
167
*
168
* xen_current_cr3 contains the actual vcpu cr3; it is set once the
169
* hypercall to set the vcpu cr3 is complete (so it may be a little
170
* out of date, but it will never be set early). If one vcpu is
171
* looking at another vcpu's cr3 value, it should use this variable.
172
*/
173
DEFINE_PER_CPU(unsigned long, xen_cr3); /* cr3 stored as physaddr */
174
static DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */
175
176
static phys_addr_t xen_pt_base, xen_pt_size __initdata;
177
178
static DEFINE_STATIC_KEY_FALSE(xen_struct_pages_ready);
179
180
/*
181
* Just beyond the highest usermode address. STACK_TOP_MAX has a
182
* redzone above it, so round it up to a PGD boundary.
183
*/
184
#define USER_LIMIT ((STACK_TOP_MAX + PGDIR_SIZE - 1) & PGDIR_MASK)
185
186
void make_lowmem_page_readonly(void *vaddr)
187
{
188
pte_t *pte, ptev;
189
unsigned long address = (unsigned long)vaddr;
190
unsigned int level;
191
192
pte = lookup_address(address, &level);
193
if (pte == NULL)
194
return; /* vaddr missing */
195
196
ptev = pte_wrprotect(*pte);
197
198
if (HYPERVISOR_update_va_mapping(address, ptev, 0))
199
BUG();
200
}
201
202
void make_lowmem_page_readwrite(void *vaddr)
203
{
204
pte_t *pte, ptev;
205
unsigned long address = (unsigned long)vaddr;
206
unsigned int level;
207
208
pte = lookup_address(address, &level);
209
if (pte == NULL)
210
return; /* vaddr missing */
211
212
ptev = pte_mkwrite_novma(*pte);
213
214
if (HYPERVISOR_update_va_mapping(address, ptev, 0))
215
BUG();
216
}
217
218
219
/*
220
* During early boot all page table pages are pinned, but we do not have struct
221
* pages, so return true until struct pages are ready.
222
*/
223
static bool xen_page_pinned(void *ptr)
224
{
225
if (static_branch_likely(&xen_struct_pages_ready)) {
226
struct page *page = virt_to_page(ptr);
227
228
return PagePinned(page);
229
}
230
return true;
231
}
232
233
static void xen_extend_mmu_update(const struct mmu_update *update)
234
{
235
struct multicall_space mcs;
236
struct mmu_update *u;
237
238
mcs = xen_mc_extend_args(__HYPERVISOR_mmu_update, sizeof(*u));
239
240
if (mcs.mc != NULL) {
241
mcs.mc->args[1]++;
242
} else {
243
mcs = __xen_mc_entry(sizeof(*u));
244
MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_SELF);
245
}
246
247
u = mcs.args;
248
*u = *update;
249
}
250
251
static void xen_extend_mmuext_op(const struct mmuext_op *op)
252
{
253
struct multicall_space mcs;
254
struct mmuext_op *u;
255
256
mcs = xen_mc_extend_args(__HYPERVISOR_mmuext_op, sizeof(*u));
257
258
if (mcs.mc != NULL) {
259
mcs.mc->args[1]++;
260
} else {
261
mcs = __xen_mc_entry(sizeof(*u));
262
MULTI_mmuext_op(mcs.mc, mcs.args, 1, NULL, DOMID_SELF);
263
}
264
265
u = mcs.args;
266
*u = *op;
267
}
268
269
/*
 * Write @val into the pmd entry at @ptr through an mmu_update request
 * queued on the multicall batch, with preemption disabled for the
 * duration.
 */
static void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val)
{
	struct mmu_update req;

	preempt_disable();
	xen_mc_batch();

	/* ptr may be ioremapped for 64-bit pagetable setup */
	req.ptr = arbitrary_virt_to_machine(ptr).maddr;
	req.val = pmd_val_ma(val);
	xen_extend_mmu_update(&req);

	xen_mc_issue(XEN_LAZY_MMU);
	preempt_enable();
}
286
287
/*
 * Set a pmd entry.  Entries living in a pinned page go through the
 * hypervisor; entries in an unpinned page are stored directly.
 */
static void xen_set_pmd(pmd_t *ptr, pmd_t val)
{
	trace_xen_mmu_set_pmd(ptr, val);

	if (xen_page_pinned(ptr)) {
		xen_set_pmd_hyper(ptr, val);
		return;
	}

	/* Page is not pinned: a plain store is enough. */
	*ptr = val;
}
300
301
/*
302
* Associate a virtual page frame with a given physical page frame
303
* and protection flags for that frame.
304
*/
305
void __init set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
306
{
307
if (HYPERVISOR_update_va_mapping(vaddr, mfn_pte(mfn, flags),
308
UVMF_INVLPG))
309
BUG();
310
}
311
312
/*
 * Try to queue a pte update on the multicall batch.
 *
 * Only done while xen_get_lazy_mode() reports XEN_LAZY_MMU.  Returns true
 * when the update was queued, false when the caller has to perform the
 * update some other way.
 */
static bool xen_batched_set_pte(pte_t *ptep, pte_t pteval)
{
	struct mmu_update req;

	if (xen_get_lazy_mode() != XEN_LAZY_MMU)
		return false;

	xen_mc_batch();

	req.ptr = virt_to_machine(ptep).maddr | MMU_NORMAL_PT_UPDATE;
	req.val = pte_val_ma(pteval);
	xen_extend_mmu_update(&req);

	xen_mc_issue(XEN_LAZY_MMU);

	return true;
}
329
330
/*
 * Set a pte: batch the update if possible, otherwise issue a direct
 * mmu_update hypercall for it.
 */
static inline void __xen_set_pte(pte_t *ptep, pte_t pteval)
{
	struct mmu_update req;

	if (xen_batched_set_pte(ptep, pteval))
		return;

	/*
	 * Could call native_set_pte() here and trap and
	 * emulate the PTE write, but a hypercall is much cheaper.
	 */
	req.ptr = virt_to_machine(ptep).maddr | MMU_NORMAL_PT_UPDATE;
	req.val = pte_val_ma(pteval);
	HYPERVISOR_mmu_update(&req, 1, NULL, DOMID_SELF);
}
344
345
/* Traced entry point for setting a pte; the work is done by __xen_set_pte(). */
static void xen_set_pte(pte_t *ptep, pte_t pteval)
{
	trace_xen_mmu_set_pte(ptep, pteval);

	__xen_set_pte(ptep, pteval);
}
350
351
/*
 * Begin a protection change on the pte at @ptep.  The pte is left in
 * place and its current value is returned unchanged; the bits are
 * preserved when the change is committed.
 */
static pte_t xen_ptep_modify_prot_start(struct vm_area_struct *vma,
					unsigned long addr, pte_t *ptep)
{
	trace_xen_mmu_ptep_modify_prot_start(vma->vm_mm, addr, ptep, *ptep);

	return *ptep;
}
358
359
/*
 * Commit a protection change started by xen_ptep_modify_prot_start():
 * queue an mmu_update for @ptep carrying the MMU_PT_UPDATE_PRESERVE_AD
 * command.
 */
static void xen_ptep_modify_prot_commit(struct vm_area_struct *vma,
					unsigned long addr,
					pte_t *ptep, pte_t pte)
{
	struct mmu_update req;

	trace_xen_mmu_ptep_modify_prot_commit(vma->vm_mm, addr, ptep, pte);

	xen_mc_batch();

	req.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD;
	req.val = pte_val_ma(pte);
	xen_extend_mmu_update(&req);

	xen_mc_issue(XEN_LAZY_MMU);
}
374
375
/* Assume pteval_t is equivalent to all the other *val_t types. */
376
static pteval_t pte_mfn_to_pfn(pteval_t val)
377
{
378
if (val & _PAGE_PRESENT) {
379
unsigned long mfn = (val & XEN_PTE_MFN_MASK) >> PAGE_SHIFT;
380
unsigned long pfn = mfn_to_pfn(mfn);
381
382
pteval_t flags = val & PTE_FLAGS_MASK;
383
if (unlikely(pfn == ~0))
384
val = flags & ~_PAGE_PRESENT;
385
else
386
val = ((pteval_t)pfn << PAGE_SHIFT) | flags;
387
}
388
389
return val;
390
}
391
392
/*
 * Convert a pfn based entry value into a machine-frame based one.
 *
 * Non-present values are returned untouched.  The FOREIGN_FRAME_BIT and
 * IDENTITY_FRAME_BIT markers are stripped from the looked-up mfn.
 */
static pteval_t pte_pfn_to_mfn(pteval_t val)
{
	unsigned long pfn, mfn;
	pteval_t flags;

	if (!(val & _PAGE_PRESENT))
		return val;

	pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
	flags = val & PTE_FLAGS_MASK;
	mfn = __pfn_to_mfn(pfn);

	/*
	 * If there's no mfn for the pfn, then just create an
	 * empty non-present pte (frame 0, no flags).  Unfortunately this
	 * loses information about the original pfn, so
	 * pte_mfn_to_pfn is asymmetric.
	 */
	if (unlikely(mfn == INVALID_P2M_ENTRY))
		return 0;

	mfn &= ~(FOREIGN_FRAME_BIT | IDENTITY_FRAME_BIT);

	return ((pteval_t)mfn << PAGE_SHIFT) | flags;
}
417
418
/* Return the value of @pte with its machine frame translated to a pfn. */
__visible pteval_t xen_pte_val(pte_t pte)
{
	return pte_mfn_to_pfn(pte.pte);
}
PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val);
425
426
/* Return the value of @pgd with its machine frame translated to a pfn. */
__visible pgdval_t xen_pgd_val(pgd_t pgd)
{
	pgdval_t raw = pgd.pgd;

	return pte_mfn_to_pfn(raw);
}
PV_CALLEE_SAVE_REGS_THUNK(xen_pgd_val);
431
432
/* Build a pte from a pfn based value, translating the pfn to an mfn. */
__visible pte_t xen_make_pte(pteval_t pte)
{
	return native_make_pte(pte_pfn_to_mfn(pte));
}
PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte);
439
440
/* Build a pgd entry from a pfn based value, translating the pfn to an mfn. */
__visible pgd_t xen_make_pgd(pgdval_t pgd)
{
	return native_make_pgd(pte_pfn_to_mfn(pgd));
}
PV_CALLEE_SAVE_REGS_THUNK(xen_make_pgd);
446
447
/* Return the value of @pmd with its machine frame translated to a pfn. */
__visible pmdval_t xen_pmd_val(pmd_t pmd)
{
	pmdval_t raw = pmd.pmd;

	return pte_mfn_to_pfn(raw);
}
PV_CALLEE_SAVE_REGS_THUNK(xen_pmd_val);
452
453
/*
 * Write @val into the pud entry at @ptr through an mmu_update request
 * queued on the multicall batch, with preemption disabled for the
 * duration.
 */
static void xen_set_pud_hyper(pud_t *ptr, pud_t val)
{
	struct mmu_update req;

	preempt_disable();
	xen_mc_batch();

	/* ptr may be ioremapped for 64-bit pagetable setup */
	req.ptr = arbitrary_virt_to_machine(ptr).maddr;
	req.val = pud_val_ma(val);
	xen_extend_mmu_update(&req);

	xen_mc_issue(XEN_LAZY_MMU);
	preempt_enable();
}
470
471
/*
 * Set a pud entry.  Entries living in a pinned page go through the
 * hypervisor; entries in an unpinned page are stored directly.
 */
static void xen_set_pud(pud_t *ptr, pud_t val)
{
	trace_xen_mmu_set_pud(ptr, val);

	if (xen_page_pinned(ptr)) {
		xen_set_pud_hyper(ptr, val);
		return;
	}

	/* Page is not pinned: a plain store is enough. */
	*ptr = val;
}
484
485
/* Build a pmd entry from a pfn based value, translating the pfn to an mfn. */
__visible pmd_t xen_make_pmd(pmdval_t pmd)
{
	return native_make_pmd(pte_pfn_to_mfn(pmd));
}
PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd);
491
492
/* Return the value of @pud with its machine frame translated to a pfn. */
__visible pudval_t xen_pud_val(pud_t pud)
{
	pudval_t raw = pud.pud;

	return pte_mfn_to_pfn(raw);
}
PV_CALLEE_SAVE_REGS_THUNK(xen_pud_val);
497
498
/* Build a pud entry from a pfn based value, translating the pfn to an mfn. */
__visible pud_t xen_make_pud(pudval_t pud)
{
	return native_make_pud(pte_pfn_to_mfn(pud));
}
PV_CALLEE_SAVE_REGS_THUNK(xen_make_pud);
505
506
/*
 * Find the entry in the user pagetable that corresponds to the kernel
 * pgd entry @pgd.
 *
 * The user pgd page is taken from the ->private field of the struct page
 * backing the kernel pgd page.  Returns NULL when the entry's index is at
 * or above pgd_index(USER_LIMIT), or when no user pgd is recorded.
 */
static pgd_t *xen_get_user_pgd(pgd_t *pgd)
{
	pgd_t *base = (pgd_t *)(((unsigned long)pgd) & PAGE_MASK);
	unsigned idx = pgd - base;
	pgd_t *user;

	if (idx >= pgd_index(USER_LIMIT))
		return NULL;

	user = (pgd_t *)virt_to_page(base)->private;
	if (!user)
		return NULL;

	return user + idx;
}
521
522
/*
 * Queue an mmu_update request writing @val to the p4d entry at @ptr.
 * Batching (xen_mc_batch()/xen_mc_issue()) is left to the caller.
 */
static void __xen_set_p4d_hyper(p4d_t *ptr, p4d_t val)
{
	struct mmu_update req;

	req.ptr = virt_to_machine(ptr).maddr;
	req.val = p4d_val_ma(val);

	xen_extend_mmu_update(&req);
}
530
531
/*
532
* Raw hypercall-based set_p4d, intended for in early boot before
533
* there's a page structure. This implies:
534
* 1. The only existing pagetable is the kernel's
535
* 2. It is always pinned
536
* 3. It has no user pagetable attached to it
537
*/
538
static void __init xen_set_p4d_hyper(p4d_t *ptr, p4d_t val)
539
{
540
preempt_disable();
541
542
xen_mc_batch();
543
544
__xen_set_p4d_hyper(ptr, val);
545
546
xen_mc_issue(XEN_LAZY_MMU);
547
548
preempt_enable();
549
}
550
551
/*
 * Set a p4d entry and, when one exists, the matching entry of the user
 * pagetable.
 *
 * For a pinned page both updates are queued in one multicall batch.  For
 * an unpinned page both entries are stored directly; the user page is
 * expected to be unpinned as well (WARN otherwise).
 */
static void xen_set_p4d(p4d_t *ptr, p4d_t val)
{
	pgd_t *user_ptr = xen_get_user_pgd((pgd_t *)ptr);

	trace_xen_mmu_set_p4d(ptr, (p4d_t *)user_ptr, val);

	if (xen_page_pinned(ptr)) {
		/* Pinned: at least batch the kernel and user updates together. */
		xen_mc_batch();

		__xen_set_p4d_hyper(ptr, val);
		if (user_ptr)
			__xen_set_p4d_hyper((p4d_t *)user_ptr, val);

		xen_mc_issue(XEN_LAZY_MMU);
		return;
	}

	/* Not pinned: the entries can be updated directly. */
	*ptr = val;
	if (user_ptr) {
		pgd_t user_entry;

		WARN_ON(xen_page_pinned(user_ptr));
		user_entry.pgd = p4d_val_ma(val);
		*user_ptr = user_entry;
	}
}
580
581
/* Return the value of @p4d with its machine frame translated to a pfn. */
__visible p4dval_t xen_p4d_val(p4d_t p4d)
{
	p4dval_t raw = p4d.p4d;

	return pte_mfn_to_pfn(raw);
}
PV_CALLEE_SAVE_REGS_THUNK(xen_p4d_val);
586
587
/* Build a p4d entry from a pfn based value, translating the pfn to an mfn. */
__visible p4d_t xen_make_p4d(p4dval_t p4d)
{
	return native_make_p4d(pte_pfn_to_mfn(p4d));
}
PV_CALLEE_SAVE_REGS_THUNK(xen_make_p4d);
594
595
static void xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd,
596
void (*func)(struct mm_struct *mm, struct page *,
597
enum pt_level),
598
bool last, unsigned long limit)
599
{
600
int i, nr;
601
602
nr = last ? pmd_index(limit) + 1 : PTRS_PER_PMD;
603
for (i = 0; i < nr; i++) {
604
if (!pmd_none(pmd[i]))
605
(*func)(mm, pmd_page(pmd[i]), PT_PTE);
606
}
607
}
608
609
static void xen_pud_walk(struct mm_struct *mm, pud_t *pud,
610
void (*func)(struct mm_struct *mm, struct page *,
611
enum pt_level),
612
bool last, unsigned long limit)
613
{
614
int i, nr;
615
616
nr = last ? pud_index(limit) + 1 : PTRS_PER_PUD;
617
for (i = 0; i < nr; i++) {
618
pmd_t *pmd;
619
620
if (pud_none(pud[i]))
621
continue;
622
623
pmd = pmd_offset(&pud[i], 0);
624
if (PTRS_PER_PMD > 1)
625
(*func)(mm, virt_to_page(pmd), PT_PMD);
626
xen_pmd_walk(mm, pmd, func, last && i == nr - 1, limit);
627
}
628
}
629
630
static void xen_p4d_walk(struct mm_struct *mm, p4d_t *p4d,
631
void (*func)(struct mm_struct *mm, struct page *,
632
enum pt_level),
633
bool last, unsigned long limit)
634
{
635
pud_t *pud;
636
637
638
if (p4d_none(*p4d))
639
return;
640
641
pud = pud_offset(p4d, 0);
642
if (PTRS_PER_PUD > 1)
643
(*func)(mm, virt_to_page(pud), PT_PUD);
644
xen_pud_walk(mm, pud, func, last, limit);
645
}
646
647
/*
648
* (Yet another) pagetable walker. This one is intended for pinning a
649
* pagetable. This means that it walks a pagetable and calls the
650
* callback function on each page it finds making up the page table,
651
* at every level. It walks the entire pagetable, but it only bothers
652
* pinning pte pages which are below limit. In the normal case this
653
* will be STACK_TOP_MAX, but at boot we need to pin up to
654
* FIXADDR_TOP.
655
*
656
* We must skip the Xen hole in the middle of the address space, just after
657
* the big x86-64 virtual hole.
658
*/
659
static void __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
660
void (*func)(struct mm_struct *mm, struct page *,
661
enum pt_level),
662
unsigned long limit)
663
{
664
int i, nr;
665
unsigned hole_low = 0, hole_high = 0;
666
667
/* The limit is the last byte to be touched */
668
limit--;
669
BUG_ON(limit >= FIXADDR_TOP);
670
671
/*
672
* 64-bit has a great big hole in the middle of the address
673
* space, which contains the Xen mappings.
674
*/
675
hole_low = pgd_index(GUARD_HOLE_BASE_ADDR);
676
hole_high = pgd_index(GUARD_HOLE_END_ADDR);
677
678
nr = pgd_index(limit) + 1;
679
for (i = 0; i < nr; i++) {
680
p4d_t *p4d;
681
682
if (i >= hole_low && i < hole_high)
683
continue;
684
685
if (pgd_none(pgd[i]))
686
continue;
687
688
p4d = p4d_offset(&pgd[i], 0);
689
xen_p4d_walk(mm, p4d, func, i == nr - 1, limit);
690
}
691
692
/* Do the top level last, so that the callbacks can use it as
693
a cue to do final things like tlb flushes. */
694
(*func)(mm, virt_to_page(pgd), PT_PGD);
695
}
696
697
static void xen_pgd_walk(struct mm_struct *mm,
698
void (*func)(struct mm_struct *mm, struct page *,
699
enum pt_level),
700
unsigned long limit)
701
{
702
__xen_pgd_walk(mm, mm->pgd, func, limit);
703
}
704
705
/* If we're using split pte locks, then take the page's lock and
706
return a pointer to it. Otherwise return NULL. */
707
static spinlock_t *xen_pte_lock(struct page *page, struct mm_struct *mm)
708
{
709
spinlock_t *ptl = NULL;
710
711
#if defined(CONFIG_SPLIT_PTE_PTLOCKS)
712
ptl = ptlock_ptr(page_ptdesc(page));
713
spin_lock_nest_lock(ptl, &mm->page_table_lock);
714
#endif
715
716
return ptl;
717
}
718
719
static void xen_pte_unlock(void *v)
720
{
721
spinlock_t *ptl = v;
722
spin_unlock(ptl);
723
}
724
725
static void xen_do_pin(unsigned level, unsigned long pfn)
726
{
727
struct mmuext_op op;
728
729
op.cmd = level;
730
op.arg1.mfn = pfn_to_mfn(pfn);
731
732
xen_extend_mmuext_op(&op);
733
}
734
735
static void xen_pin_page(struct mm_struct *mm, struct page *page,
736
enum pt_level level)
737
{
738
unsigned pgfl = TestSetPagePinned(page);
739
740
if (!pgfl) {
741
void *pt = lowmem_page_address(page);
742
unsigned long pfn = page_to_pfn(page);
743
struct multicall_space mcs = __xen_mc_entry(0);
744
spinlock_t *ptl;
745
746
/*
747
* We need to hold the pagetable lock between the time
748
* we make the pagetable RO and when we actually pin
749
* it. If we don't, then other users may come in and
750
* attempt to update the pagetable by writing it,
751
* which will fail because the memory is RO but not
752
* pinned, so Xen won't do the trap'n'emulate.
753
*
754
* If we're using split pte locks, we can't hold the
755
* entire pagetable's worth of locks during the
756
* traverse, because we may wrap the preempt count (8
757
* bits). The solution is to mark RO and pin each PTE
758
* page while holding the lock. This means the number
759
* of locks we end up holding is never more than a
760
* batch size (~32 entries, at present).
761
*
762
* If we're not using split pte locks, we needn't pin
763
* the PTE pages independently, because we're
764
* protected by the overall pagetable lock.
765
*/
766
ptl = NULL;
767
if (level == PT_PTE)
768
ptl = xen_pte_lock(page, mm);
769
770
MULTI_update_va_mapping(mcs.mc, (unsigned long)pt,
771
pfn_pte(pfn, PAGE_KERNEL_RO),
772
level == PT_PGD ? UVMF_TLB_FLUSH : 0);
773
774
if (ptl) {
775
xen_do_pin(MMUEXT_PIN_L1_TABLE, pfn);
776
777
/* Queue a deferred unlock for when this batch
778
is completed. */
779
xen_mc_callback(xen_pte_unlock, ptl);
780
}
781
}
782
}
783
784
/* This is called just after a mm has been created, but it has not
785
been used yet. We need to make sure that its pagetable is all
786
read-only, and can be pinned. */
787
static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd)
788
{
789
pgd_t *user_pgd = xen_get_user_pgd(pgd);
790
791
trace_xen_mmu_pgd_pin(mm, pgd);
792
793
xen_mc_batch();
794
795
__xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT);
796
797
xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(pgd)));
798
799
if (user_pgd) {
800
xen_pin_page(mm, virt_to_page(user_pgd), PT_PGD);
801
xen_do_pin(MMUEXT_PIN_L4_TABLE,
802
PFN_DOWN(__pa(user_pgd)));
803
}
804
805
xen_mc_issue(0);
806
}
807
808
static void xen_pgd_pin(struct mm_struct *mm)
809
{
810
__xen_pgd_pin(mm, mm->pgd);
811
}
812
813
/*
814
* On save, we need to pin all pagetables to make sure they get their
815
* mfns turned into pfns. Search the list for any unpinned pgds and pin
816
* them (unpinned pgds are not currently in use, probably because the
817
* process is under construction or destruction).
818
*
819
* Expected to be called in stop_machine() ("equivalent to taking
820
* every spinlock in the system"), so the locking doesn't really
821
* matter all that much.
822
*/
823
void xen_mm_pin_all(void)
824
{
825
struct page *page;
826
827
spin_lock(&init_mm.page_table_lock);
828
spin_lock(&pgd_lock);
829
830
list_for_each_entry(page, &pgd_list, lru) {
831
if (!PagePinned(page)) {
832
__xen_pgd_pin(&init_mm, (pgd_t *)page_address(page));
833
SetPageSavePinned(page);
834
}
835
}
836
837
spin_unlock(&pgd_lock);
838
spin_unlock(&init_mm.page_table_lock);
839
}
840
841
/*
 * Walker callback that only records the pinned state in the struct page;
 * @mm and @level are unused.
 */
static void __init xen_mark_pinned(struct mm_struct *mm, struct page *page,
				   enum pt_level level)
{
	SetPagePinned(page);
}
846
847
/*
848
* The init_mm pagetable is really pinned as soon as its created, but
849
* that's before we have page structures to store the bits. So do all
850
* the book-keeping now once struct pages for allocated pages are
851
* initialized. This happens only after memblock_free_all() is called.
852
*/
853
static void __init xen_after_bootmem(void)
854
{
855
static_branch_enable(&xen_struct_pages_ready);
856
#ifdef CONFIG_X86_VSYSCALL_EMULATION
857
SetPagePinned(virt_to_page(level3_user_vsyscall));
858
#endif
859
xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP);
860
861
if (alloc_discontig_frames(MIN_CONTIG_ORDER))
862
BUG();
863
}
864
865
static void xen_unpin_page(struct mm_struct *mm, struct page *page,
866
enum pt_level level)
867
{
868
unsigned pgfl = TestClearPagePinned(page);
869
870
if (pgfl) {
871
void *pt = lowmem_page_address(page);
872
unsigned long pfn = page_to_pfn(page);
873
spinlock_t *ptl = NULL;
874
struct multicall_space mcs;
875
876
/*
877
* Do the converse to pin_page. If we're using split
878
* pte locks, we must be holding the lock for while
879
* the pte page is unpinned but still RO to prevent
880
* concurrent updates from seeing it in this
881
* partially-pinned state.
882
*/
883
if (level == PT_PTE) {
884
ptl = xen_pte_lock(page, mm);
885
886
if (ptl)
887
xen_do_pin(MMUEXT_UNPIN_TABLE, pfn);
888
}
889
890
mcs = __xen_mc_entry(0);
891
892
MULTI_update_va_mapping(mcs.mc, (unsigned long)pt,
893
pfn_pte(pfn, PAGE_KERNEL),
894
level == PT_PGD ? UVMF_TLB_FLUSH : 0);
895
896
if (ptl) {
897
/* unlock when batch completed */
898
xen_mc_callback(xen_pte_unlock, ptl);
899
}
900
}
901
}
902
903
/* Release a pagetables pages back as normal RW */
904
static void __xen_pgd_unpin(struct mm_struct *mm, pgd_t *pgd)
905
{
906
pgd_t *user_pgd = xen_get_user_pgd(pgd);
907
908
trace_xen_mmu_pgd_unpin(mm, pgd);
909
910
xen_mc_batch();
911
912
xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
913
914
if (user_pgd) {
915
xen_do_pin(MMUEXT_UNPIN_TABLE,
916
PFN_DOWN(__pa(user_pgd)));
917
xen_unpin_page(mm, virt_to_page(user_pgd), PT_PGD);
918
}
919
920
__xen_pgd_walk(mm, pgd, xen_unpin_page, USER_LIMIT);
921
922
xen_mc_issue(0);
923
}
924
925
static void xen_pgd_unpin(struct mm_struct *mm)
926
{
927
__xen_pgd_unpin(mm, mm->pgd);
928
}
929
930
/*
931
* On resume, undo any pinning done at save, so that the rest of the
932
* kernel doesn't see any unexpected pinned pagetables.
933
*/
934
void xen_mm_unpin_all(void)
935
{
936
struct page *page;
937
938
spin_lock(&init_mm.page_table_lock);
939
spin_lock(&pgd_lock);
940
941
list_for_each_entry(page, &pgd_list, lru) {
942
if (PageSavePinned(page)) {
943
BUG_ON(!PagePinned(page));
944
__xen_pgd_unpin(&init_mm, (pgd_t *)page_address(page));
945
ClearPageSavePinned(page);
946
}
947
}
948
949
spin_unlock(&pgd_lock);
950
spin_unlock(&init_mm.page_table_lock);
951
}
952
953
static void xen_enter_mmap(struct mm_struct *mm)
954
{
955
spin_lock(&mm->page_table_lock);
956
xen_pgd_pin(mm);
957
spin_unlock(&mm->page_table_lock);
958
}
959
960
static void drop_mm_ref_this_cpu(void *info)
961
{
962
struct mm_struct *mm = info;
963
964
if (this_cpu_read(cpu_tlbstate.loaded_mm) == mm)
965
leave_mm();
966
967
/*
968
* If this cpu still has a stale cr3 reference, then make sure
969
* it has been flushed.
970
*/
971
if (this_cpu_read(xen_current_cr3) == __pa(mm->pgd))
972
xen_mc_flush();
973
}
974
975
#ifdef CONFIG_SMP
/*
 * Another cpu may still have its %cr3 pointing at the pagetable, so
 * we need to repoint it somewhere else before we can unpin it.
 */
static void xen_drop_mm_ref(struct mm_struct *mm)
{
	cpumask_var_t targets;
	unsigned cpu;

	drop_mm_ref_this_cpu(mm);

	/* Get the "official" set of cpus referring to our pagetable. */
	if (alloc_cpumask_var(&targets, GFP_ATOMIC)) {
		/*
		 * It's possible that a vcpu may have a stale reference to
		 * our cr3, because it's in lazy mode and hasn't flushed its
		 * set of pending hypercalls yet.  In this case, we can look
		 * at its actual current cr3 value, and force it to flush
		 * if needed.
		 */
		cpumask_clear(targets);
		for_each_online_cpu(cpu) {
			if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd))
				cpumask_set_cpu(cpu, targets);
		}

		smp_call_function_many(targets, drop_mm_ref_this_cpu, mm, 1);
		free_cpumask_var(targets);
		return;
	}

	/* No mask could be allocated: call each matching cpu one by one. */
	for_each_online_cpu(cpu) {
		if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd))
			smp_call_function_single(cpu, drop_mm_ref_this_cpu,
						 mm, 1);
	}
}
#else
/* Without SMP only the current cpu can hold a reference. */
static void xen_drop_mm_ref(struct mm_struct *mm)
{
	drop_mm_ref_this_cpu(mm);
}
#endif
1019
1020
/*
1021
* While a process runs, Xen pins its pagetables, which means that the
1022
* hypervisor forces it to be read-only, and it controls all updates
1023
* to it. This means that all pagetable updates have to go via the
1024
* hypervisor, which is moderately expensive.
1025
*
1026
* Since we're pulling the pagetable down, we switch to use init_mm,
1027
* unpin old process pagetable and mark it all read-write, which
1028
* allows further operations on it to be simple memory accesses.
1029
*
1030
* The only subtle point is that another CPU may be still using the
1031
* pagetable because of lazy tlb flushing. This means we need need to
1032
* switch all CPUs off this pagetable before we can unpin it.
1033
*/
1034
static void xen_exit_mmap(struct mm_struct *mm)
1035
{
1036
get_cpu(); /* make sure we don't move around */
1037
xen_drop_mm_ref(mm);
1038
put_cpu();
1039
1040
spin_lock(&mm->page_table_lock);
1041
1042
/* pgd may not be pinned in the error exit path of execve */
1043
if (xen_page_pinned(mm->pgd))
1044
xen_pgd_unpin(mm);
1045
1046
spin_unlock(&mm->page_table_lock);
1047
}
1048
1049
static void xen_post_allocator_init(void);
1050
1051
/*
 * Issue the mmuext operation @cmd for the machine frame backing @pfn
 * immediately (not batched).  A failing hypercall is fatal.
 */
static void __init pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
{
	struct mmuext_op request;

	request.cmd = cmd;
	request.arg1.mfn = pfn_to_mfn(pfn);

	if (HYPERVISOR_mmuext_op(&request, 1, NULL, DOMID_SELF))
		BUG();
}
1060
1061
/*
 * Clear the populated level2_kernel_pgt entries for [@vaddr, @vaddr_end]
 * that lie outside the range from _text to the PMD-rounded _brk_end.
 */
static void __init xen_cleanhighmap(unsigned long vaddr,
				    unsigned long vaddr_end)
{
	unsigned long kernel_end = roundup((unsigned long)_brk_end, PMD_SIZE) - 1;
	pmd_t *pmd = level2_kernel_pgt + pmd_index(vaddr);

	/*
	 * NOTE: The loop is more greedy than the cleanup_highmap variant.
	 * We include the PMD passed in on _both_ boundaries.
	 */
	while (vaddr <= vaddr_end && pmd < (level2_kernel_pgt + PTRS_PER_PMD)) {
		if (!pmd_none(*pmd) &&
		    (vaddr < (unsigned long) _text || vaddr > kernel_end))
			set_pmd(pmd, __pmd(0));

		pmd++;
		vaddr += PMD_SIZE;
	}

	/*
	 * In case we did something silly, we should crash in this function
	 * instead of somewhere later and be confusing.
	 */
	xen_mc_flush();
}
1080
1081
/*
1082
* Make a page range writeable and free it.
1083
*/
1084
static void __init xen_free_ro_pages(unsigned long paddr, unsigned long size)
1085
{
1086
void *vaddr = __va(paddr);
1087
void *vaddr_end = vaddr + size;
1088
1089
for (; vaddr < vaddr_end; vaddr += PAGE_SIZE)
1090
make_lowmem_page_readwrite(vaddr);
1091
1092
memblock_phys_free(paddr, size);
1093
}
1094
1095
/*
 * Free one page-table page: optionally unpin it, clear its pinned flag,
 * then make it writeable and release it.
 */
static void __init xen_cleanmfnmap_free_pgtbl(void *pgtbl, bool unpin)
{
	unsigned long phys = __pa(pgtbl) & PHYSICAL_PAGE_MASK;

	if (unpin)
		pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(phys));

	ClearPagePinned(virt_to_page(__va(phys)));

	xen_free_ro_pages(phys, PAGE_SIZE);
}
1104
1105
/*
 * Free everything mapped below the pmd entry @pmd.
 *
 * A leaf entry has its PMD_SIZE range freed and is otherwise left as is.
 * For a table entry every mapped page is freed, the entry is cleared and
 * the pte table page itself is released.
 */
static void __init xen_cleanmfnmap_pmd(pmd_t *pmd, bool unpin)
{
	pte_t *ptes;
	int idx;

	if (pmd_leaf(*pmd)) {
		xen_free_ro_pages(pmd_val(*pmd) & PHYSICAL_PAGE_MASK,
				  PMD_SIZE);
		return;
	}

	ptes = pte_offset_kernel(pmd, 0);
	for (idx = 0; idx < PTRS_PER_PTE; idx++) {
		if (!pte_none(ptes[idx]))
			xen_free_ro_pages(pte_pfn(ptes[idx]) << PAGE_SHIFT,
					  PAGE_SIZE);
	}

	set_pmd(pmd, __pmd(0));
	xen_cleanmfnmap_free_pgtbl(ptes, unpin);
}
1127
1128
/*
 * Free everything reachable through one PUD entry of the initial P->M
 * mapping.
 *
 * A leaf entry has its PUD_SIZE range freed directly.  Otherwise every
 * populated PMD entry is torn down with xen_cleanmfnmap_pmd(), the PUD
 * entry is cleared and the PMD table page is released (and unpinned when
 * @unpin is set).
 */
static void __init xen_cleanmfnmap_pud(pud_t *pud, bool unpin)
{
	pmd_t *pmds;
	int idx;

	if (pud_leaf(*pud)) {
		xen_free_ro_pages(pud_val(*pud) & PHYSICAL_PAGE_MASK, PUD_SIZE);
		return;
	}

	pmds = pmd_offset(pud, 0);
	for (idx = 0; idx < PTRS_PER_PMD; idx++) {
		if (!pmd_none(pmds[idx]))
			xen_cleanmfnmap_pmd(&pmds[idx], unpin);
	}

	/* Clear the entry before the table page it refers to is released. */
	set_pud(pud, __pud(0));
	xen_cleanmfnmap_free_pgtbl(pmds, unpin);
}
1149
1150
/*
 * Free everything reachable through one P4D entry of the initial P->M
 * mapping.
 *
 * A leaf entry has its P4D_SIZE range freed directly.  Otherwise every
 * populated PUD entry is torn down with xen_cleanmfnmap_pud(), the P4D
 * entry is cleared and the PUD table page is released (and unpinned when
 * @unpin is set).
 */
static void __init xen_cleanmfnmap_p4d(p4d_t *p4d, bool unpin)
{
	pud_t *puds;
	int idx;

	if (p4d_leaf(*p4d)) {
		xen_free_ro_pages(p4d_val(*p4d) & PHYSICAL_PAGE_MASK, P4D_SIZE);
		return;
	}

	puds = pud_offset(p4d, 0);
	for (idx = 0; idx < PTRS_PER_PUD; idx++) {
		if (!pud_none(puds[idx]))
			xen_cleanmfnmap_pud(&puds[idx], unpin);
	}

	/* Clear the entry before the table page it refers to is released. */
	set_p4d(p4d, __p4d(0));
	xen_cleanmfnmap_free_pgtbl(puds, unpin);
}
1171
1172
/*
1173
* Since it is well isolated we can (and since it is perhaps large we should)
1174
* also free the page tables mapping the initial P->M table.
1175
*/
1176
static void __init xen_cleanmfnmap(unsigned long vaddr)
1177
{
1178
pgd_t *pgd;
1179
p4d_t *p4d;
1180
bool unpin;
1181
1182
unpin = (vaddr == 2 * PGDIR_SIZE);
1183
vaddr &= PMD_MASK;
1184
pgd = pgd_offset_k(vaddr);
1185
p4d = p4d_offset(pgd, 0);
1186
if (!p4d_none(*p4d))
1187
xen_cleanmfnmap_p4d(p4d, unpin);
1188
}
1189
1190
/*
 * Dispose of the hypervisor-supplied P->M list once xen_p2m_addr no longer
 * points at it.
 */
static void __init xen_pagetable_p2m_free(void)
{
	const unsigned long list_bytes =
		PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
	unsigned long old_list;

	/* No memory or already called. */
	if ((unsigned long)xen_p2m_addr == xen_start_info->mfn_list)
		return;

	old_list = xen_start_info->mfn_list;

	/* Fill the old list (possibly a __ka address) with INVALID_P2M_ENTRY. */
	memset((void *)old_list, 0xff, list_bytes);

	if (old_list < __START_KERNEL_map) {
		/* Mapped outside the kernel mapping: free its page tables too. */
		xen_cleanmfnmap(old_list);
		return;
	}

	/*
	 * The list lives in __ka space.  The range is rounded up to a PMD,
	 * so anybody still using the __ka address of xen_start_info or
	 * xen_start_info->shared_info at this stage is going to crash.
	 * Fortunately both were already revectored in
	 * xen_setup_kernel_pagetable.
	 */
	xen_cleanhighmap(old_list, old_list + roundup(list_bytes, PMD_SIZE));
	memblock_free((void *)old_list, list_bytes);
}
1223
1224
/*
 * Remove the __ka mappings that cover the Xen-provided page tables and
 * switch xen_start_info->pt_base over to its __va alias.
 */
static void __init xen_pagetable_cleanhighmap(void)
{
	const unsigned long pt_start = xen_start_info->pt_base;
	const unsigned long pt_bytes = xen_start_info->nr_pt_frames * PAGE_SIZE;

	/*
	 * cleanup_highmap has already cleaned __ka space from _brk_limit up
	 * to max_pfn_mapped (the end of the ramdisk).  Continue by erasing
	 * the PMD entries that point to page tables; those stay reachable
	 * via __va at this stage.
	 *
	 * Xen aligns the memory end to a 4MB boundary, so for good measure
	 * the end is rounded up to PMD_SIZE * 2.  Anybody still using the
	 * __ka address of the initial boot stack will therefore crash.
	 * xen_start_info itself was already taken care of in
	 * xen_setup_kernel_pagetable.
	 */
	xen_cleanhighmap(pt_start, roundup(pt_start + pt_bytes, PMD_SIZE * 2));

	xen_start_info->pt_base = (unsigned long)__va(__pa(pt_start));
}
1244
1245
/*
 * Move the P->M data to its final home: build the vmalloc'ed p2m tree,
 * free the old list, clean the high mapping of the initial page tables
 * and finally point xen_start_info->mfn_list at the new array.
 */
static void __init xen_pagetable_p2m_setup(void)
{
	xen_vmalloc_p2m_tree();
	xen_pagetable_p2m_free();
	xen_pagetable_cleanhighmap();

	/* Revector: from here on mfn_list refers to the new array. */
	xen_start_info->mfn_list = (unsigned long)xen_p2m_addr;
}
1256
1257
/*
 * Xen flavour of the page-table initialisation: runs paging_init() and
 * then finishes the Xen specific p2m / mfn-list setup.
 */
static void __init xen_pagetable_init(void)
{
	/*
	 * Most PTE writes from now on target page tables that have already
	 * been announced to Xen as such, so going through a hypercall is the
	 * more efficient way to perform them.
	 */
	pv_ops.mmu.set_pte = __xen_set_pte;

	paging_init();
	xen_post_allocator_init();

	xen_pagetable_p2m_setup();

	/* Allocate and initialize the top and mid mfn levels of the p2m. */
	xen_build_mfn_list_list();

	/* Remap the memory that was freed due to conflicts with the E820 map. */
	xen_remap_memory();
	xen_setup_mfn_list_list();
}
1278
1279
static noinstr void xen_write_cr2(unsigned long cr2)
1280
{
1281
this_cpu_read(xen_vcpu)->arch.cr2 = cr2;
1282
}
1283
1284
static noinline void xen_flush_tlb(void)
1285
{
1286
struct mmuext_op *op;
1287
struct multicall_space mcs;
1288
1289
preempt_disable();
1290
1291
mcs = xen_mc_entry(sizeof(*op));
1292
1293
op = mcs.args;
1294
op->cmd = MMUEXT_TLB_FLUSH_LOCAL;
1295
MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
1296
1297
xen_mc_issue(XEN_LAZY_MMU);
1298
1299
preempt_enable();
1300
}
1301
1302
static void xen_flush_tlb_one_user(unsigned long addr)
1303
{
1304
struct mmuext_op *op;
1305
struct multicall_space mcs;
1306
1307
trace_xen_mmu_flush_tlb_one_user(addr);
1308
1309
preempt_disable();
1310
1311
mcs = xen_mc_entry(sizeof(*op));
1312
op = mcs.args;
1313
op->cmd = MMUEXT_INVLPG_LOCAL;
1314
op->arg1.linear_addr = addr & PAGE_MASK;
1315
MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
1316
1317
xen_mc_issue(XEN_LAZY_MMU);
1318
1319
preempt_enable();
1320
}
1321
1322
static void xen_flush_tlb_multi(const struct cpumask *cpus,
1323
const struct flush_tlb_info *info)
1324
{
1325
struct {
1326
struct mmuext_op op;
1327
DECLARE_BITMAP(mask, NR_CPUS);
1328
} *args;
1329
struct multicall_space mcs;
1330
const size_t mc_entry_size = sizeof(args->op) +
1331
sizeof(args->mask[0]) * BITS_TO_LONGS(num_possible_cpus());
1332
1333
trace_xen_mmu_flush_tlb_multi(cpus, info->mm, info->start, info->end);
1334
1335
if (cpumask_empty(cpus))
1336
return; /* nothing to do */
1337
1338
mcs = xen_mc_entry(mc_entry_size);
1339
args = mcs.args;
1340
args->op.arg2.vcpumask = to_cpumask(args->mask);
1341
1342
/* Remove any offline CPUs */
1343
cpumask_and(to_cpumask(args->mask), cpus, cpu_online_mask);
1344
1345
args->op.cmd = MMUEXT_TLB_FLUSH_MULTI;
1346
if (info->end != TLB_FLUSH_ALL &&
1347
(info->end - info->start) <= PAGE_SIZE) {
1348
args->op.cmd = MMUEXT_INVLPG_MULTI;
1349
args->op.arg1.linear_addr = info->start;
1350
}
1351
1352
MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF);
1353
1354
xen_mc_issue(XEN_LAZY_MMU);
1355
}
1356
1357
static unsigned long xen_read_cr3(void)
1358
{
1359
return this_cpu_read(xen_cr3);
1360
}
1361
1362
static void set_current_cr3(void *v)
1363
{
1364
this_cpu_write(xen_current_cr3, (unsigned long)v);
1365
}
1366
1367
/*
 * Queue a base-pointer switch for either the kernel or the user page
 * table.
 *
 * @kernel: true  -> MMUEXT_NEW_BASEPTR, and the per-cpu cr3 is updated
 *          false -> MMUEXT_NEW_USER_BASEPTR
 * @cr3:    physical address of the new top-level table; 0 maps to mfn 0
 *          (which triggers a warning when @kernel is set)
 */
static void __xen_write_cr3(bool kernel, unsigned long cr3)
{
	struct mmuext_op op;
	unsigned long mfn;

	trace_xen_mmu_write_cr3(kernel, cr3);

	mfn = cr3 ? pfn_to_mfn(PFN_DOWN(cr3)) : 0;

	WARN_ON(kernel && mfn == 0);

	if (kernel)
		op.cmd = MMUEXT_NEW_BASEPTR;
	else
		op.cmd = MMUEXT_NEW_USER_BASEPTR;
	op.arg1.mfn = mfn;

	xen_extend_mmuext_op(&op);

	if (!kernel)
		return;

	this_cpu_write(xen_cr3, cr3);

	/*
	 * xen_current_cr3 is only updated once the batch has actually
	 * been submitted.
	 */
	xen_mc_callback(set_current_cr3, (void *)cr3);
}
1394
static void xen_write_cr3(unsigned long cr3)
1395
{
1396
pgd_t *user_pgd = xen_get_user_pgd(__va(cr3));
1397
1398
BUG_ON(preemptible());
1399
1400
xen_mc_batch(); /* disables interrupts */
1401
1402
/* Update while interrupts are disabled, so its atomic with
1403
respect to ipis */
1404
this_cpu_write(xen_cr3, cr3);
1405
1406
__xen_write_cr3(true, cr3);
1407
1408
if (user_pgd)
1409
__xen_write_cr3(false, __pa(user_pgd));
1410
else
1411
__xen_write_cr3(false, 0);
1412
1413
xen_mc_issue(XEN_LAZY_CPU); /* interrupts restored */
1414
}
1415
1416
/*
1417
* At the start of the day - when Xen launches a guest, it has already
1418
* built pagetables for the guest. We diligently look over them
1419
* in xen_setup_kernel_pagetable and graft as appropriate them in the
1420
* init_top_pgt and its friends. Then when we are happy we load
1421
* the new init_top_pgt - and continue on.
1422
*
1423
* The generic code starts (start_kernel) and 'init_mem_mapping' sets
1424
* up the rest of the pagetables. When it has completed it loads the cr3.
1425
* N.B. that baremetal would start at 'start_kernel' (and the early
1426
* #PF handler would create bootstrap pagetables) - so we are running
1427
* with the same assumptions as what to do when write_cr3 is executed
1428
* at this point.
1429
*
1430
* Since there are no user-page tables at all, we have two variants
1431
* of xen_write_cr3 - the early bootup (this one), and the late one
1432
* (xen_write_cr3). The reason we have to do that is that in 64-bit
1433
* the Linux kernel and user-space are both in ring 3 while the
1434
* hypervisor is in ring 0.
1435
*/
1436
static void __init xen_write_cr3_init(unsigned long cr3)
1437
{
1438
BUG_ON(preemptible());
1439
1440
xen_mc_batch(); /* disables interrupts */
1441
1442
/* Update while interrupts are disabled, so its atomic with
1443
respect to ipis */
1444
this_cpu_write(xen_cr3, cr3);
1445
1446
__xen_write_cr3(true, cr3);
1447
1448
xen_mc_issue(XEN_LAZY_CPU); /* interrupts restored */
1449
}
1450
1451
static int xen_pgd_alloc(struct mm_struct *mm)
1452
{
1453
pgd_t *pgd = mm->pgd;
1454
struct page *page = virt_to_page(pgd);
1455
pgd_t *user_pgd;
1456
int ret = -ENOMEM;
1457
1458
BUG_ON(PagePinned(virt_to_page(pgd)));
1459
BUG_ON(page->private != 0);
1460
1461
user_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
1462
page->private = (unsigned long)user_pgd;
1463
1464
if (user_pgd != NULL) {
1465
#ifdef CONFIG_X86_VSYSCALL_EMULATION
1466
user_pgd[pgd_index(VSYSCALL_ADDR)] =
1467
__pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE);
1468
#endif
1469
ret = 0;
1470
}
1471
1472
BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd))));
1473
1474
return ret;
1475
}
1476
1477
/* Free the user pgd page attached to @pgd, if there is one. */
static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
	pgd_t *user_pgd = xen_get_user_pgd(pgd);

	if (!user_pgd)
		return;

	free_page((unsigned long)user_pgd);
}
1484
1485
/*
1486
* Init-time set_pte while constructing initial pagetables, which
1487
* doesn't allow RO page table pages to be remapped RW.
1488
*
1489
* If there is no MFN for this PFN then this page is initially
1490
* ballooned out so clear the PTE (as in decrease_reservation() in
1491
* drivers/xen/balloon.c).
1492
*
1493
* Many of these PTE updates are done on unpinned and writable pages
1494
* and doing a hypercall for these is unnecessary and expensive. At
1495
* this point it is rarely possible to tell if a page is pinned, so
1496
* mostly write the PTE directly and rely on Xen trapping and
1497
* emulating any updates as necessary.
1498
*/
1499
static void __init xen_set_pte_init(pte_t *ptep, pte_t pte)
1500
{
1501
if (unlikely(is_early_ioremap_ptep(ptep)))
1502
__xen_set_pte(ptep, pte);
1503
else
1504
native_set_pte(ptep, pte);
1505
}
1506
1507
/*
 * Init-time make_pte: strips _PAGE_RW from mappings of the initial p2m
 * frames when required, then converts the pfn in @pte to an mfn.
 */
__visible pte_t xen_make_pte_init(pteval_t pte)
{
	unsigned long pfn = (pte & PTE_PFN_MASK) >> PAGE_SHIFT;

	/*
	 * Pages of the initial p2m list that are mapped outside the default
	 * address range must be mapped read-only.  That region also holds
	 * the page tables mapping the p2m list, and page tables MUST be
	 * mapped read-only.
	 */
	if (xen_start_info->mfn_list < __START_KERNEL_map) {
		unsigned long p2m_first = xen_start_info->first_p2m_pfn;
		unsigned long p2m_limit = p2m_first +
					  xen_start_info->nr_p2m_frames;

		if (pfn >= p2m_first && pfn < p2m_limit)
			pte &= ~_PAGE_RW;
	}

	return native_make_pte(pte_pfn_to_mfn(pte));
}
PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_init);
1527
1528
/* Early in boot, while setting up the initial pagetable, assume
1529
everything is pinned. */
1530
static void __init xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
1531
{
1532
#ifdef CONFIG_FLATMEM
1533
BUG_ON(mem_map); /* should only be used early */
1534
#endif
1535
make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
1536
pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
1537
}
1538
1539
/* Used for pmd and pud */
1540
static void __init xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn)
1541
{
1542
#ifdef CONFIG_FLATMEM
1543
BUG_ON(mem_map); /* should only be used early */
1544
#endif
1545
make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
1546
}
1547
1548
/* Early release_pte assumes that all pts are pinned, since there's
1549
only init_mm and anything attached to that is pinned. */
1550
static void __init xen_release_pte_init(unsigned long pfn)
1551
{
1552
pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
1553
make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
1554
}
1555
1556
/* Early-boot release_pmd: just make the table page writable again. */
static void __init xen_release_pmd_init(unsigned long pfn)
{
	void *table = __va(PFN_PHYS(pfn));

	make_lowmem_page_readwrite(table);
}
1560
1561
static inline void __pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
1562
{
1563
struct multicall_space mcs;
1564
struct mmuext_op *op;
1565
1566
mcs = __xen_mc_entry(sizeof(*op));
1567
op = mcs.args;
1568
op->cmd = cmd;
1569
op->arg1.mfn = pfn_to_mfn(pfn);
1570
1571
MULTI_mmuext_op(mcs.mc, mcs.args, 1, NULL, DOMID_SELF);
1572
}
1573
1574
/*
 * Add an update_va_mapping call to the current multicall batch that remaps
 * the __va() alias of @pfn with protection @prot.
 */
static inline void __set_pfn_prot(unsigned long pfn, pgprot_t prot)
{
	unsigned long va = (unsigned long)__va(pfn << PAGE_SHIFT);
	struct multicall_space slot = __xen_mc_entry(0);

	MULTI_update_va_mapping(slot.mc, va, pfn_pte(pfn, prot), 0);
}
1583
1584
/* This needs to make sure the new pte page is pinned iff its being
1585
attached to a pinned pagetable. */
1586
static inline void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn,
1587
unsigned level)
1588
{
1589
bool pinned = xen_page_pinned(mm->pgd);
1590
1591
trace_xen_mmu_alloc_ptpage(mm, pfn, level, pinned);
1592
1593
if (pinned) {
1594
struct page *page = pfn_to_page(pfn);
1595
1596
pinned = false;
1597
if (static_branch_likely(&xen_struct_pages_ready)) {
1598
pinned = PagePinned(page);
1599
SetPagePinned(page);
1600
}
1601
1602
xen_mc_batch();
1603
1604
__set_pfn_prot(pfn, PAGE_KERNEL_RO);
1605
1606
if (level == PT_PTE && IS_ENABLED(CONFIG_SPLIT_PTE_PTLOCKS) &&
1607
!pinned)
1608
__pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
1609
1610
xen_mc_issue(XEN_LAZY_MMU);
1611
}
1612
}
1613
1614
static void xen_alloc_pte(struct mm_struct *mm, unsigned long pfn)
1615
{
1616
xen_alloc_ptpage(mm, pfn, PT_PTE);
1617
}
1618
1619
static void xen_alloc_pmd(struct mm_struct *mm, unsigned long pfn)
1620
{
1621
xen_alloc_ptpage(mm, pfn, PT_PMD);
1622
}
1623
1624
/* This should never happen until we're OK to use struct page */
1625
static inline void xen_release_ptpage(unsigned long pfn, unsigned level)
1626
{
1627
struct page *page = pfn_to_page(pfn);
1628
bool pinned = PagePinned(page);
1629
1630
trace_xen_mmu_release_ptpage(pfn, level, pinned);
1631
1632
if (pinned) {
1633
xen_mc_batch();
1634
1635
if (level == PT_PTE && IS_ENABLED(CONFIG_SPLIT_PTE_PTLOCKS))
1636
__pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
1637
1638
__set_pfn_prot(pfn, PAGE_KERNEL);
1639
1640
xen_mc_issue(XEN_LAZY_MMU);
1641
1642
ClearPagePinned(page);
1643
}
1644
}
1645
1646
static void xen_release_pte(unsigned long pfn)
1647
{
1648
xen_release_ptpage(pfn, PT_PTE);
1649
}
1650
1651
static void xen_release_pmd(unsigned long pfn)
1652
{
1653
xen_release_ptpage(pfn, PT_PMD);
1654
}
1655
1656
static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn)
1657
{
1658
xen_alloc_ptpage(mm, pfn, PT_PUD);
1659
}
1660
1661
static void xen_release_pud(unsigned long pfn)
1662
{
1663
xen_release_ptpage(pfn, PT_PUD);
1664
}
1665
1666
/*
1667
* Like __va(), but returns address in the kernel mapping (which is
1668
* all we have until the physical memory mapping has been set up.
1669
*/
1670
static void * __init __ka(phys_addr_t paddr)
1671
{
1672
return (void *)(paddr + __START_KERNEL_map);
1673
}
1674
1675
/* Convert a machine address to physical address */
1676
static unsigned long __init m2p(phys_addr_t maddr)
1677
{
1678
phys_addr_t paddr;
1679
1680
maddr &= XEN_PTE_MFN_MASK;
1681
paddr = mfn_to_pfn(maddr >> PAGE_SHIFT) << PAGE_SHIFT;
1682
1683
return paddr;
1684
}
1685
1686
/* Convert a machine address to kernel virtual */
1687
static void * __init m2v(phys_addr_t maddr)
1688
{
1689
return __ka(m2p(maddr));
1690
}
1691
1692
/* Set the page permissions on an identity-mapped pages */
1693
static void __init set_page_prot_flags(void *addr, pgprot_t prot,
1694
unsigned long flags)
1695
{
1696
unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
1697
pte_t pte = pfn_pte(pfn, prot);
1698
1699
if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, flags))
1700
BUG();
1701
}
1702
/* set_page_prot_flags() with UVMF_NONE as the flags argument. */
static void __init set_page_prot(void *addr, pgprot_t prot)
{
	set_page_prot_flags(addr, prot, UVMF_NONE);
}
1706
1707
/*
 * Ask the hypervisor where the machine-to-physical table lives and how
 * large it is.  If the XENMEM_machphys_mapping query fails, only the
 * entry count is set, to the MACH2PHYS_NR_ENTRIES default.
 */
void __init xen_setup_machphys_mapping(void)
{
	struct xen_machphys_mapping mapping;

	if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) != 0) {
		machine_to_phys_nr = MACH2PHYS_NR_ENTRIES;
		return;
	}

	machine_to_phys_mapping = (unsigned long *)mapping.v_start;
	machine_to_phys_nr = mapping.max_mfn + 1;
}
1718
1719
/*
 * Rewrite all PTRS_PER_PTE entries of the page-table page at @v through
 * xen_make_pte().  Every level is converted the same way, so the entries
 * are simply treated as ptes.
 */
static void __init convert_pfn_mfn(void *v)
{
	pte_t *entry = v;
	pte_t *const end = entry + PTRS_PER_PTE;

	for (; entry < end; entry++)
		*entry = xen_make_pte(entry->pte);
}
1729
/*
 * Shrink the pfn range [*pt_base, *pt_end] if the page at @addr sits at
 * one of its ends.  A matching page is remapped with PAGE_KERNEL and
 * cleared, and the corresponding bound is moved inwards by one.
 */
static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end,
				 unsigned long addr)
{
	const unsigned long pfn = PFN_DOWN(__pa(addr));
	void *const page = (void *)addr;

	if (*pt_base == pfn) {
		set_page_prot_flags(page, PAGE_KERNEL, UVMF_INVLPG);
		clear_page(page);
		(*pt_base)++;
	}

	if (*pt_end == pfn) {
		set_page_prot_flags(page, PAGE_KERNEL, UVMF_INVLPG);
		clear_page(page);
		(*pt_end)--;
	}
}
1743
/*
1744
* Set up the initial kernel pagetable.
1745
*
1746
* We can construct this by grafting the Xen provided pagetable into
1747
* head_64.S's preconstructed pagetables. We copy the Xen L2's into
1748
* level2_ident_pgt, and level2_kernel_pgt. This means that only the
1749
* kernel has a physical mapping to start with - but that's enough to
1750
* get __va working. We need to fill in the rest of the physical
1751
* mapping once some sort of allocator has been set up.
1752
*/
1753
void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
1754
{
1755
pud_t *l3;
1756
pmd_t *l2;
1757
unsigned long addr[3];
1758
unsigned long pt_base, pt_end;
1759
unsigned i;
1760
1761
/* max_pfn_mapped is the last pfn mapped in the initial memory
1762
* mappings. Considering that on Xen after the kernel mappings we
1763
* have the mappings of some pages that don't exist in pfn space, we
1764
* set max_pfn_mapped to the last real pfn mapped. */
1765
if (xen_start_info->mfn_list < __START_KERNEL_map)
1766
max_pfn_mapped = xen_start_info->first_p2m_pfn;
1767
else
1768
max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->mfn_list));
1769
1770
pt_base = PFN_DOWN(__pa(xen_start_info->pt_base));
1771
pt_end = pt_base + xen_start_info->nr_pt_frames;
1772
1773
/* Zap identity mapping */
1774
init_top_pgt[0] = __pgd(0);
1775
1776
/* Pre-constructed entries are in pfn, so convert to mfn */
1777
/* L4[273] -> level3_ident_pgt */
1778
/* L4[511] -> level3_kernel_pgt */
1779
convert_pfn_mfn(init_top_pgt);
1780
1781
/* L3_i[0] -> level2_ident_pgt */
1782
convert_pfn_mfn(level3_ident_pgt);
1783
/* L3_k[510] -> level2_kernel_pgt */
1784
/* L3_k[511] -> level2_fixmap_pgt */
1785
convert_pfn_mfn(level3_kernel_pgt);
1786
1787
/* L3_k[511][508-FIXMAP_PMD_NUM ... 507] -> level1_fixmap_pgt */
1788
convert_pfn_mfn(level2_fixmap_pgt);
1789
1790
/* We get [511][511] and have Xen's version of level2_kernel_pgt */
1791
l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd);
1792
l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud);
1793
1794
addr[0] = (unsigned long)pgd;
1795
addr[1] = (unsigned long)l3;
1796
addr[2] = (unsigned long)l2;
1797
/* Graft it onto L4[273][0]. Note that we creating an aliasing problem:
1798
* Both L4[273][0] and L4[511][510] have entries that point to the same
1799
* L2 (PMD) tables. Meaning that if you modify it in __va space
1800
* it will be also modified in the __ka space! (But if you just
1801
* modify the PMD table to point to other PTE's or none, then you
1802
* are OK - which is what cleanup_highmap does) */
1803
copy_page(level2_ident_pgt, l2);
1804
/* Graft it onto L4[511][510] */
1805
copy_page(level2_kernel_pgt, l2);
1806
1807
/*
1808
* Zap execute permission from the ident map. Due to the sharing of
1809
* L1 entries we need to do this in the L2.
1810
*/
1811
if (__supported_pte_mask & _PAGE_NX) {
1812
for (i = 0; i < PTRS_PER_PMD; ++i) {
1813
if (pmd_none(level2_ident_pgt[i]))
1814
continue;
1815
level2_ident_pgt[i] = pmd_set_flags(level2_ident_pgt[i], _PAGE_NX);
1816
}
1817
}
1818
1819
/* Copy the initial P->M table mappings if necessary. */
1820
i = pgd_index(xen_start_info->mfn_list);
1821
if (i && i < pgd_index(__START_KERNEL_map))
1822
init_top_pgt[i] = ((pgd_t *)xen_start_info->pt_base)[i];
1823
1824
/* Make pagetable pieces RO */
1825
set_page_prot(init_top_pgt, PAGE_KERNEL_RO);
1826
set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
1827
set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO);
1828
set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO);
1829
set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
1830
set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
1831
1832
for (i = 0; i < FIXMAP_PMD_NUM; i++) {
1833
set_page_prot(level1_fixmap_pgt + i * PTRS_PER_PTE,
1834
PAGE_KERNEL_RO);
1835
}
1836
1837
/* Pin down new L4 */
1838
pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
1839
PFN_DOWN(__pa_symbol(init_top_pgt)));
1840
1841
/* Unpin Xen-provided one */
1842
pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
1843
1844
#ifdef CONFIG_X86_VSYSCALL_EMULATION
1845
/* Pin user vsyscall L3 */
1846
set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO);
1847
pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE,
1848
PFN_DOWN(__pa_symbol(level3_user_vsyscall)));
1849
#endif
1850
1851
/*
1852
* At this stage there can be no user pgd, and no page structure to
1853
* attach it to, so make sure we just set kernel pgd.
1854
*/
1855
xen_mc_batch();
1856
__xen_write_cr3(true, __pa(init_top_pgt));
1857
xen_mc_issue(XEN_LAZY_CPU);
1858
1859
/* We can't that easily rip out L3 and L2, as the Xen pagetables are
1860
* set out this way: [L4], [L1], [L2], [L3], [L1], [L1] ... for
1861
* the initial domain. For guests using the toolstack, they are in:
1862
* [L4], [L3], [L2], [L1], [L1], order .. So for dom0 we can only
1863
* rip out the [L4] (pgd), but for guests we shave off three pages.
1864
*/
1865
for (i = 0; i < ARRAY_SIZE(addr); i++)
1866
check_pt_base(&pt_base, &pt_end, addr[i]);
1867
1868
/* Our (by three pages) smaller Xen pagetable that we are using */
1869
xen_pt_base = PFN_PHYS(pt_base);
1870
xen_pt_size = (pt_end - pt_base) * PAGE_SIZE;
1871
memblock_reserve(xen_pt_base, xen_pt_size);
1872
1873
/* Revector the xen_start_info */
1874
xen_start_info = (struct start_info *)__va(__pa(xen_start_info));
1875
}
1876
1877
/*
1878
* Read a value from a physical address.
1879
*/
1880
static unsigned long __init xen_read_phys_ulong(phys_addr_t addr)
1881
{
1882
unsigned long *vaddr;
1883
unsigned long val;
1884
1885
vaddr = early_memremap_ro(addr, sizeof(val));
1886
val = *vaddr;
1887
early_memunmap(vaddr, sizeof(val));
1888
return val;
1889
}
1890
1891
/*
1892
* Translate a virtual address to a physical one without relying on mapped
1893
* page tables. Don't rely on big pages being aligned in (guest) physical
1894
* space!
1895
*/
1896
static phys_addr_t __init xen_early_virt_to_phys(unsigned long vaddr)
1897
{
1898
phys_addr_t pa;
1899
pgd_t pgd;
1900
pud_t pud;
1901
pmd_t pmd;
1902
pte_t pte;
1903
1904
pa = read_cr3_pa();
1905
pgd = native_make_pgd(xen_read_phys_ulong(pa + pgd_index(vaddr) *
1906
sizeof(pgd)));
1907
if (!pgd_present(pgd))
1908
return 0;
1909
1910
pa = pgd_val(pgd) & PTE_PFN_MASK;
1911
pud = native_make_pud(xen_read_phys_ulong(pa + pud_index(vaddr) *
1912
sizeof(pud)));
1913
if (!pud_present(pud))
1914
return 0;
1915
pa = pud_val(pud) & PTE_PFN_MASK;
1916
if (pud_leaf(pud))
1917
return pa + (vaddr & ~PUD_MASK);
1918
1919
pmd = native_make_pmd(xen_read_phys_ulong(pa + pmd_index(vaddr) *
1920
sizeof(pmd)));
1921
if (!pmd_present(pmd))
1922
return 0;
1923
pa = pmd_val(pmd) & PTE_PFN_MASK;
1924
if (pmd_leaf(pmd))
1925
return pa + (vaddr & ~PMD_MASK);
1926
1927
pte = native_make_pte(xen_read_phys_ulong(pa + pte_index(vaddr) *
1928
sizeof(pte)));
1929
if (!pte_present(pte))
1930
return 0;
1931
pa = pte_pfn(pte) << PAGE_SHIFT;
1932
1933
return pa | (vaddr & ~PAGE_MASK);
1934
}
1935
1936
/*
1937
* Find a new area for the hypervisor supplied p2m list and relocate the p2m to
1938
* this area.
1939
*/
1940
void __init xen_relocate_p2m(void)
1941
{
1942
phys_addr_t size, new_area, pt_phys, pmd_phys, pud_phys;
1943
unsigned long p2m_pfn, p2m_pfn_end, n_frames, pfn, pfn_end;
1944
int n_pte, n_pt, n_pmd, n_pud, idx_pte, idx_pt, idx_pmd, idx_pud;
1945
pte_t *pt;
1946
pmd_t *pmd;
1947
pud_t *pud;
1948
pgd_t *pgd;
1949
unsigned long *new_p2m;
1950
1951
size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
1952
n_pte = roundup(size, PAGE_SIZE) >> PAGE_SHIFT;
1953
n_pt = roundup(size, PMD_SIZE) >> PMD_SHIFT;
1954
n_pmd = roundup(size, PUD_SIZE) >> PUD_SHIFT;
1955
n_pud = roundup(size, P4D_SIZE) >> P4D_SHIFT;
1956
n_frames = n_pte + n_pt + n_pmd + n_pud;
1957
1958
new_area = xen_find_free_area(PFN_PHYS(n_frames));
1959
if (!new_area) {
1960
xen_raw_console_write("Can't find new memory area for p2m needed due to E820 map conflict\n");
1961
BUG();
1962
}
1963
1964
/*
1965
* Setup the page tables for addressing the new p2m list.
1966
* We have asked the hypervisor to map the p2m list at the user address
1967
* PUD_SIZE. It may have done so, or it may have used a kernel space
1968
* address depending on the Xen version.
1969
* To avoid any possible virtual address collision, just use
1970
* 2 * PUD_SIZE for the new area.
1971
*/
1972
pud_phys = new_area;
1973
pmd_phys = pud_phys + PFN_PHYS(n_pud);
1974
pt_phys = pmd_phys + PFN_PHYS(n_pmd);
1975
p2m_pfn = PFN_DOWN(pt_phys) + n_pt;
1976
1977
pgd = __va(read_cr3_pa());
1978
new_p2m = (unsigned long *)(2 * PGDIR_SIZE);
1979
for (idx_pud = 0; idx_pud < n_pud; idx_pud++) {
1980
pud = early_memremap(pud_phys, PAGE_SIZE);
1981
clear_page(pud);
1982
for (idx_pmd = 0; idx_pmd < min(n_pmd, PTRS_PER_PUD);
1983
idx_pmd++) {
1984
pmd = early_memremap(pmd_phys, PAGE_SIZE);
1985
clear_page(pmd);
1986
for (idx_pt = 0; idx_pt < min(n_pt, PTRS_PER_PMD);
1987
idx_pt++) {
1988
pt = early_memremap(pt_phys, PAGE_SIZE);
1989
clear_page(pt);
1990
for (idx_pte = 0;
1991
idx_pte < min(n_pte, PTRS_PER_PTE);
1992
idx_pte++) {
1993
pt[idx_pte] = pfn_pte(p2m_pfn,
1994
PAGE_KERNEL);
1995
p2m_pfn++;
1996
}
1997
n_pte -= PTRS_PER_PTE;
1998
early_memunmap(pt, PAGE_SIZE);
1999
make_lowmem_page_readonly(__va(pt_phys));
2000
pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE,
2001
PFN_DOWN(pt_phys));
2002
pmd[idx_pt] = __pmd(_PAGE_TABLE | pt_phys);
2003
pt_phys += PAGE_SIZE;
2004
}
2005
n_pt -= PTRS_PER_PMD;
2006
early_memunmap(pmd, PAGE_SIZE);
2007
make_lowmem_page_readonly(__va(pmd_phys));
2008
pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE,
2009
PFN_DOWN(pmd_phys));
2010
pud[idx_pmd] = __pud(_PAGE_TABLE | pmd_phys);
2011
pmd_phys += PAGE_SIZE;
2012
}
2013
n_pmd -= PTRS_PER_PUD;
2014
early_memunmap(pud, PAGE_SIZE);
2015
make_lowmem_page_readonly(__va(pud_phys));
2016
pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(pud_phys));
2017
set_pgd(pgd + 2 + idx_pud, __pgd(_PAGE_TABLE | pud_phys));
2018
pud_phys += PAGE_SIZE;
2019
}
2020
2021
/* Now copy the old p2m info to the new area. */
2022
memcpy(new_p2m, xen_p2m_addr, size);
2023
xen_p2m_addr = new_p2m;
2024
2025
/* Release the old p2m list and set new list info. */
2026
p2m_pfn = PFN_DOWN(xen_early_virt_to_phys(xen_start_info->mfn_list));
2027
BUG_ON(!p2m_pfn);
2028
p2m_pfn_end = p2m_pfn + PFN_DOWN(size);
2029
2030
if (xen_start_info->mfn_list < __START_KERNEL_map) {
2031
pfn = xen_start_info->first_p2m_pfn;
2032
pfn_end = xen_start_info->first_p2m_pfn +
2033
xen_start_info->nr_p2m_frames;
2034
set_pgd(pgd + 1, __pgd(0));
2035
} else {
2036
pfn = p2m_pfn;
2037
pfn_end = p2m_pfn_end;
2038
}
2039
2040
memblock_phys_free(PFN_PHYS(pfn), PAGE_SIZE * (pfn_end - pfn));
2041
while (pfn < pfn_end) {
2042
if (pfn == p2m_pfn) {
2043
pfn = p2m_pfn_end;
2044
continue;
2045
}
2046
make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
2047
pfn++;
2048
}
2049
2050
xen_start_info->mfn_list = (unsigned long)xen_p2m_addr;
2051
xen_start_info->first_p2m_pfn = PFN_DOWN(new_area);
2052
xen_start_info->nr_p2m_frames = n_frames;
2053
}
2054
2055
/*
 * Reserve in memblock the pages that must not be handed out: the
 * start_info page, the store page (if store_mfn is set) and, for anything
 * but the initial domain, the domU console page.
 */
void __init xen_reserve_special_pages(void)
{
	phys_addr_t special;

	memblock_reserve(__pa(xen_start_info), PAGE_SIZE);

	if (xen_start_info->store_mfn) {
		special = PFN_PHYS(mfn_to_pfn(xen_start_info->store_mfn));
		memblock_reserve(special, PAGE_SIZE);
	}

	if (xen_initial_domain())
		return;

	special = PFN_PHYS(mfn_to_pfn(xen_start_info->console.domU.mfn));
	memblock_reserve(special, PAGE_SIZE);
}
2069
2070
/*
 * Run xen_chk_is_e820_usable() on the region [xen_pt_base,
 * xen_pt_base + xen_pt_size) recorded by xen_setup_kernel_pagetable().
 */
void __init xen_pt_check_e820(void)
{
	xen_chk_is_e820_usable(xen_pt_base, xen_pt_size, "page table");
}
2074
2075
/*
 * One page of dummy backing store, placed via __page_aligned_bss.
 * xen_set_fixmap() maps it into the local APIC and IO-APIC fixmap slots
 * instead of the physical address it was asked to map.
 */
static unsigned char dummy_mapping[PAGE_SIZE] __page_aligned_bss;
2076
2077
/*
 * Install a fixmap entry through the update_va_mapping hypercall.
 *
 * @idx:  fixmap slot
 * @phys: address to map; depending on @idx its frame number is treated as
 *        a pfn or an mfn, or ignored in favour of dummy_mapping
 * @prot: protection for the mapping (not used for the APIC slots)
 *
 * A failing hypercall is fatal.
 */
static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
{
	const phys_addr_t frame = phys >> PAGE_SHIFT;
	unsigned long vaddr;
	pte_t pte;

	switch (idx) {
	case FIX_BTMAP_END ... FIX_BTMAP_BEGIN:
#ifdef CONFIG_X86_VSYSCALL_EMULATION
	case VSYSCALL_PAGE:
#endif
		/* All local page mappings */
		pte = pfn_pte(frame, prot);
		break;

#ifdef CONFIG_X86_LOCAL_APIC
	case FIX_APIC_BASE:	/* maps dummy local APIC */
		pte = pfn_pte(PFN_DOWN(__pa(dummy_mapping)), PAGE_KERNEL);
		break;
#endif

#ifdef CONFIG_X86_IO_APIC
	case FIX_IO_APIC_BASE_0 ... FIX_IO_APIC_BASE_END:
		/*
		 * The IO APIC is simply not mapped - all access goes via
		 * hypercalls.  The slot is backed by dummy_mapping.
		 */
		pte = pfn_pte(PFN_DOWN(__pa(dummy_mapping)), PAGE_KERNEL);
		break;
#endif

	case FIX_PARAVIRT_BOOTMAP:
		/* This is an MFN, but not an IO mapping from the IO domain. */
	default:
		/* By default, set_fixmap is used for hardware mappings */
		pte = mfn_pte(frame, prot);
		break;
	}

	vaddr = __fix_to_virt(idx);
	if (HYPERVISOR_update_va_mapping(vaddr, pte, UVMF_INVLPG))
		BUG();

#ifdef CONFIG_X86_VSYSCALL_EMULATION
	/*
	 * Replicate a change to the vsyscall page mapping into the user
	 * pagetable's vsyscall mapping.
	 */
	if (idx == VSYSCALL_PAGE)
		set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte);
#endif
}
2132
2133
static void xen_enter_lazy_mmu(void)
2134
{
2135
enter_lazy(XEN_LAZY_MMU);
2136
}
2137
2138
static void xen_flush_lazy_mmu(void)
2139
{
2140
preempt_disable();
2141
2142
if (xen_get_lazy_mode() == XEN_LAZY_MMU) {
2143
arch_leave_lazy_mmu_mode();
2144
arch_enter_lazy_mmu_mode();
2145
}
2146
2147
preempt_enable();
2148
}
2149
2150
/*
 * Replace the "_init"/"_hyper" pagetable hooks in pv_ops.mmu with the
 * regular Xen implementations: the set_* entry writers, the
 * alloc/release hooks for pte, pmd and pud pages, make_pte and
 * write_cr3.
 */
static void __init xen_post_allocator_init(void)
{
	typeof(pv_ops.mmu) *mmu = &pv_ops.mmu;

	/* Pagetable entry writers. */
	mmu->set_pte = xen_set_pte;
	mmu->set_pmd = xen_set_pmd;
	mmu->set_pud = xen_set_pud;
	mmu->set_p4d = xen_set_p4d;

	/*
	 * Swapping these pointers works as long as patching hasn't
	 * happened yet (which it hasn't).
	 */
	mmu->alloc_pte = xen_alloc_pte;
	mmu->release_pte = xen_release_pte;
	mmu->alloc_pmd = xen_alloc_pmd;
	mmu->release_pmd = xen_release_pmd;
	mmu->alloc_pud = xen_alloc_pud;
	mmu->release_pud = xen_release_pud;
	mmu->make_pte = PV_CALLEE_SAVE(xen_make_pte);

	mmu->write_cr3 = &xen_write_cr3;
}
2169
2170
static void xen_leave_lazy_mmu(void)
2171
{
2172
preempt_disable();
2173
xen_mc_flush();
2174
leave_lazy(XEN_LAZY_MMU);
2175
preempt_enable();
2176
}
2177
2178
static const typeof(pv_ops) xen_mmu_ops __initconst = {
2179
.mmu = {
2180
.read_cr2 = __PV_IS_CALLEE_SAVE(xen_read_cr2),
2181
.write_cr2 = xen_write_cr2,
2182
2183
.read_cr3 = xen_read_cr3,
2184
.write_cr3 = xen_write_cr3_init,
2185
2186
.flush_tlb_user = xen_flush_tlb,
2187
.flush_tlb_kernel = xen_flush_tlb,
2188
.flush_tlb_one_user = xen_flush_tlb_one_user,
2189
.flush_tlb_multi = xen_flush_tlb_multi,
2190
2191
.pgd_alloc = xen_pgd_alloc,
2192
.pgd_free = xen_pgd_free,
2193
2194
.alloc_pte = xen_alloc_pte_init,
2195
.release_pte = xen_release_pte_init,
2196
.alloc_pmd = xen_alloc_pmd_init,
2197
.release_pmd = xen_release_pmd_init,
2198
2199
.set_pte = xen_set_pte_init,
2200
.set_pmd = xen_set_pmd_hyper,
2201
2202
.ptep_modify_prot_start = xen_ptep_modify_prot_start,
2203
.ptep_modify_prot_commit = xen_ptep_modify_prot_commit,
2204
2205
.pte_val = PV_CALLEE_SAVE(xen_pte_val),
2206
.pgd_val = PV_CALLEE_SAVE(xen_pgd_val),
2207
2208
.make_pte = PV_CALLEE_SAVE(xen_make_pte_init),
2209
.make_pgd = PV_CALLEE_SAVE(xen_make_pgd),
2210
2211
.set_pud = xen_set_pud_hyper,
2212
2213
.make_pmd = PV_CALLEE_SAVE(xen_make_pmd),
2214
.pmd_val = PV_CALLEE_SAVE(xen_pmd_val),
2215
2216
.pud_val = PV_CALLEE_SAVE(xen_pud_val),
2217
.make_pud = PV_CALLEE_SAVE(xen_make_pud),
2218
.set_p4d = xen_set_p4d_hyper,
2219
2220
.alloc_pud = xen_alloc_pmd_init,
2221
.release_pud = xen_release_pmd_init,
2222
2223
.p4d_val = PV_CALLEE_SAVE(xen_p4d_val),
2224
.make_p4d = PV_CALLEE_SAVE(xen_make_p4d),
2225
2226
.enter_mmap = xen_enter_mmap,
2227
.exit_mmap = xen_exit_mmap,
2228
2229
.lazy_mode = {
2230
.enter = xen_enter_lazy_mmu,
2231
.leave = xen_leave_lazy_mmu,
2232
.flush = xen_flush_lazy_mmu,
2233
},
2234
2235
.set_fixmap = xen_set_fixmap,
2236
},
2237
};
2238
2239
/*
 * Install the Xen PV MMU hooks: set the x86_init pagetable_init and
 * init_after_bootmem callbacks, copy xen_mmu_ops.mmu into pv_ops.mmu,
 * and fill the dummy mapping page with 0xff bytes.
 */
void __init xen_init_mmu_ops(void)
{
	memset(dummy_mapping, 0xff, sizeof(dummy_mapping));

	x86_init.paging.pagetable_init = xen_pagetable_init;
	x86_init.hyper.init_after_bootmem = xen_after_bootmem;

	pv_ops.mmu = xen_mmu_ops.mmu;
}
2248
2249
#define VOID_PTE (mfn_pte(0, __pgprot(0)))
2250
static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order,
2251
unsigned long *in_frames,
2252
unsigned long *out_frames)
2253
{
2254
int i;
2255
struct multicall_space mcs;
2256
2257
xen_mc_batch();
2258
for (i = 0; i < (1UL<<order); i++, vaddr += PAGE_SIZE) {
2259
mcs = __xen_mc_entry(0);
2260
2261
if (in_frames)
2262
in_frames[i] = virt_to_mfn((void *)vaddr);
2263
2264
MULTI_update_va_mapping(mcs.mc, vaddr, VOID_PTE, 0);
2265
__set_phys_to_machine(virt_to_pfn((void *)vaddr), INVALID_P2M_ENTRY);
2266
2267
if (out_frames)
2268
out_frames[i] = virt_to_pfn((void *)vaddr);
2269
}
2270
xen_mc_issue(0);
2271
}
2272
2273
/*
2274
* Update the pfn-to-mfn mappings for a virtual address range, either to
2275
* point to an array of mfns, or contiguously from a single starting
2276
* mfn.
2277
*/
2278
static void xen_remap_exchanged_ptes(unsigned long vaddr, int order,
2279
unsigned long *mfns,
2280
unsigned long first_mfn)
2281
{
2282
unsigned i, limit;
2283
unsigned long mfn;
2284
2285
xen_mc_batch();
2286
2287
limit = 1u << order;
2288
for (i = 0; i < limit; i++, vaddr += PAGE_SIZE) {
2289
struct multicall_space mcs;
2290
unsigned flags;
2291
2292
mcs = __xen_mc_entry(0);
2293
if (mfns)
2294
mfn = mfns[i];
2295
else
2296
mfn = first_mfn + i;
2297
2298
if (i < (limit - 1))
2299
flags = 0;
2300
else {
2301
if (order == 0)
2302
flags = UVMF_INVLPG | UVMF_ALL;
2303
else
2304
flags = UVMF_TLB_FLUSH | UVMF_ALL;
2305
}
2306
2307
MULTI_update_va_mapping(mcs.mc, vaddr,
2308
mfn_pte(mfn, PAGE_KERNEL), flags);
2309
2310
set_phys_to_machine(virt_to_pfn((void *)vaddr), mfn);
2311
}
2312
2313
xen_mc_issue(0);
2314
}
2315
2316
/*
2317
* Perform the hypercall to exchange a region of our pfns to point to
2318
* memory with the required contiguous alignment. Takes the pfns as
2319
* input, and populates mfns as output.
2320
*
2321
* Returns a success code indicating whether the hypervisor was able to
2322
* satisfy the request or not.
2323
*/
2324
static int xen_exchange_memory(unsigned long extents_in, unsigned int order_in,
2325
unsigned long *pfns_in,
2326
unsigned long extents_out,
2327
unsigned int order_out,
2328
unsigned long *mfns_out,
2329
unsigned int address_bits)
2330
{
2331
long rc;
2332
int success;
2333
2334
struct xen_memory_exchange exchange = {
2335
.in = {
2336
.nr_extents = extents_in,
2337
.extent_order = order_in,
2338
.extent_start = pfns_in,
2339
.domid = DOMID_SELF
2340
},
2341
.out = {
2342
.nr_extents = extents_out,
2343
.extent_order = order_out,
2344
.extent_start = mfns_out,
2345
.address_bits = address_bits,
2346
.domid = DOMID_SELF
2347
}
2348
};
2349
2350
BUG_ON(extents_in << order_in != extents_out << order_out);
2351
2352
rc = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
2353
success = (exchange.nr_exchanged == extents_in);
2354
2355
BUG_ON(!success && ((exchange.nr_exchanged != 0) || (rc == 0)));
2356
BUG_ON(success && (rc != 0));
2357
2358
return success;
2359
}
2360
2361
int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
2362
unsigned int address_bits,
2363
dma_addr_t *dma_handle)
2364
{
2365
unsigned long *in_frames, out_frame;
2366
unsigned long flags;
2367
int success;
2368
unsigned long vstart = (unsigned long)phys_to_virt(pstart);
2369
2370
if (unlikely(order > discontig_frames_order)) {
2371
if (!discontig_frames_dyn)
2372
return -ENOMEM;
2373
2374
if (alloc_discontig_frames(order))
2375
return -ENOMEM;
2376
}
2377
2378
memset((void *) vstart, 0, PAGE_SIZE << order);
2379
2380
spin_lock_irqsave(&xen_reservation_lock, flags);
2381
2382
in_frames = discontig_frames;
2383
2384
/* 1. Zap current PTEs, remembering MFNs. */
2385
xen_zap_pfn_range(vstart, order, in_frames, NULL);
2386
2387
/* 2. Get a new contiguous memory extent. */
2388
out_frame = virt_to_pfn((void *)vstart);
2389
success = xen_exchange_memory(1UL << order, 0, in_frames,
2390
1, order, &out_frame,
2391
address_bits);
2392
2393
/* 3. Map the new extent in place of old pages. */
2394
if (success)
2395
xen_remap_exchanged_ptes(vstart, order, NULL, out_frame);
2396
else
2397
xen_remap_exchanged_ptes(vstart, order, in_frames, 0);
2398
2399
spin_unlock_irqrestore(&xen_reservation_lock, flags);
2400
2401
*dma_handle = virt_to_machine(vstart).maddr;
2402
return success ? 0 : -ENOMEM;
2403
}
2404
2405
/*
 * Exchange the contiguous extent behind the 2^@order pages at physical
 * address @pstart for 2^@order single-page extents.
 *
 * Does nothing when @order exceeds discontig_frames_order.  Otherwise
 * the region is zeroed and, under xen_reservation_lock, its ptes are
 * zapped, the extent is exchanged, and the range is remapped - onto the
 * new frames on success, or back onto the original extent on failure.
 */
void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order)
{
	unsigned long *new_frames, extent_mfn;
	unsigned long irqflags;
	unsigned long vstart;
	int exchanged;

	if (unlikely(order > discontig_frames_order))
		return;

	vstart = (unsigned long)phys_to_virt(pstart);
	memset((void *) vstart, 0, PAGE_SIZE << order);

	spin_lock_irqsave(&xen_reservation_lock, irqflags);

	new_frames = discontig_frames;

	/* 1. Find start MFN of contiguous extent. */
	extent_mfn = virt_to_mfn((void *)vstart);

	/* 2. Zap current PTEs. */
	xen_zap_pfn_range(vstart, order, NULL, new_frames);

	/* 3. Do the exchange for non-contiguous MFNs. */
	exchanged = xen_exchange_memory(1, order, &extent_mfn, 1UL << order,
					0, new_frames, 0);

	/* 4. Map the new pages, or restore the old extent on failure. */
	if (!exchanged)
		xen_remap_exchanged_ptes(vstart, order, NULL, extent_mfn);
	else
		xen_remap_exchanged_ptes(vstart, order, new_frames, 0);

	spin_unlock_irqrestore(&xen_reservation_lock, irqflags);
}
2440
2441
static noinline void xen_flush_tlb_all(void)
2442
{
2443
struct mmuext_op *op;
2444
struct multicall_space mcs;
2445
2446
preempt_disable();
2447
2448
mcs = xen_mc_entry(sizeof(*op));
2449
2450
op = mcs.args;
2451
op->cmd = MMUEXT_TLB_FLUSH_ALL;
2452
MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
2453
2454
xen_mc_issue(XEN_LAZY_MMU);
2455
2456
preempt_enable();
2457
}
2458
2459
#define REMAP_BATCH_SIZE 16
2460
2461
struct remap_data {
2462
xen_pfn_t *pfn;
2463
bool contiguous;
2464
bool no_translate;
2465
pgprot_t prot;
2466
struct mmu_update *mmu_update;
2467
};
2468
2469
/*
 * Per-pte callback for apply_to_page_range().  Builds a special pte for
 * the current frame in @data (a struct remap_data), records it as one
 * mmu_update request that targets the machine address of @ptep, and
 * advances both the frame cursor and the request cursor.  @addr is not
 * used.  Always returns 0.
 */
static int remap_area_pfn_pte_fn(pte_t *ptep, unsigned long addr, void *data)
{
	struct remap_data *rmd = data;
	struct mmu_update *req = rmd->mmu_update;
	pte_t pte = pte_mkspecial(mfn_pte(*rmd->pfn, rmd->prot));

	/*
	 * A contiguous range keeps one running frame number and bumps
	 * it; otherwise move on to the next element of the frame array.
	 */
	if (!rmd->contiguous)
		rmd->pfn++;
	else
		(*rmd->pfn)++;

	req->ptr = virt_to_machine(ptep).maddr;
	if (rmd->no_translate)
		req->ptr |= MMU_PT_UPDATE_NO_TRANSLATE;
	else
		req->ptr |= MMU_NORMAL_PT_UPDATE;
	req->val = pte_val_ma(pte);

	rmd->mmu_update = req + 1;

	return 0;
}
2492
2493
int xen_remap_pfn(struct vm_area_struct *vma, unsigned long addr,
2494
xen_pfn_t *pfn, int nr, int *err_ptr, pgprot_t prot,
2495
unsigned int domid, bool no_translate)
2496
{
2497
int err = 0;
2498
struct remap_data rmd;
2499
struct mmu_update mmu_update[REMAP_BATCH_SIZE];
2500
unsigned long range;
2501
int mapped = 0;
2502
2503
BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO)));
2504
2505
rmd.pfn = pfn;
2506
rmd.prot = prot;
2507
/*
2508
* We use the err_ptr to indicate if there we are doing a contiguous
2509
* mapping or a discontiguous mapping.
2510
*/
2511
rmd.contiguous = !err_ptr;
2512
rmd.no_translate = no_translate;
2513
2514
while (nr) {
2515
int index = 0;
2516
int done = 0;
2517
int batch = min(REMAP_BATCH_SIZE, nr);
2518
int batch_left = batch;
2519
2520
range = (unsigned long)batch << PAGE_SHIFT;
2521
2522
rmd.mmu_update = mmu_update;
2523
err = apply_to_page_range(vma->vm_mm, addr, range,
2524
remap_area_pfn_pte_fn, &rmd);
2525
if (err)
2526
goto out;
2527
2528
/*
2529
* We record the error for each page that gives an error, but
2530
* continue mapping until the whole set is done
2531
*/
2532
do {
2533
int i;
2534
2535
err = HYPERVISOR_mmu_update(&mmu_update[index],
2536
batch_left, &done, domid);
2537
2538
/*
2539
* @err_ptr may be the same buffer as @gfn, so
2540
* only clear it after each chunk of @gfn is
2541
* used.
2542
*/
2543
if (err_ptr) {
2544
for (i = index; i < index + done; i++)
2545
err_ptr[i] = 0;
2546
}
2547
if (err < 0) {
2548
if (!err_ptr)
2549
goto out;
2550
err_ptr[i] = err;
2551
done++; /* Skip failed frame. */
2552
} else
2553
mapped += done;
2554
batch_left -= done;
2555
index += done;
2556
} while (batch_left);
2557
2558
nr -= batch;
2559
addr += range;
2560
if (err_ptr)
2561
err_ptr += batch;
2562
cond_resched();
2563
}
2564
out:
2565
2566
xen_flush_tlb_all();
2567
2568
return err < 0 ? err : mapped;
2569
}
2570
EXPORT_SYMBOL_GPL(xen_remap_pfn);
2571
2572
#ifdef CONFIG_VMCORE_INFO
/*
 * Return the address of vmcoreinfo_note: its machine address when
 * running as a Xen PV domain, otherwise its physical address (__pa).
 */
phys_addr_t paddr_vmcoreinfo_note(void)
{
	if (xen_pv_domain())
		return virt_to_machine(vmcoreinfo_note).maddr;

	return __pa(vmcoreinfo_note);
}
#endif /* CONFIG_VMCORE_INFO */
2581
2582