GitHub Repository: torvalds/linux
Path: blob/master/arch/arm64/kvm/pkvm.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2020 - Google LLC
 * Author: Quentin Perret <[email protected]>
 */

#include <linux/init.h>
#include <linux/interval_tree_generic.h>
#include <linux/kmemleak.h>
#include <linux/kvm_host.h>
#include <asm/kvm_mmu.h>
#include <linux/memblock.h>
#include <linux/mutex.h>

#include <asm/kvm_pkvm.h>

#include "hyp_constants.h"

DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);

static struct memblock_region *hyp_memory = kvm_nvhe_sym(hyp_memory);
static unsigned int *hyp_memblock_nr_ptr = &kvm_nvhe_sym(hyp_memblock_nr);

phys_addr_t hyp_mem_base;
phys_addr_t hyp_mem_size;

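/*
 * Copy the kernel's memblock regions into the hypervisor's private array so
 * the nVHE code can build its own view of memory at EL2.
 */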
static int __init register_memblock_regions(void)
{
        struct memblock_region *reg;

        for_each_mem_region(reg) {
                if (*hyp_memblock_nr_ptr >= HYP_MEMBLOCK_REGIONS)
                        return -ENOMEM;

                hyp_memory[*hyp_memblock_nr_ptr] = *reg;
                (*hyp_memblock_nr_ptr)++;
        }

        return 0;
}

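/*
 * Reserve, while memblock allocations are still possible at early boot, the
 * physically contiguous memory the protected-mode hypervisor will need for
 * its own page tables and metadata.
 */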
void __init kvm_hyp_reserve(void)
{
        u64 hyp_mem_pages = 0;
        int ret;

        if (!is_hyp_mode_available() || is_kernel_in_hyp_mode())
                return;

        if (kvm_get_mode() != KVM_MODE_PROTECTED)
                return;

        ret = register_memblock_regions();
        if (ret) {
                *hyp_memblock_nr_ptr = 0;
                kvm_err("Failed to register hyp memblocks: %d\n", ret);
                return;
        }

        hyp_mem_pages += hyp_s1_pgtable_pages();
        hyp_mem_pages += host_s2_pgtable_pages();
        hyp_mem_pages += hyp_vm_table_pages();
        hyp_mem_pages += hyp_vmemmap_pages(STRUCT_HYP_PAGE_SIZE);
        hyp_mem_pages += pkvm_selftest_pages();
        hyp_mem_pages += hyp_ffa_proxy_pages();

        /*
         * Try to allocate a PMD-aligned region to reduce TLB pressure once
         * this is unmapped from the host stage-2, and fall back to PAGE_SIZE.
         */
        hyp_mem_size = hyp_mem_pages << PAGE_SHIFT;
        hyp_mem_base = memblock_phys_alloc(ALIGN(hyp_mem_size, PMD_SIZE),
                                           PMD_SIZE);
        if (!hyp_mem_base)
                hyp_mem_base = memblock_phys_alloc(hyp_mem_size, PAGE_SIZE);
        else
                hyp_mem_size = ALIGN(hyp_mem_size, PMD_SIZE);

        if (!hyp_mem_base) {
                kvm_err("Failed to reserve hyp memory\n");
                return;
        }

        kvm_info("Reserved %lld MiB at 0x%llx\n", hyp_mem_size >> 20,
                 hyp_mem_base);
}

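/*
 * Tear down the EL2 view of the VM (if it was ever created) and release the
 * memory stashed for the teardown path.
 */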
static void __pkvm_destroy_hyp_vm(struct kvm *host_kvm)
89
{
90
if (host_kvm->arch.pkvm.handle) {
91
WARN_ON(kvm_call_hyp_nvhe(__pkvm_teardown_vm,
92
host_kvm->arch.pkvm.handle));
93
}
94
95
host_kvm->arch.pkvm.handle = 0;
96
free_hyp_memcache(&host_kvm->arch.pkvm.teardown_mc);
97
free_hyp_memcache(&host_kvm->arch.pkvm.stage2_teardown_mc);
98
}
99
100
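/*
 * Allocate the EL2 vCPU state and donate it to the hypervisor via
 * __pkvm_init_vcpu; on success the vCPU is marked as finalized.
 */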
static int __pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu)
{
        size_t hyp_vcpu_sz = PAGE_ALIGN(PKVM_HYP_VCPU_SIZE);
        pkvm_handle_t handle = vcpu->kvm->arch.pkvm.handle;
        void *hyp_vcpu;
        int ret;

        vcpu->arch.pkvm_memcache.flags |= HYP_MEMCACHE_ACCOUNT_STAGE2;

        hyp_vcpu = alloc_pages_exact(hyp_vcpu_sz, GFP_KERNEL_ACCOUNT);
        if (!hyp_vcpu)
                return -ENOMEM;

        ret = kvm_call_hyp_nvhe(__pkvm_init_vcpu, handle, vcpu, hyp_vcpu);
        if (!ret)
                vcpu_set_flag(vcpu, VCPU_PKVM_FINALIZED);
        else
                free_pages_exact(hyp_vcpu, hyp_vcpu_sz);

        return ret;
}

/*
 * Allocates and donates memory for hypervisor VM structs at EL2.
 *
 * Allocates space for the VM state, which includes the hyp vm as well as
 * the hyp vcpus.
 *
 * Stores an opaque handle in the kvm struct for future reference.
 *
 * Return 0 on success, negative error code on failure.
 */
static int __pkvm_create_hyp_vm(struct kvm *host_kvm)
{
        size_t pgd_sz, hyp_vm_sz;
        void *pgd, *hyp_vm;
        int ret;

        if (host_kvm->created_vcpus < 1)
                return -EINVAL;

        pgd_sz = kvm_pgtable_stage2_pgd_size(host_kvm->arch.mmu.vtcr);

        /*
         * The PGD pages will be reclaimed using a hyp_memcache which implies
         * page granularity. So, use alloc_pages_exact() to get individual
         * refcounts.
         */
        pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL_ACCOUNT);
        if (!pgd)
                return -ENOMEM;

        /* Allocate memory to donate to hyp for vm and vcpu pointers. */
        hyp_vm_sz = PAGE_ALIGN(size_add(PKVM_HYP_VM_SIZE,
                                        size_mul(sizeof(void *),
                                                 host_kvm->created_vcpus)));
        hyp_vm = alloc_pages_exact(hyp_vm_sz, GFP_KERNEL_ACCOUNT);
        if (!hyp_vm) {
                ret = -ENOMEM;
                goto free_pgd;
        }

        /* Donate the VM memory to hyp and let hyp initialize it. */
        ret = kvm_call_hyp_nvhe(__pkvm_init_vm, host_kvm, hyp_vm, pgd);
        if (ret < 0)
                goto free_vm;

        host_kvm->arch.pkvm.handle = ret;
        host_kvm->arch.pkvm.stage2_teardown_mc.flags |= HYP_MEMCACHE_ACCOUNT_STAGE2;
        kvm_account_pgtable_pages(pgd, pgd_sz / PAGE_SIZE);

        return 0;
free_vm:
        free_pages_exact(hyp_vm, hyp_vm_sz);
free_pgd:
        free_pages_exact(pgd, pgd_sz);
        return ret;
}

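/*
 * Create the hyp VM the first time it is needed, serialized by the VM's
 * config_lock.
 */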
int pkvm_create_hyp_vm(struct kvm *host_kvm)
{
        int ret = 0;

        mutex_lock(&host_kvm->arch.config_lock);
        if (!host_kvm->arch.pkvm.handle)
                ret = __pkvm_create_hyp_vm(host_kvm);
        mutex_unlock(&host_kvm->arch.config_lock);

        return ret;
}

int pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu)
{
        int ret = 0;

        mutex_lock(&vcpu->kvm->arch.config_lock);
        if (!vcpu_get_flag(vcpu, VCPU_PKVM_FINALIZED))
                ret = __pkvm_create_hyp_vcpu(vcpu);
        mutex_unlock(&vcpu->kvm->arch.config_lock);

        return ret;
}

void pkvm_destroy_hyp_vm(struct kvm *host_kvm)
{
        mutex_lock(&host_kvm->arch.config_lock);
        __pkvm_destroy_hyp_vm(host_kvm);
        mutex_unlock(&host_kvm->arch.config_lock);
}

int pkvm_init_host_vm(struct kvm *host_kvm)
{
        return 0;
}

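/*
 * Called on each CPU to let the hypervisor finalize its protection of that
 * CPU; any failure is reported back through *arg.
 */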
static void __init _kvm_host_prot_finalize(void *arg)
{
        int *err = arg;

        if (WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize)))
                WRITE_ONCE(*err, -EINVAL);
}

static int __init pkvm_drop_host_privileges(void)
{
        int ret = 0;

        /*
         * Flip the static key upfront as that may no longer be possible
         * once the host stage 2 is installed.
         */
        static_branch_enable(&kvm_protected_mode_initialized);
        on_each_cpu(_kvm_host_prot_finalize, &ret, 1);
        return ret;
}

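/*
 * Initcall that completes pKVM initialisation: hide the hyp sections from
 * kmemleak, then drop the host's access to hypervisor-owned memory.
 */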
static int __init finalize_pkvm(void)
{
        int ret;

        if (!is_protected_kvm_enabled() || !is_kvm_arm_initialised())
                return 0;

        /*
         * Exclude HYP sections from kmemleak so that they don't get peeked
         * at, which would end badly once inaccessible.
         */
        kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start);
        kmemleak_free_part(__hyp_data_start, __hyp_data_end - __hyp_data_start);
        kmemleak_free_part(__hyp_rodata_start, __hyp_rodata_end - __hyp_rodata_start);
        kmemleak_free_part_phys(hyp_mem_base, hyp_mem_size);

        ret = pkvm_drop_host_privileges();
        if (ret)
                pr_err("Failed to finalize Hyp protection: %d\n", ret);

        return ret;
}
device_initcall_sync(finalize_pkvm);

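/*
 * Guest stage-2 mappings created by the host are tracked in an interval tree
 * of pkvm_mapping nodes keyed by guest IPA; the page tables themselves live
 * at EL2.
 */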
static u64 __pkvm_mapping_start(struct pkvm_mapping *m)
{
        return m->gfn * PAGE_SIZE;
}

static u64 __pkvm_mapping_end(struct pkvm_mapping *m)
{
        return (m->gfn + m->nr_pages) * PAGE_SIZE - 1;
}

INTERVAL_TREE_DEFINE(struct pkvm_mapping, node, u64, __subtree_last,
                     __pkvm_mapping_start, __pkvm_mapping_end, static,
                     pkvm_mapping);

/*
 * __tmp is updated to iter_first(pkvm_mappings) *before* entering the body of the loop to allow
 * freeing of __map inline.
 */
#define for_each_mapping_in_range_safe(__pgt, __start, __end, __map)                           \
        for (struct pkvm_mapping *__tmp = pkvm_mapping_iter_first(&(__pgt)->pkvm_mappings,     \
                                                                  __start, __end - 1);         \
             __tmp && ({                                                                       \
                        __map = __tmp;                                                         \
                        __tmp = pkvm_mapping_iter_next(__map, __start, __end - 1);             \
                        true;                                                                  \
                       });                                                                     \
            )

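/*
 * pKVM implementations of the kvm_pgtable stage-2 callbacks: the host only
 * keeps the pkvm_mappings tree in sync and forwards the real work to the
 * hypervisor through hypercalls.
 */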
int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
                             struct kvm_pgtable_mm_ops *mm_ops)
{
        pgt->pkvm_mappings = RB_ROOT_CACHED;
        pgt->mmu = mmu;

        return 0;
}

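/*
 * Unshare every mapping overlapping [start, end) with the hypervisor and
 * drop it from the tracking tree.
 */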
static int __pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 start, u64 end)
{
        struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
        pkvm_handle_t handle = kvm->arch.pkvm.handle;
        struct pkvm_mapping *mapping;
        int ret;

        if (!handle)
                return 0;

        for_each_mapping_in_range_safe(pgt, start, end, mapping) {
                ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_guest, handle, mapping->gfn,
                                        mapping->nr_pages);
                if (WARN_ON(ret))
                        return ret;
                pkvm_mapping_remove(mapping, &pgt->pkvm_mappings);
                kfree(mapping);
        }

        return 0;
}

void pkvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt,
                                       u64 addr, u64 size)
{
        __pkvm_pgtable_stage2_unmap(pgt, addr, addr + size);
}

void pkvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt)
{
        /* Expected to be called after all pKVM mappings have been released. */
        WARN_ON_ONCE(!RB_EMPTY_ROOT(&pgt->pkvm_mappings.rb_root));
}

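/*
 * Share the page or block with the guest via the hypervisor and record the
 * new mapping in the tracking tree; the pkvm_mapping node itself comes
 * pre-allocated in the memcache.
 */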
int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
                            u64 phys, enum kvm_pgtable_prot prot,
                            void *mc, enum kvm_pgtable_walk_flags flags)
{
        struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
        struct pkvm_mapping *mapping = NULL;
        struct kvm_hyp_memcache *cache = mc;
        u64 gfn = addr >> PAGE_SHIFT;
        u64 pfn = phys >> PAGE_SHIFT;
        int ret;

        if (size != PAGE_SIZE && size != PMD_SIZE)
                return -EINVAL;

        lockdep_assert_held_write(&kvm->mmu_lock);

        /*
         * Calling stage2_map() on top of existing mappings is either happening because of a race
         * with another vCPU, or because we're changing between page and block mappings. As per
         * user_mem_abort(), same-size permission faults are handled in the relax_perms() path.
         */
        mapping = pkvm_mapping_iter_first(&pgt->pkvm_mappings, addr, addr + size - 1);
        if (mapping) {
                if (size == (mapping->nr_pages * PAGE_SIZE))
                        return -EAGAIN;

                /* Remove _any_ pkvm_mapping overlapping with the range, bigger or smaller. */
                ret = __pkvm_pgtable_stage2_unmap(pgt, addr, addr + size);
                if (ret)
                        return ret;
                mapping = NULL;
        }

        ret = kvm_call_hyp_nvhe(__pkvm_host_share_guest, pfn, gfn, size / PAGE_SIZE, prot);
        if (WARN_ON(ret))
                return ret;

        swap(mapping, cache->mapping);
        mapping->gfn = gfn;
        mapping->pfn = pfn;
        mapping->nr_pages = size / PAGE_SIZE;
        pkvm_mapping_insert(mapping, &pgt->pkvm_mappings);

        return ret;
}

int pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
        lockdep_assert_held_write(&kvm_s2_mmu_to_kvm(pgt->mmu)->mmu_lock);

        return __pkvm_pgtable_stage2_unmap(pgt, addr, addr + size);
}

int pkvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
        struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
        pkvm_handle_t handle = kvm->arch.pkvm.handle;
        struct pkvm_mapping *mapping;
        int ret = 0;

        lockdep_assert_held(&kvm->mmu_lock);
        for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) {
                ret = kvm_call_hyp_nvhe(__pkvm_host_wrprotect_guest, handle, mapping->gfn,
                                        mapping->nr_pages);
                if (WARN_ON(ret))
                        break;
        }

        return ret;
}

int pkvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
        struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
        struct pkvm_mapping *mapping;

        lockdep_assert_held(&kvm->mmu_lock);
        for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
                __clean_dcache_guest_page(pfn_to_kaddr(mapping->pfn),
                                          PAGE_SIZE * mapping->nr_pages);

        return 0;
}

bool pkvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr, u64 size, bool mkold)
{
        struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
        pkvm_handle_t handle = kvm->arch.pkvm.handle;
        struct pkvm_mapping *mapping;
        bool young = false;

        lockdep_assert_held(&kvm->mmu_lock);
        for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
                young |= kvm_call_hyp_nvhe(__pkvm_host_test_clear_young_guest, handle, mapping->gfn,
                                           mapping->nr_pages, mkold);

        return young;
}

int pkvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, enum kvm_pgtable_prot prot,
                                    enum kvm_pgtable_walk_flags flags)
{
        return kvm_call_hyp_nvhe(__pkvm_host_relax_perms_guest, addr >> PAGE_SHIFT, prot);
}

void pkvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr,
                                 enum kvm_pgtable_walk_flags flags)
{
        WARN_ON(kvm_call_hyp_nvhe(__pkvm_host_mkyoung_guest, addr >> PAGE_SHIFT));
}

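/*
 * Unlinked table management and block splitting are not supported under
 * pKVM, as the hypervisor owns the stage-2 tables; these stubs just warn if
 * they are ever reached.
 */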
void pkvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level)
{
        WARN_ON_ONCE(1);
}

kvm_pte_t *pkvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt, u64 phys, s8 level,
                                               enum kvm_pgtable_prot prot, void *mc, bool force_pte)
{
        WARN_ON_ONCE(1);
        return NULL;
}

int pkvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
                              struct kvm_mmu_memory_cache *mc)
{
        WARN_ON_ONCE(1);
        return -EINVAL;
}