GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/kvm/mmu/page_track.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Support KVM guest page tracking
 *
 * This feature allows us to track page access in the guest. Currently, only
 * write access is tracked.
 *
 * Copyright(C) 2015 Intel Corporation.
 *
 * Author:
 *   Xiao Guangrong <[email protected]>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/lockdep.h>
#include <linux/kvm_host.h>
#include <linux/rculist.h>

#include "mmu.h"
#include "mmu_internal.h"
#include "page_track.h"

static bool kvm_external_write_tracking_enabled(struct kvm *kvm)
{
#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
	/*
	 * Read external_write_tracking_enabled before related pointers. Pairs
	 * with the smp_store_release in kvm_page_track_write_tracking_enable().
	 */
	return smp_load_acquire(&kvm->arch.external_write_tracking_enabled);
#else
	return false;
#endif
}

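/*
 * Write tracking is in effect if an external user has enabled it, or if KVM
 * itself needs it for shadow paging, i.e. when shadow roots are allocated or
 * when TDP is disabled entirely.
 */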
bool kvm_page_track_write_tracking_enabled(struct kvm *kvm)
{
	return kvm_external_write_tracking_enabled(kvm) ||
	       kvm_shadow_root_allocated(kvm) || !tdp_enabled;
}

void kvm_page_track_free_memslot(struct kvm_memory_slot *slot)
{
	vfree(slot->arch.gfn_write_track);
	slot->arch.gfn_write_track = NULL;
}

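/*
 * Allocate the slot's array of per-gfn write-track counters (one entry per
 * 4KiB page, zero-initialized via __vcalloc()).  Returns 0 if the array
 * already exists or was successfully allocated, -ENOMEM otherwise.
 */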
static int __kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot,
						 unsigned long npages)
{
	const size_t size = sizeof(*slot->arch.gfn_write_track);

	if (!slot->arch.gfn_write_track)
		slot->arch.gfn_write_track = __vcalloc(npages, size,
						       GFP_KERNEL_ACCOUNT);

	return slot->arch.gfn_write_track ? 0 : -ENOMEM;
}

int kvm_page_track_create_memslot(struct kvm *kvm,
				  struct kvm_memory_slot *slot,
				  unsigned long npages)
{
	if (!kvm_page_track_write_tracking_enabled(kvm))
		return 0;

	return __kvm_page_track_write_tracking_alloc(slot, npages);
}

int kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot)
{
	return __kvm_page_track_write_tracking_alloc(slot, slot->npages);
}

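/*
 * Adjust the write-track counter for @gfn by @count (+1 to add a tracker,
 * -1 to drop one), bailing with a one-time WARN if the result would fall
 * below zero or exceed USHRT_MAX.
 */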
static void update_gfn_write_track(struct kvm_memory_slot *slot, gfn_t gfn,
				   short count)
{
	int index, val;

	index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K);

	val = slot->arch.gfn_write_track[index];

	if (WARN_ON_ONCE(val + count < 0 || val + count > USHRT_MAX))
		return;

	slot->arch.gfn_write_track[index] += count;
}

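/*
 * Bump the write-track count for @gfn and write-protect its 4KiB mapping.
 * The caller must hold mmu_lock for write, and must hold either slots_lock
 * or the SRCU read lock to keep @slot alive.
 */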
void __kvm_write_track_add_gfn(struct kvm *kvm, struct kvm_memory_slot *slot,
			       gfn_t gfn)
{
	lockdep_assert_held_write(&kvm->mmu_lock);

	lockdep_assert_once(lockdep_is_held(&kvm->slots_lock) ||
			    srcu_read_lock_held(&kvm->srcu));

	if (KVM_BUG_ON(!kvm_page_track_write_tracking_enabled(kvm), kvm))
		return;

	update_gfn_write_track(slot, gfn, 1);

	/*
	 * A new tracker disallows large page mapping for the tracked page.
	 */
	kvm_mmu_gfn_disallow_lpage(slot, gfn);

	if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn, PG_LEVEL_4K))
		kvm_flush_remote_tlbs(kvm);
}

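/*
 * Drop the write-track count for @gfn and allow large page mapping of the
 * tracked page again.  Locking requirements match __kvm_write_track_add_gfn().
 */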
void __kvm_write_track_remove_gfn(struct kvm *kvm,
				  struct kvm_memory_slot *slot, gfn_t gfn)
{
	lockdep_assert_held_write(&kvm->mmu_lock);

	lockdep_assert_once(lockdep_is_held(&kvm->slots_lock) ||
			    srcu_read_lock_held(&kvm->srcu));

	if (KVM_BUG_ON(!kvm_page_track_write_tracking_enabled(kvm), kvm))
		return;

	update_gfn_write_track(slot, gfn, -1);

	/*
	 * Allow large page mapping for the tracked page after the tracker
	 * is gone.
	 */
	kvm_mmu_gfn_allow_lpage(slot, gfn);
}

/*
 * Check if the corresponding access on the specified guest page is tracked.
 */
bool kvm_gfn_is_write_tracked(struct kvm *kvm,
			      const struct kvm_memory_slot *slot, gfn_t gfn)
{
	int index;

	if (!slot)
		return false;

	if (!kvm_page_track_write_tracking_enabled(kvm))
		return false;

	index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K);
	return !!READ_ONCE(slot->arch.gfn_write_track[index]);
}

#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
void kvm_page_track_cleanup(struct kvm *kvm)
{
	struct kvm_page_track_notifier_head *head;

	head = &kvm->arch.track_notifier_head;
	cleanup_srcu_struct(&head->track_srcu);
}

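/*
 * Initialize the notifier list and the SRCU structure that protects walks of
 * that list; kvm_page_track_cleanup() above is the teardown counterpart.
 */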
int kvm_page_track_init(struct kvm *kvm)
{
	struct kvm_page_track_notifier_head *head;

	head = &kvm->arch.track_notifier_head;
	INIT_HLIST_HEAD(&head->track_notifier_list);
	return init_srcu_struct(&head->track_srcu);
}

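/*
 * Enable write tracking on behalf of an external user: allocate the per-slot
 * tracking arrays for every memslot in every address space, then set
 * external_write_tracking_enabled with release semantics so that acquire
 * readers observe fully initialized metadata.  Not supported for TDX VMs.
 */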
static int kvm_enable_external_write_tracking(struct kvm *kvm)
{
	struct kvm_memslots *slots;
	struct kvm_memory_slot *slot;
	int r = 0, i, bkt;

	if (kvm->arch.vm_type == KVM_X86_TDX_VM)
		return -EOPNOTSUPP;

	mutex_lock(&kvm->slots_arch_lock);

	/*
	 * Check for *any* write tracking user (not just external users) under
	 * lock.  This avoids unnecessary work, e.g. if KVM itself is using
	 * write tracking, or if two external users raced when registering.
	 */
	if (kvm_page_track_write_tracking_enabled(kvm))
		goto out_success;

	for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) {
		slots = __kvm_memslots(kvm, i);
		kvm_for_each_memslot(slot, bkt, slots) {
			/*
			 * Intentionally do NOT free allocations on failure to
			 * avoid having to track which allocations were made
			 * now versus when the memslot was created.  The
			 * metadata is guaranteed to be freed when the slot is
			 * freed, and will be kept/used if userspace retries
			 * the failed ioctl() instead of killing the VM.
			 */
			r = kvm_page_track_write_tracking_alloc(slot);
			if (r)
				goto out_unlock;
		}
	}

out_success:
	/*
	 * Ensure that external_write_tracking_enabled becomes true strictly
	 * after all the related pointers are set.
	 */
	smp_store_release(&kvm->arch.external_write_tracking_enabled, true);
out_unlock:
	mutex_unlock(&kvm->slots_arch_lock);
	return r;
}

/*
 * Register the notifier so that event interception for the tracked guest
 * pages can be received.
 */
int kvm_page_track_register_notifier(struct kvm *kvm,
				     struct kvm_page_track_notifier_node *n)
{
	struct kvm_page_track_notifier_head *head;
	int r;

	if (!kvm || kvm->mm != current->mm)
		return -ESRCH;

	if (!kvm_external_write_tracking_enabled(kvm)) {
		r = kvm_enable_external_write_tracking(kvm);
		if (r)
			return r;
	}

	kvm_get_kvm(kvm);

	head = &kvm->arch.track_notifier_head;

	write_lock(&kvm->mmu_lock);
	hlist_add_head_rcu(&n->node, &head->track_notifier_list);
	write_unlock(&kvm->mmu_lock);
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_page_track_register_notifier);
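
/*
 * Illustrative usage sketch (not part of this file): an external user might
 * register a notifier node and then write-track individual gfns roughly as
 * follows.  The .track_write prototype mirrors the invocation in
 * __kvm_page_track_write() below; the my_* identifiers are hypothetical.
 *
 *	static void my_track_write(gpa_t gpa, const u8 *new, int bytes,
 *				   struct kvm_page_track_notifier_node *node)
 *	{
 *		// React to the emulated guest write at gpa, e.g. invalidate
 *		// any shadow state derived from that page.
 *	}
 *
 *	static struct kvm_page_track_notifier_node my_node = {
 *		.track_write = my_track_write,
 *	};
 *
 *	r = kvm_page_track_register_notifier(kvm, &my_node);
 *	if (!r)
 *		r = kvm_write_track_add_gfn(kvm, gfn);
 */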

/*
 * Stop receiving event interception.  This is the opposite operation of
 * kvm_page_track_register_notifier().
 */
void kvm_page_track_unregister_notifier(struct kvm *kvm,
					struct kvm_page_track_notifier_node *n)
{
	struct kvm_page_track_notifier_head *head;

	head = &kvm->arch.track_notifier_head;

	write_lock(&kvm->mmu_lock);
	hlist_del_rcu(&n->node);
	write_unlock(&kvm->mmu_lock);
	synchronize_srcu(&head->track_srcu);

	kvm_put_kvm(kvm);
}
EXPORT_SYMBOL_GPL(kvm_page_track_unregister_notifier);

/*
 * Notify the nodes that write access was intercepted and write emulation has
 * finished for this access.
 *
 * Each node must determine for itself whether the written page is one it is
 * interested in.
 */
void __kvm_page_track_write(struct kvm *kvm, gpa_t gpa, const u8 *new, int bytes)
{
	struct kvm_page_track_notifier_head *head;
	struct kvm_page_track_notifier_node *n;
	int idx;

	head = &kvm->arch.track_notifier_head;

	if (hlist_empty(&head->track_notifier_list))
		return;

	idx = srcu_read_lock(&head->track_srcu);
	hlist_for_each_entry_srcu(n, &head->track_notifier_list, node,
				  srcu_read_lock_held(&head->track_srcu))
		if (n->track_write)
			n->track_write(gpa, new, bytes, n);
	srcu_read_unlock(&head->track_srcu, idx);
}

/*
 * Notify external page track nodes that a memory region is being removed from
 * the VM, e.g. so that users can free any associated metadata.
 */
void kvm_page_track_delete_slot(struct kvm *kvm, struct kvm_memory_slot *slot)
{
	struct kvm_page_track_notifier_head *head;
	struct kvm_page_track_notifier_node *n;
	int idx;

	head = &kvm->arch.track_notifier_head;

	if (hlist_empty(&head->track_notifier_list))
		return;

	idx = srcu_read_lock(&head->track_srcu);
	hlist_for_each_entry_srcu(n, &head->track_notifier_list, node,
				  srcu_read_lock_held(&head->track_srcu))
		if (n->track_remove_region)
			n->track_remove_region(slot->base_gfn, slot->npages, n);
	srcu_read_unlock(&head->track_srcu, idx);
}

/*
 * Add the guest page to the tracking pool so that corresponding access on
 * that page will be intercepted.
 *
 * @kvm: the guest instance we are interested in.
 * @gfn: the guest page.
 */
int kvm_write_track_add_gfn(struct kvm *kvm, gfn_t gfn)
{
	struct kvm_memory_slot *slot;
	int idx;

	idx = srcu_read_lock(&kvm->srcu);

	slot = gfn_to_memslot(kvm, gfn);
	if (!slot) {
		srcu_read_unlock(&kvm->srcu, idx);
		return -EINVAL;
	}

	write_lock(&kvm->mmu_lock);
	__kvm_write_track_add_gfn(kvm, slot, gfn);
	write_unlock(&kvm->mmu_lock);

	srcu_read_unlock(&kvm->srcu, idx);

	return 0;
}
EXPORT_SYMBOL_GPL(kvm_write_track_add_gfn);

/*
 * Remove the guest page from the tracking pool, which stops interception of
 * the corresponding access on that page.
 *
 * @kvm: the guest instance we are interested in.
 * @gfn: the guest page.
 */
int kvm_write_track_remove_gfn(struct kvm *kvm, gfn_t gfn)
{
	struct kvm_memory_slot *slot;
	int idx;

	idx = srcu_read_lock(&kvm->srcu);

	slot = gfn_to_memslot(kvm, gfn);
	if (!slot) {
		srcu_read_unlock(&kvm->srcu, idx);
		return -EINVAL;
	}

	write_lock(&kvm->mmu_lock);
	__kvm_write_track_remove_gfn(kvm, slot, gfn);
	write_unlock(&kvm->mmu_lock);

	srcu_read_unlock(&kvm->srcu, idx);

	return 0;
}
EXPORT_SYMBOL_GPL(kvm_write_track_remove_gfn);
#endif