GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/kernel/cpu/sgx/virt.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Device driver to expose SGX enclave memory to KVM guests.
 *
 * Copyright(c) 2021 Intel Corporation.
 */

#include <linux/miscdevice.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/slab.h>
#include <linux/xarray.h>
#include <asm/sgx.h>
#include <uapi/asm/sgx.h>

#include "encls.h"
#include "sgx.h"

struct sgx_vepc {
        struct xarray page_array;
        struct mutex lock;
};

/*
 * Temporary SECS pages that cannot be EREMOVE'd because they still have
 * children in other virtual EPC instances, and the lock protecting the list.
 */
static struct mutex zombie_secs_pages_lock;
static struct list_head zombie_secs_pages;

static int __sgx_vepc_fault(struct sgx_vepc *vepc,
                            struct vm_area_struct *vma, unsigned long addr)
{
        struct sgx_epc_page *epc_page;
        unsigned long index, pfn;
        int ret;

        WARN_ON(!mutex_is_locked(&vepc->lock));

        /* Calculate index of EPC page in virtual EPC's page_array */
        index = vma->vm_pgoff + PFN_DOWN(addr - vma->vm_start);

        epc_page = xa_load(&vepc->page_array, index);
        if (epc_page)
                return 0;

        epc_page = sgx_alloc_epc_page(vepc, false);
        if (IS_ERR(epc_page))
                return PTR_ERR(epc_page);

        ret = xa_err(xa_store(&vepc->page_array, index, epc_page, GFP_KERNEL));
        if (ret)
                goto err_free;

        pfn = PFN_DOWN(sgx_get_epc_phys_addr(epc_page));

        ret = vmf_insert_pfn(vma, addr, pfn);
        if (ret != VM_FAULT_NOPAGE) {
                ret = -EFAULT;
                goto err_delete;
        }

        return 0;

err_delete:
        xa_erase(&vepc->page_array, index);
err_free:
        sgx_free_epc_page(epc_page);
        return ret;
}

static vm_fault_t sgx_vepc_fault(struct vm_fault *vmf)
{
        struct vm_area_struct *vma = vmf->vma;
        struct sgx_vepc *vepc = vma->vm_private_data;
        int ret;

        mutex_lock(&vepc->lock);
        ret = __sgx_vepc_fault(vepc, vma, vmf->address);
        mutex_unlock(&vepc->lock);

        if (!ret)
                return VM_FAULT_NOPAGE;

        if (ret == -EBUSY && (vmf->flags & FAULT_FLAG_ALLOW_RETRY)) {
                mmap_read_unlock(vma->vm_mm);
                return VM_FAULT_RETRY;
        }

        return VM_FAULT_SIGBUS;
}

static const struct vm_operations_struct sgx_vepc_vm_ops = {
        .fault = sgx_vepc_fault,
};

static int sgx_vepc_mmap(struct file *file, struct vm_area_struct *vma)
{
        struct sgx_vepc *vepc = file->private_data;

        if (!(vma->vm_flags & VM_SHARED))
                return -EINVAL;

        vma->vm_ops = &sgx_vepc_vm_ops;
        /* Don't copy VMA in fork() */
        vm_flags_set(vma, VM_PFNMAP | VM_IO | VM_DONTDUMP | VM_DONTCOPY);
        vma->vm_private_data = vepc;

        return 0;
}

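/*
 * Illustrative userspace sketch (not part of the driver): how a VMM might
 * obtain guest EPC backing from this device. It assumes the /dev/sgx_vepc
 * node registered later in this file and a page-aligned `size` chosen by the
 * VMM; the helper name map_vepc() is hypothetical. No EPC pages are allocated
 * here; they are faulted in on demand by sgx_vepc_fault() above.
 */
#if 0   /* userspace example, not kernel code */
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

static int map_vepc(size_t size, void **epc)
{
        int fd = open("/dev/sgx_vepc", O_RDWR);

        if (fd < 0)
                return -1;

        /* MAP_SHARED is mandatory: sgx_vepc_mmap() rejects private mappings. */
        *epc = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        if (*epc == MAP_FAILED) {
                close(fd);
                return -1;
        }

        /* Keep the fd: it is needed for SGX_IOC_VEPC_REMOVE_ALL (see below). */
        return fd;
}
#endif
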
static int sgx_vepc_remove_page(struct sgx_epc_page *epc_page)
{
        /*
         * Take a previously guest-owned EPC page and return it to the
         * general EPC page pool.
         *
         * Guests cannot be trusted to have left this page in a good
         * state, so run EREMOVE on the page unconditionally. In the
         * case that a guest properly EREMOVE'd this page, a superfluous
         * EREMOVE is harmless.
         */
        return __eremove(sgx_get_epc_virt_addr(epc_page));
}

static int sgx_vepc_free_page(struct sgx_epc_page *epc_page)
{
        int ret = sgx_vepc_remove_page(epc_page);
        if (ret) {
                /*
                 * Only SGX_CHILD_PRESENT is expected, which happens when
                 * EREMOVE'ing an SECS that still has children; it can be
                 * handled by EREMOVE'ing the SECS again after all pages in
                 * virtual EPC have been EREMOVE'd. See comments below in
                 * sgx_vepc_release().
                 *
                 * The user of virtual EPC (KVM) must guarantee that no
                 * logical processor is still running in the enclave in the
                 * guest, otherwise EREMOVE will get SGX_ENCLAVE_ACT, which
                 * cannot be handled here.
                 */
                WARN_ONCE(ret != SGX_CHILD_PRESENT, EREMOVE_ERROR_MESSAGE,
                          ret, ret);
                return ret;
        }

        sgx_free_epc_page(epc_page);
        return 0;
}

static long sgx_vepc_remove_all(struct sgx_vepc *vepc)
{
        struct sgx_epc_page *entry;
        unsigned long index;
        long failures = 0;

        xa_for_each(&vepc->page_array, index, entry) {
                int ret = sgx_vepc_remove_page(entry);
                if (ret) {
                        if (ret == SGX_CHILD_PRESENT) {
                                /* The page is a SECS, userspace will retry. */
                                failures++;
                        } else {
                                /*
                                 * Report errors due to #GP or SGX_ENCLAVE_ACT; do not
                                 * WARN, as userspace can induce said failures by
                                 * calling the ioctl concurrently on multiple vEPCs or
                                 * while one or more CPUs are running the enclave. Only
                                 * a #PF on EREMOVE indicates a kernel/hardware issue.
                                 */
                                WARN_ON_ONCE(encls_faulted(ret) &&
                                             ENCLS_TRAPNR(ret) != X86_TRAP_GP);
                                return -EBUSY;
                        }
                }
                cond_resched();
        }

        /*
         * Return the number of SECS pages that failed to be removed, so
         * userspace knows that it has to retry.
         */
        return failures;
}

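/*
 * Illustrative userspace sketch (not part of the driver): how a VMM might
 * drive SGX_IOC_VEPC_REMOVE_ALL, which is dispatched in sgx_vepc_ioctl()
 * further below. A positive return value is the number of SECS pages whose
 * children have not all been removed yet; -1 with errno EBUSY means a #GP or
 * an active enclave thread was hit. The sketch assumes the guest's EPC is
 * backed by this single vEPC instance, so retrying on one fd is sufficient;
 * with multiple instances the VMM would issue the ioctl on all of them before
 * retrying. The helper name vepc_remove_all() is hypothetical.
 */
#if 0   /* userspace example, not kernel code */
#include <sys/ioctl.h>
#include <asm/sgx.h>

static int vepc_remove_all(int vepc_fd)
{
        int left;

        /* The third argument must be 0, anything else is rejected with EINVAL. */
        do {
                left = ioctl(vepc_fd, SGX_IOC_VEPC_REMOVE_ALL, 0);
        } while (left > 0);

        return left;    /* 0 on success, -1 with errno set on failure */
}
#endif
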
static int sgx_vepc_release(struct inode *inode, struct file *file)
{
        struct sgx_vepc *vepc = file->private_data;
        struct sgx_epc_page *epc_page, *tmp, *entry;
        unsigned long index;

        LIST_HEAD(secs_pages);

        xa_for_each(&vepc->page_array, index, entry) {
                /*
                 * Remove all normal, child pages. sgx_vepc_free_page()
                 * will fail if EREMOVE fails, but this is OK and expected on
                 * SECS pages. Those can only be EREMOVE'd *after* all their
                 * child pages. Retries below will clean them up.
                 */
                if (sgx_vepc_free_page(entry))
                        continue;

                xa_erase(&vepc->page_array, index);
                cond_resched();
        }

        /*
         * Retry EREMOVE'ing pages. This will clean up any SECS pages that
         * only had children in this 'epc' area.
         */
        xa_for_each(&vepc->page_array, index, entry) {
                epc_page = entry;
                /*
                 * An EREMOVE failure here means that the SECS page still
                 * has children. But, since all children in this 'sgx_vepc'
                 * have been removed, the SECS page must have a child on
                 * another instance.
                 */
                if (sgx_vepc_free_page(epc_page))
                        list_add_tail(&epc_page->list, &secs_pages);

                xa_erase(&vepc->page_array, index);
                cond_resched();
        }

        /*
         * SECS pages are "pinned" by child pages, and "unpinned" once all
         * children have been EREMOVE'd. A child page in this instance
         * may have pinned an SECS page encountered in an earlier release(),
         * creating a zombie. Since some children were EREMOVE'd above,
         * try to EREMOVE all zombies in the hopes that one was unpinned.
         */
        mutex_lock(&zombie_secs_pages_lock);
        list_for_each_entry_safe(epc_page, tmp, &zombie_secs_pages, list) {
                /*
                 * Speculatively remove the page from the list of zombies;
                 * if the page is successfully EREMOVE'd it will be added to
                 * the list of free pages. If EREMOVE fails, throw the page
                 * on the local list, which will be spliced on at the end.
                 */
                list_del(&epc_page->list);

                if (sgx_vepc_free_page(epc_page))
                        list_add_tail(&epc_page->list, &secs_pages);
                cond_resched();
        }

        if (!list_empty(&secs_pages))
                list_splice_tail(&secs_pages, &zombie_secs_pages);
        mutex_unlock(&zombie_secs_pages_lock);

        xa_destroy(&vepc->page_array);
        kfree(vepc);

        return 0;
}

static int sgx_vepc_open(struct inode *inode, struct file *file)
{
        struct sgx_vepc *vepc;

        vepc = kzalloc(sizeof(struct sgx_vepc), GFP_KERNEL);
        if (!vepc)
                return -ENOMEM;
        mutex_init(&vepc->lock);
        xa_init(&vepc->page_array);

        file->private_data = vepc;

        return 0;
}

static long sgx_vepc_ioctl(struct file *file,
                           unsigned int cmd, unsigned long arg)
{
        struct sgx_vepc *vepc = file->private_data;

        switch (cmd) {
        case SGX_IOC_VEPC_REMOVE_ALL:
                if (arg)
                        return -EINVAL;
                return sgx_vepc_remove_all(vepc);

        default:
                return -ENOTTY;
        }
}

static const struct file_operations sgx_vepc_fops = {
        .owner          = THIS_MODULE,
        .open           = sgx_vepc_open,
        .unlocked_ioctl = sgx_vepc_ioctl,
        .compat_ioctl   = sgx_vepc_ioctl,
        .release        = sgx_vepc_release,
        .mmap           = sgx_vepc_mmap,
};

static struct miscdevice sgx_vepc_dev = {
        .minor          = MISC_DYNAMIC_MINOR,
        .name           = "sgx_vepc",
        .nodename       = "sgx_vepc",
        .fops           = &sgx_vepc_fops,
};

int __init sgx_vepc_init(void)
{
        /* SGX virtualization requires KVM to work */
        if (!cpu_feature_enabled(X86_FEATURE_VMX))
                return -ENODEV;

        INIT_LIST_HEAD(&zombie_secs_pages);
        mutex_init(&zombie_secs_pages_lock);

        return misc_register(&sgx_vepc_dev);
}

/**
 * sgx_virt_ecreate() - Run ECREATE on behalf of guest
 * @pageinfo: Pointer to PAGEINFO structure
 * @secs: Userspace pointer to SECS page
 * @trapnr: trap number injected to guest in case of ECREATE error
 *
 * Run ECREATE on behalf of the guest after KVM traps ECREATE, in order to
 * enforce policies on the guest's enclaves, and return the trap number
 * which should be injected into the guest in case of any ECREATE error.
 *
 * Return:
 * - 0: ECREATE was successful.
 * - <0: on error.
 */
int sgx_virt_ecreate(struct sgx_pageinfo *pageinfo, void __user *secs,
                     int *trapnr)
{
        int ret;

        /*
         * @secs is an untrusted, userspace-provided address. It comes from
         * KVM and is assumed to be a valid pointer which points somewhere in
         * userspace. This can fault and call SGX or other fault handlers when
         * the userspace mapping of @secs doesn't exist.
         *
         * Add a WARN() to make sure @secs is already a valid userspace
         * pointer from the caller (KVM), which should already have handled
         * the invalid-pointer case (for instance, one made by a malicious
         * guest). All other checks, such as alignment of @secs, are deferred
         * to ENCLS itself.
         */
        if (WARN_ON_ONCE(!access_ok(secs, PAGE_SIZE)))
                return -EINVAL;

        __uaccess_begin();
        ret = __ecreate(pageinfo, (void *)secs);
        __uaccess_end();

        if (encls_faulted(ret)) {
                *trapnr = ENCLS_TRAPNR(ret);
                return -EFAULT;
        }

        /* ECREATE doesn't return an error code, it faults or succeeds. */
        WARN_ON_ONCE(ret);
        return 0;
}
EXPORT_SYMBOL_GPL(sgx_virt_ecreate);

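/*
 * Illustrative caller-side sketch (not KVM's actual code): the contract above
 * distinguishes a faulting ENCLS leaf (-EFAULT plus *trapnr) from a bad
 * pointer (-EINVAL), and a hypervisor built on this export would typically
 * reflect the fault back into the guest. emulate_guest_ecreate() and
 * inject_guest_exception() are hypothetical names used only for this example.
 */
#if 0   /* example only */
static int emulate_guest_ecreate(struct sgx_pageinfo *pageinfo,
                                 void __user *guest_secs_hva)
{
        int trapnr, ret;

        ret = sgx_virt_ecreate(pageinfo, guest_secs_hva, &trapnr);
        if (ret == -EFAULT) {
                /* ENCLS faulted: inject 'trapnr' (e.g. #PF or #GP) into the guest. */
                inject_guest_exception(trapnr);
                return 0;
        }

        /* -EINVAL: the caller passed a bad pointer; treat as an emulation error. */
        return ret;
}
#endif
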
static int __sgx_virt_einit(void __user *sigstruct, void __user *token,
                            void __user *secs)
{
        int ret;

        /*
         * Make sure all userspace pointers from caller (KVM) are valid.
         * All other checks deferred to ENCLS itself. Also see comment
         * for @secs in sgx_virt_ecreate().
         */
#define SGX_EINITTOKEN_SIZE 304
        if (WARN_ON_ONCE(!access_ok(sigstruct, sizeof(struct sgx_sigstruct)) ||
                         !access_ok(token, SGX_EINITTOKEN_SIZE) ||
                         !access_ok(secs, PAGE_SIZE)))
                return -EINVAL;

        __uaccess_begin();
        ret = __einit((void *)sigstruct, (void *)token, (void *)secs);
        __uaccess_end();

        return ret;
}

/**
 * sgx_virt_einit() - Run EINIT on behalf of guest
 * @sigstruct: Userspace pointer to SIGSTRUCT structure
 * @token: Userspace pointer to EINITTOKEN structure
 * @secs: Userspace pointer to SECS page
 * @lepubkeyhash: Pointer to guest's *virtual* SGX_LEPUBKEYHASH MSR values
 * @trapnr: trap number injected to guest in case of EINIT error
 *
 * Run EINIT on behalf of the guest after KVM traps EINIT. If SGX_LC is
 * available in the host, the SGX driver may rewrite the hardware values at
 * will; therefore KVM needs to update the hardware values to the guest's
 * virtual MSR values in order to ensure EINIT is executed with the expected
 * hardware values.
 *
 * Return:
 * - 0: EINIT was successful.
 * - <0: on error.
 */
int sgx_virt_einit(void __user *sigstruct, void __user *token,
                   void __user *secs, u64 *lepubkeyhash, int *trapnr)
{
        int ret;

        if (!cpu_feature_enabled(X86_FEATURE_SGX_LC)) {
                ret = __sgx_virt_einit(sigstruct, token, secs);
        } else {
                preempt_disable();

                sgx_update_lepubkeyhash(lepubkeyhash);

                ret = __sgx_virt_einit(sigstruct, token, secs);
                preempt_enable();
        }

        /* Propagate up the error from the WARN_ON_ONCE in __sgx_virt_einit() */
        if (ret == -EINVAL)
                return ret;

        if (encls_faulted(ret)) {
                *trapnr = ENCLS_TRAPNR(ret);
                return -EFAULT;
        }

        return ret;
}
EXPORT_SYMBOL_GPL(sgx_virt_einit);

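/*
 * Illustrative caller-side sketch (not KVM's actual code): sgx_virt_einit()
 * can report three outcomes that a hypervisor has to tell apart.
 * handle_einit_for_guest() and inject_guest_exception() are hypothetical
 * names used only for this example.
 */
#if 0   /* example only */
static int handle_einit_for_guest(void __user *sigstruct, void __user *token,
                                  void __user *secs, u64 *guest_lepubkeyhash)
{
        int trapnr, ret;

        ret = sgx_virt_einit(sigstruct, token, secs, guest_lepubkeyhash, &trapnr);
        if (ret == -EINVAL)
                return ret;                     /* bad pointer from the caller */
        if (ret == -EFAULT) {
                inject_guest_exception(trapnr); /* ENCLS faulted in the host */
                return 0;
        }

        /*
         * Otherwise @ret is what EINIT itself reported (0 on success, or an
         * SGX error code), which the hypervisor could surface to the guest
         * the same way hardware reports EINIT's status in EAX.
         */
        return ret;
}
#endif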