GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/kvm/vmx/sgx.c
// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2021 Intel Corporation. */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <asm/msr.h>
#include <asm/sgx.h>

#include "x86.h"
#include "kvm_cache_regs.h"
#include "nested.h"
#include "sgx.h"
#include "vmx.h"

bool __read_mostly enable_sgx = 1;
module_param_named(sgx, enable_sgx, bool, 0444);

/* Initial value of guest's virtual SGX_LEPUBKEYHASHn MSRs */
static u64 sgx_pubkey_hash[4] __ro_after_init;

/*
 * ENCLS's memory operands use a fixed segment (DS) and a fixed
 * address size based on the mode.  Related prefixes are ignored.
 */
static int sgx_get_encls_gva(struct kvm_vcpu *vcpu, unsigned long offset,
			     int size, int alignment, gva_t *gva)
{
	struct kvm_segment s;
	bool fault;

	/* Skip vmcs.GUEST_DS retrieval for 64-bit mode to avoid VMREADs. */
	*gva = offset;
	if (!is_64_bit_mode(vcpu)) {
		vmx_get_segment(vcpu, &s, VCPU_SREG_DS);
		*gva += s.base;
	}

	if (!IS_ALIGNED(*gva, alignment)) {
		fault = true;
	} else if (likely(is_64_bit_mode(vcpu))) {
		*gva = vmx_get_untagged_addr(vcpu, *gva, 0);
		fault = is_noncanonical_address(*gva, vcpu, 0);
	} else {
		*gva &= 0xffffffff;
		fault = (s.unusable) ||
			(s.type != 2 && s.type != 3) ||
			(*gva > s.limit) ||
			((s.base != 0 || s.limit != 0xffffffff) &&
			(((u64)*gva + size - 1) > s.limit + 1));
	}
	if (fault)
		kvm_inject_gp(vcpu, 0);
	return fault ? -EINVAL : 0;
}

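/*
 * Exit to userspace with an emulation error, stashing the faulting address
 * and access size in the exit data so the VMM can diagnose the failure.
 */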
static void sgx_handle_emulation_failure(struct kvm_vcpu *vcpu, u64 addr,
					 unsigned int size)
{
	uint64_t data[2] = { addr, size };

	__kvm_prepare_emulation_failure_exit(vcpu, data, ARRAY_SIZE(data));
}

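/*
 * Read guest memory through its host userspace mapping.  A failed copy
 * means the host HVA is bad; report the failure to userspace.
 */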
static int sgx_read_hva(struct kvm_vcpu *vcpu, unsigned long hva, void *data,
			unsigned int size)
{
	if (__copy_from_user(data, (void __user *)hva, size)) {
		sgx_handle_emulation_failure(vcpu, hva, size);
		return -EFAULT;
	}

	return 0;
}

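/*
 * Walk the guest page tables to translate a GVA to a GPA, injecting a #PF
 * into the guest if the translation fails.
 */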
static int sgx_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t gva, bool write,
			  gpa_t *gpa)
{
	struct x86_exception ex;

	if (write)
		*gpa = kvm_mmu_gva_to_gpa_write(vcpu, gva, &ex);
	else
		*gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, &ex);

	if (*gpa == INVALID_GPA) {
		kvm_inject_emulated_page_fault(vcpu, &ex);
		return -EFAULT;
	}

	return 0;
}

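/*
 * Resolve a GPA to a host virtual address via the backing memslot, keeping
 * the offset within the page.  An unbacked GPA triggers an exit to userspace.
 */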
static int sgx_gpa_to_hva(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned long *hva)
{
	*hva = kvm_vcpu_gfn_to_hva(vcpu, PFN_DOWN(gpa));
	if (kvm_is_error_hva(*hva)) {
		sgx_handle_emulation_failure(vcpu, gpa, 1);
		return -EFAULT;
	}

	*hva |= gpa & ~PAGE_MASK;

	return 0;
}

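/*
 * Morph a fault that occurred while executing ENCLS on the guest's behalf
 * into the architecturally appropriate guest exception (#GP or SGX #PF).
 */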
static int sgx_inject_fault(struct kvm_vcpu *vcpu, gva_t gva, int trapnr)
{
	struct x86_exception ex;

	/*
	 * A non-EPCM #PF indicates a bad userspace HVA.  This *should* check
	 * for PFEC.SGX and not assume any #PF on SGX2 originated in the EPC,
	 * but the error code isn't (yet) plumbed through the ENCLS helpers.
	 */
	if (trapnr == PF_VECTOR && !boot_cpu_has(X86_FEATURE_SGX2)) {
		kvm_prepare_emulation_failure_exit(vcpu);
		return 0;
	}

	/*
	 * If the guest thinks it's running on SGX2 hardware, inject an SGX
	 * #PF if the fault matches an EPCM fault signature (#GP on SGX1,
	 * #PF on SGX2).  The assumption is that EPCM faults are much more
	 * likely than a bad userspace address.
	 */
	if ((trapnr == PF_VECTOR || !boot_cpu_has(X86_FEATURE_SGX2)) &&
	    guest_cpu_cap_has(vcpu, X86_FEATURE_SGX2)) {
		memset(&ex, 0, sizeof(ex));
		ex.vector = PF_VECTOR;
		ex.error_code = PFERR_PRESENT_MASK | PFERR_WRITE_MASK |
				PFERR_SGX_MASK;
		ex.address = gva;
		ex.error_code_valid = true;
		ex.nested_page_fault = false;
		kvm_inject_emulated_page_fault(vcpu, &ex);
	} else {
		kvm_inject_gp(vcpu, 0);
	}
	return 1;
}

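/*
 * Emulate ECREATE proper, with the PAGEINFO pointers already translated to
 * host addresses.  CPUID.0x12.0x0 and CPUID.0x12.0x1 hold the guest's
 * allowed MISCSELECT, ATTRIBUTES and XFRM bits and the max enclave size.
 */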
static int __handle_encls_ecreate(struct kvm_vcpu *vcpu,
				  struct sgx_pageinfo *pageinfo,
				  unsigned long secs_hva,
				  gva_t secs_gva)
{
	struct sgx_secs *contents = (struct sgx_secs *)pageinfo->contents;
	struct kvm_cpuid_entry2 *sgx_12_0, *sgx_12_1;
	u64 attributes, xfrm, size;
	u32 miscselect;
	u8 max_size_log2;
	int trapnr, ret;

	sgx_12_0 = kvm_find_cpuid_entry_index(vcpu, 0x12, 0);
	sgx_12_1 = kvm_find_cpuid_entry_index(vcpu, 0x12, 1);
	if (!sgx_12_0 || !sgx_12_1) {
		kvm_prepare_emulation_failure_exit(vcpu);
		return 0;
	}

	miscselect = contents->miscselect;
	attributes = contents->attributes;
	xfrm = contents->xfrm;
	size = contents->size;

	/* Enforce restriction of access to the PROVISIONKEY. */
	if (!vcpu->kvm->arch.sgx_provisioning_allowed &&
	    (attributes & SGX_ATTR_PROVISIONKEY)) {
		if (sgx_12_1->eax & SGX_ATTR_PROVISIONKEY)
			pr_warn_once("SGX PROVISIONKEY advertised but not allowed\n");
		kvm_inject_gp(vcpu, 0);
		return 1;
	}

	/*
	 * Enforce CPUID restrictions on MISCSELECT, ATTRIBUTES and XFRM.  Note
	 * that the allowed XFRM (XFeature Request Mask) isn't strictly bound
	 * by the supported XCR0.  FP+SSE *must* be set in XFRM, even if XSAVE
	 * is unsupported, i.e. even if XCR0 itself is completely unsupported.
	 */
	if ((u32)miscselect & ~sgx_12_0->ebx ||
	    (u32)attributes & ~sgx_12_1->eax ||
	    (u32)(attributes >> 32) & ~sgx_12_1->ebx ||
	    (u32)xfrm & ~sgx_12_1->ecx ||
	    (u32)(xfrm >> 32) & ~sgx_12_1->edx ||
	    xfrm & ~(vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FPSSE) ||
	    (xfrm & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) {
		kvm_inject_gp(vcpu, 0);
		return 1;
	}

	/* Enforce CPUID restriction on max enclave size. */
	max_size_log2 = (attributes & SGX_ATTR_MODE64BIT) ? sgx_12_0->edx >> 8 :
							    sgx_12_0->edx;
	if (size >= BIT_ULL(max_size_log2)) {
		kvm_inject_gp(vcpu, 0);
		return 1;
	}

	/*
	 * sgx_virt_ecreate() returns:
	 *  1) 0:	ECREATE was successful
	 *  2) -EFAULT:	ECREATE was run but faulted, and trapnr was set to the
	 *		exception number.
	 *  3) -EINVAL:	access_ok() on @secs_hva failed.  This should never
	 *		happen as KVM checks host addresses at memslot creation.
	 *		sgx_virt_ecreate() has already warned in this case.
	 */
	ret = sgx_virt_ecreate(pageinfo, (void __user *)secs_hva, &trapnr);
	if (!ret)
		return kvm_skip_emulated_instruction(vcpu);
	if (ret == -EFAULT)
		return sgx_inject_fault(vcpu, secs_gva, trapnr);

	return ret;
}

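/*
 * Emulate ECREATE: RBX holds the GVA of the PAGEINFO structure and RCX the
 * GVA of the 4K-aligned SECS page.  The pointers embedded in PAGEINFO must
 * also be translated before the host can execute ECREATE.
 */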
static int handle_encls_ecreate(struct kvm_vcpu *vcpu)
{
	gva_t pageinfo_gva, secs_gva;
	gva_t metadata_gva, contents_gva;
	gpa_t metadata_gpa, contents_gpa, secs_gpa;
	unsigned long metadata_hva, contents_hva, secs_hva;
	struct sgx_pageinfo pageinfo;
	struct sgx_secs *contents;
	struct x86_exception ex;
	int r;

	if (sgx_get_encls_gva(vcpu, kvm_rbx_read(vcpu), 32, 32, &pageinfo_gva) ||
	    sgx_get_encls_gva(vcpu, kvm_rcx_read(vcpu), 4096, 4096, &secs_gva))
		return 1;

	/*
	 * Copy the PAGEINFO to local memory, its pointers need to be
	 * translated, i.e. we need to do a deep copy/translate.
	 */
	r = kvm_read_guest_virt(vcpu, pageinfo_gva, &pageinfo,
				sizeof(pageinfo), &ex);
	if (r == X86EMUL_PROPAGATE_FAULT) {
		kvm_inject_emulated_page_fault(vcpu, &ex);
		return 1;
	} else if (r != X86EMUL_CONTINUE) {
		sgx_handle_emulation_failure(vcpu, pageinfo_gva,
					     sizeof(pageinfo));
		return 0;
	}

	if (sgx_get_encls_gva(vcpu, pageinfo.metadata, 64, 64, &metadata_gva) ||
	    sgx_get_encls_gva(vcpu, pageinfo.contents, 4096, 4096,
			      &contents_gva))
		return 1;

	/*
	 * Translate the SECINFO, SOURCE and SECS pointers from GVA to GPA.
	 * Resume the guest on failure to inject a #PF.
	 */
	if (sgx_gva_to_gpa(vcpu, metadata_gva, false, &metadata_gpa) ||
	    sgx_gva_to_gpa(vcpu, contents_gva, false, &contents_gpa) ||
	    sgx_gva_to_gpa(vcpu, secs_gva, true, &secs_gpa))
		return 1;

	/*
	 * ...and then to HVA.  The order of accesses isn't architectural, i.e.
	 * KVM doesn't have to fully process one address at a time.  Exit to
	 * userspace if a GPA is invalid.
	 */
	if (sgx_gpa_to_hva(vcpu, metadata_gpa, &metadata_hva) ||
	    sgx_gpa_to_hva(vcpu, contents_gpa, &contents_hva) ||
	    sgx_gpa_to_hva(vcpu, secs_gpa, &secs_hva))
		return 0;

	/*
	 * Copy contents into kernel memory to prevent TOCTOU attack.  E.g. the
	 * guest could do ECREATE w/ SECS.SGX_ATTR_PROVISIONKEY=0, and
	 * simultaneously set SGX_ATTR_PROVISIONKEY to bypass the check to
	 * enforce restriction of access to the PROVISIONKEY.
	 */
	contents = (struct sgx_secs *)__get_free_page(GFP_KERNEL);
	if (!contents)
		return -ENOMEM;

	/* Exit to userspace if copying from a host userspace address fails. */
	if (sgx_read_hva(vcpu, contents_hva, (void *)contents, PAGE_SIZE)) {
		free_page((unsigned long)contents);
		return 0;
	}

	pageinfo.metadata = metadata_hva;
	pageinfo.contents = (u64)contents;

	r = __handle_encls_ecreate(vcpu, &pageinfo, secs_hva, secs_gva);

	free_page((unsigned long)contents);

	return r;
}

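/*
 * Emulate EINIT: RBX holds the GVA of the SIGSTRUCT, RCX the GVA of the
 * SECS and RDX the GVA of the EINITTOKEN.  EINIT's error code is returned
 * to the guest in RAX, with RFLAGS.ZF set if the leaf failed.
 */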
static int handle_encls_einit(struct kvm_vcpu *vcpu)
{
	unsigned long sig_hva, secs_hva, token_hva, rflags;
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	gva_t sig_gva, secs_gva, token_gva;
	gpa_t sig_gpa, secs_gpa, token_gpa;
	int ret, trapnr;

	if (sgx_get_encls_gva(vcpu, kvm_rbx_read(vcpu), 1808, 4096, &sig_gva) ||
	    sgx_get_encls_gva(vcpu, kvm_rcx_read(vcpu), 4096, 4096, &secs_gva) ||
	    sgx_get_encls_gva(vcpu, kvm_rdx_read(vcpu), 304, 512, &token_gva))
		return 1;

	/*
	 * Translate the SIGSTRUCT, SECS and TOKEN pointers from GVA to GPA.
	 * Resume the guest on failure to inject a #PF.
	 */
	if (sgx_gva_to_gpa(vcpu, sig_gva, false, &sig_gpa) ||
	    sgx_gva_to_gpa(vcpu, secs_gva, true, &secs_gpa) ||
	    sgx_gva_to_gpa(vcpu, token_gva, false, &token_gpa))
		return 1;

	/*
	 * ...and then to HVA.  The order of accesses isn't architectural, i.e.
	 * KVM doesn't have to fully process one address at a time.  Exit to
	 * userspace if a GPA is invalid.  Note, all structures are aligned and
	 * cannot split pages.
	 */
	if (sgx_gpa_to_hva(vcpu, sig_gpa, &sig_hva) ||
	    sgx_gpa_to_hva(vcpu, secs_gpa, &secs_hva) ||
	    sgx_gpa_to_hva(vcpu, token_gpa, &token_hva))
		return 0;

	ret = sgx_virt_einit((void __user *)sig_hva, (void __user *)token_hva,
			     (void __user *)secs_hva,
			     vmx->msr_ia32_sgxlepubkeyhash, &trapnr);

	if (ret == -EFAULT)
		return sgx_inject_fault(vcpu, secs_gva, trapnr);

	/*
	 * sgx_virt_einit() returns -EINVAL when access_ok() fails on @sig_hva,
	 * @token_hva or @secs_hva.  This should never happen as KVM checks host
	 * addresses at memslot creation.  sgx_virt_einit() has already warned
	 * in this case, so just return.
	 */
	if (ret < 0)
		return ret;

	rflags = vmx_get_rflags(vcpu) & ~(X86_EFLAGS_CF | X86_EFLAGS_PF |
					  X86_EFLAGS_AF | X86_EFLAGS_SF |
					  X86_EFLAGS_OF);
	if (ret)
		rflags |= X86_EFLAGS_ZF;
	else
		rflags &= ~X86_EFLAGS_ZF;
	vmx_set_rflags(vcpu, rflags);

	kvm_rax_write(vcpu, ret);
	return kvm_skip_emulated_instruction(vcpu);
}

static inline bool encls_leaf_enabled_in_guest(struct kvm_vcpu *vcpu, u32 leaf)
{
	/*
	 * ENCLS generates a #UD if SGX1 isn't supported, i.e. this point will
	 * be reached if and only if the SGX1 leafs are enabled.
	 */
	if (leaf >= ECREATE && leaf <= ETRACK)
		return true;

	if (leaf >= EAUG && leaf <= EMODT)
		return guest_cpu_cap_has(vcpu, X86_FEATURE_SGX2);

	return false;
}

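/*
 * SGX must be opted in by firmware: both the SGX enable bit and the lock
 * bit must be set in the guest's IA32_FEATURE_CONTROL MSR.
 */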
static inline bool sgx_enabled_in_guest_bios(struct kvm_vcpu *vcpu)
{
	const u64 bits = FEAT_CTL_SGX_ENABLED | FEAT_CTL_LOCKED;

	return (to_vmx(vcpu)->msr_ia32_feature_control & bits) == bits;
}

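/*
 * Top-level ENCLS VM-exit handler.  Only ECREATE and EINIT are expected to
 * trap when SGX is exposed to the guest; any other leaf reaching this point
 * indicates a KVM bug, and the exit is punted to userspace.
 */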
int handle_encls(struct kvm_vcpu *vcpu)
{
	u32 leaf = (u32)kvm_rax_read(vcpu);

	if (!enable_sgx || !guest_cpu_cap_has(vcpu, X86_FEATURE_SGX) ||
	    !guest_cpu_cap_has(vcpu, X86_FEATURE_SGX1)) {
		kvm_queue_exception(vcpu, UD_VECTOR);
	} else if (!encls_leaf_enabled_in_guest(vcpu, leaf) ||
		   !sgx_enabled_in_guest_bios(vcpu) || !is_paging(vcpu)) {
		kvm_inject_gp(vcpu, 0);
	} else {
		if (leaf == ECREATE)
			return handle_encls_ecreate(vcpu);
		if (leaf == EINIT)
			return handle_encls_einit(vcpu);
		WARN_ONCE(1, "unexpected exit on ENCLS[%u]", leaf);
		vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
		vcpu->run->hw.hardware_exit_reason = EXIT_REASON_ENCLS;
		return 0;
	}
	return 1;
}

void setup_default_sgx_lepubkeyhash(void)
{
	/*
	 * Use Intel's default value for Skylake hardware if Launch Control is
	 * not supported, i.e. Intel's hash is hardcoded into silicon, or if
	 * Launch Control is supported and enabled, i.e. mimic the reset value
	 * and let the guest write the MSRs at will.  If Launch Control is
	 * supported but disabled, then use the current MSR values as the hash
	 * MSRs exist but are read-only (locked and not writable).
	 */
	if (!enable_sgx || boot_cpu_has(X86_FEATURE_SGX_LC) ||
	    rdmsrq_safe(MSR_IA32_SGXLEPUBKEYHASH0, &sgx_pubkey_hash[0])) {
		sgx_pubkey_hash[0] = 0xa6053e051270b7acULL;
		sgx_pubkey_hash[1] = 0x6cfbe8ba8b3b413dULL;
		sgx_pubkey_hash[2] = 0xc4916d99f2b3735dULL;
		sgx_pubkey_hash[3] = 0xd4f8c05909f9bb3bULL;
	} else {
		/* MSR_IA32_SGXLEPUBKEYHASH0 is read above */
		rdmsrq(MSR_IA32_SGXLEPUBKEYHASH1, sgx_pubkey_hash[1]);
		rdmsrq(MSR_IA32_SGXLEPUBKEYHASH2, sgx_pubkey_hash[2]);
		rdmsrq(MSR_IA32_SGXLEPUBKEYHASH3, sgx_pubkey_hash[3]);
	}
}

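/* Seed the vCPU's virtual SGX_LEPUBKEYHASHn MSRs with the default hash. */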
void vcpu_setup_sgx_lepubkeyhash(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	memcpy(vmx->msr_ia32_sgxlepubkeyhash, sgx_pubkey_hash,
	       sizeof(sgx_pubkey_hash));
}

/*
 * ECREATE must be intercepted to enforce MISCSELECT, ATTRIBUTES and XFRM
 * restrictions if the guest's allowed-1 settings diverge from hardware.
 */
static bool sgx_intercept_encls_ecreate(struct kvm_vcpu *vcpu)
{
	struct kvm_cpuid_entry2 *guest_cpuid;
	u32 eax, ebx, ecx, edx;

	if (!vcpu->kvm->arch.sgx_provisioning_allowed)
		return true;

	guest_cpuid = kvm_find_cpuid_entry_index(vcpu, 0x12, 0);
	if (!guest_cpuid)
		return true;

	cpuid_count(0x12, 0, &eax, &ebx, &ecx, &edx);
	if (guest_cpuid->ebx != ebx || guest_cpuid->edx != edx)
		return true;

	guest_cpuid = kvm_find_cpuid_entry_index(vcpu, 0x12, 1);
	if (!guest_cpuid)
		return true;

	cpuid_count(0x12, 1, &eax, &ebx, &ecx, &edx);
	if (guest_cpuid->eax != eax || guest_cpuid->ebx != ebx ||
	    guest_cpuid->ecx != ecx || guest_cpuid->edx != edx)
		return true;

	return false;
}

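/*
 * Recompute the ENCLS-exiting bitmap; a set bit causes the corresponding
 * ENCLS leaf to VM-Exit instead of executing in the guest.
 */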
void vmx_write_encls_bitmap(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
{
	/*
	 * There is no software enable bit for SGX that is virtualized by
	 * hardware, e.g. there's no CR4.SGXE, so when SGX is disabled in the
	 * guest (either by the host or by the guest's BIOS) but enabled in the
	 * host, trap all ENCLS leafs and inject #UD/#GP as needed to emulate
	 * the expected system behavior for ENCLS.
	 */
	u64 bitmap = -1ull;

	/* Nothing to do if hardware doesn't support SGX */
	if (!cpu_has_vmx_encls_vmexit())
		return;

	if (guest_cpu_cap_has(vcpu, X86_FEATURE_SGX) &&
	    sgx_enabled_in_guest_bios(vcpu)) {
		if (guest_cpu_cap_has(vcpu, X86_FEATURE_SGX1)) {
			bitmap &= ~GENMASK_ULL(ETRACK, ECREATE);
			if (sgx_intercept_encls_ecreate(vcpu))
				bitmap |= (1 << ECREATE);
		}

		if (guest_cpu_cap_has(vcpu, X86_FEATURE_SGX2))
			bitmap &= ~GENMASK_ULL(EMODT, EAUG);

		/*
		 * Trap and execute EINIT if launch control is enabled in the
		 * host using the guest's values for launch control MSRs, even
		 * if the guest's values are fixed to hardware default values.
		 * The MSRs are not loaded/saved on VM-Enter/VM-Exit as writing
		 * the MSRs is extraordinarily expensive.
		 */
		if (boot_cpu_has(X86_FEATURE_SGX_LC))
			bitmap |= (1 << EINIT);

		if (!vmcs12 && is_guest_mode(vcpu))
			vmcs12 = get_vmcs12(vcpu);
		if (vmcs12 && nested_cpu_has_encls_exit(vmcs12))
			bitmap |= vmcs12->encls_exiting_bitmap;
	}
	vmcs_write64(ENCLS_EXITING_BITMAP, bitmap);
}