GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/mm/mem_encrypt_amd.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * AMD Memory Encryption Support
 *
 * Copyright (C) 2016-2024 Advanced Micro Devices, Inc.
 *
 * Author: Tom Lendacky <[email protected]>
 */

#include <linux/linkage.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/dma-direct.h>
#include <linux/swiotlb.h>
#include <linux/mem_encrypt.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/bitops.h>
#include <linux/dma-mapping.h>
#include <linux/cc_platform.h>

#include <asm/tlbflush.h>
#include <asm/fixmap.h>
#include <asm/setup.h>
#include <asm/mem_encrypt.h>
#include <asm/bootparam.h>
#include <asm/set_memory.h>
#include <asm/cacheflush.h>
#include <asm/processor-flags.h>
#include <asm/msr.h>
#include <asm/cmdline.h>
#include <asm/sev.h>
#include <asm/ia32.h>

#include "mm_internal.h"

/*
 * Since SME related variables are set early in the boot process they must
 * reside in the .data section so as not to be zeroed out when the .bss
 * section is later cleared.
 */
u64 sme_me_mask __section(".data") = 0;
SYM_PIC_ALIAS(sme_me_mask);
u64 sev_status __section(".data") = 0;
SYM_PIC_ALIAS(sev_status);
u64 sev_check_data __section(".data") = 0;
EXPORT_SYMBOL(sme_me_mask);

/* Buffer used for early in-place encryption by BSP, no locking needed */
static char sme_early_buffer[PAGE_SIZE] __initdata __aligned(PAGE_SIZE);

/*
 * SNP-specific routine which needs to additionally change the page state from
 * private to shared before copying the data from the source to destination and
 * restore after the copy.
 */
static inline void __init snp_memcpy(void *dst, void *src, size_t sz,
                                     unsigned long paddr, bool decrypt)
{
        unsigned long npages = PAGE_ALIGN(sz) >> PAGE_SHIFT;

        if (decrypt) {
                /*
                 * @paddr needs to be accessed decrypted, mark the page shared in
                 * the RMP table before copying it.
                 */
                early_snp_set_memory_shared((unsigned long)__va(paddr), paddr, npages);

                memcpy(dst, src, sz);

                /* Restore the page state after the memcpy. */
                early_snp_set_memory_private((unsigned long)__va(paddr), paddr, npages);
        } else {
                /*
                 * @paddr needs to be accessed encrypted, no need for the page
                 * state change.
                 */
                memcpy(dst, src, sz);
        }
}

/*
 * This routine does not change the underlying encryption setting of the
 * page(s) that map this memory. It assumes that eventually the memory is
 * meant to be accessed as either encrypted or decrypted but the contents
 * are currently not in the desired state.
 *
 * This routine follows the steps outlined in the AMD64 Architecture
 * Programmer's Manual Volume 2, Section 7.10.8 Encrypt-in-Place.
 */
static void __init __sme_early_enc_dec(resource_size_t paddr,
                                       unsigned long size, bool enc)
{
        void *src, *dst;
        size_t len;

        if (!sme_me_mask)
                return;

        wbinvd();

        /*
         * There is a limited number of early mapping slots, so map (at most)
         * one page at a time.
         */
        while (size) {
                len = min_t(size_t, sizeof(sme_early_buffer), size);

                /*
                 * Create mappings for the current and desired format of
                 * the memory. Use a write-protected mapping for the source.
                 */
                src = enc ? early_memremap_decrypted_wp(paddr, len) :
                            early_memremap_encrypted_wp(paddr, len);

                dst = enc ? early_memremap_encrypted(paddr, len) :
                            early_memremap_decrypted(paddr, len);

                /*
                 * If a mapping can't be obtained to perform the operation,
                 * then eventual access of that area in the desired mode
                 * will cause a crash.
                 */
                BUG_ON(!src || !dst);

                /*
                 * Use a temporary buffer, of cache-line multiple size, to
                 * avoid data corruption as documented in the APM.
                 */
                if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) {
                        snp_memcpy(sme_early_buffer, src, len, paddr, enc);
                        snp_memcpy(dst, sme_early_buffer, len, paddr, !enc);
                } else {
                        memcpy(sme_early_buffer, src, len);
                        memcpy(dst, sme_early_buffer, len);
                }

                early_memunmap(dst, len);
                early_memunmap(src, len);

                paddr += len;
                size -= len;
        }
}

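/* Encrypt or decrypt a range of physical memory in place during early boot. */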
void __init sme_early_encrypt(resource_size_t paddr, unsigned long size)
{
        __sme_early_enc_dec(paddr, size, true);
}

void __init sme_early_decrypt(resource_size_t paddr, unsigned long size)
{
        __sme_early_enc_dec(paddr, size, false);
}

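/*
 * Map or unmap the given range using early PMD-sized page table entries with
 * the encryption mask cleared, so that boot data written by the (unencrypted)
 * bootloader remains accessible.
 */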
static void __init __sme_early_map_unmap_mem(void *vaddr, unsigned long size,
                                             bool map)
{
        unsigned long paddr = (unsigned long)vaddr - __PAGE_OFFSET;
        pmdval_t pmd_flags, pmd;

        /* Use early_pmd_flags but remove the encryption mask */
        pmd_flags = __sme_clr(early_pmd_flags);

        do {
                pmd = map ? (paddr & PMD_MASK) + pmd_flags : 0;
                __early_make_pgtable((unsigned long)vaddr, pmd);

                vaddr += PMD_SIZE;
                paddr += PMD_SIZE;
                size = (size <= PMD_SIZE) ? 0 : size - PMD_SIZE;
        } while (size);

        flush_tlb_local();
}

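/*
 * Tear down the decrypted mappings of the boot_params structure and the
 * kernel command line that were created by sme_map_bootdata().
 */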
void __init sme_unmap_bootdata(char *real_mode_data)
{
        struct boot_params *boot_data;
        unsigned long cmdline_paddr;

        if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
                return;

        /* Get the command line address before unmapping the real_mode_data */
        boot_data = (struct boot_params *)real_mode_data;
        cmdline_paddr = boot_data->hdr.cmd_line_ptr | ((u64)boot_data->ext_cmd_line_ptr << 32);

        __sme_early_map_unmap_mem(real_mode_data, sizeof(boot_params), false);

        if (!cmdline_paddr)
                return;

        __sme_early_map_unmap_mem(__va(cmdline_paddr), COMMAND_LINE_SIZE, false);
}

void __init sme_map_bootdata(char *real_mode_data)
{
        struct boot_params *boot_data;
        unsigned long cmdline_paddr;

        if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
                return;

        __sme_early_map_unmap_mem(real_mode_data, sizeof(boot_params), true);

        /* Get the command line address after mapping the real_mode_data */
        boot_data = (struct boot_params *)real_mode_data;
        cmdline_paddr = boot_data->hdr.cmd_line_ptr | ((u64)boot_data->ext_cmd_line_ptr << 32);

        if (!cmdline_paddr)
                return;

        __sme_early_map_unmap_mem(__va(cmdline_paddr), COMMAND_LINE_SIZE, true);
}

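/*
 * Return the PFN for the page table entry at the given level and, if
 * requested, its protection bits. Returns 0 for an unexpected level.
 */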
static unsigned long pg_level_to_pfn(int level, pte_t *kpte, pgprot_t *ret_prot)
{
        unsigned long pfn = 0;
        pgprot_t prot;

        switch (level) {
        case PG_LEVEL_4K:
                pfn = pte_pfn(*kpte);
                prot = pte_pgprot(*kpte);
                break;
        case PG_LEVEL_2M:
                pfn = pmd_pfn(*(pmd_t *)kpte);
                prot = pmd_pgprot(*(pmd_t *)kpte);
                break;
        case PG_LEVEL_1G:
                pfn = pud_pfn(*(pud_t *)kpte);
                prot = pud_pgprot(*(pud_t *)kpte);
                break;
        default:
                WARN_ONCE(1, "Invalid level for kpte\n");
                return 0;
        }

        if (ret_prot)
                *ret_prot = prot;

        return pfn;
}

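/*
 * Changing the encryption attribute of a range always requires a TLB flush;
 * a cache flush is only needed when the hardware does not keep caches
 * coherent across encryption domains (no X86_FEATURE_SME_COHERENT).
 */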
static bool amd_enc_tlb_flush_required(bool enc)
{
        return true;
}

static bool amd_enc_cache_flush_required(void)
{
        return !cpu_feature_enabled(X86_FEATURE_SME_COHERENT);
}

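/*
 * Walk the page tables for the given virtual address range and notify the
 * hypervisor of the new encryption status of each mapped page (only relevant
 * for paravirtualized guests).
 */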
static void enc_dec_hypercall(unsigned long vaddr, unsigned long size, bool enc)
{
#ifdef CONFIG_PARAVIRT
        unsigned long vaddr_end = vaddr + size;

        while (vaddr < vaddr_end) {
                int psize, pmask, level;
                unsigned long pfn;
                pte_t *kpte;

                kpte = lookup_address(vaddr, &level);
                if (!kpte || pte_none(*kpte)) {
                        WARN_ONCE(1, "kpte lookup for vaddr\n");
                        return;
                }

                pfn = pg_level_to_pfn(level, kpte, NULL);
                if (!pfn)
                        continue;

                psize = page_level_size(level);
                pmask = page_level_mask(level);

                notify_page_enc_status_changed(pfn, psize >> PAGE_SHIFT, enc);

                vaddr = (vaddr & pmask) + psize;
        }
#endif
}

static int amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool enc)
{
        /*
         * To maintain the security guarantees of SEV-SNP guests, make sure
         * to invalidate the memory before the encryption attribute is cleared.
         */
        if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP) && !enc)
                snp_set_memory_shared(vaddr, npages);

        return 0;
}

/* The return value doesn't matter for the SEV side, so return 0 unconditionally. */
static int amd_enc_status_change_finish(unsigned long vaddr, int npages, bool enc)
{
        /*
         * After memory is mapped encrypted in the page table, validate it
         * so that it is consistent with the page table updates.
         */
        if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP) && enc)
                snp_set_memory_private(vaddr, npages);

        if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
                enc_dec_hypercall(vaddr, npages << PAGE_SHIFT, enc);

        return 0;
}

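/*
 * Prepare a page whose encryption attribute is about to change: compute the
 * new protection bits, physical address and mapping size, and flush the
 * affected cache lines. Returns 0 on success, 1 if there is nothing to do.
 */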
int prepare_pte_enc(struct pte_enc_desc *d)
{
        pgprot_t old_prot;

        d->pfn = pg_level_to_pfn(d->pte_level, d->kpte, &old_prot);
        if (!d->pfn)
                return 1;

        d->new_pgprot = old_prot;
        if (d->encrypt)
                pgprot_val(d->new_pgprot) |= _PAGE_ENC;
        else
                pgprot_val(d->new_pgprot) &= ~_PAGE_ENC;

        /* If prot is the same then do nothing. */
        if (pgprot_val(old_prot) == pgprot_val(d->new_pgprot))
                return 1;

        d->pa = d->pfn << PAGE_SHIFT;
        d->size = page_level_size(d->pte_level);

        /*
         * In-place en-/decryption and physical page attribute change
         * from C=1 to C=0 or vice versa will be performed. Flush the
         * caches to ensure that data gets accessed with the correct
         * C-bit.
         */
        if (d->va)
                clflush_cache_range(d->va, d->size);
        else
                clflush_cache_range(__va(d->pa), d->size);

        return 0;
}

void set_pte_enc_mask(pte_t *kpte, unsigned long pfn, pgprot_t new_prot)
{
        pte_t new_pte;

        /* Change the page encryption mask. */
        new_pte = pfn_pte(pfn, new_prot);
        set_pte_atomic(kpte, new_pte);
}

static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
{
        struct pte_enc_desc d = {
                .kpte = kpte,
                .pte_level = level,
                .encrypt = enc
        };

        if (prepare_pte_enc(&d))
                return;

        /* Encrypt/decrypt the contents in-place */
        if (enc) {
                sme_early_encrypt(d.pa, d.size);
        } else {
                sme_early_decrypt(d.pa, d.size);

                /*
                 * On SNP, the page state change in the RMP table must happen
                 * before the page table updates.
                 */
                early_snp_set_memory_shared((unsigned long)__va(d.pa), d.pa, 1);
        }

        set_pte_enc_mask(kpte, d.pfn, d.new_pgprot);

        /*
         * If the page is set encrypted in the page table, then update the RMP
         * table to add this page as private.
         */
        if (enc)
                early_snp_set_memory_private((unsigned long)__va(d.pa), d.pa, 1);
}

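/*
 * Early boot helper to switch a virtual address range between encrypted and
 * decrypted mappings. Large pages are changed in one go when fully covered,
 * otherwise they are split, and the hypervisor is notified of the change.
 */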
static int __init early_set_memory_enc_dec(unsigned long vaddr,
                                           unsigned long size, bool enc)
{
        unsigned long vaddr_end, vaddr_next, start;
        unsigned long psize, pmask;
        int split_page_size_mask;
        int level, ret;
        pte_t *kpte;

        start = vaddr;
        vaddr_next = vaddr;
        vaddr_end = vaddr + size;

        for (; vaddr < vaddr_end; vaddr = vaddr_next) {
                kpte = lookup_address(vaddr, &level);
                if (!kpte || pte_none(*kpte)) {
                        ret = 1;
                        goto out;
                }

                if (level == PG_LEVEL_4K) {
                        __set_clr_pte_enc(kpte, level, enc);
                        vaddr_next = (vaddr & PAGE_MASK) + PAGE_SIZE;
                        continue;
                }

                psize = page_level_size(level);
                pmask = page_level_mask(level);

                /*
                 * Check whether we can change the large page in one go.
                 * We request a split when the address is not aligned and
                 * the number of pages to set/clear the encryption bit on is
                 * smaller than the number of pages in the large page.
                 */
                if (vaddr == (vaddr & pmask) &&
                    ((vaddr_end - vaddr) >= psize)) {
                        __set_clr_pte_enc(kpte, level, enc);
                        vaddr_next = (vaddr & pmask) + psize;
                        continue;
                }

                /*
                 * The virtual address is part of a larger page, create the next
                 * level page table mapping (4K or 2M). If it is part of a 2M
                 * page then we request a split of the large page into 4K
                 * chunks; a 1GB large page is split into 2M pages instead.
                 */
                if (level == PG_LEVEL_2M)
                        split_page_size_mask = 0;
                else
                        split_page_size_mask = 1 << PG_LEVEL_2M;

                /*
                 * kernel_physical_mapping_change() does not flush the TLBs, so
                 * a TLB flush is required after we exit from the for loop.
                 */
                kernel_physical_mapping_change(__pa(vaddr & pmask),
                                               __pa((vaddr_end & pmask) + psize),
                                               split_page_size_mask);
        }

        ret = 0;

        early_set_mem_enc_dec_hypercall(start, size, enc);
out:
        __flush_tlb_all();
        return ret;
}

int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size)
{
        return early_set_memory_enc_dec(vaddr, size, false);
}

int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size)
{
        return early_set_memory_enc_dec(vaddr, size, true);
}

void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, unsigned long size, bool enc)
{
        enc_dec_hypercall(vaddr, size, enc);
}

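/*
 * Early SME/SEV setup: if memory encryption is active, propagate the
 * encryption mask into the early page table flags, install the AMD
 * encryption-status callbacks and apply SEV-ES/SNP specific adjustments.
 */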
void __init sme_early_init(void)
{
        if (!sme_me_mask)
                return;

        early_pmd_flags = __sme_set(early_pmd_flags);

        __supported_pte_mask = __sme_set(__supported_pte_mask);

        /* Update the protection map with memory encryption mask */
        add_encrypt_protection_map();

        x86_platform.guest.enc_status_change_prepare = amd_enc_status_change_prepare;
        x86_platform.guest.enc_status_change_finish = amd_enc_status_change_finish;
        x86_platform.guest.enc_tlb_flush_required = amd_enc_tlb_flush_required;
        x86_platform.guest.enc_cache_flush_required = amd_enc_cache_flush_required;
        x86_platform.guest.enc_kexec_begin = snp_kexec_begin;
        x86_platform.guest.enc_kexec_finish = snp_kexec_finish;

        /*
         * AMD-SEV-ES intercepts the RDMSR to read the X2APIC ID in the
         * parallel bringup low level code. That raises #VC which cannot be
         * handled there.
         * It does not provide a RDMSR GHCB protocol so the early startup
         * code cannot directly communicate with the secure firmware. The
         * alternative solution to retrieve the APIC ID via CPUID(0xb),
         * which is covered by the GHCB protocol, is not viable either
         * because there is no enforcement of the CPUID(0xb) provided
         * "initial" APIC ID to be the same as the real APIC ID.
         * Disable parallel bootup.
         */
        if (sev_status & MSR_AMD64_SEV_ES_ENABLED)
                x86_cpuinit.parallel_bringup = false;

        /*
         * The VMM is capable of injecting interrupt 0x80 and triggering the
         * compatibility syscall path.
         *
         * By default, the 32-bit emulation is disabled in order to ensure
         * the safety of the VM.
         */
        if (sev_status & MSR_AMD64_SEV_ENABLED)
                ia32_disable();

        /*
         * Override init functions that scan the ROM region in SEV-SNP guests,
         * as this memory is not pre-validated and would thus cause a crash.
         */
        if (sev_status & MSR_AMD64_SEV_SNP_ENABLED) {
                x86_init.mpparse.find_mptable = x86_init_noop;
                x86_init.pci.init_irq = x86_init_noop;
                x86_init.resources.probe_roms = x86_init_noop;

                /*
                 * DMI setup behavior for SEV-SNP guests depends on
                 * efi_enabled(EFI_CONFIG_TABLES), which hasn't been
                 * parsed yet. snp_dmi_setup() will run after that
                 * parsing has happened.
                 */
                x86_init.resources.dmi_setup = snp_dmi_setup;
        }

        /*
         * Switch the SVSM CA mapping (if active) from identity mapped to
         * kernel mapped.
         */
        snp_update_svsm_ca();

        if (sev_status & MSR_AMD64_SNP_SECURE_TSC)
                setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
}

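/*
 * Free the unused part of the .bss..decrypted section, re-encrypting it first
 * when it had been mapped decrypted at boot.
 */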
void __init mem_encrypt_free_decrypted_mem(void)
{
        unsigned long vaddr, vaddr_end, npages;
        int r;

        vaddr = (unsigned long)__start_bss_decrypted_unused;
        vaddr_end = (unsigned long)__end_bss_decrypted;
        npages = (vaddr_end - vaddr) >> PAGE_SHIFT;

        /*
         * If the unused memory range was mapped decrypted, change the encryption
         * attribute from decrypted to encrypted before freeing it. Base the
         * re-encryption on the same condition used for the decryption in
         * sme_postprocess_startup(). Higher level abstractions, such as
         * CC_ATTR_MEM_ENCRYPT, aren't necessarily equivalent in a Hyper-V VM
         * using vTOM, where sme_me_mask is always zero.
         */
        if (sme_me_mask) {
                r = set_memory_encrypted(vaddr, npages);
                if (r) {
                        pr_warn("failed to free unused decrypted pages\n");
                        return;
                }
        }

        free_init_pages("unused decrypted", vaddr, vaddr_end);
}