GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/virt/vmx/tdx/tdx.c
1
// SPDX-License-Identifier: GPL-2.0
2
/*
3
* Copyright(c) 2023 Intel Corporation.
4
*
5
* Intel Trusted Domain Extensions (TDX) support
6
*/
7
8
#include "asm/page_types.h"
9
#define pr_fmt(fmt) "virt/tdx: " fmt
10
11
#include <linux/types.h>
12
#include <linux/cache.h>
13
#include <linux/init.h>
14
#include <linux/errno.h>
15
#include <linux/printk.h>
16
#include <linux/cpu.h>
17
#include <linux/spinlock.h>
18
#include <linux/percpu-defs.h>
19
#include <linux/mutex.h>
20
#include <linux/list.h>
21
#include <linux/memblock.h>
22
#include <linux/memory.h>
23
#include <linux/minmax.h>
24
#include <linux/sizes.h>
25
#include <linux/pfn.h>
26
#include <linux/align.h>
27
#include <linux/sort.h>
28
#include <linux/log2.h>
29
#include <linux/acpi.h>
30
#include <linux/suspend.h>
31
#include <linux/idr.h>
32
#include <asm/page.h>
33
#include <asm/special_insns.h>
34
#include <asm/msr-index.h>
35
#include <asm/msr.h>
36
#include <asm/cpufeature.h>
37
#include <asm/tdx.h>
38
#include <asm/cpu_device_id.h>
39
#include <asm/processor.h>
40
#include <asm/mce.h>
41
#include "tdx.h"
42
43
static u32 tdx_global_keyid __ro_after_init;
44
static u32 tdx_guest_keyid_start __ro_after_init;
45
static u32 tdx_nr_guest_keyids __ro_after_init;
46
47
static DEFINE_IDA(tdx_guest_keyid_pool);
48
49
static DEFINE_PER_CPU(bool, tdx_lp_initialized);
50
51
static struct tdmr_info_list tdx_tdmr_list;
52
53
static enum tdx_module_status_t tdx_module_status;
54
static DEFINE_MUTEX(tdx_module_lock);
55
56
/* All TDX-usable memory regions. Protected by mem_hotplug_lock. */
57
static LIST_HEAD(tdx_memlist);
58
59
static struct tdx_sys_info tdx_sysinfo;
60
61
typedef void (*sc_err_func_t)(u64 fn, u64 err, struct tdx_module_args *args);
62
63
static inline void seamcall_err(u64 fn, u64 err, struct tdx_module_args *args)
64
{
65
pr_err("SEAMCALL (0x%016llx) failed: 0x%016llx\n", fn, err);
66
}
67
68
static inline void seamcall_err_ret(u64 fn, u64 err,
69
struct tdx_module_args *args)
70
{
71
seamcall_err(fn, err, args);
72
pr_err("RCX 0x%016llx RDX 0x%016llx R08 0x%016llx\n",
73
args->rcx, args->rdx, args->r8);
74
pr_err("R09 0x%016llx R10 0x%016llx R11 0x%016llx\n",
75
args->r9, args->r10, args->r11);
76
}
77
78
static __always_inline int sc_retry_prerr(sc_func_t func,
79
sc_err_func_t err_func,
80
u64 fn, struct tdx_module_args *args)
81
{
82
u64 sret = sc_retry(func, fn, args);
83
84
if (sret == TDX_SUCCESS)
85
return 0;
86
87
if (sret == TDX_SEAMCALL_VMFAILINVALID)
88
return -ENODEV;
89
90
if (sret == TDX_SEAMCALL_GP)
91
return -EOPNOTSUPP;
92
93
if (sret == TDX_SEAMCALL_UD)
94
return -EACCES;
95
96
err_func(fn, sret, args);
97
return -EIO;
98
}
99
100
#define seamcall_prerr(__fn, __args) \
101
sc_retry_prerr(__seamcall, seamcall_err, (__fn), (__args))
102
103
#define seamcall_prerr_ret(__fn, __args) \
104
sc_retry_prerr(__seamcall_ret, seamcall_err_ret, (__fn), (__args))
105
106
/*
107
* Do the module global initialization once and return its result.
108
* It can be done on any cpu. It's always called with interrupts
109
* disabled.
110
*/
111
static int try_init_module_global(void)
112
{
113
struct tdx_module_args args = {};
114
static DEFINE_RAW_SPINLOCK(sysinit_lock);
115
static bool sysinit_done;
116
static int sysinit_ret;
117
118
lockdep_assert_irqs_disabled();
119
120
raw_spin_lock(&sysinit_lock);
121
122
if (sysinit_done)
123
goto out;
124
125
/* RCX is module attributes and all bits are reserved */
126
args.rcx = 0;
127
sysinit_ret = seamcall_prerr(TDH_SYS_INIT, &args);
128
129
/*
130
* The first SEAMCALL also detects the TDX module, thus
131
* it can fail because the TDX module is not loaded.
132
* Dump message to let the user know.
133
*/
134
if (sysinit_ret == -ENODEV)
135
pr_err("module not loaded\n");
136
137
sysinit_done = true;
138
out:
139
raw_spin_unlock(&sysinit_lock);
140
return sysinit_ret;
141
}
142
143
/**
144
* tdx_cpu_enable - Enable TDX on local cpu
145
*
146
* Do one-time TDX module per-cpu initialization SEAMCALL (and TDX module
147
* global initialization SEAMCALL if not done) on local cpu to make this
148
* cpu be ready to run any other SEAMCALLs.
149
*
150
* Always call this function via IPI function calls.
151
*
152
* Return 0 on success, otherwise errors.
153
*/
154
int tdx_cpu_enable(void)
155
{
156
struct tdx_module_args args = {};
157
int ret;
158
159
if (!boot_cpu_has(X86_FEATURE_TDX_HOST_PLATFORM))
160
return -ENODEV;
161
162
lockdep_assert_irqs_disabled();
163
164
if (__this_cpu_read(tdx_lp_initialized))
165
return 0;
166
167
/*
168
* The TDX module global initialization is the very first step
169
* to enable TDX. Need to do it first (if it hasn't been done)
170
* before the per-cpu initialization.
171
*/
172
ret = try_init_module_global();
173
if (ret)
174
return ret;
175
176
ret = seamcall_prerr(TDH_SYS_LP_INIT, &args);
177
if (ret)
178
return ret;
179
180
__this_cpu_write(tdx_lp_initialized, true);
181
182
return 0;
183
}
184
EXPORT_SYMBOL_GPL(tdx_cpu_enable);
185
186
/*
187
* Add a memory region as a TDX memory block. The caller must make sure
188
* all memory regions are added in address ascending order and don't
189
* overlap.
190
*/
191
static int add_tdx_memblock(struct list_head *tmb_list, unsigned long start_pfn,
192
unsigned long end_pfn, int nid)
193
{
194
struct tdx_memblock *tmb;
195
196
tmb = kmalloc(sizeof(*tmb), GFP_KERNEL);
197
if (!tmb)
198
return -ENOMEM;
199
200
INIT_LIST_HEAD(&tmb->list);
201
tmb->start_pfn = start_pfn;
202
tmb->end_pfn = end_pfn;
203
tmb->nid = nid;
204
205
/* @tmb_list is protected by mem_hotplug_lock */
206
list_add_tail(&tmb->list, tmb_list);
207
return 0;
208
}
209
210
static void free_tdx_memlist(struct list_head *tmb_list)
211
{
212
/* @tmb_list is protected by mem_hotplug_lock */
213
while (!list_empty(tmb_list)) {
214
struct tdx_memblock *tmb = list_first_entry(tmb_list,
215
struct tdx_memblock, list);
216
217
list_del(&tmb->list);
218
kfree(tmb);
219
}
220
}
221
222
/*
223
* Ensure that all memblock memory regions are convertible to TDX
224
* memory. Once this has been established, stash the memblock
225
* ranges off in a secondary structure because memblock is modified
226
* in memory hotplug while TDX memory regions are fixed.
227
*/
228
static int build_tdx_memlist(struct list_head *tmb_list)
229
{
230
unsigned long start_pfn, end_pfn;
231
int i, nid, ret;
232
233
for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
234
/*
235
* The first 1MB is not reported as TDX convertible memory.
236
* Although the first 1MB is always reserved and won't end up
237
* to the page allocator, it is still in memblock's memory
238
* regions. Skip them manually to exclude them as TDX memory.
239
*/
240
start_pfn = max(start_pfn, PHYS_PFN(SZ_1M));
241
if (start_pfn >= end_pfn)
242
continue;
243
244
/*
245
* Add the memory regions as TDX memory. The regions in
246
* memblock are already guaranteed to be in address
247
* ascending order and don't overlap.
248
*/
249
ret = add_tdx_memblock(tmb_list, start_pfn, end_pfn, nid);
250
if (ret)
251
goto err;
252
}
253
254
return 0;
255
err:
256
free_tdx_memlist(tmb_list);
257
return ret;
258
}
259
260
static int read_sys_metadata_field(u64 field_id, u64 *data)
261
{
262
struct tdx_module_args args = {};
263
int ret;
264
265
/*
266
* TDH.SYS.RD -- reads one global metadata field
267
* - RDX (in): the field to read
268
* - R8 (out): the field data
269
*/
270
args.rdx = field_id;
271
ret = seamcall_prerr_ret(TDH_SYS_RD, &args);
272
if (ret)
273
return ret;
274
275
*data = args.r8;
276
277
return 0;
278
}
279
280
#include "tdx_global_metadata.c"
281
282
static int check_features(struct tdx_sys_info *sysinfo)
283
{
284
u64 tdx_features0 = sysinfo->features.tdx_features0;
285
286
if (!(tdx_features0 & TDX_FEATURES0_NO_RBP_MOD)) {
287
pr_err("frame pointer (RBP) clobber bug present, upgrade TDX module\n");
288
return -EINVAL;
289
}
290
291
return 0;
292
}
293
294
/* Calculate the actual TDMR size */
295
static int tdmr_size_single(u16 max_reserved_per_tdmr)
296
{
297
int tdmr_sz;
298
299
/*
300
* The actual size of TDMR depends on the maximum
301
* number of reserved areas.
302
*/
303
tdmr_sz = sizeof(struct tdmr_info);
304
tdmr_sz += sizeof(struct tdmr_reserved_area) * max_reserved_per_tdmr;
305
306
return ALIGN(tdmr_sz, TDMR_INFO_ALIGNMENT);
307
}
308
309
static int alloc_tdmr_list(struct tdmr_info_list *tdmr_list,
310
struct tdx_sys_info_tdmr *sysinfo_tdmr)
311
{
312
size_t tdmr_sz, tdmr_array_sz;
313
void *tdmr_array;
314
315
tdmr_sz = tdmr_size_single(sysinfo_tdmr->max_reserved_per_tdmr);
316
tdmr_array_sz = tdmr_sz * sysinfo_tdmr->max_tdmrs;
317
318
/*
319
* To keep things simple, allocate all TDMRs together.
320
* The buffer needs to be physically contiguous to make
321
* sure each TDMR is physically contiguous.
322
*/
323
tdmr_array = alloc_pages_exact(tdmr_array_sz,
324
GFP_KERNEL | __GFP_ZERO);
325
if (!tdmr_array)
326
return -ENOMEM;
327
328
tdmr_list->tdmrs = tdmr_array;
329
330
/*
331
* Keep the size of TDMR to find the target TDMR
332
* at a given index in the TDMR list.
333
*/
334
tdmr_list->tdmr_sz = tdmr_sz;
335
tdmr_list->max_tdmrs = sysinfo_tdmr->max_tdmrs;
336
tdmr_list->nr_consumed_tdmrs = 0;
337
338
return 0;
339
}
340
341
static void free_tdmr_list(struct tdmr_info_list *tdmr_list)
342
{
343
free_pages_exact(tdmr_list->tdmrs,
344
tdmr_list->max_tdmrs * tdmr_list->tdmr_sz);
345
}
346
347
/* Get the TDMR from the list at the given index. */
348
static struct tdmr_info *tdmr_entry(struct tdmr_info_list *tdmr_list,
349
int idx)
350
{
351
int tdmr_info_offset = tdmr_list->tdmr_sz * idx;
352
353
return (void *)tdmr_list->tdmrs + tdmr_info_offset;
354
}
355
356
#define TDMR_ALIGNMENT SZ_1G
357
#define TDMR_ALIGN_DOWN(_addr) ALIGN_DOWN((_addr), TDMR_ALIGNMENT)
358
#define TDMR_ALIGN_UP(_addr) ALIGN((_addr), TDMR_ALIGNMENT)
359
360
static inline u64 tdmr_end(struct tdmr_info *tdmr)
361
{
362
return tdmr->base + tdmr->size;
363
}
364
365
/*
366
* Take the memory referenced in @tmb_list and populate the
367
* preallocated @tdmr_list, following all the special alignment
368
* and size rules for TDMR.
369
*/
370
static int fill_out_tdmrs(struct list_head *tmb_list,
371
struct tdmr_info_list *tdmr_list)
372
{
373
struct tdx_memblock *tmb;
374
int tdmr_idx = 0;
375
376
/*
377
* Loop over TDX memory regions and fill out TDMRs to cover them.
378
* To keep it simple, always try to use one TDMR to cover one
379
* memory region.
380
*
381
* In practice TDX supports at least 64 TDMRs. A 2-socket system
382
* typically consumes fewer than 10 of those. This code is
383
* dumb and simple and may use more TDMRs than is strictly
384
* required.
385
*/
386
list_for_each_entry(tmb, tmb_list, list) {
387
struct tdmr_info *tdmr = tdmr_entry(tdmr_list, tdmr_idx);
388
u64 start, end;
389
390
start = TDMR_ALIGN_DOWN(PFN_PHYS(tmb->start_pfn));
391
end = TDMR_ALIGN_UP(PFN_PHYS(tmb->end_pfn));
392
393
/*
394
* A valid size indicates the current TDMR has already
395
* been filled out to cover the previous memory region(s).
396
*/
397
if (tdmr->size) {
398
/*
399
* Loop to the next if the current memory region
400
* has already been fully covered.
401
*/
402
if (end <= tdmr_end(tdmr))
403
continue;
404
405
/* Otherwise, skip the already covered part. */
406
if (start < tdmr_end(tdmr))
407
start = tdmr_end(tdmr);
408
409
/*
410
* Create a new TDMR to cover the current memory
411
* region, or the remaining part of it.
412
*/
413
tdmr_idx++;
414
if (tdmr_idx >= tdmr_list->max_tdmrs) {
415
pr_warn("initialization failed: TDMRs exhausted.\n");
416
return -ENOSPC;
417
}
418
419
tdmr = tdmr_entry(tdmr_list, tdmr_idx);
420
}
421
422
tdmr->base = start;
423
tdmr->size = end - start;
424
}
425
426
/* @tdmr_idx is always the index of the last valid TDMR. */
427
tdmr_list->nr_consumed_tdmrs = tdmr_idx + 1;
428
429
/*
430
* Warn early that the kernel is about to run out of TDMRs.
431
*
432
* This is an indication that TDMR allocation has to be
433
* reworked to be smarter to not run into an issue.
434
*/
435
if (tdmr_list->max_tdmrs - tdmr_list->nr_consumed_tdmrs < TDMR_NR_WARN)
436
pr_warn("consumed TDMRs reaching limit: %d used out of %d\n",
437
tdmr_list->nr_consumed_tdmrs,
438
tdmr_list->max_tdmrs);
439
440
return 0;
441
}
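/*
 * Illustrative example of fill_out_tdmrs() on a hypothetical layout
 * (not from the TDX spec): TDX memory blocks [1MB, 1.5GB) and
 * [1.5GB, 3GB) are covered by two TDMRs.  The first block yields
 * TDMR [0, 2GB); the second block is already covered up to 2GB, so a
 * second TDMR [2GB, 3GB) only covers the remainder.
 */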
442
443
/*
444
* Calculate PAMT size given a TDMR and a page size. The returned
445
* PAMT size is always aligned up to 4K page boundary.
446
*/
447
static unsigned long tdmr_get_pamt_sz(struct tdmr_info *tdmr, int pgsz,
448
u16 pamt_entry_size)
449
{
450
unsigned long pamt_sz, nr_pamt_entries;
451
452
switch (pgsz) {
453
case TDX_PS_4K:
454
nr_pamt_entries = tdmr->size >> PAGE_SHIFT;
455
break;
456
case TDX_PS_2M:
457
nr_pamt_entries = tdmr->size >> PMD_SHIFT;
458
break;
459
case TDX_PS_1G:
460
nr_pamt_entries = tdmr->size >> PUD_SHIFT;
461
break;
462
default:
463
WARN_ON_ONCE(1);
464
return 0;
465
}
466
467
pamt_sz = nr_pamt_entries * pamt_entry_size;
468
/* TDX requires the PAMT size to be 4K aligned */
469
pamt_sz = ALIGN(pamt_sz, PAGE_SIZE);
470
471
return pamt_sz;
472
}
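/*
 * Illustrative example of tdmr_get_pamt_sz(), assuming a 16-byte 4K
 * PAMT entry size as reported by the TDX module: a 1GB TDMR covers
 * 1GB / 4KB = 262144 4K pages, so its 4K PAMT is 262144 * 16 bytes =
 * 4MB, i.e. roughly 1/256th of the memory it covers.
 */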
473
474
/*
475
* Locate a NUMA node which should hold the allocation of the @tdmr
476
* PAMT. This node will have some memory covered by the TDMR. The
477
* relative amount of memory covered is not considered.
478
*/
479
static int tdmr_get_nid(struct tdmr_info *tdmr, struct list_head *tmb_list)
480
{
481
struct tdx_memblock *tmb;
482
483
/*
484
* A TDMR must cover at least part of one TMB. That TMB will end
485
* after the TDMR begins. But, that TMB may have started before
486
* the TDMR. Find the next 'tmb' that _ends_ after this TDMR
487
* begins. Ignore 'tmb' start addresses. They are irrelevant.
488
*/
489
list_for_each_entry(tmb, tmb_list, list) {
490
if (tmb->end_pfn > PHYS_PFN(tdmr->base))
491
return tmb->nid;
492
}
493
494
/*
495
* Fall back to allocating the TDMR's metadata from node 0 when
496
* no TDX memory block can be found. This should never happen
497
* since TDMRs originate from TDX memory blocks.
498
*/
499
pr_warn("TDMR [0x%llx, 0x%llx): unable to find local NUMA node for PAMT allocation, fallback to use node 0.\n",
500
tdmr->base, tdmr_end(tdmr));
501
return 0;
502
}
503
504
/*
505
* Allocate PAMTs from the local NUMA node of some memory in @tmb_list
506
* within @tdmr, and set up PAMTs for @tdmr.
507
*/
508
static int tdmr_set_up_pamt(struct tdmr_info *tdmr,
509
struct list_head *tmb_list,
510
u16 pamt_entry_size[])
511
{
512
unsigned long pamt_base[TDX_PS_NR];
513
unsigned long pamt_size[TDX_PS_NR];
514
unsigned long tdmr_pamt_base;
515
unsigned long tdmr_pamt_size;
516
struct page *pamt;
517
int pgsz, nid;
518
519
nid = tdmr_get_nid(tdmr, tmb_list);
520
521
/*
522
* Calculate the PAMT size for each TDX supported page size
523
* and the total PAMT size.
524
*/
525
tdmr_pamt_size = 0;
526
for (pgsz = TDX_PS_4K; pgsz < TDX_PS_NR; pgsz++) {
527
pamt_size[pgsz] = tdmr_get_pamt_sz(tdmr, pgsz,
528
pamt_entry_size[pgsz]);
529
tdmr_pamt_size += pamt_size[pgsz];
530
}
531
532
/*
533
* Allocate one chunk of physically contiguous memory for all
534
* PAMTs. This helps minimize the PAMT's use of reserved areas
535
* in overlapped TDMRs.
536
*/
537
pamt = alloc_contig_pages(tdmr_pamt_size >> PAGE_SHIFT, GFP_KERNEL,
538
nid, &node_online_map);
539
if (!pamt)
540
return -ENOMEM;
541
542
/*
543
* Break the contiguous allocation back up into the
544
* individual PAMTs for each page size.
545
*/
546
tdmr_pamt_base = page_to_pfn(pamt) << PAGE_SHIFT;
547
for (pgsz = TDX_PS_4K; pgsz < TDX_PS_NR; pgsz++) {
548
pamt_base[pgsz] = tdmr_pamt_base;
549
tdmr_pamt_base += pamt_size[pgsz];
550
}
551
552
tdmr->pamt_4k_base = pamt_base[TDX_PS_4K];
553
tdmr->pamt_4k_size = pamt_size[TDX_PS_4K];
554
tdmr->pamt_2m_base = pamt_base[TDX_PS_2M];
555
tdmr->pamt_2m_size = pamt_size[TDX_PS_2M];
556
tdmr->pamt_1g_base = pamt_base[TDX_PS_1G];
557
tdmr->pamt_1g_size = pamt_size[TDX_PS_1G];
558
559
return 0;
560
}
561
562
static void tdmr_get_pamt(struct tdmr_info *tdmr, unsigned long *pamt_base,
563
unsigned long *pamt_size)
564
{
565
unsigned long pamt_bs, pamt_sz;
566
567
/*
568
* The PAMT was allocated in one contiguous unit. The 4K PAMT
569
* should always point to the beginning of that allocation.
570
*/
571
pamt_bs = tdmr->pamt_4k_base;
572
pamt_sz = tdmr->pamt_4k_size + tdmr->pamt_2m_size + tdmr->pamt_1g_size;
573
574
WARN_ON_ONCE((pamt_bs & ~PAGE_MASK) || (pamt_sz & ~PAGE_MASK));
575
576
*pamt_base = pamt_bs;
577
*pamt_size = pamt_sz;
578
}
579
580
static void tdmr_do_pamt_func(struct tdmr_info *tdmr,
581
void (*pamt_func)(unsigned long base, unsigned long size))
582
{
583
unsigned long pamt_base, pamt_size;
584
585
tdmr_get_pamt(tdmr, &pamt_base, &pamt_size);
586
587
/* Do nothing if PAMT hasn't been allocated for this TDMR */
588
if (!pamt_size)
589
return;
590
591
if (WARN_ON_ONCE(!pamt_base))
592
return;
593
594
pamt_func(pamt_base, pamt_size);
595
}
596
597
static void free_pamt(unsigned long pamt_base, unsigned long pamt_size)
598
{
599
free_contig_range(pamt_base >> PAGE_SHIFT, pamt_size >> PAGE_SHIFT);
600
}
601
602
static void tdmr_free_pamt(struct tdmr_info *tdmr)
603
{
604
tdmr_do_pamt_func(tdmr, free_pamt);
605
}
606
607
static void tdmrs_free_pamt_all(struct tdmr_info_list *tdmr_list)
608
{
609
int i;
610
611
for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++)
612
tdmr_free_pamt(tdmr_entry(tdmr_list, i));
613
}
614
615
/* Allocate and set up PAMTs for all TDMRs */
616
static int tdmrs_set_up_pamt_all(struct tdmr_info_list *tdmr_list,
617
struct list_head *tmb_list,
618
u16 pamt_entry_size[])
619
{
620
int i, ret = 0;
621
622
for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++) {
623
ret = tdmr_set_up_pamt(tdmr_entry(tdmr_list, i), tmb_list,
624
pamt_entry_size);
625
if (ret)
626
goto err;
627
}
628
629
return 0;
630
err:
631
tdmrs_free_pamt_all(tdmr_list);
632
return ret;
633
}
634
635
/*
636
* Convert TDX private pages back to normal by using MOVDIR64B to
637
* clear these pages. Note this function doesn't flush cache of
638
* these TDX private pages. The caller should make sure of that.
639
*/
640
static void reset_tdx_pages(unsigned long base, unsigned long size)
641
{
642
const void *zero_page = (const void *)page_address(ZERO_PAGE(0));
643
unsigned long phys, end;
644
645
end = base + size;
646
for (phys = base; phys < end; phys += 64)
647
movdir64b(__va(phys), zero_page);
648
649
/*
650
* MOVDIR64B uses WC protocol. Use memory barrier to
651
* make sure any later user of these pages sees the
652
* updated data.
653
*/
654
mb();
655
}
656
657
static void tdmr_reset_pamt(struct tdmr_info *tdmr)
658
{
659
tdmr_do_pamt_func(tdmr, reset_tdx_pages);
660
}
661
662
static void tdmrs_reset_pamt_all(struct tdmr_info_list *tdmr_list)
663
{
664
int i;
665
666
for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++)
667
tdmr_reset_pamt(tdmr_entry(tdmr_list, i));
668
}
669
670
static unsigned long tdmrs_count_pamt_kb(struct tdmr_info_list *tdmr_list)
671
{
672
unsigned long pamt_size = 0;
673
int i;
674
675
for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++) {
676
unsigned long base, size;
677
678
tdmr_get_pamt(tdmr_entry(tdmr_list, i), &base, &size);
679
pamt_size += size;
680
}
681
682
return pamt_size / 1024;
683
}
684
685
static int tdmr_add_rsvd_area(struct tdmr_info *tdmr, int *p_idx, u64 addr,
686
u64 size, u16 max_reserved_per_tdmr)
687
{
688
struct tdmr_reserved_area *rsvd_areas = tdmr->reserved_areas;
689
int idx = *p_idx;
690
691
/* Reserved area must be 4K aligned in offset and size */
692
if (WARN_ON(addr & ~PAGE_MASK || size & ~PAGE_MASK))
693
return -EINVAL;
694
695
if (idx >= max_reserved_per_tdmr) {
696
pr_warn("initialization failed: TDMR [0x%llx, 0x%llx): reserved areas exhausted.\n",
697
tdmr->base, tdmr_end(tdmr));
698
return -ENOSPC;
699
}
700
701
/*
702
* Consume one reserved area per call. Make no effort to
703
* optimize or reduce the number of reserved area entries that get
704
* consumed, for instance by merging contiguous reserved areas.
705
*/
706
rsvd_areas[idx].offset = addr - tdmr->base;
707
rsvd_areas[idx].size = size;
708
709
*p_idx = idx + 1;
710
711
return 0;
712
}
713
714
/*
715
* Go through @tmb_list to find holes between memory areas. If any of
716
* those holes fall within @tdmr, set up a TDMR reserved area to cover
717
* the hole.
718
*/
719
static int tdmr_populate_rsvd_holes(struct list_head *tmb_list,
720
struct tdmr_info *tdmr,
721
int *rsvd_idx,
722
u16 max_reserved_per_tdmr)
723
{
724
struct tdx_memblock *tmb;
725
u64 prev_end;
726
int ret;
727
728
/*
729
* Start looking for reserved blocks at the
730
* beginning of the TDMR.
731
*/
732
prev_end = tdmr->base;
733
list_for_each_entry(tmb, tmb_list, list) {
734
u64 start, end;
735
736
start = PFN_PHYS(tmb->start_pfn);
737
end = PFN_PHYS(tmb->end_pfn);
738
739
/* Break if this region is after the TDMR */
740
if (start >= tdmr_end(tdmr))
741
break;
742
743
/* Exclude regions before this TDMR */
744
if (end < tdmr->base)
745
continue;
746
747
/*
748
* Skip over memory areas that
749
* have already been dealt with.
750
*/
751
if (start <= prev_end) {
752
prev_end = end;
753
continue;
754
}
755
756
/* Add the hole before this region */
757
ret = tdmr_add_rsvd_area(tdmr, rsvd_idx, prev_end,
758
start - prev_end,
759
max_reserved_per_tdmr);
760
if (ret)
761
return ret;
762
763
prev_end = end;
764
}
765
766
/* Add the hole after the last region if it exists. */
767
if (prev_end < tdmr_end(tdmr)) {
768
ret = tdmr_add_rsvd_area(tdmr, rsvd_idx, prev_end,
769
tdmr_end(tdmr) - prev_end,
770
max_reserved_per_tdmr);
771
if (ret)
772
return ret;
773
}
774
775
return 0;
776
}
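/*
 * Illustrative example of tdmr_populate_rsvd_holes() on a hypothetical
 * layout: a TDMR [0, 2GB) covering TDX memory blocks [1MB, 1GB) and
 * [1.5GB, 2GB) gets two reserved areas: [0, 1MB) before the first
 * block and the [1GB, 1.5GB) hole between the two blocks.
 */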
777
778
/*
779
* Go through @tdmr_list to find all PAMTs. If any of those PAMTs
780
* overlap with @tdmr, set up a TDMR reserved area to cover the
781
* overlapping part.
782
*/
783
static int tdmr_populate_rsvd_pamts(struct tdmr_info_list *tdmr_list,
784
struct tdmr_info *tdmr,
785
int *rsvd_idx,
786
u16 max_reserved_per_tdmr)
787
{
788
int i, ret;
789
790
for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++) {
791
struct tdmr_info *tmp = tdmr_entry(tdmr_list, i);
792
unsigned long pamt_base, pamt_size, pamt_end;
793
794
tdmr_get_pamt(tmp, &pamt_base, &pamt_size);
795
/* Each TDMR must already have PAMT allocated */
796
WARN_ON_ONCE(!pamt_size || !pamt_base);
797
798
pamt_end = pamt_base + pamt_size;
799
/* Skip PAMTs outside of the given TDMR */
800
if ((pamt_end <= tdmr->base) ||
801
(pamt_base >= tdmr_end(tdmr)))
802
continue;
803
804
/* Only mark the part within the TDMR as reserved */
805
if (pamt_base < tdmr->base)
806
pamt_base = tdmr->base;
807
if (pamt_end > tdmr_end(tdmr))
808
pamt_end = tdmr_end(tdmr);
809
810
ret = tdmr_add_rsvd_area(tdmr, rsvd_idx, pamt_base,
811
pamt_end - pamt_base,
812
max_reserved_per_tdmr);
813
if (ret)
814
return ret;
815
}
816
817
return 0;
818
}
819
820
/* Compare function called by sort() for TDMR reserved areas */
821
static int rsvd_area_cmp_func(const void *a, const void *b)
822
{
823
struct tdmr_reserved_area *r1 = (struct tdmr_reserved_area *)a;
824
struct tdmr_reserved_area *r2 = (struct tdmr_reserved_area *)b;
825
826
if (r1->offset + r1->size <= r2->offset)
827
return -1;
828
if (r1->offset >= r2->offset + r2->size)
829
return 1;
830
831
/* Reserved areas cannot overlap. The caller must guarantee this. */
832
WARN_ON_ONCE(1);
833
return -1;
834
}
835
836
/*
837
* Populate reserved areas for the given @tdmr, including memory holes
838
* (via @tmb_list) and PAMTs (via @tdmr_list).
839
*/
840
static int tdmr_populate_rsvd_areas(struct tdmr_info *tdmr,
841
struct list_head *tmb_list,
842
struct tdmr_info_list *tdmr_list,
843
u16 max_reserved_per_tdmr)
844
{
845
int ret, rsvd_idx = 0;
846
847
ret = tdmr_populate_rsvd_holes(tmb_list, tdmr, &rsvd_idx,
848
max_reserved_per_tdmr);
849
if (ret)
850
return ret;
851
852
ret = tdmr_populate_rsvd_pamts(tdmr_list, tdmr, &rsvd_idx,
853
max_reserved_per_tdmr);
854
if (ret)
855
return ret;
856
857
/* TDX requires reserved areas listed in address ascending order */
858
sort(tdmr->reserved_areas, rsvd_idx, sizeof(struct tdmr_reserved_area),
859
rsvd_area_cmp_func, NULL);
860
861
return 0;
862
}
863
864
/*
865
* Populate reserved areas for all TDMRs in @tdmr_list, including memory
866
* holes (via @tmb_list) and PAMTs.
867
*/
868
static int tdmrs_populate_rsvd_areas_all(struct tdmr_info_list *tdmr_list,
869
struct list_head *tmb_list,
870
u16 max_reserved_per_tdmr)
871
{
872
int i;
873
874
for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++) {
875
int ret;
876
877
ret = tdmr_populate_rsvd_areas(tdmr_entry(tdmr_list, i),
878
tmb_list, tdmr_list, max_reserved_per_tdmr);
879
if (ret)
880
return ret;
881
}
882
883
return 0;
884
}
885
886
/*
887
* Construct a list of TDMRs on the preallocated space in @tdmr_list
888
* to cover all TDX memory regions in @tmb_list based on the TDX module
889
* TDMR global information in @sysinfo_tdmr.
890
*/
891
static int construct_tdmrs(struct list_head *tmb_list,
892
struct tdmr_info_list *tdmr_list,
893
struct tdx_sys_info_tdmr *sysinfo_tdmr)
894
{
895
u16 pamt_entry_size[TDX_PS_NR] = {
896
sysinfo_tdmr->pamt_4k_entry_size,
897
sysinfo_tdmr->pamt_2m_entry_size,
898
sysinfo_tdmr->pamt_1g_entry_size,
899
};
900
int ret;
901
902
ret = fill_out_tdmrs(tmb_list, tdmr_list);
903
if (ret)
904
return ret;
905
906
ret = tdmrs_set_up_pamt_all(tdmr_list, tmb_list, pamt_entry_size);
907
if (ret)
908
return ret;
909
910
ret = tdmrs_populate_rsvd_areas_all(tdmr_list, tmb_list,
911
sysinfo_tdmr->max_reserved_per_tdmr);
912
if (ret)
913
tdmrs_free_pamt_all(tdmr_list);
914
915
/*
916
* The tdmr_info_list is read-only from here on out.
917
* Ensure that these writes are seen by other CPUs.
918
* Pairs with a smp_rmb() in is_pamt_page().
919
*/
920
smp_wmb();
921
922
return ret;
923
}
924
925
static int config_tdx_module(struct tdmr_info_list *tdmr_list, u64 global_keyid)
926
{
927
struct tdx_module_args args = {};
928
u64 *tdmr_pa_array;
929
size_t array_sz;
930
int i, ret;
931
932
/*
933
* TDMRs are passed to the TDX module via an array of physical
934
* addresses of each TDMR. The array itself also has certain
935
* alignment requirement.
936
*/
937
array_sz = tdmr_list->nr_consumed_tdmrs * sizeof(u64);
938
array_sz = roundup_pow_of_two(array_sz);
939
if (array_sz < TDMR_INFO_PA_ARRAY_ALIGNMENT)
940
array_sz = TDMR_INFO_PA_ARRAY_ALIGNMENT;
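/*
 * Example (illustrative numbers): with 3 consumed TDMRs the raw
 * array is 24 bytes, roundup_pow_of_two() gives 32, and the size is
 * then raised to TDMR_INFO_PA_ARRAY_ALIGNMENT so the physical
 * address passed in RCX meets the module's alignment requirement.
 */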
941
942
tdmr_pa_array = kzalloc(array_sz, GFP_KERNEL);
943
if (!tdmr_pa_array)
944
return -ENOMEM;
945
946
for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++)
947
tdmr_pa_array[i] = __pa(tdmr_entry(tdmr_list, i));
948
949
args.rcx = __pa(tdmr_pa_array);
950
args.rdx = tdmr_list->nr_consumed_tdmrs;
951
args.r8 = global_keyid;
952
ret = seamcall_prerr(TDH_SYS_CONFIG, &args);
953
954
/* Free the array as it is not required anymore. */
955
kfree(tdmr_pa_array);
956
957
return ret;
958
}
959
960
static int do_global_key_config(void *unused)
961
{
962
struct tdx_module_args args = {};
963
964
return seamcall_prerr(TDH_SYS_KEY_CONFIG, &args);
965
}
966
967
/*
968
* Attempt to configure the global KeyID on all physical packages.
969
*
970
* This requires running code on at least one CPU in each package.
971
* TDMR initialization) will fail will fail if any package in the
972
* system has no online CPUs.
973
*
974
* This code takes no affirmative steps to online CPUs. Callers (aka.
975
* KVM) can ensure success by ensuring sufficient CPUs are online and
976
* can run SEAMCALLs.
977
*/
978
static int config_global_keyid(void)
979
{
980
cpumask_var_t packages;
981
int cpu, ret = -EINVAL;
982
983
if (!zalloc_cpumask_var(&packages, GFP_KERNEL))
984
return -ENOMEM;
985
986
/*
987
* Hardware doesn't guarantee cache coherency across different
988
* KeyIDs. The kernel needs to flush PAMT's dirty cachelines
989
* (associated with KeyID 0) before the TDX module can use the
990
* global KeyID to access the PAMT. Given PAMTs are potentially
991
* large (~1/256th of system RAM), just use WBINVD.
992
*/
993
wbinvd_on_all_cpus();
994
995
for_each_online_cpu(cpu) {
996
/*
997
* The key configuration only needs to be done once per
998
* package and will return an error if configured more
999
* than once. Avoid doing it multiple times per package.
1000
*/
1001
if (cpumask_test_and_set_cpu(topology_physical_package_id(cpu),
1002
packages))
1003
continue;
1004
1005
/*
1006
* TDH.SYS.KEY.CONFIG cannot run concurrently on
1007
* different cpus. Do it one by one.
1008
*/
1009
ret = smp_call_on_cpu(cpu, do_global_key_config, NULL, true);
1010
if (ret)
1011
break;
1012
}
1013
1014
free_cpumask_var(packages);
1015
return ret;
1016
}
1017
1018
static int init_tdmr(struct tdmr_info *tdmr)
1019
{
1020
u64 next;
1021
1022
/*
1023
* Initializing a TDMR can be time consuming. To avoid long
1024
* SEAMCALLs, the TDX module may only initialize a part of the
1025
* TDMR in each call.
1026
*/
1027
do {
1028
struct tdx_module_args args = {
1029
.rcx = tdmr->base,
1030
};
1031
int ret;
1032
1033
ret = seamcall_prerr_ret(TDH_SYS_TDMR_INIT, &args);
1034
if (ret)
1035
return ret;
1036
/*
1037
* RDX contains 'next-to-initialize' address if
1038
* TDH.SYS.TDMR.INIT did not fully complete and
1039
* should be retried.
1040
*/
1041
next = args.rdx;
1042
cond_resched();
1043
/* Keep making SEAMCALLs until the TDMR is done */
1044
} while (next < tdmr->base + tdmr->size);
1045
1046
return 0;
1047
}
1048
1049
static int init_tdmrs(struct tdmr_info_list *tdmr_list)
1050
{
1051
int i;
1052
1053
/*
1054
* This operation is costly. It can be parallelized,
1055
* but keep it simple for now.
1056
*/
1057
for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++) {
1058
int ret;
1059
1060
ret = init_tdmr(tdmr_entry(tdmr_list, i));
1061
if (ret)
1062
return ret;
1063
}
1064
1065
return 0;
1066
}
1067
1068
static int init_tdx_module(void)
1069
{
1070
int ret;
1071
1072
ret = get_tdx_sys_info(&tdx_sysinfo);
1073
if (ret)
1074
return ret;
1075
1076
/* Check whether the kernel can support this module */
1077
ret = check_features(&tdx_sysinfo);
1078
if (ret)
1079
return ret;
1080
1081
/*
1082
* To keep things simple, assume that all TDX-protected memory
1083
* will come from the page allocator. Make sure all pages in the
1084
* page allocator are TDX-usable memory.
1085
*
1086
* Build the list of "TDX-usable" memory regions which cover all
1087
* pages in the page allocator to guarantee that. Do it while
1088
* holding mem_hotplug_lock read-lock as the memory hotplug code
1089
* path reads the @tdx_memlist to reject any new memory.
1090
*/
1091
get_online_mems();
1092
1093
ret = build_tdx_memlist(&tdx_memlist);
1094
if (ret)
1095
goto out_put_tdxmem;
1096
1097
/* Allocate enough space for constructing TDMRs */
1098
ret = alloc_tdmr_list(&tdx_tdmr_list, &tdx_sysinfo.tdmr);
1099
if (ret)
1100
goto err_free_tdxmem;
1101
1102
/* Cover all TDX-usable memory regions in TDMRs */
1103
ret = construct_tdmrs(&tdx_memlist, &tdx_tdmr_list, &tdx_sysinfo.tdmr);
1104
if (ret)
1105
goto err_free_tdmrs;
1106
1107
/* Pass the TDMRs and the global KeyID to the TDX module */
1108
ret = config_tdx_module(&tdx_tdmr_list, tdx_global_keyid);
1109
if (ret)
1110
goto err_free_pamts;
1111
1112
/* Configure the key for the global KeyID on all packages */
1113
ret = config_global_keyid();
1114
if (ret)
1115
goto err_reset_pamts;
1116
1117
/* Initialize TDMRs to complete the TDX module initialization */
1118
ret = init_tdmrs(&tdx_tdmr_list);
1119
if (ret)
1120
goto err_reset_pamts;
1121
1122
pr_info("%lu KB allocated for PAMT\n", tdmrs_count_pamt_kb(&tdx_tdmr_list));
1123
1124
out_put_tdxmem:
1125
/*
1126
* @tdx_memlist is written here and read at memory hotplug time.
1127
* Lock out memory hotplug code while building it.
1128
*/
1129
put_online_mems();
1130
return ret;
1131
1132
err_reset_pamts:
1133
/*
1134
* Part of PAMTs may already have been initialized by the
1135
* TDX module. Flush cache before returning PAMTs back
1136
* to the kernel.
1137
*/
1138
wbinvd_on_all_cpus();
1139
/*
1140
* According to the TDX hardware spec, if the platform
1141
* doesn't have the "partial write machine check"
1142
* erratum, any kernel read/write will never cause #MC
1143
* in kernel space, thus it's OK to not convert PAMTs
1144
* back to normal. But do the conversion anyway here
1145
* as suggested by the TDX spec.
1146
*/
1147
tdmrs_reset_pamt_all(&tdx_tdmr_list);
1148
err_free_pamts:
1149
tdmrs_free_pamt_all(&tdx_tdmr_list);
1150
err_free_tdmrs:
1151
free_tdmr_list(&tdx_tdmr_list);
1152
err_free_tdxmem:
1153
free_tdx_memlist(&tdx_memlist);
1154
goto out_put_tdxmem;
1155
}
1156
1157
static int __tdx_enable(void)
1158
{
1159
int ret;
1160
1161
ret = init_tdx_module();
1162
if (ret) {
1163
pr_err("module initialization failed (%d)\n", ret);
1164
tdx_module_status = TDX_MODULE_ERROR;
1165
return ret;
1166
}
1167
1168
pr_info("module initialized\n");
1169
tdx_module_status = TDX_MODULE_INITIALIZED;
1170
1171
return 0;
1172
}
1173
1174
/**
1175
* tdx_enable - Enable TDX module to make it ready to run TDX guests
1176
*
1177
* This function assumes the caller has: 1) held read lock of CPU hotplug
1178
* lock to prevent any new cpu from becoming online; 2) done both VMXON
1179
* and tdx_cpu_enable() on all online cpus.
1180
*
1181
* This function requires there's at least one online cpu for each CPU
1182
* package to succeed.
1183
*
1184
* This function can be called in parallel by multiple callers.
1185
*
1186
* Return 0 if TDX is enabled successfully, otherwise error.
1187
*/
1188
int tdx_enable(void)
1189
{
1190
int ret;
1191
1192
if (!boot_cpu_has(X86_FEATURE_TDX_HOST_PLATFORM))
1193
return -ENODEV;
1194
1195
lockdep_assert_cpus_held();
1196
1197
mutex_lock(&tdx_module_lock);
1198
1199
switch (tdx_module_status) {
1200
case TDX_MODULE_UNINITIALIZED:
1201
ret = __tdx_enable();
1202
break;
1203
case TDX_MODULE_INITIALIZED:
1204
/* Already initialized, great, tell the caller. */
1205
ret = 0;
1206
break;
1207
default:
1208
/* Failed to initialize in the previous attempts */
1209
ret = -EINVAL;
1210
break;
1211
}
1212
1213
mutex_unlock(&tdx_module_lock);
1214
1215
return ret;
1216
}
1217
EXPORT_SYMBOL_GPL(tdx_enable);
1218
1219
static bool is_pamt_page(unsigned long phys)
1220
{
1221
struct tdmr_info_list *tdmr_list = &tdx_tdmr_list;
1222
int i;
1223
1224
/* Ensure that all remote 'tdmr_list' writes are visible: */
1225
smp_rmb();
1226
1227
/*
1228
* The TDX module is no longer returning TDX_SYS_NOT_READY and
1229
* is initialized. The 'tdmr_list' was initialized long ago
1230
* and is now read-only.
1231
*/
1232
for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++) {
1233
unsigned long base, size;
1234
1235
tdmr_get_pamt(tdmr_entry(tdmr_list, i), &base, &size);
1236
1237
if (phys >= base && phys < (base + size))
1238
return true;
1239
}
1240
1241
return false;
1242
}
1243
1244
/*
1245
* Return whether the memory page at the given physical address is TDX
1246
* private memory or not.
1247
*
1248
* This can be imprecise for two known reasons:
1249
* 1. PAMTs are private memory and exist before the TDX module is
1250
* ready and TDH_PHYMEM_PAGE_RDMD works. This is a relatively
1251
* short window that occurs once per boot.
1252
* 2. TDH_PHYMEM_PAGE_RDMD reflects the TDX module's knowledge of the
1253
* page. However, the page can still cause #MC until it has been
1254
* fully converted to shared using 64-byte writes like MOVDIR64B.
1255
* Buggy hosts might still leave #MC-causing memory in place which
1256
* this function can not detect.
1257
*/
1258
static bool paddr_is_tdx_private(unsigned long phys)
1259
{
1260
struct tdx_module_args args = {
1261
.rcx = phys & PAGE_MASK,
1262
};
1263
u64 sret;
1264
1265
if (!boot_cpu_has(X86_FEATURE_TDX_HOST_PLATFORM))
1266
return false;
1267
1268
/* Get page type from the TDX module */
1269
sret = __seamcall_ret(TDH_PHYMEM_PAGE_RDMD, &args);
1270
1271
/*
1272
* The SEAMCALL will not return success unless there is a
1273
* working, "ready" TDX module. Assume an absence of TDX
1274
* private pages until SEAMCALL is working.
1275
*/
1276
if (sret)
1277
return false;
1278
1279
/*
1280
* SEAMCALL was successful -- read page type (via RCX):
1281
*
1282
* - PT_NDA: Page is not used by the TDX module
1283
* - PT_RSVD: Reserved for Non-TDX use
1284
* - Others: Page is used by the TDX module
1285
*
1286
* Note PAMT pages are marked as PT_RSVD but they are also TDX
1287
* private memory.
1288
*/
1289
switch (args.rcx) {
1290
case PT_NDA:
1291
return false;
1292
case PT_RSVD:
1293
return is_pamt_page(phys);
1294
default:
1295
return true;
1296
}
1297
}
1298
1299
/*
1300
* Some TDX-capable CPUs have an erratum. A write to TDX private
1301
* memory poisons that memory, and a subsequent read of that memory
1302
* triggers #MC.
1303
*
1304
* Help distinguish erratum-triggered #MCs from a normal hardware one.
1305
* Just print an additional message to show such an #MC may be a result of the
1306
* erratum.
1307
*/
1308
const char *tdx_dump_mce_info(struct mce *m)
1309
{
1310
if (!m || !mce_is_memory_error(m) || !mce_usable_address(m))
1311
return NULL;
1312
1313
if (!paddr_is_tdx_private(m->addr))
1314
return NULL;
1315
1316
return "TDX private memory error. Possible kernel bug.";
1317
}
1318
1319
static __init int record_keyid_partitioning(u32 *tdx_keyid_start,
1320
u32 *nr_tdx_keyids)
1321
{
1322
u32 _nr_mktme_keyids, _tdx_keyid_start, _nr_tdx_keyids;
1323
int ret;
1324
1325
/*
1326
* IA32_MKTME_KEYID_PARTITIONING:
1327
* Bit [31:0]: Number of MKTME KeyIDs.
1328
* Bit [63:32]: Number of TDX private KeyIDs.
1329
*/
1330
ret = rdmsr_safe(MSR_IA32_MKTME_KEYID_PARTITIONING, &_nr_mktme_keyids,
1331
&_nr_tdx_keyids);
1332
if (ret || !_nr_tdx_keyids)
1333
return -EINVAL;
1334
1335
/* TDX KeyIDs start after the last MKTME KeyID. */
1336
_tdx_keyid_start = _nr_mktme_keyids + 1;
1337
1338
*tdx_keyid_start = _tdx_keyid_start;
1339
*nr_tdx_keyids = _nr_tdx_keyids;
1340
1341
return 0;
1342
}
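/*
 * Illustrative example: a platform reporting 31 MKTME KeyIDs and 32
 * TDX private KeyIDs in IA32_MKTME_KEYID_PARTITIONING yields
 * *tdx_keyid_start = 32 and *nr_tdx_keyids = 32, i.e. TDX KeyIDs
 * [32, 64).  tdx_init() later uses the first as the global KeyID and
 * leaves the remaining 31 for guests.
 */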
1343
1344
static bool is_tdx_memory(unsigned long start_pfn, unsigned long end_pfn)
1345
{
1346
struct tdx_memblock *tmb;
1347
1348
/*
1349
* This check assumes that the start_pfn<->end_pfn range does not
1350
* cross multiple @tdx_memlist entries. A single memory online
1351
* event across multiple memblocks (from which @tdx_memlist
1352
* entries are derived at the time of module initialization) is
1353
* not possible. This is because memory offline/online is done
1354
* on granularity of 'struct memory_block', and the hotpluggable
1355
* memory region (one memblock) must be multiple of memory_block.
1356
*/
1357
list_for_each_entry(tmb, &tdx_memlist, list) {
1358
if (start_pfn >= tmb->start_pfn && end_pfn <= tmb->end_pfn)
1359
return true;
1360
}
1361
return false;
1362
}
1363
1364
static int tdx_memory_notifier(struct notifier_block *nb, unsigned long action,
1365
void *v)
1366
{
1367
struct memory_notify *mn = v;
1368
1369
if (action != MEM_GOING_ONLINE)
1370
return NOTIFY_OK;
1371
1372
/*
1373
* Empty list means TDX isn't enabled. Allow any memory
1374
* to go online.
1375
*/
1376
if (list_empty(&tdx_memlist))
1377
return NOTIFY_OK;
1378
1379
/*
1380
* The TDX memory configuration is static and can not be
1381
* changed. Reject onlining any memory which is outside of
1382
* the static configuration whether it supports TDX or not.
1383
*/
1384
if (is_tdx_memory(mn->start_pfn, mn->start_pfn + mn->nr_pages))
1385
return NOTIFY_OK;
1386
1387
return NOTIFY_BAD;
1388
}
1389
1390
static struct notifier_block tdx_memory_nb = {
1391
.notifier_call = tdx_memory_notifier,
1392
};
1393
1394
static void __init check_tdx_erratum(void)
1395
{
1396
/*
1397
* These CPUs have an erratum. A partial write from non-TD
1398
* software (e.g. via MOVNTI variants or UC/WC mapping) to TDX
1399
* private memory poisons that memory, and a subsequent read of
1400
* that memory triggers #MC.
1401
*/
1402
switch (boot_cpu_data.x86_vfm) {
1403
case INTEL_SAPPHIRERAPIDS_X:
1404
case INTEL_EMERALDRAPIDS_X:
1405
setup_force_cpu_bug(X86_BUG_TDX_PW_MCE);
1406
}
1407
}
1408
1409
void __init tdx_init(void)
1410
{
1411
u32 tdx_keyid_start, nr_tdx_keyids;
1412
int err;
1413
1414
err = record_keyid_partitioning(&tdx_keyid_start, &nr_tdx_keyids);
1415
if (err)
1416
return;
1417
1418
pr_info("BIOS enabled: private KeyID range [%u, %u)\n",
1419
tdx_keyid_start, tdx_keyid_start + nr_tdx_keyids);
1420
1421
/*
1422
* The TDX module itself requires one 'global KeyID' to protect
1423
* its metadata. If there's only one TDX KeyID, there won't be
1424
* any left for TDX guests, thus there's no point in enabling TDX
1425
* at all.
1426
*/
1427
if (nr_tdx_keyids < 2) {
1428
pr_err("initialization failed: too few private KeyIDs available.\n");
1429
return;
1430
}
1431
1432
/*
1433
* At this point, hibernation_available() indicates whether or
1434
* not hibernation support has been permanently disabled.
1435
*/
1436
if (hibernation_available()) {
1437
pr_err("initialization failed: Hibernation support is enabled\n");
1438
return;
1439
}
1440
1441
err = register_memory_notifier(&tdx_memory_nb);
1442
if (err) {
1443
pr_err("initialization failed: register_memory_notifier() failed (%d)\n",
1444
err);
1445
return;
1446
}
1447
1448
#if defined(CONFIG_ACPI) && defined(CONFIG_SUSPEND)
1449
pr_info("Disable ACPI S3. Turn off TDX in the BIOS to use ACPI S3.\n");
1450
acpi_suspend_lowlevel = NULL;
1451
#endif
1452
1453
/*
1454
* Just use the first TDX KeyID as the 'global KeyID' and
1455
* leave the rest for TDX guests.
1456
*/
1457
tdx_global_keyid = tdx_keyid_start;
1458
tdx_guest_keyid_start = tdx_keyid_start + 1;
1459
tdx_nr_guest_keyids = nr_tdx_keyids - 1;
1460
1461
setup_force_cpu_cap(X86_FEATURE_TDX_HOST_PLATFORM);
1462
1463
check_tdx_erratum();
1464
}
1465
1466
const struct tdx_sys_info *tdx_get_sysinfo(void)
1467
{
1468
const struct tdx_sys_info *p = NULL;
1469
1470
/* Make sure all fields in @tdx_sysinfo have been populated */
1471
mutex_lock(&tdx_module_lock);
1472
if (tdx_module_status == TDX_MODULE_INITIALIZED)
1473
p = (const struct tdx_sys_info *)&tdx_sysinfo;
1474
mutex_unlock(&tdx_module_lock);
1475
1476
return p;
1477
}
1478
EXPORT_SYMBOL_GPL(tdx_get_sysinfo);
1479
1480
u32 tdx_get_nr_guest_keyids(void)
1481
{
1482
return tdx_nr_guest_keyids;
1483
}
1484
EXPORT_SYMBOL_GPL(tdx_get_nr_guest_keyids);
1485
1486
int tdx_guest_keyid_alloc(void)
1487
{
1488
return ida_alloc_range(&tdx_guest_keyid_pool, tdx_guest_keyid_start,
1489
tdx_guest_keyid_start + tdx_nr_guest_keyids - 1,
1490
GFP_KERNEL);
1491
}
1492
EXPORT_SYMBOL_GPL(tdx_guest_keyid_alloc);
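/*
 * A caller (e.g. KVM) is expected to allocate one guest KeyID per TD
 * with tdx_guest_keyid_alloc() and return it with tdx_guest_keyid_free()
 * when the TD is torn down.
 */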
1493
1494
void tdx_guest_keyid_free(unsigned int keyid)
1495
{
1496
ida_free(&tdx_guest_keyid_pool, keyid);
1497
}
1498
EXPORT_SYMBOL_GPL(tdx_guest_keyid_free);
1499
1500
static inline u64 tdx_tdr_pa(struct tdx_td *td)
1501
{
1502
return page_to_phys(td->tdr_page);
1503
}
1504
1505
static inline u64 tdx_tdvpr_pa(struct tdx_vp *td)
1506
{
1507
return page_to_phys(td->tdvpr_page);
1508
}
1509
1510
/*
1511
* The TDX module exposes a CLFLUSH_BEFORE_ALLOC bit to specify whether
1512
* a CLFLUSH of pages is required before handing them to the TDX module.
1513
* Be conservative and make the code simpler by doing the CLFLUSH
1514
* unconditionally.
1515
*/
1516
static void tdx_clflush_page(struct page *page)
1517
{
1518
clflush_cache_range(page_to_virt(page), PAGE_SIZE);
1519
}
1520
1521
noinstr __flatten u64 tdh_vp_enter(struct tdx_vp *td, struct tdx_module_args *args)
1522
{
1523
args->rcx = tdx_tdvpr_pa(td);
1524
1525
return __seamcall_saved_ret(TDH_VP_ENTER, args);
1526
}
1527
EXPORT_SYMBOL_GPL(tdh_vp_enter);
1528
1529
u64 tdh_mng_addcx(struct tdx_td *td, struct page *tdcs_page)
1530
{
1531
struct tdx_module_args args = {
1532
.rcx = page_to_phys(tdcs_page),
1533
.rdx = tdx_tdr_pa(td),
1534
};
1535
1536
tdx_clflush_page(tdcs_page);
1537
return seamcall(TDH_MNG_ADDCX, &args);
1538
}
1539
EXPORT_SYMBOL_GPL(tdh_mng_addcx);
1540
1541
u64 tdh_mem_page_add(struct tdx_td *td, u64 gpa, struct page *page, struct page *source, u64 *ext_err1, u64 *ext_err2)
1542
{
1543
struct tdx_module_args args = {
1544
.rcx = gpa,
1545
.rdx = tdx_tdr_pa(td),
1546
.r8 = page_to_phys(page),
1547
.r9 = page_to_phys(source),
1548
};
1549
u64 ret;
1550
1551
tdx_clflush_page(page);
1552
ret = seamcall_ret(TDH_MEM_PAGE_ADD, &args);
1553
1554
*ext_err1 = args.rcx;
1555
*ext_err2 = args.rdx;
1556
1557
return ret;
1558
}
1559
EXPORT_SYMBOL_GPL(tdh_mem_page_add);
1560
1561
u64 tdh_mem_sept_add(struct tdx_td *td, u64 gpa, int level, struct page *page, u64 *ext_err1, u64 *ext_err2)
1562
{
1563
struct tdx_module_args args = {
1564
.rcx = gpa | level,
1565
.rdx = tdx_tdr_pa(td),
1566
.r8 = page_to_phys(page),
1567
};
1568
u64 ret;
1569
1570
tdx_clflush_page(page);
1571
ret = seamcall_ret(TDH_MEM_SEPT_ADD, &args);
1572
1573
*ext_err1 = args.rcx;
1574
*ext_err2 = args.rdx;
1575
1576
return ret;
1577
}
1578
EXPORT_SYMBOL_GPL(tdh_mem_sept_add);
1579
1580
u64 tdh_vp_addcx(struct tdx_vp *vp, struct page *tdcx_page)
1581
{
1582
struct tdx_module_args args = {
1583
.rcx = page_to_phys(tdcx_page),
1584
.rdx = tdx_tdvpr_pa(vp),
1585
};
1586
1587
tdx_clflush_page(tdcx_page);
1588
return seamcall(TDH_VP_ADDCX, &args);
1589
}
1590
EXPORT_SYMBOL_GPL(tdh_vp_addcx);
1591
1592
u64 tdh_mem_page_aug(struct tdx_td *td, u64 gpa, int level, struct page *page, u64 *ext_err1, u64 *ext_err2)
1593
{
1594
struct tdx_module_args args = {
1595
.rcx = gpa | level,
1596
.rdx = tdx_tdr_pa(td),
1597
.r8 = page_to_phys(page),
1598
};
1599
u64 ret;
1600
1601
tdx_clflush_page(page);
1602
ret = seamcall_ret(TDH_MEM_PAGE_AUG, &args);
1603
1604
*ext_err1 = args.rcx;
1605
*ext_err2 = args.rdx;
1606
1607
return ret;
1608
}
1609
EXPORT_SYMBOL_GPL(tdh_mem_page_aug);
1610
1611
u64 tdh_mem_range_block(struct tdx_td *td, u64 gpa, int level, u64 *ext_err1, u64 *ext_err2)
1612
{
1613
struct tdx_module_args args = {
1614
.rcx = gpa | level,
1615
.rdx = tdx_tdr_pa(td),
1616
};
1617
u64 ret;
1618
1619
ret = seamcall_ret(TDH_MEM_RANGE_BLOCK, &args);
1620
1621
*ext_err1 = args.rcx;
1622
*ext_err2 = args.rdx;
1623
1624
return ret;
1625
}
1626
EXPORT_SYMBOL_GPL(tdh_mem_range_block);
1627
1628
u64 tdh_mng_key_config(struct tdx_td *td)
1629
{
1630
struct tdx_module_args args = {
1631
.rcx = tdx_tdr_pa(td),
1632
};
1633
1634
return seamcall(TDH_MNG_KEY_CONFIG, &args);
1635
}
1636
EXPORT_SYMBOL_GPL(tdh_mng_key_config);
1637
1638
u64 tdh_mng_create(struct tdx_td *td, u16 hkid)
1639
{
1640
struct tdx_module_args args = {
1641
.rcx = tdx_tdr_pa(td),
1642
.rdx = hkid,
1643
};
1644
1645
tdx_clflush_page(td->tdr_page);
1646
return seamcall(TDH_MNG_CREATE, &args);
1647
}
1648
EXPORT_SYMBOL_GPL(tdh_mng_create);
1649
1650
u64 tdh_vp_create(struct tdx_td *td, struct tdx_vp *vp)
1651
{
1652
struct tdx_module_args args = {
1653
.rcx = tdx_tdvpr_pa(vp),
1654
.rdx = tdx_tdr_pa(td),
1655
};
1656
1657
tdx_clflush_page(vp->tdvpr_page);
1658
return seamcall(TDH_VP_CREATE, &args);
1659
}
1660
EXPORT_SYMBOL_GPL(tdh_vp_create);
1661
1662
u64 tdh_mng_rd(struct tdx_td *td, u64 field, u64 *data)
1663
{
1664
struct tdx_module_args args = {
1665
.rcx = tdx_tdr_pa(td),
1666
.rdx = field,
1667
};
1668
u64 ret;
1669
1670
ret = seamcall_ret(TDH_MNG_RD, &args);
1671
1672
/* R8: Content of the field, or 0 in case of error. */
1673
*data = args.r8;
1674
1675
return ret;
1676
}
1677
EXPORT_SYMBOL_GPL(tdh_mng_rd);
1678
1679
u64 tdh_mr_extend(struct tdx_td *td, u64 gpa, u64 *ext_err1, u64 *ext_err2)
1680
{
1681
struct tdx_module_args args = {
1682
.rcx = gpa,
1683
.rdx = tdx_tdr_pa(td),
1684
};
1685
u64 ret;
1686
1687
ret = seamcall_ret(TDH_MR_EXTEND, &args);
1688
1689
*ext_err1 = args.rcx;
1690
*ext_err2 = args.rdx;
1691
1692
return ret;
1693
}
1694
EXPORT_SYMBOL_GPL(tdh_mr_extend);
1695
1696
u64 tdh_mr_finalize(struct tdx_td *td)
1697
{
1698
struct tdx_module_args args = {
1699
.rcx = tdx_tdr_pa(td),
1700
};
1701
1702
return seamcall(TDH_MR_FINALIZE, &args);
1703
}
1704
EXPORT_SYMBOL_GPL(tdh_mr_finalize);
1705
1706
u64 tdh_vp_flush(struct tdx_vp *vp)
1707
{
1708
struct tdx_module_args args = {
1709
.rcx = tdx_tdvpr_pa(vp),
1710
};
1711
1712
return seamcall(TDH_VP_FLUSH, &args);
1713
}
1714
EXPORT_SYMBOL_GPL(tdh_vp_flush);
1715
1716
u64 tdh_mng_vpflushdone(struct tdx_td *td)
1717
{
1718
struct tdx_module_args args = {
1719
.rcx = tdx_tdr_pa(td),
1720
};
1721
1722
return seamcall(TDH_MNG_VPFLUSHDONE, &args);
1723
}
1724
EXPORT_SYMBOL_GPL(tdh_mng_vpflushdone);
1725
1726
u64 tdh_mng_key_freeid(struct tdx_td *td)
1727
{
1728
struct tdx_module_args args = {
1729
.rcx = tdx_tdr_pa(td),
1730
};
1731
1732
return seamcall(TDH_MNG_KEY_FREEID, &args);
1733
}
1734
EXPORT_SYMBOL_GPL(tdh_mng_key_freeid);
1735
1736
u64 tdh_mng_init(struct tdx_td *td, u64 td_params, u64 *extended_err)
1737
{
1738
struct tdx_module_args args = {
1739
.rcx = tdx_tdr_pa(td),
1740
.rdx = td_params,
1741
};
1742
u64 ret;
1743
1744
ret = seamcall_ret(TDH_MNG_INIT, &args);
1745
1746
*extended_err = args.rcx;
1747
1748
return ret;
1749
}
1750
EXPORT_SYMBOL_GPL(tdh_mng_init);
1751
1752
u64 tdh_vp_rd(struct tdx_vp *vp, u64 field, u64 *data)
1753
{
1754
struct tdx_module_args args = {
1755
.rcx = tdx_tdvpr_pa(vp),
1756
.rdx = field,
1757
};
1758
u64 ret;
1759
1760
ret = seamcall_ret(TDH_VP_RD, &args);
1761
1762
/* R8: Content of the field, or 0 in case of error. */
1763
*data = args.r8;
1764
1765
return ret;
1766
}
1767
EXPORT_SYMBOL_GPL(tdh_vp_rd);
1768
1769
u64 tdh_vp_wr(struct tdx_vp *vp, u64 field, u64 data, u64 mask)
1770
{
1771
struct tdx_module_args args = {
1772
.rcx = tdx_tdvpr_pa(vp),
1773
.rdx = field,
1774
.r8 = data,
1775
.r9 = mask,
1776
};
1777
1778
return seamcall(TDH_VP_WR, &args);
1779
}
1780
EXPORT_SYMBOL_GPL(tdh_vp_wr);
1781
1782
u64 tdh_vp_init(struct tdx_vp *vp, u64 initial_rcx, u32 x2apicid)
1783
{
1784
struct tdx_module_args args = {
1785
.rcx = tdx_tdvpr_pa(vp),
1786
.rdx = initial_rcx,
1787
.r8 = x2apicid,
1788
};
1789
1790
/* apicid requires version == 1. */
1791
return seamcall(TDH_VP_INIT | (1ULL << TDX_VERSION_SHIFT), &args);
1792
}
1793
EXPORT_SYMBOL_GPL(tdh_vp_init);
1794
1795
/*
1796
* TDX ABI defines output operands as PT, OWNER and SIZE. These are TDX-defined formats.
1797
* So despite the names, they must be interpreted specially as described by the spec. Return
1798
* them only for error reporting purposes.
1799
*/
1800
u64 tdh_phymem_page_reclaim(struct page *page, u64 *tdx_pt, u64 *tdx_owner, u64 *tdx_size)
1801
{
1802
struct tdx_module_args args = {
1803
.rcx = page_to_phys(page),
1804
};
1805
u64 ret;
1806
1807
ret = seamcall_ret(TDH_PHYMEM_PAGE_RECLAIM, &args);
1808
1809
*tdx_pt = args.rcx;
1810
*tdx_owner = args.rdx;
1811
*tdx_size = args.r8;
1812
1813
return ret;
1814
}
1815
EXPORT_SYMBOL_GPL(tdh_phymem_page_reclaim);
1816
1817
u64 tdh_mem_track(struct tdx_td *td)
1818
{
1819
struct tdx_module_args args = {
1820
.rcx = tdx_tdr_pa(td),
1821
};
1822
1823
return seamcall(TDH_MEM_TRACK, &args);
1824
}
1825
EXPORT_SYMBOL_GPL(tdh_mem_track);
1826
1827
u64 tdh_mem_page_remove(struct tdx_td *td, u64 gpa, u64 level, u64 *ext_err1, u64 *ext_err2)
1828
{
1829
struct tdx_module_args args = {
1830
.rcx = gpa | level,
1831
.rdx = tdx_tdr_pa(td),
1832
};
1833
u64 ret;
1834
1835
ret = seamcall_ret(TDH_MEM_PAGE_REMOVE, &args);
1836
1837
*ext_err1 = args.rcx;
1838
*ext_err2 = args.rdx;
1839
1840
return ret;
1841
}
1842
EXPORT_SYMBOL_GPL(tdh_mem_page_remove);
1843
1844
u64 tdh_phymem_cache_wb(bool resume)
1845
{
1846
struct tdx_module_args args = {
1847
.rcx = resume ? 1 : 0,
1848
};
1849
1850
return seamcall(TDH_PHYMEM_CACHE_WB, &args);
1851
}
1852
EXPORT_SYMBOL_GPL(tdh_phymem_cache_wb);
1853
1854
u64 tdh_phymem_page_wbinvd_tdr(struct tdx_td *td)
1855
{
1856
struct tdx_module_args args = {};
1857
1858
args.rcx = mk_keyed_paddr(tdx_global_keyid, td->tdr_page);
1859
1860
return seamcall(TDH_PHYMEM_PAGE_WBINVD, &args);
1861
}
1862
EXPORT_SYMBOL_GPL(tdh_phymem_page_wbinvd_tdr);
1863
1864
u64 tdh_phymem_page_wbinvd_hkid(u64 hkid, struct page *page)
1865
{
1866
struct tdx_module_args args = {};
1867
1868
args.rcx = mk_keyed_paddr(hkid, page);
1869
1870
return seamcall(TDH_PHYMEM_PAGE_WBINVD, &args);
1871
}
1872
EXPORT_SYMBOL_GPL(tdh_phymem_page_wbinvd_hkid);
1873
1874