GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/kernel/cpu/common.c
1
// SPDX-License-Identifier: GPL-2.0-only
2
/* cpu_feature_enabled() cannot be used this early */
3
#define USE_EARLY_PGTABLE_L5
4
5
#include <linux/memblock.h>
6
#include <linux/linkage.h>
7
#include <linux/bitops.h>
8
#include <linux/kernel.h>
9
#include <linux/export.h>
10
#include <linux/kvm_types.h>
11
#include <linux/percpu.h>
12
#include <linux/string.h>
13
#include <linux/ctype.h>
14
#include <linux/delay.h>
15
#include <linux/sched/mm.h>
16
#include <linux/sched/clock.h>
17
#include <linux/sched/task.h>
18
#include <linux/sched/smt.h>
19
#include <linux/init.h>
20
#include <linux/kprobes.h>
21
#include <linux/kgdb.h>
22
#include <linux/mem_encrypt.h>
23
#include <linux/smp.h>
24
#include <linux/cpu.h>
25
#include <linux/io.h>
26
#include <linux/syscore_ops.h>
27
#include <linux/pgtable.h>
28
#include <linux/stackprotector.h>
29
#include <linux/utsname.h>
30
#include <linux/efi.h>
31
32
#include <asm/alternative.h>
33
#include <asm/cmdline.h>
34
#include <asm/cpuid/api.h>
35
#include <asm/perf_event.h>
36
#include <asm/mmu_context.h>
37
#include <asm/doublefault.h>
38
#include <asm/archrandom.h>
39
#include <asm/hypervisor.h>
40
#include <asm/processor.h>
41
#include <asm/tlbflush.h>
42
#include <asm/debugreg.h>
43
#include <asm/sections.h>
44
#include <asm/vsyscall.h>
45
#include <linux/topology.h>
46
#include <linux/cpumask.h>
47
#include <linux/atomic.h>
48
#include <asm/proto.h>
49
#include <asm/setup.h>
50
#include <asm/apic.h>
51
#include <asm/desc.h>
52
#include <asm/fpu/api.h>
53
#include <asm/mtrr.h>
54
#include <asm/hwcap2.h>
55
#include <linux/numa.h>
56
#include <asm/numa.h>
57
#include <asm/asm.h>
58
#include <asm/bugs.h>
59
#include <asm/cpu.h>
60
#include <asm/mce.h>
61
#include <asm/msr.h>
62
#include <asm/cacheinfo.h>
63
#include <asm/memtype.h>
64
#include <asm/microcode.h>
65
#include <asm/intel-family.h>
66
#include <asm/cpu_device_id.h>
67
#include <asm/fred.h>
68
#include <asm/uv/uv.h>
69
#include <asm/ia32.h>
70
#include <asm/set_memory.h>
71
#include <asm/traps.h>
72
#include <asm/sev.h>
73
#include <asm/tdx.h>
74
#include <asm/posted_intr.h>
75
#include <asm/runtime-const.h>
76
77
#include "cpu.h"
78
79
DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
80
EXPORT_PER_CPU_SYMBOL(cpu_info);
81
82
/* Used for modules: built-in code uses runtime constants */
83
unsigned long USER_PTR_MAX;
84
EXPORT_SYMBOL(USER_PTR_MAX);
85
86
u32 elf_hwcap2 __read_mostly;
87
88
/* Number of siblings per CPU package */
89
unsigned int __max_threads_per_core __ro_after_init = 1;
90
EXPORT_SYMBOL(__max_threads_per_core);
91
92
unsigned int __max_dies_per_package __ro_after_init = 1;
93
EXPORT_SYMBOL(__max_dies_per_package);
94
95
unsigned int __max_logical_packages __ro_after_init = 1;
96
EXPORT_SYMBOL(__max_logical_packages);
97
98
unsigned int __num_cores_per_package __ro_after_init = 1;
99
EXPORT_SYMBOL(__num_cores_per_package);
100
101
unsigned int __num_threads_per_package __ro_after_init = 1;
102
EXPORT_SYMBOL(__num_threads_per_package);
103
104
static struct ppin_info {
105
int feature;
106
int msr_ppin_ctl;
107
int msr_ppin;
108
} ppin_info[] = {
109
[X86_VENDOR_INTEL] = {
110
.feature = X86_FEATURE_INTEL_PPIN,
111
.msr_ppin_ctl = MSR_PPIN_CTL,
112
.msr_ppin = MSR_PPIN
113
},
114
[X86_VENDOR_AMD] = {
115
.feature = X86_FEATURE_AMD_PPIN,
116
.msr_ppin_ctl = MSR_AMD_PPIN_CTL,
117
.msr_ppin = MSR_AMD_PPIN
118
},
119
};
120
121
static const struct x86_cpu_id ppin_cpuids[] = {
122
X86_MATCH_FEATURE(X86_FEATURE_AMD_PPIN, &ppin_info[X86_VENDOR_AMD]),
123
X86_MATCH_FEATURE(X86_FEATURE_INTEL_PPIN, &ppin_info[X86_VENDOR_INTEL]),
124
125
/* Legacy models without CPUID enumeration */
126
X86_MATCH_VFM(INTEL_IVYBRIDGE_X, &ppin_info[X86_VENDOR_INTEL]),
127
X86_MATCH_VFM(INTEL_HASWELL_X, &ppin_info[X86_VENDOR_INTEL]),
128
X86_MATCH_VFM(INTEL_BROADWELL_D, &ppin_info[X86_VENDOR_INTEL]),
129
X86_MATCH_VFM(INTEL_BROADWELL_X, &ppin_info[X86_VENDOR_INTEL]),
130
X86_MATCH_VFM(INTEL_SKYLAKE_X, &ppin_info[X86_VENDOR_INTEL]),
131
X86_MATCH_VFM(INTEL_ICELAKE_X, &ppin_info[X86_VENDOR_INTEL]),
132
X86_MATCH_VFM(INTEL_ICELAKE_D, &ppin_info[X86_VENDOR_INTEL]),
133
X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, &ppin_info[X86_VENDOR_INTEL]),
134
X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, &ppin_info[X86_VENDOR_INTEL]),
135
X86_MATCH_VFM(INTEL_XEON_PHI_KNL, &ppin_info[X86_VENDOR_INTEL]),
136
X86_MATCH_VFM(INTEL_XEON_PHI_KNM, &ppin_info[X86_VENDOR_INTEL]),
137
138
{}
139
};
140
141
static void ppin_init(struct cpuinfo_x86 *c)
142
{
143
const struct x86_cpu_id *id;
144
unsigned long long val;
145
struct ppin_info *info;
146
147
id = x86_match_cpu(ppin_cpuids);
148
if (!id)
149
return;
150
151
/*
152
* Testing the presence of the MSR is not enough. Need to check
153
* that the PPIN_CTL allows reading of the PPIN.
154
*/
155
info = (struct ppin_info *)id->driver_data;
156
157
if (rdmsrq_safe(info->msr_ppin_ctl, &val))
158
goto clear_ppin;
159
160
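/*
 * In the PPIN_CTL MSR, bit 0 is the lock bit and bit 1 is the enable
 * bit. A raw value of 1 therefore means the control is locked with
 * reads disabled, so the PPIN can never be enabled below.
 */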
if ((val & 3UL) == 1UL) {
161
/* PPIN locked in disabled mode */
162
goto clear_ppin;
163
}
164
165
/* If PPIN is disabled, try to enable */
166
if (!(val & 2UL)) {
167
wrmsrq_safe(info->msr_ppin_ctl, val | 2UL);
168
rdmsrq_safe(info->msr_ppin_ctl, &val);
169
}
170
171
/* Is the enable bit set? */
172
if (val & 2UL) {
173
c->ppin = native_rdmsrq(info->msr_ppin);
174
set_cpu_cap(c, info->feature);
175
return;
176
}
177
178
clear_ppin:
179
setup_clear_cpu_cap(info->feature);
180
}
181
182
static void default_init(struct cpuinfo_x86 *c)
183
{
184
#ifdef CONFIG_X86_64
185
cpu_detect_cache_sizes(c);
186
#else
187
/* Not much we can do here... */
188
/* Check if at least it has cpuid */
189
if (c->cpuid_level == -1) {
190
/* No cpuid. It must be an ancient CPU */
191
if (c->x86 == 4)
192
strcpy(c->x86_model_id, "486");
193
else if (c->x86 == 3)
194
strcpy(c->x86_model_id, "386");
195
}
196
#endif
197
}
198
199
static const struct cpu_dev default_cpu = {
200
.c_init = default_init,
201
.c_vendor = "Unknown",
202
.c_x86_vendor = X86_VENDOR_UNKNOWN,
203
};
204
205
static const struct cpu_dev *this_cpu = &default_cpu;
206
207
DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
208
#ifdef CONFIG_X86_64
209
/*
210
* We need valid kernel segments for data and code in long mode too
211
* IRET will check the segment types kkeil 2000/10/28
212
* Also sysret mandates a special GDT layout
213
*
214
* TLS descriptors are currently at a different place compared to i386.
215
* Hopefully nobody expects them at a fixed place (Wine?)
216
*/
217
[GDT_ENTRY_KERNEL32_CS] = GDT_ENTRY_INIT(DESC_CODE32, 0, 0xfffff),
218
[GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(DESC_CODE64, 0, 0xfffff),
219
[GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(DESC_DATA64, 0, 0xfffff),
220
[GDT_ENTRY_DEFAULT_USER32_CS] = GDT_ENTRY_INIT(DESC_CODE32 | DESC_USER, 0, 0xfffff),
221
[GDT_ENTRY_DEFAULT_USER_DS] = GDT_ENTRY_INIT(DESC_DATA64 | DESC_USER, 0, 0xfffff),
222
[GDT_ENTRY_DEFAULT_USER_CS] = GDT_ENTRY_INIT(DESC_CODE64 | DESC_USER, 0, 0xfffff),
223
#else
224
[GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(DESC_CODE32, 0, 0xfffff),
225
[GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(DESC_DATA32, 0, 0xfffff),
226
[GDT_ENTRY_DEFAULT_USER_CS] = GDT_ENTRY_INIT(DESC_CODE32 | DESC_USER, 0, 0xfffff),
227
[GDT_ENTRY_DEFAULT_USER_DS] = GDT_ENTRY_INIT(DESC_DATA32 | DESC_USER, 0, 0xfffff),
228
/*
229
* Segments used for calling PnP BIOS have byte granularity.
230
* The code and data segments have fixed 64k limits,
231
* the transfer segment sizes are set at run time.
232
*/
233
[GDT_ENTRY_PNPBIOS_CS32] = GDT_ENTRY_INIT(DESC_CODE32_BIOS, 0, 0xffff),
234
[GDT_ENTRY_PNPBIOS_CS16] = GDT_ENTRY_INIT(DESC_CODE16, 0, 0xffff),
235
[GDT_ENTRY_PNPBIOS_DS] = GDT_ENTRY_INIT(DESC_DATA16, 0, 0xffff),
236
[GDT_ENTRY_PNPBIOS_TS1] = GDT_ENTRY_INIT(DESC_DATA16, 0, 0),
237
[GDT_ENTRY_PNPBIOS_TS2] = GDT_ENTRY_INIT(DESC_DATA16, 0, 0),
238
/*
239
* The APM segments have byte granularity and their bases
240
* are set at run time. All have 64k limits.
241
*/
242
[GDT_ENTRY_APMBIOS_BASE] = GDT_ENTRY_INIT(DESC_CODE32_BIOS, 0, 0xffff),
243
[GDT_ENTRY_APMBIOS_BASE+1] = GDT_ENTRY_INIT(DESC_CODE16, 0, 0xffff),
244
[GDT_ENTRY_APMBIOS_BASE+2] = GDT_ENTRY_INIT(DESC_DATA32_BIOS, 0, 0xffff),
245
246
[GDT_ENTRY_ESPFIX_SS] = GDT_ENTRY_INIT(DESC_DATA32, 0, 0xfffff),
247
[GDT_ENTRY_PERCPU] = GDT_ENTRY_INIT(DESC_DATA32, 0, 0xfffff),
248
#endif
249
} };
250
EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
251
SYM_PIC_ALIAS(gdt_page);
252
253
#ifdef CONFIG_X86_64
254
static int __init x86_nopcid_setup(char *s)
255
{
256
/* nopcid doesn't accept parameters */
257
if (s)
258
return -EINVAL;
259
260
/* do not emit a message if the feature is not present */
261
if (!boot_cpu_has(X86_FEATURE_PCID))
262
return 0;
263
264
setup_clear_cpu_cap(X86_FEATURE_PCID);
265
pr_info("nopcid: PCID feature disabled\n");
266
return 0;
267
}
268
early_param("nopcid", x86_nopcid_setup);
269
#endif
270
271
static int __init x86_noinvpcid_setup(char *s)
272
{
273
/* noinvpcid doesn't accept parameters */
274
if (s)
275
return -EINVAL;
276
277
/* do not emit a message if the feature is not present */
278
if (!boot_cpu_has(X86_FEATURE_INVPCID))
279
return 0;
280
281
setup_clear_cpu_cap(X86_FEATURE_INVPCID);
282
pr_info("noinvpcid: INVPCID feature disabled\n");
283
return 0;
284
}
285
early_param("noinvpcid", x86_noinvpcid_setup);
286
287
/* Standard macro to see if a specific flag is changeable */
288
static inline bool flag_is_changeable_p(unsigned long flag)
289
{
290
unsigned long f1, f2;
291
292
if (!IS_ENABLED(CONFIG_X86_32))
293
return true;
294
295
/*
296
* Cyrix and IDT cpus allow disabling of CPUID
297
* so the code below may return different results
298
* when it is executed before and after enabling
299
* the CPUID. Add "volatile" to not allow gcc to
300
* optimize the subsequent calls to this function.
301
*/
302
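/*
 * Copy EFLAGS, toggle the requested bit, write it back via POPF and
 * re-read EFLAGS. The flag is changeable iff the toggled bit differs
 * between the two snapshots; the original EFLAGS is restored at the end.
 */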
asm volatile ("pushfl \n\t"
303
"pushfl \n\t"
304
"popl %0 \n\t"
305
"movl %0, %1 \n\t"
306
"xorl %2, %0 \n\t"
307
"pushl %0 \n\t"
308
"popfl \n\t"
309
"pushfl \n\t"
310
"popl %0 \n\t"
311
"popfl \n\t"
312
313
: "=&r" (f1), "=&r" (f2)
314
: "ir" (flag));
315
316
return (f1 ^ f2) & flag;
317
}
318
319
#ifdef CONFIG_X86_32
320
static int cachesize_override = -1;
321
static int disable_x86_serial_nr = 1;
322
323
static int __init cachesize_setup(char *str)
324
{
325
get_option(&str, &cachesize_override);
326
return 1;
327
}
328
__setup("cachesize=", cachesize_setup);
329
330
/* Probe for the CPUID instruction */
331
bool cpuid_feature(void)
332
{
333
return flag_is_changeable_p(X86_EFLAGS_ID);
334
}
335
336
static void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
337
{
338
unsigned long lo, hi;
339
340
if (!cpu_has(c, X86_FEATURE_PN) || !disable_x86_serial_nr)
341
return;
342
343
/* Disable processor serial number: */
344
345
rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
346
lo |= 0x200000;
347
wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
348
349
pr_notice("CPU serial number disabled.\n");
350
clear_cpu_cap(c, X86_FEATURE_PN);
351
352
/* Disabling the serial number may affect the cpuid level */
353
c->cpuid_level = cpuid_eax(0);
354
}
355
356
static int __init x86_serial_nr_setup(char *s)
357
{
358
disable_x86_serial_nr = 0;
359
return 1;
360
}
361
__setup("serialnumber", x86_serial_nr_setup);
362
#else
363
static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
364
{
365
}
366
#endif
367
368
static __always_inline void setup_smep(struct cpuinfo_x86 *c)
369
{
370
if (cpu_has(c, X86_FEATURE_SMEP))
371
cr4_set_bits(X86_CR4_SMEP);
372
}
373
374
static __always_inline void setup_smap(struct cpuinfo_x86 *c)
375
{
376
unsigned long eflags = native_save_fl();
377
378
/* This should have been cleared long ago */
379
BUG_ON(eflags & X86_EFLAGS_AC);
380
381
if (cpu_has(c, X86_FEATURE_SMAP))
382
cr4_set_bits(X86_CR4_SMAP);
383
}
384
385
static __always_inline void setup_umip(struct cpuinfo_x86 *c)
386
{
387
/* Check the boot processor, plus build option for UMIP. */
388
if (!cpu_feature_enabled(X86_FEATURE_UMIP))
389
goto out;
390
391
/* Check the current processor's cpuid bits. */
392
if (!cpu_has(c, X86_FEATURE_UMIP))
393
goto out;
394
395
cr4_set_bits(X86_CR4_UMIP);
396
397
pr_info_once("x86/cpu: User Mode Instruction Prevention (UMIP) activated\n");
398
399
return;
400
401
out:
402
/*
403
* Make sure UMIP is disabled in case it was enabled in a
404
* previous boot (e.g., via kexec).
405
*/
406
cr4_clear_bits(X86_CR4_UMIP);
407
}
408
409
static __always_inline void setup_lass(struct cpuinfo_x86 *c)
410
{
411
if (!cpu_feature_enabled(X86_FEATURE_LASS))
412
return;
413
414
/*
415
* Legacy vsyscall page access causes a #GP when LASS is active.
416
* Disable LASS because the #GP handler doesn't support vsyscall
417
* emulation.
418
*
419
* Also disable LASS when running under EFI, as some runtime and
420
* boot services rely on 1:1 mappings in the lower half.
421
*/
422
if (IS_ENABLED(CONFIG_X86_VSYSCALL_EMULATION) ||
423
IS_ENABLED(CONFIG_EFI)) {
424
setup_clear_cpu_cap(X86_FEATURE_LASS);
425
return;
426
}
427
428
cr4_set_bits(X86_CR4_LASS);
429
}
430
431
/* These bits should not change their value after CPU init is finished. */
432
static const unsigned long cr4_pinned_mask = X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP |
433
X86_CR4_FSGSBASE | X86_CR4_CET | X86_CR4_FRED;
434
static DEFINE_STATIC_KEY_FALSE_RO(cr_pinning);
435
static unsigned long cr4_pinned_bits __ro_after_init;
436
437
void native_write_cr0(unsigned long val)
438
{
439
unsigned long bits_missing = 0;
440
441
set_register:
442
asm volatile("mov %0,%%cr0": "+r" (val) : : "memory");
443
444
if (static_branch_likely(&cr_pinning)) {
445
if (unlikely((val & X86_CR0_WP) != X86_CR0_WP)) {
446
bits_missing = X86_CR0_WP;
447
val |= bits_missing;
448
goto set_register;
449
}
450
/* Warn after we've set the missing bits. */
451
WARN_ONCE(bits_missing, "CR0 WP bit went missing!?\n");
452
}
453
}
454
EXPORT_SYMBOL(native_write_cr0);
455
456
void __no_profile native_write_cr4(unsigned long val)
457
{
458
unsigned long bits_changed = 0;
459
460
set_register:
461
asm volatile("mov %0,%%cr4": "+r" (val) : : "memory");
462
463
if (static_branch_likely(&cr_pinning)) {
464
if (unlikely((val & cr4_pinned_mask) != cr4_pinned_bits)) {
465
bits_changed = (val & cr4_pinned_mask) ^ cr4_pinned_bits;
466
val = (val & ~cr4_pinned_mask) | cr4_pinned_bits;
467
goto set_register;
468
}
469
/* Warn after we've corrected the changed bits. */
470
WARN_ONCE(bits_changed, "pinned CR4 bits changed: 0x%lx!?\n",
471
bits_changed);
472
}
473
}
474
#if IS_MODULE(CONFIG_LKDTM)
475
EXPORT_SYMBOL_GPL(native_write_cr4);
476
#endif
477
478
void cr4_update_irqsoff(unsigned long set, unsigned long clear)
479
{
480
unsigned long newval, cr4 = this_cpu_read(cpu_tlbstate.cr4);
481
482
lockdep_assert_irqs_disabled();
483
484
newval = (cr4 & ~clear) | set;
485
if (newval != cr4) {
486
this_cpu_write(cpu_tlbstate.cr4, newval);
487
__write_cr4(newval);
488
}
489
}
490
EXPORT_SYMBOL_FOR_KVM(cr4_update_irqsoff);
491
492
/* Read the CR4 shadow. */
493
unsigned long cr4_read_shadow(void)
494
{
495
return this_cpu_read(cpu_tlbstate.cr4);
496
}
497
EXPORT_SYMBOL_FOR_KVM(cr4_read_shadow);
498
499
void cr4_init(void)
500
{
501
unsigned long cr4 = __read_cr4();
502
503
if (boot_cpu_has(X86_FEATURE_PCID))
504
cr4 |= X86_CR4_PCIDE;
505
if (static_branch_likely(&cr_pinning))
506
cr4 = (cr4 & ~cr4_pinned_mask) | cr4_pinned_bits;
507
508
__write_cr4(cr4);
509
510
/* Initialize cr4 shadow for this CPU. */
511
this_cpu_write(cpu_tlbstate.cr4, cr4);
512
}
513
514
/*
515
* Once CPU feature detection is finished (and boot params have been
516
* parsed), record any of the sensitive CR bits that are set, and
517
* enable CR pinning.
518
*/
519
static void __init setup_cr_pinning(void)
520
{
521
cr4_pinned_bits = this_cpu_read(cpu_tlbstate.cr4) & cr4_pinned_mask;
522
static_key_enable(&cr_pinning.key);
523
}
524
525
static __init int x86_nofsgsbase_setup(char *arg)
526
{
527
/* Require an exact match without trailing characters. */
528
if (strlen(arg))
529
return 0;
530
531
/* Do not emit a message if the feature is not present. */
532
if (!boot_cpu_has(X86_FEATURE_FSGSBASE))
533
return 1;
534
535
setup_clear_cpu_cap(X86_FEATURE_FSGSBASE);
536
pr_info("FSGSBASE disabled via kernel command line\n");
537
return 1;
538
}
539
__setup("nofsgsbase", x86_nofsgsbase_setup);
540
541
/*
542
* Protection Keys are not available in 32-bit mode.
543
*/
544
static bool pku_disabled;
545
546
static __always_inline void setup_pku(struct cpuinfo_x86 *c)
547
{
548
if (c == &boot_cpu_data) {
549
if (pku_disabled || !cpu_feature_enabled(X86_FEATURE_PKU))
550
return;
551
/*
552
* Setting CR4.PKE will cause the X86_FEATURE_OSPKE cpuid
553
* bit to be set. Enforce it.
554
*/
555
setup_force_cpu_cap(X86_FEATURE_OSPKE);
556
557
} else if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) {
558
return;
559
}
560
561
cr4_set_bits(X86_CR4_PKE);
562
/* Load the default PKRU value */
563
pkru_write_default();
564
}
565
566
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
567
static __init int setup_disable_pku(char *arg)
568
{
569
/*
570
* Do not clear the X86_FEATURE_PKU bit. All of the
571
* runtime checks are against OSPKE so clearing the
572
* bit does nothing.
573
*
574
* This way, we will see "pku" in cpuinfo, but not
575
* "ospke", which is exactly what we want. It shows
576
* that the CPU has PKU, but the OS has not enabled it.
577
* This happens to be exactly how a system would look
578
* if we disabled the config option.
579
*/
580
pr_info("x86: 'nopku' specified, disabling Memory Protection Keys\n");
581
pku_disabled = true;
582
return 1;
583
}
584
__setup("nopku", setup_disable_pku);
585
#endif
586
587
#ifdef CONFIG_X86_KERNEL_IBT
588
589
__noendbr u64 ibt_save(bool disable)
590
{
591
u64 msr = 0;
592
593
if (cpu_feature_enabled(X86_FEATURE_IBT)) {
594
rdmsrq(MSR_IA32_S_CET, msr);
595
if (disable)
596
wrmsrq(MSR_IA32_S_CET, msr & ~CET_ENDBR_EN);
597
}
598
599
return msr;
600
}
601
602
__noendbr void ibt_restore(u64 save)
603
{
604
u64 msr;
605
606
if (cpu_feature_enabled(X86_FEATURE_IBT)) {
607
rdmsrq(MSR_IA32_S_CET, msr);
608
msr &= ~CET_ENDBR_EN;
609
msr |= (save & CET_ENDBR_EN);
610
wrmsrq(MSR_IA32_S_CET, msr);
611
}
612
}
613
614
#endif
615
616
static __always_inline void setup_cet(struct cpuinfo_x86 *c)
617
{
618
bool user_shstk, kernel_ibt;
619
620
if (!IS_ENABLED(CONFIG_X86_CET))
621
return;
622
623
kernel_ibt = HAS_KERNEL_IBT && cpu_feature_enabled(X86_FEATURE_IBT);
624
user_shstk = cpu_feature_enabled(X86_FEATURE_SHSTK) &&
625
IS_ENABLED(CONFIG_X86_USER_SHADOW_STACK);
626
627
if (!kernel_ibt && !user_shstk)
628
return;
629
630
if (user_shstk)
631
set_cpu_cap(c, X86_FEATURE_USER_SHSTK);
632
633
if (kernel_ibt)
634
wrmsrq(MSR_IA32_S_CET, CET_ENDBR_EN);
635
else
636
wrmsrq(MSR_IA32_S_CET, 0);
637
638
cr4_set_bits(X86_CR4_CET);
639
640
if (kernel_ibt && ibt_selftest()) {
641
pr_err("IBT selftest: Failed!\n");
642
wrmsrq(MSR_IA32_S_CET, 0);
643
setup_clear_cpu_cap(X86_FEATURE_IBT);
644
}
645
}
646
647
__noendbr void cet_disable(void)
648
{
649
if (!(cpu_feature_enabled(X86_FEATURE_IBT) ||
650
cpu_feature_enabled(X86_FEATURE_SHSTK)))
651
return;
652
653
wrmsrq(MSR_IA32_S_CET, 0);
654
wrmsrq(MSR_IA32_U_CET, 0);
655
}
656
657
/*
658
* Some CPU features depend on higher CPUID levels, which may not always
659
* be available due to CPUID level capping or broken virtualization
660
* software. Add those features to this table to auto-disable them.
661
*/
662
struct cpuid_dependent_feature {
663
u32 feature;
664
u32 level;
665
};
666
667
static const struct cpuid_dependent_feature
668
cpuid_dependent_features[] = {
669
{ X86_FEATURE_MWAIT, CPUID_LEAF_MWAIT },
670
{ X86_FEATURE_DCA, CPUID_LEAF_DCA },
671
{ X86_FEATURE_XSAVE, CPUID_LEAF_XSTATE },
672
{ 0, 0 }
673
};
674
675
static void filter_cpuid_features(struct cpuinfo_x86 *c, bool warn)
676
{
677
const struct cpuid_dependent_feature *df;
678
679
for (df = cpuid_dependent_features; df->feature; df++) {
680
681
if (!cpu_has(c, df->feature))
682
continue;
683
/*
684
* Note: cpuid_level is set to -1 if unavailable, but
685
* extended_cpuid_level is set to 0 if unavailable
686
* and the legitimate extended levels are all negative
687
* when signed; hence the weird messing around with
688
* signs here...
689
*/
690
if (!((s32)df->level < 0 ?
691
(u32)df->level > (u32)c->extended_cpuid_level :
692
(s32)df->level > (s32)c->cpuid_level))
693
continue;
694
695
clear_cpu_cap(c, df->feature);
696
if (!warn)
697
continue;
698
699
pr_warn("CPU: CPU feature %s disabled, no CPUID level 0x%x\n",
700
x86_cap_flags[df->feature], df->level);
701
}
702
}
703
704
/*
705
* Naming convention should be: <Name> [(<Codename>)]
706
* This table is only used if init_<vendor>() below doesn't set it;
707
* in particular, if CPUID levels 0x80000002..4 are supported, this
708
* isn't used
709
*/
710
711
/* Look up the CPU name in the legacy model table. */
712
static const char *table_lookup_model(struct cpuinfo_x86 *c)
713
{
714
#ifdef CONFIG_X86_32
715
const struct legacy_cpu_model_info *info;
716
717
if (c->x86_model >= 16)
718
return NULL; /* Range check */
719
720
if (!this_cpu)
721
return NULL;
722
723
info = this_cpu->legacy_models;
724
725
while (info->family) {
726
if (info->family == c->x86)
727
return info->model_names[c->x86_model];
728
info++;
729
}
730
#endif
731
return NULL; /* Not found */
732
}
733
734
/* Aligned to unsigned long to avoid split lock in atomic bitmap ops */
735
__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS] __aligned(sizeof(unsigned long));
736
__u32 cpu_caps_set[NCAPINTS + NBUGINTS] __aligned(sizeof(unsigned long));
737
738
#ifdef CONFIG_X86_32
739
/* The 32-bit entry code needs to find cpu_entry_area. */
740
DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
741
#endif
742
743
/* Load the original GDT from the per-cpu structure */
744
void load_direct_gdt(int cpu)
745
{
746
struct desc_ptr gdt_descr;
747
748
gdt_descr.address = (long)get_cpu_gdt_rw(cpu);
749
gdt_descr.size = GDT_SIZE - 1;
750
load_gdt(&gdt_descr);
751
}
752
EXPORT_SYMBOL_FOR_KVM(load_direct_gdt);
753
754
/* Load a fixmap remapping of the per-cpu GDT */
755
void load_fixmap_gdt(int cpu)
756
{
757
struct desc_ptr gdt_descr;
758
759
gdt_descr.address = (long)get_cpu_gdt_ro(cpu);
760
gdt_descr.size = GDT_SIZE - 1;
761
load_gdt(&gdt_descr);
762
}
763
EXPORT_SYMBOL_GPL(load_fixmap_gdt);
764
765
/**
766
* switch_gdt_and_percpu_base - Switch to direct GDT and runtime per CPU base
767
* @cpu: The CPU number for which this is invoked
768
*
769
* Invoked during early boot to switch from early GDT and early per CPU to
770
* the direct GDT and the runtime per CPU area. On 32-bit the percpu base
771
* switch is implicit in loading the direct GDT. On 64-bit this requires
772
* updating GSBASE.
773
*/
774
void __init switch_gdt_and_percpu_base(int cpu)
775
{
776
load_direct_gdt(cpu);
777
778
#ifdef CONFIG_X86_64
779
/*
780
* No need to load %gs. It is already correct.
781
*
782
* Writing %gs on 64bit would zero GSBASE which would make any per
783
* CPU operation up to the point of the wrmsrq() fault.
784
*
785
* Set GSBASE to the new offset. Until the wrmsrq() happens the
786
* early mapping is still valid. That means the GSBASE update will
787
* lose any prior per CPU data which was not copied over in
788
* setup_per_cpu_areas().
789
*
790
* This works even with stackprotector enabled because the
791
* per CPU stack canary is 0 in both per CPU areas.
792
*/
793
wrmsrq(MSR_GS_BASE, cpu_kernelmode_gs_base(cpu));
794
#else
795
/*
796
* %fs is already set to __KERNEL_PERCPU, but after switching GDT
797
* it is required to load FS again so that the 'hidden' part is
798
* updated from the new GDT. Up to this point the early per CPU
799
* translation is active. Any content of the early per CPU data
800
* which was not copied over in setup_per_cpu_areas() is lost.
801
*/
802
loadsegment(fs, __KERNEL_PERCPU);
803
#endif
804
}
805
806
static const struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {};
807
808
static void get_model_name(struct cpuinfo_x86 *c)
809
{
810
unsigned int *v;
811
char *p, *q, *s;
812
813
if (c->extended_cpuid_level < 0x80000004)
814
return;
815
816
v = (unsigned int *)c->x86_model_id;
817
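/*
 * CPUID leaves 0x80000002..0x80000004 each return 16 bytes of the
 * brand string, 48 bytes total; terminate it explicitly anyway.
 */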
cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
818
cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
819
cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
820
c->x86_model_id[48] = 0;
821
822
/* Trim whitespace */
823
p = q = s = &c->x86_model_id[0];
824
825
while (*p == ' ')
826
p++;
827
828
while (*p) {
829
/* Note the last non-whitespace index */
830
if (!isspace(*p))
831
s = q;
832
833
*q++ = *p++;
834
}
835
836
*(s + 1) = '\0';
837
}
838
839
void cpu_detect_cache_sizes(struct cpuinfo_x86 *c)
840
{
841
unsigned int n, dummy, ebx, ecx, edx, l2size;
842
843
n = c->extended_cpuid_level;
844
845
if (n >= 0x80000005) {
846
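/* Leaf 0x80000005: ECX[31:24] is the L1D size and EDX[31:24] the L1I size, in KB. */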
cpuid(0x80000005, &dummy, &ebx, &ecx, &edx);
847
c->x86_cache_size = (ecx>>24) + (edx>>24);
848
#ifdef CONFIG_X86_64
849
/* On K8 L1 TLB is inclusive, so don't count it */
850
c->x86_tlbsize = 0;
851
#endif
852
}
853
854
if (n < 0x80000006) /* Some chips just have a large L1. */
855
return;
856
857
cpuid(0x80000006, &dummy, &ebx, &ecx, &edx);
858
l2size = ecx >> 16;
859
860
#ifdef CONFIG_X86_64
861
c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff);
862
#else
863
/* do processor-specific cache resizing */
864
if (this_cpu->legacy_cache_size)
865
l2size = this_cpu->legacy_cache_size(c, l2size);
866
867
/* Allow user to override all this if necessary. */
868
if (cachesize_override != -1)
869
l2size = cachesize_override;
870
871
if (l2size == 0)
872
return; /* Again, no L2 cache is possible */
873
#endif
874
875
c->x86_cache_size = l2size;
876
}
877
878
u16 __read_mostly tlb_lli_4k;
879
u16 __read_mostly tlb_lli_2m;
880
u16 __read_mostly tlb_lli_4m;
881
u16 __read_mostly tlb_lld_4k;
882
u16 __read_mostly tlb_lld_2m;
883
u16 __read_mostly tlb_lld_4m;
884
u16 __read_mostly tlb_lld_1g;
885
886
static void cpu_detect_tlb(struct cpuinfo_x86 *c)
887
{
888
if (this_cpu->c_detect_tlb)
889
this_cpu->c_detect_tlb(c);
890
891
pr_info("Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n",
892
tlb_lli_4k, tlb_lli_2m, tlb_lli_4m);
893
894
pr_info("Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d, 1GB %d\n",
895
tlb_lld_4k, tlb_lld_2m, tlb_lld_4m, tlb_lld_1g);
896
}
897
898
void get_cpu_vendor(struct cpuinfo_x86 *c)
899
{
900
char *v = c->x86_vendor_id;
901
int i;
902
903
for (i = 0; i < X86_VENDOR_NUM; i++) {
904
if (!cpu_devs[i])
905
break;
906
907
if (!strcmp(v, cpu_devs[i]->c_ident[0]) ||
908
(cpu_devs[i]->c_ident[1] &&
909
!strcmp(v, cpu_devs[i]->c_ident[1]))) {
910
911
this_cpu = cpu_devs[i];
912
c->x86_vendor = this_cpu->c_x86_vendor;
913
return;
914
}
915
}
916
917
pr_err_once("CPU: vendor_id '%s' unknown, using generic init.\n" \
918
"CPU: Your system may be unstable.\n", v);
919
920
c->x86_vendor = X86_VENDOR_UNKNOWN;
921
this_cpu = &default_cpu;
922
}
923
924
void cpu_detect(struct cpuinfo_x86 *c)
925
{
926
/* Get vendor name */
927
cpuid(0x00000000, (unsigned int *)&c->cpuid_level,
928
(unsigned int *)&c->x86_vendor_id[0],
929
(unsigned int *)&c->x86_vendor_id[8],
930
(unsigned int *)&c->x86_vendor_id[4]);
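/*
 * The 12-character vendor string is EBX, EDX, ECX from CPUID(0)
 * concatenated, hence the 0/8/4 destination offsets above.
 */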
931
932
c->x86 = 4;
933
/* Intel-defined flags: level 0x00000001 */
934
if (c->cpuid_level >= 0x00000001) {
935
u32 junk, tfms, cap0, misc;
936
937
cpuid(0x00000001, &tfms, &misc, &junk, &cap0);
938
c->x86 = x86_family(tfms);
939
c->x86_model = x86_model(tfms);
940
c->x86_stepping = x86_stepping(tfms);
941
942
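/*
 * CPUID(1).EDX bit 19 enumerates CLFLUSH; if set, CPUID(1).EBX[15:8]
 * (the 'misc' word) holds the cache line flush size in 8-byte units.
 */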
if (cap0 & (1<<19)) {
943
c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
944
c->x86_cache_alignment = c->x86_clflush_size;
945
}
946
}
947
}
948
949
static void apply_forced_caps(struct cpuinfo_x86 *c)
950
{
951
int i;
952
953
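/* Cleared bits are removed first, so a bit forced both ways ends up set. */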
for (i = 0; i < NCAPINTS + NBUGINTS; i++) {
954
c->x86_capability[i] &= ~cpu_caps_cleared[i];
955
c->x86_capability[i] |= cpu_caps_set[i];
956
}
957
}
958
959
static void init_speculation_control(struct cpuinfo_x86 *c)
960
{
961
/*
962
* The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB support,
963
* and they also have a different bit for STIBP support. Also,
964
* a hypervisor might have set the individual AMD bits even on
965
* Intel CPUs, for finer-grained selection of what's available.
966
*/
967
if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) {
968
set_cpu_cap(c, X86_FEATURE_IBRS);
969
set_cpu_cap(c, X86_FEATURE_IBPB);
970
set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL);
971
}
972
973
if (cpu_has(c, X86_FEATURE_INTEL_STIBP))
974
set_cpu_cap(c, X86_FEATURE_STIBP);
975
976
if (cpu_has(c, X86_FEATURE_SPEC_CTRL_SSBD) ||
977
cpu_has(c, X86_FEATURE_VIRT_SSBD))
978
set_cpu_cap(c, X86_FEATURE_SSBD);
979
980
if (cpu_has(c, X86_FEATURE_AMD_IBRS)) {
981
set_cpu_cap(c, X86_FEATURE_IBRS);
982
set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL);
983
}
984
985
if (cpu_has(c, X86_FEATURE_AMD_IBPB))
986
set_cpu_cap(c, X86_FEATURE_IBPB);
987
988
if (cpu_has(c, X86_FEATURE_AMD_STIBP)) {
989
set_cpu_cap(c, X86_FEATURE_STIBP);
990
set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL);
991
}
992
993
if (cpu_has(c, X86_FEATURE_AMD_SSBD)) {
994
set_cpu_cap(c, X86_FEATURE_SSBD);
995
set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL);
996
clear_cpu_cap(c, X86_FEATURE_VIRT_SSBD);
997
}
998
}
999
1000
void get_cpu_cap(struct cpuinfo_x86 *c)
1001
{
1002
u32 eax, ebx, ecx, edx;
1003
1004
/* Intel-defined flags: level 0x00000001 */
1005
if (c->cpuid_level >= 0x00000001) {
1006
cpuid(0x00000001, &eax, &ebx, &ecx, &edx);
1007
1008
c->x86_capability[CPUID_1_ECX] = ecx;
1009
c->x86_capability[CPUID_1_EDX] = edx;
1010
}
1011
1012
/* Thermal and Power Management Leaf: level 0x00000006 (eax) */
1013
if (c->cpuid_level >= 0x00000006)
1014
c->x86_capability[CPUID_6_EAX] = cpuid_eax(0x00000006);
1015
1016
/* Additional Intel-defined flags: level 0x00000007 */
1017
if (c->cpuid_level >= 0x00000007) {
1018
cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx);
1019
c->x86_capability[CPUID_7_0_EBX] = ebx;
1020
c->x86_capability[CPUID_7_ECX] = ecx;
1021
c->x86_capability[CPUID_7_EDX] = edx;
1022
1023
/* Check valid sub-leaf index before accessing it */
1024
if (eax >= 1) {
1025
cpuid_count(0x00000007, 1, &eax, &ebx, &ecx, &edx);
1026
c->x86_capability[CPUID_7_1_EAX] = eax;
1027
}
1028
}
1029
1030
/* Extended state features: level 0x0000000d */
1031
if (c->cpuid_level >= 0x0000000d) {
1032
cpuid_count(0x0000000d, 1, &eax, &ebx, &ecx, &edx);
1033
1034
c->x86_capability[CPUID_D_1_EAX] = eax;
1035
}
1036
1037
/*
1038
* Check if extended CPUID leaves are implemented: Max extended
1039
* CPUID leaf must be in the 0x80000001-0x8000ffff range.
1040
*/
1041
eax = cpuid_eax(0x80000000);
1042
c->extended_cpuid_level = ((eax & 0xffff0000) == 0x80000000) ? eax : 0;
1043
1044
if (c->extended_cpuid_level >= 0x80000001) {
1045
cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
1046
1047
c->x86_capability[CPUID_8000_0001_ECX] = ecx;
1048
c->x86_capability[CPUID_8000_0001_EDX] = edx;
1049
}
1050
1051
if (c->extended_cpuid_level >= 0x80000007)
1052
c->x86_power = cpuid_edx(0x80000007);
1053
1054
if (c->extended_cpuid_level >= 0x80000008) {
1055
cpuid(0x80000008, &eax, &ebx, &ecx, &edx);
1056
c->x86_capability[CPUID_8000_0008_EBX] = ebx;
1057
}
1058
1059
if (c->extended_cpuid_level >= 0x8000000a)
1060
c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a);
1061
1062
if (c->extended_cpuid_level >= 0x8000001f)
1063
c->x86_capability[CPUID_8000_001F_EAX] = cpuid_eax(0x8000001f);
1064
1065
if (c->extended_cpuid_level >= 0x80000021)
1066
c->x86_capability[CPUID_8000_0021_EAX] = cpuid_eax(0x80000021);
1067
1068
init_scattered_cpuid_features(c);
1069
init_speculation_control(c);
1070
1071
/*
1072
* Clear/Set all flags overridden by options, after probe.
1073
* This needs to happen each time we re-probe, which may happen
1074
* several times during CPU initialization.
1075
*/
1076
apply_forced_caps(c);
1077
}
1078
1079
void get_cpu_address_sizes(struct cpuinfo_x86 *c)
1080
{
1081
u32 eax, ebx, ecx, edx;
1082
1083
if (!cpu_has(c, X86_FEATURE_CPUID) ||
1084
(c->extended_cpuid_level < 0x80000008)) {
1085
if (IS_ENABLED(CONFIG_X86_64)) {
1086
c->x86_clflush_size = 64;
1087
c->x86_phys_bits = 36;
1088
c->x86_virt_bits = 48;
1089
} else {
1090
c->x86_clflush_size = 32;
1091
c->x86_virt_bits = 32;
1092
c->x86_phys_bits = 32;
1093
1094
if (cpu_has(c, X86_FEATURE_PAE) ||
1095
cpu_has(c, X86_FEATURE_PSE36))
1096
c->x86_phys_bits = 36;
1097
}
1098
} else {
1099
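/* CPUID 0x80000008: EAX[7:0] is the physical and EAX[15:8] the virtual address width. */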
cpuid(0x80000008, &eax, &ebx, &ecx, &edx);
1100
1101
c->x86_virt_bits = (eax >> 8) & 0xff;
1102
c->x86_phys_bits = eax & 0xff;
1103
1104
/* Provide a sane default if not enumerated: */
1105
if (!c->x86_clflush_size)
1106
c->x86_clflush_size = 32;
1107
}
1108
1109
c->x86_cache_bits = c->x86_phys_bits;
1110
c->x86_cache_alignment = c->x86_clflush_size;
1111
}
1112
1113
static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
1114
{
1115
int i;
1116
1117
/*
1118
* First of all, decide if this is a 486 or higher
1119
* It's a 486 if we can modify the AC flag
1120
*/
1121
if (flag_is_changeable_p(X86_EFLAGS_AC))
1122
c->x86 = 4;
1123
else
1124
c->x86 = 3;
1125
1126
for (i = 0; i < X86_VENDOR_NUM; i++)
1127
if (cpu_devs[i] && cpu_devs[i]->c_identify) {
1128
c->x86_vendor_id[0] = 0;
1129
cpu_devs[i]->c_identify(c);
1130
if (c->x86_vendor_id[0]) {
1131
get_cpu_vendor(c);
1132
break;
1133
}
1134
}
1135
}
1136
1137
#define NO_SPECULATION BIT(0)
1138
#define NO_MELTDOWN BIT(1)
1139
#define NO_SSB BIT(2)
1140
#define NO_L1TF BIT(3)
1141
#define NO_MDS BIT(4)
1142
#define MSBDS_ONLY BIT(5)
1143
#define NO_SWAPGS BIT(6)
1144
#define NO_ITLB_MULTIHIT BIT(7)
1145
#define NO_SPECTRE_V2 BIT(8)
1146
#define NO_MMIO BIT(9)
1147
#define NO_EIBRS_PBRSB BIT(10)
1148
#define NO_BHI BIT(11)
1149
1150
#define VULNWL(vendor, family, model, whitelist) \
1151
X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, whitelist)
1152
1153
#define VULNWL_INTEL(vfm, whitelist) \
1154
X86_MATCH_VFM(vfm, whitelist)
1155
1156
#define VULNWL_AMD(family, whitelist) \
1157
VULNWL(AMD, family, X86_MODEL_ANY, whitelist)
1158
1159
#define VULNWL_HYGON(family, whitelist) \
1160
VULNWL(HYGON, family, X86_MODEL_ANY, whitelist)
1161
1162
static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
1163
VULNWL(ANY, 4, X86_MODEL_ANY, NO_SPECULATION),
1164
VULNWL(CENTAUR, 5, X86_MODEL_ANY, NO_SPECULATION),
1165
VULNWL(INTEL, 5, X86_MODEL_ANY, NO_SPECULATION),
1166
VULNWL(NSC, 5, X86_MODEL_ANY, NO_SPECULATION),
1167
VULNWL(VORTEX, 5, X86_MODEL_ANY, NO_SPECULATION),
1168
VULNWL(VORTEX, 6, X86_MODEL_ANY, NO_SPECULATION),
1169
1170
/* Intel Family 6 */
1171
VULNWL_INTEL(INTEL_TIGERLAKE, NO_MMIO),
1172
VULNWL_INTEL(INTEL_TIGERLAKE_L, NO_MMIO),
1173
VULNWL_INTEL(INTEL_ALDERLAKE, NO_MMIO),
1174
VULNWL_INTEL(INTEL_ALDERLAKE_L, NO_MMIO),
1175
1176
VULNWL_INTEL(INTEL_ATOM_SALTWELL, NO_SPECULATION | NO_ITLB_MULTIHIT),
1177
VULNWL_INTEL(INTEL_ATOM_SALTWELL_TABLET, NO_SPECULATION | NO_ITLB_MULTIHIT),
1178
VULNWL_INTEL(INTEL_ATOM_SALTWELL_MID, NO_SPECULATION | NO_ITLB_MULTIHIT),
1179
VULNWL_INTEL(INTEL_ATOM_BONNELL, NO_SPECULATION | NO_ITLB_MULTIHIT),
1180
VULNWL_INTEL(INTEL_ATOM_BONNELL_MID, NO_SPECULATION | NO_ITLB_MULTIHIT),
1181
1182
VULNWL_INTEL(INTEL_ATOM_SILVERMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
1183
VULNWL_INTEL(INTEL_ATOM_SILVERMONT_D, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
1184
VULNWL_INTEL(INTEL_ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
1185
VULNWL_INTEL(INTEL_ATOM_AIRMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
1186
VULNWL_INTEL(INTEL_XEON_PHI_KNL, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
1187
VULNWL_INTEL(INTEL_XEON_PHI_KNM, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
1188
1189
VULNWL_INTEL(INTEL_CORE_YONAH, NO_SSB),
1190
1191
VULNWL_INTEL(INTEL_ATOM_SILVERMONT_MID2,NO_SSB | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | MSBDS_ONLY),
1192
VULNWL_INTEL(INTEL_ATOM_AIRMONT_NP, NO_SSB | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
1193
1194
VULNWL_INTEL(INTEL_ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
1195
VULNWL_INTEL(INTEL_ATOM_GOLDMONT_D, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
1196
VULNWL_INTEL(INTEL_ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB),
1197
1198
/*
1199
* Technically, swapgs isn't serializing on AMD (despite it previously
1200
* being documented as such in the APM). But according to AMD, %gs is
1201
* updated non-speculatively, and the issuing of %gs-relative memory
1202
* operands will be blocked until the %gs update completes, which is
1203
* good enough for our purposes.
1204
*/
1205
1206
VULNWL_INTEL(INTEL_ATOM_TREMONT, NO_EIBRS_PBRSB),
1207
VULNWL_INTEL(INTEL_ATOM_TREMONT_L, NO_EIBRS_PBRSB),
1208
VULNWL_INTEL(INTEL_ATOM_TREMONT_D, NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB),
1209
1210
/* AMD Family 0xf - 0x12 */
1211
VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI),
1212
VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI),
1213
VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI),
1214
VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI),
1215
1216
/* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */
1217
VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB | NO_BHI),
1218
VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB | NO_BHI),
1219
1220
/* Zhaoxin Family 7 */
1221
VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO | NO_BHI),
1222
VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO | NO_BHI),
1223
{}
1224
};
1225
1226
#define VULNBL(vendor, family, model, blacklist) \
1227
X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, blacklist)
1228
1229
#define VULNBL_INTEL_STEPS(vfm, max_stepping, issues) \
1230
X86_MATCH_VFM_STEPS(vfm, X86_STEP_MIN, max_stepping, issues)
1231
1232
#define VULNBL_INTEL_TYPE(vfm, cpu_type, issues) \
1233
X86_MATCH_VFM_CPU_TYPE(vfm, INTEL_CPU_TYPE_##cpu_type, issues)
1234
1235
#define VULNBL_AMD(family, blacklist) \
1236
VULNBL(AMD, family, X86_MODEL_ANY, blacklist)
1237
1238
#define VULNBL_HYGON(family, blacklist) \
1239
VULNBL(HYGON, family, X86_MODEL_ANY, blacklist)
1240
1241
#define SRBDS BIT(0)
1242
/* CPU is affected by X86_BUG_MMIO_STALE_DATA */
1243
#define MMIO BIT(1)
1244
/* CPU is affected by Shared Buffers Data Sampling (SBDS), a variant of X86_BUG_MMIO_STALE_DATA */
1245
#define MMIO_SBDS BIT(2)
1246
/* CPU is affected by RETbleed, speculating where you would not expect it */
1247
#define RETBLEED BIT(3)
1248
/* CPU is affected by SMT (cross-thread) return predictions */
1249
#define SMT_RSB BIT(4)
1250
/* CPU is affected by SRSO */
1251
#define SRSO BIT(5)
1252
/* CPU is affected by GDS */
1253
#define GDS BIT(6)
1254
/* CPU is affected by Register File Data Sampling */
1255
#define RFDS BIT(7)
1256
/* CPU is affected by Indirect Target Selection */
1257
#define ITS BIT(8)
1258
/* CPU is affected by Indirect Target Selection, but guest-host isolation is not affected */
1259
#define ITS_NATIVE_ONLY BIT(9)
1260
/* CPU is affected by Transient Scheduler Attacks */
1261
#define TSA BIT(10)
1262
/* CPU is affected by VMSCAPE */
1263
#define VMSCAPE BIT(11)
1264
1265
static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
1266
VULNBL_INTEL_STEPS(INTEL_SANDYBRIDGE_X, X86_STEP_MAX, VMSCAPE),
1267
VULNBL_INTEL_STEPS(INTEL_SANDYBRIDGE, X86_STEP_MAX, VMSCAPE),
1268
VULNBL_INTEL_STEPS(INTEL_IVYBRIDGE_X, X86_STEP_MAX, VMSCAPE),
1269
VULNBL_INTEL_STEPS(INTEL_IVYBRIDGE, X86_STEP_MAX, SRBDS | VMSCAPE),
1270
VULNBL_INTEL_STEPS(INTEL_HASWELL, X86_STEP_MAX, SRBDS | VMSCAPE),
1271
VULNBL_INTEL_STEPS(INTEL_HASWELL_L, X86_STEP_MAX, SRBDS | VMSCAPE),
1272
VULNBL_INTEL_STEPS(INTEL_HASWELL_G, X86_STEP_MAX, SRBDS | VMSCAPE),
1273
VULNBL_INTEL_STEPS(INTEL_HASWELL_X, X86_STEP_MAX, MMIO | VMSCAPE),
1274
VULNBL_INTEL_STEPS(INTEL_BROADWELL_D, X86_STEP_MAX, MMIO | VMSCAPE),
1275
VULNBL_INTEL_STEPS(INTEL_BROADWELL_X, X86_STEP_MAX, MMIO | VMSCAPE),
1276
VULNBL_INTEL_STEPS(INTEL_BROADWELL_G, X86_STEP_MAX, SRBDS | VMSCAPE),
1277
VULNBL_INTEL_STEPS(INTEL_BROADWELL, X86_STEP_MAX, SRBDS | VMSCAPE),
1278
VULNBL_INTEL_STEPS(INTEL_SKYLAKE_X, 0x5, MMIO | RETBLEED | GDS | VMSCAPE),
1279
VULNBL_INTEL_STEPS(INTEL_SKYLAKE_X, X86_STEP_MAX, MMIO | RETBLEED | GDS | ITS | VMSCAPE),
1280
VULNBL_INTEL_STEPS(INTEL_SKYLAKE_L, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | VMSCAPE),
1281
VULNBL_INTEL_STEPS(INTEL_SKYLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | VMSCAPE),
1282
VULNBL_INTEL_STEPS(INTEL_KABYLAKE_L, 0xb, MMIO | RETBLEED | GDS | SRBDS | VMSCAPE),
1283
VULNBL_INTEL_STEPS(INTEL_KABYLAKE_L, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | ITS | VMSCAPE),
1284
VULNBL_INTEL_STEPS(INTEL_KABYLAKE, 0xc, MMIO | RETBLEED | GDS | SRBDS | VMSCAPE),
1285
VULNBL_INTEL_STEPS(INTEL_KABYLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | ITS | VMSCAPE),
1286
VULNBL_INTEL_STEPS(INTEL_CANNONLAKE_L, X86_STEP_MAX, RETBLEED | VMSCAPE),
1287
VULNBL_INTEL_STEPS(INTEL_ICELAKE_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS | ITS_NATIVE_ONLY),
1288
VULNBL_INTEL_STEPS(INTEL_ICELAKE_D, X86_STEP_MAX, MMIO | GDS | ITS | ITS_NATIVE_ONLY),
1289
VULNBL_INTEL_STEPS(INTEL_ICELAKE_X, X86_STEP_MAX, MMIO | GDS | ITS | ITS_NATIVE_ONLY),
1290
VULNBL_INTEL_STEPS(INTEL_COMETLAKE, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS | VMSCAPE),
1291
VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L, 0x0, MMIO | RETBLEED | ITS | VMSCAPE),
1292
VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS | VMSCAPE),
1293
VULNBL_INTEL_STEPS(INTEL_TIGERLAKE_L, X86_STEP_MAX, GDS | ITS | ITS_NATIVE_ONLY),
1294
VULNBL_INTEL_STEPS(INTEL_TIGERLAKE, X86_STEP_MAX, GDS | ITS | ITS_NATIVE_ONLY),
1295
VULNBL_INTEL_STEPS(INTEL_LAKEFIELD, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED),
1296
VULNBL_INTEL_STEPS(INTEL_ROCKETLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | ITS | ITS_NATIVE_ONLY),
1297
VULNBL_INTEL_TYPE(INTEL_ALDERLAKE, ATOM, RFDS | VMSCAPE),
1298
VULNBL_INTEL_STEPS(INTEL_ALDERLAKE, X86_STEP_MAX, VMSCAPE),
1299
VULNBL_INTEL_STEPS(INTEL_ALDERLAKE_L, X86_STEP_MAX, RFDS | VMSCAPE),
1300
VULNBL_INTEL_TYPE(INTEL_RAPTORLAKE, ATOM, RFDS | VMSCAPE),
1301
VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE, X86_STEP_MAX, VMSCAPE),
1302
VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE_P, X86_STEP_MAX, RFDS | VMSCAPE),
1303
VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE_S, X86_STEP_MAX, RFDS | VMSCAPE),
1304
VULNBL_INTEL_STEPS(INTEL_METEORLAKE_L, X86_STEP_MAX, VMSCAPE),
1305
VULNBL_INTEL_STEPS(INTEL_ARROWLAKE_H, X86_STEP_MAX, VMSCAPE),
1306
VULNBL_INTEL_STEPS(INTEL_ARROWLAKE, X86_STEP_MAX, VMSCAPE),
1307
VULNBL_INTEL_STEPS(INTEL_ARROWLAKE_U, X86_STEP_MAX, VMSCAPE),
1308
VULNBL_INTEL_STEPS(INTEL_LUNARLAKE_M, X86_STEP_MAX, VMSCAPE),
1309
VULNBL_INTEL_STEPS(INTEL_SAPPHIRERAPIDS_X, X86_STEP_MAX, VMSCAPE),
1310
VULNBL_INTEL_STEPS(INTEL_GRANITERAPIDS_X, X86_STEP_MAX, VMSCAPE),
1311
VULNBL_INTEL_STEPS(INTEL_EMERALDRAPIDS_X, X86_STEP_MAX, VMSCAPE),
1312
VULNBL_INTEL_STEPS(INTEL_ATOM_GRACEMONT, X86_STEP_MAX, RFDS | VMSCAPE),
1313
VULNBL_INTEL_STEPS(INTEL_ATOM_TREMONT, X86_STEP_MAX, MMIO | MMIO_SBDS | RFDS),
1314
VULNBL_INTEL_STEPS(INTEL_ATOM_TREMONT_D, X86_STEP_MAX, MMIO | RFDS),
1315
VULNBL_INTEL_STEPS(INTEL_ATOM_TREMONT_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RFDS),
1316
VULNBL_INTEL_STEPS(INTEL_ATOM_GOLDMONT, X86_STEP_MAX, RFDS),
1317
VULNBL_INTEL_STEPS(INTEL_ATOM_GOLDMONT_D, X86_STEP_MAX, RFDS),
1318
VULNBL_INTEL_STEPS(INTEL_ATOM_GOLDMONT_PLUS, X86_STEP_MAX, RFDS),
1319
VULNBL_INTEL_STEPS(INTEL_ATOM_CRESTMONT_X, X86_STEP_MAX, VMSCAPE),
1320
1321
VULNBL_AMD(0x15, RETBLEED),
1322
VULNBL_AMD(0x16, RETBLEED),
1323
VULNBL_AMD(0x17, RETBLEED | SMT_RSB | SRSO | VMSCAPE),
1324
VULNBL_HYGON(0x18, RETBLEED | SMT_RSB | SRSO | VMSCAPE),
1325
VULNBL_AMD(0x19, SRSO | TSA | VMSCAPE),
1326
VULNBL_AMD(0x1a, SRSO | VMSCAPE),
1327
{}
1328
};
1329
1330
static bool __init cpu_matches(const struct x86_cpu_id *table, unsigned long which)
1331
{
1332
const struct x86_cpu_id *m = x86_match_cpu(table);
1333
1334
return m && !!(m->driver_data & which);
1335
}
1336
1337
u64 x86_read_arch_cap_msr(void)
1338
{
1339
u64 x86_arch_cap_msr = 0;
1340
1341
if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
1342
rdmsrq(MSR_IA32_ARCH_CAPABILITIES, x86_arch_cap_msr);
1343
1344
return x86_arch_cap_msr;
1345
}
1346
1347
static bool arch_cap_mmio_immune(u64 x86_arch_cap_msr)
1348
{
1349
return (x86_arch_cap_msr & ARCH_CAP_FBSDP_NO &&
1350
x86_arch_cap_msr & ARCH_CAP_PSDP_NO &&
1351
x86_arch_cap_msr & ARCH_CAP_SBDR_SSDP_NO);
1352
}
1353
1354
static bool __init vulnerable_to_rfds(u64 x86_arch_cap_msr)
1355
{
1356
/* The "immunity" bit trumps everything else: */
1357
if (x86_arch_cap_msr & ARCH_CAP_RFDS_NO)
1358
return false;
1359
1360
/*
1361
* VMMs set ARCH_CAP_RFDS_CLEAR for processors not in the blacklist to
1362
* indicate that mitigation is needed because the guest is running on
1363
* vulnerable hardware or may migrate to such hardware:
1364
*/
1365
if (x86_arch_cap_msr & ARCH_CAP_RFDS_CLEAR)
1366
return true;
1367
1368
/* Only consult the blacklist when there is no enumeration: */
1369
return cpu_matches(cpu_vuln_blacklist, RFDS);
1370
}
1371
1372
static bool __init vulnerable_to_its(u64 x86_arch_cap_msr)
1373
{
1374
/* The "immunity" bit trumps everything else: */
1375
if (x86_arch_cap_msr & ARCH_CAP_ITS_NO)
1376
return false;
1377
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
1378
return false;
1379
1380
/* None of the affected CPUs have BHI_CTRL */
1381
if (boot_cpu_has(X86_FEATURE_BHI_CTRL))
1382
return false;
1383
1384
/*
1385
* If a VMM did not expose ITS_NO, assume that a guest could
1386
* be running on vulnerable hardware or may migrate to such
1387
* hardware.
1388
*/
1389
if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
1390
return true;
1391
1392
if (cpu_matches(cpu_vuln_blacklist, ITS))
1393
return true;
1394
1395
return false;
1396
}
1397
1398
static struct x86_cpu_id cpu_latest_microcode[] = {
1399
#include "microcode/intel-ucode-defs.h"
1400
{}
1401
};
1402
1403
static bool __init cpu_has_old_microcode(void)
1404
{
1405
const struct x86_cpu_id *m = x86_match_cpu(cpu_latest_microcode);
1406
1407
/* Give unknown CPUs a pass: */
1408
if (!m) {
1409
/* Intel CPUs should be in the list. Warn if not: */
1410
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
1411
pr_info("x86/CPU: Model not found in latest microcode list\n");
1412
return false;
1413
}
1414
1415
/*
1416
* Hosts usually lie to guests with a super high microcode
1417
* version. Just ignore what hosts tell guests:
1418
*/
1419
if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
1420
return false;
1421
1422
/* Consider all debug microcode to be old: */
1423
if (boot_cpu_data.microcode & BIT(31))
1424
return true;
1425
1426
/* Give new microcode a pass: */
1427
if (boot_cpu_data.microcode >= m->driver_data)
1428
return false;
1429
1430
/* Uh oh, too old: */
1431
return true;
1432
}
1433
1434
static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
1435
{
1436
u64 x86_arch_cap_msr = x86_read_arch_cap_msr();
1437
1438
if (cpu_has_old_microcode()) {
1439
pr_warn("x86/CPU: Running old microcode\n");
1440
setup_force_cpu_bug(X86_BUG_OLD_MICROCODE);
1441
add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
1442
}
1443
1444
/* Set ITLB_MULTIHIT bug if cpu is not in the whitelist and not mitigated */
1445
if (!cpu_matches(cpu_vuln_whitelist, NO_ITLB_MULTIHIT) &&
1446
!(x86_arch_cap_msr & ARCH_CAP_PSCHANGE_MC_NO))
1447
setup_force_cpu_bug(X86_BUG_ITLB_MULTIHIT);
1448
1449
if (cpu_matches(cpu_vuln_whitelist, NO_SPECULATION))
1450
return;
1451
1452
setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
1453
1454
if (!cpu_matches(cpu_vuln_whitelist, NO_SPECTRE_V2)) {
1455
setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
1456
setup_force_cpu_bug(X86_BUG_SPECTRE_V2_USER);
1457
}
1458
1459
if (!cpu_matches(cpu_vuln_whitelist, NO_SSB) &&
1460
!(x86_arch_cap_msr & ARCH_CAP_SSB_NO) &&
1461
!cpu_has(c, X86_FEATURE_AMD_SSB_NO))
1462
setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS);
1463
1464
/*
1465
* AMD's AutoIBRS is equivalent to Intel's eIBRS - use the Intel feature
1466
* flag and protect from vendor-specific bugs via the whitelist.
1467
*
1468
* Don't use AutoIBRS when SNP is enabled because it degrades host
1469
* userspace indirect branch performance.
1470
*/
1471
if ((x86_arch_cap_msr & ARCH_CAP_IBRS_ALL) ||
1472
(cpu_has(c, X86_FEATURE_AUTOIBRS) &&
1473
!cpu_feature_enabled(X86_FEATURE_SEV_SNP))) {
1474
setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED);
1475
if (!cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) &&
1476
!(x86_arch_cap_msr & ARCH_CAP_PBRSB_NO))
1477
setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB);
1478
}
1479
1480
if (!cpu_matches(cpu_vuln_whitelist, NO_MDS) &&
1481
!(x86_arch_cap_msr & ARCH_CAP_MDS_NO)) {
1482
setup_force_cpu_bug(X86_BUG_MDS);
1483
if (cpu_matches(cpu_vuln_whitelist, MSBDS_ONLY))
1484
setup_force_cpu_bug(X86_BUG_MSBDS_ONLY);
1485
}
1486
1487
if (!cpu_matches(cpu_vuln_whitelist, NO_SWAPGS))
1488
setup_force_cpu_bug(X86_BUG_SWAPGS);
1489
1490
/*
1491
* When the CPU is not mitigated for TAA (TAA_NO=0) set TAA bug when:
1492
* - TSX is supported or
1493
* - TSX_CTRL is present
1494
*
1495
* TSX_CTRL check is needed for cases when TSX could be disabled before
1496
* the kernel boot e.g. kexec.
1497
* TSX_CTRL check alone is not sufficient for cases when the microcode
1498
* update is not present or when running as a guest that doesn't get TSX_CTRL.
1499
*/
1500
if (!(x86_arch_cap_msr & ARCH_CAP_TAA_NO) &&
1501
(cpu_has(c, X86_FEATURE_RTM) ||
1502
(x86_arch_cap_msr & ARCH_CAP_TSX_CTRL_MSR)))
1503
setup_force_cpu_bug(X86_BUG_TAA);
1504
1505
/*
1506
* SRBDS affects CPUs which support RDRAND or RDSEED and are listed
1507
* in the vulnerability blacklist.
1508
*
1509
* Some of the implications and mitigation of Shared Buffers Data
1510
* Sampling (SBDS) are similar to SRBDS. Give SBDS same treatment as
1511
* SRBDS.
1512
*/
1513
if ((cpu_has(c, X86_FEATURE_RDRAND) ||
1514
cpu_has(c, X86_FEATURE_RDSEED)) &&
1515
cpu_matches(cpu_vuln_blacklist, SRBDS | MMIO_SBDS))
1516
setup_force_cpu_bug(X86_BUG_SRBDS);
1517
1518
/*
1519
* Processor MMIO Stale Data bug enumeration
1520
*
1521
* Affected CPU list is generally enough to enumerate the vulnerability,
1522
* but in the virtualization case also check the ARCH_CAP MSR bits; the VMM may
1523
* not want the guest to enumerate the bug.
1524
*/
1525
if (!arch_cap_mmio_immune(x86_arch_cap_msr)) {
1526
if (cpu_matches(cpu_vuln_blacklist, MMIO))
1527
setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA);
1528
}
1529
1530
if (!cpu_has(c, X86_FEATURE_BTC_NO)) {
1531
if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (x86_arch_cap_msr & ARCH_CAP_RSBA))
1532
setup_force_cpu_bug(X86_BUG_RETBLEED);
1533
}
1534
1535
if (cpu_matches(cpu_vuln_blacklist, SMT_RSB))
1536
setup_force_cpu_bug(X86_BUG_SMT_RSB);
1537
1538
if (!cpu_has(c, X86_FEATURE_SRSO_NO)) {
1539
if (cpu_matches(cpu_vuln_blacklist, SRSO))
1540
setup_force_cpu_bug(X86_BUG_SRSO);
1541
}
1542
1543
/*
1544
* Check if CPU is vulnerable to GDS. If running in a virtual machine on
1545
* an affected processor, the VMM may have disabled the use of GATHER by
1546
* disabling AVX2. The only way to do this in HW is to clear XCR0[2],
1547
* which means that AVX will be disabled.
1548
*/
1549
if (cpu_matches(cpu_vuln_blacklist, GDS) && !(x86_arch_cap_msr & ARCH_CAP_GDS_NO) &&
1550
boot_cpu_has(X86_FEATURE_AVX))
1551
setup_force_cpu_bug(X86_BUG_GDS);
1552
1553
if (vulnerable_to_rfds(x86_arch_cap_msr))
1554
setup_force_cpu_bug(X86_BUG_RFDS);
1555
1556
/*
1557
* Intel parts with eIBRS are vulnerable to BHI attacks. Parts with
1558
* BHI_NO still need to use the BHI mitigation to prevent Intra-mode
1559
* attacks. When virtualized, eIBRS could be hidden, assume vulnerable.
1560
*/
1561
if (!cpu_matches(cpu_vuln_whitelist, NO_BHI) &&
1562
(boot_cpu_has(X86_FEATURE_IBRS_ENHANCED) ||
1563
boot_cpu_has(X86_FEATURE_HYPERVISOR)))
1564
setup_force_cpu_bug(X86_BUG_BHI);
1565
1566
if (cpu_has(c, X86_FEATURE_AMD_IBPB) && !cpu_has(c, X86_FEATURE_AMD_IBPB_RET))
1567
setup_force_cpu_bug(X86_BUG_IBPB_NO_RET);
1568
1569
if (vulnerable_to_its(x86_arch_cap_msr)) {
1570
setup_force_cpu_bug(X86_BUG_ITS);
1571
if (cpu_matches(cpu_vuln_blacklist, ITS_NATIVE_ONLY))
1572
setup_force_cpu_bug(X86_BUG_ITS_NATIVE_ONLY);
1573
}
1574
1575
if (c->x86_vendor == X86_VENDOR_AMD) {
1576
if (!cpu_has(c, X86_FEATURE_TSA_SQ_NO) ||
1577
!cpu_has(c, X86_FEATURE_TSA_L1_NO)) {
1578
if (cpu_matches(cpu_vuln_blacklist, TSA) ||
1579
/* Enable bug on Zen guests to allow for live migration. */
1580
(cpu_has(c, X86_FEATURE_HYPERVISOR) && cpu_has(c, X86_FEATURE_ZEN)))
1581
setup_force_cpu_bug(X86_BUG_TSA);
1582
}
1583
}
1584
1585
/*
1586
* Set the bug only on bare-metal. A nested hypervisor should already be
1587
* deploying IBPB to isolate itself from nested guests.
1588
*/
1589
if (cpu_matches(cpu_vuln_blacklist, VMSCAPE) &&
1590
!boot_cpu_has(X86_FEATURE_HYPERVISOR))
1591
setup_force_cpu_bug(X86_BUG_VMSCAPE);
1592
1593
if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
1594
return;
1595
1596
/* Rogue Data Cache Load? No! */
1597
if (x86_arch_cap_msr & ARCH_CAP_RDCL_NO)
1598
return;
1599
1600
setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
1601
1602
if (cpu_matches(cpu_vuln_whitelist, NO_L1TF))
1603
return;
1604
1605
setup_force_cpu_bug(X86_BUG_L1TF);
1606
}
1607
1608
/*
1609
* The NOPL instruction is supposed to exist on all CPUs of family >= 6;
1610
* unfortunately, that's not true in practice because of early VIA
1611
* chips and (more importantly) broken virtualizers that are not easy
1612
* to detect. In the latter case it doesn't even *fail* reliably, so
1613
* probing for it doesn't even work. Disable it completely on 32-bit
1614
* unless we can find a reliable way to detect all the broken cases.
1615
* Enable it explicitly on 64-bit for non-constant inputs of cpu_has().
1616
*/
1617
static void detect_nopl(void)
1618
{
1619
#ifdef CONFIG_X86_32
1620
setup_clear_cpu_cap(X86_FEATURE_NOPL);
1621
#else
1622
setup_force_cpu_cap(X86_FEATURE_NOPL);
1623
#endif
1624
}
1625
1626
static inline bool parse_set_clear_cpuid(char *arg, bool set)
1627
{
1628
char *opt;
1629
int taint = 0;
1630
1631
while (arg) {
1632
bool found __maybe_unused = false;
1633
unsigned int bit;
1634
1635
opt = strsep(&arg, ",");
1636
1637
/*
1638
* Handle naked numbers first for feature flags which don't
1639
* have names. It doesn't make sense for a bug not to have a
1640
* name so don't handle bug flags here.
1641
*/
1642
if (!kstrtouint(opt, 10, &bit)) {
1643
if (bit < NCAPINTS * 32) {
1644
1645
if (set) {
1646
pr_warn("setcpuid: force-enabling CPU feature flag:");
1647
setup_force_cpu_cap(bit);
1648
} else {
1649
pr_warn("clearcpuid: force-disabling CPU feature flag:");
1650
setup_clear_cpu_cap(bit);
1651
}
1652
/* empty-string, i.e., ""-defined feature flags */
1653
if (!x86_cap_flags[bit])
1654
pr_cont(" %d:%d\n", bit >> 5, bit & 31);
1655
else
1656
pr_cont(" %s\n", x86_cap_flags[bit]);
1657
1658
taint++;
1659
}
1660
/*
1661
* The assumption is that there are no feature names with only
1662
* numbers in the name, so go to the next argument.
1663
*/
1664
continue;
1665
}
1666
1667
for (bit = 0; bit < 32 * (NCAPINTS + NBUGINTS); bit++) {
1668
const char *flag;
1669
const char *kind;
1670
1671
if (bit < 32 * NCAPINTS) {
1672
flag = x86_cap_flags[bit];
1673
kind = "feature";
1674
} else {
1675
kind = "bug";
1676
flag = x86_bug_flags[bit - (32 * NCAPINTS)];
1677
}
1678
1679
if (!flag)
1680
continue;
1681
1682
if (strcmp(flag, opt))
1683
continue;
1684
1685
if (set) {
1686
pr_warn("setcpuid: force-enabling CPU %s flag: %s\n",
1687
kind, flag);
1688
setup_force_cpu_cap(bit);
1689
} else {
1690
pr_warn("clearcpuid: force-disabling CPU %s flag: %s\n",
1691
kind, flag);
1692
setup_clear_cpu_cap(bit);
1693
}
1694
taint++;
1695
found = true;
1696
break;
1697
}
1698
1699
if (!found)
1700
pr_warn("%s: unknown CPU flag: %s", set ? "setcpuid" : "clearcpuid", opt);
1701
}
1702
1703
return taint;
1704
}
1705
1706
1707
/*
1708
* We parse cpu parameters early because fpu__init_system() is executed
1709
* before parse_early_param().
1710
*/
1711
static void __init cpu_parse_early_param(void)
1712
{
1713
bool cpuid_taint = false;
1714
char arg[128];
1715
int arglen;
1716
1717
#ifdef CONFIG_X86_32
1718
if (cmdline_find_option_bool(boot_command_line, "no387"))
1719
#ifdef CONFIG_MATH_EMULATION
1720
setup_clear_cpu_cap(X86_FEATURE_FPU);
1721
#else
1722
pr_err("Option 'no387' required CONFIG_MATH_EMULATION enabled.\n");
1723
#endif
1724
1725
if (cmdline_find_option_bool(boot_command_line, "nofxsr"))
1726
setup_clear_cpu_cap(X86_FEATURE_FXSR);
1727
#endif
1728
1729
if (cmdline_find_option_bool(boot_command_line, "noxsave"))
1730
setup_clear_cpu_cap(X86_FEATURE_XSAVE);
1731
1732
if (cmdline_find_option_bool(boot_command_line, "noxsaveopt"))
1733
setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
1734
1735
if (cmdline_find_option_bool(boot_command_line, "noxsaves"))
1736
setup_clear_cpu_cap(X86_FEATURE_XSAVES);
1737
1738
if (cmdline_find_option_bool(boot_command_line, "nousershstk"))
1739
setup_clear_cpu_cap(X86_FEATURE_USER_SHSTK);
1740
1741
/* Minimize the gap between FRED is available and available but disabled. */
1742
arglen = cmdline_find_option(boot_command_line, "fred", arg, sizeof(arg));
1743
if (arglen != 2 || strncmp(arg, "on", 2))
1744
setup_clear_cpu_cap(X86_FEATURE_FRED);
1745
1746
arglen = cmdline_find_option(boot_command_line, "clearcpuid", arg, sizeof(arg));
1747
if (arglen > 0)
1748
cpuid_taint |= parse_set_clear_cpuid(arg, false);
1749
1750
arglen = cmdline_find_option(boot_command_line, "setcpuid", arg, sizeof(arg));
1751
if (arglen > 0)
1752
cpuid_taint |= parse_set_clear_cpuid(arg, true);
1753
1754
if (cpuid_taint) {
1755
pr_warn("!!! setcpuid=/clearcpuid= in use, this is for TESTING ONLY, may break things horribly. Tainting kernel.\n");
1756
add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
1757
}
1758
}
1759
1760
/*
1761
* Do minimum CPU detection early.
1762
* Fields really needed: vendor, cpuid_level, family, model, mask,
1763
* cache alignment.
1764
* The others are not touched to avoid unwanted side effects.
1765
*
1766
* WARNING: this function is only called on the boot CPU. Don't add code
1767
* here that is supposed to run on all CPUs.
1768
*/
1769
static void __init early_identify_cpu(struct cpuinfo_x86 *c)
1770
{
1771
memset(&c->x86_capability, 0, sizeof(c->x86_capability));
1772
c->extended_cpuid_level = 0;
1773
1774
if (!cpuid_feature())
1775
identify_cpu_without_cpuid(c);
1776
1777
/* cyrix could have cpuid enabled via c_identify()*/
1778
if (cpuid_feature()) {
1779
cpu_detect(c);
1780
get_cpu_vendor(c);
1781
intel_unlock_cpuid_leafs(c);
1782
get_cpu_cap(c);
1783
setup_force_cpu_cap(X86_FEATURE_CPUID);
1784
get_cpu_address_sizes(c);
1785
cpu_parse_early_param();
1786
1787
cpu_init_topology(c);
1788
1789
if (this_cpu->c_early_init)
1790
this_cpu->c_early_init(c);
1791
1792
c->cpu_index = 0;
1793
filter_cpuid_features(c, false);
1794
check_cpufeature_deps(c);
1795
1796
if (this_cpu->c_bsp_init)
1797
this_cpu->c_bsp_init(c);
1798
} else {
1799
setup_clear_cpu_cap(X86_FEATURE_CPUID);
1800
get_cpu_address_sizes(c);
1801
cpu_init_topology(c);
1802
}
1803
1804
setup_force_cpu_cap(X86_FEATURE_ALWAYS);
1805
1806
cpu_set_bug_bits(c);
1807
1808
sld_setup(c);
1809
1810
#ifdef CONFIG_X86_32
1811
/*
1812
* Regardless of whether PCID is enumerated, the SDM says
1813
* that it can't be enabled in 32-bit mode.
1814
*/
1815
setup_clear_cpu_cap(X86_FEATURE_PCID);
1816
#endif
1817
1818
/*
1819
* Later in the boot process pgtable_l5_enabled() relies on
1820
* cpu_feature_enabled(X86_FEATURE_LA57). If 5-level paging is not
1821
* enabled by this point we need to clear the feature bit to avoid
1822
* false-positives at the later stage.
1823
*
1824
* pgtable_l5_enabled() can be false here for several reasons:
1825
* - 5-level paging is disabled compile-time;
1826
* - it's 32-bit kernel;
1827
* - machine doesn't support 5-level paging;
1828
* - user specified 'no5lvl' in kernel command line.
1829
*/
1830
if (!pgtable_l5_enabled())
1831
setup_clear_cpu_cap(X86_FEATURE_LA57);
1832
1833
detect_nopl();
1834
mca_bsp_init(c);
1835
}
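
/*
 * Populate cpu_devs[] from the linker-assembled list of vendor cpu_dev
 * structures between __x86_cpu_dev_start and __x86_cpu_dev_end.
 */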
void __init init_cpu_devs(void)
{
	const struct cpu_dev *const *cdev;
	int count = 0;

	for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) {
		const struct cpu_dev *cpudev = *cdev;

		if (count >= X86_VENDOR_NUM)
			break;
		cpu_devs[count] = cpudev;
		count++;
	}
}

void __init early_cpu_init(void)
{
#ifdef CONFIG_PROCESSOR_SELECT
	unsigned int i, j;

	pr_info("KERNEL supported cpus:\n");
#endif

	init_cpu_devs();

#ifdef CONFIG_PROCESSOR_SELECT
	for (i = 0; i < X86_VENDOR_NUM && cpu_devs[i]; i++) {
		for (j = 0; j < 2; j++) {
			if (!cpu_devs[i]->c_ident[j])
				continue;
			pr_info(" %s %s\n", cpu_devs[i]->c_vendor,
				cpu_devs[i]->c_ident[j]);
		}
	}
#endif

	early_identify_cpu(&boot_cpu_data);
}

static bool detect_null_seg_behavior(void)
{
	/*
	 * Empirically, writing zero to a segment selector on AMD does
	 * not clear the base, whereas writing zero to a segment
	 * selector on Intel does clear the base. Intel's behavior
	 * allows slightly faster context switches in the common case
	 * where GS is unused by the prev and next threads.
	 *
	 * Since neither vendor documents this anywhere that I can see,
	 * detect it directly instead of hard-coding the choice by
	 * vendor.
	 *
	 * I've designated AMD's behavior as the "bug" because it's
	 * counterintuitive and less friendly.
	 */

	unsigned long old_base, tmp;
	rdmsrq(MSR_FS_BASE, old_base);
	wrmsrq(MSR_FS_BASE, 1);
	loadsegment(fs, 0);
	rdmsrq(MSR_FS_BASE, tmp);
	wrmsrq(MSR_FS_BASE, old_base);
	return tmp == 0;
}

void check_null_seg_clears_base(struct cpuinfo_x86 *c)
{
	/* BUG_NULL_SEG is only relevant with 64bit userspace */
	if (!IS_ENABLED(CONFIG_X86_64))
		return;

	if (cpu_has(c, X86_FEATURE_NULL_SEL_CLR_BASE))
		return;

	/*
	 * CPUID bit above wasn't set. If this kernel is still running
	 * as a HV guest, then the HV has decided not to advertise
	 * that CPUID bit for whatever reason. For example, one
	 * member of the migration pool might be vulnerable. Which
	 * means, the bug is present: set the BUG flag and return.
	 */
	if (cpu_has(c, X86_FEATURE_HYPERVISOR)) {
		set_cpu_bug(c, X86_BUG_NULL_SEG);
		return;
	}

	/*
	 * Zen2 CPUs also have this behaviour, but no CPUID bit.
	 * 0x18 is the respective family for Hygon.
	 */
	if ((c->x86 == 0x17 || c->x86 == 0x18) &&
	    detect_null_seg_behavior())
		return;

	/* All the remaining ones are affected */
	set_cpu_bug(c, X86_BUG_NULL_SEG);
}
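
/*
 * CPUID-based identification shared by the boot CPU and secondary CPUs:
 * read the vendor, capability leaves, address sizes and the model name.
 * Called from identify_cpu().
 */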
static void generic_identify(struct cpuinfo_x86 *c)
{
	c->extended_cpuid_level = 0;

	if (!cpuid_feature())
		identify_cpu_without_cpuid(c);

	/* Cyrix could have CPUID enabled via c_identify() */
	if (!cpuid_feature())
		return;

	cpu_detect(c);

	get_cpu_vendor(c);
	intel_unlock_cpuid_leafs(c);
	get_cpu_cap(c);

	get_cpu_address_sizes(c);

	get_model_name(c); /* Default name */

	/*
	 * ESPFIX is a strange bug.  All real CPUs have it.  Paravirt
	 * systems that run Linux at CPL > 0 may or may not have the
	 * issue, but, even if they have the issue, there's absolutely
	 * nothing we can do about it because we can't use the real IRET
	 * instruction.
	 *
	 * NB: For the time being, only 32-bit kernels support
	 * X86_BUG_ESPFIX as such.  64-bit kernels directly choose
	 * whether to apply espfix using paravirt hooks.  If any
	 * non-paravirt system ever shows up that does *not* have the
	 * ESPFIX issue, we can change this.
	 */
#ifdef CONFIG_X86_32
	set_cpu_bug(c, X86_BUG_ESPFIX);
#endif
}

/*
 * This does the hard work of actually picking apart the CPU stuff...
 */
static void identify_cpu(struct cpuinfo_x86 *c)
{
	int i;

	c->loops_per_jiffy = loops_per_jiffy;
	c->x86_cache_size = 0;
	c->x86_vendor = X86_VENDOR_UNKNOWN;
	c->x86_model = c->x86_stepping = 0;	/* So far unknown... */
	c->x86_vendor_id[0] = '\0';		/* Unset */
	c->x86_model_id[0] = '\0';		/* Unset */
#ifdef CONFIG_X86_64
	c->x86_clflush_size = 64;
	c->x86_phys_bits = 36;
	c->x86_virt_bits = 48;
#else
	c->cpuid_level = -1;			/* CPUID not detected */
	c->x86_clflush_size = 32;
	c->x86_phys_bits = 32;
	c->x86_virt_bits = 32;
#endif
	c->x86_cache_alignment = c->x86_clflush_size;
	memset(&c->x86_capability, 0, sizeof(c->x86_capability));
#ifdef CONFIG_X86_VMX_FEATURE_NAMES
	memset(&c->vmx_capability, 0, sizeof(c->vmx_capability));
#endif

	generic_identify(c);

	cpu_parse_topology(c);

	if (this_cpu->c_identify)
		this_cpu->c_identify(c);

	/* Clear/Set all flags overridden by options, after probe */
	apply_forced_caps(c);

	/*
	 * Set default APIC and TSC_DEADLINE MSR fencing flag. AMD and
	 * Hygon will clear it in ->c_init() below.
	 */
	set_cpu_cap(c, X86_FEATURE_APIC_MSRS_FENCE);

	/*
	 * Vendor-specific initialization.  In this section we
	 * canonicalize the feature flags, meaning if there are
	 * features a certain CPU supports which CPUID doesn't
	 * tell us, CPUID claiming incorrect flags, or other bugs,
	 * we handle them here.
	 *
	 * At the end of this section, c->x86_capability better
	 * indicate the features this CPU genuinely supports!
	 */
	if (this_cpu->c_init)
		this_cpu->c_init(c);

	bus_lock_init();

	/* Disable the PN if appropriate */
	squash_the_stupid_serial_number(c);

	setup_smep(c);
	setup_smap(c);
	setup_umip(c);
	setup_lass(c);

	/* Enable FSGSBASE instructions if available. */
	if (cpu_has(c, X86_FEATURE_FSGSBASE)) {
		cr4_set_bits(X86_CR4_FSGSBASE);
		elf_hwcap2 |= HWCAP2_FSGSBASE;
	}

	/*
	 * The vendor-specific functions might have changed features.
	 * Now we do "generic changes."
	 */

	/* Filter out anything that depends on CPUID levels we don't have */
	filter_cpuid_features(c, true);

	/* Check for unmet dependencies based on the CPUID dependency table */
	check_cpufeature_deps(c);

	/* If the model name is still unset, do table lookup. */
	if (!c->x86_model_id[0]) {
		const char *p;
		p = table_lookup_model(c);
		if (p)
			strcpy(c->x86_model_id, p);
		else
			/* Last resort... */
			sprintf(c->x86_model_id, "%02x/%02x",
				c->x86, c->x86_model);
	}

	x86_init_rdrand(c);
	setup_pku(c);
	setup_cet(c);

	/*
	 * Clear/Set all flags overridden by options; this needs to happen
	 * before the SMP capability AND across all CPUs below.
	 */
	apply_forced_caps(c);

	/*
	 * On SMP, boot_cpu_data holds the common feature set between
	 * all CPUs; so make sure that we indicate which features are
	 * common between the CPUs.  The first time this routine gets
	 * executed, c == &boot_cpu_data.
	 */
	if (c != &boot_cpu_data) {
		/* AND the already accumulated flags with these */
		for (i = 0; i < NCAPINTS; i++)
			boot_cpu_data.x86_capability[i] &= c->x86_capability[i];

		/* OR, i.e. replicate the bug flags */
		for (i = NCAPINTS; i < NCAPINTS + NBUGINTS; i++)
			c->x86_capability[i] |= boot_cpu_data.x86_capability[i];
	}

	ppin_init(c);

	/* Init Machine Check Exception if available. */
	mcheck_cpu_init(c);

	numa_add_cpu(smp_processor_id());
}

/*
 * Set up the CPU state needed to execute SYSENTER/SYSEXIT instructions
 * on 32-bit kernels:
 */
#ifdef CONFIG_X86_32
void enable_sep_cpu(void)
{
	struct tss_struct *tss;
	int cpu;

	if (!boot_cpu_has(X86_FEATURE_SEP))
		return;

	cpu = get_cpu();
	tss = &per_cpu(cpu_tss_rw, cpu);

	/*
	 * We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field --
	 * see the big comment in struct x86_hw_tss's definition.
	 */

	tss->x86_tss.ss1 = __KERNEL_CS;
	wrmsrq(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1);
	wrmsrq(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1));
	wrmsrq(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32);

	put_cpu();
}
#endif
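
/* Identify the boot CPU and perform the boot-CPU-only setup that depends on it. */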
static __init void identify_boot_cpu(void)
{
	identify_cpu(&boot_cpu_data);
	if (HAS_KERNEL_IBT && cpu_feature_enabled(X86_FEATURE_IBT))
		pr_info("CET detected: Indirect Branch Tracking enabled\n");
#ifdef CONFIG_X86_32
	enable_sep_cpu();
#endif
	cpu_detect_tlb(&boot_cpu_data);
	setup_cr_pinning();

	tsx_init();
	tdx_init();
	lkgs_init();
}
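
/*
 * Identify a secondary (AP) CPU: start from a copy of boot_cpu_data on the
 * first bringup, run the full identification and then apply the per-CPU
 * speculation control MSR setup.
 */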
void identify_secondary_cpu(unsigned int cpu)
{
	struct cpuinfo_x86 *c = &cpu_data(cpu);

	/* Copy boot_cpu_data only on the first bringup */
	if (!c->initialized)
		*c = boot_cpu_data;
	c->cpu_index = cpu;

	identify_cpu(c);
#ifdef CONFIG_X86_32
	enable_sep_cpu();
#endif
	x86_spec_ctrl_setup_ap();
	update_srbds_msr();
	if (boot_cpu_has_bug(X86_BUG_GDS))
		update_gds_msr();

	tsx_ap_init();
	c->initialized = true;
}

void print_cpu_info(struct cpuinfo_x86 *c)
{
	const char *vendor = NULL;

	if (c->x86_vendor < X86_VENDOR_NUM) {
		vendor = this_cpu->c_vendor;
	} else {
		if (c->cpuid_level >= 0)
			vendor = c->x86_vendor_id;
	}

	if (vendor && !strstr(c->x86_model_id, vendor))
		pr_cont("%s ", vendor);

	if (c->x86_model_id[0])
		pr_cont("%s", c->x86_model_id);
	else
		pr_cont("%d86", c->x86);

	pr_cont(" (family: 0x%x, model: 0x%x", c->x86, c->x86_model);

	if (c->x86_stepping || c->cpuid_level >= 0)
		pr_cont(", stepping: 0x%x)\n", c->x86_stepping);
	else
		pr_cont(")\n");
}

/*
 * clearcpuid= and setcpuid= were already parsed in cpu_parse_early_param().
 * These dummy functions prevent them from becoming an environment variable for
 * init.
 */

static __init int setup_clearcpuid(char *arg)
{
	return 1;
}
__setup("clearcpuid=", setup_clearcpuid);

static __init int setup_setcpuid(char *arg)
{
	return 1;
}
__setup("setcpuid=", setup_setcpuid);

DEFINE_PER_CPU_CACHE_HOT(struct task_struct *, current_task) = &init_task;
EXPORT_PER_CPU_SYMBOL(current_task);
EXPORT_PER_CPU_SYMBOL(const_current_task);

DEFINE_PER_CPU_CACHE_HOT(int, __preempt_count) = INIT_PREEMPT_COUNT;
EXPORT_PER_CPU_SYMBOL(__preempt_count);

DEFINE_PER_CPU_CACHE_HOT(unsigned long, cpu_current_top_of_stack) = TOP_OF_INIT_STACK;

#ifdef CONFIG_X86_64
/*
 * Note: Do not make this dependent on CONFIG_MITIGATION_CALL_DEPTH_TRACKING
 * so that this space is reserved in the hot cache section even when the
 * mitigation is disabled.
 */
DEFINE_PER_CPU_CACHE_HOT(u64, __x86_call_depth);
EXPORT_PER_CPU_SYMBOL(__x86_call_depth);

static void wrmsrq_cstar(unsigned long val)
{
	/*
	 * Intel CPUs do not support 32-bit SYSCALL. Writing to MSR_CSTAR
	 * is so far ignored by the CPU, but raises a #VE trap in a TDX
	 * guest. Avoid the pointless write on all Intel CPUs.
	 */
	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
		wrmsrq(MSR_CSTAR, val);
}
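
/*
 * Program the MSRs used by the legacy (non-FRED) system call entry code:
 * MSR_LSTAR for 64-bit SYSCALL, MSR_CSTAR and the SYSENTER MSRs for the
 * 32-bit compat path, and MSR_SYSCALL_MASK for the RFLAGS bits cleared on
 * entry.
 */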
static inline void idt_syscall_init(void)
{
	wrmsrq(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);

	if (ia32_enabled()) {
		wrmsrq_cstar((unsigned long)entry_SYSCALL_compat);
		/*
		 * This only works on Intel CPUs.
		 * On AMD CPUs these MSRs are 32-bit, so the CPU truncates
		 * MSR_IA32_SYSENTER_EIP.
		 * This does not cause SYSENTER to jump to the wrong location, because
		 * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
		 */
		wrmsrq_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
		wrmsrq_safe(MSR_IA32_SYSENTER_ESP,
			    (unsigned long)(cpu_entry_stack(smp_processor_id()) + 1));
		wrmsrq_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
	} else {
		wrmsrq_cstar((unsigned long)entry_SYSCALL32_ignore);
		wrmsrq_safe(MSR_IA32_SYSENTER_CS, (u64)GDT_ENTRY_INVALID_SEG);
		wrmsrq_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
		wrmsrq_safe(MSR_IA32_SYSENTER_EIP, 0ULL);
	}

	/*
	 * Flags to clear on syscall; clear as much as possible
	 * to minimize user space-kernel interference.
	 */
	wrmsrq(MSR_SYSCALL_MASK,
	       X86_EFLAGS_CF|X86_EFLAGS_PF|X86_EFLAGS_AF|
	       X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_TF|
	       X86_EFLAGS_IF|X86_EFLAGS_DF|X86_EFLAGS_OF|
	       X86_EFLAGS_IOPL|X86_EFLAGS_NT|X86_EFLAGS_RF|
	       X86_EFLAGS_AC|X86_EFLAGS_ID);
}

/* May not be marked __init: used by software suspend */
void syscall_init(void)
{
	/* The default user and kernel segments */
	wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);

	/*
	 * Except the IA32_STAR MSR, there is NO need to setup SYSCALL and
	 * SYSENTER MSRs for FRED, because FRED uses the ring 3 FRED
	 * entrypoint for SYSCALL and SYSENTER, and ERETU is the only legit
	 * instruction to return to ring 3 (both sysexit and sysret cause
	 * #UD when FRED is enabled).
	 */
	if (!cpu_feature_enabled(X86_FEATURE_FRED))
		idt_syscall_init();
}
#endif /* CONFIG_X86_64 */

#ifdef CONFIG_STACKPROTECTOR
DEFINE_PER_CPU_CACHE_HOT(unsigned long, __stack_chk_guard);
#ifndef CONFIG_SMP
EXPORT_PER_CPU_SYMBOL(__stack_chk_guard);
#endif
#endif
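
/* Bring the hardware breakpoint registers into a known, disabled state. */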
static void initialize_debug_regs(void)
{
	/* Control register first -- to make sure everything is disabled. */
	set_debugreg(DR7_FIXED_1, 7);
	set_debugreg(DR6_RESERVED, 6);
	/* dr5 and dr4 don't exist */
	set_debugreg(0, 3);
	set_debugreg(0, 2);
	set_debugreg(0, 1);
	set_debugreg(0, 0);
}

#ifdef CONFIG_KGDB
/*
 * Restore debug regs if using kgdbwait and you have a kernel debugger
 * connection established.
 */
static void dbg_restore_debug_regs(void)
{
	if (unlikely(kgdb_connected && arch_kgdb_ops.correct_hw_break))
		arch_kgdb_ops.correct_hw_break();
}
#else /* ! CONFIG_KGDB */
#define dbg_restore_debug_regs()
#endif /* ! CONFIG_KGDB */
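
/*
 * Set up the GDT_ENTRY_CPUNODE segment with the CPU and node number encoded
 * in the segment limit, and mirror the same value in MSR_TSC_AUX when
 * RDTSCP/RDPID are available, so that user space (e.g. the vDSO) can query
 * the current CPU cheaply.
 */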
static inline void setup_getcpu(int cpu)
{
	unsigned long cpudata = vdso_encode_cpunode(cpu, early_cpu_to_node(cpu));
	struct desc_struct d = { };

	if (boot_cpu_has(X86_FEATURE_RDTSCP) || boot_cpu_has(X86_FEATURE_RDPID))
		wrmsrq(MSR_TSC_AUX, cpudata);

	/* Store CPU and node number in limit. */
	d.limit0 = cpudata;
	d.limit1 = cpudata >> 16;

	d.type = 5;		/* RO data, expand down, accessed */
	d.dpl = 3;		/* Visible to user code */
	d.s = 1;		/* Not a system segment */
	d.p = 1;		/* Present */
	d.d = 1;		/* 32-bit */

	write_gdt_entry(get_cpu_gdt_rw(cpu), GDT_ENTRY_CPUNODE, &d, DESCTYPE_S);
}

#ifdef CONFIG_X86_64
static inline void tss_setup_ist(struct tss_struct *tss)
{
	/* Set up the per-CPU TSS IST stacks */
	tss->x86_tss.ist[IST_INDEX_DF] = __this_cpu_ist_top_va(DF);
	tss->x86_tss.ist[IST_INDEX_NMI] = __this_cpu_ist_top_va(NMI);
	tss->x86_tss.ist[IST_INDEX_DB] = __this_cpu_ist_top_va(DB);
	tss->x86_tss.ist[IST_INDEX_MCE] = __this_cpu_ist_top_va(MCE);
	/* Only mapped when SEV-ES is active */
	tss->x86_tss.ist[IST_INDEX_VC] = __this_cpu_ist_top_va(VC);
}
#else /* CONFIG_X86_64 */
static inline void tss_setup_ist(struct tss_struct *tss) { }
#endif /* !CONFIG_X86_64 */

static inline void tss_setup_io_bitmap(struct tss_struct *tss)
{
	tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_INVALID;

#ifdef CONFIG_X86_IOPL_IOPERM
	tss->io_bitmap.prev_max = 0;
	tss->io_bitmap.prev_sequence = 0;
	memset(tss->io_bitmap.bitmap, 0xff, sizeof(tss->io_bitmap.bitmap));
	/*
	 * Invalidate the extra array entry past the end of the all-permission
	 * bitmap as required by the hardware.
	 */
	tss->io_bitmap.mapall[IO_BITMAP_LONGS] = ~0UL;
#endif
}

/*
 * Setup everything needed to handle exceptions from the IDT, including the IST
 * exceptions which use paranoid_entry().
 */
void cpu_init_exception_handling(bool boot_cpu)
{
	struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw);
	int cpu = raw_smp_processor_id();

	/* paranoid_entry() gets the CPU number from the GDT */
	setup_getcpu(cpu);

	/* For IDT mode, IST vectors need to be set in TSS. */
	if (!cpu_feature_enabled(X86_FEATURE_FRED))
		tss_setup_ist(tss);
	tss_setup_io_bitmap(tss);
	set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);

	load_TR_desc();

	/* GHCB needs to be setup to handle #VC. */
	setup_ghcb();

	if (cpu_feature_enabled(X86_FEATURE_FRED)) {
		/* The boot CPU has enabled FRED during early boot */
		if (!boot_cpu)
			cpu_init_fred_exceptions();

		cpu_init_fred_rsps();
	} else {
		load_current_idt();
	}
}
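
/*
 * Replace the early boot exception setup: switch to full FRED exception
 * handling when FRED is enabled, otherwise install the early page fault
 * handler into the IDT.
 */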
void __init cpu_init_replace_early_idt(void)
{
	if (cpu_feature_enabled(X86_FEATURE_FRED))
		cpu_init_fred_exceptions();
	else
		idt_setup_early_pf();
}

/*
 * cpu_init() initializes state that is per-CPU. Some data is already
 * initialized (naturally) in the bootstrap process, such as the GDT.  We
 * reload it nevertheless, this function acts as a 'CPU state barrier',
 * nothing should get across.
 */
void cpu_init(void)
{
	struct task_struct *cur = current;
	int cpu = raw_smp_processor_id();

#ifdef CONFIG_NUMA
	if (this_cpu_read(numa_node) == 0 &&
	    early_cpu_to_node(cpu) != NUMA_NO_NODE)
		set_numa_node(early_cpu_to_node(cpu));
#endif
	pr_debug("Initializing CPU#%d\n", cpu);

	if (IS_ENABLED(CONFIG_X86_64) || cpu_feature_enabled(X86_FEATURE_VME) ||
	    boot_cpu_has(X86_FEATURE_TSC) || boot_cpu_has(X86_FEATURE_DE))
		cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);

	if (IS_ENABLED(CONFIG_X86_64)) {
		loadsegment(fs, 0);
		memset(cur->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
		syscall_init();

		wrmsrq(MSR_FS_BASE, 0);
		wrmsrq(MSR_KERNEL_GS_BASE, 0);
		barrier();

		x2apic_setup();

		intel_posted_msi_init();
	}

	mmgrab(&init_mm);
	cur->active_mm = &init_mm;
	BUG_ON(cur->mm);
	initialize_tlbstate_and_flush();
	enter_lazy_tlb(&init_mm, cur);

	/*
	 * sp0 points to the entry trampoline stack regardless of what task
	 * is running.
	 */
	load_sp0((unsigned long)(cpu_entry_stack(cpu) + 1));

	load_mm_ldt(&init_mm);

	initialize_debug_regs();
	dbg_restore_debug_regs();

	doublefault_init_cpu_tss();

	if (is_uv_system())
		uv_cpu_init();

	load_fixmap_gdt(cpu);
}

#ifdef CONFIG_MICROCODE_LATE_LOADING
/**
 * store_cpu_caps() - Store a snapshot of CPU capabilities
 * @curr_info: Pointer where to store it
 *
 * Returns: None
 */
void store_cpu_caps(struct cpuinfo_x86 *curr_info)
{
	/* Reload CPUID max function as it might've changed. */
	curr_info->cpuid_level = cpuid_eax(0);

	/* Copy all capability leafs and pick up the synthetic ones. */
	memcpy(&curr_info->x86_capability, &boot_cpu_data.x86_capability,
	       sizeof(curr_info->x86_capability));

	/* Get the hardware CPUID leafs */
	get_cpu_cap(curr_info);
}

/**
 * microcode_check() - Check if any CPU capabilities changed after an update.
 * @prev_info:	CPU capabilities stored before an update.
 *
 * The microcode loader calls this upon late microcode load to recheck features,
 * only when microcode has been updated. Caller holds the CPU hotplug lock.
 *
 * Return: None
 */
void microcode_check(struct cpuinfo_x86 *prev_info)
{
	struct cpuinfo_x86 curr_info;

	perf_check_microcode();

	amd_check_microcode();

	store_cpu_caps(&curr_info);

	if (!memcmp(&prev_info->x86_capability, &curr_info.x86_capability,
		    sizeof(prev_info->x86_capability)))
		return;

	pr_warn("x86/CPU: CPU features have changed after loading microcode, but might not take effect.\n");
	pr_warn("x86/CPU: Please consider either early loading through initrd/built-in or a potential BIOS update.\n");
}
#endif

/*
 * Invoked from core CPU hotplug code after hotplug operations
 */
void arch_smt_update(void)
{
	/* Handle the speculative execution misfeatures */
	cpu_bugs_smt_update();
	/* Check whether IPI broadcasting can be enabled */
	apic_smt_update();
}
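
/*
 * Invoked late in start_kernel(): finish boot CPU identification, select
 * speculation mitigations, initialize the FPU and apply alternative
 * instruction patching before the first user space is started.
 */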
void __init arch_cpu_finalize_init(void)
{
	struct cpuinfo_x86 *c = this_cpu_ptr(&cpu_info);

	identify_boot_cpu();

	select_idle_routine();

	/*
	 * identify_boot_cpu() initialized SMT support information, let the
	 * core code know.
	 */
	cpu_smt_set_num_threads(__max_threads_per_core, __max_threads_per_core);

	if (!IS_ENABLED(CONFIG_SMP)) {
		pr_info("CPU: ");
		print_cpu_info(&boot_cpu_data);
	}

	cpu_select_mitigations();

	arch_smt_update();

	if (IS_ENABLED(CONFIG_X86_32)) {
		/*
		 * Check whether this is a real i386, which is no longer
		 * supported, and fixup the utsname.
		 */
		if (boot_cpu_data.x86 < 4)
			panic("Kernel requires i486+ for 'invlpg' and other features");

		init_utsname()->machine[1] =
			'0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
	}

	/*
	 * Must be before alternatives because it might set or clear
	 * feature bits.
	 */
	fpu__init_system();
	fpu__init_cpu();

	/*
	 * This needs to follow the FPU initialization, since EFI depends on it.
	 */
	if (efi_enabled(EFI_RUNTIME_SERVICES))
		efi_enter_virtual_mode();

	/*
	 * Ensure that access to the per CPU representation has the initial
	 * boot CPU configuration.
	 */
	*c = boot_cpu_data;
	c->initialized = true;

	alternative_instructions();

	if (IS_ENABLED(CONFIG_X86_64)) {
		USER_PTR_MAX = TASK_SIZE_MAX;

		/*
		 * Enable this when LAM is gated on LASS support
		if (cpu_feature_enabled(X86_FEATURE_LAM))
			USER_PTR_MAX = (1ul << 63) - PAGE_SIZE;
		 */
		runtime_const_init(ptr, USER_PTR_MAX);

		/*
		 * Make sure the first 2MB area is not mapped by huge pages.
		 * There are typically fixed size MTRRs in there and overlapping
		 * MTRRs into large pages causes slowdowns.
		 *
		 * Right now we don't do that with gbpages because there seems
		 * very little benefit for that case.
		 */
		if (!direct_gbpages)
			set_memory_4k((unsigned long)__va(0), 1);
	} else {
		fpu__init_check_bugs();
	}

	/*
	 * This needs to be called before any devices perform DMA
	 * operations that might use the SWIOTLB bounce buffers. It will
	 * mark the bounce buffers as decrypted so that their usage will
	 * not cause "plain-text" data to be decrypted when accessed. It
	 * must be called after late_time_init() so that Hyper-V x86/x64
	 * hypercalls work when the SWIOTLB bounce buffers are decrypted.
	 */
	mem_encrypt_init();
}