// SPDX-License-Identifier: GPL-2.0-only
/*
 * xsave/xrstor support.
 *
 * Author: Suresh Siddha <suresh.b.siddha@intel.com>
 */
#include <linux/bitops.h>
#include <linux/compat.h>
#include <linux/cpu.h>
#include <linux/mman.h>
#include <linux/nospec.h>
#include <linux/pkeys.h>
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
#include <linux/vmalloc.h>
#include <linux/coredump.h>
#include <linux/sort.h>

#include <asm/fpu/api.h>
#include <asm/fpu/regset.h>
#include <asm/fpu/signal.h>
#include <asm/fpu/xcr.h>

#include <asm/cpuid/api.h>
#include <asm/msr.h>
#include <asm/tlbflush.h>
#include <asm/prctl.h>
#include <asm/elf.h>

#include <uapi/asm/elf.h>

#include "context.h"
#include "internal.h"
#include "legacy.h"
#include "xstate.h"

#define for_each_extended_xfeature(bit, mask)				\
	(bit) = FIRST_EXTENDED_XFEATURE;				\
	for_each_set_bit_from(bit, (unsigned long *)&(mask), 8 * sizeof(mask))
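/*
 * Note: the macro above expands to an assignment statement followed by a
 * for loop, so it can only be used where two bare statements are allowed
 * and must not sit under an unbraced if/else, which would detach the loop
 * from the initial assignment.
 */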

/*
 * Although we spell it out in here, the Processor Trace
 * xfeature is completely unused. We use other mechanisms
 * to save/restore PT state in Linux.
 */
static const char *xfeature_names[] =
{
	"x87 floating point registers",
	"SSE registers",
	"AVX registers",
	"MPX bounds registers",
	"MPX CSR",
	"AVX-512 opmask",
	"AVX-512 Hi256",
	"AVX-512 ZMM_Hi256",
	"Processor Trace (unused)",
	"Protection Keys User registers",
	"PASID state",
	"Control-flow User registers",
	"Control-flow Kernel registers (KVM only)",
	"unknown xstate feature",
	"unknown xstate feature",
	"unknown xstate feature",
	"unknown xstate feature",
	"AMX Tile config",
	"AMX Tile data",
	"APX registers",
	"unknown xstate feature",
};

static unsigned short xsave_cpuid_features[] __initdata = {
	[XFEATURE_FP]				= X86_FEATURE_FPU,
	[XFEATURE_SSE]				= X86_FEATURE_XMM,
	[XFEATURE_YMM]				= X86_FEATURE_AVX,
	[XFEATURE_BNDREGS]			= X86_FEATURE_MPX,
	[XFEATURE_BNDCSR]			= X86_FEATURE_MPX,
	[XFEATURE_OPMASK]			= X86_FEATURE_AVX512F,
	[XFEATURE_ZMM_Hi256]			= X86_FEATURE_AVX512F,
	[XFEATURE_Hi16_ZMM]			= X86_FEATURE_AVX512F,
	[XFEATURE_PT_UNIMPLEMENTED_SO_FAR]	= X86_FEATURE_INTEL_PT,
	[XFEATURE_PKRU]				= X86_FEATURE_OSPKE,
	[XFEATURE_PASID]			= X86_FEATURE_ENQCMD,
	[XFEATURE_CET_USER]			= X86_FEATURE_SHSTK,
	[XFEATURE_CET_KERNEL]			= X86_FEATURE_SHSTK,
	[XFEATURE_XTILE_CFG]			= X86_FEATURE_AMX_TILE,
	[XFEATURE_XTILE_DATA]			= X86_FEATURE_AMX_TILE,
	[XFEATURE_APX]				= X86_FEATURE_APX,
};

static unsigned int xstate_offsets[XFEATURE_MAX] __ro_after_init =
	{ [ 0 ... XFEATURE_MAX - 1] = -1};
static unsigned int xstate_sizes[XFEATURE_MAX] __ro_after_init =
	{ [ 0 ... XFEATURE_MAX - 1] = -1};
static unsigned int xstate_flags[XFEATURE_MAX] __ro_after_init;

/*
 * Ordering of xstate components in uncompacted format: The xfeature
 * number does not necessarily indicate its position in the XSAVE buffer.
 * This array defines the traversal order of xstate features.
 */
static unsigned int xfeature_uncompact_order[XFEATURE_MAX] __ro_after_init =
	{ [ 0 ... XFEATURE_MAX - 1] = -1};

static inline unsigned int next_xfeature_order(unsigned int i, u64 mask)
{
	for (; xfeature_uncompact_order[i] != -1; i++) {
		if (mask & BIT_ULL(xfeature_uncompact_order[i]))
			break;
	}

	return i;
}

/* Iterate xstate features in uncompacted order: */
#define for_each_extended_xfeature_in_order(i, mask)	\
	for (i = 0;					\
	     i = next_xfeature_order(i, mask),		\
	     xfeature_uncompact_order[i] != -1;		\
	     i++)
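/*
 * Note: the loop condition above deliberately uses the comma operator.
 * Each iteration first advances @i to the next array slot whose xfeature
 * is set in @mask, then terminates once the -1 end marker of
 * xfeature_uncompact_order[] is reached.
 */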

#define XSTATE_FLAG_SUPERVISOR	BIT(0)
#define XSTATE_FLAG_ALIGNED64	BIT(1)

/*
 * Return whether the system supports a given xfeature.
 *
 * Also return the name of the (most advanced) feature that the caller requested:
 */
int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name)
{
	u64 xfeatures_missing = xfeatures_needed & ~fpu_kernel_cfg.max_features;

	if (unlikely(feature_name)) {
		long xfeature_idx, max_idx;
		u64 xfeatures_print;
		/*
		 * We use fls64() here to be able to print the most advanced
		 * feature that was requested but is missing. So if a driver
		 * asks about "XFEATURE_MASK_SSE | XFEATURE_MASK_YMM" we'll print the
		 * missing AVX feature - this is the most informative message
		 * to users:
		 */
		if (xfeatures_missing)
			xfeatures_print = xfeatures_missing;
		else
			xfeatures_print = xfeatures_needed;

		xfeature_idx = fls64(xfeatures_print) - 1;
		max_idx = ARRAY_SIZE(xfeature_names) - 1;
		xfeature_idx = min(xfeature_idx, max_idx);

		*feature_name = xfeature_names[xfeature_idx];
	}

	if (xfeatures_missing)
		return 0;

	return 1;
}
EXPORT_SYMBOL_GPL(cpu_has_xfeatures);
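/*
 * Illustrative use (sketch, not from this file): a driver probing for
 * AVX support might do:
 *
 *	const char *name;
 *
 *	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, &name))
 *		pr_warn("missing xfeature: '%s'\n", name);
 */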

static bool xfeature_is_aligned64(int xfeature_nr)
{
	return xstate_flags[xfeature_nr] & XSTATE_FLAG_ALIGNED64;
}

static bool xfeature_is_supervisor(int xfeature_nr)
{
	return xstate_flags[xfeature_nr] & XSTATE_FLAG_SUPERVISOR;
}

static unsigned int xfeature_get_offset(u64 xcomp_bv, int xfeature)
{
	unsigned int offs, i;

	/*
	 * Non-compacted format and legacy features use the cached fixed
	 * offsets.
	 */
	if (!cpu_feature_enabled(X86_FEATURE_XCOMPACTED) ||
	    xfeature <= XFEATURE_SSE)
		return xstate_offsets[xfeature];

	/*
	 * Compacted format offsets depend on the actual content of the
	 * compacted xsave area which is determined by the xcomp_bv header
	 * field.
	 */
	offs = FXSAVE_SIZE + XSAVE_HDR_SIZE;
	for_each_extended_xfeature(i, xcomp_bv) {
		if (xfeature_is_aligned64(i))
			offs = ALIGN(offs, 64);
		if (i == xfeature)
			break;
		offs += xstate_sizes[i];
	}
	return offs;
}
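/*
 * Worked example (hypothetical component sizes): with xcomp_bv containing
 * only YMM and OPMASK and xstate_sizes[XFEATURE_YMM] = 256, the walk above
 * starts at FXSAVE_SIZE + XSAVE_HDR_SIZE = 512 + 64 = 576, so YMM lands at
 * offset 576 and OPMASK at 576 + 256 = 832, plus any 64-byte alignment
 * demanded by a component's ALIGNED64 flag.
 */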

/*
 * Enable the extended processor state save/restore feature.
 * Called once per CPU onlining.
 */
void fpu__init_cpu_xstate(void)
{
	if (!boot_cpu_has(X86_FEATURE_XSAVE) || !fpu_kernel_cfg.max_features)
		return;

	cr4_set_bits(X86_CR4_OSXSAVE);

	/*
	 * Must happen after CR4 setup and before xsetbv() to allow KVM
	 * lazy passthrough. Write independent of the dynamic state static
	 * key as that does not work on the boot CPU. This also ensures
	 * that any stale state is wiped out from XFD. Reset the per CPU
	 * xfd cache too.
	 */
	if (cpu_feature_enabled(X86_FEATURE_XFD))
		xfd_set_state(init_fpstate.xfd);

	/*
	 * XCR_XFEATURE_ENABLED_MASK (aka. XCR0) sets user features
	 * managed by XSAVE{C, OPT, S} and XRSTOR{S}. Only XSAVE user
	 * states can be set here.
	 */
	xsetbv(XCR_XFEATURE_ENABLED_MASK, fpu_user_cfg.max_features);

	/*
	 * MSR_IA32_XSS sets supervisor states managed by XSAVES.
	 */
	if (boot_cpu_has(X86_FEATURE_XSAVES)) {
		wrmsrq(MSR_IA32_XSS, xfeatures_mask_supervisor() |
				     xfeatures_mask_independent());
	}
}
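/*
 * Summary of the ordering above: CR4.OSXSAVE is enabled first, then the
 * XFD MSR is reset, then XCR0 (user states) and finally IA32_XSS
 * (supervisor states) are programmed from the boot-time feature masks.
 */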

static bool xfeature_enabled(enum xfeature xfeature)
{
	return fpu_kernel_cfg.max_features & BIT_ULL(xfeature);
}

static int compare_xstate_offsets(const void *xfeature1, const void *xfeature2)
{
	return xstate_offsets[*(unsigned int *)xfeature1] -
	       xstate_offsets[*(unsigned int *)xfeature2];
}

/*
 * Record the offsets and sizes of various xstates contained
 * in the XSAVE state memory layout. Also, create an ordered
 * list of xfeatures for handling out-of-order offsets.
 */
static void __init setup_xstate_cache(void)
{
	u32 eax, ebx, ecx, edx, xfeature, i = 0;
	/*
	 * The FP xstates and SSE xstates are legacy states. They are always
	 * in the fixed offsets in the xsave area in either compacted form
	 * or standard form.
	 */
	xstate_offsets[XFEATURE_FP]	= 0;
	xstate_sizes[XFEATURE_FP]	= offsetof(struct fxregs_state,
						   xmm_space);

	xstate_offsets[XFEATURE_SSE]	= xstate_sizes[XFEATURE_FP];
	xstate_sizes[XFEATURE_SSE]	= sizeof_field(struct fxregs_state,
						       xmm_space);

	for_each_extended_xfeature(xfeature, fpu_kernel_cfg.max_features) {
		cpuid_count(CPUID_LEAF_XSTATE, xfeature, &eax, &ebx, &ecx, &edx);

		xstate_sizes[xfeature] = eax;
		xstate_flags[xfeature] = ecx;

		/*
		 * If an xfeature is supervisor state, the offset in EBX is
		 * invalid, leave it as -1.
		 */
		if (xfeature_is_supervisor(xfeature))
			continue;

		xstate_offsets[xfeature] = ebx;

		/* Populate the list of xfeatures before sorting */
		xfeature_uncompact_order[i++] = xfeature;
	}

	/*
	 * Sort xfeatures by their offsets to support out-of-order
	 * offsets in the uncompacted format.
	 */
	sort(xfeature_uncompact_order, i, sizeof(unsigned int), compare_xstate_offsets, NULL);
}

/*
 * Print out all the supported xstate features:
 */
static void __init print_xstate_features(void)
{
	int i;

	for (i = 0; i < XFEATURE_MAX; i++) {
		u64 mask = BIT_ULL(i);
		const char *name;

		if (cpu_has_xfeatures(mask, &name))
			pr_info("x86/fpu: Supporting XSAVE feature 0x%03Lx: '%s'\n", mask, name);
	}
}

/*
 * This check is important because it is easy to get XSTATE_*
 * confused with XSTATE_BIT_*.
 */
#define CHECK_XFEATURE(nr) do {			\
	WARN_ON(nr < FIRST_EXTENDED_XFEATURE);	\
	WARN_ON(nr >= XFEATURE_MAX);		\
} while (0)

/*
 * Print out xstate component offsets and sizes
 */
static void __init print_xstate_offset_size(void)
{
	int i;

	for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
		pr_info("x86/fpu: xstate_offset[%d]: %4d, xstate_sizes[%d]: %4d\n",
			i, xfeature_get_offset(fpu_kernel_cfg.max_features, i),
			i, xstate_sizes[i]);
	}
}

/*
 * This function is called only during boot time when x86 caps are not set
 * up and alternatives cannot be used yet.
 */
static __init void os_xrstor_booting(struct xregs_state *xstate)
{
	u64 mask = fpu_kernel_cfg.max_features & XFEATURE_MASK_FPSTATE;
	u32 lmask = mask;
	u32 hmask = mask >> 32;
	int err;

	if (cpu_feature_enabled(X86_FEATURE_XSAVES))
		XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
	else
		XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);

	/*
	 * We should never fault when copying from a kernel buffer, and the FPU
	 * state we set at boot time should be valid.
	 */
	WARN_ON_FPU(err);
}

/*
 * All supported features have either init state all zeros or are
 * handled in setup_init_fpu_buf() individually. This is an explicit
 * feature list and does not use XFEATURE_MASK*SUPPORTED to catch
 * newly added supported features at build time and make people
 * actually look at the init state for the new feature.
 */
#define XFEATURES_INIT_FPSTATE_HANDLED		\
	(XFEATURE_MASK_FP |			\
	 XFEATURE_MASK_SSE |			\
	 XFEATURE_MASK_YMM |			\
	 XFEATURE_MASK_OPMASK |			\
	 XFEATURE_MASK_ZMM_Hi256 |		\
	 XFEATURE_MASK_Hi16_ZMM	 |		\
	 XFEATURE_MASK_PKRU |			\
	 XFEATURE_MASK_BNDREGS |		\
	 XFEATURE_MASK_BNDCSR |			\
	 XFEATURE_MASK_PASID |			\
	 XFEATURE_MASK_CET_USER |		\
	 XFEATURE_MASK_CET_KERNEL |		\
	 XFEATURE_MASK_XTILE |			\
	 XFEATURE_MASK_APX)

/*
 * setup the xstate image representing the init state
 */
static void __init setup_init_fpu_buf(void)
{
	BUILD_BUG_ON((XFEATURE_MASK_USER_SUPPORTED |
		      XFEATURE_MASK_SUPERVISOR_SUPPORTED) !=
		     XFEATURES_INIT_FPSTATE_HANDLED);

	if (!boot_cpu_has(X86_FEATURE_XSAVE))
		return;

	print_xstate_features();

	xstate_init_xcomp_bv(&init_fpstate.regs.xsave, init_fpstate.xfeatures);

	/*
	 * Init all the features state with header.xfeatures being 0x0
	 */
	os_xrstor_booting(&init_fpstate.regs.xsave);

	/*
	 * All components are now in init state. Read the state back so
	 * that init_fpstate contains all non-zero init state. This only
	 * works with XSAVE, but not with XSAVEOPT and XSAVEC/S because
	 * those use the init optimization which skips writing data for
	 * components in init state.
	 *
	 * XSAVE could be used, but that would require to reshuffle the
	 * data when XSAVEC/S is available because XSAVEC/S uses xstate
	 * compaction. But doing so is a pointless exercise because most
	 * components have an all zeros init state except for the legacy
	 * ones (FP and SSE). Those can be saved with FXSAVE into the
	 * legacy area. Adding new features requires to ensure that init
	 * state is all zeroes or if not to add the necessary handling
	 * here.
	 */
	fxsave(&init_fpstate.regs.fxsave);
}

int xfeature_size(int xfeature_nr)
{
	u32 eax, ebx, ecx, edx;

	CHECK_XFEATURE(xfeature_nr);
	cpuid_count(CPUID_LEAF_XSTATE, xfeature_nr, &eax, &ebx, &ecx, &edx);
	return eax;
}

/* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
static int validate_user_xstate_header(const struct xstate_header *hdr,
				       struct fpstate *fpstate)
{
	/* No unknown or supervisor features may be set */
	if (hdr->xfeatures & ~fpstate->user_xfeatures)
		return -EINVAL;

	/* Userspace must use the uncompacted format */
	if (hdr->xcomp_bv)
		return -EINVAL;

	/*
	 * If 'reserved' is shrunken to add a new field, make sure to validate
	 * that new field here!
	 */
	BUILD_BUG_ON(sizeof(hdr->reserved) != 48);

	/* No reserved bits may be set */
	if (memchr_inv(hdr->reserved, 0, sizeof(hdr->reserved)))
		return -EINVAL;

	return 0;
}

static void __init __xstate_dump_leaves(void)
{
	int i;
	u32 eax, ebx, ecx, edx;
	static int should_dump = 1;

	if (!should_dump)
		return;
	should_dump = 0;
	/*
	 * Dump out a few leaves past the ones that we support
	 * just in case there are some goodies up there
	 */
	for (i = 0; i < XFEATURE_MAX + 10; i++) {
		cpuid_count(CPUID_LEAF_XSTATE, i, &eax, &ebx, &ecx, &edx);
		pr_warn("CPUID[%02x, %02x]: eax=%08x ebx=%08x ecx=%08x edx=%08x\n",
			CPUID_LEAF_XSTATE, i, eax, ebx, ecx, edx);
	}
}

#define XSTATE_WARN_ON(x, fmt, ...) do {					\
	if (WARN_ONCE(x, "XSAVE consistency problem: " fmt, ##__VA_ARGS__)) {	\
		__xstate_dump_leaves();						\
	}									\
} while (0)

#define XCHECK_SZ(sz, nr, __struct) ({					\
	if (WARN_ONCE(sz != sizeof(__struct),				\
		      "[%s]: struct is %zu bytes, cpu state %d bytes\n",\
		      xfeature_names[nr], sizeof(__struct), sz)) {	\
		__xstate_dump_leaves();					\
	}								\
	true;								\
})
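/*
 * Note: XCHECK_SZ() is a statement expression that evaluates to true, so
 * callers can write "return XCHECK_SZ(...);" - the warning (and the CPUID
 * leaf dump) fires only when the struct and CPU sizes disagree.
 */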


/**
 * check_xtile_data_against_struct - Check tile data state size.
 *
 * Calculate the state size by multiplying the single tile size, which is
 * recorded in a C struct, by the number of tiles that the CPU reports.
 * Compare the provided size with the calculation.
 *
 * @size: The tile data state size
 *
 * Returns: 0 on success, -EINVAL on mismatch.
 */
static int __init check_xtile_data_against_struct(int size)
{
	u32 max_palid, palid, state_size;
	u32 eax, ebx, ecx, edx;
	u16 max_tile;

	/*
	 * Check the maximum palette id:
	 *   eax: the highest numbered palette subleaf.
	 */
	cpuid_count(CPUID_LEAF_TILE, 0, &max_palid, &ebx, &ecx, &edx);

	/*
	 * Cross-check each tile size and find the maximum number of
	 * supported tiles.
	 */
	for (palid = 1, max_tile = 0; palid <= max_palid; palid++) {
		u16 tile_size, max;

		/*
		 * Check the tile size info:
		 *   eax[31:16]: bytes per tile
		 *   ebx[31:16]: the max names (or max number of tiles)
		 */
		cpuid_count(CPUID_LEAF_TILE, palid, &eax, &ebx, &ecx, &edx);
		tile_size = eax >> 16;
		max = ebx >> 16;

		if (tile_size != sizeof(struct xtile_data)) {
			pr_err("%s: struct is %zu bytes, cpu xtile %d bytes\n",
			       __stringify(XFEATURE_XTILE_DATA),
			       sizeof(struct xtile_data), tile_size);
			__xstate_dump_leaves();
			return -EINVAL;
		}

		if (max > max_tile)
			max_tile = max;
	}

	state_size = sizeof(struct xtile_data) * max_tile;
	if (size != state_size) {
		pr_err("%s: calculated size is %u bytes, cpu state %d bytes\n",
		       __stringify(XFEATURE_XTILE_DATA), state_size, size);
		__xstate_dump_leaves();
		return -EINVAL;
	}
	return 0;
}

/*
 * We have a C struct for each 'xstate'. We need to ensure
 * that our software representation matches what the CPU
 * tells us about the state's size.
 */
static bool __init check_xstate_against_struct(int nr)
{
	/*
	 * Ask the CPU for the size of the state.
	 */
	int sz = xfeature_size(nr);

	/*
	 * Match each CPU state with the corresponding software
	 * structure.
	 */
	switch (nr) {
	case XFEATURE_YMM:	  return XCHECK_SZ(sz, nr, struct ymmh_struct);
	case XFEATURE_BNDREGS:	  return XCHECK_SZ(sz, nr, struct mpx_bndreg_state);
	case XFEATURE_BNDCSR:	  return XCHECK_SZ(sz, nr, struct mpx_bndcsr_state);
	case XFEATURE_OPMASK:	  return XCHECK_SZ(sz, nr, struct avx_512_opmask_state);
	case XFEATURE_ZMM_Hi256:  return XCHECK_SZ(sz, nr, struct avx_512_zmm_uppers_state);
	case XFEATURE_Hi16_ZMM:	  return XCHECK_SZ(sz, nr, struct avx_512_hi16_state);
	case XFEATURE_PKRU:	  return XCHECK_SZ(sz, nr, struct pkru_state);
	case XFEATURE_PASID:	  return XCHECK_SZ(sz, nr, struct ia32_pasid_state);
	case XFEATURE_XTILE_CFG:  return XCHECK_SZ(sz, nr, struct xtile_cfg);
	case XFEATURE_CET_USER:	  return XCHECK_SZ(sz, nr, struct cet_user_state);
	case XFEATURE_CET_KERNEL: return XCHECK_SZ(sz, nr, struct cet_supervisor_state);
	case XFEATURE_APX:	  return XCHECK_SZ(sz, nr, struct apx_state);
	case XFEATURE_XTILE_DATA: check_xtile_data_against_struct(sz); return true;
	default:
		XSTATE_WARN_ON(1, "No structure for xstate: %d\n", nr);
		return false;
	}

	return true;
}

static unsigned int xstate_calculate_size(u64 xfeatures, bool compacted)
{
	unsigned int topmost = fls64(xfeatures) - 1;
	unsigned int offset, i;

	if (topmost <= XFEATURE_SSE)
		return sizeof(struct xregs_state);

	if (compacted) {
		offset = xfeature_get_offset(xfeatures, topmost);
	} else {
		/* Walk through the xfeature order to pick the last */
		for_each_extended_xfeature_in_order(i, xfeatures)
			topmost = xfeature_uncompact_order[i];
		offset = xstate_offsets[topmost];
	}

	return offset + xstate_sizes[topmost];
}
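/*
 * Note for the uncompacted path above: the highest-numbered xfeature is
 * not necessarily the one that ends last in the buffer, hence the walk in
 * offset order to find the component with the highest offset. The total
 * size is then that component's offset plus its size.
 */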

/*
 * This essentially double-checks what the cpu told us about
 * how large the XSAVE buffer needs to be. We are recalculating
 * it to be safe.
 *
 * Independent XSAVE features allocate their own buffers and are not
 * covered by these checks. Only the size of the buffer for task->fpu
 * is checked here.
 */
static bool __init paranoid_xstate_size_valid(unsigned int kernel_size)
{
	bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
	bool xsaves = cpu_feature_enabled(X86_FEATURE_XSAVES);
	unsigned int size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
	int i;

	for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
		if (!check_xstate_against_struct(i))
			return false;
		/*
		 * Supervisor state components can be managed only by
		 * XSAVES.
		 */
		if (!xsaves && xfeature_is_supervisor(i)) {
			XSTATE_WARN_ON(1, "Got supervisor feature %d, but XSAVES not advertised\n", i);
			return false;
		}
	}
	size = xstate_calculate_size(fpu_kernel_cfg.max_features, compacted);
	XSTATE_WARN_ON(size != kernel_size,
		       "size %u != kernel_size %u\n", size, kernel_size);
	return size == kernel_size;
}

/*
 * Get total size of enabled xstates in XCR0 | IA32_XSS.
 *
 * Note the SDM's wording here. "sub-function 0" only enumerates
 * the size of the *user* states. If we use it to size a buffer
 * that we use 'XSAVES' on, we could potentially overflow the
 * buffer because 'XSAVES' saves system states too.
 *
 * This also takes compaction into account. So this works for
 * XSAVEC as well.
 */
static unsigned int __init get_compacted_size(void)
{
	unsigned int eax, ebx, ecx, edx;
	/*
	 * - CPUID function 0DH, sub-function 1:
	 *    EBX enumerates the size (in bytes) required by
	 *    the XSAVES instruction for an XSAVE area
	 *    containing all the state components
	 *    corresponding to bits currently set in
	 *    XCR0 | IA32_XSS.
	 *
	 * When XSAVES is not available but XSAVEC is (virt), then there
	 * are no supervisor states, but XSAVEC still uses compacted
	 * format.
	 */
	cpuid_count(CPUID_LEAF_XSTATE, 1, &eax, &ebx, &ecx, &edx);
	return ebx;
}

/*
 * Get the total size of the enabled xstates without the independent supervisor
 * features.
 */
static unsigned int __init get_xsave_compacted_size(void)
{
	u64 mask = xfeatures_mask_independent();
	unsigned int size;

	if (!mask)
		return get_compacted_size();

	/* Disable independent features. */
	wrmsrq(MSR_IA32_XSS, xfeatures_mask_supervisor());

	/*
	 * Ask the hardware what size is required of the buffer.
	 * This is the size required for the task->fpu buffer.
	 */
	size = get_compacted_size();

	/* Re-enable independent features so XSAVES will work on them again. */
	wrmsrq(MSR_IA32_XSS, xfeatures_mask_supervisor() | mask);

	return size;
}

static unsigned int __init get_xsave_size_user(void)
{
	unsigned int eax, ebx, ecx, edx;
	/*
	 * - CPUID function 0DH, sub-function 0:
	 *    EBX enumerates the size (in bytes) required by
	 *    the XSAVE instruction for an XSAVE area
	 *    containing all the *user* state components
	 *    corresponding to bits currently set in XCR0.
	 */
	cpuid_count(CPUID_LEAF_XSTATE, 0, &eax, &ebx, &ecx, &edx);
	return ebx;
}

static int __init init_xstate_size(void)
{
	/* Recompute the context size for enabled features: */
	unsigned int user_size, kernel_size, kernel_default_size;
	bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);

	/* Uncompacted user space size */
	user_size = get_xsave_size_user();

	/*
	 * XSAVES kernel size includes supervisor states and uses compacted
	 * format. XSAVEC uses compacted format, but does not save
	 * supervisor states.
	 *
	 * XSAVE[OPT] do not support supervisor states so kernel and user
	 * size is identical.
	 */
	if (compacted)
		kernel_size = get_xsave_compacted_size();
	else
		kernel_size = user_size;

	kernel_default_size =
		xstate_calculate_size(fpu_kernel_cfg.default_features, compacted);

	if (!paranoid_xstate_size_valid(kernel_size))
		return -EINVAL;

	fpu_kernel_cfg.max_size = kernel_size;
	fpu_user_cfg.max_size = user_size;

	fpu_kernel_cfg.default_size = kernel_default_size;
	fpu_user_cfg.default_size =
		xstate_calculate_size(fpu_user_cfg.default_features, false);

	guest_default_cfg.size =
		xstate_calculate_size(guest_default_cfg.features, compacted);

	return 0;
}

/*
 * We enabled the XSAVE hardware, but something went wrong and
 * we can not use it. Disable it.
 */
static void __init fpu__init_disable_system_xstate(unsigned int legacy_size)
{
	pr_info("x86/fpu: XSAVE disabled\n");

	fpu_kernel_cfg.max_features = 0;
	cr4_clear_bits(X86_CR4_OSXSAVE);
	setup_clear_cpu_cap(X86_FEATURE_XSAVE);

	/* Restore the legacy size. */
	fpu_kernel_cfg.max_size = legacy_size;
	fpu_kernel_cfg.default_size = legacy_size;
	fpu_user_cfg.max_size = legacy_size;
	fpu_user_cfg.default_size = legacy_size;
	guest_default_cfg.size = legacy_size;

	/*
	 * Prevent enabling the static branch which enables writes to the
	 * XFD MSR.
	 */
	init_fpstate.xfd = 0;

	fpstate_reset(x86_task_fpu(current));
}

static u64 __init host_default_mask(void)
{
	/*
	 * Exclude dynamic features (require userspace opt-in) and features
	 * that are supported only for KVM guests.
	 */
	return ~((u64)XFEATURE_MASK_USER_DYNAMIC | XFEATURE_MASK_GUEST_SUPERVISOR);
}

static u64 __init guest_default_mask(void)
{
	/*
	 * Exclude dynamic features, which require userspace opt-in even
	 * for KVM guests.
	 */
	return ~(u64)XFEATURE_MASK_USER_DYNAMIC;
}

/*
 * Enable and initialize the xsave feature.
 * Called once per system bootup.
 */
void __init fpu__init_system_xstate(unsigned int legacy_size)
{
	unsigned int eax, ebx, ecx, edx;
	u64 xfeatures;
	int err;
	int i;

	if (!boot_cpu_has(X86_FEATURE_FPU)) {
		pr_info("x86/fpu: No FPU detected\n");
		return;
	}

	if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
		pr_info("x86/fpu: x87 FPU will use %s\n",
			boot_cpu_has(X86_FEATURE_FXSR) ? "FXSAVE" : "FSAVE");
		return;
	}

	/*
	 * Find user xstates supported by the processor.
	 */
	cpuid_count(CPUID_LEAF_XSTATE, 0, &eax, &ebx, &ecx, &edx);
	fpu_kernel_cfg.max_features = eax + ((u64)edx << 32);

	/*
	 * Find supervisor xstates supported by the processor.
	 */
	cpuid_count(CPUID_LEAF_XSTATE, 1, &eax, &ebx, &ecx, &edx);
	fpu_kernel_cfg.max_features |= ecx + ((u64)edx << 32);

	if ((fpu_kernel_cfg.max_features & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) {
		/*
		 * This indicates that something really unexpected happened
		 * with the enumeration. Disable XSAVE and try to continue
		 * booting without it. This is too early to BUG().
		 */
		pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n",
		       fpu_kernel_cfg.max_features);
		goto out_disable;
	}

	if (fpu_kernel_cfg.max_features & XFEATURE_MASK_APX &&
	    fpu_kernel_cfg.max_features & (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR)) {
		/*
		 * This is a problematic CPU configuration where two
		 * conflicting state components are both enumerated.
		 */
		pr_err("x86/fpu: Both APX/MPX present in the CPU's xstate features: 0x%llx.\n",
		       fpu_kernel_cfg.max_features);
		goto out_disable;
	}

	fpu_kernel_cfg.independent_features = fpu_kernel_cfg.max_features &
					      XFEATURE_MASK_INDEPENDENT;

	/*
	 * Clear XSAVE features that are disabled in the normal CPUID.
	 */
	for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) {
		unsigned short cid = xsave_cpuid_features[i];

		/* Careful: X86_FEATURE_FPU is 0! */
		if ((i != XFEATURE_FP && !cid) || !boot_cpu_has(cid))
			fpu_kernel_cfg.max_features &= ~BIT_ULL(i);
	}

	if (!cpu_feature_enabled(X86_FEATURE_XFD))
		fpu_kernel_cfg.max_features &= ~XFEATURE_MASK_USER_DYNAMIC;

	if (!cpu_feature_enabled(X86_FEATURE_XSAVES))
		fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED;
	else
		fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED |
					       XFEATURE_MASK_SUPERVISOR_SUPPORTED;

	fpu_user_cfg.max_features = fpu_kernel_cfg.max_features;
	fpu_user_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED;

	/*
	 * Now, given maximum feature set, determine default values by
	 * applying default masks.
	 */
	fpu_kernel_cfg.default_features = fpu_kernel_cfg.max_features & host_default_mask();
	fpu_user_cfg.default_features = fpu_user_cfg.max_features & host_default_mask();
	guest_default_cfg.features = fpu_kernel_cfg.max_features & guest_default_mask();

	/* Store it for paranoia check at the end */
	xfeatures = fpu_kernel_cfg.max_features;

	/*
	 * Initialize the default XFD state in init_fpstate and enable the
	 * dynamic sizing mechanism if dynamic states are available. The
	 * static key cannot be enabled here because this runs before
	 * jump_label_init(). This is delayed to an initcall.
	 */
	init_fpstate.xfd = fpu_user_cfg.max_features & XFEATURE_MASK_USER_DYNAMIC;

	/* Set up compaction feature bit */
	if (cpu_feature_enabled(X86_FEATURE_XSAVEC) ||
	    cpu_feature_enabled(X86_FEATURE_XSAVES))
		setup_force_cpu_cap(X86_FEATURE_XCOMPACTED);

	/* Enable xstate instructions to be able to continue with initialization: */
	fpu__init_cpu_xstate();

	/* Cache size, offset and flags for initialization */
	setup_xstate_cache();

	err = init_xstate_size();
	if (err)
		goto out_disable;

	/*
	 * Update info used for ptrace frames; use standard-format size and no
	 * supervisor xstates:
	 */
	update_regset_xstate_info(fpu_user_cfg.max_size,
				  fpu_user_cfg.max_features);

	/*
	 * init_fpstate excludes dynamic states as they are large but init
	 * state is zero.
	 */
	init_fpstate.size = fpu_kernel_cfg.default_size;
	init_fpstate.xfeatures = fpu_kernel_cfg.default_features;

	if (init_fpstate.size > sizeof(init_fpstate.regs)) {
		pr_warn("x86/fpu: init_fpstate buffer too small (%zu < %d)\n",
			sizeof(init_fpstate.regs), init_fpstate.size);
		goto out_disable;
	}

	setup_init_fpu_buf();

	/*
	 * Paranoia check whether something in the setup modified the
	 * xfeatures mask.
	 */
	if (xfeatures != fpu_kernel_cfg.max_features) {
		pr_err("x86/fpu: xfeatures modified from 0x%016llx to 0x%016llx during init\n",
		       xfeatures, fpu_kernel_cfg.max_features);
		goto out_disable;
	}

	/*
	 * CPU capabilities initialization runs before FPU init. So
	 * X86_FEATURE_OSXSAVE is not set. Now that XSAVE is completely
	 * functional, set the feature bit so dependent code works.
	 */
	setup_force_cpu_cap(X86_FEATURE_OSXSAVE);

	print_xstate_offset_size();
	pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
		fpu_kernel_cfg.max_features,
		fpu_kernel_cfg.max_size,
		boot_cpu_has(X86_FEATURE_XCOMPACTED) ? "compacted" : "standard");
	return;

out_disable:
	/* something went wrong, try to boot without any XSAVE support */
	fpu__init_disable_system_xstate(legacy_size);
}

/*
 * Restore minimal FPU state after suspend:
 */
void fpu__resume_cpu(void)
{
	/*
	 * Restore XCR0 on xsave capable CPUs:
	 */
	if (cpu_feature_enabled(X86_FEATURE_XSAVE))
		xsetbv(XCR_XFEATURE_ENABLED_MASK, fpu_user_cfg.max_features);

	/*
	 * Restore IA32_XSS. The same CPUID bit enumerates support
	 * of XSAVES and MSR_IA32_XSS.
	 */
	if (cpu_feature_enabled(X86_FEATURE_XSAVES)) {
		wrmsrq(MSR_IA32_XSS, xfeatures_mask_supervisor() |
				     xfeatures_mask_independent());
	}

	if (fpu_state_size_dynamic())
		wrmsrq(MSR_IA32_XFD, x86_task_fpu(current)->fpstate->xfd);
}

/*
 * Given an xstate feature nr, calculate where in the xsave
 * buffer the state is. Callers should ensure that the buffer
 * is valid.
 */
static void *__raw_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
{
	u64 xcomp_bv = xsave->header.xcomp_bv;

	if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr)))
		return NULL;

	if (cpu_feature_enabled(X86_FEATURE_XCOMPACTED)) {
		if (WARN_ON_ONCE(!(xcomp_bv & BIT_ULL(xfeature_nr))))
			return NULL;
	}

	return (void *)xsave + xfeature_get_offset(xcomp_bv, xfeature_nr);
}

/*
 * Given the xsave area and a state inside, this function returns the
 * address of the state.
 *
 * This is the API that is called to get xstate address in either
 * standard format or compacted format of xsave area.
 *
 * Note that if there is no data for the field in the xsave buffer
 * this will return NULL.
 *
 * Inputs:
 *	xstate: the thread's storage area for all FPU data
 *	xfeature_nr: state which is defined in xsave.h (e.g. XFEATURE_FP,
 *	XFEATURE_SSE, etc...)
 * Output:
 *	address of the state in the xsave area, or NULL if the
 *	field is not present in the xsave buffer.
 */
void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
{
	/*
	 * Do we even *have* xsave state?
	 */
	if (!boot_cpu_has(X86_FEATURE_XSAVE))
		return NULL;

	/*
	 * We should not ever be requesting features that we
	 * have not enabled.
	 */
	if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr)))
		return NULL;

	/*
	 * This assumes the last 'xsave*' instruction to
	 * have requested that 'xfeature_nr' be saved.
	 * If it did not, we might be seeing an old value
	 * of the field in the buffer.
	 *
	 * This can happen because the last 'xsave' did not
	 * request that this feature be saved (unlikely)
	 * or because the "init optimization" caused it
	 * to not be saved.
	 */
	if (!(xsave->header.xfeatures & BIT_ULL(xfeature_nr)))
		return NULL;

	return __raw_xsave_addr(xsave, xfeature_nr);
}
EXPORT_SYMBOL_GPL(get_xsave_addr);
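/*
 * Illustrative use (sketch, not from this file): after an XSAVE into
 * @xsave, a caller can locate a component like this:
 *
 *	struct pkru_state *pk = get_xsave_addr(xsave, XFEATURE_PKRU);
 *
 *	if (pk)
 *		pr_info("PKRU: %x\n", pk->pkru);
 *
 * A NULL return means the component is not present in the buffer (in its
 * init state or disabled) and must be treated as such rather than read.
 */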

/*
 * Given an xstate feature nr, calculate where in the xsave buffer the state is.
 * The xsave buffer should be in standard format, not compacted (e.g. user mode
 * signal frames).
 */
void __user *get_xsave_addr_user(struct xregs_state __user *xsave, int xfeature_nr)
{
	if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr)))
		return NULL;

	return (void __user *)xsave + xstate_offsets[xfeature_nr];
}

#ifdef CONFIG_ARCH_HAS_PKEYS

/*
 * This will go out and modify the PKRU register to set the access
 * rights for @pkey to @init_val.
 */
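/*
 * PKRU layout reminder: each pkey owns two adjacent bits, AD (access
 * disable) and WD (write disable), so pkey N maps to bits [2N+1:2N]. For
 * example, PKEY_DISABLE_WRITE for pkey 2 ends up as PKRU_WD_BIT << 4.
 */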
int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
			      unsigned long init_val)
{
	u32 old_pkru, new_pkru_bits = 0;
	int pkey_shift;

	/*
	 * This check implies XSAVE support. OSPKE only gets
	 * set if we enable XSAVE and we enable PKU in XCR0.
	 */
	if (!cpu_feature_enabled(X86_FEATURE_OSPKE))
		return -EINVAL;

	/*
	 * This code should only be called with valid 'pkey'
	 * values originating from in-kernel users. Complain
	 * if a bad value is observed.
	 */
	if (WARN_ON_ONCE(pkey >= arch_max_pkey()))
		return -EINVAL;

	/* Set the bits we need in PKRU: */
	if (init_val & PKEY_DISABLE_ACCESS)
		new_pkru_bits |= PKRU_AD_BIT;
	if (init_val & PKEY_DISABLE_WRITE)
		new_pkru_bits |= PKRU_WD_BIT;

	/* Shift the bits in to the correct place in PKRU for pkey: */
	pkey_shift = pkey * PKRU_BITS_PER_PKEY;
	new_pkru_bits <<= pkey_shift;

	/* Get old PKRU and mask off any old bits in place: */
	old_pkru = read_pkru();
	old_pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift);

	/* Write old part along with new part: */
	write_pkru(old_pkru | new_pkru_bits);

	return 0;
}
#endif /* ! CONFIG_ARCH_HAS_PKEYS */

static void copy_feature(bool from_xstate, struct membuf *to, void *xstate,
			 void *init_xstate, unsigned int size)
{
	membuf_write(to, from_xstate ? xstate : init_xstate, size);
}

/**
 * __copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer
 * @to:		membuf descriptor
 * @fpstate:	The fpstate buffer from which to copy
 * @xfeatures:	The mask of xfeatures to save (XSAVE mode only)
 * @pkru_val:	The PKRU value to store in the PKRU component
 * @copy_mode:	The requested copy mode
 *
 * Converts from kernel XSAVE or XSAVES compacted format to UABI conforming
 * format, i.e. from the kernel internal hardware dependent storage format
 * to the requested @copy_mode. UABI XSTATE is always uncompacted!
 *
 * It supports partial copy but @to.pos always starts from zero.
 */
void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
			       u64 xfeatures, u32 pkru_val,
			       enum xstate_copy_mode copy_mode)
{
	const unsigned int off_mxcsr = offsetof(struct fxregs_state, mxcsr);
	struct xregs_state *xinit = &init_fpstate.regs.xsave;
	struct xregs_state *xsave = &fpstate->regs.xsave;
	unsigned int zerofrom, i, xfeature;
	struct xstate_header header;
	u64 mask;

	memset(&header, 0, sizeof(header));
	header.xfeatures = xsave->header.xfeatures;

	/* Mask out the feature bits depending on copy mode */
	switch (copy_mode) {
	case XSTATE_COPY_FP:
		header.xfeatures &= XFEATURE_MASK_FP;
		break;

	case XSTATE_COPY_FX:
		header.xfeatures &= XFEATURE_MASK_FP | XFEATURE_MASK_SSE;
		break;

	case XSTATE_COPY_XSAVE:
		header.xfeatures &= fpstate->user_xfeatures & xfeatures;
		break;
	}

	/* Copy FP state up to MXCSR */
	copy_feature(header.xfeatures & XFEATURE_MASK_FP, &to, &xsave->i387,
		     &xinit->i387, off_mxcsr);

	/* Copy MXCSR when SSE or YMM are set in the feature mask */
	copy_feature(header.xfeatures & (XFEATURE_MASK_SSE | XFEATURE_MASK_YMM),
		     &to, &xsave->i387.mxcsr, &xinit->i387.mxcsr,
		     MXCSR_AND_FLAGS_SIZE);

	/* Copy the remaining FP state */
	copy_feature(header.xfeatures & XFEATURE_MASK_FP,
		     &to, &xsave->i387.st_space, &xinit->i387.st_space,
		     sizeof(xsave->i387.st_space));

	/* Copy the SSE state - shared with YMM, but independently managed */
	copy_feature(header.xfeatures & XFEATURE_MASK_SSE,
		     &to, &xsave->i387.xmm_space, &xinit->i387.xmm_space,
		     sizeof(xsave->i387.xmm_space));

	if (copy_mode != XSTATE_COPY_XSAVE)
		goto out;

	/* Zero the padding area */
	membuf_zero(&to, sizeof(xsave->i387.padding));

	/* Copy xsave->i387.sw_reserved */
	membuf_write(&to, xstate_fx_sw_bytes, sizeof(xsave->i387.sw_reserved));

	/* Copy the user space relevant state of @xsave->header */
	membuf_write(&to, &header, sizeof(header));

	zerofrom = offsetof(struct xregs_state, extended_state_area);

	/*
	 * This 'mask' indicates which states to copy from fpstate.
	 * Those extended states that are not present in fpstate are
	 * either disabled or initialized:
	 *
	 * In non-compacted format, disabled features still occupy
	 * state space but there is no state to copy from in the
	 * compacted init_fpstate. The gap tracking will zero these
	 * states.
	 *
	 * The extended features have an all zeroes init state. Thus,
	 * remove them from 'mask' to zero those features in the user
	 * buffer instead of retrieving them from init_fpstate.
	 */
	mask = header.xfeatures;

	for_each_extended_xfeature_in_order(i, mask) {
		xfeature = xfeature_uncompact_order[i];
		/*
		 * If there was a feature or alignment gap, zero the space
		 * in the destination buffer.
		 */
		if (zerofrom < xstate_offsets[xfeature])
			membuf_zero(&to, xstate_offsets[xfeature] - zerofrom);

		if (xfeature == XFEATURE_PKRU) {
			struct pkru_state pkru = {0};
			/*
			 * PKRU is not necessarily up to date in the
			 * XSAVE buffer. Use the provided value.
			 */
			pkru.pkru = pkru_val;
			membuf_write(&to, &pkru, sizeof(pkru));
		} else {
			membuf_write(&to,
				     __raw_xsave_addr(xsave, xfeature),
				     xstate_sizes[xfeature]);
		}
		/*
		 * Keep track of the last copied state in the non-compacted
		 * target buffer for gap zeroing.
		 */
		zerofrom = xstate_offsets[xfeature] + xstate_sizes[xfeature];
	}

out:
	if (to.left)
		membuf_zero(&to, to.left);
}

/**
 * copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer
 * @to:		membuf descriptor
 * @tsk:	The task from which to copy the saved xstate
 * @copy_mode:	The requested copy mode
 *
 * Converts from kernel XSAVE or XSAVES compacted format to UABI conforming
 * format, i.e. from the kernel internal hardware dependent storage format
 * to the requested @copy_mode. UABI XSTATE is always uncompacted!
 *
 * It supports partial copy but @to.pos always starts from zero.
 */
void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
			     enum xstate_copy_mode copy_mode)
{
	__copy_xstate_to_uabi_buf(to, x86_task_fpu(tsk)->fpstate,
				  x86_task_fpu(tsk)->fpstate->user_xfeatures,
				  tsk->thread.pkru, copy_mode);
}

static int copy_from_buffer(void *dst, unsigned int offset, unsigned int size,
			    const void *kbuf, const void __user *ubuf)
{
	if (kbuf) {
		memcpy(dst, kbuf + offset, size);
	} else {
		if (copy_from_user(dst, ubuf + offset, size))
			return -EFAULT;
	}
	return 0;
}


/**
 * copy_uabi_to_xstate - Copy a UABI format buffer to the kernel xstate
 * @fpstate:	The fpstate buffer to copy to
 * @kbuf:	The UABI format buffer, if it comes from the kernel
 * @ubuf:	The UABI format buffer, if it comes from userspace
 * @pkru:	The location to write the PKRU value to
 *
 * Converts from the UABI format into the kernel internal hardware
 * dependent format.
 *
 * This function ultimately has three different callers with distinct PKRU
 * behavior.
 * 1.	When called from sigreturn the PKRU register will be restored from
 *	@fpstate via an XRSTOR. Correctly copying the UABI format buffer to
 *	@fpstate is sufficient to cover this case, but the caller will also
 *	pass a pointer to the thread_struct's pkru field in @pkru and updating
 *	it is harmless.
 * 2.	When called from ptrace the PKRU register will be restored from the
 *	thread_struct's pkru field. A pointer to that is passed in @pkru.
 *	The kernel will restore it manually, so the XRSTOR behavior that resets
 *	the PKRU register to the hardware init value (0) if the corresponding
 *	xfeatures bit is not set is emulated here.
 * 3.	When called from KVM the PKRU register will be restored from the vcpu's
 *	pkru field. A pointer to that is passed in @pkru. KVM hasn't used
 *	XRSTOR and hasn't had the PKRU resetting behavior described above. To
 *	preserve that KVM behavior, it passes NULL for @pkru if the xfeatures
 *	bit is not set.
 */
static int copy_uabi_to_xstate(struct fpstate *fpstate, const void *kbuf,
			       const void __user *ubuf, u32 *pkru)
{
	struct xregs_state *xsave = &fpstate->regs.xsave;
	unsigned int offset, size;
	struct xstate_header hdr;
	u64 mask;
	int i;

	offset = offsetof(struct xregs_state, header);
	if (copy_from_buffer(&hdr, offset, sizeof(hdr), kbuf, ubuf))
		return -EFAULT;

	if (validate_user_xstate_header(&hdr, fpstate))
		return -EINVAL;

	/* Validate MXCSR when any of the related features is in use */
	mask = XFEATURE_MASK_FP | XFEATURE_MASK_SSE | XFEATURE_MASK_YMM;
	if (hdr.xfeatures & mask) {
		u32 mxcsr[2];

		offset = offsetof(struct fxregs_state, mxcsr);
		if (copy_from_buffer(mxcsr, offset, sizeof(mxcsr), kbuf, ubuf))
			return -EFAULT;

		/* Reserved bits in MXCSR must be zero. */
		if (mxcsr[0] & ~mxcsr_feature_mask)
			return -EINVAL;

		/* SSE and YMM require MXCSR even when FP is not in use. */
		if (!(hdr.xfeatures & XFEATURE_MASK_FP)) {
			xsave->i387.mxcsr = mxcsr[0];
			xsave->i387.mxcsr_mask = mxcsr[1];
		}
	}

	for (i = 0; i < XFEATURE_MAX; i++) {
		mask = BIT_ULL(i);

		if (hdr.xfeatures & mask) {
			void *dst = __raw_xsave_addr(xsave, i);

			offset = xstate_offsets[i];
			size = xstate_sizes[i];

			if (copy_from_buffer(dst, offset, size, kbuf, ubuf))
				return -EFAULT;
		}
	}

	if (hdr.xfeatures & XFEATURE_MASK_PKRU) {
		struct pkru_state *xpkru;

		xpkru = __raw_xsave_addr(xsave, XFEATURE_PKRU);
		*pkru = xpkru->pkru;
	} else {
		/*
		 * KVM may pass NULL here to indicate that it does not need
		 * PKRU updated.
		 */
		if (pkru)
			*pkru = 0;
	}

	/*
	 * The state that came in from userspace was user-state only.
	 * Mask all the user states out of 'xfeatures':
	 */
	xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR_ALL;

	/*
	 * Add back in the features that came in from userspace:
	 */
	xsave->header.xfeatures |= hdr.xfeatures;

	return 0;
}

/*
 * Convert from a ptrace standard-format kernel buffer to kernel XSAVE[S]
 * format and copy to the target thread. Used by ptrace and KVM.
 */
int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u32 *pkru)
{
	return copy_uabi_to_xstate(fpstate, kbuf, NULL, pkru);
}

/*
 * Convert from a sigreturn standard-format user-space buffer to kernel
 * XSAVE[S] format and copy to the target thread. This is called from the
 * sigreturn() and rt_sigreturn() system calls.
 */
int copy_sigframe_from_user_to_xstate(struct task_struct *tsk,
				      const void __user *ubuf)
{
	return copy_uabi_to_xstate(x86_task_fpu(tsk)->fpstate, NULL, ubuf, &tsk->thread.pkru);
}

static bool validate_independent_components(u64 mask)
{
	u64 xchk;

	if (WARN_ON_FPU(!cpu_feature_enabled(X86_FEATURE_XSAVES)))
		return false;

	xchk = ~xfeatures_mask_independent();

	if (WARN_ON_ONCE(!mask || mask & xchk))
		return false;

	return true;
}

/**
 * xsaves - Save selected components to a kernel xstate buffer
 * @xstate:	Pointer to the buffer
 * @mask:	Feature mask to select the components to save
 *
 * The @xstate buffer must be 64 byte aligned and correctly initialized as
 * XSAVES does not write the full xstate header. Before first use the
 * buffer should be zeroed otherwise a subsequent XRSTORS from that buffer
 * can #GP.
 *
 * The feature mask must be a subset of the independent features.
 */
void xsaves(struct xregs_state *xstate, u64 mask)
{
	int err;

	if (!validate_independent_components(mask))
		return;

	XSTATE_OP(XSAVES, xstate, (u32)mask, (u32)(mask >> 32), err);
	WARN_ON_ONCE(err);
}

/**
 * xrstors - Restore selected components from a kernel xstate buffer
 * @xstate:	Pointer to the buffer
 * @mask:	Feature mask to select the components to restore
 *
 * The @xstate buffer must be 64 byte aligned and correctly initialized
 * otherwise XRSTORS from that buffer can #GP.
 *
 * Proper usage is to restore the state which was saved with
 * xsaves() into @xstate.
 *
 * The feature mask must be a subset of the independent features.
 */
void xrstors(struct xregs_state *xstate, u64 mask)
{
	int err;

	if (!validate_independent_components(mask))
		return;

	XSTATE_OP(XRSTORS, xstate, (u32)mask, (u32)(mask >> 32), err);
	WARN_ON_ONCE(err);
}

#if IS_ENABLED(CONFIG_KVM)
void fpstate_clear_xstate_component(struct fpstate *fpstate, unsigned int xfeature)
{
	void *addr = get_xsave_addr(&fpstate->regs.xsave, xfeature);

	if (addr)
		memset(addr, 0, xstate_sizes[xfeature]);
}
EXPORT_SYMBOL_GPL(fpstate_clear_xstate_component);
#endif

#ifdef CONFIG_X86_64

#ifdef CONFIG_X86_DEBUG_FPU
/*
 * Ensure that a subsequent XSAVE* or XRSTOR* instruction with RFBM=@mask
 * can safely operate on the @fpstate buffer.
 */
static bool xstate_op_valid(struct fpstate *fpstate, u64 mask, bool rstor)
{
	u64 xfd = __this_cpu_read(xfd_state);

	if (fpstate->xfd == xfd)
		return true;

	/*
	 * The XFD MSR does not match fpstate->xfd. That's invalid when
	 * the passed in fpstate is current's fpstate.
	 */
	if (fpstate->xfd == x86_task_fpu(current)->fpstate->xfd)
		return false;

	/*
	 * XRSTOR(S) from init_fpstate are always correct as it will just
	 * bring all components into init state and not read from the
	 * buffer. XSAVE(S) raises #PF after init.
	 */
	if (fpstate == &init_fpstate)
		return rstor;

	/*
	 * XSAVE(S): clone(), fpu_swap_kvm_fpstate()
	 * XRSTOR(S): fpu_swap_kvm_fpstate()
	 */

	/*
	 * No XSAVE/XRSTOR instructions (except XSAVE itself) touch
	 * the buffer area for XFD-disabled state components.
	 */
	mask &= ~xfd;

	/*
	 * Remove features which are valid in fpstate. They
	 * have space allocated in fpstate.
	 */
	mask &= ~fpstate->xfeatures;

	/*
	 * Any remaining state components in 'mask' might be written
	 * by XSAVE/XRSTOR. Fail validation if found.
	 */
	return !mask;
}

void xfd_validate_state(struct fpstate *fpstate, u64 mask, bool rstor)
{
	WARN_ON_ONCE(!xstate_op_valid(fpstate, mask, rstor));
}
#endif /* CONFIG_X86_DEBUG_FPU */

static int __init xfd_update_static_branch(void)
{
	/*
	 * If init_fpstate.xfd has bits set then dynamic features are
	 * available and the dynamic sizing must be enabled.
	 */
	if (init_fpstate.xfd)
		static_branch_enable(&__fpu_state_size_dynamic);
	return 0;
}
arch_initcall(xfd_update_static_branch)

void fpstate_free(struct fpu *fpu)
{
	if (fpu->fpstate && fpu->fpstate != &fpu->__fpstate)
		vfree(fpu->fpstate);
}

/**
 * fpstate_realloc - Reallocate struct fpstate for the requested new features
 *
 * @xfeatures:	A bitmap of xstate features which extend the enabled features
 *		of that task
 * @ksize:	The required size for the kernel buffer
 * @usize:	The required size for user space buffers
 * @guest_fpu:	Pointer to a guest FPU container. NULL for host allocations
 *
 * Note vs. vmalloc(): If the task with a vzalloc()-allocated buffer
 * terminates quickly, vfree()-induced IPIs may be a concern, but tasks
 * with large states are likely to live longer.
 *
 * Returns: 0 on success, -ENOMEM on allocation error.
 */
static int fpstate_realloc(u64 xfeatures, unsigned int ksize,
			   unsigned int usize, struct fpu_guest *guest_fpu)
{
	struct fpu *fpu = x86_task_fpu(current);
	struct fpstate *curfps, *newfps = NULL;
	unsigned int fpsize;
	bool in_use;

	fpsize = ksize + ALIGN(offsetof(struct fpstate, regs), 64);

	newfps = vzalloc(fpsize);
	if (!newfps)
		return -ENOMEM;
	newfps->size = ksize;
	newfps->user_size = usize;
	newfps->is_valloc = true;

	/*
	 * When a guest FPU is supplied, use @guest_fpu->fpstate
	 * as the reference, independent of whether it is in use or not.
	 */
	curfps = guest_fpu ? guest_fpu->fpstate : fpu->fpstate;

	/* Determine whether @curfps is the active fpstate */
	in_use = fpu->fpstate == curfps;

	if (guest_fpu) {
		newfps->is_guest = true;
		newfps->is_confidential = curfps->is_confidential;
		newfps->in_use = curfps->in_use;
		guest_fpu->xfeatures |= xfeatures;
		guest_fpu->uabi_size = usize;
	}

	fpregs_lock();
	/*
	 * If @curfps is in use, ensure that the current state is in the
	 * registers before swapping fpstate as that might invalidate it
	 * due to layout changes.
	 */
	if (in_use && test_thread_flag(TIF_NEED_FPU_LOAD))
		fpregs_restore_userregs();

	newfps->xfeatures = curfps->xfeatures | xfeatures;
	newfps->user_xfeatures = curfps->user_xfeatures | xfeatures;
	newfps->xfd = curfps->xfd & ~xfeatures;

	/* Do the final updates within the locked region */
	xstate_init_xcomp_bv(&newfps->regs.xsave, newfps->xfeatures);

	if (guest_fpu) {
		guest_fpu->fpstate = newfps;
		/* If curfps is active, update the FPU fpstate pointer */
		if (in_use)
			fpu->fpstate = newfps;
	} else {
		fpu->fpstate = newfps;
	}

	if (in_use)
		xfd_update_state(fpu->fpstate);
	fpregs_unlock();

	/* Only free valloc'ed state */
	if (curfps && curfps->is_valloc)
		vfree(curfps);

	return 0;
}

static int validate_sigaltstack(unsigned int usize)
{
	struct task_struct *thread, *leader = current->group_leader;
	unsigned long framesize = get_sigframe_size();

	lockdep_assert_held(&current->sighand->siglock);

	/* get_sigframe_size() is based on fpu_user_cfg.max_size */
	framesize -= fpu_user_cfg.max_size;
	framesize += usize;
	for_each_thread(leader, thread) {
		if (thread->sas_ss_size && thread->sas_ss_size < framesize)
			return -ENOSPC;
	}
	return 0;
}

static int __xstate_request_perm(u64 permitted, u64 requested, bool guest)
{
	/*
	 * This deliberately does not exclude !XSAVES as we still might
	 * decide to optionally context switch XCR0 or talk the silicon
	 * vendors into extending XFD for the pre AMX states, especially
	 * AVX512.
	 */
	bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
	struct fpu *fpu = x86_task_fpu(current->group_leader);
	struct fpu_state_perm *perm;
	unsigned int ksize, usize;
	u64 mask;
	int ret = 0;

	/* Check whether fully enabled */
	if ((permitted & requested) == requested)
		return 0;

	/*
	 * Calculate the resulting kernel state size. Note, @permitted also
	 * contains supervisor xfeatures even though supervisor xfeatures
	 * are always permitted for kernel and guest FPUs, and never
	 * permitted for user FPUs.
	 */
	mask = permitted | requested;
	ksize = xstate_calculate_size(mask, compacted);

	/*
	 * Calculate the resulting user state size. Take care not to clobber
	 * the supervisor xfeatures in the new mask!
	 */
	usize = xstate_calculate_size(mask & XFEATURE_MASK_USER_SUPPORTED, false);

	if (!guest) {
		ret = validate_sigaltstack(usize);
		if (ret)
			return ret;
	}

	perm = guest ? &fpu->guest_perm : &fpu->perm;
	/* Pairs with the READ_ONCE() in xstate_get_group_perm() */
	WRITE_ONCE(perm->__state_perm, mask);
	/* Protected by sighand lock */
	perm->__state_size = ksize;
	perm->__user_state_size = usize;
	return ret;
}

/*
 * Permissions array to map facilities with more than one component
 */
static const u64 xstate_prctl_req[XFEATURE_MAX] = {
	[XFEATURE_XTILE_DATA] = XFEATURE_MASK_XTILE_DATA,
};

static int xstate_request_perm(unsigned long idx, bool guest)
{
	u64 permitted, requested;
	int ret;

	if (idx >= XFEATURE_MAX)
		return -EINVAL;

	/*
	 * Look up the facility mask which can require more than
	 * one xstate component.
	 */
	idx = array_index_nospec(idx, ARRAY_SIZE(xstate_prctl_req));
	requested = xstate_prctl_req[idx];
	if (!requested)
		return -EOPNOTSUPP;

	if ((fpu_user_cfg.max_features & requested) != requested)
		return -EOPNOTSUPP;

	/* Lockless quick check */
	permitted = xstate_get_group_perm(guest);
	if ((permitted & requested) == requested)
		return 0;

	/* Protect against concurrent modifications */
	spin_lock_irq(&current->sighand->siglock);
	permitted = xstate_get_group_perm(guest);

	/* First vCPU allocation locks the permissions. */
	if (guest && (permitted & FPU_GUEST_PERM_LOCKED))
		ret = -EBUSY;
	else
		ret = __xstate_request_perm(permitted, requested, guest);
	spin_unlock_irq(&current->sighand->siglock);
	return ret;
}

int __xfd_enable_feature(u64 xfd_err, struct fpu_guest *guest_fpu)
{
	u64 xfd_event = xfd_err & XFEATURE_MASK_USER_DYNAMIC;
	struct fpu_state_perm *perm;
	unsigned int ksize, usize;
	struct fpu *fpu;

	if (!xfd_event) {
		if (!guest_fpu)
			pr_err_once("XFD: Invalid xfd error: %016llx\n", xfd_err);
		return 0;
	}

	/* Protect against concurrent modifications */
	spin_lock_irq(&current->sighand->siglock);

	/* If not permitted let it die */
	if ((xstate_get_group_perm(!!guest_fpu) & xfd_event) != xfd_event) {
		spin_unlock_irq(&current->sighand->siglock);
		return -EPERM;
	}

	fpu = x86_task_fpu(current->group_leader);
	perm = guest_fpu ? &fpu->guest_perm : &fpu->perm;
	ksize = perm->__state_size;
	usize = perm->__user_state_size;

	/*
	 * The feature is permitted. State size is sufficient. Dropping
	 * the lock is safe here even if more features are added from
	 * another task, the retrieved buffer sizes are valid for the
	 * currently requested feature(s).
	 */
	spin_unlock_irq(&current->sighand->siglock);

	/*
	 * Try to allocate a new fpstate. If that fails there is no way
	 * out.
	 */
	if (fpstate_realloc(xfd_event, ksize, usize, guest_fpu))
		return -EFAULT;
	return 0;
}

int xfd_enable_feature(u64 xfd_err)
{
	return __xfd_enable_feature(xfd_err, NULL);
}

#else /* CONFIG_X86_64 */
static inline int xstate_request_perm(unsigned long idx, bool guest)
{
	return -EPERM;
}
#endif /* !CONFIG_X86_64 */

u64 xstate_get_guest_group_perm(void)
{
	return xstate_get_group_perm(true);
}
EXPORT_SYMBOL_GPL(xstate_get_guest_group_perm);

/**
 * fpu_xstate_prctl - xstate permission operations
 * @option:	A subfunction of arch_prctl()
 * @arg2:	option argument
 * Return:	0 if successful; otherwise, an error code
 *
 * Option arguments:
 *
 * ARCH_GET_XCOMP_SUPP: Pointer to user space u64 to store the info
 * ARCH_GET_XCOMP_PERM: Pointer to user space u64 to store the info
 * ARCH_REQ_XCOMP_PERM: Facility number requested
 *
 * For facilities which require more than one XSTATE component, the request
 * must be the highest state component number related to that facility,
 * e.g. for AMX which requires XFEATURE_XTILE_CFG(17) and
 * XFEATURE_XTILE_DATA(18) this would be XFEATURE_XTILE_DATA(18).
 */
long fpu_xstate_prctl(int option, unsigned long arg2)
{
	u64 __user *uptr = (u64 __user *)arg2;
	u64 permitted, supported;
	unsigned long idx = arg2;
	bool guest = false;

	switch (option) {
	case ARCH_GET_XCOMP_SUPP:
		supported = fpu_user_cfg.max_features | fpu_user_cfg.legacy_features;
		return put_user(supported, uptr);

	case ARCH_GET_XCOMP_PERM:
		/*
		 * Lockless snapshot as it can also change right after
		 * dropping the lock.
		 */
		permitted = xstate_get_host_group_perm();
		permitted &= XFEATURE_MASK_USER_SUPPORTED;
		return put_user(permitted, uptr);

	case ARCH_GET_XCOMP_GUEST_PERM:
		permitted = xstate_get_guest_group_perm();
		permitted &= XFEATURE_MASK_USER_SUPPORTED;
		return put_user(permitted, uptr);

	case ARCH_REQ_XCOMP_GUEST_PERM:
		guest = true;
		fallthrough;

	case ARCH_REQ_XCOMP_PERM:
		if (!IS_ENABLED(CONFIG_X86_64))
			return -EOPNOTSUPP;

		return xstate_request_perm(idx, guest);

	default:
		return -EINVAL;
	}
}
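/*
 * Illustrative userspace flow (sketch, not from this file): before
 * touching AMX tile registers a process is expected to do:
 *
 *	arch_prctl(ARCH_REQ_XCOMP_PERM, XFEATURE_XTILE_DATA);
 *
 * after which the first AMX use triggers an XFD #NM and the kernel
 * reallocates the task's fpstate via __xfd_enable_feature() above.
 */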

#ifdef CONFIG_PROC_PID_ARCH_STATUS
/*
 * Report the amount of time elapsed in milliseconds since last AVX512
 * use in the task. Report -1 if no AVX-512 usage.
 */
static void avx512_status(struct seq_file *m, struct task_struct *task)
{
	unsigned long timestamp;
	long delta = -1;

	/* AVX-512 usage is not tracked for kernel threads. Don't report anything. */
	if (task->flags & (PF_KTHREAD | PF_USER_WORKER))
		return;

	timestamp = READ_ONCE(x86_task_fpu(task)->avx512_timestamp);

	if (timestamp) {
		delta = (long)(jiffies - timestamp);
		/*
		 * Cap to LONG_MAX if time difference > LONG_MAX
		 */
		if (delta < 0)
			delta = LONG_MAX;
		delta = jiffies_to_msecs(delta);
	}

	seq_put_decimal_ll(m, "AVX512_elapsed_ms:\t", delta);
	seq_putc(m, '\n');
}

/*
 * Report architecture specific information
 */
int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
			 struct pid *pid, struct task_struct *task)
{
	/*
	 * Report AVX512 state if the processor and build option are supported.
	 */
	if (cpu_feature_enabled(X86_FEATURE_AVX512F))
		avx512_status(m, task);

	return 0;
}
#endif /* CONFIG_PROC_PID_ARCH_STATUS */

#ifdef CONFIG_COREDUMP
static const char owner_name[] = "LINUX";

/*
 * Dump type, size, offset and flag values for every xfeature that is present.
 */
static int dump_xsave_layout_desc(struct coredump_params *cprm)
{
	int num_records = 0;
	int i;

	for_each_extended_xfeature(i, fpu_user_cfg.max_features) {
		struct x86_xfeat_component xc = {
			.type = i,
			.size = xstate_sizes[i],
			.offset = xstate_offsets[i],
			/* reserved for future use */
			.flags = 0,
		};

		if (!dump_emit(cprm, &xc, sizeof(xc)))
			return 0;

		num_records++;
	}
	return num_records;
}

static u32 get_xsave_desc_size(void)
{
	u32 cnt = 0;
	u32 i;

	for_each_extended_xfeature(i, fpu_user_cfg.max_features)
		cnt++;

	return cnt * (sizeof(struct x86_xfeat_component));
}

int elf_coredump_extra_notes_write(struct coredump_params *cprm)
{
	int num_records = 0;
	struct elf_note en;

	if (!fpu_user_cfg.max_features)
		return 0;

	en.n_namesz = sizeof(owner_name);
	en.n_descsz = get_xsave_desc_size();
	en.n_type = NT_X86_XSAVE_LAYOUT;

	if (!dump_emit(cprm, &en, sizeof(en)))
		return 1;
	if (!dump_emit(cprm, owner_name, en.n_namesz))
		return 1;
	if (!dump_align(cprm, 4))
		return 1;

	num_records = dump_xsave_layout_desc(cprm);
	if (!num_records)
		return 1;

	/* Total size must match the number of records times the record size */
	if ((sizeof(struct x86_xfeat_component) * num_records) != en.n_descsz)
		return 1;

	return 0;
}

int elf_coredump_extra_notes_size(void)
{
	int size;

	if (!fpu_user_cfg.max_features)
		return 0;

	/* .note header */
	size  = sizeof(struct elf_note);
	/* Name plus alignment to 4 bytes */
	size += roundup(sizeof(owner_name), 4);
	size += get_xsave_desc_size();

	return size;
}
#endif /* CONFIG_COREDUMP */