GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/kernel/fpu/xstate.c
1
// SPDX-License-Identifier: GPL-2.0-only
2
/*
3
* xsave/xrstor support.
4
*
5
* Author: Suresh Siddha <[email protected]>
6
*/
7
#include <linux/bitops.h>
8
#include <linux/compat.h>
9
#include <linux/cpu.h>
10
#include <linux/mman.h>
11
#include <linux/kvm_types.h>
12
#include <linux/nospec.h>
13
#include <linux/pkeys.h>
14
#include <linux/seq_file.h>
15
#include <linux/proc_fs.h>
16
#include <linux/vmalloc.h>
17
#include <linux/coredump.h>
18
#include <linux/sort.h>
19
20
#include <asm/fpu/api.h>
21
#include <asm/fpu/regset.h>
22
#include <asm/fpu/signal.h>
23
#include <asm/fpu/xcr.h>
24
25
#include <asm/cpuid/api.h>
26
#include <asm/msr.h>
27
#include <asm/tlbflush.h>
28
#include <asm/prctl.h>
29
#include <asm/elf.h>
30
31
#include <uapi/asm/elf.h>
32
33
#include "context.h"
34
#include "internal.h"
35
#include "legacy.h"
36
#include "xstate.h"
37
38
#define for_each_extended_xfeature(bit, mask) \
39
(bit) = FIRST_EXTENDED_XFEATURE; \
40
for_each_set_bit_from(bit, (unsigned long *)&(mask), 8 * sizeof(mask))
41
42
/*
43
* Although we spell it out in here, the Processor Trace
44
* xfeature is completely unused. We use other mechanisms
45
* to save/restore PT state in Linux.
46
*/
47
static const char *xfeature_names[] =
48
{
49
"x87 floating point registers",
50
"SSE registers",
51
"AVX registers",
52
"MPX bounds registers",
53
"MPX CSR",
54
"AVX-512 opmask",
55
"AVX-512 Hi256",
56
"AVX-512 ZMM_Hi256",
57
"Processor Trace (unused)",
58
"Protection Keys User registers",
59
"PASID state",
60
"Control-flow User registers",
61
"Control-flow Kernel registers (KVM only)",
62
"unknown xstate feature",
63
"unknown xstate feature",
64
"unknown xstate feature",
65
"unknown xstate feature",
66
"AMX Tile config",
67
"AMX Tile data",
68
"APX registers",
69
"unknown xstate feature",
70
};
71
72
static unsigned short xsave_cpuid_features[] __initdata = {
73
[XFEATURE_FP] = X86_FEATURE_FPU,
74
[XFEATURE_SSE] = X86_FEATURE_XMM,
75
[XFEATURE_YMM] = X86_FEATURE_AVX,
76
[XFEATURE_BNDREGS] = X86_FEATURE_MPX,
77
[XFEATURE_BNDCSR] = X86_FEATURE_MPX,
78
[XFEATURE_OPMASK] = X86_FEATURE_AVX512F,
79
[XFEATURE_ZMM_Hi256] = X86_FEATURE_AVX512F,
80
[XFEATURE_Hi16_ZMM] = X86_FEATURE_AVX512F,
81
[XFEATURE_PT_UNIMPLEMENTED_SO_FAR] = X86_FEATURE_INTEL_PT,
82
[XFEATURE_PKRU] = X86_FEATURE_OSPKE,
83
[XFEATURE_PASID] = X86_FEATURE_ENQCMD,
84
[XFEATURE_CET_USER] = X86_FEATURE_SHSTK,
85
[XFEATURE_CET_KERNEL] = X86_FEATURE_SHSTK,
86
[XFEATURE_XTILE_CFG] = X86_FEATURE_AMX_TILE,
87
[XFEATURE_XTILE_DATA] = X86_FEATURE_AMX_TILE,
88
[XFEATURE_APX] = X86_FEATURE_APX,
89
};
90
91
static unsigned int xstate_offsets[XFEATURE_MAX] __ro_after_init =
92
{ [ 0 ... XFEATURE_MAX - 1] = -1};
93
static unsigned int xstate_sizes[XFEATURE_MAX] __ro_after_init =
94
{ [ 0 ... XFEATURE_MAX - 1] = -1};
95
static unsigned int xstate_flags[XFEATURE_MAX] __ro_after_init;
96
97
/*
98
* Ordering of xstate components in uncompacted format: The xfeature
99
* number does not necessarily indicate its position in the XSAVE buffer.
100
* This array defines the traversal order of xstate features.
101
*/
102
static unsigned int xfeature_uncompact_order[XFEATURE_MAX] __ro_after_init =
103
{ [ 0 ... XFEATURE_MAX - 1] = -1};
104
105
static inline unsigned int next_xfeature_order(unsigned int i, u64 mask)
106
{
107
for (; xfeature_uncompact_order[i] != -1; i++) {
108
if (mask & BIT_ULL(xfeature_uncompact_order[i]))
109
break;
110
}
111
112
return i;
113
}
114
115
/* Iterate xstate features in uncompacted order: */
116
#define for_each_extended_xfeature_in_order(i, mask) \
117
for (i = 0; \
118
i = next_xfeature_order(i, mask), \
119
xfeature_uncompact_order[i] != -1; \
120
i++)
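/*
 * For illustration (hypothetical numbers, not taken from real CPUs): if a
 * feature with a higher number happened to be placed at a lower uncompacted
 * offset than a feature with a lower number, walking features by number
 * would visit them out of buffer order. xfeature_uncompact_order[], which
 * setup_xstate_cache() sorts by offset, lets this iterator visit features
 * strictly in buffer order instead.
 */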
121
122
#define XSTATE_FLAG_SUPERVISOR BIT(0)
123
#define XSTATE_FLAG_ALIGNED64 BIT(1)
124
125
/*
126
* Return whether the system supports a given xfeature.
127
*
128
* Also return the name of the (most advanced) feature that the caller requested:
129
*/
130
int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name)
131
{
132
u64 xfeatures_missing = xfeatures_needed & ~fpu_kernel_cfg.max_features;
133
134
if (unlikely(feature_name)) {
135
long xfeature_idx, max_idx;
136
u64 xfeatures_print;
137
/*
138
* We use fls64() here to be able to print the most advanced
139
* feature that was requested but is missing. For example, if a driver
140
* asks about "XFEATURE_MASK_SSE | XFEATURE_MASK_YMM" we'll print the
141
* missing AVX feature - this is the most informative message
142
* to users:
143
*/
144
if (xfeatures_missing)
145
xfeatures_print = xfeatures_missing;
146
else
147
xfeatures_print = xfeatures_needed;
148
149
xfeature_idx = fls64(xfeatures_print)-1;
150
max_idx = ARRAY_SIZE(xfeature_names)-1;
151
xfeature_idx = min(xfeature_idx, max_idx);
152
153
*feature_name = xfeature_names[xfeature_idx];
154
}
155
156
if (xfeatures_missing)
157
return 0;
158
159
return 1;
160
}
161
EXPORT_SYMBOL_GPL(cpu_has_xfeatures);
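/*
 * Illustrative usage sketch (hypothetical caller, not part of this file):
 * a driver that depends on kernel-managed AVX state could probe with
 *
 *	const char *feature_name;
 *
 *	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
 *			       &feature_name))
 *		pr_info("CPU feature '%s' not supported\n", feature_name);
 *
 * On a CPU without AVX this reports the most advanced missing feature,
 * i.e. "AVX registers", as described in the comment above.
 */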
162
163
static bool xfeature_is_aligned64(int xfeature_nr)
164
{
165
return xstate_flags[xfeature_nr] & XSTATE_FLAG_ALIGNED64;
166
}
167
168
static bool xfeature_is_supervisor(int xfeature_nr)
169
{
170
return xstate_flags[xfeature_nr] & XSTATE_FLAG_SUPERVISOR;
171
}
172
173
static unsigned int xfeature_get_offset(u64 xcomp_bv, int xfeature)
174
{
175
unsigned int offs, i;
176
177
/*
178
* Non-compacted format and legacy features use the cached fixed
179
* offsets.
180
*/
181
if (!cpu_feature_enabled(X86_FEATURE_XCOMPACTED) ||
182
xfeature <= XFEATURE_SSE)
183
return xstate_offsets[xfeature];
184
185
/*
186
* Compacted format offsets depend on the actual content of the
187
* compacted xsave area which is determined by the xcomp_bv header
188
* field.
189
*/
190
offs = FXSAVE_SIZE + XSAVE_HDR_SIZE;
191
for_each_extended_xfeature(i, xcomp_bv) {
192
if (xfeature_is_aligned64(i))
193
offs = ALIGN(offs, 64);
194
if (i == xfeature)
195
break;
196
offs += xstate_sizes[i];
197
}
198
return offs;
199
}
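/*
 * Worked example for the compacted walk above: the extended area always
 * starts at FXSAVE_SIZE + XSAVE_HDR_SIZE = 512 + 64 = 576 bytes. If YMM is
 * the only extended feature set in xcomp_bv, its offset is therefore 576.
 * A later component flagged XSTATE_FLAG_ALIGNED64 has its offset rounded
 * up to the next 64-byte boundary before it is placed.
 */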
200
201
/*
202
* Enable the extended processor state save/restore feature.
203
* Called once per CPU onlining.
204
*/
205
void fpu__init_cpu_xstate(void)
206
{
207
if (!boot_cpu_has(X86_FEATURE_XSAVE) || !fpu_kernel_cfg.max_features)
208
return;
209
210
cr4_set_bits(X86_CR4_OSXSAVE);
211
212
/*
213
* Must happen after CR4 setup and before xsetbv() to allow KVM
214
* lazy passthrough. Write independent of the dynamic state static
215
* key as that does not work on the boot CPU. This also ensures
216
* that any stale state is wiped out from XFD. Reset the per CPU
217
* xfd cache too.
218
*/
219
if (cpu_feature_enabled(X86_FEATURE_XFD))
220
xfd_set_state(init_fpstate.xfd);
221
222
/*
223
* XCR_XFEATURE_ENABLED_MASK (aka. XCR0) sets user features
224
* managed by XSAVE{C, OPT, S} and XRSTOR{S}. Only XSAVE user
225
* states can be set here.
226
*/
227
xsetbv(XCR_XFEATURE_ENABLED_MASK, fpu_user_cfg.max_features);
228
229
/*
230
* MSR_IA32_XSS sets supervisor states managed by XSAVES.
231
*/
232
if (boot_cpu_has(X86_FEATURE_XSAVES)) {
233
wrmsrq(MSR_IA32_XSS, xfeatures_mask_supervisor() |
234
xfeatures_mask_independent());
235
}
236
}
237
238
static bool xfeature_enabled(enum xfeature xfeature)
239
{
240
return fpu_kernel_cfg.max_features & BIT_ULL(xfeature);
241
}
242
243
static int compare_xstate_offsets(const void *xfeature1, const void *xfeature2)
244
{
245
return xstate_offsets[*(unsigned int *)xfeature1] -
246
xstate_offsets[*(unsigned int *)xfeature2];
247
}
248
249
/*
250
* Record the offsets and sizes of various xstates contained
251
* in the XSAVE state memory layout. Also, create an ordered
252
* list of xfeatures for handling out-of-order offsets.
253
*/
254
static void __init setup_xstate_cache(void)
255
{
256
u32 eax, ebx, ecx, edx, xfeature, i = 0;
257
/*
258
* The FP xstates and SSE xstates are legacy states. They are always
259
* in the fixed offsets in the xsave area in either compacted form
260
* or standard form.
261
*/
262
xstate_offsets[XFEATURE_FP] = 0;
263
xstate_sizes[XFEATURE_FP] = offsetof(struct fxregs_state,
264
xmm_space);
265
266
xstate_offsets[XFEATURE_SSE] = xstate_sizes[XFEATURE_FP];
267
xstate_sizes[XFEATURE_SSE] = sizeof_field(struct fxregs_state,
268
xmm_space);
269
270
for_each_extended_xfeature(xfeature, fpu_kernel_cfg.max_features) {
271
cpuid_count(CPUID_LEAF_XSTATE, xfeature, &eax, &ebx, &ecx, &edx);
272
273
xstate_sizes[xfeature] = eax;
274
xstate_flags[xfeature] = ecx;
275
276
/*
277
* If an xfeature is supervisor state, the offset in EBX is
278
* invalid, leave it to -1.
279
*/
280
if (xfeature_is_supervisor(xfeature))
281
continue;
282
283
xstate_offsets[xfeature] = ebx;
284
285
/* Populate the list of xfeatures before sorting */
286
xfeature_uncompact_order[i++] = xfeature;
287
}
288
289
/*
290
* Sort xfeatures by their offsets to support out-of-order
291
* offsets in the uncompacted format.
292
*/
293
sort(xfeature_uncompact_order, i, sizeof(unsigned int), compare_xstate_offsets, NULL);
294
}
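/*
 * For reference, CPUID(0xD, n) for an extended feature n enumerates:
 * EAX = state size, EBX = offset in the non-compacted format (only valid
 * for user features), ECX bit 0 = supervisor state, ECX bit 1 = 64-byte
 * alignment in the compacted format. These are exactly the values cached
 * above in xstate_sizes[], xstate_offsets[] and xstate_flags[].
 */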
295
296
/*
297
* Print out all the supported xstate features:
298
*/
299
static void __init print_xstate_features(void)
300
{
301
int i;
302
303
for (i = 0; i < XFEATURE_MAX; i++) {
304
u64 mask = BIT_ULL(i);
305
const char *name;
306
307
if (cpu_has_xfeatures(mask, &name))
308
pr_info("x86/fpu: Supporting XSAVE feature 0x%03Lx: '%s'\n", mask, name);
309
}
310
}
311
312
/*
313
* This check is important because it is easy to get XSTATE_*
314
* confused with XSTATE_BIT_*.
315
*/
316
#define CHECK_XFEATURE(nr) do { \
317
WARN_ON(nr < FIRST_EXTENDED_XFEATURE); \
318
WARN_ON(nr >= XFEATURE_MAX); \
319
} while (0)
320
321
/*
322
* Print out xstate component offsets and sizes
323
*/
324
static void __init print_xstate_offset_size(void)
325
{
326
int i;
327
328
for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
329
pr_info("x86/fpu: xstate_offset[%d]: %4d, xstate_sizes[%d]: %4d\n",
330
i, xfeature_get_offset(fpu_kernel_cfg.max_features, i),
331
i, xstate_sizes[i]);
332
}
333
}
334
335
/*
336
* This function is called only during boot time when x86 caps are not set
337
* up and alternative can not be used yet.
338
*/
339
static __init void os_xrstor_booting(struct xregs_state *xstate)
340
{
341
u64 mask = fpu_kernel_cfg.max_features & XFEATURE_MASK_FPSTATE;
342
u32 lmask = mask;
343
u32 hmask = mask >> 32;
344
int err;
345
346
if (cpu_feature_enabled(X86_FEATURE_XSAVES))
347
XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
348
else
349
XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
350
351
/*
352
* We should never fault when copying from a kernel buffer, and the FPU
353
* state we set at boot time should be valid.
354
*/
355
WARN_ON_FPU(err);
356
}
357
358
/*
359
* All supported features have either init state all zeros or are
360
* handled in setup_init_fpu_buf() individually. This is an explicit
361
* feature list and does not use XFEATURE_MASK*SUPPORTED to catch
362
* newly added supported features at build time and make people
363
* actually look at the init state for the new feature.
364
*/
365
#define XFEATURES_INIT_FPSTATE_HANDLED \
366
(XFEATURE_MASK_FP | \
367
XFEATURE_MASK_SSE | \
368
XFEATURE_MASK_YMM | \
369
XFEATURE_MASK_OPMASK | \
370
XFEATURE_MASK_ZMM_Hi256 | \
371
XFEATURE_MASK_Hi16_ZMM | \
372
XFEATURE_MASK_PKRU | \
373
XFEATURE_MASK_BNDREGS | \
374
XFEATURE_MASK_BNDCSR | \
375
XFEATURE_MASK_PASID | \
376
XFEATURE_MASK_CET_USER | \
377
XFEATURE_MASK_CET_KERNEL | \
378
XFEATURE_MASK_XTILE | \
379
XFEATURE_MASK_APX)
380
381
/*
382
* setup the xstate image representing the init state
383
*/
384
static void __init setup_init_fpu_buf(void)
385
{
386
BUILD_BUG_ON((XFEATURE_MASK_USER_SUPPORTED |
387
XFEATURE_MASK_SUPERVISOR_SUPPORTED) !=
388
XFEATURES_INIT_FPSTATE_HANDLED);
389
390
if (!boot_cpu_has(X86_FEATURE_XSAVE))
391
return;
392
393
print_xstate_features();
394
395
xstate_init_xcomp_bv(&init_fpstate.regs.xsave, init_fpstate.xfeatures);
396
397
/*
398
* Init all the features state with header.xfeatures being 0x0
399
*/
400
os_xrstor_booting(&init_fpstate.regs.xsave);
401
402
/*
403
* All components are now in init state. Read the state back so
404
* that init_fpstate contains all non-zero init state. This only
405
* works with XSAVE, but not with XSAVEOPT and XSAVEC/S because
406
* those use the init optimization which skips writing data for
407
* components in init state.
408
*
409
* XSAVE could be used, but that would require reshuffling the
410
* data when XSAVEC/S is available because XSAVEC/S uses xstate
411
* compaction. But doing so is a pointless exercise because most
412
* components have an all zeros init state except for the legacy
413
* ones (FP and SSE). Those can be saved with FXSAVE into the
414
* legacy area. Adding a new feature requires ensuring that its init
415
* state is all zeroes, or otherwise adding the necessary handling
416
* here.
417
*/
418
fxsave(&init_fpstate.regs.fxsave);
419
}
420
421
int xfeature_size(int xfeature_nr)
422
{
423
u32 eax, ebx, ecx, edx;
424
425
CHECK_XFEATURE(xfeature_nr);
426
cpuid_count(CPUID_LEAF_XSTATE, xfeature_nr, &eax, &ebx, &ecx, &edx);
427
return eax;
428
}
429
430
/* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
431
static int validate_user_xstate_header(const struct xstate_header *hdr,
432
struct fpstate *fpstate)
433
{
434
/* No unknown or supervisor features may be set */
435
if (hdr->xfeatures & ~fpstate->user_xfeatures)
436
return -EINVAL;
437
438
/* Userspace must use the uncompacted format */
439
if (hdr->xcomp_bv)
440
return -EINVAL;
441
442
/*
443
* If 'reserved' is shrunken to add a new field, make sure to validate
444
* that new field here!
445
*/
446
BUILD_BUG_ON(sizeof(hdr->reserved) != 48);
447
448
/* No reserved bits may be set */
449
if (memchr_inv(hdr->reserved, 0, sizeof(hdr->reserved)))
450
return -EINVAL;
451
452
return 0;
453
}
454
455
static void __init __xstate_dump_leaves(void)
456
{
457
int i;
458
u32 eax, ebx, ecx, edx;
459
static int should_dump = 1;
460
461
if (!should_dump)
462
return;
463
should_dump = 0;
464
/*
465
* Dump out a few leaves past the ones that we support
466
* just in case there are some goodies up there
467
*/
468
for (i = 0; i < XFEATURE_MAX + 10; i++) {
469
cpuid_count(CPUID_LEAF_XSTATE, i, &eax, &ebx, &ecx, &edx);
470
pr_warn("CPUID[%02x, %02x]: eax=%08x ebx=%08x ecx=%08x edx=%08x\n",
471
CPUID_LEAF_XSTATE, i, eax, ebx, ecx, edx);
472
}
473
}
474
475
#define XSTATE_WARN_ON(x, fmt, ...) do { \
476
if (WARN_ONCE(x, "XSAVE consistency problem: " fmt, ##__VA_ARGS__)) { \
477
__xstate_dump_leaves(); \
478
} \
479
} while (0)
480
481
#define XCHECK_SZ(sz, nr, __struct) ({ \
482
if (WARN_ONCE(sz != sizeof(__struct), \
483
"[%s]: struct is %zu bytes, cpu state %d bytes\n", \
484
xfeature_names[nr], sizeof(__struct), sz)) { \
485
__xstate_dump_leaves(); \
486
} \
487
true; \
488
})
489
490
491
/**
492
* check_xtile_data_against_struct - Check tile data state size.
493
*
494
* Calculate the state size by multiplying the single tile size which is
495
* recorded in a C struct, and the number of tiles that the CPU reports.
496
* Compare the provided size with the calculation.
497
*
498
* @size: The tile data state size
499
*
500
* Returns: 0 on success, -EINVAL on mismatch.
501
*/
502
static int __init check_xtile_data_against_struct(int size)
503
{
504
u32 max_palid, palid, state_size;
505
u32 eax, ebx, ecx, edx;
506
u16 max_tile;
507
508
/*
509
* Check the maximum palette id:
510
* eax: the highest numbered palette subleaf.
511
*/
512
cpuid_count(CPUID_LEAF_TILE, 0, &max_palid, &ebx, &ecx, &edx);
513
514
/*
515
* Cross-check each tile size and find the maximum number of
516
* supported tiles.
517
*/
518
for (palid = 1, max_tile = 0; palid <= max_palid; palid++) {
519
u16 tile_size, max;
520
521
/*
522
* Check the tile size info:
523
* eax[31:16]: bytes per tile
524
* ebx[31:16]: the max names (or max number of tiles)
525
*/
526
cpuid_count(CPUID_LEAF_TILE, palid, &eax, &ebx, &edx, &edx);
527
tile_size = eax >> 16;
528
max = ebx >> 16;
529
530
if (tile_size != sizeof(struct xtile_data)) {
531
pr_err("%s: struct is %zu bytes, cpu xtile %d bytes\n",
532
__stringify(XFEATURE_XTILE_DATA),
533
sizeof(struct xtile_data), tile_size);
534
__xstate_dump_leaves();
535
return -EINVAL;
536
}
537
538
if (max > max_tile)
539
max_tile = max;
540
}
541
542
state_size = sizeof(struct xtile_data) * max_tile;
543
if (size != state_size) {
544
pr_err("%s: calculated size is %u bytes, cpu state %d bytes\n",
545
__stringify(XFEATURE_XTILE_DATA), state_size, size);
546
__xstate_dump_leaves();
547
return -EINVAL;
548
}
549
return 0;
550
}
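/*
 * For illustration: with the initial AMX palette (palette 1), a tile is
 * 1024 bytes and 8 tiles are defined, so the expected XTILE_DATA state
 * size is 8 * 1024 = 8192 bytes, which the check above compares against
 * the CPUID-enumerated size.
 */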
551
552
/*
553
* We have a C struct for each 'xstate'. We need to ensure
554
* that our software representation matches what the CPU
555
* tells us about the state's size.
556
*/
557
static bool __init check_xstate_against_struct(int nr)
558
{
559
/*
560
* Ask the CPU for the size of the state.
561
*/
562
int sz = xfeature_size(nr);
563
564
/*
565
* Match each CPU state with the corresponding software
566
* structure.
567
*/
568
switch (nr) {
569
case XFEATURE_YMM: return XCHECK_SZ(sz, nr, struct ymmh_struct);
570
case XFEATURE_BNDREGS: return XCHECK_SZ(sz, nr, struct mpx_bndreg_state);
571
case XFEATURE_BNDCSR: return XCHECK_SZ(sz, nr, struct mpx_bndcsr_state);
572
case XFEATURE_OPMASK: return XCHECK_SZ(sz, nr, struct avx_512_opmask_state);
573
case XFEATURE_ZMM_Hi256: return XCHECK_SZ(sz, nr, struct avx_512_zmm_uppers_state);
574
case XFEATURE_Hi16_ZMM: return XCHECK_SZ(sz, nr, struct avx_512_hi16_state);
575
case XFEATURE_PKRU: return XCHECK_SZ(sz, nr, struct pkru_state);
576
case XFEATURE_PASID: return XCHECK_SZ(sz, nr, struct ia32_pasid_state);
577
case XFEATURE_XTILE_CFG: return XCHECK_SZ(sz, nr, struct xtile_cfg);
578
case XFEATURE_CET_USER: return XCHECK_SZ(sz, nr, struct cet_user_state);
579
case XFEATURE_CET_KERNEL: return XCHECK_SZ(sz, nr, struct cet_supervisor_state);
580
case XFEATURE_APX: return XCHECK_SZ(sz, nr, struct apx_state);
581
case XFEATURE_XTILE_DATA: check_xtile_data_against_struct(sz); return true;
582
default:
583
XSTATE_WARN_ON(1, "No structure for xstate: %d\n", nr);
584
return false;
585
}
586
587
return true;
588
}
589
590
static unsigned int xstate_calculate_size(u64 xfeatures, bool compacted)
591
{
592
unsigned int topmost = fls64(xfeatures) - 1;
593
unsigned int offset, i;
594
595
if (topmost <= XFEATURE_SSE)
596
return sizeof(struct xregs_state);
597
598
if (compacted) {
599
offset = xfeature_get_offset(xfeatures, topmost);
600
} else {
601
/* Walk through the xfeature order to pick the last */
602
for_each_extended_xfeature_in_order(i, xfeatures)
603
topmost = xfeature_uncompact_order[i];
604
offset = xstate_offsets[topmost];
605
}
606
607
return offset + xstate_sizes[topmost];
608
}
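/*
 * Worked example (standard format offsets): if the topmost enabled feature
 * is YMM, which lives at offset 576 with a size of 256 bytes, the result
 * is 576 + 256 = 832 bytes. For the compacted format the offset of the
 * topmost feature comes from xfeature_get_offset() instead, but the
 * "offset of the last feature plus its size" logic is identical.
 */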
609
610
/*
611
* This essentially double-checks what the cpu told us about
612
* how large the XSAVE buffer needs to be. We are recalculating
613
* it to be safe.
614
*
615
* Independent XSAVE features allocate their own buffers and are not
616
* covered by these checks. Only the size of the buffer for task->fpu
617
* is checked here.
618
*/
619
static bool __init paranoid_xstate_size_valid(unsigned int kernel_size)
620
{
621
bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
622
bool xsaves = cpu_feature_enabled(X86_FEATURE_XSAVES);
623
unsigned int size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
624
int i;
625
626
for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
627
if (!check_xstate_against_struct(i))
628
return false;
629
/*
630
* Supervisor state components can be managed only by
631
* XSAVES.
632
*/
633
if (!xsaves && xfeature_is_supervisor(i)) {
634
XSTATE_WARN_ON(1, "Got supervisor feature %d, but XSAVES not advertised\n", i);
635
return false;
636
}
637
}
638
size = xstate_calculate_size(fpu_kernel_cfg.max_features, compacted);
639
XSTATE_WARN_ON(size != kernel_size,
640
"size %u != kernel_size %u\n", size, kernel_size);
641
return size == kernel_size;
642
}
643
644
/*
645
* Get total size of enabled xstates in XCR0 | IA32_XSS.
646
*
647
* Note the SDM's wording here. "sub-function 0" only enumerates
648
* the size of the *user* states. If we use it to size a buffer
649
* that we use 'XSAVES' on, we could potentially overflow the
650
* buffer because 'XSAVES' saves system states too.
651
*
652
* This also takes compaction into account. So this works for
653
* XSAVEC as well.
654
*/
655
static unsigned int __init get_compacted_size(void)
656
{
657
unsigned int eax, ebx, ecx, edx;
658
/*
659
* - CPUID function 0DH, sub-function 1:
660
* EBX enumerates the size (in bytes) required by
661
* the XSAVES instruction for an XSAVE area
662
* containing all the state components
663
* corresponding to bits currently set in
664
* XCR0 | IA32_XSS.
665
*
666
* When XSAVES is not available but XSAVEC is (virt), then there
667
* are no supervisor states, but XSAVEC still uses compacted
668
* format.
669
*/
670
cpuid_count(CPUID_LEAF_XSTATE, 1, &eax, &ebx, &ecx, &edx);
671
return ebx;
672
}
673
674
/*
675
* Get the total size of the enabled xstates without the independent supervisor
676
* features.
677
*/
678
static unsigned int __init get_xsave_compacted_size(void)
679
{
680
u64 mask = xfeatures_mask_independent();
681
unsigned int size;
682
683
if (!mask)
684
return get_compacted_size();
685
686
/* Disable independent features. */
687
wrmsrq(MSR_IA32_XSS, xfeatures_mask_supervisor());
688
689
/*
690
* Ask the hardware what size is required of the buffer.
691
* This is the size required for the task->fpu buffer.
692
*/
693
size = get_compacted_size();
694
695
/* Re-enable independent features so XSAVES will work on them again. */
696
wrmsrq(MSR_IA32_XSS, xfeatures_mask_supervisor() | mask);
697
698
return size;
699
}
700
701
static unsigned int __init get_xsave_size_user(void)
702
{
703
unsigned int eax, ebx, ecx, edx;
704
/*
705
* - CPUID function 0DH, sub-function 0:
706
* EBX enumerates the size (in bytes) required by
707
* the XSAVE instruction for an XSAVE area
708
* containing all the *user* state components
709
* corresponding to bits currently set in XCR0.
710
*/
711
cpuid_count(CPUID_LEAF_XSTATE, 0, &eax, &ebx, &ecx, &edx);
712
return ebx;
713
}
714
715
static int __init init_xstate_size(void)
716
{
717
/* Recompute the context size for enabled features: */
718
unsigned int user_size, kernel_size, kernel_default_size;
719
bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
720
721
/* Uncompacted user space size */
722
user_size = get_xsave_size_user();
723
724
/*
725
* XSAVES kernel size includes supervisor states and uses compacted
726
* format. XSAVEC uses compacted format, but does not save
727
* supervisor states.
728
*
729
* XSAVE[OPT] do not support supervisor states so kernel and user
730
* size is identical.
731
*/
732
if (compacted)
733
kernel_size = get_xsave_compacted_size();
734
else
735
kernel_size = user_size;
736
737
kernel_default_size =
738
xstate_calculate_size(fpu_kernel_cfg.default_features, compacted);
739
740
if (!paranoid_xstate_size_valid(kernel_size))
741
return -EINVAL;
742
743
fpu_kernel_cfg.max_size = kernel_size;
744
fpu_user_cfg.max_size = user_size;
745
746
fpu_kernel_cfg.default_size = kernel_default_size;
747
fpu_user_cfg.default_size =
748
xstate_calculate_size(fpu_user_cfg.default_features, false);
749
750
guest_default_cfg.size =
751
xstate_calculate_size(guest_default_cfg.features, compacted);
752
753
return 0;
754
}
755
756
/*
757
* We enabled the XSAVE hardware, but something went wrong and
758
* we can not use it. Disable it.
759
*/
760
static void __init fpu__init_disable_system_xstate(unsigned int legacy_size)
761
{
762
pr_info("x86/fpu: XSAVE disabled\n");
763
764
fpu_kernel_cfg.max_features = 0;
765
cr4_clear_bits(X86_CR4_OSXSAVE);
766
setup_clear_cpu_cap(X86_FEATURE_XSAVE);
767
768
/* Restore the legacy size.*/
769
fpu_kernel_cfg.max_size = legacy_size;
770
fpu_kernel_cfg.default_size = legacy_size;
771
fpu_user_cfg.max_size = legacy_size;
772
fpu_user_cfg.default_size = legacy_size;
773
guest_default_cfg.size = legacy_size;
774
775
/*
776
* Prevent enabling the static branch which enables writes to the
777
* XFD MSR.
778
*/
779
init_fpstate.xfd = 0;
780
781
fpstate_reset(x86_task_fpu(current));
782
}
783
784
static u64 __init host_default_mask(void)
785
{
786
/*
787
* Exclude dynamic features (require userspace opt-in) and features
788
* that are supported only for KVM guests.
789
*/
790
return ~((u64)XFEATURE_MASK_USER_DYNAMIC | XFEATURE_MASK_GUEST_SUPERVISOR);
791
}
792
793
static u64 __init guest_default_mask(void)
794
{
795
/*
796
* Exclude dynamic features, which require userspace opt-in even
797
* for KVM guests.
798
*/
799
return ~(u64)XFEATURE_MASK_USER_DYNAMIC;
800
}
801
802
/*
803
* Enable and initialize the xsave feature.
804
* Called once per system bootup.
805
*/
806
void __init fpu__init_system_xstate(unsigned int legacy_size)
807
{
808
unsigned int eax, ebx, ecx, edx;
809
u64 xfeatures;
810
int err;
811
int i;
812
813
if (!boot_cpu_has(X86_FEATURE_FPU)) {
814
pr_info("x86/fpu: No FPU detected\n");
815
return;
816
}
817
818
if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
819
pr_info("x86/fpu: x87 FPU will use %s\n",
820
boot_cpu_has(X86_FEATURE_FXSR) ? "FXSAVE" : "FSAVE");
821
return;
822
}
823
824
/*
825
* Find user xstates supported by the processor.
826
*/
827
cpuid_count(CPUID_LEAF_XSTATE, 0, &eax, &ebx, &ecx, &edx);
828
fpu_kernel_cfg.max_features = eax + ((u64)edx << 32);
829
830
/*
831
* Find supervisor xstates supported by the processor.
832
*/
833
cpuid_count(CPUID_LEAF_XSTATE, 1, &eax, &ebx, &ecx, &edx);
834
fpu_kernel_cfg.max_features |= ecx + ((u64)edx << 32);
835
836
if ((fpu_kernel_cfg.max_features & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) {
837
/*
838
* This indicates that something really unexpected happened
839
* with the enumeration. Disable XSAVE and try to continue
840
* booting without it. This is too early to BUG().
841
*/
842
pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n",
843
fpu_kernel_cfg.max_features);
844
goto out_disable;
845
}
846
847
if (fpu_kernel_cfg.max_features & XFEATURE_MASK_APX &&
848
fpu_kernel_cfg.max_features & (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR)) {
849
/*
850
* This is a problematic CPU configuration where two
851
* conflicting state components are both enumerated.
852
*/
853
pr_err("x86/fpu: Both APX/MPX present in the CPU's xstate features: 0x%llx.\n",
854
fpu_kernel_cfg.max_features);
855
goto out_disable;
856
}
857
858
fpu_kernel_cfg.independent_features = fpu_kernel_cfg.max_features &
859
XFEATURE_MASK_INDEPENDENT;
860
861
/*
862
* Clear XSAVE features that are disabled in the normal CPUID.
863
*/
864
for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) {
865
unsigned short cid = xsave_cpuid_features[i];
866
867
/* Careful: X86_FEATURE_FPU is 0! */
868
if ((i != XFEATURE_FP && !cid) || !boot_cpu_has(cid))
869
fpu_kernel_cfg.max_features &= ~BIT_ULL(i);
870
}
871
872
if (!cpu_feature_enabled(X86_FEATURE_XFD))
873
fpu_kernel_cfg.max_features &= ~XFEATURE_MASK_USER_DYNAMIC;
874
875
if (!cpu_feature_enabled(X86_FEATURE_XSAVES))
876
fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED;
877
else
878
fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED |
879
XFEATURE_MASK_SUPERVISOR_SUPPORTED;
880
881
fpu_user_cfg.max_features = fpu_kernel_cfg.max_features;
882
fpu_user_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED;
883
884
/*
885
* Now, given maximum feature set, determine default values by
886
* applying default masks.
887
*/
888
fpu_kernel_cfg.default_features = fpu_kernel_cfg.max_features & host_default_mask();
889
fpu_user_cfg.default_features = fpu_user_cfg.max_features & host_default_mask();
890
guest_default_cfg.features = fpu_kernel_cfg.max_features & guest_default_mask();
891
892
/* Store it for paranoia check at the end */
893
xfeatures = fpu_kernel_cfg.max_features;
894
895
/*
896
* Initialize the default XFD state in init_fpstate and enable the
897
* dynamic sizing mechanism if dynamic states are available. The
898
* static key cannot be enabled here because this runs before
899
* jump_label_init(). This is delayed to an initcall.
900
*/
901
init_fpstate.xfd = fpu_user_cfg.max_features & XFEATURE_MASK_USER_DYNAMIC;
902
903
/* Set up compaction feature bit */
904
if (cpu_feature_enabled(X86_FEATURE_XSAVEC) ||
905
cpu_feature_enabled(X86_FEATURE_XSAVES))
906
setup_force_cpu_cap(X86_FEATURE_XCOMPACTED);
907
908
/* Enable xstate instructions to be able to continue with initialization: */
909
fpu__init_cpu_xstate();
910
911
/* Cache size, offset and flags for initialization */
912
setup_xstate_cache();
913
914
err = init_xstate_size();
915
if (err)
916
goto out_disable;
917
918
/*
919
* Update info used for ptrace frames; use standard-format size and no
920
* supervisor xstates:
921
*/
922
update_regset_xstate_info(fpu_user_cfg.max_size,
923
fpu_user_cfg.max_features);
924
925
/*
926
* init_fpstate excludes dynamic states as they are large but init
927
* state is zero.
928
*/
929
init_fpstate.size = fpu_kernel_cfg.default_size;
930
init_fpstate.xfeatures = fpu_kernel_cfg.default_features;
931
932
if (init_fpstate.size > sizeof(init_fpstate.regs)) {
933
pr_warn("x86/fpu: init_fpstate buffer too small (%zu < %d)\n",
934
sizeof(init_fpstate.regs), init_fpstate.size);
935
goto out_disable;
936
}
937
938
setup_init_fpu_buf();
939
940
/*
941
* Paranoia check whether something in the setup modified the
942
* xfeatures mask.
943
*/
944
if (xfeatures != fpu_kernel_cfg.max_features) {
945
pr_err("x86/fpu: xfeatures modified from 0x%016llx to 0x%016llx during init\n",
946
xfeatures, fpu_kernel_cfg.max_features);
947
goto out_disable;
948
}
949
950
/*
951
* CPU capabilities initialization runs before FPU init. So
952
* X86_FEATURE_OSXSAVE is not set. Now that XSAVE is completely
953
* functional, set the feature bit so dependent code works.
954
*/
955
setup_force_cpu_cap(X86_FEATURE_OSXSAVE);
956
957
print_xstate_offset_size();
958
pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
959
fpu_kernel_cfg.max_features,
960
fpu_kernel_cfg.max_size,
961
boot_cpu_has(X86_FEATURE_XCOMPACTED) ? "compacted" : "standard");
962
return;
963
964
out_disable:
965
/* something went wrong, try to boot without any XSAVE support */
966
fpu__init_disable_system_xstate(legacy_size);
967
}
968
969
/*
970
* Restore minimal FPU state after suspend:
971
*/
972
void fpu__resume_cpu(void)
973
{
974
/*
975
* Restore XCR0 on xsave capable CPUs:
976
*/
977
if (cpu_feature_enabled(X86_FEATURE_XSAVE))
978
xsetbv(XCR_XFEATURE_ENABLED_MASK, fpu_user_cfg.max_features);
979
980
/*
981
* Restore IA32_XSS. The same CPUID bit enumerates support
982
* of XSAVES and MSR_IA32_XSS.
983
*/
984
if (cpu_feature_enabled(X86_FEATURE_XSAVES)) {
985
wrmsrq(MSR_IA32_XSS, xfeatures_mask_supervisor() |
986
xfeatures_mask_independent());
987
}
988
989
if (fpu_state_size_dynamic())
990
wrmsrq(MSR_IA32_XFD, x86_task_fpu(current)->fpstate->xfd);
991
}
992
993
/*
994
* Given an xstate feature nr, calculate where in the xsave
995
* buffer the state is. Callers should ensure that the buffer
996
* is valid.
997
*/
998
static void *__raw_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
999
{
1000
u64 xcomp_bv = xsave->header.xcomp_bv;
1001
1002
if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr)))
1003
return NULL;
1004
1005
if (cpu_feature_enabled(X86_FEATURE_XCOMPACTED)) {
1006
if (WARN_ON_ONCE(!(xcomp_bv & BIT_ULL(xfeature_nr))))
1007
return NULL;
1008
}
1009
1010
return (void *)xsave + xfeature_get_offset(xcomp_bv, xfeature_nr);
1011
}
1012
1013
/*
1014
* Given the xsave area and a state inside, this function returns the
1015
* address of the state.
1016
*
1017
* This is the API that is called to get xstate address in either
1018
* standard format or compacted format of xsave area.
1019
*
1020
* Note that if there is no data for the field in the xsave buffer
1021
* this will return NULL.
1022
*
1023
* Inputs:
1024
* xstate: the thread's storage area for all FPU data
1025
* xfeature_nr: state which is defined in xsave.h (e.g. XFEATURE_FP,
1026
* XFEATURE_SSE, etc...)
1027
* Output:
1028
* address of the state in the xsave area, or NULL if the
1029
* field is not present in the xsave buffer.
1030
*/
1031
void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
1032
{
1033
/*
1034
* Do we even *have* xsave state?
1035
*/
1036
if (!boot_cpu_has(X86_FEATURE_XSAVE))
1037
return NULL;
1038
1039
/*
1040
* We should not ever be requesting features that we
1041
* have not enabled.
1042
*/
1043
if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr)))
1044
return NULL;
1045
1046
/*
1047
* This assumes the last 'xsave*' instruction to
1048
* have requested that 'xfeature_nr' be saved.
1049
* If it did not, we might be seeing an old value
1050
* of the field in the buffer.
1051
*
1052
* This can happen because the last 'xsave' did not
1053
* request that this feature be saved (unlikely)
1054
* or because the "init optimization" caused it
1055
* to not be saved.
1056
*/
1057
if (!(xsave->header.xfeatures & BIT_ULL(xfeature_nr)))
1058
return NULL;
1059
1060
return __raw_xsave_addr(xsave, xfeature_nr);
1061
}
1062
EXPORT_SYMBOL_FOR_KVM(get_xsave_addr);
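/*
 * Illustrative usage sketch (mirrors fpstate_clear_xstate_component()
 * further down in this file):
 *
 *	void *addr = get_xsave_addr(&fpstate->regs.xsave, XFEATURE_PKRU);
 *
 *	if (addr)
 *		... operate on the struct pkru_state at addr ...
 *
 * The NULL check is mandatory: the component may be absent because it is
 * in its init state or was not included in the last XSAVE*.
 */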
1063
1064
/*
1065
* Given an xstate feature nr, calculate where in the xsave buffer the state is.
1066
* The xsave buffer should be in standard format, not compacted (e.g. user mode
1067
* signal frames).
1068
*/
1069
void __user *get_xsave_addr_user(struct xregs_state __user *xsave, int xfeature_nr)
1070
{
1071
if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr)))
1072
return NULL;
1073
1074
return (void __user *)xsave + xstate_offsets[xfeature_nr];
1075
}
1076
1077
#ifdef CONFIG_ARCH_HAS_PKEYS
1078
1079
/*
1080
* This will go out and modify PKRU register to set the access
1081
* rights for @pkey to @init_val.
1082
*/
1083
int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
1084
unsigned long init_val)
1085
{
1086
u32 old_pkru, new_pkru_bits = 0;
1087
int pkey_shift;
1088
1089
/*
1090
* This check implies XSAVE support. OSPKE only gets
1091
* set if we enable XSAVE and we enable PKU in XCR0.
1092
*/
1093
if (!cpu_feature_enabled(X86_FEATURE_OSPKE))
1094
return -EINVAL;
1095
1096
/*
1097
* This code should only be called with valid 'pkey'
1098
* values originating from in-kernel users. Complain
1099
* if a bad value is observed.
1100
*/
1101
if (WARN_ON_ONCE(pkey >= arch_max_pkey()))
1102
return -EINVAL;
1103
1104
/* Set the bits we need in PKRU: */
1105
if (init_val & PKEY_DISABLE_ACCESS)
1106
new_pkru_bits |= PKRU_AD_BIT;
1107
if (init_val & PKEY_DISABLE_WRITE)
1108
new_pkru_bits |= PKRU_WD_BIT;
1109
1110
/* Shift the bits in to the correct place in PKRU for pkey: */
1111
pkey_shift = pkey * PKRU_BITS_PER_PKEY;
1112
new_pkru_bits <<= pkey_shift;
1113
1114
/* Get old PKRU and mask off any old bits in place: */
1115
old_pkru = read_pkru();
1116
old_pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift);
1117
1118
/* Write old part along with new part: */
1119
write_pkru(old_pkru | new_pkru_bits);
1120
1121
return 0;
1122
}
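/*
 * Worked example: with PKRU_BITS_PER_PKEY == 2, PKRU_AD_BIT == 0x1 and
 * PKRU_WD_BIT == 0x2, a call with pkey == 1 and init_val ==
 * PKEY_DISABLE_WRITE shifts PKRU_WD_BIT left by 2, i.e. it sets bit 3 of
 * PKRU, after bits 2-3 of the old value were masked off so no stale
 * access-disable bit survives for that key.
 */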
1123
#endif /* ! CONFIG_ARCH_HAS_PKEYS */
1124
1125
static void copy_feature(bool from_xstate, struct membuf *to, void *xstate,
1126
void *init_xstate, unsigned int size)
1127
{
1128
membuf_write(to, from_xstate ? xstate : init_xstate, size);
1129
}
1130
1131
/**
1132
* __copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer
1133
* @to: membuf descriptor
1134
* @fpstate: The fpstate buffer from which to copy
1135
* @xfeatures: The mask of xfeatures to save (XSAVE mode only)
1136
* @pkru_val: The PKRU value to store in the PKRU component
1137
* @copy_mode: The requested copy mode
1138
*
1139
* Converts from kernel XSAVE or XSAVES compacted format to UABI conforming
1140
* format, i.e. from the kernel internal hardware dependent storage format
1141
* to the requested @copy_mode. UABI XSTATE is always uncompacted!
1142
*
1143
* It supports partial copy but @to.pos always starts from zero.
1144
*/
1145
void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
1146
u64 xfeatures, u32 pkru_val,
1147
enum xstate_copy_mode copy_mode)
1148
{
1149
const unsigned int off_mxcsr = offsetof(struct fxregs_state, mxcsr);
1150
struct xregs_state *xinit = &init_fpstate.regs.xsave;
1151
struct xregs_state *xsave = &fpstate->regs.xsave;
1152
unsigned int zerofrom, i, xfeature;
1153
struct xstate_header header;
1154
u64 mask;
1155
1156
memset(&header, 0, sizeof(header));
1157
header.xfeatures = xsave->header.xfeatures;
1158
1159
/* Mask out the feature bits depending on copy mode */
1160
switch (copy_mode) {
1161
case XSTATE_COPY_FP:
1162
header.xfeatures &= XFEATURE_MASK_FP;
1163
break;
1164
1165
case XSTATE_COPY_FX:
1166
header.xfeatures &= XFEATURE_MASK_FP | XFEATURE_MASK_SSE;
1167
break;
1168
1169
case XSTATE_COPY_XSAVE:
1170
header.xfeatures &= fpstate->user_xfeatures & xfeatures;
1171
break;
1172
}
1173
1174
/* Copy FP state up to MXCSR */
1175
copy_feature(header.xfeatures & XFEATURE_MASK_FP, &to, &xsave->i387,
1176
&xinit->i387, off_mxcsr);
1177
1178
/* Copy MXCSR when SSE or YMM are set in the feature mask */
1179
copy_feature(header.xfeatures & (XFEATURE_MASK_SSE | XFEATURE_MASK_YMM),
1180
&to, &xsave->i387.mxcsr, &xinit->i387.mxcsr,
1181
MXCSR_AND_FLAGS_SIZE);
1182
1183
/* Copy the remaining FP state */
1184
copy_feature(header.xfeatures & XFEATURE_MASK_FP,
1185
&to, &xsave->i387.st_space, &xinit->i387.st_space,
1186
sizeof(xsave->i387.st_space));
1187
1188
/* Copy the SSE state - shared with YMM, but independently managed */
1189
copy_feature(header.xfeatures & XFEATURE_MASK_SSE,
1190
&to, &xsave->i387.xmm_space, &xinit->i387.xmm_space,
1191
sizeof(xsave->i387.xmm_space));
1192
1193
if (copy_mode != XSTATE_COPY_XSAVE)
1194
goto out;
1195
1196
/* Zero the padding area */
1197
membuf_zero(&to, sizeof(xsave->i387.padding));
1198
1199
/* Copy xsave->i387.sw_reserved */
1200
membuf_write(&to, xstate_fx_sw_bytes, sizeof(xsave->i387.sw_reserved));
1201
1202
/* Copy the user space relevant state of @xsave->header */
1203
membuf_write(&to, &header, sizeof(header));
1204
1205
zerofrom = offsetof(struct xregs_state, extended_state_area);
1206
1207
/*
1208
* This 'mask' indicates which states to copy from fpstate.
1209
* Those extended states that are not present in fpstate are
1210
* either disabled or initialized:
1211
*
1212
* In non-compacted format, disabled features still occupy
1213
* state space but there is no state to copy from in the
1214
* compacted init_fpstate. The gap tracking will zero these
1215
* states.
1216
*
1217
* The extended features have an all zeroes init state. Thus,
1218
* remove them from 'mask' to zero those features in the user
1219
* buffer instead of retrieving them from init_fpstate.
1220
*/
1221
mask = header.xfeatures;
1222
1223
for_each_extended_xfeature_in_order(i, mask) {
1224
xfeature = xfeature_uncompact_order[i];
1225
/*
1226
* If there was a feature or alignment gap, zero the space
1227
* in the destination buffer.
1228
*/
1229
if (zerofrom < xstate_offsets[xfeature])
1230
membuf_zero(&to, xstate_offsets[xfeature] - zerofrom);
1231
1232
if (xfeature == XFEATURE_PKRU) {
1233
struct pkru_state pkru = {0};
1234
/*
1235
* PKRU is not necessarily up to date in the
1236
* XSAVE buffer. Use the provided value.
1237
*/
1238
pkru.pkru = pkru_val;
1239
membuf_write(&to, &pkru, sizeof(pkru));
1240
} else {
1241
membuf_write(&to,
1242
__raw_xsave_addr(xsave, xfeature),
1243
xstate_sizes[xfeature]);
1244
}
1245
/*
1246
* Keep track of the last copied state in the non-compacted
1247
* target buffer for gap zeroing.
1248
*/
1249
zerofrom = xstate_offsets[xfeature] + xstate_sizes[xfeature];
1250
}
1251
1252
out:
1253
if (to.left)
1254
membuf_zero(&to, to.left);
1255
}
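/*
 * Illustration of the gap handling above: if the saved image contains
 * AVX-512 opmask state while YMM sits in its (all-zero) init state, YMM is
 * not in 'mask', so the copy position jumps from 'zerofrom' to
 * xstate_offsets[XFEATURE_OPMASK] and membuf_zero() fills the skipped
 * range, including the 256-byte YMM slot, with zeroes.
 */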
1256
1257
/**
1258
* copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer
1259
* @to: membuf descriptor
1260
* @tsk: The task from which to copy the saved xstate
1261
* @copy_mode: The requested copy mode
1262
*
1263
* Converts from kernel XSAVE or XSAVES compacted format to UABI conforming
1264
* format, i.e. from the kernel internal hardware dependent storage format
1265
* to the requested @copy_mode. UABI XSTATE is always uncompacted!
1266
*
1267
* It supports partial copy but @to.pos always starts from zero.
1268
*/
1269
void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
1270
enum xstate_copy_mode copy_mode)
1271
{
1272
__copy_xstate_to_uabi_buf(to, x86_task_fpu(tsk)->fpstate,
1273
x86_task_fpu(tsk)->fpstate->user_xfeatures,
1274
tsk->thread.pkru, copy_mode);
1275
}
1276
1277
static int copy_from_buffer(void *dst, unsigned int offset, unsigned int size,
1278
const void *kbuf, const void __user *ubuf)
1279
{
1280
if (kbuf) {
1281
memcpy(dst, kbuf + offset, size);
1282
} else {
1283
if (copy_from_user(dst, ubuf + offset, size))
1284
return -EFAULT;
1285
}
1286
return 0;
1287
}
1288
1289
1290
/**
1291
* copy_uabi_to_xstate - Copy a UABI format buffer to the kernel xstate
1292
* @fpstate: The fpstate buffer to copy to
1293
* @kbuf: The UABI format buffer, if it comes from the kernel
1294
* @ubuf: The UABI format buffer, if it comes from userspace
1295
* @pkru: The location to write the PKRU value to
1296
*
1297
* Converts from the UABI format into the kernel internal hardware
1298
* dependent format.
1299
*
1300
* This function ultimately has three different callers with distinct PKRU
1301
* behavior.
1302
* 1. When called from sigreturn the PKRU register will be restored from
1303
* @fpstate via an XRSTOR. Correctly copying the UABI format buffer to
1304
* @fpstate is sufficient to cover this case, but the caller will also
1305
* pass a pointer to the thread_struct's pkru field in @pkru and updating
1306
* it is harmless.
1307
* 2. When called from ptrace the PKRU register will be restored from the
1308
* thread_struct's pkru field. A pointer to that is passed in @pkru.
1309
* The kernel will restore it manually, so the XRSTOR behavior that resets
1310
* the PKRU register to the hardware init value (0) if the corresponding
1311
* xfeatures bit is not set is emulated here.
1312
* 3. When called from KVM the PKRU register will be restored from the vcpu's
1313
* pkru field. A pointer to that is passed in @pkru. KVM hasn't used
1314
* XRSTOR and hasn't had the PKRU resetting behavior described above. To
1315
* preserve that KVM behavior, it passes NULL for @pkru if the xfeatures
1316
* bit is not set.
1317
*/
1318
static int copy_uabi_to_xstate(struct fpstate *fpstate, const void *kbuf,
1319
const void __user *ubuf, u32 *pkru)
1320
{
1321
struct xregs_state *xsave = &fpstate->regs.xsave;
1322
unsigned int offset, size;
1323
struct xstate_header hdr;
1324
u64 mask;
1325
int i;
1326
1327
offset = offsetof(struct xregs_state, header);
1328
if (copy_from_buffer(&hdr, offset, sizeof(hdr), kbuf, ubuf))
1329
return -EFAULT;
1330
1331
if (validate_user_xstate_header(&hdr, fpstate))
1332
return -EINVAL;
1333
1334
/* Validate MXCSR when any of the related features is in use */
1335
mask = XFEATURE_MASK_FP | XFEATURE_MASK_SSE | XFEATURE_MASK_YMM;
1336
if (hdr.xfeatures & mask) {
1337
u32 mxcsr[2];
1338
1339
offset = offsetof(struct fxregs_state, mxcsr);
1340
if (copy_from_buffer(mxcsr, offset, sizeof(mxcsr), kbuf, ubuf))
1341
return -EFAULT;
1342
1343
/* Reserved bits in MXCSR must be zero. */
1344
if (mxcsr[0] & ~mxcsr_feature_mask)
1345
return -EINVAL;
1346
1347
/* SSE and YMM require MXCSR even when FP is not in use. */
1348
if (!(hdr.xfeatures & XFEATURE_MASK_FP)) {
1349
xsave->i387.mxcsr = mxcsr[0];
1350
xsave->i387.mxcsr_mask = mxcsr[1];
1351
}
1352
}
1353
1354
for (i = 0; i < XFEATURE_MAX; i++) {
1355
mask = BIT_ULL(i);
1356
1357
if (hdr.xfeatures & mask) {
1358
void *dst = __raw_xsave_addr(xsave, i);
1359
1360
offset = xstate_offsets[i];
1361
size = xstate_sizes[i];
1362
1363
if (copy_from_buffer(dst, offset, size, kbuf, ubuf))
1364
return -EFAULT;
1365
}
1366
}
1367
1368
if (hdr.xfeatures & XFEATURE_MASK_PKRU) {
1369
struct pkru_state *xpkru;
1370
1371
xpkru = __raw_xsave_addr(xsave, XFEATURE_PKRU);
1372
*pkru = xpkru->pkru;
1373
} else {
1374
/*
1375
* KVM may pass NULL here to indicate that it does not need
1376
* PKRU updated.
1377
*/
1378
if (pkru)
1379
*pkru = 0;
1380
}
1381
1382
/*
1383
* The state that came in from userspace was user-state only.
1384
* Mask all the user states out of 'xfeatures':
1385
*/
1386
xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR_ALL;
1387
1388
/*
1389
* Add back in the features that came in from userspace:
1390
*/
1391
xsave->header.xfeatures |= hdr.xfeatures;
1392
1393
return 0;
1394
}
1395
1396
/*
1397
* Convert from a ptrace standard-format kernel buffer to kernel XSAVE[S]
1398
* format and copy to the target thread. Used by ptrace and KVM.
1399
*/
1400
int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u32 *pkru)
1401
{
1402
return copy_uabi_to_xstate(fpstate, kbuf, NULL, pkru);
1403
}
1404
1405
/*
1406
* Convert from a sigreturn standard-format user-space buffer to kernel
1407
* XSAVE[S] format and copy to the target thread. This is called from the
1408
* sigreturn() and rt_sigreturn() system calls.
1409
*/
1410
int copy_sigframe_from_user_to_xstate(struct task_struct *tsk,
1411
const void __user *ubuf)
1412
{
1413
return copy_uabi_to_xstate(x86_task_fpu(tsk)->fpstate, NULL, ubuf, &tsk->thread.pkru);
1414
}
1415
1416
static bool validate_independent_components(u64 mask)
1417
{
1418
u64 xchk;
1419
1420
if (WARN_ON_FPU(!cpu_feature_enabled(X86_FEATURE_XSAVES)))
1421
return false;
1422
1423
xchk = ~xfeatures_mask_independent();
1424
1425
if (WARN_ON_ONCE(!mask || mask & xchk))
1426
return false;
1427
1428
return true;
1429
}
1430
1431
/**
1432
* xsaves - Save selected components to a kernel xstate buffer
1433
* @xstate: Pointer to the buffer
1434
* @mask: Feature mask to select the components to save
1435
*
1436
* The @xstate buffer must be 64 byte aligned and correctly initialized as
1437
* XSAVES does not write the full xstate header. Before first use the
1438
* buffer should be zeroed otherwise a consecutive XRSTORS from that buffer
1439
* can #GP.
1440
*
1441
* The feature mask must be a subset of the independent features.
1442
*/
1443
void xsaves(struct xregs_state *xstate, u64 mask)
1444
{
1445
int err;
1446
1447
if (!validate_independent_components(mask))
1448
return;
1449
1450
XSTATE_OP(XSAVES, xstate, (u32)mask, (u32)(mask >> 32), err);
1451
WARN_ON_ONCE(err);
1452
}
1453
1454
/**
1455
* xrstors - Restore selected components from a kernel xstate buffer
1456
* @xstate: Pointer to the buffer
1457
* @mask: Feature mask to select the components to restore
1458
*
1459
* The @xstate buffer must be 64 byte aligned and correctly initialized
1460
* otherwise XRSTORS from that buffer can #GP.
1461
*
1462
* Proper usage is to restore the state which was saved with
1463
* xsaves() into @xstate.
1464
*
1465
* The feature mask must be a subset of the independent features.
1466
*/
1467
void xrstors(struct xregs_state *xstate, u64 mask)
1468
{
1469
int err;
1470
1471
if (!validate_independent_components(mask))
1472
return;
1473
1474
XSTATE_OP(XRSTORS, xstate, (u32)mask, (u32)(mask >> 32), err);
1475
WARN_ON_ONCE(err);
1476
}
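/*
 * Usage note: a typical user of this pair is the perf subsystem, which
 * saves and restores architectural LBR state (XFEATURE_MASK_LBR) into its
 * own buffers with xsaves()/xrstors() -- which is exactly why both helpers
 * are restricted to the independent feature mask above.
 */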
1477
1478
#if IS_ENABLED(CONFIG_KVM)
1479
void fpstate_clear_xstate_component(struct fpstate *fpstate, unsigned int xfeature)
1480
{
1481
void *addr = get_xsave_addr(&fpstate->regs.xsave, xfeature);
1482
1483
if (addr)
1484
memset(addr, 0, xstate_sizes[xfeature]);
1485
}
1486
EXPORT_SYMBOL_FOR_KVM(fpstate_clear_xstate_component);
1487
#endif
1488
1489
#ifdef CONFIG_X86_64
1490
1491
#ifdef CONFIG_X86_DEBUG_FPU
1492
/*
1493
* Ensure that a subsequent XSAVE* or XRSTOR* instruction with RFBM=@mask
1494
* can safely operate on the @fpstate buffer.
1495
*/
1496
static bool xstate_op_valid(struct fpstate *fpstate, u64 mask, bool rstor)
1497
{
1498
u64 xfd = __this_cpu_read(xfd_state);
1499
1500
if (fpstate->xfd == xfd)
1501
return true;
1502
1503
/*
1504
* The XFD MSR does not match fpstate->xfd. That's invalid when
1505
* the passed in fpstate is current's fpstate.
1506
*/
1507
if (fpstate->xfd == x86_task_fpu(current)->fpstate->xfd)
1508
return false;
1509
1510
/*
1511
* XRSTOR(S) from init_fpstate are always correct as it will just
1512
* bring all components into init state and not read from the
1513
* buffer. XSAVE(S) raises #PF after init.
1514
*/
1515
if (fpstate == &init_fpstate)
1516
return rstor;
1517
1518
/*
1519
* XSAVE(S): clone(), fpu_swap_kvm_fpstate()
1520
* XRSTORS(S): fpu_swap_kvm_fpstate()
1521
*/
1522
1523
/*
1524
* No XSAVE/XRSTOR instructions (except XSAVE itself) touch
1525
* the buffer area for XFD-disabled state components.
1526
*/
1527
mask &= ~xfd;
1528
1529
/*
1530
* Remove features which are valid in fpstate. They
1531
* have space allocated in fpstate.
1532
*/
1533
mask &= ~fpstate->xfeatures;
1534
1535
/*
1536
* Any remaining state components in 'mask' might be written
1537
* by XSAVE/XRSTOR. Fail validation if found.
1538
*/
1539
return !mask;
1540
}
1541
1542
void xfd_validate_state(struct fpstate *fpstate, u64 mask, bool rstor)
1543
{
1544
WARN_ON_ONCE(!xstate_op_valid(fpstate, mask, rstor));
1545
}
1546
#endif /* CONFIG_X86_DEBUG_FPU */
1547
1548
static int __init xfd_update_static_branch(void)
1549
{
1550
/*
1551
* If init_fpstate.xfd has bits set then dynamic features are
1552
* available and the dynamic sizing must be enabled.
1553
*/
1554
if (init_fpstate.xfd)
1555
static_branch_enable(&__fpu_state_size_dynamic);
1556
return 0;
1557
}
1558
arch_initcall(xfd_update_static_branch)
1559
1560
void fpstate_free(struct fpu *fpu)
1561
{
1562
if (fpu->fpstate && fpu->fpstate != &fpu->__fpstate)
1563
vfree(fpu->fpstate);
1564
}
1565
1566
/**
1567
* fpstate_realloc - Reallocate struct fpstate for the requested new features
1568
*
1569
* @xfeatures: A bitmap of xstate features which extend the enabled features
1570
* of that task
1571
* @ksize: The required size for the kernel buffer
1572
* @usize: The required size for user space buffers
1573
* @guest_fpu: Pointer to a guest FPU container. NULL for host allocations
1574
*
1575
* Note vs. vmalloc(): If the task with a vzalloc()-allocated buffer
1576
* terminates quickly, vfree()-induced IPIs may be a concern, but tasks
1577
* with large states are likely to live longer.
1578
*
1579
* Returns: 0 on success, -ENOMEM on allocation error.
1580
*/
1581
static int fpstate_realloc(u64 xfeatures, unsigned int ksize,
1582
unsigned int usize, struct fpu_guest *guest_fpu)
1583
{
1584
struct fpu *fpu = x86_task_fpu(current);
1585
struct fpstate *curfps, *newfps = NULL;
1586
unsigned int fpsize;
1587
bool in_use;
1588
1589
fpsize = ksize + ALIGN(offsetof(struct fpstate, regs), 64);
1590
1591
newfps = vzalloc(fpsize);
1592
if (!newfps)
1593
return -ENOMEM;
1594
newfps->size = ksize;
1595
newfps->user_size = usize;
1596
newfps->is_valloc = true;
1597
1598
/*
1599
* When a guest FPU is supplied, use @guest_fpu->fpstate
1600
* as reference, independent of whether it is in use or not.
1601
*/
1602
curfps = guest_fpu ? guest_fpu->fpstate : fpu->fpstate;
1603
1604
/* Determine whether @curfps is the active fpstate */
1605
in_use = fpu->fpstate == curfps;
1606
1607
if (guest_fpu) {
1608
newfps->is_guest = true;
1609
newfps->is_confidential = curfps->is_confidential;
1610
newfps->in_use = curfps->in_use;
1611
guest_fpu->xfeatures |= xfeatures;
1612
guest_fpu->uabi_size = usize;
1613
}
1614
1615
fpregs_lock();
1616
/*
1617
* If @curfps is in use, ensure that the current state is in the
1618
* registers before swapping fpstate as that might invalidate it
1619
* due to layout changes.
1620
*/
1621
if (in_use && test_thread_flag(TIF_NEED_FPU_LOAD))
1622
fpregs_restore_userregs();
1623
1624
newfps->xfeatures = curfps->xfeatures | xfeatures;
1625
newfps->user_xfeatures = curfps->user_xfeatures | xfeatures;
1626
newfps->xfd = curfps->xfd & ~xfeatures;
1627
1628
/* Do the final updates within the locked region */
1629
xstate_init_xcomp_bv(&newfps->regs.xsave, newfps->xfeatures);
1630
1631
if (guest_fpu) {
1632
guest_fpu->fpstate = newfps;
1633
/* If curfps is active, update the FPU fpstate pointer */
1634
if (in_use)
1635
fpu->fpstate = newfps;
1636
} else {
1637
fpu->fpstate = newfps;
1638
}
1639
1640
if (in_use)
1641
xfd_update_state(fpu->fpstate);
1642
fpregs_unlock();
1643
1644
/* Only free valloc'ed state */
1645
if (curfps && curfps->is_valloc)
1646
vfree(curfps);
1647
1648
return 0;
1649
}
1650
1651
static int validate_sigaltstack(unsigned int usize)
1652
{
1653
struct task_struct *thread, *leader = current->group_leader;
1654
unsigned long framesize = get_sigframe_size();
1655
1656
lockdep_assert_held(&current->sighand->siglock);
1657
1658
/* get_sigframe_size() is based on fpu_user_cfg.max_size */
1659
framesize -= fpu_user_cfg.max_size;
1660
framesize += usize;
1661
for_each_thread(leader, thread) {
1662
if (thread->sas_ss_size && thread->sas_ss_size < framesize)
1663
return -ENOSPC;
1664
}
1665
return 0;
1666
}
1667
1668
static int __xstate_request_perm(u64 permitted, u64 requested, bool guest)
1669
{
1670
/*
1671
* This deliberately does not exclude !XSAVES as we still might
1672
* decide to optionally context switch XCR0 or talk the silicon
1673
* vendors into extending XFD for the pre AMX states, especially
1674
* AVX512.
1675
*/
1676
bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
1677
struct fpu *fpu = x86_task_fpu(current->group_leader);
1678
struct fpu_state_perm *perm;
1679
unsigned int ksize, usize;
1680
u64 mask;
1681
int ret = 0;
1682
1683
/* Check whether fully enabled */
1684
if ((permitted & requested) == requested)
1685
return 0;
1686
1687
/*
1688
* Calculate the resulting kernel state size. Note, @permitted also
1689
* contains supervisor xfeatures even though supervisor states are always
1690
* permitted for kernel and guest FPUs, and never permitted for user
1691
* FPUs.
1692
*/
1693
mask = permitted | requested;
1694
ksize = xstate_calculate_size(mask, compacted);
1695
1696
/*
1697
* Calculate the resulting user state size. Take care not to clobber
1698
* the supervisor xfeatures in the new mask!
1699
*/
1700
usize = xstate_calculate_size(mask & XFEATURE_MASK_USER_SUPPORTED, false);
1701
1702
if (!guest) {
1703
ret = validate_sigaltstack(usize);
1704
if (ret)
1705
return ret;
1706
}
1707
1708
perm = guest ? &fpu->guest_perm : &fpu->perm;
1709
/* Pairs with the READ_ONCE() in xstate_get_group_perm() */
1710
WRITE_ONCE(perm->__state_perm, mask);
1711
/* Protected by sighand lock */
1712
perm->__state_size = ksize;
1713
perm->__user_state_size = usize;
1714
return ret;
1715
}
1716
1717
/*
1718
* Permissions array to map facilities with more than one component
1719
*/
1720
static const u64 xstate_prctl_req[XFEATURE_MAX] = {
1721
[XFEATURE_XTILE_DATA] = XFEATURE_MASK_XTILE_DATA,
1722
};
1723
1724
static int xstate_request_perm(unsigned long idx, bool guest)
1725
{
1726
u64 permitted, requested;
1727
int ret;
1728
1729
if (idx >= XFEATURE_MAX)
1730
return -EINVAL;
1731
1732
/*
1733
* Look up the facility mask which can require more than
1734
* one xstate component.
1735
*/
1736
idx = array_index_nospec(idx, ARRAY_SIZE(xstate_prctl_req));
1737
requested = xstate_prctl_req[idx];
1738
if (!requested)
1739
return -EOPNOTSUPP;
1740
1741
if ((fpu_user_cfg.max_features & requested) != requested)
1742
return -EOPNOTSUPP;
1743
1744
/* Lockless quick check */
1745
permitted = xstate_get_group_perm(guest);
1746
if ((permitted & requested) == requested)
1747
return 0;
1748
1749
/* Protect against concurrent modifications */
1750
spin_lock_irq(&current->sighand->siglock);
1751
permitted = xstate_get_group_perm(guest);
1752
1753
/* First vCPU allocation locks the permissions. */
1754
if (guest && (permitted & FPU_GUEST_PERM_LOCKED))
1755
ret = -EBUSY;
1756
else
1757
ret = __xstate_request_perm(permitted, requested, guest);
1758
spin_unlock_irq(&current->sighand->siglock);
1759
return ret;
1760
}
1761
1762
int __xfd_enable_feature(u64 xfd_err, struct fpu_guest *guest_fpu)
1763
{
1764
u64 xfd_event = xfd_err & XFEATURE_MASK_USER_DYNAMIC;
1765
struct fpu_state_perm *perm;
1766
unsigned int ksize, usize;
1767
struct fpu *fpu;
1768
1769
if (!xfd_event) {
1770
if (!guest_fpu)
1771
pr_err_once("XFD: Invalid xfd error: %016llx\n", xfd_err);
1772
return 0;
1773
}
1774
1775
/* Protect against concurrent modifications */
1776
spin_lock_irq(&current->sighand->siglock);
1777
1778
/* If not permitted let it die */
1779
if ((xstate_get_group_perm(!!guest_fpu) & xfd_event) != xfd_event) {
1780
spin_unlock_irq(&current->sighand->siglock);
1781
return -EPERM;
1782
}
1783
1784
fpu = x86_task_fpu(current->group_leader);
1785
perm = guest_fpu ? &fpu->guest_perm : &fpu->perm;
1786
ksize = perm->__state_size;
1787
usize = perm->__user_state_size;
1788
1789
/*
1790
* The feature is permitted. State size is sufficient. Dropping
1791
* the lock is safe here even if more features are added from
1792
* another task; the retrieved buffer sizes are valid for the
1793
* currently requested feature(s).
1794
*/
1795
spin_unlock_irq(&current->sighand->siglock);
1796
1797
/*
1798
* Try to allocate a new fpstate. If that fails there is no way
1799
* out.
1800
*/
1801
if (fpstate_realloc(xfd_event, ksize, usize, guest_fpu))
1802
return -EFAULT;
1803
return 0;
1804
}
1805
1806
int xfd_enable_feature(u64 xfd_err)
1807
{
1808
return __xfd_enable_feature(xfd_err, NULL);
1809
}
1810
1811
#else /* CONFIG_X86_64 */
1812
static inline int xstate_request_perm(unsigned long idx, bool guest)
1813
{
1814
return -EPERM;
1815
}
1816
#endif /* !CONFIG_X86_64 */
1817
1818
u64 xstate_get_guest_group_perm(void)
1819
{
1820
return xstate_get_group_perm(true);
1821
}
1822
EXPORT_SYMBOL_FOR_KVM(xstate_get_guest_group_perm);
1823
1824
/**
1825
* fpu_xstate_prctl - xstate permission operations
1826
* @option: A subfunction of arch_prctl()
1827
* @arg2: option argument
1828
* Return: 0 if successful; otherwise, an error code
1829
*
1830
* Option arguments:
1831
*
1832
* ARCH_GET_XCOMP_SUPP: Pointer to user space u64 to store the info
1833
* ARCH_GET_XCOMP_PERM: Pointer to user space u64 to store the info
1834
* ARCH_REQ_XCOMP_PERM: Facility number requested
1835
*
1836
* For facilities which require more than one XSTATE component, the request
1837
* must be the highest state component number related to that facility,
1838
* e.g. for AMX which requires XFEATURE_XTILE_CFG(17) and
1839
* XFEATURE_XTILE_DATA(18) this would be XFEATURE_XTILE_DATA(18).
1840
*/
1841
long fpu_xstate_prctl(int option, unsigned long arg2)
1842
{
1843
u64 __user *uptr = (u64 __user *)arg2;
1844
u64 permitted, supported;
1845
unsigned long idx = arg2;
1846
bool guest = false;
1847
1848
switch (option) {
1849
case ARCH_GET_XCOMP_SUPP:
1850
supported = fpu_user_cfg.max_features | fpu_user_cfg.legacy_features;
1851
return put_user(supported, uptr);
1852
1853
case ARCH_GET_XCOMP_PERM:
1854
/*
1855
* Lockless snapshot as it can also change right after the
1856
* dropping the lock.
1857
*/
1858
permitted = xstate_get_host_group_perm();
1859
permitted &= XFEATURE_MASK_USER_SUPPORTED;
1860
return put_user(permitted, uptr);
1861
1862
case ARCH_GET_XCOMP_GUEST_PERM:
1863
permitted = xstate_get_guest_group_perm();
1864
permitted &= XFEATURE_MASK_USER_SUPPORTED;
1865
return put_user(permitted, uptr);
1866
1867
case ARCH_REQ_XCOMP_GUEST_PERM:
1868
guest = true;
1869
fallthrough;
1870
1871
case ARCH_REQ_XCOMP_PERM:
1872
if (!IS_ENABLED(CONFIG_X86_64))
1873
return -EOPNOTSUPP;
1874
1875
return xstate_request_perm(idx, guest);
1876
1877
default:
1878
return -EINVAL;
1879
}
1880
}
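/*
 * Illustrative userspace sketch (not part of this file): requesting AMX,
 * whose highest component is XFEATURE_XTILE_DATA (18), before touching
 * tile registers:
 *
 *	#include <asm/prctl.h>
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *
 *	if (syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_PERM, 18))
 *		... AMX not supported or not permitted ...
 *
 * After a successful request, the first tile instruction in the thread
 * group faults with #NM and the fpstate is enlarged via
 * __xfd_enable_feature() above.
 */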
1881
1882
#ifdef CONFIG_PROC_PID_ARCH_STATUS
1883
/*
1884
* Report the amount of time elapsed in millisecond since last AVX512
1885
* use in the task. Report -1 if no AVX-512 usage.
1886
*/
1887
static void avx512_status(struct seq_file *m, struct task_struct *task)
1888
{
1889
unsigned long timestamp;
1890
long delta = -1;
1891
1892
/* AVX-512 usage is not tracked for kernel threads. Don't report anything. */
1893
if (task->flags & (PF_KTHREAD | PF_USER_WORKER))
1894
return;
1895
1896
timestamp = READ_ONCE(x86_task_fpu(task)->avx512_timestamp);
1897
1898
if (timestamp) {
1899
delta = (long)(jiffies - timestamp);
1900
/*
1901
* Cap to LONG_MAX if time difference > LONG_MAX
1902
*/
1903
if (delta < 0)
1904
delta = LONG_MAX;
1905
delta = jiffies_to_msecs(delta);
1906
}
1907
1908
seq_put_decimal_ll(m, "AVX512_elapsed_ms:\t", delta);
1909
seq_putc(m, '\n');
1910
}
1911
1912
/*
1913
* Report architecture specific information
1914
*/
1915
int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
1916
struct pid *pid, struct task_struct *task)
1917
{
1918
/*
1919
* Report AVX-512 state if both the processor and the build option support it.
1920
*/
1921
if (cpu_feature_enabled(X86_FEATURE_AVX512F))
1922
avx512_status(m, task);
1923
1924
return 0;
1925
}
1926
#endif /* CONFIG_PROC_PID_ARCH_STATUS */
1927
1928
#ifdef CONFIG_COREDUMP
1929
static const char owner_name[] = "LINUX";
1930
1931
/*
1932
* Dump type, size, offset and flag values for every xfeature that is present.
1933
*/
1934
static int dump_xsave_layout_desc(struct coredump_params *cprm)
1935
{
1936
int num_records = 0;
1937
int i;
1938
1939
for_each_extended_xfeature(i, fpu_user_cfg.max_features) {
1940
struct x86_xfeat_component xc = {
1941
.type = i,
1942
.size = xstate_sizes[i],
1943
.offset = xstate_offsets[i],
1944
/* reserved for future use */
1945
.flags = 0,
1946
};
1947
1948
if (!dump_emit(cprm, &xc, sizeof(xc)))
1949
return -1;
1950
1951
num_records++;
1952
}
1953
return num_records;
1954
}
1955
1956
static u32 get_xsave_desc_size(void)
1957
{
1958
u32 cnt = 0;
1959
u32 i;
1960
1961
for_each_extended_xfeature(i, fpu_user_cfg.max_features)
1962
cnt++;
1963
1964
return cnt * (sizeof(struct x86_xfeat_component));
1965
}
1966
1967
int elf_coredump_extra_notes_write(struct coredump_params *cprm)
1968
{
1969
int num_records = 0;
1970
struct elf_note en;
1971
1972
if (!fpu_user_cfg.max_features)
1973
return 0;
1974
1975
en.n_namesz = sizeof(owner_name);
1976
en.n_descsz = get_xsave_desc_size();
1977
en.n_type = NT_X86_XSAVE_LAYOUT;
1978
1979
if (!dump_emit(cprm, &en, sizeof(en)))
1980
return 1;
1981
if (!dump_emit(cprm, owner_name, en.n_namesz))
1982
return 1;
1983
if (!dump_align(cprm, 4))
1984
return 1;
1985
1986
num_records = dump_xsave_layout_desc(cprm);
1987
if (num_records < 0)
1988
return 1;
1989
1990
/* Total size should equal the number of records times the record size */
1991
if ((sizeof(struct x86_xfeat_component) * num_records) != en.n_descsz)
1992
return 1;
1993
1994
return 0;
1995
}
1996
1997
int elf_coredump_extra_notes_size(void)
1998
{
1999
int size;
2000
2001
if (!fpu_user_cfg.max_features)
2002
return 0;
2003
2004
/* .note header */
2005
size = sizeof(struct elf_note);
2006
/* Name plus alignment to 4 bytes */
2007
size += roundup(sizeof(owner_name), 4);
2008
size += get_xsave_desc_size();
2009
2010
return size;
2011
}
2012
#endif /* CONFIG_COREDUMP */
2013
2014