// SPDX-License-Identifier: GPL-2.0-only1/*2* Copyright (C) 1994 Linus Torvalds3*4* Pentium III FXSR, SSE support5* General FPU state handling cleanups6* Gareth Hughes <[email protected]>, May 20007*/8#include <asm/fpu/api.h>9#include <asm/fpu/regset.h>10#include <asm/fpu/sched.h>11#include <asm/fpu/signal.h>12#include <asm/fpu/types.h>13#include <asm/msr.h>14#include <asm/traps.h>15#include <asm/irq_regs.h>1617#include <uapi/asm/kvm.h>1819#include <linux/hardirq.h>20#include <linux/pkeys.h>21#include <linux/vmalloc.h>2223#include "context.h"24#include "internal.h"25#include "legacy.h"26#include "xstate.h"2728#define CREATE_TRACE_POINTS29#include <asm/trace/fpu.h>3031#ifdef CONFIG_X86_6432DEFINE_STATIC_KEY_FALSE(__fpu_state_size_dynamic);33DEFINE_PER_CPU(u64, xfd_state);34#endif3536/* The FPU state configuration data for kernel and user space */37struct fpu_state_config fpu_kernel_cfg __ro_after_init;38struct fpu_state_config fpu_user_cfg __ro_after_init;39struct vcpu_fpu_config guest_default_cfg __ro_after_init;4041/*42* Represents the initial FPU state. It's mostly (but not completely) zeroes,43* depending on the FPU hardware format:44*/45struct fpstate init_fpstate __ro_after_init;4647/*48* Track FPU initialization and kernel-mode usage. 'true' means the FPU is49* initialized and is not currently being used by the kernel:50*/51DEFINE_PER_CPU(bool, kernel_fpu_allowed);5253/*54* Track which context is using the FPU on the CPU:55*/56DEFINE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);5758#ifdef CONFIG_X86_DEBUG_FPU59struct fpu *x86_task_fpu(struct task_struct *task)60{61if (WARN_ON_ONCE(task->flags & PF_KTHREAD))62return NULL;6364return (void *)task + sizeof(*task);65}66#endif6768/*69* Can we use the FPU in kernel mode with the70* whole "kernel_fpu_begin/end()" sequence?71*/72bool irq_fpu_usable(void)73{74if (WARN_ON_ONCE(in_nmi()))75return false;7677/*78* Return false in the following cases:79*80* - FPU is not yet initialized. This can happen only when the call is81* coming from CPU onlining, for example for microcode checksumming.82* - The kernel is already using the FPU, either because of explicit83* nesting (which should never be done), or because of implicit84* nesting when a hardirq interrupted a kernel-mode FPU section.85*86* The single boolean check below handles both cases:87*/88if (!this_cpu_read(kernel_fpu_allowed))89return false;9091/*92* When not in NMI or hard interrupt context, FPU can be used in:93*94* - Task context except from within fpregs_lock()'ed critical95* regions.96*97* - Soft interrupt processing context which cannot happen98* while in a fpregs_lock()'ed critical region.99*/100if (!in_hardirq())101return true;102103/*104* In hard interrupt context it's safe when soft interrupts105* are enabled, which means the interrupt did not hit in106* a fpregs_lock()'ed critical region.107*/108return !softirq_count();109}110EXPORT_SYMBOL(irq_fpu_usable);111112/*113* Track AVX512 state use because it is known to slow the max clock114* speed of the core.115*/116static void update_avx_timestamp(struct fpu *fpu)117{118119#define AVX512_TRACKING_MASK (XFEATURE_MASK_ZMM_Hi256 | XFEATURE_MASK_Hi16_ZMM)120121if (fpu->fpstate->regs.xsave.header.xfeatures & AVX512_TRACKING_MASK)122fpu->avx512_timestamp = jiffies;123}124125/*126* Save the FPU register state in fpu->fpstate->regs. The register state is127* preserved.128*129* Must be called with fpregs_lock() held.130*131* The legacy FNSAVE instruction clears all FPU state unconditionally, so132* register state has to be reloaded. That might be a pointless exercise133* when the FPU is going to be used by another task right after that. But134* this only affects 20+ years old 32bit systems and avoids conditionals all135* over the place.136*137* FXSAVE and all XSAVE variants preserve the FPU register state.138*/139void save_fpregs_to_fpstate(struct fpu *fpu)140{141if (likely(use_xsave())) {142os_xsave(fpu->fpstate);143update_avx_timestamp(fpu);144return;145}146147if (likely(use_fxsr())) {148fxsave(&fpu->fpstate->regs.fxsave);149return;150}151152/*153* Legacy FPU register saving, FNSAVE always clears FPU registers,154* so we have to reload them from the memory state.155*/156asm volatile("fnsave %[fp]; fwait" : [fp] "=m" (fpu->fpstate->regs.fsave));157frstor(&fpu->fpstate->regs.fsave);158}159160void restore_fpregs_from_fpstate(struct fpstate *fpstate, u64 mask)161{162/*163* AMD K7/K8 and later CPUs up to Zen don't save/restore164* FDP/FIP/FOP unless an exception is pending. Clear the x87 state165* here by setting it to fixed values. "m" is a random variable166* that should be in L1.167*/168if (unlikely(static_cpu_has_bug(X86_BUG_FXSAVE_LEAK))) {169asm volatile(170"fnclex\n\t"171"emms\n\t"172"fildl %[addr]" /* set F?P to defined value */173: : [addr] "m" (*fpstate));174}175176if (use_xsave()) {177/*178* Dynamically enabled features are enabled in XCR0, but179* usage requires also that the corresponding bits in XFD180* are cleared. If the bits are set then using a related181* instruction will raise #NM. This allows to do the182* allocation of the larger FPU buffer lazy from #NM or if183* the task has no permission to kill it which would happen184* via #UD if the feature is disabled in XCR0.185*186* XFD state is following the same life time rules as187* XSTATE and to restore state correctly XFD has to be188* updated before XRSTORS otherwise the component would189* stay in or go into init state even if the bits are set190* in fpstate::regs::xsave::xfeatures.191*/192xfd_update_state(fpstate);193194/*195* Restoring state always needs to modify all features196* which are in @mask even if the current task cannot use197* extended features.198*199* So fpstate->xfeatures cannot be used here, because then200* a feature for which the task has no permission but was201* used by the previous task would not go into init state.202*/203mask = fpu_kernel_cfg.max_features & mask;204205os_xrstor(fpstate, mask);206} else {207if (use_fxsr())208fxrstor(&fpstate->regs.fxsave);209else210frstor(&fpstate->regs.fsave);211}212}213214void fpu_reset_from_exception_fixup(void)215{216restore_fpregs_from_fpstate(&init_fpstate, XFEATURE_MASK_FPSTATE);217}218219#if IS_ENABLED(CONFIG_KVM)220static void __fpstate_reset(struct fpstate *fpstate);221222static void fpu_lock_guest_permissions(void)223{224struct fpu_state_perm *fpuperm;225u64 perm;226227if (!IS_ENABLED(CONFIG_X86_64))228return;229230spin_lock_irq(¤t->sighand->siglock);231fpuperm = &x86_task_fpu(current->group_leader)->guest_perm;232perm = fpuperm->__state_perm;233234/* First fpstate allocation locks down permissions. */235WRITE_ONCE(fpuperm->__state_perm, perm | FPU_GUEST_PERM_LOCKED);236237spin_unlock_irq(¤t->sighand->siglock);238}239240bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu)241{242struct fpstate *fpstate;243unsigned int size;244245size = guest_default_cfg.size + ALIGN(offsetof(struct fpstate, regs), 64);246247fpstate = vzalloc(size);248if (!fpstate)249return false;250251/* Initialize indicators to reflect properties of the fpstate */252fpstate->is_valloc = true;253fpstate->is_guest = true;254255__fpstate_reset(fpstate);256fpstate_init_user(fpstate);257258gfpu->fpstate = fpstate;259gfpu->xfeatures = guest_default_cfg.features;260261/*262* KVM sets the FP+SSE bits in the XSAVE header when copying FPU state263* to userspace, even when XSAVE is unsupported, so that restoring FPU264* state on a different CPU that does support XSAVE can cleanly load265* the incoming state using its natural XSAVE. In other words, KVM's266* uABI size may be larger than this host's default size. Conversely,267* the default size should never be larger than KVM's base uABI size;268* all features that can expand the uABI size must be opt-in.269*/270gfpu->uabi_size = sizeof(struct kvm_xsave);271if (WARN_ON_ONCE(fpu_user_cfg.default_size > gfpu->uabi_size))272gfpu->uabi_size = fpu_user_cfg.default_size;273274fpu_lock_guest_permissions();275276return true;277}278EXPORT_SYMBOL_GPL(fpu_alloc_guest_fpstate);279280void fpu_free_guest_fpstate(struct fpu_guest *gfpu)281{282struct fpstate *fpstate = gfpu->fpstate;283284if (!fpstate)285return;286287if (WARN_ON_ONCE(!fpstate->is_valloc || !fpstate->is_guest || fpstate->in_use))288return;289290gfpu->fpstate = NULL;291vfree(fpstate);292}293EXPORT_SYMBOL_GPL(fpu_free_guest_fpstate);294295/*296* fpu_enable_guest_xfd_features - Check xfeatures against guest perm and enable297* @guest_fpu: Pointer to the guest FPU container298* @xfeatures: Features requested by guest CPUID299*300* Enable all dynamic xfeatures according to guest perm and requested CPUID.301*302* Return: 0 on success, error code otherwise303*/304int fpu_enable_guest_xfd_features(struct fpu_guest *guest_fpu, u64 xfeatures)305{306lockdep_assert_preemption_enabled();307308/* Nothing to do if all requested features are already enabled. */309xfeatures &= ~guest_fpu->xfeatures;310if (!xfeatures)311return 0;312313return __xfd_enable_feature(xfeatures, guest_fpu);314}315EXPORT_SYMBOL_GPL(fpu_enable_guest_xfd_features);316317#ifdef CONFIG_X86_64318void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd)319{320fpregs_lock();321guest_fpu->fpstate->xfd = xfd;322if (guest_fpu->fpstate->in_use)323xfd_update_state(guest_fpu->fpstate);324fpregs_unlock();325}326EXPORT_SYMBOL_GPL(fpu_update_guest_xfd);327328/**329* fpu_sync_guest_vmexit_xfd_state - Synchronize XFD MSR and software state330*331* Must be invoked from KVM after a VMEXIT before enabling interrupts when332* XFD write emulation is disabled. This is required because the guest can333* freely modify XFD and the state at VMEXIT is not guaranteed to be the334* same as the state on VMENTER. So software state has to be updated before335* any operation which depends on it can take place.336*337* Note: It can be invoked unconditionally even when write emulation is338* enabled for the price of a then pointless MSR read.339*/340void fpu_sync_guest_vmexit_xfd_state(void)341{342struct fpstate *fpstate = x86_task_fpu(current)->fpstate;343344lockdep_assert_irqs_disabled();345if (fpu_state_size_dynamic()) {346rdmsrq(MSR_IA32_XFD, fpstate->xfd);347__this_cpu_write(xfd_state, fpstate->xfd);348}349}350EXPORT_SYMBOL_GPL(fpu_sync_guest_vmexit_xfd_state);351#endif /* CONFIG_X86_64 */352353int fpu_swap_kvm_fpstate(struct fpu_guest *guest_fpu, bool enter_guest)354{355struct fpstate *guest_fps = guest_fpu->fpstate;356struct fpu *fpu = x86_task_fpu(current);357struct fpstate *cur_fps = fpu->fpstate;358359fpregs_lock();360if (!cur_fps->is_confidential && !test_thread_flag(TIF_NEED_FPU_LOAD))361save_fpregs_to_fpstate(fpu);362363/* Swap fpstate */364if (enter_guest) {365fpu->__task_fpstate = cur_fps;366fpu->fpstate = guest_fps;367guest_fps->in_use = true;368} else {369guest_fps->in_use = false;370fpu->fpstate = fpu->__task_fpstate;371fpu->__task_fpstate = NULL;372}373374cur_fps = fpu->fpstate;375376if (!cur_fps->is_confidential) {377/* Includes XFD update */378restore_fpregs_from_fpstate(cur_fps, XFEATURE_MASK_FPSTATE);379} else {380/*381* XSTATE is restored by firmware from encrypted382* memory. Make sure XFD state is correct while383* running with guest fpstate384*/385xfd_update_state(cur_fps);386}387388fpregs_mark_activate();389fpregs_unlock();390return 0;391}392EXPORT_SYMBOL_GPL(fpu_swap_kvm_fpstate);393394void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf,395unsigned int size, u64 xfeatures, u32 pkru)396{397struct fpstate *kstate = gfpu->fpstate;398union fpregs_state *ustate = buf;399struct membuf mb = { .p = buf, .left = size };400401if (cpu_feature_enabled(X86_FEATURE_XSAVE)) {402__copy_xstate_to_uabi_buf(mb, kstate, xfeatures, pkru,403XSTATE_COPY_XSAVE);404} else {405memcpy(&ustate->fxsave, &kstate->regs.fxsave,406sizeof(ustate->fxsave));407/* Make it restorable on a XSAVE enabled host */408ustate->xsave.header.xfeatures = XFEATURE_MASK_FPSSE;409}410}411EXPORT_SYMBOL_GPL(fpu_copy_guest_fpstate_to_uabi);412413int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf,414u64 xcr0, u32 *vpkru)415{416struct fpstate *kstate = gfpu->fpstate;417const union fpregs_state *ustate = buf;418419if (!cpu_feature_enabled(X86_FEATURE_XSAVE)) {420if (ustate->xsave.header.xfeatures & ~XFEATURE_MASK_FPSSE)421return -EINVAL;422if (ustate->fxsave.mxcsr & ~mxcsr_feature_mask)423return -EINVAL;424memcpy(&kstate->regs.fxsave, &ustate->fxsave, sizeof(ustate->fxsave));425return 0;426}427428if (ustate->xsave.header.xfeatures & ~xcr0)429return -EINVAL;430431/*432* Nullify @vpkru to preserve its current value if PKRU's bit isn't set433* in the header. KVM's odd ABI is to leave PKRU untouched in this434* case (all other components are eventually re-initialized).435*/436if (!(ustate->xsave.header.xfeatures & XFEATURE_MASK_PKRU))437vpkru = NULL;438439return copy_uabi_from_kernel_to_xstate(kstate, ustate, vpkru);440}441EXPORT_SYMBOL_GPL(fpu_copy_uabi_to_guest_fpstate);442#endif /* CONFIG_KVM */443444void kernel_fpu_begin_mask(unsigned int kfpu_mask)445{446if (!irqs_disabled())447fpregs_lock();448449WARN_ON_FPU(!irq_fpu_usable());450451/* Toggle kernel_fpu_allowed to false: */452WARN_ON_FPU(!this_cpu_read(kernel_fpu_allowed));453this_cpu_write(kernel_fpu_allowed, false);454455if (!(current->flags & (PF_KTHREAD | PF_USER_WORKER)) &&456!test_thread_flag(TIF_NEED_FPU_LOAD)) {457set_thread_flag(TIF_NEED_FPU_LOAD);458save_fpregs_to_fpstate(x86_task_fpu(current));459}460__cpu_invalidate_fpregs_state();461462/* Put sane initial values into the control registers. */463if (likely(kfpu_mask & KFPU_MXCSR) && boot_cpu_has(X86_FEATURE_XMM))464ldmxcsr(MXCSR_DEFAULT);465466if (unlikely(kfpu_mask & KFPU_387) && boot_cpu_has(X86_FEATURE_FPU))467asm volatile ("fninit");468}469EXPORT_SYMBOL_GPL(kernel_fpu_begin_mask);470471void kernel_fpu_end(void)472{473/* Toggle kernel_fpu_allowed back to true: */474WARN_ON_FPU(this_cpu_read(kernel_fpu_allowed));475this_cpu_write(kernel_fpu_allowed, true);476477if (!irqs_disabled())478fpregs_unlock();479}480EXPORT_SYMBOL_GPL(kernel_fpu_end);481482/*483* Sync the FPU register state to current's memory register state when the484* current task owns the FPU. The hardware register state is preserved.485*/486void fpu_sync_fpstate(struct fpu *fpu)487{488WARN_ON_FPU(fpu != x86_task_fpu(current));489490fpregs_lock();491trace_x86_fpu_before_save(fpu);492493if (!test_thread_flag(TIF_NEED_FPU_LOAD))494save_fpregs_to_fpstate(fpu);495496trace_x86_fpu_after_save(fpu);497fpregs_unlock();498}499500static inline unsigned int init_fpstate_copy_size(void)501{502if (!use_xsave())503return fpu_kernel_cfg.default_size;504505/* XSAVE(S) just needs the legacy and the xstate header part */506return sizeof(init_fpstate.regs.xsave);507}508509static inline void fpstate_init_fxstate(struct fpstate *fpstate)510{511fpstate->regs.fxsave.cwd = 0x37f;512fpstate->regs.fxsave.mxcsr = MXCSR_DEFAULT;513}514515/*516* Legacy x87 fpstate state init:517*/518static inline void fpstate_init_fstate(struct fpstate *fpstate)519{520fpstate->regs.fsave.cwd = 0xffff037fu;521fpstate->regs.fsave.swd = 0xffff0000u;522fpstate->regs.fsave.twd = 0xffffffffu;523fpstate->regs.fsave.fos = 0xffff0000u;524}525526/*527* Used in two places:528* 1) Early boot to setup init_fpstate for non XSAVE systems529* 2) fpu_alloc_guest_fpstate() which is invoked from KVM530*/531void fpstate_init_user(struct fpstate *fpstate)532{533if (!cpu_feature_enabled(X86_FEATURE_FPU)) {534fpstate_init_soft(&fpstate->regs.soft);535return;536}537538xstate_init_xcomp_bv(&fpstate->regs.xsave, fpstate->xfeatures);539540if (cpu_feature_enabled(X86_FEATURE_FXSR))541fpstate_init_fxstate(fpstate);542else543fpstate_init_fstate(fpstate);544}545546static void __fpstate_reset(struct fpstate *fpstate)547{548/*549* Supervisor features (and thus sizes) may diverge between guest550* FPUs and host FPUs, as some supervisor features are supported551* for guests despite not being utilized by the host. User552* features and sizes are always identical, which allows for553* common guest and userspace ABI.554*555* For the host, set XFD to the kernel's desired initialization556* value. For guests, set XFD to its architectural RESET value.557*/558if (fpstate->is_guest) {559fpstate->size = guest_default_cfg.size;560fpstate->xfeatures = guest_default_cfg.features;561fpstate->xfd = 0;562} else {563fpstate->size = fpu_kernel_cfg.default_size;564fpstate->xfeatures = fpu_kernel_cfg.default_features;565fpstate->xfd = init_fpstate.xfd;566}567568fpstate->user_size = fpu_user_cfg.default_size;569fpstate->user_xfeatures = fpu_user_cfg.default_features;570}571572void fpstate_reset(struct fpu *fpu)573{574/* Set the fpstate pointer to the default fpstate */575fpu->fpstate = &fpu->__fpstate;576__fpstate_reset(fpu->fpstate);577578/* Initialize the permission related info in fpu */579fpu->perm.__state_perm = fpu_kernel_cfg.default_features;580fpu->perm.__state_size = fpu_kernel_cfg.default_size;581fpu->perm.__user_state_size = fpu_user_cfg.default_size;582583fpu->guest_perm.__state_perm = guest_default_cfg.features;584fpu->guest_perm.__state_size = guest_default_cfg.size;585/*586* User features and sizes are always identical between host and587* guest FPUs, which allows for common guest and userspace ABI.588*/589fpu->guest_perm.__user_state_size = fpu_user_cfg.default_size;590}591592static inline void fpu_inherit_perms(struct fpu *dst_fpu)593{594if (fpu_state_size_dynamic()) {595struct fpu *src_fpu = x86_task_fpu(current->group_leader);596597spin_lock_irq(¤t->sighand->siglock);598/* Fork also inherits the permissions of the parent */599dst_fpu->perm = src_fpu->perm;600dst_fpu->guest_perm = src_fpu->guest_perm;601spin_unlock_irq(¤t->sighand->siglock);602}603}604605/* A passed ssp of zero will not cause any update */606static int update_fpu_shstk(struct task_struct *dst, unsigned long ssp)607{608#ifdef CONFIG_X86_USER_SHADOW_STACK609struct cet_user_state *xstate;610611/* If ssp update is not needed. */612if (!ssp)613return 0;614615xstate = get_xsave_addr(&x86_task_fpu(dst)->fpstate->regs.xsave,616XFEATURE_CET_USER);617618/*619* If there is a non-zero ssp, then 'dst' must be configured with a shadow620* stack and the fpu state should be up to date since it was just copied621* from the parent in fpu_clone(). So there must be a valid non-init CET622* state location in the buffer.623*/624if (WARN_ON_ONCE(!xstate))625return 1;626627xstate->user_ssp = (u64)ssp;628#endif629return 0;630}631632/* Clone current's FPU state on fork */633int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal,634unsigned long ssp)635{636/*637* We allocate the new FPU structure right after the end of the task struct.638* task allocation size already took this into account.639*640* This is safe because task_struct size is a multiple of cacheline size,641* thus x86_task_fpu() will always be cacheline aligned as well.642*/643struct fpu *dst_fpu = (void *)dst + sizeof(*dst);644645BUILD_BUG_ON(sizeof(*dst) % SMP_CACHE_BYTES != 0);646647/* The new task's FPU state cannot be valid in the hardware. */648dst_fpu->last_cpu = -1;649650fpstate_reset(dst_fpu);651652if (!cpu_feature_enabled(X86_FEATURE_FPU))653return 0;654655/*656* Enforce reload for user space tasks and prevent kernel threads657* from trying to save the FPU registers on context switch.658*/659set_tsk_thread_flag(dst, TIF_NEED_FPU_LOAD);660661/*662* No FPU state inheritance for kernel threads and IO663* worker threads.664*/665if (minimal) {666/* Clear out the minimal state */667memcpy(&dst_fpu->fpstate->regs, &init_fpstate.regs,668init_fpstate_copy_size());669return 0;670}671672/*673* If a new feature is added, ensure all dynamic features are674* caller-saved from here!675*/676BUILD_BUG_ON(XFEATURE_MASK_USER_DYNAMIC != XFEATURE_MASK_XTILE_DATA);677678/*679* Save the default portion of the current FPU state into the680* clone. Assume all dynamic features to be defined as caller-681* saved, which enables skipping both the expansion of fpstate682* and the copying of any dynamic state.683*684* Do not use memcpy() when TIF_NEED_FPU_LOAD is set because685* copying is not valid when current uses non-default states.686*/687fpregs_lock();688if (test_thread_flag(TIF_NEED_FPU_LOAD))689fpregs_restore_userregs();690save_fpregs_to_fpstate(dst_fpu);691fpregs_unlock();692if (!(clone_flags & CLONE_THREAD))693fpu_inherit_perms(dst_fpu);694695/*696* Children never inherit PASID state.697* Force it to have its init value:698*/699if (use_xsave())700dst_fpu->fpstate->regs.xsave.header.xfeatures &= ~XFEATURE_MASK_PASID;701702/*703* Update shadow stack pointer, in case it changed during clone.704*/705if (update_fpu_shstk(dst, ssp))706return 1;707708trace_x86_fpu_copy_dst(dst_fpu);709710return 0;711}712713/*714* While struct fpu is no longer part of struct thread_struct, it is still715* allocated after struct task_struct in the "task_struct" kmem cache. But716* since FPU is expected to be part of struct thread_struct, we have to717* adjust for it here.718*/719void fpu_thread_struct_whitelist(unsigned long *offset, unsigned long *size)720{721/* The allocation follows struct task_struct. */722*offset = sizeof(struct task_struct) - offsetof(struct task_struct, thread);723*offset += offsetof(struct fpu, __fpstate.regs);724*size = fpu_kernel_cfg.default_size;725}726727/*728* Drops current FPU state: deactivates the fpregs and729* the fpstate. NOTE: it still leaves previous contents730* in the fpregs in the eager-FPU case.731*732* This function can be used in cases where we know that733* a state-restore is coming: either an explicit one,734* or a reschedule.735*/736void fpu__drop(struct task_struct *tsk)737{738struct fpu *fpu;739740if (test_tsk_thread_flag(tsk, TIF_NEED_FPU_LOAD))741return;742743fpu = x86_task_fpu(tsk);744745preempt_disable();746747if (fpu == x86_task_fpu(current)) {748/* Ignore delayed exceptions from user space */749asm volatile("1: fwait\n"750"2:\n"751_ASM_EXTABLE(1b, 2b));752fpregs_deactivate(fpu);753}754755trace_x86_fpu_dropped(fpu);756757preempt_enable();758}759760/*761* Clear FPU registers by setting them up from the init fpstate.762* Caller must do fpregs_[un]lock() around it.763*/764static inline void restore_fpregs_from_init_fpstate(u64 features_mask)765{766if (use_xsave())767os_xrstor(&init_fpstate, features_mask);768else if (use_fxsr())769fxrstor(&init_fpstate.regs.fxsave);770else771frstor(&init_fpstate.regs.fsave);772773pkru_write_default();774}775776/*777* Reset current->fpu memory state to the init values.778*/779static void fpu_reset_fpstate_regs(void)780{781struct fpu *fpu = x86_task_fpu(current);782783fpregs_lock();784__fpu_invalidate_fpregs_state(fpu);785/*786* This does not change the actual hardware registers. It just787* resets the memory image and sets TIF_NEED_FPU_LOAD so a788* subsequent return to usermode will reload the registers from the789* task's memory image.790*791* Do not use fpstate_init() here. Just copy init_fpstate which has792* the correct content already except for PKRU.793*794* PKRU handling does not rely on the xstate when restoring for795* user space as PKRU is eagerly written in switch_to() and796* flush_thread().797*/798memcpy(&fpu->fpstate->regs, &init_fpstate.regs, init_fpstate_copy_size());799set_thread_flag(TIF_NEED_FPU_LOAD);800fpregs_unlock();801}802803/*804* Reset current's user FPU states to the init states. current's805* supervisor states, if any, are not modified by this function. The806* caller guarantees that the XSTATE header in memory is intact.807*/808void fpu__clear_user_states(struct fpu *fpu)809{810WARN_ON_FPU(fpu != x86_task_fpu(current));811812fpregs_lock();813if (!cpu_feature_enabled(X86_FEATURE_FPU)) {814fpu_reset_fpstate_regs();815fpregs_unlock();816return;817}818819/*820* Ensure that current's supervisor states are loaded into their821* corresponding registers.822*/823if (xfeatures_mask_supervisor() &&824!fpregs_state_valid(fpu, smp_processor_id()))825os_xrstor_supervisor(fpu->fpstate);826827/* Reset user states in registers. */828restore_fpregs_from_init_fpstate(XFEATURE_MASK_USER_RESTORE);829830/*831* Now all FPU registers have their desired values. Inform the FPU832* state machine that current's FPU registers are in the hardware833* registers. The memory image does not need to be updated because834* any operation relying on it has to save the registers first when835* current's FPU is marked active.836*/837fpregs_mark_activate();838fpregs_unlock();839}840841void fpu_flush_thread(void)842{843fpstate_reset(x86_task_fpu(current));844fpu_reset_fpstate_regs();845}846/*847* Load FPU context before returning to userspace.848*/849void switch_fpu_return(void)850{851if (!static_cpu_has(X86_FEATURE_FPU))852return;853854fpregs_restore_userregs();855}856EXPORT_SYMBOL_GPL(switch_fpu_return);857858void fpregs_lock_and_load(void)859{860/*861* fpregs_lock() only disables preemption (mostly). So modifying state862* in an interrupt could screw up some in progress fpregs operation.863* Warn about it.864*/865WARN_ON_ONCE(!irq_fpu_usable());866WARN_ON_ONCE(current->flags & PF_KTHREAD);867868fpregs_lock();869870fpregs_assert_state_consistent();871872if (test_thread_flag(TIF_NEED_FPU_LOAD))873fpregs_restore_userregs();874}875876#ifdef CONFIG_X86_DEBUG_FPU877/*878* If current FPU state according to its tracking (loaded FPU context on this879* CPU) is not valid then we must have TIF_NEED_FPU_LOAD set so the context is880* loaded on return to userland.881*/882void fpregs_assert_state_consistent(void)883{884struct fpu *fpu = x86_task_fpu(current);885886if (test_thread_flag(TIF_NEED_FPU_LOAD))887return;888889WARN_ON_FPU(!fpregs_state_valid(fpu, smp_processor_id()));890}891EXPORT_SYMBOL_GPL(fpregs_assert_state_consistent);892#endif893894void fpregs_mark_activate(void)895{896struct fpu *fpu = x86_task_fpu(current);897898fpregs_activate(fpu);899fpu->last_cpu = smp_processor_id();900clear_thread_flag(TIF_NEED_FPU_LOAD);901}902903/*904* x87 math exception handling:905*/906907int fpu__exception_code(struct fpu *fpu, int trap_nr)908{909int err;910911if (trap_nr == X86_TRAP_MF) {912unsigned short cwd, swd;913/*914* (~cwd & swd) will mask out exceptions that are not set to unmasked915* status. 0x3f is the exception bits in these regs, 0x200 is the916* C1 reg you need in case of a stack fault, 0x040 is the stack917* fault bit. We should only be taking one exception at a time,918* so if this combination doesn't produce any single exception,919* then we have a bad program that isn't synchronizing its FPU usage920* and it will suffer the consequences since we won't be able to921* fully reproduce the context of the exception.922*/923if (boot_cpu_has(X86_FEATURE_FXSR)) {924cwd = fpu->fpstate->regs.fxsave.cwd;925swd = fpu->fpstate->regs.fxsave.swd;926} else {927cwd = (unsigned short)fpu->fpstate->regs.fsave.cwd;928swd = (unsigned short)fpu->fpstate->regs.fsave.swd;929}930931err = swd & ~cwd;932} else {933/*934* The SIMD FPU exceptions are handled a little differently, as there935* is only a single status/control register. Thus, to determine which936* unmasked exception was caught we must mask the exception mask bits937* at 0x1f80, and then use these to mask the exception bits at 0x3f.938*/939unsigned short mxcsr = MXCSR_DEFAULT;940941if (boot_cpu_has(X86_FEATURE_XMM))942mxcsr = fpu->fpstate->regs.fxsave.mxcsr;943944err = ~(mxcsr >> 7) & mxcsr;945}946947if (err & 0x001) { /* Invalid op */948/*949* swd & 0x240 == 0x040: Stack Underflow950* swd & 0x240 == 0x240: Stack Overflow951* User must clear the SF bit (0x40) if set952*/953return FPE_FLTINV;954} else if (err & 0x004) { /* Divide by Zero */955return FPE_FLTDIV;956} else if (err & 0x008) { /* Overflow */957return FPE_FLTOVF;958} else if (err & 0x012) { /* Denormal, Underflow */959return FPE_FLTUND;960} else if (err & 0x020) { /* Precision */961return FPE_FLTRES;962}963964/*965* If we're using IRQ 13, or supposedly even some trap966* X86_TRAP_MF implementations, it's possible967* we get a spurious trap, which is not an error.968*/969return 0;970}971972/*973* Initialize register state that may prevent from entering low-power idle.974* This function will be invoked from the cpuidle driver only when needed.975*/976noinstr void fpu_idle_fpregs(void)977{978/* Note: AMX_TILE being enabled implies XGETBV1 support */979if (cpu_feature_enabled(X86_FEATURE_AMX_TILE) &&980(xfeatures_in_use() & XFEATURE_MASK_XTILE)) {981tile_release();982__this_cpu_write(fpu_fpregs_owner_ctx, NULL);983}984}985986987