// SPDX-License-Identifier: GPL-2.0-only
/*
 * FP/SIMD context switching and fault handling
 *
 * Copyright (C) 2012 ARM Ltd.
 * Author: Catalin Marinas <[email protected]>
 */

#include <linux/bitmap.h>
#include <linux/bitops.h>
#include <linux/bottom_half.h>
#include <linux/bug.h>
#include <linux/cache.h>
#include <linux/compat.h>
#include <linux/compiler.h>
#include <linux/cpu.h>
#include <linux/cpu_pm.h>
#include <linux/ctype.h>
#include <linux/kernel.h>
#include <linux/linkage.h>
#include <linux/irqflags.h>
#include <linux/init.h>
#include <linux/percpu.h>
#include <linux/prctl.h>
#include <linux/preempt.h>
#include <linux/ptrace.h>
#include <linux/sched/signal.h>
#include <linux/sched/task_stack.h>
#include <linux/signal.h>
#include <linux/slab.h>
#include <linux/stddef.h>
#include <linux/sysctl.h>
#include <linux/swab.h>

#include <asm/esr.h>
#include <asm/exception.h>
#include <asm/fpsimd.h>
#include <asm/cpufeature.h>
#include <asm/cputype.h>
#include <asm/neon.h>
#include <asm/processor.h>
#include <asm/simd.h>
#include <asm/sigcontext.h>
#include <asm/sysreg.h>
#include <asm/traps.h>
#include <asm/virt.h>

#define FPEXC_IOF	(1 << 0)
#define FPEXC_DZF	(1 << 1)
#define FPEXC_OFF	(1 << 2)
#define FPEXC_UFF	(1 << 3)
#define FPEXC_IXF	(1 << 4)
#define FPEXC_IDF	(1 << 7)

/*
 * (Note: in this discussion, statements about FPSIMD apply equally to SVE.)
 *
 * In order to reduce the number of times the FPSIMD state is needlessly saved
 * and restored, we need to keep track of two things:
 * (a) for each task, we need to remember which CPU was the last one to have
 *     the task's FPSIMD state loaded into its FPSIMD registers;
 * (b) for each CPU, we need to remember which task's userland FPSIMD state has
 *     been loaded into its FPSIMD registers most recently, or whether it has
 *     been used to perform kernel mode NEON in the meantime.
 *
 * For (a), we add a fpsimd_cpu field to thread_struct, which gets updated to
 * the id of the current CPU every time the state is loaded onto a CPU. For (b),
 * we add the per-cpu variable 'fpsimd_last_state' (below), which contains the
 * address of the userland FPSIMD state of the task that was most recently
 * loaded onto the CPU, or NULL if kernel mode NEON has been performed after
 * that.
 *
 * With this in place, we no longer have to restore the next FPSIMD state right
 * when switching between tasks. Instead, we can defer this check to userland
 * resume, at which time we verify whether the CPU's fpsimd_last_state and the
 * task's fpsimd_cpu are still mutually in sync. If this is the case, we
 * can omit the FPSIMD restore.
 *
 * As an optimization, we use the thread_info flag TIF_FOREIGN_FPSTATE to
 * indicate whether or not the userland FPSIMD state of the current task is
 * present in the registers. The flag is set unless the FPSIMD registers of this
 * CPU currently contain the most recent userland FPSIMD state of the current
 * task. If the task is behaving as a VMM, then this will be managed by
 * KVM which will clear it to indicate that the vcpu FPSIMD state is currently
 * loaded on the CPU, allowing the state to be saved if an FPSIMD-aware
 * softirq kicks in. Upon vcpu_put(), KVM will save the vcpu FP state and
 * flag the register state as invalid.
 *
 * In order to allow softirq handlers to use FPSIMD, kernel_neon_begin() may be
 * called from softirq context, which will save the task's FPSIMD context back
 * to task_struct.
 * To prevent this from racing with the manipulation of the
 * task's FPSIMD state from task context and thereby corrupting the state, it
 * is necessary to protect any manipulation of a task's fpsimd_state or
 * TIF_FOREIGN_FPSTATE flag with get_cpu_fpsimd_context(), which will suspend
 * softirq servicing entirely until put_cpu_fpsimd_context() is called.
 *
 * For a certain task, the sequence may look something like this:
 * - the task gets scheduled in; if both the task's fpsimd_cpu field
 *   contains the id of the current CPU, and the CPU's fpsimd_last_state
 *   per-cpu variable points to the task's fpsimd_state, the
 *   TIF_FOREIGN_FPSTATE flag is cleared, otherwise it is set;
 *
 * - the task returns to userland; if TIF_FOREIGN_FPSTATE is set, the task's
 *   userland FPSIMD state is copied from memory to the registers, the task's
 *   fpsimd_cpu field is set to the id of the current CPU, the current
 *   CPU's fpsimd_last_state pointer is set to this task's fpsimd_state and the
 *   TIF_FOREIGN_FPSTATE flag is cleared;
 *
 * - the task executes an ordinary syscall; upon return to userland, the
 *   TIF_FOREIGN_FPSTATE flag will still be cleared, so no FPSIMD state is
 *   restored;
 *
 * - the task executes a syscall which executes some NEON instructions; this is
 *   preceded by a call to kernel_neon_begin(), which copies the task's FPSIMD
 *   register contents to memory, clears the fpsimd_last_state per-cpu variable
 *   and sets the TIF_FOREIGN_FPSTATE flag;
 *
 * - the task gets preempted after kernel_neon_end() is called; as we have not
 *   returned from the 2nd syscall yet, TIF_FOREIGN_FPSTATE is still set so
 *   whatever is in the FPSIMD registers is not saved to memory, but discarded.
 */

DEFINE_PER_CPU(struct cpu_fp_state, fpsimd_last_state);

__ro_after_init struct vl_info vl_info[ARM64_VEC_MAX] = {
#ifdef CONFIG_ARM64_SVE
	[ARM64_VEC_SVE] = {
		.type			= ARM64_VEC_SVE,
		.name			= "SVE",
		.min_vl			= SVE_VL_MIN,
		.max_vl			= SVE_VL_MIN,
		.max_virtualisable_vl	= SVE_VL_MIN,
	},
#endif
#ifdef CONFIG_ARM64_SME
	[ARM64_VEC_SME] = {
		.type			= ARM64_VEC_SME,
		.name			= "SME",
	},
#endif
};

static unsigned int vec_vl_inherit_flag(enum vec_type type)
{
	switch (type) {
	case ARM64_VEC_SVE:
		return TIF_SVE_VL_INHERIT;
	case ARM64_VEC_SME:
		return TIF_SME_VL_INHERIT;
	default:
		WARN_ON_ONCE(1);
		return 0;
	}
}

struct vl_config {
	int __default_vl;		/* Default VL for tasks */
};

static struct vl_config vl_config[ARM64_VEC_MAX];

static inline int get_default_vl(enum vec_type type)
{
	return READ_ONCE(vl_config[type].__default_vl);
}

#ifdef CONFIG_ARM64_SVE

static inline int get_sve_default_vl(void)
{
	return get_default_vl(ARM64_VEC_SVE);
}

static inline void set_default_vl(enum vec_type type, int val)
{
	WRITE_ONCE(vl_config[type].__default_vl, val);
}

static inline void set_sve_default_vl(int val)
{
	set_default_vl(ARM64_VEC_SVE, val);
}

#endif /* !CONFIG_ARM64_SVE */

#ifdef CONFIG_ARM64_SME

static int get_sme_default_vl(void)
{
	return get_default_vl(ARM64_VEC_SME);
}

static void set_sme_default_vl(int val)
{
	set_default_vl(ARM64_VEC_SME, val);
}

static void sme_free(struct task_struct *);

#else

static inline void sme_free(struct task_struct *t) { }

#endif

static void fpsimd_bind_task_to_cpu(void);

/*
 * Claim ownership of the CPU FPSIMD context for use by the calling context.
 *
 * The caller may freely manipulate the FPSIMD context metadata until
 * put_cpu_fpsimd_context() is called.
 *
 * On RT kernels local_bh_disable() is not sufficient because it only
 * serializes soft interrupt related sections via a local lock, but stays
 * preemptible. Disabling preemption is the right choice here as bottom
 * half processing is always in thread context on RT kernels so it
 * implicitly prevents bottom half processing as well.
 */
static void get_cpu_fpsimd_context(void)
{
	if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
		/*
		 * The softirq subsystem lacks a true unmask/mask API, and
		 * re-enabling softirq processing using local_bh_enable() will
		 * not only unmask softirqs, it will also result in immediate
		 * delivery of any pending softirqs.
		 * This is undesirable when running with IRQs disabled, but in
		 * that case, there is no need to mask softirqs in the first
		 * place, so only bother doing so when IRQs are enabled.
		 */
		if (!irqs_disabled())
			local_bh_disable();
	} else {
		preempt_disable();
	}
}

/*
 * Release the CPU FPSIMD context.
 *
 * Must be called from a context in which get_cpu_fpsimd_context() was
 * previously called, with no call to put_cpu_fpsimd_context() in the
 * meantime.
 */
static void put_cpu_fpsimd_context(void)
{
	if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
		if (!irqs_disabled())
			local_bh_enable();
	} else {
		preempt_enable();
	}
}
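
/*
 * Illustrative sketch only (not a caller in this file): any code that
 * inspects or updates a task's FPSIMD metadata brackets the access with the
 * helpers above, along the lines of
 *
 *	get_cpu_fpsimd_context();
 *	...read or modify current->thread.uw.fpsimd_state and/or
 *	   TIF_FOREIGN_FPSTATE without racing against softirqs...
 *	put_cpu_fpsimd_context();
 *
 * On !PREEMPT_RT this amounts to local_bh_disable()/local_bh_enable()
 * (skipped when IRQs are already disabled); on PREEMPT_RT it is
 * preempt_disable()/preempt_enable().
 */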

unsigned int task_get_vl(const struct task_struct *task, enum vec_type type)
{
	return task->thread.vl[type];
}

void task_set_vl(struct task_struct *task, enum vec_type type,
		 unsigned long vl)
{
	task->thread.vl[type] = vl;
}

unsigned int task_get_vl_onexec(const struct task_struct *task,
				enum vec_type type)
{
	return task->thread.vl_onexec[type];
}

void task_set_vl_onexec(struct task_struct *task, enum vec_type type,
			unsigned long vl)
{
	task->thread.vl_onexec[type] = vl;
}

/*
 * TIF_SME controls whether a task can use SME without trapping while
 * in userspace, when TIF_SME is set then we must have storage
 * allocated in sve_state and sme_state to store the contents of both ZA
 * and the SVE registers for both streaming and non-streaming modes.
 *
 * If both SVCR.ZA and SVCR.SM are disabled then at any point we
 * may disable TIF_SME and reenable traps.
 */


/*
 * TIF_SVE controls whether a task can use SVE without trapping while
 * in userspace, and also (together with TIF_SME) the way a task's
 * FPSIMD/SVE state is stored in thread_struct.
 *
 * The kernel uses this flag to track whether a user task is actively
 * using SVE, and therefore whether full SVE register state needs to
 * be tracked. If not, the cheaper FPSIMD context handling code can
 * be used instead of the more costly SVE equivalents.
 *
 *  * TIF_SVE or SVCR.SM set:
 *
 *    The task can execute SVE instructions while in userspace without
 *    trapping to the kernel.
 *
 *    During any syscall, the kernel may optionally clear TIF_SVE and
 *    discard the vector state except for the FPSIMD subset.
 *
 *  * TIF_SVE clear:
 *
 *    An attempt by the user task to execute an SVE instruction causes
 *    do_sve_acc() to be called, which does some preparation and then
 *    sets TIF_SVE.
 *
 *    During any syscall, the kernel may optionally clear TIF_SVE and
 *    discard the vector state except for the FPSIMD subset.
 *
 * The data will be stored in one of two formats:
 *
 * * FPSIMD only - FP_STATE_FPSIMD:
 *
 *    When only the FPSIMD state is stored, task->thread.fp_type is set to
 *    FP_STATE_FPSIMD, the FPSIMD registers V0-V31 are encoded in
 *    task->thread.uw.fpsimd_state; bits [max : 128] for each of Z0-Z31 are
 *    logically zero but not stored anywhere; P0-P15 and FFR are not
 *    stored and have unspecified values from userspace's point of
 *    view. For hygiene purposes, the kernel zeroes them on next use,
 *    but userspace is discouraged from relying on this.
 *
 *    task->thread.sve_state does not need to be non-NULL, valid or any
 *    particular size: it must not be dereferenced and any data stored
 *    there should be considered stale and not referenced.
 *
 * * SVE state - FP_STATE_SVE:
 *
 *    When the full SVE state is stored, task->thread.fp_type is set to
 *    FP_STATE_SVE and Z0-Z31 (incorporating Vn in bits[127:0] or the
 *    corresponding Zn), P0-P15 and FFR are encoded in
 *    task->thread.sve_state, formatted appropriately for vector
 *    length task->thread.sve_vl or, if SVCR.SM is set,
 *    task->thread.sme_vl. The storage for the vector registers in
 *    task->thread.uw.fpsimd_state should be ignored.
 *
 *    task->thread.sve_state must point to a valid buffer at least
 *    sve_state_size(task) bytes in size. The data stored in
 *    task->thread.uw.fpsimd_state.vregs should be considered stale
 *    and not referenced.
 *
 * * FPSR and FPCR are always stored in task->thread.uw.fpsimd_state
 *   irrespective of whether TIF_SVE is clear or set, since these are
 *   not vector length dependent.
 */

/*
 * Update current's FPSIMD/SVE registers from thread_struct.
 *
 * This function should be called only when the FPSIMD/SVE state in
 * thread_struct is known to be up to date, when preparing to enter
 * userspace.
 */
static void task_fpsimd_load(void)
{
	bool restore_sve_regs = false;
	bool restore_ffr;

	WARN_ON(!system_supports_fpsimd());
	WARN_ON(preemptible());
	WARN_ON(test_thread_flag(TIF_KERNEL_FPSTATE));

	if (system_supports_sve() || system_supports_sme()) {
		switch (current->thread.fp_type) {
		case FP_STATE_FPSIMD:
			/* Stop tracking SVE for this task until next use. */
			clear_thread_flag(TIF_SVE);
			break;
		case FP_STATE_SVE:
			if (!thread_sm_enabled(&current->thread))
				WARN_ON_ONCE(!test_and_set_thread_flag(TIF_SVE));

			if (test_thread_flag(TIF_SVE))
				sve_set_vq(sve_vq_from_vl(task_get_sve_vl(current)) - 1);

			restore_sve_regs = true;
			restore_ffr = true;
			break;
		default:
			/*
			 * This indicates either a bug in
			 * fpsimd_save_user_state() or memory corruption; we
			 * should always record an explicit format
			 * when we save.
			 * We always at least have the
			 * memory allocated for FPSIMD registers so
			 * try that and hope for the best.
			 */
			WARN_ON_ONCE(1);
			clear_thread_flag(TIF_SVE);
			break;
		}
	}

	/* Restore SME, override SVE register configuration if needed */
	if (system_supports_sme()) {
		unsigned long sme_vl = task_get_sme_vl(current);

		/* Ensure VL is set up for restoring data */
		if (test_thread_flag(TIF_SME))
			sme_set_vq(sve_vq_from_vl(sme_vl) - 1);

		write_sysreg_s(current->thread.svcr, SYS_SVCR);

		if (thread_za_enabled(&current->thread))
			sme_load_state(current->thread.sme_state,
				       system_supports_sme2());

		if (thread_sm_enabled(&current->thread))
			restore_ffr = system_supports_fa64();
	}

	if (system_supports_fpmr())
		write_sysreg_s(current->thread.uw.fpmr, SYS_FPMR);

	if (restore_sve_regs) {
		WARN_ON_ONCE(current->thread.fp_type != FP_STATE_SVE);
		sve_load_state(sve_pffr(&current->thread),
			       &current->thread.uw.fpsimd_state.fpsr,
			       restore_ffr);
	} else {
		WARN_ON_ONCE(current->thread.fp_type != FP_STATE_FPSIMD);
		fpsimd_load_state(&current->thread.uw.fpsimd_state);
	}
}

/*
 * Ensure FPSIMD/SVE storage in memory for the loaded context is up to
 * date with respect to the CPU registers. Note carefully that the
 * current context is the context last bound to the CPU stored in
 * last; if KVM is involved this may be the guest VM context rather
 * than the host thread for the VM pointed to by current. This means
 * that we must always reference the state storage via last rather
 * than via current; if we are saving KVM state then it will have
 * ensured that the type of registers to save is set in last->to_save.
 */
static void fpsimd_save_user_state(void)
{
	struct cpu_fp_state const *last =
		this_cpu_ptr(&fpsimd_last_state);
	/* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */
	bool save_sve_regs = false;
	bool save_ffr;
	unsigned int vl;

	WARN_ON(!system_supports_fpsimd());
	WARN_ON(preemptible());

	if (test_thread_flag(TIF_FOREIGN_FPSTATE))
		return;

	if (system_supports_fpmr())
		*(last->fpmr) = read_sysreg_s(SYS_FPMR);

	/*
	 * Save SVE state if it is live.
	 *
	 * The syscall ABI discards live SVE state at syscall entry. When
	 * entering a syscall, fpsimd_syscall_enter() sets to_save to
	 * FP_STATE_FPSIMD to allow the SVE state to be lazily discarded until
	 * either new SVE state is loaded+bound or fpsimd_syscall_exit() is
	 * called prior to a return to userspace.
	 */
	if ((last->to_save == FP_STATE_CURRENT && test_thread_flag(TIF_SVE)) ||
	    last->to_save == FP_STATE_SVE) {
		save_sve_regs = true;
		save_ffr = true;
		vl = last->sve_vl;
	}

	if (system_supports_sme()) {
		u64 *svcr = last->svcr;

		*svcr = read_sysreg_s(SYS_SVCR);

		if (*svcr & SVCR_ZA_MASK)
			sme_save_state(last->sme_state,
				       system_supports_sme2());

		/* If we are in streaming mode override regular SVE. */
		if (*svcr & SVCR_SM_MASK) {
			save_sve_regs = true;
			save_ffr = system_supports_fa64();
			vl = last->sme_vl;
		}
	}

	if (IS_ENABLED(CONFIG_ARM64_SVE) && save_sve_regs) {
		/* Get the configured VL from RDVL, will account for SM */
		if (WARN_ON(sve_get_vl() != vl)) {
			/*
			 * Can't save the user regs, so current would
			 * re-enter user with corrupt state.
			 * There's no way to recover, so kill it:
			 */
			force_signal_inject(SIGKILL, SI_KERNEL, 0, 0);
			return;
		}

		sve_save_state((char *)last->sve_state +
					sve_ffr_offset(vl),
			       &last->st->fpsr, save_ffr);
		*last->fp_type = FP_STATE_SVE;
	} else {
		fpsimd_save_state(last->st);
		*last->fp_type = FP_STATE_FPSIMD;
	}
}

/*
 * All vector length selection from userspace comes through here.
 * We're on a slow path, so some sanity-checks are included.
 * If things go wrong there's a bug somewhere, but try to fall back to a
 * safe choice.
 */
static unsigned int find_supported_vector_length(enum vec_type type,
						 unsigned int vl)
{
	struct vl_info *info = &vl_info[type];
	int bit;
	int max_vl = info->max_vl;

	if (WARN_ON(!sve_vl_valid(vl)))
		vl = info->min_vl;

	if (WARN_ON(!sve_vl_valid(max_vl)))
		max_vl = info->min_vl;

	if (vl > max_vl)
		vl = max_vl;
	if (vl < info->min_vl)
		vl = info->min_vl;

	bit = find_next_bit(info->vq_map, SVE_VQ_MAX,
			    __vq_to_bit(sve_vq_from_vl(vl)));
	return sve_vl_from_vq(__bit_to_vq(bit));
}

#if defined(CONFIG_ARM64_SVE) && defined(CONFIG_SYSCTL)

static int vec_proc_do_default_vl(const struct ctl_table *table, int write,
				  void *buffer, size_t *lenp, loff_t *ppos)
{
	struct vl_info *info = table->extra1;
	enum vec_type type = info->type;
	int ret;
	int vl = get_default_vl(type);
	struct ctl_table tmp_table = {
		.data = &vl,
		.maxlen = sizeof(vl),
	};

	ret = proc_dointvec(&tmp_table, write, buffer, lenp, ppos);
	if (ret || !write)
		return ret;

	/* Writing -1 has the special meaning "set to max": */
	if (vl == -1)
		vl = info->max_vl;

	if (!sve_vl_valid(vl))
		return -EINVAL;

	set_default_vl(type, find_supported_vector_length(type, vl));
	return 0;
}

static const struct ctl_table sve_default_vl_table[] = {
	{
		.procname	= "sve_default_vector_length",
		.mode		= 0644,
		.proc_handler	= vec_proc_do_default_vl,
		.extra1		= &vl_info[ARM64_VEC_SVE],
	},
};

static int __init sve_sysctl_init(void)
{
	if (system_supports_sve())
		if (!register_sysctl("abi", sve_default_vl_table))
			return -EINVAL;

	return 0;
}

#else /* ! (CONFIG_ARM64_SVE && CONFIG_SYSCTL) */
static int __init sve_sysctl_init(void) { return 0; }
#endif /* ! (CONFIG_ARM64_SVE && CONFIG_SYSCTL) */

#if defined(CONFIG_ARM64_SME) && defined(CONFIG_SYSCTL)
static const struct ctl_table sme_default_vl_table[] = {
	{
		.procname	= "sme_default_vector_length",
		.mode		= 0644,
		.proc_handler	= vec_proc_do_default_vl,
		.extra1		= &vl_info[ARM64_VEC_SME],
	},
};

static int __init sme_sysctl_init(void)
{
	if (system_supports_sme())
		if (!register_sysctl("abi", sme_default_vl_table))
			return -EINVAL;

	return 0;
}

#else /* ! (CONFIG_ARM64_SME && CONFIG_SYSCTL) */
static int __init sme_sysctl_init(void) { return 0; }
#endif /* ! (CONFIG_ARM64_SME && CONFIG_SYSCTL) */
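
/*
 * Illustrative usage (from userspace, not this file): the default vector
 * lengths registered above appear under /proc/sys/abi/, e.g.
 *
 *	# cat /proc/sys/abi/sve_default_vector_length
 *	# echo 32 > /proc/sys/abi/sve_default_vector_length
 *	# echo -1 > /proc/sys/abi/sve_default_vector_length	(set to max)
 *
 * Values are in bytes and are rounded to a supported vector length by
 * find_supported_vector_length() before being stored.
 */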

#define ZREG(sve_state, vq, n) ((char *)(sve_state) +		\
	(SVE_SIG_ZREG_OFFSET(vq, n) - SVE_SIG_REGS_OFFSET))

#ifdef CONFIG_CPU_BIG_ENDIAN
static __uint128_t arm64_cpu_to_le128(__uint128_t x)
{
	u64 a = swab64(x);
	u64 b = swab64(x >> 64);

	return ((__uint128_t)a << 64) | b;
}
#else
static __uint128_t arm64_cpu_to_le128(__uint128_t x)
{
	return x;
}
#endif

#define arm64_le128_to_cpu(x) arm64_cpu_to_le128(x)

static void __fpsimd_to_sve(void *sst, struct user_fpsimd_state const *fst,
			    unsigned int vq)
{
	unsigned int i;
	__uint128_t *p;

	for (i = 0; i < SVE_NUM_ZREGS; ++i) {
		p = (__uint128_t *)ZREG(sst, vq, i);
		*p = arm64_cpu_to_le128(fst->vregs[i]);
	}
}

/*
 * Transfer the FPSIMD state in task->thread.uw.fpsimd_state to
 * task->thread.sve_state.
 *
 * Task can be a non-runnable task, or current. In the latter case,
 * the caller must have ownership of the cpu FPSIMD context before calling
 * this function.
 * task->thread.sve_state must point to at least sve_state_size(task)
 * bytes of allocated kernel memory.
 * task->thread.uw.fpsimd_state must be up to date before calling this
 * function.
 */
static inline void fpsimd_to_sve(struct task_struct *task)
{
	unsigned int vq;
	void *sst = task->thread.sve_state;
	struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;

	if (!system_supports_sve() && !system_supports_sme())
		return;

	vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread));
	__fpsimd_to_sve(sst, fst, vq);
}

/*
 * Transfer the SVE state in task->thread.sve_state to
 * task->thread.uw.fpsimd_state.
 *
 * Task can be a non-runnable task, or current. In the latter case,
 * the caller must have ownership of the cpu FPSIMD context before calling
 * this function.
 * task->thread.sve_state must point to at least sve_state_size(task)
 * bytes of allocated kernel memory.
 * task->thread.sve_state must be up to date before calling this function.
 */
static inline void sve_to_fpsimd(struct task_struct *task)
{
	unsigned int vq, vl;
	void const *sst = task->thread.sve_state;
	struct user_fpsimd_state *fst = &task->thread.uw.fpsimd_state;
	unsigned int i;
	__uint128_t const *p;

	if (!system_supports_sve() && !system_supports_sme())
		return;

	vl = thread_get_cur_vl(&task->thread);
	vq = sve_vq_from_vl(vl);
	for (i = 0; i < SVE_NUM_ZREGS; ++i) {
		p = (__uint128_t const *)ZREG(sst, vq, i);
		fst->vregs[i] = arm64_le128_to_cpu(*p);
	}
}

static inline void __fpsimd_zero_vregs(struct user_fpsimd_state *fpsimd)
{
	memset(&fpsimd->vregs, 0, sizeof(fpsimd->vregs));
}

/*
 * Simulate the effects of an SMSTOP SM instruction.
 */
void task_smstop_sm(struct task_struct *task)
{
	if (!thread_sm_enabled(&task->thread))
		return;

	__fpsimd_zero_vregs(&task->thread.uw.fpsimd_state);
	task->thread.uw.fpsimd_state.fpsr = 0x0800009f;
	if (system_supports_fpmr())
		task->thread.uw.fpmr = 0;

	task->thread.svcr &= ~SVCR_SM_MASK;
	task->thread.fp_type = FP_STATE_FPSIMD;
}

void cpu_enable_fpmr(const struct arm64_cpu_capabilities *__always_unused p)
{
	write_sysreg_s(read_sysreg_s(SYS_SCTLR_EL1) | SCTLR_EL1_EnFPM_MASK,
		       SYS_SCTLR_EL1);
}

#ifdef CONFIG_ARM64_SVE
static void sve_free(struct task_struct *task)
{
	kfree(task->thread.sve_state);
	task->thread.sve_state = NULL;
}

/*
 * Ensure that task->thread.sve_state is allocated and sufficiently large.
 *
 * This function should be used only in preparation for replacing
 * task->thread.sve_state with new data. The memory is always zeroed
 * here to prevent stale data from showing through: this is done in
 * the interest of testability and predictability: except in the
 * do_sve_acc() case, there is no ABI requirement to hide stale data
 * written previously by the task.
 */
void sve_alloc(struct task_struct *task, bool flush)
{
	if (task->thread.sve_state) {
		if (flush)
			memset(task->thread.sve_state, 0,
			       sve_state_size(task));
		return;
	}

	/* This is a small allocation (maximum ~8KB) and Should Not Fail. */
	task->thread.sve_state =
		kzalloc(sve_state_size(task), GFP_KERNEL);
}

/*
 * Ensure that task->thread.uw.fpsimd_state is up to date with respect to the
 * task's currently effective FPSIMD/SVE state.
 *
 * The task's FPSIMD/SVE/SME state must not be subject to concurrent
 * manipulation.
 */
void fpsimd_sync_from_effective_state(struct task_struct *task)
{
	if (task->thread.fp_type == FP_STATE_SVE)
		sve_to_fpsimd(task);
}

/*
 * Ensure that the task's currently effective FPSIMD/SVE state is up to date
 * with respect to task->thread.uw.fpsimd_state, zeroing any effective
 * non-FPSIMD (S)SVE state.
 *
 * The task's FPSIMD/SVE/SME state must not be subject to concurrent
 * manipulation.
 */
void fpsimd_sync_to_effective_state_zeropad(struct task_struct *task)
{
	unsigned int vq;
	void *sst = task->thread.sve_state;
	struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;

	if (task->thread.fp_type != FP_STATE_SVE)
		return;

	vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread));

	memset(sst, 0, SVE_SIG_REGS_SIZE(vq));
	__fpsimd_to_sve(sst, fst, vq);
}

static int change_live_vector_length(struct task_struct *task,
				     enum vec_type type,
				     unsigned long vl)
{
	unsigned int sve_vl = task_get_sve_vl(task);
	unsigned int sme_vl = task_get_sme_vl(task);
	void *sve_state = NULL, *sme_state = NULL;

	if (type == ARM64_VEC_SME)
		sme_vl = vl;
	else
		sve_vl = vl;

	/*
	 * Allocate the new sve_state and sme_state before freeing the old
	 * copies so that allocation failure can be handled without needing to
	 * mutate the task's state in any way.
	 *
	 * Changes to the SVE vector length must not discard live ZA state or
	 * clear PSTATE.ZA, as userspace code which is unaware of the AAPCS64
	 * ZA lazy saving scheme may attempt to change the SVE vector length
	 * while unsaved/dormant ZA state exists.
	 */
	sve_state = kzalloc(__sve_state_size(sve_vl, sme_vl), GFP_KERNEL);
	if (!sve_state)
		goto out_mem;

	if (type == ARM64_VEC_SME) {
		sme_state = kzalloc(__sme_state_size(sme_vl), GFP_KERNEL);
		if (!sme_state)
			goto out_mem;
	}

	if (task == current)
		fpsimd_save_and_flush_current_state();
	else
		fpsimd_flush_task_state(task);

	/*
	 * Always preserve PSTATE.SM and the effective FPSIMD state, zeroing
	 * other SVE state.
	 */
	fpsimd_sync_from_effective_state(task);
	task_set_vl(task, type, vl);
	kfree(task->thread.sve_state);
	task->thread.sve_state = sve_state;
	fpsimd_sync_to_effective_state_zeropad(task);

	if (type == ARM64_VEC_SME) {
		task->thread.svcr &= ~SVCR_ZA_MASK;
		kfree(task->thread.sme_state);
		task->thread.sme_state = sme_state;
	}

	return 0;

out_mem:
	kfree(sve_state);
	kfree(sme_state);
	return -ENOMEM;
}

int vec_set_vector_length(struct task_struct *task, enum vec_type type,
			  unsigned long vl, unsigned long flags)
{
	bool onexec = flags & PR_SVE_SET_VL_ONEXEC;
	bool inherit = flags & PR_SVE_VL_INHERIT;

	if (flags & ~(unsigned long)(PR_SVE_VL_INHERIT |
				     PR_SVE_SET_VL_ONEXEC))
		return -EINVAL;

	if (!sve_vl_valid(vl))
		return -EINVAL;

	/*
	 * Clamp to the maximum vector length that VL-agnostic code
	 * can work with. A flag may be assigned in the future to
	 * allow setting of larger vector lengths without confusing
	 * older software.
	 */
	if (vl > VL_ARCH_MAX)
		vl = VL_ARCH_MAX;

	vl = find_supported_vector_length(type, vl);

	if (!onexec && vl != task_get_vl(task, type)) {
		if (change_live_vector_length(task, type, vl))
			return -ENOMEM;
	}

	if (onexec || inherit)
		task_set_vl_onexec(task, type, vl);
	else
		/* Reset VL to system default on next exec: */
		task_set_vl_onexec(task, type, 0);

	update_tsk_thread_flag(task, vec_vl_inherit_flag(type),
			       flags & PR_SVE_VL_INHERIT);

	return 0;
}

/*
 * Encode the current vector length and flags for return.
 * This is only required for prctl(): ptrace has separate fields.
 * SVE and SME use the same bits for _ONEXEC and _INHERIT.
 *
 * flags are as for vec_set_vector_length().
 */
static int vec_prctl_status(enum vec_type type, unsigned long flags)
{
	int ret;

	if (flags & PR_SVE_SET_VL_ONEXEC)
		ret = task_get_vl_onexec(current, type);
	else
		ret = task_get_vl(current, type);

	if (test_thread_flag(vec_vl_inherit_flag(type)))
		ret |= PR_SVE_VL_INHERIT;

	return ret;
}

/* PR_SVE_SET_VL */
int sve_set_current_vl(unsigned long arg)
{
	unsigned long vl, flags;
	int ret;

	vl = arg & PR_SVE_VL_LEN_MASK;
	flags = arg & ~vl;

	if (!system_supports_sve() || is_compat_task())
		return -EINVAL;

	ret = vec_set_vector_length(current, ARM64_VEC_SVE, vl, flags);
	if (ret)
		return ret;

	return vec_prctl_status(ARM64_VEC_SVE, flags);
}

/* PR_SVE_GET_VL */
int sve_get_current_vl(void)
{
	if (!system_supports_sve() || is_compat_task())
		return -EINVAL;

	return vec_prctl_status(ARM64_VEC_SVE, 0);
}

#ifdef CONFIG_ARM64_SME
/* PR_SME_SET_VL */
int sme_set_current_vl(unsigned long arg)
{
	unsigned long vl, flags;
	int ret;

	vl = arg & PR_SME_VL_LEN_MASK;
	flags = arg & ~vl;

	if (!system_supports_sme() || is_compat_task())
		return -EINVAL;

	ret = vec_set_vector_length(current, ARM64_VEC_SME, vl, flags);
	if (ret)
		return ret;

	return vec_prctl_status(ARM64_VEC_SME, flags);
}

/* PR_SME_GET_VL */
int sme_get_current_vl(void)
{
	if (!system_supports_sme() || is_compat_task())
		return -EINVAL;

	return vec_prctl_status(ARM64_VEC_SME, 0);
}
#endif /* CONFIG_ARM64_SME */
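
/*
 * Illustrative userspace usage of the prctl() interface above (sketch only,
 * not part of this file): a process might request a 256-bit (32-byte) SVE
 * vector length that is also inherited across exec with
 *
 *	prctl(PR_SVE_SET_VL, 32 | PR_SVE_VL_INHERIT);
 *
 * The return value encodes the vector length actually granted in the low
 * bits (PR_SVE_VL_LEN_MASK) together with the _INHERIT flag, as assembled
 * by vec_prctl_status() above.
 */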

static void vec_probe_vqs(struct vl_info *info,
			  DECLARE_BITMAP(map, SVE_VQ_MAX))
{
	unsigned int vq, vl;

	bitmap_zero(map, SVE_VQ_MAX);

	for (vq = SVE_VQ_MAX; vq >= SVE_VQ_MIN; --vq) {
		write_vl(info->type, vq - 1); /* self-syncing */

		switch (info->type) {
		case ARM64_VEC_SVE:
			vl = sve_get_vl();
			break;
		case ARM64_VEC_SME:
			vl = sme_get_vl();
			break;
		default:
			vl = 0;
			break;
		}

		/* Minimum VL identified? */
		if (sve_vq_from_vl(vl) > vq)
			break;

		vq = sve_vq_from_vl(vl); /* skip intervening lengths */
		set_bit(__vq_to_bit(vq), map);
	}
}

/*
 * Initialise the set of known supported VQs for the boot CPU.
 * This is called during kernel boot, before secondary CPUs are brought up.
 */
void __init vec_init_vq_map(enum vec_type type)
{
	struct vl_info *info = &vl_info[type];
	vec_probe_vqs(info, info->vq_map);
	bitmap_copy(info->vq_partial_map, info->vq_map, SVE_VQ_MAX);
}

/*
 * If we haven't committed to the set of supported VQs yet, filter out
 * those not supported by the current CPU.
 * This function is called during the bring-up of early secondary CPUs only.
 */
void vec_update_vq_map(enum vec_type type)
{
	struct vl_info *info = &vl_info[type];
	DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);

	vec_probe_vqs(info, tmp_map);
	bitmap_and(info->vq_map, info->vq_map, tmp_map, SVE_VQ_MAX);
	bitmap_or(info->vq_partial_map, info->vq_partial_map, tmp_map,
		  SVE_VQ_MAX);
}

/*
 * Check whether the current CPU supports all VQs in the committed set.
 * This function is called during the bring-up of late secondary CPUs only.
 */
int vec_verify_vq_map(enum vec_type type)
{
	struct vl_info *info = &vl_info[type];
	DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
	unsigned long b;

	vec_probe_vqs(info, tmp_map);

	bitmap_complement(tmp_map, tmp_map, SVE_VQ_MAX);
	if (bitmap_intersects(tmp_map, info->vq_map, SVE_VQ_MAX)) {
		pr_warn("%s: cpu%d: Required vector length(s) missing\n",
			info->name, smp_processor_id());
		return -EINVAL;
	}

	if (!IS_ENABLED(CONFIG_KVM) || !is_hyp_mode_available())
		return 0;

	/*
	 * For KVM, it is necessary to ensure that this CPU doesn't
	 * support any vector length that guests may have probed as
	 * unsupported.
	 */

	/* Recover the set of supported VQs: */
	bitmap_complement(tmp_map, tmp_map, SVE_VQ_MAX);
	/* Find VQs supported that are not globally supported: */
	bitmap_andnot(tmp_map, tmp_map, info->vq_map, SVE_VQ_MAX);

	/* Find the lowest such VQ, if any: */
	b = find_last_bit(tmp_map, SVE_VQ_MAX);
	if (b >= SVE_VQ_MAX)
		return 0; /* no mismatches */

	/*
	 * Mismatches above sve_max_virtualisable_vl are fine, since
	 * no guest is allowed to configure ZCR_EL2.LEN to exceed this:
	 */
	if (sve_vl_from_vq(__bit_to_vq(b)) <= info->max_virtualisable_vl) {
		pr_warn("%s: cpu%d: Unsupported vector length(s) present\n",
			info->name, smp_processor_id());
		return -EINVAL;
	}

	return 0;
}

void cpu_enable_sve(const struct arm64_cpu_capabilities *__always_unused p)
{
	write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_ZEN_EL1EN, CPACR_EL1);
	isb();

	write_sysreg_s(0, SYS_ZCR_EL1);
}

void __init sve_setup(void)
{
	struct vl_info *info = &vl_info[ARM64_VEC_SVE];
	DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
	unsigned long b;
	int max_bit;

	if (!system_supports_sve())
		return;

	/*
	 * The SVE architecture mandates support for 128-bit vectors,
	 * so sve_vq_map must have at least SVE_VQ_MIN set.
	 * If something went wrong, at least try to patch it up:
	 */
	if (WARN_ON(!test_bit(__vq_to_bit(SVE_VQ_MIN), info->vq_map)))
		set_bit(__vq_to_bit(SVE_VQ_MIN), info->vq_map);

	max_bit = find_first_bit(info->vq_map, SVE_VQ_MAX);
	info->max_vl = sve_vl_from_vq(__bit_to_vq(max_bit));

	/*
	 * For the default VL, pick the maximum supported value <= 64.
	 * VL == 64 is guaranteed not to grow the signal frame.
	 */
	set_sve_default_vl(find_supported_vector_length(ARM64_VEC_SVE, 64));

	bitmap_andnot(tmp_map, info->vq_partial_map, info->vq_map,
		      SVE_VQ_MAX);

	b = find_last_bit(tmp_map, SVE_VQ_MAX);
	if (b >= SVE_VQ_MAX)
		/* No non-virtualisable VLs found */
		info->max_virtualisable_vl = SVE_VQ_MAX;
	else if (WARN_ON(b == SVE_VQ_MAX - 1))
		/* No virtualisable VLs?  This is architecturally forbidden. */
		info->max_virtualisable_vl = SVE_VQ_MIN;
	else /* b + 1 < SVE_VQ_MAX */
		info->max_virtualisable_vl = sve_vl_from_vq(__bit_to_vq(b + 1));

	if (info->max_virtualisable_vl > info->max_vl)
		info->max_virtualisable_vl = info->max_vl;

	pr_info("%s: maximum available vector length %u bytes per vector\n",
		info->name, info->max_vl);
	pr_info("%s: default vector length %u bytes per vector\n",
		info->name, get_sve_default_vl());

	/* KVM decides whether to support mismatched systems. Just warn here: */
	if (sve_max_virtualisable_vl() < sve_max_vl())
		pr_warn("%s: unvirtualisable vector lengths present\n",
			info->name);
}

/*
 * Called from the put_task_struct() path, which cannot get here
 * unless dead_task is really dead and not schedulable.
 */
void fpsimd_release_task(struct task_struct *dead_task)
{
	sve_free(dead_task);
	sme_free(dead_task);
}

#endif /* CONFIG_ARM64_SVE */

#ifdef CONFIG_ARM64_SME

/*
 * Ensure that task->thread.sme_state is allocated and sufficiently large.
 *
 * This function should be used only in preparation for replacing
 * task->thread.sme_state with new data. The memory is always zeroed
 * here to prevent stale data from showing through: this is done in
 * the interest of testability and predictability; the architecture
 * guarantees that when ZA is enabled it will be zeroed.
 */
void sme_alloc(struct task_struct *task, bool flush)
{
	if (task->thread.sme_state) {
		if (flush)
			memset(task->thread.sme_state, 0,
			       sme_state_size(task));
		return;
	}

	/* This could potentially be up to 64K. */
	task->thread.sme_state =
		kzalloc(sme_state_size(task), GFP_KERNEL);
}

static void sme_free(struct task_struct *task)
{
	kfree(task->thread.sme_state);
	task->thread.sme_state = NULL;
}

void cpu_enable_sme(const struct arm64_cpu_capabilities *__always_unused p)
{
	/* Set priority for all PEs to architecturally defined minimum */
	write_sysreg_s(read_sysreg_s(SYS_SMPRI_EL1) & ~SMPRI_EL1_PRIORITY_MASK,
		       SYS_SMPRI_EL1);

	/* Allow SME in kernel */
	write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_SMEN_EL1EN, CPACR_EL1);
	isb();

	/* Ensure all bits in SMCR are set to known values */
	write_sysreg_s(0, SYS_SMCR_EL1);

	/* Allow EL0 to access TPIDR2 */
	write_sysreg(read_sysreg(SCTLR_EL1) | SCTLR_ELx_ENTP2, SCTLR_EL1);
	isb();
}

void cpu_enable_sme2(const struct arm64_cpu_capabilities *__always_unused p)
{
	/* This must be enabled after SME */
	BUILD_BUG_ON(ARM64_SME2 <= ARM64_SME);

	/* Allow use of ZT0 */
	write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_EZT0_MASK,
		       SYS_SMCR_EL1);
}

void cpu_enable_fa64(const struct arm64_cpu_capabilities *__always_unused p)
{
	/* This must be enabled after SME */
	BUILD_BUG_ON(ARM64_SME_FA64 <= ARM64_SME);

	/* Allow use of FA64 */
	write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_FA64_MASK,
		       SYS_SMCR_EL1);
}

void __init sme_setup(void)
{
	struct vl_info *info = &vl_info[ARM64_VEC_SME];
	int min_bit, max_bit;

	if (!system_supports_sme())
		return;

	min_bit = find_last_bit(info->vq_map, SVE_VQ_MAX);

	/*
	 * SME doesn't require any particular vector length be
	 * supported but it does require at least one. We should have
	 * disabled the feature entirely while bringing up CPUs but
	 * let's double check here. The bitmap is SVE_VQ_MAX sized for
	 * sharing with SVE.
	 */
	WARN_ON(min_bit >= SVE_VQ_MAX);

	info->min_vl = sve_vl_from_vq(__bit_to_vq(min_bit));

	max_bit = find_first_bit(info->vq_map, SVE_VQ_MAX);
	info->max_vl = sve_vl_from_vq(__bit_to_vq(max_bit));

	WARN_ON(info->min_vl > info->max_vl);

	/*
	 * For the default VL, pick the maximum supported value <= 32
	 * (256 bits) if there is one since this is guaranteed not to
	 * grow the signal frame when in streaming mode, otherwise the
	 * minimum available VL will be used.
	 */
	set_sme_default_vl(find_supported_vector_length(ARM64_VEC_SME, 32));

	pr_info("SME: minimum available vector length %u bytes per vector\n",
		info->min_vl);
	pr_info("SME: maximum available vector length %u bytes per vector\n",
		info->max_vl);
	pr_info("SME: default vector length %u bytes per vector\n",
		get_sme_default_vl());
}

void sme_suspend_exit(void)
{
	u64 smcr = 0;

	if (!system_supports_sme())
		return;

	if (system_supports_fa64())
		smcr |= SMCR_ELx_FA64;
	if (system_supports_sme2())
		smcr |= SMCR_ELx_EZT0;

	write_sysreg_s(smcr, SYS_SMCR_EL1);
	write_sysreg_s(0, SYS_SMPRI_EL1);
}

#endif /* CONFIG_ARM64_SME */

static void sve_init_regs(void)
{
	/*
	 * Convert the FPSIMD state to SVE, zeroing all the state that
	 * is not shared with FPSIMD.
	 * If (as is likely) the current
	 * state is live in the registers then do this there and
	 * update our metadata for the current task including
	 * disabling the trap, otherwise update our in-memory copy.
	 * We are guaranteed not to be in streaming mode: we can only
	 * take an SVE trap when not in streaming mode, and we can't be
	 * in streaming mode when taking an SME trap.
	 */
	if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
		unsigned long vq_minus_one =
			sve_vq_from_vl(task_get_sve_vl(current)) - 1;
		sve_set_vq(vq_minus_one);
		sve_flush_live(true, vq_minus_one);
		fpsimd_bind_task_to_cpu();
	} else {
		fpsimd_to_sve(current);
		current->thread.fp_type = FP_STATE_SVE;
		fpsimd_flush_task_state(current);
	}
}

/*
 * Trapped SVE access
 *
 * Storage is allocated for the full SVE state, the current FPSIMD
 * register contents are migrated across, and the access trap is
 * disabled.
 *
 * TIF_SVE should be clear on entry: otherwise, fpsimd_restore_current_state()
 * would have disabled the SVE access trap for userspace during
 * ret_to_user, making an SVE access trap impossible in that case.
 */
void do_sve_acc(unsigned long esr, struct pt_regs *regs)
{
	/* Even if we chose not to use SVE, the hardware could still trap: */
	if (unlikely(!system_supports_sve()) || WARN_ON(is_compat_task())) {
		force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
		return;
	}

	sve_alloc(current, true);
	if (!current->thread.sve_state) {
		force_sig(SIGKILL);
		return;
	}

	get_cpu_fpsimd_context();

	if (test_and_set_thread_flag(TIF_SVE))
		WARN_ON(1); /* SVE access shouldn't have trapped */

	/*
	 * Even if the task can have used streaming mode we can only
	 * generate SVE access traps in normal SVE mode and
	 * transitioning out of streaming mode may discard any
	 * streaming mode state.
	 * Always clear the high bits to avoid
	 * any potential errors tracking what is properly initialised.
	 */
	sve_init_regs();

	put_cpu_fpsimd_context();
}

/*
 * Trapped SME access
 *
 * Storage is allocated for the full SVE and SME state, the current
 * FPSIMD register contents are migrated to SVE if SVE is not already
 * active, and the access trap is disabled.
 *
 * TIF_SME should be clear on entry: otherwise, fpsimd_restore_current_state()
 * would have disabled the SME access trap for userspace during
 * ret_to_user, making an SME access trap impossible in that case.
 */
void do_sme_acc(unsigned long esr, struct pt_regs *regs)
{
	/* Even if we chose not to use SME, the hardware could still trap: */
	if (unlikely(!system_supports_sme()) || WARN_ON(is_compat_task())) {
		force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
		return;
	}

	/*
	 * If this is not a trap due to SME being disabled then something
	 * is being used in the wrong mode; report it as SIGILL.
	 */
	if (ESR_ELx_SME_ISS_SMTC(esr) != ESR_ELx_SME_ISS_SMTC_SME_DISABLED) {
		force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
		return;
	}

	sve_alloc(current, false);
	sme_alloc(current, true);
	if (!current->thread.sve_state || !current->thread.sme_state) {
		force_sig(SIGKILL);
		return;
	}

	get_cpu_fpsimd_context();

	/* With TIF_SME userspace shouldn't generate any traps */
	if (test_and_set_thread_flag(TIF_SME))
		WARN_ON(1);

	if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
		unsigned long vq_minus_one =
			sve_vq_from_vl(task_get_sme_vl(current)) - 1;
		sme_set_vq(vq_minus_one);

		fpsimd_bind_task_to_cpu();
	} else {
		fpsimd_flush_task_state(current);
	}

	put_cpu_fpsimd_context();
}

/*
 * Trapped FP/ASIMD access.
 */
void do_fpsimd_acc(unsigned long esr, struct pt_regs *regs)
{
	/* Even if we chose not to use FPSIMD, the hardware could still trap: */
	if (!system_supports_fpsimd()) {
		force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
		return;
	}

	/*
	 * When FPSIMD is enabled, we should never take a trap unless something
	 * has gone very wrong.
	 */
	BUG();
}

/*
 * Raise a SIGFPE for the current process.
 */
void do_fpsimd_exc(unsigned long esr, struct pt_regs *regs)
{
	unsigned int si_code = FPE_FLTUNK;

	if (esr & ESR_ELx_FP_EXC_TFV) {
		if (esr & FPEXC_IOF)
			si_code = FPE_FLTINV;
		else if (esr & FPEXC_DZF)
			si_code = FPE_FLTDIV;
		else if (esr & FPEXC_OFF)
			si_code = FPE_FLTOVF;
		else if (esr & FPEXC_UFF)
			si_code = FPE_FLTUND;
		else if (esr & FPEXC_IXF)
			si_code = FPE_FLTRES;
	}

	send_sig_fault(SIGFPE, si_code,
		       (void __user *)instruction_pointer(regs),
		       current);
}

static void fpsimd_load_kernel_state(struct task_struct *task)
{
	struct cpu_fp_state *last = this_cpu_ptr(&fpsimd_last_state);

	/*
	 * Elide the load if this CPU holds the most recent kernel mode
	 * FPSIMD context of the current task.
	 */
	if (last->st == task->thread.kernel_fpsimd_state &&
	    task->thread.kernel_fpsimd_cpu == smp_processor_id())
		return;

	fpsimd_load_state(task->thread.kernel_fpsimd_state);
}

static void fpsimd_save_kernel_state(struct task_struct *task)
{
	struct cpu_fp_state cpu_fp_state = {
		.st		= task->thread.kernel_fpsimd_state,
		.to_save	= FP_STATE_FPSIMD,
	};

	BUG_ON(!cpu_fp_state.st);

	fpsimd_save_state(task->thread.kernel_fpsimd_state);
	fpsimd_bind_state_to_cpu(&cpu_fp_state);

	task->thread.kernel_fpsimd_cpu = smp_processor_id();
}

/*
 * Invalidate any task's FPSIMD state that is present on this cpu.
 * The FPSIMD context should be acquired with get_cpu_fpsimd_context()
 * before calling this function.
 */
static void fpsimd_flush_cpu_state(void)
{
	WARN_ON(!system_supports_fpsimd());
	__this_cpu_write(fpsimd_last_state.st, NULL);

	/*
	 * Leaving streaming mode enabled will cause issues for any kernel
	 * NEON and leaving streaming mode or ZA enabled may increase power
	 * consumption.
	 */
	if (system_supports_sme())
		sme_smstop();

	set_thread_flag(TIF_FOREIGN_FPSTATE);
}

void fpsimd_thread_switch(struct task_struct *next)
{
	bool wrong_task, wrong_cpu;

	if (!system_supports_fpsimd())
		return;

	WARN_ON_ONCE(!irqs_disabled());

	/* Save unsaved fpsimd state, if any: */
	if (test_thread_flag(TIF_KERNEL_FPSTATE))
		fpsimd_save_kernel_state(current);
	else
		fpsimd_save_user_state();

	if (test_tsk_thread_flag(next, TIF_KERNEL_FPSTATE)) {
		fpsimd_flush_cpu_state();
		fpsimd_load_kernel_state(next);
	} else {
		/*
		 * Fix up TIF_FOREIGN_FPSTATE to correctly describe next's
		 * state. For kernel threads, FPSIMD registers are never
		 * loaded with user mode FPSIMD state and so wrong_task and
		 * wrong_cpu will always be true.
		 */
		wrong_task = __this_cpu_read(fpsimd_last_state.st) !=
			&next->thread.uw.fpsimd_state;
		wrong_cpu = next->thread.fpsimd_cpu != smp_processor_id();

		update_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE,
				       wrong_task || wrong_cpu);
	}
}

static void fpsimd_flush_thread_vl(enum vec_type type)
{
	int vl, supported_vl;

	/*
	 * Reset the task vector length as required. This is where we
	 * ensure that all user tasks have a valid vector length
	 * configured: no kernel task can become a user task without
	 * an exec and hence a call to this function.
	 * By the time the
	 * first call to this function is made, all early hardware
	 * probing is complete, so __sve_default_vl should be valid.
	 * If a bug causes this to go wrong, we make some noise and
	 * try to fudge thread.sve_vl to a safe value here.
	 */
	vl = task_get_vl_onexec(current, type);
	if (!vl)
		vl = get_default_vl(type);

	if (WARN_ON(!sve_vl_valid(vl)))
		vl = vl_info[type].min_vl;

	supported_vl = find_supported_vector_length(type, vl);
	if (WARN_ON(supported_vl != vl))
		vl = supported_vl;

	task_set_vl(current, type, vl);

	/*
	 * If the task is not set to inherit, ensure that the vector
	 * length will be reset by a subsequent exec:
	 */
	if (!test_thread_flag(vec_vl_inherit_flag(type)))
		task_set_vl_onexec(current, type, 0);
}

void fpsimd_flush_thread(void)
{
	void *sve_state = NULL;
	void *sme_state = NULL;

	if (!system_supports_fpsimd())
		return;

	get_cpu_fpsimd_context();

	fpsimd_flush_task_state(current);
	memset(&current->thread.uw.fpsimd_state, 0,
	       sizeof(current->thread.uw.fpsimd_state));

	if (system_supports_sve()) {
		clear_thread_flag(TIF_SVE);

		/* Defer kfree() while in atomic context */
		sve_state = current->thread.sve_state;
		current->thread.sve_state = NULL;

		fpsimd_flush_thread_vl(ARM64_VEC_SVE);
	}

	if (system_supports_sme()) {
		clear_thread_flag(TIF_SME);

		/* Defer kfree() while in atomic context */
		sme_state = current->thread.sme_state;
		current->thread.sme_state = NULL;

		fpsimd_flush_thread_vl(ARM64_VEC_SME);
		current->thread.svcr = 0;
	}

	if (system_supports_fpmr())
		current->thread.uw.fpmr = 0;

	current->thread.fp_type = FP_STATE_FPSIMD;

	put_cpu_fpsimd_context();
	kfree(sve_state);
	kfree(sme_state);
}

/*
 * Save the userland FPSIMD state of 'current' to memory, but only if the state
 * currently held in the registers does in fact belong to 'current'
 */
void fpsimd_preserve_current_state(void)
{
	if (!system_supports_fpsimd())
		return;

	get_cpu_fpsimd_context();
	fpsimd_save_user_state();
	put_cpu_fpsimd_context();
}

/*
 * Associate current's FPSIMD context with this cpu
 * The caller must have ownership of the cpu FPSIMD context before calling
 * this function.
 */
static void fpsimd_bind_task_to_cpu(void)
{
	struct cpu_fp_state *last = this_cpu_ptr(&fpsimd_last_state);

	WARN_ON(!system_supports_fpsimd());
	last->st = &current->thread.uw.fpsimd_state;
	last->sve_state = current->thread.sve_state;
	last->sme_state = current->thread.sme_state;
	last->sve_vl = task_get_sve_vl(current);
	last->sme_vl = task_get_sme_vl(current);
	last->svcr = &current->thread.svcr;
	last->fpmr = &current->thread.uw.fpmr;
	last->fp_type = &current->thread.fp_type;
	last->to_save = FP_STATE_CURRENT;
	current->thread.fpsimd_cpu = smp_processor_id();

	/*
	 * Toggle SVE and SME trapping for userspace if needed, these
	 * are serialised by ret_to_user().
	 */
	if (system_supports_sme()) {
		if (test_thread_flag(TIF_SME))
			sme_user_enable();
		else
			sme_user_disable();
	}

	if (system_supports_sve()) {
		if (test_thread_flag(TIF_SVE))
			sve_user_enable();
		else
			sve_user_disable();
	}
}

void fpsimd_bind_state_to_cpu(struct cpu_fp_state *state)
{
	struct cpu_fp_state *last = this_cpu_ptr(&fpsimd_last_state);

	WARN_ON(!system_supports_fpsimd());
	WARN_ON(!in_softirq() && !irqs_disabled());

	*last = *state;
}

/*
 * Load the userland FPSIMD state of 'current' from memory, but only if the
 * FPSIMD state already held in the registers is /not/ the most recent FPSIMD
 * state of 'current'. This is called when we are preparing to return to
 * userspace to ensure that userspace sees a good register state.
 */
void fpsimd_restore_current_state(void)
{
	/*
	 * TIF_FOREIGN_FPSTATE is set on the init task and copied by
	 * arch_dup_task_struct() regardless of whether FP/SIMD is detected.
	 * Thus user threads can have this set even when FP/SIMD hasn't been
	 * detected.
	 *
	 * When FP/SIMD is detected, begin_new_exec() will set
	 * TIF_FOREIGN_FPSTATE via flush_thread() -> fpsimd_flush_thread(),
	 * and fpsimd_thread_switch() will set TIF_FOREIGN_FPSTATE when
	 * switching tasks. We detect FP/SIMD before we exec the first user
	 * process, ensuring this has TIF_FOREIGN_FPSTATE set and
	 * do_notify_resume() will call fpsimd_restore_current_state() to
	 * install the user FP/SIMD context.
	 *
	 * When FP/SIMD is not detected, nothing else will clear or set
	 * TIF_FOREIGN_FPSTATE prior to the first return to userspace, and
	 * we must clear TIF_FOREIGN_FPSTATE to avoid do_notify_resume()
	 * looping forever calling fpsimd_restore_current_state().
	 */
	if (!system_supports_fpsimd()) {
		clear_thread_flag(TIF_FOREIGN_FPSTATE);
		return;
	}

	get_cpu_fpsimd_context();

	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
		task_fpsimd_load();
		fpsimd_bind_task_to_cpu();
	}

	put_cpu_fpsimd_context();
}

void fpsimd_update_current_state(struct user_fpsimd_state const *state)
{
	if (WARN_ON(!system_supports_fpsimd()))
		return;

	current->thread.uw.fpsimd_state = *state;
	if (current->thread.fp_type == FP_STATE_SVE)
		fpsimd_to_sve(current);
}

/*
 * Invalidate live CPU copies of task t's FPSIMD state
 *
 * This function may be called with preemption enabled.
 * The barrier()
 * ensures that the assignment to fpsimd_cpu is visible to any
 * preemption/softirq that could race with set_tsk_thread_flag(), so
 * that TIF_FOREIGN_FPSTATE cannot be spuriously re-cleared.
 *
 * The final barrier ensures that TIF_FOREIGN_FPSTATE is seen set by any
 * subsequent code.
 */
void fpsimd_flush_task_state(struct task_struct *t)
{
	t->thread.fpsimd_cpu = NR_CPUS;
	t->thread.kernel_fpsimd_state = NULL;
	/*
	 * If we don't support fpsimd, bail out after we have
	 * reset the fpsimd_cpu for this task and cleared the
	 * kernel mode FP state pointer.
	 */
	if (!system_supports_fpsimd())
		return;
	barrier();
	set_tsk_thread_flag(t, TIF_FOREIGN_FPSTATE);

	barrier();
}

void fpsimd_save_and_flush_current_state(void)
{
	if (!system_supports_fpsimd())
		return;

	get_cpu_fpsimd_context();
	fpsimd_save_user_state();
	fpsimd_flush_task_state(current);
	put_cpu_fpsimd_context();
}

/*
 * Save the FPSIMD state to memory and invalidate cpu view.
 * This function must be called with preemption disabled.
 */
void fpsimd_save_and_flush_cpu_state(void)
{
	unsigned long flags;

	if (!system_supports_fpsimd())
		return;
	WARN_ON(preemptible());
	local_irq_save(flags);
	fpsimd_save_user_state();
	fpsimd_flush_cpu_state();
	local_irq_restore(flags);
}

#ifdef CONFIG_KERNEL_MODE_NEON

/*
 * Kernel-side NEON support functions
 */

/*
 * kernel_neon_begin(): obtain the CPU FPSIMD registers for use by the calling
 * context
 *
 * Must not be called unless may_use_simd() returns true.
 * Task context in the FPSIMD registers is saved back to memory as necessary.
 *
 * A matching call to kernel_neon_end() must be made before returning from the
 * calling context.
 *
 * The caller may freely use the FPSIMD registers until kernel_neon_end() is
 * called.
 *
 * Unless called from non-preemptible task context, @state must point to a
 * caller provided buffer that will be used to preserve the task's kernel mode
 * FPSIMD context when it is scheduled out, or if it is interrupted by kernel
 * mode FPSIMD occurring in softirq context. May be %NULL otherwise.
 */
void kernel_neon_begin(struct user_fpsimd_state *state)
{
	if (WARN_ON(!system_supports_fpsimd()))
		return;

	WARN_ON((preemptible() || in_serving_softirq()) && !state);

	BUG_ON(!may_use_simd());

	get_cpu_fpsimd_context();

	/* Save unsaved fpsimd state, if any: */
	if (test_thread_flag(TIF_KERNEL_FPSTATE)) {
		BUG_ON(IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq());
		fpsimd_save_state(state);
	} else {
		fpsimd_save_user_state();

		/*
		 * Set the thread flag so that the kernel mode FPSIMD state
		 * will be context switched along with the rest of the task
		 * state.
		 *
		 * On non-PREEMPT_RT, softirqs may interrupt task level kernel
		 * mode FPSIMD, but the task will not be preemptible so setting
		 * TIF_KERNEL_FPSTATE for those would be both wrong (as it
		 * would mark the task context FPSIMD state as requiring a
		 * context switch) and unnecessary.
		 *
		 * On PREEMPT_RT, softirqs are serviced from a separate thread,
		 * which is scheduled as usual, and this guarantees that these
		 * softirqs are not interrupting use of the FPSIMD in kernel
		 * mode in task context.
		 * So in this case, setting the flag here
		 * is always appropriate.
		 */
		if (IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq()) {
			/*
			 * Record the caller provided buffer as the kernel mode
			 * FP/SIMD buffer for this task, so that the state can
			 * be preserved and restored on a context switch.
			 */
			WARN_ON(current->thread.kernel_fpsimd_state != NULL);
			current->thread.kernel_fpsimd_state = state;
			set_thread_flag(TIF_KERNEL_FPSTATE);
		}
	}

	/* Invalidate any task state remaining in the fpsimd regs: */
	fpsimd_flush_cpu_state();

	put_cpu_fpsimd_context();
}
EXPORT_SYMBOL_GPL(kernel_neon_begin);

/*
 * kernel_neon_end(): give the CPU FPSIMD registers back to the current task
 *
 * Must be called from a context in which kernel_neon_begin() was previously
 * called, with no call to kernel_neon_end() in the meantime.
 *
 * The caller must not use the FPSIMD registers after this function is called,
 * unless kernel_neon_begin() is called again in the meantime.
 *
 * The value of @state must match the value passed to the preceding call to
 * kernel_neon_begin().
 */
void kernel_neon_end(struct user_fpsimd_state *state)
{
	if (!system_supports_fpsimd())
		return;

	if (!test_thread_flag(TIF_KERNEL_FPSTATE))
		return;

	/*
	 * If we are returning from a nested use of kernel mode FPSIMD, restore
	 * the task context kernel mode FPSIMD state. This can only happen when
	 * running in softirq context on non-PREEMPT_RT.
	 */
	if (!IS_ENABLED(CONFIG_PREEMPT_RT) && in_serving_softirq()) {
		fpsimd_load_state(state);
	} else {
		clear_thread_flag(TIF_KERNEL_FPSTATE);
		WARN_ON(current->thread.kernel_fpsimd_state != state);
		current->thread.kernel_fpsimd_state = NULL;
	}
}
EXPORT_SYMBOL_GPL(kernel_neon_end);
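
/*
 * Illustrative in-kernel usage of the pair above (sketch only, not a caller
 * in this file), assuming hypothetical helpers my_neon_xor() and its scalar
 * fallback my_scalar_xor():
 *
 *	static void my_neon_xor(void *dst, const void *src, size_t len)
 *	{
 *		struct user_fpsimd_state state;
 *
 *		if (!may_use_simd())
 *			return my_scalar_xor(dst, src, len);
 *
 *		kernel_neon_begin(&state);
 *		...FPSIMD/NEON instructions operating on dst/src...
 *		kernel_neon_end(&state);
 *	}
 *
 * The same @state buffer must be passed to both calls, and the FPSIMD
 * registers must not be touched outside the begin/end section.
 */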

#ifdef CONFIG_EFI

static struct user_fpsimd_state efi_fpsimd_state;

/*
 * EFI runtime services support functions
 *
 * The ABI for EFI runtime services allows EFI to use FPSIMD during the call.
 * This means that for EFI (and only for EFI), we have to assume that FPSIMD
 * is always used rather than being an optional accelerator.
 *
 * These functions provide the necessary support for ensuring FPSIMD
 * save/restore in the contexts from which EFI is used.
 *
 * Do not use them for any other purpose -- if tempted to do so, you are
 * either doing something wrong or you need to propose some refactoring.
 */

/*
 * __efi_fpsimd_begin(): prepare FPSIMD for making an EFI runtime services call
 */
void __efi_fpsimd_begin(void)
{
	if (!system_supports_fpsimd())
		return;

	if (may_use_simd()) {
		kernel_neon_begin(&efi_fpsimd_state);
	} else {
		/*
		 * We are running in hardirq or NMI context, and the only
		 * legitimate case where this might happen is when EFI pstore
		 * is attempting to record the system's dying gasps into EFI
		 * variables. This could be due to an oops, a panic or a call
		 * to emergency_restart(), and in none of those cases can we
		 * expect the current task to ever return to user space again,
		 * or the kernel to resume any normal execution, for that
		 * matter (an oops in hardirq context triggers a panic too).
		 *
		 * Therefore, there is no point in attempting to preserve any
		 * SVE/SME state here. On the off chance that we might have
		 * ended up here for a different reason inadvertently, kill the
		 * task and preserve/restore the base FP/SIMD state, which
		 * might belong to kernel mode FP/SIMD.
		 */
		pr_warn_ratelimited("Calling EFI runtime from %s context\n",
				    in_nmi() ? "NMI" : "hardirq");
		force_signal_inject(SIGKILL, SI_KERNEL, 0, 0);
		fpsimd_save_state(&efi_fpsimd_state);
	}
}

/*
 * __efi_fpsimd_end(): clean up FPSIMD after an EFI runtime services call
 */
void __efi_fpsimd_end(void)
{
	if (!system_supports_fpsimd())
		return;

	if (may_use_simd()) {
		kernel_neon_end(&efi_fpsimd_state);
	} else {
		fpsimd_load_state(&efi_fpsimd_state);
	}
}

#endif /* CONFIG_EFI */

#endif /* CONFIG_KERNEL_MODE_NEON */

#ifdef CONFIG_CPU_PM
static int fpsimd_cpu_pm_notifier(struct notifier_block *self,
				  unsigned long cmd, void *v)
{
	switch (cmd) {
	case CPU_PM_ENTER:
		fpsimd_save_and_flush_cpu_state();
		break;
	case CPU_PM_EXIT:
		break;
	case CPU_PM_ENTER_FAILED:
	default:
		return NOTIFY_DONE;
	}
	return NOTIFY_OK;
}

static struct notifier_block fpsimd_cpu_pm_notifier_block = {
	.notifier_call = fpsimd_cpu_pm_notifier,
};

static void __init fpsimd_pm_init(void)
{
	cpu_pm_register_notifier(&fpsimd_cpu_pm_notifier_block);
}

#else
static inline void fpsimd_pm_init(void) { }
#endif /* CONFIG_CPU_PM */

#ifdef CONFIG_HOTPLUG_CPU
static int fpsimd_cpu_dead(unsigned int cpu)
{
	per_cpu(fpsimd_last_state.st, cpu) = NULL;
	return 0;
}

static inline void fpsimd_hotplug_init(void)
{
	cpuhp_setup_state_nocalls(CPUHP_ARM64_FPSIMD_DEAD, "arm64/fpsimd:dead",
				  NULL, fpsimd_cpu_dead);
}

#else
static inline void fpsimd_hotplug_init(void) { }
#endif

void cpu_enable_fpsimd(const struct arm64_cpu_capabilities *__always_unused p)
{
	unsigned long enable = CPACR_EL1_FPEN_EL1EN | CPACR_EL1_FPEN_EL0EN;

	write_sysreg(read_sysreg(CPACR_EL1) | enable, CPACR_EL1);
	isb();
}

/*
 * FP/SIMD support code initialisation.
 */
static int __init fpsimd_init(void)
{
	if (cpu_have_named_feature(FP)) {
		fpsimd_pm_init();
		fpsimd_hotplug_init();
	} else {
		pr_notice("Floating-point is not implemented\n");
	}

	if (!cpu_have_named_feature(ASIMD))
		pr_notice("Advanced SIMD is not implemented\n");

	sve_sysctl_init();
	sme_sysctl_init();

	return 0;
}
core_initcall(fpsimd_init);