// SPDX-License-Identifier: GPL-2.0-only1/*2* FP/SIMD context switching and fault handling3*4* Copyright (C) 2012 ARM Ltd.5* Author: Catalin Marinas <[email protected]>6*/78#include <linux/bitmap.h>9#include <linux/bitops.h>10#include <linux/bottom_half.h>11#include <linux/bug.h>12#include <linux/cache.h>13#include <linux/compat.h>14#include <linux/compiler.h>15#include <linux/cpu.h>16#include <linux/cpu_pm.h>17#include <linux/ctype.h>18#include <linux/kernel.h>19#include <linux/linkage.h>20#include <linux/irqflags.h>21#include <linux/init.h>22#include <linux/percpu.h>23#include <linux/prctl.h>24#include <linux/preempt.h>25#include <linux/ptrace.h>26#include <linux/sched/signal.h>27#include <linux/sched/task_stack.h>28#include <linux/signal.h>29#include <linux/slab.h>30#include <linux/stddef.h>31#include <linux/sysctl.h>32#include <linux/swab.h>3334#include <asm/esr.h>35#include <asm/exception.h>36#include <asm/fpsimd.h>37#include <asm/cpufeature.h>38#include <asm/cputype.h>39#include <asm/neon.h>40#include <asm/processor.h>41#include <asm/simd.h>42#include <asm/sigcontext.h>43#include <asm/sysreg.h>44#include <asm/traps.h>45#include <asm/virt.h>4647#define FPEXC_IOF (1 << 0)48#define FPEXC_DZF (1 << 1)49#define FPEXC_OFF (1 << 2)50#define FPEXC_UFF (1 << 3)51#define FPEXC_IXF (1 << 4)52#define FPEXC_IDF (1 << 7)5354/*55* (Note: in this discussion, statements about FPSIMD apply equally to SVE.)56*57* In order to reduce the number of times the FPSIMD state is needlessly saved58* and restored, we need to keep track of two things:59* (a) for each task, we need to remember which CPU was the last one to have60* the task's FPSIMD state loaded into its FPSIMD registers;61* (b) for each CPU, we need to remember which task's userland FPSIMD state has62* been loaded into its FPSIMD registers most recently, or whether it has63* been used to perform kernel mode NEON in the meantime.64*65* For (a), we add a fpsimd_cpu field to thread_struct, which gets updated to66* the id of 
the current CPU every time the state is loaded onto a CPU. For (b),
 * we add the per-cpu variable 'fpsimd_last_state' (below), which contains the
 * address of the userland FPSIMD state of the task that was loaded onto the CPU
 * the most recently, or NULL if kernel mode NEON has been performed after that.
 *
 * With this in place, we no longer have to restore the next FPSIMD state right
 * when switching between tasks. Instead, we can defer this check to userland
 * resume, at which time we verify whether the CPU's fpsimd_last_state and the
 * task's fpsimd_cpu are still mutually in sync. If this is the case, we
 * can omit the FPSIMD restore.
 *
 * As an optimization, we use the thread_info flag TIF_FOREIGN_FPSTATE to
 * indicate whether or not the userland FPSIMD state of the current task is
 * present in the registers. The flag is set unless the FPSIMD registers of this
 * CPU currently contain the most recent userland FPSIMD state of the current
 * task. If the task is behaving as a VMM, then this will be managed by
 * KVM which will clear it to indicate that the vcpu FPSIMD state is currently
 * loaded on the CPU, allowing the state to be saved if a FPSIMD-aware
 * softirq kicks in. Upon vcpu_put(), KVM will save the vcpu FP state and
 * flag the register state as invalid.
 *
 * In order to allow softirq handlers to use FPSIMD, kernel_neon_begin() may be
 * called from softirq context, which will save the task's FPSIMD context back
 * to task_struct. 
To prevent this from racing with the manipulation of the
 * task's FPSIMD state from task context and thereby corrupting the state, it
 * is necessary to protect any manipulation of a task's fpsimd_state or
 * TIF_FOREIGN_FPSTATE flag with get_cpu_fpsimd_context(), which will suspend
 * softirq servicing entirely until put_cpu_fpsimd_context() is called.
 *
 * For a certain task, the sequence may look something like this:
 * - the task gets scheduled in; if both the task's fpsimd_cpu field
 *   contains the id of the current CPU, and the CPU's fpsimd_last_state per-cpu
 *   variable points to the task's fpsimd_state, the TIF_FOREIGN_FPSTATE flag is
 *   cleared, otherwise it is set;
 *
 * - the task returns to userland; if TIF_FOREIGN_FPSTATE is set, the task's
 *   userland FPSIMD state is copied from memory to the registers, the task's
 *   fpsimd_cpu field is set to the id of the current CPU, the current
 *   CPU's fpsimd_last_state pointer is set to this task's fpsimd_state and the
 *   TIF_FOREIGN_FPSTATE flag is cleared;
 *
 * - the task executes an ordinary syscall; upon return to userland, the
 *   TIF_FOREIGN_FPSTATE flag will still be cleared, so no FPSIMD state is
 *   restored;
 *
 * - the task executes a syscall which executes some NEON instructions; this is
 *   preceded by a call to kernel_neon_begin(), which copies the task's FPSIMD
 *   register contents to memory, clears the fpsimd_last_state per-cpu variable
 *   and sets the TIF_FOREIGN_FPSTATE flag;
 *
 * - the task gets preempted after kernel_neon_end() is called; as we have not
 *   returned from the 2nd syscall yet, TIF_FOREIGN_FPSTATE is still set so
 *   whatever is in the FPSIMD registers is not saved to memory, but discarded.
 */

/* Last context bound to each CPU's FP/SIMD registers; see comment above. */
DEFINE_PER_CPU(struct cpu_fp_state, fpsimd_last_state);

/*
 * Per-vector-type (SVE/SME) limits and supported-VQ bitmaps.
 * .max_vl / .max_virtualisable_vl start at the architectural minimum and
 * are raised during boot-time probing (see sve_setup()).
 */
__ro_after_init struct vl_info vl_info[ARM64_VEC_MAX] = {
#ifdef CONFIG_ARM64_SVE
	[ARM64_VEC_SVE] = {
		.type			= ARM64_VEC_SVE,
		.name			= "SVE",
		.min_vl			= SVE_VL_MIN,
		.max_vl			= SVE_VL_MIN,
		.max_virtualisable_vl	= SVE_VL_MIN,
	},
#endif
#ifdef CONFIG_ARM64_SME
	[ARM64_VEC_SME] = {
		.type			= ARM64_VEC_SME,
		.name			= "SME",
	},
#endif
};

/* Map a vector type to the thread flag that makes its VL exec-inherited. */
static unsigned int vec_vl_inherit_flag(enum vec_type type)
{
	switch (type) {
	case ARM64_VEC_SVE:
		return TIF_SVE_VL_INHERIT;
	case ARM64_VEC_SME:
		return TIF_SME_VL_INHERIT;
	default:
		WARN_ON_ONCE(1);
		return 0;
	}
}

struct vl_config {
	int __default_vl;		/* Default VL for tasks */
};

static struct vl_config vl_config[ARM64_VEC_MAX];

/*
 * READ_ONCE()/WRITE_ONCE() pairing: the default VL may be updated
 * concurrently via sysctl, so accesses must be tear-free.
 */
static inline int get_default_vl(enum vec_type type)
{
	return READ_ONCE(vl_config[type].__default_vl);
}

#ifdef CONFIG_ARM64_SVE

static inline int get_sve_default_vl(void)
{
	return get_default_vl(ARM64_VEC_SVE);
}

static inline void set_default_vl(enum vec_type type, int val)
{
	WRITE_ONCE(vl_config[type].__default_vl, val);
}

static inline void set_sve_default_vl(int val)
{
	set_default_vl(ARM64_VEC_SVE, val);
}

/* Scratch buffer for saving SVE state around EFI runtime service calls. */
static u8 *efi_sve_state;

#else /* ! CONFIG_ARM64_SVE */

/* Dummy declaration for code that will be optimised out: */
extern u8 *efi_sve_state;

#endif /* ! CONFIG_ARM64_SVE */

#ifdef CONFIG_ARM64_SME

static int get_sme_default_vl(void)
{
	return get_default_vl(ARM64_VEC_SME);
}

static void set_sme_default_vl(int val)
{
	set_default_vl(ARM64_VEC_SME, val);
}

static void sme_free(struct task_struct *);

#else

static inline void sme_free(struct task_struct *t) { }

#endif

static void fpsimd_bind_task_to_cpu(void);

/*
 * Claim ownership of the CPU FPSIMD context for use by the calling context.
 *
 * The caller may freely manipulate the FPSIMD context metadata until
 * put_cpu_fpsimd_context() is called.
 *
 * On RT kernels local_bh_disable() is not sufficient because it only
 * serializes soft interrupt related sections via a local lock, but stays
 * preemptible.
Disabling preemption is the right choice here as bottom222* half processing is always in thread context on RT kernels so it223* implicitly prevents bottom half processing as well.224*/225static void get_cpu_fpsimd_context(void)226{227if (!IS_ENABLED(CONFIG_PREEMPT_RT))228local_bh_disable();229else230preempt_disable();231}232233/*234* Release the CPU FPSIMD context.235*236* Must be called from a context in which get_cpu_fpsimd_context() was237* previously called, with no call to put_cpu_fpsimd_context() in the238* meantime.239*/240static void put_cpu_fpsimd_context(void)241{242if (!IS_ENABLED(CONFIG_PREEMPT_RT))243local_bh_enable();244else245preempt_enable();246}247248unsigned int task_get_vl(const struct task_struct *task, enum vec_type type)249{250return task->thread.vl[type];251}252253void task_set_vl(struct task_struct *task, enum vec_type type,254unsigned long vl)255{256task->thread.vl[type] = vl;257}258259unsigned int task_get_vl_onexec(const struct task_struct *task,260enum vec_type type)261{262return task->thread.vl_onexec[type];263}264265void task_set_vl_onexec(struct task_struct *task, enum vec_type type,266unsigned long vl)267{268task->thread.vl_onexec[type] = vl;269}270271/*272* TIF_SME controls whether a task can use SME without trapping while273* in userspace, when TIF_SME is set then we must have storage274* allocated in sve_state and sme_state to store the contents of both ZA275* and the SVE registers for both streaming and non-streaming modes.276*277* If both SVCR.ZA and SVCR.SM are disabled then at any point we278* may disable TIF_SME and reenable traps.279*/280281282/*283* TIF_SVE controls whether a task can use SVE without trapping while284* in userspace, and also (together with TIF_SME) the way a task's285* FPSIMD/SVE state is stored in thread_struct.286*287* The kernel uses this flag to track whether a user task is actively288* using SVE, and therefore whether full SVE register state needs to289* be tracked. 
If not, the cheaper FPSIMD context handling code can290* be used instead of the more costly SVE equivalents.291*292* * TIF_SVE or SVCR.SM set:293*294* The task can execute SVE instructions while in userspace without295* trapping to the kernel.296*297* During any syscall, the kernel may optionally clear TIF_SVE and298* discard the vector state except for the FPSIMD subset.299*300* * TIF_SVE clear:301*302* An attempt by the user task to execute an SVE instruction causes303* do_sve_acc() to be called, which does some preparation and then304* sets TIF_SVE.305*306* During any syscall, the kernel may optionally clear TIF_SVE and307* discard the vector state except for the FPSIMD subset.308*309* The data will be stored in one of two formats:310*311* * FPSIMD only - FP_STATE_FPSIMD:312*313* When the FPSIMD only state stored task->thread.fp_type is set to314* FP_STATE_FPSIMD, the FPSIMD registers V0-V31 are encoded in315* task->thread.uw.fpsimd_state; bits [max : 128] for each of Z0-Z31 are316* logically zero but not stored anywhere; P0-P15 and FFR are not317* stored and have unspecified values from userspace's point of318* view. For hygiene purposes, the kernel zeroes them on next use,319* but userspace is discouraged from relying on this.320*321* task->thread.sve_state does not need to be non-NULL, valid or any322* particular size: it must not be dereferenced and any data stored323* there should be considered stale and not referenced.324*325* * SVE state - FP_STATE_SVE:326*327* When the full SVE state is stored task->thread.fp_type is set to328* FP_STATE_SVE and Z0-Z31 (incorporating Vn in bits[127:0] or the329* corresponding Zn), P0-P15 and FFR are encoded in in330* task->thread.sve_state, formatted appropriately for vector331* length task->thread.sve_vl or, if SVCR.SM is set,332* task->thread.sme_vl. 
The storage for the vector registers in
 *    task->thread.uw.fpsimd_state should be ignored.
 *
 *    task->thread.sve_state must point to a valid buffer at least
 *    sve_state_size(task) bytes in size. The data stored in
 *    task->thread.uw.fpsimd_state.vregs should be considered stale
 *    and not referenced.
 *
 * * FPSR and FPCR are always stored in task->thread.uw.fpsimd_state
 *   irrespective of whether TIF_SVE is clear or set, since these are
 *   not vector length dependent.
 */

/*
 * Update current's FPSIMD/SVE registers from thread_struct.
 *
 * This function should be called only when the FPSIMD/SVE state in
 * thread_struct is known to be up to date, when preparing to enter
 * userspace.
 */
static void task_fpsimd_load(void)
{
	bool restore_sve_regs = false;
	/*
	 * Only consumed when restore_sve_regs is true, and every path that
	 * sets restore_sve_regs also assigns restore_ffr first.
	 */
	bool restore_ffr;

	WARN_ON(!system_supports_fpsimd());
	WARN_ON(preemptible());
	WARN_ON(test_thread_flag(TIF_KERNEL_FPSTATE));

	if (system_supports_sve() || system_supports_sme()) {
		switch (current->thread.fp_type) {
		case FP_STATE_FPSIMD:
			/* Stop tracking SVE for this task until next use. */
			clear_thread_flag(TIF_SVE);
			break;
		case FP_STATE_SVE:
			if (!thread_sm_enabled(&current->thread))
				WARN_ON_ONCE(!test_and_set_thread_flag(TIF_SVE));

			if (test_thread_flag(TIF_SVE))
				sve_set_vq(sve_vq_from_vl(task_get_sve_vl(current)) - 1);

			restore_sve_regs = true;
			restore_ffr = true;
			break;
		default:
			/*
			 * This indicates either a bug in
			 * fpsimd_save_user_state() or memory corruption, we
			 * should always record an explicit format
			 * when we save. We always at least have the
			 * memory allocated for FPSIMD registers so
			 * try that and hope for the best.
			 */
			WARN_ON_ONCE(1);
			clear_thread_flag(TIF_SVE);
			break;
		}
	}

	/* Restore SME, override SVE register configuration if needed */
	if (system_supports_sme()) {
		unsigned long sme_vl = task_get_sme_vl(current);

		/* Ensure VL is set up for restoring data */
		if (test_thread_flag(TIF_SME))
			sme_set_vq(sve_vq_from_vl(sme_vl) - 1);

		write_sysreg_s(current->thread.svcr, SYS_SVCR);

		if (thread_za_enabled(&current->thread))
			sme_load_state(current->thread.sme_state,
				       system_supports_sme2());

		/* In streaming mode FFR is only present with FEAT_SME_FA64. */
		if (thread_sm_enabled(&current->thread))
			restore_ffr = system_supports_fa64();
	}

	if (system_supports_fpmr())
		write_sysreg_s(current->thread.uw.fpmr, SYS_FPMR);

	if (restore_sve_regs) {
		WARN_ON_ONCE(current->thread.fp_type != FP_STATE_SVE);
		sve_load_state(sve_pffr(&current->thread),
			       &current->thread.uw.fpsimd_state.fpsr,
			       restore_ffr);
	} else {
		WARN_ON_ONCE(current->thread.fp_type != FP_STATE_FPSIMD);
		fpsimd_load_state(&current->thread.uw.fpsimd_state);
	}
}

/*
 * Ensure FPSIMD/SVE storage in memory for the loaded context is up to
 * date with respect to the CPU registers. Note carefully that the
 * current context is the context last bound to the CPU stored in
 * last, if KVM is involved this may be the guest VM context rather
 * than the host thread for the VM pointed to by current.
This means
 * that we must always reference the state storage via last rather
 * than via current, if we are saving KVM state then it will have
 * ensured that the type of registers to save is set in last->to_save.
 */
static void fpsimd_save_user_state(void)
{
	struct cpu_fp_state const *last =
		this_cpu_ptr(&fpsimd_last_state);
	/* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */
	bool save_sve_regs = false;
	bool save_ffr;
	/* Only consumed when save_sve_regs is true (always assigned first). */
	unsigned int vl;

	WARN_ON(!system_supports_fpsimd());
	WARN_ON(preemptible());

	/* Registers do not hold the current context: nothing to write back. */
	if (test_thread_flag(TIF_FOREIGN_FPSTATE))
		return;

	if (system_supports_fpmr())
		*(last->fpmr) = read_sysreg_s(SYS_FPMR);

	/*
	 * Save SVE state if it is live.
	 *
	 * The syscall ABI discards live SVE state at syscall entry. When
	 * entering a syscall, fpsimd_syscall_enter() sets to_save to
	 * FP_STATE_FPSIMD to allow the SVE state to be lazily discarded until
	 * either new SVE state is loaded+bound or fpsimd_syscall_exit() is
	 * called prior to a return to userspace.
	 */
	if ((last->to_save == FP_STATE_CURRENT && test_thread_flag(TIF_SVE)) ||
	    last->to_save == FP_STATE_SVE) {
		save_sve_regs = true;
		save_ffr = true;
		vl = last->sve_vl;
	}

	if (system_supports_sme()) {
		u64 *svcr = last->svcr;

		*svcr = read_sysreg_s(SYS_SVCR);

		if (*svcr & SVCR_ZA_MASK)
			sme_save_state(last->sme_state,
				       system_supports_sme2());

		/* If we are in streaming mode override regular SVE. */
		if (*svcr & SVCR_SM_MASK) {
			save_sve_regs = true;
			/* FFR only exists in streaming mode with FA64. */
			save_ffr = system_supports_fa64();
			vl = last->sme_vl;
		}
	}

	if (IS_ENABLED(CONFIG_ARM64_SVE) && save_sve_regs) {
		/* Get the configured VL from RDVL, will account for SM */
		if (WARN_ON(sve_get_vl() != vl)) {
			/*
			 * Can't save the user regs, so current would
			 * re-enter user with corrupt state.
			 * There's no way to recover, so kill it:
			 */
			force_signal_inject(SIGKILL, SI_KERNEL, 0, 0);
			return;
		}

		sve_save_state((char *)last->sve_state +
					sve_ffr_offset(vl),
			       &last->st->fpsr, save_ffr);
		*last->fp_type = FP_STATE_SVE;
	} else {
		fpsimd_save_state(last->st);
		*last->fp_type = FP_STATE_FPSIMD;
	}
}

/*
 * All vector length selection from userspace comes through here.
 * We're on a slow path, so some sanity-checks are included.
 * If things go wrong there's a bug somewhere, but try to fall back to a
 * safe choice.
 */
static unsigned int find_supported_vector_length(enum vec_type type,
						 unsigned int vl)
{
	struct vl_info *info = &vl_info[type];
	int bit;
	int max_vl = info->max_vl;

	if (WARN_ON(!sve_vl_valid(vl)))
		vl = info->min_vl;

	if (WARN_ON(!sve_vl_valid(max_vl)))
		max_vl = info->min_vl;

	if (vl > max_vl)
		vl = max_vl;
	if (vl < info->min_vl)
		vl = info->min_vl;

	/* Round up to the nearest supported VQ (bitmap is ordered by __vq_to_bit). */
	bit = find_next_bit(info->vq_map, SVE_VQ_MAX,
			    __vq_to_bit(sve_vq_from_vl(vl)));
	return sve_vl_from_vq(__bit_to_vq(bit));
}

#if defined(CONFIG_ARM64_SVE) && defined(CONFIG_SYSCTL)

/*
 * sysctl handler for the per-type default vector length
 * (abi.sve_default_vector_length / abi.sme_default_vector_length).
 */
static int vec_proc_do_default_vl(const struct ctl_table *table, int write,
				  void *buffer, size_t *lenp, loff_t *ppos)
{
	struct vl_info *info = table->extra1;
	enum vec_type type = info->type;
	int ret;
	int vl = get_default_vl(type);
	struct ctl_table tmp_table = {
		.data = &vl,
		.maxlen = sizeof(vl),
	};

	ret = proc_dointvec(&tmp_table, write, buffer, lenp, ppos);
	if (ret || !write)
		return ret;

	/* Writing -1 has the special meaning "set to max": */
	if
(vl == -1)
		vl = info->max_vl;

	if (!sve_vl_valid(vl))
		return -EINVAL;

	/* Clamp to a length this system actually supports before storing. */
	set_default_vl(type, find_supported_vector_length(type, vl));
	return 0;
}

static const struct ctl_table sve_default_vl_table[] = {
	{
		.procname	= "sve_default_vector_length",
		.mode		= 0644,
		.proc_handler	= vec_proc_do_default_vl,
		.extra1		= &vl_info[ARM64_VEC_SVE],
	},
};

/* Register abi.sve_default_vector_length if SVE is present. */
static int __init sve_sysctl_init(void)
{
	if (system_supports_sve())
		if (!register_sysctl("abi", sve_default_vl_table))
			return -EINVAL;

	return 0;
}

#else /* ! (CONFIG_ARM64_SVE && CONFIG_SYSCTL) */
static int __init sve_sysctl_init(void) { return 0; }
#endif /* ! (CONFIG_ARM64_SVE && CONFIG_SYSCTL) */

#if defined(CONFIG_ARM64_SME) && defined(CONFIG_SYSCTL)
static const struct ctl_table sme_default_vl_table[] = {
	{
		.procname	= "sme_default_vector_length",
		.mode		= 0644,
		.proc_handler	= vec_proc_do_default_vl,
		.extra1		= &vl_info[ARM64_VEC_SME],
	},
};

/* Register abi.sme_default_vector_length if SME is present. */
static int __init sme_sysctl_init(void)
{
	if (system_supports_sme())
		if (!register_sysctl("abi", sme_default_vl_table))
			return -EINVAL;

	return 0;
}

#else /* ! (CONFIG_ARM64_SME && CONFIG_SYSCTL) */
static int __init sme_sysctl_init(void) { return 0; }
#endif /* !
(CONFIG_ARM64_SME && CONFIG_SYSCTL) */609610#define ZREG(sve_state, vq, n) ((char *)(sve_state) + \611(SVE_SIG_ZREG_OFFSET(vq, n) - SVE_SIG_REGS_OFFSET))612613#ifdef CONFIG_CPU_BIG_ENDIAN614static __uint128_t arm64_cpu_to_le128(__uint128_t x)615{616u64 a = swab64(x);617u64 b = swab64(x >> 64);618619return ((__uint128_t)a << 64) | b;620}621#else622static __uint128_t arm64_cpu_to_le128(__uint128_t x)623{624return x;625}626#endif627628#define arm64_le128_to_cpu(x) arm64_cpu_to_le128(x)629630static void __fpsimd_to_sve(void *sst, struct user_fpsimd_state const *fst,631unsigned int vq)632{633unsigned int i;634__uint128_t *p;635636for (i = 0; i < SVE_NUM_ZREGS; ++i) {637p = (__uint128_t *)ZREG(sst, vq, i);638*p = arm64_cpu_to_le128(fst->vregs[i]);639}640}641642/*643* Transfer the FPSIMD state in task->thread.uw.fpsimd_state to644* task->thread.sve_state.645*646* Task can be a non-runnable task, or current. In the latter case,647* the caller must have ownership of the cpu FPSIMD context before calling648* this function.649* task->thread.sve_state must point to at least sve_state_size(task)650* bytes of allocated kernel memory.651* task->thread.uw.fpsimd_state must be up to date before calling this652* function.653*/654static inline void fpsimd_to_sve(struct task_struct *task)655{656unsigned int vq;657void *sst = task->thread.sve_state;658struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;659660if (!system_supports_sve() && !system_supports_sme())661return;662663vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread));664__fpsimd_to_sve(sst, fst, vq);665}666667/*668* Transfer the SVE state in task->thread.sve_state to669* task->thread.uw.fpsimd_state.670*671* Task can be a non-runnable task, or current. 
In the latter case,
 * the caller must have ownership of the cpu FPSIMD context before calling
 * this function.
 * task->thread.sve_state must point to at least sve_state_size(task)
 * bytes of allocated kernel memory.
 * task->thread.sve_state must be up to date before calling this function.
 */
static inline void sve_to_fpsimd(struct task_struct *task)
{
	unsigned int vq, vl;
	void const *sst = task->thread.sve_state;
	struct user_fpsimd_state *fst = &task->thread.uw.fpsimd_state;
	unsigned int i;
	__uint128_t const *p;

	if (!system_supports_sve() && !system_supports_sme())
		return;

	vl = thread_get_cur_vl(&task->thread);
	vq = sve_vq_from_vl(vl);
	for (i = 0; i < SVE_NUM_ZREGS; ++i) {
		/* Vn is the low 128 bits of Zn in the sve_state layout. */
		p = (__uint128_t const *)ZREG(sst, vq, i);
		fst->vregs[i] = arm64_le128_to_cpu(*p);
	}
}

static inline void __fpsimd_zero_vregs(struct user_fpsimd_state *fpsimd)
{
	memset(&fpsimd->vregs, 0, sizeof(fpsimd->vregs));
}

/*
 * Simulate the effects of an SMSTOP SM instruction.
 */
void task_smstop_sm(struct task_struct *task)
{
	if (!thread_sm_enabled(&task->thread))
		return;

	__fpsimd_zero_vregs(&task->thread.uw.fpsimd_state);
	/* Architectural FPSR reset value on exit from streaming mode. */
	task->thread.uw.fpsimd_state.fpsr = 0x0800009f;
	if (system_supports_fpmr())
		task->thread.uw.fpmr = 0;

	task->thread.svcr &= ~SVCR_SM_MASK;
	task->thread.fp_type = FP_STATE_FPSIMD;
}

/* Enable FPMR accesses from EL0 by setting SCTLR_EL1.EnFPM. */
void cpu_enable_fpmr(const struct arm64_cpu_capabilities *__always_unused p)
{
	write_sysreg_s(read_sysreg_s(SYS_SCTLR_EL1) | SCTLR_EL1_EnFPM_MASK,
		       SYS_SCTLR_EL1);
}

#ifdef CONFIG_ARM64_SVE
/* Free a task's SVE register storage; NULL the pointer to avoid reuse. */
static void sve_free(struct task_struct *task)
{
	kfree(task->thread.sve_state);
	task->thread.sve_state = NULL;
}

/*
 * Ensure that task->thread.sve_state is allocated and sufficiently large.
 *
 * This function should be used only in preparation for replacing
 * task->thread.sve_state with new data.
The memory is always zeroed
 * here to prevent stale data from showing through: this is done in
 * the interest of testability and predictability: except in the
 * do_sve_acc() case, there is no ABI requirement to hide stale data
 * written previously by the task.
 */
void sve_alloc(struct task_struct *task, bool flush)
{
	if (task->thread.sve_state) {
		if (flush)
			memset(task->thread.sve_state, 0,
			       sve_state_size(task));
		return;
	}

	/* This is a small allocation (maximum ~8KB) and Should Not Fail. */
	task->thread.sve_state =
		kzalloc(sve_state_size(task), GFP_KERNEL);
}

/*
 * Ensure that task->thread.uw.fpsimd_state is up to date with respect to the
 * task's currently effective FPSIMD/SVE state.
 *
 * The task's FPSIMD/SVE/SME state must not be subject to concurrent
 * manipulation.
 */
void fpsimd_sync_from_effective_state(struct task_struct *task)
{
	if (task->thread.fp_type == FP_STATE_SVE)
		sve_to_fpsimd(task);
}

/*
 * Ensure that the task's currently effective FPSIMD/SVE state is up to date
 * with respect to task->thread.uw.fpsimd_state, zeroing any effective
 * non-FPSIMD (S)SVE state.
 *
 * The task's FPSIMD/SVE/SME state must not be subject to concurrent
 * manipulation.
 */
void fpsimd_sync_to_effective_state_zeropad(struct task_struct *task)
{
	unsigned int vq;
	void *sst = task->thread.sve_state;
	struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;

	if (task->thread.fp_type != FP_STATE_SVE)
		return;

	vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread));

	/* Zero everything first so P0-P15/FFR and Zn high bits read as 0. */
	memset(sst, 0, SVE_SIG_REGS_SIZE(vq));
	__fpsimd_to_sve(sst, fst, vq);
}

/* Re-size a live task's vector state buffers for a new VL of @type. */
static int change_live_vector_length(struct task_struct *task,
				     enum vec_type type,
				     unsigned long vl)
{
	unsigned int sve_vl = task_get_sve_vl(task);
	unsigned int sme_vl = task_get_sme_vl(task);
	void *sve_state = NULL, *sme_state = NULL;

	if (type == ARM64_VEC_SME)
		sme_vl = vl;
	else
		sve_vl = vl;

	/*
	 * Allocate the new sve_state and sme_state before freeing the old
	 * copies so that allocation failure can be handled without needing to
	 * mutate the task's state in any way.
	 *
	 * Changes to the SVE vector length must not discard live ZA state or
	 * clear PSTATE.ZA, as userspace code which is unaware of the AAPCS64
	 * ZA lazy saving scheme may attempt to change the SVE vector length
	 * while unsaved/dormant ZA state exists.
	 */
	sve_state = kzalloc(__sve_state_size(sve_vl, sme_vl), GFP_KERNEL);
	if (!sve_state)
		goto out_mem;

	if (type == ARM64_VEC_SME) {
		sme_state = kzalloc(__sme_state_size(sme_vl), GFP_KERNEL);
		if (!sme_state)
			goto out_mem;
	}

	/* Get the register state out of the CPU before swapping buffers. */
	if (task == current)
		fpsimd_save_and_flush_current_state();
	else
		fpsimd_flush_task_state(task);

	/*
	 * Always preserve PSTATE.SM and the effective FPSIMD state, zeroing
	 * other SVE state.
	 */
	fpsimd_sync_from_effective_state(task);
	task_set_vl(task, type, vl);
	kfree(task->thread.sve_state);
	task->thread.sve_state = sve_state;
	fpsimd_sync_to_effective_state_zeropad(task);

	if (type == ARM64_VEC_SME) {
		task->thread.svcr &= ~SVCR_ZA_MASK;
		kfree(task->thread.sme_state);
		task->thread.sme_state = sme_state;
	}

	return 0;

out_mem:
	kfree(sve_state);
	kfree(sme_state);
	return -ENOMEM;
}

int vec_set_vector_length(struct task_struct *task, enum vec_type type,
			  unsigned long vl, unsigned long flags)
{
	bool onexec = flags & PR_SVE_SET_VL_ONEXEC;
	bool inherit = flags & PR_SVE_VL_INHERIT;

	if (flags & ~(unsigned long)(PR_SVE_VL_INHERIT |
				     PR_SVE_SET_VL_ONEXEC))
		return -EINVAL;

	if (!sve_vl_valid(vl))
		return -EINVAL;

	/*
	 * Clamp to the maximum vector length that VL-agnostic code
	 * can work with.
A flag may be assigned in the future to
	 * allow setting of larger vector lengths without confusing
	 * older software.
	 */
	if (vl > VL_ARCH_MAX)
		vl = VL_ARCH_MAX;

	vl = find_supported_vector_length(type, vl);

	/* Only reallocate state when the effective VL actually changes. */
	if (!onexec && vl != task_get_vl(task, type)) {
		if (change_live_vector_length(task, type, vl))
			return -ENOMEM;
	}

	if (onexec || inherit)
		task_set_vl_onexec(task, type, vl);
	else
		/* Reset VL to system default on next exec: */
		task_set_vl_onexec(task, type, 0);

	update_tsk_thread_flag(task, vec_vl_inherit_flag(type),
			       flags & PR_SVE_VL_INHERIT);

	return 0;
}

/*
 * Encode the current vector length and flags for return.
 * This is only required for prctl(): ptrace has separate fields.
 * SVE and SME use the same bits for _ONEXEC and _INHERIT.
 *
 * flags are as for vec_set_vector_length().
 */
static int vec_prctl_status(enum vec_type type, unsigned long flags)
{
	int ret;

	if (flags & PR_SVE_SET_VL_ONEXEC)
		ret = task_get_vl_onexec(current, type);
	else
		ret = task_get_vl(current, type);

	if (test_thread_flag(vec_vl_inherit_flag(type)))
		ret |= PR_SVE_VL_INHERIT;

	return ret;
}

/* PR_SVE_SET_VL */
int sve_set_current_vl(unsigned long arg)
{
	unsigned long vl, flags;
	int ret;

	vl = arg & PR_SVE_VL_LEN_MASK;
	flags = arg & ~vl;

	if (!system_supports_sve() || is_compat_task())
		return -EINVAL;

	ret = vec_set_vector_length(current, ARM64_VEC_SVE, vl, flags);
	if (ret)
		return ret;

	return vec_prctl_status(ARM64_VEC_SVE, flags);
}

/* PR_SVE_GET_VL */
int sve_get_current_vl(void)
{
	if (!system_supports_sve() || is_compat_task())
		return -EINVAL;

	return vec_prctl_status(ARM64_VEC_SVE, 0);
}

#ifdef CONFIG_ARM64_SME
/* PR_SME_SET_VL */
int sme_set_current_vl(unsigned long arg)
{
	unsigned long vl, flags;
	int ret;

	vl = arg & PR_SME_VL_LEN_MASK;
	flags = arg & ~vl;

	if (!system_supports_sme() ||
is_compat_task())
		return -EINVAL;

	ret = vec_set_vector_length(current, ARM64_VEC_SME, vl, flags);
	if (ret)
		return ret;

	return vec_prctl_status(ARM64_VEC_SME, flags);
}

/* PR_SME_GET_VL */
int sme_get_current_vl(void)
{
	if (!system_supports_sme() || is_compat_task())
		return -EINVAL;

	return vec_prctl_status(ARM64_VEC_SME, 0);
}
#endif /* CONFIG_ARM64_SME */

/*
 * Probe which vector lengths this CPU supports for @info->type by
 * requesting each VQ in turn and reading back the VL actually granted.
 */
static void vec_probe_vqs(struct vl_info *info,
			  DECLARE_BITMAP(map, SVE_VQ_MAX))
{
	unsigned int vq, vl;

	bitmap_zero(map, SVE_VQ_MAX);

	for (vq = SVE_VQ_MAX; vq >= SVE_VQ_MIN; --vq) {
		write_vl(info->type, vq - 1); /* self-syncing */

		switch (info->type) {
		case ARM64_VEC_SVE:
			vl = sve_get_vl();
			break;
		case ARM64_VEC_SME:
			vl = sme_get_vl();
			break;
		default:
			vl = 0;
			break;
		}

		/* Minimum VL identified? */
		if (sve_vq_from_vl(vl) > vq)
			break;

		vq = sve_vq_from_vl(vl); /* skip intervening lengths */
		set_bit(__vq_to_bit(vq), map);
	}
}

/*
 * Initialise the set of known supported VQs for the boot CPU.
 * This is called during kernel boot, before secondary CPUs are brought up.
 */
void __init vec_init_vq_map(enum vec_type type)
{
	struct vl_info *info = &vl_info[type];
	vec_probe_vqs(info, info->vq_map);
	bitmap_copy(info->vq_partial_map, info->vq_map, SVE_VQ_MAX);
}

/*
 * If we haven't committed to the set of supported VQs yet, filter out
 * those not supported by the current CPU.
 * This function is called during the bring-up of early secondary CPUs only.
 */
void vec_update_vq_map(enum vec_type type)
{
	struct vl_info *info = &vl_info[type];
	DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);

	vec_probe_vqs(info, tmp_map);
	/* vq_map: intersection of all CPUs; vq_partial_map: union. */
	bitmap_and(info->vq_map, info->vq_map, tmp_map, SVE_VQ_MAX);
	bitmap_or(info->vq_partial_map, info->vq_partial_map, tmp_map,
		  SVE_VQ_MAX);
}

/*
 * Check whether the current CPU supports all VQs in the committed set.
 * This function
is called during the bring-up of late secondary CPUs only.
 */
int vec_verify_vq_map(enum vec_type type)
{
	struct vl_info *info = &vl_info[type];
	DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
	unsigned long b;

	vec_probe_vqs(info, tmp_map);

	bitmap_complement(tmp_map, tmp_map, SVE_VQ_MAX);
	if (bitmap_intersects(tmp_map, info->vq_map, SVE_VQ_MAX)) {
		pr_warn("%s: cpu%d: Required vector length(s) missing\n",
			info->name, smp_processor_id());
		return -EINVAL;
	}

	if (!IS_ENABLED(CONFIG_KVM) || !is_hyp_mode_available())
		return 0;

	/*
	 * For KVM, it is necessary to ensure that this CPU doesn't
	 * support any vector length that guests may have probed as
	 * unsupported.
	 */

	/* Recover the set of supported VQs: */
	bitmap_complement(tmp_map, tmp_map, SVE_VQ_MAX);
	/* Find VQs supported that are not globally supported: */
	bitmap_andnot(tmp_map, tmp_map, info->vq_map, SVE_VQ_MAX);

	/* Find the lowest such VQ, if any: */
	/* (__vq_to_bit() reverses bit order, so the last bit is the lowest VQ) */
	b = find_last_bit(tmp_map, SVE_VQ_MAX);
	if (b >= SVE_VQ_MAX)
		return 0; /* no mismatches */

	/*
	 * Mismatches above sve_max_virtualisable_vl are fine, since
	 * no guest is allowed to configure ZCR_EL2.LEN to exceed this:
	 */
	if (sve_vl_from_vq(__bit_to_vq(b)) <= info->max_virtualisable_vl) {
		pr_warn("%s: cpu%d: Unsupported vector length(s) present\n",
			info->name, smp_processor_id());
		return -EINVAL;
	}

	return 0;
}

/* Allocate the buffer used to preserve SVE state over EFI runtime calls. */
static void __init sve_efi_setup(void)
{
	int max_vl = 0;
	int i;

	if (!IS_ENABLED(CONFIG_EFI))
		return;

	for (i = 0; i < ARRAY_SIZE(vl_info); i++)
		max_vl = max(vl_info[i].max_vl, max_vl);

	/*
	 * alloc_percpu() warns and prints a backtrace if this goes wrong.
	 * This is evidence of a crippled system and we are returning void,
	 * so no attempt is made to handle this situation here.
	 */
	if (!sve_vl_valid(max_vl))
		goto fail;

	efi_sve_state =
kmalloc(SVE_SIG_REGS_SIZE(sve_vq_from_vl(max_vl)),1104GFP_KERNEL);1105if (!efi_sve_state)1106goto fail;11071108return;11091110fail:1111panic("Cannot allocate memory for EFI SVE save/restore");1112}11131114void cpu_enable_sve(const struct arm64_cpu_capabilities *__always_unused p)1115{1116write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_ZEN_EL1EN, CPACR_EL1);1117isb();11181119write_sysreg_s(0, SYS_ZCR_EL1);1120}11211122void __init sve_setup(void)1123{1124struct vl_info *info = &vl_info[ARM64_VEC_SVE];1125DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);1126unsigned long b;1127int max_bit;11281129if (!system_supports_sve())1130return;11311132/*1133* The SVE architecture mandates support for 128-bit vectors,1134* so sve_vq_map must have at least SVE_VQ_MIN set.1135* If something went wrong, at least try to patch it up:1136*/1137if (WARN_ON(!test_bit(__vq_to_bit(SVE_VQ_MIN), info->vq_map)))1138set_bit(__vq_to_bit(SVE_VQ_MIN), info->vq_map);11391140max_bit = find_first_bit(info->vq_map, SVE_VQ_MAX);1141info->max_vl = sve_vl_from_vq(__bit_to_vq(max_bit));11421143/*1144* For the default VL, pick the maximum supported value <= 64.1145* VL == 64 is guaranteed not to grow the signal frame.1146*/1147set_sve_default_vl(find_supported_vector_length(ARM64_VEC_SVE, 64));11481149bitmap_andnot(tmp_map, info->vq_partial_map, info->vq_map,1150SVE_VQ_MAX);11511152b = find_last_bit(tmp_map, SVE_VQ_MAX);1153if (b >= SVE_VQ_MAX)1154/* No non-virtualisable VLs found */1155info->max_virtualisable_vl = SVE_VQ_MAX;1156else if (WARN_ON(b == SVE_VQ_MAX - 1))1157/* No virtualisable VLs? This is architecturally forbidden. 
*/1158info->max_virtualisable_vl = SVE_VQ_MIN;1159else /* b + 1 < SVE_VQ_MAX */1160info->max_virtualisable_vl = sve_vl_from_vq(__bit_to_vq(b + 1));11611162if (info->max_virtualisable_vl > info->max_vl)1163info->max_virtualisable_vl = info->max_vl;11641165pr_info("%s: maximum available vector length %u bytes per vector\n",1166info->name, info->max_vl);1167pr_info("%s: default vector length %u bytes per vector\n",1168info->name, get_sve_default_vl());11691170/* KVM decides whether to support mismatched systems. Just warn here: */1171if (sve_max_virtualisable_vl() < sve_max_vl())1172pr_warn("%s: unvirtualisable vector lengths present\n",1173info->name);11741175sve_efi_setup();1176}11771178/*1179* Called from the put_task_struct() path, which cannot get here1180* unless dead_task is really dead and not schedulable.1181*/1182void fpsimd_release_task(struct task_struct *dead_task)1183{1184sve_free(dead_task);1185sme_free(dead_task);1186}11871188#endif /* CONFIG_ARM64_SVE */11891190#ifdef CONFIG_ARM64_SME11911192/*1193* Ensure that task->thread.sme_state is allocated and sufficiently large.1194*1195* This function should be used only in preparation for replacing1196* task->thread.sme_state with new data. The memory is always zeroed1197* here to prevent stale data from showing through: this is done in1198* the interest of testability and predictability, the architecture1199* guarantees that when ZA is enabled it will be zeroed.1200*/1201void sme_alloc(struct task_struct *task, bool flush)1202{1203if (task->thread.sme_state) {1204if (flush)1205memset(task->thread.sme_state, 0,1206sme_state_size(task));1207return;1208}12091210/* This could potentially be up to 64K. 
*/1211task->thread.sme_state =1212kzalloc(sme_state_size(task), GFP_KERNEL);1213}12141215static void sme_free(struct task_struct *task)1216{1217kfree(task->thread.sme_state);1218task->thread.sme_state = NULL;1219}12201221void cpu_enable_sme(const struct arm64_cpu_capabilities *__always_unused p)1222{1223/* Set priority for all PEs to architecturally defined minimum */1224write_sysreg_s(read_sysreg_s(SYS_SMPRI_EL1) & ~SMPRI_EL1_PRIORITY_MASK,1225SYS_SMPRI_EL1);12261227/* Allow SME in kernel */1228write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_SMEN_EL1EN, CPACR_EL1);1229isb();12301231/* Ensure all bits in SMCR are set to known values */1232write_sysreg_s(0, SYS_SMCR_EL1);12331234/* Allow EL0 to access TPIDR2 */1235write_sysreg(read_sysreg(SCTLR_EL1) | SCTLR_ELx_ENTP2, SCTLR_EL1);1236isb();1237}12381239void cpu_enable_sme2(const struct arm64_cpu_capabilities *__always_unused p)1240{1241/* This must be enabled after SME */1242BUILD_BUG_ON(ARM64_SME2 <= ARM64_SME);12431244/* Allow use of ZT0 */1245write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_EZT0_MASK,1246SYS_SMCR_EL1);1247}12481249void cpu_enable_fa64(const struct arm64_cpu_capabilities *__always_unused p)1250{1251/* This must be enabled after SME */1252BUILD_BUG_ON(ARM64_SME_FA64 <= ARM64_SME);12531254/* Allow use of FA64 */1255write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_FA64_MASK,1256SYS_SMCR_EL1);1257}12581259void __init sme_setup(void)1260{1261struct vl_info *info = &vl_info[ARM64_VEC_SME];1262int min_bit, max_bit;12631264if (!system_supports_sme())1265return;12661267/*1268* SME doesn't require any particular vector length be1269* supported but it does require at least one. We should have1270* disabled the feature entirely while bringing up CPUs but1271* let's double check here. 
The bitmap is SVE_VQ_MAP sized for1272* sharing with SVE.1273*/1274WARN_ON(bitmap_empty(info->vq_map, SVE_VQ_MAX));12751276min_bit = find_last_bit(info->vq_map, SVE_VQ_MAX);1277info->min_vl = sve_vl_from_vq(__bit_to_vq(min_bit));12781279max_bit = find_first_bit(info->vq_map, SVE_VQ_MAX);1280info->max_vl = sve_vl_from_vq(__bit_to_vq(max_bit));12811282WARN_ON(info->min_vl > info->max_vl);12831284/*1285* For the default VL, pick the maximum supported value <= 321286* (256 bits) if there is one since this is guaranteed not to1287* grow the signal frame when in streaming mode, otherwise the1288* minimum available VL will be used.1289*/1290set_sme_default_vl(find_supported_vector_length(ARM64_VEC_SME, 32));12911292pr_info("SME: minimum available vector length %u bytes per vector\n",1293info->min_vl);1294pr_info("SME: maximum available vector length %u bytes per vector\n",1295info->max_vl);1296pr_info("SME: default vector length %u bytes per vector\n",1297get_sme_default_vl());1298}12991300void sme_suspend_exit(void)1301{1302u64 smcr = 0;13031304if (!system_supports_sme())1305return;13061307if (system_supports_fa64())1308smcr |= SMCR_ELx_FA64;1309if (system_supports_sme2())1310smcr |= SMCR_ELx_EZT0;13111312write_sysreg_s(smcr, SYS_SMCR_EL1);1313write_sysreg_s(0, SYS_SMPRI_EL1);1314}13151316#endif /* CONFIG_ARM64_SME */13171318static void sve_init_regs(void)1319{1320/*1321* Convert the FPSIMD state to SVE, zeroing all the state that1322* is not shared with FPSIMD. 
If (as is likely) the current1323* state is live in the registers then do this there and1324* update our metadata for the current task including1325* disabling the trap, otherwise update our in-memory copy.1326* We are guaranteed to not be in streaming mode, we can only1327* take a SVE trap when not in streaming mode and we can't be1328* in streaming mode when taking a SME trap.1329*/1330if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {1331unsigned long vq_minus_one =1332sve_vq_from_vl(task_get_sve_vl(current)) - 1;1333sve_set_vq(vq_minus_one);1334sve_flush_live(true, vq_minus_one);1335fpsimd_bind_task_to_cpu();1336} else {1337fpsimd_to_sve(current);1338current->thread.fp_type = FP_STATE_SVE;1339fpsimd_flush_task_state(current);1340}1341}13421343/*1344* Trapped SVE access1345*1346* Storage is allocated for the full SVE state, the current FPSIMD1347* register contents are migrated across, and the access trap is1348* disabled.1349*1350* TIF_SVE should be clear on entry: otherwise, fpsimd_restore_current_state()1351* would have disabled the SVE access trap for userspace during1352* ret_to_user, making an SVE access trap impossible in that case.1353*/1354void do_sve_acc(unsigned long esr, struct pt_regs *regs)1355{1356/* Even if we chose not to use SVE, the hardware could still trap: */1357if (unlikely(!system_supports_sve()) || WARN_ON(is_compat_task())) {1358force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);1359return;1360}13611362sve_alloc(current, true);1363if (!current->thread.sve_state) {1364force_sig(SIGKILL);1365return;1366}13671368get_cpu_fpsimd_context();13691370if (test_and_set_thread_flag(TIF_SVE))1371WARN_ON(1); /* SVE access shouldn't have trapped */13721373/*1374* Even if the task can have used streaming mode we can only1375* generate SVE access traps in normal SVE mode and1376* transitioning out of streaming mode may discard any1377* streaming mode state. 
Always clear the high bits to avoid1378* any potential errors tracking what is properly initialised.1379*/1380sve_init_regs();13811382put_cpu_fpsimd_context();1383}13841385/*1386* Trapped SME access1387*1388* Storage is allocated for the full SVE and SME state, the current1389* FPSIMD register contents are migrated to SVE if SVE is not already1390* active, and the access trap is disabled.1391*1392* TIF_SME should be clear on entry: otherwise, fpsimd_restore_current_state()1393* would have disabled the SME access trap for userspace during1394* ret_to_user, making an SME access trap impossible in that case.1395*/1396void do_sme_acc(unsigned long esr, struct pt_regs *regs)1397{1398/* Even if we chose not to use SME, the hardware could still trap: */1399if (unlikely(!system_supports_sme()) || WARN_ON(is_compat_task())) {1400force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);1401return;1402}14031404/*1405* If this not a trap due to SME being disabled then something1406* is being used in the wrong mode, report as SIGILL.1407*/1408if (ESR_ELx_SME_ISS_SMTC(esr) != ESR_ELx_SME_ISS_SMTC_SME_DISABLED) {1409force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);1410return;1411}14121413sve_alloc(current, false);1414sme_alloc(current, true);1415if (!current->thread.sve_state || !current->thread.sme_state) {1416force_sig(SIGKILL);1417return;1418}14191420get_cpu_fpsimd_context();14211422/* With TIF_SME userspace shouldn't generate any traps */1423if (test_and_set_thread_flag(TIF_SME))1424WARN_ON(1);14251426if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {1427unsigned long vq_minus_one =1428sve_vq_from_vl(task_get_sme_vl(current)) - 1;1429sme_set_vq(vq_minus_one);14301431fpsimd_bind_task_to_cpu();1432} else {1433fpsimd_flush_task_state(current);1434}14351436put_cpu_fpsimd_context();1437}14381439/*1440* Trapped FP/ASIMD access.1441*/1442void do_fpsimd_acc(unsigned long esr, struct pt_regs *regs)1443{1444/* Even if we chose not to use FPSIMD, the hardware could still trap: */1445if 
(!system_supports_fpsimd()) {1446force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);1447return;1448}14491450/*1451* When FPSIMD is enabled, we should never take a trap unless something1452* has gone very wrong.1453*/1454BUG();1455}14561457/*1458* Raise a SIGFPE for the current process.1459*/1460void do_fpsimd_exc(unsigned long esr, struct pt_regs *regs)1461{1462unsigned int si_code = FPE_FLTUNK;14631464if (esr & ESR_ELx_FP_EXC_TFV) {1465if (esr & FPEXC_IOF)1466si_code = FPE_FLTINV;1467else if (esr & FPEXC_DZF)1468si_code = FPE_FLTDIV;1469else if (esr & FPEXC_OFF)1470si_code = FPE_FLTOVF;1471else if (esr & FPEXC_UFF)1472si_code = FPE_FLTUND;1473else if (esr & FPEXC_IXF)1474si_code = FPE_FLTRES;1475}14761477send_sig_fault(SIGFPE, si_code,1478(void __user *)instruction_pointer(regs),1479current);1480}14811482static void fpsimd_load_kernel_state(struct task_struct *task)1483{1484struct cpu_fp_state *last = this_cpu_ptr(&fpsimd_last_state);14851486/*1487* Elide the load if this CPU holds the most recent kernel mode1488* FPSIMD context of the current task.1489*/1490if (last->st == &task->thread.kernel_fpsimd_state &&1491task->thread.kernel_fpsimd_cpu == smp_processor_id())1492return;14931494fpsimd_load_state(&task->thread.kernel_fpsimd_state);1495}14961497static void fpsimd_save_kernel_state(struct task_struct *task)1498{1499struct cpu_fp_state cpu_fp_state = {1500.st = &task->thread.kernel_fpsimd_state,1501.to_save = FP_STATE_FPSIMD,1502};15031504fpsimd_save_state(&task->thread.kernel_fpsimd_state);1505fpsimd_bind_state_to_cpu(&cpu_fp_state);15061507task->thread.kernel_fpsimd_cpu = smp_processor_id();1508}15091510/*1511* Invalidate any task's FPSIMD state that is present on this cpu.1512* The FPSIMD context should be acquired with get_cpu_fpsimd_context()1513* before calling this function.1514*/1515static void fpsimd_flush_cpu_state(void)1516{1517WARN_ON(!system_supports_fpsimd());1518__this_cpu_write(fpsimd_last_state.st, NULL);15191520/*1521* Leaving streaming mode 
enabled will cause issues for any kernel1522* NEON and leaving streaming mode or ZA enabled may increase power1523* consumption.1524*/1525if (system_supports_sme())1526sme_smstop();15271528set_thread_flag(TIF_FOREIGN_FPSTATE);1529}15301531void fpsimd_thread_switch(struct task_struct *next)1532{1533bool wrong_task, wrong_cpu;15341535if (!system_supports_fpsimd())1536return;15371538WARN_ON_ONCE(!irqs_disabled());15391540/* Save unsaved fpsimd state, if any: */1541if (test_thread_flag(TIF_KERNEL_FPSTATE))1542fpsimd_save_kernel_state(current);1543else1544fpsimd_save_user_state();15451546if (test_tsk_thread_flag(next, TIF_KERNEL_FPSTATE)) {1547fpsimd_flush_cpu_state();1548fpsimd_load_kernel_state(next);1549} else {1550/*1551* Fix up TIF_FOREIGN_FPSTATE to correctly describe next's1552* state. For kernel threads, FPSIMD registers are never1553* loaded with user mode FPSIMD state and so wrong_task and1554* wrong_cpu will always be true.1555*/1556wrong_task = __this_cpu_read(fpsimd_last_state.st) !=1557&next->thread.uw.fpsimd_state;1558wrong_cpu = next->thread.fpsimd_cpu != smp_processor_id();15591560update_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE,1561wrong_task || wrong_cpu);1562}1563}15641565static void fpsimd_flush_thread_vl(enum vec_type type)1566{1567int vl, supported_vl;15681569/*1570* Reset the task vector length as required. This is where we1571* ensure that all user tasks have a valid vector length1572* configured: no kernel task can become a user task without1573* an exec and hence a call to this function. 
By the time the1574* first call to this function is made, all early hardware1575* probing is complete, so __sve_default_vl should be valid.1576* If a bug causes this to go wrong, we make some noise and1577* try to fudge thread.sve_vl to a safe value here.1578*/1579vl = task_get_vl_onexec(current, type);1580if (!vl)1581vl = get_default_vl(type);15821583if (WARN_ON(!sve_vl_valid(vl)))1584vl = vl_info[type].min_vl;15851586supported_vl = find_supported_vector_length(type, vl);1587if (WARN_ON(supported_vl != vl))1588vl = supported_vl;15891590task_set_vl(current, type, vl);15911592/*1593* If the task is not set to inherit, ensure that the vector1594* length will be reset by a subsequent exec:1595*/1596if (!test_thread_flag(vec_vl_inherit_flag(type)))1597task_set_vl_onexec(current, type, 0);1598}15991600void fpsimd_flush_thread(void)1601{1602void *sve_state = NULL;1603void *sme_state = NULL;16041605if (!system_supports_fpsimd())1606return;16071608get_cpu_fpsimd_context();16091610fpsimd_flush_task_state(current);1611memset(¤t->thread.uw.fpsimd_state, 0,1612sizeof(current->thread.uw.fpsimd_state));16131614if (system_supports_sve()) {1615clear_thread_flag(TIF_SVE);16161617/* Defer kfree() while in atomic context */1618sve_state = current->thread.sve_state;1619current->thread.sve_state = NULL;16201621fpsimd_flush_thread_vl(ARM64_VEC_SVE);1622}16231624if (system_supports_sme()) {1625clear_thread_flag(TIF_SME);16261627/* Defer kfree() while in atomic context */1628sme_state = current->thread.sme_state;1629current->thread.sme_state = NULL;16301631fpsimd_flush_thread_vl(ARM64_VEC_SME);1632current->thread.svcr = 0;1633}16341635if (system_supports_fpmr())1636current->thread.uw.fpmr = 0;16371638current->thread.fp_type = FP_STATE_FPSIMD;16391640put_cpu_fpsimd_context();1641kfree(sve_state);1642kfree(sme_state);1643}16441645/*1646* Save the userland FPSIMD state of 'current' to memory, but only if the state1647* currently held in the registers does in fact belong to 
'current'1648*/1649void fpsimd_preserve_current_state(void)1650{1651if (!system_supports_fpsimd())1652return;16531654get_cpu_fpsimd_context();1655fpsimd_save_user_state();1656put_cpu_fpsimd_context();1657}16581659/*1660* Associate current's FPSIMD context with this cpu1661* The caller must have ownership of the cpu FPSIMD context before calling1662* this function.1663*/1664static void fpsimd_bind_task_to_cpu(void)1665{1666struct cpu_fp_state *last = this_cpu_ptr(&fpsimd_last_state);16671668WARN_ON(!system_supports_fpsimd());1669last->st = ¤t->thread.uw.fpsimd_state;1670last->sve_state = current->thread.sve_state;1671last->sme_state = current->thread.sme_state;1672last->sve_vl = task_get_sve_vl(current);1673last->sme_vl = task_get_sme_vl(current);1674last->svcr = ¤t->thread.svcr;1675last->fpmr = ¤t->thread.uw.fpmr;1676last->fp_type = ¤t->thread.fp_type;1677last->to_save = FP_STATE_CURRENT;1678current->thread.fpsimd_cpu = smp_processor_id();16791680/*1681* Toggle SVE and SME trapping for userspace if needed, these1682* are serialsied by ret_to_user().1683*/1684if (system_supports_sme()) {1685if (test_thread_flag(TIF_SME))1686sme_user_enable();1687else1688sme_user_disable();1689}16901691if (system_supports_sve()) {1692if (test_thread_flag(TIF_SVE))1693sve_user_enable();1694else1695sve_user_disable();1696}1697}16981699void fpsimd_bind_state_to_cpu(struct cpu_fp_state *state)1700{1701struct cpu_fp_state *last = this_cpu_ptr(&fpsimd_last_state);17021703WARN_ON(!system_supports_fpsimd());1704WARN_ON(!in_softirq() && !irqs_disabled());17051706*last = *state;1707}17081709/*1710* Load the userland FPSIMD state of 'current' from memory, but only if the1711* FPSIMD state already held in the registers is /not/ the most recent FPSIMD1712* state of 'current'. 
This is called when we are preparing to return to1713* userspace to ensure that userspace sees a good register state.1714*/1715void fpsimd_restore_current_state(void)1716{1717/*1718* TIF_FOREIGN_FPSTATE is set on the init task and copied by1719* arch_dup_task_struct() regardless of whether FP/SIMD is detected.1720* Thus user threads can have this set even when FP/SIMD hasn't been1721* detected.1722*1723* When FP/SIMD is detected, begin_new_exec() will set1724* TIF_FOREIGN_FPSTATE via flush_thread() -> fpsimd_flush_thread(),1725* and fpsimd_thread_switch() will set TIF_FOREIGN_FPSTATE when1726* switching tasks. We detect FP/SIMD before we exec the first user1727* process, ensuring this has TIF_FOREIGN_FPSTATE set and1728* do_notify_resume() will call fpsimd_restore_current_state() to1729* install the user FP/SIMD context.1730*1731* When FP/SIMD is not detected, nothing else will clear or set1732* TIF_FOREIGN_FPSTATE prior to the first return to userspace, and1733* we must clear TIF_FOREIGN_FPSTATE to avoid do_notify_resume()1734* looping forever calling fpsimd_restore_current_state().1735*/1736if (!system_supports_fpsimd()) {1737clear_thread_flag(TIF_FOREIGN_FPSTATE);1738return;1739}17401741get_cpu_fpsimd_context();17421743if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {1744task_fpsimd_load();1745fpsimd_bind_task_to_cpu();1746}17471748put_cpu_fpsimd_context();1749}17501751void fpsimd_update_current_state(struct user_fpsimd_state const *state)1752{1753if (WARN_ON(!system_supports_fpsimd()))1754return;17551756current->thread.uw.fpsimd_state = *state;1757if (current->thread.fp_type == FP_STATE_SVE)1758fpsimd_to_sve(current);1759}17601761/*1762* Invalidate live CPU copies of task t's FPSIMD state1763*1764* This function may be called with preemption enabled. 
The barrier()1765* ensures that the assignment to fpsimd_cpu is visible to any1766* preemption/softirq that could race with set_tsk_thread_flag(), so1767* that TIF_FOREIGN_FPSTATE cannot be spuriously re-cleared.1768*1769* The final barrier ensures that TIF_FOREIGN_FPSTATE is seen set by any1770* subsequent code.1771*/1772void fpsimd_flush_task_state(struct task_struct *t)1773{1774t->thread.fpsimd_cpu = NR_CPUS;1775/*1776* If we don't support fpsimd, bail out after we have1777* reset the fpsimd_cpu for this task and clear the1778* FPSTATE.1779*/1780if (!system_supports_fpsimd())1781return;1782barrier();1783set_tsk_thread_flag(t, TIF_FOREIGN_FPSTATE);17841785barrier();1786}17871788void fpsimd_save_and_flush_current_state(void)1789{1790if (!system_supports_fpsimd())1791return;17921793get_cpu_fpsimd_context();1794fpsimd_save_user_state();1795fpsimd_flush_task_state(current);1796put_cpu_fpsimd_context();1797}17981799/*1800* Save the FPSIMD state to memory and invalidate cpu view.1801* This function must be called with preemption disabled.1802*/1803void fpsimd_save_and_flush_cpu_state(void)1804{1805unsigned long flags;18061807if (!system_supports_fpsimd())1808return;1809WARN_ON(preemptible());1810local_irq_save(flags);1811fpsimd_save_user_state();1812fpsimd_flush_cpu_state();1813local_irq_restore(flags);1814}18151816#ifdef CONFIG_KERNEL_MODE_NEON18171818/*1819* Kernel-side NEON support functions1820*/18211822/*1823* kernel_neon_begin(): obtain the CPU FPSIMD registers for use by the calling1824* context1825*1826* Must not be called unless may_use_simd() returns true.1827* Task context in the FPSIMD registers is saved back to memory as necessary.1828*1829* A matching call to kernel_neon_end() must be made before returning from the1830* calling context.1831*1832* The caller may freely use the FPSIMD registers until kernel_neon_end() is1833* called.1834*/1835void kernel_neon_begin(void)1836{1837if 
(WARN_ON(!system_supports_fpsimd()))1838return;18391840BUG_ON(!may_use_simd());18411842get_cpu_fpsimd_context();18431844/* Save unsaved fpsimd state, if any: */1845if (test_thread_flag(TIF_KERNEL_FPSTATE)) {1846BUG_ON(IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq());1847fpsimd_save_kernel_state(current);1848} else {1849fpsimd_save_user_state();18501851/*1852* Set the thread flag so that the kernel mode FPSIMD state1853* will be context switched along with the rest of the task1854* state.1855*1856* On non-PREEMPT_RT, softirqs may interrupt task level kernel1857* mode FPSIMD, but the task will not be preemptible so setting1858* TIF_KERNEL_FPSTATE for those would be both wrong (as it1859* would mark the task context FPSIMD state as requiring a1860* context switch) and unnecessary.1861*1862* On PREEMPT_RT, softirqs are serviced from a separate thread,1863* which is scheduled as usual, and this guarantees that these1864* softirqs are not interrupting use of the FPSIMD in kernel1865* mode in task context. So in this case, setting the flag here1866* is always appropriate.1867*/1868if (IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq())1869set_thread_flag(TIF_KERNEL_FPSTATE);1870}18711872/* Invalidate any task state remaining in the fpsimd regs: */1873fpsimd_flush_cpu_state();18741875put_cpu_fpsimd_context();1876}1877EXPORT_SYMBOL_GPL(kernel_neon_begin);18781879/*1880* kernel_neon_end(): give the CPU FPSIMD registers back to the current task1881*1882* Must be called from a context in which kernel_neon_begin() was previously1883* called, with no call to kernel_neon_end() in the meantime.1884*1885* The caller must not use the FPSIMD registers after this function is called,1886* unless kernel_neon_begin() is called again in the meantime.1887*/1888void kernel_neon_end(void)1889{1890if (!system_supports_fpsimd())1891return;18921893/*1894* If we are returning from a nested use of kernel mode FPSIMD, restore1895* the task context kernel mode FPSIMD state. 
This can only happen when1896* running in softirq context on non-PREEMPT_RT.1897*/1898if (!IS_ENABLED(CONFIG_PREEMPT_RT) && in_serving_softirq() &&1899test_thread_flag(TIF_KERNEL_FPSTATE))1900fpsimd_load_kernel_state(current);1901else1902clear_thread_flag(TIF_KERNEL_FPSTATE);1903}1904EXPORT_SYMBOL_GPL(kernel_neon_end);19051906#ifdef CONFIG_EFI19071908static struct user_fpsimd_state efi_fpsimd_state;1909static bool efi_fpsimd_state_used;1910static bool efi_sve_state_used;1911static bool efi_sm_state;19121913/*1914* EFI runtime services support functions1915*1916* The ABI for EFI runtime services allows EFI to use FPSIMD during the call.1917* This means that for EFI (and only for EFI), we have to assume that FPSIMD1918* is always used rather than being an optional accelerator.1919*1920* These functions provide the necessary support for ensuring FPSIMD1921* save/restore in the contexts from which EFI is used.1922*1923* Do not use them for any other purpose -- if tempted to do so, you are1924* either doing something wrong or you need to propose some refactoring.1925*/19261927/*1928* __efi_fpsimd_begin(): prepare FPSIMD for making an EFI runtime services call1929*/1930void __efi_fpsimd_begin(void)1931{1932if (!system_supports_fpsimd())1933return;19341935WARN_ON(preemptible());19361937if (may_use_simd()) {1938kernel_neon_begin();1939} else {1940/*1941* If !efi_sve_state, SVE can't be in use yet and doesn't need1942* preserving:1943*/1944if (system_supports_sve() && efi_sve_state != NULL) {1945bool ffr = true;1946u64 svcr;19471948efi_sve_state_used = true;19491950if (system_supports_sme()) {1951svcr = read_sysreg_s(SYS_SVCR);19521953efi_sm_state = svcr & SVCR_SM_MASK;19541955/*1956* Unless we have FA64 FFR does not1957* exist in streaming mode.1958*/1959if (!system_supports_fa64())1960ffr = !(svcr & SVCR_SM_MASK);1961}19621963sve_save_state(efi_sve_state + sve_ffr_offset(sve_max_vl()),1964&efi_fpsimd_state.fpsr, ffr);19651966if 
(system_supports_sme())1967sysreg_clear_set_s(SYS_SVCR,1968SVCR_SM_MASK, 0);19691970} else {1971fpsimd_save_state(&efi_fpsimd_state);1972}19731974efi_fpsimd_state_used = true;1975}1976}19771978/*1979* __efi_fpsimd_end(): clean up FPSIMD after an EFI runtime services call1980*/1981void __efi_fpsimd_end(void)1982{1983if (!system_supports_fpsimd())1984return;19851986if (!efi_fpsimd_state_used) {1987kernel_neon_end();1988} else {1989if (system_supports_sve() && efi_sve_state_used) {1990bool ffr = true;19911992/*1993* Restore streaming mode; EFI calls are1994* normal function calls so should not return in1995* streaming mode.1996*/1997if (system_supports_sme()) {1998if (efi_sm_state) {1999sysreg_clear_set_s(SYS_SVCR,20000,2001SVCR_SM_MASK);20022003/*2004* Unless we have FA64 FFR does not2005* exist in streaming mode.2006*/2007if (!system_supports_fa64())2008ffr = false;2009}2010}20112012sve_load_state(efi_sve_state + sve_ffr_offset(sve_max_vl()),2013&efi_fpsimd_state.fpsr, ffr);20142015efi_sve_state_used = false;2016} else {2017fpsimd_load_state(&efi_fpsimd_state);2018}20192020efi_fpsimd_state_used = false;2021}2022}20232024#endif /* CONFIG_EFI */20252026#endif /* CONFIG_KERNEL_MODE_NEON */20272028#ifdef CONFIG_CPU_PM2029static int fpsimd_cpu_pm_notifier(struct notifier_block *self,2030unsigned long cmd, void *v)2031{2032switch (cmd) {2033case CPU_PM_ENTER:2034fpsimd_save_and_flush_cpu_state();2035break;2036case CPU_PM_EXIT:2037break;2038case CPU_PM_ENTER_FAILED:2039default:2040return NOTIFY_DONE;2041}2042return NOTIFY_OK;2043}20442045static struct notifier_block fpsimd_cpu_pm_notifier_block = {2046.notifier_call = fpsimd_cpu_pm_notifier,2047};20482049static void __init fpsimd_pm_init(void)2050{2051cpu_pm_register_notifier(&fpsimd_cpu_pm_notifier_block);2052}20532054#else2055static inline void fpsimd_pm_init(void) { }2056#endif /* CONFIG_CPU_PM */20572058#ifdef CONFIG_HOTPLUG_CPU2059static int fpsimd_cpu_dead(unsigned int cpu)2060{2061per_cpu(fpsimd_last_state.st, cpu) 
= NULL;2062return 0;2063}20642065static inline void fpsimd_hotplug_init(void)2066{2067cpuhp_setup_state_nocalls(CPUHP_ARM64_FPSIMD_DEAD, "arm64/fpsimd:dead",2068NULL, fpsimd_cpu_dead);2069}20702071#else2072static inline void fpsimd_hotplug_init(void) { }2073#endif20742075void cpu_enable_fpsimd(const struct arm64_cpu_capabilities *__always_unused p)2076{2077unsigned long enable = CPACR_EL1_FPEN_EL1EN | CPACR_EL1_FPEN_EL0EN;2078write_sysreg(read_sysreg(CPACR_EL1) | enable, CPACR_EL1);2079isb();2080}20812082/*2083* FP/SIMD support code initialisation.2084*/2085static int __init fpsimd_init(void)2086{2087if (cpu_have_named_feature(FP)) {2088fpsimd_pm_init();2089fpsimd_hotplug_init();2090} else {2091pr_notice("Floating-point is not implemented\n");2092}20932094if (!cpu_have_named_feature(ASIMD))2095pr_notice("Advanced SIMD is not implemented\n");209620972098sve_sysctl_init();2099sme_sysctl_init();21002101return 0;2102}2103core_initcall(fpsimd_init);210421052106