// SPDX-License-Identifier: GPL-2.0-only
/*
 *
 * Copyright 2012 Paul Mackerras, IBM Corp. <[email protected]>
 */

#include <linux/types.h>
#include <linux/string.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/kernel.h>
#include <asm/lppaca.h>
#include <asm/opal.h>
#include <asm/mce.h>
#include <asm/machdep.h>
#include <asm/cputhreads.h>
#include <asm/hmi.h>
#include <asm/kvm_ppc.h>

/* SRR1 bits for machine check on POWER7 */
#define SRR1_MC_LDSTERR		(1ul << (63-42))
#define SRR1_MC_IFETCH_SH	(63-45)
#define SRR1_MC_IFETCH_MASK	0x7
#define SRR1_MC_IFETCH_SLBPAR		2	/* SLB parity error */
#define SRR1_MC_IFETCH_SLBMULTI		3	/* SLB multi-hit */
#define SRR1_MC_IFETCH_SLBPARMULTI	4	/* SLB parity + multi-hit */
#define SRR1_MC_IFETCH_TLBMULTI		5	/* I-TLB multi-hit */

/* DSISR bits for machine check on POWER7 */
#define DSISR_MC_DERAT_MULTI	0x800	/* D-ERAT multi-hit */
#define DSISR_MC_TLB_MULTI	0x400	/* D-TLB multi-hit */
#define DSISR_MC_SLB_PARITY	0x100	/* SLB parity error */
#define DSISR_MC_SLB_MULTI	0x080	/* SLB multi-hit */
#define DSISR_MC_SLB_PARMULTI	0x040	/* SLB parity + multi-hit */

/*
 * POWER7 SLB flush and reload: invalidate the whole SLB, then repopulate
 * it from the guest's registered SLB shadow buffer, if one exists.
 */
static void reload_slb(struct kvm_vcpu *vcpu)
{
	struct slb_shadow *slb;
	unsigned long i, n;

	/* First clear out SLB */
	asm volatile("slbmte %0,%0; slbia" : : "r" (0));

	/* Do they have an SLB shadow buffer registered? */
	slb = vcpu->arch.slb_shadow.pinned_addr;
	if (!slb)
		return;

	/*
	 * Sanity check: cap the entry count at SLB_MIN_SIZE and make sure
	 * the save areas we are about to read lie within the pinned buffer.
	 */
	n = min_t(u32, be32_to_cpu(slb->persistent), SLB_MIN_SIZE);
	if ((void *) &slb->save_area[n] > vcpu->arch.slb_shadow.pinned_end)
		return;

	/* Load up the SLB from that */
	for (i = 0; i < n; ++i) {
		unsigned long rb = be64_to_cpu(slb->save_area[i].esid);
		unsigned long rs = be64_to_cpu(slb->save_area[i].vsid);

		rb = (rb & ~0xFFFul) | i;	/* insert entry number */
		asm volatile("slbmte %0,%1" : : "r" (rs), "r" (rb));
	}
}

/*
 * On POWER7, see if we can handle a machine check that occurred inside
 * the guest in real mode, without switching to the host partition.
 *
 * Returns 1 if the machine check was fully handled here (SLB reload
 * and/or TLB invalidation sufficed), 0 if any unrecognized error bits
 * remain.
 */
static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
{
	unsigned long srr1 = vcpu->arch.shregs.msr;
	long handled = 1;

	if (srr1 & SRR1_MC_LDSTERR) {
		/* error on load/store */
		unsigned long dsisr = vcpu->arch.shregs.dsisr;

		if (dsisr & (DSISR_MC_SLB_PARMULTI | DSISR_MC_SLB_MULTI |
			     DSISR_MC_SLB_PARITY | DSISR_MC_DERAT_MULTI)) {
			/* flush and reload SLB; flushes D-ERAT too */
			reload_slb(vcpu);
			dsisr &= ~(DSISR_MC_SLB_PARMULTI | DSISR_MC_SLB_MULTI |
				   DSISR_MC_SLB_PARITY | DSISR_MC_DERAT_MULTI);
		}
		if (dsisr & DSISR_MC_TLB_MULTI) {
			tlbiel_all_lpid(vcpu->kvm->arch.radix);
			dsisr &= ~DSISR_MC_TLB_MULTI;
		}
		/* Any other errors we don't understand? */
		if (dsisr & 0xffffffffUL)
			handled = 0;
	}

	switch ((srr1 >> SRR1_MC_IFETCH_SH) & SRR1_MC_IFETCH_MASK) {
	case 0:
		break;
	case SRR1_MC_IFETCH_SLBPAR:
	case SRR1_MC_IFETCH_SLBMULTI:
	case SRR1_MC_IFETCH_SLBPARMULTI:
		reload_slb(vcpu);
		break;
	case SRR1_MC_IFETCH_TLBMULTI:
		tlbiel_all_lpid(vcpu->kvm->arch.radix);
		break;
	default:
		handled = 0;
	}

	return handled;
}

/*
 * Handle a machine check that occurred while in the guest, and stash the
 * resulting MCE event in the vcpu struct for later delivery in virtual
 * mode. FWNMI-capable guests are left to do their own recovery.
 */
void kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu)
{
	struct machine_check_event mce_evt;
	long handled;

	if (vcpu->kvm->arch.fwnmi_enabled) {
		/* FWNMI guests handle their own recovery */
		handled = 0;
	} else {
		handled = kvmppc_realmode_mc_power7(vcpu);
	}

	/*
	 * Now get the event and stash it in the vcpu struct so it can
	 * be handled by the primary thread in virtual mode. We can't
	 * call machine_check_queue_event() here if we are running on
	 * an offline secondary thread.
	 */
	if (get_mce_event(&mce_evt, MCE_EVENT_RELEASE)) {
		if (handled && mce_evt.version == MCE_V1)
			mce_evt.disposition = MCE_DISPOSITION_RECOVERED;
	} else {
		memset(&mce_evt, 0, sizeof(mce_evt));
	}

	vcpu->arch.mce_evt = mce_evt;
}

/*
 * POWER9 real-mode HMI handler.
 *
 * Returns 1 if hmi_handle_debugtrig() handled the event, 0 otherwise.
 * In either case the timebase offset is unapplied around the handling
 * and re-applied before returning (see comment below).
 */
long kvmppc_p9_realmode_hmi_handler(struct kvm_vcpu *vcpu)
{
	struct kvmppc_vcore *vc = vcpu->arch.vcore;
	long ret = 0;

	/*
	 * Unapply and clear the offset first. That way, if the TB was not
	 * resynced then it will remain in host-offset, and if it was resynced
	 * then it is brought into host-offset. Then the tb offset is
	 * re-applied before continuing with the KVM exit.
	 *
	 * This way, we don't need to actually know whether or not OPAL
	 * resynced the timebase or do any of the complicated dance that
	 * the P7/8 path requires.
	 */
	if (vc->tb_offset_applied) {
		u64 new_tb = mftb() - vc->tb_offset_applied;
		mtspr(SPRN_TBU40, new_tb);
		/*
		 * TBU40 only sets the top 40 bits; if the low 24 bits of
		 * the resulting TB went backwards, bump the upper bits by
		 * one tick of the 40-bit field to keep TB monotonic.
		 */
		if ((mftb() & 0xffffff) < (new_tb & 0xffffff)) {
			new_tb += 0x1000000;
			mtspr(SPRN_TBU40, new_tb);
		}
		vc->tb_offset_applied = 0;
	}

	local_paca->hmi_irqs++;

	if (hmi_handle_debugtrig(NULL) >= 0) {
		ret = 1;
		goto out;
	}

	if (ppc_md.hmi_exception_early)
		ppc_md.hmi_exception_early(NULL);

out:
	/* Re-apply the guest timebase offset before resuming the exit path */
	if (kvmppc_get_tb_offset(vcpu)) {
		u64 new_tb = mftb() + vc->tb_offset;
		mtspr(SPRN_TBU40, new_tb);
		/* Same low-24-bit wrap correction as above */
		if ((mftb() & 0xffffff) < (new_tb & 0xffffff)) {
			new_tb += 0x1000000;
			mtspr(SPRN_TBU40, new_tb);
		}
		vc->tb_offset_applied = kvmppc_get_tb_offset(vcpu);
	}

	return ret;
}

/*
 * The following subcore HMI handling is all only for pre-POWER9 CPUs.
 */

/* Check if dynamic split is in force and return subcore size accordingly. */
static inline int kvmppc_cur_subcore_size(void)
{
	if (local_paca->kvm_hstate.kvm_split_mode)
		return local_paca->kvm_hstate.kvm_split_mode->subcore_size;

	return threads_per_subcore;
}

/* Mark this thread's subcore as running in guest context. */
void kvmppc_subcore_enter_guest(void)
{
	int thread_id, subcore_id;

	thread_id = cpu_thread_in_core(local_paca->paca_index);
	subcore_id = thread_id / kvmppc_cur_subcore_size();

	local_paca->sibling_subcore_state->in_guest[subcore_id] = 1;
}
EXPORT_SYMBOL_GPL(kvmppc_subcore_enter_guest);

/* Mark this thread's subcore as having exited guest context. */
void kvmppc_subcore_exit_guest(void)
{
	int thread_id, subcore_id;

	thread_id = cpu_thread_in_core(local_paca->paca_index);
	subcore_id = thread_id / kvmppc_cur_subcore_size();

	local_paca->sibling_subcore_state->in_guest[subcore_id] = 0;
}
EXPORT_SYMBOL_GPL(kvmppc_subcore_exit_guest);

/*
 * Atomically claim responsibility for the TB resync. Returns true for
 * the first caller on this core (who becomes the resync leader), false
 * for everyone else.
 */
static bool kvmppc_tb_resync_required(void)
{
	if (test_and_set_bit(CORE_TB_RESYNC_REQ_BIT,
				&local_paca->sibling_subcore_state->flags))
		return false;

	return true;
}

/* Release the TB-resync-required flag once the resync is complete. */
static void kvmppc_tb_resync_done(void)
{
	clear_bit(CORE_TB_RESYNC_REQ_BIT,
			&local_paca->sibling_subcore_state->flags);
}

/*
 * kvmppc_realmode_hmi_handler() is called only by primary thread during
 * guest exit path.
 *
 * There are multiple reasons why HMI could occur, one of them is
 * Timebase (TB) error. If this HMI is due to TB error, then TB would
 * have been in stopped state. The opal hmi handler will fix it and
 * restore the TB value with host timebase value. For HMI caused due
 * to non-TB errors, opal hmi handler will not touch/restore TB register
 * and hence there won't be any change in TB value.
 *
 * Since we are not sure about the cause of this HMI, we can't be sure
 * about the content of TB register whether it holds guest or host timebase
 * value. Hence the idea is to resync the TB on every HMI, so that we
 * know about the exact state of the TB value. Resync TB call will
 * restore TB to host timebase.
 *
 * Things to consider:
 * - On TB error, HMI interrupt is reported on all the threads of the core
 *   that has encountered TB error irrespective of split-core mode.
 * - The very first thread on the core that gets a chance to fix TB error
 *   would resync the TB with local chipTOD value.
 * - The resync TB is a core level action i.e. it will sync all the TBs
 *   in that core independent of split-core mode. This means if we trigger
 *   TB sync from a thread from one subcore, it would affect TB values of
 *   sibling subcores of the same core.
 *
 * All threads need to co-ordinate before making opal hmi handler.
 * All threads will use sibling_subcore_state->in_guest[] (shared by all
 * threads in the core) in paca which holds information about whether
 * sibling subcores are in Guest mode or host mode. The in_guest[] array
 * is of size MAX_SUBCORE_PER_CORE=4, indexed using subcore id to set/unset
 * subcore status. Only primary threads from each subcore are responsible
 * to set/unset its designated array element while entering/exiting the
 * guest.
 *
 * After invoking opal hmi handler call, one of the thread (of entire core)
 * will need to resync the TB. Bit 63 from subcore state bitmap flags
 * (sibling_subcore_state->flags) will be used to co-ordinate between
 * primary threads to decide who takes up the responsibility.
 *
 * This is what we do:
 * - Primary thread from each subcore tries to set resync required bit[63]
 *   of paca->sibling_subcore_state->flags.
 * - The first primary thread that is able to set the flag takes the
 *   responsibility of TB resync. (Let us call it as thread leader)
 * - All other threads which are in host will call
 *   wait_for_subcore_guest_exit() and wait for in_guest[0-3] from
 *   paca->sibling_subcore_state to get cleared.
 * - All the primary thread will clear its subcore status from subcore
 *   state in_guest[] array respectively.
 * - Once all primary threads clear in_guest[0-3], all of them will invoke
 *   opal hmi handler.
 * - Now all threads will wait for TB resync to complete by invoking
 *   wait_for_tb_resync() except the thread leader.
 * - Thread leader will do a TB resync by invoking opal_resync_timebase()
 *   call and then it will clear the resync required bit.
 * - All other threads will now come out of resync wait loop and proceed
 *   with individual execution.
 * - On return of this function, primary thread will signal all
 *   secondary threads to proceed.
 * - All secondary threads will eventually call opal hmi handler on
 *   their exit path.
 *
 * Returns 1 if the timebase offset should be applied, 0 if not.
 */
long kvmppc_realmode_hmi_handler(void)
{
	bool resync_req;

	local_paca->hmi_irqs++;

	/* A debug-trigger HMI needs none of the TB resync machinery below */
	if (hmi_handle_debugtrig(NULL) >= 0)
		return 1;

	/*
	 * By now primary thread has already completed guest->host
	 * partition switch but haven't signaled secondaries yet.
	 * All the secondary threads on this subcore is waiting
	 * for primary thread to signal them to go ahead.
	 *
	 * For threads from subcore which isn't in guest, they all will
	 * wait until all other subcores on this core exit the guest.
	 *
	 * Now set the resync required bit. If you are the first to
	 * set this bit then kvmppc_tb_resync_required() function will
	 * return true. For all other subcores
	 * kvmppc_tb_resync_required() will return false.
	 *
	 * If resync_req == true, then this thread is responsible to
	 * initiate TB resync after hmi handler has completed.
	 * All other threads on this core will wait until this thread
	 * clears the resync required bit flag.
	 */
	resync_req = kvmppc_tb_resync_required();

	/* Reset the subcore status to indicate it has exited guest */
	kvmppc_subcore_exit_guest();

	/*
	 * Wait for other subcores on this core to exit the guest.
	 * All the primary threads and threads from subcore that are
	 * not in guest will wait here until all subcores are out
	 * of guest context.
	 */
	wait_for_subcore_guest_exit();

	/*
	 * At this point we are sure that primary threads from each
	 * subcore on this core have completed guest->host partition
	 * switch. Now it is safe to call HMI handler.
	 */
	if (ppc_md.hmi_exception_early)
		ppc_md.hmi_exception_early(NULL);

	/*
	 * Check if this thread is responsible to resync TB.
	 * All other threads will wait until this thread completes the
	 * TB resync.
	 */
	if (resync_req) {
		opal_resync_timebase();
		/* Reset TB resync req bit */
		kvmppc_tb_resync_done();
	} else {
		wait_for_tb_resync();
	}

	/*
	 * Reset tb_offset_applied so the guest exit code won't try
	 * to subtract the previous timebase offset from the timebase.
	 */
	if (local_paca->kvm_hstate.kvm_vcore)
		local_paca->kvm_hstate.kvm_vcore->tb_offset_applied = 0;

	return 0;
}