/* Path: blob/main/sys/contrib/xen/arch-x86/xen-mca.h */
/* 48255 views */
/******************************************************************************1* arch-x86/mca.h2*3* Contributed by Advanced Micro Devices, Inc.4* Author: Christoph Egger <[email protected]>5*6* Guest OS machine check interface to x86 Xen.7*8* Permission is hereby granted, free of charge, to any person obtaining a copy9* of this software and associated documentation files (the "Software"), to10* deal in the Software without restriction, including without limitation the11* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or12* sell copies of the Software, and to permit persons to whom the Software is13* furnished to do so, subject to the following conditions:14*15* The above copyright notice and this permission notice shall be included in16* all copies or substantial portions of the Software.17*18* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR19* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,20* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE21* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER22* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING23* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER24* DEALINGS IN THE SOFTWARE.25*/2627/* Full MCA functionality has the following Usecases from the guest side:28*29* Must have's:30* 1. Dom0 and DomU register machine check trap callback handlers31* (already done via "set_trap_table" hypercall)32* 2. Dom0 registers machine check event callback handler33* (doable via EVTCHNOP_bind_virq)34* 3. Dom0 and DomU fetches machine check data35* 4. Dom0 wants Xen to notify a DomU36* 5. Dom0 gets DomU ID from physical address37* 6. Dom0 wants Xen to kill DomU (already done for "xm destroy")38*39* Nice to have's:40* 7. 
Dom0 wants Xen to deactivate a physical CPU41* This is better done as separate task, physical CPU hotplugging,42* and hypercall(s) should be sysctl's43* 8. Page migration proposed from Xen NUMA work, where Dom0 can tell Xen to44* move a DomU (or Dom0 itself) away from a malicious page45* producing correctable errors.46* 9. offlining physical page:47* Xen free's and never re-uses a certain physical page.48* 10. Testfacility: Allow Dom0 to write values into machine check MSR's49* and tell Xen to trigger a machine check50*/5152#ifndef __XEN_PUBLIC_ARCH_X86_MCA_H__53#define __XEN_PUBLIC_ARCH_X86_MCA_H__5455/* Hypercall */56#define __HYPERVISOR_mca __HYPERVISOR_arch_05758/*59* The xen-unstable repo has interface version 0x03000001; out interface60* is incompatible with that and any future minor revisions, so we61* choose a different version number range that is numerically less62* than that used in xen-unstable.63*/64#define XEN_MCA_INTERFACE_VERSION 0x01ecc0036566/* IN: Dom0 calls hypercall to retrieve nonurgent telemetry */67#define XEN_MC_NONURGENT 0x000168/* IN: Dom0/DomU calls hypercall to retrieve urgent telemetry */69#define XEN_MC_URGENT 0x000270/* IN: Dom0 acknowledges previosly-fetched telemetry */71#define XEN_MC_ACK 0x00047273/* OUT: All is ok */74#define XEN_MC_OK 0x075/* OUT: Domain could not fetch data. */76#define XEN_MC_FETCHFAILED 0x177/* OUT: There was no machine check data to fetch. */78#define XEN_MC_NODATA 0x279/* OUT: Between notification time and this hypercall an other80* (most likely) correctable error happened. The fetched data,81* does not match the original machine check data. */82#define XEN_MC_NOMATCH 0x48384/* OUT: DomU did not register MC NMI handler. Try something else. */85#define XEN_MC_CANNOTHANDLE 0x886/* OUT: Notifying DomU failed. Retry later or try something else. */87#define XEN_MC_NOTDELIVERED 0x1088/* Note, XEN_MC_CANNOTHANDLE and XEN_MC_NOTDELIVERED are mutually exclusive. */8990/* Applicable to all mc_vcpuid fields below. 
*/91#define XEN_MC_VCPUID_INVALID 0xffff9293#ifndef __ASSEMBLY__9495#define VIRQ_MCA VIRQ_ARCH_0 /* G. (DOM0) Machine Check Architecture */9697/*98* Machine Check Architecure:99* structs are read-only and used to report all kinds of100* correctable and uncorrectable errors detected by the HW.101* Dom0 and DomU: register a handler to get notified.102* Dom0 only: Correctable errors are reported via VIRQ_MCA103* Dom0 and DomU: Uncorrectable errors are reported via nmi handlers104*/105#define MC_TYPE_GLOBAL 0106#define MC_TYPE_BANK 1107#define MC_TYPE_EXTENDED 2108#define MC_TYPE_RECOVERY 3109110struct mcinfo_common {111uint16_t type; /* structure type */112uint16_t size; /* size of this struct in bytes */113};114typedef struct mcinfo_common xen_mcinfo_common_t;115116#define MC_FLAG_CORRECTABLE (1 << 0)117#define MC_FLAG_UNCORRECTABLE (1 << 1)118#define MC_FLAG_RECOVERABLE (1 << 2)119#define MC_FLAG_POLLED (1 << 3)120#define MC_FLAG_RESET (1 << 4)121#define MC_FLAG_CMCI (1 << 5)122#define MC_FLAG_MCE (1 << 6)123/* contains global x86 mc information */124struct mcinfo_global {125xen_mcinfo_common_t common;126127/* running domain at the time in error (most likely the impacted one) */128uint16_t mc_domid;129uint16_t mc_vcpuid; /* virtual cpu scheduled for mc_domid */130uint32_t mc_socketid; /* physical socket of the physical core */131uint16_t mc_coreid; /* physical impacted core */132uint16_t mc_core_threadid; /* core thread of physical core */133uint32_t mc_apicid;134uint32_t mc_flags;135uint64_t mc_gstatus; /* global status */136};137138/* contains bank local x86 mc information */139struct mcinfo_bank {140xen_mcinfo_common_t common;141142uint16_t mc_bank; /* bank nr */143uint16_t mc_domid; /* Usecase 5: domain referenced by mc_addr on dom0144* and if mc_addr is valid. Never valid on DomU. 
*/145uint64_t mc_status; /* bank status */146uint64_t mc_addr; /* bank address, only valid147* if addr bit is set in mc_status */148uint64_t mc_misc;149uint64_t mc_ctrl2;150uint64_t mc_tsc;151};152153154struct mcinfo_msr {155uint64_t reg; /* MSR */156uint64_t value; /* MSR value */157};158typedef struct mcinfo_msr xen_mcinfo_msr_t;159160/* contains mc information from other161* or additional mc MSRs */162struct mcinfo_extended {163xen_mcinfo_common_t common;164165/* You can fill up to five registers.166* If you need more, then use this structure167* multiple times. */168169uint32_t mc_msrs; /* Number of msr with valid values. */170/*171* Currently Intel extended MSR (32/64) include all gp registers172* and E(R)FLAGS, E(R)IP, E(R)MISC, up to 11/19 of them might be173* useful at present. So expand this array to 32 to leave room.174*/175xen_mcinfo_msr_t mc_msr[32];176};177178/* Recovery Action flags. Giving recovery result information to DOM0 */179180/* Xen takes successful recovery action, the error is recovered */181#define REC_ACTION_RECOVERED (0x1 << 0)182/* No action is performed by XEN */183#define REC_ACTION_NONE (0x1 << 1)184/* It's possible DOM0 might take action ownership in some case */185#define REC_ACTION_NEED_RESET (0x1 << 2)186187/* Different Recovery Action types, if the action is performed successfully,188* REC_ACTION_RECOVERED flag will be returned.189*/190191/* Page Offline Action */192#define MC_ACTION_PAGE_OFFLINE (0x1 << 0)193/* CPU offline Action */194#define MC_ACTION_CPU_OFFLINE (0x1 << 1)195/* L3 cache disable Action */196#define MC_ACTION_CACHE_SHRINK (0x1 << 2)197198/* Below interface used between XEN/DOM0 for passing XEN's recovery action199* information to DOM0.200* usage Senario: After offlining broken page, XEN might pass its page offline201* recovery action result to DOM0. 
DOM0 will save the information in202* non-volatile memory for further proactive actions, such as offlining the203* easy broken page earlier when doing next reboot.204*/205struct page_offline_action206{207/* Params for passing the offlined page number to DOM0 */208uint64_t mfn;209uint64_t status;210};211typedef struct page_offline_action xen_page_offline_action_t;212213struct cpu_offline_action214{215/* Params for passing the identity of the offlined CPU to DOM0 */216uint32_t mc_socketid;217uint16_t mc_coreid;218uint16_t mc_core_threadid;219};220typedef struct cpu_offline_action xen_cpu_offline_action_t;221222#define MAX_UNION_SIZE 16223struct mcinfo_recovery224{225xen_mcinfo_common_t common;226uint16_t mc_bank; /* bank nr */227uint8_t action_flags;228uint8_t action_types;229union {230xen_page_offline_action_t page_retire;231xen_cpu_offline_action_t cpu_offline;232uint8_t pad[MAX_UNION_SIZE];233} action_info;234};235236237#define MCINFO_HYPERCALLSIZE 1024238#define MCINFO_MAXSIZE 768239240#define MCINFO_FLAGS_UNCOMPLETE 0x1241struct mc_info {242/* Number of mcinfo_* entries in mi_data */243uint32_t mi_nentries;244uint32_t flags;245uint64_t mi_data[(MCINFO_MAXSIZE - 1) / 8];246};247typedef struct mc_info mc_info_t;248DEFINE_XEN_GUEST_HANDLE(mc_info_t);249250#define __MC_MSR_ARRAYSIZE 8251#if __XEN_INTERFACE_VERSION__ <= 0x00040d00252#define __MC_NMSRS 1253#endif254#define MC_NCAPS 7 /* 7 CPU feature flag words */255#define MC_CAPS_STD_EDX 0 /* cpuid level 0x00000001 (%edx) */256#define MC_CAPS_AMD_EDX 1 /* cpuid level 0x80000001 (%edx) */257#define MC_CAPS_TM 2 /* cpuid level 0x80860001 (TransMeta) */258#define MC_CAPS_LINUX 3 /* Linux-defined */259#define MC_CAPS_STD_ECX 4 /* cpuid level 0x00000001 (%ecx) */260#define MC_CAPS_VIA 5 /* cpuid level 0xc0000001 */261#define MC_CAPS_AMD_ECX 6 /* cpuid level 0x80000001 (%ecx) */262263struct mcinfo_logical_cpu {264uint32_t mc_cpunr;265uint32_t mc_chipid;266uint16_t mc_coreid;267uint16_t mc_threadid;268uint32_t 
mc_apicid;269uint32_t mc_clusterid;270uint32_t mc_ncores;271uint32_t mc_ncores_active;272uint32_t mc_nthreads;273int32_t mc_cpuid_level;274uint32_t mc_family;275uint32_t mc_vendor;276uint32_t mc_model;277uint32_t mc_step;278char mc_vendorid[16];279char mc_brandid[64];280uint32_t mc_cpu_caps[MC_NCAPS];281uint32_t mc_cache_size;282uint32_t mc_cache_alignment;283int32_t mc_nmsrvals;284xen_mcinfo_msr_t mc_msrvalues[__MC_MSR_ARRAYSIZE];285};286typedef struct mcinfo_logical_cpu xen_mc_logical_cpu_t;287DEFINE_XEN_GUEST_HANDLE(xen_mc_logical_cpu_t);288289290/*291* OS's should use these instead of writing their own lookup function292* each with its own bugs and drawbacks.293* We use macros instead of static inline functions to allow guests294* to include this header in assembly files (*.S).295*/296/* Prototype:297* uint32_t x86_mcinfo_nentries(struct mc_info *mi);298*/299#define x86_mcinfo_nentries(_mi) \300(_mi)->mi_nentries301/* Prototype:302* struct mcinfo_common *x86_mcinfo_first(struct mc_info *mi);303*/304#define x86_mcinfo_first(_mi) \305((struct mcinfo_common *)(_mi)->mi_data)306/* Prototype:307* struct mcinfo_common *x86_mcinfo_next(struct mcinfo_common *mic);308*/309#define x86_mcinfo_next(_mic) \310((struct mcinfo_common *)((uint8_t *)(_mic) + (_mic)->size))311312/* Prototype:313* void x86_mcinfo_lookup(void *ret, struct mc_info *mi, uint16_t type);314*/315#define x86_mcinfo_lookup(_ret, _mi, _type) \316do { \317uint32_t found, i; \318struct mcinfo_common *_mic; \319\320found = 0; \321(_ret) = NULL; \322if (_mi == NULL) break; \323_mic = x86_mcinfo_first(_mi); \324for (i = 0; i < x86_mcinfo_nentries(_mi); i++) { \325if (_mic->type == (_type)) { \326found = 1; \327break; \328} \329_mic = x86_mcinfo_next(_mic); \330} \331(_ret) = found ? 
_mic : NULL; \332} while (0)333334335/* Usecase 1336* Register machine check trap callback handler337* (already done via "set_trap_table" hypercall)338*/339340/* Usecase 2341* Dom0 registers machine check event callback handler342* done by EVTCHNOP_bind_virq343*/344345/* Usecase 3346* Fetch machine check data from hypervisor.347* Note, this hypercall is special, because both Dom0 and DomU must use this.348*/349#define XEN_MC_fetch 1350struct xen_mc_fetch {351/* IN/OUT variables. */352uint32_t flags; /* IN: XEN_MC_NONURGENT, XEN_MC_URGENT,353XEN_MC_ACK if ack'ing an earlier fetch */354/* OUT: XEN_MC_OK, XEN_MC_FETCHFAILED,355XEN_MC_NODATA, XEN_MC_NOMATCH */356uint32_t _pad0;357uint64_t fetch_id; /* OUT: id for ack, IN: id we are ack'ing */358359/* OUT variables. */360XEN_GUEST_HANDLE(mc_info_t) data;361};362typedef struct xen_mc_fetch xen_mc_fetch_t;363DEFINE_XEN_GUEST_HANDLE(xen_mc_fetch_t);364365366/* Usecase 4367* This tells the hypervisor to notify a DomU about the machine check error368*/369#define XEN_MC_notifydomain 2370struct xen_mc_notifydomain {371/* IN variables. */372uint16_t mc_domid; /* The unprivileged domain to notify. */373uint16_t mc_vcpuid; /* The vcpu in mc_domid to notify.374* Usually echo'd value from the fetch hypercall. */375376/* IN/OUT variables. 
*/377uint32_t flags;378379/* IN: XEN_MC_CORRECTABLE, XEN_MC_TRAP */380/* OUT: XEN_MC_OK, XEN_MC_CANNOTHANDLE, XEN_MC_NOTDELIVERED, XEN_MC_NOMATCH */381};382typedef struct xen_mc_notifydomain xen_mc_notifydomain_t;383DEFINE_XEN_GUEST_HANDLE(xen_mc_notifydomain_t);384385#define XEN_MC_physcpuinfo 3386struct xen_mc_physcpuinfo {387/* IN/OUT */388uint32_t ncpus;389uint32_t _pad0;390/* OUT */391XEN_GUEST_HANDLE(xen_mc_logical_cpu_t) info;392};393typedef struct xen_mc_physcpuinfo xen_mc_physcpuinfo_t;394395#define XEN_MC_msrinject 4396#define MC_MSRINJ_MAXMSRS 8397struct xen_mc_msrinject {398/* IN */399uint32_t mcinj_cpunr; /* target processor id */400uint32_t mcinj_flags; /* see MC_MSRINJ_F_* below */401uint32_t mcinj_count; /* 0 .. count-1 in array are valid */402domid_t mcinj_domid; /* valid only if MC_MSRINJ_F_GPADDR is403present in mcinj_flags */404uint16_t _pad0;405xen_mcinfo_msr_t mcinj_msr[MC_MSRINJ_MAXMSRS];406};407typedef struct xen_mc_msrinject xen_mc_msrinject_t;408409/* Flags for mcinj_flags above; bits 16-31 are reserved */410#define MC_MSRINJ_F_INTERPOSE 0x1411#define MC_MSRINJ_F_GPADDR 0x2412413#define XEN_MC_mceinject 5414struct xen_mc_mceinject {415unsigned int mceinj_cpunr; /* target processor id */416};417typedef struct xen_mc_mceinject xen_mc_mceinject_t;418419#if defined(__XEN__) || defined(__XEN_TOOLS__)420#define XEN_MC_inject_v2 6421#define XEN_MC_INJECT_TYPE_MASK 0x7422#define XEN_MC_INJECT_TYPE_MCE 0x0423#define XEN_MC_INJECT_TYPE_CMCI 0x1424#define XEN_MC_INJECT_TYPE_LMCE 0x2425426#define XEN_MC_INJECT_CPU_BROADCAST 0x8427428struct xen_mc_inject_v2 {429uint32_t flags;430xenctl_bitmap_t cpumap;431};432typedef struct xen_mc_inject_v2 xen_mc_inject_v2_t;433#endif434435struct xen_mc {436uint32_t cmd;437uint32_t interface_version; /* XEN_MCA_INTERFACE_VERSION */438union {439xen_mc_fetch_t mc_fetch;440xen_mc_notifydomain_t mc_notifydomain;441xen_mc_physcpuinfo_t mc_physcpuinfo;442xen_mc_msrinject_t mc_msrinject;443xen_mc_mceinject_t 
mc_mceinject;444#if defined(__XEN__) || defined(__XEN_TOOLS__)445xen_mc_inject_v2_t mc_inject_v2;446#endif447} u;448};449typedef struct xen_mc xen_mc_t;450DEFINE_XEN_GUEST_HANDLE(xen_mc_t);451452#endif /* __ASSEMBLY__ */453454#endif /* __XEN_PUBLIC_ARCH_X86_MCA_H__ */455456457