/* SPDX-License-Identifier: MIT */1/******************************************************************************2* arch-x86/mca.h3* Guest OS machine check interface to x86 Xen.4*5* Contributed by Advanced Micro Devices, Inc.6* Author: Christoph Egger <[email protected]>7*8* Updated by Intel Corporation9* Author: Liu, Jinsong <[email protected]>10*11* Permission is hereby granted, free of charge, to any person obtaining a copy12* of this software and associated documentation files (the "Software"), to13* deal in the Software without restriction, including without limitation the14* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or15* sell copies of the Software, and to permit persons to whom the Software is16* furnished to do so, subject to the following conditions:17*18* The above copyright notice and this permission notice shall be included in19* all copies or substantial portions of the Software.20*21* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR22* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,23* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE24* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER25* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING26* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER27* DEALINGS IN THE SOFTWARE.28*/2930#ifndef __XEN_PUBLIC_ARCH_X86_MCA_H__31#define __XEN_PUBLIC_ARCH_X86_MCA_H__3233/* Hypercall */34#define __HYPERVISOR_mca __HYPERVISOR_arch_03536#define XEN_MCA_INTERFACE_VERSION 0x01ecc0033738/* IN: Dom0 calls hypercall to retrieve nonurgent error log entry */39#define XEN_MC_NONURGENT 0x140/* IN: Dom0 calls hypercall to retrieve urgent error log entry */41#define XEN_MC_URGENT 0x242/* IN: Dom0 acknowledges previosly-fetched error log entry */43#define XEN_MC_ACK 0x44445/* OUT: All is ok */46#define XEN_MC_OK 0x047/* OUT: Domain could not fetch data. */48#define XEN_MC_FETCHFAILED 0x149/* OUT: There was no machine check data to fetch. */50#define XEN_MC_NODATA 0x25152#ifndef __ASSEMBLY__53/* vIRQ injected to Dom0 */54#define VIRQ_MCA VIRQ_ARCH_05556/*57* mc_info entry types58* mca machine check info are recorded in mc_info entries.59* when fetch mca info, it can use MC_TYPE_... to distinguish60* different mca info.61*/62#define MC_TYPE_GLOBAL 063#define MC_TYPE_BANK 164#define MC_TYPE_EXTENDED 265#define MC_TYPE_RECOVERY 36667struct mcinfo_common {68uint16_t type; /* structure type */69uint16_t size; /* size of this struct in bytes */70};7172#define MC_FLAG_CORRECTABLE (1 << 0)73#define MC_FLAG_UNCORRECTABLE (1 << 1)74#define MC_FLAG_RECOVERABLE (1 << 2)75#define MC_FLAG_POLLED (1 << 3)76#define MC_FLAG_RESET (1 << 4)77#define MC_FLAG_CMCI (1 << 5)78#define MC_FLAG_MCE (1 << 6)7980/* contains x86 global mc information */81struct mcinfo_global {82struct mcinfo_common common;8384uint16_t mc_domid; /* running domain at the time in error */85uint16_t mc_vcpuid; /* virtual cpu scheduled for mc_domid */86uint32_t mc_socketid; /* physical socket of the physical core */87uint16_t mc_coreid; /* physical impacted core */88uint16_t mc_core_threadid; /* core thread of physical core */89uint32_t mc_apicid;90uint32_t mc_flags;91uint64_t mc_gstatus; /* global status */92};9394/* contains x86 bank mc information */95struct mcinfo_bank {96struct mcinfo_common common;9798uint16_t mc_bank; /* bank nr */99uint16_t mc_domid; /* domain referenced by mc_addr if valid */100uint64_t mc_status; /* bank status */101uint64_t mc_addr; /* bank address */102uint64_t mc_misc;103uint64_t mc_ctrl2;104uint64_t mc_tsc;105};106107struct mcinfo_msr {108uint64_t reg; /* MSR */109uint64_t value; /* MSR value */110};111112/* contains mc information from other or additional mc MSRs */113struct mcinfo_extended {114struct mcinfo_common common;115uint32_t mc_msrs; /* Number of msr with valid values. */116/*117* Currently Intel extended MSR (32/64) include all gp registers118* and E(R)FLAGS, E(R)IP, E(R)MISC, up to 11/19 of them might be119* useful at present. So expand this array to 16/32 to leave room.120*/121struct mcinfo_msr mc_msr[sizeof(void *) * 4];122};123124/* Recovery Action flags. Giving recovery result information to DOM0 */125126/* Xen takes successful recovery action, the error is recovered */127#define REC_ACTION_RECOVERED (0x1 << 0)128/* No action is performed by XEN */129#define REC_ACTION_NONE (0x1 << 1)130/* It's possible DOM0 might take action ownership in some case */131#define REC_ACTION_NEED_RESET (0x1 << 2)132133/*134* Different Recovery Action types, if the action is performed successfully,135* REC_ACTION_RECOVERED flag will be returned.136*/137138/* Page Offline Action */139#define MC_ACTION_PAGE_OFFLINE (0x1 << 0)140/* CPU offline Action */141#define MC_ACTION_CPU_OFFLINE (0x1 << 1)142/* L3 cache disable Action */143#define MC_ACTION_CACHE_SHRINK (0x1 << 2)144145/*146* Below interface used between XEN/DOM0 for passing XEN's recovery action147* information to DOM0.148*/149struct page_offline_action {150/* Params for passing the offlined page number to DOM0 */151uint64_t mfn;152uint64_t status;153};154155struct cpu_offline_action {156/* Params for passing the identity of the offlined CPU to DOM0 */157uint32_t mc_socketid;158uint16_t mc_coreid;159uint16_t mc_core_threadid;160};161162#define MAX_UNION_SIZE 16163struct mcinfo_recovery {164struct mcinfo_common common;165uint16_t mc_bank; /* bank nr */166uint8_t action_flags;167uint8_t action_types;168union {169struct page_offline_action page_retire;170struct cpu_offline_action cpu_offline;171uint8_t pad[MAX_UNION_SIZE];172} action_info;173};174175176#define MCINFO_MAXSIZE 768177struct mc_info {178/* Number of mcinfo_* entries in mi_data */179uint32_t mi_nentries;180uint32_t flags;181uint64_t mi_data[(MCINFO_MAXSIZE - 1) / 8];182};183DEFINE_GUEST_HANDLE_STRUCT(mc_info);184185#define __MC_MSR_ARRAYSIZE 8186#define __MC_NMSRS 1187#define MC_NCAPS 7188struct mcinfo_logical_cpu {189uint32_t mc_cpunr;190uint32_t mc_chipid;191uint16_t mc_coreid;192uint16_t mc_threadid;193uint32_t mc_apicid;194uint32_t mc_clusterid;195uint32_t mc_ncores;196uint32_t mc_ncores_active;197uint32_t mc_nthreads;198uint32_t mc_cpuid_level;199uint32_t mc_family;200uint32_t mc_vendor;201uint32_t mc_model;202uint32_t mc_step;203char mc_vendorid[16];204char mc_brandid[64];205uint32_t mc_cpu_caps[MC_NCAPS];206uint32_t mc_cache_size;207uint32_t mc_cache_alignment;208uint32_t mc_nmsrvals;209struct mcinfo_msr mc_msrvalues[__MC_MSR_ARRAYSIZE];210};211DEFINE_GUEST_HANDLE_STRUCT(mcinfo_logical_cpu);212213/*214* Prototype:215* uint32_t x86_mcinfo_nentries(struct mc_info *mi);216*/217#define x86_mcinfo_nentries(_mi) \218((_mi)->mi_nentries)219/*220* Prototype:221* struct mcinfo_common *x86_mcinfo_first(struct mc_info *mi);222*/223#define x86_mcinfo_first(_mi) \224((struct mcinfo_common *)(_mi)->mi_data)225/*226* Prototype:227* struct mcinfo_common *x86_mcinfo_next(struct mcinfo_common *mic);228*/229#define x86_mcinfo_next(_mic) \230((struct mcinfo_common *)((uint8_t *)(_mic) + (_mic)->size))231232/*233* Prototype:234* void x86_mcinfo_lookup(void *ret, struct mc_info *mi, uint16_t type);235*/236static inline void x86_mcinfo_lookup(struct mcinfo_common **ret,237struct mc_info *mi, uint16_t type)238{239uint32_t i;240struct mcinfo_common *mic;241bool found = 0;242243if (!ret || !mi)244return;245246mic = x86_mcinfo_first(mi);247for (i = 0; i < x86_mcinfo_nentries(mi); i++) {248if (mic->type == type) {249found = 1;250break;251}252mic = x86_mcinfo_next(mic);253}254255*ret = found ? mic : NULL;256}257258/*259* Fetch machine check data from hypervisor.260*/261#define XEN_MC_fetch 1262struct xen_mc_fetch {263/*264* IN: XEN_MC_NONURGENT, XEN_MC_URGENT,265* XEN_MC_ACK if ack'king an earlier fetch266* OUT: XEN_MC_OK, XEN_MC_FETCHAILED, XEN_MC_NODATA267*/268uint32_t flags;269uint32_t _pad0;270/* OUT: id for ack, IN: id we are ack'ing */271uint64_t fetch_id;272273/* OUT variables. */274GUEST_HANDLE(mc_info) data;275};276DEFINE_GUEST_HANDLE_STRUCT(xen_mc_fetch);277278279/*280* This tells the hypervisor to notify a DomU about the machine check error281*/282#define XEN_MC_notifydomain 2283struct xen_mc_notifydomain {284/* IN variables */285uint16_t mc_domid; /* The unprivileged domain to notify */286uint16_t mc_vcpuid; /* The vcpu in mc_domid to notify */287288/* IN/OUT variables */289uint32_t flags;290};291DEFINE_GUEST_HANDLE_STRUCT(xen_mc_notifydomain);292293#define XEN_MC_physcpuinfo 3294struct xen_mc_physcpuinfo {295/* IN/OUT */296uint32_t ncpus;297uint32_t _pad0;298/* OUT */299GUEST_HANDLE(mcinfo_logical_cpu) info;300};301302#define XEN_MC_msrinject 4303#define MC_MSRINJ_MAXMSRS 8304struct xen_mc_msrinject {305/* IN */306uint32_t mcinj_cpunr; /* target processor id */307uint32_t mcinj_flags; /* see MC_MSRINJ_F_* below */308uint32_t mcinj_count; /* 0 .. count-1 in array are valid */309uint32_t _pad0;310struct mcinfo_msr mcinj_msr[MC_MSRINJ_MAXMSRS];311};312313/* Flags for mcinj_flags above; bits 16-31 are reserved */314#define MC_MSRINJ_F_INTERPOSE 0x1315316#define XEN_MC_mceinject 5317struct xen_mc_mceinject {318unsigned int mceinj_cpunr; /* target processor id */319};320321struct xen_mc {322uint32_t cmd;323uint32_t interface_version; /* XEN_MCA_INTERFACE_VERSION */324union {325struct xen_mc_fetch mc_fetch;326struct xen_mc_notifydomain mc_notifydomain;327struct xen_mc_physcpuinfo mc_physcpuinfo;328struct xen_mc_msrinject mc_msrinject;329struct xen_mc_mceinject mc_mceinject;330} u;331};332DEFINE_GUEST_HANDLE_STRUCT(xen_mc);333334/*335* Fields are zero when not available. Also, this struct is shared with336* userspace mcelog and thus must keep existing fields at current offsets.337* Only add new fields to the end of the structure338*/339struct xen_mce {340__u64 status;341__u64 misc;342__u64 addr;343__u64 mcgstatus;344__u64 ip;345__u64 tsc; /* cpu time stamp counter */346__u64 time; /* wall time_t when error was detected */347__u8 cpuvendor; /* cpu vendor as encoded in system.h */348__u8 inject_flags; /* software inject flags */349__u16 pad;350__u32 cpuid; /* CPUID 1 EAX */351__u8 cs; /* code segment */352__u8 bank; /* machine check bank */353__u8 cpu; /* cpu number; obsolete; use extcpu now */354__u8 finished; /* entry is valid */355__u32 extcpu; /* linux cpu number that detected the error */356__u32 socketid; /* CPU socket ID */357__u32 apicid; /* CPU initial apic ID */358__u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */359__u64 synd; /* MCA_SYND MSR: only valid on SMCA systems */360__u64 ipid; /* MCA_IPID MSR: only valid on SMCA systems */361__u64 ppin; /* Protected Processor Inventory Number */362};363364/*365* This structure contains all data related to the MCE log. Also366* carries a signature to make it easier to find from external367* debugging tools. Each entry is only valid when its finished flag368* is set.369*/370371#define XEN_MCE_LOG_LEN 32372373struct xen_mce_log {374char signature[12] __nonstring; /* "MACHINECHECK" */375unsigned len; /* = XEN_MCE_LOG_LEN */376unsigned next;377unsigned flags;378unsigned recordlen; /* length of struct xen_mce */379struct xen_mce entry[XEN_MCE_LOG_LEN];380};381382#define XEN_MCE_OVERFLOW 0 /* bit 0 in flags means overflow */383384#define XEN_MCE_LOG_SIGNATURE "MACHINECHECK"385386#define MCE_GET_RECORD_LEN _IOR('M', 1, int)387#define MCE_GET_LOG_LEN _IOR('M', 2, int)388#define MCE_GETCLEAR_FLAGS _IOR('M', 3, int)389390#endif /* __ASSEMBLY__ */391#endif /* __XEN_PUBLIC_ARCH_X86_MCA_H__ */392393394