Path: cranelift/codegen/src/isa/x64/inst/mod.rs
//! This module defines x86_64-specific machine instruction types.

pub use emit_state::EmitState;

use crate::binemit::{Addend, CodeOffset, Reloc};
use crate::ir::{ExternalName, LibCall, TrapCode, Type, types};
use crate::isa::x64::abi::X64ABIMachineSpec;
use crate::isa::x64::inst::regs::pretty_print_reg;
use crate::isa::x64::settings as x64_settings;
use crate::isa::{CallConv, FunctionAlignment};
use crate::{CodegenError, CodegenResult, settings};
use crate::{machinst::*, trace};
use alloc::boxed::Box;
use alloc::string::{String, ToString};
use alloc::vec;
use alloc::vec::Vec;
use core::fmt::{self, Write};
use core::slice;
use cranelift_assembler_x64 as asm;
use smallvec::{SmallVec, smallvec};

pub mod args;
mod emit;
mod emit_state;
#[cfg(test)]
mod emit_tests;
pub mod external;
pub mod regs;
mod stack_switch;
pub mod unwind;

use args::*;

//=============================================================================
// Instructions (top level): definition

// `Inst` is defined inside ISLE as `MInst`. We publicly re-export it here.
pub use super::lower::isle::generated_code::AtomicRmwSeqOp;
pub use super::lower::isle::generated_code::MInst as Inst;

/// Out-of-line data for return-calls, to keep the size of `Inst` down.
#[derive(Clone, Debug)]
pub struct ReturnCallInfo<T> {
    /// Where this call is going.
    pub dest: T,

    /// The size of the argument area for this return-call, potentially smaller than that of the
    /// caller, but never larger.
    pub new_stack_arg_size: u32,

    /// The in-register arguments and their constraints.
    pub uses: CallArgList,

    /// A temporary for use when moving the return address.
    pub tmp: WritableGpr,
}

#[test]
#[cfg(target_pointer_width = "64")]
fn inst_size_test() {
    // This test helps catch unintentional growth in the size
    // of the `Inst` enum.
    assert_eq!(48, core::mem::size_of::<Inst>());
}

impl Inst {
    /// Check whether the instruction (or pseudo-instruction) can be emitted
    /// on the target architecture described by `emit_info`. For non-assembler
    /// instructions, this assumes a baseline feature set (i.e., 64-bit mode
    /// and SSE2 and below).
    fn is_available(&self, emit_info: &EmitInfo) -> bool {
        use asm::AvailableFeatures;

        match self {
            // These instructions are part of SSE2, which is a basic requirement
            // in Cranelift, and don't have to be checked.
            Inst::AtomicRmwSeq { .. }
            | Inst::CallKnown { .. }
            | Inst::CallUnknown { .. }
            | Inst::ReturnCallKnown { .. }
            | Inst::ReturnCallUnknown { .. }
            | Inst::CheckedSRemSeq { .. }
            | Inst::CheckedSRemSeq8 { .. }
            | Inst::CvtFloatToSintSeq { .. }
            | Inst::CvtFloatToUintSeq { .. }
            | Inst::CvtUint64ToFloatSeq { .. }
            | Inst::JmpCond { .. }
            | Inst::JmpCondOr { .. }
            | Inst::WinchJmpIf { .. }
            | Inst::JmpKnown { .. }
            | Inst::JmpTableSeq { .. }
            | Inst::LoadExtName { .. }
            | Inst::MovFromPReg { .. }
            | Inst::MovToPReg { .. }
            | Inst::StackProbeLoop { .. }
            | Inst::Args { .. }
            | Inst::Rets { .. }
            | Inst::StackSwitchBasic { .. }
            | Inst::TrapIf { .. }
            | Inst::TrapIfAnd { .. }
            | Inst::TrapIfOr { .. }
            | Inst::XmmCmove { .. }
            | Inst::XmmMinMaxSeq { .. }
            | Inst::XmmUninitializedValue { .. }
            | Inst::GprUninitializedValue { .. }
            | Inst::ElfTlsGetAddr { .. }
            | Inst::MachOTlsGetAddr { .. }
            | Inst::CoffTlsGetAddr { .. }
            | Inst::Unwind { .. }
            | Inst::DummyUse { .. }
            | Inst::LabelAddress { .. }
            | Inst::SequencePoint => true,

            Inst::Atomic128RmwSeq { .. } | Inst::Atomic128XchgSeq { .. } => emit_info.cmpxchg16b(),

            Inst::External { inst } => inst.is_available(&emit_info),
        }
    }
}

// Handy constructors for Insts.

impl Inst {
    pub(crate) fn nop(len: u8) -> Self {
        assert!(len > 0 && len <= 9);
        let inst = match len {
            1 => asm::inst::nop_1b::new().into(),
            2 => asm::inst::nop_2b::new().into(),
            3 => asm::inst::nop_3b::new().into(),
            4 => asm::inst::nop_4b::new().into(),
            5 => asm::inst::nop_5b::new().into(),
            6 => asm::inst::nop_6b::new().into(),
            7 => asm::inst::nop_7b::new().into(),
            8 => asm::inst::nop_8b::new().into(),
            9 => asm::inst::nop_9b::new().into(),
            _ => unreachable!("nop length must be between 1 and 9"),
        };
        Self::External { inst }
    }

    pub(crate) fn addq_mi(dst: Writable<Reg>, simm32: i32) -> Self {
        let inst = if let Ok(simm8) = i8::try_from(simm32) {
            asm::inst::addq_mi_sxb::new(dst, simm8).into()
        } else {
            asm::inst::addq_mi_sxl::new(dst, simm32).into()
        };
        Inst::External { inst }
    }

    pub(crate) fn subq_mi(dst: Writable<Reg>, simm32: i32) -> Self {
        let inst = if let Ok(simm8) = i8::try_from(simm32) {
            asm::inst::subq_mi_sxb::new(dst, simm8).into()
        } else {
            asm::inst::subq_mi_sxl::new(dst, simm32).into()
        };
        Inst::External { inst }
    }

    /// Writes the `simm64` immediate into `dst`.
    ///
    /// Note that if `dst_size` is less than 64 bits then the upper bits of
    /// `simm64` will be converted to zero.
    pub fn imm(dst_size: OperandSize, simm64: u64, dst: Writable<Reg>) -> Inst {
        debug_assert!(dst_size.is_one_of(&[OperandSize::Size32, OperandSize::Size64]));
        debug_assert!(dst.to_reg().class() == RegClass::Int);
        let dst = WritableGpr::from_writable_reg(dst).unwrap();
        let inst = match dst_size {
            OperandSize::Size64 => match u32::try_from(simm64) {
                // If `simm64` is zero-extended use `movl` which zeros the
                // upper bits.
                Ok(imm32) => asm::inst::movl_oi::new(dst, imm32).into(),
                _ => match i32::try_from(simm64.cast_signed()) {
                    // If `simm64` is sign-extended use `movq` which sign-extends
                    // the upper bits.
                    Ok(simm32) => asm::inst::movq_mi_sxl::new(dst, simm32).into(),
                    // Fall back to embedding the entire immediate.
                    _ => asm::inst::movabsq_oi::new(dst, simm64).into(),
                },
            },
            // FIXME: the input to this function is a logical `simm64` stored
            // as `u64`. That means that ideally what we would do here is cast
            // the `simm64` to an `i64`, perform a `i32::try_from()`, then cast
            // that back to `u32`. That would ensure that the immediate loses
            // no meaning and has the same logical value. Currently though
            // Cranelift relies on discarding the upper bits because literals
            // like `0x8000_0000_u64` fail to convert to an `i32`. In theory
            // the input to this function should change to `i64`. In the
In the187// meantime this is documented as discarding the upper bits,188// although this is an old function so that's unlikely to help189// much.190_ => asm::inst::movl_oi::new(dst, simm64 as u32).into(),191};192Inst::External { inst }193}194195pub(crate) fn movzx_rm_r(ext_mode: ExtMode, src: RegMem, dst: Writable<Reg>) -> Inst {196src.assert_regclass_is(RegClass::Int);197debug_assert!(dst.to_reg().class() == RegClass::Int);198let src = match src {199RegMem::Reg { reg } => asm::GprMem::Gpr(Gpr::new(reg).unwrap()),200RegMem::Mem { addr } => asm::GprMem::Mem(addr.into()),201};202let inst = match ext_mode {203ExtMode::BL => asm::inst::movzbl_rm::new(dst, src).into(),204ExtMode::BQ => asm::inst::movzbq_rm::new(dst, src).into(),205ExtMode::WL => asm::inst::movzwl_rm::new(dst, src).into(),206ExtMode::WQ => asm::inst::movzwq_rm::new(dst, src).into(),207ExtMode::LQ => {208// This instruction selection may seem strange but is correct in209// 64-bit mode: section 3.4.1.1 of the Intel manual says that210// "32-bit operands generate a 32-bit result, zero-extended to a211// 64-bit result in the destination general-purpose register."212// This is applicable beyond `mov` but we use this fact to213// zero-extend `src` into `dst`.214asm::inst::movl_rm::new(dst, src).into()215}216};217Inst::External { inst }218}219220pub(crate) fn movsx_rm_r(ext_mode: ExtMode, src: RegMem, dst: Writable<Reg>) -> Inst {221src.assert_regclass_is(RegClass::Int);222debug_assert!(dst.to_reg().class() == RegClass::Int);223let src = match src {224RegMem::Reg { reg } => asm::GprMem::Gpr(Gpr::new(reg).unwrap()),225RegMem::Mem { addr } => asm::GprMem::Mem(addr.into()),226};227let inst = match ext_mode {228ExtMode::BL => asm::inst::movsbl_rm::new(dst, src).into(),229ExtMode::BQ => asm::inst::movsbq_rm::new(dst, src).into(),230ExtMode::WL => asm::inst::movswl_rm::new(dst, src).into(),231ExtMode::WQ => asm::inst::movswq_rm::new(dst, src).into(),232ExtMode::LQ => asm::inst::movslq_rm::new(dst, src).into(),233};234Inst::External { inst }235}236237/// Compares `src1` against `src2`238pub(crate) fn cmp_mi_sxb(size: OperandSize, src1: Gpr, src2: i8) -> Inst {239let inst = match size {240OperandSize::Size8 => asm::inst::cmpb_mi::new(src1, src2.cast_unsigned()).into(),241OperandSize::Size16 => asm::inst::cmpw_mi_sxb::new(src1, src2).into(),242OperandSize::Size32 => asm::inst::cmpl_mi_sxb::new(src1, src2).into(),243OperandSize::Size64 => asm::inst::cmpq_mi_sxb::new(src1, src2).into(),244};245Inst::External { inst }246}247248pub(crate) fn trap_if(cc: CC, trap_code: TrapCode) -> Inst {249Inst::TrapIf { cc, trap_code }250}251252pub(crate) fn call_known(info: Box<CallInfo<ExternalName>>) -> Inst {253Inst::CallKnown { info }254}255256pub(crate) fn call_unknown(info: Box<CallInfo<RegMem>>) -> Inst {257info.dest.assert_regclass_is(RegClass::Int);258Inst::CallUnknown { info }259}260261pub(crate) fn jmp_known(dst: MachLabel) -> Inst {262Inst::JmpKnown { dst }263}264265/// Choose which instruction to use for loading a register value from memory. For loads smaller266/// than 64 bits, this method expects a way to extend the value (i.e. 
    /// [ExtKind::ZeroExtend]); loads with no extension necessary will ignore this.
    pub(crate) fn load(
        ty: Type,
        from_addr: impl Into<SyntheticAmode>,
        to_reg: Writable<Reg>,
        ext_kind: ExtKind,
    ) -> Inst {
        let rc = to_reg.to_reg().class();
        match rc {
            RegClass::Int => {
                let ext_mode = match ty.bytes() {
                    1 => Some(ExtMode::BQ),
                    2 => Some(ExtMode::WQ),
                    4 => Some(ExtMode::LQ),
                    8 => None,
                    _ => unreachable!("the type should never use a scalar load: {}", ty),
                };
                if let Some(ext_mode) = ext_mode {
                    // Values smaller than 64 bits must be extended in some way.
                    match ext_kind {
                        ExtKind::SignExtend => {
                            Inst::movsx_rm_r(ext_mode, RegMem::mem(from_addr), to_reg)
                        }
                        ExtKind::ZeroExtend => {
                            Inst::movzx_rm_r(ext_mode, RegMem::mem(from_addr), to_reg)
                        }
                        ExtKind::None => {
                            panic!("expected an extension kind for extension mode: {ext_mode:?}")
                        }
                    }
                } else {
                    // 64-bit values can be moved directly.
                    let from_addr = asm::GprMem::from(from_addr.into());
                    Inst::External {
                        inst: asm::inst::movq_rm::new(to_reg, from_addr).into(),
                    }
                }
            }
            RegClass::Float => {
                let to_reg = to_reg.map(|r| Xmm::new(r).unwrap());
                let from_addr = from_addr.into();
                let inst = match ty {
                    types::F16 | types::I8X2 => {
                        panic!("loading a f16 or i8x2 requires multiple instructions")
                    }
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 32 => {
                        asm::inst::movss_a_m::new(to_reg, from_addr).into()
                    }
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 64 => {
                        asm::inst::movsd_a_m::new(to_reg, from_addr).into()
                    }
                    types::F32X4 => asm::inst::movups_a::new(to_reg, from_addr).into(),
                    types::F64X2 => asm::inst::movupd_a::new(to_reg, from_addr).into(),
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 128 => {
                        asm::inst::movdqu_a::new(to_reg, from_addr).into()
                    }
                    _ => unimplemented!("unable to load type: {}", ty),
                };
                Inst::External { inst }
            }
            RegClass::Vector => unreachable!(),
        }
    }

    /// Choose which instruction to use for storing a register value to memory.
    pub(crate) fn store(ty: Type, from_reg: Reg, to_addr: impl Into<SyntheticAmode>) -> Inst {
        let rc = from_reg.class();
        let to_addr = to_addr.into();
        let inst = match rc {
            RegClass::Int => {
                let from_reg = Gpr::unwrap_new(from_reg);
                match ty {
                    types::I8 => asm::inst::movb_mr::new(to_addr, from_reg).into(),
                    types::I16 => asm::inst::movw_mr::new(to_addr, from_reg).into(),
                    types::I32 => asm::inst::movl_mr::new(to_addr, from_reg).into(),
                    types::I64 => asm::inst::movq_mr::new(to_addr, from_reg).into(),
                    _ => unreachable!(),
                }
            }
            RegClass::Float => {
                let from_reg = Xmm::new(from_reg).unwrap();
                match ty {
                    types::F16 | types::I8X2 => {
                        panic!("storing a f16 or i8x2 requires multiple instructions")
                    }
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 32 => {
                        asm::inst::movss_c_m::new(to_addr, from_reg).into()
                    }
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 64 => {
                        asm::inst::movsd_c_m::new(to_addr, from_reg).into()
                    }
                    types::F32X4 => asm::inst::movups_b::new(to_addr, from_reg).into(),
                    types::F64X2 => asm::inst::movupd_b::new(to_addr, from_reg).into(),
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 128 => {
                        asm::inst::movdqu_b::new(to_addr, from_reg).into()
                    }
                    _ => unimplemented!("unable to store type: {}", ty),
                }
            }
            RegClass::Vector => unreachable!(),
        };
        Inst::External { inst }
    }
}

//=============================================================================
// Instructions: printing

impl PrettyPrint for Inst {
    fn pretty_print(&self, _size: u8) -> String {
        fn ljustify(s: String) -> String {
            let w = 7;
            if s.len() >= w {
                s
            } else {
                let need = usize::min(w, w - s.len());
                s + &format!("{nil: <width$}", nil = "", width = need)
            }
        }

        fn ljustify2(s1: String, s2: String) -> String {
            ljustify(s1 + &s2)
        }

        match self {
            Inst::CheckedSRemSeq {
                size,
                divisor,
                dividend_lo,
                dividend_hi,
                dst_quotient,
                dst_remainder,
            } => {
                let divisor = pretty_print_reg(divisor.to_reg(), size.to_bytes());
                let dividend_lo = pretty_print_reg(dividend_lo.to_reg(), size.to_bytes());
                let dividend_hi = pretty_print_reg(dividend_hi.to_reg(), size.to_bytes());
                let dst_quotient =
                    pretty_print_reg(dst_quotient.to_reg().to_reg(), size.to_bytes());
                let dst_remainder =
                    pretty_print_reg(dst_remainder.to_reg().to_reg(), size.to_bytes());
                format!(
                    "checked_srem_seq {dividend_lo}, {dividend_hi}, \
                     {divisor}, {dst_quotient}, {dst_remainder}",
                )
            }

            Inst::CheckedSRemSeq8 {
                divisor,
                dividend,
                dst,
            } => {
                let divisor = pretty_print_reg(divisor.to_reg(), 1);
                let dividend = pretty_print_reg(dividend.to_reg(), 1);
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 1);
                format!("checked_srem_seq {dividend}, {divisor}, {dst}")
            }

            Inst::XmmMinMaxSeq {
                lhs,
                rhs,
                dst,
                is_min,
                size,
            } => {
                let rhs = pretty_print_reg(rhs.to_reg(), 8);
                let lhs = pretty_print_reg(lhs.to_reg(), 8);
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let op = ljustify2(
                    if *is_min {
                        "xmm min seq ".to_string()
                    } else {
                        "xmm max seq ".to_string()
                    },
                    format!("f{}", size.to_bits()),
                );
                format!("{op} {lhs}, {rhs}, {dst}")
            }

            Inst::XmmUninitializedValue { dst } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let op = ljustify("uninit".into());
                format!("{op} {dst}")
            }

            Inst::GprUninitializedValue { dst } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let op = ljustify("uninit".into());
                format!("{op} {dst}")
            }

            Inst::CvtUint64ToFloatSeq {
                src,
                dst,
                dst_size,
                tmp_gpr1,
                tmp_gpr2,
                ..
            } => {
                let src = pretty_print_reg(src.to_reg(), 8);
                let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes());
                let tmp_gpr1 = pretty_print_reg(tmp_gpr1.to_reg().to_reg(), 8);
                let tmp_gpr2 = pretty_print_reg(tmp_gpr2.to_reg().to_reg(), 8);
                let op = ljustify(format!(
                    "u64_to_{}_seq",
                    if *dst_size == OperandSize::Size64 {
                        "f64"
                    } else {
                        "f32"
                    }
                ));
                format!("{op} {src}, {dst}, {tmp_gpr1}, {tmp_gpr2}")
            }

            Inst::CvtFloatToSintSeq {
                src,
                dst,
                src_size,
                dst_size,
                tmp_xmm,
                tmp_gpr,
                is_saturating,
            } => {
                let src = pretty_print_reg(src.to_reg(), src_size.to_bytes());
                let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes());
                let tmp_gpr = pretty_print_reg(tmp_gpr.to_reg().to_reg(), 8);
                let tmp_xmm = pretty_print_reg(tmp_xmm.to_reg().to_reg(), 8);
                let op = ljustify(format!(
                    "cvt_float{}_to_sint{}{}_seq",
                    src_size.to_bits(),
                    dst_size.to_bits(),
                    if *is_saturating { "_sat" } else { "" },
                ));
                format!("{op} {src}, {dst}, {tmp_gpr}, {tmp_xmm}")
            }

            Inst::CvtFloatToUintSeq {
                src,
                dst,
                src_size,
                dst_size,
                tmp_gpr,
                tmp_xmm,
                tmp_xmm2,
                is_saturating,
            } => {
                let src = pretty_print_reg(src.to_reg(), src_size.to_bytes());
                let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes());
                let tmp_gpr = pretty_print_reg(tmp_gpr.to_reg().to_reg(), 8);
                let tmp_xmm = pretty_print_reg(tmp_xmm.to_reg().to_reg(), 8);
                let tmp_xmm2 = pretty_print_reg(tmp_xmm2.to_reg().to_reg(), 8);
                let op = ljustify(format!(
                    "cvt_float{}_to_uint{}{}_seq",
                    src_size.to_bits(),
                    dst_size.to_bits(),
                    if *is_saturating { "_sat" } else { "" },
                ));
                format!("{op} {src}, {dst}, {tmp_gpr}, {tmp_xmm}, {tmp_xmm2}")
            }

            Inst::MovFromPReg { src, dst } => {
                let src: Reg = (*src).into();
                let src = pretty_print_reg(src, 8);
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let op = ljustify("movq".to_string());
                format!("{op} {src}, {dst}")
            }

            Inst::MovToPReg { src, dst } => {
                let src = pretty_print_reg(src.to_reg(), 8);
                let dst: Reg = (*dst).into();
                let dst = pretty_print_reg(dst, 8);
                let op = ljustify("movq".to_string());
                format!("{op} {src}, {dst}")
            }

            Inst::XmmCmove {
                ty,
                cc,
                consequent,
                alternative,
                dst,
                ..
            } => {
                let size = u8::try_from(ty.bytes()).unwrap();
                let alternative = pretty_print_reg(alternative.to_reg(), size);
                let dst = pretty_print_reg(dst.to_reg().to_reg(), size);
                let consequent = pretty_print_reg(consequent.to_reg(), size);
                let suffix = match *ty {
                    types::F64 => "sd",
                    types::F32 => "ss",
                    types::F16 => "ss",
                    types::F32X4 => "aps",
                    types::F64X2 => "apd",
                    _ => "dqa",
                };
                let cc = cc.invert();
                format!(
                    "mov{suffix} {alternative}, {dst}; \
                     j{cc} $next; \
                     mov{suffix} {consequent}, {dst}; \
                     $next:"
                )
            }

            Inst::StackProbeLoop {
                tmp,
                frame_size,
                guard_size,
            } => {
                let tmp = pretty_print_reg(tmp.to_reg(), 8);
                let op = ljustify("stack_probe_loop".to_string());
                format!("{op} {tmp}, frame_size={frame_size}, guard_size={guard_size}")
            }

            Inst::CallKnown { info } => {
                let op = ljustify("call".to_string());
                let try_call = info
                    .try_call_info
                    .as_ref()
                    .map(|tci| pretty_print_try_call(tci))
                    .unwrap_or_default();
                format!("{op} {:?}{try_call}", info.dest)
            }

            Inst::CallUnknown { info } => {
                let dest = info.dest.pretty_print(8);
                let op = ljustify("call".to_string());
                let try_call = info
                    .try_call_info
                    .as_ref()
                    .map(|tci| pretty_print_try_call(tci))
                    .unwrap_or_default();
                format!("{op} *{dest}{try_call}")
            }

            Inst::ReturnCallKnown { info } => {
                let ReturnCallInfo {
                    uses,
                    new_stack_arg_size,
                    tmp,
                    dest,
                } = &**info;
                let tmp = pretty_print_reg(tmp.to_reg().to_reg(), 8);
                let mut s = format!("return_call_known {dest:?} ({new_stack_arg_size}) tmp={tmp}");
                for ret in uses {
                    let preg = pretty_print_reg(ret.preg, 8);
                    let vreg = pretty_print_reg(ret.vreg, 8);
                    write!(&mut s, " {vreg}={preg}").unwrap();
                }
                s
            }

            Inst::ReturnCallUnknown { info } => {
                let ReturnCallInfo {
                    uses,
                    new_stack_arg_size,
                    tmp,
                    dest,
                } = &**info;
                let callee = pretty_print_reg(*dest, 8);
                let tmp = pretty_print_reg(tmp.to_reg().to_reg(), 8);
                let mut s =
                    format!("return_call_unknown {callee} ({new_stack_arg_size}) tmp={tmp}");
                for ret in uses {
                    let preg = pretty_print_reg(ret.preg, 8);
                    let vreg = pretty_print_reg(ret.vreg, 8);
                    write!(&mut s, " {vreg}={preg}").unwrap();
                }
                s
            }

            Inst::Args { args } => {
                let mut s = "args".to_string();
                for arg in args {
                    let preg = pretty_print_reg(arg.preg, 8);
                    let def = pretty_print_reg(arg.vreg.to_reg(), 8);
                    write!(&mut s, " {def}={preg}").unwrap();
                }
                s
            }

            Inst::Rets { rets } => {
                let mut s = "rets".to_string();
                for ret in rets {
                    let preg = pretty_print_reg(ret.preg, 8);
                    let vreg = pretty_print_reg(ret.vreg, 8);
                    write!(&mut s, " {vreg}={preg}").unwrap();
                }
                s
            }

            Inst::StackSwitchBasic {
                store_context_ptr,
                load_context_ptr,
                in_payload0,
                out_payload0,
            } => {
                let store_context_ptr = pretty_print_reg(**store_context_ptr, 8);
                let load_context_ptr = pretty_print_reg(**load_context_ptr, 8);
                let in_payload0 = pretty_print_reg(**in_payload0, 8);
                let out_payload0 = pretty_print_reg(*out_payload0.to_reg(), 8);
                format!(
                    "{out_payload0} = stack_switch_basic {store_context_ptr}, {load_context_ptr}, {in_payload0}"
                )
            }

            Inst::JmpKnown { dst } => {
                let op = ljustify("jmp".to_string());
                let dst = dst.to_string();
                format!("{op} {dst}")
            }

            Inst::WinchJmpIf { cc, taken } => {
                let taken = taken.to_string();
                let op = ljustify2("j".to_string(), cc.to_string());
                format!("{op} {taken}")
            }

            Inst::JmpCondOr {
                cc1,
                cc2,
                taken,
                not_taken,
            } => {
                let taken = taken.to_string();
                let not_taken = not_taken.to_string();
                let op = ljustify(format!("j{cc1},{cc2}"));
                format!("{op} {taken}; j {not_taken}")
            }

            Inst::JmpCond {
                cc,
                taken,
                not_taken,
            } => {
                let taken = taken.to_string();
                let not_taken = not_taken.to_string();
                let op = ljustify2("j".to_string(), cc.to_string());
                format!("{op} {taken}; j {not_taken}")
            }

            Inst::JmpTableSeq {
                idx, tmp1, tmp2, ..
            } => {
                let idx = pretty_print_reg(*idx, 8);
                let tmp1 = pretty_print_reg(tmp1.to_reg(), 8);
                let tmp2 = pretty_print_reg(tmp2.to_reg(), 8);
                let op = ljustify("br_table".into());
                format!("{op} {idx}, {tmp1}, {tmp2}")
            }

            Inst::TrapIf { cc, trap_code, .. } => {
                format!("j{cc} #trap={trap_code}")
            }

            Inst::TrapIfAnd {
                cc1,
                cc2,
                trap_code,
                ..
            } => {
                let cc1 = cc1.invert();
                let cc2 = cc2.invert();
                format!("trap_if_and {cc1}, {cc2}, {trap_code}")
            }

            Inst::TrapIfOr {
                cc1,
                cc2,
                trap_code,
                ..
            } => {
                let cc2 = cc2.invert();
                format!("trap_if_or {cc1}, {cc2}, {trap_code}")
            }

            Inst::LoadExtName {
                dst, name, offset, ..
            } => {
                let dst = pretty_print_reg(*dst.to_reg(), 8);
                let name = name.display(None);
                let op = ljustify("load_ext_name".into());
                format!("{op} {name}+{offset}, {dst}")
            }

            Inst::AtomicRmwSeq { ty, op, .. } => {
                let ty = ty.bits();
                format!(
                    "atomically {{ {ty}_bits_at_[%r9] {op:?}= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }}"
                )
            }

            Inst::Atomic128RmwSeq {
                op,
                mem,
                operand_low,
                operand_high,
                temp_low,
                temp_high,
                dst_old_low,
                dst_old_high,
            } => {
                let operand_low = pretty_print_reg(**operand_low, 8);
                let operand_high = pretty_print_reg(**operand_high, 8);
                let temp_low = pretty_print_reg(*temp_low.to_reg(), 8);
                let temp_high = pretty_print_reg(*temp_high.to_reg(), 8);
                let dst_old_low = pretty_print_reg(*dst_old_low.to_reg(), 8);
                let dst_old_high = pretty_print_reg(*dst_old_high.to_reg(), 8);
                let mem = mem.pretty_print(16);
                format!(
                    "atomically {{ {dst_old_high}:{dst_old_low} = {mem}; {temp_high}:{temp_low} = {dst_old_high}:{dst_old_low} {op:?} {operand_high}:{operand_low}; {mem} = {temp_high}:{temp_low} }}"
                )
            }

            Inst::Atomic128XchgSeq {
                mem,
                operand_low,
                operand_high,
                dst_old_low,
                dst_old_high,
            } => {
                let operand_low = pretty_print_reg(**operand_low, 8);
                let operand_high = pretty_print_reg(**operand_high, 8);
                let dst_old_low = pretty_print_reg(*dst_old_low.to_reg(), 8);
                let dst_old_high = pretty_print_reg(*dst_old_high.to_reg(), 8);
                let mem = mem.pretty_print(16);
                format!(
                    "atomically {{ {dst_old_high}:{dst_old_low} = {mem}; {mem} = {operand_high}:{operand_low} }}"
                )
            }

            Inst::ElfTlsGetAddr { symbol, dst } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                format!("{dst} = elf_tls_get_addr {symbol:?}")
            }

            Inst::MachOTlsGetAddr { symbol, dst } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                format!("{dst} = macho_tls_get_addr {symbol:?}")
            }

            Inst::CoffTlsGetAddr { symbol, dst, tmp } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let tmp = tmp.to_reg().to_reg();

                let mut s = format!("{dst} = coff_tls_get_addr {symbol:?}");
                if tmp.is_virtual() {
                    let tmp = pretty_print_reg(tmp, 8);
                    write!(&mut s, ", {tmp}").unwrap();
                };

                s
            }

            Inst::Unwind { inst } => format!("unwind {inst:?}"),

            Inst::DummyUse { reg } => {
                let reg = pretty_print_reg(*reg, 8);
                format!("dummy_use {reg}")
            }

            Inst::LabelAddress { dst, label } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                format!("label_address {dst}, {label:?}")
            }

            Inst::SequencePoint {} => {
                format!("sequence_point")
            }

            Inst::External { inst } => {
                format!("{inst}")
            }
        }
    }
}

fn pretty_print_try_call(info: &TryCallInfo) -> String {
    format!(
        "; jmp {:?}; catch [{}]",
        info.continuation,
        info.pretty_print_dests()
    )
}

impl fmt::Debug for Inst {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        write!(fmt, "{}", self.pretty_print_inst(&mut Default::default()))
    }
}

fn x64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) {
    // Note: because we need to statically know the indices of each
    // reg in the operands list in order to fetch its allocation
    // later, we put the variable-operand-count bits (the RegMem,
    // RegMemImm, etc args) last. regalloc2 doesn't care what order
    // the operands come in; they can be freely reordered.

    // N.B.: we MUST keep the below in careful sync with (i) emission,
    // in `emit.rs`, and (ii) pretty-printing, in the `pretty_print`
    // method above.
    match inst {
        Inst::CheckedSRemSeq {
            divisor,
            dividend_lo,
            dividend_hi,
            dst_quotient,
            dst_remainder,
            ..
        } => {
            collector.reg_use(divisor);
            collector.reg_fixed_use(dividend_lo, regs::rax());
            collector.reg_fixed_use(dividend_hi, regs::rdx());
            collector.reg_fixed_def(dst_quotient, regs::rax());
            collector.reg_fixed_def(dst_remainder, regs::rdx());
        }
        Inst::CheckedSRemSeq8 {
            divisor,
            dividend,
            dst,
            ..
        } => {
            collector.reg_use(divisor);
            collector.reg_fixed_use(dividend, regs::rax());
            collector.reg_fixed_def(dst, regs::rax());
        }
        Inst::XmmUninitializedValue { dst } => collector.reg_def(dst),
        Inst::GprUninitializedValue { dst } => collector.reg_def(dst),
        Inst::XmmMinMaxSeq { lhs, rhs, dst, .. } => {
            collector.reg_use(rhs);
            collector.reg_use(lhs);
            collector.reg_reuse_def(dst, 0); // Reuse RHS.
        }
        Inst::MovFromPReg { dst, src } => {
            debug_assert!(dst.to_reg().to_reg().is_virtual());
            collector.reg_fixed_nonallocatable(*src);
            collector.reg_def(dst);
        }
        Inst::MovToPReg { dst, src } => {
            debug_assert!(src.to_reg().is_virtual());
            collector.reg_use(src);
            collector.reg_fixed_nonallocatable(*dst);
        }
        Inst::CvtUint64ToFloatSeq {
            src,
            dst,
            tmp_gpr1,
            tmp_gpr2,
            ..
        } => {
            collector.reg_use(src);
            collector.reg_early_def(dst);
            collector.reg_early_def(tmp_gpr1);
            collector.reg_early_def(tmp_gpr2);
        }
        Inst::CvtFloatToSintSeq {
            src,
            dst,
            tmp_xmm,
            tmp_gpr,
            ..
        } => {
            collector.reg_use(src);
            collector.reg_early_def(dst);
            collector.reg_early_def(tmp_gpr);
            collector.reg_early_def(tmp_xmm);
        }
        Inst::CvtFloatToUintSeq {
            src,
            dst,
            tmp_gpr,
            tmp_xmm,
            tmp_xmm2,
            ..
        } => {
            collector.reg_use(src);
            collector.reg_early_def(dst);
            collector.reg_early_def(tmp_gpr);
            collector.reg_early_def(tmp_xmm);
            collector.reg_early_def(tmp_xmm2);
        }

        Inst::XmmCmove {
            consequent,
            alternative,
            dst,
            ..
        } => {
            collector.reg_use(alternative);
            collector.reg_reuse_def(dst, 0);
            collector.reg_use(consequent);
        }
        Inst::StackProbeLoop { tmp, .. } => {
            collector.reg_early_def(tmp);
        }

        Inst::CallKnown { info } => {
            // Probestack is special and is only inserted after
            // regalloc, so we do not need to represent its ABI to the
            // register allocator. Assert that we don't alter that
            // arrangement.
            let CallInfo {
                uses,
                defs,
                clobbers,
                dest,
                try_call_info,
                ..
            } = &mut **info;
            debug_assert_ne!(*dest, ExternalName::LibCall(LibCall::Probestack));
            for CallArgPair { vreg, preg } in uses {
                collector.reg_fixed_use(vreg, *preg);
            }
            for CallRetPair { vreg, location } in defs {
                match location {
                    RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg),
                    RetLocation::Stack(..) => collector.any_def(vreg),
                }
            }
            collector.reg_clobbers(*clobbers);
            if let Some(try_call_info) = try_call_info {
                try_call_info.collect_operands(collector);
            }
        }

        Inst::CallUnknown { info } => {
            let CallInfo {
                uses,
                defs,
                clobbers,
                callee_conv,
                dest,
                try_call_info,
                ..
            } = &mut **info;
            match dest {
                RegMem::Reg { reg } if *callee_conv == CallConv::Winch => {
                    // TODO(https://github.com/bytecodealliance/regalloc2/issues/145):
                    // This shouldn't be a fixed register constraint. r10 is caller-saved, so this
                    // should be safe to use.
                    collector.reg_fixed_use(reg, regs::r10());
                }
                _ => dest.get_operands(collector),
            }
            for CallArgPair { vreg, preg } in uses {
                collector.reg_fixed_use(vreg, *preg);
            }
            for CallRetPair { vreg, location } in defs {
                match location {
                    RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg),
                    RetLocation::Stack(..) => collector.any_def(vreg),
                }
            }
            collector.reg_clobbers(*clobbers);
            if let Some(try_call_info) = try_call_info {
                try_call_info.collect_operands(collector);
            }
        }
        Inst::StackSwitchBasic {
            store_context_ptr,
            load_context_ptr,
            in_payload0,
            out_payload0,
        } => {
            collector.reg_use(load_context_ptr);
            collector.reg_use(store_context_ptr);
            collector.reg_fixed_use(in_payload0, stack_switch::payload_register());
            collector.reg_fixed_def(out_payload0, stack_switch::payload_register());

            let mut clobbers = crate::isa::x64::abi::ALL_CLOBBERS;
            // The return/payload reg must not be included in the clobber set
            clobbers.remove(
                stack_switch::payload_register()
                    .to_real_reg()
                    .unwrap()
                    .into(),
            );
            collector.reg_clobbers(clobbers);
        }

        Inst::ReturnCallKnown { info } => {
            let ReturnCallInfo {
                dest, uses, tmp, ..
            } = &mut **info;
            collector.reg_fixed_def(tmp, regs::r11());
            // Same as in the `Inst::CallKnown` branch.
            debug_assert_ne!(*dest, ExternalName::LibCall(LibCall::Probestack));
            for CallArgPair { vreg, preg } in uses {
                collector.reg_fixed_use(vreg, *preg);
            }
        }

        Inst::ReturnCallUnknown { info } => {
            let ReturnCallInfo {
                dest, uses, tmp, ..
            } = &mut **info;

            // TODO(https://github.com/bytecodealliance/regalloc2/issues/145):
            // This shouldn't be a fixed register constraint, but it's not clear how to
            // pick a register that won't be clobbered by the callee-save restore code
            // emitted with a return_call_indirect. r10 is caller-saved, so this should be
            // safe to use.
            collector.reg_fixed_use(dest, regs::r10());

            collector.reg_fixed_def(tmp, regs::r11());
            for CallArgPair { vreg, preg } in uses {
                collector.reg_fixed_use(vreg, *preg);
            }
        }

        Inst::JmpTableSeq {
            idx, tmp1, tmp2, ..
        } => {
            collector.reg_use(idx);
            collector.reg_early_def(tmp1);
            // In the sequence emitted for this pseudoinstruction in emit.rs,
            // tmp2 is only written after idx is read, so it doesn't need to be
            // an early def.
            collector.reg_def(tmp2);
        }

        Inst::LoadExtName { dst, .. } => {
            collector.reg_def(dst);
        }

        Inst::AtomicRmwSeq {
            operand,
            temp,
            dst_old,
            mem,
            ..
        } => {
            collector.reg_late_use(operand);
            collector.reg_early_def(temp);
            // This `fixed_def` is needed because `CMPXCHG` always uses this
            // register implicitly.
            collector.reg_fixed_def(dst_old, regs::rax());
            mem.get_operands_late(collector)
        }

        Inst::Atomic128RmwSeq {
            operand_low,
            operand_high,
            temp_low,
            temp_high,
            dst_old_low,
            dst_old_high,
            mem,
            ..
        } => {
            // All registers are collected in the `Late` position so that they don't overlap.
            collector.reg_late_use(operand_low);
            collector.reg_late_use(operand_high);
            collector.reg_fixed_def(temp_low, regs::rbx());
            collector.reg_fixed_def(temp_high, regs::rcx());
            collector.reg_fixed_def(dst_old_low, regs::rax());
            collector.reg_fixed_def(dst_old_high, regs::rdx());
            mem.get_operands_late(collector)
        }

        Inst::Atomic128XchgSeq {
            operand_low,
            operand_high,
            dst_old_low,
            dst_old_high,
            mem,
            ..
        } => {
            // All registers are collected in the `Late` position so that they don't overlap.
            collector.reg_fixed_late_use(operand_low, regs::rbx());
            collector.reg_fixed_late_use(operand_high, regs::rcx());
            collector.reg_fixed_def(dst_old_low, regs::rax());
            collector.reg_fixed_def(dst_old_high, regs::rdx());
            mem.get_operands_late(collector)
        }

        Inst::Args { args } => {
            for ArgPair { vreg, preg } in args {
                collector.reg_fixed_def(vreg, *preg);
            }
        }

        Inst::Rets { rets } => {
            // The return value(s) are live-out; we represent this
            // with register uses on the return instruction.
            for RetPair { vreg, preg } in rets {
                collector.reg_fixed_use(vreg, *preg);
            }
        }

        Inst::JmpKnown { .. }
        | Inst::WinchJmpIf { .. }
        | Inst::JmpCond { .. }
        | Inst::JmpCondOr { .. }
        | Inst::TrapIf { .. }
        | Inst::TrapIfAnd { .. }
        | Inst::TrapIfOr { .. } => {
            // No registers are used.
        }

        Inst::ElfTlsGetAddr { dst, .. } | Inst::MachOTlsGetAddr { dst, .. } => {
            collector.reg_fixed_def(dst, regs::rax());
            // All caller-saves are clobbered.
            //
            // We use the SysV calling convention here because the
            // pseudoinstruction (and relocation that it emits) is specific to
            // ELF systems; other x86-64 targets with other conventions (i.e.,
            // Windows) use different TLS strategies.
            let mut clobbers =
                X64ABIMachineSpec::get_regs_clobbered_by_call(CallConv::SystemV, false);
            clobbers.remove(regs::gpr_preg(asm::gpr::enc::RAX));
            collector.reg_clobbers(clobbers);
        }

        Inst::CoffTlsGetAddr { dst, tmp, .. } => {
            // We also use the gs register. But that register is not allocatable by the
            // register allocator, so we don't need to mark it as used here.

            // We use %rax to set the address
            collector.reg_fixed_def(dst, regs::rax());

            // We use %rcx as a temporary variable to load the _tls_index
            collector.reg_fixed_def(tmp, regs::rcx());
        }

        Inst::Unwind { .. } => {}

        Inst::DummyUse { reg } => {
            collector.reg_use(reg);
        }

        Inst::LabelAddress { dst, .. } => {
            collector.reg_def(dst);
        }

        Inst::SequencePoint { .. } => {}

        Inst::External { inst } => {
            inst.visit(&mut external::RegallocVisitor { collector });
        }
    }
}

//=============================================================================
// Instructions: misc functions and external interface

impl MachInst for Inst {
    type ABIMachineSpec = X64ABIMachineSpec;

    fn get_operands(&mut self, collector: &mut impl OperandVisitor) {
        x64_get_operands(self, collector)
    }

    fn is_move(&self) -> Option<(Writable<Reg>, Reg)> {
        use asm::inst::Inst as I;
        match self {
            // Note (carefully!) that a 32-bit mov *isn't* a no-op since it zeroes
            // out the upper 32 bits of the destination. For example, we could
            // conceivably use `movl %reg, %reg` to zero out the top 32 bits of
            // %reg.
            Self::External {
                inst: I::movq_mr(asm::inst::movq_mr { rm64, r64 }),
            } => match rm64 {
                asm::GprMem::Gpr(reg) => Some((reg.map(|r| r.to_reg()), r64.as_ref().to_reg())),
                asm::GprMem::Mem(_) => None,
            },
            Self::External {
                inst: I::movq_rm(asm::inst::movq_rm { r64, rm64 }),
            } => match rm64 {
                asm::GprMem::Gpr(reg) => Some((r64.as_ref().map(|r| r.to_reg()), reg.to_reg())),
                asm::GprMem::Mem(_) => None,
            },

            // Note that `movss_a_r` and `movsd_a_r` are specifically omitted
            // here because they only overwrite the low bits in the destination
            // register, otherwise preserving the upper bits. That can be used
            // for lane-insertion instructions, for example, meaning it's not
            // classified as a register move.
            //
            // Otherwise though all register-to-register movement instructions
            // which move 128 bits are registered as moves.
            Self::External {
                inst:
                    I::movaps_a(asm::inst::movaps_a { xmm1, xmm_m128 })
                    | I::movups_a(asm::inst::movups_a { xmm1, xmm_m128 })
                    | I::movapd_a(asm::inst::movapd_a { xmm1, xmm_m128 })
                    | I::movupd_a(asm::inst::movupd_a { xmm1, xmm_m128 })
                    | I::movdqa_a(asm::inst::movdqa_a { xmm1, xmm_m128 })
                    | I::movdqu_a(asm::inst::movdqu_a { xmm1, xmm_m128 }),
            } => match xmm_m128 {
                asm::XmmMem::Xmm(xmm2) => Some((xmm1.as_ref().map(|r| r.to_reg()), xmm2.to_reg())),
                asm::XmmMem::Mem(_) => None,
            },
            // In addition to the "A" format of instructions above, also
            // recognize the "B" format which, while it can be used for stores,
            // can also be used for register moves.
            Self::External {
                inst:
                    I::movaps_b(asm::inst::movaps_b { xmm_m128, xmm1 })
                    | I::movups_b(asm::inst::movups_b { xmm_m128, xmm1 })
                    | I::movapd_b(asm::inst::movapd_b { xmm_m128, xmm1 })
                    | I::movupd_b(asm::inst::movupd_b { xmm_m128, xmm1 })
                    | I::movdqa_b(asm::inst::movdqa_b { xmm_m128, xmm1 })
                    | I::movdqu_b(asm::inst::movdqu_b { xmm_m128, xmm1 }),
            } => match xmm_m128 {
                asm::XmmMem::Xmm(dst) => Some((dst.map(|r| r.to_reg()), xmm1.as_ref().to_reg())),
                asm::XmmMem::Mem(_) => None,
            },
            _ => None,
        }
    }

    fn is_included_in_clobbers(&self) -> bool {
        match self {
            &Inst::Args { .. } => false,
            _ => true,
        }
    }

    fn is_trap(&self) -> bool {
        match self {
            Self::External {
                inst: asm::inst::Inst::ud2_zo(..),
            } => true,
            _ => false,
        }
    }

    fn is_args(&self) -> bool {
        match self {
            Self::Args { .. } => true,
            _ => false,
        }
    }

    fn call_type(&self) -> CallType {
        match self {
            Inst::CallKnown { .. }
            | Inst::CallUnknown { .. }
            | Inst::ElfTlsGetAddr { .. }
            | Inst::MachOTlsGetAddr { .. } => CallType::Regular,

            Inst::ReturnCallKnown { .. } | Inst::ReturnCallUnknown { .. } => CallType::TailCall,

            _ => CallType::None,
        }
    }

    fn is_term(&self) -> MachTerminator {
        match self {
            // Interesting cases.
            &Self::Rets { .. } => MachTerminator::Ret,
            &Self::ReturnCallKnown { .. } | &Self::ReturnCallUnknown { .. } => {
                MachTerminator::RetCall
            }
            &Self::JmpKnown { .. } => MachTerminator::Branch,
            &Self::JmpCond { .. } => MachTerminator::Branch,
            &Self::JmpCondOr { .. } => MachTerminator::Branch,
            &Self::JmpTableSeq { .. } => MachTerminator::Branch,
            &Self::CallKnown { ref info } if info.try_call_info.is_some() => MachTerminator::Branch,
            &Self::CallUnknown { ref info } if info.try_call_info.is_some() => {
                MachTerminator::Branch
            }
            // All other cases are boring.
            _ => MachTerminator::None,
        }
    }

    fn is_low_level_branch(&self) -> bool {
        match self {
            &Self::WinchJmpIf { .. } => true,
            _ => false,
        }
    }

    fn is_mem_access(&self) -> bool {
        panic!("TODO FILL ME OUT")
    }

    fn gen_move(dst_reg: Writable<Reg>, src_reg: Reg, ty: Type) -> Inst {
        trace!(
            "Inst::gen_move {:?} -> {:?} (type: {:?})",
            src_reg,
            dst_reg.to_reg(),
            ty
        );
        let rc_dst = dst_reg.to_reg().class();
        let rc_src = src_reg.class();
        // If this isn't true, we have gone way off the rails.
        debug_assert!(rc_dst == rc_src);
        let inst = match rc_dst {
            RegClass::Int => {
                asm::inst::movq_mr::new(dst_reg.map(Gpr::unwrap_new), Gpr::unwrap_new(src_reg))
                    .into()
            }
            RegClass::Float => {
                // The Intel optimization manual, in "3.5.1.13 Zero-Latency MOV Instructions",
                // doesn't include MOVSS/MOVSD as instructions with zero-latency. Use movaps for
                // those, which may write more lanes than we need, but are specified to have
                // zero-latency.
                let dst_reg = dst_reg.map(|r| Xmm::new(r).unwrap());
                let src_reg = Xmm::new(src_reg).unwrap();
                match ty {
                    types::F16 | types::F32 | types::F64 | types::F32X4 => {
                        asm::inst::movaps_a::new(dst_reg, src_reg).into()
                    }
                    types::F64X2 => asm::inst::movapd_a::new(dst_reg, src_reg).into(),
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() <= 128 => {
                        asm::inst::movdqa_a::new(dst_reg, src_reg).into()
                    }
                    _ => unimplemented!("unable to move type: {}", ty),
                }
            }
            RegClass::Vector => unreachable!(),
        };
        Inst::External { inst }
    }

    fn gen_nop(preferred_size: usize) -> Inst {
        Inst::nop(core::cmp::min(preferred_size, 9) as u8)
    }

    fn gen_nop_units() -> Vec<Vec<u8>> {
        vec![
            // Standard 1-byte NOP.
            vec![0x90],
            // 5-byte NOP useful for patching out patchable calls.
            vec![0x0f, 0x1f, 0x44, 0x00, 0x00],
        ]
    }

    fn rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])> {
        match ty {
            types::I8 => Ok((&[RegClass::Int], &[types::I8])),
            types::I16 => Ok((&[RegClass::Int], &[types::I16])),
            types::I32 => Ok((&[RegClass::Int], &[types::I32])),
            types::I64 => Ok((&[RegClass::Int], &[types::I64])),
            types::F16 => Ok((&[RegClass::Float], &[types::F16])),
            types::F32 => Ok((&[RegClass::Float], &[types::F32])),
            types::F64 => Ok((&[RegClass::Float], &[types::F64])),
            types::F128 => Ok((&[RegClass::Float], &[types::F128])),
            types::I128 => Ok((&[RegClass::Int, RegClass::Int], &[types::I64, types::I64])),
            _ if ty.is_vector() && ty.bits() <= 128 => {
                let types = &[types::I8X2, types::I8X4, types::I8X8, types::I8X16];
                Ok((
                    &[RegClass::Float],
                    slice::from_ref(&types[ty.bytes().ilog2() as usize - 1]),
                ))
            }
            _ => Err(CodegenError::Unsupported(format!(
                "Unexpected SSA-value type: {ty}"
            ))),
        }
    }

    fn canonical_type_for_rc(rc: RegClass) -> Type {
        match rc {
            RegClass::Float => types::I8X16,
            RegClass::Int => types::I64,
            RegClass::Vector => unreachable!(),
        }
    }

    fn gen_jump(label: MachLabel) -> Inst {
        Inst::jmp_known(label)
    }

    fn gen_imm_u64(value: u64, dst: Writable<Reg>) -> Option<Self> {
        Some(Inst::imm(OperandSize::Size64, value, dst))
    }

    fn gen_imm_f64(value: f64, tmp: Writable<Reg>, dst: Writable<Reg>) -> SmallVec<[Self; 2]> {
        let imm_to_gpr = Inst::imm(OperandSize::Size64, value.to_bits(), tmp);
        let gpr_to_xmm = Inst::External {
            inst: asm::inst::movq_a::new(dst.map(|r| Xmm::new(r).unwrap()), tmp.to_reg()).into(),
        };
        smallvec![imm_to_gpr, gpr_to_xmm]
    }

    fn gen_dummy_use(reg: Reg) -> Self {
        Inst::DummyUse { reg }
    }

    fn worst_case_size() -> CodeOffset {
        15
    }

    fn ref_type_regclass(_: &settings::Flags) -> RegClass {
        RegClass::Int
    }

    fn is_safepoint(&self) -> bool {
        match self {
            Inst::CallKnown { .. } | Inst::CallUnknown { .. } => true,
            _ => false,
        }
    }

    fn function_alignment() -> FunctionAlignment {
        FunctionAlignment {
            minimum: 1,
            // Change the alignment from 16 bytes to 32 bytes for better performance.
            // fix-8573: https://github.com/bytecodealliance/wasmtime/issues/8573
            preferred: 32,
        }
    }

    type LabelUse = LabelUse;

    const TRAP_OPCODE: &'static [u8] = &[0x0f, 0x0b];
}

/// Constant state used during emission of a sequence of instructions.
pub struct EmitInfo {
    pub(super) flags: settings::Flags,
    isa_flags: x64_settings::Flags,
}

impl EmitInfo {
    /// Create a constant state for emission of instructions.
    pub fn new(flags: settings::Flags, isa_flags: x64_settings::Flags) -> Self {
        Self { flags, isa_flags }
    }
}

impl asm::AvailableFeatures for &EmitInfo {
    fn _64b(&self) -> bool {
        // Currently, this x64 backend always assumes 64-bit mode.
        true
    }

    fn compat(&self) -> bool {
        // For 32-bit compatibility mode, see
        // https://github.com/bytecodealliance/wasmtime/issues/1980 (TODO).
        false
    }

    fn sse(&self) -> bool {
        // Currently, this x64 backend always assumes SSE.
        true
    }

    fn sse2(&self) -> bool {
        // Currently, this x64 backend always assumes SSE2.
        true
    }

    fn sse3(&self) -> bool {
        self.isa_flags.has_sse3()
    }

    fn ssse3(&self) -> bool {
        self.isa_flags.has_ssse3()
    }

    fn sse41(&self) -> bool {
        self.isa_flags.has_sse41()
    }

    fn sse42(&self) -> bool {
        self.isa_flags.has_sse42()
    }

    fn bmi1(&self) -> bool {
        self.isa_flags.has_bmi1()
    }

    fn bmi2(&self) -> bool {
        self.isa_flags.has_bmi2()
    }

    fn lzcnt(&self) -> bool {
        self.isa_flags.has_lzcnt()
    }

    fn popcnt(&self) -> bool {
        self.isa_flags.has_popcnt()
    }

    fn avx(&self) -> bool {
        self.isa_flags.has_avx()
    }

    fn avx2(&self) -> bool {
        self.isa_flags.has_avx2()
    }

    fn avx512f(&self) -> bool {
        self.isa_flags.has_avx512f()
    }

    fn avx512vl(&self) -> bool {
        self.isa_flags.has_avx512vl()
    }

    fn cmpxchg16b(&self) -> bool {
        self.isa_flags.has_cmpxchg16b()
    }

    fn fma(&self) -> bool {
        self.isa_flags.has_fma()
    }

    fn avx512dq(&self) -> bool {
        self.isa_flags.has_avx512dq()
    }

    fn avx512bitalg(&self) -> bool {
        self.isa_flags.has_avx512bitalg()
    }

    fn avx512vbmi(&self) -> bool {
        self.isa_flags.has_avx512vbmi()
    }
}

impl MachInstEmit for Inst {
    type State = EmitState;
    type Info = EmitInfo;

    fn emit(&self, sink: &mut MachBuffer<Inst>, info: &Self::Info, state: &mut Self::State) {
        emit::emit(self, sink, info, state);
    }

    fn pretty_print_inst(&self, _: &mut Self::State) -> String {
        PrettyPrint::pretty_print(self, 0)
    }
}

/// A label-use (internal relocation) in generated code.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum LabelUse {
    /// A 32-bit offset from location of relocation itself, added to the existing value at that
    /// location. Used for control flow instructions which consider an offset from the start of the
    /// next instruction (so the size of the payload -- 4 bytes -- is subtracted from the payload).
    JmpRel32,

    /// A 32-bit offset from location of relocation itself, added to the existing value at that
    /// location.
    PCRel32,
}

impl MachInstLabelUse for LabelUse {
    const ALIGN: CodeOffset = 1;

    fn max_pos_range(self) -> CodeOffset {
        match self {
            LabelUse::JmpRel32 | LabelUse::PCRel32 => 0x7fff_ffff,
        }
    }

    fn max_neg_range(self) -> CodeOffset {
        match self {
            LabelUse::JmpRel32 | LabelUse::PCRel32 => 0x8000_0000,
        }
    }

    fn patch_size(self) -> CodeOffset {
        match self {
            LabelUse::JmpRel32 | LabelUse::PCRel32 => 4,
        }
    }

    fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset) {
        let pc_rel = (label_offset as i64) - (use_offset as i64);
        debug_assert!(pc_rel <= self.max_pos_range() as i64);
        debug_assert!(pc_rel >= -(self.max_neg_range() as i64));
        let pc_rel = pc_rel as u32;
        match self {
            LabelUse::JmpRel32 => {
                let addend = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
                let value = pc_rel.wrapping_add(addend).wrapping_sub(4);
                buffer.copy_from_slice(&value.to_le_bytes()[..]);
            }
            LabelUse::PCRel32 => {
                let addend = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
                let value = pc_rel.wrapping_add(addend);
                buffer.copy_from_slice(&value.to_le_bytes()[..]);
            }
        }
    }

    fn supports_veneer(self) -> bool {
        match self {
            LabelUse::JmpRel32 | LabelUse::PCRel32 => false,
        }
    }

    fn veneer_size(self) -> CodeOffset {
        match self {
            LabelUse::JmpRel32 | LabelUse::PCRel32 => 0,
        }
    }

    fn worst_case_veneer_size() -> CodeOffset {
        0
    }

    fn generate_veneer(self, _: &mut [u8], _: CodeOffset) -> (CodeOffset, LabelUse) {
        match self {
            LabelUse::JmpRel32 | LabelUse::PCRel32 => {
                panic!("Veneer not supported for JumpRel32 label-use.");
            }
        }
    }

    fn from_reloc(reloc: Reloc, addend: Addend) -> Option<Self> {
        match (reloc, addend) {
            (Reloc::X86CallPCRel4, -4) => Some(LabelUse::JmpRel32),
            _ => None,
        }
    }
}