Path: blob/main/cranelift/codegen/src/isa/x64/inst/mod.rs
//! This module defines x86_64-specific machine instruction types.

pub use emit_state::EmitState;

use crate::binemit::{Addend, CodeOffset, Reloc};
use crate::ir::{ExternalName, LibCall, TrapCode, Type, types};
use crate::isa::x64::abi::X64ABIMachineSpec;
use crate::isa::x64::inst::regs::pretty_print_reg;
use crate::isa::x64::settings as x64_settings;
use crate::isa::{CallConv, FunctionAlignment};
use crate::{CodegenError, CodegenResult, settings};
use crate::{machinst::*, trace};
use alloc::boxed::Box;
use core::slice;
use cranelift_assembler_x64 as asm;
use smallvec::{SmallVec, smallvec};
use std::fmt::{self, Write};
use std::string::{String, ToString};

pub mod args;
mod emit;
mod emit_state;
#[cfg(test)]
mod emit_tests;
pub mod external;
pub mod regs;
mod stack_switch;
pub mod unwind;

use args::*;

//=============================================================================
// Instructions (top level): definition

// `Inst` is defined inside ISLE as `MInst`. We publicly re-export it here.
pub use super::lower::isle::generated_code::AtomicRmwSeqOp;
pub use super::lower::isle::generated_code::MInst as Inst;

/// Out-of-line data for return-calls, to keep the size of `Inst` down.
#[derive(Clone, Debug)]
pub struct ReturnCallInfo<T> {
    /// Where this call is going.
    pub dest: T,

    /// The size of the argument area for this return-call, potentially smaller than that of the
    /// caller, but never larger.
    pub new_stack_arg_size: u32,

    /// The in-register arguments and their constraints.
    pub uses: CallArgList,

    /// A temporary for use when moving the return address.
    pub tmp: WritableGpr,
}

#[test]
#[cfg(target_pointer_width = "64")]
fn inst_size_test() {
    // This test will help with unintentionally growing the size
    // of the Inst enum.
    assert_eq!(48, std::mem::size_of::<Inst>());
}

impl Inst {
    /// Check if the instruction (or pseudo-instruction) can be emitted given
    /// the current target architecture given by `emit_info`. For non-assembler
    /// instructions, this assumes a baseline feature set (i.e., 64-bit AND SSE2
    /// and below).
    fn is_available(&self, emit_info: &EmitInfo) -> bool {
        use asm::AvailableFeatures;

        match self {
            // These instructions are part of SSE2, which is a basic requirement
            // in Cranelift, and don't have to be checked.
            Inst::AtomicRmwSeq { .. }
            | Inst::CallKnown { .. }
            | Inst::CallUnknown { .. }
            | Inst::ReturnCallKnown { .. }
            | Inst::ReturnCallUnknown { .. }
            | Inst::CheckedSRemSeq { .. }
            | Inst::CheckedSRemSeq8 { .. }
            | Inst::CvtFloatToSintSeq { .. }
            | Inst::CvtFloatToUintSeq { .. }
            | Inst::CvtUint64ToFloatSeq { .. }
            | Inst::JmpCond { .. }
            | Inst::JmpCondOr { .. }
            | Inst::WinchJmpIf { .. }
            | Inst::JmpKnown { .. }
            | Inst::JmpTableSeq { .. }
            | Inst::LoadExtName { .. }
            | Inst::MovFromPReg { .. }
            | Inst::MovToPReg { .. }
            | Inst::StackProbeLoop { .. }
            | Inst::Args { .. }
            | Inst::Rets { .. }
            | Inst::StackSwitchBasic { .. }
            | Inst::TrapIf { .. }
            | Inst::TrapIfAnd { .. }
            | Inst::TrapIfOr { .. }
            | Inst::XmmCmove { .. }
            | Inst::XmmMinMaxSeq { .. }
            | Inst::XmmUninitializedValue { .. }
            | Inst::GprUninitializedValue { .. }
            | Inst::ElfTlsGetAddr { .. }
            | Inst::MachOTlsGetAddr { .. }
            | Inst::CoffTlsGetAddr { .. }
            | Inst::Unwind { .. }
            | Inst::DummyUse { .. }
            | Inst::LabelAddress { .. } => true,

            Inst::Atomic128RmwSeq { .. } | Inst::Atomic128XchgSeq { .. } => {
                emit_info.cmpxchg16b()
            }

            Inst::External { inst } => inst.is_available(&emit_info),
        }
    }
}

// Handy constructors for Insts.

impl Inst {
    pub(crate) fn nop(len: u8) -> Self {
        assert!(len > 0 && len <= 9);
        let inst = match len {
            1 => asm::inst::nop_1b::new().into(),
            2 => asm::inst::nop_2b::new().into(),
            3 => asm::inst::nop_3b::new().into(),
            4 => asm::inst::nop_4b::new().into(),
            5 => asm::inst::nop_5b::new().into(),
            6 => asm::inst::nop_6b::new().into(),
            7 => asm::inst::nop_7b::new().into(),
            8 => asm::inst::nop_8b::new().into(),
            9 => asm::inst::nop_9b::new().into(),
            _ => unreachable!("nop length must be between 1 and 9"),
        };
        Self::External { inst }
    }

    pub(crate) fn addq_mi(dst: Writable<Reg>, simm32: i32) -> Self {
        let inst = if let Ok(simm8) = i8::try_from(simm32) {
            asm::inst::addq_mi_sxb::new(dst, simm8).into()
        } else {
            asm::inst::addq_mi_sxl::new(dst, simm32).into()
        };
        Inst::External { inst }
    }

    pub(crate) fn subq_mi(dst: Writable<Reg>, simm32: i32) -> Self {
        let inst = if let Ok(simm8) = i8::try_from(simm32) {
            asm::inst::subq_mi_sxb::new(dst, simm8).into()
        } else {
            asm::inst::subq_mi_sxl::new(dst, simm32).into()
        };
        Inst::External { inst }
    }

    /// Writes the `simm64` immediate into `dst`.
    ///
    /// Note that if `dst_size` is less than 64-bits then the upper bits of
    /// `simm64` will be converted to zero.
    pub fn imm(dst_size: OperandSize, simm64: u64, dst: Writable<Reg>) -> Inst {
        debug_assert!(dst_size.is_one_of(&[OperandSize::Size32, OperandSize::Size64]));
        debug_assert!(dst.to_reg().class() == RegClass::Int);
        let dst = WritableGpr::from_writable_reg(dst).unwrap();
        let inst = match dst_size {
            OperandSize::Size64 => match u32::try_from(simm64) {
                // If `simm64` is zero-extended use `movl` which zeros the
                // upper bits.
                Ok(imm32) => asm::inst::movl_oi::new(dst, imm32).into(),
                _ => match i32::try_from(simm64.cast_signed()) {
                    // If `simm64` is sign-extended use `movq` which sign-extends
                    // the upper bits.
                    Ok(simm32) => asm::inst::movq_mi_sxl::new(dst, simm32).into(),
                    // Fall back to embedding the entire immediate.
                    _ => asm::inst::movabsq_oi::new(dst, simm64).into(),
                },
            },
            // FIXME: the input to this function is a logical `simm64` stored
            // as `u64`. That means that ideally what we would do here is cast
            // the `simm64` to an `i64`, perform a `i32::try_from()`, then cast
            // that back to `u32`. That would ensure that the immediate loses
            // no meaning and has the same logical value. Currently though
            // Cranelift relies on discarding the upper bits because literals
            // like `0x8000_0000_u64` fail to convert to an `i32`. In theory
            // the input to this function should change to `i64`. In the
            // meantime this is documented as discarding the upper bits,
            // although this is an old function so that's unlikely to help
            // much.
            _ => asm::inst::movl_oi::new(dst, simm64 as u32).into(),
        };
        Inst::External { inst }
    }
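
    // For example, with `dst_size == OperandSize::Size64`: an immediate of
    // 0x1234 fits in a `u32` and selects `movl_oi` (the upper 32 bits are
    // implicitly zeroed), `-10i64 as u64` selects the sign-extending
    // `movq_mi_sxl`, and 0x1234_5678_9abc_def0 fits neither and falls back to
    // the full `movabsq_oi` encoding.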

    pub(crate) fn movzx_rm_r(ext_mode: ExtMode, src: RegMem, dst: Writable<Reg>) -> Inst {
        src.assert_regclass_is(RegClass::Int);
        debug_assert!(dst.to_reg().class() == RegClass::Int);
        let src = match src {
            RegMem::Reg { reg } => asm::GprMem::Gpr(Gpr::new(reg).unwrap()),
            RegMem::Mem { addr } => asm::GprMem::Mem(addr.into()),
        };
        let inst = match ext_mode {
            ExtMode::BL => asm::inst::movzbl_rm::new(dst, src).into(),
            ExtMode::BQ => asm::inst::movzbq_rm::new(dst, src).into(),
            ExtMode::WL => asm::inst::movzwl_rm::new(dst, src).into(),
            ExtMode::WQ => asm::inst::movzwq_rm::new(dst, src).into(),
            ExtMode::LQ => {
                // This instruction selection may seem strange but is correct in
                // 64-bit mode: section 3.4.1.1 of the Intel manual says that
                // "32-bit operands generate a 32-bit result, zero-extended to a
                // 64-bit result in the destination general-purpose register."
                // This is applicable beyond `mov` but we use this fact to
                // zero-extend `src` into `dst`.
                asm::inst::movl_rm::new(dst, src).into()
            }
        };
        Inst::External { inst }
    }

    pub(crate) fn movsx_rm_r(ext_mode: ExtMode, src: RegMem, dst: Writable<Reg>) -> Inst {
        src.assert_regclass_is(RegClass::Int);
        debug_assert!(dst.to_reg().class() == RegClass::Int);
        let src = match src {
            RegMem::Reg { reg } => asm::GprMem::Gpr(Gpr::new(reg).unwrap()),
            RegMem::Mem { addr } => asm::GprMem::Mem(addr.into()),
        };
        let inst = match ext_mode {
            ExtMode::BL => asm::inst::movsbl_rm::new(dst, src).into(),
            ExtMode::BQ => asm::inst::movsbq_rm::new(dst, src).into(),
            ExtMode::WL => asm::inst::movswl_rm::new(dst, src).into(),
            ExtMode::WQ => asm::inst::movswq_rm::new(dst, src).into(),
            ExtMode::LQ => asm::inst::movslq_rm::new(dst, src).into(),
        };
        Inst::External { inst }
    }

    /// Compares `src1` against `src2`
    pub(crate) fn cmp_mi_sxb(size: OperandSize, src1: Gpr, src2: i8) -> Inst {
        let inst = match size {
            OperandSize::Size8 => asm::inst::cmpb_mi::new(src1, src2.cast_unsigned()).into(),
            OperandSize::Size16 => asm::inst::cmpw_mi_sxb::new(src1, src2).into(),
            OperandSize::Size32 => asm::inst::cmpl_mi_sxb::new(src1, src2).into(),
            OperandSize::Size64 => asm::inst::cmpq_mi_sxb::new(src1, src2).into(),
        };
        Inst::External { inst }
    }

    pub(crate) fn trap_if(cc: CC, trap_code: TrapCode) -> Inst {
        Inst::TrapIf { cc, trap_code }
    }

    pub(crate) fn call_known(info: Box<CallInfo<ExternalName>>) -> Inst {
        Inst::CallKnown { info }
    }

    pub(crate) fn call_unknown(info: Box<CallInfo<RegMem>>) -> Inst {
        info.dest.assert_regclass_is(RegClass::Int);
        Inst::CallUnknown { info }
    }

    pub(crate) fn jmp_known(dst: MachLabel) -> Inst {
        Inst::JmpKnown { dst }
    }

    /// Choose which instruction to use for loading a register value from memory. For loads smaller
    /// than 64 bits, this method expects a way to extend the value (i.e. [ExtKind::SignExtend],
    /// [ExtKind::ZeroExtend]); loads with no extension necessary will ignore this.
    pub(crate) fn load(
        ty: Type,
        from_addr: impl Into<SyntheticAmode>,
        to_reg: Writable<Reg>,
        ext_kind: ExtKind,
    ) -> Inst {
        let rc = to_reg.to_reg().class();
        match rc {
            RegClass::Int => {
                let ext_mode = match ty.bytes() {
                    1 => Some(ExtMode::BQ),
                    2 => Some(ExtMode::WQ),
                    4 => Some(ExtMode::LQ),
                    8 => None,
                    _ => unreachable!("the type should never use a scalar load: {}", ty),
                };
                if let Some(ext_mode) = ext_mode {
                    // Values smaller than 64 bits must be extended in some way.
                    match ext_kind {
                        ExtKind::SignExtend => {
                            Inst::movsx_rm_r(ext_mode, RegMem::mem(from_addr), to_reg)
                        }
                        ExtKind::ZeroExtend => {
                            Inst::movzx_rm_r(ext_mode, RegMem::mem(from_addr), to_reg)
                        }
                        ExtKind::None => {
                            panic!("expected an extension kind for extension mode: {ext_mode:?}")
                        }
                    }
                } else {
                    // 64-bit values can be moved directly.
                    let from_addr = asm::GprMem::from(from_addr.into());
                    Inst::External {
                        inst: asm::inst::movq_rm::new(to_reg, from_addr).into(),
                    }
                }
            }
            RegClass::Float => {
                let to_reg = to_reg.map(|r| Xmm::new(r).unwrap());
                let from_addr = from_addr.into();
                let inst = match ty {
                    types::F16 | types::I8X2 => {
                        panic!("loading a f16 or i8x2 requires multiple instructions")
                    }
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 32 => {
                        asm::inst::movss_a_m::new(to_reg, from_addr).into()
                    }
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 64 => {
                        asm::inst::movsd_a_m::new(to_reg, from_addr).into()
                    }
                    types::F32X4 => asm::inst::movups_a::new(to_reg, from_addr).into(),
                    types::F64X2 => asm::inst::movupd_a::new(to_reg, from_addr).into(),
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 128 => {
                        asm::inst::movdqu_a::new(to_reg, from_addr).into()
                    }
                    _ => unimplemented!("unable to load type: {}", ty),
                };
                Inst::External { inst }
            }
            RegClass::Vector => unreachable!(),
        }
    }

    /// Choose which instruction to use for storing a register value to memory.
    pub(crate) fn store(ty: Type, from_reg: Reg, to_addr: impl Into<SyntheticAmode>) -> Inst {
        let rc = from_reg.class();
        let to_addr = to_addr.into();
        let inst = match rc {
            RegClass::Int => {
                let from_reg = Gpr::unwrap_new(from_reg);
                match ty {
                    types::I8 => asm::inst::movb_mr::new(to_addr, from_reg).into(),
                    types::I16 => asm::inst::movw_mr::new(to_addr, from_reg).into(),
                    types::I32 => asm::inst::movl_mr::new(to_addr, from_reg).into(),
                    types::I64 => asm::inst::movq_mr::new(to_addr, from_reg).into(),
                    _ => unreachable!(),
                }
            }
            RegClass::Float => {
                let from_reg = Xmm::new(from_reg).unwrap();
                match ty {
                    types::F16 | types::I8X2 => {
                        panic!("storing a f16 or i8x2 requires multiple instructions")
                    }
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 32 => {
                        asm::inst::movss_c_m::new(to_addr, from_reg).into()
                    }
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 64 => {
                        asm::inst::movsd_c_m::new(to_addr, from_reg).into()
                    }
                    types::F32X4 => asm::inst::movups_b::new(to_addr, from_reg).into(),
                    types::F64X2 => asm::inst::movupd_b::new(to_addr, from_reg).into(),
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 128 => {
                        asm::inst::movdqu_b::new(to_addr, from_reg).into()
                    }
                    _ => unimplemented!("unable to store type: {}", ty),
                }
            }
            RegClass::Vector => unreachable!(),
        };
        Inst::External { inst }
    }
}
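
// As a rough summary of the two constructors above: integer loads and stores
// pick a `mov{b,w,l,q}` based on the type's width (with a zero- or
// sign-extending variant for sub-64-bit loads), while float/vector loads and
// stores pick `movss`/`movsd` for 32/64-bit scalars and
// `movups`/`movupd`/`movdqu` for 128-bit values.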

//=============================================================================
// Instructions: printing

impl PrettyPrint for Inst {
    fn pretty_print(&self, _size: u8) -> String {
        fn ljustify(s: String) -> String {
            let w = 7;
            if s.len() >= w {
                s
            } else {
                let need = usize::min(w, w - s.len());
                s + &format!("{nil: <width$}", nil = "", width = need)
            }
        }

        fn ljustify2(s1: String, s2: String) -> String {
            ljustify(s1 + &s2)
        }

        match self {
            Inst::CheckedSRemSeq {
                size,
                divisor,
                dividend_lo,
                dividend_hi,
                dst_quotient,
                dst_remainder,
            } => {
                let divisor = pretty_print_reg(divisor.to_reg(), size.to_bytes());
                let dividend_lo = pretty_print_reg(dividend_lo.to_reg(), size.to_bytes());
                let dividend_hi = pretty_print_reg(dividend_hi.to_reg(), size.to_bytes());
                let dst_quotient =
                    pretty_print_reg(dst_quotient.to_reg().to_reg(), size.to_bytes());
                let dst_remainder =
                    pretty_print_reg(dst_remainder.to_reg().to_reg(), size.to_bytes());
                format!(
                    "checked_srem_seq {dividend_lo}, {dividend_hi}, \
                     {divisor}, {dst_quotient}, {dst_remainder}",
                )
            }

            Inst::CheckedSRemSeq8 {
                divisor,
                dividend,
                dst,
            } => {
                let divisor = pretty_print_reg(divisor.to_reg(), 1);
                let dividend = pretty_print_reg(dividend.to_reg(), 1);
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 1);
                format!("checked_srem_seq {dividend}, {divisor}, {dst}")
            }

            Inst::XmmMinMaxSeq {
                lhs,
                rhs,
                dst,
                is_min,
                size,
            } => {
                let rhs = pretty_print_reg(rhs.to_reg(), 8);
                let lhs = pretty_print_reg(lhs.to_reg(), 8);
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let op = ljustify2(
                    if *is_min {
                        "xmm min seq ".to_string()
                    } else {
                        "xmm max seq ".to_string()
                    },
                    format!("f{}", size.to_bits()),
                );
                format!("{op} {lhs}, {rhs}, {dst}")
            }

            Inst::XmmUninitializedValue { dst } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let op = ljustify("uninit".into());
                format!("{op} {dst}")
            }

            Inst::GprUninitializedValue { dst } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let op = ljustify("uninit".into());
                format!("{op} {dst}")
            }

            Inst::CvtUint64ToFloatSeq {
                src,
                dst,
                dst_size,
                tmp_gpr1,
                tmp_gpr2,
                ..
            } => {
                let src = pretty_print_reg(src.to_reg(), 8);
                let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes());
                let tmp_gpr1 = pretty_print_reg(tmp_gpr1.to_reg().to_reg(), 8);
                let tmp_gpr2 = pretty_print_reg(tmp_gpr2.to_reg().to_reg(), 8);
                let op = ljustify(format!(
                    "u64_to_{}_seq",
                    if *dst_size == OperandSize::Size64 {
                        "f64"
                    } else {
                        "f32"
                    }
                ));
                format!("{op} {src}, {dst}, {tmp_gpr1}, {tmp_gpr2}")
            }

            Inst::CvtFloatToSintSeq {
                src,
                dst,
                src_size,
                dst_size,
                tmp_xmm,
                tmp_gpr,
                is_saturating,
            } => {
                let src = pretty_print_reg(src.to_reg(), src_size.to_bytes());
                let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes());
                let tmp_gpr = pretty_print_reg(tmp_gpr.to_reg().to_reg(), 8);
                let tmp_xmm = pretty_print_reg(tmp_xmm.to_reg().to_reg(), 8);
                let op = ljustify(format!(
                    "cvt_float{}_to_sint{}{}_seq",
                    src_size.to_bits(),
                    dst_size.to_bits(),
                    if *is_saturating { "_sat" } else { "" },
                ));
                format!("{op} {src}, {dst}, {tmp_gpr}, {tmp_xmm}")
            }

            Inst::CvtFloatToUintSeq {
                src,
                dst,
                src_size,
                dst_size,
                tmp_gpr,
                tmp_xmm,
                tmp_xmm2,
                is_saturating,
            } => {
                let src = pretty_print_reg(src.to_reg(), src_size.to_bytes());
                let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes());
                let tmp_gpr = pretty_print_reg(tmp_gpr.to_reg().to_reg(), 8);
                let tmp_xmm = pretty_print_reg(tmp_xmm.to_reg().to_reg(), 8);
                let tmp_xmm2 = pretty_print_reg(tmp_xmm2.to_reg().to_reg(), 8);
                let op = ljustify(format!(
                    "cvt_float{}_to_uint{}{}_seq",
                    src_size.to_bits(),
                    dst_size.to_bits(),
                    if *is_saturating { "_sat" } else { "" },
                ));
                format!("{op} {src}, {dst}, {tmp_gpr}, {tmp_xmm}, {tmp_xmm2}")
            }

            Inst::MovFromPReg { src, dst } => {
                let src: Reg = (*src).into();
                let src = pretty_print_reg(src, 8);
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let op = ljustify("movq".to_string());
                format!("{op} {src}, {dst}")
            }

            Inst::MovToPReg { src, dst } => {
                let src = pretty_print_reg(src.to_reg(), 8);
                let dst: Reg = (*dst).into();
                let dst = pretty_print_reg(dst, 8);
                let op = ljustify("movq".to_string());
                format!("{op} {src}, {dst}")
            }

            Inst::XmmCmove {
                ty,
                cc,
                consequent,
                alternative,
                dst,
                ..
            } => {
                let size = u8::try_from(ty.bytes()).unwrap();
                let alternative = pretty_print_reg(alternative.to_reg(), size);
                let dst = pretty_print_reg(dst.to_reg().to_reg(), size);
                let consequent = pretty_print_reg(consequent.to_reg(), size);
                let suffix = match *ty {
                    types::F64 => "sd",
                    types::F32 => "ss",
                    types::F16 => "ss",
                    types::F32X4 => "aps",
                    types::F64X2 => "apd",
                    _ => "dqa",
                };
                let cc = cc.invert();
                format!(
                    "mov{suffix} {alternative}, {dst}; \
                     j{cc} $next; \
                     mov{suffix} {consequent}, {dst}; \
                     $next:"
                )
            }

            Inst::StackProbeLoop {
                tmp,
                frame_size,
                guard_size,
            } => {
                let tmp = pretty_print_reg(tmp.to_reg(), 8);
                let op = ljustify("stack_probe_loop".to_string());
                format!("{op} {tmp}, frame_size={frame_size}, guard_size={guard_size}")
            }

            Inst::CallKnown { info } => {
                let op = ljustify("call".to_string());
                let try_call = info
                    .try_call_info
                    .as_ref()
                    .map(|tci| pretty_print_try_call(tci))
                    .unwrap_or_default();
                format!("{op} {:?}{try_call}", info.dest)
            }

            Inst::CallUnknown { info } => {
                let dest = info.dest.pretty_print(8);
                let op = ljustify("call".to_string());
                let try_call = info
                    .try_call_info
                    .as_ref()
                    .map(|tci| pretty_print_try_call(tci))
                    .unwrap_or_default();
                format!("{op} *{dest}{try_call}")
            }

            Inst::ReturnCallKnown { info } => {
                let ReturnCallInfo {
                    uses,
                    new_stack_arg_size,
                    tmp,
                    dest,
                } = &**info;
                let tmp = pretty_print_reg(tmp.to_reg().to_reg(), 8);
                let mut s = format!("return_call_known {dest:?} ({new_stack_arg_size}) tmp={tmp}");
                for ret in uses {
                    let preg = pretty_print_reg(ret.preg, 8);
                    let vreg = pretty_print_reg(ret.vreg, 8);
                    write!(&mut s, " {vreg}={preg}").unwrap();
                }
                s
            }

            Inst::ReturnCallUnknown { info } => {
                let ReturnCallInfo {
                    uses,
                    new_stack_arg_size,
                    tmp,
                    dest,
                } = &**info;
                let callee = pretty_print_reg(*dest, 8);
                let tmp = pretty_print_reg(tmp.to_reg().to_reg(), 8);
                let mut s =
                    format!("return_call_unknown {callee} ({new_stack_arg_size}) tmp={tmp}");
                for ret in uses {
                    let preg = pretty_print_reg(ret.preg, 8);
                    let vreg = pretty_print_reg(ret.vreg, 8);
                    write!(&mut s, " {vreg}={preg}").unwrap();
                }
                s
            }

            Inst::Args { args } => {
                let mut s = "args".to_string();
                for arg in args {
                    let preg = pretty_print_reg(arg.preg, 8);
                    let def = pretty_print_reg(arg.vreg.to_reg(), 8);
                    write!(&mut s, " {def}={preg}").unwrap();
                }
                s
            }

            Inst::Rets { rets } => {
                let mut s = "rets".to_string();
                for ret in rets {
                    let preg = pretty_print_reg(ret.preg, 8);
                    let vreg = pretty_print_reg(ret.vreg, 8);
                    write!(&mut s, " {vreg}={preg}").unwrap();
                }
                s
            }

            Inst::StackSwitchBasic {
                store_context_ptr,
                load_context_ptr,
                in_payload0,
                out_payload0,
            } => {
                let store_context_ptr = pretty_print_reg(**store_context_ptr, 8);
                let load_context_ptr = pretty_print_reg(**load_context_ptr, 8);
                let in_payload0 = pretty_print_reg(**in_payload0, 8);
                let out_payload0 = pretty_print_reg(*out_payload0.to_reg(), 8);
                format!(
                    "{out_payload0} = stack_switch_basic {store_context_ptr}, {load_context_ptr}, {in_payload0}"
                )
            }

            Inst::JmpKnown { dst } => {
                let op = ljustify("jmp".to_string());
                let dst = dst.to_string();
                format!("{op} {dst}")
            }

            Inst::WinchJmpIf { cc, taken } => {
                let taken = taken.to_string();
                let op = ljustify2("j".to_string(), cc.to_string());
                format!("{op} {taken}")
            }

            Inst::JmpCondOr {
                cc1,
                cc2,
                taken,
                not_taken,
            } => {
                let taken = taken.to_string();
                let not_taken = not_taken.to_string();
                let op = ljustify(format!("j{cc1},{cc2}"));
                format!("{op} {taken}; j {not_taken}")
            }

            Inst::JmpCond {
                cc,
                taken,
                not_taken,
            } => {
                let taken = taken.to_string();
                let not_taken = not_taken.to_string();
                let op = ljustify2("j".to_string(), cc.to_string());
                format!("{op} {taken}; j {not_taken}")
            }

            Inst::JmpTableSeq {
                idx, tmp1, tmp2, ..
            } => {
                let idx = pretty_print_reg(*idx, 8);
                let tmp1 = pretty_print_reg(tmp1.to_reg(), 8);
                let tmp2 = pretty_print_reg(tmp2.to_reg(), 8);
                let op = ljustify("br_table".into());
                format!("{op} {idx}, {tmp1}, {tmp2}")
            }

            Inst::TrapIf { cc, trap_code, .. } => {
                format!("j{cc} #trap={trap_code}")
            }

            Inst::TrapIfAnd {
                cc1,
                cc2,
                trap_code,
                ..
            } => {
                let cc1 = cc1.invert();
                let cc2 = cc2.invert();
                format!("trap_if_and {cc1}, {cc2}, {trap_code}")
            }

            Inst::TrapIfOr {
                cc1,
                cc2,
                trap_code,
                ..
            } => {
                let cc2 = cc2.invert();
                format!("trap_if_or {cc1}, {cc2}, {trap_code}")
            }

            Inst::LoadExtName {
                dst, name, offset, ..
            } => {
                let dst = pretty_print_reg(*dst.to_reg(), 8);
                let name = name.display(None);
                let op = ljustify("load_ext_name".into());
                format!("{op} {name}+{offset}, {dst}")
            }

            Inst::AtomicRmwSeq { ty, op, .. } => {
                let ty = ty.bits();
                format!(
                    "atomically {{ {ty}_bits_at_[%r9] {op:?}= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }}"
                )
            }

            Inst::Atomic128RmwSeq {
                op,
                mem,
                operand_low,
                operand_high,
                temp_low,
                temp_high,
                dst_old_low,
                dst_old_high,
            } => {
                let operand_low = pretty_print_reg(**operand_low, 8);
                let operand_high = pretty_print_reg(**operand_high, 8);
                let temp_low = pretty_print_reg(*temp_low.to_reg(), 8);
                let temp_high = pretty_print_reg(*temp_high.to_reg(), 8);
                let dst_old_low = pretty_print_reg(*dst_old_low.to_reg(), 8);
                let dst_old_high = pretty_print_reg(*dst_old_high.to_reg(), 8);
                let mem = mem.pretty_print(16);
                format!(
                    "atomically {{ {dst_old_high}:{dst_old_low} = {mem}; {temp_high}:{temp_low} = {dst_old_high}:{dst_old_low} {op:?} {operand_high}:{operand_low}; {mem} = {temp_high}:{temp_low} }}"
                )
            }

            Inst::Atomic128XchgSeq {
                mem,
                operand_low,
                operand_high,
                dst_old_low,
                dst_old_high,
            } => {
                let operand_low = pretty_print_reg(**operand_low, 8);
                let operand_high = pretty_print_reg(**operand_high, 8);
                let dst_old_low = pretty_print_reg(*dst_old_low.to_reg(), 8);
                let dst_old_high = pretty_print_reg(*dst_old_high.to_reg(), 8);
                let mem = mem.pretty_print(16);
                format!(
                    "atomically {{ {dst_old_high}:{dst_old_low} = {mem}; {mem} = {operand_high}:{operand_low} }}"
                )
            }

            Inst::ElfTlsGetAddr { symbol, dst } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                format!("{dst} = elf_tls_get_addr {symbol:?}")
            }

            Inst::MachOTlsGetAddr { symbol, dst } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                format!("{dst} = macho_tls_get_addr {symbol:?}")
            }

            Inst::CoffTlsGetAddr { symbol, dst, tmp } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let tmp = tmp.to_reg().to_reg();

                let mut s = format!("{dst} = coff_tls_get_addr {symbol:?}");
                if tmp.is_virtual() {
                    let tmp = pretty_print_reg(tmp, 8);
                    write!(&mut s, ", {tmp}").unwrap();
                };

                s
            }

            Inst::Unwind { inst } => format!("unwind {inst:?}"),

            Inst::DummyUse { reg } => {
                let reg = pretty_print_reg(*reg, 8);
                format!("dummy_use {reg}")
            }

            Inst::LabelAddress { dst, label } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                format!("label_address {dst}, {label:?}")
            }

            Inst::External { inst } => {
                format!("{inst}")
            }
        }
    }
}

fn pretty_print_try_call(info: &TryCallInfo) -> String {
    format!(
        "; jmp {:?}; catch [{}]",
        info.continuation,
        info.pretty_print_dests()
    )
}

impl fmt::Debug for Inst {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        write!(fmt, "{}", self.pretty_print_inst(&mut Default::default()))
    }
}

fn x64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) {
    // Note: because we need to statically know the indices of each
    // reg in the operands list in order to fetch its allocation
    // later, we put the variable-operand-count bits (the RegMem,
    // RegMemImm, etc args) last. regalloc2 doesn't care what order
    // the operands come in; they can be freely reordered.

    // N.B.: we MUST keep the below in careful sync with (i) emission,
    // in `emit.rs`, and (ii) pretty-printing, in the `pretty_print`
    // method above.
    match inst {
        Inst::CheckedSRemSeq {
            divisor,
            dividend_lo,
            dividend_hi,
            dst_quotient,
            dst_remainder,
            ..
        } => {
            collector.reg_use(divisor);
            collector.reg_fixed_use(dividend_lo, regs::rax());
            collector.reg_fixed_use(dividend_hi, regs::rdx());
            collector.reg_fixed_def(dst_quotient, regs::rax());
            collector.reg_fixed_def(dst_remainder, regs::rdx());
        }
        Inst::CheckedSRemSeq8 {
            divisor,
            dividend,
            dst,
            ..
        } => {
            collector.reg_use(divisor);
            collector.reg_fixed_use(dividend, regs::rax());
            collector.reg_fixed_def(dst, regs::rax());
        }
        Inst::XmmUninitializedValue { dst } => collector.reg_def(dst),
        Inst::GprUninitializedValue { dst } => collector.reg_def(dst),
        Inst::XmmMinMaxSeq { lhs, rhs, dst, .. } => {
            collector.reg_use(rhs);
            collector.reg_use(lhs);
            collector.reg_reuse_def(dst, 0); // Reuse RHS.
        }
        Inst::MovFromPReg { dst, src } => {
            debug_assert!(dst.to_reg().to_reg().is_virtual());
            collector.reg_fixed_nonallocatable(*src);
            collector.reg_def(dst);
        }
        Inst::MovToPReg { dst, src } => {
            debug_assert!(src.to_reg().is_virtual());
            collector.reg_use(src);
            collector.reg_fixed_nonallocatable(*dst);
        }
        Inst::CvtUint64ToFloatSeq {
            src,
            dst,
            tmp_gpr1,
            tmp_gpr2,
            ..
        } => {
            collector.reg_use(src);
            collector.reg_early_def(dst);
            collector.reg_early_def(tmp_gpr1);
            collector.reg_early_def(tmp_gpr2);
        }
        Inst::CvtFloatToSintSeq {
            src,
            dst,
            tmp_xmm,
            tmp_gpr,
            ..
        } => {
            collector.reg_use(src);
            collector.reg_early_def(dst);
            collector.reg_early_def(tmp_gpr);
            collector.reg_early_def(tmp_xmm);
        }
        Inst::CvtFloatToUintSeq {
            src,
            dst,
            tmp_gpr,
            tmp_xmm,
            tmp_xmm2,
            ..
        } => {
            collector.reg_use(src);
            collector.reg_early_def(dst);
            collector.reg_early_def(tmp_gpr);
            collector.reg_early_def(tmp_xmm);
            collector.reg_early_def(tmp_xmm2);
        }

        Inst::XmmCmove {
            consequent,
            alternative,
            dst,
            ..
        } => {
            collector.reg_use(alternative);
            collector.reg_reuse_def(dst, 0);
            collector.reg_use(consequent);
        }
        Inst::StackProbeLoop { tmp, .. } => {
            collector.reg_early_def(tmp);
        }

        Inst::CallKnown { info } => {
            // Probestack is special and is only inserted after
            // regalloc, so we do not need to represent its ABI to the
            // register allocator. Assert that we don't alter that
            // arrangement.
            let CallInfo {
                uses,
                defs,
                clobbers,
                dest,
                try_call_info,
                ..
            } = &mut **info;
            debug_assert_ne!(*dest, ExternalName::LibCall(LibCall::Probestack));
            for CallArgPair { vreg, preg } in uses {
                collector.reg_fixed_use(vreg, *preg);
            }
            for CallRetPair { vreg, location } in defs {
                match location {
                    RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg),
                    RetLocation::Stack(..) => collector.any_def(vreg),
                }
            }
            collector.reg_clobbers(*clobbers);
            if let Some(try_call_info) = try_call_info {
                try_call_info.collect_operands(collector);
            }
        }

        Inst::CallUnknown { info } => {
            let CallInfo {
                uses,
                defs,
                clobbers,
                callee_conv,
                dest,
                try_call_info,
                ..
            } = &mut **info;
            match dest {
                RegMem::Reg { reg } if *callee_conv == CallConv::Winch => {
                    // TODO(https://github.com/bytecodealliance/regalloc2/issues/145):
                    // This shouldn't be a fixed register constraint. r10 is
                    // caller-saved, so this should be safe to use.
                    collector.reg_fixed_use(reg, regs::r10());
                }
                _ => dest.get_operands(collector),
            }
            for CallArgPair { vreg, preg } in uses {
                collector.reg_fixed_use(vreg, *preg);
            }
            for CallRetPair { vreg, location } in defs {
                match location {
                    RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg),
                    RetLocation::Stack(..) => collector.any_def(vreg),
                }
            }
            collector.reg_clobbers(*clobbers);
            if let Some(try_call_info) = try_call_info {
                try_call_info.collect_operands(collector);
            }
        }
        Inst::StackSwitchBasic {
            store_context_ptr,
            load_context_ptr,
            in_payload0,
            out_payload0,
        } => {
            collector.reg_use(load_context_ptr);
            collector.reg_use(store_context_ptr);
            collector.reg_fixed_use(in_payload0, stack_switch::payload_register());
            collector.reg_fixed_def(out_payload0, stack_switch::payload_register());

            let mut clobbers = crate::isa::x64::abi::ALL_CLOBBERS;
            // The return/payload reg must not be included in the clobber set
            clobbers.remove(
                stack_switch::payload_register()
                    .to_real_reg()
                    .unwrap()
                    .into(),
            );
            collector.reg_clobbers(clobbers);
        }

        Inst::ReturnCallKnown { info } => {
            let ReturnCallInfo {
                dest, uses, tmp, ..
            } = &mut **info;
            collector.reg_fixed_def(tmp, regs::r11());
            // Same as in the `Inst::CallKnown` branch.
            debug_assert_ne!(*dest, ExternalName::LibCall(LibCall::Probestack));
            for CallArgPair { vreg, preg } in uses {
                collector.reg_fixed_use(vreg, *preg);
            }
        }

        Inst::ReturnCallUnknown { info } => {
            let ReturnCallInfo {
                dest, uses, tmp, ..
            } = &mut **info;

            // TODO(https://github.com/bytecodealliance/regalloc2/issues/145):
            // This shouldn't be a fixed register constraint, but it's not clear how to
            // pick a register that won't be clobbered by the callee-save restore code
            // emitted with a return_call_indirect. r10 is caller-saved, so this should be
            // safe to use.
            collector.reg_fixed_use(dest, regs::r10());

            collector.reg_fixed_def(tmp, regs::r11());
            for CallArgPair { vreg, preg } in uses {
                collector.reg_fixed_use(vreg, *preg);
            }
        }

        Inst::JmpTableSeq {
            idx, tmp1, tmp2, ..
        } => {
            collector.reg_use(idx);
            collector.reg_early_def(tmp1);
            // In the sequence emitted for this pseudoinstruction in emit.rs,
            // tmp2 is only written after idx is read, so it doesn't need to be
            // an early def.
            collector.reg_def(tmp2);
        }

        Inst::LoadExtName { dst, .. } => {
            collector.reg_def(dst);
        }

        Inst::AtomicRmwSeq {
            operand,
            temp,
            dst_old,
            mem,
            ..
        } => {
            collector.reg_late_use(operand);
            collector.reg_early_def(temp);
            // This `fixed_def` is needed because `CMPXCHG` always uses this
            // register implicitly.
            collector.reg_fixed_def(dst_old, regs::rax());
            mem.get_operands_late(collector)
        }

        Inst::Atomic128RmwSeq {
            operand_low,
            operand_high,
            temp_low,
            temp_high,
            dst_old_low,
            dst_old_high,
            mem,
            ..
        } => {
            // All registers are collected in the `Late` position so that they don't overlap.
            collector.reg_late_use(operand_low);
            collector.reg_late_use(operand_high);
            collector.reg_fixed_def(temp_low, regs::rbx());
            collector.reg_fixed_def(temp_high, regs::rcx());
            collector.reg_fixed_def(dst_old_low, regs::rax());
            collector.reg_fixed_def(dst_old_high, regs::rdx());
            mem.get_operands_late(collector)
        }

        Inst::Atomic128XchgSeq {
            operand_low,
            operand_high,
            dst_old_low,
            dst_old_high,
            mem,
            ..
        } => {
            // All registers are collected in the `Late` position so that they don't overlap.
            collector.reg_fixed_late_use(operand_low, regs::rbx());
            collector.reg_fixed_late_use(operand_high, regs::rcx());
            collector.reg_fixed_def(dst_old_low, regs::rax());
            collector.reg_fixed_def(dst_old_high, regs::rdx());
            mem.get_operands_late(collector)
        }

        Inst::Args { args } => {
            for ArgPair { vreg, preg } in args {
                collector.reg_fixed_def(vreg, *preg);
            }
        }

        Inst::Rets { rets } => {
            // The return value(s) are live-out; we represent this
            // with register uses on the return instruction.
            for RetPair { vreg, preg } in rets {
                collector.reg_fixed_use(vreg, *preg);
            }
        }

        Inst::JmpKnown { .. }
        | Inst::WinchJmpIf { .. }
        | Inst::JmpCond { .. }
        | Inst::JmpCondOr { .. }
        | Inst::TrapIf { .. }
        | Inst::TrapIfAnd { .. }
        | Inst::TrapIfOr { .. } => {
            // No registers are used.
        }

        Inst::ElfTlsGetAddr { dst, .. } | Inst::MachOTlsGetAddr { dst, .. } => {
            collector.reg_fixed_def(dst, regs::rax());
            // All caller-saves are clobbered.
            //
            // We use the SysV calling convention here because the
            // pseudoinstruction (and relocation that it emits) is specific to
            // ELF systems; other x86-64 targets with other conventions (i.e.,
            // Windows) use different TLS strategies.
            let mut clobbers =
                X64ABIMachineSpec::get_regs_clobbered_by_call(CallConv::SystemV, false);
            clobbers.remove(regs::gpr_preg(asm::gpr::enc::RAX));
            collector.reg_clobbers(clobbers);
        }

        Inst::CoffTlsGetAddr { dst, tmp, .. } => {
            // We also use the gs register. But that register is not allocatable by the
            // register allocator, so we don't need to mark it as used here.

            // We use %rax to set the address
            collector.reg_fixed_def(dst, regs::rax());

            // We use %rcx as a temporary variable to load the _tls_index
            collector.reg_fixed_def(tmp, regs::rcx());
        }

        Inst::Unwind { .. } => {}

        Inst::DummyUse { reg } => {
            collector.reg_use(reg);
        }

        Inst::LabelAddress { dst, .. } => {
            collector.reg_def(dst);
        }

        Inst::External { inst } => {
            inst.visit(&mut external::RegallocVisitor { collector });
        }
    }
}

//=============================================================================
// Instructions: misc functions and external interface

impl MachInst for Inst {
    type ABIMachineSpec = X64ABIMachineSpec;

    fn get_operands(&mut self, collector: &mut impl OperandVisitor) {
        x64_get_operands(self, collector)
    }

    fn is_move(&self) -> Option<(Writable<Reg>, Reg)> {
        use asm::inst::Inst as I;
        match self {
            // Note (carefully!) that a 32-bit mov *isn't* a no-op since it zeroes
            // out the upper 32 bits of the destination. For example, we could
            // conceivably use `movl %reg, %reg` to zero out the top 32 bits of
            // %reg.
            Self::External {
                inst: I::movq_mr(asm::inst::movq_mr { rm64, r64 }),
            } => match rm64 {
                asm::GprMem::Gpr(reg) => Some((reg.map(|r| r.to_reg()), r64.as_ref().to_reg())),
                asm::GprMem::Mem(_) => None,
            },
            Self::External {
                inst: I::movq_rm(asm::inst::movq_rm { r64, rm64 }),
            } => match rm64 {
                asm::GprMem::Gpr(reg) => Some((r64.as_ref().map(|r| r.to_reg()), reg.to_reg())),
                asm::GprMem::Mem(_) => None,
            },

            // Note that `movss_a_r` and `movsd_a_r` are specifically omitted
            // here because they only overwrite the low bits in the destination
            // register, otherwise preserving the upper bits. That can be used
            // for lane-insertion instructions, for example, meaning it's not
            // classified as a register move.
            //
            // Otherwise though all register-to-register movement instructions
            // which move 128-bits are registered as moves.
            Self::External {
                inst:
                    I::movaps_a(asm::inst::movaps_a { xmm1, xmm_m128 })
                    | I::movups_a(asm::inst::movups_a { xmm1, xmm_m128 })
                    | I::movapd_a(asm::inst::movapd_a { xmm1, xmm_m128 })
                    | I::movupd_a(asm::inst::movupd_a { xmm1, xmm_m128 })
                    | I::movdqa_a(asm::inst::movdqa_a { xmm1, xmm_m128 })
                    | I::movdqu_a(asm::inst::movdqu_a { xmm1, xmm_m128 }),
            } => match xmm_m128 {
                asm::XmmMem::Xmm(xmm2) => Some((xmm1.as_ref().map(|r| r.to_reg()), xmm2.to_reg())),
                asm::XmmMem::Mem(_) => None,
            },
            // In addition to the "A" format of instructions above, also
            // recognize the "B" format which, while it can be used for stores,
            // can also be used for register moves.
            Self::External {
                inst:
                    I::movaps_b(asm::inst::movaps_b { xmm_m128, xmm1 })
                    | I::movups_b(asm::inst::movups_b { xmm_m128, xmm1 })
                    | I::movapd_b(asm::inst::movapd_b { xmm_m128, xmm1 })
                    | I::movupd_b(asm::inst::movupd_b { xmm_m128, xmm1 })
                    | I::movdqa_b(asm::inst::movdqa_b { xmm_m128, xmm1 })
                    | I::movdqu_b(asm::inst::movdqu_b { xmm_m128, xmm1 }),
            } => match xmm_m128 {
                asm::XmmMem::Xmm(dst) => Some((dst.map(|r| r.to_reg()), xmm1.as_ref().to_reg())),
                asm::XmmMem::Mem(_) => None,
            },
            _ => None,
        }
    }

    fn is_included_in_clobbers(&self) -> bool {
        match self {
            &Inst::Args { .. } => false,
            _ => true,
        }
    }

    fn is_trap(&self) -> bool {
        match self {
            Self::External {
                inst: asm::inst::Inst::ud2_zo(..),
            } => true,
            _ => false,
        }
    }

    fn is_args(&self) -> bool {
        match self {
            Self::Args { .. } => true,
            _ => false,
        }
    }

    fn call_type(&self) -> CallType {
        match self {
            Inst::CallKnown { .. }
            | Inst::CallUnknown { .. }
            | Inst::ElfTlsGetAddr { .. }
            | Inst::MachOTlsGetAddr { .. } => CallType::Regular,

            Inst::ReturnCallKnown { .. } | Inst::ReturnCallUnknown { .. } => CallType::TailCall,

            _ => CallType::None,
        }
    }

    fn is_term(&self) -> MachTerminator {
        match self {
            // Interesting cases.
            &Self::Rets { .. } => MachTerminator::Ret,
            &Self::ReturnCallKnown { .. } | &Self::ReturnCallUnknown { .. } => {
                MachTerminator::RetCall
            }
            &Self::JmpKnown { .. } => MachTerminator::Branch,
            &Self::JmpCond { .. } => MachTerminator::Branch,
            &Self::JmpCondOr { .. } => MachTerminator::Branch,
            &Self::JmpTableSeq { .. } => MachTerminator::Branch,
            &Self::CallKnown { ref info } if info.try_call_info.is_some() => MachTerminator::Branch,
            &Self::CallUnknown { ref info } if info.try_call_info.is_some() => {
                MachTerminator::Branch
            }
            // All other cases are boring.
            _ => MachTerminator::None,
        }
    }

    fn is_low_level_branch(&self) -> bool {
        match self {
            &Self::WinchJmpIf { .. } => true,
            _ => false,
        }
    }

    fn is_mem_access(&self) -> bool {
        panic!("TODO FILL ME OUT")
    }

    fn gen_move(dst_reg: Writable<Reg>, src_reg: Reg, ty: Type) -> Inst {
        trace!(
            "Inst::gen_move {:?} -> {:?} (type: {:?})",
            src_reg,
            dst_reg.to_reg(),
            ty
        );
        let rc_dst = dst_reg.to_reg().class();
        let rc_src = src_reg.class();
        // If this isn't true, we have gone way off the rails.
        debug_assert!(rc_dst == rc_src);
        let inst = match rc_dst {
            RegClass::Int => {
                asm::inst::movq_mr::new(dst_reg.map(Gpr::unwrap_new), Gpr::unwrap_new(src_reg))
                    .into()
            }
            RegClass::Float => {
                // The Intel optimization manual, in "3.5.1.13 Zero-Latency MOV Instructions",
                // doesn't include MOVSS/MOVSD as instructions with zero-latency. Use movaps for
                // those, which may write more lanes than we need, but are specified to have
                // zero latency.
                let dst_reg = dst_reg.map(|r| Xmm::new(r).unwrap());
                let src_reg = Xmm::new(src_reg).unwrap();
                match ty {
                    types::F16 | types::F32 | types::F64 | types::F32X4 => {
                        asm::inst::movaps_a::new(dst_reg, src_reg).into()
                    }
                    types::F64X2 => asm::inst::movapd_a::new(dst_reg, src_reg).into(),
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() <= 128 => {
                        asm::inst::movdqa_a::new(dst_reg, src_reg).into()
                    }
                    _ => unimplemented!("unable to move type: {}", ty),
                }
            }
            RegClass::Vector => unreachable!(),
        };
        Inst::External { inst }
    }

    fn gen_nop(preferred_size: usize) -> Inst {
        Inst::nop(std::cmp::min(preferred_size, 9) as u8)
    }

    fn rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])> {
        match ty {
            types::I8 => Ok((&[RegClass::Int], &[types::I8])),
            types::I16 => Ok((&[RegClass::Int], &[types::I16])),
            types::I32 => Ok((&[RegClass::Int], &[types::I32])),
            types::I64 => Ok((&[RegClass::Int], &[types::I64])),
            types::F16 => Ok((&[RegClass::Float], &[types::F16])),
            types::F32 => Ok((&[RegClass::Float], &[types::F32])),
            types::F64 => Ok((&[RegClass::Float], &[types::F64])),
            types::F128 => Ok((&[RegClass::Float], &[types::F128])),
            types::I128 => Ok((&[RegClass::Int, RegClass::Int], &[types::I64, types::I64])),
            _ if ty.is_vector() && ty.bits() <= 128 => {
                let types = &[types::I8X2, types::I8X4, types::I8X8, types::I8X16];
                Ok((
                    &[RegClass::Float],
                    slice::from_ref(&types[ty.bytes().ilog2() as usize - 1]),
                ))
            }
            _ => Err(CodegenError::Unsupported(format!(
                "Unexpected SSA-value type: {ty}"
            ))),
        }
    }

    fn canonical_type_for_rc(rc: RegClass) -> Type {
        match rc {
            RegClass::Float => types::I8X16,
            RegClass::Int => types::I64,
            RegClass::Vector => unreachable!(),
        }
    }

    fn gen_jump(label: MachLabel) -> Inst {
        Inst::jmp_known(label)
    }

    fn gen_imm_u64(value: u64, dst: Writable<Reg>) -> Option<Self> {
        Some(Inst::imm(OperandSize::Size64, value, dst))
    }

    fn gen_imm_f64(value: f64, tmp: Writable<Reg>, dst: Writable<Reg>) -> SmallVec<[Self; 2]> {
        let imm_to_gpr = Inst::imm(OperandSize::Size64, value.to_bits(), tmp);
        let gpr_to_xmm = Inst::External {
            inst: asm::inst::movq_a::new(dst.map(|r| Xmm::new(r).unwrap()), tmp.to_reg()).into(),
        };
        smallvec![imm_to_gpr, gpr_to_xmm]
    }

    fn gen_dummy_use(reg: Reg) -> Self {
        Inst::DummyUse { reg }
    }

    fn worst_case_size() -> CodeOffset {
        15
    }

    fn ref_type_regclass(_: &settings::Flags) -> RegClass {
        RegClass::Int
    }

    fn is_safepoint(&self) -> bool {
        match self {
            Inst::CallKnown { .. } | Inst::CallUnknown { .. } => true,
            _ => false,
        }
    }

    fn function_alignment() -> FunctionAlignment {
        FunctionAlignment {
            minimum: 1,
            // Change the alignment from 16-bytes to 32-bytes for better performance.
            // fix-8573: https://github.com/bytecodealliance/wasmtime/issues/8573
            preferred: 32,
        }
    }

    type LabelUse = LabelUse;

    const TRAP_OPCODE: &'static [u8] = &[0x0f, 0x0b];
}

/// Constant state used during the emission of a sequence of instructions.
pub struct EmitInfo {
    pub(super) flags: settings::Flags,
    isa_flags: x64_settings::Flags,
}

impl EmitInfo {
    /// Create a constant state for emission of instructions.
    pub fn new(flags: settings::Flags, isa_flags: x64_settings::Flags) -> Self {
        Self { flags, isa_flags }
    }
}

impl asm::AvailableFeatures for &EmitInfo {
    fn _64b(&self) -> bool {
        // Currently, this x64 backend always assumes 64-bit mode.
        true
    }

    fn compat(&self) -> bool {
        // For 32-bit compatibility mode, see
        // https://github.com/bytecodealliance/wasmtime/issues/1980 (TODO).
        false
    }

    fn sse(&self) -> bool {
        // Currently, this x64 backend always assumes SSE.
        true
    }

    fn sse2(&self) -> bool {
        // Currently, this x64 backend always assumes SSE2.
        true
    }

    fn sse3(&self) -> bool {
        self.isa_flags.has_sse3()
    }

    fn ssse3(&self) -> bool {
        self.isa_flags.has_ssse3()
    }

    fn sse41(&self) -> bool {
        self.isa_flags.has_sse41()
    }

    fn sse42(&self) -> bool {
        self.isa_flags.has_sse42()
    }

    fn bmi1(&self) -> bool {
        self.isa_flags.has_bmi1()
    }

    fn bmi2(&self) -> bool {
        self.isa_flags.has_bmi2()
    }

    fn lzcnt(&self) -> bool {
        self.isa_flags.has_lzcnt()
    }

    fn popcnt(&self) -> bool {
        self.isa_flags.has_popcnt()
    }

    fn avx(&self) -> bool {
        self.isa_flags.has_avx()
    }

    fn avx2(&self) -> bool {
        self.isa_flags.has_avx2()
    }

    fn avx512f(&self) -> bool {
        self.isa_flags.has_avx512f()
    }

    fn avx512vl(&self) -> bool {
        self.isa_flags.has_avx512vl()
    }

    fn cmpxchg16b(&self) -> bool {
        self.isa_flags.has_cmpxchg16b()
    }

    fn fma(&self) -> bool {
        self.isa_flags.has_fma()
    }

    fn avx512dq(&self) -> bool {
        self.isa_flags.has_avx512dq()
    }

    fn avx512bitalg(&self) -> bool {
        self.isa_flags.has_avx512bitalg()
    }

    fn avx512vbmi(&self) -> bool {
        self.isa_flags.has_avx512vbmi()
    }
}
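
// These feature predicates are what `Inst::is_available` consults for
// `Inst::External` instructions; for example, an externally-defined encoding
// that requires BMI2 only reports itself as available when
// `isa_flags.has_bmi2()` returns true.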

impl MachInstEmit for Inst {
    type State = EmitState;
    type Info = EmitInfo;

    fn emit(&self, sink: &mut MachBuffer<Inst>, info: &Self::Info, state: &mut Self::State) {
        emit::emit(self, sink, info, state);
    }

    fn pretty_print_inst(&self, _: &mut Self::State) -> String {
        PrettyPrint::pretty_print(self, 0)
    }
}

/// A label-use (internal relocation) in generated code.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum LabelUse {
    /// A 32-bit offset from location of relocation itself, added to the existing value at that
    /// location. Used for control flow instructions which consider an offset from the start of the
    /// next instruction (so the size of the payload -- 4 bytes -- is subtracted from the payload).
    JmpRel32,

    /// A 32-bit offset from location of relocation itself, added to the existing value at that
    /// location.
    PCRel32,
}

impl MachInstLabelUse for LabelUse {
    const ALIGN: CodeOffset = 1;

    fn max_pos_range(self) -> CodeOffset {
        match self {
            LabelUse::JmpRel32 | LabelUse::PCRel32 => 0x7fff_ffff,
        }
    }

    fn max_neg_range(self) -> CodeOffset {
        match self {
            LabelUse::JmpRel32 | LabelUse::PCRel32 => 0x8000_0000,
        }
    }

    fn patch_size(self) -> CodeOffset {
        match self {
            LabelUse::JmpRel32 | LabelUse::PCRel32 => 4,
        }
    }

    fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset) {
        let pc_rel = (label_offset as i64) - (use_offset as i64);
        debug_assert!(pc_rel <= self.max_pos_range() as i64);
        debug_assert!(pc_rel >= -(self.max_neg_range() as i64));
        let pc_rel = pc_rel as u32;
        match self {
            LabelUse::JmpRel32 => {
                let addend = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
                let value = pc_rel.wrapping_add(addend).wrapping_sub(4);
                buffer.copy_from_slice(&value.to_le_bytes()[..]);
            }
            LabelUse::PCRel32 => {
                let addend = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
                let value = pc_rel.wrapping_add(addend);
                buffer.copy_from_slice(&value.to_le_bytes()[..]);
            }
        }
    }

    fn supports_veneer(self) -> bool {
        match self {
            LabelUse::JmpRel32 | LabelUse::PCRel32 => false,
        }
    }

    fn veneer_size(self) -> CodeOffset {
        match self {
            LabelUse::JmpRel32 | LabelUse::PCRel32 => 0,
        }
    }

    fn worst_case_veneer_size() -> CodeOffset {
        0
    }

    fn generate_veneer(self, _: &mut [u8], _: CodeOffset) -> (CodeOffset, LabelUse) {
        match self {
            LabelUse::JmpRel32 | LabelUse::PCRel32 => {
                panic!("Veneer not supported for JumpRel32 label-use.");
            }
        }
    }

    fn from_reloc(reloc: Reloc, addend: Addend) -> Option<Self> {
        match (reloc, addend) {
            (Reloc::X86CallPCRel4, -4) => Some(LabelUse::JmpRel32),
            _ => None,
        }
    }
}
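
// Patch math example for `LabelUse::JmpRel32`: a `jmp`/`jcc` displacement is
// resolved relative to the end of its 4-byte payload, so for a use at offset
// `U` targeting a label at offset `L` (and a zero addend) the stored value is
// `L - (U + 4)`; the `wrapping_sub(4)` in `patch` accounts for the payload
// size, while `PCRel32` is relative to the payload's own address and needs no
// such adjustment.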