Path: cranelift/codegen/src/isa/x64/lower.rs
//! Lowering rules for X64.

// ISLE integration glue.
pub(super) mod isle;

use crate::ir::pcc::{FactContext, PccResult};
use crate::ir::{
    Endianness, ExternalName, Inst as IRInst, InstructionData, LibCall, Opcode, Type, types,
};
use crate::isa::x64::abi::*;
use crate::isa::x64::inst::args::*;
use crate::isa::x64::inst::*;
use crate::isa::x64::pcc;
use crate::isa::{CallConv, x64::X64Backend};
use crate::machinst::lower::*;
use crate::machinst::*;
use crate::result::CodegenResult;
use crate::settings::Flags;
use std::boxed::Box;
use target_lexicon::Triple;

/// Identifier for a particular input of an instruction.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
struct InsnInput {
    insn: IRInst,
    input: usize,
}

//=============================================================================
// Helpers for instruction lowering.

impl Lower<'_, Inst> {
    #[inline]
    pub fn temp_writable_gpr(&mut self) -> WritableGpr {
        WritableGpr::from_writable_reg(self.alloc_tmp(types::I64).only_reg().unwrap()).unwrap()
    }

    #[inline]
    pub fn temp_writable_xmm(&mut self) -> WritableXmm {
        WritableXmm::from_writable_reg(self.alloc_tmp(types::F64).only_reg().unwrap()).unwrap()
    }
}

fn is_int_or_ref_ty(ty: Type) -> bool {
    match ty {
        types::I8 | types::I16 | types::I32 | types::I64 => true,
        _ => false,
    }
}

/// Returns whether the given `input` is a result produced by an instruction with Opcode `op`.
// TODO investigate failures with checking against the result index.
fn matches_input(ctx: &mut Lower<Inst>, input: InsnInput, op: Opcode) -> Option<IRInst> {
    let inputs = ctx.get_input_as_source_or_const(input.insn, input.input);
    inputs.inst.as_inst().and_then(|(src_inst, _)| {
        let data = ctx.data(src_inst);
        if data.opcode() == op {
            return Some(src_inst);
        }
        None
    })
}

/// Put the given input into possibly multiple registers, and mark it as used (side-effect).
fn put_input_in_regs(ctx: &mut Lower<Inst>, spec: InsnInput) -> ValueRegs<Reg> {
    let ty = ctx.input_ty(spec.insn, spec.input);
    let input = ctx.get_input_as_source_or_const(spec.insn, spec.input);

    if let Some(c) = input.constant {
        // Generate constants fresh at each use to minimize long-range register pressure.
        let size = if ty_bits(ty) < 64 {
            OperandSize::Size32
        } else {
            OperandSize::Size64
        };
        assert!(is_int_or_ref_ty(ty)); // Only used for addresses.
        let cst_copy = ctx.alloc_tmp(ty);
        ctx.emit(Inst::imm(size, c, cst_copy.only_reg().unwrap()));
        non_writable_value_regs(cst_copy)
    } else {
        ctx.put_input_in_regs(spec.insn, spec.input)
    }
}

/// Put the given input into a register, and mark it as used (side-effect).
fn put_input_in_reg(ctx: &mut Lower<Inst>, spec: InsnInput) -> Reg {
    put_input_in_regs(ctx, spec)
        .only_reg()
        .expect("Multi-register value not expected")
}

enum MergeableLoadSize {
    /// The load performed by a sinkable load-merging operation is precisely
    /// the size necessary for the type in question.
    Exact,

    /// Narrower-than-32-bit values are handled by ALU insts that are at least
    /// 32 bits wide, which is normally OK as we ignore upper bits; but, if we
    /// generate, e.g., a direct-from-memory 32-bit add for a byte value and
    /// the byte is the last byte in a page, the extra data that we load is
    /// incorrectly accessed. So we only allow loads to merge for
    /// 32-bit-and-above widths.
    Min32,
}

/// Determines whether a load operation (indicated by `src_insn`) can be merged
/// into the current lowering point. If so, returns the address-base source (as
/// an `InsnInput`) and an offset from that address from which to perform the
/// load.
fn is_mergeable_load(
    ctx: &mut Lower<Inst>,
    src_insn: IRInst,
    size: MergeableLoadSize,
) -> Option<(InsnInput, i32)> {
    let insn_data = ctx.data(src_insn);
    let inputs = ctx.num_inputs(src_insn);
    if inputs != 1 {
        return None;
    }

    // If this type is too small to get a merged load, don't merge the load.
    let load_ty = ctx.output_ty(src_insn, 0);
    if ty_bits(load_ty) < 32 {
        match size {
            MergeableLoadSize::Exact => {}
            MergeableLoadSize::Min32 => return None,
        }
    }

    // If the load's flags specify big-endian, we can't merge.
    if let Some(flags) = ctx.memflags(src_insn) {
        if flags.explicit_endianness() == Some(Endianness::Big) {
            return None;
        }
    }

    // Just testing the opcode is enough, because the width will always match if
    // the type does (and the type should match if the CLIF is properly
    // constructed).
    if let &InstructionData::Load {
        opcode: Opcode::Load,
        offset,
        ..
    } = insn_data
    {
        Some((
            InsnInput {
                insn: src_insn,
                input: 0,
            },
            offset.into(),
        ))
    } else {
        None
    }
}
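// Illustrative sketch (hypothetical CLIF, not taken from this file): when the
// lowering rules see a pattern such as
//
//     v1 = load.i64 v0
//     v2 = iadd v3, v1
//
// and `is_mergeable_load` accepts the load, the backend can sink the load into
// its consumer and emit a single memory-operand ALU instruction, e.g.
// `add rax, qword ptr [rdi]`, instead of a separate load followed by a
// register-register add. `MergeableLoadSize` above controls whether narrow
// (sub-32-bit) loads are allowed to participate in such merges.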
/// Returns the given input as a constant immediate, if it is one.
fn input_to_imm(ctx: &mut Lower<Inst>, spec: InsnInput) -> Option<u64> {
    ctx.get_input_as_source_or_const(spec.insn, spec.input)
        .constant
}

/// Emits a call to the runtime library routine identified by `libcall`, creating an ABI
/// signature for it on first use. Depending on `use_colocated_libcalls`, this is either a
/// direct (relocatable) call or an indirect call through a register loaded with the symbol's
/// address.
fn emit_vm_call(
    ctx: &mut Lower<Inst>,
    flags: &Flags,
    triple: &Triple,
    libcall: LibCall,
    inputs: &[ValueRegs<Reg>],
) -> CodegenResult<InstOutput> {
    let extname = ExternalName::LibCall(libcall);

    // TODO avoid recreating signatures for every single Libcall function.
    let call_conv = CallConv::for_libcall(flags, CallConv::triple_default(triple));
    let sig = libcall.signature(call_conv, types::I64);
    let outputs = ctx.gen_call_output(&sig);

    if !ctx.sigs().have_abi_sig_for_signature(&sig) {
        ctx.sigs_mut()
            .make_abi_sig_from_ir_signature::<X64ABIMachineSpec>(sig.clone(), flags)?;
    }
    let sig = ctx.sigs().abi_sig_for_signature(&sig);

    let uses = ctx.gen_call_args(sig, inputs);
    let defs = ctx.gen_call_rets(sig, &outputs);

    let stack_ret_space = ctx.sigs()[sig].sized_stack_ret_space();
    let stack_arg_space = ctx.sigs()[sig].sized_stack_arg_space();
    ctx.abi_mut()
        .accumulate_outgoing_args_size(stack_ret_space + stack_arg_space);

    if flags.use_colocated_libcalls() {
        let call_info = ctx.gen_call_info(sig, extname, uses, defs, None);
        ctx.emit(Inst::call_known(Box::new(call_info)));
    } else {
        let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap();
        ctx.emit(Inst::LoadExtName {
            dst: tmp.map(Gpr::unwrap_new),
            name: Box::new(extname),
            offset: 0,
            distance: RelocDistance::Far,
        });
        let call_info = ctx.gen_call_info(sig, RegMem::reg(tmp.to_reg()), uses, defs, None);
        ctx.emit(Inst::call_unknown(Box::new(call_info)));
    }
    Ok(outputs)
}

/// Matches a left shift by a constant amount of at most 3 and returns the shifted input along
/// with the shift amount. The goal is to embed such a shift within an address mode.
fn matches_small_constant_shift(ctx: &mut Lower<Inst>, spec: InsnInput) -> Option<(InsnInput, u8)> {
    matches_input(ctx, spec, Opcode::Ishl).and_then(|shift| {
        match input_to_imm(
            ctx,
            InsnInput {
                insn: shift,
                input: 1,
            },
        ) {
            Some(shift_amt) if shift_amt <= 3 => Some((
                InsnInput {
                    insn: shift,
                    input: 0,
                },
                shift_amt as u8,
            )),
            _ => None,
        }
    })
}
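// Illustrative sketch (hypothetical CLIF, not taken from this file): a small
// constant shift feeding an address computation, such as
//
//     v2 = iconst.i64 2
//     v3 = ishl v1, v2       ;; shift amount <= 3
//     v4 = iadd v0, v3
//     v5 = load.i64 v4+16
//
// can be folded by `lower_to_amode` below into one scaled addressing mode,
// roughly `[v0 + v1*4 + 16]`: an x86 SIB byte encodes scales of 1, 2, 4, or 8,
// i.e. exactly a left shift of 0..=3.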
/// Lowers an instruction to one of the x86 addressing modes.
///
/// Note: the 32-bit offset in Cranelift has to be sign-extended, which matches x86's behavior.
fn lower_to_amode(ctx: &mut Lower<Inst>, spec: InsnInput, offset: i32) -> Amode {
    let flags = ctx
        .memflags(spec.insn)
        .expect("Instruction with amode should have memflags");

    // We now either have an add that we must materialize, or some other input; as well as the
    // final offset.
    if let Some(add) = matches_input(ctx, spec, Opcode::Iadd) {
        let output_ty = ctx.output_ty(add, 0);
        debug_assert_eq!(
            output_ty,
            types::I64,
            "Address width of 64 expected, got {output_ty}"
        );
        let add_inputs = &[
            InsnInput {
                insn: add,
                input: 0,
            },
            InsnInput {
                insn: add,
                input: 1,
            },
        ];

        // TODO heap_addr legalization generates a uext64 *after* the shift, so these optimizations
        // aren't happening in the wasm case. We could do better, given some range analysis.
        let (base, index, shift) = if let Some((shift_input, shift_amt)) =
            matches_small_constant_shift(ctx, add_inputs[0])
        {
            (
                put_input_in_reg(ctx, add_inputs[1]),
                put_input_in_reg(ctx, shift_input),
                shift_amt,
            )
        } else if let Some((shift_input, shift_amt)) =
            matches_small_constant_shift(ctx, add_inputs[1])
        {
            (
                put_input_in_reg(ctx, add_inputs[0]),
                put_input_in_reg(ctx, shift_input),
                shift_amt,
            )
        } else {
            for input in 0..=1 {
                // Try to pierce through uextend.
                let (inst, inst_input) = if let Some(uextend) =
                    matches_input(ctx, InsnInput { insn: add, input }, Opcode::Uextend)
                {
                    (uextend, 0)
                } else {
                    (add, input)
                };

                // If it's a constant, add it directly!
                if let Some(cst) = ctx.get_input_as_source_or_const(inst, inst_input).constant {
                    let final_offset = (offset as i64).wrapping_add(cst as i64);
                    if let Ok(final_offset) = i32::try_from(final_offset) {
                        let base = put_input_in_reg(ctx, add_inputs[1 - input]);
                        return Amode::imm_reg(final_offset, base).with_flags(flags);
                    }
                }
            }

            (
                put_input_in_reg(ctx, add_inputs[0]),
                put_input_in_reg(ctx, add_inputs[1]),
                0,
            )
        };

        return Amode::imm_reg_reg_shift(
            offset,
            Gpr::unwrap_new(base),
            Gpr::unwrap_new(index),
            shift,
        )
        .with_flags(flags);
    }

    let input = put_input_in_reg(ctx, spec);
    Amode::imm_reg(offset, input).with_flags(flags)
}

//=============================================================================
// Lowering-backend trait implementation.

impl LowerBackend for X64Backend {
    type MInst = Inst;

    fn lower(&self, ctx: &mut Lower<Inst>, ir_inst: IRInst) -> Option<InstOutput> {
        isle::lower(ctx, self, ir_inst)
    }

    fn lower_branch(
        &self,
        ctx: &mut Lower<Inst>,
        ir_inst: IRInst,
        targets: &[MachLabel],
    ) -> Option<()> {
        isle::lower_branch(ctx, self, ir_inst, targets)
    }

    fn maybe_pinned_reg(&self) -> Option<Reg> {
        Some(regs::pinned_reg())
    }

    fn check_fact(
        &self,
        ctx: &FactContext<'_>,
        vcode: &mut VCode<Self::MInst>,
        inst: InsnIndex,
        state: &mut pcc::FactFlowState,
    ) -> PccResult<()> {
        pcc::check(ctx, vcode, inst, state)
    }

    type FactFlowState = pcc::FactFlowState;
}
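// Note: the per-opcode lowering rules themselves live in the ISLE sources for
// this backend (`lower.isle` and related files) and in the generated code
// reached through `isle::lower` and `isle::lower_branch` above; the helpers in
// this file are the hand-written pieces the ISLE glue calls back into
// (address-mode construction, load merging, libcall emission, temporaries).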