Path: blob/main/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs
3103 views
//! Pulley binary code emission.12use super::*;3use crate::ir::{self, Endianness};4use crate::isa;5use crate::isa::pulley_shared::PointerWidth;6use crate::isa::pulley_shared::abi::PulleyMachineDeps;7use core::marker::PhantomData;8use cranelift_control::ControlPlane;9use pulley_interpreter::encode as enc;10use pulley_interpreter::regs::BinaryOperands;1112pub struct EmitInfo {13call_conv: isa::CallConv,14shared_flags: settings::Flags,15isa_flags: crate::isa::pulley_shared::settings::Flags,16}1718impl EmitInfo {19pub(crate) fn new(20call_conv: isa::CallConv,21shared_flags: settings::Flags,22isa_flags: crate::isa::pulley_shared::settings::Flags,23) -> Self {24Self {25call_conv,26shared_flags,27isa_flags,28}29}3031fn endianness(&self, flags: MemFlags) -> Endianness {32flags.endianness(self.isa_flags.endianness())33}34}3536/// State carried between emissions of a sequence of instructions.37#[derive(Default, Clone, Debug)]38pub struct EmitState<P>39where40P: PulleyTargetKind,41{42_phantom: PhantomData<P>,43ctrl_plane: ControlPlane,44user_stack_map: Option<ir::UserStackMap>,45frame_layout: FrameLayout,46}4748impl<P> EmitState<P>49where50P: PulleyTargetKind,51{52fn take_stack_map(&mut self) -> Option<ir::UserStackMap> {53self.user_stack_map.take()54}55}5657impl<P> MachInstEmitState<InstAndKind<P>> for EmitState<P>58where59P: PulleyTargetKind,60{61fn new(abi: &Callee<PulleyMachineDeps<P>>, ctrl_plane: ControlPlane) -> Self {62EmitState {63_phantom: PhantomData,64ctrl_plane,65user_stack_map: None,66frame_layout: abi.frame_layout().clone(),67}68}6970fn pre_safepoint(&mut self, user_stack_map: Option<ir::UserStackMap>) {71self.user_stack_map = user_stack_map;72}7374fn ctrl_plane_mut(&mut self) -> &mut ControlPlane {75&mut self.ctrl_plane76}7778fn take_ctrl_plane(self) -> ControlPlane {79self.ctrl_plane80}8182fn frame_layout(&self) -> &FrameLayout {83&self.frame_layout84}85}8687impl<P> MachInstEmit for InstAndKind<P>88where89P: PulleyTargetKind,90{91type State = EmitState<P>;92type Info = EmitInfo;9394fn emit(&self, sink: &mut MachBuffer<Self>, emit_info: &Self::Info, state: &mut Self::State) {95// N.B.: we *must* not exceed the "worst-case size" used to compute96// where to insert islands, except when islands are explicitly triggered97// (with an `EmitIsland`). We check this in debug builds. This is `mut`98// to allow disabling the check for `JTSequence`, which is always99// emitted following an `EmitIsland`.100let mut start = sink.cur_offset();101pulley_emit(self, sink, emit_info, state, &mut start);102103let end = sink.cur_offset();104assert!(105(end - start) <= InstAndKind::<P>::worst_case_size(),106"encoded inst {self:?} longer than worst-case size: length: {}, Inst::worst_case_size() = {}",107end - start,108InstAndKind::<P>::worst_case_size()109);110}111112fn pretty_print_inst(&self, state: &mut Self::State) -> String {113self.print_with_state(state)114}115}116117fn pulley_emit<P>(118inst: &Inst,119sink: &mut MachBuffer<InstAndKind<P>>,120emit_info: &EmitInfo,121state: &mut EmitState<P>,122start_offset: &mut u32,123) where124P: PulleyTargetKind,125{126match inst {127// Pseduo-instructions that don't actually encode to anything.128Inst::Args { .. } | Inst::Rets { .. } | Inst::DummyUse { .. } => {}129130Inst::TrapIf { cond, code } => {131let trap = sink.defer_trap(*code);132let not_trap = sink.get_label();133134<InstAndKind<P>>::from(Inst::BrIf {135cond: cond.clone(),136taken: trap,137not_taken: not_trap,138})139.emit(sink, emit_info, state);140sink.bind_label(not_trap, &mut state.ctrl_plane);141}142143Inst::Nop => todo!(),144145Inst::GetSpecial { dst, reg } => enc::xmov(sink, dst, reg),146147Inst::LoadExtNameNear { dst, name, offset } => {148patch_pc_rel_offset(sink, |sink| enc::xpcadd(sink, dst, 0));149let end = sink.cur_offset();150sink.add_reloc_at_offset(end - 4, Reloc::PulleyPcRel, &**name, *offset);151}152153Inst::LoadExtNameFar { dst, name, offset } => {154let size = match P::pointer_width() {155PointerWidth::PointerWidth32 => {156enc::xconst32(sink, dst, 0);1574158}159PointerWidth::PointerWidth64 => {160enc::xconst64(sink, dst, 0);1618162}163};164let end = sink.cur_offset();165sink.add_reloc_at_offset(end - size, Reloc::Abs8, &**name, *offset);166}167168Inst::Call { info } => {169let start = sink.cur_offset();170171// If arguments happen to already be in the right register for the172// ABI then remove them from this list. Otherwise emit the173// appropriate `Call` instruction depending on how many arguments we174// have that aren't already in their correct register according to175// ABI conventions.176let mut args = &info.dest.args[..];177while !args.is_empty() && args.last().copied() == XReg::new(x_reg(args.len() - 1)) {178args = &args[..args.len() - 1];179}180patch_pc_rel_offset(sink, |sink| match args {181[] => enc::call(sink, 0),182[x0] => enc::call1(sink, x0, 0),183[x0, x1] => enc::call2(sink, x0, x1, 0),184[x0, x1, x2] => enc::call3(sink, x0, x1, x2, 0),185[x0, x1, x2, x3] => enc::call4(sink, x0, x1, x2, x3, 0),186_ => unreachable!(),187});188let end = sink.cur_offset();189sink.add_reloc_at_offset(end - 4, Reloc::PulleyPcRel, &info.dest.name, 0);190if let Some(s) = state.take_stack_map() {191let offset = sink.cur_offset();192sink.push_user_stack_map(state, offset, s);193}194195if let Some(try_call) = info.try_call_info.as_ref() {196sink.add_try_call_site(197Some(state.frame_layout.sp_to_fp()),198try_call.exception_handlers(&state.frame_layout),199);200} else {201sink.add_call_site();202}203204if info.patchable {205sink.add_patchable_call_site(sink.cur_offset() - start);206} else {207let adjust = -i32::try_from(info.callee_pop_size).unwrap();208for i in PulleyMachineDeps::<P>::gen_sp_reg_adjust(adjust) {209i.emit(sink, emit_info, state);210}211212// Load any stack-carried return values.213info.emit_retval_loads::<PulleyMachineDeps<P>, _, _>(214state.frame_layout().stackslots_size,215|inst| inst.emit(sink, emit_info, state),216|space_needed| Some(<InstAndKind<P>>::from(Inst::EmitIsland { space_needed })),217);218}219220// If this is a try-call, jump to the continuation221// (normal-return) block.222if let Some(try_call) = info.try_call_info.as_ref() {223let jmp = InstAndKind::<P>::from(Inst::Jump {224label: try_call.continuation,225});226jmp.emit(sink, emit_info, state);227}228229// We produce an island above if needed, so disable230// the worst-case-size check in this case.231*start_offset = sink.cur_offset();232}233234Inst::IndirectCall { info } => {235enc::call_indirect(sink, info.dest);236237if let Some(s) = state.take_stack_map() {238let offset = sink.cur_offset();239sink.push_user_stack_map(state, offset, s);240}241242if let Some(try_call) = info.try_call_info.as_ref() {243sink.add_try_call_site(244Some(state.frame_layout.sp_to_fp()),245try_call.exception_handlers(&state.frame_layout),246);247} else {248sink.add_call_site();249}250251let adjust = -i32::try_from(info.callee_pop_size).unwrap();252for i in PulleyMachineDeps::<P>::gen_sp_reg_adjust(adjust) {253i.emit(sink, emit_info, state);254}255256// Load any stack-carried return values.257info.emit_retval_loads::<PulleyMachineDeps<P>, _, _>(258state.frame_layout().stackslots_size,259|inst| inst.emit(sink, emit_info, state),260|space_needed| Some(<InstAndKind<P>>::from(Inst::EmitIsland { space_needed })),261);262263// If this is a try-call, jump to the continuation264// (normal-return) block.265if let Some(try_call) = info.try_call_info.as_ref() {266let jmp = InstAndKind::<P>::from(Inst::Jump {267label: try_call.continuation,268});269jmp.emit(sink, emit_info, state);270}271272// We produce an island above if needed, so disable273// the worst-case-size check in this case.274*start_offset = sink.cur_offset();275}276277Inst::ReturnCall { info } => {278emit_return_call_common_sequence(sink, emit_info, state, &info);279280// Emit an unconditional jump which is quite similar to `Inst::Call`281// except that a `jump` opcode is used instead of a `call` opcode.282sink.put1(pulley_interpreter::Opcode::Jump as u8);283sink.add_reloc(Reloc::PulleyPcRel, &info.dest, 0);284sink.put4(1);285286// Islands were manually handled in287// `emit_return_call_common_sequence`.288*start_offset = sink.cur_offset();289}290291Inst::ReturnIndirectCall { info } => {292emit_return_call_common_sequence(sink, emit_info, state, &info);293enc::xjump(sink, info.dest);294295// Islands were manually handled in296// `emit_return_call_common_sequence`.297*start_offset = sink.cur_offset();298}299300Inst::IndirectCallHost { info } => {301// Emit a relocation to fill in the actual immediate argument here302// in `call_indirect_host`.303sink.add_reloc(Reloc::PulleyCallIndirectHost, &info.dest, 0);304enc::call_indirect_host(sink, 0_u8);305306if let Some(s) = state.take_stack_map() {307let offset = sink.cur_offset();308sink.push_user_stack_map(state, offset, s);309}310311if let Some(try_call) = info.try_call_info.as_ref() {312sink.add_try_call_site(313Some(state.frame_layout.sp_to_fp()),314try_call.exception_handlers(&state.frame_layout),315);316} else {317sink.add_call_site();318}319320// If a callee pop is happening here that means that something has321// messed up, these are expected to be "very simple" signatures.322assert!(info.callee_pop_size == 0);323}324325Inst::Jump { label } => {326sink.use_label_at_offset(*start_offset + 1, *label, LabelUse::PcRel);327sink.add_uncond_branch(*start_offset, *start_offset + 5, *label);328patch_pc_rel_offset(sink, |sink| enc::jump(sink, 0));329}330331Inst::BrIf {332cond,333taken,334not_taken,335} => {336// Encode the inverted form of the branch. Branches always have337// their trailing 4 bytes as the relative offset which is what we're338// going to target here within the `MachBuffer`.339let mut inverted = SmallVec::<[u8; 16]>::new();340cond.invert().encode(&mut inverted, 0);341let len = inverted.len() as u32;342inverted.clear();343cond.invert()344.encode(&mut inverted, i32::try_from(len - 4).unwrap());345assert!(len > 4);346347// Use the `taken` label 4 bytes before the end of the instruction348// we're about to emit as that's the base of `PcRelOffset`. Note349// that the `Jump` here factors in the offset from the start of the350// instruction to the start of the relative offset, hence `len - 4`351// as the factor to adjust by.352let taken_end = *start_offset + len;353sink.use_label_at_offset(taken_end - 4, *taken, LabelUse::PcRel);354sink.add_cond_branch(*start_offset, taken_end, *taken, &inverted);355patch_pc_rel_offset(sink, |sink| cond.encode(sink, 0));356debug_assert_eq!(sink.cur_offset(), taken_end);357358// For the not-taken branch use an unconditional jump to the359// relevant label, and we know that the jump instruction is 5 bytes360// long where the final 4 bytes are the offset to jump by.361let not_taken_start = taken_end + 1;362let not_taken_end = not_taken_start + 4;363sink.use_label_at_offset(not_taken_start, *not_taken, LabelUse::PcRel);364sink.add_uncond_branch(taken_end, not_taken_end, *not_taken);365patch_pc_rel_offset(sink, |sink| enc::jump(sink, 0));366assert_eq!(sink.cur_offset(), not_taken_end);367}368369Inst::LoadAddr { dst, mem } => {370let base = mem.get_base_register();371let offset = mem.get_offset_with_state(state);372373if let Some(base) = base {374if offset == 0 {375enc::xmov(sink, dst, base);376} else {377if let Ok(offset) = i8::try_from(offset) {378enc::xconst8(sink, dst, offset);379} else if let Ok(offset) = i16::try_from(offset) {380enc::xconst16(sink, dst, offset);381} else {382enc::xconst32(sink, dst, offset);383}384385match P::pointer_width() {386PointerWidth::PointerWidth32 => {387enc::xadd32(sink, BinaryOperands::new(dst, base, dst))388}389PointerWidth::PointerWidth64 => {390enc::xadd64(sink, BinaryOperands::new(dst, base, dst))391}392}393}394} else {395unreachable!("all pulley amodes have a base register right now")396}397}398399Inst::XLoad {400dst,401mem,402ty,403flags,404} => {405use Endianness as E;406assert!(flags.trap_code().is_none());407let addr = AddrO32::Base {408addr: mem.get_base_register().unwrap(),409offset: mem.get_offset_with_state(state),410};411let endian = emit_info.endianness(*flags);412match *ty {413I8 => enc::xload8_u32_o32(sink, dst, addr),414I16 => match endian {415E::Little => enc::xload16le_s32_o32(sink, dst, addr),416E::Big => enc::xload16be_s32_o32(sink, dst, addr),417},418I32 => match endian {419E::Little => enc::xload32le_o32(sink, dst, addr),420E::Big => enc::xload32be_o32(sink, dst, addr),421},422I64 => match endian {423E::Little => enc::xload64le_o32(sink, dst, addr),424E::Big => enc::xload64be_o32(sink, dst, addr),425},426_ => unimplemented!("xload ty={ty:?}"),427}428}429430Inst::FLoad {431dst,432mem,433ty,434flags,435} => {436use Endianness as E;437assert!(flags.trap_code().is_none());438let addr = AddrO32::Base {439addr: mem.get_base_register().unwrap(),440offset: mem.get_offset_with_state(state),441};442let endian = emit_info.endianness(*flags);443match *ty {444F32 => match endian {445E::Little => enc::fload32le_o32(sink, dst, addr),446E::Big => enc::fload32be_o32(sink, dst, addr),447},448F64 => match endian {449E::Little => enc::fload64le_o32(sink, dst, addr),450E::Big => enc::fload64be_o32(sink, dst, addr),451},452_ => unimplemented!("fload ty={ty:?}"),453}454}455456Inst::VLoad {457dst,458mem,459ty,460flags,461} => {462assert!(flags.trap_code().is_none());463let addr = AddrO32::Base {464addr: mem.get_base_register().unwrap(),465offset: mem.get_offset_with_state(state),466};467let endian = emit_info.endianness(*flags);468assert_eq!(endian, Endianness::Little);469assert_eq!(ty.bytes(), 16);470enc::vload128le_o32(sink, dst, addr);471}472473Inst::XStore {474mem,475src,476ty,477flags,478} => {479use Endianness as E;480assert!(flags.trap_code().is_none());481let addr = AddrO32::Base {482addr: mem.get_base_register().unwrap(),483offset: mem.get_offset_with_state(state),484};485let endian = emit_info.endianness(*flags);486match *ty {487I8 => enc::xstore8_o32(sink, addr, src),488I16 => match endian {489E::Little => enc::xstore16le_o32(sink, addr, src),490E::Big => enc::xstore16be_o32(sink, addr, src),491},492I32 => match endian {493E::Little => enc::xstore32le_o32(sink, addr, src),494E::Big => enc::xstore32be_o32(sink, addr, src),495},496I64 => match endian {497E::Little => enc::xstore64le_o32(sink, addr, src),498E::Big => enc::xstore64be_o32(sink, addr, src),499},500_ => unimplemented!("xstore ty={ty:?}"),501}502}503504Inst::FStore {505mem,506src,507ty,508flags,509} => {510use Endianness as E;511assert!(flags.trap_code().is_none());512let addr = AddrO32::Base {513addr: mem.get_base_register().unwrap(),514offset: mem.get_offset_with_state(state),515};516let endian = emit_info.endianness(*flags);517match *ty {518F32 => match endian {519E::Little => enc::fstore32le_o32(sink, addr, src),520E::Big => enc::fstore32be_o32(sink, addr, src),521},522F64 => match endian {523E::Little => enc::fstore64le_o32(sink, addr, src),524E::Big => enc::fstore64be_o32(sink, addr, src),525},526_ => unimplemented!("fstore ty={ty:?}"),527}528}529530Inst::VStore {531mem,532src,533ty,534flags,535} => {536assert!(flags.trap_code().is_none());537let addr = AddrO32::Base {538addr: mem.get_base_register().unwrap(),539offset: mem.get_offset_with_state(state),540};541let endian = emit_info.endianness(*flags);542assert_eq!(endian, Endianness::Little);543assert_eq!(ty.bytes(), 16);544enc::vstore128le_o32(sink, addr, src);545}546547Inst::BrTable {548idx,549default,550targets,551} => {552// Encode the `br_table32` instruction directly which expects the553// next `amt` 4-byte integers to all be relative offsets. Each554// offset is the pc-relative offset of the branch destination.555//556// Pulley clamps the branch targets to the `amt` specified so the557// final branch target is the default jump target.558//559// Note that this instruction may have many branch targets so it560// manually checks to see if an island is needed. If so we emit a561// jump around the island before the `br_table32` itself gets562// emitted.563let amt = u32::try_from(targets.len() + 1).expect("too many branch targets");564let br_table_size = amt * 4 + 6;565if sink.island_needed(br_table_size) {566let label = sink.get_label();567<InstAndKind<P>>::from(Inst::Jump { label }).emit(sink, emit_info, state);568sink.emit_island(br_table_size, &mut state.ctrl_plane);569sink.bind_label(label, &mut state.ctrl_plane);570}571enc::br_table32(sink, *idx, amt);572for target in targets.iter() {573let offset = sink.cur_offset();574sink.use_label_at_offset(offset, *target, LabelUse::PcRel);575sink.put4(0);576}577let offset = sink.cur_offset();578sink.use_label_at_offset(offset, *default, LabelUse::PcRel);579sink.put4(0);580581// We manually handled `emit_island` above when dealing with582// `island_needed` so update the starting offset to the current583// offset so this instruction doesn't accidentally trigger584// the assertion that we're always under worst-case-size.585*start_offset = sink.cur_offset();586}587588Inst::Raw { raw } => super::generated::emit(raw, sink),589590Inst::EmitIsland { space_needed } => {591if sink.island_needed(*space_needed) {592let label = sink.get_label();593<InstAndKind<P>>::from(Inst::Jump { label }).emit(sink, emit_info, state);594sink.emit_island(space_needed + 8, &mut state.ctrl_plane);595sink.bind_label(label, &mut state.ctrl_plane);596}597}598599Inst::LabelAddress { dst, label } => {600patch_pc_rel_offset(sink, |sink| enc::xpcadd(sink, dst, 0));601let end = sink.cur_offset();602sink.use_label_at_offset(end - 4, *label, LabelUse::PcRel);603}604605Inst::SequencePoint { .. } => {606// Nothing.607}608}609}610611fn emit_return_call_common_sequence<T, P>(612sink: &mut MachBuffer<InstAndKind<P>>,613emit_info: &EmitInfo,614state: &mut EmitState<P>,615info: &ReturnCallInfo<T>,616) where617P: PulleyTargetKind,618{619// The return call sequence can potentially emit a lot of instructions, so620// lets emit an island here if we need it.621//622// It is difficult to calculate exactly how many instructions are going to623// be emitted, so we calculate it by emitting it into a disposable buffer,624// and then checking how many instructions were actually emitted.625let mut buffer = MachBuffer::new();626let mut fake_emit_state = state.clone();627628return_call_emit_impl(&mut buffer, emit_info, &mut fake_emit_state, info);629630// Finalize the buffer and get the number of bytes emitted.631let buffer = buffer.finish(&Default::default(), &mut Default::default());632let length = buffer.data().len() as u32;633634// And now emit the island inline with this instruction.635if sink.island_needed(length) {636let jump_around_label = sink.get_label();637<InstAndKind<P>>::gen_jump(jump_around_label).emit(sink, emit_info, state);638sink.emit_island(length + 4, &mut state.ctrl_plane);639sink.bind_label(jump_around_label, &mut state.ctrl_plane);640}641642// Now that we're done, emit the *actual* return sequence.643return_call_emit_impl(sink, emit_info, state, info);644}645646/// This should not be called directly, Instead prefer to call [emit_return_call_common_sequence].647fn return_call_emit_impl<T, P>(648sink: &mut MachBuffer<InstAndKind<P>>,649emit_info: &EmitInfo,650state: &mut EmitState<P>,651info: &ReturnCallInfo<T>,652) where653P: PulleyTargetKind,654{655let epilogue = <PulleyMachineDeps<P>>::gen_epilogue_frame_restore(656emit_info.call_conv,657&emit_info.shared_flags,658&emit_info.isa_flags,659&state.frame_layout,660);661662for inst in epilogue {663inst.emit(sink, emit_info, state);664}665666// Now that `sp` is restored to what it was on function entry it may need to667// be adjusted if the stack arguments of our own function differ from the668// stack arguments of the callee. Perform any necessary adjustment here.669//670// Note that this means that there's a brief window where stack arguments671// might be below `sp` in the case that the callee has more stack arguments672// than ourselves. That's in theory ok though as we're inventing the pulley673// ABI and nothing like async signals are happening that we have to worry674// about.675let incoming_args_diff =676i64::from(state.frame_layout().tail_args_size - info.new_stack_arg_size);677678if incoming_args_diff != 0 {679let amt = i32::try_from(incoming_args_diff).unwrap();680for inst in PulleyMachineDeps::<P>::gen_sp_reg_adjust(amt) {681inst.emit(sink, emit_info, state);682}683}684}685686/// Invokes `f` with `sink` and assumes that a single instruction is emitted687/// which ends with a Pulley `PcRelOffset`.688///689/// The offset at that location is patched to include the size of the690/// instruction before the relative offset since relocations will be applied to691/// the address of the offset and added to the contents at the offset. The692/// Pulley interpreter, however, will calculate the offset from the start of the693/// instruction, so this extra offset is required.694fn patch_pc_rel_offset<P>(695sink: &mut MachBuffer<InstAndKind<P>>,696f: impl FnOnce(&mut MachBuffer<InstAndKind<P>>),697) where698P: PulleyTargetKind,699{700let patch = sink.start_patchable();701let start = sink.cur_offset();702f(sink);703let end = sink.cur_offset();704let region = sink.end_patchable(patch).patch(sink);705let chunk = region.last_chunk_mut::<4>().unwrap();706assert_eq!(*chunk, [0, 0, 0, 0]);707*chunk = (end - start - 4).to_le_bytes();708}709710711