// Path: blob/main/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs
// (Viewer residue from the page this file was captured from: "1693 views".)
//! Pulley binary code emission.12use super::*;3use crate::ir::{self, Endianness};4use crate::isa;5use crate::isa::pulley_shared::PointerWidth;6use crate::isa::pulley_shared::abi::PulleyMachineDeps;7use core::marker::PhantomData;8use cranelift_control::ControlPlane;9use pulley_interpreter::encode as enc;10use pulley_interpreter::regs::BinaryOperands;1112pub struct EmitInfo {13call_conv: isa::CallConv,14shared_flags: settings::Flags,15isa_flags: crate::isa::pulley_shared::settings::Flags,16}1718impl EmitInfo {19pub(crate) fn new(20call_conv: isa::CallConv,21shared_flags: settings::Flags,22isa_flags: crate::isa::pulley_shared::settings::Flags,23) -> Self {24Self {25call_conv,26shared_flags,27isa_flags,28}29}3031fn endianness(&self, flags: MemFlags) -> Endianness {32flags.endianness(self.isa_flags.endianness())33}34}3536/// State carried between emissions of a sequence of instructions.37#[derive(Default, Clone, Debug)]38pub struct EmitState<P>39where40P: PulleyTargetKind,41{42_phantom: PhantomData<P>,43ctrl_plane: ControlPlane,44user_stack_map: Option<ir::UserStackMap>,45frame_layout: FrameLayout,46}4748impl<P> EmitState<P>49where50P: PulleyTargetKind,51{52fn take_stack_map(&mut self) -> Option<ir::UserStackMap> {53self.user_stack_map.take()54}55}5657impl<P> MachInstEmitState<InstAndKind<P>> for EmitState<P>58where59P: PulleyTargetKind,60{61fn new(abi: &Callee<PulleyMachineDeps<P>>, ctrl_plane: ControlPlane) -> Self {62EmitState {63_phantom: PhantomData,64ctrl_plane,65user_stack_map: None,66frame_layout: abi.frame_layout().clone(),67}68}6970fn pre_safepoint(&mut self, user_stack_map: Option<ir::UserStackMap>) {71self.user_stack_map = user_stack_map;72}7374fn ctrl_plane_mut(&mut self) -> &mut ControlPlane {75&mut self.ctrl_plane76}7778fn take_ctrl_plane(self) -> ControlPlane {79self.ctrl_plane80}8182fn frame_layout(&self) -> &FrameLayout {83&self.frame_layout84}85}8687impl<P> MachInstEmit for InstAndKind<P>88where89P: PulleyTargetKind,90{91type State = 
EmitState<P>;92type Info = EmitInfo;9394fn emit(&self, sink: &mut MachBuffer<Self>, emit_info: &Self::Info, state: &mut Self::State) {95// N.B.: we *must* not exceed the "worst-case size" used to compute96// where to insert islands, except when islands are explicitly triggered97// (with an `EmitIsland`). We check this in debug builds. This is `mut`98// to allow disabling the check for `JTSequence`, which is always99// emitted following an `EmitIsland`.100let mut start = sink.cur_offset();101pulley_emit(self, sink, emit_info, state, &mut start);102103let end = sink.cur_offset();104assert!(105(end - start) <= InstAndKind::<P>::worst_case_size(),106"encoded inst {self:?} longer than worst-case size: length: {}, Inst::worst_case_size() = {}",107end - start,108InstAndKind::<P>::worst_case_size()109);110}111112fn pretty_print_inst(&self, state: &mut Self::State) -> String {113self.print_with_state(state)114}115}116117fn pulley_emit<P>(118inst: &Inst,119sink: &mut MachBuffer<InstAndKind<P>>,120emit_info: &EmitInfo,121state: &mut EmitState<P>,122start_offset: &mut u32,123) where124P: PulleyTargetKind,125{126match inst {127// Pseduo-instructions that don't actually encode to anything.128Inst::Args { .. } | Inst::Rets { .. } | Inst::DummyUse { .. 
} => {}129130Inst::TrapIf { cond, code } => {131let trap = sink.defer_trap(*code);132let not_trap = sink.get_label();133134<InstAndKind<P>>::from(Inst::BrIf {135cond: cond.clone(),136taken: trap,137not_taken: not_trap,138})139.emit(sink, emit_info, state);140sink.bind_label(not_trap, &mut state.ctrl_plane);141}142143Inst::Nop => todo!(),144145Inst::GetSpecial { dst, reg } => enc::xmov(sink, dst, reg),146147Inst::LoadExtNameNear { dst, name, offset } => {148patch_pc_rel_offset(sink, |sink| enc::xpcadd(sink, dst, 0));149let end = sink.cur_offset();150sink.add_reloc_at_offset(end - 4, Reloc::PulleyPcRel, &**name, *offset);151}152153Inst::LoadExtNameFar { dst, name, offset } => {154let size = match P::pointer_width() {155PointerWidth::PointerWidth32 => {156enc::xconst32(sink, dst, 0);1574158}159PointerWidth::PointerWidth64 => {160enc::xconst64(sink, dst, 0);1618162}163};164let end = sink.cur_offset();165sink.add_reloc_at_offset(end - size, Reloc::Abs8, &**name, *offset);166}167168Inst::Call { info } => {169// If arguments happen to already be in the right register for the170// ABI then remove them from this list. 
Otherwise emit the171// appropriate `Call` instruction depending on how many arguments we172// have that aren't already in their correct register according to173// ABI conventions.174let mut args = &info.dest.args[..];175while !args.is_empty() && args.last().copied() == XReg::new(x_reg(args.len() - 1)) {176args = &args[..args.len() - 1];177}178patch_pc_rel_offset(sink, |sink| match args {179[] => enc::call(sink, 0),180[x0] => enc::call1(sink, x0, 0),181[x0, x1] => enc::call2(sink, x0, x1, 0),182[x0, x1, x2] => enc::call3(sink, x0, x1, x2, 0),183[x0, x1, x2, x3] => enc::call4(sink, x0, x1, x2, x3, 0),184_ => unreachable!(),185});186let end = sink.cur_offset();187sink.add_reloc_at_offset(end - 4, Reloc::PulleyPcRel, &info.dest.name, 0);188if let Some(s) = state.take_stack_map() {189let offset = sink.cur_offset();190sink.push_user_stack_map(state, offset, s);191}192193if let Some(try_call) = info.try_call_info.as_ref() {194sink.add_try_call_site(195Some(state.frame_layout.sp_to_fp()),196try_call.exception_handlers(&state.frame_layout),197);198} else {199sink.add_call_site();200}201202let adjust = -i32::try_from(info.callee_pop_size).unwrap();203for i in PulleyMachineDeps::<P>::gen_sp_reg_adjust(adjust) {204i.emit(sink, emit_info, state);205}206207// Load any stack-carried return values.208info.emit_retval_loads::<PulleyMachineDeps<P>, _, _>(209state.frame_layout().stackslots_size,210|inst| inst.emit(sink, emit_info, state),211|space_needed| Some(<InstAndKind<P>>::from(Inst::EmitIsland { space_needed })),212);213214// If this is a try-call, jump to the continuation215// (normal-return) block.216if let Some(try_call) = info.try_call_info.as_ref() {217let jmp = InstAndKind::<P>::from(Inst::Jump {218label: try_call.continuation,219});220jmp.emit(sink, emit_info, state);221}222223// We produce an island above if needed, so disable224// the worst-case-size check in this case.225*start_offset = sink.cur_offset();226}227228Inst::IndirectCall { info } => 
{229enc::call_indirect(sink, info.dest);230231if let Some(s) = state.take_stack_map() {232let offset = sink.cur_offset();233sink.push_user_stack_map(state, offset, s);234}235236if let Some(try_call) = info.try_call_info.as_ref() {237sink.add_try_call_site(238Some(state.frame_layout.sp_to_fp()),239try_call.exception_handlers(&state.frame_layout),240);241} else {242sink.add_call_site();243}244245let adjust = -i32::try_from(info.callee_pop_size).unwrap();246for i in PulleyMachineDeps::<P>::gen_sp_reg_adjust(adjust) {247i.emit(sink, emit_info, state);248}249250// Load any stack-carried return values.251info.emit_retval_loads::<PulleyMachineDeps<P>, _, _>(252state.frame_layout().stackslots_size,253|inst| inst.emit(sink, emit_info, state),254|space_needed| Some(<InstAndKind<P>>::from(Inst::EmitIsland { space_needed })),255);256257// If this is a try-call, jump to the continuation258// (normal-return) block.259if let Some(try_call) = info.try_call_info.as_ref() {260let jmp = InstAndKind::<P>::from(Inst::Jump {261label: try_call.continuation,262});263jmp.emit(sink, emit_info, state);264}265266// We produce an island above if needed, so disable267// the worst-case-size check in this case.268*start_offset = sink.cur_offset();269}270271Inst::ReturnCall { info } => {272emit_return_call_common_sequence(sink, emit_info, state, &info);273274// Emit an unconditional jump which is quite similar to `Inst::Call`275// except that a `jump` opcode is used instead of a `call` opcode.276sink.put1(pulley_interpreter::Opcode::Jump as u8);277sink.add_reloc(Reloc::PulleyPcRel, &info.dest, 0);278sink.put4(1);279280// Islands were manually handled in281// `emit_return_call_common_sequence`.282*start_offset = sink.cur_offset();283}284285Inst::ReturnIndirectCall { info } => {286emit_return_call_common_sequence(sink, emit_info, state, &info);287enc::xjump(sink, info.dest);288289// Islands were manually handled in290// `emit_return_call_common_sequence`.291*start_offset = 
sink.cur_offset();292}293294Inst::IndirectCallHost { info } => {295// Emit a relocation to fill in the actual immediate argument here296// in `call_indirect_host`.297sink.add_reloc(Reloc::PulleyCallIndirectHost, &info.dest, 0);298enc::call_indirect_host(sink, 0_u8);299300if let Some(s) = state.take_stack_map() {301let offset = sink.cur_offset();302sink.push_user_stack_map(state, offset, s);303}304305if let Some(try_call) = info.try_call_info.as_ref() {306sink.add_try_call_site(307Some(state.frame_layout.sp_to_fp()),308try_call.exception_handlers(&state.frame_layout),309);310} else {311sink.add_call_site();312}313314// If a callee pop is happening here that means that something has315// messed up, these are expected to be "very simple" signatures.316assert!(info.callee_pop_size == 0);317}318319Inst::Jump { label } => {320sink.use_label_at_offset(*start_offset + 1, *label, LabelUse::PcRel);321sink.add_uncond_branch(*start_offset, *start_offset + 5, *label);322patch_pc_rel_offset(sink, |sink| enc::jump(sink, 0));323}324325Inst::BrIf {326cond,327taken,328not_taken,329} => {330// Encode the inverted form of the branch. Branches always have331// their trailing 4 bytes as the relative offset which is what we're332// going to target here within the `MachBuffer`.333let mut inverted = SmallVec::<[u8; 16]>::new();334cond.invert().encode(&mut inverted, 0);335let len = inverted.len() as u32;336inverted.clear();337cond.invert()338.encode(&mut inverted, i32::try_from(len - 4).unwrap());339assert!(len > 4);340341// Use the `taken` label 4 bytes before the end of the instruction342// we're about to emit as that's the base of `PcRelOffset`. 
Note343// that the `Jump` here factors in the offset from the start of the344// instruction to the start of the relative offset, hence `len - 4`345// as the factor to adjust by.346let taken_end = *start_offset + len;347sink.use_label_at_offset(taken_end - 4, *taken, LabelUse::PcRel);348sink.add_cond_branch(*start_offset, taken_end, *taken, &inverted);349patch_pc_rel_offset(sink, |sink| cond.encode(sink, 0));350debug_assert_eq!(sink.cur_offset(), taken_end);351352// For the not-taken branch use an unconditional jump to the353// relevant label, and we know that the jump instruction is 5 bytes354// long where the final 4 bytes are the offset to jump by.355let not_taken_start = taken_end + 1;356let not_taken_end = not_taken_start + 4;357sink.use_label_at_offset(not_taken_start, *not_taken, LabelUse::PcRel);358sink.add_uncond_branch(taken_end, not_taken_end, *not_taken);359patch_pc_rel_offset(sink, |sink| enc::jump(sink, 0));360assert_eq!(sink.cur_offset(), not_taken_end);361}362363Inst::LoadAddr { dst, mem } => {364let base = mem.get_base_register();365let offset = mem.get_offset_with_state(state);366367if let Some(base) = base {368if offset == 0 {369enc::xmov(sink, dst, base);370} else {371if let Ok(offset) = i8::try_from(offset) {372enc::xconst8(sink, dst, offset);373} else if let Ok(offset) = i16::try_from(offset) {374enc::xconst16(sink, dst, offset);375} else {376enc::xconst32(sink, dst, offset);377}378379match P::pointer_width() {380PointerWidth::PointerWidth32 => {381enc::xadd32(sink, BinaryOperands::new(dst, base, dst))382}383PointerWidth::PointerWidth64 => {384enc::xadd64(sink, BinaryOperands::new(dst, base, dst))385}386}387}388} else {389unreachable!("all pulley amodes have a base register right now")390}391}392393Inst::XLoad {394dst,395mem,396ty,397flags,398} => {399use Endianness as E;400assert!(flags.trap_code().is_none());401let addr = AddrO32::Base {402addr: mem.get_base_register().unwrap(),403offset: mem.get_offset_with_state(state),404};405let endian = 
emit_info.endianness(*flags);406match *ty {407I8 => enc::xload8_u32_o32(sink, dst, addr),408I16 => match endian {409E::Little => enc::xload16le_s32_o32(sink, dst, addr),410E::Big => enc::xload16be_s32_o32(sink, dst, addr),411},412I32 => match endian {413E::Little => enc::xload32le_o32(sink, dst, addr),414E::Big => enc::xload32be_o32(sink, dst, addr),415},416I64 => match endian {417E::Little => enc::xload64le_o32(sink, dst, addr),418E::Big => enc::xload64be_o32(sink, dst, addr),419},420_ => unimplemented!("xload ty={ty:?}"),421}422}423424Inst::FLoad {425dst,426mem,427ty,428flags,429} => {430use Endianness as E;431assert!(flags.trap_code().is_none());432let addr = AddrO32::Base {433addr: mem.get_base_register().unwrap(),434offset: mem.get_offset_with_state(state),435};436let endian = emit_info.endianness(*flags);437match *ty {438F32 => match endian {439E::Little => enc::fload32le_o32(sink, dst, addr),440E::Big => enc::fload32be_o32(sink, dst, addr),441},442F64 => match endian {443E::Little => enc::fload64le_o32(sink, dst, addr),444E::Big => enc::fload64be_o32(sink, dst, addr),445},446_ => unimplemented!("fload ty={ty:?}"),447}448}449450Inst::VLoad {451dst,452mem,453ty,454flags,455} => {456assert!(flags.trap_code().is_none());457let addr = AddrO32::Base {458addr: mem.get_base_register().unwrap(),459offset: mem.get_offset_with_state(state),460};461let endian = emit_info.endianness(*flags);462assert_eq!(endian, Endianness::Little);463assert_eq!(ty.bytes(), 16);464enc::vload128le_o32(sink, dst, addr);465}466467Inst::XStore {468mem,469src,470ty,471flags,472} => {473use Endianness as E;474assert!(flags.trap_code().is_none());475let addr = AddrO32::Base {476addr: mem.get_base_register().unwrap(),477offset: mem.get_offset_with_state(state),478};479let endian = emit_info.endianness(*flags);480match *ty {481I8 => enc::xstore8_o32(sink, addr, src),482I16 => match endian {483E::Little => enc::xstore16le_o32(sink, addr, src),484E::Big => enc::xstore16be_o32(sink, addr, 
src),485},486I32 => match endian {487E::Little => enc::xstore32le_o32(sink, addr, src),488E::Big => enc::xstore32be_o32(sink, addr, src),489},490I64 => match endian {491E::Little => enc::xstore64le_o32(sink, addr, src),492E::Big => enc::xstore64be_o32(sink, addr, src),493},494_ => unimplemented!("xstore ty={ty:?}"),495}496}497498Inst::FStore {499mem,500src,501ty,502flags,503} => {504use Endianness as E;505assert!(flags.trap_code().is_none());506let addr = AddrO32::Base {507addr: mem.get_base_register().unwrap(),508offset: mem.get_offset_with_state(state),509};510let endian = emit_info.endianness(*flags);511match *ty {512F32 => match endian {513E::Little => enc::fstore32le_o32(sink, addr, src),514E::Big => enc::fstore32be_o32(sink, addr, src),515},516F64 => match endian {517E::Little => enc::fstore64le_o32(sink, addr, src),518E::Big => enc::fstore64be_o32(sink, addr, src),519},520_ => unimplemented!("fstore ty={ty:?}"),521}522}523524Inst::VStore {525mem,526src,527ty,528flags,529} => {530assert!(flags.trap_code().is_none());531let addr = AddrO32::Base {532addr: mem.get_base_register().unwrap(),533offset: mem.get_offset_with_state(state),534};535let endian = emit_info.endianness(*flags);536assert_eq!(endian, Endianness::Little);537assert_eq!(ty.bytes(), 16);538enc::vstore128le_o32(sink, addr, src);539}540541Inst::BrTable {542idx,543default,544targets,545} => {546// Encode the `br_table32` instruction directly which expects the547// next `amt` 4-byte integers to all be relative offsets. Each548// offset is the pc-relative offset of the branch destination.549//550// Pulley clamps the branch targets to the `amt` specified so the551// final branch target is the default jump target.552//553// Note that this instruction may have many branch targets so it554// manually checks to see if an island is needed. 
If so we emit a555// jump around the island before the `br_table32` itself gets556// emitted.557let amt = u32::try_from(targets.len() + 1).expect("too many branch targets");558let br_table_size = amt * 4 + 6;559if sink.island_needed(br_table_size) {560let label = sink.get_label();561<InstAndKind<P>>::from(Inst::Jump { label }).emit(sink, emit_info, state);562sink.emit_island(br_table_size, &mut state.ctrl_plane);563sink.bind_label(label, &mut state.ctrl_plane);564}565enc::br_table32(sink, *idx, amt);566for target in targets.iter() {567let offset = sink.cur_offset();568sink.use_label_at_offset(offset, *target, LabelUse::PcRel);569sink.put4(0);570}571let offset = sink.cur_offset();572sink.use_label_at_offset(offset, *default, LabelUse::PcRel);573sink.put4(0);574575// We manually handled `emit_island` above when dealing with576// `island_needed` so update the starting offset to the current577// offset so this instruction doesn't accidentally trigger578// the assertion that we're always under worst-case-size.579*start_offset = sink.cur_offset();580}581582Inst::Raw { raw } => super::generated::emit(raw, sink),583584Inst::EmitIsland { space_needed } => {585if sink.island_needed(*space_needed) {586let label = sink.get_label();587<InstAndKind<P>>::from(Inst::Jump { label }).emit(sink, emit_info, state);588sink.emit_island(space_needed + 8, &mut state.ctrl_plane);589sink.bind_label(label, &mut state.ctrl_plane);590}591}592593Inst::LabelAddress { dst, label } => {594patch_pc_rel_offset(sink, |sink| enc::xpcadd(sink, dst, 0));595let end = sink.cur_offset();596sink.use_label_at_offset(end - 4, *label, LabelUse::PcRel);597}598}599}600601fn emit_return_call_common_sequence<T, P>(602sink: &mut MachBuffer<InstAndKind<P>>,603emit_info: &EmitInfo,604state: &mut EmitState<P>,605info: &ReturnCallInfo<T>,606) where607P: PulleyTargetKind,608{609// The return call sequence can potentially emit a lot of instructions, so610// lets emit an island here if we need it.611//612// It is 
difficult to calculate exactly how many instructions are going to613// be emitted, so we calculate it by emitting it into a disposable buffer,614// and then checking how many instructions were actually emitted.615let mut buffer = MachBuffer::new();616let mut fake_emit_state = state.clone();617618return_call_emit_impl(&mut buffer, emit_info, &mut fake_emit_state, info);619620// Finalize the buffer and get the number of bytes emitted.621let buffer = buffer.finish(&Default::default(), &mut Default::default());622let length = buffer.data().len() as u32;623624// And now emit the island inline with this instruction.625if sink.island_needed(length) {626let jump_around_label = sink.get_label();627<InstAndKind<P>>::gen_jump(jump_around_label).emit(sink, emit_info, state);628sink.emit_island(length + 4, &mut state.ctrl_plane);629sink.bind_label(jump_around_label, &mut state.ctrl_plane);630}631632// Now that we're done, emit the *actual* return sequence.633return_call_emit_impl(sink, emit_info, state, info);634}635636/// This should not be called directly, Instead prefer to call [emit_return_call_common_sequence].637fn return_call_emit_impl<T, P>(638sink: &mut MachBuffer<InstAndKind<P>>,639emit_info: &EmitInfo,640state: &mut EmitState<P>,641info: &ReturnCallInfo<T>,642) where643P: PulleyTargetKind,644{645let epilogue = <PulleyMachineDeps<P>>::gen_epilogue_frame_restore(646emit_info.call_conv,647&emit_info.shared_flags,648&emit_info.isa_flags,649&state.frame_layout,650);651652for inst in epilogue {653inst.emit(sink, emit_info, state);654}655656// Now that `sp` is restored to what it was on function entry it may need to657// be adjusted if the stack arguments of our own function differ from the658// stack arguments of the callee. Perform any necessary adjustment here.659//660// Note that this means that there's a brief window where stack arguments661// might be below `sp` in the case that the callee has more stack arguments662// than ourselves. 
That's in theory ok though as we're inventing the pulley663// ABI and nothing like async signals are happening that we have to worry664// about.665let incoming_args_diff =666i64::from(state.frame_layout().tail_args_size - info.new_stack_arg_size);667668if incoming_args_diff != 0 {669let amt = i32::try_from(incoming_args_diff).unwrap();670for inst in PulleyMachineDeps::<P>::gen_sp_reg_adjust(amt) {671inst.emit(sink, emit_info, state);672}673}674}675676/// Invokes `f` with `sink` and assumes that a single instruction is emitted677/// which ends with a Pulley `PcRelOffset`.678///679/// The offset at that location is patched to include the size of the680/// instruction before the relative offset since relocations will be applied to681/// the address of the offset and added to the contents at the offset. The682/// Pulley interpreter, however, will calculate the offset from the start of the683/// instruction, so this extra offset is required.684fn patch_pc_rel_offset<P>(685sink: &mut MachBuffer<InstAndKind<P>>,686f: impl FnOnce(&mut MachBuffer<InstAndKind<P>>),687) where688P: PulleyTargetKind,689{690let patch = sink.start_patchable();691let start = sink.cur_offset();692f(sink);693let end = sink.cur_offset();694let region = sink.end_patchable(patch).patch(sink);695let chunk = region.last_chunk_mut::<4>().unwrap();696assert_eq!(*chunk, [0, 0, 0, 0]);697*chunk = (end - start - 4).to_le_bytes();698}699700701