Path: blob/main/cranelift/assembler-x64/src/mem.rs
3073 views
//! Memory operands to instructions.12use alloc::string::{String, ToString};34use crate::api::{AsReg, CodeSink, Constant, KnownOffset, Label, TrapCode};5use crate::gpr::{self, NonRspGpr, Size};6use crate::rex::{Disp, RexPrefix, encode_modrm, encode_sib};78/// x64 memory addressing modes.9#[derive(Copy, Clone, Debug, PartialEq)]10#[cfg_attr(any(test, feature = "fuzz"), derive(arbitrary::Arbitrary))]11pub enum Amode<R: AsReg> {12ImmReg {13base: R,14simm32: AmodeOffsetPlusKnownOffset,15trap: Option<TrapCode>,16},17ImmRegRegShift {18base: R,19index: NonRspGpr<R>,20scale: Scale,21simm32: AmodeOffset,22trap: Option<TrapCode>,23},24RipRelative {25target: DeferredTarget,26},27}2829impl<R: AsReg> Amode<R> {30/// Return the [`TrapCode`] associated with this [`Amode`], if any.31pub fn trap_code(&self) -> Option<TrapCode> {32match self {33Amode::ImmReg { trap, .. } | Amode::ImmRegRegShift { trap, .. } => *trap,34Amode::RipRelative { .. } => None,35}36}3738/// Return the [`RexPrefix`] for each variant of this [`Amode`].39#[must_use]40pub(crate) fn as_rex_prefix(&self, enc_reg: u8, has_w_bit: bool, uses_8bit: bool) -> RexPrefix {41match self {42Amode::ImmReg { base, .. } => {43RexPrefix::mem_op(enc_reg, base.enc(), has_w_bit, uses_8bit)44}45Amode::ImmRegRegShift { base, index, .. } => {46RexPrefix::three_op(enc_reg, index.enc(), base.enc(), has_w_bit, uses_8bit)47}48Amode::RipRelative { .. } => RexPrefix::two_op(enc_reg, 0, has_w_bit, uses_8bit),49}50}5152/// Emit the ModR/M, SIB, and displacement suffixes as needed for this53/// `Amode`.54pub(crate) fn encode_rex_suffixes(55&self,56sink: &mut impl CodeSink,57enc_reg: u8,58bytes_at_end: u8,59evex_scaling: Option<i8>,60) {61emit_modrm_sib_disp(sink, enc_reg, self, bytes_at_end, evex_scaling);62}6364/// Return the registers for encoding the `b` and `x` bits (e.g., in a VEX65/// prefix).66///67/// During encoding, the `b` bit is set by the topmost bit (the fourth bit)68/// of either the `reg` register or, if this is a memory address, the `base`69/// register. The `x` bit is set by the `index` register, when used.70pub(crate) fn encode_bx_regs(&self) -> (Option<u8>, Option<u8>) {71match self {72Amode::ImmReg { base, .. } => (Some(base.enc()), None),73Amode::ImmRegRegShift { base, index, .. } => (Some(base.enc()), Some(index.enc())),74Amode::RipRelative { .. } => (None, None),75}76}77}7879/// A 32-bit immediate for address offsets.80#[derive(Clone, Copy, Debug, PartialEq)]81pub struct AmodeOffset(i32);8283impl AmodeOffset {84pub const ZERO: AmodeOffset = AmodeOffset::new(0);8586#[must_use]87pub const fn new(value: i32) -> Self {88Self(value)89}9091#[must_use]92pub fn value(self) -> i32 {93self.094}95}9697impl From<i32> for AmodeOffset {98fn from(value: i32) -> Self {99Self(value)100}101}102103impl core::fmt::LowerHex for AmodeOffset {104fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {105// This rather complex implementation is necessary to match how106// `capstone` pretty-prints memory immediates.107if self.0 == 0 {108return Ok(());109}110if self.0 < 0 {111write!(f, "-")?;112}113if self.0 > 9 || self.0 < -9 {114write!(f, "0x")?;115}116let abs = match self.0.checked_abs() {117Some(i) => i,118None => -2_147_483_648,119};120core::fmt::LowerHex::fmt(&abs, f)121}122}123124/// An [`AmodeOffset`] immediate with an optional known offset.125///126/// Cranelift does not know certain offsets until emission time. To accommodate127/// Cranelift, this structure stores an optional [`KnownOffset`]. The following128/// happens immediately before emission:129/// - the [`KnownOffset`] is looked up, mapping it to an offset value130/// - the [`AmodeOffset`] value is added to the offset value131#[derive(Copy, Clone, Debug, PartialEq)]132pub struct AmodeOffsetPlusKnownOffset {133pub simm32: AmodeOffset,134pub offset: Option<KnownOffset>,135}136137impl AmodeOffsetPlusKnownOffset {138pub const ZERO: AmodeOffsetPlusKnownOffset = AmodeOffsetPlusKnownOffset {139simm32: AmodeOffset::ZERO,140offset: None,141};142143/// # Panics144///145/// Panics if the sum of the immediate and the known offset value overflows.146#[must_use]147pub fn value(&self, sink: &impl CodeSink) -> i32 {148let known_offset = match self.offset {149Some(offset) => sink.known_offset(offset),150None => 0,151};152known_offset153.checked_add(self.simm32.value())154.expect("no wrapping")155}156}157158impl core::fmt::LowerHex for AmodeOffsetPlusKnownOffset {159fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {160if let Some(offset) = self.offset {161write!(f, "<offset:{offset}>+")?;162}163core::fmt::LowerHex::fmt(&self.simm32, f)164}165}166167/// For RIP-relative addressing, keep track of the [`CodeSink`]-specific target.168#[derive(Copy, Clone, Debug, PartialEq)]169#[cfg_attr(any(test, feature = "fuzz"), derive(arbitrary::Arbitrary))]170pub enum DeferredTarget {171Label(Label),172Constant(Constant),173None,174}175176impl<R: AsReg> core::fmt::Display for Amode<R> {177fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {178let pointer_width = Size::Quadword;179match self {180Amode::ImmReg { simm32, base, .. } => {181// Note: size is always 8; the address is 64 bits,182// even if the addressed operand is smaller.183let base = base.to_string(Some(pointer_width));184write!(f, "{simm32:x}({base})")185}186Amode::ImmRegRegShift {187simm32,188base,189index,190scale,191..192} => {193let base = base.to_string(Some(pointer_width));194let index = index.to_string(pointer_width);195let shift = scale.shift();196if shift > 1 {197write!(f, "{simm32:x}({base}, {index}, {shift})")198} else {199write!(f, "{simm32:x}({base}, {index})")200}201}202Amode::RipRelative { .. } => write!(f, "(%rip)"),203}204}205}206207/// The scaling factor for the index register in certain [`Amode`]s.208#[derive(Copy, Clone, Debug, PartialEq)]209#[cfg_attr(any(test, feature = "fuzz"), derive(arbitrary::Arbitrary))]210pub enum Scale {211One,212Two,213Four,214Eight,215}216217impl Scale {218/// Create a new [`Scale`] from its hardware encoding.219///220/// # Panics221///222/// Panics if `enc` is not a valid encoding for a scale (0-3).223#[must_use]224pub fn new(enc: u8) -> Self {225match enc {2260b00 => Scale::One,2270b01 => Scale::Two,2280b10 => Scale::Four,2290b11 => Scale::Eight,230_ => panic!("invalid scale encoding: {enc}"),231}232}233234/// Return the hardware encoding of this [`Scale`].235fn enc(&self) -> u8 {236match self {237Scale::One => 0b00,238Scale::Two => 0b01,239Scale::Four => 0b10,240Scale::Eight => 0b11,241}242}243244/// Return how much this [`Scale`] will shift the value in the index245/// register of the SIB byte.246///247/// This is useful for pretty-printing; when encoding, one usually needs248/// [`Scale::enc`].249fn shift(&self) -> u8 {2501 << self.enc()251}252}253254/// A general-purpose register or memory operand.255#[derive(Copy, Clone, Debug, PartialEq)]256#[cfg_attr(any(test, feature = "fuzz"), derive(arbitrary::Arbitrary))]257#[allow(258clippy::module_name_repetitions,259reason = "'GprMem' indicates this has GPR and memory variants"260)]261pub enum GprMem<R: AsReg, M: AsReg> {262Gpr(R),263Mem(Amode<M>),264}265266impl<R: AsReg, M: AsReg> GprMem<R, M> {267/// Pretty-print the operand.268pub fn to_string(&self, size: Size) -> String {269match self {270GprMem::Gpr(gpr) => gpr.to_string(Some(size)),271GprMem::Mem(amode) => amode.to_string(),272}273}274275/// Return the [`RexPrefix`] for each variant of this [`GprMem`].276#[must_use]277pub(crate) fn as_rex_prefix(&self, enc_reg: u8, has_w_bit: bool, uses_8bit: bool) -> RexPrefix {278match self {279GprMem::Gpr(rm) => RexPrefix::two_op(enc_reg, rm.enc(), has_w_bit, uses_8bit),280GprMem::Mem(amode) => amode.as_rex_prefix(enc_reg, has_w_bit, uses_8bit),281}282}283284/// Emit the ModR/M, SIB, and displacement suffixes for this [`GprMem`].285pub(crate) fn encode_rex_suffixes(286&self,287sink: &mut impl CodeSink,288enc_reg: u8,289bytes_at_end: u8,290evex_scaling: Option<i8>,291) {292match self {293GprMem::Gpr(gpr) => {294sink.put1(encode_modrm(0b11, enc_reg & 0b111, gpr.enc() & 0b111));295}296GprMem::Mem(amode) => {297amode.encode_rex_suffixes(sink, enc_reg, bytes_at_end, evex_scaling);298}299}300}301302/// Same as `XmmMem::encode_bx_regs`, but for `GprMem`.303pub(crate) fn encode_bx_regs(&self) -> (Option<u8>, Option<u8>) {304match self {305GprMem::Gpr(reg) => (Some(reg.enc()), None),306GprMem::Mem(amode) => amode.encode_bx_regs(),307}308}309}310311impl<R: AsReg, M: AsReg> From<R> for GprMem<R, M> {312fn from(reg: R) -> GprMem<R, M> {313GprMem::Gpr(reg)314}315}316317impl<R: AsReg, M: AsReg> From<Amode<M>> for GprMem<R, M> {318fn from(amode: Amode<M>) -> GprMem<R, M> {319GprMem::Mem(amode)320}321}322323/// An XMM register or memory operand.324#[derive(Copy, Clone, Debug)]325#[cfg_attr(any(test, feature = "fuzz"), derive(arbitrary::Arbitrary))]326#[allow(327clippy::module_name_repetitions,328reason = "'XmmMem' indicates this has Xmm and memory variants"329)]330pub enum XmmMem<R: AsReg, M: AsReg> {331Xmm(R),332Mem(Amode<M>),333}334335impl<R: AsReg, M: AsReg> XmmMem<R, M> {336/// Pretty-print the operand.337pub fn to_string(&self) -> String {338match self {339XmmMem::Xmm(xmm) => xmm.to_string(None),340XmmMem::Mem(amode) => amode.to_string(),341}342}343344/// Return the [`RexPrefix`] for each variant of this [`XmmMem`].345#[must_use]346pub(crate) fn as_rex_prefix(&self, enc_reg: u8, has_w_bit: bool, uses_8bit: bool) -> RexPrefix {347match self {348XmmMem::Xmm(rm) => RexPrefix::two_op(enc_reg, rm.enc(), has_w_bit, uses_8bit),349XmmMem::Mem(amode) => amode.as_rex_prefix(enc_reg, has_w_bit, uses_8bit),350}351}352353/// Emit the ModR/M, SIB, and displacement suffixes for this [`XmmMem`].354pub(crate) fn encode_rex_suffixes(355&self,356sink: &mut impl CodeSink,357enc_reg: u8,358bytes_at_end: u8,359evex_scaling: Option<i8>,360) {361match self {362XmmMem::Xmm(xmm) => {363sink.put1(encode_modrm(0b11, enc_reg & 0b111, xmm.enc() & 0b111));364}365XmmMem::Mem(amode) => {366amode.encode_rex_suffixes(sink, enc_reg, bytes_at_end, evex_scaling);367}368}369}370371/// Return the registers for encoding the `b` and `x` bits (e.g., in a VEX372/// prefix).373///374/// During encoding, the `b` bit is set by the topmost bit (the fourth bit)375/// of either the `reg` register or, if this is a memory address, the `base`376/// register. The `x` bit is set by the `index` register, when used.377pub(crate) fn encode_bx_regs(&self) -> (Option<u8>, Option<u8>) {378match self {379XmmMem::Xmm(reg) => (Some(reg.enc()), None),380XmmMem::Mem(amode) => amode.encode_bx_regs(),381}382}383}384385impl<R: AsReg, M: AsReg> From<R> for XmmMem<R, M> {386fn from(reg: R) -> XmmMem<R, M> {387XmmMem::Xmm(reg)388}389}390391impl<R: AsReg, M: AsReg> From<Amode<M>> for XmmMem<R, M> {392fn from(amode: Amode<M>) -> XmmMem<R, M> {393XmmMem::Mem(amode)394}395}396397/// Emit the ModRM/SIB/displacement sequence for a memory operand.398pub fn emit_modrm_sib_disp<R: AsReg>(399sink: &mut impl CodeSink,400enc_g: u8,401mem_e: &Amode<R>,402bytes_at_end: u8,403evex_scaling: Option<i8>,404) {405match *mem_e {406Amode::ImmReg { simm32, base, .. } => {407let enc_e = base.enc();408let mut imm = Disp::new(simm32.value(sink), evex_scaling);409410// Most base registers allow for a single ModRM byte plus an411// optional immediate. If rsp is the base register, however, then a412// SIB byte must be used.413let enc_e_low3 = enc_e & 7;414if enc_e_low3 == gpr::enc::RSP {415// Displacement from RSP is encoded with a SIB byte where416// the index and base are both encoded as RSP's encoding of417// 0b100. This special encoding means that the index register418// isn't used and the base is 0b100 with or without a419// REX-encoded 4th bit (e.g. rsp or r12)420sink.put1(encode_modrm(imm.m0d(), enc_g & 7, 0b100));421sink.put1(0b00_100_100);422imm.emit(sink);423} else {424// If the base register is rbp and there's no offset then force425// a 1-byte zero offset since otherwise the encoding would be426// invalid.427if enc_e_low3 == gpr::enc::RBP {428imm.force_immediate();429}430sink.put1(encode_modrm(imm.m0d(), enc_g & 7, enc_e & 7));431imm.emit(sink);432}433}434435Amode::ImmRegRegShift {436simm32,437base,438index,439scale,440..441} => {442let enc_base = base.enc();443let enc_index = index.enc();444445// Encoding of ModRM/SIB bytes don't allow the index register to446// ever be rsp. Note, though, that the encoding of r12, whose three447// lower bits match the encoding of rsp, is explicitly allowed with448// REX bytes so only rsp is disallowed.449assert!(enc_index != gpr::enc::RSP);450451// If the offset is zero then there is no immediate. Note, though,452// that if the base register's lower three bits are `101` then an453// offset must be present. This is a special case in the encoding of454// the SIB byte and requires an explicit displacement with rbp/r13.455let mut imm = Disp::new(simm32.value(), evex_scaling);456if enc_base & 7 == gpr::enc::RBP {457imm.force_immediate();458}459460// With the above determined encode the ModRM byte, then the SIB461// byte, then any immediate as necessary.462sink.put1(encode_modrm(imm.m0d(), enc_g & 7, 0b100));463sink.put1(encode_sib(scale.enc(), enc_index & 7, enc_base & 7));464imm.emit(sink);465}466467Amode::RipRelative { target } => {468// RIP-relative is mod=00, rm=101.469sink.put1(encode_modrm(0b00, enc_g & 7, 0b101));470471// Inform the code sink about the RIP-relative `target` at the472// current offset, emitting a `LabelUse`, a relocation, or etc as473// appropriate.474sink.use_target(target);475476// N.B.: some instructions (XmmRmRImm format for example)477// have bytes *after* the RIP-relative offset. The478// addressed location is relative to the end of the479// instruction, but the relocation is nominally relative480// to the end of the u32 field. So, to compensate for481// this, we emit a negative extra offset in the u32 field482// initially, and the relocation will add to it.483sink.put4(-(i32::from(bytes_at_end)) as u32);484}485}486}487488489