// Path: blob/main/cranelift/assembler-x64/src/mem.rs
// 1692 views
//! Memory operands to instructions.12use crate::api::{AsReg, CodeSink, Constant, KnownOffset, Label, TrapCode};3use crate::gpr::{self, NonRspGpr, Size};4use crate::rex::{Disp, RexPrefix, encode_modrm, encode_sib};56/// x64 memory addressing modes.7#[derive(Copy, Clone, Debug, PartialEq)]8#[cfg_attr(any(test, feature = "fuzz"), derive(arbitrary::Arbitrary))]9pub enum Amode<R: AsReg> {10ImmReg {11base: R,12simm32: AmodeOffsetPlusKnownOffset,13trap: Option<TrapCode>,14},15ImmRegRegShift {16base: R,17index: NonRspGpr<R>,18scale: Scale,19simm32: AmodeOffset,20trap: Option<TrapCode>,21},22RipRelative {23target: DeferredTarget,24},25}2627impl<R: AsReg> Amode<R> {28/// Return the [`TrapCode`] associated with this [`Amode`], if any.29pub fn trap_code(&self) -> Option<TrapCode> {30match self {31Amode::ImmReg { trap, .. } | Amode::ImmRegRegShift { trap, .. } => *trap,32Amode::RipRelative { .. } => None,33}34}3536/// Return the [`RexPrefix`] for each variant of this [`Amode`].37#[must_use]38pub(crate) fn as_rex_prefix(&self, enc_reg: u8, has_w_bit: bool, uses_8bit: bool) -> RexPrefix {39match self {40Amode::ImmReg { base, .. } => {41RexPrefix::mem_op(enc_reg, base.enc(), has_w_bit, uses_8bit)42}43Amode::ImmRegRegShift { base, index, .. } => {44RexPrefix::three_op(enc_reg, index.enc(), base.enc(), has_w_bit, uses_8bit)45}46Amode::RipRelative { .. } => RexPrefix::two_op(enc_reg, 0, has_w_bit, uses_8bit),47}48}4950/// Emit the ModR/M, SIB, and displacement suffixes as needed for this51/// `Amode`.52pub(crate) fn encode_rex_suffixes(53&self,54sink: &mut impl CodeSink,55enc_reg: u8,56bytes_at_end: u8,57evex_scaling: Option<i8>,58) {59emit_modrm_sib_disp(sink, enc_reg, self, bytes_at_end, evex_scaling);60}6162/// Return the registers for encoding the `b` and `x` bits (e.g., in a VEX63/// prefix).64///65/// During encoding, the `b` bit is set by the topmost bit (the fourth bit)66/// of either the `reg` register or, if this is a memory address, the `base`67/// register. 
The `x` bit is set by the `index` register, when used.68pub(crate) fn encode_bx_regs(&self) -> (Option<u8>, Option<u8>) {69match self {70Amode::ImmReg { base, .. } => (Some(base.enc()), None),71Amode::ImmRegRegShift { base, index, .. } => (Some(base.enc()), Some(index.enc())),72Amode::RipRelative { .. } => (None, None),73}74}75}7677/// A 32-bit immediate for address offsets.78#[derive(Clone, Copy, Debug, PartialEq)]79pub struct AmodeOffset(i32);8081impl AmodeOffset {82pub const ZERO: AmodeOffset = AmodeOffset::new(0);8384#[must_use]85pub const fn new(value: i32) -> Self {86Self(value)87}8889#[must_use]90pub fn value(self) -> i32 {91self.092}93}9495impl From<i32> for AmodeOffset {96fn from(value: i32) -> Self {97Self(value)98}99}100101impl std::fmt::LowerHex for AmodeOffset {102fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {103// This rather complex implementation is necessary to match how104// `capstone` pretty-prints memory immediates.105if self.0 == 0 {106return Ok(());107}108if self.0 < 0 {109write!(f, "-")?;110}111if self.0 > 9 || self.0 < -9 {112write!(f, "0x")?;113}114let abs = match self.0.checked_abs() {115Some(i) => i,116None => -2_147_483_648,117};118std::fmt::LowerHex::fmt(&abs, f)119}120}121122/// An [`AmodeOffset`] immediate with an optional known offset.123///124/// Cranelift does not know certain offsets until emission time. To accommodate125/// Cranelift, this structure stores an optional [`KnownOffset`]. 
The following126/// happens immediately before emission:127/// - the [`KnownOffset`] is looked up, mapping it to an offset value128/// - the [`Simm32`] value is added to the offset value129#[derive(Copy, Clone, Debug, PartialEq)]130pub struct AmodeOffsetPlusKnownOffset {131pub simm32: AmodeOffset,132pub offset: Option<KnownOffset>,133}134135impl AmodeOffsetPlusKnownOffset {136pub const ZERO: AmodeOffsetPlusKnownOffset = AmodeOffsetPlusKnownOffset {137simm32: AmodeOffset::ZERO,138offset: None,139};140141/// # Panics142///143/// Panics if the sum of the immediate and the known offset value overflows.144#[must_use]145pub fn value(&self, sink: &impl CodeSink) -> i32 {146let known_offset = match self.offset {147Some(offset) => sink.known_offset(offset),148None => 0,149};150known_offset151.checked_add(self.simm32.value())152.expect("no wrapping")153}154}155156impl std::fmt::LowerHex for AmodeOffsetPlusKnownOffset {157fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {158if let Some(offset) = self.offset {159write!(f, "<offset:{offset}>+")?;160}161std::fmt::LowerHex::fmt(&self.simm32, f)162}163}164165/// For RIP-relative addressing, keep track of the [`CodeSink`]-specific target.166#[derive(Copy, Clone, Debug, PartialEq)]167#[cfg_attr(any(test, feature = "fuzz"), derive(arbitrary::Arbitrary))]168pub enum DeferredTarget {169Label(Label),170Constant(Constant),171None,172}173174impl<R: AsReg> std::fmt::Display for Amode<R> {175fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {176let pointer_width = Size::Quadword;177match self {178Amode::ImmReg { simm32, base, .. 
} => {179// Note: size is always 8; the address is 64 bits,180// even if the addressed operand is smaller.181let base = base.to_string(Some(pointer_width));182write!(f, "{simm32:x}({base})")183}184Amode::ImmRegRegShift {185simm32,186base,187index,188scale,189..190} => {191let base = base.to_string(Some(pointer_width));192let index = index.to_string(pointer_width);193let shift = scale.shift();194if shift > 1 {195write!(f, "{simm32:x}({base}, {index}, {shift})")196} else {197write!(f, "{simm32:x}({base}, {index})")198}199}200Amode::RipRelative { .. } => write!(f, "(%rip)"),201}202}203}204205/// The scaling factor for the index register in certain [`Amode`]s.206#[derive(Copy, Clone, Debug, PartialEq)]207#[cfg_attr(any(test, feature = "fuzz"), derive(arbitrary::Arbitrary))]208pub enum Scale {209One,210Two,211Four,212Eight,213}214215impl Scale {216/// Create a new [`Scale`] from its hardware encoding.217///218/// # Panics219///220/// Panics if `enc` is not a valid encoding for a scale (0-3).221#[must_use]222pub fn new(enc: u8) -> Self {223match enc {2240b00 => Scale::One,2250b01 => Scale::Two,2260b10 => Scale::Four,2270b11 => Scale::Eight,228_ => panic!("invalid scale encoding: {enc}"),229}230}231232/// Return the hardware encoding of this [`Scale`].233fn enc(&self) -> u8 {234match self {235Scale::One => 0b00,236Scale::Two => 0b01,237Scale::Four => 0b10,238Scale::Eight => 0b11,239}240}241242/// Return how much this [`Scale`] will shift the value in the index243/// register of the SIB byte.244///245/// This is useful for pretty-printing; when encoding, one usually needs246/// [`Scale::enc`].247fn shift(&self) -> u8 {2481 << self.enc()249}250}251252/// A general-purpose register or memory operand.253#[derive(Copy, Clone, Debug, PartialEq)]254#[cfg_attr(any(test, feature = "fuzz"), derive(arbitrary::Arbitrary))]255#[allow(256clippy::module_name_repetitions,257reason = "'GprMem' indicates this has GPR and memory variants"258)]259pub enum GprMem<R: AsReg, M: AsReg> 
{260Gpr(R),261Mem(Amode<M>),262}263264impl<R: AsReg, M: AsReg> GprMem<R, M> {265/// Pretty-print the operand.266pub fn to_string(&self, size: Size) -> String {267match self {268GprMem::Gpr(gpr) => gpr.to_string(Some(size)),269GprMem::Mem(amode) => amode.to_string(),270}271}272273/// Return the [`RexPrefix`] for each variant of this [`GprMem`].274#[must_use]275pub(crate) fn as_rex_prefix(&self, enc_reg: u8, has_w_bit: bool, uses_8bit: bool) -> RexPrefix {276match self {277GprMem::Gpr(rm) => RexPrefix::two_op(enc_reg, rm.enc(), has_w_bit, uses_8bit),278GprMem::Mem(amode) => amode.as_rex_prefix(enc_reg, has_w_bit, uses_8bit),279}280}281282/// Emit the ModR/M, SIB, and displacement suffixes for this [`GprMem`].283pub(crate) fn encode_rex_suffixes(284&self,285sink: &mut impl CodeSink,286enc_reg: u8,287bytes_at_end: u8,288evex_scaling: Option<i8>,289) {290match self {291GprMem::Gpr(gpr) => {292sink.put1(encode_modrm(0b11, enc_reg & 0b111, gpr.enc() & 0b111));293}294GprMem::Mem(amode) => {295amode.encode_rex_suffixes(sink, enc_reg, bytes_at_end, evex_scaling);296}297}298}299300/// Same as `XmmMem::encode_bx_regs`, but for `GprMem`.301pub(crate) fn encode_bx_regs(&self) -> (Option<u8>, Option<u8>) {302match self {303GprMem::Gpr(reg) => (Some(reg.enc()), None),304GprMem::Mem(amode) => amode.encode_bx_regs(),305}306}307}308309impl<R: AsReg, M: AsReg> From<R> for GprMem<R, M> {310fn from(reg: R) -> GprMem<R, M> {311GprMem::Gpr(reg)312}313}314315impl<R: AsReg, M: AsReg> From<Amode<M>> for GprMem<R, M> {316fn from(amode: Amode<M>) -> GprMem<R, M> {317GprMem::Mem(amode)318}319}320321/// An XMM register or memory operand.322#[derive(Copy, Clone, Debug)]323#[cfg_attr(any(test, feature = "fuzz"), derive(arbitrary::Arbitrary))]324#[allow(325clippy::module_name_repetitions,326reason = "'XmmMem' indicates this has Xmm and memory variants"327)]328pub enum XmmMem<R: AsReg, M: AsReg> {329Xmm(R),330Mem(Amode<M>),331}332333impl<R: AsReg, M: AsReg> XmmMem<R, M> {334/// Pretty-print the 
operand.335pub fn to_string(&self) -> String {336match self {337XmmMem::Xmm(xmm) => xmm.to_string(None),338XmmMem::Mem(amode) => amode.to_string(),339}340}341342/// Return the [`RexPrefix`] for each variant of this [`XmmMem`].343#[must_use]344pub(crate) fn as_rex_prefix(&self, enc_reg: u8, has_w_bit: bool, uses_8bit: bool) -> RexPrefix {345match self {346XmmMem::Xmm(rm) => RexPrefix::two_op(enc_reg, rm.enc(), has_w_bit, uses_8bit),347XmmMem::Mem(amode) => amode.as_rex_prefix(enc_reg, has_w_bit, uses_8bit),348}349}350351/// Emit the ModR/M, SIB, and displacement suffixes for this [`XmmMem`].352pub(crate) fn encode_rex_suffixes(353&self,354sink: &mut impl CodeSink,355enc_reg: u8,356bytes_at_end: u8,357evex_scaling: Option<i8>,358) {359match self {360XmmMem::Xmm(xmm) => {361sink.put1(encode_modrm(0b11, enc_reg & 0b111, xmm.enc() & 0b111));362}363XmmMem::Mem(amode) => {364amode.encode_rex_suffixes(sink, enc_reg, bytes_at_end, evex_scaling);365}366}367}368369/// Return the registers for encoding the `b` and `x` bits (e.g., in a VEX370/// prefix).371///372/// During encoding, the `b` bit is set by the topmost bit (the fourth bit)373/// of either the `reg` register or, if this is a memory address, the `base`374/// register. 
The `x` bit is set by the `index` register, when used.375pub(crate) fn encode_bx_regs(&self) -> (Option<u8>, Option<u8>) {376match self {377XmmMem::Xmm(reg) => (Some(reg.enc()), None),378XmmMem::Mem(amode) => amode.encode_bx_regs(),379}380}381}382383impl<R: AsReg, M: AsReg> From<R> for XmmMem<R, M> {384fn from(reg: R) -> XmmMem<R, M> {385XmmMem::Xmm(reg)386}387}388389impl<R: AsReg, M: AsReg> From<Amode<M>> for XmmMem<R, M> {390fn from(amode: Amode<M>) -> XmmMem<R, M> {391XmmMem::Mem(amode)392}393}394395/// Emit the ModRM/SIB/displacement sequence for a memory operand.396pub fn emit_modrm_sib_disp<R: AsReg>(397sink: &mut impl CodeSink,398enc_g: u8,399mem_e: &Amode<R>,400bytes_at_end: u8,401evex_scaling: Option<i8>,402) {403match *mem_e {404Amode::ImmReg { simm32, base, .. } => {405let enc_e = base.enc();406let mut imm = Disp::new(simm32.value(sink), evex_scaling);407408// Most base registers allow for a single ModRM byte plus an409// optional immediate. If rsp is the base register, however, then a410// SIB byte must be used.411let enc_e_low3 = enc_e & 7;412if enc_e_low3 == gpr::enc::RSP {413// Displacement from RSP is encoded with a SIB byte where414// the index and base are both encoded as RSP's encoding of415// 0b100. This special encoding means that the index register416// isn't used and the base is 0b100 with or without a417// REX-encoded 4th bit (e.g. 
rsp or r12)418sink.put1(encode_modrm(imm.m0d(), enc_g & 7, 0b100));419sink.put1(0b00_100_100);420imm.emit(sink);421} else {422// If the base register is rbp and there's no offset then force423// a 1-byte zero offset since otherwise the encoding would be424// invalid.425if enc_e_low3 == gpr::enc::RBP {426imm.force_immediate();427}428sink.put1(encode_modrm(imm.m0d(), enc_g & 7, enc_e & 7));429imm.emit(sink);430}431}432433Amode::ImmRegRegShift {434simm32,435base,436index,437scale,438..439} => {440let enc_base = base.enc();441let enc_index = index.enc();442443// Encoding of ModRM/SIB bytes don't allow the index register to444// ever be rsp. Note, though, that the encoding of r12, whose three445// lower bits match the encoding of rsp, is explicitly allowed with446// REX bytes so only rsp is disallowed.447assert!(enc_index != gpr::enc::RSP);448449// If the offset is zero then there is no immediate. Note, though,450// that if the base register's lower three bits are `101` then an451// offset must be present. This is a special case in the encoding of452// the SIB byte and requires an explicit displacement with rbp/r13.453let mut imm = Disp::new(simm32.value(), evex_scaling);454if enc_base & 7 == gpr::enc::RBP {455imm.force_immediate();456}457458// With the above determined encode the ModRM byte, then the SIB459// byte, then any immediate as necessary.460sink.put1(encode_modrm(imm.m0d(), enc_g & 7, 0b100));461sink.put1(encode_sib(scale.enc(), enc_index & 7, enc_base & 7));462imm.emit(sink);463}464465Amode::RipRelative { target } => {466// RIP-relative is mod=00, rm=101.467sink.put1(encode_modrm(0b00, enc_g & 7, 0b101));468469// Inform the code sink about the RIP-relative `target` at the470// current offset, emitting a `LabelUse`, a relocation, or etc as471// appropriate.472sink.use_target(target);473474// N.B.: some instructions (XmmRmRImm format for example)475// have bytes *after* the RIP-relative offset. 
The476// addressed location is relative to the end of the477// instruction, but the relocation is nominally relative478// to the end of the u32 field. So, to compensate for479// this, we emit a negative extra offset in the u32 field480// initially, and the relocation will add to it.481sink.put4(-(i32::from(bytes_at_end)) as u32);482}483}484}485486487