Path: blob/main/cranelift/assembler-x64/meta/src/dsl/encoding.rs
1693 views
//! A DSL for describing x64 encodings.1//!2//! Intended use:3//! - construct an encoding using an abbreviated helper, e.g., [`rex`]4//! - then, configure the encoding using builder methods, e.g., [`Rex::w`]5//!6//! ```7//! # use cranelift_assembler_x64_meta::dsl::rex;8//! let enc = rex(0x25).w().id();9//! assert_eq!(enc.to_string(), "REX.W + 0x25 id")10//! ```11//!12//! This module references the Intel® 64 and IA-32 Architectures Software13//! Development Manual, Volume 2: [link].14//!15//! [link]: https://software.intel.com/content/www/us/en/develop/articles/intel-sdm.html1617use super::{Operand, OperandKind};18use core::fmt;1920/// An abbreviated constructor for REX-encoded instructions.21#[must_use]22pub fn rex(opcode: impl Into<Opcodes>) -> Rex {23Rex {24opcodes: opcode.into(),25w: WBit::W0,26modrm: None,27imm: Imm::None,28opcode_mod: None,29}30}3132/// An abbreviated constructor for VEX-encoded instructions.33#[must_use]34pub fn vex(length: Length) -> Vex {35Vex {36length,37pp: None,38mmmmm: None,39w: WBit::WIG,40opcode: u8::MAX,41modrm: None,42imm: Imm::None,43is4: false,44}45}4647/// An abbreviated constructor for EVEX-encoded instructions.48#[must_use]49pub fn evex(length: Length, tuple_type: TupleType) -> Evex {50Evex {51length,52pp: None,53mmm: None,54w: WBit::WIG,55opcode: u8::MAX,56modrm: None,57imm: Imm::None,58tuple_type,59}60}6162/// Enumerate the ways x64 encodes instructions.63pub enum Encoding {64Rex(Rex),65Vex(Vex),66Evex(Evex),67}6869impl Encoding {70/// Check that the encoding is valid for the given operands; this can find71/// issues earlier, before generating any Rust code.72pub fn validate(&self, operands: &[Operand]) {73match self {74Encoding::Rex(rex) => rex.validate(operands),75Encoding::Vex(vex) => vex.validate(operands),76Encoding::Evex(evex) => evex.validate(operands),77}78}7980/// Return the opcode for this encoding.81pub fn opcode(&self) -> u8 {82match self {83Encoding::Rex(rex) => rex.opcodes.opcode(),84Encoding::Vex(vex) => vex.opcode,85Encoding::Evex(evex) => evex.opcode,86}87}88}8990impl fmt::Display for Encoding {91fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {92match self {93Encoding::Rex(rex) => write!(f, "{rex}"),94Encoding::Vex(vex) => write!(f, "{vex}"),95Encoding::Evex(evex) => write!(f, "{evex}"),96}97}98}99100#[derive(Clone, Copy, PartialEq)]101pub enum ModRmKind {102/// Models `/digit`.103///104/// From the reference manual: "a digit between 0 and 7 indicates that the105/// ModR/M byte of the instruction uses only the r/m (register or memory)106/// operand. The reg field contains the digit that provides an extension to107/// the instruction's opcode."108Digit(u8),109110/// Models `/r`.111///112/// From the reference manual: "indicates that the ModR/M byte of the113/// instruction contains a register operand and an r/m operand."114Reg,115}116117impl ModRmKind {118/// Return the digit extending the opcode, if available.119#[must_use]120pub fn digit(&self) -> Option<u8> {121match self {122Self::Digit(digit) => Some(*digit),123_ => None,124}125}126127/// Return the digit extending the opcode.128///129/// # Panics130///131/// Panics if not extension was defined.132pub fn unwrap_digit(&self) -> u8 {133self.digit().expect("expected an extension digit")134}135}136137impl fmt::Display for ModRmKind {138fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {139match self {140ModRmKind::Digit(digit) => write!(f, "/{digit}"),141ModRmKind::Reg => write!(f, "/r"),142}143}144}145146/// The traditional x64 encoding.147///148/// We use the "REX" name here in a slightly unorthodox way: "REX" is the name149/// for the optional _byte_ extending the number of available registers, e.g.,150/// but we use it here to distinguish this from other encoding formats (e.g.,151/// VEX, EVEX). The "REX" _byte_ is still optional in this encoding and only152/// emitted when necessary.153pub struct Rex {154/// The opcodes for this instruction.155///156/// Multi-byte opcodes are handled by passing an array of opcodes (including157/// prefixes like `0x66` and escape bytes like `0x0f`) to the constructor.158/// E.g., `66 0F 54` (`ANDPD`) is expressed as follows:159///160/// ```161/// # use cranelift_assembler_x64_meta::dsl::rex;162/// let enc = rex([0x66, 0x0f, 0x54]);163/// ```164pub opcodes: Opcodes,165/// Indicates setting the REX.W bit.166///167/// From the reference manual: "Indicates the use of a REX prefix that168/// affects operand size or instruction semantics. The ordering of the REX169/// prefix and other optional/mandatory instruction prefixes are discussed170/// in chapter 2. Note that REX prefixes that promote legacy instructions to171/// 64-bit behavior are not listed explicitly in the opcode column."172pub w: WBit,173/// Indicates modifications to the ModR/M byte.174pub modrm: Option<ModRmKind>,175/// The number of bits used as an immediate operand to the instruction.176pub imm: Imm,177/// Used for `+rb`, `+rw`, `+rd`, and `+ro` instructions, which encode `reg`178/// bits in the opcode byte; if `Some`, this contains the expected bit width179/// of `reg`.180///181/// From the reference manual: "[...] the lower 3 bits of the opcode byte is182/// used to encode the register operand without a modR/M byte. The183/// instruction lists the corresponding hexadecimal value of the opcode byte184/// with low 3 bits as 000b. In non-64-bit mode, a register code, from 0185/// through 7, is added to the hexadecimal value of the opcode byte. In186/// 64-bit mode, indicates the four bit field of REX.b and opcode[2:0] field187/// encodes the register operand of the instruction. “+ro” is applicable188/// only in 64-bit mode."189pub opcode_mod: Option<OpcodeMod>,190}191192impl Rex {193/// Set the `REX.W` bit.194#[must_use]195pub fn w(self) -> Self {196Self {197w: WBit::W1,198..self199}200}201202/// Set the ModR/M byte to contain a register operand and an r/m operand;203/// equivalent to `/r` in the reference manual.204#[must_use]205pub fn r(self) -> Self {206Self {207modrm: Some(ModRmKind::Reg),208..self209}210}211212/// Set the digit extending the opcode; equivalent to `/<digit>` in the213/// reference manual.214///215/// # Panics216///217/// Panics if `extension` is too large.218#[must_use]219pub fn digit(self, extension: u8) -> Self {220assert!(extension <= 0b111, "must fit in 3 bits");221Self {222modrm: Some(ModRmKind::Digit(extension)),223..self224}225}226227/// Retrieve the digit extending the opcode, if available.228#[must_use]229pub fn unwrap_digit(&self) -> Option<u8> {230match self.modrm {231Some(ModRmKind::Digit(digit)) => Some(digit),232_ => None,233}234}235236/// Append a byte-sized immediate operand (8-bit); equivalent to `ib` in the237/// reference manual.238///239/// # Panics240///241/// Panics if an immediate operand is already set.242#[must_use]243pub fn ib(self) -> Self {244assert_eq!(self.imm, Imm::None);245Self {246imm: Imm::ib,247..self248}249}250251/// Append a word-sized immediate operand (16-bit); equivalent to `iw` in252/// the reference manual.253///254/// # Panics255///256/// Panics if an immediate operand is already set.257#[must_use]258pub fn iw(self) -> Self {259assert_eq!(self.imm, Imm::None);260Self {261imm: Imm::iw,262..self263}264}265266/// Append a doubleword-sized immediate operand (32-bit); equivalent to `id`267/// in the reference manual.268///269/// # Panics270///271/// Panics if an immediate operand is already set.272#[must_use]273pub fn id(self) -> Self {274assert_eq!(self.imm, Imm::None);275Self {276imm: Imm::id,277..self278}279}280281/// Append a quadword-sized immediate operand (64-bit); equivalent to `io`282/// in the reference manual.283///284/// # Panics285///286/// Panics if an immediate operand is already set.287#[must_use]288pub fn io(self) -> Self {289assert_eq!(self.imm, Imm::None);290Self {291imm: Imm::io,292..self293}294}295296/// Modify the opcode byte with bits from an 8-bit `reg`; equivalent to297/// `+rb` in the reference manual.298#[must_use]299pub fn rb(self) -> Self {300Self {301opcode_mod: Some(OpcodeMod::rb),302..self303}304}305306/// Modify the opcode byte with bits from a 16-bit `reg`; equivalent to307/// `+rw` in the reference manual.308#[must_use]309pub fn rw(self) -> Self {310Self {311opcode_mod: Some(OpcodeMod::rw),312..self313}314}315316/// Modify the opcode byte with bits from a 32-bit `reg`; equivalent to317/// `+rd` in the reference manual.318#[must_use]319pub fn rd(self) -> Self {320Self {321opcode_mod: Some(OpcodeMod::rd),322..self323}324}325326/// Modify the opcode byte with bits from a 64-bit `reg`; equivalent to327/// `+ro` in the reference manual.328#[must_use]329pub fn ro(self) -> Self {330Self {331opcode_mod: Some(OpcodeMod::ro),332..self333}334}335336/// Check a subset of the rules for valid encodings outlined in chapter 2,337/// _Instruction Format_, of the Intel® 64 and IA-32 Architectures Software338/// Developer’s Manual, Volume 2A.339fn validate(&self, operands: &[Operand]) {340if let Some(OperandKind::Imm(op)) = operands341.iter()342.map(|o| o.location.kind())343.find(|k| matches!(k, OperandKind::Imm(_)))344{345assert_eq!(346op.bits(),347self.imm.bits(),348"for an immediate, the encoding width must match the declared operand width"349);350}351352if let Some(opcode_mod) = &self.opcode_mod {353assert!(354self.opcodes.primary & 0b111 == 0,355"the lower three bits of the opcode byte should be 0"356);357assert!(358operands359.iter()360.all(|o| o.location.bits() == opcode_mod.bits().into()),361"the opcode modifier width must match the operand widths"362);363}364365assert!(!matches!(self.w, WBit::WIG));366}367}368369impl From<Rex> for Encoding {370fn from(rex: Rex) -> Encoding {371Encoding::Rex(rex)372}373}374375impl fmt::Display for Rex {376fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {377if let Some(group1) = &self.opcodes.prefixes.group1 {378write!(f, "{group1} + ")?;379}380if let Some(group2) = &self.opcodes.prefixes.group2 {381write!(f, "{group2} + ")?;382}383if let Some(group3) = &self.opcodes.prefixes.group3 {384write!(f, "{group3} + ")?;385}386if let Some(group4) = &self.opcodes.prefixes.group4 {387write!(f, "{group4} + ")?;388}389if self.w.as_bool() {390write!(f, "REX.W + ")?;391}392if self.opcodes.escape {393write!(f, "0x0F + ")?;394}395write!(f, "{:#04X}", self.opcodes.primary)?;396if let Some(secondary) = self.opcodes.secondary {397write!(f, " {secondary:#04X}")?;398}399if let Some(modrm) = self.modrm {400write!(f, " {modrm}")?;401}402if let Some(opcode_mod) = &self.opcode_mod {403write!(f, " {opcode_mod}")?;404}405if self.imm != Imm::None {406write!(f, " {}", self.imm)?;407}408Ok(())409}410}411412/// Describe an instruction's opcodes. From section 2.1.2 "Opcodes" in the413/// reference manual:414///415/// > A primary opcode can be 1, 2, or 3 bytes in length. An additional 3-bit416/// > opcode field is sometimes encoded in the ModR/M byte. Smaller fields can417/// > be defined within the primary opcode. Such fields define the direction of418/// > operation, size of displacements, register encoding, condition codes, or419/// > sign extension. Encoding fields used by an opcode vary depending on the420/// > class of operation.421/// >422/// > Two-byte opcode formats for general-purpose and SIMD instructions consist423/// > of one of the following:424/// > - An escape opcode byte `0FH` as the primary opcode and a second opcode425/// > byte.426/// > - A mandatory prefix (`66H`, `F2H`, or `F3H`), an escape opcode byte, and427/// > a second opcode byte (same as previous bullet).428/// >429/// > For example, `CVTDQ2PD` consists of the following sequence: `F3 0F E6`.430/// > The first byte is a mandatory prefix (it is not considered as a repeat431/// > prefix).432/// >433/// > Three-byte opcode formats for general-purpose and SIMD instructions434/// > consist of one of the following:435/// > - An escape opcode byte `0FH` as the primary opcode, plus two additional436/// > opcode bytes.437/// > - A mandatory prefix (`66H`, `F2H`, or `F3H`), an escape opcode byte, plus438/// > two additional opcode bytes (same as previous bullet).439/// >440/// > For example, `PHADDW` for XMM registers consists of the following441/// > sequence: `66 0F 38 01`. The first byte is the mandatory prefix.442pub struct Opcodes {443/// The prefix bytes for this instruction.444pub prefixes: Prefixes,445/// Indicates the use of an escape opcode byte, `0x0f`.446pub escape: bool,447/// The primary opcode.448pub primary: u8,449/// Some instructions (e.g., SIMD) may have a secondary opcode.450pub secondary: Option<u8>,451}452453impl Opcodes {454/// Return the main opcode for this instruction.455///456/// Note that [`Rex`]-encoded instructions have a complex opcode scheme (see457/// [`Opcodes`] documentation); the opcode one is usually looking for is the458/// last one. This returns the last opcode: the secondary opcode if one is459/// available and the primary otherwise.460fn opcode(&self) -> u8 {461if let Some(secondary) = self.secondary {462secondary463} else {464self.primary465}466}467}468469impl From<u8> for Opcodes {470fn from(primary: u8) -> Opcodes {471Opcodes {472prefixes: Prefixes::default(),473escape: false,474primary,475secondary: None,476}477}478}479480impl<const N: usize> From<[u8; N]> for Opcodes {481fn from(bytes: [u8; N]) -> Self {482let (prefixes, remaining) = Prefixes::parse(&bytes);483let (escape, primary, secondary) = match remaining {484[primary] => (false, *primary, None),485[0x0f, primary] => (true, *primary, None),486[0x0f, primary, secondary] => (true, *primary, Some(*secondary)),487_ => panic!(488"invalid opcodes after prefix; expected [opcode], [0x0f, opcode], or [0x0f, opcode, opcode], found {remaining:x?}"489),490};491Self {492prefixes,493escape,494primary,495secondary,496}497}498}499500/// The allowed prefixes for an instruction. From the reference manual (section501/// 2.1.1):502///503/// > Instruction prefixes are divided into four groups, each with a set of504/// > allowable prefix codes. For each instruction, it is only useful to include505/// > up to one prefix code from each of the four groups (Groups 1, 2, 3, 4).506/// > Groups 1 through 4 may be placed in any order relative to each other.507#[derive(Default)]508pub struct Prefixes {509pub group1: Option<Group1Prefix>,510pub group2: Option<Group2Prefix>,511pub group3: Option<Group3Prefix>,512pub group4: Option<Group4Prefix>,513}514515impl Prefixes {516/// Parse a slice of `bytes` into a set of prefixes, returning both the517/// configured [`Prefixes`] as well as any remaining bytes.518fn parse(mut bytes: &[u8]) -> (Self, &[u8]) {519let mut prefixes = Self::default();520while !bytes.is_empty() && prefixes.try_assign(bytes[0]).is_ok() {521bytes = &bytes[1..];522}523(prefixes, bytes)524}525526/// Attempt to parse a `byte` as a prefix and, if successful, assigns it to527/// the correct prefix group.528///529/// # Panics530///531/// This function panics if the prefix for a group is already set; this532/// disallows specifying multiple prefixes per group.533fn try_assign(&mut self, byte: u8) -> Result<(), ()> {534if let Ok(p) = Group1Prefix::try_from(byte) {535assert!(self.group1.is_none());536self.group1 = Some(p);537Ok(())538} else if let Ok(p) = Group2Prefix::try_from(byte) {539assert!(self.group2.is_none());540self.group2 = Some(p);541Ok(())542} else if let Ok(p) = Group3Prefix::try_from(byte) {543assert!(self.group3.is_none());544self.group3 = Some(p);545Ok(())546} else if let Ok(p) = Group4Prefix::try_from(byte) {547assert!(self.group4.is_none());548self.group4 = Some(p);549Ok(())550} else {551Err(())552}553}554555/// Check if any prefix is present.556pub fn is_empty(&self) -> bool {557self.group1.is_none()558&& self.group2.is_none()559&& self.group3.is_none()560&& self.group4.is_none()561}562}563564pub enum Group1Prefix {565/// The LOCK prefix (`0xf0`). From the reference manual:566///567/// > The LOCK prefix (F0H) forces an operation that ensures exclusive use568/// > of shared memory in a multiprocessor environment. See "LOCK—Assert569/// > LOCK# Signal Prefix" in Chapter 3, Instruction Set Reference, A-L, for570/// > a description of this prefix.571Lock,572/// A REPNE/REPNZ prefix (`0xf2`) or a BND prefix under certain conditions.573/// `REP*` prefixes apply only to string and input/output instructions but574/// can be used as mandatory prefixes in other kinds of instructions (e.g.,575/// SIMD) From the reference manual:576///577/// > Repeat prefixes (F2H, F3H) cause an instruction to be repeated for578/// > each element of a string. Use these prefixes only with string and I/O579/// > instructions (MOVS, CMPS, SCAS, LODS, STOS, INS, and OUTS). Use of580/// > repeat prefixes and/or undefined opcodes with other Intel 64 or IA-32581/// > instructions is reserved; such use may cause unpredictable behavior.582/// >583/// > Some instructions may use F2H, F3H as a mandatory prefix to express584/// > distinct functionality.585REPNorBND,586/// A REPE/REPZ prefix (`0xf3`); `REP*` prefixes apply only to string and587/// input/output instructions but can be used as mandatory prefixes in other588/// kinds of instructions (e.g., SIMD). See `REPNorBND` for more details.589REP_,590}591592impl TryFrom<u8> for Group1Prefix {593type Error = u8;594fn try_from(byte: u8) -> Result<Self, Self::Error> {595Ok(match byte {5960xF0 => Group1Prefix::Lock,5970xF2 => Group1Prefix::REPNorBND,5980xF3 => Group1Prefix::REP_,599byte => return Err(byte),600})601}602}603604impl fmt::Display for Group1Prefix {605fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {606match self {607Group1Prefix::Lock => write!(f, "0xF0"),608Group1Prefix::REPNorBND => write!(f, "0xF2"),609Group1Prefix::REP_ => write!(f, "0xF3"),610}611}612}613614/// Contains the segment override prefixes or a (deprecated) branch hint when615/// used on a `Jcc` instruction. Note that using the segment override prefixes616/// on a branch instruction is reserved. See section 2.1.1, "Instruction617/// Prefixes," in the reference manual.618pub enum Group2Prefix {619/// The CS segment override prefix (`0x2e`); also the "branch not taken"620/// hint.621CSorBNT,622/// The SS segment override prefix (`0x36`).623SS,624/// The DS segment override prefix (`0x3e`); also the "branch taken" hint.625DSorBT,626/// The ES segment override prefix (`0x26`).627ES,628/// The FS segment override prefix (`0x64`).629FS,630/// The GS segment override prefix (`0x65`).631GS,632}633634impl TryFrom<u8> for Group2Prefix {635type Error = u8;636fn try_from(byte: u8) -> Result<Self, Self::Error> {637Ok(match byte {6380x2E => Group2Prefix::CSorBNT,6390x36 => Group2Prefix::SS,6400x3E => Group2Prefix::DSorBT,6410x26 => Group2Prefix::ES,6420x64 => Group2Prefix::FS,6430x65 => Group2Prefix::GS,644byte => return Err(byte),645})646}647}648649impl fmt::Display for Group2Prefix {650fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {651match self {652Group2Prefix::CSorBNT => write!(f, "0x2E"),653Group2Prefix::SS => write!(f, "0x36"),654Group2Prefix::DSorBT => write!(f, "0x3E"),655Group2Prefix::ES => write!(f, "0x26"),656Group2Prefix::FS => write!(f, "0x64"),657Group2Prefix::GS => write!(f, "0x65"),658}659}660}661662/// Contains the operand-size override prefix (`0x66`); also used as a SIMD663/// prefix. From the reference manual:664///665/// > The operand-size override prefix allows a program to switch between 16-666/// > and 32-bit operand sizes. Either size can be the default; use of the667/// > prefix selects the non-default size. Some SSE2/SSE3/SSSE3/SSE4668/// > instructions and instructions using a three-byte sequence of primary669/// > opcode bytes may use 66H as a mandatory prefix to express distinct670/// > functionality.671pub enum Group3Prefix {672OperandSizeOverride,673}674675impl TryFrom<u8> for Group3Prefix {676type Error = u8;677fn try_from(byte: u8) -> Result<Self, Self::Error> {678Ok(match byte {6790x66 => Group3Prefix::OperandSizeOverride,680byte => return Err(byte),681})682}683}684685impl fmt::Display for Group3Prefix {686fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {687match self {688Group3Prefix::OperandSizeOverride => write!(f, "0x66"),689}690}691}692693/// Contains the address-size override prefix (`0x67`). From the reference694/// manual:695///696/// > The address-size override prefix (67H) allows programs to switch between697/// > 16- and 32-bit addressing. Either size can be the default; the prefix698/// > selects the non-default size.699pub enum Group4Prefix {700AddressSizeOverride,701}702703impl TryFrom<u8> for Group4Prefix {704type Error = u8;705fn try_from(byte: u8) -> Result<Self, Self::Error> {706Ok(match byte {7070x67 => Group4Prefix::AddressSizeOverride,708byte => return Err(byte),709})710}711}712713impl fmt::Display for Group4Prefix {714fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {715match self {716Group4Prefix::AddressSizeOverride => write!(f, "0x67"),717}718}719}720721/// Indicate the size of an immediate operand. From the reference manual:722///723/// > A 1-byte (ib), 2-byte (iw), 4-byte (id) or 8-byte (io) immediate operand724/// > to the instruction that follows the opcode, ModR/M bytes or scale-indexing725/// > bytes. The opcode determines if the operand is a signed value. All words,726/// > doublewords, and quadwords are given with the low-order byte first.727#[derive(Debug, PartialEq)]728#[allow(non_camel_case_types, reason = "makes DSL definitions easier to read")]729pub enum Imm {730None,731ib,732iw,733id,734io,735}736737impl Imm {738fn bits(&self) -> u16 {739match self {740Self::None => 0,741Self::ib => 8,742Self::iw => 16,743Self::id => 32,744Self::io => 64,745}746}747}748749impl fmt::Display for Imm {750fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {751match self {752Self::None => write!(f, ""),753Self::ib => write!(f, "ib"),754Self::iw => write!(f, "iw"),755Self::id => write!(f, "id"),756Self::io => write!(f, "io"),757}758}759}760761/// Indicate the size of the `reg` used when modifying the lower three bits of762/// the opcode byte; this corresponds to the `+rb`, `+rw`, `+rd`, and `+ro`763/// modifiers in the reference manual.764///765/// ```766/// # use cranelift_assembler_x64_meta::dsl::{rex};767/// // The `bswap` instruction extends the opcode byte:768/// let enc = rex([0x0F, 0xC8]).rd();769/// assert_eq!(enc.to_string(), "0x0F + 0xC8 +rd");770/// ```771#[derive(Clone, Copy, Debug, PartialEq)]772#[allow(non_camel_case_types, reason = "makes DSL definitions easier to read")]773pub enum OpcodeMod {774rb,775rw,776rd,777ro,778}779780impl OpcodeMod {781fn bits(&self) -> u8 {782match self {783Self::rb => 8,784Self::rw => 16,785Self::rd => 32,786Self::ro => 64,787}788}789}790791impl fmt::Display for OpcodeMod {792fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {793match self {794Self::rb => write!(f, "+rb"),795Self::rw => write!(f, "+rw"),796Self::rd => write!(f, "+rd"),797Self::ro => write!(f, "+ro"),798}799}800}801802/// Contains the legacy prefixes allowed for VEX-encoded instructions.803///804/// VEX encodes a subset of [`Group1Prefix`] and `0x66` (see [`Group3Prefix`])805/// as part of the `pp` bit field.806#[derive(Clone, Copy, PartialEq)]807pub enum VexPrefix {808_66,809_F2,810_F3,811}812813impl VexPrefix {814/// Encode the `pp` bits.815#[inline(always)]816pub(crate) fn bits(self) -> u8 {817match self {818Self::_66 => 0b01,819Self::_F3 => 0b10,820Self::_F2 => 0b11,821}822}823}824825impl fmt::Display for VexPrefix {826fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {827match self {828Self::_66 => write!(f, "66"),829Self::_F3 => write!(f, "F3"),830Self::_F2 => write!(f, "F2"),831}832}833}834835/// Contains the escape sequences allowed for VEX-encoded instructions.836///837/// VEX encodes these in the `mmmmmm` bit field.838#[derive(Clone, Copy, PartialEq)]839pub enum VexEscape {840_0F,841_0F3A,842_0F38,843}844845impl VexEscape {846/// Encode the `m-mmmm` bits.847#[inline(always)]848pub(crate) fn bits(&self) -> u8 {849match self {850Self::_0F => 0b01,851Self::_0F38 => 0b10,852Self::_0F3A => 0b11,853}854}855}856857impl fmt::Display for VexEscape {858fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {859match self {860Self::_0F => write!(f, "0F"),861Self::_0F3A => write!(f, "0F3A"),862Self::_0F38 => write!(f, "0F38"),863}864}865}866867/// Contains vector length definitions.868///869/// VEX encodes these in the `L` bit field, a single bit with `128-bit = 0` and870/// `256-bit = 1`. For convenience, we also include the `LIG` and `LZ` syntax,871/// used by the reference manual, and always set these to `0`.872///873/// EVEX encodes this in the `L'L` bits, two bits that typically indicate the874/// vector length for packed vector instructions but can also be used for875/// rounding control for floating-point instructions with rounding semantics876/// (see section 2.7.1 in the reference manual).877pub enum Length {878/// 128-bit vector length.879L128,880/// 256-bit vector length.881L256,882/// 512-bit vector length; invalid for VEX instructions.883L512,884/// Force the length bits to `0`, but not necessarily for 128-bit operation.885/// From the reference manual: "The VEX.L must be encoded to be 0B, an #UD886/// occurs if VEX.L is not zero."887LZ,888/// The length bits are ignored (e.g., for floating point scalar889/// instructions). This assembler will emit `0`.890LIG,891}892893impl Length {894/// Encode the `VEX.L` bit.895pub fn vex_bits(&self) -> u8 {896match self {897Self::L128 | Self::LIG | Self::LZ => 0b0,898Self::L256 => 0b1,899Self::L512 => unreachable!("VEX does not support 512-bit vector length"),900}901}902903/// Encode the `EVEX.L'L` bits.904///905/// See section 2.7.10, Vector Length Orthogonality, in the reference manual906pub fn evex_bits(&self) -> u8 {907match self {908Self::L128 | Self::LIG | Self::LZ => 0b00,909Self::L256 => 0b01,910Self::L512 => 0b10,911}912}913}914915impl fmt::Display for Length {916fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {917match self {918Self::L128 => write!(f, "128"),919Self::L256 => write!(f, "256"),920Self::L512 => write!(f, "512"),921Self::LIG => write!(f, "LIG"),922Self::LZ => write!(f, "LZ"),923}924}925}926927/// Model the `W` bit.928pub enum WBit {929/// The `W` bit is ignored; equivalent to `.WIG` in the manual.930WIG,931/// The `W` bit is set to `0`; equivalent to `.W0` in the manual.932W0,933/// The `W` bit is set to `1`; equivalent to `.W1` in the manual.934W1,935}936937impl WBit {938/// Return `true` if the `W` bit is ignored; this is useful to check in the939/// DSL for the default case.940fn is_ignored(&self) -> bool {941match self {942Self::WIG => true,943Self::W0 | Self::W1 => false,944}945}946947/// Return `true` if the `W` bit is set (`W1`); otherwise, return `false`.948pub(crate) fn as_bool(&self) -> bool {949match self {950Self::W1 => true,951Self::W0 | Self::WIG => false,952}953}954}955956impl fmt::Display for WBit {957fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {958match self {959Self::WIG => write!(f, "WIG"),960Self::W0 => write!(f, "W0"),961Self::W1 => write!(f, "W1"),962}963}964}965966/// The VEX encoding, introduced for AVX instructions.967///968/// ```969/// # use cranelift_assembler_x64_meta::dsl::{vex, Length::L128};970/// // To encode a BLENDPD instruction in the manual: VEX.128.66.0F3A.WIG 0D /r ib971/// let enc = vex(L128)._66()._0f3a().wig().op(0x0D).r().ib();972/// assert_eq!(enc.to_string(), "VEX.128.66.0F3A.WIG 0x0D /r ib");973/// ```974pub struct Vex {975/// The length of the operand (e.g., 128-bit or 256-bit).976pub length: Length,977/// Any SIMD prefixes, but encoded in the `VEX.pp` bit field.978pub pp: Option<VexPrefix>,979/// Any leading map bytes, but encoded in the `VEX.mmmmm` bit field.980pub mmmmm: Option<VexEscape>,981/// The `W` bit.982pub w: WBit,983/// VEX-encoded instructions have a single-byte opcode. Other prefix-related984/// bytes (see [`Opcodes`]) are encoded in the VEX prefixes (see `pp`,985/// `mmmmmm`). From the reference manual: "One (and only one) opcode byte986/// follows the 2 or 3 byte VEX."987pub opcode: u8,988/// See [`Rex.modrm`](Rex.modrm).989pub modrm: Option<ModRmKind>,990/// See [`Rex.imm`](Rex.imm).991pub imm: Imm,992/// See [`Vex::is4`]993pub is4: bool,994}995996impl Vex {997/// Set the `pp` field to use [`VexPrefix::_66`]; equivalent to `.66` in the998/// manual.999pub fn _66(self) -> Self {1000assert!(self.pp.is_none());1001Self {1002pp: Some(VexPrefix::_66),1003..self1004}1005}10061007/// Set the `pp` field to use [`VexPrefix::_F2`]; equivalent to `.F2` in the1008/// manual.1009pub fn _f2(self) -> Self {1010assert!(self.pp.is_none());1011Self {1012pp: Some(VexPrefix::_F2),1013..self1014}1015}10161017/// Set the `pp` field to use [`VexPrefix::_F3`]; equivalent to `.F3` in the1018/// manual.1019pub fn _f3(self) -> Self {1020assert!(self.pp.is_none());1021Self {1022pp: Some(VexPrefix::_F3),1023..self1024}1025}10261027/// Set the `mmmmmm` field to use [`VexEscape::_0F`]; equivalent to `.0F` in1028/// the manual.1029pub fn _0f(self) -> Self {1030assert!(self.mmmmm.is_none());1031Self {1032mmmmm: Some(VexEscape::_0F),1033..self1034}1035}10361037/// Set the `mmmmmm` field to use [`VexEscape::_0F3A`]; equivalent to1038/// `.0F3A` in the manual.1039pub fn _0f3a(self) -> Self {1040assert!(self.mmmmm.is_none());1041Self {1042mmmmm: Some(VexEscape::_0F3A),1043..self1044}1045}10461047/// Set the `mmmmmm` field to use [`VexEscape::_0F38`]; equivalent to1048/// `.0F38` in the manual.1049pub fn _0f38(self) -> Self {1050assert!(self.mmmmm.is_none());1051Self {1052mmmmm: Some(VexEscape::_0F38),1053..self1054}1055}10561057/// Set the `W` bit to `0`; equivalent to `.W0` in the manual.1058pub fn w0(self) -> Self {1059assert!(self.w.is_ignored());1060Self {1061w: WBit::W0,1062..self1063}1064}10651066/// Set the `W` bit to `1`; equivalent to `.W1` in the manual.1067pub fn w1(self) -> Self {1068assert!(self.w.is_ignored());1069Self {1070w: WBit::W1,1071..self1072}1073}10741075/// Ignore the `W` bit; equivalent to `.WIG` in the manual.1076pub fn wig(self) -> Self {1077assert!(self.w.is_ignored());1078Self {1079w: WBit::WIG,1080..self1081}1082}10831084/// Set the single opcode for this VEX-encoded instruction.1085pub fn op(self, opcode: u8) -> Self {1086assert_eq!(self.opcode, u8::MAX);1087Self { opcode, ..self }1088}10891090/// Set the ModR/M byte to contain a register operand; see [`Rex::r`].1091pub fn r(self) -> Self {1092assert!(self.modrm.is_none());1093Self {1094modrm: Some(ModRmKind::Reg),1095..self1096}1097}10981099/// Append a byte-sized immediate operand (8-bit); equivalent to `ib` in the1100/// reference manual.1101///1102/// # Panics1103///1104/// Panics if an immediate operand is already set.1105#[must_use]1106pub fn ib(self) -> Self {1107assert_eq!(self.imm, Imm::None);1108Self {1109imm: Imm::ib,1110..self1111}1112}11131114/// Append a word-sized immediate operand (16-bit); equivalent to `iw` in1115/// the reference manual.1116///1117/// # Panics1118///1119/// Panics if an immediate operand is already set.1120#[must_use]1121pub fn iw(self) -> Self {1122assert_eq!(self.imm, Imm::None);1123Self {1124imm: Imm::iw,1125..self1126}1127}11281129/// Append a doubleword-sized immediate operand (32-bit); equivalent to `id`1130/// in the reference manual.1131///1132/// # Panics1133///1134/// Panics if an immediate operand is already set.1135#[must_use]1136pub fn id(self) -> Self {1137assert_eq!(self.imm, Imm::None);1138Self {1139imm: Imm::id,1140..self1141}1142}11431144/// Append a quadword-sized immediate operand (64-bit); equivalent to `io`1145/// in the reference manual.1146///1147/// # Panics1148///1149/// Panics if an immediate operand is already set.1150#[must_use]1151pub fn io(self) -> Self {1152assert_eq!(self.imm, Imm::None);1153Self {1154imm: Imm::io,1155..self1156}1157}11581159/// Set the digit extending the opcode; equivalent to `/<digit>` in the1160/// reference manual.1161///1162/// # Panics1163///1164/// Panics if `extension` is too large.1165#[must_use]1166pub fn digit(self, extension: u8) -> Self {1167assert!(extension <= 0b111, "must fit in 3 bits");1168Self {1169modrm: Some(ModRmKind::Digit(extension)),1170..self1171}1172}11731174/// An 8-bit immediate byte is present containing a source register1175/// specifier in either imm8[7:4] (for 64-bit1176/// mode) or imm8[6:4] (for 32-bit mode), and instruction-specific payload1177/// in imm8[3:0].1178pub fn is4(self) -> Self {1179Self { is4: true, ..self }1180}11811182fn validate(&self, _operands: &[Operand]) {1183assert!(self.opcode != u8::MAX);1184assert!(self.mmmmm.is_some());1185assert!(!matches!(self.length, Length::L512));1186}11871188/// Retrieve the digit extending the opcode, if available.1189#[must_use]1190pub fn unwrap_digit(&self) -> Option<u8> {1191match self.modrm {1192Some(ModRmKind::Digit(digit)) => Some(digit),1193_ => None,1194}1195}1196}11971198impl From<Vex> for Encoding {1199fn from(vex: Vex) -> Encoding {1200Encoding::Vex(vex)1201}1202}12031204impl fmt::Display for Vex {1205fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {1206write!(f, "VEX.{}", self.length)?;1207if let Some(pp) = self.pp {1208write!(f, ".{pp}")?;1209}1210if let Some(mmmmm) = self.mmmmm {1211write!(f, ".{mmmmm}")?;1212}1213write!(f, ".{} {:#04X}", self.w, self.opcode)?;1214if let Some(modrm) = self.modrm {1215write!(f, " {modrm}")?;1216}1217if self.imm != Imm::None {1218write!(f, " {}", self.imm)?;1219}1220Ok(())1221}1222}12231224pub struct Evex {1225/// The vector length of the operand (e.g., 128-bit, 256-bit, or 512-bit).1226pub length: Length,1227/// Any SIMD prefixes, but encoded in the `EVEX.pp` bit field (see similar:1228/// [`Vex::pp`]).1229pub pp: Option<VexPrefix>,1230/// The `mmm` bits.1231///1232/// Bits `1:0` are identical to the lowest 2 bits of `VEX.mmmmm`; EVEX adds1233/// one more bit here. From the reference manual: "provides access to up to1234/// eight decoding maps. Currently, only the following decoding maps are1235/// supported: 1, 2, 3, 5, and 6. Map ids 1, 2, and 3 are denoted by 0F,1236/// 0F38, and 0F3A, respectively, in the instruction encoding descriptions."1237pub mmm: Option<VexEscape>,1238/// The `W` bit.1239pub w: WBit,1240/// EVEX-encoded instructions opcode byte"1241pub opcode: u8,1242/// See [`Rex.modrm`](Rex.modrm).1243pub modrm: Option<ModRmKind>,1244/// See [`Rex.imm`](Rex.imm).1245pub imm: Imm,1246/// The "Tuple Type" corresponding to scaling of the 8-bit displacement1247/// parameter for memory operands. See [`TupleType`] for more information.1248pub tuple_type: TupleType,1249}12501251impl Evex {1252/// Set the `pp` field to use [`VexPrefix::_66`]; equivalent to `.66` in the1253/// manual.1254pub fn _66(self) -> Self {1255assert!(self.pp.is_none());1256Self {1257pp: Some(VexPrefix::_66),1258..self1259}1260}12611262/// Set the `pp` field to use [`VexPrefix::_F2`]; equivalent to `.F2` in the1263/// manual.1264pub fn _f2(self) -> Self {1265assert!(self.pp.is_none());1266Self {1267pp: Some(VexPrefix::_F2),1268..self1269}1270}12711272/// Set the `pp` field to use [`VexPrefix::_F3`]; equivalent to `.F3` in the1273/// manual.1274pub fn _f3(self) -> Self {1275assert!(self.pp.is_none());1276Self {1277pp: Some(VexPrefix::_F3),1278..self1279}1280}12811282/// Set the `mmmmmm` field to use [`VexEscape::_0F`]; equivalent to `.0F` in1283/// the manual.1284pub fn _0f(self) -> Self {1285assert!(self.mmm.is_none());1286Self {1287mmm: Some(VexEscape::_0F),1288..self1289}1290}12911292/// Set the `mmmmmm` field to use [`VexEscape::_0F3A`]; equivalent to1293/// `.0F3A` in the manual.1294pub fn _0f3a(self) -> Self {1295assert!(self.mmm.is_none());1296Self {1297mmm: Some(VexEscape::_0F3A),1298..self1299}1300}13011302/// Set the `mmmmmm` field to use [`VexEscape::_0F38`]; equivalent to1303/// `.0F38` in the manual.1304pub fn _0f38(self) -> Self {1305assert!(self.mmm.is_none());1306Self {1307mmm: Some(VexEscape::_0F38),1308..self1309}1310}13111312/// Set the `W` bit to `0`; equivalent to `.W0` in the manual.1313pub fn w0(self) -> Self {1314assert!(self.w.is_ignored());1315Self {1316w: WBit::W0,1317..self1318}1319}13201321/// Set the `W` bit to `1`; equivalent to `.W1` in the manual.1322pub fn w1(self) -> Self {1323assert!(self.w.is_ignored());1324Self {1325w: WBit::W1,1326..self1327}1328}13291330/// Ignore the `W` bit; equivalent to `.WIG` in the manual.1331pub fn wig(self) -> Self {1332assert!(self.w.is_ignored());1333Self {1334w: WBit::WIG,1335..self1336}1337}13381339/// Set the single opcode for this VEX-encoded instruction.1340pub fn op(self, opcode: u8) -> Self {1341assert_eq!(self.opcode, u8::MAX);1342Self { opcode, ..self }1343}13441345/// Set the ModR/M byte to contain a register operand; see [`Rex::r`].1346pub fn r(self) -> Self {1347assert!(self.modrm.is_none());1348Self {1349modrm: Some(ModRmKind::Reg),1350..self1351}1352}13531354fn validate(&self, _operands: &[Operand]) {1355assert!(self.opcode != u8::MAX);1356assert!(self.mmm.is_some());1357}13581359/// Retrieve the digit extending the opcode, if available.1360#[must_use]1361pub fn unwrap_digit(&self) -> Option<u8> {1362match self.modrm {1363Some(ModRmKind::Digit(digit)) => Some(digit),1364_ => None,1365}1366}13671368/// Set the digit extending the opcode; equivalent to `/<digit>` in the1369/// reference manual.1370///1371/// # Panics1372///1373/// Panics if `extension` is too large.1374#[must_use]1375pub fn digit(self, extension: u8) -> Self {1376assert!(extension <= 0b111, "must fit in 3 bits");1377Self {1378modrm: Some(ModRmKind::Digit(extension)),1379..self1380}1381}13821383/// Append a byte-sized immediate operand (8-bit); equivalent to `ib` in the1384/// reference manual.1385///1386/// # Panics1387///1388/// Panics if an immediate operand is already set.1389#[must_use]1390pub fn ib(self) -> Self {1391assert_eq!(self.imm, Imm::None);1392Self {1393imm: Imm::ib,1394..self1395}1396}1397}13981399impl From<Evex> for Encoding {1400fn from(evex: Evex) -> Encoding {1401Encoding::Evex(evex)1402}1403}14041405impl fmt::Display for Evex {1406fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {1407write!(f, "EVEX.{}", self.length)?;1408if let Some(pp) = self.pp {1409write!(f, ".{pp}")?;1410}1411if let Some(mmmmm) = self.mmm {1412write!(f, ".{mmmmm}")?;1413}1414write!(f, ".{} {:#04X}", self.w, self.opcode)?;1415if let Some(modrm) = self.modrm {1416write!(f, " {modrm}")?;1417}1418if self.imm != Imm::None {1419write!(f, " {}", self.imm)?;1420}1421Ok(())1422}1423}14241425/// Tuple Type definitions used in EVEX encodings.1426///1427/// This enumeration corresponds to table 2-34 and 2-35 in the Intel manual.1428/// This is a property of all instruction formats listed in the encoding table1429/// for each instruction.1430#[expect(missing_docs, reason = "matching manual names")]1431pub enum TupleType {1432Full,1433Half,1434FullMem,1435Tuple1Scalar,1436Tuple1Fixed,1437Tuple2,1438Tuple4,1439Tuple8,1440HalfMem,1441QuarterMem,1442EigthMem,1443Mem128,1444Movddup,1445}144614471448