Path: blob/main/cranelift/assembler-x64/meta/src/instructions/fma.rs
1693 views
use crate::dsl::{Feature::*, Inst, Length::*, Location::*};1use crate::dsl::{fmt, inst, r, rw, vex};23#[rustfmt::skip] // Keeps instructions on a single line.4pub fn list() -> Vec<Inst> {5let single_ops = [rw(xmm1), r(xmm2), r(xmm_m32)];6let double_ops = [rw(xmm1), r(xmm2), r(xmm_m64)];7let packed_ops = [rw(xmm1), r(xmm2), r(xmm_m128)];8let enc = || vex(LIG)._66()._0f38();9vec![10// Fused Multiply-Add (FMA) instructions. The digits in the instruction11// mnemonic correspond to the combination of operands (`op*`): e.g.,12// - `132` means `op1 * op2 + op3`,13// - `213` means `op2 * op1 + op3`, and14// - `231` means `op2 * op3 + op1`.15inst("vfmadd132ss", fmt("A", single_ops), enc().w0().op(0x99).r(), (_64b | compat) & fma),16inst("vfmadd213ss", fmt("A", single_ops), enc().w0().op(0xA9).r(), (_64b | compat) & fma),17inst("vfmadd231ss", fmt("A", single_ops), enc().w0().op(0xB9).r(), (_64b | compat) & fma),18inst("vfmadd132sd", fmt("A", double_ops), enc().w1().op(0x99).r(), (_64b | compat) & fma),19inst("vfmadd213sd", fmt("A", double_ops), enc().w1().op(0xA9).r(), (_64b | compat) & fma),20inst("vfmadd231sd", fmt("A", double_ops), enc().w1().op(0xB9).r(), (_64b | compat) & fma),21inst("vfmadd132ps", fmt("A", packed_ops), enc().w0().op(0x98).r(), (_64b | compat) & fma),22inst("vfmadd213ps", fmt("A", packed_ops), enc().w0().op(0xA8).r(), (_64b | compat) & fma),23inst("vfmadd231ps", fmt("A", packed_ops), enc().w0().op(0xB8).r(), (_64b | compat) & fma),24inst("vfmadd132pd", fmt("A", packed_ops), enc().w1().op(0x98).r(), (_64b | compat) & fma),25inst("vfmadd213pd", fmt("A", packed_ops), enc().w1().op(0xA8).r(), (_64b | compat) & fma),26inst("vfmadd231pd", fmt("A", packed_ops), enc().w1().op(0xB8).r(), (_64b | compat) & fma),27// Fused Negative Multiply-Add (FNMA); like FMA, but with the28// multiplication result negated.29inst("vfnmadd132ss", fmt("A", single_ops), enc().w0().op(0x9D).r(), (_64b | compat) & fma),30inst("vfnmadd213ss", fmt("A", single_ops), enc().w0().op(0xAD).r(), (_64b | compat) & fma),31inst("vfnmadd231ss", fmt("A", single_ops), enc().w0().op(0xBD).r(), (_64b | compat) & fma),32inst("vfnmadd132sd", fmt("A", double_ops), enc().w1().op(0x9D).r(), (_64b | compat) & fma),33inst("vfnmadd213sd", fmt("A", double_ops), enc().w1().op(0xAD).r(), (_64b | compat) & fma),34inst("vfnmadd231sd", fmt("A", double_ops), enc().w1().op(0xBD).r(), (_64b | compat) & fma),35inst("vfnmadd132ps", fmt("A", packed_ops), enc().w0().op(0x9C).r(), (_64b | compat) & fma),36inst("vfnmadd213ps", fmt("A", packed_ops), enc().w0().op(0xAC).r(), (_64b | compat) & fma),37inst("vfnmadd231ps", fmt("A", packed_ops), enc().w0().op(0xBC).r(), (_64b | compat) & fma),38inst("vfnmadd132pd", fmt("A", packed_ops), enc().w1().op(0x9C).r(), (_64b | compat) & fma),39inst("vfnmadd213pd", fmt("A", packed_ops), enc().w1().op(0xAC).r(), (_64b | compat) & fma),40inst("vfnmadd231pd", fmt("A", packed_ops), enc().w1().op(0xBC).r(), (_64b | compat) & fma),41// Fused Multiply-Subtract (FMS); like FMA, but subtracting42// from the multiplication result.43inst("vfmsub132ss", fmt("A", single_ops), enc().w0().op(0x9B).r(), (_64b | compat) & fma),44inst("vfmsub213ss", fmt("A", single_ops), enc().w0().op(0xAB).r(), (_64b | compat) & fma),45inst("vfmsub231ss", fmt("A", single_ops), enc().w0().op(0xBB).r(), (_64b | compat) & fma),46inst("vfmsub132sd", fmt("A", double_ops), enc().w1().op(0x9B).r(), (_64b | compat) & fma),47inst("vfmsub213sd", fmt("A", double_ops), enc().w1().op(0xAB).r(), (_64b | compat) & fma),48inst("vfmsub231sd", fmt("A", double_ops), enc().w1().op(0xBB).r(), (_64b | compat) & fma),49inst("vfmsub132ps", fmt("A", packed_ops), enc().w0().op(0x9A).r(), (_64b | compat) & fma),50inst("vfmsub213ps", fmt("A", packed_ops), enc().w0().op(0xAA).r(), (_64b | compat) & fma),51inst("vfmsub231ps", fmt("A", packed_ops), enc().w0().op(0xBA).r(), (_64b | compat) & fma),52inst("vfmsub132pd", fmt("A", packed_ops), enc().w1().op(0x9A).r(), (_64b | compat) & fma),53inst("vfmsub213pd", fmt("A", packed_ops), enc().w1().op(0xAA).r(), (_64b | compat) & fma),54inst("vfmsub231pd", fmt("A", packed_ops), enc().w1().op(0xBA).r(), (_64b | compat) & fma),55// Fused Negative Multiply-Subtract (FNMS).56inst("vfnmsub132ss", fmt("A", single_ops), enc().w0().op(0x9F).r(), (_64b | compat) & fma),57inst("vfnmsub213ss", fmt("A", single_ops), enc().w0().op(0xAF).r(), (_64b | compat) & fma),58inst("vfnmsub231ss", fmt("A", single_ops), enc().w0().op(0xBF).r(), (_64b | compat) & fma),59inst("vfnmsub132sd", fmt("A", double_ops), enc().w1().op(0x9F).r(), (_64b | compat) & fma),60inst("vfnmsub213sd", fmt("A", double_ops), enc().w1().op(0xAF).r(), (_64b | compat) & fma),61inst("vfnmsub231sd", fmt("A", double_ops), enc().w1().op(0xBF).r(), (_64b | compat) & fma),62inst("vfnmsub132ps", fmt("A", packed_ops), enc().w0().op(0x9E).r(), (_64b | compat) & fma),63inst("vfnmsub213ps", fmt("A", packed_ops), enc().w0().op(0xAE).r(), (_64b | compat) & fma),64inst("vfnmsub231ps", fmt("A", packed_ops), enc().w0().op(0xBE).r(), (_64b | compat) & fma),65inst("vfnmsub132pd", fmt("A", packed_ops), enc().w1().op(0x9E).r(), (_64b | compat) & fma),66inst("vfnmsub213pd", fmt("A", packed_ops), enc().w1().op(0xAE).r(), (_64b | compat) & fma),67inst("vfnmsub231pd", fmt("A", packed_ops), enc().w1().op(0xBE).r(), (_64b | compat) & fma),68]69}707172