Path: blob/main/cranelift/assembler-x64/meta/src/instructions/pma.rs
1693 views
use crate::dsl::{Feature::*, Inst, Length::*, Location::*};1use crate::dsl::{align, fmt, inst, r, rex, rw, vex, w};23#[rustfmt::skip] // Keeps instructions on a single line.4pub fn list() -> Vec<Inst> {5vec![6// Packed multiply-add instructions; from the manual: "Multiplies the7// individual signed words of the destination operand (first operand) by8// the corresponding signed words of the source operand (second9// operand), producing temporary signed, doubleword results. The10// adjacent doubleword results are then summed and stored in the11// destination operand. For example, the corresponding low-order words12// (15-0) and (31-16) in the source and destination operands are13// multiplied by one another and the doubleword results are added14// together and stored in the low doubleword of the destination register15// (31-0). The same operation is performed on the other pairs of16// adjacent words."17inst("pmaddwd", fmt("A", [rw(xmm1), r(align(xmm_m128))]), rex([0x66, 0x0F, 0xF5]), (_64b | compat) & sse2).alt(avx, "vpmaddwd_b"),18inst("vpmaddwd", fmt("B", [w(xmm1), r(xmm2), r(xmm_m128)]), vex(L128)._66()._0f().op(0xF5), (_64b | compat) & avx),19// Like `pmaddwd`, but this "multiplies vertically each unsigned byte of20// the destination operand (first operand) with the corresponding signed21// byte of the source operand (second operand), producing intermediate22// signed 16-bit integers. Each adjacent pair of signed words is added23// and the saturated result is packed to the destination operand."24inst("pmaddubsw", fmt("A", [rw(xmm1), r(align(xmm_m128))]), rex([0x66, 0x0F, 0x38, 0x04]), (_64b | compat) & ssse3).alt(avx, "vpmaddubsw_b"),25inst("vpmaddubsw", fmt("B", [w(xmm1), r(xmm2), r(xmm_m128)]), vex(L128)._66()._0f38().op(0x04), (_64b | compat) & avx),26]27}282930