Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
bytecodealliance
GitHub Repository: bytecodealliance/wasmtime
Path: blob/main/cranelift/assembler-x64/meta/src/instructions/pma.rs
1693 views
1
use crate::dsl::{Feature::*, Inst, Length::*, Location::*};
2
use crate::dsl::{align, fmt, inst, r, rex, rw, vex, w};
3
4
#[rustfmt::skip] // Keeps instructions on a single line.
5
pub fn list() -> Vec<Inst> {
6
vec![
7
// Packed multiply-add instructions; from the manual: "Multiplies the
8
// individual signed words of the destination operand (first operand) by
9
// the corresponding signed words of the source operand (second
10
// operand), producing temporary signed, doubleword results. The
11
// adjacent doubleword results are then summed and stored in the
12
// destination operand. For example, the corresponding low-order words
13
// (15-0) and (31-16) in the source and destination operands are
14
// multiplied by one another and the doubleword results are added
15
// together and stored in the low doubleword of the destination register
16
// (31-0). The same operation is performed on the other pairs of
17
// adjacent words."
18
inst("pmaddwd", fmt("A", [rw(xmm1), r(align(xmm_m128))]), rex([0x66, 0x0F, 0xF5]), (_64b | compat) & sse2).alt(avx, "vpmaddwd_b"),
19
inst("vpmaddwd", fmt("B", [w(xmm1), r(xmm2), r(xmm_m128)]), vex(L128)._66()._0f().op(0xF5), (_64b | compat) & avx),
20
// Like `pmaddwd`, but this "multiplies vertically each unsigned byte of
21
// the destination operand (first operand) with the corresponding signed
22
// byte of the source operand (second operand), producing intermediate
23
// signed 16-bit integers. Each adjacent pair of signed words is added
24
// and the saturated result is packed to the destination operand."
25
inst("pmaddubsw", fmt("A", [rw(xmm1), r(align(xmm_m128))]), rex([0x66, 0x0F, 0x38, 0x04]), (_64b | compat) & ssse3).alt(avx, "vpmaddubsw_b"),
26
inst("vpmaddubsw", fmt("B", [w(xmm1), r(xmm2), r(xmm_m128)]), vex(L128)._66()._0f38().op(0x04), (_64b | compat) & avx),
27
]
28
}
29
30