CoCalc -- pma.rs

GitHub Repository: bytecodealliance/wasmtime
Path: blob/main/cranelift/assembler-x64/meta/src/instructions/pma.rs
1693 views
1
use crate::dsl::{Feature::*, Inst, Length::*, Location::*};
2
use crate::dsl::{align, fmt, inst, r, rex, rw, vex, w};
3

4
#[rustfmt::skip] // Keeps instructions on a single line.
5
pub fn list() -> Vec<Inst> {
6
    vec![
7
        // Packed multiply-add instructions; from the manual: "Multiplies the
8
        // individual signed words of the destination operand (first operand) by
9
        // the corresponding signed words of the source operand (second
10
        // operand), producing temporary signed, doubleword results. The
11
        // adjacent doubleword results are then summed and stored in the
12
        // destination operand. For example, the corresponding low-order words
13
        // (15-0) and (31-16) in the source and destination operands are
14
        // multiplied by one another and the doubleword results are added
15
        // together and stored in the low doubleword of the destination register
16
        // (31-0). The same operation is performed on the other pairs of
17
        // adjacent words."
18
        inst("pmaddwd", fmt("A", [rw(xmm1), r(align(xmm_m128))]), rex([0x66, 0x0F, 0xF5]), (_64b | compat) & sse2).alt(avx, "vpmaddwd_b"),
19
        inst("vpmaddwd", fmt("B", [w(xmm1), r(xmm2), r(xmm_m128)]), vex(L128)._66()._0f().op(0xF5), (_64b | compat) & avx),
20
        // Like `pmaddwd`, but this "multiplies vertically each unsigned byte of
21
        // the destination operand (first operand) with the corresponding signed
22
        // byte of the source operand (second operand), producing intermediate
23
        // signed 16-bit integers. Each adjacent pair of signed words is added
24
        // and the saturated result is packed to the destination operand."
25
        inst("pmaddubsw", fmt("A", [rw(xmm1), r(align(xmm_m128))]), rex([0x66, 0x0F, 0x38, 0x04]), (_64b | compat) & ssse3).alt(avx, "vpmaddubsw_b"),
26
        inst("vpmaddubsw", fmt("B", [w(xmm1), r(xmm2), r(xmm_m128)]), vex(L128)._66()._0f38().op(0x04), (_64b | compat) & avx),
27
     ]
28
}
29

30
Product

Resources

Company