Path: blob/main/cranelift/assembler-x64/meta/src/instructions/lanes.rs
1693 views
use crate::dsl::{Feature::*, Inst, Length::*, Location::*, TupleType::*};1use crate::dsl::{align, evex, fmt, inst, r, rex, rw, vex, w};23#[rustfmt::skip] // Keeps instructions on a single line.4pub fn list() -> Vec<Inst> {5// Note that `p{extr,ins}r{w,b}` below operate on 32-bit registers but a6// smaller-width memory location. This means that disassembly in Capstone7// doesn't match `rm8`, for example. For now pretend both of these are8// `rm32` to get disassembly matching Capstone.9let r32m8 = rm32;10let r32m16 = rm32;1112vec![13// Extract from a single XMM lane.14inst("extractps", fmt("A", [w(rm32), r(xmm1), r(imm8)]), rex([0x66, 0x0F, 0x3A, 0x17]).r().ib(), (_64b | compat) & sse41).alt(avx, "vextractps_b"),15inst("pextrb", fmt("A", [w(r32m8), r(xmm2), r(imm8)]), rex([0x66, 0x0F, 0x3A, 0x14]).r().ib(), (_64b | compat) & sse41).alt(avx, "vpextrb_a"),16inst("pextrw", fmt("A", [w(r32), r(xmm2), r(imm8)]), rex([0x66, 0x0F, 0xC5]).r().ib(), (_64b | compat) & sse2).alt(avx, "vpextrw_a"),17inst("pextrw", fmt("B", [w(r32m16), r(xmm2), r(imm8)]), rex([0x66, 0x0F, 0x3A, 0x15]).r().ib(), (_64b | compat) & sse41).alt(avx, "vpextrw_b"),18inst("pextrd", fmt("A", [w(rm32), r(xmm2), r(imm8)]), rex([0x66, 0x0F, 0x3A, 0x16]).r().ib(), (_64b | compat) & sse41).alt(avx, "vpextrd_a"),19inst("pextrq", fmt("A", [w(rm64), r(xmm2), r(imm8)]), rex([0x66, 0x0F, 0x3A, 0x16]).w().r().ib(), _64b & sse41).alt(avx, "vpextrq_a"),20inst("vextractps", fmt("B", [w(rm32), r(xmm1), r(imm8)]), vex(L128)._66()._0f3a().wig().op(0x17).r().ib(), (_64b | compat) & avx),21inst("vpextrb", fmt("A", [w(r32m8), r(xmm2), r(imm8)]), vex(L128)._66()._0f3a().w0().op(0x14).r().ib(), (_64b | compat) & avx),22inst("vpextrw", fmt("A", [w(r32), r(xmm2), r(imm8)]), vex(L128)._66()._0f().w0().op(0xC5).r().ib(), (_64b | compat) & avx),23inst("vpextrw", fmt("B", [w(r32m16), r(xmm2), r(imm8)]), vex(L128)._66()._0f3a().w0().op(0x15).r().ib(), (_64b | compat) & avx),24inst("vpextrd", fmt("A", [w(rm32), r(xmm2), r(imm8)]), vex(L128)._66()._0f3a().w0().op(0x16).r().ib(), (_64b | compat) & avx),25inst("vpextrq", fmt("A", [w(rm64), r(xmm2), r(imm8)]), vex(L128)._66()._0f3a().w1().op(0x16).r().ib(), (_64b | compat) & avx),2627// Insert into a single XMM lane.28inst("insertps", fmt("A", [rw(xmm1), r(xmm_m32), r(imm8)]), rex([0x66, 0x0F, 0x3A, 0x21]).r().ib(), (_64b | compat) & sse41).alt(avx, "vinsertps_b"),29inst("pinsrb", fmt("A", [rw(xmm1), r(r32m8), r(imm8)]), rex([0x66, 0x0F, 0x3A, 0x20]).r().ib(), (_64b | compat) & sse41),30inst("pinsrw", fmt("A", [rw(xmm1), r(r32m16), r(imm8)]), rex([0x66, 0x0F, 0xC4]).r().ib(), (_64b | compat) & sse2),31inst("pinsrd", fmt("A", [rw(xmm1), r(rm32), r(imm8)]), rex([0x66, 0x0F, 0x3A, 0x22]).r().ib(), (_64b | compat) & sse41),32inst("pinsrq", fmt("A", [rw(xmm1), r(rm64), r(imm8)]), rex([0x66, 0x0F, 0x3A, 0x22]).r().ib().w(), _64b & sse41),33inst("vinsertps", fmt("B", [w(xmm1), r(xmm2), r(xmm_m32), r(imm8)]), vex(L128)._66()._0f3a().wig().op(0x21).r().ib(), (_64b | compat) & avx),34inst("vpinsrb", fmt("B", [w(xmm1), r(xmm2), r(r32m8), r(imm8)]), vex(L128)._66()._0f3a().w0().op(0x20).r().ib(), (_64b | compat) & avx),35inst("vpinsrw", fmt("B", [w(xmm1), r(xmm2), r(r32m16), r(imm8)]), vex(L128)._66()._0f().w0().op(0xC4).r().ib(), (_64b | compat) & avx),36inst("vpinsrd", fmt("B", [w(xmm1), r(xmm2), r(rm32), r(imm8)]), vex(L128)._66()._0f3a().w0().op(0x22).r().ib(), (_64b | compat) & avx),37inst("vpinsrq", fmt("B", [w(xmm1), r(xmm2), r(rm64), r(imm8)]), vex(L128)._66()._0f3a().w1().op(0x22).r().ib(), _64b & avx),3839// Extract sign masks from the floating-point lanes.40inst("movmskps", fmt("RM", [w(r32), r(xmm2)]), rex([0x0F, 0x50]).r(), (_64b | compat) & sse).alt(avx, "vmovmskps_rm"),41inst("movmskpd", fmt("RM", [w(r32), r(xmm2)]), rex([0x66, 0x0F, 0x50]).r(), (_64b | compat) & sse2).alt(avx, "vmovmskpd_rm"),42inst("pmovmskb", fmt("RM", [w(r32), r(xmm2)]), rex([0x66, 0x0F, 0xD7]).r(), (_64b | compat) & sse2).alt(avx, "vpmovmskb_rm"),43inst("vmovmskps", fmt("RM", [w(r32), r(xmm2)]), vex(L128)._0f().op(0x50).r(), (_64b | compat) & avx),44inst("vmovmskpd", fmt("RM", [w(r32), r(xmm2)]), vex(L128)._66()._0f().op(0x50).r(), (_64b | compat) & avx),45inst("vpmovmskb", fmt("RM", [w(r32), r(xmm2)]), vex(L128)._66()._0f().op(0xD7).r(), (_64b | compat) & avx),4647// Move two lower 32-bit floats to the high two lanes.48inst("movhps", fmt("A", [rw(xmm1), r(m64)]), rex([0x0F, 0x16]).r(), (_64b | compat) & sse).alt(avx, "vmovhps_b"),49inst("movlhps", fmt("RM", [rw(xmm1), r(xmm2)]), rex([0x0F, 0x16]).r(), (_64b | compat) & sse).alt(avx, "vmovlhps_rvm"),50inst("vmovhps", fmt("B", [w(xmm2), r(xmm1), r(m64)]), vex(L128)._0f().op(0x16).r(), (_64b | compat) & avx),51inst("vmovlhps", fmt("RVM", [w(xmm1), r(xmm2), r(xmm3)]), vex(L128)._0f().op(0x16).r(), (_64b | compat) & avx),5253// Duplicate the lower 64 bits of the source into 128 bits of the destination.54inst("movddup", fmt("A", [w(xmm1), r(xmm_m64)]), rex([0xF2, 0x0F, 0x12]).r(), (_64b | compat) & sse3).alt(avx, "vmovddup_a"),55inst("vmovddup", fmt("A", [w(xmm1), r(xmm_m64)]), vex(L128)._f2()._0f().op(0x12).r(), (_64b | compat) & avx),5657// Blend lanes in various ways.58inst("pblendw", fmt("RMI", [rw(xmm1), r(align(xmm_m128)), r(imm8)]), rex([0x66, 0x0F, 0x3A, 0x0E]).r().ib(), (_64b | compat) & sse41).alt(avx, "vpblendw_rvmi"),59inst("pblendvb", fmt("RM", [rw(xmm1), r(align(xmm_m128)), r(xmm0)]), rex([0x66, 0x0F, 0x38, 0x10]).r(), (_64b | compat) & sse41),60inst("blendvps", fmt("RM0", [rw(xmm1), r(align(xmm_m128)), r(xmm0)]), rex([0x66, 0x0F, 0x38, 0x14]).r(), (_64b | compat) & sse41),61inst("blendvpd", fmt("RM0", [rw(xmm1), r(align(xmm_m128)), r(xmm0)]), rex([0x66, 0x0F, 0x38, 0x15]).r(), (_64b | compat) & sse41),62inst("vpblendw", fmt("RVMI", [w(xmm1), r(xmm2), r(xmm_m128), r(imm8)]), vex(L128)._66()._0f3a().w0().op(0x0E).r().ib(), (_64b | compat) & avx),63inst("vpblendvb", fmt("RVMR", [w(xmm1), r(xmm2), r(xmm_m128), r(xmm3)]), vex(L128)._66()._0f3a().w0().op(0x4C).r().is4(), (_64b | compat) & avx),64inst("vblendvps", fmt("RVMR", [w(xmm1), r(xmm2), r(xmm_m128), r(xmm3)]), vex(L128)._66()._0f3a().w0().op(0x4A).r().is4(), (_64b | compat) & avx),65inst("vblendvpd", fmt("RVMR", [w(xmm1), r(xmm2), r(xmm_m128), r(xmm3)]), vex(L128)._66()._0f3a().w0().op(0x4B).r().is4(), (_64b | compat) & avx),6667// Shuffle lanes in various ways.68inst("shufpd", fmt("A", [rw(xmm1), r(align(xmm_m128)), r(imm8)]), rex([0x66, 0x0F, 0xC6]).ib(), (_64b | compat) & sse2).alt(avx, "vshufpd_b"),69inst("vshufpd", fmt("B", [w(xmm1), r(xmm2), r(xmm_m128), r(imm8)]), vex(L128)._66()._0f().ib().op(0xC6), (_64b | compat) & avx),70inst("shufps", fmt("A", [rw(xmm1), r(align(xmm_m128)), r(imm8)]), rex([0x0F, 0xC6]).ib(), (_64b | compat) & sse).alt(avx, "vshufps_b"),71inst("vshufps", fmt("B", [w(xmm1), r(xmm2), r(xmm_m128), r(imm8)]), vex(L128)._0f().ib().op(0xC6), (_64b | compat) & avx),72inst("pshufb", fmt("A", [rw(xmm1), r(align(xmm_m128))]), rex([0x66, 0x0F, 0x38, 0x00]), (_64b | compat) & ssse3).alt(avx, "vpshufb_b"),73inst("pshufd", fmt("A", [w(xmm1), r(align(xmm_m128)), r(imm8)]), rex([0x66, 0x0F, 0x70]).r().ib(), (_64b | compat) & sse2).alt(avx, "vpshufd_a"),74inst("pshuflw", fmt("A", [w(xmm1), r(align(xmm_m128)), r(imm8)]), rex([0xF2, 0x0F, 0x70]).r().ib(), (_64b | compat) & sse2).alt(avx, "vpshuflw_a"),75inst("pshufhw", fmt("A", [w(xmm1), r(align(xmm_m128)), r(imm8)]), rex([0xF3, 0x0F, 0x70]).r().ib(), (_64b | compat) & sse2).alt(avx, "vpshufhw_a"),76inst("vpshufb", fmt("B", [w(xmm1), r(xmm2), r(xmm_m128)]), vex(L128)._66()._0f38().op(0x00), (_64b | compat) & avx),77inst("vpshufd", fmt("A", [w(xmm1), r(xmm_m128), r(imm8)]), vex(L128)._66()._0f().op(0x70).r().ib(), (_64b | compat) & avx),78inst("vpshuflw", fmt("A", [w(xmm1), r(xmm_m128), r(imm8)]), vex(L128)._f2()._0f().op(0x70).r().ib(), (_64b | compat) & avx),79inst("vpshufhw", fmt("A", [w(xmm1), r(xmm_m128), r(imm8)]), vex(L128)._f3()._0f().op(0x70).r().ib(), (_64b | compat) & avx),8081// Broadcast a single lane to all lanes of the destination.82inst("vbroadcastss", fmt("A_M", [w(xmm1), r(m32)]), vex(L128)._66()._0f38().w0().op(0x18).r(), (_64b | compat) & avx),83inst("vbroadcastss", fmt("A_R", [w(xmm1), r(xmm2)]), vex(L128)._66()._0f38().w0().op(0x18).r(), (_64b | compat) & avx2),84inst("vpbroadcastb", fmt("A", [w(xmm1), r(xmm_m8)]), vex(L128)._66()._0f38().w0().op(0x78).r(), (_64b | compat) & avx2),85inst("vpbroadcastw", fmt("A", [w(xmm1), r(xmm_m16)]), vex(L128)._66()._0f38().w0().op(0x79).r(), (_64b | compat) & avx2),86inst("vpbroadcastd", fmt("A", [w(xmm1), r(xmm_m32)]), vex(L128)._66()._0f38().w0().op(0x58).r(), (_64b | compat) & avx2),87inst("vpbroadcastq", fmt("A", [w(xmm1), r(xmm_m64)]), vex(L128)._66()._0f38().w0().op(0x59).r(), (_64b | compat) & avx2),8889// AVX-512 permutations90inst("vpermi2b", fmt("A", [rw(xmm1), r(xmm2), r(xmm_m128)]), evex(L128, FullMem)._66()._0f38().w0().op(0x75).r(), (_64b | compat) & avx512vl & avx512vbmi),91]92}939495