Path: blob/main/cranelift/assembler-x64/meta/src/instructions/mul.rs
1693 views
use crate::dsl::{Customization::*, Feature::*, Inst, Length::*, Location::*, TupleType::*};1use crate::dsl::{align, evex, fmt, implicit, inst, r, rex, rw, sxl, sxq, sxw, vex, w};23#[rustfmt::skip] // Keeps instructions on a single line.4pub fn list() -> Vec<Inst> {5vec![6// Multiply unsigned; low bits in `rax`, high bits in `rdx`.7inst("mulb", fmt("M", [rw(implicit(ax)), r(rm8)]), rex(0xF6).digit(4), _64b | compat),8inst("mulw", fmt("M", [rw(implicit(ax)), w(implicit(dx)), r(rm16)]), rex([0x66, 0xF7]).digit(4), _64b | compat),9inst("mull", fmt("M", [rw(implicit(eax)), w(implicit(edx)), r(rm32)]), rex(0xF7).digit(4), _64b | compat),10inst("mulq", fmt("M", [rw(implicit(rax)), w(implicit(rdx)), r(rm64)]), rex(0xF7).w().digit(4), _64b),11// Multiply signed; low bits in `rax`, high bits in `rdx`.12inst("imulb", fmt("M", [rw(implicit(ax)), r(rm8)]), rex(0xF6).digit(5), _64b | compat),13inst("imulw", fmt("M", [rw(implicit(ax)), w(implicit(dx)), r(rm16)]), rex([0x66, 0xF7]).digit(5), _64b | compat),14inst("imull", fmt("M", [rw(implicit(eax)), w(implicit(edx)), r(rm32)]), rex(0xF7).digit(5), _64b | compat),15inst("imulq", fmt("M", [rw(implicit(rax)), w(implicit(rdx)), r(rm64)]), rex(0xF7).w().digit(5), _64b),16inst("imulw", fmt("RM", [rw(r16), r(rm16)]), rex([0x66, 0x0F, 0xAF]), _64b | compat),17inst("imull", fmt("RM", [rw(r32), r(rm32)]), rex([0x0F, 0xAF]), _64b | compat),18inst("imulq", fmt("RM", [rw(r64), r(rm64)]), rex([0x0F, 0xAF]).w(), _64b),19inst("imulw", fmt("RMI_SXB", [w(r16), r(rm16), sxw(imm8)]), rex([0x66, 0x6B]).ib(), _64b | compat),20inst("imull", fmt("RMI_SXB", [w(r32), r(rm32), sxl(imm8)]), rex(0x6B).ib(), _64b | compat),21inst("imulq", fmt("RMI_SXB", [w(r64), r(rm64), sxq(imm8)]), rex(0x6B).w().ib(), _64b),22inst("imulw", fmt("RMI", [w(r16), r(rm16), r(imm16)]), rex([0x66, 0x69]).iw(), _64b | compat),23inst("imull", fmt("RMI", [w(r32), r(rm32), r(imm32)]), rex(0x69).id(), _64b | compat),24inst("imulq", fmt("RMI_SXL", [w(r64), r(rm64), sxq(imm32)]), rex(0x69).w().id(), _64b),25// MULX, a BMI2 extension26//27// Note that these have custom visitors for regalloc to handle the case28// where both output operands are the same. This is where the29// instruction only calculates the high half of the multiplication, as30// opposed to when the operands are different to calculate both the high31// and low halves.32inst("mulxl", fmt("RVM", [w(r32a), w(r32b), r(rm32), r(implicit(edx))]), vex(LZ)._f2()._0f38().w0().op(0xF6), (_64b | compat) & bmi2).custom(Visit),33inst("mulxq", fmt("RVM", [w(r64a), w(r64b), r(rm64), r(implicit(rdx))]), vex(LZ)._f2()._0f38().w1().op(0xF6), _64b & bmi2).custom(Visit),34// Vector instructions.35inst("mulss", fmt("A", [rw(xmm1), r(xmm_m32)]), rex([0xF3, 0x0F, 0x59]).r(), (_64b | compat) & sse).alt(avx, "vmulss_b"),36inst("mulsd", fmt("A", [rw(xmm1), r(xmm_m64)]), rex([0xF2, 0x0F, 0x59]).r(), (_64b | compat) & sse2).alt(avx, "vmulsd_b"),37inst("mulps", fmt("A", [rw(xmm1), r(align(xmm_m128))]), rex([0x0F, 0x59]).r(), (_64b | compat) & sse).alt(avx, "vmulps_b"),38inst("mulpd", fmt("A", [rw(xmm1), r(align(xmm_m128))]), rex([0x66, 0x0F, 0x59]).r(), (_64b | compat) & sse2).alt(avx, "vmulpd_b"),39inst("pmuldq", fmt("A", [rw(xmm1), r(align(xmm_m128))]), rex([0x66, 0x0F, 0x38, 0x28]).r(), (_64b | compat) & sse41).alt(avx, "vpmuldq_b"),40inst("pmulhrsw", fmt("A", [rw(xmm1), r(align(xmm_m128))]), rex([0x66, 0x0F, 0x38, 0x0B]).r(), (_64b | compat) & ssse3).alt(avx, "vpmulhrsw_b"),41inst("pmulhuw", fmt("A", [rw(xmm1), r(align(xmm_m128))]), rex([0x66, 0x0F, 0xE4]).r(), (_64b | compat) & sse2).alt(avx, "vpmulhuw_b"),42inst("pmulhw", fmt("A", [rw(xmm1), r(align(xmm_m128))]), rex([0x66, 0x0F, 0xE5]).r(), (_64b | compat) & sse2).alt(avx, "vpmulhw_b"),43inst("pmulld", fmt("A", [rw(xmm1), r(align(xmm_m128))]), rex([0x66, 0x0F, 0x38, 0x40]).r(), (_64b | compat) & sse41).alt(avx, "vpmulld_b"),44inst("pmullw", fmt("A", [rw(xmm1), r(align(xmm_m128))]), rex([0x66, 0x0F, 0xD5]).r(), (_64b | compat) & sse2).alt(avx, "vpmullw_b"),45inst("pmuludq", fmt("A", [rw(xmm1), r(align(xmm_m128))]), rex([0x66, 0x0F, 0xF4]).r(), (_64b | compat) & sse2).alt(avx, "vpmuludq_b"),46inst("vmulss", fmt("B", [w(xmm1), r(xmm2), r(xmm_m32)]), vex(LIG)._f3()._0f().op(0x59), (_64b | compat) & avx),47inst("vmulsd", fmt("B", [w(xmm1), r(xmm2), r(xmm_m64)]), vex(LIG)._f2()._0f().op(0x59), (_64b | compat) & avx),48inst("vmulps", fmt("B", [w(xmm1), r(xmm2), r(xmm_m128)]), vex(L128)._0f().op(0x59), (_64b | compat) & avx),49inst("vmulpd", fmt("B", [w(xmm1), r(xmm2), r(xmm_m128)]), vex(L128)._66()._0f().op(0x59), (_64b | compat) & avx),50inst("vpmuldq", fmt("B", [w(xmm1), r(xmm2), r(xmm_m128)]), vex(L128)._66()._0f38().op(0x28), (_64b | compat) & avx),51inst("vpmulhrsw", fmt("B", [w(xmm1), r(xmm2), r(xmm_m128)]), vex(L128)._66()._0f38().op(0x0B), (_64b | compat) & avx),52inst("vpmulhuw", fmt("B", [w(xmm1), r(xmm2), r(xmm_m128)]), vex(L128)._66()._0f().op(0xE4), (_64b | compat) & avx),53inst("vpmulhw", fmt("B", [w(xmm1), r(xmm2), r(xmm_m128)]), vex(L128)._66()._0f().op(0xE5), (_64b | compat) & avx),54inst("vpmulld", fmt("B", [w(xmm1), r(xmm2), r(xmm_m128)]), vex(L128)._66()._0f38().op(0x40), (_64b | compat) & avx),55inst("vpmullw", fmt("B", [w(xmm1), r(xmm2), r(xmm_m128)]), vex(L128)._66()._0f().op(0xD5), (_64b | compat) & avx),56inst("vpmuludq", fmt("B", [w(xmm1), r(xmm2), r(xmm_m128)]), vex(L128)._66()._0f().op(0xF4), (_64b | compat) & avx),57// AVX-512 instructions.58inst("vpmulld", fmt("C", [w(xmm1), r(xmm2), r(xmm_m128)]), evex(L128, Full)._66()._0f38().w0().op(0x40).r(), (_64b | compat) & avx512vl & avx512f),59inst("vpmullq", fmt("C", [w(xmm1), r(xmm2), r(xmm_m128)]), evex(L128, Full)._66()._0f38().w1().op(0x40).r(), (_64b | compat) & avx512vl & avx512dq),60]61}626364