Path: cranelift/codegen/src/isa/pulley_shared/lower.isle
;; Pulley instruction selection and CLIF-to-MachInst lowering.
;; The main lowering constructor term: takes a clif `Inst` and returns the
;; register(s) within which the lowered instruction's result values live.
(decl partial lower (Inst) InstOutput)
;;;; Rules for Control Flow ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Helper to lower a conditional `Value` into a Pulley `Cond`. Pulley
;; conditional values occupy the full low 32 bits of a register, so this needs
;; to handle cases such as a 64-bit `Value`, where an explicit comparison
;; against zero must be made. Additionally, if the `Value` is smaller than 32
;; bits then it is zero-extended up to at least 32 bits.
(decl lower_cond (Value) Cond)
(rule 0 (lower_cond val @ (value_type (fits_in_32 _))) (Cond.If32 (zext32 val)))
(rule 1 (lower_cond val @ (value_type $I64))
(Cond.IfXneq64I32 val 0))
;; Peel away explicit `uextend` values to take a look at the inner value.
(rule 2 (lower_cond (uextend val)) (lower_cond val))
;; Conditional branches on `icmp`s.
(rule 2 (lower_cond (icmp cc a b @ (value_type $I32))) (lower_cond_icmp32 cc a b))
(rule 2 (lower_cond (icmp cc a b @ (value_type $I64))) (lower_cond_icmp64 cc a b))
(decl lower_cond_icmp32 (IntCC Value Value) Cond)
(rule (lower_cond_icmp32 (IntCC.Equal) a b) (Cond.IfXeq32 a b))
(rule (lower_cond_icmp32 (IntCC.NotEqual) a b) (Cond.IfXneq32 a b))
(rule (lower_cond_icmp32 (IntCC.SignedLessThan) a b) (Cond.IfXslt32 a b))
(rule (lower_cond_icmp32 (IntCC.SignedLessThanOrEqual) a b) (Cond.IfXslteq32 a b))
(rule (lower_cond_icmp32 (IntCC.UnsignedLessThan) a b) (Cond.IfXult32 a b))
(rule (lower_cond_icmp32 (IntCC.UnsignedLessThanOrEqual) a b) (Cond.IfXulteq32 a b))
;; Swap the arguments for conditions that Pulley doesn't have.
(rule (lower_cond_icmp32 (IntCC.SignedGreaterThan) a b) (Cond.IfXslt32 b a))
(rule (lower_cond_icmp32 (IntCC.SignedGreaterThanOrEqual) a b) (Cond.IfXslteq32 b a))
(rule (lower_cond_icmp32 (IntCC.UnsignedGreaterThan) a b) (Cond.IfXult32 b a))
(rule (lower_cond_icmp32 (IntCC.UnsignedGreaterThanOrEqual) a b) (Cond.IfXulteq32 b a))
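;; When the right-hand side is a constant that fits in 32 bits, prefer the
;; immediate forms of these comparisons below (signed immediates for equality
;; and the signed conditions, unsigned immediates for the unsigned conditions).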
(rule 1 (lower_cond_icmp32 (IntCC.Equal) a (i32_from_iconst b))
(Cond.IfXeq32I32 a b))
(rule 1 (lower_cond_icmp32 (IntCC.NotEqual) a (i32_from_iconst b))
(Cond.IfXneq32I32 a b))
(rule 1 (lower_cond_icmp32 (IntCC.SignedLessThan) a (i32_from_iconst b))
(Cond.IfXslt32I32 a b))
(rule 1 (lower_cond_icmp32 (IntCC.SignedLessThanOrEqual) a (i32_from_iconst b))
(Cond.IfXslteq32I32 a b))
(rule 1 (lower_cond_icmp32 (IntCC.SignedGreaterThan) a (i32_from_iconst b))
(Cond.IfXsgt32I32 a b))
(rule 1 (lower_cond_icmp32 (IntCC.SignedGreaterThanOrEqual) a (i32_from_iconst b))
(Cond.IfXsgteq32I32 a b))
(rule 1 (lower_cond_icmp32 (IntCC.UnsignedLessThan) a (u32_from_iconst b))
(Cond.IfXult32I32 a b))
(rule 1 (lower_cond_icmp32 (IntCC.UnsignedLessThanOrEqual) a (u32_from_iconst b))
(Cond.IfXulteq32I32 a b))
(rule 1 (lower_cond_icmp32 (IntCC.UnsignedGreaterThan) a (u32_from_iconst b))
(Cond.IfXugt32I32 a b))
(rule 1 (lower_cond_icmp32 (IntCC.UnsignedGreaterThanOrEqual) a (u32_from_iconst b))
(Cond.IfXugteq32I32 a b))
(decl lower_cond_icmp64 (IntCC Value Value) Cond)
(rule (lower_cond_icmp64 (IntCC.Equal) a b) (Cond.IfXeq64 a b))
(rule (lower_cond_icmp64 (IntCC.NotEqual) a b) (Cond.IfXneq64 a b))
(rule (lower_cond_icmp64 (IntCC.SignedLessThan) a b) (Cond.IfXslt64 a b))
(rule (lower_cond_icmp64 (IntCC.SignedLessThanOrEqual) a b) (Cond.IfXslteq64 a b))
(rule (lower_cond_icmp64 (IntCC.UnsignedLessThan) a b) (Cond.IfXult64 a b))
(rule (lower_cond_icmp64 (IntCC.UnsignedLessThanOrEqual) a b) (Cond.IfXulteq64 a b))
;; Swap the arguments for conditions that Pulley doesn't have.
(rule (lower_cond_icmp64 (IntCC.SignedGreaterThan) a b) (Cond.IfXslt64 b a))
(rule (lower_cond_icmp64 (IntCC.SignedGreaterThanOrEqual) a b) (Cond.IfXslteq64 b a))
(rule (lower_cond_icmp64 (IntCC.UnsignedGreaterThan) a b) (Cond.IfXult64 b a))
(rule (lower_cond_icmp64 (IntCC.UnsignedGreaterThanOrEqual) a b) (Cond.IfXulteq64 b a))
(rule 1 (lower_cond_icmp64 (IntCC.Equal) a (i32_from_iconst b))
(Cond.IfXeq64I32 a b))
(rule 1 (lower_cond_icmp64 (IntCC.NotEqual) a (i32_from_iconst b))
(Cond.IfXneq64I32 a b))
(rule 1 (lower_cond_icmp64 (IntCC.SignedLessThan) a (i32_from_iconst b))
(Cond.IfXslt64I32 a b))
(rule 1 (lower_cond_icmp64 (IntCC.SignedLessThanOrEqual) a (i32_from_iconst b))
(Cond.IfXslteq64I32 a b))
(rule 1 (lower_cond_icmp64 (IntCC.SignedGreaterThan) a (i32_from_iconst b))
(Cond.IfXsgt64I32 a b))
(rule 1 (lower_cond_icmp64 (IntCC.SignedGreaterThanOrEqual) a (i32_from_iconst b))
(Cond.IfXsgteq64I32 a b))
(rule 1 (lower_cond_icmp64 (IntCC.UnsignedLessThan) a (u32_from_iconst b))
(Cond.IfXult64I32 a b))
(rule 1 (lower_cond_icmp64 (IntCC.UnsignedLessThanOrEqual) a (u32_from_iconst b))
(Cond.IfXulteq64I32 a b))
(rule 1 (lower_cond_icmp64 (IntCC.UnsignedGreaterThan) a (u32_from_iconst b))
(Cond.IfXugt64I32 a b))
(rule 1 (lower_cond_icmp64 (IntCC.UnsignedGreaterThanOrEqual) a (u32_from_iconst b))
(Cond.IfXugteq64I32 a b))
;; The main control-flow-lowering term: takes a control-flow instruction and
;; target(s) and emits the necessary instructions.
(decl partial lower_branch (Inst MachLabelSlice) Unit)
;; Unconditional jumps.
(rule (lower_branch (jump _) (single_target label))
(emit_side_effect (pulley_jump label)))
;; Generic case for conditional branches.
(rule -1 (lower_branch (brif c _ _) (two_targets then else))
(emit_side_effect (pulley_br_if (lower_cond c) then else)))
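;; Note that because `lower_cond` pattern-matches `icmp` directly, a `brif`
;; whose condition comes from a comparison lowers to a single compare-and-branch
;; `Cond` rather than first materializing a boolean in a register.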
;; Branch tables.
(rule (lower_branch (br_table index _) (jump_table_targets default targets))
(gen_br_table index default targets))
;;;; Rules for `trap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (trap code))
(side_effect (pulley_trap code)))
;;;; Rules for `trapz` and `trapnz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (trapz cond code))
(side_effect (pulley_trap_if (cond_invert (lower_cond cond)) code)))
(rule (lower (trapnz cond code))
(side_effect (pulley_trap_if (lower_cond cond) code)))
;;;; Rules for `get_stack_pointer` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (get_stack_pointer))
(pulley_get_special (sp_reg)))
;;;; Rules for `get_frame_pointer` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (get_frame_pointer)) (pulley_xmov_fp))
;;;; Rules for `get_return_address` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (get_return_address)) (pulley_xmov_lr))
;;;; Rules for `return` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; N.B.: the `ret` itself is generated by the ABI.
(rule (lower (return args))
(lower_return args))
;;;; Rules for `call` and `call_indirect` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Direct call to a pulley function.
(rule 1 (lower (call (func_ref_data sig_ref name (RelocDistance.Near) patchable) args))
(let ((output ValueRegsVec (gen_call_output sig_ref))
(abi Sig (abi_sig sig_ref))
(uses CallArgList (gen_call_args abi args))
(defs CallRetList (gen_call_rets abi output))
(info BoxCallInfo (gen_call_info abi name uses defs (try_call_none) patchable))
(_ Unit (emit_side_effect (call_impl info))))
output))
;; Direct call to a host function.
(rule (lower (call (func_ref_data sig_ref name _ false) args))
(let ((output ValueRegsVec (gen_call_output sig_ref))
(abi Sig (abi_sig sig_ref))
(uses CallArgList (gen_call_args abi args))
(defs CallRetList (gen_call_rets abi output))
(info BoxCallIndirectHostInfo (gen_call_host_info abi name uses defs (try_call_none)))
(_ Unit (emit_side_effect (indirect_call_host_impl info))))
output))
;; Indirect call (assumed to be pulley->pulley).
(rule (lower (call_indirect sig_ref ptr args))
(let ((output ValueRegsVec (gen_call_output sig_ref))
(abi Sig (abi_sig sig_ref))
(target Reg (put_in_reg ptr))
(uses CallArgList (gen_call_args abi args))
(defs CallRetList (gen_call_rets abi output))
(info BoxCallIndInfo (gen_call_ind_info abi target uses defs (try_call_none)))
(_ Unit (emit_side_effect (indirect_call_impl info))))
output))
;;;; Rules for `try_call` and `try_call_indirect` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Direct call to a pulley function.
(rule 1 (lower_branch (try_call (func_ref_data sig_ref name (RelocDistance.Near) patchable) args et) targets)
(let ((abi Sig (abi_sig sig_ref))
(trycall OptionTryCallInfo (try_call_info et targets))
(uses CallArgList (gen_call_args abi args))
(defs CallRetList (gen_try_call_rets abi))
(info BoxCallInfo (gen_call_info abi name uses defs trycall patchable)))
(emit_side_effect (call_impl info))))
;; Direct call to a host function.
(rule (lower_branch (try_call (func_ref_data sig_ref name _ false) args et) targets)
(let ((abi Sig (abi_sig sig_ref))
(trycall OptionTryCallInfo (try_call_info et targets))
(uses CallArgList (gen_call_args abi args))
(defs CallRetList (gen_try_call_rets abi))
(info BoxCallIndirectHostInfo (gen_call_host_info abi name uses defs trycall)))
(emit_side_effect (indirect_call_host_impl info))))
;; Indirect call (assumed to be pulley->pulley).
(rule (lower_branch (try_call_indirect ptr args et) targets)
(if-let (exception_sig sig_ref) et)
(let ((abi Sig (abi_sig sig_ref))
(trycall OptionTryCallInfo (try_call_info et targets))
(target Reg (put_in_reg ptr))
(uses CallArgList (gen_call_args abi args))
(defs CallRetList (gen_try_call_rets abi))
(info BoxCallIndInfo (gen_call_ind_info abi target uses defs trycall)))
(emit_side_effect (indirect_call_impl info))))
;;;; Rules for `return_call` and `return_call_indirect` ;;;;;;;;;;;;;;;;;;;;;;;;
;; Direct call to a pulley function.
(rule 1 (lower (return_call (func_ref_data sig_ref name (RelocDistance.Near) false) args))
(let ((abi Sig (abi_sig sig_ref))
(uses CallArgList (gen_return_call_args abi args))
(info BoxReturnCallInfo (gen_return_call_info abi name uses)))
(side_effect (return_call_impl info))))
;; Tail call to a function whose address must be loaded (i.e. the relocation
;; isn't known to be near), lowered as an indirect tail call through a register.
(rule (lower (return_call (func_ref_data sig_ref name dist false) args))
(let ((abi Sig (abi_sig sig_ref))
(uses CallArgList (gen_return_call_args abi args))
(target Reg (load_ext_name name 0 dist))
(info BoxReturnCallIndInfo (gen_return_call_ind_info abi target uses)))
(side_effect (return_indirect_call_impl info))))
;; Indirect call (assumed to be pulley->pulley).
(rule (lower (return_call_indirect sig_ref ptr args))
(let ((abi Sig (abi_sig sig_ref))
(target Reg (put_in_reg ptr))
(uses CallArgList (gen_return_call_args abi args))
(info BoxReturnCallIndInfo (gen_return_call_ind_info abi target uses)))
(side_effect (return_indirect_call_impl info))))
;;;; Rules for `iconst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type ty (iconst c))) (imm (i64_sextend_imm64 ty c)))
;;;; Rules for `f32const`;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (f32const (u32_from_ieee32 x))) (pulley_fconst32 x))
;;;; Rules for `f64const`;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (f64const (u64_from_ieee64 x))) (pulley_fconst64 x))
;;;; Rules for `iadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule 0 (lower (has_type (ty_int (fits_in_32 _)) (iadd a b))) (pulley_xadd32 a b))
(rule 1 (lower (has_type $I64 (iadd a b))) (pulley_xadd64 a b))
;; Fold constants into the instruction if possible
(rule 10 (lower (has_type (ty_int (fits_in_32 _)) (iadd a (u32_from_iconst b))))
(pulley_xadd32_u32 a b))
(rule 11 (lower (has_type (ty_int (fits_in_32 _)) (iadd a (u8_from_iconst b))))
(pulley_xadd32_u8 a b))
(rule 12 (lower (has_type $I64 (iadd a (u32_from_iconst b))))
(pulley_xadd64_u32 a b))
(rule 13 (lower (has_type $I64 (iadd a (u8_from_iconst b))))
(pulley_xadd64_u8 a b))
;; If the rhs is a constant and the negated version can fit within a smaller
;; constant then switch this to a subtraction with the negated constant.
(rule 14 (lower (has_type (ty_int (fits_in_32 _)) (iadd a b)))
(if-let c (u32_from_negated_iconst b))
(pulley_xsub32_u32 a c))
(rule 15 (lower (has_type $I64 (iadd a b)))
(if-let c (u32_from_negated_iconst b))
(pulley_xsub64_u32 a c))
(rule 16 (lower (has_type (ty_int (fits_in_32 _)) (iadd a b)))
(if-let c (u8_from_negated_iconst b))
(pulley_xsub32_u8 a c))
(rule 17 (lower (has_type $I64 (iadd a b)))
(if-let c (u8_from_negated_iconst b))
(pulley_xsub64_u8 a c))
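;; For example an `iadd` with a constant of -1 becomes `(pulley_xsub32_u8 a 1)`,
;; trading a 32-bit immediate (or a constant in a register) for a one-byte
;; immediate.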
;; Helper to extract a constant from a `Value`, negate it, and fit it within a
;; `u8`.
(decl pure partial u8_from_negated_iconst (Value) u8)
(rule (u8_from_negated_iconst (i32_from_iconst i))
(if-let neg_i64 (i64_checked_neg i))
(if-let neg_u8 (i64_try_into_u8 neg_i64))
neg_u8)
;; Helper to extract a constant from a `Value`, negate it, and fit it within a
;; `u32`.
(decl pure partial u32_from_negated_iconst (Value) u32)
(rule (u32_from_negated_iconst (i32_from_iconst i))
(if-let neg_i64 (i64_checked_neg i))
(if-let neg_u32 (i64_try_into_u32 neg_i64))
neg_u32)
;; 128-bit addition
(rule 1 (lower (has_type $I128 (iadd a b)))
(let ((a ValueRegs a)
(b ValueRegs b))
(pulley_xadd128
(value_regs_get a 0)
(value_regs_get a 1)
(value_regs_get b 0)
(value_regs_get b 1))))
;; vector addition
(rule 1 (lower (has_type $I8X16 (iadd a b))) (pulley_vaddi8x16 a b))
(rule 1 (lower (has_type $I16X8 (iadd a b))) (pulley_vaddi16x8 a b))
(rule 1 (lower (has_type $I32X4 (iadd a b))) (pulley_vaddi32x4 a b))
(rule 1 (lower (has_type $I64X2 (iadd a b))) (pulley_vaddi64x2 a b))
(rule 1 (lower (has_type $I8X16 (sadd_sat a b))) (pulley_vaddi8x16_sat a b))
(rule 1 (lower (has_type $I8X16 (uadd_sat a b))) (pulley_vaddu8x16_sat a b))
(rule 1 (lower (has_type $I16X8 (sadd_sat a b))) (pulley_vaddi16x8_sat a b))
(rule 1 (lower (has_type $I16X8 (uadd_sat a b))) (pulley_vaddu16x8_sat a b))
;; Specialized lowerings for multiply-and-add
(rule 2 (lower (has_type $I32 (iadd (imul a b) c))) (pulley_xmadd32 a b c))
(rule 3 (lower (has_type $I32 (iadd c (imul a b)))) (pulley_xmadd32 a b c))
(rule 2 (lower (has_type $I64 (iadd (imul a b) c))) (pulley_xmadd64 a b c))
(rule 3 (lower (has_type $I64 (iadd c (imul a b)))) (pulley_xmadd64 a b c))
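;; Both operand orders are matched above since `iadd` is commutative and the
;; `imul` may appear on either side.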
;;;; Rules for `iadd_pairwise` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $I16X8 (iadd_pairwise a b))) (pulley_vaddpairwisei16x8_s a b))
(rule (lower (has_type $I32X4 (iadd_pairwise a b))) (pulley_vaddpairwisei32x4_s a b))
;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule 0 (lower (has_type (ty_int (fits_in_32 _)) (isub a b))) (pulley_xsub32 a b))
(rule 1 (lower (has_type $I64 (isub a b))) (pulley_xsub64 a b))
;; Fold a rhs constant into the instruction if possible.
(rule 2 (lower (has_type (ty_int (fits_in_32 _)) (isub a (u32_from_iconst b))))
(pulley_xsub32_u32 a b))
(rule 3 (lower (has_type (ty_int (fits_in_32 _)) (isub a (u8_from_iconst b))))
(pulley_xsub32_u8 a b))
(rule 4 (lower (has_type $I64 (isub a (u32_from_iconst b))))
(pulley_xsub64_u32 a b))
(rule 5 (lower (has_type $I64 (isub a (u8_from_iconst b))))
(pulley_xsub64_u8 a b))
;; If the rhs is a constant and the negated version can fit within a smaller
;; constant then switch this to an addition with the negated constant.
(rule 6 (lower (has_type (ty_int (fits_in_32 _)) (isub a b)))
(if-let c (u32_from_negated_iconst b))
(pulley_xadd32_u32 a c))
(rule 7 (lower (has_type $I64 (isub a b)))
(if-let c (u32_from_negated_iconst b))
(pulley_xadd64_u32 a c))
(rule 8 (lower (has_type (ty_int (fits_in_32 _)) (isub a b)))
(if-let c (u8_from_negated_iconst b))
(pulley_xadd32_u8 a c))
(rule 9 (lower (has_type $I64 (isub a b)))
(if-let c (u8_from_negated_iconst b))
(pulley_xadd64_u8 a c))
;; 128-bit subtraction
(rule 1 (lower (has_type $I128 (isub a b)))
(let ((a ValueRegs a)
(b ValueRegs b))
(pulley_xsub128
(value_regs_get a 0)
(value_regs_get a 1)
(value_regs_get b 0)
(value_regs_get b 1))))
;; vector subtraction
(rule 1 (lower (has_type $I8X16 (isub a b))) (pulley_vsubi8x16 a b))
(rule 1 (lower (has_type $I16X8 (isub a b))) (pulley_vsubi16x8 a b))
(rule 1 (lower (has_type $I32X4 (isub a b))) (pulley_vsubi32x4 a b))
(rule 1 (lower (has_type $I64X2 (isub a b))) (pulley_vsubi64x2 a b))
(rule 1 (lower (has_type $I8X16 (ssub_sat a b))) (pulley_vsubi8x16_sat a b))
(rule 1 (lower (has_type $I8X16 (usub_sat a b))) (pulley_vsubu8x16_sat a b))
(rule 1 (lower (has_type $I16X8 (ssub_sat a b))) (pulley_vsubi16x8_sat a b))
(rule 1 (lower (has_type $I16X8 (usub_sat a b))) (pulley_vsubu16x8_sat a b))
;;;; Rules for `imul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $I8 (imul a b))) (pulley_xmul32 a b))
(rule (lower (has_type $I16 (imul a b))) (pulley_xmul32 a b))
(rule (lower (has_type $I32 (imul a b))) (pulley_xmul32 a b))
(rule (lower (has_type $I64 (imul a b))) (pulley_xmul64 a b))
(rule 1 (lower (has_type (ty_int (fits_in_32 _)) (imul a (i32_from_iconst b))))
(pulley_xmul32_s32 a b))
(rule 2 (lower (has_type $I64 (imul a (i32_from_iconst b))))
(pulley_xmul64_s32 a b))
(rule 3 (lower (has_type (ty_int (fits_in_32 _)) (imul a (i8_from_iconst b))))
(pulley_xmul32_s8 a b))
(rule 4 (lower (has_type $I64 (imul a (i8_from_iconst b))))
(pulley_xmul64_s8 a b))
;; 128-bit multiplication, including the widening 64x64 => 128 special cases
(rule 4 (lower (has_type $I128 (imul (uextend a) (uextend b))))
(pulley_xwidemul64_u (zext64 a) (zext64 b)))
(rule 4 (lower (has_type $I128 (imul (sextend a) (sextend b))))
(pulley_xwidemul64_s (sext64 a) (sext64 b)))
;; General case of `i128` multiplication
(rule (lower (has_type $I128 (imul x y)))
(let
((x_regs ValueRegs x)
(x_lo XReg (value_regs_get x_regs 0))
(x_hi XReg (value_regs_get x_regs 1))
;; Get the high/low registers for `y`.
(y_regs ValueRegs y)
(y_lo XReg (value_regs_get y_regs 0))
(y_hi XReg (value_regs_get y_regs 1))
;; 128-bit multiply formula:
;; dst_lo = x_lo * y_lo
;; dst_hi = mul_high(x_lo, y_lo) + (x_lo * y_hi) + (x_hi * y_lo)
(wide_regs ValueRegs (pulley_xwidemul64_u x_lo y_lo))
(wide_lo XReg (value_regs_get wide_regs 0))
(wide_hi XReg (value_regs_get wide_regs 1))
(tmp_hi1 XReg (pulley_xmul64 x_lo y_hi))
(tmp_hi2 XReg (pulley_xmul64 x_hi y_lo))
(tmp_add XReg (pulley_xadd64 wide_hi tmp_hi1))
(result_hi XReg (pulley_xadd64 tmp_add tmp_hi2))
)
(value_regs wide_lo result_hi)))
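;; Only the `x_lo * y_lo` product needs the widening multiply: the cross terms
;; contribute exclusively to the upper 64 bits of the result, so plain 64-bit
;; multiplies suffice for them.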
;; vector multiplication
(rule (lower (has_type $I8X16 (imul a b))) (pulley_vmuli8x16 a b))
(rule (lower (has_type $I16X8 (imul a b))) (pulley_vmuli16x8 a b))
(rule (lower (has_type $I32X4 (imul a b))) (pulley_vmuli32x4 a b))
(rule (lower (has_type $I64X2 (imul a b))) (pulley_vmuli64x2 a b))
;;;; Rules for `umulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $I8 (umulhi a b)))
(if-let (u6_from_u8 shift) (u64_try_into_u8 8))
(pulley_xshr32_u_u6 (pulley_xmul32 (zext32 a) (zext32 b)) shift))
(rule (lower (has_type $I16 (umulhi a b)))
(if-let (u6_from_u8 shift) (u64_try_into_u8 16))
(pulley_xshr32_u_u6 (pulley_xmul32 (zext32 a) (zext32 b)) shift))
(rule (lower (has_type $I32 (umulhi a b)))
(if-let (u6_from_u8 shift) (u64_try_into_u8 32))
(pulley_xshr64_u_u6 (pulley_xmul64 (zext64 a) (zext64 b)) shift))
(rule (lower (has_type $I64 (umulhi a b)))
(pulley_xmulhi64_u a b))
;;;; Rules for `smulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $I8 (smulhi a b)))
(if-let (u6_from_u8 shift) (u64_try_into_u8 8))
(pulley_xshr32_s_u6 (pulley_xmul32 (sext32 a) (sext32 b)) shift))
(rule (lower (has_type $I16 (smulhi a b)))
(if-let (u6_from_u8 shift) (u64_try_into_u8 16))
(pulley_xshr32_s_u6 (pulley_xmul32 (sext32 a) (sext32 b)) shift))
(rule (lower (has_type $I32 (smulhi a b)))
(if-let (u6_from_u8 shift) (u64_try_into_u8 32))
(pulley_xshr64_s_u6 (pulley_xmul64 (sext64 a) (sext64 b)) shift))
(rule (lower (has_type $I64 (smulhi a b)))
(pulley_xmulhi64_s a b))
;;;; Rules for `sqmul_round_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $I16X8 (sqmul_round_sat a b))) (pulley_vqmulrsi16x8 a b))
;;;; Rules for `sdiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule 0 (lower (has_type (fits_in_32 _) (sdiv a b)))
(pulley_xdiv32_s (sext32 a) (sext32 b)))
(rule 1 (lower (has_type $I64 (sdiv a b))) (pulley_xdiv64_s a b))
;;;; Rules for `srem` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule 0 (lower (has_type (fits_in_32 _) (srem a b)))
(pulley_xrem32_s (sext32 a) (sext32 b)))
(rule 1 (lower (has_type $I64 (srem a b))) (pulley_xrem64_s a b))
;;;; Rules for `udiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule 0 (lower (has_type (ty_int (fits_in_32 _)) (udiv a b)))
(pulley_xdiv32_u (zext32 a) (zext32 b)))
(rule 1 (lower (has_type $I64 (udiv a b))) (pulley_xdiv64_u a b))
;;;; Rules for `urem` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule 0 (lower (has_type (ty_int (fits_in_32 _)) (urem a b)))
(pulley_xrem32_u (zext32 a) (zext32 b)))
(rule 1 (lower (has_type $I64 (urem a b))) (pulley_xrem64_u a b))
;;;; Rules for `avg_round` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $I8X16 (avg_round a b))) (pulley_vavground8x16 a b))
(rule (lower (has_type $I16X8 (avg_round a b))) (pulley_vavground16x8 a b))
;;;; Rules for `ishl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $I8 (ishl a b)))
(pulley_xshl32 a (pulley_xband32_s8 b 7)))
(rule (lower (has_type $I16 (ishl a b)))
(pulley_xshl32 a (pulley_xband32_s8 b 15)))
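;; The shift amount for the narrow types is masked to the type's bit width
;; (7 or 15) above, since CLIF defines shifts as using the amount modulo the
;; bit width of the value being shifted.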
(rule (lower (has_type $I32 (ishl a b)))
(pulley_xshl32 a b))
(rule (lower (has_type $I64 (ishl a b)))
(pulley_xshl64 a b))
;; Special-case constant shift amounts.
(rule 1 (lower (has_type $I32 (ishl a b)))
(if-let n (u6_shift_from_iconst b))
(pulley_xshl32_u6 a n))
(rule 1 (lower (has_type $I64 (ishl a b)))
(if-let n (u6_shift_from_iconst b))
(pulley_xshl64_u6 a n))
;; vector shifts
(rule (lower (has_type $I8X16 (ishl a b))) (pulley_vshli8x16 a b))
(rule (lower (has_type $I16X8 (ishl a b))) (pulley_vshli16x8 a b))
(rule (lower (has_type $I32X4 (ishl a b))) (pulley_vshli32x4 a b))
(rule (lower (has_type $I64X2 (ishl a b))) (pulley_vshli64x2 a b))
;; Helper to extract a constant from `Value`, mask it to 6 bits, and then make a
;; `U6`.
(decl pure partial u6_shift_from_iconst (Value) U6)
(rule (u6_shift_from_iconst (u64_from_iconst val))
(if-let (u6_from_u8 x) (u64_try_into_u8 (u64_and val 0x3f)))
x)
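;; Extractor which matches a `u8` that fits in 6 bits and binds it as a `U6`
;; for use with the `*_u6` shift instructions.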
(decl u6_from_u8 (U6) u8)
(extern extractor u6_from_u8 u6_from_u8)
;;;; Rules for `ushr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $I8 (ushr a b)))
(pulley_xshr32_u (zext32 a) (pulley_xband32_s8 b 7)))
(rule (lower (has_type $I16 (ushr a b)))
(pulley_xshr32_u (zext32 a) (pulley_xband32_s8 b 15)))
(rule (lower (has_type $I32 (ushr a b)))
(pulley_xshr32_u a b))
(rule (lower (has_type $I64 (ushr a b)))
(pulley_xshr64_u a b))
;; Special-case constant shift amounts.
(rule 1 (lower (has_type $I32 (ushr a b)))
(if-let n (u6_shift_from_iconst b))
(pulley_xshr32_u_u6 a n))
(rule 1 (lower (has_type $I64 (ushr a b)))
(if-let n (u6_shift_from_iconst b))
(pulley_xshr64_u_u6 a n))
;; vector shifts
(rule (lower (has_type $I8X16 (ushr a b))) (pulley_vshri8x16_u a b))
(rule (lower (has_type $I16X8 (ushr a b))) (pulley_vshri16x8_u a b))
(rule (lower (has_type $I32X4 (ushr a b))) (pulley_vshri32x4_u a b))
(rule (lower (has_type $I64X2 (ushr a b))) (pulley_vshri64x2_u a b))
;;;; Rules for `sshr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $I8 (sshr a b)))
(pulley_xshr32_s (sext32 a) (pulley_xband32_s8 b 7)))
(rule (lower (has_type $I16 (sshr a b)))
(pulley_xshr32_s (sext32 a) (pulley_xband32_s8 b 15)))
(rule (lower (has_type $I32 (sshr a b)))
(pulley_xshr32_s a b))
(rule (lower (has_type $I64 (sshr a b)))
(pulley_xshr64_s a b))
;; Special-case constant shift amounts.
(rule 1 (lower (has_type $I32 (sshr a b)))
(if-let n (u6_shift_from_iconst b))
(pulley_xshr32_s_u6 a n))
(rule 1 (lower (has_type $I64 (sshr a b)))
(if-let n (u6_shift_from_iconst b))
(pulley_xshr64_s_u6 a n))
;; vector shifts
(rule (lower (has_type $I8X16 (sshr a b))) (pulley_vshri8x16_s a b))
(rule (lower (has_type $I16X8 (sshr a b))) (pulley_vshri16x8_s a b))
(rule (lower (has_type $I32X4 (sshr a b))) (pulley_vshri32x4_s a b))
(rule (lower (has_type $I64X2 (sshr a b))) (pulley_vshri64x2_s a b))
;;;; Rules for `band` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule 0 (lower (has_type (fits_in_32 _) (band a b))) (pulley_xband32 a b))
(rule 1 (lower (has_type $I64 (band a b))) (pulley_xband64 a b))
(rule 3 (lower (has_type (ty_int (fits_in_32 _)) (band a (i32_from_iconst b))))
(pulley_xband32_s32 a b))
(rule 4 (lower (has_type $I64 (band a (i32_from_iconst b))))
(pulley_xband64_s32 a b))
(rule 5 (lower (has_type (ty_int (fits_in_32 _)) (band a (i8_from_iconst b))))
(pulley_xband32_s8 a b))
(rule 6 (lower (has_type $I64 (band a (i8_from_iconst b))))
(pulley_xband64_s8 a b))
(rule 2 (lower (has_type (ty_vec128 _) (band a b)))
(pulley_vband128 a b))
;;;; Rules for `bor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule 0 (lower (has_type (fits_in_32 _) (bor a b))) (pulley_xbor32 a b))
(rule 1 (lower (has_type $I64 (bor a b))) (pulley_xbor64 a b))
(rule 3 (lower (has_type (ty_int (fits_in_32 _)) (bor a (i32_from_iconst b))))
(pulley_xbor32_s32 a b))
(rule 4 (lower (has_type $I64 (bor a (i32_from_iconst b))))
(pulley_xbor64_s32 a b))
(rule 5 (lower (has_type (ty_int (fits_in_32 _)) (bor a (i8_from_iconst b))))
(pulley_xbor32_s8 a b))
(rule 6 (lower (has_type $I64 (bor a (i8_from_iconst b))))
(pulley_xbor64_s8 a b))
(rule 2 (lower (has_type (ty_vec128 _) (bor a b)))
(pulley_vbor128 a b))
;;;; Rules for `bxor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule 0 (lower (has_type (fits_in_32 _) (bxor a b))) (pulley_xbxor32 a b))
(rule 1 (lower (has_type $I64 (bxor a b))) (pulley_xbxor64 a b))
(rule 3 (lower (has_type (ty_int (fits_in_32 _)) (bxor a (i32_from_iconst b))))
(pulley_xbxor32_s32 a b))
(rule 4 (lower (has_type $I64 (bxor a (i32_from_iconst b))))
(pulley_xbxor64_s32 a b))
(rule 5 (lower (has_type (ty_int (fits_in_32 _)) (bxor a (i8_from_iconst b))))
(pulley_xbxor32_s8 a b))
(rule 6 (lower (has_type $I64 (bxor a (i8_from_iconst b))))
(pulley_xbxor64_s8 a b))
(rule 2 (lower (has_type (ty_vec128 _) (bxor a b)))
(pulley_vbxor128 a b))
;;;; Rules for `bnot` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule 0 (lower (has_type (fits_in_32 _) (bnot a)))
(pulley_xbnot32 a))
(rule 1 (lower (has_type $I64 (bnot a)))
(pulley_xbnot64 a))
(rule 2 (lower (has_type (ty_vec128 _) (bnot a)))
(pulley_vbnot128 a))
;;;; Rules for `bitselect` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (ty_vec128 _) (bitselect c x y)))
(pulley_vbitselect128 c x y))
;;;; Rules for `umin` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule 0 (lower (has_type (fits_in_32 _) (umin a b)))
(pulley_xmin32_u (zext32 a) (zext32 b)))
(rule 1 (lower (has_type $I64 (umin a b))) (pulley_xmin64_u a b))
(rule 1 (lower (has_type $I8X16 (umin a b))) (pulley_vmin8x16_u a b))
(rule 1 (lower (has_type $I16X8 (umin a b))) (pulley_vmin16x8_u a b))
(rule 1 (lower (has_type $I32X4 (umin a b))) (pulley_vmin32x4_u a b))
;;;; Rules for `smin` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule 0 (lower (has_type (fits_in_32 _) (smin a b)))
(pulley_xmin32_s (sext32 a) (sext32 b)))
(rule 1 (lower (has_type $I64 (smin a b))) (pulley_xmin64_s a b))
(rule 1 (lower (has_type $I8X16 (smin a b))) (pulley_vmin8x16_s a b))
(rule 1 (lower (has_type $I16X8 (smin a b))) (pulley_vmin16x8_s a b))
(rule 1 (lower (has_type $I32X4 (smin a b))) (pulley_vmin32x4_s a b))
;;;; Rules for `umax` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule 0 (lower (has_type (fits_in_32 _) (umax a b)))
(pulley_xmax32_u (zext32 a) (zext32 b)))
(rule 1 (lower (has_type $I64 (umax a b))) (pulley_xmax64_u a b))
(rule 1 (lower (has_type $I8X16 (umax a b))) (pulley_vmax8x16_u a b))
(rule 1 (lower (has_type $I16X8 (umax a b))) (pulley_vmax16x8_u a b))
(rule 1 (lower (has_type $I32X4 (umax a b))) (pulley_vmax32x4_u a b))
;;;; Rules for `smax` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule 0 (lower (has_type (fits_in_32 _) (smax a b)))
(pulley_xmax32_s (sext32 a) (sext32 b)))
(rule 1 (lower (has_type $I64 (smax a b))) (pulley_xmax64_s a b))
(rule 1 (lower (has_type $I8X16 (smax a b))) (pulley_vmax8x16_s a b))
(rule 1 (lower (has_type $I16X8 (smax a b))) (pulley_vmax16x8_s a b))
(rule 1 (lower (has_type $I32X4 (smax a b))) (pulley_vmax32x4_s a b))
;;;; Rules for `bmask` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule 0 (lower (has_type (ty_int (fits_in_32 _)) (bmask a @ (value_type (fits_in_32 _)))))
(pulley_xbmask32 (zext32 a)))
(rule 1 (lower (has_type $I64 (bmask a)))
(pulley_xbmask64 (zext64 a)))
(rule 2 (lower (bmask a @ (value_type $I64)))
(pulley_xbmask64 a))
;;;; Rules for `ctz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $I8 (ctz a)))
(pulley_xctz32 (pulley_xbor32_s32 a 0x100)))
(rule (lower (has_type $I16 (ctz a)))
(pulley_xctz32 (pulley_xbor32_s32 a 0x10000)))
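;; The OR with a bit just above the narrow type's width ensures that a zero
;; input yields 8 (for `i8`) or 16 (for `i16`) rather than 32.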
(rule (lower (has_type $I32 (ctz a))) (pulley_xctz32 a))
(rule (lower (has_type $I64 (ctz a))) (pulley_xctz64 a))
;;;; Rules for `clz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $I8 (clz a)))
(pulley_xsub32_u8 (pulley_xclz32 (zext32 a)) 24))
(rule (lower (has_type $I16 (clz a)))
(pulley_xsub32_u8 (pulley_xclz32 (zext32 a)) 16))
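;; Zero-extending the narrow value to 32 bits introduces 24 (for `i8`) or 16
;; (for `i16`) extra leading zeros, which are subtracted back off.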
(rule (lower (has_type $I32 (clz a))) (pulley_xclz32 a))
(rule (lower (has_type $I64 (clz a))) (pulley_xclz64 a))
;;;; Rules for `popcnt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule 0 (lower (has_type (fits_in_32 _) (popcnt a))) (pulley_xpopcnt32 (zext32 a)))
(rule 1 (lower (has_type $I64 (popcnt a))) (pulley_xpopcnt64 a))
(rule 1 (lower (has_type $I8X16 (popcnt a))) (pulley_vpopcnt8x16 a))
;;;; Rules for `rotl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $I32 (rotl a b))) (pulley_xrotl32 a b))
(rule (lower (has_type $I64 (rotl a b))) (pulley_xrotl64 a b))
;;;; Rules for `rotr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $I32 (rotr a b))) (pulley_xrotr32 a b))
(rule (lower (has_type $I64 (rotr a b))) (pulley_xrotr64 a b))
;;;; Rules for `icmp` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (icmp cc a b @ (value_type (ty_int ty))))
(lower_icmp ty cc a b))
(decl lower_icmp (Type IntCC Value Value) XReg)
(rule (lower_icmp $I64 (IntCC.Equal) a b)
(pulley_xeq64 a b))
(rule (lower_icmp $I64 (IntCC.NotEqual) a b)
(pulley_xneq64 a b))
(rule (lower_icmp $I64 (IntCC.SignedLessThan) a b)
(pulley_xslt64 a b))
(rule (lower_icmp $I64 (IntCC.SignedLessThanOrEqual) a b)
(pulley_xslteq64 a b))
(rule (lower_icmp $I64 (IntCC.UnsignedLessThan) a b)
(pulley_xult64 a b))
(rule (lower_icmp $I64 (IntCC.UnsignedLessThanOrEqual) a b)
(pulley_xulteq64 a b))
(rule 1 (lower_icmp (fits_in_32 _) (IntCC.Equal) a b)
(pulley_xeq32 (zext32 a) (zext32 b)))
(rule 1 (lower_icmp (fits_in_32 _) (IntCC.NotEqual) a b)
(pulley_xneq32 (zext32 a) (zext32 b)))
(rule 1 (lower_icmp (fits_in_32 _) (IntCC.SignedLessThan) a b)
(pulley_xslt32 (sext32 a) (sext32 b)))
(rule 1 (lower_icmp (fits_in_32 _) (IntCC.SignedLessThanOrEqual) a b)
(pulley_xslteq32 (sext32 a) (sext32 b)))
(rule 1 (lower_icmp (fits_in_32 _) (IntCC.UnsignedLessThan) a b)
(pulley_xult32 (zext32 a) (zext32 b)))
(rule 1 (lower_icmp (fits_in_32 _) (IntCC.UnsignedLessThanOrEqual) a b)
(pulley_xulteq32 (zext32 a) (zext32 b)))
;; Pulley doesn't have instructions for `>` and `>=`, so we have to reverse the
;; operation.
(rule (lower_icmp ty (IntCC.SignedGreaterThan) a b)
(lower_icmp ty (IntCC.SignedLessThan) b a))
(rule (lower_icmp ty (IntCC.SignedGreaterThanOrEqual) a b)
(lower_icmp ty (IntCC.SignedLessThanOrEqual) b a))
(rule (lower_icmp ty (IntCC.UnsignedGreaterThan) a b)
(lower_icmp ty (IntCC.UnsignedLessThan) b a))
(rule (lower_icmp ty (IntCC.UnsignedGreaterThanOrEqual) a b)
(lower_icmp ty (IntCC.UnsignedLessThanOrEqual) b a))
;; `i128` comparisons
;;
;; While we could pretty easily add 128-bit comparisons to the interpreter it's
;; currently predicted that it's relatively niche to need this and that it's
;; not performance-sensitive in an interpreter context. In lieu of adding more
;; opcodes this is an adaptation of riscv64's lowering rules for 128-bit
;; integers. In the future if this is a performance bottleneck it should be
;; possible to add new opcodes to pulley for 128-bit comparisons.
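;; Equality of two 128-bit values is computed by xor-ing the halves pairwise,
;; or-ing the two results together, and comparing against zero.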
(rule (lower_icmp $I128 (IntCC.Equal) x y)
(let ((lo XReg (pulley_xbxor64 (value_regs_get x 0) (value_regs_get y 0)))
(hi XReg (pulley_xbxor64 (value_regs_get x 1) (value_regs_get y 1))))
(pulley_xeq64 (pulley_xbor64 lo hi) (pulley_xconst8 0))))
(rule (lower_icmp $I128 (IntCC.NotEqual) x y)
(let ((lo XReg (pulley_xbxor64 (value_regs_get x 0) (value_regs_get y 0)))
(hi XReg (pulley_xbxor64 (value_regs_get x 1) (value_regs_get y 1))))
(pulley_xneq64 (pulley_xbor64 lo hi) (pulley_xconst8 0))))
;; Swap args for `>` to use `<` instead.
(rule 1 (lower_icmp $I128 cc @ (IntCC.SignedGreaterThan) x y)
(lower_icmp $I128 (intcc_swap_args cc) y x))
(rule 1 (lower_icmp $I128 cc @ (IntCC.UnsignedGreaterThan) x y)
(lower_icmp $I128 (intcc_swap_args cc) y x))
;; complement `=`-related conditions to get ones that don't use `=`.
(rule 2 (lower_icmp $I128 cc @ (IntCC.SignedLessThanOrEqual) x y)
(pulley_xbxor32_s8 (lower_icmp $I128 (intcc_complement cc) x y) 1))
(rule 2 (lower_icmp $I128 cc @ (IntCC.SignedGreaterThanOrEqual) x y)
(pulley_xbxor32_s8 (lower_icmp $I128 (intcc_complement cc) x y) 1))
(rule 2 (lower_icmp $I128 cc @ (IntCC.UnsignedLessThanOrEqual) x y)
(pulley_xbxor32_s8 (lower_icmp $I128 (intcc_complement cc) x y) 1))
(rule 2 (lower_icmp $I128 cc @ (IntCC.UnsignedGreaterThanOrEqual) x y)
(pulley_xbxor32_s8 (lower_icmp $I128 (intcc_complement cc) x y) 1))
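;; For example the signed `x <= y` is computed as `(y < x) ^ 1`: the condition
;; is complemented to `>`, the arguments are swapped to use `<`, and the 0/1
;; result is inverted with the xor.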
;; Compare both the low and high halves of the 128-bit values. If the high
;; halves are equal, use the low-half comparison, otherwise use the high-half
;; comparison. Note that the low-half comparison is always unsigned: when it is
;; used the high halves are equal, so the ordering is decided by the low halves
;; interpreted as unsigned integers regardless of the signedness of `cc`.
(rule 3 (lower_icmp $I128 cc x y)
(if-let (IntCC.UnsignedLessThan) (intcc_unsigned cc))
(let ((x_lo XReg (value_regs_get x 0))
(x_hi XReg (value_regs_get x 1))
(y_lo XReg (value_regs_get y 0))
(y_hi XReg (value_regs_get y 1))
(top_cmp XReg (lower_icmp128_hi cc x_hi y_hi))
(bottom_cmp XReg (pulley_xult64 x_lo y_lo)))
(pulley_xselect32
(pulley_xeq64 (pulley_xbxor64 x_hi y_hi) (pulley_xconst8 0))
bottom_cmp
top_cmp)))
(decl lower_icmp128_hi (IntCC XReg XReg) XReg)
(rule (lower_icmp128_hi (IntCC.SignedLessThan) a b) (pulley_xslt64 a b))
(rule (lower_icmp128_hi (IntCC.UnsignedLessThan) a b) (pulley_xult64 a b))
;; vector comparisons
(rule 1 (lower (icmp cc a @ (value_type (ty_vec128 ty)) b))
(lower_vcmp ty cc a b))
(decl lower_vcmp (Type IntCC Value Value) VReg)
(rule (lower_vcmp $I8X16 (IntCC.Equal) a b) (pulley_veq8x16 a b))
(rule (lower_vcmp $I8X16 (IntCC.NotEqual) a b) (pulley_vneq8x16 a b))
(rule (lower_vcmp $I8X16 (IntCC.SignedLessThan) a b) (pulley_vslt8x16 a b))
(rule (lower_vcmp $I8X16 (IntCC.SignedLessThanOrEqual) a b) (pulley_vslteq8x16 a b))
(rule (lower_vcmp $I8X16 (IntCC.UnsignedLessThan) a b) (pulley_vult8x16 a b))
(rule (lower_vcmp $I8X16 (IntCC.UnsignedLessThanOrEqual) a b) (pulley_vulteq8x16 a b))
(rule (lower_vcmp $I16X8 (IntCC.Equal) a b) (pulley_veq16x8 a b))
(rule (lower_vcmp $I16X8 (IntCC.NotEqual) a b) (pulley_vneq16x8 a b))
(rule (lower_vcmp $I16X8 (IntCC.SignedLessThan) a b) (pulley_vslt16x8 a b))
(rule (lower_vcmp $I16X8 (IntCC.SignedLessThanOrEqual) a b) (pulley_vslteq16x8 a b))
(rule (lower_vcmp $I16X8 (IntCC.UnsignedLessThan) a b) (pulley_vult16x8 a b))
(rule (lower_vcmp $I16X8 (IntCC.UnsignedLessThanOrEqual) a b) (pulley_vulteq16x8 a b))
(rule (lower_vcmp $I32X4 (IntCC.Equal) a b) (pulley_veq32x4 a b))
(rule (lower_vcmp $I32X4 (IntCC.NotEqual) a b) (pulley_vneq32x4 a b))
(rule (lower_vcmp $I32X4 (IntCC.SignedLessThan) a b) (pulley_vslt32x4 a b))
(rule (lower_vcmp $I32X4 (IntCC.SignedLessThanOrEqual) a b) (pulley_vslteq32x4 a b))
(rule (lower_vcmp $I32X4 (IntCC.UnsignedLessThan) a b) (pulley_vult32x4 a b))
(rule (lower_vcmp $I32X4 (IntCC.UnsignedLessThanOrEqual) a b) (pulley_vulteq32x4 a b))
(rule (lower_vcmp $I64X2 (IntCC.Equal) a b) (pulley_veq64x2 a b))
(rule (lower_vcmp $I64X2 (IntCC.NotEqual) a b) (pulley_vneq64x2 a b))
(rule (lower_vcmp $I64X2 (IntCC.SignedLessThan) a b) (pulley_vslt64x2 a b))
(rule (lower_vcmp $I64X2 (IntCC.SignedLessThanOrEqual) a b) (pulley_vslteq64x2 a b))
(rule (lower_vcmp $I64X2 (IntCC.UnsignedLessThan) a b) (pulley_vult64x2 a b))
(rule (lower_vcmp $I64X2 (IntCC.UnsignedLessThanOrEqual) a b) (pulley_vulteq64x2 a b))
;; Swap the operand order for conditions that Pulley doesn't support.
(rule (lower_vcmp ty cc @ (IntCC.SignedGreaterThan) a b)
(lower_vcmp ty (intcc_swap_args cc) b a))
(rule (lower_vcmp ty cc @ (IntCC.SignedGreaterThanOrEqual) a b)
(lower_vcmp ty (intcc_swap_args cc) b a))
(rule (lower_vcmp ty cc @ (IntCC.UnsignedGreaterThan) a b)
(lower_vcmp ty (intcc_swap_args cc) b a))
(rule (lower_vcmp ty cc @ (IntCC.UnsignedGreaterThanOrEqual) a b)
(lower_vcmp ty (intcc_swap_args cc) b a))
;;;; Rules for `fcmp` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (fcmp cc a b @ (value_type (ty_scalar_float ty))))
(lower_fcmp ty cc a b))
(rule 1 (lower (fcmp cc a b @ (value_type (ty_vec128 ty))))
(lower_vfcmp ty cc a b))
(decl lower_fcmp (Type FloatCC Value Value) XReg)
(rule (lower_fcmp $F32 (FloatCC.Equal) a b) (pulley_feq32 a b))
(rule (lower_fcmp $F64 (FloatCC.Equal) a b) (pulley_feq64 a b))
(rule (lower_fcmp $F32 (FloatCC.NotEqual) a b) (pulley_fneq32 a b))
(rule (lower_fcmp $F64 (FloatCC.NotEqual) a b) (pulley_fneq64 a b))
(rule (lower_fcmp $F32 (FloatCC.LessThan) a b) (pulley_flt32 a b))
(rule (lower_fcmp $F64 (FloatCC.LessThan) a b) (pulley_flt64 a b))
(rule (lower_fcmp $F32 (FloatCC.LessThanOrEqual) a b) (pulley_flteq32 a b))
(rule (lower_fcmp $F64 (FloatCC.LessThanOrEqual) a b) (pulley_flteq64 a b))
;; Ordered == !a.is_nan() && !b.is_nan()
(rule (lower_fcmp ty (FloatCC.Ordered) a b)
(pulley_xband32 (lower_fcmp ty (FloatCC.Equal) a a) (lower_fcmp ty (FloatCC.Equal) b b)))
;; OrderedNotEqual == a < b || a > b
(rule (lower_fcmp ty (FloatCC.OrderedNotEqual) a b)
(pulley_xbor32 (lower_fcmp ty (FloatCC.LessThan) a b) (lower_fcmp ty (FloatCC.GreaterThan) a b)))
;; Pulley doesn't have instructions for `>` and `>=`, so we have to reverse the
;; operation.
(rule (lower_fcmp ty (FloatCC.GreaterThan) a b)
(lower_fcmp ty (FloatCC.LessThan) b a))
(rule (lower_fcmp ty (FloatCC.GreaterThanOrEqual) a b)
(lower_fcmp ty (FloatCC.LessThanOrEqual) b a))
;; For the other `Unordered*` comparisons, generate the complement and invert the result.
(rule -1 (lower_fcmp ty cc a b)
(if-let true (floatcc_unordered cc))
(pulley_xbxor32_s8 (lower_fcmp ty (floatcc_complement cc) a b) 1))
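;; (`floatcc_complement` yields the logical negation of a condition, so xor-ing
;; its 0/1 result with 1 produces the original unordered condition.)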
(decl lower_vfcmp (Type FloatCC Value Value) VReg)
(rule (lower_vfcmp $F32X4 (FloatCC.Equal) a b) (pulley_veqf32x4 a b))
(rule (lower_vfcmp $F64X2 (FloatCC.Equal) a b) (pulley_veqf64x2 a b))
(rule (lower_vfcmp $F32X4 (FloatCC.NotEqual) a b) (pulley_vneqf32x4 a b))
(rule (lower_vfcmp $F64X2 (FloatCC.NotEqual) a b) (pulley_vneqf64x2 a b))
(rule (lower_vfcmp $F32X4 (FloatCC.LessThan) a b) (pulley_vltf32x4 a b))
(rule (lower_vfcmp $F64X2 (FloatCC.LessThan) a b) (pulley_vltf64x2 a b))
(rule (lower_vfcmp $F32X4 (FloatCC.LessThanOrEqual) a b) (pulley_vlteqf32x4 a b))
(rule (lower_vfcmp $F64X2 (FloatCC.LessThanOrEqual) a b) (pulley_vlteqf64x2 a b))
(rule (lower_vfcmp ty (FloatCC.Unordered) a b)
(pulley_vbor128
(lower_vfcmp ty (FloatCC.NotEqual) a a)
(lower_vfcmp ty (FloatCC.NotEqual) b b)))
;; NB: Pulley doesn't have vector lowerings for the `Ordered`,
;; `OrderedNotEqual`, or remaining `UnorderedOr*` `FloatCC` conditions as
;; they're not needed by wasm at this time.
;; Pulley doesn't have instructions for `>` and `>=`, so we have to reverse the
;; operation.
(rule (lower_vfcmp ty (FloatCC.GreaterThan) a b)
(lower_vfcmp ty (FloatCC.LessThan) b a))
(rule (lower_vfcmp ty (FloatCC.GreaterThanOrEqual) a b)
(lower_vfcmp ty (FloatCC.LessThanOrEqual) b a))
;;;; Rules for `load` and friends ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (ty_int (fits_in_64 ty)) (load flags addr offset)))
(gen_xload addr offset flags ty (ExtKind.None)))
(rule 1 (lower (has_type (ty_scalar_float ty) (load flags addr offset)))
(gen_fload addr offset flags ty))
(rule 3 (lower (has_type $I128 (load flags addr offset)))
(if-let offsetp8 (i32_checked_add offset 8))
(let ((lo XReg (pulley_xload (amode addr offset) $I64 flags))
(hi XReg (pulley_xload (amode addr offsetp8) $I64 flags)))
(value_regs lo hi)))
(rule 0 (lower (has_type (ty_int (fits_in_32 _)) (uload8 flags addr offset)))
(gen_xload addr offset flags $I8 (ExtKind.Zero32)))
(rule 0 (lower (has_type (ty_int (fits_in_32 _)) (uload16 flags addr offset)))
(gen_xload addr offset flags $I16 (ExtKind.Zero32)))
(rule 0 (lower (has_type (ty_int (fits_in_32 _)) (uload32 flags addr offset)))
(gen_xload addr offset flags $I32 (ExtKind.None)))
(rule 1 (lower (has_type $I64 (uload8 flags addr offset)))
(gen_xload addr offset flags $I8 (ExtKind.Zero64)))
(rule 1 (lower (has_type $I64 (uload16 flags addr offset)))
(gen_xload addr offset flags $I16 (ExtKind.Zero64)))
(rule 1 (lower (has_type $I64 (uload32 flags addr offset)))
(gen_xload addr offset flags $I32 (ExtKind.Zero64)))
(rule 0 (lower (has_type (ty_int (fits_in_32 _)) (sload8 flags addr offset)))
(gen_xload addr offset flags $I8 (ExtKind.Sign32)))
(rule 0 (lower (has_type (ty_int (fits_in_32 _)) (sload16 flags addr offset)))
(gen_xload addr offset flags $I16 (ExtKind.Sign32)))
(rule 0 (lower (has_type (ty_int (fits_in_32 _)) (sload32 flags addr offset)))
(gen_xload addr offset flags $I32 (ExtKind.None)))
(rule 1 (lower (has_type $I64 (sload8 flags addr offset)))
(gen_xload addr offset flags $I8 (ExtKind.Sign64)))
(rule 1 (lower (has_type $I64 (sload16 flags addr offset)))
(gen_xload addr offset flags $I16 (ExtKind.Sign64)))
(rule 1 (lower (has_type $I64 (sload32 flags addr offset)))
(gen_xload addr offset flags $I32 (ExtKind.Sign64)))
(rule 2 (lower (has_type (ty_vec128 ty) (load flags addr offset)))
(gen_vload addr offset flags ty (VExtKind.None)))
(rule (lower (has_type ty (sload8x8 flags addr offset)))
(gen_vload addr offset flags ty (VExtKind.S8x8)))
(rule (lower (has_type ty (uload8x8 flags addr offset)))
(gen_vload addr offset flags ty (VExtKind.U8x8)))
(rule (lower (has_type ty (sload16x4 flags addr offset)))
(gen_vload addr offset flags ty (VExtKind.S16x4)))
(rule (lower (has_type ty (uload16x4 flags addr offset)))
(gen_vload addr offset flags ty (VExtKind.U16x4)))
(rule (lower (has_type ty (sload32x2 flags addr offset)))
(gen_vload addr offset flags ty (VExtKind.S32x2)))
(rule (lower (has_type ty (uload32x2 flags addr offset)))
(gen_vload addr offset flags ty (VExtKind.U32x2)))
;; Helper to generate an `xload*` instruction, of which there are many. This
;; falls back to the `XLoad` pseudo-instruction, which has code in `emit.rs` to
;; emit lots of forms, but it additionally supports more specific
;; pattern-matched versions too.
(decl gen_xload (Value Offset32 MemFlags Type ExtKind) XReg)
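;; The rules below range from most to least specific: the `*_g32`/`*_g32bne`
;; forms fold a wasm bounds check into the load, the `*_z` forms trap on a null
;; address for wasm loads, and the remaining non-trapping loads fall back to
;; the `*_o32` forms or the `XLoad` pseudo-instruction.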
;; Base case: use the `pulley_xload` pseudo-instruction for big-endian loads
;; with no extension. This should only be used for host-related loads rather
;; than wasm memory accesses, and it is intentionally minimal at this time as
;; we don't currently need to match all `ExtKind` variants.
(rule 0 (gen_xload addr offset flags ty (ExtKind.None))
(if-let true (memflags_nontrapping flags))
(if-let (Endianness.Big) (endianness flags))
(pulley_xload (amode addr offset) ty flags))
;; Base case: use `*_o32` addressing modes for little-endian and nontrapping
;; loads/stores
(rule 0 (gen_xload addr offset flags ty ext)
(if-let true (memflags_nontrapping flags))
(if-let (Endianness.Little) (endianness flags))
(emit_addro32_xload (addro32 addr offset) ty ext))
(decl emit_addro32_xload (AddrO32 Type ExtKind) XReg)
(rule (emit_addro32_xload addr $I8 (ExtKind.None)) (pulley_xload8_u32_o32 addr))
(rule (emit_addro32_xload addr $I8 (ExtKind.Sign32)) (pulley_xload8_s32_o32 addr))
(rule (emit_addro32_xload addr $I8 (ExtKind.Zero32)) (pulley_xload8_u32_o32 addr))
(rule (emit_addro32_xload addr $I8 (ExtKind.Sign64)) (pulley_sext32 (pulley_xload8_s32_o32 addr)))
(rule (emit_addro32_xload addr $I8 (ExtKind.Zero64)) (pulley_zext32 (pulley_xload8_u32_o32 addr)))
(rule (emit_addro32_xload addr $I16 (ExtKind.None)) (pulley_xload16le_u32_o32 addr))
(rule (emit_addro32_xload addr $I16 (ExtKind.Sign32)) (pulley_xload16le_s32_o32 addr))
(rule (emit_addro32_xload addr $I16 (ExtKind.Zero32)) (pulley_xload16le_u32_o32 addr))
(rule (emit_addro32_xload addr $I16 (ExtKind.Sign64)) (pulley_sext32 (pulley_xload16le_s32_o32 addr)))
(rule (emit_addro32_xload addr $I16 (ExtKind.Zero64)) (pulley_zext32 (pulley_xload16le_u32_o32 addr)))
(rule (emit_addro32_xload addr $I32 (ExtKind.None)) (pulley_xload32le_o32 addr))
(rule (emit_addro32_xload addr $I32 (ExtKind.Sign32)) (pulley_xload32le_o32 addr))
(rule (emit_addro32_xload addr $I32 (ExtKind.Zero32)) (pulley_xload32le_o32 addr))
(rule (emit_addro32_xload addr $I32 (ExtKind.Sign64)) (pulley_sext32 (pulley_xload32le_o32 addr)))
(rule (emit_addro32_xload addr $I32 (ExtKind.Zero64)) (pulley_zext32 (pulley_xload32le_o32 addr)))
(rule (emit_addro32_xload addr $I64 _ext) (pulley_xload64le_o32 addr))
;; Special case: wasm loads/stores that may trap use the `*_z` addressing
;; modes, which generate a trap for a load from a null address.
(rule 1 (gen_xload addr offset flags ty ext)
(if-let true (memflags_is_wasm flags))
(emit_addrz_xload (addrz addr offset) ty ext))
(decl emit_addrz_xload (AddrZ Type ExtKind) XReg)
(rule (emit_addrz_xload addr $I8 (ExtKind.None)) (pulley_xload8_u32_z addr))
(rule (emit_addrz_xload addr $I8 (ExtKind.Sign32)) (pulley_xload8_s32_z addr))
(rule (emit_addrz_xload addr $I8 (ExtKind.Zero32)) (pulley_xload8_u32_z addr))
(rule (emit_addrz_xload addr $I8 (ExtKind.Sign64)) (pulley_sext32 (pulley_xload8_s32_z addr)))
(rule (emit_addrz_xload addr $I8 (ExtKind.Zero64)) (pulley_zext32 (pulley_xload8_u32_z addr)))
(rule (emit_addrz_xload addr $I16 (ExtKind.None)) (pulley_xload16le_u32_z addr))
(rule (emit_addrz_xload addr $I16 (ExtKind.Sign32)) (pulley_xload16le_s32_z addr))
(rule (emit_addrz_xload addr $I16 (ExtKind.Zero32)) (pulley_xload16le_u32_z addr))
(rule (emit_addrz_xload addr $I16 (ExtKind.Sign64)) (pulley_sext32 (pulley_xload16le_s32_z addr)))
(rule (emit_addrz_xload addr $I16 (ExtKind.Zero64)) (pulley_zext32 (pulley_xload16le_u32_z addr)))
(rule (emit_addrz_xload addr $I32 (ExtKind.None)) (pulley_xload32le_z addr))
(rule (emit_addrz_xload addr $I32 (ExtKind.Sign32)) (pulley_xload32le_z addr))
(rule (emit_addrz_xload addr $I32 (ExtKind.Zero32)) (pulley_xload32le_z addr))
(rule (emit_addrz_xload addr $I32 (ExtKind.Sign64)) (pulley_sext32 (pulley_xload32le_z addr)))
(rule (emit_addrz_xload addr $I32 (ExtKind.Zero64)) (pulley_zext32 (pulley_xload32le_z addr)))
(rule (emit_addrz_xload addr $I64 _ext) (pulley_xload64le_z addr))
;; Special case: wasm loads/stores that may trap use the `*_g32` addressing
;; modes. This is a superset of the `*_z` modes above and requires extracting
;; a bounds check from within `addr` using a `select` condition.
(rule 2 (gen_xload addr offset flags ty ext)
(if-let addrg32 (wasm_g32 addr offset flags ty))
(gen_xload_g32 addrg32 ty ext))
;; Base case of "g32" loads, emit the instructions
(decl gen_xload_g32 (G32 Type ExtKind) XReg)
(rule 0 (gen_xload_g32 addr ty ext) (emit_addrg32_xload addr ty ext))
(decl emit_addrg32_xload (AddrG32 Type ExtKind) XReg)
(rule (emit_addrg32_xload addr $I8 (ExtKind.None)) (pulley_xload8_u32_g32 addr))
(rule (emit_addrg32_xload addr $I8 (ExtKind.Sign32)) (pulley_xload8_s32_g32 addr))
(rule (emit_addrg32_xload addr $I8 (ExtKind.Zero32)) (pulley_xload8_u32_g32 addr))
(rule (emit_addrg32_xload addr $I8 (ExtKind.Sign64)) (pulley_sext32 (pulley_xload8_s32_g32 addr)))
(rule (emit_addrg32_xload addr $I8 (ExtKind.Zero64)) (pulley_zext32 (pulley_xload8_u32_g32 addr)))
(rule (emit_addrg32_xload addr $I16 (ExtKind.None)) (pulley_xload16le_u32_g32 addr))
(rule (emit_addrg32_xload addr $I16 (ExtKind.Sign32)) (pulley_xload16le_s32_g32 addr))
(rule (emit_addrg32_xload addr $I16 (ExtKind.Zero32)) (pulley_xload16le_u32_g32 addr))
(rule (emit_addrg32_xload addr $I16 (ExtKind.Sign64)) (pulley_sext32 (pulley_xload16le_s32_g32 addr)))
(rule (emit_addrg32_xload addr $I16 (ExtKind.Zero64)) (pulley_zext32 (pulley_xload16le_u32_g32 addr)))
(rule (emit_addrg32_xload addr $I32 (ExtKind.None)) (pulley_xload32le_g32 addr))
(rule (emit_addrg32_xload addr $I32 (ExtKind.Sign32)) (pulley_xload32le_g32 addr))
(rule (emit_addrg32_xload addr $I32 (ExtKind.Zero32)) (pulley_xload32le_g32 addr))
(rule (emit_addrg32_xload addr $I32 (ExtKind.Sign64)) (pulley_sext32 (pulley_xload32le_g32 addr)))
(rule (emit_addrg32_xload addr $I32 (ExtKind.Zero64)) (pulley_zext32 (pulley_xload32le_g32 addr)))
(rule (emit_addrg32_xload addr $I64 _ext) (pulley_xload64le_g32 addr))
;; Special case of "g32" load - if a "g32bne" mode is possible instead use that.
(rule 1 (gen_xload_g32 addr ty ext)
(if-let addrbne (addrg32bne addr))
(emit_addrg32bne_xload addrbne ty ext))
(decl emit_addrg32bne_xload (AddrG32Bne Type ExtKind) XReg)
(rule (emit_addrg32bne_xload addr $I8 (ExtKind.None)) (pulley_xload8_u32_g32bne addr))
(rule (emit_addrg32bne_xload addr $I8 (ExtKind.Sign32)) (pulley_xload8_s32_g32bne addr))
(rule (emit_addrg32bne_xload addr $I8 (ExtKind.Zero32)) (pulley_xload8_u32_g32bne addr))
(rule (emit_addrg32bne_xload addr $I8 (ExtKind.Sign64)) (pulley_sext32 (pulley_xload8_s32_g32bne addr)))
(rule (emit_addrg32bne_xload addr $I8 (ExtKind.Zero64)) (pulley_zext32 (pulley_xload8_u32_g32bne addr)))
(rule (emit_addrg32bne_xload addr $I16 (ExtKind.None)) (pulley_xload16le_u32_g32bne addr))
(rule (emit_addrg32bne_xload addr $I16 (ExtKind.Sign32)) (pulley_xload16le_s32_g32bne addr))
(rule (emit_addrg32bne_xload addr $I16 (ExtKind.Zero32)) (pulley_xload16le_u32_g32bne addr))
(rule (emit_addrg32bne_xload addr $I16 (ExtKind.Sign64)) (pulley_sext32 (pulley_xload16le_s32_g32bne addr)))
(rule (emit_addrg32bne_xload addr $I16 (ExtKind.Zero64)) (pulley_zext32 (pulley_xload16le_u32_g32bne addr)))
(rule (emit_addrg32bne_xload addr $I32 (ExtKind.None)) (pulley_xload32le_g32bne addr))
(rule (emit_addrg32bne_xload addr $I32 (ExtKind.Sign32)) (pulley_xload32le_g32bne addr))
(rule (emit_addrg32bne_xload addr $I32 (ExtKind.Zero32)) (pulley_xload32le_g32bne addr))
(rule (emit_addrg32bne_xload addr $I32 (ExtKind.Sign64)) (pulley_sext32 (pulley_xload32le_g32bne addr)))
(rule (emit_addrg32bne_xload addr $I32 (ExtKind.Zero64)) (pulley_zext32 (pulley_xload32le_g32bne addr)))
(rule (emit_addrg32bne_xload addr $I64 _ext) (pulley_xload64le_g32bne addr))
;; Analog of `gen_xload`, but for floating-point registers
(decl gen_fload (Value Offset32 MemFlags Type) FReg)
;; Base case: nontrapping loads/stores use the `fload` helper which bottoms out
;; in `emit.rs` using the `*_o32` addressing mode.
(rule 0 (gen_fload addr offset flags ty)
(if-let true (memflags_nontrapping flags))
(pulley_fload (amode addr offset) ty flags))
;; Base case: use trapping loads/stores with the `*_z` addressing mode for
;; wasm-looking loads/stores.
(rule 1 (gen_fload addr offset flags ty)
(if-let true (memflags_is_wasm flags))
(emit_addrz_fload (addrz addr offset) ty))
(decl emit_addrz_fload (AddrZ Type) FReg)
(rule (emit_addrz_fload addr $F32) (pulley_fload32le_z addr))
(rule (emit_addrz_fload addr $F64) (pulley_fload64le_z addr))
;; Special case: use `*_g32` addressing when the bounds-check can be
;; pattern-matched.
(rule 2 (gen_fload addr offset flags ty)
(if-let addrg32 (wasm_g32 addr offset flags ty))
(emit_addrg32_fload addrg32 ty))
(decl emit_addrg32_fload (AddrG32 Type) FReg)
(rule (emit_addrg32_fload addr $F32) (pulley_fload32le_g32 addr))
(rule (emit_addrg32_fload addr $F64) (pulley_fload64le_g32 addr))
;; Analog of `gen_xload`, but for vector registers.
(decl gen_vload (Value Offset32 MemFlags Type VExtKind) VReg)
;; Base case: nontrapping loads/stores use the `*_o32` addressing mode through
;; helpers in `emit.rs` and the `pulley_vload` pseudo-inst. Note that this
;; intentionally doesn't support all extension kinds.
(rule 0 (gen_vload addr offset flags ty (VExtKind.None))
(if-let true (memflags_nontrapping flags))
(pulley_vload (amode addr offset) ty flags))
;; Base case: wasm loads/stores using the `*_z` addressing mode.
(rule 1 (gen_vload addr offset flags ty ext)
(if-let true (memflags_is_wasm flags))
(emit_addrz_vload (addrz addr offset) ty ext))
(decl emit_addrz_vload (AddrZ Type VExtKind) VReg)
(rule (emit_addrz_vload addr (ty_vec128 _) (VExtKind.None)) (pulley_vload128le_z addr))
(rule (emit_addrz_vload addr (ty_vec128 _) (VExtKind.S8x8)) (pulley_vload8x8_s_z addr))
(rule (emit_addrz_vload addr (ty_vec128 _) (VExtKind.U8x8)) (pulley_vload8x8_u_z addr))
(rule (emit_addrz_vload addr (ty_vec128 _) (VExtKind.S16x4)) (pulley_vload16x4le_s_z addr))
(rule (emit_addrz_vload addr (ty_vec128 _) (VExtKind.U16x4)) (pulley_vload16x4le_u_z addr))
(rule (emit_addrz_vload addr (ty_vec128 _) (VExtKind.S32x2)) (pulley_vload32x2le_s_z addr))
(rule (emit_addrz_vload addr (ty_vec128 _) (VExtKind.U32x2)) (pulley_vload32x2le_u_z addr))
;; Special case: use `*_g32` addressing when the bounds-check can be
;; pattern-matched.
(rule 2 (gen_vload addr offset flags (ty_vec128 ty) (VExtKind.None))
(if-let addrg32 (wasm_g32 addr offset flags ty))
(pulley_vload128le_g32 addrg32))
;;;; Rules for `store` and friends ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (store flags src @ (value_type (ty_int (fits_in_64 ty))) addr offset))
(side_effect (gen_store src addr offset flags ty)))
(rule 1 (lower (store flags src @ (value_type (ty_scalar_float ty)) addr offset))
(side_effect (gen_store src addr offset flags ty)))
(rule (lower (istore8 flags src addr offset))
(side_effect (gen_store src addr offset flags $I8)))
(rule (lower (istore16 flags src addr offset))
(side_effect (gen_store src addr offset flags $I16)))
(rule (lower (istore32 flags src addr offset))
(side_effect (gen_store src addr offset flags $I32)))
(rule 2 (lower (store flags src @ (value_type (ty_vec128 ty)) addr offset))
(side_effect (gen_store src addr offset flags ty)))
;; i128 stores
(rule 3 (lower (store flags src @ (value_type $I128) addr offset))
(let
((src_regs ValueRegs src)
(src_lo XReg (value_regs_get src_regs 0))
(src_hi XReg (value_regs_get src_regs 1))
(addrp0 XReg addr)
(addrp8 Amode (Amode.RegOffset addrp0 8)))
(side_effect (emit_store_i128 flags src_lo src_hi (Amode.RegOffset addrp0 0) addrp8))))
;; Helper which handles endianness to determine the order in which the lo/hi
;; halves of the i128 are stored.
(decl emit_store_i128 (MemFlags XReg XReg Amode Amode) SideEffectNoResult)
(rule 0 (emit_store_i128 flags lo hi addrp0 addrp8)
(if-let (Endianness.Little) (endianness flags))
(let ((_ InstOutput (side_effect (pulley_xstore addrp0 lo $I64 flags))))
(pulley_xstore addrp8 hi $I64 flags)))
(rule 1 (emit_store_i128 flags lo hi addrp0 addrp8)
(if-let (Endianness.Big) (endianness flags))
(let ((_ InstOutput (side_effect (pulley_xstore addrp0 hi $I64 flags))))
(pulley_xstore addrp8 lo $I64 flags)))
;; Helper function to store the first argument into the second argument. This
;; only stores up to the width of the `Type` specified, regardless of what the
;; type of the first argument is.
(decl gen_store (Value Value Offset32 MemFlags Type) SideEffectNoResult)
;; Base cases: normal typed loads/stores
(rule 0 (gen_store src addr offset flags (ty_int ty))
(if-let true (memflags_nontrapping flags))
(pulley_xstore (amode addr offset) src ty flags))
(rule 1 (gen_store src addr offset flags (ty_scalar_float ty))
(if-let true (memflags_nontrapping flags))
(pulley_fstore (amode addr offset) src ty flags))
(rule 2 (gen_store src addr offset flags (ty_vec128 ty))
(if-let true (memflags_nontrapping flags))
(pulley_vstore (amode addr offset) src ty flags))
;; Base case: wasm stores
(rule 3 (gen_store src addr offset flags ty)
(if-let true (memflags_is_wasm flags))
(emit_addrz_store (addrz addr offset) src ty))
(decl emit_addrz_store (AddrZ Value Type) SideEffectNoResult)
(rule (emit_addrz_store addr val $I8) (pulley_xstore8_z addr val))
(rule (emit_addrz_store addr val $I16) (pulley_xstore16le_z addr val))
(rule (emit_addrz_store addr val $I32) (pulley_xstore32le_z addr val))
(rule (emit_addrz_store addr val $I64) (pulley_xstore64le_z addr val))
(rule (emit_addrz_store addr val $F32) (pulley_fstore32le_z addr val))
(rule (emit_addrz_store addr val $F64) (pulley_fstore64le_z addr val))
(rule 1 (emit_addrz_store addr val (ty_vec128 _)) (pulley_vstore128le_z addr val))
;; Special case: wasm stores with a folded bounds check.
(rule 4 (gen_store src addr offset flags ty)
(if-let addrg32 (wasm_g32 addr offset flags ty))
(emit_addrg32_store addrg32 src ty))
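;; Helper to emit a wasm store through the bounds-check-folding `AddrG32`
;; addressing, selecting the store opcode from the width of the type being
;; stored.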
(decl emit_addrg32_store (AddrG32 Value Type) SideEffectNoResult)
(rule (emit_addrg32_store addr val $I8) (pulley_xstore8_g32 addr val))
(rule (emit_addrg32_store addr val $I16) (pulley_xstore16le_g32 addr val))
(rule (emit_addrg32_store addr val $I32) (pulley_xstore32le_g32 addr val))
(rule (emit_addrg32_store addr val $I64) (pulley_xstore64le_g32 addr val))
(rule (emit_addrg32_store addr val $F32) (pulley_fstore32le_g32 addr val))
(rule (emit_addrg32_store addr val $F64) (pulley_fstore64le_g32 addr val))
(rule 1 (emit_addrg32_store addr val (ty_vec128 _)) (pulley_vstore128le_g32 addr val))
;;;; Rules for `stack_addr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (stack_addr stack_slot offset))
(lower_stack_addr stack_slot offset))
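;; Helper to materialize the address of `stack_slot + offset` into a fresh
;; temporary register.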
(decl lower_stack_addr (StackSlot Offset32) XReg)
(rule (lower_stack_addr stack_slot offset)
(let ((dst WritableXReg (temp_writable_xreg))
(_ Unit (emit (abi_stackslot_addr dst stack_slot offset))))
dst))
;;;; Rules for `uextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule 0 (lower (has_type (fits_in_32 _) (uextend val)))
(zext32 val))
(rule 1 (lower (has_type $I64 (uextend val)))
(zext64 val))
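;; For i128 the zero-extended value fills the low half and the high half is
;; set to zero.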
(rule 1 (lower (has_type $I128 (uextend val)))
(value_regs (zext64 val) (pulley_xzero)))
;;;; Rules for `sextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule 0 (lower (has_type (fits_in_32 _) (sextend val)))
(sext32 val))
(rule 1 (lower (has_type $I64 (sextend val)))
(sext64 val))
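;; For i128 the value is sign-extended into the low half and the high half is
;; filled with copies of the sign bit via an arithmetic shift right by 63.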
(rule 1 (lower (has_type $I128 (sextend val)))
(if-let (u6_from_u8 shift) (u64_try_into_u8 63))
(let ((lo XReg (sext64 val))
(hi XReg (pulley_xshr64_s_u6 lo shift)))
(value_regs lo hi)))
;;;; Rules for `ireduce` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
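;; Reducing to a narrower integer type is a no-op at the register level: the
;; low bits of the source value already hold the result.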
(rule (lower (has_type (fits_in_64 _ty) (ireduce src)))
src)
;;;; Rules for `iconcat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $I128 (iconcat a b)))
(value_regs a b))
;;;; Rules for `isplit` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (isplit x @ (value_type $I128)))
(let ((lo XReg (value_regs_get x 0))
(hi XReg (value_regs_get x 1)))
(output_pair lo hi)))
;;;; Rules for `uadd_overflow_trap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $I32 (uadd_overflow_trap a b tc)))
(pulley_xadd32_uoverflow_trap a b tc))
(rule (lower (has_type $I64 (uadd_overflow_trap a b tc)))
(pulley_xadd64_uoverflow_trap a b tc))
;;;; Rules for `uadd_overflow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
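;; The overflow bit is recovered from the wrapping sum: an unsigned addition
;; overflows iff the result is strictly less than either operand, so the sum
;; is compared against the first operand.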
(rule (lower (has_type $I32 (uadd_overflow a b)))
(let ((sum XReg (pulley_xadd32 a b))
(is_overflow XReg (pulley_xult32 sum a)))
(output_pair sum is_overflow)))
(rule (lower (has_type $I64 (uadd_overflow a b)))
(let ((sum XReg (pulley_xadd64 a b))
(is_overflow XReg (pulley_xult64 sum a)))
(output_pair sum is_overflow)))
;;;; Rules for `select` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
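;; The condition is materialized into a register with `emit_cond` (defined
;; below) and passed to the conditional-select instruction for the result type.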
(rule 0 (lower (has_type (ty_int (fits_in_32 _)) (select c a b)))
(pulley_xselect32 (emit_cond (lower_cond c)) a b))
(rule 1 (lower (has_type $I64 (select c a b)))
(pulley_xselect64 (emit_cond (lower_cond c)) a b))
(rule 1 (lower (has_type $F32 (select c a b)))
(pulley_fselect32 (emit_cond (lower_cond c)) a b))
(rule 1 (lower (has_type $F64 (select c a b)))
(pulley_fselect64 (emit_cond (lower_cond c)) a b))
(rule 2 (lower (has_type (ty_vec128 _) (select c a b)))
(pulley_vselect (emit_cond (lower_cond c)) a b))
;; Helper to materialize a `Cond` as a value in a register: the result is
;; nonzero when the condition holds and zero otherwise.
(decl emit_cond (Cond) XReg)
(rule (emit_cond (Cond.If32 reg)) reg)
(rule (emit_cond (Cond.IfNot32 reg)) (pulley_xeq32 reg (pulley_xconst8 0)))
(rule (emit_cond (Cond.IfXeq32 src1 src2)) (pulley_xeq32 src1 src2))
(rule (emit_cond (Cond.IfXneq32 src1 src2)) (pulley_xneq32 src1 src2))
(rule (emit_cond (Cond.IfXslt32 src1 src2)) (pulley_xslt32 src1 src2))
(rule (emit_cond (Cond.IfXslteq32 src1 src2)) (pulley_xslteq32 src1 src2))
(rule (emit_cond (Cond.IfXult32 src1 src2)) (pulley_xult32 src1 src2))
(rule (emit_cond (Cond.IfXulteq32 src1 src2)) (pulley_xulteq32 src1 src2))
(rule (emit_cond (Cond.IfXeq64 src1 src2)) (pulley_xeq64 src1 src2))
(rule (emit_cond (Cond.IfXneq64 src1 src2)) (pulley_xneq64 src1 src2))
(rule (emit_cond (Cond.IfXslt64 src1 src2)) (pulley_xslt64 src1 src2))
(rule (emit_cond (Cond.IfXslteq64 src1 src2)) (pulley_xslteq64 src1 src2))
(rule (emit_cond (Cond.IfXult64 src1 src2)) (pulley_xult64 src1 src2))
(rule (emit_cond (Cond.IfXulteq64 src1 src2)) (pulley_xulteq64 src1 src2))
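;; Immediate variants: the immediate operand is materialized with `imm` and
;; the register comparisons above are reused.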
(rule (emit_cond (Cond.IfXeq32I32 src1 src2)) (pulley_xeq32 src1 (imm src2)))
(rule (emit_cond (Cond.IfXneq32I32 src1 src2)) (pulley_xneq32 src1 (imm src2)))
(rule (emit_cond (Cond.IfXslt32I32 src1 src2)) (pulley_xslt32 src1 (imm src2)))
(rule (emit_cond (Cond.IfXslteq32I32 src1 src2)) (pulley_xslteq32 src1 (imm src2)))
(rule (emit_cond (Cond.IfXult32I32 src1 src2)) (pulley_xult32 src1 (imm src2)))
(rule (emit_cond (Cond.IfXulteq32I32 src1 src2)) (pulley_xulteq32 src1 (imm src2)))
;; Note the operand swaps here: `a > b` is lowered as `b < a` (and similarly
;; for `>=`).
(rule (emit_cond (Cond.IfXsgt32I32 src1 src2)) (pulley_xslt32 (imm src2) src1))
(rule (emit_cond (Cond.IfXsgteq32I32 src1 src2)) (pulley_xslteq32 (imm src2) src1))
(rule (emit_cond (Cond.IfXugt32I32 src1 src2)) (pulley_xult32 (imm src2) src1))
(rule (emit_cond (Cond.IfXugteq32I32 src1 src2)) (pulley_xulteq32 (imm src2) src1))
(rule (emit_cond (Cond.IfXeq64I32 src1 src2)) (pulley_xeq64 src1 (imm src2)))
(rule (emit_cond (Cond.IfXneq64I32 src1 src2)) (pulley_xneq64 src1 (imm src2)))
(rule (emit_cond (Cond.IfXslt64I32 src1 src2)) (pulley_xslt64 src1 (imm src2)))
(rule (emit_cond (Cond.IfXslteq64I32 src1 src2)) (pulley_xslteq64 src1 (imm src2)))
(rule (emit_cond (Cond.IfXult64I32 src1 src2)) (pulley_xult64 src1 (imm src2)))
(rule (emit_cond (Cond.IfXulteq64I32 src1 src2)) (pulley_xulteq64 src1 (imm src2)))
;; Note the operand swaps here: `a > b` is lowered as `b < a` (and similarly
;; for `>=`).
(rule (emit_cond (Cond.IfXsgt64I32 src1 src2)) (pulley_xslt64 (imm src2) src1))
(rule (emit_cond (Cond.IfXsgteq64I32 src1 src2)) (pulley_xslteq64 (imm src2) src1))
(rule (emit_cond (Cond.IfXugt64I32 src1 src2)) (pulley_xult64 (imm src2) src1))
(rule (emit_cond (Cond.IfXugteq64I32 src1 src2)) (pulley_xulteq64 (imm src2) src1))
;;;; Rules for `bitcast` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $F32 (bitcast _flags val @ (value_type $I32))))
(pulley_bitcast_float_from_int_32 val))
(rule (lower (has_type $F64 (bitcast _flags val @ (value_type $I64))))
(pulley_bitcast_float_from_int_64 val))
(rule (lower (has_type $I32 (bitcast _flags val @ (value_type $F32))))
(pulley_bitcast_int_from_float_32 val))
(rule (lower (has_type $I64 (bitcast _flags val @ (value_type $F64))))
(pulley_bitcast_int_from_float_64 val))
(rule 1 (lower (has_type (ty_vec128 _) (bitcast _flags val @ (value_type (ty_vec128 _)))))
val)
(rule 2 (lower (has_type ty (bitcast _flags a @ (value_type ty)))) a)
;;;; Rules for `fcvt_to_{u,s}int` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $I32 (fcvt_to_uint val @ (value_type $F32))))
(pulley_x32_from_f32_u val))
(rule (lower (has_type $I32 (fcvt_to_uint val @ (value_type $F64))))
(pulley_x32_from_f64_u val))
(rule (lower (has_type $I64 (fcvt_to_uint val @ (value_type $F32))))
(pulley_x64_from_f32_u val))
(rule (lower (has_type $I64 (fcvt_to_uint val @ (value_type $F64))))
(pulley_x64_from_f64_u val))
(rule (lower (has_type $I32 (fcvt_to_sint val @ (value_type $F32))))
(pulley_x32_from_f32_s val))
(rule (lower (has_type $I32 (fcvt_to_sint val @ (value_type $F64))))
(pulley_x32_from_f64_s val))
(rule (lower (has_type $I64 (fcvt_to_sint val @ (value_type $F32))))
(pulley_x64_from_f32_s val))
(rule (lower (has_type $I64 (fcvt_to_sint val @ (value_type $F64))))
(pulley_x64_from_f64_s val))
;;;; Rules for `fcvt_from_{u,s}int` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule 0 (lower (has_type $F32 (fcvt_from_uint val @ (value_type (fits_in_32 _)))))
(pulley_f32_from_x32_u (zext32 val)))
(rule 1 (lower (has_type $F32 (fcvt_from_uint val @ (value_type $I64))))
(pulley_f32_from_x64_u val))
(rule 0 (lower (has_type $F64 (fcvt_from_uint val @ (value_type (fits_in_32 _)))))
(pulley_f64_from_x32_u (zext32 val)))
(rule 1 (lower (has_type $F64 (fcvt_from_uint val @ (value_type $I64))))
(pulley_f64_from_x64_u val))
(rule 0 (lower (has_type $F32 (fcvt_from_sint val @ (value_type (fits_in_32 _)))))
(pulley_f32_from_x32_s (sext32 val)))
(rule 1 (lower (has_type $F32 (fcvt_from_sint val @ (value_type $I64))))
(pulley_f32_from_x64_s val))
(rule 0 (lower (has_type $F64 (fcvt_from_sint val @ (value_type (fits_in_32 _)))))
(pulley_f64_from_x32_s (sext32 val)))
(rule 1 (lower (has_type $F64 (fcvt_from_sint val @ (value_type $I64))))
(pulley_f64_from_x64_s val))
(rule (lower (has_type $F32X4 (fcvt_from_sint val @ (value_type $I32X4))))
(pulley_vf32x4_from_i32x4_s val))
(rule (lower (has_type $F32X4 (fcvt_from_uint val @ (value_type $I32X4))))
(pulley_vf32x4_from_i32x4_u val))
(rule (lower (has_type $F64X2 (fcvt_from_sint val @ (value_type $I64X2))))
(pulley_vf64x2_from_i64x2_s val))
(rule (lower (has_type $F64X2 (fcvt_from_uint val @ (value_type $I64X2))))
(pulley_vf64x2_from_i64x2_u val))
;;;; Rules for `fcvt_to_{u,s}int_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $I32 (fcvt_to_uint_sat val @ (value_type $F32))))
(pulley_x32_from_f32_u_sat val))
(rule (lower (has_type $I32 (fcvt_to_uint_sat val @ (value_type $F64))))
(pulley_x32_from_f64_u_sat val))
(rule (lower (has_type $I64 (fcvt_to_uint_sat val @ (value_type $F32))))
(pulley_x64_from_f32_u_sat val))
(rule (lower (has_type $I64 (fcvt_to_uint_sat val @ (value_type $F64))))
(pulley_x64_from_f64_u_sat val))
(rule (lower (has_type $I32 (fcvt_to_sint_sat val @ (value_type $F32))))
(pulley_x32_from_f32_s_sat val))
(rule (lower (has_type $I32 (fcvt_to_sint_sat val @ (value_type $F64))))
(pulley_x32_from_f64_s_sat val))
(rule (lower (has_type $I64 (fcvt_to_sint_sat val @ (value_type $F32))))
(pulley_x64_from_f32_s_sat val))
(rule (lower (has_type $I64 (fcvt_to_sint_sat val @ (value_type $F64))))
(pulley_x64_from_f64_s_sat val))
(rule (lower (has_type $I32X4 (fcvt_to_sint_sat val @ (value_type $F32X4))))
(pulley_vi32x4_from_f32x4_s val))
(rule (lower (has_type $I32X4 (fcvt_to_uint_sat val @ (value_type $F32X4))))
(pulley_vi32x4_from_f32x4_u val))
(rule (lower (has_type $I64X2 (fcvt_to_sint_sat val @ (value_type $F64X2))))
(pulley_vi64x2_from_f64x2_s val))
(rule (lower (has_type $I64X2 (fcvt_to_uint_sat val @ (value_type $F64X2))))
(pulley_vi64x2_from_f64x2_u val))
;;;; Rules for `fdemote` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $F32 (fdemote val @ (value_type $F64))))
(pulley_f32_from_f64 val))
;;;; Rules for `fpromote` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $F64 (fpromote val @ (value_type $F32))))
(pulley_f64_from_f32 val))
;;;; Rules for `fcopysign` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $F32 (fcopysign a b)))
(pulley_fcopysign32 a b))
(rule (lower (has_type $F64 (fcopysign a b)))
(pulley_fcopysign64 a b))
;;;; Rules for `fadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $F32 (fadd a b))) (pulley_fadd32 a b))
(rule (lower (has_type $F64 (fadd a b))) (pulley_fadd64 a b))
(rule (lower (has_type $F32X4 (fadd a b))) (pulley_vaddf32x4 a b))
(rule (lower (has_type $F64X2 (fadd a b))) (pulley_vaddf64x2 a b))
;;;; Rules for `fsub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $F32 (fsub a b))) (pulley_fsub32 a b))
(rule (lower (has_type $F64 (fsub a b))) (pulley_fsub64 a b))
(rule (lower (has_type $F32X4 (fsub a b))) (pulley_vsubf32x4 a b))
(rule (lower (has_type $F64X2 (fsub a b))) (pulley_vsubf64x2 a b))
;;;; Rules for `fmul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $F32 (fmul a b))) (pulley_fmul32 a b))
(rule (lower (has_type $F64 (fmul a b))) (pulley_fmul64 a b))
(rule (lower (has_type $F32X4 (fmul a b))) (pulley_vmulf32x4 a b))
(rule (lower (has_type $F64X2 (fmul a b))) (pulley_vmulf64x2 a b))
;;;; Rules for `fdiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $F32 (fdiv a b))) (pulley_fdiv32 a b))
(rule (lower (has_type $F64 (fdiv a b))) (pulley_fdiv64 a b))
(rule (lower (has_type $F32X4 (fdiv a b))) (pulley_vdivf32x4 a b))
(rule (lower (has_type $F64X2 (fdiv a b))) (pulley_vdivf64x2 a b))
;;;; Rules for `fmax` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $F32 (fmax a b))) (pulley_fmaximum32 a b))
(rule (lower (has_type $F64 (fmax a b))) (pulley_fmaximum64 a b))
(rule (lower (has_type $F32X4 (fmax a b))) (pulley_vmaximumf32x4 a b))
(rule (lower (has_type $F64X2 (fmax a b))) (pulley_vmaximumf64x2 a b))
;;;; Rules for `fmin` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $F32 (fmin a b))) (pulley_fminimum32 a b))
(rule (lower (has_type $F64 (fmin a b))) (pulley_fminimum64 a b))
(rule (lower (has_type $F32X4 (fmin a b))) (pulley_vminimumf32x4 a b))
(rule (lower (has_type $F64X2 (fmin a b))) (pulley_vminimumf64x2 a b))
;;;; Rules for `trunc` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $F32 (trunc a))) (pulley_ftrunc32 a))
(rule (lower (has_type $F64 (trunc a))) (pulley_ftrunc64 a))
(rule (lower (has_type $F32X4 (trunc a))) (pulley_vtrunc32x4 a))
(rule (lower (has_type $F64X2 (trunc a))) (pulley_vtrunc64x2 a))
;;;; Rules for `floor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $F32 (floor a))) (pulley_ffloor32 a))
(rule (lower (has_type $F64 (floor a))) (pulley_ffloor64 a))
(rule (lower (has_type $F32X4 (floor a)))
(pulley_vfloor32x4 a))
(rule (lower (has_type $F64X2 (floor a)))
(pulley_vfloor64x2 a))
;;;; Rules for `ceil` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $F32 (ceil a))) (pulley_fceil32 a))
(rule (lower (has_type $F64 (ceil a))) (pulley_fceil64 a))
(rule (lower (has_type $F64X2 (ceil a)))
(pulley_vceil64x2 a))
(rule (lower (has_type $F32X4 (ceil a)))
(pulley_vceil32x4 a))
;;;; Rules for `nearest` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $F32 (nearest a))) (pulley_fnearest32 a))
(rule (lower (has_type $F64 (nearest a))) (pulley_fnearest64 a))
(rule (lower (has_type $F32X4 (nearest a)))
(pulley_vnearest32x4 a))
(rule (lower (has_type $F64X2 (nearest a)))
(pulley_vnearest64x2 a))
;;;; Rules for `sqrt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $F32 (sqrt a))) (pulley_fsqrt32 a))
(rule (lower (has_type $F64 (sqrt a))) (pulley_fsqrt64 a))
(rule (lower (has_type $F32X4 (sqrt a)))
(pulley_vsqrt32x4 a))
(rule (lower (has_type $F64X2 (sqrt a)))
(pulley_vsqrt64x2 a))
;;;; Rules for `fneg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $F32 (fneg a))) (pulley_fneg32 a))
(rule (lower (has_type $F64 (fneg a))) (pulley_fneg64 a))
(rule (lower (has_type $F32X4 (fneg a))) (pulley_vnegf32x4 a))
(rule (lower (has_type $F64X2 (fneg a))) (pulley_vnegf64x2 a))
;;;; Rules for `ineg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
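;; Negation of types narrower than 32 bits sign-extends the operand and uses
;; the 32-bit negate; the low bits of the result match negating the narrow
;; value directly.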
(rule 0 (lower (has_type (fits_in_32 _) (ineg a))) (pulley_xneg32 (sext32 a)))
(rule 1 (lower (has_type $I64 (ineg a))) (pulley_xneg64 a))
;; Vector negation
(rule 1 (lower (has_type $I8X16 (ineg a))) (pulley_vneg8x16 a))
(rule 1 (lower (has_type $I16X8 (ineg a))) (pulley_vneg16x8 a))
(rule 1 (lower (has_type $I32X4 (ineg a))) (pulley_vneg32x4 a))
(rule 1 (lower (has_type $I64X2 (ineg a))) (pulley_vneg64x2 a))
;;;; Rules for `fabs` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $F32 (fabs a))) (pulley_fabs32 a))
(rule (lower (has_type $F64 (fabs a))) (pulley_fabs64 a))
(rule (lower (has_type $F32X4 (fabs a))) (pulley_vabsf32x4 a))
(rule (lower (has_type $F64X2 (fabs a))) (pulley_vabsf64x2 a))
;;;; Rules for `vconst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (ty_vec128 _) (vconst (u128_from_constant a)))) (pulley_vconst128 a))
;;;; Rules for `bswap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
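;; The 16-bit swap reuses the 32-bit swap: the two bytes of interest end up in
;; the upper half of the low 32 bits, so they are shifted back down by 16.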
(rule (lower (has_type $I16 (bswap a)))
(if-let (u6_from_u8 shift) (u64_try_into_u8 16))
(pulley_xshr32_u_u6 (pulley_bswap32 a) shift))
(rule (lower (has_type $I32 (bswap a))) (pulley_bswap32 a))
(rule (lower (has_type $I64 (bswap a))) (pulley_bswap64 a))
;;;; Rules for `iabs` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule 0 (lower (has_type (fits_in_32 _) (iabs a))) (pulley_xabs32 (sext32 a)))
(rule 1 (lower (has_type $I64 (iabs a))) (pulley_xabs64 a))
(rule 1 (lower (has_type $I8X16 (iabs a))) (pulley_vabs8x16 a))
(rule 1 (lower (has_type $I16X8 (iabs a))) (pulley_vabs16x8 a))
(rule 1 (lower (has_type $I32X4 (iabs a))) (pulley_vabs32x4 a))
(rule 1 (lower (has_type $I64X2 (iabs a))) (pulley_vabs64x2 a))
;;;; Rules for `splat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $I8X16 (splat a))) (pulley_vsplatx8 a))
(rule (lower (has_type $I16X8 (splat a))) (pulley_vsplatx16 a))
(rule (lower (has_type $I32X4 (splat a))) (pulley_vsplatx32 a))
(rule (lower (has_type $I64X2 (splat a))) (pulley_vsplatx64 a))
(rule (lower (has_type $F32X4 (splat a))) (pulley_vsplatf32 a))
(rule (lower (has_type $F64X2 (splat a))) (pulley_vsplatf64 a))
;;;; Rules for `vhigh_bits` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (fits_in_32 _) (vhigh_bits a @ (value_type $I8X16))))
(pulley_vbitmask8x16 a))
(rule (lower (has_type (fits_in_32 _) (vhigh_bits a @ (value_type $I16X8))))
(pulley_vbitmask16x8 a))
(rule (lower (has_type (fits_in_32 _) (vhigh_bits a @ (value_type $I32X4))))
(pulley_vbitmask32x4 a))
(rule (lower (has_type (fits_in_32 _) (vhigh_bits a @ (value_type $I64X2))))
(pulley_vbitmask64x2 a))
;;;; Rules for `vall_true` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (vall_true a @ (value_type $I8X16))) (pulley_valltrue8x16 a))
(rule (lower (vall_true a @ (value_type $I16X8))) (pulley_valltrue16x8 a))
(rule (lower (vall_true a @ (value_type $I32X4))) (pulley_valltrue32x4 a))
(rule (lower (vall_true a @ (value_type $I64X2))) (pulley_valltrue64x2 a))
(rule (lower (vall_true a @ (value_type $F32X4))) (pulley_valltrue32x4 a))
(rule (lower (vall_true a @ (value_type $F64X2))) (pulley_valltrue64x2 a))
;;;; Rules for `vany_true` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (vany_true a @ (value_type $I8X16))) (pulley_vanytrue8x16 a))
(rule (lower (vany_true a @ (value_type $I16X8))) (pulley_vanytrue16x8 a))
(rule (lower (vany_true a @ (value_type $I32X4))) (pulley_vanytrue32x4 a))
(rule (lower (vany_true a @ (value_type $I64X2))) (pulley_vanytrue64x2 a))
(rule (lower (vany_true a @ (value_type $F32X4))) (pulley_vanytrue32x4 a))
(rule (lower (vany_true a @ (value_type $F64X2))) (pulley_vanytrue64x2 a))
;;;; Rules for `swiden_low` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (swiden_low a @ (value_type $I8X16))) (pulley_vwidenlow8x16_s a))
(rule (lower (swiden_low a @ (value_type $I16X8))) (pulley_vwidenlow16x8_s a))
(rule (lower (swiden_low a @ (value_type $I32X4))) (pulley_vwidenlow32x4_s a))
;;;; Rules for `swiden_high` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (swiden_high a @ (value_type $I8X16))) (pulley_vwidenhigh8x16_s a))
(rule (lower (swiden_high a @ (value_type $I16X8))) (pulley_vwidenhigh16x8_s a))
(rule (lower (swiden_high a @ (value_type $I32X4))) (pulley_vwidenhigh32x4_s a))
;;;; Rules for `uwiden_low` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (uwiden_low a @ (value_type $I8X16))) (pulley_vwidenlow8x16_u a))
(rule (lower (uwiden_low a @ (value_type $I16X8))) (pulley_vwidenlow16x8_u a))
(rule (lower (uwiden_low a @ (value_type $I32X4))) (pulley_vwidenlow32x4_u a))
;;;; Rules for `uwiden_high` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (uwiden_high a @ (value_type $I8X16))) (pulley_vwidenhigh8x16_u a))
(rule (lower (uwiden_high a @ (value_type $I16X8))) (pulley_vwidenhigh16x8_u a))
(rule (lower (uwiden_high a @ (value_type $I32X4))) (pulley_vwidenhigh32x4_u a))
;;;; Rules for `snarrow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (snarrow a @ (value_type $I16X8) b)) (pulley_vnarrow16x8_s a b))
(rule (lower (snarrow a @ (value_type $I32X4) b)) (pulley_vnarrow32x4_s a b))
(rule (lower (snarrow a @ (value_type $I64X2) b)) (pulley_vnarrow64x2_s a b))
;;;; Rules for `unarrow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (unarrow a @ (value_type $I16X8) b)) (pulley_vnarrow16x8_u a b))
(rule (lower (unarrow a @ (value_type $I32X4) b)) (pulley_vnarrow32x4_u a b))
;;;; Rules for `uunarrow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (uunarrow a @ (value_type $I64X2) b)) (pulley_vunarrow64x2_u a b))
;;;; Rules for `fvpromote_low` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (fvpromote_low a @ (value_type $F32X4))) (pulley_vfpromotelow a))
;;;; Rules for `fvdemote` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (fvdemote a @ (value_type $F64X2))) (pulley_vfdemote a))
;;;; Rules for `extractlane` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (extractlane a @ (value_type $I8X16) (u8_from_uimm8 lane)))
(pulley_xextractv8x16 a lane))
(rule (lower (extractlane a @ (value_type $I16X8) (u8_from_uimm8 lane)))
(pulley_xextractv16x8 a lane))
(rule (lower (extractlane a @ (value_type $I32X4) (u8_from_uimm8 lane)))
(pulley_xextractv32x4 a lane))
(rule (lower (extractlane a @ (value_type $I64X2) (u8_from_uimm8 lane)))
(pulley_xextractv64x2 a lane))
(rule (lower (extractlane a @ (value_type $F32X4) (u8_from_uimm8 lane)))
(pulley_fextractv32x4 a lane))
(rule (lower (extractlane a @ (value_type $F64X2) (u8_from_uimm8 lane)))
(pulley_fextractv64x2 a lane))
;;;; Rules for `insertlane` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (insertlane a @ (value_type $I8X16) b (u8_from_uimm8 lane)))
(pulley_vinsertx8 a b lane))
(rule (lower (insertlane a @ (value_type $I16X8) b (u8_from_uimm8 lane)))
(pulley_vinsertx16 a b lane))
(rule (lower (insertlane a @ (value_type $I32X4) b (u8_from_uimm8 lane)))
(pulley_vinsertx32 a b lane))
(rule (lower (insertlane a @ (value_type $I64X2) b (u8_from_uimm8 lane)))
(pulley_vinsertx64 a b lane))
(rule (lower (insertlane a @ (value_type $F32X4) b (u8_from_uimm8 lane)))
(pulley_vinsertf32 a b lane))
(rule (lower (insertlane a @ (value_type $F64X2) b (u8_from_uimm8 lane)))
(pulley_vinsertf64 a b lane))
;;;; Rules for `scalar_to_vector` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Note that this doesn't use dedicated pulley instructions at this time and
;; generates a lowering that is relatively inefficient in terms of bytecode.
;; It should be relatively easy to optimize in the future if necessary.
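;; Each rule below inserts the scalar into lane 0 of an all-zeros vector.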
(rule (lower (scalar_to_vector a @ (value_type $I8)))
(pulley_vinsertx8 (pulley_vconst128 0) a 0))
(rule (lower (scalar_to_vector a @ (value_type $I16)))
(pulley_vinsertx16 (pulley_vconst128 0) a 0))
(rule (lower (scalar_to_vector a @ (value_type $I32)))
(pulley_vinsertx32 (pulley_vconst128 0) a 0))
(rule (lower (scalar_to_vector a @ (value_type $I64)))
(pulley_vinsertx64 (pulley_vconst128 0) a 0))
(rule (lower (scalar_to_vector a @ (value_type $F32)))
(pulley_vinsertf32 (pulley_vconst128 0) a 0))
(rule (lower (scalar_to_vector a @ (value_type $F64)))
(pulley_vinsertf64 (pulley_vconst128 0) a 0))
;;;; Rules for `shuffle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $I8X16 (shuffle a b (u128_from_immediate mask))))
(pulley_vshuffle a b mask))
;;;; Rules for `swizzle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule 1 (lower (has_type $I8X16 (swizzle a b))) (pulley_vswizzlei8x16 a b))
;;;; Rules for `fma` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $F32X4 (fma a b c))) (pulley_vfma32x4 a b c))
(rule (lower (has_type $F64X2 (fma a b c))) (pulley_vfma64x2 a b c))
;;;; Rules for `symbol_value` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (symbol_value (symbol_value_data extname dist offset)))
(load_ext_name extname offset dist))
;;;; Rules for `func_addr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (func_addr (func_ref_data _ extname dist _)))
(load_ext_name extname 0 dist))
;;;; Rules for `get_exception_handler_address` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
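;; The exception-table index is resolved to the MachLabel of the corresponding
;; exception successor of `block`, and the address of that label is returned.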
(rule (lower (get_exception_handler_address (u64_from_imm64 idx) block))
(let ((succ_label MachLabel (block_exn_successor_label block idx)))
(pulley_label_address succ_label)))
;;;; Rules for `sequence_point` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (sequence_point))
(side_effect
(pulley_sequence_point)))