/* SPDX-License-Identifier: GPL-2.0-or-later WITH GCC-exception-2.0 */
#include <linux/linkage.h>
#include <asm/asmmacro.h>
#include <asm/core.h>

/* XCHAL_NO_MUL is 1 only when the core has no multiply option at all
   (none of MUL16, MUL32 or MAC16); in that case the partial products
   are computed by the local shift-and-add helper .Lmul_mulsi3.  */
#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32 || XCHAL_HAVE_MAC16
#define XCHAL_NO_MUL 0
#else
#define XCHAL_NO_MUL 1
#endif

/*
 * __umulsidi3: unsigned 32 x 32 -> 64 bit multiply.
 *
 * In:	a2, a3	the two 32-bit unsigned factors
 * Out:	wh	high word of the product
 *	wl	low word of the product
 *	(wh/wl are a2/a3 on big-endian and a3/a2 on little-endian cores,
 *	see the defines below)
 *
 * Without MUL32_HIGH the product is assembled from four 16 x 16 partial
 * products:
 *	pp0 = lo(a2) * lo(a3)	pp1 = lo(a2) * hi(a3)
 *	pp2 = hi(a2) * lo(a3)	pp3 = hi(a2) * hi(a3)
 *	result = (pp3 << 32) + ((pp1 + pp2) << 16) + pp0
 *
 * Scratch registers: a4, a5 (high halves, MUL16/MUL32 only), a6, a9, a11.
 *
 * CALL0 ABI stack frame (32 bytes):
 *	sp +  0		saved a0 (only when XCHAL_NO_MUL)
 *	sp + 16..28	saved a12..a15
 */
ENTRY(__umulsidi3)

#ifdef __XTENSA_CALL0_ABI__
	abi_entry(32)
	s32i	a12, sp, 16
	s32i	a13, sp, 20
	s32i	a14, sp, 24
	s32i	a15, sp, 28
#elif XCHAL_NO_MUL
	/* This is not really a leaf function; allocate enough stack space
	   to allow CALL12s to a helper function.  */
	abi_entry(32)
#else
	abi_entry_default
#endif

#ifdef __XTENSA_EB__
#define wh a2
#define wl a3
#else
#define wh a3
#define wl a2
#endif /* __XTENSA_EB__ */

	/* This code is taken from the mulsf3 routine in ieee754-sf.S.
	   See more comments there.  */

#if XCHAL_HAVE_MUL32_HIGH
	/* The low word goes through a6 because wl is one of the inputs
	   and must stay intact until muluh has read it.  */
	mull	a6, a2, a3
	muluh	wh, a2, a3
	mov	wl, a6

#else /* ! MUL32_HIGH */

#if defined(__XTENSA_CALL0_ABI__) && XCHAL_NO_MUL
	/* a0 and a8 will be clobbered by calling the multiply function
	   but a8 is not used here and need not be saved.  */
	s32i	a0, sp, 0
#endif

#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32

#define a2h a4
#define a3h a5

	/* Get the high halves of the inputs into registers.  */
	srli	a2h, a2, 16
	srli	a3h, a3, 16

#define a2l a2
#define a3l a3

#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
	/* Clear the high halves of the inputs.  This does not matter
	   for MUL16 because the high bits are ignored.  */
	extui	a2, a2, 0, 16
	extui	a3, a3, 0, 16
#endif
#endif /* MUL16 || MUL32 */


	/* do_mul(dst, xreg, xhalf, yreg, yhalf) computes the 16 x 16 -> 32
	   product of the selected halves (l or h) of xreg and yreg into dst.
	   One definition is provided per multiply option.  */

#if XCHAL_HAVE_MUL16

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	mul16u	dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MUL32

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	mull	dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MAC16

/* The preprocessor insists on inserting a space when concatenating after
   a period in the definition of do_mul below.  These macros are a workaround
   using underscores instead of periods when doing the concatenation.  */
#define umul_aa_ll umul.aa.ll
#define umul_aa_lh umul.aa.lh
#define umul_aa_hl umul.aa.hl
#define umul_aa_hh umul.aa.hh

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	umul_aa_ ## xhalf ## yhalf	xreg, yreg; \
	rsr	dst, ACCLO

#else /* no multiply hardware */

#define set_arg_l(dst, src) \
	extui	dst, src, 0, 16
#define set_arg_h(dst, src) \
	srli	dst, src, 16

#ifdef __XTENSA_CALL0_ABI__
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	set_arg_ ## xhalf (a13, xreg); \
	set_arg_ ## yhalf (a14, yreg); \
	call0	.Lmul_mulsi3; \
	mov	dst, a12
#else
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	set_arg_ ## xhalf (a14, xreg); \
	set_arg_ ## yhalf (a15, yreg); \
	call12	.Lmul_mulsi3; \
	mov	dst, a14
#endif /* __XTENSA_CALL0_ABI__ */

#endif /* no multiply hardware */

	/* Add pp1 and pp2 into a6 with carry-out in a9.  */
	do_mul(a6, a2, l, a3, h)	/* pp 1 */
	do_mul(a11, a2, h, a3, l)	/* pp 2 */
	movi	a9, 0
	add	a6, a6, a11
	bgeu	a6, a11, 1f
	addi	a9, a9, 1
1:
	/* Shift the high half of a9/a6 into position in a9.  Note that
	   this value can be safely incremented without any carry-outs.  */
	ssai	16
	src	a9, a9, a6

	/* Compute the low word into a6.  The sll below still relies on the
	   shift amount set up by the ssai above.  */
	do_mul(a11, a2, l, a3, l)	/* pp 0 */
	sll	a6, a6
	add	a6, a6, a11
	bgeu	a6, a11, 1f
	addi	a9, a9, 1
1:
	/* Compute the high word into wh.  This is the last read of the
	   inputs, so writing wh (a2 or a3) here is safe.  */
	do_mul(wh, a2, h, a3, h)	/* pp 3 */
	add	wh, wh, a9
	mov	wl, a6

#endif /* !MUL32_HIGH */

#if defined(__XTENSA_CALL0_ABI__) && XCHAL_NO_MUL
	/* Restore the original return address.  */
	l32i	a0, sp, 0
#endif
#ifdef __XTENSA_CALL0_ABI__
	l32i	a12, sp, 16
	l32i	a13, sp, 20
	l32i	a14, sp, 24
	l32i	a15, sp, 28
	abi_ret(32)
#else
	abi_ret_default
#endif

#if XCHAL_NO_MUL

	/* do_addxN dst, as, at, tmp:  dst = (as << log2(N)) + at.
	   Uses the ADDX instructions when configured; otherwise falls back
	   to a shift through tmp followed by an add.  */

	.macro	do_addx2 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx2	\dst, \as, \at
#else
	slli	\tmp, \as, 1
	add	\dst, \tmp, \at
#endif
	.endm

	.macro	do_addx4 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx4	\dst, \as, \at
#else
	slli	\tmp, \as, 2
	add	\dst, \tmp, \at
#endif
	.endm

	.macro	do_addx8 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx8	\dst, \as, \at
#else
	slli	\tmp, \as, 3
	add	\dst, \tmp, \at
#endif
	.endm

	/* For Xtensa processors with no multiply hardware, this simplified
	   version of _mulsi3 is used for multiplying the 16-bit halves of
	   the __umulsidi3 inputs.  When using CALL0, this function
	   uses a custom ABI: the inputs are passed in a13 and a14, the
	   result is returned in a12, and a8 and a15 are clobbered.
	   Otherwise it is reached with CALL12: the inputs arrive in a2 and
	   a3 and the result is returned in a2 (a14 in the caller).  */
	.align	4
.Lmul_mulsi3:
	abi_entry_default

	/* Shift-and-add multiply: dst = src1 * src2.  Each pass consumes the
	   four low bits of src1, conditionally adding src2 * 1, 2, 4 and 8
	   to dst, then shifts src1 right and src2 left by four.  The loop
	   ends once src1 is zero.  src1, src2, tmp1 and tmp2 are destroyed.  */
	.macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
	movi	\dst, 0
1:	add	\tmp1, \src2, \dst
	extui	\tmp2, \src1, 0, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx2 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 1, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx4 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 2, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx8 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 3, 1
	movnez	\dst, \tmp1, \tmp2

	srli	\src1, \src1, 4
	slli	\src2, \src2, 4
	bnez	\src1, 1b
	.endm

#ifdef __XTENSA_CALL0_ABI__
	mul_mulsi3_body a12, a13, a14, a15, a8
#else
	/* The result will be written into a2, so save that argument in a4.  */
	mov	a4, a2
	mul_mulsi3_body a2, a4, a3, a5, a6
#endif
	abi_ret_default
#endif /* XCHAL_NO_MUL */

ENDPROC(__umulsidi3)
EXPORT_SYMBOL(__umulsidi3)