/*
 * Copyright 2008 Analog Devices Inc.
 *
 * Licensed under the ADI BSD license or the GPL-2 (or later)
 */

.align 2
.global ___muldi3;
.type ___muldi3, STT_FUNC;

#ifdef CONFIG_ARITHMETIC_OPS_L1
.section .l1.text
#else
.text
#endif

/*
 * ___muldi3 - 64-bit by 64-bit multiply, keeping the low 64 bits.
 *
 * In:     R1:R0      first operand (R1 = high word, R0 = low word)
 *         R2         low word of the second operand
 *         [SP + 12]  high word of the second operand (loaded into R3)
 * Out:    R1:R0      low 64 bits of the product
 * Clobb:  R3, A0, A1.  R4 is used as a temporary but is saved to [SP]
 *         and restored before returning.  R2 is never written.
 * Stack:  the word at [SP] is overwritten (scratch slot for R4).
 *
 * Derivation:
 *
 *	   R1:R0 * R3:R2
 *	 = R1.h:R1.l:R0.h:R0.l * R3.h:R3.l:R2.h:R2.l
 *	[X]  = (R1.h * R3.h) * 2^96
 *	[X]  + (R1.h * R3.l + R1.l * R3.h) * 2^80
 *	[X]  + (R1.h * R2.h + R1.l * R3.l + R3.h * R0.h) * 2^64
 *	[E1] + (R1.h * R2.l + R3.h * R0.l + R1.l * R2.h + R3.l * R0.h) * 2^48
 *	[E2] + (R1.l * R2.l + R3.l * R0.l + R0.h * R2.h) * 2^32
 *	[E3] + (R0.l * R2.h + R2.l * R0.h) * 2^16
 *	[E4] + (R0.l * R2.l)
 *
 * We can discard the first three lines marked "X" since we produce
 * only a 64 bit result.  So, we need ten 16-bit multiplies.
 *
 * Individual mul-acc results:
 *	[E1] = R1.h * R2.l + R3.h * R0.l + R1.l * R2.h + R3.l * R0.h
 *	[E2] = R1.l * R2.l + R3.l * R0.l + R0.h * R2.h
 *	[E3] = R0.l * R2.h + R2.l * R0.h
 *	[E4] = R0.l * R2.l
 *
 * We also need to add high parts from lower-level results to higher ones:
 *	E[n]c = E[n] + (E[n+1]c >> 16), where E4c := E4
 *
 * One interesting property is that all parts of the result that depend
 * on the sign of the multiplication are discarded.  Those would be the
 * multiplications involving R1.h and R3.h, but only the top 16 bits of
 * the 32 bit result depend on the sign, and since R1.h and R3.h only
 * occur in E1, the top half of these results is cut off.
 * So, we can just use FU mode for all of the 16-bit multiplies, and
 * ignore questions of when to use mixed mode.
 */

___muldi3:
	/* [SP] technically is part of the caller's frame, but we can
	   use it as scratch space.  */

	/* E1: four cross products, split over both accumulators.  The
	   parallel slots fetch the second operand's high word into R3
	   and park the caller's R4 in the scratch slot.  */
	A0 = R2.H * R1.L, A1 = R2.L * R1.H (FU) || R3 = [SP + 12];	/* E1 */
	A0 += R3.H * R0.L, A1 += R3.L * R0.H (FU) || [SP] = R4;		/* E1 */
	A0 += A1;							/* E1 */
	/* Only the low 16 bits of E1 reach the result; keep them in R4.  */
	R4 = A0.w;

	/* E2: first two of its three products; the third is added below.  */
	A0 = R0.l * R3.l (FU);						/* E2 */
	A0 += R2.l * R1.l (FU);						/* E2 */

	/* E4, then carry its upper part into E3 (E3c) and E3c's upper
	   part into E2 (E2c).  */
	A1 = R2.L * R0.L (FU);						/* E4 */
	R3 = A1.w;			/* R3.l = low(E4) */
	A1 = A1 >> 16;							/* E3c */
	A0 += R2.H * R0.H, A1 += R2.L * R0.H (FU);			/* E2, E3c */
	A1 += R0.L * R2.H (FU);						/* E3c */
	R0 = A1.w;			/* R0.l = low(E3c) */
	A1 = A1 >> 16;							/* E2c */
	A0 += A1;							/* E2c */
	R1 = A0.w;			/* R1 = E2c */

	/* low(result) = low(E3c):low(E4) */
	R0 = PACK (R0.l, R3.l);
	/* high(result) = E2c + (E1 << 16); the parallel slot restores
	   the caller's R4 from the scratch slot.  */
	R1.h = R1.h + R4.l (NS) || R4 = [SP];
	RTS;

.size ___muldi3, .-___muldi3

