/* SPDX-License-Identifier: GPL-2.0 */
	.file	"div_Xsig.S"
/*---------------------------------------------------------------------------+
 |  div_Xsig.S                                                               |
 |                                                                           |
 |  Division subroutine for 96 bit quantities                                |
 |                                                                           |
 |  Copyright (C) 1994,1995                                                  |
 |  W. Metzenthen, 22 Parker St, Ormond, Vic 3163,                           |
 |  Australia. E-mail [email protected]                                      |
 |                                                                           |
 |                                                                           |
 +---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------+
 |  Divide the 96 bit quantity pointed to by a, by that pointed to by b, and |
 |  put the 96 bit result at the location d.                                 |
 |                                                                           |
 |  The result may not be accurate to 96 bits. It is intended for use where  |
 |  a result better than 64 bits is required. The result should usually be   |
 |  good to at least 94 bits.                                                |
 |  The returned result is one half of the true quotient: the dividend is    |
 |  halved before dividing. This is done to prevent overflow.                |
 |                                                                           |
 |  .aaaaaaaaaaaaaa / .bbbbbbbbbbbbb  -> .dddddddddddd                        |
 |                                                                           |
 |  void div_Xsig(Xsig *a, Xsig *b, Xsig *dest)                              |
 |                                                                           |
 +---------------------------------------------------------------------------*/

/*
 * Syntax:  AT&T / GNU as, 32-bit x86, run through the C preprocessor.
 *
 * Register use inside div_Xsig:
 *	%esi	pointer to the dividend (PARAM1); reloaded with the pointer
 *		to the answer (PARAM3) just before the result is stored
 *	%ebx	pointer to the divisor (PARAM2)
 *	%ecx	ms dword of the divisor plus one (the "augmented" divisor);
 *		0 when that addition wraps
 *	%eax, %edx	operands and results of divl / mull
 *
 * %ebp, %esi, %edi and %ebx are saved on entry and restored on exit;
 * %eax, %ecx, %edx and the flags are modified and not restored.
 *
 * PARAM1..PARAM3, EXCEPTION, EX_INTERNAL, SYM_FUNC_START/SYM_FUNC_END and
 * RET are not defined in this file -- presumably they come from the two
 * headers included below or from the build environment (TODO confirm).
 */

#include "exception.h"
#include "fpu_emu.h"


/*
 * Offsets of the three 32 bit words of a 96 bit quantity:
 * XsigLL is the least significant word, XsigH the most significant.
 */
#define	XsigLL(x)	(x)
#define	XsigL(x)	4(x)
#define	XsigH(x)	8(x)


#ifndef NON_REENTRANT_FPU
/*
	Local storage on the stack (28 bytes below %ebp):
	Accumulator:	FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
	Result:		FPU_result_3:FPU_result_2:FPU_result_1
 */
#define FPU_accum_3	-4(%ebp)
#define FPU_accum_2	-8(%ebp)
#define FPU_accum_1	-12(%ebp)
#define FPU_accum_0	-16(%ebp)
#define FPU_result_3	-20(%ebp)
#define FPU_result_2	-24(%ebp)
#define FPU_result_1	-28(%ebp)

#else
.data
/*
	Local storage in a static area:
	Accumulator:	FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
	Result:		FPU_result_3:FPU_result_2:FPU_result_1
 */
	.align 4,0
FPU_accum_3:
	.long	0
FPU_accum_2:
	.long	0
FPU_accum_1:
	.long	0
FPU_accum_0:
	.long	0
FPU_result_3:
	.long	0
FPU_result_2:
	.long	0
FPU_result_1:
	.long	0
#endif /* NON_REENTRANT_FPU */


.text
SYM_FUNC_START(div_Xsig)
	pushl	%ebp
	movl	%esp,%ebp
#ifndef NON_REENTRANT_FPU
	subl	$28,%esp		/* room for the seven local dwords */
#endif /* NON_REENTRANT_FPU */

	pushl	%esi
	pushl	%edi
	pushl	%ebx

	movl	PARAM1,%esi	/* pointer to num */
	movl	PARAM2,%ebx	/* pointer to denom */

#ifdef PARANOID
	/* The ms bit of the divisor must be set, otherwise report a bug. */
	testl	$0x80000000, XsigH(%ebx)	/* Divisor */
	je	L_bugged
#endif /* PARANOID */


/*---------------------------------------------------------------------------+
 |  Divide: Return arg1/arg2 to arg3.                                        |
 |                                                                           |
 |  The maximum returned value is (ignoring exponents)                       |
 |     .ffffffff ffffffff                                                    |
 |     ------------------  =  1.ffffffff fffffffe                            |
 |     .80000000 00000000                                                    |
 |  and the minimum is                                                       |
 |     .80000000 00000000                                                    |
 |     ------------------  =  .80000000 00000001   (rounded)                 |
 |     .ffffffff ffffffff                                                    |
 |                                                                           |
 +---------------------------------------------------------------------------*/

	/* Save extended dividend in local register */

	/*
	 * Divide by 2 to prevent overflow.
	 * The dividend is shifted right by one bit across four dwords: each
	 * rcrl takes the bit shifted out of the previous word through CF.
	 * The intervening movl instructions leave CF untouched.
	 */
	clc
	movl	XsigH(%esi),%eax
	rcrl	%eax
	movl	%eax,FPU_accum_3
	movl	XsigL(%esi),%eax
	rcrl	%eax
	movl	%eax,FPU_accum_2
	movl	XsigLL(%esi),%eax
	rcrl	%eax
	movl	%eax,FPU_accum_1
	movl	$0,%eax		/* movl, not xorl: CF must survive */
	rcrl	%eax		/* catch the bit shifted out of the ls word */
	movl	%eax,FPU_accum_0

	movl	FPU_accum_2,%eax	/* Get the current num */
	movl	FPU_accum_3,%edx

/*----------------------------------------------------------------------*/
/* Initialization done.
   Do the first 32 bits. */

	/* We will divide by a number which is too large */
	movl	XsigH(%ebx),%ecx
	addl	$1,%ecx
	jnc	LFirst_div_not_1

	/* here we need to divide by 100000000h,
	   i.e., no division at all.. */
	movl	%edx,%eax
	jmp	LFirst_div_done

LFirst_div_not_1:
	divl	%ecx		/* Divide the numerator by the augmented
				   denom ms dw */

LFirst_div_done:
	movl	%eax,FPU_result_3	/* Put the result in the answer */

	mull	XsigH(%ebx)	/* mul by the ms dw of the denom */

	subl	%eax,FPU_accum_2	/* Subtract from the num local reg */
	sbbl	%edx,FPU_accum_3

	movl	FPU_result_3,%eax	/* Get the result back */
	mull	XsigL(%ebx)	/* now mul the middle dw of the denom */

	subl	%eax,FPU_accum_1	/* Subtract from the num local reg */
	sbbl	%edx,FPU_accum_2
	sbbl	$0,FPU_accum_3
	je	LDo_2nd_32_bits		/* Must check for non-zero result here */

#ifdef PARANOID
	jb	L_bugged_1		/* the final sbbl borrowed */
#endif /* PARANOID */

	/* The denom needs to be subtracted once more */
	incl	FPU_result_3	/* Correct the answer */

	movl	XsigL(%ebx),%eax
	movl	XsigH(%ebx),%edx
	subl	%eax,FPU_accum_1	/* Subtract from the num local reg */
	sbbl	%edx,FPU_accum_2

#ifdef PARANOID
	sbbl	$0,FPU_accum_3
	jne	L_bugged_1	/* Must check for non-zero result here */
#endif /* PARANOID */

/*----------------------------------------------------------------------*/
/* Half of the main problem is done, there is just a reduced numerator
   to handle now.
   Work with the second 32 bits, FPU_accum_3 is not used from now on */
LDo_2nd_32_bits:
	movl	FPU_accum_2,%edx	/* get the reduced num */
	movl	FPU_accum_1,%eax

	/* need to check for possible subsequent overflow */
	cmpl	XsigH(%ebx),%edx
	jb	LDo_2nd_div
	ja	LPrevent_2nd_overflow

	cmpl	XsigL(%ebx),%eax
	jb	LDo_2nd_div

LPrevent_2nd_overflow:
/* The numerator is greater or equal, would cause overflow */
	/* prevent overflow */
	subl	XsigL(%ebx),%eax
	sbbl	XsigH(%ebx),%edx
	movl	%edx,FPU_accum_2
	movl	%eax,FPU_accum_1

	incl	FPU_result_3	/* Reflect the subtraction in the answer */

#ifdef PARANOID
	je	L_bugged_2	/* Can't bump the result to 1.0 */
#endif /* PARANOID */

LDo_2nd_div:
	/* %ecx still holds the augmented denom msw from the first stage */
	testl	%ecx,%ecx
	jnz	LSecond_div_not_1

	/* %ecx == 0, we are dividing by 1.0 */
	movl	%edx,%eax
	jmp	LSecond_div_done

LSecond_div_not_1:
	divl	%ecx		/* Divide the numerator by the augmented
				   denom ms dw */

LSecond_div_done:
	movl	%eax,FPU_result_2	/* Put the result in the answer */

	mull	XsigH(%ebx)	/* mul by the ms dw of the denom */

	subl	%eax,FPU_accum_1	/* Subtract from the num local reg */
	sbbl	%edx,FPU_accum_2

#ifdef PARANOID
	jc	L_bugged_2
#endif /* PARANOID */

	movl	FPU_result_2,%eax	/* Get the result back */
	mull	XsigL(%ebx)	/* now mul the middle dw of the denom */

	subl	%eax,FPU_accum_0	/* Subtract from the num local reg */
	sbbl	%edx,FPU_accum_1	/* Subtract from the num local reg */
	sbbl	$0,FPU_accum_2

#ifdef PARANOID
	jc	L_bugged_2
#endif /* PARANOID */

	/* ZF is still that of the sbbl on FPU_accum_2 (jc leaves flags alone) */
	jz	LDo_3rd_32_bits

#ifdef PARANOID
	cmpl	$1,FPU_accum_2
	jne	L_bugged_2
#endif /* PARANOID */

	/* The denom needs to be subtracted once more */
	movl	XsigL(%ebx),%eax
	movl	XsigH(%ebx),%edx
	subl	%eax,FPU_accum_0	/* Subtract from the num local reg */
	sbbl	%edx,FPU_accum_1
	sbbl	$0,FPU_accum_2

#ifdef PARANOID
	jc	L_bugged_2
	jne	L_bugged_2
#endif /* PARANOID */

	addl	$1,FPU_result_2	/* Correct the answer */
	adcl	$0,FPU_result_3

#ifdef PARANOID
	jc	L_bugged_2	/* The answer must not overflow here */
#endif /* PARANOID */

/*----------------------------------------------------------------------*/
/* The division is essentially finished here, we just need to perform
   tidying operations.
   Deal with the 3rd 32 bits */
LDo_3rd_32_bits:
	/* We use an approximation for the third 32 bits.
	   To take account of the 3rd 32 bits of the divisor
	   (call them del), we subtract  del * (a/b).
	   Only the upper 32 bits of the product (%edx) are used. */

	movl	FPU_result_3,%eax	/* a/b */
	mull	XsigLL(%ebx)		/* del */

	subl	%edx,FPU_accum_1

	/* A borrow indicates that the result is negative */
	jnb	LTest_over

	movl	XsigH(%ebx),%edx
	addl	%edx,FPU_accum_1

	subl	$1,FPU_result_2		/* Adjust the answer */
	sbbl	$0,FPU_result_3

	/* The above addition might not have been enough, check again. */
	movl	FPU_accum_1,%edx	/* get the reduced num */
	cmpl	XsigH(%ebx),%edx	/* denom */
	jb	LDo_3rd_div

	movl	XsigH(%ebx),%edx
	addl	%edx,FPU_accum_1

	subl	$1,FPU_result_2		/* Adjust the answer */
	sbbl	$0,FPU_result_3
	jmp	LDo_3rd_div

LTest_over:
	movl	FPU_accum_1,%edx	/* get the reduced num */

	/* need to check for possible subsequent overflow */
	cmpl	XsigH(%ebx),%edx	/* denom */
	jb	LDo_3rd_div

	/* prevent overflow */
	subl	XsigH(%ebx),%edx
	movl	%edx,FPU_accum_1

	addl	$1,FPU_result_2	/* Reflect the subtraction in the answer */
	adcl	$0,FPU_result_3

LDo_3rd_div:
	movl	FPU_accum_0,%eax
	movl	FPU_accum_1,%edx
	divl	XsigH(%ebx)

	movl	%eax,FPU_result_1	/* Rough estimate of third word */

	movl	PARAM3,%esi		/* pointer to answer */

	/* Copy the three result words to the destination */
	movl	FPU_result_1,%eax
	movl	%eax,XsigLL(%esi)
	movl	FPU_result_2,%eax
	movl	%eax,XsigL(%esi)
	movl	FPU_result_3,%eax
	movl	%eax,XsigH(%esi)

L_exit:
	popl	%ebx
	popl	%edi
	popl	%esi

	leave
	RET


#ifdef PARANOID
/*
 * The logic is wrong if we got here.
 * Each handler pushes an error code, calls EXCEPTION, removes the pushed
 * word from the stack again (the value popped into %ebx is overwritten by
 * the popl at L_exit) and leaves through the normal exit path without
 * storing a result.
 * NOTE(review): the pushl operand has no explicit `$'; EX_INTERNAL is
 * presumably defined so that it expands to an immediate -- confirm in
 * exception.h.
 */
L_bugged:
	pushl	EX_INTERNAL|0x240
	call	EXCEPTION
	popl	%ebx
	jmp	L_exit

L_bugged_1:
	pushl	EX_INTERNAL|0x241
	call	EXCEPTION
	popl	%ebx
	jmp	L_exit

L_bugged_2:
	pushl	EX_INTERNAL|0x242
	call	EXCEPTION
	popl	%ebx
	jmp	L_exit
#endif /* PARANOID */
SYM_FUNC_END(div_Xsig)