.file	"div_Xsig.S"
/*---------------------------------------------------------------------------+
 |  div_Xsig.S                                                               |
 |                                                                           |
 | Division subroutine for 96 bit quantities                                 |
 |                                                                           |
 | Copyright (C) 1994,1995                                                   |
 |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
 |                       Australia.  E-mail [email protected]                |
 |                                                                           |
 |                                                                           |
 +---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------+
 | Divide the 96 bit quantity pointed to by a, by that pointed to by b, and  |
 | put the 96 bit result at the location d.                                  |
 |                                                                           |
 | The result may not be accurate to 96 bits. It is intended for use where   |
 | a result better than 64 bits is required. The result should usually be    |
 | good to at least 94 bits.                                                 |
 | The returned result is actually half of the true quotient, a / (2 * b):   |
 | the dividend is shifted right by one bit before the division. This is     |
 | done to prevent overflow.                                                 |
 |                                                                           |
 |  .aaaaaaaaaaaaaa / .bbbbbbbbbbbbb  ->  .dddddddddddd                       |
 |                                                                           |
 |  void div_Xsig(Xsig *a, Xsig *b, Xsig *dest)                              |
 |                                                                           |
 +---------------------------------------------------------------------------*/

/*
   Interface notes, as far as this file shows:

   Syntax:     AT&T operand order (source, destination), 32-bit x86,
               passed through the C preprocessor.
   Arguments:  PARAM1 = a (dividend), PARAM2 = b (divisor),
               PARAM3 = dest (quotient).
   Externals:  ENTRY, PARAM1..PARAM3, EXCEPTION and EX_INTERNAL are not
               defined in this file; presumably they come from the two
               headers included below.
   Registers:  %ebp, %esi, %edi and %ebx are saved on entry and restored
               on exit (%edi is saved but not otherwise referenced here).
               %eax, %ecx and %edx are overwritten and not restored.
   Operands:   an Xsig is accessed as three 32-bit words at byte offsets
               0 (least significant), 4 and 8 (most significant).
   Divisor:    the code expects the top bit of the most significant
               divisor word to be set; this is only checked when
               PARANOID is defined.
 */

#include "exception.h"
#include "fpu_emu.h"


/* Accessors for the three 32-bit words of the Xsig whose address is in
   register x: least significant, middle and most significant word. */
#define	XsigLL(x)	(x)
#define	XsigL(x)	4(x)
#define	XsigH(x)	8(x)


#ifndef NON_REENTRANT_FPU
/*
	Local storage on the stack:
	Accumulator:	FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
	Quotient:	FPU_result_3:FPU_result_2:FPU_result_1
	Seven words in total, matching the 28 bytes reserved in the prologue.
 */
#define FPU_accum_3	-4(%ebp)
#define FPU_accum_2	-8(%ebp)
#define FPU_accum_1	-12(%ebp)
#define FPU_accum_0	-16(%ebp)
#define FPU_result_3	-20(%ebp)
#define FPU_result_2	-24(%ebp)
#define FPU_result_1	-28(%ebp)

#else
.data
/*
	Local storage in a static area:
	Accumulator:	FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
	Quotient:	FPU_result_3:FPU_result_2:FPU_result_1
	In this configuration a single copy of the seven words is used by
	every call.
 */
	.align 4,0
FPU_accum_3:
	.long	0
FPU_accum_2:
	.long	0
FPU_accum_1:
	.long	0
FPU_accum_0:
	.long	0
FPU_result_3:
	.long	0
FPU_result_2:
	.long	0
FPU_result_1:
	.long	0
#endif /* NON_REENTRANT_FPU */


.text
ENTRY(div_Xsig)
	pushl	%ebp
	movl	%esp,%ebp
#ifndef NON_REENTRANT_FPU
	subl	$28,%esp		/* room for the seven local words */
#endif /* NON_REENTRANT_FPU */

	pushl	%esi
	pushl	%edi
	pushl	%ebx

	movl	PARAM1,%esi	/* pointer to num */
	movl	PARAM2,%ebx	/* pointer to denom */

#ifdef PARANOID
	/* The most significant bit of the divisor must be set. */
	testl	$0x80000000, XsigH(%ebx)	/* Divisor */
	je	L_bugged
#endif /* PARANOID */


/*---------------------------------------------------------------------------+
 | Divide: Return arg1/arg2 to arg3.                                         |
 |                                                                           |
 | The maximum returned value is (ignoring exponents)                        |
 |                .ffffffff ffffffff                                         |
 |                ------------------  =  1.ffffffff fffffffe                 |
 |                .80000000 00000000                                         |
 | and the minimum is                                                        |
 |                .80000000 00000000                                         |
 |                ------------------  =  .80000000 00000001   (rounded)      |
 |                .ffffffff ffffffff                                         |
 |                                                                           |
 +---------------------------------------------------------------------------*/

	/* Save extended dividend in local register */

	/* Divide by 2 to prevent overflow.
	   clc makes the bit shifted into the top word zero.  Each rcrl
	   rotates right by one bit through the carry flag, so the bit
	   dropped from one word enters the top of the next lower word.
	   The movl instructions in between leave the carry flag alone,
	   which is why the final zero is loaded with movl. */
	clc
	movl	XsigH(%esi),%eax
	rcrl	%eax
	movl	%eax,FPU_accum_3
	movl	XsigL(%esi),%eax
	rcrl	%eax
	movl	%eax,FPU_accum_2
	movl	XsigLL(%esi),%eax
	rcrl	%eax
	movl	%eax,FPU_accum_1
	movl	$0,%eax
	rcrl	%eax			/* catches the bit shifted out of the
					   least significant dividend word */
	movl	%eax,FPU_accum_0

	movl	FPU_accum_2,%eax	/* Get the current num */
	movl	FPU_accum_3,%edx

/*----------------------------------------------------------------------*/
/* Initialization done.
   Do the first 32 bits. */

	/* We will divide by a number which is too large: the most
	   significant divisor word plus one.  %ecx keeps this augmented
	   word until it is used again for the second division. */
	movl	XsigH(%ebx),%ecx
	addl	$1,%ecx
	jnc	LFirst_div_not_1

	/* here we need to divide by 100000000h,
	   i.e., no division at all.. */
	mov	%edx,%eax
	jmp	LFirst_div_done

LFirst_div_not_1:
	divl	%ecx		/* Divide the numerator by the augmented
				   denom ms dw */

LFirst_div_done:
	movl	%eax,FPU_result_3	/* Put the result in the answer */

	/* Remove quotient * divisor (upper two divisor words) from the
	   accumulator. */
	mull	XsigH(%ebx)	/* mul by the ms dw of the denom */

	subl	%eax,FPU_accum_2	/* Subtract from the num local reg */
	sbbl	%edx,FPU_accum_3

	movl	FPU_result_3,%eax	/* Get the result back */
	mull	XsigL(%ebx)	/* now mul the ls dw of the denom */

	subl	%eax,FPU_accum_1	/* Subtract from the num local reg */
	sbbl	%edx,FPU_accum_2
	sbbl	$0,FPU_accum_3
	/* The branch tests the flags of the last sbbl: it is taken when
	   FPU_accum_3 has become zero. */
	je	LDo_2nd_32_bits		/* Must check for non-zero result here */

#ifdef PARANOID
	/* Still the flags of that sbbl: a borrow out of the top word. */
	jb	L_bugged_1
#endif /* PARANOID */

	/* need to subtract another once of the denom */
	incl	FPU_result_3	/* Correct the answer */

	movl	XsigL(%ebx),%eax
	movl	XsigH(%ebx),%edx
	subl	%eax,FPU_accum_1	/* Subtract from the num local reg */
	sbbl	%edx,FPU_accum_2

#ifdef PARANOID
	/* Only this build propagates the borrow into FPU_accum_3 and
	   checks that the word is now zero. */
	sbbl	$0,FPU_accum_3
	jne	L_bugged_1	/* Must check for non-zero result here */
#endif /* PARANOID */

/*----------------------------------------------------------------------*/
/* Half of the main problem is done, there is just a reduced numerator
   to handle now.
   Work with the second 32 bits.
   NOTE(review): the original comment here said that FPU_accum_0 is not
   used from now on, but the code below still subtracts from and reads
   FPU_accum_0; it is FPU_accum_3 that is not referenced again. */
LDo_2nd_32_bits:
	movl	FPU_accum_2,%edx	/* get the reduced num */
	movl	FPU_accum_1,%eax

	/* need to check for possible subsequent overflow:
	   compare %edx:%eax with the upper two divisor words (unsigned) */
	cmpl	XsigH(%ebx),%edx
	jb	LDo_2nd_div
	ja	LPrevent_2nd_overflow

	cmpl	XsigL(%ebx),%eax
	jb	LDo_2nd_div

LPrevent_2nd_overflow:
/* The numerator is greater or equal, would cause overflow */
	/* prevent overflow */
	subl	XsigL(%ebx),%eax
	sbbl	XsigH(%ebx),%edx
	movl	%edx,FPU_accum_2
	movl	%eax,FPU_accum_1

	incl	FPU_result_3	/* Reflect the subtraction in the answer */

#ifdef PARANOID
	/* Zero flag from the incl: the top quotient word wrapped to 0. */
	je	L_bugged_2	/* Can't bump the result to 1.0 */
#endif /* PARANOID */

LDo_2nd_div:
	/* %edx:%eax hold the reduced numerator on both paths to here. */
	cmpl	$0,%ecx		/* augmented denom msw */
	jnz	LSecond_div_not_1

	/* %ecx == 0, we are dividing by 1.0 */
	mov	%edx,%eax
	jmp	LSecond_div_done

LSecond_div_not_1:
	divl	%ecx		/* Divide the numerator by the denom ms dw */

LSecond_div_done:
	movl	%eax,FPU_result_2	/* Put the result in the answer */

	mull	XsigH(%ebx)	/* mul by the ms dw of the denom */

	subl	%eax,FPU_accum_1	/* Subtract from the num local reg */
	sbbl	%edx,FPU_accum_2

#ifdef PARANOID
	jc	L_bugged_2
#endif /* PARANOID */

	movl	FPU_result_2,%eax	/* Get the result back */
	mull	XsigL(%ebx)	/* now mul the ls dw of the denom */

	subl	%eax,FPU_accum_0	/* Subtract from the num local reg */
	sbbl	%edx,FPU_accum_1	/* Subtract from the num local reg */
	sbbl	$0,FPU_accum_2

#ifdef PARANOID
	jc	L_bugged_2
#endif /* PARANOID */

	/* Flags are still those of the sbbl on FPU_accum_2 (the jc above
	   does not change them): zero means no further correction. */
	jz	LDo_3rd_32_bits

#ifdef PARANOID
	cmpl	$1,FPU_accum_2
	jne	L_bugged_2
#endif /* PARANOID */

	/* need to subtract another once of the denom */
	movl	XsigL(%ebx),%eax
	movl	XsigH(%ebx),%edx
	subl	%eax,FPU_accum_0	/* Subtract from the num local reg */
	sbbl	%edx,FPU_accum_1
	sbbl	$0,FPU_accum_2

#ifdef PARANOID
	jc	L_bugged_2
	jne	L_bugged_2
#endif /* PARANOID */

	addl	$1,FPU_result_2	/* Correct the answer */
	adcl	$0,FPU_result_3

#ifdef PARANOID
	/* A carry out of the top quotient word is treated as an error. */
	jc	L_bugged_2	/* Must check for non-zero result here */
#endif /* PARANOID */

/*----------------------------------------------------------------------*/
/* The division is essentially finished here, we just need to perform
   tidying operations.
   Deal with the 3rd 32 bits */
LDo_3rd_32_bits:
	/* We use an approximation for the third 32 bits.
	   To take account of the 3rd 32 bits of the divisor
	   (call them del), we subtract  del * (a/b).
	   Only the upper word of the product (%edx) is used. */

	movl	FPU_result_3,%eax	/* a/b */
	mull	XsigLL(%ebx)		/* del */

	subl	%edx,FPU_accum_1

	/* A borrow indicates that the result is negative */
	jnb	LTest_over

	/* Add the ms dw of the denom back and take one off the quotient
	   (FPU_result_3:FPU_result_2) to compensate. */
	movl	XsigH(%ebx),%edx
	addl	%edx,FPU_accum_1

	subl	$1,FPU_result_2	/* Adjust the answer */
	sbbl	$0,FPU_result_3

	/* The above addition might not have been enough, check again. */
	movl	FPU_accum_1,%edx	/* get the reduced num */
	cmpl	XsigH(%ebx),%edx	/* denom */
	jb	LDo_3rd_div

	movl	XsigH(%ebx),%edx
	addl	%edx,FPU_accum_1

	subl	$1,FPU_result_2	/* Adjust the answer */
	sbbl	$0,FPU_result_3
	jmp	LDo_3rd_div

LTest_over:
	movl	FPU_accum_1,%edx	/* get the reduced num */

	/* need to check for possible subsequent overflow:
	   the divl below needs %edx below the divisor word */
	cmpl	XsigH(%ebx),%edx	/* denom */
	jb	LDo_3rd_div

	/* prevent overflow */
	subl	XsigH(%ebx),%edx
	movl	%edx,FPU_accum_1

	addl	$1,FPU_result_2	/* Reflect the subtraction in the answer */
	adcl	$0,FPU_result_3

LDo_3rd_div:
	/* FPU_accum_1:FPU_accum_0 divided by the ms dw of the denom only. */
	movl	FPU_accum_0,%eax
	movl	FPU_accum_1,%edx
	divl	XsigH(%ebx)

	movl	%eax,FPU_result_1	/* Rough estimate of third word */

	movl	PARAM3,%esi		/* pointer to answer */

	/* Copy the three quotient words to dest, least significant first. */
	movl	FPU_result_1,%eax
	movl	%eax,XsigLL(%esi)
	movl	FPU_result_2,%eax
	movl	%eax,XsigL(%esi)
	movl	FPU_result_3,%eax
	movl	%eax,XsigH(%esi)

L_exit:
	/* Common epilogue, also used by the PARANOID error stubs below. */
	popl	%ebx
	popl	%edi
	popl	%esi

	leave
	ret


#ifdef PARANOID
/* The logic is wrong if we got here */
/* Each stub pushes one argument, calls EXCEPTION, removes the argument
   again and leaves through L_exit, so dest is not written on these paths.
   The pop target %ebx is reloaded from the stack at L_exit anyway.
   NOTE(review): the push operand has no visible '$' prefix; whether it is
   assembled as an immediate depends on how EX_INTERNAL is defined, which
   is not visible in this file - confirm against exception.h. */
L_bugged:
	/* divisor does not have its most significant bit set */
	pushl	EX_INTERNAL|0x240
	call	EXCEPTION
	pop	%ebx
	jmp	L_exit

L_bugged_1:
	/* consistency check failed in the first 32-bit stage */
	pushl	EX_INTERNAL|0x241
	call	EXCEPTION
	pop	%ebx
	jmp	L_exit

L_bugged_2:
	/* consistency check failed in the second 32-bit stage */
	pushl	EX_INTERNAL|0x242
	call	EXCEPTION
	pop	%ebx
	jmp	L_exit
#endif /* PARANOID */