.file "wm_sqrt.S"1/*---------------------------------------------------------------------------+2| wm_sqrt.S |3| |4| Fixed point arithmetic square root evaluation. |5| |6| Copyright (C) 1992,1993,1995,1997 |7| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |8| Australia. E-mail [email protected] |9| |10| Call from C as: |11| int wm_sqrt(FPU_REG *n, unsigned int control_word) |12| |13+---------------------------------------------------------------------------*/1415/*---------------------------------------------------------------------------+16| wm_sqrt(FPU_REG *n, unsigned int control_word) |17| returns the square root of n in n. |18| |19| Use Newton's method to compute the square root of a number, which must |20| be in the range [1.0 .. 4.0), to 64 bits accuracy. |21| Does not check the sign or tag of the argument. |22| Sets the exponent, but not the sign or tag of the result. |23| |24| The guess is kept in %esi:%edi |25+---------------------------------------------------------------------------*/2627#include "exception.h"28#include "fpu_emu.h"293031#ifndef NON_REENTRANT_FPU32/* Local storage on the stack: */33#define FPU_accum_3 -4(%ebp) /* ms word */34#define FPU_accum_2 -8(%ebp)35#define FPU_accum_1 -12(%ebp)36#define FPU_accum_0 -16(%ebp)3738/*39* The de-normalised argument:40* sq_2 sq_1 sq_041* b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 042* ^ binary point here43*/44#define FPU_fsqrt_arg_2 -20(%ebp) /* ms word */45#define FPU_fsqrt_arg_1 -24(%ebp)46#define FPU_fsqrt_arg_0 -28(%ebp) /* ls word, at most the ms bit is set */4748#else49/* Local storage in a static area: */50.data51.align 4,052FPU_accum_3:53.long 0 /* ms word */54FPU_accum_2:55.long 056FPU_accum_1:57.long 058FPU_accum_0:59.long 06061/* The de-normalised argument:62sq_2 sq_1 sq_063b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 064^ binary point here65*/66FPU_fsqrt_arg_2:67.long 0 /* ms word */68FPU_fsqrt_arg_1:69.long 070FPU_fsqrt_arg_0:71.long 0 /* ls word, at most the ms bit is set */72#endif /* NON_REENTRANT_FPU */737475.text76ENTRY(wm_sqrt)77pushl %ebp78movl %esp,%ebp79#ifndef NON_REENTRANT_FPU80subl $28,%esp81#endif /* NON_REENTRANT_FPU */82pushl %esi83pushl %edi84pushl %ebx8586movl PARAM1,%esi8788movl SIGH(%esi),%eax89movl SIGL(%esi),%ecx90xorl %edx,%edx9192/* We use a rough linear estimate for the first guess.. */9394cmpw EXP_BIAS,EXP(%esi)95jnz sqrt_arg_ge_29697shrl $1,%eax /* arg is in the range [1.0 .. 2.0) */98rcrl $1,%ecx99rcrl $1,%edx100101sqrt_arg_ge_2:102/* From here on, n is never accessed directly again until it is103replaced by the answer. */104105movl %eax,FPU_fsqrt_arg_2 /* ms word of n */106movl %ecx,FPU_fsqrt_arg_1107movl %edx,FPU_fsqrt_arg_0108109/* Make a linear first estimate */110shrl $1,%eax111addl $0x40000000,%eax112movl $0xaaaaaaaa,%ecx113mull %ecx114shll %edx /* max result was 7fff... */115testl $0x80000000,%edx /* but min was 3fff... */116jnz sqrt_prelim_no_adjust117118movl $0x80000000,%edx /* round up */119120sqrt_prelim_no_adjust:121movl %edx,%esi /* Our first guess */122123/* We have now computed (approx) (2 + x) / 3, which forms the basis124for a few iterations of Newton's method */125126movl FPU_fsqrt_arg_2,%ecx /* ms word */127128/*129* From our initial estimate, three iterations are enough to get us130* to 30 bits or so. This will then allow two iterations at better131* precision to complete the process.132*/133134/* Compute (g + n/g)/2 at each iteration (g is the guess). */135shrl %ecx /* Doing this first will prevent a divide */136/* overflow later. */137138movl %ecx,%edx /* msw of the arg / 2 */139divl %esi /* current estimate */140shrl %esi /* divide by 2 */141addl %eax,%esi /* the new estimate */142143movl %ecx,%edx144divl %esi145shrl %esi146addl %eax,%esi147148movl %ecx,%edx149divl %esi150shrl %esi151addl %eax,%esi152153/*154* Now that an estimate accurate to about 30 bits has been obtained (in %esi),155* we improve it to 60 bits or so.156*157* The strategy from now on is to compute new estimates from158* guess := guess + (n - guess^2) / (2 * guess)159*/160161/* First, find the square of the guess */162movl %esi,%eax163mull %esi164/* guess^2 now in %edx:%eax */165166movl FPU_fsqrt_arg_1,%ecx167subl %ecx,%eax168movl FPU_fsqrt_arg_2,%ecx /* ms word of normalized n */169sbbl %ecx,%edx170jnc sqrt_stage_2_positive171172/* Subtraction gives a negative result,173negate the result before division. */174notl %edx175notl %eax176addl $1,%eax177adcl $0,%edx178179divl %esi180movl %eax,%ecx181182movl %edx,%eax183divl %esi184jmp sqrt_stage_2_finish185186sqrt_stage_2_positive:187divl %esi188movl %eax,%ecx189190movl %edx,%eax191divl %esi192193notl %ecx194notl %eax195addl $1,%eax196adcl $0,%ecx197198sqrt_stage_2_finish:199sarl $1,%ecx /* divide by 2 */200rcrl $1,%eax201202/* Form the new estimate in %esi:%edi */203movl %eax,%edi204addl %ecx,%esi205206jnz sqrt_stage_2_done /* result should be [1..2) */207208#ifdef PARANOID209/* It should be possible to get here only if the arg is ffff....ffff */210cmp $0xffffffff,FPU_fsqrt_arg_1211jnz sqrt_stage_2_error212#endif /* PARANOID */213214/* The best rounded result. */215xorl %eax,%eax216decl %eax217movl %eax,%edi218movl %eax,%esi219movl $0x7fffffff,%eax220jmp sqrt_round_result221222#ifdef PARANOID223sqrt_stage_2_error:224pushl EX_INTERNAL|0x213225call EXCEPTION226#endif /* PARANOID */227228sqrt_stage_2_done:229230/* Now the square root has been computed to better than 60 bits. */231232/* Find the square of the guess. */233movl %edi,%eax /* ls word of guess */234mull %edi235movl %edx,FPU_accum_1236237movl %esi,%eax238mull %esi239movl %edx,FPU_accum_3240movl %eax,FPU_accum_2241242movl %edi,%eax243mull %esi244addl %eax,FPU_accum_1245adcl %edx,FPU_accum_2246adcl $0,FPU_accum_3247248/* movl %esi,%eax */249/* mull %edi */250addl %eax,FPU_accum_1251adcl %edx,FPU_accum_2252adcl $0,FPU_accum_3253254/* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */255256movl FPU_fsqrt_arg_0,%eax /* get normalized n */257subl %eax,FPU_accum_1258movl FPU_fsqrt_arg_1,%eax259sbbl %eax,FPU_accum_2260movl FPU_fsqrt_arg_2,%eax /* ms word of normalized n */261sbbl %eax,FPU_accum_3262jnc sqrt_stage_3_positive263264/* Subtraction gives a negative result,265negate the result before division */266notl FPU_accum_1267notl FPU_accum_2268notl FPU_accum_3269addl $1,FPU_accum_1270adcl $0,FPU_accum_2271272#ifdef PARANOID273adcl $0,FPU_accum_3 /* This must be zero */274jz sqrt_stage_3_no_error275276sqrt_stage_3_error:277pushl EX_INTERNAL|0x207278call EXCEPTION279280sqrt_stage_3_no_error:281#endif /* PARANOID */282283movl FPU_accum_2,%edx284movl FPU_accum_1,%eax285divl %esi286movl %eax,%ecx287288movl %edx,%eax289divl %esi290291sarl $1,%ecx /* divide by 2 */292rcrl $1,%eax293294/* prepare to round the result */295296addl %ecx,%edi297adcl $0,%esi298299jmp sqrt_stage_3_finished300301sqrt_stage_3_positive:302movl FPU_accum_2,%edx303movl FPU_accum_1,%eax304divl %esi305movl %eax,%ecx306307movl %edx,%eax308divl %esi309310sarl $1,%ecx /* divide by 2 */311rcrl $1,%eax312313/* prepare to round the result */314315notl %eax /* Negate the correction term */316notl %ecx317addl $1,%eax318adcl $0,%ecx /* carry here ==> correction == 0 */319adcl $0xffffffff,%esi320321addl %ecx,%edi322adcl $0,%esi323324sqrt_stage_3_finished:325326/*327* The result in %esi:%edi:%esi should be good to about 90 bits here,328* and the rounding information here does not have sufficient accuracy329* in a few rare cases.330*/331cmpl $0xffffffe0,%eax332ja sqrt_near_exact_x333334cmpl $0x00000020,%eax335jb sqrt_near_exact336337cmpl $0x7fffffe0,%eax338jb sqrt_round_result339340cmpl $0x80000020,%eax341jb sqrt_get_more_precision342343sqrt_round_result:344/* Set up for rounding operations */345movl %eax,%edx346movl %esi,%eax347movl %edi,%ebx348movl PARAM1,%edi349movw EXP_BIAS,EXP(%edi) /* Result is in [1.0 .. 2.0) */350jmp fpu_reg_round351352353sqrt_near_exact_x:354/* First, the estimate must be rounded up. */355addl $1,%edi356adcl $0,%esi357358sqrt_near_exact:359/*360* This is an easy case because x^1/2 is monotonic.361* We need just find the square of our estimate, compare it362* with the argument, and deduce whether our estimate is363* above, below, or exact. We use the fact that the estimate364* is known to be accurate to about 90 bits.365*/366movl %edi,%eax /* ls word of guess */367mull %edi368movl %edx,%ebx /* 2nd ls word of square */369movl %eax,%ecx /* ls word of square */370371movl %edi,%eax372mull %esi373addl %eax,%ebx374addl %eax,%ebx375376#ifdef PARANOID377cmp $0xffffffb0,%ebx378jb sqrt_near_exact_ok379380cmp $0x00000050,%ebx381ja sqrt_near_exact_ok382383pushl EX_INTERNAL|0x214384call EXCEPTION385386sqrt_near_exact_ok:387#endif /* PARANOID */388389or %ebx,%ebx390js sqrt_near_exact_small391392jnz sqrt_near_exact_large393394or %ebx,%edx395jnz sqrt_near_exact_large396397/* Our estimate is exactly the right answer */398xorl %eax,%eax399jmp sqrt_round_result400401sqrt_near_exact_small:402/* Our estimate is too small */403movl $0x000000ff,%eax404jmp sqrt_round_result405406sqrt_near_exact_large:407/* Our estimate is too large, we need to decrement it */408subl $1,%edi409sbbl $0,%esi410movl $0xffffff00,%eax411jmp sqrt_round_result412413414sqrt_get_more_precision:415/* This case is almost the same as the above, except we start416with an extra bit of precision in the estimate. */417stc /* The extra bit. */418rcll $1,%edi /* Shift the estimate left one bit */419rcll $1,%esi420421movl %edi,%eax /* ls word of guess */422mull %edi423movl %edx,%ebx /* 2nd ls word of square */424movl %eax,%ecx /* ls word of square */425426movl %edi,%eax427mull %esi428addl %eax,%ebx429addl %eax,%ebx430431/* Put our estimate back to its original value */432stc /* The ms bit. */433rcrl $1,%esi /* Shift the estimate left one bit */434rcrl $1,%edi435436#ifdef PARANOID437cmp $0xffffff60,%ebx438jb sqrt_more_prec_ok439440cmp $0x000000a0,%ebx441ja sqrt_more_prec_ok442443pushl EX_INTERNAL|0x215444call EXCEPTION445446sqrt_more_prec_ok:447#endif /* PARANOID */448449or %ebx,%ebx450js sqrt_more_prec_small451452jnz sqrt_more_prec_large453454or %ebx,%ecx455jnz sqrt_more_prec_large456457/* Our estimate is exactly the right answer */458movl $0x80000000,%eax459jmp sqrt_round_result460461sqrt_more_prec_small:462/* Our estimate is too small */463movl $0x800000ff,%eax464jmp sqrt_round_result465466sqrt_more_prec_large:467/* Our estimate is too large */468movl $0x7fffff00,%eax469jmp sqrt_round_result470471472