/* SPDX-License-Identifier: GPL-2.0 */1.file "wm_sqrt.S"2/*---------------------------------------------------------------------------+3| wm_sqrt.S |4| |5| Fixed point arithmetic square root evaluation. |6| |7| Copyright (C) 1992,1993,1995,1997 |8| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |9| Australia. E-mail [email protected] |10| |11| Call from C as: |12| int wm_sqrt(FPU_REG *n, unsigned int control_word) |13| |14+---------------------------------------------------------------------------*/1516/*---------------------------------------------------------------------------+17| wm_sqrt(FPU_REG *n, unsigned int control_word) |18| returns the square root of n in n. |19| |20| Use Newton's method to compute the square root of a number, which must |21| be in the range [1.0 .. 4.0), to 64 bits accuracy. |22| Does not check the sign or tag of the argument. |23| Sets the exponent, but not the sign or tag of the result. |24| |25| The guess is kept in %esi:%edi |26+---------------------------------------------------------------------------*/2728#include "exception.h"29#include "fpu_emu.h"303132#ifndef NON_REENTRANT_FPU33/* Local storage on the stack: */34#define FPU_accum_3 -4(%ebp) /* ms word */35#define FPU_accum_2 -8(%ebp)36#define FPU_accum_1 -12(%ebp)37#define FPU_accum_0 -16(%ebp)3839/*40* The de-normalised argument:41* sq_2 sq_1 sq_042* b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 043* ^ binary point here44*/45#define FPU_fsqrt_arg_2 -20(%ebp) /* ms word */46#define FPU_fsqrt_arg_1 -24(%ebp)47#define FPU_fsqrt_arg_0 -28(%ebp) /* ls word, at most the ms bit is set */4849#else50/* Local storage in a static area: */51.data52.align 4,053FPU_accum_3:54.long 0 /* ms word */55FPU_accum_2:56.long 057FPU_accum_1:58.long 059FPU_accum_0:60.long 06162/* The de-normalised argument:63sq_2 sq_1 sq_064b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 065^ binary point here66*/67FPU_fsqrt_arg_2:68.long 0 /* ms word */69FPU_fsqrt_arg_1:70.long 071FPU_fsqrt_arg_0:72.long 0 /* ls word, at most the ms bit is set */73#endif /* NON_REENTRANT_FPU */747576.text77SYM_FUNC_START(wm_sqrt)78pushl %ebp79movl %esp,%ebp80#ifndef NON_REENTRANT_FPU81subl $28,%esp82#endif /* NON_REENTRANT_FPU */83pushl %esi84pushl %edi85pushl %ebx8687movl PARAM1,%esi8889movl SIGH(%esi),%eax90movl SIGL(%esi),%ecx91xorl %edx,%edx9293/* We use a rough linear estimate for the first guess.. */9495cmpw EXP_BIAS,EXP(%esi)96jnz sqrt_arg_ge_29798shrl $1,%eax /* arg is in the range [1.0 .. 2.0) */99rcrl $1,%ecx100rcrl $1,%edx101102sqrt_arg_ge_2:103/* From here on, n is never accessed directly again until it is104replaced by the answer. */105106movl %eax,FPU_fsqrt_arg_2 /* ms word of n */107movl %ecx,FPU_fsqrt_arg_1108movl %edx,FPU_fsqrt_arg_0109110/* Make a linear first estimate */111shrl $1,%eax112addl $0x40000000,%eax113movl $0xaaaaaaaa,%ecx114mull %ecx115shll %edx /* max result was 7fff... */116testl $0x80000000,%edx /* but min was 3fff... */117jnz sqrt_prelim_no_adjust118119movl $0x80000000,%edx /* round up */120121sqrt_prelim_no_adjust:122movl %edx,%esi /* Our first guess */123124/* We have now computed (approx) (2 + x) / 3, which forms the basis125for a few iterations of Newton's method */126127movl FPU_fsqrt_arg_2,%ecx /* ms word */128129/*130* From our initial estimate, three iterations are enough to get us131* to 30 bits or so. This will then allow two iterations at better132* precision to complete the process.133*/134135/* Compute (g + n/g)/2 at each iteration (g is the guess). */136shrl %ecx /* Doing this first will prevent a divide */137/* overflow later. */138139movl %ecx,%edx /* msw of the arg / 2 */140divl %esi /* current estimate */141shrl %esi /* divide by 2 */142addl %eax,%esi /* the new estimate */143144movl %ecx,%edx145divl %esi146shrl %esi147addl %eax,%esi148149movl %ecx,%edx150divl %esi151shrl %esi152addl %eax,%esi153154/*155* Now that an estimate accurate to about 30 bits has been obtained (in %esi),156* we improve it to 60 bits or so.157*158* The strategy from now on is to compute new estimates from159* guess := guess + (n - guess^2) / (2 * guess)160*/161162/* First, find the square of the guess */163movl %esi,%eax164mull %esi165/* guess^2 now in %edx:%eax */166167movl FPU_fsqrt_arg_1,%ecx168subl %ecx,%eax169movl FPU_fsqrt_arg_2,%ecx /* ms word of normalized n */170sbbl %ecx,%edx171jnc sqrt_stage_2_positive172173/* Subtraction gives a negative result,174negate the result before division. */175notl %edx176notl %eax177addl $1,%eax178adcl $0,%edx179180divl %esi181movl %eax,%ecx182183movl %edx,%eax184divl %esi185jmp sqrt_stage_2_finish186187sqrt_stage_2_positive:188divl %esi189movl %eax,%ecx190191movl %edx,%eax192divl %esi193194notl %ecx195notl %eax196addl $1,%eax197adcl $0,%ecx198199sqrt_stage_2_finish:200sarl $1,%ecx /* divide by 2 */201rcrl $1,%eax202203/* Form the new estimate in %esi:%edi */204movl %eax,%edi205addl %ecx,%esi206207jnz sqrt_stage_2_done /* result should be [1..2) */208209#ifdef PARANOID210/* It should be possible to get here only if the arg is ffff....ffff */211cmpl $0xffffffff,FPU_fsqrt_arg_1212jnz sqrt_stage_2_error213#endif /* PARANOID */214215/* The best rounded result. */216xorl %eax,%eax217decl %eax218movl %eax,%edi219movl %eax,%esi220movl $0x7fffffff,%eax221jmp sqrt_round_result222223#ifdef PARANOID224sqrt_stage_2_error:225pushl EX_INTERNAL|0x213226call EXCEPTION227#endif /* PARANOID */228229sqrt_stage_2_done:230231/* Now the square root has been computed to better than 60 bits. */232233/* Find the square of the guess. */234movl %edi,%eax /* ls word of guess */235mull %edi236movl %edx,FPU_accum_1237238movl %esi,%eax239mull %esi240movl %edx,FPU_accum_3241movl %eax,FPU_accum_2242243movl %edi,%eax244mull %esi245addl %eax,FPU_accum_1246adcl %edx,FPU_accum_2247adcl $0,FPU_accum_3248249/* movl %esi,%eax */250/* mull %edi */251addl %eax,FPU_accum_1252adcl %edx,FPU_accum_2253adcl $0,FPU_accum_3254255/* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */256257movl FPU_fsqrt_arg_0,%eax /* get normalized n */258subl %eax,FPU_accum_1259movl FPU_fsqrt_arg_1,%eax260sbbl %eax,FPU_accum_2261movl FPU_fsqrt_arg_2,%eax /* ms word of normalized n */262sbbl %eax,FPU_accum_3263jnc sqrt_stage_3_positive264265/* Subtraction gives a negative result,266negate the result before division */267notl FPU_accum_1268notl FPU_accum_2269notl FPU_accum_3270addl $1,FPU_accum_1271adcl $0,FPU_accum_2272273#ifdef PARANOID274adcl $0,FPU_accum_3 /* This must be zero */275jz sqrt_stage_3_no_error276277sqrt_stage_3_error:278pushl EX_INTERNAL|0x207279call EXCEPTION280281sqrt_stage_3_no_error:282#endif /* PARANOID */283284movl FPU_accum_2,%edx285movl FPU_accum_1,%eax286divl %esi287movl %eax,%ecx288289movl %edx,%eax290divl %esi291292sarl $1,%ecx /* divide by 2 */293rcrl $1,%eax294295/* prepare to round the result */296297addl %ecx,%edi298adcl $0,%esi299300jmp sqrt_stage_3_finished301302sqrt_stage_3_positive:303movl FPU_accum_2,%edx304movl FPU_accum_1,%eax305divl %esi306movl %eax,%ecx307308movl %edx,%eax309divl %esi310311sarl $1,%ecx /* divide by 2 */312rcrl $1,%eax313314/* prepare to round the result */315316notl %eax /* Negate the correction term */317notl %ecx318addl $1,%eax319adcl $0,%ecx /* carry here ==> correction == 0 */320adcl $0xffffffff,%esi321322addl %ecx,%edi323adcl $0,%esi324325sqrt_stage_3_finished:326327/*328* The result in %esi:%edi:%esi should be good to about 90 bits here,329* and the rounding information here does not have sufficient accuracy330* in a few rare cases.331*/332cmpl $0xffffffe0,%eax333ja sqrt_near_exact_x334335cmpl $0x00000020,%eax336jb sqrt_near_exact337338cmpl $0x7fffffe0,%eax339jb sqrt_round_result340341cmpl $0x80000020,%eax342jb sqrt_get_more_precision343344sqrt_round_result:345/* Set up for rounding operations */346movl %eax,%edx347movl %esi,%eax348movl %edi,%ebx349movl PARAM1,%edi350movw EXP_BIAS,EXP(%edi) /* Result is in [1.0 .. 2.0) */351jmp fpu_reg_round352353354sqrt_near_exact_x:355/* First, the estimate must be rounded up. */356addl $1,%edi357adcl $0,%esi358359sqrt_near_exact:360/*361* This is an easy case because x^1/2 is monotonic.362* We need just find the square of our estimate, compare it363* with the argument, and deduce whether our estimate is364* above, below, or exact. We use the fact that the estimate365* is known to be accurate to about 90 bits.366*/367movl %edi,%eax /* ls word of guess */368mull %edi369movl %edx,%ebx /* 2nd ls word of square */370movl %eax,%ecx /* ls word of square */371372movl %edi,%eax373mull %esi374addl %eax,%ebx375addl %eax,%ebx376377#ifdef PARANOID378cmp $0xffffffb0,%ebx379jb sqrt_near_exact_ok380381cmp $0x00000050,%ebx382ja sqrt_near_exact_ok383384pushl EX_INTERNAL|0x214385call EXCEPTION386387sqrt_near_exact_ok:388#endif /* PARANOID */389390or %ebx,%ebx391js sqrt_near_exact_small392393jnz sqrt_near_exact_large394395or %ebx,%edx396jnz sqrt_near_exact_large397398/* Our estimate is exactly the right answer */399xorl %eax,%eax400jmp sqrt_round_result401402sqrt_near_exact_small:403/* Our estimate is too small */404movl $0x000000ff,%eax405jmp sqrt_round_result406407sqrt_near_exact_large:408/* Our estimate is too large, we need to decrement it */409subl $1,%edi410sbbl $0,%esi411movl $0xffffff00,%eax412jmp sqrt_round_result413414415sqrt_get_more_precision:416/* This case is almost the same as the above, except we start417with an extra bit of precision in the estimate. */418stc /* The extra bit. */419rcll $1,%edi /* Shift the estimate left one bit */420rcll $1,%esi421422movl %edi,%eax /* ls word of guess */423mull %edi424movl %edx,%ebx /* 2nd ls word of square */425movl %eax,%ecx /* ls word of square */426427movl %edi,%eax428mull %esi429addl %eax,%ebx430addl %eax,%ebx431432/* Put our estimate back to its original value */433stc /* The ms bit. */434rcrl $1,%esi /* Shift the estimate left one bit */435rcrl $1,%edi436437#ifdef PARANOID438cmp $0xffffff60,%ebx439jb sqrt_more_prec_ok440441cmp $0x000000a0,%ebx442ja sqrt_more_prec_ok443444pushl EX_INTERNAL|0x215445call EXCEPTION446447sqrt_more_prec_ok:448#endif /* PARANOID */449450or %ebx,%ebx451js sqrt_more_prec_small452453jnz sqrt_more_prec_large454455or %ebx,%ecx456jnz sqrt_more_prec_large457458/* Our estimate is exactly the right answer */459movl $0x80000000,%eax460jmp sqrt_round_result461462sqrt_more_prec_small:463/* Our estimate is too small */464movl $0x800000ff,%eax465jmp sqrt_round_result466467sqrt_more_prec_large:468/* Our estimate is too large */469movl $0x7fffff00,%eax470jmp sqrt_round_result471SYM_FUNC_END(wm_sqrt)472473474