/* IEEE754 floating point arithmetic1* double precision square root2*/3/*4* MIPS floating point support5* Copyright (C) 1994-2000 Algorithmics Ltd.6*7* ########################################################################8*9* This program is free software; you can distribute it and/or modify it10* under the terms of the GNU General Public License (Version 2) as11* published by the Free Software Foundation.12*13* This program is distributed in the hope it will be useful, but WITHOUT14* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or15* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License16* for more details.17*18* You should have received a copy of the GNU General Public License along19* with this program; if not, write to the Free Software Foundation, Inc.,20* 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.21*22* ########################################################################23*/242526#include "ieee754dp.h"2728static const unsigned table[] = {290, 1204, 3062, 5746, 9193, 13348, 18162, 23592,3029598, 36145, 43202, 50740, 58733, 67158, 75992,3185215, 83599, 71378, 60428, 50647, 41945, 34246,3227478, 21581, 16499, 12183, 8588, 5674, 3403,331742, 661, 13034};3536ieee754dp ieee754dp_sqrt(ieee754dp x)37{38struct _ieee754_csr oldcsr;39ieee754dp y, z, t;40unsigned scalx, yh;41COMPXDP;4243EXPLODEXDP;44CLEARCX;45FLUSHXDP;4647/* x == INF or NAN? */48switch (xc) {49case IEEE754_CLASS_QNAN:50/* sqrt(Nan) = Nan */51return ieee754dp_nanxcpt(x, "sqrt");52case IEEE754_CLASS_SNAN:53SETCX(IEEE754_INVALID_OPERATION);54return ieee754dp_nanxcpt(ieee754dp_indef(), "sqrt");55case IEEE754_CLASS_ZERO:56/* sqrt(0) = 0 */57return x;58case IEEE754_CLASS_INF:59if (xs) {60/* sqrt(-Inf) = Nan */61SETCX(IEEE754_INVALID_OPERATION);62return ieee754dp_nanxcpt(ieee754dp_indef(), "sqrt");63}64/* sqrt(+Inf) = Inf */65return x;66case IEEE754_CLASS_DNORM:67DPDNORMX;68/* fall through */69case IEEE754_CLASS_NORM:70if (xs) {71/* sqrt(-x) = Nan */72SETCX(IEEE754_INVALID_OPERATION);73return ieee754dp_nanxcpt(ieee754dp_indef(), "sqrt");74}75break;76}7778/* save old csr; switch off INX enable & flag; set RN rounding */79oldcsr = ieee754_csr;80ieee754_csr.mx &= ~IEEE754_INEXACT;81ieee754_csr.sx &= ~IEEE754_INEXACT;82ieee754_csr.rm = IEEE754_RN;8384/* adjust exponent to prevent overflow */85scalx = 0;86if (xe > 512) { /* x > 2**-512? */87xe -= 512; /* x = x / 2**512 */88scalx += 256;89} else if (xe < -512) { /* x < 2**-512? */90xe += 512; /* x = x * 2**512 */91scalx -= 256;92}9394y = x = builddp(0, xe + DP_EBIAS, xm & ~DP_HIDDEN_BIT);9596/* magic initial approximation to almost 8 sig. bits */97yh = y.bits >> 32;98yh = (yh >> 1) + 0x1ff80000;99yh = yh - table[(yh >> 15) & 31];100y.bits = ((u64) yh << 32) | (y.bits & 0xffffffff);101102/* Heron's rule once with correction to improve to ~18 sig. bits */103/* t=x/y; y=y+t; py[n0]=py[n0]-0x00100006; py[n1]=0; */104t = ieee754dp_div(x, y);105y = ieee754dp_add(y, t);106y.bits -= 0x0010000600000000LL;107y.bits &= 0xffffffff00000000LL;108109/* triple to almost 56 sig. bits: y ~= sqrt(x) to within 1 ulp */110/* t=y*y; z=t; pt[n0]+=0x00100000; t+=z; z=(x-z)*y; */111z = t = ieee754dp_mul(y, y);112t.parts.bexp += 0x001;113t = ieee754dp_add(t, z);114z = ieee754dp_mul(ieee754dp_sub(x, z), y);115116/* t=z/(t+x) ; pt[n0]+=0x00100000; y+=t; */117t = ieee754dp_div(z, ieee754dp_add(t, x));118t.parts.bexp += 0x001;119y = ieee754dp_add(y, t);120121/* twiddle last bit to force y correctly rounded */122123/* set RZ, clear INEX flag */124ieee754_csr.rm = IEEE754_RZ;125ieee754_csr.sx &= ~IEEE754_INEXACT;126127/* t=x/y; ...chopped quotient, possibly inexact */128t = ieee754dp_div(x, y);129130if (ieee754_csr.sx & IEEE754_INEXACT || t.bits != y.bits) {131132if (!(ieee754_csr.sx & IEEE754_INEXACT))133/* t = t-ulp */134t.bits -= 1;135136/* add inexact to result status */137oldcsr.cx |= IEEE754_INEXACT;138oldcsr.sx |= IEEE754_INEXACT;139140switch (oldcsr.rm) {141case IEEE754_RP:142y.bits += 1;143/* drop through */144case IEEE754_RN:145t.bits += 1;146break;147}148149/* y=y+t; ...chopped sum */150y = ieee754dp_add(y, t);151152/* adjust scalx for correctly rounded sqrt(x) */153scalx -= 1;154}155156/* py[n0]=py[n0]+scalx; ...scale back y */157y.parts.bexp += scalx;158159/* restore rounding mode, possibly set inexact */160ieee754_csr = oldcsr;161162return y;163}164165166