Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/sdl/libm/e_sqrt.c
9905 views
1
#include "SDL_internal.h"
2
/*
3
* ====================================================
4
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
5
*
6
* Developed at SunPro, a Sun Microsystems, Inc. business.
7
* Permission to use, copy, modify, and distribute this
8
* software is freely granted, provided that this notice
9
* is preserved.
10
* ====================================================
11
*/
12
13
/* __ieee754_sqrt(x)
14
* Return correctly rounded sqrt.
15
* ------------------------------------------
16
* | Use the hardware sqrt if you have one |
17
* ------------------------------------------
18
* Method:
19
* Bit by bit method using integer arithmetic. (Slow, but portable)
20
* 1. Normalization
21
* Scale x to y in [1,4) with even powers of 2:
22
* find an integer k such that 1 <= (y=x*2^(2k)) < 4, then
23
* sqrt(x) = 2^k * sqrt(y)
24
* 2. Bit by bit computation
25
* Let q = sqrt(y) truncated to i bit after binary point (q = 1),
26
* i 0
27
* i+1 2
28
* s = 2*q , and y = 2 * ( y - q ). (1)
29
* i i i i
30
*
31
* To compute q from q , one checks whether
32
* i+1 i
33
*
34
* -(i+1) 2
35
* (q + 2 ) <= y. (2)
36
* i
37
* -(i+1)
38
* If (2) is false, then q = q ; otherwise q = q + 2 .
39
* i+1 i i+1 i
40
*
41
* With some algebric manipulation, it is not difficult to see
42
* that (2) is equivalent to
43
* -(i+1)
44
* s + 2 <= y (3)
45
* i i
46
*
47
* The advantage of (3) is that s and y can be computed by
48
* i i
49
* the following recurrence formula:
50
* if (3) is false
51
*
52
* s = s , y = y ; (4)
53
* i+1 i i+1 i
54
*
55
* otherwise,
56
* -i -(i+1)
57
* s = s + 2 , y = y - s - 2 (5)
58
* i+1 i i+1 i i
59
*
60
* One may easily use induction to prove (4) and (5).
61
* Note. Since the left hand side of (3) contain only i+2 bits,
62
* it does not necessary to do a full (53-bit) comparison
63
* in (3).
64
* 3. Final rounding
65
* After generating the 53 bits result, we compute one more bit.
66
* Together with the remainder, we can decide whether the
67
* result is exact, bigger than 1/2ulp, or less than 1/2ulp
68
* (it will never equal to 1/2ulp).
69
* The rounding mode can be detected by checking whether
70
* huge + tiny is equal to huge, and whether huge - tiny is
71
* equal to huge for some floating point number "huge" and "tiny".
72
*
73
* Special cases:
74
* sqrt(+-0) = +-0 ... exact
75
* sqrt(inf) = inf
76
* sqrt(-ve) = NaN ... with invalid signal
77
* sqrt(NaN) = NaN ... with invalid signal for signaling NaN
78
*
79
* Other methods : see the appended file at the end of the program below.
80
*---------------
81
*/
82
83
#include "math_libm.h"
84
#include "math_private.h"
85
86
static const double one = 1.0, tiny = 1.0e-300;
87
88
double attribute_hidden __ieee754_sqrt(double x)
89
{
90
double z;
91
int32_t sign = (int)0x80000000;
92
int32_t ix0,s0,q,m,t,i;
93
u_int32_t r,t1,s1,ix1,q1;
94
95
EXTRACT_WORDS(ix0,ix1,x);
96
97
/* take care of Inf and NaN */
98
if((ix0&0x7ff00000)==0x7ff00000) {
99
return x*x+x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf
100
sqrt(-inf)=sNaN */
101
}
102
/* take care of zero */
103
if(ix0<=0) {
104
if(((ix0&(~sign))|ix1)==0) return x;/* sqrt(+-0) = +-0 */
105
else if(ix0<0)
106
return (x-x)/(x-x); /* sqrt(-ve) = sNaN */
107
}
108
/* normalize x */
109
m = (ix0>>20);
110
if(m==0) { /* subnormal x */
111
while(ix0==0) {
112
m -= 21;
113
ix0 |= (ix1>>11); ix1 <<= 21;
114
}
115
for(i=0;(ix0&0x00100000)==0;i++) ix0<<=1;
116
m -= i-1;
117
ix0 |= (ix1>>(32-i));
118
ix1 <<= i;
119
}
120
m -= 1023; /* unbias exponent */
121
ix0 = (ix0&0x000fffff)|0x00100000;
122
if(m&1){ /* odd m, double x to make it even */
123
ix0 += ix0 + ((ix1&sign)>>31);
124
ix1 += ix1;
125
}
126
m >>= 1; /* m = [m/2] */
127
128
/* generate sqrt(x) bit by bit */
129
ix0 += ix0 + ((ix1&sign)>>31);
130
ix1 += ix1;
131
q = q1 = s0 = s1 = 0; /* [q,q1] = sqrt(x) */
132
r = 0x00200000; /* r = moving bit from right to left */
133
134
while(r!=0) {
135
t = s0+r;
136
if(t<=ix0) {
137
s0 = t+r;
138
ix0 -= t;
139
q += r;
140
}
141
ix0 += ix0 + ((ix1&sign)>>31);
142
ix1 += ix1;
143
r>>=1;
144
}
145
146
r = sign;
147
while(r!=0) {
148
t1 = s1+r;
149
t = s0;
150
if((t<ix0)||((t==ix0)&&(t1<=ix1))) {
151
s1 = t1+r;
152
if(((t1&sign)==(u_int32_t)sign)&&(s1&sign)==0) s0 += 1;
153
ix0 -= t;
154
if (ix1 < t1) ix0 -= 1;
155
ix1 -= t1;
156
q1 += r;
157
}
158
ix0 += ix0 + ((ix1&sign)>>31);
159
ix1 += ix1;
160
r>>=1;
161
}
162
163
/* use floating add to find out rounding direction */
164
if((ix0|ix1)!=0) {
165
z = one-tiny; /* trigger inexact flag */
166
if (z>=one) {
167
z = one+tiny;
168
if (q1==(u_int32_t)0xffffffff) { q1=0; q += 1;}
169
else if (z>one) {
170
if (q1==(u_int32_t)0xfffffffe) q+=1;
171
q1+=2;
172
} else
173
q1 += (q1&1);
174
}
175
}
176
ix0 = (q>>1)+0x3fe00000;
177
ix1 = q1>>1;
178
if ((q&1)==1) ix1 |= sign;
179
ix0 += (m <<20);
180
INSERT_WORDS(z,ix0,ix1);
181
return z;
182
}
183
184
/*
185
* wrapper sqrt(x)
186
*/
187
#ifndef _IEEE_LIBM
188
double sqrt(double x)
189
{
190
double z = __ieee754_sqrt(x);
191
if (_LIB_VERSION == _IEEE_ || isnan(x))
192
return z;
193
if (x < 0.0)
194
return __kernel_standard(x, x, 26); /* sqrt(negative) */
195
return z;
196
}
197
#else
198
strong_alias(__ieee754_sqrt, sqrt)
199
#endif
200
libm_hidden_def(sqrt)
201
202
203
/*
204
Other methods (use floating-point arithmetic)
205
-------------
206
(This is a copy of a drafted paper by Prof W. Kahan
207
and K.C. Ng, written in May, 1986)
208
209
Two algorithms are given here to implement sqrt(x)
210
(IEEE double precision arithmetic) in software.
211
Both supply sqrt(x) correctly rounded. The first algorithm (in
212
Section A) uses newton iterations and involves four divisions.
213
The second one uses reciproot iterations to avoid division, but
214
requires more multiplications. Both algorithms need the ability
215
to chop results of arithmetic operations instead of round them,
216
and the INEXACT flag to indicate when an arithmetic operation
217
is executed exactly with no roundoff error, all part of the
218
standard (IEEE 754-1985). The ability to perform shift, add,
219
subtract and logical AND operations upon 32-bit words is needed
220
too, though not part of the standard.
221
222
A. sqrt(x) by Newton Iteration
223
224
(1) Initial approximation
225
226
Let x0 and x1 be the leading and the trailing 32-bit words of
227
a floating point number x (in IEEE double format) respectively
228
229
1 11 52 ...widths
230
------------------------------------------------------
231
x: |s| e | f |
232
------------------------------------------------------
233
msb lsb msb lsb ...order
234
235
236
------------------------ ------------------------
237
x0: |s| e | f1 | x1: | f2 |
238
------------------------ ------------------------
239
240
By performing shifts and subtracts on x0 and x1 (both regarded
241
as integers), we obtain an 8-bit approximation of sqrt(x) as
242
follows.
243
244
k := (x0>>1) + 0x1ff80000;
245
y0 := k - T1[31&(k>>15)]. ... y ~ sqrt(x) to 8 bits
246
Here k is a 32-bit integer and T1[] is an integer array containing
247
correction terms. Now magically the floating value of y (y's
248
leading 32-bit word is y0, the value of its trailing word is 0)
249
approximates sqrt(x) to almost 8-bit.
250
251
Value of T1:
252
static int T1[32]= {
253
0, 1024, 3062, 5746, 9193, 13348, 18162, 23592,
254
29598, 36145, 43202, 50740, 58733, 67158, 75992, 85215,
255
83599, 71378, 60428, 50647, 41945, 34246, 27478, 21581,
256
16499, 12183, 8588, 5674, 3403, 1742, 661, 130,};
257
258
(2) Iterative refinement
259
260
Apply Heron's rule three times to y, we have y approximates
261
sqrt(x) to within 1 ulp (Unit in the Last Place):
262
263
y := (y+x/y)/2 ... almost 17 sig. bits
264
y := (y+x/y)/2 ... almost 35 sig. bits
265
y := y-(y-x/y)/2 ... within 1 ulp
266
267
268
Remark 1.
269
Another way to improve y to within 1 ulp is:
270
271
y := (y+x/y) ... almost 17 sig. bits to 2*sqrt(x)
272
y := y - 0x00100006 ... almost 18 sig. bits to sqrt(x)
273
274
2
275
(x-y )*y
276
y := y + 2* ---------- ...within 1 ulp
277
2
278
3y + x
279
280
281
This formula has one division fewer than the one above; however,
282
it requires more multiplications and additions. Also x must be
283
scaled in advance to avoid spurious overflow in evaluating the
284
expression 3y*y+x. Hence it is not recommended uless division
285
is slow. If division is very slow, then one should use the
286
reciproot algorithm given in section B.
287
288
(3) Final adjustment
289
290
By twiddling y's last bit it is possible to force y to be
291
correctly rounded according to the prevailing rounding mode
292
as follows. Let r and i be copies of the rounding mode and
293
inexact flag before entering the square root program. Also we
294
use the expression y+-ulp for the next representable floating
295
numbers (up and down) of y. Note that y+-ulp = either fixed
296
point y+-1, or multiply y by nextafter(1,+-inf) in chopped
297
mode.
298
299
I := FALSE; ... reset INEXACT flag I
300
R := RZ; ... set rounding mode to round-toward-zero
301
z := x/y; ... chopped quotient, possibly inexact
302
If(not I) then { ... if the quotient is exact
303
if(z=y) {
304
I := i; ... restore inexact flag
305
R := r; ... restore rounded mode
306
return sqrt(x):=y.
307
} else {
308
z := z - ulp; ... special rounding
309
}
310
}
311
i := TRUE; ... sqrt(x) is inexact
312
If (r=RN) then z=z+ulp ... rounded-to-nearest
313
If (r=RP) then { ... round-toward-+inf
314
y = y+ulp; z=z+ulp;
315
}
316
y := y+z; ... chopped sum
317
y0:=y0-0x00100000; ... y := y/2 is correctly rounded.
318
I := i; ... restore inexact flag
319
R := r; ... restore rounded mode
320
return sqrt(x):=y.
321
322
(4) Special cases
323
324
Square root of +inf, +-0, or NaN is itself;
325
Square root of a negative number is NaN with invalid signal.
326
327
328
B. sqrt(x) by Reciproot Iteration
329
330
(1) Initial approximation
331
332
Let x0 and x1 be the leading and the trailing 32-bit words of
333
a floating point number x (in IEEE double format) respectively
334
(see section A). By performing shifs and subtracts on x0 and y0,
335
we obtain a 7.8-bit approximation of 1/sqrt(x) as follows.
336
337
k := 0x5fe80000 - (x0>>1);
338
y0:= k - T2[63&(k>>14)]. ... y ~ 1/sqrt(x) to 7.8 bits
339
340
Here k is a 32-bit integer and T2[] is an integer array
341
containing correction terms. Now magically the floating
342
value of y (y's leading 32-bit word is y0, the value of
343
its trailing word y1 is set to zero) approximates 1/sqrt(x)
344
to almost 7.8-bit.
345
346
Value of T2:
347
static int T2[64]= {
348
0x1500, 0x2ef8, 0x4d67, 0x6b02, 0x87be, 0xa395, 0xbe7a, 0xd866,
349
0xf14a, 0x1091b,0x11fcd,0x13552,0x14999,0x15c98,0x16e34,0x17e5f,
350
0x18d03,0x19a01,0x1a545,0x1ae8a,0x1b5c4,0x1bb01,0x1bfde,0x1c28d,
351
0x1c2de,0x1c0db,0x1ba73,0x1b11c,0x1a4b5,0x1953d,0x18266,0x16be0,
352
0x1683e,0x179d8,0x18a4d,0x19992,0x1a789,0x1b445,0x1bf61,0x1c989,
353
0x1d16d,0x1d77b,0x1dddf,0x1e2ad,0x1e5bf,0x1e6e8,0x1e654,0x1e3cd,
354
0x1df2a,0x1d635,0x1cb16,0x1be2c,0x1ae4e,0x19bde,0x1868e,0x16e2e,
355
0x1527f,0x1334a,0x11051,0xe951, 0xbe01, 0x8e0d, 0x5924, 0x1edd,};
356
357
(2) Iterative refinement
358
359
Apply Reciproot iteration three times to y and multiply the
360
result by x to get an approximation z that matches sqrt(x)
361
to about 1 ulp. To be exact, we will have
362
-1ulp < sqrt(x)-z<1.0625ulp.
363
364
... set rounding mode to Round-to-nearest
365
y := y*(1.5-0.5*x*y*y) ... almost 15 sig. bits to 1/sqrt(x)
366
y := y*((1.5-2^-30)+0.5*x*y*y)... about 29 sig. bits to 1/sqrt(x)
367
... special arrangement for better accuracy
368
z := x*y ... 29 bits to sqrt(x), with z*y<1
369
z := z + 0.5*z*(1-z*y) ... about 1 ulp to sqrt(x)
370
371
Remark 2. The constant 1.5-2^-30 is chosen to bias the error so that
372
(a) the term z*y in the final iteration is always less than 1;
373
(b) the error in the final result is biased upward so that
374
-1 ulp < sqrt(x) - z < 1.0625 ulp
375
instead of |sqrt(x)-z|<1.03125ulp.
376
377
(3) Final adjustment
378
379
By twiddling y's last bit it is possible to force y to be
380
correctly rounded according to the prevailing rounding mode
381
as follows. Let r and i be copies of the rounding mode and
382
inexact flag before entering the square root program. Also we
383
use the expression y+-ulp for the next representable floating
384
numbers (up and down) of y. Note that y+-ulp = either fixed
385
point y+-1, or multiply y by nextafter(1,+-inf) in chopped
386
mode.
387
388
R := RZ; ... set rounding mode to round-toward-zero
389
switch(r) {
390
case RN: ... round-to-nearest
391
if(x<= z*(z-ulp)...chopped) z = z - ulp; else
392
if(x<= z*(z+ulp)...chopped) z = z; else z = z+ulp;
393
break;
394
case RZ:case RM: ... round-to-zero or round-to--inf
395
R:=RP; ... reset rounding mod to round-to-+inf
396
if(x<z*z ... rounded up) z = z - ulp; else
397
if(x>=(z+ulp)*(z+ulp) ...rounded up) z = z+ulp;
398
break;
399
case RP: ... round-to-+inf
400
if(x>(z+ulp)*(z+ulp)...chopped) z = z+2*ulp; else
401
if(x>z*z ...chopped) z = z+ulp;
402
break;
403
}
404
405
Remark 3. The above comparisons can be done in fixed point. For
406
example, to compare x and w=z*z chopped, it suffices to compare
407
x1 and w1 (the trailing parts of x and w), regarding them as
408
two's complement integers.
409
410
...Is z an exact square root?
411
To determine whether z is an exact square root of x, let z1 be the
412
trailing part of z, and also let x0 and x1 be the leading and
413
trailing parts of x.
414
415
If ((z1&0x03ffffff)!=0) ... not exact if trailing 26 bits of z!=0
416
I := 1; ... Raise Inexact flag: z is not exact
417
else {
418
j := 1 - [(x0>>20)&1] ... j = logb(x) mod 2
419
k := z1 >> 26; ... get z's 25-th and 26-th
420
fraction bits
421
I := i or (k&j) or ((k&(j+j+1))!=(x1&3));
422
}
423
R:= r ... restore rounded mode
424
return sqrt(x):=z.
425
426
If multiplication is cheaper then the foregoing red tape, the
427
Inexact flag can be evaluated by
428
429
I := i;
430
I := (z*z!=x) or I.
431
432
Note that z*z can overwrite I; this value must be sensed if it is
433
True.
434
435
Remark 4. If z*z = x exactly, then bit 25 to bit 0 of z1 must be
436
zero.
437
438
--------------------
439
z1: | f2 |
440
--------------------
441
bit 31 bit 0
442
443
Further more, bit 27 and 26 of z1, bit 0 and 1 of x1, and the odd
444
or even of logb(x) have the following relations:
445
446
-------------------------------------------------
447
bit 27,26 of z1 bit 1,0 of x1 logb(x)
448
-------------------------------------------------
449
00 00 odd and even
450
01 01 even
451
10 10 odd
452
10 00 even
453
11 01 even
454
-------------------------------------------------
455
456
(4) Special cases (see (4) of Section A).
457
458
*/
459
460