Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/math-emu/wm_sqrt.S
26424 views
1
/* SPDX-License-Identifier: GPL-2.0 */
2
.file "wm_sqrt.S"
3
/*---------------------------------------------------------------------------+
4
| wm_sqrt.S |
5
| |
6
| Fixed point arithmetic square root evaluation. |
7
| |
8
| Copyright (C) 1992,1993,1995,1997 |
9
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
10
| Australia. E-mail [email protected] |
11
| |
12
| Call from C as: |
13
| int wm_sqrt(FPU_REG *n, unsigned int control_word) |
14
| |
15
+---------------------------------------------------------------------------*/
16
17
/*---------------------------------------------------------------------------+
18
| wm_sqrt(FPU_REG *n, unsigned int control_word) |
19
| returns the square root of n in n. |
20
| |
21
| Use Newton's method to compute the square root of a number, which must |
22
| be in the range [1.0 .. 4.0), to 64 bits accuracy. |
23
| Does not check the sign or tag of the argument. |
24
| Sets the exponent, but not the sign or tag of the result. |
25
| |
26
| The guess is kept in %esi:%edi |
27
+---------------------------------------------------------------------------*/
28
29
#include "exception.h"
30
#include "fpu_emu.h"
31
32
33
#ifndef NON_REENTRANT_FPU
34
/* Local storage on the stack: */
35
#define FPU_accum_3 -4(%ebp) /* ms word */
36
#define FPU_accum_2 -8(%ebp)
37
#define FPU_accum_1 -12(%ebp)
38
#define FPU_accum_0 -16(%ebp)
39
40
/*
41
* The de-normalised argument:
42
* sq_2 sq_1 sq_0
43
* b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0
44
* ^ binary point here
45
*/
46
#define FPU_fsqrt_arg_2 -20(%ebp) /* ms word */
47
#define FPU_fsqrt_arg_1 -24(%ebp)
48
#define FPU_fsqrt_arg_0 -28(%ebp) /* ls word, at most the ms bit is set */
49
50
#else
51
/* Local storage in a static area: */
52
.data
53
.align 4,0
54
FPU_accum_3:
55
.long 0 /* ms word */
56
FPU_accum_2:
57
.long 0
58
FPU_accum_1:
59
.long 0
60
FPU_accum_0:
61
.long 0
62
63
/* The de-normalised argument:
64
sq_2 sq_1 sq_0
65
b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0
66
^ binary point here
67
*/
68
FPU_fsqrt_arg_2:
69
.long 0 /* ms word */
70
FPU_fsqrt_arg_1:
71
.long 0
72
FPU_fsqrt_arg_0:
73
.long 0 /* ls word, at most the ms bit is set */
74
#endif /* NON_REENTRANT_FPU */
75
76
77
.text
78
SYM_FUNC_START(wm_sqrt)
79
pushl %ebp
80
movl %esp,%ebp
81
#ifndef NON_REENTRANT_FPU
82
subl $28,%esp
83
#endif /* NON_REENTRANT_FPU */
84
pushl %esi
85
pushl %edi
86
pushl %ebx
87
88
movl PARAM1,%esi
89
90
movl SIGH(%esi),%eax
91
movl SIGL(%esi),%ecx
92
xorl %edx,%edx
93
94
/* We use a rough linear estimate for the first guess.. */
95
96
cmpw EXP_BIAS,EXP(%esi)
97
jnz sqrt_arg_ge_2
98
99
shrl $1,%eax /* arg is in the range [1.0 .. 2.0) */
100
rcrl $1,%ecx
101
rcrl $1,%edx
102
103
sqrt_arg_ge_2:
104
/* From here on, n is never accessed directly again until it is
105
replaced by the answer. */
106
107
movl %eax,FPU_fsqrt_arg_2 /* ms word of n */
108
movl %ecx,FPU_fsqrt_arg_1
109
movl %edx,FPU_fsqrt_arg_0
110
111
/* Make a linear first estimate */
112
shrl $1,%eax
113
addl $0x40000000,%eax
114
movl $0xaaaaaaaa,%ecx
115
mull %ecx
116
shll %edx /* max result was 7fff... */
117
testl $0x80000000,%edx /* but min was 3fff... */
118
jnz sqrt_prelim_no_adjust
119
120
movl $0x80000000,%edx /* round up */
121
122
sqrt_prelim_no_adjust:
123
movl %edx,%esi /* Our first guess */
124
125
/* We have now computed (approx) (2 + x) / 3, which forms the basis
126
for a few iterations of Newton's method */
127
128
movl FPU_fsqrt_arg_2,%ecx /* ms word */
129
130
/*
131
* From our initial estimate, three iterations are enough to get us
132
* to 30 bits or so. This will then allow two iterations at better
133
* precision to complete the process.
134
*/
135
136
/* Compute (g + n/g)/2 at each iteration (g is the guess). */
137
shrl %ecx /* Doing this first will prevent a divide */
138
/* overflow later. */
139
140
movl %ecx,%edx /* msw of the arg / 2 */
141
divl %esi /* current estimate */
142
shrl %esi /* divide by 2 */
143
addl %eax,%esi /* the new estimate */
144
145
movl %ecx,%edx
146
divl %esi
147
shrl %esi
148
addl %eax,%esi
149
150
movl %ecx,%edx
151
divl %esi
152
shrl %esi
153
addl %eax,%esi
154
155
/*
156
* Now that an estimate accurate to about 30 bits has been obtained (in %esi),
157
* we improve it to 60 bits or so.
158
*
159
* The strategy from now on is to compute new estimates from
160
* guess := guess + (n - guess^2) / (2 * guess)
161
*/
162
163
/* First, find the square of the guess */
164
movl %esi,%eax
165
mull %esi
166
/* guess^2 now in %edx:%eax */
167
168
movl FPU_fsqrt_arg_1,%ecx
169
subl %ecx,%eax
170
movl FPU_fsqrt_arg_2,%ecx /* ms word of normalized n */
171
sbbl %ecx,%edx
172
jnc sqrt_stage_2_positive
173
174
/* Subtraction gives a negative result,
175
negate the result before division. */
176
notl %edx
177
notl %eax
178
addl $1,%eax
179
adcl $0,%edx
180
181
divl %esi
182
movl %eax,%ecx
183
184
movl %edx,%eax
185
divl %esi
186
jmp sqrt_stage_2_finish
187
188
sqrt_stage_2_positive:
189
divl %esi
190
movl %eax,%ecx
191
192
movl %edx,%eax
193
divl %esi
194
195
notl %ecx
196
notl %eax
197
addl $1,%eax
198
adcl $0,%ecx
199
200
sqrt_stage_2_finish:
201
sarl $1,%ecx /* divide by 2 */
202
rcrl $1,%eax
203
204
/* Form the new estimate in %esi:%edi */
205
movl %eax,%edi
206
addl %ecx,%esi
207
208
jnz sqrt_stage_2_done /* result should be [1..2) */
209
210
#ifdef PARANOID
211
/* It should be possible to get here only if the arg is ffff....ffff */
212
cmpl $0xffffffff,FPU_fsqrt_arg_1
213
jnz sqrt_stage_2_error
214
#endif /* PARANOID */
215
216
/* The best rounded result. */
217
xorl %eax,%eax
218
decl %eax
219
movl %eax,%edi
220
movl %eax,%esi
221
movl $0x7fffffff,%eax
222
jmp sqrt_round_result
223
224
#ifdef PARANOID
225
sqrt_stage_2_error:
226
pushl EX_INTERNAL|0x213
227
call EXCEPTION
228
#endif /* PARANOID */
229
230
sqrt_stage_2_done:
231
232
/* Now the square root has been computed to better than 60 bits. */
233
234
/* Find the square of the guess. */
235
movl %edi,%eax /* ls word of guess */
236
mull %edi
237
movl %edx,FPU_accum_1
238
239
movl %esi,%eax
240
mull %esi
241
movl %edx,FPU_accum_3
242
movl %eax,FPU_accum_2
243
244
movl %edi,%eax
245
mull %esi
246
addl %eax,FPU_accum_1
247
adcl %edx,FPU_accum_2
248
adcl $0,FPU_accum_3
249
250
/* movl %esi,%eax */
251
/* mull %edi */
252
addl %eax,FPU_accum_1
253
adcl %edx,FPU_accum_2
254
adcl $0,FPU_accum_3
255
256
/* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */
257
258
movl FPU_fsqrt_arg_0,%eax /* get normalized n */
259
subl %eax,FPU_accum_1
260
movl FPU_fsqrt_arg_1,%eax
261
sbbl %eax,FPU_accum_2
262
movl FPU_fsqrt_arg_2,%eax /* ms word of normalized n */
263
sbbl %eax,FPU_accum_3
264
jnc sqrt_stage_3_positive
265
266
/* Subtraction gives a negative result,
267
negate the result before division */
268
notl FPU_accum_1
269
notl FPU_accum_2
270
notl FPU_accum_3
271
addl $1,FPU_accum_1
272
adcl $0,FPU_accum_2
273
274
#ifdef PARANOID
275
adcl $0,FPU_accum_3 /* This must be zero */
276
jz sqrt_stage_3_no_error
277
278
sqrt_stage_3_error:
279
pushl EX_INTERNAL|0x207
280
call EXCEPTION
281
282
sqrt_stage_3_no_error:
283
#endif /* PARANOID */
284
285
movl FPU_accum_2,%edx
286
movl FPU_accum_1,%eax
287
divl %esi
288
movl %eax,%ecx
289
290
movl %edx,%eax
291
divl %esi
292
293
sarl $1,%ecx /* divide by 2 */
294
rcrl $1,%eax
295
296
/* prepare to round the result */
297
298
addl %ecx,%edi
299
adcl $0,%esi
300
301
jmp sqrt_stage_3_finished
302
303
sqrt_stage_3_positive:
304
movl FPU_accum_2,%edx
305
movl FPU_accum_1,%eax
306
divl %esi
307
movl %eax,%ecx
308
309
movl %edx,%eax
310
divl %esi
311
312
sarl $1,%ecx /* divide by 2 */
313
rcrl $1,%eax
314
315
/* prepare to round the result */
316
317
notl %eax /* Negate the correction term */
318
notl %ecx
319
addl $1,%eax
320
adcl $0,%ecx /* carry here ==> correction == 0 */
321
adcl $0xffffffff,%esi
322
323
addl %ecx,%edi
324
adcl $0,%esi
325
326
sqrt_stage_3_finished:
327
328
/*
329
* The result in %esi:%edi:%esi should be good to about 90 bits here,
330
* and the rounding information here does not have sufficient accuracy
331
* in a few rare cases.
332
*/
333
cmpl $0xffffffe0,%eax
334
ja sqrt_near_exact_x
335
336
cmpl $0x00000020,%eax
337
jb sqrt_near_exact
338
339
cmpl $0x7fffffe0,%eax
340
jb sqrt_round_result
341
342
cmpl $0x80000020,%eax
343
jb sqrt_get_more_precision
344
345
sqrt_round_result:
346
/* Set up for rounding operations */
347
movl %eax,%edx
348
movl %esi,%eax
349
movl %edi,%ebx
350
movl PARAM1,%edi
351
movw EXP_BIAS,EXP(%edi) /* Result is in [1.0 .. 2.0) */
352
jmp fpu_reg_round
353
354
355
sqrt_near_exact_x:
356
/* First, the estimate must be rounded up. */
357
addl $1,%edi
358
adcl $0,%esi
359
360
sqrt_near_exact:
361
/*
362
* This is an easy case because x^1/2 is monotonic.
363
* We need just find the square of our estimate, compare it
364
* with the argument, and deduce whether our estimate is
365
* above, below, or exact. We use the fact that the estimate
366
* is known to be accurate to about 90 bits.
367
*/
368
movl %edi,%eax /* ls word of guess */
369
mull %edi
370
movl %edx,%ebx /* 2nd ls word of square */
371
movl %eax,%ecx /* ls word of square */
372
373
movl %edi,%eax
374
mull %esi
375
addl %eax,%ebx
376
addl %eax,%ebx
377
378
#ifdef PARANOID
379
cmp $0xffffffb0,%ebx
380
jb sqrt_near_exact_ok
381
382
cmp $0x00000050,%ebx
383
ja sqrt_near_exact_ok
384
385
pushl EX_INTERNAL|0x214
386
call EXCEPTION
387
388
sqrt_near_exact_ok:
389
#endif /* PARANOID */
390
391
or %ebx,%ebx
392
js sqrt_near_exact_small
393
394
jnz sqrt_near_exact_large
395
396
or %ebx,%edx
397
jnz sqrt_near_exact_large
398
399
/* Our estimate is exactly the right answer */
400
xorl %eax,%eax
401
jmp sqrt_round_result
402
403
sqrt_near_exact_small:
404
/* Our estimate is too small */
405
movl $0x000000ff,%eax
406
jmp sqrt_round_result
407
408
sqrt_near_exact_large:
409
/* Our estimate is too large, we need to decrement it */
410
subl $1,%edi
411
sbbl $0,%esi
412
movl $0xffffff00,%eax
413
jmp sqrt_round_result
414
415
416
sqrt_get_more_precision:
417
/* This case is almost the same as the above, except we start
418
with an extra bit of precision in the estimate. */
419
stc /* The extra bit. */
420
rcll $1,%edi /* Shift the estimate left one bit */
421
rcll $1,%esi
422
423
movl %edi,%eax /* ls word of guess */
424
mull %edi
425
movl %edx,%ebx /* 2nd ls word of square */
426
movl %eax,%ecx /* ls word of square */
427
428
movl %edi,%eax
429
mull %esi
430
addl %eax,%ebx
431
addl %eax,%ebx
432
433
/* Put our estimate back to its original value */
434
stc /* The ms bit. */
435
rcrl $1,%esi /* Shift the estimate left one bit */
436
rcrl $1,%edi
437
438
#ifdef PARANOID
439
cmp $0xffffff60,%ebx
440
jb sqrt_more_prec_ok
441
442
cmp $0x000000a0,%ebx
443
ja sqrt_more_prec_ok
444
445
pushl EX_INTERNAL|0x215
446
call EXCEPTION
447
448
sqrt_more_prec_ok:
449
#endif /* PARANOID */
450
451
or %ebx,%ebx
452
js sqrt_more_prec_small
453
454
jnz sqrt_more_prec_large
455
456
or %ebx,%ecx
457
jnz sqrt_more_prec_large
458
459
/* Our estimate is exactly the right answer */
460
movl $0x80000000,%eax
461
jmp sqrt_round_result
462
463
sqrt_more_prec_small:
464
/* Our estimate is too small */
465
movl $0x800000ff,%eax
466
jmp sqrt_round_result
467
468
sqrt_more_prec_large:
469
/* Our estimate is too large */
470
movl $0x7fffff00,%eax
471
jmp sqrt_round_result
472
SYM_FUNC_END(wm_sqrt)
473
474