Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
awilliam
GitHub Repository: awilliam/linux-vfio
Path: blob/master/arch/x86/math-emu/wm_sqrt.S
10818 views
1
.file "wm_sqrt.S"
2
/*---------------------------------------------------------------------------+
3
| wm_sqrt.S |
4
| |
5
| Fixed point arithmetic square root evaluation. |
6
| |
7
| Copyright (C) 1992,1993,1995,1997 |
8
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
9
| Australia. E-mail [email protected] |
10
| |
11
| Call from C as: |
12
| int wm_sqrt(FPU_REG *n, unsigned int control_word) |
13
| |
14
+---------------------------------------------------------------------------*/
15
16
/*---------------------------------------------------------------------------+
17
| wm_sqrt(FPU_REG *n, unsigned int control_word) |
18
| returns the square root of n in n. |
19
| |
20
| Use Newton's method to compute the square root of a number, which must |
21
| be in the range [1.0 .. 4.0), to 64 bits accuracy. |
22
| Does not check the sign or tag of the argument. |
23
| Sets the exponent, but not the sign or tag of the result. |
24
| |
25
| The guess is kept in %esi:%edi |
26
+---------------------------------------------------------------------------*/
27
28
#include "exception.h"
29
#include "fpu_emu.h"
30
31
32
#ifndef NON_REENTRANT_FPU
33
/* Local storage on the stack: */
34
#define FPU_accum_3 -4(%ebp) /* ms word */
35
#define FPU_accum_2 -8(%ebp)
36
#define FPU_accum_1 -12(%ebp)
37
#define FPU_accum_0 -16(%ebp)
38
39
/*
40
* The de-normalised argument:
41
* sq_2 sq_1 sq_0
42
* b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0
43
* ^ binary point here
44
*/
45
#define FPU_fsqrt_arg_2 -20(%ebp) /* ms word */
46
#define FPU_fsqrt_arg_1 -24(%ebp)
47
#define FPU_fsqrt_arg_0 -28(%ebp) /* ls word, at most the ms bit is set */
48
49
#else
50
/* Local storage in a static area: */
51
.data
52
.align 4,0
53
FPU_accum_3:
54
.long 0 /* ms word */
55
FPU_accum_2:
56
.long 0
57
FPU_accum_1:
58
.long 0
59
FPU_accum_0:
60
.long 0
61
62
/* The de-normalised argument:
63
sq_2 sq_1 sq_0
64
b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0
65
^ binary point here
66
*/
67
FPU_fsqrt_arg_2:
68
.long 0 /* ms word */
69
FPU_fsqrt_arg_1:
70
.long 0
71
FPU_fsqrt_arg_0:
72
.long 0 /* ls word, at most the ms bit is set */
73
#endif /* NON_REENTRANT_FPU */
74
75
76
.text
77
ENTRY(wm_sqrt)
78
pushl %ebp
79
movl %esp,%ebp
80
#ifndef NON_REENTRANT_FPU
81
subl $28,%esp
82
#endif /* NON_REENTRANT_FPU */
83
pushl %esi
84
pushl %edi
85
pushl %ebx
86
87
movl PARAM1,%esi
88
89
movl SIGH(%esi),%eax
90
movl SIGL(%esi),%ecx
91
xorl %edx,%edx
92
93
/* We use a rough linear estimate for the first guess.. */
94
95
cmpw EXP_BIAS,EXP(%esi)
96
jnz sqrt_arg_ge_2
97
98
shrl $1,%eax /* arg is in the range [1.0 .. 2.0) */
99
rcrl $1,%ecx
100
rcrl $1,%edx
101
102
sqrt_arg_ge_2:
103
/* From here on, n is never accessed directly again until it is
104
replaced by the answer. */
105
106
movl %eax,FPU_fsqrt_arg_2 /* ms word of n */
107
movl %ecx,FPU_fsqrt_arg_1
108
movl %edx,FPU_fsqrt_arg_0
109
110
/* Make a linear first estimate */
111
shrl $1,%eax
112
addl $0x40000000,%eax
113
movl $0xaaaaaaaa,%ecx
114
mull %ecx
115
shll %edx /* max result was 7fff... */
116
testl $0x80000000,%edx /* but min was 3fff... */
117
jnz sqrt_prelim_no_adjust
118
119
movl $0x80000000,%edx /* round up */
120
121
sqrt_prelim_no_adjust:
122
movl %edx,%esi /* Our first guess */
123
124
/* We have now computed (approx) (2 + x) / 3, which forms the basis
125
for a few iterations of Newton's method */
126
127
movl FPU_fsqrt_arg_2,%ecx /* ms word */
128
129
/*
130
* From our initial estimate, three iterations are enough to get us
131
* to 30 bits or so. This will then allow two iterations at better
132
* precision to complete the process.
133
*/
134
135
/* Compute (g + n/g)/2 at each iteration (g is the guess). */
136
shrl %ecx /* Doing this first will prevent a divide */
137
/* overflow later. */
138
139
movl %ecx,%edx /* msw of the arg / 2 */
140
divl %esi /* current estimate */
141
shrl %esi /* divide by 2 */
142
addl %eax,%esi /* the new estimate */
143
144
movl %ecx,%edx
145
divl %esi
146
shrl %esi
147
addl %eax,%esi
148
149
movl %ecx,%edx
150
divl %esi
151
shrl %esi
152
addl %eax,%esi
153
154
/*
155
* Now that an estimate accurate to about 30 bits has been obtained (in %esi),
156
* we improve it to 60 bits or so.
157
*
158
* The strategy from now on is to compute new estimates from
159
* guess := guess + (n - guess^2) / (2 * guess)
160
*/
161
162
/* First, find the square of the guess */
163
movl %esi,%eax
164
mull %esi
165
/* guess^2 now in %edx:%eax */
166
167
movl FPU_fsqrt_arg_1,%ecx
168
subl %ecx,%eax
169
movl FPU_fsqrt_arg_2,%ecx /* ms word of normalized n */
170
sbbl %ecx,%edx
171
jnc sqrt_stage_2_positive
172
173
/* Subtraction gives a negative result,
174
negate the result before division. */
175
notl %edx
176
notl %eax
177
addl $1,%eax
178
adcl $0,%edx
179
180
divl %esi
181
movl %eax,%ecx
182
183
movl %edx,%eax
184
divl %esi
185
jmp sqrt_stage_2_finish
186
187
sqrt_stage_2_positive:
188
divl %esi
189
movl %eax,%ecx
190
191
movl %edx,%eax
192
divl %esi
193
194
notl %ecx
195
notl %eax
196
addl $1,%eax
197
adcl $0,%ecx
198
199
sqrt_stage_2_finish:
200
sarl $1,%ecx /* divide by 2 */
201
rcrl $1,%eax
202
203
/* Form the new estimate in %esi:%edi */
204
movl %eax,%edi
205
addl %ecx,%esi
206
207
jnz sqrt_stage_2_done /* result should be [1..2) */
208
209
#ifdef PARANOID
210
/* It should be possible to get here only if the arg is ffff....ffff */
211
cmp $0xffffffff,FPU_fsqrt_arg_1
212
jnz sqrt_stage_2_error
213
#endif /* PARANOID */
214
215
/* The best rounded result. */
216
xorl %eax,%eax
217
decl %eax
218
movl %eax,%edi
219
movl %eax,%esi
220
movl $0x7fffffff,%eax
221
jmp sqrt_round_result
222
223
#ifdef PARANOID
224
sqrt_stage_2_error:
225
pushl EX_INTERNAL|0x213
226
call EXCEPTION
227
#endif /* PARANOID */
228
229
sqrt_stage_2_done:
230
231
/* Now the square root has been computed to better than 60 bits. */
232
233
/* Find the square of the guess. */
234
movl %edi,%eax /* ls word of guess */
235
mull %edi
236
movl %edx,FPU_accum_1
237
238
movl %esi,%eax
239
mull %esi
240
movl %edx,FPU_accum_3
241
movl %eax,FPU_accum_2
242
243
movl %edi,%eax
244
mull %esi
245
addl %eax,FPU_accum_1
246
adcl %edx,FPU_accum_2
247
adcl $0,FPU_accum_3
248
249
/* movl %esi,%eax */
250
/* mull %edi */
251
addl %eax,FPU_accum_1
252
adcl %edx,FPU_accum_2
253
adcl $0,FPU_accum_3
254
255
/* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */
256
257
movl FPU_fsqrt_arg_0,%eax /* get normalized n */
258
subl %eax,FPU_accum_1
259
movl FPU_fsqrt_arg_1,%eax
260
sbbl %eax,FPU_accum_2
261
movl FPU_fsqrt_arg_2,%eax /* ms word of normalized n */
262
sbbl %eax,FPU_accum_3
263
jnc sqrt_stage_3_positive
264
265
/* Subtraction gives a negative result,
266
negate the result before division */
267
notl FPU_accum_1
268
notl FPU_accum_2
269
notl FPU_accum_3
270
addl $1,FPU_accum_1
271
adcl $0,FPU_accum_2
272
273
#ifdef PARANOID
274
adcl $0,FPU_accum_3 /* This must be zero */
275
jz sqrt_stage_3_no_error
276
277
sqrt_stage_3_error:
278
pushl EX_INTERNAL|0x207
279
call EXCEPTION
280
281
sqrt_stage_3_no_error:
282
#endif /* PARANOID */
283
284
movl FPU_accum_2,%edx
285
movl FPU_accum_1,%eax
286
divl %esi
287
movl %eax,%ecx
288
289
movl %edx,%eax
290
divl %esi
291
292
sarl $1,%ecx /* divide by 2 */
293
rcrl $1,%eax
294
295
/* prepare to round the result */
296
297
addl %ecx,%edi
298
adcl $0,%esi
299
300
jmp sqrt_stage_3_finished
301
302
sqrt_stage_3_positive:
303
movl FPU_accum_2,%edx
304
movl FPU_accum_1,%eax
305
divl %esi
306
movl %eax,%ecx
307
308
movl %edx,%eax
309
divl %esi
310
311
sarl $1,%ecx /* divide by 2 */
312
rcrl $1,%eax
313
314
/* prepare to round the result */
315
316
notl %eax /* Negate the correction term */
317
notl %ecx
318
addl $1,%eax
319
adcl $0,%ecx /* carry here ==> correction == 0 */
320
adcl $0xffffffff,%esi
321
322
addl %ecx,%edi
323
adcl $0,%esi
324
325
sqrt_stage_3_finished:
326
327
/*
328
* The result in %esi:%edi:%esi should be good to about 90 bits here,
329
* and the rounding information here does not have sufficient accuracy
330
* in a few rare cases.
331
*/
332
cmpl $0xffffffe0,%eax
333
ja sqrt_near_exact_x
334
335
cmpl $0x00000020,%eax
336
jb sqrt_near_exact
337
338
cmpl $0x7fffffe0,%eax
339
jb sqrt_round_result
340
341
cmpl $0x80000020,%eax
342
jb sqrt_get_more_precision
343
344
sqrt_round_result:
345
/* Set up for rounding operations */
346
movl %eax,%edx
347
movl %esi,%eax
348
movl %edi,%ebx
349
movl PARAM1,%edi
350
movw EXP_BIAS,EXP(%edi) /* Result is in [1.0 .. 2.0) */
351
jmp fpu_reg_round
352
353
354
sqrt_near_exact_x:
355
/* First, the estimate must be rounded up. */
356
addl $1,%edi
357
adcl $0,%esi
358
359
sqrt_near_exact:
360
/*
361
* This is an easy case because x^1/2 is monotonic.
362
* We need just find the square of our estimate, compare it
363
* with the argument, and deduce whether our estimate is
364
* above, below, or exact. We use the fact that the estimate
365
* is known to be accurate to about 90 bits.
366
*/
367
movl %edi,%eax /* ls word of guess */
368
mull %edi
369
movl %edx,%ebx /* 2nd ls word of square */
370
movl %eax,%ecx /* ls word of square */
371
372
movl %edi,%eax
373
mull %esi
374
addl %eax,%ebx
375
addl %eax,%ebx
376
377
#ifdef PARANOID
378
cmp $0xffffffb0,%ebx
379
jb sqrt_near_exact_ok
380
381
cmp $0x00000050,%ebx
382
ja sqrt_near_exact_ok
383
384
pushl EX_INTERNAL|0x214
385
call EXCEPTION
386
387
sqrt_near_exact_ok:
388
#endif /* PARANOID */
389
390
or %ebx,%ebx
391
js sqrt_near_exact_small
392
393
jnz sqrt_near_exact_large
394
395
or %ebx,%edx
396
jnz sqrt_near_exact_large
397
398
/* Our estimate is exactly the right answer */
399
xorl %eax,%eax
400
jmp sqrt_round_result
401
402
sqrt_near_exact_small:
403
/* Our estimate is too small */
404
movl $0x000000ff,%eax
405
jmp sqrt_round_result
406
407
sqrt_near_exact_large:
408
/* Our estimate is too large, we need to decrement it */
409
subl $1,%edi
410
sbbl $0,%esi
411
movl $0xffffff00,%eax
412
jmp sqrt_round_result
413
414
415
sqrt_get_more_precision:
416
/* This case is almost the same as the above, except we start
417
with an extra bit of precision in the estimate. */
418
stc /* The extra bit. */
419
rcll $1,%edi /* Shift the estimate left one bit */
420
rcll $1,%esi
421
422
movl %edi,%eax /* ls word of guess */
423
mull %edi
424
movl %edx,%ebx /* 2nd ls word of square */
425
movl %eax,%ecx /* ls word of square */
426
427
movl %edi,%eax
428
mull %esi
429
addl %eax,%ebx
430
addl %eax,%ebx
431
432
/* Put our estimate back to its original value */
433
stc /* The ms bit. */
434
rcrl $1,%esi /* Shift the estimate left one bit */
435
rcrl $1,%edi
436
437
#ifdef PARANOID
438
cmp $0xffffff60,%ebx
439
jb sqrt_more_prec_ok
440
441
cmp $0x000000a0,%ebx
442
ja sqrt_more_prec_ok
443
444
pushl EX_INTERNAL|0x215
445
call EXCEPTION
446
447
sqrt_more_prec_ok:
448
#endif /* PARANOID */
449
450
or %ebx,%ebx
451
js sqrt_more_prec_small
452
453
jnz sqrt_more_prec_large
454
455
or %ebx,%ecx
456
jnz sqrt_more_prec_large
457
458
/* Our estimate is exactly the right answer */
459
movl $0x80000000,%eax
460
jmp sqrt_round_result
461
462
sqrt_more_prec_small:
463
/* Our estimate is too small */
464
movl $0x800000ff,%eax
465
jmp sqrt_round_result
466
467
sqrt_more_prec_large:
468
/* Our estimate is too large */
469
movl $0x7fffff00,%eax
470
jmp sqrt_round_result
471
472