CoCalc -- wm

GitHub Repository: awilliam/linux-vfio
Path: blob/master/arch/x86/math-emu/wm_sqrt.S
¹⁰⁸¹⁸ views
1
	.file	"wm_sqrt.S"
2
/*---------------------------------------------------------------------------+
3
 |  wm_sqrt.S                                                                |
4
 |                                                                           |
5
 | Fixed point arithmetic square root evaluation.                            |
6
 |                                                                           |
7
 | Copyright (C) 1992,1993,1995,1997                                         |
8
 |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
9
 |                       Australia.  E-mail [email protected]               |
10
 |                                                                           |
11
 | Call from C as:                                                           |
12
 |    int wm_sqrt(FPU_REG *n, unsigned int control_word)                     |
13
 |                                                                           |
14
 +---------------------------------------------------------------------------*/
15

16
/*---------------------------------------------------------------------------+
17
 |  wm_sqrt(FPU_REG *n, unsigned int control_word)                           |
18
 |    returns the square root of n in n.                                     |
19
 |                                                                           |
20
 |  Use Newton's method to compute the square root of a number, which must   |
21
 |  be in the range  [1.0 .. 4.0),  to 64 bits accuracy.                     |
22
 |  Does not check the sign or tag of the argument.                          |
23
 |  Sets the exponent, but not the sign or tag of the result.                |
24
 |                                                                           |
25
 |  The guess is kept in %esi:%edi                                           |
26
 +---------------------------------------------------------------------------*/
27

28
#include "exception.h"
29
#include "fpu_emu.h"
30

31

32
#ifndef NON_REENTRANT_FPU
33
/*	Local storage on the stack: */
34
#define FPU_accum_3	-4(%ebp)	/* ms word */
35
#define FPU_accum_2	-8(%ebp)
36
#define FPU_accum_1	-12(%ebp)
37
#define FPU_accum_0	-16(%ebp)
38

39
/*
40
 * The de-normalised argument:
41
 *                  sq_2                  sq_1              sq_0
42
 *        b b b b b b b ... b b b   b b b .... b b b   b 0 0 0 ... 0
43
 *           ^ binary point here
44
 */
45
#define FPU_fsqrt_arg_2	-20(%ebp)	/* ms word */
46
#define FPU_fsqrt_arg_1	-24(%ebp)
47
#define FPU_fsqrt_arg_0	-28(%ebp)	/* ls word, at most the ms bit is set */
48

49
#else
50
/*	Local storage in a static area: */
51
.data
52
	.align 4,0
53
FPU_accum_3:
54
	.long	0		/* ms word */
55
FPU_accum_2:
56
	.long	0
57
FPU_accum_1:
58
	.long	0
59
FPU_accum_0:
60
	.long	0
61

62
/* The de-normalised argument:
63
                    sq_2                  sq_1              sq_0
64
          b b b b b b b ... b b b   b b b .... b b b   b 0 0 0 ... 0
65
             ^ binary point here
66
 */
67
FPU_fsqrt_arg_2:
68
	.long	0		/* ms word */
69
FPU_fsqrt_arg_1:
70
	.long	0
71
FPU_fsqrt_arg_0:
72
	.long	0		/* ls word, at most the ms bit is set */
73
#endif /* NON_REENTRANT_FPU */ 
74

75

76
.text
77
ENTRY(wm_sqrt)
78
	pushl	%ebp
79
	movl	%esp,%ebp
80
#ifndef NON_REENTRANT_FPU
81
	subl	$28,%esp
82
#endif /* NON_REENTRANT_FPU */
83
	pushl	%esi
84
	pushl	%edi
85
	pushl	%ebx
86

87
	movl	PARAM1,%esi
88

89
	movl	SIGH(%esi),%eax
90
	movl	SIGL(%esi),%ecx
91
	xorl	%edx,%edx
92

93
/* We use a rough linear estimate for the first guess.. */
94

95
	cmpw	EXP_BIAS,EXP(%esi)
96
	jnz	sqrt_arg_ge_2
97

98
	shrl	$1,%eax			/* arg is in the range  [1.0 .. 2.0) */
99
	rcrl	$1,%ecx
100
	rcrl	$1,%edx
101

102
sqrt_arg_ge_2:
103
/* From here on, n is never accessed directly again until it is
104
   replaced by the answer. */
105

106
	movl	%eax,FPU_fsqrt_arg_2		/* ms word of n */
107
	movl	%ecx,FPU_fsqrt_arg_1
108
	movl	%edx,FPU_fsqrt_arg_0
109

110
/* Make a linear first estimate */
111
	shrl	$1,%eax
112
	addl	$0x40000000,%eax
113
	movl	$0xaaaaaaaa,%ecx
114
	mull	%ecx
115
	shll	%edx			/* max result was 7fff... */
116
	testl	$0x80000000,%edx	/* but min was 3fff... */
117
	jnz	sqrt_prelim_no_adjust
118

119
	movl	$0x80000000,%edx	/* round up */
120

121
sqrt_prelim_no_adjust:
122
	movl	%edx,%esi	/* Our first guess */
123

124
/* We have now computed (approx)   (2 + x) / 3, which forms the basis
125
   for a few iterations of Newton's method */
126

127
	movl	FPU_fsqrt_arg_2,%ecx	/* ms word */
128

129
/*
130
 * From our initial estimate, three iterations are enough to get us
131
 * to 30 bits or so. This will then allow two iterations at better
132
 * precision to complete the process.
133
 */
134

135
/* Compute  (g + n/g)/2  at each iteration (g is the guess). */
136
	shrl	%ecx		/* Doing this first will prevent a divide */
137
				/* overflow later. */
138

139
	movl	%ecx,%edx	/* msw of the arg / 2 */
140
	divl	%esi		/* current estimate */
141
	shrl	%esi		/* divide by 2 */
142
	addl	%eax,%esi	/* the new estimate */
143

144
	movl	%ecx,%edx
145
	divl	%esi
146
	shrl	%esi
147
	addl	%eax,%esi
148

149
	movl	%ecx,%edx
150
	divl	%esi
151
	shrl	%esi
152
	addl	%eax,%esi
153

154
/*
155
 * Now that an estimate accurate to about 30 bits has been obtained (in %esi),
156
 * we improve it to 60 bits or so.
157
 *
158
 * The strategy from now on is to compute new estimates from
159
 *      guess := guess + (n - guess^2) / (2 * guess)
160
 */
161

162
/* First, find the square of the guess */
163
	movl	%esi,%eax
164
	mull	%esi
165
/* guess^2 now in %edx:%eax */
166

167
	movl	FPU_fsqrt_arg_1,%ecx
168
	subl	%ecx,%eax
169
	movl	FPU_fsqrt_arg_2,%ecx	/* ms word of normalized n */
170
	sbbl	%ecx,%edx
171
	jnc	sqrt_stage_2_positive
172

173
/* Subtraction gives a negative result,
174
   negate the result before division. */
175
	notl	%edx
176
	notl	%eax
177
	addl	$1,%eax
178
	adcl	$0,%edx
179

180
	divl	%esi
181
	movl	%eax,%ecx
182

183
	movl	%edx,%eax
184
	divl	%esi
185
	jmp	sqrt_stage_2_finish
186

187
sqrt_stage_2_positive:
188
	divl	%esi
189
	movl	%eax,%ecx
190

191
	movl	%edx,%eax
192
	divl	%esi
193

194
	notl	%ecx
195
	notl	%eax
196
	addl	$1,%eax
197
	adcl	$0,%ecx
198

199
sqrt_stage_2_finish:
200
	sarl	$1,%ecx		/* divide by 2 */
201
	rcrl	$1,%eax
202

203
	/* Form the new estimate in %esi:%edi */
204
	movl	%eax,%edi
205
	addl	%ecx,%esi
206

207
	jnz	sqrt_stage_2_done	/* result should be [1..2) */
208

209
#ifdef PARANOID
210
/* It should be possible to get here only if the arg is ffff....ffff */
211
	cmp	$0xffffffff,FPU_fsqrt_arg_1
212
	jnz	sqrt_stage_2_error
213
#endif /* PARANOID */
214

215
/* The best rounded result. */
216
	xorl	%eax,%eax
217
	decl	%eax
218
	movl	%eax,%edi
219
	movl	%eax,%esi
220
	movl	$0x7fffffff,%eax
221
	jmp	sqrt_round_result
222

223
#ifdef PARANOID
224
sqrt_stage_2_error:
225
	pushl	EX_INTERNAL|0x213
226
	call	EXCEPTION
227
#endif /* PARANOID */ 
228

229
sqrt_stage_2_done:
230

231
/* Now the square root has been computed to better than 60 bits. */
232

233
/* Find the square of the guess. */
234
	movl	%edi,%eax		/* ls word of guess */
235
	mull	%edi
236
	movl	%edx,FPU_accum_1
237

238
	movl	%esi,%eax
239
	mull	%esi
240
	movl	%edx,FPU_accum_3
241
	movl	%eax,FPU_accum_2
242

243
	movl	%edi,%eax
244
	mull	%esi
245
	addl	%eax,FPU_accum_1
246
	adcl	%edx,FPU_accum_2
247
	adcl	$0,FPU_accum_3
248

249
/*	movl	%esi,%eax */
250
/*	mull	%edi */
251
	addl	%eax,FPU_accum_1
252
	adcl	%edx,FPU_accum_2
253
	adcl	$0,FPU_accum_3
254

255
/* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */
256

257
	movl	FPU_fsqrt_arg_0,%eax		/* get normalized n */
258
	subl	%eax,FPU_accum_1
259
	movl	FPU_fsqrt_arg_1,%eax
260
	sbbl	%eax,FPU_accum_2
261
	movl	FPU_fsqrt_arg_2,%eax		/* ms word of normalized n */
262
	sbbl	%eax,FPU_accum_3
263
	jnc	sqrt_stage_3_positive
264

265
/* Subtraction gives a negative result,
266
   negate the result before division */
267
	notl	FPU_accum_1
268
	notl	FPU_accum_2
269
	notl	FPU_accum_3
270
	addl	$1,FPU_accum_1
271
	adcl	$0,FPU_accum_2
272

273
#ifdef PARANOID
274
	adcl	$0,FPU_accum_3	/* This must be zero */
275
	jz	sqrt_stage_3_no_error
276

277
sqrt_stage_3_error:
278
	pushl	EX_INTERNAL|0x207
279
	call	EXCEPTION
280

281
sqrt_stage_3_no_error:
282
#endif /* PARANOID */
283

284
	movl	FPU_accum_2,%edx
285
	movl	FPU_accum_1,%eax
286
	divl	%esi
287
	movl	%eax,%ecx
288

289
	movl	%edx,%eax
290
	divl	%esi
291

292
	sarl	$1,%ecx		/* divide by 2 */
293
	rcrl	$1,%eax
294

295
	/* prepare to round the result */
296

297
	addl	%ecx,%edi
298
	adcl	$0,%esi
299

300
	jmp	sqrt_stage_3_finished
301

302
sqrt_stage_3_positive:
303
	movl	FPU_accum_2,%edx
304
	movl	FPU_accum_1,%eax
305
	divl	%esi
306
	movl	%eax,%ecx
307

308
	movl	%edx,%eax
309
	divl	%esi
310

311
	sarl	$1,%ecx		/* divide by 2 */
312
	rcrl	$1,%eax
313

314
	/* prepare to round the result */
315

316
	notl	%eax		/* Negate the correction term */
317
	notl	%ecx
318
	addl	$1,%eax
319
	adcl	$0,%ecx		/* carry here ==> correction == 0 */
320
	adcl	$0xffffffff,%esi
321

322
	addl	%ecx,%edi
323
	adcl	$0,%esi
324

325
sqrt_stage_3_finished:
326

327
/*
328
 * The result in %esi:%edi:%esi should be good to about 90 bits here,
329
 * and the rounding information here does not have sufficient accuracy
330
 * in a few rare cases.
331
 */
332
	cmpl	$0xffffffe0,%eax
333
	ja	sqrt_near_exact_x
334

335
	cmpl	$0x00000020,%eax
336
	jb	sqrt_near_exact
337

338
	cmpl	$0x7fffffe0,%eax
339
	jb	sqrt_round_result
340

341
	cmpl	$0x80000020,%eax
342
	jb	sqrt_get_more_precision
343

344
sqrt_round_result:
345
/* Set up for rounding operations */
346
	movl	%eax,%edx
347
	movl	%esi,%eax
348
	movl	%edi,%ebx
349
	movl	PARAM1,%edi
350
	movw	EXP_BIAS,EXP(%edi)	/* Result is in  [1.0 .. 2.0) */
351
	jmp	fpu_reg_round
352

353

354
sqrt_near_exact_x:
355
/* First, the estimate must be rounded up. */
356
	addl	$1,%edi
357
	adcl	$0,%esi
358

359
sqrt_near_exact:
360
/*
361
 * This is an easy case because x^1/2 is monotonic.
362
 * We need just find the square of our estimate, compare it
363
 * with the argument, and deduce whether our estimate is
364
 * above, below, or exact. We use the fact that the estimate
365
 * is known to be accurate to about 90 bits.
366
 */
367
	movl	%edi,%eax		/* ls word of guess */
368
	mull	%edi
369
	movl	%edx,%ebx		/* 2nd ls word of square */
370
	movl	%eax,%ecx		/* ls word of square */
371

372
	movl	%edi,%eax
373
	mull	%esi
374
	addl	%eax,%ebx
375
	addl	%eax,%ebx
376

377
#ifdef PARANOID
378
	cmp	$0xffffffb0,%ebx
379
	jb	sqrt_near_exact_ok
380

381
	cmp	$0x00000050,%ebx
382
	ja	sqrt_near_exact_ok
383

384
	pushl	EX_INTERNAL|0x214
385
	call	EXCEPTION
386

387
sqrt_near_exact_ok:
388
#endif /* PARANOID */ 
389

390
	or	%ebx,%ebx
391
	js	sqrt_near_exact_small
392

393
	jnz	sqrt_near_exact_large
394

395
	or	%ebx,%edx
396
	jnz	sqrt_near_exact_large
397

398
/* Our estimate is exactly the right answer */
399
	xorl	%eax,%eax
400
	jmp	sqrt_round_result
401

402
sqrt_near_exact_small:
403
/* Our estimate is too small */
404
	movl	$0x000000ff,%eax
405
	jmp	sqrt_round_result
406
	
407
sqrt_near_exact_large:
408
/* Our estimate is too large, we need to decrement it */
409
	subl	$1,%edi
410
	sbbl	$0,%esi
411
	movl	$0xffffff00,%eax
412
	jmp	sqrt_round_result
413

414

415
sqrt_get_more_precision:
416
/* This case is almost the same as the above, except we start
417
   with an extra bit of precision in the estimate. */
418
	stc			/* The extra bit. */
419
	rcll	$1,%edi		/* Shift the estimate left one bit */
420
	rcll	$1,%esi
421

422
	movl	%edi,%eax		/* ls word of guess */
423
	mull	%edi
424
	movl	%edx,%ebx		/* 2nd ls word of square */
425
	movl	%eax,%ecx		/* ls word of square */
426

427
	movl	%edi,%eax
428
	mull	%esi
429
	addl	%eax,%ebx
430
	addl	%eax,%ebx
431

432
/* Put our estimate back to its original value */
433
	stc			/* The ms bit. */
434
	rcrl	$1,%esi		/* Shift the estimate left one bit */
435
	rcrl	$1,%edi
436

437
#ifdef PARANOID
438
	cmp	$0xffffff60,%ebx
439
	jb	sqrt_more_prec_ok
440

441
	cmp	$0x000000a0,%ebx
442
	ja	sqrt_more_prec_ok
443

444
	pushl	EX_INTERNAL|0x215
445
	call	EXCEPTION
446

447
sqrt_more_prec_ok:
448
#endif /* PARANOID */ 
449

450
	or	%ebx,%ebx
451
	js	sqrt_more_prec_small
452

453
	jnz	sqrt_more_prec_large
454

455
	or	%ebx,%ecx
456
	jnz	sqrt_more_prec_large
457

458
/* Our estimate is exactly the right answer */
459
	movl	$0x80000000,%eax
460
	jmp	sqrt_round_result
461

462
sqrt_more_prec_small:
463
/* Our estimate is too small */
464
	movl	$0x800000ff,%eax
465
	jmp	sqrt_round_result
466
	
467
sqrt_more_prec_large:
468
/* Our estimate is too large */
469
	movl	$0x7fffff00,%eax
470
	jmp	sqrt_round_result
471

472
Product

Resources

Company