CoCalc -- wm

GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/math-emu/wm_sqrt.S
²⁶⁴²⁴ views
1
/* SPDX-License-Identifier: GPL-2.0 */
2
	.file	"wm_sqrt.S"
3
/*---------------------------------------------------------------------------+
4
 |  wm_sqrt.S                                                                |
5
 |                                                                           |
6
 | Fixed point arithmetic square root evaluation.                            |
7
 |                                                                           |
8
 | Copyright (C) 1992,1993,1995,1997                                         |
9
 |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
10
 |                       Australia.  E-mail [email protected]               |
11
 |                                                                           |
12
 | Call from C as:                                                           |
13
 |    int wm_sqrt(FPU_REG *n, unsigned int control_word)                     |
14
 |                                                                           |
15
 +---------------------------------------------------------------------------*/
16

17
/*---------------------------------------------------------------------------+
18
 |  wm_sqrt(FPU_REG *n, unsigned int control_word)                           |
19
 |    returns the square root of n in n.                                     |
20
 |                                                                           |
21
 |  Use Newton's method to compute the square root of a number, which must   |
22
 |  be in the range  [1.0 .. 4.0),  to 64 bits accuracy.                     |
23
 |  Does not check the sign or tag of the argument.                          |
24
 |  Sets the exponent, but not the sign or tag of the result.                |
25
 |                                                                           |
26
 |  The guess is kept in %esi:%edi                                           |
27
 +---------------------------------------------------------------------------*/
28

29
#include "exception.h"
30
#include "fpu_emu.h"
31

32

33
#ifndef NON_REENTRANT_FPU
34
/*	Local storage on the stack: */
35
#define FPU_accum_3	-4(%ebp)	/* ms word */
36
#define FPU_accum_2	-8(%ebp)
37
#define FPU_accum_1	-12(%ebp)
38
#define FPU_accum_0	-16(%ebp)
39

40
/*
41
 * The de-normalised argument:
42
 *                  sq_2                  sq_1              sq_0
43
 *        b b b b b b b ... b b b   b b b .... b b b   b 0 0 0 ... 0
44
 *           ^ binary point here
45
 */
46
#define FPU_fsqrt_arg_2	-20(%ebp)	/* ms word */
47
#define FPU_fsqrt_arg_1	-24(%ebp)
48
#define FPU_fsqrt_arg_0	-28(%ebp)	/* ls word, at most the ms bit is set */
49

50
#else
51
/*	Local storage in a static area: */
52
.data
53
	.align 4,0
54
FPU_accum_3:
55
	.long	0		/* ms word */
56
FPU_accum_2:
57
	.long	0
58
FPU_accum_1:
59
	.long	0
60
FPU_accum_0:
61
	.long	0
62

63
/* The de-normalised argument:
64
                    sq_2                  sq_1              sq_0
65
          b b b b b b b ... b b b   b b b .... b b b   b 0 0 0 ... 0
66
             ^ binary point here
67
 */
68
FPU_fsqrt_arg_2:
69
	.long	0		/* ms word */
70
FPU_fsqrt_arg_1:
71
	.long	0
72
FPU_fsqrt_arg_0:
73
	.long	0		/* ls word, at most the ms bit is set */
74
#endif /* NON_REENTRANT_FPU */ 
75

76

77
.text
78
SYM_FUNC_START(wm_sqrt)
79
	pushl	%ebp
80
	movl	%esp,%ebp
81
#ifndef NON_REENTRANT_FPU
82
	subl	$28,%esp
83
#endif /* NON_REENTRANT_FPU */
84
	pushl	%esi
85
	pushl	%edi
86
	pushl	%ebx
87

88
	movl	PARAM1,%esi
89

90
	movl	SIGH(%esi),%eax
91
	movl	SIGL(%esi),%ecx
92
	xorl	%edx,%edx
93

94
/* We use a rough linear estimate for the first guess.. */
95

96
	cmpw	EXP_BIAS,EXP(%esi)
97
	jnz	sqrt_arg_ge_2
98

99
	shrl	$1,%eax			/* arg is in the range  [1.0 .. 2.0) */
100
	rcrl	$1,%ecx
101
	rcrl	$1,%edx
102

103
sqrt_arg_ge_2:
104
/* From here on, n is never accessed directly again until it is
105
   replaced by the answer. */
106

107
	movl	%eax,FPU_fsqrt_arg_2		/* ms word of n */
108
	movl	%ecx,FPU_fsqrt_arg_1
109
	movl	%edx,FPU_fsqrt_arg_0
110

111
/* Make a linear first estimate */
112
	shrl	$1,%eax
113
	addl	$0x40000000,%eax
114
	movl	$0xaaaaaaaa,%ecx
115
	mull	%ecx
116
	shll	%edx			/* max result was 7fff... */
117
	testl	$0x80000000,%edx	/* but min was 3fff... */
118
	jnz	sqrt_prelim_no_adjust
119

120
	movl	$0x80000000,%edx	/* round up */
121

122
sqrt_prelim_no_adjust:
123
	movl	%edx,%esi	/* Our first guess */
124

125
/* We have now computed (approx)   (2 + x) / 3, which forms the basis
126
   for a few iterations of Newton's method */
127

128
	movl	FPU_fsqrt_arg_2,%ecx	/* ms word */
129

130
/*
131
 * From our initial estimate, three iterations are enough to get us
132
 * to 30 bits or so. This will then allow two iterations at better
133
 * precision to complete the process.
134
 */
135

136
/* Compute  (g + n/g)/2  at each iteration (g is the guess). */
137
	shrl	%ecx		/* Doing this first will prevent a divide */
138
				/* overflow later. */
139

140
	movl	%ecx,%edx	/* msw of the arg / 2 */
141
	divl	%esi		/* current estimate */
142
	shrl	%esi		/* divide by 2 */
143
	addl	%eax,%esi	/* the new estimate */
144

145
	movl	%ecx,%edx
146
	divl	%esi
147
	shrl	%esi
148
	addl	%eax,%esi
149

150
	movl	%ecx,%edx
151
	divl	%esi
152
	shrl	%esi
153
	addl	%eax,%esi
154

155
/*
156
 * Now that an estimate accurate to about 30 bits has been obtained (in %esi),
157
 * we improve it to 60 bits or so.
158
 *
159
 * The strategy from now on is to compute new estimates from
160
 *      guess := guess + (n - guess^2) / (2 * guess)
161
 */
162

163
/* First, find the square of the guess */
164
	movl	%esi,%eax
165
	mull	%esi
166
/* guess^2 now in %edx:%eax */
167

168
	movl	FPU_fsqrt_arg_1,%ecx
169
	subl	%ecx,%eax
170
	movl	FPU_fsqrt_arg_2,%ecx	/* ms word of normalized n */
171
	sbbl	%ecx,%edx
172
	jnc	sqrt_stage_2_positive
173

174
/* Subtraction gives a negative result,
175
   negate the result before division. */
176
	notl	%edx
177
	notl	%eax
178
	addl	$1,%eax
179
	adcl	$0,%edx
180

181
	divl	%esi
182
	movl	%eax,%ecx
183

184
	movl	%edx,%eax
185
	divl	%esi
186
	jmp	sqrt_stage_2_finish
187

188
sqrt_stage_2_positive:
189
	divl	%esi
190
	movl	%eax,%ecx
191

192
	movl	%edx,%eax
193
	divl	%esi
194

195
	notl	%ecx
196
	notl	%eax
197
	addl	$1,%eax
198
	adcl	$0,%ecx
199

200
sqrt_stage_2_finish:
201
	sarl	$1,%ecx		/* divide by 2 */
202
	rcrl	$1,%eax
203

204
	/* Form the new estimate in %esi:%edi */
205
	movl	%eax,%edi
206
	addl	%ecx,%esi
207

208
	jnz	sqrt_stage_2_done	/* result should be [1..2) */
209

210
#ifdef PARANOID
211
/* It should be possible to get here only if the arg is ffff....ffff */
212
	cmpl	$0xffffffff,FPU_fsqrt_arg_1
213
	jnz	sqrt_stage_2_error
214
#endif /* PARANOID */
215

216
/* The best rounded result. */
217
	xorl	%eax,%eax
218
	decl	%eax
219
	movl	%eax,%edi
220
	movl	%eax,%esi
221
	movl	$0x7fffffff,%eax
222
	jmp	sqrt_round_result
223

224
#ifdef PARANOID
225
sqrt_stage_2_error:
226
	pushl	EX_INTERNAL|0x213
227
	call	EXCEPTION
228
#endif /* PARANOID */ 
229

230
sqrt_stage_2_done:
231

232
/* Now the square root has been computed to better than 60 bits. */
233

234
/* Find the square of the guess. */
235
	movl	%edi,%eax		/* ls word of guess */
236
	mull	%edi
237
	movl	%edx,FPU_accum_1
238

239
	movl	%esi,%eax
240
	mull	%esi
241
	movl	%edx,FPU_accum_3
242
	movl	%eax,FPU_accum_2
243

244
	movl	%edi,%eax
245
	mull	%esi
246
	addl	%eax,FPU_accum_1
247
	adcl	%edx,FPU_accum_2
248
	adcl	$0,FPU_accum_3
249

250
/*	movl	%esi,%eax */
251
/*	mull	%edi */
252
	addl	%eax,FPU_accum_1
253
	adcl	%edx,FPU_accum_2
254
	adcl	$0,FPU_accum_3
255

256
/* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */
257

258
	movl	FPU_fsqrt_arg_0,%eax		/* get normalized n */
259
	subl	%eax,FPU_accum_1
260
	movl	FPU_fsqrt_arg_1,%eax
261
	sbbl	%eax,FPU_accum_2
262
	movl	FPU_fsqrt_arg_2,%eax		/* ms word of normalized n */
263
	sbbl	%eax,FPU_accum_3
264
	jnc	sqrt_stage_3_positive
265

266
/* Subtraction gives a negative result,
267
   negate the result before division */
268
	notl	FPU_accum_1
269
	notl	FPU_accum_2
270
	notl	FPU_accum_3
271
	addl	$1,FPU_accum_1
272
	adcl	$0,FPU_accum_2
273

274
#ifdef PARANOID
275
	adcl	$0,FPU_accum_3	/* This must be zero */
276
	jz	sqrt_stage_3_no_error
277

278
sqrt_stage_3_error:
279
	pushl	EX_INTERNAL|0x207
280
	call	EXCEPTION
281

282
sqrt_stage_3_no_error:
283
#endif /* PARANOID */
284

285
	movl	FPU_accum_2,%edx
286
	movl	FPU_accum_1,%eax
287
	divl	%esi
288
	movl	%eax,%ecx
289

290
	movl	%edx,%eax
291
	divl	%esi
292

293
	sarl	$1,%ecx		/* divide by 2 */
294
	rcrl	$1,%eax
295

296
	/* prepare to round the result */
297

298
	addl	%ecx,%edi
299
	adcl	$0,%esi
300

301
	jmp	sqrt_stage_3_finished
302

303
sqrt_stage_3_positive:
304
	movl	FPU_accum_2,%edx
305
	movl	FPU_accum_1,%eax
306
	divl	%esi
307
	movl	%eax,%ecx
308

309
	movl	%edx,%eax
310
	divl	%esi
311

312
	sarl	$1,%ecx		/* divide by 2 */
313
	rcrl	$1,%eax
314

315
	/* prepare to round the result */
316

317
	notl	%eax		/* Negate the correction term */
318
	notl	%ecx
319
	addl	$1,%eax
320
	adcl	$0,%ecx		/* carry here ==> correction == 0 */
321
	adcl	$0xffffffff,%esi
322

323
	addl	%ecx,%edi
324
	adcl	$0,%esi
325

326
sqrt_stage_3_finished:
327

328
/*
329
 * The result in %esi:%edi:%esi should be good to about 90 bits here,
330
 * and the rounding information here does not have sufficient accuracy
331
 * in a few rare cases.
332
 */
333
	cmpl	$0xffffffe0,%eax
334
	ja	sqrt_near_exact_x
335

336
	cmpl	$0x00000020,%eax
337
	jb	sqrt_near_exact
338

339
	cmpl	$0x7fffffe0,%eax
340
	jb	sqrt_round_result
341

342
	cmpl	$0x80000020,%eax
343
	jb	sqrt_get_more_precision
344

345
sqrt_round_result:
346
/* Set up for rounding operations */
347
	movl	%eax,%edx
348
	movl	%esi,%eax
349
	movl	%edi,%ebx
350
	movl	PARAM1,%edi
351
	movw	EXP_BIAS,EXP(%edi)	/* Result is in  [1.0 .. 2.0) */
352
	jmp	fpu_reg_round
353

354

355
sqrt_near_exact_x:
356
/* First, the estimate must be rounded up. */
357
	addl	$1,%edi
358
	adcl	$0,%esi
359

360
sqrt_near_exact:
361
/*
362
 * This is an easy case because x^1/2 is monotonic.
363
 * We need just find the square of our estimate, compare it
364
 * with the argument, and deduce whether our estimate is
365
 * above, below, or exact. We use the fact that the estimate
366
 * is known to be accurate to about 90 bits.
367
 */
368
	movl	%edi,%eax		/* ls word of guess */
369
	mull	%edi
370
	movl	%edx,%ebx		/* 2nd ls word of square */
371
	movl	%eax,%ecx		/* ls word of square */
372

373
	movl	%edi,%eax
374
	mull	%esi
375
	addl	%eax,%ebx
376
	addl	%eax,%ebx
377

378
#ifdef PARANOID
379
	cmp	$0xffffffb0,%ebx
380
	jb	sqrt_near_exact_ok
381

382
	cmp	$0x00000050,%ebx
383
	ja	sqrt_near_exact_ok
384

385
	pushl	EX_INTERNAL|0x214
386
	call	EXCEPTION
387

388
sqrt_near_exact_ok:
389
#endif /* PARANOID */ 
390

391
	or	%ebx,%ebx
392
	js	sqrt_near_exact_small
393

394
	jnz	sqrt_near_exact_large
395

396
	or	%ebx,%edx
397
	jnz	sqrt_near_exact_large
398

399
/* Our estimate is exactly the right answer */
400
	xorl	%eax,%eax
401
	jmp	sqrt_round_result
402

403
sqrt_near_exact_small:
404
/* Our estimate is too small */
405
	movl	$0x000000ff,%eax
406
	jmp	sqrt_round_result
407
	
408
sqrt_near_exact_large:
409
/* Our estimate is too large, we need to decrement it */
410
	subl	$1,%edi
411
	sbbl	$0,%esi
412
	movl	$0xffffff00,%eax
413
	jmp	sqrt_round_result
414

415

416
sqrt_get_more_precision:
417
/* This case is almost the same as the above, except we start
418
   with an extra bit of precision in the estimate. */
419
	stc			/* The extra bit. */
420
	rcll	$1,%edi		/* Shift the estimate left one bit */
421
	rcll	$1,%esi
422

423
	movl	%edi,%eax		/* ls word of guess */
424
	mull	%edi
425
	movl	%edx,%ebx		/* 2nd ls word of square */
426
	movl	%eax,%ecx		/* ls word of square */
427

428
	movl	%edi,%eax
429
	mull	%esi
430
	addl	%eax,%ebx
431
	addl	%eax,%ebx
432

433
/* Put our estimate back to its original value */
434
	stc			/* The ms bit. */
435
	rcrl	$1,%esi		/* Shift the estimate left one bit */
436
	rcrl	$1,%edi
437

438
#ifdef PARANOID
439
	cmp	$0xffffff60,%ebx
440
	jb	sqrt_more_prec_ok
441

442
	cmp	$0x000000a0,%ebx
443
	ja	sqrt_more_prec_ok
444

445
	pushl	EX_INTERNAL|0x215
446
	call	EXCEPTION
447

448
sqrt_more_prec_ok:
449
#endif /* PARANOID */ 
450

451
	or	%ebx,%ebx
452
	js	sqrt_more_prec_small
453

454
	jnz	sqrt_more_prec_large
455

456
	or	%ebx,%ecx
457
	jnz	sqrt_more_prec_large
458

459
/* Our estimate is exactly the right answer */
460
	movl	$0x80000000,%eax
461
	jmp	sqrt_round_result
462

463
sqrt_more_prec_small:
464
/* Our estimate is too small */
465
	movl	$0x800000ff,%eax
466
	jmp	sqrt_round_result
467
	
468
sqrt_more_prec_large:
469
/* Our estimate is too large */
470
	movl	$0x7fffff00,%eax
471
	jmp	sqrt_round_result
472
SYM_FUNC_END(wm_sqrt)
473

474
Product

Resources

Company