CoCalc -- reg_u

GitHub Repository: awilliam/linux-vfio
Path: blob/master/arch/x86/math-emu/reg_u_div.S
¹⁰⁸²⁰ views
1
	.file	"reg_u_div.S"
2
/*---------------------------------------------------------------------------+
3
 |  reg_u_div.S                                                              |
4
 |                                                                           |
5
 | Divide one FPU_REG by another and put the result in a destination FPU_REG.|
6
 |                                                                           |
7
 | Copyright (C) 1992,1993,1995,1997                                         |
8
 |                  W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
9
 |                  E-mail   [email protected]                              |
10
 |                                                                           |
11
 |                                                                           |
12
 +---------------------------------------------------------------------------*/
13

14
/*---------------------------------------------------------------------------+
15
 | Call from C as:                                                           |
16
 |    int FPU_u_div(FPU_REG *a, FPU_REG *b, FPU_REG *dest,                   |
17
 |                unsigned int control_word, char *sign)                     |
18
 |                                                                           |
19
 |  Does not compute the destination exponent, but does adjust it.           |
20
 |                                                                           |
21
 |    Return value is the tag of the answer, or-ed with FPU_Exception if     |
22
 |    one was raised, or -1 on internal error.                               |
23
 +---------------------------------------------------------------------------*/
24

25
#include "exception.h"
26
#include "fpu_emu.h"
27
#include "control_w.h"
28

29

30
/* #define	dSIGL(x)	(x) */
31
/* #define	dSIGH(x)	4(x) */
32

33

34
#ifndef NON_REENTRANT_FPU
35
/*
36
	Local storage on the stack:
37
	Result:		FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
38
	Overflow flag:	ovfl_flag
39
 */
40
#define FPU_accum_3	-4(%ebp)
41
#define FPU_accum_2	-8(%ebp)
42
#define FPU_accum_1	-12(%ebp)
43
#define FPU_accum_0	-16(%ebp)
44
#define FPU_result_1	-20(%ebp)
45
#define FPU_result_2	-24(%ebp)
46
#define FPU_ovfl_flag	-28(%ebp)
47

48
#else
49
.data
50
/*
51
	Local storage in a static area:
52
	Result:		FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
53
	Overflow flag:	ovfl_flag
54
 */
55
	.align 4,0
56
FPU_accum_3:
57
	.long	0
58
FPU_accum_2:
59
	.long	0
60
FPU_accum_1:
61
	.long	0
62
FPU_accum_0:
63
	.long	0
64
FPU_result_1:
65
	.long	0
66
FPU_result_2:
67
	.long	0
68
FPU_ovfl_flag:
69
	.byte	0
70
#endif /* NON_REENTRANT_FPU */
71

72
#define REGA	PARAM1
73
#define REGB	PARAM2
74
#define DEST	PARAM3
75

76
.text
77
ENTRY(FPU_u_div)
78
	pushl	%ebp
79
	movl	%esp,%ebp
80
#ifndef NON_REENTRANT_FPU
81
	subl	$28,%esp
82
#endif /* NON_REENTRANT_FPU */
83

84
	pushl	%esi
85
	pushl	%edi
86
	pushl	%ebx
87

88
	movl	REGA,%esi
89
	movl	REGB,%ebx
90
	movl	DEST,%edi
91

92
	movswl	EXP(%esi),%edx
93
	movswl	EXP(%ebx),%eax
94
	subl	%eax,%edx
95
	addl	EXP_BIAS,%edx
96

97
	/* A denormal and a large number can cause an exponent underflow */
98
	cmpl	EXP_WAY_UNDER,%edx
99
	jg	xExp_not_underflow
100

101
	/* Set to a really low value allow correct handling */
102
	movl	EXP_WAY_UNDER,%edx
103

104
xExp_not_underflow:
105

106
	movw    %dx,EXP(%edi)
107

108
#ifdef PARANOID
109
/*	testl	$0x80000000, SIGH(%esi)	// Dividend */
110
/*	je	L_bugged */
111
	testl	$0x80000000, SIGH(%ebx)	/* Divisor */
112
	je	L_bugged
113
#endif /* PARANOID */ 
114

115
/* Check if the divisor can be treated as having just 32 bits */
116
	cmpl	$0,SIGL(%ebx)
117
	jnz	L_Full_Division	/* Can't do a quick divide */
118

119
/* We should be able to zip through the division here */
120
	movl	SIGH(%ebx),%ecx	/* The divisor */
121
	movl	SIGH(%esi),%edx	/* Dividend */
122
	movl	SIGL(%esi),%eax	/* Dividend */
123

124
	cmpl	%ecx,%edx
125
	setaeb	FPU_ovfl_flag	/* Keep a record */
126
	jb	L_no_adjust
127

128
	subl	%ecx,%edx	/* Prevent the overflow */
129

130
L_no_adjust:
131
	/* Divide the 64 bit number by the 32 bit denominator */
132
	divl	%ecx
133
	movl	%eax,FPU_result_2
134

135
	/* Work on the remainder of the first division */
136
	xorl	%eax,%eax
137
	divl	%ecx
138
	movl	%eax,FPU_result_1
139

140
	/* Work on the remainder of the 64 bit division */
141
	xorl	%eax,%eax
142
	divl	%ecx
143

144
	testb	$255,FPU_ovfl_flag	/* was the num > denom ? */
145
	je	L_no_overflow
146

147
	/* Do the shifting here */
148
	/* increase the exponent */
149
	incw	EXP(%edi)
150

151
	/* shift the mantissa right one bit */
152
	stc			/* To set the ms bit */
153
	rcrl	FPU_result_2
154
	rcrl	FPU_result_1
155
	rcrl	%eax
156

157
L_no_overflow:
158
	jmp	LRound_precision	/* Do the rounding as required */
159

160

161
/*---------------------------------------------------------------------------+
162
 |  Divide:   Return  arg1/arg2 to arg3.                                     |
163
 |                                                                           |
164
 |  This routine does not use the exponents of arg1 and arg2, but does       |
165
 |  adjust the exponent of arg3.                                             |
166
 |                                                                           |
167
 |  The maximum returned value is (ignoring exponents)                       |
168
 |               .ffffffff ffffffff                                          |
169
 |               ------------------  =  1.ffffffff fffffffe                  |
170
 |               .80000000 00000000                                          |
171
 | and the minimum is                                                        |
172
 |               .80000000 00000000                                          |
173
 |               ------------------  =  .80000000 00000001   (rounded)       |
174
 |               .ffffffff ffffffff                                          |
175
 |                                                                           |
176
 +---------------------------------------------------------------------------*/
177

178

179
L_Full_Division:
180
	/* Save extended dividend in local register */
181
	movl	SIGL(%esi),%eax
182
	movl	%eax,FPU_accum_2
183
	movl	SIGH(%esi),%eax
184
	movl	%eax,FPU_accum_3
185
	xorl	%eax,%eax
186
	movl	%eax,FPU_accum_1	/* zero the extension */
187
	movl	%eax,FPU_accum_0	/* zero the extension */
188

189
	movl	SIGL(%esi),%eax	/* Get the current num */
190
	movl	SIGH(%esi),%edx
191

192
/*----------------------------------------------------------------------*/
193
/* Initialization done.
194
   Do the first 32 bits. */
195

196
	movb	$0,FPU_ovfl_flag
197
	cmpl	SIGH(%ebx),%edx	/* Test for imminent overflow */
198
	jb	LLess_than_1
199
	ja	LGreater_than_1
200

201
	cmpl	SIGL(%ebx),%eax
202
	jb	LLess_than_1
203

204
LGreater_than_1:
205
/* The dividend is greater or equal, would cause overflow */
206
	setaeb	FPU_ovfl_flag		/* Keep a record */
207

208
	subl	SIGL(%ebx),%eax
209
	sbbl	SIGH(%ebx),%edx	/* Prevent the overflow */
210
	movl	%eax,FPU_accum_2
211
	movl	%edx,FPU_accum_3
212

213
LLess_than_1:
214
/* At this point, we have a dividend < divisor, with a record of
215
   adjustment in FPU_ovfl_flag */
216

217
	/* We will divide by a number which is too large */
218
	movl	SIGH(%ebx),%ecx
219
	addl	$1,%ecx
220
	jnc	LFirst_div_not_1
221

222
	/* here we need to divide by 100000000h,
223
	   i.e., no division at all.. */
224
	mov	%edx,%eax
225
	jmp	LFirst_div_done
226

227
LFirst_div_not_1:
228
	divl	%ecx		/* Divide the numerator by the augmented
229
				   denom ms dw */
230

231
LFirst_div_done:
232
	movl	%eax,FPU_result_2	/* Put the result in the answer */
233

234
	mull	SIGH(%ebx)	/* mul by the ms dw of the denom */
235

236
	subl	%eax,FPU_accum_2	/* Subtract from the num local reg */
237
	sbbl	%edx,FPU_accum_3
238

239
	movl	FPU_result_2,%eax	/* Get the result back */
240
	mull	SIGL(%ebx)	/* now mul the ls dw of the denom */
241

242
	subl	%eax,FPU_accum_1	/* Subtract from the num local reg */
243
	sbbl	%edx,FPU_accum_2
244
	sbbl	$0,FPU_accum_3
245
	je	LDo_2nd_32_bits		/* Must check for non-zero result here */
246

247
#ifdef PARANOID
248
	jb	L_bugged_1
249
#endif /* PARANOID */ 
250

251
	/* need to subtract another once of the denom */
252
	incl	FPU_result_2	/* Correct the answer */
253

254
	movl	SIGL(%ebx),%eax
255
	movl	SIGH(%ebx),%edx
256
	subl	%eax,FPU_accum_1	/* Subtract from the num local reg */
257
	sbbl	%edx,FPU_accum_2
258

259
#ifdef PARANOID
260
	sbbl	$0,FPU_accum_3
261
	jne	L_bugged_1	/* Must check for non-zero result here */
262
#endif /* PARANOID */ 
263

264
/*----------------------------------------------------------------------*/
265
/* Half of the main problem is done, there is just a reduced numerator
266
   to handle now.
267
   Work with the second 32 bits, FPU_accum_0 not used from now on */
268
LDo_2nd_32_bits:
269
	movl	FPU_accum_2,%edx	/* get the reduced num */
270
	movl	FPU_accum_1,%eax
271

272
	/* need to check for possible subsequent overflow */
273
	cmpl	SIGH(%ebx),%edx
274
	jb	LDo_2nd_div
275
	ja	LPrevent_2nd_overflow
276

277
	cmpl	SIGL(%ebx),%eax
278
	jb	LDo_2nd_div
279

280
LPrevent_2nd_overflow:
281
/* The numerator is greater or equal, would cause overflow */
282
	/* prevent overflow */
283
	subl	SIGL(%ebx),%eax
284
	sbbl	SIGH(%ebx),%edx
285
	movl	%edx,FPU_accum_2
286
	movl	%eax,FPU_accum_1
287

288
	incl	FPU_result_2	/* Reflect the subtraction in the answer */
289

290
#ifdef PARANOID
291
	je	L_bugged_2	/* Can't bump the result to 1.0 */
292
#endif /* PARANOID */ 
293

294
LDo_2nd_div:
295
	cmpl	$0,%ecx		/* augmented denom msw */
296
	jnz	LSecond_div_not_1
297

298
	/* %ecx == 0, we are dividing by 1.0 */
299
	mov	%edx,%eax
300
	jmp	LSecond_div_done
301

302
LSecond_div_not_1:
303
	divl	%ecx		/* Divide the numerator by the denom ms dw */
304

305
LSecond_div_done:
306
	movl	%eax,FPU_result_1	/* Put the result in the answer */
307

308
	mull	SIGH(%ebx)	/* mul by the ms dw of the denom */
309

310
	subl	%eax,FPU_accum_1	/* Subtract from the num local reg */
311
	sbbl	%edx,FPU_accum_2
312

313
#ifdef PARANOID
314
	jc	L_bugged_2
315
#endif /* PARANOID */ 
316

317
	movl	FPU_result_1,%eax	/* Get the result back */
318
	mull	SIGL(%ebx)	/* now mul the ls dw of the denom */
319

320
	subl	%eax,FPU_accum_0	/* Subtract from the num local reg */
321
	sbbl	%edx,FPU_accum_1	/* Subtract from the num local reg */
322
	sbbl	$0,FPU_accum_2
323

324
#ifdef PARANOID
325
	jc	L_bugged_2
326
#endif /* PARANOID */ 
327

328
	jz	LDo_3rd_32_bits
329

330
#ifdef PARANOID
331
	cmpl	$1,FPU_accum_2
332
	jne	L_bugged_2
333
#endif /* PARANOID */
334

335
	/* need to subtract another once of the denom */
336
	movl	SIGL(%ebx),%eax
337
	movl	SIGH(%ebx),%edx
338
	subl	%eax,FPU_accum_0	/* Subtract from the num local reg */
339
	sbbl	%edx,FPU_accum_1
340
	sbbl	$0,FPU_accum_2
341

342
#ifdef PARANOID
343
	jc	L_bugged_2
344
	jne	L_bugged_2
345
#endif /* PARANOID */ 
346

347
	addl	$1,FPU_result_1	/* Correct the answer */
348
	adcl	$0,FPU_result_2
349

350
#ifdef PARANOID
351
	jc	L_bugged_2	/* Must check for non-zero result here */
352
#endif /* PARANOID */
353

354
/*----------------------------------------------------------------------*/
355
/* The division is essentially finished here, we just need to perform
356
   tidying operations.
357
   Deal with the 3rd 32 bits */
358
LDo_3rd_32_bits:
359
	movl	FPU_accum_1,%edx		/* get the reduced num */
360
	movl	FPU_accum_0,%eax
361

362
	/* need to check for possible subsequent overflow */
363
	cmpl	SIGH(%ebx),%edx	/* denom */
364
	jb	LRound_prep
365
	ja	LPrevent_3rd_overflow
366

367
	cmpl	SIGL(%ebx),%eax	/* denom */
368
	jb	LRound_prep
369

370
LPrevent_3rd_overflow:
371
	/* prevent overflow */
372
	subl	SIGL(%ebx),%eax
373
	sbbl	SIGH(%ebx),%edx
374
	movl	%edx,FPU_accum_1
375
	movl	%eax,FPU_accum_0
376

377
	addl	$1,FPU_result_1	/* Reflect the subtraction in the answer */
378
	adcl	$0,FPU_result_2
379
	jne	LRound_prep
380
	jnc	LRound_prep
381

382
	/* This is a tricky spot, there is an overflow of the answer */
383
	movb	$255,FPU_ovfl_flag		/* Overflow -> 1.000 */
384

385
LRound_prep:
386
/*
387
 * Prepare for rounding.
388
 * To test for rounding, we just need to compare 2*accum with the
389
 * denom.
390
 */
391
	movl	FPU_accum_0,%ecx
392
	movl	FPU_accum_1,%edx
393
	movl	%ecx,%eax
394
	orl	%edx,%eax
395
	jz	LRound_ovfl		/* The accumulator contains zero. */
396

397
	/* Multiply by 2 */
398
	clc
399
	rcll	$1,%ecx
400
	rcll	$1,%edx
401
	jc	LRound_large		/* No need to compare, denom smaller */
402

403
	subl	SIGL(%ebx),%ecx
404
	sbbl	SIGH(%ebx),%edx
405
	jnc	LRound_not_small
406

407
	movl	$0x70000000,%eax	/* Denom was larger */
408
	jmp	LRound_ovfl
409

410
LRound_not_small:
411
	jnz	LRound_large
412

413
	movl	$0x80000000,%eax	/* Remainder was exactly 1/2 denom */
414
	jmp	LRound_ovfl
415

416
LRound_large:
417
	movl	$0xff000000,%eax	/* Denom was smaller */
418

419
LRound_ovfl:
420
/* We are now ready to deal with rounding, but first we must get
421
   the bits properly aligned */
422
	testb	$255,FPU_ovfl_flag	/* was the num > denom ? */
423
	je	LRound_precision
424

425
	incw	EXP(%edi)
426

427
	/* shift the mantissa right one bit */
428
	stc			/* Will set the ms bit */
429
	rcrl	FPU_result_2
430
	rcrl	FPU_result_1
431
	rcrl	%eax
432

433
/* Round the result as required */
434
LRound_precision:
435
	decw	EXP(%edi)	/* binary point between 1st & 2nd bits */
436

437
	movl	%eax,%edx
438
	movl	FPU_result_1,%ebx
439
	movl	FPU_result_2,%eax
440
	jmp	fpu_reg_round
441

442

443
#ifdef PARANOID
444
/* The logic is wrong if we got here */
445
L_bugged:
446
	pushl	EX_INTERNAL|0x202
447
	call	EXCEPTION
448
	pop	%ebx
449
	jmp	L_exit
450

451
L_bugged_1:
452
	pushl	EX_INTERNAL|0x203
453
	call	EXCEPTION
454
	pop	%ebx
455
	jmp	L_exit
456

457
L_bugged_2:
458
	pushl	EX_INTERNAL|0x204
459
	call	EXCEPTION
460
	pop	%ebx
461
	jmp	L_exit
462

463
L_exit:
464
	movl	$-1,%eax
465
	popl	%ebx
466
	popl	%edi
467
	popl	%esi
468

469
	leave
470
	ret
471
#endif /* PARANOID */ 
472

473
Product

Resources

Company