/*
 * Source: GitHub repository awilliam/linux-vfio,
 * path arch/x86/math-emu/reg_round.S (master branch).
 */
.file "reg_round.S"
/*---------------------------------------------------------------------------+
 |  reg_round.S                                                              |
 |                                                                           |
 | Rounding/truncation/etc for FPU basic arithmetic functions.               |
 |                                                                           |
 | Copyright (C) 1993,1995,1997                                              |
 |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
 |                       Australia.  E-mail   billm@suburbia.net             |
 |                                                                           |
 | This code has four possible entry points.                                 |
 | The following must be entered by a jmp instruction:                       |
 |   fpu_reg_round, fpu_reg_round_sqrt, and fpu_Arith_exit.                  |
 | (No fpu_reg_round_sqrt label is defined in this file.)                    |
 |                                                                           |
 | The FPU_round entry point is intended to be used by C code.               |
 | From C, call as:                                                          |
 |  int FPU_round(FPU_REG *arg, unsigned int extent, int dummy,              |
 |                unsigned int control_w, u_char sign)                       |
 | The code reads arg via PARAM1, extent via PARAM2, control_w via PARAM4    |
 | and sign via PARAM5; PARAM3 is never read.                                |
 |                                                                           |
 | Return value is the tag of the answer, or-ed with FPU_Exception if        |
 | one was raised, or -1 on internal error.                                  |
 |                                                                           |
 | For correct "up" and "down" rounding, the argument must have the correct  |
 | sign.                                                                     |
 |                                                                           |
 +---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------+
 | Four entry points.                                                        |
 |                                                                           |
 | Needed by both the fpu_reg_round and fpu_reg_round_sqrt entry points:     |
 |  %eax:%ebx  64 bit significand                                            |
 |  %edx       32 bit extension of the significand                           |
 |  %edi       pointer to an FPU_REG for the result to be stored             |
 |  stack      calling function must have set up a C stack frame and         |
 |             pushed %esi, %edi, and %ebx                                   |
 |                                                                           |
 | Needed just for the fpu_reg_round_sqrt entry point:                       |
 |  %cx  A control word in the same format as the FPU control word.          |
 | Otherwise, PARAM4 must give such a value.                                 |
 |                                                                           |
 |                                                                           |
 | The significand and its extension are assumed to be exact in the          |
 | following sense:                                                          |
 |   If the significand by itself is the exact result then the significand   |
 |   extension (%edx) must contain 0, otherwise the significand extension    |
 |   must be non-zero.                                                       |
 |   If the significand extension is non-zero then the significand is        |
 |   smaller than the magnitude of the correct exact result by an amount     |
 |   greater than zero and less than one ls bit of the significand.          |
 |   The significand extension is only required to have three possible       |
 |   non-zero values:                                                        |
 |       less than 0x80000000  <=> the significand is less than 1/2 an ls    |
 |                                 bit smaller than the magnitude of the     |
 |                                 true exact result.                        |
 |         exactly 0x80000000  <=> the significand is exactly 1/2 an ls bit  |
 |                                 smaller than the magnitude of the true    |
 |                                 exact result.                             |
 |    greater than 0x80000000  <=> the significand is more than 1/2 an ls    |
 |                                 bit smaller than the magnitude of the     |
 |                                 true exact result.                        |
 |                                                                           |
 +---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------+
 |  The code in this module has become quite complex, but it should handle   |
 |  all of the FPU flags which are set at this stage of the basic arithmetic |
 |  computations.                                                            |
 |  There are a few rare cases where the results are not set identically to  |
 |   a real FPU. These require a bit more thought because at this stage the  |
 |   results of the code here appear to be more consistent...                |
 |  This may be changed in a future version.                                 |
 +---------------------------------------------------------------------------*/


#include "fpu_emu.h"
#include "exception.h"
#include "control_w.h"

/*
 * Flags for FPU_bits_lost.
 * The values carry their own `$' prefix so they can be written directly
 * as immediate operands, e.g. "movb LOST_UP,FPU_bits_lost".
 */
#define	LOST_DOWN	$1
#define	LOST_UP		$2

/* Flags for FPU_denormal (0 means "not a de-normal") */
#define	DENORMAL	$1
#define	UNMASKED_UNDERFLOW $2


#ifndef NON_REENTRANT_FPU
/*	Make the code re-entrant by putting
	local storage on the stack.
	Both bytes are addressed relative to %esp, so they are only valid
	while %esp points at the slot reserved at fpu_reg_round, i.e. while
	no additional pushes are outstanding. */
#define FPU_bits_lost	(%esp)
#define FPU_denormal	1(%esp)

#else
/*	Not re-entrant, so we can gain speed by putting
	local storage in a static area: */
.data
	.align 4,0
FPU_bits_lost:
	.byte	0
FPU_denormal:
	.byte	0
#endif /* NON_REENTRANT_FPU */


.text
.globl fpu_reg_round
.globl fpu_Arith_exit

/*
 * Entry point when called from C.
 * Builds the stack frame that the jmp-entered entry points expect
 * (%ebp frame, then %esi, %edi, %ebx pushed) and loads:
 *   %edi      = PARAM1, pointer to the FPU_REG to round
 *   %eax:%ebx = its 64 bit significand (SIGH:SIGL)
 *   %edx      = PARAM2, the significand extension
 */
ENTRY(FPU_round)
	pushl	%ebp
	movl	%esp,%ebp
	pushl	%esi
	pushl	%edi
	pushl	%ebx

	movl	PARAM1,%edi
	movl	SIGH(%edi),%eax
	movl	SIGL(%edi),%ebx
	movl	PARAM2,%edx

fpu_reg_round:			/* Normal entry point */
	movl	PARAM4,%ecx	/* control word */

#ifndef NON_REENTRANT_FPU
	/* Reserve one stack slot; its low two bytes are used as
	   FPU_bits_lost and FPU_denormal.  The pushed value is irrelevant. */
	pushl	%ebx		/* adjust the stack pointer */
#endif /* NON_REENTRANT_FPU */

#ifdef PARANOID
/* Cannot use this here yet */
/*	orl	%eax,%eax */
/*	jns	L_entry_bugged */
#endif /* PARANOID */

	cmpw	EXP_UNDER,EXP(%edi)
	jle	L_Make_denorm			/* The number is a de-normal */

	movb	$0,FPU_denormal			/* 0 -> not a de-normal */

/*
 * Select the rounding code from the precision control field.
 * On entry %ecx holds the control word; a full copy is kept in %esi
 * so the rounding control field can be extracted later.
 */
Denorm_done:
	movb	$0,FPU_bits_lost		/* No bits yet lost in rounding */

	movl	%ecx,%esi
	andl	CW_PC,%ecx
	cmpl	PR_64_BITS,%ecx
	je	LRound_To_64

	cmpl	PR_53_BITS,%ecx
	je	LRound_To_53

	cmpl	PR_24_BITS,%ecx
	je	LRound_To_24

#ifdef PECULIAR_486
/* With the precision control bits set to 01 "(reserved)", a real 80486
   behaves as if the precision control bits were set to 11 "64 bits" */
	cmpl	PR_RESERVED_BITS,%ecx
	je	LRound_To_64
#ifdef PARANOID
	jmp	L_bugged_denorm_486
#endif /* PARANOID */
#else
#ifdef PARANOID
	jmp	L_bugged_denorm	/* There is no bug, just a bad control word */
#endif /* PARANOID */
#endif /* PECULIAR_486 */

	/* Without PARANOID, an unmatched precision field falls through
	   into the code which follows (LRound_To_24). */


/*
 * Round etc to 24 bit precision.
 * In:  %eax:%ebx significand, %edx extension, %esi control word,
 *      PARAM5 sign of the value.  %ecx is used as scratch.
 * The bits to be discarded are the low 8 bits of %eax, all of %ebx and
 * the extension in %edx.
 */
LRound_To_24:
	movl	%esi,%ecx
	andl	CW_RC,%ecx
	cmpl	RC_RND,%ecx
	je	LRound_nearest_24

	cmpl	RC_CHOP,%ecx
	je	LCheck_truncate_24

	cmpl	RC_UP,%ecx		/* Towards +infinity */
	je	LUp_24

	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
	je	LDown_24

#ifdef PARANOID
	jmp	L_bugged_round24
#endif /* PARANOID */

LUp_24:
	cmpb	SIGN_POS,PARAM5
	jne	LCheck_truncate_24	/* If negative then  up==truncate */

	jmp	LCheck_24_round_up

LDown_24:
	cmpb	SIGN_POS,PARAM5
	je	LCheck_truncate_24	/* If positive then  down==truncate */

LCheck_24_round_up:
	/* Round away from zero only if any discarded bit is set */
	movl	%eax,%ecx
	andl	$0x000000ff,%ecx
	orl	%ebx,%ecx
	orl	%edx,%ecx
	jnz	LDo_24_round_up
	jmp	L_Re_normalise

LRound_nearest_24:
	/* Do rounding of the 24th bit if needed (nearest or even) */
	movl	%eax,%ecx
	andl	$0x000000ff,%ecx
	cmpl	$0x00000080,%ecx
	jc	LCheck_truncate_24	/* less than half, no increment needed */

	jne	LGreater_Half_24	/* greater than half, increment needed */

	/* Possibly half, we need to check the ls bits */
	orl	%ebx,%ebx
	jnz	LGreater_Half_24	/* greater than half, increment needed */

	orl	%edx,%edx
	jnz	LGreater_Half_24	/* greater than half, increment needed */

	/* Exactly half, increment only if 24th bit is 1 (round to even) */
	testl	$0x00000100,%eax
	jz	LDo_truncate_24

LGreater_Half_24:			/* Rounding: increment at the 24th bit */
LDo_24_round_up:
	andl	$0xffffff00,%eax	/* Truncate to 24 bits */
	xorl	%ebx,%ebx
	movb	LOST_UP,FPU_bits_lost
	/* The carry out of this add is tested at LCheck_Round_Overflow */
	addl	$0x00000100,%eax
	jmp	LCheck_Round_Overflow

LCheck_truncate_24:
	movl	%eax,%ecx
	andl	$0x000000ff,%ecx
	orl	%ebx,%ecx
	orl	%edx,%ecx
	jz	L_Re_normalise		/* No truncation needed */

LDo_truncate_24:
	andl	$0xffffff00,%eax	/* Truncate to 24 bits */
	xorl	%ebx,%ebx
	movb	LOST_DOWN,FPU_bits_lost
	jmp	L_Re_normalise


/*
 * Round etc to 53 bit precision.
 * In:  %eax:%ebx significand, %edx extension, %esi control word,
 *      PARAM5 sign of the value.  %ecx is used as scratch.
 * The bits to be discarded are the low 11 bits of %ebx and the
 * extension in %edx.
 */
LRound_To_53:
	movl	%esi,%ecx
	andl	CW_RC,%ecx
	cmpl	RC_RND,%ecx
	je	LRound_nearest_53

	cmpl	RC_CHOP,%ecx
	je	LCheck_truncate_53

	cmpl	RC_UP,%ecx		/* Towards +infinity */
	je	LUp_53

	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
	je	LDown_53

#ifdef PARANOID
	jmp	L_bugged_round53
#endif /* PARANOID */

LUp_53:
	cmpb	SIGN_POS,PARAM5
	jne	LCheck_truncate_53	/* If negative then  up==truncate */

	jmp	LCheck_53_round_up

LDown_53:
	cmpb	SIGN_POS,PARAM5
	je	LCheck_truncate_53	/* If positive then  down==truncate */

LCheck_53_round_up:
	/* Round away from zero only if any discarded bit is set */
	movl	%ebx,%ecx
	andl	$0x000007ff,%ecx
	orl	%edx,%ecx
	jnz	LDo_53_round_up
	jmp	L_Re_normalise

LRound_nearest_53:
	/* Do rounding of the 53rd bit if needed (nearest or even) */
	movl	%ebx,%ecx
	andl	$0x000007ff,%ecx
	cmpl	$0x00000400,%ecx
	jc	LCheck_truncate_53	/* less than half, no increment needed */

	jnz	LGreater_Half_53	/* greater than half, increment needed */

	/* Possibly half, we need to check the ls bits */
	orl	%edx,%edx
	jnz	LGreater_Half_53	/* greater than half, increment needed */

	/* Exactly half, increment only if 53rd bit is 1 (round to even) */
	testl	$0x00000800,%ebx
	jz	LTruncate_53

LGreater_Half_53:			/* Rounding: increment at the 53rd bit */
LDo_53_round_up:
	movb	LOST_UP,FPU_bits_lost
	andl	$0xfffff800,%ebx	/* Truncate to 53 bits */
	addl	$0x00000800,%ebx
	/* The carry out of this adc is tested at LCheck_Round_Overflow */
	adcl	$0,%eax
	jmp	LCheck_Round_Overflow

LCheck_truncate_53:
	movl	%ebx,%ecx
	andl	$0x000007ff,%ecx
	orl	%edx,%ecx
	jz	L_Re_normalise

LTruncate_53:
	movb	LOST_DOWN,FPU_bits_lost
	andl	$0xfffff800,%ebx	/* Truncate to 53 bits */
	jmp	L_Re_normalise


/*
 * Round etc to 64 bit precision.
 * In:  %eax:%ebx significand, %edx extension, %esi control word,
 *      PARAM5 sign of the value.  %ecx is used as scratch.
 * Only the extension in %edx is discarded; the significand is changed
 * only when rounding up.
 */
LRound_To_64:
	movl	%esi,%ecx
	andl	CW_RC,%ecx
	cmpl	RC_RND,%ecx
	je	LRound_nearest_64

	cmpl	RC_CHOP,%ecx
	je	LCheck_truncate_64

	cmpl	RC_UP,%ecx		/* Towards +infinity */
	je	LUp_64

	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
	je	LDown_64

#ifdef PARANOID
	jmp	L_bugged_round64
#endif /* PARANOID */

LUp_64:
	cmpb	SIGN_POS,PARAM5
	jne	LCheck_truncate_64	/* If negative then  up==truncate */

	orl	%edx,%edx
	jnz	LDo_64_round_up
	jmp	L_Re_normalise

LDown_64:
	cmpb	SIGN_POS,PARAM5
	je	LCheck_truncate_64	/* If positive then  down==truncate */

	orl	%edx,%edx
	jnz	LDo_64_round_up
	jmp	L_Re_normalise

LRound_nearest_64:
	cmpl	$0x80000000,%edx
	jc	LCheck_truncate_64	/* less than half */

	jne	LDo_64_round_up		/* greater than half */

	/* Now test for round-to-even */
	testb	$1,%bl
	jz	LCheck_truncate_64

LDo_64_round_up:
	movb	LOST_UP,FPU_bits_lost
	addl	$1,%ebx
	adcl	$0,%eax

/*
 * Common tail for every round-up path (24, 53 and 64 bit).
 * Entered with the carry flag holding the carry out of the significand
 * increment.
 */
LCheck_Round_Overflow:
	jnc	L_Re_normalise

	/* Overflow, adjust the result (significand to 1.0) */
	rcrl	$1,%eax
	rcrl	$1,%ebx
	incw	EXP(%edi)
	jmp	L_Re_normalise

LCheck_truncate_64:
	orl	%edx,%edx
	jz	L_Re_normalise

LTruncate_64:
	movb	LOST_DOWN,FPU_bits_lost
	/* Falls through into the code which follows (L_Re_normalise) */

/*
 * Rounding is complete: %eax:%ebx hold the rounded significand.
 * Undo any de-normalising shift, raise the precision flags, store the
 * result through %edi, convert the exponent to its stored form, apply
 * the sign, and return with the tag in %eax.
 */
L_Re_normalise:
	testb	$0xff,FPU_denormal
	jnz	Normalise_result

L_Normalised:
	movl	TAG_Valid,%edx

L_deNormalised:			/* Entered with the tag already in %edx */
	cmpb	LOST_UP,FPU_bits_lost
	je	L_precision_lost_up

	cmpb	LOST_DOWN,FPU_bits_lost
	je	L_precision_lost_down

L_no_precision_loss:
	/* store the result */

L_Store_significand:
	movl	%eax,SIGH(%edi)
	movl	%ebx,SIGL(%edi)

	cmpw	EXP_OVER,EXP(%edi)
	jge	L_overflow

	movl	%edx,%eax	/* Return value: the tag */

	/* Convert the exponent to 80x87 form. */
	addw	EXTENDED_Ebias,EXP(%edi)
	andw	$0x7fff,EXP(%edi)

fpu_reg_round_signed_special_exit:

	cmpb	SIGN_POS,PARAM5
	je	fpu_reg_round_special_exit

	orw	$0x8000,EXP(%edi)	/* Negative sign for the result. */

fpu_reg_round_special_exit:

#ifndef NON_REENTRANT_FPU
	/* Release the slot which held FPU_bits_lost / FPU_denormal;
	   the value popped is overwritten by the next pop. */
	popl	%ebx		/* adjust the stack pointer */
#endif /* NON_REENTRANT_FPU */

/* Unwind the frame which the caller (or FPU_round) set up and return. */
fpu_Arith_exit:
	popl	%ebx
	popl	%edi
	popl	%esi
	leave
	ret


/*
 * Set the FPU status flags to represent precision loss due to
 * round-up.
 * %edx (the tag) and %eax (ms half of the significand) are saved
 * around the call and restored before the result is stored.
 */
L_precision_lost_up:
	push	%edx
	push	%eax
	call	set_precision_flag_up
	popl	%eax
	popl	%edx
	jmp	L_no_precision_loss

/*
 * Set the FPU status flags to represent precision loss due to
 * truncation.
 * %edx (the tag) and %eax (ms half of the significand) are saved
 * around the call and restored before the result is stored.
 */
L_precision_lost_down:
	push	%edx
	push	%eax
	call	set_precision_flag_down
	popl	%eax
	popl	%edx
	jmp	L_no_precision_loss


/*
 * The number is a denormal (which might get rounded up to a normal)
 * Shift the number right the required number of bits, which will
 * have to be undone later...
 *
 * In:  %eax:%ebx significand, %edx extension, %ecx control word,
 *      %edi pointer to the FPU_REG.
 * Out: significand and extension shifted right, EXP(%edi) raised by the
 *      shift count, %ecx restored; continues at Denorm_done.
 * Any non-zero bits shifted out are or-ed into the low bit of the new
 * extension so that it remains non-zero whenever the result is inexact.
 */
L_Make_denorm:
	/* The action to be taken depends upon whether the underflow
	   exception is masked */
	testb	CW_Underflow,%cl		/* Underflow mask. */
	jz	Unmasked_underflow		/* Do not make a denormal. */

	/* FPU_denormal may be %esp relative, so it is written before
	   %ecx is pushed. */
	movb	DENORMAL,FPU_denormal

	pushl	%ecx		/* Save */
	movw	EXP_UNDER+1,%cx
	subw	EXP(%edi),%cx	/* %cx = number of bits to shift right */

	cmpw	$64,%cx	/* Shifts of 64 or more are handled separately */
	jnc	Denorm_shift_more_than_63

	cmpw	$32,%cx	/* shrd only works for 0..31 bits */
	jnc	Denorm_shift_more_than_32

/*
 * We got here without jumps by assuming that the most common requirement
 *   is for a small de-normalising shift.
 * Shift by [1..31] bits
 */
	addw	%cx,EXP(%edi)
	orl	%edx,%edx	/* extension */
	setne	%ch		/* Save whether %edx is non-zero */
	xorl	%edx,%edx
	shrd	%cl,%ebx,%edx
	shrd	%cl,%eax,%ebx
	shr	%cl,%eax
	orb	%ch,%dl
	popl	%ecx
	jmp	Denorm_done

/* Shift by [32..63] bits */
Denorm_shift_more_than_32:
	addw	%cx,EXP(%edi)
	subb	$32,%cl
	orl	%edx,%edx
	setne	%ch
	orb	%ch,%bl
	xorl	%edx,%edx
	shrd	%cl,%ebx,%edx
	shrd	%cl,%eax,%ebx
	shr	%cl,%eax
	orl	%edx,%edx		/* test these 32 bits */
	setne	%cl
	orb	%ch,%bl
	orb	%cl,%bl
	/* Complete the shift by moving everything down one 32 bit word */
	movl	%ebx,%edx
	movl	%eax,%ebx
	xorl	%eax,%eax
	popl	%ecx
	jmp	Denorm_done

/* Shift by [64..) bits */
Denorm_shift_more_than_63:
	cmpw	$64,%cx
	jne	Denorm_shift_more_than_64

/* Exactly 64 bit shift */
	addw	%cx,EXP(%edi)
	xorl	%ecx,%ecx
	orl	%edx,%edx
	setne	%cl
	orl	%ebx,%ebx
	setne	%ch
	orb	%ch,%cl
	orb	%cl,%al
	movl	%eax,%edx
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	popl	%ecx
	jmp	Denorm_done

Denorm_shift_more_than_64:
	movw	EXP_UNDER+1,EXP(%edi)
/* This is easy, %eax must be non-zero, so.. */
	movl	$1,%edx
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	popl	%ecx
	jmp	Denorm_done


/* Underflow is unmasked: record that, and round without de-normalising. */
Unmasked_underflow:
	movb	UNMASKED_UNDERFLOW,FPU_denormal
	jmp	Denorm_done


/*
 * Undo the de-normalisation.
 * Reached from L_Re_normalise when FPU_denormal is non-zero.
 * In:  %eax:%ebx rounded significand, %edi pointer to the FPU_REG.
 */
Normalise_result:
	cmpb	UNMASKED_UNDERFLOW,FPU_denormal
	je	Signal_underflow

/* The number must be a denormal if we got here. */
#ifdef PARANOID
	/* But check it... just in case. */
	cmpw	EXP_UNDER+1,EXP(%edi)
	jne	L_norm_bugged
#endif /* PARANOID */

#ifdef PECULIAR_486
	/*
	 * This implements a special feature of 80486 behaviour.
	 * Underflow will be signalled even if the number is
	 * not a denormal after rounding.
	 * This difference occurs only for masked underflow, and not
	 * in the unmasked case.
	 * Actual 80486 behaviour differs from this in some circumstances.
	 */
	orl	%eax,%eax		/* ms bits */
	js	LPseudoDenormal		/* Will be masked underflow */
#else
	orl	%eax,%eax		/* ms bits */
	js	L_Normalised		/* No longer a denormal */
#endif /* PECULIAR_486 */

	/* The flags are still those of the "orl %eax,%eax" above */
	jnz	LDenormal_adj_exponent

	orl	%ebx,%ebx
	jz	L_underflow_to_zero	/* The contents are zero */

LDenormal_adj_exponent:
	decw	EXP(%edi)

LPseudoDenormal:
	testb	$0xff,FPU_bits_lost	/* bits lost == underflow */
	movl	TAG_Special,%edx	/* movl leaves the flags intact */
	jz	L_deNormalised

	/* There must be a masked underflow */
	push	%eax
	pushl	EX_Underflow
	call	EXCEPTION
	popl	%eax			/* discard the argument */
	popl	%eax			/* restore the ms bits */
	movl	TAG_Special,%edx
	jmp	L_deNormalised


/*
 * The operations resulted in a number too small to represent.
 * Masked response.
 * Reached only when both %eax and %ebx are zero; flags precision loss
 * (down) and underflow, then stores a zero with exponent EXP_UNDER.
 */
L_underflow_to_zero:
	push	%eax
	call	set_precision_flag_down
	popl	%eax

	push	%eax
	pushl	EX_Underflow
	call	EXCEPTION
	popl	%eax			/* discard the argument */
	popl	%eax			/* restore %eax */

/* Reduce the exponent to EXP_UNDER */
	movw	EXP_UNDER,EXP(%edi)
	movl	TAG_Zero,%edx
	jmp	L_Store_significand


/*
 * The operations resulted in a number too large to represent.
 * Passes the result register to arith_overflow; %eax is not touched
 * after the call, so its value on return from arith_overflow becomes
 * this routine's return value.
 */
L_overflow:
	addw	EXTENDED_Ebias,EXP(%edi)	/* Set for unmasked response. */
	push	%edi
	call	arith_overflow
	pop	%edi
	jmp	fpu_reg_round_signed_special_exit


/*
 * Unmasked underflow: reached from Normalise_result when FPU_denormal
 * holds UNMASKED_UNDERFLOW.
 */
Signal_underflow:
	/* The number may have been changed to a non-denormal */
	/* by the rounding operations. */
	cmpw	EXP_UNDER,EXP(%edi)
	jle	Do_unmasked_underflow

	jmp	L_Normalised

Do_unmasked_underflow:
	/* Increase the exponent by the magic number (3*(1<<13) = 24576) */
	addw	$(3*(1<<13)),EXP(%edi)
	push	%eax
	pushl	EX_Underflow
	call	EXCEPTION
	popl	%eax			/* discard the argument */
	popl	%eax			/* restore the ms bits */
	jmp	L_Normalised


#ifdef PARANOID
/*
 * Internal error handlers.
 * Each one raises EX_INTERNAL or-ed with a code identifying the failing
 * check, discards the pushed argument into %ebx, and leaves through
 * L_exception_exit, which returns -1.
 */
#ifdef PECULIAR_486
L_bugged_denorm_486:
	pushl	EX_INTERNAL|0x236
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit
#else
L_bugged_denorm:
	pushl	EX_INTERNAL|0x230
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit
#endif /* PECULIAR_486 */

L_bugged_round24:
	pushl	EX_INTERNAL|0x231
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit

L_bugged_round53:
	pushl	EX_INTERNAL|0x232
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit

L_bugged_round64:
	pushl	EX_INTERNAL|0x233
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit

L_norm_bugged:
	pushl	EX_INTERNAL|0x234
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit

/* Only referenced by the check which is commented out at fpu_reg_round */
L_entry_bugged:
	pushl	EX_INTERNAL|0x235
	call	EXCEPTION
	popl	%ebx
L_exception_exit:
	mov	$-1,%eax
	jmp	fpu_reg_round_special_exit
#endif /* PARANOID */
