Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/math-emu/reg_round.S
26424 views
1
/* SPDX-License-Identifier: GPL-2.0 */
2
.file "reg_round.S"
3
/*---------------------------------------------------------------------------+
4
| reg_round.S |
5
| |
6
| Rounding/truncation/etc for FPU basic arithmetic functions. |
7
| |
8
| Copyright (C) 1993,1995,1997 |
9
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
10
| Australia. E-mail [email protected] |
11
| |
12
| This code has four possible entry points. |
13
| The following must be entered by a jmp instruction: |
14
| fpu_reg_round, fpu_reg_round_sqrt, and fpu_Arith_exit. |
15
| |
16
| The FPU_round entry point is intended to be used by C code. |
17
| From C, call as: |
18
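| int FPU_round(FPU_REG *arg, unsigned int extent, int dummy, |
| u16 control_w, u8 sign) |
| (the code reads the control word from PARAM4 and the sign from PARAM5; |
| the third argument is not used) |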
19
| |
20
| Return value is the tag of the answer, or-ed with FPU_Exception if |
21
| one was raised, or -1 on internal error. |
22
| |
23
| For correct "up" and "down" rounding, the argument must have the correct |
24
| sign. |
25
| |
26
+---------------------------------------------------------------------------*/
27
28
/*---------------------------------------------------------------------------+
29
| Four entry points. |
30
| |
31
| Needed by both the fpu_reg_round and fpu_reg_round_sqrt entry points: |
32
| %eax:%ebx 64 bit significand |
33
| %edx 32 bit extension of the significand |
34
| %edi pointer to an FPU_REG for the result to be stored |
35
| stack calling function must have set up a C stack frame and |
36
| pushed %esi, %edi, and %ebx |
37
| |
38
| Needed just for the fpu_reg_round_sqrt entry point: |
39
| %cx A control word in the same format as the FPU control word. |
40
| Otherwise, PARAM4 must give such a value. |
41
| |
42
| |
43
| The significand and its extension are assumed to be exact in the |
44
| following sense: |
45
| If the significand by itself is the exact result then the significand |
46
| extension (%edx) must contain 0, otherwise the significand extension |
47
| must be non-zero. |
48
| If the significand extension is non-zero then the significand is |
49
| smaller than the magnitude of the correct exact result by an amount |
50
| greater than zero and less than one ls bit of the significand. |
51
| The significand extension is only required to have three possible |
52
| non-zero values: |
53
| less than 0x80000000 <=> the significand is less than 1/2 an ls |
54
| bit smaller than the magnitude of the |
55
| true exact result. |
56
| exactly 0x80000000 <=> the significand is exactly 1/2 an ls bit |
57
| smaller than the magnitude of the true |
58
| exact result. |
59
| greater than 0x80000000 <=> the significand is more than 1/2 an ls |
60
| bit smaller than the magnitude of the |
61
| true exact result. |
62
| |
63
+---------------------------------------------------------------------------*/
64
65
/*---------------------------------------------------------------------------+
66
| The code in this module has become quite complex, but it should handle |
67
| all of the FPU flags which are set at this stage of the basic arithmetic |
68
| computations. |
69
| There are a few rare cases where the results are not set identically to |
70
| a real FPU. These require a bit more thought because at this stage the |
71
| results of the code here appear to be more consistent... |
72
| This may be changed in a future version. |
73
+---------------------------------------------------------------------------*/
74
75
76
#include "fpu_emu.h"
77
#include "exception.h"
78
#include "control_w.h"
79
80
/* Flags for FPU_bits_lost */
/* FPU_bits_lost is a one-byte local.  It is cleared to 0 at Denorm_done
   (no bits lost yet), set to LOST_UP when the significand was incremented
   and to LOST_DOWN when non-zero low bits were simply discarded.
   The '$' is part of each macro so that it can be used directly as an
   immediate operand of movb/cmpb. */
81
#define LOST_DOWN $1
82
#define LOST_UP $2
83
84
/* Flags for FPU_denormal */
/* FPU_denormal is a one-byte local.  0 means the exponent was above
   EXP_UNDER on entry; DENORMAL means the significand was shifted right by
   L_Make_denorm; UNMASKED_UNDERFLOW means the exponent was too small but
   the underflow mask bit was clear, so no shift was done. */
85
#define DENORMAL $1
86
#define UNMASKED_UNDERFLOW $2
87
88
89
#ifndef NON_REENTRANT_FPU
90
/* Make the code re-entrant by putting
91
local storage on the stack: */
/* The two bytes live in the dword pushed at fpu_reg_round.  Because they
   are addressed relative to %esp, they may only be referenced while %esp
   sits at that slot, i.e. never between a push and its matching pop. */
92
#define FPU_bits_lost (%esp)
93
#define FPU_denormal 1(%esp)
94
95
#else
96
/* Not re-entrant, so we can gain speed by putting
97
local storage in a static area: */
98
.data
99
.align 4,0
100
FPU_bits_lost:
101
.byte 0
102
FPU_denormal:
103
.byte 0
104
#endif /* NON_REENTRANT_FPU */
105
106
107
.text
108
.globl fpu_reg_round
109
.globl fpu_Arith_exit
110
111
/* Entry point when called from C */
/* Builds the stack frame that the jmp-only entry points expect (frame
   pointer plus saved %esi, %edi, %ebx), then loads the working registers:
     %edi      = PARAM1, pointer to the FPU_REG to be rounded in place
     %eax:%ebx = SIGH:SIGL of that register (64 bit significand)
     %edx      = PARAM2, the 32 bit significand extension
   Execution then falls through into fpu_reg_round. */
112
SYM_FUNC_START(FPU_round)
113
pushl %ebp
114
movl %esp,%ebp
115
pushl %esi
116
pushl %edi
117
pushl %ebx
118
119
movl PARAM1,%edi
120
movl SIGH(%edi),%eax
121
movl SIGL(%edi),%ebx
122
movl PARAM2,%edx
123
124
fpu_reg_round: /* Normal entry point */
/* The control word always comes from PARAM4; the sign of the result is
   read from PARAM5 further down (compared against SIGN_POS). */
125
movl PARAM4,%ecx
126
127
#ifndef NON_REENTRANT_FPU
/* Reserve one dword on the stack for FPU_bits_lost / FPU_denormal.  The
   value pushed is irrelevant: both bytes are written before they are
   read on every path through Denorm_done. */
128
pushl %ebx /* adjust the stack pointer */
129
#endif /* NON_REENTRANT_FPU */
130
131
#ifdef PARANOID
132
/* Cannot use this here yet */
133
/* orl %eax,%eax */
134
/* jns L_entry_bugged */
135
#endif /* PARANOID */
136
137
/* Signed 16 bit compare: exponent <= EXP_UNDER takes the denormal path. */
cmpw EXP_UNDER,EXP(%edi)
138
jle L_Make_denorm /* The number is a de-normal */
139
140
movb $0,FPU_denormal /* 0 -> not a de-normal */
141
142
/* Common continuation; L_Make_denorm and Unmasked_underflow rejoin here
   with FPU_denormal already set and %ecx holding the control word. */
Denorm_done:
143
movb $0,FPU_bits_lost /* No bits yet lost in rounding */
144
145
/* Keep the full control word in %esi (the rounding-control field is
   extracted from it later) and isolate the CW_PC field in %ecx to choose
   the target precision. */
movl %ecx,%esi
146
andl CW_PC,%ecx
147
cmpl PR_64_BITS,%ecx
148
je LRound_To_64
149
150
cmpl PR_53_BITS,%ecx
151
je LRound_To_53
152
153
cmpl PR_24_BITS,%ecx
154
je LRound_To_24
155
156
#ifdef PECULIAR_486
157
/* With the precision control bits set to 01 "(reserved)", a real 80486
158
behaves as if the precision control bits were set to 11 "64 bits" */
159
cmpl PR_RESERVED_BITS,%ecx
160
je LRound_To_64
161
#ifdef PARANOID
162
jmp L_bugged_denorm_486
163
#endif /* PARANOID */
164
#else
165
#ifdef PARANOID
166
jmp L_bugged_denorm /* There is no bug, just a bad control word */
167
#endif /* PARANOID */
168
#endif /* PECULIAR_486 */
/* Without PARANOID, an unmatched precision-control value falls through
   into the 24 bit rounding code that follows. */
169
170
171
/* Round etc to 24 bit precision */
/* In:  %eax:%ebx significand, %edx extension, %esi control word.
   The result keeps the top 24 bits of %eax; the low 8 bits of %eax, all
   of %ebx and %edx are the bits to be discarded.
   Out: %eax:%ebx rounded, FPU_bits_lost updated; control passes to
   L_Re_normalise, or to LCheck_Round_Overflow after an increment.
   %ecx is used as scratch. */
171
LRound_To_24:
172
/* Dispatch on the rounding-control field of the saved control word. */
movl %esi,%ecx
173
andl CW_RC,%ecx
174
cmpl RC_RND,%ecx
175
je LRound_nearest_24
176
177
cmpl RC_CHOP,%ecx
178
je LCheck_truncate_24
179
180
cmpl RC_UP,%ecx /* Towards +infinity */
181
je LUp_24
182
183
cmpl RC_DOWN,%ecx /* Towards -infinity */
184
je LDown_24
185
186
#ifdef PARANOID
187
jmp L_bugged_round24
188
#endif /* PARANOID */
/* Without PARANOID an unmatched value falls through into LUp_24. */
189
190
LUp_24:
191
cmpb SIGN_POS,PARAM5
192
jne LCheck_truncate_24 /* If negative then up==truncate */
193
194
jmp LCheck_24_round_up
195
196
LDown_24:
197
cmpb SIGN_POS,PARAM5
198
je LCheck_truncate_24 /* If positive then down==truncate */
199
200
/* Directed rounding away from zero: increment if any discarded bit
   (low 8 bits of %eax, %ebx, %edx) is set, otherwise the value is
   already exact at 24 bits. */
LCheck_24_round_up:
201
movl %eax,%ecx
202
andl $0x000000ff,%ecx
203
orl %ebx,%ecx
204
orl %edx,%ecx
205
jnz LDo_24_round_up
206
jmp L_Re_normalise
207
208
LRound_nearest_24:
209
/* Do rounding of the 24th bit if needed (nearest or even) */
/* 0x80 in the low 8 bits of %eax is exactly half of the last kept bit;
   the cmpl sets carry when the discarded byte is below that. */
210
movl %eax,%ecx
211
andl $0x000000ff,%ecx
212
cmpl $0x00000080,%ecx
213
jc LCheck_truncate_24 /* less than half, no increment needed */
214
215
jne LGreater_Half_24 /* greater than half, increment needed */
216
217
/* Possibly half, we need to check the ls bits */
218
orl %ebx,%ebx
219
jnz LGreater_Half_24 /* greater than half, increment needed */
220
221
orl %edx,%edx
222
jnz LGreater_Half_24 /* greater than half, increment needed */
223
224
/* Exactly half, increment only if 24th bit is 1 (round to even) */
/* Bit 0x100 of %eax is the least significant kept bit.  The jump goes
   straight to LDo_truncate_24 because the discarded bits are known to be
   non-zero here. */
225
testl $0x00000100,%eax
226
jz LDo_truncate_24
227
228
LGreater_Half_24: /* Rounding: increment at the 24th bit */
229
LDo_24_round_up:
230
andl $0xffffff00,%eax /* Truncate to 24 bits */
231
xorl %ebx,%ebx
232
movb LOST_UP,FPU_bits_lost
233
/* This addl must stay the last flag-setting instruction before the jump:
   LCheck_Round_Overflow tests the carry it produces. */
addl $0x00000100,%eax
234
jmp LCheck_Round_Overflow
235
236
/* Truncating modes: only clear bits (and record the loss) if there is
   something to discard. */
LCheck_truncate_24:
237
movl %eax,%ecx
238
andl $0x000000ff,%ecx
239
orl %ebx,%ecx
240
orl %edx,%ecx
241
jz L_Re_normalise /* No truncation needed */
242
243
LDo_truncate_24:
244
andl $0xffffff00,%eax /* Truncate to 24 bits */
245
xorl %ebx,%ebx
246
movb LOST_DOWN,FPU_bits_lost
247
jmp L_Re_normalise
249
250
251
/* Round etc to 53 bit precision */
/* In:  %eax:%ebx significand, %edx extension, %esi control word.
   The result keeps all of %eax and the top 21 bits of %ebx; the low 11
   bits of %ebx (mask 0x7ff) and %edx are the bits to be discarded.
   Out: %eax:%ebx rounded, FPU_bits_lost updated; control passes to
   L_Re_normalise, or to LCheck_Round_Overflow after an increment.
   %ecx is used as scratch. */
251
LRound_To_53:
252
/* Dispatch on the rounding-control field of the saved control word. */
movl %esi,%ecx
253
andl CW_RC,%ecx
254
cmpl RC_RND,%ecx
255
je LRound_nearest_53
256
257
cmpl RC_CHOP,%ecx
258
je LCheck_truncate_53
259
260
cmpl RC_UP,%ecx /* Towards +infinity */
261
je LUp_53
262
263
cmpl RC_DOWN,%ecx /* Towards -infinity */
264
je LDown_53
265
266
#ifdef PARANOID
267
jmp L_bugged_round53
268
#endif /* PARANOID */
/* Without PARANOID an unmatched value falls through into LUp_53. */
269
270
LUp_53:
271
cmpb SIGN_POS,PARAM5
272
jne LCheck_truncate_53 /* If negative then up==truncate */
273
274
jmp LCheck_53_round_up
275
276
LDown_53:
277
cmpb SIGN_POS,PARAM5
278
je LCheck_truncate_53 /* If positive then down==truncate */
279
280
/* Directed rounding away from zero: increment if any discarded bit is
   set, otherwise the value is already exact at 53 bits. */
LCheck_53_round_up:
281
movl %ebx,%ecx
282
andl $0x000007ff,%ecx
283
orl %edx,%ecx
284
jnz LDo_53_round_up
285
jmp L_Re_normalise
286
287
LRound_nearest_53:
288
/* Do rounding of the 53rd bit if needed (nearest or even) */
/* 0x400 in the low 11 bits of %ebx is exactly half of the last kept bit;
   the cmpl sets carry when the discarded field is below that. */
289
movl %ebx,%ecx
290
andl $0x000007ff,%ecx
291
cmpl $0x00000400,%ecx
292
jc LCheck_truncate_53 /* less than half, no increment needed */
293
294
jnz LGreater_Half_53 /* greater than half, increment needed */
295
296
/* Possibly half, we need to check the ls bits */
297
orl %edx,%edx
298
jnz LGreater_Half_53 /* greater than half, increment needed */
299
300
/* Exactly half, increment only if 53rd bit is 1 (round to even) */
/* Bit 0x800 of %ebx is the least significant kept bit.  The jump goes
   straight to LTruncate_53 because the discarded bits are known to be
   non-zero here. */
301
testl $0x00000800,%ebx
302
jz LTruncate_53
303
304
LGreater_Half_53: /* Rounding: increment at the 53rd bit */
305
LDo_53_round_up:
306
movb LOST_UP,FPU_bits_lost
307
andl $0xfffff800,%ebx /* Truncate to 53 bits */
308
addl $0x00000800,%ebx
309
/* Propagate the carry into the high word.  The carry out of this adcl
   is what LCheck_Round_Overflow tests, so nothing that alters the flags
   may be placed between here and that label. */
adcl $0,%eax
310
jmp LCheck_Round_Overflow
311
312
/* Truncating modes: only clear bits (and record the loss) if there is
   something to discard. */
LCheck_truncate_53:
313
movl %ebx,%ecx
314
andl $0x000007ff,%ecx
315
orl %edx,%ecx
316
jz L_Re_normalise
317
318
LTruncate_53:
319
movb LOST_DOWN,FPU_bits_lost
320
andl $0xfffff800,%ebx /* Truncate to 53 bits */
321
jmp L_Re_normalise
323
324
325
/* Round etc to 64 bit precision */
/* In:  %eax:%ebx significand, %edx extension, %esi control word.
   All 64 significand bits are kept, so only %edx decides the rounding.
   Out: %eax:%ebx rounded, FPU_bits_lost updated; every path ends up at
   L_Re_normalise.  %ecx is used as scratch. */
325
LRound_To_64:
326
/* Dispatch on the rounding-control field of the saved control word. */
movl %esi,%ecx
327
andl CW_RC,%ecx
328
cmpl RC_RND,%ecx
329
je LRound_nearest_64
330
331
cmpl RC_CHOP,%ecx
332
je LCheck_truncate_64
333
334
cmpl RC_UP,%ecx /* Towards +infinity */
335
je LUp_64
336
337
cmpl RC_DOWN,%ecx /* Towards -infinity */
338
je LDown_64
339
340
#ifdef PARANOID
341
jmp L_bugged_round64
342
#endif /* PARANOID */
/* Without PARANOID an unmatched value falls through into LUp_64. */
343
344
LUp_64:
345
cmpb SIGN_POS,PARAM5
346
jne LCheck_truncate_64 /* If negative then up==truncate */
347
348
/* Positive and rounding up: increment if the extension is non-zero. */
orl %edx,%edx
349
jnz LDo_64_round_up
350
jmp L_Re_normalise
351
352
LDown_64:
353
cmpb SIGN_POS,PARAM5
354
je LCheck_truncate_64 /* If positive then down==truncate */
355
356
/* Negative and rounding down: increment the magnitude if the extension
   is non-zero. */
orl %edx,%edx
357
jnz LDo_64_round_up
358
jmp L_Re_normalise
359
360
LRound_nearest_64:
361
/* 0x80000000 in %edx is exactly half of the last significand bit:
   carry set means below half, not-equal (without carry) means above. */
cmpl $0x80000000,%edx
362
jc LCheck_truncate_64
363
364
jne LDo_64_round_up
365
366
/* Now test for round-to-even */
/* Exactly half: increment only if the ls bit of the significand is 1. */
367
testb $1,%bl
368
jz LCheck_truncate_64
369
370
LDo_64_round_up:
371
movb LOST_UP,FPU_bits_lost
372
addl $1,%ebx
373
adcl $0,%eax
374
375
/* Shared by the 24, 53 and 64 bit increment paths.  Must be reached with
   the carry flag still holding the carry out of the increment of %eax. */
LCheck_Round_Overflow:
376
jnc L_Re_normalise
377
378
/* Overflow, adjust the result (significand to 1.0) */
/* The first rcrl rotates the set carry into the top bit of %eax; the
   second passes the bit shifted out of %eax into %ebx.  The exponent is
   incremented to compensate for the one bit right shift. */
379
rcrl $1,%eax
380
rcrl $1,%ebx
381
incw EXP(%edi)
382
jmp L_Re_normalise
383
384
/* Truncating modes: nothing to mask at 64 bits, so only record the loss
   when the extension is non-zero. */
LCheck_truncate_64:
385
orl %edx,%edx
386
jz L_Re_normalise
387
388
LTruncate_64:
389
/* Falls through into L_Re_normalise. */
movb LOST_DOWN,FPU_bits_lost
391
392
/* Common tail of all rounding paths.
   In: %eax:%ebx rounded significand, %edi result FPU_REG,
       FPU_bits_lost / FPU_denormal as set by the code above.
   Any non-zero FPU_denormal value (DENORMAL or UNMASKED_UNDERFLOW) is
   handled by Normalise_result first. */
L_Re_normalise:
393
testb $0xff,FPU_denormal
394
jnz Normalise_result
395
396
L_Normalised:
397
movl TAG_Valid,%edx
398
399
/* Entered with the result tag already in %edx (TAG_Valid from above, or
   TAG_Special when coming from the denormal code). */
L_deNormalised:
400
cmpb LOST_UP,FPU_bits_lost
401
je L_precision_lost_up
402
403
cmpb LOST_DOWN,FPU_bits_lost
404
je L_precision_lost_down
405
406
/* The two L_precision_lost_* helpers jump back here after setting the
   status flags. */
L_no_precision_loss:
407
/* store the result */
408
409
L_Store_significand:
410
movl %eax,SIGH(%edi)
411
movl %ebx,SIGL(%edi)
412
413
/* Signed compare: exponent >= EXP_OVER is an overflow. */
cmpw EXP_OVER,EXP(%edi)
414
jge L_overflow
415
416
/* The tag becomes the return value. */
movl %edx,%eax
417
418
/* Convert the exponent to 80x87 form. */
/* Add the bias and clear bit 15, which holds the sign and is set below
   if required. */
419
addw EXTENDED_Ebias,EXP(%edi)
420
andw $0x7fff,EXP(%edi)
421
422
/* L_overflow rejoins here with its own return value in %eax. */
fpu_reg_round_signed_special_exit:
423
424
cmpb SIGN_POS,PARAM5
425
je fpu_reg_round_special_exit
426
427
orw $0x8000,EXP(%edi) /* Negative sign for the result. */
428
429
/* The PARANOID error exit rejoins here with %eax = -1. */
fpu_reg_round_special_exit:
430
431
#ifndef NON_REENTRANT_FPU
/* Drop the local-storage dword pushed at fpu_reg_round; the value popped
   into %ebx is immediately overwritten by the real restore below. */
432
popl %ebx /* adjust the stack pointer */
433
#endif /* NON_REENTRANT_FPU */
434
435
/* Shared epilogue: undoes the frame built at FPU_round (or by the caller
   of a jmp-only entry point) and returns with the result in %eax. */
fpu_Arith_exit:
436
popl %ebx
437
popl %edi
438
popl %esi
439
leave
440
RET
441
442
443
/*
444
* Set the FPU status flags to represent precision loss due to
445
* round-up.
446
*/
/* Reached from L_deNormalised when FPU_bits_lost == LOST_UP.
   %edx (result tag) and %eax (high significand word) are saved around
   the call and restored afterwards; %ebx and %edi are left to the callee
   to preserve.  set_precision_flag_up is defined outside this file. */
447
L_precision_lost_up:
448
push %edx
449
push %eax
450
call set_precision_flag_up
451
popl %eax
452
popl %edx
453
jmp L_no_precision_loss
454
455
/*
456
* Set the FPU status flags to represent precision loss due to
457
* truncation.
458
*/
/* Reached from L_deNormalised when FPU_bits_lost == LOST_DOWN.
   Same save/restore pattern as L_precision_lost_up. */
459
L_precision_lost_down:
460
push %edx
461
push %eax
462
call set_precision_flag_down
463
popl %eax
464
popl %edx
465
jmp L_no_precision_loss
466
467
468
/*
469
* The number is a denormal (which might get rounded up to a normal)
470
* Shift the number right the required number of bits, which will
471
* have to be undone later...
472
*/
/* In:  %eax:%ebx significand, %edx extension, %ecx control word,
        EXP(%edi) <= EXP_UNDER.
   Out: significand and extension shifted right by
        (EXP_UNDER+1 - exponent) bits with EXP(%edi) set to EXP_UNDER+1,
        FPU_denormal set, %ecx restored; continues at Denorm_done.
   Throughout, bits shifted out below the extension are folded back into
   its ls bit so that "non-zero extension" still means "inexact". */
473
L_Make_denorm:
474
/* The action to be taken depends upon whether the underflow
475
exception is masked */
476
testb CW_Underflow,%cl /* Underflow mask. */
477
jz Unmasked_underflow /* Do not make a denormal. */
478
479
/* Must be written before the pushl below: FPU_denormal is addressed
   relative to %esp in the re-entrant build. */
movb DENORMAL,FPU_denormal
480
481
pushl %ecx /* Save */
482
/* %cx = required right shift = EXP_UNDER+1 - exponent (at least 1). */
movw EXP_UNDER+1,%cx
483
subw EXP(%edi),%cx
484
485
cmpw $64,%cx /* 64 or more: the whole significand is shifted out */
486
jnc Denorm_shift_more_than_63
487
488
cmpw $32,%cx /* shrd only works for 0..31 bits */
489
jnc Denorm_shift_more_than_32
490
491
/*
492
* We got here without jumps by assuming that the most common requirement
493
* is for a small de-normalising shift.
494
* Shift by [1..31] bits
495
*/
496
/* exponent + count == EXP_UNDER+1 */
addw %cx,EXP(%edi)
497
orl %edx,%edx /* extension */
498
/* Only %ch is written, so %cl still holds the shift count. */
setne %ch /* Save whether %edx is non-zero */
499
/* The new extension is the bits shifted out of %ebx; the old extension
   survives only as a sticky bit. */
xorl %edx,%edx
500
shrd %cl,%ebx,%edx
501
shrd %cl,%eax,%ebx
502
shr %cl,%eax
503
orb %ch,%dl
504
popl %ecx
505
jmp Denorm_done
506
507
/* Shift by [32..63] bits */
/* Done as a shift by (count - 32) followed by moving each register down
   one position: %eax -> %ebx -> %edx. */
508
Denorm_shift_more_than_32:
509
addw %cx,EXP(%edi)
510
subb $32,%cl
511
orl %edx,%edx
512
setne %ch
513
/* Fold the old extension into the ls bit of %ebx as a sticky bit. */
orb %ch,%bl
514
xorl %edx,%edx
515
shrd %cl,%ebx,%edx
516
shrd %cl,%eax,%ebx
517
shr %cl,%eax
518
orl %edx,%edx /* test these 32 bits */
519
/* %cl is free again: reuse it for the sticky bit of the bits that were
   just shifted out of %ebx. */
setne %cl
520
orb %ch,%bl
521
orb %cl,%bl
522
movl %ebx,%edx
523
movl %eax,%ebx
524
xorl %eax,%eax
525
popl %ecx
526
jmp Denorm_done
527
528
/* Shift by [64..) bits */
529
Denorm_shift_more_than_63:
530
cmpw $64,%cx
531
jne Denorm_shift_more_than_64
532
533
/* Exactly 64 bit shift */
/* The high word becomes the extension and the significand becomes zero;
   the old %ebx and %edx survive only as a sticky bit in %al. */
534
addw %cx,EXP(%edi)
535
xorl %ecx,%ecx
536
orl %edx,%edx
537
setne %cl
538
orl %ebx,%ebx
539
setne %ch
540
orb %ch,%cl
541
orb %cl,%al
542
movl %eax,%edx
543
xorl %eax,%eax
544
xorl %ebx,%ebx
545
popl %ecx
546
jmp Denorm_done
547
548
Denorm_shift_more_than_64:
549
movw EXP_UNDER+1,EXP(%edi)
550
/* This is easy, %eax must be non-zero, so.. */
/* ..the result is a zero significand with a small non-zero extension
   (inexact, less than half an ls bit). */
551
movl $1,%edx
552
xorl %eax,%eax
553
xorl %ebx,%ebx
554
popl %ecx
555
jmp Denorm_done
556
557
558
/* Underflow exception not masked: leave the significand and exponent
   untouched and just remember the condition.  %ecx was not pushed on
   this path, so there is nothing to pop. */
Unmasked_underflow:
559
movb UNMASKED_UNDERFLOW,FPU_denormal
560
jmp Denorm_done
561
562
563
/* Undo the de-normalisation. */
/* Reached from L_Re_normalise when FPU_denormal is non-zero.
   In:  %eax:%ebx rounded significand, EXP(%edi) == EXP_UNDER+1 on the
        masked (DENORMAL) path.
   Out: jumps to L_Normalised, L_deNormalised (with %edx = TAG_Special),
        L_underflow_to_zero or Signal_underflow. */
563
Normalise_result:
564
cmpb UNMASKED_UNDERFLOW,FPU_denormal
565
je Signal_underflow
566
567
/* The number must be a denormal if we got here. */
568
#ifdef PARANOID
569
/* But check it... just in case. */
570
cmpw EXP_UNDER+1,EXP(%edi)
571
jne L_norm_bugged
572
#endif /* PARANOID */
573
574
#ifdef PECULIAR_486
575
/*
576
* This implements a special feature of 80486 behaviour.
577
* Underflow will be signaled even if the number is
578
* not a denormal after rounding.
579
* This difference occurs only for masked underflow, and not
580
* in the unmasked case.
581
* Actual 80486 behaviour differs from this in some circumstances.
582
*/
583
orl %eax,%eax /* ms bits */
584
js LPseudoDenormal /* Will be masked underflow */
585
#else
586
orl %eax,%eax /* ms bits */
587
js L_Normalised /* No longer a denormal */
588
#endif /* PECULIAR_486 */
589
590
/* Still using the flags from the orl above (js does not change them):
   a non-zero high word means the value is a non-zero denormal. */
jnz LDenormal_adj_exponent
591
592
orl %ebx,%ebx
593
jz L_underflow_to_zero /* The contents are zero */
594
595
/* Take the exponent from EXP_UNDER+1 back down to EXP_UNDER. */
LDenormal_adj_exponent:
596
decw EXP(%edi)
597
598
/* With PECULIAR_486 a result whose top bit became set arrives here
   directly, keeping the exponent at EXP_UNDER+1. */
LPseudoDenormal:
599
testb $0xff,FPU_bits_lost /* bits lost == underflow */
600
/* movl does not alter the flags, so the jz below still tests the testb. */
movl TAG_Special,%edx
601
jz L_deNormalised
602
603
/* There must be a masked underflow */
/* %eax is saved around the call: the first popl discards the pushed
   EX_Underflow argument, the second restores %eax.  %edx is reloaded
   afterwards rather than saved. */
604
push %eax
605
pushl EX_Underflow
606
call EXCEPTION
607
popl %eax
608
popl %eax
609
movl TAG_Special,%edx
610
jmp L_deNormalised
612
613
614
/*
614
* The operations resulted in a number too small to represent.
615
* Masked response.
616
*/
/* Reached from Normalise_result when both %eax and %ebx are zero after
   rounding.  Flags the precision loss and the underflow, then stores a
   zero with tag TAG_Zero.  The jump goes straight to L_Store_significand,
   bypassing the FPU_bits_lost checks at L_deNormalised, since the
   precision flag has already been set here. */
617
L_underflow_to_zero:
618
/* %eax is saved and restored around each call. */
push %eax
619
call set_precision_flag_down
620
popl %eax
621
622
/* First popl discards the EX_Underflow argument, second restores %eax. */
push %eax
623
pushl EX_Underflow
624
call EXCEPTION
625
popl %eax
626
popl %eax
627
628
/* Reduce the exponent to EXP_UNDER */
629
movw EXP_UNDER,EXP(%edi)
630
movl TAG_Zero,%edx
631
jmp L_Store_significand
633
634
635
/* The operations resulted in a number too large to represent. */
/* Reached from L_Store_significand when EXP(%edi) >= EXP_OVER; the
   significand has already been stored.  The result register pointer is
   passed to arith_overflow on the stack and reloaded afterwards.  %eax
   is not touched between the call and the return, so whatever
   arith_overflow leaves there becomes this routine's return value
   (NOTE(review): presumably the tag/exception code - confirm against
   arith_overflow).  The sign is still applied on the way out. */
635
L_overflow:
636
addw EXTENDED_Ebias,EXP(%edi) /* Set for unmasked response. */
637
push %edi
638
call arith_overflow
639
pop %edi
640
jmp fpu_reg_round_signed_special_exit
642
643
644
/* Reached from Normalise_result when FPU_denormal == UNMASKED_UNDERFLOW,
   i.e. the exponent was <= EXP_UNDER on entry but the significand was
   rounded without being de-normalised. */
Signal_underflow:
644
/* The number may have been changed to a non-denormal */
645
/* by the rounding operations. */
/* (LCheck_Round_Overflow increments the exponent when the rounding
   increment carries out of the significand.) */
646
cmpw EXP_UNDER,EXP(%edi)
647
jle Do_unmasked_underflow
648
649
jmp L_Normalised
650
651
Do_unmasked_underflow:
652
/* Increase the exponent by the magic number */
/* 3*(1<<13) == 24576 (0x6000).
   NOTE(review): this looks like the exponent adjustment an x87 applies
   to the stored result on an unmasked underflow - confirm. */
653
addw $(3*(1<<13)),EXP(%edi)
654
/* %eax is saved around the call: the first popl discards the pushed
   EX_Underflow argument, the second restores %eax. */
push %eax
655
pushl EX_Underflow
656
call EXCEPTION
657
popl %eax
658
popl %eax
659
jmp L_Normalised
661
662
663
/* Internal-error handlers, built only with PARANOID.  Each one pushes
   EX_INTERNAL or-ed with a code identifying the failing check, calls
   EXCEPTION, and pops the argument into %ebx purely to rebalance the
   stack (the caller's %ebx is restored from the frame at exit).  All of
   them end at L_exception_exit, which returns -1. */
#ifdef PARANOID
663
#ifdef PECULIAR_486
664
/* Precision-control field matched none of the accepted values. */
L_bugged_denorm_486:
665
pushl EX_INTERNAL|0x236
666
call EXCEPTION
667
popl %ebx
668
jmp L_exception_exit
669
#else
670
/* Precision-control field matched none of the accepted values. */
L_bugged_denorm:
671
pushl EX_INTERNAL|0x230
672
call EXCEPTION
673
popl %ebx
674
jmp L_exception_exit
675
#endif /* PECULIAR_486 */
676
677
/* Rounding-control field not recognised in LRound_To_24. */
L_bugged_round24:
678
pushl EX_INTERNAL|0x231
679
call EXCEPTION
680
popl %ebx
681
jmp L_exception_exit
682
683
/* Rounding-control field not recognised in LRound_To_53. */
L_bugged_round53:
684
pushl EX_INTERNAL|0x232
685
call EXCEPTION
686
popl %ebx
687
jmp L_exception_exit
688
689
/* Rounding-control field not recognised in LRound_To_64. */
L_bugged_round64:
690
pushl EX_INTERNAL|0x233
691
call EXCEPTION
692
popl %ebx
693
jmp L_exception_exit
694
695
/* Normalise_result found an exponent other than EXP_UNDER+1. */
L_norm_bugged:
696
pushl EX_INTERNAL|0x234
697
call EXCEPTION
698
popl %ebx
699
jmp L_exception_exit
700
701
/* The only jump to this label in this file is in the commented-out
   check at fpu_reg_round. */
L_entry_bugged:
702
pushl EX_INTERNAL|0x235
703
call EXCEPTION
704
popl %ebx
705
L_exception_exit:
706
/* -1 is the internal-error return value; the exit path used skips the
   sign fix-up and goes straight to the stack clean-up. */
mov $-1,%eax
707
jmp fpu_reg_round_special_exit
708
#endif /* PARANOID */
709
710
SYM_FUNC_END(FPU_round)
712
713