CoCalc -- softfloat.c

GitHub Repository: freebsd/freebsd-src
Path: blob/main/lib/libc/softfloat/bits32/softfloat.c
³⁹⁵³⁰ views
1
/* $NetBSD: softfloat.c,v 1.1 2002/05/21 23:51:07 bjh21 Exp $ */
2

3
/*
4
 * This version hacked for use with gcc -msoft-float by bjh21.
5
 * (Mostly a case of #ifdefing out things GCC doesn't need or provides
6
 *  itself).
7
 */
8

9
/*
10
 * Things you may want to define:
11
 *
12
 * SOFTFLOAT_FOR_GCC - build only those functions necessary for GCC (with
13
 *   -msoft-float) to work.  Include "softfloat-for-gcc.h" to get them
14
 *   properly renamed.
15
 */
16

17
/*
18
 * This differs from the standard bits32/softfloat.c in that float64
19
 * is defined to be a 64-bit integer rather than a structure.  The
20
 * structure is float64s, with translation between the two going via
21
 * float64u.
22
 */
23

24
/*
25
===============================================================================
26

27
This C source file is part of the SoftFloat IEC/IEEE Floating-Point
28
Arithmetic Package, Release 2a.
29

30
Written by John R. Hauser.  This work was made possible in part by the
31
International Computer Science Institute, located at Suite 600, 1947 Center
32
Street, Berkeley, California 94704.  Funding was partially provided by the
33
National Science Foundation under grant MIP-9311980.  The original version
34
of this code was written as part of a project to build a fixed-point vector
35
processor in collaboration with the University of California at Berkeley,
36
overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
37
is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
38
arithmetic/SoftFloat.html'.
39

40
THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
41
has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
42
TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
43
PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
44
AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
45

46
Derivative works are acceptable, even for commercial purposes, so long as
47
(1) they include prominent notice that the work is derivative, and (2) they
48
include prominent notice akin to these four paragraphs for those parts of
49
this code that are retained.
50

51
===============================================================================
52
*/
53

54
#ifdef SOFTFLOAT_FOR_GCC
55
#include "softfloat-for-gcc.h"
56
#endif
57

58
#include "milieu.h"
59
#include "softfloat.h"
60

61
/*
62
 * Conversions between floats as stored in memory and floats as
63
 * SoftFloat uses them
64
 */
65
#ifndef FLOAT64_DEMANGLE
66
#define FLOAT64_DEMANGLE(a)	(a)
67
#endif
68
#ifndef FLOAT64_MANGLE
69
#define FLOAT64_MANGLE(a)	(a)
70
#endif
71

72
/*
73
-------------------------------------------------------------------------------
74
Floating-point rounding mode and exception flags.
75
-------------------------------------------------------------------------------
76
*/
77
int float_rounding_mode = float_round_nearest_even;
78
int float_exception_flags = 0;
79

80
/*
81
-------------------------------------------------------------------------------
82
Primitive arithmetic functions, including multi-word arithmetic, and
83
division and square root approximations.  (Can be specialized to target if
84
desired.)
85
-------------------------------------------------------------------------------
86
*/
87
#include "softfloat-macros"
88

89
/*
90
-------------------------------------------------------------------------------
91
Functions and definitions to determine:  (1) whether tininess for underflow
92
is detected before or after rounding by default, (2) what (if anything)
93
happens when exceptions are raised, (3) how signaling NaNs are distinguished
94
from quiet NaNs, (4) the default generated quiet NaNs, and (4) how NaNs
95
are propagated from function inputs to output.  These details are target-
96
specific.
97
-------------------------------------------------------------------------------
98
*/
99
#include "softfloat-specialize"
100

101
/*
102
-------------------------------------------------------------------------------
103
Returns the fraction bits of the single-precision floating-point value `a'.
104
-------------------------------------------------------------------------------
105
*/
106
INLINE bits32 extractFloat32Frac( float32 a )
107
{
108

109
    return a & 0x007FFFFF;
110

111
}
112

113
/*
114
-------------------------------------------------------------------------------
115
Returns the exponent bits of the single-precision floating-point value `a'.
116
-------------------------------------------------------------------------------
117
*/
118
INLINE int16 extractFloat32Exp( float32 a )
119
{
120

121
    return ( a>>23 ) & 0xFF;
122

123
}
124

125
/*
126
-------------------------------------------------------------------------------
127
Returns the sign bit of the single-precision floating-point value `a'.
128
-------------------------------------------------------------------------------
129
*/
130
INLINE flag extractFloat32Sign( float32 a )
131
{
132

133
    return a>>31;
134

135
}
136

137
/*
138
-------------------------------------------------------------------------------
139
Normalizes the subnormal single-precision floating-point value represented
140
by the denormalized significand `aSig'.  The normalized exponent and
141
significand are stored at the locations pointed to by `zExpPtr' and
142
`zSigPtr', respectively.
143
-------------------------------------------------------------------------------
144
*/
145
static void
146
 normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
147
{
148
    int8 shiftCount;
149

150
    shiftCount = countLeadingZeros32( aSig ) - 8;
151
    *zSigPtr = aSig<<shiftCount;
152
    *zExpPtr = 1 - shiftCount;
153

154
}
155

156
/*
157
-------------------------------------------------------------------------------
158
Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
159
single-precision floating-point value, returning the result.  After being
160
shifted into the proper positions, the three fields are simply added
161
together to form the result.  This means that any integer portion of `zSig'
162
will be added into the exponent.  Since a properly normalized significand
163
will have an integer portion equal to 1, the `zExp' input should be 1 less
164
than the desired result exponent whenever `zSig' is a complete, normalized
165
significand.
166
-------------------------------------------------------------------------------
167
*/
168
INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
169
{
170

171
    return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
172

173
}
174

175
/*
176
-------------------------------------------------------------------------------
177
Takes an abstract floating-point value having sign `zSign', exponent `zExp',
178
and significand `zSig', and returns the proper single-precision floating-
179
point value corresponding to the abstract input.  Ordinarily, the abstract
180
value is simply rounded and packed into the single-precision format, with
181
the inexact exception raised if the abstract input cannot be represented
182
exactly.  However, if the abstract value is too large, the overflow and
183
inexact exceptions are raised and an infinity or maximal finite value is
184
returned.  If the abstract value is too small, the input value is rounded to
185
a subnormal number, and the underflow and inexact exceptions are raised if
186
the abstract input cannot be represented exactly as a subnormal single-
187
precision floating-point number.
188
    The input significand `zSig' has its binary point between bits 30
189
and 29, which is 7 bits to the left of the usual location.  This shifted
190
significand must be normalized or smaller.  If `zSig' is not normalized,
191
`zExp' must be 0; in that case, the result returned is a subnormal number,
192
and it must not require rounding.  In the usual case that `zSig' is
193
normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
194
The handling of underflow and overflow follows the IEC/IEEE Standard for
195
Binary Floating-Point Arithmetic.
196
-------------------------------------------------------------------------------
197
*/
198
static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
199
{
200
    int8 roundingMode;
201
    flag roundNearestEven;
202
    int8 roundIncrement, roundBits;
203
    flag isTiny;
204

205
    roundingMode = float_rounding_mode;
206
    roundNearestEven = roundingMode == float_round_nearest_even;
207
    roundIncrement = 0x40;
208
    if ( ! roundNearestEven ) {
209
        if ( roundingMode == float_round_to_zero ) {
210
            roundIncrement = 0;
211
        }
212
        else {
213
            roundIncrement = 0x7F;
214
            if ( zSign ) {
215
                if ( roundingMode == float_round_up ) roundIncrement = 0;
216
            }
217
            else {
218
                if ( roundingMode == float_round_down ) roundIncrement = 0;
219
            }
220
        }
221
    }
222
    roundBits = zSig & 0x7F;
223
    if ( 0xFD <= (bits16) zExp ) {
224
        if (    ( 0xFD < zExp )
225
             || (    ( zExp == 0xFD )
226
                  && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
227
           ) {
228
            float_raise( float_flag_overflow | float_flag_inexact );
229
            return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
230
        }
231
        if ( zExp < 0 ) {
232
            isTiny =
233
                   ( float_detect_tininess == float_tininess_before_rounding )
234
                || ( zExp < -1 )
235
                || ( zSig + roundIncrement < 0x80000000 );
236
            shift32RightJamming( zSig, - zExp, &zSig );
237
            zExp = 0;
238
            roundBits = zSig & 0x7F;
239
            if ( isTiny && roundBits ) float_raise( float_flag_underflow );
240
        }
241
    }
242
    if ( roundBits ) float_exception_flags |= float_flag_inexact;
243
    zSig = ( zSig + roundIncrement )>>7;
244
    zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
245
    if ( zSig == 0 ) zExp = 0;
246
    return packFloat32( zSign, zExp, zSig );
247

248
}
249

250
/*
251
-------------------------------------------------------------------------------
252
Takes an abstract floating-point value having sign `zSign', exponent `zExp',
253
and significand `zSig', and returns the proper single-precision floating-
254
point value corresponding to the abstract input.  This routine is just like
255
`roundAndPackFloat32' except that `zSig' does not have to be normalized.
256
Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
257
floating-point exponent.
258
-------------------------------------------------------------------------------
259
*/
260
static float32
261
 normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
262
{
263
    int8 shiftCount;
264

265
    shiftCount = countLeadingZeros32( zSig ) - 1;
266
    return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount );
267

268
}
269

270
/*
271
-------------------------------------------------------------------------------
272
Returns the least-significant 32 fraction bits of the double-precision
273
floating-point value `a'.
274
-------------------------------------------------------------------------------
275
*/
276
INLINE bits32 extractFloat64Frac1( float64 a )
277
{
278

279
    return FLOAT64_DEMANGLE(a) & LIT64( 0x00000000FFFFFFFF );
280

281
}
282

283
/*
284
-------------------------------------------------------------------------------
285
Returns the most-significant 20 fraction bits of the double-precision
286
floating-point value `a'.
287
-------------------------------------------------------------------------------
288
*/
289
INLINE bits32 extractFloat64Frac0( float64 a )
290
{
291

292
    return ( FLOAT64_DEMANGLE(a)>>32 ) & 0x000FFFFF;
293

294
}
295

296
/*
297
-------------------------------------------------------------------------------
298
Returns the exponent bits of the double-precision floating-point value `a'.
299
-------------------------------------------------------------------------------
300
*/
301
INLINE int16 extractFloat64Exp( float64 a )
302
{
303

304
    return ( FLOAT64_DEMANGLE(a)>>52 ) & 0x7FF;
305

306
}
307

308
/*
309
-------------------------------------------------------------------------------
310
Returns the sign bit of the double-precision floating-point value `a'.
311
-------------------------------------------------------------------------------
312
*/
313
INLINE flag extractFloat64Sign( float64 a )
314
{
315

316
    return FLOAT64_DEMANGLE(a)>>63;
317

318
}
319

320
/*
321
-------------------------------------------------------------------------------
322
Normalizes the subnormal double-precision floating-point value represented
323
by the denormalized significand formed by the concatenation of `aSig0' and
324
`aSig1'.  The normalized exponent is stored at the location pointed to by
325
`zExpPtr'.  The most significant 21 bits of the normalized significand are
326
stored at the location pointed to by `zSig0Ptr', and the least significant
327
32 bits of the normalized significand are stored at the location pointed to
328
by `zSig1Ptr'.
329
-------------------------------------------------------------------------------
330
*/
331
static void
332
 normalizeFloat64Subnormal(
333
     bits32 aSig0,
334
     bits32 aSig1,
335
     int16 *zExpPtr,
336
     bits32 *zSig0Ptr,
337
     bits32 *zSig1Ptr
338
 )
339
{
340
    int8 shiftCount;
341

342
    if ( aSig0 == 0 ) {
343
        shiftCount = countLeadingZeros32( aSig1 ) - 11;
344
        if ( shiftCount < 0 ) {
345
            *zSig0Ptr = aSig1>>( - shiftCount );
346
            *zSig1Ptr = aSig1<<( shiftCount & 31 );
347
        }
348
        else {
349
            *zSig0Ptr = aSig1<<shiftCount;
350
            *zSig1Ptr = 0;
351
        }
352
        *zExpPtr = - shiftCount - 31;
353
    }
354
    else {
355
        shiftCount = countLeadingZeros32( aSig0 ) - 11;
356
        shortShift64Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
357
        *zExpPtr = 1 - shiftCount;
358
    }
359

360
}
361

362
/*
363
-------------------------------------------------------------------------------
364
Packs the sign `zSign', the exponent `zExp', and the significand formed by
365
the concatenation of `zSig0' and `zSig1' into a double-precision floating-
366
point value, returning the result.  After being shifted into the proper
367
positions, the three fields `zSign', `zExp', and `zSig0' are simply added
368
together to form the most significant 32 bits of the result.  This means
369
that any integer portion of `zSig0' will be added into the exponent.  Since
370
a properly normalized significand will have an integer portion equal to 1,
371
the `zExp' input should be 1 less than the desired result exponent whenever
372
`zSig0' and `zSig1' concatenated form a complete, normalized significand.
373
-------------------------------------------------------------------------------
374
*/
375
INLINE float64
376
 packFloat64( flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1 )
377
{
378

379
    return FLOAT64_MANGLE( ( ( (bits64) zSign )<<63 ) +
380
			   ( ( (bits64) zExp )<<52 ) + 
381
			   ( ( (bits64) zSig0 )<<32 ) + zSig1 );
382

383

384
}
385

386
/*
387
-------------------------------------------------------------------------------
388
Takes an abstract floating-point value having sign `zSign', exponent `zExp',
389
and extended significand formed by the concatenation of `zSig0', `zSig1',
390
and `zSig2', and returns the proper double-precision floating-point value
391
corresponding to the abstract input.  Ordinarily, the abstract value is
392
simply rounded and packed into the double-precision format, with the inexact
393
exception raised if the abstract input cannot be represented exactly.
394
However, if the abstract value is too large, the overflow and inexact
395
exceptions are raised and an infinity or maximal finite value is returned.
396
If the abstract value is too small, the input value is rounded to a
397
subnormal number, and the underflow and inexact exceptions are raised if the
398
abstract input cannot be represented exactly as a subnormal double-precision
399
floating-point number.
400
    The input significand must be normalized or smaller.  If the input
401
significand is not normalized, `zExp' must be 0; in that case, the result
402
returned is a subnormal number, and it must not require rounding.  In the
403
usual case that the input significand is normalized, `zExp' must be 1 less
404
than the ``true'' floating-point exponent.  The handling of underflow and
405
overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
406
-------------------------------------------------------------------------------
407
*/
408
static float64
409
 roundAndPackFloat64(
410
     flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1, bits32 zSig2 )
411
{
412
    int8 roundingMode;
413
    flag roundNearestEven, increment, isTiny;
414

415
    roundingMode = float_rounding_mode;
416
    roundNearestEven = ( roundingMode == float_round_nearest_even );
417
    increment = ( (sbits32) zSig2 < 0 );
418
    if ( ! roundNearestEven ) {
419
        if ( roundingMode == float_round_to_zero ) {
420
            increment = 0;
421
        }
422
        else {
423
            if ( zSign ) {
424
                increment = ( roundingMode == float_round_down ) && zSig2;
425
            }
426
            else {
427
                increment = ( roundingMode == float_round_up ) && zSig2;
428
            }
429
        }
430
    }
431
    if ( 0x7FD <= (bits16) zExp ) {
432
        if (    ( 0x7FD < zExp )
433
             || (    ( zExp == 0x7FD )
434
                  && eq64( 0x001FFFFF, 0xFFFFFFFF, zSig0, zSig1 )
435
                  && increment
436
                )
437
           ) {
438
            float_raise( float_flag_overflow | float_flag_inexact );
439
            if (    ( roundingMode == float_round_to_zero )
440
                 || ( zSign && ( roundingMode == float_round_up ) )
441
                 || ( ! zSign && ( roundingMode == float_round_down ) )
442
               ) {
443
                return packFloat64( zSign, 0x7FE, 0x000FFFFF, 0xFFFFFFFF );
444
            }
445
            return packFloat64( zSign, 0x7FF, 0, 0 );
446
        }
447
        if ( zExp < 0 ) {
448
            isTiny =
449
                   ( float_detect_tininess == float_tininess_before_rounding )
450
                || ( zExp < -1 )
451
                || ! increment
452
                || lt64( zSig0, zSig1, 0x001FFFFF, 0xFFFFFFFF );
453
            shift64ExtraRightJamming(
454
                zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
455
            zExp = 0;
456
            if ( isTiny && zSig2 ) float_raise( float_flag_underflow );
457
            if ( roundNearestEven ) {
458
                increment = ( (sbits32) zSig2 < 0 );
459
            }
460
            else {
461
                if ( zSign ) {
462
                    increment = ( roundingMode == float_round_down ) && zSig2;
463
                }
464
                else {
465
                    increment = ( roundingMode == float_round_up ) && zSig2;
466
                }
467
            }
468
        }
469
    }
470
    if ( zSig2 ) float_exception_flags |= float_flag_inexact;
471
    if ( increment ) {
472
        add64( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
473
        zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
474
    }
475
    else {
476
        if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
477
    }
478
    return packFloat64( zSign, zExp, zSig0, zSig1 );
479

480
}
481

482
/*
483
-------------------------------------------------------------------------------
484
Takes an abstract floating-point value having sign `zSign', exponent `zExp',
485
and significand formed by the concatenation of `zSig0' and `zSig1', and
486
returns the proper double-precision floating-point value corresponding
487
to the abstract input.  This routine is just like `roundAndPackFloat64'
488
except that the input significand has fewer bits and does not have to be
489
normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
490
point exponent.
491
-------------------------------------------------------------------------------
492
*/
493
static float64
494
 normalizeRoundAndPackFloat64(
495
     flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1 )
496
{
497
    int8 shiftCount;
498
    bits32 zSig2;
499

500
    if ( zSig0 == 0 ) {
501
        zSig0 = zSig1;
502
        zSig1 = 0;
503
        zExp -= 32;
504
    }
505
    shiftCount = countLeadingZeros32( zSig0 ) - 11;
506
    if ( 0 <= shiftCount ) {
507
        zSig2 = 0;
508
        shortShift64Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
509
    }
510
    else {
511
        shift64ExtraRightJamming(
512
            zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
513
    }
514
    zExp -= shiftCount;
515
    return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
516

517
}
518

519
/*
520
-------------------------------------------------------------------------------
521
Returns the result of converting the 32-bit two's complement integer `a' to
522
the single-precision floating-point format.  The conversion is performed
523
according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
524
-------------------------------------------------------------------------------
525
*/
526
float32 int32_to_float32( int32 a )
527
{
528
    flag zSign;
529

530
    if ( a == 0 ) return 0;
531
    if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
532
    zSign = ( a < 0 );
533
    return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a );
534

535
}
536

537
/*
538
-------------------------------------------------------------------------------
539
Returns the result of converting the 32-bit two's complement integer `a' to
540
the double-precision floating-point format.  The conversion is performed
541
according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
542
-------------------------------------------------------------------------------
543
*/
544
float64 int32_to_float64( int32 a )
545
{
546
    flag zSign;
547
    bits32 absA;
548
    int8 shiftCount;
549
    bits32 zSig0, zSig1;
550

551
    if ( a == 0 ) return packFloat64( 0, 0, 0, 0 );
552
    zSign = ( a < 0 );
553
    absA = zSign ? - a : a;
554
    shiftCount = countLeadingZeros32( absA ) - 11;
555
    if ( 0 <= shiftCount ) {
556
        zSig0 = absA<<shiftCount;
557
        zSig1 = 0;
558
    }
559
    else {
560
        shift64Right( absA, 0, - shiftCount, &zSig0, &zSig1 );
561
    }
562
    return packFloat64( zSign, 0x412 - shiftCount, zSig0, zSig1 );
563

564
}
565

566
#ifndef SOFTFLOAT_FOR_GCC
567
/*
568
-------------------------------------------------------------------------------
569
Returns the result of converting the single-precision floating-point value
570
`a' to the 32-bit two's complement integer format.  The conversion is
571
performed according to the IEC/IEEE Standard for Binary Floating-Point
572
Arithmetic---which means in particular that the conversion is rounded
573
according to the current rounding mode.  If `a' is a NaN, the largest
574
positive integer is returned.  Otherwise, if the conversion overflows, the
575
largest integer with the same sign as `a' is returned.
576
-------------------------------------------------------------------------------
577
*/
578
int32 float32_to_int32( float32 a )
579
{
580
    flag aSign;
581
    int16 aExp, shiftCount;
582
    bits32 aSig, aSigExtra;
583
    int32 z;
584
    int8 roundingMode;
585

586
    aSig = extractFloat32Frac( a );
587
    aExp = extractFloat32Exp( a );
588
    aSign = extractFloat32Sign( a );
589
    shiftCount = aExp - 0x96;
590
    if ( 0 <= shiftCount ) {
591
        if ( 0x9E <= aExp ) {
592
            if ( a != 0xCF000000 ) {
593
                float_raise( float_flag_invalid );
594
                if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
595
                    return 0x7FFFFFFF;
596
                }
597
            }
598
            return (sbits32) 0x80000000;
599
        }
600
        z = ( aSig | 0x00800000 )<<shiftCount;
601
        if ( aSign ) z = - z;
602
    }
603
    else {
604
        if ( aExp < 0x7E ) {
605
            aSigExtra = aExp | aSig;
606
            z = 0;
607
        }
608
        else {
609
            aSig |= 0x00800000;
610
            aSigExtra = aSig<<( shiftCount & 31 );
611
            z = aSig>>( - shiftCount );
612
        }
613
        if ( aSigExtra ) float_exception_flags |= float_flag_inexact;
614
        roundingMode = float_rounding_mode;
615
        if ( roundingMode == float_round_nearest_even ) {
616
            if ( (sbits32) aSigExtra < 0 ) {
617
                ++z;
618
                if ( (bits32) ( aSigExtra<<1 ) == 0 ) z &= ~1;
619
            }
620
            if ( aSign ) z = - z;
621
        }
622
        else {
623
            aSigExtra = ( aSigExtra != 0 );
624
            if ( aSign ) {
625
                z += ( roundingMode == float_round_down ) & aSigExtra;
626
                z = - z;
627
            }
628
            else {
629
                z += ( roundingMode == float_round_up ) & aSigExtra;
630
            }
631
        }
632
    }
633
    return z;
634

635
}
636
#endif
637

638
/*
639
-------------------------------------------------------------------------------
640
Returns the result of converting the single-precision floating-point value
641
`a' to the 32-bit two's complement integer format.  The conversion is
642
performed according to the IEC/IEEE Standard for Binary Floating-Point
643
Arithmetic, except that the conversion is always rounded toward zero.
644
If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
645
the conversion overflows, the largest integer with the same sign as `a' is
646
returned.
647
-------------------------------------------------------------------------------
648
*/
649
int32 float32_to_int32_round_to_zero( float32 a )
650
{
651
    flag aSign;
652
    int16 aExp, shiftCount;
653
    bits32 aSig;
654
    int32 z;
655

656
    aSig = extractFloat32Frac( a );
657
    aExp = extractFloat32Exp( a );
658
    aSign = extractFloat32Sign( a );
659
    shiftCount = aExp - 0x9E;
660
    if ( 0 <= shiftCount ) {
661
        if ( a != 0xCF000000 ) {
662
            float_raise( float_flag_invalid );
663
            if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
664
        }
665
        return (sbits32) 0x80000000;
666
    }
667
    else if ( aExp <= 0x7E ) {
668
        if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
669
        return 0;
670
    }
671
    aSig = ( aSig | 0x00800000 )<<8;
672
    z = aSig>>( - shiftCount );
673
    if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
674
        float_exception_flags |= float_flag_inexact;
675
    }
676
    if ( aSign ) z = - z;
677
    return z;
678

679
}
680

681
/*
682
-------------------------------------------------------------------------------
683
Returns the result of converting the single-precision floating-point value
684
`a' to the double-precision floating-point format.  The conversion is
685
performed according to the IEC/IEEE Standard for Binary Floating-Point
686
Arithmetic.
687
-------------------------------------------------------------------------------
688
*/
689
float64 float32_to_float64( float32 a )
690
{
691
    flag aSign;
692
    int16 aExp;
693
    bits32 aSig, zSig0, zSig1;
694

695
    aSig = extractFloat32Frac( a );
696
    aExp = extractFloat32Exp( a );
697
    aSign = extractFloat32Sign( a );
698
    if ( aExp == 0xFF ) {
699
        if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );
700
        return packFloat64( aSign, 0x7FF, 0, 0 );
701
    }
702
    if ( aExp == 0 ) {
703
        if ( aSig == 0 ) return packFloat64( aSign, 0, 0, 0 );
704
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
705
        --aExp;
706
    }
707
    shift64Right( aSig, 0, 3, &zSig0, &zSig1 );
708
    return packFloat64( aSign, aExp + 0x380, zSig0, zSig1 );
709

710
}
711

712
#ifndef SOFTFLOAT_FOR_GCC
713
/*
714
-------------------------------------------------------------------------------
715
Rounds the single-precision floating-point value `a' to an integer,
716
and returns the result as a single-precision floating-point value.  The
717
operation is performed according to the IEC/IEEE Standard for Binary
718
Floating-Point Arithmetic.
719
-------------------------------------------------------------------------------
720
*/
721
float32 float32_round_to_int( float32 a )
722
{
723
    flag aSign;
724
    int16 aExp;
725
    bits32 lastBitMask, roundBitsMask;
726
    int8 roundingMode;
727
    float32 z;
728

729
    aExp = extractFloat32Exp( a );
730
    if ( 0x96 <= aExp ) {
731
        if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
732
            return propagateFloat32NaN( a, a );
733
        }
734
        return a;
735
    }
736
    if ( aExp <= 0x7E ) {
737
        if ( (bits32) ( a<<1 ) == 0 ) return a;
738
        float_exception_flags |= float_flag_inexact;
739
        aSign = extractFloat32Sign( a );
740
        switch ( float_rounding_mode ) {
741
         case float_round_nearest_even:
742
            if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
743
                return packFloat32( aSign, 0x7F, 0 );
744
            }
745
            break;
746
	 case float_round_to_zero:
747
	    break;
748
         case float_round_down:
749
            return aSign ? 0xBF800000 : 0;
750
         case float_round_up:
751
            return aSign ? 0x80000000 : 0x3F800000;
752
        }
753
        return packFloat32( aSign, 0, 0 );
754
    }
755
    lastBitMask = 1;
756
    lastBitMask <<= 0x96 - aExp;
757
    roundBitsMask = lastBitMask - 1;
758
    z = a;
759
    roundingMode = float_rounding_mode;
760
    if ( roundingMode == float_round_nearest_even ) {
761
        z += lastBitMask>>1;
762
        if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
763
    }
764
    else if ( roundingMode != float_round_to_zero ) {
765
        if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
766
            z += roundBitsMask;
767
        }
768
    }
769
    z &= ~ roundBitsMask;
770
    if ( z != a ) float_exception_flags |= float_flag_inexact;
771
    return z;
772

773
}
774
#endif
775

776
/*
777
-------------------------------------------------------------------------------
778
Returns the result of adding the absolute values of the single-precision
779
floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
780
before being returned.  `zSign' is ignored if the result is a NaN.
781
The addition is performed according to the IEC/IEEE Standard for Binary
782
Floating-Point Arithmetic.
783
-------------------------------------------------------------------------------
784
*/
785
static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
786
{
787
    int16 aExp, bExp, zExp;
788
    bits32 aSig, bSig, zSig;
789
    int16 expDiff;
790

791
    aSig = extractFloat32Frac( a );
792
    aExp = extractFloat32Exp( a );
793
    bSig = extractFloat32Frac( b );
794
    bExp = extractFloat32Exp( b );
795
    expDiff = aExp - bExp;
796
    aSig <<= 6;
797
    bSig <<= 6;
798
    if ( 0 < expDiff ) {
799
        if ( aExp == 0xFF ) {
800
            if ( aSig ) return propagateFloat32NaN( a, b );
801
            return a;
802
        }
803
        if ( bExp == 0 ) {
804
            --expDiff;
805
        }
806
        else {
807
            bSig |= 0x20000000;
808
        }
809
        shift32RightJamming( bSig, expDiff, &bSig );
810
        zExp = aExp;
811
    }
812
    else if ( expDiff < 0 ) {
813
        if ( bExp == 0xFF ) {
814
            if ( bSig ) return propagateFloat32NaN( a, b );
815
            return packFloat32( zSign, 0xFF, 0 );
816
        }
817
        if ( aExp == 0 ) {
818
            ++expDiff;
819
        }
820
        else {
821
            aSig |= 0x20000000;
822
        }
823
        shift32RightJamming( aSig, - expDiff, &aSig );
824
        zExp = bExp;
825
    }
826
    else {
827
        if ( aExp == 0xFF ) {
828
            if ( aSig | bSig ) return propagateFloat32NaN( a, b );
829
            return a;
830
        }
831
        if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
832
        zSig = 0x40000000 + aSig + bSig;
833
        zExp = aExp;
834
        goto roundAndPack;
835
    }
836
    aSig |= 0x20000000;
837
    zSig = ( aSig + bSig )<<1;
838
    --zExp;
839
    if ( (sbits32) zSig < 0 ) {
840
        zSig = aSig + bSig;
841
        ++zExp;
842
    }
843
 roundAndPack:
844
    return roundAndPackFloat32( zSign, zExp, zSig );
845

846
}
847

848
/*
849
-------------------------------------------------------------------------------
850
Returns the result of subtracting the absolute values of the single-
851
precision floating-point values `a' and `b'.  If `zSign' is 1, the
852
difference is negated before being returned.  `zSign' is ignored if the
853
result is a NaN.  The subtraction is performed according to the IEC/IEEE
854
Standard for Binary Floating-Point Arithmetic.
855
-------------------------------------------------------------------------------
856
*/
857
static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
858
{
859
    int16 aExp, bExp, zExp;
860
    bits32 aSig, bSig, zSig;
861
    int16 expDiff;
862

863
    aSig = extractFloat32Frac( a );
864
    aExp = extractFloat32Exp( a );
865
    bSig = extractFloat32Frac( b );
866
    bExp = extractFloat32Exp( b );
867
    expDiff = aExp - bExp;
868
    aSig <<= 7;
869
    bSig <<= 7;
870
    if ( 0 < expDiff ) goto aExpBigger;
871
    if ( expDiff < 0 ) goto bExpBigger;
872
    if ( aExp == 0xFF ) {
873
        if ( aSig | bSig ) return propagateFloat32NaN( a, b );
874
        float_raise( float_flag_invalid );
875
        return float32_default_nan;
876
    }
877
    if ( aExp == 0 ) {
878
        aExp = 1;
879
        bExp = 1;
880
    }
881
    if ( bSig < aSig ) goto aBigger;
882
    if ( aSig < bSig ) goto bBigger;
883
    return packFloat32( float_rounding_mode == float_round_down, 0, 0 );
884
 bExpBigger:
885
    if ( bExp == 0xFF ) {
886
        if ( bSig ) return propagateFloat32NaN( a, b );
887
        return packFloat32( zSign ^ 1, 0xFF, 0 );
888
    }
889
    if ( aExp == 0 ) {
890
        ++expDiff;
891
    }
892
    else {
893
        aSig |= 0x40000000;
894
    }
895
    shift32RightJamming( aSig, - expDiff, &aSig );
896
    bSig |= 0x40000000;
897
 bBigger:
898
    zSig = bSig - aSig;
899
    zExp = bExp;
900
    zSign ^= 1;
901
    goto normalizeRoundAndPack;
902
 aExpBigger:
903
    if ( aExp == 0xFF ) {
904
        if ( aSig ) return propagateFloat32NaN( a, b );
905
        return a;
906
    }
907
    if ( bExp == 0 ) {
908
        --expDiff;
909
    }
910
    else {
911
        bSig |= 0x40000000;
912
    }
913
    shift32RightJamming( bSig, expDiff, &bSig );
914
    aSig |= 0x40000000;
915
 aBigger:
916
    zSig = aSig - bSig;
917
    zExp = aExp;
918
 normalizeRoundAndPack:
919
    --zExp;
920
    return normalizeRoundAndPackFloat32( zSign, zExp, zSig );
921

922
}
923

924
/*
925
-------------------------------------------------------------------------------
926
Returns the result of adding the single-precision floating-point values `a'
927
and `b'.  The operation is performed according to the IEC/IEEE Standard for
928
Binary Floating-Point Arithmetic.
929
-------------------------------------------------------------------------------
930
*/
931
float32 float32_add( float32 a, float32 b )
932
{
933
    flag aSign, bSign;
934

935
    aSign = extractFloat32Sign( a );
936
    bSign = extractFloat32Sign( b );
937
    if ( aSign == bSign ) {
938
        return addFloat32Sigs( a, b, aSign );
939
    }
940
    else {
941
        return subFloat32Sigs( a, b, aSign );
942
    }
943

944
}
945

946
/*
947
-------------------------------------------------------------------------------
948
Returns the result of subtracting the single-precision floating-point values
949
`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
950
for Binary Floating-Point Arithmetic.
951
-------------------------------------------------------------------------------
952
*/
953
float32 float32_sub( float32 a, float32 b )
954
{
955
    flag aSign, bSign;
956

957
    aSign = extractFloat32Sign( a );
958
    bSign = extractFloat32Sign( b );
959
    if ( aSign == bSign ) {
960
        return subFloat32Sigs( a, b, aSign );
961
    }
962
    else {
963
        return addFloat32Sigs( a, b, aSign );
964
    }
965

966
}
967

968
/*
969
-------------------------------------------------------------------------------
970
Returns the result of multiplying the single-precision floating-point values
971
`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
972
for Binary Floating-Point Arithmetic.
973
-------------------------------------------------------------------------------
974
*/
975
float32 float32_mul( float32 a, float32 b )
976
{
977
    flag aSign, bSign, zSign;
978
    int16 aExp, bExp, zExp;
979
    bits32 aSig, bSig, zSig0, zSig1;
980

981
    aSig = extractFloat32Frac( a );
982
    aExp = extractFloat32Exp( a );
983
    aSign = extractFloat32Sign( a );
984
    bSig = extractFloat32Frac( b );
985
    bExp = extractFloat32Exp( b );
986
    bSign = extractFloat32Sign( b );
987
    zSign = aSign ^ bSign;
988
    if ( aExp == 0xFF ) {
989
        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
990
            return propagateFloat32NaN( a, b );
991
        }
992
        if ( ( bExp | bSig ) == 0 ) {
993
            float_raise( float_flag_invalid );
994
            return float32_default_nan;
995
        }
996
        return packFloat32( zSign, 0xFF, 0 );
997
    }
998
    if ( bExp == 0xFF ) {
999
        if ( bSig ) return propagateFloat32NaN( a, b );
1000
        if ( ( aExp | aSig ) == 0 ) {
1001
            float_raise( float_flag_invalid );
1002
            return float32_default_nan;
1003
        }
1004
        return packFloat32( zSign, 0xFF, 0 );
1005
    }
1006
    if ( aExp == 0 ) {
1007
        if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1008
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1009
    }
1010
    if ( bExp == 0 ) {
1011
        if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
1012
        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1013
    }
1014
    zExp = aExp + bExp - 0x7F;
1015
    aSig = ( aSig | 0x00800000 )<<7;
1016
    bSig = ( bSig | 0x00800000 )<<8;
1017
    mul32To64( aSig, bSig, &zSig0, &zSig1 );
1018
    zSig0 |= ( zSig1 != 0 );
1019
    if ( 0 <= (sbits32) ( zSig0<<1 ) ) {
1020
        zSig0 <<= 1;
1021
        --zExp;
1022
    }
1023
    return roundAndPackFloat32( zSign, zExp, zSig0 );
1024

1025
}
1026

1027
/*
1028
-------------------------------------------------------------------------------
1029
Returns the result of dividing the single-precision floating-point value `a'
1030
by the corresponding value `b'.  The operation is performed according to the
1031
IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1032
-------------------------------------------------------------------------------
1033
*/
1034
float32 float32_div( float32 a, float32 b )
1035
{
1036
    flag aSign, bSign, zSign;
1037
    int16 aExp, bExp, zExp;
1038
    bits32 aSig, bSig, zSig, rem0, rem1, term0, term1;
1039

1040
    aSig = extractFloat32Frac( a );
1041
    aExp = extractFloat32Exp( a );
1042
    aSign = extractFloat32Sign( a );
1043
    bSig = extractFloat32Frac( b );
1044
    bExp = extractFloat32Exp( b );
1045
    bSign = extractFloat32Sign( b );
1046
    zSign = aSign ^ bSign;
1047
    if ( aExp == 0xFF ) {
1048
        if ( aSig ) return propagateFloat32NaN( a, b );
1049
        if ( bExp == 0xFF ) {
1050
            if ( bSig ) return propagateFloat32NaN( a, b );
1051
            float_raise( float_flag_invalid );
1052
            return float32_default_nan;
1053
        }
1054
        return packFloat32( zSign, 0xFF, 0 );
1055
    }
1056
    if ( bExp == 0xFF ) {
1057
        if ( bSig ) return propagateFloat32NaN( a, b );
1058
        return packFloat32( zSign, 0, 0 );
1059
    }
1060
    if ( bExp == 0 ) {
1061
        if ( bSig == 0 ) {
1062
            if ( ( aExp | aSig ) == 0 ) {
1063
                float_raise( float_flag_invalid );
1064
                return float32_default_nan;
1065
            }
1066
            float_raise( float_flag_divbyzero );
1067
            return packFloat32( zSign, 0xFF, 0 );
1068
        }
1069
        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1070
    }
1071
    if ( aExp == 0 ) {
1072
        if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1073
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1074
    }
1075
    zExp = aExp - bExp + 0x7D;
1076
    aSig = ( aSig | 0x00800000 )<<7;
1077
    bSig = ( bSig | 0x00800000 )<<8;
1078
    if ( bSig <= ( aSig + aSig ) ) {
1079
        aSig >>= 1;
1080
        ++zExp;
1081
    }
1082
    zSig = estimateDiv64To32( aSig, 0, bSig );
1083
    if ( ( zSig & 0x3F ) <= 2 ) {
1084
        mul32To64( bSig, zSig, &term0, &term1 );
1085
        sub64( aSig, 0, term0, term1, &rem0, &rem1 );
1086
        while ( (sbits32) rem0 < 0 ) {
1087
            --zSig;
1088
            add64( rem0, rem1, 0, bSig, &rem0, &rem1 );
1089
        }
1090
        zSig |= ( rem1 != 0 );
1091
    }
1092
    return roundAndPackFloat32( zSign, zExp, zSig );
1093

1094
}
1095

1096
#ifndef SOFTFLOAT_FOR_GCC
1097
/*
1098
-------------------------------------------------------------------------------
1099
Returns the remainder of the single-precision floating-point value `a'
1100
with respect to the corresponding value `b'.  The operation is performed
1101
according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1102
-------------------------------------------------------------------------------
1103
*/
1104
float32 float32_rem( float32 a, float32 b )
1105
{
1106
    flag aSign, bSign, zSign;
1107
    int16 aExp, bExp, expDiff;
1108
    bits32 aSig, bSig, q, allZero, alternateASig;
1109
    sbits32 sigMean;
1110

1111
    aSig = extractFloat32Frac( a );
1112
    aExp = extractFloat32Exp( a );
1113
    aSign = extractFloat32Sign( a );
1114
    bSig = extractFloat32Frac( b );
1115
    bExp = extractFloat32Exp( b );
1116
    bSign = extractFloat32Sign( b );
1117
    if ( aExp == 0xFF ) {
1118
        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
1119
            return propagateFloat32NaN( a, b );
1120
        }
1121
        float_raise( float_flag_invalid );
1122
        return float32_default_nan;
1123
    }
1124
    if ( bExp == 0xFF ) {
1125
        if ( bSig ) return propagateFloat32NaN( a, b );
1126
        return a;
1127
    }
1128
    if ( bExp == 0 ) {
1129
        if ( bSig == 0 ) {
1130
            float_raise( float_flag_invalid );
1131
            return float32_default_nan;
1132
        }
1133
        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1134
    }
1135
    if ( aExp == 0 ) {
1136
        if ( aSig == 0 ) return a;
1137
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1138
    }
1139
    expDiff = aExp - bExp;
1140
    aSig = ( aSig | 0x00800000 )<<8;
1141
    bSig = ( bSig | 0x00800000 )<<8;
1142
    if ( expDiff < 0 ) {
1143
        if ( expDiff < -1 ) return a;
1144
        aSig >>= 1;
1145
    }
1146
    q = ( bSig <= aSig );
1147
    if ( q ) aSig -= bSig;
1148
    expDiff -= 32;
1149
    while ( 0 < expDiff ) {
1150
        q = estimateDiv64To32( aSig, 0, bSig );
1151
        q = ( 2 < q ) ? q - 2 : 0;
1152
        aSig = - ( ( bSig>>2 ) * q );
1153
        expDiff -= 30;
1154
    }
1155
    expDiff += 32;
1156
    if ( 0 < expDiff ) {
1157
        q = estimateDiv64To32( aSig, 0, bSig );
1158
        q = ( 2 < q ) ? q - 2 : 0;
1159
        q >>= 32 - expDiff;
1160
        bSig >>= 2;
1161
        aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
1162
    }
1163
    else {
1164
        aSig >>= 2;
1165
        bSig >>= 2;
1166
    }
1167
    do {
1168
        alternateASig = aSig;
1169
        ++q;
1170
        aSig -= bSig;
1171
    } while ( 0 <= (sbits32) aSig );
1172
    sigMean = aSig + alternateASig;
1173
    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
1174
        aSig = alternateASig;
1175
    }
1176
    zSign = ( (sbits32) aSig < 0 );
1177
    if ( zSign ) aSig = - aSig;
1178
    return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig );
1179

1180
}
1181
#endif
1182

1183
#ifndef SOFTFLOAT_FOR_GCC
1184
/*
1185
-------------------------------------------------------------------------------
1186
Returns the square root of the single-precision floating-point value `a'.
1187
The operation is performed according to the IEC/IEEE Standard for Binary
1188
Floating-Point Arithmetic.
1189
-------------------------------------------------------------------------------
1190
*/
1191
float32 float32_sqrt( float32 a )
1192
{
1193
    flag aSign;
1194
    int16 aExp, zExp;
1195
    bits32 aSig, zSig, rem0, rem1, term0, term1;
1196

1197
    aSig = extractFloat32Frac( a );
1198
    aExp = extractFloat32Exp( a );
1199
    aSign = extractFloat32Sign( a );
1200
    if ( aExp == 0xFF ) {
1201
        if ( aSig ) return propagateFloat32NaN( a, 0 );
1202
        if ( ! aSign ) return a;
1203
        float_raise( float_flag_invalid );
1204
        return float32_default_nan;
1205
    }
1206
    if ( aSign ) {
1207
        if ( ( aExp | aSig ) == 0 ) return a;
1208
        float_raise( float_flag_invalid );
1209
        return float32_default_nan;
1210
    }
1211
    if ( aExp == 0 ) {
1212
        if ( aSig == 0 ) return 0;
1213
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1214
    }
1215
    zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
1216
    aSig = ( aSig | 0x00800000 )<<8;
1217
    zSig = estimateSqrt32( aExp, aSig ) + 2;
1218
    if ( ( zSig & 0x7F ) <= 5 ) {
1219
        if ( zSig < 2 ) {
1220
            zSig = 0x7FFFFFFF;
1221
            goto roundAndPack;
1222
        }
1223
        else {
1224
            aSig >>= aExp & 1;
1225
            mul32To64( zSig, zSig, &term0, &term1 );
1226
            sub64( aSig, 0, term0, term1, &rem0, &rem1 );
1227
            while ( (sbits32) rem0 < 0 ) {
1228
                --zSig;
1229
                shortShift64Left( 0, zSig, 1, &term0, &term1 );
1230
                term1 |= 1;
1231
                add64( rem0, rem1, term0, term1, &rem0, &rem1 );
1232
            }
1233
            zSig |= ( ( rem0 | rem1 ) != 0 );
1234
        }
1235
    }
1236
    shift32RightJamming( zSig, 1, &zSig );
1237
 roundAndPack:
1238
    return roundAndPackFloat32( 0, zExp, zSig );
1239

1240
}
1241
#endif
1242

1243
/*
1244
-------------------------------------------------------------------------------
1245
Returns 1 if the single-precision floating-point value `a' is equal to
1246
the corresponding value `b', and 0 otherwise.  The comparison is performed
1247
according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1248
-------------------------------------------------------------------------------
1249
*/
1250
flag float32_eq( float32 a, float32 b )
1251
{
1252

1253
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1254
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1255
       ) {
1256
        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
1257
            float_raise( float_flag_invalid );
1258
        }
1259
        return 0;
1260
    }
1261
    return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
1262

1263
}
1264

1265
/*
1266
-------------------------------------------------------------------------------
1267
Returns 1 if the single-precision floating-point value `a' is less than
1268
or equal to the corresponding value `b', and 0 otherwise.  The comparison
1269
is performed according to the IEC/IEEE Standard for Binary Floating-Point
1270
Arithmetic.
1271
-------------------------------------------------------------------------------
1272
*/
1273
flag float32_le( float32 a, float32 b )
1274
{
1275
    flag aSign, bSign;
1276

1277
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1278
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1279
       ) {
1280
        float_raise( float_flag_invalid );
1281
        return 0;
1282
    }
1283
    aSign = extractFloat32Sign( a );
1284
    bSign = extractFloat32Sign( b );
1285
    if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
1286
    return ( a == b ) || ( aSign ^ ( a < b ) );
1287

1288
}
1289

1290
/*
1291
-------------------------------------------------------------------------------
1292
Returns 1 if the single-precision floating-point value `a' is less than
1293
the corresponding value `b', and 0 otherwise.  The comparison is performed
1294
according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1295
-------------------------------------------------------------------------------
1296
*/
1297
flag float32_lt( float32 a, float32 b )
1298
{
1299
    flag aSign, bSign;
1300

1301
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1302
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1303
       ) {
1304
        float_raise( float_flag_invalid );
1305
        return 0;
1306
    }
1307
    aSign = extractFloat32Sign( a );
1308
    bSign = extractFloat32Sign( b );
1309
    if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
1310
    return ( a != b ) && ( aSign ^ ( a < b ) );
1311

1312
}
1313

1314
#ifndef SOFTFLOAT_FOR_GCC /* Not needed */
1315
/*
1316
-------------------------------------------------------------------------------
1317
Returns 1 if the single-precision floating-point value `a' is equal to
1318
the corresponding value `b', and 0 otherwise.  The invalid exception is
1319
raised if either operand is a NaN.  Otherwise, the comparison is performed
1320
according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1321
-------------------------------------------------------------------------------
1322
*/
1323
flag float32_eq_signaling( float32 a, float32 b )
1324
{
1325

1326
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1327
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1328
       ) {
1329
        float_raise( float_flag_invalid );
1330
        return 0;
1331
    }
1332
    return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
1333

1334
}
1335

1336
/*
1337
-------------------------------------------------------------------------------
1338
Returns 1 if the single-precision floating-point value `a' is less than or
1339
equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
1340
cause an exception.  Otherwise, the comparison is performed according to the
1341
IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1342
-------------------------------------------------------------------------------
1343
*/
1344
flag float32_le_quiet( float32 a, float32 b )
1345
{
1346
    flag aSign, bSign;
1347
    int16 aExp, bExp;
1348

1349
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1350
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1351
       ) {
1352
        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
1353
            float_raise( float_flag_invalid );
1354
        }
1355
        return 0;
1356
    }
1357
    aSign = extractFloat32Sign( a );
1358
    bSign = extractFloat32Sign( b );
1359
    if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
1360
    return ( a == b ) || ( aSign ^ ( a < b ) );
1361

1362
}
1363

1364
/*
1365
-------------------------------------------------------------------------------
1366
Returns 1 if the single-precision floating-point value `a' is less than
1367
the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
1368
exception.  Otherwise, the comparison is performed according to the IEC/IEEE
1369
Standard for Binary Floating-Point Arithmetic.
1370
-------------------------------------------------------------------------------
1371
*/
1372
flag float32_lt_quiet( float32 a, float32 b )
1373
{
1374
    flag aSign, bSign;
1375

1376
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1377
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1378
       ) {
1379
        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
1380
            float_raise( float_flag_invalid );
1381
        }
1382
        return 0;
1383
    }
1384
    aSign = extractFloat32Sign( a );
1385
    bSign = extractFloat32Sign( b );
1386
    if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
1387
    return ( a != b ) && ( aSign ^ ( a < b ) );
1388

1389
}
1390
#endif /* !SOFTFLOAT_FOR_GCC */
1391

1392
#ifndef SOFTFLOAT_FOR_GCC /* Not needed */
1393
/*
1394
-------------------------------------------------------------------------------
1395
Returns the result of converting the double-precision floating-point value
1396
`a' to the 32-bit two's complement integer format.  The conversion is
1397
performed according to the IEC/IEEE Standard for Binary Floating-Point
1398
Arithmetic---which means in particular that the conversion is rounded
1399
according to the current rounding mode.  If `a' is a NaN, the largest
1400
positive integer is returned.  Otherwise, if the conversion overflows, the
1401
largest integer with the same sign as `a' is returned.
1402
-------------------------------------------------------------------------------
1403
*/
1404
int32 float64_to_int32( float64 a )
1405
{
1406
    flag aSign;
1407
    int16 aExp, shiftCount;
1408
    bits32 aSig0, aSig1, absZ, aSigExtra;
1409
    int32 z;
1410
    int8 roundingMode;
1411

1412
    aSig1 = extractFloat64Frac1( a );
1413
    aSig0 = extractFloat64Frac0( a );
1414
    aExp = extractFloat64Exp( a );
1415
    aSign = extractFloat64Sign( a );
1416
    shiftCount = aExp - 0x413;
1417
    if ( 0 <= shiftCount ) {
1418
        if ( 0x41E < aExp ) {
1419
            if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0;
1420
            goto invalid;
1421
        }
1422
        shortShift64Left(
1423
            aSig0 | 0x00100000, aSig1, shiftCount, &absZ, &aSigExtra );
1424
        if ( 0x80000000 < absZ ) goto invalid;
1425
    }
1426
    else {
1427
        aSig1 = ( aSig1 != 0 );
1428
        if ( aExp < 0x3FE ) {
1429
            aSigExtra = aExp | aSig0 | aSig1;
1430
            absZ = 0;
1431
        }
1432
        else {
1433
            aSig0 |= 0x00100000;
1434
            aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1;
1435
            absZ = aSig0>>( - shiftCount );
1436
        }
1437
    }
1438
    roundingMode = float_rounding_mode;
1439
    if ( roundingMode == float_round_nearest_even ) {
1440
        if ( (sbits32) aSigExtra < 0 ) {
1441
            ++absZ;
1442
            if ( (bits32) ( aSigExtra<<1 ) == 0 ) absZ &= ~1;
1443
        }
1444
        z = aSign ? - absZ : absZ;
1445
    }
1446
    else {
1447
        aSigExtra = ( aSigExtra != 0 );
1448
        if ( aSign ) {
1449
            z = - (   absZ
1450
                    + ( ( roundingMode == float_round_down ) & aSigExtra ) );
1451
        }
1452
        else {
1453
            z = absZ + ( ( roundingMode == float_round_up ) & aSigExtra );
1454
        }
1455
    }
1456
    if ( ( aSign ^ ( z < 0 ) ) && z ) {
1457
 invalid:
1458
        float_raise( float_flag_invalid );
1459
        return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
1460
    }
1461
    if ( aSigExtra ) float_exception_flags |= float_flag_inexact;
1462
    return z;
1463

1464
}
1465
#endif /* !SOFTFLOAT_FOR_GCC */
1466

1467
/*
1468
-------------------------------------------------------------------------------
1469
Returns the result of converting the double-precision floating-point value
1470
`a' to the 32-bit two's complement integer format.  The conversion is
1471
performed according to the IEC/IEEE Standard for Binary Floating-Point
1472
Arithmetic, except that the conversion is always rounded toward zero.
1473
If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
1474
the conversion overflows, the largest integer with the same sign as `a' is
1475
returned.
1476
-------------------------------------------------------------------------------
1477
*/
1478
int32 float64_to_int32_round_to_zero( float64 a )
1479
{
1480
    flag aSign;
1481
    int16 aExp, shiftCount;
1482
    bits32 aSig0, aSig1, absZ, aSigExtra;
1483
    int32 z;
1484

1485
    aSig1 = extractFloat64Frac1( a );
1486
    aSig0 = extractFloat64Frac0( a );
1487
    aExp = extractFloat64Exp( a );
1488
    aSign = extractFloat64Sign( a );
1489
    shiftCount = aExp - 0x413;
1490
    if ( 0 <= shiftCount ) {
1491
        if ( 0x41E < aExp ) {
1492
            if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0;
1493
            goto invalid;
1494
        }
1495
        shortShift64Left(
1496
            aSig0 | 0x00100000, aSig1, shiftCount, &absZ, &aSigExtra );
1497
    }
1498
    else {
1499
        if ( aExp < 0x3FF ) {
1500
            if ( aExp | aSig0 | aSig1 ) {
1501
                float_exception_flags |= float_flag_inexact;
1502
            }
1503
            return 0;
1504
        }
1505
        aSig0 |= 0x00100000;
1506
        aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1;
1507
        absZ = aSig0>>( - shiftCount );
1508
    }
1509
    z = aSign ? - absZ : absZ;
1510
    if ( ( aSign ^ ( z < 0 ) ) && z ) {
1511
 invalid:
1512
        float_raise( float_flag_invalid );
1513
        return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
1514
    }
1515
    if ( aSigExtra ) float_exception_flags |= float_flag_inexact;
1516
    return z;
1517

1518
}
1519

1520
/*
1521
-------------------------------------------------------------------------------
1522
Returns the result of converting the double-precision floating-point value
1523
`a' to the single-precision floating-point format.  The conversion is
1524
performed according to the IEC/IEEE Standard for Binary Floating-Point
1525
Arithmetic.
1526
-------------------------------------------------------------------------------
1527
*/
1528
float32 float64_to_float32( float64 a )
1529
{
1530
    flag aSign;
1531
    int16 aExp;
1532
    bits32 aSig0, aSig1, zSig;
1533
    bits32 allZero;
1534

1535
    aSig1 = extractFloat64Frac1( a );
1536
    aSig0 = extractFloat64Frac0( a );
1537
    aExp = extractFloat64Exp( a );
1538
    aSign = extractFloat64Sign( a );
1539
    if ( aExp == 0x7FF ) {
1540
        if ( aSig0 | aSig1 ) {
1541
            return commonNaNToFloat32( float64ToCommonNaN( a ) );
1542
        }
1543
        return packFloat32( aSign, 0xFF, 0 );
1544
    }
1545
    shift64RightJamming( aSig0, aSig1, 22, &allZero, &zSig );
1546
    if ( aExp ) zSig |= 0x40000000;
1547
    return roundAndPackFloat32( aSign, aExp - 0x381, zSig );
1548

1549
}
1550

1551
#ifndef SOFTFLOAT_FOR_GCC
1552
/*
1553
-------------------------------------------------------------------------------
1554
Rounds the double-precision floating-point value `a' to an integer,
1555
and returns the result as a double-precision floating-point value.  The
1556
operation is performed according to the IEC/IEEE Standard for Binary
1557
Floating-Point Arithmetic.
1558
-------------------------------------------------------------------------------
1559
*/
1560
float64 float64_round_to_int( float64 a )
1561
{
1562
    flag aSign;
1563
    int16 aExp;
1564
    bits32 lastBitMask, roundBitsMask;
1565
    int8 roundingMode;
1566
    float64 z;
1567

1568
    aExp = extractFloat64Exp( a );
1569
    if ( 0x413 <= aExp ) {
1570
        if ( 0x433 <= aExp ) {
1571
            if (    ( aExp == 0x7FF )
1572
                 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) {
1573
                return propagateFloat64NaN( a, a );
1574
            }
1575
            return a;
1576
        }
1577
        lastBitMask = 1;
1578
        lastBitMask = ( lastBitMask<<( 0x432 - aExp ) )<<1;
1579
        roundBitsMask = lastBitMask - 1;
1580
        z = a;
1581
        roundingMode = float_rounding_mode;
1582
        if ( roundingMode == float_round_nearest_even ) {
1583
            if ( lastBitMask ) {
1584
                add64( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
1585
                if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
1586
            }
1587
            else {
1588
                if ( (sbits32) z.low < 0 ) {
1589
                    ++z.high;
1590
                    if ( (bits32) ( z.low<<1 ) == 0 ) z.high &= ~1;
1591
                }
1592
            }
1593
        }
1594
        else if ( roundingMode != float_round_to_zero ) {
1595
            if (   extractFloat64Sign( z )
1596
                 ^ ( roundingMode == float_round_up ) ) {
1597
                add64( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
1598
            }
1599
        }
1600
        z.low &= ~ roundBitsMask;
1601
    }
1602
    else {
1603
        if ( aExp <= 0x3FE ) {
1604
            if ( ( ( (bits32) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
1605
            float_exception_flags |= float_flag_inexact;
1606
            aSign = extractFloat64Sign( a );
1607
            switch ( float_rounding_mode ) {
1608
             case float_round_nearest_even:
1609
                if (    ( aExp == 0x3FE )
1610
                     && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) )
1611
                   ) {
1612
                    return packFloat64( aSign, 0x3FF, 0, 0 );
1613
                }
1614
                break;
1615
             case float_round_down:
1616
                return
1617
                      aSign ? packFloat64( 1, 0x3FF, 0, 0 )
1618
                    : packFloat64( 0, 0, 0, 0 );
1619
             case float_round_up:
1620
                return
1621
                      aSign ? packFloat64( 1, 0, 0, 0 )
1622
                    : packFloat64( 0, 0x3FF, 0, 0 );
1623
            }
1624
            return packFloat64( aSign, 0, 0, 0 );
1625
        }
1626
        lastBitMask = 1;
1627
        lastBitMask <<= 0x413 - aExp;
1628
        roundBitsMask = lastBitMask - 1;
1629
        z.low = 0;
1630
        z.high = a.high;
1631
        roundingMode = float_rounding_mode;
1632
        if ( roundingMode == float_round_nearest_even ) {
1633
            z.high += lastBitMask>>1;
1634
            if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
1635
                z.high &= ~ lastBitMask;
1636
            }
1637
        }
1638
        else if ( roundingMode != float_round_to_zero ) {
1639
            if (   extractFloat64Sign( z )
1640
                 ^ ( roundingMode == float_round_up ) ) {
1641
                z.high |= ( a.low != 0 );
1642
                z.high += roundBitsMask;
1643
            }
1644
        }
1645
        z.high &= ~ roundBitsMask;
1646
    }
1647
    if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
1648
        float_exception_flags |= float_flag_inexact;
1649
    }
1650
    return z;
1651

1652
}
1653
#endif
1654

1655
/*
1656
-------------------------------------------------------------------------------
1657
Returns the result of adding the absolute values of the double-precision
1658
floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
1659
before being returned.  `zSign' is ignored if the result is a NaN.
1660
The addition is performed according to the IEC/IEEE Standard for Binary
1661
Floating-Point Arithmetic.
1662
-------------------------------------------------------------------------------
1663
*/
1664
static float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
1665
{
1666
    int16 aExp, bExp, zExp;
1667
    bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
1668
    int16 expDiff;
1669

1670
    aSig1 = extractFloat64Frac1( a );
1671
    aSig0 = extractFloat64Frac0( a );
1672
    aExp = extractFloat64Exp( a );
1673
    bSig1 = extractFloat64Frac1( b );
1674
    bSig0 = extractFloat64Frac0( b );
1675
    bExp = extractFloat64Exp( b );
1676
    expDiff = aExp - bExp;
1677
    if ( 0 < expDiff ) {
1678
        if ( aExp == 0x7FF ) {
1679
            if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
1680
            return a;
1681
        }
1682
        if ( bExp == 0 ) {
1683
            --expDiff;
1684
        }
1685
        else {
1686
            bSig0 |= 0x00100000;
1687
        }
1688
        shift64ExtraRightJamming(
1689
            bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
1690
        zExp = aExp;
1691
    }
1692
    else if ( expDiff < 0 ) {
1693
        if ( bExp == 0x7FF ) {
1694
            if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1695
            return packFloat64( zSign, 0x7FF, 0, 0 );
1696
        }
1697
        if ( aExp == 0 ) {
1698
            ++expDiff;
1699
        }
1700
        else {
1701
            aSig0 |= 0x00100000;
1702
        }
1703
        shift64ExtraRightJamming(
1704
            aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
1705
        zExp = bExp;
1706
    }
1707
    else {
1708
        if ( aExp == 0x7FF ) {
1709
            if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
1710
                return propagateFloat64NaN( a, b );
1711
            }
1712
            return a;
1713
        }
1714
        add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
1715
        if ( aExp == 0 ) return packFloat64( zSign, 0, zSig0, zSig1 );
1716
        zSig2 = 0;
1717
        zSig0 |= 0x00200000;
1718
        zExp = aExp;
1719
        goto shiftRight1;
1720
    }
1721
    aSig0 |= 0x00100000;
1722
    add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
1723
    --zExp;
1724
    if ( zSig0 < 0x00200000 ) goto roundAndPack;
1725
    ++zExp;
1726
 shiftRight1:
1727
    shift64ExtraRightJamming( zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
1728
 roundAndPack:
1729
    return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
1730

1731
}
1732

1733
/*
1734
-------------------------------------------------------------------------------
1735
Returns the result of subtracting the absolute values of the double-
1736
precision floating-point values `a' and `b'.  If `zSign' is 1, the
1737
difference is negated before being returned.  `zSign' is ignored if the
1738
result is a NaN.  The subtraction is performed according to the IEC/IEEE
1739
Standard for Binary Floating-Point Arithmetic.
1740
-------------------------------------------------------------------------------
1741
*/
1742
static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
1743
{
1744
    int16 aExp, bExp, zExp;
1745
    bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
1746
    int16 expDiff;
1747

1748
    aSig1 = extractFloat64Frac1( a );
1749
    aSig0 = extractFloat64Frac0( a );
1750
    aExp = extractFloat64Exp( a );
1751
    bSig1 = extractFloat64Frac1( b );
1752
    bSig0 = extractFloat64Frac0( b );
1753
    bExp = extractFloat64Exp( b );
1754
    expDiff = aExp - bExp;
1755
    shortShift64Left( aSig0, aSig1, 10, &aSig0, &aSig1 );
1756
    shortShift64Left( bSig0, bSig1, 10, &bSig0, &bSig1 );
1757
    if ( 0 < expDiff ) goto aExpBigger;
1758
    if ( expDiff < 0 ) goto bExpBigger;
1759
    if ( aExp == 0x7FF ) {
1760
        if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
1761
            return propagateFloat64NaN( a, b );
1762
        }
1763
        float_raise( float_flag_invalid );
1764
        return float64_default_nan;
1765
    }
1766
    if ( aExp == 0 ) {
1767
        aExp = 1;
1768
        bExp = 1;
1769
    }
1770
    if ( bSig0 < aSig0 ) goto aBigger;
1771
    if ( aSig0 < bSig0 ) goto bBigger;
1772
    if ( bSig1 < aSig1 ) goto aBigger;
1773
    if ( aSig1 < bSig1 ) goto bBigger;
1774
    return packFloat64( float_rounding_mode == float_round_down, 0, 0, 0 );
1775
 bExpBigger:
1776
    if ( bExp == 0x7FF ) {
1777
        if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1778
        return packFloat64( zSign ^ 1, 0x7FF, 0, 0 );
1779
    }
1780
    if ( aExp == 0 ) {
1781
        ++expDiff;
1782
    }
1783
    else {
1784
        aSig0 |= 0x40000000;
1785
    }
1786
    shift64RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
1787
    bSig0 |= 0x40000000;
1788
 bBigger:
1789
    sub64( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
1790
    zExp = bExp;
1791
    zSign ^= 1;
1792
    goto normalizeRoundAndPack;
1793
 aExpBigger:
1794
    if ( aExp == 0x7FF ) {
1795
        if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
1796
        return a;
1797
    }
1798
    if ( bExp == 0 ) {
1799
        --expDiff;
1800
    }
1801
    else {
1802
        bSig0 |= 0x40000000;
1803
    }
1804
    shift64RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
1805
    aSig0 |= 0x40000000;
1806
 aBigger:
1807
    sub64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
1808
    zExp = aExp;
1809
 normalizeRoundAndPack:
1810
    --zExp;
1811
    return normalizeRoundAndPackFloat64( zSign, zExp - 10, zSig0, zSig1 );
1812

1813
}
1814

1815
/*
1816
-------------------------------------------------------------------------------
1817
Returns the result of adding the double-precision floating-point values `a'
1818
and `b'.  The operation is performed according to the IEC/IEEE Standard for
1819
Binary Floating-Point Arithmetic.
1820
-------------------------------------------------------------------------------
1821
*/
1822
float64 float64_add( float64 a, float64 b )
1823
{
1824
    flag aSign, bSign;
1825

1826
    aSign = extractFloat64Sign( a );
1827
    bSign = extractFloat64Sign( b );
1828
    if ( aSign == bSign ) {
1829
        return addFloat64Sigs( a, b, aSign );
1830
    }
1831
    else {
1832
        return subFloat64Sigs( a, b, aSign );
1833
    }
1834

1835
}
1836

1837
/*
1838
-------------------------------------------------------------------------------
1839
Returns the result of subtracting the double-precision floating-point values
1840
`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
1841
for Binary Floating-Point Arithmetic.
1842
-------------------------------------------------------------------------------
1843
*/
1844
float64 float64_sub( float64 a, float64 b )
1845
{
1846
    flag aSign, bSign;
1847

1848
    aSign = extractFloat64Sign( a );
1849
    bSign = extractFloat64Sign( b );
1850
    if ( aSign == bSign ) {
1851
        return subFloat64Sigs( a, b, aSign );
1852
    }
1853
    else {
1854
        return addFloat64Sigs( a, b, aSign );
1855
    }
1856

1857
}
1858

1859
/*
1860
-------------------------------------------------------------------------------
1861
Returns the result of multiplying the double-precision floating-point values
1862
`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
1863
for Binary Floating-Point Arithmetic.
1864
-------------------------------------------------------------------------------
1865
*/
1866
float64 float64_mul( float64 a, float64 b )
1867
{
1868
    flag aSign, bSign, zSign;
1869
    int16 aExp, bExp, zExp;
1870
    bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
1871

1872
    aSig1 = extractFloat64Frac1( a );
1873
    aSig0 = extractFloat64Frac0( a );
1874
    aExp = extractFloat64Exp( a );
1875
    aSign = extractFloat64Sign( a );
1876
    bSig1 = extractFloat64Frac1( b );
1877
    bSig0 = extractFloat64Frac0( b );
1878
    bExp = extractFloat64Exp( b );
1879
    bSign = extractFloat64Sign( b );
1880
    zSign = aSign ^ bSign;
1881
    if ( aExp == 0x7FF ) {
1882
        if (    ( aSig0 | aSig1 )
1883
             || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) {
1884
            return propagateFloat64NaN( a, b );
1885
        }
1886
        if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
1887
        return packFloat64( zSign, 0x7FF, 0, 0 );
1888
    }
1889
    if ( bExp == 0x7FF ) {
1890
        if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1891
        if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
1892
 invalid:
1893
            float_raise( float_flag_invalid );
1894
            return float64_default_nan;
1895
        }
1896
        return packFloat64( zSign, 0x7FF, 0, 0 );
1897
    }
1898
    if ( aExp == 0 ) {
1899
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );
1900
        normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
1901
    }
1902
    if ( bExp == 0 ) {
1903
        if ( ( bSig0 | bSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );
1904
        normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
1905
    }
1906
    zExp = aExp + bExp - 0x400;
1907
    aSig0 |= 0x00100000;
1908
    shortShift64Left( bSig0, bSig1, 12, &bSig0, &bSig1 );
1909
    mul64To128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
1910
    add64( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
1911
    zSig2 |= ( zSig3 != 0 );
1912
    if ( 0x00200000 <= zSig0 ) {
1913
        shift64ExtraRightJamming(
1914
            zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
1915
        ++zExp;
1916
    }
1917
    return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
1918

1919
}
1920

1921
/*
1922
-------------------------------------------------------------------------------
1923
Returns the result of dividing the double-precision floating-point value `a'
1924
by the corresponding value `b'.  The operation is performed according to the
1925
IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1926
-------------------------------------------------------------------------------
1927
*/
1928
float64 float64_div( float64 a, float64 b )
1929
{
1930
    flag aSign, bSign, zSign;
1931
    int16 aExp, bExp, zExp;
1932
    bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
1933
    bits32 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
1934

1935
    aSig1 = extractFloat64Frac1( a );
1936
    aSig0 = extractFloat64Frac0( a );
1937
    aExp = extractFloat64Exp( a );
1938
    aSign = extractFloat64Sign( a );
1939
    bSig1 = extractFloat64Frac1( b );
1940
    bSig0 = extractFloat64Frac0( b );
1941
    bExp = extractFloat64Exp( b );
1942
    bSign = extractFloat64Sign( b );
1943
    zSign = aSign ^ bSign;
1944
    if ( aExp == 0x7FF ) {
1945
        if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
1946
        if ( bExp == 0x7FF ) {
1947
            if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1948
            goto invalid;
1949
        }
1950
        return packFloat64( zSign, 0x7FF, 0, 0 );
1951
    }
1952
    if ( bExp == 0x7FF ) {
1953
        if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1954
        return packFloat64( zSign, 0, 0, 0 );
1955
    }
1956
    if ( bExp == 0 ) {
1957
        if ( ( bSig0 | bSig1 ) == 0 ) {
1958
            if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
1959
 invalid:
1960
                float_raise( float_flag_invalid );
1961
                return float64_default_nan;
1962
            }
1963
            float_raise( float_flag_divbyzero );
1964
            return packFloat64( zSign, 0x7FF, 0, 0 );
1965
        }
1966
        normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
1967
    }
1968
    if ( aExp == 0 ) {
1969
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );
1970
        normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
1971
    }
1972
    zExp = aExp - bExp + 0x3FD;
1973
    shortShift64Left( aSig0 | 0x00100000, aSig1, 11, &aSig0, &aSig1 );
1974
    shortShift64Left( bSig0 | 0x00100000, bSig1, 11, &bSig0, &bSig1 );
1975
    if ( le64( bSig0, bSig1, aSig0, aSig1 ) ) {
1976
        shift64Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
1977
        ++zExp;
1978
    }
1979
    zSig0 = estimateDiv64To32( aSig0, aSig1, bSig0 );
1980
    mul64By32To96( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
1981
    sub96( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
1982
    while ( (sbits32) rem0 < 0 ) {
1983
        --zSig0;
1984
        add96( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
1985
    }
1986
    zSig1 = estimateDiv64To32( rem1, rem2, bSig0 );
1987
    if ( ( zSig1 & 0x3FF ) <= 4 ) {
1988
        mul64By32To96( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
1989
        sub96( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
1990
        while ( (sbits32) rem1 < 0 ) {
1991
            --zSig1;
1992
            add96( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
1993
        }
1994
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
1995
    }
1996
    shift64ExtraRightJamming( zSig0, zSig1, 0, 11, &zSig0, &zSig1, &zSig2 );
1997
    return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
1998

1999
}
2000

2001
#ifndef SOFTFLOAT_FOR_GCC
2002
/*
2003
-------------------------------------------------------------------------------
2004
Returns the remainder of the double-precision floating-point value `a'
2005
with respect to the corresponding value `b'.  The operation is performed
2006
according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2007
-------------------------------------------------------------------------------
2008
*/
2009
float64 float64_rem( float64 a, float64 b )
2010
{
2011
    flag aSign, bSign, zSign;
2012
    int16 aExp, bExp, expDiff;
2013
    bits32 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
2014
    bits32 allZero, alternateASig0, alternateASig1, sigMean1;
2015
    sbits32 sigMean0;
2016
    float64 z;
2017

2018
    aSig1 = extractFloat64Frac1( a );
2019
    aSig0 = extractFloat64Frac0( a );
2020
    aExp = extractFloat64Exp( a );
2021
    aSign = extractFloat64Sign( a );
2022
    bSig1 = extractFloat64Frac1( b );
2023
    bSig0 = extractFloat64Frac0( b );
2024
    bExp = extractFloat64Exp( b );
2025
    bSign = extractFloat64Sign( b );
2026
    if ( aExp == 0x7FF ) {
2027
        if (    ( aSig0 | aSig1 )
2028
             || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) {
2029
            return propagateFloat64NaN( a, b );
2030
        }
2031
        goto invalid;
2032
    }
2033
    if ( bExp == 0x7FF ) {
2034
        if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
2035
        return a;
2036
    }
2037
    if ( bExp == 0 ) {
2038
        if ( ( bSig0 | bSig1 ) == 0 ) {
2039
 invalid:
2040
            float_raise( float_flag_invalid );
2041
            return float64_default_nan;
2042
        }
2043
        normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
2044
    }
2045
    if ( aExp == 0 ) {
2046
        if ( ( aSig0 | aSig1 ) == 0 ) return a;
2047
        normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
2048
    }
2049
    expDiff = aExp - bExp;
2050
    if ( expDiff < -1 ) return a;
2051
    shortShift64Left(
2052
        aSig0 | 0x00100000, aSig1, 11 - ( expDiff < 0 ), &aSig0, &aSig1 );
2053
    shortShift64Left( bSig0 | 0x00100000, bSig1, 11, &bSig0, &bSig1 );
2054
    q = le64( bSig0, bSig1, aSig0, aSig1 );
2055
    if ( q ) sub64( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
2056
    expDiff -= 32;
2057
    while ( 0 < expDiff ) {
2058
        q = estimateDiv64To32( aSig0, aSig1, bSig0 );
2059
        q = ( 4 < q ) ? q - 4 : 0;
2060
        mul64By32To96( bSig0, bSig1, q, &term0, &term1, &term2 );
2061
        shortShift96Left( term0, term1, term2, 29, &term1, &term2, &allZero );
2062
        shortShift64Left( aSig0, aSig1, 29, &aSig0, &allZero );
2063
        sub64( aSig0, 0, term1, term2, &aSig0, &aSig1 );
2064
        expDiff -= 29;
2065
    }
2066
    if ( -32 < expDiff ) {
2067
        q = estimateDiv64To32( aSig0, aSig1, bSig0 );
2068
        q = ( 4 < q ) ? q - 4 : 0;
2069
        q >>= - expDiff;
2070
        shift64Right( bSig0, bSig1, 8, &bSig0, &bSig1 );
2071
        expDiff += 24;
2072
        if ( expDiff < 0 ) {
2073
            shift64Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
2074
        }
2075
        else {
2076
            shortShift64Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
2077
        }
2078
        mul64By32To96( bSig0, bSig1, q, &term0, &term1, &term2 );
2079
        sub64( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
2080
    }
2081
    else {
2082
        shift64Right( aSig0, aSig1, 8, &aSig0, &aSig1 );
2083
        shift64Right( bSig0, bSig1, 8, &bSig0, &bSig1 );
2084
    }
2085
    do {
2086
        alternateASig0 = aSig0;
2087
        alternateASig1 = aSig1;
2088
        ++q;
2089
        sub64( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
2090
    } while ( 0 <= (sbits32) aSig0 );
2091
    add64(
2092
        aSig0, aSig1, alternateASig0, alternateASig1, &sigMean0, &sigMean1 );
2093
    if (    ( sigMean0 < 0 )
2094
         || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
2095
        aSig0 = alternateASig0;
2096
        aSig1 = alternateASig1;
2097
    }
2098
    zSign = ( (sbits32) aSig0 < 0 );
2099
    if ( zSign ) sub64( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
2100
    return
2101
        normalizeRoundAndPackFloat64( aSign ^ zSign, bExp - 4, aSig0, aSig1 );
2102

2103
}
2104
#endif
2105

2106
#ifndef SOFTFLOAT_FOR_GCC
2107
/*
2108
-------------------------------------------------------------------------------
2109
Returns the square root of the double-precision floating-point value `a'.
2110
The operation is performed according to the IEC/IEEE Standard for Binary
2111
Floating-Point Arithmetic.
2112
-------------------------------------------------------------------------------
2113
*/
2114
float64 float64_sqrt( float64 a )
2115
{
2116
    flag aSign;
2117
    int16 aExp, zExp;
2118
    bits32 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
2119
    bits32 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
2120
    float64 z;
2121

2122
    aSig1 = extractFloat64Frac1( a );
2123
    aSig0 = extractFloat64Frac0( a );
2124
    aExp = extractFloat64Exp( a );
2125
    aSign = extractFloat64Sign( a );
2126
    if ( aExp == 0x7FF ) {
2127
        if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, a );
2128
        if ( ! aSign ) return a;
2129
        goto invalid;
2130
    }
2131
    if ( aSign ) {
2132
        if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
2133
 invalid:
2134
        float_raise( float_flag_invalid );
2135
        return float64_default_nan;
2136
    }
2137
    if ( aExp == 0 ) {
2138
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( 0, 0, 0, 0 );
2139
        normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
2140
    }
2141
    zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
2142
    aSig0 |= 0x00100000;
2143
    shortShift64Left( aSig0, aSig1, 11, &term0, &term1 );
2144
    zSig0 = ( estimateSqrt32( aExp, term0 )>>1 ) + 1;
2145
    if ( zSig0 == 0 ) zSig0 = 0x7FFFFFFF;
2146
    doubleZSig0 = zSig0 + zSig0;
2147
    shortShift64Left( aSig0, aSig1, 9 - ( aExp & 1 ), &aSig0, &aSig1 );
2148
    mul32To64( zSig0, zSig0, &term0, &term1 );
2149
    sub64( aSig0, aSig1, term0, term1, &rem0, &rem1 );
2150
    while ( (sbits32) rem0 < 0 ) {
2151
        --zSig0;
2152
        doubleZSig0 -= 2;
2153
        add64( rem0, rem1, 0, doubleZSig0 | 1, &rem0, &rem1 );
2154
    }
2155
    zSig1 = estimateDiv64To32( rem1, 0, doubleZSig0 );
2156
    if ( ( zSig1 & 0x1FF ) <= 5 ) {
2157
        if ( zSig1 == 0 ) zSig1 = 1;
2158
        mul32To64( doubleZSig0, zSig1, &term1, &term2 );
2159
        sub64( rem1, 0, term1, term2, &rem1, &rem2 );
2160
        mul32To64( zSig1, zSig1, &term2, &term3 );
2161
        sub96( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
2162
        while ( (sbits32) rem1 < 0 ) {
2163
            --zSig1;
2164
            shortShift64Left( 0, zSig1, 1, &term2, &term3 );
2165
            term3 |= 1;
2166
            term2 |= doubleZSig0;
2167
            add96( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
2168
        }
2169
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
2170
    }
2171
    shift64ExtraRightJamming( zSig0, zSig1, 0, 10, &zSig0, &zSig1, &zSig2 );
2172
    return roundAndPackFloat64( 0, zExp, zSig0, zSig1, zSig2 );
2173

2174
}
2175
#endif
2176

2177
/*
2178
-------------------------------------------------------------------------------
2179
Returns 1 if the double-precision floating-point value `a' is equal to
2180
the corresponding value `b', and 0 otherwise.  The comparison is performed
2181
according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2182
-------------------------------------------------------------------------------
2183
*/
2184
flag float64_eq( float64 a, float64 b )
2185
{
2186

2187
    if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2188
              && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2189
         || (    ( extractFloat64Exp( b ) == 0x7FF )
2190
              && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2191
       ) {
2192
        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
2193
            float_raise( float_flag_invalid );
2194
        }
2195
        return 0;
2196
    }
2197
    return ( a == b ) ||
2198
	( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 0 );
2199

2200
}
2201

2202
/*
2203
-------------------------------------------------------------------------------
2204
Returns 1 if the double-precision floating-point value `a' is less than
2205
or equal to the corresponding value `b', and 0 otherwise.  The comparison
2206
is performed according to the IEC/IEEE Standard for Binary Floating-Point
2207
Arithmetic.
2208
-------------------------------------------------------------------------------
2209
*/
2210
flag float64_le( float64 a, float64 b )
2211
{
2212
    flag aSign, bSign;
2213

2214
    if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2215
              && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2216
         || (    ( extractFloat64Exp( b ) == 0x7FF )
2217
              && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2218
       ) {
2219
        float_raise( float_flag_invalid );
2220
        return 0;
2221
    }
2222
    aSign = extractFloat64Sign( a );
2223
    bSign = extractFloat64Sign( b );
2224
    if ( aSign != bSign )
2225
	return aSign ||
2226
	    ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) ==
2227
	      0 );
2228
    return ( a == b ) ||
2229
	( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
2230
}
2231

2232
/*
2233
-------------------------------------------------------------------------------
2234
Returns 1 if the double-precision floating-point value `a' is less than
2235
the corresponding value `b', and 0 otherwise.  The comparison is performed
2236
according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2237
-------------------------------------------------------------------------------
2238
*/
2239
flag float64_lt( float64 a, float64 b )
2240
{
2241
    flag aSign, bSign;
2242

2243
    if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2244
              && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2245
         || (    ( extractFloat64Exp( b ) == 0x7FF )
2246
              && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2247
       ) {
2248
        float_raise( float_flag_invalid );
2249
        return 0;
2250
    }
2251
    aSign = extractFloat64Sign( a );
2252
    bSign = extractFloat64Sign( b );
2253
    if ( aSign != bSign )
2254
	return aSign &&
2255
	    ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) !=
2256
	      0 );
2257
    return ( a != b ) &&
2258
	( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
2259

2260
}
2261

2262
#ifndef SOFTFLOAT_FOR_GCC
2263
/*
2264
-------------------------------------------------------------------------------
2265
Returns 1 if the double-precision floating-point value `a' is equal to
2266
the corresponding value `b', and 0 otherwise.  The invalid exception is
2267
raised if either operand is a NaN.  Otherwise, the comparison is performed
2268
according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2269
-------------------------------------------------------------------------------
2270
*/
2271
flag float64_eq_signaling( float64 a, float64 b )
2272
{
2273

2274
    if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2275
              && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2276
         || (    ( extractFloat64Exp( b ) == 0x7FF )
2277
              && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2278
       ) {
2279
        float_raise( float_flag_invalid );
2280
        return 0;
2281
    }
2282
    return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
2283

2284
}
2285

2286
/*
2287
-------------------------------------------------------------------------------
2288
Returns 1 if the double-precision floating-point value `a' is less than or
2289
equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
2290
cause an exception.  Otherwise, the comparison is performed according to the
2291
IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2292
-------------------------------------------------------------------------------
2293
*/
2294
flag float64_le_quiet( float64 a, float64 b )
2295
{
2296
    flag aSign, bSign;
2297

2298
    if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2299
              && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2300
         || (    ( extractFloat64Exp( b ) == 0x7FF )
2301
              && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2302
       ) {
2303
        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
2304
            float_raise( float_flag_invalid );
2305
        }
2306
        return 0;
2307
    }
2308
    aSign = extractFloat64Sign( a );
2309
    bSign = extractFloat64Sign( b );
2310
    if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
2311
    return ( a == b ) || ( aSign ^ ( a < b ) );
2312

2313
}
2314

2315
/*
2316
-------------------------------------------------------------------------------
2317
Returns 1 if the double-precision floating-point value `a' is less than
2318
the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
2319
exception.  Otherwise, the comparison is performed according to the IEC/IEEE
2320
Standard for Binary Floating-Point Arithmetic.
2321
-------------------------------------------------------------------------------
2322
*/
2323
flag float64_lt_quiet( float64 a, float64 b )
2324
{
2325
    flag aSign, bSign;
2326

2327
    if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2328
              && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2329
         || (    ( extractFloat64Exp( b ) == 0x7FF )
2330
              && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2331
       ) {
2332
        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
2333
            float_raise( float_flag_invalid );
2334
        }
2335
        return 0;
2336
    }
2337
    aSign = extractFloat64Sign( a );
2338
    bSign = extractFloat64Sign( b );
2339
    if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
2340
    return ( a != b ) && ( aSign ^ ( a < b ) );
2341

2342
}
2343

2344
#endif
2345

2346
Product

Resources

Company