GitHub Repository: torvalds/linux
Path: blob/master/arch/sh/kernel/cpu/sh4/softfloat.c
/*
 * Floating point emulation support for subnormalised numbers on the SH4
 * architecture. This file is derived from the SoftFloat IEC/IEEE
 * Floating-point Arithmetic Package, Release 2, the original license of
 * which is reproduced below.
 *
 * ========================================================================
 *
 * This C source file is part of the SoftFloat IEC/IEEE Floating-point
 * Arithmetic Package, Release 2.
 *
 * Written by John R. Hauser. This work was made possible in part by the
 * International Computer Science Institute, located at Suite 600, 1947 Center
 * Street, Berkeley, California 94704. Funding was partially provided by the
 * National Science Foundation under grant MIP-9311980. The original version
 * of this code was written as part of a project to build a fixed-point vector
 * processor in collaboration with the University of California at Berkeley,
 * overseen by Profs. Nelson Morgan and John Wawrzynek. More information
 * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
 * arithmetic/softfloat.html'.
 *
 * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
 * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
 * TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
 * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
 * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
 *
 * Derivative works are acceptable, even for commercial purposes, so long as
 * (1) they include prominent notice that the work is derivative, and (2) they
 * include prominent notice akin to these three paragraphs for those parts of
 * this code that are retained.
 *
 * ========================================================================
 *
 * SH4 modifications by Ismail Dhaoui <[email protected]>
 * and Kamel Khelifi <[email protected]>
 */
#include <linux/kernel.h>
#include <cpu/fpu.h>
#include <asm/div64.h>

#define LIT64( a ) a##LL

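/*
 * NB: following the SoftFloat convention, the plain `intN'/`uintN'
 * typedefs name the most convenient integer type holding *at least*
 * N bits (hence `typedef int uint16'), while the `bitsN'/`sbitsN'
 * types are exactly N bits wide.
 */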
typedef char flag;
typedef unsigned char uint8;
typedef signed char int8;
typedef int uint16;
typedef int int16;
typedef unsigned int uint32;
typedef signed int int32;

typedef unsigned long long int bits64;
typedef signed long long int sbits64;

typedef unsigned char bits8;
typedef signed char sbits8;
typedef unsigned short int bits16;
typedef signed short int sbits16;
typedef unsigned int bits32;
typedef signed int sbits32;

typedef unsigned long long int uint64;
typedef signed long long int int64;

typedef unsigned long int float32;
typedef unsigned long long float64;

extern void float_raise(unsigned int flags);	/* in fpu.c */
extern int float_rounding_mode(void);	/* in fpu.c */

bits64 extractFloat64Frac(float64 a);
flag extractFloat64Sign(float64 a);
int16 extractFloat64Exp(float64 a);
int16 extractFloat32Exp(float32 a);
flag extractFloat32Sign(float32 a);
bits32 extractFloat32Frac(float32 a);
float64 packFloat64(flag zSign, int16 zExp, bits64 zSig);
void shift64RightJamming(bits64 a, int16 count, bits64 *zPtr);
float32 packFloat32(flag zSign, int16 zExp, bits32 zSig);
void shift32RightJamming(bits32 a, int16 count, bits32 *zPtr);
float64 float64_sub(float64 a, float64 b);
float32 float32_sub(float32 a, float32 b);
float32 float32_add(float32 a, float32 b);
float64 float64_add(float64 a, float64 b);
float64 float64_div(float64 a, float64 b);
float32 float32_div(float32 a, float32 b);
float32 float32_mul(float32 a, float32 b);
float64 float64_mul(float64 a, float64 b);
float32 float64_to_float32(float64 a);
void add128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr,
	    bits64 *z1Ptr);
void sub128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr,
	    bits64 *z1Ptr);
void mul64To128(bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr);

static int8 countLeadingZeros32(bits32 a);
static int8 countLeadingZeros64(bits64 a);
static float64 normalizeRoundAndPackFloat64(flag zSign, int16 zExp,
					    bits64 zSig);
static float64 subFloat64Sigs(float64 a, float64 b, flag zSign);
static float64 addFloat64Sigs(float64 a, float64 b, flag zSign);
static float32 roundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig);
static float32 normalizeRoundAndPackFloat32(flag zSign, int16 zExp,
					    bits32 zSig);
static float64 roundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig);
static float32 subFloat32Sigs(float32 a, float32 b, flag zSign);
static float32 addFloat32Sigs(float32 a, float32 b, flag zSign);
static void normalizeFloat64Subnormal(bits64 aSig, int16 *zExpPtr,
				      bits64 *zSigPtr);
static bits64 estimateDiv128To64(bits64 a0, bits64 a1, bits64 b);
static void normalizeFloat32Subnormal(bits32 aSig, int16 *zExpPtr,
				      bits32 *zSigPtr);

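/*
 * Field extraction and packing helpers for the IEC/IEEE single and
 * double formats. Note that packFloat32()/packFloat64() combine the
 * fields with `+' rather than `|', so a significand that rounds up
 * past its top bit carries into the exponent field, which the rounding
 * code below relies on.
 */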
bits64 extractFloat64Frac(float64 a)
{
	return a & LIT64(0x000FFFFFFFFFFFFF);
}

flag extractFloat64Sign(float64 a)
{
	return a >> 63;
}

int16 extractFloat64Exp(float64 a)
{
	return (a >> 52) & 0x7FF;
}

int16 extractFloat32Exp(float32 a)
{
	return (a >> 23) & 0xFF;
}

flag extractFloat32Sign(float32 a)
{
	return a >> 31;
}

bits32 extractFloat32Frac(float32 a)
{
	return a & 0x007FFFFF;
}

float64 packFloat64(flag zSign, int16 zExp, bits64 zSig)
{
	return (((bits64) zSign) << 63) + (((bits64) zExp) << 52) + zSig;
}

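/*
 * Shift `a' right by `count' bits; any non-zero bits shifted off are
 * "jammed" into the least significant bit of the result. This sticky
 * bit records that the discarded tail was non-zero, which the rounding
 * code needs in order to detect inexact results.
 */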
void shift64RightJamming(bits64 a, int16 count, bits64 *zPtr)
{
	bits64 z;

	if (count == 0) {
		z = a;
	} else if (count < 64) {
		z = (a >> count) | ((a << ((-count) & 63)) != 0);
	} else {
		z = (a != 0);
	}
	*zPtr = z;
}

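/*
 * Count the leading zero bits of `a': the argument is shifted left in
 * 16- and then 8-bit steps while it is small, and the remaining top
 * byte is resolved with a 256-entry lookup table. Returns 32 for
 * a == 0.
 */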
static int8 countLeadingZeros32(bits32 a)
{
	static const int8 countLeadingZerosHigh[] = {
		8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
		3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
		2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
		2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	};
	int8 shiftCount;

	shiftCount = 0;
	if (a < 0x10000) {
		shiftCount += 16;
		a <<= 16;
	}
	if (a < 0x1000000) {
		shiftCount += 8;
		a <<= 8;
	}
	shiftCount += countLeadingZerosHigh[a >> 24];
	return shiftCount;
}

static int8 countLeadingZeros64(bits64 a)
{
	int8 shiftCount;

	shiftCount = 0;
	if (a < ((bits64) 1) << 32) {
		shiftCount += 32;
	} else {
		a >>= 32;
	}
	shiftCount += countLeadingZeros32(a);
	return shiftCount;
}

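/*
 * Round-and-pack for a possibly unnormalized significand: shift left
 * so the leading one lands in bit 62, where roundAndPackFloat64()
 * expects it, adjusting the exponent to compensate.
 */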
static float64 normalizeRoundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig)
{
	int8 shiftCount;

	shiftCount = countLeadingZeros64(zSig) - 1;
	return roundAndPackFloat64(zSign, zExp - shiftCount,
				   zSig << shiftCount);
}

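/*
 * Subtract the magnitudes of two doubles whose signs are effectively
 * equal. The significands carry 10 extra guard bits so that precision
 * survives the alignment shift; an exact zero result takes its sign
 * from the rounding mode.
 */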
static float64 subFloat64Sigs(float64 a, float64 b, flag zSign)
{
	int16 aExp, bExp, zExp;
	bits64 aSig, bSig, zSig;
	int16 expDiff;

	aSig = extractFloat64Frac(a);
	aExp = extractFloat64Exp(a);
	bSig = extractFloat64Frac(b);
	bExp = extractFloat64Exp(b);
	expDiff = aExp - bExp;
	aSig <<= 10;
	bSig <<= 10;
	if (0 < expDiff)
		goto aExpBigger;
	if (expDiff < 0)
		goto bExpBigger;
	if (aExp == 0) {
		aExp = 1;
		bExp = 1;
	}
	if (bSig < aSig)
		goto aBigger;
	if (aSig < bSig)
		goto bBigger;
	return packFloat64(float_rounding_mode() == FPSCR_RM_ZERO, 0, 0);
bExpBigger:
	if (bExp == 0x7FF) {
		return packFloat64(zSign ^ 1, 0x7FF, 0);
	}
	if (aExp == 0) {
		++expDiff;
	} else {
		aSig |= LIT64(0x4000000000000000);
	}
	shift64RightJamming(aSig, -expDiff, &aSig);
	bSig |= LIT64(0x4000000000000000);
bBigger:
	zSig = bSig - aSig;
	zExp = bExp;
	zSign ^= 1;
	goto normalizeRoundAndPack;
aExpBigger:
	if (aExp == 0x7FF) {
		return a;
	}
	if (bExp == 0) {
		--expDiff;
	} else {
		bSig |= LIT64(0x4000000000000000);
	}
	shift64RightJamming(bSig, expDiff, &bSig);
	aSig |= LIT64(0x4000000000000000);
aBigger:
	zSig = aSig - bSig;
	zExp = aExp;
normalizeRoundAndPack:
	--zExp;
	return normalizeRoundAndPackFloat64(zSign, zExp, zSig);
}

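/*
 * Add the magnitudes of two doubles. Here the significands carry 9
 * guard bits, leaving headroom for the carry out of the top bit when
 * the aligned significands are summed.
 */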
static float64 addFloat64Sigs(float64 a, float64 b, flag zSign)
{
	int16 aExp, bExp, zExp;
	bits64 aSig, bSig, zSig;
	int16 expDiff;

	aSig = extractFloat64Frac(a);
	aExp = extractFloat64Exp(a);
	bSig = extractFloat64Frac(b);
	bExp = extractFloat64Exp(b);
	expDiff = aExp - bExp;
	aSig <<= 9;
	bSig <<= 9;
	if (0 < expDiff) {
		if (aExp == 0x7FF) {
			return a;
		}
		if (bExp == 0) {
			--expDiff;
		} else {
			bSig |= LIT64(0x2000000000000000);
		}
		shift64RightJamming(bSig, expDiff, &bSig);
		zExp = aExp;
	} else if (expDiff < 0) {
		if (bExp == 0x7FF) {
			return packFloat64(zSign, 0x7FF, 0);
		}
		if (aExp == 0) {
			++expDiff;
		} else {
			aSig |= LIT64(0x2000000000000000);
		}
		shift64RightJamming(aSig, -expDiff, &aSig);
		zExp = bExp;
	} else {
		if (aExp == 0x7FF) {
			return a;
		}
		if (aExp == 0)
			return packFloat64(zSign, 0, (aSig + bSig) >> 9);
		zSig = LIT64(0x4000000000000000) + aSig + bSig;
		zExp = aExp;
		goto roundAndPack;
	}
	aSig |= LIT64(0x2000000000000000);
	zSig = (aSig + bSig) << 1;
	--zExp;
	if ((sbits64) zSig < 0) {
		zSig = aSig + bSig;
		++zExp;
	}
roundAndPack:
	return roundAndPackFloat64(zSign, zExp, zSig);
}

float32 packFloat32(flag zSign, int16 zExp, bits32 zSig)
{
	return (((bits32) zSign) << 31) + (((bits32) zExp) << 23) + zSig;
}

void shift32RightJamming(bits32 a, int16 count, bits32 *zPtr)
{
	bits32 z;

	if (count == 0) {
		z = a;
	} else if (count < 32) {
		z = (a >> count) | ((a << ((-count) & 31)) != 0);
	} else {
		z = (a != 0);
	}
	*zPtr = z;
}

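/*
 * Round and pack a single-precision result. zSig arrives with 7 extra
 * low-order guard bits; adding 0x40 implements round-to-nearest, and
 * clearing the result's LSB on an exact tie (guard bits == 0x40) gives
 * ties-to-even. A zero increment implements round-to-zero. Overflow,
 * underflow and inexact conditions are reported through float_raise().
 */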
static float32 roundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig)
{
	flag roundNearestEven;
	int8 roundIncrement, roundBits;
	flag isTiny;

	/* SH4 has only 2 rounding modes - round to nearest and round to zero */
	roundNearestEven = (float_rounding_mode() == FPSCR_RM_NEAREST);
	roundIncrement = 0x40;
	if (!roundNearestEven) {
		roundIncrement = 0;
	}
	roundBits = zSig & 0x7F;
	if (0xFD <= (bits16) zExp) {
		if ((0xFD < zExp)
		    || ((zExp == 0xFD)
			&& ((sbits32) (zSig + roundIncrement) < 0))
		    ) {
			float_raise(FPSCR_CAUSE_OVERFLOW | FPSCR_CAUSE_INEXACT);
			return packFloat32(zSign, 0xFF,
					   0) - (roundIncrement == 0);
		}
		if (zExp < 0) {
			isTiny = (zExp < -1)
			    || (zSig + roundIncrement < 0x80000000);
			shift32RightJamming(zSig, -zExp, &zSig);
			zExp = 0;
			roundBits = zSig & 0x7F;
			if (isTiny && roundBits)
				float_raise(FPSCR_CAUSE_UNDERFLOW);
		}
	}
	if (roundBits)
		float_raise(FPSCR_CAUSE_INEXACT);
	zSig = (zSig + roundIncrement) >> 7;
	zSig &= ~(((roundBits ^ 0x40) == 0) & roundNearestEven);
	if (zSig == 0)
		zExp = 0;
	return packFloat32(zSign, zExp, zSig);
}

static float32 normalizeRoundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig)
{
	int8 shiftCount;

	shiftCount = countLeadingZeros32(zSig) - 1;
	return roundAndPackFloat32(zSign, zExp - shiftCount,
				   zSig << shiftCount);
}

static float64 roundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig)
{
	flag roundNearestEven;
	int16 roundIncrement, roundBits;
	flag isTiny;

	/* SH4 has only 2 rounding modes - round to nearest and round to zero */
	roundNearestEven = (float_rounding_mode() == FPSCR_RM_NEAREST);
	roundIncrement = 0x200;
	if (!roundNearestEven) {
		roundIncrement = 0;
	}
	roundBits = zSig & 0x3FF;
	if (0x7FD <= (bits16) zExp) {
		if ((0x7FD < zExp)
		    || ((zExp == 0x7FD)
			&& ((sbits64) (zSig + roundIncrement) < 0))
		    ) {
			float_raise(FPSCR_CAUSE_OVERFLOW | FPSCR_CAUSE_INEXACT);
			return packFloat64(zSign, 0x7FF,
					   0) - (roundIncrement == 0);
		}
		if (zExp < 0) {
			isTiny = (zExp < -1)
			    || (zSig + roundIncrement <
				LIT64(0x8000000000000000));
			shift64RightJamming(zSig, -zExp, &zSig);
			zExp = 0;
			roundBits = zSig & 0x3FF;
			if (isTiny && roundBits)
				float_raise(FPSCR_CAUSE_UNDERFLOW);
		}
	}
	if (roundBits)
		float_raise(FPSCR_CAUSE_INEXACT);
	zSig = (zSig + roundIncrement) >> 10;
	zSig &= ~(((roundBits ^ 0x200) == 0) & roundNearestEven);
	if (zSig == 0)
		zExp = 0;
	return packFloat64(zSign, zExp, zSig);
}

static float32 subFloat32Sigs(float32 a, float32 b, flag zSign)
{
	int16 aExp, bExp, zExp;
	bits32 aSig, bSig, zSig;
	int16 expDiff;

	aSig = extractFloat32Frac(a);
	aExp = extractFloat32Exp(a);
	bSig = extractFloat32Frac(b);
	bExp = extractFloat32Exp(b);
	expDiff = aExp - bExp;
	aSig <<= 7;
	bSig <<= 7;
	if (0 < expDiff)
		goto aExpBigger;
	if (expDiff < 0)
		goto bExpBigger;
	if (aExp == 0) {
		aExp = 1;
		bExp = 1;
	}
	if (bSig < aSig)
		goto aBigger;
	if (aSig < bSig)
		goto bBigger;
	return packFloat32(float_rounding_mode() == FPSCR_RM_ZERO, 0, 0);
bExpBigger:
	if (bExp == 0xFF) {
		return packFloat32(zSign ^ 1, 0xFF, 0);
	}
	if (aExp == 0) {
		++expDiff;
	} else {
		aSig |= 0x40000000;
	}
	shift32RightJamming(aSig, -expDiff, &aSig);
	bSig |= 0x40000000;
bBigger:
	zSig = bSig - aSig;
	zExp = bExp;
	zSign ^= 1;
	goto normalizeRoundAndPack;
aExpBigger:
	if (aExp == 0xFF) {
		return a;
	}
	if (bExp == 0) {
		--expDiff;
	} else {
		bSig |= 0x40000000;
	}
	shift32RightJamming(bSig, expDiff, &bSig);
	aSig |= 0x40000000;
aBigger:
	zSig = aSig - bSig;
	zExp = aExp;
normalizeRoundAndPack:
	--zExp;
	return normalizeRoundAndPackFloat32(zSign, zExp, zSig);
}

static float32 addFloat32Sigs(float32 a, float32 b, flag zSign)
{
	int16 aExp, bExp, zExp;
	bits32 aSig, bSig, zSig;
	int16 expDiff;

	aSig = extractFloat32Frac(a);
	aExp = extractFloat32Exp(a);
	bSig = extractFloat32Frac(b);
	bExp = extractFloat32Exp(b);
	expDiff = aExp - bExp;
	aSig <<= 6;
	bSig <<= 6;
	if (0 < expDiff) {
		if (aExp == 0xFF) {
			return a;
		}
		if (bExp == 0) {
			--expDiff;
		} else {
			bSig |= 0x20000000;
		}
		shift32RightJamming(bSig, expDiff, &bSig);
		zExp = aExp;
	} else if (expDiff < 0) {
		if (bExp == 0xFF) {
			return packFloat32(zSign, 0xFF, 0);
		}
		if (aExp == 0) {
			++expDiff;
		} else {
			aSig |= 0x20000000;
		}
		shift32RightJamming(aSig, -expDiff, &aSig);
		zExp = bExp;
	} else {
		if (aExp == 0xFF) {
			return a;
		}
		if (aExp == 0)
			return packFloat32(zSign, 0, (aSig + bSig) >> 6);
		zSig = 0x40000000 + aSig + bSig;
		zExp = aExp;
		goto roundAndPack;
	}
	aSig |= 0x20000000;
	zSig = (aSig + bSig) << 1;
	--zExp;
	if ((sbits32) zSig < 0) {
		zSig = aSig + bSig;
		++zExp;
	}
roundAndPack:
	return roundAndPackFloat32(zSign, zExp, zSig);
}

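/*
 * Public add/sub entry points: subtraction of like-signed operands
 * (and addition of unlike-signed ones) is a magnitude subtraction,
 * everything else a magnitude addition, so the wrappers below simply
 * dispatch on the operand signs.
 */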
float64 float64_sub(float64 a, float64 b)
{
	flag aSign, bSign;

	aSign = extractFloat64Sign(a);
	bSign = extractFloat64Sign(b);
	if (aSign == bSign) {
		return subFloat64Sigs(a, b, aSign);
	} else {
		return addFloat64Sigs(a, b, aSign);
	}
}

float32 float32_sub(float32 a, float32 b)
{
	flag aSign, bSign;

	aSign = extractFloat32Sign(a);
	bSign = extractFloat32Sign(b);
	if (aSign == bSign) {
		return subFloat32Sigs(a, b, aSign);
	} else {
		return addFloat32Sigs(a, b, aSign);
	}
}

float32 float32_add(float32 a, float32 b)
{
	flag aSign, bSign;

	aSign = extractFloat32Sign(a);
	bSign = extractFloat32Sign(b);
	if (aSign == bSign) {
		return addFloat32Sigs(a, b, aSign);
	} else {
		return subFloat32Sigs(a, b, aSign);
	}
}

float64 float64_add(float64 a, float64 b)
{
	flag aSign, bSign;

	aSign = extractFloat64Sign(a);
	bSign = extractFloat64Sign(b);
	if (aSign == bSign) {
		return addFloat64Sigs(a, b, aSign);
	} else {
		return subFloat64Sigs(a, b, aSign);
	}
}

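/*
 * Normalize a subnormal double-precision fraction: shift left until
 * the leading one sits in bit 52, the position of the implicit
 * integer bit, and return the exponent that compensates for the shift.
 */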
static void
normalizeFloat64Subnormal(bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr)
{
	int8 shiftCount;

	shiftCount = countLeadingZeros64(aSig) - 11;
	*zSigPtr = aSig << shiftCount;
	*zExpPtr = 1 - shiftCount;
}

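/*
 * 128-bit addition and subtraction on {high, low} pairs of 64-bit
 * halves; the unsigned comparisons (z1 < a1) and (a1 < b1) detect the
 * carry out of, respectively the borrow into, the low half.
 */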
void add128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr,
	    bits64 *z1Ptr)
{
	bits64 z1;

	z1 = a1 + b1;
	*z1Ptr = z1;
	*z0Ptr = a0 + b0 + (z1 < a1);
}

void
sub128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr,
       bits64 *z1Ptr)
{
	*z1Ptr = a1 - b1;
	*z0Ptr = a0 - b0 - (a1 < b1);
}

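/*
 * Approximate the 64-bit quotient of the 128-bit value {a0,a1} divided
 * by `b', using two 64-by-32-bit steps (via the kernel's do_div()). As
 * in the original SoftFloat, the estimate may exceed the true quotient
 * by a small margin; callers such as float64_div() watch for a
 * negative remainder and step the result back down.
 */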
static bits64 estimateDiv128To64(bits64 a0, bits64 a1, bits64 b)
{
	bits64 b0, b1;
	bits64 rem0, rem1, term0, term1;
	bits64 z, tmp;

	if (b <= a0)
		return LIT64(0xFFFFFFFFFFFFFFFF);
	b0 = b >> 32;
	tmp = a0;
	do_div(tmp, b0);

	z = (b0 << 32 <= a0) ? LIT64(0xFFFFFFFF00000000) : tmp << 32;
	mul64To128(b, z, &term0, &term1);
	sub128(a0, a1, term0, term1, &rem0, &rem1);
	while (((sbits64) rem0) < 0) {
		z -= LIT64(0x100000000);
		b1 = b << 32;
		add128(rem0, rem1, b0, b1, &rem0, &rem1);
	}
	rem0 = (rem0 << 32) | (rem1 >> 32);
	tmp = rem0;
	do_div(tmp, b0);
	z |= (b0 << 32 <= rem0) ? 0xFFFFFFFF : tmp;
	return z;
}

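/*
 * Full 64x64 -> 128-bit multiply built from four 32x32 partial
 * products, with explicit carry propagation between the middle terms.
 */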
void mul64To128(bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr)
{
	bits32 aHigh, aLow, bHigh, bLow;
	bits64 z0, zMiddleA, zMiddleB, z1;

	aLow = a;
	aHigh = a >> 32;
	bLow = b;
	bHigh = b >> 32;
	z1 = ((bits64) aLow) * bLow;
	zMiddleA = ((bits64) aLow) * bHigh;
	zMiddleB = ((bits64) aHigh) * bLow;
	z0 = ((bits64) aHigh) * bHigh;
	zMiddleA += zMiddleB;
	z0 += (((bits64) (zMiddleA < zMiddleB)) << 32) + (zMiddleA >> 32);
	zMiddleA <<= 32;
	z1 += zMiddleA;
	z0 += (z1 < zMiddleA);
	*z1Ptr = z1;
	*z0Ptr = z0;
}

static void normalizeFloat32Subnormal(bits32 aSig, int16 *zExpPtr,
				      bits32 *zSigPtr)
{
	int8 shiftCount;

	shiftCount = countLeadingZeros32(aSig) - 8;
	*zSigPtr = aSig << shiftCount;
	*zExpPtr = 1 - shiftCount;
}

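/*
 * Double-precision division. Special cases are handled first, in a
 * simplified way that never produces a NaN (0/0 raises the invalid
 * exception but still returns an infinity). The quotient is then
 * estimated with estimateDiv128To64() and, when its low bits sit near
 * a rounding boundary, verified against the remainder and corrected.
 */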
float64 float64_div(float64 a, float64 b)
{
	flag aSign, bSign, zSign;
	int16 aExp, bExp, zExp;
	bits64 aSig, bSig, zSig;
	bits64 rem0, rem1;
	bits64 term0, term1;

	aSig = extractFloat64Frac(a);
	aExp = extractFloat64Exp(a);
	aSign = extractFloat64Sign(a);
	bSig = extractFloat64Frac(b);
	bExp = extractFloat64Exp(b);
	bSign = extractFloat64Sign(b);
	zSign = aSign ^ bSign;
	if (aExp == 0x7FF) {
		if (bExp == 0x7FF) {
		}
		return packFloat64(zSign, 0x7FF, 0);
	}
	if (bExp == 0x7FF) {
		return packFloat64(zSign, 0, 0);
	}
	if (bExp == 0) {
		if (bSig == 0) {
			if ((aExp | aSig) == 0) {
				float_raise(FPSCR_CAUSE_INVALID);
			}
			return packFloat64(zSign, 0x7FF, 0);
		}
		normalizeFloat64Subnormal(bSig, &bExp, &bSig);
	}
	if (aExp == 0) {
		if (aSig == 0)
			return packFloat64(zSign, 0, 0);
		normalizeFloat64Subnormal(aSig, &aExp, &aSig);
	}
	zExp = aExp - bExp + 0x3FD;
	aSig = (aSig | LIT64(0x0010000000000000)) << 10;
	bSig = (bSig | LIT64(0x0010000000000000)) << 11;
	if (bSig <= (aSig + aSig)) {
		aSig >>= 1;
		++zExp;
	}
	zSig = estimateDiv128To64(aSig, 0, bSig);
	if ((zSig & 0x1FF) <= 2) {
		mul64To128(bSig, zSig, &term0, &term1);
		sub128(aSig, 0, term0, term1, &rem0, &rem1);
		while ((sbits64) rem0 < 0) {
			--zSig;
			add128(rem0, rem1, 0, bSig, &rem0, &rem1);
		}
		zSig |= (rem1 != 0);
	}
	return roundAndPackFloat64(zSign, zExp, zSig);
}

float32 float32_div(float32 a, float32 b)
{
	flag aSign, bSign, zSign;
	int16 aExp, bExp, zExp;
	bits32 aSig, bSig;
	uint64_t zSig;

	aSig = extractFloat32Frac(a);
	aExp = extractFloat32Exp(a);
	aSign = extractFloat32Sign(a);
	bSig = extractFloat32Frac(b);
	bExp = extractFloat32Exp(b);
	bSign = extractFloat32Sign(b);
	zSign = aSign ^ bSign;
	if (aExp == 0xFF) {
		if (bExp == 0xFF) {
		}
		return packFloat32(zSign, 0xFF, 0);
	}
	if (bExp == 0xFF) {
		return packFloat32(zSign, 0, 0);
	}
	if (bExp == 0) {
		if (bSig == 0) {
			return packFloat32(zSign, 0xFF, 0);
		}
		normalizeFloat32Subnormal(bSig, &bExp, &bSig);
	}
	if (aExp == 0) {
		if (aSig == 0)
			return packFloat32(zSign, 0, 0);
		normalizeFloat32Subnormal(aSig, &aExp, &aSig);
	}
	zExp = aExp - bExp + 0x7D;
	aSig = (aSig | 0x00800000) << 7;
	bSig = (bSig | 0x00800000) << 8;
	if (bSig <= (aSig + aSig)) {
		aSig >>= 1;
		++zExp;
	}
	zSig = (((bits64) aSig) << 32);
	do_div(zSig, bSig);

	if ((zSig & 0x3F) == 0) {
		zSig |= (((bits64) bSig) * zSig != ((bits64) aSig) << 32);
	}
	return roundAndPackFloat32(zSign, zExp, (bits32)zSig);
}

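/*
 * Single-precision multiplication. Subnormal inputs are normalized
 * first and exact infinities force an infinite result; the 24-bit
 * significands are then multiplied into a 64-bit product whose low
 * half is jammed into the sticky bit before rounding.
 */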
float32 float32_mul(float32 a, float32 b)
{
	char aSign, bSign, zSign;
	int aExp, bExp, zExp;
	unsigned int aSig, bSig;
	unsigned long long zSig64;
	unsigned int zSig;

	aSig = extractFloat32Frac(a);
	aExp = extractFloat32Exp(a);
	aSign = extractFloat32Sign(a);
	bSig = extractFloat32Frac(b);
	bExp = extractFloat32Exp(b);
	bSign = extractFloat32Sign(b);
	zSign = aSign ^ bSign;
	if (aExp == 0) {
		if (aSig == 0)
			return packFloat32(zSign, 0, 0);
		normalizeFloat32Subnormal(aSig, &aExp, &aSig);
	}
	if (bExp == 0) {
		if (bSig == 0)
			return packFloat32(zSign, 0, 0);
		normalizeFloat32Subnormal(bSig, &bExp, &bSig);
	}
	if ((bExp == 0xff && bSig == 0) || (aExp == 0xff && aSig == 0))
		return roundAndPackFloat32(zSign, 0xff, 0);

	zExp = aExp + bExp - 0x7F;
	aSig = (aSig | 0x00800000) << 7;
	bSig = (bSig | 0x00800000) << 8;
	shift64RightJamming(((unsigned long long)aSig) * bSig, 32, &zSig64);
	zSig = zSig64;
	if (0 <= (signed int)(zSig << 1)) {
		zSig <<= 1;
		--zExp;
	}
	return roundAndPackFloat32(zSign, zExp, zSig);
}

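/*
 * Double-precision multiplication: the same scheme as float32_mul(),
 * except the double-width product is formed with mul64To128() and its
 * low half is folded into the sticky bit of the high half.
 */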
float64 float64_mul(float64 a, float64 b)
{
	char aSign, bSign, zSign;
	int aExp, bExp, zExp;
	unsigned long long int aSig, bSig, zSig0, zSig1;

	aSig = extractFloat64Frac(a);
	aExp = extractFloat64Exp(a);
	aSign = extractFloat64Sign(a);
	bSig = extractFloat64Frac(b);
	bExp = extractFloat64Exp(b);
	bSign = extractFloat64Sign(b);
	zSign = aSign ^ bSign;

	if (aExp == 0) {
		if (aSig == 0)
			return packFloat64(zSign, 0, 0);
		normalizeFloat64Subnormal(aSig, &aExp, &aSig);
	}
	if (bExp == 0) {
		if (bSig == 0)
			return packFloat64(zSign, 0, 0);
		normalizeFloat64Subnormal(bSig, &bExp, &bSig);
	}
	if ((aExp == 0x7ff && aSig == 0) || (bExp == 0x7ff && bSig == 0))
		return roundAndPackFloat64(zSign, 0x7ff, 0);

	zExp = aExp + bExp - 0x3FF;
	aSig = (aSig | 0x0010000000000000LL) << 10;
	bSig = (bSig | 0x0010000000000000LL) << 11;
	mul64To128(aSig, bSig, &zSig0, &zSig1);
	zSig0 |= (zSig1 != 0);
	if (0 <= (signed long long int)(zSig0 << 1)) {
		zSig0 <<= 1;
		--zExp;
	}
	return roundAndPackFloat64(zSign, zExp, zSig0);
}

/*
 * Returns the result of converting the double-precision floating-point
 * value `a' to the single-precision floating-point format. The
 * conversion is performed according to the IEC/IEEE Standard for
 * Binary Floating-point Arithmetic.
 */
float32 float64_to_float32(float64 a)
{
	flag aSign;
	int16 aExp;
	bits64 aSig;
	bits32 zSig;

	aSig = extractFloat64Frac(a);
	aExp = extractFloat64Exp(a);
	aSign = extractFloat64Sign(a);

	shift64RightJamming(aSig, 22, &aSig);
	zSig = aSig;
	if (aExp || zSig) {
		zSig |= 0x40000000;
		aExp -= 0x381;
	}
	return roundAndPackFloat32(aSign, aExp, zSig);
}