GitHub Repository: torvalds/linux
Path: blob/master/arch/sh/kernel/cpu/sh4/softfloat.c
/*
 * Floating point emulation support for subnormalised numbers on the SH4
 * architecture. This file is derived from the SoftFloat IEC/IEEE
 * Floating-point Arithmetic Package, Release 2, the original license of
 * which is reproduced below.
 *
 * ========================================================================
 *
 * This C source file is part of the SoftFloat IEC/IEEE Floating-point
 * Arithmetic Package, Release 2.
 *
 * Written by John R. Hauser. This work was made possible in part by the
 * International Computer Science Institute, located at Suite 600, 1947 Center
 * Street, Berkeley, California 94704. Funding was partially provided by the
 * National Science Foundation under grant MIP-9311980. The original version
 * of this code was written as part of a project to build a fixed-point vector
 * processor in collaboration with the University of California at Berkeley,
 * overseen by Profs. Nelson Morgan and John Wawrzynek. More information
 * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
 * arithmetic/softfloat.html'.
 *
 * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
 * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
 * TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
 * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
 * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
 *
 * Derivative works are acceptable, even for commercial purposes, so long as
 * (1) they include prominent notice that the work is derivative, and (2) they
 * include prominent notice akin to these three paragraphs for those parts of
 * this code that are retained.
 *
 * ========================================================================
 *
 * SH4 modifications by Ismail Dhaoui <[email protected]>
 * and Kamel Khelifi <[email protected]>
 */
#include <linux/kernel.h>
#include <cpu/fpu.h>
#include <asm/div64.h>

#define LIT64( a ) a##LL

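/*
 * NB: following the SoftFloat convention, the plain `intN'/`uintN'
 * typedefs name the most convenient integer type holding *at least*
 * N bits (hence `typedef int uint16'), while the `bitsN'/`sbitsN'
 * types are exactly N bits wide.
 */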
typedef char flag;
typedef unsigned char uint8;
typedef signed char int8;
typedef int uint16;
typedef int int16;
typedef unsigned int uint32;
typedef signed int int32;

typedef unsigned long long int bits64;
typedef signed long long int sbits64;

typedef unsigned char bits8;
typedef signed char sbits8;
typedef unsigned short int bits16;
typedef signed short int sbits16;
typedef unsigned int bits32;
typedef signed int sbits32;

typedef unsigned long long int uint64;
typedef signed long long int int64;

typedef unsigned long int float32;
typedef unsigned long long float64;

extern void float_raise(unsigned int flags);	/* in fpu.c */
extern int float_rounding_mode(void);	/* in fpu.c */

bits64 extractFloat64Frac(float64 a);
flag extractFloat64Sign(float64 a);
int16 extractFloat64Exp(float64 a);
int16 extractFloat32Exp(float32 a);
flag extractFloat32Sign(float32 a);
bits32 extractFloat32Frac(float32 a);
float64 packFloat64(flag zSign, int16 zExp, bits64 zSig);
void shift64RightJamming(bits64 a, int16 count, bits64 *zPtr);
float32 packFloat32(flag zSign, int16 zExp, bits32 zSig);
void shift32RightJamming(bits32 a, int16 count, bits32 *zPtr);
float64 float64_sub(float64 a, float64 b);
float32 float32_sub(float32 a, float32 b);
float32 float32_add(float32 a, float32 b);
float64 float64_add(float64 a, float64 b);
float64 float64_div(float64 a, float64 b);
float32 float32_div(float32 a, float32 b);
float32 float32_mul(float32 a, float32 b);
float64 float64_mul(float64 a, float64 b);
float32 float64_to_float32(float64 a);
void add128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr,
	    bits64 *z1Ptr);
void sub128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr,
	    bits64 *z1Ptr);
void mul64To128(bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr);

static int8 countLeadingZeros32(bits32 a);
static int8 countLeadingZeros64(bits64 a);
static float64 normalizeRoundAndPackFloat64(flag zSign, int16 zExp,
					    bits64 zSig);
static float64 subFloat64Sigs(float64 a, float64 b, flag zSign);
static float64 addFloat64Sigs(float64 a, float64 b, flag zSign);
static float32 roundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig);
static float32 normalizeRoundAndPackFloat32(flag zSign, int16 zExp,
					    bits32 zSig);
static float64 roundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig);
static float32 subFloat32Sigs(float32 a, float32 b, flag zSign);
static float32 addFloat32Sigs(float32 a, float32 b, flag zSign);
static void normalizeFloat64Subnormal(bits64 aSig, int16 *zExpPtr,
				      bits64 *zSigPtr);
static bits64 estimateDiv128To64(bits64 a0, bits64 a1, bits64 b);
static void normalizeFloat32Subnormal(bits32 aSig, int16 *zExpPtr,
				      bits32 *zSigPtr);

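/*
 * Field extraction and packing helpers for the IEC/IEEE single and
 * double formats. Note that packFloat32()/packFloat64() combine the
 * fields with `+' rather than `|', so a significand that rounds up
 * past its top bit carries into the exponent field, which the rounding
 * code below relies on.
 */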
bits64 extractFloat64Frac(float64 a)
{
	return a & LIT64(0x000FFFFFFFFFFFFF);
}

flag extractFloat64Sign(float64 a)
{
	return a >> 63;
}

int16 extractFloat64Exp(float64 a)
{
	return (a >> 52) & 0x7FF;
}

int16 extractFloat32Exp(float32 a)
{
	return (a >> 23) & 0xFF;
}

flag extractFloat32Sign(float32 a)
{
	return a >> 31;
}

bits32 extractFloat32Frac(float32 a)
{
	return a & 0x007FFFFF;
}

float64 packFloat64(flag zSign, int16 zExp, bits64 zSig)
{
	return (((bits64) zSign) << 63) + (((bits64) zExp) << 52) + zSig;
}

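/*
 * Shift `a' right by `count' bits; any non-zero bits shifted off are
 * "jammed" into the least significant bit of the result. This sticky
 * bit records that the discarded tail was non-zero, which the rounding
 * code needs in order to detect inexact results.
 */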
void shift64RightJamming(bits64 a, int16 count, bits64 *zPtr)
{
	bits64 z;

	if (count == 0) {
		z = a;
	} else if (count < 64) {
		z = (a >> count) | ((a << ((-count) & 63)) != 0);
	} else {
		z = (a != 0);
	}
	*zPtr = z;
}

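/*
 * Count the leading zero bits of `a': the argument is shifted left in
 * 16- and then 8-bit steps while it is small, and the remaining top
 * byte is resolved with a 256-entry lookup table. Returns 32 for
 * a == 0.
 */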
static int8 countLeadingZeros32(bits32 a)
{
	static const int8 countLeadingZerosHigh[] = {
		8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
		3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
		2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
		2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	};
	int8 shiftCount;

	shiftCount = 0;
	if (a < 0x10000) {
		shiftCount += 16;
		a <<= 16;
	}
	if (a < 0x1000000) {
		shiftCount += 8;
		a <<= 8;
	}
	shiftCount += countLeadingZerosHigh[a >> 24];
	return shiftCount;
}

static int8 countLeadingZeros64(bits64 a)
{
	int8 shiftCount;

	shiftCount = 0;
	if (a < ((bits64) 1) << 32) {
		shiftCount += 32;
	} else {
		a >>= 32;
	}
	shiftCount += countLeadingZeros32(a);
	return shiftCount;
}

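/*
 * Round-and-pack for a possibly unnormalized significand: shift left
 * so the leading one lands in bit 62, where roundAndPackFloat64()
 * expects it, adjusting the exponent to compensate.
 */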
static float64 normalizeRoundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig)
{
	int8 shiftCount;

	shiftCount = countLeadingZeros64(zSig) - 1;
	return roundAndPackFloat64(zSign, zExp - shiftCount,
				   zSig << shiftCount);
}

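/*
 * Subtract the magnitudes of two doubles whose signs are effectively
 * equal. The significands carry 10 extra guard bits so that precision
 * survives the alignment shift; an exact zero result takes its sign
 * from the rounding mode.
 */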
static float64 subFloat64Sigs(float64 a, float64 b, flag zSign)
{
	int16 aExp, bExp, zExp;
	bits64 aSig, bSig, zSig;
	int16 expDiff;

	aSig = extractFloat64Frac(a);
	aExp = extractFloat64Exp(a);
	bSig = extractFloat64Frac(b);
	bExp = extractFloat64Exp(b);
	expDiff = aExp - bExp;
	aSig <<= 10;
	bSig <<= 10;
	if (0 < expDiff)
		goto aExpBigger;
	if (expDiff < 0)
		goto bExpBigger;
	if (aExp == 0) {
		aExp = 1;
		bExp = 1;
	}
	if (bSig < aSig)
		goto aBigger;
	if (aSig < bSig)
		goto bBigger;
	return packFloat64(float_rounding_mode() == FPSCR_RM_ZERO, 0, 0);
bExpBigger:
	if (bExp == 0x7FF) {
		return packFloat64(zSign ^ 1, 0x7FF, 0);
	}
	if (aExp == 0) {
		++expDiff;
	} else {
		aSig |= LIT64(0x4000000000000000);
	}
	shift64RightJamming(aSig, -expDiff, &aSig);
	bSig |= LIT64(0x4000000000000000);
bBigger:
	zSig = bSig - aSig;
	zExp = bExp;
	zSign ^= 1;
	goto normalizeRoundAndPack;
aExpBigger:
	if (aExp == 0x7FF) {
		return a;
	}
	if (bExp == 0) {
		--expDiff;
	} else {
		bSig |= LIT64(0x4000000000000000);
	}
	shift64RightJamming(bSig, expDiff, &bSig);
	aSig |= LIT64(0x4000000000000000);
aBigger:
	zSig = aSig - bSig;
	zExp = aExp;
normalizeRoundAndPack:
	--zExp;
	return normalizeRoundAndPackFloat64(zSign, zExp, zSig);
}

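/*
 * Add the magnitudes of two doubles. Here the significands carry 9
 * guard bits, leaving headroom for the carry out of the top bit when
 * the aligned significands are summed.
 */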
static float64 addFloat64Sigs(float64 a, float64 b, flag zSign)
{
	int16 aExp, bExp, zExp;
	bits64 aSig, bSig, zSig;
	int16 expDiff;

	aSig = extractFloat64Frac(a);
	aExp = extractFloat64Exp(a);
	bSig = extractFloat64Frac(b);
	bExp = extractFloat64Exp(b);
	expDiff = aExp - bExp;
	aSig <<= 9;
	bSig <<= 9;
	if (0 < expDiff) {
		if (aExp == 0x7FF) {
			return a;
		}
		if (bExp == 0) {
			--expDiff;
		} else {
			bSig |= LIT64(0x2000000000000000);
		}
		shift64RightJamming(bSig, expDiff, &bSig);
		zExp = aExp;
	} else if (expDiff < 0) {
		if (bExp == 0x7FF) {
			return packFloat64(zSign, 0x7FF, 0);
		}
		if (aExp == 0) {
			++expDiff;
		} else {
			aSig |= LIT64(0x2000000000000000);
		}
		shift64RightJamming(aSig, -expDiff, &aSig);
		zExp = bExp;
	} else {
		if (aExp == 0x7FF) {
			return a;
		}
		if (aExp == 0)
			return packFloat64(zSign, 0, (aSig + bSig) >> 9);
		zSig = LIT64(0x4000000000000000) + aSig + bSig;
		zExp = aExp;
		goto roundAndPack;
	}
	aSig |= LIT64(0x2000000000000000);
	zSig = (aSig + bSig) << 1;
	--zExp;
	if ((sbits64) zSig < 0) {
		zSig = aSig + bSig;
		++zExp;
	}
roundAndPack:
	return roundAndPackFloat64(zSign, zExp, zSig);
}

float32 packFloat32(flag zSign, int16 zExp, bits32 zSig)
{
	return (((bits32) zSign) << 31) + (((bits32) zExp) << 23) + zSig;
}

void shift32RightJamming(bits32 a, int16 count, bits32 *zPtr)
{
	bits32 z;

	if (count == 0) {
		z = a;
	} else if (count < 32) {
		z = (a >> count) | ((a << ((-count) & 31)) != 0);
	} else {
		z = (a != 0);
	}
	*zPtr = z;
}

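/*
 * Round and pack a single-precision result. zSig arrives with 7 extra
 * low-order guard bits; adding 0x40 implements round-to-nearest, and
 * clearing the result's LSB on an exact tie (guard bits == 0x40) gives
 * ties-to-even. A zero increment implements round-to-zero. Overflow,
 * underflow and inexact conditions are reported through float_raise().
 */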
static float32 roundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig)
{
	flag roundNearestEven;
	int8 roundIncrement, roundBits;
	flag isTiny;

	/* SH4 has only 2 rounding modes - round to nearest and round to zero */
	roundNearestEven = (float_rounding_mode() == FPSCR_RM_NEAREST);
	roundIncrement = 0x40;
	if (!roundNearestEven) {
		roundIncrement = 0;
	}
	roundBits = zSig & 0x7F;
	if (0xFD <= (bits16) zExp) {
		if ((0xFD < zExp)
		    || ((zExp == 0xFD)
			&& ((sbits32) (zSig + roundIncrement) < 0))
		    ) {
			float_raise(FPSCR_CAUSE_OVERFLOW | FPSCR_CAUSE_INEXACT);
			return packFloat32(zSign, 0xFF,
					   0) - (roundIncrement == 0);
		}
		if (zExp < 0) {
			isTiny = (zExp < -1)
			    || (zSig + roundIncrement < 0x80000000);
			shift32RightJamming(zSig, -zExp, &zSig);
			zExp = 0;
			roundBits = zSig & 0x7F;
			if (isTiny && roundBits)
				float_raise(FPSCR_CAUSE_UNDERFLOW);
		}
	}
	if (roundBits)
		float_raise(FPSCR_CAUSE_INEXACT);
	zSig = (zSig + roundIncrement) >> 7;
	zSig &= ~(((roundBits ^ 0x40) == 0) & roundNearestEven);
	if (zSig == 0)
		zExp = 0;
	return packFloat32(zSign, zExp, zSig);
}

static float32 normalizeRoundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig)
{
	int8 shiftCount;

	shiftCount = countLeadingZeros32(zSig) - 1;
	return roundAndPackFloat32(zSign, zExp - shiftCount,
				   zSig << shiftCount);
}

static float64 roundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig)
{
	flag roundNearestEven;
	int16 roundIncrement, roundBits;
	flag isTiny;

	/* SH4 has only 2 rounding modes - round to nearest and round to zero */
	roundNearestEven = (float_rounding_mode() == FPSCR_RM_NEAREST);
	roundIncrement = 0x200;
	if (!roundNearestEven) {
		roundIncrement = 0;
	}
	roundBits = zSig & 0x3FF;
	if (0x7FD <= (bits16) zExp) {
		if ((0x7FD < zExp)
		    || ((zExp == 0x7FD)
			&& ((sbits64) (zSig + roundIncrement) < 0))
		    ) {
			float_raise(FPSCR_CAUSE_OVERFLOW | FPSCR_CAUSE_INEXACT);
			return packFloat64(zSign, 0x7FF,
					   0) - (roundIncrement == 0);
		}
		if (zExp < 0) {
			isTiny = (zExp < -1)
			    || (zSig + roundIncrement <
				LIT64(0x8000000000000000));
			shift64RightJamming(zSig, -zExp, &zSig);
			zExp = 0;
			roundBits = zSig & 0x3FF;
			if (isTiny && roundBits)
				float_raise(FPSCR_CAUSE_UNDERFLOW);
		}
	}
	if (roundBits)
		float_raise(FPSCR_CAUSE_INEXACT);
	zSig = (zSig + roundIncrement) >> 10;
	zSig &= ~(((roundBits ^ 0x200) == 0) & roundNearestEven);
	if (zSig == 0)
		zExp = 0;
	return packFloat64(zSign, zExp, zSig);
}

static float32 subFloat32Sigs(float32 a, float32 b, flag zSign)
{
	int16 aExp, bExp, zExp;
	bits32 aSig, bSig, zSig;
	int16 expDiff;

	aSig = extractFloat32Frac(a);
	aExp = extractFloat32Exp(a);
	bSig = extractFloat32Frac(b);
	bExp = extractFloat32Exp(b);
	expDiff = aExp - bExp;
	aSig <<= 7;
	bSig <<= 7;
	if (0 < expDiff)
		goto aExpBigger;
	if (expDiff < 0)
		goto bExpBigger;
	if (aExp == 0) {
		aExp = 1;
		bExp = 1;
	}
	if (bSig < aSig)
		goto aBigger;
	if (aSig < bSig)
		goto bBigger;
	return packFloat32(float_rounding_mode() == FPSCR_RM_ZERO, 0, 0);
bExpBigger:
	if (bExp == 0xFF) {
		return packFloat32(zSign ^ 1, 0xFF, 0);
	}
	if (aExp == 0) {
		++expDiff;
	} else {
		aSig |= 0x40000000;
	}
	shift32RightJamming(aSig, -expDiff, &aSig);
	bSig |= 0x40000000;
bBigger:
	zSig = bSig - aSig;
	zExp = bExp;
	zSign ^= 1;
	goto normalizeRoundAndPack;
aExpBigger:
	if (aExp == 0xFF) {
		return a;
	}
	if (bExp == 0) {
		--expDiff;
	} else {
		bSig |= 0x40000000;
	}
	shift32RightJamming(bSig, expDiff, &bSig);
	aSig |= 0x40000000;
aBigger:
	zSig = aSig - bSig;
	zExp = aExp;
normalizeRoundAndPack:
	--zExp;
	return normalizeRoundAndPackFloat32(zSign, zExp, zSig);
}

static float32 addFloat32Sigs(float32 a, float32 b, flag zSign)
{
	int16 aExp, bExp, zExp;
	bits32 aSig, bSig, zSig;
	int16 expDiff;

	aSig = extractFloat32Frac(a);
	aExp = extractFloat32Exp(a);
	bSig = extractFloat32Frac(b);
	bExp = extractFloat32Exp(b);
	expDiff = aExp - bExp;
	aSig <<= 6;
	bSig <<= 6;
	if (0 < expDiff) {
		if (aExp == 0xFF) {
			return a;
		}
		if (bExp == 0) {
			--expDiff;
		} else {
			bSig |= 0x20000000;
		}
		shift32RightJamming(bSig, expDiff, &bSig);
		zExp = aExp;
	} else if (expDiff < 0) {
		if (bExp == 0xFF) {
			return packFloat32(zSign, 0xFF, 0);
		}
		if (aExp == 0) {
			++expDiff;
		} else {
			aSig |= 0x20000000;
		}
		shift32RightJamming(aSig, -expDiff, &aSig);
		zExp = bExp;
	} else {
		if (aExp == 0xFF) {
			return a;
		}
		if (aExp == 0)
			return packFloat32(zSign, 0, (aSig + bSig) >> 6);
		zSig = 0x40000000 + aSig + bSig;
		zExp = aExp;
		goto roundAndPack;
	}
	aSig |= 0x20000000;
	zSig = (aSig + bSig) << 1;
	--zExp;
	if ((sbits32) zSig < 0) {
		zSig = aSig + bSig;
		++zExp;
	}
roundAndPack:
	return roundAndPackFloat32(zSign, zExp, zSig);
}

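/*
 * Public add/sub entry points: subtraction of like-signed operands
 * (and addition of unlike-signed ones) is a magnitude subtraction,
 * everything else a magnitude addition, so the wrappers below simply
 * dispatch on the operand signs.
 */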
float64 float64_sub(float64 a, float64 b)
{
	flag aSign, bSign;

	aSign = extractFloat64Sign(a);
	bSign = extractFloat64Sign(b);
	if (aSign == bSign) {
		return subFloat64Sigs(a, b, aSign);
	} else {
		return addFloat64Sigs(a, b, aSign);
	}
}

float32 float32_sub(float32 a, float32 b)
{
	flag aSign, bSign;

	aSign = extractFloat32Sign(a);
	bSign = extractFloat32Sign(b);
	if (aSign == bSign) {
		return subFloat32Sigs(a, b, aSign);
	} else {
		return addFloat32Sigs(a, b, aSign);
	}
}

float32 float32_add(float32 a, float32 b)
{
	flag aSign, bSign;

	aSign = extractFloat32Sign(a);
	bSign = extractFloat32Sign(b);
	if (aSign == bSign) {
		return addFloat32Sigs(a, b, aSign);
	} else {
		return subFloat32Sigs(a, b, aSign);
	}
}

float64 float64_add(float64 a, float64 b)
{
	flag aSign, bSign;

	aSign = extractFloat64Sign(a);
	bSign = extractFloat64Sign(b);
	if (aSign == bSign) {
		return addFloat64Sigs(a, b, aSign);
	} else {
		return subFloat64Sigs(a, b, aSign);
	}
}

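/*
 * Normalize a subnormal double-precision fraction: shift left until
 * the leading one sits in bit 52, the position of the implicit
 * integer bit, and return the exponent that compensates for the shift.
 */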
static void
normalizeFloat64Subnormal(bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr)
{
	int8 shiftCount;

	shiftCount = countLeadingZeros64(aSig) - 11;
	*zSigPtr = aSig << shiftCount;
	*zExpPtr = 1 - shiftCount;
}

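/*
 * 128-bit addition and subtraction on {high, low} pairs of 64-bit
 * halves; the unsigned comparisons (z1 < a1) and (a1 < b1) detect the
 * carry out of, respectively the borrow into, the low half.
 */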
void add128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr,
	    bits64 *z1Ptr)
{
	bits64 z1;

	z1 = a1 + b1;
	*z1Ptr = z1;
	*z0Ptr = a0 + b0 + (z1 < a1);
}

void
sub128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr,
       bits64 *z1Ptr)
{
	*z1Ptr = a1 - b1;
	*z0Ptr = a0 - b0 - (a1 < b1);
}

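/*
 * Approximate the 64-bit quotient of the 128-bit value {a0,a1} divided
 * by `b', using two 64-by-32-bit steps (via the kernel's do_div()). As
 * in the original SoftFloat, the estimate may exceed the true quotient
 * by a small margin; callers such as float64_div() watch for a
 * negative remainder and step the result back down.
 */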
static bits64 estimateDiv128To64(bits64 a0, bits64 a1, bits64 b)
{
	bits64 b0, b1;
	bits64 rem0, rem1, term0, term1;
	bits64 z, tmp;

	if (b <= a0)
		return LIT64(0xFFFFFFFFFFFFFFFF);
	b0 = b >> 32;
	tmp = a0;
	do_div(tmp, b0);

	z = (b0 << 32 <= a0) ? LIT64(0xFFFFFFFF00000000) : tmp << 32;
	mul64To128(b, z, &term0, &term1);
	sub128(a0, a1, term0, term1, &rem0, &rem1);
	while (((sbits64) rem0) < 0) {
		z -= LIT64(0x100000000);
		b1 = b << 32;
		add128(rem0, rem1, b0, b1, &rem0, &rem1);
	}
	rem0 = (rem0 << 32) | (rem1 >> 32);
	tmp = rem0;
	do_div(tmp, b0);
	z |= (b0 << 32 <= rem0) ? 0xFFFFFFFF : tmp;
	return z;
}

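/*
 * Full 64x64 -> 128-bit multiply built from four 32x32 partial
 * products, with explicit carry propagation between the middle terms.
 */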
void mul64To128(bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr)
{
	bits32 aHigh, aLow, bHigh, bLow;
	bits64 z0, zMiddleA, zMiddleB, z1;

	aLow = a;
	aHigh = a >> 32;
	bLow = b;
	bHigh = b >> 32;
	z1 = ((bits64) aLow) * bLow;
	zMiddleA = ((bits64) aLow) * bHigh;
	zMiddleB = ((bits64) aHigh) * bLow;
	z0 = ((bits64) aHigh) * bHigh;
	zMiddleA += zMiddleB;
	z0 += (((bits64) (zMiddleA < zMiddleB)) << 32) + (zMiddleA >> 32);
	zMiddleA <<= 32;
	z1 += zMiddleA;
	z0 += (z1 < zMiddleA);
	*z1Ptr = z1;
	*z0Ptr = z0;
}

static void normalizeFloat32Subnormal(bits32 aSig, int16 *zExpPtr,
				      bits32 *zSigPtr)
{
	int8 shiftCount;

	shiftCount = countLeadingZeros32(aSig) - 8;
	*zSigPtr = aSig << shiftCount;
	*zExpPtr = 1 - shiftCount;
}

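/*
 * Double-precision division. Special cases are handled first, in a
 * simplified way that never produces a NaN (0/0 raises the invalid
 * exception but still returns an infinity). The quotient is then
 * estimated with estimateDiv128To64() and, when its low bits sit near
 * a rounding boundary, verified against the remainder and corrected.
 */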
float64 float64_div(float64 a, float64 b)
{
	flag aSign, bSign, zSign;
	int16 aExp, bExp, zExp;
	bits64 aSig, bSig, zSig;
	bits64 rem0, rem1;
	bits64 term0, term1;

	aSig = extractFloat64Frac(a);
	aExp = extractFloat64Exp(a);
	aSign = extractFloat64Sign(a);
	bSig = extractFloat64Frac(b);
	bExp = extractFloat64Exp(b);
	bSign = extractFloat64Sign(b);
	zSign = aSign ^ bSign;
	if (aExp == 0x7FF) {
		if (bExp == 0x7FF) {
		}
		return packFloat64(zSign, 0x7FF, 0);
	}
	if (bExp == 0x7FF) {
		return packFloat64(zSign, 0, 0);
	}
	if (bExp == 0) {
		if (bSig == 0) {
			if ((aExp | aSig) == 0) {
				float_raise(FPSCR_CAUSE_INVALID);
			}
			return packFloat64(zSign, 0x7FF, 0);
		}
		normalizeFloat64Subnormal(bSig, &bExp, &bSig);
	}
	if (aExp == 0) {
		if (aSig == 0)
			return packFloat64(zSign, 0, 0);
		normalizeFloat64Subnormal(aSig, &aExp, &aSig);
	}
	zExp = aExp - bExp + 0x3FD;
	aSig = (aSig | LIT64(0x0010000000000000)) << 10;
	bSig = (bSig | LIT64(0x0010000000000000)) << 11;
	if (bSig <= (aSig + aSig)) {
		aSig >>= 1;
		++zExp;
	}
	zSig = estimateDiv128To64(aSig, 0, bSig);
	if ((zSig & 0x1FF) <= 2) {
		mul64To128(bSig, zSig, &term0, &term1);
		sub128(aSig, 0, term0, term1, &rem0, &rem1);
		while ((sbits64) rem0 < 0) {
			--zSig;
			add128(rem0, rem1, 0, bSig, &rem0, &rem1);
		}
		zSig |= (rem1 != 0);
	}
	return roundAndPackFloat64(zSign, zExp, zSig);
}

float32 float32_div(float32 a, float32 b)
{
	flag aSign, bSign, zSign;
	int16 aExp, bExp, zExp;
	bits32 aSig, bSig;
	uint64_t zSig;

	aSig = extractFloat32Frac(a);
	aExp = extractFloat32Exp(a);
	aSign = extractFloat32Sign(a);
	bSig = extractFloat32Frac(b);
	bExp = extractFloat32Exp(b);
	bSign = extractFloat32Sign(b);
	zSign = aSign ^ bSign;
	if (aExp == 0xFF) {
		if (bExp == 0xFF) {
		}
		return packFloat32(zSign, 0xFF, 0);
	}
	if (bExp == 0xFF) {
		return packFloat32(zSign, 0, 0);
	}
	if (bExp == 0) {
		if (bSig == 0) {
			return packFloat32(zSign, 0xFF, 0);
		}
		normalizeFloat32Subnormal(bSig, &bExp, &bSig);
	}
	if (aExp == 0) {
		if (aSig == 0)
			return packFloat32(zSign, 0, 0);
		normalizeFloat32Subnormal(aSig, &aExp, &aSig);
	}
	zExp = aExp - bExp + 0x7D;
	aSig = (aSig | 0x00800000) << 7;
	bSig = (bSig | 0x00800000) << 8;
	if (bSig <= (aSig + aSig)) {
		aSig >>= 1;
		++zExp;
	}
	zSig = (((bits64) aSig) << 32);
	do_div(zSig, bSig);

	if ((zSig & 0x3F) == 0) {
		zSig |= (((bits64) bSig) * zSig != ((bits64) aSig) << 32);
	}
	return roundAndPackFloat32(zSign, zExp, (bits32)zSig);
}

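/*
 * Single-precision multiplication. Subnormal inputs are normalized
 * first and exact infinities force an infinite result; the 24-bit
 * significands are then multiplied into a 64-bit product whose low
 * half is jammed into the sticky bit before rounding.
 */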
float32 float32_mul(float32 a, float32 b)
{
	char aSign, bSign, zSign;
	int aExp, bExp, zExp;
	unsigned int aSig, bSig;
	unsigned long long zSig64;
	unsigned int zSig;

	aSig = extractFloat32Frac(a);
	aExp = extractFloat32Exp(a);
	aSign = extractFloat32Sign(a);
	bSig = extractFloat32Frac(b);
	bExp = extractFloat32Exp(b);
	bSign = extractFloat32Sign(b);
	zSign = aSign ^ bSign;
	if (aExp == 0) {
		if (aSig == 0)
			return packFloat32(zSign, 0, 0);
		normalizeFloat32Subnormal(aSig, &aExp, &aSig);
	}
	if (bExp == 0) {
		if (bSig == 0)
			return packFloat32(zSign, 0, 0);
		normalizeFloat32Subnormal(bSig, &bExp, &bSig);
	}
	if ((bExp == 0xff && bSig == 0) || (aExp == 0xff && aSig == 0))
		return roundAndPackFloat32(zSign, 0xff, 0);

	zExp = aExp + bExp - 0x7F;
	aSig = (aSig | 0x00800000) << 7;
	bSig = (bSig | 0x00800000) << 8;
	shift64RightJamming(((unsigned long long)aSig) * bSig, 32, &zSig64);
	zSig = zSig64;
	if (0 <= (signed int)(zSig << 1)) {
		zSig <<= 1;
		--zExp;
	}
	return roundAndPackFloat32(zSign, zExp, zSig);
}

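/*
 * Double-precision multiplication: the same scheme as float32_mul(),
 * except the double-width product is formed with mul64To128() and its
 * low half is folded into the sticky bit of the high half.
 */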
float64 float64_mul(float64 a, float64 b)
{
	char aSign, bSign, zSign;
	int aExp, bExp, zExp;
	unsigned long long int aSig, bSig, zSig0, zSig1;

	aSig = extractFloat64Frac(a);
	aExp = extractFloat64Exp(a);
	aSign = extractFloat64Sign(a);
	bSig = extractFloat64Frac(b);
	bExp = extractFloat64Exp(b);
	bSign = extractFloat64Sign(b);
	zSign = aSign ^ bSign;

	if (aExp == 0) {
		if (aSig == 0)
			return packFloat64(zSign, 0, 0);
		normalizeFloat64Subnormal(aSig, &aExp, &aSig);
	}
	if (bExp == 0) {
		if (bSig == 0)
			return packFloat64(zSign, 0, 0);
		normalizeFloat64Subnormal(bSig, &bExp, &bSig);
	}
	if ((aExp == 0x7ff && aSig == 0) || (bExp == 0x7ff && bSig == 0))
		return roundAndPackFloat64(zSign, 0x7ff, 0);

	zExp = aExp + bExp - 0x3FF;
	aSig = (aSig | 0x0010000000000000LL) << 10;
	bSig = (bSig | 0x0010000000000000LL) << 11;
	mul64To128(aSig, bSig, &zSig0, &zSig1);
	zSig0 |= (zSig1 != 0);
	if (0 <= (signed long long int)(zSig0 << 1)) {
		zSig0 <<= 1;
		--zExp;
	}
	return roundAndPackFloat64(zSign, zExp, zSig0);
}

/*
 * Returns the result of converting the double-precision floating-point
 * value `a' to the single-precision floating-point format. The
 * conversion is performed according to the IEC/IEEE Standard for
 * Binary Floating-point Arithmetic.
 */
float32 float64_to_float32(float64 a)
{
	flag aSign;
	int16 aExp;
	bits64 aSig;
	bits32 zSig;

	aSig = extractFloat64Frac(a);
	aExp = extractFloat64Exp(a);
	aSign = extractFloat64Sign(a);

	shift64RightJamming(aSig, 22, &aSig);
	zSig = aSig;
	if (aExp || zSig) {
		zSig |= 0x40000000;
		aExp -= 0x381;
	}
	return roundAndPackFloat32(aSign, aExp, zSig);
}