Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/arch/parisc/math-emu/fmpyfadd.c
26288 views
1
// SPDX-License-Identifier: GPL-2.0-or-later
2
/*
3
* Linux/PA-RISC Project (http://www.parisc-linux.org/)
4
*
5
* Floating-point emulation code
6
* Copyright (C) 2001 Hewlett-Packard (Paul Bame) <[email protected]>
7
*/
8
/*
9
* BEGIN_DESC
10
*
11
* File:
12
* @(#) pa/spmath/fmpyfadd.c $Revision: 1.1 $
13
*
14
* Purpose:
15
* Double Floating-point Multiply Fused Add
16
* Double Floating-point Multiply Negate Fused Add
17
* Single Floating-point Multiply Fused Add
18
* Single Floating-point Multiply Negate Fused Add
19
*
20
* External Interfaces:
21
* dbl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
22
* dbl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
23
* sgl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
24
* sgl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
25
*
26
* Internal Interfaces:
27
*
28
* Theory:
29
* <<please update with a overview of the operation of this file>>
30
*
31
* END_DESC
32
*/
33
34
35
#include "float.h"
36
#include "sgl_float.h"
37
#include "dbl_float.h"
38
39
40
/*
41
* Double Floating-point Multiply Fused Add
42
*/
43
44
int
45
dbl_fmpyfadd(
46
dbl_floating_point *src1ptr,
47
dbl_floating_point *src2ptr,
48
dbl_floating_point *src3ptr,
49
unsigned int *status,
50
dbl_floating_point *dstptr)
51
{
52
unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2, opnd3p1, opnd3p2;
53
register unsigned int tmpresp1, tmpresp2, tmpresp3, tmpresp4;
54
unsigned int rightp1, rightp2, rightp3, rightp4;
55
unsigned int resultp1, resultp2 = 0, resultp3 = 0, resultp4 = 0;
56
register int mpy_exponent, add_exponent, count;
57
boolean inexact = FALSE, is_tiny = FALSE;
58
59
unsigned int signlessleft1, signlessright1, save;
60
register int result_exponent, diff_exponent;
61
int sign_save, jumpsize;
62
63
Dbl_copyfromptr(src1ptr,opnd1p1,opnd1p2);
64
Dbl_copyfromptr(src2ptr,opnd2p1,opnd2p2);
65
Dbl_copyfromptr(src3ptr,opnd3p1,opnd3p2);
66
67
/*
68
* set sign bit of result of multiply
69
*/
70
if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1))
71
Dbl_setnegativezerop1(resultp1);
72
else Dbl_setzerop1(resultp1);
73
74
/*
75
* Generate multiply exponent
76
*/
77
mpy_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) - DBL_BIAS;
78
79
/*
80
* check first operand for NaN's or infinity
81
*/
82
if (Dbl_isinfinity_exponent(opnd1p1)) {
83
if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
84
if (Dbl_isnotnan(opnd2p1,opnd2p2) &&
85
Dbl_isnotnan(opnd3p1,opnd3p2)) {
86
if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) {
87
/*
88
* invalid since operands are infinity
89
* and zero
90
*/
91
if (Is_invalidtrap_enabled())
92
return(OPC_2E_INVALIDEXCEPTION);
93
Set_invalidflag();
94
Dbl_makequietnan(resultp1,resultp2);
95
Dbl_copytoptr(resultp1,resultp2,dstptr);
96
return(NOEXCEPTION);
97
}
98
/*
99
* Check third operand for infinity with a
100
* sign opposite of the multiply result
101
*/
102
if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
103
(Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
104
/*
105
* invalid since attempting a magnitude
106
* subtraction of infinities
107
*/
108
if (Is_invalidtrap_enabled())
109
return(OPC_2E_INVALIDEXCEPTION);
110
Set_invalidflag();
111
Dbl_makequietnan(resultp1,resultp2);
112
Dbl_copytoptr(resultp1,resultp2,dstptr);
113
return(NOEXCEPTION);
114
}
115
116
/*
117
* return infinity
118
*/
119
Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
120
Dbl_copytoptr(resultp1,resultp2,dstptr);
121
return(NOEXCEPTION);
122
}
123
}
124
else {
125
/*
126
* is NaN; signaling or quiet?
127
*/
128
if (Dbl_isone_signaling(opnd1p1)) {
129
/* trap if INVALIDTRAP enabled */
130
if (Is_invalidtrap_enabled())
131
return(OPC_2E_INVALIDEXCEPTION);
132
/* make NaN quiet */
133
Set_invalidflag();
134
Dbl_set_quiet(opnd1p1);
135
}
136
/*
137
* is second operand a signaling NaN?
138
*/
139
else if (Dbl_is_signalingnan(opnd2p1)) {
140
/* trap if INVALIDTRAP enabled */
141
if (Is_invalidtrap_enabled())
142
return(OPC_2E_INVALIDEXCEPTION);
143
/* make NaN quiet */
144
Set_invalidflag();
145
Dbl_set_quiet(opnd2p1);
146
Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
147
return(NOEXCEPTION);
148
}
149
/*
150
* is third operand a signaling NaN?
151
*/
152
else if (Dbl_is_signalingnan(opnd3p1)) {
153
/* trap if INVALIDTRAP enabled */
154
if (Is_invalidtrap_enabled())
155
return(OPC_2E_INVALIDEXCEPTION);
156
/* make NaN quiet */
157
Set_invalidflag();
158
Dbl_set_quiet(opnd3p1);
159
Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
160
return(NOEXCEPTION);
161
}
162
/*
163
* return quiet NaN
164
*/
165
Dbl_copytoptr(opnd1p1,opnd1p2,dstptr);
166
return(NOEXCEPTION);
167
}
168
}
169
170
/*
171
* check second operand for NaN's or infinity
172
*/
173
if (Dbl_isinfinity_exponent(opnd2p1)) {
174
if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
175
if (Dbl_isnotnan(opnd3p1,opnd3p2)) {
176
if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) {
177
/*
178
* invalid since multiply operands are
179
* zero & infinity
180
*/
181
if (Is_invalidtrap_enabled())
182
return(OPC_2E_INVALIDEXCEPTION);
183
Set_invalidflag();
184
Dbl_makequietnan(opnd2p1,opnd2p2);
185
Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
186
return(NOEXCEPTION);
187
}
188
189
/*
190
* Check third operand for infinity with a
191
* sign opposite of the multiply result
192
*/
193
if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
194
(Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
195
/*
196
* invalid since attempting a magnitude
197
* subtraction of infinities
198
*/
199
if (Is_invalidtrap_enabled())
200
return(OPC_2E_INVALIDEXCEPTION);
201
Set_invalidflag();
202
Dbl_makequietnan(resultp1,resultp2);
203
Dbl_copytoptr(resultp1,resultp2,dstptr);
204
return(NOEXCEPTION);
205
}
206
207
/*
208
* return infinity
209
*/
210
Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
211
Dbl_copytoptr(resultp1,resultp2,dstptr);
212
return(NOEXCEPTION);
213
}
214
}
215
else {
216
/*
217
* is NaN; signaling or quiet?
218
*/
219
if (Dbl_isone_signaling(opnd2p1)) {
220
/* trap if INVALIDTRAP enabled */
221
if (Is_invalidtrap_enabled())
222
return(OPC_2E_INVALIDEXCEPTION);
223
/* make NaN quiet */
224
Set_invalidflag();
225
Dbl_set_quiet(opnd2p1);
226
}
227
/*
228
* is third operand a signaling NaN?
229
*/
230
else if (Dbl_is_signalingnan(opnd3p1)) {
231
/* trap if INVALIDTRAP enabled */
232
if (Is_invalidtrap_enabled())
233
return(OPC_2E_INVALIDEXCEPTION);
234
/* make NaN quiet */
235
Set_invalidflag();
236
Dbl_set_quiet(opnd3p1);
237
Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
238
return(NOEXCEPTION);
239
}
240
/*
241
* return quiet NaN
242
*/
243
Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
244
return(NOEXCEPTION);
245
}
246
}
247
248
/*
249
* check third operand for NaN's or infinity
250
*/
251
if (Dbl_isinfinity_exponent(opnd3p1)) {
252
if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
253
/* return infinity */
254
Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
255
return(NOEXCEPTION);
256
} else {
257
/*
258
* is NaN; signaling or quiet?
259
*/
260
if (Dbl_isone_signaling(opnd3p1)) {
261
/* trap if INVALIDTRAP enabled */
262
if (Is_invalidtrap_enabled())
263
return(OPC_2E_INVALIDEXCEPTION);
264
/* make NaN quiet */
265
Set_invalidflag();
266
Dbl_set_quiet(opnd3p1);
267
}
268
/*
269
* return quiet NaN
270
*/
271
Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
272
return(NOEXCEPTION);
273
}
274
}
275
276
/*
277
* Generate multiply mantissa
278
*/
279
if (Dbl_isnotzero_exponent(opnd1p1)) {
280
/* set hidden bit */
281
Dbl_clear_signexponent_set_hidden(opnd1p1);
282
}
283
else {
284
/* check for zero */
285
if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
286
/*
287
* Perform the add opnd3 with zero here.
288
*/
289
if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
290
if (Is_rounding_mode(ROUNDMINUS)) {
291
Dbl_or_signs(opnd3p1,resultp1);
292
} else {
293
Dbl_and_signs(opnd3p1,resultp1);
294
}
295
}
296
/*
297
* Now let's check for trapped underflow case.
298
*/
299
else if (Dbl_iszero_exponent(opnd3p1) &&
300
Is_underflowtrap_enabled()) {
301
/* need to normalize results mantissa */
302
sign_save = Dbl_signextendedsign(opnd3p1);
303
result_exponent = 0;
304
Dbl_leftshiftby1(opnd3p1,opnd3p2);
305
Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
306
Dbl_set_sign(opnd3p1,/*using*/sign_save);
307
Dbl_setwrapped_exponent(opnd3p1,result_exponent,
308
unfl);
309
Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
310
/* inexact = FALSE */
311
return(OPC_2E_UNDERFLOWEXCEPTION);
312
}
313
Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
314
return(NOEXCEPTION);
315
}
316
/* is denormalized, adjust exponent */
317
Dbl_clear_signexponent(opnd1p1);
318
Dbl_leftshiftby1(opnd1p1,opnd1p2);
319
Dbl_normalize(opnd1p1,opnd1p2,mpy_exponent);
320
}
321
/* opnd2 needs to have hidden bit set with msb in hidden bit */
322
if (Dbl_isnotzero_exponent(opnd2p1)) {
323
Dbl_clear_signexponent_set_hidden(opnd2p1);
324
}
325
else {
326
/* check for zero */
327
if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
328
/*
329
* Perform the add opnd3 with zero here.
330
*/
331
if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
332
if (Is_rounding_mode(ROUNDMINUS)) {
333
Dbl_or_signs(opnd3p1,resultp1);
334
} else {
335
Dbl_and_signs(opnd3p1,resultp1);
336
}
337
}
338
/*
339
* Now let's check for trapped underflow case.
340
*/
341
else if (Dbl_iszero_exponent(opnd3p1) &&
342
Is_underflowtrap_enabled()) {
343
/* need to normalize results mantissa */
344
sign_save = Dbl_signextendedsign(opnd3p1);
345
result_exponent = 0;
346
Dbl_leftshiftby1(opnd3p1,opnd3p2);
347
Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
348
Dbl_set_sign(opnd3p1,/*using*/sign_save);
349
Dbl_setwrapped_exponent(opnd3p1,result_exponent,
350
unfl);
351
Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
352
/* inexact = FALSE */
353
return(OPC_2E_UNDERFLOWEXCEPTION);
354
}
355
Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
356
return(NOEXCEPTION);
357
}
358
/* is denormalized; want to normalize */
359
Dbl_clear_signexponent(opnd2p1);
360
Dbl_leftshiftby1(opnd2p1,opnd2p2);
361
Dbl_normalize(opnd2p1,opnd2p2,mpy_exponent);
362
}
363
364
/* Multiply the first two source mantissas together */
365
366
/*
367
* The intermediate result will be kept in tmpres,
368
* which needs enough room for 106 bits of mantissa,
369
* so lets call it a Double extended.
370
*/
371
Dblext_setzero(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
372
373
/*
374
* Four bits at a time are inspected in each loop, and a
375
* simple shift and add multiply algorithm is used.
376
*/
377
for (count = DBL_P-1; count >= 0; count -= 4) {
378
Dblext_rightshiftby4(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
379
if (Dbit28p2(opnd1p2)) {
380
/* Fourword_add should be an ADD followed by 3 ADDC's */
381
Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
382
opnd2p1<<3 | opnd2p2>>29, opnd2p2<<3, 0, 0);
383
}
384
if (Dbit29p2(opnd1p2)) {
385
Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
386
opnd2p1<<2 | opnd2p2>>30, opnd2p2<<2, 0, 0);
387
}
388
if (Dbit30p2(opnd1p2)) {
389
Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
390
opnd2p1<<1 | opnd2p2>>31, opnd2p2<<1, 0, 0);
391
}
392
if (Dbit31p2(opnd1p2)) {
393
Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
394
opnd2p1, opnd2p2, 0, 0);
395
}
396
Dbl_rightshiftby4(opnd1p1,opnd1p2);
397
}
398
if (Is_dexthiddenoverflow(tmpresp1)) {
399
/* result mantissa >= 2 (mantissa overflow) */
400
mpy_exponent++;
401
Dblext_rightshiftby1(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
402
}
403
404
/*
405
* Restore the sign of the mpy result which was saved in resultp1.
406
* The exponent will continue to be kept in mpy_exponent.
407
*/
408
Dblext_set_sign(tmpresp1,Dbl_sign(resultp1));
409
410
/*
411
* No rounding is required, since the result of the multiply
412
* is exact in the extended format.
413
*/
414
415
/*
416
* Now we are ready to perform the add portion of the operation.
417
*
418
* The exponents need to be kept as integers for now, since the
419
* multiply result might not fit into the exponent field. We
420
* can't overflow or underflow because of this yet, since the
421
* add could bring the final result back into range.
422
*/
423
add_exponent = Dbl_exponent(opnd3p1);
424
425
/*
426
* Check for denormalized or zero add operand.
427
*/
428
if (add_exponent == 0) {
429
/* check for zero */
430
if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
431
/* right is zero */
432
/* Left can't be zero and must be result.
433
*
434
* The final result is now in tmpres and mpy_exponent,
435
* and needs to be rounded and squeezed back into
436
* double precision format from double extended.
437
*/
438
result_exponent = mpy_exponent;
439
Dblext_copy(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
440
resultp1,resultp2,resultp3,resultp4);
441
sign_save = Dbl_signextendedsign(resultp1);/*save sign*/
442
goto round;
443
}
444
445
/*
446
* Neither are zeroes.
447
* Adjust exponent and normalize add operand.
448
*/
449
sign_save = Dbl_signextendedsign(opnd3p1); /* save sign */
450
Dbl_clear_signexponent(opnd3p1);
451
Dbl_leftshiftby1(opnd3p1,opnd3p2);
452
Dbl_normalize(opnd3p1,opnd3p2,add_exponent);
453
Dbl_set_sign(opnd3p1,sign_save); /* restore sign */
454
} else {
455
Dbl_clear_exponent_set_hidden(opnd3p1);
456
}
457
/*
458
* Copy opnd3 to the double extended variable called right.
459
*/
460
Dbl_copyto_dblext(opnd3p1,opnd3p2,rightp1,rightp2,rightp3,rightp4);
461
462
/*
463
* A zero "save" helps discover equal operands (for later),
464
* and is used in swapping operands (if needed).
465
*/
466
Dblext_xortointp1(tmpresp1,rightp1,/*to*/save);
467
468
/*
469
* Compare magnitude of operands.
470
*/
471
Dblext_copytoint_exponentmantissap1(tmpresp1,signlessleft1);
472
Dblext_copytoint_exponentmantissap1(rightp1,signlessright1);
473
if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
474
Dblext_ismagnitudeless(tmpresp2,rightp2,signlessleft1,signlessright1)){
475
/*
476
* Set the left operand to the larger one by XOR swap.
477
* First finish the first word "save".
478
*/
479
Dblext_xorfromintp1(save,rightp1,/*to*/rightp1);
480
Dblext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
481
Dblext_swap_lower(tmpresp2,tmpresp3,tmpresp4,
482
rightp2,rightp3,rightp4);
483
/* also setup exponents used in rest of routine */
484
diff_exponent = add_exponent - mpy_exponent;
485
result_exponent = add_exponent;
486
} else {
487
/* also setup exponents used in rest of routine */
488
diff_exponent = mpy_exponent - add_exponent;
489
result_exponent = mpy_exponent;
490
}
491
/* Invariant: left is not smaller than right. */
492
493
/*
494
* Special case alignment of operands that would force alignment
495
* beyond the extent of the extension. A further optimization
496
* could special case this but only reduces the path length for
497
* this infrequent case.
498
*/
499
if (diff_exponent > DBLEXT_THRESHOLD) {
500
diff_exponent = DBLEXT_THRESHOLD;
501
}
502
503
/* Align right operand by shifting it to the right */
504
Dblext_clear_sign(rightp1);
505
Dblext_right_align(rightp1,rightp2,rightp3,rightp4,
506
/*shifted by*/diff_exponent);
507
508
/* Treat sum and difference of the operands separately. */
509
if ((int)save < 0) {
510
/*
511
* Difference of the two operands. Overflow can occur if the
512
* multiply overflowed. A borrow can occur out of the hidden
513
* bit and force a post normalization phase.
514
*/
515
Dblext_subtract(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
516
rightp1,rightp2,rightp3,rightp4,
517
resultp1,resultp2,resultp3,resultp4);
518
sign_save = Dbl_signextendedsign(resultp1);
519
if (Dbl_iszero_hidden(resultp1)) {
520
/* Handle normalization */
521
/* A straightforward algorithm would now shift the
522
* result and extension left until the hidden bit
523
* becomes one. Not all of the extension bits need
524
* participate in the shift. Only the two most
525
* significant bits (round and guard) are needed.
526
* If only a single shift is needed then the guard
527
* bit becomes a significant low order bit and the
528
* extension must participate in the rounding.
529
* If more than a single shift is needed, then all
530
* bits to the right of the guard bit are zeros,
531
* and the guard bit may or may not be zero. */
532
Dblext_leftshiftby1(resultp1,resultp2,resultp3,
533
resultp4);
534
535
/* Need to check for a zero result. The sign and
536
* exponent fields have already been zeroed. The more
537
* efficient test of the full object can be used.
538
*/
539
if(Dblext_iszero(resultp1,resultp2,resultp3,resultp4)){
540
/* Must have been "x-x" or "x+(-x)". */
541
if (Is_rounding_mode(ROUNDMINUS))
542
Dbl_setone_sign(resultp1);
543
Dbl_copytoptr(resultp1,resultp2,dstptr);
544
return(NOEXCEPTION);
545
}
546
result_exponent--;
547
548
/* Look to see if normalization is finished. */
549
if (Dbl_isone_hidden(resultp1)) {
550
/* No further normalization is needed */
551
goto round;
552
}
553
554
/* Discover first one bit to determine shift amount.
555
* Use a modified binary search. We have already
556
* shifted the result one position right and still
557
* not found a one so the remainder of the extension
558
* must be zero and simplifies rounding. */
559
/* Scan bytes */
560
while (Dbl_iszero_hiddenhigh7mantissa(resultp1)) {
561
Dblext_leftshiftby8(resultp1,resultp2,resultp3,resultp4);
562
result_exponent -= 8;
563
}
564
/* Now narrow it down to the nibble */
565
if (Dbl_iszero_hiddenhigh3mantissa(resultp1)) {
566
/* The lower nibble contains the
567
* normalizing one */
568
Dblext_leftshiftby4(resultp1,resultp2,resultp3,resultp4);
569
result_exponent -= 4;
570
}
571
/* Select case where first bit is set (already
572
* normalized) otherwise select the proper shift. */
573
jumpsize = Dbl_hiddenhigh3mantissa(resultp1);
574
if (jumpsize <= 7) switch(jumpsize) {
575
case 1:
576
Dblext_leftshiftby3(resultp1,resultp2,resultp3,
577
resultp4);
578
result_exponent -= 3;
579
break;
580
case 2:
581
case 3:
582
Dblext_leftshiftby2(resultp1,resultp2,resultp3,
583
resultp4);
584
result_exponent -= 2;
585
break;
586
case 4:
587
case 5:
588
case 6:
589
case 7:
590
Dblext_leftshiftby1(resultp1,resultp2,resultp3,
591
resultp4);
592
result_exponent -= 1;
593
break;
594
}
595
} /* end if (hidden...)... */
596
/* Fall through and round */
597
} /* end if (save < 0)... */
598
else {
599
/* Add magnitudes */
600
Dblext_addition(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
601
rightp1,rightp2,rightp3,rightp4,
602
/*to*/resultp1,resultp2,resultp3,resultp4);
603
sign_save = Dbl_signextendedsign(resultp1);
604
if (Dbl_isone_hiddenoverflow(resultp1)) {
605
/* Prenormalization required. */
606
Dblext_arithrightshiftby1(resultp1,resultp2,resultp3,
607
resultp4);
608
result_exponent++;
609
} /* end if hiddenoverflow... */
610
} /* end else ...add magnitudes... */
611
612
/* Round the result. If the extension and lower two words are
613
* all zeros, then the result is exact. Otherwise round in the
614
* correct direction. Underflow is possible. If a postnormalization
615
* is necessary, then the mantissa is all zeros so no shift is needed.
616
*/
617
round:
618
if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
619
Dblext_denormalize(resultp1,resultp2,resultp3,resultp4,
620
result_exponent,is_tiny);
621
}
622
Dbl_set_sign(resultp1,/*using*/sign_save);
623
if (Dblext_isnotzero_mantissap3(resultp3) ||
624
Dblext_isnotzero_mantissap4(resultp4)) {
625
inexact = TRUE;
626
switch(Rounding_mode()) {
627
case ROUNDNEAREST: /* The default. */
628
if (Dblext_isone_highp3(resultp3)) {
629
/* at least 1/2 ulp */
630
if (Dblext_isnotzero_low31p3(resultp3) ||
631
Dblext_isnotzero_mantissap4(resultp4) ||
632
Dblext_isone_lowp2(resultp2)) {
633
/* either exactly half way and odd or
634
* more than 1/2ulp */
635
Dbl_increment(resultp1,resultp2);
636
}
637
}
638
break;
639
640
case ROUNDPLUS:
641
if (Dbl_iszero_sign(resultp1)) {
642
/* Round up positive results */
643
Dbl_increment(resultp1,resultp2);
644
}
645
break;
646
647
case ROUNDMINUS:
648
if (Dbl_isone_sign(resultp1)) {
649
/* Round down negative results */
650
Dbl_increment(resultp1,resultp2);
651
}
652
653
case ROUNDZERO:;
654
/* truncate is simple */
655
} /* end switch... */
656
if (Dbl_isone_hiddenoverflow(resultp1)) result_exponent++;
657
}
658
if (result_exponent >= DBL_INFINITY_EXPONENT) {
659
/* trap if OVERFLOWTRAP enabled */
660
if (Is_overflowtrap_enabled()) {
661
/*
662
* Adjust bias of result
663
*/
664
Dbl_setwrapped_exponent(resultp1,result_exponent,ovfl);
665
Dbl_copytoptr(resultp1,resultp2,dstptr);
666
if (inexact)
667
if (Is_inexacttrap_enabled())
668
return (OPC_2E_OVERFLOWEXCEPTION |
669
OPC_2E_INEXACTEXCEPTION);
670
else Set_inexactflag();
671
return (OPC_2E_OVERFLOWEXCEPTION);
672
}
673
inexact = TRUE;
674
Set_overflowflag();
675
/* set result to infinity or largest number */
676
Dbl_setoverflow(resultp1,resultp2);
677
678
} else if (result_exponent <= 0) { /* underflow case */
679
if (Is_underflowtrap_enabled()) {
680
/*
681
* Adjust bias of result
682
*/
683
Dbl_setwrapped_exponent(resultp1,result_exponent,unfl);
684
Dbl_copytoptr(resultp1,resultp2,dstptr);
685
if (inexact)
686
if (Is_inexacttrap_enabled())
687
return (OPC_2E_UNDERFLOWEXCEPTION |
688
OPC_2E_INEXACTEXCEPTION);
689
else Set_inexactflag();
690
return(OPC_2E_UNDERFLOWEXCEPTION);
691
}
692
else if (inexact && is_tiny) Set_underflowflag();
693
}
694
else Dbl_set_exponent(resultp1,result_exponent);
695
Dbl_copytoptr(resultp1,resultp2,dstptr);
696
if (inexact)
697
if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
698
else Set_inexactflag();
699
return(NOEXCEPTION);
700
}
701
702
/*
703
* Double Floating-point Multiply Negate Fused Add
704
*/
705
706
dbl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
707
708
dbl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr;
709
unsigned int *status;
710
{
711
unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2, opnd3p1, opnd3p2;
712
register unsigned int tmpresp1, tmpresp2, tmpresp3, tmpresp4;
713
unsigned int rightp1, rightp2, rightp3, rightp4;
714
unsigned int resultp1, resultp2 = 0, resultp3 = 0, resultp4 = 0;
715
register int mpy_exponent, add_exponent, count;
716
boolean inexact = FALSE, is_tiny = FALSE;
717
718
unsigned int signlessleft1, signlessright1, save;
719
register int result_exponent, diff_exponent;
720
int sign_save, jumpsize;
721
722
Dbl_copyfromptr(src1ptr,opnd1p1,opnd1p2);
723
Dbl_copyfromptr(src2ptr,opnd2p1,opnd2p2);
724
Dbl_copyfromptr(src3ptr,opnd3p1,opnd3p2);
725
726
/*
727
* set sign bit of result of multiply
728
*/
729
if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1))
730
Dbl_setzerop1(resultp1);
731
else
732
Dbl_setnegativezerop1(resultp1);
733
734
/*
735
* Generate multiply exponent
736
*/
737
mpy_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) - DBL_BIAS;
738
739
/*
740
* check first operand for NaN's or infinity
741
*/
742
if (Dbl_isinfinity_exponent(opnd1p1)) {
743
if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
744
if (Dbl_isnotnan(opnd2p1,opnd2p2) &&
745
Dbl_isnotnan(opnd3p1,opnd3p2)) {
746
if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) {
747
/*
748
* invalid since operands are infinity
749
* and zero
750
*/
751
if (Is_invalidtrap_enabled())
752
return(OPC_2E_INVALIDEXCEPTION);
753
Set_invalidflag();
754
Dbl_makequietnan(resultp1,resultp2);
755
Dbl_copytoptr(resultp1,resultp2,dstptr);
756
return(NOEXCEPTION);
757
}
758
/*
759
* Check third operand for infinity with a
760
* sign opposite of the multiply result
761
*/
762
if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
763
(Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
764
/*
765
* invalid since attempting a magnitude
766
* subtraction of infinities
767
*/
768
if (Is_invalidtrap_enabled())
769
return(OPC_2E_INVALIDEXCEPTION);
770
Set_invalidflag();
771
Dbl_makequietnan(resultp1,resultp2);
772
Dbl_copytoptr(resultp1,resultp2,dstptr);
773
return(NOEXCEPTION);
774
}
775
776
/*
777
* return infinity
778
*/
779
Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
780
Dbl_copytoptr(resultp1,resultp2,dstptr);
781
return(NOEXCEPTION);
782
}
783
}
784
else {
785
/*
786
* is NaN; signaling or quiet?
787
*/
788
if (Dbl_isone_signaling(opnd1p1)) {
789
/* trap if INVALIDTRAP enabled */
790
if (Is_invalidtrap_enabled())
791
return(OPC_2E_INVALIDEXCEPTION);
792
/* make NaN quiet */
793
Set_invalidflag();
794
Dbl_set_quiet(opnd1p1);
795
}
796
/*
797
* is second operand a signaling NaN?
798
*/
799
else if (Dbl_is_signalingnan(opnd2p1)) {
800
/* trap if INVALIDTRAP enabled */
801
if (Is_invalidtrap_enabled())
802
return(OPC_2E_INVALIDEXCEPTION);
803
/* make NaN quiet */
804
Set_invalidflag();
805
Dbl_set_quiet(opnd2p1);
806
Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
807
return(NOEXCEPTION);
808
}
809
/*
810
* is third operand a signaling NaN?
811
*/
812
else if (Dbl_is_signalingnan(opnd3p1)) {
813
/* trap if INVALIDTRAP enabled */
814
if (Is_invalidtrap_enabled())
815
return(OPC_2E_INVALIDEXCEPTION);
816
/* make NaN quiet */
817
Set_invalidflag();
818
Dbl_set_quiet(opnd3p1);
819
Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
820
return(NOEXCEPTION);
821
}
822
/*
823
* return quiet NaN
824
*/
825
Dbl_copytoptr(opnd1p1,opnd1p2,dstptr);
826
return(NOEXCEPTION);
827
}
828
}
829
830
/*
831
* check second operand for NaN's or infinity
832
*/
833
if (Dbl_isinfinity_exponent(opnd2p1)) {
834
if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
835
if (Dbl_isnotnan(opnd3p1,opnd3p2)) {
836
if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) {
837
/*
838
* invalid since multiply operands are
839
* zero & infinity
840
*/
841
if (Is_invalidtrap_enabled())
842
return(OPC_2E_INVALIDEXCEPTION);
843
Set_invalidflag();
844
Dbl_makequietnan(opnd2p1,opnd2p2);
845
Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
846
return(NOEXCEPTION);
847
}
848
849
/*
850
* Check third operand for infinity with a
851
* sign opposite of the multiply result
852
*/
853
if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
854
(Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
855
/*
856
* invalid since attempting a magnitude
857
* subtraction of infinities
858
*/
859
if (Is_invalidtrap_enabled())
860
return(OPC_2E_INVALIDEXCEPTION);
861
Set_invalidflag();
862
Dbl_makequietnan(resultp1,resultp2);
863
Dbl_copytoptr(resultp1,resultp2,dstptr);
864
return(NOEXCEPTION);
865
}
866
867
/*
868
* return infinity
869
*/
870
Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
871
Dbl_copytoptr(resultp1,resultp2,dstptr);
872
return(NOEXCEPTION);
873
}
874
}
875
else {
876
/*
877
* is NaN; signaling or quiet?
878
*/
879
if (Dbl_isone_signaling(opnd2p1)) {
880
/* trap if INVALIDTRAP enabled */
881
if (Is_invalidtrap_enabled())
882
return(OPC_2E_INVALIDEXCEPTION);
883
/* make NaN quiet */
884
Set_invalidflag();
885
Dbl_set_quiet(opnd2p1);
886
}
887
/*
888
* is third operand a signaling NaN?
889
*/
890
else if (Dbl_is_signalingnan(opnd3p1)) {
891
/* trap if INVALIDTRAP enabled */
892
if (Is_invalidtrap_enabled())
893
return(OPC_2E_INVALIDEXCEPTION);
894
/* make NaN quiet */
895
Set_invalidflag();
896
Dbl_set_quiet(opnd3p1);
897
Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
898
return(NOEXCEPTION);
899
}
900
/*
901
* return quiet NaN
902
*/
903
Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
904
return(NOEXCEPTION);
905
}
906
}
907
908
/*
909
* check third operand for NaN's or infinity
910
*/
911
if (Dbl_isinfinity_exponent(opnd3p1)) {
912
if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
913
/* return infinity */
914
Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
915
return(NOEXCEPTION);
916
} else {
917
/*
918
* is NaN; signaling or quiet?
919
*/
920
if (Dbl_isone_signaling(opnd3p1)) {
921
/* trap if INVALIDTRAP enabled */
922
if (Is_invalidtrap_enabled())
923
return(OPC_2E_INVALIDEXCEPTION);
924
/* make NaN quiet */
925
Set_invalidflag();
926
Dbl_set_quiet(opnd3p1);
927
}
928
/*
929
* return quiet NaN
930
*/
931
Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
932
return(NOEXCEPTION);
933
}
934
}
935
936
/*
937
* Generate multiply mantissa
938
*/
939
if (Dbl_isnotzero_exponent(opnd1p1)) {
940
/* set hidden bit */
941
Dbl_clear_signexponent_set_hidden(opnd1p1);
942
}
943
else {
944
/* check for zero */
945
if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
946
/*
947
* Perform the add opnd3 with zero here.
948
*/
949
if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
950
if (Is_rounding_mode(ROUNDMINUS)) {
951
Dbl_or_signs(opnd3p1,resultp1);
952
} else {
953
Dbl_and_signs(opnd3p1,resultp1);
954
}
955
}
956
/*
957
* Now let's check for trapped underflow case.
958
*/
959
else if (Dbl_iszero_exponent(opnd3p1) &&
960
Is_underflowtrap_enabled()) {
961
/* need to normalize results mantissa */
962
sign_save = Dbl_signextendedsign(opnd3p1);
963
result_exponent = 0;
964
Dbl_leftshiftby1(opnd3p1,opnd3p2);
965
Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
966
Dbl_set_sign(opnd3p1,/*using*/sign_save);
967
Dbl_setwrapped_exponent(opnd3p1,result_exponent,
968
unfl);
969
Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
970
/* inexact = FALSE */
971
return(OPC_2E_UNDERFLOWEXCEPTION);
972
}
973
Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
974
return(NOEXCEPTION);
975
}
976
/* is denormalized, adjust exponent */
977
Dbl_clear_signexponent(opnd1p1);
978
Dbl_leftshiftby1(opnd1p1,opnd1p2);
979
Dbl_normalize(opnd1p1,opnd1p2,mpy_exponent);
980
}
981
/* opnd2 needs to have hidden bit set with msb in hidden bit */
982
if (Dbl_isnotzero_exponent(opnd2p1)) {
983
Dbl_clear_signexponent_set_hidden(opnd2p1);
984
}
985
else {
986
/* check for zero */
987
if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
988
/*
989
* Perform the add opnd3 with zero here.
990
*/
991
if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
992
if (Is_rounding_mode(ROUNDMINUS)) {
993
Dbl_or_signs(opnd3p1,resultp1);
994
} else {
995
Dbl_and_signs(opnd3p1,resultp1);
996
}
997
}
998
/*
999
* Now let's check for trapped underflow case.
1000
*/
1001
else if (Dbl_iszero_exponent(opnd3p1) &&
1002
Is_underflowtrap_enabled()) {
1003
/* need to normalize results mantissa */
1004
sign_save = Dbl_signextendedsign(opnd3p1);
1005
result_exponent = 0;
1006
Dbl_leftshiftby1(opnd3p1,opnd3p2);
1007
Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
1008
Dbl_set_sign(opnd3p1,/*using*/sign_save);
1009
Dbl_setwrapped_exponent(opnd3p1,result_exponent,
1010
unfl);
1011
Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
1012
/* inexact = FALSE */
1013
return(OPC_2E_UNDERFLOWEXCEPTION);
1014
}
1015
Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
1016
return(NOEXCEPTION);
1017
}
1018
/* is denormalized; want to normalize */
1019
Dbl_clear_signexponent(opnd2p1);
1020
Dbl_leftshiftby1(opnd2p1,opnd2p2);
1021
Dbl_normalize(opnd2p1,opnd2p2,mpy_exponent);
1022
}
1023
1024
/* Multiply the first two source mantissas together */
1025
1026
/*
1027
* The intermediate result will be kept in tmpres,
1028
* which needs enough room for 106 bits of mantissa,
1029
* so lets call it a Double extended.
1030
*/
1031
Dblext_setzero(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
1032
1033
/*
1034
* Four bits at a time are inspected in each loop, and a
1035
* simple shift and add multiply algorithm is used.
1036
*/
1037
for (count = DBL_P-1; count >= 0; count -= 4) {
1038
Dblext_rightshiftby4(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
1039
if (Dbit28p2(opnd1p2)) {
1040
/* Fourword_add should be an ADD followed by 3 ADDC's */
1041
Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1042
opnd2p1<<3 | opnd2p2>>29, opnd2p2<<3, 0, 0);
1043
}
1044
if (Dbit29p2(opnd1p2)) {
1045
Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1046
opnd2p1<<2 | opnd2p2>>30, opnd2p2<<2, 0, 0);
1047
}
1048
if (Dbit30p2(opnd1p2)) {
1049
Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1050
opnd2p1<<1 | opnd2p2>>31, opnd2p2<<1, 0, 0);
1051
}
1052
if (Dbit31p2(opnd1p2)) {
1053
Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1054
opnd2p1, opnd2p2, 0, 0);
1055
}
1056
Dbl_rightshiftby4(opnd1p1,opnd1p2);
1057
}
1058
if (Is_dexthiddenoverflow(tmpresp1)) {
1059
/* result mantissa >= 2 (mantissa overflow) */
1060
mpy_exponent++;
1061
Dblext_rightshiftby1(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
1062
}
1063
1064
/*
1065
* Restore the sign of the mpy result which was saved in resultp1.
1066
* The exponent will continue to be kept in mpy_exponent.
1067
*/
1068
Dblext_set_sign(tmpresp1,Dbl_sign(resultp1));
1069
1070
/*
1071
* No rounding is required, since the result of the multiply
1072
* is exact in the extended format.
1073
*/
1074
1075
/*
1076
* Now we are ready to perform the add portion of the operation.
1077
*
1078
* The exponents need to be kept as integers for now, since the
1079
* multiply result might not fit into the exponent field. We
1080
* can't overflow or underflow because of this yet, since the
1081
* add could bring the final result back into range.
1082
*/
1083
add_exponent = Dbl_exponent(opnd3p1);
1084
1085
/*
1086
* Check for denormalized or zero add operand.
1087
*/
1088
if (add_exponent == 0) {
1089
/* check for zero */
1090
if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
1091
/* right is zero */
1092
/* Left can't be zero and must be result.
1093
*
1094
* The final result is now in tmpres and mpy_exponent,
1095
* and needs to be rounded and squeezed back into
1096
* double precision format from double extended.
1097
*/
1098
result_exponent = mpy_exponent;
1099
Dblext_copy(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
1100
resultp1,resultp2,resultp3,resultp4);
1101
sign_save = Dbl_signextendedsign(resultp1);/*save sign*/
1102
goto round;
1103
}
1104
1105
/*
1106
* Neither are zeroes.
1107
* Adjust exponent and normalize add operand.
1108
*/
1109
sign_save = Dbl_signextendedsign(opnd3p1); /* save sign */
1110
Dbl_clear_signexponent(opnd3p1);
1111
Dbl_leftshiftby1(opnd3p1,opnd3p2);
1112
Dbl_normalize(opnd3p1,opnd3p2,add_exponent);
1113
Dbl_set_sign(opnd3p1,sign_save); /* restore sign */
1114
} else {
1115
Dbl_clear_exponent_set_hidden(opnd3p1);
1116
}
1117
/*
1118
* Copy opnd3 to the double extended variable called right.
1119
*/
1120
Dbl_copyto_dblext(opnd3p1,opnd3p2,rightp1,rightp2,rightp3,rightp4);
1121
1122
/*
1123
* A zero "save" helps discover equal operands (for later),
1124
* and is used in swapping operands (if needed).
1125
*/
1126
Dblext_xortointp1(tmpresp1,rightp1,/*to*/save);
1127
1128
/*
1129
* Compare magnitude of operands.
1130
*/
1131
Dblext_copytoint_exponentmantissap1(tmpresp1,signlessleft1);
1132
Dblext_copytoint_exponentmantissap1(rightp1,signlessright1);
1133
if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
1134
Dblext_ismagnitudeless(tmpresp2,rightp2,signlessleft1,signlessright1)){
1135
/*
1136
* Set the left operand to the larger one by XOR swap.
1137
* First finish the first word "save".
1138
*/
1139
Dblext_xorfromintp1(save,rightp1,/*to*/rightp1);
1140
Dblext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
1141
Dblext_swap_lower(tmpresp2,tmpresp3,tmpresp4,
1142
rightp2,rightp3,rightp4);
1143
/* also setup exponents used in rest of routine */
1144
diff_exponent = add_exponent - mpy_exponent;
1145
result_exponent = add_exponent;
1146
} else {
1147
/* also setup exponents used in rest of routine */
1148
diff_exponent = mpy_exponent - add_exponent;
1149
result_exponent = mpy_exponent;
1150
}
1151
/* Invariant: left is not smaller than right. */
1152
1153
/*
1154
* Special case alignment of operands that would force alignment
1155
* beyond the extent of the extension. A further optimization
1156
* could special case this but only reduces the path length for
1157
* this infrequent case.
1158
*/
1159
if (diff_exponent > DBLEXT_THRESHOLD) {
1160
diff_exponent = DBLEXT_THRESHOLD;
1161
}
1162
1163
/* Align right operand by shifting it to the right */
1164
Dblext_clear_sign(rightp1);
1165
Dblext_right_align(rightp1,rightp2,rightp3,rightp4,
1166
/*shifted by*/diff_exponent);
1167
1168
/* Treat sum and difference of the operands separately. */
1169
if ((int)save < 0) {
1170
/*
1171
* Difference of the two operands. Overflow can occur if the
1172
* multiply overflowed. A borrow can occur out of the hidden
1173
* bit and force a post normalization phase.
1174
*/
1175
Dblext_subtract(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
1176
rightp1,rightp2,rightp3,rightp4,
1177
resultp1,resultp2,resultp3,resultp4);
1178
sign_save = Dbl_signextendedsign(resultp1);
1179
if (Dbl_iszero_hidden(resultp1)) {
1180
/* Handle normalization */
1181
/* A straightforward algorithm would now shift the
1182
* result and extension left until the hidden bit
1183
* becomes one. Not all of the extension bits need
1184
* participate in the shift. Only the two most
1185
* significant bits (round and guard) are needed.
1186
* If only a single shift is needed then the guard
1187
* bit becomes a significant low order bit and the
1188
* extension must participate in the rounding.
1189
* If more than a single shift is needed, then all
1190
* bits to the right of the guard bit are zeros,
1191
* and the guard bit may or may not be zero. */
1192
Dblext_leftshiftby1(resultp1,resultp2,resultp3,
1193
resultp4);
1194
1195
/* Need to check for a zero result. The sign and
1196
* exponent fields have already been zeroed. The more
1197
* efficient test of the full object can be used.
1198
*/
1199
if (Dblext_iszero(resultp1,resultp2,resultp3,resultp4)) {
1200
/* Must have been "x-x" or "x+(-x)". */
1201
if (Is_rounding_mode(ROUNDMINUS))
1202
Dbl_setone_sign(resultp1);
1203
Dbl_copytoptr(resultp1,resultp2,dstptr);
1204
return(NOEXCEPTION);
1205
}
1206
result_exponent--;
1207
1208
/* Look to see if normalization is finished. */
1209
if (Dbl_isone_hidden(resultp1)) {
1210
/* No further normalization is needed */
1211
goto round;
1212
}
1213
1214
/* Discover first one bit to determine shift amount.
1215
* Use a modified binary search. We have already
1216
* shifted the result one position right and still
1217
* not found a one so the remainder of the extension
1218
* must be zero and simplifies rounding. */
1219
/* Scan bytes */
1220
while (Dbl_iszero_hiddenhigh7mantissa(resultp1)) {
1221
Dblext_leftshiftby8(resultp1,resultp2,resultp3,resultp4);
1222
result_exponent -= 8;
1223
}
1224
/* Now narrow it down to the nibble */
1225
if (Dbl_iszero_hiddenhigh3mantissa(resultp1)) {
1226
/* The lower nibble contains the
1227
* normalizing one */
1228
Dblext_leftshiftby4(resultp1,resultp2,resultp3,resultp4);
1229
result_exponent -= 4;
1230
}
1231
/* Select case where first bit is set (already
1232
* normalized) otherwise select the proper shift. */
1233
jumpsize = Dbl_hiddenhigh3mantissa(resultp1);
1234
if (jumpsize <= 7) switch(jumpsize) {
1235
case 1:
1236
Dblext_leftshiftby3(resultp1,resultp2,resultp3,
1237
resultp4);
1238
result_exponent -= 3;
1239
break;
1240
case 2:
1241
case 3:
1242
Dblext_leftshiftby2(resultp1,resultp2,resultp3,
1243
resultp4);
1244
result_exponent -= 2;
1245
break;
1246
case 4:
1247
case 5:
1248
case 6:
1249
case 7:
1250
Dblext_leftshiftby1(resultp1,resultp2,resultp3,
1251
resultp4);
1252
result_exponent -= 1;
1253
break;
1254
}
1255
} /* end if (hidden...)... */
1256
/* Fall through and round */
1257
} /* end if (save < 0)... */
1258
else {
1259
/* Add magnitudes */
1260
Dblext_addition(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
1261
rightp1,rightp2,rightp3,rightp4,
1262
/*to*/resultp1,resultp2,resultp3,resultp4);
1263
sign_save = Dbl_signextendedsign(resultp1);
1264
if (Dbl_isone_hiddenoverflow(resultp1)) {
1265
/* Prenormalization required. */
1266
Dblext_arithrightshiftby1(resultp1,resultp2,resultp3,
1267
resultp4);
1268
result_exponent++;
1269
} /* end if hiddenoverflow... */
1270
} /* end else ...add magnitudes... */
1271
1272
/* Round the result. If the extension and lower two words are
1273
* all zeros, then the result is exact. Otherwise round in the
1274
* correct direction. Underflow is possible. If a postnormalization
1275
* is necessary, then the mantissa is all zeros so no shift is needed.
1276
*/
1277
round:
1278
if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
1279
Dblext_denormalize(resultp1,resultp2,resultp3,resultp4,
1280
result_exponent,is_tiny);
1281
}
1282
Dbl_set_sign(resultp1,/*using*/sign_save);
1283
if (Dblext_isnotzero_mantissap3(resultp3) ||
1284
Dblext_isnotzero_mantissap4(resultp4)) {
1285
inexact = TRUE;
1286
switch(Rounding_mode()) {
1287
case ROUNDNEAREST: /* The default. */
1288
if (Dblext_isone_highp3(resultp3)) {
1289
/* at least 1/2 ulp */
1290
if (Dblext_isnotzero_low31p3(resultp3) ||
1291
Dblext_isnotzero_mantissap4(resultp4) ||
1292
Dblext_isone_lowp2(resultp2)) {
1293
/* either exactly half way and odd or
1294
* more than 1/2ulp */
1295
Dbl_increment(resultp1,resultp2);
1296
}
1297
}
1298
break;
1299
1300
case ROUNDPLUS:
1301
if (Dbl_iszero_sign(resultp1)) {
1302
/* Round up positive results */
1303
Dbl_increment(resultp1,resultp2);
1304
}
1305
break;
1306
1307
case ROUNDMINUS:
1308
if (Dbl_isone_sign(resultp1)) {
1309
/* Round down negative results */
1310
Dbl_increment(resultp1,resultp2);
1311
}
1312
1313
case ROUNDZERO:;
1314
/* truncate is simple */
1315
} /* end switch... */
1316
if (Dbl_isone_hiddenoverflow(resultp1)) result_exponent++;
1317
}
1318
if (result_exponent >= DBL_INFINITY_EXPONENT) {
1319
/* Overflow */
1320
if (Is_overflowtrap_enabled()) {
1321
/*
1322
* Adjust bias of result
1323
*/
1324
Dbl_setwrapped_exponent(resultp1,result_exponent,ovfl);
1325
Dbl_copytoptr(resultp1,resultp2,dstptr);
1326
if (inexact)
1327
if (Is_inexacttrap_enabled())
1328
return (OPC_2E_OVERFLOWEXCEPTION |
1329
OPC_2E_INEXACTEXCEPTION);
1330
else Set_inexactflag();
1331
return (OPC_2E_OVERFLOWEXCEPTION);
1332
}
1333
inexact = TRUE;
1334
Set_overflowflag();
1335
Dbl_setoverflow(resultp1,resultp2);
1336
} else if (result_exponent <= 0) { /* underflow case */
1337
if (Is_underflowtrap_enabled()) {
1338
/*
1339
* Adjust bias of result
1340
*/
1341
Dbl_setwrapped_exponent(resultp1,result_exponent,unfl);
1342
Dbl_copytoptr(resultp1,resultp2,dstptr);
1343
if (inexact)
1344
if (Is_inexacttrap_enabled())
1345
return (OPC_2E_UNDERFLOWEXCEPTION |
1346
OPC_2E_INEXACTEXCEPTION);
1347
else Set_inexactflag();
1348
return(OPC_2E_UNDERFLOWEXCEPTION);
1349
}
1350
else if (inexact && is_tiny) Set_underflowflag();
1351
}
1352
else Dbl_set_exponent(resultp1,result_exponent);
1353
Dbl_copytoptr(resultp1,resultp2,dstptr);
1354
if (inexact)
1355
if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
1356
else Set_inexactflag();
1357
return(NOEXCEPTION);
1358
}
1359
1360
/*
1361
* Single Floating-point Multiply Fused Add
1362
*/
1363
1364
sgl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
1365
1366
sgl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr;
1367
unsigned int *status;
1368
{
1369
unsigned int opnd1, opnd2, opnd3;
1370
register unsigned int tmpresp1, tmpresp2;
1371
unsigned int rightp1, rightp2;
1372
unsigned int resultp1, resultp2 = 0;
1373
register int mpy_exponent, add_exponent, count;
1374
boolean inexact = FALSE, is_tiny = FALSE;
1375
1376
unsigned int signlessleft1, signlessright1, save;
1377
register int result_exponent, diff_exponent;
1378
int sign_save, jumpsize;
1379
1380
Sgl_copyfromptr(src1ptr,opnd1);
1381
Sgl_copyfromptr(src2ptr,opnd2);
1382
Sgl_copyfromptr(src3ptr,opnd3);
1383
1384
/*
1385
* set sign bit of result of multiply
1386
*/
1387
if (Sgl_sign(opnd1) ^ Sgl_sign(opnd2))
1388
Sgl_setnegativezero(resultp1);
1389
else Sgl_setzero(resultp1);
1390
1391
/*
1392
* Generate multiply exponent
1393
*/
1394
mpy_exponent = Sgl_exponent(opnd1) + Sgl_exponent(opnd2) - SGL_BIAS;
1395
1396
/*
1397
* check first operand for NaN's or infinity
1398
*/
1399
if (Sgl_isinfinity_exponent(opnd1)) {
1400
if (Sgl_iszero_mantissa(opnd1)) {
1401
if (Sgl_isnotnan(opnd2) && Sgl_isnotnan(opnd3)) {
1402
if (Sgl_iszero_exponentmantissa(opnd2)) {
1403
/*
1404
* invalid since operands are infinity
1405
* and zero
1406
*/
1407
if (Is_invalidtrap_enabled())
1408
return(OPC_2E_INVALIDEXCEPTION);
1409
Set_invalidflag();
1410
Sgl_makequietnan(resultp1);
1411
Sgl_copytoptr(resultp1,dstptr);
1412
return(NOEXCEPTION);
1413
}
1414
/*
1415
* Check third operand for infinity with a
1416
* sign opposite of the multiply result
1417
*/
1418
if (Sgl_isinfinity(opnd3) &&
1419
(Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
1420
/*
1421
* invalid since attempting a magnitude
1422
* subtraction of infinities
1423
*/
1424
if (Is_invalidtrap_enabled())
1425
return(OPC_2E_INVALIDEXCEPTION);
1426
Set_invalidflag();
1427
Sgl_makequietnan(resultp1);
1428
Sgl_copytoptr(resultp1,dstptr);
1429
return(NOEXCEPTION);
1430
}
1431
1432
/*
1433
* return infinity
1434
*/
1435
Sgl_setinfinity_exponentmantissa(resultp1);
1436
Sgl_copytoptr(resultp1,dstptr);
1437
return(NOEXCEPTION);
1438
}
1439
}
1440
else {
1441
/*
1442
* is NaN; signaling or quiet?
1443
*/
1444
if (Sgl_isone_signaling(opnd1)) {
1445
/* trap if INVALIDTRAP enabled */
1446
if (Is_invalidtrap_enabled())
1447
return(OPC_2E_INVALIDEXCEPTION);
1448
/* make NaN quiet */
1449
Set_invalidflag();
1450
Sgl_set_quiet(opnd1);
1451
}
1452
/*
1453
* is second operand a signaling NaN?
1454
*/
1455
else if (Sgl_is_signalingnan(opnd2)) {
1456
/* trap if INVALIDTRAP enabled */
1457
if (Is_invalidtrap_enabled())
1458
return(OPC_2E_INVALIDEXCEPTION);
1459
/* make NaN quiet */
1460
Set_invalidflag();
1461
Sgl_set_quiet(opnd2);
1462
Sgl_copytoptr(opnd2,dstptr);
1463
return(NOEXCEPTION);
1464
}
1465
/*
1466
* is third operand a signaling NaN?
1467
*/
1468
else if (Sgl_is_signalingnan(opnd3)) {
1469
/* trap if INVALIDTRAP enabled */
1470
if (Is_invalidtrap_enabled())
1471
return(OPC_2E_INVALIDEXCEPTION);
1472
/* make NaN quiet */
1473
Set_invalidflag();
1474
Sgl_set_quiet(opnd3);
1475
Sgl_copytoptr(opnd3,dstptr);
1476
return(NOEXCEPTION);
1477
}
1478
/*
1479
* return quiet NaN
1480
*/
1481
Sgl_copytoptr(opnd1,dstptr);
1482
return(NOEXCEPTION);
1483
}
1484
}
1485
1486
/*
1487
* check second operand for NaN's or infinity
1488
*/
1489
if (Sgl_isinfinity_exponent(opnd2)) {
1490
if (Sgl_iszero_mantissa(opnd2)) {
1491
if (Sgl_isnotnan(opnd3)) {
1492
if (Sgl_iszero_exponentmantissa(opnd1)) {
1493
/*
1494
* invalid since multiply operands are
1495
* zero & infinity
1496
*/
1497
if (Is_invalidtrap_enabled())
1498
return(OPC_2E_INVALIDEXCEPTION);
1499
Set_invalidflag();
1500
Sgl_makequietnan(opnd2);
1501
Sgl_copytoptr(opnd2,dstptr);
1502
return(NOEXCEPTION);
1503
}
1504
1505
/*
1506
* Check third operand for infinity with a
1507
* sign opposite of the multiply result
1508
*/
1509
if (Sgl_isinfinity(opnd3) &&
1510
(Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
1511
/*
1512
* invalid since attempting a magnitude
1513
* subtraction of infinities
1514
*/
1515
if (Is_invalidtrap_enabled())
1516
return(OPC_2E_INVALIDEXCEPTION);
1517
Set_invalidflag();
1518
Sgl_makequietnan(resultp1);
1519
Sgl_copytoptr(resultp1,dstptr);
1520
return(NOEXCEPTION);
1521
}
1522
1523
/*
1524
* return infinity
1525
*/
1526
Sgl_setinfinity_exponentmantissa(resultp1);
1527
Sgl_copytoptr(resultp1,dstptr);
1528
return(NOEXCEPTION);
1529
}
1530
}
1531
else {
1532
/*
1533
* is NaN; signaling or quiet?
1534
*/
1535
if (Sgl_isone_signaling(opnd2)) {
1536
/* trap if INVALIDTRAP enabled */
1537
if (Is_invalidtrap_enabled())
1538
return(OPC_2E_INVALIDEXCEPTION);
1539
/* make NaN quiet */
1540
Set_invalidflag();
1541
Sgl_set_quiet(opnd2);
1542
}
1543
/*
1544
* is third operand a signaling NaN?
1545
*/
1546
else if (Sgl_is_signalingnan(opnd3)) {
1547
/* trap if INVALIDTRAP enabled */
1548
if (Is_invalidtrap_enabled())
1549
return(OPC_2E_INVALIDEXCEPTION);
1550
/* make NaN quiet */
1551
Set_invalidflag();
1552
Sgl_set_quiet(opnd3);
1553
Sgl_copytoptr(opnd3,dstptr);
1554
return(NOEXCEPTION);
1555
}
1556
/*
1557
* return quiet NaN
1558
*/
1559
Sgl_copytoptr(opnd2,dstptr);
1560
return(NOEXCEPTION);
1561
}
1562
}
1563
1564
/*
1565
* check third operand for NaN's or infinity
1566
*/
1567
if (Sgl_isinfinity_exponent(opnd3)) {
1568
if (Sgl_iszero_mantissa(opnd3)) {
1569
/* return infinity */
1570
Sgl_copytoptr(opnd3,dstptr);
1571
return(NOEXCEPTION);
1572
} else {
1573
/*
1574
* is NaN; signaling or quiet?
1575
*/
1576
if (Sgl_isone_signaling(opnd3)) {
1577
/* trap if INVALIDTRAP enabled */
1578
if (Is_invalidtrap_enabled())
1579
return(OPC_2E_INVALIDEXCEPTION);
1580
/* make NaN quiet */
1581
Set_invalidflag();
1582
Sgl_set_quiet(opnd3);
1583
}
1584
/*
1585
* return quiet NaN
1586
*/
1587
Sgl_copytoptr(opnd3,dstptr);
1588
return(NOEXCEPTION);
1589
}
1590
}
1591
1592
/*
1593
* Generate multiply mantissa
1594
*/
1595
if (Sgl_isnotzero_exponent(opnd1)) {
1596
/* set hidden bit */
1597
Sgl_clear_signexponent_set_hidden(opnd1);
1598
}
1599
else {
1600
/* check for zero */
1601
if (Sgl_iszero_mantissa(opnd1)) {
1602
/*
1603
* Perform the add opnd3 with zero here.
1604
*/
1605
if (Sgl_iszero_exponentmantissa(opnd3)) {
1606
if (Is_rounding_mode(ROUNDMINUS)) {
1607
Sgl_or_signs(opnd3,resultp1);
1608
} else {
1609
Sgl_and_signs(opnd3,resultp1);
1610
}
1611
}
1612
/*
1613
* Now let's check for trapped underflow case.
1614
*/
1615
else if (Sgl_iszero_exponent(opnd3) &&
1616
Is_underflowtrap_enabled()) {
1617
/* need to normalize results mantissa */
1618
sign_save = Sgl_signextendedsign(opnd3);
1619
result_exponent = 0;
1620
Sgl_leftshiftby1(opnd3);
1621
Sgl_normalize(opnd3,result_exponent);
1622
Sgl_set_sign(opnd3,/*using*/sign_save);
1623
Sgl_setwrapped_exponent(opnd3,result_exponent,
1624
unfl);
1625
Sgl_copytoptr(opnd3,dstptr);
1626
/* inexact = FALSE */
1627
return(OPC_2E_UNDERFLOWEXCEPTION);
1628
}
1629
Sgl_copytoptr(opnd3,dstptr);
1630
return(NOEXCEPTION);
1631
}
1632
/* is denormalized, adjust exponent */
1633
Sgl_clear_signexponent(opnd1);
1634
Sgl_leftshiftby1(opnd1);
1635
Sgl_normalize(opnd1,mpy_exponent);
1636
}
1637
/* opnd2 needs to have hidden bit set with msb in hidden bit */
1638
if (Sgl_isnotzero_exponent(opnd2)) {
1639
Sgl_clear_signexponent_set_hidden(opnd2);
1640
}
1641
else {
1642
/* check for zero */
1643
if (Sgl_iszero_mantissa(opnd2)) {
1644
/*
1645
* Perform the add opnd3 with zero here.
1646
*/
1647
if (Sgl_iszero_exponentmantissa(opnd3)) {
1648
if (Is_rounding_mode(ROUNDMINUS)) {
1649
Sgl_or_signs(opnd3,resultp1);
1650
} else {
1651
Sgl_and_signs(opnd3,resultp1);
1652
}
1653
}
1654
/*
1655
* Now let's check for trapped underflow case.
1656
*/
1657
else if (Sgl_iszero_exponent(opnd3) &&
1658
Is_underflowtrap_enabled()) {
1659
/* need to normalize results mantissa */
1660
sign_save = Sgl_signextendedsign(opnd3);
1661
result_exponent = 0;
1662
Sgl_leftshiftby1(opnd3);
1663
Sgl_normalize(opnd3,result_exponent);
1664
Sgl_set_sign(opnd3,/*using*/sign_save);
1665
Sgl_setwrapped_exponent(opnd3,result_exponent,
1666
unfl);
1667
Sgl_copytoptr(opnd3,dstptr);
1668
/* inexact = FALSE */
1669
return(OPC_2E_UNDERFLOWEXCEPTION);
1670
}
1671
Sgl_copytoptr(opnd3,dstptr);
1672
return(NOEXCEPTION);
1673
}
1674
/* is denormalized; want to normalize */
1675
Sgl_clear_signexponent(opnd2);
1676
Sgl_leftshiftby1(opnd2);
1677
Sgl_normalize(opnd2,mpy_exponent);
1678
}
1679
1680
/* Multiply the first two source mantissas together */
1681
1682
/*
1683
* The intermediate result will be kept in tmpres,
1684
* which needs enough room for 106 bits of mantissa,
1685
* so lets call it a Double extended.
1686
*/
1687
Sglext_setzero(tmpresp1,tmpresp2);
1688
1689
/*
1690
* Four bits at a time are inspected in each loop, and a
1691
* simple shift and add multiply algorithm is used.
1692
*/
1693
for (count = SGL_P-1; count >= 0; count -= 4) {
1694
Sglext_rightshiftby4(tmpresp1,tmpresp2);
1695
if (Sbit28(opnd1)) {
1696
/* Twoword_add should be an ADD followed by 2 ADDC's */
1697
Twoword_add(tmpresp1, tmpresp2, opnd2<<3, 0);
1698
}
1699
if (Sbit29(opnd1)) {
1700
Twoword_add(tmpresp1, tmpresp2, opnd2<<2, 0);
1701
}
1702
if (Sbit30(opnd1)) {
1703
Twoword_add(tmpresp1, tmpresp2, opnd2<<1, 0);
1704
}
1705
if (Sbit31(opnd1)) {
1706
Twoword_add(tmpresp1, tmpresp2, opnd2, 0);
1707
}
1708
Sgl_rightshiftby4(opnd1);
1709
}
1710
if (Is_sexthiddenoverflow(tmpresp1)) {
1711
/* result mantissa >= 2 (mantissa overflow) */
1712
mpy_exponent++;
1713
Sglext_rightshiftby4(tmpresp1,tmpresp2);
1714
} else {
1715
Sglext_rightshiftby3(tmpresp1,tmpresp2);
1716
}
1717
1718
/*
1719
* Restore the sign of the mpy result which was saved in resultp1.
1720
* The exponent will continue to be kept in mpy_exponent.
1721
*/
1722
Sglext_set_sign(tmpresp1,Sgl_sign(resultp1));
1723
1724
/*
1725
* No rounding is required, since the result of the multiply
1726
* is exact in the extended format.
1727
*/
1728
1729
/*
1730
* Now we are ready to perform the add portion of the operation.
1731
*
1732
* The exponents need to be kept as integers for now, since the
1733
* multiply result might not fit into the exponent field. We
1734
* can't overflow or underflow because of this yet, since the
1735
* add could bring the final result back into range.
1736
*/
1737
add_exponent = Sgl_exponent(opnd3);
1738
1739
/*
1740
* Check for denormalized or zero add operand.
1741
*/
1742
if (add_exponent == 0) {
1743
/* check for zero */
1744
if (Sgl_iszero_mantissa(opnd3)) {
1745
/* right is zero */
1746
/* Left can't be zero and must be result.
1747
*
1748
* The final result is now in tmpres and mpy_exponent,
1749
* and needs to be rounded and squeezed back into
1750
* double precision format from double extended.
1751
*/
1752
result_exponent = mpy_exponent;
1753
Sglext_copy(tmpresp1,tmpresp2,resultp1,resultp2);
1754
sign_save = Sgl_signextendedsign(resultp1);/*save sign*/
1755
goto round;
1756
}
1757
1758
/*
1759
* Neither are zeroes.
1760
* Adjust exponent and normalize add operand.
1761
*/
1762
sign_save = Sgl_signextendedsign(opnd3); /* save sign */
1763
Sgl_clear_signexponent(opnd3);
1764
Sgl_leftshiftby1(opnd3);
1765
Sgl_normalize(opnd3,add_exponent);
1766
Sgl_set_sign(opnd3,sign_save); /* restore sign */
1767
} else {
1768
Sgl_clear_exponent_set_hidden(opnd3);
1769
}
1770
/*
1771
* Copy opnd3 to the double extended variable called right.
1772
*/
1773
Sgl_copyto_sglext(opnd3,rightp1,rightp2);
1774
1775
/*
1776
* A zero "save" helps discover equal operands (for later),
1777
* and is used in swapping operands (if needed).
1778
*/
1779
Sglext_xortointp1(tmpresp1,rightp1,/*to*/save);
1780
1781
/*
1782
* Compare magnitude of operands.
1783
*/
1784
Sglext_copytoint_exponentmantissa(tmpresp1,signlessleft1);
1785
Sglext_copytoint_exponentmantissa(rightp1,signlessright1);
1786
if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
1787
Sglext_ismagnitudeless(signlessleft1,signlessright1)) {
1788
/*
1789
* Set the left operand to the larger one by XOR swap.
1790
* First finish the first word "save".
1791
*/
1792
Sglext_xorfromintp1(save,rightp1,/*to*/rightp1);
1793
Sglext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
1794
Sglext_swap_lower(tmpresp2,rightp2);
1795
/* also setup exponents used in rest of routine */
1796
diff_exponent = add_exponent - mpy_exponent;
1797
result_exponent = add_exponent;
1798
} else {
1799
/* also setup exponents used in rest of routine */
1800
diff_exponent = mpy_exponent - add_exponent;
1801
result_exponent = mpy_exponent;
1802
}
1803
/* Invariant: left is not smaller than right. */
1804
1805
/*
1806
* Special case alignment of operands that would force alignment
1807
* beyond the extent of the extension. A further optimization
1808
* could special case this but only reduces the path length for
1809
* this infrequent case.
1810
*/
1811
if (diff_exponent > SGLEXT_THRESHOLD) {
1812
diff_exponent = SGLEXT_THRESHOLD;
1813
}
1814
1815
/* Align right operand by shifting it to the right */
1816
Sglext_clear_sign(rightp1);
1817
Sglext_right_align(rightp1,rightp2,/*shifted by*/diff_exponent);
1818
1819
/* Treat sum and difference of the operands separately. */
1820
if ((int)save < 0) {
1821
/*
1822
* Difference of the two operands. Overflow can occur if the
1823
* multiply overflowed. A borrow can occur out of the hidden
1824
* bit and force a post normalization phase.
1825
*/
1826
Sglext_subtract(tmpresp1,tmpresp2, rightp1,rightp2,
1827
resultp1,resultp2);
1828
sign_save = Sgl_signextendedsign(resultp1);
1829
if (Sgl_iszero_hidden(resultp1)) {
1830
/* Handle normalization */
1831
/* A straightforward algorithm would now shift the
1832
* result and extension left until the hidden bit
1833
* becomes one. Not all of the extension bits need
1834
* participate in the shift. Only the two most
1835
* significant bits (round and guard) are needed.
1836
* If only a single shift is needed then the guard
1837
* bit becomes a significant low order bit and the
1838
* extension must participate in the rounding.
1839
* If more than a single shift is needed, then all
1840
* bits to the right of the guard bit are zeros,
1841
* and the guard bit may or may not be zero. */
1842
Sglext_leftshiftby1(resultp1,resultp2);
1843
1844
/* Need to check for a zero result. The sign and
1845
* exponent fields have already been zeroed. The more
1846
* efficient test of the full object can be used.
1847
*/
1848
if (Sglext_iszero(resultp1,resultp2)) {
1849
/* Must have been "x-x" or "x+(-x)". */
1850
if (Is_rounding_mode(ROUNDMINUS))
1851
Sgl_setone_sign(resultp1);
1852
Sgl_copytoptr(resultp1,dstptr);
1853
return(NOEXCEPTION);
1854
}
1855
result_exponent--;
1856
1857
/* Look to see if normalization is finished. */
1858
if (Sgl_isone_hidden(resultp1)) {
1859
/* No further normalization is needed */
1860
goto round;
1861
}
1862
1863
/* Discover first one bit to determine shift amount.
1864
* Use a modified binary search. We have already
1865
* shifted the result one position right and still
1866
* not found a one so the remainder of the extension
1867
* must be zero and simplifies rounding. */
1868
/* Scan bytes */
1869
while (Sgl_iszero_hiddenhigh7mantissa(resultp1)) {
1870
Sglext_leftshiftby8(resultp1,resultp2);
1871
result_exponent -= 8;
1872
}
1873
/* Now narrow it down to the nibble */
1874
if (Sgl_iszero_hiddenhigh3mantissa(resultp1)) {
1875
/* The lower nibble contains the
1876
* normalizing one */
1877
Sglext_leftshiftby4(resultp1,resultp2);
1878
result_exponent -= 4;
1879
}
1880
/* Select case where first bit is set (already
1881
* normalized) otherwise select the proper shift. */
1882
jumpsize = Sgl_hiddenhigh3mantissa(resultp1);
1883
if (jumpsize <= 7) switch(jumpsize) {
1884
case 1:
1885
Sglext_leftshiftby3(resultp1,resultp2);
1886
result_exponent -= 3;
1887
break;
1888
case 2:
1889
case 3:
1890
Sglext_leftshiftby2(resultp1,resultp2);
1891
result_exponent -= 2;
1892
break;
1893
case 4:
1894
case 5:
1895
case 6:
1896
case 7:
1897
Sglext_leftshiftby1(resultp1,resultp2);
1898
result_exponent -= 1;
1899
break;
1900
}
1901
} /* end if (hidden...)... */
1902
/* Fall through and round */
1903
} /* end if (save < 0)... */
1904
else {
1905
/* Add magnitudes */
1906
Sglext_addition(tmpresp1,tmpresp2,
1907
rightp1,rightp2, /*to*/resultp1,resultp2);
1908
sign_save = Sgl_signextendedsign(resultp1);
1909
if (Sgl_isone_hiddenoverflow(resultp1)) {
1910
/* Prenormalization required. */
1911
Sglext_arithrightshiftby1(resultp1,resultp2);
1912
result_exponent++;
1913
} /* end if hiddenoverflow... */
1914
} /* end else ...add magnitudes... */
1915
1916
/* Round the result. If the extension and lower two words are
1917
* all zeros, then the result is exact. Otherwise round in the
1918
* correct direction. Underflow is possible. If a postnormalization
1919
* is necessary, then the mantissa is all zeros so no shift is needed.
1920
*/
1921
round:
1922
if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
1923
Sglext_denormalize(resultp1,resultp2,result_exponent,is_tiny);
1924
}
1925
Sgl_set_sign(resultp1,/*using*/sign_save);
1926
if (Sglext_isnotzero_mantissap2(resultp2)) {
1927
inexact = TRUE;
1928
switch(Rounding_mode()) {
1929
case ROUNDNEAREST: /* The default. */
1930
if (Sglext_isone_highp2(resultp2)) {
1931
/* at least 1/2 ulp */
1932
if (Sglext_isnotzero_low31p2(resultp2) ||
1933
Sglext_isone_lowp1(resultp1)) {
1934
/* either exactly half way and odd or
1935
* more than 1/2ulp */
1936
Sgl_increment(resultp1);
1937
}
1938
}
1939
break;
1940
1941
case ROUNDPLUS:
1942
if (Sgl_iszero_sign(resultp1)) {
1943
/* Round up positive results */
1944
Sgl_increment(resultp1);
1945
}
1946
break;
1947
1948
case ROUNDMINUS:
1949
if (Sgl_isone_sign(resultp1)) {
1950
/* Round down negative results */
1951
Sgl_increment(resultp1);
1952
}
1953
1954
case ROUNDZERO:;
1955
/* truncate is simple */
1956
} /* end switch... */
1957
if (Sgl_isone_hiddenoverflow(resultp1)) result_exponent++;
1958
}
1959
if (result_exponent >= SGL_INFINITY_EXPONENT) {
1960
/* Overflow */
1961
if (Is_overflowtrap_enabled()) {
1962
/*
1963
* Adjust bias of result
1964
*/
1965
Sgl_setwrapped_exponent(resultp1,result_exponent,ovfl);
1966
Sgl_copytoptr(resultp1,dstptr);
1967
if (inexact)
1968
if (Is_inexacttrap_enabled())
1969
return (OPC_2E_OVERFLOWEXCEPTION |
1970
OPC_2E_INEXACTEXCEPTION);
1971
else Set_inexactflag();
1972
return (OPC_2E_OVERFLOWEXCEPTION);
1973
}
1974
inexact = TRUE;
1975
Set_overflowflag();
1976
Sgl_setoverflow(resultp1);
1977
} else if (result_exponent <= 0) { /* underflow case */
1978
if (Is_underflowtrap_enabled()) {
1979
/*
1980
* Adjust bias of result
1981
*/
1982
Sgl_setwrapped_exponent(resultp1,result_exponent,unfl);
1983
Sgl_copytoptr(resultp1,dstptr);
1984
if (inexact)
1985
if (Is_inexacttrap_enabled())
1986
return (OPC_2E_UNDERFLOWEXCEPTION |
1987
OPC_2E_INEXACTEXCEPTION);
1988
else Set_inexactflag();
1989
return(OPC_2E_UNDERFLOWEXCEPTION);
1990
}
1991
else if (inexact && is_tiny) Set_underflowflag();
1992
}
1993
else Sgl_set_exponent(resultp1,result_exponent);
1994
Sgl_copytoptr(resultp1,dstptr);
1995
if (inexact)
1996
if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
1997
else Set_inexactflag();
1998
return(NOEXCEPTION);
1999
}
2000
2001
/*
2002
* Single Floating-point Multiply Negate Fused Add
2003
*/
2004
2005
sgl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
2006
2007
sgl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr;
2008
unsigned int *status;
2009
{
2010
unsigned int opnd1, opnd2, opnd3;
2011
register unsigned int tmpresp1, tmpresp2;
2012
unsigned int rightp1, rightp2;
2013
unsigned int resultp1, resultp2 = 0;
2014
register int mpy_exponent, add_exponent, count;
2015
boolean inexact = FALSE, is_tiny = FALSE;
2016
2017
unsigned int signlessleft1, signlessright1, save;
2018
register int result_exponent, diff_exponent;
2019
int sign_save, jumpsize;
2020
2021
Sgl_copyfromptr(src1ptr,opnd1);
2022
Sgl_copyfromptr(src2ptr,opnd2);
2023
Sgl_copyfromptr(src3ptr,opnd3);
2024
2025
/*
2026
* set sign bit of result of multiply
2027
*/
2028
if (Sgl_sign(opnd1) ^ Sgl_sign(opnd2))
2029
Sgl_setzero(resultp1);
2030
else
2031
Sgl_setnegativezero(resultp1);
2032
2033
/*
2034
* Generate multiply exponent
2035
*/
2036
mpy_exponent = Sgl_exponent(opnd1) + Sgl_exponent(opnd2) - SGL_BIAS;
2037
2038
/*
2039
* check first operand for NaN's or infinity
2040
*/
2041
if (Sgl_isinfinity_exponent(opnd1)) {
2042
if (Sgl_iszero_mantissa(opnd1)) {
2043
if (Sgl_isnotnan(opnd2) && Sgl_isnotnan(opnd3)) {
2044
if (Sgl_iszero_exponentmantissa(opnd2)) {
2045
/*
2046
* invalid since operands are infinity
2047
* and zero
2048
*/
2049
if (Is_invalidtrap_enabled())
2050
return(OPC_2E_INVALIDEXCEPTION);
2051
Set_invalidflag();
2052
Sgl_makequietnan(resultp1);
2053
Sgl_copytoptr(resultp1,dstptr);
2054
return(NOEXCEPTION);
2055
}
2056
/*
2057
* Check third operand for infinity with a
2058
* sign opposite of the multiply result
2059
*/
2060
if (Sgl_isinfinity(opnd3) &&
2061
(Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
2062
/*
2063
* invalid since attempting a magnitude
2064
* subtraction of infinities
2065
*/
2066
if (Is_invalidtrap_enabled())
2067
return(OPC_2E_INVALIDEXCEPTION);
2068
Set_invalidflag();
2069
Sgl_makequietnan(resultp1);
2070
Sgl_copytoptr(resultp1,dstptr);
2071
return(NOEXCEPTION);
2072
}
2073
2074
/*
2075
* return infinity
2076
*/
2077
Sgl_setinfinity_exponentmantissa(resultp1);
2078
Sgl_copytoptr(resultp1,dstptr);
2079
return(NOEXCEPTION);
2080
}
2081
}
2082
else {
2083
/*
2084
* is NaN; signaling or quiet?
2085
*/
2086
if (Sgl_isone_signaling(opnd1)) {
2087
/* trap if INVALIDTRAP enabled */
2088
if (Is_invalidtrap_enabled())
2089
return(OPC_2E_INVALIDEXCEPTION);
2090
/* make NaN quiet */
2091
Set_invalidflag();
2092
Sgl_set_quiet(opnd1);
2093
}
2094
/*
2095
* is second operand a signaling NaN?
2096
*/
2097
else if (Sgl_is_signalingnan(opnd2)) {
2098
/* trap if INVALIDTRAP enabled */
2099
if (Is_invalidtrap_enabled())
2100
return(OPC_2E_INVALIDEXCEPTION);
2101
/* make NaN quiet */
2102
Set_invalidflag();
2103
Sgl_set_quiet(opnd2);
2104
Sgl_copytoptr(opnd2,dstptr);
2105
return(NOEXCEPTION);
2106
}
2107
/*
2108
* is third operand a signaling NaN?
2109
*/
2110
else if (Sgl_is_signalingnan(opnd3)) {
2111
/* trap if INVALIDTRAP enabled */
2112
if (Is_invalidtrap_enabled())
2113
return(OPC_2E_INVALIDEXCEPTION);
2114
/* make NaN quiet */
2115
Set_invalidflag();
2116
Sgl_set_quiet(opnd3);
2117
Sgl_copytoptr(opnd3,dstptr);
2118
return(NOEXCEPTION);
2119
}
2120
/*
2121
* return quiet NaN
2122
*/
2123
Sgl_copytoptr(opnd1,dstptr);
2124
return(NOEXCEPTION);
2125
}
2126
}
2127
2128
/*
2129
* check second operand for NaN's or infinity
2130
*/
2131
if (Sgl_isinfinity_exponent(opnd2)) {
2132
if (Sgl_iszero_mantissa(opnd2)) {
2133
if (Sgl_isnotnan(opnd3)) {
2134
if (Sgl_iszero_exponentmantissa(opnd1)) {
2135
/*
2136
* invalid since multiply operands are
2137
* zero & infinity
2138
*/
2139
if (Is_invalidtrap_enabled())
2140
return(OPC_2E_INVALIDEXCEPTION);
2141
Set_invalidflag();
2142
Sgl_makequietnan(opnd2);
2143
Sgl_copytoptr(opnd2,dstptr);
2144
return(NOEXCEPTION);
2145
}
2146
2147
/*
2148
* Check third operand for infinity with a
2149
* sign opposite of the multiply result
2150
*/
2151
if (Sgl_isinfinity(opnd3) &&
2152
(Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
2153
/*
2154
* invalid since attempting a magnitude
2155
* subtraction of infinities
2156
*/
2157
if (Is_invalidtrap_enabled())
2158
return(OPC_2E_INVALIDEXCEPTION);
2159
Set_invalidflag();
2160
Sgl_makequietnan(resultp1);
2161
Sgl_copytoptr(resultp1,dstptr);
2162
return(NOEXCEPTION);
2163
}
2164
2165
/*
2166
* return infinity
2167
*/
2168
Sgl_setinfinity_exponentmantissa(resultp1);
2169
Sgl_copytoptr(resultp1,dstptr);
2170
return(NOEXCEPTION);
2171
}
2172
}
2173
else {
2174
/*
2175
* is NaN; signaling or quiet?
2176
*/
2177
if (Sgl_isone_signaling(opnd2)) {
2178
/* trap if INVALIDTRAP enabled */
2179
if (Is_invalidtrap_enabled())
2180
return(OPC_2E_INVALIDEXCEPTION);
2181
/* make NaN quiet */
2182
Set_invalidflag();
2183
Sgl_set_quiet(opnd2);
2184
}
2185
/*
2186
* is third operand a signaling NaN?
2187
*/
2188
else if (Sgl_is_signalingnan(opnd3)) {
2189
/* trap if INVALIDTRAP enabled */
2190
if (Is_invalidtrap_enabled())
2191
return(OPC_2E_INVALIDEXCEPTION);
2192
/* make NaN quiet */
2193
Set_invalidflag();
2194
Sgl_set_quiet(opnd3);
2195
Sgl_copytoptr(opnd3,dstptr);
2196
return(NOEXCEPTION);
2197
}
2198
/*
2199
* return quiet NaN
2200
*/
2201
Sgl_copytoptr(opnd2,dstptr);
2202
return(NOEXCEPTION);
2203
}
2204
}
2205
2206
/*
2207
* check third operand for NaN's or infinity
2208
*/
2209
if (Sgl_isinfinity_exponent(opnd3)) {
2210
if (Sgl_iszero_mantissa(opnd3)) {
2211
/* return infinity */
2212
Sgl_copytoptr(opnd3,dstptr);
2213
return(NOEXCEPTION);
2214
} else {
2215
/*
2216
* is NaN; signaling or quiet?
2217
*/
2218
if (Sgl_isone_signaling(opnd3)) {
2219
/* trap if INVALIDTRAP enabled */
2220
if (Is_invalidtrap_enabled())
2221
return(OPC_2E_INVALIDEXCEPTION);
2222
/* make NaN quiet */
2223
Set_invalidflag();
2224
Sgl_set_quiet(opnd3);
2225
}
2226
/*
2227
* return quiet NaN
2228
*/
2229
Sgl_copytoptr(opnd3,dstptr);
2230
return(NOEXCEPTION);
2231
}
2232
}
2233
2234
/*
2235
* Generate multiply mantissa
2236
*/
2237
if (Sgl_isnotzero_exponent(opnd1)) {
2238
/* set hidden bit */
2239
Sgl_clear_signexponent_set_hidden(opnd1);
2240
}
2241
else {
2242
/* check for zero */
2243
if (Sgl_iszero_mantissa(opnd1)) {
2244
/*
2245
* Perform the add opnd3 with zero here.
2246
*/
2247
if (Sgl_iszero_exponentmantissa(opnd3)) {
2248
if (Is_rounding_mode(ROUNDMINUS)) {
2249
Sgl_or_signs(opnd3,resultp1);
2250
} else {
2251
Sgl_and_signs(opnd3,resultp1);
2252
}
2253
}
2254
/*
2255
* Now let's check for trapped underflow case.
2256
*/
2257
else if (Sgl_iszero_exponent(opnd3) &&
2258
Is_underflowtrap_enabled()) {
2259
/* need to normalize results mantissa */
2260
sign_save = Sgl_signextendedsign(opnd3);
2261
result_exponent = 0;
2262
Sgl_leftshiftby1(opnd3);
2263
Sgl_normalize(opnd3,result_exponent);
2264
Sgl_set_sign(opnd3,/*using*/sign_save);
2265
Sgl_setwrapped_exponent(opnd3,result_exponent,
2266
unfl);
2267
Sgl_copytoptr(opnd3,dstptr);
2268
/* inexact = FALSE */
2269
return(OPC_2E_UNDERFLOWEXCEPTION);
2270
}
2271
Sgl_copytoptr(opnd3,dstptr);
2272
return(NOEXCEPTION);
2273
}
2274
/* is denormalized, adjust exponent */
2275
Sgl_clear_signexponent(opnd1);
2276
Sgl_leftshiftby1(opnd1);
2277
Sgl_normalize(opnd1,mpy_exponent);
2278
}
2279
/* opnd2 needs to have hidden bit set with msb in hidden bit */
2280
if (Sgl_isnotzero_exponent(opnd2)) {
2281
Sgl_clear_signexponent_set_hidden(opnd2);
2282
}
2283
else {
2284
/* check for zero */
2285
if (Sgl_iszero_mantissa(opnd2)) {
2286
/*
2287
* Perform the add opnd3 with zero here.
2288
*/
2289
if (Sgl_iszero_exponentmantissa(opnd3)) {
2290
if (Is_rounding_mode(ROUNDMINUS)) {
2291
Sgl_or_signs(opnd3,resultp1);
2292
} else {
2293
Sgl_and_signs(opnd3,resultp1);
2294
}
2295
}
2296
/*
2297
* Now let's check for trapped underflow case.
2298
*/
2299
else if (Sgl_iszero_exponent(opnd3) &&
2300
Is_underflowtrap_enabled()) {
2301
/* need to normalize results mantissa */
2302
sign_save = Sgl_signextendedsign(opnd3);
2303
result_exponent = 0;
2304
Sgl_leftshiftby1(opnd3);
2305
Sgl_normalize(opnd3,result_exponent);
2306
Sgl_set_sign(opnd3,/*using*/sign_save);
2307
Sgl_setwrapped_exponent(opnd3,result_exponent,
2308
unfl);
2309
Sgl_copytoptr(opnd3,dstptr);
2310
/* inexact = FALSE */
2311
return(OPC_2E_UNDERFLOWEXCEPTION);
2312
}
2313
Sgl_copytoptr(opnd3,dstptr);
2314
return(NOEXCEPTION);
2315
}
2316
/* is denormalized; want to normalize */
2317
Sgl_clear_signexponent(opnd2);
2318
Sgl_leftshiftby1(opnd2);
2319
Sgl_normalize(opnd2,mpy_exponent);
2320
}
2321
2322
/* Multiply the first two source mantissas together */
2323
2324
/*
2325
* The intermediate result will be kept in tmpres,
2326
* which needs enough room for 106 bits of mantissa,
2327
* so lets call it a Double extended.
2328
*/
2329
Sglext_setzero(tmpresp1,tmpresp2);
2330
2331
/*
2332
* Four bits at a time are inspected in each loop, and a
2333
* simple shift and add multiply algorithm is used.
2334
*/
2335
for (count = SGL_P-1; count >= 0; count -= 4) {
2336
Sglext_rightshiftby4(tmpresp1,tmpresp2);
2337
if (Sbit28(opnd1)) {
2338
/* Twoword_add should be an ADD followed by 2 ADDC's */
2339
Twoword_add(tmpresp1, tmpresp2, opnd2<<3, 0);
2340
}
2341
if (Sbit29(opnd1)) {
2342
Twoword_add(tmpresp1, tmpresp2, opnd2<<2, 0);
2343
}
2344
if (Sbit30(opnd1)) {
2345
Twoword_add(tmpresp1, tmpresp2, opnd2<<1, 0);
2346
}
2347
if (Sbit31(opnd1)) {
2348
Twoword_add(tmpresp1, tmpresp2, opnd2, 0);
2349
}
2350
Sgl_rightshiftby4(opnd1);
2351
}
2352
if (Is_sexthiddenoverflow(tmpresp1)) {
2353
/* result mantissa >= 2 (mantissa overflow) */
2354
mpy_exponent++;
2355
Sglext_rightshiftby4(tmpresp1,tmpresp2);
2356
} else {
2357
Sglext_rightshiftby3(tmpresp1,tmpresp2);
2358
}
2359
2360
/*
2361
* Restore the sign of the mpy result which was saved in resultp1.
2362
* The exponent will continue to be kept in mpy_exponent.
2363
*/
2364
Sglext_set_sign(tmpresp1,Sgl_sign(resultp1));
2365
2366
/*
2367
* No rounding is required, since the result of the multiply
2368
* is exact in the extended format.
2369
*/
2370
2371
/*
2372
* Now we are ready to perform the add portion of the operation.
2373
*
2374
* The exponents need to be kept as integers for now, since the
2375
* multiply result might not fit into the exponent field. We
2376
* can't overflow or underflow because of this yet, since the
2377
* add could bring the final result back into range.
2378
*/
2379
add_exponent = Sgl_exponent(opnd3);
2380
2381
/*
2382
* Check for denormalized or zero add operand.
2383
*/
2384
if (add_exponent == 0) {
2385
/* check for zero */
2386
if (Sgl_iszero_mantissa(opnd3)) {
2387
/* right is zero */
2388
/* Left can't be zero and must be result.
2389
*
2390
* The final result is now in tmpres and mpy_exponent,
2391
* and needs to be rounded and squeezed back into
2392
* double precision format from double extended.
2393
*/
2394
result_exponent = mpy_exponent;
2395
Sglext_copy(tmpresp1,tmpresp2,resultp1,resultp2);
2396
sign_save = Sgl_signextendedsign(resultp1);/*save sign*/
2397
goto round;
2398
}
2399
2400
/*
2401
* Neither are zeroes.
2402
* Adjust exponent and normalize add operand.
2403
*/
2404
sign_save = Sgl_signextendedsign(opnd3); /* save sign */
2405
Sgl_clear_signexponent(opnd3);
2406
Sgl_leftshiftby1(opnd3);
2407
Sgl_normalize(opnd3,add_exponent);
2408
Sgl_set_sign(opnd3,sign_save); /* restore sign */
2409
} else {
2410
Sgl_clear_exponent_set_hidden(opnd3);
2411
}
2412
/*
2413
* Copy opnd3 to the double extended variable called right.
2414
*/
2415
Sgl_copyto_sglext(opnd3,rightp1,rightp2);
2416
2417
/*
2418
* A zero "save" helps discover equal operands (for later),
2419
* and is used in swapping operands (if needed).
2420
*/
2421
Sglext_xortointp1(tmpresp1,rightp1,/*to*/save);
2422
2423
/*
2424
* Compare magnitude of operands.
2425
*/
2426
Sglext_copytoint_exponentmantissa(tmpresp1,signlessleft1);
2427
Sglext_copytoint_exponentmantissa(rightp1,signlessright1);
2428
if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
2429
Sglext_ismagnitudeless(signlessleft1,signlessright1)) {
2430
/*
2431
* Set the left operand to the larger one by XOR swap.
2432
* First finish the first word "save".
2433
*/
2434
Sglext_xorfromintp1(save,rightp1,/*to*/rightp1);
2435
Sglext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
2436
Sglext_swap_lower(tmpresp2,rightp2);
2437
/* also setup exponents used in rest of routine */
2438
diff_exponent = add_exponent - mpy_exponent;
2439
result_exponent = add_exponent;
2440
} else {
2441
/* also setup exponents used in rest of routine */
2442
diff_exponent = mpy_exponent - add_exponent;
2443
result_exponent = mpy_exponent;
2444
}
2445
/* Invariant: left is not smaller than right. */
2446
2447
/*
2448
* Special case alignment of operands that would force alignment
2449
* beyond the extent of the extension. A further optimization
2450
* could special case this but only reduces the path length for
2451
* this infrequent case.
2452
*/
2453
if (diff_exponent > SGLEXT_THRESHOLD) {
2454
diff_exponent = SGLEXT_THRESHOLD;
2455
}
2456
2457
/* Align right operand by shifting it to the right */
2458
Sglext_clear_sign(rightp1);
2459
Sglext_right_align(rightp1,rightp2,/*shifted by*/diff_exponent);
2460
2461
/* Treat sum and difference of the operands separately. */
2462
if ((int)save < 0) {
2463
/*
2464
* Difference of the two operands. Overflow can occur if the
2465
* multiply overflowed. A borrow can occur out of the hidden
2466
* bit and force a post normalization phase.
2467
*/
2468
Sglext_subtract(tmpresp1,tmpresp2, rightp1,rightp2,
2469
resultp1,resultp2);
2470
sign_save = Sgl_signextendedsign(resultp1);
2471
if (Sgl_iszero_hidden(resultp1)) {
2472
/* Handle normalization */
2473
/* A straightforward algorithm would now shift the
2474
* result and extension left until the hidden bit
2475
* becomes one. Not all of the extension bits need
2476
* participate in the shift. Only the two most
2477
* significant bits (round and guard) are needed.
2478
* If only a single shift is needed then the guard
2479
* bit becomes a significant low order bit and the
2480
* extension must participate in the rounding.
2481
* If more than a single shift is needed, then all
2482
* bits to the right of the guard bit are zeros,
2483
* and the guard bit may or may not be zero. */
2484
Sglext_leftshiftby1(resultp1,resultp2);
2485
2486
/* Need to check for a zero result. The sign and
2487
* exponent fields have already been zeroed. The more
2488
* efficient test of the full object can be used.
2489
*/
2490
if (Sglext_iszero(resultp1,resultp2)) {
2491
/* Must have been "x-x" or "x+(-x)". */
2492
if (Is_rounding_mode(ROUNDMINUS))
2493
Sgl_setone_sign(resultp1);
2494
Sgl_copytoptr(resultp1,dstptr);
2495
return(NOEXCEPTION);
2496
}
2497
result_exponent--;
2498
2499
/* Look to see if normalization is finished. */
2500
if (Sgl_isone_hidden(resultp1)) {
2501
/* No further normalization is needed */
2502
goto round;
2503
}
2504
2505
/* Discover first one bit to determine shift amount.
2506
* Use a modified binary search. We have already
2507
* shifted the result one position right and still
2508
* not found a one so the remainder of the extension
2509
* must be zero and simplifies rounding. */
2510
/* Scan bytes */
2511
while (Sgl_iszero_hiddenhigh7mantissa(resultp1)) {
2512
Sglext_leftshiftby8(resultp1,resultp2);
2513
result_exponent -= 8;
2514
}
2515
/* Now narrow it down to the nibble */
2516
if (Sgl_iszero_hiddenhigh3mantissa(resultp1)) {
2517
/* The lower nibble contains the
2518
* normalizing one */
2519
Sglext_leftshiftby4(resultp1,resultp2);
2520
result_exponent -= 4;
2521
}
2522
/* Select case where first bit is set (already
2523
* normalized) otherwise select the proper shift. */
2524
jumpsize = Sgl_hiddenhigh3mantissa(resultp1);
2525
if (jumpsize <= 7) switch(jumpsize) {
2526
case 1:
2527
Sglext_leftshiftby3(resultp1,resultp2);
2528
result_exponent -= 3;
2529
break;
2530
case 2:
2531
case 3:
2532
Sglext_leftshiftby2(resultp1,resultp2);
2533
result_exponent -= 2;
2534
break;
2535
case 4:
2536
case 5:
2537
case 6:
2538
case 7:
2539
Sglext_leftshiftby1(resultp1,resultp2);
2540
result_exponent -= 1;
2541
break;
2542
}
2543
} /* end if (hidden...)... */
2544
/* Fall through and round */
2545
} /* end if (save < 0)... */
2546
else {
2547
/* Add magnitudes */
2548
Sglext_addition(tmpresp1,tmpresp2,
2549
rightp1,rightp2, /*to*/resultp1,resultp2);
2550
sign_save = Sgl_signextendedsign(resultp1);
2551
if (Sgl_isone_hiddenoverflow(resultp1)) {
2552
/* Prenormalization required. */
2553
Sglext_arithrightshiftby1(resultp1,resultp2);
2554
result_exponent++;
2555
} /* end if hiddenoverflow... */
2556
} /* end else ...add magnitudes... */
2557
2558
/* Round the result. If the extension and lower two words are
2559
* all zeros, then the result is exact. Otherwise round in the
2560
* correct direction. Underflow is possible. If a postnormalization
2561
* is necessary, then the mantissa is all zeros so no shift is needed.
2562
*/
2563
round:
2564
if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
2565
Sglext_denormalize(resultp1,resultp2,result_exponent,is_tiny);
2566
}
2567
Sgl_set_sign(resultp1,/*using*/sign_save);
2568
if (Sglext_isnotzero_mantissap2(resultp2)) {
2569
inexact = TRUE;
2570
switch(Rounding_mode()) {
2571
case ROUNDNEAREST: /* The default. */
2572
if (Sglext_isone_highp2(resultp2)) {
2573
/* at least 1/2 ulp */
2574
if (Sglext_isnotzero_low31p2(resultp2) ||
2575
Sglext_isone_lowp1(resultp1)) {
2576
/* either exactly half way and odd or
2577
* more than 1/2ulp */
2578
Sgl_increment(resultp1);
2579
}
2580
}
2581
break;
2582
2583
case ROUNDPLUS:
2584
if (Sgl_iszero_sign(resultp1)) {
2585
/* Round up positive results */
2586
Sgl_increment(resultp1);
2587
}
2588
break;
2589
2590
case ROUNDMINUS:
2591
if (Sgl_isone_sign(resultp1)) {
2592
/* Round down negative results */
2593
Sgl_increment(resultp1);
2594
}
2595
2596
case ROUNDZERO:;
2597
/* truncate is simple */
2598
} /* end switch... */
2599
if (Sgl_isone_hiddenoverflow(resultp1)) result_exponent++;
2600
}
2601
if (result_exponent >= SGL_INFINITY_EXPONENT) {
2602
/* Overflow */
2603
if (Is_overflowtrap_enabled()) {
2604
/*
2605
* Adjust bias of result
2606
*/
2607
Sgl_setwrapped_exponent(resultp1,result_exponent,ovfl);
2608
Sgl_copytoptr(resultp1,dstptr);
2609
if (inexact)
2610
if (Is_inexacttrap_enabled())
2611
return (OPC_2E_OVERFLOWEXCEPTION |
2612
OPC_2E_INEXACTEXCEPTION);
2613
else Set_inexactflag();
2614
return (OPC_2E_OVERFLOWEXCEPTION);
2615
}
2616
inexact = TRUE;
2617
Set_overflowflag();
2618
Sgl_setoverflow(resultp1);
2619
} else if (result_exponent <= 0) { /* underflow case */
2620
if (Is_underflowtrap_enabled()) {
2621
/*
2622
* Adjust bias of result
2623
*/
2624
Sgl_setwrapped_exponent(resultp1,result_exponent,unfl);
2625
Sgl_copytoptr(resultp1,dstptr);
2626
if (inexact)
2627
if (Is_inexacttrap_enabled())
2628
return (OPC_2E_UNDERFLOWEXCEPTION |
2629
OPC_2E_INEXACTEXCEPTION);
2630
else Set_inexactflag();
2631
return(OPC_2E_UNDERFLOWEXCEPTION);
2632
}
2633
else if (inexact && is_tiny) Set_underflowflag();
2634
}
2635
else Sgl_set_exponent(resultp1,result_exponent);
2636
Sgl_copytoptr(resultp1,dstptr);
2637
if (inexact)
2638
if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
2639
else Set_inexactflag();
2640
return(NOEXCEPTION);
2641
}
2642
2643
2644