Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
awilliam
GitHub Repository: awilliam/linux-vfio
Path: blob/master/arch/parisc/math-emu/fmpyfadd.c
10817 views
1
/*
2
* Linux/PA-RISC Project (http://www.parisc-linux.org/)
3
*
4
* Floating-point emulation code
5
* Copyright (C) 2001 Hewlett-Packard (Paul Bame) <[email protected]>
6
*
7
* This program is free software; you can redistribute it and/or modify
8
* it under the terms of the GNU General Public License as published by
9
* the Free Software Foundation; either version 2, or (at your option)
10
* any later version.
11
*
12
* This program is distributed in the hope that it will be useful,
13
* but WITHOUT ANY WARRANTY; without even the implied warranty of
14
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
* GNU General Public License for more details.
16
*
17
* You should have received a copy of the GNU General Public License
18
* along with this program; if not, write to the Free Software
19
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
*/
21
/*
22
* BEGIN_DESC
23
*
24
* File:
25
* @(#) pa/spmath/fmpyfadd.c $Revision: 1.1 $
26
*
27
* Purpose:
28
* Double Floating-point Multiply Fused Add
29
* Double Floating-point Multiply Negate Fused Add
30
* Single Floating-point Multiply Fused Add
31
* Single Floating-point Multiply Negate Fused Add
32
*
33
* External Interfaces:
34
* dbl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
35
* dbl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
36
* sgl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
37
* sgl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
38
*
39
* Internal Interfaces:
40
*
41
* Theory:
42
* <<please update with a overview of the operation of this file>>
43
*
44
* END_DESC
45
*/
46
47
48
#include "float.h"
49
#include "sgl_float.h"
50
#include "dbl_float.h"
51
52
53
/*
54
* Double Floating-point Multiply Fused Add
55
*/
56
57
int
58
dbl_fmpyfadd(
59
dbl_floating_point *src1ptr,
60
dbl_floating_point *src2ptr,
61
dbl_floating_point *src3ptr,
62
unsigned int *status,
63
dbl_floating_point *dstptr)
64
{
65
unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2, opnd3p1, opnd3p2;
66
register unsigned int tmpresp1, tmpresp2, tmpresp3, tmpresp4;
67
unsigned int rightp1, rightp2, rightp3, rightp4;
68
unsigned int resultp1, resultp2 = 0, resultp3 = 0, resultp4 = 0;
69
register int mpy_exponent, add_exponent, count;
70
boolean inexact = FALSE, is_tiny = FALSE;
71
72
unsigned int signlessleft1, signlessright1, save;
73
register int result_exponent, diff_exponent;
74
int sign_save, jumpsize;
75
76
Dbl_copyfromptr(src1ptr,opnd1p1,opnd1p2);
77
Dbl_copyfromptr(src2ptr,opnd2p1,opnd2p2);
78
Dbl_copyfromptr(src3ptr,opnd3p1,opnd3p2);
79
80
/*
81
* set sign bit of result of multiply
82
*/
83
if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1))
84
Dbl_setnegativezerop1(resultp1);
85
else Dbl_setzerop1(resultp1);
86
87
/*
88
* Generate multiply exponent
89
*/
90
mpy_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) - DBL_BIAS;
91
92
/*
93
* check first operand for NaN's or infinity
94
*/
95
if (Dbl_isinfinity_exponent(opnd1p1)) {
96
if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
97
if (Dbl_isnotnan(opnd2p1,opnd2p2) &&
98
Dbl_isnotnan(opnd3p1,opnd3p2)) {
99
if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) {
100
/*
101
* invalid since operands are infinity
102
* and zero
103
*/
104
if (Is_invalidtrap_enabled())
105
return(OPC_2E_INVALIDEXCEPTION);
106
Set_invalidflag();
107
Dbl_makequietnan(resultp1,resultp2);
108
Dbl_copytoptr(resultp1,resultp2,dstptr);
109
return(NOEXCEPTION);
110
}
111
/*
112
* Check third operand for infinity with a
113
* sign opposite of the multiply result
114
*/
115
if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
116
(Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
117
/*
118
* invalid since attempting a magnitude
119
* subtraction of infinities
120
*/
121
if (Is_invalidtrap_enabled())
122
return(OPC_2E_INVALIDEXCEPTION);
123
Set_invalidflag();
124
Dbl_makequietnan(resultp1,resultp2);
125
Dbl_copytoptr(resultp1,resultp2,dstptr);
126
return(NOEXCEPTION);
127
}
128
129
/*
130
* return infinity
131
*/
132
Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
133
Dbl_copytoptr(resultp1,resultp2,dstptr);
134
return(NOEXCEPTION);
135
}
136
}
137
else {
138
/*
139
* is NaN; signaling or quiet?
140
*/
141
if (Dbl_isone_signaling(opnd1p1)) {
142
/* trap if INVALIDTRAP enabled */
143
if (Is_invalidtrap_enabled())
144
return(OPC_2E_INVALIDEXCEPTION);
145
/* make NaN quiet */
146
Set_invalidflag();
147
Dbl_set_quiet(opnd1p1);
148
}
149
/*
150
* is second operand a signaling NaN?
151
*/
152
else if (Dbl_is_signalingnan(opnd2p1)) {
153
/* trap if INVALIDTRAP enabled */
154
if (Is_invalidtrap_enabled())
155
return(OPC_2E_INVALIDEXCEPTION);
156
/* make NaN quiet */
157
Set_invalidflag();
158
Dbl_set_quiet(opnd2p1);
159
Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
160
return(NOEXCEPTION);
161
}
162
/*
163
* is third operand a signaling NaN?
164
*/
165
else if (Dbl_is_signalingnan(opnd3p1)) {
166
/* trap if INVALIDTRAP enabled */
167
if (Is_invalidtrap_enabled())
168
return(OPC_2E_INVALIDEXCEPTION);
169
/* make NaN quiet */
170
Set_invalidflag();
171
Dbl_set_quiet(opnd3p1);
172
Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
173
return(NOEXCEPTION);
174
}
175
/*
176
* return quiet NaN
177
*/
178
Dbl_copytoptr(opnd1p1,opnd1p2,dstptr);
179
return(NOEXCEPTION);
180
}
181
}
182
183
/*
184
* check second operand for NaN's or infinity
185
*/
186
if (Dbl_isinfinity_exponent(opnd2p1)) {
187
if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
188
if (Dbl_isnotnan(opnd3p1,opnd3p2)) {
189
if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) {
190
/*
191
* invalid since multiply operands are
192
* zero & infinity
193
*/
194
if (Is_invalidtrap_enabled())
195
return(OPC_2E_INVALIDEXCEPTION);
196
Set_invalidflag();
197
Dbl_makequietnan(opnd2p1,opnd2p2);
198
Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
199
return(NOEXCEPTION);
200
}
201
202
/*
203
* Check third operand for infinity with a
204
* sign opposite of the multiply result
205
*/
206
if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
207
(Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
208
/*
209
* invalid since attempting a magnitude
210
* subtraction of infinities
211
*/
212
if (Is_invalidtrap_enabled())
213
return(OPC_2E_INVALIDEXCEPTION);
214
Set_invalidflag();
215
Dbl_makequietnan(resultp1,resultp2);
216
Dbl_copytoptr(resultp1,resultp2,dstptr);
217
return(NOEXCEPTION);
218
}
219
220
/*
221
* return infinity
222
*/
223
Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
224
Dbl_copytoptr(resultp1,resultp2,dstptr);
225
return(NOEXCEPTION);
226
}
227
}
228
else {
229
/*
230
* is NaN; signaling or quiet?
231
*/
232
if (Dbl_isone_signaling(opnd2p1)) {
233
/* trap if INVALIDTRAP enabled */
234
if (Is_invalidtrap_enabled())
235
return(OPC_2E_INVALIDEXCEPTION);
236
/* make NaN quiet */
237
Set_invalidflag();
238
Dbl_set_quiet(opnd2p1);
239
}
240
/*
241
* is third operand a signaling NaN?
242
*/
243
else if (Dbl_is_signalingnan(opnd3p1)) {
244
/* trap if INVALIDTRAP enabled */
245
if (Is_invalidtrap_enabled())
246
return(OPC_2E_INVALIDEXCEPTION);
247
/* make NaN quiet */
248
Set_invalidflag();
249
Dbl_set_quiet(opnd3p1);
250
Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
251
return(NOEXCEPTION);
252
}
253
/*
254
* return quiet NaN
255
*/
256
Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
257
return(NOEXCEPTION);
258
}
259
}
260
261
/*
262
* check third operand for NaN's or infinity
263
*/
264
if (Dbl_isinfinity_exponent(opnd3p1)) {
265
if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
266
/* return infinity */
267
Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
268
return(NOEXCEPTION);
269
} else {
270
/*
271
* is NaN; signaling or quiet?
272
*/
273
if (Dbl_isone_signaling(opnd3p1)) {
274
/* trap if INVALIDTRAP enabled */
275
if (Is_invalidtrap_enabled())
276
return(OPC_2E_INVALIDEXCEPTION);
277
/* make NaN quiet */
278
Set_invalidflag();
279
Dbl_set_quiet(opnd3p1);
280
}
281
/*
282
* return quiet NaN
283
*/
284
Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
285
return(NOEXCEPTION);
286
}
287
}
288
289
/*
290
* Generate multiply mantissa
291
*/
292
if (Dbl_isnotzero_exponent(opnd1p1)) {
293
/* set hidden bit */
294
Dbl_clear_signexponent_set_hidden(opnd1p1);
295
}
296
else {
297
/* check for zero */
298
if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
299
/*
300
* Perform the add opnd3 with zero here.
301
*/
302
if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
303
if (Is_rounding_mode(ROUNDMINUS)) {
304
Dbl_or_signs(opnd3p1,resultp1);
305
} else {
306
Dbl_and_signs(opnd3p1,resultp1);
307
}
308
}
309
/*
310
* Now let's check for trapped underflow case.
311
*/
312
else if (Dbl_iszero_exponent(opnd3p1) &&
313
Is_underflowtrap_enabled()) {
314
/* need to normalize results mantissa */
315
sign_save = Dbl_signextendedsign(opnd3p1);
316
result_exponent = 0;
317
Dbl_leftshiftby1(opnd3p1,opnd3p2);
318
Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
319
Dbl_set_sign(opnd3p1,/*using*/sign_save);
320
Dbl_setwrapped_exponent(opnd3p1,result_exponent,
321
unfl);
322
Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
323
/* inexact = FALSE */
324
return(OPC_2E_UNDERFLOWEXCEPTION);
325
}
326
Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
327
return(NOEXCEPTION);
328
}
329
/* is denormalized, adjust exponent */
330
Dbl_clear_signexponent(opnd1p1);
331
Dbl_leftshiftby1(opnd1p1,opnd1p2);
332
Dbl_normalize(opnd1p1,opnd1p2,mpy_exponent);
333
}
334
/* opnd2 needs to have hidden bit set with msb in hidden bit */
335
if (Dbl_isnotzero_exponent(opnd2p1)) {
336
Dbl_clear_signexponent_set_hidden(opnd2p1);
337
}
338
else {
339
/* check for zero */
340
if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
341
/*
342
* Perform the add opnd3 with zero here.
343
*/
344
if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
345
if (Is_rounding_mode(ROUNDMINUS)) {
346
Dbl_or_signs(opnd3p1,resultp1);
347
} else {
348
Dbl_and_signs(opnd3p1,resultp1);
349
}
350
}
351
/*
352
* Now let's check for trapped underflow case.
353
*/
354
else if (Dbl_iszero_exponent(opnd3p1) &&
355
Is_underflowtrap_enabled()) {
356
/* need to normalize results mantissa */
357
sign_save = Dbl_signextendedsign(opnd3p1);
358
result_exponent = 0;
359
Dbl_leftshiftby1(opnd3p1,opnd3p2);
360
Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
361
Dbl_set_sign(opnd3p1,/*using*/sign_save);
362
Dbl_setwrapped_exponent(opnd3p1,result_exponent,
363
unfl);
364
Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
365
/* inexact = FALSE */
366
return(OPC_2E_UNDERFLOWEXCEPTION);
367
}
368
Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
369
return(NOEXCEPTION);
370
}
371
/* is denormalized; want to normalize */
372
Dbl_clear_signexponent(opnd2p1);
373
Dbl_leftshiftby1(opnd2p1,opnd2p2);
374
Dbl_normalize(opnd2p1,opnd2p2,mpy_exponent);
375
}
376
377
/* Multiply the first two source mantissas together */
378
379
/*
380
* The intermediate result will be kept in tmpres,
381
* which needs enough room for 106 bits of mantissa,
382
* so lets call it a Double extended.
383
*/
384
Dblext_setzero(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
385
386
/*
387
* Four bits at a time are inspected in each loop, and a
388
* simple shift and add multiply algorithm is used.
389
*/
390
for (count = DBL_P-1; count >= 0; count -= 4) {
391
Dblext_rightshiftby4(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
392
if (Dbit28p2(opnd1p2)) {
393
/* Fourword_add should be an ADD followed by 3 ADDC's */
394
Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
395
opnd2p1<<3 | opnd2p2>>29, opnd2p2<<3, 0, 0);
396
}
397
if (Dbit29p2(opnd1p2)) {
398
Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
399
opnd2p1<<2 | opnd2p2>>30, opnd2p2<<2, 0, 0);
400
}
401
if (Dbit30p2(opnd1p2)) {
402
Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
403
opnd2p1<<1 | opnd2p2>>31, opnd2p2<<1, 0, 0);
404
}
405
if (Dbit31p2(opnd1p2)) {
406
Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
407
opnd2p1, opnd2p2, 0, 0);
408
}
409
Dbl_rightshiftby4(opnd1p1,opnd1p2);
410
}
411
if (Is_dexthiddenoverflow(tmpresp1)) {
412
/* result mantissa >= 2 (mantissa overflow) */
413
mpy_exponent++;
414
Dblext_rightshiftby1(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
415
}
416
417
/*
418
* Restore the sign of the mpy result which was saved in resultp1.
419
* The exponent will continue to be kept in mpy_exponent.
420
*/
421
Dblext_set_sign(tmpresp1,Dbl_sign(resultp1));
422
423
/*
424
* No rounding is required, since the result of the multiply
425
* is exact in the extended format.
426
*/
427
428
/*
429
* Now we are ready to perform the add portion of the operation.
430
*
431
* The exponents need to be kept as integers for now, since the
432
* multiply result might not fit into the exponent field. We
433
* can't overflow or underflow because of this yet, since the
434
* add could bring the final result back into range.
435
*/
436
add_exponent = Dbl_exponent(opnd3p1);
437
438
/*
439
* Check for denormalized or zero add operand.
440
*/
441
if (add_exponent == 0) {
442
/* check for zero */
443
if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
444
/* right is zero */
445
/* Left can't be zero and must be result.
446
*
447
* The final result is now in tmpres and mpy_exponent,
448
* and needs to be rounded and squeezed back into
449
* double precision format from double extended.
450
*/
451
result_exponent = mpy_exponent;
452
Dblext_copy(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
453
resultp1,resultp2,resultp3,resultp4);
454
sign_save = Dbl_signextendedsign(resultp1);/*save sign*/
455
goto round;
456
}
457
458
/*
459
* Neither are zeroes.
460
* Adjust exponent and normalize add operand.
461
*/
462
sign_save = Dbl_signextendedsign(opnd3p1); /* save sign */
463
Dbl_clear_signexponent(opnd3p1);
464
Dbl_leftshiftby1(opnd3p1,opnd3p2);
465
Dbl_normalize(opnd3p1,opnd3p2,add_exponent);
466
Dbl_set_sign(opnd3p1,sign_save); /* restore sign */
467
} else {
468
Dbl_clear_exponent_set_hidden(opnd3p1);
469
}
470
/*
471
* Copy opnd3 to the double extended variable called right.
472
*/
473
Dbl_copyto_dblext(opnd3p1,opnd3p2,rightp1,rightp2,rightp3,rightp4);
474
475
/*
476
* A zero "save" helps discover equal operands (for later),
477
* and is used in swapping operands (if needed).
478
*/
479
Dblext_xortointp1(tmpresp1,rightp1,/*to*/save);
480
481
/*
482
* Compare magnitude of operands.
483
*/
484
Dblext_copytoint_exponentmantissap1(tmpresp1,signlessleft1);
485
Dblext_copytoint_exponentmantissap1(rightp1,signlessright1);
486
if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
487
Dblext_ismagnitudeless(tmpresp2,rightp2,signlessleft1,signlessright1)){
488
/*
489
* Set the left operand to the larger one by XOR swap.
490
* First finish the first word "save".
491
*/
492
Dblext_xorfromintp1(save,rightp1,/*to*/rightp1);
493
Dblext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
494
Dblext_swap_lower(tmpresp2,tmpresp3,tmpresp4,
495
rightp2,rightp3,rightp4);
496
/* also setup exponents used in rest of routine */
497
diff_exponent = add_exponent - mpy_exponent;
498
result_exponent = add_exponent;
499
} else {
500
/* also setup exponents used in rest of routine */
501
diff_exponent = mpy_exponent - add_exponent;
502
result_exponent = mpy_exponent;
503
}
504
/* Invariant: left is not smaller than right. */
505
506
/*
507
* Special case alignment of operands that would force alignment
508
* beyond the extent of the extension. A further optimization
509
* could special case this but only reduces the path length for
510
* this infrequent case.
511
*/
512
if (diff_exponent > DBLEXT_THRESHOLD) {
513
diff_exponent = DBLEXT_THRESHOLD;
514
}
515
516
/* Align right operand by shifting it to the right */
517
Dblext_clear_sign(rightp1);
518
Dblext_right_align(rightp1,rightp2,rightp3,rightp4,
519
/*shifted by*/diff_exponent);
520
521
/* Treat sum and difference of the operands separately. */
522
if ((int)save < 0) {
523
/*
524
* Difference of the two operands. Overflow can occur if the
525
* multiply overflowed. A borrow can occur out of the hidden
526
* bit and force a post normalization phase.
527
*/
528
Dblext_subtract(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
529
rightp1,rightp2,rightp3,rightp4,
530
resultp1,resultp2,resultp3,resultp4);
531
sign_save = Dbl_signextendedsign(resultp1);
532
if (Dbl_iszero_hidden(resultp1)) {
533
/* Handle normalization */
534
/* A straightforward algorithm would now shift the
535
* result and extension left until the hidden bit
536
* becomes one. Not all of the extension bits need
537
* participate in the shift. Only the two most
538
* significant bits (round and guard) are needed.
539
* If only a single shift is needed then the guard
540
* bit becomes a significant low order bit and the
541
* extension must participate in the rounding.
542
* If more than a single shift is needed, then all
543
* bits to the right of the guard bit are zeros,
544
* and the guard bit may or may not be zero. */
545
Dblext_leftshiftby1(resultp1,resultp2,resultp3,
546
resultp4);
547
548
/* Need to check for a zero result. The sign and
549
* exponent fields have already been zeroed. The more
550
* efficient test of the full object can be used.
551
*/
552
if(Dblext_iszero(resultp1,resultp2,resultp3,resultp4)){
553
/* Must have been "x-x" or "x+(-x)". */
554
if (Is_rounding_mode(ROUNDMINUS))
555
Dbl_setone_sign(resultp1);
556
Dbl_copytoptr(resultp1,resultp2,dstptr);
557
return(NOEXCEPTION);
558
}
559
result_exponent--;
560
561
/* Look to see if normalization is finished. */
562
if (Dbl_isone_hidden(resultp1)) {
563
/* No further normalization is needed */
564
goto round;
565
}
566
567
/* Discover first one bit to determine shift amount.
568
* Use a modified binary search. We have already
569
* shifted the result one position right and still
570
* not found a one so the remainder of the extension
571
* must be zero and simplifies rounding. */
572
/* Scan bytes */
573
while (Dbl_iszero_hiddenhigh7mantissa(resultp1)) {
574
Dblext_leftshiftby8(resultp1,resultp2,resultp3,resultp4);
575
result_exponent -= 8;
576
}
577
/* Now narrow it down to the nibble */
578
if (Dbl_iszero_hiddenhigh3mantissa(resultp1)) {
579
/* The lower nibble contains the
580
* normalizing one */
581
Dblext_leftshiftby4(resultp1,resultp2,resultp3,resultp4);
582
result_exponent -= 4;
583
}
584
/* Select case where first bit is set (already
585
* normalized) otherwise select the proper shift. */
586
jumpsize = Dbl_hiddenhigh3mantissa(resultp1);
587
if (jumpsize <= 7) switch(jumpsize) {
588
case 1:
589
Dblext_leftshiftby3(resultp1,resultp2,resultp3,
590
resultp4);
591
result_exponent -= 3;
592
break;
593
case 2:
594
case 3:
595
Dblext_leftshiftby2(resultp1,resultp2,resultp3,
596
resultp4);
597
result_exponent -= 2;
598
break;
599
case 4:
600
case 5:
601
case 6:
602
case 7:
603
Dblext_leftshiftby1(resultp1,resultp2,resultp3,
604
resultp4);
605
result_exponent -= 1;
606
break;
607
}
608
} /* end if (hidden...)... */
609
/* Fall through and round */
610
} /* end if (save < 0)... */
611
else {
612
/* Add magnitudes */
613
Dblext_addition(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
614
rightp1,rightp2,rightp3,rightp4,
615
/*to*/resultp1,resultp2,resultp3,resultp4);
616
sign_save = Dbl_signextendedsign(resultp1);
617
if (Dbl_isone_hiddenoverflow(resultp1)) {
618
/* Prenormalization required. */
619
Dblext_arithrightshiftby1(resultp1,resultp2,resultp3,
620
resultp4);
621
result_exponent++;
622
} /* end if hiddenoverflow... */
623
} /* end else ...add magnitudes... */
624
625
/* Round the result. If the extension and lower two words are
626
* all zeros, then the result is exact. Otherwise round in the
627
* correct direction. Underflow is possible. If a postnormalization
628
* is necessary, then the mantissa is all zeros so no shift is needed.
629
*/
630
round:
631
if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
632
Dblext_denormalize(resultp1,resultp2,resultp3,resultp4,
633
result_exponent,is_tiny);
634
}
635
Dbl_set_sign(resultp1,/*using*/sign_save);
636
if (Dblext_isnotzero_mantissap3(resultp3) ||
637
Dblext_isnotzero_mantissap4(resultp4)) {
638
inexact = TRUE;
639
switch(Rounding_mode()) {
640
case ROUNDNEAREST: /* The default. */
641
if (Dblext_isone_highp3(resultp3)) {
642
/* at least 1/2 ulp */
643
if (Dblext_isnotzero_low31p3(resultp3) ||
644
Dblext_isnotzero_mantissap4(resultp4) ||
645
Dblext_isone_lowp2(resultp2)) {
646
/* either exactly half way and odd or
647
* more than 1/2ulp */
648
Dbl_increment(resultp1,resultp2);
649
}
650
}
651
break;
652
653
case ROUNDPLUS:
654
if (Dbl_iszero_sign(resultp1)) {
655
/* Round up positive results */
656
Dbl_increment(resultp1,resultp2);
657
}
658
break;
659
660
case ROUNDMINUS:
661
if (Dbl_isone_sign(resultp1)) {
662
/* Round down negative results */
663
Dbl_increment(resultp1,resultp2);
664
}
665
666
case ROUNDZERO:;
667
/* truncate is simple */
668
} /* end switch... */
669
if (Dbl_isone_hiddenoverflow(resultp1)) result_exponent++;
670
}
671
if (result_exponent >= DBL_INFINITY_EXPONENT) {
672
/* trap if OVERFLOWTRAP enabled */
673
if (Is_overflowtrap_enabled()) {
674
/*
675
* Adjust bias of result
676
*/
677
Dbl_setwrapped_exponent(resultp1,result_exponent,ovfl);
678
Dbl_copytoptr(resultp1,resultp2,dstptr);
679
if (inexact)
680
if (Is_inexacttrap_enabled())
681
return (OPC_2E_OVERFLOWEXCEPTION |
682
OPC_2E_INEXACTEXCEPTION);
683
else Set_inexactflag();
684
return (OPC_2E_OVERFLOWEXCEPTION);
685
}
686
inexact = TRUE;
687
Set_overflowflag();
688
/* set result to infinity or largest number */
689
Dbl_setoverflow(resultp1,resultp2);
690
691
} else if (result_exponent <= 0) { /* underflow case */
692
if (Is_underflowtrap_enabled()) {
693
/*
694
* Adjust bias of result
695
*/
696
Dbl_setwrapped_exponent(resultp1,result_exponent,unfl);
697
Dbl_copytoptr(resultp1,resultp2,dstptr);
698
if (inexact)
699
if (Is_inexacttrap_enabled())
700
return (OPC_2E_UNDERFLOWEXCEPTION |
701
OPC_2E_INEXACTEXCEPTION);
702
else Set_inexactflag();
703
return(OPC_2E_UNDERFLOWEXCEPTION);
704
}
705
else if (inexact && is_tiny) Set_underflowflag();
706
}
707
else Dbl_set_exponent(resultp1,result_exponent);
708
Dbl_copytoptr(resultp1,resultp2,dstptr);
709
if (inexact)
710
if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
711
else Set_inexactflag();
712
return(NOEXCEPTION);
713
}
714
715
/*
716
* Double Floating-point Multiply Negate Fused Add
717
*/
718
719
dbl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
720
721
dbl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr;
722
unsigned int *status;
723
{
724
unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2, opnd3p1, opnd3p2;
725
register unsigned int tmpresp1, tmpresp2, tmpresp3, tmpresp4;
726
unsigned int rightp1, rightp2, rightp3, rightp4;
727
unsigned int resultp1, resultp2 = 0, resultp3 = 0, resultp4 = 0;
728
register int mpy_exponent, add_exponent, count;
729
boolean inexact = FALSE, is_tiny = FALSE;
730
731
unsigned int signlessleft1, signlessright1, save;
732
register int result_exponent, diff_exponent;
733
int sign_save, jumpsize;
734
735
Dbl_copyfromptr(src1ptr,opnd1p1,opnd1p2);
736
Dbl_copyfromptr(src2ptr,opnd2p1,opnd2p2);
737
Dbl_copyfromptr(src3ptr,opnd3p1,opnd3p2);
738
739
/*
740
* set sign bit of result of multiply
741
*/
742
if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1))
743
Dbl_setzerop1(resultp1);
744
else
745
Dbl_setnegativezerop1(resultp1);
746
747
/*
748
* Generate multiply exponent
749
*/
750
mpy_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) - DBL_BIAS;
751
752
/*
753
* check first operand for NaN's or infinity
754
*/
755
if (Dbl_isinfinity_exponent(opnd1p1)) {
756
if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
757
if (Dbl_isnotnan(opnd2p1,opnd2p2) &&
758
Dbl_isnotnan(opnd3p1,opnd3p2)) {
759
if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) {
760
/*
761
* invalid since operands are infinity
762
* and zero
763
*/
764
if (Is_invalidtrap_enabled())
765
return(OPC_2E_INVALIDEXCEPTION);
766
Set_invalidflag();
767
Dbl_makequietnan(resultp1,resultp2);
768
Dbl_copytoptr(resultp1,resultp2,dstptr);
769
return(NOEXCEPTION);
770
}
771
/*
772
* Check third operand for infinity with a
773
* sign opposite of the multiply result
774
*/
775
if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
776
(Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
777
/*
778
* invalid since attempting a magnitude
779
* subtraction of infinities
780
*/
781
if (Is_invalidtrap_enabled())
782
return(OPC_2E_INVALIDEXCEPTION);
783
Set_invalidflag();
784
Dbl_makequietnan(resultp1,resultp2);
785
Dbl_copytoptr(resultp1,resultp2,dstptr);
786
return(NOEXCEPTION);
787
}
788
789
/*
790
* return infinity
791
*/
792
Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
793
Dbl_copytoptr(resultp1,resultp2,dstptr);
794
return(NOEXCEPTION);
795
}
796
}
797
else {
798
/*
799
* is NaN; signaling or quiet?
800
*/
801
if (Dbl_isone_signaling(opnd1p1)) {
802
/* trap if INVALIDTRAP enabled */
803
if (Is_invalidtrap_enabled())
804
return(OPC_2E_INVALIDEXCEPTION);
805
/* make NaN quiet */
806
Set_invalidflag();
807
Dbl_set_quiet(opnd1p1);
808
}
809
/*
810
* is second operand a signaling NaN?
811
*/
812
else if (Dbl_is_signalingnan(opnd2p1)) {
813
/* trap if INVALIDTRAP enabled */
814
if (Is_invalidtrap_enabled())
815
return(OPC_2E_INVALIDEXCEPTION);
816
/* make NaN quiet */
817
Set_invalidflag();
818
Dbl_set_quiet(opnd2p1);
819
Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
820
return(NOEXCEPTION);
821
}
822
/*
823
* is third operand a signaling NaN?
824
*/
825
else if (Dbl_is_signalingnan(opnd3p1)) {
826
/* trap if INVALIDTRAP enabled */
827
if (Is_invalidtrap_enabled())
828
return(OPC_2E_INVALIDEXCEPTION);
829
/* make NaN quiet */
830
Set_invalidflag();
831
Dbl_set_quiet(opnd3p1);
832
Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
833
return(NOEXCEPTION);
834
}
835
/*
836
* return quiet NaN
837
*/
838
Dbl_copytoptr(opnd1p1,opnd1p2,dstptr);
839
return(NOEXCEPTION);
840
}
841
}
842
843
/*
844
* check second operand for NaN's or infinity
845
*/
846
if (Dbl_isinfinity_exponent(opnd2p1)) {
847
if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
848
if (Dbl_isnotnan(opnd3p1,opnd3p2)) {
849
if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) {
850
/*
851
* invalid since multiply operands are
852
* zero & infinity
853
*/
854
if (Is_invalidtrap_enabled())
855
return(OPC_2E_INVALIDEXCEPTION);
856
Set_invalidflag();
857
Dbl_makequietnan(opnd2p1,opnd2p2);
858
Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
859
return(NOEXCEPTION);
860
}
861
862
/*
863
* Check third operand for infinity with a
864
* sign opposite of the multiply result
865
*/
866
if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
867
(Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
868
/*
869
* invalid since attempting a magnitude
870
* subtraction of infinities
871
*/
872
if (Is_invalidtrap_enabled())
873
return(OPC_2E_INVALIDEXCEPTION);
874
Set_invalidflag();
875
Dbl_makequietnan(resultp1,resultp2);
876
Dbl_copytoptr(resultp1,resultp2,dstptr);
877
return(NOEXCEPTION);
878
}
879
880
/*
881
* return infinity
882
*/
883
Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
884
Dbl_copytoptr(resultp1,resultp2,dstptr);
885
return(NOEXCEPTION);
886
}
887
}
888
else {
889
/*
890
* is NaN; signaling or quiet?
891
*/
892
if (Dbl_isone_signaling(opnd2p1)) {
893
/* trap if INVALIDTRAP enabled */
894
if (Is_invalidtrap_enabled())
895
return(OPC_2E_INVALIDEXCEPTION);
896
/* make NaN quiet */
897
Set_invalidflag();
898
Dbl_set_quiet(opnd2p1);
899
}
900
/*
901
* is third operand a signaling NaN?
902
*/
903
else if (Dbl_is_signalingnan(opnd3p1)) {
904
/* trap if INVALIDTRAP enabled */
905
if (Is_invalidtrap_enabled())
906
return(OPC_2E_INVALIDEXCEPTION);
907
/* make NaN quiet */
908
Set_invalidflag();
909
Dbl_set_quiet(opnd3p1);
910
Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
911
return(NOEXCEPTION);
912
}
913
/*
914
* return quiet NaN
915
*/
916
Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
917
return(NOEXCEPTION);
918
}
919
}
920
921
/*
922
* check third operand for NaN's or infinity
923
*/
924
if (Dbl_isinfinity_exponent(opnd3p1)) {
925
if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
926
/* return infinity */
927
Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
928
return(NOEXCEPTION);
929
} else {
930
/*
931
* is NaN; signaling or quiet?
932
*/
933
if (Dbl_isone_signaling(opnd3p1)) {
934
/* trap if INVALIDTRAP enabled */
935
if (Is_invalidtrap_enabled())
936
return(OPC_2E_INVALIDEXCEPTION);
937
/* make NaN quiet */
938
Set_invalidflag();
939
Dbl_set_quiet(opnd3p1);
940
}
941
/*
942
* return quiet NaN
943
*/
944
Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
945
return(NOEXCEPTION);
946
}
947
}
948
949
/*
950
* Generate multiply mantissa
951
*/
952
if (Dbl_isnotzero_exponent(opnd1p1)) {
953
/* set hidden bit */
954
Dbl_clear_signexponent_set_hidden(opnd1p1);
955
}
956
else {
957
/* check for zero */
958
if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
959
/*
960
* Perform the add opnd3 with zero here.
961
*/
962
if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
963
if (Is_rounding_mode(ROUNDMINUS)) {
964
Dbl_or_signs(opnd3p1,resultp1);
965
} else {
966
Dbl_and_signs(opnd3p1,resultp1);
967
}
968
}
969
/*
970
* Now let's check for trapped underflow case.
971
*/
972
else if (Dbl_iszero_exponent(opnd3p1) &&
973
Is_underflowtrap_enabled()) {
974
/* need to normalize results mantissa */
975
sign_save = Dbl_signextendedsign(opnd3p1);
976
result_exponent = 0;
977
Dbl_leftshiftby1(opnd3p1,opnd3p2);
978
Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
979
Dbl_set_sign(opnd3p1,/*using*/sign_save);
980
Dbl_setwrapped_exponent(opnd3p1,result_exponent,
981
unfl);
982
Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
983
/* inexact = FALSE */
984
return(OPC_2E_UNDERFLOWEXCEPTION);
985
}
986
Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
987
return(NOEXCEPTION);
988
}
989
/* is denormalized, adjust exponent */
990
Dbl_clear_signexponent(opnd1p1);
991
Dbl_leftshiftby1(opnd1p1,opnd1p2);
992
Dbl_normalize(opnd1p1,opnd1p2,mpy_exponent);
993
}
994
/* opnd2 needs to have hidden bit set with msb in hidden bit */
995
if (Dbl_isnotzero_exponent(opnd2p1)) {
996
Dbl_clear_signexponent_set_hidden(opnd2p1);
997
}
998
else {
999
/* check for zero */
1000
if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
1001
/*
1002
* Perform the add opnd3 with zero here.
1003
*/
1004
if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
1005
if (Is_rounding_mode(ROUNDMINUS)) {
1006
Dbl_or_signs(opnd3p1,resultp1);
1007
} else {
1008
Dbl_and_signs(opnd3p1,resultp1);
1009
}
1010
}
1011
/*
1012
* Now let's check for trapped underflow case.
1013
*/
1014
else if (Dbl_iszero_exponent(opnd3p1) &&
1015
Is_underflowtrap_enabled()) {
1016
/* need to normalize results mantissa */
1017
sign_save = Dbl_signextendedsign(opnd3p1);
1018
result_exponent = 0;
1019
Dbl_leftshiftby1(opnd3p1,opnd3p2);
1020
Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
1021
Dbl_set_sign(opnd3p1,/*using*/sign_save);
1022
Dbl_setwrapped_exponent(opnd3p1,result_exponent,
1023
unfl);
1024
Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
1025
/* inexact = FALSE */
1026
return(OPC_2E_UNDERFLOWEXCEPTION);
1027
}
1028
Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
1029
return(NOEXCEPTION);
1030
}
1031
/* is denormalized; want to normalize */
1032
Dbl_clear_signexponent(opnd2p1);
1033
Dbl_leftshiftby1(opnd2p1,opnd2p2);
1034
Dbl_normalize(opnd2p1,opnd2p2,mpy_exponent);
1035
}
1036
1037
/* Multiply the first two source mantissas together */
1038
1039
/*
1040
* The intermediate result will be kept in tmpres,
1041
* which needs enough room for 106 bits of mantissa,
1042
* so lets call it a Double extended.
1043
*/
1044
Dblext_setzero(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
1045
1046
/*
1047
* Four bits at a time are inspected in each loop, and a
1048
* simple shift and add multiply algorithm is used.
1049
*/
1050
for (count = DBL_P-1; count >= 0; count -= 4) {
1051
Dblext_rightshiftby4(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
1052
if (Dbit28p2(opnd1p2)) {
1053
/* Fourword_add should be an ADD followed by 3 ADDC's */
1054
Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1055
opnd2p1<<3 | opnd2p2>>29, opnd2p2<<3, 0, 0);
1056
}
1057
if (Dbit29p2(opnd1p2)) {
1058
Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1059
opnd2p1<<2 | opnd2p2>>30, opnd2p2<<2, 0, 0);
1060
}
1061
if (Dbit30p2(opnd1p2)) {
1062
Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1063
opnd2p1<<1 | opnd2p2>>31, opnd2p2<<1, 0, 0);
1064
}
1065
if (Dbit31p2(opnd1p2)) {
1066
Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1067
opnd2p1, opnd2p2, 0, 0);
1068
}
1069
Dbl_rightshiftby4(opnd1p1,opnd1p2);
1070
}
1071
if (Is_dexthiddenoverflow(tmpresp1)) {
1072
/* result mantissa >= 2 (mantissa overflow) */
1073
mpy_exponent++;
1074
Dblext_rightshiftby1(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
1075
}
1076
1077
/*
1078
* Restore the sign of the mpy result which was saved in resultp1.
1079
* The exponent will continue to be kept in mpy_exponent.
1080
*/
1081
Dblext_set_sign(tmpresp1,Dbl_sign(resultp1));
1082
1083
/*
1084
* No rounding is required, since the result of the multiply
1085
* is exact in the extended format.
1086
*/
1087
1088
/*
1089
* Now we are ready to perform the add portion of the operation.
1090
*
1091
* The exponents need to be kept as integers for now, since the
1092
* multiply result might not fit into the exponent field. We
1093
* can't overflow or underflow because of this yet, since the
1094
* add could bring the final result back into range.
1095
*/
1096
add_exponent = Dbl_exponent(opnd3p1);
1097
1098
/*
1099
* Check for denormalized or zero add operand.
1100
*/
1101
if (add_exponent == 0) {
1102
/* check for zero */
1103
if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
1104
/* right is zero */
1105
/* Left can't be zero and must be result.
1106
*
1107
* The final result is now in tmpres and mpy_exponent,
1108
* and needs to be rounded and squeezed back into
1109
* double precision format from double extended.
1110
*/
1111
result_exponent = mpy_exponent;
1112
Dblext_copy(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
1113
resultp1,resultp2,resultp3,resultp4);
1114
sign_save = Dbl_signextendedsign(resultp1);/*save sign*/
1115
goto round;
1116
}
1117
1118
/*
1119
* Neither are zeroes.
1120
* Adjust exponent and normalize add operand.
1121
*/
1122
sign_save = Dbl_signextendedsign(opnd3p1); /* save sign */
1123
Dbl_clear_signexponent(opnd3p1);
1124
Dbl_leftshiftby1(opnd3p1,opnd3p2);
1125
Dbl_normalize(opnd3p1,opnd3p2,add_exponent);
1126
Dbl_set_sign(opnd3p1,sign_save); /* restore sign */
1127
} else {
1128
Dbl_clear_exponent_set_hidden(opnd3p1);
1129
}
1130
/*
1131
* Copy opnd3 to the double extended variable called right.
1132
*/
1133
Dbl_copyto_dblext(opnd3p1,opnd3p2,rightp1,rightp2,rightp3,rightp4);
1134
1135
/*
1136
* A zero "save" helps discover equal operands (for later),
1137
* and is used in swapping operands (if needed).
1138
*/
1139
Dblext_xortointp1(tmpresp1,rightp1,/*to*/save);
1140
1141
/*
1142
* Compare magnitude of operands.
1143
*/
1144
Dblext_copytoint_exponentmantissap1(tmpresp1,signlessleft1);
1145
Dblext_copytoint_exponentmantissap1(rightp1,signlessright1);
1146
if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
1147
Dblext_ismagnitudeless(tmpresp2,rightp2,signlessleft1,signlessright1)){
1148
/*
1149
* Set the left operand to the larger one by XOR swap.
1150
* First finish the first word "save".
1151
*/
1152
Dblext_xorfromintp1(save,rightp1,/*to*/rightp1);
1153
Dblext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
1154
Dblext_swap_lower(tmpresp2,tmpresp3,tmpresp4,
1155
rightp2,rightp3,rightp4);
1156
/* also setup exponents used in rest of routine */
1157
diff_exponent = add_exponent - mpy_exponent;
1158
result_exponent = add_exponent;
1159
} else {
1160
/* also setup exponents used in rest of routine */
1161
diff_exponent = mpy_exponent - add_exponent;
1162
result_exponent = mpy_exponent;
1163
}
1164
/* Invariant: left is not smaller than right. */
1165
1166
/*
1167
* Special case alignment of operands that would force alignment
1168
* beyond the extent of the extension. A further optimization
1169
* could special case this but only reduces the path length for
1170
* this infrequent case.
1171
*/
1172
if (diff_exponent > DBLEXT_THRESHOLD) {
1173
diff_exponent = DBLEXT_THRESHOLD;
1174
}
1175
1176
/* Align right operand by shifting it to the right */
1177
Dblext_clear_sign(rightp1);
1178
Dblext_right_align(rightp1,rightp2,rightp3,rightp4,
1179
/*shifted by*/diff_exponent);
1180
1181
/* Treat sum and difference of the operands separately. */
1182
if ((int)save < 0) {
1183
/*
1184
* Difference of the two operands. Overflow can occur if the
1185
* multiply overflowed. A borrow can occur out of the hidden
1186
* bit and force a post normalization phase.
1187
*/
1188
Dblext_subtract(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
1189
rightp1,rightp2,rightp3,rightp4,
1190
resultp1,resultp2,resultp3,resultp4);
1191
sign_save = Dbl_signextendedsign(resultp1);
1192
if (Dbl_iszero_hidden(resultp1)) {
1193
/* Handle normalization */
1194
/* A straightforward algorithm would now shift the
1195
* result and extension left until the hidden bit
1196
* becomes one. Not all of the extension bits need
1197
* participate in the shift. Only the two most
1198
* significant bits (round and guard) are needed.
1199
* If only a single shift is needed then the guard
1200
* bit becomes a significant low order bit and the
1201
* extension must participate in the rounding.
1202
* If more than a single shift is needed, then all
1203
* bits to the right of the guard bit are zeros,
1204
* and the guard bit may or may not be zero. */
1205
Dblext_leftshiftby1(resultp1,resultp2,resultp3,
1206
resultp4);
1207
1208
/* Need to check for a zero result. The sign and
1209
* exponent fields have already been zeroed. The more
1210
* efficient test of the full object can be used.
1211
*/
1212
if (Dblext_iszero(resultp1,resultp2,resultp3,resultp4)) {
1213
/* Must have been "x-x" or "x+(-x)". */
1214
if (Is_rounding_mode(ROUNDMINUS))
1215
Dbl_setone_sign(resultp1);
1216
Dbl_copytoptr(resultp1,resultp2,dstptr);
1217
return(NOEXCEPTION);
1218
}
1219
result_exponent--;
1220
1221
/* Look to see if normalization is finished. */
1222
if (Dbl_isone_hidden(resultp1)) {
1223
/* No further normalization is needed */
1224
goto round;
1225
}
1226
1227
/* Discover first one bit to determine shift amount.
1228
* Use a modified binary search. We have already
1229
* shifted the result one position right and still
1230
* not found a one so the remainder of the extension
1231
* must be zero and simplifies rounding. */
1232
/* Scan bytes */
1233
while (Dbl_iszero_hiddenhigh7mantissa(resultp1)) {
1234
Dblext_leftshiftby8(resultp1,resultp2,resultp3,resultp4);
1235
result_exponent -= 8;
1236
}
1237
/* Now narrow it down to the nibble */
1238
if (Dbl_iszero_hiddenhigh3mantissa(resultp1)) {
1239
/* The lower nibble contains the
1240
* normalizing one */
1241
Dblext_leftshiftby4(resultp1,resultp2,resultp3,resultp4);
1242
result_exponent -= 4;
1243
}
1244
/* Select case where first bit is set (already
1245
* normalized) otherwise select the proper shift. */
1246
jumpsize = Dbl_hiddenhigh3mantissa(resultp1);
1247
if (jumpsize <= 7) switch(jumpsize) {
1248
case 1:
1249
Dblext_leftshiftby3(resultp1,resultp2,resultp3,
1250
resultp4);
1251
result_exponent -= 3;
1252
break;
1253
case 2:
1254
case 3:
1255
Dblext_leftshiftby2(resultp1,resultp2,resultp3,
1256
resultp4);
1257
result_exponent -= 2;
1258
break;
1259
case 4:
1260
case 5:
1261
case 6:
1262
case 7:
1263
Dblext_leftshiftby1(resultp1,resultp2,resultp3,
1264
resultp4);
1265
result_exponent -= 1;
1266
break;
1267
}
1268
} /* end if (hidden...)... */
1269
/* Fall through and round */
1270
} /* end if (save < 0)... */
1271
else {
1272
/* Add magnitudes */
1273
Dblext_addition(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
1274
rightp1,rightp2,rightp3,rightp4,
1275
/*to*/resultp1,resultp2,resultp3,resultp4);
1276
sign_save = Dbl_signextendedsign(resultp1);
1277
if (Dbl_isone_hiddenoverflow(resultp1)) {
1278
/* Prenormalization required. */
1279
Dblext_arithrightshiftby1(resultp1,resultp2,resultp3,
1280
resultp4);
1281
result_exponent++;
1282
} /* end if hiddenoverflow... */
1283
} /* end else ...add magnitudes... */
1284
1285
/* Round the result. If the extension and lower two words are
1286
* all zeros, then the result is exact. Otherwise round in the
1287
* correct direction. Underflow is possible. If a postnormalization
1288
* is necessary, then the mantissa is all zeros so no shift is needed.
1289
*/
1290
round:
1291
if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
1292
Dblext_denormalize(resultp1,resultp2,resultp3,resultp4,
1293
result_exponent,is_tiny);
1294
}
1295
Dbl_set_sign(resultp1,/*using*/sign_save);
1296
if (Dblext_isnotzero_mantissap3(resultp3) ||
1297
Dblext_isnotzero_mantissap4(resultp4)) {
1298
inexact = TRUE;
1299
switch(Rounding_mode()) {
1300
case ROUNDNEAREST: /* The default. */
1301
if (Dblext_isone_highp3(resultp3)) {
1302
/* at least 1/2 ulp */
1303
if (Dblext_isnotzero_low31p3(resultp3) ||
1304
Dblext_isnotzero_mantissap4(resultp4) ||
1305
Dblext_isone_lowp2(resultp2)) {
1306
/* either exactly half way and odd or
1307
* more than 1/2ulp */
1308
Dbl_increment(resultp1,resultp2);
1309
}
1310
}
1311
break;
1312
1313
case ROUNDPLUS:
1314
if (Dbl_iszero_sign(resultp1)) {
1315
/* Round up positive results */
1316
Dbl_increment(resultp1,resultp2);
1317
}
1318
break;
1319
1320
case ROUNDMINUS:
1321
if (Dbl_isone_sign(resultp1)) {
1322
/* Round down negative results */
1323
Dbl_increment(resultp1,resultp2);
1324
}
1325
1326
case ROUNDZERO:;
1327
/* truncate is simple */
1328
} /* end switch... */
1329
if (Dbl_isone_hiddenoverflow(resultp1)) result_exponent++;
1330
}
1331
if (result_exponent >= DBL_INFINITY_EXPONENT) {
1332
/* Overflow */
1333
if (Is_overflowtrap_enabled()) {
1334
/*
1335
* Adjust bias of result
1336
*/
1337
Dbl_setwrapped_exponent(resultp1,result_exponent,ovfl);
1338
Dbl_copytoptr(resultp1,resultp2,dstptr);
1339
if (inexact)
1340
if (Is_inexacttrap_enabled())
1341
return (OPC_2E_OVERFLOWEXCEPTION |
1342
OPC_2E_INEXACTEXCEPTION);
1343
else Set_inexactflag();
1344
return (OPC_2E_OVERFLOWEXCEPTION);
1345
}
1346
inexact = TRUE;
1347
Set_overflowflag();
1348
Dbl_setoverflow(resultp1,resultp2);
1349
} else if (result_exponent <= 0) { /* underflow case */
1350
if (Is_underflowtrap_enabled()) {
1351
/*
1352
* Adjust bias of result
1353
*/
1354
Dbl_setwrapped_exponent(resultp1,result_exponent,unfl);
1355
Dbl_copytoptr(resultp1,resultp2,dstptr);
1356
if (inexact)
1357
if (Is_inexacttrap_enabled())
1358
return (OPC_2E_UNDERFLOWEXCEPTION |
1359
OPC_2E_INEXACTEXCEPTION);
1360
else Set_inexactflag();
1361
return(OPC_2E_UNDERFLOWEXCEPTION);
1362
}
1363
else if (inexact && is_tiny) Set_underflowflag();
1364
}
1365
else Dbl_set_exponent(resultp1,result_exponent);
1366
Dbl_copytoptr(resultp1,resultp2,dstptr);
1367
if (inexact)
1368
if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
1369
else Set_inexactflag();
1370
return(NOEXCEPTION);
1371
}
1372
1373
/*
1374
* Single Floating-point Multiply Fused Add
1375
*/
1376
1377
sgl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
1378
1379
sgl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr;
1380
unsigned int *status;
1381
{
1382
unsigned int opnd1, opnd2, opnd3;
1383
register unsigned int tmpresp1, tmpresp2;
1384
unsigned int rightp1, rightp2;
1385
unsigned int resultp1, resultp2 = 0;
1386
register int mpy_exponent, add_exponent, count;
1387
boolean inexact = FALSE, is_tiny = FALSE;
1388
1389
unsigned int signlessleft1, signlessright1, save;
1390
register int result_exponent, diff_exponent;
1391
int sign_save, jumpsize;
1392
1393
Sgl_copyfromptr(src1ptr,opnd1);
1394
Sgl_copyfromptr(src2ptr,opnd2);
1395
Sgl_copyfromptr(src3ptr,opnd3);
1396
1397
/*
1398
* set sign bit of result of multiply
1399
*/
1400
if (Sgl_sign(opnd1) ^ Sgl_sign(opnd2))
1401
Sgl_setnegativezero(resultp1);
1402
else Sgl_setzero(resultp1);
1403
1404
/*
1405
* Generate multiply exponent
1406
*/
1407
mpy_exponent = Sgl_exponent(opnd1) + Sgl_exponent(opnd2) - SGL_BIAS;
1408
1409
/*
1410
* check first operand for NaN's or infinity
1411
*/
1412
if (Sgl_isinfinity_exponent(opnd1)) {
1413
if (Sgl_iszero_mantissa(opnd1)) {
1414
if (Sgl_isnotnan(opnd2) && Sgl_isnotnan(opnd3)) {
1415
if (Sgl_iszero_exponentmantissa(opnd2)) {
1416
/*
1417
* invalid since operands are infinity
1418
* and zero
1419
*/
1420
if (Is_invalidtrap_enabled())
1421
return(OPC_2E_INVALIDEXCEPTION);
1422
Set_invalidflag();
1423
Sgl_makequietnan(resultp1);
1424
Sgl_copytoptr(resultp1,dstptr);
1425
return(NOEXCEPTION);
1426
}
1427
/*
1428
* Check third operand for infinity with a
1429
* sign opposite of the multiply result
1430
*/
1431
if (Sgl_isinfinity(opnd3) &&
1432
(Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
1433
/*
1434
* invalid since attempting a magnitude
1435
* subtraction of infinities
1436
*/
1437
if (Is_invalidtrap_enabled())
1438
return(OPC_2E_INVALIDEXCEPTION);
1439
Set_invalidflag();
1440
Sgl_makequietnan(resultp1);
1441
Sgl_copytoptr(resultp1,dstptr);
1442
return(NOEXCEPTION);
1443
}
1444
1445
/*
1446
* return infinity
1447
*/
1448
Sgl_setinfinity_exponentmantissa(resultp1);
1449
Sgl_copytoptr(resultp1,dstptr);
1450
return(NOEXCEPTION);
1451
}
1452
}
1453
else {
1454
/*
1455
* is NaN; signaling or quiet?
1456
*/
1457
if (Sgl_isone_signaling(opnd1)) {
1458
/* trap if INVALIDTRAP enabled */
1459
if (Is_invalidtrap_enabled())
1460
return(OPC_2E_INVALIDEXCEPTION);
1461
/* make NaN quiet */
1462
Set_invalidflag();
1463
Sgl_set_quiet(opnd1);
1464
}
1465
/*
1466
* is second operand a signaling NaN?
1467
*/
1468
else if (Sgl_is_signalingnan(opnd2)) {
1469
/* trap if INVALIDTRAP enabled */
1470
if (Is_invalidtrap_enabled())
1471
return(OPC_2E_INVALIDEXCEPTION);
1472
/* make NaN quiet */
1473
Set_invalidflag();
1474
Sgl_set_quiet(opnd2);
1475
Sgl_copytoptr(opnd2,dstptr);
1476
return(NOEXCEPTION);
1477
}
1478
/*
1479
* is third operand a signaling NaN?
1480
*/
1481
else if (Sgl_is_signalingnan(opnd3)) {
1482
/* trap if INVALIDTRAP enabled */
1483
if (Is_invalidtrap_enabled())
1484
return(OPC_2E_INVALIDEXCEPTION);
1485
/* make NaN quiet */
1486
Set_invalidflag();
1487
Sgl_set_quiet(opnd3);
1488
Sgl_copytoptr(opnd3,dstptr);
1489
return(NOEXCEPTION);
1490
}
1491
/*
1492
* return quiet NaN
1493
*/
1494
Sgl_copytoptr(opnd1,dstptr);
1495
return(NOEXCEPTION);
1496
}
1497
}
1498
1499
/*
1500
* check second operand for NaN's or infinity
1501
*/
1502
if (Sgl_isinfinity_exponent(opnd2)) {
1503
if (Sgl_iszero_mantissa(opnd2)) {
1504
if (Sgl_isnotnan(opnd3)) {
1505
if (Sgl_iszero_exponentmantissa(opnd1)) {
1506
/*
1507
* invalid since multiply operands are
1508
* zero & infinity
1509
*/
1510
if (Is_invalidtrap_enabled())
1511
return(OPC_2E_INVALIDEXCEPTION);
1512
Set_invalidflag();
1513
Sgl_makequietnan(opnd2);
1514
Sgl_copytoptr(opnd2,dstptr);
1515
return(NOEXCEPTION);
1516
}
1517
1518
/*
1519
* Check third operand for infinity with a
1520
* sign opposite of the multiply result
1521
*/
1522
if (Sgl_isinfinity(opnd3) &&
1523
(Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
1524
/*
1525
* invalid since attempting a magnitude
1526
* subtraction of infinities
1527
*/
1528
if (Is_invalidtrap_enabled())
1529
return(OPC_2E_INVALIDEXCEPTION);
1530
Set_invalidflag();
1531
Sgl_makequietnan(resultp1);
1532
Sgl_copytoptr(resultp1,dstptr);
1533
return(NOEXCEPTION);
1534
}
1535
1536
/*
1537
* return infinity
1538
*/
1539
Sgl_setinfinity_exponentmantissa(resultp1);
1540
Sgl_copytoptr(resultp1,dstptr);
1541
return(NOEXCEPTION);
1542
}
1543
}
1544
else {
1545
/*
1546
* is NaN; signaling or quiet?
1547
*/
1548
if (Sgl_isone_signaling(opnd2)) {
1549
/* trap if INVALIDTRAP enabled */
1550
if (Is_invalidtrap_enabled())
1551
return(OPC_2E_INVALIDEXCEPTION);
1552
/* make NaN quiet */
1553
Set_invalidflag();
1554
Sgl_set_quiet(opnd2);
1555
}
1556
/*
1557
* is third operand a signaling NaN?
1558
*/
1559
else if (Sgl_is_signalingnan(opnd3)) {
1560
/* trap if INVALIDTRAP enabled */
1561
if (Is_invalidtrap_enabled())
1562
return(OPC_2E_INVALIDEXCEPTION);
1563
/* make NaN quiet */
1564
Set_invalidflag();
1565
Sgl_set_quiet(opnd3);
1566
Sgl_copytoptr(opnd3,dstptr);
1567
return(NOEXCEPTION);
1568
}
1569
/*
1570
* return quiet NaN
1571
*/
1572
Sgl_copytoptr(opnd2,dstptr);
1573
return(NOEXCEPTION);
1574
}
1575
}
1576
1577
/*
1578
* check third operand for NaN's or infinity
1579
*/
1580
if (Sgl_isinfinity_exponent(opnd3)) {
1581
if (Sgl_iszero_mantissa(opnd3)) {
1582
/* return infinity */
1583
Sgl_copytoptr(opnd3,dstptr);
1584
return(NOEXCEPTION);
1585
} else {
1586
/*
1587
* is NaN; signaling or quiet?
1588
*/
1589
if (Sgl_isone_signaling(opnd3)) {
1590
/* trap if INVALIDTRAP enabled */
1591
if (Is_invalidtrap_enabled())
1592
return(OPC_2E_INVALIDEXCEPTION);
1593
/* make NaN quiet */
1594
Set_invalidflag();
1595
Sgl_set_quiet(opnd3);
1596
}
1597
/*
1598
* return quiet NaN
1599
*/
1600
Sgl_copytoptr(opnd3,dstptr);
1601
return(NOEXCEPTION);
1602
}
1603
}
1604
1605
/*
1606
* Generate multiply mantissa
1607
*/
1608
if (Sgl_isnotzero_exponent(opnd1)) {
1609
/* set hidden bit */
1610
Sgl_clear_signexponent_set_hidden(opnd1);
1611
}
1612
else {
1613
/* check for zero */
1614
if (Sgl_iszero_mantissa(opnd1)) {
1615
/*
1616
* Perform the add opnd3 with zero here.
1617
*/
1618
if (Sgl_iszero_exponentmantissa(opnd3)) {
1619
if (Is_rounding_mode(ROUNDMINUS)) {
1620
Sgl_or_signs(opnd3,resultp1);
1621
} else {
1622
Sgl_and_signs(opnd3,resultp1);
1623
}
1624
}
1625
/*
1626
* Now let's check for trapped underflow case.
1627
*/
1628
else if (Sgl_iszero_exponent(opnd3) &&
1629
Is_underflowtrap_enabled()) {
1630
/* need to normalize results mantissa */
1631
sign_save = Sgl_signextendedsign(opnd3);
1632
result_exponent = 0;
1633
Sgl_leftshiftby1(opnd3);
1634
Sgl_normalize(opnd3,result_exponent);
1635
Sgl_set_sign(opnd3,/*using*/sign_save);
1636
Sgl_setwrapped_exponent(opnd3,result_exponent,
1637
unfl);
1638
Sgl_copytoptr(opnd3,dstptr);
1639
/* inexact = FALSE */
1640
return(OPC_2E_UNDERFLOWEXCEPTION);
1641
}
1642
Sgl_copytoptr(opnd3,dstptr);
1643
return(NOEXCEPTION);
1644
}
1645
/* is denormalized, adjust exponent */
1646
Sgl_clear_signexponent(opnd1);
1647
Sgl_leftshiftby1(opnd1);
1648
Sgl_normalize(opnd1,mpy_exponent);
1649
}
1650
/* opnd2 needs to have hidden bit set with msb in hidden bit */
1651
if (Sgl_isnotzero_exponent(opnd2)) {
1652
Sgl_clear_signexponent_set_hidden(opnd2);
1653
}
1654
else {
1655
/* check for zero */
1656
if (Sgl_iszero_mantissa(opnd2)) {
1657
/*
1658
* Perform the add opnd3 with zero here.
1659
*/
1660
if (Sgl_iszero_exponentmantissa(opnd3)) {
1661
if (Is_rounding_mode(ROUNDMINUS)) {
1662
Sgl_or_signs(opnd3,resultp1);
1663
} else {
1664
Sgl_and_signs(opnd3,resultp1);
1665
}
1666
}
1667
/*
1668
* Now let's check for trapped underflow case.
1669
*/
1670
else if (Sgl_iszero_exponent(opnd3) &&
1671
Is_underflowtrap_enabled()) {
1672
/* need to normalize results mantissa */
1673
sign_save = Sgl_signextendedsign(opnd3);
1674
result_exponent = 0;
1675
Sgl_leftshiftby1(opnd3);
1676
Sgl_normalize(opnd3,result_exponent);
1677
Sgl_set_sign(opnd3,/*using*/sign_save);
1678
Sgl_setwrapped_exponent(opnd3,result_exponent,
1679
unfl);
1680
Sgl_copytoptr(opnd3,dstptr);
1681
/* inexact = FALSE */
1682
return(OPC_2E_UNDERFLOWEXCEPTION);
1683
}
1684
Sgl_copytoptr(opnd3,dstptr);
1685
return(NOEXCEPTION);
1686
}
1687
/* is denormalized; want to normalize */
1688
Sgl_clear_signexponent(opnd2);
1689
Sgl_leftshiftby1(opnd2);
1690
Sgl_normalize(opnd2,mpy_exponent);
1691
}
1692
1693
/* Multiply the first two source mantissas together */
1694
1695
/*
1696
* The intermediate result will be kept in tmpres,
1697
* which needs enough room for 106 bits of mantissa,
1698
* so lets call it a Double extended.
1699
*/
1700
Sglext_setzero(tmpresp1,tmpresp2);
1701
1702
/*
1703
* Four bits at a time are inspected in each loop, and a
1704
* simple shift and add multiply algorithm is used.
1705
*/
1706
for (count = SGL_P-1; count >= 0; count -= 4) {
1707
Sglext_rightshiftby4(tmpresp1,tmpresp2);
1708
if (Sbit28(opnd1)) {
1709
/* Twoword_add should be an ADD followed by 2 ADDC's */
1710
Twoword_add(tmpresp1, tmpresp2, opnd2<<3, 0);
1711
}
1712
if (Sbit29(opnd1)) {
1713
Twoword_add(tmpresp1, tmpresp2, opnd2<<2, 0);
1714
}
1715
if (Sbit30(opnd1)) {
1716
Twoword_add(tmpresp1, tmpresp2, opnd2<<1, 0);
1717
}
1718
if (Sbit31(opnd1)) {
1719
Twoword_add(tmpresp1, tmpresp2, opnd2, 0);
1720
}
1721
Sgl_rightshiftby4(opnd1);
1722
}
1723
if (Is_sexthiddenoverflow(tmpresp1)) {
1724
/* result mantissa >= 2 (mantissa overflow) */
1725
mpy_exponent++;
1726
Sglext_rightshiftby4(tmpresp1,tmpresp2);
1727
} else {
1728
Sglext_rightshiftby3(tmpresp1,tmpresp2);
1729
}
1730
1731
/*
1732
* Restore the sign of the mpy result which was saved in resultp1.
1733
* The exponent will continue to be kept in mpy_exponent.
1734
*/
1735
Sglext_set_sign(tmpresp1,Sgl_sign(resultp1));
1736
1737
/*
1738
* No rounding is required, since the result of the multiply
1739
* is exact in the extended format.
1740
*/
1741
1742
/*
1743
* Now we are ready to perform the add portion of the operation.
1744
*
1745
* The exponents need to be kept as integers for now, since the
1746
* multiply result might not fit into the exponent field. We
1747
* can't overflow or underflow because of this yet, since the
1748
* add could bring the final result back into range.
1749
*/
1750
add_exponent = Sgl_exponent(opnd3);
1751
1752
/*
1753
* Check for denormalized or zero add operand.
1754
*/
1755
if (add_exponent == 0) {
1756
/* check for zero */
1757
if (Sgl_iszero_mantissa(opnd3)) {
1758
/* right is zero */
1759
/* Left can't be zero and must be result.
1760
*
1761
* The final result is now in tmpres and mpy_exponent,
1762
* and needs to be rounded and squeezed back into
1763
* double precision format from double extended.
1764
*/
1765
result_exponent = mpy_exponent;
1766
Sglext_copy(tmpresp1,tmpresp2,resultp1,resultp2);
1767
sign_save = Sgl_signextendedsign(resultp1);/*save sign*/
1768
goto round;
1769
}
1770
1771
/*
1772
* Neither are zeroes.
1773
* Adjust exponent and normalize add operand.
1774
*/
1775
sign_save = Sgl_signextendedsign(opnd3); /* save sign */
1776
Sgl_clear_signexponent(opnd3);
1777
Sgl_leftshiftby1(opnd3);
1778
Sgl_normalize(opnd3,add_exponent);
1779
Sgl_set_sign(opnd3,sign_save); /* restore sign */
1780
} else {
1781
Sgl_clear_exponent_set_hidden(opnd3);
1782
}
1783
/*
1784
* Copy opnd3 to the double extended variable called right.
1785
*/
1786
Sgl_copyto_sglext(opnd3,rightp1,rightp2);
1787
1788
/*
1789
* A zero "save" helps discover equal operands (for later),
1790
* and is used in swapping operands (if needed).
1791
*/
1792
Sglext_xortointp1(tmpresp1,rightp1,/*to*/save);
1793
1794
/*
1795
* Compare magnitude of operands.
1796
*/
1797
Sglext_copytoint_exponentmantissa(tmpresp1,signlessleft1);
1798
Sglext_copytoint_exponentmantissa(rightp1,signlessright1);
1799
if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
1800
Sglext_ismagnitudeless(signlessleft1,signlessright1)) {
1801
/*
1802
* Set the left operand to the larger one by XOR swap.
1803
* First finish the first word "save".
1804
*/
1805
Sglext_xorfromintp1(save,rightp1,/*to*/rightp1);
1806
Sglext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
1807
Sglext_swap_lower(tmpresp2,rightp2);
1808
/* also setup exponents used in rest of routine */
1809
diff_exponent = add_exponent - mpy_exponent;
1810
result_exponent = add_exponent;
1811
} else {
1812
/* also setup exponents used in rest of routine */
1813
diff_exponent = mpy_exponent - add_exponent;
1814
result_exponent = mpy_exponent;
1815
}
1816
/* Invariant: left is not smaller than right. */
1817
1818
/*
1819
* Special case alignment of operands that would force alignment
1820
* beyond the extent of the extension. A further optimization
1821
* could special case this but only reduces the path length for
1822
* this infrequent case.
1823
*/
1824
if (diff_exponent > SGLEXT_THRESHOLD) {
1825
diff_exponent = SGLEXT_THRESHOLD;
1826
}
1827
1828
/* Align right operand by shifting it to the right */
1829
Sglext_clear_sign(rightp1);
1830
Sglext_right_align(rightp1,rightp2,/*shifted by*/diff_exponent);
1831
1832
/* Treat sum and difference of the operands separately. */
1833
if ((int)save < 0) {
1834
/*
1835
* Difference of the two operands. Overflow can occur if the
1836
* multiply overflowed. A borrow can occur out of the hidden
1837
* bit and force a post normalization phase.
1838
*/
1839
Sglext_subtract(tmpresp1,tmpresp2, rightp1,rightp2,
1840
resultp1,resultp2);
1841
sign_save = Sgl_signextendedsign(resultp1);
1842
if (Sgl_iszero_hidden(resultp1)) {
1843
/* Handle normalization */
1844
/* A straightforward algorithm would now shift the
1845
* result and extension left until the hidden bit
1846
* becomes one. Not all of the extension bits need
1847
* participate in the shift. Only the two most
1848
* significant bits (round and guard) are needed.
1849
* If only a single shift is needed then the guard
1850
* bit becomes a significant low order bit and the
1851
* extension must participate in the rounding.
1852
* If more than a single shift is needed, then all
1853
* bits to the right of the guard bit are zeros,
1854
* and the guard bit may or may not be zero. */
1855
Sglext_leftshiftby1(resultp1,resultp2);
1856
1857
/* Need to check for a zero result. The sign and
1858
* exponent fields have already been zeroed. The more
1859
* efficient test of the full object can be used.
1860
*/
1861
if (Sglext_iszero(resultp1,resultp2)) {
1862
/* Must have been "x-x" or "x+(-x)". */
1863
if (Is_rounding_mode(ROUNDMINUS))
1864
Sgl_setone_sign(resultp1);
1865
Sgl_copytoptr(resultp1,dstptr);
1866
return(NOEXCEPTION);
1867
}
1868
result_exponent--;
1869
1870
/* Look to see if normalization is finished. */
1871
if (Sgl_isone_hidden(resultp1)) {
1872
/* No further normalization is needed */
1873
goto round;
1874
}
1875
1876
/* Discover first one bit to determine shift amount.
1877
* Use a modified binary search. We have already
1878
* shifted the result one position right and still
1879
* not found a one so the remainder of the extension
1880
* must be zero and simplifies rounding. */
1881
/* Scan bytes */
1882
while (Sgl_iszero_hiddenhigh7mantissa(resultp1)) {
1883
Sglext_leftshiftby8(resultp1,resultp2);
1884
result_exponent -= 8;
1885
}
1886
/* Now narrow it down to the nibble */
1887
if (Sgl_iszero_hiddenhigh3mantissa(resultp1)) {
1888
/* The lower nibble contains the
1889
* normalizing one */
1890
Sglext_leftshiftby4(resultp1,resultp2);
1891
result_exponent -= 4;
1892
}
1893
/* Select case where first bit is set (already
1894
* normalized) otherwise select the proper shift. */
1895
jumpsize = Sgl_hiddenhigh3mantissa(resultp1);
1896
if (jumpsize <= 7) switch(jumpsize) {
1897
case 1:
1898
Sglext_leftshiftby3(resultp1,resultp2);
1899
result_exponent -= 3;
1900
break;
1901
case 2:
1902
case 3:
1903
Sglext_leftshiftby2(resultp1,resultp2);
1904
result_exponent -= 2;
1905
break;
1906
case 4:
1907
case 5:
1908
case 6:
1909
case 7:
1910
Sglext_leftshiftby1(resultp1,resultp2);
1911
result_exponent -= 1;
1912
break;
1913
}
1914
} /* end if (hidden...)... */
1915
/* Fall through and round */
1916
} /* end if (save < 0)... */
1917
else {
1918
/* Add magnitudes */
1919
Sglext_addition(tmpresp1,tmpresp2,
1920
rightp1,rightp2, /*to*/resultp1,resultp2);
1921
sign_save = Sgl_signextendedsign(resultp1);
1922
if (Sgl_isone_hiddenoverflow(resultp1)) {
1923
/* Prenormalization required. */
1924
Sglext_arithrightshiftby1(resultp1,resultp2);
1925
result_exponent++;
1926
} /* end if hiddenoverflow... */
1927
} /* end else ...add magnitudes... */
1928
1929
/* Round the result. If the extension and lower two words are
1930
* all zeros, then the result is exact. Otherwise round in the
1931
* correct direction. Underflow is possible. If a postnormalization
1932
* is necessary, then the mantissa is all zeros so no shift is needed.
1933
*/
1934
round:
1935
if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
1936
Sglext_denormalize(resultp1,resultp2,result_exponent,is_tiny);
1937
}
1938
Sgl_set_sign(resultp1,/*using*/sign_save);
1939
if (Sglext_isnotzero_mantissap2(resultp2)) {
1940
inexact = TRUE;
1941
switch(Rounding_mode()) {
1942
case ROUNDNEAREST: /* The default. */
1943
if (Sglext_isone_highp2(resultp2)) {
1944
/* at least 1/2 ulp */
1945
if (Sglext_isnotzero_low31p2(resultp2) ||
1946
Sglext_isone_lowp1(resultp1)) {
1947
/* either exactly half way and odd or
1948
* more than 1/2ulp */
1949
Sgl_increment(resultp1);
1950
}
1951
}
1952
break;
1953
1954
case ROUNDPLUS:
1955
if (Sgl_iszero_sign(resultp1)) {
1956
/* Round up positive results */
1957
Sgl_increment(resultp1);
1958
}
1959
break;
1960
1961
case ROUNDMINUS:
1962
if (Sgl_isone_sign(resultp1)) {
1963
/* Round down negative results */
1964
Sgl_increment(resultp1);
1965
}
1966
1967
case ROUNDZERO:;
1968
/* truncate is simple */
1969
} /* end switch... */
1970
if (Sgl_isone_hiddenoverflow(resultp1)) result_exponent++;
1971
}
1972
if (result_exponent >= SGL_INFINITY_EXPONENT) {
1973
/* Overflow */
1974
if (Is_overflowtrap_enabled()) {
1975
/*
1976
* Adjust bias of result
1977
*/
1978
Sgl_setwrapped_exponent(resultp1,result_exponent,ovfl);
1979
Sgl_copytoptr(resultp1,dstptr);
1980
if (inexact)
1981
if (Is_inexacttrap_enabled())
1982
return (OPC_2E_OVERFLOWEXCEPTION |
1983
OPC_2E_INEXACTEXCEPTION);
1984
else Set_inexactflag();
1985
return (OPC_2E_OVERFLOWEXCEPTION);
1986
}
1987
inexact = TRUE;
1988
Set_overflowflag();
1989
Sgl_setoverflow(resultp1);
1990
} else if (result_exponent <= 0) { /* underflow case */
1991
if (Is_underflowtrap_enabled()) {
1992
/*
1993
* Adjust bias of result
1994
*/
1995
Sgl_setwrapped_exponent(resultp1,result_exponent,unfl);
1996
Sgl_copytoptr(resultp1,dstptr);
1997
if (inexact)
1998
if (Is_inexacttrap_enabled())
1999
return (OPC_2E_UNDERFLOWEXCEPTION |
2000
OPC_2E_INEXACTEXCEPTION);
2001
else Set_inexactflag();
2002
return(OPC_2E_UNDERFLOWEXCEPTION);
2003
}
2004
else if (inexact && is_tiny) Set_underflowflag();
2005
}
2006
else Sgl_set_exponent(resultp1,result_exponent);
2007
Sgl_copytoptr(resultp1,dstptr);
2008
if (inexact)
2009
if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
2010
else Set_inexactflag();
2011
return(NOEXCEPTION);
2012
}
2013
2014
/*
2015
* Single Floating-point Multiply Negate Fused Add
2016
*/
2017
2018
sgl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
2019
2020
sgl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr;
2021
unsigned int *status;
2022
{
2023
unsigned int opnd1, opnd2, opnd3;
2024
register unsigned int tmpresp1, tmpresp2;
2025
unsigned int rightp1, rightp2;
2026
unsigned int resultp1, resultp2 = 0;
2027
register int mpy_exponent, add_exponent, count;
2028
boolean inexact = FALSE, is_tiny = FALSE;
2029
2030
unsigned int signlessleft1, signlessright1, save;
2031
register int result_exponent, diff_exponent;
2032
int sign_save, jumpsize;
2033
2034
Sgl_copyfromptr(src1ptr,opnd1);
2035
Sgl_copyfromptr(src2ptr,opnd2);
2036
Sgl_copyfromptr(src3ptr,opnd3);
2037
2038
/*
2039
* set sign bit of result of multiply
2040
*/
2041
if (Sgl_sign(opnd1) ^ Sgl_sign(opnd2))
2042
Sgl_setzero(resultp1);
2043
else
2044
Sgl_setnegativezero(resultp1);
2045
2046
/*
2047
* Generate multiply exponent
2048
*/
2049
mpy_exponent = Sgl_exponent(opnd1) + Sgl_exponent(opnd2) - SGL_BIAS;
2050
2051
/*
2052
* check first operand for NaN's or infinity
2053
*/
2054
if (Sgl_isinfinity_exponent(opnd1)) {
2055
if (Sgl_iszero_mantissa(opnd1)) {
2056
if (Sgl_isnotnan(opnd2) && Sgl_isnotnan(opnd3)) {
2057
if (Sgl_iszero_exponentmantissa(opnd2)) {
2058
/*
2059
* invalid since operands are infinity
2060
* and zero
2061
*/
2062
if (Is_invalidtrap_enabled())
2063
return(OPC_2E_INVALIDEXCEPTION);
2064
Set_invalidflag();
2065
Sgl_makequietnan(resultp1);
2066
Sgl_copytoptr(resultp1,dstptr);
2067
return(NOEXCEPTION);
2068
}
2069
/*
2070
* Check third operand for infinity with a
2071
* sign opposite of the multiply result
2072
*/
2073
if (Sgl_isinfinity(opnd3) &&
2074
(Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
2075
/*
2076
* invalid since attempting a magnitude
2077
* subtraction of infinities
2078
*/
2079
if (Is_invalidtrap_enabled())
2080
return(OPC_2E_INVALIDEXCEPTION);
2081
Set_invalidflag();
2082
Sgl_makequietnan(resultp1);
2083
Sgl_copytoptr(resultp1,dstptr);
2084
return(NOEXCEPTION);
2085
}
2086
2087
/*
2088
* return infinity
2089
*/
2090
Sgl_setinfinity_exponentmantissa(resultp1);
2091
Sgl_copytoptr(resultp1,dstptr);
2092
return(NOEXCEPTION);
2093
}
2094
}
2095
else {
2096
/*
2097
* is NaN; signaling or quiet?
2098
*/
2099
if (Sgl_isone_signaling(opnd1)) {
2100
/* trap if INVALIDTRAP enabled */
2101
if (Is_invalidtrap_enabled())
2102
return(OPC_2E_INVALIDEXCEPTION);
2103
/* make NaN quiet */
2104
Set_invalidflag();
2105
Sgl_set_quiet(opnd1);
2106
}
2107
/*
2108
* is second operand a signaling NaN?
2109
*/
2110
else if (Sgl_is_signalingnan(opnd2)) {
2111
/* trap if INVALIDTRAP enabled */
2112
if (Is_invalidtrap_enabled())
2113
return(OPC_2E_INVALIDEXCEPTION);
2114
/* make NaN quiet */
2115
Set_invalidflag();
2116
Sgl_set_quiet(opnd2);
2117
Sgl_copytoptr(opnd2,dstptr);
2118
return(NOEXCEPTION);
2119
}
2120
/*
2121
* is third operand a signaling NaN?
2122
*/
2123
else if (Sgl_is_signalingnan(opnd3)) {
2124
/* trap if INVALIDTRAP enabled */
2125
if (Is_invalidtrap_enabled())
2126
return(OPC_2E_INVALIDEXCEPTION);
2127
/* make NaN quiet */
2128
Set_invalidflag();
2129
Sgl_set_quiet(opnd3);
2130
Sgl_copytoptr(opnd3,dstptr);
2131
return(NOEXCEPTION);
2132
}
2133
/*
2134
* return quiet NaN
2135
*/
2136
Sgl_copytoptr(opnd1,dstptr);
2137
return(NOEXCEPTION);
2138
}
2139
}
2140
2141
/*
2142
* check second operand for NaN's or infinity
2143
*/
2144
if (Sgl_isinfinity_exponent(opnd2)) {
2145
if (Sgl_iszero_mantissa(opnd2)) {
2146
if (Sgl_isnotnan(opnd3)) {
2147
if (Sgl_iszero_exponentmantissa(opnd1)) {
2148
/*
2149
* invalid since multiply operands are
2150
* zero & infinity
2151
*/
2152
if (Is_invalidtrap_enabled())
2153
return(OPC_2E_INVALIDEXCEPTION);
2154
Set_invalidflag();
2155
Sgl_makequietnan(opnd2);
2156
Sgl_copytoptr(opnd2,dstptr);
2157
return(NOEXCEPTION);
2158
}
2159
2160
/*
2161
* Check third operand for infinity with a
2162
* sign opposite of the multiply result
2163
*/
2164
if (Sgl_isinfinity(opnd3) &&
2165
(Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
2166
/*
2167
* invalid since attempting a magnitude
2168
* subtraction of infinities
2169
*/
2170
if (Is_invalidtrap_enabled())
2171
return(OPC_2E_INVALIDEXCEPTION);
2172
Set_invalidflag();
2173
Sgl_makequietnan(resultp1);
2174
Sgl_copytoptr(resultp1,dstptr);
2175
return(NOEXCEPTION);
2176
}
2177
2178
/*
2179
* return infinity
2180
*/
2181
Sgl_setinfinity_exponentmantissa(resultp1);
2182
Sgl_copytoptr(resultp1,dstptr);
2183
return(NOEXCEPTION);
2184
}
2185
}
2186
else {
2187
/*
2188
* is NaN; signaling or quiet?
2189
*/
2190
if (Sgl_isone_signaling(opnd2)) {
2191
/* trap if INVALIDTRAP enabled */
2192
if (Is_invalidtrap_enabled())
2193
return(OPC_2E_INVALIDEXCEPTION);
2194
/* make NaN quiet */
2195
Set_invalidflag();
2196
Sgl_set_quiet(opnd2);
2197
}
2198
/*
2199
* is third operand a signaling NaN?
2200
*/
2201
else if (Sgl_is_signalingnan(opnd3)) {
2202
/* trap if INVALIDTRAP enabled */
2203
if (Is_invalidtrap_enabled())
2204
return(OPC_2E_INVALIDEXCEPTION);
2205
/* make NaN quiet */
2206
Set_invalidflag();
2207
Sgl_set_quiet(opnd3);
2208
Sgl_copytoptr(opnd3,dstptr);
2209
return(NOEXCEPTION);
2210
}
2211
/*
2212
* return quiet NaN
2213
*/
2214
Sgl_copytoptr(opnd2,dstptr);
2215
return(NOEXCEPTION);
2216
}
2217
}
2218
2219
/*
2220
* check third operand for NaN's or infinity
2221
*/
2222
if (Sgl_isinfinity_exponent(opnd3)) {
2223
if (Sgl_iszero_mantissa(opnd3)) {
2224
/* return infinity */
2225
Sgl_copytoptr(opnd3,dstptr);
2226
return(NOEXCEPTION);
2227
} else {
2228
/*
2229
* is NaN; signaling or quiet?
2230
*/
2231
if (Sgl_isone_signaling(opnd3)) {
2232
/* trap if INVALIDTRAP enabled */
2233
if (Is_invalidtrap_enabled())
2234
return(OPC_2E_INVALIDEXCEPTION);
2235
/* make NaN quiet */
2236
Set_invalidflag();
2237
Sgl_set_quiet(opnd3);
2238
}
2239
/*
2240
* return quiet NaN
2241
*/
2242
Sgl_copytoptr(opnd3,dstptr);
2243
return(NOEXCEPTION);
2244
}
2245
}
2246
2247
/*
2248
* Generate multiply mantissa
2249
*/
2250
if (Sgl_isnotzero_exponent(opnd1)) {
2251
/* set hidden bit */
2252
Sgl_clear_signexponent_set_hidden(opnd1);
2253
}
2254
else {
2255
/* check for zero */
2256
if (Sgl_iszero_mantissa(opnd1)) {
2257
/*
2258
* Perform the add opnd3 with zero here.
2259
*/
2260
if (Sgl_iszero_exponentmantissa(opnd3)) {
2261
if (Is_rounding_mode(ROUNDMINUS)) {
2262
Sgl_or_signs(opnd3,resultp1);
2263
} else {
2264
Sgl_and_signs(opnd3,resultp1);
2265
}
2266
}
2267
/*
2268
* Now let's check for trapped underflow case.
2269
*/
2270
else if (Sgl_iszero_exponent(opnd3) &&
2271
Is_underflowtrap_enabled()) {
2272
/* need to normalize results mantissa */
2273
sign_save = Sgl_signextendedsign(opnd3);
2274
result_exponent = 0;
2275
Sgl_leftshiftby1(opnd3);
2276
Sgl_normalize(opnd3,result_exponent);
2277
Sgl_set_sign(opnd3,/*using*/sign_save);
2278
Sgl_setwrapped_exponent(opnd3,result_exponent,
2279
unfl);
2280
Sgl_copytoptr(opnd3,dstptr);
2281
/* inexact = FALSE */
2282
return(OPC_2E_UNDERFLOWEXCEPTION);
2283
}
2284
Sgl_copytoptr(opnd3,dstptr);
2285
return(NOEXCEPTION);
2286
}
2287
/* is denormalized, adjust exponent */
2288
Sgl_clear_signexponent(opnd1);
2289
Sgl_leftshiftby1(opnd1);
2290
Sgl_normalize(opnd1,mpy_exponent);
2291
}
2292
/* opnd2 needs to have hidden bit set with msb in hidden bit */
2293
if (Sgl_isnotzero_exponent(opnd2)) {
2294
Sgl_clear_signexponent_set_hidden(opnd2);
2295
}
2296
else {
2297
/* check for zero */
2298
if (Sgl_iszero_mantissa(opnd2)) {
2299
/*
2300
* Perform the add opnd3 with zero here.
2301
*/
2302
if (Sgl_iszero_exponentmantissa(opnd3)) {
2303
if (Is_rounding_mode(ROUNDMINUS)) {
2304
Sgl_or_signs(opnd3,resultp1);
2305
} else {
2306
Sgl_and_signs(opnd3,resultp1);
2307
}
2308
}
2309
/*
2310
* Now let's check for trapped underflow case.
2311
*/
2312
else if (Sgl_iszero_exponent(opnd3) &&
2313
Is_underflowtrap_enabled()) {
2314
/* need to normalize results mantissa */
2315
sign_save = Sgl_signextendedsign(opnd3);
2316
result_exponent = 0;
2317
Sgl_leftshiftby1(opnd3);
2318
Sgl_normalize(opnd3,result_exponent);
2319
Sgl_set_sign(opnd3,/*using*/sign_save);
2320
Sgl_setwrapped_exponent(opnd3,result_exponent,
2321
unfl);
2322
Sgl_copytoptr(opnd3,dstptr);
2323
/* inexact = FALSE */
2324
return(OPC_2E_UNDERFLOWEXCEPTION);
2325
}
2326
Sgl_copytoptr(opnd3,dstptr);
2327
return(NOEXCEPTION);
2328
}
2329
/* is denormalized; want to normalize */
2330
Sgl_clear_signexponent(opnd2);
2331
Sgl_leftshiftby1(opnd2);
2332
Sgl_normalize(opnd2,mpy_exponent);
2333
}
2334
2335
/* Multiply the first two source mantissas together */
2336
2337
/*
2338
* The intermediate result will be kept in tmpres,
2339
* which needs enough room for 106 bits of mantissa,
2340
* so lets call it a Double extended.
2341
*/
2342
Sglext_setzero(tmpresp1,tmpresp2);
2343
2344
/*
2345
* Four bits at a time are inspected in each loop, and a
2346
* simple shift and add multiply algorithm is used.
2347
*/
2348
for (count = SGL_P-1; count >= 0; count -= 4) {
2349
Sglext_rightshiftby4(tmpresp1,tmpresp2);
2350
if (Sbit28(opnd1)) {
2351
/* Twoword_add should be an ADD followed by 2 ADDC's */
2352
Twoword_add(tmpresp1, tmpresp2, opnd2<<3, 0);
2353
}
2354
if (Sbit29(opnd1)) {
2355
Twoword_add(tmpresp1, tmpresp2, opnd2<<2, 0);
2356
}
2357
if (Sbit30(opnd1)) {
2358
Twoword_add(tmpresp1, tmpresp2, opnd2<<1, 0);
2359
}
2360
if (Sbit31(opnd1)) {
2361
Twoword_add(tmpresp1, tmpresp2, opnd2, 0);
2362
}
2363
Sgl_rightshiftby4(opnd1);
2364
}
2365
if (Is_sexthiddenoverflow(tmpresp1)) {
2366
/* result mantissa >= 2 (mantissa overflow) */
2367
mpy_exponent++;
2368
Sglext_rightshiftby4(tmpresp1,tmpresp2);
2369
} else {
2370
Sglext_rightshiftby3(tmpresp1,tmpresp2);
2371
}
2372
2373
/*
2374
* Restore the sign of the mpy result which was saved in resultp1.
2375
* The exponent will continue to be kept in mpy_exponent.
2376
*/
2377
Sglext_set_sign(tmpresp1,Sgl_sign(resultp1));
2378
2379
/*
2380
* No rounding is required, since the result of the multiply
2381
* is exact in the extended format.
2382
*/
2383
2384
/*
2385
* Now we are ready to perform the add portion of the operation.
2386
*
2387
* The exponents need to be kept as integers for now, since the
2388
* multiply result might not fit into the exponent field. We
2389
* can't overflow or underflow because of this yet, since the
2390
* add could bring the final result back into range.
2391
*/
2392
add_exponent = Sgl_exponent(opnd3);
2393
2394
/*
2395
* Check for denormalized or zero add operand.
2396
*/
2397
if (add_exponent == 0) {
2398
/* check for zero */
2399
if (Sgl_iszero_mantissa(opnd3)) {
2400
/* right is zero */
2401
/* Left can't be zero and must be result.
2402
*
2403
* The final result is now in tmpres and mpy_exponent,
2404
* and needs to be rounded and squeezed back into
2405
* double precision format from double extended.
2406
*/
2407
result_exponent = mpy_exponent;
2408
Sglext_copy(tmpresp1,tmpresp2,resultp1,resultp2);
2409
sign_save = Sgl_signextendedsign(resultp1);/*save sign*/
2410
goto round;
2411
}
2412
2413
/*
2414
* Neither are zeroes.
2415
* Adjust exponent and normalize add operand.
2416
*/
2417
sign_save = Sgl_signextendedsign(opnd3); /* save sign */
2418
Sgl_clear_signexponent(opnd3);
2419
Sgl_leftshiftby1(opnd3);
2420
Sgl_normalize(opnd3,add_exponent);
2421
Sgl_set_sign(opnd3,sign_save); /* restore sign */
2422
} else {
2423
Sgl_clear_exponent_set_hidden(opnd3);
2424
}
2425
/*
2426
* Copy opnd3 to the double extended variable called right.
2427
*/
2428
Sgl_copyto_sglext(opnd3,rightp1,rightp2);
2429
2430
/*
2431
* A zero "save" helps discover equal operands (for later),
2432
* and is used in swapping operands (if needed).
2433
*/
2434
Sglext_xortointp1(tmpresp1,rightp1,/*to*/save);
2435
2436
/*
2437
* Compare magnitude of operands.
2438
*/
2439
Sglext_copytoint_exponentmantissa(tmpresp1,signlessleft1);
2440
Sglext_copytoint_exponentmantissa(rightp1,signlessright1);
2441
if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
2442
Sglext_ismagnitudeless(signlessleft1,signlessright1)) {
2443
/*
2444
* Set the left operand to the larger one by XOR swap.
2445
* First finish the first word "save".
2446
*/
2447
Sglext_xorfromintp1(save,rightp1,/*to*/rightp1);
2448
Sglext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
2449
Sglext_swap_lower(tmpresp2,rightp2);
2450
/* also setup exponents used in rest of routine */
2451
diff_exponent = add_exponent - mpy_exponent;
2452
result_exponent = add_exponent;
2453
} else {
2454
/* also setup exponents used in rest of routine */
2455
diff_exponent = mpy_exponent - add_exponent;
2456
result_exponent = mpy_exponent;
2457
}
2458
/* Invariant: left is not smaller than right. */
2459
2460
/*
2461
* Special case alignment of operands that would force alignment
2462
* beyond the extent of the extension. A further optimization
2463
* could special case this but only reduces the path length for
2464
* this infrequent case.
2465
*/
2466
if (diff_exponent > SGLEXT_THRESHOLD) {
2467
diff_exponent = SGLEXT_THRESHOLD;
2468
}
2469
2470
/* Align right operand by shifting it to the right */
2471
Sglext_clear_sign(rightp1);
2472
Sglext_right_align(rightp1,rightp2,/*shifted by*/diff_exponent);
2473
2474
/* Treat sum and difference of the operands separately. */
2475
if ((int)save < 0) {
2476
/*
2477
* Difference of the two operands. Overflow can occur if the
2478
* multiply overflowed. A borrow can occur out of the hidden
2479
* bit and force a post normalization phase.
2480
*/
2481
Sglext_subtract(tmpresp1,tmpresp2, rightp1,rightp2,
2482
resultp1,resultp2);
2483
sign_save = Sgl_signextendedsign(resultp1);
2484
if (Sgl_iszero_hidden(resultp1)) {
2485
/* Handle normalization */
2486
/* A straightforward algorithm would now shift the
2487
* result and extension left until the hidden bit
2488
* becomes one. Not all of the extension bits need
2489
* participate in the shift. Only the two most
2490
* significant bits (round and guard) are needed.
2491
* If only a single shift is needed then the guard
2492
* bit becomes a significant low order bit and the
2493
* extension must participate in the rounding.
2494
* If more than a single shift is needed, then all
2495
* bits to the right of the guard bit are zeros,
2496
* and the guard bit may or may not be zero. */
2497
Sglext_leftshiftby1(resultp1,resultp2);
2498
2499
/* Need to check for a zero result. The sign and
2500
* exponent fields have already been zeroed. The more
2501
* efficient test of the full object can be used.
2502
*/
2503
if (Sglext_iszero(resultp1,resultp2)) {
2504
/* Must have been "x-x" or "x+(-x)". */
2505
if (Is_rounding_mode(ROUNDMINUS))
2506
Sgl_setone_sign(resultp1);
2507
Sgl_copytoptr(resultp1,dstptr);
2508
return(NOEXCEPTION);
2509
}
2510
result_exponent--;
2511
2512
/* Look to see if normalization is finished. */
2513
if (Sgl_isone_hidden(resultp1)) {
2514
/* No further normalization is needed */
2515
goto round;
2516
}
2517
2518
/* Discover first one bit to determine shift amount.
2519
* Use a modified binary search. We have already
2520
* shifted the result one position right and still
2521
* not found a one so the remainder of the extension
2522
* must be zero and simplifies rounding. */
2523
/* Scan bytes */
2524
while (Sgl_iszero_hiddenhigh7mantissa(resultp1)) {
2525
Sglext_leftshiftby8(resultp1,resultp2);
2526
result_exponent -= 8;
2527
}
2528
/* Now narrow it down to the nibble */
2529
if (Sgl_iszero_hiddenhigh3mantissa(resultp1)) {
2530
/* The lower nibble contains the
2531
* normalizing one */
2532
Sglext_leftshiftby4(resultp1,resultp2);
2533
result_exponent -= 4;
2534
}
2535
/* Select case where first bit is set (already
2536
* normalized) otherwise select the proper shift. */
2537
jumpsize = Sgl_hiddenhigh3mantissa(resultp1);
2538
if (jumpsize <= 7) switch(jumpsize) {
2539
case 1:
2540
Sglext_leftshiftby3(resultp1,resultp2);
2541
result_exponent -= 3;
2542
break;
2543
case 2:
2544
case 3:
2545
Sglext_leftshiftby2(resultp1,resultp2);
2546
result_exponent -= 2;
2547
break;
2548
case 4:
2549
case 5:
2550
case 6:
2551
case 7:
2552
Sglext_leftshiftby1(resultp1,resultp2);
2553
result_exponent -= 1;
2554
break;
2555
}
2556
} /* end if (hidden...)... */
2557
/* Fall through and round */
2558
} /* end if (save < 0)... */
2559
else {
2560
/* Add magnitudes */
2561
Sglext_addition(tmpresp1,tmpresp2,
2562
rightp1,rightp2, /*to*/resultp1,resultp2);
2563
sign_save = Sgl_signextendedsign(resultp1);
2564
if (Sgl_isone_hiddenoverflow(resultp1)) {
2565
/* Prenormalization required. */
2566
Sglext_arithrightshiftby1(resultp1,resultp2);
2567
result_exponent++;
2568
} /* end if hiddenoverflow... */
2569
} /* end else ...add magnitudes... */
2570
2571
/* Round the result. If the extension and lower two words are
2572
* all zeros, then the result is exact. Otherwise round in the
2573
* correct direction. Underflow is possible. If a postnormalization
2574
* is necessary, then the mantissa is all zeros so no shift is needed.
2575
*/
2576
round:
2577
if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
2578
Sglext_denormalize(resultp1,resultp2,result_exponent,is_tiny);
2579
}
2580
Sgl_set_sign(resultp1,/*using*/sign_save);
2581
if (Sglext_isnotzero_mantissap2(resultp2)) {
2582
inexact = TRUE;
2583
switch(Rounding_mode()) {
2584
case ROUNDNEAREST: /* The default. */
2585
if (Sglext_isone_highp2(resultp2)) {
2586
/* at least 1/2 ulp */
2587
if (Sglext_isnotzero_low31p2(resultp2) ||
2588
Sglext_isone_lowp1(resultp1)) {
2589
/* either exactly half way and odd or
2590
* more than 1/2ulp */
2591
Sgl_increment(resultp1);
2592
}
2593
}
2594
break;
2595
2596
case ROUNDPLUS:
2597
if (Sgl_iszero_sign(resultp1)) {
2598
/* Round up positive results */
2599
Sgl_increment(resultp1);
2600
}
2601
break;
2602
2603
case ROUNDMINUS:
2604
if (Sgl_isone_sign(resultp1)) {
2605
/* Round down negative results */
2606
Sgl_increment(resultp1);
2607
}
2608
2609
case ROUNDZERO:;
2610
/* truncate is simple */
2611
} /* end switch... */
2612
if (Sgl_isone_hiddenoverflow(resultp1)) result_exponent++;
2613
}
2614
if (result_exponent >= SGL_INFINITY_EXPONENT) {
2615
/* Overflow */
2616
if (Is_overflowtrap_enabled()) {
2617
/*
2618
* Adjust bias of result
2619
*/
2620
Sgl_setwrapped_exponent(resultp1,result_exponent,ovfl);
2621
Sgl_copytoptr(resultp1,dstptr);
2622
if (inexact)
2623
if (Is_inexacttrap_enabled())
2624
return (OPC_2E_OVERFLOWEXCEPTION |
2625
OPC_2E_INEXACTEXCEPTION);
2626
else Set_inexactflag();
2627
return (OPC_2E_OVERFLOWEXCEPTION);
2628
}
2629
inexact = TRUE;
2630
Set_overflowflag();
2631
Sgl_setoverflow(resultp1);
2632
} else if (result_exponent <= 0) { /* underflow case */
2633
if (Is_underflowtrap_enabled()) {
2634
/*
2635
* Adjust bias of result
2636
*/
2637
Sgl_setwrapped_exponent(resultp1,result_exponent,unfl);
2638
Sgl_copytoptr(resultp1,dstptr);
2639
if (inexact)
2640
if (Is_inexacttrap_enabled())
2641
return (OPC_2E_UNDERFLOWEXCEPTION |
2642
OPC_2E_INEXACTEXCEPTION);
2643
else Set_inexactflag();
2644
return(OPC_2E_UNDERFLOWEXCEPTION);
2645
}
2646
else if (inexact && is_tiny) Set_underflowflag();
2647
}
2648
else Sgl_set_exponent(resultp1,result_exponent);
2649
Sgl_copytoptr(resultp1,dstptr);
2650
if (inexact)
2651
if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
2652
else Set_inexactflag();
2653
return(NOEXCEPTION);
2654
}
2655
2656
2657