CoCalc -- fmpyfadd.c

GitHub Repository: torvalds/linux
Path: blob/master/arch/parisc/math-emu/fmpyfadd.c
²⁶²⁸⁸ views
1
// SPDX-License-Identifier: GPL-2.0-or-later
2
/*
3
 * Linux/PA-RISC Project (http://www.parisc-linux.org/)
4
 *
5
 * Floating-point emulation code
6
 *  Copyright (C) 2001 Hewlett-Packard (Paul Bame) <[email protected]>
7
 */
8
/*
9
 * BEGIN_DESC
10
 *
11
 *  File:
12
 *	@(#)	pa/spmath/fmpyfadd.c		$Revision: 1.1 $
13
 *
14
 *  Purpose:
15
 *	Double Floating-point Multiply Fused Add
16
 *	Double Floating-point Multiply Negate Fused Add
17
 *	Single Floating-point Multiply Fused Add
18
 *	Single Floating-point Multiply Negate Fused Add
19
 *
20
 *  External Interfaces:
21
 *	dbl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
22
 *	dbl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
23
 *	sgl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
24
 *	sgl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
25
 *
26
 *  Internal Interfaces:
27
 *
28
 *  Theory:
29
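 *	Each routine computes (src1 * src2) + src3; the "negate" variants
 *	invert the sign of the product before the add.  The operands are
 *	first screened for NaNs, infinities and zeros.  The two mantissas
 *	are then multiplied exactly into an extended-precision intermediate
 *	(no rounding), the addend is aligned to it and added or subtracted,
 *	and the sum is normalized and rounded once according to the current
 *	rounding mode before overflow/underflow/inexact are reported.
 *	(Described from the double-precision routines; the single-precision
 *	routines are presumed to follow the same scheme.)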
30
 *
31
 * END_DESC
32
*/
33

34

35
#include "float.h"
36
#include "sgl_float.h"
37
#include "dbl_float.h"
38

39

40
/*
41
 *  Double Floating-point Multiply Fused Add
42
 */
43

44
int
45
dbl_fmpyfadd(
46
	    dbl_floating_point *src1ptr,
47
	    dbl_floating_point *src2ptr,
48
	    dbl_floating_point *src3ptr,
49
	    unsigned int *status,
50
	    dbl_floating_point *dstptr)
51
{
52
	unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2, opnd3p1, opnd3p2;
53
	register unsigned int tmpresp1, tmpresp2, tmpresp3, tmpresp4;
54
	unsigned int rightp1, rightp2, rightp3, rightp4;
55
	unsigned int resultp1, resultp2 = 0, resultp3 = 0, resultp4 = 0;
56
	register int mpy_exponent, add_exponent, count;
57
	boolean inexact = FALSE, is_tiny = FALSE;
58

59
	unsigned int signlessleft1, signlessright1, save;
60
	register int result_exponent, diff_exponent;
61
	int sign_save, jumpsize;
62
	
63
	Dbl_copyfromptr(src1ptr,opnd1p1,opnd1p2);
64
	Dbl_copyfromptr(src2ptr,opnd2p1,opnd2p2);
65
	Dbl_copyfromptr(src3ptr,opnd3p1,opnd3p2);
66

67
	/* 
68
	 * set sign bit of result of multiply
69
	 */
70
	if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1)) 
71
		Dbl_setnegativezerop1(resultp1); 
72
	else Dbl_setzerop1(resultp1);
73

74
	/*
75
	 * Generate multiply exponent 
76
	 */
77
	mpy_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) - DBL_BIAS;
78

79
	/*
80
	 * check first operand for NaN's or infinity
81
	 */
82
	if (Dbl_isinfinity_exponent(opnd1p1)) {
83
		if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
84
			if (Dbl_isnotnan(opnd2p1,opnd2p2) &&
85
			    Dbl_isnotnan(opnd3p1,opnd3p2)) {
86
				if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) {
87
					/* 
88
					 * invalid since operands are infinity 
89
					 * and zero 
90
					 */
91
					if (Is_invalidtrap_enabled())
92
						return(OPC_2E_INVALIDEXCEPTION);
93
					Set_invalidflag();
94
					Dbl_makequietnan(resultp1,resultp2);
95
					Dbl_copytoptr(resultp1,resultp2,dstptr);
96
					return(NOEXCEPTION);
97
				}
98
				/*
99
				 * Check third operand for infinity with a
100
				 *  sign opposite of the multiply result
101
				 */
102
				if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
103
				    (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
104
					/* 
105
					 * invalid since attempting a magnitude
106
					 * subtraction of infinities
107
					 */
108
					if (Is_invalidtrap_enabled())
109
						return(OPC_2E_INVALIDEXCEPTION);
110
					Set_invalidflag();
111
					Dbl_makequietnan(resultp1,resultp2);
112
					Dbl_copytoptr(resultp1,resultp2,dstptr);
113
					return(NOEXCEPTION);
114
				}
115

116
				/*
117
			 	 * return infinity
118
			 	 */
119
				Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
120
				Dbl_copytoptr(resultp1,resultp2,dstptr);
121
				return(NOEXCEPTION);
122
			}
123
		}
124
		else {
125
			/*
126
		 	 * is NaN; signaling or quiet?
127
		 	 */
128
			if (Dbl_isone_signaling(opnd1p1)) {
129
				/* trap if INVALIDTRAP enabled */
130
				if (Is_invalidtrap_enabled()) 
131
			    		return(OPC_2E_INVALIDEXCEPTION);
132
				/* make NaN quiet */
133
				Set_invalidflag();
134
				Dbl_set_quiet(opnd1p1);
135
			}
136
			/* 
137
			 * is second operand a signaling NaN? 
138
			 */
139
			else if (Dbl_is_signalingnan(opnd2p1)) {
140
				/* trap if INVALIDTRAP enabled */
141
				if (Is_invalidtrap_enabled())
142
			    		return(OPC_2E_INVALIDEXCEPTION);
143
				/* make NaN quiet */
144
				Set_invalidflag();
145
				Dbl_set_quiet(opnd2p1);
146
				Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
147
				return(NOEXCEPTION);
148
			}
149
			/* 
150
			 * is third operand a signaling NaN? 
151
			 */
152
			else if (Dbl_is_signalingnan(opnd3p1)) {
153
				/* trap if INVALIDTRAP enabled */
154
				if (Is_invalidtrap_enabled())
155
			    		return(OPC_2E_INVALIDEXCEPTION);
156
				/* make NaN quiet */
157
				Set_invalidflag();
158
				Dbl_set_quiet(opnd3p1);
159
				Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
160
				return(NOEXCEPTION);
161
			}
162
			/*
163
		 	 * return quiet NaN
164
		 	 */
165
			Dbl_copytoptr(opnd1p1,opnd1p2,dstptr);
166
			return(NOEXCEPTION);
167
		}
168
	}
169

170
	/*
171
	 * check second operand for NaN's or infinity
172
	 */
173
	if (Dbl_isinfinity_exponent(opnd2p1)) {
174
		if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
175
			if (Dbl_isnotnan(opnd3p1,opnd3p2)) {
176
				if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) {
177
					/* 
178
					 * invalid since multiply operands are
179
					 * zero & infinity
180
					 */
181
					if (Is_invalidtrap_enabled())
182
						return(OPC_2E_INVALIDEXCEPTION);
183
					Set_invalidflag();
184
					Dbl_makequietnan(opnd2p1,opnd2p2);
185
					Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
186
					return(NOEXCEPTION);
187
				}
188

189
				/*
190
				 * Check third operand for infinity with a
191
				 *  sign opposite of the multiply result
192
				 */
193
				if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
194
				    (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
195
					/* 
196
					 * invalid since attempting a magnitude
197
					 * subtraction of infinities
198
					 */
199
					if (Is_invalidtrap_enabled())
200
				       		return(OPC_2E_INVALIDEXCEPTION);
201
				       	Set_invalidflag();
202
				       	Dbl_makequietnan(resultp1,resultp2);
203
					Dbl_copytoptr(resultp1,resultp2,dstptr);
204
					return(NOEXCEPTION);
205
				}
206

207
				/*
208
				 * return infinity
209
				 */
210
				Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
211
				Dbl_copytoptr(resultp1,resultp2,dstptr);
212
				return(NOEXCEPTION);
213
			}
214
		}
215
		else {
216
			/*
217
			 * is NaN; signaling or quiet?
218
			 */
219
			if (Dbl_isone_signaling(opnd2p1)) {
220
				/* trap if INVALIDTRAP enabled */
221
				if (Is_invalidtrap_enabled())
222
					return(OPC_2E_INVALIDEXCEPTION);
223
				/* make NaN quiet */
224
				Set_invalidflag();
225
				Dbl_set_quiet(opnd2p1);
226
			}
227
			/* 
228
			 * is third operand a signaling NaN? 
229
			 */
230
			else if (Dbl_is_signalingnan(opnd3p1)) {
231
			       	/* trap if INVALIDTRAP enabled */
232
			       	if (Is_invalidtrap_enabled())
233
				   		return(OPC_2E_INVALIDEXCEPTION);
234
			       	/* make NaN quiet */
235
			       	Set_invalidflag();
236
			       	Dbl_set_quiet(opnd3p1);
237
				Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
238
		       		return(NOEXCEPTION);
239
			}
240
			/*
241
			 * return quiet NaN
242
			 */
243
			Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
244
			return(NOEXCEPTION);
245
		}
246
	}
247

248
	/*
249
	 * check third operand for NaN's or infinity
250
	 */
251
	if (Dbl_isinfinity_exponent(opnd3p1)) {
252
		if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
253
			/* return infinity */
254
			Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
255
			return(NOEXCEPTION);
256
		} else {
257
			/*
258
			 * is NaN; signaling or quiet?
259
			 */
260
			if (Dbl_isone_signaling(opnd3p1)) {
261
				/* trap if INVALIDTRAP enabled */
262
				if (Is_invalidtrap_enabled())
263
					return(OPC_2E_INVALIDEXCEPTION);
264
				/* make NaN quiet */
265
				Set_invalidflag();
266
				Dbl_set_quiet(opnd3p1);
267
			}
268
			/*
269
			 * return quiet NaN
270
 			 */
271
			Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
272
			return(NOEXCEPTION);
273
		}
274
    	}
275

276
	/*
277
	 * Generate multiply mantissa
278
	 */
279
	if (Dbl_isnotzero_exponent(opnd1p1)) {
280
		/* set hidden bit */
281
		Dbl_clear_signexponent_set_hidden(opnd1p1);
282
	}
283
	else {
284
		/* check for zero */
285
		if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
286
			/*
287
			 * Perform the add opnd3 with zero here.
288
			 */
289
			if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
290
				if (Is_rounding_mode(ROUNDMINUS)) {
291
					Dbl_or_signs(opnd3p1,resultp1);
292
				} else {
293
					Dbl_and_signs(opnd3p1,resultp1);
294
				}
295
			}
296
			/*
297
			 * Now let's check for trapped underflow case.
298
			 */
299
			else if (Dbl_iszero_exponent(opnd3p1) &&
300
			         Is_underflowtrap_enabled()) {
301
                    		/* need to normalize results mantissa */
302
                    		sign_save = Dbl_signextendedsign(opnd3p1);
303
				result_exponent = 0;
304
                    		Dbl_leftshiftby1(opnd3p1,opnd3p2);
305
                    		Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
306
                    		Dbl_set_sign(opnd3p1,/*using*/sign_save);
307
                    		Dbl_setwrapped_exponent(opnd3p1,result_exponent,
308
							unfl);
309
                    		Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
310
                    		/* inexact = FALSE */
311
                    		return(OPC_2E_UNDERFLOWEXCEPTION);
312
			}
313
			Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
314
			return(NOEXCEPTION);
315
		}
316
		/* is denormalized, adjust exponent */
317
		Dbl_clear_signexponent(opnd1p1);
318
		Dbl_leftshiftby1(opnd1p1,opnd1p2);
319
		Dbl_normalize(opnd1p1,opnd1p2,mpy_exponent);
320
	}
321
	/* opnd2 needs to have hidden bit set with msb in hidden bit */
322
	if (Dbl_isnotzero_exponent(opnd2p1)) {
323
		Dbl_clear_signexponent_set_hidden(opnd2p1);
324
	}
325
	else {
326
		/* check for zero */
327
		if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
328
			/*
329
			 * Perform the add opnd3 with zero here.
330
			 */
331
			if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
332
				if (Is_rounding_mode(ROUNDMINUS)) {
333
					Dbl_or_signs(opnd3p1,resultp1);
334
				} else {
335
					Dbl_and_signs(opnd3p1,resultp1);
336
				}
337
			}
338
			/*
339
			 * Now let's check for trapped underflow case.
340
			 */
341
			else if (Dbl_iszero_exponent(opnd3p1) &&
342
			    Is_underflowtrap_enabled()) {
343
                    		/* need to normalize results mantissa */
344
                    		sign_save = Dbl_signextendedsign(opnd3p1);
345
				result_exponent = 0;
346
                    		Dbl_leftshiftby1(opnd3p1,opnd3p2);
347
                    		Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
348
                    		Dbl_set_sign(opnd3p1,/*using*/sign_save);
349
                    		Dbl_setwrapped_exponent(opnd3p1,result_exponent,
350
							unfl);
351
                    		Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
352
                    		/* inexact = FALSE */
353
				return(OPC_2E_UNDERFLOWEXCEPTION);
354
			}
355
			Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
356
			return(NOEXCEPTION);
357
		}
358
		/* is denormalized; want to normalize */
359
		Dbl_clear_signexponent(opnd2p1);
360
		Dbl_leftshiftby1(opnd2p1,opnd2p2);
361
		Dbl_normalize(opnd2p1,opnd2p2,mpy_exponent);
362
	}
363

364
	/* Multiply the first two source mantissas together */
365

366
	/* 
367
	 * The intermediate result will be kept in tmpres,
368
	 * which needs enough room for 106 bits of mantissa,
369
	 * so lets call it a Double extended.
370
	 */
371
	Dblext_setzero(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
372

373
	/* 
374
	 * Four bits at a time are inspected in each loop, and a 
375
	 * simple shift and add multiply algorithm is used. 
376
	 */ 
377
	for (count = DBL_P-1; count >= 0; count -= 4) {
378
		Dblext_rightshiftby4(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
379
		if (Dbit28p2(opnd1p2)) {
380
	 		/* Fourword_add should be an ADD followed by 3 ADDC's */
381
			Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4, 
382
			 opnd2p1<<3 | opnd2p2>>29, opnd2p2<<3, 0, 0);
383
		}
384
		if (Dbit29p2(opnd1p2)) {
385
			Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
386
			 opnd2p1<<2 | opnd2p2>>30, opnd2p2<<2, 0, 0);
387
		}
388
		if (Dbit30p2(opnd1p2)) {
389
			Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
390
			 opnd2p1<<1 | opnd2p2>>31, opnd2p2<<1, 0, 0);
391
		}
392
		if (Dbit31p2(opnd1p2)) {
393
			Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
394
			 opnd2p1, opnd2p2, 0, 0);
395
		}
396
		Dbl_rightshiftby4(opnd1p1,opnd1p2);
397
	}
398
	if (Is_dexthiddenoverflow(tmpresp1)) {
399
		/* result mantissa >= 2 (mantissa overflow) */
400
		mpy_exponent++;
401
		Dblext_rightshiftby1(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
402
	}
403

404
	/*
405
	 * Restore the sign of the mpy result which was saved in resultp1.
406
	 * The exponent will continue to be kept in mpy_exponent.
407
	 */
408
	Dblext_set_sign(tmpresp1,Dbl_sign(resultp1));
409

410
	/* 
411
	 * No rounding is required, since the result of the multiply
412
	 * is exact in the extended format.
413
	 */
414

415
	/*
416
	 * Now we are ready to perform the add portion of the operation.
417
	 *
418
	 * The exponents need to be kept as integers for now, since the
419
	 * multiply result might not fit into the exponent field.  We
420
	 * can't overflow or underflow because of this yet, since the
421
	 * add could bring the final result back into range.
422
	 */
423
	add_exponent = Dbl_exponent(opnd3p1);
424

425
	/*
426
	 * Check for denormalized or zero add operand.
427
	 */
428
	if (add_exponent == 0) {
429
		/* check for zero */
430
		if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
431
			/* right is zero */
432
			/* Left can't be zero and must be result.
433
			 *
434
			 * The final result is now in tmpres and mpy_exponent,
435
			 * and needs to be rounded and squeezed back into
436
			 * double precision format from double extended.
437
			 */
438
			result_exponent = mpy_exponent;
439
			Dblext_copy(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
440
				resultp1,resultp2,resultp3,resultp4);
441
			sign_save = Dbl_signextendedsign(resultp1);/*save sign*/
442
			goto round;
443
		}
444

445
		/* 
446
		 * Neither are zeroes.  
447
		 * Adjust exponent and normalize add operand.
448
		 */
449
		sign_save = Dbl_signextendedsign(opnd3p1);	/* save sign */
450
		Dbl_clear_signexponent(opnd3p1);
451
		Dbl_leftshiftby1(opnd3p1,opnd3p2);
452
		Dbl_normalize(opnd3p1,opnd3p2,add_exponent);
453
		Dbl_set_sign(opnd3p1,sign_save);	/* restore sign */
454
	} else {
455
		Dbl_clear_exponent_set_hidden(opnd3p1);
456
	}
457
	/*
458
	 * Copy opnd3 to the double extended variable called right.
459
	 */
460
	Dbl_copyto_dblext(opnd3p1,opnd3p2,rightp1,rightp2,rightp3,rightp4);
461

462
	/*
463
	 * A zero "save" helps discover equal operands (for later),
464
	 * and is used in swapping operands (if needed).
465
	 */
466
	Dblext_xortointp1(tmpresp1,rightp1,/*to*/save);
467

468
	/*
469
	 * Compare magnitude of operands.
470
	 */
471
	Dblext_copytoint_exponentmantissap1(tmpresp1,signlessleft1);
472
	Dblext_copytoint_exponentmantissap1(rightp1,signlessright1);
473
	if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
474
	    Dblext_ismagnitudeless(tmpresp2,rightp2,signlessleft1,signlessright1)){
475
		/*
476
		 * Set the left operand to the larger one by XOR swap.
477
		 * First finish the first word "save".
478
		 */
479
		Dblext_xorfromintp1(save,rightp1,/*to*/rightp1);
480
		Dblext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
481
		Dblext_swap_lower(tmpresp2,tmpresp3,tmpresp4,
482
			rightp2,rightp3,rightp4);
483
		/* also setup exponents used in rest of routine */
484
		diff_exponent = add_exponent - mpy_exponent;
485
		result_exponent = add_exponent;
486
	} else {
487
		/* also setup exponents used in rest of routine */
488
		diff_exponent = mpy_exponent - add_exponent;
489
		result_exponent = mpy_exponent;
490
	}
491
	/* Invariant: left is not smaller than right. */
492

493
	/*
494
	 * Special case alignment of operands that would force alignment
495
	 * beyond the extent of the extension.  A further optimization
496
	 * could special case this but only reduces the path length for
497
	 * this infrequent case.
498
	 */
499
	if (diff_exponent > DBLEXT_THRESHOLD) {
500
		diff_exponent = DBLEXT_THRESHOLD;
501
	}
502

503
	/* Align right operand by shifting it to the right */
504
	Dblext_clear_sign(rightp1);
505
	Dblext_right_align(rightp1,rightp2,rightp3,rightp4,
506
		/*shifted by*/diff_exponent);
507
	
508
	/* Treat sum and difference of the operands separately. */
509
	if ((int)save < 0) {
510
		/*
511
		 * Difference of the two operands.  Overflow can occur if the
512
		 * multiply overflowed.  A borrow can occur out of the hidden
513
		 * bit and force a post normalization phase.
514
		 */
515
		Dblext_subtract(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
516
			rightp1,rightp2,rightp3,rightp4,
517
			resultp1,resultp2,resultp3,resultp4);
518
		sign_save = Dbl_signextendedsign(resultp1);
519
		if (Dbl_iszero_hidden(resultp1)) {
520
			/* Handle normalization */
521
		/* A straightforward algorithm would now shift the
522
		 * result and extension left until the hidden bit
523
		 * becomes one.  Not all of the extension bits need
524
		 * participate in the shift.  Only the two most 
525
		 * significant bits (round and guard) are needed.
526
		 * If only a single shift is needed then the guard
527
		 * bit becomes a significant low order bit and the
528
		 * extension must participate in the rounding.
529
		 * If more than a single shift is needed, then all
530
		 * bits to the right of the guard bit are zeros, 
531
		 * and the guard bit may or may not be zero. */
532
			Dblext_leftshiftby1(resultp1,resultp2,resultp3,
533
				resultp4);
534

535
			/* Need to check for a zero result.  The sign and
536
			 * exponent fields have already been zeroed.  The more
537
			 * efficient test of the full object can be used.
538
			 */
539
			 if(Dblext_iszero(resultp1,resultp2,resultp3,resultp4)){
540
				/* Must have been "x-x" or "x+(-x)". */
541
				if (Is_rounding_mode(ROUNDMINUS))
542
					Dbl_setone_sign(resultp1);
543
				Dbl_copytoptr(resultp1,resultp2,dstptr);
544
				return(NOEXCEPTION);
545
			}
546
			result_exponent--;
547

548
			/* Look to see if normalization is finished. */
549
			if (Dbl_isone_hidden(resultp1)) {
550
				/* No further normalization is needed */
551
				goto round;
552
			}
553

554
			/* Discover first one bit to determine shift amount.
555
			 * Use a modified binary search.  We have already
556
			 * shifted the result one position right and still
557
			 * not found a one so the remainder of the extension
558
			 * must be zero and simplifies rounding. */
559
			/* Scan bytes */
560
			while (Dbl_iszero_hiddenhigh7mantissa(resultp1)) {
561
				Dblext_leftshiftby8(resultp1,resultp2,resultp3,resultp4);
562
				result_exponent -= 8;
563
			}
564
			/* Now narrow it down to the nibble */
565
			if (Dbl_iszero_hiddenhigh3mantissa(resultp1)) {
566
				/* The lower nibble contains the
567
				 * normalizing one */
568
				Dblext_leftshiftby4(resultp1,resultp2,resultp3,resultp4);
569
				result_exponent -= 4;
570
			}
571
			/* Select case where first bit is set (already
572
			 * normalized) otherwise select the proper shift. */
573
			jumpsize = Dbl_hiddenhigh3mantissa(resultp1);
574
			if (jumpsize <= 7) switch(jumpsize) {
575
			case 1:
576
				Dblext_leftshiftby3(resultp1,resultp2,resultp3,
577
					resultp4);
578
				result_exponent -= 3;
579
				break;
580
			case 2:
581
			case 3:
582
				Dblext_leftshiftby2(resultp1,resultp2,resultp3,
583
					resultp4);
584
				result_exponent -= 2;
585
				break;
586
			case 4:
587
			case 5:
588
			case 6:
589
			case 7:
590
				Dblext_leftshiftby1(resultp1,resultp2,resultp3,
591
					resultp4);
592
				result_exponent -= 1;
593
				break;
594
			}
595
		} /* end if (hidden...)... */
596
	/* Fall through and round */
597
	} /* end if (save < 0)... */
598
	else {
599
		/* Add magnitudes */
600
		Dblext_addition(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
601
			rightp1,rightp2,rightp3,rightp4,
602
			/*to*/resultp1,resultp2,resultp3,resultp4);
603
		sign_save = Dbl_signextendedsign(resultp1);
604
		if (Dbl_isone_hiddenoverflow(resultp1)) {
605
	    		/* Prenormalization required. */
606
	    		Dblext_arithrightshiftby1(resultp1,resultp2,resultp3,
607
				resultp4);
608
	    		result_exponent++;
609
		} /* end if hiddenoverflow... */
610
	} /* end else ...add magnitudes... */
611

612
	/* Round the result.  If the extension and lower two words are
613
	 * all zeros, then the result is exact.  Otherwise round in the
614
	 * correct direction.  Underflow is possible. If a postnormalization
615
	 * is necessary, then the mantissa is all zeros so no shift is needed.
616
	 */
617
  round:
618
	if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
619
		Dblext_denormalize(resultp1,resultp2,resultp3,resultp4,
620
			result_exponent,is_tiny);
621
	}
622
	Dbl_set_sign(resultp1,/*using*/sign_save);
623
	if (Dblext_isnotzero_mantissap3(resultp3) || 
624
	    Dblext_isnotzero_mantissap4(resultp4)) {
625
		inexact = TRUE;
626
		switch(Rounding_mode()) {
627
		case ROUNDNEAREST: /* The default. */
628
			if (Dblext_isone_highp3(resultp3)) {
629
				/* at least 1/2 ulp */
630
				if (Dblext_isnotzero_low31p3(resultp3) ||
631
				    Dblext_isnotzero_mantissap4(resultp4) ||
632
				    Dblext_isone_lowp2(resultp2)) {
633
					/* either exactly half way and odd or
634
					 * more than 1/2ulp */
635
					Dbl_increment(resultp1,resultp2);
636
				}
637
			}
638
	    		break;
639

640
		case ROUNDPLUS:
641
	    		if (Dbl_iszero_sign(resultp1)) {
642
				/* Round up positive results */
643
				Dbl_increment(resultp1,resultp2);
644
			}
645
			break;
646
	    
647
		case ROUNDMINUS:
648
	    		if (Dbl_isone_sign(resultp1)) {
649
				/* Round down negative results */
650
				Dbl_increment(resultp1,resultp2);
651
			}
652
	    
653
		case ROUNDZERO:;
654
			/* truncate is simple */
655
		} /* end switch... */
656
		if (Dbl_isone_hiddenoverflow(resultp1)) result_exponent++;
657
	}
658
	if (result_exponent >= DBL_INFINITY_EXPONENT) {
659
                /* trap if OVERFLOWTRAP enabled */
660
                if (Is_overflowtrap_enabled()) {
661
                        /*
662
                         * Adjust bias of result
663
                         */
664
                        Dbl_setwrapped_exponent(resultp1,result_exponent,ovfl);
665
                        Dbl_copytoptr(resultp1,resultp2,dstptr);
666
                        if (inexact)
667
                            if (Is_inexacttrap_enabled())
668
                                return (OPC_2E_OVERFLOWEXCEPTION |
669
					OPC_2E_INEXACTEXCEPTION);
670
                            else Set_inexactflag();
671
                        return (OPC_2E_OVERFLOWEXCEPTION);
672
                }
673
                inexact = TRUE;
674
                Set_overflowflag();
675
                /* set result to infinity or largest number */
676
                Dbl_setoverflow(resultp1,resultp2);
677

678
	} else if (result_exponent <= 0) {	/* underflow case */
679
		if (Is_underflowtrap_enabled()) {
680
                        /*
681
                         * Adjust bias of result
682
                         */
683
                	Dbl_setwrapped_exponent(resultp1,result_exponent,unfl);
684
			Dbl_copytoptr(resultp1,resultp2,dstptr);
685
                        if (inexact)
686
                            if (Is_inexacttrap_enabled())
687
                                return (OPC_2E_UNDERFLOWEXCEPTION |
688
					OPC_2E_INEXACTEXCEPTION);
689
                            else Set_inexactflag();
690
	    		return(OPC_2E_UNDERFLOWEXCEPTION);
691
		}
692
		else if (inexact && is_tiny) Set_underflowflag();
693
	}
694
	else Dbl_set_exponent(resultp1,result_exponent);
695
	Dbl_copytoptr(resultp1,resultp2,dstptr);
696
	if (inexact) 
697
		if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
698
		else Set_inexactflag();
699
    	return(NOEXCEPTION);
700
}
701

702
/*
703
 *  Double Floating-point Multiply Negate Fused Add
704
 */
705

706
dbl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
707

708
dbl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr;
709
unsigned int *status;
710
{
711
	unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2, opnd3p1, opnd3p2;
712
	register unsigned int tmpresp1, tmpresp2, tmpresp3, tmpresp4;
713
	unsigned int rightp1, rightp2, rightp3, rightp4;
714
	unsigned int resultp1, resultp2 = 0, resultp3 = 0, resultp4 = 0;
715
	register int mpy_exponent, add_exponent, count;
716
	boolean inexact = FALSE, is_tiny = FALSE;
717

718
	unsigned int signlessleft1, signlessright1, save;
719
	register int result_exponent, diff_exponent;
720
	int sign_save, jumpsize;
721
	
722
	Dbl_copyfromptr(src1ptr,opnd1p1,opnd1p2);
723
	Dbl_copyfromptr(src2ptr,opnd2p1,opnd2p2);
724
	Dbl_copyfromptr(src3ptr,opnd3p1,opnd3p2);
725

726
	/* 
727
	 * set sign bit of result of multiply
728
	 */
729
	if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1)) 
730
		Dbl_setzerop1(resultp1);
731
	else
732
		Dbl_setnegativezerop1(resultp1); 
733

734
	/*
735
	 * Generate multiply exponent 
736
	 */
737
	mpy_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) - DBL_BIAS;
738

739
	/*
740
	 * check first operand for NaN's or infinity
741
	 */
742
	if (Dbl_isinfinity_exponent(opnd1p1)) {
743
		if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
744
			if (Dbl_isnotnan(opnd2p1,opnd2p2) &&
745
			    Dbl_isnotnan(opnd3p1,opnd3p2)) {
746
				if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) {
747
					/* 
748
					 * invalid since operands are infinity 
749
					 * and zero 
750
					 */
751
					if (Is_invalidtrap_enabled())
752
						return(OPC_2E_INVALIDEXCEPTION);
753
					Set_invalidflag();
754
					Dbl_makequietnan(resultp1,resultp2);
755
					Dbl_copytoptr(resultp1,resultp2,dstptr);
756
					return(NOEXCEPTION);
757
				}
758
				/*
759
				 * Check third operand for infinity with a
760
				 *  sign opposite of the multiply result
761
				 */
762
				if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
763
				    (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
764
					/* 
765
					 * invalid since attempting a magnitude
766
					 * subtraction of infinities
767
					 */
768
					if (Is_invalidtrap_enabled())
769
						return(OPC_2E_INVALIDEXCEPTION);
770
					Set_invalidflag();
771
					Dbl_makequietnan(resultp1,resultp2);
772
					Dbl_copytoptr(resultp1,resultp2,dstptr);
773
					return(NOEXCEPTION);
774
				}
775

776
				/*
777
			 	 * return infinity
778
			 	 */
779
				Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
780
				Dbl_copytoptr(resultp1,resultp2,dstptr);
781
				return(NOEXCEPTION);
782
			}
783
		}
784
		else {
785
			/*
786
		 	 * is NaN; signaling or quiet?
787
		 	 */
788
			if (Dbl_isone_signaling(opnd1p1)) {
789
				/* trap if INVALIDTRAP enabled */
790
				if (Is_invalidtrap_enabled()) 
791
			    		return(OPC_2E_INVALIDEXCEPTION);
792
				/* make NaN quiet */
793
				Set_invalidflag();
794
				Dbl_set_quiet(opnd1p1);
795
			}
796
			/* 
797
			 * is second operand a signaling NaN? 
798
			 */
799
			else if (Dbl_is_signalingnan(opnd2p1)) {
800
				/* trap if INVALIDTRAP enabled */
801
				if (Is_invalidtrap_enabled())
802
			    		return(OPC_2E_INVALIDEXCEPTION);
803
				/* make NaN quiet */
804
				Set_invalidflag();
805
				Dbl_set_quiet(opnd2p1);
806
				Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
807
				return(NOEXCEPTION);
808
			}
809
			/* 
810
			 * is third operand a signaling NaN? 
811
			 */
812
			else if (Dbl_is_signalingnan(opnd3p1)) {
813
				/* trap if INVALIDTRAP enabled */
814
				if (Is_invalidtrap_enabled())
815
			    		return(OPC_2E_INVALIDEXCEPTION);
816
				/* make NaN quiet */
817
				Set_invalidflag();
818
				Dbl_set_quiet(opnd3p1);
819
				Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
820
				return(NOEXCEPTION);
821
			}
822
			/*
823
		 	 * return quiet NaN
824
		 	 */
825
			Dbl_copytoptr(opnd1p1,opnd1p2,dstptr);
826
			return(NOEXCEPTION);
827
		}
828
	}
829

830
	/*
831
	 * check second operand for NaN's or infinity
832
	 */
833
	if (Dbl_isinfinity_exponent(opnd2p1)) {
834
		if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
835
			if (Dbl_isnotnan(opnd3p1,opnd3p2)) {
836
				if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) {
837
					/* 
838
					 * invalid since multiply operands are
839
					 * zero & infinity
840
					 */
841
					if (Is_invalidtrap_enabled())
842
						return(OPC_2E_INVALIDEXCEPTION);
843
					Set_invalidflag();
844
					Dbl_makequietnan(opnd2p1,opnd2p2);
845
					Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
846
					return(NOEXCEPTION);
847
				}
848

849
				/*
850
				 * Check third operand for infinity with a
851
				 *  sign opposite of the multiply result
852
				 */
853
				if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
854
				    (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
855
					/* 
856
					 * invalid since attempting a magnitude
857
					 * subtraction of infinities
858
					 */
859
					if (Is_invalidtrap_enabled())
860
				       		return(OPC_2E_INVALIDEXCEPTION);
861
				       	Set_invalidflag();
862
				       	Dbl_makequietnan(resultp1,resultp2);
863
					Dbl_copytoptr(resultp1,resultp2,dstptr);
864
					return(NOEXCEPTION);
865
				}
866

867
				/*
868
				 * return infinity
869
				 */
870
				Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
871
				Dbl_copytoptr(resultp1,resultp2,dstptr);
872
				return(NOEXCEPTION);
873
			}
874
		}
875
		else {
876
			/*
877
			 * is NaN; signaling or quiet?
878
			 */
879
			if (Dbl_isone_signaling(opnd2p1)) {
880
				/* trap if INVALIDTRAP enabled */
881
				if (Is_invalidtrap_enabled())
882
					return(OPC_2E_INVALIDEXCEPTION);
883
				/* make NaN quiet */
884
				Set_invalidflag();
885
				Dbl_set_quiet(opnd2p1);
886
			}
887
			/* 
888
			 * is third operand a signaling NaN? 
889
			 */
890
			else if (Dbl_is_signalingnan(opnd3p1)) {
891
			       	/* trap if INVALIDTRAP enabled */
892
			       	if (Is_invalidtrap_enabled())
893
				   		return(OPC_2E_INVALIDEXCEPTION);
894
			       	/* make NaN quiet */
895
			       	Set_invalidflag();
896
			       	Dbl_set_quiet(opnd3p1);
897
				Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
898
		       		return(NOEXCEPTION);
899
			}
900
			/*
901
			 * return quiet NaN
902
			 */
903
			Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
904
			return(NOEXCEPTION);
905
		}
906
	}
907

908
	/*
909
	 * check third operand for NaN's or infinity
910
	 */
911
	if (Dbl_isinfinity_exponent(opnd3p1)) {
912
		if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
913
			/* return infinity */
914
			Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
915
			return(NOEXCEPTION);
916
		} else {
917
			/*
918
			 * is NaN; signaling or quiet?
919
			 */
920
			if (Dbl_isone_signaling(opnd3p1)) {
921
				/* trap if INVALIDTRAP enabled */
922
				if (Is_invalidtrap_enabled())
923
					return(OPC_2E_INVALIDEXCEPTION);
924
				/* make NaN quiet */
925
				Set_invalidflag();
926
				Dbl_set_quiet(opnd3p1);
927
			}
928
			/*
929
			 * return quiet NaN
930
 			 */
931
			Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
932
			return(NOEXCEPTION);
933
		}
934
    	}
935

936
	/*
937
	 * Generate multiply mantissa
938
	 */
939
	if (Dbl_isnotzero_exponent(opnd1p1)) {
940
		/* set hidden bit */
941
		Dbl_clear_signexponent_set_hidden(opnd1p1);
942
	}
943
	else {
944
		/* check for zero */
945
		if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
946
			/*
947
			 * Perform the add opnd3 with zero here.
948
			 */
949
			if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
950
				if (Is_rounding_mode(ROUNDMINUS)) {
951
					Dbl_or_signs(opnd3p1,resultp1);
952
				} else {
953
					Dbl_and_signs(opnd3p1,resultp1);
954
				}
955
			}
956
			/*
957
			 * Now let's check for trapped underflow case.
958
			 */
959
			else if (Dbl_iszero_exponent(opnd3p1) &&
960
			         Is_underflowtrap_enabled()) {
961
                    		/* need to normalize results mantissa */
962
                    		sign_save = Dbl_signextendedsign(opnd3p1);
963
				result_exponent = 0;
964
                    		Dbl_leftshiftby1(opnd3p1,opnd3p2);
965
                    		Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
966
                    		Dbl_set_sign(opnd3p1,/*using*/sign_save);
967
                    		Dbl_setwrapped_exponent(opnd3p1,result_exponent,
968
							unfl);
969
                    		Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
970
                    		/* inexact = FALSE */
971
                    		return(OPC_2E_UNDERFLOWEXCEPTION);
972
			}
973
			Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
974
			return(NOEXCEPTION);
975
		}
976
		/* is denormalized, adjust exponent */
977
		Dbl_clear_signexponent(opnd1p1);
978
		Dbl_leftshiftby1(opnd1p1,opnd1p2);
979
		Dbl_normalize(opnd1p1,opnd1p2,mpy_exponent);
980
	}
981
	/* opnd2 needs to have hidden bit set with msb in hidden bit */
982
	if (Dbl_isnotzero_exponent(opnd2p1)) {
983
		Dbl_clear_signexponent_set_hidden(opnd2p1);
984
	}
985
	else {
986
		/* check for zero */
987
		if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
988
			/*
989
			 * Perform the add opnd3 with zero here.
990
			 */
991
			if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
992
				if (Is_rounding_mode(ROUNDMINUS)) {
993
					Dbl_or_signs(opnd3p1,resultp1);
994
				} else {
995
					Dbl_and_signs(opnd3p1,resultp1);
996
				}
997
			}
998
			/*
999
			 * Now let's check for trapped underflow case.
1000
			 */
1001
			else if (Dbl_iszero_exponent(opnd3p1) &&
1002
			    Is_underflowtrap_enabled()) {
1003
                    		/* need to normalize results mantissa */
1004
                    		sign_save = Dbl_signextendedsign(opnd3p1);
1005
				result_exponent = 0;
1006
                    		Dbl_leftshiftby1(opnd3p1,opnd3p2);
1007
                    		Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
1008
                    		Dbl_set_sign(opnd3p1,/*using*/sign_save);
1009
                    		Dbl_setwrapped_exponent(opnd3p1,result_exponent,
1010
							unfl);
1011
                    		Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
1012
                    		/* inexact = FALSE */
1013
                    		return(OPC_2E_UNDERFLOWEXCEPTION);
1014
			}
1015
			Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
1016
			return(NOEXCEPTION);
1017
		}
1018
		/* is denormalized; want to normalize */
1019
		Dbl_clear_signexponent(opnd2p1);
1020
		Dbl_leftshiftby1(opnd2p1,opnd2p2);
1021
		Dbl_normalize(opnd2p1,opnd2p2,mpy_exponent);
1022
	}
1023

1024
	/* Multiply the first two source mantissas together */
1025

1026
	/* 
1027
	 * The intermediate result will be kept in tmpres,
1028
	 * which needs enough room for 106 bits of mantissa,
1029
	 * so lets call it a Double extended.
1030
	 */
1031
	Dblext_setzero(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
1032

1033
	/* 
1034
	 * Four bits at a time are inspected in each loop, and a 
1035
	 * simple shift and add multiply algorithm is used. 
1036
	 */ 
1037
	for (count = DBL_P-1; count >= 0; count -= 4) {
1038
		Dblext_rightshiftby4(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
1039
		if (Dbit28p2(opnd1p2)) {
1040
	 		/* Fourword_add should be an ADD followed by 3 ADDC's */
1041
			Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4, 
1042
			 opnd2p1<<3 | opnd2p2>>29, opnd2p2<<3, 0, 0);
1043
		}
1044
		if (Dbit29p2(opnd1p2)) {
1045
			Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1046
			 opnd2p1<<2 | opnd2p2>>30, opnd2p2<<2, 0, 0);
1047
		}
1048
		if (Dbit30p2(opnd1p2)) {
1049
			Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1050
			 opnd2p1<<1 | opnd2p2>>31, opnd2p2<<1, 0, 0);
1051
		}
1052
		if (Dbit31p2(opnd1p2)) {
1053
			Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1054
			 opnd2p1, opnd2p2, 0, 0);
1055
		}
1056
		Dbl_rightshiftby4(opnd1p1,opnd1p2);
1057
	}
1058
	if (Is_dexthiddenoverflow(tmpresp1)) {
1059
		/* result mantissa >= 2 (mantissa overflow) */
1060
		mpy_exponent++;
1061
		Dblext_rightshiftby1(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
1062
	}
1063

1064
	/*
1065
	 * Restore the sign of the mpy result which was saved in resultp1.
1066
	 * The exponent will continue to be kept in mpy_exponent.
1067
	 */
1068
	Dblext_set_sign(tmpresp1,Dbl_sign(resultp1));
1069

1070
	/* 
1071
	 * No rounding is required, since the result of the multiply
1072
	 * is exact in the extended format.
1073
	 */
1074

1075
	/*
1076
	 * Now we are ready to perform the add portion of the operation.
1077
	 *
1078
	 * The exponents need to be kept as integers for now, since the
1079
	 * multiply result might not fit into the exponent field.  We
1080
	 * can't overflow or underflow because of this yet, since the
1081
	 * add could bring the final result back into range.
1082
	 */
1083
	add_exponent = Dbl_exponent(opnd3p1);
1084

1085
	/*
1086
	 * Check for denormalized or zero add operand.
1087
	 */
1088
	if (add_exponent == 0) {
1089
		/* check for zero */
1090
		if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
1091
			/* right is zero */
1092
			/* Left can't be zero and must be result.
1093
			 *
1094
			 * The final result is now in tmpres and mpy_exponent,
1095
			 * and needs to be rounded and squeezed back into
1096
			 * double precision format from double extended.
1097
			 */
1098
			result_exponent = mpy_exponent;
1099
			Dblext_copy(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
1100
				resultp1,resultp2,resultp3,resultp4);
1101
			sign_save = Dbl_signextendedsign(resultp1);/*save sign*/
1102
			goto round;
1103
		}
1104

1105
		/* 
1106
		 * Neither are zeroes.  
1107
		 * Adjust exponent and normalize add operand.
1108
		 */
1109
		sign_save = Dbl_signextendedsign(opnd3p1);	/* save sign */
1110
		Dbl_clear_signexponent(opnd3p1);
1111
		Dbl_leftshiftby1(opnd3p1,opnd3p2);
1112
		Dbl_normalize(opnd3p1,opnd3p2,add_exponent);
1113
		Dbl_set_sign(opnd3p1,sign_save);	/* restore sign */
1114
	} else {
1115
		Dbl_clear_exponent_set_hidden(opnd3p1);
1116
	}
1117
	/*
1118
	 * Copy opnd3 to the double extended variable called right.
1119
	 */
1120
	Dbl_copyto_dblext(opnd3p1,opnd3p2,rightp1,rightp2,rightp3,rightp4);
1121

1122
	/*
1123
	 * A zero "save" helps discover equal operands (for later),
1124
	 * and is used in swapping operands (if needed).
1125
	 */
1126
	Dblext_xortointp1(tmpresp1,rightp1,/*to*/save);
1127

1128
	/*
1129
	 * Compare magnitude of operands.
1130
	 */
1131
	Dblext_copytoint_exponentmantissap1(tmpresp1,signlessleft1);
1132
	Dblext_copytoint_exponentmantissap1(rightp1,signlessright1);
1133
	if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
1134
	    Dblext_ismagnitudeless(tmpresp2,rightp2,signlessleft1,signlessright1)){
1135
		/*
1136
		 * Set the left operand to the larger one by XOR swap.
1137
		 * First finish the first word "save".
1138
		 */
1139
		Dblext_xorfromintp1(save,rightp1,/*to*/rightp1);
1140
		Dblext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
1141
		Dblext_swap_lower(tmpresp2,tmpresp3,tmpresp4,
1142
			rightp2,rightp3,rightp4);
1143
		/* also setup exponents used in rest of routine */
1144
		diff_exponent = add_exponent - mpy_exponent;
1145
		result_exponent = add_exponent;
1146
	} else {
1147
		/* also setup exponents used in rest of routine */
1148
		diff_exponent = mpy_exponent - add_exponent;
1149
		result_exponent = mpy_exponent;
1150
	}
1151
	/* Invariant: left is not smaller than right. */
1152

1153
	/*
1154
	 * Special case alignment of operands that would force alignment
1155
	 * beyond the extent of the extension.  A further optimization
1156
	 * could special case this but only reduces the path length for
1157
	 * this infrequent case.
1158
	 */
1159
	if (diff_exponent > DBLEXT_THRESHOLD) {
1160
		diff_exponent = DBLEXT_THRESHOLD;
1161
	}
1162

1163
	/* Align right operand by shifting it to the right */
1164
	Dblext_clear_sign(rightp1);
1165
	Dblext_right_align(rightp1,rightp2,rightp3,rightp4,
1166
		/*shifted by*/diff_exponent);
1167
	
1168
	/* Treat sum and difference of the operands separately. */
1169
	if ((int)save < 0) {
1170
		/*
1171
		 * Difference of the two operands.  Overflow can occur if the
1172
		 * multiply overflowed.  A borrow can occur out of the hidden
1173
		 * bit and force a post normalization phase.
1174
		 */
1175
		Dblext_subtract(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
1176
			rightp1,rightp2,rightp3,rightp4,
1177
			resultp1,resultp2,resultp3,resultp4);
1178
		sign_save = Dbl_signextendedsign(resultp1);
1179
		if (Dbl_iszero_hidden(resultp1)) {
1180
			/* Handle normalization */
1181
		/* A straightforward algorithm would now shift the
1182
		 * result and extension left until the hidden bit
1183
		 * becomes one.  Not all of the extension bits need
1184
		 * participate in the shift.  Only the two most 
1185
		 * significant bits (round and guard) are needed.
1186
		 * If only a single shift is needed then the guard
1187
		 * bit becomes a significant low order bit and the
1188
		 * extension must participate in the rounding.
1189
		 * If more than a single shift is needed, then all
1190
		 * bits to the right of the guard bit are zeros, 
1191
		 * and the guard bit may or may not be zero. */
1192
			Dblext_leftshiftby1(resultp1,resultp2,resultp3,
1193
				resultp4);
1194

1195
			/* Need to check for a zero result.  The sign and
1196
			 * exponent fields have already been zeroed.  The more
1197
			 * efficient test of the full object can be used.
1198
			 */
1199
			 if (Dblext_iszero(resultp1,resultp2,resultp3,resultp4)) {
1200
				/* Must have been "x-x" or "x+(-x)". */
1201
				if (Is_rounding_mode(ROUNDMINUS))
1202
					Dbl_setone_sign(resultp1);
1203
				Dbl_copytoptr(resultp1,resultp2,dstptr);
1204
				return(NOEXCEPTION);
1205
			}
1206
			result_exponent--;
1207

1208
			/* Look to see if normalization is finished. */
1209
			if (Dbl_isone_hidden(resultp1)) {
1210
				/* No further normalization is needed */
1211
				goto round;
1212
			}
1213

1214
			/* Discover first one bit to determine shift amount.
1215
			 * Use a modified binary search.  We have already
1216
			 * shifted the result one position left and still
1217
			 * not found a one so the remainder of the extension
1218
			 * must be zero and simplifies rounding. */
1219
			/* Scan bytes */
1220
			while (Dbl_iszero_hiddenhigh7mantissa(resultp1)) {
1221
				Dblext_leftshiftby8(resultp1,resultp2,resultp3,resultp4);
1222
				result_exponent -= 8;
1223
			}
1224
			/* Now narrow it down to the nibble */
1225
			if (Dbl_iszero_hiddenhigh3mantissa(resultp1)) {
1226
				/* The lower nibble contains the
1227
				 * normalizing one */
1228
				Dblext_leftshiftby4(resultp1,resultp2,resultp3,resultp4);
1229
				result_exponent -= 4;
1230
			}
1231
			/* Select case where first bit is set (already
1232
			 * normalized) otherwise select the proper shift. */
1233
			jumpsize = Dbl_hiddenhigh3mantissa(resultp1);
1234
			if (jumpsize <= 7) switch(jumpsize) {
1235
			case 1:
1236
				Dblext_leftshiftby3(resultp1,resultp2,resultp3,
1237
					resultp4);
1238
				result_exponent -= 3;
1239
				break;
1240
			case 2:
1241
			case 3:
1242
				Dblext_leftshiftby2(resultp1,resultp2,resultp3,
1243
					resultp4);
1244
				result_exponent -= 2;
1245
				break;
1246
			case 4:
1247
			case 5:
1248
			case 6:
1249
			case 7:
1250
				Dblext_leftshiftby1(resultp1,resultp2,resultp3,
1251
					resultp4);
1252
				result_exponent -= 1;
1253
				break;
1254
			}
1255
		} /* end if (hidden...)... */
1256
	/* Fall through and round */
1257
	} /* end if (save < 0)... */
1258
	else {
1259
		/* Add magnitudes */
1260
		Dblext_addition(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
1261
			rightp1,rightp2,rightp3,rightp4,
1262
			/*to*/resultp1,resultp2,resultp3,resultp4);
1263
		sign_save = Dbl_signextendedsign(resultp1);
1264
		if (Dbl_isone_hiddenoverflow(resultp1)) {
1265
	    		/* Prenormalization required. */
1266
	    		Dblext_arithrightshiftby1(resultp1,resultp2,resultp3,
1267
				resultp4);
1268
	    		result_exponent++;
1269
		} /* end if hiddenoverflow... */
1270
	} /* end else ...add magnitudes... */
1271

1272
	/* Round the result.  If the extension and lower two words are
1273
	 * all zeros, then the result is exact.  Otherwise round in the
1274
	 * correct direction.  Underflow is possible. If a postnormalization
1275
	 * is necessary, then the mantissa is all zeros so no shift is needed.
1276
	 */
1277
  round:
1278
	if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
1279
		Dblext_denormalize(resultp1,resultp2,resultp3,resultp4,
1280
			result_exponent,is_tiny);
1281
	}
1282
	Dbl_set_sign(resultp1,/*using*/sign_save);
1283
	if (Dblext_isnotzero_mantissap3(resultp3) || 
1284
	    Dblext_isnotzero_mantissap4(resultp4)) {
1285
		inexact = TRUE;
1286
		switch(Rounding_mode()) {
1287
		case ROUNDNEAREST: /* The default. */
1288
			if (Dblext_isone_highp3(resultp3)) {
1289
				/* at least 1/2 ulp */
1290
				if (Dblext_isnotzero_low31p3(resultp3) ||
1291
				    Dblext_isnotzero_mantissap4(resultp4) ||
1292
				    Dblext_isone_lowp2(resultp2)) {
1293
					/* either exactly half way and odd or
1294
					 * more than 1/2ulp */
1295
					Dbl_increment(resultp1,resultp2);
1296
				}
1297
			}
1298
	    		break;
1299

1300
		case ROUNDPLUS:
1301
	    		if (Dbl_iszero_sign(resultp1)) {
1302
				/* Round up positive results */
1303
				Dbl_increment(resultp1,resultp2);
1304
			}
1305
			break;
1306
	    
1307
		case ROUNDMINUS:
1308
	    		if (Dbl_isone_sign(resultp1)) {
1309
				/* Round down negative results */
1310
				Dbl_increment(resultp1,resultp2);
1311
			}
1312
	    
1313
		case ROUNDZERO:;
1314
			/* truncate is simple */
1315
		} /* end switch... */
1316
		if (Dbl_isone_hiddenoverflow(resultp1)) result_exponent++;
1317
	}
1318
	if (result_exponent >= DBL_INFINITY_EXPONENT) {
1319
		/* Overflow */
1320
		if (Is_overflowtrap_enabled()) {
1321
                        /*
1322
                         * Adjust bias of result
1323
                         */
1324
                        Dbl_setwrapped_exponent(resultp1,result_exponent,ovfl);
1325
                        Dbl_copytoptr(resultp1,resultp2,dstptr);
1326
                        if (inexact)
1327
                            if (Is_inexacttrap_enabled())
1328
                                return (OPC_2E_OVERFLOWEXCEPTION |
1329
					OPC_2E_INEXACTEXCEPTION);
1330
                            else Set_inexactflag();
1331
                        return (OPC_2E_OVERFLOWEXCEPTION);
1332
		}
1333
		inexact = TRUE;
1334
		Set_overflowflag();
1335
		Dbl_setoverflow(resultp1,resultp2);
1336
	} else if (result_exponent <= 0) {	/* underflow case */
1337
		if (Is_underflowtrap_enabled()) {
1338
                        /*
1339
                         * Adjust bias of result
1340
                         */
1341
                	Dbl_setwrapped_exponent(resultp1,result_exponent,unfl);
1342
			Dbl_copytoptr(resultp1,resultp2,dstptr);
1343
                        if (inexact)
1344
                            if (Is_inexacttrap_enabled())
1345
                                return (OPC_2E_UNDERFLOWEXCEPTION |
1346
					OPC_2E_INEXACTEXCEPTION);
1347
                            else Set_inexactflag();
1348
	    		return(OPC_2E_UNDERFLOWEXCEPTION);
1349
		}
1350
		else if (inexact && is_tiny) Set_underflowflag();
1351
	}
1352
	else Dbl_set_exponent(resultp1,result_exponent);
1353
	Dbl_copytoptr(resultp1,resultp2,dstptr);
1354
	if (inexact) 
1355
		if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
1356
		else Set_inexactflag();
1357
    	return(NOEXCEPTION);
1358
}
1359

1360
/*
1361
 *  Single Floating-point Multiply Fused Add
1362
 */
1363

1364
sgl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
1365

1366
sgl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr;
1367
unsigned int *status;
1368
{
1369
	unsigned int opnd1, opnd2, opnd3;
1370
	register unsigned int tmpresp1, tmpresp2;
1371
	unsigned int rightp1, rightp2;
1372
	unsigned int resultp1, resultp2 = 0;
1373
	register int mpy_exponent, add_exponent, count;
1374
	boolean inexact = FALSE, is_tiny = FALSE;
1375

1376
	unsigned int signlessleft1, signlessright1, save;
1377
	register int result_exponent, diff_exponent;
1378
	int sign_save, jumpsize;
1379
	
1380
	Sgl_copyfromptr(src1ptr,opnd1);
1381
	Sgl_copyfromptr(src2ptr,opnd2);
1382
	Sgl_copyfromptr(src3ptr,opnd3);
1383

1384
	/* 
1385
	 * set sign bit of result of multiply
1386
	 */
1387
	if (Sgl_sign(opnd1) ^ Sgl_sign(opnd2)) 
1388
		Sgl_setnegativezero(resultp1); 
1389
	else Sgl_setzero(resultp1);
1390

1391
	/*
1392
	 * Generate multiply exponent 
1393
	 */
1394
	mpy_exponent = Sgl_exponent(opnd1) + Sgl_exponent(opnd2) - SGL_BIAS;
1395

1396
	/*
1397
	 * check first operand for NaN's or infinity
1398
	 */
1399
	if (Sgl_isinfinity_exponent(opnd1)) {
1400
		if (Sgl_iszero_mantissa(opnd1)) {
1401
			if (Sgl_isnotnan(opnd2) && Sgl_isnotnan(opnd3)) {
1402
				if (Sgl_iszero_exponentmantissa(opnd2)) {
1403
					/* 
1404
					 * invalid since operands are infinity 
1405
					 * and zero 
1406
					 */
1407
					if (Is_invalidtrap_enabled())
1408
						return(OPC_2E_INVALIDEXCEPTION);
1409
					Set_invalidflag();
1410
					Sgl_makequietnan(resultp1);
1411
					Sgl_copytoptr(resultp1,dstptr);
1412
					return(NOEXCEPTION);
1413
				}
1414
				/*
1415
				 * Check third operand for infinity with a
1416
				 *  sign opposite of the multiply result
1417
				 */
1418
				if (Sgl_isinfinity(opnd3) &&
1419
				    (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
1420
					/* 
1421
					 * invalid since attempting a magnitude
1422
					 * subtraction of infinities
1423
					 */
1424
					if (Is_invalidtrap_enabled())
1425
						return(OPC_2E_INVALIDEXCEPTION);
1426
					Set_invalidflag();
1427
					Sgl_makequietnan(resultp1);
1428
					Sgl_copytoptr(resultp1,dstptr);
1429
					return(NOEXCEPTION);
1430
				}
1431

1432
				/*
1433
			 	 * return infinity
1434
			 	 */
1435
				Sgl_setinfinity_exponentmantissa(resultp1);
1436
				Sgl_copytoptr(resultp1,dstptr);
1437
				return(NOEXCEPTION);
1438
			}
1439
		}
1440
		else {
1441
			/*
1442
		 	 * is NaN; signaling or quiet?
1443
		 	 */
1444
			if (Sgl_isone_signaling(opnd1)) {
1445
				/* trap if INVALIDTRAP enabled */
1446
				if (Is_invalidtrap_enabled()) 
1447
			    		return(OPC_2E_INVALIDEXCEPTION);
1448
				/* make NaN quiet */
1449
				Set_invalidflag();
1450
				Sgl_set_quiet(opnd1);
1451
			}
1452
			/* 
1453
			 * is second operand a signaling NaN? 
1454
			 */
1455
			else if (Sgl_is_signalingnan(opnd2)) {
1456
				/* trap if INVALIDTRAP enabled */
1457
				if (Is_invalidtrap_enabled())
1458
			    		return(OPC_2E_INVALIDEXCEPTION);
1459
				/* make NaN quiet */
1460
				Set_invalidflag();
1461
				Sgl_set_quiet(opnd2);
1462
				Sgl_copytoptr(opnd2,dstptr);
1463
				return(NOEXCEPTION);
1464
			}
1465
			/* 
1466
			 * is third operand a signaling NaN? 
1467
			 */
1468
			else if (Sgl_is_signalingnan(opnd3)) {
1469
				/* trap if INVALIDTRAP enabled */
1470
				if (Is_invalidtrap_enabled())
1471
			    		return(OPC_2E_INVALIDEXCEPTION);
1472
				/* make NaN quiet */
1473
				Set_invalidflag();
1474
				Sgl_set_quiet(opnd3);
1475
				Sgl_copytoptr(opnd3,dstptr);
1476
				return(NOEXCEPTION);
1477
			}
1478
			/*
1479
		 	 * return quiet NaN
1480
		 	 */
1481
			Sgl_copytoptr(opnd1,dstptr);
1482
			return(NOEXCEPTION);
1483
		}
1484
	}
1485

1486
	/*
1487
	 * check second operand for NaN's or infinity
1488
	 */
1489
	if (Sgl_isinfinity_exponent(opnd2)) {
1490
		if (Sgl_iszero_mantissa(opnd2)) {
1491
			if (Sgl_isnotnan(opnd3)) {
1492
				if (Sgl_iszero_exponentmantissa(opnd1)) {
1493
					/* 
1494
					 * invalid since multiply operands are
1495
					 * zero & infinity
1496
					 */
1497
					if (Is_invalidtrap_enabled())
1498
						return(OPC_2E_INVALIDEXCEPTION);
1499
					Set_invalidflag();
1500
					Sgl_makequietnan(opnd2);
1501
					Sgl_copytoptr(opnd2,dstptr);
1502
					return(NOEXCEPTION);
1503
				}
1504

1505
				/*
1506
				 * Check third operand for infinity with a
1507
				 *  sign opposite of the multiply result
1508
				 */
1509
				if (Sgl_isinfinity(opnd3) &&
1510
				    (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
1511
					/* 
1512
					 * invalid since attempting a magnitude
1513
					 * subtraction of infinities
1514
					 */
1515
					if (Is_invalidtrap_enabled())
1516
				       		return(OPC_2E_INVALIDEXCEPTION);
1517
				       	Set_invalidflag();
1518
				       	Sgl_makequietnan(resultp1);
1519
					Sgl_copytoptr(resultp1,dstptr);
1520
					return(NOEXCEPTION);
1521
				}
1522

1523
				/*
1524
				 * return infinity
1525
				 */
1526
				Sgl_setinfinity_exponentmantissa(resultp1);
1527
				Sgl_copytoptr(resultp1,dstptr);
1528
				return(NOEXCEPTION);
1529
			}
1530
		}
1531
		else {
1532
			/*
1533
			 * is NaN; signaling or quiet?
1534
			 */
1535
			if (Sgl_isone_signaling(opnd2)) {
1536
				/* trap if INVALIDTRAP enabled */
1537
				if (Is_invalidtrap_enabled())
1538
					return(OPC_2E_INVALIDEXCEPTION);
1539
				/* make NaN quiet */
1540
				Set_invalidflag();
1541
				Sgl_set_quiet(opnd2);
1542
			}
1543
			/* 
1544
			 * is third operand a signaling NaN? 
1545
			 */
1546
			else if (Sgl_is_signalingnan(opnd3)) {
1547
			       	/* trap if INVALIDTRAP enabled */
1548
			       	if (Is_invalidtrap_enabled())
1549
				   		return(OPC_2E_INVALIDEXCEPTION);
1550
			       	/* make NaN quiet */
1551
			       	Set_invalidflag();
1552
			       	Sgl_set_quiet(opnd3);
1553
				Sgl_copytoptr(opnd3,dstptr);
1554
		       		return(NOEXCEPTION);
1555
			}
1556
			/*
1557
			 * return quiet NaN
1558
			 */
1559
			Sgl_copytoptr(opnd2,dstptr);
1560
			return(NOEXCEPTION);
1561
		}
1562
	}
1563

1564
	/*
1565
	 * check third operand for NaN's or infinity
1566
	 */
1567
	if (Sgl_isinfinity_exponent(opnd3)) {
1568
		if (Sgl_iszero_mantissa(opnd3)) {
1569
			/* return infinity */
1570
			Sgl_copytoptr(opnd3,dstptr);
1571
			return(NOEXCEPTION);
1572
		} else {
1573
			/*
1574
			 * is NaN; signaling or quiet?
1575
			 */
1576
			if (Sgl_isone_signaling(opnd3)) {
1577
				/* trap if INVALIDTRAP enabled */
1578
				if (Is_invalidtrap_enabled())
1579
					return(OPC_2E_INVALIDEXCEPTION);
1580
				/* make NaN quiet */
1581
				Set_invalidflag();
1582
				Sgl_set_quiet(opnd3);
1583
			}
1584
			/*
1585
			 * return quiet NaN
1586
 			 */
1587
			Sgl_copytoptr(opnd3,dstptr);
1588
			return(NOEXCEPTION);
1589
		}
1590
    	}
1591

1592
	/*
1593
	 * Generate multiply mantissa
1594
	 */
1595
	if (Sgl_isnotzero_exponent(opnd1)) {
1596
		/* set hidden bit */
1597
		Sgl_clear_signexponent_set_hidden(opnd1);
1598
	}
1599
	else {
1600
		/* check for zero */
1601
		if (Sgl_iszero_mantissa(opnd1)) {
1602
			/*
1603
			 * Perform the add opnd3 with zero here.
1604
			 */
1605
			if (Sgl_iszero_exponentmantissa(opnd3)) {
1606
				if (Is_rounding_mode(ROUNDMINUS)) {
1607
					Sgl_or_signs(opnd3,resultp1);
1608
				} else {
1609
					Sgl_and_signs(opnd3,resultp1);
1610
				}
1611
			}
1612
			/*
1613
			 * Now let's check for trapped underflow case.
1614
			 */
1615
			else if (Sgl_iszero_exponent(opnd3) &&
1616
			         Is_underflowtrap_enabled()) {
1617
                    		/* need to normalize results mantissa */
1618
                    		sign_save = Sgl_signextendedsign(opnd3);
1619
				result_exponent = 0;
1620
                    		Sgl_leftshiftby1(opnd3);
1621
                    		Sgl_normalize(opnd3,result_exponent);
1622
                    		Sgl_set_sign(opnd3,/*using*/sign_save);
1623
                    		Sgl_setwrapped_exponent(opnd3,result_exponent,
1624
							unfl);
1625
                    		Sgl_copytoptr(opnd3,dstptr);
1626
                    		/* inexact = FALSE */
1627
                    		return(OPC_2E_UNDERFLOWEXCEPTION);
1628
			}
1629
			Sgl_copytoptr(opnd3,dstptr);
1630
			return(NOEXCEPTION);
1631
		}
1632
		/* is denormalized, adjust exponent */
1633
		Sgl_clear_signexponent(opnd1);
1634
		Sgl_leftshiftby1(opnd1);
1635
		Sgl_normalize(opnd1,mpy_exponent);
1636
	}
1637
	/* opnd2 needs to have hidden bit set with msb in hidden bit */
1638
	if (Sgl_isnotzero_exponent(opnd2)) {
1639
		Sgl_clear_signexponent_set_hidden(opnd2);
1640
	}
1641
	else {
1642
		/* check for zero */
1643
		if (Sgl_iszero_mantissa(opnd2)) {
1644
			/*
1645
			 * Perform the add opnd3 with zero here.
1646
			 */
1647
			if (Sgl_iszero_exponentmantissa(opnd3)) {
1648
				if (Is_rounding_mode(ROUNDMINUS)) {
1649
					Sgl_or_signs(opnd3,resultp1);
1650
				} else {
1651
					Sgl_and_signs(opnd3,resultp1);
1652
				}
1653
			}
1654
			/*
1655
			 * Now let's check for trapped underflow case.
1656
			 */
1657
			else if (Sgl_iszero_exponent(opnd3) &&
1658
			    Is_underflowtrap_enabled()) {
1659
                    		/* need to normalize results mantissa */
1660
                    		sign_save = Sgl_signextendedsign(opnd3);
1661
				result_exponent = 0;
1662
                    		Sgl_leftshiftby1(opnd3);
1663
                    		Sgl_normalize(opnd3,result_exponent);
1664
                    		Sgl_set_sign(opnd3,/*using*/sign_save);
1665
                    		Sgl_setwrapped_exponent(opnd3,result_exponent,
1666
							unfl);
1667
                    		Sgl_copytoptr(opnd3,dstptr);
1668
                    		/* inexact = FALSE */
1669
                    		return(OPC_2E_UNDERFLOWEXCEPTION);
1670
			}
1671
			Sgl_copytoptr(opnd3,dstptr);
1672
			return(NOEXCEPTION);
1673
		}
1674
		/* is denormalized; want to normalize */
1675
		Sgl_clear_signexponent(opnd2);
1676
		Sgl_leftshiftby1(opnd2);
1677
		Sgl_normalize(opnd2,mpy_exponent);
1678
	}
1679

1680
	/* Multiply the first two source mantissas together */
1681

1682
	/* 
1683
	 * The intermediate result will be kept in tmpres,
1684
	 * which needs enough room for 48 bits of mantissa,
1684
	 * so let's call it a Single extended.
1686
	 */
1687
	Sglext_setzero(tmpresp1,tmpresp2);
1688

1689
	/* 
1690
	 * Four bits at a time are inspected in each loop, and a 
1691
	 * simple shift and add multiply algorithm is used. 
1692
	 */ 
1693
	for (count = SGL_P-1; count >= 0; count -= 4) {
1694
		Sglext_rightshiftby4(tmpresp1,tmpresp2);
1695
		if (Sbit28(opnd1)) {
1696
	 		/* Twoword_add should be an ADD followed by an ADDC */
1697
			Twoword_add(tmpresp1, tmpresp2, opnd2<<3, 0);
1698
		}
1699
		if (Sbit29(opnd1)) {
1700
			Twoword_add(tmpresp1, tmpresp2, opnd2<<2, 0);
1701
		}
1702
		if (Sbit30(opnd1)) {
1703
			Twoword_add(tmpresp1, tmpresp2, opnd2<<1, 0);
1704
		}
1705
		if (Sbit31(opnd1)) {
1706
			Twoword_add(tmpresp1, tmpresp2, opnd2, 0);
1707
		}
1708
		Sgl_rightshiftby4(opnd1);
1709
	}
1710
	if (Is_sexthiddenoverflow(tmpresp1)) {
1711
		/* result mantissa >= 2 (mantissa overflow) */
1712
		mpy_exponent++;
1713
		Sglext_rightshiftby4(tmpresp1,tmpresp2);
1714
	} else {
1715
		Sglext_rightshiftby3(tmpresp1,tmpresp2);
1716
	}
1717

1718
	/*
1719
	 * Restore the sign of the mpy result which was saved in resultp1.
1720
	 * The exponent will continue to be kept in mpy_exponent.
1721
	 */
1722
	Sglext_set_sign(tmpresp1,Sgl_sign(resultp1));
1723

1724
	/* 
1725
	 * No rounding is required, since the result of the multiply
1726
	 * is exact in the extended format.
1727
	 */
1728

1729
	/*
1730
	 * Now we are ready to perform the add portion of the operation.
1731
	 *
1732
	 * The exponents need to be kept as integers for now, since the
1733
	 * multiply result might not fit into the exponent field.  We
1734
	 * can't overflow or underflow because of this yet, since the
1735
	 * add could bring the final result back into range.
1736
	 */
1737
	add_exponent = Sgl_exponent(opnd3);
1738

1739
	/*
1740
	 * Check for denormalized or zero add operand.
1741
	 */
1742
	if (add_exponent == 0) {
1743
		/* check for zero */
1744
		if (Sgl_iszero_mantissa(opnd3)) {
1745
			/* right is zero */
1746
			/* Left can't be zero and must be result.
1747
			 *
1748
			 * The final result is now in tmpres and mpy_exponent,
1749
			 * and needs to be rounded and squeezed back into
1750
			 * single precision format from single extended.
1751
			 */
1752
			result_exponent = mpy_exponent;
1753
			Sglext_copy(tmpresp1,tmpresp2,resultp1,resultp2);
1754
			sign_save = Sgl_signextendedsign(resultp1);/*save sign*/
1755
			goto round;
1756
		}
1757

1758
		/* 
1759
		 * Neither are zeroes.  
1760
		 * Adjust exponent and normalize add operand.
1761
		 */
1762
		sign_save = Sgl_signextendedsign(opnd3);	/* save sign */
1763
		Sgl_clear_signexponent(opnd3);
1764
		Sgl_leftshiftby1(opnd3);
1765
		Sgl_normalize(opnd3,add_exponent);
1766
		Sgl_set_sign(opnd3,sign_save);		/* restore sign */
1767
	} else {
1768
		Sgl_clear_exponent_set_hidden(opnd3);
1769
	}
1770
	/*
1771
	 * Copy opnd3 to the single extended variable called right.
1772
	 */
1773
	Sgl_copyto_sglext(opnd3,rightp1,rightp2);
1774

1775
	/*
1776
	 * A zero "save" helps discover equal operands (for later),
1777
	 * and is used in swapping operands (if needed).
1778
	 */
1779
	Sglext_xortointp1(tmpresp1,rightp1,/*to*/save);
1780

1781
	/*
1782
	 * Compare magnitude of operands.
1783
	 */
1784
	Sglext_copytoint_exponentmantissa(tmpresp1,signlessleft1);
1785
	Sglext_copytoint_exponentmantissa(rightp1,signlessright1);
1786
	if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
1787
	    Sglext_ismagnitudeless(signlessleft1,signlessright1)) {
1788
		/*
1789
		 * Set the left operand to the larger one by XOR swap.
1790
		 * First finish the first word "save".
1791
		 */
1792
		Sglext_xorfromintp1(save,rightp1,/*to*/rightp1);
1793
		Sglext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
1794
		Sglext_swap_lower(tmpresp2,rightp2);
1795
		/* also setup exponents used in rest of routine */
1796
		diff_exponent = add_exponent - mpy_exponent;
1797
		result_exponent = add_exponent;
1798
	} else {
1799
		/* also setup exponents used in rest of routine */
1800
		diff_exponent = mpy_exponent - add_exponent;
1801
		result_exponent = mpy_exponent;
1802
	}
1803
	/* Invariant: left is not smaller than right. */
1804

1805
	/*
1806
	 * Special case alignment of operands that would force alignment
1807
	 * beyond the extent of the extension.  A further optimization
1808
	 * could special case this but only reduces the path length for
1809
	 * this infrequent case.
1810
	 */
1811
	if (diff_exponent > SGLEXT_THRESHOLD) {
1812
		diff_exponent = SGLEXT_THRESHOLD;
1813
	}
1814

1815
	/* Align right operand by shifting it to the right */
1816
	Sglext_clear_sign(rightp1);
1817
	Sglext_right_align(rightp1,rightp2,/*shifted by*/diff_exponent);
1818
	
1819
	/* Treat sum and difference of the operands separately. */
1820
	if ((int)save < 0) {
1821
		/*
1822
		 * Difference of the two operands.  Overflow can occur if the
1823
		 * multiply overflowed.  A borrow can occur out of the hidden
1824
		 * bit and force a post normalization phase.
1825
		 */
1826
		Sglext_subtract(tmpresp1,tmpresp2, rightp1,rightp2,
1827
			resultp1,resultp2);
1828
		sign_save = Sgl_signextendedsign(resultp1);
1829
		if (Sgl_iszero_hidden(resultp1)) {
1830
			/* Handle normalization */
1831
		/* A straightforward algorithm would now shift the
1832
		 * result and extension left until the hidden bit
1833
		 * becomes one.  Not all of the extension bits need
1834
		 * participate in the shift.  Only the two most 
1835
		 * significant bits (round and guard) are needed.
1836
		 * If only a single shift is needed then the guard
1837
		 * bit becomes a significant low order bit and the
1838
		 * extension must participate in the rounding.
1839
		 * If more than a single shift is needed, then all
1840
		 * bits to the right of the guard bit are zeros, 
1841
		 * and the guard bit may or may not be zero. */
1842
			Sglext_leftshiftby1(resultp1,resultp2);
1843

1844
			/* Need to check for a zero result.  The sign and
1845
			 * exponent fields have already been zeroed.  The more
1846
			 * efficient test of the full object can be used.
1847
			 */
1848
			 if (Sglext_iszero(resultp1,resultp2)) {
1849
				/* Must have been "x-x" or "x+(-x)". */
1850
				if (Is_rounding_mode(ROUNDMINUS))
1851
					Sgl_setone_sign(resultp1);
1852
				Sgl_copytoptr(resultp1,dstptr);
1853
				return(NOEXCEPTION);
1854
			}
1855
			result_exponent--;
1856

1857
			/* Look to see if normalization is finished. */
1858
			if (Sgl_isone_hidden(resultp1)) {
1859
				/* No further normalization is needed */
1860
				goto round;
1861
			}
1862

1863
			/* Discover first one bit to determine shift amount.
1864
			 * Use a modified binary search.  We have already
1865
			 * shifted the result one position right and still
1866
			 * not found a one so the remainder of the extension
1867
			 * must be zero and simplifies rounding. */
1868
			/* Scan bytes */
1869
			while (Sgl_iszero_hiddenhigh7mantissa(resultp1)) {
1870
				Sglext_leftshiftby8(resultp1,resultp2);
1871
				result_exponent -= 8;
1872
			}
1873
			/* Now narrow it down to the nibble */
1874
			if (Sgl_iszero_hiddenhigh3mantissa(resultp1)) {
1875
				/* The lower nibble contains the
1876
				 * normalizing one */
1877
				Sglext_leftshiftby4(resultp1,resultp2);
1878
				result_exponent -= 4;
1879
			}
1880
			/* Select case where first bit is set (already
1881
			 * normalized) otherwise select the proper shift. */
1882
			jumpsize = Sgl_hiddenhigh3mantissa(resultp1);
1883
			if (jumpsize <= 7) switch(jumpsize) {
1884
			case 1:
1885
				Sglext_leftshiftby3(resultp1,resultp2);
1886
				result_exponent -= 3;
1887
				break;
1888
			case 2:
1889
			case 3:
1890
				Sglext_leftshiftby2(resultp1,resultp2);
1891
				result_exponent -= 2;
1892
				break;
1893
			case 4:
1894
			case 5:
1895
			case 6:
1896
			case 7:
1897
				Sglext_leftshiftby1(resultp1,resultp2);
1898
				result_exponent -= 1;
1899
				break;
1900
			}
1901
		} /* end if (hidden...)... */
1902
	/* Fall through and round */
1903
	} /* end if (save < 0)... */
1904
	else {
1905
		/* Add magnitudes */
1906
		Sglext_addition(tmpresp1,tmpresp2,
1907
			rightp1,rightp2, /*to*/resultp1,resultp2);
1908
		sign_save = Sgl_signextendedsign(resultp1);
1909
		if (Sgl_isone_hiddenoverflow(resultp1)) {
1910
	    		/* Prenormalization required. */
1911
	    		Sglext_arithrightshiftby1(resultp1,resultp2);
1912
	    		result_exponent++;
1913
		} /* end if hiddenoverflow... */
1914
	} /* end else ...add magnitudes... */
1915

1916
	/* Round the result.  If the extension and lower two words are
1917
	 * all zeros, then the result is exact.  Otherwise round in the
1918
	 * correct direction.  Underflow is possible. If a postnormalization
1919
	 * is necessary, then the mantissa is all zeros so no shift is needed.
1920
	 */
1921
  round:
1922
	if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
1923
		Sglext_denormalize(resultp1,resultp2,result_exponent,is_tiny);
1924
	}
1925
	Sgl_set_sign(resultp1,/*using*/sign_save);
1926
	if (Sglext_isnotzero_mantissap2(resultp2)) {
1927
		inexact = TRUE;
1928
		switch(Rounding_mode()) {
1929
		case ROUNDNEAREST: /* The default. */
1930
			if (Sglext_isone_highp2(resultp2)) {
1931
				/* at least 1/2 ulp */
1932
				if (Sglext_isnotzero_low31p2(resultp2) ||
1933
				    Sglext_isone_lowp1(resultp1)) {
1934
					/* either exactly half way and odd or
1935
					 * more than 1/2ulp */
1936
					Sgl_increment(resultp1);
1937
				}
1938
			}
1939
	    		break;
1940

1941
		case ROUNDPLUS:
1942
	    		if (Sgl_iszero_sign(resultp1)) {
1943
				/* Round up positive results */
1944
				Sgl_increment(resultp1);
1945
			}
1946
			break;
1947
	    
1948
		case ROUNDMINUS:
1949
	    		if (Sgl_isone_sign(resultp1)) {
1950
				/* Round down negative results */
1951
				Sgl_increment(resultp1);
1952
			}
1953
	    
1954
		case ROUNDZERO:;
1955
			/* truncate is simple */
1956
		} /* end switch... */
1957
		if (Sgl_isone_hiddenoverflow(resultp1)) result_exponent++;
1958
	}
1959
	if (result_exponent >= SGL_INFINITY_EXPONENT) {
1960
		/* Overflow */
1961
		if (Is_overflowtrap_enabled()) {
1962
                        /*
1963
                         * Adjust bias of result
1964
                         */
1965
                        Sgl_setwrapped_exponent(resultp1,result_exponent,ovfl);
1966
                        Sgl_copytoptr(resultp1,dstptr);
1967
                        if (inexact)
1968
                            if (Is_inexacttrap_enabled())
1969
                                return (OPC_2E_OVERFLOWEXCEPTION |
1970
					OPC_2E_INEXACTEXCEPTION);
1971
                            else Set_inexactflag();
1972
                        return (OPC_2E_OVERFLOWEXCEPTION);
1973
		}
1974
		inexact = TRUE;
1975
		Set_overflowflag();
1976
		Sgl_setoverflow(resultp1);
1977
	} else if (result_exponent <= 0) {	/* underflow case */
1978
		if (Is_underflowtrap_enabled()) {
1979
                        /*
1980
                         * Adjust bias of result
1981
                         */
1982
                	Sgl_setwrapped_exponent(resultp1,result_exponent,unfl);
1983
			Sgl_copytoptr(resultp1,dstptr);
1984
                        if (inexact)
1985
                            if (Is_inexacttrap_enabled())
1986
                                return (OPC_2E_UNDERFLOWEXCEPTION |
1987
					OPC_2E_INEXACTEXCEPTION);
1988
                            else Set_inexactflag();
1989
	    		return(OPC_2E_UNDERFLOWEXCEPTION);
1990
		}
1991
		else if (inexact && is_tiny) Set_underflowflag();
1992
	}
1993
	else Sgl_set_exponent(resultp1,result_exponent);
1994
	Sgl_copytoptr(resultp1,dstptr);
1995
	if (inexact) 
1996
		if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
1997
		else Set_inexactflag();
1998
    	return(NOEXCEPTION);
1999
}
2000

2001
/*
2002
 *  Single Floating-point Multiply Negate Fused Add
2003
 */
2004

2005
sgl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
2006

2007
sgl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr;
2008
unsigned int *status;
2009
{
2010
	unsigned int opnd1, opnd2, opnd3;
2011
	register unsigned int tmpresp1, tmpresp2;
2012
	unsigned int rightp1, rightp2;
2013
	unsigned int resultp1, resultp2 = 0;
2014
	register int mpy_exponent, add_exponent, count;
2015
	boolean inexact = FALSE, is_tiny = FALSE;
2016

2017
	unsigned int signlessleft1, signlessright1, save;
2018
	register int result_exponent, diff_exponent;
2019
	int sign_save, jumpsize;
2020
	
2021
	Sgl_copyfromptr(src1ptr,opnd1);
2022
	Sgl_copyfromptr(src2ptr,opnd2);
2023
	Sgl_copyfromptr(src3ptr,opnd3);
2024

2025
	/* 
2026
	 * set sign bit of result of multiply
2027
	 */
2028
	if (Sgl_sign(opnd1) ^ Sgl_sign(opnd2)) 
2029
		Sgl_setzero(resultp1);
2030
	else 
2031
		Sgl_setnegativezero(resultp1); 
2032

2033
	/*
2034
	 * Generate multiply exponent 
2035
	 */
2036
	mpy_exponent = Sgl_exponent(opnd1) + Sgl_exponent(opnd2) - SGL_BIAS;
2037

2038
	/*
2039
	 * check first operand for NaN's or infinity
2040
	 */
2041
	if (Sgl_isinfinity_exponent(opnd1)) {
2042
		if (Sgl_iszero_mantissa(opnd1)) {
2043
			if (Sgl_isnotnan(opnd2) && Sgl_isnotnan(opnd3)) {
2044
				if (Sgl_iszero_exponentmantissa(opnd2)) {
2045
					/* 
2046
					 * invalid since operands are infinity 
2047
					 * and zero 
2048
					 */
2049
					if (Is_invalidtrap_enabled())
2050
						return(OPC_2E_INVALIDEXCEPTION);
2051
					Set_invalidflag();
2052
					Sgl_makequietnan(resultp1);
2053
					Sgl_copytoptr(resultp1,dstptr);
2054
					return(NOEXCEPTION);
2055
				}
2056
				/*
2057
				 * Check third operand for infinity with a
2058
				 *  sign opposite of the multiply result
2059
				 */
2060
				if (Sgl_isinfinity(opnd3) &&
2061
				    (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
2062
					/* 
2063
					 * invalid since attempting a magnitude
2064
					 * subtraction of infinities
2065
					 */
2066
					if (Is_invalidtrap_enabled())
2067
						return(OPC_2E_INVALIDEXCEPTION);
2068
					Set_invalidflag();
2069
					Sgl_makequietnan(resultp1);
2070
					Sgl_copytoptr(resultp1,dstptr);
2071
					return(NOEXCEPTION);
2072
				}
2073

2074
				/*
2075
			 	 * return infinity
2076
			 	 */
2077
				Sgl_setinfinity_exponentmantissa(resultp1);
2078
				Sgl_copytoptr(resultp1,dstptr);
2079
				return(NOEXCEPTION);
2080
			}
2081
		}
2082
		else {
2083
			/*
2084
		 	 * is NaN; signaling or quiet?
2085
		 	 */
2086
			if (Sgl_isone_signaling(opnd1)) {
2087
				/* trap if INVALIDTRAP enabled */
2088
				if (Is_invalidtrap_enabled()) 
2089
			    		return(OPC_2E_INVALIDEXCEPTION);
2090
				/* make NaN quiet */
2091
				Set_invalidflag();
2092
				Sgl_set_quiet(opnd1);
2093
			}
2094
			/* 
2095
			 * is second operand a signaling NaN? 
2096
			 */
2097
			else if (Sgl_is_signalingnan(opnd2)) {
2098
				/* trap if INVALIDTRAP enabled */
2099
				if (Is_invalidtrap_enabled())
2100
			    		return(OPC_2E_INVALIDEXCEPTION);
2101
				/* make NaN quiet */
2102
				Set_invalidflag();
2103
				Sgl_set_quiet(opnd2);
2104
				Sgl_copytoptr(opnd2,dstptr);
2105
				return(NOEXCEPTION);
2106
			}
2107
			/* 
2108
			 * is third operand a signaling NaN? 
2109
			 */
2110
			else if (Sgl_is_signalingnan(opnd3)) {
2111
				/* trap if INVALIDTRAP enabled */
2112
				if (Is_invalidtrap_enabled())
2113
			    		return(OPC_2E_INVALIDEXCEPTION);
2114
				/* make NaN quiet */
2115
				Set_invalidflag();
2116
				Sgl_set_quiet(opnd3);
2117
				Sgl_copytoptr(opnd3,dstptr);
2118
				return(NOEXCEPTION);
2119
			}
2120
			/*
2121
		 	 * return quiet NaN
2122
		 	 */
2123
			Sgl_copytoptr(opnd1,dstptr);
2124
			return(NOEXCEPTION);
2125
		}
2126
	}
2127

2128
	/*
2129
	 * check second operand for NaN's or infinity
2130
	 */
2131
	if (Sgl_isinfinity_exponent(opnd2)) {
2132
		if (Sgl_iszero_mantissa(opnd2)) {
2133
			if (Sgl_isnotnan(opnd3)) {
2134
				if (Sgl_iszero_exponentmantissa(opnd1)) {
2135
					/* 
2136
					 * invalid since multiply operands are
2137
					 * zero & infinity
2138
					 */
2139
					if (Is_invalidtrap_enabled())
2140
						return(OPC_2E_INVALIDEXCEPTION);
2141
					Set_invalidflag();
2142
					Sgl_makequietnan(opnd2);
2143
					Sgl_copytoptr(opnd2,dstptr);
2144
					return(NOEXCEPTION);
2145
				}
2146

2147
				/*
2148
				 * Check third operand for infinity with a
2149
				 *  sign opposite of the multiply result
2150
				 */
2151
				if (Sgl_isinfinity(opnd3) &&
2152
				    (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
2153
					/* 
2154
					 * invalid since attempting a magnitude
2155
					 * subtraction of infinities
2156
					 */
2157
					if (Is_invalidtrap_enabled())
2158
				       		return(OPC_2E_INVALIDEXCEPTION);
2159
				       	Set_invalidflag();
2160
				       	Sgl_makequietnan(resultp1);
2161
					Sgl_copytoptr(resultp1,dstptr);
2162
					return(NOEXCEPTION);
2163
				}
2164

2165
				/*
2166
				 * return infinity
2167
				 */
2168
				Sgl_setinfinity_exponentmantissa(resultp1);
2169
				Sgl_copytoptr(resultp1,dstptr);
2170
				return(NOEXCEPTION);
2171
			}
2172
		}
2173
		else {
2174
			/*
2175
			 * is NaN; signaling or quiet?
2176
			 */
2177
			if (Sgl_isone_signaling(opnd2)) {
2178
				/* trap if INVALIDTRAP enabled */
2179
				if (Is_invalidtrap_enabled())
2180
					return(OPC_2E_INVALIDEXCEPTION);
2181
				/* make NaN quiet */
2182
				Set_invalidflag();
2183
				Sgl_set_quiet(opnd2);
2184
			}
2185
			/* 
2186
			 * is third operand a signaling NaN? 
2187
			 */
2188
			else if (Sgl_is_signalingnan(opnd3)) {
2189
			       	/* trap if INVALIDTRAP enabled */
2190
			       	if (Is_invalidtrap_enabled())
2191
				   		return(OPC_2E_INVALIDEXCEPTION);
2192
			       	/* make NaN quiet */
2193
			       	Set_invalidflag();
2194
			       	Sgl_set_quiet(opnd3);
2195
				Sgl_copytoptr(opnd3,dstptr);
2196
		       		return(NOEXCEPTION);
2197
			}
2198
			/*
2199
			 * return quiet NaN
2200
			 */
2201
			Sgl_copytoptr(opnd2,dstptr);
2202
			return(NOEXCEPTION);
2203
		}
2204
	}
2205

2206
	/*
2207
	 * check third operand for NaN's or infinity
2208
	 */
2209
	if (Sgl_isinfinity_exponent(opnd3)) {
2210
		if (Sgl_iszero_mantissa(opnd3)) {
2211
			/* return infinity */
2212
			Sgl_copytoptr(opnd3,dstptr);
2213
			return(NOEXCEPTION);
2214
		} else {
2215
			/*
2216
			 * is NaN; signaling or quiet?
2217
			 */
2218
			if (Sgl_isone_signaling(opnd3)) {
2219
				/* trap if INVALIDTRAP enabled */
2220
				if (Is_invalidtrap_enabled())
2221
					return(OPC_2E_INVALIDEXCEPTION);
2222
				/* make NaN quiet */
2223
				Set_invalidflag();
2224
				Sgl_set_quiet(opnd3);
2225
			}
2226
			/*
2227
			 * return quiet NaN
2228
 			 */
2229
			Sgl_copytoptr(opnd3,dstptr);
2230
			return(NOEXCEPTION);
2231
		}
2232
    	}
2233

2234
	/*
2235
	 * Generate multiply mantissa
2236
	 */
2237
	if (Sgl_isnotzero_exponent(opnd1)) {
2238
		/* set hidden bit */
2239
		Sgl_clear_signexponent_set_hidden(opnd1);
2240
	}
2241
	else {
2242
		/* check for zero */
2243
		if (Sgl_iszero_mantissa(opnd1)) {
2244
			/*
2245
			 * Perform the add opnd3 with zero here.
2246
			 */
2247
			if (Sgl_iszero_exponentmantissa(opnd3)) {
2248
				if (Is_rounding_mode(ROUNDMINUS)) {
2249
					Sgl_or_signs(opnd3,resultp1);
2250
				} else {
2251
					Sgl_and_signs(opnd3,resultp1);
2252
				}
2253
			}
2254
			/*
2255
			 * Now let's check for trapped underflow case.
2256
			 */
2257
			else if (Sgl_iszero_exponent(opnd3) &&
2258
			         Is_underflowtrap_enabled()) {
2259
                    		/* need to normalize results mantissa */
2260
                    		sign_save = Sgl_signextendedsign(opnd3);
2261
				result_exponent = 0;
2262
                    		Sgl_leftshiftby1(opnd3);
2263
                    		Sgl_normalize(opnd3,result_exponent);
2264
                    		Sgl_set_sign(opnd3,/*using*/sign_save);
2265
                    		Sgl_setwrapped_exponent(opnd3,result_exponent,
2266
							unfl);
2267
                    		Sgl_copytoptr(opnd3,dstptr);
2268
                    		/* inexact = FALSE */
2269
                    		return(OPC_2E_UNDERFLOWEXCEPTION);
2270
			}
2271
			Sgl_copytoptr(opnd3,dstptr);
2272
			return(NOEXCEPTION);
2273
		}
2274
		/* is denormalized, adjust exponent */
2275
		Sgl_clear_signexponent(opnd1);
2276
		Sgl_leftshiftby1(opnd1);
2277
		Sgl_normalize(opnd1,mpy_exponent);
2278
	}
2279
	/* opnd2 needs to have hidden bit set with msb in hidden bit */
2280
	if (Sgl_isnotzero_exponent(opnd2)) {
2281
		Sgl_clear_signexponent_set_hidden(opnd2);
2282
	}
2283
	else {
2284
		/* check for zero */
2285
		if (Sgl_iszero_mantissa(opnd2)) {
2286
			/*
2287
			 * Perform the add opnd3 with zero here.
2288
			 */
2289
			if (Sgl_iszero_exponentmantissa(opnd3)) {
2290
				if (Is_rounding_mode(ROUNDMINUS)) {
2291
					Sgl_or_signs(opnd3,resultp1);
2292
				} else {
2293
					Sgl_and_signs(opnd3,resultp1);
2294
				}
2295
			}
2296
			/*
2297
			 * Now let's check for trapped underflow case.
2298
			 */
2299
			else if (Sgl_iszero_exponent(opnd3) &&
2300
			    Is_underflowtrap_enabled()) {
2301
                    		/* need to normalize results mantissa */
2302
                    		sign_save = Sgl_signextendedsign(opnd3);
2303
				result_exponent = 0;
2304
                    		Sgl_leftshiftby1(opnd3);
2305
                    		Sgl_normalize(opnd3,result_exponent);
2306
                    		Sgl_set_sign(opnd3,/*using*/sign_save);
2307
                    		Sgl_setwrapped_exponent(opnd3,result_exponent,
2308
							unfl);
2309
                    		Sgl_copytoptr(opnd3,dstptr);
2310
                    		/* inexact = FALSE */
2311
                    		return(OPC_2E_UNDERFLOWEXCEPTION);
2312
			}
2313
			Sgl_copytoptr(opnd3,dstptr);
2314
			return(NOEXCEPTION);
2315
		}
2316
		/* is denormalized; want to normalize */
2317
		Sgl_clear_signexponent(opnd2);
2318
		Sgl_leftshiftby1(opnd2);
2319
		Sgl_normalize(opnd2,mpy_exponent);
2320
	}
2321

2322
	/* Multiply the first two source mantissas together */
2323

2324
	/* 
2325
	 * The intermediate result will be kept in tmpres,
2326
	 * which needs enough room for 106 bits of mantissa,
2327
	 * so lets call it a Double extended.
2328
	 */
2329
	Sglext_setzero(tmpresp1,tmpresp2);
2330

2331
	/* 
2332
	 * Four bits at a time are inspected in each loop, and a 
2333
	 * simple shift and add multiply algorithm is used. 
2334
	 */ 
2335
	for (count = SGL_P-1; count >= 0; count -= 4) {
2336
		Sglext_rightshiftby4(tmpresp1,tmpresp2);
2337
		if (Sbit28(opnd1)) {
2338
	 		/* Twoword_add should be an ADD followed by 2 ADDC's */
2339
			Twoword_add(tmpresp1, tmpresp2, opnd2<<3, 0);
2340
		}
2341
		if (Sbit29(opnd1)) {
2342
			Twoword_add(tmpresp1, tmpresp2, opnd2<<2, 0);
2343
		}
2344
		if (Sbit30(opnd1)) {
2345
			Twoword_add(tmpresp1, tmpresp2, opnd2<<1, 0);
2346
		}
2347
		if (Sbit31(opnd1)) {
2348
			Twoword_add(tmpresp1, tmpresp2, opnd2, 0);
2349
		}
2350
		Sgl_rightshiftby4(opnd1);
2351
	}
2352
	if (Is_sexthiddenoverflow(tmpresp1)) {
2353
		/* result mantissa >= 2 (mantissa overflow) */
2354
		mpy_exponent++;
2355
		Sglext_rightshiftby4(tmpresp1,tmpresp2);
2356
	} else {
2357
		Sglext_rightshiftby3(tmpresp1,tmpresp2);
2358
	}
2359

2360
	/*
2361
	 * Restore the sign of the mpy result which was saved in resultp1.
2362
	 * The exponent will continue to be kept in mpy_exponent.
2363
	 */
2364
	Sglext_set_sign(tmpresp1,Sgl_sign(resultp1));
2365

2366
	/* 
2367
	 * No rounding is required, since the result of the multiply
2368
	 * is exact in the extended format.
2369
	 */
2370

2371
	/*
2372
	 * Now we are ready to perform the add portion of the operation.
2373
	 *
2374
	 * The exponents need to be kept as integers for now, since the
2375
	 * multiply result might not fit into the exponent field.  We
2376
	 * can't overflow or underflow because of this yet, since the
2377
	 * add could bring the final result back into range.
2378
	 */
2379
	add_exponent = Sgl_exponent(opnd3);
2380

2381
	/*
2382
	 * Check for denormalized or zero add operand.
2383
	 */
2384
	if (add_exponent == 0) {
2385
		/* check for zero */
2386
		if (Sgl_iszero_mantissa(opnd3)) {
2387
			/* right is zero */
2388
			/* Left can't be zero and must be result.
2389
			 *
2390
			 * The final result is now in tmpres and mpy_exponent,
2391
			 * and needs to be rounded and squeezed back into
2392
			 * double precision format from double extended.
2393
			 */
2394
			result_exponent = mpy_exponent;
2395
			Sglext_copy(tmpresp1,tmpresp2,resultp1,resultp2);
2396
			sign_save = Sgl_signextendedsign(resultp1);/*save sign*/
2397
			goto round;
2398
		}
2399

2400
		/* 
2401
		 * Neither are zeroes.  
2402
		 * Adjust exponent and normalize add operand.
2403
		 */
2404
		sign_save = Sgl_signextendedsign(opnd3);	/* save sign */
2405
		Sgl_clear_signexponent(opnd3);
2406
		Sgl_leftshiftby1(opnd3);
2407
		Sgl_normalize(opnd3,add_exponent);
2408
		Sgl_set_sign(opnd3,sign_save);		/* restore sign */
2409
	} else {
2410
		Sgl_clear_exponent_set_hidden(opnd3);
2411
	}
2412
	/*
2413
	 * Copy opnd3 to the double extended variable called right.
2414
	 */
2415
	Sgl_copyto_sglext(opnd3,rightp1,rightp2);
2416

2417
	/*
2418
	 * A zero "save" helps discover equal operands (for later),
2419
	 * and is used in swapping operands (if needed).
2420
	 */
2421
	Sglext_xortointp1(tmpresp1,rightp1,/*to*/save);
2422

2423
	/*
2424
	 * Compare magnitude of operands.
2425
	 */
2426
	Sglext_copytoint_exponentmantissa(tmpresp1,signlessleft1);
2427
	Sglext_copytoint_exponentmantissa(rightp1,signlessright1);
2428
	if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
2429
	    Sglext_ismagnitudeless(signlessleft1,signlessright1)) {
2430
		/*
2431
		 * Set the left operand to the larger one by XOR swap.
2432
		 * First finish the first word "save".
2433
		 */
2434
		Sglext_xorfromintp1(save,rightp1,/*to*/rightp1);
2435
		Sglext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
2436
		Sglext_swap_lower(tmpresp2,rightp2);
2437
		/* also setup exponents used in rest of routine */
2438
		diff_exponent = add_exponent - mpy_exponent;
2439
		result_exponent = add_exponent;
2440
	} else {
2441
		/* also setup exponents used in rest of routine */
2442
		diff_exponent = mpy_exponent - add_exponent;
2443
		result_exponent = mpy_exponent;
2444
	}
2445
	/* Invariant: left is not smaller than right. */
2446

2447
	/*
2448
	 * Special case alignment of operands that would force alignment
2449
	 * beyond the extent of the extension.  A further optimization
2450
	 * could special case this but only reduces the path length for
2451
	 * this infrequent case.
2452
	 */
2453
	if (diff_exponent > SGLEXT_THRESHOLD) {
2454
		diff_exponent = SGLEXT_THRESHOLD;
2455
	}
2456

2457
	/* Align right operand by shifting it to the right */
2458
	Sglext_clear_sign(rightp1);
2459
	Sglext_right_align(rightp1,rightp2,/*shifted by*/diff_exponent);
2460
	
2461
	/* Treat sum and difference of the operands separately. */
2462
	if ((int)save < 0) {
2463
		/*
2464
		 * Difference of the two operands.  Overflow can occur if the
2465
		 * multiply overflowed.  A borrow can occur out of the hidden
2466
		 * bit and force a post normalization phase.
2467
		 */
2468
		Sglext_subtract(tmpresp1,tmpresp2, rightp1,rightp2,
2469
			resultp1,resultp2);
2470
		sign_save = Sgl_signextendedsign(resultp1);
2471
		if (Sgl_iszero_hidden(resultp1)) {
2472
			/* Handle normalization */
2473
		/* A straightforward algorithm would now shift the
2474
		 * result and extension left until the hidden bit
2475
		 * becomes one.  Not all of the extension bits need
2476
		 * participate in the shift.  Only the two most 
2477
		 * significant bits (round and guard) are needed.
2478
		 * If only a single shift is needed then the guard
2479
		 * bit becomes a significant low order bit and the
2480
		 * extension must participate in the rounding.
2481
		 * If more than a single shift is needed, then all
2482
		 * bits to the right of the guard bit are zeros, 
2483
		 * and the guard bit may or may not be zero. */
2484
			Sglext_leftshiftby1(resultp1,resultp2);
2485

2486
			/* Need to check for a zero result.  The sign and
2487
			 * exponent fields have already been zeroed.  The more
2488
			 * efficient test of the full object can be used.
2489
			 */
2490
			 if (Sglext_iszero(resultp1,resultp2)) {
2491
				/* Must have been "x-x" or "x+(-x)". */
2492
				if (Is_rounding_mode(ROUNDMINUS))
2493
					Sgl_setone_sign(resultp1);
2494
				Sgl_copytoptr(resultp1,dstptr);
2495
				return(NOEXCEPTION);
2496
			}
2497
			result_exponent--;
2498

2499
			/* Look to see if normalization is finished. */
2500
			if (Sgl_isone_hidden(resultp1)) {
2501
				/* No further normalization is needed */
2502
				goto round;
2503
			}
2504

2505
			/* Discover first one bit to determine shift amount.
2506
			 * Use a modified binary search.  We have already
2507
			 * shifted the result one position right and still
2508
			 * not found a one so the remainder of the extension
2509
			 * must be zero and simplifies rounding. */
2510
			/* Scan bytes */
2511
			while (Sgl_iszero_hiddenhigh7mantissa(resultp1)) {
2512
				Sglext_leftshiftby8(resultp1,resultp2);
2513
				result_exponent -= 8;
2514
			}
2515
			/* Now narrow it down to the nibble */
2516
			if (Sgl_iszero_hiddenhigh3mantissa(resultp1)) {
2517
				/* The lower nibble contains the
2518
				 * normalizing one */
2519
				Sglext_leftshiftby4(resultp1,resultp2);
2520
				result_exponent -= 4;
2521
			}
2522
			/* Select case where first bit is set (already
2523
			 * normalized) otherwise select the proper shift. */
2524
			jumpsize = Sgl_hiddenhigh3mantissa(resultp1);
2525
			if (jumpsize <= 7) switch(jumpsize) {
2526
			case 1:
2527
				Sglext_leftshiftby3(resultp1,resultp2);
2528
				result_exponent -= 3;
2529
				break;
2530
			case 2:
2531
			case 3:
2532
				Sglext_leftshiftby2(resultp1,resultp2);
2533
				result_exponent -= 2;
2534
				break;
2535
			case 4:
2536
			case 5:
2537
			case 6:
2538
			case 7:
2539
				Sglext_leftshiftby1(resultp1,resultp2);
2540
				result_exponent -= 1;
2541
				break;
2542
			}
2543
		} /* end if (hidden...)... */
2544
	/* Fall through and round */
2545
	} /* end if (save < 0)... */
2546
	else {
2547
		/* Add magnitudes */
2548
		Sglext_addition(tmpresp1,tmpresp2,
2549
			rightp1,rightp2, /*to*/resultp1,resultp2);
2550
		sign_save = Sgl_signextendedsign(resultp1);
2551
		if (Sgl_isone_hiddenoverflow(resultp1)) {
2552
	    		/* Prenormalization required. */
2553
	    		Sglext_arithrightshiftby1(resultp1,resultp2);
2554
	    		result_exponent++;
2555
		} /* end if hiddenoverflow... */
2556
	} /* end else ...add magnitudes... */
2557

2558
	/* Round the result.  If the extension and lower two words are
2559
	 * all zeros, then the result is exact.  Otherwise round in the
2560
	 * correct direction.  Underflow is possible. If a postnormalization
2561
	 * is necessary, then the mantissa is all zeros so no shift is needed.
2562
	 */
2563
  round:
2564
	if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
2565
		Sglext_denormalize(resultp1,resultp2,result_exponent,is_tiny);
2566
	}
2567
	Sgl_set_sign(resultp1,/*using*/sign_save);
2568
	if (Sglext_isnotzero_mantissap2(resultp2)) {
2569
		inexact = TRUE;
2570
		switch(Rounding_mode()) {
2571
		case ROUNDNEAREST: /* The default. */
2572
			if (Sglext_isone_highp2(resultp2)) {
2573
				/* at least 1/2 ulp */
2574
				if (Sglext_isnotzero_low31p2(resultp2) ||
2575
				    Sglext_isone_lowp1(resultp1)) {
2576
					/* either exactly half way and odd or
2577
					 * more than 1/2ulp */
2578
					Sgl_increment(resultp1);
2579
				}
2580
			}
2581
	    		break;
2582

2583
		case ROUNDPLUS:
2584
	    		if (Sgl_iszero_sign(resultp1)) {
2585
				/* Round up positive results */
2586
				Sgl_increment(resultp1);
2587
			}
2588
			break;
2589
	    
2590
		case ROUNDMINUS:
2591
	    		if (Sgl_isone_sign(resultp1)) {
2592
				/* Round down negative results */
2593
				Sgl_increment(resultp1);
2594
			}
2595
	    
2596
		case ROUNDZERO:;
2597
			/* truncate is simple */
2598
		} /* end switch... */
2599
		if (Sgl_isone_hiddenoverflow(resultp1)) result_exponent++;
2600
	}
2601
	if (result_exponent >= SGL_INFINITY_EXPONENT) {
2602
		/* Overflow */
2603
		if (Is_overflowtrap_enabled()) {
2604
                        /*
2605
                         * Adjust bias of result
2606
                         */
2607
                        Sgl_setwrapped_exponent(resultp1,result_exponent,ovfl);
2608
                        Sgl_copytoptr(resultp1,dstptr);
2609
                        if (inexact)
2610
                            if (Is_inexacttrap_enabled())
2611
                                return (OPC_2E_OVERFLOWEXCEPTION |
2612
					OPC_2E_INEXACTEXCEPTION);
2613
                            else Set_inexactflag();
2614
                        return (OPC_2E_OVERFLOWEXCEPTION);
2615
		}
2616
		inexact = TRUE;
2617
		Set_overflowflag();
2618
		Sgl_setoverflow(resultp1);
2619
	} else if (result_exponent <= 0) {	/* underflow case */
2620
		if (Is_underflowtrap_enabled()) {
2621
                        /*
2622
                         * Adjust bias of result
2623
                         */
2624
                	Sgl_setwrapped_exponent(resultp1,result_exponent,unfl);
2625
			Sgl_copytoptr(resultp1,dstptr);
2626
                        if (inexact)
2627
                            if (Is_inexacttrap_enabled())
2628
                                return (OPC_2E_UNDERFLOWEXCEPTION |
2629
					OPC_2E_INEXACTEXCEPTION);
2630
                            else Set_inexactflag();
2631
	    		return(OPC_2E_UNDERFLOWEXCEPTION);
2632
		}
2633
		else if (inexact && is_tiny) Set_underflowflag();
2634
	}
2635
	else Sgl_set_exponent(resultp1,result_exponent);
2636
	Sgl_copytoptr(resultp1,dstptr);
2637
	if (inexact) 
2638
		if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
2639
		else Set_inexactflag();
2640
    	return(NOEXCEPTION);
2641
}
2642

2643

2644
Product

Resources

Company