CoCalc -- fmpyfadd.c

GitHub Repository: awilliam/linux-vfio
Path: blob/master/arch/parisc/math-emu/fmpyfadd.c
¹⁰⁸¹⁷ views
1
/*
2
 * Linux/PA-RISC Project (http://www.parisc-linux.org/)
3
 *
4
 * Floating-point emulation code
5
 *  Copyright (C) 2001 Hewlett-Packard (Paul Bame) <[email protected]>
6
 *
7
 *    This program is free software; you can redistribute it and/or modify
8
 *    it under the terms of the GNU General Public License as published by
9
 *    the Free Software Foundation; either version 2, or (at your option)
10
 *    any later version.
11
 *
12
 *    This program is distributed in the hope that it will be useful,
13
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
 *    GNU General Public License for more details.
16
 *
17
 *    You should have received a copy of the GNU General Public License
18
 *    along with this program; if not, write to the Free Software
19
 *    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
20
 */
21
/*
22
 * BEGIN_DESC
23
 *
24
 *  File:
25
 *	@(#)	pa/spmath/fmpyfadd.c		$Revision: 1.1 $
26
 *
27
 *  Purpose:
28
 *	Double Floating-point Multiply Fused Add
29
 *	Double Floating-point Multiply Negate Fused Add
30
 *	Single Floating-point Multiply Fused Add
31
 *	Single Floating-point Multiply Negate Fused Add
32
 *
33
 *  External Interfaces:
34
 *	dbl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
35
 *	dbl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
36
 *	sgl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
37
 *	sgl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
38
 *
39
 *  Internal Interfaces:
40
 *
41
 *  Theory:
42
 *	<<please update with a overview of the operation of this file>>
43
 *
44
 * END_DESC
45
*/
46

47

48
#include "float.h"
49
#include "sgl_float.h"
50
#include "dbl_float.h"
51

52

53
/*
54
 *  Double Floating-point Multiply Fused Add
55
 */
56

57
int
58
dbl_fmpyfadd(
59
	    dbl_floating_point *src1ptr,
60
	    dbl_floating_point *src2ptr,
61
	    dbl_floating_point *src3ptr,
62
	    unsigned int *status,
63
	    dbl_floating_point *dstptr)
64
{
65
	unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2, opnd3p1, opnd3p2;
66
	register unsigned int tmpresp1, tmpresp2, tmpresp3, tmpresp4;
67
	unsigned int rightp1, rightp2, rightp3, rightp4;
68
	unsigned int resultp1, resultp2 = 0, resultp3 = 0, resultp4 = 0;
69
	register int mpy_exponent, add_exponent, count;
70
	boolean inexact = FALSE, is_tiny = FALSE;
71

72
	unsigned int signlessleft1, signlessright1, save;
73
	register int result_exponent, diff_exponent;
74
	int sign_save, jumpsize;
75
	
76
	Dbl_copyfromptr(src1ptr,opnd1p1,opnd1p2);
77
	Dbl_copyfromptr(src2ptr,opnd2p1,opnd2p2);
78
	Dbl_copyfromptr(src3ptr,opnd3p1,opnd3p2);
79

80
	/* 
81
	 * set sign bit of result of multiply
82
	 */
83
	if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1)) 
84
		Dbl_setnegativezerop1(resultp1); 
85
	else Dbl_setzerop1(resultp1);
86

87
	/*
88
	 * Generate multiply exponent 
89
	 */
90
	mpy_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) - DBL_BIAS;
91

92
	/*
93
	 * check first operand for NaN's or infinity
94
	 */
95
	if (Dbl_isinfinity_exponent(opnd1p1)) {
96
		if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
97
			if (Dbl_isnotnan(opnd2p1,opnd2p2) &&
98
			    Dbl_isnotnan(opnd3p1,opnd3p2)) {
99
				if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) {
100
					/* 
101
					 * invalid since operands are infinity 
102
					 * and zero 
103
					 */
104
					if (Is_invalidtrap_enabled())
105
						return(OPC_2E_INVALIDEXCEPTION);
106
					Set_invalidflag();
107
					Dbl_makequietnan(resultp1,resultp2);
108
					Dbl_copytoptr(resultp1,resultp2,dstptr);
109
					return(NOEXCEPTION);
110
				}
111
				/*
112
				 * Check third operand for infinity with a
113
				 *  sign opposite of the multiply result
114
				 */
115
				if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
116
				    (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
117
					/* 
118
					 * invalid since attempting a magnitude
119
					 * subtraction of infinities
120
					 */
121
					if (Is_invalidtrap_enabled())
122
						return(OPC_2E_INVALIDEXCEPTION);
123
					Set_invalidflag();
124
					Dbl_makequietnan(resultp1,resultp2);
125
					Dbl_copytoptr(resultp1,resultp2,dstptr);
126
					return(NOEXCEPTION);
127
				}
128

129
				/*
130
			 	 * return infinity
131
			 	 */
132
				Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
133
				Dbl_copytoptr(resultp1,resultp2,dstptr);
134
				return(NOEXCEPTION);
135
			}
136
		}
137
		else {
138
			/*
139
		 	 * is NaN; signaling or quiet?
140
		 	 */
141
			if (Dbl_isone_signaling(opnd1p1)) {
142
				/* trap if INVALIDTRAP enabled */
143
				if (Is_invalidtrap_enabled()) 
144
			    		return(OPC_2E_INVALIDEXCEPTION);
145
				/* make NaN quiet */
146
				Set_invalidflag();
147
				Dbl_set_quiet(opnd1p1);
148
			}
149
			/* 
150
			 * is second operand a signaling NaN? 
151
			 */
152
			else if (Dbl_is_signalingnan(opnd2p1)) {
153
				/* trap if INVALIDTRAP enabled */
154
				if (Is_invalidtrap_enabled())
155
			    		return(OPC_2E_INVALIDEXCEPTION);
156
				/* make NaN quiet */
157
				Set_invalidflag();
158
				Dbl_set_quiet(opnd2p1);
159
				Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
160
				return(NOEXCEPTION);
161
			}
162
			/* 
163
			 * is third operand a signaling NaN? 
164
			 */
165
			else if (Dbl_is_signalingnan(opnd3p1)) {
166
				/* trap if INVALIDTRAP enabled */
167
				if (Is_invalidtrap_enabled())
168
			    		return(OPC_2E_INVALIDEXCEPTION);
169
				/* make NaN quiet */
170
				Set_invalidflag();
171
				Dbl_set_quiet(opnd3p1);
172
				Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
173
				return(NOEXCEPTION);
174
			}
175
			/*
176
		 	 * return quiet NaN
177
		 	 */
178
			Dbl_copytoptr(opnd1p1,opnd1p2,dstptr);
179
			return(NOEXCEPTION);
180
		}
181
	}
182

183
	/*
184
	 * check second operand for NaN's or infinity
185
	 */
186
	if (Dbl_isinfinity_exponent(opnd2p1)) {
187
		if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
188
			if (Dbl_isnotnan(opnd3p1,opnd3p2)) {
189
				if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) {
190
					/* 
191
					 * invalid since multiply operands are
192
					 * zero & infinity
193
					 */
194
					if (Is_invalidtrap_enabled())
195
						return(OPC_2E_INVALIDEXCEPTION);
196
					Set_invalidflag();
197
					Dbl_makequietnan(opnd2p1,opnd2p2);
198
					Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
199
					return(NOEXCEPTION);
200
				}
201

202
				/*
203
				 * Check third operand for infinity with a
204
				 *  sign opposite of the multiply result
205
				 */
206
				if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
207
				    (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
208
					/* 
209
					 * invalid since attempting a magnitude
210
					 * subtraction of infinities
211
					 */
212
					if (Is_invalidtrap_enabled())
213
				       		return(OPC_2E_INVALIDEXCEPTION);
214
				       	Set_invalidflag();
215
				       	Dbl_makequietnan(resultp1,resultp2);
216
					Dbl_copytoptr(resultp1,resultp2,dstptr);
217
					return(NOEXCEPTION);
218
				}
219

220
				/*
221
				 * return infinity
222
				 */
223
				Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
224
				Dbl_copytoptr(resultp1,resultp2,dstptr);
225
				return(NOEXCEPTION);
226
			}
227
		}
228
		else {
229
			/*
230
			 * is NaN; signaling or quiet?
231
			 */
232
			if (Dbl_isone_signaling(opnd2p1)) {
233
				/* trap if INVALIDTRAP enabled */
234
				if (Is_invalidtrap_enabled())
235
					return(OPC_2E_INVALIDEXCEPTION);
236
				/* make NaN quiet */
237
				Set_invalidflag();
238
				Dbl_set_quiet(opnd2p1);
239
			}
240
			/* 
241
			 * is third operand a signaling NaN? 
242
			 */
243
			else if (Dbl_is_signalingnan(opnd3p1)) {
244
			       	/* trap if INVALIDTRAP enabled */
245
			       	if (Is_invalidtrap_enabled())
246
				   		return(OPC_2E_INVALIDEXCEPTION);
247
			       	/* make NaN quiet */
248
			       	Set_invalidflag();
249
			       	Dbl_set_quiet(opnd3p1);
250
				Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
251
		       		return(NOEXCEPTION);
252
			}
253
			/*
254
			 * return quiet NaN
255
			 */
256
			Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
257
			return(NOEXCEPTION);
258
		}
259
	}
260

261
	/*
262
	 * check third operand for NaN's or infinity
263
	 */
264
	if (Dbl_isinfinity_exponent(opnd3p1)) {
265
		if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
266
			/* return infinity */
267
			Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
268
			return(NOEXCEPTION);
269
		} else {
270
			/*
271
			 * is NaN; signaling or quiet?
272
			 */
273
			if (Dbl_isone_signaling(opnd3p1)) {
274
				/* trap if INVALIDTRAP enabled */
275
				if (Is_invalidtrap_enabled())
276
					return(OPC_2E_INVALIDEXCEPTION);
277
				/* make NaN quiet */
278
				Set_invalidflag();
279
				Dbl_set_quiet(opnd3p1);
280
			}
281
			/*
282
			 * return quiet NaN
283
 			 */
284
			Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
285
			return(NOEXCEPTION);
286
		}
287
    	}
288

289
	/*
290
	 * Generate multiply mantissa
291
	 */
292
	if (Dbl_isnotzero_exponent(opnd1p1)) {
293
		/* set hidden bit */
294
		Dbl_clear_signexponent_set_hidden(opnd1p1);
295
	}
296
	else {
297
		/* check for zero */
298
		if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
299
			/*
300
			 * Perform the add opnd3 with zero here.
301
			 */
302
			if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
303
				if (Is_rounding_mode(ROUNDMINUS)) {
304
					Dbl_or_signs(opnd3p1,resultp1);
305
				} else {
306
					Dbl_and_signs(opnd3p1,resultp1);
307
				}
308
			}
309
			/*
310
			 * Now let's check for trapped underflow case.
311
			 */
312
			else if (Dbl_iszero_exponent(opnd3p1) &&
313
			         Is_underflowtrap_enabled()) {
314
                    		/* need to normalize results mantissa */
315
                    		sign_save = Dbl_signextendedsign(opnd3p1);
316
				result_exponent = 0;
317
                    		Dbl_leftshiftby1(opnd3p1,opnd3p2);
318
                    		Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
319
                    		Dbl_set_sign(opnd3p1,/*using*/sign_save);
320
                    		Dbl_setwrapped_exponent(opnd3p1,result_exponent,
321
							unfl);
322
                    		Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
323
                    		/* inexact = FALSE */
324
                    		return(OPC_2E_UNDERFLOWEXCEPTION);
325
			}
326
			Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
327
			return(NOEXCEPTION);
328
		}
329
		/* is denormalized, adjust exponent */
330
		Dbl_clear_signexponent(opnd1p1);
331
		Dbl_leftshiftby1(opnd1p1,opnd1p2);
332
		Dbl_normalize(opnd1p1,opnd1p2,mpy_exponent);
333
	}
334
	/* opnd2 needs to have hidden bit set with msb in hidden bit */
335
	if (Dbl_isnotzero_exponent(opnd2p1)) {
336
		Dbl_clear_signexponent_set_hidden(opnd2p1);
337
	}
338
	else {
339
		/* check for zero */
340
		if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
341
			/*
342
			 * Perform the add opnd3 with zero here.
343
			 */
344
			if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
345
				if (Is_rounding_mode(ROUNDMINUS)) {
346
					Dbl_or_signs(opnd3p1,resultp1);
347
				} else {
348
					Dbl_and_signs(opnd3p1,resultp1);
349
				}
350
			}
351
			/*
352
			 * Now let's check for trapped underflow case.
353
			 */
354
			else if (Dbl_iszero_exponent(opnd3p1) &&
355
			    Is_underflowtrap_enabled()) {
356
                    		/* need to normalize results mantissa */
357
                    		sign_save = Dbl_signextendedsign(opnd3p1);
358
				result_exponent = 0;
359
                    		Dbl_leftshiftby1(opnd3p1,opnd3p2);
360
                    		Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
361
                    		Dbl_set_sign(opnd3p1,/*using*/sign_save);
362
                    		Dbl_setwrapped_exponent(opnd3p1,result_exponent,
363
							unfl);
364
                    		Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
365
                    		/* inexact = FALSE */
366
				return(OPC_2E_UNDERFLOWEXCEPTION);
367
			}
368
			Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
369
			return(NOEXCEPTION);
370
		}
371
		/* is denormalized; want to normalize */
372
		Dbl_clear_signexponent(opnd2p1);
373
		Dbl_leftshiftby1(opnd2p1,opnd2p2);
374
		Dbl_normalize(opnd2p1,opnd2p2,mpy_exponent);
375
	}
376

377
	/* Multiply the first two source mantissas together */
378

379
	/* 
380
	 * The intermediate result will be kept in tmpres,
381
	 * which needs enough room for 106 bits of mantissa,
382
	 * so lets call it a Double extended.
383
	 */
384
	Dblext_setzero(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
385

386
	/* 
387
	 * Four bits at a time are inspected in each loop, and a 
388
	 * simple shift and add multiply algorithm is used. 
389
	 */ 
390
	for (count = DBL_P-1; count >= 0; count -= 4) {
391
		Dblext_rightshiftby4(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
392
		if (Dbit28p2(opnd1p2)) {
393
	 		/* Fourword_add should be an ADD followed by 3 ADDC's */
394
			Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4, 
395
			 opnd2p1<<3 | opnd2p2>>29, opnd2p2<<3, 0, 0);
396
		}
397
		if (Dbit29p2(opnd1p2)) {
398
			Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
399
			 opnd2p1<<2 | opnd2p2>>30, opnd2p2<<2, 0, 0);
400
		}
401
		if (Dbit30p2(opnd1p2)) {
402
			Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
403
			 opnd2p1<<1 | opnd2p2>>31, opnd2p2<<1, 0, 0);
404
		}
405
		if (Dbit31p2(opnd1p2)) {
406
			Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
407
			 opnd2p1, opnd2p2, 0, 0);
408
		}
409
		Dbl_rightshiftby4(opnd1p1,opnd1p2);
410
	}
411
	if (Is_dexthiddenoverflow(tmpresp1)) {
412
		/* result mantissa >= 2 (mantissa overflow) */
413
		mpy_exponent++;
414
		Dblext_rightshiftby1(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
415
	}
416

417
	/*
418
	 * Restore the sign of the mpy result which was saved in resultp1.
419
	 * The exponent will continue to be kept in mpy_exponent.
420
	 */
421
	Dblext_set_sign(tmpresp1,Dbl_sign(resultp1));
422

423
	/* 
424
	 * No rounding is required, since the result of the multiply
425
	 * is exact in the extended format.
426
	 */
427

428
	/*
429
	 * Now we are ready to perform the add portion of the operation.
430
	 *
431
	 * The exponents need to be kept as integers for now, since the
432
	 * multiply result might not fit into the exponent field.  We
433
	 * can't overflow or underflow because of this yet, since the
434
	 * add could bring the final result back into range.
435
	 */
436
	add_exponent = Dbl_exponent(opnd3p1);
437

438
	/*
439
	 * Check for denormalized or zero add operand.
440
	 */
441
	if (add_exponent == 0) {
442
		/* check for zero */
443
		if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
444
			/* right is zero */
445
			/* Left can't be zero and must be result.
446
			 *
447
			 * The final result is now in tmpres and mpy_exponent,
448
			 * and needs to be rounded and squeezed back into
449
			 * double precision format from double extended.
450
			 */
451
			result_exponent = mpy_exponent;
452
			Dblext_copy(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
453
				resultp1,resultp2,resultp3,resultp4);
454
			sign_save = Dbl_signextendedsign(resultp1);/*save sign*/
455
			goto round;
456
		}
457

458
		/* 
459
		 * Neither are zeroes.  
460
		 * Adjust exponent and normalize add operand.
461
		 */
462
		sign_save = Dbl_signextendedsign(opnd3p1);	/* save sign */
463
		Dbl_clear_signexponent(opnd3p1);
464
		Dbl_leftshiftby1(opnd3p1,opnd3p2);
465
		Dbl_normalize(opnd3p1,opnd3p2,add_exponent);
466
		Dbl_set_sign(opnd3p1,sign_save);	/* restore sign */
467
	} else {
468
		Dbl_clear_exponent_set_hidden(opnd3p1);
469
	}
470
	/*
471
	 * Copy opnd3 to the double extended variable called right.
472
	 */
473
	Dbl_copyto_dblext(opnd3p1,opnd3p2,rightp1,rightp2,rightp3,rightp4);
474

475
	/*
476
	 * A zero "save" helps discover equal operands (for later),
477
	 * and is used in swapping operands (if needed).
478
	 */
479
	Dblext_xortointp1(tmpresp1,rightp1,/*to*/save);
480

481
	/*
482
	 * Compare magnitude of operands.
483
	 */
484
	Dblext_copytoint_exponentmantissap1(tmpresp1,signlessleft1);
485
	Dblext_copytoint_exponentmantissap1(rightp1,signlessright1);
486
	if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
487
	    Dblext_ismagnitudeless(tmpresp2,rightp2,signlessleft1,signlessright1)){
488
		/*
489
		 * Set the left operand to the larger one by XOR swap.
490
		 * First finish the first word "save".
491
		 */
492
		Dblext_xorfromintp1(save,rightp1,/*to*/rightp1);
493
		Dblext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
494
		Dblext_swap_lower(tmpresp2,tmpresp3,tmpresp4,
495
			rightp2,rightp3,rightp4);
496
		/* also setup exponents used in rest of routine */
497
		diff_exponent = add_exponent - mpy_exponent;
498
		result_exponent = add_exponent;
499
	} else {
500
		/* also setup exponents used in rest of routine */
501
		diff_exponent = mpy_exponent - add_exponent;
502
		result_exponent = mpy_exponent;
503
	}
504
	/* Invariant: left is not smaller than right. */
505

506
	/*
507
	 * Special case alignment of operands that would force alignment
508
	 * beyond the extent of the extension.  A further optimization
509
	 * could special case this but only reduces the path length for
510
	 * this infrequent case.
511
	 */
512
	if (diff_exponent > DBLEXT_THRESHOLD) {
513
		diff_exponent = DBLEXT_THRESHOLD;
514
	}
515

516
	/* Align right operand by shifting it to the right */
517
	Dblext_clear_sign(rightp1);
518
	Dblext_right_align(rightp1,rightp2,rightp3,rightp4,
519
		/*shifted by*/diff_exponent);
520
	
521
	/* Treat sum and difference of the operands separately. */
522
	if ((int)save < 0) {
523
		/*
524
		 * Difference of the two operands.  Overflow can occur if the
525
		 * multiply overflowed.  A borrow can occur out of the hidden
526
		 * bit and force a post normalization phase.
527
		 */
528
		Dblext_subtract(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
529
			rightp1,rightp2,rightp3,rightp4,
530
			resultp1,resultp2,resultp3,resultp4);
531
		sign_save = Dbl_signextendedsign(resultp1);
532
		if (Dbl_iszero_hidden(resultp1)) {
533
			/* Handle normalization */
534
		/* A straightforward algorithm would now shift the
535
		 * result and extension left until the hidden bit
536
		 * becomes one.  Not all of the extension bits need
537
		 * participate in the shift.  Only the two most 
538
		 * significant bits (round and guard) are needed.
539
		 * If only a single shift is needed then the guard
540
		 * bit becomes a significant low order bit and the
541
		 * extension must participate in the rounding.
542
		 * If more than a single shift is needed, then all
543
		 * bits to the right of the guard bit are zeros, 
544
		 * and the guard bit may or may not be zero. */
545
			Dblext_leftshiftby1(resultp1,resultp2,resultp3,
546
				resultp4);
547

548
			/* Need to check for a zero result.  The sign and
549
			 * exponent fields have already been zeroed.  The more
550
			 * efficient test of the full object can be used.
551
			 */
552
			 if(Dblext_iszero(resultp1,resultp2,resultp3,resultp4)){
553
				/* Must have been "x-x" or "x+(-x)". */
554
				if (Is_rounding_mode(ROUNDMINUS))
555
					Dbl_setone_sign(resultp1);
556
				Dbl_copytoptr(resultp1,resultp2,dstptr);
557
				return(NOEXCEPTION);
558
			}
559
			result_exponent--;
560

561
			/* Look to see if normalization is finished. */
562
			if (Dbl_isone_hidden(resultp1)) {
563
				/* No further normalization is needed */
564
				goto round;
565
			}
566

567
			/* Discover first one bit to determine shift amount.
568
			 * Use a modified binary search.  We have already
569
			 * shifted the result one position right and still
570
			 * not found a one so the remainder of the extension
571
			 * must be zero and simplifies rounding. */
572
			/* Scan bytes */
573
			while (Dbl_iszero_hiddenhigh7mantissa(resultp1)) {
574
				Dblext_leftshiftby8(resultp1,resultp2,resultp3,resultp4);
575
				result_exponent -= 8;
576
			}
577
			/* Now narrow it down to the nibble */
578
			if (Dbl_iszero_hiddenhigh3mantissa(resultp1)) {
579
				/* The lower nibble contains the
580
				 * normalizing one */
581
				Dblext_leftshiftby4(resultp1,resultp2,resultp3,resultp4);
582
				result_exponent -= 4;
583
			}
584
			/* Select case where first bit is set (already
585
			 * normalized) otherwise select the proper shift. */
586
			jumpsize = Dbl_hiddenhigh3mantissa(resultp1);
587
			if (jumpsize <= 7) switch(jumpsize) {
588
			case 1:
589
				Dblext_leftshiftby3(resultp1,resultp2,resultp3,
590
					resultp4);
591
				result_exponent -= 3;
592
				break;
593
			case 2:
594
			case 3:
595
				Dblext_leftshiftby2(resultp1,resultp2,resultp3,
596
					resultp4);
597
				result_exponent -= 2;
598
				break;
599
			case 4:
600
			case 5:
601
			case 6:
602
			case 7:
603
				Dblext_leftshiftby1(resultp1,resultp2,resultp3,
604
					resultp4);
605
				result_exponent -= 1;
606
				break;
607
			}
608
		} /* end if (hidden...)... */
609
	/* Fall through and round */
610
	} /* end if (save < 0)... */
611
	else {
612
		/* Add magnitudes */
613
		Dblext_addition(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
614
			rightp1,rightp2,rightp3,rightp4,
615
			/*to*/resultp1,resultp2,resultp3,resultp4);
616
		sign_save = Dbl_signextendedsign(resultp1);
617
		if (Dbl_isone_hiddenoverflow(resultp1)) {
618
	    		/* Prenormalization required. */
619
	    		Dblext_arithrightshiftby1(resultp1,resultp2,resultp3,
620
				resultp4);
621
	    		result_exponent++;
622
		} /* end if hiddenoverflow... */
623
	} /* end else ...add magnitudes... */
624

625
	/* Round the result.  If the extension and lower two words are
626
	 * all zeros, then the result is exact.  Otherwise round in the
627
	 * correct direction.  Underflow is possible. If a postnormalization
628
	 * is necessary, then the mantissa is all zeros so no shift is needed.
629
	 */
630
  round:
631
	if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
632
		Dblext_denormalize(resultp1,resultp2,resultp3,resultp4,
633
			result_exponent,is_tiny);
634
	}
635
	Dbl_set_sign(resultp1,/*using*/sign_save);
636
	if (Dblext_isnotzero_mantissap3(resultp3) || 
637
	    Dblext_isnotzero_mantissap4(resultp4)) {
638
		inexact = TRUE;
639
		switch(Rounding_mode()) {
640
		case ROUNDNEAREST: /* The default. */
641
			if (Dblext_isone_highp3(resultp3)) {
642
				/* at least 1/2 ulp */
643
				if (Dblext_isnotzero_low31p3(resultp3) ||
644
				    Dblext_isnotzero_mantissap4(resultp4) ||
645
				    Dblext_isone_lowp2(resultp2)) {
646
					/* either exactly half way and odd or
647
					 * more than 1/2ulp */
648
					Dbl_increment(resultp1,resultp2);
649
				}
650
			}
651
	    		break;
652

653
		case ROUNDPLUS:
654
	    		if (Dbl_iszero_sign(resultp1)) {
655
				/* Round up positive results */
656
				Dbl_increment(resultp1,resultp2);
657
			}
658
			break;
659
	    
660
		case ROUNDMINUS:
661
	    		if (Dbl_isone_sign(resultp1)) {
662
				/* Round down negative results */
663
				Dbl_increment(resultp1,resultp2);
664
			}
665
	    
666
		case ROUNDZERO:;
667
			/* truncate is simple */
668
		} /* end switch... */
669
		if (Dbl_isone_hiddenoverflow(resultp1)) result_exponent++;
670
	}
671
	if (result_exponent >= DBL_INFINITY_EXPONENT) {
672
                /* trap if OVERFLOWTRAP enabled */
673
                if (Is_overflowtrap_enabled()) {
674
                        /*
675
                         * Adjust bias of result
676
                         */
677
                        Dbl_setwrapped_exponent(resultp1,result_exponent,ovfl);
678
                        Dbl_copytoptr(resultp1,resultp2,dstptr);
679
                        if (inexact)
680
                            if (Is_inexacttrap_enabled())
681
                                return (OPC_2E_OVERFLOWEXCEPTION |
682
					OPC_2E_INEXACTEXCEPTION);
683
                            else Set_inexactflag();
684
                        return (OPC_2E_OVERFLOWEXCEPTION);
685
                }
686
                inexact = TRUE;
687
                Set_overflowflag();
688
                /* set result to infinity or largest number */
689
                Dbl_setoverflow(resultp1,resultp2);
690

691
	} else if (result_exponent <= 0) {	/* underflow case */
692
		if (Is_underflowtrap_enabled()) {
693
                        /*
694
                         * Adjust bias of result
695
                         */
696
                	Dbl_setwrapped_exponent(resultp1,result_exponent,unfl);
697
			Dbl_copytoptr(resultp1,resultp2,dstptr);
698
                        if (inexact)
699
                            if (Is_inexacttrap_enabled())
700
                                return (OPC_2E_UNDERFLOWEXCEPTION |
701
					OPC_2E_INEXACTEXCEPTION);
702
                            else Set_inexactflag();
703
	    		return(OPC_2E_UNDERFLOWEXCEPTION);
704
		}
705
		else if (inexact && is_tiny) Set_underflowflag();
706
	}
707
	else Dbl_set_exponent(resultp1,result_exponent);
708
	Dbl_copytoptr(resultp1,resultp2,dstptr);
709
	if (inexact) 
710
		if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
711
		else Set_inexactflag();
712
    	return(NOEXCEPTION);
713
}
714

715
/*
716
 *  Double Floating-point Multiply Negate Fused Add
717
 */
718

719
dbl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
720

721
dbl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr;
722
unsigned int *status;
723
{
724
	unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2, opnd3p1, opnd3p2;
725
	register unsigned int tmpresp1, tmpresp2, tmpresp3, tmpresp4;
726
	unsigned int rightp1, rightp2, rightp3, rightp4;
727
	unsigned int resultp1, resultp2 = 0, resultp3 = 0, resultp4 = 0;
728
	register int mpy_exponent, add_exponent, count;
729
	boolean inexact = FALSE, is_tiny = FALSE;
730

731
	unsigned int signlessleft1, signlessright1, save;
732
	register int result_exponent, diff_exponent;
733
	int sign_save, jumpsize;
734
	
735
	Dbl_copyfromptr(src1ptr,opnd1p1,opnd1p2);
736
	Dbl_copyfromptr(src2ptr,opnd2p1,opnd2p2);
737
	Dbl_copyfromptr(src3ptr,opnd3p1,opnd3p2);
738

739
	/* 
740
	 * set sign bit of result of multiply
741
	 */
742
	if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1)) 
743
		Dbl_setzerop1(resultp1);
744
	else
745
		Dbl_setnegativezerop1(resultp1); 
746

747
	/*
748
	 * Generate multiply exponent 
749
	 */
750
	mpy_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) - DBL_BIAS;
751

752
	/*
753
	 * check first operand for NaN's or infinity
754
	 */
755
	if (Dbl_isinfinity_exponent(opnd1p1)) {
756
		if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
757
			if (Dbl_isnotnan(opnd2p1,opnd2p2) &&
758
			    Dbl_isnotnan(opnd3p1,opnd3p2)) {
759
				if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) {
760
					/* 
761
					 * invalid since operands are infinity 
762
					 * and zero 
763
					 */
764
					if (Is_invalidtrap_enabled())
765
						return(OPC_2E_INVALIDEXCEPTION);
766
					Set_invalidflag();
767
					Dbl_makequietnan(resultp1,resultp2);
768
					Dbl_copytoptr(resultp1,resultp2,dstptr);
769
					return(NOEXCEPTION);
770
				}
771
				/*
772
				 * Check third operand for infinity with a
773
				 *  sign opposite of the multiply result
774
				 */
775
				if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
776
				    (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
777
					/* 
778
					 * invalid since attempting a magnitude
779
					 * subtraction of infinities
780
					 */
781
					if (Is_invalidtrap_enabled())
782
						return(OPC_2E_INVALIDEXCEPTION);
783
					Set_invalidflag();
784
					Dbl_makequietnan(resultp1,resultp2);
785
					Dbl_copytoptr(resultp1,resultp2,dstptr);
786
					return(NOEXCEPTION);
787
				}
788

789
				/*
790
			 	 * return infinity
791
			 	 */
792
				Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
793
				Dbl_copytoptr(resultp1,resultp2,dstptr);
794
				return(NOEXCEPTION);
795
			}
796
		}
797
		else {
798
			/*
799
		 	 * is NaN; signaling or quiet?
800
		 	 */
801
			if (Dbl_isone_signaling(opnd1p1)) {
802
				/* trap if INVALIDTRAP enabled */
803
				if (Is_invalidtrap_enabled()) 
804
			    		return(OPC_2E_INVALIDEXCEPTION);
805
				/* make NaN quiet */
806
				Set_invalidflag();
807
				Dbl_set_quiet(opnd1p1);
808
			}
809
			/* 
810
			 * is second operand a signaling NaN? 
811
			 */
812
			else if (Dbl_is_signalingnan(opnd2p1)) {
813
				/* trap if INVALIDTRAP enabled */
814
				if (Is_invalidtrap_enabled())
815
			    		return(OPC_2E_INVALIDEXCEPTION);
816
				/* make NaN quiet */
817
				Set_invalidflag();
818
				Dbl_set_quiet(opnd2p1);
819
				Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
820
				return(NOEXCEPTION);
821
			}
822
			/* 
823
			 * is third operand a signaling NaN? 
824
			 */
825
			else if (Dbl_is_signalingnan(opnd3p1)) {
826
				/* trap if INVALIDTRAP enabled */
827
				if (Is_invalidtrap_enabled())
828
			    		return(OPC_2E_INVALIDEXCEPTION);
829
				/* make NaN quiet */
830
				Set_invalidflag();
831
				Dbl_set_quiet(opnd3p1);
832
				Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
833
				return(NOEXCEPTION);
834
			}
835
			/*
836
		 	 * return quiet NaN
837
		 	 */
838
			Dbl_copytoptr(opnd1p1,opnd1p2,dstptr);
839
			return(NOEXCEPTION);
840
		}
841
	}
842

843
	/*
844
	 * check second operand for NaN's or infinity
845
	 */
846
	if (Dbl_isinfinity_exponent(opnd2p1)) {
847
		if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
848
			if (Dbl_isnotnan(opnd3p1,opnd3p2)) {
849
				if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) {
850
					/* 
851
					 * invalid since multiply operands are
852
					 * zero & infinity
853
					 */
854
					if (Is_invalidtrap_enabled())
855
						return(OPC_2E_INVALIDEXCEPTION);
856
					Set_invalidflag();
857
					Dbl_makequietnan(opnd2p1,opnd2p2);
858
					Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
859
					return(NOEXCEPTION);
860
				}
861

862
				/*
863
				 * Check third operand for infinity with a
864
				 *  sign opposite of the multiply result
865
				 */
866
				if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
867
				    (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
868
					/* 
869
					 * invalid since attempting a magnitude
870
					 * subtraction of infinities
871
					 */
872
					if (Is_invalidtrap_enabled())
873
				       		return(OPC_2E_INVALIDEXCEPTION);
874
				       	Set_invalidflag();
875
				       	Dbl_makequietnan(resultp1,resultp2);
876
					Dbl_copytoptr(resultp1,resultp2,dstptr);
877
					return(NOEXCEPTION);
878
				}
879

880
				/*
881
				 * return infinity
882
				 */
883
				Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
884
				Dbl_copytoptr(resultp1,resultp2,dstptr);
885
				return(NOEXCEPTION);
886
			}
887
		}
888
		else {
889
			/*
890
			 * is NaN; signaling or quiet?
891
			 */
892
			if (Dbl_isone_signaling(opnd2p1)) {
893
				/* trap if INVALIDTRAP enabled */
894
				if (Is_invalidtrap_enabled())
895
					return(OPC_2E_INVALIDEXCEPTION);
896
				/* make NaN quiet */
897
				Set_invalidflag();
898
				Dbl_set_quiet(opnd2p1);
899
			}
900
			/* 
901
			 * is third operand a signaling NaN? 
902
			 */
903
			else if (Dbl_is_signalingnan(opnd3p1)) {
904
			       	/* trap if INVALIDTRAP enabled */
905
			       	if (Is_invalidtrap_enabled())
906
				   		return(OPC_2E_INVALIDEXCEPTION);
907
			       	/* make NaN quiet */
908
			       	Set_invalidflag();
909
			       	Dbl_set_quiet(opnd3p1);
910
				Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
911
		       		return(NOEXCEPTION);
912
			}
913
			/*
914
			 * return quiet NaN
915
			 */
916
			Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
917
			return(NOEXCEPTION);
918
		}
919
	}
920

921
	/*
922
	 * check third operand for NaN's or infinity
923
	 */
924
	if (Dbl_isinfinity_exponent(opnd3p1)) {
925
		if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
926
			/* return infinity */
927
			Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
928
			return(NOEXCEPTION);
929
		} else {
930
			/*
931
			 * is NaN; signaling or quiet?
932
			 */
933
			if (Dbl_isone_signaling(opnd3p1)) {
934
				/* trap if INVALIDTRAP enabled */
935
				if (Is_invalidtrap_enabled())
936
					return(OPC_2E_INVALIDEXCEPTION);
937
				/* make NaN quiet */
938
				Set_invalidflag();
939
				Dbl_set_quiet(opnd3p1);
940
			}
941
			/*
942
			 * return quiet NaN
943
 			 */
944
			Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
945
			return(NOEXCEPTION);
946
		}
947
    	}
948

949
	/*
950
	 * Generate multiply mantissa
951
	 */
952
	if (Dbl_isnotzero_exponent(opnd1p1)) {
953
		/* set hidden bit */
954
		Dbl_clear_signexponent_set_hidden(opnd1p1);
955
	}
956
	else {
957
		/* check for zero */
958
		if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
959
			/*
960
			 * Perform the add opnd3 with zero here.
961
			 */
962
			if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
963
				if (Is_rounding_mode(ROUNDMINUS)) {
964
					Dbl_or_signs(opnd3p1,resultp1);
965
				} else {
966
					Dbl_and_signs(opnd3p1,resultp1);
967
				}
968
			}
969
			/*
970
			 * Now let's check for trapped underflow case.
971
			 */
972
			else if (Dbl_iszero_exponent(opnd3p1) &&
973
			         Is_underflowtrap_enabled()) {
974
                    		/* need to normalize results mantissa */
975
                    		sign_save = Dbl_signextendedsign(opnd3p1);
976
				result_exponent = 0;
977
                    		Dbl_leftshiftby1(opnd3p1,opnd3p2);
978
                    		Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
979
                    		Dbl_set_sign(opnd3p1,/*using*/sign_save);
980
                    		Dbl_setwrapped_exponent(opnd3p1,result_exponent,
981
							unfl);
982
                    		Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
983
                    		/* inexact = FALSE */
984
                    		return(OPC_2E_UNDERFLOWEXCEPTION);
985
			}
986
			Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
987
			return(NOEXCEPTION);
988
		}
989
		/* is denormalized, adjust exponent */
990
		Dbl_clear_signexponent(opnd1p1);
991
		Dbl_leftshiftby1(opnd1p1,opnd1p2);
992
		Dbl_normalize(opnd1p1,opnd1p2,mpy_exponent);
993
	}
994
	/* opnd2 needs to have hidden bit set with msb in hidden bit */
995
	if (Dbl_isnotzero_exponent(opnd2p1)) {
996
		Dbl_clear_signexponent_set_hidden(opnd2p1);
997
	}
998
	else {
999
		/* check for zero */
1000
		if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
1001
			/*
1002
			 * Perform the add opnd3 with zero here.
1003
			 */
1004
			if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
1005
				if (Is_rounding_mode(ROUNDMINUS)) {
1006
					Dbl_or_signs(opnd3p1,resultp1);
1007
				} else {
1008
					Dbl_and_signs(opnd3p1,resultp1);
1009
				}
1010
			}
1011
			/*
1012
			 * Now let's check for trapped underflow case.
1013
			 */
1014
			else if (Dbl_iszero_exponent(opnd3p1) &&
1015
			    Is_underflowtrap_enabled()) {
1016
                    		/* need to normalize results mantissa */
1017
                    		sign_save = Dbl_signextendedsign(opnd3p1);
1018
				result_exponent = 0;
1019
                    		Dbl_leftshiftby1(opnd3p1,opnd3p2);
1020
                    		Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
1021
                    		Dbl_set_sign(opnd3p1,/*using*/sign_save);
1022
                    		Dbl_setwrapped_exponent(opnd3p1,result_exponent,
1023
							unfl);
1024
                    		Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
1025
                    		/* inexact = FALSE */
1026
                    		return(OPC_2E_UNDERFLOWEXCEPTION);
1027
			}
1028
			Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
1029
			return(NOEXCEPTION);
1030
		}
1031
		/* is denormalized; want to normalize */
1032
		Dbl_clear_signexponent(opnd2p1);
1033
		Dbl_leftshiftby1(opnd2p1,opnd2p2);
1034
		Dbl_normalize(opnd2p1,opnd2p2,mpy_exponent);
1035
	}
1036

1037
	/* Multiply the first two source mantissas together */
1038

1039
	/* 
1040
	 * The intermediate result will be kept in tmpres,
1041
	 * which needs enough room for 106 bits of mantissa,
1042
	 * so lets call it a Double extended.
1043
	 */
1044
	Dblext_setzero(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
1045

1046
	/* 
1047
	 * Four bits at a time are inspected in each loop, and a 
1048
	 * simple shift and add multiply algorithm is used. 
1049
	 */ 
1050
	for (count = DBL_P-1; count >= 0; count -= 4) {
1051
		Dblext_rightshiftby4(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
1052
		if (Dbit28p2(opnd1p2)) {
1053
	 		/* Fourword_add should be an ADD followed by 3 ADDC's */
1054
			Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4, 
1055
			 opnd2p1<<3 | opnd2p2>>29, opnd2p2<<3, 0, 0);
1056
		}
1057
		if (Dbit29p2(opnd1p2)) {
1058
			Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1059
			 opnd2p1<<2 | opnd2p2>>30, opnd2p2<<2, 0, 0);
1060
		}
1061
		if (Dbit30p2(opnd1p2)) {
1062
			Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1063
			 opnd2p1<<1 | opnd2p2>>31, opnd2p2<<1, 0, 0);
1064
		}
1065
		if (Dbit31p2(opnd1p2)) {
1066
			Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1067
			 opnd2p1, opnd2p2, 0, 0);
1068
		}
1069
		Dbl_rightshiftby4(opnd1p1,opnd1p2);
1070
	}
1071
	if (Is_dexthiddenoverflow(tmpresp1)) {
1072
		/* result mantissa >= 2 (mantissa overflow) */
1073
		mpy_exponent++;
1074
		Dblext_rightshiftby1(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
1075
	}
1076

1077
	/*
1078
	 * Restore the sign of the mpy result which was saved in resultp1.
1079
	 * The exponent will continue to be kept in mpy_exponent.
1080
	 */
1081
	Dblext_set_sign(tmpresp1,Dbl_sign(resultp1));
1082

1083
	/* 
1084
	 * No rounding is required, since the result of the multiply
1085
	 * is exact in the extended format.
1086
	 */
1087

1088
	/*
1089
	 * Now we are ready to perform the add portion of the operation.
1090
	 *
1091
	 * The exponents need to be kept as integers for now, since the
1092
	 * multiply result might not fit into the exponent field.  We
1093
	 * can't overflow or underflow because of this yet, since the
1094
	 * add could bring the final result back into range.
1095
	 */
1096
	add_exponent = Dbl_exponent(opnd3p1);
1097

1098
	/*
1099
	 * Check for denormalized or zero add operand.
1100
	 */
1101
	if (add_exponent == 0) {
1102
		/* check for zero */
1103
		if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
1104
			/* right is zero */
1105
			/* Left can't be zero and must be result.
1106
			 *
1107
			 * The final result is now in tmpres and mpy_exponent,
1108
			 * and needs to be rounded and squeezed back into
1109
			 * double precision format from double extended.
1110
			 */
1111
			result_exponent = mpy_exponent;
1112
			Dblext_copy(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
1113
				resultp1,resultp2,resultp3,resultp4);
1114
			sign_save = Dbl_signextendedsign(resultp1);/*save sign*/
1115
			goto round;
1116
		}
1117

1118
		/* 
1119
		 * Neither are zeroes.  
1120
		 * Adjust exponent and normalize add operand.
1121
		 */
1122
		sign_save = Dbl_signextendedsign(opnd3p1);	/* save sign */
1123
		Dbl_clear_signexponent(opnd3p1);
1124
		Dbl_leftshiftby1(opnd3p1,opnd3p2);
1125
		Dbl_normalize(opnd3p1,opnd3p2,add_exponent);
1126
		Dbl_set_sign(opnd3p1,sign_save);	/* restore sign */
1127
	} else {
1128
		Dbl_clear_exponent_set_hidden(opnd3p1);
1129
	}
1130
	/*
1131
	 * Copy opnd3 to the double extended variable called right.
1132
	 */
1133
	Dbl_copyto_dblext(opnd3p1,opnd3p2,rightp1,rightp2,rightp3,rightp4);
1134

1135
	/*
1136
	 * A zero "save" helps discover equal operands (for later),
1137
	 * and is used in swapping operands (if needed).
1138
	 */
1139
	Dblext_xortointp1(tmpresp1,rightp1,/*to*/save);
1140

1141
	/*
1142
	 * Compare magnitude of operands.
1143
	 */
1144
	Dblext_copytoint_exponentmantissap1(tmpresp1,signlessleft1);
1145
	Dblext_copytoint_exponentmantissap1(rightp1,signlessright1);
1146
	if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
1147
	    Dblext_ismagnitudeless(tmpresp2,rightp2,signlessleft1,signlessright1)){
1148
		/*
1149
		 * Set the left operand to the larger one by XOR swap.
1150
		 * First finish the first word "save".
1151
		 */
1152
		Dblext_xorfromintp1(save,rightp1,/*to*/rightp1);
1153
		Dblext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
1154
		Dblext_swap_lower(tmpresp2,tmpresp3,tmpresp4,
1155
			rightp2,rightp3,rightp4);
1156
		/* also setup exponents used in rest of routine */
1157
		diff_exponent = add_exponent - mpy_exponent;
1158
		result_exponent = add_exponent;
1159
	} else {
1160
		/* also setup exponents used in rest of routine */
1161
		diff_exponent = mpy_exponent - add_exponent;
1162
		result_exponent = mpy_exponent;
1163
	}
1164
	/* Invariant: left is not smaller than right. */
1165

1166
	/*
1167
	 * Special case alignment of operands that would force alignment
1168
	 * beyond the extent of the extension.  A further optimization
1169
	 * could special case this but only reduces the path length for
1170
	 * this infrequent case.
1171
	 */
1172
	if (diff_exponent > DBLEXT_THRESHOLD) {
1173
		diff_exponent = DBLEXT_THRESHOLD;
1174
	}
1175

1176
	/* Align right operand by shifting it to the right */
1177
	Dblext_clear_sign(rightp1);
1178
	Dblext_right_align(rightp1,rightp2,rightp3,rightp4,
1179
		/*shifted by*/diff_exponent);
1180
	
1181
	/* Treat sum and difference of the operands separately. */
1182
	if ((int)save < 0) {
1183
		/*
1184
		 * Difference of the two operands.  Overflow can occur if the
1185
		 * multiply overflowed.  A borrow can occur out of the hidden
1186
		 * bit and force a post normalization phase.
1187
		 */
1188
		Dblext_subtract(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
1189
			rightp1,rightp2,rightp3,rightp4,
1190
			resultp1,resultp2,resultp3,resultp4);
1191
		sign_save = Dbl_signextendedsign(resultp1);
1192
		if (Dbl_iszero_hidden(resultp1)) {
1193
			/* Handle normalization */
1194
		/* A straightforward algorithm would now shift the
1195
		 * result and extension left until the hidden bit
1196
		 * becomes one.  Not all of the extension bits need
1197
		 * participate in the shift.  Only the two most 
1198
		 * significant bits (round and guard) are needed.
1199
		 * If only a single shift is needed then the guard
1200
		 * bit becomes a significant low order bit and the
1201
		 * extension must participate in the rounding.
1202
		 * If more than a single shift is needed, then all
1203
		 * bits to the right of the guard bit are zeros, 
1204
		 * and the guard bit may or may not be zero. */
1205
			Dblext_leftshiftby1(resultp1,resultp2,resultp3,
1206
				resultp4);
1207

1208
			/* Need to check for a zero result.  The sign and
1209
			 * exponent fields have already been zeroed.  The more
1210
			 * efficient test of the full object can be used.
1211
			 */
1212
			 if (Dblext_iszero(resultp1,resultp2,resultp3,resultp4)) {
1213
				/* Must have been "x-x" or "x+(-x)". */
1214
				if (Is_rounding_mode(ROUNDMINUS))
1215
					Dbl_setone_sign(resultp1);
1216
				Dbl_copytoptr(resultp1,resultp2,dstptr);
1217
				return(NOEXCEPTION);
1218
			}
1219
			result_exponent--;
1220

1221
			/* Look to see if normalization is finished. */
1222
			if (Dbl_isone_hidden(resultp1)) {
1223
				/* No further normalization is needed */
1224
				goto round;
1225
			}
1226

1227
			/* Discover first one bit to determine shift amount.
1228
			 * Use a modified binary search.  We have already
1229
			 * shifted the result one position right and still
1230
			 * not found a one so the remainder of the extension
1231
			 * must be zero and simplifies rounding. */
1232
			/* Scan bytes */
1233
			while (Dbl_iszero_hiddenhigh7mantissa(resultp1)) {
1234
				Dblext_leftshiftby8(resultp1,resultp2,resultp3,resultp4);
1235
				result_exponent -= 8;
1236
			}
1237
			/* Now narrow it down to the nibble */
1238
			if (Dbl_iszero_hiddenhigh3mantissa(resultp1)) {
1239
				/* The lower nibble contains the
1240
				 * normalizing one */
1241
				Dblext_leftshiftby4(resultp1,resultp2,resultp3,resultp4);
1242
				result_exponent -= 4;
1243
			}
1244
			/* Select case where first bit is set (already
1245
			 * normalized) otherwise select the proper shift. */
1246
			jumpsize = Dbl_hiddenhigh3mantissa(resultp1);
1247
			if (jumpsize <= 7) switch(jumpsize) {
1248
			case 1:
1249
				Dblext_leftshiftby3(resultp1,resultp2,resultp3,
1250
					resultp4);
1251
				result_exponent -= 3;
1252
				break;
1253
			case 2:
1254
			case 3:
1255
				Dblext_leftshiftby2(resultp1,resultp2,resultp3,
1256
					resultp4);
1257
				result_exponent -= 2;
1258
				break;
1259
			case 4:
1260
			case 5:
1261
			case 6:
1262
			case 7:
1263
				Dblext_leftshiftby1(resultp1,resultp2,resultp3,
1264
					resultp4);
1265
				result_exponent -= 1;
1266
				break;
1267
			}
1268
		} /* end if (hidden...)... */
1269
	/* Fall through and round */
1270
	} /* end if (save < 0)... */
1271
	else {
1272
		/* Add magnitudes */
1273
		Dblext_addition(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
1274
			rightp1,rightp2,rightp3,rightp4,
1275
			/*to*/resultp1,resultp2,resultp3,resultp4);
1276
		sign_save = Dbl_signextendedsign(resultp1);
1277
		if (Dbl_isone_hiddenoverflow(resultp1)) {
1278
	    		/* Prenormalization required. */
1279
	    		Dblext_arithrightshiftby1(resultp1,resultp2,resultp3,
1280
				resultp4);
1281
	    		result_exponent++;
1282
		} /* end if hiddenoverflow... */
1283
	} /* end else ...add magnitudes... */
1284

1285
	/* Round the result.  If the extension and lower two words are
1286
	 * all zeros, then the result is exact.  Otherwise round in the
1287
	 * correct direction.  Underflow is possible. If a postnormalization
1288
	 * is necessary, then the mantissa is all zeros so no shift is needed.
1289
	 */
1290
  round:
1291
	if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
1292
		Dblext_denormalize(resultp1,resultp2,resultp3,resultp4,
1293
			result_exponent,is_tiny);
1294
	}
1295
	Dbl_set_sign(resultp1,/*using*/sign_save);
1296
	if (Dblext_isnotzero_mantissap3(resultp3) || 
1297
	    Dblext_isnotzero_mantissap4(resultp4)) {
1298
		inexact = TRUE;
1299
		switch(Rounding_mode()) {
1300
		case ROUNDNEAREST: /* The default. */
1301
			if (Dblext_isone_highp3(resultp3)) {
1302
				/* at least 1/2 ulp */
1303
				if (Dblext_isnotzero_low31p3(resultp3) ||
1304
				    Dblext_isnotzero_mantissap4(resultp4) ||
1305
				    Dblext_isone_lowp2(resultp2)) {
1306
					/* either exactly half way and odd or
1307
					 * more than 1/2ulp */
1308
					Dbl_increment(resultp1,resultp2);
1309
				}
1310
			}
1311
	    		break;
1312

1313
		case ROUNDPLUS:
1314
	    		if (Dbl_iszero_sign(resultp1)) {
1315
				/* Round up positive results */
1316
				Dbl_increment(resultp1,resultp2);
1317
			}
1318
			break;
1319
	    
1320
		case ROUNDMINUS:
1321
	    		if (Dbl_isone_sign(resultp1)) {
1322
				/* Round down negative results */
1323
				Dbl_increment(resultp1,resultp2);
1324
			}
1325
	    
1326
		case ROUNDZERO:;
1327
			/* truncate is simple */
1328
		} /* end switch... */
1329
		if (Dbl_isone_hiddenoverflow(resultp1)) result_exponent++;
1330
	}
1331
	if (result_exponent >= DBL_INFINITY_EXPONENT) {
1332
		/* Overflow */
1333
		if (Is_overflowtrap_enabled()) {
1334
                        /*
1335
                         * Adjust bias of result
1336
                         */
1337
                        Dbl_setwrapped_exponent(resultp1,result_exponent,ovfl);
1338
                        Dbl_copytoptr(resultp1,resultp2,dstptr);
1339
                        if (inexact)
1340
                            if (Is_inexacttrap_enabled())
1341
                                return (OPC_2E_OVERFLOWEXCEPTION |
1342
					OPC_2E_INEXACTEXCEPTION);
1343
                            else Set_inexactflag();
1344
                        return (OPC_2E_OVERFLOWEXCEPTION);
1345
		}
1346
		inexact = TRUE;
1347
		Set_overflowflag();
1348
		Dbl_setoverflow(resultp1,resultp2);
1349
	} else if (result_exponent <= 0) {	/* underflow case */
1350
		if (Is_underflowtrap_enabled()) {
1351
                        /*
1352
                         * Adjust bias of result
1353
                         */
1354
                	Dbl_setwrapped_exponent(resultp1,result_exponent,unfl);
1355
			Dbl_copytoptr(resultp1,resultp2,dstptr);
1356
                        if (inexact)
1357
                            if (Is_inexacttrap_enabled())
1358
                                return (OPC_2E_UNDERFLOWEXCEPTION |
1359
					OPC_2E_INEXACTEXCEPTION);
1360
                            else Set_inexactflag();
1361
	    		return(OPC_2E_UNDERFLOWEXCEPTION);
1362
		}
1363
		else if (inexact && is_tiny) Set_underflowflag();
1364
	}
1365
	else Dbl_set_exponent(resultp1,result_exponent);
1366
	Dbl_copytoptr(resultp1,resultp2,dstptr);
1367
	if (inexact) 
1368
		if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
1369
		else Set_inexactflag();
1370
    	return(NOEXCEPTION);
1371
}
1372

1373
/*
1374
 *  Single Floating-point Multiply Fused Add
1375
 */
1376

1377
sgl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
1378

1379
sgl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr;
1380
unsigned int *status;
1381
{
1382
	unsigned int opnd1, opnd2, opnd3;
1383
	register unsigned int tmpresp1, tmpresp2;
1384
	unsigned int rightp1, rightp2;
1385
	unsigned int resultp1, resultp2 = 0;
1386
	register int mpy_exponent, add_exponent, count;
1387
	boolean inexact = FALSE, is_tiny = FALSE;
1388

1389
	unsigned int signlessleft1, signlessright1, save;
1390
	register int result_exponent, diff_exponent;
1391
	int sign_save, jumpsize;
1392
	
1393
	Sgl_copyfromptr(src1ptr,opnd1);
1394
	Sgl_copyfromptr(src2ptr,opnd2);
1395
	Sgl_copyfromptr(src3ptr,opnd3);
1396

1397
	/* 
1398
	 * set sign bit of result of multiply
1399
	 */
1400
	if (Sgl_sign(opnd1) ^ Sgl_sign(opnd2)) 
1401
		Sgl_setnegativezero(resultp1); 
1402
	else Sgl_setzero(resultp1);
1403

1404
	/*
1405
	 * Generate multiply exponent 
1406
	 */
1407
	mpy_exponent = Sgl_exponent(opnd1) + Sgl_exponent(opnd2) - SGL_BIAS;
1408

1409
	/*
1410
	 * check first operand for NaN's or infinity
1411
	 */
1412
	if (Sgl_isinfinity_exponent(opnd1)) {
1413
		if (Sgl_iszero_mantissa(opnd1)) {
1414
			if (Sgl_isnotnan(opnd2) && Sgl_isnotnan(opnd3)) {
1415
				if (Sgl_iszero_exponentmantissa(opnd2)) {
1416
					/* 
1417
					 * invalid since operands are infinity 
1418
					 * and zero 
1419
					 */
1420
					if (Is_invalidtrap_enabled())
1421
						return(OPC_2E_INVALIDEXCEPTION);
1422
					Set_invalidflag();
1423
					Sgl_makequietnan(resultp1);
1424
					Sgl_copytoptr(resultp1,dstptr);
1425
					return(NOEXCEPTION);
1426
				}
1427
				/*
1428
				 * Check third operand for infinity with a
1429
				 *  sign opposite of the multiply result
1430
				 */
1431
				if (Sgl_isinfinity(opnd3) &&
1432
				    (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
1433
					/* 
1434
					 * invalid since attempting a magnitude
1435
					 * subtraction of infinities
1436
					 */
1437
					if (Is_invalidtrap_enabled())
1438
						return(OPC_2E_INVALIDEXCEPTION);
1439
					Set_invalidflag();
1440
					Sgl_makequietnan(resultp1);
1441
					Sgl_copytoptr(resultp1,dstptr);
1442
					return(NOEXCEPTION);
1443
				}
1444

1445
				/*
1446
			 	 * return infinity
1447
			 	 */
1448
				Sgl_setinfinity_exponentmantissa(resultp1);
1449
				Sgl_copytoptr(resultp1,dstptr);
1450
				return(NOEXCEPTION);
1451
			}
1452
		}
1453
		else {
1454
			/*
1455
		 	 * is NaN; signaling or quiet?
1456
		 	 */
1457
			if (Sgl_isone_signaling(opnd1)) {
1458
				/* trap if INVALIDTRAP enabled */
1459
				if (Is_invalidtrap_enabled()) 
1460
			    		return(OPC_2E_INVALIDEXCEPTION);
1461
				/* make NaN quiet */
1462
				Set_invalidflag();
1463
				Sgl_set_quiet(opnd1);
1464
			}
1465
			/* 
1466
			 * is second operand a signaling NaN? 
1467
			 */
1468
			else if (Sgl_is_signalingnan(opnd2)) {
1469
				/* trap if INVALIDTRAP enabled */
1470
				if (Is_invalidtrap_enabled())
1471
			    		return(OPC_2E_INVALIDEXCEPTION);
1472
				/* make NaN quiet */
1473
				Set_invalidflag();
1474
				Sgl_set_quiet(opnd2);
1475
				Sgl_copytoptr(opnd2,dstptr);
1476
				return(NOEXCEPTION);
1477
			}
1478
			/* 
1479
			 * is third operand a signaling NaN? 
1480
			 */
1481
			else if (Sgl_is_signalingnan(opnd3)) {
1482
				/* trap if INVALIDTRAP enabled */
1483
				if (Is_invalidtrap_enabled())
1484
			    		return(OPC_2E_INVALIDEXCEPTION);
1485
				/* make NaN quiet */
1486
				Set_invalidflag();
1487
				Sgl_set_quiet(opnd3);
1488
				Sgl_copytoptr(opnd3,dstptr);
1489
				return(NOEXCEPTION);
1490
			}
1491
			/*
1492
		 	 * return quiet NaN
1493
		 	 */
1494
			Sgl_copytoptr(opnd1,dstptr);
1495
			return(NOEXCEPTION);
1496
		}
1497
	}
1498

1499
	/*
1500
	 * check second operand for NaN's or infinity
1501
	 */
1502
	if (Sgl_isinfinity_exponent(opnd2)) {
1503
		if (Sgl_iszero_mantissa(opnd2)) {
1504
			if (Sgl_isnotnan(opnd3)) {
1505
				if (Sgl_iszero_exponentmantissa(opnd1)) {
1506
					/* 
1507
					 * invalid since multiply operands are
1508
					 * zero & infinity
1509
					 */
1510
					if (Is_invalidtrap_enabled())
1511
						return(OPC_2E_INVALIDEXCEPTION);
1512
					Set_invalidflag();
1513
					Sgl_makequietnan(opnd2);
1514
					Sgl_copytoptr(opnd2,dstptr);
1515
					return(NOEXCEPTION);
1516
				}
1517

1518
				/*
1519
				 * Check third operand for infinity with a
1520
				 *  sign opposite of the multiply result
1521
				 */
1522
				if (Sgl_isinfinity(opnd3) &&
1523
				    (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
1524
					/* 
1525
					 * invalid since attempting a magnitude
1526
					 * subtraction of infinities
1527
					 */
1528
					if (Is_invalidtrap_enabled())
1529
				       		return(OPC_2E_INVALIDEXCEPTION);
1530
				       	Set_invalidflag();
1531
				       	Sgl_makequietnan(resultp1);
1532
					Sgl_copytoptr(resultp1,dstptr);
1533
					return(NOEXCEPTION);
1534
				}
1535

1536
				/*
1537
				 * return infinity
1538
				 */
1539
				Sgl_setinfinity_exponentmantissa(resultp1);
1540
				Sgl_copytoptr(resultp1,dstptr);
1541
				return(NOEXCEPTION);
1542
			}
1543
		}
1544
		else {
1545
			/*
1546
			 * is NaN; signaling or quiet?
1547
			 */
1548
			if (Sgl_isone_signaling(opnd2)) {
1549
				/* trap if INVALIDTRAP enabled */
1550
				if (Is_invalidtrap_enabled())
1551
					return(OPC_2E_INVALIDEXCEPTION);
1552
				/* make NaN quiet */
1553
				Set_invalidflag();
1554
				Sgl_set_quiet(opnd2);
1555
			}
1556
			/* 
1557
			 * is third operand a signaling NaN? 
1558
			 */
1559
			else if (Sgl_is_signalingnan(opnd3)) {
1560
			       	/* trap if INVALIDTRAP enabled */
1561
			       	if (Is_invalidtrap_enabled())
1562
				   		return(OPC_2E_INVALIDEXCEPTION);
1563
			       	/* make NaN quiet */
1564
			       	Set_invalidflag();
1565
			       	Sgl_set_quiet(opnd3);
1566
				Sgl_copytoptr(opnd3,dstptr);
1567
		       		return(NOEXCEPTION);
1568
			}
1569
			/*
1570
			 * return quiet NaN
1571
			 */
1572
			Sgl_copytoptr(opnd2,dstptr);
1573
			return(NOEXCEPTION);
1574
		}
1575
	}
1576

1577
	/*
1578
	 * check third operand for NaN's or infinity
1579
	 */
1580
	if (Sgl_isinfinity_exponent(opnd3)) {
1581
		if (Sgl_iszero_mantissa(opnd3)) {
1582
			/* return infinity */
1583
			Sgl_copytoptr(opnd3,dstptr);
1584
			return(NOEXCEPTION);
1585
		} else {
1586
			/*
1587
			 * is NaN; signaling or quiet?
1588
			 */
1589
			if (Sgl_isone_signaling(opnd3)) {
1590
				/* trap if INVALIDTRAP enabled */
1591
				if (Is_invalidtrap_enabled())
1592
					return(OPC_2E_INVALIDEXCEPTION);
1593
				/* make NaN quiet */
1594
				Set_invalidflag();
1595
				Sgl_set_quiet(opnd3);
1596
			}
1597
			/*
1598
			 * return quiet NaN
1599
 			 */
1600
			Sgl_copytoptr(opnd3,dstptr);
1601
			return(NOEXCEPTION);
1602
		}
1603
    	}
1604

1605
	/*
1606
	 * Generate multiply mantissa
1607
	 */
1608
	if (Sgl_isnotzero_exponent(opnd1)) {
1609
		/* set hidden bit */
1610
		Sgl_clear_signexponent_set_hidden(opnd1);
1611
	}
1612
	else {
1613
		/* check for zero */
1614
		if (Sgl_iszero_mantissa(opnd1)) {
1615
			/*
1616
			 * Perform the add opnd3 with zero here.
1617
			 */
1618
			if (Sgl_iszero_exponentmantissa(opnd3)) {
1619
				if (Is_rounding_mode(ROUNDMINUS)) {
1620
					Sgl_or_signs(opnd3,resultp1);
1621
				} else {
1622
					Sgl_and_signs(opnd3,resultp1);
1623
				}
1624
			}
1625
			/*
1626
			 * Now let's check for trapped underflow case.
1627
			 */
1628
			else if (Sgl_iszero_exponent(opnd3) &&
1629
			         Is_underflowtrap_enabled()) {
1630
                    		/* need to normalize results mantissa */
1631
                    		sign_save = Sgl_signextendedsign(opnd3);
1632
				result_exponent = 0;
1633
                    		Sgl_leftshiftby1(opnd3);
1634
                    		Sgl_normalize(opnd3,result_exponent);
1635
                    		Sgl_set_sign(opnd3,/*using*/sign_save);
1636
                    		Sgl_setwrapped_exponent(opnd3,result_exponent,
1637
							unfl);
1638
                    		Sgl_copytoptr(opnd3,dstptr);
1639
                    		/* inexact = FALSE */
1640
                    		return(OPC_2E_UNDERFLOWEXCEPTION);
1641
			}
1642
			Sgl_copytoptr(opnd3,dstptr);
1643
			return(NOEXCEPTION);
1644
		}
1645
		/* is denormalized, adjust exponent */
1646
		Sgl_clear_signexponent(opnd1);
1647
		Sgl_leftshiftby1(opnd1);
1648
		Sgl_normalize(opnd1,mpy_exponent);
1649
	}
1650
	/* opnd2 needs to have hidden bit set with msb in hidden bit */
1651
	if (Sgl_isnotzero_exponent(opnd2)) {
1652
		Sgl_clear_signexponent_set_hidden(opnd2);
1653
	}
1654
	else {
1655
		/* check for zero */
1656
		if (Sgl_iszero_mantissa(opnd2)) {
1657
			/*
1658
			 * Perform the add opnd3 with zero here.
1659
			 */
1660
			if (Sgl_iszero_exponentmantissa(opnd3)) {
1661
				if (Is_rounding_mode(ROUNDMINUS)) {
1662
					Sgl_or_signs(opnd3,resultp1);
1663
				} else {
1664
					Sgl_and_signs(opnd3,resultp1);
1665
				}
1666
			}
1667
			/*
1668
			 * Now let's check for trapped underflow case.
1669
			 */
1670
			else if (Sgl_iszero_exponent(opnd3) &&
1671
			    Is_underflowtrap_enabled()) {
1672
                    		/* need to normalize results mantissa */
1673
                    		sign_save = Sgl_signextendedsign(opnd3);
1674
				result_exponent = 0;
1675
                    		Sgl_leftshiftby1(opnd3);
1676
                    		Sgl_normalize(opnd3,result_exponent);
1677
                    		Sgl_set_sign(opnd3,/*using*/sign_save);
1678
                    		Sgl_setwrapped_exponent(opnd3,result_exponent,
1679
							unfl);
1680
                    		Sgl_copytoptr(opnd3,dstptr);
1681
                    		/* inexact = FALSE */
1682
                    		return(OPC_2E_UNDERFLOWEXCEPTION);
1683
			}
1684
			Sgl_copytoptr(opnd3,dstptr);
1685
			return(NOEXCEPTION);
1686
		}
1687
		/* is denormalized; want to normalize */
1688
		Sgl_clear_signexponent(opnd2);
1689
		Sgl_leftshiftby1(opnd2);
1690
		Sgl_normalize(opnd2,mpy_exponent);
1691
	}
1692

1693
	/* Multiply the first two source mantissas together */
1694

1695
	/* 
1696
	 * The intermediate result will be kept in tmpres,
1697
	 * which needs enough room for 106 bits of mantissa,
1698
	 * so lets call it a Double extended.
1699
	 */
1700
	Sglext_setzero(tmpresp1,tmpresp2);
1701

1702
	/* 
1703
	 * Four bits at a time are inspected in each loop, and a 
1704
	 * simple shift and add multiply algorithm is used. 
1705
	 */ 
1706
	for (count = SGL_P-1; count >= 0; count -= 4) {
1707
		Sglext_rightshiftby4(tmpresp1,tmpresp2);
1708
		if (Sbit28(opnd1)) {
1709
	 		/* Twoword_add should be an ADD followed by 2 ADDC's */
1710
			Twoword_add(tmpresp1, tmpresp2, opnd2<<3, 0);
1711
		}
1712
		if (Sbit29(opnd1)) {
1713
			Twoword_add(tmpresp1, tmpresp2, opnd2<<2, 0);
1714
		}
1715
		if (Sbit30(opnd1)) {
1716
			Twoword_add(tmpresp1, tmpresp2, opnd2<<1, 0);
1717
		}
1718
		if (Sbit31(opnd1)) {
1719
			Twoword_add(tmpresp1, tmpresp2, opnd2, 0);
1720
		}
1721
		Sgl_rightshiftby4(opnd1);
1722
	}
1723
	if (Is_sexthiddenoverflow(tmpresp1)) {
1724
		/* result mantissa >= 2 (mantissa overflow) */
1725
		mpy_exponent++;
1726
		Sglext_rightshiftby4(tmpresp1,tmpresp2);
1727
	} else {
1728
		Sglext_rightshiftby3(tmpresp1,tmpresp2);
1729
	}
1730

1731
	/*
1732
	 * Restore the sign of the mpy result which was saved in resultp1.
1733
	 * The exponent will continue to be kept in mpy_exponent.
1734
	 */
1735
	Sglext_set_sign(tmpresp1,Sgl_sign(resultp1));
1736

1737
	/* 
1738
	 * No rounding is required, since the result of the multiply
1739
	 * is exact in the extended format.
1740
	 */
1741

1742
	/*
1743
	 * Now we are ready to perform the add portion of the operation.
1744
	 *
1745
	 * The exponents need to be kept as integers for now, since the
1746
	 * multiply result might not fit into the exponent field.  We
1747
	 * can't overflow or underflow because of this yet, since the
1748
	 * add could bring the final result back into range.
1749
	 */
1750
	add_exponent = Sgl_exponent(opnd3);
1751

1752
	/*
1753
	 * Check for denormalized or zero add operand.
1754
	 */
1755
	if (add_exponent == 0) {
1756
		/* check for zero */
1757
		if (Sgl_iszero_mantissa(opnd3)) {
1758
			/* right is zero */
1759
			/* Left can't be zero and must be result.
1760
			 *
1761
			 * The final result is now in tmpres and mpy_exponent,
1762
			 * and needs to be rounded and squeezed back into
1763
			 * double precision format from double extended.
1764
			 */
1765
			result_exponent = mpy_exponent;
1766
			Sglext_copy(tmpresp1,tmpresp2,resultp1,resultp2);
1767
			sign_save = Sgl_signextendedsign(resultp1);/*save sign*/
1768
			goto round;
1769
		}
1770

1771
		/* 
1772
		 * Neither are zeroes.  
1773
		 * Adjust exponent and normalize add operand.
1774
		 */
1775
		sign_save = Sgl_signextendedsign(opnd3);	/* save sign */
1776
		Sgl_clear_signexponent(opnd3);
1777
		Sgl_leftshiftby1(opnd3);
1778
		Sgl_normalize(opnd3,add_exponent);
1779
		Sgl_set_sign(opnd3,sign_save);		/* restore sign */
1780
	} else {
1781
		Sgl_clear_exponent_set_hidden(opnd3);
1782
	}
1783
	/*
1784
	 * Copy opnd3 to the double extended variable called right.
1785
	 */
1786
	Sgl_copyto_sglext(opnd3,rightp1,rightp2);
1787

1788
	/*
1789
	 * A zero "save" helps discover equal operands (for later),
1790
	 * and is used in swapping operands (if needed).
1791
	 */
1792
	Sglext_xortointp1(tmpresp1,rightp1,/*to*/save);
1793

1794
	/*
1795
	 * Compare magnitude of operands.
1796
	 */
1797
	Sglext_copytoint_exponentmantissa(tmpresp1,signlessleft1);
1798
	Sglext_copytoint_exponentmantissa(rightp1,signlessright1);
1799
	if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
1800
	    Sglext_ismagnitudeless(signlessleft1,signlessright1)) {
1801
		/*
1802
		 * Set the left operand to the larger one by XOR swap.
1803
		 * First finish the first word "save".
1804
		 */
1805
		Sglext_xorfromintp1(save,rightp1,/*to*/rightp1);
1806
		Sglext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
1807
		Sglext_swap_lower(tmpresp2,rightp2);
1808
		/* also setup exponents used in rest of routine */
1809
		diff_exponent = add_exponent - mpy_exponent;
1810
		result_exponent = add_exponent;
1811
	} else {
1812
		/* also setup exponents used in rest of routine */
1813
		diff_exponent = mpy_exponent - add_exponent;
1814
		result_exponent = mpy_exponent;
1815
	}
1816
	/* Invariant: left is not smaller than right. */
1817

1818
	/*
1819
	 * Special case alignment of operands that would force alignment
1820
	 * beyond the extent of the extension.  A further optimization
1821
	 * could special case this but only reduces the path length for
1822
	 * this infrequent case.
1823
	 */
1824
	if (diff_exponent > SGLEXT_THRESHOLD) {
1825
		diff_exponent = SGLEXT_THRESHOLD;
1826
	}
1827

1828
	/* Align right operand by shifting it to the right */
1829
	Sglext_clear_sign(rightp1);
1830
	Sglext_right_align(rightp1,rightp2,/*shifted by*/diff_exponent);
1831
	
1832
	/* Treat sum and difference of the operands separately. */
1833
	if ((int)save < 0) {
1834
		/*
1835
		 * Difference of the two operands.  Overflow can occur if the
1836
		 * multiply overflowed.  A borrow can occur out of the hidden
1837
		 * bit and force a post normalization phase.
1838
		 */
1839
		Sglext_subtract(tmpresp1,tmpresp2, rightp1,rightp2,
1840
			resultp1,resultp2);
1841
		sign_save = Sgl_signextendedsign(resultp1);
1842
		if (Sgl_iszero_hidden(resultp1)) {
1843
			/* Handle normalization */
1844
		/* A straightforward algorithm would now shift the
1845
		 * result and extension left until the hidden bit
1846
		 * becomes one.  Not all of the extension bits need
1847
		 * participate in the shift.  Only the two most 
1848
		 * significant bits (round and guard) are needed.
1849
		 * If only a single shift is needed then the guard
1850
		 * bit becomes a significant low order bit and the
1851
		 * extension must participate in the rounding.
1852
		 * If more than a single shift is needed, then all
1853
		 * bits to the right of the guard bit are zeros, 
1854
		 * and the guard bit may or may not be zero. */
1855
			Sglext_leftshiftby1(resultp1,resultp2);
1856

1857
			/* Need to check for a zero result.  The sign and
1858
			 * exponent fields have already been zeroed.  The more
1859
			 * efficient test of the full object can be used.
1860
			 */
1861
			 if (Sglext_iszero(resultp1,resultp2)) {
1862
				/* Must have been "x-x" or "x+(-x)". */
1863
				if (Is_rounding_mode(ROUNDMINUS))
1864
					Sgl_setone_sign(resultp1);
1865
				Sgl_copytoptr(resultp1,dstptr);
1866
				return(NOEXCEPTION);
1867
			}
1868
			result_exponent--;
1869

1870
			/* Look to see if normalization is finished. */
1871
			if (Sgl_isone_hidden(resultp1)) {
1872
				/* No further normalization is needed */
1873
				goto round;
1874
			}
1875

1876
			/* Discover first one bit to determine shift amount.
1877
			 * Use a modified binary search.  We have already
1878
			 * shifted the result one position right and still
1879
			 * not found a one so the remainder of the extension
1880
			 * must be zero and simplifies rounding. */
1881
			/* Scan bytes */
1882
			while (Sgl_iszero_hiddenhigh7mantissa(resultp1)) {
1883
				Sglext_leftshiftby8(resultp1,resultp2);
1884
				result_exponent -= 8;
1885
			}
1886
			/* Now narrow it down to the nibble */
1887
			if (Sgl_iszero_hiddenhigh3mantissa(resultp1)) {
1888
				/* The lower nibble contains the
1889
				 * normalizing one */
1890
				Sglext_leftshiftby4(resultp1,resultp2);
1891
				result_exponent -= 4;
1892
			}
1893
			/* Select case where first bit is set (already
1894
			 * normalized) otherwise select the proper shift. */
1895
			jumpsize = Sgl_hiddenhigh3mantissa(resultp1);
1896
			if (jumpsize <= 7) switch(jumpsize) {
1897
			case 1:
1898
				Sglext_leftshiftby3(resultp1,resultp2);
1899
				result_exponent -= 3;
1900
				break;
1901
			case 2:
1902
			case 3:
1903
				Sglext_leftshiftby2(resultp1,resultp2);
1904
				result_exponent -= 2;
1905
				break;
1906
			case 4:
1907
			case 5:
1908
			case 6:
1909
			case 7:
1910
				Sglext_leftshiftby1(resultp1,resultp2);
1911
				result_exponent -= 1;
1912
				break;
1913
			}
1914
		} /* end if (hidden...)... */
1915
	/* Fall through and round */
1916
	} /* end if (save < 0)... */
1917
	else {
1918
		/* Add magnitudes */
1919
		Sglext_addition(tmpresp1,tmpresp2,
1920
			rightp1,rightp2, /*to*/resultp1,resultp2);
1921
		sign_save = Sgl_signextendedsign(resultp1);
1922
		if (Sgl_isone_hiddenoverflow(resultp1)) {
1923
	    		/* Prenormalization required. */
1924
	    		Sglext_arithrightshiftby1(resultp1,resultp2);
1925
	    		result_exponent++;
1926
		} /* end if hiddenoverflow... */
1927
	} /* end else ...add magnitudes... */
1928

1929
	/* Round the result.  If the extension and lower two words are
1930
	 * all zeros, then the result is exact.  Otherwise round in the
1931
	 * correct direction.  Underflow is possible. If a postnormalization
1932
	 * is necessary, then the mantissa is all zeros so no shift is needed.
1933
	 */
1934
  round:
1935
	if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
1936
		Sglext_denormalize(resultp1,resultp2,result_exponent,is_tiny);
1937
	}
1938
	Sgl_set_sign(resultp1,/*using*/sign_save);
1939
	if (Sglext_isnotzero_mantissap2(resultp2)) {
1940
		inexact = TRUE;
1941
		switch(Rounding_mode()) {
1942
		case ROUNDNEAREST: /* The default. */
1943
			if (Sglext_isone_highp2(resultp2)) {
1944
				/* at least 1/2 ulp */
1945
				if (Sglext_isnotzero_low31p2(resultp2) ||
1946
				    Sglext_isone_lowp1(resultp1)) {
1947
					/* either exactly half way and odd or
1948
					 * more than 1/2ulp */
1949
					Sgl_increment(resultp1);
1950
				}
1951
			}
1952
	    		break;
1953

1954
		case ROUNDPLUS:
1955
	    		if (Sgl_iszero_sign(resultp1)) {
1956
				/* Round up positive results */
1957
				Sgl_increment(resultp1);
1958
			}
1959
			break;
1960
	    
1961
		case ROUNDMINUS:
1962
	    		if (Sgl_isone_sign(resultp1)) {
1963
				/* Round down negative results */
1964
				Sgl_increment(resultp1);
1965
			}
1966
	    
1967
		case ROUNDZERO:;
1968
			/* truncate is simple */
1969
		} /* end switch... */
1970
		if (Sgl_isone_hiddenoverflow(resultp1)) result_exponent++;
1971
	}
1972
	if (result_exponent >= SGL_INFINITY_EXPONENT) {
1973
		/* Overflow */
1974
		if (Is_overflowtrap_enabled()) {
1975
                        /*
1976
                         * Adjust bias of result
1977
                         */
1978
                        Sgl_setwrapped_exponent(resultp1,result_exponent,ovfl);
1979
                        Sgl_copytoptr(resultp1,dstptr);
1980
                        if (inexact)
1981
                            if (Is_inexacttrap_enabled())
1982
                                return (OPC_2E_OVERFLOWEXCEPTION |
1983
					OPC_2E_INEXACTEXCEPTION);
1984
                            else Set_inexactflag();
1985
                        return (OPC_2E_OVERFLOWEXCEPTION);
1986
		}
1987
		inexact = TRUE;
1988
		Set_overflowflag();
1989
		Sgl_setoverflow(resultp1);
1990
	} else if (result_exponent <= 0) {	/* underflow case */
1991
		if (Is_underflowtrap_enabled()) {
1992
                        /*
1993
                         * Adjust bias of result
1994
                         */
1995
                	Sgl_setwrapped_exponent(resultp1,result_exponent,unfl);
1996
			Sgl_copytoptr(resultp1,dstptr);
1997
                        if (inexact)
1998
                            if (Is_inexacttrap_enabled())
1999
                                return (OPC_2E_UNDERFLOWEXCEPTION |
2000
					OPC_2E_INEXACTEXCEPTION);
2001
                            else Set_inexactflag();
2002
	    		return(OPC_2E_UNDERFLOWEXCEPTION);
2003
		}
2004
		else if (inexact && is_tiny) Set_underflowflag();
2005
	}
2006
	else Sgl_set_exponent(resultp1,result_exponent);
2007
	Sgl_copytoptr(resultp1,dstptr);
2008
	if (inexact) 
2009
		if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
2010
		else Set_inexactflag();
2011
    	return(NOEXCEPTION);
2012
}
2013

2014
/*
2015
 *  Single Floating-point Multiply Negate Fused Add
2016
 */
2017

2018
sgl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
2019

2020
sgl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr;
2021
unsigned int *status;
2022
{
2023
	unsigned int opnd1, opnd2, opnd3;
2024
	register unsigned int tmpresp1, tmpresp2;
2025
	unsigned int rightp1, rightp2;
2026
	unsigned int resultp1, resultp2 = 0;
2027
	register int mpy_exponent, add_exponent, count;
2028
	boolean inexact = FALSE, is_tiny = FALSE;
2029

2030
	unsigned int signlessleft1, signlessright1, save;
2031
	register int result_exponent, diff_exponent;
2032
	int sign_save, jumpsize;
2033
	
2034
	Sgl_copyfromptr(src1ptr,opnd1);
2035
	Sgl_copyfromptr(src2ptr,opnd2);
2036
	Sgl_copyfromptr(src3ptr,opnd3);
2037

2038
	/* 
2039
	 * set sign bit of result of multiply
2040
	 */
2041
	if (Sgl_sign(opnd1) ^ Sgl_sign(opnd2)) 
2042
		Sgl_setzero(resultp1);
2043
	else 
2044
		Sgl_setnegativezero(resultp1); 
2045

2046
	/*
2047
	 * Generate multiply exponent 
2048
	 */
2049
	mpy_exponent = Sgl_exponent(opnd1) + Sgl_exponent(opnd2) - SGL_BIAS;
2050

2051
	/*
2052
	 * check first operand for NaN's or infinity
2053
	 */
2054
	if (Sgl_isinfinity_exponent(opnd1)) {
2055
		if (Sgl_iszero_mantissa(opnd1)) {
2056
			if (Sgl_isnotnan(opnd2) && Sgl_isnotnan(opnd3)) {
2057
				if (Sgl_iszero_exponentmantissa(opnd2)) {
2058
					/* 
2059
					 * invalid since operands are infinity 
2060
					 * and zero 
2061
					 */
2062
					if (Is_invalidtrap_enabled())
2063
						return(OPC_2E_INVALIDEXCEPTION);
2064
					Set_invalidflag();
2065
					Sgl_makequietnan(resultp1);
2066
					Sgl_copytoptr(resultp1,dstptr);
2067
					return(NOEXCEPTION);
2068
				}
2069
				/*
2070
				 * Check third operand for infinity with a
2071
				 *  sign opposite of the multiply result
2072
				 */
2073
				if (Sgl_isinfinity(opnd3) &&
2074
				    (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
2075
					/* 
2076
					 * invalid since attempting a magnitude
2077
					 * subtraction of infinities
2078
					 */
2079
					if (Is_invalidtrap_enabled())
2080
						return(OPC_2E_INVALIDEXCEPTION);
2081
					Set_invalidflag();
2082
					Sgl_makequietnan(resultp1);
2083
					Sgl_copytoptr(resultp1,dstptr);
2084
					return(NOEXCEPTION);
2085
				}
2086

2087
				/*
2088
			 	 * return infinity
2089
			 	 */
2090
				Sgl_setinfinity_exponentmantissa(resultp1);
2091
				Sgl_copytoptr(resultp1,dstptr);
2092
				return(NOEXCEPTION);
2093
			}
2094
		}
2095
		else {
2096
			/*
2097
		 	 * is NaN; signaling or quiet?
2098
		 	 */
2099
			if (Sgl_isone_signaling(opnd1)) {
2100
				/* trap if INVALIDTRAP enabled */
2101
				if (Is_invalidtrap_enabled()) 
2102
			    		return(OPC_2E_INVALIDEXCEPTION);
2103
				/* make NaN quiet */
2104
				Set_invalidflag();
2105
				Sgl_set_quiet(opnd1);
2106
			}
2107
			/* 
2108
			 * is second operand a signaling NaN? 
2109
			 */
2110
			else if (Sgl_is_signalingnan(opnd2)) {
2111
				/* trap if INVALIDTRAP enabled */
2112
				if (Is_invalidtrap_enabled())
2113
			    		return(OPC_2E_INVALIDEXCEPTION);
2114
				/* make NaN quiet */
2115
				Set_invalidflag();
2116
				Sgl_set_quiet(opnd2);
2117
				Sgl_copytoptr(opnd2,dstptr);
2118
				return(NOEXCEPTION);
2119
			}
2120
			/* 
2121
			 * is third operand a signaling NaN? 
2122
			 */
2123
			else if (Sgl_is_signalingnan(opnd3)) {
2124
				/* trap if INVALIDTRAP enabled */
2125
				if (Is_invalidtrap_enabled())
2126
			    		return(OPC_2E_INVALIDEXCEPTION);
2127
				/* make NaN quiet */
2128
				Set_invalidflag();
2129
				Sgl_set_quiet(opnd3);
2130
				Sgl_copytoptr(opnd3,dstptr);
2131
				return(NOEXCEPTION);
2132
			}
2133
			/*
2134
		 	 * return quiet NaN
2135
		 	 */
2136
			Sgl_copytoptr(opnd1,dstptr);
2137
			return(NOEXCEPTION);
2138
		}
2139
	}
2140

2141
	/*
2142
	 * check second operand for NaN's or infinity
2143
	 */
2144
	if (Sgl_isinfinity_exponent(opnd2)) {
2145
		if (Sgl_iszero_mantissa(opnd2)) {
2146
			if (Sgl_isnotnan(opnd3)) {
2147
				if (Sgl_iszero_exponentmantissa(opnd1)) {
2148
					/* 
2149
					 * invalid since multiply operands are
2150
					 * zero & infinity
2151
					 */
2152
					if (Is_invalidtrap_enabled())
2153
						return(OPC_2E_INVALIDEXCEPTION);
2154
					Set_invalidflag();
2155
					Sgl_makequietnan(opnd2);
2156
					Sgl_copytoptr(opnd2,dstptr);
2157
					return(NOEXCEPTION);
2158
				}
2159

2160
				/*
2161
				 * Check third operand for infinity with a
2162
				 *  sign opposite of the multiply result
2163
				 */
2164
				if (Sgl_isinfinity(opnd3) &&
2165
				    (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
2166
					/* 
2167
					 * invalid since attempting a magnitude
2168
					 * subtraction of infinities
2169
					 */
2170
					if (Is_invalidtrap_enabled())
2171
				       		return(OPC_2E_INVALIDEXCEPTION);
2172
				       	Set_invalidflag();
2173
				       	Sgl_makequietnan(resultp1);
2174
					Sgl_copytoptr(resultp1,dstptr);
2175
					return(NOEXCEPTION);
2176
				}
2177

2178
				/*
2179
				 * return infinity
2180
				 */
2181
				Sgl_setinfinity_exponentmantissa(resultp1);
2182
				Sgl_copytoptr(resultp1,dstptr);
2183
				return(NOEXCEPTION);
2184
			}
2185
		}
2186
		else {
2187
			/*
2188
			 * is NaN; signaling or quiet?
2189
			 */
2190
			if (Sgl_isone_signaling(opnd2)) {
2191
				/* trap if INVALIDTRAP enabled */
2192
				if (Is_invalidtrap_enabled())
2193
					return(OPC_2E_INVALIDEXCEPTION);
2194
				/* make NaN quiet */
2195
				Set_invalidflag();
2196
				Sgl_set_quiet(opnd2);
2197
			}
2198
			/* 
2199
			 * is third operand a signaling NaN? 
2200
			 */
2201
			else if (Sgl_is_signalingnan(opnd3)) {
2202
			       	/* trap if INVALIDTRAP enabled */
2203
			       	if (Is_invalidtrap_enabled())
2204
				   		return(OPC_2E_INVALIDEXCEPTION);
2205
			       	/* make NaN quiet */
2206
			       	Set_invalidflag();
2207
			       	Sgl_set_quiet(opnd3);
2208
				Sgl_copytoptr(opnd3,dstptr);
2209
		       		return(NOEXCEPTION);
2210
			}
2211
			/*
2212
			 * return quiet NaN
2213
			 */
2214
			Sgl_copytoptr(opnd2,dstptr);
2215
			return(NOEXCEPTION);
2216
		}
2217
	}
2218

2219
	/*
2220
	 * check third operand for NaN's or infinity
2221
	 */
2222
	if (Sgl_isinfinity_exponent(opnd3)) {
2223
		if (Sgl_iszero_mantissa(opnd3)) {
2224
			/* return infinity */
2225
			Sgl_copytoptr(opnd3,dstptr);
2226
			return(NOEXCEPTION);
2227
		} else {
2228
			/*
2229
			 * is NaN; signaling or quiet?
2230
			 */
2231
			if (Sgl_isone_signaling(opnd3)) {
2232
				/* trap if INVALIDTRAP enabled */
2233
				if (Is_invalidtrap_enabled())
2234
					return(OPC_2E_INVALIDEXCEPTION);
2235
				/* make NaN quiet */
2236
				Set_invalidflag();
2237
				Sgl_set_quiet(opnd3);
2238
			}
2239
			/*
2240
			 * return quiet NaN
2241
 			 */
2242
			Sgl_copytoptr(opnd3,dstptr);
2243
			return(NOEXCEPTION);
2244
		}
2245
    	}
2246

2247
	/*
2248
	 * Generate multiply mantissa
2249
	 */
2250
	if (Sgl_isnotzero_exponent(opnd1)) {
2251
		/* set hidden bit */
2252
		Sgl_clear_signexponent_set_hidden(opnd1);
2253
	}
2254
	else {
2255
		/* check for zero */
2256
		if (Sgl_iszero_mantissa(opnd1)) {
2257
			/*
2258
			 * Perform the add opnd3 with zero here.
2259
			 */
2260
			if (Sgl_iszero_exponentmantissa(opnd3)) {
2261
				if (Is_rounding_mode(ROUNDMINUS)) {
2262
					Sgl_or_signs(opnd3,resultp1);
2263
				} else {
2264
					Sgl_and_signs(opnd3,resultp1);
2265
				}
2266
			}
2267
			/*
2268
			 * Now let's check for trapped underflow case.
2269
			 */
2270
			else if (Sgl_iszero_exponent(opnd3) &&
2271
			         Is_underflowtrap_enabled()) {
2272
                    		/* need to normalize results mantissa */
2273
                    		sign_save = Sgl_signextendedsign(opnd3);
2274
				result_exponent = 0;
2275
                    		Sgl_leftshiftby1(opnd3);
2276
                    		Sgl_normalize(opnd3,result_exponent);
2277
                    		Sgl_set_sign(opnd3,/*using*/sign_save);
2278
                    		Sgl_setwrapped_exponent(opnd3,result_exponent,
2279
							unfl);
2280
                    		Sgl_copytoptr(opnd3,dstptr);
2281
                    		/* inexact = FALSE */
2282
                    		return(OPC_2E_UNDERFLOWEXCEPTION);
2283
			}
2284
			Sgl_copytoptr(opnd3,dstptr);
2285
			return(NOEXCEPTION);
2286
		}
2287
		/* is denormalized, adjust exponent */
2288
		Sgl_clear_signexponent(opnd1);
2289
		Sgl_leftshiftby1(opnd1);
2290
		Sgl_normalize(opnd1,mpy_exponent);
2291
	}
2292
	/* opnd2 needs to have hidden bit set with msb in hidden bit */
2293
	if (Sgl_isnotzero_exponent(opnd2)) {
2294
		Sgl_clear_signexponent_set_hidden(opnd2);
2295
	}
2296
	else {
2297
		/* check for zero */
2298
		if (Sgl_iszero_mantissa(opnd2)) {
2299
			/*
2300
			 * Perform the add opnd3 with zero here.
2301
			 */
2302
			if (Sgl_iszero_exponentmantissa(opnd3)) {
2303
				if (Is_rounding_mode(ROUNDMINUS)) {
2304
					Sgl_or_signs(opnd3,resultp1);
2305
				} else {
2306
					Sgl_and_signs(opnd3,resultp1);
2307
				}
2308
			}
2309
			/*
2310
			 * Now let's check for trapped underflow case.
2311
			 */
2312
			else if (Sgl_iszero_exponent(opnd3) &&
2313
			    Is_underflowtrap_enabled()) {
2314
                    		/* need to normalize results mantissa */
2315
                    		sign_save = Sgl_signextendedsign(opnd3);
2316
				result_exponent = 0;
2317
                    		Sgl_leftshiftby1(opnd3);
2318
                    		Sgl_normalize(opnd3,result_exponent);
2319
                    		Sgl_set_sign(opnd3,/*using*/sign_save);
2320
                    		Sgl_setwrapped_exponent(opnd3,result_exponent,
2321
							unfl);
2322
                    		Sgl_copytoptr(opnd3,dstptr);
2323
                    		/* inexact = FALSE */
2324
                    		return(OPC_2E_UNDERFLOWEXCEPTION);
2325
			}
2326
			Sgl_copytoptr(opnd3,dstptr);
2327
			return(NOEXCEPTION);
2328
		}
2329
		/* is denormalized; want to normalize */
2330
		Sgl_clear_signexponent(opnd2);
2331
		Sgl_leftshiftby1(opnd2);
2332
		Sgl_normalize(opnd2,mpy_exponent);
2333
	}
2334

2335
	/* Multiply the first two source mantissas together */
2336

2337
	/* 
2338
	 * The intermediate result will be kept in tmpres,
2339
	 * which needs enough room for 106 bits of mantissa,
2340
	 * so lets call it a Double extended.
2341
	 */
2342
	Sglext_setzero(tmpresp1,tmpresp2);
2343

2344
	/* 
2345
	 * Four bits at a time are inspected in each loop, and a 
2346
	 * simple shift and add multiply algorithm is used. 
2347
	 */ 
2348
	for (count = SGL_P-1; count >= 0; count -= 4) {
2349
		Sglext_rightshiftby4(tmpresp1,tmpresp2);
2350
		if (Sbit28(opnd1)) {
2351
	 		/* Twoword_add should be an ADD followed by 2 ADDC's */
2352
			Twoword_add(tmpresp1, tmpresp2, opnd2<<3, 0);
2353
		}
2354
		if (Sbit29(opnd1)) {
2355
			Twoword_add(tmpresp1, tmpresp2, opnd2<<2, 0);
2356
		}
2357
		if (Sbit30(opnd1)) {
2358
			Twoword_add(tmpresp1, tmpresp2, opnd2<<1, 0);
2359
		}
2360
		if (Sbit31(opnd1)) {
2361
			Twoword_add(tmpresp1, tmpresp2, opnd2, 0);
2362
		}
2363
		Sgl_rightshiftby4(opnd1);
2364
	}
2365
	if (Is_sexthiddenoverflow(tmpresp1)) {
2366
		/* result mantissa >= 2 (mantissa overflow) */
2367
		mpy_exponent++;
2368
		Sglext_rightshiftby4(tmpresp1,tmpresp2);
2369
	} else {
2370
		Sglext_rightshiftby3(tmpresp1,tmpresp2);
2371
	}
2372

2373
	/*
2374
	 * Restore the sign of the mpy result which was saved in resultp1.
2375
	 * The exponent will continue to be kept in mpy_exponent.
2376
	 */
2377
	Sglext_set_sign(tmpresp1,Sgl_sign(resultp1));
2378

2379
	/* 
2380
	 * No rounding is required, since the result of the multiply
2381
	 * is exact in the extended format.
2382
	 */
2383

2384
	/*
2385
	 * Now we are ready to perform the add portion of the operation.
2386
	 *
2387
	 * The exponents need to be kept as integers for now, since the
2388
	 * multiply result might not fit into the exponent field.  We
2389
	 * can't overflow or underflow because of this yet, since the
2390
	 * add could bring the final result back into range.
2391
	 */
2392
	add_exponent = Sgl_exponent(opnd3);
2393

2394
	/*
2395
	 * Check for denormalized or zero add operand.
2396
	 */
2397
	if (add_exponent == 0) {
2398
		/* check for zero */
2399
		if (Sgl_iszero_mantissa(opnd3)) {
2400
			/* right is zero */
2401
			/* Left can't be zero and must be result.
2402
			 *
2403
			 * The final result is now in tmpres and mpy_exponent,
2404
			 * and needs to be rounded and squeezed back into
2405
			 * double precision format from double extended.
2406
			 */
2407
			result_exponent = mpy_exponent;
2408
			Sglext_copy(tmpresp1,tmpresp2,resultp1,resultp2);
2409
			sign_save = Sgl_signextendedsign(resultp1);/*save sign*/
2410
			goto round;
2411
		}
2412

2413
		/* 
2414
		 * Neither are zeroes.  
2415
		 * Adjust exponent and normalize add operand.
2416
		 */
2417
		sign_save = Sgl_signextendedsign(opnd3);	/* save sign */
2418
		Sgl_clear_signexponent(opnd3);
2419
		Sgl_leftshiftby1(opnd3);
2420
		Sgl_normalize(opnd3,add_exponent);
2421
		Sgl_set_sign(opnd3,sign_save);		/* restore sign */
2422
	} else {
2423
		Sgl_clear_exponent_set_hidden(opnd3);
2424
	}
2425
	/*
2426
	 * Copy opnd3 to the double extended variable called right.
2427
	 */
2428
	Sgl_copyto_sglext(opnd3,rightp1,rightp2);
2429

2430
	/*
2431
	 * A zero "save" helps discover equal operands (for later),
2432
	 * and is used in swapping operands (if needed).
2433
	 */
2434
	Sglext_xortointp1(tmpresp1,rightp1,/*to*/save);
2435

2436
	/*
2437
	 * Compare magnitude of operands.
2438
	 */
2439
	Sglext_copytoint_exponentmantissa(tmpresp1,signlessleft1);
2440
	Sglext_copytoint_exponentmantissa(rightp1,signlessright1);
2441
	if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
2442
	    Sglext_ismagnitudeless(signlessleft1,signlessright1)) {
2443
		/*
2444
		 * Set the left operand to the larger one by XOR swap.
2445
		 * First finish the first word "save".
2446
		 */
2447
		Sglext_xorfromintp1(save,rightp1,/*to*/rightp1);
2448
		Sglext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
2449
		Sglext_swap_lower(tmpresp2,rightp2);
2450
		/* also setup exponents used in rest of routine */
2451
		diff_exponent = add_exponent - mpy_exponent;
2452
		result_exponent = add_exponent;
2453
	} else {
2454
		/* also setup exponents used in rest of routine */
2455
		diff_exponent = mpy_exponent - add_exponent;
2456
		result_exponent = mpy_exponent;
2457
	}
2458
	/* Invariant: left is not smaller than right. */
2459

2460
	/*
2461
	 * Special case alignment of operands that would force alignment
2462
	 * beyond the extent of the extension.  A further optimization
2463
	 * could special case this but only reduces the path length for
2464
	 * this infrequent case.
2465
	 */
2466
	if (diff_exponent > SGLEXT_THRESHOLD) {
2467
		diff_exponent = SGLEXT_THRESHOLD;
2468
	}
2469

2470
	/* Align right operand by shifting it to the right */
2471
	Sglext_clear_sign(rightp1);
2472
	Sglext_right_align(rightp1,rightp2,/*shifted by*/diff_exponent);
2473
	
2474
	/* Treat sum and difference of the operands separately. */
2475
	if ((int)save < 0) {
2476
		/*
2477
		 * Difference of the two operands.  Overflow can occur if the
2478
		 * multiply overflowed.  A borrow can occur out of the hidden
2479
		 * bit and force a post normalization phase.
2480
		 */
2481
		Sglext_subtract(tmpresp1,tmpresp2, rightp1,rightp2,
2482
			resultp1,resultp2);
2483
		sign_save = Sgl_signextendedsign(resultp1);
2484
		if (Sgl_iszero_hidden(resultp1)) {
2485
			/* Handle normalization */
2486
		/* A straightforward algorithm would now shift the
2487
		 * result and extension left until the hidden bit
2488
		 * becomes one.  Not all of the extension bits need
2489
		 * participate in the shift.  Only the two most 
2490
		 * significant bits (round and guard) are needed.
2491
		 * If only a single shift is needed then the guard
2492
		 * bit becomes a significant low order bit and the
2493
		 * extension must participate in the rounding.
2494
		 * If more than a single shift is needed, then all
2495
		 * bits to the right of the guard bit are zeros, 
2496
		 * and the guard bit may or may not be zero. */
2497
			Sglext_leftshiftby1(resultp1,resultp2);
2498

2499
			/* Need to check for a zero result.  The sign and
2500
			 * exponent fields have already been zeroed.  The more
2501
			 * efficient test of the full object can be used.
2502
			 */
2503
			 if (Sglext_iszero(resultp1,resultp2)) {
2504
				/* Must have been "x-x" or "x+(-x)". */
2505
				if (Is_rounding_mode(ROUNDMINUS))
2506
					Sgl_setone_sign(resultp1);
2507
				Sgl_copytoptr(resultp1,dstptr);
2508
				return(NOEXCEPTION);
2509
			}
2510
			result_exponent--;
2511

2512
			/* Look to see if normalization is finished. */
2513
			if (Sgl_isone_hidden(resultp1)) {
2514
				/* No further normalization is needed */
2515
				goto round;
2516
			}
2517

2518
			/* Discover first one bit to determine shift amount.
2519
			 * Use a modified binary search.  We have already
2520
			 * shifted the result one position right and still
2521
			 * not found a one so the remainder of the extension
2522
			 * must be zero and simplifies rounding. */
2523
			/* Scan bytes */
2524
			while (Sgl_iszero_hiddenhigh7mantissa(resultp1)) {
2525
				Sglext_leftshiftby8(resultp1,resultp2);
2526
				result_exponent -= 8;
2527
			}
2528
			/* Now narrow it down to the nibble */
2529
			if (Sgl_iszero_hiddenhigh3mantissa(resultp1)) {
2530
				/* The lower nibble contains the
2531
				 * normalizing one */
2532
				Sglext_leftshiftby4(resultp1,resultp2);
2533
				result_exponent -= 4;
2534
			}
2535
			/* Select case where first bit is set (already
2536
			 * normalized) otherwise select the proper shift. */
2537
			jumpsize = Sgl_hiddenhigh3mantissa(resultp1);
2538
			if (jumpsize <= 7) switch(jumpsize) {
2539
			case 1:
2540
				Sglext_leftshiftby3(resultp1,resultp2);
2541
				result_exponent -= 3;
2542
				break;
2543
			case 2:
2544
			case 3:
2545
				Sglext_leftshiftby2(resultp1,resultp2);
2546
				result_exponent -= 2;
2547
				break;
2548
			case 4:
2549
			case 5:
2550
			case 6:
2551
			case 7:
2552
				Sglext_leftshiftby1(resultp1,resultp2);
2553
				result_exponent -= 1;
2554
				break;
2555
			}
2556
		} /* end if (hidden...)... */
2557
	/* Fall through and round */
2558
	} /* end if (save < 0)... */
2559
	else {
2560
		/* Add magnitudes */
2561
		Sglext_addition(tmpresp1,tmpresp2,
2562
			rightp1,rightp2, /*to*/resultp1,resultp2);
2563
		sign_save = Sgl_signextendedsign(resultp1);
2564
		if (Sgl_isone_hiddenoverflow(resultp1)) {
2565
	    		/* Prenormalization required. */
2566
	    		Sglext_arithrightshiftby1(resultp1,resultp2);
2567
	    		result_exponent++;
2568
		} /* end if hiddenoverflow... */
2569
	} /* end else ...add magnitudes... */
2570

2571
	/* Round the result.  If the extension and lower two words are
2572
	 * all zeros, then the result is exact.  Otherwise round in the
2573
	 * correct direction.  Underflow is possible. If a postnormalization
2574
	 * is necessary, then the mantissa is all zeros so no shift is needed.
2575
	 */
2576
  round:
2577
	if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
2578
		Sglext_denormalize(resultp1,resultp2,result_exponent,is_tiny);
2579
	}
2580
	Sgl_set_sign(resultp1,/*using*/sign_save);
2581
	if (Sglext_isnotzero_mantissap2(resultp2)) {
2582
		inexact = TRUE;
2583
		switch(Rounding_mode()) {
2584
		case ROUNDNEAREST: /* The default. */
2585
			if (Sglext_isone_highp2(resultp2)) {
2586
				/* at least 1/2 ulp */
2587
				if (Sglext_isnotzero_low31p2(resultp2) ||
2588
				    Sglext_isone_lowp1(resultp1)) {
2589
					/* either exactly half way and odd or
2590
					 * more than 1/2ulp */
2591
					Sgl_increment(resultp1);
2592
				}
2593
			}
2594
	    		break;
2595

2596
		case ROUNDPLUS:
2597
	    		if (Sgl_iszero_sign(resultp1)) {
2598
				/* Round up positive results */
2599
				Sgl_increment(resultp1);
2600
			}
2601
			break;
2602
	    
2603
		case ROUNDMINUS:
2604
	    		if (Sgl_isone_sign(resultp1)) {
2605
				/* Round down negative results */
2606
				Sgl_increment(resultp1);
2607
			}
2608
	    
2609
		case ROUNDZERO:;
2610
			/* truncate is simple */
2611
		} /* end switch... */
2612
		if (Sgl_isone_hiddenoverflow(resultp1)) result_exponent++;
2613
	}
2614
	if (result_exponent >= SGL_INFINITY_EXPONENT) {
2615
		/* Overflow */
2616
		if (Is_overflowtrap_enabled()) {
2617
                        /*
2618
                         * Adjust bias of result
2619
                         */
2620
                        Sgl_setwrapped_exponent(resultp1,result_exponent,ovfl);
2621
                        Sgl_copytoptr(resultp1,dstptr);
2622
                        if (inexact)
2623
                            if (Is_inexacttrap_enabled())
2624
                                return (OPC_2E_OVERFLOWEXCEPTION |
2625
					OPC_2E_INEXACTEXCEPTION);
2626
                            else Set_inexactflag();
2627
                        return (OPC_2E_OVERFLOWEXCEPTION);
2628
		}
2629
		inexact = TRUE;
2630
		Set_overflowflag();
2631
		Sgl_setoverflow(resultp1);
2632
	} else if (result_exponent <= 0) {	/* underflow case */
2633
		if (Is_underflowtrap_enabled()) {
2634
                        /*
2635
                         * Adjust bias of result
2636
                         */
2637
                	Sgl_setwrapped_exponent(resultp1,result_exponent,unfl);
2638
			Sgl_copytoptr(resultp1,dstptr);
2639
                        if (inexact)
2640
                            if (Is_inexacttrap_enabled())
2641
                                return (OPC_2E_UNDERFLOWEXCEPTION |
2642
					OPC_2E_INEXACTEXCEPTION);
2643
                            else Set_inexactflag();
2644
	    		return(OPC_2E_UNDERFLOWEXCEPTION);
2645
		}
2646
		else if (inexact && is_tiny) Set_underflowflag();
2647
	}
2648
	else Sgl_set_exponent(resultp1,result_exponent);
2649
	Sgl_copytoptr(resultp1,dstptr);
2650
	if (inexact) 
2651
		if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
2652
		else Set_inexactflag();
2653
    	return(NOEXCEPTION);
2654
}
2655

2656

2657
Product

Resources

Company