/*1* Linux/PA-RISC Project (http://www.parisc-linux.org/)2*3* Floating-point emulation code4* Copyright (C) 2001 Hewlett-Packard (Paul Bame) <[email protected]>5*6* This program is free software; you can redistribute it and/or modify7* it under the terms of the GNU General Public License as published by8* the Free Software Foundation; either version 2, or (at your option)9* any later version.10*11* This program is distributed in the hope that it will be useful,12* but WITHOUT ANY WARRANTY; without even the implied warranty of13* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the14* GNU General Public License for more details.15*16* You should have received a copy of the GNU General Public License17* along with this program; if not, write to the Free Software18* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA19*/20/*21* BEGIN_DESC22*23* File:24* @(#) pa/spmath/dfsub.c $Revision: 1.1 $25*26* Purpose:27* Double_subtract: subtract two double precision values.28*29* External Interfaces:30* dbl_fsub(leftptr, rightptr, dstptr, status)31*32* Internal Interfaces:33*34* Theory:35* <<please update with a overview of the operation of this file>>36*37* END_DESC38*/394041#include "float.h"42#include "dbl_float.h"4344/*45* Double_subtract: subtract two double precision values.46*/47int48dbl_fsub(49dbl_floating_point *leftptr,50dbl_floating_point *rightptr,51dbl_floating_point *dstptr,52unsigned int *status)53{54register unsigned int signless_upper_left, signless_upper_right, save;55register unsigned int leftp1, leftp2, rightp1, rightp2, extent;56register unsigned int resultp1 = 0, resultp2 = 0;5758register int result_exponent, right_exponent, diff_exponent;59register int sign_save, jumpsize;60register boolean inexact = FALSE, underflowtrap;6162/* Create local copies of the numbers */63Dbl_copyfromptr(leftptr,leftp1,leftp2);64Dbl_copyfromptr(rightptr,rightp1,rightp2);6566/* A zero "save" helps discover equal operands (for later), *67* and is used in swapping operands (if needed). */68Dbl_xortointp1(leftp1,rightp1,/*to*/save);6970/*71* check first operand for NaN's or infinity72*/73if ((result_exponent = Dbl_exponent(leftp1)) == DBL_INFINITY_EXPONENT)74{75if (Dbl_iszero_mantissa(leftp1,leftp2))76{77if (Dbl_isnotnan(rightp1,rightp2))78{79if (Dbl_isinfinity(rightp1,rightp2) && save==0)80{81/*82* invalid since operands are same signed infinity's83*/84if (Is_invalidtrap_enabled()) return(INVALIDEXCEPTION);85Set_invalidflag();86Dbl_makequietnan(resultp1,resultp2);87Dbl_copytoptr(resultp1,resultp2,dstptr);88return(NOEXCEPTION);89}90/*91* return infinity92*/93Dbl_copytoptr(leftp1,leftp2,dstptr);94return(NOEXCEPTION);95}96}97else98{99/*100* is NaN; signaling or quiet?101*/102if (Dbl_isone_signaling(leftp1))103{104/* trap if INVALIDTRAP enabled */105if (Is_invalidtrap_enabled()) return(INVALIDEXCEPTION);106/* make NaN quiet */107Set_invalidflag();108Dbl_set_quiet(leftp1);109}110/*111* is second operand a signaling NaN?112*/113else if (Dbl_is_signalingnan(rightp1))114{115/* trap if INVALIDTRAP enabled */116if (Is_invalidtrap_enabled()) return(INVALIDEXCEPTION);117/* make NaN quiet */118Set_invalidflag();119Dbl_set_quiet(rightp1);120Dbl_copytoptr(rightp1,rightp2,dstptr);121return(NOEXCEPTION);122}123/*124* return quiet NaN125*/126Dbl_copytoptr(leftp1,leftp2,dstptr);127return(NOEXCEPTION);128}129} /* End left NaN or Infinity processing */130/*131* check second operand for NaN's or infinity132*/133if (Dbl_isinfinity_exponent(rightp1))134{135if (Dbl_iszero_mantissa(rightp1,rightp2))136{137/* return infinity */138Dbl_invert_sign(rightp1);139Dbl_copytoptr(rightp1,rightp2,dstptr);140return(NOEXCEPTION);141}142/*143* is NaN; signaling or quiet?144*/145if (Dbl_isone_signaling(rightp1))146{147/* trap if INVALIDTRAP enabled */148if (Is_invalidtrap_enabled()) return(INVALIDEXCEPTION);149/* make NaN quiet */150Set_invalidflag();151Dbl_set_quiet(rightp1);152}153/*154* return quiet NaN155*/156Dbl_copytoptr(rightp1,rightp2,dstptr);157return(NOEXCEPTION);158} /* End right NaN or Infinity processing */159160/* Invariant: Must be dealing with finite numbers */161162/* Compare operands by removing the sign */163Dbl_copytoint_exponentmantissap1(leftp1,signless_upper_left);164Dbl_copytoint_exponentmantissap1(rightp1,signless_upper_right);165166/* sign difference selects add or sub operation. */167if(Dbl_ismagnitudeless(leftp2,rightp2,signless_upper_left,signless_upper_right))168{169/* Set the left operand to the larger one by XOR swap *170* First finish the first word using "save" */171Dbl_xorfromintp1(save,rightp1,/*to*/rightp1);172Dbl_xorfromintp1(save,leftp1,/*to*/leftp1);173Dbl_swap_lower(leftp2,rightp2);174result_exponent = Dbl_exponent(leftp1);175Dbl_invert_sign(leftp1);176}177/* Invariant: left is not smaller than right. */178179if((right_exponent = Dbl_exponent(rightp1)) == 0)180{181/* Denormalized operands. First look for zeroes */182if(Dbl_iszero_mantissa(rightp1,rightp2))183{184/* right is zero */185if(Dbl_iszero_exponentmantissa(leftp1,leftp2))186{187/* Both operands are zeros */188Dbl_invert_sign(rightp1);189if(Is_rounding_mode(ROUNDMINUS))190{191Dbl_or_signs(leftp1,/*with*/rightp1);192}193else194{195Dbl_and_signs(leftp1,/*with*/rightp1);196}197}198else199{200/* Left is not a zero and must be the result. Trapped201* underflows are signaled if left is denormalized. Result202* is always exact. */203if( (result_exponent == 0) && Is_underflowtrap_enabled() )204{205/* need to normalize results mantissa */206sign_save = Dbl_signextendedsign(leftp1);207Dbl_leftshiftby1(leftp1,leftp2);208Dbl_normalize(leftp1,leftp2,result_exponent);209Dbl_set_sign(leftp1,/*using*/sign_save);210Dbl_setwrapped_exponent(leftp1,result_exponent,unfl);211Dbl_copytoptr(leftp1,leftp2,dstptr);212/* inexact = FALSE */213return(UNDERFLOWEXCEPTION);214}215}216Dbl_copytoptr(leftp1,leftp2,dstptr);217return(NOEXCEPTION);218}219220/* Neither are zeroes */221Dbl_clear_sign(rightp1); /* Exponent is already cleared */222if(result_exponent == 0 )223{224/* Both operands are denormalized. The result must be exact225* and is simply calculated. A sum could become normalized and a226* difference could cancel to a true zero. */227if( (/*signed*/int) save >= 0 )228{229Dbl_subtract(leftp1,leftp2,/*minus*/rightp1,rightp2,230/*into*/resultp1,resultp2);231if(Dbl_iszero_mantissa(resultp1,resultp2))232{233if(Is_rounding_mode(ROUNDMINUS))234{235Dbl_setone_sign(resultp1);236}237else238{239Dbl_setzero_sign(resultp1);240}241Dbl_copytoptr(resultp1,resultp2,dstptr);242return(NOEXCEPTION);243}244}245else246{247Dbl_addition(leftp1,leftp2,rightp1,rightp2,248/*into*/resultp1,resultp2);249if(Dbl_isone_hidden(resultp1))250{251Dbl_copytoptr(resultp1,resultp2,dstptr);252return(NOEXCEPTION);253}254}255if(Is_underflowtrap_enabled())256{257/* need to normalize result */258sign_save = Dbl_signextendedsign(resultp1);259Dbl_leftshiftby1(resultp1,resultp2);260Dbl_normalize(resultp1,resultp2,result_exponent);261Dbl_set_sign(resultp1,/*using*/sign_save);262Dbl_setwrapped_exponent(resultp1,result_exponent,unfl);263Dbl_copytoptr(resultp1,resultp2,dstptr);264/* inexact = FALSE */265return(UNDERFLOWEXCEPTION);266}267Dbl_copytoptr(resultp1,resultp2,dstptr);268return(NOEXCEPTION);269}270right_exponent = 1; /* Set exponent to reflect different bias271* with denomalized numbers. */272}273else274{275Dbl_clear_signexponent_set_hidden(rightp1);276}277Dbl_clear_exponent_set_hidden(leftp1);278diff_exponent = result_exponent - right_exponent;279280/*281* Special case alignment of operands that would force alignment282* beyond the extent of the extension. A further optimization283* could special case this but only reduces the path length for this284* infrequent case.285*/286if(diff_exponent > DBL_THRESHOLD)287{288diff_exponent = DBL_THRESHOLD;289}290291/* Align right operand by shifting to right */292Dbl_right_align(/*operand*/rightp1,rightp2,/*shifted by*/diff_exponent,293/*and lower to*/extent);294295/* Treat sum and difference of the operands separately. */296if( (/*signed*/int) save >= 0 )297{298/*299* Difference of the two operands. Their can be no overflow. A300* borrow can occur out of the hidden bit and force a post301* normalization phase.302*/303Dbl_subtract_withextension(leftp1,leftp2,/*minus*/rightp1,rightp2,304/*with*/extent,/*into*/resultp1,resultp2);305if(Dbl_iszero_hidden(resultp1))306{307/* Handle normalization */308/* A straight forward algorithm would now shift the result309* and extension left until the hidden bit becomes one. Not310* all of the extension bits need participate in the shift.311* Only the two most significant bits (round and guard) are312* needed. If only a single shift is needed then the guard313* bit becomes a significant low order bit and the extension314* must participate in the rounding. If more than a single315* shift is needed, then all bits to the right of the guard316* bit are zeros, and the guard bit may or may not be zero. */317sign_save = Dbl_signextendedsign(resultp1);318Dbl_leftshiftby1_withextent(resultp1,resultp2,extent,resultp1,resultp2);319320/* Need to check for a zero result. The sign and exponent321* fields have already been zeroed. The more efficient test322* of the full object can be used.323*/324if(Dbl_iszero(resultp1,resultp2))325/* Must have been "x-x" or "x+(-x)". */326{327if(Is_rounding_mode(ROUNDMINUS)) Dbl_setone_sign(resultp1);328Dbl_copytoptr(resultp1,resultp2,dstptr);329return(NOEXCEPTION);330}331result_exponent--;332/* Look to see if normalization is finished. */333if(Dbl_isone_hidden(resultp1))334{335if(result_exponent==0)336{337/* Denormalized, exponent should be zero. Left operand *338* was normalized, so extent (guard, round) was zero */339goto underflow;340}341else342{343/* No further normalization is needed. */344Dbl_set_sign(resultp1,/*using*/sign_save);345Ext_leftshiftby1(extent);346goto round;347}348}349350/* Check for denormalized, exponent should be zero. Left *351* operand was normalized, so extent (guard, round) was zero */352if(!(underflowtrap = Is_underflowtrap_enabled()) &&353result_exponent==0) goto underflow;354355/* Shift extension to complete one bit of normalization and356* update exponent. */357Ext_leftshiftby1(extent);358359/* Discover first one bit to determine shift amount. Use a360* modified binary search. We have already shifted the result361* one position right and still not found a one so the remainder362* of the extension must be zero and simplifies rounding. */363/* Scan bytes */364while(Dbl_iszero_hiddenhigh7mantissa(resultp1))365{366Dbl_leftshiftby8(resultp1,resultp2);367if((result_exponent -= 8) <= 0 && !underflowtrap)368goto underflow;369}370/* Now narrow it down to the nibble */371if(Dbl_iszero_hiddenhigh3mantissa(resultp1))372{373/* The lower nibble contains the normalizing one */374Dbl_leftshiftby4(resultp1,resultp2);375if((result_exponent -= 4) <= 0 && !underflowtrap)376goto underflow;377}378/* Select case were first bit is set (already normalized)379* otherwise select the proper shift. */380if((jumpsize = Dbl_hiddenhigh3mantissa(resultp1)) > 7)381{382/* Already normalized */383if(result_exponent <= 0) goto underflow;384Dbl_set_sign(resultp1,/*using*/sign_save);385Dbl_set_exponent(resultp1,/*using*/result_exponent);386Dbl_copytoptr(resultp1,resultp2,dstptr);387return(NOEXCEPTION);388}389Dbl_sethigh4bits(resultp1,/*using*/sign_save);390switch(jumpsize)391{392case 1:393{394Dbl_leftshiftby3(resultp1,resultp2);395result_exponent -= 3;396break;397}398case 2:399case 3:400{401Dbl_leftshiftby2(resultp1,resultp2);402result_exponent -= 2;403break;404}405case 4:406case 5:407case 6:408case 7:409{410Dbl_leftshiftby1(resultp1,resultp2);411result_exponent -= 1;412break;413}414}415if(result_exponent > 0)416{417Dbl_set_exponent(resultp1,/*using*/result_exponent);418Dbl_copytoptr(resultp1,resultp2,dstptr);419return(NOEXCEPTION); /* Sign bit is already set */420}421/* Fixup potential underflows */422underflow:423if(Is_underflowtrap_enabled())424{425Dbl_set_sign(resultp1,sign_save);426Dbl_setwrapped_exponent(resultp1,result_exponent,unfl);427Dbl_copytoptr(resultp1,resultp2,dstptr);428/* inexact = FALSE */429return(UNDERFLOWEXCEPTION);430}431/*432* Since we cannot get an inexact denormalized result,433* we can now return.434*/435Dbl_fix_overshift(resultp1,resultp2,(1-result_exponent),extent);436Dbl_clear_signexponent(resultp1);437Dbl_set_sign(resultp1,sign_save);438Dbl_copytoptr(resultp1,resultp2,dstptr);439return(NOEXCEPTION);440} /* end if(hidden...)... */441/* Fall through and round */442} /* end if(save >= 0)... */443else444{445/* Subtract magnitudes */446Dbl_addition(leftp1,leftp2,rightp1,rightp2,/*to*/resultp1,resultp2);447if(Dbl_isone_hiddenoverflow(resultp1))448{449/* Prenormalization required. */450Dbl_rightshiftby1_withextent(resultp2,extent,extent);451Dbl_arithrightshiftby1(resultp1,resultp2);452result_exponent++;453} /* end if hiddenoverflow... */454} /* end else ...subtract magnitudes... */455456/* Round the result. If the extension is all zeros,then the result is457* exact. Otherwise round in the correct direction. No underflow is458* possible. If a postnormalization is necessary, then the mantissa is459* all zeros so no shift is needed. */460round:461if(Ext_isnotzero(extent))462{463inexact = TRUE;464switch(Rounding_mode())465{466case ROUNDNEAREST: /* The default. */467if(Ext_isone_sign(extent))468{469/* at least 1/2 ulp */470if(Ext_isnotzero_lower(extent) ||471Dbl_isone_lowmantissap2(resultp2))472{473/* either exactly half way and odd or more than 1/2ulp */474Dbl_increment(resultp1,resultp2);475}476}477break;478479case ROUNDPLUS:480if(Dbl_iszero_sign(resultp1))481{482/* Round up positive results */483Dbl_increment(resultp1,resultp2);484}485break;486487case ROUNDMINUS:488if(Dbl_isone_sign(resultp1))489{490/* Round down negative results */491Dbl_increment(resultp1,resultp2);492}493494case ROUNDZERO:;495/* truncate is simple */496} /* end switch... */497if(Dbl_isone_hiddenoverflow(resultp1)) result_exponent++;498}499if(result_exponent == DBL_INFINITY_EXPONENT)500{501/* Overflow */502if(Is_overflowtrap_enabled())503{504Dbl_setwrapped_exponent(resultp1,result_exponent,ovfl);505Dbl_copytoptr(resultp1,resultp2,dstptr);506if (inexact)507if (Is_inexacttrap_enabled())508return(OVERFLOWEXCEPTION | INEXACTEXCEPTION);509else Set_inexactflag();510return(OVERFLOWEXCEPTION);511}512else513{514inexact = TRUE;515Set_overflowflag();516Dbl_setoverflow(resultp1,resultp2);517}518}519else Dbl_set_exponent(resultp1,result_exponent);520Dbl_copytoptr(resultp1,resultp2,dstptr);521if(inexact)522if(Is_inexacttrap_enabled()) return(INEXACTEXCEPTION);523else Set_inexactflag();524return(NOEXCEPTION);525}526527528