.file	"reg_round.S"
/*
 * reg_round.S
 *
 * Rounding/truncation/etc for FPU basic arithmetic functions.
 *
 * Copyright (C) 1993,1995,1997
 *                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,
 *                       Australia.  E-mail   [email protected]
 *
 * This code has four possible entry points.
 * The following must be entered by a jmp instruction:
 *   fpu_reg_round, fpu_reg_round_sqrt, and fpu_Arith_exit.
 *
 * The FPU_round entry point is intended to be used by C code.
 * From C, call as:
 *  int FPU_round(FPU_REG *arg, unsigned int extent, unsigned int control_w)
 *
 *    Return value is the tag of the answer, or-ed with FPU_Exception if
 *    one was raised, or -1 on internal error.
 *
 * For correct "up" and "down" rounding, the argument must have the correct
 * sign.
 *
 * NOTE(review): the code below reads the control word from PARAM4 and
 * compares PARAM5 against SIGN_POS to obtain the sign, so the
 * three-argument prototype quoted above looks out of date -- confirm
 * against the C declaration.  No fpu_reg_round_sqrt label is defined in
 * the text of this file as seen here.
 */

/*
 * Four entry points.
 *
 * Needed by both the fpu_reg_round and fpu_reg_round_sqrt entry points:
 *  %eax:%ebx  64 bit significand
 *  %edx       32 bit extension of the significand
 *  %edi       pointer to an FPU_REG for the result to be stored
 *  stack      calling function must have set up a C stack frame and
 *             pushed %esi, %edi, and %ebx
 *
 * Needed just for the fpu_reg_round_sqrt entry point:
 *  %cx  A control word in the same format as the FPU control word.
 * Otherwise, PARAM4 must give such a value.
 *
 *
 * The significand and its extension are assumed to be exact in the
 * following sense:
 *   If the significand by itself is the exact result then the significand
 *   extension (%edx) must contain 0, otherwise the significand extension
 *   must be non-zero.
 *   If the significand extension is non-zero then the significand is
 *   smaller than the magnitude of the correct exact result by an amount
 *   greater than zero and less than one ls bit of the significand.
 *   The significand extension is only required to have three possible
 *   non-zero values:
 *       less than 0x80000000  <=> the significand is less than 1/2 an ls
 *                                 bit smaller than the magnitude of the
 *                                 true exact result.
 *         exactly 0x80000000  <=> the significand is exactly 1/2 an ls bit
 *                                 smaller than the magnitude of the true
 *                                 exact result.
 *    greater than 0x80000000  <=> the significand is more than 1/2 an ls
 *                                 bit smaller than the magnitude of the
 *                                 true exact result.
 */

/*
 * The code in this module has become quite complex, but it should handle
 * all of the FPU flags which are set at this stage of the basic arithmetic
 * computations.
 * There are a few rare cases where the results are not set identically to
 * a real FPU. These require a bit more thought because at this stage the
 * results of the code here appear to be more consistent...
 * This may be changed in a future version.
 */


#include "fpu_emu.h"
#include "exception.h"
#include "control_w.h"

/* Flags for FPU_bits_lost */
#define	LOST_DOWN	$1
#define	LOST_UP		$2

/* Flags for FPU_denormal */
#define	DENORMAL	$1
#define	UNMASKED_UNDERFLOW $2


#ifndef NON_REENTRANT_FPU
/*	Make the code re-entrant by putting
	local storage on the stack: */
#define FPU_bits_lost	(%esp)
#define FPU_denormal	1(%esp)

#else
/*	Not re-entrant, so we can gain speed by putting
	local storage in a static area: */
.data
	.align 4,0
FPU_bits_lost:
	.byte	0
FPU_denormal:
	.byte	0
#endif /* NON_REENTRANT_FPU */


.text
.globl fpu_reg_round
.globl fpu_Arith_exit

/* Entry point when called from C */
ENTRY(FPU_round)
	/* Build the stack frame which the jmp entry points expect. */
	pushl	%ebp
	movl	%esp,%ebp
	pushl	%esi
	pushl	%edi
	pushl	%ebx

	/* Load the registers the same way a jmp caller would have. */
	movl	PARAM1,%edi		/* %edi = pointer to the FPU_REG */
	movl	SIGH(%edi),%eax		/* %eax:%ebx = significand */
	movl	SIGL(%edi),%ebx
	movl	PARAM2,%edx		/* %edx = significand extension */

fpu_reg_round:			/* Normal entry point */
	movl	PARAM4,%ecx		/* %ecx = control word */

#ifndef NON_REENTRANT_FPU
	/*
	 * Reserve one stack slot; FPU_bits_lost and FPU_denormal are
	 * defined above as the bytes at (%esp) and 1(%esp).
	 */
	pushl	%ebx		/* adjust the stack pointer */
#endif /* NON_REENTRANT_FPU */

#ifdef PARANOID
/* Cannot use this here yet */
/*	orl	%eax,%eax */
/*	jns	L_entry_bugged */
#endif /* PARANOID */

	cmpw	EXP_UNDER,EXP(%edi)
	jle	L_Make_denorm		/* The number is a de-normal */

	movb	$0,FPU_denormal		/* 0 -> not a de-normal */

Denorm_done:
	movb	$0,FPU_bits_lost	/* No bits yet lost in rounding */

	/* Keep the full control word in %esi; dispatch on the PC field. */
	movl	%ecx,%esi
	andl	CW_PC,%ecx
	cmpl	PR_64_BITS,%ecx
	je	LRound_To_64

	cmpl	PR_53_BITS,%ecx
	je	LRound_To_53

	cmpl	PR_24_BITS,%ecx
	je	LRound_To_24

#ifdef PECULIAR_486
/* With the precision control bits set to 01 "(reserved)", a real 80486
   behaves as if the precision control bits were set to 11 "64 bits" */
	cmpl	PR_RESERVED_BITS,%ecx
	je	LRound_To_64
#ifdef PARANOID
	jmp	L_bugged_denorm_486
#endif /* PARANOID */
#else
#ifdef PARANOID
	jmp	L_bugged_denorm	/* There is no bug, just a bad control word */
#endif /* PARANOID */
#endif /* PECULIAR_486 */


/* Round etc to 24 bit precision */
LRound_To_24:
	/* Dispatch on the rounding-control field of the saved control word. */
	movl	%esi,%ecx
	andl	CW_RC,%ecx
	cmpl	RC_RND,%ecx
	je	LRound_nearest_24

	cmpl	RC_CHOP,%ecx
	je	LCheck_truncate_24

	cmpl	RC_UP,%ecx		/* Towards +infinity */
	je	LUp_24

	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
	je	LDown_24

#ifdef PARANOID
	jmp	L_bugged_round24
#endif /* PARANOID */

LUp_24:
	cmpb	SIGN_POS,PARAM5
	jne	LCheck_truncate_24	/* If negative then  up==truncate */

	jmp	LCheck_24_round_up

LDown_24:
	cmpb	SIGN_POS,PARAM5
	je	LCheck_truncate_24	/* If positive then  down==truncate */

LCheck_24_round_up:
	/* Round up only if any bit below the 24th is set. */
	movl	%eax,%ecx
	andl	$0x000000ff,%ecx
	orl	%ebx,%ecx
	orl	%edx,%ecx
	jnz	LDo_24_round_up
	jmp	L_Re_normalise

LRound_nearest_24:
	/* Do rounding of the 24th bit if needed (nearest or even) */
	movl	%eax,%ecx
	andl	$0x000000ff,%ecx
	cmpl	$0x00000080,%ecx
	jc	LCheck_truncate_24	/* less than half, no increment needed */

	jne	LGreater_Half_24	/* greater than half, increment needed */

	/* Possibly half, we need to check the ls bits */
	orl	%ebx,%ebx
	jnz	LGreater_Half_24	/* greater than half, increment needed */

	orl	%edx,%edx
	jnz	LGreater_Half_24	/* greater than half, increment needed */

	/* Exactly half, increment only if 24th bit is 1 (round to even) */
	testl	$0x00000100,%eax
	jz	LDo_truncate_24

LGreater_Half_24:			/* Rounding: increment at the 24th bit */
LDo_24_round_up:
	andl	$0xffffff00,%eax	/* Truncate to 24 bits */
	xorl	%ebx,%ebx
	movb	LOST_UP,FPU_bits_lost
	addl	$0x00000100,%eax
	/* jmp leaves the carry from addl intact for the jnc at the target */
	jmp	LCheck_Round_Overflow

LCheck_truncate_24:
	movl	%eax,%ecx
	andl	$0x000000ff,%ecx
	orl	%ebx,%ecx
	orl	%edx,%ecx
	jz	L_Re_normalise		/* No truncation needed */

LDo_truncate_24:
	andl	$0xffffff00,%eax	/* Truncate to 24 bits */
	xorl	%ebx,%ebx
	movb	LOST_DOWN,FPU_bits_lost
	jmp	L_Re_normalise


/* Round etc to 53 bit precision */
LRound_To_53:
	/* Dispatch on the rounding-control field of the saved control word. */
	movl	%esi,%ecx
	andl	CW_RC,%ecx
	cmpl	RC_RND,%ecx
	je	LRound_nearest_53

	cmpl	RC_CHOP,%ecx
	je	LCheck_truncate_53

	cmpl	RC_UP,%ecx		/* Towards +infinity */
	je	LUp_53

	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
	je	LDown_53

#ifdef PARANOID
	jmp	L_bugged_round53
#endif /* PARANOID */

LUp_53:
	cmpb	SIGN_POS,PARAM5
	jne	LCheck_truncate_53	/* If negative then  up==truncate */

	jmp	LCheck_53_round_up

LDown_53:
	cmpb	SIGN_POS,PARAM5
	je	LCheck_truncate_53	/* If positive then  down==truncate */

LCheck_53_round_up:
	/* Round up only if any bit below the 53rd is set. */
	movl	%ebx,%ecx
	andl	$0x000007ff,%ecx
	orl	%edx,%ecx
	jnz	LDo_53_round_up
	jmp	L_Re_normalise

LRound_nearest_53:
	/* Do rounding of the 53rd bit if needed (nearest or even) */
	movl	%ebx,%ecx
	andl	$0x000007ff,%ecx
	cmpl	$0x00000400,%ecx
	jc	LCheck_truncate_53	/* less than half, no increment needed */

	jnz	LGreater_Half_53	/* greater than half, increment needed */

	/* Possibly half, we need to check the ls bits */
	orl	%edx,%edx
	jnz	LGreater_Half_53	/* greater than half, increment needed */

	/* Exactly half, increment only if 53rd bit is 1 (round to even) */
	testl	$0x00000800,%ebx
	jz	LTruncate_53

LGreater_Half_53:			/* Rounding: increment at the 53rd bit */
LDo_53_round_up:
	movb	LOST_UP,FPU_bits_lost
	andl	$0xfffff800,%ebx	/* Truncate to 53 bits */
	addl	$0x00000800,%ebx
	adcl	$0,%eax
	/* jmp leaves the carry from adcl intact for the jnc at the target */
	jmp	LCheck_Round_Overflow

LCheck_truncate_53:
	movl	%ebx,%ecx
	andl	$0x000007ff,%ecx
	orl	%edx,%ecx
	jz	L_Re_normalise

LTruncate_53:
	movb	LOST_DOWN,FPU_bits_lost
	andl	$0xfffff800,%ebx	/* Truncate to 53 bits */
	jmp	L_Re_normalise


/* Round etc to 64 bit precision */
LRound_To_64:
	/* Dispatch on the rounding-control field of the saved control word. */
	movl	%esi,%ecx
	andl	CW_RC,%ecx
	cmpl	RC_RND,%ecx
	je	LRound_nearest_64

	cmpl	RC_CHOP,%ecx
	je	LCheck_truncate_64

	cmpl	RC_UP,%ecx		/* Towards +infinity */
	je	LUp_64

	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
	je	LDown_64

#ifdef PARANOID
	jmp	L_bugged_round64
#endif /* PARANOID */

LUp_64:
	cmpb	SIGN_POS,PARAM5
	jne	LCheck_truncate_64	/* If negative then  up==truncate */

	/* Round up only if the extension is non-zero. */
	orl	%edx,%edx
	jnz	LDo_64_round_up
	jmp	L_Re_normalise

LDown_64:
	cmpb	SIGN_POS,PARAM5
	je	LCheck_truncate_64	/* If positive then  down==truncate */

	/* Round up (in magnitude) only if the extension is non-zero. */
	orl	%edx,%edx
	jnz	LDo_64_round_up
	jmp	L_Re_normalise

LRound_nearest_64:
	cmpl	$0x80000000,%edx
	jc	LCheck_truncate_64	/* extension is less than half */

	jne	LDo_64_round_up		/* extension is more than half */

	/* Now test for round-to-even */
	testb	$1,%bl
	jz	LCheck_truncate_64

LDo_64_round_up:
	movb	LOST_UP,FPU_bits_lost
	addl	$1,%ebx
	adcl	$0,%eax

LCheck_Round_Overflow:
	/* Carry here means the increment overflowed out of %eax. */
	jnc	L_Re_normalise

	/* Overflow, adjust the result (significand to 1.0) */
	rcrl	$1,%eax
	rcrl	$1,%ebx
	incw	EXP(%edi)
	jmp	L_Re_normalise

LCheck_truncate_64:
	orl	%edx,%edx
	jz	L_Re_normalise

LTruncate_64:
	movb	LOST_DOWN,FPU_bits_lost

L_Re_normalise:
	/* A non-zero FPU_denormal means the entry shift must be dealt with. */
	testb	$0xff,FPU_denormal
	jnz	Normalise_result

L_Normalised:
	movl	TAG_Valid,%edx

L_deNormalised:
	/* %edx holds the tag here; report any precision loss first. */
	cmpb	LOST_UP,FPU_bits_lost
	je	L_precision_lost_up

	cmpb	LOST_DOWN,FPU_bits_lost
	je	L_precision_lost_down

L_no_precision_loss:
	/* store the result */

L_Store_significand:
	movl	%eax,SIGH(%edi)
	movl	%ebx,SIGL(%edi)

	cmpw	EXP_OVER,EXP(%edi)
	jge	L_overflow

	movl	%edx,%eax		/* return value = tag */

	/* Convert the exponent to 80x87 form. */
	addw	EXTENDED_Ebias,EXP(%edi)
	andw	$0x7fff,EXP(%edi)

fpu_reg_round_signed_special_exit:

	cmpb	SIGN_POS,PARAM5
	je	fpu_reg_round_special_exit

	orw	$0x8000,EXP(%edi)	/* Negative sign for the result. */

fpu_reg_round_special_exit:

#ifndef NON_REENTRANT_FPU
	/* Release the slot reserved at fpu_reg_round for the local bytes. */
	popl	%ebx		/* adjust the stack pointer */
#endif /* NON_REENTRANT_FPU */

fpu_Arith_exit:
	/* Undo the frame built at FPU_round (or by the jmp caller). */
	popl	%ebx
	popl	%edi
	popl	%esi
	leave
	ret


/*
 * Set the FPU status flags to represent precision loss due to
 * round-up.
 * %edx (tag) and %eax (ms significand word) are saved around the call.
 */
L_precision_lost_up:
	push	%edx
	push	%eax
	call	set_precision_flag_up
	popl	%eax
	popl	%edx
	jmp	L_no_precision_loss

/*
 * Set the FPU status flags to represent precision loss due to
 * truncation.
 * %edx (tag) and %eax (ms significand word) are saved around the call.
 */
L_precision_lost_down:
	push	%edx
	push	%eax
	call	set_precision_flag_down
	popl	%eax
	popl	%edx
	jmp	L_no_precision_loss


/*
 * The number is a denormal (which might get rounded up to a normal)
 * Shift the number right the required number of bits, which will
 * have to be undone later...
 */
L_Make_denorm:
	/* The action to be taken depends upon whether the underflow
	   exception is masked */
	testb	CW_Underflow,%cl	/* Underflow mask. */
	jz	Unmasked_underflow	/* Do not make a denormal. */

	movb	DENORMAL,FPU_denormal

	pushl	%ecx		/* Save */
	/* %cx = shift count = (EXP_UNDER+1) - exponent */
	movw	EXP_UNDER+1,%cx
	subw	EXP(%edi),%cx

	cmpw	$64,%cx	/* a shift of 64 or more is handled separately */
	jnc	Denorm_shift_more_than_63

	cmpw	$32,%cx	/* shrd only works for 0..31 bits */
	jnc	Denorm_shift_more_than_32

/*
 * We got here without jumps by assuming that the most common requirement
 *   is for a small de-normalising shift.
 * Shift by [1..31] bits
 */
	addw	%cx,EXP(%edi)
	orl	%edx,%edx	/* extension */
	setne	%ch		/* Save whether %edx is non-zero */
	xorl	%edx,%edx
	shrd	%cl,%ebx,%edx
	shrd	%cl,%eax,%ebx
	shr	%cl,%eax
	orb	%ch,%dl		/* fold the old extension in as a sticky bit */
	popl	%ecx
	jmp	Denorm_done

/* Shift by [32..63] bits */
Denorm_shift_more_than_32:
	addw	%cx,EXP(%edi)
	subb	$32,%cl
	orl	%edx,%edx
	setne	%ch		/* Save whether %edx is non-zero */
	orb	%ch,%bl
	xorl	%edx,%edx
	shrd	%cl,%ebx,%edx
	shrd	%cl,%eax,%ebx
	shr	%cl,%eax
	orl	%edx,%edx		/* test these 32 bits */
	setne	%cl
	orb	%ch,%bl
	orb	%cl,%bl
	/* Move everything down one 32 bit word to finish the shift. */
	movl	%ebx,%edx
	movl	%eax,%ebx
	xorl	%eax,%eax
	popl	%ecx
	jmp	Denorm_done

/* Shift by [64..) bits */
Denorm_shift_more_than_63:
	cmpw	$64,%cx
	jne	Denorm_shift_more_than_64

/* Exactly 64 bit shift */
	addw	%cx,EXP(%edi)
	xorl	%ecx,%ecx
	orl	%edx,%edx
	setne	%cl
	orl	%ebx,%ebx
	setne	%ch
	orb	%ch,%cl
	orb	%cl,%al		/* sticky bit for the old %ebx and %edx */
	movl	%eax,%edx	/* old ms word becomes the extension */
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	popl	%ecx
	jmp	Denorm_done

Denorm_shift_more_than_64:
	movw	EXP_UNDER+1,EXP(%edi)
/* This is easy, %eax must be non-zero, so.. */
	movl	$1,%edx
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	popl	%ecx
	jmp	Denorm_done


Unmasked_underflow:
	movb	UNMASKED_UNDERFLOW,FPU_denormal
	jmp	Denorm_done


/* Undo the de-normalisation. */
Normalise_result:
	cmpb	UNMASKED_UNDERFLOW,FPU_denormal
	je	Signal_underflow

/* The number must be a denormal if we got here. */
#ifdef PARANOID
	/* But check it... just in case. */
	cmpw	EXP_UNDER+1,EXP(%edi)
	jne	L_norm_bugged
#endif /* PARANOID */

#ifdef PECULIAR_486
	/*
	 * This implements a special feature of 80486 behaviour.
	 * Underflow will be signalled even if the number is
	 * not a denormal after rounding.
	 * This difference occurs only for masked underflow, and not
	 * in the unmasked case.
	 * Actual 80486 behaviour differs from this in some circumstances.
	 */
	orl	%eax,%eax		/* ms bits */
	js	LPseudoDenormal		/* Will be masked underflow */
#else
	orl	%eax,%eax		/* ms bits */
	js	L_Normalised		/* No longer a denormal */
#endif /* PECULIAR_486 */

	jnz	LDenormal_adj_exponent

	orl	%ebx,%ebx
	jz	L_underflow_to_zero	/* The contents are zero */

LDenormal_adj_exponent:
	decw	EXP(%edi)

LPseudoDenormal:
	testb	$0xff,FPU_bits_lost	/* bits lost == underflow */
	movl	TAG_Special,%edx	/* movl does not disturb the flags */
	jz	L_deNormalised

	/* There must be a masked underflow */
	push	%eax
	pushl	EX_Underflow
	call	EXCEPTION
	popl	%eax			/* discard the EX_Underflow argument */
	popl	%eax			/* restore the ms significand word */
	movl	TAG_Special,%edx
	jmp	L_deNormalised


/*
 * The operations resulted in a number too small to represent.
 * Masked response.
 */
L_underflow_to_zero:
	push	%eax
	call	set_precision_flag_down
	popl	%eax

	push	%eax
	pushl	EX_Underflow
	call	EXCEPTION
	popl	%eax			/* discard the EX_Underflow argument */
	popl	%eax			/* restore the ms significand word */

/* Reduce the exponent to EXP_UNDER */
	movw	EXP_UNDER,EXP(%edi)
	movl	TAG_Zero,%edx
	jmp	L_Store_significand


/* The operations resulted in a number too large to represent. */
L_overflow:
	addw	EXTENDED_Ebias,EXP(%edi)	/* Set for unmasked response. */
	push	%edi
	call	arith_overflow
	pop	%edi
	jmp	fpu_reg_round_signed_special_exit


Signal_underflow:
	/* The number may have been changed to a non-denormal */
	/* by the rounding operations. */
	cmpw	EXP_UNDER,EXP(%edi)
	jle	Do_unmasked_underflow

	jmp	L_Normalised

Do_unmasked_underflow:
	/* Increase the exponent by the magic number */
	addw	$(3*(1<<13)),EXP(%edi)
	push	%eax
	pushl	EX_Underflow
	call	EXCEPTION
	popl	%eax			/* discard the EX_Underflow argument */
	popl	%eax			/* restore the ms significand word */
	jmp	L_Normalised


#ifdef PARANOID
/*
 * Internal-error exits.  Each one raises EX_INTERNAL or-ed with a code
 * identifying the failing check, discards the pushed argument and leaves
 * through L_exception_exit, which sets the return value to -1.
 */
#ifdef PECULIAR_486
L_bugged_denorm_486:
	pushl	EX_INTERNAL|0x236
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit
#else
L_bugged_denorm:
	pushl	EX_INTERNAL|0x230
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit
#endif /* PECULIAR_486 */

L_bugged_round24:
	pushl	EX_INTERNAL|0x231
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit

L_bugged_round53:
	pushl	EX_INTERNAL|0x232
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit

L_bugged_round64:
	pushl	EX_INTERNAL|0x233
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit

L_norm_bugged:
	pushl	EX_INTERNAL|0x234
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit

L_entry_bugged:
	pushl	EX_INTERNAL|0x235
	call	EXCEPTION
	popl	%ebx
L_exception_exit:
	mov	$-1,%eax
	jmp	fpu_reg_round_special_exit
#endif /* PARANOID */