/* SPDX-License-Identifier: GPL-2.0 */
	.file	"reg_round.S"
/*
 * reg_round.S
 *
 * Rounding/truncation/etc for FPU basic arithmetic functions.
 *
 * Copyright (C) 1993,1995,1997
 *                     W. Metzenthen, 22 Parker St, Ormond, Vic 3163,
 *                     Australia.  E-mail   [email protected]
 *
 * This code has four possible entry points.
 * The following must be entered by a jmp instruction:
 *   fpu_reg_round, fpu_reg_round_sqrt, and fpu_Arith_exit.
 * NOTE(review): no fpu_reg_round_sqrt label is defined in this file as it
 * stands; only fpu_reg_round and fpu_Arith_exit are exported with .globl.
 *
 * The FPU_round entry point is intended to be used by C code.
 * From C, call as:
 *   int FPU_round(FPU_REG *arg, unsigned int extent, unsigned int control_w)
 * NOTE(review): the code below reads the control word from PARAM4 and the
 * sign of the result from PARAM5, and never reads PARAM3.  The real C
 * declaration therefore presumably takes five arguments; confirm against
 * the prototype header before relying on the three-argument form above.
 *
 * Return value is the tag of the answer, or-ed with FPU_Exception if
 * one was raised, or -1 on internal error.
 * (The -1 value is only produced by the L_exception_exit path, which is
 * assembled only when PARANOID is defined.)
 *
 * For correct "up" and "down" rounding, the argument must have the correct
 * sign.
 */

/*
 * Four entry points.
 *
 * Needed by both the fpu_reg_round and fpu_reg_round_sqrt entry points:
 *   %eax:%ebx  64 bit significand
 *   %edx       32 bit extension of the significand
 *   %edi       pointer to an FPU_REG for the result to be stored
 *   stack      calling function must have set up a C stack frame and
 *              pushed %esi, %edi, and %ebx
 *
 * Needed just for the fpu_reg_round_sqrt entry point:
 *   %cx        A control word in the same format as the FPU control word.
 * Otherwise, PARAM4 must give such a value.
 *
 * In addition, every path compares the byte at PARAM5 with SIGN_POS to
 * decide the rounding direction and the sign bit of the stored exponent.
 *
 * The significand and its extension are assumed to be exact in the
 * following sense:
 *   If the significand by itself is the exact result then the significand
 *   extension (%edx) must contain 0, otherwise the significand extension
 *   must be non-zero.
 *   If the significand extension is non-zero then the significand is
 *   smaller than the magnitude of the correct exact result by an amount
 *   greater than zero and less than one ls bit of the significand.
 *   The significand extension is only required to have three possible
 *   non-zero values:
 *       less than 0x80000000  <=>  the significand is less than 1/2 an ls
 *                                  bit smaller than the magnitude of the
 *                                  true exact result.
 *         exactly 0x80000000  <=>  the significand is exactly 1/2 an ls bit
 *                                  smaller than the magnitude of the true
 *                                  exact result.
 *    greater than 0x80000000  <=>  the significand is more than 1/2 an ls
 *                                  bit smaller than the magnitude of the
 *                                  true exact result.
 */

/*
 * The code in this module has become quite complex, but it should handle
 * all of the FPU flags which are set at this stage of the basic arithmetic
 * computations.
 * There are a few rare cases where the results are not set identically to
 * a real FPU. These require a bit more thought because at this stage the
 * results of the code here appear to be more consistent...
 * This may be changed in a future version.
 */


#include "fpu_emu.h"
#include "exception.h"
#include "control_w.h"

/* Flags for FPU_bits_lost */
#define	LOST_DOWN	$1
#define	LOST_UP		$2

/* Flags for FPU_denormal */
#define	DENORMAL	$1
#define	UNMASKED_UNDERFLOW $2


#ifndef NON_REENTRANT_FPU
/*	Make the code re-entrant by putting
	local storage on the stack: */
#define FPU_bits_lost	(%esp)
#define FPU_denormal	1(%esp)

#else
/*	Not re-entrant, so we can gain speed by putting
	local storage in a static area: */
.data
	.align 4,0
FPU_bits_lost:
	.byte	0
FPU_denormal:
	.byte	0
#endif /* NON_REENTRANT_FPU */


.text
.globl fpu_reg_round
.globl fpu_Arith_exit

/*
 * Entry point when called from C.
 * Builds the frame which the jmp-entered entry points expect (%ebp frame
 * with %esi, %edi and %ebx saved) and loads the significand and its
 * extension into the registers described at the top of the file.
 */
SYM_FUNC_START(FPU_round)
	pushl	%ebp
	movl	%esp,%ebp
	pushl	%esi
	pushl	%edi
	pushl	%ebx

	movl	PARAM1,%edi		/* %edi -> FPU_REG to be rounded */
	movl	SIGH(%edi),%eax		/* ms 32 bits of the significand */
	movl	SIGL(%edi),%ebx		/* ls 32 bits of the significand */
	movl	PARAM2,%edx		/* significand extension */

fpu_reg_round:			/* Normal entry point */
	movl	PARAM4,%ecx		/* control word */

#ifndef NON_REENTRANT_FPU
	/*
	 * Reserve one stack slot for FPU_bits_lost / FPU_denormal.
	 * The value pushed is irrelevant: both bytes are written before
	 * they are read.
	 */
	pushl	%ebx		/* adjust the stack pointer */
#endif /* NON_REENTRANT_FPU */

#ifdef PARANOID
/* Cannot use this here yet */
/*	orl	%eax,%eax */
/*	jns	L_entry_bugged */
#endif /* PARANOID */

	cmpw	EXP_UNDER,EXP(%edi)
	jle	L_Make_denorm			/* The number is a de-normal */

	movb	$0,FPU_denormal			/* 0 -> not a de-normal */

Denorm_done:
	movb	$0,FPU_bits_lost		/* No bits yet lost in rounding */

	/* Keep the full control word in %esi; %ecx is used as scratch. */
	movl	%ecx,%esi
	andl	CW_PC,%ecx
	cmpl	PR_64_BITS,%ecx
	je	LRound_To_64

	cmpl	PR_53_BITS,%ecx
	je	LRound_To_53

	cmpl	PR_24_BITS,%ecx
	je	LRound_To_24

#ifdef PECULIAR_486
/* With the precision control bits set to 01 "(reserved)", a real 80486
   behaves as if the precision control bits were set to 11 "64 bits" */
	cmpl	PR_RESERVED_BITS,%ecx
	je	LRound_To_64
#ifdef PARANOID
	jmp	L_bugged_denorm_486
#endif /* PARANOID */
#else
#ifdef PARANOID
	jmp	L_bugged_denorm	/* There is no bug, just a bad control word */
#endif /* PARANOID */
#endif /* PECULIAR_486 */

/* Without PARANOID, an unmatched precision field falls through to here. */

/* Round etc to 24 bit precision */
LRound_To_24:
	movl	%esi,%ecx
	andl	CW_RC,%ecx
	cmpl	RC_RND,%ecx
	je	LRound_nearest_24

	cmpl	RC_CHOP,%ecx
	je	LCheck_truncate_24

	cmpl	RC_UP,%ecx		/* Towards +infinity */
	je	LUp_24

	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
	je	LDown_24

#ifdef PARANOID
	jmp	L_bugged_round24
#endif /* PARANOID */

LUp_24:
	cmpb	SIGN_POS,PARAM5
	jne	LCheck_truncate_24	/* If negative then  up==truncate */

	jmp	LCheck_24_round_up

LDown_24:
	cmpb	SIGN_POS,PARAM5
	je	LCheck_truncate_24	/* If positive then  down==truncate */

LCheck_24_round_up:
	/* Round away from zero if any bit below the 24th is set. */
	movl	%eax,%ecx
	andl	$0x000000ff,%ecx
	orl	%ebx,%ecx
	orl	%edx,%ecx
	jnz	LDo_24_round_up
	jmp	L_Re_normalise

LRound_nearest_24:
	/* Do rounding of the 24th bit if needed (nearest or even) */
	movl	%eax,%ecx
	andl	$0x000000ff,%ecx
	cmpl	$0x00000080,%ecx
	jc	LCheck_truncate_24	/* less than half, no increment needed */

	jne	LGreater_Half_24	/* greater than half, increment needed */

	/* Possibly half, we need to check the ls bits */
	orl	%ebx,%ebx
	jnz	LGreater_Half_24	/* greater than half, increment needed */

	orl	%edx,%edx
	jnz	LGreater_Half_24	/* greater than half, increment needed */

	/* Exactly half, increment only if 24th bit is 1 (round to even) */
	testl	$0x00000100,%eax
	jz	LDo_truncate_24

LGreater_Half_24:			/* Rounding: increment at the 24th bit */
LDo_24_round_up:
	andl	$0xffffff00,%eax	/* Truncate to 24 bits */
	xorl	%ebx,%ebx
	movb	LOST_UP,FPU_bits_lost
	/* The addl must come last: its carry is tested at the jump target. */
	addl	$0x00000100,%eax
	jmp	LCheck_Round_Overflow

LCheck_truncate_24:
	movl	%eax,%ecx
	andl	$0x000000ff,%ecx
	orl	%ebx,%ecx
	orl	%edx,%ecx
	jz	L_Re_normalise		/* No truncation needed */

LDo_truncate_24:
	andl	$0xffffff00,%eax	/* Truncate to 24 bits */
	xorl	%ebx,%ebx
	movb	LOST_DOWN,FPU_bits_lost
	jmp	L_Re_normalise


/* Round etc to 53 bit precision */
LRound_To_53:
	movl	%esi,%ecx
	andl	CW_RC,%ecx
	cmpl	RC_RND,%ecx
	je	LRound_nearest_53

	cmpl	RC_CHOP,%ecx
	je	LCheck_truncate_53

	cmpl	RC_UP,%ecx		/* Towards +infinity */
	je	LUp_53

	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
	je	LDown_53

#ifdef PARANOID
	jmp	L_bugged_round53
#endif /* PARANOID */

LUp_53:
	cmpb	SIGN_POS,PARAM5
	jne	LCheck_truncate_53	/* If negative then  up==truncate */

	jmp	LCheck_53_round_up

LDown_53:
	cmpb	SIGN_POS,PARAM5
	je	LCheck_truncate_53	/* If positive then  down==truncate */

LCheck_53_round_up:
	/* Round away from zero if any bit below the 53rd is set. */
	movl	%ebx,%ecx
	andl	$0x000007ff,%ecx
	orl	%edx,%ecx
	jnz	LDo_53_round_up
	jmp	L_Re_normalise

LRound_nearest_53:
	/* Do rounding of the 53rd bit if needed (nearest or even) */
	movl	%ebx,%ecx
	andl	$0x000007ff,%ecx
	cmpl	$0x00000400,%ecx
	jc	LCheck_truncate_53	/* less than half, no increment needed */

	jnz	LGreater_Half_53	/* greater than half, increment needed */

	/* Possibly half, we need to check the ls bits */
	orl	%edx,%edx
	jnz	LGreater_Half_53	/* greater than half, increment needed */

	/* Exactly half, increment only if 53rd bit is 1 (round to even) */
	testl	$0x00000800,%ebx
	jz	LTruncate_53

LGreater_Half_53:			/* Rounding: increment at the 53rd bit */
LDo_53_round_up:
	movb	LOST_UP,FPU_bits_lost
	andl	$0xfffff800,%ebx	/* Truncate to 53 bits */
	addl	$0x00000800,%ebx
	/* The carry out of this adcl is tested at the jump target. */
	adcl	$0,%eax
	jmp	LCheck_Round_Overflow

LCheck_truncate_53:
	movl	%ebx,%ecx
	andl	$0x000007ff,%ecx
	orl	%edx,%ecx
	jz	L_Re_normalise

LTruncate_53:
	movb	LOST_DOWN,FPU_bits_lost
	andl	$0xfffff800,%ebx	/* Truncate to 53 bits */
	jmp	L_Re_normalise


/* Round etc to 64 bit precision */
LRound_To_64:
	movl	%esi,%ecx
	andl	CW_RC,%ecx
	cmpl	RC_RND,%ecx
	je	LRound_nearest_64

	cmpl	RC_CHOP,%ecx
	je	LCheck_truncate_64

	cmpl	RC_UP,%ecx		/* Towards +infinity */
	je	LUp_64

	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
	je	LDown_64

#ifdef PARANOID
	jmp	L_bugged_round64
#endif /* PARANOID */

LUp_64:
	cmpb	SIGN_POS,PARAM5
	jne	LCheck_truncate_64	/* If negative then  up==truncate */

	orl	%edx,%edx
	jnz	LDo_64_round_up
	jmp	L_Re_normalise

LDown_64:
	cmpb	SIGN_POS,PARAM5
	je	LCheck_truncate_64	/* If positive then  down==truncate */

	orl	%edx,%edx
	jnz	LDo_64_round_up
	jmp	L_Re_normalise

LRound_nearest_64:
	cmpl	$0x80000000,%edx
	jc	LCheck_truncate_64	/* less than half */

	jne	LDo_64_round_up		/* greater than half */

	/* Now test for round-to-even */
	testb	$1,%bl
	jz	LCheck_truncate_64

LDo_64_round_up:
	movb	LOST_UP,FPU_bits_lost
	addl	$1,%ebx
	adcl	$0,%eax

/*
 * Entered with the carry flag holding the carry out of the rounding
 * increment of the significand (from the addl/adcl just executed).
 */
LCheck_Round_Overflow:
	jnc	L_Re_normalise

	/* Overflow, adjust the result (significand to 1.0) */
	rcrl	$1,%eax
	rcrl	$1,%ebx
	incw	EXP(%edi)
	jmp	L_Re_normalise

LCheck_truncate_64:
	orl	%edx,%edx
	jz	L_Re_normalise

LTruncate_64:
	movb	LOST_DOWN,FPU_bits_lost

L_Re_normalise:
	/* Non-zero FPU_denormal: DENORMAL or UNMASKED_UNDERFLOW was set. */
	testb	$0xff,FPU_denormal
	jnz	Normalise_result

L_Normalised:
	movl	TAG_Valid,%edx

/* From here on %edx holds the tag which will be returned in %eax. */
L_deNormalised:
	cmpb	LOST_UP,FPU_bits_lost
	je	L_precision_lost_up

	cmpb	LOST_DOWN,FPU_bits_lost
	je	L_precision_lost_down

L_no_precision_loss:
	/* store the result */

L_Store_significand:
	movl	%eax,SIGH(%edi)
	movl	%ebx,SIGL(%edi)

	cmpw	EXP_OVER,EXP(%edi)
	jge	L_overflow

	movl	%edx,%eax		/* return value = tag */

	/* Convert the exponent to 80x87 form. */
	addw	EXTENDED_Ebias,EXP(%edi)
	andw	$0x7fff,EXP(%edi)

fpu_reg_round_signed_special_exit:

	cmpb	SIGN_POS,PARAM5
	je	fpu_reg_round_special_exit

	orw	$0x8000,EXP(%edi)	/* Negative sign for the result. */

fpu_reg_round_special_exit:

#ifndef NON_REENTRANT_FPU
	/* Discard the local storage slot reserved at fpu_reg_round. */
	popl	%ebx		/* adjust the stack pointer */
#endif /* NON_REENTRANT_FPU */

/* Common epilogue: undo the frame described at the top of the file. */
fpu_Arith_exit:
	popl	%ebx
	popl	%edi
	popl	%esi
	leave
	RET


/*
 * Set the FPU status flags to represent precision loss due to
 * round-up.
 * %edx (tag) and %eax (ms significand) are preserved across the call.
 */
L_precision_lost_up:
	push	%edx
	push	%eax
	call	set_precision_flag_up
	popl	%eax
	popl	%edx
	jmp	L_no_precision_loss

/*
 * Set the FPU status flags to represent precision loss due to
 * truncation.
 * %edx (tag) and %eax (ms significand) are preserved across the call.
 */
L_precision_lost_down:
	push	%edx
	push	%eax
	call	set_precision_flag_down
	popl	%eax
	popl	%edx
	jmp	L_no_precision_loss


/*
 * The number is a denormal (which might get rounded up to a normal)
 * Shift the number right the required number of bits, which will
 * have to be undone later...
 */
L_Make_denorm:
	/* The action to be taken depends upon whether the underflow
	   exception is masked */
	testb	CW_Underflow,%cl		/* Underflow mask. */
	jz	Unmasked_underflow		/* Do not make a denormal. */

	movb	DENORMAL,FPU_denormal

	/*
	 * The control word is saved on the stack while %cx is used for the
	 * shift count; FPU_denormal/FPU_bits_lost are %esp-relative in the
	 * re-entrant build and are not touched until %ecx is popped again.
	 */
	pushl	%ecx		/* Save */
	movw	EXP_UNDER+1,%cx
	subw	EXP(%edi),%cx	/* %cx = number of bits to shift right */

	cmpw	$64,%cx	/* a shift of 64 or more bits is handled separately */
	jnc	Denorm_shift_more_than_63

	cmpw	$32,%cx	/* shrd only works for 0..31 bits */
	jnc	Denorm_shift_more_than_32

/*
 * We got here without jumps by assuming that the most common requirement
 *   is for a small de-normalising shift.
 * Shift by [1..31] bits
 */
	addw	%cx,EXP(%edi)
	orl	%edx,%edx	/* extension */
	setne	%ch		/* Save whether %edx is non-zero */
	xorl	%edx,%edx
	shrd	%cl,%ebx,%edx
	shrd	%cl,%eax,%ebx
	shr	%cl,%eax
	orb	%ch,%dl		/* keep the old extension as a "sticky" bit */
	popl	%ecx
	jmp	Denorm_done

/* Shift by [32..63] bits */
Denorm_shift_more_than_32:
	addw	%cx,EXP(%edi)
	subb	$32,%cl		/* the remaining 32 bits are moved below */
	orl	%edx,%edx
	setne	%ch		/* non-zero if the old extension was non-zero */
	orb	%ch,%bl
	xorl	%edx,%edx
	shrd	%cl,%ebx,%edx
	shrd	%cl,%eax,%ebx
	shr	%cl,%eax
	orl	%edx,%edx		/* test these 32 bits */
	setne	%cl
	orb	%ch,%bl
	orb	%cl,%bl
	/* Move everything down one 32 bit word: %ebx becomes the extension. */
	movl	%ebx,%edx
	movl	%eax,%ebx
	xorl	%eax,%eax
	popl	%ecx
	jmp	Denorm_done

/* Shift by [64..) bits */
Denorm_shift_more_than_63:
	cmpw	$64,%cx
	jne	Denorm_shift_more_than_64

/* Exactly 64 bit shift */
	addw	%cx,EXP(%edi)
	xorl	%ecx,%ecx
	orl	%edx,%edx
	setne	%cl
	orl	%ebx,%ebx
	setne	%ch
	orb	%ch,%cl
	orb	%cl,%al		/* fold the lost bits into the ls bit */
	movl	%eax,%edx	/* old ms word becomes the extension */
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	popl	%ecx
	jmp	Denorm_done

Denorm_shift_more_than_64:
	movw	EXP_UNDER+1,EXP(%edi)
/* This is easy, %eax must be non-zero, so.. */
	movl	$1,%edx
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	popl	%ecx
	jmp	Denorm_done


/* Underflow is unmasked: round the number as it is, without shifting. */
Unmasked_underflow:
	movb	UNMASKED_UNDERFLOW,FPU_denormal
	jmp	Denorm_done


/* Undo the de-normalisation. */
Normalise_result:
	cmpb	UNMASKED_UNDERFLOW,FPU_denormal
	je	Signal_underflow

/* The number must be a denormal if we got here. */
#ifdef PARANOID
	/* But check it... just in case. */
	cmpw	EXP_UNDER+1,EXP(%edi)
	jne	L_norm_bugged
#endif /* PARANOID */

#ifdef PECULIAR_486
	/*
	 * This implements a special feature of 80486 behaviour.
	 * Underflow will be signaled even if the number is
	 * not a denormal after rounding.
	 * This difference occurs only for masked underflow, and not
	 * in the unmasked case.
	 * Actual 80486 behaviour differs from this in some circumstances.
	 */
	orl	%eax,%eax		/* ms bits */
	js	LPseudoDenormal		/* Will be masked underflow */
#else
	orl	%eax,%eax		/* ms bits */
	js	L_Normalised		/* No longer a denormal */
#endif /* PECULIAR_486 */

	/* Still testing the flags set by the orl above. */
	jnz	LDenormal_adj_exponent

	orl	%ebx,%ebx
	jz	L_underflow_to_zero	/* The contents are zero */

LDenormal_adj_exponent:
	decw	EXP(%edi)

LPseudoDenormal:
	testb	$0xff,FPU_bits_lost	/* bits lost == underflow */
	/* movl leaves the flags alone, so the jz still tests the testb. */
	movl	TAG_Special,%edx
	jz	L_deNormalised

	/* There must be a masked underflow */
	push	%eax			/* preserve the ms significand */
	pushl	EX_Underflow		/* argument for EXCEPTION */
	call	EXCEPTION
	popl	%eax			/* discard the argument */
	popl	%eax			/* restore the ms significand */
	movl	TAG_Special,%edx
	jmp	L_deNormalised


/*
 * The operations resulted in a number too small to represent.
 * Masked response.
 * Reached only with %eax and %ebx both zero.
 */
L_underflow_to_zero:
	push	%eax
	call	set_precision_flag_down
	popl	%eax

	push	%eax
	pushl	EX_Underflow
	call	EXCEPTION
	popl	%eax			/* discard the argument */
	popl	%eax

/* Reduce the exponent to EXP_UNDER */
	movw	EXP_UNDER,EXP(%edi)
	movl	TAG_Zero,%edx
	jmp	L_Store_significand


/*
 * The operations resulted in a number too large to represent.
 * NOTE(review): %eax is not set here before the exit path, so the value
 * returned is presumably whatever arith_overflow leaves in %eax; confirm.
 */
L_overflow:
	addw	EXTENDED_Ebias,EXP(%edi)	/* Set for unmasked response. */
	push	%edi
	call	arith_overflow
	pop	%edi
	jmp	fpu_reg_round_signed_special_exit


Signal_underflow:
	/* The number may have been changed to a non-denormal */
	/* by the rounding operations. */
	cmpw	EXP_UNDER,EXP(%edi)
	jle	Do_unmasked_underflow

	jmp	L_Normalised

Do_unmasked_underflow:
	/* Increase the exponent by the magic number */
	/* (3*(1<<13) = 24576; presumably the x87 exponent adjustment for an
	   unmasked underflow - confirm against the processor manual) */
	addw	$(3*(1<<13)),EXP(%edi)
	push	%eax
	pushl	EX_Underflow
	call	EXCEPTION
	popl	%eax			/* discard the argument */
	popl	%eax
	jmp	L_Normalised


/*
 * Internal error exits (PARANOID builds only).  Each one reports a
 * distinct EX_INTERNAL code and then returns -1 via L_exception_exit.
 */
#ifdef PARANOID
#ifdef PECULIAR_486
L_bugged_denorm_486:
	pushl	EX_INTERNAL|0x236
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit
#else
L_bugged_denorm:
	pushl	EX_INTERNAL|0x230
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit
#endif /* PECULIAR_486 */

L_bugged_round24:
	pushl	EX_INTERNAL|0x231
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit

L_bugged_round53:
	pushl	EX_INTERNAL|0x232
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit

L_bugged_round64:
	pushl	EX_INTERNAL|0x233
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit

L_norm_bugged:
	pushl	EX_INTERNAL|0x234
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit

L_entry_bugged:
	pushl	EX_INTERNAL|0x235
	call	EXCEPTION
	popl	%ebx
L_exception_exit:
	mov	$-1,%eax
	jmp	fpu_reg_round_special_exit
#endif /* PARANOID */

SYM_FUNC_END(FPU_round)