~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
M68000 Hi-Performance Microprocessor Division
M68060 Software Package
Production Release P1.00 -- October 10, 1994

M68060 Software Package Copyright © 1993, 1994 Motorola Inc. All rights reserved.

THE SOFTWARE is provided on an "AS IS" basis and without warranty.
To the maximum extent permitted by applicable law,
MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE
and any warranty against infringement with regard to the SOFTWARE
(INCLUDING ANY MODIFIED VERSIONS THEREOF) and any accompanying written materials.

To the maximum extent permitted by applicable law,
IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)
ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
Motorola assumes no responsibility for the maintenance and support of the SOFTWARE.

You are hereby granted a copyright license to use, modify, and distribute the SOFTWARE
so long as this entire notice is retained without alteration in any modified and/or
redistributed versions, and that such modified versions are clearly identified as such.
No licenses are granted by implication, estoppel or otherwise under any patents
or trademarks of Motorola, Inc.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# freal.s:
#	This file is appended to the top of the 060FPSP package
# and contains the entry points into the package. The user, in
# effect, branches to one of the branch table entries located
# after _060FPSP_TABLE.
#	Also, subroutine stubs exist in this file (_fpsp_done for
# example) that are referenced by the FPSP package itself in order
# to call a given routine.
# The stub routine actually performs the
# callout. The FPSP code does a "bsr" to the stub routine. This
# extra layer of hierarchy adds a slight performance penalty but
# it makes the FPSP code easier to read and more maintainable.
#

# Byte offsets of the callout entries. The stubs below read them
# relative to the address (_060FPSP_TABLE - 0x80).
set	_off_bsun,	0x00
set	_off_snan,	0x04
set	_off_operr,	0x08
set	_off_ovfl,	0x0c
set	_off_unfl,	0x10
set	_off_dz,	0x14
set	_off_inex,	0x18
set	_off_fline,	0x1c
set	_off_fpu_dis,	0x20
set	_off_trap,	0x24
set	_off_trace,	0x28
set	_off_access,	0x2c
set	_off_done,	0x30

set	_off_imr,	0x40
set	_off_dmr,	0x44
set	_off_dmw,	0x48
set	_off_irw,	0x4c
set	_off_irl,	0x50
set	_off_drb,	0x54
set	_off_drw,	0x58
set	_off_drl,	0x5c
set	_off_dwb,	0x60
set	_off_dww,	0x64
set	_off_dwl,	0x68

_060FPSP_TABLE:

###############################################################

# Here's the table of ENTRY POINTS for those linking the package.
# Each entry is a "bra.l" followed by a "short" pad word; the nine
# entries are followed by 56 bytes of reserved space.
	bra.l		_fpsp_snan
	short		0x0000
	bra.l		_fpsp_operr
	short		0x0000
	bra.l		_fpsp_ovfl
	short		0x0000
	bra.l		_fpsp_unfl
	short		0x0000
	bra.l		_fpsp_dz
	short		0x0000
	bra.l		_fpsp_inex
	short		0x0000
	bra.l		_fpsp_fline
	short		0x0000
	bra.l		_fpsp_unsupp
	short		0x0000
	bra.l		_fpsp_effadd
	short		0x0000

	space		56

###############################################################
# CALLOUT STUBS
#
# Every stub below uses the same five-instruction sequence:
#	mov.l	%d0,-(%sp)		save d0 (used as scratch)
#	mov.l	(TABLE-0x80+_off_X,%pc),%d0
#					d0 = longword stored at the
#					callout entry for routine X
#	pea.l	(TABLE-0x80,%pc,%d0)	push (TABLE-0x80) + d0, i.e.
#					the entry is used as an offset
#					from (TABLE-0x80)
#	mov.l	0x4(%sp),%d0		restore the caller's d0
#	rtd	&0x4			"return" to the pushed address
#					and drop the 4-byte d0 slot
# On arrival at the target, d0 and the stack are as they were when
# the stub was entered.
###############################################################
	global		_fpsp_done
_fpsp_done:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_done,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_ovfl
_real_ovfl:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_ovfl,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_unfl
_real_unfl:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_unfl,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_inex
_real_inex:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_inex,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_bsun
_real_bsun:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_bsun,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_operr
_real_operr:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_operr,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_snan
_real_snan:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_snan,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_dz
_real_dz:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_dz,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_fline
_real_fline:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_fline,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_fpu_disabled
_real_fpu_disabled:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_fpu_dis,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_trap
_real_trap:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_trap,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_trace
_real_trace:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_trace,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_access
_real_access:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_access,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

#######################################

	global		_imem_read
_imem_read:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_imr,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_read
_dmem_read:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_dmr,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_write
_dmem_write:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_dmw,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_imem_read_word
_imem_read_word:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_irw,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_imem_read_long
_imem_read_long:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_irl,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_read_byte
_dmem_read_byte:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_drb,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_read_word
_dmem_read_word:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_drw,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_read_long
_dmem_read_long:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_drl,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_write_byte
_dmem_write_byte:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_dwb,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_write_word
_dmem_write_word:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_dww,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_write_long
_dmem_write_long:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_dwl,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

#
# This file contains a set of define statements for constants
# in order to promote readability within the corecode itself.
#

set LOCAL_SIZE,		192			# stack frame size(bytes)
set LV,			-LOCAL_SIZE		# stack offset

set EXC_SR,		0x4			# stack status register
set EXC_PC,		0x6			# stack pc
set EXC_VOFF,		0xa			# stacked vector offset
set EXC_EA,		0xc			# stacked <ea>

set EXC_FP,		0x0			# frame pointer
# frame pointer  (tail of the EXC_FP comment from the preceding line)

# Register save areas; all offsets are relative to the frame pointer (a6).
set EXC_AREGS,		-68			# offset of all address regs
set EXC_DREGS,		-100			# offset of all data regs
set EXC_FPREGS,		-36			# offset of all fp regs

set EXC_A7,		EXC_AREGS+(7*4)		# offset of saved a7
# NOTE(review): OLD_A7 resolves to the same slot as EXC_A6 (6*4), not to a
# slot of its own; value kept exactly as found -- confirm this is intended.
set OLD_A7,		EXC_AREGS+(6*4)		# extra copy of saved a7
set EXC_A6,		EXC_AREGS+(6*4)		# offset of saved a6
set EXC_A5,		EXC_AREGS+(5*4)
set EXC_A4,		EXC_AREGS+(4*4)
set EXC_A3,		EXC_AREGS+(3*4)
set EXC_A2,		EXC_AREGS+(2*4)
set EXC_A1,		EXC_AREGS+(1*4)
set EXC_A0,		EXC_AREGS+(0*4)
set EXC_D7,		EXC_DREGS+(7*4)
set EXC_D6,		EXC_DREGS+(6*4)
set EXC_D5,		EXC_DREGS+(5*4)
set EXC_D4,		EXC_DREGS+(4*4)
set EXC_D3,		EXC_DREGS+(3*4)
set EXC_D2,		EXC_DREGS+(2*4)
set EXC_D1,		EXC_DREGS+(1*4)
set EXC_D0,		EXC_DREGS+(0*4)

set EXC_FP0,		EXC_FPREGS+(0*12)	# offset of saved fp0
set EXC_FP1,		EXC_FPREGS+(1*12)	# offset of saved fp1
set EXC_FP2,		EXC_FPREGS+(2*12)	# offset of saved fp2 (not used)

set FP_SCR1,		LV+80			# fp scratch 1
set FP_SCR1_EX,		FP_SCR1+0
set FP_SCR1_SGN,	FP_SCR1+2
set FP_SCR1_HI,		FP_SCR1+4
set FP_SCR1_LO,		FP_SCR1+8

set FP_SCR0,		LV+68			# fp scratch 0
set FP_SCR0_EX,		FP_SCR0+0
set FP_SCR0_SGN,	FP_SCR0+2
set FP_SCR0_HI,		FP_SCR0+4
set FP_SCR0_LO,		FP_SCR0+8

set FP_DST,		LV+56			# fp destination operand
set FP_DST_EX,		FP_DST+0
set FP_DST_SGN,		FP_DST+2
set FP_DST_HI,		FP_DST+4
set FP_DST_LO,		FP_DST+8

set FP_SRC,		LV+44			# fp source operand
set FP_SRC_EX,		FP_SRC+0
set FP_SRC_SGN,		FP_SRC+2
set FP_SRC_HI,		FP_SRC+4
set FP_SRC_LO,		FP_SRC+8

set USER_FPIAR,		LV+40			# FP instr address register

set USER_FPSR,		LV+36			# FP status register
set FPSR_CC,		USER_FPSR+0		# FPSR condition codes
set FPSR_QBYTE,		USER_FPSR+1		# FPSR quotient byte
set FPSR_EXCEPT,	USER_FPSR+2		# FPSR exception status byte
set FPSR_AEXCEPT,	USER_FPSR+3		# FPSR accrued exception byte

set USER_FPCR,		LV+32			# FP control register
set FPCR_ENABLE,	USER_FPCR+2		# FPCR exception enable
set FPCR_MODE,		USER_FPCR+3		# FPCR rounding mode control

set L_SCR3,		LV+28			# integer scratch 3
set L_SCR2,		LV+24			# integer scratch 2
set L_SCR1,		LV+20			# integer scratch 1

set STORE_FLG,		LV+19			# flag: operand store (ie. not fcmp/ftst)

set EXC_TEMP2,		LV+24			# temporary space (same slot as L_SCR2)
set EXC_TEMP,		LV+16			# temporary space

set DTAG,		LV+15			# destination operand type
set STAG,		LV+14			# source operand type

set SPCOND_FLG,		LV+10			# flag: special case (see below)

set EXC_CC,		LV+8			# saved condition codes
set EXC_EXTWPTR,	LV+4			# saved current PC (active)
set EXC_EXTWORD,	LV+2			# saved extension word
set EXC_CMDREG,		LV+2			# saved extension word (same slot as EXC_EXTWORD)
set EXC_OPWORD,		LV+0			# saved operation word

################################

# Helpful macros

set FTEMP,		0			# offsets within an
set FTEMP_EX,		0			# extended precision
set FTEMP_SGN,		2			# value saved in memory.
set FTEMP_HI,		4
set FTEMP_LO,		8
set FTEMP_GRS,		12

set LOCAL,		0			# offsets within an
set LOCAL_EX,		0			# extended precision
set LOCAL_SGN,		2			# value saved in memory.
set LOCAL_HI,		4
set LOCAL_LO,		8
set LOCAL_GRS,		12

set DST,		0			# offsets within an
set DST_EX,		0			# extended precision
set DST_HI,		4			# value saved in memory.
set DST_LO,		8

set SRC,		0			# offsets within an
set SRC_EX,		0			# extended precision
set SRC_HI,		4			# value saved in memory.
set SRC_LO,		8

set SGL_LO,		0x3f81			# min sgl prec exponent
set SGL_HI,		0x407e			# max sgl prec exponent
set DBL_LO,		0x3c01			# min dbl prec exponent
set DBL_HI,		0x43fe			# max dbl prec exponent
set EXT_LO,		0x0			# min ext prec exponent
set EXT_HI,		0x7ffe			# max ext prec exponent

set EXT_BIAS,		0x3fff			# extended precision bias
set SGL_BIAS,		0x007f			# single precision bias
set DBL_BIAS,		0x03ff			# double precision bias

set NORM,		0x00			# operand type for STAG/DTAG
set ZERO,		0x01			# operand type for STAG/DTAG
set INF,		0x02			# operand type for STAG/DTAG
set QNAN,		0x03			# operand type for STAG/DTAG
set DENORM,		0x04			# operand type for STAG/DTAG
set SNAN,		0x05			# operand type for STAG/DTAG
set UNNORM,		0x06			# operand type for STAG/DTAG

##################
# FPSR/FPCR bits #
##################
set neg_bit,		0x3			# negative result
set z_bit,		0x2			# zero result
set inf_bit,		0x1			# infinite result
set nan_bit,		0x0			# NAN result

set q_sn_bit,		0x7			# sign bit of quotient byte

set bsun_bit,		7			# branch on unordered
set snan_bit,		6			# signalling NAN
set operr_bit,		5			# operand error
set ovfl_bit,		4			# overflow
set unfl_bit,		3			# underflow
set dz_bit,		2			# divide by zero
set inex2_bit,		1			# inexact result 2
set inex1_bit,		0			# inexact result 1

set aiop_bit,		7			# accrued illegal operation bit
set aovfl_bit,		6			# accrued overflow bit
set aunfl_bit,		5			# accrued underflow bit
set adz_bit,		4			# accrued dz bit
set ainex_bit,		3			# accrued inexact bit

#############################
# FPSR individual bit masks #
#############################
set neg_mask,		0x08000000		# negative bit mask (lw)
set inf_mask,		0x02000000		# infinity bit mask (lw)
set z_mask,		0x04000000		# zero bit mask (lw)
set nan_mask,		0x01000000		# nan bit mask (lw)

set neg_bmask,		0x08			# negative bit mask (byte)
set inf_bmask,		0x02			# infinity bit mask (byte)
set z_bmask,		0x04			# zero bit mask (byte)
set nan_bmask,		0x01			# nan bit mask (byte)

set bsun_mask,		0x00008000		# bsun exception mask
set snan_mask,		0x00004000		# snan exception mask
set operr_mask,		0x00002000		# operr exception mask
set ovfl_mask,		0x00001000		# overflow exception mask
set unfl_mask,		0x00000800		# underflow exception mask
set dz_mask,		0x00000400		# dz exception mask
set inex2_mask,		0x00000200		# inex2 exception mask
set inex1_mask,		0x00000100		# inex1 exception mask

set aiop_mask,		0x00000080		# accrued illegal operation
set aovfl_mask,		0x00000040		# accrued overflow
set aunfl_mask,		0x00000020		# accrued underflow
set adz_mask,		0x00000010		# accrued divide by zero
set ainex_mask,		0x00000008		# accrued inexact

######################################
# FPSR combinations used in the FPSP #
######################################
set dzinf_mask,		inf_mask+dz_mask+adz_mask
set opnan_mask,		nan_mask+operr_mask+aiop_mask
set nzi_mask,		0x01ffffff		#clears N, Z, and I
set unfinx_mask,	unfl_mask+inex2_mask+aunfl_mask+ainex_mask
set unf2inx_mask,	unfl_mask+inex2_mask+ainex_mask
set ovfinx_mask,	ovfl_mask+inex2_mask+aovfl_mask+ainex_mask
set inx1a_mask,		inex1_mask+ainex_mask
set inx2a_mask,		inex2_mask+ainex_mask
set snaniop_mask,	nan_mask+snan_mask+aiop_mask
set snaniop2_mask,	snan_mask+aiop_mask
set naniop_mask,	nan_mask+aiop_mask
set neginf_mask,	neg_mask+inf_mask
set infaiop_mask,	inf_mask+aiop_mask
set negz_mask,		neg_mask+z_mask
set opaop_mask,		operr_mask+aiop_mask
set unfl_inx_mask,	unfl_mask+aunfl_mask+ainex_mask
set ovfl_inx_mask,	ovfl_mask+aovfl_mask+ainex_mask

#########
# misc. #
#########
set rnd_stky_bit,	29			# stky bit pos in longword

set sign_bit,		0x7			# sign bit
set signan_bit,		0x6			# signalling nan bit

set sgl_thresh,		0x3f81			# minimum sgl exponent
set dbl_thresh,		0x3c01			# minimum dbl exponent

set x_mode,		0x0			# extended precision
set s_mode,		0x4			# single precision
set d_mode,		0x8			# double precision

set rn_mode,		0x0			# round-to-nearest
set rz_mode,		0x1			# round-to-zero
set rm_mode,		0x2			# round-to-minus-infinity
set rp_mode,		0x3			# round-to-plus-infinity

set mantissalen,	64			# length of mantissa in bits

set BYTE,		1			# len(byte) == 1 byte
set WORD,		2			# len(word) == 2 bytes
set LONG,		4			# len(longword) == 4 bytes

set BSUN_VEC,		0xc0			# bsun vector offset
set INEX_VEC,		0xc4			# inexact vector offset
set DZ_VEC,		0xc8			# dz vector offset
set UNFL_VEC,		0xcc			# unfl vector offset
set OPERR_VEC,		0xd0			# operr vector offset
set OVFL_VEC,		0xd4			# ovfl vector offset
set SNAN_VEC,		0xd8			# snan vector offset

###########################
# SPecial CONDition FLaGs #
###########################
set ftrapcc_flg,	0x01			# flag bit: ftrapcc exception
set fbsun_flg,		0x02			# flag bit: bsun exception
set mia7_flg,		0x04			# flag bit: (a7)+ <ea>
set mda7_flg,		0x08			# flag bit: -(a7) <ea>
set fmovm_flg,		0x40			# flag bit: fmovm instruction
set immed_flg,		0x80			# flag bit: &<data> <ea>

set ftrapcc_bit,	0x0
set fbsun_bit,		0x1
set mia7_bit,		0x2
set mda7_bit,		0x3
set immed_bit,		0x7

##################################
# TRANSCENDENTAL "LAST-OP" FLAGS #
##################################
set FMUL_OP,		0x0			# fmul instr performed last
set FDIV_OP,		0x1			# fdiv performed last
set FADD_OP,		0x2			# fadd performed last
set FMOV_OP,		0x3			# fmov performed last

#############
# CONSTANTS #
#############
T1:	long		0x40C62D38,0xD3D64634	# 16381 LOG2 LEAD
T2:	long		0x3D6F90AE,0xB1E75CC7	# 16381 LOG2 TRAIL

PI:	long		0x40000000,0xC90FDAA2,0x2168C235,0x00000000
PIBY2:	long		0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000

TWOBYPI:
	long		0x3FE45F30,0x6DC9C883

#########################################################################
# XDEF ****************************************************************
#	_fpsp_ovfl(): 060FPSP entry point for FP Overflow exception.
#
#	This handler should be the first code executed upon taking the
#	FP Overflow exception in an operating system.
#
# XREF ****************************************************************
#	_imem_read_long() - read instruction longword
#	fix_skewed_ops() - adjust src operand in fsave frame
#	set_tag_x() - determine optype of src/dst operands
#	store_fpreg() - store opclass 0 or 2 result to FP regfile
#	unnorm_fix() - change UNNORM operands to NORM or ZERO
#	load_fpn2() - load dst operand from FP regfile
#	fout() - emulate an opclass 3 instruction
#	tbl_unsupp - addr of table of emulation routines for opclass 0,2
#	_fpsp_done() - "callout" for 060FPSP exit (all work done!)
#	_real_ovfl() - "callout" for Overflow exception enabled code
#	_real_inex() - "callout" for Inexact exception enabled code
#	_real_trace() - "callout" for Trace exception code
#
# INPUT ***************************************************************
#	- The system stack contains the FP Ovfl exception stack frame
#	- The fsave frame contains the source operand
#
# OUTPUT **************************************************************
#	Overflow Exception enabled:
#	- The system stack is unchanged
#	- The fsave frame contains the adjusted src op for opclass 0,2
#	Overflow Exception disabled:
#	- The system stack is unchanged
#	- The "exception present" flag in the fsave frame is cleared
#
# ALGORITHM ***********************************************************
#	On the 060, if an FP overflow is present as the result of any
# instruction, the 060 will take an overflow exception whether the
# exception is enabled or disabled in the FPCR. For the disabled case,
# this handler emulates the instruction to determine what the correct
# default result should be for the operation. This default result is
# then stored in either the FP regfile, data regfile, or memory.
# Finally, the handler exits through the "callout" _fpsp_done()
# denoting that no exceptional conditions exist within the machine.
# If the exception is enabled, then this handler must create the
# exceptional operand and place it in the fsave state frame, and store
# the default result (only if the instruction is opclass 3). For
# exceptions enabled, this handler must exit through the "callout"
# _real_ovfl() so that the operating system enabled overflow handler
# can handle this case.
#	Two other conditions exist. First, if overflow was disabled
# but the inexact exception was enabled, this handler must exit
# through the "callout" _real_inex() regardless of whether the result
# was inexact.
#	Also, in the case of an opclass three instruction where
# overflow was disabled and the trace exception was enabled, this
# handler must exit through the "callout" _real_trace().
#
#########################################################################

	global		_fpsp_ovfl
_fpsp_ovfl:

#$#	sub.l		&24,%sp			# make room for src/dst

	link.w		%a6,&-LOCAL_SIZE	# init stack frame

	fsave		FP_SRC(%a6)		# grab the "busy" frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

# the FPIAR holds the "current PC" of the faulting instruction
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)	# opword at LV+0, extension word at LV+2

##############################################################################

	btst		&0x5,EXC_CMDREG(%a6)	# is instr an fmove out?
	bne.w		fovfl_out


	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		fix_skewed_ops		# fix src op

# since, I believe, only NORMs and DENORMs can come through here,
# maybe we can avoid the subroutine call.
	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		set_tag_x		# tag the operand type
	mov.b		%d0,STAG(%a6)		# maybe NORM,DENORM

# bit five of the fp extension word separates the monadic and dyadic operations
# that can pass through fpsp_ovfl(). remember that fcmp, ftst, and fsincos
# will never take this exception.
	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
	beq.b		fovfl_extract		# monadic

	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
	bsr.l		load_fpn2		# load dst into FP_DST

	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
	bsr.l		set_tag_x		# tag the operand type
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
	bne.b		fovfl_op2_done		# no
	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
fovfl_op2_done:
	mov.b		%d0,DTAG(%a6)		# save dst optype tag

fovfl_extract:

#$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
#$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
#$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
#$#	mov.l		FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
#$#	mov.l		FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
#$#	mov.l		FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode

	mov.b		1+EXC_CMDREG(%a6),%d1
	andi.w		&0x007f,%d1		# extract extension

# mask 0x00ff01ff: clears the cc byte and the exception status byte except
# INEX1; the quotient byte and the accrued byte are left untouched.
	andi.l		&0x00ff01ff,USER_FPSR(%a6)

	fmov.l		&0x0,%fpcr		# zero current control regs
	fmov.l		&0x0,%fpsr

	lea		FP_SRC(%a6),%a0
	lea		FP_DST(%a6),%a1

# maybe we can make these entry points ONLY the OVFL entry points of each routine.
	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
	jsr		(tbl_unsupp.l,%pc,%d1.l*1)

# the operation has been emulated. the result is in fp0.
# the EXOP, if an exception occurred, is in fp1.
# we must save the default result regardless of whether
# traps are enabled or disabled.
	bfextu		EXC_CMDREG(%a6){&6:&3},%d0
	bsr.l		store_fpreg

# the exceptional possibilities we have left ourselves with are ONLY overflow
# and inexact. and, the inexact is such that overflow occurred and was disabled
# but inexact was enabled.
	btst		&ovfl_bit,FPCR_ENABLE(%a6)
	bne.b		fovfl_ovfl_on

	btst		&inex2_bit,FPCR_ENABLE(%a6)
	bne.b		fovfl_inex_on

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6
#$#	add.l		&24,%sp
	bra.l		_fpsp_done

# overflow is enabled AND overflow, of course, occurred. so, we have the EXOP
# in fp1. now, simply jump to _real_ovfl()!
fovfl_ovfl_on:
	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP (fp1) to stack

	mov.w		&0xe005,2+FP_SRC(%a6)	# save exc status

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!

	unlk		%a6

	bra.l		_real_ovfl

# overflow occurred but is disabled. meanwhile, inexact is enabled. Therefore,
# we must jump to _real_inex().
fovfl_inex_on:

	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP (fp1) to stack

	mov.b		&0xc4,1+EXC_VOFF(%a6)	# vector offset = 0xc4 (same value as INEX_VEC)
	mov.w		&0xe001,2+FP_SRC(%a6)	# save exc status

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!

	unlk		%a6

	bra.l		_real_inex

########################################################################
fovfl_out:


#$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
#$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
#$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)

# the src operand is definitely a NORM(!), so tag it as such
	mov.b		&NORM,STAG(%a6)		# set src optype tag

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode

# mask 0xffff00ff: clears only the exception status byte; the cc, quotient
# and accrued bytes are left untouched.
	and.l		&0xffff00ff,USER_FPSR(%a6)

	fmov.l		&0x0,%fpcr		# zero current control regs
	fmov.l		&0x0,%fpsr

	lea		FP_SRC(%a6),%a0		# pass ptr to src operand

	bsr.l		fout

	btst		&ovfl_bit,FPCR_ENABLE(%a6)
	bne.w		fovfl_ovfl_on

	btst		&inex2_bit,FPCR_ENABLE(%a6)
	bne.w		fovfl_inex_on

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6
#$#	add.l		&24,%sp

	btst		&0x7,(%sp)		# is trace on?
	beq.l		_fpsp_done		# no

	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR
	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
	bra.l		_real_trace

#########################################################################
# XDEF ****************************************************************
#	_fpsp_unfl(): 060FPSP entry point for FP Underflow exception.
#
#	This handler should be the first code executed upon taking the
#	FP Underflow exception in an operating system.
#
# XREF ****************************************************************
#	_imem_read_long() - read instruction longword
#	fix_skewed_ops() - adjust src operand in fsave frame
#	set_tag_x() - determine optype of src/dst operands
#	store_fpreg() - store opclass 0 or 2 result to FP regfile
#	unnorm_fix() - change UNNORM operands to NORM or ZERO
#	load_fpn2() - load dst operand from FP regfile
#	fout() - emulate an opclass 3 instruction
#	tbl_unsupp - addr of table of emulation routines for opclass 0,2
#	_fpsp_done() - "callout" for 060FPSP exit (all work done!)
#	_real_unfl() - "callout" for Underflow exception enabled code
#	_real_inex() - "callout" for Inexact exception enabled code
#	_real_trace() - "callout" for Trace exception code
#
# INPUT ***************************************************************
#	- The system stack contains the FP Unfl exception stack frame
#	- The fsave frame contains the source operand
#
# OUTPUT **************************************************************
#	Underflow Exception enabled:
#	- The system stack is unchanged
#	- The fsave frame contains the adjusted src op for opclass 0,2
#	Underflow Exception disabled:
#	- The system stack is unchanged
#	- The "exception present" flag in the fsave frame is cleared
#
# ALGORITHM ***********************************************************
#	On the 060, if an FP underflow is present as the result of any
# instruction, the 060 will take an underflow exception whether the
# exception is enabled or disabled in the FPCR. For the disabled case,
# this handler emulates the instruction to determine what the correct
# default result should be for the operation. This default result is
# then stored in either the FP regfile, data regfile, or memory.
#856# Finally, the handler exits through the "callout" _fpsp_done() #857# denoting that no exceptional conditions exist within the machine. #858# If the exception is enabled, then this handler must create the #859# exceptional operand and plave it in the fsave state frame, and store #860# the default result (only if the instruction is opclass 3). For #861# exceptions enabled, this handler must exit through the "callout" #862# _real_unfl() so that the operating system enabled overflow handler #863# can handle this case. #864# Two other conditions exist. First, if underflow was disabled #865# but the inexact exception was enabled and the result was inexact, #866# this handler must exit through the "callout" _real_inex(). #867# was inexact. #868# Also, in the case of an opclass three instruction where #869# underflow was disabled and the trace exception was enabled, this #870# handler must exit through the "callout" _real_trace(). #871# #872#########################################################################873874global _fpsp_unfl875_fpsp_unfl:876877#$# sub.l &24,%sp # make room for src/dst878879link.w %a6,&-LOCAL_SIZE # init stack frame880881fsave FP_SRC(%a6) # grab the "busy" frame882883movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1884fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs885fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack886887# the FPIAR holds the "current PC" of the faulting instruction888mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)889mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr890addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr891bsr.l _imem_read_long # fetch the instruction words892mov.l %d0,EXC_OPWORD(%a6)893894##############################################################################895896btst &0x5,EXC_CMDREG(%a6) # is instr an fmove out?897bne.w funfl_out898899900lea FP_SRC(%a6),%a0 # pass: ptr to src op901bsr.l fix_skewed_ops # fix src op902903lea FP_SRC(%a6),%a0 # pass: ptr to src op904bsr.l set_tag_x # tag the 
operand type905mov.b %d0,STAG(%a6) # maybe NORM,DENORM906907# bit five of the fp ext word separates the monadic and dyadic operations908# that can pass through fpsp_unfl(). remember that fcmp, and ftst909# will never take this exception.910btst &0x5,1+EXC_CMDREG(%a6) # is op monadic or dyadic?911beq.b funfl_extract # monadic912913# now, what's left that's not dyadic is fsincos. we can distinguish it914# from all dyadics by the '0110xxx pattern915btst &0x4,1+EXC_CMDREG(%a6) # is op an fsincos?916bne.b funfl_extract # yes917918bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg919bsr.l load_fpn2 # load dst into FP_DST920921lea FP_DST(%a6),%a0 # pass: ptr to dst op922bsr.l set_tag_x # tag the operand type923cmpi.b %d0,&UNNORM # is operand an UNNORM?924bne.b funfl_op2_done # no925bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO926funfl_op2_done:927mov.b %d0,DTAG(%a6) # save dst optype tag928929funfl_extract:930931#$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)932#$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)933#$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)934#$# mov.l FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)935#$# mov.l FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)936#$# mov.l FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)937938clr.l %d0939mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode940941mov.b 1+EXC_CMDREG(%a6),%d1942andi.w &0x007f,%d1 # extract extension943944andi.l &0x00ff01ff,USER_FPSR(%a6)945946fmov.l &0x0,%fpcr # zero current control regs947fmov.l &0x0,%fpsr948949lea FP_SRC(%a6),%a0950lea FP_DST(%a6),%a1951952# maybe we can make these entry points ONLY the OVFL entry points of each routine.953mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr954jsr (tbl_unsupp.l,%pc,%d1.l*1)955956bfextu EXC_CMDREG(%a6){&6:&3},%d0957bsr.l store_fpreg958959# The `060 FPU multiplier hardware is such that if the result of a960# multiply operation is the smallest possible normalized number961# (0x00000000_80000000_00000000), then the machine will take an962# underflow exception. 
Since this is incorrect, we need to check963# if our emulation, after re-doing the operation, decided that964# no underflow was called for. We do these checks only in965# funfl_{unfl,inex}_on() because w/ both exceptions disabled, this966# special case will simply exit gracefully with the correct result.967968# the exceptional possibilities we have left ourselves with are ONLY overflow969# and inexact. and, the inexact is such that overflow occurred and was disabled970# but inexact was enabled.971btst &unfl_bit,FPCR_ENABLE(%a6)972bne.b funfl_unfl_on973974funfl_chkinex:975btst &inex2_bit,FPCR_ENABLE(%a6)976bne.b funfl_inex_on977978funfl_exit:979fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1980fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs981movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1982983unlk %a6984#$# add.l &24,%sp985bra.l _fpsp_done986987# overflow is enabled AND overflow, of course, occurred. so, we have the EXOP988# in fp1 (don't forget to save fp0). what to do now?989# well, we simply have to get to go to _real_unfl()!990funfl_unfl_on:991992# The `060 FPU multiplier hardware is such that if the result of a993# multiply operation is the smallest possible normalized number994# (0x00000000_80000000_00000000), then the machine will take an995# underflow exception. Since this is incorrect, we check here to see996# if our emulation, after re-doing the operation, decided that997# no underflow was called for.998btst &unfl_bit,FPSR_EXCEPT(%a6)999beq.w funfl_chkinex10001001funfl_unfl_on2:1002fmovm.x &0x40,FP_SRC(%a6) # save EXOP (fp1) to stack10031004mov.w &0xe003,2+FP_SRC(%a6) # save exc status10051006fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp11007fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs1008movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a110091010frestore FP_SRC(%a6) # do this after fmovm,other f<op>s!10111012unlk %a610131014bra.l _real_unfl10151016# underflow occurred but is disabled. meanwhile, inexact is enabled. 
Therefore,1017# we must jump to real_inex().1018funfl_inex_on:10191020# The `060 FPU multiplier hardware is such that if the result of a1021# multiply operation is the smallest possible normalized number1022# (0x00000000_80000000_00000000), then the machine will take an1023# underflow exception.1024# But, whether bogus or not, if inexact is enabled AND it occurred,1025# then we have to branch to real_inex.10261027btst &inex2_bit,FPSR_EXCEPT(%a6)1028beq.w funfl_exit10291030funfl_inex_on2:10311032fmovm.x &0x40,FP_SRC(%a6) # save EXOP to stack10331034mov.b &0xc4,1+EXC_VOFF(%a6) # vector offset = 0xc41035mov.w &0xe001,2+FP_SRC(%a6) # save exc status10361037fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp11038fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs1039movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a110401041frestore FP_SRC(%a6) # do this after fmovm,other f<op>s!10421043unlk %a610441045bra.l _real_inex10461047#######################################################################1048funfl_out:104910501051#$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)1052#$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)1053#$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)10541055# the src operand is definitely a NORM(!), so tag it as such1056mov.b &NORM,STAG(%a6) # set src optype tag10571058clr.l %d01059mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode10601061and.l &0xffff00ff,USER_FPSR(%a6) # zero all but accured field10621063fmov.l &0x0,%fpcr # zero current control regs1064fmov.l &0x0,%fpsr10651066lea FP_SRC(%a6),%a0 # pass ptr to src operand10671068bsr.l fout10691070btst &unfl_bit,FPCR_ENABLE(%a6)1071bne.w funfl_unfl_on210721073btst &inex2_bit,FPCR_ENABLE(%a6)1074bne.w funfl_inex_on210751076fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp11077fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs1078movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a110791080unlk %a61081#$# add.l &24,%sp10821083btst &0x7,(%sp) # is trace on?1084beq.l _fpsp_done # no10851086fmov.l 
%fpiar,0x8(%sp) # "Current PC" is in FPIAR1087mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x0241088bra.l _real_trace10891090#########################################################################1091# XDEF **************************************************************** #1092# _fpsp_unsupp(): 060FPSP entry point for FP "Unimplemented #1093# Data Type" exception. #1094# #1095# This handler should be the first code executed upon taking the #1096# FP Unimplemented Data Type exception in an operating system. #1097# #1098# XREF **************************************************************** #1099# _imem_read_{word,long}() - read instruction word/longword #1100# fix_skewed_ops() - adjust src operand in fsave frame #1101# set_tag_x() - determine optype of src/dst operands #1102# store_fpreg() - store opclass 0 or 2 result to FP regfile #1103# unnorm_fix() - change UNNORM operands to NORM or ZERO #1104# load_fpn2() - load dst operand from FP regfile #1105# load_fpn1() - load src operand from FP regfile #1106# fout() - emulate an opclass 3 instruction #1107# tbl_unsupp - add of table of emulation routines for opclass 0,2 #1108# _real_inex() - "callout" to operating system inexact handler #1109# _fpsp_done() - "callout" for exit; work all done #1110# _real_trace() - "callout" for Trace enabled exception #1111# funimp_skew() - adjust fsave src ops to "incorrect" value #1112# _real_snan() - "callout" for SNAN exception #1113# _real_operr() - "callout" for OPERR exception #1114# _real_ovfl() - "callout" for OVFL exception #1115# _real_unfl() - "callout" for UNFL exception #1116# get_packed() - fetch packed operand from memory #1117# #1118# INPUT *************************************************************** #1119# - The system stack contains the "Unimp Data Type" stk frame #1120# - The fsave frame contains the ssrc op (for UNNORM/DENORM) #1121# #1122# OUTPUT ************************************************************** #1123# If Inexact exception (opclass 3): #1124# - 
#	  The system stack is changed to an Inexact exception stk frame
#	If SNAN exception (opclass 3):
#	- The system stack is changed to an SNAN exception stk frame
#	If OPERR exception (opclass 3):
#	- The system stack is changed to an OPERR exception stk frame
#	If OVFL exception (opclass 3):
#	- The system stack is changed to an OVFL exception stk frame
#	If UNFL exception (opclass 3):
#	- The system stack is changed to an UNFL exception stack frame
#	If Trace exception enabled:
#	- The system stack is changed to a Trace exception stack frame
#	Else: (normal case)
#	- Correct result has been stored as appropriate
#
# ALGORITHM ***********************************************************	#
#	Two main instruction types can enter here: (1) DENORM or UNNORM
# unimplemented data types. These can be either opclass 0,2 or 3
# instructions, and (2) PACKED unimplemented data format instructions
# also of opclasses 0,2, or 3.
#	For UNNORM/DENORM opclass 0 and 2, the handler fetches the src
# operand from the fsave state frame and the dst operand (if dyadic)
# from the FP register file. The instruction is then emulated by
# choosing an emulation routine from a table of routines indexed by
# instruction type. Once the instruction has been emulated and result
# saved, then we check to see if any enabled exceptions resulted from
# instruction emulation. If none, then we exit through the "callout"
# _fpsp_done(). If there is an enabled FP exception, then we insert
# this exception into the FPU in the fsave state frame and then exit
# through _fpsp_done().
#	PACKED opclass 0 and 2 is similar in how the instruction is
# emulated and exceptions handled. The differences occur in how the
# handler loads the packed op (by calling get_packed() routine) and
# by the fact that a Trace exception could be pending for PACKED ops.
# If a Trace exception is pending, then the current exception stack
# frame is changed to a Trace exception stack frame and an exit is
# made through _real_trace().
#	For UNNORM/DENORM opclass 3, the actual move out to memory is
# performed by calling the routine fout(). If no exception should occur
# as the result of emulation, then an exit either occurs through
# _fpsp_done() or through _real_trace() if a Trace exception is pending
# (a Trace stack frame must be created here, too). If an FP exception
# should occur, then we must create an exception stack frame of that
# type and jump to either _real_snan(), _real_operr(), _real_inex(),
# _real_unfl(), or _real_ovfl() as appropriate. PACKED opclass 3
# emulation is performed in a similar manner.
#
#########################################################################

#
# (1) DENORM and UNNORM (unimplemented) data types:
#
#                               post-instruction
#                               *****************
#                               *      EA       *
#        pre-instruction        *               *
#       *****************       *****************
#       * 0x0 *  0x0dc  *       * 0x3 *  0x0dc  *
#       *****************       *****************
#       *     Next      *       *     Next      *
#       *      PC       *       *      PC       *
#       *****************       *****************
#       *      SR       *       *      SR       *
#       *****************       *****************
#
# (2) PACKED format (unsupported) opclasses two and three:
#       *****************
#       *      EA       *
#       *               *
#       *****************
#       * 0x2 *  0x0dc  *
#       *****************
#       *     Next      *
#       *      PC       *
#       *****************
#       *      SR       *
#       *****************
#
	global		_fpsp_unsupp
_fpsp_unsupp:

	link.w		%a6,&-LOCAL_SIZE	# init stack frame

	fsave		FP_SRC(%a6)		# save fp state

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode?
	bne.b		fu_s
fu_u:
	mov.l		%usp,%a0		# fetch user stack pointer
	mov.l		%a0,EXC_A7(%a6)		# save on stack
	bra.b		fu_cont
# if the exception is an opclass zero or two unimplemented data type
# exception, then the a7' calculated here is wrong since it doesn't
# stack an ea. however, we don't need an a7' for this case anyways.
fu_s:
	lea		0x4+EXC_EA(%a6),%a0	# load old a7'
	mov.l		%a0,EXC_A7(%a6)		# save on stack

fu_cont:

# the FPIAR holds the "current PC" of the faulting instruction
# the FPIAR should be set correctly for ALL exceptions passing through
# this point.
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD

############################

	clr.b		SPCOND_FLG(%a6)		# clear special condition flag

# Separate opclass three (fpn-to-mem) ops since they have a different
# stack frame and protocol.
	btst		&0x5,EXC_CMDREG(%a6)	# is it an fmove out?
	bne.w		fu_out			# yes

# Separate packed opclass two instructions.
	bfextu		EXC_CMDREG(%a6){&0:&6},%d0
	cmpi.b		%d0,&0x13
	beq.w		fu_in_pack


# I'm not sure at this point what FPSR bits are valid for this instruction.
# so, since the emulation routines re-create them anyways, zero exception field
	andi.l		&0x00ff00ff,USER_FPSR(%a6) # zero exception field

	fmov.l		&0x0,%fpcr		# zero current control regs
	fmov.l		&0x0,%fpsr

# Opclass two w/ memory-to-fpn operation will have an incorrect extended
# precision format if the src format was single or double and the
# source data type was an INF, NAN, DENORM, or UNNORM
	lea		FP_SRC(%a6),%a0		# pass ptr to input
	bsr.l		fix_skewed_ops

# we don't know whether the src operand or the dst operand (or both) is the
# UNNORM or DENORM. call the function that tags the operand type. if the
# input is an UNNORM, then convert it to a NORM, DENORM, or ZERO.
	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		set_tag_x		# tag the operand type
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
	bne.b		fu_op2			# no
	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO

fu_op2:
	mov.b		%d0,STAG(%a6)		# save src optype tag

	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg

# bit five of the fp extension word separates the monadic and dyadic operations
# at this point
	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
	beq.b		fu_extract		# monadic
	cmpi.b		1+EXC_CMDREG(%a6),&0x3a	# is operation an ftst?
	beq.b		fu_extract		# yes, so it's monadic, too

	bsr.l		load_fpn2		# load dst into FP_DST

	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
	bsr.l		set_tag_x		# tag the operand type
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
	bne.b		fu_op2_done		# no
	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
fu_op2_done:
	mov.b		%d0,DTAG(%a6)		# save dst optype tag

fu_extract:
	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec

	bfextu		1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension

	lea		FP_SRC(%a6),%a0
	lea		FP_DST(%a6),%a1

	mov.l		(tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
	jsr		(tbl_unsupp.l,%pc,%d1.l*1)

#
# Exceptions in order of precedence:
#	BSUN	: none
#	SNAN	: all dyadic ops
#	OPERR	: fsqrt(-NORM)
#	OVFL	: all except ftst,fcmp
#	UNFL	: all except ftst,fcmp
#	DZ	: fdiv
#	INEX2	: all except ftst,fcmp
#	INEX1	: none (packed doesn't go through here)
#

# we determine the highest priority exception(if any) set by the
# emulation routine that has also been enabled by the user.
	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions set
	bne.b		fu_in_ena		# some are enabled

fu_in_cont:
# fcmp and ftst do not store any result.
	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch extension
	andi.b		&0x38,%d0		# extract bits 3-5
	cmpi.b		%d0,&0x38		# is instr fcmp or ftst?
	beq.b		fu_in_exit		# yes

	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
	bsr.l		store_fpreg		# store the result

fu_in_exit:

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6

	bra.l		_fpsp_done

fu_in_ena:
	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled
	bfffo		%d0{&24:&8},%d0		# find highest priority exception
	bne.b		fu_in_exc		# there is at least one set

#
# No exceptions occurred that were also enabled. Now:
#
#	if (OVFL && ovfl_disabled && inexact_enabled) {
#	    branch to _real_inex() (even if the result was exact!);
#	} else {
#	    save the result in the proper fp reg (unless the op is fcmp or ftst);
#	    return;
#	}
#
	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
	beq.b		fu_in_cont		# no

fu_in_ovflchk:
	btst		&inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
	beq.b		fu_in_cont		# no
	bra.w		fu_in_exc_ovfl		# go insert overflow frame

#
# An exception occurred and that exception was enabled:
#
#	shift enabled exception field into lo byte of d0;
#	if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
#	    ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
#		/*
#		 * this is the case where we must call _real_inex() now or else
#		 * there will be no other way to pass it the exceptional operand
#		 */
#		call _real_inex();
#	} else {
#		restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
#	}
#
fu_in_exc:
	subi.l		&24,%d0			# fix offset to be 0-7
	cmpi.b		%d0,&0x6		# is exception INEX?
# (6)
	bne.b		fu_in_exc_exit		# no

# the enabled exception was inexact
	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
	bne.w		fu_in_exc_unfl		# yes
	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
	bne.w		fu_in_exc_ovfl		# yes

# here, we insert the correct fsave status value into the fsave frame for the
# corresponding exception. the operand in the fsave frame should be the original
# src operand.
fu_in_exc_exit:
	mov.l		%d0,-(%sp)		# save d0
	bsr.l		funimp_skew		# skew sgl or dbl inputs
	mov.l		(%sp)+,%d0		# restore d0

	mov.w		(tbl_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) # create exc status

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)		# restore src op

	unlk		%a6

	bra.l		_fpsp_done

# fsave status words, indexed by the exception number left in d0 by the code
# above (one entry per FPSR exception bit).
tbl_except:
	short		0xe000,0xe006,0xe004,0xe005
	short		0xe003,0xe002,0xe001,0xe001

fu_in_exc_unfl:
	mov.w		&0x4,%d0
	bra.b		fu_in_exc_exit
fu_in_exc_ovfl:
	mov.w		&0x03,%d0
	bra.b		fu_in_exc_exit

# If the input operand to this operation was opclass two and a single
# or double precision denorm, inf, or nan, the operand needs to be
# "corrected" in order to have the proper equivalent extended precision
# number.
#
# fix_skewed_ops():
#	In:	a0 = ptr to the operand (accessed via LOCAL_EX/LOCAL_HI/LOCAL_LO)
#		a6 = exception frame ptr (EXC_CMDREG is read to get opclass/fmt)
#	Out:	operand at (a0) rewritten in place when it is a skewed
#		sgl/dbl denorm, zero, inf, or nan; otherwise left untouched
#	Clobb:	d0, ccodes, plus whatever norm() clobbers on the denorm path
#		(the code relies on norm() preserving a0)
	global		fix_skewed_ops
fix_skewed_ops:
	bfextu		EXC_CMDREG(%a6){&0:&6},%d0 # extract opclass,src fmt
	cmpi.b		%d0,&0x11		# is class = 2 & fmt = sgl?
	beq.b		fso_sgl			# yes
	cmpi.b		%d0,&0x15		# is class = 2 & fmt = dbl?
	beq.b		fso_dbl			# yes
	rts					# no

fso_sgl:
	mov.w		LOCAL_EX(%a0),%d0	# fetch src exponent
	andi.w		&0x7fff,%d0		# strip sign
	cmpi.w		%d0,&0x3f80		# is |exp| == $3f80?
	beq.b		fso_sgl_dnrm_zero	# yes
	cmpi.w		%d0,&0x407f		# no; is |exp| == $407f?
	beq.b		fso_infnan		# yes
	rts					# no

fso_sgl_dnrm_zero:
	andi.l		&0x7fffffff,LOCAL_HI(%a0) # clear j-bit
	beq.b		fso_zero		# it's a skewed zero
fso_sgl_dnrm:
# here, we count on norm not to alter a0...
	bsr.l		norm			# normalize mantissa
	neg.w		%d0			# -shft amt
	addi.w		&0x3f81,%d0		# adjust new exponent
	andi.w		&0x8000,LOCAL_EX(%a0)	# clear old exponent
	or.w		%d0,LOCAL_EX(%a0)	# insert new exponent
	rts

fso_zero:
	andi.w		&0x8000,LOCAL_EX(%a0)	# clear bogus exponent
	rts

fso_infnan:
	andi.b		&0x7f,LOCAL_HI(%a0)	# clear j-bit
	ori.w		&0x7fff,LOCAL_EX(%a0)	# make exponent = $7fff
	rts

fso_dbl:
	mov.w		LOCAL_EX(%a0),%d0	# fetch src exponent
	andi.w		&0x7fff,%d0		# strip sign
	cmpi.w		%d0,&0x3c00		# is |exp| == $3c00?
	beq.b		fso_dbl_dnrm_zero	# yes
	cmpi.w		%d0,&0x43ff		# no; is |exp| == $43ff?
	beq.b		fso_infnan		# yes
	rts					# no

fso_dbl_dnrm_zero:
	andi.l		&0x7fffffff,LOCAL_HI(%a0) # clear j-bit
	bne.b		fso_dbl_dnrm		# it's a skewed denorm
	tst.l		LOCAL_LO(%a0)		# is it a zero?
	beq.b		fso_zero		# yes
fso_dbl_dnrm:
# here, we count on norm not to alter a0...
	bsr.l		norm			# normalize mantissa
	neg.w		%d0			# -shft amt
	addi.w		&0x3c01,%d0		# adjust new exponent
	andi.w		&0x8000,LOCAL_EX(%a0)	# clear old exponent
	or.w		%d0,LOCAL_EX(%a0)	# insert new exponent
	rts

#################################################################

# fmove out took an unimplemented data type exception.
# the src operand is in FP_SRC.
Call _fout() to write out the result and1494# to determine which exceptions, if any, to take.1495fu_out:14961497# Separate packed move outs from the UNNORM and DENORM move outs.1498bfextu EXC_CMDREG(%a6){&3:&3},%d01499cmpi.b %d0,&0x31500beq.w fu_out_pack1501cmpi.b %d0,&0x71502beq.w fu_out_pack150315041505# I'm not sure at this point what FPSR bits are valid for this instruction.1506# so, since the emulation routines re-create them anyways, zero exception field.1507# fmove out doesn't affect ccodes.1508and.l &0xffff00ff,USER_FPSR(%a6) # zero exception field15091510fmov.l &0x0,%fpcr # zero current control regs1511fmov.l &0x0,%fpsr15121513# the src can ONLY be a DENORM or an UNNORM! so, don't make any big subroutine1514# call here. just figure out what it is...1515mov.w FP_SRC_EX(%a6),%d0 # get exponent1516andi.w &0x7fff,%d0 # strip sign1517beq.b fu_out_denorm # it's a DENORM15181519lea FP_SRC(%a6),%a01520bsr.l unnorm_fix # yes; fix it15211522mov.b %d0,STAG(%a6)15231524bra.b fu_out_cont1525fu_out_denorm:1526mov.b &DENORM,STAG(%a6)1527fu_out_cont:15281529clr.l %d01530mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec15311532lea FP_SRC(%a6),%a0 # pass ptr to src operand15331534mov.l (%a6),EXC_A6(%a6) # in case a6 changes1535bsr.l fout # call fmove out routine15361537# Exceptions in order of precedence:1538# BSUN : none1539# SNAN : none1540# OPERR : fmove.{b,w,l} out of large UNNORM1541# OVFL : fmove.{s,d}1542# UNFL : fmove.{s,d,x}1543# DZ : none1544# INEX2 : all1545# INEX1 : none (packed doesn't travel through here)15461547# determine the highest priority exception(if any) set by the1548# emulation routine that has also been enabled by the user.1549mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled1550bne.w fu_out_ena # some are enabled15511552fu_out_done:15531554mov.l EXC_A6(%a6),(%a6) # in case a6 changed15551556# on extended precision opclass three instructions using pre-decrement or1557# post-increment addressing mode, the address register is not updated. 
is the1558# address register was the stack pointer used from user mode, then let's update1559# it here. if it was used from supervisor mode, then we have to handle this1560# as a special case.1561btst &0x5,EXC_SR(%a6)1562bne.b fu_out_done_s15631564mov.l EXC_A7(%a6),%a0 # restore a71565mov.l %a0,%usp15661567fu_out_done_cont:1568fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp11569fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs1570movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a115711572unlk %a615731574btst &0x7,(%sp) # is trace on?1575bne.b fu_out_trace # yes15761577bra.l _fpsp_done15781579# is the ea mode pre-decrement of the stack pointer from supervisor mode?1580# ("fmov.x fpm,-(a7)") if so,1581fu_out_done_s:1582cmpi.b SPCOND_FLG(%a6),&mda7_flg1583bne.b fu_out_done_cont15841585# the extended precision result is still in fp0. but, we need to save it1586# somewhere on the stack until we can copy it to its final resting place.1587# here, we're counting on the top of the stack to be the old place-holders1588# for fp0/fp1 which have already been restored. 
that way, we can write1589# over those destinations with the shifted stack frame.1590fmovm.x &0x80,FP_SRC(%a6) # put answer on stack15911592fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp11593fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs1594movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a115951596mov.l (%a6),%a6 # restore frame pointer15971598mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)1599mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)16001601# now, copy the result to the proper place on the stack1602mov.l LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)1603mov.l LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)1604mov.l LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)16051606add.l &LOCAL_SIZE-0x8,%sp16071608btst &0x7,(%sp)1609bne.b fu_out_trace16101611bra.l _fpsp_done16121613fu_out_ena:1614and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled1615bfffo %d0{&24:&8},%d0 # find highest priority exception1616bne.b fu_out_exc # there is at least one set16171618# no exceptions were set.1619# if a disabled overflow occurred and inexact was enabled but the result1620# was exact, then a branch to _real_inex() is made.1621btst &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?1622beq.w fu_out_done # no16231624fu_out_ovflchk:1625btst &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?1626beq.w fu_out_done # no1627bra.w fu_inex # yes16281629#1630# The fp move out that took the "Unimplemented Data Type" exception was1631# being traced. 
Since the stack frames are similar, get the "current" PC1632# from FPIAR and put it in the trace stack frame then jump to _real_trace().1633#1634# UNSUPP FRAME TRACE FRAME1635# ***************** *****************1636# * EA * * Current *1637# * * * PC *1638# ***************** *****************1639# * 0x3 * 0x0dc * * 0x2 * 0x024 *1640# ***************** *****************1641# * Next * * Next *1642# * PC * * PC *1643# ***************** *****************1644# * SR * * SR *1645# ***************** *****************1646#1647fu_out_trace:1648mov.w &0x2024,0x6(%sp)1649fmov.l %fpiar,0x8(%sp)1650bra.l _real_trace16511652# an exception occurred and that exception was enabled.1653fu_out_exc:1654subi.l &24,%d0 # fix offset to be 0-816551656# we don't mess with the existing fsave frame. just re-insert it and1657# jump to the "_real_{}()" handler...1658mov.w (tbl_fu_out.b,%pc,%d0.w*2),%d01659jmp (tbl_fu_out.b,%pc,%d0.w*1)16601661swbeg &0x81662tbl_fu_out:1663short tbl_fu_out - tbl_fu_out # BSUN can't happen1664short tbl_fu_out - tbl_fu_out # SNAN can't happen1665short fu_operr - tbl_fu_out # OPERR1666short fu_ovfl - tbl_fu_out # OVFL1667short fu_unfl - tbl_fu_out # UNFL1668short tbl_fu_out - tbl_fu_out # DZ can't happen1669short fu_inex - tbl_fu_out # INEX21670short tbl_fu_out - tbl_fu_out # INEX1 won't make it here16711672# for snan,operr,ovfl,unfl, src op is still in FP_SRC so just1673# frestore it.1674fu_snan:1675fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp11676fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs1677movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a116781679mov.w &0x30d8,EXC_VOFF(%a6) # vector offset = 0xd81680mov.w &0xe006,2+FP_SRC(%a6)16811682frestore FP_SRC(%a6)16831684unlk %a6168516861687bra.l _real_snan16881689fu_operr:1690fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp11691fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs1692movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a116931694mov.w &0x30d0,EXC_VOFF(%a6) # vector offset = 
0xd01695mov.w &0xe004,2+FP_SRC(%a6)16961697frestore FP_SRC(%a6)16981699unlk %a6170017011702bra.l _real_operr17031704fu_ovfl:1705fmovm.x &0x40,FP_SRC(%a6) # save EXOP to the stack17061707fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp11708fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs1709movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a117101711mov.w &0x30d4,EXC_VOFF(%a6) # vector offset = 0xd41712mov.w &0xe005,2+FP_SRC(%a6)17131714frestore FP_SRC(%a6) # restore EXOP17151716unlk %a617171718bra.l _real_ovfl17191720# underflow can happen for extended precision. extended precision opclass1721# three instruction exceptions don't update the stack pointer. so, if the1722# exception occurred from user mode, then simply update a7 and exit normally.1723# if the exception occurred from supervisor mode, check if1724fu_unfl:1725mov.l EXC_A6(%a6),(%a6) # restore a617261727btst &0x5,EXC_SR(%a6)1728bne.w fu_unfl_s17291730mov.l EXC_A7(%a6),%a0 # restore a7 whether we need1731mov.l %a0,%usp # to or not...17321733fu_unfl_cont:1734fmovm.x &0x40,FP_SRC(%a6) # save EXOP to the stack17351736fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp11737fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs1738movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a117391740mov.w &0x30cc,EXC_VOFF(%a6) # vector offset = 0xcc1741mov.w &0xe003,2+FP_SRC(%a6)17421743frestore FP_SRC(%a6) # restore EXOP17441745unlk %a617461747bra.l _real_unfl17481749fu_unfl_s:1750cmpi.b SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(sp)?1751bne.b fu_unfl_cont17521753# the extended precision result is still in fp0. but, we need to save it1754# somewhere on the stack until we can copy it to its final resting place1755# (where the exc frame is currently). 
# make sure it's not at the top of the
# frame or it will get overwritten when the exc stack frame is shifted "down".
	fmovm.x		&0x80,FP_SRC(%a6)	# put answer on stack
	fmovm.x		&0x40,FP_DST(%a6)	# put EXOP on stack

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w		&0x30cc,EXC_VOFF(%a6)	# vector offset = 0xcc
	mov.w		&0xe003,2+FP_DST(%a6)	# set fsave status

	frestore	FP_DST(%a6)		# restore EXOP

	mov.l		(%a6),%a6		# restore frame pointer

# a6 no longer addresses the frame, so everything below is %sp-relative.
# move the three longwords of the exception frame "down" 0xc bytes.
	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)

# now, copy the result to the proper place on the stack
	mov.l		LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
	mov.l		LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
	mov.l		LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)

	add.l		&LOCAL_SIZE-0x8,%sp

	bra.l		_real_unfl

# fmove in and out enter here.
fu_inex:
	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to the stack

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w		&0x30c4,EXC_VOFF(%a6)	# vector offset = 0xc4
	mov.w		&0xe001,2+FP_SRC(%a6)	# set fsave status

	frestore	FP_SRC(%a6)		# restore EXOP

	unlk		%a6


	bra.l		_real_inex

#########################################################################
#########################################################################
# fu_in_pack: packed-source path. fetches the packed operand, tags the
# src (and, for dyadic operations, the dst) operand, then dispatches to the
# emulation routine selected from tbl_unsupp by the extension field.
fu_in_pack:


# I'm not sure at this point what FPSR bits are valid for this instruction.
# so, since the emulation routines re-create them anyways, zero exception field
	andi.l		&0x0ff00ff,USER_FPSR(%a6) # zero exception field

	fmov.l		&0x0,%fpcr		# zero current control regs
	fmov.l		&0x0,%fpsr

	bsr.l		get_packed		# fetch packed src operand

	lea		FP_SRC(%a6),%a0		# pass ptr to src
	bsr.l		set_tag_x		# set src optype tag

	mov.b		%d0,STAG(%a6)		# save src optype tag

	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg

# bit five of the fp extension word separates the monadic and dyadic operations
# at this point
	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
	beq.b		fu_extract_p		# monadic
	cmpi.b		1+EXC_CMDREG(%a6),&0x3a	# is operation an ftst?
	beq.b		fu_extract_p		# yes, so it's monadic, too

	bsr.l		load_fpn2		# load dst into FP_DST

	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
	bsr.l		set_tag_x		# tag the operand type
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
	bne.b		fu_op2_done_p		# no
	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
fu_op2_done_p:
	mov.b		%d0,DTAG(%a6)		# save dst optype tag

fu_extract_p:
	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec

	bfextu		1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension

	lea		FP_SRC(%a6),%a0		# a0 = ptr to src operand
	lea		FP_DST(%a6),%a1		# a1 = ptr to dst operand

# tbl_unsupp holds longword entries that are used as offsets from tbl_unsupp
	mov.l		(tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
	jsr		(tbl_unsupp.l,%pc,%d1.l*1)

#
# Exceptions in order of precedence:
#	BSUN	: none
#	SNAN	: all dyadic ops
#	OPERR	: fsqrt(-NORM)
#	OVFL	: all except ftst,fcmp
#	UNFL	: all except ftst,fcmp
#	DZ	: fdiv
#	INEX2	: all except ftst,fcmp
#	INEX1	: all
#

# we determine the highest priority exception(if any) set by the
# emulation routine that has also been enabled by the user.
	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
	bne.w		fu_in_ena_p		# some are enabled

fu_in_cont_p:
# fcmp and ftst do not store any result.
	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch extension
	andi.b		&0x38,%d0		# extract bits 3-5
	cmpi.b		%d0,&0x38		# is instr fcmp or ftst?
	beq.b		fu_in_exit_p		# yes

	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
	bsr.l		store_fpreg		#
# store the result

# normal exit for the packed-source path: update the user stack pointer when
# the exception came from user mode, restore the saved registers and leave
# through _fpsp_done() (or fu_trace_p when the stacked SR has trace set).
fu_in_exit_p:

	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
	bne.w		fu_in_exit_s_p		# supervisor

	mov.l		EXC_A7(%a6),%a0		# update user a7
	mov.l		%a0,%usp

fu_in_exit_cont_p:
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6			# unravel stack frame

	btst		&0x7,(%sp)		# is trace on?
	bne.w		fu_trace_p		# yes

	bra.l		_fpsp_done		# exit to os

# the exception occurred in supervisor mode. check to see if the
# addressing mode was (a7)+. if so, we'll need to shift the
# stack frame "up".
fu_in_exit_s_p:
	btst		&mia7_bit,SPCOND_FLG(%a6) # was ea mode (a7)+
	beq.b		fu_in_exit_cont_p	# no

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6			# unravel stack frame

# shift the stack frame "up". we don't really care about the <ea> field.
# only the first two longwords of the frame are moved, by 0xc bytes.
	mov.l		0x4(%sp),0x10(%sp)
	mov.l		0x0(%sp),0xc(%sp)
	add.l		&0xc,%sp

	btst		&0x7,(%sp)		# is trace on?
	bne.w		fu_trace_p		# yes

	bra.l		_fpsp_done		# exit to os

# some exceptions are enabled (d0 = FPCR enable byte on entry).
fu_in_ena_p:
	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled & set
	bfffo		%d0{&24:&8},%d0		# find highest priority exception
	bne.b		fu_in_exc_p		# at least one was set

#
# No exceptions occurred that were also enabled.
# Now:
#
#	if (OVFL && ovfl_disabled && inexact_enabled) {
#	    branch to _real_inex() (even if the result was exact!);
#	} else {
#	    save the result in the proper fp reg (unless the op is fcmp or ftst);
#	    return;
#	}
#
	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
	beq.w		fu_in_cont_p		# no

fu_in_ovflchk_p:
	btst		&inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
	beq.w		fu_in_cont_p		# no
	bra.w		fu_in_exc_ovfl_p	# do _real_inex() now

#
# An exception occurred and that exception was enabled:
#
#	shift enabled exception field into lo byte of d0;
#	if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
#	    ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
#		/*
#		 * this is the case where we must call _real_inex() now or else
#		 * there will be no other way to pass it the exceptional operand
#		 */
#		call _real_inex();
#	} else {
#		restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
#	}
#
# on entry d0 holds the bfffo result for the {24:8} field. this label is only
# reached when a bit was found, so after the subtract d0 is 0-7 and is later
# used as the index into tbl_except_p.
fu_in_exc_p:
	subi.l		&24,%d0			# fix offset to be 0-8
	cmpi.b		%d0,&0x6		# is exception INEX? (6 or 7)
	blt.b		fu_in_exc_exit_p	# no

# the enabled exception was inexact
	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
	bne.w		fu_in_exc_unfl_p	# yes
	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
	bne.w		fu_in_exc_ovfl_p	# yes

# here, we insert the correct fsave status value into the fsave frame for the
# corresponding exception.
# the operand in the fsave frame should be the original
# src operand.
# as a reminder for future predicted pain and agony, we are passing in fsave the
# "non-skewed" operand for cases of sgl and dbl src INFs,NANs, and DENORMs.
# this is INCORRECT for enabled SNAN which would give to the user the skewed SNAN!!!
fu_in_exc_exit_p:
	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
	bne.w		fu_in_exc_exit_s_p	# supervisor

	mov.l		EXC_A7(%a6),%a0		# update user a7
	mov.l		%a0,%usp

fu_in_exc_exit_cont_p:
# d0 = exception index; fetch the matching fsave status word from the table
	mov.w		(tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6)

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)		# restore src op

	unlk		%a6

	btst		&0x7,(%sp)		# is trace enabled?
	bne.w		fu_trace_p		# yes

	bra.l		_fpsp_done

# fsave status words, indexed by the exception number left in d0
tbl_except_p:
	short		0xe000,0xe006,0xe004,0xe005
	short		0xe003,0xe002,0xe001,0xe001

# inexact enabled together with an overflow: use table entry 3 (0xe005)
fu_in_exc_ovfl_p:
	mov.w		&0x3,%d0
	bra.w		fu_in_exc_exit_p

# inexact enabled together with an underflow: use table entry 4 (0xe003)
fu_in_exc_unfl_p:
	mov.w		&0x4,%d0
	bra.w		fu_in_exc_exit_p

# supervisor mode exit: same as the "cont" path unless the <ea> mode was
# (a7)+, in which case the stack frame is also shifted "up" by 0xc bytes.
fu_in_exc_exit_s_p:
	btst		&mia7_bit,SPCOND_FLG(%a6)
	beq.b		fu_in_exc_exit_cont_p

	mov.w		(tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6)

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)		# restore src op

	unlk		%a6			# unravel stack frame

# shift stack frame "up". who cares about <ea> field.
	mov.l		0x4(%sp),0x10(%sp)
	mov.l		0x0(%sp),0xc(%sp)
	add.l		&0xc,%sp

	btst		&0x7,(%sp)		# is trace on?
	bne.b		fu_trace_p		# yes

	bra.l		_fpsp_done		# exit to os

#
# The opclass two PACKED instruction that took an "Unimplemented Data Type"
# exception was being traced.
# Make the "current" PC the FPIAR and put it in the
# trace stack frame then jump to _real_trace().
#
#	  UNSUPP FRAME		   TRACE FRAME
#	*****************	*****************
#	*      EA	*	*    Current	*
#	*		*	*      PC	*
#	*****************	*****************
#	* 0x2 *  0x0dc	*	* 0x2 *  0x024	*
#	*****************	*****************
#	*     Next	*	*     Next	*
#	*      PC	*	*      PC	*
#	*****************	*****************
#	*      SR	*	*      SR	*
#	*****************	*****************
fu_trace_p:
	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
	fmov.l		%fpiar,0x8(%sp)		# "Current PC" replaces the EA

	bra.l		_real_trace

#########################################################
#########################################################
# fu_out_pack: packed move-out path. loads the source fp register, tags it,
# and calls fout() to do the move out.
fu_out_pack:


# I'm not sure at this point what FPSR bits are valid for this instruction.
# so, since the emulation routines re-create them anyways, zero exception field.
# fmove out doesn't affect ccodes.
	and.l		&0xffff00ff,USER_FPSR(%a6) # zero exception field

	fmov.l		&0x0,%fpcr		# zero current control regs
	fmov.l		&0x0,%fpsr

	bfextu		EXC_CMDREG(%a6){&6:&3},%d0
	bsr.l		load_fpn1

# unlike other opclass 3, unimplemented data type exceptions, packed must be
# able to detect all operand types.
	lea		FP_SRC(%a6),%a0
	bsr.l		set_tag_x		# tag the operand type
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
	bne.b		fu_op2_p		# no
	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO

fu_op2_p:
	mov.b		%d0,STAG(%a6)		# save src optype tag

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec

	lea		FP_SRC(%a6),%a0		# pass ptr to src operand

	mov.l		(%a6),EXC_A6(%a6)	# in case a6 changes
	bsr.l		fout			# call fmove out routine

# Exceptions in order of precedence:
#	BSUN	: no
#	SNAN	: yes
#	OPERR	: if ((k_factor > +17) || (dec.
# exp exceeds 3 digits))
#	OVFL	: no
#	UNFL	: no
#	DZ	: no
#	INEX2	: yes
#	INEX1	: no

# determine the highest priority exception(if any) set by the
# emulation routine that has also been enabled by the user.
	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
	bne.w		fu_out_ena_p		# some are enabled

fu_out_exit_p:
	mov.l		EXC_A6(%a6),(%a6)	# restore a6

	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
	bne.b		fu_out_exit_s_p		# supervisor

	mov.l		EXC_A7(%a6),%a0		# update user a7
	mov.l		%a0,%usp

fu_out_exit_cont_p:
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6			# unravel stack frame

	btst		&0x7,(%sp)		# is trace on?
	bne.w		fu_trace_p		# yes

	bra.l		_fpsp_done		# exit to os

# the exception occurred in supervisor mode. check to see if the
# addressing mode was -(a7). if so, we'll need to shift the
# stack frame "down".
fu_out_exit_s_p:
	btst		&mda7_bit,SPCOND_FLG(%a6) # was ea mode -(a7)
	beq.b		fu_out_exit_cont_p	# no

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.l		(%a6),%a6		# restore frame pointer

# only the first two longwords of the frame are moved here (no EA copy)
	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)

# now, copy the result to the proper place on the stack
	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)

	add.l		&LOCAL_SIZE-0x8,%sp

	btst		&0x7,(%sp)		# is trace on?
	bne.w		fu_trace_p		# yes

	bra.l		_fpsp_done

fu_out_ena_p:
	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled
	bfffo		%d0{&24:&8},%d0		# find highest priority exception
	beq.w		fu_out_exit_p		# none enabled & set

	mov.l		EXC_A6(%a6),(%a6)	# restore a6

# an exception occurred and that exception was enabled.
# the only exception possible on packed move out are INEX, OPERR, and SNAN.
# d0 still holds the raw bfffo result: equal to 0x1a goes to the OPERR path,
# greater goes to the INEX path, anything lower falls into the SNAN path.
fu_out_exc_p:
	cmpi.b		%d0,&0x1a
	bgt.w		fu_inex_p2
	beq.w		fu_operr_p

fu_snan_p:
	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
	bne.b		fu_snan_s_p		# supervisor

	mov.l		EXC_A7(%a6),%a0		# update user a7
	mov.l		%a0,%usp
	bra.w		fu_snan

fu_snan_s_p:
	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(a7)?
	bne.w		fu_snan			# no

# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
# the strategy is to move the exception frame "down" 12 bytes. then, we
# can store the default result where the exception frame was.
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w		&0x30d8,EXC_VOFF(%a6)	# vector offset = 0xd8
	mov.w		&0xe006,2+FP_SRC(%a6)	# set fsave status

	frestore	FP_SRC(%a6)		# restore src operand

	mov.l		(%a6),%a6		# restore frame pointer

	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)

# now, we copy the default result to its proper location
	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)

	add.l		&LOCAL_SIZE-0x8,%sp


	bra.l		_real_snan

fu_operr_p:
	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
	bne.w		fu_operr_p_s		# supervisor

	mov.l		EXC_A7(%a6),%a0		# update user a7
	mov.l		%a0,%usp
	bra.w		fu_operr

fu_operr_p_s:
	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(a7)?
	bne.w		fu_operr		# no

# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
# the strategy is to move the exception frame "down" 12 bytes.
# then, we
# can store the default result where the exception frame was.
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w		&0x30d0,EXC_VOFF(%a6)	# vector offset = 0xd0
	mov.w		&0xe004,2+FP_SRC(%a6)	# set fsave status

	frestore	FP_SRC(%a6)		# restore src operand

	mov.l		(%a6),%a6		# restore frame pointer

	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)

# now, we copy the default result to its proper location
	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)

	add.l		&LOCAL_SIZE-0x8,%sp


	bra.l		_real_operr

# enabled inexact on packed move out; same structure as the SNAN and OPERR
# paths but with the inexact vector offset/fsave status and _real_inex().
fu_inex_p2:
	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
	bne.w		fu_inex_s_p2		# supervisor

	mov.l		EXC_A7(%a6),%a0		# update user a7
	mov.l		%a0,%usp
	bra.w		fu_inex

fu_inex_s_p2:
	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(a7)?
	bne.w		fu_inex			# no

# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
# the strategy is to move the exception frame "down" 12 bytes.
# then, we
# can store the default result where the exception frame was.
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w		&0x30c4,EXC_VOFF(%a6)	# vector offset = 0xc4
	mov.w		&0xe001,2+FP_SRC(%a6)	# set fsave status

	frestore	FP_SRC(%a6)		# restore src operand

	mov.l		(%a6),%a6		# restore frame pointer

	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)

# now, we copy the default result to its proper location
	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)

	add.l		&LOCAL_SIZE-0x8,%sp


	bra.l		_real_inex

#########################################################################

#
# if we're stuffing a source operand back into an fsave frame then we
# have to make sure that for single or double source operands that the
# format stuffed is as weird as the hardware usually makes it.
#
# the operand at FP_SRC(%a6) is modified in place; d0/d1 are used as
# scratch (the dbl path also uses a0 and calls dnrm_lp).
#
	global		funimp_skew
funimp_skew:
	bfextu		EXC_EXTWORD(%a6){&3:&3},%d0 # extract src specifier
	cmpi.b		%d0,&0x1		# was src sgl?
	beq.b		funimp_skew_sgl		# yes
	cmpi.b		%d0,&0x5		# was src dbl?
	beq.b		funimp_skew_dbl		# yes
	rts					# any other format: nothing to do

funimp_skew_sgl:
	mov.w		FP_SRC_EX(%a6),%d0	# fetch DENORM exponent
	andi.w		&0x7fff,%d0		# strip sign
	beq.b		funimp_skew_sgl_not	# zero exponent; leave as is
	cmpi.w		%d0,&0x3f80
	bgt.b		funimp_skew_sgl_not	# exponent above 0x3f80; leave as is
	neg.w		%d0			# make exponent negative
	addi.w		&0x3f81,%d0		# find amt to shift
	mov.l		FP_SRC_HI(%a6),%d1	# fetch DENORM hi(man)
	lsr.l		%d0,%d1			# shift it
	bset		&31,%d1			# set j-bit
	mov.l		%d1,FP_SRC_HI(%a6)	# insert new hi(man)
	andi.w		&0x8000,FP_SRC_EX(%a6)	# clear old exponent
	ori.w		&0x3f80,FP_SRC_EX(%a6)	# insert new
# "skewed" exponent
funimp_skew_sgl_not:
	rts

funimp_skew_dbl:
	mov.w		FP_SRC_EX(%a6),%d0	# fetch DENORM exponent
	andi.w		&0x7fff,%d0		# strip sign
	beq.b		funimp_skew_dbl_not	# zero exponent; leave as is
	cmpi.w		%d0,&0x3c00
	bgt.b		funimp_skew_dbl_not	# exponent above 0x3c00; leave as is

	tst.b		FP_SRC_EX(%a6)		# make "internal format"
	smi.b		0x2+FP_SRC(%a6)		# remember the sign in byte 2
	mov.w		%d0,FP_SRC_EX(%a6)	# insert exponent with cleared sign
	clr.l		%d0			# clear g,r,s
	lea		FP_SRC(%a6),%a0		# pass ptr to src op
	mov.w		&0x3c01,%d1		# pass denorm threshold
	bsr.l		dnrm_lp			# denorm it
	mov.w		&0x3c00,%d0		# new exponent
	tst.b		0x2+FP_SRC(%a6)		# is sign set?
	beq.b		fss_dbl_denorm_done	# no
	bset		&15,%d0			# set sign
fss_dbl_denorm_done:
	bset		&0x7,FP_SRC_HI(%a6)	# set j-bit
	mov.w		%d0,FP_SRC_EX(%a6)	# insert new exponent
funimp_skew_dbl_not:
	rts

#########################################################################
# _mem_write2: for an exception taken in user mode, hand the write off to
# _dmem_write. in supervisor mode, copy the three longwords at (a0) into
# FP_DST instead and return with d1 cleared.
	global		_mem_write2
_mem_write2:
	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
	beq.l		_dmem_write		# user
	mov.l		0x0(%a0),FP_DST_EX(%a6)
	mov.l		0x4(%a0),FP_DST_HI(%a6)
	mov.l		0x8(%a0),FP_DST_LO(%a6)
	clr.l		%d1
	rts

#########################################################################
# XDEF ****************************************************************	#
#	_fpsp_effadd(): 060FPSP entry point for FP "Unimplemented	#
#	effective address" exception.					#
#									#
#	This handler should be the first code executed upon taking the	#
#	FP Unimplemented Effective Address exception in an operating	#
#	system.								#
#									#
# XREF ****************************************************************	#
#	_imem_read_long() - read instruction longword			#
#	fix_skewed_ops() - adjust src operand in fsave frame		#
#	set_tag_x() - determine optype of src/dst operands		#
#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
#	load_fpn2() - load dst operand from FP regfile			#
#	tbl_unsupp - addr of table of emulation routines for opclass 0,2 #
#	decbin() - convert packed data to FP binary data		#
#	_real_fpu_disabled() - "callout" for "FPU disabled" exception	#
#	_real_access() - "callout" for access error exception		#
#	_mem_read() - read extended immediate operand from memory	#
#	_fpsp_done() - "callout" for exit; work all done		#
#	_real_trace() - "callout" for Trace enabled exception		#
#	fmovm_dynamic() - emulate dynamic fmovm instruction		#
#	fmovm_ctrl() - emulate fmovm control instruction		#
#									#
# INPUT ***************************************************************	#
#	- The system stack contains the "Unimplemented <ea>" stk frame	#
#									#
# OUTPUT **************************************************************	#
#	If access error:						#
#	- The system stack is changed to an access error stack frame	#
#	If FPU disabled:						#
#	- The system stack is changed to an FPU disabled stack frame	#
#	If Trace exception enabled:					#
#	- The system stack is changed to a Trace exception stack frame	#
#	Else: (normal case)						#
#	- None (correct result has been stored as appropriate)		#
#									#
# ALGORITHM ***********************************************************	#
#	This exception handles 3 types of operations:			#
# (1) FP Instructions using extended precision or packed immediate	#
#     addressing mode.							#
# (2) The "fmovm.x" instruction w/ dynamic register specification.	#
# (3) The "fmovm.l" instruction w/ 2 or 3 control registers.		#
#									#
#	For immediate data operations, the data is read in w/ a		#
# _mem_read() "callout", converted to FP binary (if packed), and used	#
# as the source operand to the instruction specified by the instruction	#
# word. If no FP exception should be reported as a result of the	#
# emulation, then the result is stored to the destination register and	#
# the handler exits through _fpsp_done(). If an enabled exc has been	#
# signalled as a result of emulation, then an fsave state frame		#
# corresponding to the FP exception type must be entered into the 060	#
# FPU before exiting. In either the enabled or disabled cases, we	#
# must also check if a Trace exception is pending, in which case, we	#
# must create a Trace exception stack frame from the current exception	#
# stack frame. If no Trace is pending, we simply exit through		#
# _fpsp_done().								#
#	For "fmovm.x", call the routine fmovm_dynamic() which will	#
# decode and emulate the instruction. No FP exceptions can be pending	#
# as a result of this operation emulation. A Trace exception can be	#
# pending, though, which means the current stack frame must be changed	#
# to a Trace stack frame and an exit made through _real_trace().	#
# For the case of "fmovm.x Dn,-(a7)", where the offending instruction	#
# was executed from supervisor mode, this handler must store the FP	#
# register file values to the system stack by itself since		#
# fmovm_dynamic() can't handle this. A normal exit is made through	#
# _fpsp_done().								#
#	For "fmovm.l", fmovm_ctrl() is used to emulate the instruction.	#
# Again, a Trace exception may be pending and an exit made through	#
# _real_trace(). Else, a normal exit is made through _fpsp_done().	#
#									#
#	Before any of the above is attempted, it must be checked to	#
# see if the FPU is disabled.
# Since the "Unimp <ea>" exception is taken				#
# before the "FPU disabled" exception, but the "FPU disabled" exception	#
# has higher priority, we check the disabled bit in the PCR. If set,	#
# then we must create an 8 word "FPU disabled" exception stack frame	#
# from the current 4 word exception stack frame. This includes		#
# reproducing the effective address of the instruction to put on the	#
# new stack frame.							#
#									#
#	In the process of all emulation work, if a _mem_read()		#
# "callout" returns a failing result indicating an access error, then	#
# we must create an access error stack frame from the current stack	#
# frame. This information includes a faulting address and a fault-	#
# status-longword. These are created within this handler.		#
#									#
#########################################################################

	global		_fpsp_effadd
_fpsp_effadd:

# This exception type takes priority over the "Line F Emulator"
# exception. Therefore, the FPU could be disabled when entering here.
# So, we must check to see if it's disabled and handle that case separately.
	mov.l		%d0,-(%sp)		# save d0
	movc		%pcr,%d0		# load proc cr
	btst		&0x1,%d0		# is FPU disabled?
	bne.w		iea_disabled		# yes
	mov.l		(%sp)+,%d0		# restore d0

	link		%a6,&-LOCAL_SIZE	# init stack frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

# PC of instruction that took the exception is the PC in the frame
	mov.l		EXC_PC(%a6),EXC_EXTWPTR(%a6)

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD

#########################################################################

	tst.w		%d0			# is operation fmovem?
	bmi.w		iea_fmovm		# yes

#
# here, we will have:
#	fabs	fdabs	fsabs		facos		fmod
#	fadd	fdadd	fsadd		fasin		frem
#	fcmp				fatan		fscale
#	fdiv	fddiv	fsdiv		fatanh		fsin
#	fint				fcos		fsincos
#	fintrz				fcosh		fsinh
#	fmove	fdmove	fsmove		fetox		ftan
#	fmul	fdmul	fsmul		fetoxm1		ftanh
#	fneg	fdneg	fsneg		fgetexp		ftentox
#	fsgldiv				fgetman		ftwotox
#	fsglmul				flog10
#	fsqrt				flog2
#	fsub	fdsub	fssub		flogn
#	ftst				flognp1
# which can all use f<op>.{x,p}
# so, now it's immediate data extended precision AND PACKED FORMAT!
#
iea_op:
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

	btst		&0xa,%d0		# is src fmt x or p?
	bne.b		iea_op_pack		# packed


	mov.l		EXC_EXTWPTR(%a6),%a0	# pass: ptr to #<data>
	lea		FP_SRC(%a6),%a1		# pass: ptr to super addr
	mov.l		&0xc,%d0		# pass: 12 bytes
	bsr.l		_imem_read		# read extended immediate

	tst.l		%d1			# did ifetch fail?
	bne.w		iea_iacc		# yes

	bra.b		iea_op_setsrc

iea_op_pack:

	mov.l		EXC_EXTWPTR(%a6),%a0	# pass:
# ptr to #<data>
	lea		FP_SRC(%a6),%a1		# pass: ptr to super dst
	mov.l		&0xc,%d0		# pass: 12 bytes
	bsr.l		_imem_read		# read packed operand

	tst.l		%d1			# did ifetch fail?
	bne.w		iea_iacc		# yes

# The packed operand is an INF or a NAN if the exponent field is all ones.
	bfextu		FP_SRC(%a6){&1:&15},%d0	# get exp
	cmpi.w		%d0,&0x7fff		# INF or NAN?
	beq.b		iea_op_setsrc		# operand is an INF or NAN

# The packed operand is a zero if the mantissa is all zero, else it's
# a normal packed op.
	mov.b		3+FP_SRC(%a6),%d0	# get byte 4
	andi.b		&0x0f,%d0		# clear all but last nybble
	bne.b		iea_op_gp_not_spec	# not a zero
	tst.l		FP_SRC_HI(%a6)		# is lw 2 zero?
	bne.b		iea_op_gp_not_spec	# not a zero
	tst.l		FP_SRC_LO(%a6)		# is lw 3 zero?
	beq.b		iea_op_setsrc		# operand is a ZERO
iea_op_gp_not_spec:
	lea		FP_SRC(%a6),%a0		# pass: ptr to packed op
	bsr.l		decbin			# convert to extended
	fmovm.x		&0x80,FP_SRC(%a6)	# make this the srcop

iea_op_setsrc:
	addi.l		&0xc,EXC_EXTWPTR(%a6)	# update extension word pointer

# FP_SRC now holds the src operand.
	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		set_tag_x		# tag the operand type
	mov.b		%d0,STAG(%a6)		# could be ANYTHING!!!
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
	bne.b		iea_op_getdst		# no
	bsr.l		unnorm_fix		# yes; convert to NORM/DENORM/ZERO
	mov.b		%d0,STAG(%a6)		# set new optype tag
iea_op_getdst:
	clr.b		STORE_FLG(%a6)		# clear "store result" boolean

	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
	beq.b		iea_op_extract		# monadic
	btst		&0x4,1+EXC_CMDREG(%a6)	# is operation fsincos,ftst,fcmp?
	bne.b		iea_op_spec		# yes

iea_op_loaddst:
	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
	bsr.l		load_fpn2		# load dst operand

	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
	bsr.l		set_tag_x		# tag the operand type
	mov.b		%d0,DTAG(%a6)		# could be ANYTHING!!!
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
	bne.b		iea_op_extract		#
# no
	bsr.l		unnorm_fix		# yes; convert to NORM/DENORM/ZERO
	mov.b		%d0,DTAG(%a6)		# set new optype tag
	bra.b		iea_op_extract

# the operation is fsincos, ftst, or fcmp. only fcmp is dyadic
iea_op_spec:
	btst		&0x3,1+EXC_CMDREG(%a6)	# is operation fsincos?
	beq.b		iea_op_extract		# yes
# now, we're left with ftst and fcmp. so, first let's tag them so that they don't
# store a result. then, only fcmp will branch back and pick up a dst operand.
	st		STORE_FLG(%a6)		# don't store a final result
	btst		&0x1,1+EXC_CMDREG(%a6)	# is operation fcmp?
	beq.b		iea_op_loaddst		# yes

iea_op_extract:
	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass: rnd mode,prec

	mov.b		1+EXC_CMDREG(%a6),%d1
	andi.w		&0x007f,%d1		# extract extension

	fmov.l		&0x0,%fpcr		# zero current control regs
	fmov.l		&0x0,%fpsr

	lea		FP_SRC(%a6),%a0		# a0 = ptr to src operand
	lea		FP_DST(%a6),%a1		# a1 = ptr to dst operand

# tbl_unsupp holds longword entries that are used as offsets from tbl_unsupp
	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
	jsr		(tbl_unsupp.l,%pc,%d1.l*1)

#
# Exceptions in order of precedence:
#	BSUN	: none
#	SNAN	: all operations
#	OPERR	: all reg-reg or mem-reg operations that can normally operr
#	OVFL	: same as OPERR
#	UNFL	: same as OPERR
#	DZ	: same as OPERR
#	INEX2	: same as OPERR
#	INEX1	: all packed immediate operations
#

# we determine the highest priority exception(if any) set by the
# emulation routine that has also been enabled by the user.
	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
	bne.b		iea_op_ena		# some are enabled

# now, we save the result, unless, of course, the operation was ftst or fcmp.
# these don't save results.
iea_op_save:
	tst.b		STORE_FLG(%a6)		# does this op store a result?
	bne.b		iea_op_exit1		# exit with no frestore

iea_op_store:
	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
	bsr.l		store_fpreg		# store the result

iea_op_exit1:
	mov.l		EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
	mov.l		EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc
# frame

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6			# unravel the frame

	btst		&0x7,(%sp)		# is trace on?
	bne.w		iea_op_trace		# yes

	bra.l		_fpsp_done		# exit to os

# some exceptions are enabled (d0 = FPCR enable byte on entry).
iea_op_ena:
	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enable and set
	bfffo		%d0{&24:&8},%d0		# find highest priority exception
	bne.b		iea_op_exc		# at least one was set

# no exception occurred. now, did a disabled, exact overflow occur with inexact
# enabled? if so, then we have to stuff an overflow frame into the FPU.
	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
	beq.b		iea_op_save

iea_op_ovfl:
	btst		&inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
	beq.b		iea_op_store		# no
	bra.b		iea_op_exc_ovfl		# yes

# an enabled exception occurred. we have to insert the exception type back into
# the machine.
# this label is only reached when bfffo found a set bit, so after the subtract
# d0 is 0-7 and serves as the index into tbl_iea_except.
iea_op_exc:
	subi.l		&24,%d0			# fix offset to be 0-8
	cmpi.b		%d0,&0x6		# is exception INEX?
	bne.b		iea_op_exc_force	# no

# the enabled exception was inexact.
# so, if it occurs with an overflow
# or underflow that was disabled, then we have to force an overflow or
# underflow frame.
	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
	bne.b		iea_op_exc_ovfl		# yes
	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did underflow occur?
	bne.b		iea_op_exc_unfl		# yes

iea_op_exc_force:
# d0 = exception index; fetch the matching fsave status word from the table
	mov.w		(tbl_iea_except.b,%pc,%d0.w*2),2+FP_SRC(%a6)
	bra.b		iea_op_exit2		# exit with frestore

# fsave status words, indexed by the exception number left in d0
tbl_iea_except:
	short		0xe002, 0xe006, 0xe004, 0xe005
	short		0xe003, 0xe002, 0xe001, 0xe001

iea_op_exc_ovfl:
	mov.w		&0xe005,2+FP_SRC(%a6)	# force the overflow fsave status
	bra.b		iea_op_exit2

iea_op_exc_unfl:
	mov.w		&0xe003,2+FP_SRC(%a6)	# force the underflow fsave status

iea_op_exit2:
	mov.l		EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
	mov.l		EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)		# restore exceptional state

	unlk		%a6			# unravel the frame

	btst		&0x7,(%sp)		# is trace on?
	bne.b		iea_op_trace		# yes

	bra.l		_fpsp_done		# exit to os

#
# The opclass two instruction that took an "Unimplemented Effective Address"
# exception was being traced.
# Make the "current" PC the FPIAR and put it in
# the trace stack frame then jump to _real_trace().
#
#	 UNIMP EA FRAME		   TRACE FRAME
#	*****************	*****************
#	* 0x0 *  0x0f0	*	*    Current	*
#	*****************	*      PC	*
#	*    Current	*	*****************
#	*      PC	*	* 0x2 *  0x024	*
#	*****************	*****************
#	*      SR	*	*     Next	*
#	*****************	*      PC	*
#				*****************
#				*      SR	*
#				*****************
iea_op_trace:
	mov.l		(%sp),-(%sp)		# shift stack frame "down"
	mov.w		0x8(%sp),0x4(%sp)
	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR

	bra.l		_real_trace

#########################################################################
# fmovm emulation. d0 still holds the fetched OPWORD/EXTWORD longword.
iea_fmovm:
	btst		&14,%d0			# ctrl or data reg
	beq.w		iea_fmovm_ctrl

iea_fmovm_data:

	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode
	bne.b		iea_fmovm_data_s

iea_fmovm_data_u:
	mov.l		%usp,%a0
	mov.l		%a0,EXC_A7(%a6)		# store current a7
	bsr.l		fmovm_dynamic		# do dynamic fmovm
	mov.l		EXC_A7(%a6),%a0		# load possibly new a7
	mov.l		%a0,%usp		# update usp
	bra.w		iea_fmovm_exit

iea_fmovm_data_s:
	clr.b		SPCOND_FLG(%a6)
	lea		0x2+EXC_VOFF(%a6),%a0	# address just past the 4 word frame
	mov.l		%a0,EXC_A7(%a6)		# use it as the "current a7"
	bsr.l		fmovm_dynamic		# do dynamic fmovm

	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
	beq.w		iea_fmovm_data_predec
	cmpi.b		SPCOND_FLG(%a6),&mia7_flg
	bne.w		iea_fmovm_exit

# right now, d0 = the size.
# the data has been fetched from the supervisor stack, but we have not
# incremented the stack pointer by the appropriate number of bytes.
# do it here.
iea_fmovm_data_postinc:
	btst		&0x7,EXC_SR(%a6)	# is trace enabled?
	bne.b		iea_fmovm_data_pi_trace	# yes

# rebuild the exception frame d0 bytes higher up, with Next PC in it
	mov.w		EXC_SR(%a6),(EXC_SR,%a6,%d0)
	mov.l		EXC_EXTWPTR(%a6),(EXC_PC,%a6,%d0)
	mov.w		&0x00f0,(EXC_VOFF,%a6,%d0)

# leave the address of the new frame where "mov.l (%sp)+,%sp" will find it
	lea		(EXC_SR,%a6,%d0),%a0
	mov.l		%a0,EXC_SR(%a6)

	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6
	mov.l		(%sp)+,%sp		# switch to the relocated frame
	bra.l		_fpsp_done

# same as above, but build a trace frame (one longword larger) instead
iea_fmovm_data_pi_trace:
	mov.w		EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0)
	mov.l		EXC_EXTWPTR(%a6),(EXC_PC-0x4,%a6,%d0)
	mov.w		&0x2024,(EXC_VOFF-0x4,%a6,%d0)
	mov.l		EXC_PC(%a6),(EXC_VOFF+0x2-0x4,%a6,%d0)

	lea		(EXC_SR-0x4,%a6,%d0),%a0
	mov.l		%a0,EXC_SR(%a6)

	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6
	mov.l		(%sp)+,%sp		# switch to the relocated frame
	bra.l		_real_trace

# right now, d0 = the size and d1 = the strg.
iea_fmovm_data_predec:
	mov.b		%d1,EXC_VOFF(%a6)	# store strg
	mov.b		%d0,0x1+EXC_VOFF(%a6)	# store size

	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.l		(%a6),-(%sp)		# make a copy of a6
	mov.l		%d0,-(%sp)		# save d0
	mov.l		%d1,-(%sp)		# save d1
	mov.l		EXC_EXTWPTR(%a6),-(%sp)	# make a copy of Next PC

	clr.l		%d0
	mov.b		0x1+EXC_VOFF(%a6),%d0	# fetch size
	neg.l		%d0			# get negative of size

	btst		&0x7,EXC_SR(%a6)	# is trace enabled?
	beq.b		iea_fmovm_data_p2

# trace enabled: build a trace frame below the area the fp regs will occupy
	mov.w		EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0)
	mov.l		EXC_PC(%a6),(EXC_VOFF-0x2,%a6,%d0)
	mov.l		(%sp)+,(EXC_PC-0x4,%a6,%d0)
	mov.w		&0x2024,(EXC_VOFF-0x4,%a6,%d0)

	pea		(%a6,%d0)		# create final sp
	bra.b		iea_fmovm_data_p3

# trace not enabled: rebuild the 4 word frame instead
iea_fmovm_data_p2:
	mov.w		EXC_SR(%a6),(EXC_SR,%a6,%d0)
	mov.l		(%sp)+,(EXC_PC,%a6,%d0)
	mov.w		&0x00f0,(EXC_VOFF,%a6,%d0)

	pea		(0x4,%a6,%d0)		# create final sp

# store the selected fp registers: the strg byte is tested one bit at a time
# from the top bit down, and d0 advances 0xc bytes per register stored.
iea_fmovm_data_p3:
	clr.l		%d1
	mov.b		EXC_VOFF(%a6),%d1	# fetch strg

	tst.b		%d1
	bpl.b		fm_1
	fmovm.x		&0x80,(0x4+0x8,%a6,%d0)
	addi.l		&0xc,%d0
fm_1:
	lsl.b		&0x1,%d1
	bpl.b		fm_2
	fmovm.x		&0x40,(0x4+0x8,%a6,%d0)
	addi.l		&0xc,%d0
fm_2:
	lsl.b		&0x1,%d1
	bpl.b		fm_3
	fmovm.x		&0x20,(0x4+0x8,%a6,%d0)
	addi.l		&0xc,%d0
fm_3:
	lsl.b		&0x1,%d1
	bpl.b		fm_4
	fmovm.x		&0x10,(0x4+0x8,%a6,%d0)
	addi.l		&0xc,%d0
fm_4:
	lsl.b		&0x1,%d1
	bpl.b		fm_5
	fmovm.x		&0x08,(0x4+0x8,%a6,%d0)
	addi.l		&0xc,%d0
fm_5:
	lsl.b		&0x1,%d1
	bpl.b		fm_6
	fmovm.x		&0x04,(0x4+0x8,%a6,%d0)
	addi.l		&0xc,%d0
fm_6:
	lsl.b		&0x1,%d1
	bpl.b		fm_7
	fmovm.x		&0x02,(0x4+0x8,%a6,%d0)
	addi.l		&0xc,%d0
fm_7:
	lsl.b		&0x1,%d1
	bpl.b		fm_end
	fmovm.x		&0x01,(0x4+0x8,%a6,%d0)
fm_end:
	mov.l		0x4(%sp),%d1		# reload the d1 pushed above
	mov.l		0x8(%sp),%d0		# reload the d0 pushed above
	mov.l		0xc(%sp),%a6		# reload the a6 copy pushed above
	mov.l		(%sp)+,%sp		# switch to the final sp

	btst		&0x7,(%sp)		# is trace enabled?
	beq.l		_fpsp_done
	bra.l		_real_trace

#########################################################################
iea_fmovm_ctrl:

	bsr.l		fmovm_ctrl		# load ctrl regs

iea_fmovm_exit:
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	btst		&0x7,EXC_SR(%a6)	# is trace on?
	bne.b		iea_fmovm_trace		# yes

	mov.l		EXC_EXTWPTR(%a6),EXC_PC(%a6) # set Next PC

	unlk		%a6			# unravel the frame

	bra.l		_fpsp_done		# exit to os

#
# The control reg instruction that took an "Unimplemented Effective Address"
# exception was being traced. The "Current PC" for the trace frame is the
# PC stacked for Unimp EA.
The "Next PC" is in EXC_EXTWPTR.2913# After fixing the stack frame, jump to _real_trace().2914#2915# UNIMP EA FRAME TRACE FRAME2916# ***************** *****************2917# * 0x0 * 0x0f0 * * Current *2918# ***************** * PC *2919# * Current * *****************2920# * PC * * 0x2 * 0x024 *2921# ***************** *****************2922# * SR * * Next *2923# ***************** * PC *2924# *****************2925# * SR *2926# *****************2927# this ain't a pretty solution, but it works:2928# -restore a6 (not with unlk)2929# -shift stack frame down over where old a6 used to be2930# -add LOCAL_SIZE to stack pointer2931iea_fmovm_trace:2932mov.l (%a6),%a6 # restore frame pointer2933mov.w EXC_SR+LOCAL_SIZE(%sp),0x0+LOCAL_SIZE(%sp)2934mov.l EXC_PC+LOCAL_SIZE(%sp),0x8+LOCAL_SIZE(%sp)2935mov.l EXC_EXTWPTR+LOCAL_SIZE(%sp),0x2+LOCAL_SIZE(%sp)2936mov.w &0x2024,0x6+LOCAL_SIZE(%sp) # stk fmt = 0x2; voff = 0x0242937add.l &LOCAL_SIZE,%sp # clear stack frame29382939bra.l _real_trace29402941#########################################################################2942# The FPU is disabled and so we should really have taken the "Line2943# F Emulator" exception. So, here we create an 8-word stack frame2944# from our 4-word stack frame. This means we must calculate the length2945# the faulting instruction to get the "next PC". 
This is trivial for2946# immediate operands but requires some extra work for fmovm dynamic2947# which can use most addressing modes.2948iea_disabled:2949mov.l (%sp)+,%d0 # restore d029502951link %a6,&-LOCAL_SIZE # init stack frame29522953movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a129542955# PC of instruction that took the exception is the PC in the frame2956mov.l EXC_PC(%a6),EXC_EXTWPTR(%a6)2957mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr2958addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr2959bsr.l _imem_read_long # fetch the instruction words2960mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD29612962tst.w %d0 # is instr fmovm?2963bmi.b iea_dis_fmovm # yes2964# instruction is using an extended precision immediate operand. Therefore,2965# the total instruction length is 16 bytes.2966iea_dis_immed:2967mov.l &0x10,%d0 # 16 bytes of instruction2968bra.b iea_dis_cont2969iea_dis_fmovm:2970btst &0xe,%d0 # is instr fmovm ctrl2971bne.b iea_dis_fmovm_data # no2972# the instruction is a fmovm.l with 2 or 3 registers.2973bfextu %d0{&19:&3},%d12974mov.l &0xc,%d02975cmpi.b %d1,&0x7 # move all regs?2976bne.b iea_dis_cont2977addq.l &0x4,%d02978bra.b iea_dis_cont2979# the instruction is an fmovm.x dynamic which can use many addressing2980# modes and thus can have several different total instruction lengths.2981# call fmovm_calc_ea which will go through the ea calc process and,2982# as a by-product, will tell us how long the instruction is.2983iea_dis_fmovm_data:2984clr.l %d02985bsr.l fmovm_calc_ea2986mov.l EXC_EXTWPTR(%a6),%d02987sub.l EXC_PC(%a6),%d02988iea_dis_cont:2989mov.w %d0,EXC_VOFF(%a6) # store stack shift value29902991movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a129922993unlk %a629942995# here, we actually create the 8-word frame from the 4-word frame,2996# with the "next PC" as additional info.2997# the <ea> field is let as undefined.2998subq.l &0x8,%sp # make room for new stack2999mov.l %d0,-(%sp) # save d03000mov.w 0xc(%sp),0x4(%sp) # move 
SR3001mov.l 0xe(%sp),0x6(%sp) # move Current PC3002clr.l %d03003mov.w 0x12(%sp),%d03004mov.l 0x6(%sp),0x10(%sp) # move Current PC3005add.l %d0,0x6(%sp) # make Next PC3006mov.w &0x402c,0xa(%sp) # insert offset,frame format3007mov.l (%sp)+,%d0 # restore d030083009bra.l _real_fpu_disabled30103011##########30123013iea_iacc:3014movc %pcr,%d03015btst &0x1,%d03016bne.b iea_iacc_cont3017fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs3018fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 on stack3019iea_iacc_cont:3020movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a130213022unlk %a630233024subq.w &0x8,%sp # make stack frame bigger3025mov.l 0x8(%sp),(%sp) # store SR,hi(PC)3026mov.w 0xc(%sp),0x4(%sp) # store lo(PC)3027mov.w &0x4008,0x6(%sp) # store voff3028mov.l 0x2(%sp),0x8(%sp) # store ea3029mov.l &0x09428001,0xc(%sp) # store fslw30303031iea_acc_done:3032btst &0x5,(%sp) # user or supervisor mode?3033beq.b iea_acc_done2 # user3034bset &0x2,0xd(%sp) # set supervisor TM bit30353036iea_acc_done2:3037bra.l _real_access30383039iea_dacc:3040lea -LOCAL_SIZE(%a6),%sp30413042movc %pcr,%d13043btst &0x1,%d13044bne.b iea_dacc_cont3045fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 on stack3046fmovm.l LOCAL_SIZE+USER_FPCR(%sp),%fpcr,%fpsr,%fpiar # restore ctrl regs3047iea_dacc_cont:3048mov.l (%a6),%a630493050mov.l 0x4+LOCAL_SIZE(%sp),-0x8+0x4+LOCAL_SIZE(%sp)3051mov.w 0x8+LOCAL_SIZE(%sp),-0x8+0x8+LOCAL_SIZE(%sp)3052mov.w &0x4008,-0x8+0xa+LOCAL_SIZE(%sp)3053mov.l %a0,-0x8+0xc+LOCAL_SIZE(%sp)3054mov.w %d0,-0x8+0x10+LOCAL_SIZE(%sp)3055mov.w &0x0001,-0x8+0x12+LOCAL_SIZE(%sp)30563057movm.l LOCAL_SIZE+EXC_DREGS(%sp),&0x0303 # restore d0-d1/a0-a13058add.w &LOCAL_SIZE-0x4,%sp30593060bra.b iea_acc_done30613062#########################################################################3063# XDEF **************************************************************** #3064# _fpsp_operr(): 060FPSP entry point for FP Operr exception. 
#
#									#
#	This handler should be the first code executed upon taking the	#
#	FP Operand Error exception in an operating system.		#
#									#
# XREF **************************************************************** #
#	_imem_read_long() - read instruction longword			#
#	fix_skewed_ops() - adjust src operand in fsave frame		#
#	_real_operr() - "callout" to operating system operr handler	#
#	_dmem_write_{byte,word,long}() - store data to mem (opclass 3)	#
#	store_dreg_{b,w,l}() - store data to data regfile (opclass 3)	#
#	facc_out_{b,w,l}() - store to memory took access error (opcl 3)	#
#									#
# INPUT *************************************************************** #
#	- The system stack contains the FP Operr exception frame	#
#	- The fsave frame contains the source operand			#
#									#
# OUTPUT ************************************************************** #
#	No access error:						#
#	- The system stack is unchanged					#
#	- The fsave frame contains the adjusted src op for opclass 0,2	#
#									#
# ALGORITHM *********************************************************** #
#	In a system where the FP Operr exception is enabled, the goal	#
# is to get to the handler specified at _real_operr(). But, on the 060,	#
# for opclass zero and two instructions taking this exception, the	#
# input operand in the fsave frame may be incorrect for some cases	#
# and needs to be corrected. This handler calls fix_skewed_ops() to	#
# do just this and then exits through _real_operr().			#
#	For opclass 3 instructions, the 060 doesn't store the default	#
# operr result out to memory or data register file as it should.	#
# This code must emulate the move out before finally exiting through	#
# _real_operr(). The move out, if to memory, is performed using the	#
# _dmem_write_{byte,word,long}() "callout" routines that may return a	#
# failing result.							#
# In this special case, the handler must exit through facc_out_{b,w,l}()	#
# which creates an access error stack frame from the current operr	#
# stack frame.								#
#									#
#########################################################################

	global		_fpsp_operr
_fpsp_operr:

	link.w		%a6,&-LOCAL_SIZE	# init stack frame

	fsave		FP_SRC(%a6)		# grab the "busy" frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

# the FPIAR holds the "current PC" of the faulting instruction
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)

##############################################################################

	btst		&13,%d0			# is instr an fmove out?
	bne.b		foperr_out		# fmove out


# here, we simply see if the operand in the fsave frame needs to be "unskewed".
# this would be the case for opclass two operations with a source infinity or
# denorm operand in the sgl or dbl format. NANs also become skewed, but can't
# cause an operr so we don't need to check for them here.
	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		fix_skewed_ops		# fix src op

# common exit: put back everything saved at entry, reload the fsave frame,
# and hand control to the operating system's operr handler.
foperr_exit:
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)

	unlk		%a6
	bra.l		_real_operr

########################################################################

#
# the hardware does not save the default result to memory on enabled
# operand error exceptions. we do this here before passing control to
# the user operand error handler.
#
# byte, word, and long destination format operations can pass
# through here. for a non-NAN source we simply need to test the sign of
# the src operand and save the appropriate minimum or maximum integer value
# to the effective address as pointed to by the stacked effective address.
# for a NAN source (max exponent, non-zero mantissa apart from the top bit)
# the upper longword of the mantissa is used as the default result instead.
# in every case the default result is built in L_SCR1 and the size-specific
# routines below store its most significant byte, word, or the whole longword.
#
# although packed opclass three operations can take operand error
# exceptions, they won't pass through here since they are caught
# first by the unsupported data format exception handler. that handler
# sends them directly to _real_operr() if necessary.
#
foperr_out:

	mov.w		FP_SRC_EX(%a6),%d1	# fetch exponent
	andi.w		&0x7fff,%d1		# strip sign
	cmpi.w		%d1,&0x7fff		# max exponent?
	bne.b		foperr_out_not_qnan	# no; ordinary operand
# the operand is either an infinity or a QNAN.
	tst.l		FP_SRC_LO(%a6)		# any lo mantissa bits set?
	bne.b		foperr_out_qnan		# yes; it's a NAN
	mov.l		FP_SRC_HI(%a6),%d1
	andi.l		&0x7fffffff,%d1		# any hi bits below the top bit?
	beq.b		foperr_out_not_qnan	# no; it's an infinity
foperr_out_qnan:
	mov.l		FP_SRC_HI(%a6),L_SCR1(%a6) # default = upper mantissa
	bra.b		foperr_out_jmp

foperr_out_not_qnan:
	mov.l		&0x7fffffff,%d1		# assume positive: max integer
	tst.b		FP_SRC_EX(%a6)		# is src negative?
	bpl.b		foperr_out_not_qnan2	# no
	addq.l		&0x1,%d1		# yes; 0x80000000 = min integer
foperr_out_not_qnan2:
	mov.l		%d1,L_SCR1(%a6)

# dispatch on the destination format; d1 carries the <ea> mode,reg byte
# into the size-specific store routine.
foperr_out_jmp:
	bfextu		%d0{&19:&3},%d0		# extract dst format field
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract <ea> mode,reg
	mov.w		(tbl_operr.b,%pc,%d0.w*2),%a0
	jmp		(tbl_operr.b,%pc,%a0)

tbl_operr:
	short		foperr_out_l - tbl_operr # long word integer
	short		tbl_operr    - tbl_operr # sgl prec shouldn't happen
	short		tbl_operr    - tbl_operr # ext prec shouldn't happen
	short		foperr_exit  - tbl_operr # packed won't enter here
	short		foperr_out_w - tbl_operr # word integer
	short		tbl_operr    - tbl_operr # dbl prec shouldn't happen
	short		foperr_out_b - tbl_operr # byte integer
	short		tbl_operr    - tbl_operr # packed won't enter here

foperr_out_b:
	mov.b		L_SCR1(%a6),%d0		# load upper byte of default result
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
	ble.b		foperr_out_b_save_dn	# yes
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
	bsr.l		_dmem_write_byte	# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_b		# yes

	bra.w		foperr_exit
foperr_out_b_save_dn:
	andi.w		&0x0007,%d1		# isolate data register number
	bsr.l		store_dreg_b		# store result to regfile
	bra.w		foperr_exit

foperr_out_w:
	mov.w		L_SCR1(%a6),%d0		# load upper word of default result
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
	ble.b		foperr_out_w_save_dn	# yes
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
	bsr.l		_dmem_write_word	# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_w		# yes

	bra.w		foperr_exit
foperr_out_w_save_dn:
	andi.w		&0x0007,%d1		# isolate data register number
	bsr.l		store_dreg_w		# store result to regfile
	bra.w		foperr_exit

foperr_out_l:
	mov.l		L_SCR1(%a6),%d0		# load longword default result
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
	ble.b		foperr_out_l_save_dn	# yes
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
	bsr.l		_dmem_write_long	# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_l		# yes

	bra.w		foperr_exit
foperr_out_l_save_dn:
	andi.w		&0x0007,%d1		# isolate data register number
	bsr.l		store_dreg_l		# store result to regfile
	bra.w		foperr_exit

#########################################################################
# XDEF **************************************************************** #
#	_fpsp_snan(): 060FPSP entry point for FP SNAN exception.	#
#									#
#	This handler should be the first code executed upon taking the	#
#	FP Signalling NAN exception in an operating system. 
#
#									#
# XREF **************************************************************** #
#	_imem_read_long() - read instruction longword			#
#	fix_skewed_ops() - adjust src operand in fsave frame		#
#	_real_snan() - "callout" to operating system SNAN handler	#
#	_dmem_write_{byte,word,long}() - store data to mem (opclass 3)	#
#	_dmem_write() - store dbl/ext operand to mem (opclass 3)	#
#	store_dreg_{b,w,l}() - store data to data regfile (opclass 3)	#
#	facc_out_{b,w,l,d,x}() - store to mem took acc error (opcl 3)	#
#	_calc_ea_fout() - fix An if <ea> is -() or ()+; also get <ea>	#
#									#
# INPUT *************************************************************** #
#	- The system stack contains the FP SNAN exception frame		#
#	- The fsave frame contains the source operand			#
#									#
# OUTPUT ************************************************************** #
#	No access error:						#
#	- The system stack is unchanged					#
#	- The fsave frame contains the adjusted src op for opclass 0,2	#
#									#
# ALGORITHM *********************************************************** #
#	In a system where the FP SNAN exception is enabled, the goal	#
# is to get to the handler specified at _real_snan(). But, on the 060,	#
# for opclass zero and two instructions taking this exception, the	#
# input operand in the fsave frame may be incorrect for some cases	#
# and needs to be corrected. This handler calls fix_skewed_ops() to	#
# do just this and then exits through _real_snan().			#
#	For opclass 3 instructions, the 060 doesn't store the default	#
# SNAN result out to memory or data register file as it should.		#
# This code must emulate the move out before finally exiting through	#
# _real_snan(). The move out, if to memory, is performed using the	#
# _dmem_write*() "callout" routines that may return a failing result.	#
# In this special case, the handler must exit through facc_out_*()	#
# which creates an access error stack frame from the current SNAN	#
# stack frame.								#
#	For the case of an extended precision opclass 3 instruction,	#
# if the effective addressing mode was -() or ()+, then the address	#
# register must get updated by calling _calc_ea_fout(). If the <ea>	#
# was -(a7) from supervisor mode, then the exception frame currently	#
# on the system stack must be carefully moved "down" to make room	#
# for the operand being moved.						#
#									#
#########################################################################

	global		_fpsp_snan
_fpsp_snan:

	link.w		%a6,&-LOCAL_SIZE	# init stack frame

	fsave		FP_SRC(%a6)		# grab the "busy" frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

# the FPIAR holds the "current PC" of the faulting instruction
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)

##############################################################################

	btst		&13,%d0			# is instr an fmove out?
	bne.w		fsnan_out		# fmove out


# here, we simply see if the operand in the fsave frame needs to be "unskewed".
# this would be the case for opclass two operations with a source infinity or
# denorm operand in the sgl or dbl format. NANs also become skewed and must be
# fixed here.
	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		fix_skewed_ops		# fix src op

# common exit: put back everything saved at entry, reload the fsave frame,
# and hand control to the operating system's SNAN handler.
fsnan_exit:
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)

	unlk		%a6
	bra.l		_real_snan

########################################################################

#
# the hardware does not save the default result to memory on enabled
# snan exceptions. we do this here before passing control to
# the user snan handler.
#
# byte, word, long, single, double, and extended destination format
# operations are all dispatched from here. packed format operations were
# already handled by fpsp_unsupp(), so nothing is done for them here.
# for byte, word, and long, the default result is the upper 8, 16, or 32
# bits of the SNAN mantissa with the SNAN bit set.
# for single and double, a NAN of that format is assembled from the source
# sign, an all-ones exponent, the SNAN bit, and the shifted source mantissa.
# for extended, the source is copied with the SNAN bit set.
# the result goes to the stacked effective address or to a data register.
#
fsnan_out:

	bfextu		%d0{&19:&3},%d0		# extract dst format field
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract <ea> mode,reg
	mov.w		(tbl_snan.b,%pc,%d0.w*2),%a0
	jmp		(tbl_snan.b,%pc,%a0)

tbl_snan:
	short		fsnan_out_l - tbl_snan	# long word integer
	short		fsnan_out_s - tbl_snan	# sgl prec
	short		fsnan_out_x - tbl_snan	# ext prec
	short		tbl_snan    - tbl_snan	# packed needs no help
	short		fsnan_out_w - tbl_snan	# word integer
	short		fsnan_out_d - tbl_snan	# dbl prec
	short		fsnan_out_b - tbl_snan	# byte integer
	short		tbl_snan    - tbl_snan	# packed needs no help

fsnan_out_b:
	mov.b		FP_SRC_HI(%a6),%d0	# load upper byte of SNAN
	bset		&6,%d0			# set SNAN bit
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
	ble.b		fsnan_out_b_dn		# yes
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
	bsr.l		_dmem_write_byte	# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_b		# yes

	bra.w		fsnan_exit
fsnan_out_b_dn:
	andi.w		&0x0007,%d1		# isolate data register number
	bsr.l		store_dreg_b		# store result to regfile
	bra.w		fsnan_exit

fsnan_out_w:
	mov.w		FP_SRC_HI(%a6),%d0	# load upper word of SNAN
	bset		&14,%d0			# set SNAN bit
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
	ble.b		fsnan_out_w_dn		# yes
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
	bsr.l		_dmem_write_word	# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_w		# yes

	bra.w		fsnan_exit
fsnan_out_w_dn:
	andi.w		&0x0007,%d1		# isolate data register number
	bsr.l		store_dreg_w		# store result to regfile
	bra.w		fsnan_exit

fsnan_out_l:
	mov.l		FP_SRC_HI(%a6),%d0	# load upper longword of SNAN
	bset		&30,%d0			# set SNAN bit
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
	ble.b		fsnan_out_l_dn		# yes
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
	bsr.l		_dmem_write_long	# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_l		# yes

	bra.w		fsnan_exit
fsnan_out_l_dn:
	andi.w		&0x0007,%d1		# isolate data register number
	bsr.l		store_dreg_l		# store result to regfile
	bra.w		fsnan_exit

# single precision destination. note that the data register path below is
# labelled fsnan_out_d_dn even though it builds a single precision result.
fsnan_out_s:
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
	ble.b		fsnan_out_d_dn		# yes
	mov.l		FP_SRC_EX(%a6),%d0	# fetch SNAN sign
	andi.l		&0x80000000,%d0		# keep sign
	ori.l		&0x7fc00000,%d0		# insert new exponent,SNAN bit
	mov.l		FP_SRC_HI(%a6),%d1	# load mantissa
	lsr.l		&0x8,%d1		# shift mantissa for sgl
	or.l		%d1,%d0			# create sgl SNAN
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
	bsr.l		_dmem_write_long	# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_l		# yes

	bra.w		fsnan_exit
fsnan_out_d_dn:
	mov.l		FP_SRC_EX(%a6),%d0	# fetch SNAN sign
	andi.l		&0x80000000,%d0		# keep sign
	ori.l		&0x7fc00000,%d0		# insert new exponent,SNAN bit
	mov.l		%d1,-(%sp)		# save <ea> mode,reg; d1 is scratch
	mov.l		FP_SRC_HI(%a6),%d1	# load mantissa
	lsr.l		&0x8,%d1		# shift mantissa for sgl
	or.l		%d1,%d0			# create sgl SNAN
	mov.l		(%sp)+,%d1		# recover <ea> mode,reg
	andi.w		&0x0007,%d1		# isolate data register number
	bsr.l		store_dreg_l		# store result to regfile
	bra.w		fsnan_exit

# double precision destination: the 64-bit result is assembled in
# FP_SCR0_EX (upper longword) and FP_SCR0_HI (lower longword).
fsnan_out_d:
	mov.l		FP_SRC_EX(%a6),%d0	# fetch SNAN sign
	andi.l		&0x80000000,%d0		# keep sign
	ori.l		&0x7ff80000,%d0		# insert new exponent,SNAN bit
	mov.l		FP_SRC_HI(%a6),%d1	# load hi mantissa
	mov.l		%d0,FP_SCR0_EX(%a6)	# store to temp space
	mov.l		&11,%d0			# load shift amt
	lsr.l		%d0,%d1
	or.l		%d1,FP_SCR0_EX(%a6)	# create dbl hi
	mov.l		FP_SRC_HI(%a6),%d1	# load hi mantissa
	andi.l		&0x000007ff,%d1		# keep the 11 bits shifted out above
	ror.l		%d0,%d1			# rotate them to the top of the longword
	mov.l		%d1,FP_SCR0_HI(%a6)	# store to temp space
	mov.l		FP_SRC_LO(%a6),%d1	# load lo mantissa
	lsr.l		%d0,%d1
	or.l		%d1,FP_SCR0_HI(%a6)	# create dbl lo
	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
	movq.l		&0x8,%d0		# pass: size of 8 bytes
	bsr.l		_dmem_write		# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_d		# yes

	bra.w		fsnan_exit

# for extended precision, if the addressing mode is pre-decrement or
# post-increment, then the address register did not get updated.
# in addition, for pre-decrement, the stacked <ea> is incorrect.
fsnan_out_x:
	clr.b		SPCOND_FLG(%a6)		# clear special case flag

# build the default result in FP_SCR0: the source with the SNAN bit set.
	mov.w		FP_SRC_EX(%a6),FP_SCR0_EX(%a6)
	clr.w		2+FP_SCR0(%a6)
	mov.l		FP_SRC_HI(%a6),%d0
	bset		&30,%d0			# set SNAN bit
	mov.l		%d0,FP_SCR0_HI(%a6)
	mov.l		FP_SRC_LO(%a6),FP_SCR0_LO(%a6)

	btst		&0x5,EXC_SR(%a6)	# supervisor mode exception?
	bne.b		fsnan_out_x_s		# yes

	mov.l		%usp,%a0		# fetch user stack pointer
	mov.l		%a0,EXC_A7(%a6)		# save on stack for calc_ea()
	mov.l		(%a6),EXC_A6(%a6)	# expose saved a6 to calc_ea()

	bsr.l		_calc_ea_fout		# find the correct ea,update An
	mov.l		%a0,%a1			# pass: dst addr
	mov.l		%a0,EXC_EA(%a6)		# stack correct <ea>

	mov.l		EXC_A7(%a6),%a0
	mov.l		%a0,%usp		# restore user stack pointer
	mov.l		EXC_A6(%a6),(%a6)	# write back possibly updated a6

fsnan_out_x_save:
	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
	movq.l		&0xc,%d0		# pass: size of extended
	bsr.l		_dmem_write		# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_x		# yes

	bra.w		fsnan_exit

fsnan_out_x_s:
	mov.l		(%a6),EXC_A6(%a6)	# expose saved a6 to calc_ea()

	bsr.l		_calc_ea_fout		# find the correct ea,update An
	mov.l		%a0,%a1			# pass: dst addr
	mov.l		%a0,EXC_EA(%a6)		# stack correct <ea>

	mov.l		EXC_A6(%a6),(%a6)	# write back possibly updated a6

	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
	bne.b		fsnan_out_x_save	# no

# the operation was "fmove.x SNAN,-(a7)" from supervisor mode.
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)

	mov.l		EXC_A6(%a6),%a6		# restore frame pointer

# from here on the frame is addressed off %sp, since %a6 is no longer ours.
# first copy the three exception frame longwords at EXC_SR, EXC_PC+0x2 and
# EXC_EA down by 0xc bytes ...
	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
	mov.l		LOCAL_SIZE+EXC_PC+0x2(%sp),LOCAL_SIZE+EXC_PC+0x2-0xc(%sp)
	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)

# ... then write the 12-byte default result over the slots they came from.
	mov.l		LOCAL_SIZE+FP_SCR0_EX(%sp),LOCAL_SIZE+EXC_SR(%sp)
	mov.l		LOCAL_SIZE+FP_SCR0_HI(%sp),LOCAL_SIZE+EXC_PC+0x2(%sp)
	mov.l		LOCAL_SIZE+FP_SCR0_LO(%sp),LOCAL_SIZE+EXC_EA(%sp)

	add.l		&LOCAL_SIZE-0x8,%sp	# drop the local frame

	bra.l		_real_snan

#########################################################################
# XDEF **************************************************************** #
#	_fpsp_inex(): 060FPSP entry point for FP Inexact exception.	#
#									#
#	This handler should be the first code executed upon taking the	#
#	FP Inexact exception in an operating system. 
#
#									#
# XREF **************************************************************** #
#	_imem_read_long() - read instruction longword			#
#	fix_skewed_ops() - adjust src operand in fsave frame		#
#	set_tag_x() - determine optype of src/dst operands		#
#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
#	load_fpn2() - load dst operand from FP regfile			#
#	smovcr() - emulate an "fmovecr" instruction			#
#	fout() - emulate an opclass 3 instruction			#
#	tbl_unsupp - addr of table of emulation routines for opclass 0,2	#
#	_real_inex() - "callout" to operating system inexact handler	#
#									#
# INPUT *************************************************************** #
#	- The system stack contains the FP Inexact exception frame	#
#	- The fsave frame contains the source operand			#
#									#
# OUTPUT ************************************************************** #
#	- The system stack is unchanged					#
#	- The fsave frame contains the adjusted src op for opclass 0,2	#
#									#
# ALGORITHM *********************************************************** #
#	In a system where the FP Inexact exception is enabled, the goal	#
# is to get to the handler specified at _real_inex(). But, on the 060,	#
# for opclass zero and two instructions taking this exception, the	#
# hardware doesn't store the correct result to the destination FP	#
# register as did the '040 and '881/2. This handler must emulate the	#
# instruction in order to get this value and then store it to the	#
# correct register before calling _real_inex().				#
#	For opclass 3 instructions, the 060 doesn't store the default	#
# inexact result out to memory or data register file as it should.	#
# This code must emulate the move out by calling fout() before finally	#
# exiting through _real_inex().						#
#									#
#########################################################################

	global		_fpsp_inex
_fpsp_inex:

	link.w		%a6,&-LOCAL_SIZE	# init stack frame

	fsave		FP_SRC(%a6)		# grab the "busy" frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

# the FPIAR holds the "current PC" of the faulting instruction
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)

##############################################################################

	btst		&13,%d0			# is instr an fmove out?
	bne.w		finex_out		# fmove out


# the hardware, for "fabs" and "fneg" w/ a long source format, puts the
# longword integer directly into the upper longword of the mantissa along
# w/ an exponent value of 0x401e. we convert this to extended precision here.
# NOTE(review): the {19:3} field is tested here without first checking
# whether the source came from memory or from an FP register -- confirm that
# a register source can never be mistaken for a long format operand.
	bfextu		%d0{&19:&3},%d0		# fetch instr size
	bne.b		finex_cont		# instr size is not long
	cmpi.w		FP_SRC_EX(%a6),&0x401e	# is exponent 0x401e?
	bne.b		finex_cont		# no
	fmov.l		&0x0,%fpcr
	fmov.l		FP_SRC_HI(%a6),%fp0	# load integer src
	fmov.x		%fp0,FP_SRC(%a6)	# store integer as extended precision
	mov.w		&0xe001,0x2+FP_SRC(%a6)

finex_cont:
	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		fix_skewed_ops		# fix src op

# Here, we zero the ccode and exception byte field since we're going to
# emulate the whole instruction. Notice, though, that we don't kill the
# INEX1 bit. This is because a packed op has long since been converted
# to extended before arriving here. Therefore, we need to retain the
# INEX1 bit from when the operand was first converted.
	andi.l		&0x00ff01ff,USER_FPSR(%a6) # zero ccode,exc fields; keep INEX1

	fmov.l		&0x0,%fpcr		# zero current control regs
	fmov.l		&0x0,%fpsr

	bfextu		EXC_EXTWORD(%a6){&0:&6},%d1 # extract upper 6 of cmdreg
	cmpi.b		%d1,&0x17		# is op an fmovecr?
	beq.w		finex_fmovcr		# yes

	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		set_tag_x		# tag the operand type
	mov.b		%d0,STAG(%a6)		# maybe NORM,DENORM

# bits four and five of the fp extension word separate the monadic and dyadic
# operations that can pass through fpsp_inex(). remember that fcmp and ftst
# will never take this exception, but fsincos will.
	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
	beq.b		finex_extract		# monadic

	btst		&0x4,1+EXC_CMDREG(%a6)	# is operation an fsincos?
	bne.b		finex_extract		# yes

	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
	bsr.l		load_fpn2		# load dst into FP_DST

	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
	bsr.l		set_tag_x		# tag the operand type
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
	bne.b		finex_op2_done		# no
	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
finex_op2_done:
	mov.b		%d0,DTAG(%a6)		# save dst optype tag

# call the emulation routine selected by the low seven bits of the
# extension word, with a0 -> src operand and a1 -> dst operand.
finex_extract:
	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode

	mov.b		1+EXC_CMDREG(%a6),%d1
	andi.w		&0x007f,%d1		# extract extension

	lea		FP_SRC(%a6),%a0
	lea		FP_DST(%a6),%a1

	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
	jsr		(tbl_unsupp.l,%pc,%d1.l*1)

# the operation has been emulated. the result is in fp0.
finex_save:
	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst register number
	bsr.l		store_fpreg

finex_exit:
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)

	unlk		%a6
	bra.l		_real_inex

finex_fmovcr:
	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec,mode
	mov.b		1+EXC_CMDREG(%a6),%d1
	andi.l		&0x0000007f,%d1		# pass rom offset
	bsr.l		smovcr
	bra.b		finex_save

########################################################################

#
# the hardware does not save the default result to memory on enabled
# inexact exceptions. we do this here before passing control to
# the user inexact handler.
#
# byte, word, and long destination format operations can pass
# through here. so can double and single precision.
# although packed opclass three operations can take inexact
# exceptions, they won't pass through here since they are caught
# first by the unsupported data format exception handler. that handler
# sends them directly to _real_inex() if necessary.
#
finex_out:

	mov.b		&NORM,STAG(%a6)		# src is a NORM

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec,mode

	andi.l		&0xffff00ff,USER_FPSR(%a6) # zero exception field

	lea		FP_SRC(%a6),%a0		# pass ptr to src operand

	bsr.l		fout			# store the default result

	bra.b		finex_exit

#########################################################################
# XDEF **************************************************************** #
#	_fpsp_dz(): 060FPSP entry point for FP DZ exception.		#
#									#
#	This handler should be the first code executed upon taking	#
#	the FP DZ exception in an operating system. 
#3742# #3743# XREF **************************************************************** #3744# _imem_read_long() - read instruction longword from memory #3745# fix_skewed_ops() - adjust fsave operand #3746# _real_dz() - "callout" exit point from FP DZ handler #3747# #3748# INPUT *************************************************************** #3749# - The system stack contains the FP DZ exception stack. #3750# - The fsave frame contains the source operand. #3751# #3752# OUTPUT ************************************************************** #3753# - The system stack contains the FP DZ exception stack. #3754# - The fsave frame contains the adjusted source operand. #3755# #3756# ALGORITHM *********************************************************** #3757# In a system where the DZ exception is enabled, the goal is to #3758# get to the handler specified at _real_dz(). But, on the 060, when the #3759# exception is taken, the input operand in the fsave state frame may #3760# be incorrect for some cases and need to be adjusted. So, this package #3761# adjusts the operand using fix_skewed_ops() and then branches to #3762# _real_dz(). 
#3763# #3764#########################################################################37653766global _fpsp_dz3767_fpsp_dz:37683769link.w %a6,&-LOCAL_SIZE # init stack frame37703771fsave FP_SRC(%a6) # grab the "busy" frame37723773movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a13774fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs3775fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack37763777# the FPIAR holds the "current PC" of the faulting instruction3778mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)37793780mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr3781addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr3782bsr.l _imem_read_long # fetch the instruction words3783mov.l %d0,EXC_OPWORD(%a6)37843785##############################################################################378637873788# here, we simply see if the operand in the fsave frame needs to be "unskewed".3789# this would be the case for opclass two operations with a source zero3790# in the sgl or dbl format.3791lea FP_SRC(%a6),%a0 # pass: ptr to src op3792bsr.l fix_skewed_ops # fix src op37933794fdz_exit:3795fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp13796fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs3797movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a137983799frestore FP_SRC(%a6)38003801unlk %a63802bra.l _real_dz38033804#########################################################################3805# XDEF **************************************************************** #3806# _fpsp_fline(): 060FPSP entry point for "Line F emulator" #3807# exception when the "reduced" version of the #3808# FPSP is implemented that does not emulate #3809# FP unimplemented instructions. #3810# #3811# This handler should be the first code executed upon taking a #3812# "Line F Emulator" exception in an operating system integrating #3813# the reduced version of 060FPSP. 
#									#
# XREF ****************************************************************	#
#	_real_fpu_disabled() - Handle "FPU disabled" exceptions		#
#	_real_fline() - Handle all other cases (treated equally)	#
#									#
# INPUT ***************************************************************	#
#	- The system stack contains a "Line F Emulator" exception	#
#	  stack frame.							#
#									#
# OUTPUT **************************************************************	#
#	- The system stack is unchanged.				#
#									#
# ALGORITHM ***********************************************************	#
#	When a "Line F Emulator" exception occurs in a system where	#
# "FPU Unimplemented" instructions will not be emulated, the exception	#
# can occur because the FPU is disabled or the instruction is to be	#
# classified as "Line F". This module determines which case exists and	#
# calls the appropriate "callout".					#
#									#
#########################################################################

	global		_fpsp_fline
_fpsp_fline:

# check to see if the FPU is disabled. if so, jump to the OS entry
# point for that condition.
# the test compares the stacked word at 0x6(%sp) against 0x402c
# (presumably the frame format/vector-offset word -- confirm against the
# 68060 exception frame layout).
	cmpi.w		0x6(%sp),&0x402c
	beq.l		_real_fpu_disabled

	bra.l		_real_fline

#########################################################################
# XDEF ****************************************************************	#
#	_dcalc_ea(): calc correct <ea> from <ea> stacked on exception	#
#									#
# XREF ****************************************************************	#
#	inc_areg() - increment an address register			#
#	dec_areg() - decrement an address register			#
#									#
# INPUT ***************************************************************	#
#	d0 = number of bytes to adjust <ea> by				#
#									#
# OUTPUT **************************************************************	#
#	a0 = corrected <ea>						#
#									#
# ALGORITHM ***********************************************************	#
# "Dummy" CALCulate Effective Address:					#
#	The stacked <ea> for FP unimplemented instructions and opclass	#
#	two packed instructions is correct with the exception of...	#
#									#
#	1) -(An)   : The register is not updated regardless of size.	#
#		     Also, for extended precision and packed, the	#
#		     stacked <ea> value is 8 bytes too big		#
#	2) (An)+   : The register is not updated.			#
#	3) #<data> : The upper longword of the immediate operand is	#
#		     stacked b,w,l and s sizes are completely stacked.	#
#		     d,x, and p are not.				#
#									#
#########################################################################

	global		_dcalc_ea
_dcalc_ea:
	mov.l		%d0, %a0		# move # bytes to %a0

	mov.b		1+EXC_OPWORD(%a6), %d0	# fetch lo byte of opword (mode,reg)
	mov.l		%d0, %d1		# make a copy

	andi.w		&0x38, %d0		# extract mode field
	andi.l		&0x7, %d1		# extract reg field

	cmpi.b		%d0,&0x18		# is mode (An)+ ?
	beq.b		dcea_pi			# yes

	cmpi.b		%d0,&0x20		# is mode -(An) ?
	beq.b		dcea_pd			# yes

	or.w		%d1,%d0			# concat mode,reg
	cmpi.b		%d0,&0x3c		# is mode #<data>?

	beq.b		dcea_imm		# yes

	mov.l		EXC_EA(%a6),%a0		# return <ea>
	rts

# need to set immediate data flag here since we'll need to do
# an imem_read to fetch this later.
dcea_imm:
	mov.b		&immed_flg,SPCOND_FLG(%a6)
	lea		([USER_FPIAR,%a6],0x4),%a0 # return <ea> = FPIAR + 4
	rts

# here, the <ea> is stacked correctly. however, we must update the
# address register...
dcea_pi:
	mov.l		%a0,%d0			# pass amt to inc by
	bsr.l		inc_areg		# inc addr register

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
	rts

# the <ea> is stacked correctly for all but extended and packed which
# the <ea>s are 8 bytes too large.
# it would make no sense to have a pre-decrement to a7 in supervisor
# mode so we don't even worry about this tricky case here : )
dcea_pd:
	mov.l		%a0,%d0			# pass amt to dec by
	bsr.l		dec_areg		# dec addr register

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct

# NOTE(review): the size test below reads d0 after the call, so it relies
# on dec_areg() leaving the byte count in d0 -- confirm.
	cmpi.b		%d0,&0xc		# is opsize ext or packed?
	beq.b		dcea_pd2		# yes
	rts
dcea_pd2:
	sub.l		&0x8,%a0		# correct <ea>
	mov.l		%a0,EXC_EA(%a6)		# put correct <ea> on stack
	rts

#########################################################################
# XDEF ****************************************************************	#
#	_calc_ea_fout(): calculate correct stacked <ea> for extended	#
#			 and packed data opclass 3 operations.		#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	None								#
#									#
# OUTPUT **************************************************************	#
#	a0 = return correct effective address				#
#									#
# ALGORITHM ***********************************************************	#
#	For opclass 3 extended and packed data operations, the <ea>	#
# stacked for the exception is incorrect for -(an) and (an)+ addressing	#
# modes. Also, while we're at it, the index register itself must get	#
# updated.								#
#	So, for -(an), we must subtract 8 off of the stacked <ea> value	#
# and return that value as the correct <ea> and store that value in An.	#
# For (an)+, the stacked <ea> is correct but we must adjust An by +12.	#
#									#
#########################################################################

# This calc_ea is currently used to retrieve the correct <ea>
# for fmove outs of type extended and packed.
	global		_calc_ea_fout
_calc_ea_fout:
	mov.b		1+EXC_OPWORD(%a6),%d0	# fetch lo byte of opword (mode,reg)
	mov.l		%d0,%d1			# make a copy

	andi.w		&0x38,%d0		# extract mode field
	andi.l		&0x7,%d1		# extract reg field

	cmpi.b		%d0,&0x18		# is mode (An)+ ?
	beq.b		ceaf_pi			# yes

	cmpi.b		%d0,&0x20		# is mode -(An) ?
	beq.w		ceaf_pd			# yes

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
	rts

# (An)+ : extended and packed fmove out
#	: stacked <ea> is correct
#	: "An" not updated
# d1 (reg field) indexes a table of 16-bit offsets; a0/a1 are updated in
# their EXC_DREGS save slots, a6/a7 in EXC_A6/EXC_A7, a2-a5 directly.
ceaf_pi:
	mov.w		(tbl_ceaf_pi.b,%pc,%d1.w*2),%d1
	mov.l		EXC_EA(%a6),%a0
	jmp		(tbl_ceaf_pi.b,%pc,%d1.w*1)

	swbeg		&0x8
tbl_ceaf_pi:
	short		ceaf_pi0 - tbl_ceaf_pi
	short		ceaf_pi1 - tbl_ceaf_pi
	short		ceaf_pi2 - tbl_ceaf_pi
	short		ceaf_pi3 - tbl_ceaf_pi
	short		ceaf_pi4 - tbl_ceaf_pi
	short		ceaf_pi5 - tbl_ceaf_pi
	short		ceaf_pi6 - tbl_ceaf_pi
	short		ceaf_pi7 - tbl_ceaf_pi

ceaf_pi0:
	addi.l		&0xc,EXC_DREGS+0x8(%a6)
	rts
ceaf_pi1:
	addi.l		&0xc,EXC_DREGS+0xc(%a6)
	rts
ceaf_pi2:
	add.l		&0xc,%a2
	rts
ceaf_pi3:
	add.l		&0xc,%a3
	rts
ceaf_pi4:
	add.l		&0xc,%a4
	rts
ceaf_pi5:
	add.l		&0xc,%a5
	rts
ceaf_pi6:
	addi.l		&0xc,EXC_A6(%a6)
	rts
ceaf_pi7:
	mov.b		&mia7_flg,SPCOND_FLG(%a6)
	addi.l		&0xc,EXC_A7(%a6)
	rts

# -(An) : extended and packed fmove out
#	: stacked <ea> = actual <ea> + 8
#	: "An" not updated
# both the returned a0 and the stacked EXC_EA are corrected by -8 before
# the per-register handler stores a0 into An.
ceaf_pd:
	mov.w		(tbl_ceaf_pd.b,%pc,%d1.w*2),%d1
	mov.l		EXC_EA(%a6),%a0
	sub.l		&0x8,%a0
	sub.l		&0x8,EXC_EA(%a6)
	jmp		(tbl_ceaf_pd.b,%pc,%d1.w*1)

	swbeg		&0x8
tbl_ceaf_pd:
	short		ceaf_pd0 - tbl_ceaf_pd
	short		ceaf_pd1 - tbl_ceaf_pd
	short		ceaf_pd2 - tbl_ceaf_pd
	short		ceaf_pd3 - tbl_ceaf_pd
	short		ceaf_pd4 - tbl_ceaf_pd
	short		ceaf_pd5 - tbl_ceaf_pd
	short		ceaf_pd6 - tbl_ceaf_pd
	short		ceaf_pd7 - tbl_ceaf_pd

ceaf_pd0:
	mov.l		%a0,EXC_DREGS+0x8(%a6)
	rts
ceaf_pd1:
	mov.l		%a0,EXC_DREGS+0xc(%a6)
	rts
ceaf_pd2:
	mov.l		%a0,%a2
	rts
ceaf_pd3:
	mov.l		%a0,%a3
	rts
ceaf_pd4:
	mov.l		%a0,%a4
	rts
ceaf_pd5:
	mov.l		%a0,%a5
	rts
ceaf_pd6:
	mov.l		%a0,EXC_A6(%a6)
	rts
ceaf_pd7:
	mov.l		%a0,EXC_A7(%a6)
	mov.b		&mda7_flg,SPCOND_FLG(%a6)
	rts

#
# This table holds the offsets of the emulation routines for each individual
# math operation relative to the address of this table. Included are
# routines like fadd/fmul/fabs. The transcendentals ARE NOT.
# This is because
# this table is for the version of the 060FPSP without transcendentals.
# The location within the table is determined by the extension bits of the
# operation longword.
# Entries that are not emulated in this version hold a zero offset
# (tbl_unsupp - tbl_unsupp).
#

	swbeg		&109
tbl_unsupp:
	long		fin		- tbl_unsupp	# 00: fmove
	long		fint		- tbl_unsupp	# 01: fint
	long		tbl_unsupp	- tbl_unsupp	# 02: fsinh
	long		fintrz		- tbl_unsupp	# 03: fintrz
	long		fsqrt		- tbl_unsupp	# 04: fsqrt
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp	# 06: flognp1
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp	# 08: fetoxm1
	long		tbl_unsupp	- tbl_unsupp	# 09: ftanh
	long		tbl_unsupp	- tbl_unsupp	# 0a: fatan
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp	# 0c: fasin
	long		tbl_unsupp	- tbl_unsupp	# 0d: fatanh
	long		tbl_unsupp	- tbl_unsupp	# 0e: fsin
	long		tbl_unsupp	- tbl_unsupp	# 0f: ftan
	long		tbl_unsupp	- tbl_unsupp	# 10: fetox
	long		tbl_unsupp	- tbl_unsupp	# 11: ftwotox
	long		tbl_unsupp	- tbl_unsupp	# 12: ftentox
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp	# 14: flogn
	long		tbl_unsupp	- tbl_unsupp	# 15: flog10
	long		tbl_unsupp	- tbl_unsupp	# 16: flog2
	long		tbl_unsupp	- tbl_unsupp
	long		fabs		- tbl_unsupp	# 18: fabs
	long		tbl_unsupp	- tbl_unsupp	# 19: fcosh
	long		fneg		- tbl_unsupp	# 1a: fneg
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp	# 1c: facos
	long		tbl_unsupp	- tbl_unsupp	# 1d: fcos
	long		tbl_unsupp	- tbl_unsupp	# 1e: fgetexp
	long		tbl_unsupp	- tbl_unsupp	# 1f: fgetman
	long		fdiv		- tbl_unsupp	# 20: fdiv
	long		tbl_unsupp	- tbl_unsupp	# 21: fmod
	long		fadd		- tbl_unsupp	# 22: fadd
	long		fmul		- tbl_unsupp	# 23: fmul
	long		fsgldiv		- tbl_unsupp	# 24: fsgldiv
	long		tbl_unsupp	- tbl_unsupp	# 25: frem
	long		tbl_unsupp	- tbl_unsupp	# 26: fscale
	long		fsglmul		- tbl_unsupp	# 27: fsglmul
	long		fsub		- tbl_unsupp	# 28: fsub
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp	# 30: fsincos
	long		tbl_unsupp	- tbl_unsupp	# 31: fsincos
	long		tbl_unsupp	- tbl_unsupp	# 32: fsincos
	long		tbl_unsupp	- tbl_unsupp	# 33: fsincos
	long		tbl_unsupp	- tbl_unsupp	# 34: fsincos
	long		tbl_unsupp	- tbl_unsupp	# 35: fsincos
	long		tbl_unsupp	- tbl_unsupp	# 36: fsincos
	long		tbl_unsupp	- tbl_unsupp	# 37: fsincos
	long		fcmp		- tbl_unsupp	# 38: fcmp
	long		tbl_unsupp	- tbl_unsupp
	long		ftst		- tbl_unsupp	# 3a: ftst
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		fsin		- tbl_unsupp	# 40: fsmove
	long		fssqrt		- tbl_unsupp	# 41: fssqrt
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		fdin		- tbl_unsupp	# 44: fdmove
	long		fdsqrt		- tbl_unsupp	# 45: fdsqrt
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		fsabs		- tbl_unsupp	# 58: fsabs
	long		tbl_unsupp	- tbl_unsupp
	long		fsneg		- tbl_unsupp	# 5a: fsneg
	long		tbl_unsupp	- tbl_unsupp
	long		fdabs		- tbl_unsupp	# 5c: fdabs
	long		tbl_unsupp	- tbl_unsupp
	long		fdneg		- tbl_unsupp	# 5e: fdneg
	long		tbl_unsupp	- tbl_unsupp
	long		fsdiv		- tbl_unsupp	# 60: fsdiv
	long		tbl_unsupp	- tbl_unsupp
	long		fsadd		- tbl_unsupp	# 62: fsadd
	long		fsmul		- tbl_unsupp	# 63: fsmul
	long		fddiv		- tbl_unsupp	# 64: fddiv
	long		tbl_unsupp	- tbl_unsupp
	long		fdadd		- tbl_unsupp	# 66: fdadd
	long		fdmul		- tbl_unsupp	# 67: fdmul
	long		fssub		- tbl_unsupp	# 68: fssub
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		tbl_unsupp	- tbl_unsupp
	long		fdsub		- tbl_unsupp	# 6c: fdsub

#################################################
# Add this here so non-fp modules can compile.
# (smovcr is called from fpsp_inex.)
# This stub only branches to itself, so control never returns from it.
	global		smovcr
smovcr:
	bra.b		smovcr

#########################################################################
# XDEF ****************************************************************	#
#	fmovm_dynamic(): emulate "fmovm" dynamic instruction		#
#									#
# XREF ****************************************************************	#
#	fetch_dreg() - fetch data register				#
#	{i,d,}mem_read() - fetch data from memory			#
#	_mem_write() - write data to memory				#
#	iea_iacc() - instruction memory access error occurred		#
#	iea_dacc() - data memory access error occurred			#
#	restore() - restore An index regs if access error occurred	#
#									#
# INPUT ***************************************************************	#
#	None								#
#									#
# OUTPUT **************************************************************	#
#	If instr is "fmovm Dn,-(A7)" from supervisor mode,		#
#		d0 = size of dump					#
#		d1 = Dn							#
#	Else if instruction access error,				#
#		d0 = FSLW						#
#	Else if data access error,					#
#		d0 = FSLW						#
#		a0 = address of fault					#
#	Else								#
#		none.							#
#									#
# ALGORITHM ***********************************************************	#
#	The effective address must be calculated since this is entered	#
# from an "Unimplemented Effective Address" exception handler. So, we	#
# have our own fcalc_ea() routine here. If an access error is flagged	#
# by a _{i,d,}mem_read() call, we must exit through the special		#
# handler.								#
#	The data register is determined and its value loaded to get the	#
# string of FP registers affected. This value is used as an index into	#
# a lookup table such that we can determine the number of bytes		#
# involved.								#
#	If the instruction is "fmovm.x <ea>,Dn", a _mem_read() is used	#
# to read in all FP values. Again, _mem_read() may fail and require a	#
# special exit.								#
#	If the instruction is "fmovm.x DN,<ea>", a _mem_write() is used	#
# to write all FP values. _mem_write() may also fail.			#
#	If the instruction is "fmovm.x DN,-(a7)" from supervisor mode,	#
# then we return the size of the dump and the string to the caller	#
# so that the move can occur outside of this routine. This special	#
# case is required so that moves to the system stack are handled	#
# correctly.								#
#									#
#	DYNAMIC:							#
#		fmovm.x	dn, <ea>					#
#		fmovm.x	<ea>, dn					#
#									#
#	      <WORD 1>		      <WORD2>				#
#	1111 0010 00 |<ea>|	11@& 1000 0$$$ 0000			#
#									#
#	& = (0): predecrement addressing mode				#
#	    (1): postincrement or control addressing mode		#
#	@ = (0): move listed regs from memory to the FPU		#
#	    (1): move listed regs from the FPU to memory		#
#	$$$    : index of data register holding reg select mask		#
#									#
#	NOTES:								#
#		If the data register holds a zero, then the		#
#		instruction is a nop.					#
#									#
#########################################################################

	global		fmovm_dynamic
fmovm_dynamic:

# extract the data register in which the bit string resides...
	mov.b		1+EXC_EXTWORD(%a6),%d1	# fetch extword
	andi.w		&0x70,%d1		# extract reg bits
	lsr.b		&0x4,%d1		# shift into lo bits

# fetch the bit string into d0...
	bsr.l		fetch_dreg		# fetch reg string

	andi.l		&0x000000ff,%d0		# keep only lo byte

	mov.l		%d0,-(%sp)		# save strg
	mov.b		(tbl_fmovm_size.w,%pc,%d0),%d0
	mov.l		%d0,-(%sp)		# save size
	bsr.l		fmovm_calc_ea		# calculate <ea>
	mov.l		(%sp)+,%d0		# restore size
	mov.l		(%sp)+,%d1		# restore strg

# if the bit string is a zero, then the operation is a no-op
# but, make sure that we've calculated ea and advanced the opword pointer
# (the branch below tests the condition codes left by the pop of strg
# into d1 just above.)
	beq.w		fmovm_data_done

# separate move ins from move outs...
	btst		&0x5,EXC_EXTWORD(%a6)	# is it a move in or out?
	beq.w		fmovm_data_in		# bit clear: it's a move in

#############
# MOVE OUT: #
#############
fmovm_data_out:
	btst		&0x4,EXC_EXTWORD(%a6)	# control or predecrement?
	bne.w		fmovm_out_ctrl		# control

############################
fmovm_out_predec:
# for predecrement mode, the bit string is the opposite of both control
# operations and postincrement mode. (bit7 = FP7 ... bit0 = FP0)
# here, we convert it to be just like the others...
	mov.b		(tbl_fmovm_convert.w,%pc,%d1.w*1),%d1

	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode?
	beq.b		fmovm_out_ctrl		# user

fmovm_out_predec_s:
	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
	bne.b		fmovm_out_ctrl

# the operation was unfortunately an: fmovm.x dn,-(sp)
# called from supervisor mode.
# we're also passing "size" and "strg" back to the calling routine
	rts

############################
# build the register image in a scratch area carved from the stack, then
# copy it to the destination with _dmem_write().
# d0 = size in bytes, d1 = bit string (msb = FP0), a0 = destination <ea>.
# FP0/FP1 come from their saved copies in the frame; FP2-FP7 are read
# directly from the FPU.
fmovm_out_ctrl:
	mov.l		%a0,%a1			# move <ea> to a1

	sub.l		%d0,%sp			# subtract size of dump
	lea		(%sp),%a0

	tst.b		%d1			# should FP0 be moved?
	bpl.b		fmovm_out_ctrl_fp1	# no

	mov.l		0x0+EXC_FP0(%a6),(%a0)+	# yes
	mov.l		0x4+EXC_FP0(%a6),(%a0)+
	mov.l		0x8+EXC_FP0(%a6),(%a0)+

fmovm_out_ctrl_fp1:
	lsl.b		&0x1,%d1		# should FP1 be moved?
	bpl.b		fmovm_out_ctrl_fp2	# no

	mov.l		0x0+EXC_FP1(%a6),(%a0)+	# yes
	mov.l		0x4+EXC_FP1(%a6),(%a0)+
	mov.l		0x8+EXC_FP1(%a6),(%a0)+

fmovm_out_ctrl_fp2:
	lsl.b		&0x1,%d1		# should FP2 be moved?
	bpl.b		fmovm_out_ctrl_fp3	# no

	fmovm.x		&0x20,(%a0)		# yes
	add.l		&0xc,%a0

fmovm_out_ctrl_fp3:
	lsl.b		&0x1,%d1		# should FP3 be moved?
	bpl.b		fmovm_out_ctrl_fp4	# no

	fmovm.x		&0x10,(%a0)		# yes
	add.l		&0xc,%a0

fmovm_out_ctrl_fp4:
	lsl.b		&0x1,%d1		# should FP4 be moved?
	bpl.b		fmovm_out_ctrl_fp5	# no

	fmovm.x		&0x08,(%a0)		# yes
	add.l		&0xc,%a0

fmovm_out_ctrl_fp5:
	lsl.b		&0x1,%d1		# should FP5 be moved?
	bpl.b		fmovm_out_ctrl_fp6	# no

	fmovm.x		&0x04,(%a0)		# yes
	add.l		&0xc,%a0

fmovm_out_ctrl_fp6:
	lsl.b		&0x1,%d1		# should FP6 be moved?
	bpl.b		fmovm_out_ctrl_fp7	# no

	fmovm.x		&0x02,(%a0)		# yes
	add.l		&0xc,%a0

fmovm_out_ctrl_fp7:
	lsl.b		&0x1,%d1		# should FP7 be moved?
	bpl.b		fmovm_out_ctrl_done	# no

	fmovm.x		&0x01,(%a0)		# yes
	add.l		&0xc,%a0

fmovm_out_ctrl_done:
	mov.l		%a1,L_SCR1(%a6)

	lea		(%sp),%a0		# pass: supervisor src
	mov.l		%d0,-(%sp)		# save size
	bsr.l		_dmem_write		# copy data to user mem

	mov.l		(%sp)+,%d0
	add.l		%d0,%sp			# clear fpreg data from stack

	tst.l		%d1			# did dstore err?
	bne.w		fmovm_out_err		# yes

	rts

############
# MOVE IN: #
############
# read the register image into a scratch area carved from the stack with
# _dmem_read(), then distribute it.
# d0 = size in bytes, d1 = bit string (msb = FP0), a0 = source <ea>.
# FP0/FP1 go to their saved copies in the frame; FP2-FP7 are loaded
# directly into the FPU.
fmovm_data_in:
	mov.l		%a0,L_SCR1(%a6)

	sub.l		%d0,%sp			# make room for fpregs
	lea		(%sp),%a1

	mov.l		%d1,-(%sp)		# save bit string for later
	mov.l		%d0,-(%sp)		# save # of bytes

	bsr.l		_dmem_read		# copy data from user mem

	mov.l		(%sp)+,%d0		# retrieve # of bytes

	tst.l		%d1			# did dfetch fail?
	bne.w		fmovm_in_err		# yes

	mov.l		(%sp)+,%d1		# load bit string

	lea		(%sp),%a0		# addr of stack

	tst.b		%d1			# should FP0 be moved?
	bpl.b		fmovm_data_in_fp1	# no

	mov.l		(%a0)+,0x0+EXC_FP0(%a6)	# yes
	mov.l		(%a0)+,0x4+EXC_FP0(%a6)
	mov.l		(%a0)+,0x8+EXC_FP0(%a6)

fmovm_data_in_fp1:
	lsl.b		&0x1,%d1		# should FP1 be moved?
	bpl.b		fmovm_data_in_fp2	# no

	mov.l		(%a0)+,0x0+EXC_FP1(%a6)	# yes
	mov.l		(%a0)+,0x4+EXC_FP1(%a6)
	mov.l		(%a0)+,0x8+EXC_FP1(%a6)

fmovm_data_in_fp2:
	lsl.b		&0x1,%d1		# should FP2 be moved?
	bpl.b		fmovm_data_in_fp3	# no

	fmovm.x		(%a0)+,&0x20		# yes

fmovm_data_in_fp3:
	lsl.b		&0x1,%d1		# should FP3 be moved?
	bpl.b		fmovm_data_in_fp4	# no

	fmovm.x		(%a0)+,&0x10		# yes

fmovm_data_in_fp4:
	lsl.b		&0x1,%d1		# should FP4 be moved?
	bpl.b		fmovm_data_in_fp5	# no

	fmovm.x		(%a0)+,&0x08		# yes

fmovm_data_in_fp5:
	lsl.b		&0x1,%d1		# should FP5 be moved?
	bpl.b		fmovm_data_in_fp6	# no

	fmovm.x		(%a0)+,&0x04		# yes

fmovm_data_in_fp6:
	lsl.b		&0x1,%d1		# should FP6 be moved?
	bpl.b		fmovm_data_in_fp7	# no

	fmovm.x		(%a0)+,&0x02		# yes

fmovm_data_in_fp7:
	lsl.b		&0x1,%d1		# should FP7 be moved?
	bpl.b		fmovm_data_in_done	# no

	fmovm.x		(%a0)+,&0x01		# yes

fmovm_data_in_done:
	add.l		%d0,%sp			# remove fpregs from stack
	rts

#####################################

fmovm_data_done:
	rts

##############################################################################

#
# table indexed by the operation's bit string that gives the number
# of bytes that will be moved.
#
# number of bytes = (# of 1's in bit string) * 12(bytes/fpreg)
#
tbl_fmovm_size:
	byte	0x00,0x0c,0x0c,0x18,0x0c,0x18,0x18,0x24
	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
	byte	0x3c,0x48,0x48,0x54,0x48,0x54,0x54,0x60

#
# table to convert a pre-decrement bit string
# into a post-increment
# or control bit string (each entry is its index with the 8 bits reversed).
# ex:	0x00	==>	0x00
#	0x01	==>	0x80
#	0x02	==>	0x40
#		.
#		.
#	0xfd	==>	0xbf
#	0xfe	==>	0x7f
#	0xff	==>	0xff
#
tbl_fmovm_convert:
	byte	0x00,0x80,0x40,0xc0,0x20,0xa0,0x60,0xe0
	byte	0x10,0x90,0x50,0xd0,0x30,0xb0,0x70,0xf0
	byte	0x08,0x88,0x48,0xc8,0x28,0xa8,0x68,0xe8
	byte	0x18,0x98,0x58,0xd8,0x38,0xb8,0x78,0xf8
	byte	0x04,0x84,0x44,0xc4,0x24,0xa4,0x64,0xe4
	byte	0x14,0x94,0x54,0xd4,0x34,0xb4,0x74,0xf4
	byte	0x0c,0x8c,0x4c,0xcc,0x2c,0xac,0x6c,0xec
	byte	0x1c,0x9c,0x5c,0xdc,0x3c,0xbc,0x7c,0xfc
	byte	0x02,0x82,0x42,0xc2,0x22,0xa2,0x62,0xe2
	byte	0x12,0x92,0x52,0xd2,0x32,0xb2,0x72,0xf2
	byte	0x0a,0x8a,0x4a,0xca,0x2a,0xaa,0x6a,0xea
	byte	0x1a,0x9a,0x5a,0xda,0x3a,0xba,0x7a,0xfa
	byte	0x06,0x86,0x46,0xc6,0x26,0xa6,0x66,0xe6
	byte	0x16,0x96,0x56,0xd6,0x36,0xb6,0x76,0xf6
	byte	0x0e,0x8e,0x4e,0xce,0x2e,0xae,0x6e,0xee
	byte	0x1e,0x9e,0x5e,0xde,0x3e,0xbe,0x7e,0xfe
	byte	0x01,0x81,0x41,0xc1,0x21,0xa1,0x61,0xe1
	byte	0x11,0x91,0x51,0xd1,0x31,0xb1,0x71,0xf1
	byte	0x09,0x89,0x49,0xc9,0x29,0xa9,0x69,0xe9
	byte	0x19,0x99,0x59,0xd9,0x39,0xb9,0x79,0xf9
	byte	0x05,0x85,0x45,0xc5,0x25,0xa5,0x65,0xe5
	byte	0x15,0x95,0x55,0xd5,0x35,0xb5,0x75,0xf5
	byte	0x0d,0x8d,0x4d,0xcd,0x2d,0xad,0x6d,0xed
	byte	0x1d,0x9d,0x5d,0xdd,0x3d,0xbd,0x7d,0xfd
	byte	0x03,0x83,0x43,0xc3,0x23,0xa3,0x63,0xe3
	byte	0x13,0x93,0x53,0xd3,0x33,0xb3,0x73,0xf3
	byte	0x0b,0x8b,0x4b,0xcb,0x2b,0xab,0x6b,0xeb
	byte	0x1b,0x9b,0x5b,0xdb,0x3b,0xbb,0x7b,0xfb
	byte	0x07,0x87,0x47,0xc7,0x27,0xa7,0x67,0xe7
	byte	0x17,0x97,0x57,0xd7,0x37,0xb7,0x77,0xf7
	byte	0x0f,0x8f,0x4f,0xcf,0x2f,0xaf,0x6f,0xef
	byte	0x1f,0x9f,0x5f,0xdf,0x3f,0xbf,0x7f,0xff

	global		fmovm_calc_ea
###############################################
# _fmovm_calc_ea: calculate effective address #
###############################################
fmovm_calc_ea:
	mov.l		%d0,%a0			# move # bytes to a0

# currently, MODE and REG are taken from the EXC_OPWORD. this could be
# easily changed if they were inputs passed in registers.
	mov.w		EXC_OPWORD(%a6),%d0	# fetch opcode word
	mov.w		%d0,%d1			# make a copy

	andi.w		&0x3f,%d0		# extract {mode,reg} fields
	andi.l		&0x7,%d1		# extract reg field

# jump to the corresponding function for each {MODE,REG} pair.
	mov.w		(tbl_fea_mode.b,%pc,%d0.w*2),%d0 # fetch jmp distance
	jmp		(tbl_fea_mode.b,%pc,%d0.w*1) # jmp to correct ea mode

# 64 entries, one per 6-bit {MODE,REG} value; each is a 16-bit offset from
# the table. a zero offset (tbl_fea_mode - tbl_fea_mode) marks a
# combination that has no handler here.
	swbeg		&64
tbl_fea_mode:
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode

	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode

	short		faddr_ind_a0	-	tbl_fea_mode
	short		faddr_ind_a1	-	tbl_fea_mode
	short		faddr_ind_a2	-	tbl_fea_mode
	short		faddr_ind_a3	-	tbl_fea_mode
	short		faddr_ind_a4	-	tbl_fea_mode
	short		faddr_ind_a5	-	tbl_fea_mode
	short		faddr_ind_a6	-	tbl_fea_mode
	short		faddr_ind_a7	-	tbl_fea_mode

	short		faddr_ind_p_a0	-	tbl_fea_mode
	short		faddr_ind_p_a1	-	tbl_fea_mode
	short		faddr_ind_p_a2	-	tbl_fea_mode
	short		faddr_ind_p_a3	-	tbl_fea_mode
	short		faddr_ind_p_a4	-	tbl_fea_mode
	short		faddr_ind_p_a5	-	tbl_fea_mode
	short		faddr_ind_p_a6	-	tbl_fea_mode
	short		faddr_ind_p_a7	-	tbl_fea_mode

	short		faddr_ind_m_a0	-	tbl_fea_mode
	short		faddr_ind_m_a1	-	tbl_fea_mode
	short		faddr_ind_m_a2	-	tbl_fea_mode
	short		faddr_ind_m_a3	-	tbl_fea_mode
	short		faddr_ind_m_a4	-	tbl_fea_mode
	short		faddr_ind_m_a5	-	tbl_fea_mode
	short		faddr_ind_m_a6	-	tbl_fea_mode
	short		faddr_ind_m_a7	-	tbl_fea_mode

	short		faddr_ind_disp_a0	-	tbl_fea_mode
	short		faddr_ind_disp_a1	-	tbl_fea_mode
	short		faddr_ind_disp_a2	-	tbl_fea_mode
	short		faddr_ind_disp_a3	-	tbl_fea_mode
	short		faddr_ind_disp_a4	-	tbl_fea_mode
	short		faddr_ind_disp_a5	-	tbl_fea_mode
	short		faddr_ind_disp_a6	-	tbl_fea_mode
	short		faddr_ind_disp_a7	-	tbl_fea_mode

	short		faddr_ind_ext	-	tbl_fea_mode
	short		faddr_ind_ext	-	tbl_fea_mode
	short		faddr_ind_ext	-	tbl_fea_mode
	short		faddr_ind_ext	-	tbl_fea_mode
	short		faddr_ind_ext	-	tbl_fea_mode
	short		faddr_ind_ext	-	tbl_fea_mode
	short		faddr_ind_ext	-	tbl_fea_mode
	short		faddr_ind_ext	-	tbl_fea_mode

	short		fabs_short	-	tbl_fea_mode
	short		fabs_long	-	tbl_fea_mode
	short		fpc_ind		-	tbl_fea_mode
	short		fpc_ind_ext	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode

###################################
# Address register indirect: (An) #
###################################
# a0/a1 are read from their EXC_DREGS save slots, a6 from (%a6), a7 from
# EXC_A7; a2-a5 are read directly from the registers.
faddr_ind_a0:
	mov.l		EXC_DREGS+0x8(%a6),%a0	# Get current a0
	rts

faddr_ind_a1:
	mov.l		EXC_DREGS+0xc(%a6),%a0	# Get current a1
	rts

faddr_ind_a2:
	mov.l		%a2,%a0			# Get current a2
	rts

faddr_ind_a3:
	mov.l		%a3,%a0			# Get current a3
	rts

faddr_ind_a4:
	mov.l		%a4,%a0			# Get current a4
	rts

faddr_ind_a5:
	mov.l		%a5,%a0			# Get current a5
	rts

faddr_ind_a6:
	mov.l		(%a6),%a0		# Get current a6
	rts

faddr_ind_a7:
	mov.l		EXC_A7(%a6),%a0		# Get current a7
	rts

#####################################################
# Address register indirect w/ postincrement: (An)+ #
#####################################################
# on entry a0 = # of bytes; on exit a0 = old An and An = old An + # bytes.
faddr_ind_p_a0:
	mov.l		EXC_DREGS+0x8(%a6),%d0	# Get current a0
	mov.l		%d0,%d1
	add.l		%a0,%d1			# Increment
	mov.l		%d1,EXC_DREGS+0x8(%a6)	# Save incr value
	mov.l		%d0,%a0
	rts

faddr_ind_p_a1:
	mov.l		EXC_DREGS+0xc(%a6),%d0	# Get current a1
	mov.l		%d0,%d1
	add.l		%a0,%d1			# Increment
	mov.l		%d1,EXC_DREGS+0xc(%a6)	# Save incr value
	mov.l		%d0,%a0
	rts

faddr_ind_p_a2:
	mov.l		%a2,%d0			# Get current a2
	mov.l		%d0,%d1
	add.l		%a0,%d1			# Increment
	mov.l		%d1,%a2			# Save incr value
	mov.l		%d0,%a0
	rts

faddr_ind_p_a3:
	mov.l		%a3,%d0			# Get current a3
	mov.l		%d0,%d1
	add.l		%a0,%d1			# Increment
	mov.l		%d1,%a3			# Save incr value
	mov.l		%d0,%a0
	rts

faddr_ind_p_a4:
	mov.l		%a4,%d0			# Get current a4
	mov.l		%d0,%d1
	add.l		%a0,%d1			# Increment
	mov.l		%d1,%a4			# Save incr value
	mov.l		%d0,%a0
	rts

faddr_ind_p_a5:
	mov.l		%a5,%d0			# Get current a5
	mov.l		%d0,%d1
	add.l		%a0,%d1			# Increment
	mov.l		%d1,%a5			# Save incr value
	mov.l		%d0,%a0
	rts

faddr_ind_p_a6:
	mov.l		(%a6),%d0		# Get current a6
	mov.l		%d0,%d1
	add.l		%a0,%d1			# Increment
	mov.l		%d1,(%a6)		# Save incr value
	mov.l		%d0,%a0
	rts

faddr_ind_p_a7:
	mov.b		&mia7_flg,SPCOND_FLG(%a6) # set "special case" flag

	mov.l		EXC_A7(%a6),%d0		# Get current a7
	mov.l		%d0,%d1
	add.l		%a0,%d1			# Increment
	mov.l		%d1,EXC_A7(%a6)		# Save incr value
	mov.l		%d0,%a0
	rts

####################################################
# Address register indirect w/ predecrement: -(An) #
####################################################
# on entry a0 = # of bytes; on exit a0 = An = old An - # bytes.
faddr_ind_m_a0:
	mov.l		EXC_DREGS+0x8(%a6),%d0	# Get current a0
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,EXC_DREGS+0x8(%a6)	# Save decr value
	mov.l		%d0,%a0
	rts

faddr_ind_m_a1:
	mov.l		EXC_DREGS+0xc(%a6),%d0	# Get current a1
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,EXC_DREGS+0xc(%a6)	# Save decr value
	mov.l		%d0,%a0
	rts

faddr_ind_m_a2:
	mov.l		%a2,%d0			# Get current a2
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,%a2			# Save decr value
	mov.l		%d0,%a0
	rts

faddr_ind_m_a3:
	mov.l		%a3,%d0			# Get current a3
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,%a3			# Save decr value
	mov.l		%d0,%a0
	rts

faddr_ind_m_a4:
	mov.l		%a4,%d0			# Get current a4
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,%a4			# Save decr value
	mov.l		%d0,%a0
	rts

faddr_ind_m_a5:
	mov.l		%a5,%d0			# Get current a5
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,%a5			# Save decr value
	mov.l		%d0,%a0
	rts

faddr_ind_m_a6:
	mov.l		(%a6),%d0		# Get current a6
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,(%a6)		# Save decr value
	mov.l		%d0,%a0
	rts

faddr_ind_m_a7:
	mov.b		&mda7_flg,SPCOND_FLG(%a6) # set "special case" flag

	mov.l		EXC_A7(%a6),%d0		# Get current a7
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,EXC_A7(%a6)		# Save decr value
	mov.l		%d0,%a0
	rts

########################################################
# Address register indirect w/ displacement: (d16, An) #
########################################################
# each handler fetches the 16-bit displacement from the instruction
# stream, leaves through iea_iacc if the fetch reports an error in d1,
# and returns a0 = An + sign-extended d16.
faddr_ind_disp_a0:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		EXC_DREGS+0x8(%a6),%a0	# a0 + d16
	rts

faddr_ind_disp_a1:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		EXC_DREGS+0xc(%a6),%a0	# a1 + d16
	rts

faddr_ind_disp_a2:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		%a2,%a0			# a2 + d16
	rts

faddr_ind_disp_a3:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr
instruction ptr4870bsr.l _imem_read_word48714872tst.l %d1 # did ifetch fail?4873bne.l iea_iacc # yes48744875mov.w %d0,%a0 # sign extend displacement48764877add.l %a3,%a0 # a3 + d164878rts48794880faddr_ind_disp_a4:4881mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr4882addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr4883bsr.l _imem_read_word48844885tst.l %d1 # did ifetch fail?4886bne.l iea_iacc # yes48874888mov.w %d0,%a0 # sign extend displacement48894890add.l %a4,%a0 # a4 + d164891rts48924893faddr_ind_disp_a5:4894mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr4895addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr4896bsr.l _imem_read_word48974898tst.l %d1 # did ifetch fail?4899bne.l iea_iacc # yes49004901mov.w %d0,%a0 # sign extend displacement49024903add.l %a5,%a0 # a5 + d164904rts49054906faddr_ind_disp_a6:4907mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr4908addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr4909bsr.l _imem_read_word49104911tst.l %d1 # did ifetch fail?4912bne.l iea_iacc # yes49134914mov.w %d0,%a0 # sign extend displacement49154916add.l (%a6),%a0 # a6 + d164917rts49184919faddr_ind_disp_a7:4920mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr4921addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr4922bsr.l _imem_read_word49234924tst.l %d1 # did ifetch fail?4925bne.l iea_iacc # yes49264927mov.w %d0,%a0 # sign extend displacement49284929add.l EXC_A7(%a6),%a0 # a7 + d164930rts49314932########################################################################4933# Address register indirect w/ index(8-bit displacement): (d8, An, Xn) #4934# " " " w/ " (base displacement): (bd, An, Xn) #4935# Memory indirect postindexed: ([bd, An], Xn, od) #4936# Memory indirect preindexed: ([bd, An, Xn], od) #4937########################################################################4938faddr_ind_ext:4939addq.l &0x8,%d14940bsr.l fetch_dreg # fetch base areg4941mov.l %d0,-(%sp)49424943mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr4944addq.l 
&0x2,EXC_EXTWPTR(%a6) # incr instruction ptr4945bsr.l _imem_read_word # fetch extword in d049464947tst.l %d1 # did ifetch fail?4948bne.l iea_iacc # yes49494950mov.l (%sp)+,%a049514952btst &0x8,%d04953bne.w fcalc_mem_ind49544955mov.l %d0,L_SCR1(%a6) # hold opword49564957mov.l %d0,%d14958rol.w &0x4,%d14959andi.w &0xf,%d1 # extract index regno49604961# count on fetch_dreg() not to alter a0...4962bsr.l fetch_dreg # fetch index49634964mov.l %d2,-(%sp) # save d24965mov.l L_SCR1(%a6),%d2 # fetch opword49664967btst &0xb,%d2 # is it word or long?4968bne.b faii8_long4969ext.l %d0 # sign extend word index4970faii8_long:4971mov.l %d2,%d14972rol.w &0x7,%d14973andi.l &0x3,%d1 # extract scale value49744975lsl.l %d1,%d0 # shift index by scale49764977extb.l %d2 # sign extend displacement4978add.l %d2,%d0 # index + disp4979add.l %d0,%a0 # An + (index + disp)49804981mov.l (%sp)+,%d2 # restore old d24982rts49834984###########################4985# Absolute short: (XXX).W #4986###########################4987fabs_short:4988mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr4989addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr4990bsr.l _imem_read_word # fetch short address49914992tst.l %d1 # did ifetch fail?4993bne.l iea_iacc # yes49944995mov.w %d0,%a0 # return <ea> in a04996rts49974998##########################4999# Absolute long: (XXX).L #5000##########################5001fabs_long:5002mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr5003addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr5004bsr.l _imem_read_long # fetch long address50055006tst.l %d1 # did ifetch fail?5007bne.l iea_iacc # yes50085009mov.l %d0,%a0 # return <ea> in a05010rts50115012#######################################################5013# Program counter indirect w/ displacement: (d16, PC) #5014#######################################################5015fpc_ind:5016mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr5017addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr5018bsr.l _imem_read_word # fetch word 
displacement50195020tst.l %d1 # did ifetch fail?5021bne.l iea_iacc # yes50225023mov.w %d0,%a0 # sign extend displacement50245025add.l EXC_EXTWPTR(%a6),%a0 # pc + d1650265027# _imem_read_word() increased the extwptr by 2. need to adjust here.5028subq.l &0x2,%a0 # adjust <ea>5029rts50305031##########################################################5032# PC indirect w/ index(8-bit displacement): (d8, PC, An) #5033# " " w/ " (base displacement): (bd, PC, An) #5034# PC memory indirect postindexed: ([bd, PC], Xn, od) #5035# PC memory indirect preindexed: ([bd, PC, Xn], od) #5036##########################################################5037fpc_ind_ext:5038mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr5039addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr5040bsr.l _imem_read_word # fetch ext word50415042tst.l %d1 # did ifetch fail?5043bne.l iea_iacc # yes50445045mov.l EXC_EXTWPTR(%a6),%a0 # put base in a05046subq.l &0x2,%a0 # adjust base50475048btst &0x8,%d0 # is disp only 8 bits?5049bne.w fcalc_mem_ind # calc memory indirect50505051mov.l %d0,L_SCR1(%a6) # store opword50525053mov.l %d0,%d1 # make extword copy5054rol.w &0x4,%d1 # rotate reg num into place5055andi.w &0xf,%d1 # extract register number50565057# count on fetch_dreg() not to alter a0...5058bsr.l fetch_dreg # fetch index50595060mov.l %d2,-(%sp) # save d25061mov.l L_SCR1(%a6),%d2 # fetch opword50625063btst &0xb,%d2 # is index word or long?5064bne.b fpii8_long # long5065ext.l %d0 # sign extend word index5066fpii8_long:5067mov.l %d2,%d15068rol.w &0x7,%d1 # rotate scale value into place5069andi.l &0x3,%d1 # extract scale value50705071lsl.l %d1,%d0 # shift index by scale50725073extb.l %d2 # sign extend displacement5074add.l %d2,%d0 # disp + index5075add.l %d0,%a0 # An + (index + disp)50765077mov.l (%sp)+,%d2 # restore temp register5078rts50795080# d2 = index5081# d3 = base5082# d4 = od5083# d5 = extword5084fcalc_mem_ind:5085btst &0x6,%d0 # is the index suppressed?5086beq.b fcalc_index50875088movm.l &0x3c00,-(%sp) 
# save d2-d550895090mov.l %d0,%d5 # put extword in d55091mov.l %a0,%d3 # put base in d350925093clr.l %d2 # yes, so index = 05094bra.b fbase_supp_ck50955096# index:5097fcalc_index:5098mov.l %d0,L_SCR1(%a6) # save d0 (opword)5099bfextu %d0{&16:&4},%d1 # fetch dreg index5100bsr.l fetch_dreg51015102movm.l &0x3c00,-(%sp) # save d2-d55103mov.l %d0,%d2 # put index in d25104mov.l L_SCR1(%a6),%d55105mov.l %a0,%d351065107btst &0xb,%d5 # is index word or long?5108bne.b fno_ext5109ext.l %d251105111fno_ext:5112bfextu %d5{&21:&2},%d05113lsl.l %d0,%d251145115# base address (passed as parameter in d3):5116# we clear the value here if it should actually be suppressed.5117fbase_supp_ck:5118btst &0x7,%d5 # is the bd suppressed?5119beq.b fno_base_sup5120clr.l %d351215122# base displacement:5123fno_base_sup:5124bfextu %d5{&26:&2},%d0 # get bd size5125# beq.l fmovm_error # if (size == 0) it's reserved51265127cmpi.b %d0,&0x25128blt.b fno_bd5129beq.b fget_word_bd51305131mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr5132addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr5133bsr.l _imem_read_long51345135tst.l %d1 # did ifetch fail?5136bne.l fcea_iacc # yes51375138bra.b fchk_ind51395140fget_word_bd:5141mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr5142addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr5143bsr.l _imem_read_word51445145tst.l %d1 # did ifetch fail?5146bne.l fcea_iacc # yes51475148ext.l %d0 # sign extend bd51495150fchk_ind:5151add.l %d0,%d3 # base += bd51525153# outer displacement:5154fno_bd:5155bfextu %d5{&30:&2},%d0 # is od suppressed?5156beq.w faii_bd51575158cmpi.b %d0,&0x25159blt.b fnull_od5160beq.b fword_od51615162mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr5163addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr5164bsr.l _imem_read_long51655166tst.l %d1 # did ifetch fail?5167bne.l fcea_iacc # yes51685169bra.b fadd_them51705171fword_od:5172mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr5173addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr5174bsr.l 
_imem_read_word51755176tst.l %d1 # did ifetch fail?5177bne.l fcea_iacc # yes51785179ext.l %d0 # sign extend od5180bra.b fadd_them51815182fnull_od:5183clr.l %d051845185fadd_them:5186mov.l %d0,%d451875188btst &0x2,%d5 # pre or post indexing?5189beq.b fpre_indexed51905191mov.l %d3,%a05192bsr.l _dmem_read_long51935194tst.l %d1 # did dfetch fail?5195bne.w fcea_err # yes51965197add.l %d2,%d0 # <ea> += index5198add.l %d4,%d0 # <ea> += od5199bra.b fdone_ea52005201fpre_indexed:5202add.l %d2,%d3 # preindexing5203mov.l %d3,%a05204bsr.l _dmem_read_long52055206tst.l %d1 # did dfetch fail?5207bne.w fcea_err # yes52085209add.l %d4,%d0 # ea += od5210bra.b fdone_ea52115212faii_bd:5213add.l %d2,%d3 # ea = (base + bd) + index5214mov.l %d3,%d05215fdone_ea:5216mov.l %d0,%a052175218movm.l (%sp)+,&0x003c # restore d2-d55219rts52205221#########################################################5222fcea_err:5223mov.l %d3,%a052245225movm.l (%sp)+,&0x003c # restore d2-d55226mov.w &0x0101,%d05227bra.l iea_dacc52285229fcea_iacc:5230movm.l (%sp)+,&0x003c # restore d2-d55231bra.l iea_iacc52325233fmovm_out_err:5234bsr.l restore5235mov.w &0x00e1,%d05236bra.b fmovm_err52375238fmovm_in_err:5239bsr.l restore5240mov.w &0x0161,%d052415242fmovm_err:5243mov.l L_SCR1(%a6),%a05244bra.l iea_dacc52455246#########################################################################5247# XDEF **************************************************************** #5248# fmovm_ctrl(): emulate fmovm.l of control registers instr #5249# #5250# XREF **************************************************************** #5251# _imem_read_long() - read longword from memory #5252# iea_iacc() - _imem_read_long() failed; error recovery #5253# #5254# INPUT *************************************************************** #5255# None #5256# #5257# OUTPUT ************************************************************** #5258# If _imem_read_long() doesn't fail: #5259# USER_FPCR(a6) = new FPCR value #5260# USER_FPSR(a6) = new FPSR value #5261# 
#		USER_FPIAR(a6) = new FPIAR value			#
#									#
# ALGORITHM ***********************************************************	#
#	Decode the instruction type by looking at the extension word	#
# in order to see how many control registers to fetch from memory.	#
# Fetch them using _imem_read_long(). If this fetch fails, exit through	#
# the special access error exit handler iea_iacc().			#
#									#
# Instruction word decoding:						#
#									#
#	fmovem.l #<data>, {FPIAR&|FPCR&|FPSR}				#
#									#
#		WORD1			WORD2				#
#	1111 0010 00 111100	100$ $$00 0000 0000			#
#									#
#	$$$ (100): FPCR							#
#	    (010): FPSR							#
#	    (001): FPIAR						#
#	    (000): FPIAR						#
#									#
#########################################################################

# Dispatch is on the high byte of the extension word (EXC_EXTWORD):
# 0x9c = FPCR/FPSR/FPIAR, 0x98 = FPCR/FPSR, 0x94 = FPCR/FPIAR; any other
# value falls through to the FPSR/FPIAR case.  Each immediate longword
# is fetched from EXC_EXTWPTR(a6), which is advanced by 4 per fetch.
	global		fmovm_ctrl
fmovm_ctrl:
	mov.b		EXC_EXTWORD(%a6),%d0	# fetch reg select bits
	cmpi.b		%d0,&0x9c		# fpcr & fpsr & fpiar ?
	beq.w		fctrl_in_7		# yes
	cmpi.b		%d0,&0x98		# fpcr & fpsr ?
	beq.w		fctrl_in_6		# yes
	cmpi.b		%d0,&0x94		# fpcr & fpiar ?
	beq.b		fctrl_in_5		# yes

# fmovem.l #<data>, fpsr/fpiar
fctrl_in_3:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPSR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPSR(%a6)	# store new FPSR in USER_FPSR
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPIAR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPIAR(%a6)	# store new FPIAR in USER_FPIAR
	rts

# fmovem.l #<data>, fpcr/fpiar
fctrl_in_5:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPCR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPCR(%a6)	# store new FPCR in USER_FPCR
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPIAR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPIAR(%a6)	# store new FPIAR in USER_FPIAR
	rts

# fmovem.l #<data>, fpcr/fpsr
fctrl_in_6:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPCR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPCR(%a6)	# store new FPCR in USER_FPCR
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPSR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPSR(%a6)	# store new FPSR in USER_FPSR
	rts

# fmovem.l #<data>, fpcr/fpsr/fpiar
fctrl_in_7:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPCR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPCR(%a6)	# store new FPCR in USER_FPCR
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPSR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPSR(%a6)	# store new FPSR in USER_FPSR
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPIAR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPIAR(%a6)	# store new FPIAR in USER_FPIAR
	rts

##########################################################################

#########################################################################
# XDEF ****************************************************************	#
#
#	addsub_scaler2(): scale inputs to fadd/fsub such that no	#
#			  OVFL/UNFL exceptions will result		#
#									#
# XREF ****************************************************************	#
#	norm() - normalize mantissa after adjusting exponent		#
#	scale_to_zero_src() - scale FP_SCR0(a6) exponent to 0x3fff	#
#	scale_to_zero_dst() - scale FP_SCR1(a6) exponent to 0x3fff	#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to fp op1(src)  (read via SRC_EX/SRC_HI/SRC_LO)	#
#	a1 = pointer to fp op2(dst)  (read via DST_EX/DST_HI/DST_LO)	#
#	STAG(a6)/DTAG(a6) = operand tags (tested against DENORM)	#
#									#
# OUTPUT **************************************************************	#
#	FP_SCR0(a6) = fp op1 scaled(src)				#
#	FP_SCR1(a6) = fp op2 scaled(dst)				#
#	d0 = scale amount						#
#									#
# ALGORITHM ***********************************************************	#
#	If the DST exponent is > the SRC exponent, set the DST exponent	#
# equal to 0x3fff and scale the SRC exponent by the value that the	#
# DST exponent was scaled by. If the SRC exponent is greater or equal,	#
# do the opposite. Return this scale factor in d0.			#
#	If the two exponents differ by > the number of mantissa bits	#
# plus two, then set the smallest exponent to a very small value as a	#
# quick shortcut.							#
#									#
#########################################################################

# L_SCR1(a6) holds the two working exponents as words:
#	L_SCR1(a6)   = src exponent (sign stripped)
#	2+L_SCR1(a6) = dst exponent (sign stripped)
# The scale factor is kept on the stack; its low word is at 0x2(sp).
	global		addsub_scaler2
addsub_scaler2:
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
	mov.w		SRC_EX(%a0),%d0
	mov.w		DST_EX(%a1),%d1
	mov.w		%d0,FP_SCR0_EX(%a6)
	mov.w		%d1,FP_SCR1_EX(%a6)

	andi.w		&0x7fff,%d0
	andi.w		&0x7fff,%d1
	mov.w		%d0,L_SCR1(%a6)		# store src exponent
	mov.w		%d1,2+L_SCR1(%a6)	# store dst exponent

	cmp.w		%d0, %d1		# is src exp >= dst exp?
	bge.l		src_exp_ge2

# dst exp is > src exp; scale dst to exp = 0x3fff
dst_exp_gt2:
	bsr.l		scale_to_zero_dst
	mov.l		%d0,-(%sp)		# save scale factor

	cmpi.b		STAG(%a6),&DENORM	# is src denormalized?
	bne.b		cmpexp12

	lea		FP_SCR0(%a6),%a0
	bsr.l		norm			# normalize the denorm; result is new exp
	neg.w		%d0			# new exp = -(shft val)
	mov.w		%d0,L_SCR1(%a6)		# insert new src exp

cmpexp12:
	mov.w		2+L_SCR1(%a6),%d0
	subi.w		&mantissalen+2,%d0	# subtract mantissalen+2 from larger exp

	cmp.w		%d0,L_SCR1(%a6)		# is difference >= len(mantissa)+2?
	bge.b		quick_scale12

	mov.w		L_SCR1(%a6),%d0
	add.w		0x2(%sp),%d0		# scale src exponent by scale factor
	mov.w		FP_SCR0_EX(%a6),%d1
	and.w		&0x8000,%d1
	or.w		%d1,%d0			# concat {sgn,new exp}
	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new src exponent

	mov.l		(%sp)+,%d0		# return SCALE factor
	rts

quick_scale12:
	andi.w		&0x8000,FP_SCR0_EX(%a6)	# zero src exponent
	bset		&0x0,1+FP_SCR0_EX(%a6)	# set exp = 1

	mov.l		(%sp)+,%d0		# return SCALE factor
	rts

# src exp is >= dst exp; scale src to exp = 0x3fff
src_exp_ge2:
	bsr.l		scale_to_zero_src
	mov.l		%d0,-(%sp)		# save scale factor

	cmpi.b		DTAG(%a6),&DENORM	# is dst denormalized?
	bne.b		cmpexp22
	lea		FP_SCR1(%a6),%a0
	bsr.l		norm			# normalize the denorm; result is new exp
	neg.w		%d0			# new exp = -(shft val)
	mov.w		%d0,2+L_SCR1(%a6)	# insert new dst exp

cmpexp22:
	mov.w		L_SCR1(%a6),%d0
	subi.w		&mantissalen+2,%d0	# subtract mantissalen+2 from larger exp

	cmp.w		%d0,2+L_SCR1(%a6)	# is difference >= len(mantissa)+2?
	bge.b		quick_scale22

	mov.w		2+L_SCR1(%a6),%d0
	add.w		0x2(%sp),%d0		# scale dst exponent by scale factor
	mov.w		FP_SCR1_EX(%a6),%d1
	andi.w		&0x8000,%d1
	or.w		%d1,%d0			# concat {sgn,new exp}
	mov.w		%d0,FP_SCR1_EX(%a6)	# insert new dst exponent

	mov.l		(%sp)+,%d0		# return SCALE factor
	rts

quick_scale22:
	andi.w		&0x8000,FP_SCR1_EX(%a6)	# zero dst exponent
	bset		&0x0,1+FP_SCR1_EX(%a6)	# set exp = 1

	mov.l		(%sp)+,%d0		# return SCALE factor
	rts

##########################################################################

#########################################################################
# XDEF ****************************************************************	#
#	scale_to_zero_src(): scale the exponent of extended precision	#
#			     value at FP_SCR0(a6).			#
#									#
# XREF ****************************************************************	#
#	norm() - normalize the mantissa if the operand was a DENORM	#
#									#
# INPUT ***************************************************************	#
#	FP_SCR0(a6) = extended precision operand to be scaled		#
#									#
# OUTPUT **************************************************************	#
#	FP_SCR0(a6) = scaled extended precision operand			#
#	d0	    = scale value					#
#									#
# ALGORITHM ***********************************************************	#
#	Set the exponent of the input operand to 0x3fff. Save the value	#
# of the difference between the original and new exponent. Then,	#
# normalize the operand if it was a DENORM. Add this normalization	#
# value to the previous value. Return the result.
#
#									#
#########################################################################

# Uses STAG(a6) to decide whether FP_SCR0 is a DENORM.
# d1 is used as scratch; a0 is set to FP_SCR0(a6) on the DENORM path.
	global		scale_to_zero_src
scale_to_zero_src:
	mov.w		FP_SCR0_EX(%a6),%d1	# extract operand's {sgn,exp}
	mov.w		%d1,%d0			# make a copy

	andi.l		&0x7fff,%d1		# extract operand's exponent

	andi.w		&0x8000,%d0		# extract operand's sgn
	or.w		&0x3fff,%d0		# insert new operand's exponent(=0)

	mov.w		%d0,FP_SCR0_EX(%a6)	# insert biased exponent

	cmpi.b		STAG(%a6),&DENORM	# is operand a DENORM?
	beq.b		stzs_denorm		# yes; normalize the DENORM

stzs_norm:
	mov.l		&0x3fff,%d0
	sub.l		%d1,%d0			# scale = BIAS + (-exp)

	rts

stzs_denorm:
	lea		FP_SCR0(%a6),%a0	# pass ptr to src op
	bsr.l		norm			# normalize denorm
	neg.l		%d0			# new exponent = -(shft val)
	mov.l		%d0,%d1			# d1 = exponent used by stzs_norm
	bra.b		stzs_norm		# finish scaling

###

#########################################################################
# XDEF ****************************************************************	#
#	scale_sqrt(): scale the input operand exponent so a subsequent	#
#		      fsqrt operation won't take an exception.		#
#									#
# XREF ****************************************************************	#
#	norm() - normalize the mantissa if the operand was a DENORM	#
#									#
# INPUT ***************************************************************	#
#	FP_SCR0(a6) = extended precision operand to be scaled		#
#									#
# OUTPUT **************************************************************	#
#	FP_SCR0(a6) = scaled extended precision operand			#
#	d0	    = scale value					#
#									#
# ALGORITHM ***********************************************************	#
#	If the input operand is a DENORM, normalize it.			#
#	If the exponent of the input operand is even, set the exponent	#
# to 0x3ffe and return a scale factor of "(exp-0x3ffe)/2".
# If the								#
# exponent of the input operand is odd, set the exponent to 0x3fff and	#
# return a scale factor of "(exp-0x3fff)/2".				#
#									#
#########################################################################

# NOTE: as coded, the value returned in d0 is (new_exp - old_exp) >> 1,
# i.e. (0x3ffe - exp)/2 for an even exponent and (0x3fff - exp)/2 for an
# odd one (arithmetic shift).  On the DENORM path the value returned by
# norm() in d0 is tested for parity and added to the new exponent.
	global		scale_sqrt
scale_sqrt:
	cmpi.b		STAG(%a6),&DENORM	# is operand a DENORM?
	beq.b		ss_denorm		# yes; normalize the DENORM

	mov.w		FP_SCR0_EX(%a6),%d1	# extract operand's {sgn,exp}
	andi.l		&0x7fff,%d1		# extract operand's exponent

	andi.w		&0x8000,FP_SCR0_EX(%a6)	# extract operand's sgn

	btst		&0x0,%d1		# is exp even or odd?
	beq.b		ss_norm_even

	ori.w		&0x3fff,FP_SCR0_EX(%a6)	# insert new operand's exponent(=0)

	mov.l		&0x3fff,%d0
	sub.l		%d1,%d0			# scale = BIAS + (-exp)
	asr.l		&0x1,%d0		# divide scale factor by 2
	rts

ss_norm_even:
	ori.w		&0x3ffe,FP_SCR0_EX(%a6)	# insert new exponent 0x3ffe

	mov.l		&0x3ffe,%d0
	sub.l		%d1,%d0			# scale = 0x3ffe + (-exp)
	asr.l		&0x1,%d0		# divide scale factor by 2
	rts

ss_denorm:
	lea		FP_SCR0(%a6),%a0	# pass ptr to src op
	bsr.l		norm			# normalize denorm

	btst		&0x0,%d0		# is exp even or odd?
	beq.b		ss_denorm_even

	ori.w		&0x3fff,FP_SCR0_EX(%a6)	# insert new operand's exponent(=0)

	add.l		&0x3fff,%d0
	asr.l		&0x1,%d0		# divide scale factor by 2
	rts

ss_denorm_even:
	ori.w		&0x3ffe,FP_SCR0_EX(%a6)	# insert new exponent 0x3ffe

	add.l		&0x3ffe,%d0
	asr.l		&0x1,%d0		# divide scale factor by 2
	rts

###

#########################################################################
# XDEF ****************************************************************	#
#	scale_to_zero_dst(): scale the exponent of extended precision	#
#			     value at FP_SCR1(a6).
#
#									#
# XREF ****************************************************************	#
#	norm() - normalize the mantissa if the operand was a DENORM	#
#									#
# INPUT ***************************************************************	#
#	FP_SCR1(a6) = extended precision operand to be scaled		#
#									#
# OUTPUT **************************************************************	#
#	FP_SCR1(a6) = scaled extended precision operand			#
#	d0	    = scale value					#
#									#
# ALGORITHM ***********************************************************	#
#	Set the exponent of the input operand to 0x3fff. Save the value	#
# of the difference between the original and new exponent. Then,	#
# normalize the operand if it was a DENORM. Add this normalization	#
# value to the previous value. Return the result.			#
#									#
#########################################################################

# Uses DTAG(a6) to decide whether FP_SCR1 is a DENORM.
# d1 is used as scratch; a0 is set to FP_SCR1(a6) on the DENORM path.
	global		scale_to_zero_dst
scale_to_zero_dst:
	mov.w		FP_SCR1_EX(%a6),%d1	# extract operand's {sgn,exp}
	mov.w		%d1,%d0			# make a copy

	andi.l		&0x7fff,%d1		# extract operand's exponent

	andi.w		&0x8000,%d0		# extract operand's sgn
	or.w		&0x3fff,%d0		# insert new operand's exponent(=0)

	mov.w		%d0,FP_SCR1_EX(%a6)	# insert biased exponent

	cmpi.b		DTAG(%a6),&DENORM	# is operand a DENORM?
	beq.b		stzd_denorm		# yes; normalize the DENORM

stzd_norm:
	mov.l		&0x3fff,%d0
	sub.l		%d1,%d0			# scale = BIAS + (-exp)
	rts

stzd_denorm:
	lea		FP_SCR1(%a6),%a0	# pass ptr to dst op
	bsr.l		norm			# normalize denorm
	neg.l		%d0			# new exponent = -(shft val)
	mov.l		%d0,%d1			# d1 = exponent used by stzd_norm
	bra.b		stzd_norm		# finish scaling

##########################################################################

#########################################################################
# XDEF ****************************************************************	#
#	res_qnan(): return default result w/ QNAN operand for dyadic	#
#	res_snan(): return
#	default result w/ SNAN operand for dyadic			#
#	res_qnan_1op(): return dflt result w/ QNAN operand for monadic	#
#	res_snan_1op(): return dflt result w/ SNAN operand for monadic	#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	FP_SRC(a6) = extended precision src operand			#
#	FP_DST(a6) = extended precision dst operand			#
#	STAG(a6)/DTAG(a6) = operand tags (tested against SNAN/QNAN)	#
#									#
# OUTPUT **************************************************************	#
#	fp0 = default result						#
#	USER_FPSR(a6) updated (nan, and aiop/snan/neg as applicable)	#
#									#
# ALGORITHM ***********************************************************	#
#	If either operand (but not both operands) of an operation is a	#
# nonsignalling NAN, then that NAN is returned as the result. If both	#
# operands are nonsignalling NANs, then the destination operand		#
# nonsignalling NAN is returned as the result.				#
#	If either operand to an operation is a signalling NAN (SNAN),	#
# then, the SNAN bit is set in the FPSR EXC byte. If the SNAN trap	#
# enable bit is set in the FPCR, then the trap is taken and the		#
# destination is not modified. If the SNAN trap enable bit is not set,	#
# then the SNAN is converted to a nonsignalling NAN (by setting the	#
# SNAN bit in the operand to one), and the operation continues as	#
# described in the preceding paragraph, for nonsignalling NANs.		#
#	Make sure the appropriate FPSR bits are set before exiting.	#
#									#
#########################################################################

# Selection order as coded: a dst SNAN wins, then a dst QNAN, then a src
# QNAN, otherwise the src is treated as an SNAN.  a0 is left pointing at
# the chosen operand, which is loaded into fp0 at nan_not_neg.
	global		res_qnan
	global		res_snan
res_qnan:
res_snan:
	cmp.b		DTAG(%a6), &SNAN	# is the dst an SNAN?
	beq.b		dst_snan2
	cmp.b		DTAG(%a6), &QNAN	# is the dst a QNAN?
	beq.b		dst_qnan2
src_nan:
	cmp.b		STAG(%a6), &QNAN	# is the src a QNAN?
	beq.b		src_qnan2
	global		res_snan_1op
res_snan_1op:
src_snan2:
	bset		&0x6, FP_SRC_HI(%a6)	# set SNAN bit
	or.l		&nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6)
	lea		FP_SRC(%a6), %a0
	bra.b		nan_comp
	global		res_qnan_1op
res_qnan_1op:
src_qnan2:
	or.l		&nan_mask, USER_FPSR(%a6)
	lea		FP_SRC(%a6), %a0
	bra.b		nan_comp
dst_snan2:
	or.l		&nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6)
	bset		&0x6, FP_DST_HI(%a6)	# set SNAN bit
	lea		FP_DST(%a6), %a0
	bra.b		nan_comp
dst_qnan2:
	lea		FP_DST(%a6), %a0
	cmp.b		STAG(%a6), &SNAN	# dst QNAN is returned; is src an SNAN?
	bne		nan_done		# no; only the nan bit is set
	or.l		&aiop_mask+snan_mask, USER_FPSR(%a6)
nan_done:
	or.l		&nan_mask, USER_FPSR(%a6)
nan_comp:
	btst		&0x7, FTEMP_EX(%a0)	# is NAN neg?
	beq.b		nan_not_neg
	or.l		&neg_mask, USER_FPSR(%a6)
nan_not_neg:
	fmovm.x		(%a0), &0x80		# load the selected NAN into fp0
	rts

#########################################################################
# XDEF ****************************************************************	#
#	res_operr(): return default result during operand error		#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	None								#
#									#
# OUTPUT **************************************************************	#
#	fp0 = default operand error result				#
#									#
# ALGORITHM ***********************************************************	#
#	A nonsignalling NAN is returned as the default result when	#
# an operand error occurs for the following cases:			#
#									#
#	Multiply: (Infinity x Zero)					#
#	Divide  : (Zero / Zero) || (Infinity / Infinity)		#
#
#########################################################################

	global		res_operr
res_operr:
	or.l		&nan_mask+operr_mask+aiop_mask, USER_FPSR(%a6)
	fmovm.x		nan_return(%pc), &0x80	# fp0 = default NAN from the table below
	rts

# default operand error result in extended precision: sign clear, exponent
# 0x7fff and an all-ones mantissa.
nan_return:
	long		0x7fff0000, 0xffffffff, 0xffffffff

#########################################################################
# XDEF **************************************************************** #
# _denorm(): denormalize an intermediate result #
# #
# XREF **************************************************************** #
# None #
# #
# INPUT *************************************************************** #
# a0 = points to the operand to be denormalized #
# (in the internal extended format) #
# #
# d0 = rounding precision #
# #
# OUTPUT ************************************************************** #
# a0 = pointer to the denormalized result #
# (in the internal extended format) #
# #
# d0 = guard,round,sticky #
# #
# ALGORITHM *********************************************************** #
# According to the exponent underflow threshold for the given #
# precision, shift the mantissa bits to the right in order to raise #
# the exponent of the operand to the threshold value. While shifting #
# the mantissa bits right, maintain the value of the guard, round, #
# and sticky bits. #
# other notes: #
# (1) _denorm() is called by the underflow routines #
# (2) _denorm() does NOT affect the status register #
# #
#########################################################################

#
# table of exponent threshold values for each precision.
# indexed by (rounding precision >> 2): entry 0 is used for ext, 1 for sgl,
# 2 for dbl.
#
tbl_thresh:
	short		0x0
	short		sgl_thresh
	short		dbl_thresh

	global		_denorm
_denorm:
#
# Load the exponent threshold for the precision selected and check
# to see if (threshold - exponent) is > 65 in which case we can
# simply calculate the sticky bit and zero the mantissa. otherwise
# we have to call the denormalization routine.
#
	lsr.b		&0x2, %d0		# shift prec to lo bits
	mov.w		(tbl_thresh.b,%pc,%d0.w*2), %d1 # load prec threshold
	mov.w		%d1, %d0		# copy d1 into d0
	sub.w		FTEMP_EX(%a0), %d0	# diff = threshold - exp
	cmpi.w		%d0, &66		# is diff > 65? (mant + g,r bits)
	bpl.b		denorm_set_stky		# yes; just calc sticky

	clr.l		%d0			# no; start with g,r,s clear
	btst		&inex2_bit, FPSR_EXCEPT(%a6) # was INEX2 already set?
	beq.b		denorm_call		# no; don't change anything
	bset		&29, %d0		# yes; set sticky bit

denorm_call:
	bsr.l		dnrm_lp			# denormalize the number
	rts

#
# all bits would have been shifted off during the denorm so simply
# set the sticky bit and clear the entire mantissa.
#
denorm_set_stky:
	mov.l		&0x20000000, %d0	# set sticky bit in return value
	mov.w		%d1, FTEMP_EX(%a0)	# load exp with threshold
	clr.l		FTEMP_HI(%a0)		# hi(mantissa) = 0
	clr.l		FTEMP_LO(%a0)		# lo(mantissa) = 0
	rts

#									#
# dnrm_lp(): normalize exponent/mantissa to specified threshold		#
#									#
# INPUT:								#
#	%a0	   : points to the operand to be denormalized		#
#	%d0{31:29} : initial guard,round,sticky				#
#	%d1{15:0}  : denormalization threshold				#
# OUTPUT:								#
#	%a0	   : points to the denormalized operand			#
#	%d0{31:29} : final guard,round,sticky				#
#									#
# %d1 is destroyed. %d2 is saved and restored where it is used.		#
# L_SCR1/L_SCR2 in the (%a6) frame are used as scratch storage.		#
#									#

# *** Local Equates *** #
	set		GRS, L_SCR2		# g,r,s temp storage
	set		FTEMP_LO2, L_SCR1	# FTEMP_LO copy

	global		dnrm_lp
dnrm_lp:

#
# make a copy of FTEMP_LO and place the g,r,s bits directly after it
# in memory so as to make the bitfield extraction for denormalization easier.
# (this relies on L_SCR2 immediately following L_SCR1 - the equates are
# defined elsewhere in the file; TODO confirm.)
#
	mov.l		FTEMP_LO(%a0), FTEMP_LO2(%a6) # make FTEMP_LO copy
	mov.l		%d0, GRS(%a6)		# place g,r,s after it

#
# check to see how much less than the underflow threshold the operand
# exponent is.
#
	mov.l		%d1, %d0		# copy the denorm threshold
	sub.w		FTEMP_EX(%a0), %d1	# d1 = threshold - uns exponent
	ble.b		dnrm_no_lp		# d1 <= 0
	cmpi.w		%d1, &0x20		# is ( 0 <  d1 < 32) ?
	blt.b		case_1			# yes
	cmpi.w		%d1, &0x40		# is (32 <= d1 < 64) ?
	blt.b		case_2			# yes
	bra.w		case_3			# (d1 >= 64)

#
# No normalization necessary
#
dnrm_no_lp:
	mov.l		GRS(%a6), %d0		# restore original g,r,s
	rts

#
# case (0<d1<32)
#
# %d0 = denorm threshold
# %d1 = "n" = amt to shift
#
#	---------------------------------------------------------
#	|     FTEMP_HI     |     FTEMP_LO     |grs000........000|
#	---------------------------------------------------------
#	<-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
#
#	     the whole 96-bit string moves right by n bits
#
#	<-(n)-><-(32 - n)-><------(32)-------><------(32)------->
#	---------------------------------------------------------
#	|0.....0| NEW_HI   |  NEW_FTEMP_LO    |grs              |
#	---------------------------------------------------------
#
case_1:
	mov.l		%d2, -(%sp)		# create temp storage

	mov.w		%d0, FTEMP_EX(%a0)	# exponent = denorm threshold
	mov.l		&32, %d0
	sub.w		%d1, %d0		# %d0 = 32 - %d1

# for shifts of 29-31 the incoming g,r,s would fall outside the 32 bits
# extracted below, so fold them into the low byte of the FTEMP_LO copy
# where the sticky test will still see them.
	cmpi.w		%d1, &29		# is shft amt >= 29
	blt.b		case1_extract		# no; no fix needed
	mov.b		GRS(%a6), %d2
	or.b		%d2, 3+FTEMP_LO2(%a6)

case1_extract:
	bfextu		FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_HI
	bfextu		FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new FTEMP_LO
	bfextu		FTEMP_LO2(%a6){%d0:&32}, %d0 # %d0 = new G,R,S

	mov.l		%d2, FTEMP_HI(%a0)	# store new FTEMP_HI
	mov.l		%d1, FTEMP_LO(%a0)	# store new FTEMP_LO

	bftst		%d0{&2:&30}		# were bits shifted off?
	beq.b		case1_sticky_clear	# no; go finish
	bset		&rnd_stky_bit, %d0	# yes; set sticky bit

case1_sticky_clear:
	and.l		&0xe0000000, %d0	# clear all but G,R,S
	mov.l		(%sp)+, %d2		# restore temp register
	rts

#
# case (32<=d1<64)
#
# %d0 = denorm threshold
# %d1 = "n" = amt to shift
#
#	---------------------------------------------------------
#	|     FTEMP_HI     |     FTEMP_LO     |grs000........000|
#	---------------------------------------------------------
#	<-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
#
#	   FTEMP_HI moves right by 32 + n bits (n = d1 - 32);
#	   FTEMP_LO and the old g,r,s only contribute to sticky
#
#	<-------(32)------><-(n)-><-(32 - n)-><------(32)------->
#	---------------------------------------------------------
#	|0...............0|0....0| NEW_LO     |grs              |
#	---------------------------------------------------------
#
case_2:
	mov.l		%d2, -(%sp)		# create temp storage

	mov.w		%d0, FTEMP_EX(%a0)	# exponent = denorm threshold
	subi.w		&0x20, %d1		# %d1 now between 0 and 31
	mov.l		&0x20, %d0
	sub.w		%d1, %d0		# %d0 = 32 - %d1

# subtle step here; or in the g,r,s at the bottom of FTEMP_LO to minimize
# the number of bits to check for the sticky detect.
# it only plays a role in shift amounts of 61-63.
	mov.b		GRS(%a6), %d2
	or.b		%d2, 3+FTEMP_LO2(%a6)

	bfextu		FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_LO
	bfextu		FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new G,R,S

	bftst		%d1{&2:&30}		# were any bits shifted off?
	bne.b		case2_set_sticky	# yes; set sticky bit
	bftst		FTEMP_LO2(%a6){%d0:&31}	# were any bits shifted off?
	bne.b		case2_set_sticky	# yes; set sticky bit

	mov.l		%d1, %d0		# move new G,R,S to %d0
	bra.b		case2_end

case2_set_sticky:
	mov.l		%d1, %d0		# move new G,R,S to %d0
	bset		&rnd_stky_bit, %d0	# set sticky bit

case2_end:
	clr.l		FTEMP_HI(%a0)		# store FTEMP_HI = 0
	mov.l		%d2, FTEMP_LO(%a0)	# store FTEMP_LO
	and.l		&0xe0000000, %d0	# clear all but G,R,S

	mov.l		(%sp)+, %d2		# restore temp register
	rts

#
# case (d1>=64)
#
# %d0 = denorm threshold
# %d1 = amt to shift
#
case_3:
	mov.w		%d0, FTEMP_EX(%a0)	# insert denorm threshold

	cmpi.w		%d1, &65		# compare shift amt against 65
	blt.b		case3_64		# less; it's == 64
	beq.b		case3_65		# equal; it's == 65

#
# case (d1>65)
#
# Shift value is > 65 and out of range. All bits are shifted off.
# Return a zero mantissa with the sticky bit set
#
	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
	mov.l		&0x20000000, %d0	# set sticky bit
	rts

#
# case (d1 == 64)
#
#	---------------------------------------------------------
#	|     FTEMP_HI     |     FTEMP_LO     |grs000........000|
#	---------------------------------------------------------
#	 the top two bits of FTEMP_HI become G,R; every other bit
#	 of the string can only contribute to the sticky bit
#	---------------------------------------------------------
#	|0...............0|0................0|grs               |
#	---------------------------------------------------------
#
case3_64:
	mov.l		FTEMP_HI(%a0), %d0	# fetch hi(mantissa)
	mov.l		%d0, %d1		# make a copy
	and.l		&0xc0000000, %d0	# extract G,R
	and.l		&0x3fffffff, %d1	# extract other bits

	bra.b		case3_complete

#
# case (d1 == 65)
#
#	---------------------------------------------------------
#	|     FTEMP_HI     |     FTEMP_LO     |grs000........000|
#	---------------------------------------------------------
#	 the top bit of FTEMP_HI becomes R (G is zero); every
#	 other bit of the string can only contribute to sticky
#	---------------------------------------------------------
#	|0...............0|0................0|0rs               |
#	---------------------------------------------------------
#
case3_65:
	mov.l		FTEMP_HI(%a0), %d0	# fetch hi(mantissa)
# %d1 still holds the shift amount here. it must be loaded with the
# mantissa before it is masked (as case3_64 does), otherwise the "and"
# below is never zero and the sticky bit gets set unconditionally.
	mov.l		%d0, %d1		# make a copy
	and.l		&0x80000000, %d0	# extract R bit
	lsr.l		&0x1, %d0		# shift high bit into R bit
	and.l		&0x7fffffff, %d1	# extract other bits

case3_complete:
# last operation done was an "and" of the bits shifted off so the condition
# codes are already set so branch accordingly.
	bne.b		case3_set_sticky	# yes; go set new sticky
	tst.l		FTEMP_LO(%a0)		# were any bits shifted off?
	bne.b		case3_set_sticky	# yes; go set new sticky
	tst.b		GRS(%a6)		# were any bits shifted off?
	bne.b		case3_set_sticky	# yes; go set new sticky

#
# no bits were shifted off so don't set the sticky bit.
# %d0 already holds the new guard and round bits and
# the entire mantissa is zero.
#
	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
	rts

#
# some bits were shifted off so set the sticky bit.
# the entire mantissa is zero.
#
case3_set_sticky:
	bset		&rnd_stky_bit, %d0	# set new sticky bit
	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
	rts

#########################################################################
# XDEF **************************************************************** #
# _round(): round result according to precision/mode #
# #
# XREF **************************************************************** #
# None #
# #
# INPUT *************************************************************** #
# a0 = ptr to input operand in internal extended format #
# d1(hi) = contains rounding precision: #
# ext = $0000xxxx #
# sgl = $0004xxxx #
# dbl = $0008xxxx #
# d1(lo) = contains rounding mode: #
# RN = $xxxx0000 #
# RZ = $xxxx0001 #
# RM = $xxxx0002 #
# RP = $xxxx0003 #
# d0{31:29} = contains the g,r,s bits (extended) #
# #
# OUTPUT ************************************************************** #
# a0 = pointer to rounded result #
# #
# ALGORITHM *********************************************************** #
# On return the value pointed to by a0 is correctly rounded, #
# a0 is preserved and the g-r-s bits in d0 are cleared. #
# The result is not typed - the tag field is invalid.
# The result is still in the internal extended format. #
# #
# The INEX bit of USER_FPSR will be set if the rounded result was #
# inexact (i.e. if any of the g-r-s bits were set). #
# #
#########################################################################

	global		_round
_round:
#
# ext_grs() looks at the rounding precision and sets the appropriate
# G,R,S bits.
# If (G,R,S == 0) then result is exact and round is done, else set
# the inex flag in status reg and continue.
#
	bsr.l		ext_grs			# extract G,R,S

	tst.l		%d0			# are G,R,S zero?
	beq.w		truncate		# yes; round is complete

	or.w		&inx2a_mask, 2+USER_FPSR(%a6) # set inex2/ainex

#
# Use rounding mode as an index into a jump table for these modes.
# All of the following assumes grs != 0.
#
	mov.w		(tbl_mode.b,%pc,%d1.w*2), %a1 # load jump offset
	jmp		(tbl_mode.b,%pc,%a1)	# jmp to rnd mode handler

# handler offsets, relative to tbl_mode, in rounding mode order RN,RZ,RM,RP
tbl_mode:
	short		rnd_near - tbl_mode
	short		truncate - tbl_mode	# RZ always truncates
	short		rnd_mnus - tbl_mode
	short		rnd_plus - tbl_mode

#################################################################
# ROUND PLUS INFINITY #
# #
# If sign of fp number = 0 (positive), then add 1 to l. #
#################################################################
rnd_plus:
	tst.b		FTEMP_SGN(%a0)		# check for sign
	bmi.w		truncate		# if negative then truncate

	mov.l		&0xffffffff, %d0	# force g,r,s to be all f's
	swap		%d1			# set up d1 for round prec.

	cmpi.b		%d1, &s_mode		# is prec = sgl?
	beq.w		add_sgl			# yes
	bgt.w		add_dbl			# no; it's dbl
	bra.w		add_ext			# no; it's ext

#################################################################
# ROUND MINUS INFINITY #
# #
# If sign of fp number = 1 (negative), then add 1 to l. #
#################################################################
rnd_mnus:
	tst.b		FTEMP_SGN(%a0)		# check for sign
	bpl.w		truncate		# if positive then truncate

	mov.l		&0xffffffff, %d0	# force g,r,s to be all f's
	swap		%d1			# set up d1 for round prec.

	cmpi.b		%d1, &s_mode		# is prec = sgl?
	beq.w		add_sgl			# yes
	bgt.w		add_dbl			# no; it's dbl
	bra.w		add_ext			# no; it's ext

#################################################################
# ROUND NEAREST #
# #
# If (g=1), then add 1 to l and if (r=s=0), then clear l #
# Note that this will round to even in case of a tie. #
#################################################################
rnd_near:
	asl.l		&0x1, %d0		# shift g-bit to c-bit
	bcc.w		truncate		# g=0; just truncate

# g=1: add 1 to l. %d0 now holds only r,s for the tie test in add_xxx.
	swap		%d1			# set up d1 for round prec.

	cmpi.b		%d1, &s_mode		# is prec = sgl?
	beq.w		add_sgl			# yes
	bgt.w		add_dbl			# no; it's dbl
	bra.w		add_ext			# no; it's ext

# *** LOCAL EQUATES ***
	set		ad_1_sgl, 0x00000100	# constant to add 1 to l-bit in sgl prec
	set		ad_1_dbl, 0x00000800	# constant to add 1 to l-bit in dbl prec

#########################
# ADD SINGLE #
#########################
add_sgl:
	add.l		&ad_1_sgl, FTEMP_HI(%a0) # add 1 to l-bit
	bcc.b		scc_clr			# no mantissa overflow
	roxr.w		FTEMP_HI(%a0)		# shift v-bit back in
	roxr.w		FTEMP_HI+2(%a0)		# shift v-bit back in
	add.w		&0x1, FTEMP_EX(%a0)	# and incr exponent
scc_clr:
	tst.l		%d0			# test for rs = 0
	bne.b		sgl_done		# not a tie; keep l
	and.w		&0xfe00, FTEMP_HI+2(%a0) # tie; clear the l-bit
sgl_done:
	and.l		&0xffffff00, FTEMP_HI(%a0) # truncate bits beyond sgl limit
	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
	rts

#########################
# ADD EXTENDED #
#########################
add_ext:
	addq.l		&1, FTEMP_LO(%a0)	# add 1 to l-bit
	bcc.b		xcc_clr			# test for carry out
	addq.l		&1, FTEMP_HI(%a0)	# propagate carry
	bcc.b		xcc_clr
	roxr.w		FTEMP_HI(%a0)		# mant is 0 so restore v-bit
	roxr.w		FTEMP_HI+2(%a0)		# mant is 0 so restore v-bit
	roxr.w		FTEMP_LO(%a0)
	roxr.w		FTEMP_LO+2(%a0)
	add.w		&0x1, FTEMP_EX(%a0)	# and inc exp
xcc_clr:
	tst.l		%d0			# test rs = 0
	bne.b		add_ext_done		# not a tie; keep l
	and.b		&0xfe, FTEMP_LO+3(%a0)	# tie; clear the l bit
add_ext_done:
	rts

#########################
# ADD DOUBLE #
#########################
add_dbl:
	add.l		&ad_1_dbl, FTEMP_LO(%a0) # add 1 to lsb
	bcc.b		dcc_clr			# no carry
	addq.l		&0x1, FTEMP_HI(%a0)	# propagate carry
	bcc.b		dcc_clr			# no carry

	roxr.w		FTEMP_HI(%a0)		# mant is 0 so restore v-bit
	roxr.w		FTEMP_HI+2(%a0)		# mant is 0 so restore v-bit
	roxr.w		FTEMP_LO(%a0)
	roxr.w		FTEMP_LO+2(%a0)
	addq.w		&0x1, FTEMP_EX(%a0)	# incr exponent
dcc_clr:
	tst.l		%d0			# test for rs = 0
	bne.b		dbl_done		# not a tie; keep l
	and.w		&0xf000, FTEMP_LO+2(%a0) # tie; clear the l-bit

dbl_done:
	and.l		&0xfffff800, FTEMP_LO(%a0) # truncate bits beyond dbl limit
	rts

###########################
# Truncate all other bits #
###########################
truncate:
	swap		%d1			# select rnd prec

	cmpi.b		%d1, &s_mode		# is prec sgl?
	beq.w		sgl_done		# yes
	bgt.b		dbl_done		# no; it's dbl
	rts					# no; it's ext


#
# ext_grs(): extract guard, round and sticky bits according to
# rounding precision.
#
# INPUT
# d0 = extended precision g,r,s (in d0{31:29})
# d1 = {PREC,ROUND}
# OUTPUT
# d0{31:29} = guard, round, sticky
#
# The ext_grs extract the guard/round/sticky bits according to the
# selected rounding precision. It is called by the round subroutine
# only. All registers except d0 are kept intact. d0 becomes an
# updated guard,round,sticky in d0{31:29}
#
# Notes: the ext_grs uses the round PREC, and therefore has to swap d1
# prior to usage, and needs to restore d1 to original.
# this routine is tightly tied to the round routine and not meant to
# uphold standard subroutine calling practices.
#

ext_grs:
	swap		%d1			# have d1.w point to round precision
	tst.b		%d1			# is rnd prec = extended?
	bne.b		ext_grs_not_ext		# no; go handle sgl or dbl

#
# %d0 actually already hold g,r,s since _round() had it before calling
# this function. so, as long as we don't disturb it, we are "returning" it.
#
ext_grs_ext:
	swap		%d1			# yes; return to correct positions
	rts

ext_grs_not_ext:
	movm.l		&0x3000, -(%sp)		# make some temp registers {d2/d3}

	cmpi.b		%d1, &s_mode		# is rnd prec = sgl?
	bne.b		ext_grs_dbl		# no; go handle dbl

#
# sgl:
#	96		64	  40	32		0
#	-----------------------------------------------------
#	| EXP	|XXXXXXX|	  |xx	|		|grs|
#	-----------------------------------------------------
#			<--(24)--->
#	"xx" = the two bits right of the sgl mantissa: new g,r
#	everything to the right of "xx", the old g,r,s included,
#	is or-ed together to form the new sticky bit
#
ext_grs_sgl:
	bfextu		FTEMP_HI(%a0){&24:&2}, %d3 # sgl prec. g-r are 2 bits right
	mov.l		&30, %d2		# of the sgl prec. limits
	lsl.l		%d2, %d3		# shift g-r bits to MSB of d3
	mov.l		FTEMP_HI(%a0), %d2	# get hi(mantissa) for s-bit test
	and.l		&0x0000003f, %d2	# s bit is the or of all other
	bne.b		ext_grs_st_stky		# bits to the right of g-r
	tst.l		FTEMP_LO(%a0)		# test lower mantissa
	bne.b		ext_grs_st_stky		# if any are set, set sticky
	tst.l		%d0			# test original g,r,s
	bne.b		ext_grs_st_stky		# if any are set, set sticky
	bra.b		ext_grs_end_sd		# all clear; exit without sticky

#
# dbl:
#	96		64		32	 11	0
#	-----------------------------------------------------
#	| EXP	|XXXXXXX|		|	 |xx	|grs|
#	-----------------------------------------------------
#	"xx" = the two bits right of the dbl mantissa: new g,r
#	everything to the right of "xx", the old g,r,s included,
#	is or-ed together to form the new sticky bit
#
ext_grs_dbl:
	bfextu		FTEMP_LO(%a0){&21:&2}, %d3 # dbl-prec. g-r are 2 bits right
	mov.l		&30, %d2		# of the dbl prec. limits
	lsl.l		%d2, %d3		# shift g-r bits to the MSB of d3
	mov.l		FTEMP_LO(%a0), %d2	# get lower mantissa for s-bit test
	and.l		&0x000001ff, %d2	# s bit is the or-ing of all
	bne.b		ext_grs_st_stky		# other bits to the right of g-r
	tst.l		%d0			# test original g,r,s
	bne.b		ext_grs_st_stky		# if any are set, set sticky
	bra.b		ext_grs_end_sd		# if clear, exit

ext_grs_st_stky:
	bset		&rnd_stky_bit, %d3	# set sticky bit
ext_grs_end_sd:
	mov.l		%d3, %d0		# return grs to d0

	movm.l		(%sp)+, &0xc		# restore scratch registers {d2/d3}

	swap		%d1			# restore d1 to original
	rts

#########################################################################
# norm(): normalize the mantissa of an extended precision input. the #
# input operand should not be normalized already. #
# #
# XDEF **************************************************************** #
# norm() #
# #
# XREF **************************************************************** #
# none #
# #
# INPUT *************************************************************** #
# a0 = pointer to fp extended precision operand to normalize #
# #
# OUTPUT ************************************************************** #
# d0 = number of bit positions the mantissa was shifted #
# a0 = the input operand's mantissa is normalized; the exponent #
# is unchanged.
# #
#########################################################################
	global		norm
norm:
	mov.l		%d2, -(%sp)		# create some temp regs
	mov.l		%d3, -(%sp)

	mov.l		FTEMP_HI(%a0), %d0	# load hi(mantissa)
	mov.l		FTEMP_LO(%a0), %d1	# load lo(mantissa)

	bfffo		%d0{&0:&32}, %d2	# how many places to shift?
	beq.b		norm_lo			# hi(man) is all zeroes!

#
# hi(man) has a bit set: shift the 64-bit mantissa left by %d2 bits,
# moving the top %d2 bits of lo(man) into the bottom of hi(man).
#
norm_hi:
	lsl.l		%d2, %d0		# left shift hi(man)
	bfextu		%d1{&0:%d2}, %d3	# extract lo bits

	or.l		%d3, %d0		# create hi(man)
	lsl.l		%d2, %d1		# create lo(man)

	mov.l		%d0, FTEMP_HI(%a0)	# store new hi(man)
	mov.l		%d1, FTEMP_LO(%a0)	# store new lo(man)

	mov.l		%d2, %d0		# return shift amount

	mov.l		(%sp)+, %d3		# restore temp regs
	mov.l		(%sp)+, %d2

	rts

#
# hi(man) is zero: the shifted lo(man) becomes the new hi(man) and
# the shift count is 32 larger.
#
norm_lo:
	bfffo		%d1{&0:&32}, %d2	# how many places to shift?
	lsl.l		%d2, %d1		# shift lo(man)
	add.l		&32, %d2		# add 32 to shft amount

	mov.l		%d1, FTEMP_HI(%a0)	# store hi(man)
	clr.l		FTEMP_LO(%a0)		# lo(man) is now zero

	mov.l		%d2, %d0		# return shift amount

	mov.l		(%sp)+, %d3		# restore temp regs
	mov.l		(%sp)+, %d2

	rts

#########################################################################
# unnorm_fix(): - changes an UNNORM to one of NORM, DENORM, or ZERO #
# - returns corresponding optype tag #
# #
# XDEF **************************************************************** #
# unnorm_fix() #
# #
# XREF **************************************************************** #
# norm() - normalize the mantissa #
# #
# INPUT *************************************************************** #
# a0 = pointer to unnormalized extended precision number #
# #
# OUTPUT ************************************************************** #
# d0 = optype tag - is corrected to one of NORM, DENORM, or ZERO #
# a0 = input operand has been converted to a norm, denorm, or #
# zero; both the exponent and mantissa are changed.
# #
#########################################################################

	global		unnorm_fix
unnorm_fix:
	bfffo		FTEMP_HI(%a0){&0:&32}, %d0 # how many shifts are needed?
	bne.b		unnorm_shift		# hi(man) is not all zeroes

#
# hi(man) is all zeroes so see if any bits in lo(man) are set
#
unnorm_chk_lo:
	bfffo		FTEMP_LO(%a0){&0:&32}, %d0 # is operand really a zero?
	beq.w		unnorm_zero		# yes

	add.w		&32, %d0		# no; fix shift distance

#
# d0 = # shifts needed for complete normalization
#
unnorm_shift:
	clr.l		%d1			# clear top word
	mov.w		FTEMP_EX(%a0), %d1	# extract exponent
	and.w		&0x7fff, %d1		# strip off sgn

	cmp.w		%d0, %d1		# will denorm push exp < 0?
	bgt.b		unnorm_nrm_zero		# yes; denorm only until exp = 0

#
# exponent would not go < 0. Therefore, number stays normalized
#
	sub.w		%d0, %d1		# shift exponent value
	mov.w		FTEMP_EX(%a0), %d0	# load old exponent
	and.w		&0x8000, %d0		# save old sign
	or.w		%d0, %d1		# {sgn,new exp}
	mov.w		%d1, FTEMP_EX(%a0)	# insert new exponent

	bsr.l		norm			# normalize UNNORM

	mov.b		&NORM, %d0		# return new optype tag
	rts

#
# exponent would go < 0, so only denormalize until exp = 0.
# %d1 = exponent = number of bit positions to shift the mantissa left.
#
unnorm_nrm_zero:
	cmp.b		%d1, &32		# is exp <= 32?
	bgt.b		unnorm_nrm_zero_lrg	# no; go handle large exponent

	bfextu		FTEMP_HI(%a0){%d1:&32}, %d0 # extract new hi(man)
	mov.l		%d0, FTEMP_HI(%a0)	# save new hi(man)

	mov.l		FTEMP_LO(%a0), %d0	# fetch old lo(man)
	lsl.l		%d1, %d0		# extract new lo(man)
	mov.l		%d0, FTEMP_LO(%a0)	# save new lo(man)

	and.w		&0x8000, FTEMP_EX(%a0)	# set exp = 0

	mov.b		&DENORM, %d0		# return new optype tag
	rts

#
# only mantissa bits set are in lo(man)
#
unnorm_nrm_zero_lrg:
	sub.w		&32, %d1		# adjust shft amt by 32

	mov.l		FTEMP_LO(%a0), %d0	# fetch old lo(man)
	lsl.l		%d1, %d0		# left shift lo(man)

	mov.l		%d0, FTEMP_HI(%a0)	# store new hi(man)
	clr.l		FTEMP_LO(%a0)		# lo(man) = 0

	and.w		&0x8000, FTEMP_EX(%a0)	# set exp = 0

	mov.b		&DENORM, %d0		# return new optype tag
	rts

#
# whole mantissa is zero so this UNNORM is actually a zero
#
unnorm_zero:
	and.w		&0x8000, FTEMP_EX(%a0)	# force exponent to zero

	mov.b		&ZERO, %d0		# fix optype tag
	rts

#########################################################################
# XDEF **************************************************************** #
# set_tag_x(): return the optype of the input ext fp number #
# #
# XREF **************************************************************** #
# None #
# #
# INPUT *************************************************************** #
# a0 = pointer to extended precision operand #
# #
# OUTPUT ************************************************************** #
# d0 = value of type tag #
# one of: NORM, INF, QNAN, SNAN, DENORM, UNNORM, ZERO #
# #
# ALGORITHM *********************************************************** #
# Simply test the exponent, j-bit, and mantissa values to #
# determine the type of operand. #
# If it's an unnormalized zero, alter the operand and force it #
# to be a normal zero.
# #
#########################################################################

	global		set_tag_x
set_tag_x:
	mov.w		FTEMP_EX(%a0), %d0	# extract exponent
	andi.w		&0x7fff, %d0		# strip off sign
	cmpi.w		%d0, &0x7fff		# is (EXP == MAX)?
	beq.b		inf_or_nan_x		# yes; it's an INF or a NAN
not_inf_or_nan_x:
	btst		&0x7, FTEMP_HI(%a0)	# is the j-bit (mantissa msb) set?
	beq.b		not_norm_x		# no
is_norm_x:
	mov.b		&NORM, %d0
	rts
# j-bit is clear: ZERO or DENORM if exp = 0, else some kind of UNNORM.
not_norm_x:
	tst.w		%d0			# is exponent = 0?
	bne.b		is_unnorm_x		# no
not_unnorm_x:
	tst.l		FTEMP_HI(%a0)		# any bits set in hi(man)?
	bne.b		is_denorm_x		# yes
	tst.l		FTEMP_LO(%a0)		# any bits set in lo(man)?
	bne.b		is_denorm_x		# yes
is_zero_x:
	mov.b		&ZERO, %d0
	rts
is_denorm_x:
	mov.b		&DENORM, %d0
	rts
# must distinguish now "Unnormalized zeroes" which we
# must convert to zero.
is_unnorm_x:
	tst.l		FTEMP_HI(%a0)		# any bits set in hi(man)?
	bne.b		is_unnorm_reg_x		# yes
	tst.l		FTEMP_LO(%a0)		# any bits set in lo(man)?
	bne.b		is_unnorm_reg_x		# yes
# it's an "unnormalized zero". let's convert it to an actual zero...
	andi.w		&0x8000, FTEMP_EX(%a0)	# clear exponent
	mov.b		&ZERO, %d0
	rts
is_unnorm_reg_x:
	mov.b		&UNNORM, %d0
	rts
# exponent is at its maximum: INF if the mantissa (ignoring its msb) is
# zero, otherwise a NAN.
inf_or_nan_x:
	tst.l		FTEMP_LO(%a0)		# any bits set in lo(man)?
	bne.b		is_nan_x		# yes
	mov.l		FTEMP_HI(%a0), %d0
	and.l		&0x7fffffff, %d0	# msb is a don't care!
	bne.b		is_nan_x
is_inf_x:
	mov.b		&INF, %d0
	rts
is_nan_x:
	btst		&0x6, FTEMP_HI(%a0)	# bit 6 of the top mantissa byte
	beq.b		is_snan_x		# clear; it's an SNAN
	mov.b		&QNAN, %d0		# set; it's a QNAN
	rts
is_snan_x:
	mov.b		&SNAN, %d0
	rts

#########################################################################
# XDEF **************************************************************** #
# set_tag_d(): return the optype of the input dbl fp number #
# #
# XREF **************************************************************** #
# None #
# #
# INPUT *************************************************************** #
# a0 = points to double precision operand #
# #
# OUTPUT ************************************************************** #
# d0 = value of type tag #
# one of: NORM, INF, QNAN, SNAN, DENORM, ZERO #
# #
# ALGORITHM
# *********************************************************** #
# Simply test the exponent, j-bit, and mantissa values to #
# determine the type of operand. #
# #
#########################################################################

	global		set_tag_d
set_tag_d:
	mov.l		FTEMP(%a0), %d0		# d0 = upper longword of the double
	mov.l		%d0, %d1		# keep a copy for the mantissa tests

	andi.l		&0x7ff00000, %d0	# isolate the exponent field
	beq.b		zero_or_denorm_d	# exp = 0

	cmpi.l		%d0, &0x7ff00000	# is exp all ones?
	beq.b		inf_or_nan_d		# yes

is_norm_d:
	mov.b		&NORM, %d0
	rts
# exp = 0: ZERO if the whole mantissa is zero, else DENORM.
zero_or_denorm_d:
	and.l		&0x000fffff, %d1	# upper mantissa bits
	bne		is_denorm_d
	tst.l		4+FTEMP(%a0)		# lower mantissa longword
	bne		is_denorm_d
is_zero_d:
	mov.b		&ZERO, %d0
	rts
is_denorm_d:
	mov.b		&DENORM, %d0
	rts
# exp all ones: INF if the whole mantissa is zero, else a NAN.
inf_or_nan_d:
	and.l		&0x000fffff, %d1	# upper mantissa bits
	bne		is_nan_d
	tst.l		4+FTEMP(%a0)		# lower mantissa longword
	bne		is_nan_d
is_inf_d:
	mov.b		&INF, %d0
	rts
is_nan_d:
	btst		&19, %d1		# mantissa msb set?
	bne		is_qnan_d		# yes; QNAN
is_snan_d:
	mov.b		&SNAN, %d0
	rts
is_qnan_d:
	mov.b		&QNAN, %d0
	rts

#########################################################################
# XDEF **************************************************************** #
# set_tag_s(): return the optype of the input sgl fp number #
# #
# XREF **************************************************************** #
# None #
# #
# INPUT *************************************************************** #
# a0 = pointer to single precision operand #
# #
# OUTPUT ************************************************************** #
# d0 = value of type tag #
# one of: NORM, INF, QNAN, SNAN, DENORM, ZERO #
# #
# ALGORITHM *********************************************************** #
# Simply test the exponent, j-bit, and mantissa values to #
# determine the type of operand.
# #
#########################################################################

	global		set_tag_s
set_tag_s:
	mov.l		FTEMP(%a0), %d0		# d0 = the single precision operand
	mov.l		%d0, %d1		# keep a copy for the mantissa tests

	andi.l		&0x7f800000, %d0	# isolate the exponent field
	beq.b		zero_or_denorm_s	# exp = 0

	cmpi.l		%d0, &0x7f800000	# is exp all ones?
	beq.b		inf_or_nan_s		# yes

is_norm_s:
	mov.b		&NORM, %d0
	rts
# exp = 0: ZERO if the mantissa is zero, else DENORM.
zero_or_denorm_s:
	and.l		&0x007fffff, %d1	# mantissa bits
	bne		is_denorm_s
is_zero_s:
	mov.b		&ZERO, %d0
	rts
is_denorm_s:
	mov.b		&DENORM, %d0
	rts
# exp all ones: INF if the mantissa is zero, else a NAN.
inf_or_nan_s:
	and.l		&0x007fffff, %d1	# mantissa bits
	bne		is_nan_s
is_inf_s:
	mov.b		&INF, %d0
	rts
is_nan_s:
	btst		&22, %d1		# mantissa msb set?
	bne		is_qnan_s		# yes; QNAN
is_snan_s:
	mov.b		&SNAN, %d0
	rts
is_qnan_s:
	mov.b		&QNAN, %d0
	rts

#########################################################################
# XDEF **************************************************************** #
# unf_res(): routine to produce default underflow result of a #
# scaled extended precision number; this is used by #
# fadd/fdiv/fmul/etc. emulation routines. #
# unf_res4(): same as above but for fsglmul/fsgldiv which use #
# single round prec and extended prec mode.
#									#
# XREF ****************************************************************	#
#	_denorm() - denormalize according to scale factor		#
#	_round() - round denormalized number according to rnd prec	#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision operand			#
#	d0 = scale factor						#
#	d1 = rounding precision/mode					#
#									#
# OUTPUT **************************************************************	#
#	a0 = pointer to default underflow result in extended precision	#
#	d0.b = result FPSR_cc which caller may or may not want to save	#
#									#
# ALGORITHM ***********************************************************	#
#	Convert the input operand to "internal format" which means the	#
# exponent is extended to 16 bits and the sign is stored in the unused	#
# portion of the extended precision operand. Denormalize the number	#
# according to the scale factor passed in d0. Then, round the		#
# denormalized result.							#
#	Set the FPSR_exc bits as appropriate but return the cc bits in	#
# d0 in case the caller doesn't want to save them (as is the case for	#
# fmove out).								#
#	unf_res4() for fsglmul/fsgldiv forces the denorm to extended	#
# precision and the rounding precision to single.			#
#									#
#########################################################################

#
# stack layout while _denorm()/_round() are called:
#	0x0(%sp) = saved operand pointer (a0)
#	0x4(%sp) = saved rnd prec,mode longword (d1); its low word is 0x6(%sp)
#
	global		unf_res
unf_res:
	mov.l		%d1, -(%sp)		# save rnd prec,mode on stack

# build the "internal" format: sign byte in FTEMP_SGN, 16-bit exponent
	btst		&0x7, FTEMP_EX(%a0)	# operand negative?
	sne		FTEMP_SGN(%a0)		# yes; FTEMP_SGN = 0xff

	mov.w		FTEMP_EX(%a0), %d1	# fetch sign,exponent
	and.w		&0x7fff, %d1		# strip sign
	sub.w		%d0, %d1		# exponent -= scale factor
	mov.w		%d1, FTEMP_EX(%a0)	# insert 16 bit exponent

	mov.l		%a0, -(%sp)		# save operand ptr during calls

	mov.l		0x4(%sp),%d0		# reload rnd prec,mode
	andi.w		&0x00c0,%d0		# keep rnd prec bits
	lsr.w		&0x4,%d0		# pass: rnd prec
	bsr.l		_denorm			# denorm result

	mov.l		(%sp),%a0		# a0 = operand ptr again
	mov.w		0x6(%sp),%d1		# load prec:mode into %d1
	andi.w		&0xc0,%d1		# extract rnd prec
	lsr.w		&0x4,%d1
	swap		%d1			# hi(d1) = rnd prec
	mov.w		0x6(%sp),%d1
	andi.w		&0x30,%d1		# extract rnd mode
	lsr.w		&0x4,%d1		# lo(d1) = rnd mode
	bsr.l		_round			# round the denorm

	mov.l		(%sp)+, %a0		# restore operand ptr

# result is now rounded properly. convert back to normal format
	bclr		&0x7, FTEMP_EX(%a0)	# clear sgn first; may have residue
	tst.b		FTEMP_SGN(%a0)		# is "internal result" sign set?
	beq.b		unf_res_chkifzero	# no; result is positive
	bset		&0x7, FTEMP_EX(%a0)	# set result sgn
	clr.b		FTEMP_SGN(%a0)		# clear temp sign

# the number may have become zero after rounding. the Z ccode bit is
# returned in d0 rather than written to FPSR_CC.
unf_res_chkifzero:
	clr.l		%d0			# default: no ccodes set
	tst.l		FTEMP_HI(%a0)		# is value now a zero?
	bne.b		unf_res_cont		# no
	tst.l		FTEMP_LO(%a0)
	bne.b		unf_res_cont		# no
	bset		&z_bit, %d0		# yes; set zero ccode bit

unf_res_cont:

#
# can inex1 also be set along with unfl and inex2???
#
# we know that underflow has occurred. aunfl should be set if INEX2 is
# also set.
#
	btst		&inex2_bit, FPSR_EXCEPT(%a6) # is INEX2 set?
	beq.b		unf_res_end		# no
	bset		&aunfl_bit, FPSR_AEXCEPT(%a6) # yes; set aunfl

unf_res_end:
	add.l		&0x4, %sp		# drop saved rnd prec,mode
	rts

#
# unf_res4(): unf_res() for fsglmul() and fsgldiv().
# same inputs and outputs as unf_res(), but _denorm() is always called
# with d0 = 0 and _round() is always given s_mode as the rnd prec; only
# the rnd mode is taken from the caller's d1.
#
	global		unf_res4
unf_res4:
	mov.l		%d1,-(%sp)		# save rnd prec,mode on stack

# build the "internal" format: sign byte in FTEMP_SGN, 16-bit exponent
	btst		&0x7,FTEMP_EX(%a0)	# operand negative?
	sne		FTEMP_SGN(%a0)		# yes; FTEMP_SGN = 0xff

	mov.w		FTEMP_EX(%a0),%d1	# fetch sign,exponent
	and.w		&0x7fff,%d1		# strip sign
	sub.w		%d0,%d1			# exponent -= scale factor
	mov.w		%d1,FTEMP_EX(%a0)	# insert 16 bit exponent

	mov.l		%a0,-(%sp)		# save operand ptr during calls

	clr.l		%d0			# force rnd prec = ext
	bsr.l		_denorm			# denorm result

	mov.l		(%sp),%a0		# a0 = operand ptr again
	mov.w		&s_mode,%d1		# force rnd prec = sgl
	swap		%d1			# hi(d1) = rnd prec
	mov.w		0x6(%sp),%d1		# load rnd prec,mode word
	andi.w		&0x30,%d1		# extract rnd mode
	lsr.w		&0x4,%d1		# lo(d1) = rnd mode
	bsr.l		_round			# round the denorm

	mov.l		(%sp)+,%a0		# restore operand ptr

# result is now rounded properly. convert back to normal format
	bclr		&0x7,FTEMP_EX(%a0)	# clear sgn first; may have residue
	tst.b		FTEMP_SGN(%a0)		# is "internal result" sign set?
	beq.b		unf_res4_chkifzero	# no; result is positive
	bset		&0x7,FTEMP_EX(%a0)	# set result sgn
	clr.b		FTEMP_SGN(%a0)		# clear temp sign

# the number may have become zero after rounding. the Z ccode bit is
# returned in d0 rather than written to FPSR_CC.
unf_res4_chkifzero:
	clr.l		%d0			# default: no ccodes set
	tst.l		FTEMP_HI(%a0)		# is value now a zero?
	bne.b		unf_res4_cont		# no
	tst.l		FTEMP_LO(%a0)
	bne.b		unf_res4_cont		# no
	bset		&z_bit,%d0		# yes; set zero ccode bit

unf_res4_cont:

#
# can inex1 also be set along with unfl and inex2???
#
# we know that underflow has occurred. aunfl should be set if INEX2 is
# also set.
#
	btst		&inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
	beq.b		unf_res4_end		# no
	bset		&aunfl_bit,FPSR_AEXCEPT(%a6) # yes; set aunfl

unf_res4_end:
	add.l		&0x4,%sp		# drop saved rnd prec,mode
	rts

#########################################################################
# XDEF ****************************************************************	#
#	ovf_res(): routine to produce the default overflow result of	#
#		   an overflowing number.				#
#	ovf_res2(): same as above but the rnd mode/prec are passed	#
#		    differently.					#
#									#
# XREF ****************************************************************	#
#	none								#
#									#
# INPUT ***************************************************************	#
#	d1.b	= '-1' => (-); '0' => (+)				#
#   ovf_res():								#
#	d0	= rnd mode/prec						#
#   ovf_res2():								#
#	hi(d0)	= rnd prec						#
#	lo(d0)	= rnd mode						#
#									#
# OUTPUT **************************************************************	#
#	a0	= points to extended precision result			#
#	d0.b	= condition code bits					#
#									#
# ALGORITHM ***********************************************************	#
#	The default overflow result can be determined by the sign of	#
# the result and the rounding mode/prec in effect. These bits are	#
# concatenated together to create an index into the default result	#
# table. A pointer to the correct result is returned in a0. The	#
# resulting condition codes are returned in d0 in case the caller	#
# doesn't want FPSR_cc altered (as is the case for fmove out).		#
#									#
#########################################################################

#
# both entry points build the same 5-bit table index:
#	bit  4   = result sign (1 => negative)
#	bits 3-2 = rnd prec
#	bits 1-0 = rnd mode
# d0 keeps the index itself (byte index into tbl_ovfl_cc); d1 holds the
# index doubled, which the *8 scale below turns into a 16-byte stride
# into tbl_ovfl_result.
#
	global		ovf_res
ovf_res:
	andi.w		&0x10,%d1		# keep result sign
	lsr.b		&0x4,%d0		# shift prec/mode
	or.b		%d0,%d1			# concat the two
	mov.w		%d1,%d0			# make a copy
	lsl.b		&0x1,%d1		# multiply d1 by 2
	bra.b		ovf_res_load

	global		ovf_res2
ovf_res2:
	and.w		&0x10, %d1		# keep result sign
	or.b		%d0, %d1		# insert rnd mode
	swap		%d0			# bring rnd prec into lo(d0)
	or.b		%d0, %d1		# insert rnd prec
	mov.w		%d1, %d0		# make a copy
	lsl.b		&0x1, %d1		# shift left by 1

#
# use the rounding mode, precision, and result sign as an index into the
# two tables below to fetch the default result and the result ccodes.
#
ovf_res_load:
	mov.b		(tbl_ovfl_cc.b,%pc,%d0.w*1), %d0 # fetch result ccodes
	lea		(tbl_ovfl_result.b,%pc,%d1.w*8), %a0 # return result ptr

	rts

# one ccode byte per index; rows are rnd prec, columns are RN,RZ,RM,RP.
# the fourth row and the matching four zero results in tbl_ovfl_result
# fill the index range between the positive and negative halves.
tbl_ovfl_cc:
	byte		0x2, 0x0, 0x0, 0x2
	byte		0x2, 0x0, 0x0, 0x2
	byte		0x2, 0x0, 0x0, 0x2
	byte		0x0, 0x0, 0x0, 0x0
	byte		0x2+0x8, 0x8, 0x2+0x8, 0x8
	byte		0x2+0x8, 0x8, 0x2+0x8, 0x8
	byte		0x2+0x8, 0x8, 0x2+0x8, 0x8

# one 16-byte default result per index.
tbl_ovfl_result:
	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
	long		0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RZ
	long		0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RM
	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP

	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
	long		0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RZ
	long		0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RM
	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP

	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
	long		0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RZ
	long		0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RM
	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP

	long		0x00000000,0x00000000,0x00000000,0x00000000
	long		0x00000000,0x00000000,0x00000000,0x00000000
	long		0x00000000,0x00000000,0x00000000,0x00000000
	long		0x00000000,0x00000000,0x00000000,0x00000000

	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
	long		0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RZ
	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
	long		0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RP

	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
	long		0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RZ
	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
	long		0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RP

	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
	long		0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RZ
	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
	long		0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RP

#########################################################################
# XDEF ****************************************************************	#
#	fout(): move from fp register to memory or data register	#
#									#
# XREF ****************************************************************	#
#	_round() - needed to create EXOP for sgl/dbl precision		#
#	norm() - needed to create EXOP for extended precision		#
#	ovf_res() - create default overflow result for sgl/dbl precision#
#	unf_res() - create default underflow result for sgl/dbl prec.	#
#	dst_dbl() - create rounded dbl precision result.		#
#	dst_sgl() - create rounded sgl precision result.		#
#	fetch_dreg() - fetch dynamic k-factor reg for packed.		#
#	bindec() - convert FP binary number to packed number.		#
#	_mem_write() - write data to memory.				#
#	_mem_write2() - write data to memory unless supv mode -(a7) exc.#
#	_dmem_write_{byte,word,long}() - write data to memory.		#
#	store_dreg_{b,w,l}() - store data to data register file.	#
#	facc_out_{b,w,l,d,x}() - data access error occurred.		#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision source operand		#
#	d0 = round prec,mode						#
#									#
# OUTPUT **************************************************************	#
#	fp0 : intermediate underflow or overflow result if		#
#	      OVFL/UNFL occurred for a sgl or dbl operand		#
#	fp1 : EXOP, when one is created (see ALGORITHM)			#
#									#
# ALGORITHM ***********************************************************	#
#	This routine is accessed by many handlers that need to do an	#
# opclass three move of an operand out to memory.			#
#	Decode an fmove out (opclass 3) instruction to determine if	#
# it's b,w,l,s,d,x, or p in size. b,w,l can be stored to either a data	#
# register or memory. The algorithm uses a standard "fmove" to create	#
# the rounded result. Also, since exceptions are disabled, this also	#
# creates the correct OPERR default result if appropriate.		#
#	For sgl or dbl precision, overflow or underflow can occur. If	#
# either occurs and it (or INEX2) is enabled, the EXOP is created and	#
# returned in fp1.							#
#	For extended precision, the stacked <ea> must be fixed along	#
# w/ the address index register as appropriate w/ _calc_ea_fout(). If	#
# the source is a denorm and if underflow is enabled, an EXOP must be	#
# created.								#
#	For packed, the k-factor must be fetched from the instruction	#
# word or a data register. The <ea> must be fixed as w/ extended	#
# precision. Then, bindec() is called to create the appropriate	#
# packed result.							#
#	If at any time an access error is flagged by one of the move-	#
# to-memory routines, then a special exit must be made so that the	#
# access error can be handled properly.				#
#									#
#########################################################################

	global		fout
fout:
	bfextu		EXC_CMDREG(%a6){&3:&3},%d1 # extract dst fmt
	mov.w		(tbl_fout.b,%pc,%d1.w*2),%a1 # use as index
	jmp		(tbl_fout.b,%pc,%a1)	# jump to routine

	swbeg		&0x8
tbl_fout:
	short		fout_long	-	tbl_fout
	short		fout_sgl	-	tbl_fout
	short		fout_ext	-	tbl_fout
	short		fout_pack	-	tbl_fout
	short		fout_word	-	tbl_fout
	short		fout_dbl	-	tbl_fout
	short		fout_byte	-	tbl_fout
	short		fout_pack	-	tbl_fout

#################################################################
# fmove.b out ###################################################
#################################################################

# Only "Unimplemented Data Type" exceptions enter here. The operand
# is either a DENORM or a NORM.
fout_byte:
	tst.b		STAG(%a6)		# is operand normalized?
	bne.b		fout_byte_denorm	# no

	fmovm.x		SRC(%a0),&0x80		# load value

fout_byte_norm:
	fmov.l		%d0,%fpcr		# insert rnd prec,mode

	fmov.b		%fp0,%d0		# exec move out w/ correct rnd mode

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# fetch FPSR
	or.w		%d1,2+USER_FPSR(%a6)	# save new exc,accrued bits

	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
	beq.b		fout_byte_dn		# must save to integer regfile

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
	bsr.l		_dmem_write_byte	# write byte

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_b		# yes

	rts

fout_byte_dn:
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
	andi.w		&0x7,%d1
	bsr.l		store_dreg_b
	rts

# a DENORM is replaced by the smallest normalized sgl of the same sign
# before the integer conversion is executed.
fout_byte_denorm:
	mov.l		SRC_EX(%a0),%d1
	andi.l		&0x80000000,%d1		# keep DENORM sign
	ori.l		&0x00800000,%d1		# make smallest sgl
	fmov.s		%d1,%fp0
	bra.b		fout_byte_norm

#################################################################
# fmove.w out ###################################################
#################################################################

# Only "Unimplemented Data Type" exceptions enter here. The operand
# is either a DENORM or a NORM.
fout_word:
	tst.b		STAG(%a6)		# is operand normalized?
	bne.b		fout_word_denorm	# no

	fmovm.x		SRC(%a0),&0x80		# load value

fout_word_norm:
	fmov.l		%d0,%fpcr		# insert rnd prec:mode

	fmov.w		%fp0,%d0		# exec move out w/ correct rnd mode

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# fetch FPSR
	or.w		%d1,2+USER_FPSR(%a6)	# save new exc,accrued bits

	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
	beq.b		fout_word_dn		# must save to integer regfile

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
	bsr.l		_dmem_write_word	# write word

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_w		# yes

	rts

fout_word_dn:
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
	andi.w		&0x7,%d1
	bsr.l		store_dreg_w
	rts

# a DENORM is replaced by the smallest normalized sgl of the same sign
# before the integer conversion is executed.
fout_word_denorm:
	mov.l		SRC_EX(%a0),%d1
	andi.l		&0x80000000,%d1		# keep DENORM sign
	ori.l		&0x00800000,%d1		# make smallest sgl
	fmov.s		%d1,%fp0
	bra.b		fout_word_norm

#################################################################
# fmove.l out ###################################################
#################################################################

# Only "Unimplemented Data Type" exceptions enter here. The operand
# is either a DENORM or a NORM.
fout_long:
	tst.b		STAG(%a6)		# is operand normalized?
	bne.b		fout_long_denorm	# no

	fmovm.x		SRC(%a0),&0x80		# load value

fout_long_norm:
	fmov.l		%d0,%fpcr		# insert rnd prec:mode

	fmov.l		%fp0,%d0		# exec move out w/ correct rnd mode

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# fetch FPSR
	or.w		%d1,2+USER_FPSR(%a6)	# save new exc,accrued bits

fout_long_write:
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
	beq.b		fout_long_dn		# must save to integer regfile

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
	bsr.l		_dmem_write_long	# write long

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_l		# yes

	rts

fout_long_dn:
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
	andi.w		&0x7,%d1
	bsr.l		store_dreg_l
	rts

# a DENORM is replaced by the smallest normalized sgl of the same sign
# before the integer conversion is executed.
fout_long_denorm:
	mov.l		SRC_EX(%a0),%d1
	andi.l		&0x80000000,%d1		# keep DENORM sign
	ori.l		&0x00800000,%d1		# make smallest sgl
	fmov.s		%d1,%fp0
	bra.b		fout_long_norm

#################################################################
# fmove.x out ###################################################
#################################################################

# Only "Unimplemented Data Type" exceptions enter here. The operand
# is either a DENORM or a NORM.
# The DENORM causes an Underflow exception.
fout_ext:

# we copy the extended precision result to FP_SCR0 so that the reserved
# 16-bit field gets zeroed. we do this since we promise not to disturb
# what's at SRC(a0).
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	clr.w		2+FP_SCR0_EX(%a6)	# clear reserved field
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	fmovm.x		SRC(%a0),&0x80		# return result

	bsr.l		_calc_ea_fout		# fix stacked <ea>

	mov.l		%a0,%a1			# pass: dst addr
	lea		FP_SCR0(%a6),%a0	# pass: src addr
	mov.l		&0xc,%d0		# pass: opsize is 12 bytes

# we must not yet write the extended precision data to the stack
# in the pre-decrement case from supervisor mode or else we'll corrupt
# the stack frame. so, leave it in FP_SCR0 for now and deal with it later...
	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
	beq.b		fout_ext_a7

	bsr.l		_dmem_write		# write ext prec number to memory

	tst.l		%d1			# did dstore fail?
	bne.w		fout_ext_err		# yes

	tst.b		STAG(%a6)		# is operand normalized?
	bne.b		fout_ext_denorm		# no
	rts

# the number is a DENORM. must set the underflow exception bit
fout_ext_denorm:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set underflow exc bit

	mov.b		FPCR_ENABLE(%a6),%d0
	andi.b		&0x0a,%d0		# is UNFL or INEX enabled?
	bne.b		fout_ext_exc		# yes
	rts

# we don't want to do the write if the exception occurred in supervisor mode
# so _mem_write2() handles this for us.
fout_ext_a7:
	bsr.l		_mem_write2		# write ext prec number to memory

	tst.l		%d1			# did dstore fail?
	bne.w		fout_ext_err		# yes

	tst.b		STAG(%a6)		# is operand normalized?
	bne.b		fout_ext_denorm		# no
	rts

# build the EXOP for an enabled underflow: normalize the copy in FP_SCR0
# and give it exponent -(shift amount), keeping the original sign.
fout_ext_exc:
	lea		FP_SCR0(%a6),%a0
	bsr.l		norm			# normalize the mantissa
	neg.w		%d0			# new exp = -(shft amt)
	andi.w		&0x7fff,%d0
	andi.w		&0x8000,FP_SCR0_EX(%a6)	# keep only old sign
	or.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	rts

fout_ext_err:
	mov.l		EXC_A6(%a6),(%a6)	# fix stacked a6
	bra.l		facc_out_x

#########################################################################
# fmove.s out ###########################################################
#########################################################################
fout_sgl:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&s_mode*0x10,%d0	# insert sgl prec
	mov.l		%d0,L_SCR3(%a6)		# save rnd prec,mode on stack

#
# operand is a normalized number. first, we check to see if the move out
# would cause either an underflow or overflow. these cases are handled
# separately. otherwise, set the FPCR to the proper rounding mode and
# execute the move.
#
	mov.w		SRC_EX(%a0),%d0		# extract exponent
	andi.w		&0x7fff,%d0		# strip sign

	cmpi.w		%d0,&SGL_HI		# will operand overflow?
	bgt.w		fout_sgl_ovfl		# yes; go handle OVFL
	beq.w		fout_sgl_may_ovfl	# maybe; go handle possible OVFL
	cmpi.w		%d0,&SGL_LO		# will operand underflow?
	blt.w		fout_sgl_unfl		# yes; go handle underflow

#
# NORMs(in range) can be stored out by a simple "fmov.s"
# Unnormalized inputs can come through this point.
#
fout_sgl_exg:
	fmovm.x		SRC(%a0),&0x80		# fetch fop from stack

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fmov.s		%fp0,%d0		# store does convert and round

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# save FPSR

	or.w		%d1,2+USER_FPSR(%a6)	# set possible inex2/ainex

fout_sgl_exg_write:
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
	beq.b		fout_sgl_exg_write_dn	# must save to integer regfile

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
	bsr.l		_dmem_write_long	# write long

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_l		# yes

	rts

fout_sgl_exg_write_dn:
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
	andi.w		&0x7,%d1
	bsr.l		store_dreg_l
	rts

#
# here, we know that the operand would UNFL if moved out to single prec,
# so, denorm and round and then use generic store single routine to
# write the value to memory.
# the source pointer is kept on the stack until the exit below, where
# it is either discarded or popped by fout_sd_exc_unfl.
#
fout_sgl_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set UNFL

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	mov.l		%a0,-(%sp)		# save src ptr

	clr.l		%d0			# pass: S.F. = 0

	cmpi.b		STAG(%a6),&DENORM	# fetch src optype tag
	bne.b		fout_sgl_unfl_cont	# let DENORMs fall through

	lea		FP_SCR0(%a6),%a0
	bsr.l		norm			# normalize the DENORM

fout_sgl_unfl_cont:
	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# calc default underflow result

	lea		FP_SCR0(%a6),%a0	# pass: ptr to fop
	bsr.l		dst_sgl			# convert to single prec

	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
	beq.b		fout_sgl_unfl_dn	# must save to integer regfile

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
	bsr.l		_dmem_write_long	# write long

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_l		# yes

	bra.b		fout_sgl_unfl_chkexc

fout_sgl_unfl_dn:
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
	andi.w		&0x7,%d1
	bsr.l		store_dreg_l

fout_sgl_unfl_chkexc:
	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0a,%d1		# is UNFL or INEX enabled?
	bne.w		fout_sd_exc_unfl	# yes
	addq.l		&0x4,%sp		# no; discard saved src ptr
	rts

#
# it's definitely an overflow so call ovf_res to get the correct answer
#
fout_sgl_ovfl:
	tst.b		3+SRC_HI(%a0)		# is result inexact?
	bne.b		fout_sgl_ovfl_inex2
	tst.l		SRC_LO(%a0)		# is result inexact?
	bne.b		fout_sgl_ovfl_inex2
	ori.w		&ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
	bra.b		fout_sgl_ovfl_cont
fout_sgl_ovfl_inex2:
	ori.w		&ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2

fout_sgl_ovfl_cont:
	mov.l		%a0,-(%sp)		# save src ptr

# call ovf_res() w/ sgl prec and the correct rnd mode to create the default
# overflow result. DON'T save the returned ccodes from ovf_res() since
# fmove out doesn't alter them.
	tst.b		SRC_EX(%a0)		# is operand negative?
	smi		%d1			# set if so
	mov.l		L_SCR3(%a6),%d0		# pass: sgl prec,rnd mode
	bsr.l		ovf_res			# calc OVFL result
	fmovm.x		(%a0),&0x80		# load default overflow result
	fmov.s		%fp0,%d0		# store to single

	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
	beq.b		fout_sgl_ovfl_dn	# must save to integer regfile

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
	bsr.l		_dmem_write_long	# write long

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_l		# yes

	bra.b		fout_sgl_ovfl_chkexc

fout_sgl_ovfl_dn:
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
	andi.w		&0x7,%d1
	bsr.l		store_dreg_l

# an overflow occurred, so the EXOP is needed when OVFL or INEX2 is
# enabled. in the enable byte the underflow exits above mask with 0x0a
# (UNFL = 0x08, INEX2 = 0x02); the OVFL enable is the next bit up, 0x10.
fout_sgl_ovfl_chkexc:
	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x12,%d1		# is OVFL or INEX enabled?
	bne.w		fout_sd_exc_ovfl	# yes
	addq.l		&0x4,%sp		# no; discard saved src ptr
	rts

#
# move out MAY overflow:
# (1) force the exp to 0x3fff
# (2) do a move w/ appropriate rnd mode
# (3) if exp still equals zero, then insert original exponent
#	for the correct result.
#     if exp now equals one, then it overflowed so call ovf_res.
#
fout_sgl_may_ovfl:
	mov.w		SRC_EX(%a0),%d1		# fetch current sign
	andi.w		&0x8000,%d1		# keep it,clear exp
	ori.w		&0x3fff,%d1		# insert exp = 0
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert scaled exp
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fmov.x		FP_SCR0(%a6),%fp0	# force fop to be rounded
	fmov.l		&0x0,%fpcr		# clear FPCR

	fabs.x		%fp0			# need absolute value
	fcmp.b		%fp0,&0x2		# did exponent increase?
	fblt.w		fout_sgl_exg		# no; go finish NORM
	bra.w		fout_sgl_ovfl		# yes; go handle overflow

################

#
# shared EXOP creation for the sgl and dbl move out paths. on entry the
# source operand pointer is on top of the stack and L_SCR3 holds the
# rnd prec,mode. the EXOP is returned in fp1.
#
fout_sd_exc_unfl:
	mov.l		(%sp)+,%a0		# restore a0

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	cmpi.b		STAG(%a6),&DENORM	# was src a DENORM?
	bne.b		fout_sd_exc_cont	# no

	lea		FP_SCR0(%a6),%a0
	bsr.l		norm			# normalize the DENORM
	neg.l		%d0			# new exp = -(shft amt)
	andi.w		&0x7fff,%d0
	bfins		%d0,FP_SCR0_EX(%a6){&1:&15} # insert new exponent
	bra.b		fout_sd_exc_cont

fout_sd_exc:
fout_sd_exc_ovfl:
	mov.l		(%sp)+,%a0		# restore a0

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

fout_sd_exc_cont:
	bclr		&0x7,FP_SCR0_EX(%a6)	# clear sign bit
	sne.b		2+FP_SCR0_EX(%a6)	# set internal sign bit
	lea		FP_SCR0(%a6),%a0	# pass: ptr to DENORM

	mov.b		3+L_SCR3(%a6),%d1
	lsr.b		&0x4,%d1
	andi.w		&0x0c,%d1		# hi(d1) = rnd prec
	swap		%d1
	mov.b		3+L_SCR3(%a6),%d1
	lsr.b		&0x4,%d1
	andi.w		&0x03,%d1		# lo(d1) = rnd mode
	clr.l		%d0			# pass: zero g,r,s
	bsr.l		_round			# round the DENORM

	tst.b		2+FP_SCR0_EX(%a6)	# is EXOP negative?
	beq.b		fout_sd_exc_done	# no
	bset		&0x7,FP_SCR0_EX(%a6)	# yes

fout_sd_exc_done:
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	rts

#################################################################
# fmove.d out ###################################################
#################################################################
fout_dbl:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&d_mode*0x10,%d0	# insert dbl prec
	mov.l		%d0,L_SCR3(%a6)		# save rnd prec,mode on stack

#
# operand is a normalized number. first, we check to see if the move out
# would cause either an underflow or overflow. these cases are handled
# separately. otherwise, set the FPCR to the proper rounding mode and
# execute the move.
#
	mov.w		SRC_EX(%a0),%d0		# extract exponent
	andi.w		&0x7fff,%d0		# strip sign

	cmpi.w		%d0,&DBL_HI		# will operand overflow?
	bgt.w		fout_dbl_ovfl		# yes; go handle OVFL
	beq.w		fout_dbl_may_ovfl	# maybe; go handle possible OVFL
	cmpi.w		%d0,&DBL_LO		# will operand underflow?
	blt.w		fout_dbl_unfl		# yes; go handle underflow

#
# NORMs(in range) can be stored out by a simple "fmov.d"
# Unnormalized inputs can come through this point.
#
fout_dbl_exg:
	fmovm.x		SRC(%a0),&0x80		# fetch fop from stack

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fmov.d		%fp0,L_SCR1(%a6)	# store does convert and round

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d0		# save FPSR

	or.w		%d0,2+USER_FPSR(%a6)	# set possible inex2/ainex

	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
	lea		L_SCR1(%a6),%a0		# pass: src addr
	movq.l		&0x8,%d0		# pass: opsize is 8 bytes
	bsr.l		_dmem_write		# store dbl fop to memory

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_d		# yes

	rts					# no; so we're finished

#
# here, we know that the operand would UNFL if moved out to double prec,
# so, denorm and round and then use generic store double routine to
# write the value to memory.
# the source pointer is kept on the stack until the exit below, where
# it is either discarded or popped by fout_sd_exc_unfl.
#
fout_dbl_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set UNFL

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	mov.l		%a0,-(%sp)		# save src ptr

	clr.l		%d0			# pass: S.F. = 0

	cmpi.b		STAG(%a6),&DENORM	# fetch src optype tag
	bne.b		fout_dbl_unfl_cont	# let DENORMs fall through

	lea		FP_SCR0(%a6),%a0
	bsr.l		norm			# normalize the DENORM

fout_dbl_unfl_cont:
	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# calc default underflow result

	lea		FP_SCR0(%a6),%a0	# pass: ptr to fop
	bsr.l		dst_dbl			# convert to double prec
	mov.l		%d0,L_SCR1(%a6)		# hi(dbl result)
	mov.l		%d1,L_SCR2(%a6)		# lo(dbl result)

	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
	lea		L_SCR1(%a6),%a0		# pass: src addr
	movq.l		&0x8,%d0		# pass: opsize is 8 bytes
	bsr.l		_dmem_write		# store dbl fop to memory

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_d		# yes

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0a,%d1		# is UNFL or INEX enabled?
	bne.w		fout_sd_exc_unfl	# yes
	addq.l		&0x4,%sp		# no; discard saved src ptr
	rts

#
# it's definitely an overflow so call ovf_res to get the correct answer
#
fout_dbl_ovfl:
	mov.w		2+SRC_LO(%a0),%d0
	andi.w		&0x7ff,%d0		# is result inexact?
	bne.b		fout_dbl_ovfl_inex2

	ori.w		&ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
	bra.b		fout_dbl_ovfl_cont
fout_dbl_ovfl_inex2:
	ori.w		&ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2

fout_dbl_ovfl_cont:
	mov.l		%a0,-(%sp)		# save src ptr

# call ovf_res() w/ dbl prec and the correct rnd mode to create the default
# overflow result. DON'T save the returned ccodes from ovf_res() since
# fmove out doesn't alter them.
	tst.b		SRC_EX(%a0)		# is operand negative?
	smi		%d1			# set if so
	mov.l		L_SCR3(%a6),%d0		# pass: dbl prec,rnd mode
	bsr.l		ovf_res			# calc OVFL result
	fmovm.x		(%a0),&0x80		# load default overflow result
	fmov.d		%fp0,L_SCR1(%a6)	# store to double

	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
	lea		L_SCR1(%a6),%a0		# pass: src addr
	movq.l		&0x8,%d0		# pass: opsize is 8 bytes
	bsr.l		_dmem_write		# store dbl fop to memory

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_d		# yes

# an overflow occurred, so the EXOP is needed when OVFL or INEX2 is
# enabled. in the enable byte the underflow exits above mask with 0x0a
# (UNFL = 0x08, INEX2 = 0x02); the OVFL enable is the next bit up, 0x10.
	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x12,%d1		# is OVFL or INEX enabled?
	bne.w		fout_sd_exc_ovfl	# yes
	addq.l		&0x4,%sp		# no; discard saved src ptr
	rts

#
# move out MAY overflow:
# (1) force the exp to 0x3fff
# (2) do a move w/ appropriate rnd mode
# (3) if exp still equals zero, then insert original exponent
#	for the correct result.
#     if exp now equals one, then it overflowed so call ovf_res.
#
fout_dbl_may_ovfl:
	mov.w		SRC_EX(%a0),%d1		# fetch current sign
	andi.w		&0x8000,%d1		# keep it,clear exp
	ori.w		&0x3fff,%d1		# insert exp = 0
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert scaled exp
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fmov.x		FP_SCR0(%a6),%fp0	# force fop to be rounded
	fmov.l		&0x0,%fpcr		# clear FPCR

	fabs.x		%fp0			# need absolute value
	fcmp.b		%fp0,&0x2		# did exponent increase?
	fblt.w		fout_dbl_exg		# no; go finish NORM
	bra.w		fout_dbl_ovfl		# yes; go handle overflow

#########################################################################
# XDEF ****************************************************************	#
#	dst_dbl(): create double precision value from extended prec.	#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to source operand in extended precision		#
#									#
# OUTPUT **************************************************************	#
#	d0 = hi(double precision result)				#
#	d1 = lo(double precision result)				#
#									#
# ALGORITHM ***********************************************************	#
#									#
#  Changes extended precision to double precision.			#
#  Note: no attempt is made to round the extended value to double.	#
#	dbl_sign = ext_sign						#
#	dbl_exp = ext_exp - EXT_BIAS($3fff) + DBL_BIAS($3ff)		#
#	get rid of ext integer bit					#
#	dbl_mant = ext_mant{62:11}					#
#									#
#		---------------   ---------------    ---------------	#
#  extended ->  |s|    exp    |   |1| ms mant   |    | ls mant     |	#
#		---------------   ---------------    ---------------	#
#		 95	    64    63 62	      32      31     11	  0	#
#				     |			     |		#
#				     |			     |		#
#				     |			     |		#
#				     v			     v		#
#			    ---------------   ---------------		#
#  double   ->		    |s|exp| mant  |   |  mant       |		#
#			    ---------------   ---------------		#
#			    63     51   32   31		   0		#
#									#
#  L_SCR1(%a6) and L_SCR2(%a6) are used as scratch while the two	#
#  result longwords are assembled.					#
#									#
#########################################################################

dst_dbl:
# --- hi longword: sign, 11-bit exponent, upper 20 mantissa bits ---
	clr.l		%d0			# clear d0
	mov.w		FTEMP_EX(%a0),%d0	# get exponent
	subi.w		&EXT_BIAS,%d0		# subtract extended precision bias
	addi.w		&DBL_BIAS,%d0		# add double precision bias
	tst.b		FTEMP_HI(%a0)		# is number a denorm?
	bmi.b		dst_get_dupper		# no; integer bit is set
	subq.w		&0x1,%d0		# yes; denorm bias = DBL_BIAS - 1
dst_get_dupper:
	swap		%d0			# d0 now in upper word
	lsl.l		&0x4,%d0		# d0 in proper place for dbl prec exp
	tst.b		FTEMP_EX(%a0)		# test sign
	bpl.b		dst_get_dman		# if positive, go process mantissa
	bset		&0x1f,%d0		# if negative, set sign
dst_get_dman:
	mov.l		FTEMP_HI(%a0),%d1	# get ms mantissa
	bfextu		%d1{&1:&20},%d1		# get upper 20 bits of ms
	or.l		%d1,%d0			# put these bits in ms word of double
	mov.l		%d0,L_SCR1(%a6)		# park hi longword in scratch

# --- lo longword: 11 remaining ms bits, then top 21 ls bits ---
	mov.l		FTEMP_HI(%a0),%d1	# get ms mantissa
	mov.l		&21,%d0			# load shift count
	lsl.l		%d0,%d1			# put lower 11 bits in upper bits
	mov.l		%d1,L_SCR2(%a6)		# build lower lword in memory
	mov.l		FTEMP_LO(%a0),%d1	# get ls mantissa
	bfextu		%d1{&0:&21},%d0		# get ls 21 bits of double
	mov.l		L_SCR2(%a6),%d1
	or.l		%d0,%d1			# put them in double result
	mov.l		L_SCR1(%a6),%d0		# d0 = hi longword
	rts

#########################################################################
# XDEF ****************************************************************	#
#	dst_sgl(): create single precision value from extended prec	#
#									#
# XREF ****************************************************************	#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to source operand in extended precision		#
#									#
# OUTPUT **************************************************************	#
#	d0 = single precision result					#
#									#
# ALGORITHM ***********************************************************	#
#									#
# Changes extended precision to single precision.			#
#	sgl_sign = ext_sign
#	sgl_exp = ext_exp - $3fff(ext bias) + $7f(sgl bias)
#	          (one less if the integer bit is clear, i.e. a denorm)
#	get rid of ext integer bit
#	sgl_mant = ext_mant{62:40}
#
#               ---------------   ---------------    ---------------
#  extended ->  |s|    exp    |   |1| ms mant   |    | ls mant     |
#               ---------------   ---------------    ---------------
#                95         64    63 62    40 32      31     12   0
#                                    |     |
#                                    |     |
#                                    |     |
#                                    v     v
#                             ---------------
#  single   ->                |s|exp| mant  |
#                             ---------------
#                             31     22     0
#
#########################################################################

dst_sgl:
	clr.l	%d0
	mov.w	FTEMP_EX(%a0),%d0	# get exponent
	subi.w	&EXT_BIAS,%d0		# subtract extended precision bias
	addi.w	&SGL_BIAS,%d0		# add single precision bias
	tst.b	FTEMP_HI(%a0)		# is number a denorm?
	bmi.b	dst_get_supper		# no
	subq.w	&0x1,%d0		# yes; denorm bias = SGL_BIAS - 1
dst_get_supper:
	swap	%d0			# put exp in upper word of d0
	lsl.l	&0x7,%d0		# shift it into single exp bits
	tst.b	FTEMP_EX(%a0)		# test sign
	bpl.b	dst_get_sman		# if positive, continue
	bset	&0x1f,%d0		# if negative, put in sign first
dst_get_sman:
	mov.l	FTEMP_HI(%a0),%d1	# get ms mantissa
	andi.l	&0x7fffff00,%d1		# get upper 23 bits of ms
	lsr.l	&0x8,%d1		# and put them flush right
	or.l	%d1,%d0			# put these bits in ms word of single
	rts

##############################################################################
#
# fout_pack: store a result to memory in packed format.
# - fetch the destination <ea> with _calc_ea_fout() and save it on the stack.
# - NORM and DENORM inputs: obtain the k-factor (static field of the command
#   word, or the data register named by it for the dynamic form), convert
#   FP_SRC with bindec() and write the 12-byte image left in FP_SCR0.
# - every other input type is written directly from FP_SRC; an SNAN input
#   additionally sets SNAN/AIOP and the snan bit of the mantissa.
# - a failed memory write branches to fout_ext_err.
#
fout_pack:
	bsr.l	_calc_ea_fout		# fetch the <ea>
	mov.l	%a0,-(%sp)		# save dst addr for fout_pack_write

	mov.b	STAG(%a6),%d0		# fetch input type
	bne.w	fout_pack_not_norm	# input is not NORM

fout_pack_norm:
	btst	&0x4,EXC_CMDREG(%a6)	# static or dynamic?
	beq.b	fout_pack_s		# static

fout_pack_d:
	mov.b	1+EXC_CMDREG(%a6),%d1	# fetch dynamic reg
	lsr.b	&0x4,%d1
	andi.w	&0x7,%d1

	bsr.l	fetch_dreg		# fetch Dn w/ k-factor

	bra.b	fout_pack_type
fout_pack_s:
	mov.b	1+EXC_CMDREG(%a6),%d0	# fetch static field

fout_pack_type:
	bfexts	%d0{&25:&7},%d0		# extract k-factor
	mov.l	%d0,-(%sp)		# save k-factor across bindec()

	lea	FP_SRC(%a6),%a0		# pass: ptr to input

# bindec is currently scrambling FP_SRC for denorm inputs.
# we'll have to change this, but for now, tough luck!!!
	bsr.l	bindec			# convert xprec to packed

#	andi.l	&0xcfff000f,FP_SCR0(%a6) # clear unused fields
	andi.l	&0xcffff00f,FP_SCR0(%a6) # clear unused fields

	mov.l	(%sp)+,%d0		# restore k-factor

	tst.b	3+FP_SCR0_EX(%a6)
	bne.b	fout_pack_set
	tst.l	FP_SCR0_HI(%a6)
	bne.b	fout_pack_set
	tst.l	FP_SCR0_LO(%a6)
	bne.b	fout_pack_set

# add the extra condition that only if the k-factor was zero, too, should
# we zero the exponent
	tst.l	%d0
	bne.b	fout_pack_set
# "mantissa" is all zero which means that the answer is zero. but, the '040
# algorithm allows the exponent to be non-zero. the 881/2 do not. Therefore,
# if the mantissa is zero, I will zero the exponent, too.
# the question now is whether the exponents sign bit is allowed to be non-zero
# for a zero, also...
	andi.w	&0xf000,FP_SCR0(%a6)

fout_pack_set:

	lea	FP_SCR0(%a6),%a0	# pass: src addr

fout_pack_write:
	mov.l	(%sp)+,%a1		# pass: dst addr
	mov.l	&0xc,%d0		# pass: opsize is 12 bytes

	cmpi.b	SPCOND_FLG(%a6),&mda7_flg
	beq.b	fout_pack_a7

	bsr.l	_dmem_write		# write packed number to memory

	tst.l	%d1			# did dstore fail?
	bne.w	fout_ext_err		# yes

	rts

# we don't want to do the write if the exception occurred in supervisor mode
# so _mem_write2() handles this for us.
fout_pack_a7:
	bsr.l	_mem_write2		# write packed number to memory

	tst.l	%d1			# did dstore fail?
	bne.w	fout_ext_err		# yes

	rts

fout_pack_not_norm:
	cmpi.b	%d0,&DENORM		# is it a DENORM?
	beq.w	fout_pack_norm		# yes
	lea	FP_SRC(%a6),%a0		# pass: src addr (no conversion)
	clr.w	2+FP_SRC_EX(%a6)
	cmpi.b	%d0,&SNAN		# is it an SNAN?
	beq.b	fout_pack_snan		# yes
	bra.b	fout_pack_write		# no

fout_pack_snan:
	ori.w	&snaniop2_mask,FPSR_EXCEPT(%a6) # set SNAN/AIOP
	bset	&0x6,FP_SRC_HI(%a6)	# set snan bit
	bra.b	fout_pack_write

#########################################################################
# XDEF **************************************************************** #
#	fmul(): emulates the fmul instruction
#	fsmul(): emulates the fsmul instruction
#	fdmul(): emulates the fdmul instruction
#
# XREF **************************************************************** #
#	scale_to_zero_src() - scale src exponent to zero
#	scale_to_zero_dst() - scale dst exponent to zero
#	unf_res() - return default underflow result
#	ovf_res() - return default overflow result
#	res_qnan() - return QNAN result
#	res_snan() - return SNAN result
#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision source operand
#	a1 = pointer to extended precision destination operand
#	d0  rnd prec,mode
#
# OUTPUT ************************************************************** #
#	fp0 = result
#	fp1 = EXOP (if exception occurred)
#
# ALGORITHM *********************************************************** #
#	Handle NANs, infinities, and zeroes as special cases. Divide
#	norms/denorms into ext/sgl/dbl precision.
#	For norms/denorms, scale the exponents such that a multiply
#	instruction won't cause an exception. Use the regular fmul to
#	compute a result. Check if the regular operands would have taken
#	an exception. If so, return the default overflow/underflow result
#	and return the EXOP if exceptions are enabled. Else, scale the
#	result operand to the proper exponent.
#
#########################################################################

# Scale-factor thresholds, indexed by rounding precision (ext, sgl, dbl).
# fmul compares the combined scale factor against these to classify the
# multiply as overflow / may-overflow / normal / may-underflow / underflow.
	align	0x10
tbl_fmul_ovfl:
	long	0x3fff - 0x7ffe		# ext_max
	long	0x3fff - 0x407e		# sgl_max
	long	0x3fff - 0x43fe		# dbl_max
tbl_fmul_unfl:
	long	0x3fff + 0x0001		# ext_unfl
	long	0x3fff - 0x3f80		# sgl_unfl
	long	0x3fff - 0x3c00		# dbl_unfl

	global	fsmul
fsmul:
	andi.b	&0x30,%d0		# clear rnd prec
	ori.b	&s_mode*0x10,%d0	# insert sgl prec
	bra.b	fmul

	global	fdmul
fdmul:
	andi.b	&0x30,%d0		# clear rnd prec
	ori.b	&d_mode*0x10,%d0	# insert dbl prec

	global	fmul
fmul:
	mov.l	%d0,L_SCR3(%a6)		# store rnd info

	clr.w	%d1
	mov.b	DTAG(%a6),%d1
	lsl.b	&0x3,%d1		# index = (DTAG << 3) | STAG
	or.b	STAG(%a6),%d1		# combine src tags
	bne.w	fmul_not_norm		# optimize on non-norm input

fmul_norm:
	mov.w	DST_EX(%a1),FP_SCR1_EX(%a6)
	mov.l	DST_HI(%a1),FP_SCR1_HI(%a6)
	mov.l	DST_LO(%a1),FP_SCR1_LO(%a6)

	mov.w	SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l	SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l	SRC_LO(%a0),FP_SCR0_LO(%a6)

	bsr.l	scale_to_zero_src	# scale src exponent
	mov.l	%d0,-(%sp)		# save scale factor 1

	bsr.l	scale_to_zero_dst	# scale dst exponent

	add.l	%d0,(%sp)		# SCALE_FACTOR = scale1 + scale2

	mov.w	2+L_SCR3(%a6),%d1	# fetch precision
	lsr.b	&0x6,%d1		# shift to lo bits
	mov.l	(%sp)+,%d0		# load S.F.
	cmp.l	%d0,(tbl_fmul_ovfl.w,%pc,%d1.w*4) # would result ovfl?
	beq.w	fmul_may_ovfl		# result may rnd to overflow
	blt.w	fmul_ovfl		# result will overflow

	cmp.l	%d0,(tbl_fmul_unfl.w,%pc,%d1.w*4) # would result unfl?
	beq.w	fmul_may_unfl		# result may rnd to no unfl
	bgt.w	fmul_unfl		# result will underflow

#
# NORMAL:
# - the result of the multiply operation will neither overflow nor underflow.
# - do the multiply to the proper precision and rounding mode.
# - scale the result exponent using the scale factor. if both operands were
# normalized then we really don't need to go through this scaling. but for now,
# this will do.
#
fmul_normal:
	fmovm.x	FP_SCR1(%a6),&0x80	# load dst operand

	fmov.l	L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l	&0x0,%fpsr		# clear FPSR

	fmul.x	FP_SCR0(%a6),%fp0	# execute multiply

	fmov.l	%fpsr,%d1		# save status
	fmov.l	&0x0,%fpcr		# clear FPCR

	or.l	%d1,USER_FPSR(%a6)	# save INEX2,N

# entered with d0 = scale factor, fp0 = scaled result
fmul_normal_exit:
	fmovm.x	&0x80,FP_SCR0(%a6)	# store out result
	mov.l	%d2,-(%sp)		# save d2
	mov.w	FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
	mov.l	%d1,%d2			# make a copy
	andi.l	&0x7fff,%d1		# strip sign
	andi.w	&0x8000,%d2		# keep old sign
	sub.l	%d0,%d1			# subtract scale factor
	or.w	%d2,%d1			# concat old sign,new exp
	mov.w	%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l	(%sp)+,%d2		# restore d2
	fmovm.x	FP_SCR0(%a6),&0x80	# return default result in fp0
	rts

#
# OVERFLOW:
# - the result of the multiply operation is an overflow.
# - do the multiply to the proper precision and rounding mode in order to
# set the inexact bits.
# - calculate the default result and return it in fp0.
# - if overflow or inexact is enabled, we need a multiply result rounded to
# extended precision. if the original operation was extended, then we have this
# result. if the original operation was single or double, we have to do another
# multiply using extended precision and the correct rounding mode. the result
# of this operation then has its exponent scaled by -0x6000 to create the
# exceptional operand.
#
fmul_ovfl:
	fmovm.x	FP_SCR1(%a6),&0x80	# load dst operand

	fmov.l	L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l	&0x0,%fpsr		# clear FPSR

	fmul.x	FP_SCR0(%a6),%fp0	# execute multiply

	fmov.l	%fpsr,%d1		# save status
	fmov.l	&0x0,%fpcr		# clear FPCR

	or.l	%d1,USER_FPSR(%a6)	# save INEX2,N

# save setting this until now because this is where fmul_may_ovfl may jump in
fmul_ovfl_tst:
	or.l	&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b	FPCR_ENABLE(%a6),%d1
	andi.b	&0x13,%d1		# is OVFL or INEX enabled?
	bne.b	fmul_ovfl_ena		# yes

# calculate the default result
fmul_ovfl_dis:
	btst	&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne	%d1			# set sign param accordingly
	mov.l	L_SCR3(%a6),%d0		# pass rnd prec,mode
	bsr.l	ovf_res			# calculate default result
	or.b	%d0,FPSR_CC(%a6)	# set INF,N if applicable
	fmovm.x	(%a0),&0x80		# return default result in fp0
	rts

#
# OVFL is enabled; Create EXOP:
# - if precision is extended, then we have the EXOP. simply bias the exponent
# with an extra -0x6000. if the precision is single or double, we need to
# calculate a result rounded to extended precision.
#
fmul_ovfl_ena:
	mov.l	L_SCR3(%a6),%d1
	andi.b	&0xc0,%d1		# test the rnd prec
	bne.b	fmul_ovfl_ena_sd	# it's sgl or dbl

fmul_ovfl_ena_cont:
	fmovm.x	&0x80,FP_SCR0(%a6)	# move result to stack

	mov.l	%d2,-(%sp)		# save d2
	mov.w	FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.w	%d1,%d2			# make a copy
	andi.l	&0x7fff,%d1		# strip sign
	sub.l	%d0,%d1			# subtract scale factor
	subi.l	&0x6000,%d1		# subtract bias
	andi.w	&0x7fff,%d1		# clear sign bit
	andi.w	&0x8000,%d2		# keep old sign
	or.w	%d2,%d1			# concat old sign,new exp
	mov.w	%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l	(%sp)+,%d2		# restore d2
	fmovm.x	FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.b	fmul_ovfl_dis

fmul_ovfl_ena_sd:
	fmovm.x	FP_SCR1(%a6),&0x80	# load dst operand

	mov.l	L_SCR3(%a6),%d1
	andi.b	&0x30,%d1		# keep rnd mode only
	fmov.l	%d1,%fpcr		# set FPCR

	fmul.x	FP_SCR0(%a6),%fp0	# execute multiply

	fmov.l	&0x0,%fpcr		# clear FPCR
	bra.b	fmul_ovfl_ena_cont

#
# may OVERFLOW:
# - the result of the multiply operation MAY overflow.
# - do the multiply to the proper precision and rounding mode in order to
# set the inexact bits.
# - calculate the default result and return it in fp0.
#
fmul_may_ovfl:
	fmovm.x	FP_SCR1(%a6),&0x80	# load dst op

	fmov.l	L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l	&0x0,%fpsr		# clear FPSR

	fmul.x	FP_SCR0(%a6),%fp0	# execute multiply

	fmov.l	%fpsr,%d1		# save status
	fmov.l	&0x0,%fpcr		# clear FPCR

	or.l	%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x	%fp0,%fp1		# make a copy of result
	fcmp.b	%fp1,&0x2		# is |result| >= 2.b?
	fbge.w	fmul_ovfl_tst		# yes; overflow has occurred

# no, it didn't overflow; we have correct result
	bra.w	fmul_normal_exit

#
# UNDERFLOW:
# - the result of the multiply operation is an underflow.
# - do the multiply to the proper precision and rounding mode in order to
# set the inexact bits.
# - calculate the default result and return it in fp0.
# - if underflow or inexact is enabled, we need a multiply result rounded to
# extended precision. if the original operation was extended, then we have this
# result. if the original operation was single or double, we have to do another
# multiply using extended precision and the correct rounding mode. the result
# of this operation then has its exponent scaled by +0x6000 to create the
# exceptional operand.
#
fmul_unfl:
	bset	&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

# for fun, let's use only extended precision, round to zero. then, let
# the unf_res() routine figure out all the rest.
# will we get the correct answer.
	fmovm.x	FP_SCR1(%a6),&0x80	# load dst operand

	fmov.l	&rz_mode*0x10,%fpcr	# set FPCR
	fmov.l	&0x0,%fpsr		# clear FPSR

	fmul.x	FP_SCR0(%a6),%fp0	# execute multiply

	fmov.l	%fpsr,%d1		# save status
	fmov.l	&0x0,%fpcr		# clear FPCR

	or.l	%d1,USER_FPSR(%a6)	# save INEX2,N

	mov.b	FPCR_ENABLE(%a6),%d1
	andi.b	&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b	fmul_unfl_ena		# yes

fmul_unfl_dis:
	fmovm.x	&0x80,FP_SCR0(%a6)	# store out result

	lea	FP_SCR0(%a6),%a0	# pass: result addr
	mov.l	L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l	unf_res			# calculate default result
	or.b	%d0,FPSR_CC(%a6)	# unf_res may have set 'Z'
	fmovm.x	FP_SCR0(%a6),&0x80	# return default result in fp0
	rts

#
# UNFL is enabled.
#
fmul_unfl_ena:
	fmovm.x	FP_SCR1(%a6),&0x40	# load dst op

	mov.l	L_SCR3(%a6),%d1
	andi.b	&0xc0,%d1		# is precision extended?
	bne.b	fmul_unfl_ena_sd	# no, sgl or dbl

# if the rnd mode is anything but RZ, then we have to re-do the above
# multiplication because we used RZ for all.
	fmov.l	L_SCR3(%a6),%fpcr	# set FPCR

fmul_unfl_ena_cont:
	fmov.l	&0x0,%fpsr		# clear FPSR

	fmul.x	FP_SCR0(%a6),%fp1	# execute multiply

	fmov.l	&0x0,%fpcr		# clear FPCR

	fmovm.x	&0x40,FP_SCR0(%a6)	# save result to stack
	mov.l	%d2,-(%sp)		# save d2
	mov.w	FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l	%d1,%d2			# make a copy
	andi.l	&0x7fff,%d1		# strip sign
	andi.w	&0x8000,%d2		# keep old sign
	sub.l	%d0,%d1			# subtract scale factor
	addi.l	&0x6000,%d1		# add bias
	andi.w	&0x7fff,%d1		# clear sign bit
	or.w	%d2,%d1			# concat old sign,new exp
	mov.w	%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l	(%sp)+,%d2		# restore d2
	fmovm.x	FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.w	fmul_unfl_dis

fmul_unfl_ena_sd:
	mov.l	L_SCR3(%a6),%d1
	andi.b	&0x30,%d1		# use only rnd mode
	fmov.l	%d1,%fpcr		# set FPCR

	bra.b	fmul_unfl_ena_cont

# MAY UNDERFLOW:
# -use the correct rounding mode and precision. this code favors operations
# that do not underflow.
fmul_may_unfl:
	fmovm.x	FP_SCR1(%a6),&0x80	# load dst operand

	fmov.l	L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l	&0x0,%fpsr		# clear FPSR

	fmul.x	FP_SCR0(%a6),%fp0	# execute multiply

	fmov.l	%fpsr,%d1		# save status
	fmov.l	&0x0,%fpcr		# clear FPCR

	or.l	%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x	%fp0,%fp1		# make a copy of result
	fcmp.b	%fp1,&0x2		# compare |result| with 2.b
	fbgt.w	fmul_normal_exit	# |result| > 2; no underflow occurred
	fblt.w	fmul_unfl		# |result| < 2; underflow occurred

#
# we still don't know if underflow occurred. result is ~ equal to 2. but,
# we don't know if the result was an underflow that rounded up to a 2 or
# a normalized number that rounded down to a 2. so, redo the entire operation
# using RZ as the rounding mode to see what the pre-rounded result is.
# this case should be relatively rare.
#
	fmovm.x	FP_SCR1(%a6),&0x40	# load dst operand

	mov.l	L_SCR3(%a6),%d1
	andi.b	&0xc0,%d1		# keep rnd prec
	ori.b	&rz_mode*0x10,%d1	# insert RZ

	fmov.l	%d1,%fpcr		# set FPCR
	fmov.l	&0x0,%fpsr		# clear FPSR

	fmul.x	FP_SCR0(%a6),%fp1	# execute multiply

	fmov.l	&0x0,%fpcr		# clear FPCR
	fabs.x	%fp1			# make absolute value
	fcmp.b	%fp1,&0x2		# is |result| < 2.b?
	fbge.w	fmul_normal_exit	# no; no underflow occurred
	bra.w	fmul_unfl		# yes, underflow occurred

################################################################################

#
# Multiply: inputs are not both normalized; what are they?
# d1 = (DTAG << 3) | STAG on entry; the table below is laid out as
# groups of eight entries per destination tag, commented "DST x SRC".
#
fmul_not_norm:
	mov.w	(tbl_fmul_op.b,%pc,%d1.w*2),%d1
	jmp	(tbl_fmul_op.b,%pc,%d1.w)

	swbeg	&48
tbl_fmul_op:
	short	fmul_norm	- tbl_fmul_op # NORM x NORM
	short	fmul_zero	- tbl_fmul_op # NORM x ZERO
	short	fmul_inf_src	- tbl_fmul_op # NORM x INF
	short	fmul_res_qnan	- tbl_fmul_op # NORM x QNAN
	short	fmul_norm	- tbl_fmul_op # NORM x DENORM
	short	fmul_res_snan	- tbl_fmul_op # NORM x SNAN
	short	tbl_fmul_op	- tbl_fmul_op #
	short	tbl_fmul_op	- tbl_fmul_op #

	short	fmul_zero	- tbl_fmul_op # ZERO x NORM
	short	fmul_zero	- tbl_fmul_op # ZERO x ZERO
	short	fmul_res_operr	- tbl_fmul_op # ZERO x INF
	short	fmul_res_qnan	- tbl_fmul_op # ZERO x QNAN
	short	fmul_zero	- tbl_fmul_op # ZERO x DENORM
	short	fmul_res_snan	- tbl_fmul_op # ZERO x SNAN
	short	tbl_fmul_op	- tbl_fmul_op #
	short	tbl_fmul_op	- tbl_fmul_op #

	short	fmul_inf_dst	- tbl_fmul_op # INF x NORM
	short	fmul_res_operr	- tbl_fmul_op # INF x ZERO
	short	fmul_inf_dst	- tbl_fmul_op # INF x INF
	short	fmul_res_qnan	- tbl_fmul_op # INF x QNAN
	short	fmul_inf_dst	- tbl_fmul_op # INF x DENORM
	short	fmul_res_snan	- tbl_fmul_op # INF x SNAN
	short	tbl_fmul_op	- tbl_fmul_op #
	short	tbl_fmul_op	- tbl_fmul_op #

	short	fmul_res_qnan	- tbl_fmul_op # QNAN x NORM
	short	fmul_res_qnan	- tbl_fmul_op # QNAN x ZERO
	short	fmul_res_qnan	- tbl_fmul_op # QNAN x INF
	short	fmul_res_qnan	- tbl_fmul_op # QNAN x QNAN
	short	fmul_res_qnan	- tbl_fmul_op # QNAN x DENORM
	short	fmul_res_snan	- tbl_fmul_op # QNAN x SNAN
	short	tbl_fmul_op	- tbl_fmul_op #
	short	tbl_fmul_op	- tbl_fmul_op #

	short	fmul_norm	- tbl_fmul_op # DENORM x NORM
	short	fmul_zero	- tbl_fmul_op # DENORM x ZERO
	short	fmul_inf_src	- tbl_fmul_op # DENORM x INF
	short	fmul_res_qnan	- tbl_fmul_op # DENORM x QNAN
	short	fmul_norm	- tbl_fmul_op # DENORM x DENORM
	short	fmul_res_snan	- tbl_fmul_op # DENORM x SNAN
	short	tbl_fmul_op	- tbl_fmul_op #
	short	tbl_fmul_op	- tbl_fmul_op #

	short	fmul_res_snan	- tbl_fmul_op # SNAN x NORM
	short	fmul_res_snan	- tbl_fmul_op # SNAN x ZERO
	short	fmul_res_snan	- tbl_fmul_op # SNAN x INF
	short	fmul_res_snan	- tbl_fmul_op # SNAN x QNAN
	short	fmul_res_snan	- tbl_fmul_op # SNAN x DENORM
	short	fmul_res_snan	- tbl_fmul_op # SNAN x SNAN
	short	tbl_fmul_op	- tbl_fmul_op #
	short	tbl_fmul_op	- tbl_fmul_op #

fmul_res_operr:
	bra.l	res_operr
fmul_res_snan:
	bra.l	res_snan
fmul_res_qnan:
	bra.l	res_qnan

#
# Multiply: (Zero x Zero) || (Zero x norm) || (Zero x denorm)
#
	global	fmul_zero		# global for fsglmul
fmul_zero:
	mov.b	SRC_EX(%a0),%d0		# exclusive or the signs
	mov.b	DST_EX(%a1),%d1
	eor.b	%d0,%d1
	bpl.b	fmul_zero_p		# result ZERO is pos.
fmul_zero_n:
	fmov.s	&0x80000000,%fp0	# load -ZERO
	mov.b	&z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/N
	rts
fmul_zero_p:
	fmov.s	&0x00000000,%fp0	# load +ZERO
	mov.b	&z_bmask,FPSR_CC(%a6)	# set Z
	rts

#
# Multiply: (inf x inf) || (inf x norm) || (inf x denorm)
#
# Note: The j-bit for an infinity is a don't-care. However, to be
# strictly compatible w/ the 68881/882, we make sure to return an
# INF w/ the j-bit set if the input INF j-bit was set. Destination
# INFs take priority.
#
	global	fmul_inf_dst		# global for fsglmul
fmul_inf_dst:
	fmovm.x	DST(%a1),&0x80		# return INF result in fp0
	mov.b	SRC_EX(%a0),%d0		# exclusive or the signs
	mov.b	DST_EX(%a1),%d1
	eor.b	%d0,%d1
	bpl.b	fmul_inf_dst_p		# result INF is pos.
fmul_inf_dst_n:
	fabs.x	%fp0			# clear result sign
	fneg.x	%fp0			# set result sign
	mov.b	&inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
	rts
fmul_inf_dst_p:
	fabs.x	%fp0			# clear result sign
	mov.b	&inf_bmask,FPSR_CC(%a6)	# set INF
	rts

	global	fmul_inf_src		# global for fsglmul
fmul_inf_src:
	fmovm.x	SRC(%a0),&0x80		# return INF result in fp0
	mov.b	SRC_EX(%a0),%d0		# exclusive or the signs
	mov.b	DST_EX(%a1),%d1
	eor.b	%d0,%d1
	bpl.b	fmul_inf_dst_p		# result INF is pos.
	bra.b	fmul_inf_dst_n

#########################################################################
# XDEF **************************************************************** #
#	fin(): emulates the fmove instruction
#	fsin(): emulates the fsmove instruction
#	fdin(): emulates the fdmove instruction
#
# XREF **************************************************************** #
#	norm() - normalize mantissa for EXOP on denorm
#	scale_to_zero_src() - scale src exponent to zero
#	ovf_res() - return default overflow result
#	unf_res() - return default underflow result
#	res_qnan_1op() - return QNAN result
#	res_snan_1op() - return SNAN result
#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision source operand
#	d0 = round prec/mode
#
# OUTPUT ************************************************************** #
#	fp0 = result
#	fp1 = EXOP (if exception occurred)
#
# ALGORITHM *********************************************************** #
#	Handle NANs, infinities, and zeroes as special cases. Divide
#	norms into extended, single, and double precision.
#	Norms can be emulated w/ a regular fmove instruction. For
#	sgl/dbl, must scale exponent and perform an "fmove". Check to see
#	if the result would have overflowed/underflowed. If so, use unf_res()
#	or ovf_res() to return the default result. Also return EXOP if
#	exception is enabled. If no exception, return the default result.
#	Unnorms don't pass through here.
#
#########################################################################

	global	fsin
fsin:
	andi.b	&0x30,%d0		# clear rnd prec
	ori.b	&s_mode*0x10,%d0	# insert sgl precision
	bra.b	fin

	global	fdin
fdin:
	andi.b	&0x30,%d0		# clear rnd prec
	ori.b	&d_mode*0x10,%d0	# insert dbl precision

	global	fin
fin:
	mov.l	%d0,L_SCR3(%a6)		# store rnd info

	mov.b	STAG(%a6),%d1		# fetch src optype tag
	bne.w	fin_not_norm		# optimize on non-norm input

#
# FP MOVE IN: NORMs and DENORMs ONLY!
#
fin_norm:
	andi.b	&0xc0,%d0		# is precision extended?
	bne.w	fin_not_ext		# no, so go handle dbl or sgl

#
# precision selected is extended. so...we cannot get an underflow
# or overflow because of rounding to the correct precision. so...
# skip the scaling and unscaling...
#
	tst.b	SRC_EX(%a0)		# is the operand negative?
	bpl.b	fin_norm_done		# no
	bset	&neg_bit,FPSR_CC(%a6)	# yes, so set 'N' ccode bit
fin_norm_done:
	fmovm.x	SRC(%a0),&0x80		# return result in fp0
	rts

#
# for an extended precision DENORM, the UNFL exception bit is set
# the accrued bit is NOT set in this instance(no inexactness!)
#
fin_denorm:
	andi.b	&0xc0,%d0		# is precision extended?
	bne.w	fin_not_ext		# no, so go handle dbl or sgl

	bset	&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
	tst.b	SRC_EX(%a0)		# is the operand negative?
	bpl.b	fin_denorm_done		# no
	bset	&neg_bit,FPSR_CC(%a6)	# yes, so set 'N' ccode bit
fin_denorm_done:
	fmovm.x	SRC(%a0),&0x80		# return result in fp0
	btst	&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
	bne.b	fin_denorm_unfl_ena	# yes
	rts

#
# the input is an extended DENORM and underflow is enabled in the FPCR.
# normalize the mantissa and add the bias of 0x6000 to the resulting negative
# exponent and insert back into the operand.
#
fin_denorm_unfl_ena:
	mov.w	SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l	SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l	SRC_LO(%a0),FP_SCR0_LO(%a6)
	lea	FP_SCR0(%a6),%a0	# pass: ptr to operand
	bsr.l	norm			# normalize result
	neg.w	%d0			# new exponent = -(shft val)
	addi.w	&0x6000,%d0		# add new bias to exponent
	mov.w	FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
	andi.w	&0x8000,%d1		# keep old sign
	andi.w	&0x7fff,%d0		# clear sign position
	or.w	%d1,%d0			# concat new exp,old sign
	mov.w	%d0,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x	FP_SCR0(%a6),&0x40	# return EXOP in fp1
	rts

#
# operand is to be rounded to single or double precision
#
fin_not_ext:
	cmpi.b	%d0,&s_mode*0x10	# separate sgl/dbl prec
	bne.b	fin_dbl

#
# operand is to be rounded to single precision
#
fin_sgl:
	mov.w	SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l	SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l	SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l	scale_to_zero_src	# calculate scale factor

	cmpi.l	%d0,&0x3fff-0x3f80	# will move in underflow?
	bge.w	fin_sd_unfl		# yes; go handle underflow
	cmpi.l	%d0,&0x3fff-0x407e	# will move in overflow?
	beq.w	fin_sd_may_ovfl		# maybe; go check
	blt.w	fin_sd_ovfl		# yes; go handle overflow

#
# operand will NOT overflow or underflow when moved into the fp reg file
#
fin_sd_normal:
	fmov.l	&0x0,%fpsr		# clear FPSR
	fmov.l	L_SCR3(%a6),%fpcr	# set FPCR

	fmov.x	FP_SCR0(%a6),%fp0	# perform move

	fmov.l	%fpsr,%d1		# save FPSR
	fmov.l	&0x0,%fpcr		# clear FPCR

	or.l	%d1,USER_FPSR(%a6)	# save INEX2,N

# entered with d0 = scale factor, fp0 = scaled result
fin_sd_normal_exit:
	mov.l	%d2,-(%sp)		# save d2
	fmovm.x	&0x80,FP_SCR0(%a6)	# store out result
	mov.w	FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
	mov.w	%d1,%d2			# make a copy
	andi.l	&0x7fff,%d1		# strip sign
	sub.l	%d0,%d1			# subtract scale factor
	andi.w	&0x8000,%d2		# keep old sign
	or.w	%d1,%d2			# concat old sign,new exponent
	mov.w	%d2,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l	(%sp)+,%d2		# restore d2
	fmovm.x	FP_SCR0(%a6),&0x80	# return result in fp0
	rts

#
# operand is to be rounded to double precision
#
fin_dbl:
	mov.w	SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l	SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l	SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l	scale_to_zero_src	# calculate scale factor

	cmpi.l	%d0,&0x3fff-0x3c00	# will move in underflow?
	bge.w	fin_sd_unfl		# yes; go handle underflow
	cmpi.l	%d0,&0x3fff-0x43fe	# will move in overflow?
	beq.w	fin_sd_may_ovfl		# maybe; go check
	blt.w	fin_sd_ovfl		# yes; go handle overflow
	bra.w	fin_sd_normal		# no; go handle normalized op

#
# operand WILL underflow when moved in to the fp register file
#
fin_sd_unfl:
	bset	&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	tst.b	FP_SCR0_EX(%a6)		# is operand negative?
	bpl.b	fin_sd_unfl_tst
	bset	&neg_bit,FPSR_CC(%a6)	# set 'N' ccode bit

# if underflow or inexact is enabled, then go calculate the EXOP first.
fin_sd_unfl_tst:
	mov.b	FPCR_ENABLE(%a6),%d1
	andi.b	&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b	fin_sd_unfl_ena		# yes

fin_sd_unfl_dis:
	lea	FP_SCR0(%a6),%a0	# pass: result addr
	mov.l	L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l	unf_res			# calculate default result
	or.b	%d0,FPSR_CC(%a6)	# unf_res may have set 'Z'
	fmovm.x	FP_SCR0(%a6),&0x80	# return default result in fp0
	rts

#
# operand will underflow AND underflow or inexact is enabled.
# Therefore, we must return the result rounded to extended precision.
#
fin_sd_unfl_ena:
	mov.l	FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
	mov.l	FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
	mov.w	FP_SCR0_EX(%a6),%d1	# load current exponent

	mov.l	%d2,-(%sp)		# save d2
	mov.w	%d1,%d2			# make a copy
	andi.l	&0x7fff,%d1		# strip sign
	sub.l	%d0,%d1			# subtract scale factor
	andi.w	&0x8000,%d2		# extract old sign
	addi.l	&0x6000,%d1		# add new bias
	andi.w	&0x7fff,%d1		# clear sign position
	or.w	%d1,%d2			# concat old sign,new exp
	mov.w	%d2,FP_SCR1_EX(%a6)	# insert new exponent
	fmovm.x	FP_SCR1(%a6),&0x40	# return EXOP in fp1
	mov.l	(%sp)+,%d2		# restore d2
	bra.b	fin_sd_unfl_dis

#
# operand WILL overflow.
#
fin_sd_ovfl:
	fmov.l	&0x0,%fpsr		# clear FPSR
	fmov.l	L_SCR3(%a6),%fpcr	# set FPCR

	fmov.x	FP_SCR0(%a6),%fp0	# perform move

	fmov.l	&0x0,%fpcr		# clear FPCR
	fmov.l	%fpsr,%d1		# save FPSR

	or.l	%d1,USER_FPSR(%a6)	# save INEX2,N

fin_sd_ovfl_tst:
	or.l	&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b	FPCR_ENABLE(%a6),%d1
	andi.b	&0x13,%d1		# is OVFL or INEX enabled?
	bne.b	fin_sd_ovfl_ena		# yes

#
# OVFL is not enabled; therefore, we must create the default result by
# calling ovf_res().
#
fin_sd_ovfl_dis:
	btst	&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne	%d1			# set sign param accordingly
	mov.l	L_SCR3(%a6),%d0		# pass: prec,mode
	bsr.l	ovf_res			# calculate default result
	or.b	%d0,FPSR_CC(%a6)	# set INF,N if applicable
	fmovm.x	(%a0),&0x80		# return default result in fp0
	rts

#
# OVFL is enabled.
# the INEX2 bit has already been updated by the round to the correct precision.
# now, round to extended(and don't alter the FPSR).
#
fin_sd_ovfl_ena:
	mov.l	%d2,-(%sp)		# save d2
	mov.w	FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l	%d1,%d2			# make a copy
	andi.l	&0x7fff,%d1		# strip sign
	andi.w	&0x8000,%d2		# keep old sign
	sub.l	%d0,%d1			# subtract scale factor
	sub.l	&0x6000,%d1		# subtract bias
	andi.w	&0x7fff,%d1		# clear sign position
	or.w	%d2,%d1			# concat old sign,new exp
	mov.w	%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l	(%sp)+,%d2		# restore d2
	fmovm.x	FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.b	fin_sd_ovfl_dis

#
# the move in MAY overflow. so...
#
fin_sd_may_ovfl:
	fmov.l	&0x0,%fpsr		# clear FPSR
	fmov.l	L_SCR3(%a6),%fpcr	# set FPCR

	fmov.x	FP_SCR0(%a6),%fp0	# perform the move

	fmov.l	%fpsr,%d1		# save status
	fmov.l	&0x0,%fpcr		# clear FPCR

	or.l	%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x	%fp0,%fp1		# make a copy of result
	fcmp.b	%fp1,&0x2		# is |result| >= 2.b?
	fbge.w	fin_sd_ovfl_tst		# yes; overflow has occurred

# no, it didn't overflow; we have correct result
	bra.w	fin_sd_normal_exit

##########################################################################

#
# operand is not a NORM: check its optype and branch accordingly
#
fin_not_norm:
	cmpi.b	%d1,&DENORM		# weed out DENORM
	beq.w	fin_denorm
	cmpi.b	%d1,&SNAN		# weed out SNANs
	beq.l	res_snan_1op
	cmpi.b	%d1,&QNAN		# weed out QNANs
	beq.l	res_qnan_1op

#
# do the fmove in; at this point, only possible ops are ZERO and INF.
# use fmov to determine ccodes.
# prec:mode should be zero at this point but it won't affect answer anyways.
#
	fmov.x	SRC(%a0),%fp0		# do fmove in
	fmov.l	%fpsr,%d0		# no exceptions possible
	rol.l	&0x8,%d0		# put ccodes in lo byte
	mov.b	%d0,FPSR_CC(%a6)	# insert correct ccodes
	rts

#########################################################################
# XDEF **************************************************************** #
#	fdiv(): emulates the fdiv instruction
#	fsdiv(): emulates the fsdiv instruction
#	fddiv(): emulates the fddiv instruction
#
# XREF **************************************************************** #
#	scale_to_zero_src() - scale src exponent to zero
#	scale_to_zero_dst() - scale dst exponent to zero
#	unf_res() - return default underflow result
#	ovf_res() - return default overflow result
#	res_qnan() - return QNAN result
#	res_snan() - return SNAN result
#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision source operand
#	a1 = pointer to extended precision destination operand
#	d0  rnd prec,mode
#
# OUTPUT ************************************************************** #
#	fp0 = result
#	fp1 = EXOP (if exception occurred)
#
# ALGORITHM *********************************************************** #
#	Handle NANs, infinities, and zeroes as special cases. Divide
#	norms/denorms into ext/sgl/dbl precision.
#	For norms/denorms, scale the exponents such that a divide
#	instruction won't cause an exception. Use the regular fdiv to
#	compute a result. Check if the regular operands would have taken
#	an exception. If so, return the default overflow/underflow result
#	and return the EXOP if exceptions are enabled. Else, scale the
#	result operand to the proper exponent.
#									#
#########################################################################

	align		0x10
#
# Scale-factor thresholds, one long per rounding precision. fdiv_norm
# indexes both tables with %d1 = (rnd prec bits of L_SCR3) >> 6, scaled by 4;
# the existing entry comments give the order ext, sgl, dbl.
#
tbl_fdiv_unfl:
	long		0x3fff - 0x0000		# ext_unfl
	long		0x3fff - 0x3f81		# sgl_unfl
	long		0x3fff - 0x3c01		# dbl_unfl

tbl_fdiv_ovfl:
	long		0x3fff - 0x7ffe		# ext overflow exponent
	long		0x3fff - 0x407e		# sgl overflow exponent
	long		0x3fff - 0x43fe		# dbl overflow exponent

	global		fsdiv
fsdiv:
	andi.b		&0x30,%d0		# keep rnd mode; clear rnd prec
	ori.b		&s_mode*0x10,%d0	# insert sgl prec
	bra.b		fdiv

	global		fddiv
fddiv:
	andi.b		&0x30,%d0		# keep rnd mode; clear rnd prec
	ori.b		&d_mode*0x10,%d0	# insert dbl prec
						# fall through into fdiv

	global		fdiv
fdiv:
	mov.l		%d0,L_SCR3(%a6)		# store rnd info

	clr.w		%d1
	mov.b		DTAG(%a6),%d1
	lsl.b		&0x3,%d1
	or.b		STAG(%a6),%d1		# d1 = DTAG*8 + STAG (tbl_fdiv_op index)

	bne.w		fdiv_not_norm		# optimize on non-norm input

#
# DIVIDE: NORMs and DENORMs ONLY!
#
fdiv_norm:
	# work on private copies of both operands (dst -> FP_SCR1, src -> FP_SCR0)
	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	bsr.l		scale_to_zero_src	# scale src exponent
	mov.l		%d0,-(%sp)		# save scale factor 1

	bsr.l		scale_to_zero_dst	# scale dst exponent

	neg.l		(%sp)			# (%sp) = -scale1
	add.l		%d0,(%sp)		# SCALE FACTOR = scale2 - scale1

	mov.w		2+L_SCR3(%a6),%d1	# fetch precision
	lsr.b		&0x6,%d1		# shift to lo bits
	mov.l		(%sp)+,%d0		# load S.F.
	cmp.l		%d0,(tbl_fdiv_ovfl.b,%pc,%d1.w*4) # will result overflow?
	ble.w		fdiv_may_ovfl		# maybe; fdiv_may_ovfl checks the result

	cmp.l		%d0,(tbl_fdiv_unfl.w,%pc,%d1.w*4) # will result underflow?
	beq.w		fdiv_may_unfl		# maybe
	bgt.w		fdiv_unfl		# yes; go handle underflow

fdiv_normal:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fdiv.x		FP_SCR0(%a6),%fp0	# perform divide

	fmov.l		%fpsr,%d1		# save FPSR
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

#
# Entered with fp0 = scaled result and d0 = scale factor.
#
fdiv_normal_exit:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store result on stack
	mov.l		%d2,-(%sp)		# store d2
	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# subtract scale factor
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts

#
# Exponent limits compared against the unscaled result exponent in
# fdiv_may_ovfl; indexed by precision like the tables above.
#
tbl_fdiv_ovfl2:
	long		0x7fff
	long		0x407f
	long		0x43ff

fdiv_no_ovfl:
	mov.l		(%sp)+,%d0		# restore scale factor
	bra.b		fdiv_normal_exit

fdiv_may_ovfl:
	mov.l		%d0,-(%sp)		# save scale factor

	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide

	fmov.l		%fpsr,%d0
	fmov.l		&0x0,%fpcr

	or.l		%d0,USER_FPSR(%a6)	# save INEX,N

	fmovm.x		&0x01,-(%sp)		# save result to stack
	mov.w		(%sp),%d0		# fetch new exponent
	add.l		&0xc,%sp		# clear result from stack
	andi.l		&0x7fff,%d0		# strip sign
	sub.l		(%sp),%d0		# subtract scale factor
	# %d1 still holds the precision index computed in fdiv_norm
	cmp.l		%d0,(tbl_fdiv_ovfl2.b,%pc,%d1.w*4)
	blt.b		fdiv_no_ovfl
	mov.l		(%sp)+,%d0		# restore scale factor

fdiv_ovfl_tst:
	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fdiv_ovfl_ena		# yes

fdiv_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	rts

#
# OVFL or INEX is enabled: build the EXOP in fp1 first, then fall back
# into fdiv_ovfl_dis for the default result.
#
fdiv_ovfl_ena:
	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# is precision extended?
	bne.b		fdiv_ovfl_ena_sd	# no, do sgl or dbl

fdiv_ovfl_ena_cont:
	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack

	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.w		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# subtract scale factor
	subi.l		&0x6000,%d1		# subtract bias
	andi.w		&0x7fff,%d1		# clear sign bit
	andi.w		&0x8000,%d2		# keep old sign
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.b		fdiv_ovfl_dis

#
# sgl/dbl: redo the divide with only the rounding mode in the FPCR
# (precision bits cleared) before building the EXOP.
#
fdiv_ovfl_ena_sd:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0x30,%d1		# keep rnd mode
	fmov.l		%d1,%fpcr		# set FPCR

	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide

	fmov.l		&0x0,%fpcr		# clear FPCR
	bra.b		fdiv_ovfl_ena_cont

fdiv_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fdiv_unfl_ena		# yes

fdiv_unfl_dis:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result

	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# 'Z' may have been set
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	rts

#
# UNFL is enabled.
#
fdiv_unfl_ena:
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# is precision extended?
	bne.b		fdiv_unfl_ena_sd	# no, sgl or dbl

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

fdiv_unfl_ena_cont:
	fmov.l		&0x0,%fpsr		# clear FPSR

	fdiv.x		FP_SCR0(%a6),%fp1	# execute divide

	fmov.l		&0x0,%fpcr		# clear FPCR

	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# subtract scale factor
	addi.l		&0x6000,%d1		# add bias
	andi.w		&0x7fff,%d1
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exp
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.w		fdiv_unfl_dis

fdiv_unfl_ena_sd:
	mov.l		L_SCR3(%a6),%d1
	andi.b		&0x30,%d1		# use only rnd mode
	fmov.l		%d1,%fpcr		# set FPCR

	bra.b		fdiv_unfl_ena_cont

#
# the divide operation MAY underflow:
#
fdiv_may_unfl:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x1		# compare |result| with 1
	fbgt.w		fdiv_normal_exit	# |result| > 1; no underflow occurred
	fblt.w		fdiv_unfl		# |result| < 1; underflow occurred

#
# we still don't know if underflow occurred. result is ~ equal to 1. but,
# we don't know if the result was an underflow that rounded up to a 1
# or a normalized number that rounded down to a 1. so, redo the entire
# operation using RZ as the rounding mode to see what the pre-rounded
# result is. this case should be relatively rare.
#
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# keep rnd prec
	ori.b		&rz_mode*0x10,%d1	# insert RZ

	fmov.l		%d1,%fpcr		# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fdiv.x		FP_SCR0(%a6),%fp1	# execute divide

	fmov.l		&0x0,%fpcr		# clear FPCR
	fabs.x		%fp1			# make absolute value
	fcmp.b		%fp1,&0x1		# is |result| < 1.b?
	fbge.w		fdiv_normal_exit	# no; no underflow occurred
	bra.w		fdiv_unfl		# yes; underflow occurred

############################################################################

#
# Divide: inputs are not both normalized; what are they?
# %d1 = DTAG*8 + STAG selects a 16-bit offset (relative to tbl_fdiv_op) of
# the handler. Each row below is one dst type; the entry comments read
# "dst / src".
#
fdiv_not_norm:
	mov.w		(tbl_fdiv_op.b,%pc,%d1.w*2),%d1
	jmp		(tbl_fdiv_op.b,%pc,%d1.w*1)

	swbeg		&48
tbl_fdiv_op:
	short		fdiv_norm	- tbl_fdiv_op # NORM / NORM
	short		fdiv_inf_load	- tbl_fdiv_op # NORM / ZERO
	short		fdiv_zero_load	- tbl_fdiv_op # NORM / INF
	short		fdiv_res_qnan	- tbl_fdiv_op # NORM / QNAN
	short		fdiv_norm	- tbl_fdiv_op # NORM / DENORM
	short		fdiv_res_snan	- tbl_fdiv_op # NORM / SNAN
	short		tbl_fdiv_op	- tbl_fdiv_op #
	short		tbl_fdiv_op	- tbl_fdiv_op #

	short		fdiv_zero_load	- tbl_fdiv_op # ZERO / NORM
	short		fdiv_res_operr	- tbl_fdiv_op # ZERO / ZERO
	short		fdiv_zero_load	- tbl_fdiv_op # ZERO / INF
	short		fdiv_res_qnan	- tbl_fdiv_op # ZERO / QNAN
	short		fdiv_zero_load	- tbl_fdiv_op # ZERO / DENORM
	short		fdiv_res_snan	- tbl_fdiv_op # ZERO / SNAN
	short		tbl_fdiv_op	- tbl_fdiv_op #
	short		tbl_fdiv_op	- tbl_fdiv_op #

	short		fdiv_inf_dst	- tbl_fdiv_op # INF / NORM
	short		fdiv_inf_dst	- tbl_fdiv_op # INF / ZERO
	short		fdiv_res_operr	- tbl_fdiv_op # INF / INF
	short		fdiv_res_qnan	- tbl_fdiv_op # INF / QNAN
	short		fdiv_inf_dst	- tbl_fdiv_op # INF / DENORM
	short		fdiv_res_snan	- tbl_fdiv_op # INF / SNAN
	short		tbl_fdiv_op	- tbl_fdiv_op #
	short		tbl_fdiv_op	- tbl_fdiv_op #

	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / NORM
	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / ZERO
	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / INF
	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / QNAN
	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / DENORM
	short		fdiv_res_snan	- tbl_fdiv_op # QNAN / SNAN
	short		tbl_fdiv_op	- tbl_fdiv_op #
	short		tbl_fdiv_op	- tbl_fdiv_op #

	short		fdiv_norm	- tbl_fdiv_op # DENORM / NORM
	short		fdiv_inf_load	- tbl_fdiv_op # DENORM / ZERO
	short		fdiv_zero_load	- tbl_fdiv_op # DENORM / INF
	short		fdiv_res_qnan	- tbl_fdiv_op # DENORM / QNAN
	short		fdiv_norm	- tbl_fdiv_op # DENORM / DENORM
	short		fdiv_res_snan	- tbl_fdiv_op # DENORM / SNAN
	short		tbl_fdiv_op	- tbl_fdiv_op #
	short		tbl_fdiv_op	- tbl_fdiv_op #

	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / NORM
	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / ZERO
	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / INF
	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / QNAN
	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / DENORM
	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / SNAN
	short		tbl_fdiv_op	- tbl_fdiv_op #
	short		tbl_fdiv_op	- tbl_fdiv_op #

# Trampolines: the table holds 16-bit offsets, the real handlers are
# reached with long branches.
fdiv_res_qnan:
	bra.l		res_qnan
fdiv_res_snan:
	bra.l		res_snan
fdiv_res_operr:
	bra.l		res_operr

#
# Return a ZERO whose sign is the exclusive-or of the operand signs.
#
	global		fdiv_zero_load		# global for fsgldiv
fdiv_zero_load:
	mov.b		SRC_EX(%a0),%d0		# result sign is exclusive
	mov.b		DST_EX(%a1),%d1		# or of input signs.
	eor.b		%d0,%d1
	bpl.b		fdiv_zero_load_p	# result is positive
	fmov.s		&0x80000000,%fp0	# load a -ZERO
	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# set Z/N
	rts
fdiv_zero_load_p:
	fmov.s		&0x00000000,%fp0	# load a +ZERO
	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
	rts

#
# The destination was In Range and the source was a ZERO. The result,
# therefore, is an INF w/ the proper sign.
# So, determine the sign and return a new INF (w/ the j-bit cleared).
#
	global		fdiv_inf_load		# global for fsgldiv
fdiv_inf_load:
	ori.w		&dz_mask+adz_mask,2+USER_FPSR(%a6) # set DZ/ADZ
	mov.b		SRC_EX(%a0),%d0		# load both signs
	mov.b		DST_EX(%a1),%d1
	eor.b		%d0,%d1
	bpl.b		fdiv_inf_load_p		# result is positive
	fmov.s		&0xff800000,%fp0	# make result -INF
	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
	rts
fdiv_inf_load_p:
	fmov.s		&0x7f800000,%fp0	# make result +INF
	mov.b		&inf_bmask,FPSR_CC(%a6)	# set INF
	rts

#
# The destination was an INF w/ an In Range or ZERO source, the result is
# an INF w/ the proper sign.
# The 68881/882 returns the destination INF w/ the new sign (if the j-bit of
# the dst INF is set, then the j-bit of the result INF is also set).
#
	global		fdiv_inf_dst		# global for fsgldiv
fdiv_inf_dst:
	mov.b		DST_EX(%a1),%d0		# load both signs
	mov.b		SRC_EX(%a0),%d1
	eor.b		%d0,%d1
	bpl.b		fdiv_inf_dst_p		# result is positive

	fmovm.x		DST(%a1),&0x80		# return result in fp0
	fabs.x		%fp0			# clear sign bit
	fneg.x		%fp0			# set sign bit
	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/NEG
	rts

fdiv_inf_dst_p:
	fmovm.x		DST(%a1),&0x80		# return result in fp0
	fabs.x		%fp0			# return positive INF
	mov.b		&inf_bmask,FPSR_CC(%a6)	# set INF
	rts

#########################################################################
# XDEF **************************************************************** #
#	fneg(): emulates the fneg instruction				#
#	fsneg(): emulates the fsneg instruction				#
#	fdneg(): emulates the fdneg instruction				#
#									#
# XREF **************************************************************** #
#	norm() - normalize a denorm to provide EXOP			#
#	scale_to_zero_src() - scale sgl/dbl source exponent		#
#	ovf_res() - return default overflow result			#
#	unf_res() - return default
#	  underflow result						#
#	res_qnan_1op() - return QNAN result				#
#	res_snan_1op() - return SNAN result				#
#									#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision source operand		#
#	d0 = rnd prec,mode						#
#									#
# OUTPUT ************************************************************** #
#	fp0 = result							#
#	fp1 = EXOP (if exception occurred)				#
#									#
# ALGORITHM *********************************************************** #
#	Handle NANs, zeroes, and infinities as special cases. Separate	#
# norms/denorms into ext/sgl/dbl precisions. Extended precision can be	#
# emulated by simply setting sign bit. Sgl/dbl operands must be scaled	#
# and an actual fneg performed to see if overflow/underflow would have	#
# occurred. If so, return default underflow/overflow result. Else,	#
# scale the result exponent and return result. FPSR gets set based on	#
# the result value.							#
#									#
#########################################################################

	global		fsneg
fsneg:
	andi.b		&0x30,%d0		# keep rnd mode; clear rnd prec
	ori.b		&s_mode*0x10,%d0	# insert sgl precision
	bra.b		fneg

	global		fdneg
fdneg:
	andi.b		&0x30,%d0		# keep rnd mode; clear rnd prec
	ori.b		&d_mode*0x10,%d0	# insert dbl prec
						# fall through into fneg

	global		fneg
fneg:
	mov.l		%d0,L_SCR3(%a6)		# store rnd info
	mov.b		STAG(%a6),%d1
	bne.w		fneg_not_norm		# optimize on non-norm input

#
# NEGATE SIGN : norms and denorms ONLY!
#
fneg_norm:
	andi.b		&0xc0,%d0		# is precision extended?
	bne.w		fneg_not_ext		# no; go handle sgl or dbl

#
# precision selected is extended. so...we can not get an underflow
# or overflow because of rounding to the correct precision. so...
# skip the scaling and unscaling...
#
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	mov.w		SRC_EX(%a0),%d0
	eori.w		&0x8000,%d0		# negate sign
	bpl.b		fneg_norm_load		# sign is positive
	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
fneg_norm_load:
	mov.w		%d0,FP_SCR0_EX(%a6)
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts

#
# for an extended precision DENORM, the UNFL exception bit is set
# the accrued bit is NOT set in this instance(no inexactness!)
#
fneg_denorm:
	andi.b		&0xc0,%d0		# is precision extended?
	bne.b		fneg_not_ext		# no; go handle sgl or dbl

	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	mov.w		SRC_EX(%a0),%d0
	eori.w		&0x8000,%d0		# negate sign
	bpl.b		fneg_denorm_done	# result is positive
	mov.b		&neg_bmask,FPSR_CC(%a6)	# negative; set 'N' ccode bit
fneg_denorm_done:
	mov.w		%d0,FP_SCR0_EX(%a6)
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0

	btst		&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
	bne.b		fneg_ext_unfl_ena	# yes
	rts

#
# the input is an extended DENORM and underflow is enabled in the FPCR.
# normalize the mantissa and add the bias of 0x6000 to the resulting negative
# exponent and insert back into the operand.
#
fneg_ext_unfl_ena:
	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
	bsr.l		norm			# normalize result
	neg.w		%d0			# new exponent = -(shft val)
	addi.w		&0x6000,%d0		# add new bias to exponent
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
	andi.w		&0x8000,%d1		# keep old sign
	andi.w		&0x7fff,%d0		# clear sign position
	or.w		%d1,%d0			# concat old sign, new exponent
	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	rts

#
# operand is either single or double
#
fneg_not_ext:
	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
	bne.b		fneg_dbl

#
# operand is to be rounded to single precision
#
fneg_sgl:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l		scale_to_zero_src	# calculate scale factor

	cmpi.l		%d0,&0x3fff-0x3f80	# will move in underflow?
	bge.w		fneg_sd_unfl		# yes; go handle underflow
	cmpi.l		%d0,&0x3fff-0x407e	# will move in overflow?
	beq.w		fneg_sd_may_ovfl	# maybe; go check
	blt.w		fneg_sd_ovfl		# yes; go handle overflow

#
# operand will NOT overflow or underflow when moved in to the fp reg file
#
fneg_sd_normal:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fneg.x		FP_SCR0(%a6),%fp0	# perform negation

	fmov.l		%fpsr,%d1		# save FPSR
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

#
# Entered with fp0 = scaled result and d0 = scale factor.
#
fneg_sd_normal_exit:
	mov.l		%d2,-(%sp)		# save d2
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
	mov.w		FP_SCR0_EX(%a6),%d1	# load sgn,exp
	mov.w		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# subtract scale factor
	andi.w		&0x8000,%d2		# keep old sign
	or.w		%d1,%d2			# concat old sign,new exp
	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts

#
# operand is to be rounded to double precision
#
fneg_dbl:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l		scale_to_zero_src	# calculate scale factor

	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
	bge.b		fneg_sd_unfl		# yes; go handle underflow
	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
	beq.w		fneg_sd_may_ovfl	# maybe; go check
	blt.w		fneg_sd_ovfl		# yes; go handle overflow
	bra.w		fneg_sd_normal		# no; go handle normalized op

#
# operand WILL underflow when moved in to the fp register file
#
fneg_sd_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	eori.b		&0x80,FP_SCR0_EX(%a6)	# negate sign (in the scratch copy)
	bpl.b		fneg_sd_unfl_tst
	bset		&neg_bit,FPSR_CC(%a6)	# set 'N' ccode bit

# if underflow or inexact is enabled, go calculate EXOP first.
fneg_sd_unfl_tst:
	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fneg_sd_unfl_ena	# yes

fneg_sd_unfl_dis:
	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# unf_res may have set 'Z'
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	rts

#
# operand will underflow AND underflow is enabled.
# Therefore, we must return the result rounded to extended precision.
# FP_SCR0 already carries the negated sign here (see fneg_sd_unfl).
#
fneg_sd_unfl_ena:
	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent

	mov.l		%d2,-(%sp)		# save d2
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# subtract scale factor
	addi.l		&0x6000,%d1		# add new bias
	andi.w		&0x7fff,%d1
	or.w		%d2,%d1			# concat new sign,new exp
	mov.w		%d1,FP_SCR1_EX(%a6)	# insert new exp
	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
	mov.l		(%sp)+,%d2		# restore d2
	bra.b		fneg_sd_unfl_dis

#
# operand WILL overflow.
#
fneg_sd_ovfl:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fneg.x		FP_SCR0(%a6),%fp0	# perform negation

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# save FPSR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

fneg_sd_ovfl_tst:
	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fneg_sd_ovfl_ena	# yes

#
# OVFL is not enabled; therefore, we must create the default result by
# calling ovf_res().
#
fneg_sd_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	rts

#
# OVFL is enabled.
# the INEX2 bit has already been updated by the round to the correct precision.
# now, round to extended(and don't alter the FPSR).
#
# FIX: on the overflow paths the fneg.x result lives only in fp0; FP_SCR0
# still holds the scaled SOURCE with its original sign (unlike the underflow
# path, which negates FP_SCR0_EX up front). The EXOP must describe the
# negated value, so the saved sign is inverted before it is merged back in.
#
fneg_sd_ovfl_ena:
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch source {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# isolate source sign
	eori.w		&0x8000,%d2		# EXOP sign = negated source sign
	sub.l		%d0,%d1			# subtract scale factor
	subi.l		&0x6000,%d1		# subtract bias
	andi.w		&0x7fff,%d1
	or.w		%d2,%d1			# concat sign,exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	mov.l		(%sp)+,%d2		# restore d2
	bra.b		fneg_sd_ovfl_dis

#
# the move in MAY overflow. so...
#
fneg_sd_may_ovfl:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fneg.x		FP_SCR0(%a6),%fp0	# perform negation

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
	fbge.w		fneg_sd_ovfl_tst	# yes; overflow has occurred

# no, it didn't overflow; we have correct result
	bra.w		fneg_sd_normal_exit

##########################################################################

#
# input is not normalized; what is it?
#
fneg_not_norm:
	cmpi.b		%d1,&DENORM		# weed out DENORM
	beq.w		fneg_denorm
	cmpi.b		%d1,&SNAN		# weed out SNAN
	beq.l		res_snan_1op
	cmpi.b		%d1,&QNAN		# weed out QNAN
	beq.l		res_qnan_1op

#
# do the fneg; at this point, only possible ops are ZERO and INF.
# use fneg to determine ccodes.
# prec:mode should be zero at this point but it won't affect answer anyways.
#
	fneg.x		SRC_EX(%a0),%fp0	# do fneg
	fmov.l		%fpsr,%d0
	rol.l		&0x8,%d0		# put ccodes in lo byte
	mov.b		%d0,FPSR_CC(%a6)	# insert correct ccodes
	rts

#########################################################################
# XDEF **************************************************************** #
#	ftst(): emulates the ftst instruction				#
#									#
# XREF **************************************************************** #
#	res_{s,q}nan_1op() - set NAN result for monadic instruction	#
#									#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision source operand		#
#									#
# OUTPUT ************************************************************** #
#	none								#
#									#
# ALGORITHM *********************************************************** #
#	Check the source operand tag (STAG) and set the FPSR condition	#
# codes according to the operand type and sign.				#
#									#
#########################################################################

	global		ftst
ftst:
	mov.b		STAG(%a6),%d1
	bne.b		ftst_not_norm		# optimize on non-norm input

#
# Norm:
#
ftst_norm:
	tst.b		SRC_EX(%a0)		# is operand negative?
	bmi.b		ftst_norm_m		# yes
	rts					# positive: FPSR_CC is not written here
ftst_norm_m:
	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
	rts

#
# input is not normalized; what is it?
#
ftst_not_norm:
	cmpi.b		%d1,&ZERO		# weed out ZERO
	beq.b		ftst_zero
	cmpi.b		%d1,&INF		# weed out INF
	beq.b		ftst_inf
	cmpi.b		%d1,&SNAN		# weed out SNAN
	beq.l		res_snan_1op
	cmpi.b		%d1,&QNAN		# weed out QNAN
	beq.l		res_qnan_1op

#
# Denorm:
#
ftst_denorm:
	tst.b		SRC_EX(%a0)		# is operand negative?
	bmi.b		ftst_denorm_m		# yes
	rts					# positive: FPSR_CC is not written here
ftst_denorm_m:
	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
	rts

#
# Infinity:
#
ftst_inf:
	tst.b		SRC_EX(%a0)		# is operand negative?
	bmi.b		ftst_inf_m		# yes
ftst_inf_p:
	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
	rts
ftst_inf_m:
	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'I','N' ccode bits
	rts

#
# Zero:
#
ftst_zero:
	tst.b		SRC_EX(%a0)		# is operand negative?
	bmi.b		ftst_zero_m		# yes
ftst_zero_p:
	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
	rts
ftst_zero_m:
	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# set 'Z','N' ccode bits
	rts

#########################################################################
# XDEF **************************************************************** #
#	fint(): emulates the fint instruction				#
#									#
# XREF **************************************************************** #
#	res_{s,q}nan_1op() - set NAN result for monadic operation	#
#									#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision source operand		#
#	d0 = round precision/mode					#
#									#
# OUTPUT ************************************************************** #
#	fp0 = result							#
#									#
# ALGORITHM *********************************************************** #
#	Separate according to operand type. Unnorms don't pass through	#
# here. For norms, load the rounding mode/prec, execute a "fint", then	#
# store the resulting FPSR bits.					#
#	For denorms, force the j-bit to a one and do the same as for	#
# norms. Denorms are so low that the answer will either be a zero or a	#
# one.									#
#	For zeroes/infs/NANs, return the same while setting the FPSR	#
#	as appropriate.
#									#
#########################################################################

	global		fint
fint:
	mov.b		STAG(%a6),%d1		# d1 = source operand tag
	bne.b		fint_not_norm		# anything but NORM is handled below

#
# Norm (also reached from fint_denorm with a0 -> FP_SCR0):
#
fint_norm:
	andi.b		&0x30,%d0		# keep rnd mode only ==> ext prec

	fmov.l		&0x0,%fpsr		# start from a clean FPSR
	fmov.l		%d0,%fpcr		# install rounding mode

	fint.x		SRC(%a0),%fp0		# fp0 = source rounded to integer

	fmov.l		%fpsr,%d0		# collect resulting status
	fmov.l		&0x0,%fpcr		# back to a zeroed FPCR
	or.l		%d0,USER_FPSR(%a6)	# merge status into the user FPSR

	rts

#
# Source is not a NORM: dispatch on its tag. Anything that matches none
# of the tests below is handed to the QNAN handler.
#
fint_not_norm:
	cmpi.b		%d1,&DENORM		# DENORM?
	beq.b		fint_denorm
	cmpi.b		%d1,&INF		# INF?
	beq.b		fint_inf
	cmpi.b		%d1,&ZERO		# ZERO?
	beq.b		fint_zero
	cmpi.b		%d1,&SNAN		# SNAN?
	beq.l		res_snan_1op
	bra.l		res_qnan_1op		# otherwise: QNAN

#
# Denorm:
#
# for DENORMs, the result will be either (+/-)ZERO or (+/-)1.
# also, the INEX2 and AINEX exception bits will be set.
# rather than setting these by hand, a scratch operand with the j-bit
# forced on is built and pushed through the NORM path.
#
fint_denorm:
	mov.b		&0x80,FP_SCR0_HI(%a6)	# j-bit on: DENORM ==> small NORM
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6) # same sign, zero exponent
	lea		FP_SCR0(%a6),%a0	# operate on the scratch copy
	bra.b		fint_norm

#
# Zero: result is a zero of the same sign.
#
fint_zero:
	tst.b		SRC_EX(%a0)		# sign of the ZERO?
	bpl.b		fint_zero_p		# positive
fint_zero_m:
	fmov.s		&0x80000000,%fp0	# fp0 = -ZERO
	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# ccodes: 'Z' and 'N'
	rts
fint_zero_p:
	fmov.s		&0x00000000,%fp0	# fp0 = +ZERO
	mov.b		&z_bmask,FPSR_CC(%a6)	# ccodes: 'Z'
	rts

#
# Infinity: the source INF is returned unchanged.
#
fint_inf:
	fmovm.x		SRC(%a0),&0x80		# fp0 = source INF
	tst.b		SRC_EX(%a0)		# sign of the INF?
	bpl.b		fint_inf_p		# positive
fint_inf_m:
	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # ccodes: 'I' and 'N'
	rts
fint_inf_p:
	mov.b		&inf_bmask,FPSR_CC(%a6)	# ccodes: 'I'
	rts

#########################################################################
# XDEF **************************************************************** #
#	fintrz(): emulates the fintrz instruction			#
#									#
# XREF **************************************************************** #
#	res_{s,q}nan_1op() - set NAN result for monadic operation	#
#									#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision source operand		#
#	d0 = round precision/mode					#
#									#
# OUTPUT ************************************************************** #
#	fp0 = result							#
#									#
# ALGORITHM *********************************************************** #
#	Separate according to operand type. Unnorms don't pass through	#
# here. For norms, load the rounding mode/prec, execute a "fintrz",	#
# then store the resulting FPSR bits.					#
#	For denorms, force the j-bit to a one and do the same as for	#
# norms. Denorms are so low that the answer will either be a zero or a	#
# one.									#
#	For zeroes/infs/NANs, return the same while setting the FPSR	#
#	as appropriate.
#									#
#########################################################################

	global		fintrz
fintrz:
	mov.b		STAG(%a6),%d1		# d1 = source operand tag
	bne.b		fintrz_not_norm		# anything but NORM is handled below

#
# Norm (also reached from fintrz_denorm with a0 -> FP_SCR0):
#
fintrz_norm:
	fmov.l		&0x0,%fpsr		# start from a clean FPSR

	fintrz.x	SRC(%a0),%fp0		# fp0 = source truncated to integer

	fmov.l		%fpsr,%d0		# collect resulting status
	or.l		%d0,USER_FPSR(%a6)	# merge status into the user FPSR

	rts

#
# Source is not a NORM: dispatch on its tag. Anything that matches none
# of the tests below is handed to the QNAN handler.
#
fintrz_not_norm:
	cmpi.b		%d1,&DENORM		# DENORM?
	beq.b		fintrz_denorm
	cmpi.b		%d1,&INF		# INF?
	beq.b		fintrz_inf
	cmpi.b		%d1,&ZERO		# ZERO?
	beq.b		fintrz_zero
	cmpi.b		%d1,&SNAN		# SNAN?
	beq.l		res_snan_1op
	bra.l		res_qnan_1op		# otherwise: QNAN

#
# Denorm:
#
# for DENORMs, the result will be (+/-)ZERO.
# also, the INEX2 and AINEX exception bits will be set.
# rather than setting these by hand, a scratch operand with the j-bit
# forced on is built and pushed through the NORM path.
#
fintrz_denorm:
	mov.b		&0x80,FP_SCR0_HI(%a6)	# j-bit on: DENORM ==> small NORM
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6) # same sign, zero exponent
	lea		FP_SCR0(%a6),%a0	# operate on the scratch copy
	bra.b		fintrz_norm

#
# Zero: result is a zero of the same sign.
#
fintrz_zero:
	tst.b		SRC_EX(%a0)		# sign of the ZERO?
	bpl.b		fintrz_zero_p		# positive
fintrz_zero_m:
	fmov.s		&0x80000000,%fp0	# fp0 = -ZERO
	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# ccodes: 'Z' and 'N'
	rts
fintrz_zero_p:
	fmov.s		&0x00000000,%fp0	# fp0 = +ZERO
	mov.b		&z_bmask,FPSR_CC(%a6)	# ccodes: 'Z'
	rts

#
# Infinity: the source INF is returned unchanged.
#
fintrz_inf:
	fmovm.x		SRC(%a0),&0x80		# fp0 = source INF
	tst.b		SRC_EX(%a0)		# sign of the INF?
	bpl.b		fintrz_inf_p		# positive
fintrz_inf_m:
	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # ccodes: 'I' and 'N'
	rts
fintrz_inf_p:
	mov.b		&inf_bmask,FPSR_CC(%a6)	# ccodes: 'I'
	rts

#########################################################################
# XDEF **************************************************************** #
#	fabs(): emulates the fabs instruction				#
#	fsabs(): emulates the fsabs instruction				#
#	fdabs(): emulates the fdabs instruction				#
#									#
# XREF **************************************************************** #
#	norm() - normalize denorm mantissa to provide EXOP		#
#	scale_to_zero_src() - make exponent. = 0; get scale factor	#
#	unf_res() - calculate underflow result				#
#	ovf_res() - calculate overflow result				#
#	res_{s,q}nan_1op() - set NAN result for monadic operation	#
#									#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision source operand		#
#	d0 = rnd precision/mode						#
#									#
# OUTPUT ************************************************************** #
#	fp0 = result							#
#	fp1 = EXOP (if exception occurred)				#
#									#
# ALGORITHM *********************************************************** #
#	Handle NANs, infinities, and zeroes as special cases. Divide	#
# norms into extended, single, and double precision.			#
#	Simply clear sign for extended precision norm. Ext prec denorm	#
# gets an EXOP created for it since it's an underflow.			#
#	Double and single precision can overflow and underflow. First,	#
# scale the operand such that the exponent is zero. Perform an "fabs"	#
# using the correct rnd mode/prec. Check to see if the original	#
# exponent would take an exception. If so, use unf_res() or ovf_res()	#
# to calculate the default result. Also, create the EXOP for the	#
# exceptional case. If no exception should occur, insert the correct	#
# result exponent and return.						#
#	Unnorms don't pass through here.
#
#									#
#########################################################################

#
# fsabs: force single rounding precision, keep the caller's rounding
# mode (bits 0x30 of d0), then share the fabs code.
#
	global		fsabs
fsabs:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&s_mode*0x10,%d0	# insert sgl precision
	bra.b		fabs

#
# fdabs: force double rounding precision and FALL THROUGH into fabs.
#
	global		fdabs
fdabs:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&d_mode*0x10,%d0	# insert dbl precision

	global		fabs
fabs:
	mov.l		%d0,L_SCR3(%a6)		# store rnd info
	mov.b		STAG(%a6),%d1		# fetch source tag
	bne.w		fabs_not_norm		# optimize on non-norm input

#
# ABSOLUTE VALUE: norms and denorms ONLY!
#
fabs_norm:
	andi.b		&0xc0,%d0		# is precision extended?
	bne.b		fabs_not_ext		# no; go handle sgl or dbl

#
# precision selected is extended. so...we can not get an underflow
# or overflow because of rounding to the correct precision. so...
# skip the scaling and unscaling...
#
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	mov.w		SRC_EX(%a0),%d1
	bclr		&15,%d1			# force absolute value
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert exponent
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts

#
# for an extended precision DENORM, the UNFL exception bit is set
# the accrued bit is NOT set in this instance(no inexactness!)
#
fabs_denorm:
	andi.b		&0xc0,%d0		# is precision extended?
	bne.b		fabs_not_ext		# no

	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	mov.w		SRC_EX(%a0),%d0
	bclr		&15,%d0			# clear sign
	mov.w		%d0,FP_SCR0_EX(%a6)	# insert exponent

	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0

	btst		&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
	bne.b		fabs_ext_unfl_ena
	rts

#
# the input is an extended DENORM and underflow is enabled in the FPCR.
# normalize the mantissa and add the bias of 0x6000 to the resulting negative
# exponent and insert back into the operand.
#
fabs_ext_unfl_ena:
	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
	bsr.l		norm			# normalize result
	neg.w		%d0			# new exponent = -(shft val)
	addi.w		&0x6000,%d0		# add new bias to exponent
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
	andi.w		&0x8000,%d1		# keep old sign
	andi.w		&0x7fff,%d0		# clear sign position
	or.w		%d1,%d0			# concat old sign, new exponent
	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	rts

#
# operand is either single or double
#
fabs_not_ext:
	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
	bne.b		fabs_dbl

#
# operand is to be rounded to single precision.
# the operand is copied to FP_SCR0 with its sign intact; the value left
# in d0 by scale_to_zero_src() is the scale factor tested below.
#
fabs_sgl:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l		scale_to_zero_src	# calculate scale factor

	cmpi.l		%d0,&0x3fff-0x3f80	# will move in underflow?
	bge.w		fabs_sd_unfl		# yes; go handle underflow
	cmpi.l		%d0,&0x3fff-0x407e	# will move in overflow?
	beq.w		fabs_sd_may_ovfl	# maybe; go check
	blt.w		fabs_sd_ovfl		# yes; go handle overflow

#
# operand will NOT overflow or underflow when moved in to the fp reg file
#
fabs_sd_normal:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute

	fmov.l		%fpsr,%d1		# save FPSR
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

#
# undo the scaling: store the rounded result, subtract the scale factor
# (d0) from its exponent and reload it into fp0.
#
fabs_sd_normal_exit:
	mov.l		%d2,-(%sp)		# save d2
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
	mov.w		FP_SCR0_EX(%a6),%d1	# load sgn,exp
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# exp - scale factor
	andi.w		&0x8000,%d2		# keep old sign
	or.w		%d1,%d2			# concat old sign,new exp
	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts

#
# operand is to be rounded to double precision
#
fabs_dbl:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l		scale_to_zero_src	# calculate scale factor

	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
	bge.b		fabs_sd_unfl		# yes; go handle underflow
	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
	beq.w		fabs_sd_may_ovfl	# maybe; go check
	blt.w		fabs_sd_ovfl		# yes; go handle overflow
	bra.w		fabs_sd_normal		# no; go handle normalized op

#
# operand WILL underflow when moved in to the fp register file
#
fabs_sd_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	bclr		&0x7,FP_SCR0_EX(%a6)	# force absolute value

# if underflow or inexact is enabled, go calculate EXOP first.
	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fabs_sd_unfl_ena	# yes

fabs_sd_unfl_dis:
	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set possible 'Z' ccode
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	rts

#
# operand will underflow AND underflow is enabled.
# Therefore, we must return the result rounded to extended precision.
# the EXOP is built in FP_SCR1 so that FP_SCR0 is still intact for
# fabs_sd_unfl_dis.
#
fabs_sd_unfl_ena:
	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent

	mov.l		%d2,-(%sp)		# save d2
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# subtract scale factor
	addi.l		&0x6000,%d1		# add new bias
	andi.w		&0x7fff,%d1
	or.w		%d2,%d1			# concat new sign,new exp
	mov.w		%d1,FP_SCR1_EX(%a6)	# insert new exp
	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
	mov.l		(%sp)+,%d2		# restore d2
	bra.b		fabs_sd_unfl_dis

#
# operand WILL overflow.
#
fabs_sd_ovfl:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# save FPSR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

fabs_sd_ovfl_tst:
	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fabs_sd_ovfl_ena	# yes

#
# OVFL is not enabled; therefore, we must create the default result by
# calling ovf_res().
#
fabs_sd_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	rts

#
# OVFL is enabled.
# the INEX2 bit has already been updated by the round to the correct precision.
# now, round to extended(and don't alter the FPSR).
#
# FP_SCR0 still holds the scaled SOURCE operand here (both callers reach
# this point without storing fp0 back), so it still carries the source's
# sign. the EXOP of an absolute value must not be negative, so clear the
# sign first, exactly as fabs_sd_unfl does before building its EXOP.
#
fabs_sd_ovfl_ena:
	bclr		&0x7,FP_SCR0_EX(%a6)	# force absolute value
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep sign (now always clear)
	sub.l		%d0,%d1			# exp - scale factor
	subi.l		&0x6000,%d1		# subtract bias
	andi.w		&0x7fff,%d1
	or.w		%d2,%d1			# concat sign,exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	mov.l		(%sp)+,%d2		# restore d2
	bra.b		fabs_sd_ovfl_dis

#
# the move in MAY overflow. so...
# perform the operation on the scaled operand and look at the magnitude
# of the rounded result: a result >= 2 means rounding carried out and
# the real result overflows.
#
fabs_sd_may_ovfl:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
	fbge.w		fabs_sd_ovfl_tst	# yes; overflow has occurred

# no, it didn't overflow; we have correct result
	bra.w		fabs_sd_normal_exit

##########################################################################

#
# input is not normalized; what is it?
# DENORMs and NANs are dispatched first; what remains is ZERO or INF,
# for which the hardware fabs supplies the result and only the ccode
# byte has to be set by hand.
#
fabs_not_norm:
	cmpi.b		%d1,&DENORM		# weed out DENORM
	beq.w		fabs_denorm
	cmpi.b		%d1,&SNAN		# weed out SNAN
	beq.l		res_snan_1op
	cmpi.b		%d1,&QNAN		# weed out QNAN
	beq.l		res_qnan_1op

	fabs.x		SRC(%a0),%fp0		# force absolute value

	cmpi.b		%d1,&INF		# weed out INF
	beq.b		fabs_inf
fabs_zero:
	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
	rts
fabs_inf:
	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
	rts

#########################################################################
# XDEF **************************************************************** #
#	fcmp(): fp compare op routine					#
#									#
# XREF **************************************************************** #
#	res_qnan() - return QNAN result					#
#	res_snan() - return SNAN result					#
#									#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision source operand		#
#	a1 = pointer to extended precision destination operand		#
#	d0 = round prec/mode						#
#									#
# OUTPUT ************************************************************** #
#	None								#
#									#
# ALGORITHM *********************************************************** #
#	Handle NANs and denorms as special cases.
#	For everything else,						#
#	just use the actual fcmp instruction to produce the correct condition #
#	codes.								#
#									#
#########################################################################

	global		fcmp
fcmp:
	clr.w		%d1
	mov.b		DTAG(%a6),%d1		# d1 = dst tag
	lsl.b		&0x3,%d1		# table index = (dst tag * 8)
	or.b		STAG(%a6),%d1		#             + src tag
	bne.b		fcmp_not_norm		# optimize on non-norm input

#
# COMPARE FP OPs : NORMs, ZEROs, INFs, and "corrected" DENORMs
# a1 -> dst operand, a0 -> src operand. only the condition code byte of
# the hardware FPSR is copied to FPSR_CC.
#
fcmp_norm:
	fmovm.x		DST(%a1),&0x80		# load dst op

	fcmp.x		%fp0,SRC(%a0)		# do compare

	fmov.l		%fpsr,%d0		# save FPSR
	rol.l		&0x8,%d0		# rotate ccode byte into low byte
	mov.b		%d0,FPSR_CC(%a6)	# set ccode bits(no exc bits are set)

	rts

#
# fcmp: inputs are not both normalized; what are they?
# d1 indexes a table of 16-bit offsets relative to tbl_fcmp_op.
#
fcmp_not_norm:
	mov.w		(tbl_fcmp_op.b,%pc,%d1.w*2),%d1
	jmp		(tbl_fcmp_op.b,%pc,%d1.w*1)

#
# rows are selected by the dst tag, columns by the src tag; each
# comment reads "dst - src". the two unused slots per row pad a row
# to 8 entries.
#
	swbeg		&48
tbl_fcmp_op:
	short		fcmp_norm	- tbl_fcmp_op # NORM - NORM
	short		fcmp_norm	- tbl_fcmp_op # NORM - ZERO
	short		fcmp_norm	- tbl_fcmp_op # NORM - INF
	short		fcmp_res_qnan	- tbl_fcmp_op # NORM - QNAN
	short		fcmp_nrm_dnrm	- tbl_fcmp_op # NORM - DENORM
	short		fcmp_res_snan	- tbl_fcmp_op # NORM - SNAN
	short		tbl_fcmp_op	- tbl_fcmp_op #
	short		tbl_fcmp_op	- tbl_fcmp_op #

	short		fcmp_norm	- tbl_fcmp_op # ZERO - NORM
	short		fcmp_norm	- tbl_fcmp_op # ZERO - ZERO
	short		fcmp_norm	- tbl_fcmp_op # ZERO - INF
	short		fcmp_res_qnan	- tbl_fcmp_op # ZERO - QNAN
	short		fcmp_dnrm_s	- tbl_fcmp_op # ZERO - DENORM
	short		fcmp_res_snan	- tbl_fcmp_op # ZERO - SNAN
	short		tbl_fcmp_op	- tbl_fcmp_op #
	short		tbl_fcmp_op	- tbl_fcmp_op #

	short		fcmp_norm	- tbl_fcmp_op # INF - NORM
	short		fcmp_norm	- tbl_fcmp_op # INF - ZERO
	short		fcmp_norm	- tbl_fcmp_op # INF - INF
	short		fcmp_res_qnan	- tbl_fcmp_op # INF - QNAN
	short		fcmp_dnrm_s	- tbl_fcmp_op # INF - DENORM
	short		fcmp_res_snan	- tbl_fcmp_op # INF - SNAN
	short		tbl_fcmp_op	- tbl_fcmp_op #
	short		tbl_fcmp_op	- tbl_fcmp_op #

	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - NORM
	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - ZERO
	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - INF
	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - QNAN
	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - DENORM
	short		fcmp_res_snan	- tbl_fcmp_op # QNAN - SNAN
	short		tbl_fcmp_op	- tbl_fcmp_op #
	short		tbl_fcmp_op	- tbl_fcmp_op #

	short		fcmp_dnrm_nrm	- tbl_fcmp_op # DENORM - NORM
	short		fcmp_dnrm_d	- tbl_fcmp_op # DENORM - ZERO
	short		fcmp_dnrm_d	- tbl_fcmp_op # DENORM - INF
	short		fcmp_res_qnan	- tbl_fcmp_op # DENORM - QNAN
	short		fcmp_dnrm_sd	- tbl_fcmp_op # DENORM - DENORM
	short		fcmp_res_snan	- tbl_fcmp_op # DENORM - SNAN
	short		tbl_fcmp_op	- tbl_fcmp_op #
	short		tbl_fcmp_op	- tbl_fcmp_op #

	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - NORM
	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - ZERO
	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - INF
	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - QNAN
	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - DENORM
	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - SNAN
	short		tbl_fcmp_op	- tbl_fcmp_op #
	short		tbl_fcmp_op	- tbl_fcmp_op #

# unlike all other functions for QNAN and SNAN, fcmp does NOT set the
# 'N' bit for a negative QNAN or SNAN input so we must squelch it here.
fcmp_res_qnan:
	bsr.l		res_qnan
	andi.b		&0xf7,FPSR_CC(%a6)	# clear 'N' in the ccode byte
	rts
fcmp_res_snan:
	bsr.l		res_snan
	andi.b		&0xf7,FPSR_CC(%a6)	# clear 'N' in the ccode byte
	rts

#
# DENORMs are a little more difficult.
# If you have a 2 DENORMs, then you can just force the j-bit to a one
# and use the fcmp_norm routine.
# If you have a DENORM and an INF or ZERO, just force the DENORM's j-bit to a one
# and use the fcmp_norm routine.
# If you have a DENORM and a NORM with opposite signs, then use fcmp_norm, also.
# But with a DENORM and a NORM of the same sign, the neg bit is set if the
# (1) signs are (+) and the DENORM is the dst or
# (2) signs are (-) and the DENORM is the src
#

#
# src is a DENORM: copy it to FP_SCR0 with the j-bit forced on and
# compare using that copy as the source.
#
fcmp_dnrm_s:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),%d0
	bset		&31,%d0			# DENORM src; make into small norm
	mov.l		%d0,FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	lea		FP_SCR0(%a6),%a0	# a0 = ptr to corrected src
	bra.w		fcmp_norm

#
# dst is a DENORM: copy it to FP_SCR0 with the j-bit forced on and
# compare using that copy as the destination.
# the sign/exponent is a word, so it is copied with mov.w like the
# sibling routines do.
#
fcmp_dnrm_d:
	mov.w		DST_EX(%a1),FP_SCR0_EX(%a6)
	mov.l		DST_HI(%a1),%d0
	bset		&31,%d0			# DENORM dst; make into small norm
	mov.l		%d0,FP_SCR0_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR0_LO(%a6)
	lea		FP_SCR0(%a6),%a1	# a1 = ptr to corrected dst
	bra.w		fcmp_norm

#
# both operands are DENORMs: corrected dst goes to FP_SCR1, corrected
# src to FP_SCR0.
#
fcmp_dnrm_sd:
	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		DST_HI(%a1),%d0
	bset		&31,%d0			# DENORM dst; make into small norm
	mov.l		%d0,FP_SCR1_HI(%a6)
	mov.l		SRC_HI(%a0),%d0
	bset		&31,%d0			# DENORM src; make into small norm
	mov.l		%d0,FP_SCR0_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	lea		FP_SCR1(%a6),%a1	# a1 = ptr to corrected dst
	lea		FP_SCR0(%a6),%a0	# a0 = ptr to corrected src
	bra.w		fcmp_norm

#
# dst is a NORM, src is a DENORM.
# opposite signs: let the hardware decide (via fcmp_dnrm_s).
# like signs: the answer is negative only when both are negative;
# FPSR_CC is left untouched otherwise.
#
fcmp_nrm_dnrm:
	mov.b		SRC_EX(%a0),%d0		# determine if like signs
	mov.b		DST_EX(%a1),%d1
	eor.b		%d0,%d1
	bmi.w		fcmp_dnrm_s

# signs are the same, so must determine the answer ourselves.
	tst.b		%d0			# is src op negative?
	bmi.b		fcmp_nrm_dnrm_m		# yes
	rts
fcmp_nrm_dnrm_m:
	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
	rts

#
# dst is a DENORM, src is a NORM.
# opposite signs: let the hardware decide (via fcmp_dnrm_d).
# like signs: the answer is negative only when both are positive;
# FPSR_CC is left untouched otherwise.
#
fcmp_dnrm_nrm:
	mov.b		SRC_EX(%a0),%d0		# determine if like signs
	mov.b		DST_EX(%a1),%d1
	eor.b		%d0,%d1
	bmi.w		fcmp_dnrm_d

# signs are the same, so must determine the answer ourselves.
	tst.b		%d0			# is src op negative?
	bpl.b		fcmp_dnrm_nrm_m		# no
	rts
fcmp_dnrm_nrm_m:
	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
	rts

#########################################################################
# XDEF **************************************************************** #
#	fsglmul():
#	emulates the fsglmul instruction				#
#									#
# XREF **************************************************************** #
#	scale_to_zero_src() - scale src exponent to zero		#
#	scale_to_zero_dst() - scale dst exponent to zero		#
#	unf_res4() - return default underflow result for sglop		#
#	ovf_res() - return default overflow result			#
#	res_qnan() - return QNAN result					#
#	res_snan() - return SNAN result					#
#									#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision source operand		#
#	a1 = pointer to extended precision destination operand		#
#	d0  rnd prec,mode						#
#									#
# OUTPUT ************************************************************** #
#	fp0 = result							#
#	fp1 = EXOP (if exception occurred)				#
#									#
# ALGORITHM *********************************************************** #
#	Handle NANs, infinities, and zeroes as special cases. Divide	#
#	norms/denorms into ext/sgl/dbl precision.			#
#	For norms/denorms, scale the exponents such that a multiply	#
#	instruction won't cause an exception. Use the regular fsglmul to #
#	compute a result. Check if the regular operands would have taken #
#	an exception. If so, return the default overflow/underflow result #
#	and return the EXOP if exceptions are enabled. Else, scale the	#
#	result operand to the proper exponent.				#
#									#
#########################################################################

	global		fsglmul
fsglmul:
	mov.l		%d0,L_SCR3(%a6)		# remember rnd prec,mode

	clr.w		%d1
	mov.b		DTAG(%a6),%d1		# d1 = dst tag
	lsl.b		&0x3,%d1		# index = (dst tag * 8)
	or.b		STAG(%a6),%d1		#       + src tag

	bne.w		fsglmul_not_norm	# some operand is not a NORM

#
# MULTIPLY: both operands are NORMs or DENORMs.
# dst is copied to FP_SCR1 and src to FP_SCR0; d0 ends up holding the
# sum of the two scale factors.
#
fsglmul_norm:
	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	bsr.l		scale_to_zero_src	# d0 = src scale factor
	mov.l		%d0,-(%sp)		# park it on the stack

	bsr.l		scale_to_zero_dst	# d0 = dst scale factor

	add.l		(%sp)+,%d0		# d0 = scale1 + scale2

	cmpi.l		%d0,&0x3fff-0x7ffe	# overflow threshold?
	beq.w		fsglmul_may_ovfl	# on it: depends on rounding
	blt.w		fsglmul_ovfl		# below it: overflow for sure

	cmpi.l		%d0,&0x3fff+0x0001	# underflow threshold?
	beq.w		fsglmul_may_unfl	# on it: depends on rounding
	bgt.w		fsglmul_unfl		# above it: underflow for sure

#
# neither overflow nor underflow: multiply the scaled operands under
# the user's rounding control.
#
fsglmul_normal:
	fmovm.x		FP_SCR1(%a6),&0x80	# fp0 = scaled dst

	fmov.l		L_SCR3(%a6),%fpcr	# user's rnd prec,mode
	fmov.l		&0x0,%fpsr		# clean FPSR

	fsglmul.x	FP_SCR0(%a6),%fp0	# fp0 = dst * src

	fmov.l		%fpsr,%d1		# collect status
	fmov.l		&0x0,%fpcr		# back to default FPCR

	or.l		%d1,USER_FPSR(%a6)	# merge INEX2,N

#
# put the real exponent back: exponent of fp0 minus scale factor (d0).
#
fsglmul_normal_exit:
	fmovm.x		&0x80,FP_SCR0(%a6)	# spill result
	mov.l		%d2,-(%sp)		# preserve d2
	mov.w		FP_SCR0_EX(%a6),%d1	# d1 = {sgn,exp}
	mov.l		%d1,%d2			# d2 = copy
	andi.w		&0x8000,%d2		# d2 = sign only
	andi.l		&0x7fff,%d1		# d1 = exponent only
	sub.l		%d0,%d1			# exp - scale factor
	or.w		%d2,%d1			# sign | new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# patch result
	mov.l		(%sp)+,%d2		# recover d2
	fmovm.x		FP_SCR0(%a6),&0x80	# fp0 = final result
	rts

#
# the result WILL overflow.
#
fsglmul_ovfl:
	fmovm.x		FP_SCR1(%a6),&0x80	# fp0 = scaled dst

	fmov.l		L_SCR3(%a6),%fpcr	# user's rnd prec,mode
	fmov.l		&0x0,%fpsr		# clean FPSR

	fsglmul.x	FP_SCR0(%a6),%fp0	# fp0 = dst * src

	fmov.l		%fpsr,%d1		# collect status
	fmov.l		&0x0,%fpcr		# back to default FPCR

	or.l		%d1,USER_FPSR(%a6)	# merge INEX2,N

fsglmul_ovfl_tst:

# the overflow bits are set only here because fsglmul_may_ovfl enters
# at this label as well.
	or.l		&ovfl_inx_mask, USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# OVFL or INEX enabled?
	bne.b		fsglmul_ovfl_ena	# yes; build the EXOP first

#
# default overflow result comes from ovf_res(), called with extended
# precision forced.
#
fsglmul_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# negative result?
	sne		%d1			# d1 = sign parameter
	mov.l		L_SCR3(%a6),%d0		# d0 = prec:rnd
	andi.b		&0x30,%d0		# keep rnd mode; prec = ext
	bsr.l		ovf_res			# get default result
	or.b		%d0,FPSR_CC(%a6)	# merge returned ccodes (INF,N)
	fmovm.x		(%a0),&0x80		# fp0 = default result
	rts

#
# OVFL or INEX is enabled: EXOP = result with (exp - scale - 0x6000).
#
fsglmul_ovfl_ena:
	fmovm.x		&0x80,FP_SCR0(%a6)	# spill result

	mov.l		%d2,-(%sp)		# preserve d2
	mov.w		FP_SCR0_EX(%a6),%d1	# d1 = {sgn,exp}
	mov.l		%d1,%d2			# d2 = copy
	andi.l		&0x7fff,%d1		# d1 = exponent only
	sub.l		%d0,%d1			# exp - scale factor
	subi.l		&0x6000,%d1		# subtract bias
	andi.w		&0x7fff,%d1		# confine to 15 bits
	andi.w		&0x8000,%d2		# d2 = sign only
	or.w		%d2,%d1			# sign | new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# patch EXOP
	mov.l		(%sp)+,%d2		# recover d2
	fmovm.x		FP_SCR0(%a6),&0x40	# fp1 = EXOP
	bra.b		fsglmul_ovfl_dis

#
# the result MAY overflow: do the multiply and look at the magnitude
# of the rounded, still scaled, result.
#
fsglmul_may_ovfl:
	fmovm.x		FP_SCR1(%a6),&0x80	# fp0 = scaled dst

	fmov.l		L_SCR3(%a6),%fpcr	# user's rnd prec,mode
	fmov.l		&0x0,%fpsr		# clean FPSR

	fsglmul.x	FP_SCR0(%a6),%fp0	# fp0 = dst * src

	fmov.l		%fpsr,%d1		# collect status
	fmov.l		&0x0,%fpcr		# back to default FPCR

	or.l		%d1,USER_FPSR(%a6)	# merge INEX2,N

	fabs.x		%fp0,%fp1		# fp1 = |result|
	fcmp.b		%fp1,&0x2		# compare |result| with 2
	fbge.w		fsglmul_ovfl_tst	# >= 2: it overflowed

# |result| < 2: no overflow; fp0 only needs its exponent restored
	bra.w		fsglmul_normal_exit

#
# the result WILL underflow: redo the multiply with FPCR = round-to-zero
# and hand the result to unf_res4().
#
fsglmul_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # flag UNFL

	fmovm.x		FP_SCR1(%a6),&0x80	# fp0 = scaled dst

	fmov.l		&rz_mode*0x10,%fpcr	# RZ rounding
	fmov.l		&0x0,%fpsr		# clean FPSR

	fsglmul.x	FP_SCR0(%a6),%fp0	# fp0 = dst * src

	fmov.l		%fpsr,%d1		# collect status
	fmov.l		&0x0,%fpcr		# back to default FPCR

	or.l		%d1,USER_FPSR(%a6)	# merge INEX2,N

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# UNFL or INEX enabled?
	bne.b		fsglmul_unfl_ena	# yes; build the EXOP first

fsglmul_unfl_dis:
	fmovm.x		&0x80,FP_SCR0(%a6)	# spill result

	lea		FP_SCR0(%a6),%a0	# a0 = ptr to result
	mov.l		L_SCR3(%a6),%d1		# d1 = rnd prec,mode
	bsr.l		unf_res4		# get default result
	or.b		%d0,FPSR_CC(%a6)	# merge returned ccodes ('Z')
	fmovm.x		FP_SCR0(%a6),&0x80	# fp0 = default result
	rts

#
# UNFL is enabled.
# recompute in fp1 under the user's rounding control; the EXOP is that
# result with (exp - scale + 0x6000). fp0 is left alone for
# fsglmul_unfl_dis.
#
fsglmul_unfl_ena:
	fmovm.x		FP_SCR1(%a6),&0x40	# fp1 = scaled dst

	fmov.l		L_SCR3(%a6),%fpcr	# user's rnd prec,mode
	fmov.l		&0x0,%fpsr		# clean FPSR

	fsglmul.x	FP_SCR0(%a6),%fp1	# fp1 = dst * src

	fmov.l		&0x0,%fpcr		# back to default FPCR

	fmovm.x		&0x40,FP_SCR0(%a6)	# spill fp1
	mov.l		%d2,-(%sp)		# preserve d2
	mov.w		FP_SCR0_EX(%a6),%d1	# d1 = {sgn,exp}
	mov.l		%d1,%d2			# d2 = copy
	andi.w		&0x8000,%d2		# d2 = sign only
	andi.l		&0x7fff,%d1		# d1 = exponent only
	sub.l		%d0,%d1			# exp - scale factor
	addi.l		&0x6000,%d1		# add bias
	andi.w		&0x7fff,%d1		# confine to 15 bits
	or.w		%d2,%d1			# sign | new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# patch EXOP
	mov.l		(%sp)+,%d2		# recover d2
	fmovm.x		FP_SCR0(%a6),&0x40	# fp1 = EXOP
	bra.w		fsglmul_unfl_dis

#
# the result MAY underflow: do the multiply and compare the magnitude
# of the rounded, still scaled, result with 2.
#
fsglmul_may_unfl:
	fmovm.x		FP_SCR1(%a6),&0x80	# fp0 = scaled dst

	fmov.l		L_SCR3(%a6),%fpcr	# user's rnd prec,mode
	fmov.l		&0x0,%fpsr		# clean FPSR

	fsglmul.x	FP_SCR0(%a6),%fp0	# fp0 = dst * src

	fmov.l		%fpsr,%d1		# collect status
	fmov.l		&0x0,%fpcr		# back to default FPCR

	or.l		%d1,USER_FPSR(%a6)	# merge INEX2,N

	fabs.x		%fp0,%fp1		# fp1 = |result|
	fcmp.b		%fp1,&0x2		# compare |result| with 2
	fbgt.w		fsglmul_normal_exit	# > 2: no underflow occurred
	fblt.w		fsglmul_unfl		# < 2: underflow occurred

#
# we still don't know if underflow occurred. result is ~ equal to 2. but,
# we don't know if the result was an underflow that rounded up to a 2 or
# a normalized number that rounded down to a 2. so, redo the entire operation
# using RZ as the rounding mode to see what the pre-rounded result is.
# this case should be relatively rare.
#
	fmovm.x		FP_SCR1(%a6),&0x40	# fp1 = scaled dst

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# keep rnd prec
	ori.b		&rz_mode*0x10,%d1	# insert RZ

	fmov.l		%d1,%fpcr		# user's prec, RZ mode
	fmov.l		&0x0,%fpsr		# clean FPSR

	fsglmul.x	FP_SCR0(%a6),%fp1	# fp1 = dst * src

	fmov.l		&0x0,%fpcr		# back to default FPCR
	fabs.x		%fp1			# fp1 = |fp1|
	fcmp.b		%fp1,&0x2		# compare with 2 again
	fbge.w		fsglmul_normal_exit	# >= 2: no underflow occurred
	bra.w		fsglmul_unfl		# < 2: underflow occurred

##############################################################################

#
# Single Precision Multiply: inputs are not both normalized; what are they?
# d1 indexes a table of 16-bit offsets relative to tbl_fsglmul_op.
#
fsglmul_not_norm:
	mov.w		(tbl_fsglmul_op.b,%pc,%d1.w*2),%d1
	jmp		(tbl_fsglmul_op.b,%pc,%d1.w*1)

#
# rows are selected by the dst tag, columns by the src tag; each
# comment reads "dst x src". the two unused slots per row pad a row
# to 8 entries.
#
	swbeg		&48
tbl_fsglmul_op:
	short		fsglmul_norm		- tbl_fsglmul_op # NORM x NORM
	short		fsglmul_zero		- tbl_fsglmul_op # NORM x ZERO
	short		fsglmul_inf_src		- tbl_fsglmul_op # NORM x INF
	short		fsglmul_res_qnan	- tbl_fsglmul_op # NORM x QNAN
	short		fsglmul_norm		- tbl_fsglmul_op # NORM x DENORM
	short		fsglmul_res_snan	- tbl_fsglmul_op # NORM x SNAN
	short		tbl_fsglmul_op		- tbl_fsglmul_op #
	short		tbl_fsglmul_op		- tbl_fsglmul_op #

	short		fsglmul_zero		- tbl_fsglmul_op # ZERO x NORM
	short		fsglmul_zero		- tbl_fsglmul_op # ZERO x ZERO
	short		fsglmul_res_operr	- tbl_fsglmul_op # ZERO x INF
	short		fsglmul_res_qnan	- tbl_fsglmul_op # ZERO x QNAN
	short		fsglmul_zero		- tbl_fsglmul_op # ZERO x DENORM
	short		fsglmul_res_snan	- tbl_fsglmul_op # ZERO x SNAN
	short		tbl_fsglmul_op		- tbl_fsglmul_op #
	short		tbl_fsglmul_op		- tbl_fsglmul_op #

	short		fsglmul_inf_dst		- tbl_fsglmul_op # INF x NORM
	short		fsglmul_res_operr	- tbl_fsglmul_op # INF x ZERO
	short		fsglmul_inf_dst		- tbl_fsglmul_op # INF x INF
	short		fsglmul_res_qnan	- tbl_fsglmul_op # INF x QNAN
	short		fsglmul_inf_dst		- tbl_fsglmul_op # INF x DENORM
	short		fsglmul_res_snan	- tbl_fsglmul_op # INF x SNAN
	short		tbl_fsglmul_op		- tbl_fsglmul_op #
	short		tbl_fsglmul_op		- tbl_fsglmul_op #

	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x NORM
	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x ZERO
	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x INF
	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x QNAN
	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x DENORM
	short		fsglmul_res_snan	- tbl_fsglmul_op # QNAN x SNAN
	short		tbl_fsglmul_op		- tbl_fsglmul_op #
	short		tbl_fsglmul_op		- tbl_fsglmul_op #

	short		fsglmul_norm		- tbl_fsglmul_op # DENORM x NORM
	short		fsglmul_zero		- tbl_fsglmul_op # DENORM x ZERO
	short		fsglmul_inf_src		- tbl_fsglmul_op # DENORM x INF
	short		fsglmul_res_qnan	- tbl_fsglmul_op # DENORM x QNAN
	short		fsglmul_norm		- tbl_fsglmul_op # DENORM x DENORM
	short		fsglmul_res_snan	- tbl_fsglmul_op # DENORM x SNAN
	short		tbl_fsglmul_op		- tbl_fsglmul_op #
	short		tbl_fsglmul_op		- tbl_fsglmul_op #

	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x NORM
	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x ZERO
	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x INF
	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x QNAN
	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x DENORM
	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x SNAN
	short		tbl_fsglmul_op		- tbl_fsglmul_op #
	short		tbl_fsglmul_op		- tbl_fsglmul_op #

#
# the table holds 16-bit offsets, so the special cases go through these
# local long-branch stubs to reach the shared handlers.
#
fsglmul_res_operr:
	bra.l		res_operr
fsglmul_res_snan:
	bra.l		res_snan
fsglmul_res_qnan:
	bra.l		res_qnan
fsglmul_zero:
	bra.l		fmul_zero
fsglmul_inf_src:
	bra.l		fmul_inf_src
fsglmul_inf_dst:
	bra.l		fmul_inf_dst

#########################################################################
# XDEF **************************************************************** #
#	fsgldiv(): emulates the fsgldiv instruction			#
#									#
# XREF **************************************************************** #
#	scale_to_zero_src() - scale src exponent to zero		#
#	scale_to_zero_dst() - scale dst exponent to zero		#
#	unf_res4() - return default underflow result for sglop		#
#	ovf_res() - return default overflow result			#
#	res_qnan() - return QNAN result					#
#	res_snan() - return SNAN result					#
#									#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision source operand		#
#	a1 = pointer to extended precision destination operand		#
#	d0  rnd prec,mode						#
#									#
# OUTPUT ************************************************************** #
#	fp0 = result							#
#	fp1 = EXOP (if exception occurred)				#
#									#
# ALGORITHM *********************************************************** #
#	Handle NANs, infinities, and zeroes as special cases. Divide	#
#	norms/denorms into ext/sgl/dbl precision.			#
#	For norms/denorms, scale the exponents such that a divide	#
#	instruction won't cause an exception. Use the regular fsgldiv to #
#	compute a result. Check if the regular operands would have taken #
#	an exception. If so, return the default overflow/underflow result #
#	and return the EXOP if exceptions are enabled.
#	Else, scale the result operand to the proper exponent.		#
#									#
#########################################################################

#
# fsgldiv: entry point. d0 (rnd prec,mode) is stashed in L_SCR3, then the
# destination and source tags are merged into d1 = (DTAG << 3) | STAG.
# A zero index is the NORM / NORM entry of tbl_fsgldiv_op and falls
# straight through to fsgldiv_norm; any other index is dispatched via
# fsgldiv_not_norm.
#
	global		fsgldiv
fsgldiv:
	mov.l		%d0,L_SCR3(%a6)		# store rnd info

	clr.w		%d1
	mov.b		DTAG(%a6),%d1
	lsl.b		&0x3,%d1
	or.b		STAG(%a6),%d1		# combine dst,src tags

	bne.w		fsgldiv_not_norm	# optimize on non-norm input

#
# DIVIDE: NORMs and DENORMs ONLY!
#
# Copy both operands to the scratch areas (dst -> FP_SCR1, src -> FP_SCR0),
# scale each one, and combine the two scale factors in d0. The combined
# factor is then compared against two limits to choose between the
# "may overflow", "may underflow", "underflow" and "normal" paths.
#
fsgldiv_norm:
	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	bsr.l		scale_to_zero_src	# calculate scale factor 1
	mov.l		%d0,-(%sp)		# save scale factor 1

	bsr.l		scale_to_zero_dst	# calculate scale factor 2

	neg.l		(%sp)			# (%sp) = -(scale factor 1)
	add.l		%d0,(%sp)		# S.F. = scale2 - scale1

	mov.w		2+L_SCR3(%a6),%d1	# fetch precision,mode
	lsr.b		&0x6,%d1		# NOTE(review): d1 appears unused before being overwritten on every path below
	mov.l		(%sp)+,%d0		# d0 = combined scale factor
	cmpi.l		%d0,&0x3fff-0x7ffe	# might result overflow?
	ble.w		fsgldiv_may_ovfl	# possibly

	cmpi.l		%d0,&0x3fff-0x0000	# will result underflow?
	beq.w		fsgldiv_may_unfl	# maybe
	bgt.w		fsgldiv_unfl		# yes; go handle underflow

#
# Neither limit was hit: do the divide with the user's rounding info and
# merge the resulting FPSR bits into USER_FPSR.
#
fsgldiv_normal:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsgldiv.x	FP_SCR0(%a6),%fp0	# perform sgl divide

	fmov.l		%fpsr,%d1		# save FPSR
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

#
# Common exit: rebuild the result's exponent word as
# {old sign, (exp - S.F.)} and return the result in fp0.
# d2 is used as scratch and preserved across the sequence.
#
fsgldiv_normal_exit:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store result on stack
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# apply scale factor (exp - S.F.)
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts

#
# the divide operation MAY overflow: perform it, then check whether the
# rescaled exponent is still below 0x7fff.
#
fsgldiv_may_ovfl:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsgldiv.x	FP_SCR0(%a6),%fp0	# execute divide

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX,N

	fmovm.x		&0x01,-(%sp)		# save result to stack
	mov.w		(%sp),%d1		# fetch new exponent
	add.l		&0xc,%sp		# clear result
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# apply scale factor (exp - S.F.)
	cmp.l		%d1,&0x7fff		# did divide overflow?
	blt.b		fsgldiv_normal_exit	# no; finish as a normal result

#
# Overflow: set the overflow status bits, then either return only the
# default result (exceptions disabled) or additionally build the EXOP.
#
fsgldiv_ovfl_tst:
	or.w		&ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fsgldiv_ovfl_ena	# yes

fsgldiv_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
	andi.b		&0x30,%d0		# kill precision
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	rts

#
# OVFL or INEX is enabled: EXOP = result with exponent
# ((exp - S.F.) - 0x6000) & 0x7fff and the original sign, returned in fp1.
# Control then rejoins fsgldiv_ovfl_dis for the default result.
#
fsgldiv_ovfl_ena:
	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack

	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# apply scale factor (exp - S.F.)
	subi.l		&0x6000,%d1		# subtract new bias
	andi.w		&0x7fff,%d1		# clear ms bit
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.b		fsgldiv_ovfl_dis

#
# Underflow: redo the divide using RZ as the rounding mode, then hand the
# result to unf_res4 for the default result. If UNFL or INEX is enabled
# an EXOP is produced first.
#
fsgldiv_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsgldiv.x	FP_SCR0(%a6),%fp0	# execute sgl divide

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fsgldiv_unfl_ena	# yes

fsgldiv_unfl_dis:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result

	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res4		# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# 'Z' bit may have been set
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	rts

#
# UNFL is enabled.
# EXOP = divide redone with the user's rounding info, exponent
# ((exp - S.F.) + 0x6000) & 0x7fff and the original sign, returned in fp1.
#
fsgldiv_unfl_ena:
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsgldiv.x	FP_SCR0(%a6),%fp1	# execute sgl divide

	fmov.l		&0x0,%fpcr		# clear FPCR

	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# apply scale factor (exp - S.F.)
	addi.l		&0x6000,%d1		# add bias
	andi.w		&0x7fff,%d1		# clear top bit
	or.w		%d2,%d1			# concat old sign, new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.b		fsgldiv_unfl_dis

#
# the divide operation MAY underflow:
#
fsgldiv_may_unfl:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsgldiv.x	FP_SCR0(%a6),%fp0	# execute sgl divide

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# fp1 = |result|
	fcmp.b		%fp1,&0x1		# compare |result| with 1
	fbgt.w		fsgldiv_normal_exit	# |result| > 1; no underflow occurred
	fblt.w		fsgldiv_unfl		# |result| < 1; underflow occurred

#
# we still don't know if underflow occurred. result is ~ equal to 1. but,
# we don't know if the result was an underflow that rounded up to a 1
# or a normalized number that rounded down to a 1. so, redo the entire
# operation using RZ as the rounding mode to see what the pre-rounded
# result is. this case should be relatively rare.
#
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into %fp1

	clr.l		%d1			# clear scratch register
	ori.b		&rz_mode*0x10,%d1	# force RZ rnd mode

	fmov.l		%d1,%fpcr		# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsgldiv.x	FP_SCR0(%a6),%fp1	# execute sgl divide

	fmov.l		&0x0,%fpcr		# clear FPCR
	fabs.x		%fp1			# make absolute value
	fcmp.b		%fp1,&0x1		# compare |result| with 1
	fbge.w		fsgldiv_normal_exit	# |result| >= 1; no underflow occurred
	bra.w		fsgldiv_unfl		# |result| < 1; underflow occurred

############################################################################

#
# Divide: inputs are not both normalized; what are they?
#
# d1 holds (DTAG << 3) | STAG. It indexes a table of 16-bit offsets that
# are relative to tbl_fsgldiv_op; the selected offset is then used for a
# pc-relative jump. Table comments read "dst / src". The last two slots of
# every row hold a zero offset (tbl_fsgldiv_op - tbl_fsgldiv_op).
#
fsgldiv_not_norm:
	mov.w		(tbl_fsgldiv_op.b,%pc,%d1.w*2),%d1
	jmp		(tbl_fsgldiv_op.b,%pc,%d1.w*1)

	swbeg		&48
tbl_fsgldiv_op:
	short		fsgldiv_norm		- tbl_fsgldiv_op # NORM / NORM
	short		fsgldiv_inf_load	- tbl_fsgldiv_op # NORM / ZERO
	short		fsgldiv_zero_load	- tbl_fsgldiv_op # NORM / INF
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # NORM / QNAN
	short		fsgldiv_norm		- tbl_fsgldiv_op # NORM / DENORM
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # NORM / SNAN
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #

	short		fsgldiv_zero_load	- tbl_fsgldiv_op # ZERO / NORM
	short		fsgldiv_res_operr	- tbl_fsgldiv_op # ZERO / ZERO
	short		fsgldiv_zero_load	- tbl_fsgldiv_op # ZERO / INF
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # ZERO / QNAN
	short		fsgldiv_zero_load	- tbl_fsgldiv_op # ZERO / DENORM
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # ZERO / SNAN
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #

	short		fsgldiv_inf_dst		- tbl_fsgldiv_op # INF / NORM
	short		fsgldiv_inf_dst		- tbl_fsgldiv_op # INF / ZERO
	short		fsgldiv_res_operr	- tbl_fsgldiv_op # INF / INF
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # INF / QNAN
	short		fsgldiv_inf_dst		- tbl_fsgldiv_op # INF / DENORM
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # INF / SNAN
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #

	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / NORM
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / ZERO
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / INF
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / QNAN
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / DENORM
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # QNAN / SNAN
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #

	short		fsgldiv_norm		- tbl_fsgldiv_op # DENORM / NORM
	short		fsgldiv_inf_load	- tbl_fsgldiv_op # DENORM / ZERO
	short		fsgldiv_zero_load	- tbl_fsgldiv_op # DENORM / INF
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # DENORM / QNAN
	short		fsgldiv_norm		- tbl_fsgldiv_op # DENORM / DENORM
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # DENORM / SNAN
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #

	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / NORM
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / ZERO
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / INF
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / QNAN
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / DENORM
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / SNAN
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #

#
# Local stubs: the table above stores 16-bit offsets, so each special
# case lands here and then takes a long branch to the shared handler.
#
fsgldiv_res_qnan:
	bra.l		res_qnan
fsgldiv_res_snan:
	bra.l		res_snan
fsgldiv_res_operr:
	bra.l		res_operr
fsgldiv_inf_load:
	bra.l		fdiv_inf_load
fsgldiv_zero_load:
	bra.l		fdiv_zero_load
fsgldiv_inf_dst:
	bra.l		fdiv_inf_dst

#########################################################################
# XDEF ****************************************************************	#
#	fadd(): emulates the fadd instruction				#
#	fsadd(): emulates the fsadd instruction				#
#	fdadd(): emulates the fdadd instruction				#
#									#
# XREF ****************************************************************	#
#	addsub_scaler2() - scale the operands so they won't take exc	#
#	ovf_res() - return default overflow result			#
#	unf_res() - return default underflow result			#
#	res_qnan() - set QNAN result					#
#	res_snan() - set SNAN result					#
#	res_operr() - set OPERR result					#
#	scale_to_zero_src() - set src operand exponent equal to zero	#
#	scale_to_zero_dst() - set dst operand exponent equal to zero	#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision source operand		#
#	a1 = pointer to extended precision destination operand		#
#									#
# OUTPUT **************************************************************	#
#	fp0 = result							#
#	fp1 = EXOP (if exception occurred)				#
#									#
# ALGORITHM ***********************************************************	#
#	Handle NANs, infinities, and zeroes as special cases. Divide	#
# norms into extended, single, and double precision.			#
#	Do addition after scaling exponents such that exception won't	#
# occur. Then, check result exponent to see if exception would have	#
# occurred. If so, return default result and maybe EXOP. Else, insert	#
# the correct result exponent and return. Set FPSR bits as appropriate.	#
#									#
#########################################################################

#
# fsadd / fdadd: keep only bits 5:4 of d0 (the field the rest of this
# routine extracts as the rounding mode), force single or double
# precision, and continue in fadd.
#
	global		fsadd
fsadd:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&s_mode*0x10,%d0	# insert sgl prec
	bra.b		fadd

	global		fdadd
fdadd:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&d_mode*0x10,%d0	# insert dbl prec

#
# fadd: d0 (rnd prec,mode) is stashed in L_SCR3. The prec/mode byte is the
# LOW byte of that longword, i.e. 3+L_SCR3(%a6). The operand tags are
# merged into d1 = (DTAG << 3) | STAG; a zero index is the NORM + NORM
# case and falls through to fadd_norm.
#
	global		fadd
fadd:
	mov.l		%d0,L_SCR3(%a6)		# store rnd info

	clr.w		%d1
	mov.b		DTAG(%a6),%d1
	lsl.b		&0x3,%d1
	or.b		STAG(%a6),%d1		# combine dst,src tags

	bne.w		fadd_not_norm		# optimize on non-norm input

#
# ADD: norms and denorms
#
fadd_norm:
	bsr.l		addsub_scaler2		# scale exponents

#
# Entry used by the ZERO + {NORM,DENORM} cases as well. Expects the scaled
# operands in FP_SCR1 (dst) and FP_SCR0 (src) and the scale factor in d0.
#
fadd_zero_entry:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fadd.x		FP_SCR0(%a6),%fp0	# execute add

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# fetch INEX2,N,Z

	or.l		%d1,USER_FPSR(%a6)	# save exc and ccode bits

	fbeq.w		fadd_zero_exit		# if result is zero, end now

	mov.l		%d2,-(%sp)		# save d2

	fmovm.x		&0x01,-(%sp)		# save result to stack

	mov.w		2+L_SCR3(%a6),%d1
	lsr.b		&0x6,%d1		# d1 = precision index into tables

	mov.w		(%sp),%d2		# fetch new sign, exp
	andi.l		&0x7fff,%d2		# strip sign
	sub.l		%d0,%d2			# apply scale factor (exp - S.F.)

	cmp.l		%d2,(tbl_fadd_ovfl.b,%pc,%d1.w*4) # is it an overflow?
	bge.b		fadd_ovfl		# yes

	cmp.l		%d2,(tbl_fadd_unfl.b,%pc,%d1.w*4) # is it an underflow?
	blt.w		fadd_unfl		# yes
	beq.w		fadd_may_unfl		# maybe; go find out

#
# No exception: patch the rescaled exponent (d2) into the stacked result
# and return it in fp0. Stack on entry: result (12 bytes), saved d2.
#
fadd_normal:
	mov.w		(%sp),%d1
	andi.w		&0x8000,%d1		# keep sign
	or.w		%d2,%d1			# concat sign,new exp
	mov.w		%d1,(%sp)		# insert new exponent

	fmovm.x		(%sp)+,&0x80		# return result in fp0

	mov.l		(%sp)+,%d2		# restore d2
	rts

fadd_zero_exit:
#	fmov.s		&0x00000000,%fp0	# return zero in fp0
	rts

# exponent limits, indexed by the precision field (ext, sgl, dbl)
tbl_fadd_ovfl:
	long		0x7fff			# ext ovfl
	long		0x407f			# sgl ovfl
	long		0x43ff			# dbl ovfl

tbl_fadd_unfl:
	long		0x0000			# ext unfl
	long		0x3f81			# sgl unfl
	long		0x3c01			# dbl unfl

#
# Overflow: set the status bits, then return the default result and,
# if OVFL or INEX is enabled, also an EXOP in fp1.
#
fadd_ovfl:
	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fadd_ovfl_ena		# yes

	add.l		&0xc,%sp		# discard stacked result
fadd_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	mov.l		(%sp)+,%d2		# restore d2
	rts

#
# NOTE: modified from the original Motorola code, which read the byte at
# L_SCR3(%a6). That is the most-significant byte of the stored longword,
# while the precision bits live in the low byte (see 3+L_SCR3 in
# fadd_zero_2_chk_rm and the mov.l/andi.b pair in fadd_unfl_ena).
# The test now looks at the byte that actually holds the precision.
#
fadd_ovfl_ena:
	mov.b		3+L_SCR3(%a6),%d1	# fetch rnd prec,mode byte
	andi.b		&0xc0,%d1		# is precision extended?
	bne.b		fadd_ovfl_ena_sd	# no; prec = sgl or dbl

fadd_ovfl_ena_cont:
	mov.w		(%sp),%d1
	andi.w		&0x8000,%d1		# keep sign
	subi.l		&0x6000,%d2		# subtract new bias
	andi.w		&0x7fff,%d2		# clear top bit
	or.w		%d2,%d1			# concat sign,new exp
	mov.w		%d1,(%sp)		# insert new exponent

	fmovm.x		(%sp)+,&0x40		# return EXOP in fp1
	bra.b		fadd_ovfl_dis

#
# sgl/dbl precision: redo the add with only the rounding mode in the FPCR
# and replace the stacked result with the new one before building the EXOP.
#
fadd_ovfl_ena_sd:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0x30,%d1		# keep rnd mode
	fmov.l		%d1,%fpcr		# set FPCR

	fadd.x		FP_SCR0(%a6),%fp0	# execute add

	fmov.l		&0x0,%fpcr		# clear FPCR

	add.l		&0xc,%sp		# drop old stacked result
	fmovm.x		&0x01,-(%sp)		# stack the new one
	bra.b		fadd_ovfl_ena_cont

#
# Underflow: redo the add using RZ, then let unf_res build the default
# result. If UNFL or INEX is enabled an EXOP is produced first.
#
fadd_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	add.l		&0xc,%sp		# discard stacked result

	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fadd.x		FP_SCR0(%a6),%fp0	# execute add

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# save status

	or.l		%d1,USER_FPSR(%a6)	# save INEX,N

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fadd_unfl_ena		# yes

fadd_unfl_dis:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result

	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# 'Z' bit may have been set
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	mov.l		(%sp)+,%d2		# restore d2
	rts

#
# UNFL or INEX is enabled: EXOP = add redone (extended precision: user's
# FPCR; sgl/dbl: rounding mode only) with exponent
# ((exp - S.F.) + 0x6000) & 0x7fff and the original sign, returned in fp1.
# d2 is free to use as scratch here: it was saved before the exponent check.
#
fadd_unfl_ena:
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# is precision extended?
	bne.b		fadd_unfl_ena_sd	# no; sgl or dbl

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

fadd_unfl_ena_cont:
	fmov.l		&0x0,%fpsr		# clear FPSR

	fadd.x		FP_SCR0(%a6),%fp1	# execute add

	fmov.l		&0x0,%fpcr		# clear FPCR

	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# apply scale factor (exp - S.F.)
	addi.l		&0x6000,%d1		# add new bias
	andi.w		&0x7fff,%d1		# clear top bit
	or.w		%d2,%d1			# concat sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.w		fadd_unfl_dis

fadd_unfl_ena_sd:
	mov.l		L_SCR3(%a6),%d1
	andi.b		&0x30,%d1		# use only rnd mode
	fmov.l		%d1,%fpcr		# set FPCR

	bra.b		fadd_unfl_ena_cont

#
# result is equal to the smallest normalized number in the selected precision
# if the precision is extended, this result could not have come from an
# underflow that rounded up.
#
fadd_may_unfl:
	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# is precision extended?
	beq.w		fadd_normal		# yes; no underflow occurred

	mov.l		0x4(%sp),%d1		# extract hi(man)
	cmpi.l		%d1,&0x80000000		# is hi(man) = 0x80000000?
	bne.w		fadd_normal		# no; no underflow occurred

	tst.l		0x8(%sp)		# is lo(man) = 0x0?
	bne.w		fadd_normal		# no; no underflow occurred

	btst		&inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
	beq.w		fadd_normal		# no; no underflow occurred

#
# ok, so now the result has an exponent equal to the smallest normalized
# exponent for the selected precision. also, the mantissa is equal to
# 0x8000000000000000 and this mantissa is the result of rounding non-zero
# g,r,s.
# now, we must determine whether the pre-rounded result was an underflow
# rounded "up" or a normalized number rounded "down".
# so, we do this by re-executing the add using RZ as the rounding mode and
# seeing if the new result is smaller or equal to the current result.
#
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# keep rnd prec
	ori.b		&rz_mode*0x10,%d1	# insert rnd mode
	fmov.l		%d1,%fpcr		# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fadd.x		FP_SCR0(%a6),%fp1	# execute add

	fmov.l		&0x0,%fpcr		# clear FPCR

	fabs.x		%fp0			# compare absolute values
	fabs.x		%fp1
	fcmp.x		%fp0,%fp1		# is first result > second?

	fbgt.w		fadd_unfl		# yes; it's an underflow
	bra.w		fadd_normal		# no; it's not an underflow

##########################################################################

#
# Add: inputs are not both normalized; what are they?
#
# d1 holds (DTAG << 3) | STAG and indexes a table of 16-bit offsets
# relative to tbl_fadd_op. Table comments read "dst + src". The last two
# slots of every row hold a zero offset.
#
fadd_not_norm:
	mov.w		(tbl_fadd_op.b,%pc,%d1.w*2),%d1
	jmp		(tbl_fadd_op.b,%pc,%d1.w*1)

	swbeg		&48
tbl_fadd_op:
	short		fadd_norm	- tbl_fadd_op # NORM + NORM
	short		fadd_zero_src	- tbl_fadd_op # NORM + ZERO
	short		fadd_inf_src	- tbl_fadd_op # NORM + INF
	short		fadd_res_qnan	- tbl_fadd_op # NORM + QNAN
	short		fadd_norm	- tbl_fadd_op # NORM + DENORM
	short		fadd_res_snan	- tbl_fadd_op # NORM + SNAN
	short		tbl_fadd_op	- tbl_fadd_op #
	short		tbl_fadd_op	- tbl_fadd_op #

	short		fadd_zero_dst	- tbl_fadd_op # ZERO + NORM
	short		fadd_zero_2	- tbl_fadd_op # ZERO + ZERO
	short		fadd_inf_src	- tbl_fadd_op # ZERO + INF
	short		fadd_res_qnan	- tbl_fadd_op # ZERO + QNAN
	short		fadd_zero_dst	- tbl_fadd_op # ZERO + DENORM
	short		fadd_res_snan	- tbl_fadd_op # ZERO + SNAN
	short		tbl_fadd_op	- tbl_fadd_op #
	short		tbl_fadd_op	- tbl_fadd_op #

	short		fadd_inf_dst	- tbl_fadd_op # INF + NORM
	short		fadd_inf_dst	- tbl_fadd_op # INF + ZERO
	short		fadd_inf_2	- tbl_fadd_op # INF + INF
	short		fadd_res_qnan	- tbl_fadd_op # INF + QNAN
	short		fadd_inf_dst	- tbl_fadd_op # INF + DENORM
	short		fadd_res_snan	- tbl_fadd_op # INF + SNAN
	short		tbl_fadd_op	- tbl_fadd_op #
	short		tbl_fadd_op	- tbl_fadd_op #

	short		fadd_res_qnan	- tbl_fadd_op # QNAN + NORM
	short		fadd_res_qnan	- tbl_fadd_op # QNAN + ZERO
	short		fadd_res_qnan	- tbl_fadd_op # QNAN + INF
	short		fadd_res_qnan	- tbl_fadd_op # QNAN + QNAN
	short		fadd_res_qnan	- tbl_fadd_op # QNAN + DENORM
	short		fadd_res_snan	- tbl_fadd_op # QNAN + SNAN
	short		tbl_fadd_op	- tbl_fadd_op #
	short		tbl_fadd_op	- tbl_fadd_op #

	short		fadd_norm	- tbl_fadd_op # DENORM + NORM
	short		fadd_zero_src	- tbl_fadd_op # DENORM + ZERO
	short		fadd_inf_src	- tbl_fadd_op # DENORM + INF
	short		fadd_res_qnan	- tbl_fadd_op # DENORM + QNAN
	short		fadd_norm	- tbl_fadd_op # DENORM + DENORM
	short		fadd_res_snan	- tbl_fadd_op # DENORM + SNAN
	short		tbl_fadd_op	- tbl_fadd_op #
	short		tbl_fadd_op	- tbl_fadd_op #

	short		fadd_res_snan	- tbl_fadd_op # SNAN + NORM
	short		fadd_res_snan	- tbl_fadd_op # SNAN + ZERO
	short		fadd_res_snan	- tbl_fadd_op # SNAN + INF
	short		fadd_res_snan	- tbl_fadd_op # SNAN + QNAN
	short		fadd_res_snan	- tbl_fadd_op # SNAN + DENORM
	short		fadd_res_snan	- tbl_fadd_op # SNAN + SNAN
	short		tbl_fadd_op	- tbl_fadd_op #
	short		tbl_fadd_op	- tbl_fadd_op #

fadd_res_qnan:
	bra.l		res_qnan
fadd_res_snan:
	bra.l		res_snan

#
# both operands are ZEROes
#
fadd_zero_2:
	mov.b		SRC_EX(%a0),%d0		# are the signs opposite
	mov.b		DST_EX(%a1),%d1
	eor.b		%d0,%d1			# d1 = sign xor; d0 keeps src sign
	bmi.w		fadd_zero_2_chk_rm	# weed out (-ZERO)+(+ZERO)

# the signs are the same. so determine whether they are positive or negative
# and return the appropriately signed zero.
	tst.b		%d0			# are ZEROes positive or negative?
	bmi.b		fadd_zero_rm		# negative
	fmov.s		&0x00000000,%fp0	# return +ZERO
	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
	rts

#
# the ZEROes have opposite signs:
# - Therefore, we return +ZERO if the rounding modes are RN,RZ, or RP.
# - -ZERO is returned in the case of RM.
#
fadd_zero_2_chk_rm:
	mov.b		3+L_SCR3(%a6),%d1
	andi.b		&0x30,%d1		# extract rnd mode
	cmpi.b		%d1,&rm_mode*0x10	# is rnd mode == RM?
	beq.b		fadd_zero_rm		# yes
	fmov.s		&0x00000000,%fp0	# return +ZERO
	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
	rts

fadd_zero_rm:
	fmov.s		&0x80000000,%fp0	# return -ZERO
	mov.b		&neg_bmask+z_bmask,FPSR_CC(%a6) # set NEG/Z
	rts

#
# one operand is a ZERO and the other is a DENORM or NORM. scale
# the DENORM or NORM and jump to the regular fadd routine.
#
fadd_zero_dst:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l		scale_to_zero_src	# scale the operand
	clr.w		FP_SCR1_EX(%a6)		# dst = zero
	clr.l		FP_SCR1_HI(%a6)
	clr.l		FP_SCR1_LO(%a6)
	bra.w		fadd_zero_entry		# go execute fadd

fadd_zero_src:
	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
	bsr.l		scale_to_zero_dst	# scale the operand
	clr.w		FP_SCR0_EX(%a6)		# src = zero
	clr.l		FP_SCR0_HI(%a6)
	clr.l		FP_SCR0_LO(%a6)
	bra.w		fadd_zero_entry		# go execute fadd

#
# both operands are INFs. an OPERR will result if the INFs have
# different signs. else, an INF of the same sign is returned
#
fadd_inf_2:
	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
	mov.b		DST_EX(%a1),%d1
	eor.b		%d1,%d0
	bmi.l		res_operr		# weed out (-INF)+(+INF)

# ok, so it's not an OPERR. but, we do have to remember to return the
# src INF since that's where the 881/882 gets the j-bit from...
# (execution falls through into fadd_inf_src)

#
# the src operand is an INF; the dst is one of {ZERO, INF, DENORM, NORM}
#
fadd_inf_src:
	fmovm.x		SRC(%a0),&0x80		# return src INF
	tst.b		SRC_EX(%a0)		# is INF positive?
	bpl.b		fadd_inf_done		# yes; we're done
	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
	rts

#
# the dst operand is an INF; the src is one of {ZERO, DENORM, NORM}
#
fadd_inf_dst:
	fmovm.x		DST(%a1),&0x80		# return dst INF
	tst.b		DST_EX(%a1)		# is INF positive?
	bpl.b		fadd_inf_done		# yes; we're done
	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
	rts

fadd_inf_done:
	mov.b		&inf_bmask,FPSR_CC(%a6) # set INF
	rts

#########################################################################
# XDEF ****************************************************************	#
#	fsub(): emulates the fsub instruction				#
#	fssub(): emulates the fssub instruction				#
#	fdsub(): emulates the fdsub instruction				#
#									#
# XREF ****************************************************************	#
#	addsub_scaler2() - scale the operands so they won't take exc	#
#	ovf_res() - return default overflow result			#
#	unf_res() - return default underflow result			#
#	res_qnan() - set QNAN result					#
#	res_snan() - set SNAN result					#
#	res_operr() - set OPERR result					#
#	scale_to_zero_src() - set src operand exponent equal to zero	#
#	scale_to_zero_dst() - set dst operand exponent equal to zero	#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision source operand		#
#	a1 = pointer to extended precision
#	     destination operand					#
#									#
# OUTPUT **************************************************************	#
#	fp0 = result							#
#	fp1 = EXOP (if exception occurred)				#
#									#
# ALGORITHM ***********************************************************	#
#	Handle NANs, infinities, and zeroes as special cases. Divide	#
# norms into extended, single, and double precision.			#
#	Do subtraction after scaling exponents such that exception won't#
# occur. Then, check result exponent to see if exception would have	#
# occurred. If so, return default result and maybe EXOP. Else, insert	#
# the correct result exponent and return. Set FPSR bits as appropriate.	#
#									#
#########################################################################

#
# fssub / fdsub: keep only bits 5:4 of d0 (the field the rest of this
# routine extracts as the rounding mode), force single or double
# precision, and continue in fsub.
#
	global		fssub
fssub:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&s_mode*0x10,%d0	# insert sgl prec
	bra.b		fsub

	global		fdsub
fdsub:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&d_mode*0x10,%d0	# insert dbl prec

#
# fsub: computes dst - src. d0 (rnd prec,mode) is stashed in L_SCR3; the
# prec/mode byte is the LOW byte of that longword, i.e. 3+L_SCR3(%a6).
# The operand tags are merged into d1 = (DTAG << 3) | STAG; a zero index
# is the NORM - NORM case and falls through to fsub_norm.
#
	global		fsub
fsub:
	mov.l		%d0,L_SCR3(%a6)		# store rnd info

	clr.w		%d1
	mov.b		DTAG(%a6),%d1
	lsl.b		&0x3,%d1
	or.b		STAG(%a6),%d1		# combine dst,src tags

	bne.w		fsub_not_norm		# optimize on non-norm input

#
# SUB: norms and denorms
#
fsub_norm:
	bsr.l		addsub_scaler2		# scale exponents

#
# Entry used by the ZERO - {NORM,DENORM} cases as well. Expects the scaled
# operands in FP_SCR1 (dst) and FP_SCR0 (src) and the scale factor in d0.
#
fsub_zero_entry:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fsub.x		FP_SCR0(%a6),%fp0	# execute subtract

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# fetch INEX2, N, Z

	or.l		%d1,USER_FPSR(%a6)	# save exc and ccode bits

	fbeq.w		fsub_zero_exit		# if result zero, end now

	mov.l		%d2,-(%sp)		# save d2

	fmovm.x		&0x01,-(%sp)		# save result to stack

	mov.w		2+L_SCR3(%a6),%d1
	lsr.b		&0x6,%d1		# d1 = precision index into tables

	mov.w		(%sp),%d2		# fetch new sign, exp
	andi.l		&0x7fff,%d2		# strip sign
	sub.l		%d0,%d2			# apply scale factor (exp - S.F.)

	cmp.l		%d2,(tbl_fsub_ovfl.b,%pc,%d1.w*4) # is it an overflow?
	bge.b		fsub_ovfl		# yes

	cmp.l		%d2,(tbl_fsub_unfl.b,%pc,%d1.w*4) # is it an underflow?
	blt.w		fsub_unfl		# yes
	beq.w		fsub_may_unfl		# maybe; go find out

#
# No exception: patch the rescaled exponent (d2) into the stacked result
# and return it in fp0. Stack on entry: result (12 bytes), saved d2.
#
fsub_normal:
	mov.w		(%sp),%d1
	andi.w		&0x8000,%d1		# keep sign
	or.w		%d2,%d1			# concat sign,new exp
	mov.w		%d1,(%sp)		# insert new exponent

	fmovm.x		(%sp)+,&0x80		# return result in fp0

	mov.l		(%sp)+,%d2		# restore d2
	rts

fsub_zero_exit:
#	fmov.s		&0x00000000,%fp0	# return zero in fp0
	rts

# exponent limits, indexed by the precision field (ext, sgl, dbl)
tbl_fsub_ovfl:
	long		0x7fff			# ext ovfl
	long		0x407f			# sgl ovfl
	long		0x43ff			# dbl ovfl

tbl_fsub_unfl:
	long		0x0000			# ext unfl
	long		0x3f81			# sgl unfl
	long		0x3c01			# dbl unfl

#
# Overflow: set the status bits, then return the default result and,
# if OVFL or INEX is enabled, also an EXOP in fp1.
#
fsub_ovfl:
	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fsub_ovfl_ena		# yes

	add.l		&0xc,%sp		# discard stacked result
fsub_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	mov.l		(%sp)+,%d2		# restore d2
	rts

#
# NOTE: modified from the original Motorola code, which read the byte at
# L_SCR3(%a6). That is the most-significant byte of the stored longword,
# while the precision bits live in the low byte (see 3+L_SCR3 in
# fsub_zero_2_chk_rm and the mov.l/andi.b pair in fsub_unfl_ena).
# The test now looks at the byte that actually holds the precision.
#
fsub_ovfl_ena:
	mov.b		3+L_SCR3(%a6),%d1	# fetch rnd prec,mode byte
	andi.b		&0xc0,%d1		# is precision extended?
	bne.b		fsub_ovfl_ena_sd	# no; prec = sgl or dbl

fsub_ovfl_ena_cont:
	mov.w		(%sp),%d1		# fetch {sgn,exp}
	andi.w		&0x8000,%d1		# keep sign
	subi.l		&0x6000,%d2		# subtract new bias
	andi.w		&0x7fff,%d2		# clear top bit
	or.w		%d2,%d1			# concat sign,exp
	mov.w		%d1,(%sp)		# insert new exponent

	fmovm.x		(%sp)+,&0x40		# return EXOP in fp1
	bra.b		fsub_ovfl_dis

#
# sgl/dbl precision: redo the subtract with only the rounding mode in the
# FPCR and replace the stacked result with the new one before building
# the EXOP.
#
fsub_ovfl_ena_sd:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0x30,%d1		# keep rnd mode
	fmov.l		%d1,%fpcr		# set FPCR

	fsub.x		FP_SCR0(%a6),%fp0	# execute subtract

	fmov.l		&0x0,%fpcr		# clear FPCR

	add.l		&0xc,%sp		# drop old stacked result
	fmovm.x		&0x01,-(%sp)		# stack the new one
	bra.b		fsub_ovfl_ena_cont

#
# Underflow: redo the subtract using RZ, then let unf_res build the
# default result. If UNFL or INEX is enabled an EXOP is produced first.
#
fsub_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	add.l		&0xc,%sp		# discard stacked result

	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsub.x		FP_SCR0(%a6),%fp0	# execute subtract

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# save status

	or.l		%d1,USER_FPSR(%a6)	# save INEX,N

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fsub_unfl_ena		# yes

fsub_unfl_dis:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result

	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# 'Z' may have been set
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	mov.l		(%sp)+,%d2		# restore d2
	rts

#
# UNFL or INEX is enabled: EXOP = subtract redone (extended precision:
# user's FPCR; sgl/dbl: rounding mode only) with exponent
# ((exp - S.F.) + 0x6000) & 0x7fff and the original sign, returned in fp1.
# d2 is free to use as scratch here: it was saved before the exponent check.
#
fsub_unfl_ena:
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# is precision extended?
	bne.b		fsub_unfl_ena_sd	# no; sgl or dbl

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

fsub_unfl_ena_cont:
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsub.x		FP_SCR0(%a6),%fp1	# execute subtract

	fmov.l		&0x0,%fpcr		# clear FPCR

	fmovm.x		&0x40,FP_SCR0(%a6)	# store result to stack
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# apply scale factor (exp - S.F.)
	addi.l		&0x6000,%d1		# add new bias
	andi.w		&0x7fff,%d1		# clear top bit
	or.w		%d2,%d1			# concat sgn,exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.w		fsub_unfl_dis

fsub_unfl_ena_sd:
	mov.l		L_SCR3(%a6),%d1
	andi.b		&0x30,%d1		# use only rnd mode
	fmov.l		%d1,%fpcr		# set FPCR

	bra.b		fsub_unfl_ena_cont

#
# result is equal to the smallest normalized number in the selected precision
# if the precision is extended, this result could not have come from an
# underflow that rounded up.
#
fsub_may_unfl:
	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# is precision extended?
	beq.w		fsub_normal		# yes; no underflow occurred

	mov.l		0x4(%sp),%d1		# extract hi(man)
	cmpi.l		%d1,&0x80000000		# is hi(man) = 0x80000000?
	bne.w		fsub_normal		# no; no underflow occurred

	tst.l		0x8(%sp)		# is lo(man) = 0x0?
	bne.w		fsub_normal		# no; no underflow occurred

	btst		&inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
	beq.w		fsub_normal		# no; no underflow occurred

#
# ok, so now the result has an exponent equal to the smallest normalized
# exponent for the selected precision. also, the mantissa is equal to
# 0x8000000000000000 and this mantissa is the result of rounding non-zero
# g,r,s.
# now, we must determine whether the pre-rounded result was an underflow
# rounded "up" or a normalized number rounded "down".
# so, we do this by re-executing the subtract using RZ as the rounding mode
# and seeing if the new result is smaller or equal to the current result.
#
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# keep rnd prec
	ori.b		&rz_mode*0x10,%d1	# insert rnd mode
	fmov.l		%d1,%fpcr		# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsub.x		FP_SCR0(%a6),%fp1	# execute subtract

	fmov.l		&0x0,%fpcr		# clear FPCR

	fabs.x		%fp0			# compare absolute values
	fabs.x		%fp1
	fcmp.x		%fp0,%fp1		# is first result > second?

	fbgt.w		fsub_unfl		# yes; it's an underflow
	bra.w		fsub_normal		# no; it's not an underflow

##########################################################################

#
# Sub: inputs are not both normalized; what are they?
#
# d1 holds (DTAG << 3) | STAG and indexes a table of 16-bit offsets
# relative to tbl_fsub_op. Table comments read "dst - src". The last two
# slots of every row hold a zero offset.
#
fsub_not_norm:
	mov.w		(tbl_fsub_op.b,%pc,%d1.w*2),%d1
	jmp		(tbl_fsub_op.b,%pc,%d1.w*1)

	swbeg		&48
tbl_fsub_op:
	short		fsub_norm	- tbl_fsub_op # NORM - NORM
	short		fsub_zero_src	- tbl_fsub_op # NORM - ZERO
	short		fsub_inf_src	- tbl_fsub_op # NORM - INF
	short		fsub_res_qnan	- tbl_fsub_op # NORM - QNAN
	short		fsub_norm	- tbl_fsub_op # NORM - DENORM
	short		fsub_res_snan	- tbl_fsub_op # NORM - SNAN
	short		tbl_fsub_op	- tbl_fsub_op #
	short		tbl_fsub_op	- tbl_fsub_op #

	short		fsub_zero_dst	- tbl_fsub_op # ZERO - NORM
	short		fsub_zero_2	- tbl_fsub_op # ZERO - ZERO
	short		fsub_inf_src	- tbl_fsub_op # ZERO - INF
	short		fsub_res_qnan	- tbl_fsub_op # ZERO - QNAN
	short		fsub_zero_dst	- tbl_fsub_op # ZERO - DENORM
	short		fsub_res_snan	- tbl_fsub_op # ZERO - SNAN
	short		tbl_fsub_op	- tbl_fsub_op #
	short		tbl_fsub_op	- tbl_fsub_op #

	short		fsub_inf_dst	- tbl_fsub_op # INF - NORM
	short		fsub_inf_dst	- tbl_fsub_op # INF - ZERO
	short		fsub_inf_2	- tbl_fsub_op # INF - INF
	short		fsub_res_qnan	- tbl_fsub_op # INF - QNAN
	short		fsub_inf_dst	- tbl_fsub_op # INF - DENORM
	short		fsub_res_snan	- tbl_fsub_op # INF - SNAN
	short		tbl_fsub_op	- tbl_fsub_op #
	short		tbl_fsub_op	- tbl_fsub_op #

	short		fsub_res_qnan	- tbl_fsub_op # QNAN - NORM
	short		fsub_res_qnan	- tbl_fsub_op # QNAN - ZERO
	short		fsub_res_qnan	- tbl_fsub_op # QNAN - INF
	short		fsub_res_qnan	- tbl_fsub_op # QNAN - QNAN
	short		fsub_res_qnan	- tbl_fsub_op # QNAN - DENORM
	short		fsub_res_snan	- tbl_fsub_op # QNAN - SNAN
	short		tbl_fsub_op	- tbl_fsub_op #
	short		tbl_fsub_op	- tbl_fsub_op #

	short		fsub_norm	- tbl_fsub_op # DENORM - NORM
	short		fsub_zero_src	- tbl_fsub_op # DENORM - ZERO
	short		fsub_inf_src	- tbl_fsub_op # DENORM - INF
	short		fsub_res_qnan	- tbl_fsub_op # DENORM - QNAN
	short		fsub_norm	- tbl_fsub_op # DENORM - DENORM
	short		fsub_res_snan	- tbl_fsub_op # DENORM - SNAN
	short		tbl_fsub_op	- tbl_fsub_op #
	short		tbl_fsub_op	- tbl_fsub_op #

	short		fsub_res_snan	- tbl_fsub_op # SNAN - NORM
	short		fsub_res_snan	- tbl_fsub_op # SNAN - ZERO
	short		fsub_res_snan	- tbl_fsub_op # SNAN - INF
	short		fsub_res_snan	- tbl_fsub_op # SNAN - QNAN
	short		fsub_res_snan	- tbl_fsub_op # SNAN - DENORM
	short		fsub_res_snan	- tbl_fsub_op # SNAN - SNAN
	short		tbl_fsub_op	- tbl_fsub_op #
	short		tbl_fsub_op	- tbl_fsub_op #

fsub_res_qnan:
	bra.l		res_qnan
fsub_res_snan:
	bra.l		res_snan

#
# both operands are ZEROes
#
fsub_zero_2:
	mov.b		SRC_EX(%a0),%d0
	mov.b		DST_EX(%a1),%d1
	eor.b		%d1,%d0			# d0 = sign xor; d1 keeps dst sign
	bpl.b		fsub_zero_2_chk_rm	# same signs

# the signs are opposite, so, return a ZERO w/ the sign of the dst ZERO
#
# NOTE: modified from the original Motorola code, which tested %d0 here.
# After the eor above %d0 holds src^dst, whose sign bit is always set on
# this path, so (+ZERO)-(-ZERO) wrongly returned -ZERO. The dst sign is
# still intact in %d1.
	tst.b		%d1			# is dst negative?
	bmi.b		fsub_zero_2_rm		# yes
	fmov.s		&0x00000000,%fp0	# no; return +ZERO
	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
	rts

#
# the ZEROes have the same signs:
# - Therefore, we return +ZERO if the rounding mode is RN,RZ, or RP
# - -ZERO is returned in the case of RM.
#
fsub_zero_2_chk_rm:
	mov.b		3+L_SCR3(%a6),%d1
	andi.b		&0x30,%d1		# extract rnd mode
	cmpi.b		%d1,&rm_mode*0x10	# is rnd mode = RM?
	beq.b		fsub_zero_2_rm		# yes
	fmov.s		&0x00000000,%fp0	# no; return +ZERO
	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
	rts

fsub_zero_2_rm:
	fmov.s		&0x80000000,%fp0	# return -ZERO
	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/NEG
	rts

#
# one operand is a ZERO and the other is a DENORM or a NORM.
# scale the DENORM or NORM and jump to the regular fsub routine.
#
fsub_zero_dst:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l		scale_to_zero_src	# scale the operand
	clr.w		FP_SCR1_EX(%a6)		# dst = zero
	clr.l		FP_SCR1_HI(%a6)
	clr.l		FP_SCR1_LO(%a6)
	bra.w		fsub_zero_entry		# go execute fsub

fsub_zero_src:
	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
	bsr.l		scale_to_zero_dst	# scale the operand
	clr.w		FP_SCR0_EX(%a6)		# src = zero
	clr.l		FP_SCR0_HI(%a6)
	clr.l		FP_SCR0_LO(%a6)
	bra.w		fsub_zero_entry		# go execute fsub

#
# both operands are INFs. an OPERR will result if the INFs have the
# same signs. else, the negated src INF is returned.
#
fsub_inf_2:
	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
	mov.b		DST_EX(%a1),%d1
	eor.b		%d1,%d0
	bpl.l		res_operr		# weed out (+INF)-(+INF), (-INF)-(-INF)

# ok, so it's not an OPERR. but we do have to remember to return
# the src INF since that's where the 881/882 gets the j-bit.
# (execution falls through into fsub_inf_src)

#
# the src operand is an INF: return it with its sign inverted.
#
fsub_inf_src:
	fmovm.x		SRC(%a0),&0x80		# return src INF
	fneg.x		%fp0			# invert sign
	fbge.w		fsub_inf_done		# sign is now positive
	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
	rts

#
# the dst operand is an INF; the src is one of {ZERO, DENORM, NORM}
#
fsub_inf_dst:
	fmovm.x		DST(%a1),&0x80		# return dst INF
	tst.b		DST_EX(%a1)		# is INF negative?
	bpl.b		fsub_inf_done		# no
	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
	rts

fsub_inf_done:
	mov.b		&inf_bmask,FPSR_CC(%a6) # set INF
	rts

#########################################################################
# XDEF ****************************************************************	#
#	fsqrt(): emulates the fsqrt instruction				#
#	fssqrt(): emulates the fssqrt instruction			#
#	fdsqrt(): emulates the fdsqrt instruction			#
#									#
# XREF ****************************************************************	#
#	scale_sqrt() - scale the source operand				#
#	unf_res() - return default underflow result			#
#	ovf_res() - return default overflow result			#
#	res_qnan_1op() - return QNAN result				#
#	res_snan_1op() - return SNAN result				#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended
#	precision source operand
#	d0 = rnd prec,mode
#
# OUTPUT **************************************************************
#	fp0 = result
#	fp1 = EXOP (if exception occurred)
#
# ALGORITHM ***********************************************************
#	Handle NANs, infinities, and zeroes as special cases. Divide
#	norms/denorms into ext/sgl/dbl precision.
#	For norms/denorms, scale the exponents such that a sqrt
#	instruction won't cause an exception. Use the regular fsqrt to
#	compute a result. Check if the regular operands would have taken
#	an exception. If so, return the default overflow/underflow result
#	and return the EXOP if exceptions are enabled. Else, scale the
#	result operand to the proper exponent.
#
#########################################################################

# fssqrt/fdsqrt force the rounding precision field of d0 and then share the
# fsqrt body (fdsqrt simply falls through into it).
	global		fssqrt
fssqrt:
	andi.b		&0x30,%d0		# keep rnd mode, drop rnd prec
	ori.b		&s_mode*0x10,%d0	# force sgl precision
	bra.b		fsqrt

	global		fdsqrt
fdsqrt:
	andi.b		&0x30,%d0		# keep rnd mode, drop rnd prec
	ori.b		&d_mode*0x10,%d0	# force dbl precision

	global		fsqrt
fsqrt:
	mov.l		%d0,L_SCR3(%a6)		# remember rnd prec,mode
	clr.w		%d1
	mov.b		STAG(%a6),%d1		# d1 = source operand tag
	bne.w		fsqrt_not_norm		# non-zero tag: not a NORM

#
# Square root of a NORM.
#
fsqrt_norm:
	tst.b		SRC_EX(%a0)		# negative operand?
	bmi.l		res_operr		# yes; sqrt of a negative is an OPERR

	andi.b		&0xc0,%d0		# isolate rnd prec; zero = extended
	bne.b		fsqrt_not_ext		# sgl or dbl: handled below

# extended precision: operate directly on the source, no scaling needed
	fmov.l		L_SCR3(%a6),%fpcr	# install rnd prec,mode
	fmov.l		&0x0,%fpsr		# start from a clean FPSR

	fsqrt.x		(%a0),%fp0		# fp0 = sqrt(src)

	fmov.l		%fpsr,%d1
	or.l		%d1,USER_FPSR(%a6)	# merge N,INEX into user FPSR

	rts

#
# Square root of a DENORM.
#
fsqrt_denorm:
	tst.b		SRC_EX(%a0)		# negative operand?
	bmi.l		res_operr		# yes

	andi.b		&0xc0,%d0		# isolate rnd prec; zero = extended
	bne.b		fsqrt_not_ext		# sgl or dbl: handled below

# extended precision: work on a scaled copy held in FP_SCR0
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	bsr.l		scale_sqrt		# d0 = scale factor

	bra.w		fsqrt_sd_normal

#
# rounding precision is single or double
#
fsqrt_not_ext:
	cmpi.b		%d0,&s_mode*0x10	# single?
	bne.w		fsqrt_dbl		# no; double

#
# result is to be rounded to single precision
#
fsqrt_sgl:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	bsr.l		scale_sqrt		# d0 = scale factor

	cmpi.l		%d0,&0x3fff-0x3f81	# compare with underflow limit
	beq.w		fsqrt_sd_may_unfl	# on the limit; look closer
	bgt.w		fsqrt_sd_unfl		# beyond it; underflow
	cmpi.l		%d0,&0x3fff-0x407f	# compare with overflow limit
	beq.w		fsqrt_sd_may_ovfl	# on the limit; look closer
	blt.w		fsqrt_sd_ovfl		# beyond it; overflow

#
# neither overflow nor underflow: compute on the scaled operand, then put
# the scale factor back into the result exponent.
#
fsqrt_sd_normal:
	fmov.l		&0x0,%fpsr		# start from a clean FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# install rnd prec,mode

	fsqrt.x		FP_SCR0(%a6),%fp0	# fp0 = sqrt(scaled operand)

	fmov.l		%fpsr,%d1		# collect status
	fmov.l		&0x0,%fpcr		# back to default FPCR

	or.l		%d1,USER_FPSR(%a6)	# merge INEX2,N into user FPSR

fsqrt_sd_normal_exit:
	mov.l		%d2,-(%sp)		# d2 is used as scratch
	fmovm.x		&0x80,FP_SCR0(%a6)	# spill result to FP_SCR0
	mov.w		FP_SCR0_EX(%a6),%d1	# d1 = {sgn,exp} of result
	mov.l		%d1,%d2			# d2 = copy for the sign
	andi.l		&0x7fff,%d1		# d1 = exponent only
	sub.l		%d0,%d1			# exponent -= scale factor
	andi.w		&0x8000,%d2		# d2 = sign only
	or.w		%d1,%d2			# d2 = {sgn,new exp}
	mov.w		%d2,FP_SCR0_EX(%a6)	# write adjusted exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x80	# fp0 = final result
	rts

#
# result is to be rounded to double precision
#
fsqrt_dbl:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	bsr.l		scale_sqrt		# d0 = scale factor

	cmpi.l		%d0,&0x3fff-0x3c01	# compare with underflow limit
	beq.w		fsqrt_sd_may_unfl	# on the limit; look closer
	bgt.b		fsqrt_sd_unfl		# beyond it; underflow
	cmpi.l		%d0,&0x3fff-0x43ff	# compare with overflow limit
	beq.w		fsqrt_sd_may_ovfl	# on the limit; look closer
	blt.w		fsqrt_sd_ovfl		# beyond it; overflow
	bra.w		fsqrt_sd_normal		# neither; go handle normally

# we're on the line here and the distinguishing characteristic is whether
# the exponent is 3fff or 3ffe. if it's 3fff (low exponent bit set), then
# it's a safe number; otherwise fall through to underflow.
fsqrt_sd_may_unfl:
	btst		&0x0,1+FP_SCR0_EX(%a6)	# low bit of scaled exponent set?
	bne.w		fsqrt_sd_normal		# yes, so no underflow

#
# the result WILL underflow in the destination precision
#
fsqrt_sd_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # flag the underflow

	fmov.l		&rz_mode*0x10,%fpcr	# compute with round-to-zero
	fmov.l		&0x0,%fpsr		# start from a clean FPSR

	fsqrt.x		FP_SCR0(%a6),%fp0	# fp0 = sqrt(scaled operand)

	fmov.l		%fpsr,%d1		# collect status
	fmov.l		&0x0,%fpcr		# back to default FPCR

	or.l		%d1,USER_FPSR(%a6)	# merge INEX2,N into user FPSR

# if underflow or inexact is enabled, go calculate EXOP first.
	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# UNFL or INEX enabled?
	bne.b		fsqrt_sd_unfl_ena	# yes

fsqrt_sd_unfl_dis:
	fmovm.x		&0x80,FP_SCR0(%a6)	# spill result to FP_SCR0

	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# build default underflow result
	or.b		%d0,FPSR_CC(%a6)	# unf_res may report 'Z'
	fmovm.x		FP_SCR0(%a6),&0x80	# fp0 = default result
	rts

#
# the result will underflow AND underflow (or inexact) is enabled.
# An EXOP with a re-biased exponent is returned in fp1 before the default
# result is produced.
# NOTE(review): fp0 has not been stored to FP_SCR0 at this point, so the
# mantissa/exponent copied below appear to be those of the scaled source
# operand rather than of the square root -- confirm whether that is intended.
#
fsqrt_sd_unfl_ena:
	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
	mov.w		FP_SCR0_EX(%a6),%d1	# d1 = {sgn,exp}

	mov.l		%d2,-(%sp)		# d2 is used as scratch
	mov.l		%d1,%d2			# d2 = copy for the sign
	andi.l		&0x7fff,%d1		# d1 = exponent only
	andi.w		&0x8000,%d2		# d2 = sign only
	sub.l		%d0,%d1			# exponent -= scale factor
	addi.l		&0x6000,%d1		# apply the EXOP bias adjustment
	andi.w		&0x7fff,%d1		# keep it a 15-bit exponent
	or.w		%d2,%d1			# d1 = {sgn,new exp}
	mov.w		%d1,FP_SCR1_EX(%a6)	# write EXOP exponent
	fmovm.x		FP_SCR1(%a6),&0x40	# fp1 = EXOP
	mov.l		(%sp)+,%d2		# restore d2
	bra.b		fsqrt_sd_unfl_dis	# then produce the default result

#
# the result WILL overflow in the destination precision
#
fsqrt_sd_ovfl:
	fmov.l		&0x0,%fpsr		# start from a clean FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# install rnd prec,mode

	fsqrt.x		FP_SCR0(%a6),%fp0	# fp0 = sqrt(scaled operand)

	fmov.l		&0x0,%fpcr		# back to default FPCR
	fmov.l		%fpsr,%d1		# collect status

	or.l		%d1,USER_FPSR(%a6)	# merge INEX2,N into user FPSR

fsqrt_sd_ovfl_tst:
	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# OVFL or INEX enabled?
	bne.b		fsqrt_sd_ovfl_ena	# yes

#
# OVFL is not enabled; therefore, we must create the default result by
# calling ovf_res().
#
fsqrt_sd_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne		%d1			# pass: sign of result
	mov.l		L_SCR3(%a6),%d0		# pass: rnd prec,mode
	bsr.l		ovf_res			# build default overflow result
	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
	fmovm.x		(%a0),&0x80		# fp0 = default result
	rts

#
# OVFL (or INEX) is enabled.
# the INEX2 bit has already been updated by the round to the correct
# precision. now build the EXOP with a re-biased exponent in fp1 (the FPSR
# is not altered here).
# NOTE(review): as on the underflow path, FP_SCR0 has not been overwritten
# with fp0 here, so the EXOP appears to come from the scaled source operand
# -- confirm.
#
fsqrt_sd_ovfl_ena:
	mov.l		%d2,-(%sp)		# d2 is used as scratch
	mov.w		FP_SCR0_EX(%a6),%d1	# d1 = {sgn,exp}
	mov.l		%d1,%d2			# d2 = copy for the sign
	andi.l		&0x7fff,%d1		# d1 = exponent only
	andi.w		&0x8000,%d2		# d2 = sign only
	sub.l		%d0,%d1			# exponent -= scale factor
	subi.l		&0x6000,%d1		# apply the EXOP bias adjustment
	andi.w		&0x7fff,%d1		# keep it a 15-bit exponent
	or.w		%d2,%d1			# d1 = {sgn,new exp}
	mov.w		%d1,FP_SCR0_EX(%a6)	# write EXOP exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# fp1 = EXOP
	mov.l		(%sp)+,%d2		# restore d2
	bra.b		fsqrt_sd_ovfl_dis	# then produce the default result

#
# the result MAY overflow. so...
#
fsqrt_sd_may_ovfl:
	btst		&0x0,1+FP_SCR0_EX(%a6)	# low bit of scaled exponent set?
	bne.w		fsqrt_sd_ovfl		# yes, so overflow

	fmov.l		&0x0,%fpsr		# start from a clean FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# install rnd prec,mode

	fsqrt.x		FP_SCR0(%a6),%fp0	# fp0 = sqrt(scaled operand)

	fmov.l		%fpsr,%d1		# collect status
	fmov.l		&0x0,%fpcr		# back to default FPCR

	or.l		%d1,USER_FPSR(%a6)	# merge INEX2,N into user FPSR

	fmov.x		%fp0,%fp1		# work on a copy of the result
	fcmp.b		%fp1,&0x1		# is result >= 1?
	fbge.w		fsqrt_sd_ovfl_tst	# yes; rounding pushed it over

# no, it didn't overflow; we have correct result
	bra.w		fsqrt_sd_normal_exit

##########################################################################

#
# input is not normalized; what is it?
# d1 holds the source tag loaded at fsqrt.
#
fsqrt_not_norm:
	cmpi.b		%d1,&DENORM		# DENORM?
	beq.w		fsqrt_denorm
	cmpi.b		%d1,&ZERO		# ZERO?
	beq.b		fsqrt_zero
	cmpi.b		%d1,&INF		# INF?
	beq.b		fsqrt_inf
	cmpi.b		%d1,&SNAN		# SNAN?
	beq.l		res_snan_1op
	bra.l		res_qnan_1op		# anything else is treated as QNAN

#
#	fsqrt(+0) = +0
#	fsqrt(-0) = -0
#	fsqrt(+INF) = +INF
#	fsqrt(-INF) = OPERR
#
fsqrt_zero:
	tst.b		SRC_EX(%a0)		# sign of the ZERO?
	bmi.b		fsqrt_zero_m		# negative
fsqrt_zero_p:
	fmov.s		&0x00000000,%fp0	# fp0 = +ZERO
	mov.b		&z_bmask,FPSR_CC(%a6)	# ccodes: Z
	rts
fsqrt_zero_m:
	fmov.s		&0x80000000,%fp0	# fp0 = -ZERO
	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# ccodes: Z,N
	rts

fsqrt_inf:
	tst.b		SRC_EX(%a0)		# sign of the INF?
	bmi.l		res_operr		# negative: OPERR
fsqrt_inf_p:
	fmovm.x		SRC(%a0),&0x80		# fp0 = +INF (the source itself)
	mov.b		&inf_bmask,FPSR_CC(%a6)	# ccodes: I
	rts

#########################################################################
# XDEF ****************************************************************
#	fetch_dreg():
#	fetch register according to index in d1
#
# XREF ****************************************************************
#	None
#
# INPUT ***************************************************************
#	d1 = index of register to fetch from
#
# OUTPUT **************************************************************
#	d0 = value of register fetched
#
# ALGORITHM ***********************************************************
#	According to the index value in d1 which can range from zero
#	to fifteen, load the corresponding register file value (where
#	address register indexes start at 8). D0/D1/A0/A1/A6/A7 are on the
#	stack. The rest should still be in their original places.
#
#########################################################################

# d1 is only read here (the table offset goes into d0), so the index is
# still valid for a following store_dreg call.
	global		fetch_dreg
fetch_dreg:
	mov.w		(tbl_fdreg.b,%pc,%d1.w*2),%d0	# d0 = offset of handler
	jmp		(tbl_fdreg.b,%pc,%d0.w*1)	# jump to tbl_fdreg + d0

# one 16-bit offset per register: indexes 0-7 = d0-d7, 8-15 = a0-a7
tbl_fdreg:
	short		fdreg0 - tbl_fdreg
	short		fdreg1 - tbl_fdreg
	short		fdreg2 - tbl_fdreg
	short		fdreg3 - tbl_fdreg
	short		fdreg4 - tbl_fdreg
	short		fdreg5 - tbl_fdreg
	short		fdreg6 - tbl_fdreg
	short		fdreg7 - tbl_fdreg
	short		fdreg8 - tbl_fdreg
	short		fdreg9 - tbl_fdreg
	short		fdrega - tbl_fdreg
	short		fdregb - tbl_fdreg
	short		fdregc - tbl_fdreg
	short		fdregd - tbl_fdreg
	short		fdrege - tbl_fdreg
	short		fdregf - tbl_fdreg

# d0/d1: read from the save area in the exception frame
fdreg0:
	mov.l		EXC_DREGS+0x0(%a6),%d0
	rts
fdreg1:
	mov.l		EXC_DREGS+0x4(%a6),%d0
	rts
# d2-d7: still live in the registers themselves
fdreg2:
	mov.l		%d2,%d0
	rts
fdreg3:
	mov.l		%d3,%d0
	rts
fdreg4:
	mov.l		%d4,%d0
	rts
fdreg5:
	mov.l		%d5,%d0
	rts
fdreg6:
	mov.l		%d6,%d0
	rts
fdreg7:
	mov.l		%d7,%d0
	rts
# a0/a1: read from the save area (the two longwords after d0/d1)
fdreg8:
	mov.l		EXC_DREGS+0x8(%a6),%d0
	rts
fdreg9:
	mov.l		EXC_DREGS+0xc(%a6),%d0
	rts
# a2-a5: still live in the registers themselves
fdrega:
	mov.l		%a2,%d0
	rts
fdregb:
	mov.l		%a3,%d0
	rts
fdregc:
	mov.l		%a4,%d0
	rts
fdregd:
	mov.l		%a5,%d0
	rts
# a6: the saved value is the longword that the current a6 points at
fdrege:
	mov.l		(%a6),%d0
	rts
# a7: read from its own slot in the exception frame
fdregf:
	mov.l		EXC_A7(%a6),%d0
	rts

#########################################################################
# XDEF ****************************************************************
#	store_dreg_l(): store longword to data register specified by d1
#
# XREF ****************************************************************
#	None
#
# INPUT ***************************************************************
#	d0 = longword value to store
#	d1 = index of register to store to
#
# OUTPUT **************************************************************
#	(data register is updated)
#
# ALGORITHM ***********************************************************
#	According to the index value in d1, store the longword value
#	in d0 to the corresponding data register. D0/D1 are on the stack
#	while the rest are in their initial places.
#
#########################################################################

# store_dreg_l: write the longword in d0 to data register number d1 (0-7).
# d1 is overwritten with the table offset.
	global		store_dreg_l
store_dreg_l:
	mov.w		(tbl_sdregl.b,%pc,%d1.w*2),%d1	# d1 = offset of handler
	jmp		(tbl_sdregl.b,%pc,%d1.w*1)	# jump to tbl_sdregl + d1

tbl_sdregl:
	short		sdregl0 - tbl_sdregl
	short		sdregl1 - tbl_sdregl
	short		sdregl2 - tbl_sdregl
	short		sdregl3 - tbl_sdregl
	short		sdregl4 - tbl_sdregl
	short		sdregl5 - tbl_sdregl
	short		sdregl6 - tbl_sdregl
	short		sdregl7 - tbl_sdregl

# d0/d1: update the saved copies in the exception frame
sdregl0:
	mov.l		%d0,EXC_DREGS+0x0(%a6)
	rts
sdregl1:
	mov.l		%d0,EXC_DREGS+0x4(%a6)
	rts
# d2-d7: update the live registers
sdregl2:
	mov.l		%d0,%d2
	rts
sdregl3:
	mov.l		%d0,%d3
	rts
sdregl4:
	mov.l		%d0,%d4
	rts
sdregl5:
	mov.l		%d0,%d5
	rts
sdregl6:
	mov.l		%d0,%d6
	rts
sdregl7:
	mov.l		%d0,%d7
	rts

#########################################################################
# XDEF ****************************************************************
#	store_dreg_w(): store word to data register specified by d1
#
# XREF ****************************************************************
#	None
#
# INPUT ***************************************************************
#	d0 = word value to store
#	d1 = index of register to store to
#
# OUTPUT **************************************************************
#	(data register is updated)
#
# ALGORITHM ***********************************************************
#	According to the index value in d1, store the word value
#	in d0 to the corresponding data register. D0/D1 are on the stack
#	while the rest are in their initial places.
#
#########################################################################

# store_dreg_w: write the low word of d0 to the low word of data register
# number d1 (0-7). d1 is overwritten with the table offset.
	global		store_dreg_w
store_dreg_w:
	mov.w		(tbl_sdregw.b,%pc,%d1.w*2),%d1	# d1 = offset of handler
	jmp		(tbl_sdregw.b,%pc,%d1.w*1)	# jump to tbl_sdregw + d1

tbl_sdregw:
	short		sdregw0 - tbl_sdregw
	short		sdregw1 - tbl_sdregw
	short		sdregw2 - tbl_sdregw
	short		sdregw3 - tbl_sdregw
	short		sdregw4 - tbl_sdregw
	short		sdregw5 - tbl_sdregw
	short		sdregw6 - tbl_sdregw
	short		sdregw7 - tbl_sdregw

# d0/d1: the "2+" selects the low-order word of the saved longword
sdregw0:
	mov.w		%d0,2+EXC_DREGS+0x0(%a6)
	rts
sdregw1:
	mov.w		%d0,2+EXC_DREGS+0x4(%a6)
	rts
# d2-d7: update the live registers (upper word untouched)
sdregw2:
	mov.w		%d0,%d2
	rts
sdregw3:
	mov.w		%d0,%d3
	rts
sdregw4:
	mov.w		%d0,%d4
	rts
sdregw5:
	mov.w		%d0,%d5
	rts
sdregw6:
	mov.w		%d0,%d6
	rts
sdregw7:
	mov.w		%d0,%d7
	rts

#########################################################################
# XDEF ****************************************************************
#	store_dreg_b(): store byte to data register specified by d1
#
# XREF ****************************************************************
#	None
#
# INPUT ***************************************************************
#	d0 = byte value to store
#	d1 = index of register to store to
#
# OUTPUT **************************************************************
#	(data register is updated)
#
# ALGORITHM ***********************************************************
#	According to the index value in d1, store the byte value
#	in d0 to the corresponding data register. D0/D1 are on the stack
#	while the rest are in their initial places.
#
#########################################################################

# store_dreg_b: write the low byte of d0 to the low byte of data register
# number d1 (0-7). d1 is overwritten with the table offset.
	global		store_dreg_b
store_dreg_b:
	mov.w		(tbl_sdregb.b,%pc,%d1.w*2),%d1	# d1 = offset of handler
	jmp		(tbl_sdregb.b,%pc,%d1.w*1)	# jump to tbl_sdregb + d1

tbl_sdregb:
	short		sdregb0 - tbl_sdregb
	short		sdregb1 - tbl_sdregb
	short		sdregb2 - tbl_sdregb
	short		sdregb3 - tbl_sdregb
	short		sdregb4 - tbl_sdregb
	short		sdregb5 - tbl_sdregb
	short		sdregb6 - tbl_sdregb
	short		sdregb7 - tbl_sdregb

# d0/d1: the "3+" selects the low-order byte of the saved longword
sdregb0:
	mov.b		%d0,3+EXC_DREGS+0x0(%a6)
	rts
sdregb1:
	mov.b		%d0,3+EXC_DREGS+0x4(%a6)
	rts
# d2-d7: update the live registers (upper three bytes untouched)
sdregb2:
	mov.b		%d0,%d2
	rts
sdregb3:
	mov.b		%d0,%d3
	rts
sdregb4:
	mov.b		%d0,%d4
	rts
sdregb5:
	mov.b		%d0,%d5
	rts
sdregb6:
	mov.b		%d0,%d6
	rts
sdregb7:
	mov.b		%d0,%d7
	rts

#########################################################################
# XDEF ****************************************************************
#	inc_areg(): increment an address register by the value in d0
#
# XREF ****************************************************************
#	None
#
# INPUT ***************************************************************
#	d0 = amount to increment by
#	d1 = index of address register to increment
#
# OUTPUT **************************************************************
#	(address register is updated)
#
# ALGORITHM ***********************************************************
#	Typically used for an instruction w/ a post-increment <ea>,
#	this routine adds the increment value in d0 to the address register
#	specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside
#	in their original places.
#	For a7, if the increment amount is one, then we have to
#	increment by two.
#	For any a7 update, set the mia7_flag so that if
#	an access error exception occurs later in emulation, this address
#	register update can be undone.
#
#########################################################################

# inc_areg: add d0 to address register number d1 (0-7).
# d1 is overwritten with the table offset.
	global		inc_areg
inc_areg:
	mov.w		(tbl_iareg.b,%pc,%d1.w*2),%d1	# d1 = offset of handler
	jmp		(tbl_iareg.b,%pc,%d1.w*1)	# jump to tbl_iareg + d1

tbl_iareg:
	short		iareg0 - tbl_iareg
	short		iareg1 - tbl_iareg
	short		iareg2 - tbl_iareg
	short		iareg3 - tbl_iareg
	short		iareg4 - tbl_iareg
	short		iareg5 - tbl_iareg
	short		iareg6 - tbl_iareg
	short		iareg7 - tbl_iareg

# a0/a1: adjust the saved copies in the exception frame
iareg0:
	add.l		%d0,EXC_DREGS+0x8(%a6)
	rts
iareg1:
	add.l		%d0,EXC_DREGS+0xc(%a6)
	rts
# a2-a5: adjust the live registers
iareg2:
	add.l		%d0,%a2
	rts
iareg3:
	add.l		%d0,%a3
	rts
iareg4:
	add.l		%d0,%a4
	rts
iareg5:
	add.l		%d0,%a5
	rts
# a6: adjust the saved value that the current a6 points at
iareg6:
	add.l		%d0,(%a6)
	rts
# a7: record the update in SPCOND_FLG, and turn a one-byte step into two
iareg7:
	mov.b		&mia7_flg,SPCOND_FLG(%a6)
	cmpi.b		%d0,&0x1		# byte-sized increment?
	beq.b		iareg7b			# yes; use two instead
	add.l		%d0,EXC_A7(%a6)
	rts
iareg7b:
	addq.l		&0x2,EXC_A7(%a6)
	rts

#########################################################################
# XDEF ****************************************************************
#	dec_areg(): decrement an address register by the value in d0
#
# XREF ****************************************************************
#	None
#
# INPUT ***************************************************************
#	d0 = amount to decrement by
#	d1 = index of address register to decrement
#
# OUTPUT **************************************************************
#	(address register is updated)
#
# ALGORITHM ***********************************************************
#	Typically used for an instruction w/ a pre-decrement <ea>,
#	this routine subtracts the decrement value in d0 from the address
#	register specified by d1. A0/A1/A6/A7 reside on the stack.
#	The rest reside
#	in their original places.
#	For a7, if the decrement amount is one, then we have to
#	decrement by two. For any a7 update, set the mda7_flag so that if
#	an access error exception occurs later in emulation, this address
#	register update can be undone.
#
#########################################################################

# dec_areg: subtract d0 from address register number d1 (0-7).
# d1 is overwritten with the table offset.
	global		dec_areg
dec_areg:
	mov.w		(tbl_dareg.b,%pc,%d1.w*2),%d1	# d1 = offset of handler
	jmp		(tbl_dareg.b,%pc,%d1.w*1)	# jump to tbl_dareg + d1

tbl_dareg:
	short		dareg0 - tbl_dareg
	short		dareg1 - tbl_dareg
	short		dareg2 - tbl_dareg
	short		dareg3 - tbl_dareg
	short		dareg4 - tbl_dareg
	short		dareg5 - tbl_dareg
	short		dareg6 - tbl_dareg
	short		dareg7 - tbl_dareg

# a0/a1: adjust the saved copies in the exception frame
dareg0:
	sub.l		%d0,EXC_DREGS+0x8(%a6)
	rts
dareg1:
	sub.l		%d0,EXC_DREGS+0xc(%a6)
	rts
# a2-a5: adjust the live registers
dareg2:
	sub.l		%d0,%a2
	rts
dareg3:
	sub.l		%d0,%a3
	rts
dareg4:
	sub.l		%d0,%a4
	rts
dareg5:
	sub.l		%d0,%a5
	rts
# a6: adjust the saved value that the current a6 points at
dareg6:
	sub.l		%d0,(%a6)
	rts
# a7: record the update in SPCOND_FLG, and turn a one-byte step into two
dareg7:
	mov.b		&mda7_flg,SPCOND_FLG(%a6)
	cmpi.b		%d0,&0x1		# byte-sized decrement?
	beq.b		dareg7b			# yes; use two instead
	sub.l		%d0,EXC_A7(%a6)
	rts
dareg7b:
	subq.l		&0x2,EXC_A7(%a6)
	rts

##############################################################################

#########################################################################
# XDEF ****************************************************************
#	load_fpn1(): load FP register value into FP_SRC(a6).
#
# XREF ****************************************************************
#	None
#
# INPUT ***************************************************************
#	d0 = index of FP register to load
#
# OUTPUT **************************************************************
#	FP_SRC(a6) = value loaded from FP register file
#
# ALGORITHM ***********************************************************
#	Using the index in d0, load FP_SRC(a6) with a number from the
#	FP register file.
#
#########################################################################

# load_fpn1: copy FP register number d0 (0-7) into FP_SRC(a6).
# Every handler also leaves a0 pointing at FP_SRC(a6); d0 is overwritten
# with the table offset.
	global		load_fpn1
load_fpn1:
	mov.w		(tbl_load_fpn1.b,%pc,%d0.w*2), %d0	# d0 = offset of handler
	jmp		(tbl_load_fpn1.b,%pc,%d0.w*1)		# jump to table + d0

tbl_load_fpn1:
	short		load_fpn1_0 - tbl_load_fpn1
	short		load_fpn1_1 - tbl_load_fpn1
	short		load_fpn1_2 - tbl_load_fpn1
	short		load_fpn1_3 - tbl_load_fpn1
	short		load_fpn1_4 - tbl_load_fpn1
	short		load_fpn1_5 - tbl_load_fpn1
	short		load_fpn1_6 - tbl_load_fpn1
	short		load_fpn1_7 - tbl_load_fpn1

# fp0/fp1 are taken from their saved images in the exception frame,
# copied as three longwords (12 bytes).
load_fpn1_0:
	mov.l		0+EXC_FP0(%a6), 0+FP_SRC(%a6)
	mov.l		4+EXC_FP0(%a6), 4+FP_SRC(%a6)
	mov.l		8+EXC_FP0(%a6), 8+FP_SRC(%a6)
	lea		FP_SRC(%a6), %a0
	rts
load_fpn1_1:
	mov.l		0+EXC_FP1(%a6), 0+FP_SRC(%a6)
	mov.l		4+EXC_FP1(%a6), 4+FP_SRC(%a6)
	mov.l		8+EXC_FP1(%a6), 8+FP_SRC(%a6)
	lea		FP_SRC(%a6), %a0
	rts
# fp2-fp7 are stored straight from the FP register file with fmovm;
# the register-list mask selects the one register named by the label.
load_fpn1_2:
	fmovm.x		&0x20, FP_SRC(%a6)
	lea		FP_SRC(%a6), %a0
	rts
load_fpn1_3:
	fmovm.x		&0x10, FP_SRC(%a6)
	lea		FP_SRC(%a6), %a0
	rts
load_fpn1_4:
	fmovm.x		&0x08, FP_SRC(%a6)
	lea		FP_SRC(%a6), %a0
	rts
load_fpn1_5:
	fmovm.x		&0x04, FP_SRC(%a6)
	lea		FP_SRC(%a6), %a0
	rts
load_fpn1_6:
	fmovm.x		&0x02, FP_SRC(%a6)
	lea		FP_SRC(%a6), %a0
	rts
load_fpn1_7:
	fmovm.x		&0x01, FP_SRC(%a6)
	lea		FP_SRC(%a6), %a0
	rts

#############################################################################

#########################################################################
# XDEF ****************************************************************
#	load_fpn2(): load FP register value into FP_DST(a6).
#
# XREF ****************************************************************
#	None
#
# INPUT ***************************************************************
#	d0 = index of FP register to load
#
# OUTPUT **************************************************************
#	FP_DST(a6) = value loaded from FP register file
#
# ALGORITHM ***********************************************************
#	Using the index in d0, load FP_DST(a6) with a number from the
#	FP register file.
#
#########################################################################

# load_fpn2: same as load_fpn1 but the target is FP_DST(a6); a0 is left
# pointing at FP_DST(a6) and d0 is overwritten with the table offset.
	global		load_fpn2
load_fpn2:
	mov.w		(tbl_load_fpn2.b,%pc,%d0.w*2), %d0	# d0 = offset of handler
	jmp		(tbl_load_fpn2.b,%pc,%d0.w*1)		# jump to table + d0

tbl_load_fpn2:
	short		load_fpn2_0 - tbl_load_fpn2
	short		load_fpn2_1 - tbl_load_fpn2
	short		load_fpn2_2 - tbl_load_fpn2
	short		load_fpn2_3 - tbl_load_fpn2
	short		load_fpn2_4 - tbl_load_fpn2
	short		load_fpn2_5 - tbl_load_fpn2
	short		load_fpn2_6 - tbl_load_fpn2
	short		load_fpn2_7 - tbl_load_fpn2

# fp0/fp1: copy the saved images from the exception frame
load_fpn2_0:
	mov.l		0+EXC_FP0(%a6), 0+FP_DST(%a6)
	mov.l		4+EXC_FP0(%a6), 4+FP_DST(%a6)
	mov.l		8+EXC_FP0(%a6), 8+FP_DST(%a6)
	lea		FP_DST(%a6), %a0
	rts
load_fpn2_1:
	mov.l		0+EXC_FP1(%a6), 0+FP_DST(%a6)
	mov.l		4+EXC_FP1(%a6), 4+FP_DST(%a6)
	mov.l		8+EXC_FP1(%a6), 8+FP_DST(%a6)
	lea		FP_DST(%a6), %a0
	rts
# fp2-fp7: store straight from the FP register file
load_fpn2_2:
	fmovm.x		&0x20, FP_DST(%a6)
	lea		FP_DST(%a6), %a0
	rts
load_fpn2_3:
	fmovm.x		&0x10, FP_DST(%a6)
	lea		FP_DST(%a6), %a0
	rts
load_fpn2_4:
	fmovm.x		&0x08, FP_DST(%a6)
	lea		FP_DST(%a6), %a0
	rts
load_fpn2_5:
	fmovm.x		&0x04, FP_DST(%a6)
	lea		FP_DST(%a6), %a0
	rts
load_fpn2_6:
	fmovm.x		&0x02, FP_DST(%a6)
	lea		FP_DST(%a6), %a0
	rts
load_fpn2_7:
	fmovm.x		&0x01, FP_DST(%a6)
	lea		FP_DST(%a6), %a0
	rts

#############################################################################

#########################################################################
# XDEF ****************************************************************
#	store_fpreg(): store an fp value to the fpreg designated d0.
#
# XREF ****************************************************************
#	None
#
# INPUT ***************************************************************
#	fp0 = extended precision value to store
#	d0 = index of floating-point register
#
# OUTPUT **************************************************************
#	None
#
# ALGORITHM ***********************************************************
#	Store the value in fp0 to the FP register designated by the
#	value in d0. The FP number can be DENORM or SNAN so we have to be
#	careful that we don't take an exception here.
#
#########################################################################

# store_fpreg: copy fp0 into FP register number d0 (0-7).
# d0 is overwritten with the table offset. All transfers use fmovm.
	global		store_fpreg
store_fpreg:
	mov.w		(tbl_store_fpreg.b,%pc,%d0.w*2), %d0	# d0 = offset of handler
	jmp		(tbl_store_fpreg.b,%pc,%d0.w*1)		# jump to table + d0

tbl_store_fpreg:
	short		store_fpreg_0 - tbl_store_fpreg
	short		store_fpreg_1 - tbl_store_fpreg
	short		store_fpreg_2 - tbl_store_fpreg
	short		store_fpreg_3 - tbl_store_fpreg
	short		store_fpreg_4 - tbl_store_fpreg
	short		store_fpreg_5 - tbl_store_fpreg
	short		store_fpreg_6 - tbl_store_fpreg
	short		store_fpreg_7 - tbl_store_fpreg

# fp0/fp1: the destination is the saved image in the exception frame
store_fpreg_0:
	fmovm.x		&0x80, EXC_FP0(%a6)
	rts
store_fpreg_1:
	fmovm.x		&0x80, EXC_FP1(%a6)
	rts
# fp2-fp7: bounce the value through the stack -- push it with one fmovm,
# then pop it into the target register with a second one.
store_fpreg_2:
	fmovm.x		&0x01, -(%sp)
	fmovm.x		(%sp)+, &0x20
	rts
store_fpreg_3:
	fmovm.x		&0x01, -(%sp)
	fmovm.x		(%sp)+, &0x10
	rts
store_fpreg_4:
	fmovm.x		&0x01, -(%sp)
	fmovm.x		(%sp)+, &0x08
	rts
store_fpreg_5:
	fmovm.x		&0x01, -(%sp)
	fmovm.x		(%sp)+, &0x04
	rts
store_fpreg_6:
	fmovm.x		&0x01, -(%sp)
	fmovm.x		(%sp)+, &0x02
	rts
store_fpreg_7:
	fmovm.x		&0x01, -(%sp)
	fmovm.x		(%sp)+, &0x01
	rts

#########################################################################
# XDEF ****************************************************************
#	get_packed(): fetch a packed operand from memory and then
#		      convert it to a floating-point binary number.
#
# XREF ****************************************************************
#	_dcalc_ea() - calculate the correct <ea>
#	_dmem_read() - fetch the packed operand from memory
#	facc_in_x() - the fetch failed so jump to special exit code
#	decbin() - convert packed to binary extended precision
#
# INPUT ***************************************************************
#	None
#
# OUTPUT **************************************************************
#	If no failure on _dmem_read():
#	FP_SRC(a6) = packed operand now as a binary FP number
#
# ALGORITHM ***********************************************************
#	Get the correct <ea> which is the value on the exception stack
#	frame w/ maybe a correction factor if the <ea> is -(an) or (an)+.
#	Then, fetch the operand from memory. If the fetch fails, exit
#	through facc_in_x().
#	If the packed operand is a ZERO,NAN, or INF, leave it in
#	FP_SRC(a6) as fetched. Else, call decbin() which will
#	convert the packed value to an extended precision binary value.
#
#########################################################################

# the stacked <ea> for packed is correct except for -(An).
# the base reg must be updated for both -(An) and (An)+.
	global		get_packed
get_packed:
	mov.l		&0xc,%d0		# operand size: 12 bytes
	bsr.l		_dcalc_ea		# fetch <ea>; correct An

	lea		FP_SRC(%a6),%a1		# pass: destination buffer
	mov.l		&0xc,%d0		# pass: byte count
	bsr.l		_dmem_read		# read the packed operand

	tst.l		%d1			# non-zero d1 = read failed
	bne.l		facc_in_x		# leave through the access-error exit

# An all-ones 15-bit exponent field marks an INF or a NAN.
	bfextu		FP_SRC(%a6){&1:&15},%d0	# d0 = bits 1..15 of first longword
	cmpi.w		%d0,&0x7fff		# INF or NAN?
	bne.b		gp_try_zero		# no
	rts					# yes; nothing to convert

# The operand is a ZERO when all mantissa digits are zero: the low nybble
# of the fourth byte plus the two following longwords. Anything else is an
# ordinary packed number.
gp_try_zero:
	mov.b		3+FP_SRC(%a6),%d0	# fourth byte of the operand
	andi.b		&0x0f,%d0		# keep only its low nybble
	bne.b		gp_not_spec		# non-zero digit
	tst.l		FP_SRC_HI(%a6)		# second longword zero?
	bne.b		gp_not_spec		# no
	tst.l		FP_SRC_LO(%a6)		# third longword zero?
	bne.b		gp_not_spec		# no
	rts					# operand is a ZERO
gp_not_spec:
	lea		FP_SRC(%a6),%a0		# pass: ptr to packed op
	bsr.l		decbin			# fp0 = binary extended value
	fmovm.x		&0x80,FP_SRC(%a6)	# make the converted value the src op
	rts

#########################################################################
# decbin(): Converts normalized packed bcd value pointed to by register
#	    a0 to extended-precision value in fp0.
#
# INPUT ***************************************************************
#	a0 = pointer to normalized packed bcd value
#
# OUTPUT **************************************************************
#	fp0 = exact fp representation of the packed bcd value.
#
# ALGORITHM ***********************************************************
#	Expected is a normal bcd (i.e.
#	non-exceptional; all inf, zero,					#
#	and NaN operands are dispatched without entering this routine)	#
#	value in 68881/882 format at location (a0).			#
#									#
#	A1. Convert the bcd exponent to binary by successive adds and	#
#	muls. Set the sign according to SE. Subtract 16 to compensate	#
#	for the mantissa which is to be interpreted as 17 integer	#
#	digits, rather than 1 integer and 16 fraction digits.		#
#	Note: this operation can never overflow.			#
#									#
#	A2. Convert the bcd mantissa to binary by successive		#
#	adds and muls in FP0. Set the sign according to SM.		#
#	The mantissa digits will be converted with the decimal point	#
#	assumed following the least-significant digit.			#
#	Note: this operation can never overflow.			#
#									#
#	A3. Count the number of leading/trailing zeros in the		#
#	bcd string.  If SE is positive, count the leading zeros;	#
#	if negative, count the trailing zeros.  Set the adjusted	#
#	exponent equal to the exponent from A1 and the zero count	#
#	added if SM = 1 and subtracted if SM = 0.  Scale the		#
#	mantissa the equivalent of forcing in the bcd value:		#
#									#
#	SM = 0	a non-zero digit in the integer position		#
#	SM = 1	a non-zero digit in Mant0, lsd of the fraction		#
#									#
#	this will ensure that any value, regardless of its		#
#	representation (ex. 0.1E2, 1E1, 10E0, 100E-1), is converted	#
#	consistently.							#
#									#
#	A4. Calculate the factor 10^exp in FP1 using a table of		#
#	10^(2^n) values.  To reduce the error in forming factors	#
#	greater than 10^27, a directed rounding scheme is used with	#
#	tables rounded to RN, RM, and RP, according to the table	#
#	in the comments of the pwrten section.				#
#									#
#	A5. Form the final binary number by scaling the mantissa by	#
#	the exponent factor.  This is done by multiplying the		#
#	mantissa in FP0 by the factor in FP1 if the adjusted		#
#	exponent sign is positive, and dividing FP0 by FP1 if		#
#	it is negative.							#
#									#
#	Clean up and return. Check if the final mul or div was inexact.	#
#	If so, set INEX1 in USER_FPSR.					#
#									#
# REGISTERS / SIDE EFFECTS (as implemented below) ********************* #
#	d2-d5 and fp1 are saved on entry and restored on exit.		#
#	d0, d1, a0 and a1 are overwritten; a0 is repointed at the	#
#	working copy in FP_SCR0(a6).  FPCR and FPSR are written with	#
#	zero before returning.						#
#									#
#########################################################################

#
#	PTENRN, PTENRM, and PTENRP are arrays of powers of 10 rounded
#	to nearest, minus, and plus, respectively.  The tables include
#	10**{1,2,4,8,16,32,64,128,256,512,1024,2048,4096}.  No rounding
#	is required until the power is greater than 27, however, all
#	tables include the first 5 for ease of indexing.
#
# RTABLE is indexed by {FPCR[6],FPCR[5],SM,SE} (one row per user rounding
# mode, one column per mantissa/exponent sign pair) and yields the rounding
# mode to use while building 10^exp: 0 = RN, 2 = RM, 3 = RP.
RTABLE:
	byte		0,0,0,0
	byte		2,3,2,3
	byte		2,3,3,2
	byte		3,2,2,3

	set		FNIBS,7			# dbf count: 8 digits per mantissa lword
	set		FSTRT,0			# bit offset of first mantissa digit

	set		ESTRT,4			# bit offset of first exponent digit
	set		EDIGITS,2		# dbf count: 3 exponent digits

	global		decbin
decbin:
	mov.l		0x0(%a0),FP_SCR0_EX(%a6) # make a copy of input
	mov.l		0x4(%a0),FP_SCR0_HI(%a6) # so we don't alter it
	mov.l		0x8(%a0),FP_SCR0_LO(%a6)

	lea		FP_SCR0(%a6),%a0	# a0 -> working copy from here on

	movm.l		&0x3c00,-(%sp)		# save d2-d5
#
# In predecrement mode the fmovm register mask is bit-reversed relative to
# the (An)+ form used for the restore at no_exc: bit 0 selects fp0 and
# bit 1 selects fp1.  The mask was previously &0x1, which pushed fp0, so
# the "restore fp1" at exit loaded fp1 with the entry value of fp0 instead
# of preserving it.  &0x2 pushes fp1, matching the &0x40 restore.  The
# amount of stack used (one extended register) is unchanged.
#
	fmovm.x		&0x2,-(%sp)		# save fp1
#
# Calculate exponent:
#  1. Copy bcd value in memory for use as a working copy.
#  2. Calculate absolute value of exponent in d1 by mul and add.
#  3. Correct for exponent sign.
#  4. Subtract 16 to compensate for interpreting the mant as all integer digits.
#     (i.e., all digits assumed left of the decimal point.)
#
# Register usage:
#
#  calc_e:
#	(*)  d0: temp digit storage
#	(*)  d1: accumulator for binary exponent
#	(*)  d2: digit count
#	(*)  d3: offset pointer
#	( )  d4: first word of bcd
#	( )  a0: pointer to working bcd value
#	( )  a6: frame base used to address FP_SCR0
#	(*)  FP_SCR0: working copy of original bcd value
#	(*)  (sp): magnitude of the adjusted exponent, pushed at e_save
#
calc_e:
	mov.l		&EDIGITS,%d2		# dbf count for the exponent digits
	mov.l		&ESTRT,%d3		# counter to pick up digits
	mov.l		(%a0),%d4		# get first word of bcd
	clr.l		%d1			# zero d1 for accumulator
e_gd:
	mulu.l		&0xa,%d1		# mul partial product by one digit place
	bfextu		%d4{%d3:&4},%d0		# get the digit and zero extend into d0
	add.l		%d0,%d1			# d1 = d1 + d0
	addq.b		&4,%d3			# advance d3 to the next digit
	dbf.w		%d2,e_gd		# if we have used all 3 digits, exit loop
	btst		&30,%d4			# get SE
	beq.b		e_pos			# don't negate if pos
	neg.l		%d1			# negate before subtracting
e_pos:
	sub.l		&16,%d1			# sub to compensate for shift of mant
	bge.b		e_save			# if still pos, do not neg
	neg.l		%d1			# now negative, make pos and set SE
	or.l		&0x40000000,%d4		# set SE in d4,
	or.l		&0x40000000,(%a0)	# and in working bcd
e_save:
	mov.l		%d1,-(%sp)		# save exp on stack
#
#
# Calculate mantissa:
#  1. Calculate absolute value of mantissa in fp0 by mul and add.
#  2. Correct for mantissa sign.
#     (i.e., all digits assumed left of the decimal point.)
#
# Register usage:
#
#  calc_m:
#	(*)  d0: temp digit storage
#	(*)  d1: lword counter
#	(*)  d2: digit count
#	(*)  d3: offset pointer
#	( )  d4: words 2 and 3 of bcd
#	( )  a0: pointer to working bcd value
#	(*) fp0: mantissa accumulator
#	( )  FP_SCR0: working copy of original bcd value
#
calc_m:
	mov.l		&1,%d1			# word counter, init to 1
	fmov.s		&0x00000000,%fp0	# accumulator
#
#
#  Since the packed number has a long word between the first & second parts,
#  get the integer digit then skip down & get the rest of the
#  mantissa.  We will unroll the loop once.
#
	bfextu		(%a0){&28:&4},%d0	# integer part is ls digit in long word
	fadd.b		%d0,%fp0		# add digit to sum in fp0
#
#
#  Get the rest of the mantissa.
#
loadlw:
	mov.l		(%a0,%d1.L*4),%d4	# load mantissa longword into d4
	mov.l		&FSTRT,%d3		# counter to pick up digits
	mov.l		&FNIBS,%d2		# reset number of digits per a0 ptr
md2b:
	fmul.s		&0x41200000,%fp0	# fp0 = fp0 * 10
	bfextu		%d4{%d3:&4},%d0		# get the digit and zero extend
	fadd.b		%d0,%fp0		# fp0 = fp0 + digit
#
#
#  If all the digits (8) in that long word have been converted (d2=0),
#  then inc d1 (=2) to point to the next long word and reset d3 to 0
#  to initialize the digit offset, and set d2 to 7 for the digit count;
#  else continue with this long word.
#
	addq.b		&4,%d3			# advance d3 to the next digit
	dbf.w		%d2,md2b		# check for last digit in this lw
nextlw:
	addq.l		&1,%d1			# inc lw pointer in mantissa
	cmp.l		%d1,&2			# test for last lw
	ble.b		loadlw			# if not, get last one
#
#  Check the sign of the mant and make the value in fp0 the same sign.
#
m_sign:
	btst		&31,(%a0)		# test sign of the mantissa
	beq.b		ap_st_z			# if clear, go to append/strip zeros
	fneg.x		%fp0			# if set, negate fp0
#
# Append/strip zeros:
#
#  For adjusted exponents which have an absolute value greater than 27*,
#  this routine calculates the amount needed to normalize the mantissa
#  for the adjusted exponent.  That number is subtracted from the exp
#  if the exp was positive, and added if it was negative.  The purpose
#  of this is to reduce the value of the exponent and the possibility
#  of error in calculation of pwrten.
#
#  1. Branch on the sign of the adjusted exponent.
#  2p.(positive exp)
#   2. Check M16 and the digits in lwords 2 and 3 in descending order.
#   3. Add one for each zero encountered until a non-zero digit.
#   4. Subtract the count from the exp.
#   5. Check if the exp has crossed zero in #3 above; make the exp abs
#	   and set SE.
#	6. Multiply the mantissa by 10**count.
#  2n.(negative exp)
#   2. Check the digits in lwords 3 and 2 in descending order.
#   3. Add one for each zero encountered until a non-zero digit.
#   4. Add the count to the exp.
#   5. Check if the exp has crossed zero in #3 above; clear SE.
#   6. Divide the mantissa by 10**count.
#
#  *Why 27?  If the adjusted exponent is within -28 < expA < 28, then
#   any adjustment due to append/strip zeros will drive the resultant
#   exponent towards zero.  Since all pwrten constants with a power
#   of 27 or less are exact, there is no need to use this routine to
#   attempt to lessen the resultant exponent.
#
# Register usage:
#
#  ap_st_z:
#	(*)  d0: temp digit storage
#	(*)  d1: zero count
#	(*)  d2: digit count
#	(*)  d3: offset pointer
#	( )  d4: first word of bcd
#	(*)  d5: lword counter
#	( )  a0: pointer to working bcd value
#	( )  FP_SCR0: working copy of original bcd value
#	( )  (sp): magnitude of the adjusted exponent
#
#
# First check the absolute value of the exponent to see if this
# routine is necessary.  If so, then check the sign of the exponent
# and do append (+) or strip (-) zeros accordingly.
# This section handles a positive adjusted exponent.
#
ap_st_z:
	mov.l		(%sp),%d1		# load expA for range test
	cmp.l		%d1,&27			# test is with 27
	ble.w		pwrten			# if abs(expA) <28, skip ap/st zeros
	btst		&30,(%a0)		# check sign of exp
	bne.b		ap_st_n			# if neg, go to neg side
	clr.l		%d1			# zero count reg
	mov.l		(%a0),%d4		# load lword 1 to d4
	bfextu		%d4{&28:&4},%d0		# get M16 in d0
	bne.b		ap_p_fx			# if M16 is non-zero, go fix exp
	addq.l		&1,%d1			# inc zero count
	mov.l		&1,%d5			# init lword counter
	mov.l		(%a0,%d5.L*4),%d4	# get lword 2 to d4
	bne.b		ap_p_cl			# if lw 2 is non-zero, scan its digits
	addq.l		&8,%d1			# lw 2 is zero: inc count by 8
	addq.l		&1,%d5			# inc lword counter
	mov.l		(%a0,%d5.L*4),%d4	# get lword 3 to d4
ap_p_cl:
	clr.l		%d3			# init offset reg
	mov.l		&7,%d2			# init digit counter
ap_p_gd:
	bfextu		%d4{%d3:&4},%d0		# get digit
	bne.b		ap_p_fx			# if non-zero, go to fix exp
	addq.l		&4,%d3			# point to next digit
	addq.l		&1,%d1			# inc digit counter
	dbf.w		%d2,ap_p_gd		# get next digit
ap_p_fx:
	mov.l		%d1,%d0			# copy counter to d0
	mov.l		(%sp),%d1		# get adjusted exp from memory
	sub.l		%d0,%d1			# subtract count from exp
	bge.b		ap_p_fm			# if still pos, go to pwrten
	neg.l		%d1			# now its neg; get abs
	mov.l		(%a0),%d4		# load lword 1 to d4
	or.l		&0x40000000,%d4		# and set SE in d4
	or.l		&0x40000000,(%a0)	# and in memory
#
# Calculate the mantissa multiplier to compensate for the stripping of
# zeros from the mantissa.
#
ap_p_fm:
	lea.l		PTENRN(%pc),%a1		# get address of power-of-ten table
	clr.l		%d3			# init table index
	fmov.s		&0x3f800000,%fp1	# init fp1 to 1
	mov.l		&3,%d2			# init d2 to count bits in counter
						# (d2 is not read by the loop below)
ap_p_el:
	asr.l		&1,%d0			# shift lsb into carry
	bcc.b		ap_p_en			# if 1, mul fp1 by pwrten factor
	fmul.x		(%a1,%d3),%fp1		# mul by 10**(d3_bit_no)
ap_p_en:
	add.l		&12,%d3			# inc d3 to next rtable entry
	tst.l		%d0			# check if d0 is zero
	bne.b		ap_p_el			# if not, get next bit
	fmul.x		%fp1,%fp0		# mul mantissa by 10**(no_bits_shifted)
	bra.b		pwrten			# go calc pwrten
#
# This section handles a negative adjusted exponent.
#
ap_st_n:
	clr.l		%d1			# clr counter
	mov.l		&2,%d5			# set up d5 to point to lword 3
	mov.l		(%a0,%d5.L*4),%d4	# get lword 3
	bne.b		ap_n_cl			# if not zero, check digits
	sub.l		&1,%d5			# dec d5 to point to lword 2
	addq.l		&8,%d1			# inc counter by 8
	mov.l		(%a0,%d5.L*4),%d4	# get lword 2
ap_n_cl:
	mov.l		&28,%d3			# point to last digit
	mov.l		&7,%d2			# init digit counter
ap_n_gd:
	bfextu		%d4{%d3:&4},%d0		# get digit
	bne.b		ap_n_fx			# if non-zero, go to exp fix
	subq.l		&4,%d3			# point to previous digit
	addq.l		&1,%d1			# inc digit counter
	dbf.w		%d2,ap_n_gd		# get next digit
ap_n_fx:
	mov.l		%d1,%d0			# copy counter to d0
	mov.l		(%sp),%d1		# get adjusted exp from memory
	sub.l		%d0,%d1			# subtract count from exp
	bgt.b		ap_n_fm			# if still pos, go fix mantissa
	neg.l		%d1			# take abs of exp and clr SE
	mov.l		(%a0),%d4		# load lword 1 to d4
	and.l		&0xbfffffff,%d4		# and clr SE in d4
	and.l		&0xbfffffff,(%a0)	# and in memory
#
# Calculate the mantissa multiplier to compensate for the appending of
# zeros to the mantissa.
#
ap_n_fm:
	lea.l		PTENRN(%pc),%a1		# get address of power-of-ten table
	clr.l		%d3			# init table index
	fmov.s		&0x3f800000,%fp1	# init fp1 to 1
	mov.l		&3,%d2			# init d2 to count bits in counter
						# (d2 is not read by the loop below)
ap_n_el:
	asr.l		&1,%d0			# shift lsb into carry
	bcc.b		ap_n_en			# if 1, mul fp1 by pwrten factor
	fmul.x		(%a1,%d3),%fp1		# mul by 10**(d3_bit_no)
ap_n_en:
	add.l		&12,%d3			# inc d3 to next rtable entry
	tst.l		%d0			# check if d0 is zero
	bne.b		ap_n_el			# if not, get next bit
	fdiv.x		%fp1,%fp0		# div mantissa by 10**(no_bits_shifted)
#
#
# Calculate power-of-ten factor from adjusted and shifted exponent.
#
# Register usage:
#
#  pwrten:
#	(*)  d0: temp
#	( )  d1: exponent
#	(*)  d2: {FPCR[6:5],SM,SE} as index in RTABLE; temp
#	(*)  d3: FPCR work copy
#	( )  d4: first word of bcd
#	(*)  a1: RTABLE pointer
#  calc_p:
#	(*)  d0: temp
#	( )  d1: exponent
#	(*)  d3: PWRTxx table index
#	( )  a0: pointer to working copy of bcd
#	(*)  a1: PWRTxx pointer
#	(*) fp1: power-of-ten accumulator
#
# Pwrten calculates the exponent factor in the selected rounding mode
# according to the following table:
#
#	Sign of Mant  Sign of Exp  Rounding Mode  PWRTEN Rounding Mode
#
#	ANY	  ANY	RN	RN
#
#	 +	   +	RP	RP
#	 -	   +	RP	RM
#	 +	   -	RP	RM
#	 -	   -	RP	RP
#
#	 +	   +	RM	RM
#	 -	   +	RM	RP
#	 +	   -	RM	RP
#	 -	   -	RM	RM
#
#	 +	   +	RZ	RM
#	 -	   +	RZ	RM
#	 +	   -	RZ	RP
#	 -	   -	RZ	RP
#
#
pwrten:
	mov.l		USER_FPCR(%a6),%d3	# get user's FPCR
	bfextu		%d3{&26:&2},%d2		# isolate rounding mode bits
	mov.l		(%a0),%d4		# reload 1st bcd word to d4
	asl.l		&2,%d2			# format d2 to be
	bfextu		%d4{&0:&2},%d0		# {FPCR[6],FPCR[5],SM,SE}
	add.l		%d0,%d2			# in d2 as index into RTABLE
	lea.l		RTABLE(%pc),%a1		# load rtable base
	mov.b		(%a1,%d2),%d0		# load new rounding bits from table
	clr.l		%d3			# clear d3 to force no exc and extended
	bfins		%d0,%d3{&26:&2}		# stuff new rounding bits in FPCR
	fmov.l		%d3,%fpcr		# write new FPCR
	asr.l		&1,%d0			# write correct PTENxx table
	bcc.b		not_rp			# to a1
	lea.l		PTENRP(%pc),%a1		# it is RP
	bra.b		calc_p			# go to init section
not_rp:
	asr.l		&1,%d0			# keep checking
	bcc.b		not_rm
	lea.l		PTENRM(%pc),%a1		# it is RM
	bra.b		calc_p			# go to init section
not_rm:
	lea.l		PTENRN(%pc),%a1		# it is RN
calc_p:
	mov.l		%d1,%d0			# copy exp to d0;use d0
	bpl.b		no_neg			# if exp is negative,
	neg.l		%d0			# invert it
	or.l		&0x40000000,(%a0)	# and set SE bit
no_neg:
	clr.l		%d3			# table index
	fmov.s		&0x3f800000,%fp1	# init fp1 to 1
e_loop:
	asr.l		&1,%d0			# shift next bit into carry
	bcc.b		e_next			# if zero, skip the mul
	fmul.x		(%a1,%d3),%fp1		# mul by 10**(d3_bit_no)
e_next:
	add.l		&12,%d3			# inc d3 to next rtable entry
	tst.l		%d0			# check if d0 is zero
	bne.b		e_loop			# not zero, continue shifting
#
#
#  Check the sign of the adjusted exp and make the value in fp0 the
#  same sign. If the exp was pos then multiply fp1*fp0;
#  else divide fp0/fp1.
#
# Register Usage:
#  norm:
#	( )  a0: pointer to working bcd value
#	(*) fp0: mantissa accumulator
#	( ) fp1: scaling factor - 10**(abs(exp))
#
pnorm:
	btst		&30,(%a0)		# test the sign of the exponent
	beq.b		mul			# if clear, go to multiply
div:
	fdiv.x		%fp1,%fp0		# exp is negative, so divide mant by exp
	bra.b		end_dec
mul:
	fmul.x		%fp1,%fp0		# exp is positive, so multiply by exp
#
#
# Clean up and return with result in fp0.
#
# If the final mul/div in decbin incurred an inex exception,
# it will be inex2, but will be reported as inex1 by get_op.
#
end_dec:
	fmov.l		%fpsr,%d0		# get status register
	bclr		&inex2_bit+8,%d0	# test for inex2 and clear it
	beq.b		no_exc			# skip this if no exc
	ori.w		&inx1a_mask,2+USER_FPSR(%a6) # set INEX1/AINEX
no_exc:
	add.l		&0x4,%sp		# clear 1 lw param
	fmovm.x		(%sp)+,&0x40		# restore fp1
	movm.l		(%sp)+,&0x3c		# restore d2-d5
	fmov.l		&0x0,%fpcr
	fmov.l		&0x0,%fpsr
	rts

#########################################################################
# bindec(): Converts an input in extended precision format to bcd format#
#									#
# INPUT *************************************************************** #
#	a0 = pointer to the input extended precision value in memory.	#
#	     the input may be either normalized, unnormalized, or	#
#	     denormalized.						#
#	d0 = contains the k-factor sign-extended to 32-bits.		#
#									#
# OUTPUT ************************************************************** #
#	FP_SCR0(a6) = bcd format result on the stack.			#
#									#
# ALGORITHM *********************************************************** #
#									#
#	A1.	Set RM and size ext;  Set SIGMA = sign of input.	#
#		The k-factor is saved for use in d7. Clear the		#
#		BINDEC_FLG for separating normalized/denormalized	#
#		input.
If input is unnormalized or denormalized, #13438# normalize it. #13439# #13440# A2. Set X = abs(input). #13441# #13442# A3. Compute ILOG. #13443# ILOG is the log base 10 of the input value. It is #13444# approximated by adding e + 0.f when the original #13445# value is viewed as 2^^e * 1.f in extended precision. #13446# This value is stored in d6. #13447# #13448# A4. Clr INEX bit. #13449# The operation in A3 above may have set INEX2. #13450# #13451# A5. Set ICTR = 0; #13452# ICTR is a flag used in A13. It must be set before the #13453# loop entry A6. #13454# #13455# A6. Calculate LEN. #13456# LEN is the number of digits to be displayed. The #13457# k-factor can dictate either the total number of digits, #13458# if it is a positive number, or the number of digits #13459# after the decimal point which are to be included as #13460# significant. See the 68882 manual for examples. #13461# If LEN is computed to be greater than 17, set OPERR in #13462# USER_FPSR. LEN is stored in d4. #13463# #13464# A7. Calculate SCALE. #13465# SCALE is equal to 10^ISCALE, where ISCALE is the number #13466# of decimal places needed to insure LEN integer digits #13467# in the output before conversion to bcd. LAMBDA is the #13468# sign of ISCALE, used in A9. Fp1 contains #13469# 10^^(abs(ISCALE)) using a rounding mode which is a #13470# function of the original rounding mode and the signs #13471# of ISCALE and X. A table is given in the code. #13472# #13473# A8. Clr INEX; Force RZ. #13474# The operation in A3 above may have set INEX2. #13475# RZ mode is forced for the scaling operation to insure #13476# only one rounding error. The grs bits are collected in #13477# the INEX flag for use in A10. #13478# #13479# A9. Scale X -> Y. #13480# The mantissa is scaled to the desired number of #13481# significant digits. The excess digits are collected #13482# in INEX2. #13483# #13484# A10. Or in INEX. #13485# If INEX is set, round error occurred. 
This is #13486# compensated for by 'or-ing' in the INEX2 flag to #13487# the lsb of Y. #13488# #13489# A11. Restore original FPCR; set size ext. #13490# Perform FINT operation in the user's rounding mode. #13491# Keep the size to extended. #13492# #13493# A12. Calculate YINT = FINT(Y) according to user's rounding #13494# mode. The FPSP routine sintd0 is used. The output #13495# is in fp0. #13496# #13497# A13. Check for LEN digits. #13498# If the int operation results in more than LEN digits, #13499# or less than LEN -1 digits, adjust ILOG and repeat from #13500# A6. This test occurs only on the first pass. If the #13501# result is exactly 10^LEN, decrement ILOG and divide #13502# the mantissa by 10. #13503# #13504# A14. Convert the mantissa to bcd. #13505# The binstr routine is used to convert the LEN digit #13506# mantissa to bcd in memory. The input to binstr is #13507# to be a fraction; i.e. (mantissa)/10^LEN and adjusted #13508# such that the decimal point is to the left of bit 63. #13509# The bcd digits are stored in the correct position in #13510# the final string area in memory. #13511# #13512# A15. Convert the exponent to bcd. #13513# As in A14 above, the exp is converted to bcd and the #13514# digits are stored in the final string. #13515# Test the length of the final exponent string. If the #13516# length is 4, set operr. #13517# #13518# A16. Write sign bits to final string. 
#13519# #13520#########################################################################1352113522set BINDEC_FLG, EXC_TEMP # DENORM flag1352313524# Constants in extended precision13525PLOG2:13526long 0x3FFD0000,0x9A209A84,0xFBCFF798,0x0000000013527PLOG2UP1:13528long 0x3FFD0000,0x9A209A84,0xFBCFF799,0x000000001352913530# Constants in single precision13531FONE:13532long 0x3F800000,0x00000000,0x00000000,0x0000000013533FTWO:13534long 0x40000000,0x00000000,0x00000000,0x0000000013535FTEN:13536long 0x41200000,0x00000000,0x00000000,0x0000000013537F4933:13538long 0x459A2800,0x00000000,0x00000000,0x000000001353913540RBDTBL:13541byte 0,0,0,013542byte 3,3,2,213543byte 3,2,2,313544byte 2,3,3,21354513546# Implementation Notes:13547#13548# The registers are used as follows:13549#13550# d0: scratch; LEN input to binstr13551# d1: scratch13552# d2: upper 32-bits of mantissa for binstr13553# d3: scratch;lower 32-bits of mantissa for binstr13554# d4: LEN13555# d5: LAMBDA/ICTR13556# d6: ILOG13557# d7: k-factor13558# a0: ptr for original operand/final result13559# a1: scratch pointer13560# a2: pointer to FP_X; abs(original value) in ext13561# fp0: scratch13562# fp1: scratch13563# fp2: scratch13564# F_SCR1:13565# F_SCR2:13566# L_SCR1:13567# L_SCR2:1356813569global bindec13570bindec:13571movm.l &0x3f20,-(%sp) # {%d2-%d7/%a2}13572fmovm.x &0x7,-(%sp) # {%fp0-%fp2}1357313574# A1. Set RM and size ext. Set SIGMA = sign input;13575# The k-factor is saved for use in d7. Clear BINDEC_FLG for13576# separating normalized/denormalized input. If the input13577# is a denormalized number, set the BINDEC_FLG memory word13578# to signal denorm. 
If the input is unnormalized, normalize13579# the input and test for denormalized result.13580#13581fmov.l &rm_mode*0x10,%fpcr # set RM and ext13582mov.l (%a0),L_SCR2(%a6) # save exponent for sign check13583mov.l %d0,%d7 # move k-factor to d71358413585clr.b BINDEC_FLG(%a6) # clr norm/denorm flag13586cmpi.b STAG(%a6),&DENORM # is input a DENORM?13587bne.w A2_str # no; input is a NORM1358813589#13590# Normalize the denorm13591#13592un_de_norm:13593mov.w (%a0),%d013594and.w &0x7fff,%d0 # strip sign of normalized exp13595mov.l 4(%a0),%d113596mov.l 8(%a0),%d213597norm_loop:13598sub.w &1,%d013599lsl.l &1,%d213600roxl.l &1,%d113601tst.l %d113602bge.b norm_loop13603#13604# Test if the normalized input is denormalized13605#13606tst.w %d013607bgt.b pos_exp # if greater than zero, it is a norm13608st BINDEC_FLG(%a6) # set flag for denorm13609pos_exp:13610and.w &0x7fff,%d0 # strip sign of normalized exp13611mov.w %d0,(%a0)13612mov.l %d1,4(%a0)13613mov.l %d2,8(%a0)1361413615# A2. Set X = abs(input).13616#13617A2_str:13618mov.l (%a0),FP_SCR1(%a6) # move input to work space13619mov.l 4(%a0),FP_SCR1+4(%a6) # move input to work space13620mov.l 8(%a0),FP_SCR1+8(%a6) # move input to work space13621and.l &0x7fffffff,FP_SCR1(%a6) # create abs(X)1362213623# A3. Compute ILOG.13624# ILOG is the log base 10 of the input value. It is approx-13625# imated by adding e + 0.f when the original value is viewed13626# as 2^^e * 1.f in extended precision. 
This value is stored13627# in d6.13628#13629# Register usage:13630# Input/Output13631# d0: k-factor/exponent13632# d2: x/x13633# d3: x/x13634# d4: x/x13635# d5: x/x13636# d6: x/ILOG13637# d7: k-factor/Unchanged13638# a0: ptr for original operand/final result13639# a1: x/x13640# a2: x/x13641# fp0: x/float(ILOG)13642# fp1: x/x13643# fp2: x/x13644# F_SCR1:x/x13645# F_SCR2:Abs(X)/Abs(X) with $3fff exponent13646# L_SCR1:x/x13647# L_SCR2:first word of X packed/Unchanged1364813649tst.b BINDEC_FLG(%a6) # check for denorm13650beq.b A3_cont # if clr, continue with norm13651mov.l &-4933,%d6 # force ILOG = -493313652bra.b A4_str13653A3_cont:13654mov.w FP_SCR1(%a6),%d0 # move exp to d013655mov.w &0x3fff,FP_SCR1(%a6) # replace exponent with 0x3fff13656fmov.x FP_SCR1(%a6),%fp0 # now fp0 has 1.f13657sub.w &0x3fff,%d0 # strip off bias13658fadd.w %d0,%fp0 # add in exp13659fsub.s FONE(%pc),%fp0 # subtract off 1.013660fbge.w pos_res # if pos, branch13661fmul.x PLOG2UP1(%pc),%fp0 # if neg, mul by LOG2UP113662fmov.l %fp0,%d6 # put ILOG in d6 as a lword13663bra.b A4_str # go move out ILOG13664pos_res:13665fmul.x PLOG2(%pc),%fp0 # if pos, mul by LOG213666fmov.l %fp0,%d6 # put ILOG in d6 as a lword136671366813669# A4. Clr INEX bit.13670# The operation in A3 above may have set INEX2.1367113672A4_str:13673fmov.l &0,%fpsr # zero all of fpsr - nothing needed136741367513676# A5. Set ICTR = 0;13677# ICTR is a flag used in A13. It must be set before the13678# loop entry A6. The lower word of d5 is used for ICTR.1367913680clr.w %d5 # clear ICTR1368113682# A6. Calculate LEN.13683# LEN is the number of digits to be displayed. The k-factor13684# can dictate either the total number of digits, if it is13685# a positive number, or the number of digits after the13686# original decimal point which are to be included as13687# significant. See the 68882 manual for examples.13688# If LEN is computed to be greater than 17, set OPERR in13689# USER_FPSR. 
LEN is stored in d4.13690#13691# Register usage:13692# Input/Output13693# d0: exponent/Unchanged13694# d2: x/x/scratch13695# d3: x/x13696# d4: exc picture/LEN13697# d5: ICTR/Unchanged13698# d6: ILOG/Unchanged13699# d7: k-factor/Unchanged13700# a0: ptr for original operand/final result13701# a1: x/x13702# a2: x/x13703# fp0: float(ILOG)/Unchanged13704# fp1: x/x13705# fp2: x/x13706# F_SCR1:x/x13707# F_SCR2:Abs(X) with $3fff exponent/Unchanged13708# L_SCR1:x/x13709# L_SCR2:first word of X packed/Unchanged1371013711A6_str:13712tst.l %d7 # branch on sign of k13713ble.b k_neg # if k <= 0, LEN = ILOG + 1 - k13714mov.l %d7,%d4 # if k > 0, LEN = k13715bra.b len_ck # skip to LEN check13716k_neg:13717mov.l %d6,%d4 # first load ILOG to d413718sub.l %d7,%d4 # subtract off k13719addq.l &1,%d4 # add in the 113720len_ck:13721tst.l %d4 # LEN check: branch on sign of LEN13722ble.b LEN_ng # if neg, set LEN = 113723cmp.l %d4,&17 # test if LEN > 1713724ble.b A7_str # if not, forget it13725mov.l &17,%d4 # set max LEN = 1713726tst.l %d7 # if negative, never set OPERR13727ble.b A7_str # if positive, continue13728or.l &opaop_mask,USER_FPSR(%a6) # set OPERR & AIOP in USER_FPSR13729bra.b A7_str # finished here13730LEN_ng:13731mov.l &1,%d4 # min LEN is 1137321373313734# A7. Calculate SCALE.13735# SCALE is equal to 10^ISCALE, where ISCALE is the number13736# of decimal places needed to insure LEN integer digits13737# in the output before conversion to bcd. LAMBDA is the sign13738# of ISCALE, used in A9. Fp1 contains 10^^(abs(ISCALE)) using13739# the rounding mode as given in the following table (see13740# Coonen, p. 
7.23 as ref.; however, the SCALE variable is13741# of opposite sign in bindec.sa from Coonen).13742#13743# Initial USE13744# FPCR[6:5] LAMBDA SIGN(X) FPCR[6:5]13745# ----------------------------------------------13746# RN 00 0 0 00/0 RN13747# RN 00 0 1 00/0 RN13748# RN 00 1 0 00/0 RN13749# RN 00 1 1 00/0 RN13750# RZ 01 0 0 11/3 RP13751# RZ 01 0 1 11/3 RP13752# RZ 01 1 0 10/2 RM13753# RZ 01 1 1 10/2 RM13754# RM 10 0 0 11/3 RP13755# RM 10 0 1 10/2 RM13756# RM 10 1 0 10/2 RM13757# RM 10 1 1 11/3 RP13758# RP 11 0 0 10/2 RM13759# RP 11 0 1 11/3 RP13760# RP 11 1 0 11/3 RP13761# RP 11 1 1 10/2 RM13762#13763# Register usage:13764# Input/Output13765# d0: exponent/scratch - final is 013766# d2: x/0 or 24 for A913767# d3: x/scratch - offset ptr into PTENRM array13768# d4: LEN/Unchanged13769# d5: 0/ICTR:LAMBDA13770# d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k))13771# d7: k-factor/Unchanged13772# a0: ptr for original operand/final result13773# a1: x/ptr to PTENRM array13774# a2: x/x13775# fp0: float(ILOG)/Unchanged13776# fp1: x/10^ISCALE13777# fp2: x/x13778# F_SCR1:x/x13779# F_SCR2:Abs(X) with $3fff exponent/Unchanged13780# L_SCR1:x/x13781# L_SCR2:first word of X packed/Unchanged1378213783A7_str:13784tst.l %d7 # test sign of k13785bgt.b k_pos # if pos and > 0, skip this13786cmp.l %d7,%d6 # test k - ILOG13787blt.b k_pos # if ILOG >= k, skip this13788mov.l %d7,%d6 # if ((k<0) & (ILOG < k)) ILOG = k13789k_pos:13790mov.l %d6,%d0 # calc ILOG + 1 - LEN in d013791addq.l &1,%d0 # add the 113792sub.l %d4,%d0 # sub off LEN13793swap %d5 # use upper word of d5 for LAMBDA13794clr.w %d5 # set it zero initially13795clr.w %d2 # set up d2 for very small case13796tst.l %d0 # test sign of ISCALE13797bge.b iscale # if pos, skip next inst13798addq.w &1,%d5 # if neg, set LAMBDA true13799cmp.l %d0,&0xffffecd4 # test iscale <= -490813800bgt.b no_inf # if false, skip rest13801add.l &24,%d0 # add in 24 to iscale13802mov.l &24,%d2 # put 24 in d2 for A913803no_inf:13804neg.l %d0 # and take abs of 
ISCALE13805iscale:13806fmov.s FONE(%pc),%fp1 # init fp1 to 113807bfextu USER_FPCR(%a6){&26:&2},%d1 # get initial rmode bits13808lsl.w &1,%d1 # put them in bits 2:113809add.w %d5,%d1 # add in LAMBDA13810lsl.w &1,%d1 # put them in bits 3:113811tst.l L_SCR2(%a6) # test sign of original x13812bge.b x_pos # if pos, don't set bit 013813addq.l &1,%d1 # if neg, set bit 013814x_pos:13815lea.l RBDTBL(%pc),%a2 # load rbdtbl base13816mov.b (%a2,%d1),%d3 # load d3 with new rmode13817lsl.l &4,%d3 # put bits in proper position13818fmov.l %d3,%fpcr # load bits into fpu13819lsr.l &4,%d3 # put bits in proper position13820tst.b %d3 # decode new rmode for pten table13821bne.b not_rn # if zero, it is RN13822lea.l PTENRN(%pc),%a1 # load a1 with RN table base13823bra.b rmode # exit decode13824not_rn:13825lsr.b &1,%d3 # get lsb in carry13826bcc.b not_rp2 # if carry clear, it is RM13827lea.l PTENRP(%pc),%a1 # load a1 with RP table base13828bra.b rmode # exit decode13829not_rp2:13830lea.l PTENRM(%pc),%a1 # load a1 with RM table base13831rmode:13832clr.l %d3 # clr table index13833e_loop2:13834lsr.l &1,%d0 # shift next bit into carry13835bcc.b e_next2 # if zero, skip the mul13836fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no)13837e_next2:13838add.l &12,%d3 # inc d3 to next pwrten table entry13839tst.l %d0 # test if ISCALE is zero13840bne.b e_loop2 # if not, loop1384113842# A8. Clr INEX; Force RZ.13843# The operation in A3 above may have set INEX2.13844# RZ mode is forced for the scaling operation to insure13845# only one rounding error. The grs bits are collected in13846# the INEX flag for use in A10.13847#13848# Register usage:13849# Input/Output1385013851fmov.l &0,%fpsr # clr INEX13852fmov.l &rz_mode*0x10,%fpcr # set RZ rounding mode1385313854# A9. Scale X -> Y.13855# The mantissa is scaled to the desired number of significant13856# digits. The excess digits are collected in INEX2. If mul,13857# Check d2 for excess 10 exponential value. 
If not zero,13858# the iscale value would have caused the pwrten calculation13859# to overflow. Only a negative iscale can cause this, so13860# multiply by 10^(d2), which is now only allowed to be 24,13861# with a multiply by 10^8 and 10^16, which is exact since13862# 10^24 is exact. If the input was denormalized, we must13863# create a busy stack frame with the mul command and the13864# two operands, and allow the fpu to complete the multiply.13865#13866# Register usage:13867# Input/Output13868# d0: FPCR with RZ mode/Unchanged13869# d2: 0 or 24/unchanged13870# d3: x/x13871# d4: LEN/Unchanged13872# d5: ICTR:LAMBDA13873# d6: ILOG/Unchanged13874# d7: k-factor/Unchanged13875# a0: ptr for original operand/final result13876# a1: ptr to PTENRM array/Unchanged13877# a2: x/x13878# fp0: float(ILOG)/X adjusted for SCALE (Y)13879# fp1: 10^ISCALE/Unchanged13880# fp2: x/x13881# F_SCR1:x/x13882# F_SCR2:Abs(X) with $3fff exponent/Unchanged13883# L_SCR1:x/x13884# L_SCR2:first word of X packed/Unchanged1388513886A9_str:13887fmov.x (%a0),%fp0 # load X from memory13888fabs.x %fp0 # use abs(X)13889tst.w %d5 # LAMBDA is in lower word of d513890bne.b sc_mul # if neg (LAMBDA = 1), scale by mul13891fdiv.x %fp1,%fp0 # calculate X / SCALE -> Y to fp013892bra.w A10_st # branch to A101389313894sc_mul:13895tst.b BINDEC_FLG(%a6) # check for denorm13896beq.w A9_norm # if norm, continue with mul1389713898# for DENORM, we must calculate:13899# fp0 = input_op * 10^ISCALE * 10^2413900# since the input operand is a DENORM, we can't multiply it directly.13901# so, we do the multiplication of the exponents and mantissas separately.13902# in this way, we avoid underflow on intermediate stages of the13903# multiplication and guarantee a result without exception.13904fmovm.x &0x2,-(%sp) # save 10^ISCALE to stack1390513906mov.w (%sp),%d3 # grab exponent13907andi.w &0x7fff,%d3 # clear sign13908ori.w &0x8000,(%a0) # make DENORM exp negative13909add.w (%a0),%d3 # add DENORM exp to 10^ISCALE exp13910subi.w 
&0x3fff,%d3 # subtract BIAS13911add.w 36(%a1),%d313912subi.w &0x3fff,%d3 # subtract BIAS13913add.w 48(%a1),%d313914subi.w &0x3fff,%d3 # subtract BIAS1391513916bmi.w sc_mul_err # is result is DENORM, punt!!!1391713918andi.w &0x8000,(%sp) # keep sign13919or.w %d3,(%sp) # insert new exponent13920andi.w &0x7fff,(%a0) # clear sign bit on DENORM again13921mov.l 0x8(%a0),-(%sp) # put input op mantissa on stk13922mov.l 0x4(%a0),-(%sp)13923mov.l &0x3fff0000,-(%sp) # force exp to zero13924fmovm.x (%sp)+,&0x80 # load normalized DENORM into fp013925fmul.x (%sp)+,%fp01392613927# fmul.x 36(%a1),%fp0 # multiply fp0 by 10^813928# fmul.x 48(%a1),%fp0 # multiply fp0 by 10^1613929mov.l 36+8(%a1),-(%sp) # get 10^8 mantissa13930mov.l 36+4(%a1),-(%sp)13931mov.l &0x3fff0000,-(%sp) # force exp to zero13932mov.l 48+8(%a1),-(%sp) # get 10^16 mantissa13933mov.l 48+4(%a1),-(%sp)13934mov.l &0x3fff0000,-(%sp)# force exp to zero13935fmul.x (%sp)+,%fp0 # multiply fp0 by 10^813936fmul.x (%sp)+,%fp0 # multiply fp0 by 10^1613937bra.b A10_st1393813939sc_mul_err:13940bra.b sc_mul_err1394113942A9_norm:13943tst.w %d2 # test for small exp case13944beq.b A9_con # if zero, continue as normal13945fmul.x 36(%a1),%fp0 # multiply fp0 by 10^813946fmul.x 48(%a1),%fp0 # multiply fp0 by 10^1613947A9_con:13948fmul.x %fp1,%fp0 # calculate X * SCALE -> Y to fp01394913950# A10. Or in INEX.13951# If INEX is set, round error occurred. 
This is compensated13952# for by 'or-ing' in the INEX2 flag to the lsb of Y.13953#13954# Register usage:13955# Input/Output13956# d0: FPCR with RZ mode/FPSR with INEX2 isolated13957# d2: x/x13958# d3: x/x13959# d4: LEN/Unchanged13960# d5: ICTR:LAMBDA13961# d6: ILOG/Unchanged13962# d7: k-factor/Unchanged13963# a0: ptr for original operand/final result13964# a1: ptr to PTENxx array/Unchanged13965# a2: x/ptr to FP_SCR1(a6)13966# fp0: Y/Y with lsb adjusted13967# fp1: 10^ISCALE/Unchanged13968# fp2: x/x1396913970A10_st:13971fmov.l %fpsr,%d0 # get FPSR13972fmov.x %fp0,FP_SCR1(%a6) # move Y to memory13973lea.l FP_SCR1(%a6),%a2 # load a2 with ptr to FP_SCR113974btst &9,%d0 # check if INEX2 set13975beq.b A11_st # if clear, skip rest13976or.l &1,8(%a2) # or in 1 to lsb of mantissa13977fmov.x FP_SCR1(%a6),%fp0 # write adjusted Y back to fpu139781397913980# A11. Restore original FPCR; set size ext.13981# Perform FINT operation in the user's rounding mode. Keep13982# the size to extended. The sintdo entry point in the sint13983# routine expects the FPCR value to be in USER_FPCR for13984# mode and precision. The original FPCR is saved in L_SCR1.1398513986A11_st:13987mov.l USER_FPCR(%a6),L_SCR1(%a6) # save it for later13988and.l &0x00000030,USER_FPCR(%a6) # set size to ext,13989# ;block exceptions139901399113992# A12. Calculate YINT = FINT(Y) according to user's rounding mode.13993# The FPSP routine sintd0 is used. 
The output is in fp0.13994#13995# Register usage:13996# Input/Output13997# d0: FPSR with AINEX cleared/FPCR with size set to ext13998# d2: x/x/scratch13999# d3: x/x14000# d4: LEN/Unchanged14001# d5: ICTR:LAMBDA/Unchanged14002# d6: ILOG/Unchanged14003# d7: k-factor/Unchanged14004# a0: ptr for original operand/src ptr for sintdo14005# a1: ptr to PTENxx array/Unchanged14006# a2: ptr to FP_SCR1(a6)/Unchanged14007# a6: temp pointer to FP_SCR1(a6) - orig value saved and restored14008# fp0: Y/YINT14009# fp1: 10^ISCALE/Unchanged14010# fp2: x/x14011# F_SCR1:x/x14012# F_SCR2:Y adjusted for inex/Y with original exponent14013# L_SCR1:x/original USER_FPCR14014# L_SCR2:first word of X packed/Unchanged1401514016A12_st:14017movm.l &0xc0c0,-(%sp) # save regs used by sintd0 {%d0-%d1/%a0-%a1}14018mov.l L_SCR1(%a6),-(%sp)14019mov.l L_SCR2(%a6),-(%sp)1402014021lea.l FP_SCR1(%a6),%a0 # a0 is ptr to FP_SCR1(a6)14022fmov.x %fp0,(%a0) # move Y to memory at FP_SCR1(a6)14023tst.l L_SCR2(%a6) # test sign of original operand14024bge.b do_fint12 # if pos, use Y14025or.l &0x80000000,(%a0) # if neg, use -Y14026do_fint12:14027mov.l USER_FPSR(%a6),-(%sp)14028# bsr sintdo # sint routine returns int in fp01402914030fmov.l USER_FPCR(%a6),%fpcr14031fmov.l &0x0,%fpsr # clear the AEXC bits!!!14032## mov.l USER_FPCR(%a6),%d0 # ext prec/keep rnd mode14033## andi.l &0x00000030,%d014034## fmov.l %d0,%fpcr14035fint.x FP_SCR1(%a6),%fp0 # do fint()14036fmov.l %fpsr,%d014037or.w %d0,FPSR_EXCEPT(%a6)14038## fmov.l &0x0,%fpcr14039## fmov.l %fpsr,%d0 # don't keep ccodes14040## or.w %d0,FPSR_EXCEPT(%a6)1404114042mov.b (%sp),USER_FPSR(%a6)14043add.l &4,%sp1404414045mov.l (%sp)+,L_SCR2(%a6)14046mov.l (%sp)+,L_SCR1(%a6)14047movm.l (%sp)+,&0x303 # restore regs used by sint {%d0-%d1/%a0-%a1}1404814049mov.l L_SCR2(%a6),FP_SCR1(%a6) # restore original exponent14050mov.l L_SCR1(%a6),USER_FPCR(%a6) # restore user's FPCR1405114052# A13. 
Check for LEN digits.14053# If the int operation results in more than LEN digits,14054# or less than LEN -1 digits, adjust ILOG and repeat from14055# A6. This test occurs only on the first pass. If the14056# result is exactly 10^LEN, decrement ILOG and divide14057# the mantissa by 10. The calculation of 10^LEN cannot14058# be inexact, since all powers of ten up to 10^27 are exact14059# in extended precision, so the use of a previous power-of-ten14060# table will introduce no error.14061#14062#14063# Register usage:14064# Input/Output14065# d0: FPCR with size set to ext/scratch final = 014066# d2: x/x14067# d3: x/scratch final = x14068# d4: LEN/LEN adjusted14069# d5: ICTR:LAMBDA/LAMBDA:ICTR14070# d6: ILOG/ILOG adjusted14071# d7: k-factor/Unchanged14072# a0: pointer into memory for packed bcd string formation14073# a1: ptr to PTENxx array/Unchanged14074# a2: ptr to FP_SCR1(a6)/Unchanged14075# fp0: int portion of Y/abs(YINT) adjusted14076# fp1: 10^ISCALE/Unchanged14077# fp2: x/10^LEN14078# F_SCR1:x/x14079# F_SCR2:Y with original exponent/Unchanged14080# L_SCR1:original USER_FPCR/Unchanged14081# L_SCR2:first word of X packed/Unchanged1408214083A13_st:14084swap %d5 # put ICTR in lower word of d514085tst.w %d5 # check if ICTR = 014086bne not_zr # if non-zero, go to second test14087#14088# Compute 10^(LEN-1)14089#14090fmov.s FONE(%pc),%fp2 # init fp2 to 1.014091mov.l %d4,%d0 # put LEN in d014092subq.l &1,%d0 # d0 = LEN -114093clr.l %d3 # clr table index14094l_loop:14095lsr.l &1,%d0 # shift next bit into carry14096bcc.b l_next # if zero, skip the mul14097fmul.x (%a1,%d3),%fp2 # mul by 10**(d3_bit_no)14098l_next:14099add.l &12,%d3 # inc d3 to next pwrten table entry14100tst.l %d0 # test if LEN is zero14101bne.b l_loop # if not, loop14102#14103# 10^LEN-1 is computed for this test and A14. 
If the input was14104# denormalized, check only the case in which YINT > 10^LEN.14105#14106tst.b BINDEC_FLG(%a6) # check if input was norm14107beq.b A13_con # if norm, continue with checking14108fabs.x %fp0 # take abs of YINT14109bra test_214110#14111# Compare abs(YINT) to 10^(LEN-1) and 10^LEN14112#14113A13_con:14114fabs.x %fp0 # take abs of YINT14115fcmp.x %fp0,%fp2 # compare abs(YINT) with 10^(LEN-1)14116fbge.w test_2 # if greater, do next test14117subq.l &1,%d6 # subtract 1 from ILOG14118mov.w &1,%d5 # set ICTR14119fmov.l &rm_mode*0x10,%fpcr # set rmode to RM14120fmul.s FTEN(%pc),%fp2 # compute 10^LEN14121bra.w A6_str # return to A6 and recompute YINT14122test_2:14123fmul.s FTEN(%pc),%fp2 # compute 10^LEN14124fcmp.x %fp0,%fp2 # compare abs(YINT) with 10^LEN14125fblt.w A14_st # if less, all is ok, go to A1414126fbgt.w fix_ex # if greater, fix and redo14127fdiv.s FTEN(%pc),%fp0 # if equal, divide by 1014128addq.l &1,%d6 # and inc ILOG14129bra.b A14_st # and continue elsewhere14130fix_ex:14131addq.l &1,%d6 # increment ILOG by 114132mov.w &1,%d5 # set ICTR14133fmov.l &rm_mode*0x10,%fpcr # set rmode to RM14134bra.w A6_str # return to A6 and recompute YINT14135#14136# Since ICTR <> 0, we have already been through one adjustment,14137# and shouldn't have another; this is to check if abs(YINT) = 10^LEN14138# 10^LEN is again computed using whatever table is in a1 since the14139# value calculated cannot be inexact.14140#14141not_zr:14142fmov.s FONE(%pc),%fp2 # init fp2 to 1.014143mov.l %d4,%d0 # put LEN in d014144clr.l %d3 # clr table index14145z_loop:14146lsr.l &1,%d0 # shift next bit into carry14147bcc.b z_next # if zero, skip the mul14148fmul.x (%a1,%d3),%fp2 # mul by 10**(d3_bit_no)14149z_next:14150add.l &12,%d3 # inc d3 to next pwrten table entry14151tst.l %d0 # test if LEN is zero14152bne.b z_loop # if not, loop14153fabs.x %fp0 # get abs(YINT)14154fcmp.x %fp0,%fp2 # check if abs(YINT) = 10^LEN14155fbneq.w A14_st # if not, skip this14156fdiv.s FTEN(%pc),%fp0 # 
divide abs(YINT) by 1014157addq.l &1,%d6 # and inc ILOG by 114158addq.l &1,%d4 # and inc LEN14159fmul.s FTEN(%pc),%fp2 # if LEN++, the get 10^^LEN1416014161# A14. Convert the mantissa to bcd.14162# The binstr routine is used to convert the LEN digit14163# mantissa to bcd in memory. The input to binstr is14164# to be a fraction; i.e. (mantissa)/10^LEN and adjusted14165# such that the decimal point is to the left of bit 63.14166# The bcd digits are stored in the correct position in14167# the final string area in memory.14168#14169#14170# Register usage:14171# Input/Output14172# d0: x/LEN call to binstr - final is 014173# d1: x/014174# d2: x/ms 32-bits of mant of abs(YINT)14175# d3: x/ls 32-bits of mant of abs(YINT)14176# d4: LEN/Unchanged14177# d5: ICTR:LAMBDA/LAMBDA:ICTR14178# d6: ILOG14179# d7: k-factor/Unchanged14180# a0: pointer into memory for packed bcd string formation14181# /ptr to first mantissa byte in result string14182# a1: ptr to PTENxx array/Unchanged14183# a2: ptr to FP_SCR1(a6)/Unchanged14184# fp0: int portion of Y/abs(YINT) adjusted14185# fp1: 10^ISCALE/Unchanged14186# fp2: 10^LEN/Unchanged14187# F_SCR1:x/Work area for final result14188# F_SCR2:Y with original exponent/Unchanged14189# L_SCR1:original USER_FPCR/Unchanged14190# L_SCR2:first word of X packed/Unchanged1419114192A14_st:14193fmov.l &rz_mode*0x10,%fpcr # force rz for conversion14194fdiv.x %fp2,%fp0 # divide abs(YINT) by 10^LEN14195lea.l FP_SCR0(%a6),%a014196fmov.x %fp0,(%a0) # move abs(YINT)/10^LEN to memory14197mov.l 4(%a0),%d2 # move 2nd word of FP_RES to d214198mov.l 8(%a0),%d3 # move 3rd word of FP_RES to d314199clr.l 4(%a0) # zero word 2 of FP_RES14200clr.l 8(%a0) # zero word 3 of FP_RES14201mov.l (%a0),%d0 # move exponent to d014202swap %d0 # put exponent in lower word14203beq.b no_sft # if zero, don't shift14204sub.l &0x3ffd,%d0 # sub bias less 2 to make fract14205tst.l %d0 # check if > 114206bgt.b no_sft # if so, don't shift14207neg.l %d0 # make exp positive14208m_loop:14209lsr.l 
&1,%d2 # shift d2:d3 right, add 0s14210roxr.l &1,%d3 # the number of places14211dbf.w %d0,m_loop # given in d014212no_sft:14213tst.l %d2 # check for mantissa of zero14214bne.b no_zr # if not, go on14215tst.l %d3 # continue zero check14216beq.b zer_m # if zero, go directly to binstr14217no_zr:14218clr.l %d1 # put zero in d1 for addx14219add.l &0x00000080,%d3 # inc at bit 714220addx.l %d1,%d2 # continue inc14221and.l &0xffffff80,%d3 # strip off lsb not used by 88214222zer_m:14223mov.l %d4,%d0 # put LEN in d0 for binstr call14224addq.l &3,%a0 # a0 points to M16 byte in result14225bsr binstr # call binstr to convert mant142261422714228# A15. Convert the exponent to bcd.14229# As in A14 above, the exp is converted to bcd and the14230# digits are stored in the final string.14231#14232# Digits are stored in L_SCR1(a6) on return from BINDEC as:14233#14234# 32 16 15 014235# -----------------------------------------14236# | 0 | e3 | e2 | e1 | e4 | X | X | X |14237# -----------------------------------------14238#14239# And are moved into their proper places in FP_SCR0. If digit e414240# is non-zero, OPERR is signaled. 
In all cases, all 4 digits are14241# written as specified in the 881/882 manual for packed decimal.14242#14243# Register usage:14244# Input/Output14245# d0: x/LEN call to binstr - final is 014246# d1: x/scratch (0);shift count for final exponent packing14247# d2: x/ms 32-bits of exp fraction/scratch14248# d3: x/ls 32-bits of exp fraction14249# d4: LEN/Unchanged14250# d5: ICTR:LAMBDA/LAMBDA:ICTR14251# d6: ILOG14252# d7: k-factor/Unchanged14253# a0: ptr to result string/ptr to L_SCR1(a6)14254# a1: ptr to PTENxx array/Unchanged14255# a2: ptr to FP_SCR1(a6)/Unchanged14256# fp0: abs(YINT) adjusted/float(ILOG)14257# fp1: 10^ISCALE/Unchanged14258# fp2: 10^LEN/Unchanged14259# F_SCR1:Work area for final result/BCD result14260# F_SCR2:Y with original exponent/ILOG/10^414261# L_SCR1:original USER_FPCR/Exponent digits on return from binstr14262# L_SCR2:first word of X packed/Unchanged1426314264A15_st:14265tst.b BINDEC_FLG(%a6) # check for denorm14266beq.b not_denorm14267ftest.x %fp0 # test for zero14268fbeq.w den_zero # if zero, use k-factor or 493314269fmov.l %d6,%fp0 # float ILOG14270fabs.x %fp0 # get abs of ILOG14271bra.b convrt14272den_zero:14273tst.l %d7 # check sign of the k-factor14274blt.b use_ilog # if negative, use ILOG14275fmov.s F4933(%pc),%fp0 # force exponent to 493314276bra.b convrt # do it14277use_ilog:14278fmov.l %d6,%fp0 # float ILOG14279fabs.x %fp0 # get abs of ILOG14280bra.b convrt14281not_denorm:14282ftest.x %fp0 # test for zero14283fbneq.w not_zero # if zero, force exponent14284fmov.s FONE(%pc),%fp0 # force exponent to 114285bra.b convrt # do it14286not_zero:14287fmov.l %d6,%fp0 # float ILOG14288fabs.x %fp0 # get abs of ILOG14289convrt:14290fdiv.x 24(%a1),%fp0 # compute ILOG/10^414291fmov.x %fp0,FP_SCR1(%a6) # store fp0 in memory14292mov.l 4(%a2),%d2 # move word 2 to d214293mov.l 8(%a2),%d3 # move word 3 to d314294mov.w (%a2),%d0 # move exp to d014295beq.b x_loop_fin # if zero, skip the shift14296sub.w &0x3ffd,%d0 # subtract off bias14297neg.w %d0 # make 
#	exp positive
x_loop:
	lsr.l	&1,%d2			# shift d2:d3 right
	roxr.l	&1,%d3			# the number of places
	dbf.w	%d0,x_loop		# given in d0
x_loop_fin:
	clr.l	%d1			# put zero in d1 for addx
	add.l	&0x00000080,%d3		# inc at bit 7 (0x80); with the mask below
					# this rounds d2:d3 at bit 7
	addx.l	%d1,%d2			# continue inc
	and.l	&0xffffff80,%d3		# strip off lsb not used by 882
	mov.l	&4,%d0			# put 4 in d0 for binstr call
	lea.l	L_SCR1(%a6),%a0		# a0 is ptr to L_SCR1 for exp digits
	bsr	binstr			# call binstr to convert exp
	mov.l	L_SCR1(%a6),%d0		# load L_SCR1 lword to d0
	mov.l	&12,%d1			# use d1 for shift count
	lsr.l	%d1,%d0			# shift d0 right by 12
	bfins	%d0,FP_SCR0(%a6){&4:&12} # put e3:e2:e1 in FP_SCR0
	lsr.l	%d1,%d0			# shift d0 right by 12
	bfins	%d0,FP_SCR0(%a6){&16:&4} # put e4 in FP_SCR0
	tst.b	%d0			# check if e4 is zero
	beq.b	A16_st			# if zero, skip rest
	or.l	&opaop_mask,USER_FPSR(%a6) # set OPERR & AIOP in USER_FPSR


# A16. Write sign bits to final string.
#	   Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG).
#
# Register usage:
#	Input/Output
#	d0: x/scratch - final is x
#	d2: x/x
#	d3: x/x
#	d4: LEN/Unchanged
#	d5: ICTR:LAMBDA/LAMBDA:ICTR
#	d6: ILOG/ILOG adjusted
#	d7: k-factor/Unchanged
#	a0: ptr to L_SCR1(a6)/Unchanged
#	a1: ptr to PTENxx array/Unchanged
#	a2: ptr to FP_SCR1(a6)/Unchanged
#	fp0: float(ILOG)/Unchanged
#	fp1: 10^ISCALE/Unchanged
#	fp2: 10^LEN/Unchanged
#	F_SCR1:BCD result with correct signs
#	F_SCR2:ILOG/10^4
#	L_SCR1:Exponent digits on return from binstr
#	L_SCR2:first word of X packed/Unchanged

A16_st:
	clr.l	%d0			# clr d0 for collection of signs
	and.b	&0x0f,FP_SCR0(%a6)	# clear first nibble of FP_SCR0
	tst.l	L_SCR2(%a6)		# check sign of original mantissa
	bge.b	mant_p			# if pos, don't set SM
	mov.l	&2,%d0			# move 2 in to d0 for SM
mant_p:
	tst.l	%d6			# check sign of ILOG
	bge.b	wr_sgn			# if pos, don't set SE
	addq.l	&1,%d0			# set bit 0 in d0 for SE
wr_sgn:
	bfins	%d0,FP_SCR0(%a6){&0:&2}	# insert SM and SE into FP_SCR0
					# (the two most significant bits)

# Clean up and restore all registers used.

	fmov.l	&0,%fpsr		# clear possible inex2/ainex bits
	fmovm.x	(%sp)+,&0xe0		# {%fp0-%fp2}
	movm.l	(%sp)+,&0x4fc		# {%d2-%d7/%a2}
	rts

# Power-of-ten tables.
# Each table has 13 entries of three longwords (12 bytes); per the
# per-line comments, entry n holds 10^(2^n) for n = 0..12.  The three
# tables carry the same powers and differ only in the low mantissa bits
# of the entries from 10^32 upward; the entries for 10^1 .. 10^16 are
# identical in all three.  The bindec code selects PTENRN, PTENRP or
# PTENRM according to the rounding mode it derived (RN, RP or RM) and
# indexes the chosen table in steps of 12 bytes.

	global	PTENRN
PTENRN:
	long	0x40020000,0xA0000000,0x00000000	# 10 ^ 1
	long	0x40050000,0xC8000000,0x00000000	# 10 ^ 2
	long	0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
	long	0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
	long	0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
	long	0x40690000,0x9DC5ADA8,0x2B70B59E	# 10 ^ 32
	long	0x40D30000,0xC2781F49,0xFFCFA6D5	# 10 ^ 64
	long	0x41A80000,0x93BA47C9,0x80E98CE0	# 10 ^ 128
	long	0x43510000,0xAA7EEBFB,0x9DF9DE8E	# 10 ^ 256
	long	0x46A30000,0xE319A0AE,0xA60E91C7	# 10 ^ 512
	long	0x4D480000,0xC9767586,0x81750C17	# 10 ^ 1024
	long	0x5A920000,0x9E8B3B5D,0xC53D5DE5	# 10 ^ 2048
	long	0x75250000,0xC4605202,0x8A20979B	# 10 ^ 4096

	global	PTENRP
PTENRP:
	long	0x40020000,0xA0000000,0x00000000	# 10 ^ 1
	long	0x40050000,0xC8000000,0x00000000	# 10 ^ 2
	long	0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
	long	0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
	long	0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
	long	0x40690000,0x9DC5ADA8,0x2B70B59E	# 10 ^ 32
	long	0x40D30000,0xC2781F49,0xFFCFA6D6	# 10 ^ 64
	long	0x41A80000,0x93BA47C9,0x80E98CE0	# 10 ^ 128
	long	0x43510000,0xAA7EEBFB,0x9DF9DE8E	# 10 ^ 256
	long	0x46A30000,0xE319A0AE,0xA60E91C7	# 10 ^ 512
	long	0x4D480000,0xC9767586,0x81750C18	# 10 ^ 1024
	long	0x5A920000,0x9E8B3B5D,0xC53D5DE5	# 10 ^ 2048
	long	0x75250000,0xC4605202,0x8A20979B	# 10 ^ 4096

	global	PTENRM
PTENRM:
	long	0x40020000,0xA0000000,0x00000000	# 10 ^ 1
	long	0x40050000,0xC8000000,0x00000000	# 10 ^ 2
	long	0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
	long	0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
	long	0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
	long	0x40690000,0x9DC5ADA8,0x2B70B59D	# 10 ^ 32
	long	0x40D30000,0xC2781F49,0xFFCFA6D5	# 10 ^ 64
	long	0x41A80000,0x93BA47C9,0x80E98CDF	# 10 ^ 128
	long	0x43510000,0xAA7EEBFB,0x9DF9DE8D	# 10 ^ 256
	long	0x46A30000,0xE319A0AE,0xA60E91C6	# 10 ^ 512
	long	0x4D480000,0xC9767586,0x81750C17	# 10 ^ 1024
	long	0x5A920000,0x9E8B3B5D,0xC53D5DE4	# 10 ^ 2048
	long	0x75250000,0xC4605202,0x8A20979A	# 10 ^ 4096

#########################################################################
# binstr(): Converts a 64-bit binary integer to bcd.                    #
#                                                                       #
# INPUT *************************************************************** #
#     d2:d3 = 64-bit binary integer                                     #
#     d0 = desired length (LEN)                                         #
#     a0 = pointer to start in memory for bcd characters                #
#          (This pointer must point to byte 4 of the first              #
#           lword of the packed decimal memory string.)                 #
#                                                                       #
# OUTPUT ************************************************************** #
#     a0 = pointer to LEN bcd digits representing the 64-bit integer.   #
#                                                                       #
# ALGORITHM *********************************************************** #
#     The 64-bit binary is assumed to have a decimal point before       #
#     bit 63.  The fraction is multiplied by 10 using a mul by 2        #
#     shift and a mul by 8 shift.  The bits shifted out of the          #
#     msb form a decimal digit.  This process is iterated until         #
#     LEN digits are formed.                                            #
#                                                                       #
# A1. Init d7 to 1.  D7 is the byte digit counter, and if 1, the        #
#     digit formed will be assumed the least significant.  This is      #
#     to force the first byte formed to have a 0 in the upper 4 bits.   #
#                                                                       #
# A2. Beginning of the loop:                                            #
#     Copy the fraction in d2:d3 to d4:d5.                              #
#                                                                       #
# A3. Multiply the fraction in d2:d3 by 8 using bit-field               #
#     extracts and shifts.  The three msbs from d2 will go into d1.     #
#                                                                       #
# A4. Multiply the fraction in d4:d5 by 2 using shifts.                 #
#     The msb will be collected by the carry.                           #
#                                                                       #
# A5. Add using the carry the 64-bit quantities in d2:d3 and d4:d5      #
#     into d2:d3.  D1 will contain the bcd digit formed.                #
#                                                                       #
# A6. Test d7.  If zero, the digit formed is the ms digit.  If non-     #
#     zero, it is the ls digit.  Put the digit in its place in the      #
#     upper word of d7.  If it is the ls digit, write the byte          #
#     from d7 to memory.                                                #
#                                                                       #
# A7. Decrement d0 (LEN counter) and repeat the loop until zero.        #
#                                                                       #
#########################################################################

#	Implementation Notes:
#
#	The registers are used as follows:
#
#		d0: LEN counter
#		d1: temp used to form the digit
#		d2: upper 32-bits of fraction for mul by 8
#		d3: lower 32-bits of fraction for mul by 8
#		d4: upper 32-bits of fraction for mul by 2
#		d5: lower 32-bits of fraction for mul by 2
#		d6: temp for bit-field extracts
#		d7: byte digit formation word;digit count {0,1}
#		    (upper word = pending ms digit, lower word = flag)
#		a0: pointer into memory for packed bcd string formation
#
#	d0-d7 are pushed on entry and popped on exit, so the caller sees
#	them unchanged.  a0 is post-incremented once per byte stored and
#	is left pointing just past the last byte written.
#

	global	binstr
binstr:
	movm.l	&0xff00,-(%sp)		# {%d0-%d7}

#
# A1: Init d7
#
	mov.l	&1,%d7			# init d7 for second digit
	subq.l	&1,%d0			# for dbf d0 would have LEN+1 passes
#
# A2. Copy d2:d3 to d4:d5. Start loop.
#
loop:
	mov.l	%d2,%d4			# copy the fraction before muls
	mov.l	%d3,%d5			# to d4:d5
#
# A3. Multiply d2:d3 by 8; extract msbs into d1.
#
	bfextu	%d2{&0:&3},%d1		# copy 3 msbs of d2 into d1
	asl.l	&3,%d2			# shift d2 left by 3 places
	bfextu	%d3{&0:&3},%d6		# copy 3 msbs of d3 into d6
	asl.l	&3,%d3			# shift d3 left by 3 places
	or.l	%d6,%d2			# or in msbs from d3 into d2
#
# A4. Multiply d4:d5 by 2; add carry out to d1.
#
	asl.l	&1,%d5			# mul d5 by 2
	roxl.l	&1,%d4			# mul d4 by 2
	swap	%d6			# put 0 in d6 lower word
	addx.w	%d6,%d1			# add in extend from mul by 2
#
# A5. Add mul by 8 to mul by 2. D1 contains the digit formed.
#
	add.l	%d5,%d3			# add lower 32 bits
	nop				# ERRATA FIX #13 (Rev. 1.2 6/6/90)
	addx.l	%d4,%d2			# add with extend upper 32 bits
	nop				# ERRATA FIX #13 (Rev. 1.2 6/6/90)
	addx.w	%d6,%d1			# add in extend from add to d1
	swap	%d6			# with d6 = 0; put 0 in upper word
#
# A6. Test d7 and branch.
#
	tst.w	%d7			# if zero, store digit & to loop
	beq.b	first_d			# if non-zero, form byte & write
sec_d:
	swap	%d7			# bring first digit to word d7b
	asl.w	&4,%d7			# first digit in upper 4 bits d7b
	add.w	%d1,%d7			# add in ls digit to d7b
	mov.b	%d7,(%a0)+		# store d7b byte in memory
	swap	%d7			# bring flag word d7a back to low word
	clr.w	%d7			# set d7a to signal no digits done
	dbf.w	%d0,loop		# do loop some more!
	bra.b	end_bstr		# finished, so exit
first_d:
	swap	%d7			# put digit word in d7b
	mov.w	%d1,%d7			# put new digit in d7b
	swap	%d7			# bring flag word d7a back to low word
	addq.w	&1,%d7			# set d7a to signal first digit done
	dbf.w	%d0,loop		# do loop some more!
	swap	%d7			# put last digit in string
	lsl.w	&4,%d7			# move it to upper 4 bits
	mov.b	%d7,(%a0)+		# store it in memory string
#
# Clean up and return; the digits have been stored through a0.
#
end_bstr:
	movm.l	(%sp)+,&0xff		# {%d0-%d7}
	rts

#########################################################################
# XDEF **************************************************************** #
#     facc_in_b(): dmem_read_byte failed                                #
#     facc_in_w(): dmem_read_word failed                                #
#     facc_in_l(): dmem_read_long failed                                #
#     facc_in_d(): dmem_read of dbl prec failed                         #
#     facc_in_x(): dmem_read of ext prec failed                         #
#                                                                       #
#     facc_out_b(): dmem_write_byte failed                              #
#     facc_out_w(): dmem_write_word failed                              #
#     facc_out_l(): dmem_write_long failed                              #
#     facc_out_d(): dmem_write of dbl prec failed                       #
#     facc_out_x(): dmem_write of ext prec failed                       #
#                                                                       #
# XREF
#      **************************************************************** #
#     _real_access() - exit through access error handler                #
#                                                                       #
# INPUT *************************************************************** #
#     None                                                              #
#                                                                       #
# OUTPUT ************************************************************** #
#     None                                                              #
#                                                                       #
# ALGORITHM *********************************************************** #
#     Flow jumps here when an FP data fetch call gets an error          #
# result.  This means the operating system wants an access error frame  #
# made out of the current exception stack frame.                        #
#     So, we first call restore() which makes sure that any updated     #
# -(an)+ register gets returned to its pre-exception value and then     #
# we change the stack to an access error stack frame.                   #
#                                                                       #
#########################################################################

# Every stub below follows the same three steps:
#   1. d0 = size in bytes of the access that failed; restore() uses it
#      as the amount by which to undo a (An)+ / -(An) update.
#   2. bsr restore.
#   3. store a per-size/per-direction code word in the EXC_VOFF slot;
#      facc_finish later moves that word into the upper half of the
#      FSLW longword of the new frame.
# NOTE(review): the code words look like the R/W, SIZE and TM fields of
# the MC68060 Fault Status Long Word - confirm against the User's Manual
# before changing any of them.

facc_in_b:
	movq.l	&0x1,%d0		# one byte
	bsr.w	restore			# fix An

	mov.w	&0x0121,EXC_VOFF(%a6)	# set FSLW
	bra.w	facc_finish		# farthest stub from facc_finish; keeps
					# the original word-sized displacement

facc_in_w:
	movq.l	&0x2,%d0		# two bytes
	bsr.w	restore			# fix An

	mov.w	&0x0141,EXC_VOFF(%a6)	# set FSLW
	bra.b	facc_finish

facc_in_l:
	movq.l	&0x4,%d0		# four bytes
	bsr.w	restore			# fix An

	mov.w	&0x0101,EXC_VOFF(%a6)	# set FSLW
	bra.b	facc_finish

facc_in_d:
	movq.l	&0x8,%d0		# eight bytes
	bsr.w	restore			# fix An

	mov.w	&0x0161,EXC_VOFF(%a6)	# set FSLW
	bra.b	facc_finish

facc_in_x:
	movq.l	&0xc,%d0		# twelve bytes
	bsr.w	restore			# fix An

	mov.w	&0x0161,EXC_VOFF(%a6)	# set FSLW (same code as dbl prec)
	bra.b	facc_finish

################################################################

facc_out_b:
	movq.l	&0x1,%d0		# one byte
	bsr.w	restore			# restore An

	mov.w	&0x00a1,EXC_VOFF(%a6)	# set FSLW
	bra.b	facc_finish

facc_out_w:
	movq.l	&0x2,%d0		# two bytes
	bsr.w	restore			# restore An

	mov.w	&0x00c1,EXC_VOFF(%a6)	# set FSLW
	bra.b	facc_finish

facc_out_l:
	movq.l	&0x4,%d0		# four bytes
	bsr.w	restore			# restore An

	mov.w	&0x0081,EXC_VOFF(%a6)	# set FSLW
	bra.b	facc_finish

facc_out_d:
	movq.l	&0x8,%d0		# eight bytes
	bsr.w	restore			# restore An

	mov.w	&0x00e1,EXC_VOFF(%a6)	# set FSLW
	bra.b	facc_finish

facc_out_x:
	movq.l	&0xc,%d0		# twelve bytes (movq.l like the other
					# nine stubs: same d0 value and condition
					# codes as mov.l, shorter encoding)
	bsr.w	restore			# restore An

	mov.w	&0x00e1,EXC_VOFF(%a6)	# set FSLW (same code as dbl prec)
					# falls through into facc_finish

# here's where we actually create the access error frame from the
# current exception stack frame.
#
# After the unlk the frame is grown by one longword and rewritten in
# place.  Offsets below are relative to the new %sp:
#	0x0: SR, hi(PC)		(copied down from the old frame)
#	0x4: lo(PC), voff	(voff word at 0x6 becomes 0x4008)
#	0x8: EA			(moved down from old offset 0x8)
#	0xc: FSLW		(upper word = code stored in EXC_VOFF by
#				 the stub, lower word = 0x0001)
facc_finish:
	mov.l	USER_FPIAR(%a6),EXC_PC(%a6) # store current PC

	fmovm.x	EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l	USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l	EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk	%a6

	mov.l	(%sp),-(%sp)		# store SR, hi(PC)
	mov.l	0x8(%sp),0x4(%sp)	# store lo(PC)
	mov.l	0xc(%sp),0x8(%sp)	# store EA
	mov.l	&0x00000001,0xc(%sp)	# store FSLW
	mov.w	0x6(%sp),0xc(%sp)	# fix FSLW (size)
	mov.w	&0x4008,0x6(%sp)	# store voff

	btst	&0x5,(%sp)		# supervisor or user mode?
	beq.b	facc_out2		# user
	bset	&0x2,0xd(%sp)		# set supervisor TM bit

facc_out2:
	bra.l	_real_access

##################################################################

# if the effective addressing mode was predecrement or postincrement,
# the emulation has already changed its value to the correct post-
# instruction value.
# but since we're exiting to the access error
# handler, then AN must be returned to its pre-instruction value.
# we do that here.
#
# In:	d0 = number of bytes the failed access covered (set by the
#	     facc_* stub that called us)
# Uses:	d1 as scratch; the ri_a7 path also uses a0 as scratch
#	(facc_finish reloads d0-d1/a0-a1 from EXC_DREGS afterwards).
# The low byte of the stacked opword is examined: bits 5-3 are the
# <ea> mode, bits 2-0 the address register number.  Any mode other
# than (An)+ or -(An) returns without changing anything.
restore:
	mov.b	EXC_OPWORD+0x1(%a6),%d1
	andi.b	&0x38,%d1		# extract <ea> mode field (bits 5-3)
	cmpi.b	%d1,&0x18		# postinc?
	beq.w	rest_inc
	cmpi.b	%d1,&0x20		# predec?
	beq.w	rest_dec
	rts

# subtract d0 from the address register named in the opword; dispatch
# goes through a table of 16-bit offsets relative to tbl_rest_inc.
rest_inc:
	mov.b	EXC_OPWORD+0x1(%a6),%d1
	andi.w	&0x0007,%d1		# fetch An

	mov.w	(tbl_rest_inc.b,%pc,%d1.w*2),%d1
	jmp	(tbl_rest_inc.b,%pc,%d1.w*1)

tbl_rest_inc:
	short	ri_a0 - tbl_rest_inc
	short	ri_a1 - tbl_rest_inc
	short	ri_a2 - tbl_rest_inc
	short	ri_a3 - tbl_rest_inc
	short	ri_a4 - tbl_rest_inc
	short	ri_a5 - tbl_rest_inc
	short	ri_a6 - tbl_rest_inc
	short	ri_a7 - tbl_rest_inc

# a0 and a1 are fixed in their stacked copies (EXC_DREGS), a2-a5 in
# the live registers, and a6 in the longword that %a6 points at.
ri_a0:
	sub.l	%d0,EXC_DREGS+0x8(%a6)	# fix stacked a0
	rts
ri_a1:
	sub.l	%d0,EXC_DREGS+0xc(%a6)	# fix stacked a1
	rts
ri_a2:
	sub.l	%d0,%a2			# fix a2
	rts
ri_a3:
	sub.l	%d0,%a3			# fix a3
	rts
ri_a4:
	sub.l	%d0,%a4			# fix a4
	rts
ri_a5:
	sub.l	%d0,%a5			# fix a5
	rts
ri_a6:
	sub.l	%d0,(%a6)		# fix stacked a6
	rts
# if it's a fmove out instruction, we don't have to fix a7
# because we hadn't changed it yet. if it's an opclass two
# instruction (data moved in) and the exception was in supervisor
# mode, then it also wasn't updated. if it was user mode, then
# restore the correct a7 which is in the USP currently.
ri_a7:
	cmpi.b	EXC_VOFF(%a6),&0x30	# move in or out?
	bne.b	ri_a7_done		# out

	btst	&0x5,EXC_SR(%a6)	# user or supervisor?
	bne.b	ri_a7_done		# supervisor
	movc	%usp,%a0		# restore USP (a0 is scratch here)
	sub.l	%d0,%a0
	movc	%a0,%usp
ri_a7_done:
	rts

# need to invert adjustment value if the <ea> was predec
# (rest_inc subtracts d0, so negating d0 first makes it add instead)
rest_dec:
	neg.l	%d0
	bra.b	rest_inc
