.file "wm_shrx.S"
/*---------------------------------------------------------------------------+
|  wm_shrx.S                                                                 |
|                                                                            |
|  64 bit right shift functions                                              |
|                                                                            |
| Copyright (C) 1992,1995                                                    |
|                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,       |
|                       Australia.  E-mail [email protected]                 |
|                                                                            |
| Call from C as:                                                            |
|   unsigned FPU_shrx(void *arg1, unsigned arg2)                             |
| and                                                                        |
|   unsigned FPU_shrxs(void *arg1, unsigned arg2)                            |
|                                                                            |
+---------------------------------------------------------------------------*/

#include "fpu_emu.h"

.text
/*---------------------------------------------------------------------------+
|   unsigned FPU_shrx(void *arg1, unsigned arg2)                             |
|                                                                            |
|   Extended shift right function.                                           |
|   Fastest for small shifts.                                                |
|   Shifts the 64 bit quantity pointed to by the first arg (arg1)            |
|   right by the number of bits specified by the second arg (arg2).          |
|   Forms a 96 bit quantity from the 64 bit arg and eax:                     |
|                [ 64 bit arg ][ eax ]                                       |
|            shift right  --------->                                         |
|   The eax register is initialized to 0 before the shifting.                |
|   Results returned in the 64 bit arg and eax.                              |
+---------------------------------------------------------------------------*/

/*
 * Register roles inside FPU_shrx:
 *   %esi	address of the 64 bit operand (low long at 0, high long at 4)
 *   %ecx	shift count; the shift instructions only look at %cl
 *   %edx	high long of the operand while it is being shifted
 *   %ebx	low long of the operand (0..31 path only; saved and restored)
 *   %eax	extension long, returned to the caller
 * PARAM1 and PARAM2 are macros supplied by fpu_emu.h (not visible here);
 * presumably they name the stack slots of arg1 and arg2.
 */
ENTRY(FPU_shrx)
	pushl	%ebp
	movl	%esp,%ebp
	pushl	%esi
	movl	PARAM1,%esi		/* esi -> 64 bit operand */
	movl	PARAM2,%ecx		/* ecx = number of bits to shift */
	cmpl	$32,%ecx		/* shrd handles counts 0..31 only */
	jae	L_shift_32_plus

	/* Count 0..31: operand and extension all move by %cl bits */
	pushl	%ebx
	xorl	%eax,%eax		/* extension starts as zero */
	movl	4(%esi),%edx		/* high long */
	movl	(%esi),%ebx		/* low long */
	shrdl	%cl,%ebx,%eax		/* bits leaving the low long enter eax */
	shrdl	%cl,%edx,%ebx		/* bits leaving the high long enter low */
	shrl	%cl,%edx
	movl	%ebx,(%esi)
	movl	%edx,4(%esi)
	popl	%ebx
	popl	%esi
	leave
	ret

L_shift_32_plus:
	cmpl	$64,%ecx
	jae	L_shift_64_plus

	/*
	 * Count 32..63: move everything down one whole long, then shift by
	 * the remaining (count - 32) bits.  Bits of the old low long which
	 * fall below the extension are discarded.
	 */
	subb	$32,%cl
	movl	4(%esi),%edx		/* old high long */
	movl	(%esi),%eax		/* old low long becomes the extension */
	shrdl	%cl,%edx,%eax
	shrl	%cl,%edx
	movl	%edx,(%esi)		/* remains of high long are the new low */
	movl	$0,4(%esi)		/* new high long is zero */
	popl	%esi
	leave
	ret

L_shift_64_plus:
	cmpl	$96,%ecx
	jae	L_shift_96_plus

	/*
	 * Count 64..95: only the old high long can still reach the
	 * extension; the stored operand becomes zero.
	 */
	subb	$64,%cl
	movl	4(%esi),%eax		/* old high long */
	shrl	%cl,%eax
	xorl	%edx,%edx
	movl	%edx,(%esi)
	movl	%edx,4(%esi)
	popl	%esi
	leave
	ret

L_shift_96_plus:
	/* Count 96 or more: operand and extension are all zero */
	xorl	%eax,%eax
	movl	%eax,(%esi)
	movl	%eax,4(%esi)
	popl	%esi
	leave
	ret


/*---------------------------------------------------------------------------+
|   unsigned FPU_shrxs(void *arg1, unsigned arg2)                            |
|                                                                            |
|   Extended shift right function (optimized for small floating point        |
|   integers).                                                               |
|   Shifts the 64 bit quantity pointed to by the first arg (arg1)            |
|   right by the number of bits specified by the second arg (arg2).          |
|   Forms a 96 bit quantity from the 64 bit arg and eax:                     |
|                [ 64 bit arg ][ eax ]                                       |
|            shift right  --------->                                         |
|   The eax register is initialized to 0 before the shifting.                |
|   The lower 8 bits of eax are lost and replaced by a flag which is         |
|   set (to 0x01) if any bit, apart from the first one, is set in the        |
|   part which has been shifted out of the arg.                              |
|   Results returned in the 64 bit arg and eax.                              |
+---------------------------------------------------------------------------*/

/*
 * Register roles inside FPU_shrxs:
 *   %esi	address of the 64 bit operand (low long at 0, high long at 4)
 *   %ecx	shift count; the shift instructions only look at %cl
 *   %eax	extension long; %al is overwritten with the 0/1 flag on return
 *   %ebx	scratch (saved on entry, restored on every exit)
 *   %edx	scratch
 * PARAM1 and PARAM2 are macros supplied by fpu_emu.h (not visible here);
 * presumably they name the stack slots of arg1 and arg2.
 */
ENTRY(FPU_shrxs)
	pushl	%ebp
	movl	%esp,%ebp
	pushl	%esi
	pushl	%ebx
	movl	PARAM1,%esi		/* esi -> 64 bit operand */
	movl	PARAM2,%ecx		/* ecx = number of bits to shift */
	cmpl	$64,%ecx		/* counts of 64 and up go elsewhere */
	jae	Ls_shift_64_plus

	cmpl	$32,%ecx		/* shrd handles counts 0..31 only */
	jb	Ls_shift_under_32

	/*
	 * Count 32..63.  This range is reached without a taken jump on the
	 * assumption that small integers are the most common requirement.
	 */
	subb	$32,%cl
	xorl	%ebx,%ebx		/* collects bits pushed below eax */
	movl	4(%esi),%edx		/* old high long */
	movl	(%esi),%eax		/* old low long becomes the extension */
	shrdl	%cl,%eax,%ebx
	shrdl	%cl,%edx,%eax
	shrl	%cl,%edx
	testl	%ebx,%ebx		/* anything pushed below eax ? */
	setne	%bl
	testl	$0x7fffffff,%eax	/* low 31 bits of the extension */
	setne	%bh
	testw	%bx,%bx			/* either of the two tests non-zero ? */
	setne	%al
	movl	%edx,(%esi)		/* remains of high long are the new low */
	movl	$0,4(%esi)		/* new high long is zero */
	popl	%ebx
	popl	%esi
	leave
	ret

Ls_shift_under_32:
	/* Count 0..31 */
	xorl	%eax,%eax		/* extension starts as zero */
	movl	4(%esi),%edx		/* high long */
	movl	(%esi),%ebx		/* low long */
	shrdl	%cl,%ebx,%eax
	shrdl	%cl,%edx,%ebx
	shrl	%cl,%edx
	testl	$0x7fffffff,%eax	/* every shifted-out bit is in eax */
	setne	%al
	movl	%ebx,(%esi)
	movl	%edx,4(%esi)
	popl	%ebx
	popl	%esi
	leave
	ret

Ls_shift_64_plus:
	cmpl	$96,%ecx
	jae	Ls_shift_96_plus

	/*
	 * Count 64..95: eax receives what is left of the old high long;
	 * everything pushed below it is OR-ed together for the flag and
	 * the stored operand becomes zero.
	 */
	subb	$64,%cl
	xorl	%edx,%edx		/* collects bits pushed below ebx */
	movl	(%esi),%ebx		/* old low long */
	movl	4(%esi),%eax		/* old high long */
	shrdl	%cl,%ebx,%edx
	shrdl	%cl,%eax,%ebx
	shrl	%cl,%eax
	orl	%ebx,%edx		/* any bit at all below eax ? */
	setne	%bl
	testl	$0x7fffffff,%eax	/* low 31 bits of the extension */
	setne	%bh
	testw	%bx,%bx
	setne	%al
	xorl	%edx,%edx
	movl	%edx,(%esi)		/* operand is now zero */
	movl	%edx,4(%esi)
	popl	%ebx
	popl	%esi
	leave
	ret

Ls_shift_96_plus:
	/* Count 96 or more: eax is 1 if the operand had any bit set, else 0 */
	movl	(%esi),%ebx
	xorl	%eax,%eax
	orl	4(%esi),%ebx
	setne	%al
	xorl	%ebx,%ebx
	movl	%ebx,(%esi)		/* operand is now zero */
	movl	%ebx,4(%esi)
	popl	%ebx
	popl	%esi
	leave
	ret