/* SPDX-License-Identifier: GPL-2.0 */
	.file	"wm_shrx.S"
/*---------------------------------------------------------------------------+
 |  wm_shrx.S                                                                |
 |                                                                           |
 |  64 bit right shift functions                                             |
 |                                                                           |
 |  Copyright (C) 1992,1995                                                  |
 |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
 |                       Australia. E-mail [email protected]                 |
 |                                                                           |
 | Call from C as:                                                           |
 |   unsigned FPU_shrx(void *arg1, unsigned arg2)                            |
 | and                                                                       |
 |   unsigned FPU_shrxs(void *arg1, unsigned arg2)                           |
 |                                                                           |
 +---------------------------------------------------------------------------*/

#include "fpu_emu.h"

.text
/*---------------------------------------------------------------------------+
 |   unsigned FPU_shrx(void *arg1, unsigned arg2)                            |
 |                                                                           |
 |   Extended shift right function.                                          |
 |   Fastest for small shifts.                                               |
 |   Shifts the 64 bit quantity pointed to by the first arg (arg1)           |
 |   right by the number of bits specified by the second arg (arg2).         |
 |   Forms a 96 bit quantity from the 64 bit arg and eax:                    |
 |                  [ 64 bit arg ][ eax ]                                    |
 |                          shift right --------->                           |
 |   The eax register is initialized to 0 before the shifting.               |
 |   Results returned in the 64 bit arg and eax.                             |
+---------------------------------------------------------------------------*/

/*
 * Register roles inside FPU_shrx:
 *   %esi       arg1: address of the 64 bit operand
 *              (least significant long at 0, most significant long at 4)
 *   %ecx/%cl   arg2: shift count; 32 or 64 is subtracted on the larger paths
 *              so that the shrd/shr instructions always see 0..31
 *   %eax       the 32 bit extension word, returned to the caller
 *   %edx/%ebx  working copies of the operand halves; %ebx is pushed and
 *              popped only on the one path that uses it
 */
SYM_FUNC_START(FPU_shrx)
	pushl	%ebp
	movl	%esp,%ebp
	pushl	%esi
	movl	PARAM2,%ecx		/* number of bits to shift by */
	movl	PARAM1,%esi		/* where the 64 bit operand lives */
	cmpl	$32,%ecx		/* shrd can only do counts of 0..31 */
	jae	.Lshrx_32_or_more

	/* Shift by [0..31] bits */
	pushl	%ebx
	movl	(%esi),%ebx		/* lsl */
	movl	4(%esi),%edx		/* msl */
	xorl	%eax,%eax		/* extension starts out as zero */
	shrdl	%cl,%ebx,%eax		/* bits leaving lsl enter the extension */
	shrdl	%cl,%edx,%ebx		/* bits leaving msl enter lsl */
	shrl	%cl,%edx
	movl	%ebx,(%esi)
	movl	%edx,4(%esi)
	popl	%ebx
	popl	%esi
	leave
	RET

	/* Shift by [32..63] bits */
.Lshrx_32_or_more:
	cmpl	$64,%ecx
	jae	.Lshrx_64_or_more

	subb	$32,%cl			/* whole-long move is done by register choice */
	movl	(%esi),%eax		/* old lsl becomes the extension ... */
	movl	4(%esi),%edx		/* ... and old msl becomes the new lsl */
	shrdl	%cl,%edx,%eax
	shrl	%cl,%edx
	movl	%edx,(%esi)
	movl	$0,4(%esi)		/* nothing is left in the msl */
	popl	%esi
	leave
	RET

	/* Shift by [64..95] bits */
.Lshrx_64_or_more:
	cmpl	$96,%ecx
	jae	.Lshrx_96_or_more

	subb	$64,%cl
	movl	4(%esi),%eax		/* only the old msl can reach the extension */
	shrl	%cl,%eax
	xorl	%edx,%edx
	movl	%edx,(%esi)		/* the whole 64 bit operand is now zero */
	movl	%edx,4(%esi)
	popl	%esi
	leave
	RET

	/* Shift by [96..inf) bits: operand and extension are all zero */
.Lshrx_96_or_more:
	xorl	%eax,%eax
	movl	%eax,(%esi)
	movl	%eax,4(%esi)
	popl	%esi
	leave
	RET
SYM_FUNC_END(FPU_shrx)


/*---------------------------------------------------------------------------+
 |   unsigned FPU_shrxs(void *arg1, unsigned arg2)                           |
 |                                                                           |
 |   Extended shift right function (optimized for small floating point       |
 |   integers).                                                              |
 |   Shifts the 64 bit quantity pointed to by the first arg (arg1)           |
 |   right by the number of bits specified by the second arg (arg2).         |
 |   Forms a 96 bit quantity from the 64 bit arg and eax:                    |
 |                  [ 64 bit arg ][ eax ]                                    |
 |                          shift right --------->                           |
 |   The eax register is initialized to 0 before the shifting.               |
 |   The lower 8 bits of eax are lost and replaced by a flag which is        |
 |   set (to 0x01) if any bit, apart from the first one, is set in the       |
 |   part which has been shifted out of the arg.                             |
 |   Results returned in the 64 bit arg and eax.                             |
+---------------------------------------------------------------------------*/

/*
 * Register roles inside FPU_shrxs:
 *   %esi       arg1: address of the 64 bit operand
 *              (least significant long at 0, most significant long at 4)
 *   %ecx/%cl   arg2: shift count; 32 or 64 is subtracted on the larger paths
 *              so that the shrd/shr instructions always see 0..31
 *   %eax       returned extension word; its low byte (%al) is overwritten
 *              with the 0/1 "something else was shifted out" flag
 *   %ebx       scratch, pushed on entry and popped on every exit path
 */
SYM_FUNC_START(FPU_shrxs)
	pushl	%ebp
	movl	%esp,%ebp
	pushl	%esi
	pushl	%ebx
	movl	PARAM2,%ecx		/* number of bits to shift by */
	movl	PARAM1,%esi		/* where the 64 bit operand lives */
	cmpl	$64,%ecx		/* counts of 64 and up are handled below */
	jae	.Lshrxs_64_or_more

	cmpl	$32,%ecx		/* shrd can only do counts of 0..31 */
	jb	.Lshrxs_below_32

	/*
	 * Shift by [32..63] bits.  This is the fall-through path because it
	 * is assumed to be the most common request (small integers).
	 */
	subb	$32,%cl
	movl	(%esi),%eax		/* lsl */
	movl	4(%esi),%edx		/* msl */
	xorl	%ebx,%ebx
	shrdl	%cl,%eax,%ebx		/* %ebx collects what falls below the extension */
	shrdl	%cl,%edx,%eax		/* %eax becomes the extension */
	shrl	%cl,%edx
	testl	%ebx,%ebx		/* anything in those 32 bits ? */
	setne	%bl
	testl	$0x7fffffff,%eax	/* anything in the low 31 extension bits ? */
	setne	%bh
	testw	%bx,%bx			/* either of the two partial flags set ? */
	setne	%al
	movl	%edx,(%esi)
	movl	$0,4(%esi)
	popl	%ebx
	popl	%esi
	leave
	RET

	/* Shift by [0..31] bits */
.Lshrxs_below_32:
	movl	(%esi),%ebx		/* lsl */
	movl	4(%esi),%edx		/* msl */
	xorl	%eax,%eax		/* extension starts out as zero */
	shrdl	%cl,%ebx,%eax
	shrdl	%cl,%edx,%ebx
	shrl	%cl,%edx
	testl	$0x7fffffff,%eax	/* all shifted-out bits are in %eax here */
	setne	%al
	movl	%ebx,(%esi)
	movl	%edx,4(%esi)
	popl	%ebx
	popl	%esi
	leave
	RET

	/* Shift by [64..95] bits */
.Lshrxs_64_or_more:
	cmpl	$96,%ecx
	jae	.Lshrxs_96_or_more

	subb	$64,%cl
	movl	(%esi),%ebx		/* lsl */
	movl	4(%esi),%eax		/* msl */
	xorl	%edx,%edx		/* catches the bits pushed out of lsl */
	shrdl	%cl,%ebx,%edx
	shrdl	%cl,%eax,%ebx
	shrl	%cl,%eax		/* %eax becomes the extension */
	orl	%ebx,%edx		/* anything at all below the extension ? */
	setne	%bl
	testl	$0x7fffffff,%eax	/* anything in the low 31 extension bits ? */
	setne	%bh
	testw	%bx,%bx
	setne	%al
	xorl	%edx,%edx
	movl	%edx,(%esi)		/* the whole 64 bit operand is now zero */
	movl	%edx,4(%esi)
	popl	%ebx
	popl	%esi
	leave
	RET

	/*
	 * Shift by [96..inf) bits: the result is zero everywhere, and %eax is
	 * 1 when the operand held any set bit, 0 otherwise.
	 */
.Lshrxs_96_or_more:
	xorl	%eax,%eax
	movl	(%esi),%ebx
	orl	4(%esi),%ebx		/* was the operand non-zero ? */
	setne	%al
	xorl	%ebx,%ebx
	movl	%ebx,(%esi)
	movl	%ebx,4(%esi)
	popl	%ebx
	popl	%esi
	leave
	RET
SYM_FUNC_END(FPU_shrxs)