Path: blob/21.2-virgl/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
4561 views
/**************************************************************************1*2* Copyright (C) 1999-2005 Brian Paul All Rights Reserved.3*4* Permission is hereby granted, free of charge, to any person obtaining a5* copy of this software and associated documentation files (the "Software"),6* to deal in the Software without restriction, including without limitation7* the rights to use, copy, modify, merge, publish, distribute, sublicense,8* and/or sell copies of the Software, and to permit persons to whom the9* Software is furnished to do so, subject to the following conditions:10*11* The above copyright notice and this permission notice shall be included12* in all copies or substantial portions of the Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS15* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR18* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,19* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR20* OTHER DEALINGS IN THE SOFTWARE.21*22**************************************************************************/2324#ifndef _RTASM_X86SSE_H_25#define _RTASM_X86SSE_H_2627#include "pipe/p_compiler.h"28#include "pipe/p_config.h"2930#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)3132/* It is up to the caller to ensure that instructions issued are33* suitable for the host cpu. There are no checks made in this module34* for mmx/sse/sse2 support on the cpu.35*/36struct x86_reg {37unsigned file:2;38unsigned idx:4;39unsigned mod:2; /* mod_REG if this is just a register */40int disp:24; /* only +/- 23bits of offset - should be enough... */41};4243#define X86_MMX 144#define X86_MMX2 245#define X86_SSE 446#define X86_SSE2 847#define X86_SSE3 0x1048#define X86_SSE4_1 0x204950struct x86_function {51unsigned caps;52unsigned size;53unsigned char *store;54unsigned char *csr;5556unsigned stack_offset:16;57unsigned need_emms:8;58int x87_stack:8;5960unsigned char error_overflow[4];61};6263enum x86_reg_file {64file_REG32,65file_MMX,66file_XMM,67file_x8768};6970/* Values for mod field of modr/m byte71*/72enum x86_reg_mod {73mod_INDIRECT,74mod_DISP8,75mod_DISP32,76mod_REG77};7879enum x86_reg_name {80reg_AX,81reg_CX,82reg_DX,83reg_BX,84reg_SP,85reg_BP,86reg_SI,87reg_DI,88reg_R8,89reg_R9,90reg_R10,91reg_R11,92reg_R12,93reg_R13,94reg_R14,95reg_R1596};979899enum x86_cc {100cc_O, /* overflow */101cc_NO, /* not overflow */102cc_NAE, /* not above or equal / carry */103cc_AE, /* above or equal / not carry */104cc_E, /* equal / zero */105cc_NE /* not equal / not zero */106};107108enum sse_cc {109cc_Equal,110cc_LessThan,111cc_LessThanEqual,112cc_Unordered,113cc_NotEqual,114cc_NotLessThan,115cc_NotLessThanEqual,116cc_Ordered117};118119#define cc_Z cc_E120#define cc_NZ cc_NE121122123/** generic pointer to function */124typedef void (*x86_func)(void);125126127/* Begin/end/retrieve function creation:128*/129130enum x86_target131{132X86_32,133X86_64_STD_ABI,134X86_64_WIN64_ABI135};136137/* make this read a member of x86_function if target != host is desired */138static inline enum x86_target x86_target( struct x86_function* p )139{140#ifdef PIPE_ARCH_X86141return X86_32;142#elif (defined(PIPE_OS_CYGWIN) || defined(PIPE_OS_WINDOWS)) && defined(PIPE_ARCH_X86_64)143return X86_64_WIN64_ABI;144#elif defined(PIPE_ARCH_X86_64)145return X86_64_STD_ABI;146#endif147}148149static inline unsigned x86_target_caps( struct x86_function* p )150{151return p->caps;152}153154void x86_init_func( struct x86_function *p );155void x86_init_func_size( struct x86_function *p, unsigned code_size );156void x86_release_func( struct x86_function *p );157x86_func x86_get_func( struct x86_function *p );158159/* Debugging:160*/161void x86_print_reg( struct x86_reg reg );162163164/* Create and manipulate registers and regmem values:165*/166struct x86_reg x86_make_reg( enum x86_reg_file file,167enum x86_reg_name idx );168169struct x86_reg x86_make_disp( struct x86_reg reg,170int disp );171172struct x86_reg x86_deref( struct x86_reg reg );173174struct x86_reg x86_get_base_reg( struct x86_reg reg );175176177/* Labels, jumps and fixup:178*/179int x86_get_label( struct x86_function *p );180181void x64_rexw(struct x86_function *p);182183void x86_jcc( struct x86_function *p,184enum x86_cc cc,185int label );186187int x86_jcc_forward( struct x86_function *p,188enum x86_cc cc );189190int x86_jmp_forward( struct x86_function *p);191192int x86_call_forward( struct x86_function *p);193194void x86_fixup_fwd_jump( struct x86_function *p,195int fixup );196197void x86_jmp( struct x86_function *p, int label );198199/* void x86_call( struct x86_function *p, void (*label)() ); */200void x86_call( struct x86_function *p, struct x86_reg reg);201202void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm );203void x86_add_imm( struct x86_function *p, struct x86_reg dst, int imm );204void x86_or_imm( struct x86_function *p, struct x86_reg dst, int imm );205void x86_and_imm( struct x86_function *p, struct x86_reg dst, int imm );206void x86_sub_imm( struct x86_function *p, struct x86_reg dst, int imm );207void x86_xor_imm( struct x86_function *p, struct x86_reg dst, int imm );208void x86_cmp_imm( struct x86_function *p, struct x86_reg dst, int imm );209210211/* Macro for sse_shufps() and sse2_pshufd():212*/213#define SHUF(_x,_y,_z,_w) (((_x)<<0) | ((_y)<<2) | ((_z)<<4) | ((_w)<<6))214#define SHUF_NOOP RSW(0,1,2,3)215#define GET_SHUF(swz, idx) (((swz) >> ((idx)*2)) & 0x3)216217void mmx_emms( struct x86_function *p );218void mmx_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );219void mmx_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );220void mmx_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );221void mmx_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );222223void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );224void sse2_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );225void sse2_movdqu( struct x86_function *p, struct x86_reg dst, struct x86_reg src );226void sse2_movdqa( struct x86_function *p, struct x86_reg dst, struct x86_reg src );227void sse2_movsd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );228void sse2_movupd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );229void sse2_movapd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );230231void sse2_cvtps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );232void sse2_cvttps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );233void sse2_cvtdq2ps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );234void sse2_cvtsd2ss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );235void sse2_cvtpd2ps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );236237void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );238void sse2_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );239void sse2_packsswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );240void sse2_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );241void sse2_pshufd( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,242unsigned char shuf );243void sse2_pshuflw( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,244unsigned char shuf );245void sse2_pshufhw( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,246unsigned char shuf );247void sse2_rcpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );248void sse2_rcpss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );249250void sse2_punpcklbw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );251void sse2_punpcklwd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );252void sse2_punpckldq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );253void sse2_punpcklqdq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );254255void sse2_psllw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );256void sse2_pslld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );257void sse2_psllq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );258259void sse2_psrlw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );260void sse2_psrld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );261void sse2_psrlq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );262263void sse2_psraw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );264void sse2_psrad_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );265266void sse2_por( struct x86_function *p, struct x86_reg dst, struct x86_reg src );267268void sse2_pshuflw( struct x86_function *p, struct x86_reg dst, struct x86_reg src, uint8_t imm );269void sse2_pshufhw( struct x86_function *p, struct x86_reg dst, struct x86_reg src, uint8_t imm );270void sse2_pshufd( struct x86_function *p, struct x86_reg dst, struct x86_reg src, uint8_t imm );271272void sse_prefetchnta( struct x86_function *p, struct x86_reg ptr);273void sse_prefetch0( struct x86_function *p, struct x86_reg ptr);274void sse_prefetch1( struct x86_function *p, struct x86_reg ptr);275276void sse_movntps( struct x86_function *p, struct x86_reg dst, struct x86_reg src);277278void sse_addps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );279void sse_addss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );280void sse_cvtps2pi( struct x86_function *p, struct x86_reg dst, struct x86_reg src );281void sse_divss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );282void sse_andnps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );283void sse_andps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );284void sse_cmpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src,285enum sse_cc cc );286void sse_maxps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );287void sse_maxss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );288void sse_minps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );289void sse_movaps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );290void sse_movhlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );291void sse_movhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );292void sse_movlhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );293void sse_movlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );294void sse_movss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );295void sse_movups( struct x86_function *p, struct x86_reg dst, struct x86_reg src );296void sse_mulps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );297void sse_mulss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );298void sse_orps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );299void sse_xorps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );300void sse_subps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );301void sse_rsqrtps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );302void sse_rsqrtss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );303void sse_shufps( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,304unsigned char shuf );305void sse_unpckhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );306void sse_unpcklps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );307void sse_pmovmskb( struct x86_function *p, struct x86_reg dest, struct x86_reg src );308void sse_movmskps( struct x86_function *p, struct x86_reg dst, struct x86_reg src);309310void x86_add( struct x86_function *p, struct x86_reg dst, struct x86_reg src );311void x86_and( struct x86_function *p, struct x86_reg dst, struct x86_reg src );312void x86_cmovcc( struct x86_function *p, struct x86_reg dst, struct x86_reg src, enum x86_cc cc );313void x86_cmp( struct x86_function *p, struct x86_reg dst, struct x86_reg src );314void x86_dec( struct x86_function *p, struct x86_reg reg );315void x86_inc( struct x86_function *p, struct x86_reg reg );316void x86_lea( struct x86_function *p, struct x86_reg dst, struct x86_reg src );317void x86_mov( struct x86_function *p, struct x86_reg dst, struct x86_reg src );318void x64_mov64( struct x86_function *p, struct x86_reg dst, struct x86_reg src );319void x86_mov8( struct x86_function *p, struct x86_reg dst, struct x86_reg src );320void x86_mov16( struct x86_function *p, struct x86_reg dst, struct x86_reg src );321void x86_movzx8(struct x86_function *p, struct x86_reg dst, struct x86_reg src );322void x86_movzx16(struct x86_function *p, struct x86_reg dst, struct x86_reg src );323void x86_mov_imm(struct x86_function *p, struct x86_reg dst, int imm );324void x86_mov8_imm(struct x86_function *p, struct x86_reg dst, uint8_t imm );325void x86_mov16_imm(struct x86_function *p, struct x86_reg dst, uint16_t imm );326void x86_mul( struct x86_function *p, struct x86_reg src );327void x86_imul( struct x86_function *p, struct x86_reg dst, struct x86_reg src );328void x86_or( struct x86_function *p, struct x86_reg dst, struct x86_reg src );329void x86_pop( struct x86_function *p, struct x86_reg reg );330void x86_push( struct x86_function *p, struct x86_reg reg );331void x86_push_imm32( struct x86_function *p, int imm );332void x86_ret( struct x86_function *p );333void x86_retw( struct x86_function *p, unsigned short imm );334void x86_sub( struct x86_function *p, struct x86_reg dst, struct x86_reg src );335void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src );336void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src );337void x86_sahf( struct x86_function *p );338void x86_div( struct x86_function *p, struct x86_reg src );339void x86_bswap( struct x86_function *p, struct x86_reg src );340void x86_shr_imm( struct x86_function *p, struct x86_reg reg, unsigned imm );341void x86_sar_imm( struct x86_function *p, struct x86_reg reg, unsigned imm );342void x86_shl_imm( struct x86_function *p, struct x86_reg reg, unsigned imm );343344void x86_cdecl_caller_push_regs( struct x86_function *p );345void x86_cdecl_caller_pop_regs( struct x86_function *p );346347void x87_assert_stack_empty( struct x86_function *p );348349void x87_f2xm1( struct x86_function *p );350void x87_fabs( struct x86_function *p );351void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );352void x87_faddp( struct x86_function *p, struct x86_reg dst );353void x87_fchs( struct x86_function *p );354void x87_fclex( struct x86_function *p );355void x87_fcmovb( struct x86_function *p, struct x86_reg src );356void x87_fcmovbe( struct x86_function *p, struct x86_reg src );357void x87_fcmove( struct x86_function *p, struct x86_reg src );358void x87_fcmovnb( struct x86_function *p, struct x86_reg src );359void x87_fcmovnbe( struct x86_function *p, struct x86_reg src );360void x87_fcmovne( struct x86_function *p, struct x86_reg src );361void x87_fcom( struct x86_function *p, struct x86_reg dst );362void x87_fcomi( struct x86_function *p, struct x86_reg dst );363void x87_fcomip( struct x86_function *p, struct x86_reg dst );364void x87_fcomp( struct x86_function *p, struct x86_reg dst );365void x87_fcos( struct x86_function *p );366void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );367void x87_fdivp( struct x86_function *p, struct x86_reg dst );368void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );369void x87_fdivrp( struct x86_function *p, struct x86_reg dst );370void x87_fild( struct x86_function *p, struct x86_reg arg );371void x87_fist( struct x86_function *p, struct x86_reg dst );372void x87_fistp( struct x86_function *p, struct x86_reg dst );373void x87_fld( struct x86_function *p, struct x86_reg arg );374void x87_fld1( struct x86_function *p );375void x87_fldcw( struct x86_function *p, struct x86_reg arg );376void x87_fldl2e( struct x86_function *p );377void x87_fldln2( struct x86_function *p );378void x87_fldz( struct x86_function *p );379void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );380void x87_fmulp( struct x86_function *p, struct x86_reg dst );381void x87_fnclex( struct x86_function *p );382void x87_fprndint( struct x86_function *p );383void x87_fpop( struct x86_function *p );384void x87_fscale( struct x86_function *p );385void x87_fsin( struct x86_function *p );386void x87_fsincos( struct x86_function *p );387void x87_fsqrt( struct x86_function *p );388void x87_fst( struct x86_function *p, struct x86_reg dst );389void x87_fstp( struct x86_function *p, struct x86_reg dst );390void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );391void x87_fsubp( struct x86_function *p, struct x86_reg dst );392void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );393void x87_fsubrp( struct x86_function *p, struct x86_reg dst );394void x87_ftst( struct x86_function *p );395void x87_fxch( struct x86_function *p, struct x86_reg dst );396void x87_fxtract( struct x86_function *p );397void x87_fyl2x( struct x86_function *p );398void x87_fyl2xp1( struct x86_function *p );399void x87_fwait( struct x86_function *p );400void x87_fnstcw( struct x86_function *p, struct x86_reg dst );401void x87_fnstsw( struct x86_function *p, struct x86_reg dst );402void x87_fucompp( struct x86_function *p );403void x87_fucomp( struct x86_function *p, struct x86_reg arg );404void x87_fucom( struct x86_function *p, struct x86_reg arg );405406407408/* Retrieve a reference to one of the function arguments, taking into409* account any push/pop activity. Note - doesn't track explicit410* manipulation of ESP by other instructions.411*/412struct x86_reg x86_fn_arg( struct x86_function *p, unsigned arg );413414#endif415#endif416417418