/* Path: blob/21.2-virgl/src/gallium/auxiliary/tgsi/tgsi_util.c */
/* 4565 views */
/**************************************************************************1*2* Copyright 2007 VMware, Inc.3* All Rights Reserved.4*5* Permission is hereby granted, free of charge, to any person obtaining a6* copy of this software and associated documentation files (the7* "Software"), to deal in the Software without restriction, including8* without limitation the rights to use, copy, modify, merge, publish,9* distribute, sub license, and/or sell copies of the Software, and to10* permit persons to whom the Software is furnished to do so, subject to11* the following conditions:12*13* The above copyright notice and this permission notice (including the14* next paragraph) shall be included in all copies or substantial portions15* of the Software.16*17* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS18* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF19* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.20* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR21* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,22* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE23* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.24*25**************************************************************************/2627#include "util/u_debug.h"28#include "pipe/p_shader_tokens.h"29#include "tgsi_info.h"30#include "tgsi_parse.h"31#include "tgsi_util.h"32#include "tgsi_exec.h"33#include "util/bitscan.h"3435union pointer_hack36{37void *pointer;38uint64_t uint64;39};4041void *42tgsi_align_128bit(void *unaligned)43{44union pointer_hack ph;4546ph.uint64 = 0;47ph.pointer = unaligned;48ph.uint64 = (ph.uint64 + 15) & ~15;49return ph.pointer;50}5152unsigned53tgsi_util_get_src_register_swizzle(const struct tgsi_src_register *reg,54unsigned component)55{56switch (component) {57case TGSI_CHAN_X:58return reg->SwizzleX;59case TGSI_CHAN_Y:60return reg->SwizzleY;61case TGSI_CHAN_Z:62return 
reg->SwizzleZ;63case TGSI_CHAN_W:64return reg->SwizzleW;65default:66assert(0);67}68return 0;69}707172unsigned73tgsi_util_get_full_src_register_swizzle(74const struct tgsi_full_src_register *reg,75unsigned component)76{77return tgsi_util_get_src_register_swizzle(®->Register, component);78}798081void82tgsi_util_set_src_register_swizzle(struct tgsi_src_register *reg,83unsigned swizzle,84unsigned component)85{86switch (component) {87case 0:88reg->SwizzleX = swizzle;89break;90case 1:91reg->SwizzleY = swizzle;92break;93case 2:94reg->SwizzleZ = swizzle;95break;96case 3:97reg->SwizzleW = swizzle;98break;99default:100assert(0);101}102}103104105/**106* Determine which channels of the specificed src register are effectively107* used by this instruction.108*/109unsigned110tgsi_util_get_inst_usage_mask(const struct tgsi_full_instruction *inst,111unsigned src_idx)112{113const struct tgsi_full_src_register *src = &inst->Src[src_idx];114unsigned write_mask = inst->Dst[0].Register.WriteMask;115unsigned read_mask;116unsigned usage_mask;117unsigned chan;118119switch (inst->Instruction.Opcode) {120case TGSI_OPCODE_IF:121case TGSI_OPCODE_UIF:122case TGSI_OPCODE_EMIT:123case TGSI_OPCODE_ENDPRIM:124case TGSI_OPCODE_RCP:125case TGSI_OPCODE_RSQ:126case TGSI_OPCODE_SQRT:127case TGSI_OPCODE_EX2:128case TGSI_OPCODE_LG2:129case TGSI_OPCODE_SIN:130case TGSI_OPCODE_COS:131case TGSI_OPCODE_POW: /* reads src0.x and src1.x */132case TGSI_OPCODE_UP2H:133case TGSI_OPCODE_UP2US:134case TGSI_OPCODE_UP4B:135case TGSI_OPCODE_UP4UB:136case TGSI_OPCODE_MEMBAR:137case TGSI_OPCODE_BALLOT:138read_mask = TGSI_WRITEMASK_X;139break;140141case TGSI_OPCODE_DP2:142case TGSI_OPCODE_PK2H:143case TGSI_OPCODE_PK2US:144case TGSI_OPCODE_DFRACEXP:145case TGSI_OPCODE_F2D:146case TGSI_OPCODE_I2D:147case TGSI_OPCODE_U2D:148case TGSI_OPCODE_F2U64:149case TGSI_OPCODE_F2I64:150case TGSI_OPCODE_U2I64:151case TGSI_OPCODE_I2I64:152case TGSI_OPCODE_TXQS: /* bindless handle possible */153case TGSI_OPCODE_RESQ: /* bindless handle 
possible */154read_mask = TGSI_WRITEMASK_XY;155break;156157case TGSI_OPCODE_TXQ:158if (src_idx == 0)159read_mask = TGSI_WRITEMASK_X;160else161read_mask = TGSI_WRITEMASK_XY; /* bindless handle possible */162break;163164case TGSI_OPCODE_DP3:165read_mask = TGSI_WRITEMASK_XYZ;166break;167168case TGSI_OPCODE_DSEQ:169case TGSI_OPCODE_DSNE:170case TGSI_OPCODE_DSLT:171case TGSI_OPCODE_DSGE:172case TGSI_OPCODE_DP4:173case TGSI_OPCODE_PK4B:174case TGSI_OPCODE_PK4UB:175case TGSI_OPCODE_D2F:176case TGSI_OPCODE_D2I:177case TGSI_OPCODE_D2U:178case TGSI_OPCODE_I2F:179case TGSI_OPCODE_U2F:180case TGSI_OPCODE_U64SEQ:181case TGSI_OPCODE_U64SNE:182case TGSI_OPCODE_U64SLT:183case TGSI_OPCODE_U64SGE:184case TGSI_OPCODE_U642F:185case TGSI_OPCODE_I64SLT:186case TGSI_OPCODE_I64SGE:187case TGSI_OPCODE_I642F:188read_mask = TGSI_WRITEMASK_XYZW;189break;190191case TGSI_OPCODE_LIT:192read_mask = write_mask & TGSI_WRITEMASK_YZ ?193TGSI_WRITEMASK_XY | TGSI_WRITEMASK_W : 0;194break;195196case TGSI_OPCODE_EXP:197case TGSI_OPCODE_LOG:198read_mask = write_mask & TGSI_WRITEMASK_XYZ ? TGSI_WRITEMASK_X : 0;199break;200201case TGSI_OPCODE_DST:202if (src_idx == 0)203read_mask = TGSI_WRITEMASK_YZ;204else205read_mask = TGSI_WRITEMASK_YW;206break;207208case TGSI_OPCODE_DLDEXP:209if (src_idx == 0) {210read_mask = write_mask;211} else {212read_mask =213(write_mask & TGSI_WRITEMASK_XY ? TGSI_WRITEMASK_X : 0) |214(write_mask & TGSI_WRITEMASK_ZW ? 
TGSI_WRITEMASK_Z : 0);215}216break;217218case TGSI_OPCODE_READ_INVOC:219if (src_idx == 0)220read_mask = write_mask;221else222read_mask = TGSI_WRITEMASK_X;223break;224225case TGSI_OPCODE_FBFETCH:226read_mask = 0; /* not a real register read */227break;228229case TGSI_OPCODE_TEX:230case TGSI_OPCODE_TEX_LZ:231case TGSI_OPCODE_TXF_LZ:232case TGSI_OPCODE_TXF:233case TGSI_OPCODE_TXB:234case TGSI_OPCODE_TXL:235case TGSI_OPCODE_TXP:236case TGSI_OPCODE_TXD:237case TGSI_OPCODE_TEX2:238case TGSI_OPCODE_TXB2:239case TGSI_OPCODE_TXL2:240case TGSI_OPCODE_LODQ:241case TGSI_OPCODE_TG4: {242unsigned dim_layer =243tgsi_util_get_texture_coord_dim(inst->Texture.Texture);244unsigned dim_layer_shadow, dim;245246/* Add shadow. */247if (tgsi_is_shadow_target(inst->Texture.Texture)) {248dim_layer_shadow = dim_layer + 1;249if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D)250dim_layer_shadow = 3;251} else {252dim_layer_shadow = dim_layer;253}254255/* Remove layer. */256if (tgsi_is_array_sampler(inst->Texture.Texture))257dim = dim_layer - 1;258else259dim = dim_layer;260261read_mask = TGSI_WRITEMASK_XY; /* bindless handle in the last operand */262263switch (src_idx) {264case 0:265if (inst->Instruction.Opcode == TGSI_OPCODE_LODQ)266read_mask = u_bit_consecutive(0, dim);267else268read_mask = u_bit_consecutive(0, dim_layer_shadow) & 0xf;269270if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D)271read_mask &= ~TGSI_WRITEMASK_Y;272273if (inst->Instruction.Opcode == TGSI_OPCODE_TXF ||274inst->Instruction.Opcode == TGSI_OPCODE_TXB ||275inst->Instruction.Opcode == TGSI_OPCODE_TXL ||276inst->Instruction.Opcode == TGSI_OPCODE_TXP)277read_mask |= TGSI_WRITEMASK_W;278break;279280case 1:281if (inst->Instruction.Opcode == TGSI_OPCODE_TXD)282read_mask = u_bit_consecutive(0, dim);283else if (inst->Instruction.Opcode == TGSI_OPCODE_TEX2 ||284inst->Instruction.Opcode == TGSI_OPCODE_TXB2 ||285inst->Instruction.Opcode == TGSI_OPCODE_TXL2 ||286inst->Instruction.Opcode == TGSI_OPCODE_TG4)287read_mask = 
TGSI_WRITEMASK_X;288break;289290case 2:291if (inst->Instruction.Opcode == TGSI_OPCODE_TXD)292read_mask = u_bit_consecutive(0, dim);293break;294}295break;296}297298case TGSI_OPCODE_LOAD:299if (src_idx == 0) {300read_mask = TGSI_WRITEMASK_XY; /* bindless handle possible */301} else {302unsigned dim = tgsi_util_get_texture_coord_dim(inst->Memory.Texture);303read_mask = u_bit_consecutive(0, dim);304}305break;306307case TGSI_OPCODE_STORE:308if (src_idx == 0) {309unsigned dim = tgsi_util_get_texture_coord_dim(inst->Memory.Texture);310read_mask = u_bit_consecutive(0, dim);311} else {312read_mask = TGSI_WRITEMASK_XYZW;313}314break;315316case TGSI_OPCODE_ATOMUADD:317case TGSI_OPCODE_ATOMXCHG:318case TGSI_OPCODE_ATOMCAS:319case TGSI_OPCODE_ATOMAND:320case TGSI_OPCODE_ATOMOR:321case TGSI_OPCODE_ATOMXOR:322case TGSI_OPCODE_ATOMUMIN:323case TGSI_OPCODE_ATOMUMAX:324case TGSI_OPCODE_ATOMIMIN:325case TGSI_OPCODE_ATOMIMAX:326case TGSI_OPCODE_ATOMFADD:327if (src_idx == 0) {328read_mask = TGSI_WRITEMASK_XY; /* bindless handle possible */329} else if (src_idx == 1) {330unsigned dim = tgsi_util_get_texture_coord_dim(inst->Memory.Texture);331read_mask = u_bit_consecutive(0, dim);332} else {333read_mask = TGSI_WRITEMASK_XYZW;334}335break;336337case TGSI_OPCODE_INTERP_CENTROID:338case TGSI_OPCODE_INTERP_SAMPLE:339case TGSI_OPCODE_INTERP_OFFSET:340if (src_idx == 0)341read_mask = write_mask;342else if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET)343read_mask = TGSI_WRITEMASK_XY; /* offset */344else345read_mask = TGSI_WRITEMASK_X; /* sample */346break;347348default:349if (tgsi_get_opcode_info(inst->Instruction.Opcode)->output_mode ==350TGSI_OUTPUT_COMPONENTWISE)351read_mask = write_mask;352else353read_mask = TGSI_WRITEMASK_XYZW; /* assume all channels are read */354break;355}356357usage_mask = 0;358for (chan = 0; chan < 4; ++chan) {359if (read_mask & (1 << chan)) {360usage_mask |= 1 << tgsi_util_get_full_src_register_swizzle(src, chan);361}362}363364return 
usage_mask;365}366367/**368* Convert a tgsi_ind_register into a tgsi_src_register369*/370struct tgsi_src_register371tgsi_util_get_src_from_ind(const struct tgsi_ind_register *reg)372{373struct tgsi_src_register src = { 0 };374375src.File = reg->File;376src.Index = reg->Index;377src.SwizzleX = reg->Swizzle;378src.SwizzleY = reg->Swizzle;379src.SwizzleZ = reg->Swizzle;380src.SwizzleW = reg->Swizzle;381382return src;383}384385/**386* Return the dimension of the texture coordinates (layer included for array387* textures), as well as the location of the shadow reference value or the388* sample index.389*/390int391tgsi_util_get_texture_coord_dim(enum tgsi_texture_type tgsi_tex)392{393/*394* Depending on the texture target, (src0.xyzw, src1.x) is interpreted395* differently:396*397* (s, X, X, X, X), for BUFFER398* (s, X, X, X, X), for 1D399* (s, t, X, X, X), for 2D, RECT400* (s, t, r, X, X), for 3D, CUBE401*402* (s, layer, X, X, X), for 1D_ARRAY403* (s, t, layer, X, X), for 2D_ARRAY404* (s, t, r, layer, X), for CUBE_ARRAY405*406* (s, X, shadow, X, X), for SHADOW1D407* (s, t, shadow, X, X), for SHADOW2D, SHADOWRECT408* (s, t, r, shadow, X), for SHADOWCUBE409*410* (s, layer, shadow, X, X), for SHADOW1D_ARRAY411* (s, t, layer, shadow, X), for SHADOW2D_ARRAY412* (s, t, r, layer, shadow), for SHADOWCUBE_ARRAY413*414* (s, t, sample, X, X), for 2D_MSAA415* (s, t, layer, sample, X), for 2D_ARRAY_MSAA416*/417switch (tgsi_tex) {418case TGSI_TEXTURE_BUFFER:419case TGSI_TEXTURE_1D:420case TGSI_TEXTURE_SHADOW1D:421return 1;422case TGSI_TEXTURE_2D:423case TGSI_TEXTURE_RECT:424case TGSI_TEXTURE_1D_ARRAY:425case TGSI_TEXTURE_SHADOW2D:426case TGSI_TEXTURE_SHADOWRECT:427case TGSI_TEXTURE_SHADOW1D_ARRAY:428case TGSI_TEXTURE_2D_MSAA:429return 2;430case TGSI_TEXTURE_3D:431case TGSI_TEXTURE_CUBE:432case TGSI_TEXTURE_2D_ARRAY:433case TGSI_TEXTURE_SHADOWCUBE:434case TGSI_TEXTURE_SHADOW2D_ARRAY:435case TGSI_TEXTURE_2D_ARRAY_MSAA:436return 3;437case TGSI_TEXTURE_CUBE_ARRAY:438case 
TGSI_TEXTURE_SHADOWCUBE_ARRAY:439return 4;440default:441assert(!"unknown texture target");442return 0;443}444}445446447/**448* Given a TGSI_TEXTURE_x target, return register component where the449* shadow reference/distance coordinate is found. Typically, components450* 0 and 1 are the (s,t) texcoords and component 2 or 3 hold the shadow451* reference value. But if we return 4, it means the reference value is452* found in the 0th component of the second coordinate argument to the453* TEX2 instruction.454*/455int456tgsi_util_get_shadow_ref_src_index(enum tgsi_texture_type tgsi_tex)457{458switch (tgsi_tex) {459case TGSI_TEXTURE_SHADOW1D:460case TGSI_TEXTURE_SHADOW2D:461case TGSI_TEXTURE_SHADOWRECT:462case TGSI_TEXTURE_SHADOW1D_ARRAY:463return 2;464case TGSI_TEXTURE_SHADOWCUBE:465case TGSI_TEXTURE_SHADOW2D_ARRAY:466case TGSI_TEXTURE_2D_MSAA:467case TGSI_TEXTURE_2D_ARRAY_MSAA:468return 3;469case TGSI_TEXTURE_SHADOWCUBE_ARRAY:470return 4;471default:472/* no shadow nor sample */473return -1;474}475}476477478bool479tgsi_is_shadow_target(enum tgsi_texture_type target)480{481switch (target) {482case TGSI_TEXTURE_SHADOW1D:483case TGSI_TEXTURE_SHADOW2D:484case TGSI_TEXTURE_SHADOWRECT:485case TGSI_TEXTURE_SHADOW1D_ARRAY:486case TGSI_TEXTURE_SHADOW2D_ARRAY:487case TGSI_TEXTURE_SHADOWCUBE:488case TGSI_TEXTURE_SHADOWCUBE_ARRAY:489return TRUE;490default:491return FALSE;492}493}494495496