Path: blob/21.2-virgl/src/gallium/auxiliary/tgsi/tgsi_exec.c
4565 views
/**************************************************************************1*2* Copyright 2007-2008 VMware, Inc.3* All Rights Reserved.4* Copyright 2009-2010 VMware, Inc. All rights Reserved.5*6* Permission is hereby granted, free of charge, to any person obtaining a7* copy of this software and associated documentation files (the8* "Software"), to deal in the Software without restriction, including9* without limitation the rights to use, copy, modify, merge, publish,10* distribute, sub license, and/or sell copies of the Software, and to11* permit persons to whom the Software is furnished to do so, subject to12* the following conditions:13*14* The above copyright notice and this permission notice (including the15* next paragraph) shall be included in all copies or substantial portions16* of the Software.17*18* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS19* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF20* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.21* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR22* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,23* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE24* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.25*26**************************************************************************/2728/**29* TGSI interpreter/executor.30*31* Flow control information:32*33* Since we operate on 'quads' (4 pixels or 4 vertices in parallel)34* flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special35* care since a condition may be true for some quad components but false36* for other components.37*38* We basically execute all statements (even if they're in the part of39* an IF/ELSE clause that's "not taken") and use a special mask to40* control writing to destination registers. This is the ExecMask.41* See store_dest().42*43* The ExecMask is computed from three other masks (CondMask, LoopMask and44* ContMask) which are controlled by the flow control instructions (namely:45* (IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).46*47*48* Authors:49* Michal Krol50* Brian Paul51*/5253#include "pipe/p_compiler.h"54#include "pipe/p_state.h"55#include "pipe/p_shader_tokens.h"56#include "tgsi/tgsi_dump.h"57#include "tgsi/tgsi_parse.h"58#include "tgsi/tgsi_util.h"59#include "tgsi_exec.h"60#include "util/compiler.h"61#include "util/half_float.h"62#include "util/u_memory.h"63#include "util/u_math.h"64#include "util/rounding.h"656667#define DEBUG_EXECUTION 0686970#define TILE_TOP_LEFT 071#define TILE_TOP_RIGHT 172#define TILE_BOTTOM_LEFT 273#define TILE_BOTTOM_RIGHT 37475union tgsi_double_channel {76double d[TGSI_QUAD_SIZE];77unsigned u[TGSI_QUAD_SIZE][2];78uint64_t u64[TGSI_QUAD_SIZE];79int64_t i64[TGSI_QUAD_SIZE];80} ALIGN16;8182struct ALIGN16 tgsi_double_vector {83union tgsi_double_channel xy;84union tgsi_double_channel zw;85};8687static void88micro_abs(union tgsi_exec_channel *dst,89const union tgsi_exec_channel *src)90{91dst->f[0] = fabsf(src->f[0]);92dst->f[1] = fabsf(src->f[1]);93dst->f[2] = fabsf(src->f[2]);94dst->f[3] = fabsf(src->f[3]);95}9697static void98micro_arl(union tgsi_exec_channel *dst,99const union tgsi_exec_channel *src)100{101dst->i[0] = (int)floorf(src->f[0]);102dst->i[1] = (int)floorf(src->f[1]);103dst->i[2] = (int)floorf(src->f[2]);104dst->i[3] = (int)floorf(src->f[3]);105}106107static void108micro_arr(union tgsi_exec_channel *dst,109const union tgsi_exec_channel *src)110{111dst->i[0] = (int)floorf(src->f[0] + 0.5f);112dst->i[1] = (int)floorf(src->f[1] + 0.5f);113dst->i[2] = (int)floorf(src->f[2] + 0.5f);114dst->i[3] = (int)floorf(src->f[3] + 0.5f);115}116117static void118micro_ceil(union tgsi_exec_channel *dst,119const union tgsi_exec_channel *src)120{121dst->f[0] = ceilf(src->f[0]);122dst->f[1] = ceilf(src->f[1]);123dst->f[2] = ceilf(src->f[2]);124dst->f[3] = ceilf(src->f[3]);125}126127static void128micro_cmp(union tgsi_exec_channel *dst,129const union tgsi_exec_channel *src0,130const union tgsi_exec_channel *src1,131const union tgsi_exec_channel *src2)132{133dst->f[0] = src0->f[0] < 0.0f ? src1->f[0] : src2->f[0];134dst->f[1] = src0->f[1] < 0.0f ? src1->f[1] : src2->f[1];135dst->f[2] = src0->f[2] < 0.0f ? src1->f[2] : src2->f[2];136dst->f[3] = src0->f[3] < 0.0f ? src1->f[3] : src2->f[3];137}138139static void140micro_cos(union tgsi_exec_channel *dst,141const union tgsi_exec_channel *src)142{143dst->f[0] = cosf(src->f[0]);144dst->f[1] = cosf(src->f[1]);145dst->f[2] = cosf(src->f[2]);146dst->f[3] = cosf(src->f[3]);147}148149static void150micro_d2f(union tgsi_exec_channel *dst,151const union tgsi_double_channel *src)152{153dst->f[0] = (float)src->d[0];154dst->f[1] = (float)src->d[1];155dst->f[2] = (float)src->d[2];156dst->f[3] = (float)src->d[3];157}158159static void160micro_d2i(union tgsi_exec_channel *dst,161const union tgsi_double_channel *src)162{163dst->i[0] = (int)src->d[0];164dst->i[1] = (int)src->d[1];165dst->i[2] = (int)src->d[2];166dst->i[3] = (int)src->d[3];167}168169static void170micro_d2u(union tgsi_exec_channel *dst,171const union tgsi_double_channel *src)172{173dst->u[0] = (unsigned)src->d[0];174dst->u[1] = (unsigned)src->d[1];175dst->u[2] = (unsigned)src->d[2];176dst->u[3] = (unsigned)src->d[3];177}178static void179micro_dabs(union tgsi_double_channel *dst,180const union tgsi_double_channel *src)181{182dst->d[0] = src->d[0] >= 0.0 ? src->d[0] : -src->d[0];183dst->d[1] = src->d[1] >= 0.0 ? src->d[1] : -src->d[1];184dst->d[2] = src->d[2] >= 0.0 ? src->d[2] : -src->d[2];185dst->d[3] = src->d[3] >= 0.0 ? src->d[3] : -src->d[3];186}187188static void189micro_dadd(union tgsi_double_channel *dst,190const union tgsi_double_channel *src)191{192dst->d[0] = src[0].d[0] + src[1].d[0];193dst->d[1] = src[0].d[1] + src[1].d[1];194dst->d[2] = src[0].d[2] + src[1].d[2];195dst->d[3] = src[0].d[3] + src[1].d[3];196}197198static void199micro_ddiv(union tgsi_double_channel *dst,200const union tgsi_double_channel *src)201{202dst->d[0] = src[0].d[0] / src[1].d[0];203dst->d[1] = src[0].d[1] / src[1].d[1];204dst->d[2] = src[0].d[2] / src[1].d[2];205dst->d[3] = src[0].d[3] / src[1].d[3];206}207208static void209micro_ddx(union tgsi_exec_channel *dst,210const union tgsi_exec_channel *src)211{212dst->f[0] =213dst->f[1] =214dst->f[2] =215dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];216}217218static void219micro_ddx_fine(union tgsi_exec_channel *dst,220const union tgsi_exec_channel *src)221{222dst->f[0] =223dst->f[1] = src->f[TILE_TOP_RIGHT] - src->f[TILE_TOP_LEFT];224dst->f[2] =225dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];226}227228229static void230micro_ddy(union tgsi_exec_channel *dst,231const union tgsi_exec_channel *src)232{233dst->f[0] =234dst->f[1] =235dst->f[2] =236dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT];237}238239static void240micro_ddy_fine(union tgsi_exec_channel *dst,241const union tgsi_exec_channel *src)242{243dst->f[0] =244dst->f[2] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT];245dst->f[1] =246dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_TOP_RIGHT];247}248249static void250micro_dmul(union tgsi_double_channel *dst,251const union tgsi_double_channel *src)252{253dst->d[0] = src[0].d[0] * src[1].d[0];254dst->d[1] = src[0].d[1] * src[1].d[1];255dst->d[2] = src[0].d[2] * src[1].d[2];256dst->d[3] = src[0].d[3] * src[1].d[3];257}258259static void260micro_dmax(union tgsi_double_channel *dst,261const union tgsi_double_channel *src)262{263dst->d[0] = fmax(src[0].d[0], src[1].d[0]);264dst->d[1] = fmax(src[0].d[1], src[1].d[1]);265dst->d[2] = fmax(src[0].d[2], src[1].d[2]);266dst->d[3] = fmax(src[0].d[3], src[1].d[3]);267}268269static void270micro_dmin(union tgsi_double_channel *dst,271const union tgsi_double_channel *src)272{273dst->d[0] = fmin(src[0].d[0], src[1].d[0]);274dst->d[1] = fmin(src[0].d[1], src[1].d[1]);275dst->d[2] = fmin(src[0].d[2], src[1].d[2]);276dst->d[3] = fmin(src[0].d[3], src[1].d[3]);277}278279static void280micro_dneg(union tgsi_double_channel *dst,281const union tgsi_double_channel *src)282{283dst->d[0] = -src->d[0];284dst->d[1] = -src->d[1];285dst->d[2] = -src->d[2];286dst->d[3] = -src->d[3];287}288289static void290micro_dslt(union tgsi_double_channel *dst,291const union tgsi_double_channel *src)292{293dst->u[0][0] = src[0].d[0] < src[1].d[0] ? ~0U : 0U;294dst->u[1][0] = src[0].d[1] < src[1].d[1] ? ~0U : 0U;295dst->u[2][0] = src[0].d[2] < src[1].d[2] ? ~0U : 0U;296dst->u[3][0] = src[0].d[3] < src[1].d[3] ? ~0U : 0U;297}298299static void300micro_dsne(union tgsi_double_channel *dst,301const union tgsi_double_channel *src)302{303dst->u[0][0] = src[0].d[0] != src[1].d[0] ? ~0U : 0U;304dst->u[1][0] = src[0].d[1] != src[1].d[1] ? ~0U : 0U;305dst->u[2][0] = src[0].d[2] != src[1].d[2] ? ~0U : 0U;306dst->u[3][0] = src[0].d[3] != src[1].d[3] ? ~0U : 0U;307}308309static void310micro_dsge(union tgsi_double_channel *dst,311const union tgsi_double_channel *src)312{313dst->u[0][0] = src[0].d[0] >= src[1].d[0] ? ~0U : 0U;314dst->u[1][0] = src[0].d[1] >= src[1].d[1] ? ~0U : 0U;315dst->u[2][0] = src[0].d[2] >= src[1].d[2] ? ~0U : 0U;316dst->u[3][0] = src[0].d[3] >= src[1].d[3] ? ~0U : 0U;317}318319static void320micro_dseq(union tgsi_double_channel *dst,321const union tgsi_double_channel *src)322{323dst->u[0][0] = src[0].d[0] == src[1].d[0] ? ~0U : 0U;324dst->u[1][0] = src[0].d[1] == src[1].d[1] ? ~0U : 0U;325dst->u[2][0] = src[0].d[2] == src[1].d[2] ? ~0U : 0U;326dst->u[3][0] = src[0].d[3] == src[1].d[3] ? ~0U : 0U;327}328329static void330micro_drcp(union tgsi_double_channel *dst,331const union tgsi_double_channel *src)332{333dst->d[0] = 1.0 / src->d[0];334dst->d[1] = 1.0 / src->d[1];335dst->d[2] = 1.0 / src->d[2];336dst->d[3] = 1.0 / src->d[3];337}338339static void340micro_dsqrt(union tgsi_double_channel *dst,341const union tgsi_double_channel *src)342{343dst->d[0] = sqrt(src->d[0]);344dst->d[1] = sqrt(src->d[1]);345dst->d[2] = sqrt(src->d[2]);346dst->d[3] = sqrt(src->d[3]);347}348349static void350micro_drsq(union tgsi_double_channel *dst,351const union tgsi_double_channel *src)352{353dst->d[0] = 1.0 / sqrt(src->d[0]);354dst->d[1] = 1.0 / sqrt(src->d[1]);355dst->d[2] = 1.0 / sqrt(src->d[2]);356dst->d[3] = 1.0 / sqrt(src->d[3]);357}358359static void360micro_dmad(union tgsi_double_channel *dst,361const union tgsi_double_channel *src)362{363dst->d[0] = src[0].d[0] * src[1].d[0] + src[2].d[0];364dst->d[1] = src[0].d[1] * src[1].d[1] + src[2].d[1];365dst->d[2] = src[0].d[2] * src[1].d[2] + src[2].d[2];366dst->d[3] = src[0].d[3] * src[1].d[3] + src[2].d[3];367}368369static void370micro_dfrac(union tgsi_double_channel *dst,371const union tgsi_double_channel *src)372{373dst->d[0] = src->d[0] - floor(src->d[0]);374dst->d[1] = src->d[1] - floor(src->d[1]);375dst->d[2] = src->d[2] - floor(src->d[2]);376dst->d[3] = src->d[3] - floor(src->d[3]);377}378379static void380micro_dflr(union tgsi_double_channel *dst,381const union tgsi_double_channel *src)382{383dst->d[0] = floor(src->d[0]);384dst->d[1] = floor(src->d[1]);385dst->d[2] = floor(src->d[2]);386dst->d[3] = floor(src->d[3]);387}388389static void390micro_dldexp(union tgsi_double_channel *dst,391const union tgsi_double_channel *src0,392union tgsi_exec_channel *src1)393{394dst->d[0] = ldexp(src0->d[0], src1->i[0]);395dst->d[1] = ldexp(src0->d[1], src1->i[1]);396dst->d[2] = ldexp(src0->d[2], src1->i[2]);397dst->d[3] = ldexp(src0->d[3], src1->i[3]);398}399400static void401micro_dfracexp(union tgsi_double_channel *dst,402union tgsi_exec_channel *dst_exp,403const union tgsi_double_channel *src)404{405dst->d[0] = frexp(src->d[0], &dst_exp->i[0]);406dst->d[1] = frexp(src->d[1], &dst_exp->i[1]);407dst->d[2] = frexp(src->d[2], &dst_exp->i[2]);408dst->d[3] = frexp(src->d[3], &dst_exp->i[3]);409}410411static void412micro_exp2(union tgsi_exec_channel *dst,413const union tgsi_exec_channel *src)414{415#if DEBUG416/* Inf is okay for this instruction, so clamp it to silence assertions. */417uint i;418union tgsi_exec_channel clamped;419420for (i = 0; i < 4; i++) {421if (src->f[i] > 127.99999f) {422clamped.f[i] = 127.99999f;423} else if (src->f[i] < -126.99999f) {424clamped.f[i] = -126.99999f;425} else {426clamped.f[i] = src->f[i];427}428}429src = &clamped;430#endif /* DEBUG */431432dst->f[0] = powf(2.0f, src->f[0]);433dst->f[1] = powf(2.0f, src->f[1]);434dst->f[2] = powf(2.0f, src->f[2]);435dst->f[3] = powf(2.0f, src->f[3]);436}437438static void439micro_f2d(union tgsi_double_channel *dst,440const union tgsi_exec_channel *src)441{442dst->d[0] = (double)src->f[0];443dst->d[1] = (double)src->f[1];444dst->d[2] = (double)src->f[2];445dst->d[3] = (double)src->f[3];446}447448static void449micro_flr(union tgsi_exec_channel *dst,450const union tgsi_exec_channel *src)451{452dst->f[0] = floorf(src->f[0]);453dst->f[1] = floorf(src->f[1]);454dst->f[2] = floorf(src->f[2]);455dst->f[3] = floorf(src->f[3]);456}457458static void459micro_frc(union tgsi_exec_channel *dst,460const union tgsi_exec_channel *src)461{462dst->f[0] = src->f[0] - floorf(src->f[0]);463dst->f[1] = src->f[1] - floorf(src->f[1]);464dst->f[2] = src->f[2] - floorf(src->f[2]);465dst->f[3] = src->f[3] - floorf(src->f[3]);466}467468static void469micro_i2d(union tgsi_double_channel *dst,470const union tgsi_exec_channel *src)471{472dst->d[0] = (double)src->i[0];473dst->d[1] = (double)src->i[1];474dst->d[2] = (double)src->i[2];475dst->d[3] = (double)src->i[3];476}477478static void479micro_iabs(union tgsi_exec_channel *dst,480const union tgsi_exec_channel *src)481{482dst->i[0] = src->i[0] >= 0 ? src->i[0] : -src->i[0];483dst->i[1] = src->i[1] >= 0 ? src->i[1] : -src->i[1];484dst->i[2] = src->i[2] >= 0 ? src->i[2] : -src->i[2];485dst->i[3] = src->i[3] >= 0 ? src->i[3] : -src->i[3];486}487488static void489micro_ineg(union tgsi_exec_channel *dst,490const union tgsi_exec_channel *src)491{492dst->i[0] = -src->i[0];493dst->i[1] = -src->i[1];494dst->i[2] = -src->i[2];495dst->i[3] = -src->i[3];496}497498static void499micro_lg2(union tgsi_exec_channel *dst,500const union tgsi_exec_channel *src)501{502dst->f[0] = logf(src->f[0]) * 1.442695f;503dst->f[1] = logf(src->f[1]) * 1.442695f;504dst->f[2] = logf(src->f[2]) * 1.442695f;505dst->f[3] = logf(src->f[3]) * 1.442695f;506}507508static void509micro_lrp(union tgsi_exec_channel *dst,510const union tgsi_exec_channel *src0,511const union tgsi_exec_channel *src1,512const union tgsi_exec_channel *src2)513{514dst->f[0] = src0->f[0] * (src1->f[0] - src2->f[0]) + src2->f[0];515dst->f[1] = src0->f[1] * (src1->f[1] - src2->f[1]) + src2->f[1];516dst->f[2] = src0->f[2] * (src1->f[2] - src2->f[2]) + src2->f[2];517dst->f[3] = src0->f[3] * (src1->f[3] - src2->f[3]) + src2->f[3];518}519520static void521micro_mad(union tgsi_exec_channel *dst,522const union tgsi_exec_channel *src0,523const union tgsi_exec_channel *src1,524const union tgsi_exec_channel *src2)525{526dst->f[0] = src0->f[0] * src1->f[0] + src2->f[0];527dst->f[1] = src0->f[1] * src1->f[1] + src2->f[1];528dst->f[2] = src0->f[2] * src1->f[2] + src2->f[2];529dst->f[3] = src0->f[3] * src1->f[3] + src2->f[3];530}531532static void533micro_mov(union tgsi_exec_channel *dst,534const union tgsi_exec_channel *src)535{536dst->u[0] = src->u[0];537dst->u[1] = src->u[1];538dst->u[2] = src->u[2];539dst->u[3] = src->u[3];540}541542static void543micro_rcp(union tgsi_exec_channel *dst,544const union tgsi_exec_channel *src)545{546#if 0 /* for debugging */547assert(src->f[0] != 0.0f);548assert(src->f[1] != 0.0f);549assert(src->f[2] != 0.0f);550assert(src->f[3] != 0.0f);551#endif552dst->f[0] = 1.0f / src->f[0];553dst->f[1] = 1.0f / src->f[1];554dst->f[2] = 1.0f / src->f[2];555dst->f[3] = 1.0f / src->f[3];556}557558static void559micro_rnd(union tgsi_exec_channel *dst,560const union tgsi_exec_channel *src)561{562dst->f[0] = _mesa_roundevenf(src->f[0]);563dst->f[1] = _mesa_roundevenf(src->f[1]);564dst->f[2] = _mesa_roundevenf(src->f[2]);565dst->f[3] = _mesa_roundevenf(src->f[3]);566}567568static void569micro_rsq(union tgsi_exec_channel *dst,570const union tgsi_exec_channel *src)571{572#if 0 /* for debugging */573assert(src->f[0] != 0.0f);574assert(src->f[1] != 0.0f);575assert(src->f[2] != 0.0f);576assert(src->f[3] != 0.0f);577#endif578dst->f[0] = 1.0f / sqrtf(src->f[0]);579dst->f[1] = 1.0f / sqrtf(src->f[1]);580dst->f[2] = 1.0f / sqrtf(src->f[2]);581dst->f[3] = 1.0f / sqrtf(src->f[3]);582}583584static void585micro_sqrt(union tgsi_exec_channel *dst,586const union tgsi_exec_channel *src)587{588dst->f[0] = sqrtf(src->f[0]);589dst->f[1] = sqrtf(src->f[1]);590dst->f[2] = sqrtf(src->f[2]);591dst->f[3] = sqrtf(src->f[3]);592}593594static void595micro_seq(union tgsi_exec_channel *dst,596const union tgsi_exec_channel *src0,597const union tgsi_exec_channel *src1)598{599dst->f[0] = src0->f[0] == src1->f[0] ? 1.0f : 0.0f;600dst->f[1] = src0->f[1] == src1->f[1] ? 1.0f : 0.0f;601dst->f[2] = src0->f[2] == src1->f[2] ? 1.0f : 0.0f;602dst->f[3] = src0->f[3] == src1->f[3] ? 1.0f : 0.0f;603}604605static void606micro_sge(union tgsi_exec_channel *dst,607const union tgsi_exec_channel *src0,608const union tgsi_exec_channel *src1)609{610dst->f[0] = src0->f[0] >= src1->f[0] ? 1.0f : 0.0f;611dst->f[1] = src0->f[1] >= src1->f[1] ? 1.0f : 0.0f;612dst->f[2] = src0->f[2] >= src1->f[2] ? 1.0f : 0.0f;613dst->f[3] = src0->f[3] >= src1->f[3] ? 1.0f : 0.0f;614}615616static void617micro_sgn(union tgsi_exec_channel *dst,618const union tgsi_exec_channel *src)619{620dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f;621dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f;622dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f;623dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f;624}625626static void627micro_isgn(union tgsi_exec_channel *dst,628const union tgsi_exec_channel *src)629{630dst->i[0] = src->i[0] < 0 ? -1 : src->i[0] > 0 ? 1 : 0;631dst->i[1] = src->i[1] < 0 ? -1 : src->i[1] > 0 ? 1 : 0;632dst->i[2] = src->i[2] < 0 ? -1 : src->i[2] > 0 ? 1 : 0;633dst->i[3] = src->i[3] < 0 ? -1 : src->i[3] > 0 ? 1 : 0;634}635636static void637micro_sgt(union tgsi_exec_channel *dst,638const union tgsi_exec_channel *src0,639const union tgsi_exec_channel *src1)640{641dst->f[0] = src0->f[0] > src1->f[0] ? 1.0f : 0.0f;642dst->f[1] = src0->f[1] > src1->f[1] ? 1.0f : 0.0f;643dst->f[2] = src0->f[2] > src1->f[2] ? 1.0f : 0.0f;644dst->f[3] = src0->f[3] > src1->f[3] ? 1.0f : 0.0f;645}646647static void648micro_sin(union tgsi_exec_channel *dst,649const union tgsi_exec_channel *src)650{651dst->f[0] = sinf(src->f[0]);652dst->f[1] = sinf(src->f[1]);653dst->f[2] = sinf(src->f[2]);654dst->f[3] = sinf(src->f[3]);655}656657static void658micro_sle(union tgsi_exec_channel *dst,659const union tgsi_exec_channel *src0,660const union tgsi_exec_channel *src1)661{662dst->f[0] = src0->f[0] <= src1->f[0] ? 1.0f : 0.0f;663dst->f[1] = src0->f[1] <= src1->f[1] ? 1.0f : 0.0f;664dst->f[2] = src0->f[2] <= src1->f[2] ? 1.0f : 0.0f;665dst->f[3] = src0->f[3] <= src1->f[3] ? 1.0f : 0.0f;666}667668static void669micro_slt(union tgsi_exec_channel *dst,670const union tgsi_exec_channel *src0,671const union tgsi_exec_channel *src1)672{673dst->f[0] = src0->f[0] < src1->f[0] ? 1.0f : 0.0f;674dst->f[1] = src0->f[1] < src1->f[1] ? 1.0f : 0.0f;675dst->f[2] = src0->f[2] < src1->f[2] ? 1.0f : 0.0f;676dst->f[3] = src0->f[3] < src1->f[3] ? 1.0f : 0.0f;677}678679static void680micro_sne(union tgsi_exec_channel *dst,681const union tgsi_exec_channel *src0,682const union tgsi_exec_channel *src1)683{684dst->f[0] = src0->f[0] != src1->f[0] ? 1.0f : 0.0f;685dst->f[1] = src0->f[1] != src1->f[1] ? 1.0f : 0.0f;686dst->f[2] = src0->f[2] != src1->f[2] ? 1.0f : 0.0f;687dst->f[3] = src0->f[3] != src1->f[3] ? 1.0f : 0.0f;688}689690static void691micro_trunc(union tgsi_exec_channel *dst,692const union tgsi_exec_channel *src)693{694dst->f[0] = truncf(src->f[0]);695dst->f[1] = truncf(src->f[1]);696dst->f[2] = truncf(src->f[2]);697dst->f[3] = truncf(src->f[3]);698}699700static void701micro_u2d(union tgsi_double_channel *dst,702const union tgsi_exec_channel *src)703{704dst->d[0] = (double)src->u[0];705dst->d[1] = (double)src->u[1];706dst->d[2] = (double)src->u[2];707dst->d[3] = (double)src->u[3];708}709710static void711micro_i64abs(union tgsi_double_channel *dst,712const union tgsi_double_channel *src)713{714dst->i64[0] = src->i64[0] >= 0.0 ? src->i64[0] : -src->i64[0];715dst->i64[1] = src->i64[1] >= 0.0 ? src->i64[1] : -src->i64[1];716dst->i64[2] = src->i64[2] >= 0.0 ? src->i64[2] : -src->i64[2];717dst->i64[3] = src->i64[3] >= 0.0 ? src->i64[3] : -src->i64[3];718}719720static void721micro_i64sgn(union tgsi_double_channel *dst,722const union tgsi_double_channel *src)723{724dst->i64[0] = src->i64[0] < 0 ? -1 : src->i64[0] > 0 ? 1 : 0;725dst->i64[1] = src->i64[1] < 0 ? -1 : src->i64[1] > 0 ? 1 : 0;726dst->i64[2] = src->i64[2] < 0 ? -1 : src->i64[2] > 0 ? 1 : 0;727dst->i64[3] = src->i64[3] < 0 ? -1 : src->i64[3] > 0 ? 1 : 0;728}729730static void731micro_i64neg(union tgsi_double_channel *dst,732const union tgsi_double_channel *src)733{734dst->i64[0] = -src->i64[0];735dst->i64[1] = -src->i64[1];736dst->i64[2] = -src->i64[2];737dst->i64[3] = -src->i64[3];738}739740static void741micro_u64seq(union tgsi_double_channel *dst,742const union tgsi_double_channel *src)743{744dst->u[0][0] = src[0].u64[0] == src[1].u64[0] ? ~0U : 0U;745dst->u[1][0] = src[0].u64[1] == src[1].u64[1] ? ~0U : 0U;746dst->u[2][0] = src[0].u64[2] == src[1].u64[2] ? ~0U : 0U;747dst->u[3][0] = src[0].u64[3] == src[1].u64[3] ? ~0U : 0U;748}749750static void751micro_u64sne(union tgsi_double_channel *dst,752const union tgsi_double_channel *src)753{754dst->u[0][0] = src[0].u64[0] != src[1].u64[0] ? ~0U : 0U;755dst->u[1][0] = src[0].u64[1] != src[1].u64[1] ? ~0U : 0U;756dst->u[2][0] = src[0].u64[2] != src[1].u64[2] ? ~0U : 0U;757dst->u[3][0] = src[0].u64[3] != src[1].u64[3] ? ~0U : 0U;758}759760static void761micro_i64slt(union tgsi_double_channel *dst,762const union tgsi_double_channel *src)763{764dst->u[0][0] = src[0].i64[0] < src[1].i64[0] ? ~0U : 0U;765dst->u[1][0] = src[0].i64[1] < src[1].i64[1] ? ~0U : 0U;766dst->u[2][0] = src[0].i64[2] < src[1].i64[2] ? ~0U : 0U;767dst->u[3][0] = src[0].i64[3] < src[1].i64[3] ? ~0U : 0U;768}769770static void771micro_u64slt(union tgsi_double_channel *dst,772const union tgsi_double_channel *src)773{774dst->u[0][0] = src[0].u64[0] < src[1].u64[0] ? ~0U : 0U;775dst->u[1][0] = src[0].u64[1] < src[1].u64[1] ? ~0U : 0U;776dst->u[2][0] = src[0].u64[2] < src[1].u64[2] ? ~0U : 0U;777dst->u[3][0] = src[0].u64[3] < src[1].u64[3] ? ~0U : 0U;778}779780static void781micro_i64sge(union tgsi_double_channel *dst,782const union tgsi_double_channel *src)783{784dst->u[0][0] = src[0].i64[0] >= src[1].i64[0] ? ~0U : 0U;785dst->u[1][0] = src[0].i64[1] >= src[1].i64[1] ? ~0U : 0U;786dst->u[2][0] = src[0].i64[2] >= src[1].i64[2] ? ~0U : 0U;787dst->u[3][0] = src[0].i64[3] >= src[1].i64[3] ? ~0U : 0U;788}789790static void791micro_u64sge(union tgsi_double_channel *dst,792const union tgsi_double_channel *src)793{794dst->u[0][0] = src[0].u64[0] >= src[1].u64[0] ? ~0U : 0U;795dst->u[1][0] = src[0].u64[1] >= src[1].u64[1] ? ~0U : 0U;796dst->u[2][0] = src[0].u64[2] >= src[1].u64[2] ? ~0U : 0U;797dst->u[3][0] = src[0].u64[3] >= src[1].u64[3] ? ~0U : 0U;798}799800static void801micro_u64max(union tgsi_double_channel *dst,802const union tgsi_double_channel *src)803{804dst->u64[0] = src[0].u64[0] > src[1].u64[0] ? src[0].u64[0] : src[1].u64[0];805dst->u64[1] = src[0].u64[1] > src[1].u64[1] ? src[0].u64[1] : src[1].u64[1];806dst->u64[2] = src[0].u64[2] > src[1].u64[2] ? src[0].u64[2] : src[1].u64[2];807dst->u64[3] = src[0].u64[3] > src[1].u64[3] ? src[0].u64[3] : src[1].u64[3];808}809810static void811micro_i64max(union tgsi_double_channel *dst,812const union tgsi_double_channel *src)813{814dst->i64[0] = src[0].i64[0] > src[1].i64[0] ? src[0].i64[0] : src[1].i64[0];815dst->i64[1] = src[0].i64[1] > src[1].i64[1] ? src[0].i64[1] : src[1].i64[1];816dst->i64[2] = src[0].i64[2] > src[1].i64[2] ? src[0].i64[2] : src[1].i64[2];817dst->i64[3] = src[0].i64[3] > src[1].i64[3] ? src[0].i64[3] : src[1].i64[3];818}819820static void821micro_u64min(union tgsi_double_channel *dst,822const union tgsi_double_channel *src)823{824dst->u64[0] = src[0].u64[0] < src[1].u64[0] ? src[0].u64[0] : src[1].u64[0];825dst->u64[1] = src[0].u64[1] < src[1].u64[1] ? src[0].u64[1] : src[1].u64[1];826dst->u64[2] = src[0].u64[2] < src[1].u64[2] ? src[0].u64[2] : src[1].u64[2];827dst->u64[3] = src[0].u64[3] < src[1].u64[3] ? src[0].u64[3] : src[1].u64[3];828}829830static void831micro_i64min(union tgsi_double_channel *dst,832const union tgsi_double_channel *src)833{834dst->i64[0] = src[0].i64[0] < src[1].i64[0] ? src[0].i64[0] : src[1].i64[0];835dst->i64[1] = src[0].i64[1] < src[1].i64[1] ? src[0].i64[1] : src[1].i64[1];836dst->i64[2] = src[0].i64[2] < src[1].i64[2] ? src[0].i64[2] : src[1].i64[2];837dst->i64[3] = src[0].i64[3] < src[1].i64[3] ? src[0].i64[3] : src[1].i64[3];838}839840static void841micro_u64add(union tgsi_double_channel *dst,842const union tgsi_double_channel *src)843{844dst->u64[0] = src[0].u64[0] + src[1].u64[0];845dst->u64[1] = src[0].u64[1] + src[1].u64[1];846dst->u64[2] = src[0].u64[2] + src[1].u64[2];847dst->u64[3] = src[0].u64[3] + src[1].u64[3];848}849850static void851micro_u64mul(union tgsi_double_channel *dst,852const union tgsi_double_channel *src)853{854dst->u64[0] = src[0].u64[0] * src[1].u64[0];855dst->u64[1] = src[0].u64[1] * src[1].u64[1];856dst->u64[2] = src[0].u64[2] * src[1].u64[2];857dst->u64[3] = src[0].u64[3] * src[1].u64[3];858}859860static void861micro_u64div(union tgsi_double_channel *dst,862const union tgsi_double_channel *src)863{864dst->u64[0] = src[1].u64[0] ? src[0].u64[0] / src[1].u64[0] : ~0ull;865dst->u64[1] = src[1].u64[1] ? src[0].u64[1] / src[1].u64[1] : ~0ull;866dst->u64[2] = src[1].u64[2] ? src[0].u64[2] / src[1].u64[2] : ~0ull;867dst->u64[3] = src[1].u64[3] ? src[0].u64[3] / src[1].u64[3] : ~0ull;868}869870static void871micro_i64div(union tgsi_double_channel *dst,872const union tgsi_double_channel *src)873{874dst->i64[0] = src[1].i64[0] ? src[0].i64[0] / src[1].i64[0] : 0;875dst->i64[1] = src[1].i64[1] ? src[0].i64[1] / src[1].i64[1] : 0;876dst->i64[2] = src[1].i64[2] ? src[0].i64[2] / src[1].i64[2] : 0;877dst->i64[3] = src[1].i64[3] ? src[0].i64[3] / src[1].i64[3] : 0;878}879880static void881micro_u64mod(union tgsi_double_channel *dst,882const union tgsi_double_channel *src)883{884dst->u64[0] = src[1].u64[0] ? src[0].u64[0] % src[1].u64[0] : ~0ull;885dst->u64[1] = src[1].u64[1] ? src[0].u64[1] % src[1].u64[1] : ~0ull;886dst->u64[2] = src[1].u64[2] ? src[0].u64[2] % src[1].u64[2] : ~0ull;887dst->u64[3] = src[1].u64[3] ? src[0].u64[3] % src[1].u64[3] : ~0ull;888}889890static void891micro_i64mod(union tgsi_double_channel *dst,892const union tgsi_double_channel *src)893{894dst->i64[0] = src[1].i64[0] ? src[0].i64[0] % src[1].i64[0] : ~0ll;895dst->i64[1] = src[1].i64[1] ? src[0].i64[1] % src[1].i64[1] : ~0ll;896dst->i64[2] = src[1].i64[2] ? src[0].i64[2] % src[1].i64[2] : ~0ll;897dst->i64[3] = src[1].i64[3] ? src[0].i64[3] % src[1].i64[3] : ~0ll;898}899900static void901micro_u64shl(union tgsi_double_channel *dst,902const union tgsi_double_channel *src0,903union tgsi_exec_channel *src1)904{905unsigned masked_count;906masked_count = src1->u[0] & 0x3f;907dst->u64[0] = src0->u64[0] << masked_count;908masked_count = src1->u[1] & 0x3f;909dst->u64[1] = src0->u64[1] << masked_count;910masked_count = src1->u[2] & 0x3f;911dst->u64[2] = src0->u64[2] << masked_count;912masked_count = src1->u[3] & 0x3f;913dst->u64[3] = src0->u64[3] << masked_count;914}915916static void917micro_i64shr(union tgsi_double_channel *dst,918const union tgsi_double_channel *src0,919union tgsi_exec_channel *src1)920{921unsigned masked_count;922masked_count = src1->u[0] & 0x3f;923dst->i64[0] = src0->i64[0] >> masked_count;924masked_count = src1->u[1] & 0x3f;925dst->i64[1] = src0->i64[1] >> masked_count;926masked_count = src1->u[2] & 0x3f;927dst->i64[2] = src0->i64[2] >> masked_count;928masked_count = src1->u[3] & 0x3f;929dst->i64[3] = src0->i64[3] >> masked_count;930}931932static void933micro_u64shr(union tgsi_double_channel *dst,934const union tgsi_double_channel *src0,935union tgsi_exec_channel *src1)936{937unsigned masked_count;938masked_count = src1->u[0] & 0x3f;939dst->u64[0] = src0->u64[0] >> masked_count;940masked_count = src1->u[1] & 0x3f;941dst->u64[1] = src0->u64[1] >> masked_count;942masked_count = src1->u[2] & 0x3f;943dst->u64[2] = src0->u64[2] >> masked_count;944masked_count = src1->u[3] & 0x3f;945dst->u64[3] = src0->u64[3] >> masked_count;946}947948enum tgsi_exec_datatype {949TGSI_EXEC_DATA_FLOAT,950TGSI_EXEC_DATA_INT,951TGSI_EXEC_DATA_UINT,952TGSI_EXEC_DATA_DOUBLE,953TGSI_EXEC_DATA_INT64,954TGSI_EXEC_DATA_UINT64,955};956957/** The execution mask depends on the conditional mask and the loop mask */958#define UPDATE_EXEC_MASK(MACH) \959MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->Switch.mask & MACH->FuncMask960961962static const union tgsi_exec_channel ZeroVec =963{ { 0.0, 0.0, 0.0, 0.0 } };964965static const union tgsi_exec_channel OneVec = {966{1.0f, 1.0f, 1.0f, 1.0f}967};968969static const union tgsi_exec_channel P128Vec = {970{128.0f, 128.0f, 128.0f, 128.0f}971};972973static const union tgsi_exec_channel M128Vec = {974{-128.0f, -128.0f, -128.0f, -128.0f}975};976977978/**979* Assert that none of the float values in 'chan' are infinite or NaN.980* NaN and Inf may occur normally during program execution and should981* not lead to crashes, etc. But when debugging, it's helpful to catch982* them.983*/984static inline void985check_inf_or_nan(const union tgsi_exec_channel *chan)986{987assert(!util_is_inf_or_nan((chan)->f[0]));988assert(!util_is_inf_or_nan((chan)->f[1]));989assert(!util_is_inf_or_nan((chan)->f[2]));990assert(!util_is_inf_or_nan((chan)->f[3]));991}992993994#ifdef DEBUG995static void996print_chan(const char *msg, const union tgsi_exec_channel *chan)997{998debug_printf("%s = {%f, %f, %f, %f}\n",999msg, chan->f[0], chan->f[1], chan->f[2], chan->f[3]);1000}1001#endif100210031004#ifdef DEBUG1005static void1006print_temp(const struct tgsi_exec_machine *mach, uint index)1007{1008const struct tgsi_exec_vector *tmp = &mach->Temps[index];1009int i;1010debug_printf("Temp[%u] =\n", index);1011for (i = 0; i < 4; i++) {1012debug_printf(" %c: { %f, %f, %f, %f }\n",1013"XYZW"[i],1014tmp->xyzw[i].f[0],1015tmp->xyzw[i].f[1],1016tmp->xyzw[i].f[2],1017tmp->xyzw[i].f[3]);1018}1019}1020#endif102110221023void1024tgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach,1025unsigned num_bufs,1026const void **bufs,1027const unsigned *buf_sizes)1028{1029unsigned i;10301031for (i = 0; i < num_bufs; i++) {1032mach->Consts[i] = bufs[i];1033mach->ConstsSize[i] = buf_sizes[i];1034}1035}10361037/**1038* Initialize machine state by expanding tokens to full instructions,1039* allocating temporary storage, setting up constants, etc.1040* After this, we can call tgsi_exec_machine_run() many times.1041*/1042void1043tgsi_exec_machine_bind_shader(1044struct tgsi_exec_machine *mach,1045const struct tgsi_token *tokens,1046struct tgsi_sampler *sampler,1047struct tgsi_image *image,1048struct tgsi_buffer *buffer)1049{1050uint k;1051struct tgsi_parse_context parse;1052struct tgsi_full_instruction *instructions;1053struct tgsi_full_declaration *declarations;1054uint maxInstructions = 10, numInstructions = 0;1055uint maxDeclarations = 10, numDeclarations = 0;10561057#if 01058tgsi_dump(tokens, 0);1059#endif10601061mach->Tokens = tokens;1062mach->Sampler = sampler;1063mach->Image = image;1064mach->Buffer = buffer;10651066if (!tokens) {1067/* unbind and free all */1068FREE(mach->Declarations);1069mach->Declarations = NULL;1070mach->NumDeclarations = 0;10711072FREE(mach->Instructions);1073mach->Instructions = NULL;1074mach->NumInstructions = 0;10751076return;1077}10781079k = tgsi_parse_init (&parse, mach->Tokens);1080if (k != TGSI_PARSE_OK) {1081debug_printf( "Problem parsing!\n" );1082return;1083}10841085mach->ImmLimit = 0;1086mach->NumOutputs = 0;10871088for (k = 0; k < TGSI_SEMANTIC_COUNT; k++)1089mach->SysSemanticToIndex[k] = -1;10901091if (mach->ShaderType == PIPE_SHADER_GEOMETRY &&1092!mach->UsedGeometryShader) {1093struct tgsi_exec_vector *inputs;1094struct tgsi_exec_vector *outputs;10951096inputs = align_malloc(sizeof(struct tgsi_exec_vector) *1097TGSI_MAX_PRIM_VERTICES * PIPE_MAX_SHADER_INPUTS,109816);10991100if (!inputs)1101return;11021103outputs = align_malloc(sizeof(struct tgsi_exec_vector) *1104TGSI_MAX_TOTAL_VERTICES, 16);11051106if (!outputs) {1107align_free(inputs);1108return;1109}11101111align_free(mach->Inputs);1112align_free(mach->Outputs);11131114mach->Inputs = inputs;1115mach->Outputs = outputs;1116mach->UsedGeometryShader = TRUE;1117}11181119declarations = (struct tgsi_full_declaration *)1120MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );11211122if (!declarations) {1123return;1124}11251126instructions = (struct tgsi_full_instruction *)1127MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );11281129if (!instructions) {1130FREE( declarations );1131return;1132}11331134while( !tgsi_parse_end_of_tokens( &parse ) ) {1135uint i;11361137tgsi_parse_token( &parse );1138switch( parse.FullToken.Token.Type ) {1139case TGSI_TOKEN_TYPE_DECLARATION:1140/* save expanded declaration */1141if (numDeclarations == maxDeclarations) {1142declarations = REALLOC(declarations,1143maxDeclarations1144* sizeof(struct tgsi_full_declaration),1145(maxDeclarations + 10)1146* sizeof(struct tgsi_full_declaration));1147maxDeclarations += 10;1148}1149if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_OUTPUT)1150mach->NumOutputs = MAX2(mach->NumOutputs, parse.FullToken.FullDeclaration.Range.Last + 1);1151else if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_SYSTEM_VALUE) {1152const struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration;1153mach->SysSemanticToIndex[decl->Semantic.Name] = decl->Range.First;1154}11551156memcpy(declarations + numDeclarations,1157&parse.FullToken.FullDeclaration,1158sizeof(declarations[0]));1159numDeclarations++;1160break;11611162case TGSI_TOKEN_TYPE_IMMEDIATE:1163{1164uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;1165assert( size <= 4 );1166if (mach->ImmLimit >= mach->ImmsReserved) {1167unsigned newReserved = mach->ImmsReserved ? 2 * mach->ImmsReserved : 128;1168float4 *imms = REALLOC(mach->Imms, mach->ImmsReserved, newReserved * sizeof(float4));1169if (imms) {1170mach->ImmsReserved = newReserved;1171mach->Imms = imms;1172} else {1173debug_printf("Unable to (re)allocate space for immidiate constants\n");1174break;1175}1176}11771178for( i = 0; i < size; i++ ) {1179mach->Imms[mach->ImmLimit][i] =1180parse.FullToken.FullImmediate.u[i].Float;1181}1182mach->ImmLimit += 1;1183}1184break;11851186case TGSI_TOKEN_TYPE_INSTRUCTION:11871188/* save expanded instruction */1189if (numInstructions == maxInstructions) {1190instructions = REALLOC(instructions,1191maxInstructions1192* sizeof(struct tgsi_full_instruction),1193(maxInstructions + 10)1194* sizeof(struct tgsi_full_instruction));1195maxInstructions += 10;1196}11971198memcpy(instructions + numInstructions,1199&parse.FullToken.FullInstruction,1200sizeof(instructions[0]));12011202numInstructions++;1203break;12041205case TGSI_TOKEN_TYPE_PROPERTY:1206if (mach->ShaderType == PIPE_SHADER_GEOMETRY) {1207if (parse.FullToken.FullProperty.Property.PropertyName == TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES) {1208mach->MaxOutputVertices = parse.FullToken.FullProperty.u[0].Data;1209}1210}1211break;12121213default:1214assert( 0 );1215}1216}1217tgsi_parse_free (&parse);12181219FREE(mach->Declarations);1220mach->Declarations = declarations;1221mach->NumDeclarations = numDeclarations;12221223FREE(mach->Instructions);1224mach->Instructions = instructions;1225mach->NumInstructions = numInstructions;1226}122712281229struct tgsi_exec_machine *1230tgsi_exec_machine_create(enum pipe_shader_type shader_type)1231{1232struct tgsi_exec_machine *mach;12331234mach = align_malloc( sizeof *mach, 16 );1235if (!mach)1236goto fail;12371238memset(mach, 0, sizeof(*mach));12391240mach->ShaderType = shader_type;12411242if (shader_type != PIPE_SHADER_COMPUTE) {1243mach->Inputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_SHADER_INPUTS, 16);1244mach->Outputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_SHADER_OUTPUTS, 16);1245if (!mach->Inputs || !mach->Outputs)1246goto fail;1247}12481249if (shader_type == PIPE_SHADER_FRAGMENT) {1250mach->InputSampleOffsetApply = align_malloc(sizeof(apply_sample_offset_func) * PIPE_MAX_SHADER_INPUTS, 16);1251if (!mach->InputSampleOffsetApply)1252goto fail;1253}12541255#ifdef DEBUG1256/* silence warnings */1257(void) print_chan;1258(void) print_temp;1259#endif12601261return mach;12621263fail:1264if (mach) {1265align_free(mach->InputSampleOffsetApply);1266align_free(mach->Inputs);1267align_free(mach->Outputs);1268align_free(mach);1269}1270return NULL;1271}127212731274void1275tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach)1276{1277if (mach) {1278FREE(mach->Instructions);1279FREE(mach->Declarations);1280FREE(mach->Imms);12811282align_free(mach->InputSampleOffsetApply);1283align_free(mach->Inputs);1284align_free(mach->Outputs);12851286align_free(mach);1287}1288}12891290static void1291micro_add(union tgsi_exec_channel *dst,1292const union tgsi_exec_channel *src0,1293const union tgsi_exec_channel *src1)1294{1295dst->f[0] = src0->f[0] + src1->f[0];1296dst->f[1] = src0->f[1] + src1->f[1];1297dst->f[2] = src0->f[2] + src1->f[2];1298dst->f[3] = src0->f[3] + src1->f[3];1299}13001301static void1302micro_div(1303union tgsi_exec_channel *dst,1304const union tgsi_exec_channel *src0,1305const union tgsi_exec_channel *src1 )1306{1307if (src1->f[0] != 0) {1308dst->f[0] = src0->f[0] / src1->f[0];1309}1310if (src1->f[1] != 0) {1311dst->f[1] = src0->f[1] / src1->f[1];1312}1313if (src1->f[2] != 0) {1314dst->f[2] = src0->f[2] / src1->f[2];1315}1316if (src1->f[3] != 0) {1317dst->f[3] = src0->f[3] / src1->f[3];1318}1319}13201321static void1322micro_lt(1323union tgsi_exec_channel *dst,1324const union tgsi_exec_channel *src0,1325const union tgsi_exec_channel *src1,1326const union tgsi_exec_channel *src2,1327const union tgsi_exec_channel *src3 )1328{1329dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];1330dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];1331dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];1332dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];1333}13341335static void1336micro_max(union tgsi_exec_channel *dst,1337const union tgsi_exec_channel *src0,1338const union tgsi_exec_channel *src1)1339{1340dst->f[0] = fmaxf(src0->f[0], src1->f[0]);1341dst->f[1] = fmaxf(src0->f[1], src1->f[1]);1342dst->f[2] = fmaxf(src0->f[2], src1->f[2]);1343dst->f[3] = fmaxf(src0->f[3], src1->f[3]);1344}13451346static void1347micro_min(union tgsi_exec_channel *dst,1348const union tgsi_exec_channel *src0,1349const union tgsi_exec_channel *src1)1350{1351dst->f[0] = fminf(src0->f[0], src1->f[0]);1352dst->f[1] = fminf(src0->f[1], src1->f[1]);1353dst->f[2] = fminf(src0->f[2], src1->f[2]);1354dst->f[3] = fminf(src0->f[3], src1->f[3]);1355}13561357static void1358micro_mul(union tgsi_exec_channel *dst,1359const union tgsi_exec_channel *src0,1360const union tgsi_exec_channel *src1)1361{1362dst->f[0] = src0->f[0] * src1->f[0];1363dst->f[1] = src0->f[1] * src1->f[1];1364dst->f[2] = src0->f[2] * src1->f[2];1365dst->f[3] = src0->f[3] * src1->f[3];1366}13671368static void1369micro_neg(1370union tgsi_exec_channel *dst,1371const union tgsi_exec_channel *src )1372{1373dst->f[0] = -src->f[0];1374dst->f[1] = -src->f[1];1375dst->f[2] = -src->f[2];1376dst->f[3] = -src->f[3];1377}13781379static void1380micro_pow(1381union tgsi_exec_channel *dst,1382const union tgsi_exec_channel *src0,1383const union tgsi_exec_channel *src1 )1384{1385dst->f[0] = powf( src0->f[0], src1->f[0] );1386dst->f[1] = powf( src0->f[1], src1->f[1] );1387dst->f[2] = powf( src0->f[2], src1->f[2] );1388dst->f[3] = powf( src0->f[3], src1->f[3] );1389}13901391static void1392micro_ldexp(union tgsi_exec_channel *dst,1393const union tgsi_exec_channel *src0,1394const union tgsi_exec_channel *src1)1395{1396dst->f[0] = ldexpf(src0->f[0], src1->i[0]);1397dst->f[1] = ldexpf(src0->f[1], src1->i[1]);1398dst->f[2] = ldexpf(src0->f[2], src1->i[2]);1399dst->f[3] = ldexpf(src0->f[3], src1->i[3]);1400}14011402static void1403micro_sub(union tgsi_exec_channel *dst,1404const union tgsi_exec_channel *src0,1405const union tgsi_exec_channel *src1)1406{1407dst->f[0] = src0->f[0] - src1->f[0];1408dst->f[1] = src0->f[1] - src1->f[1];1409dst->f[2] = src0->f[2] - src1->f[2];1410dst->f[3] = src0->f[3] - src1->f[3];1411}14121413static void1414fetch_src_file_channel(const struct tgsi_exec_machine *mach,1415const uint file,1416const uint swizzle,1417const union tgsi_exec_channel *index,1418const union tgsi_exec_channel *index2D,1419union tgsi_exec_channel *chan)1420{1421uint i;14221423assert(swizzle < 4);14241425switch (file) {1426case TGSI_FILE_CONSTANT:1427for (i = 0; i < TGSI_QUAD_SIZE; i++) {1428/* NOTE: copying the const value as a uint instead of float */1429const uint constbuf = index2D->i[i];1430const unsigned pos = index->i[i] * 4 + swizzle;1431/* const buffer bounds check */1432if (pos >= mach->ConstsSize[constbuf] / 4) {1433if (0) {1434/* Debug: print warning */1435static int count = 0;1436if (count++ < 100)1437debug_printf("TGSI Exec: const buffer index %d"1438" out of bounds\n", pos);1439}1440chan->u[i] = 0;1441} else {1442const uint *buf = (const uint *)mach->Consts[constbuf];1443chan->u[i] = buf[pos];1444}1445}1446break;14471448case TGSI_FILE_INPUT:1449for (i = 0; i < TGSI_QUAD_SIZE; i++) {1450/*1451if (PIPE_SHADER_GEOMETRY == mach->ShaderType) {1452debug_printf("Fetching Input[%d] (2d=%d, 1d=%d)\n",1453index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i],1454index2D->i[i], index->i[i]);1455}*/1456int pos = index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i];1457assert(pos >= 0);1458assert(pos < TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS);1459chan->u[i] = mach->Inputs[pos].xyzw[swizzle].u[i];1460}1461break;14621463case TGSI_FILE_SYSTEM_VALUE:1464for (i = 0; i < TGSI_QUAD_SIZE; i++) {1465chan->u[i] = mach->SystemValue[index->i[i]].xyzw[swizzle].u[i];1466}1467break;14681469case TGSI_FILE_TEMPORARY:1470for (i = 0; i < TGSI_QUAD_SIZE; i++) {1471assert(index->i[i] < TGSI_EXEC_NUM_TEMPS);1472assert(index2D->i[i] == 0);14731474chan->u[i] = mach->Temps[index->i[i]].xyzw[swizzle].u[i];1475}1476break;14771478case TGSI_FILE_IMMEDIATE:1479for (i = 0; i < TGSI_QUAD_SIZE; i++) {1480assert(index->i[i] >= 0 && index->i[i] < (int)mach->ImmLimit);1481assert(index2D->i[i] == 0);14821483chan->f[i] = mach->Imms[index->i[i]][swizzle];1484}1485break;14861487case TGSI_FILE_ADDRESS:1488for (i = 0; i < TGSI_QUAD_SIZE; i++) {1489assert(index->i[i] >= 0 && index->i[i] < ARRAY_SIZE(mach->Addrs));1490assert(index2D->i[i] == 0);14911492chan->u[i] = mach->Addrs[index->i[i]].xyzw[swizzle].u[i];1493}1494break;14951496case TGSI_FILE_OUTPUT:1497/* vertex/fragment output vars can be read too */1498for (i = 0; i < TGSI_QUAD_SIZE; i++) {1499assert(index->i[i] >= 0);1500assert(index2D->i[i] == 0);15011502chan->u[i] = mach->Outputs[index->i[i]].xyzw[swizzle].u[i];1503}1504break;15051506default:1507assert(0);1508for (i = 0; i < TGSI_QUAD_SIZE; i++) {1509chan->u[i] = 0;1510}1511}1512}15131514static void1515get_index_registers(const struct tgsi_exec_machine *mach,1516const struct tgsi_full_src_register *reg,1517union tgsi_exec_channel *index,1518union tgsi_exec_channel *index2D)1519{1520uint swizzle;15211522/* We start with a direct index into a register file.1523*1524* file[1],1525* where:1526* file = Register.File1527* [1] = Register.Index1528*/1529index->i[0] =1530index->i[1] =1531index->i[2] =1532index->i[3] = reg->Register.Index;15331534/* There is an extra source register that indirectly subscripts1535* a register file. The direct index now becomes an offset1536* that is being added to the indirect register.1537*1538* file[ind[2].x+1],1539* where:1540* ind = Indirect.File1541* [2] = Indirect.Index1542* .x = Indirect.SwizzleX1543*/1544if (reg->Register.Indirect) {1545union tgsi_exec_channel index2;1546union tgsi_exec_channel indir_index;1547const uint execmask = mach->ExecMask;1548uint i;15491550/* which address register (always zero now) */1551index2.i[0] =1552index2.i[1] =1553index2.i[2] =1554index2.i[3] = reg->Indirect.Index;1555/* get current value of address register[swizzle] */1556swizzle = reg->Indirect.Swizzle;1557fetch_src_file_channel(mach,1558reg->Indirect.File,1559swizzle,1560&index2,1561&ZeroVec,1562&indir_index);15631564/* add value of address register to the offset */1565index->i[0] += indir_index.i[0];1566index->i[1] += indir_index.i[1];1567index->i[2] += indir_index.i[2];1568index->i[3] += indir_index.i[3];15691570/* for disabled execution channels, zero-out the index to1571* avoid using a potential garbage value.1572*/1573for (i = 0; i < TGSI_QUAD_SIZE; i++) {1574if ((execmask & (1 << i)) == 0)1575index->i[i] = 0;1576}1577}15781579/* There is an extra source register that is a second1580* subscript to a register file. Effectively it means that1581* the register file is actually a 2D array of registers.1582*1583* file[3][1],1584* where:1585* [3] = Dimension.Index1586*/1587if (reg->Register.Dimension) {1588index2D->i[0] =1589index2D->i[1] =1590index2D->i[2] =1591index2D->i[3] = reg->Dimension.Index;15921593/* Again, the second subscript index can be addressed indirectly1594* identically to the first one.1595* Nothing stops us from indirectly addressing the indirect register,1596* but there is no need for that, so we won't exercise it.1597*1598* file[ind[4].y+3][1],1599* where:1600* ind = DimIndirect.File1601* [4] = DimIndirect.Index1602* .y = DimIndirect.SwizzleX1603*/1604if (reg->Dimension.Indirect) {1605union tgsi_exec_channel index2;1606union tgsi_exec_channel indir_index;1607const uint execmask = mach->ExecMask;1608uint i;16091610index2.i[0] =1611index2.i[1] =1612index2.i[2] =1613index2.i[3] = reg->DimIndirect.Index;16141615swizzle = reg->DimIndirect.Swizzle;1616fetch_src_file_channel(mach,1617reg->DimIndirect.File,1618swizzle,1619&index2,1620&ZeroVec,1621&indir_index);16221623index2D->i[0] += indir_index.i[0];1624index2D->i[1] += indir_index.i[1];1625index2D->i[2] += indir_index.i[2];1626index2D->i[3] += indir_index.i[3];16271628/* for disabled execution channels, zero-out the index to1629* avoid using a potential garbage value.1630*/1631for (i = 0; i < TGSI_QUAD_SIZE; i++) {1632if ((execmask & (1 << i)) == 0) {1633index2D->i[i] = 0;1634}1635}1636}16371638/* If by any chance there was a need for a 3D array of register1639* files, we would have to check whether Dimension is followed1640* by a dimension register and continue the saga.1641*/1642} else {1643index2D->i[0] =1644index2D->i[1] =1645index2D->i[2] =1646index2D->i[3] = 0;1647}1648}164916501651static void1652fetch_source_d(const struct tgsi_exec_machine *mach,1653union tgsi_exec_channel *chan,1654const struct tgsi_full_src_register *reg,1655const uint chan_index)1656{1657union tgsi_exec_channel index;1658union tgsi_exec_channel index2D;1659uint swizzle;16601661get_index_registers(mach, reg, &index, &index2D);166216631664swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );1665fetch_src_file_channel(mach,1666reg->Register.File,1667swizzle,1668&index,1669&index2D,1670chan);1671}16721673static void1674fetch_source(const struct tgsi_exec_machine *mach,1675union tgsi_exec_channel *chan,1676const struct tgsi_full_src_register *reg,1677const uint chan_index,1678enum tgsi_exec_datatype src_datatype)1679{1680fetch_source_d(mach, chan, reg, chan_index);16811682if (reg->Register.Absolute) {1683assert(src_datatype == TGSI_EXEC_DATA_FLOAT);1684micro_abs(chan, chan);1685}16861687if (reg->Register.Negate) {1688if (src_datatype == TGSI_EXEC_DATA_FLOAT) {1689micro_neg(chan, chan);1690} else {1691micro_ineg(chan, chan);1692}1693}1694}16951696static union tgsi_exec_channel *1697store_dest_dstret(struct tgsi_exec_machine *mach,1698const union tgsi_exec_channel *chan,1699const struct tgsi_full_dst_register *reg,1700uint chan_index)1701{1702static union tgsi_exec_channel null;1703union tgsi_exec_channel *dst;1704int offset = 0; /* indirection offset */1705int index;170617071708/* There is an extra source register that indirectly subscripts1709* a register file. The direct index now becomes an offset1710* that is being added to the indirect register.1711*1712* file[ind[2].x+1],1713* where:1714* ind = Indirect.File1715* [2] = Indirect.Index1716* .x = Indirect.SwizzleX1717*/1718if (reg->Register.Indirect) {1719union tgsi_exec_channel index;1720union tgsi_exec_channel indir_index;1721uint swizzle;17221723/* which address register (always zero for now) */1724index.i[0] =1725index.i[1] =1726index.i[2] =1727index.i[3] = reg->Indirect.Index;17281729/* get current value of address register[swizzle] */1730swizzle = reg->Indirect.Swizzle;17311732/* fetch values from the address/indirection register */1733fetch_src_file_channel(mach,1734reg->Indirect.File,1735swizzle,1736&index,1737&ZeroVec,1738&indir_index);17391740/* save indirection offset */1741offset = indir_index.i[0];1742}17431744switch (reg->Register.File) {1745case TGSI_FILE_NULL:1746dst = &null;1747break;17481749case TGSI_FILE_OUTPUT:1750index = mach->OutputVertexOffset + reg->Register.Index;1751dst = &mach->Outputs[offset + index].xyzw[chan_index];1752#if 01753debug_printf("NumOutputs = %d, TEMP_O_C/I = %d, redindex = %d\n",1754mach->NumOutputs, mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0],1755reg->Register.Index);1756if (PIPE_SHADER_GEOMETRY == mach->ShaderType) {1757debug_printf("STORING OUT[%d] mask(%d), = (", offset + index, execmask);1758for (i = 0; i < TGSI_QUAD_SIZE; i++)1759if (execmask & (1 << i))1760debug_printf("%f, ", chan->f[i]);1761debug_printf(")\n");1762}1763#endif1764break;17651766case TGSI_FILE_TEMPORARY:1767index = reg->Register.Index;1768assert( index < TGSI_EXEC_NUM_TEMPS );1769dst = &mach->Temps[offset + index].xyzw[chan_index];1770break;17711772case TGSI_FILE_ADDRESS:1773index = reg->Register.Index;1774assert(index >= 0 && index < ARRAY_SIZE(mach->Addrs));1775dst = &mach->Addrs[index].xyzw[chan_index];1776break;17771778default:1779unreachable("Bad destination file");1780}17811782return dst;1783}17841785static void1786store_dest_double(struct tgsi_exec_machine *mach,1787const union tgsi_exec_channel *chan,1788const struct tgsi_full_dst_register *reg,1789uint chan_index)1790{1791union tgsi_exec_channel *dst;1792const uint execmask = mach->ExecMask;1793int i;17941795dst = store_dest_dstret(mach, chan, reg, chan_index);1796if (!dst)1797return;17981799/* doubles path */1800for (i = 0; i < TGSI_QUAD_SIZE; i++)1801if (execmask & (1 << i))1802dst->i[i] = chan->i[i];1803}18041805static void1806store_dest(struct tgsi_exec_machine *mach,1807const union tgsi_exec_channel *chan,1808const struct tgsi_full_dst_register *reg,1809const struct tgsi_full_instruction *inst,1810uint chan_index)1811{1812union tgsi_exec_channel *dst;1813const uint execmask = mach->ExecMask;1814int i;18151816dst = store_dest_dstret(mach, chan, reg, chan_index);1817if (!dst)1818return;18191820if (!inst->Instruction.Saturate) {1821for (i = 0; i < TGSI_QUAD_SIZE; i++)1822if (execmask & (1 << i))1823dst->i[i] = chan->i[i];1824}1825else {1826for (i = 0; i < TGSI_QUAD_SIZE; i++)1827if (execmask & (1 << i))1828dst->f[i] = fminf(fmaxf(chan->f[i], 0.0f), 1.0f);1829}1830}18311832#define FETCH(VAL,INDEX,CHAN)\1833fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_FLOAT)18341835#define IFETCH(VAL,INDEX,CHAN)\1836fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_INT)183718381839/**1840* Execute ARB-style KIL which is predicated by a src register.1841* Kill fragment if any of the four values is less than zero.1842*/1843static void1844exec_kill_if(struct tgsi_exec_machine *mach,1845const struct tgsi_full_instruction *inst)1846{1847uint uniquemask;1848uint chan_index;1849uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */1850union tgsi_exec_channel r[1];18511852/* This mask stores component bits that were already tested. */1853uniquemask = 0;18541855for (chan_index = 0; chan_index < 4; chan_index++)1856{1857uint swizzle;1858uint i;18591860/* unswizzle channel */1861swizzle = tgsi_util_get_full_src_register_swizzle (1862&inst->Src[0],1863chan_index);18641865/* check if the component has not been already tested */1866if (uniquemask & (1 << swizzle))1867continue;1868uniquemask |= 1 << swizzle;18691870FETCH(&r[0], 0, chan_index);1871for (i = 0; i < 4; i++)1872if (r[0].f[i] < 0.0f)1873kilmask |= 1 << i;1874}18751876/* restrict to fragments currently executing */1877kilmask &= mach->ExecMask;18781879mach->KillMask |= kilmask;1880}18811882/**1883* Unconditional fragment kill/discard.1884*/1885static void1886exec_kill(struct tgsi_exec_machine *mach)1887{1888/* kill fragment for all fragments currently executing.1889* bit 0 = pixel 0, bit 1 = pixel 1, etc.1890*/1891mach->KillMask |= mach->ExecMask;1892}18931894static void1895emit_vertex(struct tgsi_exec_machine *mach,1896const struct tgsi_full_instruction *inst)1897{1898union tgsi_exec_channel r[1];1899unsigned stream_id;1900unsigned prim_count;1901/* FIXME: check for exec mask correctly1902unsigned i;1903for (i = 0; i < TGSI_QUAD_SIZE; ++i) {1904if ((mach->ExecMask & (1 << i)))1905*/1906IFETCH(&r[0], 0, TGSI_CHAN_X);1907stream_id = r[0].u[0];1908prim_count = mach->OutputPrimCount[stream_id];1909if (mach->ExecMask) {1910if (mach->Primitives[stream_id][prim_count] >= mach->MaxOutputVertices)1911return;19121913if (mach->Primitives[stream_id][prim_count] == 0)1914mach->PrimitiveOffsets[stream_id][prim_count] = mach->OutputVertexOffset;1915mach->OutputVertexOffset += mach->NumOutputs;1916mach->Primitives[stream_id][prim_count]++;1917}1918}19191920static void1921emit_primitive(struct tgsi_exec_machine *mach,1922const struct tgsi_full_instruction *inst)1923{1924unsigned *prim_count;1925union tgsi_exec_channel r[1];1926unsigned stream_id = 0;1927/* FIXME: check for exec mask correctly1928unsigned i;1929for (i = 0; i < TGSI_QUAD_SIZE; ++i) {1930if ((mach->ExecMask & (1 << i)))1931*/1932if (inst) {1933IFETCH(&r[0], 0, TGSI_CHAN_X);1934stream_id = r[0].u[0];1935}1936prim_count = &mach->OutputPrimCount[stream_id];1937if (mach->ExecMask) {1938++(*prim_count);1939debug_assert((*prim_count * mach->NumOutputs) < TGSI_MAX_TOTAL_VERTICES);1940mach->Primitives[stream_id][*prim_count] = 0;1941}1942}19431944static void1945conditional_emit_primitive(struct tgsi_exec_machine *mach)1946{1947if (PIPE_SHADER_GEOMETRY == mach->ShaderType) {1948int emitted_verts = mach->Primitives[0][mach->OutputPrimCount[0]];1949if (emitted_verts) {1950emit_primitive(mach, NULL);1951}1952}1953}195419551956/*1957* Fetch four texture samples using STR texture coordinates.1958*/1959static void1960fetch_texel( struct tgsi_sampler *sampler,1961const unsigned sview_idx,1962const unsigned sampler_idx,1963const union tgsi_exec_channel *s,1964const union tgsi_exec_channel *t,1965const union tgsi_exec_channel *p,1966const union tgsi_exec_channel *c0,1967const union tgsi_exec_channel *c1,1968float derivs[3][2][TGSI_QUAD_SIZE],1969const int8_t offset[3],1970enum tgsi_sampler_control control,1971union tgsi_exec_channel *r,1972union tgsi_exec_channel *g,1973union tgsi_exec_channel *b,1974union tgsi_exec_channel *a )1975{1976uint j;1977float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];19781979/* FIXME: handle explicit derivs, offsets */1980sampler->get_samples(sampler, sview_idx, sampler_idx,1981s->f, t->f, p->f, c0->f, c1->f, derivs, offset, control, rgba);19821983for (j = 0; j < 4; j++) {1984r->f[j] = rgba[0][j];1985g->f[j] = rgba[1][j];1986b->f[j] = rgba[2][j];1987a->f[j] = rgba[3][j];1988}1989}199019911992#define TEX_MODIFIER_NONE 01993#define TEX_MODIFIER_PROJECTED 11994#define TEX_MODIFIER_LOD_BIAS 21995#define TEX_MODIFIER_EXPLICIT_LOD 31996#define TEX_MODIFIER_LEVEL_ZERO 41997#define TEX_MODIFIER_GATHER 519981999/*2000* Fetch all 3 (for s,t,r coords) texel offsets, put them into int array.2001*/2002static void2003fetch_texel_offsets(struct tgsi_exec_machine *mach,2004const struct tgsi_full_instruction *inst,2005int8_t offsets[3])2006{2007if (inst->Texture.NumOffsets == 1) {2008union tgsi_exec_channel index;2009union tgsi_exec_channel offset[3];2010index.i[0] = index.i[1] = index.i[2] = index.i[3] = inst->TexOffsets[0].Index;2011fetch_src_file_channel(mach, inst->TexOffsets[0].File,2012inst->TexOffsets[0].SwizzleX, &index, &ZeroVec, &offset[0]);2013fetch_src_file_channel(mach, inst->TexOffsets[0].File,2014inst->TexOffsets[0].SwizzleY, &index, &ZeroVec, &offset[1]);2015fetch_src_file_channel(mach, inst->TexOffsets[0].File,2016inst->TexOffsets[0].SwizzleZ, &index, &ZeroVec, &offset[2]);2017offsets[0] = offset[0].i[0];2018offsets[1] = offset[1].i[0];2019offsets[2] = offset[2].i[0];2020} else {2021assert(inst->Texture.NumOffsets == 0);2022offsets[0] = offsets[1] = offsets[2] = 0;2023}2024}202520262027/*2028* Fetch dx and dy values for one channel (s, t or r).2029* Put dx values into one float array, dy values into another.2030*/2031static void2032fetch_assign_deriv_channel(struct tgsi_exec_machine *mach,2033const struct tgsi_full_instruction *inst,2034unsigned regdsrcx,2035unsigned chan,2036float derivs[2][TGSI_QUAD_SIZE])2037{2038union tgsi_exec_channel d;2039FETCH(&d, regdsrcx, chan);2040derivs[0][0] = d.f[0];2041derivs[0][1] = d.f[1];2042derivs[0][2] = d.f[2];2043derivs[0][3] = d.f[3];2044FETCH(&d, regdsrcx + 1, chan);2045derivs[1][0] = d.f[0];2046derivs[1][1] = d.f[1];2047derivs[1][2] = d.f[2];2048derivs[1][3] = d.f[3];2049}20502051static uint2052fetch_sampler_unit(struct tgsi_exec_machine *mach,2053const struct tgsi_full_instruction *inst,2054uint sampler)2055{2056uint unit = 0;2057int i;2058if (inst->Src[sampler].Register.Indirect) {2059const struct tgsi_full_src_register *reg = &inst->Src[sampler];2060union tgsi_exec_channel indir_index, index2;2061const uint execmask = mach->ExecMask;2062index2.i[0] =2063index2.i[1] =2064index2.i[2] =2065index2.i[3] = reg->Indirect.Index;20662067fetch_src_file_channel(mach,2068reg->Indirect.File,2069reg->Indirect.Swizzle,2070&index2,2071&ZeroVec,2072&indir_index);2073for (i = 0; i < TGSI_QUAD_SIZE; i++) {2074if (execmask & (1 << i)) {2075unit = inst->Src[sampler].Register.Index + indir_index.i[i];2076break;2077}2078}20792080} else {2081unit = inst->Src[sampler].Register.Index;2082}2083return unit;2084}20852086/*2087* execute a texture instruction.2088*2089* modifier is used to control the channel routing for the2090* instruction variants like proj, lod, and texture with lod bias.2091* sampler indicates which src register the sampler is contained in.2092*/2093static void2094exec_tex(struct tgsi_exec_machine *mach,2095const struct tgsi_full_instruction *inst,2096uint modifier, uint sampler)2097{2098const union tgsi_exec_channel *args[5], *proj = NULL;2099union tgsi_exec_channel r[5];2100enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE;2101uint chan;2102uint unit;2103int8_t offsets[3];2104int dim, shadow_ref, i;21052106unit = fetch_sampler_unit(mach, inst, sampler);2107/* always fetch all 3 offsets, overkill but keeps code simple */2108fetch_texel_offsets(mach, inst, offsets);21092110assert(modifier != TEX_MODIFIER_LEVEL_ZERO);2111assert(inst->Texture.Texture != TGSI_TEXTURE_BUFFER);21122113dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture);2114shadow_ref = tgsi_util_get_shadow_ref_src_index(inst->Texture.Texture);21152116assert(dim <= 4);2117if (shadow_ref >= 0)2118assert(shadow_ref >= dim && shadow_ref < (int)ARRAY_SIZE(args));21192120/* fetch modifier to the last argument */2121if (modifier != TEX_MODIFIER_NONE) {2122const int last = ARRAY_SIZE(args) - 1;21232124/* fetch modifier from src0.w or src1.x */2125if (sampler == 1) {2126assert(dim <= TGSI_CHAN_W && shadow_ref != TGSI_CHAN_W);2127FETCH(&r[last], 0, TGSI_CHAN_W);2128}2129else {2130FETCH(&r[last], 1, TGSI_CHAN_X);2131}21322133if (modifier != TEX_MODIFIER_PROJECTED) {2134args[last] = &r[last];2135}2136else {2137proj = &r[last];2138args[last] = &ZeroVec;2139}21402141/* point unused arguments to zero vector */2142for (i = dim; i < last; i++)2143args[i] = &ZeroVec;21442145if (modifier == TEX_MODIFIER_EXPLICIT_LOD)2146control = TGSI_SAMPLER_LOD_EXPLICIT;2147else if (modifier == TEX_MODIFIER_LOD_BIAS)2148control = TGSI_SAMPLER_LOD_BIAS;2149else if (modifier == TEX_MODIFIER_GATHER)2150control = TGSI_SAMPLER_GATHER;2151}2152else {2153for (i = dim; i < (int)ARRAY_SIZE(args); i++)2154args[i] = &ZeroVec;2155}21562157/* fetch coordinates */2158for (i = 0; i < dim; i++) {2159FETCH(&r[i], 0, TGSI_CHAN_X + i);21602161if (proj)2162micro_div(&r[i], &r[i], proj);21632164args[i] = &r[i];2165}21662167/* fetch reference value */2168if (shadow_ref >= 0) {2169FETCH(&r[shadow_ref], shadow_ref / 4, TGSI_CHAN_X + (shadow_ref % 4));21702171if (proj)2172micro_div(&r[shadow_ref], &r[shadow_ref], proj);21732174args[shadow_ref] = &r[shadow_ref];2175}21762177fetch_texel(mach->Sampler, unit, unit,2178args[0], args[1], args[2], args[3], args[4],2179NULL, offsets, control,2180&r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */21812182#if 02183debug_printf("fetch r: %g %g %g %g\n",2184r[0].f[0], r[0].f[1], r[0].f[2], r[0].f[3]);2185debug_printf("fetch g: %g %g %g %g\n",2186r[1].f[0], r[1].f[1], r[1].f[2], r[1].f[3]);2187debug_printf("fetch b: %g %g %g %g\n",2188r[2].f[0], r[2].f[1], r[2].f[2], r[2].f[3]);2189debug_printf("fetch a: %g %g %g %g\n",2190r[3].f[0], r[3].f[1], r[3].f[2], r[3].f[3]);2191#endif21922193for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {2194if (inst->Dst[0].Register.WriteMask & (1 << chan)) {2195store_dest(mach, &r[chan], &inst->Dst[0], inst, chan);2196}2197}2198}21992200static void2201exec_lodq(struct tgsi_exec_machine *mach,2202const struct tgsi_full_instruction *inst)2203{2204uint resource_unit, sampler_unit;2205unsigned dim;2206unsigned i;2207union tgsi_exec_channel coords[4];2208const union tgsi_exec_channel *args[ARRAY_SIZE(coords)];2209union tgsi_exec_channel r[2];22102211resource_unit = fetch_sampler_unit(mach, inst, 1);2212if (inst->Instruction.Opcode == TGSI_OPCODE_LOD) {2213uint target = mach->SamplerViews[resource_unit].Resource;2214dim = tgsi_util_get_texture_coord_dim(target);2215sampler_unit = fetch_sampler_unit(mach, inst, 2);2216} else {2217dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture);2218sampler_unit = resource_unit;2219}2220assert(dim <= ARRAY_SIZE(coords));2221/* fetch coordinates */2222for (i = 0; i < dim; i++) {2223FETCH(&coords[i], 0, TGSI_CHAN_X + i);2224args[i] = &coords[i];2225}2226for (i = dim; i < ARRAY_SIZE(coords); i++) {2227args[i] = &ZeroVec;2228}2229mach->Sampler->query_lod(mach->Sampler, resource_unit, sampler_unit,2230args[0]->f,2231args[1]->f,2232args[2]->f,2233args[3]->f,2234TGSI_SAMPLER_LOD_NONE,2235r[0].f,2236r[1].f);22372238if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {2239store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X);2240}2241if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {2242store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y);2243}2244if (inst->Instruction.Opcode == TGSI_OPCODE_LOD) {2245unsigned char swizzles[4];2246unsigned chan;2247swizzles[0] = inst->Src[1].Register.SwizzleX;2248swizzles[1] = inst->Src[1].Register.SwizzleY;2249swizzles[2] = inst->Src[1].Register.SwizzleZ;2250swizzles[3] = inst->Src[1].Register.SwizzleW;22512252for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {2253if (inst->Dst[0].Register.WriteMask & (1 << chan)) {2254if (swizzles[chan] >= 2) {2255store_dest(mach, &ZeroVec,2256&inst->Dst[0], inst, chan);2257} else {2258store_dest(mach, &r[swizzles[chan]],2259&inst->Dst[0], inst, chan);2260}2261}2262}2263} else {2264if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {2265store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X);2266}2267if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {2268store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y);2269}2270}2271}22722273static void2274exec_txd(struct tgsi_exec_machine *mach,2275const struct tgsi_full_instruction *inst)2276{2277union tgsi_exec_channel r[4];2278float derivs[3][2][TGSI_QUAD_SIZE];2279uint chan;2280uint unit;2281int8_t offsets[3];22822283unit = fetch_sampler_unit(mach, inst, 3);2284/* always fetch all 3 offsets, overkill but keeps code simple */2285fetch_texel_offsets(mach, inst, offsets);22862287switch (inst->Texture.Texture) {2288case TGSI_TEXTURE_1D:2289FETCH(&r[0], 0, TGSI_CHAN_X);22902291fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);22922293fetch_texel(mach->Sampler, unit, unit,2294&r[0], &ZeroVec, &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */2295derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,2296&r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */2297break;22982299case TGSI_TEXTURE_SHADOW1D:2300case TGSI_TEXTURE_1D_ARRAY:2301case TGSI_TEXTURE_SHADOW1D_ARRAY:2302/* SHADOW1D/1D_ARRAY would not need Y/Z respectively, but don't bother */2303FETCH(&r[0], 0, TGSI_CHAN_X);2304FETCH(&r[1], 0, TGSI_CHAN_Y);2305FETCH(&r[2], 0, TGSI_CHAN_Z);23062307fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);23082309fetch_texel(mach->Sampler, unit, unit,2310&r[0], &r[1], &r[2], &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */2311derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,2312&r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */2313break;23142315case TGSI_TEXTURE_2D:2316case TGSI_TEXTURE_RECT:2317FETCH(&r[0], 0, TGSI_CHAN_X);2318FETCH(&r[1], 0, TGSI_CHAN_Y);23192320fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);2321fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]);23222323fetch_texel(mach->Sampler, unit, unit,2324&r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */2325derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,2326&r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */2327break;232823292330case TGSI_TEXTURE_SHADOW2D:2331case TGSI_TEXTURE_SHADOWRECT:2332case TGSI_TEXTURE_2D_ARRAY:2333case TGSI_TEXTURE_SHADOW2D_ARRAY:2334/* only SHADOW2D_ARRAY actually needs W */2335FETCH(&r[0], 0, TGSI_CHAN_X);2336FETCH(&r[1], 0, TGSI_CHAN_Y);2337FETCH(&r[2], 0, TGSI_CHAN_Z);2338FETCH(&r[3], 0, TGSI_CHAN_W);23392340fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);2341fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]);23422343fetch_texel(mach->Sampler, unit, unit,2344&r[0], &r[1], &r[2], &r[3], &ZeroVec, /* inputs */2345derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,2346&r[0], &r[1], &r[2], &r[3]); /* outputs */2347break;23482349case TGSI_TEXTURE_3D:2350case TGSI_TEXTURE_CUBE:2351case TGSI_TEXTURE_CUBE_ARRAY:2352case TGSI_TEXTURE_SHADOWCUBE:2353/* only TEXTURE_CUBE_ARRAY and TEXTURE_SHADOWCUBE actually need W */2354FETCH(&r[0], 0, TGSI_CHAN_X);2355FETCH(&r[1], 0, TGSI_CHAN_Y);2356FETCH(&r[2], 0, TGSI_CHAN_Z);2357FETCH(&r[3], 0, TGSI_CHAN_W);23582359fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);2360fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]);2361fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Z, derivs[2]);23622363fetch_texel(mach->Sampler, unit, unit,2364&r[0], &r[1], &r[2], &r[3], &ZeroVec, /* inputs */2365derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,2366&r[0], &r[1], &r[2], &r[3]); /* outputs */2367break;23682369default:2370assert(0);2371}23722373for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {2374if (inst->Dst[0].Register.WriteMask & (1 << chan)) {2375store_dest(mach, &r[chan], &inst->Dst[0], inst, chan);2376}2377}2378}237923802381static void2382exec_txf(struct tgsi_exec_machine *mach,2383const struct tgsi_full_instruction *inst)2384{2385union tgsi_exec_channel r[4];2386uint chan;2387uint unit;2388float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];2389int j;2390int8_t offsets[3];2391unsigned target;23922393unit = fetch_sampler_unit(mach, inst, 1);2394/* always fetch all 3 offsets, overkill but keeps code simple */2395fetch_texel_offsets(mach, inst, offsets);23962397IFETCH(&r[3], 0, TGSI_CHAN_W);23982399if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I ||2400inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I_MS) {2401target = mach->SamplerViews[unit].Resource;2402}2403else {2404target = inst->Texture.Texture;2405}2406switch(target) {2407case TGSI_TEXTURE_3D:2408case TGSI_TEXTURE_2D_ARRAY:2409case TGSI_TEXTURE_SHADOW2D_ARRAY:2410case TGSI_TEXTURE_2D_ARRAY_MSAA:2411IFETCH(&r[2], 0, TGSI_CHAN_Z);2412FALLTHROUGH;2413case TGSI_TEXTURE_2D:2414case TGSI_TEXTURE_RECT:2415case TGSI_TEXTURE_SHADOW1D_ARRAY:2416case TGSI_TEXTURE_SHADOW2D:2417case TGSI_TEXTURE_SHADOWRECT:2418case TGSI_TEXTURE_1D_ARRAY:2419case TGSI_TEXTURE_2D_MSAA:2420IFETCH(&r[1], 0, TGSI_CHAN_Y);2421FALLTHROUGH;2422case TGSI_TEXTURE_BUFFER:2423case TGSI_TEXTURE_1D:2424case TGSI_TEXTURE_SHADOW1D:2425IFETCH(&r[0], 0, TGSI_CHAN_X);2426break;2427default:2428assert(0);2429break;2430}24312432mach->Sampler->get_texel(mach->Sampler, unit, r[0].i, r[1].i, r[2].i, r[3].i,2433offsets, rgba);24342435for (j = 0; j < TGSI_QUAD_SIZE; j++) {2436r[0].f[j] = rgba[0][j];2437r[1].f[j] = rgba[1][j];2438r[2].f[j] = rgba[2][j];2439r[3].f[j] = rgba[3][j];2440}24412442if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I ||2443inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I_MS) {2444unsigned char swizzles[4];2445swizzles[0] = inst->Src[1].Register.SwizzleX;2446swizzles[1] = inst->Src[1].Register.SwizzleY;2447swizzles[2] = inst->Src[1].Register.SwizzleZ;2448swizzles[3] = inst->Src[1].Register.SwizzleW;24492450for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {2451if (inst->Dst[0].Register.WriteMask & (1 << chan)) {2452store_dest(mach, &r[swizzles[chan]],2453&inst->Dst[0], inst, chan);2454}2455}2456}2457else {2458for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {2459if (inst->Dst[0].Register.WriteMask & (1 << chan)) {2460store_dest(mach, &r[chan], &inst->Dst[0], inst, chan);2461}2462}2463}2464}24652466static void2467exec_txq(struct tgsi_exec_machine *mach,2468const struct tgsi_full_instruction *inst)2469{2470int result[4];2471union tgsi_exec_channel r[4], src;2472uint chan;2473uint unit;2474int i,j;24752476unit = fetch_sampler_unit(mach, inst, 1);24772478fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_INT);24792480/* XXX: This interface can't return per-pixel values */2481mach->Sampler->get_dims(mach->Sampler, unit, src.i[0], result);24822483for (i = 0; i < TGSI_QUAD_SIZE; i++) {2484for (j = 0; j < 4; j++) {2485r[j].i[i] = result[j];2486}2487}24882489for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {2490if (inst->Dst[0].Register.WriteMask & (1 << chan)) {2491store_dest(mach, &r[chan], &inst->Dst[0], inst, chan);2492}2493}2494}24952496static void2497exec_sample(struct tgsi_exec_machine *mach,2498const struct tgsi_full_instruction *inst,2499uint modifier, boolean compare)2500{2501const uint resource_unit = inst->Src[1].Register.Index;2502const uint sampler_unit = inst->Src[2].Register.Index;2503union tgsi_exec_channel r[5], c1;2504const union tgsi_exec_channel *lod = &ZeroVec;2505enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE;2506uint chan;2507unsigned char swizzles[4];2508int8_t offsets[3];25092510/* always fetch all 3 offsets, overkill but keeps code simple */2511fetch_texel_offsets(mach, inst, offsets);25122513assert(modifier != TEX_MODIFIER_PROJECTED);25142515if (modifier != TEX_MODIFIER_NONE) {2516if (modifier == TEX_MODIFIER_LOD_BIAS) {2517FETCH(&c1, 3, TGSI_CHAN_X);2518lod = &c1;2519control = TGSI_SAMPLER_LOD_BIAS;2520}2521else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {2522FETCH(&c1, 3, TGSI_CHAN_X);2523lod = &c1;2524control = TGSI_SAMPLER_LOD_EXPLICIT;2525}2526else if (modifier == TEX_MODIFIER_GATHER) {2527control = TGSI_SAMPLER_GATHER;2528}2529else {2530assert(modifier == TEX_MODIFIER_LEVEL_ZERO);2531control = TGSI_SAMPLER_LOD_ZERO;2532}2533}25342535FETCH(&r[0], 0, TGSI_CHAN_X);25362537switch (mach->SamplerViews[resource_unit].Resource) {2538case TGSI_TEXTURE_1D:2539if (compare) {2540FETCH(&r[2], 3, TGSI_CHAN_X);2541fetch_texel(mach->Sampler, resource_unit, sampler_unit,2542&r[0], &ZeroVec, &r[2], &ZeroVec, lod, /* S, T, P, C, LOD */2543NULL, offsets, control,2544&r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */2545}2546else {2547fetch_texel(mach->Sampler, resource_unit, sampler_unit,2548&r[0], &ZeroVec, &ZeroVec, &ZeroVec, lod, /* S, T, P, C, LOD */2549NULL, offsets, control,2550&r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */2551}2552break;25532554case TGSI_TEXTURE_1D_ARRAY:2555case TGSI_TEXTURE_2D:2556case TGSI_TEXTURE_RECT:2557FETCH(&r[1], 0, TGSI_CHAN_Y);2558if (compare) {2559FETCH(&r[2], 3, TGSI_CHAN_X);2560fetch_texel(mach->Sampler, resource_unit, sampler_unit,2561&r[0], &r[1], &r[2], &ZeroVec, lod, /* S, T, P, C, LOD */2562NULL, offsets, control,2563&r[0], &r[1], &r[2], &r[3]); /* outputs */2564}2565else {2566fetch_texel(mach->Sampler, resource_unit, sampler_unit,2567&r[0], &r[1], &ZeroVec, &ZeroVec, lod, /* S, T, P, C, LOD */2568NULL, offsets, control,2569&r[0], &r[1], &r[2], &r[3]); /* outputs */2570}2571break;25722573case TGSI_TEXTURE_2D_ARRAY:2574case TGSI_TEXTURE_3D:2575case TGSI_TEXTURE_CUBE:2576FETCH(&r[1], 0, TGSI_CHAN_Y);2577FETCH(&r[2], 0, TGSI_CHAN_Z);2578if(compare) {2579FETCH(&r[3], 3, TGSI_CHAN_X);2580fetch_texel(mach->Sampler, resource_unit, sampler_unit,2581&r[0], &r[1], &r[2], &r[3], lod,2582NULL, offsets, control,2583&r[0], &r[1], &r[2], &r[3]);2584}2585else {2586fetch_texel(mach->Sampler, resource_unit, sampler_unit,2587&r[0], &r[1], &r[2], &ZeroVec, lod,2588NULL, offsets, control,2589&r[0], &r[1], &r[2], &r[3]);2590}2591break;25922593case TGSI_TEXTURE_CUBE_ARRAY:2594FETCH(&r[1], 0, TGSI_CHAN_Y);2595FETCH(&r[2], 0, TGSI_CHAN_Z);2596FETCH(&r[3], 0, TGSI_CHAN_W);2597if(compare) {2598FETCH(&r[4], 3, TGSI_CHAN_X);2599fetch_texel(mach->Sampler, resource_unit, sampler_unit,2600&r[0], &r[1], &r[2], &r[3], &r[4],2601NULL, offsets, control,2602&r[0], &r[1], &r[2], &r[3]);2603}2604else {2605fetch_texel(mach->Sampler, resource_unit, sampler_unit,2606&r[0], &r[1], &r[2], &r[3], lod,2607NULL, offsets, control,2608&r[0], &r[1], &r[2], &r[3]);2609}2610break;261126122613default:2614assert(0);2615}26162617swizzles[0] = inst->Src[1].Register.SwizzleX;2618swizzles[1] = inst->Src[1].Register.SwizzleY;2619swizzles[2] = inst->Src[1].Register.SwizzleZ;2620swizzles[3] = inst->Src[1].Register.SwizzleW;26212622for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {2623if (inst->Dst[0].Register.WriteMask & (1 << chan)) {2624store_dest(mach, &r[swizzles[chan]],2625&inst->Dst[0], inst, chan);2626}2627}2628}26292630static void2631exec_sample_d(struct tgsi_exec_machine *mach,2632const struct tgsi_full_instruction *inst)2633{2634const uint resource_unit = inst->Src[1].Register.Index;2635const uint sampler_unit = inst->Src[2].Register.Index;2636union tgsi_exec_channel r[4];2637float derivs[3][2][TGSI_QUAD_SIZE];2638uint chan;2639unsigned char swizzles[4];2640int8_t offsets[3];26412642/* always fetch all 3 offsets, overkill but keeps code simple */2643fetch_texel_offsets(mach, inst, offsets);26442645FETCH(&r[0], 0, TGSI_CHAN_X);26462647switch (mach->SamplerViews[resource_unit].Resource) {2648case TGSI_TEXTURE_1D:2649case TGSI_TEXTURE_1D_ARRAY:2650/* only 1D array actually needs Y */2651FETCH(&r[1], 0, TGSI_CHAN_Y);26522653fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]);26542655fetch_texel(mach->Sampler, resource_unit, sampler_unit,2656&r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */2657derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,2658&r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */2659break;26602661case TGSI_TEXTURE_2D:2662case TGSI_TEXTURE_RECT:2663case TGSI_TEXTURE_2D_ARRAY:2664/* only 2D array actually needs Z */2665FETCH(&r[1], 0, TGSI_CHAN_Y);2666FETCH(&r[2], 0, TGSI_CHAN_Z);26672668fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]);2669fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Y, derivs[1]);26702671fetch_texel(mach->Sampler, resource_unit, sampler_unit,2672&r[0], &r[1], &r[2], &ZeroVec, &ZeroVec, /* inputs */2673derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,2674&r[0], &r[1], &r[2], &r[3]); /* outputs */2675break;26762677case TGSI_TEXTURE_3D:2678case TGSI_TEXTURE_CUBE:2679case TGSI_TEXTURE_CUBE_ARRAY:2680/* only cube array actually needs W */2681FETCH(&r[1], 0, TGSI_CHAN_Y);2682FETCH(&r[2], 0, TGSI_CHAN_Z);2683FETCH(&r[3], 0, TGSI_CHAN_W);26842685fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]);2686fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Y, derivs[1]);2687fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Z, derivs[2]);26882689fetch_texel(mach->Sampler, resource_unit, sampler_unit,2690&r[0], &r[1], &r[2], &r[3], &ZeroVec,2691derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,2692&r[0], &r[1], &r[2], &r[3]);2693break;26942695default:2696assert(0);2697}26982699swizzles[0] = inst->Src[1].Register.SwizzleX;2700swizzles[1] = inst->Src[1].Register.SwizzleY;2701swizzles[2] = inst->Src[1].Register.SwizzleZ;2702swizzles[3] = inst->Src[1].Register.SwizzleW;27032704for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {2705if (inst->Dst[0].Register.WriteMask & (1 << chan)) {2706store_dest(mach, &r[swizzles[chan]],2707&inst->Dst[0], inst, chan);2708}2709}2710}271127122713/**2714* Evaluate a constant-valued coefficient at the position of the2715* current quad.2716*/2717static void2718eval_constant_coef(2719struct tgsi_exec_machine *mach,2720unsigned attrib,2721unsigned chan )2722{2723unsigned i;27242725for( i = 0; i < TGSI_QUAD_SIZE; i++ ) {2726mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];2727}2728}27292730static void2731interp_constant_offset(2732UNUSED const struct tgsi_exec_machine *mach,2733UNUSED unsigned attrib,2734UNUSED unsigned chan,2735UNUSED float ofs_x,2736UNUSED float ofs_y,2737UNUSED union tgsi_exec_channel *out_chan)2738{2739}27402741/**2742* Evaluate a linear-valued coefficient at the position of the2743* current quad.2744*/2745static void2746interp_linear_offset(2747const struct tgsi_exec_machine *mach,2748unsigned attrib,2749unsigned chan,2750float ofs_x,2751float ofs_y,2752union tgsi_exec_channel *out_chan)2753{2754const float dadx = mach->InterpCoefs[attrib].dadx[chan];2755const float dady = mach->InterpCoefs[attrib].dady[chan];2756const float delta = ofs_x * dadx + ofs_y * dady;2757out_chan->f[0] += delta;2758out_chan->f[1] += delta;2759out_chan->f[2] += delta;2760out_chan->f[3] += delta;2761}27622763static void2764eval_linear_coef(struct tgsi_exec_machine *mach,2765unsigned attrib,2766unsigned chan)2767{2768const float x = mach->QuadPos.xyzw[0].f[0];2769const float y = mach->QuadPos.xyzw[1].f[0];2770const float dadx = mach->InterpCoefs[attrib].dadx[chan];2771const float dady = mach->InterpCoefs[attrib].dady[chan];2772const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;27732774mach->Inputs[attrib].xyzw[chan].f[0] = a0;2775mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;2776mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;2777mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;2778}27792780/**2781* Evaluate a perspective-valued coefficient at the position of the2782* current quad.2783*/27842785static void2786interp_perspective_offset(2787const struct tgsi_exec_machine *mach,2788unsigned attrib,2789unsigned chan,2790float ofs_x,2791float ofs_y,2792union tgsi_exec_channel *out_chan)2793{2794const float dadx = mach->InterpCoefs[attrib].dadx[chan];2795const float dady = mach->InterpCoefs[attrib].dady[chan];2796const float *w = mach->QuadPos.xyzw[3].f;2797const float delta = ofs_x * dadx + ofs_y * dady;2798out_chan->f[0] += delta / w[0];2799out_chan->f[1] += delta / w[1];2800out_chan->f[2] += delta / w[2];2801out_chan->f[3] += delta / w[3];2802}28032804static void2805eval_perspective_coef(2806struct tgsi_exec_machine *mach,2807unsigned attrib,2808unsigned chan )2809{2810const float x = mach->QuadPos.xyzw[0].f[0];2811const float y = mach->QuadPos.xyzw[1].f[0];2812const float dadx = mach->InterpCoefs[attrib].dadx[chan];2813const float dady = mach->InterpCoefs[attrib].dady[chan];2814const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;2815const float *w = mach->QuadPos.xyzw[3].f;2816/* divide by W here */2817mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];2818mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];2819mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];2820mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];2821}282228232824typedef void (* eval_coef_func)(2825struct tgsi_exec_machine *mach,2826unsigned attrib,2827unsigned chan );28282829static void2830exec_declaration(struct tgsi_exec_machine *mach,2831const struct tgsi_full_declaration *decl)2832{2833if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) {2834mach->SamplerViews[decl->Range.First] = decl->SamplerView;2835return;2836}28372838if (mach->ShaderType == PIPE_SHADER_FRAGMENT) {2839if (decl->Declaration.File == TGSI_FILE_INPUT) {2840uint first, last, mask;28412842first = decl->Range.First;2843last = decl->Range.Last;2844mask = decl->Declaration.UsageMask;28452846/* XXX we could remove this special-case code since2847* mach->InterpCoefs[first].a0 should already have the2848* front/back-face value. But we should first update the2849* ureg code to emit the right UsageMask value (WRITEMASK_X).2850* Then, we could remove the tgsi_exec_machine::Face field.2851*/2852/* XXX make FACE a system value */2853if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {2854uint i;28552856assert(decl->Semantic.Index == 0);2857assert(first == last);28582859for (i = 0; i < TGSI_QUAD_SIZE; i++) {2860mach->Inputs[first].xyzw[0].f[i] = mach->Face;2861}2862} else {2863eval_coef_func eval;2864apply_sample_offset_func interp;2865uint i, j;28662867switch (decl->Interp.Interpolate) {2868case TGSI_INTERPOLATE_CONSTANT:2869eval = eval_constant_coef;2870interp = interp_constant_offset;2871break;28722873case TGSI_INTERPOLATE_LINEAR:2874eval = eval_linear_coef;2875interp = interp_linear_offset;2876break;28772878case TGSI_INTERPOLATE_PERSPECTIVE:2879eval = eval_perspective_coef;2880interp = interp_perspective_offset;2881break;28822883case TGSI_INTERPOLATE_COLOR:2884eval = mach->flatshade_color ? eval_constant_coef : eval_perspective_coef;2885interp = mach->flatshade_color ? interp_constant_offset : interp_perspective_offset;2886break;28872888default:2889assert(0);2890return;2891}28922893for (i = first; i <= last; i++)2894mach->InputSampleOffsetApply[i] = interp;28952896for (j = 0; j < TGSI_NUM_CHANNELS; j++) {2897if (mask & (1 << j)) {2898for (i = first; i <= last; i++) {2899eval(mach, i, j);2900}2901}2902}2903}29042905if (DEBUG_EXECUTION) {2906uint i, j;2907for (i = first; i <= last; ++i) {2908debug_printf("IN[%2u] = ", i);2909for (j = 0; j < TGSI_NUM_CHANNELS; j++) {2910if (j > 0) {2911debug_printf(" ");2912}2913debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",2914mach->Inputs[i].xyzw[0].f[j], mach->Inputs[i].xyzw[0].u[j],2915mach->Inputs[i].xyzw[1].f[j], mach->Inputs[i].xyzw[1].u[j],2916mach->Inputs[i].xyzw[2].f[j], mach->Inputs[i].xyzw[2].u[j],2917mach->Inputs[i].xyzw[3].f[j], mach->Inputs[i].xyzw[3].u[j]);2918}2919}2920}2921}2922}29232924}29252926typedef void (* micro_unary_op)(union tgsi_exec_channel *dst,2927const union tgsi_exec_channel *src);29282929static void2930exec_scalar_unary(struct tgsi_exec_machine *mach,2931const struct tgsi_full_instruction *inst,2932micro_unary_op op,2933enum tgsi_exec_datatype src_datatype)2934{2935unsigned int chan;2936union tgsi_exec_channel src;2937union tgsi_exec_channel dst;29382939fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, src_datatype);2940op(&dst, &src);2941for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {2942if (inst->Dst[0].Register.WriteMask & (1 << chan)) {2943store_dest(mach, &dst, &inst->Dst[0], inst, chan);2944}2945}2946}29472948static void2949exec_vector_unary(struct tgsi_exec_machine *mach,2950const struct tgsi_full_instruction *inst,2951micro_unary_op op,2952enum tgsi_exec_datatype src_datatype)2953{2954unsigned int chan;2955struct tgsi_exec_vector dst;29562957for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {2958if (inst->Dst[0].Register.WriteMask & (1 << chan)) {2959union tgsi_exec_channel src;29602961fetch_source(mach, &src, &inst->Src[0], chan, src_datatype);2962op(&dst.xyzw[chan], &src);2963}2964}2965for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {2966if (inst->Dst[0].Register.WriteMask & (1 << chan)) {2967store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan);2968}2969}2970}29712972typedef void (* micro_binary_op)(union tgsi_exec_channel *dst,2973const union tgsi_exec_channel *src0,2974const union tgsi_exec_channel *src1);29752976static void2977exec_scalar_binary(struct tgsi_exec_machine *mach,2978const struct tgsi_full_instruction *inst,2979micro_binary_op op,2980enum tgsi_exec_datatype src_datatype)2981{2982unsigned int chan;2983union tgsi_exec_channel src[2];2984union tgsi_exec_channel dst;29852986fetch_source(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, src_datatype);2987fetch_source(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, src_datatype);2988op(&dst, &src[0], &src[1]);2989for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {2990if (inst->Dst[0].Register.WriteMask & (1 << chan)) {2991store_dest(mach, &dst, &inst->Dst[0], inst, chan);2992}2993}2994}29952996static void2997exec_vector_binary(struct tgsi_exec_machine *mach,2998const struct tgsi_full_instruction *inst,2999micro_binary_op op,3000enum tgsi_exec_datatype src_datatype)3001{3002unsigned int chan;3003struct tgsi_exec_vector dst;30043005for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {3006if (inst->Dst[0].Register.WriteMask & (1 << chan)) {3007union tgsi_exec_channel src[2];30083009fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);3010fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);3011op(&dst.xyzw[chan], &src[0], &src[1]);3012}3013}3014for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {3015if (inst->Dst[0].Register.WriteMask & (1 << chan)) {3016store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan);3017}3018}3019}30203021typedef void (* micro_trinary_op)(union tgsi_exec_channel *dst,3022const union tgsi_exec_channel *src0,3023const union tgsi_exec_channel *src1,3024const union tgsi_exec_channel *src2);30253026static void3027exec_vector_trinary(struct tgsi_exec_machine *mach,3028const struct tgsi_full_instruction *inst,3029micro_trinary_op op,3030enum tgsi_exec_datatype src_datatype)3031{3032unsigned int chan;3033struct tgsi_exec_vector dst;30343035for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {3036if (inst->Dst[0].Register.WriteMask & (1 << chan)) {3037union tgsi_exec_channel src[3];30383039fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);3040fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);3041fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype);3042op(&dst.xyzw[chan], &src[0], &src[1], &src[2]);3043}3044}3045for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {3046if (inst->Dst[0].Register.WriteMask & (1 << chan)) {3047store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan);3048}3049}3050}30513052typedef void (* micro_quaternary_op)(union tgsi_exec_channel *dst,3053const union tgsi_exec_channel *src0,3054const union tgsi_exec_channel *src1,3055const union tgsi_exec_channel *src2,3056const union tgsi_exec_channel *src3);30573058static void3059exec_vector_quaternary(struct tgsi_exec_machine *mach,3060const struct tgsi_full_instruction *inst,3061micro_quaternary_op op,3062enum tgsi_exec_datatype src_datatype)3063{3064unsigned int chan;3065struct tgsi_exec_vector dst;30663067for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {3068if (inst->Dst[0].Register.WriteMask & (1 << chan)) {3069union tgsi_exec_channel src[4];30703071fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);3072fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);3073fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype);3074fetch_source(mach, &src[3], &inst->Src[3], chan, src_datatype);3075op(&dst.xyzw[chan], &src[0], &src[1], &src[2], &src[3]);3076}3077}3078for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {3079if (inst->Dst[0].Register.WriteMask & (1 << chan)) {3080store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan);3081}3082}3083}30843085static void3086exec_dp3(struct tgsi_exec_machine *mach,3087const struct tgsi_full_instruction *inst)3088{3089unsigned int chan;3090union tgsi_exec_channel arg[3];30913092fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);3093fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);3094micro_mul(&arg[2], &arg[0], &arg[1]);30953096for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_Z; chan++) {3097fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);3098fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT);3099micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);3100}31013102for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {3103if (inst->Dst[0].Register.WriteMask & (1 << chan)) {3104store_dest(mach, &arg[2], &inst->Dst[0], inst, chan);3105}3106}3107}31083109static void3110exec_dp4(struct tgsi_exec_machine *mach,3111const struct tgsi_full_instruction *inst)3112{3113unsigned int chan;3114union tgsi_exec_channel arg[3];31153116fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);3117fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);3118micro_mul(&arg[2], &arg[0], &arg[1]);31193120for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_W; chan++) {3121fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);3122fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT);3123micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);3124}31253126for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {3127if (inst->Dst[0].Register.WriteMask & (1 << chan)) {3128store_dest(mach, &arg[2], &inst->Dst[0], inst, chan);3129}3130}3131}31323133static void3134exec_dp2(struct tgsi_exec_machine *mach,3135const struct tgsi_full_instruction *inst)3136{3137unsigned int chan;3138union tgsi_exec_channel arg[3];31393140fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);3141fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);3142micro_mul(&arg[2], &arg[0], &arg[1]);31433144fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);3145fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);3146micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);31473148for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {3149if (inst->Dst[0].Register.WriteMask & (1 << chan)) {3150store_dest(mach, &arg[2], &inst->Dst[0], inst, chan);3151}3152}3153}31543155static void3156exec_pk2h(struct tgsi_exec_machine *mach,3157const struct tgsi_full_instruction *inst)3158{3159unsigned chan;3160union tgsi_exec_channel arg[2], dst;31613162fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);3163fetch_source(mach, &arg[1], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);3164for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) {3165dst.u[chan] = _mesa_float_to_half(arg[0].f[chan]) |3166(_mesa_float_to_half(arg[1].f[chan]) << 16);3167}3168for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {3169if (inst->Dst[0].Register.WriteMask & (1 << chan)) {3170store_dest(mach, &dst, &inst->Dst[0], inst, chan);3171}3172}3173}31743175static void3176exec_up2h(struct tgsi_exec_machine *mach,3177const struct tgsi_full_instruction *inst)3178{3179unsigned chan;3180union tgsi_exec_channel arg, dst[2];31813182fetch_source(mach, &arg, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT);3183for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) {3184dst[0].f[chan] = _mesa_half_to_float(arg.u[chan] & 0xffff);3185dst[1].f[chan] = _mesa_half_to_float(arg.u[chan] >> 16);3186}3187for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {3188if (inst->Dst[0].Register.WriteMask & (1 << chan)) {3189store_dest(mach, &dst[chan & 1], &inst->Dst[0], inst, chan);3190}3191}3192}31933194static void3195micro_ucmp(union tgsi_exec_channel *dst,3196const union tgsi_exec_channel *src0,3197const union tgsi_exec_channel *src1,3198const union tgsi_exec_channel *src2)3199{3200dst->f[0] = src0->u[0] ? src1->f[0] : src2->f[0];3201dst->f[1] = src0->u[1] ? src1->f[1] : src2->f[1];3202dst->f[2] = src0->u[2] ? src1->f[2] : src2->f[2];3203dst->f[3] = src0->u[3] ? src1->f[3] : src2->f[3];3204}32053206static void3207exec_ucmp(struct tgsi_exec_machine *mach,3208const struct tgsi_full_instruction *inst)3209{3210unsigned int chan;3211struct tgsi_exec_vector dst;32123213for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {3214if (inst->Dst[0].Register.WriteMask & (1 << chan)) {3215union tgsi_exec_channel src[3];32163217fetch_source(mach, &src[0], &inst->Src[0], chan,3218TGSI_EXEC_DATA_UINT);3219fetch_source(mach, &src[1], &inst->Src[1], chan,3220TGSI_EXEC_DATA_FLOAT);3221fetch_source(mach, &src[2], &inst->Src[2], chan,3222TGSI_EXEC_DATA_FLOAT);3223micro_ucmp(&dst.xyzw[chan], &src[0], &src[1], &src[2]);3224}3225}3226for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {3227if (inst->Dst[0].Register.WriteMask & (1 << chan)) {3228store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan);3229}3230}3231}32323233static void3234exec_dst(struct tgsi_exec_machine *mach,3235const struct tgsi_full_instruction *inst)3236{3237union tgsi_exec_channel r[2];3238union tgsi_exec_channel d[4];32393240if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {3241fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);3242fetch_source(mach, &r[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);3243micro_mul(&d[TGSI_CHAN_Y], &r[0], &r[1]);3244}3245if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {3246fetch_source(mach, &d[TGSI_CHAN_Z], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);3247}3248if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {3249fetch_source(mach, &d[TGSI_CHAN_W], &inst->Src[1], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);3250}32513252if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {3253store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_X);3254}3255if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {3256store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y);3257}3258if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {3259store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z);3260}3261if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {3262store_dest(mach, &d[TGSI_CHAN_W], &inst->Dst[0], inst, TGSI_CHAN_W);3263}3264}32653266static void3267exec_log(struct tgsi_exec_machine *mach,3268const struct tgsi_full_instruction *inst)3269{3270union tgsi_exec_channel r[3];32713272fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);3273micro_abs(&r[2], &r[0]); /* r2 = abs(r0) */3274micro_lg2(&r[1], &r[2]); /* r1 = lg2(r2) */3275micro_flr(&r[0], &r[1]); /* r0 = floor(r1) */3276if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {3277store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X);3278}3279if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {3280micro_exp2(&r[0], &r[0]); /* r0 = 2 ^ r0 */3281micro_div(&r[0], &r[2], &r[0]); /* r0 = r2 / r0 */3282store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_Y);3283}3284if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {3285store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Z);3286}3287if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {3288store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W);3289}3290}32913292static void3293exec_exp(struct tgsi_exec_machine *mach,3294const struct tgsi_full_instruction *inst)3295{3296union tgsi_exec_channel r[3];32973298fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);3299micro_flr(&r[1], &r[0]); /* r1 = floor(r0) */3300if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {3301micro_exp2(&r[2], &r[1]); /* r2 = 2 ^ r1 */3302store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_X);3303}3304if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {3305micro_sub(&r[2], &r[0], &r[1]); /* r2 = r0 - r1 */3306store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_Y);3307}3308if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {3309micro_exp2(&r[2], &r[0]); /* r2 = 2 ^ r0 */3310store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_Z);3311}3312if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {3313store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W);3314}3315}33163317static void3318exec_lit(struct tgsi_exec_machine *mach,3319const struct tgsi_full_instruction *inst)3320{3321union tgsi_exec_channel r[3];3322union tgsi_exec_channel d[3];33233324if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_YZ) {3325fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);3326if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {3327fetch_source(mach, &r[1], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);3328micro_max(&r[1], &r[1], &ZeroVec);33293330fetch_source(mach, &r[2], &inst->Src[0], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);3331micro_min(&r[2], &r[2], &P128Vec);3332micro_max(&r[2], &r[2], &M128Vec);3333micro_pow(&r[1], &r[1], &r[2]);3334micro_lt(&d[TGSI_CHAN_Z], &ZeroVec, &r[0], &r[1], &ZeroVec);3335store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z);3336}3337if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {3338micro_max(&d[TGSI_CHAN_Y], &r[0], &ZeroVec);3339store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y);3340}3341}3342if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {3343store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_X);3344}33453346if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {3347store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W);3348}3349}33503351static void3352exec_break(struct tgsi_exec_machine *mach)3353{3354if (mach->BreakType == TGSI_EXEC_BREAK_INSIDE_LOOP) {3355/* turn off loop channels for each enabled exec channel */3356mach->LoopMask &= ~mach->ExecMask;3357/* Todo: if mach->LoopMask == 0, jump to end of loop */3358UPDATE_EXEC_MASK(mach);3359} else {3360assert(mach->BreakType == TGSI_EXEC_BREAK_INSIDE_SWITCH);33613362mach->Switch.mask = 0x0;33633364UPDATE_EXEC_MASK(mach);3365}3366}33673368static void3369exec_switch(struct tgsi_exec_machine *mach,3370const struct tgsi_full_instruction *inst)3371{3372assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING);3373assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);33743375mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch;3376fetch_source(mach, &mach->Switch.selector, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT);3377mach->Switch.mask = 0x0;3378mach->Switch.defaultMask = 0x0;33793380mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;3381mach->BreakType = TGSI_EXEC_BREAK_INSIDE_SWITCH;33823383UPDATE_EXEC_MASK(mach);3384}33853386static void3387exec_case(struct tgsi_exec_machine *mach,3388const struct tgsi_full_instruction *inst)3389{3390uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask;3391union tgsi_exec_channel src;3392uint mask = 0;33933394fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT);33953396if (mach->Switch.selector.u[0] == src.u[0]) {3397mask |= 0x1;3398}3399if (mach->Switch.selector.u[1] == src.u[1]) {3400mask |= 0x2;3401}3402if (mach->Switch.selector.u[2] == src.u[2]) {3403mask |= 0x4;3404}3405if (mach->Switch.selector.u[3] == src.u[3]) {3406mask |= 0x8;3407}34083409mach->Switch.defaultMask |= mask;34103411mach->Switch.mask |= mask & prevMask;34123413UPDATE_EXEC_MASK(mach);3414}34153416/* FIXME: this will only work if default is last */3417static void3418exec_default(struct tgsi_exec_machine *mach)3419{3420uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask;34213422mach->Switch.mask |= ~mach->Switch.defaultMask & prevMask;34233424UPDATE_EXEC_MASK(mach);3425}34263427static void3428exec_endswitch(struct tgsi_exec_machine *mach)3429{3430mach->Switch = mach->SwitchStack[--mach->SwitchStackTop];3431mach->BreakType = mach->BreakStack[--mach->BreakStackTop];34323433UPDATE_EXEC_MASK(mach);3434}34353436typedef void (* micro_dop)(union tgsi_double_channel *dst,3437const union tgsi_double_channel *src);34383439typedef void (* micro_dop_sop)(union tgsi_double_channel *dst,3440const union tgsi_double_channel *src0,3441union tgsi_exec_channel *src1);34423443typedef void (* micro_dop_s)(union tgsi_double_channel *dst,3444const union tgsi_exec_channel *src);34453446typedef void (* micro_sop_d)(union tgsi_exec_channel *dst,3447const union tgsi_double_channel *src);34483449static void3450fetch_double_channel(struct tgsi_exec_machine *mach,3451union tgsi_double_channel *chan,3452const struct tgsi_full_src_register *reg,3453uint chan_0,3454uint chan_1)3455{3456union tgsi_exec_channel src[2];3457uint i;34583459fetch_source_d(mach, &src[0], reg, chan_0);3460fetch_source_d(mach, &src[1], reg, chan_1);34613462for (i = 0; i < TGSI_QUAD_SIZE; i++) {3463chan->u[i][0] = src[0].u[i];3464chan->u[i][1] = src[1].u[i];3465}3466assert(!reg->Register.Absolute);3467assert(!reg->Register.Negate);3468}34693470static void3471store_double_channel(struct tgsi_exec_machine *mach,3472const union tgsi_double_channel *chan,3473const struct tgsi_full_dst_register *reg,3474const struct tgsi_full_instruction *inst,3475uint chan_0,3476uint chan_1)3477{3478union tgsi_exec_channel dst[2];3479uint i;3480union tgsi_double_channel temp;3481const uint execmask = mach->ExecMask;34823483if (!inst->Instruction.Saturate) {3484for (i = 0; i < TGSI_QUAD_SIZE; i++)3485if (execmask & (1 << i)) {3486dst[0].u[i] = chan->u[i][0];3487dst[1].u[i] = chan->u[i][1];3488}3489}3490else {3491for (i = 0; i < TGSI_QUAD_SIZE; i++)3492if (execmask & (1 << i)) {3493if (chan->d[i] < 0.0 || isnan(chan->d[i]))3494temp.d[i] = 0.0;3495else if (chan->d[i] > 1.0)3496temp.d[i] = 1.0;3497else3498temp.d[i] = chan->d[i];34993500dst[0].u[i] = temp.u[i][0];3501dst[1].u[i] = temp.u[i][1];3502}3503}35043505store_dest_double(mach, &dst[0], reg, chan_0);3506if (chan_1 != (unsigned)-1)3507store_dest_double(mach, &dst[1], reg, chan_1);3508}35093510static void3511exec_double_unary(struct tgsi_exec_machine *mach,3512const struct tgsi_full_instruction *inst,3513micro_dop op)3514{3515union tgsi_double_channel src;3516union tgsi_double_channel dst;35173518if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) {3519fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);3520op(&dst, &src);3521store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);3522}3523if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) {3524fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);3525op(&dst, &src);3526store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);3527}3528}35293530static void3531exec_double_binary(struct tgsi_exec_machine *mach,3532const struct tgsi_full_instruction *inst,3533micro_dop op,3534enum tgsi_exec_datatype dst_datatype)3535{3536union tgsi_double_channel src[2];3537union tgsi_double_channel dst;3538int first_dest_chan, second_dest_chan;3539int wmask;35403541wmask = inst->Dst[0].Register.WriteMask;3542/* these are & because of the way DSLT etc store their destinations */3543if (wmask & TGSI_WRITEMASK_XY) {3544first_dest_chan = TGSI_CHAN_X;3545second_dest_chan = TGSI_CHAN_Y;3546if (dst_datatype == TGSI_EXEC_DATA_UINT) {3547first_dest_chan = (wmask & TGSI_WRITEMASK_X) ? TGSI_CHAN_X : TGSI_CHAN_Y;3548second_dest_chan = -1;3549}35503551fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);3552fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, TGSI_CHAN_Y);3553op(&dst, src);3554store_double_channel(mach, &dst, &inst->Dst[0], inst, first_dest_chan, second_dest_chan);3555}35563557if (wmask & TGSI_WRITEMASK_ZW) {3558first_dest_chan = TGSI_CHAN_Z;3559second_dest_chan = TGSI_CHAN_W;3560if (dst_datatype == TGSI_EXEC_DATA_UINT) {3561first_dest_chan = (wmask & TGSI_WRITEMASK_Z) ? TGSI_CHAN_Z : TGSI_CHAN_W;3562second_dest_chan = -1;3563}35643565fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);3566fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_CHAN_W);3567op(&dst, src);3568store_double_channel(mach, &dst, &inst->Dst[0], inst, first_dest_chan, second_dest_chan);3569}3570}35713572static void3573exec_double_trinary(struct tgsi_exec_machine *mach,3574const struct tgsi_full_instruction *inst,3575micro_dop op)3576{3577union tgsi_double_channel src[3];3578union tgsi_double_channel dst;35793580if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) {3581fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);3582fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, TGSI_CHAN_Y);3583fetch_double_channel(mach, &src[2], &inst->Src[2], TGSI_CHAN_X, TGSI_CHAN_Y);3584op(&dst, src);3585store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);3586}3587if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) {3588fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);3589fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_CHAN_W);3590fetch_double_channel(mach, &src[2], &inst->Src[2], TGSI_CHAN_Z, TGSI_CHAN_W);3591op(&dst, src);3592store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);3593}3594}35953596static void3597exec_dldexp(struct tgsi_exec_machine *mach,3598const struct tgsi_full_instruction *inst)3599{3600union tgsi_double_channel src0;3601union tgsi_exec_channel src1;3602union tgsi_double_channel dst;3603int wmask;36043605wmask = inst->Dst[0].Register.WriteMask;3606if (wmask & TGSI_WRITEMASK_XY) {3607fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);3608fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_INT);3609micro_dldexp(&dst, &src0, &src1);3610store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);3611}36123613if (wmask & TGSI_WRITEMASK_ZW) {3614fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);3615fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_INT);3616micro_dldexp(&dst, &src0, &src1);3617store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);3618}3619}36203621static void3622exec_dfracexp(struct tgsi_exec_machine *mach,3623const struct tgsi_full_instruction *inst)3624{3625union tgsi_double_channel src;3626union tgsi_double_channel dst;3627union tgsi_exec_channel dst_exp;36283629fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);3630micro_dfracexp(&dst, &dst_exp, &src);3631if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY)3632store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);3633if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW)3634store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);3635for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {3636if (inst->Dst[1].Register.WriteMask & (1 << chan))3637store_dest(mach, &dst_exp, &inst->Dst[1], inst, chan);3638}3639}36403641static void3642exec_arg0_64_arg1_32(struct tgsi_exec_machine *mach,3643const struct tgsi_full_instruction *inst,3644micro_dop_sop op)3645{3646union tgsi_double_channel src0;3647union tgsi_exec_channel src1;3648union tgsi_double_channel dst;3649int wmask;36503651wmask = inst->Dst[0].Register.WriteMask;3652if (wmask & TGSI_WRITEMASK_XY) {3653fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);3654fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_INT);3655op(&dst, &src0, &src1);3656store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);3657}36583659if (wmask & TGSI_WRITEMASK_ZW) {3660fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);3661fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_INT);3662op(&dst, &src0, &src1);3663store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);3664}3665}36663667static int3668get_image_coord_dim(unsigned tgsi_tex)3669{3670int dim;3671switch (tgsi_tex) {3672case TGSI_TEXTURE_BUFFER:3673case TGSI_TEXTURE_1D:3674dim = 1;3675break;3676case TGSI_TEXTURE_2D:3677case TGSI_TEXTURE_RECT:3678case TGSI_TEXTURE_1D_ARRAY:3679case TGSI_TEXTURE_2D_MSAA:3680dim = 2;3681break;3682case TGSI_TEXTURE_3D:3683case TGSI_TEXTURE_CUBE:3684case TGSI_TEXTURE_2D_ARRAY:3685case TGSI_TEXTURE_2D_ARRAY_MSAA:3686case TGSI_TEXTURE_CUBE_ARRAY:3687dim = 3;3688break;3689default:3690assert(!"unknown texture target");3691dim = 0;3692break;3693}36943695return dim;3696}36973698static int3699get_image_coord_sample(unsigned tgsi_tex)3700{3701int sample = 0;3702switch (tgsi_tex) {3703case TGSI_TEXTURE_2D_MSAA:3704sample = 3;3705break;3706case TGSI_TEXTURE_2D_ARRAY_MSAA:3707sample = 4;3708break;3709default:3710break;3711}3712return sample;3713}37143715static void3716exec_load_img(struct tgsi_exec_machine *mach,3717const struct tgsi_full_instruction *inst)3718{3719union tgsi_exec_channel r[4], sample_r;3720uint unit;3721int sample;3722int i, j;3723int dim;3724uint chan;3725float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];3726struct tgsi_image_params params;37273728unit = fetch_sampler_unit(mach, inst, 0);3729dim = get_image_coord_dim(inst->Memory.Texture);3730sample = get_image_coord_sample(inst->Memory.Texture);3731assert(dim <= 3);37323733params.execmask = mach->ExecMask & mach->NonHelperMask & ~mach->KillMask;3734params.unit = unit;3735params.tgsi_tex_instr = inst->Memory.Texture;3736params.format = inst->Memory.Format;37373738for (i = 0; i < dim; i++) {3739IFETCH(&r[i], 1, TGSI_CHAN_X + i);3740}37413742if (sample)3743IFETCH(&sample_r, 1, TGSI_CHAN_X + sample);37443745mach->Image->load(mach->Image, ¶ms,3746r[0].i, r[1].i, r[2].i, sample_r.i,3747rgba);3748for (j = 0; j < TGSI_QUAD_SIZE; j++) {3749r[0].f[j] = rgba[0][j];3750r[1].f[j] = rgba[1][j];3751r[2].f[j] = rgba[2][j];3752r[3].f[j] = rgba[3][j];3753}3754for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {3755if (inst->Dst[0].Register.WriteMask & (1 << chan)) {3756store_dest(mach, &r[chan], &inst->Dst[0], inst, chan);3757}3758}3759}37603761static void3762exec_load_membuf(struct tgsi_exec_machine *mach,3763const struct tgsi_full_instruction *inst)3764{3765uint32_t unit = fetch_sampler_unit(mach, inst, 0);37663767uint32_t size;3768const char *ptr;3769switch (inst->Src[0].Register.File) {3770case TGSI_FILE_MEMORY:3771ptr = mach->LocalMem;3772size = mach->LocalMemSize;3773break;37743775case TGSI_FILE_BUFFER:3776ptr = mach->Buffer->lookup(mach->Buffer, unit, &size);3777break;37783779case TGSI_FILE_CONSTANT:3780if (unit < ARRAY_SIZE(mach->Consts)) {3781ptr = mach->Consts[unit];3782size = mach->ConstsSize[unit];3783} else {3784ptr = NULL;3785size = 0;3786}3787break;37883789default:3790unreachable("unsupported TGSI_OPCODE_LOAD file");3791}37923793union tgsi_exec_channel offset;3794IFETCH(&offset, 1, TGSI_CHAN_X);37953796assert(inst->Dst[0].Register.WriteMask);3797uint32_t load_size = util_last_bit(inst->Dst[0].Register.WriteMask) * 4;37983799union tgsi_exec_channel rgba[TGSI_NUM_CHANNELS];3800memset(&rgba, 0, sizeof(rgba));3801for (int j = 0; j < TGSI_QUAD_SIZE; j++) {3802if (size >= load_size && offset.u[j] <= (size - load_size)) {3803for (int chan = 0; chan < load_size / 4; chan++)3804rgba[chan].u[j] = *(uint32_t *)(ptr + offset.u[j] + chan * 4);3805}3806}38073808for (int chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {3809if (inst->Dst[0].Register.WriteMask & (1 << chan)) {3810store_dest(mach, &rgba[chan], &inst->Dst[0], inst, chan);3811}3812}3813}38143815static void3816exec_load(struct tgsi_exec_machine *mach,3817const struct tgsi_full_instruction *inst)3818{3819if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)3820exec_load_img(mach, inst);3821else3822exec_load_membuf(mach, inst);3823}38243825static uint3826fetch_store_img_unit(struct tgsi_exec_machine *mach,3827const struct tgsi_full_dst_register *dst)3828{3829uint unit = 0;3830int i;3831if (dst->Register.Indirect) {3832union tgsi_exec_channel indir_index, index2;3833const uint execmask = mach->ExecMask;3834index2.i[0] =3835index2.i[1] =3836index2.i[2] =3837index2.i[3] = dst->Indirect.Index;38383839fetch_src_file_channel(mach,3840dst->Indirect.File,3841dst->Indirect.Swizzle,3842&index2,3843&ZeroVec,3844&indir_index);3845for (i = 0; i < TGSI_QUAD_SIZE; i++) {3846if (execmask & (1 << i)) {3847unit = dst->Register.Index + indir_index.i[i];3848break;3849}3850}3851} else {3852unit = dst->Register.Index;3853}3854return unit;3855}38563857static void3858exec_store_img(struct tgsi_exec_machine *mach,3859const struct tgsi_full_instruction *inst)3860{3861union tgsi_exec_channel r[3], sample_r;3862union tgsi_exec_channel value[4];3863float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];3864struct tgsi_image_params params;3865int dim;3866int sample;3867int i, j;3868uint unit;3869unit = fetch_store_img_unit(mach, &inst->Dst[0]);3870dim = get_image_coord_dim(inst->Memory.Texture);3871sample = get_image_coord_sample(inst->Memory.Texture);3872assert(dim <= 3);38733874params.execmask = mach->ExecMask & mach->NonHelperMask & ~mach->KillMask;3875params.unit = unit;3876params.tgsi_tex_instr = inst->Memory.Texture;3877params.format = inst->Memory.Format;38783879for (i = 0; i < dim; i++) {3880IFETCH(&r[i], 0, TGSI_CHAN_X + i);3881}38823883for (i = 0; i < 4; i++) {3884FETCH(&value[i], 1, TGSI_CHAN_X + i);3885}3886if (sample)3887IFETCH(&sample_r, 0, TGSI_CHAN_X + sample);38883889for (j = 0; j < TGSI_QUAD_SIZE; j++) {3890rgba[0][j] = value[0].f[j];3891rgba[1][j] = value[1].f[j];3892rgba[2][j] = value[2].f[j];3893rgba[3][j] = value[3].f[j];3894}38953896mach->Image->store(mach->Image, ¶ms,3897r[0].i, r[1].i, r[2].i, sample_r.i,3898rgba);3899}39003901static void3902exec_store_buf(struct tgsi_exec_machine *mach,3903const struct tgsi_full_instruction *inst)3904{3905uint32_t unit = fetch_store_img_unit(mach, &inst->Dst[0]);3906uint32_t size;3907char *ptr = mach->Buffer->lookup(mach->Buffer, unit, &size);39083909int execmask = mach->ExecMask & mach->NonHelperMask & ~mach->KillMask;39103911union tgsi_exec_channel offset;3912IFETCH(&offset, 0, TGSI_CHAN_X);39133914union tgsi_exec_channel value[4];3915for (int i = 0; i < 4; i++)3916FETCH(&value[i], 1, TGSI_CHAN_X + i);39173918for (int j = 0; j < TGSI_QUAD_SIZE; j++) {3919if (!(execmask & (1 << j)))3920continue;3921if (size < offset.u[j])3922continue;39233924uint32_t *invocation_ptr = (uint32_t *)(ptr + offset.u[j]);3925uint32_t size_avail = size - offset.u[j];39263927for (int chan = 0; chan < MIN2(4, size_avail / 4); chan++) {3928if (inst->Dst[0].Register.WriteMask & (1 << chan))3929memcpy(&invocation_ptr[chan], &value[chan].u[j], 4);3930}3931}3932}39333934static void3935exec_store_mem(struct tgsi_exec_machine *mach,3936const struct tgsi_full_instruction *inst)3937{3938union tgsi_exec_channel r[3];3939union tgsi_exec_channel value[4];3940uint i, chan;3941char *ptr = mach->LocalMem;3942int execmask = mach->ExecMask & mach->NonHelperMask & ~mach->KillMask;39433944IFETCH(&r[0], 0, TGSI_CHAN_X);39453946for (i = 0; i < 4; i++) {3947FETCH(&value[i], 1, TGSI_CHAN_X + i);3948}39493950if (r[0].u[0] >= mach->LocalMemSize)3951return;3952ptr += r[0].u[0];39533954for (i = 0; i < TGSI_QUAD_SIZE; i++) {3955if (execmask & (1 << i)) {3956for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {3957if (inst->Dst[0].Register.WriteMask & (1 << chan)) {3958memcpy(ptr + (chan * 4), &value[chan].u[0], 4);3959}3960}3961}3962}3963}39643965static void3966exec_store(struct tgsi_exec_machine *mach,3967const struct tgsi_full_instruction *inst)3968{3969if (inst->Dst[0].Register.File == TGSI_FILE_IMAGE)3970exec_store_img(mach, inst);3971else if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER)3972exec_store_buf(mach, inst);3973else if (inst->Dst[0].Register.File == TGSI_FILE_MEMORY)3974exec_store_mem(mach, inst);3975}39763977static void3978exec_atomop_img(struct tgsi_exec_machine *mach,3979const struct tgsi_full_instruction *inst)3980{3981union tgsi_exec_channel r[4], sample_r;3982union tgsi_exec_channel value[4], value2[4];3983float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];3984float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];3985struct tgsi_image_params params;3986int dim;3987int sample;3988int i, j;3989uint unit, chan;3990unit = fetch_sampler_unit(mach, inst, 0);3991dim = get_image_coord_dim(inst->Memory.Texture);3992sample = get_image_coord_sample(inst->Memory.Texture);3993assert(dim <= 3);39943995params.execmask = mach->ExecMask & mach->NonHelperMask & ~mach->KillMask;3996params.unit = unit;3997params.tgsi_tex_instr = inst->Memory.Texture;3998params.format = inst->Memory.Format;39994000for (i = 0; i < dim; i++) {4001IFETCH(&r[i], 1, TGSI_CHAN_X + i);4002}40034004for (i = 0; i < 4; i++) {4005FETCH(&value[i], 2, TGSI_CHAN_X + i);4006if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)4007FETCH(&value2[i], 3, TGSI_CHAN_X + i);4008}4009if (sample)4010IFETCH(&sample_r, 1, TGSI_CHAN_X + sample);40114012for (j = 0; j < TGSI_QUAD_SIZE; j++) {4013rgba[0][j] = value[0].f[j];4014rgba[1][j] = value[1].f[j];4015rgba[2][j] = value[2].f[j];4016rgba[3][j] = value[3].f[j];4017}4018if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {4019for (j = 0; j < TGSI_QUAD_SIZE; j++) {4020rgba2[0][j] = value2[0].f[j];4021rgba2[1][j] = value2[1].f[j];4022rgba2[2][j] = value2[2].f[j];4023rgba2[3][j] = value2[3].f[j];4024}4025}40264027mach->Image->op(mach->Image, ¶ms, inst->Instruction.Opcode,4028r[0].i, r[1].i, r[2].i, sample_r.i,4029rgba, rgba2);40304031for (j = 0; j < TGSI_QUAD_SIZE; j++) {4032r[0].f[j] = rgba[0][j];4033r[1].f[j] = rgba[1][j];4034r[2].f[j] = rgba[2][j];4035r[3].f[j] = rgba[3][j];4036}4037for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {4038if (inst->Dst[0].Register.WriteMask & (1 << chan)) {4039store_dest(mach, &r[chan], &inst->Dst[0], inst, chan);4040}4041}4042}40434044static void4045exec_atomop_membuf(struct tgsi_exec_machine *mach,4046const struct tgsi_full_instruction *inst)4047{4048union tgsi_exec_channel offset, r0, r1;4049uint chan, i;4050int execmask = mach->ExecMask & mach->NonHelperMask & ~mach->KillMask;4051IFETCH(&offset, 1, TGSI_CHAN_X);40524053if (!(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X))4054return;40554056void *ptr[TGSI_QUAD_SIZE];4057if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {4058uint32_t unit = fetch_sampler_unit(mach, inst, 0);4059uint32_t size;4060char *buffer = mach->Buffer->lookup(mach->Buffer, unit, &size);4061for (int i = 0; i < TGSI_QUAD_SIZE; i++) {4062if (likely(size >= 4 && offset.u[i] <= size - 4))4063ptr[i] = buffer + offset.u[i];4064else4065ptr[i] = NULL;4066}4067} else {4068assert(inst->Src[0].Register.File == TGSI_FILE_MEMORY);40694070for (i = 0; i < TGSI_QUAD_SIZE; i++) {4071if (likely(mach->LocalMemSize >= 4 && offset.u[i] <= mach->LocalMemSize - 4))4072ptr[i] = (char *)mach->LocalMem + offset.u[i];4073else4074ptr[i] = NULL;4075}4076}40774078FETCH(&r0, 2, TGSI_CHAN_X);4079if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)4080FETCH(&r1, 3, TGSI_CHAN_X);40814082/* The load/op/store sequence has to happen inside the loop since ptr4083* may have the same ptr in some of the invocations.4084*/4085for (int i = 0; i < TGSI_QUAD_SIZE; i++) {4086if (!(execmask & (1 << i)))4087continue;40884089uint32_t val = 0;4090if (ptr[i]) {4091memcpy(&val, ptr[i], sizeof(val));40924093uint32_t result;4094switch (inst->Instruction.Opcode) {4095case TGSI_OPCODE_ATOMUADD:4096result = val + r0.u[i];4097break;4098case TGSI_OPCODE_ATOMXOR:4099result = val ^ r0.u[i];4100break;4101case TGSI_OPCODE_ATOMOR:4102result = val | r0.u[i];4103break;4104case TGSI_OPCODE_ATOMAND:4105result = val & r0.u[i];4106break;4107case TGSI_OPCODE_ATOMUMIN:4108result = MIN2(val, r0.u[i]);4109break;4110case TGSI_OPCODE_ATOMUMAX:4111result = MAX2(val, r0.u[i]);4112break;4113case TGSI_OPCODE_ATOMIMIN:4114result = MIN2((int32_t)val, r0.i[i]);4115break;4116case TGSI_OPCODE_ATOMIMAX:4117result = MAX2((int32_t)val, r0.i[i]);4118break;4119case TGSI_OPCODE_ATOMXCHG:4120result = r0.u[i];4121break;4122case TGSI_OPCODE_ATOMCAS:4123if (val == r0.u[i])4124result = r1.u[i];4125else4126result = val;4127break;4128case TGSI_OPCODE_ATOMFADD:4129result = fui(uif(val) + r0.f[i]);4130break;4131default:4132unreachable("bad atomic op");4133}4134memcpy(ptr[i], &result, sizeof(result));4135}41364137r0.u[i] = val;4138}41394140for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++)4141store_dest(mach, &r0, &inst->Dst[0], inst, chan);4142}41434144static void4145exec_atomop(struct tgsi_exec_machine *mach,4146const struct tgsi_full_instruction *inst)4147{4148if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)4149exec_atomop_img(mach, inst);4150else4151exec_atomop_membuf(mach, inst);4152}41534154static void4155exec_resq_img(struct tgsi_exec_machine *mach,4156const struct tgsi_full_instruction *inst)4157{4158int result[4];4159union tgsi_exec_channel r[4];4160uint unit;4161int i, chan, j;4162struct tgsi_image_params params;41634164unit = fetch_sampler_unit(mach, inst, 0);41654166params.execmask = mach->ExecMask & mach->NonHelperMask & ~mach->KillMask;4167params.unit = unit;4168params.tgsi_tex_instr = inst->Memory.Texture;4169params.format = inst->Memory.Format;41704171mach->Image->get_dims(mach->Image, ¶ms, result);41724173for (i = 0; i < TGSI_QUAD_SIZE; i++) {4174for (j = 0; j < 4; j++) {4175r[j].i[i] = result[j];4176}4177}41784179for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {4180if (inst->Dst[0].Register.WriteMask & (1 << chan)) {4181store_dest(mach, &r[chan], &inst->Dst[0], inst, chan);4182}4183}4184}41854186static void4187exec_resq_buf(struct tgsi_exec_machine *mach,4188const struct tgsi_full_instruction *inst)4189{4190uint32_t unit = fetch_sampler_unit(mach, inst, 0);4191uint32_t size;4192(void)mach->Buffer->lookup(mach->Buffer, unit, &size);41934194union tgsi_exec_channel r;4195for (int i = 0; i < TGSI_QUAD_SIZE; i++)4196r.i[i] = size;41974198if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {4199for (int chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {4200store_dest(mach, &r, &inst->Dst[0], inst, TGSI_CHAN_X);4201}4202}4203}42044205static void4206exec_resq(struct tgsi_exec_machine *mach,4207const struct tgsi_full_instruction *inst)4208{4209if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)4210exec_resq_img(mach, inst);4211else4212exec_resq_buf(mach, inst);4213}42144215static void4216micro_f2u64(union tgsi_double_channel *dst,4217const union tgsi_exec_channel *src)4218{4219dst->u64[0] = (uint64_t)src->f[0];4220dst->u64[1] = (uint64_t)src->f[1];4221dst->u64[2] = (uint64_t)src->f[2];4222dst->u64[3] = (uint64_t)src->f[3];4223}42244225static void4226micro_f2i64(union tgsi_double_channel *dst,4227const union tgsi_exec_channel *src)4228{4229dst->i64[0] = (int64_t)src->f[0];4230dst->i64[1] = (int64_t)src->f[1];4231dst->i64[2] = (int64_t)src->f[2];4232dst->i64[3] = (int64_t)src->f[3];4233}42344235static void4236micro_u2i64(union tgsi_double_channel *dst,4237const union tgsi_exec_channel *src)4238{4239dst->u64[0] = (uint64_t)src->u[0];4240dst->u64[1] = (uint64_t)src->u[1];4241dst->u64[2] = (uint64_t)src->u[2];4242dst->u64[3] = (uint64_t)src->u[3];4243}42444245static void4246micro_i2i64(union tgsi_double_channel *dst,4247const union tgsi_exec_channel *src)4248{4249dst->i64[0] = (int64_t)src->i[0];4250dst->i64[1] = (int64_t)src->i[1];4251dst->i64[2] = (int64_t)src->i[2];4252dst->i64[3] = (int64_t)src->i[3];4253}42544255static void4256micro_d2u64(union tgsi_double_channel *dst,4257const union tgsi_double_channel *src)4258{4259dst->u64[0] = (uint64_t)src->d[0];4260dst->u64[1] = (uint64_t)src->d[1];4261dst->u64[2] = (uint64_t)src->d[2];4262dst->u64[3] = (uint64_t)src->d[3];4263}42644265static void4266micro_d2i64(union tgsi_double_channel *dst,4267const union tgsi_double_channel *src)4268{4269dst->i64[0] = (int64_t)src->d[0];4270dst->i64[1] = (int64_t)src->d[1];4271dst->i64[2] = (int64_t)src->d[2];4272dst->i64[3] = (int64_t)src->d[3];4273}42744275static void4276micro_u642d(union tgsi_double_channel *dst,4277const union tgsi_double_channel *src)4278{4279dst->d[0] = (double)src->u64[0];4280dst->d[1] = (double)src->u64[1];4281dst->d[2] = (double)src->u64[2];4282dst->d[3] = (double)src->u64[3];4283}42844285static void4286micro_i642d(union tgsi_double_channel *dst,4287const union tgsi_double_channel *src)4288{4289dst->d[0] = (double)src->i64[0];4290dst->d[1] = (double)src->i64[1];4291dst->d[2] = (double)src->i64[2];4292dst->d[3] = (double)src->i64[3];4293}42944295static void4296micro_u642f(union tgsi_exec_channel *dst,4297const union tgsi_double_channel *src)4298{4299dst->f[0] = (float)src->u64[0];4300dst->f[1] = (float)src->u64[1];4301dst->f[2] = (float)src->u64[2];4302dst->f[3] = (float)src->u64[3];4303}43044305static void4306micro_i642f(union tgsi_exec_channel *dst,4307const union tgsi_double_channel *src)4308{4309dst->f[0] = (float)src->i64[0];4310dst->f[1] = (float)src->i64[1];4311dst->f[2] = (float)src->i64[2];4312dst->f[3] = (float)src->i64[3];4313}43144315static void4316exec_t_2_64(struct tgsi_exec_machine *mach,4317const struct tgsi_full_instruction *inst,4318micro_dop_s op,4319enum tgsi_exec_datatype src_datatype)4320{4321union tgsi_exec_channel src;4322union tgsi_double_channel dst;43234324if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) {4325fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, src_datatype);4326op(&dst, &src);4327store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);4328}4329if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) {4330fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_Y, src_datatype);4331op(&dst, &src);4332store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);4333}4334}43354336static void4337exec_64_2_t(struct tgsi_exec_machine *mach,4338const struct tgsi_full_instruction *inst,4339micro_sop_d op)4340{4341union tgsi_double_channel src;4342union tgsi_exec_channel dst;4343int wm = inst->Dst[0].Register.WriteMask;4344int i;4345int bit;4346for (i = 0; i < 2; i++) {4347bit = ffs(wm);4348if (bit) {4349wm &= ~(1 << (bit - 1));4350if (i == 0)4351fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);4352else4353fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);4354op(&dst, &src);4355store_dest(mach, &dst, &inst->Dst[0], inst, bit - 1);4356}4357}4358}43594360static void4361micro_i2f(union tgsi_exec_channel *dst,4362const union tgsi_exec_channel *src)4363{4364dst->f[0] = (float)src->i[0];4365dst->f[1] = (float)src->i[1];4366dst->f[2] = (float)src->i[2];4367dst->f[3] = (float)src->i[3];4368}43694370static void4371micro_not(union tgsi_exec_channel *dst,4372const union tgsi_exec_channel *src)4373{4374dst->u[0] = ~src->u[0];4375dst->u[1] = ~src->u[1];4376dst->u[2] = ~src->u[2];4377dst->u[3] = ~src->u[3];4378}43794380static void4381micro_shl(union tgsi_exec_channel *dst,4382const union tgsi_exec_channel *src0,4383const union tgsi_exec_channel *src1)4384{4385unsigned masked_count;4386masked_count = src1->u[0] & 0x1f;4387dst->u[0] = src0->u[0] << masked_count;4388masked_count = src1->u[1] & 0x1f;4389dst->u[1] = src0->u[1] << masked_count;4390masked_count = src1->u[2] & 0x1f;4391dst->u[2] = src0->u[2] << masked_count;4392masked_count = src1->u[3] & 0x1f;4393dst->u[3] = src0->u[3] << masked_count;4394}43954396static void4397micro_and(union tgsi_exec_channel *dst,4398const union tgsi_exec_channel *src0,4399const union tgsi_exec_channel *src1)4400{4401dst->u[0] = src0->u[0] & src1->u[0];4402dst->u[1] = src0->u[1] & src1->u[1];4403dst->u[2] = src0->u[2] & src1->u[2];4404dst->u[3] = src0->u[3] & src1->u[3];4405}44064407static void4408micro_or(union tgsi_exec_channel *dst,4409const union tgsi_exec_channel *src0,4410const union tgsi_exec_channel *src1)4411{4412dst->u[0] = src0->u[0] | src1->u[0];4413dst->u[1] = src0->u[1] | src1->u[1];4414dst->u[2] = src0->u[2] | src1->u[2];4415dst->u[3] = src0->u[3] | src1->u[3];4416}44174418static void4419micro_xor(union tgsi_exec_channel *dst,4420const union tgsi_exec_channel *src0,4421const union tgsi_exec_channel *src1)4422{4423dst->u[0] = src0->u[0] ^ src1->u[0];4424dst->u[1] = src0->u[1] ^ src1->u[1];4425dst->u[2] = src0->u[2] ^ src1->u[2];4426dst->u[3] = src0->u[3] ^ src1->u[3];4427}44284429static void4430micro_mod(union tgsi_exec_channel *dst,4431const union tgsi_exec_channel *src0,4432const union tgsi_exec_channel *src1)4433{4434dst->i[0] = src1->i[0] ? src0->i[0] % src1->i[0] : ~0;4435dst->i[1] = src1->i[1] ? src0->i[1] % src1->i[1] : ~0;4436dst->i[2] = src1->i[2] ? src0->i[2] % src1->i[2] : ~0;4437dst->i[3] = src1->i[3] ? src0->i[3] % src1->i[3] : ~0;4438}44394440static void4441micro_f2i(union tgsi_exec_channel *dst,4442const union tgsi_exec_channel *src)4443{4444dst->i[0] = (int)src->f[0];4445dst->i[1] = (int)src->f[1];4446dst->i[2] = (int)src->f[2];4447dst->i[3] = (int)src->f[3];4448}44494450static void4451micro_fseq(union tgsi_exec_channel *dst,4452const union tgsi_exec_channel *src0,4453const union tgsi_exec_channel *src1)4454{4455dst->u[0] = src0->f[0] == src1->f[0] ? ~0 : 0;4456dst->u[1] = src0->f[1] == src1->f[1] ? ~0 : 0;4457dst->u[2] = src0->f[2] == src1->f[2] ? ~0 : 0;4458dst->u[3] = src0->f[3] == src1->f[3] ? ~0 : 0;4459}44604461static void4462micro_fsge(union tgsi_exec_channel *dst,4463const union tgsi_exec_channel *src0,4464const union tgsi_exec_channel *src1)4465{4466dst->u[0] = src0->f[0] >= src1->f[0] ? ~0 : 0;4467dst->u[1] = src0->f[1] >= src1->f[1] ? ~0 : 0;4468dst->u[2] = src0->f[2] >= src1->f[2] ? ~0 : 0;4469dst->u[3] = src0->f[3] >= src1->f[3] ? ~0 : 0;4470}44714472static void4473micro_fslt(union tgsi_exec_channel *dst,4474const union tgsi_exec_channel *src0,4475const union tgsi_exec_channel *src1)4476{4477dst->u[0] = src0->f[0] < src1->f[0] ? ~0 : 0;4478dst->u[1] = src0->f[1] < src1->f[1] ? ~0 : 0;4479dst->u[2] = src0->f[2] < src1->f[2] ? ~0 : 0;4480dst->u[3] = src0->f[3] < src1->f[3] ? ~0 : 0;4481}44824483static void4484micro_fsne(union tgsi_exec_channel *dst,4485const union tgsi_exec_channel *src0,4486const union tgsi_exec_channel *src1)4487{4488dst->u[0] = src0->f[0] != src1->f[0] ? ~0 : 0;4489dst->u[1] = src0->f[1] != src1->f[1] ? ~0 : 0;4490dst->u[2] = src0->f[2] != src1->f[2] ? ~0 : 0;4491dst->u[3] = src0->f[3] != src1->f[3] ? ~0 : 0;4492}44934494static void4495micro_idiv(union tgsi_exec_channel *dst,4496const union tgsi_exec_channel *src0,4497const union tgsi_exec_channel *src1)4498{4499dst->i[0] = src1->i[0] ? src0->i[0] / src1->i[0] : 0;4500dst->i[1] = src1->i[1] ? src0->i[1] / src1->i[1] : 0;4501dst->i[2] = src1->i[2] ? src0->i[2] / src1->i[2] : 0;4502dst->i[3] = src1->i[3] ? src0->i[3] / src1->i[3] : 0;4503}45044505static void4506micro_imax(union tgsi_exec_channel *dst,4507const union tgsi_exec_channel *src0,4508const union tgsi_exec_channel *src1)4509{4510dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0];4511dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1];4512dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2];4513dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3];4514}45154516static void4517micro_imin(union tgsi_exec_channel *dst,4518const union tgsi_exec_channel *src0,4519const union tgsi_exec_channel *src1)4520{4521dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0];4522dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1];4523dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2];4524dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3];4525}45264527static void4528micro_isge(union tgsi_exec_channel *dst,4529const union tgsi_exec_channel *src0,4530const union tgsi_exec_channel *src1)4531{4532dst->i[0] = src0->i[0] >= src1->i[0] ? -1 : 0;4533dst->i[1] = src0->i[1] >= src1->i[1] ? -1 : 0;4534dst->i[2] = src0->i[2] >= src1->i[2] ? -1 : 0;4535dst->i[3] = src0->i[3] >= src1->i[3] ? -1 : 0;4536}45374538static void4539micro_ishr(union tgsi_exec_channel *dst,4540const union tgsi_exec_channel *src0,4541const union tgsi_exec_channel *src1)4542{4543unsigned masked_count;4544masked_count = src1->i[0] & 0x1f;4545dst->i[0] = src0->i[0] >> masked_count;4546masked_count = src1->i[1] & 0x1f;4547dst->i[1] = src0->i[1] >> masked_count;4548masked_count = src1->i[2] & 0x1f;4549dst->i[2] = src0->i[2] >> masked_count;4550masked_count = src1->i[3] & 0x1f;4551dst->i[3] = src0->i[3] >> masked_count;4552}45534554static void4555micro_islt(union tgsi_exec_channel *dst,4556const union tgsi_exec_channel *src0,4557const union tgsi_exec_channel *src1)4558{4559dst->i[0] = src0->i[0] < src1->i[0] ? -1 : 0;4560dst->i[1] = src0->i[1] < src1->i[1] ? -1 : 0;4561dst->i[2] = src0->i[2] < src1->i[2] ? -1 : 0;4562dst->i[3] = src0->i[3] < src1->i[3] ? -1 : 0;4563}45644565static void4566micro_f2u(union tgsi_exec_channel *dst,4567const union tgsi_exec_channel *src)4568{4569dst->u[0] = (uint)src->f[0];4570dst->u[1] = (uint)src->f[1];4571dst->u[2] = (uint)src->f[2];4572dst->u[3] = (uint)src->f[3];4573}45744575static void4576micro_u2f(union tgsi_exec_channel *dst,4577const union tgsi_exec_channel *src)4578{4579dst->f[0] = (float)src->u[0];4580dst->f[1] = (float)src->u[1];4581dst->f[2] = (float)src->u[2];4582dst->f[3] = (float)src->u[3];4583}45844585static void4586micro_uadd(union tgsi_exec_channel *dst,4587const union tgsi_exec_channel *src0,4588const union tgsi_exec_channel *src1)4589{4590dst->u[0] = src0->u[0] + src1->u[0];4591dst->u[1] = src0->u[1] + src1->u[1];4592dst->u[2] = src0->u[2] + src1->u[2];4593dst->u[3] = src0->u[3] + src1->u[3];4594}45954596static void4597micro_udiv(union tgsi_exec_channel *dst,4598const union tgsi_exec_channel *src0,4599const union tgsi_exec_channel *src1)4600{4601dst->u[0] = src1->u[0] ? src0->u[0] / src1->u[0] : ~0u;4602dst->u[1] = src1->u[1] ? src0->u[1] / src1->u[1] : ~0u;4603dst->u[2] = src1->u[2] ? src0->u[2] / src1->u[2] : ~0u;4604dst->u[3] = src1->u[3] ? src0->u[3] / src1->u[3] : ~0u;4605}46064607static void4608micro_umad(union tgsi_exec_channel *dst,4609const union tgsi_exec_channel *src0,4610const union tgsi_exec_channel *src1,4611const union tgsi_exec_channel *src2)4612{4613dst->u[0] = src0->u[0] * src1->u[0] + src2->u[0];4614dst->u[1] = src0->u[1] * src1->u[1] + src2->u[1];4615dst->u[2] = src0->u[2] * src1->u[2] + src2->u[2];4616dst->u[3] = src0->u[3] * src1->u[3] + src2->u[3];4617}46184619static void4620micro_umax(union tgsi_exec_channel *dst,4621const union tgsi_exec_channel *src0,4622const union tgsi_exec_channel *src1)4623{4624dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0];4625dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1];4626dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2];4627dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3];4628}46294630static void4631micro_umin(union tgsi_exec_channel *dst,4632const union tgsi_exec_channel *src0,4633const union tgsi_exec_channel *src1)4634{4635dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0];4636dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1];4637dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2];4638dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3];4639}46404641static void4642micro_umod(union tgsi_exec_channel *dst,4643const union tgsi_exec_channel *src0,4644const union tgsi_exec_channel *src1)4645{4646dst->u[0] = src1->u[0] ? src0->u[0] % src1->u[0] : ~0u;4647dst->u[1] = src1->u[1] ? src0->u[1] % src1->u[1] : ~0u;4648dst->u[2] = src1->u[2] ? src0->u[2] % src1->u[2] : ~0u;4649dst->u[3] = src1->u[3] ? src0->u[3] % src1->u[3] : ~0u;4650}46514652static void4653micro_umul(union tgsi_exec_channel *dst,4654const union tgsi_exec_channel *src0,4655const union tgsi_exec_channel *src1)4656{4657dst->u[0] = src0->u[0] * src1->u[0];4658dst->u[1] = src0->u[1] * src1->u[1];4659dst->u[2] = src0->u[2] * src1->u[2];4660dst->u[3] = src0->u[3] * src1->u[3];4661}46624663static void4664micro_imul_hi(union tgsi_exec_channel *dst,4665const union tgsi_exec_channel *src0,4666const union tgsi_exec_channel *src1)4667{4668#define I64M(x, y) ((((int64_t)x) * ((int64_t)y)) >> 32)4669dst->i[0] = I64M(src0->i[0], src1->i[0]);4670dst->i[1] = I64M(src0->i[1], src1->i[1]);4671dst->i[2] = I64M(src0->i[2], src1->i[2]);4672dst->i[3] = I64M(src0->i[3], src1->i[3]);4673#undef I64M4674}46754676static void4677micro_umul_hi(union tgsi_exec_channel *dst,4678const union tgsi_exec_channel *src0,4679const union tgsi_exec_channel *src1)4680{4681#define U64M(x, y) ((((uint64_t)x) * ((uint64_t)y)) >> 32)4682dst->u[0] = U64M(src0->u[0], src1->u[0]);4683dst->u[1] = U64M(src0->u[1], src1->u[1]);4684dst->u[2] = U64M(src0->u[2], src1->u[2]);4685dst->u[3] = U64M(src0->u[3], src1->u[3]);4686#undef U64M4687}46884689static void4690micro_useq(union tgsi_exec_channel *dst,4691const union tgsi_exec_channel *src0,4692const union tgsi_exec_channel *src1)4693{4694dst->u[0] = src0->u[0] == src1->u[0] ? ~0 : 0;4695dst->u[1] = src0->u[1] == src1->u[1] ? ~0 : 0;4696dst->u[2] = src0->u[2] == src1->u[2] ? ~0 : 0;4697dst->u[3] = src0->u[3] == src1->u[3] ? ~0 : 0;4698}46994700static void4701micro_usge(union tgsi_exec_channel *dst,4702const union tgsi_exec_channel *src0,4703const union tgsi_exec_channel *src1)4704{4705dst->u[0] = src0->u[0] >= src1->u[0] ? ~0 : 0;4706dst->u[1] = src0->u[1] >= src1->u[1] ? ~0 : 0;4707dst->u[2] = src0->u[2] >= src1->u[2] ? ~0 : 0;4708dst->u[3] = src0->u[3] >= src1->u[3] ? ~0 : 0;4709}47104711static void4712micro_ushr(union tgsi_exec_channel *dst,4713const union tgsi_exec_channel *src0,4714const union tgsi_exec_channel *src1)4715{4716unsigned masked_count;4717masked_count = src1->u[0] & 0x1f;4718dst->u[0] = src0->u[0] >> masked_count;4719masked_count = src1->u[1] & 0x1f;4720dst->u[1] = src0->u[1] >> masked_count;4721masked_count = src1->u[2] & 0x1f;4722dst->u[2] = src0->u[2] >> masked_count;4723masked_count = src1->u[3] & 0x1f;4724dst->u[3] = src0->u[3] >> masked_count;4725}47264727static void4728micro_uslt(union tgsi_exec_channel *dst,4729const union tgsi_exec_channel *src0,4730const union tgsi_exec_channel *src1)4731{4732dst->u[0] = src0->u[0] < src1->u[0] ? ~0 : 0;4733dst->u[1] = src0->u[1] < src1->u[1] ? ~0 : 0;4734dst->u[2] = src0->u[2] < src1->u[2] ? ~0 : 0;4735dst->u[3] = src0->u[3] < src1->u[3] ? ~0 : 0;4736}47374738static void4739micro_usne(union tgsi_exec_channel *dst,4740const union tgsi_exec_channel *src0,4741const union tgsi_exec_channel *src1)4742{4743dst->u[0] = src0->u[0] != src1->u[0] ? ~0 : 0;4744dst->u[1] = src0->u[1] != src1->u[1] ? ~0 : 0;4745dst->u[2] = src0->u[2] != src1->u[2] ? ~0 : 0;4746dst->u[3] = src0->u[3] != src1->u[3] ? ~0 : 0;4747}47484749static void4750micro_uarl(union tgsi_exec_channel *dst,4751const union tgsi_exec_channel *src)4752{4753dst->i[0] = src->u[0];4754dst->i[1] = src->u[1];4755dst->i[2] = src->u[2];4756dst->i[3] = src->u[3];4757}47584759/**4760* Signed bitfield extract (i.e. sign-extend the extracted bits)4761*/4762static void4763micro_ibfe(union tgsi_exec_channel *dst,4764const union tgsi_exec_channel *src0,4765const union tgsi_exec_channel *src1,4766const union tgsi_exec_channel *src2)4767{4768int i;4769for (i = 0; i < 4; i++) {4770int width = src2->i[i];4771int offset = src1->i[i] & 0x1f;4772if (width == 32 && offset == 0) {4773dst->i[i] = src0->i[i];4774continue;4775}4776width &= 0x1f;4777if (width == 0)4778dst->i[i] = 0;4779else if (width + offset < 32)4780dst->i[i] = (src0->i[i] << (32 - width - offset)) >> (32 - width);4781else4782dst->i[i] = src0->i[i] >> offset;4783}4784}47854786/**4787* Unsigned bitfield extract4788*/4789static void4790micro_ubfe(union tgsi_exec_channel *dst,4791const union tgsi_exec_channel *src0,4792const union tgsi_exec_channel *src1,4793const union tgsi_exec_channel *src2)4794{4795int i;4796for (i = 0; i < 4; i++) {4797int width = src2->u[i];4798int offset = src1->u[i] & 0x1f;4799if (width == 32 && offset == 0) {4800dst->u[i] = src0->u[i];4801continue;4802}4803width &= 0x1f;4804if (width == 0)4805dst->u[i] = 0;4806else if (width + offset < 32)4807dst->u[i] = (src0->u[i] << (32 - width - offset)) >> (32 - width);4808else4809dst->u[i] = src0->u[i] >> offset;4810}4811}48124813/**4814* Bitfield insert: copy low bits from src1 into a region of src0.4815*/4816static void4817micro_bfi(union tgsi_exec_channel *dst,4818const union tgsi_exec_channel *src0,4819const union tgsi_exec_channel *src1,4820const union tgsi_exec_channel *src2,4821const union tgsi_exec_channel *src3)4822{4823int i;4824for (i = 0; i < 4; i++) {4825int width = src3->u[i];4826int offset = src2->u[i] & 0x1f;4827if (width == 32) {4828dst->u[i] = src1->u[i];4829} else {4830int bitmask = ((1 << width) - 1) << offset;4831dst->u[i] = ((src1->u[i] << offset) & bitmask) | (src0->u[i] & ~bitmask);4832}4833}4834}48354836static void4837micro_brev(union tgsi_exec_channel *dst,4838const union tgsi_exec_channel *src)4839{4840dst->u[0] = util_bitreverse(src->u[0]);4841dst->u[1] = util_bitreverse(src->u[1]);4842dst->u[2] = util_bitreverse(src->u[2]);4843dst->u[3] = util_bitreverse(src->u[3]);4844}48454846static void4847micro_popc(union tgsi_exec_channel *dst,4848const union tgsi_exec_channel *src)4849{4850dst->u[0] = util_bitcount(src->u[0]);4851dst->u[1] = util_bitcount(src->u[1]);4852dst->u[2] = util_bitcount(src->u[2]);4853dst->u[3] = util_bitcount(src->u[3]);4854}48554856static void4857micro_lsb(union tgsi_exec_channel *dst,4858const union tgsi_exec_channel *src)4859{4860dst->i[0] = ffs(src->u[0]) - 1;4861dst->i[1] = ffs(src->u[1]) - 1;4862dst->i[2] = ffs(src->u[2]) - 1;4863dst->i[3] = ffs(src->u[3]) - 1;4864}48654866static void4867micro_imsb(union tgsi_exec_channel *dst,4868const union tgsi_exec_channel *src)4869{4870dst->i[0] = util_last_bit_signed(src->i[0]) - 1;4871dst->i[1] = util_last_bit_signed(src->i[1]) - 1;4872dst->i[2] = util_last_bit_signed(src->i[2]) - 1;4873dst->i[3] = util_last_bit_signed(src->i[3]) - 1;4874}48754876static void4877micro_umsb(union tgsi_exec_channel *dst,4878const union tgsi_exec_channel *src)4879{4880dst->i[0] = util_last_bit(src->u[0]) - 1;4881dst->i[1] = util_last_bit(src->u[1]) - 1;4882dst->i[2] = util_last_bit(src->u[2]) - 1;4883dst->i[3] = util_last_bit(src->u[3]) - 1;4884}488548864887static void4888exec_interp_at_sample(struct tgsi_exec_machine *mach,4889const struct tgsi_full_instruction *inst)4890{4891union tgsi_exec_channel index;4892union tgsi_exec_channel index2D;4893union tgsi_exec_channel result[TGSI_NUM_CHANNELS];4894const struct tgsi_full_src_register *reg = &inst->Src[0];48954896assert(reg->Register.File == TGSI_FILE_INPUT);4897assert(inst->Src[1].Register.File == TGSI_FILE_IMMEDIATE);48984899get_index_registers(mach, reg, &index, &index2D);4900float sample = mach->Imms[inst->Src[1].Register.Index][inst->Src[1].Register.SwizzleX];49014902/* Short cut: sample 0 is like a normal fetch */4903for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {4904if (!(inst->Dst[0].Register.WriteMask & (1 << chan)))4905continue;49064907fetch_src_file_channel(mach, TGSI_FILE_INPUT, chan, &index, &index2D,4908&result[chan]);4909if (sample != 0.0f) {49104911/* TODO: define the samples > 0, but so far we only do fake MSAA */4912float x = 0;4913float y = 0;49144915unsigned pos = index2D.i[chan] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index.i[chan];4916assert(pos >= 0);4917assert(pos < TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS);4918mach->InputSampleOffsetApply[pos](mach, pos, chan, x, y, &result[chan]);4919}4920store_dest(mach, &result[chan], &inst->Dst[0], inst, chan);4921}4922}492349244925static void4926exec_interp_at_offset(struct tgsi_exec_machine *mach,4927const struct tgsi_full_instruction *inst)4928{4929union tgsi_exec_channel index;4930union tgsi_exec_channel index2D;4931union tgsi_exec_channel ofsx;4932union tgsi_exec_channel ofsy;4933const struct tgsi_full_src_register *reg = &inst->Src[0];49344935assert(reg->Register.File == TGSI_FILE_INPUT);49364937get_index_registers(mach, reg, &index, &index2D);4938unsigned pos = index2D.i[0] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index.i[0];49394940fetch_source(mach, &ofsx, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);4941fetch_source(mach, &ofsy, &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);49424943for (int chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {4944if (!(inst->Dst[0].Register.WriteMask & (1 << chan)))4945continue;4946union tgsi_exec_channel result;4947fetch_src_file_channel(mach, TGSI_FILE_INPUT, chan, &index, &index2D, &result);4948mach->InputSampleOffsetApply[pos](mach, pos, chan, ofsx.f[chan], ofsy.f[chan], &result);4949store_dest(mach, &result, &inst->Dst[0], inst, chan);4950}4951}495249534954static void4955exec_interp_at_centroid(struct tgsi_exec_machine *mach,4956const struct tgsi_full_instruction *inst)4957{4958union tgsi_exec_channel index;4959union tgsi_exec_channel index2D;4960union tgsi_exec_channel result[TGSI_NUM_CHANNELS];4961const struct tgsi_full_src_register *reg = &inst->Src[0];49624963assert(reg->Register.File == TGSI_FILE_INPUT);4964get_index_registers(mach, reg, &index, &index2D);49654966for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {4967if (!(inst->Dst[0].Register.WriteMask & (1 << chan)))4968continue;49694970/* Here we should add the change to use a sample that lies within the4971* primitive (Section 15.2):4972*4973* "When interpolating variables declared using centroid in ,4974* the variable is sampled at a location within the pixel covered4975* by the primitive generating the fragment.4976* ...4977* The built-in functions interpolateAtCentroid ... will sample4978* variables as though they were declared with the centroid ...4979* qualifier[s]."4980*4981* Since we only support 1 sample currently, this is just a pass-through.4982*/4983fetch_src_file_channel(mach, TGSI_FILE_INPUT, chan, &index, &index2D,4984&result[chan]);4985store_dest(mach, &result[chan], &inst->Dst[0], inst, chan);4986}49874988}498949904991/**4992* Execute a TGSI instruction.4993* Returns TRUE if a barrier instruction is hit,4994* otherwise FALSE.4995*/4996static boolean4997exec_instruction(4998struct tgsi_exec_machine *mach,4999const struct tgsi_full_instruction *inst,5000int *pc )5001{5002union tgsi_exec_channel r[10];50035004(*pc)++;50055006switch (inst->Instruction.Opcode) {5007case TGSI_OPCODE_ARL:5008exec_vector_unary(mach, inst, micro_arl, TGSI_EXEC_DATA_FLOAT);5009break;50105011case TGSI_OPCODE_MOV:5012exec_vector_unary(mach, inst, micro_mov, TGSI_EXEC_DATA_FLOAT);5013break;50145015case TGSI_OPCODE_LIT:5016exec_lit(mach, inst);5017break;50185019case TGSI_OPCODE_RCP:5020exec_scalar_unary(mach, inst, micro_rcp, TGSI_EXEC_DATA_FLOAT);5021break;50225023case TGSI_OPCODE_RSQ:5024exec_scalar_unary(mach, inst, micro_rsq, TGSI_EXEC_DATA_FLOAT);5025break;50265027case TGSI_OPCODE_EXP:5028exec_exp(mach, inst);5029break;50305031case TGSI_OPCODE_LOG:5032exec_log(mach, inst);5033break;50345035case TGSI_OPCODE_MUL:5036exec_vector_binary(mach, inst, micro_mul, TGSI_EXEC_DATA_FLOAT);5037break;50385039case TGSI_OPCODE_ADD:5040exec_vector_binary(mach, inst, micro_add, TGSI_EXEC_DATA_FLOAT);5041break;50425043case TGSI_OPCODE_DP3:5044exec_dp3(mach, inst);5045break;50465047case TGSI_OPCODE_DP4:5048exec_dp4(mach, inst);5049break;50505051case TGSI_OPCODE_DST:5052exec_dst(mach, inst);5053break;50545055case TGSI_OPCODE_MIN:5056exec_vector_binary(mach, inst, micro_min, TGSI_EXEC_DATA_FLOAT);5057break;50585059case TGSI_OPCODE_MAX:5060exec_vector_binary(mach, inst, micro_max, TGSI_EXEC_DATA_FLOAT);5061break;50625063case TGSI_OPCODE_SLT:5064exec_vector_binary(mach, inst, micro_slt, TGSI_EXEC_DATA_FLOAT);5065break;50665067case TGSI_OPCODE_SGE:5068exec_vector_binary(mach, inst, micro_sge, TGSI_EXEC_DATA_FLOAT);5069break;50705071case TGSI_OPCODE_MAD:5072exec_vector_trinary(mach, inst, micro_mad, TGSI_EXEC_DATA_FLOAT);5073break;50745075case TGSI_OPCODE_LRP:5076exec_vector_trinary(mach, inst, micro_lrp, TGSI_EXEC_DATA_FLOAT);5077break;50785079case TGSI_OPCODE_SQRT:5080exec_scalar_unary(mach, inst, micro_sqrt, TGSI_EXEC_DATA_FLOAT);5081break;50825083case TGSI_OPCODE_FRC:5084exec_vector_unary(mach, inst, micro_frc, TGSI_EXEC_DATA_FLOAT);5085break;50865087case TGSI_OPCODE_FLR:5088exec_vector_unary(mach, inst, micro_flr, TGSI_EXEC_DATA_FLOAT);5089break;50905091case TGSI_OPCODE_ROUND:5092exec_vector_unary(mach, inst, micro_rnd, TGSI_EXEC_DATA_FLOAT);5093break;50945095case TGSI_OPCODE_EX2:5096exec_scalar_unary(mach, inst, micro_exp2, TGSI_EXEC_DATA_FLOAT);5097break;50985099case TGSI_OPCODE_LG2:5100exec_scalar_unary(mach, inst, micro_lg2, TGSI_EXEC_DATA_FLOAT);5101break;51025103case TGSI_OPCODE_POW:5104exec_scalar_binary(mach, inst, micro_pow, TGSI_EXEC_DATA_FLOAT);5105break;51065107case TGSI_OPCODE_LDEXP:5108exec_vector_binary(mach, inst, micro_ldexp, TGSI_EXEC_DATA_FLOAT);5109break;51105111case TGSI_OPCODE_COS:5112exec_scalar_unary(mach, inst, micro_cos, TGSI_EXEC_DATA_FLOAT);5113break;51145115case TGSI_OPCODE_DDX_FINE:5116exec_vector_unary(mach, inst, micro_ddx_fine, TGSI_EXEC_DATA_FLOAT);5117break;51185119case TGSI_OPCODE_DDX:5120exec_vector_unary(mach, inst, micro_ddx, TGSI_EXEC_DATA_FLOAT);5121break;51225123case TGSI_OPCODE_DDY_FINE:5124exec_vector_unary(mach, inst, micro_ddy_fine, TGSI_EXEC_DATA_FLOAT);5125break;51265127case TGSI_OPCODE_DDY:5128exec_vector_unary(mach, inst, micro_ddy, TGSI_EXEC_DATA_FLOAT);5129break;51305131case TGSI_OPCODE_KILL:5132exec_kill (mach);5133break;51345135case TGSI_OPCODE_KILL_IF:5136exec_kill_if (mach, inst);5137break;51385139case TGSI_OPCODE_PK2H:5140exec_pk2h(mach, inst);5141break;51425143case TGSI_OPCODE_PK2US:5144assert (0);5145break;51465147case TGSI_OPCODE_PK4B:5148assert (0);5149break;51505151case TGSI_OPCODE_PK4UB:5152assert (0);5153break;51545155case TGSI_OPCODE_SEQ:5156exec_vector_binary(mach, inst, micro_seq, TGSI_EXEC_DATA_FLOAT);5157break;51585159case TGSI_OPCODE_SGT:5160exec_vector_binary(mach, inst, micro_sgt, TGSI_EXEC_DATA_FLOAT);5161break;51625163case TGSI_OPCODE_SIN:5164exec_scalar_unary(mach, inst, micro_sin, TGSI_EXEC_DATA_FLOAT);5165break;51665167case TGSI_OPCODE_SLE:5168exec_vector_binary(mach, inst, micro_sle, TGSI_EXEC_DATA_FLOAT);5169break;51705171case TGSI_OPCODE_SNE:5172exec_vector_binary(mach, inst, micro_sne, TGSI_EXEC_DATA_FLOAT);5173break;51745175case TGSI_OPCODE_TEX:5176/* simple texture lookup */5177/* src[0] = texcoord */5178/* src[1] = sampler unit */5179exec_tex(mach, inst, TEX_MODIFIER_NONE, 1);5180break;51815182case TGSI_OPCODE_TXB:5183/* Texture lookup with lod bias */5184/* src[0] = texcoord (src[0].w = LOD bias) */5185/* src[1] = sampler unit */5186exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS, 1);5187break;51885189case TGSI_OPCODE_TXD:5190/* Texture lookup with explict partial derivatives */5191/* src[0] = texcoord */5192/* src[1] = d[strq]/dx */5193/* src[2] = d[strq]/dy */5194/* src[3] = sampler unit */5195exec_txd(mach, inst);5196break;51975198case TGSI_OPCODE_TXL:5199/* Texture lookup with explit LOD */5200/* src[0] = texcoord (src[0].w = LOD) */5201/* src[1] = sampler unit */5202exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, 1);5203break;52045205case TGSI_OPCODE_TXP:5206/* Texture lookup with projection */5207/* src[0] = texcoord (src[0].w = projection) */5208/* src[1] = sampler unit */5209exec_tex(mach, inst, TEX_MODIFIER_PROJECTED, 1);5210break;52115212case TGSI_OPCODE_TG4:5213/* src[0] = texcoord */5214/* src[1] = component */5215/* src[2] = sampler unit */5216exec_tex(mach, inst, TEX_MODIFIER_GATHER, 2);5217break;52185219case TGSI_OPCODE_LODQ:5220/* src[0] = texcoord */5221/* src[1] = sampler unit */5222exec_lodq(mach, inst);5223break;52245225case TGSI_OPCODE_UP2H:5226exec_up2h(mach, inst);5227break;52285229case TGSI_OPCODE_UP2US:5230assert (0);5231break;52325233case TGSI_OPCODE_UP4B:5234assert (0);5235break;52365237case TGSI_OPCODE_UP4UB:5238assert (0);5239break;52405241case TGSI_OPCODE_ARR:5242exec_vector_unary(mach, inst, micro_arr, TGSI_EXEC_DATA_FLOAT);5243break;52445245case TGSI_OPCODE_CAL:5246/* skip the call if no execution channels are enabled */5247if (mach->ExecMask) {5248/* do the call */52495250/* First, record the depths of the execution stacks.5251* This is important for deeply nested/looped return statements.5252* We have to unwind the stacks by the correct amount. For a5253* real code generator, we could determine the number of entries5254* to pop off each stack with simple static analysis and avoid5255* implementing this data structure at run time.5256*/5257mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop;5258mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop;5259mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop;5260mach->CallStack[mach->CallStackTop].SwitchStackTop = mach->SwitchStackTop;5261mach->CallStack[mach->CallStackTop].BreakStackTop = mach->BreakStackTop;5262/* note that PC was already incremented above */5263mach->CallStack[mach->CallStackTop].ReturnAddr = *pc;52645265mach->CallStackTop++;52665267/* Second, push the Cond, Loop, Cont, Func stacks */5268assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);5269assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);5270assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);5271assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING);5272assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);5273assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);52745275mach->CondStack[mach->CondStackTop++] = mach->CondMask;5276mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;5277mach->ContStack[mach->ContStackTop++] = mach->ContMask;5278mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch;5279mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;5280mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;52815282/* Finally, jump to the subroutine. The label is a pointer5283* (an instruction number) to the BGNSUB instruction.5284*/5285*pc = inst->Label.Label;5286assert(mach->Instructions[*pc].Instruction.Opcode5287== TGSI_OPCODE_BGNSUB);5288}5289break;52905291case TGSI_OPCODE_RET:5292mach->FuncMask &= ~mach->ExecMask;5293UPDATE_EXEC_MASK(mach);52945295if (mach->FuncMask == 0x0) {5296/* really return now (otherwise, keep executing */52975298if (mach->CallStackTop == 0) {5299/* returning from main() */5300mach->CondStackTop = 0;5301mach->LoopStackTop = 0;5302mach->ContStackTop = 0;5303mach->LoopLabelStackTop = 0;5304mach->SwitchStackTop = 0;5305mach->BreakStackTop = 0;5306*pc = -1;5307return FALSE;5308}53095310assert(mach->CallStackTop > 0);5311mach->CallStackTop--;53125313mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;5314mach->CondMask = mach->CondStack[mach->CondStackTop];53155316mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;5317mach->LoopMask = mach->LoopStack[mach->LoopStackTop];53185319mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;5320mach->ContMask = mach->ContStack[mach->ContStackTop];53215322mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop;5323mach->Switch = mach->SwitchStack[mach->SwitchStackTop];53245325mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop;5326mach->BreakType = mach->BreakStack[mach->BreakStackTop];53275328assert(mach->FuncStackTop > 0);5329mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];53305331*pc = mach->CallStack[mach->CallStackTop].ReturnAddr;53325333UPDATE_EXEC_MASK(mach);5334}5335break;53365337case TGSI_OPCODE_SSG:5338exec_vector_unary(mach, inst, micro_sgn, TGSI_EXEC_DATA_FLOAT);5339break;53405341case TGSI_OPCODE_CMP:5342exec_vector_trinary(mach, inst, micro_cmp, TGSI_EXEC_DATA_FLOAT);5343break;53445345case TGSI_OPCODE_DIV:5346exec_vector_binary(mach, inst, micro_div, TGSI_EXEC_DATA_FLOAT);5347break;53485349case TGSI_OPCODE_DP2:5350exec_dp2(mach, inst);5351break;53525353case TGSI_OPCODE_IF:5354/* push CondMask */5355assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);5356mach->CondStack[mach->CondStackTop++] = mach->CondMask;5357FETCH( &r[0], 0, TGSI_CHAN_X );5358for (int i = 0; i < TGSI_QUAD_SIZE; i++) {5359if (!r[0].f[i])5360mach->CondMask &= ~(1 << i);5361}5362UPDATE_EXEC_MASK(mach);5363/* If no channels are taking the then branch, jump to ELSE. */5364if (!mach->CondMask)5365*pc = inst->Label.Label;5366break;53675368case TGSI_OPCODE_UIF:5369/* push CondMask */5370assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);5371mach->CondStack[mach->CondStackTop++] = mach->CondMask;5372IFETCH( &r[0], 0, TGSI_CHAN_X );5373for (int i = 0; i < TGSI_QUAD_SIZE; i++) {5374if (!r[0].u[i])5375mach->CondMask &= ~(1 << i);5376}5377UPDATE_EXEC_MASK(mach);5378/* If no channels are taking the then branch, jump to ELSE. */5379if (!mach->CondMask)5380*pc = inst->Label.Label;5381break;53825383case TGSI_OPCODE_ELSE:5384/* invert CondMask wrt previous mask */5385{5386uint prevMask;5387assert(mach->CondStackTop > 0);5388prevMask = mach->CondStack[mach->CondStackTop - 1];5389mach->CondMask = ~mach->CondMask & prevMask;5390UPDATE_EXEC_MASK(mach);53915392/* If no channels are taking ELSE, jump to ENDIF */5393if (!mach->CondMask)5394*pc = inst->Label.Label;5395}5396break;53975398case TGSI_OPCODE_ENDIF:5399/* pop CondMask */5400assert(mach->CondStackTop > 0);5401mach->CondMask = mach->CondStack[--mach->CondStackTop];5402UPDATE_EXEC_MASK(mach);5403break;54045405case TGSI_OPCODE_END:5406/* make sure we end primitives which haven't5407* been explicitly emitted */5408conditional_emit_primitive(mach);5409/* halt execution */5410*pc = -1;5411break;54125413case TGSI_OPCODE_CEIL:5414exec_vector_unary(mach, inst, micro_ceil, TGSI_EXEC_DATA_FLOAT);5415break;54165417case TGSI_OPCODE_I2F:5418exec_vector_unary(mach, inst, micro_i2f, TGSI_EXEC_DATA_INT);5419break;54205421case TGSI_OPCODE_NOT:5422exec_vector_unary(mach, inst, micro_not, TGSI_EXEC_DATA_UINT);5423break;54245425case TGSI_OPCODE_TRUNC:5426exec_vector_unary(mach, inst, micro_trunc, TGSI_EXEC_DATA_FLOAT);5427break;54285429case TGSI_OPCODE_SHL:5430exec_vector_binary(mach, inst, micro_shl, TGSI_EXEC_DATA_UINT);5431break;54325433case TGSI_OPCODE_AND:5434exec_vector_binary(mach, inst, micro_and, TGSI_EXEC_DATA_UINT);5435break;54365437case TGSI_OPCODE_OR:5438exec_vector_binary(mach, inst, micro_or, TGSI_EXEC_DATA_UINT);5439break;54405441case TGSI_OPCODE_MOD:5442exec_vector_binary(mach, inst, micro_mod, TGSI_EXEC_DATA_INT);5443break;54445445case TGSI_OPCODE_XOR:5446exec_vector_binary(mach, inst, micro_xor, TGSI_EXEC_DATA_UINT);5447break;54485449case TGSI_OPCODE_TXF:5450exec_txf(mach, inst);5451break;54525453case TGSI_OPCODE_TXQ:5454exec_txq(mach, inst);5455break;54565457case TGSI_OPCODE_EMIT:5458emit_vertex(mach, inst);5459break;54605461case TGSI_OPCODE_ENDPRIM:5462emit_primitive(mach, inst);5463break;54645465case TGSI_OPCODE_BGNLOOP:5466/* push LoopMask and ContMasks */5467assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);5468assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);5469assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING);5470assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);54715472mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;5473mach->ContStack[mach->ContStackTop++] = mach->ContMask;5474mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1;5475mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;5476mach->BreakType = TGSI_EXEC_BREAK_INSIDE_LOOP;5477break;54785479case TGSI_OPCODE_ENDLOOP:5480/* Restore ContMask, but don't pop */5481assert(mach->ContStackTop > 0);5482mach->ContMask = mach->ContStack[mach->ContStackTop - 1];5483UPDATE_EXEC_MASK(mach);5484if (mach->ExecMask) {5485/* repeat loop: jump to instruction just past BGNLOOP */5486assert(mach->LoopLabelStackTop > 0);5487*pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1;5488}5489else {5490/* exit loop: pop LoopMask */5491assert(mach->LoopStackTop > 0);5492mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];5493/* pop ContMask */5494assert(mach->ContStackTop > 0);5495mach->ContMask = mach->ContStack[--mach->ContStackTop];5496assert(mach->LoopLabelStackTop > 0);5497--mach->LoopLabelStackTop;54985499mach->BreakType = mach->BreakStack[--mach->BreakStackTop];5500}5501UPDATE_EXEC_MASK(mach);5502break;55035504case TGSI_OPCODE_BRK:5505exec_break(mach);5506break;55075508case TGSI_OPCODE_CONT:5509/* turn off cont channels for each enabled exec channel */5510mach->ContMask &= ~mach->ExecMask;5511/* Todo: if mach->LoopMask == 0, jump to end of loop */5512UPDATE_EXEC_MASK(mach);5513break;55145515case TGSI_OPCODE_BGNSUB:5516/* no-op */5517break;55185519case TGSI_OPCODE_ENDSUB:5520/*5521* XXX: This really should be a no-op. We should never reach this opcode.5522*/55235524assert(mach->CallStackTop > 0);5525mach->CallStackTop--;55265527mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;5528mach->CondMask = mach->CondStack[mach->CondStackTop];55295530mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;5531mach->LoopMask = mach->LoopStack[mach->LoopStackTop];55325533mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;5534mach->ContMask = mach->ContStack[mach->ContStackTop];55355536mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop;5537mach->Switch = mach->SwitchStack[mach->SwitchStackTop];55385539mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop;5540mach->BreakType = mach->BreakStack[mach->BreakStackTop];55415542assert(mach->FuncStackTop > 0);5543mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];55445545*pc = mach->CallStack[mach->CallStackTop].ReturnAddr;55465547UPDATE_EXEC_MASK(mach);5548break;55495550case TGSI_OPCODE_NOP:5551break;55525553case TGSI_OPCODE_F2I:5554exec_vector_unary(mach, inst, micro_f2i, TGSI_EXEC_DATA_FLOAT);5555break;55565557case TGSI_OPCODE_FSEQ:5558exec_vector_binary(mach, inst, micro_fseq, TGSI_EXEC_DATA_FLOAT);5559break;55605561case TGSI_OPCODE_FSGE:5562exec_vector_binary(mach, inst, micro_fsge, TGSI_EXEC_DATA_FLOAT);5563break;55645565case TGSI_OPCODE_FSLT:5566exec_vector_binary(mach, inst, micro_fslt, TGSI_EXEC_DATA_FLOAT);5567break;55685569case TGSI_OPCODE_FSNE:5570exec_vector_binary(mach, inst, micro_fsne, TGSI_EXEC_DATA_FLOAT);5571break;55725573case TGSI_OPCODE_IDIV:5574exec_vector_binary(mach, inst, micro_idiv, TGSI_EXEC_DATA_INT);5575break;55765577case TGSI_OPCODE_IMAX:5578exec_vector_binary(mach, inst, micro_imax, TGSI_EXEC_DATA_INT);5579break;55805581case TGSI_OPCODE_IMIN:5582exec_vector_binary(mach, inst, micro_imin, TGSI_EXEC_DATA_INT);5583break;55845585case TGSI_OPCODE_INEG:5586exec_vector_unary(mach, inst, micro_ineg, TGSI_EXEC_DATA_INT);5587break;55885589case TGSI_OPCODE_ISGE:5590exec_vector_binary(mach, inst, micro_isge, TGSI_EXEC_DATA_INT);5591break;55925593case TGSI_OPCODE_ISHR:5594exec_vector_binary(mach, inst, micro_ishr, TGSI_EXEC_DATA_INT);5595break;55965597case TGSI_OPCODE_ISLT:5598exec_vector_binary(mach, inst, micro_islt, TGSI_EXEC_DATA_INT);5599break;56005601case TGSI_OPCODE_F2U:5602exec_vector_unary(mach, inst, micro_f2u, TGSI_EXEC_DATA_FLOAT);5603break;56045605case TGSI_OPCODE_U2F:5606exec_vector_unary(mach, inst, micro_u2f, TGSI_EXEC_DATA_UINT);5607break;56085609case TGSI_OPCODE_UADD:5610exec_vector_binary(mach, inst, micro_uadd, TGSI_EXEC_DATA_INT);5611break;56125613case TGSI_OPCODE_UDIV:5614exec_vector_binary(mach, inst, micro_udiv, TGSI_EXEC_DATA_UINT);5615break;56165617case TGSI_OPCODE_UMAD:5618exec_vector_trinary(mach, inst, micro_umad, TGSI_EXEC_DATA_UINT);5619break;56205621case TGSI_OPCODE_UMAX:5622exec_vector_binary(mach, inst, micro_umax, TGSI_EXEC_DATA_UINT);5623break;56245625case TGSI_OPCODE_UMIN:5626exec_vector_binary(mach, inst, micro_umin, TGSI_EXEC_DATA_UINT);5627break;56285629case TGSI_OPCODE_UMOD:5630exec_vector_binary(mach, inst, micro_umod, TGSI_EXEC_DATA_UINT);5631break;56325633case TGSI_OPCODE_UMUL:5634exec_vector_binary(mach, inst, micro_umul, TGSI_EXEC_DATA_UINT);5635break;56365637case TGSI_OPCODE_IMUL_HI:5638exec_vector_binary(mach, inst, micro_imul_hi, TGSI_EXEC_DATA_INT);5639break;56405641case TGSI_OPCODE_UMUL_HI:5642exec_vector_binary(mach, inst, micro_umul_hi, TGSI_EXEC_DATA_UINT);5643break;56445645case TGSI_OPCODE_USEQ:5646exec_vector_binary(mach, inst, micro_useq, TGSI_EXEC_DATA_UINT);5647break;56485649case TGSI_OPCODE_USGE:5650exec_vector_binary(mach, inst, micro_usge, TGSI_EXEC_DATA_UINT);5651break;56525653case TGSI_OPCODE_USHR:5654exec_vector_binary(mach, inst, micro_ushr, TGSI_EXEC_DATA_UINT);5655break;56565657case TGSI_OPCODE_USLT:5658exec_vector_binary(mach, inst, micro_uslt, TGSI_EXEC_DATA_UINT);5659break;56605661case TGSI_OPCODE_USNE:5662exec_vector_binary(mach, inst, micro_usne, TGSI_EXEC_DATA_UINT);5663break;56645665case TGSI_OPCODE_SWITCH:5666exec_switch(mach, inst);5667break;56685669case TGSI_OPCODE_CASE:5670exec_case(mach, inst);5671break;56725673case TGSI_OPCODE_DEFAULT:5674exec_default(mach);5675break;56765677case TGSI_OPCODE_ENDSWITCH:5678exec_endswitch(mach);5679break;56805681case TGSI_OPCODE_SAMPLE_I:5682exec_txf(mach, inst);5683break;56845685case TGSI_OPCODE_SAMPLE_I_MS:5686exec_txf(mach, inst);5687break;56885689case TGSI_OPCODE_SAMPLE:5690exec_sample(mach, inst, TEX_MODIFIER_NONE, FALSE);5691break;56925693case TGSI_OPCODE_SAMPLE_B:5694exec_sample(mach, inst, TEX_MODIFIER_LOD_BIAS, FALSE);5695break;56965697case TGSI_OPCODE_SAMPLE_C:5698exec_sample(mach, inst, TEX_MODIFIER_NONE, TRUE);5699break;57005701case TGSI_OPCODE_SAMPLE_C_LZ:5702exec_sample(mach, inst, TEX_MODIFIER_LEVEL_ZERO, TRUE);5703break;57045705case TGSI_OPCODE_SAMPLE_D:5706exec_sample_d(mach, inst);5707break;57085709case TGSI_OPCODE_SAMPLE_L:5710exec_sample(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, FALSE);5711break;57125713case TGSI_OPCODE_GATHER4:5714exec_sample(mach, inst, TEX_MODIFIER_GATHER, FALSE);5715break;57165717case TGSI_OPCODE_SVIEWINFO:5718exec_txq(mach, inst);5719break;57205721case TGSI_OPCODE_SAMPLE_POS:5722assert(0);5723break;57245725case TGSI_OPCODE_SAMPLE_INFO:5726assert(0);5727break;57285729case TGSI_OPCODE_LOD:5730exec_lodq(mach, inst);5731break;57325733case TGSI_OPCODE_UARL:5734exec_vector_unary(mach, inst, micro_uarl, TGSI_EXEC_DATA_UINT);5735break;57365737case TGSI_OPCODE_UCMP:5738exec_ucmp(mach, inst);5739break;57405741case TGSI_OPCODE_IABS:5742exec_vector_unary(mach, inst, micro_iabs, TGSI_EXEC_DATA_INT);5743break;57445745case TGSI_OPCODE_ISSG:5746exec_vector_unary(mach, inst, micro_isgn, TGSI_EXEC_DATA_INT);5747break;57485749case TGSI_OPCODE_TEX2:5750/* simple texture lookup */5751/* src[0] = texcoord */5752/* src[1] = compare */5753/* src[2] = sampler unit */5754exec_tex(mach, inst, TEX_MODIFIER_NONE, 2);5755break;5756case TGSI_OPCODE_TXB2:5757/* simple texture lookup */5758/* src[0] = texcoord */5759/* src[1] = bias */5760/* src[2] = sampler unit */5761exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS, 2);5762break;5763case TGSI_OPCODE_TXL2:5764/* simple texture lookup */5765/* src[0] = texcoord */5766/* src[1] = lod */5767/* src[2] = sampler unit */5768exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, 2);5769break;57705771case TGSI_OPCODE_IBFE:5772exec_vector_trinary(mach, inst, micro_ibfe, TGSI_EXEC_DATA_INT);5773break;5774case TGSI_OPCODE_UBFE:5775exec_vector_trinary(mach, inst, micro_ubfe, TGSI_EXEC_DATA_UINT);5776break;5777case TGSI_OPCODE_BFI:5778exec_vector_quaternary(mach, inst, micro_bfi, TGSI_EXEC_DATA_UINT);5779break;5780case TGSI_OPCODE_BREV:5781exec_vector_unary(mach, inst, micro_brev, TGSI_EXEC_DATA_UINT);5782break;5783case TGSI_OPCODE_POPC:5784exec_vector_unary(mach, inst, micro_popc, TGSI_EXEC_DATA_UINT);5785break;5786case TGSI_OPCODE_LSB:5787exec_vector_unary(mach, inst, micro_lsb, TGSI_EXEC_DATA_UINT);5788break;5789case TGSI_OPCODE_IMSB:5790exec_vector_unary(mach, inst, micro_imsb, TGSI_EXEC_DATA_INT);5791break;5792case TGSI_OPCODE_UMSB:5793exec_vector_unary(mach, inst, micro_umsb, TGSI_EXEC_DATA_UINT);5794break;57955796case TGSI_OPCODE_F2D:5797exec_t_2_64(mach, inst, micro_f2d, TGSI_EXEC_DATA_FLOAT);5798break;57995800case TGSI_OPCODE_D2F:5801exec_64_2_t(mach, inst, micro_d2f);5802break;58035804case TGSI_OPCODE_DABS:5805exec_double_unary(mach, inst, micro_dabs);5806break;58075808case TGSI_OPCODE_DNEG:5809exec_double_unary(mach, inst, micro_dneg);5810break;58115812case TGSI_OPCODE_DADD:5813exec_double_binary(mach, inst, micro_dadd, TGSI_EXEC_DATA_DOUBLE);5814break;58155816case TGSI_OPCODE_DDIV:5817exec_double_binary(mach, inst, micro_ddiv, TGSI_EXEC_DATA_DOUBLE);5818break;58195820case TGSI_OPCODE_DMUL:5821exec_double_binary(mach, inst, micro_dmul, TGSI_EXEC_DATA_DOUBLE);5822break;58235824case TGSI_OPCODE_DMAX:5825exec_double_binary(mach, inst, micro_dmax, TGSI_EXEC_DATA_DOUBLE);5826break;58275828case TGSI_OPCODE_DMIN:5829exec_double_binary(mach, inst, micro_dmin, TGSI_EXEC_DATA_DOUBLE);5830break;58315832case TGSI_OPCODE_DSLT:5833exec_double_binary(mach, inst, micro_dslt, TGSI_EXEC_DATA_UINT);5834break;58355836case TGSI_OPCODE_DSGE:5837exec_double_binary(mach, inst, micro_dsge, TGSI_EXEC_DATA_UINT);5838break;58395840case TGSI_OPCODE_DSEQ:5841exec_double_binary(mach, inst, micro_dseq, TGSI_EXEC_DATA_UINT);5842break;58435844case TGSI_OPCODE_DSNE:5845exec_double_binary(mach, inst, micro_dsne, TGSI_EXEC_DATA_UINT);5846break;58475848case TGSI_OPCODE_DRCP:5849exec_double_unary(mach, inst, micro_drcp);5850break;58515852case TGSI_OPCODE_DSQRT:5853exec_double_unary(mach, inst, micro_dsqrt);5854break;58555856case TGSI_OPCODE_DRSQ:5857exec_double_unary(mach, inst, micro_drsq);5858break;58595860case TGSI_OPCODE_DMAD:5861exec_double_trinary(mach, inst, micro_dmad);5862break;58635864case TGSI_OPCODE_DFRAC:5865exec_double_unary(mach, inst, micro_dfrac);5866break;58675868case TGSI_OPCODE_DFLR:5869exec_double_unary(mach, inst, micro_dflr);5870break;58715872case TGSI_OPCODE_DLDEXP:5873exec_dldexp(mach, inst);5874break;58755876case TGSI_OPCODE_DFRACEXP:5877exec_dfracexp(mach, inst);5878break;58795880case TGSI_OPCODE_I2D:5881exec_t_2_64(mach, inst, micro_i2d, TGSI_EXEC_DATA_FLOAT);5882break;58835884case TGSI_OPCODE_D2I:5885exec_64_2_t(mach, inst, micro_d2i);5886break;58875888case TGSI_OPCODE_U2D:5889exec_t_2_64(mach, inst, micro_u2d, TGSI_EXEC_DATA_FLOAT);5890break;58915892case TGSI_OPCODE_D2U:5893exec_64_2_t(mach, inst, micro_d2u);5894break;58955896case TGSI_OPCODE_LOAD:5897exec_load(mach, inst);5898break;58995900case TGSI_OPCODE_STORE:5901exec_store(mach, inst);5902break;59035904case TGSI_OPCODE_ATOMUADD:5905case TGSI_OPCODE_ATOMXCHG:5906case TGSI_OPCODE_ATOMCAS:5907case TGSI_OPCODE_ATOMAND:5908case TGSI_OPCODE_ATOMOR:5909case TGSI_OPCODE_ATOMXOR:5910case TGSI_OPCODE_ATOMUMIN:5911case TGSI_OPCODE_ATOMUMAX:5912case TGSI_OPCODE_ATOMIMIN:5913case TGSI_OPCODE_ATOMIMAX:5914case TGSI_OPCODE_ATOMFADD:5915exec_atomop(mach, inst);5916break;59175918case TGSI_OPCODE_RESQ:5919exec_resq(mach, inst);5920break;5921case TGSI_OPCODE_BARRIER:5922case TGSI_OPCODE_MEMBAR:5923return TRUE;5924break;59255926case TGSI_OPCODE_I64ABS:5927exec_double_unary(mach, inst, micro_i64abs);5928break;59295930case TGSI_OPCODE_I64SSG:5931exec_double_unary(mach, inst, micro_i64sgn);5932break;59335934case TGSI_OPCODE_I64NEG:5935exec_double_unary(mach, inst, micro_i64neg);5936break;59375938case TGSI_OPCODE_U64SEQ:5939exec_double_binary(mach, inst, micro_u64seq, TGSI_EXEC_DATA_UINT);5940break;59415942case TGSI_OPCODE_U64SNE:5943exec_double_binary(mach, inst, micro_u64sne, TGSI_EXEC_DATA_UINT);5944break;59455946case TGSI_OPCODE_I64SLT:5947exec_double_binary(mach, inst, micro_i64slt, TGSI_EXEC_DATA_UINT);5948break;5949case TGSI_OPCODE_U64SLT:5950exec_double_binary(mach, inst, micro_u64slt, TGSI_EXEC_DATA_UINT);5951break;59525953case TGSI_OPCODE_I64SGE:5954exec_double_binary(mach, inst, micro_i64sge, TGSI_EXEC_DATA_UINT);5955break;5956case TGSI_OPCODE_U64SGE:5957exec_double_binary(mach, inst, micro_u64sge, TGSI_EXEC_DATA_UINT);5958break;59595960case TGSI_OPCODE_I64MIN:5961exec_double_binary(mach, inst, micro_i64min, TGSI_EXEC_DATA_INT64);5962break;5963case TGSI_OPCODE_U64MIN:5964exec_double_binary(mach, inst, micro_u64min, TGSI_EXEC_DATA_UINT64);5965break;5966case TGSI_OPCODE_I64MAX:5967exec_double_binary(mach, inst, micro_i64max, TGSI_EXEC_DATA_INT64);5968break;5969case TGSI_OPCODE_U64MAX:5970exec_double_binary(mach, inst, micro_u64max, TGSI_EXEC_DATA_UINT64);5971break;5972case TGSI_OPCODE_U64ADD:5973exec_double_binary(mach, inst, micro_u64add, TGSI_EXEC_DATA_UINT64);5974break;5975case TGSI_OPCODE_U64MUL:5976exec_double_binary(mach, inst, micro_u64mul, TGSI_EXEC_DATA_UINT64);5977break;5978case TGSI_OPCODE_U64SHL:5979exec_arg0_64_arg1_32(mach, inst, micro_u64shl);5980break;5981case TGSI_OPCODE_I64SHR:5982exec_arg0_64_arg1_32(mach, inst, micro_i64shr);5983break;5984case TGSI_OPCODE_U64SHR:5985exec_arg0_64_arg1_32(mach, inst, micro_u64shr);5986break;5987case TGSI_OPCODE_U64DIV:5988exec_double_binary(mach, inst, micro_u64div, TGSI_EXEC_DATA_UINT64);5989break;5990case TGSI_OPCODE_I64DIV:5991exec_double_binary(mach, inst, micro_i64div, TGSI_EXEC_DATA_INT64);5992break;5993case TGSI_OPCODE_U64MOD:5994exec_double_binary(mach, inst, micro_u64mod, TGSI_EXEC_DATA_UINT64);5995break;5996case TGSI_OPCODE_I64MOD:5997exec_double_binary(mach, inst, micro_i64mod, TGSI_EXEC_DATA_INT64);5998break;59996000case TGSI_OPCODE_F2U64:6001exec_t_2_64(mach, inst, micro_f2u64, TGSI_EXEC_DATA_FLOAT);6002break;60036004case TGSI_OPCODE_F2I64:6005exec_t_2_64(mach, inst, micro_f2i64, TGSI_EXEC_DATA_FLOAT);6006break;60076008case TGSI_OPCODE_U2I64:6009exec_t_2_64(mach, inst, micro_u2i64, TGSI_EXEC_DATA_INT);6010break;6011case TGSI_OPCODE_I2I64:6012exec_t_2_64(mach, inst, micro_i2i64, TGSI_EXEC_DATA_INT);6013break;60146015case TGSI_OPCODE_D2U64:6016exec_double_unary(mach, inst, micro_d2u64);6017break;60186019case TGSI_OPCODE_D2I64:6020exec_double_unary(mach, inst, micro_d2i64);6021break;60226023case TGSI_OPCODE_U642F:6024exec_64_2_t(mach, inst, micro_u642f);6025break;6026case TGSI_OPCODE_I642F:6027exec_64_2_t(mach, inst, micro_i642f);6028break;60296030case TGSI_OPCODE_U642D:6031exec_double_unary(mach, inst, micro_u642d);6032break;6033case TGSI_OPCODE_I642D:6034exec_double_unary(mach, inst, micro_i642d);6035break;6036case TGSI_OPCODE_INTERP_SAMPLE:6037exec_interp_at_sample(mach, inst);6038break;6039case TGSI_OPCODE_INTERP_OFFSET:6040exec_interp_at_offset(mach, inst);6041break;6042case TGSI_OPCODE_INTERP_CENTROID:6043exec_interp_at_centroid(mach, inst);6044break;6045default:6046assert( 0 );6047}6048return FALSE;6049}60506051static void6052tgsi_exec_machine_setup_masks(struct tgsi_exec_machine *mach)6053{6054uint default_mask = 0xf;60556056mach->KillMask = 0;6057mach->OutputVertexOffset = 0;60586059if (mach->ShaderType == PIPE_SHADER_GEOMETRY) {6060for (unsigned i = 0; i < TGSI_MAX_VERTEX_STREAMS; i++) {6061mach->OutputPrimCount[i] = 0;6062mach->Primitives[i][0] = 0;6063}6064/* GS runs on a single primitive for now */6065default_mask = 0x1;6066}60676068if (mach->NonHelperMask == 0)6069mach->NonHelperMask = default_mask;6070mach->CondMask = default_mask;6071mach->LoopMask = default_mask;6072mach->ContMask = default_mask;6073mach->FuncMask = default_mask;6074mach->ExecMask = default_mask;60756076mach->Switch.mask = default_mask;60776078assert(mach->CondStackTop == 0);6079assert(mach->LoopStackTop == 0);6080assert(mach->ContStackTop == 0);6081assert(mach->SwitchStackTop == 0);6082assert(mach->BreakStackTop == 0);6083assert(mach->CallStackTop == 0);6084}60856086/**6087* Run TGSI interpreter.6088* \return bitmask of "alive" quad components6089*/6090uint6091tgsi_exec_machine_run( struct tgsi_exec_machine *mach, int start_pc )6092{6093uint i;60946095mach->pc = start_pc;60966097if (!start_pc) {6098tgsi_exec_machine_setup_masks(mach);60996100/* execute declarations (interpolants) */6101for (i = 0; i < mach->NumDeclarations; i++) {6102exec_declaration( mach, mach->Declarations+i );6103}6104}61056106{6107#if DEBUG_EXECUTION6108struct tgsi_exec_vector temps[TGSI_EXEC_NUM_TEMPS];6109struct tgsi_exec_vector outputs[PIPE_MAX_ATTRIBS];6110uint inst = 1;61116112if (!start_pc) {6113memset(mach->Temps, 0, sizeof(temps));6114if (mach->Outputs)6115memset(mach->Outputs, 0, sizeof(outputs));6116memset(temps, 0, sizeof(temps));6117memset(outputs, 0, sizeof(outputs));6118}6119#endif61206121/* execute instructions, until pc is set to -1 */6122while (mach->pc != -1) {6123boolean barrier_hit;6124#if DEBUG_EXECUTION6125uint i;61266127tgsi_dump_instruction(&mach->Instructions[mach->pc], inst++);6128#endif61296130assert(mach->pc < (int) mach->NumInstructions);6131barrier_hit = exec_instruction(mach, mach->Instructions + mach->pc, &mach->pc);61326133/* for compute shaders if we hit a barrier return now for later rescheduling */6134if (barrier_hit && mach->ShaderType == PIPE_SHADER_COMPUTE)6135return 0;61366137#if DEBUG_EXECUTION6138for (i = 0; i < TGSI_EXEC_NUM_TEMPS; i++) {6139if (memcmp(&temps[i], &mach->Temps[i], sizeof(temps[i]))) {6140uint j;61416142memcpy(&temps[i], &mach->Temps[i], sizeof(temps[i]));6143debug_printf("TEMP[%2u] = ", i);6144for (j = 0; j < 4; j++) {6145if (j > 0) {6146debug_printf(" ");6147}6148debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",6149temps[i].xyzw[0].f[j], temps[i].xyzw[0].u[j],6150temps[i].xyzw[1].f[j], temps[i].xyzw[1].u[j],6151temps[i].xyzw[2].f[j], temps[i].xyzw[2].u[j],6152temps[i].xyzw[3].f[j], temps[i].xyzw[3].u[j]);6153}6154}6155}6156if (mach->Outputs) {6157for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {6158if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) {6159uint j;61606161memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]));6162debug_printf("OUT[%2u] = ", i);6163for (j = 0; j < 4; j++) {6164if (j > 0) {6165debug_printf(" ");6166}6167debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",6168outputs[i].xyzw[0].f[j], outputs[i].xyzw[0].u[j],6169outputs[i].xyzw[1].f[j], outputs[i].xyzw[1].u[j],6170outputs[i].xyzw[2].f[j], outputs[i].xyzw[2].u[j],6171outputs[i].xyzw[3].f[j], outputs[i].xyzw[3].u[j]);6172}6173}6174}6175}6176#endif6177}6178}61796180#if 06181/* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */6182if (mach->ShaderType == PIPE_SHADER_FRAGMENT) {6183/*6184* Scale back depth component.6185*/6186for (i = 0; i < 4; i++)6187mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;6188}6189#endif61906191/* Strictly speaking, these assertions aren't really needed but they6192* can potentially catch some bugs in the control flow code.6193*/6194assert(mach->CondStackTop == 0);6195assert(mach->LoopStackTop == 0);6196assert(mach->ContStackTop == 0);6197assert(mach->SwitchStackTop == 0);6198assert(mach->BreakStackTop == 0);6199assert(mach->CallStackTop == 0);62006201return ~mach->KillMask;6202}620362046205