Path: blob/21.2-virgl/src/gallium/drivers/i915/i915_fpc_optimize.c
4570 views
/**************************************************************************1*2* Copyright 2011 The Chromium OS authors.3* All Rights Reserved.4*5* Permission is hereby granted, free of charge, to any person obtaining a6* copy of this software and associated documentation files (the7* "Software"), to deal in the Software without restriction, including8* without limitation the rights to use, copy, modify, merge, publish,9* distribute, sub license, and/or sell copies of the Software, and to10* permit persons to whom the Software is furnished to do so, subject to11* the following conditions:12*13* The above copyright notice and this permission notice (including the14* next paragraph) shall be included in all copies or substantial portions15* of the Software.16*17* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS18* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF19* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.20* IN NO EVENT SHALL GOOGLE AND/OR ITS SUPPLIERS BE LIABLE FOR21* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,22* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE23* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.24*25**************************************************************************/2627#include "i915_context.h"28#include "i915_fpc.h"29#include "i915_reg.h"3031#include "pipe/p_shader_tokens.h"32#include "tgsi/tgsi_dump.h"33#include "tgsi/tgsi_exec.h"34#include "tgsi/tgsi_parse.h"35#include "util/u_math.h"36#include "util/u_memory.h"37#include "util/u_string.h"3839struct i915_optimize_context {40int first_write[TGSI_EXEC_NUM_TEMPS];41int last_read[TGSI_EXEC_NUM_TEMPS];42};4344static bool45same_src_dst_reg(struct i915_full_src_register *s1,46struct i915_full_dst_register *d1)47{48return (s1->Register.File == d1->Register.File &&49s1->Register.Indirect == d1->Register.Indirect &&50s1->Register.Dimension == d1->Register.Dimension &&51s1->Register.Index == d1->Register.Index);52}5354static bool55same_dst_reg(struct i915_full_dst_register *d1,56struct i915_full_dst_register *d2)57{58return (d1->Register.File == d2->Register.File &&59d1->Register.Indirect == d2->Register.Indirect &&60d1->Register.Dimension == d2->Register.Dimension &&61d1->Register.Index == d2->Register.Index);62}6364static bool65same_src_reg(struct i915_full_src_register *d1,66struct i915_full_src_register *d2)67{68return (d1->Register.File == d2->Register.File &&69d1->Register.Indirect == d2->Register.Indirect &&70d1->Register.Dimension == d2->Register.Dimension &&71d1->Register.Index == d2->Register.Index &&72d1->Register.Absolute == d2->Register.Absolute &&73d1->Register.Negate == d2->Register.Negate);74}7576static const struct {77bool is_texture;78bool commutes;79unsigned neutral_element;80unsigned num_dst;81unsigned num_src;82} op_table[TGSI_OPCODE_LAST] = {83[TGSI_OPCODE_ADD] = {false, true, TGSI_SWIZZLE_ZERO, 1, 2},84[TGSI_OPCODE_CEIL] = {false, false, 0, 1, 1},85[TGSI_OPCODE_CMP] = {false, false, 0, 1, 2},86[TGSI_OPCODE_COS] = {false, false, 0, 1, 1},87[TGSI_OPCODE_DDX] = {false, false, 0, 1, 0},88[TGSI_OPCODE_DDY] = {false, false, 0, 1, 0},89[TGSI_OPCODE_DP2] = {false, true, TGSI_SWIZZLE_ONE, 1, 2},90[TGSI_OPCODE_DP3] = {false, true, TGSI_SWIZZLE_ONE, 1, 2},91[TGSI_OPCODE_DP4] = {false, true, TGSI_SWIZZLE_ONE, 1, 2},92[TGSI_OPCODE_DST] = {false, false, 0, 1, 2},93[TGSI_OPCODE_END] = {false, false, 0, 0, 0},94[TGSI_OPCODE_EX2] = {false, false, 0, 1, 1},95[TGSI_OPCODE_FLR] = {false, false, 0, 1, 1},96[TGSI_OPCODE_FRC] = {false, false, 0, 1, 1},97[TGSI_OPCODE_KILL_IF] = {false, false, 0, 0, 1},98[TGSI_OPCODE_KILL] = {false, false, 0, 0, 0},99[TGSI_OPCODE_LG2] = {false, false, 0, 1, 1},100[TGSI_OPCODE_LIT] = {false, false, 0, 1, 1},101[TGSI_OPCODE_LRP] = {false, false, 0, 1, 3},102[TGSI_OPCODE_MAX] = {false, false, 0, 1, 2},103[TGSI_OPCODE_MAD] = {false, false, 0, 1, 3},104[TGSI_OPCODE_MIN] = {false, false, 0, 1, 2},105[TGSI_OPCODE_MOV] = {false, false, 0, 1, 1},106[TGSI_OPCODE_MUL] = {false, true, TGSI_SWIZZLE_ONE, 1, 2},107[TGSI_OPCODE_NOP] = {false, false, 0, 0, 0},108[TGSI_OPCODE_POW] = {false, false, 0, 1, 2},109[TGSI_OPCODE_RCP] = {false, false, 0, 1, 1},110[TGSI_OPCODE_RET] = {false, false, 0, 0, 0},111[TGSI_OPCODE_RSQ] = {false, false, 0, 1, 1},112[TGSI_OPCODE_SEQ] = {false, false, 0, 1, 2},113[TGSI_OPCODE_SGE] = {false, false, 0, 1, 2},114[TGSI_OPCODE_SGT] = {false, false, 0, 1, 2},115[TGSI_OPCODE_SIN] = {false, false, 0, 1, 1},116[TGSI_OPCODE_SLE] = {false, false, 0, 1, 2},117[TGSI_OPCODE_SLT] = {false, false, 0, 1, 2},118[TGSI_OPCODE_SNE] = {false, false, 0, 1, 2},119[TGSI_OPCODE_SSG] = {false, false, 0, 1, 1},120[TGSI_OPCODE_TEX] = {true, false, 0, 1, 2},121[TGSI_OPCODE_TRUNC] = {false, false, 0, 1, 1},122[TGSI_OPCODE_TXB] = {true, false, 0, 1, 2},123[TGSI_OPCODE_TXP] = {true, false, 0, 1, 2},124};125126static bool127op_has_dst(unsigned opcode)128{129return (op_table[opcode].num_dst > 0);130}131132static int133op_num_dst(unsigned opcode)134{135return op_table[opcode].num_dst;136}137138static int139op_num_src(unsigned opcode)140{141return op_table[opcode].num_src;142}143144static bool145op_commutes(unsigned opcode)146{147return op_table[opcode].commutes;148}149150static unsigned151mask_for_unswizzled(int num_components)152{153unsigned mask = 0;154switch (num_components) {155case 4:156mask |= TGSI_WRITEMASK_W;157FALLTHROUGH;158case 3:159mask |= TGSI_WRITEMASK_Z;160FALLTHROUGH;161case 2:162mask |= TGSI_WRITEMASK_Y;163FALLTHROUGH;164case 1:165mask |= TGSI_WRITEMASK_X;166}167return mask;168}169170static bool171is_unswizzled(struct i915_full_src_register *r, unsigned write_mask)172{173if (write_mask & TGSI_WRITEMASK_X && r->Register.SwizzleX != TGSI_SWIZZLE_X)174return false;175if (write_mask & TGSI_WRITEMASK_Y && r->Register.SwizzleY != TGSI_SWIZZLE_Y)176return false;177if (write_mask & TGSI_WRITEMASK_Z && r->Register.SwizzleZ != TGSI_SWIZZLE_Z)178return false;179if (write_mask & TGSI_WRITEMASK_W && r->Register.SwizzleW != TGSI_SWIZZLE_W)180return false;181return true;182}183184static bool185op_is_texture(unsigned opcode)186{187return op_table[opcode].is_texture;188}189190static unsigned191op_neutral_element(unsigned opcode)192{193unsigned ne = op_table[opcode].neutral_element;194if (!ne) {195debug_printf("No neutral element for opcode %d\n", opcode);196ne = TGSI_SWIZZLE_ZERO;197}198return ne;199}200201/*202* Sets the swizzle to the neutral element for the operation for the bits203* of writemask which are set, swizzle to identity otherwise.204*/205static void206set_neutral_element_swizzle(struct i915_full_src_register *r,207unsigned write_mask, unsigned neutral)208{209if (write_mask & TGSI_WRITEMASK_X)210r->Register.SwizzleX = neutral;211else212r->Register.SwizzleX = TGSI_SWIZZLE_X;213214if (write_mask & TGSI_WRITEMASK_Y)215r->Register.SwizzleY = neutral;216else217r->Register.SwizzleY = TGSI_SWIZZLE_Y;218219if (write_mask & TGSI_WRITEMASK_Z)220r->Register.SwizzleZ = neutral;221else222r->Register.SwizzleZ = TGSI_SWIZZLE_Z;223224if (write_mask & TGSI_WRITEMASK_W)225r->Register.SwizzleW = neutral;226else227r->Register.SwizzleW = TGSI_SWIZZLE_W;228}229230static void231copy_src_reg(struct i915_src_register *o, const struct tgsi_src_register *i)232{233o->File = i->File;234o->Indirect = i->Indirect;235o->Dimension = i->Dimension;236o->Index = i->Index;237o->SwizzleX = i->SwizzleX;238o->SwizzleY = i->SwizzleY;239o->SwizzleZ = i->SwizzleZ;240o->SwizzleW = i->SwizzleW;241o->Absolute = i->Absolute;242o->Negate = i->Negate;243}244245static void246copy_dst_reg(struct i915_dst_register *o, const struct tgsi_dst_register *i)247{248o->File = i->File;249o->WriteMask = i->WriteMask;250o->Indirect = i->Indirect;251o->Dimension = i->Dimension;252o->Index = i->Index;253}254255static void256copy_instruction(struct i915_full_instruction *o,257const struct tgsi_full_instruction *i)258{259memcpy(&o->Instruction, &i->Instruction, sizeof(o->Instruction));260memcpy(&o->Texture, &i->Texture, sizeof(o->Texture));261262copy_dst_reg(&o->Dst[0].Register, &i->Dst[0].Register);263264copy_src_reg(&o->Src[0].Register, &i->Src[0].Register);265copy_src_reg(&o->Src[1].Register, &i->Src[1].Register);266copy_src_reg(&o->Src[2].Register, &i->Src[2].Register);267}268269static void270copy_token(union i915_full_token *o, union tgsi_full_token *i)271{272if (i->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)273memcpy(o, i, sizeof(*o));274else275copy_instruction(&o->FullInstruction, &i->FullInstruction);276}277278static void279liveness_mark_written(struct i915_optimize_context *ctx,280struct i915_full_dst_register *dst_reg, int pos)281{282int dst_reg_index;283if (dst_reg->Register.File == TGSI_FILE_TEMPORARY) {284dst_reg_index = dst_reg->Register.Index;285assert(dst_reg_index < TGSI_EXEC_NUM_TEMPS);286/* dead -> live transition */287if (ctx->first_write[dst_reg_index] != -1)288ctx->first_write[dst_reg_index] = pos;289}290}291292static void293liveness_mark_read(struct i915_optimize_context *ctx,294struct i915_full_src_register *src_reg, int pos)295{296int src_reg_index;297if (src_reg->Register.File == TGSI_FILE_TEMPORARY) {298src_reg_index = src_reg->Register.Index;299assert(src_reg_index < TGSI_EXEC_NUM_TEMPS);300/* live -> dead transition */301if (ctx->last_read[src_reg_index] != -1)302ctx->last_read[src_reg_index] = pos;303}304}305306static void307liveness_analysis(struct i915_optimize_context *ctx,308struct i915_token_list *tokens)309{310struct i915_full_dst_register *dst_reg;311struct i915_full_src_register *src_reg;312union i915_full_token *current;313unsigned opcode;314int num_dst, num_src;315int i = 0;316317for (i = 0; i < TGSI_EXEC_NUM_TEMPS; i++) {318ctx->first_write[i] = -1;319ctx->last_read[i] = -1;320}321322for (i = 0; i < tokens->NumTokens; i++) {323current = &tokens->Tokens[i];324325if (current->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)326continue;327328opcode = current->FullInstruction.Instruction.Opcode;329num_dst = op_num_dst(opcode);330331switch (num_dst) {332case 1:333dst_reg = ¤t->FullInstruction.Dst[0];334liveness_mark_written(ctx, dst_reg, i);335case 0:336break;337default:338debug_printf("Op %d has %d dst regs\n", opcode, num_dst);339break;340}341}342343for (i = tokens->NumTokens - 1; i >= 0; i--) {344current = &tokens->Tokens[i];345346if (current->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)347continue;348349opcode = current->FullInstruction.Instruction.Opcode;350num_src = op_num_src(opcode);351352switch (num_src) {353case 3:354src_reg = ¤t->FullInstruction.Src[2];355liveness_mark_read(ctx, src_reg, i);356FALLTHROUGH;357case 2:358src_reg = ¤t->FullInstruction.Src[1];359liveness_mark_read(ctx, src_reg, i);360FALLTHROUGH;361case 1:362src_reg = ¤t->FullInstruction.Src[0];363liveness_mark_read(ctx, src_reg, i);364FALLTHROUGH;365case 0:366break;367default:368debug_printf("Op %d has %d src regs\n", opcode, num_src);369break;370}371}372}373374static int375unused_from(struct i915_optimize_context *ctx,376struct i915_full_dst_register *dst_reg, int from)377{378int dst_reg_index = dst_reg->Register.Index;379assert(dst_reg_index < TGSI_EXEC_NUM_TEMPS);380return (from >= ctx->last_read[dst_reg_index]);381}382383/* Returns a mask with the components used for a texture access instruction */384static unsigned385i915_tex_mask(union i915_full_token *instr)386{387unsigned mask;388389/* Get the number of coords */390mask = mask_for_unswizzled(391i915_num_coords(instr->FullInstruction.Texture.Texture));392393/* Add the W component if projective */394if (instr->FullInstruction.Instruction.Opcode == TGSI_OPCODE_TXP)395mask |= TGSI_WRITEMASK_W;396397return mask;398}399400static bool401target_is_texture2d(uint32_t tex)402{403switch (tex) {404case TGSI_TEXTURE_2D:405case TGSI_TEXTURE_RECT:406return true;407default:408return false;409}410}411412/*413* Optimize away useless indirect texture reads:414* MOV TEMP[0].xy, IN[0].xyyy415* TEX TEMP[1], TEMP[0], SAMP[0], 2D416* into:417* TEX TEMP[1], IN[0], SAMP[0], 2D418*419* note: this only seems to work on 2D/RECT textures, but not SHAADOW2D/1D/..420*/421static void422i915_fpc_optimize_mov_before_tex(struct i915_optimize_context *ctx,423struct i915_token_list *tokens, int index)424{425union i915_full_token *current = &tokens->Tokens[index - 1];426union i915_full_token *next = &tokens->Tokens[index];427428if (current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&429next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&430current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&431op_is_texture(next->FullInstruction.Instruction.Opcode) &&432target_is_texture2d(next->FullInstruction.Texture.Texture) &&433same_src_dst_reg(&next->FullInstruction.Src[0],434¤t->FullInstruction.Dst[0]) &&435is_unswizzled(¤t->FullInstruction.Src[0], i915_tex_mask(next)) &&436unused_from(ctx, ¤t->FullInstruction.Dst[0], index)) {437memcpy(&next->FullInstruction.Src[0], ¤t->FullInstruction.Src[0],438sizeof(struct i915_src_register));439current->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;440}441}442443/*444* Optimize away things like:445* MOV TEMP[0].xy, TEMP[1].xyyy (first write for TEMP[0])446* MOV TEMP[0].w, TEMP[1].wwww (last write for TEMP[0])447* into:448* NOP449* MOV OUT[0].xyw, TEMP[1].xyww450*/451static void452i915_fpc_optimize_mov_after_mov(union i915_full_token *current,453union i915_full_token *next)454{455struct i915_full_src_register *src_reg1, *src_reg2;456struct i915_full_dst_register *dst_reg1, *dst_reg2;457unsigned swizzle_x, swizzle_y, swizzle_z, swizzle_w;458459if (current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&460next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&461current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&462next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&463current->FullInstruction.Instruction.Saturate ==464next->FullInstruction.Instruction.Saturate &&465same_dst_reg(&next->FullInstruction.Dst[0],466¤t->FullInstruction.Dst[0]) &&467same_src_reg(&next->FullInstruction.Src[0],468¤t->FullInstruction.Src[0]) &&469!same_src_dst_reg(¤t->FullInstruction.Src[0],470¤t->FullInstruction.Dst[0])) {471src_reg1 = ¤t->FullInstruction.Src[0];472dst_reg1 = ¤t->FullInstruction.Dst[0];473src_reg2 = &next->FullInstruction.Src[0];474dst_reg2 = &next->FullInstruction.Dst[0];475476/* Start with swizzles from the first mov */477swizzle_x = src_reg1->Register.SwizzleX;478swizzle_y = src_reg1->Register.SwizzleY;479swizzle_z = src_reg1->Register.SwizzleZ;480swizzle_w = src_reg1->Register.SwizzleW;481482/* Pile the second mov on top */483if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_X)484swizzle_x = src_reg2->Register.SwizzleX;485if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_Y)486swizzle_y = src_reg2->Register.SwizzleY;487if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_Z)488swizzle_z = src_reg2->Register.SwizzleZ;489if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_W)490swizzle_w = src_reg2->Register.SwizzleW;491492dst_reg2->Register.WriteMask |= dst_reg1->Register.WriteMask;493src_reg2->Register.SwizzleX = swizzle_x;494src_reg2->Register.SwizzleY = swizzle_y;495src_reg2->Register.SwizzleZ = swizzle_z;496src_reg2->Register.SwizzleW = swizzle_w;497498current->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;499500return;501}502}503504/*505* Optimize away things like:506* MUL OUT[0].xyz, TEMP[1], TEMP[2]507* MOV OUT[0].w, TEMP[2]508* into:509* MUL OUT[0].xyzw, TEMP[1].xyz1, TEMP[2]510* This is useful for optimizing texenv.511*/512static void513i915_fpc_optimize_mov_after_alu(union i915_full_token *current,514union i915_full_token *next)515{516if (current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&517next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&518op_commutes(current->FullInstruction.Instruction.Opcode) &&519current->FullInstruction.Instruction.Saturate ==520next->FullInstruction.Instruction.Saturate &&521next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&522same_dst_reg(&next->FullInstruction.Dst[0],523¤t->FullInstruction.Dst[0]) &&524same_src_reg(&next->FullInstruction.Src[0],525¤t->FullInstruction.Src[1]) &&526!same_src_dst_reg(&next->FullInstruction.Src[0],527¤t->FullInstruction.Dst[0]) &&528is_unswizzled(¤t->FullInstruction.Src[0],529current->FullInstruction.Dst[0].Register.WriteMask) &&530is_unswizzled(¤t->FullInstruction.Src[1],531current->FullInstruction.Dst[0].Register.WriteMask) &&532is_unswizzled(&next->FullInstruction.Src[0],533next->FullInstruction.Dst[0].Register.WriteMask)) {534next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;535536set_neutral_element_swizzle(¤t->FullInstruction.Src[1], 0, 0);537set_neutral_element_swizzle(538¤t->FullInstruction.Src[0],539next->FullInstruction.Dst[0].Register.WriteMask,540op_neutral_element(current->FullInstruction.Instruction.Opcode));541542current->FullInstruction.Dst[0].Register.WriteMask =543current->FullInstruction.Dst[0].Register.WriteMask |544next->FullInstruction.Dst[0].Register.WriteMask;545return;546}547548if (current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&549next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&550op_commutes(current->FullInstruction.Instruction.Opcode) &&551current->FullInstruction.Instruction.Saturate ==552next->FullInstruction.Instruction.Saturate &&553next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&554same_dst_reg(&next->FullInstruction.Dst[0],555¤t->FullInstruction.Dst[0]) &&556same_src_reg(&next->FullInstruction.Src[0],557¤t->FullInstruction.Src[0]) &&558!same_src_dst_reg(&next->FullInstruction.Src[0],559¤t->FullInstruction.Dst[0]) &&560is_unswizzled(¤t->FullInstruction.Src[0],561current->FullInstruction.Dst[0].Register.WriteMask) &&562is_unswizzled(¤t->FullInstruction.Src[1],563current->FullInstruction.Dst[0].Register.WriteMask) &&564is_unswizzled(&next->FullInstruction.Src[0],565next->FullInstruction.Dst[0].Register.WriteMask)) {566next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;567568set_neutral_element_swizzle(¤t->FullInstruction.Src[0], 0, 0);569set_neutral_element_swizzle(570¤t->FullInstruction.Src[1],571next->FullInstruction.Dst[0].Register.WriteMask,572op_neutral_element(current->FullInstruction.Instruction.Opcode));573574current->FullInstruction.Dst[0].Register.WriteMask =575current->FullInstruction.Dst[0].Register.WriteMask |576next->FullInstruction.Dst[0].Register.WriteMask;577return;578}579}580581/*582* Optimize away things like:583* MOV TEMP[0].xyz TEMP[0].xyzx584* into:585* NOP586*/587static bool588i915_fpc_useless_mov(union tgsi_full_token *tgsi_current)589{590union i915_full_token current;591copy_token(¤t, tgsi_current);592if (current.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&593current.FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&594op_has_dst(current.FullInstruction.Instruction.Opcode) &&595!current.FullInstruction.Instruction.Saturate &&596current.FullInstruction.Src[0].Register.Absolute == 0 &&597current.FullInstruction.Src[0].Register.Negate == 0 &&598is_unswizzled(¤t.FullInstruction.Src[0],599current.FullInstruction.Dst[0].Register.WriteMask) &&600same_src_dst_reg(¤t.FullInstruction.Src[0],601¤t.FullInstruction.Dst[0])) {602return true;603}604return false;605}606607/*608* Optimize away things like:609* *** TEMP[0], TEMP[1], TEMP[2]610* MOV OUT[0] TEMP[0]611* into:612* *** OUT[0], TEMP[1], TEMP[2]613*/614static void615i915_fpc_optimize_useless_mov_after_inst(struct i915_optimize_context *ctx,616struct i915_token_list *tokens,617int index)618{619union i915_full_token *current = &tokens->Tokens[index - 1];620union i915_full_token *next = &tokens->Tokens[index];621622// &out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);623if (current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&624next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&625next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&626op_has_dst(current->FullInstruction.Instruction.Opcode) &&627!next->FullInstruction.Instruction.Saturate &&628next->FullInstruction.Src[0].Register.Absolute == 0 &&629next->FullInstruction.Src[0].Register.Negate == 0 &&630unused_from(ctx, ¤t->FullInstruction.Dst[0], index) &&631current->FullInstruction.Dst[0].Register.WriteMask ==632TGSI_WRITEMASK_XYZW &&633is_unswizzled(&next->FullInstruction.Src[0],634next->FullInstruction.Dst[0].Register.WriteMask) &&635current->FullInstruction.Dst[0].Register.WriteMask ==636next->FullInstruction.Dst[0].Register.WriteMask &&637same_src_dst_reg(&next->FullInstruction.Src[0],638¤t->FullInstruction.Dst[0])) {639next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;640641current->FullInstruction.Dst[0] = next->FullInstruction.Dst[0];642return;643}644}645646struct i915_token_list *647i915_optimize(const struct tgsi_token *tokens)648{649struct i915_token_list *out_tokens = MALLOC(sizeof(struct i915_token_list));650struct tgsi_parse_context parse;651struct i915_optimize_context *ctx;652int i = 0;653654ctx = malloc(sizeof(*ctx));655656out_tokens->NumTokens = 0;657658/* Count the tokens */659tgsi_parse_init(&parse, tokens);660while (!tgsi_parse_end_of_tokens(&parse)) {661tgsi_parse_token(&parse);662out_tokens->NumTokens++;663}664tgsi_parse_free(&parse);665666/* Allocate our tokens */667out_tokens->Tokens =668MALLOC(sizeof(union i915_full_token) * out_tokens->NumTokens);669670tgsi_parse_init(&parse, tokens);671while (!tgsi_parse_end_of_tokens(&parse)) {672tgsi_parse_token(&parse);673674if (i915_fpc_useless_mov(&parse.FullToken)) {675out_tokens->NumTokens--;676continue;677}678679copy_token(&out_tokens->Tokens[i], &parse.FullToken);680681i++;682}683tgsi_parse_free(&parse);684685liveness_analysis(ctx, out_tokens);686687i = 1;688while (i < out_tokens->NumTokens) {689i915_fpc_optimize_useless_mov_after_inst(ctx, out_tokens, i);690i915_fpc_optimize_mov_after_alu(&out_tokens->Tokens[i - 1],691&out_tokens->Tokens[i]);692i915_fpc_optimize_mov_after_mov(&out_tokens->Tokens[i - 1],693&out_tokens->Tokens[i]);694i915_fpc_optimize_mov_before_tex(ctx, out_tokens, i);695i++;696}697698free(ctx);699700return out_tokens;701}702703void704i915_optimize_free(struct i915_token_list *tokens)705{706free(tokens->Tokens);707free(tokens);708}709710711