Path: blob/21.2-virgl/src/gallium/drivers/etnaviv/etnaviv_compiler_tgsi.c
/*
 * Copyright (c) 2012-2015 Etnaviv Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Wladimir J. van der Laan <[email protected]>
 */

/* TGSI->Vivante shader ISA conversion */

/* What does the compiler return (see etna_shader_object)?
 *  1) instruction data
 *  2) input-to-temporary mapping (fixed for ps)
 *      *) in case of ps, semantic -> varying id mapping
 *      *) for each varying: number of components used (r, rg, rgb, rgba)
 *  3) temporary-to-output mapping (in case of vs, fixed for ps)
 *  4) for each input/output: possible semantic (position, color, glpointcoord, ...)
 *  5) immediates base offset, immediates data
 *  6) used texture units (and possibly the TGSI_TEXTURE_* type); not needed to
 *     configure the hw, but useful for error checking
 *  7) enough information to add the z=(z+w)/2.0 necessary for older chips
 *     (output reg id is enough)
 *
 * Empty shaders are not allowed; at least a NOP should always be generated.
 * Also, if there is a label at the end of the shader, an extra NOP should be
 * generated as jump target.
 *
 * TODO
 * * Use an instruction scheduler
 * * Indirect access to uniforms / temporaries using amode
 */

#include "etnaviv_compiler.h"

#include "etnaviv_asm.h"
#include "etnaviv_context.h"
#include "etnaviv_debug.h"
#include "etnaviv_uniforms.h"
#include "etnaviv_util.h"

#include "nir/tgsi_to_nir.h"
#include "pipe/p_shader_tokens.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_iterate.h"
#include "tgsi/tgsi_lowering.h"
#include "tgsi/tgsi_strings.h"
#include "tgsi/tgsi_util.h"
#include "util/u_math.h"
#include "util/u_memory.h"

#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>
#include <sys/types.h>

#define ETNA_MAX_INNER_TEMPS 2

/* Constants used by the fast sine/cosine approximation in trans_trig() */
static const float sincos_const[2][4] = {
   {
      2., -1., 4., -4.,
   },
   {
      1. / (2. * M_PI), 0.75, 0.5, 0.0,
   },
};

/* Native register description structure */
struct etna_native_reg {
   unsigned valid : 1;
   unsigned is_tex : 1; /* is texture unit, overrides rgroup */
   unsigned rgroup : 3;
   unsigned id : 9;
};
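/* A native register is the hardware-side counterpart of a TGSI register: it
 * is addressed by a register group (rgroup) plus an index within that group,
 * and when is_tex is set the id refers to a texture sampler unit instead of
 * an rgroup entry. For example, etna_native_temp(3) below yields
 *    { .valid = 1, .rgroup = INST_RGROUP_TEMP, .id = 3 }
 * i.e. hardware temporary t3. */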
/* Register description */
struct etna_reg_desc {
   enum tgsi_file_type file; /* IN, OUT, TEMP, ... */
   int idx; /* index into file */
   bool active; /* used in program */
   int first_use; /* instruction id of first use (scope begin) */
   int last_use; /* instruction id of last use (scope end, inclusive) */

   struct etna_native_reg native; /* native register to map to */
   unsigned usage_mask : 4; /* usage, per channel */
   bool has_semantic; /* register has associated TGSI semantic */
   struct tgsi_declaration_semantic semantic; /* TGSI semantic */
   struct tgsi_declaration_interp interp; /* Interpolation type */
};

/* Label information structure */
struct etna_compile_label {
   int inst_idx; /* Instruction id that label points to */
};

enum etna_compile_frame_type {
   ETNA_COMPILE_FRAME_IF, /* IF/ELSE/ENDIF */
   ETNA_COMPILE_FRAME_LOOP,
};

/* nesting scope frame (LOOP, IF, ...) during compilation */
struct etna_compile_frame {
   enum etna_compile_frame_type type;
   int lbl_else_idx;
   int lbl_endif_idx;
   int lbl_loop_bgn_idx;
   int lbl_loop_end_idx;
};

struct etna_compile_file {
   /* Number of registers in each TGSI file (max register+1) */
   size_t reg_size;
   /* Register descriptions, per register index */
   struct etna_reg_desc *reg;
};

#define array_insert(arr, val) \
   do { \
      if (arr##_count == arr##_sz) { \
         arr##_sz = MAX2(2 * arr##_sz, 16); \
         arr = realloc(arr, arr##_sz * sizeof(arr[0])); \
      } \
      arr[arr##_count++] = val; \
   } while (0)

/* scratch area for compiling shader, freed after compilation finishes */
struct etna_compile {
   const struct tgsi_token *tokens;
   bool free_tokens;

   struct tgsi_shader_info info;

   /* Register descriptions, per TGSI file, per register index */
   struct etna_compile_file file[TGSI_FILE_COUNT];

   /* Keep track of TGSI register declarations */
   struct etna_reg_desc decl[ETNA_MAX_DECL];
   uint total_decls;

   /* Bitmap of dead instructions which are removed in a separate pass */
   bool dead_inst[ETNA_MAX_TOKENS];

   /* Immediate data */
   enum etna_uniform_contents imm_contents[ETNA_MAX_IMM];
   uint32_t imm_data[ETNA_MAX_IMM];
   uint32_t imm_base; /* base of immediates (in 32 bit units) */
   uint32_t imm_size; /* size of immediates (in 32 bit units) */

   /* Next free native register, for register allocation */
   uint32_t next_free_native;

   /* Temporary register for use within translated TGSI instruction,
    * only allocated when needed.
    */
   int inner_temps; /* number of inner temps used; only up to one available at
                       this point */
   struct etna_native_reg inner_temp[ETNA_MAX_INNER_TEMPS];

   /* Fields for handling nested conditionals */
   struct etna_compile_frame frame_stack[ETNA_MAX_DEPTH];
   int frame_sp;
   int lbl_usage[ETNA_MAX_INSTRUCTIONS];

   unsigned labels_count, labels_sz;
   struct etna_compile_label *labels;

   unsigned num_loops;

   /* Code generation */
   int inst_ptr; /* current instruction pointer */
   uint32_t code[ETNA_MAX_INSTRUCTIONS * ETNA_INST_SIZE];

   /* I/O */

   /* Number of varyings (PS only) */
   int num_varyings;

   /* GPU hardware specs */
   const struct etna_specs *specs;

   const struct etna_shader_key *key;
};

static struct etna_reg_desc *
etna_get_dst_reg(struct etna_compile *c, struct tgsi_dst_register dst)
{
   return &c->file[dst.File].reg[dst.Index];
}

static struct etna_reg_desc *
etna_get_src_reg(struct etna_compile *c, struct tgsi_src_register src)
{
   return &c->file[src.File].reg[src.Index];
}
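/* Constructors for native register descriptions: etna_native_temp() refers
 * to a hardware temporary register (rgroup TEMP), etna_native_internal() to
 * an internal register such as i0, which carries the front-facing flag used
 * for TGSI_SEMANTIC_FACE (see assign_special_inputs()). */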
static struct etna_native_reg
etna_native_temp(unsigned reg)
{
   return (struct etna_native_reg) {
      .valid = 1,
      .rgroup = INST_RGROUP_TEMP,
      .id = reg
   };
}

static struct etna_native_reg
etna_native_internal(unsigned reg)
{
   return (struct etna_native_reg) {
      .valid = 1,
      .rgroup = INST_RGROUP_INTERNAL,
      .id = reg
   };
}

/** Register allocation **/
enum reg_sort_order {
   FIRST_USE_ASC,
   FIRST_USE_DESC,
   LAST_USE_ASC,
   LAST_USE_DESC
};

/* Augmented register description for sorting */
struct sort_rec {
   struct etna_reg_desc *ptr;
   int key;
};

static int
sort_rec_compar(const struct sort_rec *a, const struct sort_rec *b)
{
   if (a->key < b->key)
      return -1;

   if (a->key > b->key)
      return 1;

   return 0;
}

/* create an index on a register set based on certain criteria. */
static int
sort_registers(struct sort_rec *sorted, struct etna_compile_file *file,
               enum reg_sort_order so)
{
   struct etna_reg_desc *regs = file->reg;
   int ptr = 0;

   /* pre-populate keys from active registers */
   for (int idx = 0; idx < file->reg_size; ++idx) {
      /* only interested in active registers now; will only assign inactive ones
       * if no space in active ones */
      if (regs[idx].active) {
         sorted[ptr].ptr = &regs[idx];

         switch (so) {
         case FIRST_USE_ASC:
            sorted[ptr].key = regs[idx].first_use;
            break;
         case LAST_USE_ASC:
            sorted[ptr].key = regs[idx].last_use;
            break;
         case FIRST_USE_DESC:
            sorted[ptr].key = -regs[idx].first_use;
            break;
         case LAST_USE_DESC:
            sorted[ptr].key = -regs[idx].last_use;
            break;
         }
         ptr++;
      }
   }

   /* sort index by key */
   qsort(sorted, ptr, sizeof(struct sort_rec),
         (int (*)(const void *, const void *))sort_rec_compar);

   return ptr;
}

/* Allocate a new, unused, native temp register */
static struct etna_native_reg
alloc_new_native_reg(struct etna_compile *c)
{
   assert(c->next_free_native < ETNA_MAX_TEMPS);
   return etna_native_temp(c->next_free_native++);
}

/* assign TEMPs to native registers */
static void
assign_temporaries_to_native(struct etna_compile *c,
                             struct etna_compile_file *file)
{
   struct etna_reg_desc *temps = file->reg;

   for (int idx = 0; idx < file->reg_size; ++idx)
      temps[idx].native = alloc_new_native_reg(c);
}
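/* Sketch of the reuse performed below: if an INPUT register is last read at
 * instruction 3 and a TEMPORARY is first written at instruction 5, the input
 * can borrow the temporary's native register because their live ranges do
 * not overlap (inout->last_use <= temp->first_use). Outputs are matched the
 * other way around, comparing the output's first_use against the temporary's
 * last_use. */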
/* assign inputs and outputs to temporaries
 * Gallium assumes that the hardware has separate registers for taking input and
 * output, however Vivante GPUs use temporaries both for passing in inputs and
 * passing back outputs.
 * Try to re-use temporary registers where possible. */
static void
assign_inouts_to_temporaries(struct etna_compile *c, uint file)
{
   bool mode_inputs = (file == TGSI_FILE_INPUT);
   int inout_ptr = 0, num_inouts;
   int temp_ptr = 0, num_temps;
   struct sort_rec inout_order[ETNA_MAX_TEMPS];
   struct sort_rec temps_order[ETNA_MAX_TEMPS];
   num_inouts = sort_registers(inout_order, &c->file[file],
                               mode_inputs ? LAST_USE_ASC : FIRST_USE_ASC);
   num_temps = sort_registers(temps_order, &c->file[TGSI_FILE_TEMPORARY],
                              mode_inputs ? FIRST_USE_ASC : LAST_USE_ASC);

   while (inout_ptr < num_inouts && temp_ptr < num_temps) {
      struct etna_reg_desc *inout = inout_order[inout_ptr].ptr;
      struct etna_reg_desc *temp = temps_order[temp_ptr].ptr;

      if (!inout->active || inout->native.valid) { /* Skip if already a native register assigned */
         inout_ptr++;
         continue;
      }

      /* last usage of this input is before or in same instruction of first use
       * of temporary? */
      if (mode_inputs ? (inout->last_use <= temp->first_use)
                      : (inout->first_use >= temp->last_use)) {
         /* assign it and advance to next input */
         inout->native = temp->native;
         inout_ptr++;
      }

      temp_ptr++;
   }

   /* if we couldn't reuse current ones, allocate new temporaries */
   for (inout_ptr = 0; inout_ptr < num_inouts; ++inout_ptr) {
      struct etna_reg_desc *inout = inout_order[inout_ptr].ptr;

      if (inout->active && !inout->native.valid)
         inout->native = alloc_new_native_reg(c);
   }
}

/* Allocate an immediate with a certain value and return the index. If
 * there is already an immediate with that value, return that.
 */
static struct etna_inst_src
alloc_imm(struct etna_compile *c, enum etna_uniform_contents contents,
          uint32_t value)
{
   int idx;

   /* Could use a hash table to speed this up */
   for (idx = 0; idx < c->imm_size; ++idx) {
      if (c->imm_contents[idx] == contents && c->imm_data[idx] == value)
         break;
   }

   /* look if there is an unused slot */
   if (idx == c->imm_size) {
      for (idx = 0; idx < c->imm_size; ++idx) {
         if (c->imm_contents[idx] == ETNA_UNIFORM_UNUSED)
            break;
      }
   }

   /* allocate new immediate */
   if (idx == c->imm_size) {
      assert(c->imm_size < ETNA_MAX_IMM);
      idx = c->imm_size++;
      c->imm_data[idx] = value;
      c->imm_contents[idx] = contents;
   }

   /* swizzle so that component with value is returned in all components */
   idx += c->imm_base;
   struct etna_inst_src imm_src = {
      .use = 1,
      .rgroup = INST_RGROUP_UNIFORM_0,
      .reg = idx / 4,
      .swiz = INST_SWIZ_BROADCAST(idx & 3)
   };

   return imm_src;
}

static struct etna_inst_src
alloc_imm_u32(struct etna_compile *c, uint32_t value)
{
   return alloc_imm(c, ETNA_UNIFORM_CONSTANT, value);
}

static struct etna_inst_src
alloc_imm_vec4u(struct etna_compile *c, enum etna_uniform_contents contents,
                const uint32_t *values)
{
   struct etna_inst_src imm_src = { };
   int idx, i;

   for (idx = 0; idx + 3 < c->imm_size; idx += 4) {
      /* What if we can use a uniform with a different swizzle? */
      for (i = 0; i < 4; i++)
         if (c->imm_contents[idx + i] != contents || c->imm_data[idx + i] != values[i])
            break;
      if (i == 4)
         break;
   }

   if (idx + 3 >= c->imm_size) {
      idx = align(c->imm_size, 4);
      assert(idx + 4 <= ETNA_MAX_IMM);

      for (i = 0; i < 4; i++) {
         c->imm_data[idx + i] = values[i];
         c->imm_contents[idx + i] = contents;
      }

      c->imm_size = idx + 4;
   }

   assert((c->imm_base & 3) == 0);
   idx += c->imm_base;
   imm_src.use = 1;
   imm_src.rgroup = INST_RGROUP_UNIFORM_0;
   imm_src.reg = idx / 4;
   imm_src.swiz = INST_SWIZ_IDENTITY;

   return imm_src;
}

static uint32_t
get_imm_u32(struct etna_compile *c, const struct etna_inst_src *imm,
            unsigned swiz_idx)
{
   assert(imm->use == 1 && imm->rgroup == INST_RGROUP_UNIFORM_0);
   unsigned int idx = imm->reg * 4 + ((imm->swiz >> (swiz_idx * 2)) & 3);

   return c->imm_data[idx];
}
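/* How an immediate index maps to a uniform source (based on alloc_imm()
 * above): immediates are stored as a flat array of 32-bit words starting at
 * imm_base, and each native uniform register holds four of them. Word index
 * idx therefore lands in uniform register idx / 4, and a broadcast swizzle
 * of component idx & 3 returns the scalar in all four channels; e.g. idx 6
 * maps to uniform register 1, component .z. */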
/* Allocate immediate with a certain float value. If there is already an
 * immediate with that value, return that.
 */
static struct etna_inst_src
alloc_imm_f32(struct etna_compile *c, float value)
{
   return alloc_imm_u32(c, fui(value));
}

static struct etna_inst_src
etna_imm_vec4f(struct etna_compile *c, const float *vec4)
{
   uint32_t val[4];

   for (int i = 0; i < 4; i++)
      val[i] = fui(vec4[i]);

   return alloc_imm_vec4u(c, ETNA_UNIFORM_CONSTANT, val);
}

/* Pass -- check register file declarations and immediates */
static void
etna_compile_parse_declarations(struct etna_compile *c)
{
   struct tgsi_parse_context ctx = { };
   ASSERTED unsigned status = tgsi_parse_init(&ctx, c->tokens);
   assert(status == TGSI_PARSE_OK);

   while (!tgsi_parse_end_of_tokens(&ctx)) {
      tgsi_parse_token(&ctx);

      switch (ctx.FullToken.Token.Type) {
      case TGSI_TOKEN_TYPE_IMMEDIATE: {
         /* immediates are handled differently from other files; they are
          * not declared explicitly, and always add four components */
         const struct tgsi_full_immediate *imm = &ctx.FullToken.FullImmediate;
         assert(c->imm_size <= (ETNA_MAX_IMM - 4));

         for (int i = 0; i < 4; ++i) {
            unsigned idx = c->imm_size++;

            c->imm_data[idx] = imm->u[i].Uint;
            c->imm_contents[idx] = ETNA_UNIFORM_CONSTANT;
         }
      }
      break;
      }
   }

   tgsi_parse_free(&ctx);
}

/* Allocate register declarations for the registers in all register files */
static void
etna_allocate_decls(struct etna_compile *c)
{
   uint idx = 0;

   for (int x = 0; x < TGSI_FILE_COUNT; ++x) {
      c->file[x].reg = &c->decl[idx];
      c->file[x].reg_size = c->info.file_max[x] + 1;

      for (int sub = 0; sub < c->file[x].reg_size; ++sub) {
         c->decl[idx].file = x;
         c->decl[idx].idx = sub;
         idx++;
      }
   }

   c->total_decls = idx;
}

/* Pass -- check and record usage of temporaries, inputs, outputs */
static void
etna_compile_pass_check_usage(struct etna_compile *c)
{
   struct tgsi_parse_context ctx = { };
   ASSERTED unsigned status = tgsi_parse_init(&ctx, c->tokens);
   assert(status == TGSI_PARSE_OK);

   for (int idx = 0; idx < c->total_decls; ++idx) {
      c->decl[idx].active = false;
      c->decl[idx].first_use = c->decl[idx].last_use = -1;
   }

   int inst_idx = 0;
   while (!tgsi_parse_end_of_tokens(&ctx)) {
      tgsi_parse_token(&ctx);
      /* Find out the maximum register numbers used. For every register, mark
       * the first and last instruction index where it is used; this allows
       * finding ranges where a temporary can be borrowed as input and/or
       * output register.
       *
       * XXX in the case of loops this needs special care, or must even be
       * completely disabled: the last usage of a register inside a loop means
       * it can still be used on the next loop iteration (execution is no
       * longer chronological), so the register can only be declared "free"
       * after the loop finishes.
       *
       * Same for inputs: the first usage of a register inside a loop doesn't
       * mean that the register won't have been overwritten in a previous
       * iteration.
The register can568* only be declared free before the loop569* starts.570* The proper way would be to do full dominator / post-dominator analysis571* (especially with more complicated572* control flow such as direct branch instructions) but not for now...573*/574switch (ctx.FullToken.Token.Type) {575case TGSI_TOKEN_TYPE_DECLARATION: {576/* Declaration: fill in file details */577const struct tgsi_full_declaration *decl = &ctx.FullToken.FullDeclaration;578struct etna_compile_file *file = &c->file[decl->Declaration.File];579580for (int idx = decl->Range.First; idx <= decl->Range.Last; ++idx) {581file->reg[idx].usage_mask = 0; // we'll compute this ourselves582file->reg[idx].has_semantic = decl->Declaration.Semantic;583file->reg[idx].semantic = decl->Semantic;584file->reg[idx].interp = decl->Interp;585}586} break;587case TGSI_TOKEN_TYPE_INSTRUCTION: {588/* Instruction: iterate over operands of instruction */589const struct tgsi_full_instruction *inst = &ctx.FullToken.FullInstruction;590591/* iterate over destination registers */592for (int idx = 0; idx < inst->Instruction.NumDstRegs; ++idx) {593struct etna_reg_desc *reg_desc = &c->file[inst->Dst[idx].Register.File].reg[inst->Dst[idx].Register.Index];594595if (reg_desc->first_use == -1)596reg_desc->first_use = inst_idx;597598reg_desc->last_use = inst_idx;599reg_desc->active = true;600}601602/* iterate over source registers */603for (int idx = 0; idx < inst->Instruction.NumSrcRegs; ++idx) {604struct etna_reg_desc *reg_desc = &c->file[inst->Src[idx].Register.File].reg[inst->Src[idx].Register.Index];605606if (reg_desc->first_use == -1)607reg_desc->first_use = inst_idx;608609reg_desc->last_use = inst_idx;610reg_desc->active = true;611/* accumulate usage mask for register, this is used to determine how612* many slots for varyings613* should be allocated */614reg_desc->usage_mask |= tgsi_util_get_inst_usage_mask(inst, idx);615}616inst_idx += 1;617} break;618default:619break;620}621}622623tgsi_parse_free(&ctx);624}625626/* assign inputs that need to be assigned to specific registers */627static void628assign_special_inputs(struct etna_compile *c)629{630if (c->info.processor == PIPE_SHADER_FRAGMENT) {631/* never assign t0 as it is the position output, start assigning at t1 */632c->next_free_native = 1;633634for (int idx = 0; idx < c->total_decls; ++idx) {635struct etna_reg_desc *reg = &c->decl[idx];636637if (!reg->active)638continue;639640/* hardwire TGSI_SEMANTIC_POSITION (input and output) to t0 */641if (reg->semantic.Name == TGSI_SEMANTIC_POSITION)642reg->native = etna_native_temp(0);643644/* hardwire TGSI_SEMANTIC_FACE to i0 */645if (reg->semantic.Name == TGSI_SEMANTIC_FACE)646reg->native = etna_native_internal(0);647}648}649}650651/* Check that a move instruction does not swizzle any of the components652* that it writes.653*/654static bool655etna_mov_check_no_swizzle(const struct tgsi_dst_register dst,656const struct tgsi_src_register src)657{658return (!(dst.WriteMask & TGSI_WRITEMASK_X) || src.SwizzleX == TGSI_SWIZZLE_X) &&659(!(dst.WriteMask & TGSI_WRITEMASK_Y) || src.SwizzleY == TGSI_SWIZZLE_Y) &&660(!(dst.WriteMask & TGSI_WRITEMASK_Z) || src.SwizzleZ == TGSI_SWIZZLE_Z) &&661(!(dst.WriteMask & TGSI_WRITEMASK_W) || src.SwizzleW == TGSI_SWIZZLE_W);662}663664/* Pass -- optimize outputs665* Mesa tends to generate code like this at the end if their shaders666* MOV OUT[1], TEMP[2]667* MOV OUT[0], TEMP[0]668* MOV OUT[2], TEMP[1]669* Recognize if670* a) there is only a single assignment to an output register and671* b) the temporary is not used after 
that672* Also recognize direct assignment of IN to OUT (passthrough)673**/674static void675etna_compile_pass_optimize_outputs(struct etna_compile *c)676{677struct tgsi_parse_context ctx = { };678int inst_idx = 0;679ASSERTED unsigned status = tgsi_parse_init(&ctx, c->tokens);680assert(status == TGSI_PARSE_OK);681682while (!tgsi_parse_end_of_tokens(&ctx)) {683tgsi_parse_token(&ctx);684685switch (ctx.FullToken.Token.Type) {686case TGSI_TOKEN_TYPE_INSTRUCTION: {687const struct tgsi_full_instruction *inst = &ctx.FullToken.FullInstruction;688689/* iterate over operands */690switch (inst->Instruction.Opcode) {691case TGSI_OPCODE_MOV: {692/* We are only interested in eliminating MOVs which write to693* the shader outputs. Test for this early. */694if (inst->Dst[0].Register.File != TGSI_FILE_OUTPUT)695break;696/* Elimination of a MOV must have no visible effect on the697* resulting shader: this means the MOV must not swizzle or698* saturate, and its source must not have the negate or699* absolute modifiers. */700if (!etna_mov_check_no_swizzle(inst->Dst[0].Register, inst->Src[0].Register) ||701inst->Instruction.Saturate || inst->Src[0].Register.Negate ||702inst->Src[0].Register.Absolute)703break;704705uint out_idx = inst->Dst[0].Register.Index;706uint in_idx = inst->Src[0].Register.Index;707/* assignment of temporary to output --708* and the output doesn't yet have a native register assigned709* and the last use of the temporary is this instruction710* and the MOV does not do a swizzle711*/712if (inst->Src[0].Register.File == TGSI_FILE_TEMPORARY &&713!c->file[TGSI_FILE_OUTPUT].reg[out_idx].native.valid &&714c->file[TGSI_FILE_TEMPORARY].reg[in_idx].last_use == inst_idx) {715c->file[TGSI_FILE_OUTPUT].reg[out_idx].native =716c->file[TGSI_FILE_TEMPORARY].reg[in_idx].native;717/* prevent temp from being re-used for the rest of the shader */718c->file[TGSI_FILE_TEMPORARY].reg[in_idx].last_use = ETNA_MAX_TOKENS;719/* mark this MOV instruction as a no-op */720c->dead_inst[inst_idx] = true;721}722/* direct assignment of input to output --723* and the input or output doesn't yet have a native register724* assigned725* and the output is only used in this instruction,726* allocate a new register, and associate both input and output to727* it728* and the MOV does not do a swizzle729*/730if (inst->Src[0].Register.File == TGSI_FILE_INPUT &&731!c->file[TGSI_FILE_INPUT].reg[in_idx].native.valid &&732!c->file[TGSI_FILE_OUTPUT].reg[out_idx].native.valid &&733c->file[TGSI_FILE_OUTPUT].reg[out_idx].last_use == inst_idx &&734c->file[TGSI_FILE_OUTPUT].reg[out_idx].first_use == inst_idx) {735c->file[TGSI_FILE_OUTPUT].reg[out_idx].native =736c->file[TGSI_FILE_INPUT].reg[in_idx].native =737alloc_new_native_reg(c);738/* mark this MOV instruction as a no-op */739c->dead_inst[inst_idx] = true;740}741} break;742default:;743}744inst_idx += 1;745} break;746}747}748749tgsi_parse_free(&ctx);750}751752/* Get a temporary to be used within one TGSI instruction.753* The first time that this function is called the temporary will be allocated.754* Each call to this function will return the same temporary.755*/756static struct etna_native_reg757etna_compile_get_inner_temp(struct etna_compile *c)758{759int inner_temp = c->inner_temps;760761if (inner_temp < ETNA_MAX_INNER_TEMPS) {762if (!c->inner_temp[inner_temp].valid)763c->inner_temp[inner_temp] = alloc_new_native_reg(c);764765/* alloc_new_native_reg() handles lack of registers */766c->inner_temps += 1;767} else {768BUG("Too many inner temporaries (%i) requested in one 
instruction",769inner_temp + 1);770}771772return c->inner_temp[inner_temp];773}774775static struct etna_inst_dst776etna_native_to_dst(struct etna_native_reg native, unsigned comps)777{778/* Can only assign to temporaries */779assert(native.valid && !native.is_tex && native.rgroup == INST_RGROUP_TEMP);780781struct etna_inst_dst rv = {782.write_mask = comps,783.use = 1,784.reg = native.id,785};786787return rv;788}789790static struct etna_inst_src791etna_native_to_src(struct etna_native_reg native, uint32_t swizzle)792{793assert(native.valid && !native.is_tex);794795struct etna_inst_src rv = {796.use = 1,797.swiz = swizzle,798.rgroup = native.rgroup,799.reg = native.id,800.amode = INST_AMODE_DIRECT,801};802803return rv;804}805806static inline struct etna_inst_src807negate(struct etna_inst_src src)808{809src.neg = !src.neg;810811return src;812}813814static inline struct etna_inst_src815absolute(struct etna_inst_src src)816{817src.abs = 1;818819return src;820}821822static inline struct etna_inst_src823swizzle(struct etna_inst_src src, unsigned swizzle)824{825src.swiz = inst_swiz_compose(src.swiz, swizzle);826827return src;828}829830/* Emit instruction and append it to program */831static void832emit_inst(struct etna_compile *c, struct etna_inst *inst)833{834assert(c->inst_ptr <= ETNA_MAX_INSTRUCTIONS);835836/* Check for uniform conflicts (each instruction can only access one837* uniform),838* if detected, use an intermediate temporary */839unsigned uni_rgroup = -1;840unsigned uni_reg = -1;841842for (int src = 0; src < ETNA_NUM_SRC; ++src) {843if (inst->src[src].rgroup == INST_RGROUP_INTERNAL &&844c->info.processor == PIPE_SHADER_FRAGMENT &&845c->key->front_ccw) {846struct etna_native_reg inner_temp = etna_compile_get_inner_temp(c);847848/*849* Set temporary register to 0.0 or 1.0 based on the gl_FrontFacing850* configuration (CW or CCW).851*/852etna_assemble(&c->code[c->inst_ptr * 4], &(struct etna_inst) {853.opcode = INST_OPCODE_SET,854.cond = INST_CONDITION_NE,855.dst = etna_native_to_dst(inner_temp, INST_COMPS_X | INST_COMPS_Y |856INST_COMPS_Z | INST_COMPS_W),857.src[0] = inst->src[src],858.src[1] = alloc_imm_f32(c, 1.0f)859});860c->inst_ptr++;861862/* Modify instruction to use temp register instead of uniform */863inst->src[src].use = 1;864inst->src[src].rgroup = INST_RGROUP_TEMP;865inst->src[src].reg = inner_temp.id;866inst->src[src].swiz = INST_SWIZ_IDENTITY; /* swizzling happens on MOV */867inst->src[src].neg = 0; /* negation happens on MOV */868inst->src[src].abs = 0; /* abs happens on MOV */869inst->src[src].amode = 0; /* amode effects happen on MOV */870} else if (etna_rgroup_is_uniform(inst->src[src].rgroup)) {871if (uni_reg == -1) { /* first unique uniform used */872uni_rgroup = inst->src[src].rgroup;873uni_reg = inst->src[src].reg;874} else { /* second or later; check that it is a re-use */875if (uni_rgroup != inst->src[src].rgroup ||876uni_reg != inst->src[src].reg) {877DBG_F(ETNA_DBG_COMPILER_MSGS, "perf warning: instruction that "878"accesses different uniforms, "879"need to generate extra MOV");880struct etna_native_reg inner_temp = etna_compile_get_inner_temp(c);881882/* Generate move instruction to temporary */883etna_assemble(&c->code[c->inst_ptr * 4], &(struct etna_inst) {884.opcode = INST_OPCODE_MOV,885.dst = etna_native_to_dst(inner_temp, INST_COMPS_X | INST_COMPS_Y |886INST_COMPS_Z | INST_COMPS_W),887.src[2] = inst->src[src]888});889890c->inst_ptr++;891892/* Modify instruction to use temp register instead of uniform */893inst->src[src].use = 1;894inst->src[src].rgroup = 
INST_RGROUP_TEMP;895inst->src[src].reg = inner_temp.id;896inst->src[src].swiz = INST_SWIZ_IDENTITY; /* swizzling happens on MOV */897inst->src[src].neg = 0; /* negation happens on MOV */898inst->src[src].abs = 0; /* abs happens on MOV */899inst->src[src].amode = 0; /* amode effects happen on MOV */900}901}902}903}904905/* Finally assemble the actual instruction */906etna_assemble(&c->code[c->inst_ptr * 4], inst);907c->inst_ptr++;908}909910static unsigned int911etna_amode(struct tgsi_ind_register indirect)912{913assert(indirect.File == TGSI_FILE_ADDRESS);914assert(indirect.Index == 0);915916switch (indirect.Swizzle) {917case TGSI_SWIZZLE_X:918return INST_AMODE_ADD_A_X;919case TGSI_SWIZZLE_Y:920return INST_AMODE_ADD_A_Y;921case TGSI_SWIZZLE_Z:922return INST_AMODE_ADD_A_Z;923case TGSI_SWIZZLE_W:924return INST_AMODE_ADD_A_W;925default:926assert(!"Invalid swizzle");927}928929unreachable("bad swizzle");930}931932/* convert destination operand */933static struct etna_inst_dst934convert_dst(struct etna_compile *c, const struct tgsi_full_dst_register *in)935{936struct etna_inst_dst rv = {937/// XXX .amode938.write_mask = in->Register.WriteMask,939};940941if (in->Register.File == TGSI_FILE_ADDRESS) {942assert(in->Register.Index == 0);943rv.reg = in->Register.Index;944rv.use = 0;945} else {946rv = etna_native_to_dst(etna_get_dst_reg(c, in->Register)->native,947in->Register.WriteMask);948}949950if (in->Register.Indirect)951rv.amode = etna_amode(in->Indirect);952953return rv;954}955956/* convert texture operand */957static struct etna_inst_tex958convert_tex(struct etna_compile *c, const struct tgsi_full_src_register *in,959const struct tgsi_instruction_texture *tex)960{961struct etna_native_reg native_reg = etna_get_src_reg(c, in->Register)->native;962struct etna_inst_tex rv = {963// XXX .amode (to allow for an array of samplers?)964.swiz = INST_SWIZ_IDENTITY965};966967assert(native_reg.is_tex && native_reg.valid);968rv.id = native_reg.id;969970return rv;971}972973/* convert source operand */974static struct etna_inst_src975etna_create_src(const struct tgsi_full_src_register *tgsi,976const struct etna_native_reg *native)977{978const struct tgsi_src_register *reg = &tgsi->Register;979struct etna_inst_src rv = {980.use = 1,981.swiz = INST_SWIZ(reg->SwizzleX, reg->SwizzleY, reg->SwizzleZ, reg->SwizzleW),982.neg = reg->Negate,983.abs = reg->Absolute,984.rgroup = native->rgroup,985.reg = native->id,986.amode = INST_AMODE_DIRECT,987};988989assert(native->valid && !native->is_tex);990991if (reg->Indirect)992rv.amode = etna_amode(tgsi->Indirect);993994return rv;995}996997static struct etna_inst_src998etna_mov_src_to_temp(struct etna_compile *c, struct etna_inst_src src,999struct etna_native_reg temp)1000{1001struct etna_inst mov = { };10021003mov.opcode = INST_OPCODE_MOV;1004mov.sat = 0;1005mov.dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |1006INST_COMPS_Z | INST_COMPS_W);1007mov.src[2] = src;1008emit_inst(c, &mov);10091010src.swiz = INST_SWIZ_IDENTITY;1011src.neg = src.abs = 0;1012src.rgroup = temp.rgroup;1013src.reg = temp.id;10141015return src;1016}10171018static struct etna_inst_src1019etna_mov_src(struct etna_compile *c, struct etna_inst_src src)1020{1021struct etna_native_reg temp = etna_compile_get_inner_temp(c);10221023return etna_mov_src_to_temp(c, src, temp);1024}10251026static bool1027etna_src_uniforms_conflict(struct etna_inst_src a, struct etna_inst_src b)1028{1029return etna_rgroup_is_uniform(a.rgroup) &&1030etna_rgroup_is_uniform(b.rgroup) &&1031(a.rgroup != b.rgroup || a.reg != 
b.reg);1032}10331034/* create a new label */1035static unsigned int1036alloc_new_label(struct etna_compile *c)1037{1038struct etna_compile_label label = {1039.inst_idx = -1, /* start by point to no specific instruction */1040};10411042array_insert(c->labels, label);10431044return c->labels_count - 1;1045}10461047/* place label at current instruction pointer */1048static void1049label_place(struct etna_compile *c, struct etna_compile_label *label)1050{1051label->inst_idx = c->inst_ptr;1052}10531054/* mark label use at current instruction.1055* target of the label will be filled in in the marked instruction's src2.imm1056* slot as soon1057* as the value becomes known.1058*/1059static void1060label_mark_use(struct etna_compile *c, int lbl_idx)1061{1062assert(c->inst_ptr < ETNA_MAX_INSTRUCTIONS);1063c->lbl_usage[c->inst_ptr] = lbl_idx;1064}10651066/* walk the frame stack and return first frame with matching type */1067static struct etna_compile_frame *1068find_frame(struct etna_compile *c, enum etna_compile_frame_type type)1069{1070for (int sp = c->frame_sp; sp >= 0; sp--)1071if (c->frame_stack[sp].type == type)1072return &c->frame_stack[sp];10731074assert(0);1075return NULL;1076}10771078struct instr_translater {1079void (*fxn)(const struct instr_translater *t, struct etna_compile *c,1080const struct tgsi_full_instruction *inst,1081struct etna_inst_src *src);1082unsigned tgsi_opc;1083uint8_t opc;10841085/* tgsi src -> etna src swizzle */1086int src[3];10871088unsigned cond;1089};10901091static void1092trans_instr(const struct instr_translater *t, struct etna_compile *c,1093const struct tgsi_full_instruction *inst, struct etna_inst_src *src)1094{1095const struct tgsi_opcode_info *info = tgsi_get_opcode_info(inst->Instruction.Opcode);1096struct etna_inst instr = { };10971098instr.opcode = t->opc;1099instr.cond = t->cond;1100instr.sat = inst->Instruction.Saturate;11011102assert(info->num_dst <= 1);1103if (info->num_dst)1104instr.dst = convert_dst(c, &inst->Dst[0]);11051106assert(info->num_src <= ETNA_NUM_SRC);11071108for (unsigned i = 0; i < info->num_src; i++) {1109int swizzle = t->src[i];11101111assert(swizzle != -1);1112instr.src[swizzle] = src[i];1113}11141115emit_inst(c, &instr);1116}11171118static void1119trans_min_max(const struct instr_translater *t, struct etna_compile *c,1120const struct tgsi_full_instruction *inst,1121struct etna_inst_src *src)1122{1123emit_inst(c, &(struct etna_inst) {1124.opcode = INST_OPCODE_SELECT,1125.cond = t->cond,1126.sat = inst->Instruction.Saturate,1127.dst = convert_dst(c, &inst->Dst[0]),1128.src[0] = src[0],1129.src[1] = src[1],1130.src[2] = src[0],1131});1132}11331134static void1135trans_if(const struct instr_translater *t, struct etna_compile *c,1136const struct tgsi_full_instruction *inst, struct etna_inst_src *src)1137{1138struct etna_compile_frame *f = &c->frame_stack[c->frame_sp++];1139struct etna_inst_src imm_0 = alloc_imm_f32(c, 0.0f);11401141/* push IF to stack */1142f->type = ETNA_COMPILE_FRAME_IF;1143/* create "else" label */1144f->lbl_else_idx = alloc_new_label(c);1145f->lbl_endif_idx = -1;11461147/* We need to avoid the emit_inst() below becoming two instructions */1148if (etna_src_uniforms_conflict(src[0], imm_0))1149src[0] = etna_mov_src(c, src[0]);11501151/* mark position in instruction stream of label reference so that it can be1152* filled in in next pass */1153label_mark_use(c, f->lbl_else_idx);11541155/* create conditional branch to label if src0 EQ 0 */1156emit_inst(c, &(struct etna_inst){1157.opcode = INST_OPCODE_BRANCH,1158.cond = 
INST_CONDITION_EQ,1159.src[0] = src[0],1160.src[1] = imm_0,1161/* imm is filled in later */1162});1163}11641165static void1166trans_else(const struct instr_translater *t, struct etna_compile *c,1167const struct tgsi_full_instruction *inst, struct etna_inst_src *src)1168{1169assert(c->frame_sp > 0);1170struct etna_compile_frame *f = &c->frame_stack[c->frame_sp - 1];1171assert(f->type == ETNA_COMPILE_FRAME_IF);11721173/* create "endif" label, and branch to endif label */1174f->lbl_endif_idx = alloc_new_label(c);1175label_mark_use(c, f->lbl_endif_idx);1176emit_inst(c, &(struct etna_inst) {1177.opcode = INST_OPCODE_BRANCH,1178.cond = INST_CONDITION_TRUE,1179/* imm is filled in later */1180});11811182/* mark "else" label at this position in instruction stream */1183label_place(c, &c->labels[f->lbl_else_idx]);1184}11851186static void1187trans_endif(const struct instr_translater *t, struct etna_compile *c,1188const struct tgsi_full_instruction *inst, struct etna_inst_src *src)1189{1190assert(c->frame_sp > 0);1191struct etna_compile_frame *f = &c->frame_stack[--c->frame_sp];1192assert(f->type == ETNA_COMPILE_FRAME_IF);11931194/* assign "endif" or "else" (if no ELSE) label to current position in1195* instruction stream, pop IF */1196if (f->lbl_endif_idx != -1)1197label_place(c, &c->labels[f->lbl_endif_idx]);1198else1199label_place(c, &c->labels[f->lbl_else_idx]);1200}12011202static void1203trans_loop_bgn(const struct instr_translater *t, struct etna_compile *c,1204const struct tgsi_full_instruction *inst,1205struct etna_inst_src *src)1206{1207struct etna_compile_frame *f = &c->frame_stack[c->frame_sp++];12081209/* push LOOP to stack */1210f->type = ETNA_COMPILE_FRAME_LOOP;1211f->lbl_loop_bgn_idx = alloc_new_label(c);1212f->lbl_loop_end_idx = alloc_new_label(c);12131214label_place(c, &c->labels[f->lbl_loop_bgn_idx]);12151216c->num_loops++;1217}12181219static void1220trans_loop_end(const struct instr_translater *t, struct etna_compile *c,1221const struct tgsi_full_instruction *inst,1222struct etna_inst_src *src)1223{1224assert(c->frame_sp > 0);1225struct etna_compile_frame *f = &c->frame_stack[--c->frame_sp];1226assert(f->type == ETNA_COMPILE_FRAME_LOOP);12271228/* mark position in instruction stream of label reference so that it can be1229* filled in in next pass */1230label_mark_use(c, f->lbl_loop_bgn_idx);12311232/* create branch to loop_bgn label */1233emit_inst(c, &(struct etna_inst) {1234.opcode = INST_OPCODE_BRANCH,1235.cond = INST_CONDITION_TRUE,1236.src[0] = src[0],1237/* imm is filled in later */1238});12391240label_place(c, &c->labels[f->lbl_loop_end_idx]);1241}12421243static void1244trans_brk(const struct instr_translater *t, struct etna_compile *c,1245const struct tgsi_full_instruction *inst, struct etna_inst_src *src)1246{1247assert(c->frame_sp > 0);1248struct etna_compile_frame *f = find_frame(c, ETNA_COMPILE_FRAME_LOOP);12491250/* mark position in instruction stream of label reference so that it can be1251* filled in in next pass */1252label_mark_use(c, f->lbl_loop_end_idx);12531254/* create branch to loop_end label */1255emit_inst(c, &(struct etna_inst) {1256.opcode = INST_OPCODE_BRANCH,1257.cond = INST_CONDITION_TRUE,1258.src[0] = src[0],1259/* imm is filled in later */1260});1261}12621263static void1264trans_cont(const struct instr_translater *t, struct etna_compile *c,1265const struct tgsi_full_instruction *inst, struct etna_inst_src *src)1266{1267assert(c->frame_sp > 0);1268struct etna_compile_frame *f = find_frame(c, ETNA_COMPILE_FRAME_LOOP);12691270/* mark position in 
instruction stream of label reference so that it can be1271* filled in in next pass */1272label_mark_use(c, f->lbl_loop_bgn_idx);12731274/* create branch to loop_end label */1275emit_inst(c, &(struct etna_inst) {1276.opcode = INST_OPCODE_BRANCH,1277.cond = INST_CONDITION_TRUE,1278.src[0] = src[0],1279/* imm is filled in later */1280});1281}12821283static void1284trans_deriv(const struct instr_translater *t, struct etna_compile *c,1285const struct tgsi_full_instruction *inst, struct etna_inst_src *src)1286{1287emit_inst(c, &(struct etna_inst) {1288.opcode = t->opc,1289.sat = inst->Instruction.Saturate,1290.dst = convert_dst(c, &inst->Dst[0]),1291.src[0] = src[0],1292.src[2] = src[0],1293});1294}12951296static void1297trans_arl(const struct instr_translater *t, struct etna_compile *c,1298const struct tgsi_full_instruction *inst, struct etna_inst_src *src)1299{1300struct etna_native_reg temp = etna_compile_get_inner_temp(c);1301struct etna_inst arl = { };1302struct etna_inst_dst dst;13031304dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y | INST_COMPS_Z |1305INST_COMPS_W);13061307if (c->specs->has_sign_floor_ceil) {1308struct etna_inst floor = { };13091310floor.opcode = INST_OPCODE_FLOOR;1311floor.src[2] = src[0];1312floor.dst = dst;13131314emit_inst(c, &floor);1315} else {1316struct etna_inst floor[2] = { };13171318floor[0].opcode = INST_OPCODE_FRC;1319floor[0].sat = inst->Instruction.Saturate;1320floor[0].dst = dst;1321floor[0].src[2] = src[0];13221323floor[1].opcode = INST_OPCODE_ADD;1324floor[1].sat = inst->Instruction.Saturate;1325floor[1].dst = dst;1326floor[1].src[0] = src[0];1327floor[1].src[2].use = 1;1328floor[1].src[2].swiz = INST_SWIZ_IDENTITY;1329floor[1].src[2].neg = 1;1330floor[1].src[2].rgroup = temp.rgroup;1331floor[1].src[2].reg = temp.id;13321333emit_inst(c, &floor[0]);1334emit_inst(c, &floor[1]);1335}13361337arl.opcode = INST_OPCODE_MOVAR;1338arl.sat = inst->Instruction.Saturate;1339arl.dst = convert_dst(c, &inst->Dst[0]);1340arl.src[2] = etna_native_to_src(temp, INST_SWIZ_IDENTITY);13411342emit_inst(c, &arl);1343}13441345static void1346trans_lrp(const struct instr_translater *t, struct etna_compile *c,1347const struct tgsi_full_instruction *inst, struct etna_inst_src *src)1348{1349/* dst = src0 * src1 + (1 - src0) * src21350* => src0 * src1 - (src0 - 1) * src21351* => src0 * src1 - (src0 * src2 - src2)1352* MAD tTEMP.xyzw, tSRC0.xyzw, tSRC2.xyzw, -tSRC2.xyzw1353* MAD tDST.xyzw, tSRC0.xyzw, tSRC1.xyzw, -tTEMP.xyzw1354*/1355struct etna_native_reg temp = etna_compile_get_inner_temp(c);1356if (etna_src_uniforms_conflict(src[0], src[1]) ||1357etna_src_uniforms_conflict(src[0], src[2])) {1358src[0] = etna_mov_src(c, src[0]);1359}13601361struct etna_inst mad[2] = { };1362mad[0].opcode = INST_OPCODE_MAD;1363mad[0].sat = 0;1364mad[0].dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |1365INST_COMPS_Z | INST_COMPS_W);1366mad[0].src[0] = src[0];1367mad[0].src[1] = src[2];1368mad[0].src[2] = negate(src[2]);1369mad[1].opcode = INST_OPCODE_MAD;1370mad[1].sat = inst->Instruction.Saturate;1371mad[1].dst = convert_dst(c, &inst->Dst[0]), mad[1].src[0] = src[0];1372mad[1].src[1] = src[1];1373mad[1].src[2] = negate(etna_native_to_src(temp, INST_SWIZ_IDENTITY));13741375emit_inst(c, &mad[0]);1376emit_inst(c, &mad[1]);1377}13781379static void1380trans_lit(const struct instr_translater *t, struct etna_compile *c,1381const struct tgsi_full_instruction *inst, struct etna_inst_src *src)1382{1383/* SELECT.LT tmp._y__, 0, src.yyyy, 01384* - can be eliminated if src.y is a uniform 
and >= 01385* SELECT.GT tmp.___w, 128, src.wwww, 1281386* SELECT.LT tmp.___w, -128, tmp.wwww, -1281387* - can be eliminated if src.w is a uniform and fits clamp1388* LOG tmp.x, void, void, tmp.yyyy1389* MUL tmp.x, tmp.xxxx, tmp.wwww, void1390* LITP dst, undef, src.xxxx, tmp.xxxx1391*/1392struct etna_native_reg inner_temp = etna_compile_get_inner_temp(c);1393struct etna_inst_src src_y = { };13941395if (!etna_rgroup_is_uniform(src[0].rgroup)) {1396src_y = etna_native_to_src(inner_temp, SWIZZLE(Y, Y, Y, Y));13971398struct etna_inst ins = { };1399ins.opcode = INST_OPCODE_SELECT;1400ins.cond = INST_CONDITION_LT;1401ins.dst = etna_native_to_dst(inner_temp, INST_COMPS_Y);1402ins.src[0] = ins.src[2] = alloc_imm_f32(c, 0.0);1403ins.src[1] = swizzle(src[0], SWIZZLE(Y, Y, Y, Y));1404emit_inst(c, &ins);1405} else if (uif(get_imm_u32(c, &src[0], 1)) < 0)1406src_y = alloc_imm_f32(c, 0.0);1407else1408src_y = swizzle(src[0], SWIZZLE(Y, Y, Y, Y));14091410struct etna_inst_src src_w = { };14111412if (!etna_rgroup_is_uniform(src[0].rgroup)) {1413src_w = etna_native_to_src(inner_temp, SWIZZLE(W, W, W, W));14141415struct etna_inst ins = { };1416ins.opcode = INST_OPCODE_SELECT;1417ins.cond = INST_CONDITION_GT;1418ins.dst = etna_native_to_dst(inner_temp, INST_COMPS_W);1419ins.src[0] = ins.src[2] = alloc_imm_f32(c, 128.);1420ins.src[1] = swizzle(src[0], SWIZZLE(W, W, W, W));1421emit_inst(c, &ins);1422ins.cond = INST_CONDITION_LT;1423ins.src[0].neg = !ins.src[0].neg;1424ins.src[2].neg = !ins.src[2].neg;1425ins.src[1] = src_w;1426emit_inst(c, &ins);1427} else if (uif(get_imm_u32(c, &src[0], 3)) < -128.)1428src_w = alloc_imm_f32(c, -128.);1429else if (uif(get_imm_u32(c, &src[0], 3)) > 128.)1430src_w = alloc_imm_f32(c, 128.);1431else1432src_w = swizzle(src[0], SWIZZLE(W, W, W, W));14331434if (c->specs->has_new_transcendentals) { /* Alternative LOG sequence */1435emit_inst(c, &(struct etna_inst) {1436.opcode = INST_OPCODE_LOG,1437.dst = etna_native_to_dst(inner_temp, INST_COMPS_X | INST_COMPS_Y),1438.src[2] = src_y,1439.tex = { .amode=1 }, /* Unknown bit needs to be set */1440});1441emit_inst(c, &(struct etna_inst) {1442.opcode = INST_OPCODE_MUL,1443.dst = etna_native_to_dst(inner_temp, INST_COMPS_X),1444.src[0] = etna_native_to_src(inner_temp, SWIZZLE(X, X, X, X)),1445.src[1] = etna_native_to_src(inner_temp, SWIZZLE(Y, Y, Y, Y)),1446});1447} else {1448struct etna_inst ins[3] = { };1449ins[0].opcode = INST_OPCODE_LOG;1450ins[0].dst = etna_native_to_dst(inner_temp, INST_COMPS_X);1451ins[0].src[2] = src_y;14521453emit_inst(c, &ins[0]);1454}1455emit_inst(c, &(struct etna_inst) {1456.opcode = INST_OPCODE_MUL,1457.sat = 0,1458.dst = etna_native_to_dst(inner_temp, INST_COMPS_X),1459.src[0] = etna_native_to_src(inner_temp, SWIZZLE(X, X, X, X)),1460.src[1] = src_w,1461});1462emit_inst(c, &(struct etna_inst) {1463.opcode = INST_OPCODE_LITP,1464.sat = 0,1465.dst = convert_dst(c, &inst->Dst[0]),1466.src[0] = swizzle(src[0], SWIZZLE(X, X, X, X)),1467.src[1] = swizzle(src[0], SWIZZLE(X, X, X, X)),1468.src[2] = etna_native_to_src(inner_temp, SWIZZLE(X, X, X, X)),1469});1470}14711472static void1473trans_ssg(const struct instr_translater *t, struct etna_compile *c,1474const struct tgsi_full_instruction *inst, struct etna_inst_src *src)1475{1476if (c->specs->has_sign_floor_ceil) {1477emit_inst(c, &(struct etna_inst){1478.opcode = INST_OPCODE_SIGN,1479.sat = inst->Instruction.Saturate,1480.dst = convert_dst(c, &inst->Dst[0]),1481.src[2] = src[0],1482});1483} else {1484struct etna_native_reg temp = 
etna_compile_get_inner_temp(c);1485struct etna_inst ins[2] = { };14861487ins[0].opcode = INST_OPCODE_SET;1488ins[0].cond = INST_CONDITION_NZ;1489ins[0].dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |1490INST_COMPS_Z | INST_COMPS_W);1491ins[0].src[0] = src[0];14921493ins[1].opcode = INST_OPCODE_SELECT;1494ins[1].cond = INST_CONDITION_LZ;1495ins[1].sat = inst->Instruction.Saturate;1496ins[1].dst = convert_dst(c, &inst->Dst[0]);1497ins[1].src[0] = src[0];1498ins[1].src[2] = etna_native_to_src(temp, INST_SWIZ_IDENTITY);1499ins[1].src[1] = negate(ins[1].src[2]);15001501emit_inst(c, &ins[0]);1502emit_inst(c, &ins[1]);1503}1504}15051506static void1507trans_trig(const struct instr_translater *t, struct etna_compile *c,1508const struct tgsi_full_instruction *inst, struct etna_inst_src *src)1509{1510if (c->specs->has_new_transcendentals) { /* Alternative SIN/COS */1511/* On newer chips alternative SIN/COS instructions are implemented,1512* which:1513* - Need their input scaled by 1/pi instead of 2/pi1514* - Output an x and y component, which need to be multiplied to1515* get the result1516*/1517struct etna_native_reg temp = etna_compile_get_inner_temp(c); /* only using .xyz */1518emit_inst(c, &(struct etna_inst) {1519.opcode = INST_OPCODE_MUL,1520.sat = 0,1521.dst = etna_native_to_dst(temp, INST_COMPS_Z),1522.src[0] = src[0], /* any swizzling happens here */1523.src[1] = alloc_imm_f32(c, 1.0f / M_PI),1524});1525emit_inst(c, &(struct etna_inst) {1526.opcode = inst->Instruction.Opcode == TGSI_OPCODE_COS1527? INST_OPCODE_COS1528: INST_OPCODE_SIN,1529.sat = 0,1530.dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y),1531.src[2] = etna_native_to_src(temp, SWIZZLE(Z, Z, Z, Z)),1532.tex = { .amode=1 }, /* Unknown bit needs to be set */1533});1534emit_inst(c, &(struct etna_inst) {1535.opcode = INST_OPCODE_MUL,1536.sat = inst->Instruction.Saturate,1537.dst = convert_dst(c, &inst->Dst[0]),1538.src[0] = etna_native_to_src(temp, SWIZZLE(X, X, X, X)),1539.src[1] = etna_native_to_src(temp, SWIZZLE(Y, Y, Y, Y)),1540});15411542} else if (c->specs->has_sin_cos_sqrt) {1543struct etna_native_reg temp = etna_compile_get_inner_temp(c);1544/* add divide by PI/2, using a temp register. GC20001545* fails with src==dst for the trig instruction. */1546emit_inst(c, &(struct etna_inst) {1547.opcode = INST_OPCODE_MUL,1548.sat = 0,1549.dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |1550INST_COMPS_Z | INST_COMPS_W),1551.src[0] = src[0], /* any swizzling happens here */1552.src[1] = alloc_imm_f32(c, 2.0f / M_PI),1553});1554emit_inst(c, &(struct etna_inst) {1555.opcode = inst->Instruction.Opcode == TGSI_OPCODE_COS1556? INST_OPCODE_COS1557: INST_OPCODE_SIN,1558.sat = inst->Instruction.Saturate,1559.dst = convert_dst(c, &inst->Dst[0]),1560.src[2] = etna_native_to_src(temp, INST_SWIZ_IDENTITY),1561});1562} else {1563/* Implement Nick's fast sine/cosine. 
Taken from:1564* http://forum.devmaster.net/t/fast-and-accurate-sine-cosine/96481565* A=(1/2*PI 0 1/2*PI 0) B=(0.75 0 0.5 0) C=(-4 4 X X)1566* MAD t.x_zw, src.xxxx, A, B1567* FRC t.x_z_, void, void, t.xwzw1568* MAD t.x_z_, t.xwzw, 2, -11569* MUL t._y__, t.wzww, |t.wzww|, void (for sin/scs)1570* DP3 t.x_z_, t.zyww, C, void (for sin)1571* DP3 t.__z_, t.zyww, C, void (for scs)1572* MUL t._y__, t.wxww, |t.wxww|, void (for cos/scs)1573* DP3 t.x_z_, t.xyww, C, void (for cos)1574* DP3 t.x___, t.xyww, C, void (for scs)1575* MAD t._y_w, t,xxzz, |t.xxzz|, -t.xxzz1576* MAD dst, t.ywyw, .2225, t.xzxz1577*/1578struct etna_inst *p, ins[9] = { };1579struct etna_native_reg t0 = etna_compile_get_inner_temp(c);1580struct etna_inst_src t0s = etna_native_to_src(t0, INST_SWIZ_IDENTITY);1581struct etna_inst_src sincos[3], in = src[0];1582sincos[0] = etna_imm_vec4f(c, sincos_const[0]);1583sincos[1] = etna_imm_vec4f(c, sincos_const[1]);15841585/* A uniform source will cause the inner temp limit to1586* be exceeded. Explicitly deal with that scenario.1587*/1588if (etna_rgroup_is_uniform(src[0].rgroup)) {1589struct etna_inst ins = { };1590ins.opcode = INST_OPCODE_MOV;1591ins.dst = etna_native_to_dst(t0, INST_COMPS_X);1592ins.src[2] = in;1593emit_inst(c, &ins);1594in = t0s;1595}15961597ins[0].opcode = INST_OPCODE_MAD;1598ins[0].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z | INST_COMPS_W);1599ins[0].src[0] = swizzle(in, SWIZZLE(X, X, X, X));1600ins[0].src[1] = swizzle(sincos[1], SWIZZLE(X, W, X, W)); /* 1/2*PI */1601ins[0].src[2] = swizzle(sincos[1], SWIZZLE(Y, W, Z, W)); /* 0.75, 0, 0.5, 0 */16021603ins[1].opcode = INST_OPCODE_FRC;1604ins[1].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z);1605ins[1].src[2] = swizzle(t0s, SWIZZLE(X, W, Z, W));16061607ins[2].opcode = INST_OPCODE_MAD;1608ins[2].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z);1609ins[2].src[0] = swizzle(t0s, SWIZZLE(X, W, Z, W));1610ins[2].src[1] = swizzle(sincos[0], SWIZZLE(X, X, X, X)); /* 2 */1611ins[2].src[2] = swizzle(sincos[0], SWIZZLE(Y, Y, Y, Y)); /* -1 */16121613unsigned mul_swiz, dp3_swiz;1614if (inst->Instruction.Opcode == TGSI_OPCODE_SIN) {1615mul_swiz = SWIZZLE(W, Z, W, W);1616dp3_swiz = SWIZZLE(Z, Y, W, W);1617} else {1618mul_swiz = SWIZZLE(W, X, W, W);1619dp3_swiz = SWIZZLE(X, Y, W, W);1620}16211622ins[3].opcode = INST_OPCODE_MUL;1623ins[3].dst = etna_native_to_dst(t0, INST_COMPS_Y);1624ins[3].src[0] = swizzle(t0s, mul_swiz);1625ins[3].src[1] = absolute(ins[3].src[0]);16261627ins[4].opcode = INST_OPCODE_DP3;1628ins[4].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z);1629ins[4].src[0] = swizzle(t0s, dp3_swiz);1630ins[4].src[1] = swizzle(sincos[0], SWIZZLE(Z, W, W, W));16311632p = &ins[5];1633p->opcode = INST_OPCODE_MAD;1634p->dst = etna_native_to_dst(t0, INST_COMPS_Y | INST_COMPS_W);1635p->src[0] = swizzle(t0s, SWIZZLE(X, X, Z, Z));1636p->src[1] = absolute(p->src[0]);1637p->src[2] = negate(p->src[0]);16381639p++;1640p->opcode = INST_OPCODE_MAD;1641p->sat = inst->Instruction.Saturate;1642p->dst = convert_dst(c, &inst->Dst[0]),1643p->src[0] = swizzle(t0s, SWIZZLE(Y, W, Y, W));1644p->src[1] = alloc_imm_f32(c, 0.2225);1645p->src[2] = swizzle(t0s, SWIZZLE(X, Z, X, Z));16461647for (int i = 0; &ins[i] <= p; i++)1648emit_inst(c, &ins[i]);1649}1650}16511652static void1653trans_lg2(const struct instr_translater *t, struct etna_compile *c,1654const struct tgsi_full_instruction *inst, struct etna_inst_src *src)1655{1656if (c->specs->has_new_transcendentals) {1657/* On newer chips alternative LOG instruction is 
implemented,1658* which outputs an x and y component, which need to be multiplied to1659* get the result.1660*/1661struct etna_native_reg temp = etna_compile_get_inner_temp(c); /* only using .xy */1662emit_inst(c, &(struct etna_inst) {1663.opcode = INST_OPCODE_LOG,1664.sat = 0,1665.dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y),1666.src[2] = src[0],1667.tex = { .amode=1 }, /* Unknown bit needs to be set */1668});1669emit_inst(c, &(struct etna_inst) {1670.opcode = INST_OPCODE_MUL,1671.sat = inst->Instruction.Saturate,1672.dst = convert_dst(c, &inst->Dst[0]),1673.src[0] = etna_native_to_src(temp, SWIZZLE(X, X, X, X)),1674.src[1] = etna_native_to_src(temp, SWIZZLE(Y, Y, Y, Y)),1675});1676} else {1677emit_inst(c, &(struct etna_inst) {1678.opcode = INST_OPCODE_LOG,1679.sat = inst->Instruction.Saturate,1680.dst = convert_dst(c, &inst->Dst[0]),1681.src[2] = src[0],1682});1683}1684}16851686static void1687trans_sampler(const struct instr_translater *t, struct etna_compile *c,1688const struct tgsi_full_instruction *inst,1689struct etna_inst_src *src)1690{1691/* There is no native support for GL texture rectangle coordinates, so1692* we have to rescale from ([0, width], [0, height]) to ([0, 1], [0, 1]). */1693if (inst->Texture.Texture == TGSI_TEXTURE_RECT) {1694uint32_t unit = inst->Src[1].Register.Index;1695struct etna_inst ins[2] = { };1696struct etna_native_reg temp = etna_compile_get_inner_temp(c);16971698ins[0].opcode = INST_OPCODE_MUL;1699ins[0].dst = etna_native_to_dst(temp, INST_COMPS_X);1700ins[0].src[0] = src[0];1701ins[0].src[1] = alloc_imm(c, ETNA_UNIFORM_TEXRECT_SCALE_X, unit);17021703ins[1].opcode = INST_OPCODE_MUL;1704ins[1].dst = etna_native_to_dst(temp, INST_COMPS_Y);1705ins[1].src[0] = src[0];1706ins[1].src[1] = alloc_imm(c, ETNA_UNIFORM_TEXRECT_SCALE_Y, unit);17071708emit_inst(c, &ins[0]);1709emit_inst(c, &ins[1]);17101711src[0] = etna_native_to_src(temp, INST_SWIZ_IDENTITY); /* temp.xyzw */1712}17131714switch (inst->Instruction.Opcode) {1715case TGSI_OPCODE_TEX:1716emit_inst(c, &(struct etna_inst) {1717.opcode = INST_OPCODE_TEXLD,1718.sat = 0,1719.dst = convert_dst(c, &inst->Dst[0]),1720.tex = convert_tex(c, &inst->Src[1], &inst->Texture),1721.src[0] = src[0],1722});1723break;17241725case TGSI_OPCODE_TXB:1726emit_inst(c, &(struct etna_inst) {1727.opcode = INST_OPCODE_TEXLDB,1728.sat = 0,1729.dst = convert_dst(c, &inst->Dst[0]),1730.tex = convert_tex(c, &inst->Src[1], &inst->Texture),1731.src[0] = src[0],1732});1733break;17341735case TGSI_OPCODE_TXL:1736emit_inst(c, &(struct etna_inst) {1737.opcode = INST_OPCODE_TEXLDL,1738.sat = 0,1739.dst = convert_dst(c, &inst->Dst[0]),1740.tex = convert_tex(c, &inst->Src[1], &inst->Texture),1741.src[0] = src[0],1742});1743break;17441745case TGSI_OPCODE_TXP: { /* divide src.xyz by src.w */1746struct etna_native_reg temp = etna_compile_get_inner_temp(c);17471748emit_inst(c, &(struct etna_inst) {1749.opcode = INST_OPCODE_RCP,1750.sat = 0,1751.dst = etna_native_to_dst(temp, INST_COMPS_W), /* tmp.w */1752.src[2] = swizzle(src[0], SWIZZLE(W, W, W, W)),1753});1754emit_inst(c, &(struct etna_inst) {1755.opcode = INST_OPCODE_MUL,1756.sat = 0,1757.dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |1758INST_COMPS_Z), /* tmp.xyz */1759.src[0] = etna_native_to_src(temp, SWIZZLE(W, W, W, W)),1760.src[1] = src[0], /* src.xyzw */1761});1762emit_inst(c, &(struct etna_inst) {1763.opcode = INST_OPCODE_TEXLD,1764.sat = 0,1765.dst = convert_dst(c, &inst->Dst[0]),1766.tex = convert_tex(c, &inst->Src[1], &inst->Texture),1767.src[0] = 
etna_native_to_src(temp, INST_SWIZ_IDENTITY), /* tmp.xyzw */1768});1769} break;17701771default:1772BUG("Unhandled instruction %s",1773tgsi_get_opcode_name(inst->Instruction.Opcode));1774assert(0);1775break;1776}1777}17781779static void1780trans_dummy(const struct instr_translater *t, struct etna_compile *c,1781const struct tgsi_full_instruction *inst, struct etna_inst_src *src)1782{1783/* nothing to do */1784}17851786static const struct instr_translater translaters[TGSI_OPCODE_LAST] = {1787#define INSTR(n, f, ...) \1788[TGSI_OPCODE_##n] = {.fxn = (f), .tgsi_opc = TGSI_OPCODE_##n, ##__VA_ARGS__}17891790INSTR(MOV, trans_instr, .opc = INST_OPCODE_MOV, .src = {2, -1, -1}),1791INSTR(RCP, trans_instr, .opc = INST_OPCODE_RCP, .src = {2, -1, -1}),1792INSTR(RSQ, trans_instr, .opc = INST_OPCODE_RSQ, .src = {2, -1, -1}),1793INSTR(MUL, trans_instr, .opc = INST_OPCODE_MUL, .src = {0, 1, -1}),1794INSTR(ADD, trans_instr, .opc = INST_OPCODE_ADD, .src = {0, 2, -1}),1795INSTR(DP2, trans_instr, .opc = INST_OPCODE_DP2, .src = {0, 1, -1}),1796INSTR(DP3, trans_instr, .opc = INST_OPCODE_DP3, .src = {0, 1, -1}),1797INSTR(DP4, trans_instr, .opc = INST_OPCODE_DP4, .src = {0, 1, -1}),1798INSTR(DST, trans_instr, .opc = INST_OPCODE_DST, .src = {0, 1, -1}),1799INSTR(MAD, trans_instr, .opc = INST_OPCODE_MAD, .src = {0, 1, 2}),1800INSTR(EX2, trans_instr, .opc = INST_OPCODE_EXP, .src = {2, -1, -1}),1801INSTR(LG2, trans_lg2),1802INSTR(SQRT, trans_instr, .opc = INST_OPCODE_SQRT, .src = {2, -1, -1}),1803INSTR(FRC, trans_instr, .opc = INST_OPCODE_FRC, .src = {2, -1, -1}),1804INSTR(CEIL, trans_instr, .opc = INST_OPCODE_CEIL, .src = {2, -1, -1}),1805INSTR(FLR, trans_instr, .opc = INST_OPCODE_FLOOR, .src = {2, -1, -1}),1806INSTR(CMP, trans_instr, .opc = INST_OPCODE_SELECT, .src = {0, 1, 2}, .cond = INST_CONDITION_LZ),18071808INSTR(KILL, trans_instr, .opc = INST_OPCODE_TEXKILL),1809INSTR(KILL_IF, trans_instr, .opc = INST_OPCODE_TEXKILL, .src = {0, -1, -1}, .cond = INST_CONDITION_LZ),18101811INSTR(DDX, trans_deriv, .opc = INST_OPCODE_DSX),1812INSTR(DDY, trans_deriv, .opc = INST_OPCODE_DSY),18131814INSTR(IF, trans_if),1815INSTR(ELSE, trans_else),1816INSTR(ENDIF, trans_endif),18171818INSTR(BGNLOOP, trans_loop_bgn),1819INSTR(ENDLOOP, trans_loop_end),1820INSTR(BRK, trans_brk),1821INSTR(CONT, trans_cont),18221823INSTR(MIN, trans_min_max, .opc = INST_OPCODE_SELECT, .cond = INST_CONDITION_GT),1824INSTR(MAX, trans_min_max, .opc = INST_OPCODE_SELECT, .cond = INST_CONDITION_LT),18251826INSTR(ARL, trans_arl),1827INSTR(LRP, trans_lrp),1828INSTR(LIT, trans_lit),1829INSTR(SSG, trans_ssg),18301831INSTR(SIN, trans_trig),1832INSTR(COS, trans_trig),18331834INSTR(SLT, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_LT),1835INSTR(SGE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_GE),1836INSTR(SEQ, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_EQ),1837INSTR(SGT, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_GT),1838INSTR(SLE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_LE),1839INSTR(SNE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_NE),18401841INSTR(TEX, trans_sampler),1842INSTR(TXB, trans_sampler),1843INSTR(TXL, trans_sampler),1844INSTR(TXP, trans_sampler),18451846INSTR(NOP, trans_dummy),1847INSTR(END, trans_dummy),1848};18491850/* Pass -- compile instructions */1851static void1852etna_compile_pass_generate_code(struct etna_compile *c)1853{1854struct 
   struct tgsi_parse_context ctx = { };
   ASSERTED unsigned status = tgsi_parse_init(&ctx, c->tokens);
   assert(status == TGSI_PARSE_OK);

   int inst_idx = 0;
   while (!tgsi_parse_end_of_tokens(&ctx)) {
      const struct tgsi_full_instruction *inst = 0;

      /* No inner temps used yet for this instruction, clear counter */
      c->inner_temps = 0;

      tgsi_parse_token(&ctx);

      switch (ctx.FullToken.Token.Type) {
      case TGSI_TOKEN_TYPE_INSTRUCTION:
         /* iterate over operands */
         inst = &ctx.FullToken.FullInstruction;
         if (c->dead_inst[inst_idx]) { /* skip dead instructions */
            inst_idx++;
            continue;
         }

         /* Lookup the TGSI information and generate the source arguments */
         struct etna_inst_src src[ETNA_NUM_SRC];
         memset(src, 0, sizeof(src));

         const struct tgsi_opcode_info *tgsi = tgsi_get_opcode_info(inst->Instruction.Opcode);

         for (int i = 0; i < tgsi->num_src && i < ETNA_NUM_SRC; i++) {
            const struct tgsi_full_src_register *reg = &inst->Src[i];
            const struct etna_reg_desc *srcreg = etna_get_src_reg(c, reg->Register);
            const struct etna_native_reg *n = &srcreg->native;

            if (!n->valid || n->is_tex)
               continue;

            src[i] = etna_create_src(reg, n);

            /*
             * Replace W=1.0 for point sprite coordinates, since hardware
             * can only replace X,Y and leaves Z,W=0,0 instead of Z,W=0,1
             */
            if (srcreg && srcreg->has_semantic &&
                srcreg->semantic.Name == TGSI_SEMANTIC_TEXCOORD &&
                (c->key->sprite_coord_enable & BITFIELD_BIT(srcreg->semantic.Index))) {
               emit_inst(c, &(struct etna_inst) {
                  .opcode = INST_OPCODE_SET,
                  .cond = INST_CONDITION_TRUE,
                  .dst = etna_native_to_dst(srcreg->native, INST_COMPS_W),
               });
            }
         }

         const unsigned opc = inst->Instruction.Opcode;
         const struct instr_translater *t = &translaters[opc];

         if (t->fxn) {
            t->fxn(t, c, inst, src);

            inst_idx += 1;
         } else {
            BUG("Unhandled instruction %s", tgsi_get_opcode_name(opc));
            assert(0);
         }
         break;
      }
   }
   tgsi_parse_free(&ctx);
}

/* Look up register by semantic */
static struct etna_reg_desc *
find_decl_by_semantic(struct etna_compile *c, uint file, uint name, uint index)
{
   for (int idx = 0; idx < c->file[file].reg_size; ++idx) {
      struct etna_reg_desc *reg = &c->file[file].reg[idx];

      if (reg->semantic.Name == name && reg->semantic.Index == index)
         return reg;
   }

   return NULL; /* not found */
}

/** Add ADD and MUL instructions to remap Z from -1..1 to 0..1 when needed,
 * i.e. when:
 * - this is a vertex shader
 * - and this is an older GPU
 */
static void
etna_compile_add_z_div_if_needed(struct etna_compile *c)
{
   if (c->info.processor == PIPE_SHADER_VERTEX && c->specs->vs_need_z_div) {
      /* find position out */
      struct etna_reg_desc *pos_reg =
         find_decl_by_semantic(c, TGSI_FILE_OUTPUT, TGSI_SEMANTIC_POSITION, 0);

      if (pos_reg != NULL) {
         /*
          * ADD tX.__z_, tX.zzzz, void, tX.wwww
          * MUL tX.__z_, tX.zzzz, 0.5, void
          */
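         /* In other words z' = (z + w) * 0.5, the z = (z + w) / 2.0 fixup
          * mentioned at the top of this file: a clip-space depth with z/w in
          * [-1, 1] becomes z'/w = (z/w + 1) / 2, i.e. a value in [0, 1].
          */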
         emit_inst(c, &(struct etna_inst) {
            .opcode = INST_OPCODE_ADD,
            .dst = etna_native_to_dst(pos_reg->native, INST_COMPS_Z),
            .src[0] = etna_native_to_src(pos_reg->native, SWIZZLE(Z, Z, Z, Z)),
            .src[2] = etna_native_to_src(pos_reg->native, SWIZZLE(W, W, W, W)),
         });
         emit_inst(c, &(struct etna_inst) {
            .opcode = INST_OPCODE_MUL,
            .dst = etna_native_to_dst(pos_reg->native, INST_COMPS_Z),
            .src[0] = etna_native_to_src(pos_reg->native, SWIZZLE(Z, Z, Z, Z)),
            .src[1] = alloc_imm_f32(c, 0.5f),
         });
      }
   }
}

static void
etna_compile_frag_rb_swap(struct etna_compile *c)
{
   if (c->info.processor == PIPE_SHADER_FRAGMENT && c->key->frag_rb_swap) {
      /* find color out */
      struct etna_reg_desc *color_reg =
         find_decl_by_semantic(c, TGSI_FILE_OUTPUT, TGSI_SEMANTIC_COLOR, 0);

      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MOV,
         .dst = etna_native_to_dst(color_reg->native, INST_COMPS_X | INST_COMPS_Y | INST_COMPS_Z | INST_COMPS_W),
         .src[2] = etna_native_to_src(color_reg->native, SWIZZLE(Z, Y, X, W)),
      });
   }
}

/** add a NOP to the shader if
 * a) the shader is empty
 * or
 * b) there is a label at the end of the shader
 */
static void
etna_compile_add_nop_if_needed(struct etna_compile *c)
{
   bool label_at_last_inst = false;

   for (int idx = 0; idx < c->labels_count; ++idx) {
      if (c->labels[idx].inst_idx == c->inst_ptr)
         label_at_last_inst = true;
   }

   if (c->inst_ptr == 0 || label_at_last_inst)
      emit_inst(c, &(struct etna_inst){.opcode = INST_OPCODE_NOP});
}

static void
assign_uniforms(struct etna_compile_file *file, unsigned base)
{
   for (int idx = 0; idx < file->reg_size; ++idx) {
      file->reg[idx].native.valid = 1;
      file->reg[idx].native.rgroup = INST_RGROUP_UNIFORM_0;
      file->reg[idx].native.id = base + idx;
   }
}

/* Allocate CONST and IMM to native ETNA_RGROUP_UNIFORM(x).
 * CONST must come first and be consecutive, as const buffers are expected to
 * be consecutive, with IMM placed after it; this is more convenient because
 * it is possible for the compilation process itself to generate extra
 * immediates for constants such as pi, one, zero.
 */
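/* For example, with two CONSTANT vec4 registers declared, imm_base below
 * becomes 8 (in 32-bit units): the constants occupy uniform registers 0..1
 * and compiler-generated immediates start at uniform register 2.
 */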
static void
assign_constants_and_immediates(struct etna_compile *c)
{
   assign_uniforms(&c->file[TGSI_FILE_CONSTANT], 0);
   /* immediates start after the constants */
   c->imm_base = c->file[TGSI_FILE_CONSTANT].reg_size * 4;
   assign_uniforms(&c->file[TGSI_FILE_IMMEDIATE], c->imm_base / 4);
   DBG_F(ETNA_DBG_COMPILER_MSGS, "imm base: %i size: %i", c->imm_base,
         c->imm_size);
}

/* Assign declared samplers to native texture units */
static void
assign_texture_units(struct etna_compile *c)
{
   uint tex_base = 0;

   if (c->info.processor == PIPE_SHADER_VERTEX)
      tex_base = c->specs->vertex_sampler_offset;

   for (int idx = 0; idx < c->file[TGSI_FILE_SAMPLER].reg_size; ++idx) {
      c->file[TGSI_FILE_SAMPLER].reg[idx].native.valid = 1;
      c->file[TGSI_FILE_SAMPLER].reg[idx].native.is_tex = 1; // overrides rgroup
      c->file[TGSI_FILE_SAMPLER].reg[idx].native.id = tex_base + idx;
   }
}

/* Additional pass to fill in branch targets. This pass should be last
 * as no instruction reordering or removing/addition can be done anymore
 * once the branch targets are computed.
 */
static void
etna_compile_fill_in_labels(struct etna_compile *c)
{
   for (int idx = 0; idx < c->inst_ptr; ++idx) {
      if (c->lbl_usage[idx] != -1)
         etna_assemble_set_imm(&c->code[idx * 4],
                               c->labels[c->lbl_usage[idx]].inst_idx);
   }
}

/* compare two etna_native_reg structures, return true if equal */
static bool
cmp_etna_native_reg(const struct etna_native_reg to,
                    const struct etna_native_reg from)
{
   return to.valid == from.valid && to.is_tex == from.is_tex &&
          to.rgroup == from.rgroup && to.id == from.id;
}

/* go through all declarations and swap native registers *to* and *from* */
static void
swap_native_registers(struct etna_compile *c, const struct etna_native_reg to,
                      const struct etna_native_reg from)
{
   if (cmp_etna_native_reg(from, to))
      return; /* Nothing to do */

   for (int idx = 0; idx < c->total_decls; ++idx) {
      if (cmp_etna_native_reg(c->decl[idx].native, from)) {
         c->decl[idx].native = to;
      } else if (cmp_etna_native_reg(c->decl[idx].native, to)) {
         c->decl[idx].native = from;
      }
   }
}

/* For PS we need to permute so that inputs are always in temporary 0..N-1.
 * Semantic POS is always t0. If that semantic is not used, avoid t0.
 */
static void
permute_ps_inputs(struct etna_compile *c)
{
   /* Special inputs:
    * gl_FragCoord   VARYING_SLOT_POS    TGSI_SEMANTIC_POSITION
    * gl_FrontFacing VARYING_SLOT_FACE   TGSI_SEMANTIC_FACE
    * gl_PointCoord  VARYING_SLOT_PNTC   TGSI_SEMANTIC_PCOORD
    * gl_TexCoord    VARYING_SLOT_TEX    TGSI_SEMANTIC_TEXCOORD
    */
   uint native_idx = 1;

   for (int idx = 0; idx < c->file[TGSI_FILE_INPUT].reg_size; ++idx) {
      struct etna_reg_desc *reg = &c->file[TGSI_FILE_INPUT].reg[idx];
      uint input_id;
      assert(reg->has_semantic);

      if (!reg->active ||
          reg->semantic.Name == TGSI_SEMANTIC_POSITION ||
          reg->semantic.Name == TGSI_SEMANTIC_FACE)
         continue;

      input_id = native_idx++;
      swap_native_registers(c, etna_native_temp(input_id),
                            c->file[TGSI_FILE_INPUT].reg[idx].native);
   }

   c->num_varyings = native_idx - 1;

   if (native_idx > c->next_free_native)
      c->next_free_native = native_idx;
}

static inline int sem2slot(const struct tgsi_declaration_semantic *semantic)
{
   return tgsi_varying_semantic_to_slot(semantic->Name, semantic->Index);
}

/* fill in ps inputs into shader object */
static void
fill_in_ps_inputs(struct etna_shader_variant *sobj, struct etna_compile *c)
{
   struct etna_shader_io_file *sf = &sobj->infile;

   sf->num_reg = 0;

   for (int idx = 0; idx < c->file[TGSI_FILE_INPUT].reg_size; ++idx) {
      struct etna_reg_desc *reg = &c->file[TGSI_FILE_INPUT].reg[idx];

      if (reg->native.id > 0) {
         assert(sf->num_reg < ETNA_NUM_INPUTS);
         sf->reg[sf->num_reg].reg = reg->native.id;
         sf->reg[sf->num_reg].slot = sem2slot(&reg->semantic);
         /* convert usage mask to number of components (*=wildcard)
          * .r    (0..1)  -> 1 component
          * .*g   (2..3)  -> 2 components
          * .**b  (4..7)  -> 3 components
          * .***a (8..15) -> 4 components
          */
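         /* e.g. usage_mask 0x5 (.r and .b read) gives util_last_bit(0x5) = 3,
          * so three components are counted even though .g itself is unused.
          */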
         sf->reg[sf->num_reg].num_components = util_last_bit(reg->usage_mask);
         sf->num_reg++;
      }
   }

   assert(sf->num_reg == c->num_varyings);
   sobj->input_count_unk8 = 31; /* XXX what is this */
}

/* fill in output mapping for ps into shader object */
static void
fill_in_ps_outputs(struct etna_shader_variant *sobj, struct etna_compile *c)
{
   sobj->outfile.num_reg = 0;

   for (int idx = 0; idx < c->file[TGSI_FILE_OUTPUT].reg_size; ++idx) {
      struct etna_reg_desc *reg = &c->file[TGSI_FILE_OUTPUT].reg[idx];

      switch (reg->semantic.Name) {
      case TGSI_SEMANTIC_COLOR: /* FRAG_RESULT_COLOR */
         sobj->ps_color_out_reg = reg->native.id;
         break;
      case TGSI_SEMANTIC_POSITION: /* FRAG_RESULT_DEPTH */
         sobj->ps_depth_out_reg = reg->native.id; /* =always native reg 0, only z component should be assigned */
         break;
      default:
         assert(0); /* only outputs supported are COLOR and POSITION at the moment */
      }
   }
}

/* fill in inputs for vs into shader object */
static void
fill_in_vs_inputs(struct etna_shader_variant *sobj, struct etna_compile *c)
{
   struct etna_shader_io_file *sf = &sobj->infile;

   sf->num_reg = 0;
   for (int idx = 0; idx < c->file[TGSI_FILE_INPUT].reg_size; ++idx) {
      struct etna_reg_desc *reg = &c->file[TGSI_FILE_INPUT].reg[idx];
      assert(sf->num_reg < ETNA_NUM_INPUTS);

      if (!reg->native.valid)
         continue;

      /* XXX exclude inputs with special semantics such as gl_frontFacing */
      sf->reg[sf->num_reg].reg = reg->native.id;
      sf->reg[sf->num_reg].slot = sem2slot(&reg->semantic);
      sf->reg[sf->num_reg].num_components = util_last_bit(reg->usage_mask);
      sf->num_reg++;
   }

   sobj->input_count_unk8 = (sf->num_reg + 19) / 16; /* XXX what is this */
}

/* fill in outputs for vs into shader object */
static void
fill_in_vs_outputs(struct etna_shader_variant *sobj, struct etna_compile *c)
{
   struct etna_shader_io_file *sf = &sobj->outfile;

   sf->num_reg = 0;
   for (int idx = 0; idx < c->file[TGSI_FILE_OUTPUT].reg_size; ++idx) {
      struct etna_reg_desc *reg = &c->file[TGSI_FILE_OUTPUT].reg[idx];
      assert(sf->num_reg < ETNA_NUM_INPUTS);

      switch (reg->semantic.Name) {
      case TGSI_SEMANTIC_POSITION:
         sobj->vs_pos_out_reg = reg->native.id;
         break;
      case TGSI_SEMANTIC_PSIZE:
         sobj->vs_pointsize_out_reg = reg->native.id;
         break;
      default:
         sf->reg[sf->num_reg].reg = reg->native.id;
         sf->reg[sf->num_reg].slot = sem2slot(&reg->semantic);
         sf->reg[sf->num_reg].num_components = 4; // XXX reg->num_components;
         sf->num_reg++;
      }
   }

   /* fill in "mystery meat" load balancing value. This value determines how
    * work is scheduled between VS and PS in the unified shader architecture.
    * More precisely, it is determined from the number of VS outputs, as well
    * as chip-specific vertex output buffer size, vertex cache size, and the
    * number of shader cores.
    *
    * XXX this is a conservative estimate, the "optimal" value is only known
    * for sure at link time because some outputs may be unused and thus
    * unmapped. Then again, in the general use case with GLSL the vertex and
    * fragment shaders are linked already before submitting to Gallium, thus
    * all outputs are used.
    */
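   /* Purely as an illustration, with made-up example specs
    * (vertex_output_buffer_size = 1024, vertex_cache_size = 16,
    * shader_core_count = 2) and four vec4 outputs (half_out = 2), the
    * integer math below yields b = ((20480 / (1024 - 64)) + 9) / 10 = 3 and
    * a = (3 + 256 / 4) / 2 = 33.
    */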
   int half_out = (c->file[TGSI_FILE_OUTPUT].reg_size + 1) / 2;
   assert(half_out);

   uint32_t b = ((20480 / (c->specs->vertex_output_buffer_size -
                           2 * half_out * c->specs->vertex_cache_size)) +
                 9) /
                10;
   uint32_t a = (b + 256 / (c->specs->shader_core_count * half_out)) / 2;
   sobj->vs_load_balancing = VIVS_VS_LOAD_BALANCING_A(MIN2(a, 255)) |
                             VIVS_VS_LOAD_BALANCING_B(MIN2(b, 255)) |
                             VIVS_VS_LOAD_BALANCING_C(0x3f) |
                             VIVS_VS_LOAD_BALANCING_D(0x0f);
}

static bool
etna_compile_check_limits(struct etna_compile *c)
{
   int max_uniforms = (c->info.processor == PIPE_SHADER_VERTEX)
                         ? c->specs->max_vs_uniforms
                         : c->specs->max_ps_uniforms;
   /* round up number of uniforms, including immediates, in units of four */
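   /* e.g. imm_base = 8 and imm_size = 5 count as 8/4 + (5+3)/4 = 4 vec4 uniforms */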
   int num_uniforms = c->imm_base / 4 + (c->imm_size + 3) / 4;

   if (!c->specs->has_icache && c->inst_ptr > c->specs->max_instructions) {
      DBG("Number of instructions (%d) exceeds maximum %d", c->inst_ptr,
          c->specs->max_instructions);
      return false;
   }

   if (c->next_free_native > c->specs->max_registers) {
      DBG("Number of registers (%d) exceeds maximum %d", c->next_free_native,
          c->specs->max_registers);
      return false;
   }

   if (num_uniforms > max_uniforms) {
      DBG("Number of uniforms (%d) exceeds maximum %d", num_uniforms,
          max_uniforms);
      return false;
   }

   if (c->num_varyings > c->specs->max_varyings) {
      DBG("Number of varyings (%d) exceeds maximum %d", c->num_varyings,
          c->specs->max_varyings);
      return false;
   }

   if (c->imm_base > c->specs->num_constants) {
      DBG("Number of constants (%d) exceeds maximum %d", c->imm_base,
          c->specs->num_constants);
   }

   return true;
}

static void
copy_uniform_state_to_shader(struct etna_compile *c, struct etna_shader_variant *sobj)
{
   uint32_t count = c->imm_base + c->imm_size;
   struct etna_shader_uniform_info *uinfo = &sobj->uniforms;

   uinfo->count = count;

   uinfo->data = malloc(count * sizeof(*c->imm_data));
   for (unsigned i = 0; i < c->imm_base; i++)
      uinfo->data[i] = i;
   memcpy(&uinfo->data[c->imm_base], c->imm_data, c->imm_size * sizeof(*c->imm_data));

   uinfo->contents = malloc(count * sizeof(*c->imm_contents));
   for (unsigned i = 0; i < c->imm_base; i++)
      uinfo->contents[i] = ETNA_UNIFORM_UNIFORM;
   memcpy(&uinfo->contents[c->imm_base], c->imm_contents, c->imm_size * sizeof(*c->imm_contents));

   etna_set_shader_uniforms_dirty_flags(sobj);
}

bool
etna_compile_shader(struct etna_shader_variant *v)
{
   if (DBG_ENABLED(ETNA_DBG_NIR))
      return etna_compile_shader_nir(v);

   /* Create scratch space that may be too large to fit on stack */
   bool ret;
   struct etna_compile *c;

   if (unlikely(!v))
      return false;

   const struct etna_specs *specs = v->shader->specs;

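   /* Opcodes this backend does not handle directly (POW, EXP, LOG, TRUNC)
    * are rewritten by tgsi_transform_lowering() below; FLR/CEIL and DP2 are
    * only lowered when the GPU lacks the corresponding native instructions.
    */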
   struct tgsi_lowering_config lconfig = {
      .lower_FLR = !specs->has_sign_floor_ceil,
      .lower_CEIL = !specs->has_sign_floor_ceil,
      .lower_POW = true,
      .lower_EXP = true,
      .lower_LOG = true,
      .lower_DP2 = !specs->has_halti2_instructions,
      .lower_TRUNC = true,
   };

   c = CALLOC_STRUCT(etna_compile);
   if (!c)
      return false;

   memset(&c->lbl_usage, -1, sizeof(c->lbl_usage));

   const struct tgsi_token *tokens = v->shader->tokens;

   c->specs = specs;
   c->key = &v->key;
   c->tokens = tgsi_transform_lowering(&lconfig, tokens, &c->info);
   c->free_tokens = !!c->tokens;
   if (!c->tokens) {
      /* no lowering */
      c->tokens = tokens;
   }

   /* Build a map from gallium register to native registers for files
    * CONST, SAMP, IMM, OUT, IN, TEMP.
    * SAMP will map as-is for fragment shaders, there will be a +8 offset for
    * vertex shaders.
    */
   /* Pass one -- check register file declarations and immediates */
   etna_compile_parse_declarations(c);

   etna_allocate_decls(c);

   /* Pass two -- check usage of temporaries, inputs, outputs */
   etna_compile_pass_check_usage(c);

   assign_special_inputs(c);

   /* Assign native temp register to TEMPs */
   assign_temporaries_to_native(c, &c->file[TGSI_FILE_TEMPORARY]);

   /* optimize outputs */
   etna_compile_pass_optimize_outputs(c);

   /* assign inputs: last usage of input should be <= first usage of temp */
   /* potential optimization case:
    * if single MOV TEMP[y], IN[x] before which temp y is not used, and after
    * which IN[x] is not read, temp[y] can be used as input register as-is
    */
   /* sort temporaries by first use
    * sort inputs by last usage
    * iterate over inputs, temporaries
    *   if last usage of input <= first usage of temp:
    *     assign input to temp
    *     advance input, temporary pointer
    *   else
    *     advance temporary pointer
    *
    * potential problem: instruction with multiple inputs of which one is the
    * temp and the other is the input; however, as the temp is not used
    * before this, how would this make sense? uninitialized temporaries have
    * an undefined value, so this would be ok
    */
   assign_inouts_to_temporaries(c, TGSI_FILE_INPUT);

   /* assign outputs: first usage of output should be >= last usage of temp */
   /* potential optimization case:
    * if single MOV OUT[x], TEMP[y] (with full write mask, or at least
    * writing all components that are used in the shader) after which temp y
    * is no longer used, temp[y] can be used as output register as-is
    *
    * potential problem: instruction with multiple outputs of which one is the
    * temp and the other is the output; however, as the temp is not used
    * after this, how would this make sense? could just discard the output
    * value
    */
   /* sort temporaries by last use
    * sort outputs by first usage
    * iterate over outputs, temporaries
    *   if first usage of output >= last usage of temp:
    *     assign output to temp
    *     advance output, temporary pointer
    *   else
    *     advance temporary pointer
    */
   assign_inouts_to_temporaries(c, TGSI_FILE_OUTPUT);

   assign_constants_and_immediates(c);
   assign_texture_units(c);

   /* list declarations */
   for (int x = 0; x < c->total_decls; ++x) {
      DBG_F(ETNA_DBG_COMPILER_MSGS, "%i: %s,%d active=%i first_use=%i "
                                    "last_use=%i native=%i usage_mask=%x "
                                    "has_semantic=%i",
            x, tgsi_file_name(c->decl[x].file), c->decl[x].idx,
            c->decl[x].active, c->decl[x].first_use, c->decl[x].last_use,
            c->decl[x].native.valid ? c->decl[x].native.id : -1,
            c->decl[x].usage_mask, c->decl[x].has_semantic);
      if (c->decl[x].has_semantic)
         DBG_F(ETNA_DBG_COMPILER_MSGS, " semantic_name=%s semantic_idx=%i",
               tgsi_semantic_names[c->decl[x].semantic.Name],
               c->decl[x].semantic.Index);
   }
   /* XXX for PS we need to permute so that inputs are always in temporary
    * 0..N-1. There is no "switchboard" for varyings (AFAIK!). The output
    * color, however, can be routed from an arbitrary temporary.
    */
   if (c->info.processor == PIPE_SHADER_FRAGMENT)
      permute_ps_inputs(c);

   /* list declarations */
   for (int x = 0; x < c->total_decls; ++x) {
      DBG_F(ETNA_DBG_COMPILER_MSGS, "%i: %s,%d active=%i first_use=%i "
                                    "last_use=%i native=%i usage_mask=%x "
                                    "has_semantic=%i",
            x, tgsi_file_name(c->decl[x].file), c->decl[x].idx,
            c->decl[x].active, c->decl[x].first_use, c->decl[x].last_use,
            c->decl[x].native.valid ? c->decl[x].native.id : -1,
            c->decl[x].usage_mask, c->decl[x].has_semantic);
      if (c->decl[x].has_semantic)
         DBG_F(ETNA_DBG_COMPILER_MSGS, " semantic_name=%s semantic_idx=%i",
               tgsi_semantic_names[c->decl[x].semantic.Name],
               c->decl[x].semantic.Index);
   }

   /* pass 3: generate instructions */
   etna_compile_pass_generate_code(c);
   etna_compile_add_z_div_if_needed(c);
   etna_compile_frag_rb_swap(c);
   etna_compile_add_nop_if_needed(c);

   ret = etna_compile_check_limits(c);
   if (!ret)
      goto out;

   etna_compile_fill_in_labels(c);

   /* fill in output structure */
   v->stage = c->info.processor == PIPE_SHADER_FRAGMENT ? MESA_SHADER_FRAGMENT : MESA_SHADER_VERTEX;
   v->uses_discard = c->info.uses_kill;
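   /* each hardware instruction is four 32-bit words, so code_size below is
    * in 32-bit units (inst_ptr * 4) while mem_dup copies inst_ptr * 16 bytes */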
   v->code_size = c->inst_ptr * 4;
   v->code = mem_dup(c->code, c->inst_ptr * 16);
   v->num_loops = c->num_loops;
   v->num_temps = c->next_free_native;
   v->vs_id_in_reg = -1;
   v->vs_pos_out_reg = -1;
   v->vs_pointsize_out_reg = -1;
   v->ps_color_out_reg = -1;
   v->ps_depth_out_reg = -1;
   v->needs_icache = c->inst_ptr > c->specs->max_instructions;
   copy_uniform_state_to_shader(c, v);

   if (c->info.processor == PIPE_SHADER_VERTEX) {
      fill_in_vs_inputs(v, c);
      fill_in_vs_outputs(v, c);
   } else if (c->info.processor == PIPE_SHADER_FRAGMENT) {
      fill_in_ps_inputs(v, c);
      fill_in_ps_outputs(v, c);
   }

out:
   if (c->free_tokens)
      FREE((void *)c->tokens);

   FREE(c->labels);
   FREE(c);

   return ret;
}

static const struct etna_shader_inout *
etna_shader_vs_lookup(const struct etna_shader_variant *sobj,
                      const struct etna_shader_inout *in)
{
   for (int i = 0; i < sobj->outfile.num_reg; i++)
      if (sobj->outfile.reg[i].slot == in->slot)
         return &sobj->outfile.reg[i];

   return NULL;
}

bool
etna_link_shader(struct etna_shader_link_info *info,
                 const struct etna_shader_variant *vs, const struct etna_shader_variant *fs)
{
   int comp_ofs = 0;
   /* For each fragment input we need to find the associated vertex shader
    * output, which can be found by matching on semantic name and index. A
    * binary search could be used because the vs outputs are sorted by their
    * semantic index and grouped by semantic type by fill_in_vs_outputs.
    */
   assert(fs->infile.num_reg < ETNA_NUM_INPUTS);
   info->pcoord_varying_comp_ofs = -1;

   for (int idx = 0; idx < fs->infile.num_reg; ++idx) {
      const struct etna_shader_inout *fsio = &fs->infile.reg[idx];
      const struct etna_shader_inout *vsio = etna_shader_vs_lookup(vs, fsio);
      struct etna_varying *varying;
      bool interpolate_always = ((fsio->slot != VARYING_SLOT_COL0) &&
                                 (fsio->slot != VARYING_SLOT_COL1));

      assert(fsio->reg > 0 && fsio->reg <= ARRAY_SIZE(info->varyings));

      if (fsio->reg > info->num_varyings)
         info->num_varyings = fsio->reg;

      varying = &info->varyings[fsio->reg - 1];
      varying->num_components = fsio->num_components;

      if (!interpolate_always) /* colors affected by flat shading */
         varying->pa_attributes = 0x200;
      else /* texture coord or other bypasses flat shading */
         varying->pa_attributes = 0x2f1;

      varying->use[0] = VARYING_COMPONENT_USE_UNUSED;
      varying->use[1] = VARYING_COMPONENT_USE_UNUSED;
      varying->use[2] = VARYING_COMPONENT_USE_UNUSED;
      varying->use[3] = VARYING_COMPONENT_USE_UNUSED;

      /* point/tex coord is an input to the PS without matching VS output,
       * so it gets a varying slot without being assigned a VS register.
       */
      if (util_varying_is_point_coord(fsio->slot, fs->key.sprite_coord_enable)) {
         varying->use[0] = VARYING_COMPONENT_USE_POINTCOORD_X;
         varying->use[1] = VARYING_COMPONENT_USE_POINTCOORD_Y;

         info->pcoord_varying_comp_ofs = comp_ofs;
      } else {
         if (vsio == NULL) { /* not found -- link error */
            BUG("Semantic value not found in vertex shader outputs\n");
            return true;
         }

         varying->reg = vsio->reg;
      }

      comp_ofs += varying->num_components;
   }

   assert(info->num_varyings == fs->infile.num_reg);

   return false;
}