Path: blob/21.2-virgl/src/gallium/drivers/r300/compiler/radeon_compiler.c
4574 views
/*1* Copyright 2009 Nicolai Hähnle <[email protected]>2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* on the rights to use, copy, modify, merge, publish, distribute, sub7* license, and/or sell copies of the Software, and to permit persons to whom8* the Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL17* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,18* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR19* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE20* USE OR OTHER DEALINGS IN THE SOFTWARE. */2122#include "radeon_compiler.h"2324#include <stdarg.h>25#include <stdio.h>26#include <stdlib.h>2728#include "radeon_dataflow.h"29#include "radeon_program.h"30#include "radeon_program_pair.h"31#include "radeon_regalloc.h"32#include "radeon_compiler_util.h"333435void rc_init(struct radeon_compiler * c, const struct rc_regalloc_state *rs)36{37memset(c, 0, sizeof(*c));3839memory_pool_init(&c->Pool);40c->Program.Instructions.Prev = &c->Program.Instructions;41c->Program.Instructions.Next = &c->Program.Instructions;42c->Program.Instructions.U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE;43c->regalloc_state = rs;44}4546void rc_destroy(struct radeon_compiler * c)47{48rc_constants_destroy(&c->Program.Constants);49memory_pool_destroy(&c->Pool);50free(c->ErrorMsg);51}5253void rc_debug(struct radeon_compiler * c, const char * fmt, ...)54{55va_list ap;5657if (!(c->Debug & RC_DBG_LOG))58return;5960va_start(ap, fmt);61vfprintf(stderr, fmt, ap);62va_end(ap);63}6465void rc_error(struct radeon_compiler * c, const char * fmt, ...)66{67va_list ap;6869c->Error = 1;7071if (!c->ErrorMsg) {72/* Only remember the first error */73char buf[1024];74int written;7576va_start(ap, fmt);77written = vsnprintf(buf, sizeof(buf), fmt, ap);78va_end(ap);7980if (written < sizeof(buf)) {81c->ErrorMsg = strdup(buf);82} else {83c->ErrorMsg = malloc(written + 1);8485va_start(ap, fmt);86vsnprintf(c->ErrorMsg, written + 1, fmt, ap);87va_end(ap);88}89}9091if (c->Debug & RC_DBG_LOG) {92fprintf(stderr, "r300compiler error: ");9394va_start(ap, fmt);95vfprintf(stderr, fmt, ap);96va_end(ap);97}98}99100int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion)101{102rc_error(c, "ICE at %s:%i: assertion failed: %s\n", file, line, assertion);103return 1;104}105106/**107* Recompute c->Program.InputsRead and c->Program.OutputsWritten108* based on which inputs and outputs are actually referenced109* in program instructions.110*/111void rc_calculate_inputs_outputs(struct radeon_compiler * c)112{113struct rc_instruction *inst;114115c->Program.InputsRead = 0;116c->Program.OutputsWritten = 0;117118for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next)119{120const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);121int i;122123for (i = 0; i < opcode->NumSrcRegs; ++i) {124if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT)125c->Program.InputsRead |= 1 << inst->U.I.SrcReg[i].Index;126}127128if (opcode->HasDstReg) {129if (inst->U.I.DstReg.File == RC_FILE_OUTPUT)130c->Program.OutputsWritten |= 1 << inst->U.I.DstReg.Index;131}132}133}134135/**136* Rewrite the program such that everything that source the given input137* register will source new_input instead.138*/139void rc_move_input(struct radeon_compiler * c, unsigned input, struct rc_src_register new_input)140{141struct rc_instruction * inst;142143c->Program.InputsRead &= ~(1 << input);144145for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {146const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);147unsigned i;148149for(i = 0; i < opcode->NumSrcRegs; ++i) {150if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && inst->U.I.SrcReg[i].Index == input) {151inst->U.I.SrcReg[i].File = new_input.File;152inst->U.I.SrcReg[i].Index = new_input.Index;153inst->U.I.SrcReg[i].Swizzle = combine_swizzles(new_input.Swizzle, inst->U.I.SrcReg[i].Swizzle);154if (!inst->U.I.SrcReg[i].Abs) {155inst->U.I.SrcReg[i].Negate ^= new_input.Negate;156inst->U.I.SrcReg[i].Abs = new_input.Abs;157}158159c->Program.InputsRead |= 1 << new_input.Index;160}161}162}163}164165166/**167* Rewrite the program such that everything that writes into the given168* output register will instead write to new_output. The new_output169* writemask is honoured.170*/171void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask)172{173struct rc_instruction * inst;174175c->Program.OutputsWritten &= ~(1 << output);176177for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {178const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);179180if (opcode->HasDstReg) {181if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) {182inst->U.I.DstReg.Index = new_output;183inst->U.I.DstReg.WriteMask &= writemask;184185c->Program.OutputsWritten |= 1 << new_output;186}187}188}189}190191192/**193* Rewrite the program such that a given output is duplicated.194*/195void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output)196{197unsigned tempreg = rc_find_free_temporary(c);198struct rc_instruction * inst;199200for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {201const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);202203if (opcode->HasDstReg) {204if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) {205inst->U.I.DstReg.File = RC_FILE_TEMPORARY;206inst->U.I.DstReg.Index = tempreg;207}208}209}210211inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev);212inst->U.I.Opcode = RC_OPCODE_MOV;213inst->U.I.DstReg.File = RC_FILE_OUTPUT;214inst->U.I.DstReg.Index = output;215216inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;217inst->U.I.SrcReg[0].Index = tempreg;218inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;219220inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev);221inst->U.I.Opcode = RC_OPCODE_MOV;222inst->U.I.DstReg.File = RC_FILE_OUTPUT;223inst->U.I.DstReg.Index = dup_output;224225inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;226inst->U.I.SrcReg[0].Index = tempreg;227inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;228229c->Program.OutputsWritten |= 1 << dup_output;230}231232233/**234* Introduce standard code fragment to deal with fragment.position.235*/236void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input,237int full_vtransform)238{239unsigned tempregi = rc_find_free_temporary(c);240struct rc_instruction * inst_rcp;241struct rc_instruction * inst_mul;242struct rc_instruction * inst_mad;243struct rc_instruction * inst;244245c->Program.InputsRead &= ~(1 << wpos);246c->Program.InputsRead |= 1 << new_input;247248/* perspective divide */249inst_rcp = rc_insert_new_instruction(c, &c->Program.Instructions);250inst_rcp->U.I.Opcode = RC_OPCODE_RCP;251252inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;253inst_rcp->U.I.DstReg.Index = tempregi;254inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;255256inst_rcp->U.I.SrcReg[0].File = RC_FILE_INPUT;257inst_rcp->U.I.SrcReg[0].Index = new_input;258inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;259260inst_mul = rc_insert_new_instruction(c, inst_rcp);261inst_mul->U.I.Opcode = RC_OPCODE_MUL;262263inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;264inst_mul->U.I.DstReg.Index = tempregi;265inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ;266267inst_mul->U.I.SrcReg[0].File = RC_FILE_INPUT;268inst_mul->U.I.SrcReg[0].Index = new_input;269270inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;271inst_mul->U.I.SrcReg[1].Index = tempregi;272inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;273274/* viewport transformation */275inst_mad = rc_insert_new_instruction(c, inst_mul);276inst_mad->U.I.Opcode = RC_OPCODE_MAD;277278inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;279inst_mad->U.I.DstReg.Index = tempregi;280inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ;281282inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;283inst_mad->U.I.SrcReg[0].Index = tempregi;284inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;285286inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT;287inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0;288289inst_mad->U.I.SrcReg[2].File = RC_FILE_CONSTANT;290inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZ0;291292if (full_vtransform) {293inst_mad->U.I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_SCALE, 0);294inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_OFFSET, 0);295} else {296inst_mad->U.I.SrcReg[1].Index =297inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0);298}299300for (inst = inst_mad->Next; inst != &c->Program.Instructions; inst = inst->Next) {301const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);302unsigned i;303304for(i = 0; i < opcode->NumSrcRegs; i++) {305if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT &&306inst->U.I.SrcReg[i].Index == wpos) {307inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;308inst->U.I.SrcReg[i].Index = tempregi;309}310}311}312}313314315/**316* The FACE input in hardware contains 1 if it's a back face, 0 otherwise.317* Gallium and OpenGL define it the other way around.318*319* So let's just negate FACE at the beginning of the shader and rewrite the rest320* of the shader to read from the newly allocated temporary.321*/322void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face)323{324unsigned tempregi = rc_find_free_temporary(c);325struct rc_instruction *inst_add;326struct rc_instruction *inst;327328/* perspective divide */329inst_add = rc_insert_new_instruction(c, &c->Program.Instructions);330inst_add->U.I.Opcode = RC_OPCODE_ADD;331332inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;333inst_add->U.I.DstReg.Index = tempregi;334inst_add->U.I.DstReg.WriteMask = RC_MASK_X;335336inst_add->U.I.SrcReg[0].File = RC_FILE_NONE;337inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;338339inst_add->U.I.SrcReg[1].File = RC_FILE_INPUT;340inst_add->U.I.SrcReg[1].Index = face;341inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX;342inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZW;343344for (inst = inst_add->Next; inst != &c->Program.Instructions; inst = inst->Next) {345const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);346unsigned i;347348for(i = 0; i < opcode->NumSrcRegs; i++) {349if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT &&350inst->U.I.SrcReg[i].Index == face) {351inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;352inst->U.I.SrcReg[i].Index = tempregi;353}354}355}356}357358static void reg_count_callback(void * userdata, struct rc_instruction * inst,359rc_register_file file, unsigned int index, unsigned int mask)360{361struct rc_program_stats *s = userdata;362if (file == RC_FILE_TEMPORARY)363(int)index > s->num_temp_regs ? s->num_temp_regs = index : 0;364if (file == RC_FILE_INLINE)365s->num_inline_literals++;366}367368void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s)369{370struct rc_instruction * tmp;371memset(s, 0, sizeof(*s));372373for(tmp = c->Program.Instructions.Next; tmp != &c->Program.Instructions;374tmp = tmp->Next){375const struct rc_opcode_info * info;376rc_for_all_reads_mask(tmp, reg_count_callback, s);377if (tmp->Type == RC_INSTRUCTION_NORMAL) {378info = rc_get_opcode_info(tmp->U.I.Opcode);379if (info->Opcode == RC_OPCODE_BEGIN_TEX)380continue;381if (tmp->U.I.PreSub.Opcode != RC_PRESUB_NONE)382s->num_presub_ops++;383} else {384if (tmp->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used)385s->num_presub_ops++;386if (tmp->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used)387s->num_presub_ops++;388/* Assuming alpha will never be a flow control or389* a tex instruction. */390if (tmp->U.P.Alpha.Opcode != RC_OPCODE_NOP)391s->num_alpha_insts++;392if (tmp->U.P.RGB.Opcode != RC_OPCODE_NOP)393s->num_rgb_insts++;394if (tmp->U.P.RGB.Omod != RC_OMOD_MUL_1 &&395tmp->U.P.RGB.Omod != RC_OMOD_DISABLE) {396s->num_omod_ops++;397}398if (tmp->U.P.Alpha.Omod != RC_OMOD_MUL_1 &&399tmp->U.P.Alpha.Omod != RC_OMOD_DISABLE) {400s->num_omod_ops++;401}402info = rc_get_opcode_info(tmp->U.P.RGB.Opcode);403}404if (info->IsFlowControl)405s->num_fc_insts++;406if (info->HasTexture)407s->num_tex_insts++;408s->num_insts++;409}410/* Increment here because the reg_count_callback store the max411* temporary reg index in s->nun_temp_regs. */412s->num_temp_regs++;413}414415static void print_stats(struct radeon_compiler * c)416{417struct rc_program_stats s;418419if (c->initial_num_insts <= 5)420return;421422rc_get_stats(c, &s);423424switch (c->type) {425case RC_VERTEX_PROGRAM:426fprintf(stderr,"~~~~~~~~~ VERTEX PROGRAM ~~~~~~~~\n"427"~%4u Instructions\n"428"~%4u Flow Control Instructions\n"429"~%4u Temporary Registers\n"430"~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n",431s.num_insts, s.num_fc_insts, s.num_temp_regs);432break;433434case RC_FRAGMENT_PROGRAM:435fprintf(stderr,"~~~~~~~~ FRAGMENT PROGRAM ~~~~~~~\n"436"~%4u Instructions\n"437"~%4u Vector Instructions (RGB)\n"438"~%4u Scalar Instructions (Alpha)\n"439"~%4u Flow Control Instructions\n"440"~%4u Texture Instructions\n"441"~%4u Presub Operations\n"442"~%4u OMOD Operations\n"443"~%4u Temporary Registers\n"444"~%4u Inline Literals\n"445"~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n",446s.num_insts, s.num_rgb_insts, s.num_alpha_insts,447s.num_fc_insts, s.num_tex_insts, s.num_presub_ops,448s.num_omod_ops, s.num_temp_regs, s.num_inline_literals);449break;450default:451assert(0);452}453}454455static const char *shader_name[RC_NUM_PROGRAM_TYPES] = {456"Vertex Program",457"Fragment Program"458};459460void rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pass *list)461{462for (unsigned i = 0; list[i].name; i++) {463if (list[i].predicate) {464list[i].run(c, list[i].user);465466if (c->Error)467return;468469if ((c->Debug & RC_DBG_LOG) && list[i].dump) {470fprintf(stderr, "%s: after '%s'\n", shader_name[c->type], list[i].name);471rc_print_program(&c->Program);472}473}474}475}476477/* Executes a list of compiler passes given in the parameter 'list'. */478void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list)479{480struct rc_program_stats s;481482rc_get_stats(c, &s);483c->initial_num_insts = s.num_insts;484485if (c->Debug & RC_DBG_LOG) {486fprintf(stderr, "%s: before compilation\n", shader_name[c->type]);487rc_print_program(&c->Program);488}489490rc_run_compiler_passes(c, list);491492if (c->Debug & RC_DBG_STATS)493print_stats(c);494}495496void rc_validate_final_shader(struct radeon_compiler *c, void *user)497{498/* Check the number of constants. */499if (c->Program.Constants.Count > c->max_constants) {500rc_error(c, "Too many constants. Max: %i, Got: %i\n",501c->max_constants, c->Program.Constants.Count);502}503}504505506