/* Path: blob/21.2-virgl/src/gallium/drivers/r300/compiler/radeon_program_tex.c */
/* 4574 views */
/*1* Copyright (C) 2010 Corbin Simpson2* Copyright (C) 2010 Marek Olšák <[email protected]>3*4* All Rights Reserved.5*6* Permission is hereby granted, free of charge, to any person obtaining7* a copy of this software and associated documentation files (the8* "Software"), to deal in the Software without restriction, including9* without limitation the rights to use, copy, modify, merge, publish,10* distribute, sublicense, and/or sell copies of the Software, and to11* permit persons to whom the Software is furnished to do so, subject to12* the following conditions:13*14* The above copyright notice and this permission notice (including the15* next paragraph) shall be included in all copies or substantial16* portions of the Software.17*18* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,19* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF20* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.21* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE22* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION23* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION24* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.25*26*/2728#include "radeon_program_tex.h"2930#include "radeon_compiler_util.h"3132/* Series of transformations to be done on textures. 
*/3334static struct rc_src_register shadow_fail_value(struct r300_fragment_program_compiler *compiler,35int tmu)36{37struct rc_src_register reg = { 0, 0, 0, 0, 0, 0 };3839reg.File = RC_FILE_NONE;40reg.Swizzle = combine_swizzles(RC_SWIZZLE_0000,41compiler->state.unit[tmu].texture_swizzle);42return reg;43}4445static struct rc_src_register shadow_pass_value(struct r300_fragment_program_compiler *compiler,46int tmu)47{48struct rc_src_register reg = { 0, 0, 0, 0, 0, 0 };4950reg.File = RC_FILE_NONE;51reg.Swizzle = combine_swizzles(RC_SWIZZLE_1111,52compiler->state.unit[tmu].texture_swizzle);53return reg;54}5556static void scale_texcoords(struct r300_fragment_program_compiler *compiler,57struct rc_instruction *inst,58unsigned state_constant)59{60struct rc_instruction *inst_mov;6162unsigned temp = rc_find_free_temporary(&compiler->Base);6364inst_mov = rc_insert_new_instruction(&compiler->Base, inst->Prev);6566inst_mov->U.I.Opcode = RC_OPCODE_MUL;67inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;68inst_mov->U.I.DstReg.Index = temp;69inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];70inst_mov->U.I.SrcReg[1].File = RC_FILE_CONSTANT;71inst_mov->U.I.SrcReg[1].Index =72rc_constants_add_state(&compiler->Base.Program.Constants,73state_constant, inst->U.I.TexSrcUnit);7475reset_srcreg(&inst->U.I.SrcReg[0]);76inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;77inst->U.I.SrcReg[0].Index = temp;78}7980static void projective_divide(struct r300_fragment_program_compiler *compiler,81struct rc_instruction *inst)82{83struct rc_instruction *inst_mul, *inst_rcp;8485unsigned temp = rc_find_free_temporary(&compiler->Base);8687inst_rcp = rc_insert_new_instruction(&compiler->Base, inst->Prev);88inst_rcp->U.I.Opcode = RC_OPCODE_RCP;89inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;90inst_rcp->U.I.DstReg.Index = temp;91inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;92inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0];93/* Because the input can be arbitrarily swizzled,94* read the component mapped to W. 
*/95inst_rcp->U.I.SrcReg[0].Swizzle =96RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 3));9798inst_mul = rc_insert_new_instruction(&compiler->Base, inst->Prev);99inst_mul->U.I.Opcode = RC_OPCODE_MUL;100inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;101inst_mul->U.I.DstReg.Index = temp;102inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0];103inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;104inst_mul->U.I.SrcReg[1].Index = temp;105inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;106107reset_srcreg(&inst->U.I.SrcReg[0]);108inst->U.I.Opcode = RC_OPCODE_TEX;109inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;110inst->U.I.SrcReg[0].Index = temp;111}112113/**114* Transform TEX, TXP, TXB, and KIL instructions in the following ways:115* - implement texture compare (shadow extensions)116* - extract non-native source / destination operands117* - premultiply texture coordinates for RECT118* - extract operand swizzles119* - introduce a temporary register when write masks are needed120*/121int radeonTransformTEX(122struct radeon_compiler * c,123struct rc_instruction * inst,124void* data)125{126struct r300_fragment_program_compiler *compiler =127(struct r300_fragment_program_compiler*)data;128rc_wrap_mode wrapmode = compiler->state.unit[inst->U.I.TexSrcUnit].wrap_mode;129int is_rect = inst->U.I.TexSrcTarget == RC_TEXTURE_RECT ||130compiler->state.unit[inst->U.I.TexSrcUnit].non_normalized_coords;131132if (inst->U.I.Opcode != RC_OPCODE_TEX &&133inst->U.I.Opcode != RC_OPCODE_TXB &&134inst->U.I.Opcode != RC_OPCODE_TXP &&135inst->U.I.Opcode != RC_OPCODE_TXD &&136inst->U.I.Opcode != RC_OPCODE_TXL &&137inst->U.I.Opcode != RC_OPCODE_KIL)138return 0;139140/* ARB_shadow & EXT_shadow_funcs */141if (inst->U.I.Opcode != RC_OPCODE_KIL &&142((c->Program.ShadowSamplers & (1 << inst->U.I.TexSrcUnit)) ||143(compiler->state.unit[inst->U.I.TexSrcUnit].compare_mode_enabled))) {144rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func;145146if (comparefunc 
== RC_COMPARE_FUNC_NEVER || comparefunc == RC_COMPARE_FUNC_ALWAYS) {147inst->U.I.Opcode = RC_OPCODE_MOV;148149if (comparefunc == RC_COMPARE_FUNC_ALWAYS) {150inst->U.I.SrcReg[0] = shadow_pass_value(compiler, inst->U.I.TexSrcUnit);151} else {152inst->U.I.SrcReg[0] = shadow_fail_value(compiler, inst->U.I.TexSrcUnit);153}154155return 1;156} else {157struct rc_instruction * inst_rcp = NULL;158struct rc_instruction *inst_mul, *inst_add, *inst_cmp;159unsigned tmp_texsample;160unsigned tmp_sum;161int pass, fail;162163/* Save the output register. */164struct rc_dst_register output_reg = inst->U.I.DstReg;165unsigned saturate_mode = inst->U.I.SaturateMode;166167/* Redirect TEX to a new temp. */168tmp_texsample = rc_find_free_temporary(c);169inst->U.I.SaturateMode = 0;170inst->U.I.DstReg.File = RC_FILE_TEMPORARY;171inst->U.I.DstReg.Index = tmp_texsample;172inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;173174tmp_sum = rc_find_free_temporary(c);175176if (inst->U.I.Opcode == RC_OPCODE_TXP) {177/* Compute 1/W. */178inst_rcp = rc_insert_new_instruction(c, inst);179inst_rcp->U.I.Opcode = RC_OPCODE_RCP;180inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;181inst_rcp->U.I.DstReg.Index = tmp_sum;182inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;183inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0];184inst_rcp->U.I.SrcReg[0].Swizzle =185RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 3));186}187188/* Divide Z by W (if it's TXP) and saturate. */189inst_mul = rc_insert_new_instruction(c, inst_rcp ? inst_rcp : inst);190inst_mul->U.I.Opcode = inst->U.I.Opcode == RC_OPCODE_TXP ? 
RC_OPCODE_MUL : RC_OPCODE_MOV;191inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;192inst_mul->U.I.DstReg.Index = tmp_sum;193inst_mul->U.I.DstReg.WriteMask = RC_MASK_W;194inst_mul->U.I.SaturateMode = RC_SATURATE_ZERO_ONE;195inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0];196inst_mul->U.I.SrcReg[0].Swizzle =197RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 2));198if (inst->U.I.Opcode == RC_OPCODE_TXP) {199inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;200inst_mul->U.I.SrcReg[1].Index = tmp_sum;201inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;202}203204/* Add the depth texture value. */205inst_add = rc_insert_new_instruction(c, inst_mul);206inst_add->U.I.Opcode = RC_OPCODE_ADD;207inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;208inst_add->U.I.DstReg.Index = tmp_sum;209inst_add->U.I.DstReg.WriteMask = RC_MASK_W;210inst_add->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;211inst_add->U.I.SrcReg[0].Index = tmp_sum;212inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;213inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;214inst_add->U.I.SrcReg[1].Index = tmp_texsample;215inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX;216217/* Note that SrcReg[0] is r, SrcReg[1] is tex and:218* LESS: r < tex <=> -tex+r < 0219* GEQUAL: r >= tex <=> not (-tex+r < 0)220* GREATER: r > tex <=> tex-r < 0221* LEQUAL: r <= tex <=> not ( tex-r < 0)222* EQUAL: GEQUAL223* NOTEQUAL:LESS224*/225226/* This negates either r or tex: */227if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL ||228comparefunc == RC_COMPARE_FUNC_EQUAL || comparefunc == RC_COMPARE_FUNC_NOTEQUAL)229inst_add->U.I.SrcReg[1].Negate = inst_add->U.I.SrcReg[1].Negate ^ RC_MASK_XYZW;230else231inst_add->U.I.SrcReg[0].Negate = inst_add->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW;232233/* This negates the whole expresion: */234if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER ||235comparefunc == RC_COMPARE_FUNC_NOTEQUAL) {236pass = 1;237fail = 2;238} else {239pass = 2;240fail = 
1;241}242243inst_cmp = rc_insert_new_instruction(c, inst_add);244inst_cmp->U.I.Opcode = RC_OPCODE_CMP;245inst_cmp->U.I.SaturateMode = saturate_mode;246inst_cmp->U.I.DstReg = output_reg;247inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;248inst_cmp->U.I.SrcReg[0].Index = tmp_sum;249inst_cmp->U.I.SrcReg[0].Swizzle =250combine_swizzles(RC_SWIZZLE_WWWW,251compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle);252inst_cmp->U.I.SrcReg[pass] = shadow_pass_value(compiler, inst->U.I.TexSrcUnit);253inst_cmp->U.I.SrcReg[fail] = shadow_fail_value(compiler, inst->U.I.TexSrcUnit);254255assert(tmp_texsample != tmp_sum);256}257}258259/* R300 cannot sample from rectangles and the wrap mode fallback needs260* normalized coordinates anyway. */261if (inst->U.I.Opcode != RC_OPCODE_KIL &&262is_rect && (!c->is_r500 || wrapmode != RC_WRAP_NONE)) {263scale_texcoords(compiler, inst, RC_STATE_R300_TEXRECT_FACTOR);264inst->U.I.TexSrcTarget = RC_TEXTURE_2D;265}266267/* Divide by W if needed. */268if (inst->U.I.Opcode == RC_OPCODE_TXP &&269(wrapmode == RC_WRAP_REPEAT || wrapmode == RC_WRAP_MIRRORED_REPEAT ||270compiler->state.unit[inst->U.I.TexSrcUnit].clamp_and_scale_before_fetch)) {271projective_divide(compiler, inst);272}273274/* Texture wrap modes don't work on NPOT textures.275*276* Non-wrapped/clamped texcoords with NPOT are free in HW. Repeat and277* mirroring are not. If we need to repeat, we do:278*279* MUL temp, texcoord, <scaling factor constant>280* FRC temp, temp ; Discard integer portion of coords281*282* This gives us coords in [0, 1].283*284* Mirroring is trickier. 
We're going to start out like repeat:285*286* MUL temp, texcoord, <scaling factor constant> ; De-mirror across axes287* MUL temp, temp, 0.5 ; Pattern repeats in [0, 2]288* ; so scale to [0, 1]289* FRC temp, temp ; Make the pattern repeat290* MAD temp, temp, 2, -1 ; Move the pattern to [-1, 1]291* ADD temp, 1, -abs(temp) ; Now comes a neat trick: use abs to mirror the pattern.292* ; The pattern is backwards, so reverse it (1-x).293*294* This gives us coords in [0, 1].295*296* ~ C & M. ;)297*/298if (inst->U.I.Opcode != RC_OPCODE_KIL &&299wrapmode != RC_WRAP_NONE) {300struct rc_instruction *inst_mov;301unsigned temp = rc_find_free_temporary(c);302303if (wrapmode == RC_WRAP_REPEAT) {304/* Both instructions will be paired up. */305struct rc_instruction *inst_frc = rc_insert_new_instruction(c, inst->Prev);306307inst_frc->U.I.Opcode = RC_OPCODE_FRC;308inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY;309inst_frc->U.I.DstReg.Index = temp;310inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ;311inst_frc->U.I.SrcReg[0] = inst->U.I.SrcReg[0];312} else if (wrapmode == RC_WRAP_MIRRORED_REPEAT) {313/*314* Function:315* f(v) = 1 - abs(frac(v * 0.5) * 2 - 1)316*317* Code:318* MUL temp, src0, 0.5319* FRC temp, temp320* MAD temp, temp, 2, -1321* ADD temp, 1, -abs(temp)322*/323324struct rc_instruction *inst_mul, *inst_frc, *inst_mad, *inst_add;325unsigned two, two_swizzle;326327inst_mul = rc_insert_new_instruction(c, inst->Prev);328329inst_mul->U.I.Opcode = RC_OPCODE_MUL;330inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;331inst_mul->U.I.DstReg.Index = temp;332inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ;333inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0];334inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_HHHH;335336inst_frc = rc_insert_new_instruction(c, inst->Prev);337338inst_frc->U.I.Opcode = RC_OPCODE_FRC;339inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY;340inst_frc->U.I.DstReg.Index = temp;341inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ;342inst_frc->U.I.SrcReg[0].File = 
RC_FILE_TEMPORARY;343inst_frc->U.I.SrcReg[0].Index = temp;344inst_frc->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;345346two = rc_constants_add_immediate_scalar(&c->Program.Constants, 2, &two_swizzle);347inst_mad = rc_insert_new_instruction(c, inst->Prev);348349inst_mad->U.I.Opcode = RC_OPCODE_MAD;350inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;351inst_mad->U.I.DstReg.Index = temp;352inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ;353inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;354inst_mad->U.I.SrcReg[0].Index = temp;355inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;356inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT;357inst_mad->U.I.SrcReg[1].Index = two;358inst_mad->U.I.SrcReg[1].Swizzle = two_swizzle;359inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_1111;360inst_mad->U.I.SrcReg[2].Negate = RC_MASK_XYZ;361362inst_add = rc_insert_new_instruction(c, inst->Prev);363364inst_add->U.I.Opcode = RC_OPCODE_ADD;365inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;366inst_add->U.I.DstReg.Index = temp;367inst_add->U.I.DstReg.WriteMask = RC_MASK_XYZ;368inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;369inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;370inst_add->U.I.SrcReg[1].Index = temp;371inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0;372inst_add->U.I.SrcReg[1].Abs = 1;373inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZ;374} else if (wrapmode == RC_WRAP_MIRRORED_CLAMP) {375/*376* Mirrored clamp modes are bloody simple, we just use abs377* to mirror [0, 1] into [-1, 0]. This works for378* all modes i.e. CLAMP, CLAMP_TO_EDGE, and CLAMP_TO_BORDER.379*/380struct rc_instruction *inst_mov;381382inst_mov = rc_insert_new_instruction(c, inst->Prev);383384inst_mov->U.I.Opcode = RC_OPCODE_MOV;385inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;386inst_mov->U.I.DstReg.Index = temp;387inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ;388inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];389inst_mov->U.I.SrcReg[0].Abs = 1;390}391392/* Preserve W for TXP/TXB. 
*/393inst_mov = rc_insert_new_instruction(c, inst->Prev);394395inst_mov->U.I.Opcode = RC_OPCODE_MOV;396inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;397inst_mov->U.I.DstReg.Index = temp;398inst_mov->U.I.DstReg.WriteMask = RC_MASK_W;399inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];400401reset_srcreg(&inst->U.I.SrcReg[0]);402inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;403inst->U.I.SrcReg[0].Index = temp;404}405406/* NPOT -> POT conversion for 3D textures. */407if (inst->U.I.Opcode != RC_OPCODE_KIL &&408compiler->state.unit[inst->U.I.TexSrcUnit].clamp_and_scale_before_fetch) {409struct rc_instruction *inst_mov;410unsigned temp = rc_find_free_temporary(c);411412/* Saturate XYZ. */413inst_mov = rc_insert_new_instruction(c, inst->Prev);414inst_mov->U.I.Opcode = RC_OPCODE_MOV;415inst_mov->U.I.SaturateMode = RC_SATURATE_ZERO_ONE;416inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;417inst_mov->U.I.DstReg.Index = temp;418inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ;419inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];420421/* Copy W. */422inst_mov = rc_insert_new_instruction(c, inst->Prev);423inst_mov->U.I.Opcode = RC_OPCODE_MOV;424inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;425inst_mov->U.I.DstReg.Index = temp;426inst_mov->U.I.DstReg.WriteMask = RC_MASK_W;427inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];428429reset_srcreg(&inst->U.I.SrcReg[0]);430inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;431inst->U.I.SrcReg[0].Index = temp;432433scale_texcoords(compiler, inst, RC_STATE_R300_TEXSCALE_FACTOR);434}435436/* Cannot write texture to output registers or with saturate (all chips),437* or with masks (non-r500). 
*/438if (inst->U.I.Opcode != RC_OPCODE_KIL &&439(inst->U.I.DstReg.File != RC_FILE_TEMPORARY ||440inst->U.I.SaturateMode ||441(!c->is_r500 && inst->U.I.DstReg.WriteMask != RC_MASK_XYZW))) {442struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst);443444inst_mov->U.I.Opcode = RC_OPCODE_MOV;445inst_mov->U.I.SaturateMode = inst->U.I.SaturateMode;446inst_mov->U.I.DstReg = inst->U.I.DstReg;447inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;448inst_mov->U.I.SrcReg[0].Index = rc_find_free_temporary(c);449450inst->U.I.SaturateMode = 0;451inst->U.I.DstReg.File = RC_FILE_TEMPORARY;452inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index;453inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;454}455456/* Cannot read texture coordinate from constants file */457if (inst->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && inst->U.I.SrcReg[0].File != RC_FILE_INPUT) {458struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);459460inst_mov->U.I.Opcode = RC_OPCODE_MOV;461inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;462inst_mov->U.I.DstReg.Index = rc_find_free_temporary(c);463inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];464465reset_srcreg(&inst->U.I.SrcReg[0]);466inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;467inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index;468}469470return 1;471}472473474