Path: blob/21.2-virgl/src/microsoft/compiler/dxil_nir_lower_int_samplers.c
4564 views
/*1* Copyright © Microsoft Corporation2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* on the rights to use, copy, modify, merge, publish, distribute, sub7* license, and/or sell copies of the Software, and to permit persons to whom8* the Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL17* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,18* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR19* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE20* USE OR OTHER DEALINGS IN THE SOFTWARE.21*/2223#include "dxil_nir_lower_int_samplers.h"24#include "nir_builder.h"25#include "nir_builtin_builder.h"2627static bool28lower_sample_to_txf_for_integer_tex_filter(const nir_instr *instr,29UNUSED const void *_options)30{31if (instr->type != nir_instr_type_tex)32return false;3334nir_tex_instr *tex = nir_instr_as_tex(instr);35if (tex->op != nir_texop_tex &&36tex->op != nir_texop_txb &&37tex->op != nir_texop_txl &&38tex->op != nir_texop_txd)39return false;4041return (tex->dest_type & (nir_type_int | nir_type_uint));42}4344static nir_ssa_def *45dx_get_texture_lod(nir_builder *b, nir_tex_instr *tex)46{47nir_tex_instr *tql;4849unsigned num_srcs = 0;50for (unsigned i = 0; i < tex->num_srcs; i++) {51if (tex->src[i].src_type == nir_tex_src_coord ||52tex->src[i].src_type == nir_tex_src_texture_deref ||53tex->src[i].src_type == nir_tex_src_sampler_deref ||54tex->src[i].src_type == nir_tex_src_texture_offset ||55tex->src[i].src_type == nir_tex_src_sampler_offset ||56tex->src[i].src_type == nir_tex_src_texture_handle ||57tex->src[i].src_type == nir_tex_src_sampler_handle)58num_srcs++;59}6061tql = nir_tex_instr_create(b->shader, num_srcs);62tql->op = nir_texop_lod;63unsigned coord_components = tex->coord_components;64if (tex->is_array)65--coord_components;6667tql->coord_components = coord_components;68tql->sampler_dim = tex->sampler_dim;69tql->is_shadow = tex->is_shadow;70tql->is_new_style_shadow = tex->is_new_style_shadow;71tql->texture_index = tex->texture_index;72tql->sampler_index = tex->sampler_index;73tql->dest_type = nir_type_float32;7475/* The coordinate needs special handling because we might have76* to strip the array index. Don't clutter the code with an additional77* check for is_array though, in the worst case we create an additional78* move the the optimization will remove later again. */79int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);80nir_ssa_def *ssa_src = nir_channels(b, tex->src[coord_index].src.ssa,81(1 << coord_components) - 1);82nir_src src = nir_src_for_ssa(ssa_src);83nir_src_copy(&tql->src[0].src, &src, tql);84tql->src[0].src_type = nir_tex_src_coord;8586unsigned idx = 1;87for (unsigned i = 0; i < tex->num_srcs; i++) {88if (tex->src[i].src_type == nir_tex_src_texture_deref ||89tex->src[i].src_type == nir_tex_src_sampler_deref ||90tex->src[i].src_type == nir_tex_src_texture_offset ||91tex->src[i].src_type == nir_tex_src_sampler_offset ||92tex->src[i].src_type == nir_tex_src_texture_handle ||93tex->src[i].src_type == nir_tex_src_sampler_handle) {94nir_src_copy(&tql->src[idx].src, &tex->src[i].src, tql);95tql->src[idx].src_type = tex->src[i].src_type;96idx++;97}98}99100nir_ssa_dest_init(&tql->instr, &tql->dest, 2, 32, NULL);101nir_builder_instr_insert(b, &tql->instr);102103/* DirectX LOD only has a value in x channel */104return nir_channel(b, &tql->dest.ssa, 0);105}106107typedef struct {108nir_ssa_def *coords;109nir_ssa_def *use_border_color;110} wrap_result_t;111112typedef struct {113nir_ssa_def *lod;114nir_ssa_def *size;115int ncoord_comp;116wrap_result_t wrap[3];117} wrap_lower_param_t;118119static void120wrap_clamp_to_edge(nir_builder *b, wrap_result_t *wrap_params, nir_ssa_def *size)121{122/* clamp(coord, 0, size - 1) */123wrap_params->coords = nir_fmin(b, nir_fsub(b, size, nir_imm_float(b, 1.0f)),124nir_fmax(b, wrap_params->coords, nir_imm_float(b, 0.0f)));125}126127static void128wrap_repeat(nir_builder *b, wrap_result_t *wrap_params, nir_ssa_def *size)129{130/* mod(coord, size)131* This instruction must be exact, otherwise certain sizes result in132* incorrect sampling */133wrap_params->coords = nir_fmod(b, wrap_params->coords, size);134nir_instr_as_alu(wrap_params->coords->parent_instr)->exact = true;135}136137static nir_ssa_def *138mirror(nir_builder *b, nir_ssa_def *coord)139{140/* coord if >= 0, otherwise -(1 + coord) */141return nir_bcsel(b, nir_fge(b, coord, nir_imm_float(b, 0.0f)), coord,142nir_fneg(b, nir_fadd(b, nir_imm_float(b, 1.0f), coord)));143}144145static void146wrap_mirror_repeat(nir_builder *b, wrap_result_t *wrap_params, nir_ssa_def *size)147{148/* (size − 1) − mirror(mod(coord, 2 * size) − size) */149nir_ssa_def *coord_mod2size = nir_fmod(b, wrap_params->coords, nir_fmul(b, nir_imm_float(b, 2.0f), size));150nir_instr_as_alu(coord_mod2size->parent_instr)->exact = true;151nir_ssa_def *a = nir_fsub(b, coord_mod2size, size);152wrap_params->coords = nir_fsub(b, nir_fsub(b, size, nir_imm_float(b, 1.0f)), mirror(b, a));153}154155static void156wrap_mirror_clamp_to_edge(nir_builder *b, wrap_result_t *wrap_params, nir_ssa_def *size)157{158/* clamp(mirror(coord), 0, size - 1) */159wrap_params->coords = nir_fmin(b, nir_fsub(b, size, nir_imm_float(b, 1.0f)),160nir_fmax(b, mirror(b, wrap_params->coords), nir_imm_float(b, 0.0f)));161}162163static void164wrap_clamp(nir_builder *b, wrap_result_t *wrap_params, nir_ssa_def *size)165{166nir_ssa_def *is_low = nir_flt(b, wrap_params->coords, nir_imm_float(b, 0.0));167nir_ssa_def *is_high = nir_fge(b, wrap_params->coords, size);168wrap_params->use_border_color = nir_ior(b, is_low, is_high);169}170171static void172wrap_mirror_clamp(nir_builder *b, wrap_result_t *wrap_params, nir_ssa_def *size)173{174/* We have to take care of the boundaries */175nir_ssa_def *is_low = nir_flt(b, wrap_params->coords, nir_fmul(b, size, nir_imm_float(b, -1.0)));176nir_ssa_def *is_high = nir_flt(b, nir_fmul(b, size, nir_imm_float(b, 2.0)), wrap_params->coords);177wrap_params->use_border_color = nir_ior(b, is_low, is_high);178179/* Within the boundaries this acts like mirror_repeat */180wrap_mirror_repeat(b, wrap_params, size);181182}183184static wrap_result_t185wrap_coords(nir_builder *b, nir_ssa_def *coords, enum pipe_tex_wrap wrap,186nir_ssa_def *size)187{188wrap_result_t result = {coords, nir_imm_false(b)};189190switch (wrap) {191case PIPE_TEX_WRAP_CLAMP_TO_EDGE:192wrap_clamp_to_edge(b, &result, size);193break;194case PIPE_TEX_WRAP_REPEAT:195wrap_repeat(b, &result, size);196break;197case PIPE_TEX_WRAP_MIRROR_REPEAT:198wrap_mirror_repeat(b, &result, size);199break;200case PIPE_TEX_WRAP_MIRROR_CLAMP:201case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:202wrap_mirror_clamp_to_edge(b, &result, size);203break;204case PIPE_TEX_WRAP_CLAMP:205case PIPE_TEX_WRAP_CLAMP_TO_BORDER:206wrap_clamp(b, &result, size);207break;208case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:209wrap_mirror_clamp(b, &result, size);210break;211}212return result;213}214215static nir_ssa_def *216load_bordercolor(nir_builder *b, nir_tex_instr *tex, dxil_wrap_sampler_state *active_state,217const dxil_texture_swizzle_state *tex_swizzle)218{219nir_const_value const_value[4] = {{0}};220int ndest_comp = nir_dest_num_components(tex->dest);221222unsigned swizzle[4] = {223tex_swizzle->swizzle_r,224tex_swizzle->swizzle_g,225tex_swizzle->swizzle_b,226tex_swizzle->swizzle_a227};228229for (int i = 0; i < ndest_comp; ++i) {230switch (swizzle[i]) {231case PIPE_SWIZZLE_0:232const_value[i].f32 = 0;233break;234case PIPE_SWIZZLE_1:235const_value[i].i32 = 1;236break;237case PIPE_SWIZZLE_X:238case PIPE_SWIZZLE_Y:239case PIPE_SWIZZLE_Z:240case PIPE_SWIZZLE_W:241const_value[i].f32 = active_state->border_color[swizzle[i]];242break;243default:244unreachable("Unexpected swizzle value");245}246}247248return nir_build_imm(b, ndest_comp, 32, const_value);249}250251static nir_tex_instr *252create_txf_from_tex(nir_builder *b, nir_tex_instr *tex)253{254nir_tex_instr *txf;255256unsigned num_srcs = 0;257for (unsigned i = 0; i < tex->num_srcs; i++) {258if (tex->src[i].src_type == nir_tex_src_texture_deref ||259tex->src[i].src_type == nir_tex_src_texture_offset ||260tex->src[i].src_type == nir_tex_src_texture_handle)261num_srcs++;262}263264txf = nir_tex_instr_create(b->shader, num_srcs);265txf->op = nir_texop_txf;266txf->sampler_dim = tex->sampler_dim;267txf->is_array = tex->is_array;268txf->is_shadow = tex->is_shadow;269txf->is_new_style_shadow = tex->is_new_style_shadow;270txf->texture_index = tex->texture_index;271txf->sampler_index = tex->sampler_index;272txf->dest_type = tex->dest_type;273274unsigned idx = 0;275for (unsigned i = 0; i < tex->num_srcs; i++) {276if (tex->src[i].src_type == nir_tex_src_texture_deref ||277tex->src[i].src_type == nir_tex_src_texture_offset ||278tex->src[i].src_type == nir_tex_src_texture_handle) {279nir_src_copy(&txf->src[idx].src, &tex->src[i].src, txf);280txf->src[idx].src_type = tex->src[i].src_type;281idx++;282}283}284285nir_ssa_dest_init(&txf->instr, &txf->dest,286nir_tex_instr_dest_size(txf), 32, NULL);287nir_builder_instr_insert(b, &txf->instr);288289return txf;290}291292static nir_ssa_def *293load_texel(nir_builder *b, nir_tex_instr *tex, wrap_lower_param_t *params)294{295nir_ssa_def *texcoord = NULL;296297/* Put coordinates back together */298switch (tex->coord_components) {299case 1:300texcoord = params->wrap[0].coords;301break;302case 2:303texcoord = nir_vec2(b, params->wrap[0].coords, params->wrap[1].coords);304break;305case 3:306texcoord = nir_vec3(b, params->wrap[0].coords, params->wrap[1].coords, params->wrap[2].coords);307break;308default:309;310}311312texcoord = nir_f2i32(b, texcoord);313314nir_tex_instr *load = create_txf_from_tex(b, tex);315nir_tex_instr_add_src(load, nir_tex_src_lod, nir_src_for_ssa(params->lod));316nir_tex_instr_add_src(load, nir_tex_src_coord, nir_src_for_ssa(texcoord));317b->cursor = nir_after_instr(&load->instr);318return &load->dest.ssa;319}320321typedef struct {322dxil_wrap_sampler_state *aws;323float max_bias;324nir_ssa_def *size;325int ncoord_comp;326} lod_params;327328static nir_ssa_def *329evalute_active_lod(nir_builder *b, nir_tex_instr *tex, lod_params *params)330{331static nir_ssa_def *lod = NULL;332333/* Later we use min_lod for clamping the LOD to a legal value */334float min_lod = MAX2(params->aws->min_lod, 0.0f);335336/* Evaluate the LOD to be used for the texel fetch */337if (unlikely(tex->op == nir_texop_txl)) {338int lod_index = nir_tex_instr_src_index(tex, nir_tex_src_lod);339/* if we have an explicite LOD, take it */340lod = tex->src[lod_index].src.ssa;341} else if (unlikely(tex->op == nir_texop_txd)) {342int ddx_index = nir_tex_instr_src_index(tex, nir_tex_src_ddx);343int ddy_index = nir_tex_instr_src_index(tex, nir_tex_src_ddy);344assert(ddx_index >= 0 && ddy_index >= 0);345346nir_ssa_def *grad = nir_fmax(b,347tex->src[ddx_index].src.ssa,348tex->src[ddy_index].src.ssa);349350nir_ssa_def *r = nir_fmul(b, grad, nir_i2f32(b, params->size));351nir_ssa_def *rho = nir_channel(b, r, 0);352for (int i = 1; i < params->ncoord_comp; ++i)353rho = nir_fmax(b, rho, nir_channel(b, r, i));354lod = nir_flog2(b, rho);355} else if (b->shader->info.stage == MESA_SHADER_FRAGMENT){356lod = dx_get_texture_lod(b, tex);357} else {358/* Only fragment shaders provide the gradient information to evaluate a LOD,359* so force 0 otherwise */360lod = nir_imm_float(b, 0.0);361}362363/* Evaluate bias according to OpenGL (4.6 (Compatibility Profile) October 22, 2019),364* sec. 8.14.1, eq. (8.9)365*366* lod' = lambda + CLAMP(bias_texobj + bias_texunit + bias_shader)367*368* bias_texobj is the value of TEXTURE_LOD_BIAS for the bound texture object. ...369* bias_textunt is the value of TEXTURE_LOD_BIAS for the current texture unit, ...370* bias shader is the value of the optional bias parameter in the texture371* lookup functions available to fragment shaders. ... The sum of these values372* is clamped to the range [−bias_max, bias_max] where bias_max is the value373* of the implementation defined constant MAX_TEXTURE_LOD_BIAS.374* In core contexts the value bias_texunit is dropped from above equation.375*376* Gallium provides the value lod_bias as the sum of bias_texobj and bias_texunit377* in compatibility contexts and as bias_texobj in core contexts, hence the378* implementation here is the same in both cases.379*/380nir_ssa_def *lod_bias = nir_imm_float(b, params->aws->lod_bias);381382if (unlikely(tex->op == nir_texop_txb)) {383int bias_index = nir_tex_instr_src_index(tex, nir_tex_src_bias);384lod_bias = nir_fadd(b, lod_bias, tex->src[bias_index].src.ssa);385}386387lod = nir_fadd(b, lod, nir_fclamp(b, lod_bias,388nir_imm_float(b, -params->max_bias),389nir_imm_float(b, params->max_bias)));390391/* Clamp lod according to ibid. eq. (8.10) */392lod = nir_fmax(b, lod, nir_imm_float(b, min_lod));393394/* If the max lod is > max_bias = log2(max_texture_size), the lod will be clamped395* by the number of levels, no need to clamp it againt the max_lod first. */396if (params->aws->max_lod <= params->max_bias)397lod = nir_fmin(b, lod, nir_imm_float(b, params->aws->max_lod));398399/* Pick nearest LOD */400lod = nir_f2i32(b, nir_fround_even(b, lod));401402/* cap actual lod by number of available levels */403return nir_imin(b, lod, nir_imm_int(b, params->aws->last_level));404}405406typedef struct {407dxil_wrap_sampler_state *wrap_states;408dxil_texture_swizzle_state *tex_swizzles;409float max_bias;410} sampler_states;411412413static nir_ssa_def *414lower_sample_to_txf_for_integer_tex_impl(nir_builder *b, nir_instr *instr,415void *options)416{417sampler_states *states = (sampler_states *)options;418wrap_lower_param_t params = {0};419420nir_tex_instr *tex = nir_instr_as_tex(instr);421dxil_wrap_sampler_state *active_wrap_state = &states->wrap_states[tex->sampler_index];422423b->cursor = nir_before_instr(instr);424425int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);426nir_ssa_def *old_coord = tex->src[coord_index].src.ssa;427params.ncoord_comp = tex->coord_components;428if (tex->is_array)429params.ncoord_comp -= 1;430431/* This helper to get the texture size always uses LOD 0, and DirectX doesn't support432* giving another LOD when querying the texture size */433nir_ssa_def *size0 = nir_get_texture_size(b, tex);434435params.lod = nir_imm_int(b, 0);436437if (active_wrap_state->last_level > 0) {438lod_params p = {439.aws = active_wrap_state,440.max_bias = states->max_bias,441.size = size0,442.ncoord_comp = params.ncoord_comp443};444params.lod = evalute_active_lod(b, tex, &p);445446/* Evaluate actual level size*/447params.size = nir_i2f32(b, nir_imax(b, nir_ishr(b, size0, params.lod),448nir_imm_int(b, 1)));449} else {450params.size = nir_i2f32(b, size0);451}452453nir_ssa_def *new_coord = old_coord;454if (!active_wrap_state->is_nonnormalized_coords) {455/* Evaluate the integer lookup coordinates for the requested LOD, don't touch the456* array index */457if (!tex->is_array) {458new_coord = nir_fmul(b, params.size, old_coord);459} else {460nir_ssa_def *array_index = nir_channel(b, old_coord, params.ncoord_comp);461int mask = (1 << params.ncoord_comp) - 1;462nir_ssa_def *coord = nir_fmul(b, nir_channels(b, params.size, mask),463nir_channels(b, old_coord, mask));464switch (params.ncoord_comp) {465case 1:466new_coord = nir_vec2(b, coord, array_index);467break;468case 2:469new_coord = nir_vec3(b, nir_channel(b, coord, 0),470nir_channel(b, coord, 1),471array_index);472break;473default:474unreachable("unsupported number of non-array coordinates");475}476}477}478479nir_ssa_def *coord_help[3];480for (int i = 0; i < params.ncoord_comp; ++i)481coord_help[i] = nir_ffloor(b, nir_channel(b, new_coord, i));482483// Note: array index needs to be rounded to nearest before clamp rather than floored484if (tex->is_array)485coord_help[params.ncoord_comp] = nir_fround_even(b, nir_channel(b, new_coord, params.ncoord_comp));486487/* Correct the texture coordinates for the offsets. */488int offset_index = nir_tex_instr_src_index(tex, nir_tex_src_offset);489if (offset_index >= 0) {490nir_ssa_def *offset = tex->src[offset_index].src.ssa;491for (int i = 0; i < params.ncoord_comp; ++i)492coord_help[i] = nir_fadd(b, coord_help[i], nir_i2f32(b, nir_channel(b, offset, i)));493}494495nir_ssa_def *use_border_color = nir_imm_false(b);496497if (!active_wrap_state->skip_boundary_conditions) {498499for (int i = 0; i < params.ncoord_comp; ++i) {500params.wrap[i] = wrap_coords(b, coord_help[i], active_wrap_state->wrap[i], nir_channel(b, params.size, i));501use_border_color = nir_ior(b, use_border_color, params.wrap[i].use_border_color);502}503504if (tex->is_array)505params.wrap[params.ncoord_comp] =506wrap_coords(b, coord_help[params.ncoord_comp],507PIPE_TEX_WRAP_CLAMP_TO_EDGE,508nir_i2f32(b, nir_channel(b, size0, params.ncoord_comp)));509} else {510/* When we emulate a cube map by using a texture array, the coordinates are always511* in range, and we don't have to take care of boundary conditions */512for (unsigned i = 0; i < 3; ++i) {513params.wrap[i].coords = coord_help[i];514params.wrap[i].use_border_color = nir_imm_false(b);515}516}517518const dxil_texture_swizzle_state one2one = {519PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W520};521522nir_if *border_if = nir_push_if(b, use_border_color);523const dxil_texture_swizzle_state *swizzle = states->tex_swizzles ?524&states->tex_swizzles[tex->sampler_index]:525&one2one;526527nir_ssa_def *border_color = load_bordercolor(b, tex, active_wrap_state, swizzle);528nir_if *border_else = nir_push_else(b, border_if);529nir_ssa_def *sampler_color = load_texel(b, tex, ¶ms);530nir_pop_if(b, border_else);531532return nir_if_phi(b, border_color, sampler_color);533}534535/* Sampling from integer textures is not allowed in DirectX, so we have536* to use texel fetches. For this we have to scale the coordiantes537* to be integer based, and evaluate the LOD the texel fetch has to be538* applied on, and take care of the boundary conditions .539*/540bool541dxil_lower_sample_to_txf_for_integer_tex(nir_shader *s,542dxil_wrap_sampler_state *wrap_states,543dxil_texture_swizzle_state *tex_swizzles,544float max_bias)545{546sampler_states states = {wrap_states, tex_swizzles, max_bias};547548bool result =549nir_shader_lower_instructions(s,550lower_sample_to_txf_for_integer_tex_filter,551lower_sample_to_txf_for_integer_tex_impl,552&states);553return result;554}555556557