/* Path: blob/21.2-virgl/src/compiler/nir/nir_lower_blend.c */
/* 4545 views */
/*1* Copyright (C) 2019-2021 Collabora, Ltd.2* Copyright (C) 2019 Alyssa Rosenzweig3*4* Permission is hereby granted, free of charge, to any person obtaining a5* copy of this software and associated documentation files (the "Software"),6* to deal in the Software without restriction, including without limitation7* the rights to use, copy, modify, merge, publish, distribute, sublicense,8* and/or sell copies of the Software, and to permit persons to whom the9* Software is furnished to do so, subject to the following conditions:10*11* The above copyright notice and this permission notice (including the next12* paragraph) shall be included in all copies or substantial portions of the13* Software.14*15* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR16* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,17* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL18* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER19* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING20* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS21* IN THE SOFTWARE.22*/2324/**25* @file26*27* Implements the fragment pipeline (blending and writeout) in software, to be28* run as a dedicated "blend shader" stage on Midgard/Bifrost, or as a fragment29* shader variant on typical GPUs. 
This pass is useful if hardware lacks30* fixed-function blending in part or in full.31*/3233#include "compiler/nir/nir.h"34#include "compiler/nir/nir_builder.h"35#include "compiler/nir/nir_format_convert.h"36#include "nir_lower_blend.h"3738/* Given processed factors, combine them per a blend function */3940static nir_ssa_def *41nir_blend_func(42nir_builder *b,43enum blend_func func,44nir_ssa_def *src, nir_ssa_def *dst)45{46switch (func) {47case BLEND_FUNC_ADD:48return nir_fadd(b, src, dst);49case BLEND_FUNC_SUBTRACT:50return nir_fsub(b, src, dst);51case BLEND_FUNC_REVERSE_SUBTRACT:52return nir_fsub(b, dst, src);53case BLEND_FUNC_MIN:54return nir_fmin(b, src, dst);55case BLEND_FUNC_MAX:56return nir_fmax(b, src, dst);57}5859unreachable("Invalid blend function");60}6162/* Does this blend function multiply by a blend factor? */6364static bool65nir_blend_factored(enum blend_func func)66{67switch (func) {68case BLEND_FUNC_ADD:69case BLEND_FUNC_SUBTRACT:70case BLEND_FUNC_REVERSE_SUBTRACT:71return true;72default:73return false;74}75}7677/* Compute a src_alpha_saturate factor */78static nir_ssa_def *79nir_alpha_saturate(80nir_builder *b,81nir_ssa_def *src, nir_ssa_def *dst,82unsigned chan)83{84nir_ssa_def *Asrc = nir_channel(b, src, 3);85nir_ssa_def *Adst = nir_channel(b, dst, 3);86nir_ssa_def *one = nir_imm_floatN_t(b, 1.0, src->bit_size);87nir_ssa_def *Adsti = nir_fsub(b, one, Adst);8889return (chan < 3) ? 
nir_fmin(b, Asrc, Adsti) : one;90}9192/* Returns a scalar single factor, unmultiplied */9394static nir_ssa_def *95nir_blend_factor_value(96nir_builder *b,97nir_ssa_def *src, nir_ssa_def *src1, nir_ssa_def *dst, nir_ssa_def *bconst,98unsigned chan,99enum blend_factor factor)100{101switch (factor) {102case BLEND_FACTOR_ZERO:103return nir_imm_floatN_t(b, 0.0, src->bit_size);104case BLEND_FACTOR_SRC_COLOR:105return nir_channel(b, src, chan);106case BLEND_FACTOR_SRC1_COLOR:107return nir_channel(b, src1, chan);108case BLEND_FACTOR_DST_COLOR:109return nir_channel(b, dst, chan);110case BLEND_FACTOR_SRC_ALPHA:111return nir_channel(b, src, 3);112case BLEND_FACTOR_SRC1_ALPHA:113return nir_channel(b, src1, 3);114case BLEND_FACTOR_DST_ALPHA:115return nir_channel(b, dst, 3);116case BLEND_FACTOR_CONSTANT_COLOR:117return nir_channel(b, bconst, chan);118case BLEND_FACTOR_CONSTANT_ALPHA:119return nir_channel(b, bconst, 3);120case BLEND_FACTOR_SRC_ALPHA_SATURATE:121return nir_alpha_saturate(b, src, dst, chan);122}123124unreachable("Invalid blend factor");125}126127static nir_ssa_def *128nir_blend_factor(129nir_builder *b,130nir_ssa_def *raw_scalar,131nir_ssa_def *src, nir_ssa_def *src1, nir_ssa_def *dst, nir_ssa_def *bconst,132unsigned chan,133enum blend_factor factor,134bool inverted)135{136nir_ssa_def *f =137nir_blend_factor_value(b, src, src1, dst, bconst, chan, factor);138139if (inverted)140f = nir_fadd_imm(b, nir_fneg(b, f), 1.0);141142return nir_fmul(b, raw_scalar, f);143}144145/* Given a colormask, "blend" with the destination */146147static nir_ssa_def *148nir_color_mask(149nir_builder *b,150unsigned mask,151nir_ssa_def *src,152nir_ssa_def *dst)153{154return nir_vec4(b,155nir_channel(b, (mask & (1 << 0)) ? src : dst, 0),156nir_channel(b, (mask & (1 << 1)) ? src : dst, 1),157nir_channel(b, (mask & (1 << 2)) ? src : dst, 2),158nir_channel(b, (mask & (1 << 3)) ? 
src : dst, 3));159}160161static nir_ssa_def *162nir_logicop_func(163nir_builder *b,164unsigned func,165nir_ssa_def *src, nir_ssa_def *dst)166{167switch (func) {168case PIPE_LOGICOP_CLEAR:169return nir_imm_ivec4(b, 0, 0, 0, 0);170case PIPE_LOGICOP_NOR:171return nir_inot(b, nir_ior(b, src, dst));172case PIPE_LOGICOP_AND_INVERTED:173return nir_iand(b, nir_inot(b, src), dst);174case PIPE_LOGICOP_COPY_INVERTED:175return nir_inot(b, src);176case PIPE_LOGICOP_AND_REVERSE:177return nir_iand(b, src, nir_inot(b, dst));178case PIPE_LOGICOP_INVERT:179return nir_inot(b, dst);180case PIPE_LOGICOP_XOR:181return nir_ixor(b, src, dst);182case PIPE_LOGICOP_NAND:183return nir_inot(b, nir_iand(b, src, dst));184case PIPE_LOGICOP_AND:185return nir_iand(b, src, dst);186case PIPE_LOGICOP_EQUIV:187return nir_inot(b, nir_ixor(b, src, dst));188case PIPE_LOGICOP_NOOP:189return dst;190case PIPE_LOGICOP_OR_INVERTED:191return nir_ior(b, nir_inot(b, src), dst);192case PIPE_LOGICOP_COPY:193return src;194case PIPE_LOGICOP_OR_REVERSE:195return nir_ior(b, src, nir_inot(b, dst));196case PIPE_LOGICOP_OR:197return nir_ior(b, src, dst);198case PIPE_LOGICOP_SET:199return nir_imm_ivec4(b, ~0, ~0, ~0, ~0);200}201202unreachable("Invalid logciop function");203}204205static nir_ssa_def *206nir_blend_logicop(207nir_builder *b,208nir_lower_blend_options options,209unsigned rt,210nir_ssa_def *src, nir_ssa_def *dst)211{212unsigned bit_size = src->bit_size;213const struct util_format_description *format_desc =214util_format_description(options.format[rt]);215216if (bit_size != 32) {217src = nir_f2f32(b, src);218dst = nir_f2f32(b, dst);219}220221assert(src->num_components <= 4);222assert(dst->num_components <= 4);223224unsigned bits[4];225for (int i = 0; i < 4; ++i)226bits[i] = format_desc->channel[i].size;227228src = nir_format_float_to_unorm(b, src, bits);229dst = nir_format_float_to_unorm(b, dst, bits);230231nir_ssa_def *out = nir_logicop_func(b, options.logicop_func, src, dst);232233if (bits[0] < 32) 
{234nir_const_value mask[4];235for (int i = 0; i < 4; ++i)236mask[i] = nir_const_value_for_int((1u << bits[i]) - 1, 32);237238out = nir_iand(b, out, nir_build_imm(b, 4, 32, mask));239}240241out = nir_format_unorm_to_float(b, out, bits);242243if (bit_size == 16)244out = nir_f2f16(b, out);245246return out;247}248249/* Given a blend state, the source color, and the destination color,250* return the blended color251*/252253static nir_ssa_def *254nir_blend(255nir_builder *b,256nir_lower_blend_options options,257unsigned rt,258nir_ssa_def *src, nir_ssa_def *src1, nir_ssa_def *dst)259{260/* Grab the blend constant ahead of time */261nir_ssa_def *bconst;262if (options.scalar_blend_const) {263bconst = nir_vec4(b,264nir_load_blend_const_color_r_float(b),265nir_load_blend_const_color_g_float(b),266nir_load_blend_const_color_b_float(b),267nir_load_blend_const_color_a_float(b));268} else {269bconst = nir_load_blend_const_color_rgba(b);270}271272if (src->bit_size == 16)273bconst = nir_f2f16(b, bconst);274275/* Fixed-point framebuffers require their inputs clamped. */276enum pipe_format format = options.format[rt];277278if (!util_format_is_float(format))279src = nir_fsat(b, src);280281/* DST_ALPHA reads back 1.0 if there is no alpha channel */282const struct util_format_description *desc =283util_format_description(format);284285if (desc->nr_channels < 4) {286nir_ssa_def *zero = nir_imm_floatN_t(b, 0.0, dst->bit_size);287nir_ssa_def *one = nir_imm_floatN_t(b, 1.0, dst->bit_size);288289dst = nir_vec4(b, nir_channel(b, dst, 0),290desc->nr_channels > 1 ? nir_channel(b, dst, 1) : zero,291desc->nr_channels > 2 ? nir_channel(b, dst, 2) : zero,292desc->nr_channels > 3 ? nir_channel(b, dst, 3) : one);293}294295/* We blend per channel and recombine later */296nir_ssa_def *channels[4];297298for (unsigned c = 0; c < 4; ++c) {299/* Decide properties based on channel */300nir_lower_blend_channel chan =301(c < 3) ? 
options.rt[rt].rgb : options.rt[rt].alpha;302303nir_ssa_def *psrc = nir_channel(b, src, c);304nir_ssa_def *pdst = nir_channel(b, dst, c);305306if (nir_blend_factored(chan.func)) {307psrc = nir_blend_factor(308b, psrc,309src, src1, dst, bconst, c,310chan.src_factor, chan.invert_src_factor);311312pdst = nir_blend_factor(313b, pdst,314src, src1, dst, bconst, c,315chan.dst_factor, chan.invert_dst_factor);316}317318channels[c] = nir_blend_func(b, chan.func, psrc, pdst);319}320321return nir_vec(b, channels, 4);322}323324static bool325nir_lower_blend_instr(nir_builder *b, nir_instr *instr, void *data)326{327nir_lower_blend_options *options = data;328if (instr->type != nir_instr_type_intrinsic)329return false;330331nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);332if (intr->intrinsic != nir_intrinsic_store_deref)333return false;334335nir_variable *var = nir_intrinsic_get_var(intr, 0);336if (var->data.mode != nir_var_shader_out ||337(var->data.location != FRAG_RESULT_COLOR &&338var->data.location < FRAG_RESULT_DATA0))339return false;340341/* Determine render target for per-RT blending */342unsigned rt =343(var->data.location == FRAG_RESULT_COLOR) ? 
0 :344(var->data.location - FRAG_RESULT_DATA0);345346b->cursor = nir_before_instr(instr);347348/* Grab the input color */349nir_ssa_def *src = nir_ssa_for_src(b, intr->src[1], 4);350351/* Grab the previous fragment color */352var->data.fb_fetch_output = true;353b->shader->info.outputs_read |= BITFIELD64_BIT(var->data.location);354b->shader->info.fs.uses_fbfetch_output = true;355nir_ssa_def *dst = nir_load_var(b, var);356357/* Blend the two colors per the passed options */358nir_ssa_def *blended = src;359360if (options->logicop_enable)361blended = nir_blend_logicop(b, *options, rt, src, dst);362else if (!util_format_is_pure_integer(options->format[rt]))363blended = nir_blend(b, *options, rt, src, options->src1, dst);364365/* Apply a colormask */366blended = nir_color_mask(b, options->rt[rt].colormask, blended, dst);367368/* Write out the final color instead of the input */369nir_instr_rewrite_src_ssa(instr, &intr->src[1], blended);370return true;371}372373void374nir_lower_blend(nir_shader *shader, nir_lower_blend_options options)375{376assert(shader->info.stage == MESA_SHADER_FRAGMENT);377378nir_shader_instructions_pass(shader, nir_lower_blend_instr,379nir_metadata_block_index | nir_metadata_dominance, &options);380}381382383