/* Path: blob/21.2-virgl/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
 * 4570 views
 */
/*1* Copyright © 2015 Broadcom2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*/2223/**24* Implements most of the fixed function fragment pipeline in shader code.25*26* VC4 doesn't have any hardware support for blending, alpha test, logic ops,27* or color mask. 
Instead, you read the current contents of the destination28* from the tile buffer after having waited for the scoreboard (which is29* handled by vc4_qpu_emit.c), then do math using your output color and that30* destination value, and update the output color appropriately.31*32* Once this pass is done, the color write will either have one component (for33* single sample) with packed argb8888, or 4 components with the per-sample34* argb8888 result.35*/3637/**38* Lowers fixed-function blending to a load of the destination color and a39* series of ALU operations before the store of the output.40*/41#include "util/format/u_format.h"42#include "vc4_qir.h"43#include "compiler/nir/nir_builder.h"44#include "compiler/nir/nir_format_convert.h"45#include "vc4_context.h"4647static bool48blend_depends_on_dst_color(struct vc4_compile *c)49{50return (c->fs_key->blend.blend_enable ||51c->fs_key->blend.colormask != 0xf ||52c->fs_key->logicop_func != PIPE_LOGICOP_COPY);53}5455/** Emits a load of the previous fragment color from the tile buffer. 
*/56static nir_ssa_def *57vc4_nir_get_dst_color(nir_builder *b, int sample)58{59return nir_load_input(b, 1, 32, nir_imm_int(b, 0),60.base = VC4_NIR_TLB_COLOR_READ_INPUT + sample);61}6263static nir_ssa_def *64vc4_blend_channel_f(nir_builder *b,65nir_ssa_def **src,66nir_ssa_def **dst,67unsigned factor,68int channel)69{70switch(factor) {71case PIPE_BLENDFACTOR_ONE:72return nir_imm_float(b, 1.0);73case PIPE_BLENDFACTOR_SRC_COLOR:74return src[channel];75case PIPE_BLENDFACTOR_SRC_ALPHA:76return src[3];77case PIPE_BLENDFACTOR_DST_ALPHA:78return dst[3];79case PIPE_BLENDFACTOR_DST_COLOR:80return dst[channel];81case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:82if (channel != 3) {83return nir_fmin(b,84src[3],85nir_fsub(b,86nir_imm_float(b, 1.0),87dst[3]));88} else {89return nir_imm_float(b, 1.0);90}91case PIPE_BLENDFACTOR_CONST_COLOR:92return nir_load_system_value(b,93nir_intrinsic_load_blend_const_color_r_float +94channel,950, 1, 32);96case PIPE_BLENDFACTOR_CONST_ALPHA:97return nir_load_blend_const_color_a_float(b);98case PIPE_BLENDFACTOR_ZERO:99return nir_imm_float(b, 0.0);100case PIPE_BLENDFACTOR_INV_SRC_COLOR:101return nir_fsub(b, nir_imm_float(b, 1.0), src[channel]);102case PIPE_BLENDFACTOR_INV_SRC_ALPHA:103return nir_fsub(b, nir_imm_float(b, 1.0), src[3]);104case PIPE_BLENDFACTOR_INV_DST_ALPHA:105return nir_fsub(b, nir_imm_float(b, 1.0), dst[3]);106case PIPE_BLENDFACTOR_INV_DST_COLOR:107return nir_fsub(b, nir_imm_float(b, 1.0), dst[channel]);108case PIPE_BLENDFACTOR_INV_CONST_COLOR:109return nir_fsub(b, nir_imm_float(b, 1.0),110nir_load_system_value(b,111nir_intrinsic_load_blend_const_color_r_float +112channel,1130, 1, 32));114case PIPE_BLENDFACTOR_INV_CONST_ALPHA:115return nir_fsub(b, nir_imm_float(b, 1.0),116nir_load_blend_const_color_a_float(b));117118default:119case PIPE_BLENDFACTOR_SRC1_COLOR:120case PIPE_BLENDFACTOR_SRC1_ALPHA:121case PIPE_BLENDFACTOR_INV_SRC1_COLOR:122case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:123/* Unsupported. 
*/124fprintf(stderr, "Unknown blend factor %d\n", factor);125return nir_imm_float(b, 1.0);126}127}128129static nir_ssa_def *130vc4_nir_set_packed_chan(nir_builder *b, nir_ssa_def *src0, nir_ssa_def *src1,131int chan)132{133unsigned chan_mask = 0xff << (chan * 8);134return nir_ior(b,135nir_iand(b, src0, nir_imm_int(b, ~chan_mask)),136nir_iand(b, src1, nir_imm_int(b, chan_mask)));137}138139static nir_ssa_def *140vc4_blend_channel_i(nir_builder *b,141nir_ssa_def *src,142nir_ssa_def *dst,143nir_ssa_def *src_a,144nir_ssa_def *dst_a,145unsigned factor,146int a_chan)147{148switch (factor) {149case PIPE_BLENDFACTOR_ONE:150return nir_imm_int(b, ~0);151case PIPE_BLENDFACTOR_SRC_COLOR:152return src;153case PIPE_BLENDFACTOR_SRC_ALPHA:154return src_a;155case PIPE_BLENDFACTOR_DST_ALPHA:156return dst_a;157case PIPE_BLENDFACTOR_DST_COLOR:158return dst;159case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:160return vc4_nir_set_packed_chan(b,161nir_umin_4x8_vc4(b,162src_a,163nir_inot(b, dst_a)),164nir_imm_int(b, ~0),165a_chan);166case PIPE_BLENDFACTOR_CONST_COLOR:167return nir_load_blend_const_color_rgba8888_unorm(b);168case PIPE_BLENDFACTOR_CONST_ALPHA:169return nir_load_blend_const_color_aaaa8888_unorm(b);170case PIPE_BLENDFACTOR_ZERO:171return nir_imm_int(b, 0);172case PIPE_BLENDFACTOR_INV_SRC_COLOR:173return nir_inot(b, src);174case PIPE_BLENDFACTOR_INV_SRC_ALPHA:175return nir_inot(b, src_a);176case PIPE_BLENDFACTOR_INV_DST_ALPHA:177return nir_inot(b, dst_a);178case PIPE_BLENDFACTOR_INV_DST_COLOR:179return nir_inot(b, dst);180case PIPE_BLENDFACTOR_INV_CONST_COLOR:181return nir_inot(b,182nir_load_blend_const_color_rgba8888_unorm(b));183case PIPE_BLENDFACTOR_INV_CONST_ALPHA:184return nir_inot(b,185nir_load_blend_const_color_aaaa8888_unorm(b));186187default:188case PIPE_BLENDFACTOR_SRC1_COLOR:189case PIPE_BLENDFACTOR_SRC1_ALPHA:190case PIPE_BLENDFACTOR_INV_SRC1_COLOR:191case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:192/* Unsupported. 
*/193fprintf(stderr, "Unknown blend factor %d\n", factor);194return nir_imm_int(b, ~0);195}196}197198static nir_ssa_def *199vc4_blend_func_f(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst,200unsigned func)201{202switch (func) {203case PIPE_BLEND_ADD:204return nir_fadd(b, src, dst);205case PIPE_BLEND_SUBTRACT:206return nir_fsub(b, src, dst);207case PIPE_BLEND_REVERSE_SUBTRACT:208return nir_fsub(b, dst, src);209case PIPE_BLEND_MIN:210return nir_fmin(b, src, dst);211case PIPE_BLEND_MAX:212return nir_fmax(b, src, dst);213214default:215/* Unsupported. */216fprintf(stderr, "Unknown blend func %d\n", func);217return src;218219}220}221222static nir_ssa_def *223vc4_blend_func_i(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst,224unsigned func)225{226switch (func) {227case PIPE_BLEND_ADD:228return nir_usadd_4x8_vc4(b, src, dst);229case PIPE_BLEND_SUBTRACT:230return nir_ussub_4x8_vc4(b, src, dst);231case PIPE_BLEND_REVERSE_SUBTRACT:232return nir_ussub_4x8_vc4(b, dst, src);233case PIPE_BLEND_MIN:234return nir_umin_4x8_vc4(b, src, dst);235case PIPE_BLEND_MAX:236return nir_umax_4x8_vc4(b, src, dst);237238default:239/* Unsupported. */240fprintf(stderr, "Unknown blend func %d\n", func);241return src;242243}244}245246static void247vc4_do_blending_f(struct vc4_compile *c, nir_builder *b, nir_ssa_def **result,248nir_ssa_def **src_color, nir_ssa_def **dst_color)249{250struct pipe_rt_blend_state *blend = &c->fs_key->blend;251252if (!blend->blend_enable) {253for (int i = 0; i < 4; i++)254result[i] = src_color[i];255return;256}257258/* Clamp the src color to [0, 1]. Dest is already clamped. */259for (int i = 0; i < 4; i++)260src_color[i] = nir_fsat(b, src_color[i]);261262nir_ssa_def *src_blend[4], *dst_blend[4];263for (int i = 0; i < 4; i++) {264int src_factor = ((i != 3) ? blend->rgb_src_factor :265blend->alpha_src_factor);266int dst_factor = ((i != 3) ? 
blend->rgb_dst_factor :267blend->alpha_dst_factor);268src_blend[i] = nir_fmul(b, src_color[i],269vc4_blend_channel_f(b,270src_color, dst_color,271src_factor, i));272dst_blend[i] = nir_fmul(b, dst_color[i],273vc4_blend_channel_f(b,274src_color, dst_color,275dst_factor, i));276}277278for (int i = 0; i < 4; i++) {279result[i] = vc4_blend_func_f(b, src_blend[i], dst_blend[i],280((i != 3) ? blend->rgb_func :281blend->alpha_func));282}283}284285static nir_ssa_def *286vc4_nir_splat(nir_builder *b, nir_ssa_def *src)287{288nir_ssa_def *or1 = nir_ior(b, src, nir_ishl(b, src, nir_imm_int(b, 8)));289return nir_ior(b, or1, nir_ishl(b, or1, nir_imm_int(b, 16)));290}291292static nir_ssa_def *293vc4_do_blending_i(struct vc4_compile *c, nir_builder *b,294nir_ssa_def *src_color, nir_ssa_def *dst_color,295nir_ssa_def *src_float_a)296{297struct pipe_rt_blend_state *blend = &c->fs_key->blend;298299if (!blend->blend_enable)300return src_color;301302enum pipe_format color_format = c->fs_key->color_format;303const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);304nir_ssa_def *imm_0xff = nir_imm_int(b, 0xff);305nir_ssa_def *src_a = nir_pack_unorm_4x8(b, src_float_a);306nir_ssa_def *dst_a;307int alpha_chan;308for (alpha_chan = 0; alpha_chan < 4; alpha_chan++) {309if (format_swiz[alpha_chan] == 3)310break;311}312if (alpha_chan != 4) {313nir_ssa_def *shift = nir_imm_int(b, alpha_chan * 8);314dst_a = vc4_nir_splat(b, nir_iand(b, nir_ushr(b, dst_color,315shift), imm_0xff));316} else {317dst_a = nir_imm_int(b, ~0);318}319320nir_ssa_def *src_factor = vc4_blend_channel_i(b,321src_color, dst_color,322src_a, dst_a,323blend->rgb_src_factor,324alpha_chan);325nir_ssa_def *dst_factor = vc4_blend_channel_i(b,326src_color, dst_color,327src_a, dst_a,328blend->rgb_dst_factor,329alpha_chan);330331if (alpha_chan != 4 &&332blend->alpha_src_factor != blend->rgb_src_factor) {333nir_ssa_def *src_alpha_factor =334vc4_blend_channel_i(b,335src_color, dst_color,336src_a, 
dst_a,337blend->alpha_src_factor,338alpha_chan);339src_factor = vc4_nir_set_packed_chan(b, src_factor,340src_alpha_factor,341alpha_chan);342}343if (alpha_chan != 4 &&344blend->alpha_dst_factor != blend->rgb_dst_factor) {345nir_ssa_def *dst_alpha_factor =346vc4_blend_channel_i(b,347src_color, dst_color,348src_a, dst_a,349blend->alpha_dst_factor,350alpha_chan);351dst_factor = vc4_nir_set_packed_chan(b, dst_factor,352dst_alpha_factor,353alpha_chan);354}355nir_ssa_def *src_blend = nir_umul_unorm_4x8_vc4(b, src_color, src_factor);356nir_ssa_def *dst_blend = nir_umul_unorm_4x8_vc4(b, dst_color, dst_factor);357358nir_ssa_def *result =359vc4_blend_func_i(b, src_blend, dst_blend, blend->rgb_func);360if (alpha_chan != 4 && blend->alpha_func != blend->rgb_func) {361nir_ssa_def *result_a = vc4_blend_func_i(b,362src_blend,363dst_blend,364blend->alpha_func);365result = vc4_nir_set_packed_chan(b, result, result_a,366alpha_chan);367}368return result;369}370371static nir_ssa_def *372vc4_logicop(nir_builder *b, int logicop_func,373nir_ssa_def *src, nir_ssa_def *dst)374{375switch (logicop_func) {376case PIPE_LOGICOP_CLEAR:377return nir_imm_int(b, 0);378case PIPE_LOGICOP_NOR:379return nir_inot(b, nir_ior(b, src, dst));380case PIPE_LOGICOP_AND_INVERTED:381return nir_iand(b, nir_inot(b, src), dst);382case PIPE_LOGICOP_COPY_INVERTED:383return nir_inot(b, src);384case PIPE_LOGICOP_AND_REVERSE:385return nir_iand(b, src, nir_inot(b, dst));386case PIPE_LOGICOP_INVERT:387return nir_inot(b, dst);388case PIPE_LOGICOP_XOR:389return nir_ixor(b, src, dst);390case PIPE_LOGICOP_NAND:391return nir_inot(b, nir_iand(b, src, dst));392case PIPE_LOGICOP_AND:393return nir_iand(b, src, dst);394case PIPE_LOGICOP_EQUIV:395return nir_inot(b, nir_ixor(b, src, dst));396case PIPE_LOGICOP_NOOP:397return dst;398case PIPE_LOGICOP_OR_INVERTED:399return nir_ior(b, nir_inot(b, src), dst);400case PIPE_LOGICOP_OR_REVERSE:401return nir_ior(b, src, nir_inot(b, dst));402case PIPE_LOGICOP_OR:403return nir_ior(b, src, 
dst);404case PIPE_LOGICOP_SET:405return nir_imm_int(b, ~0);406default:407fprintf(stderr, "Unknown logic op %d\n", logicop_func);408FALLTHROUGH;409case PIPE_LOGICOP_COPY:410return src;411}412}413414static nir_ssa_def *415vc4_nir_swizzle_and_pack(struct vc4_compile *c, nir_builder *b,416nir_ssa_def **colors)417{418enum pipe_format color_format = c->fs_key->color_format;419const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);420421nir_ssa_def *swizzled[4];422for (int i = 0; i < 4; i++) {423swizzled[i] = vc4_nir_get_swizzled_channel(b, colors,424format_swiz[i]);425}426427return nir_pack_unorm_4x8(b,428nir_vec4(b,429swizzled[0], swizzled[1],430swizzled[2], swizzled[3]));431432}433434static nir_ssa_def *435vc4_nir_blend_pipeline(struct vc4_compile *c, nir_builder *b, nir_ssa_def *src,436int sample)437{438enum pipe_format color_format = c->fs_key->color_format;439const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);440bool srgb = util_format_is_srgb(color_format);441442/* Pull out the float src/dst color components. */443nir_ssa_def *packed_dst_color = vc4_nir_get_dst_color(b, sample);444nir_ssa_def *dst_vec4 = nir_unpack_unorm_4x8(b, packed_dst_color);445nir_ssa_def *src_color[4], *unpacked_dst_color[4];446for (unsigned i = 0; i < 4; i++) {447src_color[i] = nir_channel(b, src, i);448unpacked_dst_color[i] = nir_channel(b, dst_vec4, i);449}450451if (c->fs_key->sample_alpha_to_one && c->fs_key->msaa)452src_color[3] = nir_imm_float(b, 1.0);453454nir_ssa_def *packed_color;455if (srgb) {456/* Unswizzle the destination color. */457nir_ssa_def *dst_color[4];458for (unsigned i = 0; i < 4; i++) {459dst_color[i] = vc4_nir_get_swizzled_channel(b,460unpacked_dst_color,461format_swiz[i]);462}463464/* Turn dst color to linear. 
*/465for (int i = 0; i < 3; i++)466dst_color[i] = nir_format_srgb_to_linear(b, dst_color[i]);467468nir_ssa_def *blend_color[4];469vc4_do_blending_f(c, b, blend_color, src_color, dst_color);470471/* sRGB encode the output color */472for (int i = 0; i < 3; i++)473blend_color[i] = nir_format_linear_to_srgb(b, blend_color[i]);474475packed_color = vc4_nir_swizzle_and_pack(c, b, blend_color);476} else {477nir_ssa_def *packed_src_color =478vc4_nir_swizzle_and_pack(c, b, src_color);479480packed_color =481vc4_do_blending_i(c, b,482packed_src_color, packed_dst_color,483src_color[3]);484}485486packed_color = vc4_logicop(b, c->fs_key->logicop_func,487packed_color, packed_dst_color);488489/* If the bit isn't set in the color mask, then just return the490* original dst color, instead.491*/492uint32_t colormask = 0xffffffff;493for (int i = 0; i < 4; i++) {494if (format_swiz[i] < 4 &&495!(c->fs_key->blend.colormask & (1 << format_swiz[i]))) {496colormask &= ~(0xff << (i * 8));497}498}499500return nir_ior(b,501nir_iand(b, packed_color,502nir_imm_int(b, colormask)),503nir_iand(b, packed_dst_color,504nir_imm_int(b, ~colormask)));505}506507static void508vc4_nir_store_sample_mask(struct vc4_compile *c, nir_builder *b,509nir_ssa_def *val)510{511nir_variable *sample_mask = nir_variable_create(c->s, nir_var_shader_out,512glsl_uint_type(),513"sample_mask");514sample_mask->data.driver_location = c->s->num_outputs++;515sample_mask->data.location = FRAG_RESULT_SAMPLE_MASK;516517nir_store_output(b, val, nir_imm_int(b, 0),518.base = sample_mask->data.driver_location);519}520521static void522vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b,523nir_intrinsic_instr *intr)524{525nir_ssa_def *frag_color = intr->src[0].ssa;526527if (c->fs_key->sample_alpha_to_coverage) {528nir_ssa_def *a = nir_channel(b, frag_color, 3);529530/* XXX: We should do a nice dither based on the fragment531* coordinate, instead.532*/533nir_ssa_def *num_samples = nir_imm_float(b, VC4_MAX_SAMPLES);534nir_ssa_def 
*num_bits = nir_f2i32(b, nir_fmul(b, a, num_samples));535nir_ssa_def *bitmask = nir_isub(b,536nir_ishl(b,537nir_imm_int(b, 1),538num_bits),539nir_imm_int(b, 1));540vc4_nir_store_sample_mask(c, b, bitmask);541}542543/* The TLB color read returns each sample in turn, so if our blending544* depends on the destination color, we're going to have to run the545* blending function separately for each destination sample value, and546* then output the per-sample color using TLB_COLOR_MS.547*/548nir_ssa_def *blend_output;549if (c->fs_key->msaa && blend_depends_on_dst_color(c)) {550c->msaa_per_sample_output = true;551552nir_ssa_def *samples[4];553for (int i = 0; i < VC4_MAX_SAMPLES; i++)554samples[i] = vc4_nir_blend_pipeline(c, b, frag_color, i);555blend_output = nir_vec4(b,556samples[0], samples[1],557samples[2], samples[3]);558} else {559blend_output = vc4_nir_blend_pipeline(c, b, frag_color, 0);560}561562nir_instr_rewrite_src(&intr->instr, &intr->src[0],563nir_src_for_ssa(blend_output));564intr->num_components = blend_output->num_components;565}566567static bool568vc4_nir_lower_blend_block(nir_block *block, struct vc4_compile *c)569{570nir_foreach_instr_safe(instr, block) {571if (instr->type != nir_instr_type_intrinsic)572continue;573nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);574if (intr->intrinsic != nir_intrinsic_store_output)575continue;576577nir_variable *output_var = NULL;578nir_foreach_shader_out_variable(var, c->s) {579if (var->data.driver_location ==580nir_intrinsic_base(intr)) {581output_var = var;582break;583}584}585assert(output_var);586587if (output_var->data.location != FRAG_RESULT_COLOR &&588output_var->data.location != FRAG_RESULT_DATA0) {589continue;590}591592nir_function_impl *impl =593nir_cf_node_get_function(&block->cf_node);594nir_builder b;595nir_builder_init(&b, impl);596b.cursor = nir_before_instr(&intr->instr);597vc4_nir_lower_blend_instr(c, &b, intr);598}599return true;600}601602void603vc4_nir_lower_blend(nir_shader *s, struct 
vc4_compile *c)604{605nir_foreach_function(function, s) {606if (function->impl) {607nir_foreach_block(block, function->impl) {608vc4_nir_lower_blend_block(block, c);609}610611nir_metadata_preserve(function->impl,612nir_metadata_block_index |613nir_metadata_dominance);614}615}616617/* If we didn't do alpha-to-coverage on the output color, we still618* need to pass glSampleMask() through.619*/620if (c->fs_key->sample_coverage && !c->fs_key->sample_alpha_to_coverage) {621nir_function_impl *impl = nir_shader_get_entrypoint(s);622nir_builder b;623nir_builder_init(&b, impl);624b.cursor = nir_after_block(nir_impl_last_block(impl));625626vc4_nir_store_sample_mask(c, &b, nir_load_sample_mask_in(&b));627}628}629630631