Path: blob/21.2-virgl/src/panfrost/bifrost/bi_helper_invocations.c
4564 views
/*1* Copyright (C) 2019-2021 Collabora, Ltd.2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,19* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE20* SOFTWARE.21*22* Authors (Collabora):23* Alyssa Rosenzweig <[email protected]>24*/2526#include "compiler.h"2728/* Bifrost texture operations have a `skip` bit, instructinh helper invocations29* to skip execution. Each clause has a `terminate_discarded_threads` bit,30* which will terminate helper invocations.31*32* The terminate bit should be set on the last clause requiring helper33* invocations. Without control flow, that's the last source-order instruction;34* with control flow, there may be multiple such instructions (with ifs) or no35* such instruction (with loops).36*37* The skip bit should be set unless the value of this instruction is required38* by a future instruction requiring helper invocations. Consider:39*40* 0 = texture ...41* 1 = fmul 0, #1042* 2 = dfdx 143* store 244*45* Since the derivative calculation 2 requires helper invocations, the value 146* must be calculated by helper invocations, and since it depends on 0, 0 must47* be calculated by helpers. Hence the texture op does NOT have the skip bit48* set, and the clause containing the derivative has the terminate bit set.49*50* Calculating the terminate bit occurs by forward dataflow analysis to51* determine which blocks require helper invocations. A block requires52* invocations in if any of its instructions use helper invocations, or if it53* depends on a block that requires invocation. With that analysis, the54* terminate bit is set on the last instruction using invocations within any55* block that does *not* require invocations out.56*57* Likewise, calculating the execute bit requires backward dataflow analysis58* with union as the join operation and the generating set being the union of59* sources of instructions writing executed values. The skip bit is the inverse60* of the execute bit.61*/6263static bool64bi_has_skip_bit(enum bi_opcode op)65{66switch (op) {67case BI_OPCODE_TEXC:68case BI_OPCODE_TEXS_2D_F16:69case BI_OPCODE_TEXS_2D_F32:70case BI_OPCODE_TEXS_CUBE_F16:71case BI_OPCODE_TEXS_CUBE_F32:72case BI_OPCODE_VAR_TEX_F16:73case BI_OPCODE_VAR_TEX_F32:74return true;75default:76return false;77}78}7980/* Does a given instruction require helper threads to be active (because it81* reads from other subgroup lanes)? This only applies to fragment shaders.82* Other shader stages do not have a notion of helper threads. */8384static bool85bi_instr_uses_helpers(bi_instr *I)86{87switch (I->op) {88case BI_OPCODE_TEXC:89case BI_OPCODE_TEXS_2D_F16:90case BI_OPCODE_TEXS_2D_F32:91case BI_OPCODE_TEXS_CUBE_F16:92case BI_OPCODE_TEXS_CUBE_F32:93case BI_OPCODE_VAR_TEX_F16:94case BI_OPCODE_VAR_TEX_F32:95return !I->lod_mode; /* set for zero, clear for computed */96case BI_OPCODE_CLPER_V6_I32:97case BI_OPCODE_CLPER_V7_I32:98/* Fragment shaders require helpers to implement derivatives.99* Other shader stages don't have helpers at all */100return true;101default:102return false;103}104}105106/* Does a block use helpers directly */107static bool108bi_block_uses_helpers(bi_block *block)109{110bi_foreach_instr_in_block(block, I) {111if (bi_instr_uses_helpers(I))112return true;113}114115return false;116}117118static bool119bi_block_terminates_helpers(bi_block *block)120{121/* Can't terminate if a successor needs helpers */122pan_foreach_successor((&block->base), succ) {123if (((bi_block *) succ)->pass_flags & 1)124return false;125}126127/* Otherwise we terminate */128return true;129}130131void132bi_analyze_helper_terminate(bi_context *ctx)133{134/* Other shader stages do not have a notion of helper threads, so we135* can skip the analysis */136if (ctx->stage != MESA_SHADER_FRAGMENT)137return;138139/* Set blocks as directly requiring helpers, and if they do add them to140* the worklist to propagate to their predecessors */141142struct set *worklist = _mesa_set_create(NULL,143_mesa_hash_pointer,144_mesa_key_pointer_equal);145146struct set *visited = _mesa_set_create(NULL,147_mesa_hash_pointer,148_mesa_key_pointer_equal);149150bi_foreach_block(ctx, _block) {151bi_block *block = (bi_block *) _block;152block->pass_flags = bi_block_uses_helpers(block) ? 1 : 0;153154if (block->pass_flags & 1)155_mesa_set_add(worklist, _block);156}157158/* Next, propagate back. Since there are a finite number of blocks, the159* worklist (a subset of all the blocks) is finite. Since a block can160* only be added to the worklist if it is not on the visited list and161* the visited list - also a subset of the blocks - grows every162* iteration, the algorithm must terminate. */163164struct set_entry *cur;165166while((cur = _mesa_set_next_entry(worklist, NULL)) != NULL) {167/* Pop off a block requiring helpers */168pan_block *blk = (struct pan_block *) cur->key;169_mesa_set_remove(worklist, cur);170171/* Its predecessors also require helpers */172pan_foreach_predecessor(blk, pred) {173if (!_mesa_set_search(visited, pred)) {174((bi_block *) pred)->pass_flags |= 1;175_mesa_set_add(worklist, pred);176}177}178179_mesa_set_add(visited, blk);180}181182_mesa_set_destroy(visited, NULL);183_mesa_set_destroy(worklist, NULL);184185/* Finally, mark clauses requiring helpers */186bi_foreach_block(ctx, _block) {187bi_block *block = (bi_block *) _block;188189/* At the end, there are helpers iff we don't terminate */190bool helpers = !bi_block_terminates_helpers(block);191192bi_foreach_clause_in_block_rev(block, clause) {193bi_foreach_instr_in_clause_rev(block, clause, I) {194helpers |= bi_instr_uses_helpers(I);195}196197clause->td = !helpers;198}199}200}201202static bool203bi_helper_block_update(BITSET_WORD *deps, bi_block *block)204{205bool progress = false;206207bi_foreach_instr_in_block_rev(block, I) {208/* If our destination is required by helper invocation... */209if (I->dest[0].type != BI_INDEX_NORMAL)210continue;211212if (!BITSET_TEST(deps, bi_get_node(I->dest[0])))213continue;214215/* ...so are our sources */216bi_foreach_src(I, s) {217if (I->src[s].type == BI_INDEX_NORMAL) {218unsigned node = bi_get_node(I->src[s]);219progress |= !BITSET_TEST(deps, node);220BITSET_SET(deps, node);221}222}223}224225return progress;226}227228void229bi_analyze_helper_requirements(bi_context *ctx)230{231unsigned temp_count = bi_max_temp(ctx);232BITSET_WORD *deps = calloc(sizeof(BITSET_WORD), BITSET_WORDS(temp_count));233234/* Initialize with the sources of instructions consuming235* derivatives */236237bi_foreach_instr_global(ctx, I) {238if (I->dest[0].type != BI_INDEX_NORMAL) continue;239if (!bi_instr_uses_helpers(I)) continue;240241bi_foreach_src(I, s) {242if (I->src[s].type == BI_INDEX_NORMAL)243BITSET_SET(deps, bi_get_node(I->src[s]));244}245}246247/* Propagate that up */248249struct set *work_list = _mesa_set_create(NULL,250_mesa_hash_pointer,251_mesa_key_pointer_equal);252253struct set *visited = _mesa_set_create(NULL,254_mesa_hash_pointer,255_mesa_key_pointer_equal);256257struct set_entry *cur = _mesa_set_add(work_list, pan_exit_block(&ctx->blocks));258259do {260pan_block *blk = (struct pan_block *) cur->key;261_mesa_set_remove(work_list, cur);262263bool progress = bi_helper_block_update(deps, (bi_block *) blk);264265if (progress || !_mesa_set_search(visited, blk)) {266pan_foreach_predecessor(blk, pred)267_mesa_set_add(work_list, pred);268}269270_mesa_set_add(visited, blk);271} while((cur = _mesa_set_next_entry(work_list, NULL)) != NULL);272273_mesa_set_destroy(visited, NULL);274_mesa_set_destroy(work_list, NULL);275276/* Set the execute bits */277278bi_foreach_instr_global(ctx, I) {279if (!bi_has_skip_bit(I->op)) continue;280if (I->dest[0].type != BI_INDEX_NORMAL) continue;281282I->skip = !BITSET_TEST(deps, bi_get_node(I->dest[0]));283}284285free(deps);286}287288289