Path: blob/21.2-virgl/src/amd/llvm/ac_llvm_cull.c
7246 views
/*1* Copyright 2019 Advanced Micro Devices, Inc.2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the5* "Software"), to deal in the Software without restriction, including6* without limitation the rights to use, copy, modify, merge, publish,7* distribute, sub license, and/or sell copies of the Software, and to8* permit persons to whom the Software is furnished to do so, subject to9* the following conditions:10*11* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR12* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,13* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL14* THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,15* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR16* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE17* USE OR OTHER DEALINGS IN THE SOFTWARE.18*19* The above copyright notice and this permission notice (including the20* next paragraph) shall be included in all copies or substantial portions21* of the Software.22*23*/2425#include "ac_llvm_cull.h"2627#include <llvm-c/Core.h>2829struct ac_position_w_info {30/* If a primitive intersects the W=0 plane, it causes a reflection31* of the determinant used for face culling. Every vertex behind32* the W=0 plane negates the determinant, so having 2 vertices behind33* the plane has no effect. This is i1 true if the determinant should be34* negated.35*/36LLVMValueRef w_reflection;3738/* If we simplify the "-w <= p <= w" view culling equation, we get39* "-w <= w", which can't be satisfied when w is negative.40* In perspective projection, a negative W means that the primitive41* is behind the viewer, but the equation is independent of the type42* of projection.43*44* w_accepted is false when all W are negative and therefore45* the primitive is invisible.46*/47LLVMValueRef w_accepted;4849/* The bounding box culling doesn't work and should be skipped when this is true. */50LLVMValueRef any_w_negative;51};5253static void ac_analyze_position_w(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4],54struct ac_position_w_info *w)55{56LLVMBuilderRef builder = ctx->builder;57LLVMValueRef all_w_negative = ctx->i1true;5859w->w_reflection = ctx->i1false;60w->any_w_negative = ctx->i1false;6162for (unsigned i = 0; i < 3; i++) {63LLVMValueRef neg_w;6465neg_w = LLVMBuildFCmp(builder, LLVMRealOLT, pos[i][3], ctx->f32_0, "");66/* If neg_w is true, negate w_reflection. */67w->w_reflection = LLVMBuildXor(builder, w->w_reflection, neg_w, "");68w->any_w_negative = LLVMBuildOr(builder, w->any_w_negative, neg_w, "");69all_w_negative = LLVMBuildAnd(builder, all_w_negative, neg_w, "");70}71w->w_accepted = LLVMBuildNot(builder, all_w_negative, "");72}7374/* Perform front/back face culling and return true if the primitive is accepted. */75static LLVMValueRef ac_cull_face(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4],76struct ac_position_w_info *w, bool cull_front, bool cull_back,77bool cull_zero_area)78{79LLVMBuilderRef builder = ctx->builder;8081if (cull_front && cull_back)82return ctx->i1false;8384if (!cull_front && !cull_back && !cull_zero_area)85return ctx->i1true;8687/* Front/back face culling. Also if the determinant == 0, the triangle88* area is 0.89*/90LLVMValueRef det_t0 = LLVMBuildFSub(builder, pos[2][0], pos[0][0], "");91LLVMValueRef det_t1 = LLVMBuildFSub(builder, pos[1][1], pos[0][1], "");92LLVMValueRef det_t2 = LLVMBuildFSub(builder, pos[0][0], pos[1][0], "");93LLVMValueRef det_t3 = LLVMBuildFSub(builder, pos[0][1], pos[2][1], "");94LLVMValueRef det_p0 = LLVMBuildFMul(builder, det_t0, det_t1, "");95LLVMValueRef det_p1 = LLVMBuildFMul(builder, det_t2, det_t3, "");96LLVMValueRef det = LLVMBuildFSub(builder, det_p0, det_p1, "");9798/* Negative W negates the determinant. */99det = LLVMBuildSelect(builder, w->w_reflection, LLVMBuildFNeg(builder, det, ""), det, "");100101LLVMValueRef accepted = NULL;102if (cull_front) {103LLVMRealPredicate cond = cull_zero_area ? LLVMRealOGT : LLVMRealOGE;104accepted = LLVMBuildFCmp(builder, cond, det, ctx->f32_0, "");105} else if (cull_back) {106LLVMRealPredicate cond = cull_zero_area ? LLVMRealOLT : LLVMRealOLE;107accepted = LLVMBuildFCmp(builder, cond, det, ctx->f32_0, "");108} else if (cull_zero_area) {109accepted = LLVMBuildFCmp(builder, LLVMRealONE, det, ctx->f32_0, "");110}111return accepted;112}113114/* Perform view culling and small primitive elimination and return true115* if the primitive is accepted and initially_accepted == true. */116static void cull_bbox(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4],117LLVMValueRef initially_accepted, struct ac_position_w_info *w,118LLVMValueRef vp_scale[2], LLVMValueRef vp_translate[2],119LLVMValueRef small_prim_precision, bool cull_view_xy,120bool cull_view_near_z, bool cull_view_far_z, bool cull_small_prims,121bool use_halfz_clip_space, ac_cull_accept_func accept_func,122void *userdata)123{124LLVMBuilderRef builder = ctx->builder;125126if (!cull_view_xy && !cull_view_near_z && !cull_view_far_z && !cull_small_prims) {127if (accept_func)128accept_func(ctx, initially_accepted, userdata);129return;130}131132ac_build_ifcc(ctx, initially_accepted, 10000000);133{134LLVMValueRef bbox_min[3], bbox_max[3];135LLVMValueRef accepted = ctx->i1true;136137/* Compute the primitive bounding box for easy culling. */138for (unsigned chan = 0; chan < (cull_view_near_z || cull_view_far_z ? 3 : 2); chan++) {139bbox_min[chan] = ac_build_fmin(ctx, pos[0][chan], pos[1][chan]);140bbox_min[chan] = ac_build_fmin(ctx, bbox_min[chan], pos[2][chan]);141142bbox_max[chan] = ac_build_fmax(ctx, pos[0][chan], pos[1][chan]);143bbox_max[chan] = ac_build_fmax(ctx, bbox_max[chan], pos[2][chan]);144}145146/* View culling. */147if (cull_view_xy || cull_view_near_z || cull_view_far_z) {148for (unsigned chan = 0; chan < 3; chan++) {149LLVMValueRef visible;150151if ((cull_view_xy && chan <= 1) || (cull_view_near_z && chan == 2)) {152float t = chan == 2 && use_halfz_clip_space ? 0 : -1;153visible = LLVMBuildFCmp(builder, LLVMRealOGE, bbox_max[chan],154LLVMConstReal(ctx->f32, t), "");155accepted = LLVMBuildAnd(builder, accepted, visible, "");156}157158if ((cull_view_xy && chan <= 1) || (cull_view_far_z && chan == 2)) {159visible = LLVMBuildFCmp(builder, LLVMRealOLE, bbox_min[chan], ctx->f32_1, "");160accepted = LLVMBuildAnd(builder, accepted, visible, "");161}162}163}164165/* Small primitive elimination. */166if (cull_small_prims) {167/* Assuming a sample position at (0.5, 0.5), if we round168* the bounding box min/max extents and the results of169* the rounding are equal in either the X or Y direction,170* the bounding box does not intersect the sample.171*172* See these GDC slides for pictures:173* https://frostbite-wp-prd.s3.amazonaws.com/wp-content/uploads/2016/03/29204330/GDC_2016_Compute.pdf174*/175LLVMValueRef min, max, not_equal[2], visible;176177for (unsigned chan = 0; chan < 2; chan++) {178/* Convert the position to screen-space coordinates. */179min = ac_build_fmad(ctx, bbox_min[chan], vp_scale[chan], vp_translate[chan]);180max = ac_build_fmad(ctx, bbox_max[chan], vp_scale[chan], vp_translate[chan]);181/* Scale the bounding box according to the precision of182* the rasterizer and the number of MSAA samples. */183min = LLVMBuildFSub(builder, min, small_prim_precision, "");184max = LLVMBuildFAdd(builder, max, small_prim_precision, "");185186/* Determine if the bbox intersects the sample point.187* It also works for MSAA, but vp_scale, vp_translate,188* and small_prim_precision are computed differently.189*/190min = ac_build_round(ctx, min);191max = ac_build_round(ctx, max);192not_equal[chan] = LLVMBuildFCmp(builder, LLVMRealONE, min, max, "");193}194visible = LLVMBuildAnd(builder, not_equal[0], not_equal[1], "");195accepted = LLVMBuildAnd(builder, accepted, visible, "");196}197198/* Disregard the bounding box culling if any W is negative because the code199* doesn't work with that.200*/201accepted = LLVMBuildOr(builder, accepted, w->any_w_negative, "");202203if (accept_func)204accept_func(ctx, accepted, userdata);205}206ac_build_endif(ctx, 10000000);207}208209/**210* Return i1 true if the primitive is accepted (not culled).211*212* \param pos Vertex positions 3x vec4213* \param initially_accepted AND'ed with the result. Some computations can be214* skipped if this is false.215* \param vp_scale Viewport scale XY.216* For MSAA, multiply them by the number of samples.217* \param vp_translate Viewport translation XY.218* For MSAA, multiply them by the number of samples.219* \param small_prim_precision Precision of small primitive culling. This should220* be the same as or greater than the precision of221* the rasterizer. Set to num_samples / 2^subpixel_bits.222* subpixel_bits are defined by the quantization mode.223* \param options See ac_cull_options.224* \param accept_func Callback invoked in the inner-most branch where the primitive is accepted.225*/226void ac_cull_triangle(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4],227LLVMValueRef initially_accepted, LLVMValueRef vp_scale[2],228LLVMValueRef vp_translate[2], LLVMValueRef small_prim_precision,229struct ac_cull_options *options, ac_cull_accept_func accept_func,230void *userdata)231{232struct ac_position_w_info w;233ac_analyze_position_w(ctx, pos, &w);234235/* W culling. */236LLVMValueRef accepted = options->cull_w ? w.w_accepted : ctx->i1true;237accepted = LLVMBuildAnd(ctx->builder, accepted, initially_accepted, "");238239/* Face culling. */240accepted = LLVMBuildAnd(241ctx->builder, accepted,242ac_cull_face(ctx, pos, &w, options->cull_front, options->cull_back, options->cull_zero_area),243"");244245/* View culling and small primitive elimination. */246cull_bbox(ctx, pos, accepted, &w, vp_scale, vp_translate, small_prim_precision,247options->cull_view_xy, options->cull_view_near_z, options->cull_view_far_z,248options->cull_small_prims, options->use_halfz_clip_space, accept_func,249userdata);250}251252253