Path: blob/21.2-virgl/src/gallium/drivers/softpipe/sp_quad_depth_test.c
4570 views
/**************************************************************************1*2* Copyright 2007 VMware, Inc.3* Copyright 2010 VMware, Inc.4* All Rights Reserved.5*6* Permission is hereby granted, free of charge, to any person obtaining a7* copy of this software and associated documentation files (the8* "Software"), to deal in the Software without restriction, including9* without limitation the rights to use, copy, modify, merge, publish,10* distribute, sub license, and/or sell copies of the Software, and to11* permit persons to whom the Software is furnished to do so, subject to12* the following conditions:13*14* The above copyright notice and this permission notice (including the15* next paragraph) shall be included in all copies or substantial portions16* of the Software.17*18* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS19* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF20* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.21* IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR22* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,23* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE24* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.25*26**************************************************************************/2728/**29* \brief Quad depth / stencil testing30*/3132#include "pipe/p_defines.h"33#include "util/format/u_format.h"34#include "util/u_math.h"35#include "util/u_memory.h"36#include "tgsi/tgsi_scan.h"37#include "sp_context.h"38#include "sp_quad.h"39#include "sp_quad_pipe.h"40#include "sp_tile_cache.h"41#include "sp_state.h" /* for sp_fragment_shader */424344struct depth_data {45struct pipe_surface *ps;46enum pipe_format format;47unsigned bzzzz[TGSI_QUAD_SIZE]; /**< Z values fetched from depth buffer */48unsigned qzzzz[TGSI_QUAD_SIZE]; /**< Z values from the quad */49ubyte stencilVals[TGSI_QUAD_SIZE];50boolean use_shader_stencil_refs;51ubyte shader_stencil_refs[TGSI_QUAD_SIZE];52struct softpipe_cached_tile *tile;53float minval, maxval;54bool clamp;55};56575859static void60get_depth_stencil_values( struct depth_data *data,61const struct quad_header *quad )62{63unsigned j;64const struct softpipe_cached_tile *tile = data->tile;6566switch (data->format) {67case PIPE_FORMAT_Z16_UNORM:68for (j = 0; j < TGSI_QUAD_SIZE; j++) {69int x = quad->input.x0 % TILE_SIZE + (j & 1);70int y = quad->input.y0 % TILE_SIZE + (j >> 1);71data->bzzzz[j] = tile->data.depth16[y][x];72}73break;74case PIPE_FORMAT_Z32_UNORM:75for (j = 0; j < TGSI_QUAD_SIZE; j++) {76int x = quad->input.x0 % TILE_SIZE + (j & 1);77int y = quad->input.y0 % TILE_SIZE + (j >> 1);78data->bzzzz[j] = tile->data.depth32[y][x];79}80break;81case PIPE_FORMAT_Z24X8_UNORM:82case PIPE_FORMAT_Z24_UNORM_S8_UINT:83for (j = 0; j < TGSI_QUAD_SIZE; j++) {84int x = quad->input.x0 % TILE_SIZE + (j & 1);85int y = quad->input.y0 % TILE_SIZE + (j >> 1);86data->bzzzz[j] = tile->data.depth32[y][x] & 0xffffff;87data->stencilVals[j] = tile->data.depth32[y][x] >> 24;88}89break;90case PIPE_FORMAT_X8Z24_UNORM:91case PIPE_FORMAT_S8_UINT_Z24_UNORM:92for (j = 0; j < TGSI_QUAD_SIZE; j++) {93int x = quad->input.x0 % TILE_SIZE + (j & 1);94int y = quad->input.y0 % TILE_SIZE + (j >> 1);95data->bzzzz[j] = tile->data.depth32[y][x] >> 8;96data->stencilVals[j] = tile->data.depth32[y][x] & 0xff;97}98break;99case PIPE_FORMAT_S8_UINT:100for (j = 0; j < TGSI_QUAD_SIZE; j++) {101int x = quad->input.x0 % TILE_SIZE + (j & 1);102int y = quad->input.y0 % TILE_SIZE + (j >> 1);103data->bzzzz[j] = 0;104data->stencilVals[j] = tile->data.stencil8[y][x];105}106break;107case PIPE_FORMAT_Z32_FLOAT:108for (j = 0; j < TGSI_QUAD_SIZE; j++) {109int x = quad->input.x0 % TILE_SIZE + (j & 1);110int y = quad->input.y0 % TILE_SIZE + (j >> 1);111data->bzzzz[j] = tile->data.depth32[y][x];112}113break;114case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:115for (j = 0; j < TGSI_QUAD_SIZE; j++) {116int x = quad->input.x0 % TILE_SIZE + (j & 1);117int y = quad->input.y0 % TILE_SIZE + (j >> 1);118data->bzzzz[j] = tile->data.depth64[y][x] & 0xffffffff;119data->stencilVals[j] = (tile->data.depth64[y][x] >> 32) & 0xff;120}121break;122default:123assert(0);124}125}126127128/**129* If the shader has not been run, interpolate the depth values130* ourselves.131*/132static void133interpolate_quad_depth( struct quad_header *quad )134{135const float fx = (float) quad->input.x0;136const float fy = (float) quad->input.y0;137const float dzdx = quad->posCoef->dadx[2];138const float dzdy = quad->posCoef->dady[2];139const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy;140141quad->output.depth[0] = z0;142quad->output.depth[1] = z0 + dzdx;143quad->output.depth[2] = z0 + dzdy;144quad->output.depth[3] = z0 + dzdx + dzdy;145}146147148/**149* Compute the depth_data::qzzzz[] values from the float fragment Z values.150*/151static void152convert_quad_depth( struct depth_data *data,153const struct quad_header *quad )154{155unsigned j;156float dvals[TGSI_QUAD_SIZE];157158/* Convert quad's float depth values to int depth values (qzzzz).159* If the Z buffer stores integer values, we _have_ to do the depth160* compares with integers (not floats). Otherwise, the float->int->float161* conversion of Z values (which isn't an identity function) will cause162* Z-fighting errors.163*/164if (data->clamp) {165for (j = 0; j < TGSI_QUAD_SIZE; j++) {166dvals[j] = CLAMP(quad->output.depth[j], data->minval, data->maxval);167}168} else {169for (j = 0; j < TGSI_QUAD_SIZE; j++) {170dvals[j] = quad->output.depth[j];171}172}173174switch (data->format) {175case PIPE_FORMAT_Z16_UNORM:176{177float scale = 65535.0;178179for (j = 0; j < TGSI_QUAD_SIZE; j++) {180data->qzzzz[j] = (unsigned) (dvals[j] * scale);181}182}183break;184case PIPE_FORMAT_Z32_UNORM:185{186double scale = (double) (uint) ~0UL;187188for (j = 0; j < TGSI_QUAD_SIZE; j++) {189data->qzzzz[j] = (unsigned) (dvals[j] * scale);190}191}192break;193case PIPE_FORMAT_Z24X8_UNORM:194case PIPE_FORMAT_Z24_UNORM_S8_UINT:195{196float scale = (float) ((1 << 24) - 1);197198for (j = 0; j < TGSI_QUAD_SIZE; j++) {199data->qzzzz[j] = (unsigned) (dvals[j] * scale);200}201}202break;203case PIPE_FORMAT_X8Z24_UNORM:204case PIPE_FORMAT_S8_UINT_Z24_UNORM:205{206float scale = (float) ((1 << 24) - 1);207208for (j = 0; j < TGSI_QUAD_SIZE; j++) {209data->qzzzz[j] = (unsigned) (dvals[j] * scale);210}211}212break;213case PIPE_FORMAT_Z32_FLOAT:214case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:215{216union fi fui;217218for (j = 0; j < TGSI_QUAD_SIZE; j++) {219fui.f = dvals[j];220data->qzzzz[j] = fui.ui;221}222}223break;224default:225assert(0);226}227}228229230/**231* Compute the depth_data::shader_stencil_refs[] values from the float232* fragment stencil values.233*/234static void235convert_quad_stencil( struct depth_data *data,236const struct quad_header *quad )237{238unsigned j;239240data->use_shader_stencil_refs = TRUE;241/* Copy quads stencil values242*/243switch (data->format) {244case PIPE_FORMAT_Z24X8_UNORM:245case PIPE_FORMAT_Z24_UNORM_S8_UINT:246case PIPE_FORMAT_X8Z24_UNORM:247case PIPE_FORMAT_S8_UINT_Z24_UNORM:248case PIPE_FORMAT_S8_UINT:249case PIPE_FORMAT_Z32_FLOAT:250case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:251for (j = 0; j < TGSI_QUAD_SIZE; j++) {252data->shader_stencil_refs[j] = ((unsigned)(quad->output.stencil[j]));253}254break;255default:256assert(0);257}258}259260261/**262* Write data->bzzzz[] values and data->stencilVals into the Z/stencil buffer.263*/264static void265write_depth_stencil_values( struct depth_data *data,266struct quad_header *quad )267{268struct softpipe_cached_tile *tile = data->tile;269unsigned j;270271/* put updated Z values back into cached tile */272switch (data->format) {273case PIPE_FORMAT_Z16_UNORM:274for (j = 0; j < TGSI_QUAD_SIZE; j++) {275int x = quad->input.x0 % TILE_SIZE + (j & 1);276int y = quad->input.y0 % TILE_SIZE + (j >> 1);277tile->data.depth16[y][x] = (ushort) data->bzzzz[j];278}279break;280case PIPE_FORMAT_Z24X8_UNORM:281case PIPE_FORMAT_Z32_UNORM:282for (j = 0; j < TGSI_QUAD_SIZE; j++) {283int x = quad->input.x0 % TILE_SIZE + (j & 1);284int y = quad->input.y0 % TILE_SIZE + (j >> 1);285tile->data.depth32[y][x] = data->bzzzz[j];286}287break;288case PIPE_FORMAT_Z24_UNORM_S8_UINT:289for (j = 0; j < TGSI_QUAD_SIZE; j++) {290int x = quad->input.x0 % TILE_SIZE + (j & 1);291int y = quad->input.y0 % TILE_SIZE + (j >> 1);292tile->data.depth32[y][x] = (data->stencilVals[j] << 24) | data->bzzzz[j];293}294break;295case PIPE_FORMAT_S8_UINT_Z24_UNORM:296for (j = 0; j < TGSI_QUAD_SIZE; j++) {297int x = quad->input.x0 % TILE_SIZE + (j & 1);298int y = quad->input.y0 % TILE_SIZE + (j >> 1);299tile->data.depth32[y][x] = (data->bzzzz[j] << 8) | data->stencilVals[j];300}301break;302case PIPE_FORMAT_X8Z24_UNORM:303for (j = 0; j < TGSI_QUAD_SIZE; j++) {304int x = quad->input.x0 % TILE_SIZE + (j & 1);305int y = quad->input.y0 % TILE_SIZE + (j >> 1);306tile->data.depth32[y][x] = data->bzzzz[j] << 8;307}308break;309case PIPE_FORMAT_S8_UINT:310for (j = 0; j < TGSI_QUAD_SIZE; j++) {311int x = quad->input.x0 % TILE_SIZE + (j & 1);312int y = quad->input.y0 % TILE_SIZE + (j >> 1);313tile->data.stencil8[y][x] = data->stencilVals[j];314}315break;316case PIPE_FORMAT_Z32_FLOAT:317for (j = 0; j < TGSI_QUAD_SIZE; j++) {318int x = quad->input.x0 % TILE_SIZE + (j & 1);319int y = quad->input.y0 % TILE_SIZE + (j >> 1);320tile->data.depth32[y][x] = data->bzzzz[j];321}322break;323case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:324for (j = 0; j < TGSI_QUAD_SIZE; j++) {325int x = quad->input.x0 % TILE_SIZE + (j & 1);326int y = quad->input.y0 % TILE_SIZE + (j >> 1);327tile->data.depth64[y][x] = (uint64_t)data->bzzzz[j] | ((uint64_t)data->stencilVals[j] << 32);328}329break;330default:331assert(0);332}333}334335336337/** Only 8-bit stencil supported */338#define STENCIL_MAX 0xff339340341/**342* Do the basic stencil test (compare stencil buffer values against the343* reference value.344*345* \param data->stencilVals the stencil values from the stencil buffer346* \param func the stencil func (PIPE_FUNC_x)347* \param ref the stencil reference value348* \param valMask the stencil value mask indicating which bits of the stencil349* values and ref value are to be used.350* \return mask indicating which pixels passed the stencil test351*/352static unsigned353do_stencil_test(struct depth_data *data,354unsigned func,355unsigned ref, unsigned valMask)356{357unsigned passMask = 0x0;358unsigned j;359ubyte refs[TGSI_QUAD_SIZE];360361for (j = 0; j < TGSI_QUAD_SIZE; j++) {362if (data->use_shader_stencil_refs)363refs[j] = data->shader_stencil_refs[j] & valMask;364else365refs[j] = ref & valMask;366}367368switch (func) {369case PIPE_FUNC_NEVER:370/* passMask = 0x0 */371break;372case PIPE_FUNC_LESS:373for (j = 0; j < TGSI_QUAD_SIZE; j++) {374if (refs[j] < (data->stencilVals[j] & valMask)) {375passMask |= (1 << j);376}377}378break;379case PIPE_FUNC_EQUAL:380for (j = 0; j < TGSI_QUAD_SIZE; j++) {381if (refs[j] == (data->stencilVals[j] & valMask)) {382passMask |= (1 << j);383}384}385break;386case PIPE_FUNC_LEQUAL:387for (j = 0; j < TGSI_QUAD_SIZE; j++) {388if (refs[j] <= (data->stencilVals[j] & valMask)) {389passMask |= (1 << j);390}391}392break;393case PIPE_FUNC_GREATER:394for (j = 0; j < TGSI_QUAD_SIZE; j++) {395if (refs[j] > (data->stencilVals[j] & valMask)) {396passMask |= (1 << j);397}398}399break;400case PIPE_FUNC_NOTEQUAL:401for (j = 0; j < TGSI_QUAD_SIZE; j++) {402if (refs[j] != (data->stencilVals[j] & valMask)) {403passMask |= (1 << j);404}405}406break;407case PIPE_FUNC_GEQUAL:408for (j = 0; j < TGSI_QUAD_SIZE; j++) {409if (refs[j] >= (data->stencilVals[j] & valMask)) {410passMask |= (1 << j);411}412}413break;414case PIPE_FUNC_ALWAYS:415passMask = MASK_ALL;416break;417default:418assert(0);419}420421return passMask;422}423424425/**426* Apply the stencil operator to stencil values.427*428* \param data->stencilVals the stencil buffer values (read and written)429* \param mask indicates which pixels to update430* \param op the stencil operator (PIPE_STENCIL_OP_x)431* \param ref the stencil reference value432* \param wrtMask writemask controlling which bits are changed in the433* stencil values434*/435static void436apply_stencil_op(struct depth_data *data,437unsigned mask, unsigned op, ubyte ref, ubyte wrtMask)438{439unsigned j;440ubyte newstencil[TGSI_QUAD_SIZE];441ubyte refs[TGSI_QUAD_SIZE];442443for (j = 0; j < TGSI_QUAD_SIZE; j++) {444newstencil[j] = data->stencilVals[j];445if (data->use_shader_stencil_refs)446refs[j] = data->shader_stencil_refs[j];447else448refs[j] = ref;449}450451switch (op) {452case PIPE_STENCIL_OP_KEEP:453/* no-op */454break;455case PIPE_STENCIL_OP_ZERO:456for (j = 0; j < TGSI_QUAD_SIZE; j++) {457if (mask & (1 << j)) {458newstencil[j] = 0;459}460}461break;462case PIPE_STENCIL_OP_REPLACE:463for (j = 0; j < TGSI_QUAD_SIZE; j++) {464if (mask & (1 << j)) {465newstencil[j] = refs[j];466}467}468break;469case PIPE_STENCIL_OP_INCR:470for (j = 0; j < TGSI_QUAD_SIZE; j++) {471if (mask & (1 << j)) {472if (data->stencilVals[j] < STENCIL_MAX) {473newstencil[j] = data->stencilVals[j] + 1;474}475}476}477break;478case PIPE_STENCIL_OP_DECR:479for (j = 0; j < TGSI_QUAD_SIZE; j++) {480if (mask & (1 << j)) {481if (data->stencilVals[j] > 0) {482newstencil[j] = data->stencilVals[j] - 1;483}484}485}486break;487case PIPE_STENCIL_OP_INCR_WRAP:488for (j = 0; j < TGSI_QUAD_SIZE; j++) {489if (mask & (1 << j)) {490newstencil[j] = data->stencilVals[j] + 1;491}492}493break;494case PIPE_STENCIL_OP_DECR_WRAP:495for (j = 0; j < TGSI_QUAD_SIZE; j++) {496if (mask & (1 << j)) {497newstencil[j] = data->stencilVals[j] - 1;498}499}500break;501case PIPE_STENCIL_OP_INVERT:502for (j = 0; j < TGSI_QUAD_SIZE; j++) {503if (mask & (1 << j)) {504newstencil[j] = ~data->stencilVals[j];505}506}507break;508default:509assert(0);510}511512/*513* update the stencil values514*/515if (wrtMask != STENCIL_MAX) {516/* apply bit-wise stencil buffer writemask */517for (j = 0; j < TGSI_QUAD_SIZE; j++) {518data->stencilVals[j] = (wrtMask & newstencil[j]) | (~wrtMask & data->stencilVals[j]);519}520}521else {522for (j = 0; j < TGSI_QUAD_SIZE; j++) {523data->stencilVals[j] = newstencil[j];524}525}526}527528529530/**531* To increase efficiency, we should probably have multiple versions532* of this function that are specifically for Z16, Z32 and FP Z buffers.533* Try to effectively do that with codegen...534*/535static boolean536depth_test_quad(struct quad_stage *qs,537struct depth_data *data,538struct quad_header *quad)539{540struct softpipe_context *softpipe = qs->softpipe;541unsigned zmask = 0;542unsigned j;543544#define DEPTHTEST(l, op, r) do { \545if (data->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT || \546data->format == PIPE_FORMAT_Z32_FLOAT) { \547for (j = 0; j < TGSI_QUAD_SIZE; j++) { \548if (((float *)l)[j] op ((float *)r)[j]) \549zmask |= (1 << j); \550} \551} else { \552for (j = 0; j < TGSI_QUAD_SIZE; j++) { \553if (l[j] op r[j]) \554zmask |= (1 << j); \555} \556} \557} while (0)558559switch (softpipe->depth_stencil->depth_func) {560case PIPE_FUNC_NEVER:561/* zmask = 0 */562break;563case PIPE_FUNC_LESS:564/* Note this is pretty much a single sse or cell instruction.565* Like this: quad->mask &= (quad->outputs.depth < zzzz);566*/567DEPTHTEST(data->qzzzz, <, data->bzzzz);568break;569case PIPE_FUNC_EQUAL:570DEPTHTEST(data->qzzzz, ==, data->bzzzz);571break;572case PIPE_FUNC_LEQUAL:573DEPTHTEST(data->qzzzz, <=, data->bzzzz);574break;575case PIPE_FUNC_GREATER:576DEPTHTEST(data->qzzzz, >, data->bzzzz);577break;578case PIPE_FUNC_NOTEQUAL:579DEPTHTEST(data->qzzzz, !=, data->bzzzz);580break;581case PIPE_FUNC_GEQUAL:582DEPTHTEST(data->qzzzz, >=, data->bzzzz);583break;584case PIPE_FUNC_ALWAYS:585zmask = MASK_ALL;586break;587default:588assert(0);589}590591quad->inout.mask &= zmask;592if (quad->inout.mask == 0)593return FALSE;594595/* Update our internal copy only if writemask set. Even if596* depth.writemask is FALSE, may still need to write out buffer597* data due to stencil changes.598*/599if (softpipe->depth_stencil->depth_writemask) {600for (j = 0; j < TGSI_QUAD_SIZE; j++) {601if (quad->inout.mask & (1 << j)) {602data->bzzzz[j] = data->qzzzz[j];603}604}605}606607return TRUE;608}609610611612/**613* Do stencil (and depth) testing. Stenciling depends on the outcome of614* depth testing.615*/616static void617depth_stencil_test_quad(struct quad_stage *qs,618struct depth_data *data,619struct quad_header *quad)620{621struct softpipe_context *softpipe = qs->softpipe;622unsigned func, zFailOp, zPassOp, failOp;623ubyte ref, wrtMask, valMask;624uint face = quad->input.facing;625626if (!softpipe->depth_stencil->stencil[1].enabled) {627/* single-sided stencil test, use front (face=0) state */628face = 0;629}630631/* 0 = front-face, 1 = back-face */632assert(face == 0 || face == 1);633634/* choose front or back face function, operator, etc */635/* XXX we could do these initializations once per primitive */636func = softpipe->depth_stencil->stencil[face].func;637failOp = softpipe->depth_stencil->stencil[face].fail_op;638zFailOp = softpipe->depth_stencil->stencil[face].zfail_op;639zPassOp = softpipe->depth_stencil->stencil[face].zpass_op;640ref = softpipe->stencil_ref.ref_value[face];641wrtMask = softpipe->depth_stencil->stencil[face].writemask;642valMask = softpipe->depth_stencil->stencil[face].valuemask;643644/* do the stencil test first */645{646unsigned passMask, failMask;647passMask = do_stencil_test(data, func, ref, valMask);648failMask = quad->inout.mask & ~passMask;649quad->inout.mask &= passMask;650651if (failOp != PIPE_STENCIL_OP_KEEP) {652apply_stencil_op(data, failMask, failOp, ref, wrtMask);653}654}655656if (quad->inout.mask) {657/* now the pixels that passed the stencil test are depth tested */658if (softpipe->depth_stencil->depth_enabled) {659const unsigned origMask = quad->inout.mask;660661depth_test_quad(qs, data, quad); /* quad->mask is updated */662663/* update stencil buffer values according to z pass/fail result */664if (zFailOp != PIPE_STENCIL_OP_KEEP) {665const unsigned zFailMask = origMask & ~quad->inout.mask;666apply_stencil_op(data, zFailMask, zFailOp, ref, wrtMask);667}668669if (zPassOp != PIPE_STENCIL_OP_KEEP) {670const unsigned zPassMask = origMask & quad->inout.mask;671apply_stencil_op(data, zPassMask, zPassOp, ref, wrtMask);672}673}674else {675/* no depth test, apply Zpass operator to stencil buffer values */676apply_stencil_op(data, quad->inout.mask, zPassOp, ref, wrtMask);677}678}679}680681682#define ALPHATEST( FUNC, COMP ) \683static unsigned \684alpha_test_quads_##FUNC( struct quad_stage *qs, \685struct quad_header *quads[], \686unsigned nr ) \687{ \688const float ref = qs->softpipe->depth_stencil->alpha_ref_value; \689const uint cbuf = 0; /* only output[0].alpha is tested */ \690unsigned pass_nr = 0; \691unsigned i; \692\693for (i = 0; i < nr; i++) { \694const float *aaaa = quads[i]->output.color[cbuf][3]; \695unsigned passMask = 0; \696\697if (aaaa[0] COMP ref) passMask |= (1 << 0); \698if (aaaa[1] COMP ref) passMask |= (1 << 1); \699if (aaaa[2] COMP ref) passMask |= (1 << 2); \700if (aaaa[3] COMP ref) passMask |= (1 << 3); \701\702quads[i]->inout.mask &= passMask; \703\704if (quads[i]->inout.mask) \705quads[pass_nr++] = quads[i]; \706} \707\708return pass_nr; \709}710711712ALPHATEST( LESS, < )713ALPHATEST( EQUAL, == )714ALPHATEST( LEQUAL, <= )715ALPHATEST( GREATER, > )716ALPHATEST( NOTEQUAL, != )717ALPHATEST( GEQUAL, >= )718719720/* XXX: Incorporate into shader using KILL_IF.721*/722static unsigned723alpha_test_quads(struct quad_stage *qs,724struct quad_header *quads[],725unsigned nr)726{727switch (qs->softpipe->depth_stencil->alpha_func) {728case PIPE_FUNC_LESS:729return alpha_test_quads_LESS( qs, quads, nr );730case PIPE_FUNC_EQUAL:731return alpha_test_quads_EQUAL( qs, quads, nr );732case PIPE_FUNC_LEQUAL:733return alpha_test_quads_LEQUAL( qs, quads, nr );734case PIPE_FUNC_GREATER:735return alpha_test_quads_GREATER( qs, quads, nr );736case PIPE_FUNC_NOTEQUAL:737return alpha_test_quads_NOTEQUAL( qs, quads, nr );738case PIPE_FUNC_GEQUAL:739return alpha_test_quads_GEQUAL( qs, quads, nr );740case PIPE_FUNC_ALWAYS:741return nr;742case PIPE_FUNC_NEVER:743default:744return 0;745}746}747748749/**750* EXT_depth_bounds_test has some careful language about precision:751*752* At what precision is the depth bounds test carried out?753*754* RESOLUTION: For the purposes of the test, the bounds are converted755* to fixed-point as though they were to be written to the depth buffer,756* and the comparison uses those quantized bounds.757*758* We choose the obvious interpretation that Z32F needs no such conversion.759*/760static unsigned761depth_bounds_test_quads(struct quad_stage *qs,762struct quad_header *quads[],763unsigned nr,764struct depth_data *data)765{766struct pipe_depth_stencil_alpha_state *dsa = qs->softpipe->depth_stencil;767unsigned i = 0, pass_nr = 0;768enum pipe_format format = util_format_get_depth_only(data->format);769double min = dsa->depth_bounds_min;770double max = dsa->depth_bounds_max;771772for (i = 0; i < nr; i++) {773unsigned j = 0, passMask = 0;774775get_depth_stencil_values(data, quads[i]);776777if (format == PIPE_FORMAT_Z32_FLOAT) {778for (j = 0; j < TGSI_QUAD_SIZE; j++) {779double z = uif(data->bzzzz[j]);780781if (z >= min && z <= max)782passMask |= (1 << j);783}784} else {785unsigned imin, imax;786787if (format == PIPE_FORMAT_Z16_UNORM) {788imin = ((unsigned) (min * 65535.0)) & 0xffff;789imax = ((unsigned) (max * 65535.0)) & 0xffff;790} else if (format == PIPE_FORMAT_Z32_UNORM) {791imin = (unsigned) (min * 4294967295.0);792imax = (unsigned) (max * 4294967295.0);793} else if (format == PIPE_FORMAT_Z24X8_UNORM ||794format == PIPE_FORMAT_X8Z24_UNORM) {795imin = ((unsigned) (min * 16777215.0)) & 0xffffff;796imax = ((unsigned) (max * 16777215.0)) & 0xffffff;797} else {798unreachable("Unknown depth buffer format");799}800801for (j = 0; j < TGSI_QUAD_SIZE; j++) {802unsigned iz = data->bzzzz[j];803804if (iz >= imin && iz <= imax)805passMask |= (1 << j);806}807}808809quads[i]->inout.mask &= passMask;810811if (quads[i]->inout.mask)812quads[pass_nr++] = quads[i];813}814815return pass_nr;816}817818819static unsigned mask_count[16] =820{8210, /* 0x0 */8221, /* 0x1 */8231, /* 0x2 */8242, /* 0x3 */8251, /* 0x4 */8262, /* 0x5 */8272, /* 0x6 */8283, /* 0x7 */8291, /* 0x8 */8302, /* 0x9 */8312, /* 0xa */8323, /* 0xb */8332, /* 0xc */8343, /* 0xd */8353, /* 0xe */8364, /* 0xf */837};838839840841/**842* General depth/stencil test function. Used when there's no fast-path.843*/844static void845depth_test_quads_fallback(struct quad_stage *qs,846struct quad_header *quads[],847unsigned nr)848{849unsigned i, pass = 0;850const struct tgsi_shader_info *fsInfo = &qs->softpipe->fs_variant->info;851boolean interp_depth = !fsInfo->writes_z || qs->softpipe->early_depth;852boolean shader_stencil_ref = fsInfo->writes_stencil;853boolean have_zs = !!qs->softpipe->framebuffer.zsbuf;854struct depth_data data;855unsigned vp_idx = quads[0]->input.viewport_index;856857data.use_shader_stencil_refs = FALSE;858859if (have_zs && (qs->softpipe->depth_stencil->depth_enabled ||860qs->softpipe->depth_stencil->stencil[0].enabled ||861qs->softpipe->depth_stencil->depth_bounds_test)) {862float near_val, far_val;863864data.ps = qs->softpipe->framebuffer.zsbuf;865data.format = data.ps->format;866data.tile = sp_get_cached_tile(qs->softpipe->zsbuf_cache,867quads[0]->input.x0,868quads[0]->input.y0, quads[0]->input.layer);869data.clamp = !qs->softpipe->rasterizer->depth_clip_near;870871near_val = qs->softpipe->viewports[vp_idx].translate[2] - qs->softpipe->viewports[vp_idx].scale[2];872far_val = near_val + (qs->softpipe->viewports[vp_idx].scale[2] * 2.0);873data.minval = MIN2(near_val, far_val);874data.maxval = MAX2(near_val, far_val);875}876877/* EXT_depth_bounds_test says:878*879* Where should the depth bounds test take place in the OpenGL fragment880* processing pipeline?881*882* RESOLUTION: After scissor test, before alpha test. In practice,883* this is a logical placement of the test. An implementation is884* free to perform the test in a manner that is consistent with the885* specified ordering.886*/887888if (have_zs && qs->softpipe->depth_stencil->depth_bounds_test) {889nr = depth_bounds_test_quads(qs, quads, nr, &data);890}891892if (qs->softpipe->depth_stencil->alpha_enabled) {893nr = alpha_test_quads(qs, quads, nr);894}895896if (have_zs && (qs->softpipe->depth_stencil->depth_enabled ||897qs->softpipe->depth_stencil->stencil[0].enabled)) {898for (i = 0; i < nr; i++) {899get_depth_stencil_values(&data, quads[i]);900901if (qs->softpipe->depth_stencil->depth_enabled) {902if (interp_depth)903interpolate_quad_depth(quads[i]);904905convert_quad_depth(&data, quads[i]);906}907908if (qs->softpipe->depth_stencil->stencil[0].enabled) {909if (shader_stencil_ref)910convert_quad_stencil(&data, quads[i]);911912depth_stencil_test_quad(qs, &data, quads[i]);913write_depth_stencil_values(&data, quads[i]);914}915else {916if (!depth_test_quad(qs, &data, quads[i]))917continue;918919if (qs->softpipe->depth_stencil->depth_writemask)920write_depth_stencil_values(&data, quads[i]);921}922923quads[pass++] = quads[i];924}925926nr = pass;927}928929if (qs->softpipe->active_query_count) {930for (i = 0; i < nr; i++)931qs->softpipe->occlusion_count += mask_count[quads[i]->inout.mask];932}933934if (nr)935qs->next->run(qs->next, quads, nr);936}937938939/**940* Special-case Z testing for 16-bit Zbuffer and Z buffer writes enabled.941*/942943#define NAME depth_interp_z16_less_write944#define OPERATOR <945#include "sp_quad_depth_test_tmp.h"946947#define NAME depth_interp_z16_equal_write948#define OPERATOR ==949#include "sp_quad_depth_test_tmp.h"950951#define NAME depth_interp_z16_lequal_write952#define OPERATOR <=953#include "sp_quad_depth_test_tmp.h"954955#define NAME depth_interp_z16_greater_write956#define OPERATOR >957#include "sp_quad_depth_test_tmp.h"958959#define NAME depth_interp_z16_notequal_write960#define OPERATOR !=961#include "sp_quad_depth_test_tmp.h"962963#define NAME depth_interp_z16_gequal_write964#define OPERATOR >=965#include "sp_quad_depth_test_tmp.h"966967#define NAME depth_interp_z16_always_write968#define ALWAYS 1969#include "sp_quad_depth_test_tmp.h"970971972973static void974depth_noop(struct quad_stage *qs,975struct quad_header *quads[],976unsigned nr)977{978qs->next->run(qs->next, quads, nr);979}980981982983static void984choose_depth_test(struct quad_stage *qs,985struct quad_header *quads[],986unsigned nr)987{988const struct tgsi_shader_info *fsInfo = &qs->softpipe->fs_variant->info;989990boolean interp_depth = !fsInfo->writes_z || qs->softpipe->early_depth;991992boolean alpha = qs->softpipe->depth_stencil->alpha_enabled;993994boolean depth = qs->softpipe->depth_stencil->depth_enabled;995996unsigned depthfunc = qs->softpipe->depth_stencil->depth_func;997998boolean stencil = qs->softpipe->depth_stencil->stencil[0].enabled;9991000boolean depthwrite = qs->softpipe->depth_stencil->depth_writemask;10011002boolean occlusion = qs->softpipe->active_query_count;10031004boolean clipped = !qs->softpipe->rasterizer->depth_clip_near;10051006boolean depth_bounds = qs->softpipe->depth_stencil->depth_bounds_test;10071008if(!qs->softpipe->framebuffer.zsbuf)1009depth = depthwrite = stencil = FALSE;10101011/* default */1012qs->run = depth_test_quads_fallback;10131014/* look for special cases */1015if (!alpha &&1016!depth &&1017!occlusion &&1018!clipped &&1019!stencil &&1020!depth_bounds) {1021qs->run = depth_noop;1022}1023else if (!alpha &&1024interp_depth &&1025depth &&1026depthwrite &&1027!occlusion &&1028!clipped &&1029!stencil &&1030!depth_bounds)1031{1032if (qs->softpipe->framebuffer.zsbuf->format == PIPE_FORMAT_Z16_UNORM) {1033switch (depthfunc) {1034case PIPE_FUNC_NEVER:1035qs->run = depth_test_quads_fallback;1036break;1037case PIPE_FUNC_LESS:1038qs->run = depth_interp_z16_less_write;1039break;1040case PIPE_FUNC_EQUAL:1041qs->run = depth_interp_z16_equal_write;1042break;1043case PIPE_FUNC_LEQUAL:1044qs->run = depth_interp_z16_lequal_write;1045break;1046case PIPE_FUNC_GREATER:1047qs->run = depth_interp_z16_greater_write;1048break;1049case PIPE_FUNC_NOTEQUAL:1050qs->run = depth_interp_z16_notequal_write;1051break;1052case PIPE_FUNC_GEQUAL:1053qs->run = depth_interp_z16_gequal_write;1054break;1055case PIPE_FUNC_ALWAYS:1056qs->run = depth_interp_z16_always_write;1057break;1058default:1059qs->run = depth_test_quads_fallback;1060break;1061}1062}1063}10641065/* next quad/fragment stage */1066qs->run( qs, quads, nr );1067}1068106910701071static void1072depth_test_begin(struct quad_stage *qs)1073{1074qs->run = choose_depth_test;1075qs->next->begin(qs->next);1076}107710781079static void1080depth_test_destroy(struct quad_stage *qs)1081{1082FREE( qs );1083}108410851086struct quad_stage *1087sp_quad_depth_test_stage(struct softpipe_context *softpipe)1088{1089struct quad_stage *stage = CALLOC_STRUCT(quad_stage);10901091stage->softpipe = softpipe;1092stage->begin = depth_test_begin;1093stage->run = choose_depth_test;1094stage->destroy = depth_test_destroy;10951096return stage;1097}109810991100