GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/llvmpipe/lp_bld_depth.c
/**************************************************************************
 *
 * Copyright 2009-2010 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/**
 * @file
 * Depth/stencil testing to LLVM IR translation.
 *
 * To be done accurately/efficiently the depth/stencil test must be done with
 * the same type/format of the depth/stencil buffer, which implies massaging
 * the incoming depths to fit into place. Using a more straightforward
 * type/format for depth/stencil values internally and only convert when
 * flushing would avoid this, but it would most likely result in depth fighting
 * artifacts.
 *
 * Since we're using linear layout for everything, but we need to deal with
 * 2x2 quads, we need to load/store multiple values and swizzle them into
 * place (we could avoid this by doing depth/stencil testing in linear format,
 * which would be easy for late depth/stencil test as we could do that after
 * the fragment shader loop just as we do for color buffers, but more tricky
 * for early depth test as we'd need both masks and interpolated depth in
 * linear format).
 *
 *
 * @author Jose Fonseca <[email protected]>
 * @author Brian Paul <[email protected]>
 */
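/*
 * Overview of the helpers below: lp_depth_type() picks a vector type matching
 * the depth/stencil format, lp_build_depth_stencil_load_swizzled() and
 * lp_build_depth_stencil_write_swizzled() move values between the linear
 * buffer layout and the swizzled 2x2-quad layout used by the fragment shader
 * loop, lp_build_depth_stencil_test() generates the actual test/update code,
 * and lp_build_occlusion_count() accumulates passing fragments for occlusion
 * queries.
 */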

#include "pipe/p_state.h"
#include "util/format/u_format.h"
#include "util/u_cpu_detect.h"

#include "gallivm/lp_bld_type.h"
#include "gallivm/lp_bld_arit.h"
#include "gallivm/lp_bld_bitarit.h"
#include "gallivm/lp_bld_const.h"
#include "gallivm/lp_bld_conv.h"
#include "gallivm/lp_bld_logic.h"
#include "gallivm/lp_bld_flow.h"
#include "gallivm/lp_bld_intr.h"
#include "gallivm/lp_bld_debug.h"
#include "gallivm/lp_bld_swizzle.h"
#include "gallivm/lp_bld_pack.h"

#include "lp_bld_depth.h"
#include "lp_state_fs.h"


/** Used to select fields from pipe_stencil_state */
enum stencil_op {
   S_FAIL_OP,
   Z_FAIL_OP,
   Z_PASS_OP
};




/**
 * Do the stencil test comparison (compare FB stencil values against ref value).
 * This will be used twice when generating two-sided stencil code.
 * \param stencil the front/back stencil state
 * \param stencilRef the stencil reference value, replicated as a vector
 * \param stencilVals vector of stencil values from framebuffer
 * \return vector mask of pass/fail values (~0 or 0)
 */
static LLVMValueRef
lp_build_stencil_test_single(struct lp_build_context *bld,
                             const struct pipe_stencil_state *stencil,
                             LLVMValueRef stencilRef,
                             LLVMValueRef stencilVals)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   const unsigned stencilMax = 255; /* XXX fix */
   struct lp_type type = bld->type;
   LLVMValueRef res;

   /*
    * SSE2 has intrinsics for signed comparisons, but not unsigned ones. Values
    * are between 0..255 so ensure we generate the fastest comparisons for
    * wider elements.
    */
   if (type.width <= 8) {
      assert(!type.sign);
   } else {
      assert(type.sign);
   }

   assert(stencil->enabled);

   if (stencil->valuemask != stencilMax) {
      /* compute stencilRef = stencilRef & valuemask */
      LLVMValueRef valuemask = lp_build_const_int_vec(bld->gallivm, type, stencil->valuemask);
      stencilRef = LLVMBuildAnd(builder, stencilRef, valuemask, "");
      /* compute stencilVals = stencilVals & valuemask */
      stencilVals = LLVMBuildAnd(builder, stencilVals, valuemask, "");
   }

   res = lp_build_cmp(bld, stencil->func, stencilRef, stencilVals);

   return res;
}


/**
 * Do the one or two-sided stencil test comparison.
 * \sa lp_build_stencil_test_single
 * \param front_facing an integer vector mask, indicating front (~0) or back
 *                     (0) facing polygon. If NULL, assume front-facing.
 */
static LLVMValueRef
lp_build_stencil_test(struct lp_build_context *bld,
                      const struct pipe_stencil_state stencil[2],
                      LLVMValueRef stencilRefs[2],
                      LLVMValueRef stencilVals,
                      LLVMValueRef front_facing)
{
   LLVMValueRef res;

   assert(stencil[0].enabled);

   /* do front face test */
   res = lp_build_stencil_test_single(bld, &stencil[0],
                                      stencilRefs[0], stencilVals);

   if (stencil[1].enabled && front_facing != NULL) {
      /* do back face test */
      LLVMValueRef back_res;

      back_res = lp_build_stencil_test_single(bld, &stencil[1],
                                              stencilRefs[1], stencilVals);

      res = lp_build_select(bld, front_facing, res, back_res);
   }

   return res;
}


/**
 * Apply the stencil operator (add/sub/keep/etc) to the given vector
 * of stencil values.
 * \return new stencil values vector
 */
static LLVMValueRef
lp_build_stencil_op_single(struct lp_build_context *bld,
                           const struct pipe_stencil_state *stencil,
                           enum stencil_op op,
                           LLVMValueRef stencilRef,
                           LLVMValueRef stencilVals)

{
   LLVMBuilderRef builder = bld->gallivm->builder;
   struct lp_type type = bld->type;
   LLVMValueRef res;
   LLVMValueRef max = lp_build_const_int_vec(bld->gallivm, type, 0xff);
   unsigned stencil_op;

   assert(type.sign);
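   /*
    * Note: INCR/DECR below saturate the result to [0, 0xff] via
    * lp_build_min/lp_build_max, while the _WRAP variants and INVERT just mask
    * with 0xff; the masking matters because the arithmetic may be done in
    * elements wider than 8 bits.
    */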

   switch (op) {
   case S_FAIL_OP:
      stencil_op = stencil->fail_op;
      break;
   case Z_FAIL_OP:
      stencil_op = stencil->zfail_op;
      break;
   case Z_PASS_OP:
      stencil_op = stencil->zpass_op;
      break;
   default:
      assert(0 && "Invalid stencil_op mode");
      stencil_op = PIPE_STENCIL_OP_KEEP;
   }

   switch (stencil_op) {
   case PIPE_STENCIL_OP_KEEP:
      res = stencilVals;
      /* we can return early for this case */
      return res;
   case PIPE_STENCIL_OP_ZERO:
      res = bld->zero;
      break;
   case PIPE_STENCIL_OP_REPLACE:
      res = stencilRef;
      break;
   case PIPE_STENCIL_OP_INCR:
      res = lp_build_add(bld, stencilVals, bld->one);
      res = lp_build_min(bld, res, max);
      break;
   case PIPE_STENCIL_OP_DECR:
      res = lp_build_sub(bld, stencilVals, bld->one);
      res = lp_build_max(bld, res, bld->zero);
      break;
   case PIPE_STENCIL_OP_INCR_WRAP:
      res = lp_build_add(bld, stencilVals, bld->one);
      res = LLVMBuildAnd(builder, res, max, "");
      break;
   case PIPE_STENCIL_OP_DECR_WRAP:
      res = lp_build_sub(bld, stencilVals, bld->one);
      res = LLVMBuildAnd(builder, res, max, "");
      break;
   case PIPE_STENCIL_OP_INVERT:
      res = LLVMBuildNot(builder, stencilVals, "");
      res = LLVMBuildAnd(builder, res, max, "");
      break;
   default:
      assert(0 && "bad stencil op mode");
      res = bld->undef;
   }

   return res;
}


/**
 * Do the one or two-sided stencil test op/update.
 */
static LLVMValueRef
lp_build_stencil_op(struct lp_build_context *bld,
                    const struct pipe_stencil_state stencil[2],
                    enum stencil_op op,
                    LLVMValueRef stencilRefs[2],
                    LLVMValueRef stencilVals,
                    LLVMValueRef mask,
                    LLVMValueRef front_facing)

{
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef res;

   assert(stencil[0].enabled);

   /* do front face op */
   res = lp_build_stencil_op_single(bld, &stencil[0], op,
                                    stencilRefs[0], stencilVals);

   if (stencil[1].enabled && front_facing != NULL) {
      /* do back face op */
      LLVMValueRef back_res;

      back_res = lp_build_stencil_op_single(bld, &stencil[1], op,
                                            stencilRefs[1], stencilVals);

      res = lp_build_select(bld, front_facing, res, back_res);
   }

   if (stencil[0].writemask != 0xff ||
       (stencil[1].enabled && front_facing != NULL && stencil[1].writemask != 0xff)) {
      /* mask &= stencil[0].writemask */
      LLVMValueRef writemask = lp_build_const_int_vec(bld->gallivm, bld->type,
                                                      stencil[0].writemask);
      if (stencil[1].enabled && stencil[1].writemask != stencil[0].writemask && front_facing != NULL) {
         LLVMValueRef back_writemask = lp_build_const_int_vec(bld->gallivm, bld->type,
                                                              stencil[1].writemask);
         writemask = lp_build_select(bld, front_facing, writemask, back_writemask);
      }

      mask = LLVMBuildAnd(builder, mask, writemask, "");
      /* res = (res & mask) | (stencilVals & ~mask) */
      res = lp_build_select_bitwise(bld, mask, res, stencilVals);
   }
   else {
      /* res = mask ? res : stencilVals */
      res = lp_build_select(bld, mask, res, stencilVals);
   }

   return res;
}



/**
 * Return a type that matches the depth/stencil format.
 */
struct lp_type
lp_depth_type(const struct util_format_description *format_desc,
              unsigned length)
{
   struct lp_type type;
   unsigned z_swizzle;

   assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
   assert(format_desc->block.width == 1);
   assert(format_desc->block.height == 1);

   memset(&type, 0, sizeof type);
   type.width = format_desc->block.bits;

   z_swizzle = format_desc->swizzle[0];
   if (z_swizzle < 4) {
      if (format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_FLOAT) {
         type.floating = TRUE;
         assert(z_swizzle == 0);
         assert(format_desc->channel[z_swizzle].size == 32);
      }
      else if(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED) {
         assert(format_desc->block.bits <= 32);
         assert(format_desc->channel[z_swizzle].normalized);
         if (format_desc->channel[z_swizzle].size < format_desc->block.bits) {
            /* Prefer signed integers when possible, as SSE has less support
             * for unsigned comparison;
             */
            type.sign = TRUE;
         }
      }
      else
         assert(0);
   }

   type.length = length;

   return type;
}


/**
 * Compute bitmask and bit shift to apply to the incoming fragment Z values
 * and the Z buffer values needed before doing the Z comparison.
 *
 * Note that we leave the Z bits in the position that we find them
 * in the Z buffer (typically 0xffffff00 or 0x00ffffff). That lets us
 * get by with fewer bit twiddling steps.
 */
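/*
 * For example, with PIPE_FORMAT_Z24_UNORM_S8_UINT (z in the 24 LSBs) this
 * gives *shift = 0, *width = 24, *mask = 0x00ffffff, while
 * PIPE_FORMAT_S8_UINT_Z24_UNORM gives *shift = 8, *mask = 0xffffff00.
 */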
static boolean
get_z_shift_and_mask(const struct util_format_description *format_desc,
                     unsigned *shift, unsigned *width, unsigned *mask)
{
   unsigned total_bits;
   unsigned z_swizzle;

   assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
   assert(format_desc->block.width == 1);
   assert(format_desc->block.height == 1);

   /* 64bit d/s format is special already extracted 32 bits */
   total_bits = format_desc->block.bits > 32 ? 32 : format_desc->block.bits;

   z_swizzle = format_desc->swizzle[0];

   if (z_swizzle == PIPE_SWIZZLE_NONE)
      return FALSE;

   *width = format_desc->channel[z_swizzle].size;
   /* & 31 is for the same reason as the 32-bit limit above */
   *shift = format_desc->channel[z_swizzle].shift & 31;

   if (*width == total_bits) {
      *mask = 0xffffffff;
   } else {
      *mask = ((1 << *width) - 1) << *shift;
   }

   return TRUE;
}


/**
 * Compute bitmask and bit shift to apply to the framebuffer pixel values
 * to put the stencil bits in the least significant position.
 * (i.e. 0x000000ff)
 */
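/*
 * For example, PIPE_FORMAT_Z24_UNORM_S8_UINT gives *shift = 24, *mask = 0xff;
 * for the 64-bit PIPE_FORMAT_Z32_FLOAT_S8X24_UINT the stencil lives in its
 * own 32-bit word, so it is returned as *shift = 0, *mask = 0xff.
 */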
static boolean
get_s_shift_and_mask(const struct util_format_description *format_desc,
                     unsigned *shift, unsigned *mask)
{
   unsigned s_swizzle;
   unsigned sz;

   s_swizzle = format_desc->swizzle[1];

   if (s_swizzle == PIPE_SWIZZLE_NONE)
      return FALSE;

   /* just special case 64bit d/s format */
   if (format_desc->block.bits > 32) {
      /* XXX big-endian? */
      assert(format_desc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
      *shift = 0;
      *mask = 0xff;
      return TRUE;
   }

   *shift = format_desc->channel[s_swizzle].shift;
   sz = format_desc->channel[s_swizzle].size;
   *mask = (1U << sz) - 1U;

   return TRUE;
}


/**
 * Perform the occlusion test and increase the counter.
 * Tests the depth mask and adds the number of channels with a non-zero mask
 * to the occlusion counter; e.g. if maskvalue is {-1, -1, -1, -1} the counter
 * is incremented by 4.
 * TODO: could get that out of the fs loop.
 *
 * \param type holds element type of the mask vector.
 * \param maskvalue is the depth test mask.
 * \param counter is a pointer to the (64-bit) counter.
 */
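/*
 * Three code paths below: with SSE (4-wide) or AVX (8-wide) the vector sign
 * bits are extracted with movmskps into a scalar bitmask and counted with
 * llvm.ctpop; the generic path masks each lane to 0/1, packs one byte per
 * lane into a single wide integer and popcounts that instead.
 */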
void
lp_build_occlusion_count(struct gallivm_state *gallivm,
                         struct lp_type type,
                         LLVMValueRef maskvalue,
                         LLVMValueRef counter)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMContextRef context = gallivm->context;
   LLVMValueRef countmask = lp_build_const_int_vec(gallivm, type, 1);
   LLVMValueRef count, newcount;

   assert(type.length <= 16);
   assert(type.floating);

   if(util_get_cpu_caps()->has_sse && type.length == 4) {
      const char *movmskintr = "llvm.x86.sse.movmsk.ps";
      const char *popcntintr = "llvm.ctpop.i32";
      LLVMValueRef bits = LLVMBuildBitCast(builder, maskvalue,
                                           lp_build_vec_type(gallivm, type), "");
      bits = lp_build_intrinsic_unary(builder, movmskintr,
                                      LLVMInt32TypeInContext(context), bits);
      count = lp_build_intrinsic_unary(builder, popcntintr,
                                       LLVMInt32TypeInContext(context), bits);
      count = LLVMBuildZExt(builder, count, LLVMIntTypeInContext(context, 64), "");
   }
   else if(util_get_cpu_caps()->has_avx && type.length == 8) {
      const char *movmskintr = "llvm.x86.avx.movmsk.ps.256";
      const char *popcntintr = "llvm.ctpop.i32";
      LLVMValueRef bits = LLVMBuildBitCast(builder, maskvalue,
                                           lp_build_vec_type(gallivm, type), "");
      bits = lp_build_intrinsic_unary(builder, movmskintr,
                                      LLVMInt32TypeInContext(context), bits);
      count = lp_build_intrinsic_unary(builder, popcntintr,
                                       LLVMInt32TypeInContext(context), bits);
      count = LLVMBuildZExt(builder, count, LLVMIntTypeInContext(context, 64), "");
   }
   else {
      unsigned i;
      LLVMValueRef countv = LLVMBuildAnd(builder, maskvalue, countmask, "countv");
      LLVMTypeRef counttype = LLVMIntTypeInContext(context, type.length * 8);
      LLVMTypeRef i8vntype = LLVMVectorType(LLVMInt8TypeInContext(context), type.length * 4);
      LLVMValueRef shufflev, countd;
      LLVMValueRef shuffles[16];
      const char *popcntintr = NULL;

      countv = LLVMBuildBitCast(builder, countv, i8vntype, "");
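      /*
       * The shuffle below keeps one byte per 32-bit lane (the low byte on
       * little-endian, the high byte on big-endian); after the AND above each
       * such byte is 0 or 1, so the packed integer can be counted with a
       * single ctpop.
       */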

      for (i = 0; i < type.length; i++) {
#if UTIL_ARCH_LITTLE_ENDIAN
         shuffles[i] = lp_build_const_int32(gallivm, 4*i);
#else
         shuffles[i] = lp_build_const_int32(gallivm, (4*i) + 3);
#endif
      }

      shufflev = LLVMConstVector(shuffles, type.length);
      countd = LLVMBuildShuffleVector(builder, countv, LLVMGetUndef(i8vntype), shufflev, "");
      countd = LLVMBuildBitCast(builder, countd, counttype, "countd");

      /*
       * XXX FIXME
       * this is bad on cpus without popcount (on x86 supported by intel
       * nehalem, amd barcelona, and up - not tied to sse42).
       * Would be much faster to just sum the 4 elements of the vector with
       * some horizontal add (shuffle/add/shuffle/add after the initial and).
       */
      switch (type.length) {
      case 4:
         popcntintr = "llvm.ctpop.i32";
         break;
      case 8:
         popcntintr = "llvm.ctpop.i64";
         break;
      case 16:
         popcntintr = "llvm.ctpop.i128";
         break;
      default:
         assert(0);
      }
      count = lp_build_intrinsic_unary(builder, popcntintr, counttype, countd);

      if (type.length > 8) {
         count = LLVMBuildTrunc(builder, count, LLVMIntTypeInContext(context, 64), "");
      }
      else if (type.length < 8) {
         count = LLVMBuildZExt(builder, count, LLVMIntTypeInContext(context, 64), "");
      }
   }
   newcount = LLVMBuildLoad(builder, counter, "origcount");
   newcount = LLVMBuildAdd(builder, newcount, count, "newcount");
   LLVMBuildStore(builder, newcount, counter);
}


/**
 * Load depth/stencil values.
 * The stored values are linear, swizzle them.
 *
 * \param type the data type of the fragment depth/stencil values
 * \param format_desc description of the depth/stencil surface
 * \param is_1d whether this resource has only one dimension
 * \param loop_counter the current loop iteration
 * \param depth_ptr pointer to the depth/stencil values of this 4x4 block
 * \param depth_stride stride of the depth/stencil buffer
 * \param z_fb contains z values loaded from fb (may include padding)
 * \param s_fb contains s values loaded from fb (may include padding)
 */
void
lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm,
                                     struct lp_type z_src_type,
                                     const struct util_format_description *format_desc,
                                     boolean is_1d,
                                     LLVMValueRef depth_ptr,
                                     LLVMValueRef depth_stride,
                                     LLVMValueRef *z_fb,
                                     LLVMValueRef *s_fb,
                                     LLVMValueRef loop_counter)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4];
   LLVMValueRef zs_dst1, zs_dst2;
   LLVMValueRef zs_dst_ptr;
   LLVMValueRef depth_offset1, depth_offset2;
   LLVMTypeRef load_ptr_type;
   unsigned depth_bytes = format_desc->block.bits / 8;
   struct lp_type zs_type = lp_depth_type(format_desc, z_src_type.length);
   struct lp_type zs_load_type = zs_type;

   zs_load_type.length = zs_load_type.length / 2;
   load_ptr_type = LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0);

   if (z_src_type.length == 4) {
      unsigned i;
      LLVMValueRef looplsb = LLVMBuildAnd(builder, loop_counter,
                                          lp_build_const_int32(gallivm, 1), "");
      LLVMValueRef loopmsb = LLVMBuildAnd(builder, loop_counter,
                                          lp_build_const_int32(gallivm, 2), "");
      LLVMValueRef offset2 = LLVMBuildMul(builder, loopmsb,
                                          depth_stride, "");
      depth_offset1 = LLVMBuildMul(builder, looplsb,
                                   lp_build_const_int32(gallivm, depth_bytes * 2), "");
      depth_offset1 = LLVMBuildAdd(builder, depth_offset1, offset2, "");

      /* just concatenate the loaded 2x2 values into 4-wide vector */
      for (i = 0; i < 4; i++) {
         shuffles[i] = lp_build_const_int32(gallivm, i);
      }
   }
   else {
      unsigned i;
      LLVMValueRef loopx2 = LLVMBuildShl(builder, loop_counter,
                                         lp_build_const_int32(gallivm, 1), "");
      assert(z_src_type.length == 8);
      depth_offset1 = LLVMBuildMul(builder, loopx2, depth_stride, "");
      /*
       * We load 2x4 values, and need to swizzle them (order
       * 0,1,4,5,2,3,6,7) - not so hot with avx unfortunately.
       */
      for (i = 0; i < 8; i++) {
         shuffles[i] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2);
      }
   }

   depth_offset2 = LLVMBuildAdd(builder, depth_offset1, depth_stride, "");

   /* Load current z/stencil values from z/stencil buffer */
   zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset1, 1, "");
   zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, load_ptr_type, "");
   zs_dst1 = LLVMBuildLoad(builder, zs_dst_ptr, "");
   if (is_1d) {
      zs_dst2 = lp_build_undef(gallivm, zs_load_type);
   }
   else {
      zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset2, 1, "");
      zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, load_ptr_type, "");
      zs_dst2 = LLVMBuildLoad(builder, zs_dst_ptr, "");
   }

   *z_fb = LLVMBuildShuffleVector(builder, zs_dst1, zs_dst2,
                                  LLVMConstVector(shuffles, zs_type.length), "");
   *s_fb = *z_fb;

   if (format_desc->block.bits == 8) {
      /* Extend stencil-only 8 bit values (S8_UINT) */
      *s_fb = LLVMBuildZExt(builder, *s_fb,
                            lp_build_int_vec_type(gallivm, z_src_type), "");
   }

   if (format_desc->block.bits < z_src_type.width) {
      /* Extend destination ZS values (e.g., when reading from Z16_UNORM) */
      *z_fb = LLVMBuildZExt(builder, *z_fb,
                            lp_build_int_vec_type(gallivm, z_src_type), "");
   }

   else if (format_desc->block.bits > 32) {
      /* rely on llvm to handle too wide vector we have here nicely */
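      /*
       * For 64-bit Z32_FLOAT_S8X24_UINT the loaded vector interleaves z and s
       * (on little-endian the even 32-bit halves hold the float depth and the
       * odd halves the stencil), so deinterleave them below into separate
       * z and s vectors.
       */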
      unsigned i;
      struct lp_type typex2 = zs_type;
      struct lp_type s_type = zs_type;
      LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH / 4];
      LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH / 4];
      LLVMValueRef tmp;

      typex2.width = typex2.width / 2;
      typex2.length = typex2.length * 2;
      s_type.width = s_type.width / 2;
      s_type.floating = 0;

      tmp = LLVMBuildBitCast(builder, *z_fb,
                             lp_build_vec_type(gallivm, typex2), "");

      for (i = 0; i < zs_type.length; i++) {
         shuffles1[i] = lp_build_const_int32(gallivm, i * 2);
         shuffles2[i] = lp_build_const_int32(gallivm, i * 2 + 1);
      }
      *z_fb = LLVMBuildShuffleVector(builder, tmp, tmp,
                                     LLVMConstVector(shuffles1, zs_type.length), "");
      *s_fb = LLVMBuildShuffleVector(builder, tmp, tmp,
                                     LLVMConstVector(shuffles2, zs_type.length), "");
      *s_fb = LLVMBuildBitCast(builder, *s_fb,
                               lp_build_vec_type(gallivm, s_type), "");
      lp_build_name(*s_fb, "s_dst");
   }

   lp_build_name(*z_fb, "z_dst");
   lp_build_name(*s_fb, "s_dst");
   lp_build_name(*z_fb, "z_dst");
}

/**
 * Store depth/stencil values.
 * Incoming values are swizzled (typically n 2x2 quads), stored linear.
 * If there's a mask it will do select/store otherwise just store.
 *
 * \param type the data type of the fragment depth/stencil values
 * \param format_desc description of the depth/stencil surface
 * \param is_1d whether this resource has only one dimension
 * \param mask_value the alive/dead pixel mask for the quad (vector)
 * \param z_fb z values read from fb (with padding)
 * \param s_fb s values read from fb (with padding)
 * \param loop_counter the current loop iteration
 * \param depth_ptr pointer to the depth/stencil values of this 4x4 block
 * \param depth_stride stride of the depth/stencil buffer
 * \param z_value the depth values to store (with padding)
 * \param s_value the stencil values to store (with padding)
 */
void
lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm,
                                      struct lp_type z_src_type,
                                      const struct util_format_description *format_desc,
                                      boolean is_1d,
                                      LLVMValueRef mask_value,
                                      LLVMValueRef z_fb,
                                      LLVMValueRef s_fb,
                                      LLVMValueRef loop_counter,
                                      LLVMValueRef depth_ptr,
                                      LLVMValueRef depth_stride,
                                      LLVMValueRef z_value,
                                      LLVMValueRef s_value)
{
   struct lp_build_context z_bld;
   LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4];
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef zs_dst1, zs_dst2;
   LLVMValueRef zs_dst_ptr1, zs_dst_ptr2;
   LLVMValueRef depth_offset1, depth_offset2;
   LLVMTypeRef load_ptr_type;
   unsigned depth_bytes = format_desc->block.bits / 8;
   struct lp_type zs_type = lp_depth_type(format_desc, z_src_type.length);
   struct lp_type z_type = zs_type;
   struct lp_type zs_load_type = zs_type;

   zs_load_type.length = zs_load_type.length / 2;
   load_ptr_type = LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0);

   z_type.width = z_src_type.width;

   lp_build_context_init(&z_bld, gallivm, z_type);

   /*
    * This is far from ideal, at least for late depth write we should do this
    * outside the fs loop to avoid all the swizzle stuff.
    */
   if (z_src_type.length == 4) {
      LLVMValueRef looplsb = LLVMBuildAnd(builder, loop_counter,
                                          lp_build_const_int32(gallivm, 1), "");
      LLVMValueRef loopmsb = LLVMBuildAnd(builder, loop_counter,
                                          lp_build_const_int32(gallivm, 2), "");
      LLVMValueRef offset2 = LLVMBuildMul(builder, loopmsb,
                                          depth_stride, "");
      depth_offset1 = LLVMBuildMul(builder, looplsb,
                                   lp_build_const_int32(gallivm, depth_bytes * 2), "");
      depth_offset1 = LLVMBuildAdd(builder, depth_offset1, offset2, "");
   }
   else {
      unsigned i;
      LLVMValueRef loopx2 = LLVMBuildShl(builder, loop_counter,
                                         lp_build_const_int32(gallivm, 1), "");
      assert(z_src_type.length == 8);
      depth_offset1 = LLVMBuildMul(builder, loopx2, depth_stride, "");
      /*
       * We load 2x4 values, and need to swizzle them (order
       * 0,1,4,5,2,3,6,7) - not so hot with avx unfortunately.
       */
      for (i = 0; i < 8; i++) {
         shuffles[i] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2);
      }
   }

   depth_offset2 = LLVMBuildAdd(builder, depth_offset1, depth_stride, "");

   zs_dst_ptr1 = LLVMBuildGEP(builder, depth_ptr, &depth_offset1, 1, "");
   zs_dst_ptr1 = LLVMBuildBitCast(builder, zs_dst_ptr1, load_ptr_type, "");
   zs_dst_ptr2 = LLVMBuildGEP(builder, depth_ptr, &depth_offset2, 1, "");
   zs_dst_ptr2 = LLVMBuildBitCast(builder, zs_dst_ptr2, load_ptr_type, "");

   if (format_desc->block.bits > 32) {
      s_value = LLVMBuildBitCast(builder, s_value, z_bld.vec_type, "");
   }

   if (mask_value) {
      z_value = lp_build_select(&z_bld, mask_value, z_value, z_fb);
      if (format_desc->block.bits > 32) {
         s_fb = LLVMBuildBitCast(builder, s_fb, z_bld.vec_type, "");
         s_value = lp_build_select(&z_bld, mask_value, s_value, s_fb);
      }
   }

   if (zs_type.width < z_src_type.width) {
      /* Truncate ZS values (e.g., when writing to Z16_UNORM) */
      z_value = LLVMBuildTrunc(builder, z_value,
                               lp_build_int_vec_type(gallivm, zs_type), "");
   }

   if (format_desc->block.bits <= 32) {
      if (z_src_type.length == 4) {
         zs_dst1 = lp_build_extract_range(gallivm, z_value, 0, 2);
         zs_dst2 = lp_build_extract_range(gallivm, z_value, 2, 2);
      }
      else {
         assert(z_src_type.length == 8);
         zs_dst1 = LLVMBuildShuffleVector(builder, z_value, z_value,
                                          LLVMConstVector(&shuffles[0],
                                                          zs_load_type.length), "");
         zs_dst2 = LLVMBuildShuffleVector(builder, z_value, z_value,
                                          LLVMConstVector(&shuffles[4],
                                                          zs_load_type.length), "");
      }
   }
   else {
      if (z_src_type.length == 4) {
         zs_dst1 = lp_build_interleave2(gallivm, z_type,
                                        z_value, s_value, 0);
         zs_dst2 = lp_build_interleave2(gallivm, z_type,
                                        z_value, s_value, 1);
      }
      else {
         unsigned i;
         LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 2];
         assert(z_src_type.length == 8);
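         /*
          * Build interleaved zs pairs in fs quad order: even shuffle indices
          * pick the z lanes (0,1,4,5,2,3,6,7) and odd indices the matching
          * s lanes from the second shuffle operand.
          */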
         for (i = 0; i < 8; i++) {
            shuffles[i*2] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2);
            shuffles[i*2+1] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2 +
                                                   z_src_type.length);
         }
         zs_dst1 = LLVMBuildShuffleVector(builder, z_value, s_value,
                                          LLVMConstVector(&shuffles[0],
                                                          z_src_type.length), "");
         zs_dst2 = LLVMBuildShuffleVector(builder, z_value, s_value,
                                          LLVMConstVector(&shuffles[8],
                                                          z_src_type.length), "");
      }
      zs_dst1 = LLVMBuildBitCast(builder, zs_dst1,
                                 lp_build_vec_type(gallivm, zs_load_type), "");
      zs_dst2 = LLVMBuildBitCast(builder, zs_dst2,
                                 lp_build_vec_type(gallivm, zs_load_type), "");
   }

   LLVMBuildStore(builder, zs_dst1, zs_dst_ptr1);
   if (!is_1d) {
      LLVMBuildStore(builder, zs_dst2, zs_dst_ptr2);
   }
}

/**
 * Generate code for performing depth and/or stencil tests.
 * We operate on a vector of values (typically n 2x2 quads).
 *
 * \param depth the depth test state
 * \param stencil the front/back stencil state
 * \param type the data type of the fragment depth/stencil values
 * \param format_desc description of the depth/stencil surface
 * \param mask the alive/dead pixel mask for the quad (vector)
 * \param cov_mask coverage mask
 * \param stencil_refs the front/back stencil ref values (scalar)
 * \param z_src the incoming depth/stencil values (n 2x2 quad values, float32)
 * \param zs_dst the depth/stencil values in framebuffer
 * \param face contains boolean value indicating front/back facing polygon
 */
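/*
 * The generated code runs, per iteration of the fs loop: stencil test,
 * stencil-fail op, depth compare, depth-fail/depth-pass stencil ops and the
 * depth write select, then shifts the z and stencil bits back into their
 * framebuffer positions and updates the live-pixel mask (or the coverage
 * mask for multisample).
 */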
void
lp_build_depth_stencil_test(struct gallivm_state *gallivm,
                            const struct lp_depth_state *depth,
                            const struct pipe_stencil_state stencil[2],
                            struct lp_type z_src_type,
                            const struct util_format_description *format_desc,
                            struct lp_build_mask_context *mask,
                            LLVMValueRef *cov_mask,
                            LLVMValueRef stencil_refs[2],
                            LLVMValueRef z_src,
                            LLVMValueRef z_fb,
                            LLVMValueRef s_fb,
                            LLVMValueRef face,
                            LLVMValueRef *z_value,
                            LLVMValueRef *s_value,
                            boolean do_branch)
{
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_type z_type;
   struct lp_build_context z_bld;
   struct lp_build_context s_bld;
   struct lp_type s_type;
   unsigned z_shift = 0, z_width = 0, z_mask = 0;
   LLVMValueRef z_dst = NULL;
   LLVMValueRef stencil_vals = NULL;
   LLVMValueRef z_bitmask = NULL, stencil_shift = NULL;
   LLVMValueRef z_pass = NULL, s_pass_mask = NULL;
   LLVMValueRef current_mask = mask ? lp_build_mask_value(mask) : *cov_mask;
   LLVMValueRef front_facing = NULL;
   boolean have_z, have_s;

   /*
    * Depths are expected to be between 0 and 1, even if they are stored in
    * floats. Setting these bits here will ensure that the lp_build_conv() call
    * below won't try to unnecessarily clamp the incoming values.
    */
   if(z_src_type.floating) {
      z_src_type.sign = FALSE;
      z_src_type.norm = TRUE;
   }
   else {
      assert(!z_src_type.sign);
      assert(z_src_type.norm);
   }

   /* Pick the type matching the depth-stencil format. */
   z_type = lp_depth_type(format_desc, z_src_type.length);

   /* Pick the intermediate type for depth operations. */
   z_type.width = z_src_type.width;
   assert(z_type.length == z_src_type.length);
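   /*
    * E.g. a Z16_UNORM buffer compared against float32 fragment z is handled
    * here as 32-bit integer vectors (with the fragment z converted to 16
    * significant bits) and only truncated to 16 bits when written back in
    * lp_build_depth_stencil_write_swizzled().
    */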

   /* FIXME: for non-float depth/stencil might generate better code
    * if we'd always split it up to use 128bit operations.
    * For stencil we'd almost certainly want to pack to 8xi16 values,
    * for z just run twice.
    */

   /* Sanity checking */
   {
      ASSERTED const unsigned z_swizzle = format_desc->swizzle[0];
      ASSERTED const unsigned s_swizzle = format_desc->swizzle[1];

      assert(z_swizzle != PIPE_SWIZZLE_NONE ||
             s_swizzle != PIPE_SWIZZLE_NONE);

      assert(depth->enabled || stencil[0].enabled);

      assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
      assert(format_desc->block.width == 1);
      assert(format_desc->block.height == 1);

      if (stencil[0].enabled) {
         assert(s_swizzle < 4);
         assert(format_desc->channel[s_swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED);
         assert(format_desc->channel[s_swizzle].pure_integer);
         assert(!format_desc->channel[s_swizzle].normalized);
         assert(format_desc->channel[s_swizzle].size == 8);
      }

      if (depth->enabled) {
         assert(z_swizzle < 4);
         if (z_type.floating) {
            assert(z_swizzle == 0);
            assert(format_desc->channel[z_swizzle].type ==
                   UTIL_FORMAT_TYPE_FLOAT);
            assert(format_desc->channel[z_swizzle].size == 32);
         }
         else {
            assert(format_desc->channel[z_swizzle].type ==
                   UTIL_FORMAT_TYPE_UNSIGNED);
            assert(format_desc->channel[z_swizzle].normalized);
            assert(!z_type.fixed);
         }
      }
   }


   /* Setup build context for Z vals */
   lp_build_context_init(&z_bld, gallivm, z_type);

   /* Setup build context for stencil vals */
   s_type = lp_int_type(z_type);
   lp_build_context_init(&s_bld, gallivm, s_type);

   /* Compute and apply the Z/stencil bitmasks and shifts.
    */
   {
      unsigned s_shift, s_mask;

      z_dst = z_fb;
      stencil_vals = s_fb;

      have_z = get_z_shift_and_mask(format_desc, &z_shift, &z_width, &z_mask);
      have_s = get_s_shift_and_mask(format_desc, &s_shift, &s_mask);

      if (have_z) {
         if (z_mask != 0xffffffff) {
            z_bitmask = lp_build_const_int_vec(gallivm, z_type, z_mask);
         }

         /*
          * Align the framebuffer Z 's LSB to the right.
          */
         if (z_shift) {
            LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift);
            z_dst = LLVMBuildLShr(builder, z_dst, shift, "z_dst");
         } else if (z_bitmask) {
            z_dst = LLVMBuildAnd(builder, z_dst, z_bitmask, "z_dst");
         } else {
            lp_build_name(z_dst, "z_dst");
         }
      }

      if (have_s) {
         if (s_shift) {
            LLVMValueRef shift = lp_build_const_int_vec(gallivm, s_type, s_shift);
            stencil_vals = LLVMBuildLShr(builder, stencil_vals, shift, "");
            stencil_shift = shift; /* used below */
         }

         if (s_mask != 0xffffffff) {
            LLVMValueRef mask = lp_build_const_int_vec(gallivm, s_type, s_mask);
            stencil_vals = LLVMBuildAnd(builder, stencil_vals, mask, "");
         }

         lp_build_name(stencil_vals, "s_dst");
      }
   }

   if (stencil[0].enabled) {

      if (face) {
         if (0) {
            /*
             * XXX: the scalar expansion below produces atrocious code
             * (basically producing a 64bit scalar value, then moving the 2
             * 32bit pieces separately to simd, plus 4 shuffles, which is
             * seriously lame). But the scalar-simd transitions are always
             * tricky, so no big surprise there.
             * This here would be way better, however llvm has some serious
             * trouble later using it in the select, probably because it will
             * recognize the expression as constant and move the simd value
             * away (out of the loop) - and then it will suddenly try
             * constructing i1 high-bit masks out of it later...
             * (Try piglit stencil-twoside.)
             * Note this is NOT due to using SExt/Trunc, it fails exactly the
             * same even when using native compare/select.
             * I cannot reproduce this problem when using stand-alone compiler
             * though, suggesting some problem with optimization passes...
             * (With stand-alone compilation, the construction of this mask
             * value, no matter if the easy 3 instruction here or the complex
             * 16+ one below, never gets separated from where it's used.)
             * The scalar code still has the same problem, but the generated
             * code looks a bit better at least for some reason, even if
             * mostly by luck (the fundamental issue clearly is the same).
             */
            front_facing = lp_build_broadcast(gallivm, s_bld.vec_type, face);
            /* front_facing = face != 0 ? ~0 : 0 */
            front_facing = lp_build_compare(gallivm, s_bld.type,
                                            PIPE_FUNC_NOTEQUAL,
                                            front_facing, s_bld.zero);
         } else {
            LLVMValueRef zero = lp_build_const_int32(gallivm, 0);

            /* front_facing = face != 0 ? ~0 : 0 */
            front_facing = LLVMBuildICmp(builder, LLVMIntNE, face, zero, "");
            front_facing = LLVMBuildSExt(builder, front_facing,
                                         LLVMIntTypeInContext(gallivm->context,
                                               s_bld.type.length*s_bld.type.width),
                                         "");
            front_facing = LLVMBuildBitCast(builder, front_facing,
                                            s_bld.int_vec_type, "");

         }
      }

      s_pass_mask = lp_build_stencil_test(&s_bld, stencil,
                                          stencil_refs, stencil_vals,
                                          front_facing);

      /* apply stencil-fail operator */
      {
         LLVMValueRef s_fail_mask = lp_build_andnot(&s_bld, current_mask, s_pass_mask);
         stencil_vals = lp_build_stencil_op(&s_bld, stencil, S_FAIL_OP,
                                            stencil_refs, stencil_vals,
                                            s_fail_mask, front_facing);
      }
   }

   if (depth->enabled) {
      /*
       * Convert fragment Z to the desired type, aligning the LSB to the right.
       */

      assert(z_type.width == z_src_type.width);
      assert(z_type.length == z_src_type.length);
      assert(lp_check_value(z_src_type, z_src));
      if (z_src_type.floating) {
         /*
          * Convert from floating point values
          */

         if (!z_type.floating) {
            z_src = lp_build_clamped_float_to_unsigned_norm(gallivm,
                                                            z_src_type,
                                                            z_width,
                                                            z_src);
         }
      } else {
         /*
          * Convert from unsigned normalized values.
          */

         assert(!z_src_type.sign);
         assert(!z_src_type.fixed);
         assert(z_src_type.norm);
         assert(!z_type.floating);
         if (z_src_type.width > z_width) {
            LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_src_type,
                                                        z_src_type.width - z_width);
            z_src = LLVMBuildLShr(builder, z_src, shift, "");
         }
      }
      assert(lp_check_value(z_type, z_src));

      lp_build_name(z_src, "z_src");

      /* compare src Z to dst Z, returning 'pass' mask */
      z_pass = lp_build_cmp(&z_bld, depth->func, z_src, z_dst);

      /* mask off bits that failed stencil test */
      if (s_pass_mask) {
         current_mask = LLVMBuildAnd(builder, current_mask, s_pass_mask, "");
      }

      if (!stencil[0].enabled && mask) {
         /* We can potentially skip all remaining operations here, but only
          * if stencil is disabled because we still need to update the stencil
          * buffer values. Don't need to update Z buffer values.
          */
         lp_build_mask_update(mask, z_pass);

         if (do_branch) {
            lp_build_mask_check(mask);
         }
      }

      if (depth->writemask) {
         LLVMValueRef z_pass_mask;

         /* mask off bits that failed Z test */
         z_pass_mask = LLVMBuildAnd(builder, current_mask, z_pass, "");

         /* Mix the old and new Z buffer values.
          * z_dst[i] = zselectmask[i] ? z_src[i] : z_dst[i]
          */
         z_dst = lp_build_select(&z_bld, z_pass_mask, z_src, z_dst);
      }

      if (stencil[0].enabled) {
         /* update stencil buffer values according to z pass/fail result */
         LLVMValueRef z_fail_mask, z_pass_mask;

         /* apply Z-fail operator */
         z_fail_mask = lp_build_andnot(&s_bld, current_mask, z_pass);
         stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_FAIL_OP,
                                            stencil_refs, stencil_vals,
                                            z_fail_mask, front_facing);

         /* apply Z-pass operator */
         z_pass_mask = LLVMBuildAnd(builder, current_mask, z_pass, "");
         stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP,
                                            stencil_refs, stencil_vals,
                                            z_pass_mask, front_facing);
      }
   }
   else {
      /* No depth test: apply Z-pass operator to stencil buffer values which
       * passed the stencil test.
       */
      s_pass_mask = LLVMBuildAnd(builder, current_mask, s_pass_mask, "");
      stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP,
                                         stencil_refs, stencil_vals,
                                         s_pass_mask, front_facing);
   }

   /* Put Z and stencil bits in the right place */
   if (have_z && z_shift) {
      LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift);
      z_dst = LLVMBuildShl(builder, z_dst, shift, "");
   }
   if (stencil_vals && stencil_shift)
      stencil_vals = LLVMBuildShl(builder, stencil_vals,
                                  stencil_shift, "");

   /* Finally, merge the z/stencil values */
   if (format_desc->block.bits <= 32) {
      if (have_z && have_s)
         *z_value = LLVMBuildOr(builder, z_dst, stencil_vals, "");
      else if (have_z)
         *z_value = z_dst;
      else
         *z_value = stencil_vals;
      *s_value = *z_value;
   }
   else {
      *z_value = z_dst;
      *s_value = stencil_vals;
   }

   if (mask) {
      if (s_pass_mask)
         lp_build_mask_update(mask, s_pass_mask);

      if (depth->enabled && stencil[0].enabled)
         lp_build_mask_update(mask, z_pass);
   } else {
      LLVMValueRef tmp_mask = *cov_mask;
      if (s_pass_mask)
         tmp_mask = LLVMBuildAnd(builder, tmp_mask, s_pass_mask, "");

      /* for multisample we don't do the stencil optimisation so update always */
      if (depth->enabled)
         tmp_mask = LLVMBuildAnd(builder, tmp_mask, z_pass, "");
      *cov_mask = tmp_mask;
   }
}