Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/llvmpipe/lp_bld_interp.c
4570 views
1
/**************************************************************************
2
*
3
* Copyright 2009 VMware, Inc.
4
* Copyright 2007-2008 VMware, Inc.
5
* All Rights Reserved.
6
*
7
* Permission is hereby granted, free of charge, to any person obtaining a
8
* copy of this software and associated documentation files (the
9
* "Software"), to deal in the Software without restriction, including
10
* without limitation the rights to use, copy, modify, merge, publish,
11
* distribute, sub license, and/or sell copies of the Software, and to
12
* permit persons to whom the Software is furnished to do so, subject to
13
* the following conditions:
14
*
15
* The above copyright notice and this permission notice (including the
16
* next paragraph) shall be included in all copies or substantial portions
17
* of the Software.
18
*
19
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
23
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26
*
27
**************************************************************************/
28
29
/**
30
* @file
31
* Position and shader input interpolation.
32
*
33
 * @author Jose Fonseca <jfonseca@vmware.com>
34
*/
35
36
#include "pipe/p_shader_tokens.h"
37
#include "util/compiler.h"
38
#include "util/u_debug.h"
39
#include "util/u_memory.h"
40
#include "util/u_math.h"
41
#include "tgsi/tgsi_scan.h"
42
#include "gallivm/lp_bld_debug.h"
43
#include "gallivm/lp_bld_const.h"
44
#include "gallivm/lp_bld_arit.h"
45
#include "gallivm/lp_bld_swizzle.h"
46
#include "gallivm/lp_bld_flow.h"
47
#include "gallivm/lp_bld_logic.h"
48
#include "gallivm/lp_bld_struct.h"
49
#include "gallivm/lp_bld_gather.h"
50
#include "lp_bld_interp.h"
51
52
53
/*
 * The shader JIT function operates on blocks of quads.
 * Each block has 2x2 quads and each quad has 2x2 pixels.
 *
 * We iterate over the quads in order 0, 1, 2, 3:
 *
 * #################
 * #   |   #   |   #
 * #---0---#---1---#
 * #   |   #   |   #
 * #################
 * #   |   #   |   #
 * #---2---#---3---#
 * #   |   #   |   #
 * #################
 *
 * If we iterate over multiple quads at once, quads 01 and 23 are processed
 * together.
 *
 * Within each quad, we have four pixels which are represented in SOA
 * order:
 *
 * #########
 * # 0 | 1 #
 * #---+---#
 * # 2 | 3 #
 * #########
 *
 * So the green channel (for example) of the four pixels is stored in
 * a single vector register: {g0, g1, g2, g3}.
 * The order stays the same even with multiple quads:
 * 0 1 4 5
 * 2 3 6 7
 * is stored as g0..g7
 */
88
89
90
/**
 * Do one perspective divide per quad.
 *
 * For perspective interpolation, the final attribute value is given by
 *
 *  a' = a/w = a * oow
 *
 * where
 *
 *  a = a0 + dadx*x + dady*y
 *  w = w0 + dwdx*x + dwdy*y
 *  oow = 1/w = 1/(w0 + dwdx*x + dwdy*y)
 *
 * Instead of computing the division per pixel, with this macro we compute the
 * division on the upper left pixel of each quad, and use a linear
 * approximation in the remaining pixels, given by:
 *
 *  da'dx = (dadx - dwdx*a)*oow
 *  da'dy = (dady - dwdy*a)*oow
 *
 * Ironically, this actually makes things slower -- probably because the
 * divide hardware unit is rarely used, whereas the multiply unit is typically
 * already saturated.
 */
114
/* Defined as 0; none of the code visible in this file tests this macro. */
#define PERSPECTIVE_DIVIDE_PER_QUAD 0
115
116
117
/*
 * Per-lane pixel offsets inside a block, indexed by the lane number of a
 * pixel vector.  Each row below is one quad (four lanes); the x table
 * holds the horizontal offset and the y table the vertical offset.
 */
static const unsigned char quad_offset_x[16] = {
   0, 1, 0, 1,
   2, 3, 2, 3,
   0, 1, 0, 1,
   2, 3, 2, 3
};
static const unsigned char quad_offset_y[16] = {
   0, 0, 1, 1,
   0, 0, 1, 1,
   2, 2, 3, 3,
   2, 2, 3, 3
};
119
120
121
static void
122
attrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix)
123
{
124
if(attrib == 0)
125
lp_build_name(val, "pos.%c%s", "xyzw"[chan], suffix);
126
else
127
lp_build_name(val, "input%u.%c%s", attrib - 1, "xyzw"[chan], suffix);
128
}
129
130
static void
131
calc_offsets(struct lp_build_context *coeff_bld,
132
unsigned quad_start_index,
133
LLVMValueRef *pixoffx,
134
LLVMValueRef *pixoffy)
135
{
136
unsigned i;
137
unsigned num_pix = coeff_bld->type.length;
138
struct gallivm_state *gallivm = coeff_bld->gallivm;
139
LLVMBuilderRef builder = coeff_bld->gallivm->builder;
140
LLVMValueRef nr, pixxf, pixyf;
141
142
*pixoffx = coeff_bld->undef;
143
*pixoffy = coeff_bld->undef;
144
145
for (i = 0; i < num_pix; i++) {
146
nr = lp_build_const_int32(gallivm, i);
147
pixxf = lp_build_const_float(gallivm, quad_offset_x[i % num_pix] +
148
(quad_start_index & 1) * 2);
149
pixyf = lp_build_const_float(gallivm, quad_offset_y[i % num_pix] +
150
(quad_start_index & 2));
151
*pixoffx = LLVMBuildInsertElement(builder, *pixoffx, pixxf, nr, "");
152
*pixoffy = LLVMBuildInsertElement(builder, *pixoffy, pixyf, nr, "");
153
}
154
}
155
156
static void
157
calc_centroid_offsets(struct lp_build_interp_soa_context *bld,
158
struct gallivm_state *gallivm,
159
LLVMValueRef loop_iter,
160
LLVMValueRef mask_store,
161
LLVMValueRef pix_center_offset,
162
LLVMValueRef *centroid_x, LLVMValueRef *centroid_y)
163
{
164
struct lp_build_context *coeff_bld = &bld->coeff_bld;
165
LLVMBuilderRef builder = gallivm->builder;
166
LLVMValueRef s_mask_and = NULL;
167
LLVMValueRef centroid_x_offset = pix_center_offset;
168
LLVMValueRef centroid_y_offset = pix_center_offset;
169
for (int s = bld->coverage_samples - 1; s >= 0; s--) {
170
LLVMValueRef sample_cov;
171
LLVMValueRef s_mask_idx = LLVMBuildMul(builder, bld->num_loop, lp_build_const_int32(gallivm, s), "");
172
173
s_mask_idx = LLVMBuildAdd(builder, s_mask_idx, loop_iter, "");
174
sample_cov = lp_build_pointer_get(builder, mask_store, s_mask_idx);
175
if (s == bld->coverage_samples - 1)
176
s_mask_and = sample_cov;
177
else
178
s_mask_and = LLVMBuildAnd(builder, s_mask_and, sample_cov, "");
179
180
LLVMValueRef x_val_idx = lp_build_const_int32(gallivm, s * 2);
181
LLVMValueRef y_val_idx = lp_build_const_int32(gallivm, s * 2 + 1);
182
183
x_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, x_val_idx);
184
y_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, y_val_idx);
185
x_val_idx = lp_build_broadcast_scalar(coeff_bld, x_val_idx);
186
y_val_idx = lp_build_broadcast_scalar(coeff_bld, y_val_idx);
187
centroid_x_offset = lp_build_select(coeff_bld, sample_cov, x_val_idx, centroid_x_offset);
188
centroid_y_offset = lp_build_select(coeff_bld, sample_cov, y_val_idx, centroid_y_offset);
189
}
190
*centroid_x = lp_build_select(coeff_bld, s_mask_and, pix_center_offset, centroid_x_offset);
191
*centroid_y = lp_build_select(coeff_bld, s_mask_and, pix_center_offset, centroid_y_offset);
192
}
193
194
/* Much easier, and significantly less instructions in the per-stamp
195
* part (less than half) but overall more instructions so a loss if
196
* most quads are active. Might be a win though with larger vectors.
197
* No ability to do per-quad divide (doable but not implemented)
198
* Could be made to work with passed in pixel offsets (i.e. active quad merging).
199
*/
200
static void
201
coeffs_init_simple(struct lp_build_interp_soa_context *bld,
202
LLVMValueRef a0_ptr,
203
LLVMValueRef dadx_ptr,
204
LLVMValueRef dady_ptr)
205
{
206
struct lp_build_context *coeff_bld = &bld->coeff_bld;
207
struct lp_build_context *setup_bld = &bld->setup_bld;
208
struct gallivm_state *gallivm = coeff_bld->gallivm;
209
LLVMBuilderRef builder = gallivm->builder;
210
unsigned attrib;
211
212
for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
213
/*
214
* always fetch all 4 values for performance/simplicity
215
* Note: we do that here because it seems to generate better
216
* code. It generates a lot of moves initially but less
217
* moves later. As far as I can tell this looks like a
218
* llvm issue, instead of simply reloading the values from
219
* the passed in pointers it if it runs out of registers
220
* it spills/reloads them. Maybe some optimization passes
221
* would help.
222
* Might want to investigate this again later.
223
*/
224
const unsigned interp = bld->interp[attrib];
225
LLVMValueRef index = lp_build_const_int32(gallivm,
226
attrib * TGSI_NUM_CHANNELS);
227
LLVMValueRef ptr;
228
LLVMValueRef dadxaos = setup_bld->zero;
229
LLVMValueRef dadyaos = setup_bld->zero;
230
LLVMValueRef a0aos = setup_bld->zero;
231
232
switch (interp) {
233
case LP_INTERP_PERSPECTIVE:
234
FALLTHROUGH;
235
236
case LP_INTERP_LINEAR:
237
ptr = LLVMBuildGEP(builder, dadx_ptr, &index, 1, "");
238
ptr = LLVMBuildBitCast(builder, ptr,
239
LLVMPointerType(setup_bld->vec_type, 0), "");
240
dadxaos = LLVMBuildLoad(builder, ptr, "");
241
242
ptr = LLVMBuildGEP(builder, dady_ptr, &index, 1, "");
243
ptr = LLVMBuildBitCast(builder, ptr,
244
LLVMPointerType(setup_bld->vec_type, 0), "");
245
dadyaos = LLVMBuildLoad(builder, ptr, "");
246
247
attrib_name(dadxaos, attrib, 0, ".dadxaos");
248
attrib_name(dadyaos, attrib, 0, ".dadyaos");
249
FALLTHROUGH;
250
251
case LP_INTERP_CONSTANT:
252
case LP_INTERP_FACING:
253
ptr = LLVMBuildGEP(builder, a0_ptr, &index, 1, "");
254
ptr = LLVMBuildBitCast(builder, ptr,
255
LLVMPointerType(setup_bld->vec_type, 0), "");
256
a0aos = LLVMBuildLoad(builder, ptr, "");
257
attrib_name(a0aos, attrib, 0, ".a0aos");
258
break;
259
260
case LP_INTERP_POSITION:
261
/* Nothing to do as the position coeffs are already setup in slot 0 */
262
continue;
263
264
default:
265
assert(0);
266
break;
267
}
268
bld->a0aos[attrib] = a0aos;
269
bld->dadxaos[attrib] = dadxaos;
270
bld->dadyaos[attrib] = dadyaos;
271
}
272
}
273
274
/**
275
* Interpolate the shader input attribute values.
276
* This is called for each (group of) quad(s).
277
*/
278
static void
279
attribs_update_simple(struct lp_build_interp_soa_context *bld,
280
struct gallivm_state *gallivm,
281
LLVMValueRef loop_iter,
282
LLVMValueRef mask_store,
283
LLVMValueRef sample_id,
284
int start,
285
int end)
286
{
287
LLVMBuilderRef builder = gallivm->builder;
288
struct lp_build_context *coeff_bld = &bld->coeff_bld;
289
struct lp_build_context *setup_bld = &bld->setup_bld;
290
LLVMValueRef oow = NULL;
291
unsigned attrib;
292
LLVMValueRef pixoffx;
293
LLVMValueRef pixoffy;
294
LLVMValueRef ptr;
295
LLVMValueRef pix_center_offset = lp_build_const_vec(gallivm, coeff_bld->type, 0.5);
296
297
/* could do this with code-generated passed in pixel offsets too */
298
299
assert(loop_iter);
300
ptr = LLVMBuildGEP(builder, bld->xoffset_store, &loop_iter, 1, "");
301
pixoffx = LLVMBuildLoad(builder, ptr, "");
302
ptr = LLVMBuildGEP(builder, bld->yoffset_store, &loop_iter, 1, "");
303
pixoffy = LLVMBuildLoad(builder, ptr, "");
304
305
pixoffx = LLVMBuildFAdd(builder, pixoffx,
306
lp_build_broadcast_scalar(coeff_bld, bld->x), "");
307
pixoffy = LLVMBuildFAdd(builder, pixoffy,
308
lp_build_broadcast_scalar(coeff_bld, bld->y), "");
309
310
for (attrib = start; attrib < end; attrib++) {
311
const unsigned mask = bld->mask[attrib];
312
const unsigned interp = bld->interp[attrib];
313
const unsigned loc = bld->interp_loc[attrib];
314
unsigned chan;
315
316
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
317
if (mask & (1 << chan)) {
318
LLVMValueRef index;
319
LLVMValueRef dadx = coeff_bld->zero;
320
LLVMValueRef dady = coeff_bld->zero;
321
LLVMValueRef a = coeff_bld->zero;
322
LLVMValueRef chan_pixoffx = pixoffx, chan_pixoffy = pixoffy;
323
324
index = lp_build_const_int32(gallivm, chan);
325
switch (interp) {
326
case LP_INTERP_PERSPECTIVE:
327
FALLTHROUGH;
328
329
case LP_INTERP_LINEAR:
330
if (attrib == 0 && chan == 0) {
331
dadx = coeff_bld->one;
332
if (sample_id) {
333
LLVMValueRef x_val_idx = LLVMBuildMul(gallivm->builder, sample_id, lp_build_const_int32(gallivm, 2), "");
334
x_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, x_val_idx);
335
a = lp_build_broadcast_scalar(coeff_bld, x_val_idx);
336
} else {
337
a = lp_build_const_vec(gallivm, coeff_bld->type, bld->pos_offset);
338
}
339
}
340
else if (attrib == 0 && chan == 1) {
341
dady = coeff_bld->one;
342
if (sample_id) {
343
LLVMValueRef y_val_idx = LLVMBuildMul(gallivm->builder, sample_id, lp_build_const_int32(gallivm, 2), "");
344
y_val_idx = LLVMBuildAdd(gallivm->builder, y_val_idx, lp_build_const_int32(gallivm, 1), "");
345
y_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, y_val_idx);
346
a = lp_build_broadcast_scalar(coeff_bld, y_val_idx);
347
} else {
348
a = lp_build_const_vec(gallivm, coeff_bld->type, bld->pos_offset);
349
}
350
}
351
else {
352
dadx = lp_build_extract_broadcast(gallivm, setup_bld->type,
353
coeff_bld->type, bld->dadxaos[attrib],
354
index);
355
dady = lp_build_extract_broadcast(gallivm, setup_bld->type,
356
coeff_bld->type, bld->dadyaos[attrib],
357
index);
358
a = lp_build_extract_broadcast(gallivm, setup_bld->type,
359
coeff_bld->type, bld->a0aos[attrib],
360
index);
361
362
if (bld->coverage_samples > 1) {
363
LLVMValueRef xoffset = pix_center_offset;
364
LLVMValueRef yoffset = pix_center_offset;
365
if (loc == TGSI_INTERPOLATE_LOC_SAMPLE || (attrib == 0 && chan == 2 && sample_id)) {
366
LLVMValueRef x_val_idx = LLVMBuildMul(gallivm->builder, sample_id, lp_build_const_int32(gallivm, 2), "");
367
LLVMValueRef y_val_idx = LLVMBuildAdd(gallivm->builder, x_val_idx, lp_build_const_int32(gallivm, 1), "");
368
369
x_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, x_val_idx);
370
y_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, y_val_idx);
371
xoffset = lp_build_broadcast_scalar(coeff_bld, x_val_idx);
372
yoffset = lp_build_broadcast_scalar(coeff_bld, y_val_idx);
373
} else if (loc == TGSI_INTERPOLATE_LOC_CENTROID) {
374
calc_centroid_offsets(bld, gallivm, loop_iter, mask_store,
375
pix_center_offset, &xoffset, &yoffset);
376
}
377
chan_pixoffx = lp_build_add(coeff_bld, chan_pixoffx, xoffset);
378
chan_pixoffy = lp_build_add(coeff_bld, chan_pixoffy, yoffset);
379
}
380
}
381
382
/*
383
* a = a0 + (x * dadx + y * dady)
384
*/
385
a = lp_build_fmuladd(builder, dadx, chan_pixoffx, a);
386
a = lp_build_fmuladd(builder, dady, chan_pixoffy, a);
387
388
if (interp == LP_INTERP_PERSPECTIVE) {
389
if (oow == NULL) {
390
LLVMValueRef w = bld->attribs[0][3];
391
assert(attrib != 0);
392
assert(bld->mask[0] & TGSI_WRITEMASK_W);
393
oow = lp_build_rcp(coeff_bld, w);
394
}
395
a = lp_build_mul(coeff_bld, a, oow);
396
}
397
break;
398
399
case LP_INTERP_CONSTANT:
400
case LP_INTERP_FACING:
401
a = lp_build_extract_broadcast(gallivm, setup_bld->type,
402
coeff_bld->type, bld->a0aos[attrib],
403
index);
404
break;
405
406
case LP_INTERP_POSITION:
407
assert(attrib > 0);
408
a = bld->attribs[0][chan];
409
break;
410
411
default:
412
assert(0);
413
break;
414
}
415
416
if ((attrib == 0) && (chan == 2) && !bld->depth_clamp){
417
/* FIXME: Depth values can exceed 1.0, due to the fact that
418
* setup interpolation coefficients refer to (0,0) which causes
419
* precision loss. So we must clamp to 1.0 here to avoid artifacts.
420
* Note though values outside [0,1] are perfectly valid with
421
* depth clip disabled.
422
* XXX: If depth clip is disabled but we force depth clamp
423
* we may get values larger than 1.0 in the fs (but not in
424
* depth test). Not sure if that's an issue...
425
* Also, on a similar note, it is not obvious if the depth values
426
* appearing in fs (with depth clip disabled) should be clamped
427
* to [0,1], clamped to near/far or not be clamped at all...
428
*/
429
a = lp_build_min(coeff_bld, a, coeff_bld->one);
430
}
431
bld->attribs[attrib][chan] = a;
432
}
433
}
434
}
435
}
436
437
static LLVMValueRef
438
lp_build_interp_soa_indirect(struct lp_build_interp_soa_context *bld,
439
struct gallivm_state *gallivm,
440
unsigned attrib, unsigned chan,
441
LLVMValueRef indir_index,
442
LLVMValueRef pixoffx,
443
LLVMValueRef pixoffy)
444
{
445
LLVMBuilderRef builder = gallivm->builder;
446
struct lp_build_context *coeff_bld = &bld->coeff_bld;
447
const unsigned interp = bld->interp[attrib];
448
LLVMValueRef dadx = coeff_bld->zero;
449
LLVMValueRef dady = coeff_bld->zero;
450
LLVMValueRef a = coeff_bld->zero;
451
452
LLVMTypeRef u8ptr = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
453
454
indir_index = LLVMBuildAdd(builder, indir_index, lp_build_const_int_vec(gallivm, coeff_bld->type, attrib), "");
455
LLVMValueRef index = LLVMBuildMul(builder, indir_index, lp_build_const_int_vec(gallivm, coeff_bld->type, 4), "");
456
index = LLVMBuildAdd(builder, index, lp_build_const_int_vec(gallivm, coeff_bld->type, chan), "");
457
458
/* size up to byte indices */
459
index = LLVMBuildMul(builder, index, lp_build_const_int_vec(gallivm, coeff_bld->type, 4), "");
460
461
struct lp_type dst_type = coeff_bld->type;
462
dst_type.length = 1;
463
switch (interp) {
464
case LP_INTERP_PERSPECTIVE:
465
FALLTHROUGH;
466
case LP_INTERP_LINEAR:
467
468
dadx = lp_build_gather(gallivm, coeff_bld->type.length,
469
coeff_bld->type.width, dst_type,
470
true, LLVMBuildBitCast(builder, bld->dadx_ptr, u8ptr, ""), index, false);
471
472
dady = lp_build_gather(gallivm, coeff_bld->type.length,
473
coeff_bld->type.width, dst_type,
474
true, LLVMBuildBitCast(builder, bld->dady_ptr, u8ptr, ""), index, false);
475
476
a = lp_build_gather(gallivm, coeff_bld->type.length,
477
coeff_bld->type.width, dst_type,
478
true, LLVMBuildBitCast(builder, bld->a0_ptr, u8ptr, ""), index, false);
479
480
/*
481
* a = a0 + (x * dadx + y * dady)
482
*/
483
a = lp_build_fmuladd(builder, dadx, pixoffx, a);
484
a = lp_build_fmuladd(builder, dady, pixoffy, a);
485
486
if (interp == LP_INTERP_PERSPECTIVE) {
487
LLVMValueRef w = bld->attribs[0][3];
488
assert(attrib != 0);
489
assert(bld->mask[0] & TGSI_WRITEMASK_W);
490
LLVMValueRef oow = lp_build_rcp(coeff_bld, w);
491
a = lp_build_mul(coeff_bld, a, oow);
492
}
493
494
break;
495
case LP_INTERP_CONSTANT:
496
case LP_INTERP_FACING:
497
a = lp_build_gather(gallivm, coeff_bld->type.length,
498
coeff_bld->type.width, dst_type,
499
true, LLVMBuildBitCast(builder, bld->a0_ptr, u8ptr, ""), index, false);
500
break;
501
default:
502
assert(0);
503
break;
504
}
505
return a;
506
}
507
508
LLVMValueRef
509
lp_build_interp_soa(struct lp_build_interp_soa_context *bld,
510
struct gallivm_state *gallivm,
511
LLVMValueRef loop_iter,
512
LLVMValueRef mask_store,
513
unsigned attrib, unsigned chan,
514
unsigned loc,
515
LLVMValueRef indir_index,
516
LLVMValueRef offsets[2])
517
{
518
LLVMBuilderRef builder = gallivm->builder;
519
struct lp_build_context *coeff_bld = &bld->coeff_bld;
520
struct lp_build_context *setup_bld = &bld->setup_bld;
521
LLVMValueRef pixoffx;
522
LLVMValueRef pixoffy;
523
LLVMValueRef ptr;
524
525
/* could do this with code-generated passed in pixel offsets too */
526
527
assert(loop_iter);
528
ptr = LLVMBuildGEP(builder, bld->xoffset_store, &loop_iter, 1, "");
529
pixoffx = LLVMBuildLoad(builder, ptr, "");
530
ptr = LLVMBuildGEP(builder, bld->yoffset_store, &loop_iter, 1, "");
531
pixoffy = LLVMBuildLoad(builder, ptr, "");
532
533
pixoffx = LLVMBuildFAdd(builder, pixoffx,
534
lp_build_broadcast_scalar(coeff_bld, bld->x), "");
535
pixoffy = LLVMBuildFAdd(builder, pixoffy,
536
lp_build_broadcast_scalar(coeff_bld, bld->y), "");
537
538
LLVMValueRef pix_center_offset = lp_build_const_vec(gallivm, coeff_bld->type, 0.5);
539
540
if (loc == TGSI_INTERPOLATE_LOC_CENTER) {
541
if (bld->coverage_samples > 1) {
542
pixoffx = LLVMBuildFAdd(builder, pixoffx, pix_center_offset, "");
543
pixoffy = LLVMBuildFAdd(builder, pixoffy, pix_center_offset, "");
544
}
545
546
if (offsets[0])
547
pixoffx = LLVMBuildFAdd(builder, pixoffx,
548
offsets[0], "");
549
if (offsets[1])
550
pixoffy = LLVMBuildFAdd(builder, pixoffy,
551
offsets[1], "");
552
} else if (loc == TGSI_INTERPOLATE_LOC_SAMPLE) {
553
LLVMValueRef x_val_idx = LLVMBuildMul(gallivm->builder, offsets[0], lp_build_const_int_vec(gallivm, bld->coeff_bld.type, 2 * 4), "");
554
LLVMValueRef y_val_idx = LLVMBuildAdd(gallivm->builder, x_val_idx, lp_build_const_int_vec(gallivm, bld->coeff_bld.type, 4), "");
555
556
LLVMValueRef base_ptr = LLVMBuildBitCast(gallivm->builder, bld->sample_pos_array,
557
LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), "");
558
LLVMValueRef xoffset = lp_build_gather(gallivm,
559
bld->coeff_bld.type.length,
560
bld->coeff_bld.type.width,
561
lp_elem_type(bld->coeff_bld.type),
562
false,
563
base_ptr,
564
x_val_idx, true);
565
LLVMValueRef yoffset = lp_build_gather(gallivm,
566
bld->coeff_bld.type.length,
567
bld->coeff_bld.type.width,
568
lp_elem_type(bld->coeff_bld.type),
569
false,
570
base_ptr,
571
y_val_idx, true);
572
573
if (bld->coverage_samples > 1) {
574
pixoffx = LLVMBuildFAdd(builder, pixoffx, xoffset, "");
575
pixoffy = LLVMBuildFAdd(builder, pixoffy, yoffset, "");
576
}
577
} else if (loc == TGSI_INTERPOLATE_LOC_CENTROID) {
578
LLVMValueRef centroid_x_offset, centroid_y_offset;
579
580
/* for centroid find covered samples for this quad. */
581
/* if all samples are covered use pixel centers */
582
if (bld->coverage_samples > 1) {
583
calc_centroid_offsets(bld, gallivm, loop_iter, mask_store,
584
pix_center_offset, &centroid_x_offset, &centroid_y_offset);
585
586
pixoffx = LLVMBuildFAdd(builder, pixoffx, centroid_x_offset, "");
587
pixoffy = LLVMBuildFAdd(builder, pixoffy, centroid_y_offset, "");
588
}
589
}
590
591
// remap attrib properly.
592
attrib++;
593
594
if (indir_index)
595
return lp_build_interp_soa_indirect(bld, gallivm, attrib, chan,
596
indir_index, pixoffx, pixoffy);
597
598
599
const unsigned interp = bld->interp[attrib];
600
LLVMValueRef dadx = coeff_bld->zero;
601
LLVMValueRef dady = coeff_bld->zero;
602
LLVMValueRef a = coeff_bld->zero;
603
604
LLVMValueRef index = lp_build_const_int32(gallivm, chan);
605
606
switch (interp) {
607
case LP_INTERP_PERSPECTIVE:
608
FALLTHROUGH;
609
case LP_INTERP_LINEAR:
610
dadx = lp_build_extract_broadcast(gallivm, setup_bld->type,
611
coeff_bld->type, bld->dadxaos[attrib],
612
index);
613
614
dady = lp_build_extract_broadcast(gallivm, setup_bld->type,
615
coeff_bld->type, bld->dadyaos[attrib],
616
index);
617
618
a = lp_build_extract_broadcast(gallivm, setup_bld->type,
619
coeff_bld->type, bld->a0aos[attrib],
620
index);
621
622
/*
623
* a = a0 + (x * dadx + y * dady)
624
*/
625
a = lp_build_fmuladd(builder, dadx, pixoffx, a);
626
a = lp_build_fmuladd(builder, dady, pixoffy, a);
627
628
if (interp == LP_INTERP_PERSPECTIVE) {
629
LLVMValueRef w = bld->attribs[0][3];
630
assert(attrib != 0);
631
assert(bld->mask[0] & TGSI_WRITEMASK_W);
632
LLVMValueRef oow = lp_build_rcp(coeff_bld, w);
633
a = lp_build_mul(coeff_bld, a, oow);
634
}
635
636
break;
637
case LP_INTERP_CONSTANT:
638
case LP_INTERP_FACING:
639
a = lp_build_extract_broadcast(gallivm, setup_bld->type,
640
coeff_bld->type, bld->a0aos[attrib],
641
index);
642
break;
643
default:
644
assert(0);
645
break;
646
}
647
return a;
648
}
649
650
/**
651
* Generate the position vectors.
652
*
653
* Parameter x0, y0 are the integer values with upper left coordinates.
654
*/
655
static void
656
pos_init(struct lp_build_interp_soa_context *bld,
657
LLVMValueRef x0,
658
LLVMValueRef y0)
659
{
660
LLVMBuilderRef builder = bld->coeff_bld.gallivm->builder;
661
struct lp_build_context *coeff_bld = &bld->coeff_bld;
662
663
bld->x = LLVMBuildSIToFP(builder, x0, coeff_bld->elem_type, "");
664
bld->y = LLVMBuildSIToFP(builder, y0, coeff_bld->elem_type, "");
665
}
666
667
668
/**
669
* Initialize fragment shader input attribute info.
670
*/
671
void
672
lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
673
struct gallivm_state *gallivm,
674
unsigned num_inputs,
675
const struct lp_shader_input *inputs,
676
boolean pixel_center_integer,
677
unsigned coverage_samples,
678
LLVMValueRef sample_pos_array,
679
LLVMValueRef num_loop,
680
boolean depth_clamp,
681
LLVMBuilderRef builder,
682
struct lp_type type,
683
LLVMValueRef a0_ptr,
684
LLVMValueRef dadx_ptr,
685
LLVMValueRef dady_ptr,
686
LLVMValueRef x0,
687
LLVMValueRef y0)
688
{
689
struct lp_type coeff_type;
690
struct lp_type setup_type;
691
unsigned attrib;
692
unsigned chan;
693
694
memset(bld, 0, sizeof *bld);
695
696
memset(&coeff_type, 0, sizeof coeff_type);
697
coeff_type.floating = TRUE;
698
coeff_type.sign = TRUE;
699
coeff_type.width = 32;
700
coeff_type.length = type.length;
701
702
memset(&setup_type, 0, sizeof setup_type);
703
setup_type.floating = TRUE;
704
setup_type.sign = TRUE;
705
setup_type.width = 32;
706
setup_type.length = TGSI_NUM_CHANNELS;
707
708
709
/* XXX: we don't support interpolating into any other types */
710
assert(memcmp(&coeff_type, &type, sizeof coeff_type) == 0);
711
712
lp_build_context_init(&bld->coeff_bld, gallivm, coeff_type);
713
lp_build_context_init(&bld->setup_bld, gallivm, setup_type);
714
715
/* For convenience */
716
bld->pos = bld->attribs[0];
717
bld->inputs = (const LLVMValueRef (*)[TGSI_NUM_CHANNELS]) bld->attribs[1];
718
719
/* Position */
720
bld->mask[0] = TGSI_WRITEMASK_XYZW;
721
bld->interp[0] = LP_INTERP_LINEAR;
722
bld->interp_loc[0] = 0;
723
724
/* Inputs */
725
for (attrib = 0; attrib < num_inputs; ++attrib) {
726
bld->mask[1 + attrib] = inputs[attrib].usage_mask;
727
bld->interp[1 + attrib] = inputs[attrib].interp;
728
bld->interp_loc[1 + attrib] = inputs[attrib].location;
729
}
730
bld->num_attribs = 1 + num_inputs;
731
732
/* needed for indirect */
733
bld->a0_ptr = a0_ptr;
734
bld->dadx_ptr = dadx_ptr;
735
bld->dady_ptr = dady_ptr;
736
737
/* Ensure all masked out input channels have a valid value */
738
for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
739
for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
740
bld->attribs[attrib][chan] = bld->coeff_bld.undef;
741
}
742
}
743
744
if (pixel_center_integer) {
745
bld->pos_offset = 0.0;
746
} else {
747
bld->pos_offset = 0.5;
748
}
749
bld->depth_clamp = depth_clamp;
750
bld->coverage_samples = coverage_samples;
751
bld->num_loop = num_loop;
752
bld->sample_pos_array = sample_pos_array;
753
754
pos_init(bld, x0, y0);
755
756
/*
757
* Simple method (single step interpolation) may be slower if vector length
758
* is just 4, but the results are different (generally less accurate) with
759
* the other method, so always use more accurate version.
760
*/
761
{
762
/* XXX this should use a global static table */
763
unsigned i;
764
unsigned num_loops = 16 / type.length;
765
LLVMValueRef pixoffx, pixoffy, index;
766
LLVMValueRef ptr;
767
768
bld->xoffset_store = lp_build_array_alloca(gallivm,
769
lp_build_vec_type(gallivm, type),
770
lp_build_const_int32(gallivm, num_loops),
771
"");
772
bld->yoffset_store = lp_build_array_alloca(gallivm,
773
lp_build_vec_type(gallivm, type),
774
lp_build_const_int32(gallivm, num_loops),
775
"");
776
for (i = 0; i < num_loops; i++) {
777
index = lp_build_const_int32(gallivm, i);
778
calc_offsets(&bld->coeff_bld, i*type.length/4, &pixoffx, &pixoffy);
779
ptr = LLVMBuildGEP(builder, bld->xoffset_store, &index, 1, "");
780
LLVMBuildStore(builder, pixoffx, ptr);
781
ptr = LLVMBuildGEP(builder, bld->yoffset_store, &index, 1, "");
782
LLVMBuildStore(builder, pixoffy, ptr);
783
}
784
}
785
coeffs_init_simple(bld, a0_ptr, dadx_ptr, dady_ptr);
786
}
787
788
789
/*
790
* Advance the position and inputs to the given quad within the block.
791
*/
792
793
void
794
lp_build_interp_soa_update_inputs_dyn(struct lp_build_interp_soa_context *bld,
795
struct gallivm_state *gallivm,
796
LLVMValueRef quad_start_index,
797
LLVMValueRef mask_store,
798
LLVMValueRef sample_id)
799
{
800
attribs_update_simple(bld, gallivm, quad_start_index, mask_store, sample_id, 1, bld->num_attribs);
801
}
802
803
void
804
lp_build_interp_soa_update_pos_dyn(struct lp_build_interp_soa_context *bld,
805
struct gallivm_state *gallivm,
806
LLVMValueRef quad_start_index,
807
LLVMValueRef sample_id)
808
{
809
attribs_update_simple(bld, gallivm, quad_start_index, NULL, sample_id, 0, 1);
810
}
811
812
813