CoCalc -- lp_bld

GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/llvmpipe/lp_bld_interp.c
⁷⁴⁵³ views
1
/**************************************************************************
2
 * 
3
 * Copyright 2009 VMware, Inc.
4
 * Copyright 2007-2008 VMware, Inc.
5
 * All Rights Reserved.
6
 * 
7
 * Permission is hereby granted, free of charge, to any person obtaining a
8
 * copy of this software and associated documentation files (the
9
 * "Software"), to deal in the Software without restriction, including
10
 * without limitation the rights to use, copy, modify, merge, publish,
11
 * distribute, sub license, and/or sell copies of the Software, and to
12
 * permit persons to whom the Software is furnished to do so, subject to
13
 * the following conditions:
14
 * 
15
 * The above copyright notice and this permission notice (including the
16
 * next paragraph) shall be included in all copies or substantial portions
17
 * of the Software.
18
 * 
19
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
23
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26
 * 
27
 **************************************************************************/
28

29
/**
30
 * @file
31
 * Position and shader input interpolation.
32
 *
33
 * @author Jose Fonseca <[email protected]>
34
 */
35

36
#include "pipe/p_shader_tokens.h"
37
#include "util/compiler.h"
38
#include "util/u_debug.h"
39
#include "util/u_memory.h"
40
#include "util/u_math.h"
41
#include "tgsi/tgsi_scan.h"
42
#include "gallivm/lp_bld_debug.h"
43
#include "gallivm/lp_bld_const.h"
44
#include "gallivm/lp_bld_arit.h"
45
#include "gallivm/lp_bld_swizzle.h"
46
#include "gallivm/lp_bld_flow.h"
47
#include "gallivm/lp_bld_logic.h"
48
#include "gallivm/lp_bld_struct.h"
49
#include "gallivm/lp_bld_gather.h"
50
#include "lp_bld_interp.h"
51

52

53
/*
 * The shader JIT function operates on blocks of quads.
 * Each block has 2x2 quads and each quad has 2x2 pixels.
 *
 * We iterate over the quads in order 0, 1, 2, 3:
 *
 * #################
 * #   |   #   |   #
 * #---0---#---1---#
 * #   |   #   |   #
 * #################
 * #   |   #   |   #
 * #---2---#---3---#
 * #   |   #   |   #
 * #################
 *
 * If we iterate over multiple quads at once, quads 01 and 23 are processed
 * together.
 *
 * Within each quad, we have four pixels which are represented in SOA
 * order:
 *
 * #########
 * # 0 | 1 #
 * #---+---#
 * # 2 | 3 #
 * #########
 *
 * So the green channel (for example) of the four pixels is stored in
 * a single vector register: {g0, g1, g2, g3}.
 * The order stays the same even with multiple quads:
 * 0 1 4 5
 * 2 3 6 7
 * is stored as g0..g7
 */
88

89

90
/**
 * Do one perspective divide per quad.
 *
 * For perspective interpolation, the final attribute value is given by
 *
 *  a' = a/w = a * oow
 *
 * where
 *
 *  a = a0 + dadx*x + dady*y
 *  w = w0 + dwdx*x + dwdy*y
 *  oow = 1/w = 1/(w0 + dwdx*x + dwdy*y)
 *
 * Instead of computing the division per pixel, with this macro we compute the
 * division on the upper left pixel of each quad, and use a linear
 * approximation in the remaining pixels, given by:
 *
 *  da'dx = (dadx - dwdx*a)*oow
 *  da'dy = (dady - dwdy*a)*oow
 *
 * Ironically, this actually makes things slower -- probably because the
 * divide hardware unit is rarely used, whereas the multiply unit is typically
 * already saturated.
 */
114
/* Left at 0; no code visible in this file tests this macro. */
#define PERSPECTIVE_DIVIDE_PER_QUAD 0


/*
 * Integer x/y offsets of each of the 16 pixels of a 4x4 block, relative to
 * the block's upper left pixel. Entries are grouped per quad (one quad per
 * row below), with the four pixels of a quad listed in the order
 * upper-left, upper-right, lower-left, lower-right.
 */
static const unsigned char quad_offset_x[16] = {
   0, 1, 0, 1,
   2, 3, 2, 3,
   0, 1, 0, 1,
   2, 3, 2, 3
};
static const unsigned char quad_offset_y[16] = {
   0, 0, 1, 1,
   0, 0, 1, 1,
   2, 2, 3, 3,
   2, 2, 3, 3
};
119

120

121
static void
122
attrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix)
123
{
124
   if(attrib == 0)
125
      lp_build_name(val, "pos.%c%s", "xyzw"[chan], suffix);
126
   else
127
      lp_build_name(val, "input%u.%c%s", attrib - 1, "xyzw"[chan], suffix);
128
}
129

130
static void
131
calc_offsets(struct lp_build_context *coeff_bld,
132
             unsigned quad_start_index,
133
             LLVMValueRef *pixoffx,
134
             LLVMValueRef *pixoffy)
135
{
136
   unsigned i;
137
   unsigned num_pix = coeff_bld->type.length;
138
   struct gallivm_state *gallivm = coeff_bld->gallivm;
139
   LLVMBuilderRef builder = coeff_bld->gallivm->builder;
140
   LLVMValueRef nr, pixxf, pixyf;
141

142
   *pixoffx = coeff_bld->undef;
143
   *pixoffy = coeff_bld->undef;
144

145
   for (i = 0; i < num_pix; i++) {
146
      nr = lp_build_const_int32(gallivm, i);
147
      pixxf = lp_build_const_float(gallivm, quad_offset_x[i % num_pix] +
148
                                   (quad_start_index & 1) * 2);
149
      pixyf = lp_build_const_float(gallivm, quad_offset_y[i % num_pix] +
150
                                   (quad_start_index & 2));
151
      *pixoffx = LLVMBuildInsertElement(builder, *pixoffx, pixxf, nr, "");
152
      *pixoffy = LLVMBuildInsertElement(builder, *pixoffy, pixyf, nr, "");
153
   }
154
}
155

156
static void
157
calc_centroid_offsets(struct lp_build_interp_soa_context *bld,
158
                      struct gallivm_state *gallivm,
159
                      LLVMValueRef loop_iter,
160
                      LLVMValueRef mask_store,
161
                      LLVMValueRef pix_center_offset,
162
                      LLVMValueRef *centroid_x, LLVMValueRef *centroid_y)
163
{
164
   struct lp_build_context *coeff_bld = &bld->coeff_bld;
165
   LLVMBuilderRef builder = gallivm->builder;
166
   LLVMValueRef s_mask_and = NULL;
167
   LLVMValueRef centroid_x_offset = pix_center_offset;
168
   LLVMValueRef centroid_y_offset = pix_center_offset;
169
   for (int s = bld->coverage_samples - 1; s >= 0; s--) {
170
      LLVMValueRef sample_cov;
171
      LLVMValueRef s_mask_idx = LLVMBuildMul(builder, bld->num_loop, lp_build_const_int32(gallivm, s), "");
172

173
      s_mask_idx = LLVMBuildAdd(builder, s_mask_idx, loop_iter, "");
174
      sample_cov = lp_build_pointer_get(builder, mask_store, s_mask_idx);
175
      if (s == bld->coverage_samples - 1)
176
         s_mask_and = sample_cov;
177
      else
178
         s_mask_and = LLVMBuildAnd(builder, s_mask_and, sample_cov, "");
179

180
      LLVMValueRef x_val_idx = lp_build_const_int32(gallivm, s * 2);
181
      LLVMValueRef y_val_idx = lp_build_const_int32(gallivm, s * 2 + 1);
182

183
      x_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, x_val_idx);
184
      y_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, y_val_idx);
185
      x_val_idx = lp_build_broadcast_scalar(coeff_bld, x_val_idx);
186
      y_val_idx = lp_build_broadcast_scalar(coeff_bld, y_val_idx);
187
      centroid_x_offset = lp_build_select(coeff_bld, sample_cov, x_val_idx, centroid_x_offset);
188
      centroid_y_offset = lp_build_select(coeff_bld, sample_cov, y_val_idx, centroid_y_offset);
189
   }
190
   *centroid_x = lp_build_select(coeff_bld, s_mask_and, pix_center_offset, centroid_x_offset);
191
   *centroid_y = lp_build_select(coeff_bld, s_mask_and, pix_center_offset, centroid_y_offset);
192
}
193

194
/* Much easier, and significantly less instructions in the per-stamp
195
 * part (less than half) but overall more instructions so a loss if
196
 * most quads are active. Might be a win though with larger vectors.
197
 * No ability to do per-quad divide (doable but not implemented)
198
 * Could be made to work with passed in pixel offsets (i.e. active quad merging).
199
 */
200
static void
201
coeffs_init_simple(struct lp_build_interp_soa_context *bld,
202
                   LLVMValueRef a0_ptr,
203
                   LLVMValueRef dadx_ptr,
204
                   LLVMValueRef dady_ptr)
205
{
206
   struct lp_build_context *coeff_bld = &bld->coeff_bld;
207
   struct lp_build_context *setup_bld = &bld->setup_bld;
208
   struct gallivm_state *gallivm = coeff_bld->gallivm;
209
   LLVMBuilderRef builder = gallivm->builder;
210
   unsigned attrib;
211

212
   for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
213
      /*
214
       * always fetch all 4 values for performance/simplicity
215
       * Note: we do that here because it seems to generate better
216
       * code. It generates a lot of moves initially but less
217
       * moves later. As far as I can tell this looks like a
218
       * llvm issue, instead of simply reloading the values from
219
       * the passed in pointers it if it runs out of registers
220
       * it spills/reloads them. Maybe some optimization passes
221
       * would help.
222
       * Might want to investigate this again later.
223
       */
224
      const unsigned interp = bld->interp[attrib];
225
      LLVMValueRef index = lp_build_const_int32(gallivm,
226
                                attrib * TGSI_NUM_CHANNELS);
227
      LLVMValueRef ptr;
228
      LLVMValueRef dadxaos = setup_bld->zero;
229
      LLVMValueRef dadyaos = setup_bld->zero;
230
      LLVMValueRef a0aos = setup_bld->zero;
231

232
      switch (interp) {
233
      case LP_INTERP_PERSPECTIVE:
234
         FALLTHROUGH;
235

236
      case LP_INTERP_LINEAR:
237
         ptr = LLVMBuildGEP(builder, dadx_ptr, &index, 1, "");
238
         ptr = LLVMBuildBitCast(builder, ptr,
239
               LLVMPointerType(setup_bld->vec_type, 0), "");
240
         dadxaos = LLVMBuildLoad(builder, ptr, "");
241

242
         ptr = LLVMBuildGEP(builder, dady_ptr, &index, 1, "");
243
         ptr = LLVMBuildBitCast(builder, ptr,
244
               LLVMPointerType(setup_bld->vec_type, 0), "");
245
         dadyaos = LLVMBuildLoad(builder, ptr, "");
246

247
         attrib_name(dadxaos, attrib, 0, ".dadxaos");
248
         attrib_name(dadyaos, attrib, 0, ".dadyaos");
249
         FALLTHROUGH;
250

251
      case LP_INTERP_CONSTANT:
252
      case LP_INTERP_FACING:
253
         ptr = LLVMBuildGEP(builder, a0_ptr, &index, 1, "");
254
         ptr = LLVMBuildBitCast(builder, ptr,
255
               LLVMPointerType(setup_bld->vec_type, 0), "");
256
         a0aos = LLVMBuildLoad(builder, ptr, "");
257
         attrib_name(a0aos, attrib, 0, ".a0aos");
258
         break;
259

260
      case LP_INTERP_POSITION:
261
         /* Nothing to do as the position coeffs are already setup in slot 0 */
262
         continue;
263

264
      default:
265
         assert(0);
266
         break;
267
      }
268
      bld->a0aos[attrib] = a0aos;
269
      bld->dadxaos[attrib] = dadxaos;
270
      bld->dadyaos[attrib] = dadyaos;
271
   }
272
}
273

274
/**
275
 * Interpolate the shader input attribute values.
276
 * This is called for each (group of) quad(s).
277
 */
278
static void
279
attribs_update_simple(struct lp_build_interp_soa_context *bld,
280
                      struct gallivm_state *gallivm,
281
                      LLVMValueRef loop_iter,
282
                      LLVMValueRef mask_store,
283
                      LLVMValueRef sample_id,
284
                      int start,
285
                      int end)
286
{
287
   LLVMBuilderRef builder = gallivm->builder;
288
   struct lp_build_context *coeff_bld = &bld->coeff_bld;
289
   struct lp_build_context *setup_bld = &bld->setup_bld;
290
   LLVMValueRef oow = NULL;
291
   unsigned attrib;
292
   LLVMValueRef pixoffx;
293
   LLVMValueRef pixoffy;
294
   LLVMValueRef ptr;
295
   LLVMValueRef pix_center_offset = lp_build_const_vec(gallivm, coeff_bld->type, 0.5);
296

297
   /* could do this with code-generated passed in pixel offsets too */
298

299
   assert(loop_iter);
300
   ptr = LLVMBuildGEP(builder, bld->xoffset_store, &loop_iter, 1, "");
301
   pixoffx = LLVMBuildLoad(builder, ptr, "");
302
   ptr = LLVMBuildGEP(builder, bld->yoffset_store, &loop_iter, 1, "");
303
   pixoffy = LLVMBuildLoad(builder, ptr, "");
304

305
   pixoffx = LLVMBuildFAdd(builder, pixoffx,
306
                           lp_build_broadcast_scalar(coeff_bld, bld->x), "");
307
   pixoffy = LLVMBuildFAdd(builder, pixoffy,
308
                           lp_build_broadcast_scalar(coeff_bld, bld->y), "");
309

310
   for (attrib = start; attrib < end; attrib++) {
311
      const unsigned mask = bld->mask[attrib];
312
      const unsigned interp = bld->interp[attrib];
313
      const unsigned loc = bld->interp_loc[attrib];
314
      unsigned chan;
315

316
      for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
317
         if (mask & (1 << chan)) {
318
            LLVMValueRef index;
319
            LLVMValueRef dadx = coeff_bld->zero;
320
            LLVMValueRef dady = coeff_bld->zero;
321
            LLVMValueRef a = coeff_bld->zero;
322
            LLVMValueRef chan_pixoffx = pixoffx, chan_pixoffy = pixoffy;
323

324
            index = lp_build_const_int32(gallivm, chan);
325
            switch (interp) {
326
            case LP_INTERP_PERSPECTIVE:
327
               FALLTHROUGH;
328

329
            case LP_INTERP_LINEAR:
330
               if (attrib == 0 && chan == 0) {
331
                  dadx = coeff_bld->one;
332
                  if (sample_id) {
333
                     LLVMValueRef x_val_idx = LLVMBuildMul(gallivm->builder, sample_id, lp_build_const_int32(gallivm, 2), "");
334
                     x_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, x_val_idx);
335
                     a = lp_build_broadcast_scalar(coeff_bld, x_val_idx);
336
                  } else {
337
                     a = lp_build_const_vec(gallivm, coeff_bld->type, bld->pos_offset);
338
                  }
339
               }
340
               else if (attrib == 0 && chan == 1) {
341
                  dady = coeff_bld->one;
342
                  if (sample_id) {
343
                     LLVMValueRef y_val_idx = LLVMBuildMul(gallivm->builder, sample_id, lp_build_const_int32(gallivm, 2), "");
344
                     y_val_idx = LLVMBuildAdd(gallivm->builder, y_val_idx, lp_build_const_int32(gallivm, 1), "");
345
                     y_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, y_val_idx);
346
                     a = lp_build_broadcast_scalar(coeff_bld, y_val_idx);
347
                  } else {
348
                     a = lp_build_const_vec(gallivm, coeff_bld->type, bld->pos_offset);
349
                  }
350
               }
351
               else {
352
                  dadx = lp_build_extract_broadcast(gallivm, setup_bld->type,
353
                                                    coeff_bld->type, bld->dadxaos[attrib],
354
                                                    index);
355
                  dady = lp_build_extract_broadcast(gallivm, setup_bld->type,
356
                                                    coeff_bld->type, bld->dadyaos[attrib],
357
                                                    index);
358
                  a = lp_build_extract_broadcast(gallivm, setup_bld->type,
359
                                                 coeff_bld->type, bld->a0aos[attrib],
360
                                                 index);
361

362
                  if (bld->coverage_samples > 1) {
363
                     LLVMValueRef xoffset = pix_center_offset;
364
                     LLVMValueRef yoffset = pix_center_offset;
365
                     if (loc == TGSI_INTERPOLATE_LOC_SAMPLE || (attrib == 0 && chan == 2 && sample_id)) {
366
                        LLVMValueRef x_val_idx = LLVMBuildMul(gallivm->builder, sample_id, lp_build_const_int32(gallivm, 2), "");
367
                        LLVMValueRef y_val_idx = LLVMBuildAdd(gallivm->builder, x_val_idx, lp_build_const_int32(gallivm, 1), "");
368

369
                        x_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, x_val_idx);
370
                        y_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, y_val_idx);
371
                        xoffset = lp_build_broadcast_scalar(coeff_bld, x_val_idx);
372
                        yoffset = lp_build_broadcast_scalar(coeff_bld, y_val_idx);
373
                     } else if (loc == TGSI_INTERPOLATE_LOC_CENTROID) {
374
                        calc_centroid_offsets(bld, gallivm, loop_iter, mask_store,
375
                                              pix_center_offset, &xoffset, &yoffset);
376
                     }
377
                     chan_pixoffx = lp_build_add(coeff_bld, chan_pixoffx, xoffset);
378
                     chan_pixoffy = lp_build_add(coeff_bld, chan_pixoffy, yoffset);
379
                  }
380
               }
381

382
               /*
383
                * a = a0 + (x * dadx + y * dady)
384
                */
385
               a = lp_build_fmuladd(builder, dadx, chan_pixoffx, a);
386
               a = lp_build_fmuladd(builder, dady, chan_pixoffy, a);
387

388
               if (interp == LP_INTERP_PERSPECTIVE) {
389
                  if (oow == NULL) {
390
                     LLVMValueRef w = bld->attribs[0][3];
391
                     assert(attrib != 0);
392
                     assert(bld->mask[0] & TGSI_WRITEMASK_W);
393
                     oow = lp_build_rcp(coeff_bld, w);
394
                  }
395
                  a = lp_build_mul(coeff_bld, a, oow);
396
               }
397
               break;
398

399
            case LP_INTERP_CONSTANT:
400
            case LP_INTERP_FACING:
401
               a = lp_build_extract_broadcast(gallivm, setup_bld->type,
402
                                              coeff_bld->type, bld->a0aos[attrib],
403
                                              index);
404
               break;
405

406
            case LP_INTERP_POSITION:
407
               assert(attrib > 0);
408
               a = bld->attribs[0][chan];
409
               break;
410

411
            default:
412
               assert(0);
413
               break;
414
            }
415

416
            if ((attrib == 0) && (chan == 2) && !bld->depth_clamp){
417
               /* FIXME: Depth values can exceed 1.0, due to the fact that
418
                * setup interpolation coefficients refer to (0,0) which causes
419
                * precision loss. So we must clamp to 1.0 here to avoid artifacts.
420
                * Note though values outside [0,1] are perfectly valid with
421
                * depth clip disabled.
422
                * XXX: If depth clip is disabled but we force depth clamp
423
                * we may get values larger than 1.0 in the fs (but not in
424
                * depth test). Not sure if that's an issue...
425
                * Also, on a similar note, it is not obvious if the depth values
426
                * appearing in fs (with depth clip disabled) should be clamped
427
                * to [0,1], clamped to near/far or not be clamped at all...
428
                */
429
               a = lp_build_min(coeff_bld, a, coeff_bld->one);
430
            }
431
            bld->attribs[attrib][chan] = a;
432
         }
433
      }
434
   }
435
}
436

437
static LLVMValueRef
438
lp_build_interp_soa_indirect(struct lp_build_interp_soa_context *bld,
439
                             struct gallivm_state *gallivm,
440
                             unsigned attrib, unsigned chan,
441
                             LLVMValueRef indir_index,
442
                             LLVMValueRef pixoffx,
443
                             LLVMValueRef pixoffy)
444
{
445
   LLVMBuilderRef builder = gallivm->builder;
446
   struct lp_build_context *coeff_bld = &bld->coeff_bld;
447
   const unsigned interp = bld->interp[attrib];
448
   LLVMValueRef dadx = coeff_bld->zero;
449
   LLVMValueRef dady = coeff_bld->zero;
450
   LLVMValueRef a = coeff_bld->zero;
451

452
   LLVMTypeRef u8ptr = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
453

454
   indir_index = LLVMBuildAdd(builder, indir_index, lp_build_const_int_vec(gallivm, coeff_bld->type, attrib), "");
455
   LLVMValueRef index = LLVMBuildMul(builder, indir_index, lp_build_const_int_vec(gallivm, coeff_bld->type, 4), "");
456
   index = LLVMBuildAdd(builder, index, lp_build_const_int_vec(gallivm, coeff_bld->type, chan), "");
457

458
   /* size up to byte indices */
459
   index = LLVMBuildMul(builder, index, lp_build_const_int_vec(gallivm, coeff_bld->type, 4), "");
460

461
   struct lp_type dst_type = coeff_bld->type;
462
   dst_type.length = 1;
463
   switch (interp) {
464
   case LP_INTERP_PERSPECTIVE:
465
      FALLTHROUGH;
466
   case LP_INTERP_LINEAR:
467

468
      dadx = lp_build_gather(gallivm, coeff_bld->type.length,
469
                             coeff_bld->type.width, dst_type,
470
                             true, LLVMBuildBitCast(builder, bld->dadx_ptr, u8ptr, ""), index, false);
471

472
      dady = lp_build_gather(gallivm, coeff_bld->type.length,
473
                             coeff_bld->type.width, dst_type,
474
                             true, LLVMBuildBitCast(builder, bld->dady_ptr, u8ptr, ""), index, false);
475

476
      a = lp_build_gather(gallivm, coeff_bld->type.length,
477
                          coeff_bld->type.width, dst_type,
478
                          true, LLVMBuildBitCast(builder, bld->a0_ptr, u8ptr, ""), index, false);
479

480
      /*
481
       * a = a0 + (x * dadx + y * dady)
482
       */
483
      a = lp_build_fmuladd(builder, dadx, pixoffx, a);
484
      a = lp_build_fmuladd(builder, dady, pixoffy, a);
485

486
      if (interp == LP_INTERP_PERSPECTIVE) {
487
        LLVMValueRef w = bld->attribs[0][3];
488
        assert(attrib != 0);
489
        assert(bld->mask[0] & TGSI_WRITEMASK_W);
490
        LLVMValueRef oow = lp_build_rcp(coeff_bld, w);
491
        a = lp_build_mul(coeff_bld, a, oow);
492
      }
493

494
      break;
495
   case LP_INTERP_CONSTANT:
496
   case LP_INTERP_FACING:
497
      a = lp_build_gather(gallivm, coeff_bld->type.length,
498
                          coeff_bld->type.width, dst_type,
499
                          true, LLVMBuildBitCast(builder, bld->a0_ptr, u8ptr, ""), index, false);
500
      break;
501
   default:
502
      assert(0);
503
      break;
504
   }
505
   return a;
506
}
507

508
LLVMValueRef
509
lp_build_interp_soa(struct lp_build_interp_soa_context *bld,
510
                    struct gallivm_state *gallivm,
511
                    LLVMValueRef loop_iter,
512
                    LLVMValueRef mask_store,
513
                    unsigned attrib, unsigned chan,
514
                    unsigned loc,
515
                    LLVMValueRef indir_index,
516
                    LLVMValueRef offsets[2])
517
{
518
   LLVMBuilderRef builder = gallivm->builder;
519
   struct lp_build_context *coeff_bld = &bld->coeff_bld;
520
   struct lp_build_context *setup_bld = &bld->setup_bld;
521
   LLVMValueRef pixoffx;
522
   LLVMValueRef pixoffy;
523
   LLVMValueRef ptr;
524

525
   /* could do this with code-generated passed in pixel offsets too */
526

527
   assert(loop_iter);
528
   ptr = LLVMBuildGEP(builder, bld->xoffset_store, &loop_iter, 1, "");
529
   pixoffx = LLVMBuildLoad(builder, ptr, "");
530
   ptr = LLVMBuildGEP(builder, bld->yoffset_store, &loop_iter, 1, "");
531
   pixoffy = LLVMBuildLoad(builder, ptr, "");
532

533
   pixoffx = LLVMBuildFAdd(builder, pixoffx,
534
                           lp_build_broadcast_scalar(coeff_bld, bld->x), "");
535
   pixoffy = LLVMBuildFAdd(builder, pixoffy,
536
                           lp_build_broadcast_scalar(coeff_bld, bld->y), "");
537

538
   LLVMValueRef pix_center_offset = lp_build_const_vec(gallivm, coeff_bld->type, 0.5);
539

540
   if (loc == TGSI_INTERPOLATE_LOC_CENTER) {
541
      if (bld->coverage_samples > 1) {
542
         pixoffx = LLVMBuildFAdd(builder, pixoffx, pix_center_offset, "");
543
         pixoffy = LLVMBuildFAdd(builder, pixoffy, pix_center_offset, "");
544
      }
545

546
      if (offsets[0])
547
         pixoffx = LLVMBuildFAdd(builder, pixoffx,
548
                                 offsets[0], "");
549
      if (offsets[1])
550
         pixoffy = LLVMBuildFAdd(builder, pixoffy,
551
                                 offsets[1], "");
552
   } else if (loc == TGSI_INTERPOLATE_LOC_SAMPLE) {
553
      LLVMValueRef x_val_idx = LLVMBuildMul(gallivm->builder, offsets[0], lp_build_const_int_vec(gallivm, bld->coeff_bld.type, 2 * 4), "");
554
      LLVMValueRef y_val_idx = LLVMBuildAdd(gallivm->builder, x_val_idx, lp_build_const_int_vec(gallivm, bld->coeff_bld.type, 4), "");
555

556
      LLVMValueRef base_ptr = LLVMBuildBitCast(gallivm->builder, bld->sample_pos_array,
557
                                               LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), "");
558
      LLVMValueRef xoffset = lp_build_gather(gallivm,
559
                                             bld->coeff_bld.type.length,
560
                                             bld->coeff_bld.type.width,
561
                                             lp_elem_type(bld->coeff_bld.type),
562
                                             false,
563
                                             base_ptr,
564
                                             x_val_idx, true);
565
      LLVMValueRef yoffset = lp_build_gather(gallivm,
566
                                             bld->coeff_bld.type.length,
567
                                             bld->coeff_bld.type.width,
568
                                             lp_elem_type(bld->coeff_bld.type),
569
                                             false,
570
                                             base_ptr,
571
                                             y_val_idx, true);      
572

573
      if (bld->coverage_samples > 1) {
574
         pixoffx = LLVMBuildFAdd(builder, pixoffx, xoffset, "");
575
         pixoffy = LLVMBuildFAdd(builder, pixoffy, yoffset, "");
576
      }
577
   } else if (loc == TGSI_INTERPOLATE_LOC_CENTROID) {
578
      LLVMValueRef centroid_x_offset, centroid_y_offset;
579

580
      /* for centroid find covered samples for this quad. */
581
      /* if all samples are covered use pixel centers */
582
      if (bld->coverage_samples > 1) {
583
         calc_centroid_offsets(bld, gallivm, loop_iter, mask_store,
584
			       pix_center_offset, &centroid_x_offset, &centroid_y_offset);
585

586
         pixoffx = LLVMBuildFAdd(builder, pixoffx, centroid_x_offset, "");
587
         pixoffy = LLVMBuildFAdd(builder, pixoffy, centroid_y_offset, "");
588
      }
589
   }
590

591
   // remap attrib properly.
592
   attrib++;
593

594
   if (indir_index)
595
     return lp_build_interp_soa_indirect(bld, gallivm, attrib, chan,
596
					 indir_index, pixoffx, pixoffy);
597

598

599
   const unsigned interp = bld->interp[attrib];
600
   LLVMValueRef dadx = coeff_bld->zero;
601
   LLVMValueRef dady = coeff_bld->zero;
602
   LLVMValueRef a = coeff_bld->zero;
603

604
   LLVMValueRef index = lp_build_const_int32(gallivm, chan);
605

606
   switch (interp) {
607
   case LP_INTERP_PERSPECTIVE:
608
      FALLTHROUGH;
609
   case LP_INTERP_LINEAR:
610
      dadx = lp_build_extract_broadcast(gallivm, setup_bld->type,
611
                                        coeff_bld->type, bld->dadxaos[attrib],
612
                                        index);
613

614
      dady = lp_build_extract_broadcast(gallivm, setup_bld->type,
615
                                        coeff_bld->type, bld->dadyaos[attrib],
616
                                        index);
617

618
      a = lp_build_extract_broadcast(gallivm, setup_bld->type,
619
                                     coeff_bld->type, bld->a0aos[attrib],
620
                                     index);
621

622
      /*
623
       * a = a0 + (x * dadx + y * dady)
624
       */
625
      a = lp_build_fmuladd(builder, dadx, pixoffx, a);
626
      a = lp_build_fmuladd(builder, dady, pixoffy, a);
627

628
      if (interp == LP_INTERP_PERSPECTIVE) {
629
        LLVMValueRef w = bld->attribs[0][3];
630
        assert(attrib != 0);
631
        assert(bld->mask[0] & TGSI_WRITEMASK_W);
632
        LLVMValueRef oow = lp_build_rcp(coeff_bld, w);
633
        a = lp_build_mul(coeff_bld, a, oow);
634
      }
635

636
      break;
637
   case LP_INTERP_CONSTANT:
638
   case LP_INTERP_FACING:
639
      a = lp_build_extract_broadcast(gallivm, setup_bld->type,
640
                                     coeff_bld->type, bld->a0aos[attrib],
641
                                     index);
642
      break;
643
   default:
644
      assert(0);
645
      break;
646
   }
647
   return a;
648
}
649

650
/**
651
 * Generate the position vectors.
652
 *
653
 * Parameter x0, y0 are the integer values with upper left coordinates.
654
 */
655
static void
656
pos_init(struct lp_build_interp_soa_context *bld,
657
         LLVMValueRef x0,
658
         LLVMValueRef y0)
659
{
660
   LLVMBuilderRef builder = bld->coeff_bld.gallivm->builder;
661
   struct lp_build_context *coeff_bld = &bld->coeff_bld;
662

663
   bld->x = LLVMBuildSIToFP(builder, x0, coeff_bld->elem_type, "");
664
   bld->y = LLVMBuildSIToFP(builder, y0, coeff_bld->elem_type, "");
665
}
666

667

668
/**
669
 * Initialize fragment shader input attribute info.
670
 */
671
void
672
lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
673
                         struct gallivm_state *gallivm,
674
                         unsigned num_inputs,
675
                         const struct lp_shader_input *inputs,
676
                         boolean pixel_center_integer,
677
                         unsigned coverage_samples,
678
                         LLVMValueRef sample_pos_array,
679
                         LLVMValueRef num_loop,
680
                         boolean depth_clamp,
681
                         LLVMBuilderRef builder,
682
                         struct lp_type type,
683
                         LLVMValueRef a0_ptr,
684
                         LLVMValueRef dadx_ptr,
685
                         LLVMValueRef dady_ptr,
686
                         LLVMValueRef x0,
687
                         LLVMValueRef y0)
688
{
689
   struct lp_type coeff_type;
690
   struct lp_type setup_type;
691
   unsigned attrib;
692
   unsigned chan;
693

694
   memset(bld, 0, sizeof *bld);
695

696
   memset(&coeff_type, 0, sizeof coeff_type);
697
   coeff_type.floating = TRUE;
698
   coeff_type.sign = TRUE;
699
   coeff_type.width = 32;
700
   coeff_type.length = type.length;
701

702
   memset(&setup_type, 0, sizeof setup_type);
703
   setup_type.floating = TRUE;
704
   setup_type.sign = TRUE;
705
   setup_type.width = 32;
706
   setup_type.length = TGSI_NUM_CHANNELS;
707

708

709
   /* XXX: we don't support interpolating into any other types */
710
   assert(memcmp(&coeff_type, &type, sizeof coeff_type) == 0);
711

712
   lp_build_context_init(&bld->coeff_bld, gallivm, coeff_type);
713
   lp_build_context_init(&bld->setup_bld, gallivm, setup_type);
714

715
   /* For convenience */
716
   bld->pos = bld->attribs[0];
717
   bld->inputs = (const LLVMValueRef (*)[TGSI_NUM_CHANNELS]) bld->attribs[1];
718

719
   /* Position */
720
   bld->mask[0] = TGSI_WRITEMASK_XYZW;
721
   bld->interp[0] = LP_INTERP_LINEAR;
722
   bld->interp_loc[0] = 0;
723

724
   /* Inputs */
725
   for (attrib = 0; attrib < num_inputs; ++attrib) {
726
      bld->mask[1 + attrib] = inputs[attrib].usage_mask;
727
      bld->interp[1 + attrib] = inputs[attrib].interp;
728
      bld->interp_loc[1 + attrib] = inputs[attrib].location;
729
   }
730
   bld->num_attribs = 1 + num_inputs;
731

732
   /* needed for indirect */
733
   bld->a0_ptr = a0_ptr;
734
   bld->dadx_ptr = dadx_ptr;
735
   bld->dady_ptr = dady_ptr;
736

737
   /* Ensure all masked out input channels have a valid value */
738
   for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
739
      for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
740
         bld->attribs[attrib][chan] = bld->coeff_bld.undef;
741
      }
742
   }
743

744
   if (pixel_center_integer) {
745
      bld->pos_offset = 0.0;
746
   } else {
747
      bld->pos_offset = 0.5;
748
   }
749
   bld->depth_clamp = depth_clamp;
750
   bld->coverage_samples = coverage_samples;
751
   bld->num_loop = num_loop;
752
   bld->sample_pos_array = sample_pos_array;
753

754
   pos_init(bld, x0, y0);
755

756
   /*
757
    * Simple method (single step interpolation) may be slower if vector length
758
    * is just 4, but the results are different (generally less accurate) with
759
    * the other method, so always use more accurate version.
760
    */
761
   {
762
      /* XXX this should use a global static table */
763
      unsigned i;
764
      unsigned num_loops = 16 / type.length;
765
      LLVMValueRef pixoffx, pixoffy, index;
766
      LLVMValueRef ptr;
767

768
      bld->xoffset_store = lp_build_array_alloca(gallivm,
769
                                                 lp_build_vec_type(gallivm, type),
770
                                                 lp_build_const_int32(gallivm, num_loops),
771
                                                 "");
772
      bld->yoffset_store = lp_build_array_alloca(gallivm,
773
                                                 lp_build_vec_type(gallivm, type),
774
                                                 lp_build_const_int32(gallivm, num_loops),
775
                                                 "");
776
      for (i = 0; i < num_loops; i++) {
777
         index = lp_build_const_int32(gallivm, i);
778
         calc_offsets(&bld->coeff_bld, i*type.length/4, &pixoffx, &pixoffy);
779
         ptr = LLVMBuildGEP(builder, bld->xoffset_store, &index, 1, "");
780
         LLVMBuildStore(builder, pixoffx, ptr);
781
         ptr = LLVMBuildGEP(builder, bld->yoffset_store, &index, 1, "");
782
         LLVMBuildStore(builder, pixoffy, ptr);
783
      }
784
   }
785
   coeffs_init_simple(bld, a0_ptr, dadx_ptr, dady_ptr);
786
}
787

788

789
/*
790
 * Advance the position and inputs to the given quad within the block.
791
 */
792

793
void
794
lp_build_interp_soa_update_inputs_dyn(struct lp_build_interp_soa_context *bld,
795
                                      struct gallivm_state *gallivm,
796
                                      LLVMValueRef quad_start_index,
797
                                      LLVMValueRef mask_store,
798
                                      LLVMValueRef sample_id)
799
{
800
   attribs_update_simple(bld, gallivm, quad_start_index, mask_store, sample_id, 1, bld->num_attribs);
801
}
802

803
void
804
lp_build_interp_soa_update_pos_dyn(struct lp_build_interp_soa_context *bld,
805
                                   struct gallivm_state *gallivm,
806
                                   LLVMValueRef quad_start_index,
807
                                   LLVMValueRef sample_id)
808
{
809
   attribs_update_simple(bld, gallivm, quad_start_index, NULL, sample_id, 0, 1);
810
}
811

812

813
Product

Resources

Company