CoCalc -- brw_compile_ff

GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/intel/compiler/brw_compile_ff_gs.c
⁴⁵⁵⁰ views
1
/*
2
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3
 Intel funded Tungsten Graphics to
4
 develop this 3D driver.
5

6
 Permission is hereby granted, free of charge, to any person obtaining
7
 a copy of this software and associated documentation files (the
8
 "Software"), to deal in the Software without restriction, including
9
 without limitation the rights to use, copy, modify, merge, publish,
10
 distribute, sublicense, and/or sell copies of the Software, and to
11
 permit persons to whom the Software is furnished to do so, subject to
12
 the following conditions:
13

14
 The above copyright notice and this permission notice (including the
15
 next paragraph) shall be included in all copies or substantial
16
 portions of the Software.
17

18
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25

26
 **********************************************************************/
27
 /*
28
  * Authors:
29
  *   Keith Whitwell <keithw@vmware.com>
30
  */
31

32
#include "brw_compiler.h"
33
#include "brw_eu.h"
34

35
#include "dev/intel_debug.h"
36

37
#define MAX_GS_VERTS (4)
38

39
struct brw_ff_gs_compile {
40
   struct brw_codegen func;
41
   struct brw_ff_gs_prog_key key;
42
   struct brw_ff_gs_prog_data *prog_data;
43

44
   struct {
45
      struct brw_reg R0;
46

47
      /**
48
       * Register holding streamed vertex buffer pointers -- see the Sandy
49
       * Bridge PRM, volume 2 part 1, section 4.4.2 (GS Thread Payload
50
       * [DevSNB]).  These pointers are delivered in GRF 1.
51
       */
52
      struct brw_reg SVBI;
53

54
      struct brw_reg vertex[MAX_GS_VERTS];
55
      struct brw_reg header;
56
      struct brw_reg temp;
57

58
      /**
59
       * Register holding destination indices for streamed buffer writes.
60
       * Only used for SOL programs.
61
       */
62
      struct brw_reg destination_indices;
63
   } reg;
64

65
   /* Number of registers used to store vertex data */
66
   GLuint nr_regs;
67

68
   struct brw_vue_map vue_map;
69
};
70

71
/**
72
 * Allocate registers for GS.
73
 *
74
 * If sol_program is true, then:
75
 *
76
 * - The thread will be spawned with the "SVBI Payload Enable" bit set, so GRF
77
 *   1 needs to be set aside to hold the streamed vertex buffer indices.
78
 *
79
 * - The thread will need to use the destination_indices register.
80
 */
81
static void brw_ff_gs_alloc_regs(struct brw_ff_gs_compile *c,
82
                                 GLuint nr_verts,
83
                                 bool sol_program)
84
{
85
   GLuint i = 0,j;
86

87
   /* Register usage is static, precompute here:
88
    */
89
   c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;
90

91
   /* Streamed vertex buffer indices */
92
   if (sol_program)
93
      c->reg.SVBI = retype(brw_vec8_grf(i++, 0), BRW_REGISTER_TYPE_UD);
94

95
   /* Payload vertices plus space for more generated vertices:
96
    */
97
   for (j = 0; j < nr_verts; j++) {
98
      c->reg.vertex[j] = brw_vec4_grf(i, 0);
99
      i += c->nr_regs;
100
   }
101

102
   c->reg.header = retype(brw_vec8_grf(i++, 0), BRW_REGISTER_TYPE_UD);
103
   c->reg.temp = retype(brw_vec8_grf(i++, 0), BRW_REGISTER_TYPE_UD);
104

105
   if (sol_program) {
106
      c->reg.destination_indices =
107
         retype(brw_vec4_grf(i++, 0), BRW_REGISTER_TYPE_UD);
108
   }
109

110
   c->prog_data->urb_read_length = c->nr_regs;
111
   c->prog_data->total_grf = i;
112
}
113

114

115
/**
116
 * Set up the initial value of c->reg.header register based on c->reg.R0.
117
 *
118
 * The following information is passed to the GS thread in R0, and needs to be
119
 * included in the first URB_WRITE or FF_SYNC message sent by the GS:
120
 *
121
 * - DWORD 0 [31:0] handle info (Gen4 only)
122
 * - DWORD 5 [7:0] FFTID
123
 * - DWORD 6 [31:0] Debug info
124
 * - DWORD 7 [31:0] Debug info
125
 *
126
 * This function sets up the above data by copying by copying the contents of
127
 * R0 to the header register.
128
 */
129
static void brw_ff_gs_initialize_header(struct brw_ff_gs_compile *c)
130
{
131
   struct brw_codegen *p = &c->func;
132
   brw_MOV(p, c->reg.header, c->reg.R0);
133
}
134

135
/**
136
 * Overwrite DWORD 2 of c->reg.header with the given immediate unsigned value.
137
 *
138
 * In URB_WRITE messages, DWORD 2 contains the fields PrimType, PrimStart,
139
 * PrimEnd, Increment CL_INVOCATIONS, and SONumPrimsWritten, many of which we
140
 * need to be able to update on a per-vertex basis.
141
 */
142
static void brw_ff_gs_overwrite_header_dw2(struct brw_ff_gs_compile *c,
143
                                           unsigned dw2)
144
{
145
   struct brw_codegen *p = &c->func;
146
   brw_MOV(p, get_element_ud(c->reg.header, 2), brw_imm_ud(dw2));
147
}
148

149
/**
150
 * Overwrite DWORD 2 of c->reg.header with the primitive type from c->reg.R0.
151
 *
152
 * When the thread is spawned, GRF 0 contains the primitive type in bits 4:0
153
 * of DWORD 2.  URB_WRITE messages need the primitive type in bits 6:2 of
154
 * DWORD 2.  So this function extracts the primitive type field, bitshifts it
155
 * appropriately, and stores it in c->reg.header.
156
 */
157
static void brw_ff_gs_overwrite_header_dw2_from_r0(struct brw_ff_gs_compile *c)
158
{
159
   struct brw_codegen *p = &c->func;
160
   brw_AND(p, get_element_ud(c->reg.header, 2), get_element_ud(c->reg.R0, 2),
161
           brw_imm_ud(0x1f));
162
   brw_SHL(p, get_element_ud(c->reg.header, 2),
163
           get_element_ud(c->reg.header, 2), brw_imm_ud(2));
164
}
165

166
/**
167
 * Apply an additive offset to DWORD 2 of c->reg.header.
168
 *
169
 * This is used to set/unset the "PrimStart" and "PrimEnd" flags appropriately
170
 * for each vertex.
171
 */
172
static void brw_ff_gs_offset_header_dw2(struct brw_ff_gs_compile *c,
173
                                        int offset)
174
{
175
   struct brw_codegen *p = &c->func;
176
   brw_ADD(p, get_element_d(c->reg.header, 2), get_element_d(c->reg.header, 2),
177
           brw_imm_d(offset));
178
}
179

180

181
/**
182
 * Emit a vertex using the URB_WRITE message.  Use the contents of
183
 * c->reg.header for the message header, and the registers starting at \c vert
184
 * for the vertex data.
185
 *
186
 * If \c last is true, then this is the last vertex, so no further URB space
187
 * should be allocated, and this message should end the thread.
188
 *
189
 * If \c last is false, then a new URB entry will be allocated, and its handle
190
 * will be stored in DWORD 0 of c->reg.header for use in the next URB_WRITE
191
 * message.
192
 */
193
static void brw_ff_gs_emit_vue(struct brw_ff_gs_compile *c,
194
                               struct brw_reg vert,
195
                               bool last)
196
{
197
   struct brw_codegen *p = &c->func;
198
   int write_offset = 0;
199
   bool complete = false;
200

201
   do {
202
      /* We can't write more than 14 registers at a time to the URB */
203
      int write_len = MIN2(c->nr_regs - write_offset, 14);
204
      if (write_len == c->nr_regs - write_offset)
205
         complete = true;
206

207
      /* Copy the vertex from vertn into m1..mN+1:
208
       */
209
      brw_copy8(p, brw_message_reg(1), offset(vert, write_offset), write_len);
210

211
      /* Send the vertex data to the URB.  If this is the last write for this
212
       * vertex, then we mark it as complete, and either end the thread or
213
       * allocate another vertex URB entry (depending whether this is the last
214
       * vertex).
215
       */
216
      enum brw_urb_write_flags flags;
217
      if (!complete)
218
         flags = BRW_URB_WRITE_NO_FLAGS;
219
      else if (last)
220
         flags = BRW_URB_WRITE_EOT_COMPLETE;
221
      else
222
         flags = BRW_URB_WRITE_ALLOCATE_COMPLETE;
223
      brw_urb_WRITE(p,
224
                    (flags & BRW_URB_WRITE_ALLOCATE) ? c->reg.temp
225
                    : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
226
                    0,
227
                    c->reg.header,
228
                    flags,
229
                    write_len + 1, /* msg length */
230
                    (flags & BRW_URB_WRITE_ALLOCATE) ? 1
231
                    : 0, /* response length */
232
                    write_offset,  /* urb offset */
233
                    BRW_URB_SWIZZLE_NONE);
234
      write_offset += write_len;
235
   } while (!complete);
236

237
   if (!last) {
238
      brw_MOV(p, get_element_ud(c->reg.header, 0),
239
              get_element_ud(c->reg.temp, 0));
240
   }
241
}
242

243
/**
244
 * Send an FF_SYNC message to ensure that all previously spawned GS threads
245
 * have finished sending primitives down the pipeline, and to allocate a URB
246
 * entry for the first output vertex.  Only needed on Ironlake+.
247
 *
248
 * This function modifies c->reg.header: in DWORD 1, it stores num_prim (which
249
 * is needed by the FF_SYNC message), and in DWORD 0, it stores the handle to
250
 * the allocated URB entry (which will be needed by the URB_WRITE meesage that
251
 * follows).
252
 */
253
static void brw_ff_gs_ff_sync(struct brw_ff_gs_compile *c, int num_prim)
254
{
255
   struct brw_codegen *p = &c->func;
256

257
   brw_MOV(p, get_element_ud(c->reg.header, 1), brw_imm_ud(num_prim));
258
   brw_ff_sync(p,
259
               c->reg.temp,
260
               0,
261
               c->reg.header,
262
               1, /* allocate */
263
               1, /* response length */
264
               0 /* eot */);
265
   brw_MOV(p, get_element_ud(c->reg.header, 0),
266
           get_element_ud(c->reg.temp, 0));
267
}
268

269

270
static void
271
brw_ff_gs_quads(struct brw_ff_gs_compile *c,
272
		const struct brw_ff_gs_prog_key *key)
273
{
274
   brw_ff_gs_alloc_regs(c, 4, false);
275
   brw_ff_gs_initialize_header(c);
276
   /* Use polygons for correct edgeflag behaviour. Note that vertex 3
277
    * is the PV for quads, but vertex 0 for polygons:
278
    */
279
   if (c->func.devinfo->ver == 5)
280
      brw_ff_gs_ff_sync(c, 1);
281
   brw_ff_gs_overwrite_header_dw2(
282
      c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT)
283
          | URB_WRITE_PRIM_START));
284
   if (key->pv_first) {
285
      brw_ff_gs_emit_vue(c, c->reg.vertex[0], 0);
286
      brw_ff_gs_overwrite_header_dw2(
287
         c, _3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT);
288
      brw_ff_gs_emit_vue(c, c->reg.vertex[1], 0);
289
      brw_ff_gs_emit_vue(c, c->reg.vertex[2], 0);
290
      brw_ff_gs_overwrite_header_dw2(
291
         c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT)
292
             | URB_WRITE_PRIM_END));
293
      brw_ff_gs_emit_vue(c, c->reg.vertex[3], 1);
294
   }
295
   else {
296
      brw_ff_gs_emit_vue(c, c->reg.vertex[3], 0);
297
      brw_ff_gs_overwrite_header_dw2(
298
         c, _3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT);
299
      brw_ff_gs_emit_vue(c, c->reg.vertex[0], 0);
300
      brw_ff_gs_emit_vue(c, c->reg.vertex[1], 0);
301
      brw_ff_gs_overwrite_header_dw2(
302
         c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT)
303
             | URB_WRITE_PRIM_END));
304
      brw_ff_gs_emit_vue(c, c->reg.vertex[2], 1);
305
   }
306
}
307

308
static void
309
brw_ff_gs_quad_strip(struct brw_ff_gs_compile *c,
310
                     const struct brw_ff_gs_prog_key *key)
311
{
312
   brw_ff_gs_alloc_regs(c, 4, false);
313
   brw_ff_gs_initialize_header(c);
314

315
   if (c->func.devinfo->ver == 5)
316
      brw_ff_gs_ff_sync(c, 1);
317
   brw_ff_gs_overwrite_header_dw2(
318
      c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT)
319
          | URB_WRITE_PRIM_START));
320
   if (key->pv_first) {
321
      brw_ff_gs_emit_vue(c, c->reg.vertex[0], 0);
322
      brw_ff_gs_overwrite_header_dw2(
323
         c, _3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT);
324
      brw_ff_gs_emit_vue(c, c->reg.vertex[1], 0);
325
      brw_ff_gs_emit_vue(c, c->reg.vertex[2], 0);
326
      brw_ff_gs_overwrite_header_dw2(
327
         c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT)
328
             | URB_WRITE_PRIM_END));
329
      brw_ff_gs_emit_vue(c, c->reg.vertex[3], 1);
330
   }
331
   else {
332
      brw_ff_gs_emit_vue(c, c->reg.vertex[2], 0);
333
      brw_ff_gs_overwrite_header_dw2(
334
         c, _3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT);
335
      brw_ff_gs_emit_vue(c, c->reg.vertex[3], 0);
336
      brw_ff_gs_emit_vue(c, c->reg.vertex[0], 0);
337
      brw_ff_gs_overwrite_header_dw2(
338
         c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT)
339
             | URB_WRITE_PRIM_END));
340
      brw_ff_gs_emit_vue(c, c->reg.vertex[1], 1);
341
   }
342
}
343

344
static void brw_ff_gs_lines(struct brw_ff_gs_compile *c)
345
{
346
   brw_ff_gs_alloc_regs(c, 2, false);
347
   brw_ff_gs_initialize_header(c);
348

349
   if (c->func.devinfo->ver == 5)
350
      brw_ff_gs_ff_sync(c, 1);
351
   brw_ff_gs_overwrite_header_dw2(
352
      c, ((_3DPRIM_LINESTRIP << URB_WRITE_PRIM_TYPE_SHIFT)
353
          | URB_WRITE_PRIM_START));
354
   brw_ff_gs_emit_vue(c, c->reg.vertex[0], 0);
355
   brw_ff_gs_overwrite_header_dw2(
356
      c, ((_3DPRIM_LINESTRIP << URB_WRITE_PRIM_TYPE_SHIFT)
357
          | URB_WRITE_PRIM_END));
358
   brw_ff_gs_emit_vue(c, c->reg.vertex[1], 1);
359
}
360

361
/**
362
 * Generate the geometry shader program used on Gen6 to perform stream output
363
 * (transform feedback).
364
 */
365
static void
366
gfx6_sol_program(struct brw_ff_gs_compile *c, const struct brw_ff_gs_prog_key *key,
367
                 unsigned num_verts, bool check_edge_flags)
368
{
369
   struct brw_codegen *p = &c->func;
370
   brw_inst *inst;
371
   c->prog_data->svbi_postincrement_value = num_verts;
372

373
   brw_ff_gs_alloc_regs(c, num_verts, true);
374
   brw_ff_gs_initialize_header(c);
375

376
   if (key->num_transform_feedback_bindings > 0) {
377
      unsigned vertex, binding;
378
      struct brw_reg destination_indices_uw =
379
         vec8(retype(c->reg.destination_indices, BRW_REGISTER_TYPE_UW));
380

381
      /* Note: since we use the binding table to keep track of buffer offsets
382
       * and stride, the GS doesn't need to keep track of a separate pointer
383
       * into each buffer; it uses a single pointer which increments by 1 for
384
       * each vertex.  So we use SVBI0 for this pointer, regardless of whether
385
       * transform feedback is in interleaved or separate attribs mode.
386
       *
387
       * Make sure that the buffers have enough room for all the vertices.
388
       */
389
      brw_ADD(p, get_element_ud(c->reg.temp, 0),
390
                 get_element_ud(c->reg.SVBI, 0), brw_imm_ud(num_verts));
391
      brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE,
392
                 get_element_ud(c->reg.temp, 0),
393
                 get_element_ud(c->reg.SVBI, 4));
394
      brw_IF(p, BRW_EXECUTE_1);
395

396
      /* Compute the destination indices to write to.  Usually we use SVBI[0]
397
       * + (0, 1, 2).  However, for odd-numbered triangles in tristrips, the
398
       * vertices come down the pipeline in reversed winding order, so we need
399
       * to flip the order when writing to the transform feedback buffer.  To
400
       * ensure that flatshading accuracy is preserved, we need to write them
401
       * in order SVBI[0] + (0, 2, 1) if we're using the first provoking
402
       * vertex convention, and in order SVBI[0] + (1, 0, 2) if we're using
403
       * the last provoking vertex convention.
404
       *
405
       * Note: since brw_imm_v can only be used in instructions in
406
       * packed-word execution mode, and SVBI is a double-word, we need to
407
       * first move the appropriate immediate constant ((0, 1, 2), (0, 2, 1),
408
       * or (1, 0, 2)) to the destination_indices register, and then add SVBI
409
       * using a separate instruction.  Also, since the immediate constant is
410
       * expressed as packed words, and we need to load double-words into
411
       * destination_indices, we need to intersperse zeros to fill the upper
412
       * halves of each double-word.
413
       */
414
      brw_MOV(p, destination_indices_uw,
415
              brw_imm_v(0x00020100)); /* (0, 1, 2) */
416
      if (num_verts == 3) {
417
         /* Get primitive type into temp register. */
418
         brw_AND(p, get_element_ud(c->reg.temp, 0),
419
                 get_element_ud(c->reg.R0, 2), brw_imm_ud(0x1f));
420

421
         /* Test if primitive type is TRISTRIP_REVERSE.  We need to do this as
422
          * an 8-wide comparison so that the conditional MOV that follows
423
          * moves all 8 words correctly.
424
          */
425
         brw_CMP(p, vec8(brw_null_reg()), BRW_CONDITIONAL_EQ,
426
                 get_element_ud(c->reg.temp, 0),
427
                 brw_imm_ud(_3DPRIM_TRISTRIP_REVERSE));
428

429
         /* If so, then overwrite destination_indices_uw with the appropriate
430
          * reordering.
431
          */
432
         inst = brw_MOV(p, destination_indices_uw,
433
                        brw_imm_v(key->pv_first ? 0x00010200    /* (0, 2, 1) */
434
                                                : 0x00020001)); /* (1, 0, 2) */
435
         brw_inst_set_pred_control(p->devinfo, inst, BRW_PREDICATE_NORMAL);
436
      }
437

438
      assert(c->reg.destination_indices.width == BRW_EXECUTE_4);
439
      brw_push_insn_state(p);
440
      brw_set_default_exec_size(p, BRW_EXECUTE_4);
441
      brw_ADD(p, c->reg.destination_indices,
442
              c->reg.destination_indices, get_element_ud(c->reg.SVBI, 0));
443
      brw_pop_insn_state(p);
444
      /* For each vertex, generate code to output each varying using the
445
       * appropriate binding table entry.
446
       */
447
      for (vertex = 0; vertex < num_verts; ++vertex) {
448
         /* Set up the correct destination index for this vertex */
449
         brw_MOV(p, get_element_ud(c->reg.header, 5),
450
                 get_element_ud(c->reg.destination_indices, vertex));
451

452
         for (binding = 0; binding < key->num_transform_feedback_bindings;
453
              ++binding) {
454
            unsigned char varying =
455
               key->transform_feedback_bindings[binding];
456
            unsigned char slot = c->vue_map.varying_to_slot[varying];
457
            /* From the Sandybridge PRM, Volume 2, Part 1, Section 4.5.1:
458
             *
459
             *   "Prior to End of Thread with a URB_WRITE, the kernel must
460
             *   ensure that all writes are complete by sending the final
461
             *   write as a committed write."
462
             */
463
            bool final_write =
464
               binding == key->num_transform_feedback_bindings - 1 &&
465
               vertex == num_verts - 1;
466
            struct brw_reg vertex_slot = c->reg.vertex[vertex];
467
            vertex_slot.nr += slot / 2;
468
            vertex_slot.subnr = (slot % 2) * 16;
469
            /* gl_PointSize is stored in VARYING_SLOT_PSIZ.w. */
470
            vertex_slot.swizzle = varying == VARYING_SLOT_PSIZ
471
               ? BRW_SWIZZLE_WWWW : key->transform_feedback_swizzles[binding];
472
            brw_set_default_access_mode(p, BRW_ALIGN_16);
473
            brw_push_insn_state(p);
474
            brw_set_default_exec_size(p, BRW_EXECUTE_4);
475

476
            brw_MOV(p, stride(c->reg.header, 4, 4, 1),
477
                    retype(vertex_slot, BRW_REGISTER_TYPE_UD));
478
            brw_pop_insn_state(p);
479

480
            brw_set_default_access_mode(p, BRW_ALIGN_1);
481
            brw_svb_write(p,
482
                          final_write ? c->reg.temp : brw_null_reg(), /* dest */
483
                          1, /* msg_reg_nr */
484
                          c->reg.header, /* src0 */
485
                          BRW_GFX6_SOL_BINDING_START + binding, /* binding_table_index */
486
                          final_write); /* send_commit_msg */
487
         }
488
      }
489
      brw_ENDIF(p);
490

491
      /* Now, reinitialize the header register from R0 to restore the parts of
492
       * the register that we overwrote while streaming out transform feedback
493
       * data.
494
       */
495
      brw_ff_gs_initialize_header(c);
496

497
      /* Finally, wait for the write commit to occur so that we can proceed to
498
       * other things safely.
499
       *
500
       * From the Sandybridge PRM, Volume 4, Part 1, Section 3.3:
501
       *
502
       *   The write commit does not modify the destination register, but
503
       *   merely clears the dependency associated with the destination
504
       *   register. Thus, a simple “mov” instruction using the register as a
505
       *   source is sufficient to wait for the write commit to occur.
506
       */
507
      brw_MOV(p, c->reg.temp, c->reg.temp);
508
   }
509

510
   brw_ff_gs_ff_sync(c, 1);
511

512
   brw_ff_gs_overwrite_header_dw2_from_r0(c);
513
   switch (num_verts) {
514
   case 1:
515
      brw_ff_gs_offset_header_dw2(c,
516
                                  URB_WRITE_PRIM_START | URB_WRITE_PRIM_END);
517
      brw_ff_gs_emit_vue(c, c->reg.vertex[0], true);
518
      break;
519
   case 2:
520
      brw_ff_gs_offset_header_dw2(c, URB_WRITE_PRIM_START);
521
      brw_ff_gs_emit_vue(c, c->reg.vertex[0], false);
522
      brw_ff_gs_offset_header_dw2(c,
523
                                  URB_WRITE_PRIM_END - URB_WRITE_PRIM_START);
524
      brw_ff_gs_emit_vue(c, c->reg.vertex[1], true);
525
      break;
526
   case 3:
527
      if (check_edge_flags) {
528
         /* Only emit vertices 0 and 1 if this is the first triangle of the
529
          * polygon.  Otherwise they are redundant.
530
          */
531
         brw_AND(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
532
                 get_element_ud(c->reg.R0, 2),
533
                 brw_imm_ud(BRW_GS_EDGE_INDICATOR_0));
534
         brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ);
535
         brw_IF(p, BRW_EXECUTE_1);
536
      }
537
      brw_ff_gs_offset_header_dw2(c, URB_WRITE_PRIM_START);
538
      brw_ff_gs_emit_vue(c, c->reg.vertex[0], false);
539
      brw_ff_gs_offset_header_dw2(c, -URB_WRITE_PRIM_START);
540
      brw_ff_gs_emit_vue(c, c->reg.vertex[1], false);
541
      if (check_edge_flags) {
542
         brw_ENDIF(p);
543
         /* Only emit vertex 2 in PRIM_END mode if this is the last triangle
544
          * of the polygon.  Otherwise leave the primitive incomplete because
545
          * there are more polygon vertices coming.
546
          */
547
         brw_AND(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
548
                 get_element_ud(c->reg.R0, 2),
549
                 brw_imm_ud(BRW_GS_EDGE_INDICATOR_1));
550
         brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ);
551
         brw_set_default_predicate_control(p, BRW_PREDICATE_NORMAL);
552
      }
553
      brw_ff_gs_offset_header_dw2(c, URB_WRITE_PRIM_END);
554
      brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
555
      brw_ff_gs_emit_vue(c, c->reg.vertex[2], true);
556
      break;
557
   }
558
}
559

560
const unsigned *
561
brw_compile_ff_gs_prog(struct brw_compiler *compiler,
562
		       void *mem_ctx,
563
		       const struct brw_ff_gs_prog_key *key,
564
		       struct brw_ff_gs_prog_data *prog_data,
565
		       struct brw_vue_map *vue_map,
566
		       unsigned *final_assembly_size)
567
{
568
   struct brw_ff_gs_compile c;
569
   const GLuint *program;
570

571
   memset(&c, 0, sizeof(c));
572

573
   c.key = *key;
574
   c.vue_map = *vue_map;
575
   c.nr_regs = (c.vue_map.num_slots + 1)/2;
576
   c.prog_data = prog_data;
577

578
   mem_ctx = ralloc_context(NULL);
579

580
   /* Begin the compilation:
581
    */
582
   brw_init_codegen(compiler->devinfo, &c.func, mem_ctx);
583

584
   c.func.single_program_flow = 1;
585

586
   /* For some reason the thread is spawned with only 4 channels
587
    * unmasked.
588
    */
589
   brw_set_default_mask_control(&c.func, BRW_MASK_DISABLE);
590

591
   if (compiler->devinfo->ver >= 6) {
592
      unsigned num_verts;
593
      bool check_edge_flag;
594
      /* On Sandybridge, we use the GS for implementing transform feedback
595
       * (called "Stream Out" in the PRM).
596
       */
597
      switch (key->primitive) {
598
      case _3DPRIM_POINTLIST:
599
         num_verts = 1;
600
         check_edge_flag = false;
601
         break;
602
      case _3DPRIM_LINELIST:
603
      case _3DPRIM_LINESTRIP:
604
      case _3DPRIM_LINELOOP:
605
         num_verts = 2;
606
         check_edge_flag = false;
607
         break;
608
      case _3DPRIM_TRILIST:
609
      case _3DPRIM_TRIFAN:
610
      case _3DPRIM_TRISTRIP:
611
      case _3DPRIM_RECTLIST:
612
         num_verts = 3;
613
         check_edge_flag = false;
614
         break;
615
      case _3DPRIM_QUADLIST:
616
      case _3DPRIM_QUADSTRIP:
617
      case _3DPRIM_POLYGON:
618
         num_verts = 3;
619
         check_edge_flag = true;
620
         break;
621
      default:
622
         unreachable("Unexpected primitive type in Gen6 SOL program.");
623
      }
624
      gfx6_sol_program(&c, key, num_verts, check_edge_flag);
625
   } else {
626
      /* On Gen4-5, we use the GS to decompose certain types of primitives.
627
       * Note that primitives which don't require a GS program have already
628
       * been weeded out by now.
629
       */
630
      switch (key->primitive) {
631
      case _3DPRIM_QUADLIST:
632
         brw_ff_gs_quads( &c, key );
633
         break;
634
      case _3DPRIM_QUADSTRIP:
635
         brw_ff_gs_quad_strip( &c, key );
636
         break;
637
      case _3DPRIM_LINELOOP:
638
         brw_ff_gs_lines( &c );
639
         break;
640
      default:
641
         return NULL;
642
      }
643
   }
644

645
   brw_compact_instructions(&c.func, 0, NULL);
646

647
   /* get the program
648
    */
649
   program = brw_get_program(&c.func, final_assembly_size);
650

651
   if (INTEL_DEBUG & DEBUG_GS) {
652
      fprintf(stderr, "gs:\n");
653
      brw_disassemble_with_labels(compiler->devinfo, c.func.store,
654
                                  0, *final_assembly_size, stderr);
655
      fprintf(stderr, "\n");
656
    }
657

658
   return program;
659
}
660

661

662
Product

Resources

Company