Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/intel/compiler/brw_compile_ff_gs.c
4550 views
1
/*
 Copyright (C) Intel Corp. 2006. All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
 /*
  * Authors:
  *   Keith Whitwell <[email protected]>
  */
31
32
#include "brw_compiler.h"
33
#include "brw_eu.h"
34
35
#include "dev/intel_debug.h"
36
37
#define MAX_GS_VERTS (4)
38
39
struct brw_ff_gs_compile {
40
struct brw_codegen func;
41
struct brw_ff_gs_prog_key key;
42
struct brw_ff_gs_prog_data *prog_data;
43
44
struct {
45
struct brw_reg R0;
46
47
/**
48
* Register holding streamed vertex buffer pointers -- see the Sandy
49
* Bridge PRM, volume 2 part 1, section 4.4.2 (GS Thread Payload
50
* [DevSNB]). These pointers are delivered in GRF 1.
51
*/
52
struct brw_reg SVBI;
53
54
struct brw_reg vertex[MAX_GS_VERTS];
55
struct brw_reg header;
56
struct brw_reg temp;
57
58
/**
59
* Register holding destination indices for streamed buffer writes.
60
* Only used for SOL programs.
61
*/
62
struct brw_reg destination_indices;
63
} reg;
64
65
/* Number of registers used to store vertex data */
66
GLuint nr_regs;
67
68
struct brw_vue_map vue_map;
69
};
70
71
/**
72
* Allocate registers for GS.
73
*
74
* If sol_program is true, then:
75
*
76
* - The thread will be spawned with the "SVBI Payload Enable" bit set, so GRF
77
* 1 needs to be set aside to hold the streamed vertex buffer indices.
78
*
79
* - The thread will need to use the destination_indices register.
80
*/
81
static void brw_ff_gs_alloc_regs(struct brw_ff_gs_compile *c,
82
GLuint nr_verts,
83
bool sol_program)
84
{
85
GLuint i = 0,j;
86
87
/* Register usage is static, precompute here:
88
*/
89
c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;
90
91
/* Streamed vertex buffer indices */
92
if (sol_program)
93
c->reg.SVBI = retype(brw_vec8_grf(i++, 0), BRW_REGISTER_TYPE_UD);
94
95
/* Payload vertices plus space for more generated vertices:
96
*/
97
for (j = 0; j < nr_verts; j++) {
98
c->reg.vertex[j] = brw_vec4_grf(i, 0);
99
i += c->nr_regs;
100
}
101
102
c->reg.header = retype(brw_vec8_grf(i++, 0), BRW_REGISTER_TYPE_UD);
103
c->reg.temp = retype(brw_vec8_grf(i++, 0), BRW_REGISTER_TYPE_UD);
104
105
if (sol_program) {
106
c->reg.destination_indices =
107
retype(brw_vec4_grf(i++, 0), BRW_REGISTER_TYPE_UD);
108
}
109
110
c->prog_data->urb_read_length = c->nr_regs;
111
c->prog_data->total_grf = i;
112
}
113
114
115
/**
116
* Set up the initial value of c->reg.header register based on c->reg.R0.
117
*
118
* The following information is passed to the GS thread in R0, and needs to be
119
* included in the first URB_WRITE or FF_SYNC message sent by the GS:
120
*
121
* - DWORD 0 [31:0] handle info (Gen4 only)
122
* - DWORD 5 [7:0] FFTID
123
* - DWORD 6 [31:0] Debug info
124
* - DWORD 7 [31:0] Debug info
125
*
126
* This function sets up the above data by copying by copying the contents of
127
* R0 to the header register.
128
*/
129
static void brw_ff_gs_initialize_header(struct brw_ff_gs_compile *c)
130
{
131
struct brw_codegen *p = &c->func;
132
brw_MOV(p, c->reg.header, c->reg.R0);
133
}
134
135
/**
136
* Overwrite DWORD 2 of c->reg.header with the given immediate unsigned value.
137
*
138
* In URB_WRITE messages, DWORD 2 contains the fields PrimType, PrimStart,
139
* PrimEnd, Increment CL_INVOCATIONS, and SONumPrimsWritten, many of which we
140
* need to be able to update on a per-vertex basis.
141
*/
142
static void brw_ff_gs_overwrite_header_dw2(struct brw_ff_gs_compile *c,
143
unsigned dw2)
144
{
145
struct brw_codegen *p = &c->func;
146
brw_MOV(p, get_element_ud(c->reg.header, 2), brw_imm_ud(dw2));
147
}
148
149
/**
150
* Overwrite DWORD 2 of c->reg.header with the primitive type from c->reg.R0.
151
*
152
* When the thread is spawned, GRF 0 contains the primitive type in bits 4:0
153
* of DWORD 2. URB_WRITE messages need the primitive type in bits 6:2 of
154
* DWORD 2. So this function extracts the primitive type field, bitshifts it
155
* appropriately, and stores it in c->reg.header.
156
*/
157
static void brw_ff_gs_overwrite_header_dw2_from_r0(struct brw_ff_gs_compile *c)
158
{
159
struct brw_codegen *p = &c->func;
160
brw_AND(p, get_element_ud(c->reg.header, 2), get_element_ud(c->reg.R0, 2),
161
brw_imm_ud(0x1f));
162
brw_SHL(p, get_element_ud(c->reg.header, 2),
163
get_element_ud(c->reg.header, 2), brw_imm_ud(2));
164
}
165
166
/**
167
* Apply an additive offset to DWORD 2 of c->reg.header.
168
*
169
* This is used to set/unset the "PrimStart" and "PrimEnd" flags appropriately
170
* for each vertex.
171
*/
172
static void brw_ff_gs_offset_header_dw2(struct brw_ff_gs_compile *c,
173
int offset)
174
{
175
struct brw_codegen *p = &c->func;
176
brw_ADD(p, get_element_d(c->reg.header, 2), get_element_d(c->reg.header, 2),
177
brw_imm_d(offset));
178
}
179
180
181
/**
182
* Emit a vertex using the URB_WRITE message. Use the contents of
183
* c->reg.header for the message header, and the registers starting at \c vert
184
* for the vertex data.
185
*
186
* If \c last is true, then this is the last vertex, so no further URB space
187
* should be allocated, and this message should end the thread.
188
*
189
* If \c last is false, then a new URB entry will be allocated, and its handle
190
* will be stored in DWORD 0 of c->reg.header for use in the next URB_WRITE
191
* message.
192
*/
193
static void brw_ff_gs_emit_vue(struct brw_ff_gs_compile *c,
194
struct brw_reg vert,
195
bool last)
196
{
197
struct brw_codegen *p = &c->func;
198
int write_offset = 0;
199
bool complete = false;
200
201
do {
202
/* We can't write more than 14 registers at a time to the URB */
203
int write_len = MIN2(c->nr_regs - write_offset, 14);
204
if (write_len == c->nr_regs - write_offset)
205
complete = true;
206
207
/* Copy the vertex from vertn into m1..mN+1:
208
*/
209
brw_copy8(p, brw_message_reg(1), offset(vert, write_offset), write_len);
210
211
/* Send the vertex data to the URB. If this is the last write for this
212
* vertex, then we mark it as complete, and either end the thread or
213
* allocate another vertex URB entry (depending whether this is the last
214
* vertex).
215
*/
216
enum brw_urb_write_flags flags;
217
if (!complete)
218
flags = BRW_URB_WRITE_NO_FLAGS;
219
else if (last)
220
flags = BRW_URB_WRITE_EOT_COMPLETE;
221
else
222
flags = BRW_URB_WRITE_ALLOCATE_COMPLETE;
223
brw_urb_WRITE(p,
224
(flags & BRW_URB_WRITE_ALLOCATE) ? c->reg.temp
225
: retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
226
0,
227
c->reg.header,
228
flags,
229
write_len + 1, /* msg length */
230
(flags & BRW_URB_WRITE_ALLOCATE) ? 1
231
: 0, /* response length */
232
write_offset, /* urb offset */
233
BRW_URB_SWIZZLE_NONE);
234
write_offset += write_len;
235
} while (!complete);
236
237
if (!last) {
238
brw_MOV(p, get_element_ud(c->reg.header, 0),
239
get_element_ud(c->reg.temp, 0));
240
}
241
}
242
243
/**
244
* Send an FF_SYNC message to ensure that all previously spawned GS threads
245
* have finished sending primitives down the pipeline, and to allocate a URB
246
* entry for the first output vertex. Only needed on Ironlake+.
247
*
248
* This function modifies c->reg.header: in DWORD 1, it stores num_prim (which
249
* is needed by the FF_SYNC message), and in DWORD 0, it stores the handle to
250
* the allocated URB entry (which will be needed by the URB_WRITE meesage that
251
* follows).
252
*/
253
static void brw_ff_gs_ff_sync(struct brw_ff_gs_compile *c, int num_prim)
254
{
255
struct brw_codegen *p = &c->func;
256
257
brw_MOV(p, get_element_ud(c->reg.header, 1), brw_imm_ud(num_prim));
258
brw_ff_sync(p,
259
c->reg.temp,
260
0,
261
c->reg.header,
262
1, /* allocate */
263
1, /* response length */
264
0 /* eot */);
265
brw_MOV(p, get_element_ud(c->reg.header, 0),
266
get_element_ud(c->reg.temp, 0));
267
}
268
269
270
static void
271
brw_ff_gs_quads(struct brw_ff_gs_compile *c,
272
const struct brw_ff_gs_prog_key *key)
273
{
274
brw_ff_gs_alloc_regs(c, 4, false);
275
brw_ff_gs_initialize_header(c);
276
/* Use polygons for correct edgeflag behaviour. Note that vertex 3
277
* is the PV for quads, but vertex 0 for polygons:
278
*/
279
if (c->func.devinfo->ver == 5)
280
brw_ff_gs_ff_sync(c, 1);
281
brw_ff_gs_overwrite_header_dw2(
282
c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT)
283
| URB_WRITE_PRIM_START));
284
if (key->pv_first) {
285
brw_ff_gs_emit_vue(c, c->reg.vertex[0], 0);
286
brw_ff_gs_overwrite_header_dw2(
287
c, _3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT);
288
brw_ff_gs_emit_vue(c, c->reg.vertex[1], 0);
289
brw_ff_gs_emit_vue(c, c->reg.vertex[2], 0);
290
brw_ff_gs_overwrite_header_dw2(
291
c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT)
292
| URB_WRITE_PRIM_END));
293
brw_ff_gs_emit_vue(c, c->reg.vertex[3], 1);
294
}
295
else {
296
brw_ff_gs_emit_vue(c, c->reg.vertex[3], 0);
297
brw_ff_gs_overwrite_header_dw2(
298
c, _3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT);
299
brw_ff_gs_emit_vue(c, c->reg.vertex[0], 0);
300
brw_ff_gs_emit_vue(c, c->reg.vertex[1], 0);
301
brw_ff_gs_overwrite_header_dw2(
302
c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT)
303
| URB_WRITE_PRIM_END));
304
brw_ff_gs_emit_vue(c, c->reg.vertex[2], 1);
305
}
306
}
307
308
static void
309
brw_ff_gs_quad_strip(struct brw_ff_gs_compile *c,
310
const struct brw_ff_gs_prog_key *key)
311
{
312
brw_ff_gs_alloc_regs(c, 4, false);
313
brw_ff_gs_initialize_header(c);
314
315
if (c->func.devinfo->ver == 5)
316
brw_ff_gs_ff_sync(c, 1);
317
brw_ff_gs_overwrite_header_dw2(
318
c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT)
319
| URB_WRITE_PRIM_START));
320
if (key->pv_first) {
321
brw_ff_gs_emit_vue(c, c->reg.vertex[0], 0);
322
brw_ff_gs_overwrite_header_dw2(
323
c, _3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT);
324
brw_ff_gs_emit_vue(c, c->reg.vertex[1], 0);
325
brw_ff_gs_emit_vue(c, c->reg.vertex[2], 0);
326
brw_ff_gs_overwrite_header_dw2(
327
c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT)
328
| URB_WRITE_PRIM_END));
329
brw_ff_gs_emit_vue(c, c->reg.vertex[3], 1);
330
}
331
else {
332
brw_ff_gs_emit_vue(c, c->reg.vertex[2], 0);
333
brw_ff_gs_overwrite_header_dw2(
334
c, _3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT);
335
brw_ff_gs_emit_vue(c, c->reg.vertex[3], 0);
336
brw_ff_gs_emit_vue(c, c->reg.vertex[0], 0);
337
brw_ff_gs_overwrite_header_dw2(
338
c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT)
339
| URB_WRITE_PRIM_END));
340
brw_ff_gs_emit_vue(c, c->reg.vertex[1], 1);
341
}
342
}
343
344
static void brw_ff_gs_lines(struct brw_ff_gs_compile *c)
345
{
346
brw_ff_gs_alloc_regs(c, 2, false);
347
brw_ff_gs_initialize_header(c);
348
349
if (c->func.devinfo->ver == 5)
350
brw_ff_gs_ff_sync(c, 1);
351
brw_ff_gs_overwrite_header_dw2(
352
c, ((_3DPRIM_LINESTRIP << URB_WRITE_PRIM_TYPE_SHIFT)
353
| URB_WRITE_PRIM_START));
354
brw_ff_gs_emit_vue(c, c->reg.vertex[0], 0);
355
brw_ff_gs_overwrite_header_dw2(
356
c, ((_3DPRIM_LINESTRIP << URB_WRITE_PRIM_TYPE_SHIFT)
357
| URB_WRITE_PRIM_END));
358
brw_ff_gs_emit_vue(c, c->reg.vertex[1], 1);
359
}
360
361
/**
362
* Generate the geometry shader program used on Gen6 to perform stream output
363
* (transform feedback).
364
*/
365
static void
366
gfx6_sol_program(struct brw_ff_gs_compile *c, const struct brw_ff_gs_prog_key *key,
367
unsigned num_verts, bool check_edge_flags)
368
{
369
struct brw_codegen *p = &c->func;
370
brw_inst *inst;
371
c->prog_data->svbi_postincrement_value = num_verts;
372
373
brw_ff_gs_alloc_regs(c, num_verts, true);
374
brw_ff_gs_initialize_header(c);
375
376
if (key->num_transform_feedback_bindings > 0) {
377
unsigned vertex, binding;
378
struct brw_reg destination_indices_uw =
379
vec8(retype(c->reg.destination_indices, BRW_REGISTER_TYPE_UW));
380
381
/* Note: since we use the binding table to keep track of buffer offsets
382
* and stride, the GS doesn't need to keep track of a separate pointer
383
* into each buffer; it uses a single pointer which increments by 1 for
384
* each vertex. So we use SVBI0 for this pointer, regardless of whether
385
* transform feedback is in interleaved or separate attribs mode.
386
*
387
* Make sure that the buffers have enough room for all the vertices.
388
*/
389
brw_ADD(p, get_element_ud(c->reg.temp, 0),
390
get_element_ud(c->reg.SVBI, 0), brw_imm_ud(num_verts));
391
brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE,
392
get_element_ud(c->reg.temp, 0),
393
get_element_ud(c->reg.SVBI, 4));
394
brw_IF(p, BRW_EXECUTE_1);
395
396
/* Compute the destination indices to write to. Usually we use SVBI[0]
397
* + (0, 1, 2). However, for odd-numbered triangles in tristrips, the
398
* vertices come down the pipeline in reversed winding order, so we need
399
* to flip the order when writing to the transform feedback buffer. To
400
* ensure that flatshading accuracy is preserved, we need to write them
401
* in order SVBI[0] + (0, 2, 1) if we're using the first provoking
402
* vertex convention, and in order SVBI[0] + (1, 0, 2) if we're using
403
* the last provoking vertex convention.
404
*
405
* Note: since brw_imm_v can only be used in instructions in
406
* packed-word execution mode, and SVBI is a double-word, we need to
407
* first move the appropriate immediate constant ((0, 1, 2), (0, 2, 1),
408
* or (1, 0, 2)) to the destination_indices register, and then add SVBI
409
* using a separate instruction. Also, since the immediate constant is
410
* expressed as packed words, and we need to load double-words into
411
* destination_indices, we need to intersperse zeros to fill the upper
412
* halves of each double-word.
413
*/
414
brw_MOV(p, destination_indices_uw,
415
brw_imm_v(0x00020100)); /* (0, 1, 2) */
416
if (num_verts == 3) {
417
/* Get primitive type into temp register. */
418
brw_AND(p, get_element_ud(c->reg.temp, 0),
419
get_element_ud(c->reg.R0, 2), brw_imm_ud(0x1f));
420
421
/* Test if primitive type is TRISTRIP_REVERSE. We need to do this as
422
* an 8-wide comparison so that the conditional MOV that follows
423
* moves all 8 words correctly.
424
*/
425
brw_CMP(p, vec8(brw_null_reg()), BRW_CONDITIONAL_EQ,
426
get_element_ud(c->reg.temp, 0),
427
brw_imm_ud(_3DPRIM_TRISTRIP_REVERSE));
428
429
/* If so, then overwrite destination_indices_uw with the appropriate
430
* reordering.
431
*/
432
inst = brw_MOV(p, destination_indices_uw,
433
brw_imm_v(key->pv_first ? 0x00010200 /* (0, 2, 1) */
434
: 0x00020001)); /* (1, 0, 2) */
435
brw_inst_set_pred_control(p->devinfo, inst, BRW_PREDICATE_NORMAL);
436
}
437
438
assert(c->reg.destination_indices.width == BRW_EXECUTE_4);
439
brw_push_insn_state(p);
440
brw_set_default_exec_size(p, BRW_EXECUTE_4);
441
brw_ADD(p, c->reg.destination_indices,
442
c->reg.destination_indices, get_element_ud(c->reg.SVBI, 0));
443
brw_pop_insn_state(p);
444
/* For each vertex, generate code to output each varying using the
445
* appropriate binding table entry.
446
*/
447
for (vertex = 0; vertex < num_verts; ++vertex) {
448
/* Set up the correct destination index for this vertex */
449
brw_MOV(p, get_element_ud(c->reg.header, 5),
450
get_element_ud(c->reg.destination_indices, vertex));
451
452
for (binding = 0; binding < key->num_transform_feedback_bindings;
453
++binding) {
454
unsigned char varying =
455
key->transform_feedback_bindings[binding];
456
unsigned char slot = c->vue_map.varying_to_slot[varying];
457
/* From the Sandybridge PRM, Volume 2, Part 1, Section 4.5.1:
458
*
459
* "Prior to End of Thread with a URB_WRITE, the kernel must
460
* ensure that all writes are complete by sending the final
461
* write as a committed write."
462
*/
463
bool final_write =
464
binding == key->num_transform_feedback_bindings - 1 &&
465
vertex == num_verts - 1;
466
struct brw_reg vertex_slot = c->reg.vertex[vertex];
467
vertex_slot.nr += slot / 2;
468
vertex_slot.subnr = (slot % 2) * 16;
469
/* gl_PointSize is stored in VARYING_SLOT_PSIZ.w. */
470
vertex_slot.swizzle = varying == VARYING_SLOT_PSIZ
471
? BRW_SWIZZLE_WWWW : key->transform_feedback_swizzles[binding];
472
brw_set_default_access_mode(p, BRW_ALIGN_16);
473
brw_push_insn_state(p);
474
brw_set_default_exec_size(p, BRW_EXECUTE_4);
475
476
brw_MOV(p, stride(c->reg.header, 4, 4, 1),
477
retype(vertex_slot, BRW_REGISTER_TYPE_UD));
478
brw_pop_insn_state(p);
479
480
brw_set_default_access_mode(p, BRW_ALIGN_1);
481
brw_svb_write(p,
482
final_write ? c->reg.temp : brw_null_reg(), /* dest */
483
1, /* msg_reg_nr */
484
c->reg.header, /* src0 */
485
BRW_GFX6_SOL_BINDING_START + binding, /* binding_table_index */
486
final_write); /* send_commit_msg */
487
}
488
}
489
brw_ENDIF(p);
490
491
/* Now, reinitialize the header register from R0 to restore the parts of
492
* the register that we overwrote while streaming out transform feedback
493
* data.
494
*/
495
brw_ff_gs_initialize_header(c);
496
497
/* Finally, wait for the write commit to occur so that we can proceed to
498
* other things safely.
499
*
500
* From the Sandybridge PRM, Volume 4, Part 1, Section 3.3:
501
*
502
* The write commit does not modify the destination register, but
503
* merely clears the dependency associated with the destination
504
* register. Thus, a simple “mov” instruction using the register as a
505
* source is sufficient to wait for the write commit to occur.
506
*/
507
brw_MOV(p, c->reg.temp, c->reg.temp);
508
}
509
510
brw_ff_gs_ff_sync(c, 1);
511
512
brw_ff_gs_overwrite_header_dw2_from_r0(c);
513
switch (num_verts) {
514
case 1:
515
brw_ff_gs_offset_header_dw2(c,
516
URB_WRITE_PRIM_START | URB_WRITE_PRIM_END);
517
brw_ff_gs_emit_vue(c, c->reg.vertex[0], true);
518
break;
519
case 2:
520
brw_ff_gs_offset_header_dw2(c, URB_WRITE_PRIM_START);
521
brw_ff_gs_emit_vue(c, c->reg.vertex[0], false);
522
brw_ff_gs_offset_header_dw2(c,
523
URB_WRITE_PRIM_END - URB_WRITE_PRIM_START);
524
brw_ff_gs_emit_vue(c, c->reg.vertex[1], true);
525
break;
526
case 3:
527
if (check_edge_flags) {
528
/* Only emit vertices 0 and 1 if this is the first triangle of the
529
* polygon. Otherwise they are redundant.
530
*/
531
brw_AND(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
532
get_element_ud(c->reg.R0, 2),
533
brw_imm_ud(BRW_GS_EDGE_INDICATOR_0));
534
brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ);
535
brw_IF(p, BRW_EXECUTE_1);
536
}
537
brw_ff_gs_offset_header_dw2(c, URB_WRITE_PRIM_START);
538
brw_ff_gs_emit_vue(c, c->reg.vertex[0], false);
539
brw_ff_gs_offset_header_dw2(c, -URB_WRITE_PRIM_START);
540
brw_ff_gs_emit_vue(c, c->reg.vertex[1], false);
541
if (check_edge_flags) {
542
brw_ENDIF(p);
543
/* Only emit vertex 2 in PRIM_END mode if this is the last triangle
544
* of the polygon. Otherwise leave the primitive incomplete because
545
* there are more polygon vertices coming.
546
*/
547
brw_AND(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
548
get_element_ud(c->reg.R0, 2),
549
brw_imm_ud(BRW_GS_EDGE_INDICATOR_1));
550
brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ);
551
brw_set_default_predicate_control(p, BRW_PREDICATE_NORMAL);
552
}
553
brw_ff_gs_offset_header_dw2(c, URB_WRITE_PRIM_END);
554
brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
555
brw_ff_gs_emit_vue(c, c->reg.vertex[2], true);
556
break;
557
}
558
}
559
560
const unsigned *
561
brw_compile_ff_gs_prog(struct brw_compiler *compiler,
562
void *mem_ctx,
563
const struct brw_ff_gs_prog_key *key,
564
struct brw_ff_gs_prog_data *prog_data,
565
struct brw_vue_map *vue_map,
566
unsigned *final_assembly_size)
567
{
568
struct brw_ff_gs_compile c;
569
const GLuint *program;
570
571
memset(&c, 0, sizeof(c));
572
573
c.key = *key;
574
c.vue_map = *vue_map;
575
c.nr_regs = (c.vue_map.num_slots + 1)/2;
576
c.prog_data = prog_data;
577
578
mem_ctx = ralloc_context(NULL);
579
580
/* Begin the compilation:
581
*/
582
brw_init_codegen(compiler->devinfo, &c.func, mem_ctx);
583
584
c.func.single_program_flow = 1;
585
586
/* For some reason the thread is spawned with only 4 channels
587
* unmasked.
588
*/
589
brw_set_default_mask_control(&c.func, BRW_MASK_DISABLE);
590
591
if (compiler->devinfo->ver >= 6) {
592
unsigned num_verts;
593
bool check_edge_flag;
594
/* On Sandybridge, we use the GS for implementing transform feedback
595
* (called "Stream Out" in the PRM).
596
*/
597
switch (key->primitive) {
598
case _3DPRIM_POINTLIST:
599
num_verts = 1;
600
check_edge_flag = false;
601
break;
602
case _3DPRIM_LINELIST:
603
case _3DPRIM_LINESTRIP:
604
case _3DPRIM_LINELOOP:
605
num_verts = 2;
606
check_edge_flag = false;
607
break;
608
case _3DPRIM_TRILIST:
609
case _3DPRIM_TRIFAN:
610
case _3DPRIM_TRISTRIP:
611
case _3DPRIM_RECTLIST:
612
num_verts = 3;
613
check_edge_flag = false;
614
break;
615
case _3DPRIM_QUADLIST:
616
case _3DPRIM_QUADSTRIP:
617
case _3DPRIM_POLYGON:
618
num_verts = 3;
619
check_edge_flag = true;
620
break;
621
default:
622
unreachable("Unexpected primitive type in Gen6 SOL program.");
623
}
624
gfx6_sol_program(&c, key, num_verts, check_edge_flag);
625
} else {
626
/* On Gen4-5, we use the GS to decompose certain types of primitives.
627
* Note that primitives which don't require a GS program have already
628
* been weeded out by now.
629
*/
630
switch (key->primitive) {
631
case _3DPRIM_QUADLIST:
632
brw_ff_gs_quads( &c, key );
633
break;
634
case _3DPRIM_QUADSTRIP:
635
brw_ff_gs_quad_strip( &c, key );
636
break;
637
case _3DPRIM_LINELOOP:
638
brw_ff_gs_lines( &c );
639
break;
640
default:
641
return NULL;
642
}
643
}
644
645
brw_compact_instructions(&c.func, 0, NULL);
646
647
/* get the program
648
*/
649
program = brw_get_program(&c.func, final_assembly_size);
650
651
if (INTEL_DEBUG & DEBUG_GS) {
652
fprintf(stderr, "gs:\n");
653
brw_disassemble_with_labels(compiler->devinfo, c.func.store,
654
0, *final_assembly_size, stderr);
655
fprintf(stderr, "\n");
656
}
657
658
return program;
659
}
660
661
662