Book a Demo!
CoCalc Logo Icon
Store · Features · Docs · Share · Support · News · About · Policies · Sign Up · Sign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/intel/compiler/brw_compile_sf.c
4550 views
1
/*
2
* Copyright © 2006 - 2017 Intel Corporation
3
*
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
10
*
11
* The above copyright notice and this permission notice (including the next
12
* paragraph) shall be included in all copies or substantial portions of the
13
* Software.
14
*
15
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21
* IN THE SOFTWARE.
22
*/
23
24
#include "brw_compiler.h"
25
#include "brw_eu.h"
26
27
#include "dev/intel_debug.h"
28
29
struct brw_sf_compile {
30
struct brw_codegen func;
31
struct brw_sf_prog_key key;
32
struct brw_sf_prog_data prog_data;
33
34
struct brw_reg pv;
35
struct brw_reg det;
36
struct brw_reg dx0;
37
struct brw_reg dx2;
38
struct brw_reg dy0;
39
struct brw_reg dy2;
40
41
/* z and 1/w passed in seperately:
42
*/
43
struct brw_reg z[3];
44
struct brw_reg inv_w[3];
45
46
/* The vertices:
47
*/
48
struct brw_reg vert[3];
49
50
/* Temporaries, allocated after last vertex reg.
51
*/
52
struct brw_reg inv_det;
53
struct brw_reg a1_sub_a0;
54
struct brw_reg a2_sub_a0;
55
struct brw_reg tmp;
56
57
struct brw_reg m1Cx;
58
struct brw_reg m2Cy;
59
struct brw_reg m3C0;
60
61
GLuint nr_verts;
62
GLuint nr_attr_regs;
63
GLuint nr_setup_regs;
64
int urb_entry_read_offset;
65
66
/** The last known value of the f0.0 flag register. */
67
unsigned flag_value;
68
69
struct brw_vue_map vue_map;
70
};
71
72
/**
73
* Determine the vue slot corresponding to the given half of the given register.
74
*/
75
static inline int vert_reg_to_vue_slot(struct brw_sf_compile *c, GLuint reg,
76
int half)
77
{
78
return (reg + c->urb_entry_read_offset) * 2 + half;
79
}
80
81
/**
82
* Determine the varying corresponding to the given half of the given
83
* register. half=0 means the first half of a register, half=1 means the
84
* second half.
85
*/
86
static inline int vert_reg_to_varying(struct brw_sf_compile *c, GLuint reg,
87
int half)
88
{
89
int vue_slot = vert_reg_to_vue_slot(c, reg, half);
90
return c->vue_map.slot_to_varying[vue_slot];
91
}
92
93
/**
94
* Determine the register corresponding to the given vue slot
95
*/
96
static struct brw_reg get_vue_slot(struct brw_sf_compile *c,
97
struct brw_reg vert,
98
int vue_slot)
99
{
100
GLuint off = vue_slot / 2 - c->urb_entry_read_offset;
101
GLuint sub = vue_slot % 2;
102
103
return brw_vec4_grf(vert.nr + off, sub * 4);
104
}
105
106
/**
107
* Determine the register corresponding to the given varying.
108
*/
109
static struct brw_reg get_varying(struct brw_sf_compile *c,
110
struct brw_reg vert,
111
GLuint varying)
112
{
113
int vue_slot = c->vue_map.varying_to_slot[varying];
114
assert (vue_slot >= c->urb_entry_read_offset);
115
return get_vue_slot(c, vert, vue_slot);
116
}
117
118
static bool
119
have_attr(struct brw_sf_compile *c, GLuint attr)
120
{
121
return (c->key.attrs & BITFIELD64_BIT(attr)) ? 1 : 0;
122
}
123
124
/***********************************************************************
125
* Twoside lighting
126
*/
127
static void copy_bfc( struct brw_sf_compile *c,
128
struct brw_reg vert )
129
{
130
struct brw_codegen *p = &c->func;
131
GLuint i;
132
133
for (i = 0; i < 2; i++) {
134
if (have_attr(c, VARYING_SLOT_COL0+i) &&
135
have_attr(c, VARYING_SLOT_BFC0+i))
136
brw_MOV(p,
137
get_varying(c, vert, VARYING_SLOT_COL0+i),
138
get_varying(c, vert, VARYING_SLOT_BFC0+i));
139
}
140
}
141
142
143
static void do_twoside_color( struct brw_sf_compile *c )
144
{
145
struct brw_codegen *p = &c->func;
146
GLuint backface_conditional = c->key.frontface_ccw ? BRW_CONDITIONAL_G : BRW_CONDITIONAL_L;
147
148
/* Already done in clip program:
149
*/
150
if (c->key.primitive == BRW_SF_PRIM_UNFILLED_TRIS)
151
return;
152
153
/* If the vertex shader provides backface color, do the selection. The VS
154
* promises to set up the front color if the backface color is provided, but
155
* it may contain junk if never written to.
156
*/
157
if (!(have_attr(c, VARYING_SLOT_COL0) && have_attr(c, VARYING_SLOT_BFC0)) &&
158
!(have_attr(c, VARYING_SLOT_COL1) && have_attr(c, VARYING_SLOT_BFC1)))
159
return;
160
161
/* Need to use BRW_EXECUTE_4 and also do an 4-wide compare in order
162
* to get all channels active inside the IF. In the clipping code
163
* we run with NoMask, so it's not an option and we can use
164
* BRW_EXECUTE_1 for all comparisions.
165
*/
166
brw_CMP(p, vec4(brw_null_reg()), backface_conditional, c->det, brw_imm_f(0));
167
brw_IF(p, BRW_EXECUTE_4);
168
{
169
switch (c->nr_verts) {
170
case 3: copy_bfc(c, c->vert[2]); FALLTHROUGH;
171
case 2: copy_bfc(c, c->vert[1]); FALLTHROUGH;
172
case 1: copy_bfc(c, c->vert[0]);
173
}
174
}
175
brw_ENDIF(p);
176
}
177
178
179
180
/***********************************************************************
181
* Flat shading
182
*/
183
184
static void copy_flatshaded_attributes(struct brw_sf_compile *c,
185
struct brw_reg dst,
186
struct brw_reg src)
187
{
188
struct brw_codegen *p = &c->func;
189
int i;
190
191
for (i = 0; i < c->vue_map.num_slots; i++) {
192
if (c->key.interp_mode[i] == INTERP_MODE_FLAT) {
193
brw_MOV(p,
194
get_vue_slot(c, dst, i),
195
get_vue_slot(c, src, i));
196
}
197
}
198
}
199
200
static int count_flatshaded_attributes(struct brw_sf_compile *c)
201
{
202
int i;
203
int count = 0;
204
205
for (i = 0; i < c->vue_map.num_slots; i++)
206
if (c->key.interp_mode[i] == INTERP_MODE_FLAT)
207
count++;
208
209
return count;
210
}
211
212
213
214
/* Need to use a computed jump to copy flatshaded attributes as the
215
* vertices are ordered according to y-coordinate before reaching this
216
* point, so the PV could be anywhere.
217
*/
218
static void do_flatshade_triangle( struct brw_sf_compile *c )
219
{
220
struct brw_codegen *p = &c->func;
221
GLuint nr;
222
GLuint jmpi = 1;
223
224
/* Already done in clip program:
225
*/
226
if (c->key.primitive == BRW_SF_PRIM_UNFILLED_TRIS)
227
return;
228
229
if (p->devinfo->ver == 5)
230
jmpi = 2;
231
232
nr = count_flatshaded_attributes(c);
233
234
brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr*2+1)));
235
brw_JMPI(p, c->pv, BRW_PREDICATE_NONE);
236
237
copy_flatshaded_attributes(c, c->vert[1], c->vert[0]);
238
copy_flatshaded_attributes(c, c->vert[2], c->vert[0]);
239
brw_JMPI(p, brw_imm_d(jmpi*(nr*4+1)), BRW_PREDICATE_NONE);
240
241
copy_flatshaded_attributes(c, c->vert[0], c->vert[1]);
242
copy_flatshaded_attributes(c, c->vert[2], c->vert[1]);
243
brw_JMPI(p, brw_imm_d(jmpi*nr*2), BRW_PREDICATE_NONE);
244
245
copy_flatshaded_attributes(c, c->vert[0], c->vert[2]);
246
copy_flatshaded_attributes(c, c->vert[1], c->vert[2]);
247
}
248
249
250
static void do_flatshade_line( struct brw_sf_compile *c )
251
{
252
struct brw_codegen *p = &c->func;
253
GLuint nr;
254
GLuint jmpi = 1;
255
256
/* Already done in clip program:
257
*/
258
if (c->key.primitive == BRW_SF_PRIM_UNFILLED_TRIS)
259
return;
260
261
if (p->devinfo->ver == 5)
262
jmpi = 2;
263
264
nr = count_flatshaded_attributes(c);
265
266
brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr+1)));
267
brw_JMPI(p, c->pv, BRW_PREDICATE_NONE);
268
copy_flatshaded_attributes(c, c->vert[1], c->vert[0]);
269
270
brw_JMPI(p, brw_imm_ud(jmpi*nr), BRW_PREDICATE_NONE);
271
copy_flatshaded_attributes(c, c->vert[0], c->vert[1]);
272
}
273
274
275
/***********************************************************************
276
* Triangle setup.
277
*/
278
279
280
static void alloc_regs( struct brw_sf_compile *c )
281
{
282
GLuint reg, i;
283
284
/* Values computed by fixed function unit:
285
*/
286
c->pv = retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_D);
287
c->det = brw_vec1_grf(1, 2);
288
c->dx0 = brw_vec1_grf(1, 3);
289
c->dx2 = brw_vec1_grf(1, 4);
290
c->dy0 = brw_vec1_grf(1, 5);
291
c->dy2 = brw_vec1_grf(1, 6);
292
293
/* z and 1/w passed in seperately:
294
*/
295
c->z[0] = brw_vec1_grf(2, 0);
296
c->inv_w[0] = brw_vec1_grf(2, 1);
297
c->z[1] = brw_vec1_grf(2, 2);
298
c->inv_w[1] = brw_vec1_grf(2, 3);
299
c->z[2] = brw_vec1_grf(2, 4);
300
c->inv_w[2] = brw_vec1_grf(2, 5);
301
302
/* The vertices:
303
*/
304
reg = 3;
305
for (i = 0; i < c->nr_verts; i++) {
306
c->vert[i] = brw_vec8_grf(reg, 0);
307
reg += c->nr_attr_regs;
308
}
309
310
/* Temporaries, allocated after last vertex reg.
311
*/
312
c->inv_det = brw_vec1_grf(reg, 0); reg++;
313
c->a1_sub_a0 = brw_vec8_grf(reg, 0); reg++;
314
c->a2_sub_a0 = brw_vec8_grf(reg, 0); reg++;
315
c->tmp = brw_vec8_grf(reg, 0); reg++;
316
317
/* Note grf allocation:
318
*/
319
c->prog_data.total_grf = reg;
320
321
322
/* Outputs of this program - interpolation coefficients for
323
* rasterization:
324
*/
325
c->m1Cx = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 1, 0);
326
c->m2Cy = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 2, 0);
327
c->m3C0 = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 3, 0);
328
}
329
330
331
static void copy_z_inv_w( struct brw_sf_compile *c )
332
{
333
struct brw_codegen *p = &c->func;
334
GLuint i;
335
336
/* Copy both scalars with a single MOV:
337
*/
338
for (i = 0; i < c->nr_verts; i++)
339
brw_MOV(p, vec2(suboffset(c->vert[i], 2)), vec2(c->z[i]));
340
}
341
342
343
static void invert_det( struct brw_sf_compile *c)
344
{
345
/* Looks like we invert all 8 elements just to get 1/det in
346
* position 2 !?!
347
*/
348
gfx4_math(&c->func,
349
c->inv_det,
350
BRW_MATH_FUNCTION_INV,
351
0,
352
c->det,
353
BRW_MATH_PRECISION_FULL);
354
355
}
356
357
358
static bool
359
calculate_masks(struct brw_sf_compile *c,
360
GLuint reg,
361
GLushort *pc,
362
GLushort *pc_persp,
363
GLushort *pc_linear)
364
{
365
bool is_last_attr = (reg == c->nr_setup_regs - 1);
366
enum glsl_interp_mode interp;
367
368
*pc_persp = 0;
369
*pc_linear = 0;
370
*pc = 0xf;
371
372
interp = c->key.interp_mode[vert_reg_to_vue_slot(c, reg, 0)];
373
if (interp == INTERP_MODE_SMOOTH) {
374
*pc_linear = 0xf;
375
*pc_persp = 0xf;
376
} else if (interp == INTERP_MODE_NOPERSPECTIVE)
377
*pc_linear = 0xf;
378
379
/* Maybe only processs one attribute on the final round:
380
*/
381
if (vert_reg_to_varying(c, reg, 1) != BRW_VARYING_SLOT_COUNT) {
382
*pc |= 0xf0;
383
384
interp = c->key.interp_mode[vert_reg_to_vue_slot(c, reg, 1)];
385
if (interp == INTERP_MODE_SMOOTH) {
386
*pc_linear |= 0xf0;
387
*pc_persp |= 0xf0;
388
} else if (interp == INTERP_MODE_NOPERSPECTIVE)
389
*pc_linear |= 0xf0;
390
}
391
392
return is_last_attr;
393
}
394
395
/* Calculates the predicate control for which channels of a reg
396
* (containing 2 attrs) to do point sprite coordinate replacement on.
397
*/
398
static uint16_t
399
calculate_point_sprite_mask(struct brw_sf_compile *c, GLuint reg)
400
{
401
int varying1, varying2;
402
uint16_t pc = 0;
403
404
varying1 = vert_reg_to_varying(c, reg, 0);
405
if (varying1 >= VARYING_SLOT_TEX0 && varying1 <= VARYING_SLOT_TEX7) {
406
if (c->key.point_sprite_coord_replace & (1 << (varying1 - VARYING_SLOT_TEX0)))
407
pc |= 0x0f;
408
}
409
if (varying1 == BRW_VARYING_SLOT_PNTC)
410
pc |= 0x0f;
411
412
varying2 = vert_reg_to_varying(c, reg, 1);
413
if (varying2 >= VARYING_SLOT_TEX0 && varying2 <= VARYING_SLOT_TEX7) {
414
if (c->key.point_sprite_coord_replace & (1 << (varying2 -
415
VARYING_SLOT_TEX0)))
416
pc |= 0xf0;
417
}
418
if (varying2 == BRW_VARYING_SLOT_PNTC)
419
pc |= 0xf0;
420
421
return pc;
422
}
423
424
static void
425
set_predicate_control_flag_value(struct brw_codegen *p,
426
struct brw_sf_compile *c,
427
unsigned value)
428
{
429
brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
430
431
if (value != 0xff) {
432
if (value != c->flag_value) {
433
brw_MOV(p, brw_flag_reg(0, 0), brw_imm_uw(value));
434
c->flag_value = value;
435
}
436
437
brw_set_default_predicate_control(p, BRW_PREDICATE_NORMAL);
438
}
439
}
440
441
static void brw_emit_tri_setup(struct brw_sf_compile *c, bool allocate)
442
{
443
struct brw_codegen *p = &c->func;
444
GLuint i;
445
446
c->flag_value = 0xff;
447
c->nr_verts = 3;
448
449
if (allocate)
450
alloc_regs(c);
451
452
invert_det(c);
453
copy_z_inv_w(c);
454
455
if (c->key.do_twoside_color)
456
do_twoside_color(c);
457
458
if (c->key.contains_flat_varying)
459
do_flatshade_triangle(c);
460
461
462
for (i = 0; i < c->nr_setup_regs; i++)
463
{
464
/* Pair of incoming attributes:
465
*/
466
struct brw_reg a0 = offset(c->vert[0], i);
467
struct brw_reg a1 = offset(c->vert[1], i);
468
struct brw_reg a2 = offset(c->vert[2], i);
469
GLushort pc, pc_persp, pc_linear;
470
bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
471
472
if (pc_persp)
473
{
474
set_predicate_control_flag_value(p, c, pc_persp);
475
brw_MUL(p, a0, a0, c->inv_w[0]);
476
brw_MUL(p, a1, a1, c->inv_w[1]);
477
brw_MUL(p, a2, a2, c->inv_w[2]);
478
}
479
480
481
/* Calculate coefficients for interpolated values:
482
*/
483
if (pc_linear)
484
{
485
set_predicate_control_flag_value(p, c, pc_linear);
486
487
brw_ADD(p, c->a1_sub_a0, a1, negate(a0));
488
brw_ADD(p, c->a2_sub_a0, a2, negate(a0));
489
490
/* calculate dA/dx
491
*/
492
brw_MUL(p, brw_null_reg(), c->a1_sub_a0, c->dy2);
493
brw_MAC(p, c->tmp, c->a2_sub_a0, negate(c->dy0));
494
brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);
495
496
/* calculate dA/dy
497
*/
498
brw_MUL(p, brw_null_reg(), c->a2_sub_a0, c->dx0);
499
brw_MAC(p, c->tmp, c->a1_sub_a0, negate(c->dx2));
500
brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
501
}
502
503
{
504
set_predicate_control_flag_value(p, c, pc);
505
/* start point for interpolation
506
*/
507
brw_MOV(p, c->m3C0, a0);
508
509
/* Copy m0..m3 to URB. m0 is implicitly copied from r0 in
510
* the send instruction:
511
*/
512
brw_urb_WRITE(p,
513
brw_null_reg(),
514
0,
515
brw_vec8_grf(0, 0), /* r0, will be copied to m0 */
516
last ? BRW_URB_WRITE_EOT_COMPLETE
517
: BRW_URB_WRITE_NO_FLAGS,
518
4, /* msg len */
519
0, /* response len */
520
i*4, /* offset */
521
BRW_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */
522
}
523
}
524
525
brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
526
}
527
528
529
530
static void brw_emit_line_setup(struct brw_sf_compile *c, bool allocate)
531
{
532
struct brw_codegen *p = &c->func;
533
GLuint i;
534
535
c->flag_value = 0xff;
536
c->nr_verts = 2;
537
538
if (allocate)
539
alloc_regs(c);
540
541
invert_det(c);
542
copy_z_inv_w(c);
543
544
if (c->key.contains_flat_varying)
545
do_flatshade_line(c);
546
547
for (i = 0; i < c->nr_setup_regs; i++)
548
{
549
/* Pair of incoming attributes:
550
*/
551
struct brw_reg a0 = offset(c->vert[0], i);
552
struct brw_reg a1 = offset(c->vert[1], i);
553
GLushort pc, pc_persp, pc_linear;
554
bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
555
556
if (pc_persp)
557
{
558
set_predicate_control_flag_value(p, c, pc_persp);
559
brw_MUL(p, a0, a0, c->inv_w[0]);
560
brw_MUL(p, a1, a1, c->inv_w[1]);
561
}
562
563
/* Calculate coefficients for position, color:
564
*/
565
if (pc_linear) {
566
set_predicate_control_flag_value(p, c, pc_linear);
567
568
brw_ADD(p, c->a1_sub_a0, a1, negate(a0));
569
570
brw_MUL(p, c->tmp, c->a1_sub_a0, c->dx0);
571
brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);
572
573
brw_MUL(p, c->tmp, c->a1_sub_a0, c->dy0);
574
brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
575
}
576
577
{
578
set_predicate_control_flag_value(p, c, pc);
579
580
/* start point for interpolation
581
*/
582
brw_MOV(p, c->m3C0, a0);
583
584
/* Copy m0..m3 to URB.
585
*/
586
brw_urb_WRITE(p,
587
brw_null_reg(),
588
0,
589
brw_vec8_grf(0, 0),
590
last ? BRW_URB_WRITE_EOT_COMPLETE
591
: BRW_URB_WRITE_NO_FLAGS,
592
4, /* msg len */
593
0, /* response len */
594
i*4, /* urb destination offset */
595
BRW_URB_SWIZZLE_TRANSPOSE);
596
}
597
}
598
599
brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
600
}
601
602
static void brw_emit_point_sprite_setup(struct brw_sf_compile *c, bool allocate)
603
{
604
struct brw_codegen *p = &c->func;
605
GLuint i;
606
607
c->flag_value = 0xff;
608
c->nr_verts = 1;
609
610
if (allocate)
611
alloc_regs(c);
612
613
copy_z_inv_w(c);
614
for (i = 0; i < c->nr_setup_regs; i++)
615
{
616
struct brw_reg a0 = offset(c->vert[0], i);
617
GLushort pc, pc_persp, pc_linear, pc_coord_replace;
618
bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
619
620
pc_coord_replace = calculate_point_sprite_mask(c, i);
621
pc_persp &= ~pc_coord_replace;
622
623
if (pc_persp) {
624
set_predicate_control_flag_value(p, c, pc_persp);
625
brw_MUL(p, a0, a0, c->inv_w[0]);
626
}
627
628
/* Point sprite coordinate replacement: A texcoord with this
629
* enabled gets replaced with the value (x, y, 0, 1) where x and
630
* y vary from 0 to 1 across the horizontal and vertical of the
631
* point.
632
*/
633
if (pc_coord_replace) {
634
set_predicate_control_flag_value(p, c, pc_coord_replace);
635
/* Caculate 1.0/PointWidth */
636
gfx4_math(&c->func,
637
c->tmp,
638
BRW_MATH_FUNCTION_INV,
639
0,
640
c->dx0,
641
BRW_MATH_PRECISION_FULL);
642
643
brw_set_default_access_mode(p, BRW_ALIGN_16);
644
645
/* dA/dx, dA/dy */
646
brw_MOV(p, c->m1Cx, brw_imm_f(0.0));
647
brw_MOV(p, c->m2Cy, brw_imm_f(0.0));
648
brw_MOV(p, brw_writemask(c->m1Cx, WRITEMASK_X), c->tmp);
649
if (c->key.sprite_origin_lower_left) {
650
brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), negate(c->tmp));
651
} else {
652
brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), c->tmp);
653
}
654
655
/* attribute constant offset */
656
brw_MOV(p, c->m3C0, brw_imm_f(0.0));
657
if (c->key.sprite_origin_lower_left) {
658
brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_YW), brw_imm_f(1.0));
659
} else {
660
brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_W), brw_imm_f(1.0));
661
}
662
663
brw_set_default_access_mode(p, BRW_ALIGN_1);
664
}
665
666
if (pc & ~pc_coord_replace) {
667
set_predicate_control_flag_value(p, c, pc & ~pc_coord_replace);
668
brw_MOV(p, c->m1Cx, brw_imm_ud(0));
669
brw_MOV(p, c->m2Cy, brw_imm_ud(0));
670
brw_MOV(p, c->m3C0, a0); /* constant value */
671
}
672
673
674
set_predicate_control_flag_value(p, c, pc);
675
/* Copy m0..m3 to URB. */
676
brw_urb_WRITE(p,
677
brw_null_reg(),
678
0,
679
brw_vec8_grf(0, 0),
680
last ? BRW_URB_WRITE_EOT_COMPLETE
681
: BRW_URB_WRITE_NO_FLAGS,
682
4, /* msg len */
683
0, /* response len */
684
i*4, /* urb destination offset */
685
BRW_URB_SWIZZLE_TRANSPOSE);
686
}
687
688
brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
689
}
690
691
/* Points setup - several simplifications as all attributes are
692
* constant across the face of the point (point sprites excluded!)
693
*/
694
static void brw_emit_point_setup(struct brw_sf_compile *c, bool allocate)
695
{
696
struct brw_codegen *p = &c->func;
697
GLuint i;
698
699
c->flag_value = 0xff;
700
c->nr_verts = 1;
701
702
if (allocate)
703
alloc_regs(c);
704
705
copy_z_inv_w(c);
706
707
brw_MOV(p, c->m1Cx, brw_imm_ud(0)); /* zero - move out of loop */
708
brw_MOV(p, c->m2Cy, brw_imm_ud(0)); /* zero - move out of loop */
709
710
for (i = 0; i < c->nr_setup_regs; i++)
711
{
712
struct brw_reg a0 = offset(c->vert[0], i);
713
GLushort pc, pc_persp, pc_linear;
714
bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
715
716
if (pc_persp)
717
{
718
/* This seems odd as the values are all constant, but the
719
* fragment shader will be expecting it:
720
*/
721
set_predicate_control_flag_value(p, c, pc_persp);
722
brw_MUL(p, a0, a0, c->inv_w[0]);
723
}
724
725
726
/* The delta values are always zero, just send the starting
727
* coordinate. Again, this is to fit in with the interpolation
728
* code in the fragment shader.
729
*/
730
{
731
set_predicate_control_flag_value(p, c, pc);
732
733
brw_MOV(p, c->m3C0, a0); /* constant value */
734
735
/* Copy m0..m3 to URB.
736
*/
737
brw_urb_WRITE(p,
738
brw_null_reg(),
739
0,
740
brw_vec8_grf(0, 0),
741
last ? BRW_URB_WRITE_EOT_COMPLETE
742
: BRW_URB_WRITE_NO_FLAGS,
743
4, /* msg len */
744
0, /* response len */
745
i*4, /* urb destination offset */
746
BRW_URB_SWIZZLE_TRANSPOSE);
747
}
748
}
749
750
brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
751
}
752
753
static void brw_emit_anyprim_setup( struct brw_sf_compile *c )
754
{
755
struct brw_codegen *p = &c->func;
756
struct brw_reg payload_prim = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0);
757
struct brw_reg payload_attr = get_element_ud(brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0), 0);
758
struct brw_reg primmask;
759
int jmp;
760
struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
761
762
c->nr_verts = 3;
763
alloc_regs(c);
764
765
primmask = retype(get_element(c->tmp, 0), BRW_REGISTER_TYPE_UD);
766
767
brw_MOV(p, primmask, brw_imm_ud(1));
768
brw_SHL(p, primmask, primmask, payload_prim);
769
770
brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_TRILIST) |
771
(1<<_3DPRIM_TRISTRIP) |
772
(1<<_3DPRIM_TRIFAN) |
773
(1<<_3DPRIM_TRISTRIP_REVERSE) |
774
(1<<_3DPRIM_POLYGON) |
775
(1<<_3DPRIM_RECTLIST) |
776
(1<<_3DPRIM_TRIFAN_NOSTIPPLE)));
777
brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z);
778
jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store;
779
brw_emit_tri_setup(c, false);
780
brw_land_fwd_jump(p, jmp);
781
782
brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_LINELIST) |
783
(1<<_3DPRIM_LINESTRIP) |
784
(1<<_3DPRIM_LINELOOP) |
785
(1<<_3DPRIM_LINESTRIP_CONT) |
786
(1<<_3DPRIM_LINESTRIP_BF) |
787
(1<<_3DPRIM_LINESTRIP_CONT_BF)));
788
brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z);
789
jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store;
790
brw_emit_line_setup(c, false);
791
brw_land_fwd_jump(p, jmp);
792
793
brw_AND(p, v1_null_ud, payload_attr, brw_imm_ud(1<<BRW_SPRITE_POINT_ENABLE));
794
brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z);
795
jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store;
796
brw_emit_point_sprite_setup(c, false);
797
brw_land_fwd_jump(p, jmp);
798
799
brw_emit_point_setup( c, false );
800
}
801
802
const unsigned *
803
brw_compile_sf(const struct brw_compiler *compiler,
804
void *mem_ctx,
805
const struct brw_sf_prog_key *key,
806
struct brw_sf_prog_data *prog_data,
807
struct brw_vue_map *vue_map,
808
unsigned *final_assembly_size)
809
{
810
struct brw_sf_compile c;
811
memset(&c, 0, sizeof(c));
812
813
/* Begin the compilation:
814
*/
815
brw_init_codegen(compiler->devinfo, &c.func, mem_ctx);
816
817
c.key = *key;
818
c.vue_map = *vue_map;
819
if (c.key.do_point_coord) {
820
/*
821
* gl_PointCoord is a FS instead of VS builtin variable, thus it's
822
* not included in c.vue_map generated in VS stage. Here we add
823
* it manually to let SF shader generate the needed interpolation
824
* coefficient for FS shader.
825
*/
826
c.vue_map.varying_to_slot[BRW_VARYING_SLOT_PNTC] = c.vue_map.num_slots;
827
c.vue_map.slot_to_varying[c.vue_map.num_slots++] = BRW_VARYING_SLOT_PNTC;
828
}
829
c.urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET;
830
c.nr_attr_regs = (c.vue_map.num_slots + 1)/2 - c.urb_entry_read_offset;
831
c.nr_setup_regs = c.nr_attr_regs;
832
833
c.prog_data.urb_read_length = c.nr_attr_regs;
834
c.prog_data.urb_entry_size = c.nr_setup_regs * 2;
835
836
/* Which primitive? Or all three?
837
*/
838
switch (key->primitive) {
839
case BRW_SF_PRIM_TRIANGLES:
840
c.nr_verts = 3;
841
brw_emit_tri_setup( &c, true );
842
break;
843
case BRW_SF_PRIM_LINES:
844
c.nr_verts = 2;
845
brw_emit_line_setup( &c, true );
846
break;
847
case BRW_SF_PRIM_POINTS:
848
c.nr_verts = 1;
849
if (key->do_point_sprite)
850
brw_emit_point_sprite_setup( &c, true );
851
else
852
brw_emit_point_setup( &c, true );
853
break;
854
case BRW_SF_PRIM_UNFILLED_TRIS:
855
c.nr_verts = 3;
856
brw_emit_anyprim_setup( &c );
857
break;
858
default:
859
unreachable("not reached");
860
}
861
862
/* FINISHME: SF programs use calculated jumps (i.e., JMPI with a register
863
* source). Compacting would be difficult.
864
*/
865
/* brw_compact_instructions(&c.func, 0, 0, NULL); */
866
867
*prog_data = c.prog_data;
868
869
const unsigned *program = brw_get_program(&c.func, final_assembly_size);
870
871
if (INTEL_DEBUG & DEBUG_SF) {
872
fprintf(stderr, "sf:\n");
873
brw_disassemble_with_labels(compiler->devinfo,
874
program, 0, *final_assembly_size, stderr);
875
fprintf(stderr, "\n");
876
}
877
878
return program;
879
}
880
881