Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/crocus/crocus_state.c
4570 views
1
/*
2
* Copyright © 2017 Intel Corporation
3
*
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
10
*
11
* The above copyright notice and this permission notice shall be included
12
* in all copies or substantial portions of the Software.
13
*
14
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20
* DEALINGS IN THE SOFTWARE.
21
*/
22
23
/**
24
* @file crocus_state.c
25
*
26
* ============================= GENXML CODE =============================
27
* [This file is compiled once per generation.]
28
* =======================================================================
29
*
30
* This is the main state upload code.
31
*
32
* Gallium uses Constant State Objects, or CSOs, for most state. Large,
33
* complex, or highly reusable state can be created once, and bound and
34
* rebound multiple times. This is modeled with the pipe->create_*_state()
35
* and pipe->bind_*_state() hooks. Highly dynamic or inexpensive state is
36
* streamed out on the fly, via pipe->set_*_state() hooks.
37
*
38
* OpenGL involves frequently mutating context state, which is mirrored in
39
* core Mesa by highly mutable data structures. However, most applications
40
* typically draw the same things over and over - from frame to frame, most
41
* of the same objects are still visible and need to be redrawn. So, rather
42
* than inventing new state all the time, applications usually mutate to swap
43
* between known states that we've seen before.
44
*
45
* Gallium isolates us from this mutation by tracking API state, and
46
* distilling it into a set of Constant State Objects, or CSOs. Large,
47
* complex, or typically reusable state can be created once, then reused
48
* multiple times. Drivers can create and store their own associated data.
49
* This create/bind model corresponds to the pipe->create_*_state() and
50
* pipe->bind_*_state() driver hooks.
51
*
52
* Some state is cheap to create, or expected to be highly dynamic. Rather
53
* than creating and caching piles of CSOs for these, Gallium simply streams
54
* them out, via the pipe->set_*_state() driver hooks.
55
*
56
* To reduce draw time overhead, we try to compute as much state at create
57
* time as possible. Wherever possible, we translate the Gallium pipe state
58
* to 3DSTATE commands, and store those commands in the CSO. At draw time,
59
* we can simply memcpy them into a batch buffer.
60
*
61
* No hardware matches the abstraction perfectly, so some commands require
62
* information from multiple CSOs. In this case, we can store two copies
63
* of the packet (one in each CSO), and simply | together their DWords at
64
* draw time. Sometimes the second set is trivial (one or two fields), so
65
* we simply pack it at draw time.
66
*
67
* There are two main components in the file below. First, the CSO hooks
68
* create/bind/track state. The second are the draw-time upload functions,
69
* crocus_upload_render_state() and crocus_upload_compute_state(), which read
70
* the context state and emit the commands into the actual batch.
71
*/
72
73
#include <errno.h>
74
#include <stdio.h>
75
76
#if HAVE_VALGRIND
77
#include <memcheck.h>
78
#include <valgrind.h>
79
#define VG(x) x
80
#ifdef DEBUG
81
#define __gen_validate_value(x) VALGRIND_CHECK_MEM_IS_DEFINED(&(x), sizeof(x))
82
#endif
83
#else
84
#define VG(x)
85
#endif
86
87
#include "drm-uapi/i915_drm.h"
88
#include "intel/common/intel_l3_config.h"
89
#include "intel/common/intel_sample_positions.h"
90
#include "intel/compiler/brw_compiler.h"
91
#include "pipe/p_context.h"
92
#include "pipe/p_defines.h"
93
#include "pipe/p_screen.h"
94
#include "pipe/p_state.h"
95
#include "util/format/u_format.h"
96
#include "util/half_float.h"
97
#include "util/u_dual_blend.h"
98
#include "util/u_framebuffer.h"
99
#include "util/u_helpers.h"
100
#include "util/u_inlines.h"
101
#include "util/u_memory.h"
102
#include "util/u_prim.h"
103
#include "util/u_transfer.h"
104
#include "util/u_upload_mgr.h"
105
#include "util/u_viewport.h"
106
#include "crocus_batch.h"
107
#include "crocus_context.h"
108
#include "crocus_defines.h"
109
#include "crocus_pipe.h"
110
#include "crocus_resource.h"
111
112
#include "crocus_genx_macros.h"
113
#include "intel/common/intel_guardband.h"
114
115
/**
116
* Statically assert that PIPE_* enums match the hardware packets.
117
* (As long as they match, we don't need to translate them.)
118
*/
119
UNUSED static void pipe_asserts()
{
#define PIPE_ASSERT(x) STATIC_ASSERT((int)x)

   /* pipe_logicop happens to match the hardware. */
   PIPE_ASSERT(PIPE_LOGICOP_CLEAR == LOGICOP_CLEAR);
   PIPE_ASSERT(PIPE_LOGICOP_NOR == LOGICOP_NOR);
   PIPE_ASSERT(PIPE_LOGICOP_AND_INVERTED == LOGICOP_AND_INVERTED);
   PIPE_ASSERT(PIPE_LOGICOP_COPY_INVERTED == LOGICOP_COPY_INVERTED);
   PIPE_ASSERT(PIPE_LOGICOP_AND_REVERSE == LOGICOP_AND_REVERSE);
   PIPE_ASSERT(PIPE_LOGICOP_INVERT == LOGICOP_INVERT);
   PIPE_ASSERT(PIPE_LOGICOP_XOR == LOGICOP_XOR);
   PIPE_ASSERT(PIPE_LOGICOP_NAND == LOGICOP_NAND);
   PIPE_ASSERT(PIPE_LOGICOP_AND == LOGICOP_AND);
   PIPE_ASSERT(PIPE_LOGICOP_EQUIV == LOGICOP_EQUIV);
   PIPE_ASSERT(PIPE_LOGICOP_NOOP == LOGICOP_NOOP);
   PIPE_ASSERT(PIPE_LOGICOP_OR_INVERTED == LOGICOP_OR_INVERTED);
   PIPE_ASSERT(PIPE_LOGICOP_COPY == LOGICOP_COPY);
   PIPE_ASSERT(PIPE_LOGICOP_OR_REVERSE == LOGICOP_OR_REVERSE);
   PIPE_ASSERT(PIPE_LOGICOP_OR == LOGICOP_OR);
   PIPE_ASSERT(PIPE_LOGICOP_SET == LOGICOP_SET);

   /* pipe_blendfactor happens to match the hardware. */
   PIPE_ASSERT(PIPE_BLENDFACTOR_ONE == BLENDFACTOR_ONE);
   PIPE_ASSERT(PIPE_BLENDFACTOR_SRC_COLOR == BLENDFACTOR_SRC_COLOR);
   PIPE_ASSERT(PIPE_BLENDFACTOR_SRC_ALPHA == BLENDFACTOR_SRC_ALPHA);
   PIPE_ASSERT(PIPE_BLENDFACTOR_DST_ALPHA == BLENDFACTOR_DST_ALPHA);
   PIPE_ASSERT(PIPE_BLENDFACTOR_DST_COLOR == BLENDFACTOR_DST_COLOR);
   PIPE_ASSERT(PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE == BLENDFACTOR_SRC_ALPHA_SATURATE);
   PIPE_ASSERT(PIPE_BLENDFACTOR_CONST_COLOR == BLENDFACTOR_CONST_COLOR);
   PIPE_ASSERT(PIPE_BLENDFACTOR_CONST_ALPHA == BLENDFACTOR_CONST_ALPHA);
   PIPE_ASSERT(PIPE_BLENDFACTOR_SRC1_COLOR == BLENDFACTOR_SRC1_COLOR);
   PIPE_ASSERT(PIPE_BLENDFACTOR_SRC1_ALPHA == BLENDFACTOR_SRC1_ALPHA);
   PIPE_ASSERT(PIPE_BLENDFACTOR_ZERO == BLENDFACTOR_ZERO);
   PIPE_ASSERT(PIPE_BLENDFACTOR_INV_SRC_COLOR == BLENDFACTOR_INV_SRC_COLOR);
   PIPE_ASSERT(PIPE_BLENDFACTOR_INV_SRC_ALPHA == BLENDFACTOR_INV_SRC_ALPHA);
   PIPE_ASSERT(PIPE_BLENDFACTOR_INV_DST_ALPHA == BLENDFACTOR_INV_DST_ALPHA);
   PIPE_ASSERT(PIPE_BLENDFACTOR_INV_DST_COLOR == BLENDFACTOR_INV_DST_COLOR);
   PIPE_ASSERT(PIPE_BLENDFACTOR_INV_CONST_COLOR == BLENDFACTOR_INV_CONST_COLOR);
   PIPE_ASSERT(PIPE_BLENDFACTOR_INV_CONST_ALPHA == BLENDFACTOR_INV_CONST_ALPHA);
   PIPE_ASSERT(PIPE_BLENDFACTOR_INV_SRC1_COLOR == BLENDFACTOR_INV_SRC1_COLOR);
   PIPE_ASSERT(PIPE_BLENDFACTOR_INV_SRC1_ALPHA == BLENDFACTOR_INV_SRC1_ALPHA);

   /* pipe_blend_func happens to match the hardware. */
   PIPE_ASSERT(PIPE_BLEND_ADD == BLENDFUNCTION_ADD);
   PIPE_ASSERT(PIPE_BLEND_SUBTRACT == BLENDFUNCTION_SUBTRACT);
   PIPE_ASSERT(PIPE_BLEND_REVERSE_SUBTRACT == BLENDFUNCTION_REVERSE_SUBTRACT);
   PIPE_ASSERT(PIPE_BLEND_MIN == BLENDFUNCTION_MIN);
   PIPE_ASSERT(PIPE_BLEND_MAX == BLENDFUNCTION_MAX);

   /* pipe_stencil_op happens to match the hardware.
    * Note the cross-mapping: Gallium's INCR/DECR are saturating
    * (hardware INCRSAT/DECRSAT) and its _WRAP variants are the
    * hardware's wrapping INCR/DECR.
    */
   PIPE_ASSERT(PIPE_STENCIL_OP_KEEP == STENCILOP_KEEP);
   PIPE_ASSERT(PIPE_STENCIL_OP_ZERO == STENCILOP_ZERO);
   PIPE_ASSERT(PIPE_STENCIL_OP_REPLACE == STENCILOP_REPLACE);
   PIPE_ASSERT(PIPE_STENCIL_OP_INCR == STENCILOP_INCRSAT);
   PIPE_ASSERT(PIPE_STENCIL_OP_DECR == STENCILOP_DECRSAT);
   PIPE_ASSERT(PIPE_STENCIL_OP_INCR_WRAP == STENCILOP_INCR);
   PIPE_ASSERT(PIPE_STENCIL_OP_DECR_WRAP == STENCILOP_DECR);
   PIPE_ASSERT(PIPE_STENCIL_OP_INVERT == STENCILOP_INVERT);

#if GFX_VER >= 6
   /* pipe_sprite_coord_mode happens to match 3DSTATE_SBE */
   PIPE_ASSERT(PIPE_SPRITE_COORD_UPPER_LEFT == UPPERLEFT);
   PIPE_ASSERT(PIPE_SPRITE_COORD_LOWER_LEFT == LOWERLEFT);
#endif
#undef PIPE_ASSERT
}
186
187
static unsigned
188
translate_prim_type(enum pipe_prim_type prim, uint8_t verts_per_patch)
189
{
190
static const unsigned map[] = {
191
[PIPE_PRIM_POINTS] = _3DPRIM_POINTLIST,
192
[PIPE_PRIM_LINES] = _3DPRIM_LINELIST,
193
[PIPE_PRIM_LINE_LOOP] = _3DPRIM_LINELOOP,
194
[PIPE_PRIM_LINE_STRIP] = _3DPRIM_LINESTRIP,
195
[PIPE_PRIM_TRIANGLES] = _3DPRIM_TRILIST,
196
[PIPE_PRIM_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP,
197
[PIPE_PRIM_TRIANGLE_FAN] = _3DPRIM_TRIFAN,
198
[PIPE_PRIM_QUADS] = _3DPRIM_QUADLIST,
199
[PIPE_PRIM_QUAD_STRIP] = _3DPRIM_QUADSTRIP,
200
[PIPE_PRIM_POLYGON] = _3DPRIM_POLYGON,
201
#if GFX_VER >= 6
202
[PIPE_PRIM_LINES_ADJACENCY] = _3DPRIM_LINELIST_ADJ,
203
[PIPE_PRIM_LINE_STRIP_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ,
204
[PIPE_PRIM_TRIANGLES_ADJACENCY] = _3DPRIM_TRILIST_ADJ,
205
[PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
206
#endif
207
#if GFX_VER >= 7
208
[PIPE_PRIM_PATCHES] = _3DPRIM_PATCHLIST_1 - 1,
209
#endif
210
};
211
212
return map[prim] + (prim == PIPE_PRIM_PATCHES ? verts_per_patch : 0);
213
}
214
215
static unsigned
216
translate_compare_func(enum pipe_compare_func pipe_func)
217
{
218
static const unsigned map[] = {
219
[PIPE_FUNC_NEVER] = COMPAREFUNCTION_NEVER,
220
[PIPE_FUNC_LESS] = COMPAREFUNCTION_LESS,
221
[PIPE_FUNC_EQUAL] = COMPAREFUNCTION_EQUAL,
222
[PIPE_FUNC_LEQUAL] = COMPAREFUNCTION_LEQUAL,
223
[PIPE_FUNC_GREATER] = COMPAREFUNCTION_GREATER,
224
[PIPE_FUNC_NOTEQUAL] = COMPAREFUNCTION_NOTEQUAL,
225
[PIPE_FUNC_GEQUAL] = COMPAREFUNCTION_GEQUAL,
226
[PIPE_FUNC_ALWAYS] = COMPAREFUNCTION_ALWAYS,
227
};
228
return map[pipe_func];
229
}
230
231
static unsigned
232
translate_shadow_func(enum pipe_compare_func pipe_func)
233
{
234
/* Gallium specifies the result of shadow comparisons as:
235
*
236
* 1 if ref <op> texel,
237
* 0 otherwise.
238
*
239
* The hardware does:
240
*
241
* 0 if texel <op> ref,
242
* 1 otherwise.
243
*
244
* So we need to flip the operator and also negate.
245
*/
246
static const unsigned map[] = {
247
[PIPE_FUNC_NEVER] = PREFILTEROP_ALWAYS,
248
[PIPE_FUNC_LESS] = PREFILTEROP_LEQUAL,
249
[PIPE_FUNC_EQUAL] = PREFILTEROP_NOTEQUAL,
250
[PIPE_FUNC_LEQUAL] = PREFILTEROP_LESS,
251
[PIPE_FUNC_GREATER] = PREFILTEROP_GEQUAL,
252
[PIPE_FUNC_NOTEQUAL] = PREFILTEROP_EQUAL,
253
[PIPE_FUNC_GEQUAL] = PREFILTEROP_GREATER,
254
[PIPE_FUNC_ALWAYS] = PREFILTEROP_NEVER,
255
};
256
return map[pipe_func];
257
}
258
259
static unsigned
260
translate_cull_mode(unsigned pipe_face)
261
{
262
static const unsigned map[4] = {
263
[PIPE_FACE_NONE] = CULLMODE_NONE,
264
[PIPE_FACE_FRONT] = CULLMODE_FRONT,
265
[PIPE_FACE_BACK] = CULLMODE_BACK,
266
[PIPE_FACE_FRONT_AND_BACK] = CULLMODE_BOTH,
267
};
268
return map[pipe_face];
269
}
270
271
#if GFX_VER >= 6
/**
 * Translate a Gallium polygon mode to the hardware FILL_MODE.
 * FILL_RECTANGLE has no direct hardware equivalent and is drawn solid.
 */
static unsigned
translate_fill_mode(unsigned pipe_polymode)
{
   switch (pipe_polymode) {
   case PIPE_POLYGON_MODE_LINE:  return FILL_MODE_WIREFRAME;
   case PIPE_POLYGON_MODE_POINT: return FILL_MODE_POINT;
   case PIPE_POLYGON_MODE_FILL:
   case PIPE_POLYGON_MODE_FILL_RECTANGLE:
   default:                      return FILL_MODE_SOLID;
   }
}
#endif
284
285
static unsigned
286
translate_mip_filter(enum pipe_tex_mipfilter pipe_mip)
287
{
288
static const unsigned map[] = {
289
[PIPE_TEX_MIPFILTER_NEAREST] = MIPFILTER_NEAREST,
290
[PIPE_TEX_MIPFILTER_LINEAR] = MIPFILTER_LINEAR,
291
[PIPE_TEX_MIPFILTER_NONE] = MIPFILTER_NONE,
292
};
293
return map[pipe_mip];
294
}
295
296
static uint32_t
297
translate_wrap(unsigned pipe_wrap, bool either_nearest)
298
{
299
static const unsigned map[] = {
300
[PIPE_TEX_WRAP_REPEAT] = TCM_WRAP,
301
#if GFX_VER == 8
302
[PIPE_TEX_WRAP_CLAMP] = TCM_HALF_BORDER,
303
#else
304
[PIPE_TEX_WRAP_CLAMP] = TCM_CLAMP_BORDER,
305
#endif
306
[PIPE_TEX_WRAP_CLAMP_TO_EDGE] = TCM_CLAMP,
307
[PIPE_TEX_WRAP_CLAMP_TO_BORDER] = TCM_CLAMP_BORDER,
308
[PIPE_TEX_WRAP_MIRROR_REPEAT] = TCM_MIRROR,
309
[PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE] = TCM_MIRROR_ONCE,
310
311
/* These are unsupported. */
312
[PIPE_TEX_WRAP_MIRROR_CLAMP] = -1,
313
[PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER] = -1,
314
};
315
#if GFX_VER < 8
316
if (pipe_wrap == PIPE_TEX_WRAP_CLAMP && either_nearest)
317
return TCM_CLAMP;
318
#endif
319
return map[pipe_wrap];
320
}
321
322
/**
 * Allocate `size` bytes of space in the batch's dynamic state buffer,
 * aligned to `alignment`.  Equivalent of brw_state_batch().
 *
 * Returns a CPU pointer to the allocated space and stores the byte offset
 * within the state buffer in *out_offset.
 */
static uint32_t *
stream_state(struct crocus_batch *batch,
             unsigned size,
             unsigned alignment,
             uint32_t *out_offset)
{
   uint32_t offset = ALIGN(batch->state.used, alignment);

   if (offset + size >= STATE_SZ && !batch->no_wrap) {
      /* Out of room in this buffer: flush and restart allocation in the
       * fresh state buffer (unless wrapping is currently forbidden). */
      crocus_batch_flush(batch);
      offset = ALIGN(batch->state.used, alignment);
   } else if (offset + size >= batch->state.bo->size) {
      /* Grow the BO by 1.5x, capped at MAX_STATE_SIZE. */
      const unsigned new_size =
         MIN2(batch->state.bo->size + batch->state.bo->size / 2,
              MAX_STATE_SIZE);
      crocus_grow_buffer(batch, true, batch->state.used, new_size);
      assert(offset + size < batch->state.bo->size);
   }

   crocus_record_state_size(batch->state_sizes, offset, size);

   batch->state.used = offset + size;
   *out_offset = offset;

   /* The map is uint32_t-typed; offset is in bytes. */
   return (uint32_t *)batch->state.map + (offset >> 2);
}
351
352
/**
 * Allocate space in the state buffer via stream_state() and copy `data`
 * into it.  Returns the byte offset of the copy within the state buffer.
 */
static uint32_t
emit_state(struct crocus_batch *batch, const void *data, unsigned size,
           unsigned alignment)
{
   unsigned offset = 0;
   uint32_t *dest = stream_state(batch, size, alignment, &offset);

   if (dest != NULL)
      memcpy(dest, data, size);

   return offset;
}
367
368
#if GFX_VER <= 5
/**
 * Emit 3DSTATE_PIPELINED_POINTERS, pointing the fixed-function pipeline
 * units (VS/GS/CLIP/SF/WM/CC) at their unit state in the state buffer.
 * The clip unit is always enabled; GS only when `gs_active`.
 */
static void
upload_pipelined_state_pointers(struct crocus_batch *batch,
                                bool gs_active, uint32_t gs_offset,
                                uint32_t vs_offset, uint32_t sf_offset,
                                uint32_t clip_offset, uint32_t wm_offset, uint32_t cc_offset)
{
#if GFX_VER == 5
   /* Need to flush before changing clip max threads for errata. */
   crocus_emit_cmd(batch, GENX(MI_FLUSH), foo);
#endif

   crocus_emit_cmd(batch, GENX(3DSTATE_PIPELINED_POINTERS), pp) {
      pp.PointertoVSState = ro_bo(batch->state.bo, vs_offset);
      pp.GSEnable = gs_active;
      if (gs_active)
         pp.PointertoGSState = ro_bo(batch->state.bo, gs_offset);
      pp.ClipEnable = true;
      pp.PointertoCLIPState = ro_bo(batch->state.bo, clip_offset);
      pp.PointertoSFState = ro_bo(batch->state.bo, sf_offset);
      pp.PointertoWMState = ro_bo(batch->state.bo, wm_offset);
      pp.PointertoColorCalcState = ro_bo(batch->state.bo, cc_offset);
   }
}

#endif
394
/**
 * Did field 'x' change between 'old_cso' and 'new_cso'?
 *
 * (If so, we may want to set some dirty flags.)
 *
 * Both macros treat a missing old CSO (first bind) as "changed".
 */
#define cso_changed(x) (!old_cso || (old_cso->x != new_cso->x))
/* Variant for array/struct fields: compares the field's bytes. */
#define cso_changed_memcmp(x) \
   (!old_cso || memcmp(old_cso->x, new_cso->x, sizeof(old_cso->x)) != 0)
402
403
/**
 * Flush and end-of-pipe sync before emitting STATE_BASE_ADDRESS.
 * No-op before gen6.
 */
static void
flush_before_state_base_change(struct crocus_batch *batch)
{
#if GFX_VER >= 6
   /* Flush before emitting STATE_BASE_ADDRESS.
    *
    * This isn't documented anywhere in the PRM. However, it seems to be
    * necessary prior to changing the surface state base address. We've
    * seen issues in Vulkan where we get GPU hangs when using multi-level
    * command buffers which clear depth, reset state base address, and then
    * go render stuff.
    *
    * Normally, in GL, we would trust the kernel to do sufficient stalls
    * and flushes prior to executing our batch. However, it doesn't seem
    * as if the kernel's flushing is always sufficient and we don't want to
    * rely on it.
    *
    * We make this an end-of-pipe sync instead of a normal flush because we
    * do not know the current status of the GPU. On Haswell at least,
    * having a fast-clear operation in flight at the same time as a normal
    * rendering operation can cause hangs. Since the kernel's flushing is
    * insufficient, we need to ensure that any rendering operations from
    * other processes are definitely complete before we try to do our own
    * rendering. It's a bit of a big hammer but it appears to work.
    */
   /* The data cache only exists (as a distinct flushable cache) on gen7+. */
   const unsigned dc_flush =
      batch->screen->devinfo.ver >= 7 ? PIPE_CONTROL_DATA_CACHE_FLUSH : 0;
   crocus_emit_end_of_pipe_sync(batch,
                                "change STATE_BASE_ADDRESS (flushes)",
                                PIPE_CONTROL_RENDER_TARGET_FLUSH |
                                dc_flush |
                                PIPE_CONTROL_DEPTH_CACHE_FLUSH);
#endif
}
437
438
/**
 * Invalidate caches after emitting STATE_BASE_ADDRESS so stale surface
 * state and binding tables are not used.  No-op before gen6.
 */
static void
flush_after_state_base_change(struct crocus_batch *batch)
{
   /* After re-setting the surface state base address, we have to do some
    * cache flushing so that the sampler engine will pick up the new
    * SURFACE_STATE objects and binding tables. From the Broadwell PRM,
    * Shared Function > 3D Sampler > State > State Caching (page 96):
    *
    *    Coherency with system memory in the state cache, like the texture
    *    cache is handled partially by software. It is expected that the
    *    command stream or shader will issue Cache Flush operation or
    *    Cache_Flush sampler message to ensure that the L1 cache remains
    *    coherent with system memory.
    *
    *    [...]
    *
    *    Whenever the value of the Dynamic_State_Base_Addr,
    *    Surface_State_Base_Addr are altered, the L1 state cache must be
    *    invalidated to ensure the new surface or sampler state is fetched
    *    from system memory.
    *
    * The PIPE_CONTROL command has a "State Cache Invalidation Enable" bit
    * which, according the PIPE_CONTROL instruction documentation in the
    * Broadwell PRM:
    *
    *    Setting this bit is independent of any other bit in this packet.
    *    This bit controls the invalidation of the L1 and L2 state caches
    *    at the top of the pipe i.e. at the parsing time.
    *
    * Unfortunately, experimentation seems to indicate that state cache
    * invalidation through a PIPE_CONTROL does nothing whatsoever in
    * regards to surface state and binding tables. Instead, it seems that
    * invalidating the texture cache is what is actually needed.
    *
    * XXX: As far as we have been able to determine through
    * experimentation, flushing the texture cache appears to be
    * sufficient. The theory here is that all of the sampling/rendering
    * units cache the binding table in the texture cache. However, we have
    * yet to be able to actually confirm this.
    */
#if GFX_VER >= 6
   crocus_emit_end_of_pipe_sync(batch,
                                "change STATE_BASE_ADDRESS (invalidates)",
                                PIPE_CONTROL_INSTRUCTION_INVALIDATE |
                                PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
                                PIPE_CONTROL_CONST_CACHE_INVALIDATE |
                                PIPE_CONTROL_STATE_CACHE_INVALIDATE);
#endif
}
487
488
#if GFX_VER >= 6
489
/**
 * Emit MI_STORE_REGISTER_MEM to write a 32-bit MMIO register's value
 * to `bo` at `offset`.  Predicated stores are only supported on HSW+;
 * asking for predication on older hardware is a driver bug.
 */
static void
crocus_store_register_mem32(struct crocus_batch *batch, uint32_t reg,
                            struct crocus_bo *bo, uint32_t offset,
                            bool predicated)
{
   crocus_emit_cmd(batch, GENX(MI_STORE_REGISTER_MEM), srm) {
      srm.RegisterAddress = reg;
      srm.MemoryAddress = ggtt_bo(bo, offset);
#if GFX_VERx10 >= 75
      srm.PredicateEnable = predicated;
#else
      if (predicated)
         unreachable("unsupported predication");
#endif
   }
}
505
506
/**
 * Store a 64-bit register pair to memory as two 32-bit stores
 * (low DWord first).
 */
static void
crocus_store_register_mem64(struct crocus_batch *batch, uint32_t reg,
                            struct crocus_bo *bo, uint32_t offset,
                            bool predicated)
{
   crocus_store_register_mem32(batch, reg + 0, bo, offset + 0, predicated);
   crocus_store_register_mem32(batch, reg + 4, bo, offset + 4, predicated);
}
514
#endif
515
516
#if GFX_VER >= 7
517
/**
 * Emit MI_LOAD_REGISTER_IMM to write an immediate value into an MMIO
 * register.
 */
static void
_crocus_emit_lri(struct crocus_batch *batch, uint32_t reg, uint32_t val)
{
   crocus_emit_cmd(batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
      lri.RegisterOffset = reg;
      lri.DataDWord = val;
   }
}
/* Convenience wrapper taking a genxml register name instead of an offset. */
#define crocus_emit_lri(b, r, v) _crocus_emit_lri(b, GENX(r##_num), v)
526
527
#if GFX_VERx10 >= 75
528
/**
 * Emit MI_LOAD_REGISTER_REG to copy one MMIO register into another
 * (HSW+ only — guarded by the surrounding GFX_VERx10 >= 75 block).
 */
static void
_crocus_emit_lrr(struct crocus_batch *batch, uint32_t dst, uint32_t src)
{
   crocus_emit_cmd(batch, GENX(MI_LOAD_REGISTER_REG), lrr) {
      lrr.SourceRegisterAddress = src;
      lrr.DestinationRegisterAddress = dst;
   }
}
536
537
/** Copy a 32-bit MMIO register into another register. */
static void
crocus_load_register_reg32(struct crocus_batch *batch, uint32_t dst,
                           uint32_t src)
{
   _crocus_emit_lrr(batch, dst, src);
}
543
544
/** Copy a 64-bit MMIO register pair via two 32-bit register copies. */
static void
crocus_load_register_reg64(struct crocus_batch *batch, uint32_t dst,
                           uint32_t src)
{
   _crocus_emit_lrr(batch, dst, src);
   _crocus_emit_lrr(batch, dst + 4, src + 4);
}
551
#endif
552
553
/** Write a 32-bit immediate into an MMIO register. */
static void
crocus_load_register_imm32(struct crocus_batch *batch, uint32_t reg,
                           uint32_t val)
{
   _crocus_emit_lri(batch, reg, val);
}
559
560
/** Write a 64-bit immediate into an MMIO register pair (low DWord first). */
static void
crocus_load_register_imm64(struct crocus_batch *batch, uint32_t reg,
                           uint64_t val)
{
   _crocus_emit_lri(batch, reg + 0, val & 0xffffffff);
   _crocus_emit_lri(batch, reg + 4, val >> 32);
}
567
568
/**
 * Emit MI_LOAD_REGISTER_MEM to load a 32-bit MMIO register from a buffer.
 */
static void
crocus_load_register_mem32(struct crocus_batch *batch, uint32_t reg,
                           struct crocus_bo *bo, uint32_t offset)
{
   crocus_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
      lrm.RegisterAddress = reg;
      lrm.MemoryAddress = ro_bo(bo, offset);
   }
}
580
581
/**
 * Load a 64-bit value from a buffer into a MMIO register via
 * two MI_LOAD_REGISTER_MEM commands.
 */
static void
crocus_load_register_mem64(struct crocus_batch *batch, uint32_t reg,
                           struct crocus_bo *bo, uint32_t offset)
{
   crocus_load_register_mem32(batch, reg + 0, bo, offset + 0);
   crocus_load_register_mem32(batch, reg + 4, bo, offset + 4);
}
592
593
#if GFX_VERx10 >= 75
594
/**
 * Emit MI_STORE_DATA_IMM to write a 32-bit immediate into a buffer.
 */
static void
crocus_store_data_imm32(struct crocus_batch *batch,
                        struct crocus_bo *bo, uint32_t offset,
                        uint32_t imm)
{
   crocus_emit_cmd(batch, GENX(MI_STORE_DATA_IMM), sdi) {
      sdi.Address = rw_bo(bo, offset);
#if GFX_VER >= 6
      sdi.ImmediateData = imm;
#endif
   }
}
606
607
/**
 * Emit MI_STORE_DATA_IMM to write a 64-bit immediate into a buffer.
 */
static void
crocus_store_data_imm64(struct crocus_batch *batch,
                        struct crocus_bo *bo, uint32_t offset,
                        uint64_t imm)
{
   /* Can't use crocus_emit_cmd because MI_STORE_DATA_IMM has a length of
    * 2 in genxml but it's actually variable length and we need 5 DWords.
    */
   void *map = crocus_get_command_space(batch, 4 * 5);
   _crocus_pack_command(batch, GENX(MI_STORE_DATA_IMM), map, sdi) {
      /* DWordLength is encoded as (total length - 2) per MI convention. */
      sdi.DWordLength = 5 - 2;
      sdi.Address = rw_bo(bo, offset);
#if GFX_VER >= 6
      sdi.ImmediateData = imm;
#endif
   }
}
624
#endif
625
626
/**
 * Copy `bytes` (a multiple of 4; both offsets DWord-aligned) from one
 * buffer to another by bouncing each DWord through a scratch MMIO
 * register on the command streamer.
 */
static void
crocus_copy_mem_mem(struct crocus_batch *batch,
                    struct crocus_bo *dst_bo, uint32_t dst_offset,
                    struct crocus_bo *src_bo, uint32_t src_offset,
                    unsigned bytes)
{
   assert(bytes % 4 == 0);
   assert(dst_offset % 4 == 0);
   assert(src_offset % 4 == 0);

#define CROCUS_TEMP_REG 0x2440 /* GEN7_3DPRIM_BASE_VERTEX */
   for (unsigned i = 0; i < bytes; i += 4) {
      crocus_load_register_mem32(batch, CROCUS_TEMP_REG,
                                 src_bo, src_offset + i);
      crocus_store_register_mem32(batch, CROCUS_TEMP_REG,
                                  dst_bo, dst_offset + i, false);
   }
}
644
#endif
645
646
/**
 * Gallium CSO for rasterizer state.
 */
struct crocus_rasterizer_state {
   /* Copy of the Gallium state this CSO was created from. */
   struct pipe_rasterizer_state cso;
#if GFX_VER >= 6
   /* Pre-packed 3DSTATE commands, combined into the batch at draw time. */
   uint32_t sf[GENX(3DSTATE_SF_length)];
   uint32_t clip[GENX(3DSTATE_CLIP_length)];
#endif
#if GFX_VER >= 8
   uint32_t raster[GENX(3DSTATE_RASTER_length)];
#endif
   uint32_t line_stipple[GENX(3DSTATE_LINE_STIPPLE_length)];

   /* NOTE(review): presumably the count of user clip-plane constants to
    * push — confirm against calculate_curbe_offsets()/clip setup. */
   uint8_t num_clip_plane_consts;
   /* True when a polygon fill mode of POINT or LINE is in effect. */
   bool fill_mode_point_or_line;
};
663
664
#if GFX_VER <= 5
/* Indices into the limits[] table below, one per URB section. */
#define URB_VS 0
#define URB_GS 1
#define URB_CLP 2
#define URB_SF 3
#define URB_CS 4

/* Per-section URB allocation limits (entry counts and entry sizes, in
 * URB allocation units) consumed by crocus_calculate_urb_fence(). */
static const struct {
   uint32_t min_nr_entries;
   uint32_t preferred_nr_entries;
   uint32_t min_entry_size;
   uint32_t max_entry_size;
} limits[URB_CS+1] = {
   { 16, 32, 1, 5 }, /* vs */
   { 4, 8, 1, 5 }, /* gs */
   { 5, 10, 1, 5 }, /* clp */
   { 1, 8, 1, 12 }, /* sf */
   { 1, 4, 1, 32 } /* cs */
};
683
684
static bool check_urb_layout(struct crocus_context *ice)
685
{
686
ice->urb.vs_start = 0;
687
ice->urb.gs_start = ice->urb.nr_vs_entries * ice->urb.vsize;
688
ice->urb.clip_start = ice->urb.gs_start + ice->urb.nr_gs_entries * ice->urb.vsize;
689
ice->urb.sf_start = ice->urb.clip_start + ice->urb.nr_clip_entries * ice->urb.vsize;
690
ice->urb.cs_start = ice->urb.sf_start + ice->urb.nr_sf_entries * ice->urb.sfsize;
691
692
return ice->urb.cs_start + ice->urb.nr_cs_entries *
693
ice->urb.csize <= ice->urb.size;
694
}
695
696
697
/**
 * Recompute the gen4/5 URB partition (entry counts/sizes for the VS, GS,
 * CLIP, SF, and CS sections) when the requested entry sizes change.
 *
 * Returns true if the layout changed, meaning URB_FENCE must be re-emitted.
 */
static bool
crocus_calculate_urb_fence(struct crocus_batch *batch, unsigned csize,
                           unsigned vsize, unsigned sfsize)
{
   const struct intel_device_info *devinfo = &batch->screen->devinfo;
   struct crocus_context *ice = batch->ice;
   /* Clamp the requested entry sizes up to the hardware minimums. */
   if (csize < limits[URB_CS].min_entry_size)
      csize = limits[URB_CS].min_entry_size;

   if (vsize < limits[URB_VS].min_entry_size)
      vsize = limits[URB_VS].min_entry_size;

   if (sfsize < limits[URB_SF].min_entry_size)
      sfsize = limits[URB_SF].min_entry_size;

   /* Recalculate when any size grew, or when we're constrained and a size
    * shrank (a chance to escape constrained mode). */
   if (ice->urb.vsize < vsize ||
       ice->urb.sfsize < sfsize ||
       ice->urb.csize < csize ||
       (ice->urb.constrained && (ice->urb.vsize > vsize ||
                                 ice->urb.sfsize > sfsize ||
                                 ice->urb.csize > csize))) {


      ice->urb.csize = csize;
      ice->urb.sfsize = sfsize;
      ice->urb.vsize = vsize;

      /* Start from the preferred entry counts. */
      ice->urb.nr_vs_entries = limits[URB_VS].preferred_nr_entries;
      ice->urb.nr_gs_entries = limits[URB_GS].preferred_nr_entries;
      ice->urb.nr_clip_entries = limits[URB_CLP].preferred_nr_entries;
      ice->urb.nr_sf_entries = limits[URB_SF].preferred_nr_entries;
      ice->urb.nr_cs_entries = limits[URB_CS].preferred_nr_entries;

      ice->urb.constrained = 0;

      /* Ironlake and G4x have larger URBs; try bigger VS/SF allocations
       * first, falling back to the preferred counts if they don't fit. */
      if (devinfo->ver == 5) {
         ice->urb.nr_vs_entries = 128;
         ice->urb.nr_sf_entries = 48;
         if (check_urb_layout(ice)) {
            goto done;
         } else {
            ice->urb.constrained = 1;
            ice->urb.nr_vs_entries = limits[URB_VS].preferred_nr_entries;
            ice->urb.nr_sf_entries = limits[URB_SF].preferred_nr_entries;
         }
      } else if (devinfo->is_g4x) {
         ice->urb.nr_vs_entries = 64;
         if (check_urb_layout(ice)) {
            goto done;
         } else {
            ice->urb.constrained = 1;
            ice->urb.nr_vs_entries = limits[URB_VS].preferred_nr_entries;
         }
      }

      /* Last resort: drop every section to its minimum entry count. */
      if (!check_urb_layout(ice)) {
         ice->urb.nr_vs_entries = limits[URB_VS].min_nr_entries;
         ice->urb.nr_gs_entries = limits[URB_GS].min_nr_entries;
         ice->urb.nr_clip_entries = limits[URB_CLP].min_nr_entries;
         ice->urb.nr_sf_entries = limits[URB_SF].min_nr_entries;
         ice->urb.nr_cs_entries = limits[URB_CS].min_nr_entries;

         /* Mark us as operating with constrained nr_entries, so that next
          * time we recalculate we'll resize the fences in the hope of
          * escaping constrained mode and getting back to normal performance.
          */
         ice->urb.constrained = 1;

         if (!check_urb_layout(ice)) {
            /* This is impossible, given the maximal sizes of urb
             * entries and the values for minimum nr of entries
             * provided above.
             */
            fprintf(stderr, "couldn't calculate URB layout!\n");
            exit(1);
         }

         if (unlikely(INTEL_DEBUG & (DEBUG_URB|DEBUG_PERF)))
            fprintf(stderr, "URB CONSTRAINED\n");
      }

done:
      if (unlikely(INTEL_DEBUG & DEBUG_URB))
         fprintf(stderr,
                 "URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. %d\n",
                 ice->urb.vs_start,
                 ice->urb.gs_start,
                 ice->urb.clip_start,
                 ice->urb.sf_start,
                 ice->urb.cs_start,
                 ice->urb.size);
      return true;
   }
   return false;
}
792
793
/**
 * Emit URB_FENCE with the current section boundaries computed by
 * check_urb_layout(), requesting reallocation for every unit.
 */
static void
crocus_upload_urb_fence(struct crocus_batch *batch)
{
   uint32_t urb_fence[3];
   _crocus_pack_command(batch, GENX(URB_FENCE), urb_fence, urb) {
      urb.VSUnitURBReallocationRequest = 1;
      urb.GSUnitURBReallocationRequest = 1;
      urb.CLIPUnitURBReallocationRequest = 1;
      urb.SFUnitURBReallocationRequest = 1;
      urb.VFEUnitURBReallocationRequest = 1;
      urb.CSUnitURBReallocationRequest = 1;

      /* Each fence is the END of its section (== start of the next). */
      urb.VSFence = batch->ice->urb.gs_start;
      urb.GSFence = batch->ice->urb.clip_start;
      urb.CLIPFence = batch->ice->urb.sf_start;
      urb.SFFence = batch->ice->urb.cs_start;
      urb.CSFence = batch->ice->urb.size;
   }

   /* erratum: URB_FENCE must not cross a 64byte cacheline.
    * Pad with zero DWords (MI_NOOP) so the 3-DWord packet starts early
    * enough in the current 16-byte group not to straddle the boundary. */
   if ((crocus_batch_bytes_used(batch) & 15) > 12) {
      int pad = 16 - (crocus_batch_bytes_used(batch) & 15);
      do {
         *(uint32_t *)batch->command.map_next = 0;
         batch->command.map_next += sizeof(uint32_t);
      } while (--pad);
   }

   crocus_batch_emit(batch, urb_fence, sizeof(uint32_t) * 3);
}
823
824
/**
 * Compute the layout of the CURBE (constant URB entry) buffer: how many
 * 512-bit registers the WM, clip, and VS sections need and where each
 * section starts.
 *
 * Returns true if the layout changed, so the CURBE data must be
 * re-uploaded.
 */
static bool
calculate_curbe_offsets(struct crocus_batch *batch)
{
   struct crocus_context *ice = batch->ice;

   unsigned nr_fp_regs, nr_vp_regs, nr_clip_regs = 0;
   unsigned total_regs;

   /* Fragment shader push-constant (UBO range) requirements. */
   nr_fp_regs = 0;
   for (int i = 0; i < 4; i++) {
      const struct brw_ubo_range *range = &ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data->ubo_ranges[i];
      if (range->length == 0)
         continue;

      /* ubo range tracks at 256-bit, we need 512-bit */
      nr_fp_regs += (range->length + 1) / 2;
   }

   /* User clip planes: 6 fixed planes plus one per enabled user plane,
    * 4 floats each, packed into 512-bit (16-float) units. */
   if (ice->state.cso_rast->cso.clip_plane_enable) {
      unsigned nr_planes = 6 + util_bitcount(ice->state.cso_rast->cso.clip_plane_enable);
      nr_clip_regs = (nr_planes * 4 + 15) / 16;
   }

   /* Vertex shader push-constant requirements. */
   nr_vp_regs = 0;
   for (int i = 0; i < 4; i++) {
      const struct brw_ubo_range *range = &ice->shaders.prog[MESA_SHADER_VERTEX]->prog_data->ubo_ranges[i];
      if (range->length == 0)
         continue;

      /* ubo range tracks at 256-bit, we need 512-bit */
      nr_vp_regs += (range->length + 1) / 2;
   }
   if (nr_vp_regs == 0) {
      /* The pre-gen6 VS requires that some push constants get loaded no
       * matter what, or the GPU would hang.
       */
      nr_vp_regs = 1;
   }
   total_regs = nr_fp_regs + nr_vp_regs + nr_clip_regs;

   /* The CURBE allocation size is limited to 32 512-bit units (128 EU
    * registers, or 1024 floats). See CS_URB_STATE in the gen4 or gen5
    * (volume 1, part 1) PRMs.
    *
    * Note that in brw_fs.cpp we're only loading up to 16 EU registers of
    * values as push constants before spilling to pull constants, and in
    * brw_vec4.cpp we're loading up to 32 registers of push constants. An EU
    * register is 1/2 of one of these URB entry units, so that leaves us 16 EU
    * regs for clip.
    */
   assert(total_regs <= 32);

   /* Lazy resize: only recompute when a section grew, or when the total
    * shrank dramatically (to under a quarter of a >16-unit allocation).
    */
   if (nr_fp_regs > ice->curbe.wm_size ||
       nr_vp_regs > ice->curbe.vs_size ||
       nr_clip_regs != ice->curbe.clip_size ||
       (total_regs < ice->curbe.total_size / 4 &&
        ice->curbe.total_size > 16)) {

      GLuint reg = 0;

      /* Calculate a new layout:
       */
      reg = 0;
      ice->curbe.wm_start = reg;
      ice->curbe.wm_size = nr_fp_regs; reg += nr_fp_regs;
      ice->curbe.clip_start = reg;
      ice->curbe.clip_size = nr_clip_regs; reg += nr_clip_regs;
      ice->curbe.vs_start = reg;
      ice->curbe.vs_size = nr_vp_regs; reg += nr_vp_regs;
      ice->curbe.total_size = reg;

      if (0)
         fprintf(stderr, "curbe wm %d+%d clip %d+%d vs %d+%d\n",
                 ice->curbe.wm_start,
                 ice->curbe.wm_size,
                 ice->curbe.clip_start,
                 ice->curbe.clip_size,
                 ice->curbe.vs_start,
                 ice->curbe.vs_size );
      return true;
   }
   return false;
}
909
910
static void
911
upload_shader_consts(struct crocus_context *ice,
912
gl_shader_stage stage,
913
uint32_t *map,
914
unsigned start)
915
{
916
struct crocus_compiled_shader *shader = ice->shaders.prog[stage];
917
struct brw_stage_prog_data *prog_data = (void *) shader->prog_data;
918
uint32_t *cmap;
919
bool found = false;
920
unsigned offset = start * 16;
921
int total = 0;
922
for (int i = 0; i < 4; i++) {
923
const struct brw_ubo_range *range = &prog_data->ubo_ranges[i];
924
925
if (range->length == 0)
926
continue;
927
928
unsigned block_index = crocus_bti_to_group_index(
929
&shader->bt, CROCUS_SURFACE_GROUP_UBO, range->block);
930
unsigned len = range->length * 8 * sizeof(float);
931
unsigned start = range->start * 8 * sizeof(float);
932
struct pipe_transfer *transfer;
933
934
cmap = pipe_buffer_map_range(&ice->ctx, ice->state.shaders[stage].constbufs[block_index].buffer,
935
ice->state.shaders[stage].constbufs[block_index].buffer_offset + start, len,
936
PIPE_MAP_READ | PIPE_MAP_UNSYNCHRONIZED, &transfer);
937
if (cmap)
938
memcpy(&map[offset + (total * 8)], cmap, len);
939
pipe_buffer_unmap(&ice->ctx, transfer);
940
total += range->length;
941
found = true;
942
}
943
944
if (stage == MESA_SHADER_VERTEX && !found) {
945
/* The pre-gen6 VS requires that some push constants get loaded no
946
* matter what, or the GPU would hang.
947
*/
948
unsigned len = 16;
949
memset(&map[offset], 0, len);
950
}
951
}
952
953
/* The six canonical view-volume clip planes (A, B, C, D coefficients),
 * loaded into the CURBE ahead of any user clip planes:
 * -z, +z, -y, +y, -x, +x.
 */
static const float fixed_plane[6][4] = {
   { 0, 0, -1, 1 },
   { 0, 0, 1, 1 },
   { 0, -1, 0, 1 },
   { 0, 1, 0, 1 },
   {-1, 0, 0, 1 },
   { 1, 0, 0, 1 }
};
961
962
/* Upload the CURBE (constant URB entry) contents for gen4/5: fragment
 * shader constants, clip planes, and vertex shader constants, laid out
 * according to the offsets computed in the lazy-resize path above, then
 * emit CONSTANT_BUFFER to point the hardware at the new buffer.
 */
static void
gen4_upload_curbe(struct crocus_batch *batch)
{
   struct crocus_context *ice = batch->ice;
   const unsigned sz = ice->curbe.total_size;
   /* total_size is in 512-bit units: 16 floats each. */
   const unsigned buf_sz = sz * 16 * sizeof(float);

   if (sz == 0)
      goto emit;

   uint32_t *map;
   u_upload_alloc(ice->ctx.const_uploader, 0, buf_sz, 64,
                  &ice->curbe.curbe_offset, (struct pipe_resource **)&ice->curbe.curbe_res, (void **) &map);

   /* fragment shader constants */
   if (ice->curbe.wm_size) {
      upload_shader_consts(ice, MESA_SHADER_FRAGMENT, map, ice->curbe.wm_start);
   }

   /* clipper constants */
   if (ice->curbe.clip_size) {
      unsigned offset = ice->curbe.clip_start * 16;
      float *fmap = (float *)map;
      unsigned i;
      /* If any planes are going this way, send them all this way:
       */
      for (i = 0; i < 6; i++) {
         fmap[offset + i * 4 + 0] = fixed_plane[i][0];
         fmap[offset + i * 4 + 1] = fixed_plane[i][1];
         fmap[offset + i * 4 + 2] = fixed_plane[i][2];
         fmap[offset + i * 4 + 3] = fixed_plane[i][3];
      }

      /* User clip planes follow the six fixed planes; "i" continues from
       * the loop above.
       */
      unsigned mask = ice->state.cso_rast->cso.clip_plane_enable;
      struct pipe_clip_state *cp = &ice->state.clip_planes;
      while (mask) {
         const int j = u_bit_scan(&mask);
         fmap[offset + i * 4 + 0] = cp->ucp[j][0];
         fmap[offset + i * 4 + 1] = cp->ucp[j][1];
         fmap[offset + i * 4 + 2] = cp->ucp[j][2];
         fmap[offset + i * 4 + 3] = cp->ucp[j][3];
         i++;
      }
   }

   /* vertex shader constants */
   if (ice->curbe.vs_size) {
      upload_shader_consts(ice, MESA_SHADER_VERTEX, map, ice->curbe.vs_start);
   }
   /* Debug dump of the CURBE contents (disabled). */
   if (0) {
      for (int i = 0; i < sz*16; i+=4) {
         float *f = (float *)map;
         fprintf(stderr, "curbe %d.%d: %f %f %f %f\n", i/8, i&4,
                 f[i+0], f[i+1], f[i+2], f[i+3]);
      }
   }

emit:
   /* Point the hardware at the buffer; an all-zero packet (no curbe_res)
    * disables the constant buffer.
    */
   crocus_emit_cmd(batch, GENX(CONSTANT_BUFFER), cb) {
      if (ice->curbe.curbe_res) {
         cb.BufferLength = ice->curbe.total_size - 1;
         cb.Valid = 1;
         cb.BufferStartingAddress = ro_bo(ice->curbe.curbe_res->bo, ice->curbe.curbe_offset);
      }
   }

#if GFX_VER == 4 && GFX_VERx10 != 45
   /* Work around a Broadwater/Crestline depth interpolator bug. The
    * following sequence will cause GPU hangs:
    *
    * 1. Change state so that all depth related fields in CC_STATE are
    * disabled, and in WM_STATE, only "PS Use Source Depth" is enabled.
    * 2. Emit a CONSTANT_BUFFER packet.
    * 3. Draw via 3DPRIMITIVE.
    *
    * The recommended workaround is to emit a non-pipelined state change after
    * emitting CONSTANT_BUFFER, in order to drain the windowizer pipeline.
    *
    * We arbitrarily choose 3DSTATE_GLOBAL_DEPTH_CLAMP_OFFSET (as it's small),
    * and always emit it when "PS Use Source Depth" is set. We could be more
    * precise, but the additional complexity is probably not worth it.
    *
    */
   const struct shader_info *fs_info =
      crocus_get_shader_info(ice, MESA_SHADER_FRAGMENT);

   if (BITSET_TEST(fs_info->system_values_read, SYSTEM_VALUE_FRAG_COORD)) {
      ice->state.global_depth_offset_clamp = 0;
      crocus_emit_cmd(batch, GENX(3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP), clamp);
   }
#endif
}
1054
#endif
1055
1056
#if GFX_VER >= 7
1057
1058
#define IVB_L3SQCREG1_SQGHPCI_DEFAULT 0x00730000
1059
#define VLV_L3SQCREG1_SQGHPCI_DEFAULT 0x00d30000
1060
#define HSW_L3SQCREG1_SQGHPCI_DEFAULT 0x00610000
1061
1062
static void
1063
setup_l3_config(struct crocus_batch *batch, const struct intel_l3_config *cfg)
1064
{
1065
#if GFX_VER == 7
1066
const struct intel_device_info *devinfo = &batch->screen->devinfo;
1067
const bool has_dc = cfg->n[INTEL_L3P_DC] || cfg->n[INTEL_L3P_ALL];
1068
const bool has_is = cfg->n[INTEL_L3P_IS] || cfg->n[INTEL_L3P_RO] ||
1069
cfg->n[INTEL_L3P_ALL];
1070
const bool has_c = cfg->n[INTEL_L3P_C] || cfg->n[INTEL_L3P_RO] ||
1071
cfg->n[INTEL_L3P_ALL];
1072
const bool has_t = cfg->n[INTEL_L3P_T] || cfg->n[INTEL_L3P_RO] ||
1073
cfg->n[INTEL_L3P_ALL];
1074
const bool has_slm = cfg->n[INTEL_L3P_SLM];
1075
#endif
1076
1077
/* According to the hardware docs, the L3 partitioning can only be changed
1078
* while the pipeline is completely drained and the caches are flushed,
1079
* which involves a first PIPE_CONTROL flush which stalls the pipeline...
1080
*/
1081
crocus_emit_pipe_control_flush(batch, "l3_config",
1082
PIPE_CONTROL_DATA_CACHE_FLUSH |
1083
PIPE_CONTROL_CS_STALL);
1084
1085
/* ...followed by a second pipelined PIPE_CONTROL that initiates
1086
* invalidation of the relevant caches. Note that because RO invalidation
1087
* happens at the top of the pipeline (i.e. right away as the PIPE_CONTROL
1088
* command is processed by the CS) we cannot combine it with the previous
1089
* stalling flush as the hardware documentation suggests, because that
1090
* would cause the CS to stall on previous rendering *after* RO
1091
* invalidation and wouldn't prevent the RO caches from being polluted by
1092
* concurrent rendering before the stall completes. This intentionally
1093
* doesn't implement the SKL+ hardware workaround suggesting to enable CS
1094
* stall on PIPE_CONTROLs with the texture cache invalidation bit set for
1095
* GPGPU workloads because the previous and subsequent PIPE_CONTROLs
1096
* already guarantee that there is no concurrent GPGPU kernel execution
1097
* (see SKL HSD 2132585).
1098
*/
1099
crocus_emit_pipe_control_flush(batch, "l3 config",
1100
PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
1101
PIPE_CONTROL_CONST_CACHE_INVALIDATE |
1102
PIPE_CONTROL_INSTRUCTION_INVALIDATE |
1103
PIPE_CONTROL_STATE_CACHE_INVALIDATE);
1104
1105
/* Now send a third stalling flush to make sure that invalidation is
1106
* complete when the L3 configuration registers are modified.
1107
*/
1108
crocus_emit_pipe_control_flush(batch, "l3 config",
1109
PIPE_CONTROL_DATA_CACHE_FLUSH |
1110
PIPE_CONTROL_CS_STALL);
1111
1112
#if GFX_VER == 8
1113
assert(!cfg->n[INTEL_L3P_IS] && !cfg->n[INTEL_L3P_C] && !cfg->n[INTEL_L3P_T]);
1114
crocus_emit_reg(batch, GENX(L3CNTLREG), reg) {
1115
reg.SLMEnable = cfg->n[INTEL_L3P_SLM] > 0;
1116
reg.URBAllocation = cfg->n[INTEL_L3P_URB];
1117
reg.ROAllocation = cfg->n[INTEL_L3P_RO];
1118
reg.DCAllocation = cfg->n[INTEL_L3P_DC];
1119
reg.AllAllocation = cfg->n[INTEL_L3P_ALL];
1120
}
1121
#else
1122
assert(!cfg->n[INTEL_L3P_ALL]);
1123
1124
/* When enabled SLM only uses a portion of the L3 on half of the banks,
1125
* the matching space on the remaining banks has to be allocated to a
1126
* client (URB for all validated configurations) set to the
1127
* lower-bandwidth 2-bank address hashing mode.
1128
*/
1129
const bool urb_low_bw = has_slm && !devinfo->is_baytrail;
1130
assert(!urb_low_bw || cfg->n[INTEL_L3P_URB] == cfg->n[INTEL_L3P_SLM]);
1131
1132
/* Minimum number of ways that can be allocated to the URB. */
1133
const unsigned n0_urb = (devinfo->is_baytrail ? 32 : 0);
1134
assert(cfg->n[INTEL_L3P_URB] >= n0_urb);
1135
1136
uint32_t l3sqcr1, l3cr2, l3cr3;
1137
1138
crocus_pack_state(GENX(L3SQCREG1), &l3sqcr1, reg) {
1139
reg.ConvertDC_UC = !has_dc;
1140
reg.ConvertIS_UC = !has_is;
1141
reg.ConvertC_UC = !has_c;
1142
reg.ConvertT_UC = !has_t;
1143
#if GFX_VERx10 == 75
1144
reg.L3SQGeneralPriorityCreditInitialization = SQGPCI_DEFAULT;
1145
#else
1146
reg.L3SQGeneralPriorityCreditInitialization =
1147
devinfo->is_baytrail ? BYT_SQGPCI_DEFAULT : SQGPCI_DEFAULT;
1148
#endif
1149
reg.L3SQHighPriorityCreditInitialization = SQHPCI_DEFAULT;
1150
};
1151
1152
crocus_pack_state(GENX(L3CNTLREG2), &l3cr2, reg) {
1153
reg.SLMEnable = has_slm;
1154
reg.URBLowBandwidth = urb_low_bw;
1155
reg.URBAllocation = cfg->n[INTEL_L3P_URB] - n0_urb;
1156
#if !(GFX_VERx10 == 75)
1157
reg.ALLAllocation = cfg->n[INTEL_L3P_ALL];
1158
#endif
1159
reg.ROAllocation = cfg->n[INTEL_L3P_RO];
1160
reg.DCAllocation = cfg->n[INTEL_L3P_DC];
1161
};
1162
1163
crocus_pack_state(GENX(L3CNTLREG3), &l3cr3, reg) {
1164
reg.ISAllocation = cfg->n[INTEL_L3P_IS];
1165
reg.ISLowBandwidth = 0;
1166
reg.CAllocation = cfg->n[INTEL_L3P_C];
1167
reg.CLowBandwidth = 0;
1168
reg.TAllocation = cfg->n[INTEL_L3P_T];
1169
reg.TLowBandwidth = 0;
1170
};
1171
1172
/* Set up the L3 partitioning. */
1173
crocus_emit_lri(batch, L3SQCREG1, l3sqcr1);
1174
crocus_emit_lri(batch, L3CNTLREG2, l3cr2);
1175
crocus_emit_lri(batch, L3CNTLREG3, l3cr3);
1176
1177
#if GFX_VERSIONx10 == 75
1178
/* TODO: Fail screen creation if command parser version < 4 */
1179
uint32_t scratch1, chicken3;
1180
crocus_pack_state(GENX(SCRATCH1), &scratch1, reg) {
1181
reg.L3AtomicDisable = !has_dc;
1182
}
1183
crocus_pack_state(GENX(CHICKEN3), &chicken3, reg) {
1184
reg.L3AtomicDisableMask = true;
1185
reg.L3AtomicDisable = !has_dc;
1186
}
1187
crocus_emit_lri(batch, SCRATCH1, scratch1);
1188
crocus_emit_lri(batch, CHICKEN3, chicken3);
1189
#endif
1190
#endif
1191
}
1192
1193
static void
1194
emit_l3_state(struct crocus_batch *batch, bool compute)
1195
{
1196
const struct intel_l3_config *const cfg =
1197
compute ? batch->screen->l3_config_cs : batch->screen->l3_config_3d;
1198
1199
setup_l3_config(batch, cfg);
1200
if (unlikely(INTEL_DEBUG & DEBUG_L3)) {
1201
intel_dump_l3_config(cfg, stderr);
1202
}
1203
}
1204
1205
/**
1206
* Emit a PIPE_CONTROL command for gen7 with the CS Stall bit set.
1207
*/
1208
static void
1209
gen7_emit_cs_stall_flush(struct crocus_batch *batch)
1210
{
1211
crocus_emit_pipe_control_write(batch,
1212
"workaround",
1213
PIPE_CONTROL_CS_STALL
1214
| PIPE_CONTROL_WRITE_IMMEDIATE,
1215
batch->ice->workaround_bo,
1216
batch->ice->workaround_offset, 0);
1217
}
1218
#endif
1219
1220
/* Switch the command streamer between the 3D and GPGPU pipelines, emitting
 * all generation-specific workarounds required before and after
 * PIPELINE_SELECT. The ordering of the flushes here is behavior-critical.
 */
static void
emit_pipeline_select(struct crocus_batch *batch, uint32_t pipeline)
{
#if GFX_VER == 8
   /* From the Broadwell PRM, Volume 2a: Instructions, PIPELINE_SELECT:
    *
    * Software must clear the COLOR_CALC_STATE Valid field in
    * 3DSTATE_CC_STATE_POINTERS command prior to send a PIPELINE_SELECT
    * with Pipeline Select set to GPGPU.
    *
    * The internal hardware docs recommend the same workaround for Gfx9
    * hardware too.
    */
   if (pipeline == GPGPU)
      crocus_emit_cmd(batch, GENX(3DSTATE_CC_STATE_POINTERS), t);
#endif

#if GFX_VER >= 6
   /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction]
    * PIPELINE_SELECT [DevBWR+]":
    *
    * "Project: DEVSNB+
    *
    * Software must ensure all the write caches are flushed through a
    * stalling PIPE_CONTROL command followed by another PIPE_CONTROL
    * command to invalidate read only caches prior to programming
    * MI_PIPELINE_SELECT command to change the Pipeline Select Mode."
    */
   const unsigned dc_flush =
      batch->screen->devinfo.ver >= 7 ? PIPE_CONTROL_DATA_CACHE_FLUSH : 0;
   crocus_emit_pipe_control_flush(batch,
                                  "workaround: PIPELINE_SELECT flushes (1/2)",
                                  PIPE_CONTROL_RENDER_TARGET_FLUSH |
                                  PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                                  dc_flush |
                                  PIPE_CONTROL_CS_STALL);

   crocus_emit_pipe_control_flush(batch,
                                  "workaround: PIPELINE_SELECT flushes (2/2)",
                                  PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
                                  PIPE_CONTROL_CONST_CACHE_INVALIDATE |
                                  PIPE_CONTROL_STATE_CACHE_INVALIDATE |
                                  PIPE_CONTROL_INSTRUCTION_INVALIDATE);
#else
   /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction]
    * PIPELINE_SELECT [DevBWR+]":
    *
    * Project: PRE-DEVSNB
    *
    * Software must ensure the current pipeline is flushed via an
    * MI_FLUSH or PIPE_CONTROL prior to the execution of PIPELINE_SELECT.
    */
   crocus_emit_cmd(batch, GENX(MI_FLUSH), foo);
#endif

   crocus_emit_cmd(batch, GENX(PIPELINE_SELECT), sel) {
      sel.PipelineSelection = pipeline;
   }

#if GFX_VER == 7 && !(GFX_VERx10 == 75)
   /* Ivybridge-only (not Haswell): after switching back to 3D, drain with a
    * CS stall and emit a degenerate point-list 3DPRIMITIVE.
    */
   if (pipeline == _3D) {
      gen7_emit_cs_stall_flush(batch);

      crocus_emit_cmd(batch, GENX(3DPRIMITIVE), prim) {
         prim.PrimitiveTopologyType = _3DPRIM_POINTLIST;
      };
   }
#endif
}
1289
1290
/**
1291
* The following diagram shows how we partition the URB:
1292
*
1293
* 16kB or 32kB Rest of the URB space
1294
* __________-__________ _________________-_________________
1295
* / \ / \
1296
* +-------------------------------------------------------------+
1297
* | VS/HS/DS/GS/FS Push | VS/HS/DS/GS URB |
1298
* | Constants | Entries |
1299
* +-------------------------------------------------------------+
1300
*
1301
* Notably, push constants must be stored at the beginning of the URB
1302
* space, while entries can be stored anywhere. Ivybridge and Haswell
1303
* GT1/GT2 have a maximum constant buffer size of 16kB, while Haswell GT3
1304
* doubles this (32kB).
1305
*
1306
* Ivybridge and Haswell GT1/GT2 allow push constants to be located (and
1307
* sized) in increments of 1kB. Haswell GT3 requires them to be located and
1308
* sized in increments of 2kB.
1309
*
1310
* Currently we split the constant buffer space evenly among whatever stages
1311
* are active. This is probably not ideal, but simple.
1312
*
1313
* Ivybridge GT1 and Haswell GT1 have 128kB of URB space.
1314
* Ivybridge GT2 and Haswell GT2 have 256kB of URB space.
1315
* Haswell GT3 has 512kB of URB space.
1316
*
1317
* See "Volume 2a: 3D Pipeline," section 1.8, "Volume 1b: Configurations",
1318
* and the documentation for 3DSTATE_PUSH_CONSTANT_ALLOC_xS.
1319
*/
1320
#if GFX_VER >= 7
1321
/* Statically partition the push constant space among the five shader
 * stages (see the URB layout diagram above).
 */
static void
crocus_alloc_push_constants(struct crocus_batch *batch)
{
   /* Total push constant space: 32kB on HSW GT3 and BDW, 16kB otherwise. */
#if GFX_VERx10 == 75
   const unsigned push_constant_kb = batch->screen->devinfo.gt == 3 ? 32 : 16;
#elif GFX_VER == 8
   const unsigned push_constant_kb = 32;
#else
   const unsigned push_constant_kb = 16;
#endif
   unsigned size_per_stage = push_constant_kb / 5;

   /* For now, we set a static partitioning of the push constant area,
    * assuming that all stages could be in use.
    *
    * TODO: Try lazily allocating the HS/DS/GS sections as needed, and
    * see if that improves performance by offering more space to
    * the VS/FS when those aren't in use. Also, try dynamically
    * enabling/disabling it like i965 does. This would be more
    * stalls and may not actually help; we don't know yet.
    */
   for (int i = 0; i <= MESA_SHADER_FRAGMENT; i++) {
      /* The VS/HS/DS/GS/PS alloc packets share a layout and consecutive
       * sub-opcodes (18..22), so emit them all from the VS template.
       * The FS gets whatever remains after the 1kB-granular division.
       */
      crocus_emit_cmd(batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_VS), alloc) {
         alloc._3DCommandSubOpcode = 18 + i;
         alloc.ConstantBufferOffset = size_per_stage * i;
         alloc.ConstantBufferSize = i == MESA_SHADER_FRAGMENT ? (push_constant_kb - 4 * size_per_stage) : size_per_stage;
      }
   }

   /* From p292 of the Ivy Bridge PRM (11.2.4 3DSTATE_PUSH_CONSTANT_ALLOC_PS):
    *
    * A PIPE_CONTROL command with the CS Stall bit set must be programmed
    * in the ring after this instruction.
    *
    * No such restriction exists for Haswell or Baytrail.
    */
   if (!(GFX_VERx10 == 75) && !batch->screen->devinfo.is_baytrail)
      gen7_emit_cs_stall_flush(batch);
}
1360
#endif
1361
1362
/**
1363
* Upload the initial GPU state for a render context.
1364
*
1365
* This sets some invariant state that needs to be programmed a particular
1366
* way, but we never actually change.
1367
*/
1368
static void
crocus_init_render_context(struct crocus_batch *batch)
{
   UNUSED const struct intel_device_info *devinfo = &batch->screen->devinfo;

   emit_pipeline_select(batch, _3D);

   /* Zero-length packet: all-default system instruction pointer. */
   crocus_emit_cmd(batch, GENX(STATE_SIP), foo);

#if GFX_VER >= 7
   emit_l3_state(batch, false);
#endif
#if (GFX_VERx10 == 70 || GFX_VERx10 == 80)
   /* Disable the CONSTANT_BUFFER address offset on IVB/BDW so push constant
    * buffer addresses are absolute.
    */
   crocus_emit_reg(batch, GENX(INSTPM), reg) {
      reg.CONSTANT_BUFFERAddressOffsetDisable = true;
      reg.CONSTANT_BUFFERAddressOffsetDisableMask = true;
   }
#endif
#if GFX_VER >= 5 || GFX_VERx10 == 45
   /* Use the legacy AA line coverage computation. */
   crocus_emit_cmd(batch, GENX(3DSTATE_AA_LINE_PARAMETERS), foo);
#endif

   /* No polygon stippling offsets are necessary. */
   /* TODO: may need to set an offset for origin-UL framebuffers */
   crocus_emit_cmd(batch, GENX(3DSTATE_POLY_STIPPLE_OFFSET), foo);

#if GFX_VER >= 7
   crocus_alloc_push_constants(batch);
#endif

#if GFX_VER == 8
   /* Set the initial MSAA sample positions. */
   crocus_emit_cmd(batch, GENX(3DSTATE_SAMPLE_PATTERN), pat) {
      INTEL_SAMPLE_POS_1X(pat._1xSample);
      INTEL_SAMPLE_POS_2X(pat._2xSample);
      INTEL_SAMPLE_POS_4X(pat._4xSample);
      INTEL_SAMPLE_POS_8X(pat._8xSample);
   }

   /* Disable chromakeying (it's for media) */
   crocus_emit_cmd(batch, GENX(3DSTATE_WM_CHROMAKEY), foo);

   /* We want regular rendering, not special HiZ operations. */
   crocus_emit_cmd(batch, GENX(3DSTATE_WM_HZ_OP), foo);
#endif
}
1415
1416
#if GFX_VER >= 7
1417
static void
1418
crocus_init_compute_context(struct crocus_batch *batch)
1419
{
1420
UNUSED const struct intel_device_info *devinfo = &batch->screen->devinfo;
1421
1422
emit_pipeline_select(batch, GPGPU);
1423
1424
#if GFX_VER >= 7
1425
emit_l3_state(batch, true);
1426
#endif
1427
}
1428
#endif
1429
1430
/**
1431
* Generation-specific context state (ice->state.genx->...).
1432
*
1433
* Most state can go in crocus_context directly, but these encode hardware
1434
* packets which vary by generation.
1435
*/
1436
struct crocus_genx_state {
   struct {
#if GFX_VER >= 7
      /* Per-stage image surface parameters (one per bound shader image). */
      struct brw_image_param image_param[PIPE_MAX_SHADER_IMAGES];
#endif
   } shaders[MESA_SHADER_STAGES];

#if GFX_VER == 8
   /* Whether the Gfx8 depth PMA fix is currently enabled (see
    * want_pma_fix() below).
    */
   bool pma_fix_enabled;
#endif
};
1447
1448
/**
1449
* The pipe->set_blend_color() driver hook.
1450
*
1451
* This corresponds to our COLOR_CALC_STATE.
1452
*/
1453
static void
crocus_set_blend_color(struct pipe_context *ctx,
                       const struct pipe_blend_color *state)
{
   struct crocus_context *ice = (struct crocus_context *) ctx;

   /* Our COLOR_CALC_STATE is exactly pipe_blend_color, so just memcpy */
   memcpy(&ice->state.blend_color, state, sizeof(struct pipe_blend_color));
#if GFX_VER <= 5
   /* Gen4/5 track the constant color via a dedicated dirty flag. */
   ice->state.dirty |= CROCUS_DIRTY_GEN4_CONSTANT_COLOR;
#else
   /* Gen6+ re-emit COLOR_CALC_STATE to pick up the new color. */
   ice->state.dirty |= CROCUS_DIRTY_COLOR_CALC_STATE;
#endif
}
1467
1468
/**
1469
* Gallium CSO for blend state (see pipe_blend_state).
1470
*/
1471
struct crocus_blend_state {
#if GFX_VER == 8
   /** Partial 3DSTATE_PS_BLEND; remaining fields are filled at draw time. */
   uint32_t ps_blend[GENX(3DSTATE_PS_BLEND_length)];
#endif

   /** copy of BLEND_STATE */
   struct pipe_blend_state cso;

   /** Bitfield of whether blending is enabled for RT[i] - for aux resolves */
   uint8_t blend_enables;

   /** Bitfield of whether color writes are enabled for RT[i] */
   uint8_t color_write_enables;

   /** Does RT[0] use dual color blending? */
   bool dual_color_blending;
};
1489
1490
static enum pipe_blendfactor
1491
fix_blendfactor(enum pipe_blendfactor f, bool alpha_to_one)
1492
{
1493
if (alpha_to_one) {
1494
if (f == PIPE_BLENDFACTOR_SRC1_ALPHA)
1495
return PIPE_BLENDFACTOR_ONE;
1496
1497
if (f == PIPE_BLENDFACTOR_INV_SRC1_ALPHA)
1498
return PIPE_BLENDFACTOR_ZERO;
1499
}
1500
1501
return f;
1502
}
1503
1504
/* Per-generation container for blend-entry fields: gen6+ has dedicated
 * BLEND_STATE entries, while gen4/5 fold blending into COLOR_CALC_STATE.
 */
#if GFX_VER >= 6
typedef struct GENX(BLEND_STATE_ENTRY) BLEND_ENTRY_GENXML;
#else
typedef struct GENX(COLOR_CALC_STATE) BLEND_ENTRY_GENXML;
#endif
1509
1510
static bool
1511
can_emit_logic_op(struct crocus_context *ice)
1512
{
1513
/* all pre gen8 have logicop restricted to unorm */
1514
enum pipe_format pformat = PIPE_FORMAT_NONE;
1515
for (unsigned i = 0; i < ice->state.framebuffer.nr_cbufs; i++) {
1516
if (ice->state.framebuffer.cbufs[i]) {
1517
pformat = ice->state.framebuffer.cbufs[i]->format;
1518
break;
1519
}
1520
}
1521
return (pformat == PIPE_FORMAT_NONE || util_format_is_unorm(pformat));
1522
}
1523
1524
/* Fill one blend entry (BLEND_STATE_ENTRY on gen6+, COLOR_CALC_STATE on
 * gen4/5) for render target "idx" from the bound blend CSO.
 *
 * Returns true if RGB and alpha need independent blend programming
 * (different functions or factors).
 */
static bool
set_blend_entry_bits(struct crocus_batch *batch, BLEND_ENTRY_GENXML *entry,
                     struct crocus_blend_state *cso_blend,
                     int idx)
{
   struct crocus_context *ice = batch->ice;
   bool independent_alpha_blend = false;
   /* Without independent blend, every RT uses rt[0]'s state. */
   const struct pipe_rt_blend_state *rt =
      &cso_blend->cso.rt[cso_blend->cso.independent_blend_enable ? idx : 0];
   const unsigned blend_enabled = rt->blend_enable;

   /* Rewrite SRC1_ALPHA factors when alpha-to-one is on. */
   enum pipe_blendfactor src_rgb =
      fix_blendfactor(rt->rgb_src_factor, cso_blend->cso.alpha_to_one);
   enum pipe_blendfactor src_alpha =
      fix_blendfactor(rt->alpha_src_factor, cso_blend->cso.alpha_to_one);
   enum pipe_blendfactor dst_rgb =
      fix_blendfactor(rt->rgb_dst_factor, cso_blend->cso.alpha_to_one);
   enum pipe_blendfactor dst_alpha =
      fix_blendfactor(rt->alpha_dst_factor, cso_blend->cso.alpha_to_one);

   if (rt->rgb_func != rt->alpha_func ||
       src_rgb != src_alpha || dst_rgb != dst_alpha)
      independent_alpha_blend = true;
   if (cso_blend->cso.logicop_enable) {
      /* Pre-gen8 can only emit logic ops on UNORM targets. */
      if (GFX_VER >= 8 || can_emit_logic_op(ice)) {
         entry->LogicOpEnable = cso_blend->cso.logicop_enable;
         entry->LogicOpFunction = cso_blend->cso.logicop_func;
      }
   } else if (blend_enabled) {
      if (idx == 0) {
         /* Only enable RT0 blending if the shader actually does dual-source
          * blending when the CSO asks for it.
          */
         struct crocus_compiled_shader *shader = ice->shaders.prog[MESA_SHADER_FRAGMENT];
         struct brw_wm_prog_data *wm_prog_data = (void *) shader->prog_data;
         entry->ColorBufferBlendEnable =
            (!cso_blend->dual_color_blending || wm_prog_data->dual_src_blend);
      } else
         entry->ColorBufferBlendEnable = 1;

      entry->ColorBlendFunction = rt->rgb_func;
      entry->AlphaBlendFunction = rt->alpha_func;
      /* The casts prevent warnings about implicit enum type conversions. */
      entry->SourceBlendFactor = (int) src_rgb;
      entry->SourceAlphaBlendFactor = (int) src_alpha;
      entry->DestinationBlendFactor = (int) dst_rgb;
      entry->DestinationAlphaBlendFactor = (int) dst_alpha;
   }
#if GFX_VER <= 5
   /*
    * Gen4/GM45/ILK can't handle have ColorBufferBlendEnable == 0
    * when a dual src blend shader is in use. Setup dummy blending.
    */
   struct crocus_compiled_shader *shader = ice->shaders.prog[MESA_SHADER_FRAGMENT];
   struct brw_wm_prog_data *wm_prog_data = (void *) shader->prog_data;
   if (idx == 0 && !blend_enabled && wm_prog_data->dual_src_blend) {
      entry->ColorBufferBlendEnable = 1;
      entry->ColorBlendFunction = PIPE_BLEND_ADD;
      entry->AlphaBlendFunction = PIPE_BLEND_ADD;
      entry->SourceBlendFactor = PIPE_BLENDFACTOR_ONE;
      entry->SourceAlphaBlendFactor = PIPE_BLENDFACTOR_ONE;
      entry->DestinationBlendFactor = PIPE_BLENDFACTOR_ZERO;
      entry->DestinationAlphaBlendFactor = PIPE_BLENDFACTOR_ZERO;
   }
#endif
   return independent_alpha_blend;
}
1587
1588
/**
1589
* The pipe->create_blend_state() driver hook.
1590
*
1591
* Translates a pipe_blend_state into crocus_blend_state.
1592
*/
1593
static void *
1594
crocus_create_blend_state(struct pipe_context *ctx,
1595
const struct pipe_blend_state *state)
1596
{
1597
struct crocus_blend_state *cso = malloc(sizeof(struct crocus_blend_state));
1598
1599
cso->blend_enables = 0;
1600
cso->color_write_enables = 0;
1601
STATIC_ASSERT(BRW_MAX_DRAW_BUFFERS <= 8);
1602
1603
cso->cso = *state;
1604
cso->dual_color_blending = util_blend_state_is_dual(state, 0);
1605
1606
#if GFX_VER == 8
1607
bool indep_alpha_blend = false;
1608
#endif
1609
for (int i = 0; i < BRW_MAX_DRAW_BUFFERS; i++) {
1610
const struct pipe_rt_blend_state *rt =
1611
&state->rt[state->independent_blend_enable ? i : 0];
1612
if (rt->blend_enable)
1613
cso->blend_enables |= 1u << i;
1614
if (rt->colormask)
1615
cso->color_write_enables |= 1u << i;
1616
#if GFX_VER == 8
1617
enum pipe_blendfactor src_rgb =
1618
fix_blendfactor(rt->rgb_src_factor, state->alpha_to_one);
1619
enum pipe_blendfactor src_alpha =
1620
fix_blendfactor(rt->alpha_src_factor, state->alpha_to_one);
1621
enum pipe_blendfactor dst_rgb =
1622
fix_blendfactor(rt->rgb_dst_factor, state->alpha_to_one);
1623
enum pipe_blendfactor dst_alpha =
1624
fix_blendfactor(rt->alpha_dst_factor, state->alpha_to_one);
1625
1626
if (rt->rgb_func != rt->alpha_func ||
1627
src_rgb != src_alpha || dst_rgb != dst_alpha)
1628
indep_alpha_blend = true;
1629
#endif
1630
}
1631
1632
#if GFX_VER == 8
1633
crocus_pack_command(GENX(3DSTATE_PS_BLEND), cso->ps_blend, pb) {
1634
/* pb.HasWriteableRT is filled in at draw time.
1635
* pb.AlphaTestEnable is filled in at draw time.
1636
*
1637
* pb.ColorBufferBlendEnable is filled in at draw time so we can avoid
1638
* setting it when dual color blending without an appropriate shader.
1639
*/
1640
1641
pb.AlphaToCoverageEnable = state->alpha_to_coverage;
1642
pb.IndependentAlphaBlendEnable = indep_alpha_blend;
1643
1644
/* The casts prevent warnings about implicit enum type conversions. */
1645
pb.SourceBlendFactor =
1646
(int) fix_blendfactor(state->rt[0].rgb_src_factor, state->alpha_to_one);
1647
pb.SourceAlphaBlendFactor =
1648
(int) fix_blendfactor(state->rt[0].alpha_src_factor, state->alpha_to_one);
1649
pb.DestinationBlendFactor =
1650
(int) fix_blendfactor(state->rt[0].rgb_dst_factor, state->alpha_to_one);
1651
pb.DestinationAlphaBlendFactor =
1652
(int) fix_blendfactor(state->rt[0].alpha_dst_factor, state->alpha_to_one);
1653
}
1654
#endif
1655
return cso;
1656
}
1657
1658
/**
1659
* The pipe->bind_blend_state() driver hook.
1660
*
1661
* Bind a blending CSO and flag related dirty bits.
1662
*/
1663
static void
1664
crocus_bind_blend_state(struct pipe_context *ctx, void *state)
1665
{
1666
struct crocus_context *ice = (struct crocus_context *) ctx;
1667
struct crocus_blend_state *cso = state;
1668
1669
ice->state.cso_blend = cso;
1670
ice->state.blend_enables = cso ? cso->blend_enables : 0;
1671
1672
ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_BINDINGS_FS;
1673
ice->state.dirty |= CROCUS_DIRTY_WM;
1674
#if GFX_VER >= 6
1675
ice->state.dirty |= CROCUS_DIRTY_GEN6_BLEND_STATE;
1676
#endif
1677
#if GFX_VER >= 7
1678
ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_FS;
1679
#endif
1680
#if GFX_VER == 8
1681
ice->state.dirty |= CROCUS_DIRTY_GEN8_PMA_FIX;
1682
ice->state.dirty |= CROCUS_DIRTY_GEN8_PS_BLEND;
1683
#endif
1684
ice->state.dirty |= CROCUS_DIRTY_COLOR_CALC_STATE;
1685
ice->state.dirty |= CROCUS_DIRTY_RENDER_RESOLVES_AND_FLUSHES;
1686
ice->state.stage_dirty |= ice->state.stage_dirty_for_nos[CROCUS_NOS_BLEND];
1687
}
1688
1689
/**
1690
* Return true if the FS writes to any color outputs which are not disabled
1691
* via color masking.
1692
*/
1693
static bool
1694
has_writeable_rt(const struct crocus_blend_state *cso_blend,
1695
const struct shader_info *fs_info)
1696
{
1697
if (!fs_info)
1698
return false;
1699
1700
unsigned rt_outputs = fs_info->outputs_written >> FRAG_RESULT_DATA0;
1701
1702
if (fs_info->outputs_written & BITFIELD64_BIT(FRAG_RESULT_COLOR))
1703
rt_outputs = (1 << BRW_MAX_DRAW_BUFFERS) - 1;
1704
1705
return cso_blend->color_write_enables & rt_outputs;
1706
}
1707
1708
/**
1709
* Gallium CSO for depth, stencil, and alpha testing state.
1710
*/
1711
struct crocus_depth_stencil_alpha_state {
   /** Complete copy of the Gallium depth/stencil/alpha state. */
   struct pipe_depth_stencil_alpha_state cso;

   /** True if the depth writemask is set. */
   bool depth_writes_enabled;
   /** True if any enabled stencil face has a non-zero writemask. */
   bool stencil_writes_enabled;
};
1717
1718
/**
1719
* The pipe->create_depth_stencil_alpha_state() driver hook.
1720
*
1721
* We encode most of 3DSTATE_WM_DEPTH_STENCIL, and just save off the alpha
1722
* testing state since we need pieces of it in a variety of places.
1723
*/
1724
static void *
1725
crocus_create_zsa_state(struct pipe_context *ctx,
1726
const struct pipe_depth_stencil_alpha_state *state)
1727
{
1728
struct crocus_depth_stencil_alpha_state *cso =
1729
malloc(sizeof(struct crocus_depth_stencil_alpha_state));
1730
1731
bool two_sided_stencil = state->stencil[1].enabled;
1732
cso->cso = *state;
1733
1734
cso->depth_writes_enabled = state->depth_writemask;
1735
cso->stencil_writes_enabled =
1736
state->stencil[0].writemask != 0 ||
1737
(two_sided_stencil && state->stencil[1].writemask != 0);
1738
1739
/* The state tracker needs to optimize away EQUAL writes for us. */
1740
assert(!(state->depth_func == PIPE_FUNC_EQUAL && state->depth_writemask));
1741
1742
return cso;
1743
}
1744
1745
/**
1746
* The pipe->bind_depth_stencil_alpha_state() driver hook.
1747
*
1748
* Bind a depth/stencil/alpha CSO and flag related dirty bits.
1749
*/
1750
static void
crocus_bind_zsa_state(struct pipe_context *ctx, void *state)
{
   struct crocus_context *ice = (struct crocus_context *) ctx;
   /* old_cso/new_cso are referenced by the cso_changed() macro below, which
    * compares the named field between the previously and newly bound CSOs.
    */
   struct crocus_depth_stencil_alpha_state *old_cso = ice->state.cso_zsa;
   struct crocus_depth_stencil_alpha_state *new_cso = state;

   if (new_cso) {
      /* Only flag the atoms whose inputs actually changed. */
      if (cso_changed(cso.alpha_ref_value))
         ice->state.dirty |= CROCUS_DIRTY_COLOR_CALC_STATE;

      if (cso_changed(cso.alpha_enabled))
         ice->state.dirty |= CROCUS_DIRTY_WM;
#if GFX_VER >= 6
      if (cso_changed(cso.alpha_enabled))
         ice->state.dirty |= CROCUS_DIRTY_GEN6_BLEND_STATE;

      if (cso_changed(cso.alpha_func))
         ice->state.dirty |= CROCUS_DIRTY_GEN6_BLEND_STATE;
#endif
#if GFX_VER == 8
      if (cso_changed(cso.alpha_enabled))
         ice->state.dirty |= CROCUS_DIRTY_GEN8_PS_BLEND;
#endif

      /* Toggling depth writes may require aux resolves or flushes. */
      if (cso_changed(depth_writes_enabled))
         ice->state.dirty |= CROCUS_DIRTY_RENDER_RESOLVES_AND_FLUSHES;

      ice->state.depth_writes_enabled = new_cso->depth_writes_enabled;
      ice->state.stencil_writes_enabled = new_cso->stencil_writes_enabled;

#if GFX_VER <= 5
      ice->state.dirty |= CROCUS_DIRTY_COLOR_CALC_STATE;
#endif
   }

   ice->state.cso_zsa = new_cso;
   ice->state.dirty |= CROCUS_DIRTY_CC_VIEWPORT;
#if GFX_VER >= 6
   ice->state.dirty |= CROCUS_DIRTY_GEN6_WM_DEPTH_STENCIL;
#endif
#if GFX_VER == 8
   ice->state.dirty |= CROCUS_DIRTY_GEN8_PMA_FIX;
#endif
   ice->state.stage_dirty |= ice->state.stage_dirty_for_nos[CROCUS_NOS_DEPTH_STENCIL_ALPHA];
}
1796
1797
#if GFX_VER == 8
1798
static bool
1799
want_pma_fix(struct crocus_context *ice)
1800
{
1801
UNUSED struct crocus_screen *screen = (void *) ice->ctx.screen;
1802
UNUSED const struct intel_device_info *devinfo = &screen->devinfo;
1803
const struct brw_wm_prog_data *wm_prog_data = (void *)
1804
ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data;
1805
const struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
1806
const struct crocus_depth_stencil_alpha_state *cso_zsa = ice->state.cso_zsa;
1807
const struct crocus_blend_state *cso_blend = ice->state.cso_blend;
1808
1809
/* In very specific combinations of state, we can instruct Gfx8-9 hardware
1810
* to avoid stalling at the pixel mask array. The state equations are
1811
* documented in these places:
1812
*
1813
* - Gfx8 Depth PMA Fix: CACHE_MODE_1::NP_PMA_FIX_ENABLE
1814
* - Gfx9 Stencil PMA Fix: CACHE_MODE_0::STC PMA Optimization Enable
1815
*
1816
* Both equations share some common elements:
1817
*
1818
* no_hiz_op =
1819
* !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
1820
* 3DSTATE_WM_HZ_OP::DepthBufferResolve ||
1821
* 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
1822
* 3DSTATE_WM_HZ_OP::StencilBufferClear) &&
1823
*
1824
* killpixels =
1825
* 3DSTATE_WM::ForceKillPix != ForceOff &&
1826
* (3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
1827
* 3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
1828
* 3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
1829
* 3DSTATE_PS_BLEND::AlphaTestEnable ||
1830
* 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable)
1831
*
1832
* (Technically the stencil PMA treats ForceKillPix differently,
1833
* but I think this is a documentation oversight, and we don't
1834
* ever use it in this way, so it doesn't matter).
1835
*
1836
* common_pma_fix =
1837
* 3DSTATE_WM::ForceThreadDispatch != 1 &&
1838
* 3DSTATE_RASTER::ForceSampleCount == NUMRASTSAMPLES_0 &&
1839
* 3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL &&
1840
* 3DSTATE_DEPTH_BUFFER::HIZ Enable &&
1841
* 3DSTATE_WM::EDSC_Mode != EDSC_PREPS &&
1842
* 3DSTATE_PS_EXTRA::PixelShaderValid &&
1843
* no_hiz_op
1844
*
1845
* These are always true:
1846
*
1847
* 3DSTATE_RASTER::ForceSampleCount == NUMRASTSAMPLES_0
1848
* 3DSTATE_PS_EXTRA::PixelShaderValid
1849
*
1850
* Also, we never use the normal drawing path for HiZ ops; these are true:
1851
*
1852
* !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
1853
* 3DSTATE_WM_HZ_OP::DepthBufferResolve ||
1854
* 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
1855
* 3DSTATE_WM_HZ_OP::StencilBufferClear)
1856
*
1857
* This happens sometimes:
1858
*
1859
* 3DSTATE_WM::ForceThreadDispatch != 1
1860
*
1861
* However, we choose to ignore it as it either agrees with the signal
1862
* (dispatch was already enabled, so nothing out of the ordinary), or
1863
* there are no framebuffer attachments (so no depth or HiZ anyway,
1864
* meaning the PMA signal will already be disabled).
1865
*/
1866
1867
if (!cso_fb->zsbuf)
1868
return false;
1869
1870
struct crocus_resource *zres, *sres;
1871
crocus_get_depth_stencil_resources(devinfo,
1872
cso_fb->zsbuf->texture, &zres, &sres);
1873
1874
/* 3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL &&
1875
* 3DSTATE_DEPTH_BUFFER::HIZ Enable &&
1876
*/
1877
if (!zres || !crocus_resource_level_has_hiz(zres, cso_fb->zsbuf->u.tex.level))
1878
return false;
1879
1880
/* 3DSTATE_WM::EDSC_Mode != EDSC_PREPS */
1881
if (wm_prog_data->early_fragment_tests)
1882
return false;
1883
1884
/* 3DSTATE_WM::ForceKillPix != ForceOff &&
1885
* (3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
1886
* 3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
1887
* 3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
1888
* 3DSTATE_PS_BLEND::AlphaTestEnable ||
1889
* 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable)
1890
*/
1891
bool killpixels = wm_prog_data->uses_kill || wm_prog_data->uses_omask ||
1892
cso_blend->cso.alpha_to_coverage || cso_zsa->cso.alpha_enabled;
1893
1894
/* The Gfx8 depth PMA equation becomes:
1895
*
1896
* depth_writes =
1897
* 3DSTATE_WM_DEPTH_STENCIL::DepthWriteEnable &&
1898
* 3DSTATE_DEPTH_BUFFER::DEPTH_WRITE_ENABLE
1899
*
1900
* stencil_writes =
1901
* 3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
1902
* 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE &&
1903
* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE
1904
*
1905
* Z_PMA_OPT =
1906
* common_pma_fix &&
1907
* 3DSTATE_WM_DEPTH_STENCIL::DepthTestEnable &&
1908
* ((killpixels && (depth_writes || stencil_writes)) ||
1909
* 3DSTATE_PS_EXTRA::PixelShaderComputedDepthMode != PSCDEPTH_OFF)
1910
*
1911
*/
1912
if (!cso_zsa->cso.depth_enabled)
1913
return false;
1914
1915
return wm_prog_data->computed_depth_mode != PSCDEPTH_OFF ||
1916
(killpixels && (cso_zsa->depth_writes_enabled ||
1917
(sres && cso_zsa->stencil_writes_enabled)));
1918
}
1919
#endif
1920
void
1921
genX(crocus_update_pma_fix)(struct crocus_context *ice,
1922
struct crocus_batch *batch,
1923
bool enable)
1924
{
1925
#if GFX_VER == 8
1926
struct crocus_genx_state *genx = ice->state.genx;
1927
1928
if (genx->pma_fix_enabled == enable)
1929
return;
1930
1931
genx->pma_fix_enabled = enable;
1932
1933
/* According to the Broadwell PIPE_CONTROL documentation, software should
1934
* emit a PIPE_CONTROL with the CS Stall and Depth Cache Flush bits set
1935
* prior to the LRI. If stencil buffer writes are enabled, then a Render * Cache Flush is also necessary.
1936
*
1937
* The Gfx9 docs say to use a depth stall rather than a command streamer
1938
* stall. However, the hardware seems to violently disagree. A full
1939
* command streamer stall seems to be needed in both cases.
1940
*/
1941
crocus_emit_pipe_control_flush(batch, "PMA fix change (1/2)",
1942
PIPE_CONTROL_CS_STALL |
1943
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
1944
PIPE_CONTROL_RENDER_TARGET_FLUSH);
1945
1946
crocus_emit_reg(batch, GENX(CACHE_MODE_1), reg) {
1947
reg.NPPMAFixEnable = enable;
1948
reg.NPEarlyZFailsDisable = enable;
1949
reg.NPPMAFixEnableMask = true;
1950
reg.NPEarlyZFailsDisableMask = true;
1951
}
1952
1953
/* After the LRI, a PIPE_CONTROL with both the Depth Stall and Depth Cache
1954
* Flush bits is often necessary. We do it regardless because it's easier.
1955
* The render cache flush is also necessary if stencil writes are enabled.
1956
*
1957
* Again, the Gfx9 docs give a different set of flushes but the Broadwell
1958
* flushes seem to work just as well.
1959
*/
1960
crocus_emit_pipe_control_flush(batch, "PMA fix change (1/2)",
1961
PIPE_CONTROL_DEPTH_STALL |
1962
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
1963
PIPE_CONTROL_RENDER_TARGET_FLUSH);
1964
#endif
1965
}
1966
1967
static float
1968
get_line_width(const struct pipe_rasterizer_state *state)
1969
{
1970
float line_width = state->line_width;
1971
1972
/* From the OpenGL 4.4 spec:
1973
*
1974
* "The actual width of non-antialiased lines is determined by rounding
1975
* the supplied width to the nearest integer, then clamping it to the
1976
* implementation-dependent maximum non-antialiased line width."
1977
*/
1978
if (!state->multisample && !state->line_smooth)
1979
line_width = roundf(state->line_width);
1980
1981
if (!state->multisample && state->line_smooth && line_width < 1.5f) {
1982
/* For 1 pixel line thickness or less, the general anti-aliasing
1983
* algorithm gives up, and a garbage line is generated. Setting a
1984
* Line Width of 0.0 specifies the rasterization of the "thinnest"
1985
* (one-pixel-wide), non-antialiased lines.
1986
*
1987
* Lines rendered with zero Line Width are rasterized using the
1988
* "Grid Intersection Quantization" rules as specified by the
1989
* "Zero-Width (Cosmetic) Line Rasterization" section of the docs.
1990
*/
1991
line_width = 0.0f;
1992
}
1993
1994
return line_width;
1995
}
1996
1997
/**
1998
* The pipe->create_rasterizer_state() driver hook.
1999
*/
2000
static void *
2001
crocus_create_rasterizer_state(struct pipe_context *ctx,
2002
const struct pipe_rasterizer_state *state)
2003
{
2004
struct crocus_rasterizer_state *cso =
2005
malloc(sizeof(struct crocus_rasterizer_state));
2006
2007
cso->fill_mode_point_or_line =
2008
state->fill_front == PIPE_POLYGON_MODE_LINE ||
2009
state->fill_front == PIPE_POLYGON_MODE_POINT ||
2010
state->fill_back == PIPE_POLYGON_MODE_LINE ||
2011
state->fill_back == PIPE_POLYGON_MODE_POINT;
2012
2013
if (state->clip_plane_enable != 0)
2014
cso->num_clip_plane_consts = util_logbase2(state->clip_plane_enable) + 1;
2015
else
2016
cso->num_clip_plane_consts = 0;
2017
2018
cso->cso = *state;
2019
2020
#if GFX_VER >= 6
2021
float line_width = get_line_width(state);
2022
2023
crocus_pack_command(GENX(3DSTATE_SF), cso->sf, sf) {
2024
sf.StatisticsEnable = true;
2025
sf.AALineDistanceMode = AALINEDISTANCE_TRUE;
2026
sf.LineEndCapAntialiasingRegionWidth =
2027
state->line_smooth ? _10pixels : _05pixels;
2028
sf.LastPixelEnable = state->line_last_pixel;
2029
#if GFX_VER == 8
2030
struct crocus_screen *screen = (struct crocus_screen *)ctx->screen;
2031
if (screen->devinfo.is_cherryview)
2032
sf.CHVLineWidth = line_width;
2033
else
2034
sf.LineWidth = line_width;
2035
#else
2036
sf.LineWidth = line_width;
2037
#endif
2038
sf.PointWidthSource = state->point_size_per_vertex ? Vertex : State;
2039
sf.PointWidth = state->point_size;
2040
2041
if (state->flatshade_first) {
2042
sf.TriangleFanProvokingVertexSelect = 1;
2043
} else {
2044
sf.TriangleStripListProvokingVertexSelect = 2;
2045
sf.TriangleFanProvokingVertexSelect = 2;
2046
sf.LineStripListProvokingVertexSelect = 1;
2047
}
2048
2049
#if GFX_VER == 6
2050
sf.AttributeSwizzleEnable = true;
2051
if (state->sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT)
2052
sf.PointSpriteTextureCoordinateOrigin = LOWERLEFT;
2053
else
2054
sf.PointSpriteTextureCoordinateOrigin = UPPERLEFT;
2055
#endif
2056
2057
#if GFX_VER <= 7
2058
sf.FrontWinding = state->front_ccw ? 1 : 0; // Or the other way...
2059
2060
#if GFX_VER >= 6
2061
sf.GlobalDepthOffsetEnableSolid = state->offset_tri;
2062
sf.GlobalDepthOffsetEnableWireframe = state->offset_line;
2063
sf.GlobalDepthOffsetEnablePoint = state->offset_point;
2064
sf.GlobalDepthOffsetConstant = state->offset_units * 2;
2065
sf.GlobalDepthOffsetScale = state->offset_scale;
2066
sf.GlobalDepthOffsetClamp = state->offset_clamp;
2067
2068
sf.FrontFaceFillMode = translate_fill_mode(state->fill_front);
2069
sf.BackFaceFillMode = translate_fill_mode(state->fill_back);
2070
#endif
2071
2072
sf.CullMode = translate_cull_mode(state->cull_face);
2073
sf.ScissorRectangleEnable = true;
2074
2075
#if GFX_VERx10 == 75
2076
sf.LineStippleEnable = state->line_stipple_enable;
2077
#endif
2078
#endif
2079
}
2080
#endif
2081
2082
#if GFX_VER == 8
2083
crocus_pack_command(GENX(3DSTATE_RASTER), cso->raster, rr) {
2084
rr.FrontWinding = state->front_ccw ? CounterClockwise : Clockwise;
2085
rr.CullMode = translate_cull_mode(state->cull_face);
2086
rr.FrontFaceFillMode = translate_fill_mode(state->fill_front);
2087
rr.BackFaceFillMode = translate_fill_mode(state->fill_back);
2088
rr.DXMultisampleRasterizationEnable = state->multisample;
2089
rr.GlobalDepthOffsetEnableSolid = state->offset_tri;
2090
rr.GlobalDepthOffsetEnableWireframe = state->offset_line;
2091
rr.GlobalDepthOffsetEnablePoint = state->offset_point;
2092
rr.GlobalDepthOffsetConstant = state->offset_units * 2;
2093
rr.GlobalDepthOffsetScale = state->offset_scale;
2094
rr.GlobalDepthOffsetClamp = state->offset_clamp;
2095
rr.SmoothPointEnable = state->point_smooth;
2096
rr.AntialiasingEnable = state->line_smooth;
2097
rr.ScissorRectangleEnable = state->scissor;
2098
rr.ViewportZClipTestEnable = (state->depth_clip_near || state->depth_clip_far);
2099
}
2100
#endif
2101
2102
#if GFX_VER >= 6
2103
crocus_pack_command(GENX(3DSTATE_CLIP), cso->clip, cl) {
2104
/* cl.NonPerspectiveBarycentricEnable is filled in at draw time from
2105
* the FS program; cl.ForceZeroRTAIndexEnable is filled in from the FB.
2106
*/
2107
#if GFX_VER >= 7
2108
cl.EarlyCullEnable = true;
2109
#endif
2110
2111
#if GFX_VER == 7
2112
cl.FrontWinding = state->front_ccw ? 1 : 0;
2113
cl.CullMode = translate_cull_mode(state->cull_face);
2114
#endif
2115
cl.UserClipDistanceClipTestEnableBitmask = state->clip_plane_enable;
2116
#if GFX_VER < 8
2117
cl.ViewportZClipTestEnable = (state->depth_clip_near || state->depth_clip_far);
2118
#endif
2119
cl.APIMode = state->clip_halfz ? APIMODE_D3D : APIMODE_OGL;
2120
cl.GuardbandClipTestEnable = true;
2121
cl.ClipEnable = true;
2122
cl.MinimumPointWidth = 0.125;
2123
cl.MaximumPointWidth = 255.875;
2124
2125
#if GFX_VER == 8
2126
cl.ForceUserClipDistanceClipTestEnableBitmask = true;
2127
#endif
2128
2129
if (state->flatshade_first) {
2130
cl.TriangleFanProvokingVertexSelect = 1;
2131
} else {
2132
cl.TriangleStripListProvokingVertexSelect = 2;
2133
cl.TriangleFanProvokingVertexSelect = 2;
2134
cl.LineStripListProvokingVertexSelect = 1;
2135
}
2136
}
2137
#endif
2138
2139
/* Remap from 0..255 back to 1..256 */
2140
const unsigned line_stipple_factor = state->line_stipple_factor + 1;
2141
2142
crocus_pack_command(GENX(3DSTATE_LINE_STIPPLE), cso->line_stipple, line) {
2143
if (state->line_stipple_enable) {
2144
line.LineStipplePattern = state->line_stipple_pattern;
2145
line.LineStippleInverseRepeatCount = 1.0f / line_stipple_factor;
2146
line.LineStippleRepeatCount = line_stipple_factor;
2147
}
2148
}
2149
2150
return cso;
2151
}
2152
2153
/**
2154
* The pipe->bind_rasterizer_state() driver hook.
2155
*
2156
* Bind a rasterizer CSO and flag related dirty bits.
2157
*/
2158
static void
crocus_bind_rasterizer_state(struct pipe_context *ctx, void *state)
{
   struct crocus_context *ice = (struct crocus_context *) ctx;
   /* old_cso is referenced implicitly by the cso_changed*() macros, which
    * compare a field of old_cso against the same field of new_cso.
    */
   struct crocus_rasterizer_state *old_cso = ice->state.cso_rast;
   struct crocus_rasterizer_state *new_cso = state;

   if (new_cso) {
      /* Try to avoid re-emitting 3DSTATE_LINE_STIPPLE, it's non-pipelined */
      if (cso_changed_memcmp(line_stipple))
         ice->state.dirty |= CROCUS_DIRTY_LINE_STIPPLE;
#if GFX_VER >= 6
      if (cso_changed(cso.half_pixel_center))
         ice->state.dirty |= CROCUS_DIRTY_GEN6_MULTISAMPLE;
      if (cso_changed(cso.scissor))
         ice->state.dirty |= CROCUS_DIRTY_GEN6_SCISSOR_RECT;
      if (cso_changed(cso.multisample))
         ice->state.dirty |= CROCUS_DIRTY_WM;
#else
      if (cso_changed(cso.scissor))
         ice->state.dirty |= CROCUS_DIRTY_SF_CL_VIEWPORT;
#endif

      if (cso_changed(cso.line_stipple_enable) || cso_changed(cso.poly_stipple_enable))
         ice->state.dirty |= CROCUS_DIRTY_WM;

#if GFX_VER >= 6
      if (cso_changed(cso.rasterizer_discard))
         ice->state.dirty |= CROCUS_DIRTY_STREAMOUT | CROCUS_DIRTY_CLIP;

      if (cso_changed(cso.flatshade_first))
         ice->state.dirty |= CROCUS_DIRTY_STREAMOUT;
#endif

      /* Depth clipping controls affect the CC viewport's Z range. */
      if (cso_changed(cso.depth_clip_near) || cso_changed(cso.depth_clip_far) ||
          cso_changed(cso.clip_halfz))
         ice->state.dirty |= CROCUS_DIRTY_CC_VIEWPORT;

#if GFX_VER >= 7
      if (cso_changed(cso.sprite_coord_enable) ||
          cso_changed(cso.sprite_coord_mode) ||
          cso_changed(cso.light_twoside))
         ice->state.dirty |= CROCUS_DIRTY_GEN7_SBE;
#endif
#if GFX_VER <= 5
      if (cso_changed(cso.clip_plane_enable))
         ice->state.dirty |= CROCUS_DIRTY_GEN4_CURBE;
#endif
   }

   /* Unconditionally flag the state that always depends on the raster CSO. */
   ice->state.cso_rast = new_cso;
   ice->state.dirty |= CROCUS_DIRTY_RASTER;
   ice->state.dirty |= CROCUS_DIRTY_CLIP;
#if GFX_VER <= 5
   ice->state.dirty |= CROCUS_DIRTY_GEN4_CLIP_PROG | CROCUS_DIRTY_GEN4_SF_PROG;
   ice->state.dirty |= CROCUS_DIRTY_WM;
#endif
#if GFX_VER <= 6
   ice->state.dirty |= CROCUS_DIRTY_GEN4_FF_GS_PROG;
#endif
   /* Let shader stages that depend on rasterizer NOS recompile if needed. */
   ice->state.stage_dirty |= ice->state.stage_dirty_for_nos[CROCUS_NOS_RASTERIZER];
}
2220
2221
/**
2222
* Return true if the given wrap mode requires the border color to exist.
2223
*
2224
* (We can skip uploading it if the sampler isn't going to use it.)
2225
*/
2226
static bool
2227
wrap_mode_needs_border_color(unsigned wrap_mode)
2228
{
2229
#if GFX_VER == 8
2230
return wrap_mode == TCM_CLAMP_BORDER || wrap_mode == TCM_HALF_BORDER;
2231
#else
2232
return wrap_mode == TCM_CLAMP_BORDER;
2233
#endif
2234
}
2235
2236
/**
2237
* Gallium CSO for sampler state.
2238
*/
2239
struct crocus_sampler_state {
2240
struct pipe_sampler_state pstate;
2241
union pipe_color_union border_color;
2242
bool needs_border_color;
2243
unsigned wrap_s;
2244
unsigned wrap_t;
2245
unsigned wrap_r;
2246
unsigned mag_img_filter;
2247
float min_lod;
2248
};
2249
2250
/**
 * The pipe->create_sampler_state() driver hook.
 *
 * We fill out SAMPLER_STATE (except for the border color pointer), and
 * store that on the CPU.  It doesn't make sense to upload it to a GPU
 * buffer object yet, because 3DSTATE_SAMPLER_STATE_POINTERS requires
 * all bound sampler states to be in contiguous memory.
 *
 * Returns NULL on allocation failure.
 */
static void *
crocus_create_sampler_state(struct pipe_context *ctx,
                            const struct pipe_sampler_state *state)
{
   struct crocus_sampler_state *cso = CALLOC_STRUCT(crocus_sampler_state);

   if (!cso)
      return NULL;

   /* The translate_wrap() calls below rely on the gallium filter enums
    * matching the hardware MAPFILTER values.
    */
   STATIC_ASSERT(PIPE_TEX_FILTER_NEAREST == MAPFILTER_NEAREST);
   STATIC_ASSERT(PIPE_TEX_FILTER_LINEAR == MAPFILTER_LINEAR);

   bool either_nearest = state->min_img_filter == PIPE_TEX_FILTER_NEAREST ||
                         state->mag_img_filter == PIPE_TEX_FILTER_NEAREST;
   cso->wrap_s = translate_wrap(state->wrap_s, either_nearest);
   cso->wrap_t = translate_wrap(state->wrap_t, either_nearest);
   cso->wrap_r = translate_wrap(state->wrap_r, either_nearest);

   cso->pstate = *state;

   memcpy(&cso->border_color, &state->border_color, sizeof(cso->border_color));

   /* Only upload a border color later if some wrap mode will sample it. */
   cso->needs_border_color = wrap_mode_needs_border_color(cso->wrap_s) ||
                             wrap_mode_needs_border_color(cso->wrap_t) ||
                             wrap_mode_needs_border_color(cso->wrap_r);

   cso->min_lod = state->min_lod;
   cso->mag_img_filter = state->mag_img_filter;

   /* Workaround ported from the ilo driver: with mip filtering disabled,
    * a positive min_lod would still clamp the (always-zero) LOD upward,
    * presumably making the hardware treat every sample as minified.  Zero
    * the clamp and make the mag filter match the min filter to compensate.
    * NOTE(review): rationale inferred from the code; the original author
    * did not document it ("I don't get it at all") — verify against ilo.
    */
   if (state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE &&
       state->min_lod > 0.0f) {
      cso->min_lod = 0.0f;
      cso->mag_img_filter = state->min_img_filter;
   }

   return cso;
}
2296
2297
/**
2298
* The pipe->bind_sampler_states() driver hook.
2299
*/
2300
static void
2301
crocus_bind_sampler_states(struct pipe_context *ctx,
2302
enum pipe_shader_type p_stage,
2303
unsigned start, unsigned count,
2304
void **states)
2305
{
2306
struct crocus_context *ice = (struct crocus_context *) ctx;
2307
gl_shader_stage stage = stage_from_pipe(p_stage);
2308
struct crocus_shader_state *shs = &ice->state.shaders[stage];
2309
2310
assert(start + count <= CROCUS_MAX_TEXTURE_SAMPLERS);
2311
2312
bool dirty = false;
2313
2314
for (int i = 0; i < count; i++) {
2315
if (shs->samplers[start + i] != states[i]) {
2316
shs->samplers[start + i] = states[i];
2317
dirty = true;
2318
}
2319
}
2320
2321
if (dirty) {
2322
#if GFX_VER <= 5
2323
if (p_stage == PIPE_SHADER_FRAGMENT)
2324
ice->state.dirty |= CROCUS_DIRTY_WM;
2325
else if (p_stage == PIPE_SHADER_VERTEX)
2326
ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_VS;
2327
#endif
2328
ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_SAMPLER_STATES_VS << stage;
2329
ice->state.stage_dirty |= ice->state.stage_dirty_for_nos[CROCUS_NOS_TEXTURES];
2330
}
2331
}
2332
2333
/* Wrap-mode overrides applied in crocus_upload_sampler_state(); selected
 * per-texture in crocus_upload_sampler_states().
 */
enum samp_workaround {
   SAMP_NORMAL,       /* use the CSO's wrap modes unmodified */
   SAMP_CUBE_CLAMP,   /* force all three wrap modes to TCM_CLAMP (cube fallback) */
   SAMP_CUBE_CUBE,    /* force all three wrap modes to TCM_CUBE (seamless cube) */
   SAMP_T_WRAP,       /* force wrap_t to TCM_WRAP (1D texture workaround) */
};
2339
2340
/**
 * Pack a single SAMPLER_STATE structure into \p map (already-allocated
 * space in the batch's dynamic state area).
 *
 * \param border_color_offset  state-buffer offset of the border color
 *                             (only meaningful if the CSO needs one)
 * \param samp_workaround      wrap-mode override to apply (see enum)
 * \param first_level          base mip level (used on Gfx6 only)
 */
static void
crocus_upload_sampler_state(struct crocus_batch *batch,
                            struct crocus_sampler_state *cso,
                            uint32_t border_color_offset,
                            enum samp_workaround samp_workaround,
                            uint32_t first_level,
                            void *map)
{
   struct pipe_sampler_state *state = &cso->pstate;
   uint32_t wrap_s, wrap_t, wrap_r;

   wrap_s = cso->wrap_s;
   wrap_t = cso->wrap_t;
   wrap_r = cso->wrap_r;

   /* Apply per-texture-target wrap-mode overrides. */
   switch (samp_workaround) {
   case SAMP_CUBE_CLAMP:
      wrap_s = TCM_CLAMP;
      wrap_t = TCM_CLAMP;
      wrap_r = TCM_CLAMP;
      break;
   case SAMP_CUBE_CUBE:
      wrap_s = TCM_CUBE;
      wrap_t = TCM_CUBE;
      wrap_r = TCM_CUBE;
      break;
   case SAMP_T_WRAP:
      wrap_t = TCM_WRAP;
      break;
   default:
      break;
   }

   _crocus_pack_state(batch, GENX(SAMPLER_STATE), map, samp) {
      samp.TCXAddressControlMode = wrap_s;
      samp.TCYAddressControlMode = wrap_t;
      samp.TCZAddressControlMode = wrap_r;

#if GFX_VER >= 6
      samp.NonnormalizedCoordinateEnable = !state->normalized_coords;
#endif
      samp.MinModeFilter = state->min_img_filter;
      samp.MagModeFilter = cso->mag_img_filter;
      samp.MipModeFilter = translate_mip_filter(state->min_mip_filter);
      samp.MaximumAnisotropy = RATIO21;

      /* Anisotropic filtering replaces the linear min/mag filters. */
      if (state->max_anisotropy >= 2) {
         if (state->min_img_filter == PIPE_TEX_FILTER_LINEAR) {
            samp.MinModeFilter = MAPFILTER_ANISOTROPIC;
#if GFX_VER >= 7
            samp.AnisotropicAlgorithm = EWAApproximation;
#endif
         }

         if (state->mag_img_filter == PIPE_TEX_FILTER_LINEAR)
            samp.MagModeFilter = MAPFILTER_ANISOTROPIC;

         /* Map max_anisotropy 2..16 onto the RATIO21..RATIO161 encoding. */
         samp.MaximumAnisotropy =
            MIN2((state->max_anisotropy - 2) / 2, RATIO161);
      }

      /* Set address rounding bits if not using nearest filtering. */
      if (state->min_img_filter != PIPE_TEX_FILTER_NEAREST) {
         samp.UAddressMinFilterRoundingEnable = true;
         samp.VAddressMinFilterRoundingEnable = true;
         samp.RAddressMinFilterRoundingEnable = true;
      }

      if (state->mag_img_filter != PIPE_TEX_FILTER_NEAREST) {
         samp.UAddressMagFilterRoundingEnable = true;
         samp.VAddressMagFilterRoundingEnable = true;
         samp.RAddressMagFilterRoundingEnable = true;
      }

      if (state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE)
         samp.ShadowFunction = translate_shadow_func(state->compare_func);

      /* Hardware LOD clamp range differs by generation. */
      const float hw_max_lod = GFX_VER >= 7 ? 14 : 13;

#if GFX_VER == 8
      samp.LODPreClampMode = CLAMP_MODE_OGL;
#else
      samp.LODPreClampEnable = true;
#endif
      samp.MinLOD = CLAMP(cso->min_lod, 0, hw_max_lod);
      samp.MaxLOD = CLAMP(state->max_lod, 0, hw_max_lod);
      samp.TextureLODBias = CLAMP(state->lod_bias, -16, 15);

#if GFX_VER == 6
      samp.BaseMipLevel = CLAMP(first_level, 0, hw_max_lod);
      samp.MinandMagStateNotEqual = samp.MinModeFilter != samp.MagModeFilter;
#endif

#if GFX_VER < 6
      /* Pre-Gfx6 the border color pointer is a relocation into the
       * state buffer; later gens take a plain offset.
       */
      samp.BorderColorPointer =
         ro_bo(batch->state.bo, border_color_offset);
#else
      samp.BorderColorPointer = border_color_offset;
#endif
   }
}
2441
2442
/**
 * Stream a SAMPLER_BORDER_COLOR_STATE for \p cso into the batch's state
 * buffer, writing its offset to \p *bc_offset.
 *
 * Handles swizzling for faked A/LA formats and the per-generation border
 * color encodings (raw 32-bit on Gfx8, per-size integer channels on
 * Haswell, unorm/snorm/half replicas on Gfx5-6, plain floats on Gfx4).
 */
static void
crocus_upload_border_color(struct crocus_batch *batch,
                           struct crocus_sampler_state *cso,
                           struct crocus_sampler_view *tex,
                           uint32_t *bc_offset)
{
   /* We may need to swizzle the border color for format faking.
    * A/LA formats are faked as R/RG with 000R or R00G swizzles.
    * This means we need to move the border color's A channel into
    * the R or G channels so that those read swizzles will move it
    * back into A.
    */
   enum pipe_format internal_format = PIPE_FORMAT_NONE;
   union pipe_color_union *color = &cso->border_color;
   union pipe_color_union tmp;
   if (tex) {
      internal_format = tex->res->internal_format;

      if (util_format_is_alpha(internal_format)) {
         unsigned char swz[4] = {
            PIPE_SWIZZLE_0, PIPE_SWIZZLE_0,
            PIPE_SWIZZLE_0, PIPE_SWIZZLE_W,
         };
         util_format_apply_color_swizzle(&tmp, color, swz, true);
         color = &tmp;
      } else if (util_format_is_luminance_alpha(internal_format) &&
                 internal_format != PIPE_FORMAT_L8A8_SRGB) {
         unsigned char swz[4] = {
            PIPE_SWIZZLE_X, PIPE_SWIZZLE_X,
            PIPE_SWIZZLE_X, PIPE_SWIZZLE_W
         };
         util_format_apply_color_swizzle(&tmp, color, swz, true);
         color = &tmp;
      }
   }
   bool is_integer_format = util_format_is_pure_integer(internal_format);
   unsigned sbc_size = GENX(SAMPLER_BORDER_COLOR_STATE_length) * 4;
   /* Alignment requirement varies: 64B on Gfx8, 512B for Haswell integer
    * border colors, 32B otherwise.
    */
   const int sbc_align = (GFX_VER == 8 ? 64 : ((GFX_VERx10 == 75 && is_integer_format) ? 512 : 32));
   uint32_t *sbc = stream_state(batch, sbc_size, sbc_align, bc_offset);

   struct GENX(SAMPLER_BORDER_COLOR_STATE) state = { 0 };

/* Channel-assignment helpers used via BORDER_COLOR_ATTR below. */
#define ASSIGN(dst, src) \
   do {                  \
      dst = src;         \
   } while (0)

#define ASSIGNu16(dst, src)  \
   do {                      \
      dst = (uint16_t)src;   \
   } while (0)

#define ASSIGNu8(dst, src)  \
   do {                     \
      dst = (uint8_t)src;   \
   } while (0)

/* Apply `macro` to all four channels of the named border color field. */
#define BORDER_COLOR_ATTR(macro, _color_type, src)            \
   macro(state.BorderColor ## _color_type ## Red, src[0]);    \
   macro(state.BorderColor ## _color_type ## Green, src[1]);  \
   macro(state.BorderColor ## _color_type ## Blue, src[2]);   \
   macro(state.BorderColor ## _color_type ## Alpha, src[3]);

#if GFX_VER >= 8
   /* On Broadwell, the border color is represented as four 32-bit floats,
    * integers, or unsigned values, interpreted according to the surface
    * format.  This matches the sampler->BorderColor union exactly; just
    * memcpy the values.
    */
   BORDER_COLOR_ATTR(ASSIGN, 32bit, color->ui);
#elif GFX_VERx10 == 75
   if (is_integer_format) {
      const struct util_format_description *format_desc =
         util_format_description(internal_format);

      /* From the Haswell PRM, "Command Reference: Structures", Page 36:
       * "If any color channel is missing from the surface format,
       *  corresponding border color should be programmed as zero and if
       *  alpha channel is missing, corresponding Alpha border color should
       *  be programmed as 1."
       */
      unsigned c[4] = { 0, 0, 0, 1 };
      for (int i = 0; i < 4; i++) {
         if (format_desc->channel[i].size)
            c[i] = color->ui[i];
      }

      switch (format_desc->channel[0].size) {
      case 8:
         /* Copy RGBA in order. */
         BORDER_COLOR_ATTR(ASSIGNu8, 8bit, c);
         break;
      case 10:
         /* R10G10B10A2_UINT is treated like a 16-bit format. */
      case 16:
         BORDER_COLOR_ATTR(ASSIGNu16, 16bit, c);
         break;
      case 32:
         if (format_desc->channel[1].size && !format_desc->channel[2].size) {
            /* Careful inspection of the tables reveals that for RG32 formats,
             * the green channel needs to go where blue normally belongs.
             */
            state.BorderColor32bitRed = c[0];
            state.BorderColor32bitBlue = c[1];
            state.BorderColor32bitAlpha = 1;
         } else {
            /* Copy RGBA in order. */
            BORDER_COLOR_ATTR(ASSIGN, 32bit, c);
         }
         break;
      default:
         assert(!"Invalid number of bits per channel in integer format.");
         break;
      }
   } else {
      BORDER_COLOR_ATTR(ASSIGN, Float, color->f);
   }
#elif GFX_VER == 5 || GFX_VER == 6
   /* Gfx5-6 store the border color in several pre-converted encodings;
    * fill them all so any surface format reads a sensible value.
    */
   BORDER_COLOR_ATTR(UNCLAMPED_FLOAT_TO_UBYTE, Unorm, color->f);
   BORDER_COLOR_ATTR(UNCLAMPED_FLOAT_TO_USHORT, Unorm16, color->f);
   BORDER_COLOR_ATTR(UNCLAMPED_FLOAT_TO_SHORT, Snorm16, color->f);

#define MESA_FLOAT_TO_HALF(dst, src) \
   dst = _mesa_float_to_half(src);

   BORDER_COLOR_ATTR(MESA_FLOAT_TO_HALF, Float16, color->f);

#undef MESA_FLOAT_TO_HALF

   /* Derive the snorm8 values from the already-converted snorm16 ones. */
   state.BorderColorSnorm8Red   = state.BorderColorSnorm16Red >> 8;
   state.BorderColorSnorm8Green = state.BorderColorSnorm16Green >> 8;
   state.BorderColorSnorm8Blue  = state.BorderColorSnorm16Blue >> 8;
   state.BorderColorSnorm8Alpha = state.BorderColorSnorm16Alpha >> 8;

   BORDER_COLOR_ATTR(ASSIGN, Float, color->f);

#elif GFX_VER == 4
   BORDER_COLOR_ATTR(ASSIGN, , color->f);
#else
   BORDER_COLOR_ATTR(ASSIGN, Float, color->f);
#endif

#undef ASSIGN
#undef BORDER_COLOR_ATTR

   GENX(SAMPLER_BORDER_COLOR_STATE_pack)(batch, sbc, &state);
}
2589
2590
/**
 * Upload the sampler states into a contiguous area of GPU memory, for
 * 3DSTATE_SAMPLER_STATE_POINTERS_*.
 *
 * Also fill out the border color state pointers.
 */
static void
crocus_upload_sampler_states(struct crocus_context *ice,
                             struct crocus_batch *batch, gl_shader_stage stage)
{
   struct crocus_shader_state *shs = &ice->state.shaders[stage];
   const struct shader_info *info = crocus_get_shader_info(ice, stage);

   /* We assume the state tracker will call pipe->bind_sampler_states()
    * if the program's number of textures changes.
    */
   unsigned count = info ? BITSET_LAST_BIT(info->textures_used) : 0;

   if (!count)
      return;

   /* Assemble the SAMPLER_STATEs into a contiguous table that lives
    * in the dynamic state memory zone, so we can point to it via the
    * 3DSTATE_SAMPLER_STATE_POINTERS_* commands.
    */
   unsigned size = count * 4 * GENX(SAMPLER_STATE_length);
   uint32_t *map = stream_state(batch, size, 32, &shs->sampler_offset);

   if (unlikely(!map))
      return;

   for (int i = 0; i < count; i++) {
      struct crocus_sampler_state *state = shs->samplers[i];
      struct crocus_sampler_view *tex = shs->textures[i];

      if (!state || !tex) {
         /* Unbound slot: zero-fill so the hardware reads harmless state. */
         memset(map, 0, 4 * GENX(SAMPLER_STATE_length));
      } else {
         unsigned border_color_offset = 0;
         if (state->needs_border_color) {
            crocus_upload_border_color(batch, state, tex, &border_color_offset);
         }

         enum samp_workaround wa = SAMP_NORMAL;
         /* There's a bug in 1D texture sampling - it actually pays
          * attention to the wrap_t value, though it should not.
          * Override the wrap_t value here to GL_REPEAT to keep
          * any nonexistent border pixels from floating in.
          */
         if (tex->base.target == PIPE_TEXTURE_1D)
            wa = SAMP_T_WRAP;
         else if (tex->base.target == PIPE_TEXTURE_CUBE ||
                  tex->base.target == PIPE_TEXTURE_CUBE_ARRAY) {
            /* Cube maps must use the same wrap mode for all three coordinate
             * dimensions.  Prior to Haswell, only CUBE and CLAMP are valid.
             *
             * Ivybridge and Baytrail seem to have problems with CUBE mode and
             * integer formats.  Fall back to CLAMP for now.
             */
            if (state->pstate.seamless_cube_map &&
                !(GFX_VERx10 == 70 && util_format_is_pure_integer(tex->base.format)))
               wa = SAMP_CUBE_CUBE;
            else
               wa = SAMP_CUBE_CLAMP;
         }

         /* Buffer textures have no mip levels. */
         uint32_t first_level = 0;
         if (tex->base.target != PIPE_BUFFER)
            first_level = tex->base.u.tex.first_level;

         crocus_upload_sampler_state(batch, state, border_color_offset, wa, first_level, map);
      }

      map += GENX(SAMPLER_STATE_length);
   }
}
2666
2667
/**
 * The pipe->create_sampler_view() driver hook.
 *
 * Allocates a crocus_sampler_view wrapping \p tex, resolves combined
 * depth/stencil formats down to the actual depth or stencil resource,
 * computes the final channel swizzle, and fills out the isl_view
 * (plus a separate gather view on gfx6+ to work around gather4 format
 * bugs).  Returns NULL on allocation failure.
 */
static struct pipe_sampler_view *
crocus_create_sampler_view(struct pipe_context *ctx,
                           struct pipe_resource *tex,
                           const struct pipe_sampler_view *tmpl)
{
   struct crocus_screen *screen = (struct crocus_screen *)ctx->screen;
   const struct intel_device_info *devinfo = &screen->devinfo;
   struct crocus_sampler_view *isv = calloc(1, sizeof(struct crocus_sampler_view));

   if (!isv)
      return NULL;

   /* initialize base object */
   isv->base = *tmpl;
   isv->base.context = ctx;
   isv->base.texture = NULL;
   pipe_reference_init(&isv->base.reference, 1);
   pipe_resource_reference(&isv->base.texture, tex);

   if (util_format_is_depth_or_stencil(tmpl->format)) {
      struct crocus_resource *zres, *sres;
      const struct util_format_description *desc =
         util_format_description(tmpl->format);

      /* A combined Z/S view samples from either the depth or the stencil
       * sub-resource, never both; pick based on the view format.
       */
      crocus_get_depth_stencil_resources(devinfo, tex, &zres, &sres);

      tex = util_format_has_depth(desc) ? &zres->base.b : &sres->base.b;

      /* gfx7 stencil sampling goes through a shadow copy when one exists. */
      if (tex->format == PIPE_FORMAT_S8_UINT)
         if (devinfo->ver == 7 && sres->shadow)
            tex = &sres->shadow->base.b;
   }

   isv->res = (struct crocus_resource *) tex;

   isl_surf_usage_flags_t usage = ISL_SURF_USAGE_TEXTURE_BIT;

   if (isv->base.target == PIPE_TEXTURE_CUBE ||
       isv->base.target == PIPE_TEXTURE_CUBE_ARRAY)
      usage |= ISL_SURF_USAGE_CUBE_BIT;

   const struct crocus_format_info fmt =
      crocus_format_for_usage(devinfo, tmpl->format, usage);

   /* Merge the view's requested swizzle with the format's own swizzle. */
   enum pipe_swizzle vswz[4] = { tmpl->swizzle_r, tmpl->swizzle_g, tmpl->swizzle_b, tmpl->swizzle_a };
   crocus_combine_swizzle(isv->swizzle, fmt.swizzles, vswz);

   /* hardcode stencil swizzles - hw returns 0G01, we want GGGG */
   if (devinfo->ver < 6 &&
       (tmpl->format == PIPE_FORMAT_X32_S8X24_UINT ||
        tmpl->format == PIPE_FORMAT_X24S8_UINT)) {
      isv->swizzle[0] = tmpl->swizzle_g;
      isv->swizzle[1] = tmpl->swizzle_g;
      isv->swizzle[2] = tmpl->swizzle_g;
      isv->swizzle[3] = tmpl->swizzle_g;
   }

   isv->clear_color = isv->res->aux.clear_color;

   isv->view = (struct isl_view) {
      .format = fmt.fmt,
#if GFX_VERx10 >= 75
      .swizzle = (struct isl_swizzle) {
         .r = pipe_to_isl_swizzle(isv->swizzle[0], false),
         .g = pipe_to_isl_swizzle(isv->swizzle[1], false),
         .b = pipe_to_isl_swizzle(isv->swizzle[2], false),
         .a = pipe_to_isl_swizzle(isv->swizzle[3], false),
      },
#else
      /* swizzling handled in shader code */
      .swizzle = ISL_SWIZZLE_IDENTITY,
#endif
      .usage = usage,
   };

   /* Fill out the level/layer range for non-buffer textures. */
   if (tmpl->target != PIPE_BUFFER) {
      isv->view.base_level = tmpl->u.tex.first_level;
      isv->view.levels = tmpl->u.tex.last_level - tmpl->u.tex.first_level + 1;
      // XXX: do I need to port f9fd0cf4790cb2a530e75d1a2206dbb9d8af7cb2?
      isv->view.base_array_layer = tmpl->u.tex.first_layer;
      isv->view.array_len =
         tmpl->u.tex.last_layer - tmpl->u.tex.first_layer + 1;
   }
#if GFX_VER >= 6
   /* just create a second view struct for texture gather just in case */
   isv->gather_view = isv->view;

#if GFX_VER == 7
   /* gfx7 gather4 needs the R32G32 "load" format variant. */
   if (fmt.fmt == ISL_FORMAT_R32G32_FLOAT ||
       fmt.fmt == ISL_FORMAT_R32G32_SINT ||
       fmt.fmt == ISL_FORMAT_R32G32_UINT) {
      isv->gather_view.format = ISL_FORMAT_R32G32_FLOAT_LD;
#if GFX_VERx10 >= 75
      isv->gather_view.swizzle = (struct isl_swizzle) {
         .r = pipe_to_isl_swizzle(isv->swizzle[0], GFX_VERx10 == 75),
         .g = pipe_to_isl_swizzle(isv->swizzle[1], GFX_VERx10 == 75),
         .b = pipe_to_isl_swizzle(isv->swizzle[2], GFX_VERx10 == 75),
         .a = pipe_to_isl_swizzle(isv->swizzle[3], GFX_VERx10 == 75),
      };
#endif
   }
#endif
#if GFX_VER == 6
   /* Sandybridge's gather4 message is broken for integer formats.
    * To work around this, we pretend the surface is UNORM for
    * 8 or 16-bit formats, and emit shader instructions to recover
    * the real INT/UINT value. For 32-bit formats, we pretend
    * the surface is FLOAT, and simply reinterpret the resulting
    * bits.
    */
   switch (fmt.fmt) {
   case ISL_FORMAT_R8_SINT:
   case ISL_FORMAT_R8_UINT:
      isv->gather_view.format = ISL_FORMAT_R8_UNORM;
      break;

   case ISL_FORMAT_R16_SINT:
   case ISL_FORMAT_R16_UINT:
      isv->gather_view.format = ISL_FORMAT_R16_UNORM;
      break;

   case ISL_FORMAT_R32_SINT:
   case ISL_FORMAT_R32_UINT:
      isv->gather_view.format = ISL_FORMAT_R32_FLOAT;
      break;

   default:
      break;
   }
#endif
#endif
   /* Finish any pending aux import before the view is first used. */
   if (tmpl->target != PIPE_BUFFER) {
      if (crocus_resource_unfinished_aux_import(isv->res))
         crocus_resource_finish_aux_import(&screen->base, isv->res);

   }

   return &isv->base;
}
2811
2812
static void
2813
crocus_sampler_view_destroy(struct pipe_context *ctx,
2814
struct pipe_sampler_view *state)
2815
{
2816
struct crocus_sampler_view *isv = (void *) state;
2817
pipe_resource_reference(&state->texture, NULL);
2818
free(isv);
2819
}
2820
2821
/**
 * The pipe->create_surface() driver hook.
 *
 * In Gallium nomenclature, "surfaces" are a view of a resource that
 * can be bound as a render target or depth/stencil buffer.
 *
 * Fills out the pipe_surface and the isl_view(s) describing the render
 * target.  Depth/stencil surfaces return early (no SURFACE_STATE needed);
 * non-compressed color surfaces may allocate a temporary aligned resource
 * on original gfx4 hardware.  The compressed-resource-with-uncompressed-view
 * path is currently disabled (returns NULL).
 */
static struct pipe_surface *
crocus_create_surface(struct pipe_context *ctx,
                      struct pipe_resource *tex,
                      const struct pipe_surface *tmpl)
{
   struct crocus_screen *screen = (struct crocus_screen *)ctx->screen;
   const struct intel_device_info *devinfo = &screen->devinfo;

   isl_surf_usage_flags_t usage = 0;
   if (tmpl->writable)
      usage = ISL_SURF_USAGE_STORAGE_BIT;
   else if (util_format_is_depth_or_stencil(tmpl->format))
      usage = ISL_SURF_USAGE_DEPTH_BIT;
   else
      usage = ISL_SURF_USAGE_RENDER_TARGET_BIT;

   const struct crocus_format_info fmt =
      crocus_format_for_usage(devinfo, tmpl->format, usage);

   if ((usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) &&
       !isl_format_supports_rendering(devinfo, fmt.fmt)) {
      /* Framebuffer validation will reject this invalid case, but it
       * hasn't had the opportunity yet. In the meantime, we need to
       * avoid hitting ISL asserts about unsupported formats below.
       */
      return NULL;
   }

   struct crocus_surface *surf = calloc(1, sizeof(struct crocus_surface));
   /* NOTE(review): &surf->base is formed before the NULL check below;
    * harmless in practice (base is the first member) but worth confirming.
    */
   struct pipe_surface *psurf = &surf->base;
   struct crocus_resource *res = (struct crocus_resource *) tex;

   if (!surf)
      return NULL;

   pipe_reference_init(&psurf->reference, 1);
   pipe_resource_reference(&psurf->texture, tex);
   psurf->context = ctx;
   psurf->format = tmpl->format;
   psurf->width = tex->width0;
   psurf->height = tex->height0;
   psurf->texture = tex;
   psurf->u.tex.first_layer = tmpl->u.tex.first_layer;
   psurf->u.tex.last_layer = tmpl->u.tex.last_layer;
   psurf->u.tex.level = tmpl->u.tex.level;

   uint32_t array_len = tmpl->u.tex.last_layer - tmpl->u.tex.first_layer + 1;

   struct isl_view *view = &surf->view;
   *view = (struct isl_view) {
      .format = fmt.fmt,
      .base_level = tmpl->u.tex.level,
      .levels = 1,
      .base_array_layer = tmpl->u.tex.first_layer,
      .array_len = array_len,
      .swizzle = ISL_SWIZZLE_IDENTITY,
      .usage = usage,
   };

#if GFX_VER >= 6
   /* A second view with TEXTURE usage, for reading the RT as a texture. */
   struct isl_view *read_view = &surf->read_view;
   *read_view = (struct isl_view) {
      .format = fmt.fmt,
      .base_level = tmpl->u.tex.level,
      .levels = 1,
      .base_array_layer = tmpl->u.tex.first_layer,
      .array_len = array_len,
      .swizzle = ISL_SWIZZLE_IDENTITY,
      .usage = ISL_SURF_USAGE_TEXTURE_BIT,
   };
#endif

   surf->clear_color = res->aux.clear_color;

   /* Bail early for depth/stencil - we don't want SURFACE_STATE for them. */
   if (res->surf.usage & (ISL_SURF_USAGE_DEPTH_BIT |
                          ISL_SURF_USAGE_STENCIL_BIT))
      return psurf;

   if (!isl_format_is_compressed(res->surf.format)) {
      if (crocus_resource_unfinished_aux_import(res))
         crocus_resource_finish_aux_import(&screen->base, res);

      memcpy(&surf->surf, &res->surf, sizeof(surf->surf));
      uint32_t temp_offset, temp_x, temp_y;

      isl_surf_get_image_offset_B_tile_sa(&res->surf, tmpl->u.tex.level,
                                          res->base.b.target == PIPE_TEXTURE_3D ? 0 : tmpl->u.tex.first_layer,
                                          res->base.b.target == PIPE_TEXTURE_3D ? tmpl->u.tex.first_layer : 0,
                                          &temp_offset, &temp_x, &temp_y);
      if (!devinfo->has_surface_tile_offset &&
          (temp_x || temp_y)) {
         /* Original gfx4 hardware couldn't draw to a non-tile-aligned
          * destination.
          */
         /* move to temp */
         struct pipe_resource wa_templ = (struct pipe_resource) {
            .width0 = u_minify(res->base.b.width0, tmpl->u.tex.level),
            .height0 = u_minify(res->base.b.height0, tmpl->u.tex.level),
            .depth0 = 1,
            .array_size = 1,
            .format = res->base.b.format,
            .target = PIPE_TEXTURE_2D,
            .bind = (usage & ISL_SURF_USAGE_DEPTH_BIT ? PIPE_BIND_DEPTH_STENCIL : PIPE_BIND_RENDER_TARGET) | PIPE_BIND_SAMPLER_VIEW,
         };
         /* NOTE(review): resource_create may return NULL; align_res is
          * dereferenced immediately below — confirm callers tolerate this.
          */
         surf->align_res = screen->base.resource_create(&screen->base, &wa_templ);
         view->base_level = 0;
         view->base_array_layer = 0;
         view->array_len = 1;
         struct crocus_resource *align_res = (struct crocus_resource *)surf->align_res;
         memcpy(&surf->surf, &align_res->surf, sizeof(surf->surf));
      }
      return psurf;
   }

   /* The resource has a compressed format, which is not renderable, but we
    * have a renderable view format. We must be attempting to upload blocks
    * of compressed data via an uncompressed view.
    *
    * In this case, we can assume there are no auxiliary buffers, a single
    * miplevel, and that the resource is single-sampled. Gallium may try
    * and create an uncompressed view with multiple layers, however.
    */
   assert(!isl_format_is_compressed(fmt.fmt));
   assert(res->surf.samples == 1);
   assert(view->levels == 1);

   /* TODO: compressed pbo uploads aren't working here */
   return NULL;

   /* Everything below is intentionally unreachable until the TODO above
    * is resolved; it is kept as the intended implementation.
    */
   uint32_t offset_B = 0, tile_x_sa = 0, tile_y_sa = 0;

   if (view->base_level > 0) {
      /* We can't rely on the hardware's miplevel selection with such
       * a substantial lie about the format, so we select a single image
       * using the Tile X/Y Offset fields. In this case, we can't handle
       * multiple array slices.
       *
       * On Broadwell, HALIGN and VALIGN are specified in pixels and are
       * hard-coded to align to exactly the block size of the compressed
       * texture. This means that, when reinterpreted as a non-compressed
       * texture, the tile offsets may be anything and we can't rely on
       * X/Y Offset.
       *
       * Return NULL to force the state tracker to take fallback paths.
       */
      // TODO: check if the gen7 check is right, originally gen8
      if (view->array_len > 1 || GFX_VER == 7)
         return NULL;

      const bool is_3d = res->surf.dim == ISL_SURF_DIM_3D;
      isl_surf_get_image_surf(&screen->isl_dev, &res->surf,
                              view->base_level,
                              is_3d ? 0 : view->base_array_layer,
                              is_3d ? view->base_array_layer : 0,
                              &surf->surf,
                              &offset_B, &tile_x_sa, &tile_y_sa);

      /* We use address and tile offsets to access a single level/layer
       * as a subimage, so reset level/layer so it doesn't offset again.
       */
      view->base_array_layer = 0;
      view->base_level = 0;
   } else {
      /* Level 0 doesn't require tile offsets, and the hardware can find
       * array slices using QPitch even with the format override, so we
       * can allow layers in this case. Copy the original ISL surface.
       */
      memcpy(&surf->surf, &res->surf, sizeof(surf->surf));
   }

   /* Scale down the image dimensions by the block size. */
   const struct isl_format_layout *fmtl =
      isl_format_get_layout(res->surf.format);
   surf->surf.format = fmt.fmt;
   surf->surf.logical_level0_px = isl_surf_get_logical_level0_el(&surf->surf);
   surf->surf.phys_level0_sa = isl_surf_get_phys_level0_el(&surf->surf);
   tile_x_sa /= fmtl->bw;
   tile_y_sa /= fmtl->bh;

   psurf->width = surf->surf.logical_level0_px.width;
   psurf->height = surf->surf.logical_level0_px.height;

   return psurf;
}
3012
3013
#if GFX_VER >= 7
3014
static void
3015
fill_default_image_param(struct brw_image_param *param)
3016
{
3017
memset(param, 0, sizeof(*param));
3018
/* Set the swizzling shifts to all-ones to effectively disable swizzling --
3019
* See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
3020
* detailed explanation of these parameters.
3021
*/
3022
param->swizzling[0] = 0xff;
3023
param->swizzling[1] = 0xff;
3024
}
3025
3026
static void
3027
fill_buffer_image_param(struct brw_image_param *param,
3028
enum pipe_format pfmt,
3029
unsigned size)
3030
{
3031
const unsigned cpp = util_format_get_blocksize(pfmt);
3032
3033
fill_default_image_param(param);
3034
param->size[0] = size / cpp;
3035
param->stride[0] = cpp;
3036
}
3037
3038
#endif
3039
3040
/**
 * The pipe->set_shader_images() driver hook.
 *
 * Binds (or unbinds, for NULL entries) storage images for a shader stage,
 * records the isl_view for each bound image, and fills the corresponding
 * brw_image_param used by image sysval uploads.  Compiled out entirely
 * below gfx7 (no storage image support there).
 */
static void
crocus_set_shader_images(struct pipe_context *ctx,
                         enum pipe_shader_type p_stage,
                         unsigned start_slot, unsigned count,
                         unsigned unbind_num_trailing_slots,
                         const struct pipe_image_view *p_images)
{
#if GFX_VER >= 7
   struct crocus_context *ice = (struct crocus_context *) ctx;
   struct crocus_screen *screen = (struct crocus_screen *)ctx->screen;
   const struct intel_device_info *devinfo = &screen->devinfo;
   gl_shader_stage stage = stage_from_pipe(p_stage);
   struct crocus_shader_state *shs = &ice->state.shaders[stage];
   struct crocus_genx_state *genx = ice->state.genx;
   struct brw_image_param *image_params = genx->shaders[stage].image_param;

   /* NOTE(review): unbind_num_trailing_slots is not handled here —
    * trailing slots keep their previous bindings; confirm callers
    * never rely on them being cleared.
    */
   shs->bound_image_views &= ~u_bit_consecutive(start_slot, count);

   for (unsigned i = 0; i < count; i++) {
      struct crocus_image_view *iv = &shs->image[start_slot + i];

      if (p_images && p_images[i].resource) {
         const struct pipe_image_view *img = &p_images[i];
         struct crocus_resource *res = (void *) img->resource;

         util_copy_image_view(&iv->base, img);

         shs->bound_image_views |= 1 << (start_slot + i);

         res->bind_history |= PIPE_BIND_SHADER_IMAGE;
         res->bind_stages |= 1 << stage;

         isl_surf_usage_flags_t usage = ISL_SURF_USAGE_STORAGE_BIT;
         struct crocus_format_info fmt =
            crocus_format_for_usage(devinfo, img->format, usage);

         struct isl_swizzle swiz = pipe_to_isl_swizzles(fmt.swizzles);
         if (img->shader_access & PIPE_IMAGE_ACCESS_READ) {
            /* Try to use typed surface reads (which support a limited
             * number of formats), and if not possible, fall back
             * to untyped reads.
             */
            if (!isl_has_matching_typed_storage_image_format(devinfo, fmt.fmt))
               fmt.fmt = ISL_FORMAT_RAW;
            else
               fmt.fmt = isl_lower_storage_image_format(devinfo, fmt.fmt);
         }

         if (res->base.b.target != PIPE_BUFFER) {
            struct isl_view view = {
               .format = fmt.fmt,
               .base_level = img->u.tex.level,
               .levels = 1,
               .base_array_layer = img->u.tex.first_layer,
               .array_len = img->u.tex.last_layer - img->u.tex.first_layer + 1,
               .swizzle = swiz,
               .usage = usage,
            };

            iv->view = view;

            isl_surf_fill_image_param(&screen->isl_dev,
                                      &image_params[start_slot + i],
                                      &res->surf, &view);
         } else {
            struct isl_view view = {
               .format = fmt.fmt,
               .swizzle = swiz,
               .usage = usage,
            };
            iv->view = view;

            /* Track the written range so later reads know to flush. */
            util_range_add(&res->base.b, &res->valid_buffer_range, img->u.buf.offset,
                           img->u.buf.offset + img->u.buf.size);
            fill_buffer_image_param(&image_params[start_slot + i],
                                    img->format, img->u.buf.size);
         }
      } else {
         /* Unbind: drop the reference and reset the image param. */
         pipe_resource_reference(&iv->base.resource, NULL);
         fill_default_image_param(&image_params[start_slot + i]);
      }
   }

   ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_BINDINGS_VS << stage;
   ice->state.dirty |=
      stage == MESA_SHADER_COMPUTE ? CROCUS_DIRTY_COMPUTE_RESOLVES_AND_FLUSHES
                                   : CROCUS_DIRTY_RENDER_RESOLVES_AND_FLUSHES;

   /* Broadwell also needs brw_image_params re-uploaded */
   ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CONSTANTS_VS << stage;
   shs->sysvals_need_upload = true;
#endif
}
3136
3137
3138
/**
 * The pipe->set_sampler_views() driver hook.
 *
 * Binds (or unbinds, for NULL entries) sampler views on a stage,
 * updates the bound-views bitmask and resource bind tracking, and
 * flags the relevant dirty state.
 */
static void
crocus_set_sampler_views(struct pipe_context *ctx,
                         enum pipe_shader_type p_stage,
                         unsigned start, unsigned count,
                         unsigned unbind_num_trailing_slots,
                         struct pipe_sampler_view **views)
{
   struct crocus_context *ice = (struct crocus_context *) ctx;
   gl_shader_stage stage = stage_from_pipe(p_stage);
   struct crocus_shader_state *shs = &ice->state.shaders[stage];

   /* NOTE(review): unbind_num_trailing_slots is not processed here —
    * confirm trailing slots are handled elsewhere or never set.
    */
   shs->bound_sampler_views &= ~u_bit_consecutive(start, count);

   for (unsigned i = 0; i < count; i++) {
      struct pipe_sampler_view *pview = views ? views[i] : NULL;
      /* Reference-swap handles both bind and unbind (NULL) cases. */
      pipe_sampler_view_reference((struct pipe_sampler_view **)
                                  &shs->textures[start + i], pview);
      struct crocus_sampler_view *view = (void *) pview;
      if (view) {
         view->res->bind_history |= PIPE_BIND_SAMPLER_VIEW;
         view->res->bind_stages |= 1 << stage;

         shs->bound_sampler_views |= 1 << (start + i);
      }
   }
#if GFX_VER == 6
   /* first level parameters to crocus_upload_sampler_state is gfx6 only */
   ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_SAMPLER_STATES_VS << stage;
#endif
   ice->state.stage_dirty |= (CROCUS_STAGE_DIRTY_BINDINGS_VS << stage);
   ice->state.dirty |=
      stage == MESA_SHADER_COMPUTE ? CROCUS_DIRTY_COMPUTE_RESOLVES_AND_FLUSHES
                                   : CROCUS_DIRTY_RENDER_RESOLVES_AND_FLUSHES;
   ice->state.stage_dirty |= ice->state.stage_dirty_for_nos[CROCUS_NOS_TEXTURES];
}
3176
3177
/**
3178
* The pipe->set_tess_state() driver hook.
3179
*/
3180
static void
3181
crocus_set_tess_state(struct pipe_context *ctx,
3182
const float default_outer_level[4],
3183
const float default_inner_level[2])
3184
{
3185
struct crocus_context *ice = (struct crocus_context *) ctx;
3186
struct crocus_shader_state *shs = &ice->state.shaders[MESA_SHADER_TESS_CTRL];
3187
3188
memcpy(&ice->state.default_outer_level[0], &default_outer_level[0], 4 * sizeof(float));
3189
memcpy(&ice->state.default_inner_level[0], &default_inner_level[0], 2 * sizeof(float));
3190
3191
ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CONSTANTS_TCS;
3192
shs->sysvals_need_upload = true;
3193
}
3194
3195
static void
3196
crocus_surface_destroy(struct pipe_context *ctx, struct pipe_surface *p_surf)
3197
{
3198
struct crocus_surface *surf = (void *) p_surf;
3199
pipe_resource_reference(&p_surf->texture, NULL);
3200
3201
pipe_resource_reference(&surf->align_res, NULL);
3202
free(surf);
3203
}
3204
3205
static void
3206
crocus_set_clip_state(struct pipe_context *ctx,
3207
const struct pipe_clip_state *state)
3208
{
3209
struct crocus_context *ice = (struct crocus_context *) ctx;
3210
struct crocus_shader_state *shs = &ice->state.shaders[MESA_SHADER_VERTEX];
3211
struct crocus_shader_state *gshs = &ice->state.shaders[MESA_SHADER_GEOMETRY];
3212
struct crocus_shader_state *tshs = &ice->state.shaders[MESA_SHADER_TESS_EVAL];
3213
3214
memcpy(&ice->state.clip_planes, state, sizeof(*state));
3215
3216
#if GFX_VER <= 5
3217
ice->state.dirty |= CROCUS_DIRTY_GEN4_CURBE;
3218
#endif
3219
ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CONSTANTS_VS | CROCUS_STAGE_DIRTY_CONSTANTS_GS |
3220
CROCUS_STAGE_DIRTY_CONSTANTS_TES;
3221
shs->sysvals_need_upload = true;
3222
gshs->sysvals_need_upload = true;
3223
tshs->sysvals_need_upload = true;
3224
}
3225
3226
/**
3227
* The pipe->set_polygon_stipple() driver hook.
3228
*/
3229
static void
3230
crocus_set_polygon_stipple(struct pipe_context *ctx,
3231
const struct pipe_poly_stipple *state)
3232
{
3233
struct crocus_context *ice = (struct crocus_context *) ctx;
3234
memcpy(&ice->state.poly_stipple, state, sizeof(*state));
3235
ice->state.dirty |= CROCUS_DIRTY_POLYGON_STIPPLE;
3236
}
3237
3238
/**
 * The pipe->set_sample_mask() driver hook.
 */
static void
crocus_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask)
{
   struct crocus_context *ice = (struct crocus_context *) ctx;

   /* We only support up to 8x MSAA, so we have 8 bits of sample mask.
    * st/mesa may pass us 0xffffffff though, meaning "enable all samples".
    */
   ice->state.sample_mask = sample_mask & 0xff;
   ice->state.dirty |= CROCUS_DIRTY_GEN6_SAMPLE_MASK;
}
3252
3253
/**
 * Compute the effective scissor rectangle for viewport \p idx.
 *
 * Starts from the viewport extent (derived from scale/translate),
 * clamped to the framebuffer bounds, then intersects it with the API
 * scissor when the rasterizer state enables scissoring.  The result is
 * written to \p ss (inclusive min/max coordinates).
 */
static void
crocus_fill_scissor_rect(struct crocus_context *ice,
                         int idx,
                         struct pipe_scissor_state *ss)
{
   struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
   struct pipe_rasterizer_state *cso_state = &ice->state.cso_rast->cso;
   const struct pipe_viewport_state *vp = &ice->state.viewports[idx];
   /* |scale| is the viewport half-extent; translate is its center. */
   struct pipe_scissor_state scissor = (struct pipe_scissor_state) {
      .minx = MAX2(-fabsf(vp->scale[0]) + vp->translate[0], 0),
      .maxx = MIN2( fabsf(vp->scale[0]) + vp->translate[0], cso_fb->width) - 1,
      .miny = MAX2(-fabsf(vp->scale[1]) + vp->translate[1], 0),
      .maxy = MIN2( fabsf(vp->scale[1]) + vp->translate[1], cso_fb->height) - 1,
   };
   if (cso_state->scissor) {
      /* Intersect with the user-supplied scissor rectangle. */
      struct pipe_scissor_state *s = &ice->state.scissors[idx];
      scissor.minx = MAX2(scissor.minx, s->minx);
      scissor.miny = MAX2(scissor.miny, s->miny);
      scissor.maxx = MIN2(scissor.maxx, s->maxx);
      scissor.maxy = MIN2(scissor.maxy, s->maxy);
   }
   *ss = scissor;
}
3276
3277
/**
 * The pipe->set_scissor_states() driver hook.
 *
 * This corresponds to our SCISSOR_RECT state structures. It's an
 * exact match, so we just store them, and memcpy them out later.
 */
static void
crocus_set_scissor_states(struct pipe_context *ctx,
                          unsigned start_slot,
                          unsigned num_scissors,
                          const struct pipe_scissor_state *rects)
{
   struct crocus_context *ice = (struct crocus_context *) ctx;

   for (unsigned i = 0; i < num_scissors; i++) {
      if (rects[i].minx == rects[i].maxx || rects[i].miny == rects[i].maxy) {
         /* If the scissor was out of bounds and got clamped to 0 width/height
          * at the bounds, the subtraction of 1 from maximums could produce a
          * negative number and thus not clip anything. Instead, just provide
          * a min > max scissor inside the bounds, which produces the expected
          * no rendering.
          */
         ice->state.scissors[start_slot + i] = (struct pipe_scissor_state) {
            .minx = 1, .maxx = 0, .miny = 1, .maxy = 0,
         };
      } else {
         /* Hardware rectangles are inclusive, Gallium maxes are exclusive. */
         ice->state.scissors[start_slot + i] = (struct pipe_scissor_state) {
            .minx = rects[i].minx, .miny = rects[i].miny,
            .maxx = rects[i].maxx - 1, .maxy = rects[i].maxy - 1,
         };
      }
   }

#if GFX_VER < 6
   ice->state.dirty |= CROCUS_DIRTY_RASTER; /* SF state */
#else
   ice->state.dirty |= CROCUS_DIRTY_GEN6_SCISSOR_RECT;
#endif
   ice->state.dirty |= CROCUS_DIRTY_SF_CL_VIEWPORT;

}
3318
3319
/**
3320
* The pipe->set_stencil_ref() driver hook.
3321
*
3322
* This is added to 3DSTATE_WM_DEPTH_STENCIL dynamically at draw time.
3323
*/
3324
static void
3325
crocus_set_stencil_ref(struct pipe_context *ctx,
3326
const struct pipe_stencil_ref ref)
3327
{
3328
struct crocus_context *ice = (struct crocus_context *) ctx;
3329
ice->state.stencil_ref = ref;
3330
ice->state.dirty |= CROCUS_DIRTY_COLOR_CALC_STATE;
3331
}
3332
3333
#if GFX_VER == 8
3334
static float
3335
viewport_extent(const struct pipe_viewport_state *state, int axis, float sign)
3336
{
3337
return copysignf(state->scale[axis], sign) + state->translate[axis];
3338
}
3339
#endif
3340
3341
/**
 * The pipe->set_viewport_states() driver hook.
 *
 * This corresponds to our SF_CLIP_VIEWPORT states. We can't calculate
 * the guardband yet, as we need the framebuffer dimensions, but we can
 * at least fill out the rest.
 */
static void
crocus_set_viewport_states(struct pipe_context *ctx,
                           unsigned start_slot,
                           unsigned count,
                           const struct pipe_viewport_state *states)
{
   struct crocus_context *ice = (struct crocus_context *) ctx;

   memcpy(&ice->state.viewports[start_slot], states, sizeof(*states) * count);

   ice->state.dirty |= CROCUS_DIRTY_SF_CL_VIEWPORT;
   ice->state.dirty |= CROCUS_DIRTY_RASTER;
#if GFX_VER >= 6
   /* Scissors are derived from viewport extents on gfx6+. */
   ice->state.dirty |= CROCUS_DIRTY_GEN6_SCISSOR_RECT;
#endif

   /* With depth clipping disabled, the CC viewport depends on the
    * viewport's depth range, so it must be re-emitted too.
    */
   if (ice->state.cso_rast && (!ice->state.cso_rast->cso.depth_clip_near ||
                               !ice->state.cso_rast->cso.depth_clip_far))
      ice->state.dirty |= CROCUS_DIRTY_CC_VIEWPORT;
}
3368
3369
/**
 * The pipe->set_framebuffer_state() driver hook.
 *
 * Sets the current draw FBO, including color render targets, depth,
 * and stencil buffers.  Flags only the state that actually depends on
 * what changed (sample count, layer count, dimensions, Z/S buffer),
 * then copies the new state in and records the depth buffer's HiZ usage.
 */
static void
crocus_set_framebuffer_state(struct pipe_context *ctx,
                             const struct pipe_framebuffer_state *state)
{
   struct crocus_context *ice = (struct crocus_context *) ctx;
   struct pipe_framebuffer_state *cso = &ice->state.framebuffer;
   struct crocus_screen *screen = (struct crocus_screen *)ctx->screen;
   const struct intel_device_info *devinfo = &screen->devinfo;
#if 0
   struct isl_device *isl_dev = &screen->isl_dev;
   struct crocus_resource *zres;
   struct crocus_resource *stencil_res;
#endif

   unsigned samples = util_framebuffer_get_num_samples(state);
   unsigned layers = util_framebuffer_get_num_layers(state);

#if GFX_VER >= 6
   /* Sample count changes invalidate multisample/raster state. */
   if (cso->samples != samples) {
      ice->state.dirty |= CROCUS_DIRTY_GEN6_MULTISAMPLE;
      ice->state.dirty |= CROCUS_DIRTY_GEN6_SAMPLE_MASK;
      ice->state.dirty |= CROCUS_DIRTY_RASTER;
#if GFX_VERx10 == 75
      ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_FS;
#endif
   }
#endif

#if GFX_VER >= 6 && GFX_VER < 8
   ice->state.dirty |= CROCUS_DIRTY_GEN6_BLEND_STATE;
#endif

   /* Only the layered-vs-non-layered transition matters for CLIP. */
   if ((cso->layers == 0) != (layers == 0)) {
      ice->state.dirty |= CROCUS_DIRTY_CLIP;
   }

   if (cso->width != state->width || cso->height != state->height) {
      ice->state.dirty |= CROCUS_DIRTY_SF_CL_VIEWPORT;
      ice->state.dirty |= CROCUS_DIRTY_RASTER;
      ice->state.dirty |= CROCUS_DIRTY_DRAWING_RECTANGLE;
#if GFX_VER >= 6
      ice->state.dirty |= CROCUS_DIRTY_GEN6_SCISSOR_RECT;
#endif
   }

   if (cso->zsbuf || state->zsbuf) {
      ice->state.dirty |= CROCUS_DIRTY_DEPTH_BUFFER;

      /* update SF's depth buffer format */
      if (GFX_VER == 7 && cso->zsbuf)
         ice->state.dirty |= CROCUS_DIRTY_RASTER;
   }

   /* wm thread dispatch enable */
   ice->state.dirty |= CROCUS_DIRTY_WM;
   util_copy_framebuffer_state(cso, state);
   cso->samples = samples;
   cso->layers = layers;

   /* Record whether the new depth buffer can use HiZ at its bound level. */
   if (cso->zsbuf) {
      struct crocus_resource *zres;
      struct crocus_resource *stencil_res;
      enum isl_aux_usage aux_usage = ISL_AUX_USAGE_NONE;
      crocus_get_depth_stencil_resources(devinfo, cso->zsbuf->texture, &zres,
                                         &stencil_res);
      if (zres && crocus_resource_level_has_hiz(zres, cso->zsbuf->u.tex.level)) {
         aux_usage = zres->aux.usage;
      }
      ice->state.hiz_usage = aux_usage;
   }

   /* Render target change */
   ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_BINDINGS_FS;

   ice->state.dirty |= CROCUS_DIRTY_RENDER_RESOLVES_AND_FLUSHES;

   ice->state.stage_dirty |= ice->state.stage_dirty_for_nos[CROCUS_NOS_FRAMEBUFFER];
}
3453
3454
/**
 * The pipe->set_constant_buffer() driver hook.
 *
 * This uploads any constant data in user buffers, and references
 * any UBO resources containing constant data.
 */
static void
crocus_set_constant_buffer(struct pipe_context *ctx,
                           enum pipe_shader_type p_stage, unsigned index,
                           bool take_ownership,
                           const struct pipe_constant_buffer *input)
{
   struct crocus_context *ice = (struct crocus_context *) ctx;
   gl_shader_stage stage = stage_from_pipe(p_stage);
   struct crocus_shader_state *shs = &ice->state.shaders[stage];
   /* cbuf aliases the slot that util_copy_constant_buffer fills below. */
   struct pipe_constant_buffer *cbuf = &shs->constbufs[index];

   util_copy_constant_buffer(&shs->constbufs[index], input, take_ownership);

   if (input && input->buffer_size && (input->buffer || input->user_buffer)) {
      shs->bound_cbufs |= 1u << index;

      if (input->user_buffer) {
         /* User pointers have no GPU-visible storage: stream the data
          * into a fresh allocation from the constant uploader.
          */
         void *map = NULL;
         pipe_resource_reference(&cbuf->buffer, NULL);
         u_upload_alloc(ice->ctx.const_uploader, 0, input->buffer_size, 64,
                        &cbuf->buffer_offset, &cbuf->buffer, (void **) &map);

         if (!cbuf->buffer) {
            /* Allocation was unsuccessful - just unbind */
            crocus_set_constant_buffer(ctx, p_stage, index, false, NULL);
            return;
         }

         assert(map);
         memcpy(map, input->user_buffer, input->buffer_size);
      }
      /* Clamp the size so the bound range never overruns the BO. */
      cbuf->buffer_size =
         MIN2(input->buffer_size,
              crocus_resource_bo(cbuf->buffer)->size - cbuf->buffer_offset);

      struct crocus_resource *res = (void *) cbuf->buffer;
      res->bind_history |= PIPE_BIND_CONSTANT_BUFFER;
      res->bind_stages |= 1 << stage;
   } else {
      shs->bound_cbufs &= ~(1u << index);
   }

   ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CONSTANTS_VS << stage;
}
3504
3505
/**
 * Upload the shader's system values (clip planes, tess levels, image
 * params, workgroup sizes, ...) into the last constant buffer slot,
 * which the compiler reserved for them.  No-op if the stage's shader
 * uses no system values.
 */
static void
upload_sysvals(struct crocus_context *ice,
               gl_shader_stage stage)
{
   UNUSED struct crocus_genx_state *genx = ice->state.genx;
   struct crocus_shader_state *shs = &ice->state.shaders[stage];

   struct crocus_compiled_shader *shader = ice->shaders.prog[stage];
   if (!shader || shader->num_system_values == 0)
      return;

   assert(shader->num_cbufs > 0);

   /* By convention, sysvals live in the highest-numbered cbuf. */
   unsigned sysval_cbuf_index = shader->num_cbufs - 1;
   struct pipe_constant_buffer *cbuf = &shs->constbufs[sysval_cbuf_index];
   unsigned upload_size = shader->num_system_values * sizeof(uint32_t);
   uint32_t *map = NULL;

   assert(sysval_cbuf_index < PIPE_MAX_CONSTANT_BUFFERS);
   u_upload_alloc(ice->ctx.const_uploader, 0, upload_size, 64,
                  &cbuf->buffer_offset, &cbuf->buffer, (void **) &map);

   /* Resolve each sysval token to its current 32-bit value. */
   for (int i = 0; i < shader->num_system_values; i++) {
      uint32_t sysval = shader->system_values[i];
      uint32_t value = 0;

      if (BRW_PARAM_DOMAIN(sysval) == BRW_PARAM_DOMAIN_IMAGE) {
#if GFX_VER >= 7
         /* Read a dword straight out of the stage's brw_image_param. */
         unsigned img = BRW_PARAM_IMAGE_IDX(sysval);
         unsigned offset = BRW_PARAM_IMAGE_OFFSET(sysval);
         struct brw_image_param *param =
            &genx->shaders[stage].image_param[img];

         assert(offset < sizeof(struct brw_image_param));
         value = ((uint32_t *) param)[offset];
#endif
      } else if (sysval == BRW_PARAM_BUILTIN_ZERO) {
         value = 0;
      } else if (BRW_PARAM_BUILTIN_IS_CLIP_PLANE(sysval)) {
         int plane = BRW_PARAM_BUILTIN_CLIP_PLANE_IDX(sysval);
         int comp = BRW_PARAM_BUILTIN_CLIP_PLANE_COMP(sysval);
         value = fui(ice->state.clip_planes.ucp[plane][comp]);
      } else if (sysval == BRW_PARAM_BUILTIN_PATCH_VERTICES_IN) {
         if (stage == MESA_SHADER_TESS_CTRL) {
            value = ice->state.vertices_per_patch;
         } else {
            assert(stage == MESA_SHADER_TESS_EVAL);
            /* TES sees the TCS output vertex count when a TCS exists. */
            const struct shader_info *tcs_info =
               crocus_get_shader_info(ice, MESA_SHADER_TESS_CTRL);
            if (tcs_info)
               value = tcs_info->tess.tcs_vertices_out;
            else
               value = ice->state.vertices_per_patch;
         }
      } else if (sysval >= BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X &&
                 sysval <= BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_W) {
         unsigned i = sysval - BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X;
         value = fui(ice->state.default_outer_level[i]);
      } else if (sysval == BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X) {
         value = fui(ice->state.default_inner_level[0]);
      } else if (sysval == BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_Y) {
         value = fui(ice->state.default_inner_level[1]);
      } else if (sysval >= BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_X &&
                 sysval <= BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_Z) {
         unsigned i = sysval - BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_X;
         value = ice->state.last_block[i];
      } else {
         assert(!"unhandled system value");
      }

      *map++ = value;
   }

   cbuf->buffer_size = upload_size;
   shs->sysvals_need_upload = false;
}
3581
3582
/**
 * The pipe->set_shader_buffers() driver hook.
 *
 * This binds SSBOs and ABOs. Unfortunately, we need to stream out
 * SURFACE_STATE here, as the buffer offset may change each time.
 */
static void
crocus_set_shader_buffers(struct pipe_context *ctx,
                          enum pipe_shader_type p_stage,
                          unsigned start_slot, unsigned count,
                          const struct pipe_shader_buffer *buffers,
                          unsigned writable_bitmask)
{
   struct crocus_context *ice = (struct crocus_context *) ctx;
   gl_shader_stage stage = stage_from_pipe(p_stage);
   struct crocus_shader_state *shs = &ice->state.shaders[stage];

   unsigned modified_bits = u_bit_consecutive(start_slot, count);

   /* Clear the affected slots, then set writability per the new mask. */
   shs->bound_ssbos &= ~modified_bits;
   shs->writable_ssbos &= ~modified_bits;
   shs->writable_ssbos |= writable_bitmask << start_slot;

   for (unsigned i = 0; i < count; i++) {
      if (buffers && buffers[i].buffer) {
         struct crocus_resource *res = (void *) buffers[i].buffer;
         struct pipe_shader_buffer *ssbo = &shs->ssbo[start_slot + i];
         pipe_resource_reference(&ssbo->buffer, &res->base.b);
         ssbo->buffer_offset = buffers[i].buffer_offset;
         /* Clamp so the bound range never extends past the BO. */
         ssbo->buffer_size =
            MIN2(buffers[i].buffer_size, res->bo->size - ssbo->buffer_offset);

         shs->bound_ssbos |= 1 << (start_slot + i);

         res->bind_history |= PIPE_BIND_SHADER_BUFFER;
         res->bind_stages |= 1 << stage;

         /* Track the potentially-written range for later flushing. */
         util_range_add(&res->base.b, &res->valid_buffer_range, ssbo->buffer_offset,
                        ssbo->buffer_offset + ssbo->buffer_size);
      } else {
         pipe_resource_reference(&shs->ssbo[start_slot + i].buffer, NULL);
      }
   }

   ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_BINDINGS_VS << stage;
}
3628
3629
/* Generic CSO destructor for state objects allocated with plain malloc(). */
static void
crocus_delete_state(struct pipe_context *ctx, void *state)
{
   free(state);
}
3634
3635
/**
3636
* The pipe->set_vertex_buffers() driver hook.
3637
*
3638
* This translates pipe_vertex_buffer to our 3DSTATE_VERTEX_BUFFERS packet.
3639
*/
3640
static void
3641
crocus_set_vertex_buffers(struct pipe_context *ctx,
3642
unsigned start_slot, unsigned count,
3643
unsigned unbind_num_trailing_slots,
3644
bool take_ownership,
3645
const struct pipe_vertex_buffer *buffers)
3646
{
3647
struct crocus_context *ice = (struct crocus_context *) ctx;
3648
struct crocus_screen *screen = (struct crocus_screen *) ctx->screen;
3649
const unsigned padding =
3650
(GFX_VERx10 < 75 && !screen->devinfo.is_baytrail) * 2;
3651
ice->state.bound_vertex_buffers &=
3652
~u_bit_consecutive64(start_slot, count + unbind_num_trailing_slots);
3653
3654
util_set_vertex_buffers_mask(ice->state.vertex_buffers, &ice->state.bound_vertex_buffers,
3655
buffers, start_slot, count, unbind_num_trailing_slots,
3656
take_ownership);
3657
3658
for (unsigned i = 0; i < count; i++) {
3659
struct pipe_vertex_buffer *state =
3660
&ice->state.vertex_buffers[start_slot + i];
3661
3662
if (!state->is_user_buffer && state->buffer.resource) {
3663
struct crocus_resource *res = (void *)state->buffer.resource;
3664
res->bind_history |= PIPE_BIND_VERTEX_BUFFER;
3665
}
3666
3667
uint32_t end = 0;
3668
if (state->buffer.resource)
3669
end = state->buffer.resource->width0 + padding;
3670
ice->state.vb_end[start_slot + i] = end;
3671
}
3672
ice->state.dirty |= CROCUS_DIRTY_VERTEX_BUFFERS;
3673
}
3674
3675
#if GFX_VERx10 < 75
3676
static uint8_t get_wa_flags(enum isl_format format)
3677
{
3678
uint8_t wa_flags = 0;
3679
3680
switch (format) {
3681
case ISL_FORMAT_R10G10B10A2_USCALED:
3682
wa_flags = BRW_ATTRIB_WA_SCALE;
3683
break;
3684
case ISL_FORMAT_R10G10B10A2_SSCALED:
3685
wa_flags = BRW_ATTRIB_WA_SIGN | BRW_ATTRIB_WA_SCALE;
3686
break;
3687
case ISL_FORMAT_R10G10B10A2_UNORM:
3688
wa_flags = BRW_ATTRIB_WA_NORMALIZE;
3689
break;
3690
case ISL_FORMAT_R10G10B10A2_SNORM:
3691
wa_flags = BRW_ATTRIB_WA_SIGN | BRW_ATTRIB_WA_NORMALIZE;
3692
break;
3693
case ISL_FORMAT_R10G10B10A2_SINT:
3694
wa_flags = BRW_ATTRIB_WA_SIGN;
3695
break;
3696
case ISL_FORMAT_B10G10R10A2_USCALED:
3697
wa_flags = BRW_ATTRIB_WA_SCALE | BRW_ATTRIB_WA_BGRA;
3698
break;
3699
case ISL_FORMAT_B10G10R10A2_SSCALED:
3700
wa_flags = BRW_ATTRIB_WA_SIGN | BRW_ATTRIB_WA_SCALE | BRW_ATTRIB_WA_BGRA;
3701
break;
3702
case ISL_FORMAT_B10G10R10A2_UNORM:
3703
wa_flags = BRW_ATTRIB_WA_NORMALIZE | BRW_ATTRIB_WA_BGRA;
3704
break;
3705
case ISL_FORMAT_B10G10R10A2_SNORM:
3706
wa_flags = BRW_ATTRIB_WA_SIGN | BRW_ATTRIB_WA_NORMALIZE | BRW_ATTRIB_WA_BGRA;
3707
break;
3708
case ISL_FORMAT_B10G10R10A2_SINT:
3709
wa_flags = BRW_ATTRIB_WA_SIGN | BRW_ATTRIB_WA_BGRA;
3710
break;
3711
case ISL_FORMAT_B10G10R10A2_UINT:
3712
wa_flags = BRW_ATTRIB_WA_BGRA;
3713
break;
3714
default:
3715
break;
3716
}
3717
return wa_flags;
3718
}
3719
#endif
3720
3721
/**
 * Gallium CSO for vertex elements.
 */
struct crocus_vertex_element_state {
   /* Packed 3DSTATE_VERTEX_ELEMENTS: one header DWord plus up to 33 elements. */
   uint32_t vertex_elements[1 + 33 * GENX(VERTEX_ELEMENT_STATE_length)];
#if GFX_VER == 8
   /* Packed 3DSTATE_VF_INSTANCING commands, one per element. */
   uint32_t vf_instancing[33 * GENX(3DSTATE_VF_INSTANCING_length)];
#endif
   /* Alternate copy of the last element with EdgeFlagEnable set; substituted
    * at draw time when the vertex shader reads the edge flag.
    */
   uint32_t edgeflag_ve[GENX(VERTEX_ELEMENT_STATE_length)];
#if GFX_VER == 8
   /* Matching 3DSTATE_VF_INSTANCING for the edge-flag element. */
   uint32_t edgeflag_vfi[GENX(3DSTATE_VF_INSTANCING_length)];
#endif
   /* Per-vertex-buffer instance divisor (instance data step rate). */
   uint32_t step_rate[16];
   /* Pre-Haswell vertex fetch workaround flags, indexed by element. */
   uint8_t wa_flags[33];
   /* Number of elements supplied by the state tracker. */
   unsigned count;
};
3737
3738
/**
 * The pipe->create_vertex_elements() driver hook.
 *
 * This translates pipe_vertex_element to our 3DSTATE_VERTEX_ELEMENTS
 * and 3DSTATE_VF_INSTANCING commands. The vertex_elements and vf_instancing
 * arrays are ready to be emitted at draw time if no EdgeFlag or SGVs are
 * needed. In these cases we will need information available at draw time.
 * We setup edgeflag_ve and edgeflag_vfi as alternatives last
 * 3DSTATE_VERTEX_ELEMENT and 3DSTATE_VF_INSTANCING that can be used at
 * draw time if we detect that EdgeFlag is needed by the Vertex Shader.
 */
static void *
crocus_create_vertex_elements(struct pipe_context *ctx,
                              unsigned count,
                              const struct pipe_vertex_element *state)
{
   struct crocus_screen *screen = (struct crocus_screen *)ctx->screen;
   const struct intel_device_info *devinfo = &screen->devinfo;
   struct crocus_vertex_element_state *cso =
      malloc(sizeof(struct crocus_vertex_element_state));

   cso->count = count;

   /* Pack the command header; length always covers at least one element,
    * since a dummy element is emitted below when count == 0.
    */
   crocus_pack_command(GENX(3DSTATE_VERTEX_ELEMENTS), cso->vertex_elements, ve) {
      ve.DWordLength =
         1 + GENX(VERTEX_ELEMENT_STATE_length) * MAX2(count, 1) - 2;
   }

   uint32_t *ve_pack_dest = &cso->vertex_elements[1];
#if GFX_VER == 8
   uint32_t *vfi_pack_dest = cso->vf_instancing;
#endif

   if (count == 0) {
      /* No user elements: emit a (0, 0, 0, 1) dummy so the VF unit always
       * has a valid element to fetch.
       */
      crocus_pack_state(GENX(VERTEX_ELEMENT_STATE), ve_pack_dest, ve) {
         ve.Valid = true;
         ve.SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT;
         ve.Component0Control = VFCOMP_STORE_0;
         ve.Component1Control = VFCOMP_STORE_0;
         ve.Component2Control = VFCOMP_STORE_0;
         ve.Component3Control = VFCOMP_STORE_1_FP;
      }
#if GFX_VER == 8
      crocus_pack_command(GENX(3DSTATE_VF_INSTANCING), vfi_pack_dest, vi) {
      }
#endif
   }

   for (int i = 0; i < count; i++) {
      const struct crocus_format_info fmt =
         crocus_format_for_usage(devinfo, state[i].src_format, 0);
      unsigned comp[4] = { VFCOMP_STORE_SRC, VFCOMP_STORE_SRC,
                           VFCOMP_STORE_SRC, VFCOMP_STORE_SRC };
      enum isl_format actual_fmt = fmt.fmt;

#if GFX_VERx10 < 75
      /* Pre-Haswell can't fetch 2_10_10_10 (and a few 3-channel) formats
       * natively; record shader-side workaround flags and substitute a
       * format the hardware can fetch.
       */
      cso->wa_flags[i] = get_wa_flags(fmt.fmt);

      if (fmt.fmt == ISL_FORMAT_R10G10B10A2_USCALED ||
          fmt.fmt == ISL_FORMAT_R10G10B10A2_SSCALED ||
          fmt.fmt == ISL_FORMAT_R10G10B10A2_UNORM ||
          fmt.fmt == ISL_FORMAT_R10G10B10A2_SNORM ||
          fmt.fmt == ISL_FORMAT_R10G10B10A2_SINT ||
          fmt.fmt == ISL_FORMAT_B10G10R10A2_USCALED ||
          fmt.fmt == ISL_FORMAT_B10G10R10A2_SSCALED ||
          fmt.fmt == ISL_FORMAT_B10G10R10A2_UNORM ||
          fmt.fmt == ISL_FORMAT_B10G10R10A2_SNORM ||
          fmt.fmt == ISL_FORMAT_B10G10R10A2_UINT ||
          fmt.fmt == ISL_FORMAT_B10G10R10A2_SINT)
         actual_fmt = ISL_FORMAT_R10G10B10A2_UINT;
      if (fmt.fmt == ISL_FORMAT_R8G8B8_SINT)
         actual_fmt = ISL_FORMAT_R8G8B8A8_SINT;
      if (fmt.fmt == ISL_FORMAT_R8G8B8_UINT)
         actual_fmt = ISL_FORMAT_R8G8B8A8_UINT;
      if (fmt.fmt == ISL_FORMAT_R16G16B16_SINT)
         actual_fmt = ISL_FORMAT_R16G16B16A16_SINT;
      if (fmt.fmt == ISL_FORMAT_R16G16B16_UINT)
         actual_fmt = ISL_FORMAT_R16G16B16A16_UINT;
#endif

      cso->step_rate[state[i].vertex_buffer_index] = state[i].instance_divisor;

      /* For formats with fewer than four channels, store constants in the
       * missing components: 0 for the middle ones, 1 (int or float) for W.
       */
      switch (isl_format_get_num_channels(fmt.fmt)) {
      case 0: comp[0] = VFCOMP_STORE_0; FALLTHROUGH;
      case 1: comp[1] = VFCOMP_STORE_0; FALLTHROUGH;
      case 2: comp[2] = VFCOMP_STORE_0; FALLTHROUGH;
      case 3:
         comp[3] = isl_format_has_int_channel(fmt.fmt) ? VFCOMP_STORE_1_INT
                                                       : VFCOMP_STORE_1_FP;
         break;
      }
      crocus_pack_state(GENX(VERTEX_ELEMENT_STATE), ve_pack_dest, ve) {
#if GFX_VER >= 6
         ve.EdgeFlagEnable = false;
#endif
         ve.VertexBufferIndex = state[i].vertex_buffer_index;
         ve.Valid = true;
         ve.SourceElementOffset = state[i].src_offset;
         ve.SourceElementFormat = actual_fmt;
         ve.Component0Control = comp[0];
         ve.Component1Control = comp[1];
         ve.Component2Control = comp[2];
         ve.Component3Control = comp[3];
#if GFX_VER < 5
         ve.DestinationElementOffset = i * 4;
#endif
      }

#if GFX_VER == 8
      crocus_pack_command(GENX(3DSTATE_VF_INSTANCING), vfi_pack_dest, vi) {
         vi.VertexElementIndex = i;
         vi.InstancingEnable = state[i].instance_divisor > 0;
         vi.InstanceDataStepRate = state[i].instance_divisor;
      }
#endif
      ve_pack_dest += GENX(VERTEX_ELEMENT_STATE_length);
#if GFX_VER == 8
      vfi_pack_dest += GENX(3DSTATE_VF_INSTANCING_length);
#endif
   }

   /* An alternative version of the last VE and VFI is stored so it
    * can be used at draw time in case Vertex Shader uses EdgeFlag
    */
   if (count) {
      const unsigned edgeflag_index = count - 1;
      const struct crocus_format_info fmt =
         crocus_format_for_usage(devinfo, state[edgeflag_index].src_format, 0);
      crocus_pack_state(GENX(VERTEX_ELEMENT_STATE), cso->edgeflag_ve, ve) {
#if GFX_VER >= 6
         ve.EdgeFlagEnable = true;
#endif
         ve.VertexBufferIndex = state[edgeflag_index].vertex_buffer_index;
         ve.Valid = true;
         ve.SourceElementOffset = state[edgeflag_index].src_offset;
         ve.SourceElementFormat = fmt.fmt;
         ve.Component0Control = VFCOMP_STORE_SRC;
         ve.Component1Control = VFCOMP_STORE_0;
         ve.Component2Control = VFCOMP_STORE_0;
         ve.Component3Control = VFCOMP_STORE_0;
      }
#if GFX_VER == 8
      crocus_pack_command(GENX(3DSTATE_VF_INSTANCING), cso->edgeflag_vfi, vi) {
         /* The vi.VertexElementIndex of the EdgeFlag Vertex Element is filled
          * at draw time, as it should change if SGVs are emitted.
          */
         vi.InstancingEnable = state[edgeflag_index].instance_divisor > 0;
         vi.InstanceDataStepRate = state[edgeflag_index].instance_divisor;
      }
#endif
   }

   return cso;
}
3892
3893
/**
 * The pipe->bind_vertex_elements_state() driver hook.
 */
static void
crocus_bind_vertex_elements_state(struct pipe_context *ctx, void *state)
{
   struct crocus_context *ice = (struct crocus_context *) ctx;
#if GFX_VER == 8
   /* NOTE: the cso_changed() macro below references the locals old_cso and
    * new_cso by name — do not rename them.
    */
   struct crocus_vertex_element_state *old_cso = ice->state.cso_vertex_elements;
   struct crocus_vertex_element_state *new_cso = state;

   /* Re-emit VF SGV state when the element count changes — presumably
    * because SGVs are appended after the last element (confirm).
    */
   if (new_cso && cso_changed(count))
      ice->state.dirty |= CROCUS_DIRTY_GEN8_VF_SGVS;
#endif
   ice->state.cso_vertex_elements = state;
   ice->state.dirty |= CROCUS_DIRTY_VERTEX_ELEMENTS | CROCUS_DIRTY_VERTEX_BUFFERS;
   ice->state.stage_dirty |= ice->state.stage_dirty_for_nos[CROCUS_NOS_VERTEX_ELEMENTS];
}
3911
3912
#if GFX_VER >= 6
/* Bookkeeping for GEN6_SO_NUM_PRIMS_WRITTEN snapshots stored in a scratch
 * buffer: [offset_start, offset_end) brackets pairs of recorded values whose
 * differences are folded into accum (see aggregate_stream_counter).
 */
struct crocus_streamout_counter {
   uint32_t offset_start;
   uint32_t offset_end;

   /* Accumulated number of primitives written. */
   uint64_t accum;
};
3919
3920
/**
 * Gallium CSO for stream output (transform feedback) targets.
 */
struct crocus_stream_output_target {
   struct pipe_stream_output_target base;

   /** Stride (bytes-per-vertex) during this transform feedback operation */
   uint16_t stride;

   /** Has 3DSTATE_SO_BUFFER actually been emitted, zeroing the offsets? */
   bool zeroed;

   /* Buffer (and byte offset within it) holding the saved SO write offset
    * (Gen7+) or the primitive-count snapshots (Gen6).
    */
   struct crocus_resource *offset_res;
   uint32_t offset_offset;

#if GFX_VER == 6
   /* CPU mapping of offset_res, where prim-count snapshots are read back. */
   void *prim_map;
   /* Counter state saved when the target was last unbound, for resume. */
   struct crocus_streamout_counter prev_count;
   /* Counter state for the current binding. */
   struct crocus_streamout_counter count;
#endif
#if GFX_VER == 8
   /** Does the next 3DSTATE_SO_BUFFER need to zero the offsets? */
   bool zero_offset;
#endif
};
3945
3946
#if GFX_VER >= 7
/* Read back the saved streamout write offset (in bytes) from the target's
 * offset buffer and convert it to a vertex count.
 */
static uint32_t
crocus_get_so_offset(struct pipe_stream_output_target *so)
{
   struct crocus_stream_output_target *tgt = (void *) so;
   struct pipe_context *ctx = so->context;
   struct pipe_transfer *xfer;
   struct pipe_box box;

   u_box_1d(tgt->offset_offset, 4, &box);

   uint32_t *bytes_written =
      ctx->buffer_map(ctx, &tgt->offset_res->base.b, 0,
                      PIPE_MAP_DIRECTLY, &box, &xfer);
   assert(bytes_written);

   uint32_t offset_bytes = *bytes_written;
   ctx->buffer_unmap(ctx, xfer);

   return offset_bytes / tgt->stride;
}
#endif
3965
3966
#if GFX_VER == 6
static void
compute_vertices_written_so_far(struct crocus_context *ice,
                                struct crocus_stream_output_target *tgt,
                                struct crocus_streamout_counter *count,
                                uint64_t *svbi);

/* Gen6 has no SO_WRITE_OFFSET registers to read back; derive the resume
 * position (in vertices) from the accumulated primitive counters instead.
 */
static uint32_t
crocus_get_so_offset(struct pipe_stream_output_target *so)
{
   struct crocus_stream_output_target *tgt = (void *) so;
   struct crocus_context *ice = (void *) so->context;
   uint64_t vertices_written = 0;

   compute_vertices_written_so_far(ice, tgt, &tgt->prev_count,
                                   &vertices_written);
   return vertices_written;
}
#endif
3984
3985
/**
3986
* The pipe->create_stream_output_target() driver hook.
3987
*
3988
* "Target" here refers to a destination buffer. We translate this into
3989
* a 3DSTATE_SO_BUFFER packet. We can handle most fields, but don't yet
3990
* know which buffer this represents, or whether we ought to zero the
3991
* write-offsets, or append. Those are handled in the set() hook.
3992
*/
3993
static struct pipe_stream_output_target *
3994
crocus_create_stream_output_target(struct pipe_context *ctx,
3995
struct pipe_resource *p_res,
3996
unsigned buffer_offset,
3997
unsigned buffer_size)
3998
{
3999
struct crocus_resource *res = (void *) p_res;
4000
struct crocus_stream_output_target *cso = calloc(1, sizeof(*cso));
4001
if (!cso)
4002
return NULL;
4003
4004
res->bind_history |= PIPE_BIND_STREAM_OUTPUT;
4005
4006
pipe_reference_init(&cso->base.reference, 1);
4007
pipe_resource_reference(&cso->base.buffer, p_res);
4008
cso->base.buffer_offset = buffer_offset;
4009
cso->base.buffer_size = buffer_size;
4010
cso->base.context = ctx;
4011
4012
util_range_add(&res->base.b, &res->valid_buffer_range, buffer_offset,
4013
buffer_offset + buffer_size);
4014
#if GFX_VER >= 7
4015
struct crocus_context *ice = (struct crocus_context *) ctx;
4016
void *temp;
4017
u_upload_alloc(ice->ctx.stream_uploader, 0, sizeof(uint32_t), 4,
4018
&cso->offset_offset,
4019
(struct pipe_resource **)&cso->offset_res,
4020
&temp);
4021
#endif
4022
4023
return &cso->base;
4024
}
4025
4026
static void
4027
crocus_stream_output_target_destroy(struct pipe_context *ctx,
4028
struct pipe_stream_output_target *state)
4029
{
4030
struct crocus_stream_output_target *cso = (void *) state;
4031
4032
pipe_resource_reference((struct pipe_resource **)&cso->offset_res, NULL);
4033
pipe_resource_reference(&cso->base.buffer, NULL);
4034
4035
free(cso);
4036
}
4037
4038
#define GEN6_SO_NUM_PRIMS_WRITTEN 0x2288
4039
#define GEN7_SO_WRITE_OFFSET(n) (0x5280 + (n) * 4)
4040
4041
#if GFX_VER == 6
/* Fold the pending primitive-count snapshot pairs in
 * [counter->offset_start, counter->offset_end) of tgt->prim_map into
 * counter->accum, waiting for the GPU first if necessary.
 */
static void
aggregate_stream_counter(struct crocus_batch *batch, struct crocus_stream_output_target *tgt,
                         struct crocus_streamout_counter *counter)
{
   uint64_t *prim_counts = tgt->prim_map;

   /* If the batch still references the snapshot BO, the GPU may not have
    * written all snapshots yet: flush and wait before reading on the CPU.
    */
   if (crocus_batch_references(batch, tgt->offset_res->bo)) {
      struct pipe_fence_handle *out_fence = NULL;
      batch->ice->ctx.flush(&batch->ice->ctx, &out_fence, 0);
      batch->screen->base.fence_finish(&batch->screen->base, &batch->ice->ctx, out_fence, UINT64_MAX);
      batch->screen->base.fence_reference(&batch->screen->base, &out_fence, NULL);
   }

   /* Snapshots come in (begin, end) pairs; accumulate their differences. */
   for (unsigned i = counter->offset_start / sizeof(uint64_t); i < counter->offset_end / sizeof(uint64_t); i += 2) {
      counter->accum += prim_counts[i + 1] - prim_counts[i];
   }
   /* NOTE(review): this resets tgt->count even when aggregating
    * counter == &tgt->prev_count — presumably intentional because the
    * scratch buffer space is shared, but worth confirming.
    */
   tgt->count.offset_start = tgt->count.offset_end = 0;
}
4060
4061
/* Record a GEN6_SO_NUM_PRIMS_WRITTEN snapshot into the target's scratch
 * buffer, allocating the 4096-byte buffer on first use and draining the
 * pending snapshots when it is nearly full.
 */
static void
crocus_stream_store_prims_written(struct crocus_batch *batch,
                                  struct crocus_stream_output_target *tgt)
{
   if (!tgt->offset_res) {
      u_upload_alloc(batch->ice->ctx.stream_uploader, 0, 4096, 4,
                     &tgt->offset_offset,
                     (struct pipe_resource **)&tgt->offset_res,
                     &tgt->prim_map);
      tgt->count.offset_start = tgt->count.offset_end = 0;
   }

   /* Need room for one more 16-byte (begin, end) pair; otherwise fold the
    * pending snapshots into the accumulators and restart at offset 0.
    */
   if (tgt->count.offset_end + 16 >= 4096) {
      aggregate_stream_counter(batch, tgt, &tgt->prev_count);
      aggregate_stream_counter(batch, tgt, &tgt->count);
   }

   /* Flush so the register snapshot reflects all primitives so far. */
   crocus_emit_mi_flush(batch);
   crocus_store_register_mem64(batch, GEN6_SO_NUM_PRIMS_WRITTEN,
                               tgt->offset_res->bo,
                               tgt->count.offset_end + tgt->offset_offset, false);
   tgt->count.offset_end += 8;
}
4084
4085
/* Compute the streamed-vertex count (SVBI) implied by the primitives
 * written so far: the aggregated primitive count times the vertices per
 * primitive of the last transform feedback draw.
 */
static void
compute_vertices_written_so_far(struct crocus_context *ice,
                                struct crocus_stream_output_target *tgt,
                                struct crocus_streamout_counter *counter,
                                uint64_t *svbi)
{
   //TODO vertices per prim
   aggregate_stream_counter(&ice->batches[0], tgt, counter);

   *svbi = counter->accum * ice->state.last_xfb_verts_per_prim;
}
#endif
4097
/**
 * The pipe->set_stream_output_targets() driver hook.
 *
 * At this point, we know which targets are bound to a particular index,
 * and also whether we want to append or start over. We can finish the
 * 3DSTATE_SO_BUFFER packets we started earlier.
 */
static void
crocus_set_stream_output_targets(struct pipe_context *ctx,
                                 unsigned num_targets,
                                 struct pipe_stream_output_target **targets,
                                 const unsigned *offsets)
{
   struct crocus_context *ice = (struct crocus_context *) ctx;
   struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
   struct pipe_stream_output_target *old_tgt[4] = { NULL, NULL, NULL, NULL };
   const bool active = num_targets > 0;
   if (ice->state.streamout_active != active) {
      ice->state.streamout_active = active;
#if GFX_VER >= 7
      ice->state.dirty |= CROCUS_DIRTY_STREAMOUT;
#else
      ice->state.dirty |= CROCUS_DIRTY_GEN4_FF_GS_PROG;
#endif

      /* We only emit 3DSTATE_SO_DECL_LIST when streamout is active, because
       * it's a non-pipelined command. If we're switching streamout on, we
       * may have missed emitting it earlier, so do so now. (We're already
       * taking a stall to update 3DSTATE_SO_BUFFERS anyway...)
       */
      if (active) {
#if GFX_VER >= 7
         ice->state.dirty |= CROCUS_DIRTY_SO_DECL_LIST;
#endif
      } else {
         /* Turning streamout off: flush so later consumers of the SO
          * buffers observe the streamed results.
          */
         uint32_t flush = 0;
         for (int i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
            struct crocus_stream_output_target *tgt =
               (void *) ice->state.so_target[i];
            if (tgt) {
               struct crocus_resource *res = (void *) tgt->base.buffer;

               flush |= crocus_flush_bits_for_history(res);
               crocus_dirty_for_history(ice, res);
            }
         }
         crocus_emit_pipe_control_flush(&ice->batches[CROCUS_BATCH_RENDER],
                                        "make streamout results visible", flush);
      }
   }

   ice->state.so_targets = num_targets;
   for (int i = 0; i < 4; i++) {
      /* Hold a reference to each outgoing target; the code below still
       * needs them to save their final write offsets/counters.
       */
      pipe_so_target_reference(&old_tgt[i], ice->state.so_target[i]);
      pipe_so_target_reference(&ice->state.so_target[i],
                               i < num_targets ? targets[i] : NULL);
   }

#if GFX_VER == 6
   bool stored_num_prims = false;
   for (int i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
      if (num_targets) {
         struct crocus_stream_output_target *tgt =
            (void *) ice->state.so_target[i];

         if (!tgt)
            continue;
         if (offsets[i] == 0) {
            // This means that we're supposed to ignore anything written to
            // the buffer before. We can do this by just clearing out the
            // count of writes to the prim count buffer.
            tgt->count.offset_start = tgt->count.offset_end;
            tgt->count.accum = 0;
            ice->state.svbi = 0;
         } else {
            /* Appending: recover the vertex count written so far so the
             * SVBI register can resume from it.
             */
            if (tgt->offset_res) {
               compute_vertices_written_so_far(ice, tgt, &tgt->count, &ice->state.svbi);
               tgt->count.offset_start = tgt->count.offset_end;
            }
         }

         /* Snapshot SO_NUM_PRIMS_WRITTEN once as the new baseline. */
         if (!stored_num_prims) {
            crocus_stream_store_prims_written(batch, tgt);
            stored_num_prims = true;
         }
      } else {
         struct crocus_stream_output_target *tgt =
            (void *) old_tgt[i];
         if (tgt) {
            /* Unbinding: record a final snapshot and save the counter
             * state so a later re-bind can append correctly.
             */
            if (!stored_num_prims) {
               crocus_stream_store_prims_written(batch, tgt);
               stored_num_prims = true;
            }

            if (tgt->offset_res) {
               tgt->prev_count = tgt->count;
            }
         }
      }
      pipe_so_target_reference(&old_tgt[i], NULL);
   }
   ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_BINDINGS_GS;
#else
   for (int i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
      if (num_targets) {
         struct crocus_stream_output_target *tgt =
            (void *) ice->state.so_target[i];

         if (offsets[i] == 0) {
#if GFX_VER == 8
            if (tgt)
               tgt->zero_offset = true;
#endif
            /* Start over: zero the hardware SO_WRITE_OFFSET register. */
            crocus_load_register_imm32(batch, GEN7_SO_WRITE_OFFSET(i), 0);
         }
         else if (tgt)
            /* Append: reload the saved write offset from memory. */
            crocus_load_register_mem32(batch, GEN7_SO_WRITE_OFFSET(i),
                                       tgt->offset_res->bo,
                                       tgt->offset_offset);
      } else {
         struct crocus_stream_output_target *tgt =
            (void *) old_tgt[i];
         if (tgt)
            /* Unbinding: save the final write offset for later resume. */
            crocus_store_register_mem32(batch, GEN7_SO_WRITE_OFFSET(i),
                                        tgt->offset_res->bo,
                                        tgt->offset_offset, false);
      }
      pipe_so_target_reference(&old_tgt[i], NULL);
   }
#endif
   /* No need to update 3DSTATE_SO_BUFFER unless SOL is active. */
   if (!active)
      return;
#if GFX_VER >= 7
   ice->state.dirty |= CROCUS_DIRTY_GEN7_SO_BUFFERS;
#elif GFX_VER == 6
   ice->state.dirty |= CROCUS_DIRTY_GEN6_SVBI;
#endif
}

#endif
4238
4239
#if GFX_VER >= 7
4240
/**
4241
* An crocus-vtable helper for encoding the 3DSTATE_SO_DECL_LIST and
4242
* 3DSTATE_STREAMOUT packets.
4243
*
4244
* 3DSTATE_SO_DECL_LIST is a list of shader outputs we want the streamout
4245
* hardware to record. We can create it entirely based on the shader, with
4246
* no dynamic state dependencies.
4247
*
4248
* 3DSTATE_STREAMOUT is an annoying mix of shader-based information and
4249
* state-based settings. We capture the shader-related ones here, and merge
4250
* the rest in at draw time.
4251
*/
4252
static uint32_t *
4253
crocus_create_so_decl_list(const struct pipe_stream_output_info *info,
4254
const struct brw_vue_map *vue_map)
4255
{
4256
struct GENX(SO_DECL) so_decl[MAX_VERTEX_STREAMS][128];
4257
int buffer_mask[MAX_VERTEX_STREAMS] = {0, 0, 0, 0};
4258
int next_offset[MAX_VERTEX_STREAMS] = {0, 0, 0, 0};
4259
int decls[MAX_VERTEX_STREAMS] = {0, 0, 0, 0};
4260
int max_decls = 0;
4261
STATIC_ASSERT(ARRAY_SIZE(so_decl[0]) >= MAX_PROGRAM_OUTPUTS);
4262
4263
memset(so_decl, 0, sizeof(so_decl));
4264
4265
/* Construct the list of SO_DECLs to be emitted. The formatting of the
4266
* command feels strange -- each dword pair contains a SO_DECL per stream.
4267
*/
4268
for (unsigned i = 0; i < info->num_outputs; i++) {
4269
const struct pipe_stream_output *output = &info->output[i];
4270
const int buffer = output->output_buffer;
4271
const int varying = output->register_index;
4272
const unsigned stream_id = output->stream;
4273
assert(stream_id < MAX_VERTEX_STREAMS);
4274
4275
buffer_mask[stream_id] |= 1 << buffer;
4276
4277
assert(vue_map->varying_to_slot[varying] >= 0);
4278
4279
/* Mesa doesn't store entries for gl_SkipComponents in the Outputs[]
4280
* array. Instead, it simply increments DstOffset for the following
4281
* input by the number of components that should be skipped.
4282
*
4283
* Our hardware is unusual in that it requires us to program SO_DECLs
4284
* for fake "hole" components, rather than simply taking the offset
4285
* for each real varying. Each hole can have size 1, 2, 3, or 4; we
4286
* program as many size = 4 holes as we can, then a final hole to
4287
* accommodate the final 1, 2, or 3 remaining.
4288
*/
4289
int skip_components = output->dst_offset - next_offset[buffer];
4290
4291
while (skip_components > 0) {
4292
so_decl[stream_id][decls[stream_id]++] = (struct GENX(SO_DECL)) {
4293
.HoleFlag = 1,
4294
.OutputBufferSlot = output->output_buffer,
4295
.ComponentMask = (1 << MIN2(skip_components, 4)) - 1,
4296
};
4297
skip_components -= 4;
4298
}
4299
4300
next_offset[buffer] = output->dst_offset + output->num_components;
4301
4302
so_decl[stream_id][decls[stream_id]++] = (struct GENX(SO_DECL)) {
4303
.OutputBufferSlot = output->output_buffer,
4304
.RegisterIndex = vue_map->varying_to_slot[varying],
4305
.ComponentMask =
4306
((1 << output->num_components) - 1) << output->start_component,
4307
};
4308
4309
if (decls[stream_id] > max_decls)
4310
max_decls = decls[stream_id];
4311
}
4312
4313
unsigned dwords = GENX(3DSTATE_STREAMOUT_length) + (3 + 2 * max_decls);
4314
uint32_t *map = ralloc_size(NULL, sizeof(uint32_t) * dwords);
4315
uint32_t *so_decl_map = map + GENX(3DSTATE_STREAMOUT_length);
4316
4317
crocus_pack_command(GENX(3DSTATE_STREAMOUT), map, sol) {
4318
int urb_entry_read_offset = 0;
4319
int urb_entry_read_length = (vue_map->num_slots + 1) / 2 -
4320
urb_entry_read_offset;
4321
4322
/* We always read the whole vertex. This could be reduced at some
4323
* point by reading less and offsetting the register index in the
4324
* SO_DECLs.
4325
*/
4326
sol.Stream0VertexReadOffset = urb_entry_read_offset;
4327
sol.Stream0VertexReadLength = urb_entry_read_length - 1;
4328
sol.Stream1VertexReadOffset = urb_entry_read_offset;
4329
sol.Stream1VertexReadLength = urb_entry_read_length - 1;
4330
sol.Stream2VertexReadOffset = urb_entry_read_offset;
4331
sol.Stream2VertexReadLength = urb_entry_read_length - 1;
4332
sol.Stream3VertexReadOffset = urb_entry_read_offset;
4333
sol.Stream3VertexReadLength = urb_entry_read_length - 1;
4334
4335
// TODO: Double-check that stride == 0 means no buffer. Probably this
4336
// needs to go elsewhere, where the buffer enable stuff is actually
4337
// known.
4338
#if GFX_VER < 8
4339
sol.SOBufferEnable0 = !!info->stride[0];
4340
sol.SOBufferEnable1 = !!info->stride[1];
4341
sol.SOBufferEnable2 = !!info->stride[2];
4342
sol.SOBufferEnable3 = !!info->stride[3];
4343
#else
4344
/* Set buffer pitches; 0 means unbound. */
4345
sol.Buffer0SurfacePitch = 4 * info->stride[0];
4346
sol.Buffer1SurfacePitch = 4 * info->stride[1];
4347
sol.Buffer2SurfacePitch = 4 * info->stride[2];
4348
sol.Buffer3SurfacePitch = 4 * info->stride[3];
4349
#endif
4350
}
4351
4352
crocus_pack_command(GENX(3DSTATE_SO_DECL_LIST), so_decl_map, list) {
4353
list.DWordLength = 3 + 2 * max_decls - 2;
4354
list.StreamtoBufferSelects0 = buffer_mask[0];
4355
list.StreamtoBufferSelects1 = buffer_mask[1];
4356
list.StreamtoBufferSelects2 = buffer_mask[2];
4357
list.StreamtoBufferSelects3 = buffer_mask[3];
4358
list.NumEntries0 = decls[0];
4359
list.NumEntries1 = decls[1];
4360
list.NumEntries2 = decls[2];
4361
list.NumEntries3 = decls[3];
4362
}
4363
4364
for (int i = 0; i < max_decls; i++) {
4365
crocus_pack_state(GENX(SO_DECL_ENTRY), so_decl_map + 3 + i * 2, entry) {
4366
entry.Stream0Decl = so_decl[0][i];
4367
entry.Stream1Decl = so_decl[1][i];
4368
entry.Stream2Decl = so_decl[2][i];
4369
entry.Stream3Decl = so_decl[3][i];
4370
}
4371
}
4372
4373
return map;
4374
}
4375
#endif
4376
4377
#if GFX_VER == 6
4378
static void
4379
crocus_emit_so_svbi(struct crocus_context *ice)
4380
{
4381
struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
4382
4383
unsigned max_vertex = 0xffffffff;
4384
for (int i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
4385
struct crocus_stream_output_target *tgt =
4386
(void *) ice->state.so_target[i];
4387
if (tgt)
4388
max_vertex = MIN2(max_vertex, tgt->base.buffer_size / tgt->stride);
4389
}
4390
4391
crocus_emit_cmd(batch, GENX(3DSTATE_GS_SVB_INDEX), svbi) {
4392
svbi.IndexNumber = 0;
4393
svbi.StreamedVertexBufferIndex = (uint32_t)ice->state.svbi; /* fix when resuming, based on target's prim count */
4394
svbi.MaximumIndex = max_vertex;
4395
}
4396
4397
/* initialize the rest of the SVBI's to reasonable values so that we don't
4398
* run out of room writing the regular data.
4399
*/
4400
for (int i = 1; i < 4; i++) {
4401
crocus_emit_cmd(batch, GENX(3DSTATE_GS_SVB_INDEX), svbi) {
4402
svbi.IndexNumber = i;
4403
svbi.StreamedVertexBufferIndex = 0;
4404
svbi.MaximumIndex = 0xffffffff;
4405
}
4406
}
4407
}
4408
4409
#endif
4410
4411
4412
#if GFX_VER >= 6
/* Return true if the final rasterized primitives are points: either a
 * point polygon fill mode is set, or the last pre-rasterization stage
 * (GS, then TES, then the input topology) produces a point list.
 */
static bool
crocus_is_drawing_points(const struct crocus_context *ice)
{
   const struct crocus_rasterizer_state *rast = ice->state.cso_rast;

   if (rast->cso.fill_front == PIPE_POLYGON_MODE_POINT ||
       rast->cso.fill_back == PIPE_POLYGON_MODE_POINT)
      return true;

   if (ice->shaders.prog[MESA_SHADER_GEOMETRY] != NULL) {
      const struct brw_gs_prog_data *gs_data =
         (void *) ice->shaders.prog[MESA_SHADER_GEOMETRY]->prog_data;
      return gs_data->output_topology == _3DPRIM_POINTLIST;
   }

   if (ice->shaders.prog[MESA_SHADER_TESS_EVAL] != NULL) {
      const struct brw_tes_prog_data *tes_data =
         (void *) ice->shaders.prog[MESA_SHADER_TESS_EVAL]->prog_data;
      return tes_data->output_topology == BRW_TESS_OUTPUT_TOPOLOGY_POINT;
   }

   return ice->state.prim_mode == PIPE_PRIM_POINTS;
}
#endif
4435
4436
#if GFX_VER >= 6
/* Fill out one SF_OUTPUT_ATTRIBUTE_DETAIL for FS input fs_attr: pick the
 * VUE source slot, apply constant overrides for attributes no earlier
 * stage wrote, and enable back-face color swizzling when needed.
 * *max_source_attr is raised to the highest VUE slot the SF must read.
 */
static void
get_attr_override(
   struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) *attr,
   const struct brw_vue_map *vue_map,
   int urb_entry_read_offset, int fs_attr,
   bool two_side_color, uint32_t *max_source_attr)
{
   /* Find the VUE slot for this attribute. */
   int slot = vue_map->varying_to_slot[fs_attr];

   /* Viewport and Layer are stored in the VUE header. We need to override
    * them to zero if earlier stages didn't write them, as GL requires that
    * they read back as zero when not explicitly set.
    */
   if (fs_attr == VARYING_SLOT_VIEWPORT || fs_attr == VARYING_SLOT_LAYER) {
      attr->ComponentOverrideX = true;
      attr->ComponentOverrideW = true;
      attr->ConstantSource = CONST_0000;

      if (!(vue_map->slots_valid & VARYING_BIT_LAYER))
         attr->ComponentOverrideY = true;
      if (!(vue_map->slots_valid & VARYING_BIT_VIEWPORT))
         attr->ComponentOverrideZ = true;

      return;
   }

   /* If there was only a back color written but not front, use back
    * as the color instead of undefined
    */
   if (slot == -1 && fs_attr == VARYING_SLOT_COL0)
      slot = vue_map->varying_to_slot[VARYING_SLOT_BFC0];
   if (slot == -1 && fs_attr == VARYING_SLOT_COL1)
      slot = vue_map->varying_to_slot[VARYING_SLOT_BFC1];

   if (slot == -1) {
      /* This attribute does not exist in the VUE--that means that the vertex
       * shader did not write to it. This means that either:
       *
       * (a) This attribute is a texture coordinate, and it is going to be
       * replaced with point coordinates (as a consequence of a call to
       * glTexEnvi(GL_POINT_SPRITE, GL_COORD_REPLACE, GL_TRUE)), so the
       * hardware will ignore whatever attribute override we supply.
       *
       * (b) This attribute is read by the fragment shader but not written by
       * the vertex shader, so its value is undefined. Therefore the
       * attribute override we supply doesn't matter.
       *
       * (c) This attribute is gl_PrimitiveID, and it wasn't written by the
       * previous shader stage.
       *
       * Note that we don't have to worry about the cases where the attribute
       * is gl_PointCoord or is undergoing point sprite coordinate
       * replacement, because in those cases, this function isn't called.
       *
       * In case (c), we need to program the attribute overrides so that the
       * primitive ID will be stored in this slot. In every other case, the
       * attribute override we supply doesn't matter. So just go ahead and
       * program primitive ID in every case.
       */
      attr->ComponentOverrideW = true;
      attr->ComponentOverrideX = true;
      attr->ComponentOverrideY = true;
      attr->ComponentOverrideZ = true;
      attr->ConstantSource = PRIM_ID;
      return;
   }

   /* Compute the location of the attribute relative to urb_entry_read_offset.
    * Each increment of urb_entry_read_offset represents a 256-bit value, so
    * it counts for two 128-bit VUE slots.
    */
   int source_attr = slot - 2 * urb_entry_read_offset;
   assert(source_attr >= 0 && source_attr < 32);

   /* If we are doing two-sided color, and the VUE slot following this one
    * represents a back-facing color, then we need to instruct the SF unit to
    * do back-facing swizzling.
    */
   bool swizzling = two_side_color &&
      ((vue_map->slot_to_varying[slot] == VARYING_SLOT_COL0 &&
        vue_map->slot_to_varying[slot+1] == VARYING_SLOT_BFC0) ||
       (vue_map->slot_to_varying[slot] == VARYING_SLOT_COL1 &&
        vue_map->slot_to_varying[slot+1] == VARYING_SLOT_BFC1));

   /* Update max_source_attr. If swizzling, the SF will read this slot + 1. */
   if (*max_source_attr < source_attr + swizzling)
      *max_source_attr = source_attr + swizzling;

   attr->SourceAttribute = source_attr;
   if (swizzling)
      attr->SwizzleSelect = INPUTATTR_FACING;
}
4530
4531
/**
 * Compute the SF/SBE attribute override table for the current FS inputs.
 *
 * Fills @attr_overrides (up to 16 entries), the point-sprite enable
 * bitmask, and the vertex URB entry read offset/length used when packing
 * 3DSTATE_SF / 3DSTATE_SBE.
 */
static void
calculate_attr_overrides(
   const struct crocus_context *ice,
   struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) *attr_overrides,
   uint32_t *point_sprite_enables,
   uint32_t *urb_entry_read_length,
   uint32_t *urb_entry_read_offset)
{
   const struct brw_wm_prog_data *wm_prog_data = (void *)
      ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data;
   const struct brw_vue_map *vue_map = ice->shaders.last_vue_map;
   const struct crocus_rasterizer_state *cso_rast = ice->state.cso_rast;
   uint32_t max_source_attr = 0;
   const struct shader_info *fs_info =
      crocus_get_shader_info(ice, MESA_SHADER_FRAGMENT);

   /* First VUE slot the FS actually reads; everything before it is skipped
    * by bumping the URB read offset instead of emitting overrides.
    */
   int first_slot =
      brw_compute_first_urb_slot_required(fs_info->inputs_read, vue_map);

   /* Each URB offset packs two varying slots */
   assert(first_slot % 2 == 0);
   *urb_entry_read_offset = first_slot / 2;
   *point_sprite_enables = 0;

   for (int fs_attr = 0; fs_attr < VARYING_SLOT_MAX; fs_attr++) {
      /* input_index is the FS urb_setup slot; negative means unused. */
      const int input_index = wm_prog_data->urb_setup[fs_attr];

      if (input_index < 0)
         continue;

      /* When drawing points, texcoords selected by sprite_coord_enable and
       * gl_PointCoord are replaced with point sprite coordinates by the HW,
       * so no attribute override is needed for them.
       */
      bool point_sprite = false;
      if (crocus_is_drawing_points(ice)) {
         if (fs_attr >= VARYING_SLOT_TEX0 &&
             fs_attr <= VARYING_SLOT_TEX7 &&
             cso_rast->cso.sprite_coord_enable & (1 << (fs_attr - VARYING_SLOT_TEX0)))
            point_sprite = true;

         if (fs_attr == VARYING_SLOT_PNTC)
            point_sprite = true;

         if (point_sprite)
            *point_sprite_enables |= 1U << input_index;
      }

      struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) attribute = { 0 };
      if (!point_sprite) {
         get_attr_override(&attribute, vue_map, *urb_entry_read_offset, fs_attr,
                           cso_rast->cso.light_twoside, &max_source_attr);
      }

      /* The hardware can only do the overrides on 16 overrides at a
       * time, and the other up to 16 have to be lined up so that the
       * input index = the output index. We'll need to do some
       * tweaking to make sure that's the case.
       */
      if (input_index < 16)
         attr_overrides[input_index] = attribute;
      else
         assert(attribute.SourceAttribute == input_index);
   }

   /* From the Sandy Bridge PRM, Volume 2, Part 1, documentation for
    * 3DSTATE_SF DWord 1 bits 15:11, "Vertex URB Entry Read Length":
    *
    * "This field should be set to the minimum length required to read the
    *  maximum source attribute.  The maximum source attribute is indicated
    *  by the maximum value of the enabled Attribute # Source Attribute if
    *  Attribute Swizzle Enable is set, Number of Output Attributes-1 if
    *  enable is not set.
    *  read_length = ceiling((max_source_attr + 1) / 2)
    *
    *  [errata] Corruption/Hang possible if length programmed larger than
    *  recommended"
    *
    * Similar text exists for Ivy Bridge.
    */
   *urb_entry_read_length = DIV_ROUND_UP(max_source_attr + 1, 2);
}
4609
#endif
4610
4611
#if GFX_VER >= 7
4612
/**
 * Emit 3DSTATE_SBE (and 3DSTATE_SBE_SWIZ on Gen8) describing how the
 * setup engine hands VUE attributes to the fragment shader.
 *
 * On Gen7 the per-attribute override entries live inline in 3DSTATE_SBE
 * (hence the attr_overrides macro aliasing sbe.Attribute); on Gen8 they
 * moved into the separate 3DSTATE_SBE_SWIZ packet.
 */
static void
crocus_emit_sbe(struct crocus_batch *batch, const struct crocus_context *ice)
{
   const struct crocus_rasterizer_state *cso_rast = ice->state.cso_rast;
   const struct brw_wm_prog_data *wm_prog_data = (void *)
      ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data;
#if GFX_VER >= 8
   struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) attr_overrides[16] = { { 0 } };
#else
   /* Gen7: write overrides directly into the packet's inline array. */
#define attr_overrides sbe.Attribute
#endif

   uint32_t urb_entry_read_length;
   uint32_t urb_entry_read_offset;
   uint32_t point_sprite_enables;

   crocus_emit_cmd(batch, GENX(3DSTATE_SBE), sbe) {
      sbe.AttributeSwizzleEnable = true;
      sbe.NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs;
      sbe.PointSpriteTextureCoordinateOrigin = cso_rast->cso.sprite_coord_mode;

      /* Fills attr_overrides (inline on Gen7, local array on Gen8). */
      calculate_attr_overrides(ice,
                               attr_overrides,
                               &point_sprite_enables,
                               &urb_entry_read_length,
                               &urb_entry_read_offset);
      sbe.VertexURBEntryReadOffset = urb_entry_read_offset;
      sbe.VertexURBEntryReadLength = urb_entry_read_length;
      sbe.ConstantInterpolationEnable = wm_prog_data->flat_inputs;
      sbe.PointSpriteTextureCoordinateEnable = point_sprite_enables;
#if GFX_VER >= 8
      sbe.ForceVertexURBEntryReadLength = true;
      sbe.ForceVertexURBEntryReadOffset = true;
#endif
   }
#if GFX_VER >= 8
   /* Gen8 carries the 16 override entries in a dedicated packet. */
   crocus_emit_cmd(batch, GENX(3DSTATE_SBE_SWIZ), sbes) {
      for (int i = 0; i < 16; i++)
         sbes.Attribute[i] = attr_overrides[i];
   }
#endif
}
4654
#endif
4655
4656
/* ------------------------------------------------------------------- */
4657
4658
/**
 * Populate VS program key fields based on the current state.
 *
 * Adds user clip plane lowering (only when the VS is the last
 * pre-rasterization stage and doesn't write gl_ClipDistance itself),
 * Gen4/5 edgeflag-copy / point-coord-replace workarounds, vertex color
 * clamping, and pre-Haswell vertex attribute workaround flags.
 */
static void
crocus_populate_vs_key(const struct crocus_context *ice,
                       const struct shader_info *info,
                       gl_shader_stage last_stage,
                       struct brw_vs_prog_key *key)
{
   const struct crocus_rasterizer_state *cso_rast = ice->state.cso_rast;

   /* Lower user clip planes in the VS only when it's the final geometry
    * stage and the shader relies on fixed-function clipping (writes
    * position/clip-vertex but no clip distances).
    */
   if (info->clip_distance_array_size == 0 &&
       (info->outputs_written & (VARYING_BIT_POS | VARYING_BIT_CLIP_VERTEX)) &&
       last_stage == MESA_SHADER_VERTEX)
      key->nr_userclip_plane_consts = cso_rast->num_clip_plane_consts;

#if GFX_VER <= 5
   /* Gen4/5 need the VS to copy the edge flag when drawing unfilled
    * polygons, and to handle point-coord replacement in software.
    */
   key->copy_edgeflag = (cso_rast->cso.fill_back != PIPE_POLYGON_MODE_FILL ||
                         cso_rast->cso.fill_front != PIPE_POLYGON_MODE_FILL);
   key->point_coord_replace = cso_rast->cso.sprite_coord_enable & 0xff;
#endif

   key->clamp_vertex_color = cso_rast->cso.clamp_vertex_color;

#if GFX_VERx10 < 75
   /* Pre-Haswell attribute format workarounds: map each read vertex input
    * to its vertex-element workaround flags, in bit-scan order.
    */
   uint64_t inputs_read = info->inputs_read;
   int ve_idx = 0;
   while (inputs_read) {
      int i = u_bit_scan64(&inputs_read);
      key->gl_attrib_wa_flags[i] = ice->state.cso_vertex_elements->wa_flags[ve_idx];
      ve_idx++;
   }
#endif
}
4692
4693
/**
 * Populate TCS program key fields based on the current state.
 *
 * Intentionally empty: no TCS key fields currently depend on dynamic
 * Gallium state in this driver.
 */
static void
crocus_populate_tcs_key(const struct crocus_context *ice,
                        struct brw_tcs_prog_key *key)
{
}
4701
4702
/**
4703
* Populate TES program key fields based on the current state.
4704
*/
4705
static void
4706
crocus_populate_tes_key(const struct crocus_context *ice,
4707
const struct shader_info *info,
4708
gl_shader_stage last_stage,
4709
struct brw_tes_prog_key *key)
4710
{
4711
const struct crocus_rasterizer_state *cso_rast = ice->state.cso_rast;
4712
4713
if (info->clip_distance_array_size == 0 &&
4714
(info->outputs_written & (VARYING_BIT_POS | VARYING_BIT_CLIP_VERTEX)) &&
4715
last_stage == MESA_SHADER_TESS_EVAL)
4716
key->nr_userclip_plane_consts = cso_rast->num_clip_plane_consts;
4717
}
4718
4719
/**
4720
* Populate GS program key fields based on the current state.
4721
*/
4722
static void
4723
crocus_populate_gs_key(const struct crocus_context *ice,
4724
const struct shader_info *info,
4725
gl_shader_stage last_stage,
4726
struct brw_gs_prog_key *key)
4727
{
4728
const struct crocus_rasterizer_state *cso_rast = ice->state.cso_rast;
4729
4730
if (info->clip_distance_array_size == 0 &&
4731
(info->outputs_written & (VARYING_BIT_POS | VARYING_BIT_CLIP_VERTEX)) &&
4732
last_stage == MESA_SHADER_GEOMETRY)
4733
key->nr_userclip_plane_consts = cso_rast->num_clip_plane_consts;
4734
}
4735
4736
/**
 * Populate FS program key fields based on the current state.
 *
 * Derives the WM key from the bound framebuffer, depth/stencil/alpha,
 * rasterizer, and blend CSOs: Gen4/5 IZ lookup bits, line antialiasing
 * mode, color-buffer count, clamping, alpha-to-coverage, flat shading,
 * multisample state, and dual-source blend workarounds.
 */
static void
crocus_populate_fs_key(const struct crocus_context *ice,
                       const struct shader_info *info,
                       struct brw_wm_prog_key *key)
{
   struct crocus_screen *screen = (void *) ice->ctx.screen;
   const struct pipe_framebuffer_state *fb = &ice->state.framebuffer;
   const struct crocus_depth_stencil_alpha_state *zsa = ice->state.cso_zsa;
   const struct crocus_rasterizer_state *rast = ice->state.cso_rast;
   const struct crocus_blend_state *blend = ice->state.cso_blend;

#if GFX_VER < 6
   /* Gen4/5: build the "IZ" lookup bitmask describing discard/alpha-test,
    * computed depth, and depth/stencil test+write enables.
    */
   uint32_t lookup = 0;

   if (info->fs.uses_discard || zsa->cso.alpha_enabled)
      lookup |= BRW_WM_IZ_PS_KILL_ALPHATEST_BIT;

   if (info->outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
      lookup |= BRW_WM_IZ_PS_COMPUTES_DEPTH_BIT;

   if (fb->zsbuf && zsa->cso.depth_enabled) {
      lookup |= BRW_WM_IZ_DEPTH_TEST_ENABLE_BIT;

      if (zsa->cso.depth_writemask)
         lookup |= BRW_WM_IZ_DEPTH_WRITE_ENABLE_BIT;

   }
   if (zsa->cso.stencil[0].enabled || zsa->cso.stencil[1].enabled) {
      lookup |= BRW_WM_IZ_STENCIL_TEST_ENABLE_BIT;
      if (zsa->cso.stencil[0].writemask || zsa->cso.stencil[1].writemask)
         lookup |= BRW_WM_IZ_STENCIL_WRITE_ENABLE_BIT;
   }
   key->iz_lookup = lookup;
   key->stats_wm = ice->state.stats_wm;
#endif

   /* Line AA: ALWAYS when actually drawing smoothed lines; SOMETIMES when
    * a triangle may be rasterized as lines depending on facing (polygon
    * line fill mode), refined to ALWAYS when culling makes it certain.
    */
   uint32_t line_aa = BRW_WM_AA_NEVER;
   if (rast->cso.line_smooth) {
      int reduced_prim = u_reduced_prim(ice->state.prim_mode);
      if (reduced_prim == PIPE_PRIM_LINES)
         line_aa = BRW_WM_AA_ALWAYS;
      else if (reduced_prim == PIPE_PRIM_TRIANGLES) {
         if (rast->cso.fill_front == PIPE_POLYGON_MODE_LINE) {
            line_aa = BRW_WM_AA_SOMETIMES;

            if (rast->cso.fill_back == PIPE_POLYGON_MODE_LINE ||
                rast->cso.cull_face == PIPE_FACE_BACK)
               line_aa = BRW_WM_AA_ALWAYS;
         } else if (rast->cso.fill_back == PIPE_POLYGON_MODE_LINE) {
            line_aa = BRW_WM_AA_SOMETIMES;

            if (rast->cso.cull_face == PIPE_FACE_FRONT)
               line_aa = BRW_WM_AA_ALWAYS;
         }
      }
   }
   key->line_aa = line_aa;

   key->nr_color_regions = fb->nr_cbufs;

   key->clamp_fragment_color = rast->cso.clamp_fragment_color;

   key->alpha_to_coverage = blend->cso.alpha_to_coverage;

   /* With multiple RTs, alpha testing uses RT0's alpha for all buffers. */
   key->alpha_test_replicate_alpha = fb->nr_cbufs > 1 && zsa->cso.alpha_enabled;

   key->flat_shade = rast->cso.flatshade &&
      (info->inputs_read & (VARYING_BIT_COL0 | VARYING_BIT_COL1));

   key->persample_interp = rast->cso.force_persample_interp;
   key->multisample_fbo = rast->cso.multisample && fb->samples > 1;

   key->ignore_sample_mask_out = !key->multisample_fbo;
   key->coherent_fb_fetch = false; // TODO: needed?

   /* driconf workaround: force dual-source blending when apps select it
    * by fragment output location rather than index.
    */
   key->force_dual_color_blend =
      screen->driconf.dual_color_blend_by_location &&
      (blend->blend_enables & 1) && blend->dual_color_blending;

   /* TODO: Respect glHint for key->high_quality_derivatives */

#if GFX_VER <= 5
   /* Gen4/5 alpha test lives in the shader key when replication applies. */
   if (fb->nr_cbufs > 1 && zsa->cso.alpha_enabled) {
      key->alpha_test_func = zsa->cso.alpha_func;
      key->alpha_test_ref = zsa->cso.alpha_ref_value;
   }
#endif
}
4827
4828
/**
 * Populate CS program key fields based on the current state.
 *
 * Intentionally empty: no CS key fields currently depend on dynamic
 * Gallium state in this driver.
 */
static void
crocus_populate_cs_key(const struct crocus_context *ice,
                       struct brw_cs_prog_key *key)
{
}
4833
4834
#if GFX_VER == 4
/* Gen4: kernel start pointers are relocations into the shader cache BO.
 *
 * Fix: dropped the stray trailing semicolon from the macro body.  A
 * function-like macro must expand to an expression so that uses like
 * "pkt.KernelStartPointer = KSP(ice, shader);" don't produce a double
 * semicolon (and so the macro stays usable in expression contexts),
 * matching the Gen5+ function form below.
 */
#define KSP(ice, shader) ro_bo((ice)->shaders.cache_bo, (shader)->offset)
#elif GFX_VER >= 5
/* Gen5+: kernel start pointers are plain offsets from the instruction base. */
static uint64_t
KSP(const struct crocus_context *ice, const struct crocus_compiled_shader *shader)
{
   return shader->offset;
}
#endif
4843
4844
/* Gen11 workaround table #2056 WABTPPrefetchDisable suggests to disable
4845
* prefetching of binding tables in A0 and B0 steppings. XXX: Revisit
4846
* this WA on C0 stepping.
4847
*
4848
* TODO: Fill out SamplerCount for prefetching?
4849
*/
4850
4851
/* Shared initializer for the thread-dispatch fields of the 3DSTATE_VS/
 * HS/DS/GS packets.  Expansion site must have "ice", "shader",
 * "prog_data", and "vue_prog_data" in scope; "prefix" selects the
 * packet's URB field-name prefix (e.g. Vertex) and "stage" picks the
 * scratch space slot.
 *
 * NOTE(review): PerThreadScratchSpace = ffs(total_scratch) - 11 assumes
 * total_scratch is a power of two >= 1KB — presumably guaranteed by the
 * compiler's scratch allocation; confirm.
 */
#define INIT_THREAD_DISPATCH_FIELDS(pkt, prefix, stage)       \
   pkt.KernelStartPointer = KSP(ice, shader);                 \
   pkt.BindingTableEntryCount = shader->bt.size_bytes / 4;    \
   pkt.FloatingPointMode = prog_data->use_alt_mode;           \
                                                              \
   pkt.DispatchGRFStartRegisterForURBData =                   \
      prog_data->dispatch_grf_start_reg;                      \
   pkt.prefix##URBEntryReadLength = vue_prog_data->urb_read_length;  \
   pkt.prefix##URBEntryReadOffset = 0;                        \
                                                              \
   pkt.StatisticsEnable = true;                               \
   pkt.Enable           = true;                               \
                                                              \
   if (prog_data->total_scratch) {                            \
      struct crocus_bo *bo =                                  \
         crocus_get_scratch_space(ice, prog_data->total_scratch, stage); \
      pkt.PerThreadScratchSpace = ffs(prog_data->total_scratch) - 11; \
      pkt.ScratchSpaceBasePointer = rw_bo(bo, 0);             \
   }
4870
4871
/* ------------------------------------------------------------------- */
4872
#if GFX_VER >= 6
4873
/* 3DSTATE_CONSTANT_* command opcode (bits 23:16 sub-opcode) per shader
 * stage; compute has no push-constant packet on these gens (0).
 */
static const uint32_t push_constant_opcodes[] = {
   [MESA_SHADER_VERTEX]    = 21,
   [MESA_SHADER_TESS_CTRL] = 25, /* HS */
   [MESA_SHADER_TESS_EVAL] = 26, /* DS */
   [MESA_SHADER_GEOMETRY]  = 22,
   [MESA_SHADER_FRAGMENT]  = 23,
   [MESA_SHADER_COMPUTE]   = 0,
};
4881
#endif
4882
4883
/**
 * Stream out a null SURFACE_STATE with the given dimensions and store its
 * state-buffer offset in @out_offset.  Used to back unbound binding-table
 * slots so stray reads/writes are harmless.
 */
static void
emit_sized_null_surface(struct crocus_batch *batch,
                        unsigned width, unsigned height,
                        unsigned layers, unsigned levels,
                        unsigned minimum_array_element,
                        uint32_t *out_offset)
{
   struct isl_device *isl_dev = &batch->screen->isl_dev;
   uint32_t *surf = stream_state(batch, isl_dev->ss.size,
                                 isl_dev->ss.align,
                                 out_offset);
   //TODO gen 6 multisample crash
   isl_null_fill_state(isl_dev, surf,
                       .size = isl_extent3d(width, height, layers),
                       .levels = levels,
                       .minimum_array_element = minimum_array_element);
}
4900
/**
 * Emit a minimal 1x1x1 null surface (0 levels) for unbound slots.
 */
static void
emit_null_surface(struct crocus_batch *batch,
                  uint32_t *out_offset)
{
   emit_sized_null_surface(batch, 1, 1, 1, 0, 0, out_offset);
}
4906
4907
/**
 * Emit a null surface sized to match the current framebuffer.
 *
 * A null render target must still match the framebuffer dimensions so
 * that the depth/stencil test window is correct.  Falls back to 1x1x1
 * when no framebuffer has been set.
 */
static void
emit_null_fb_surface(struct crocus_batch *batch,
                     struct crocus_context *ice,
                     uint32_t *out_offset)
{
   uint32_t width, height, layers, level, layer;
   /* If set_framebuffer_state() was never called, fall back to 1x1x1 */
   if (ice->state.framebuffer.width == 0 && ice->state.framebuffer.height == 0) {
      emit_null_surface(batch, out_offset);
      return;
   }

   struct pipe_framebuffer_state *cso = &ice->state.framebuffer;
   width = MAX2(cso->width, 1);
   height = MAX2(cso->height, 1);
   layers = cso->layers ? cso->layers : 1;
   level = 0;
   layer = 0;

   /* Depth-only FBO: take the size/level/layer from the zs surface. */
   if (cso->nr_cbufs == 0 && cso->zsbuf) {
      width = cso->zsbuf->width;
      height = cso->zsbuf->height;
      level = cso->zsbuf->u.tex.level;
      layer = cso->zsbuf->u.tex.first_layer;
   }
   /* NOTE(review): level is passed as "levels" and layer as
    * "minimum_array_element" here — matches the helper's parameter order,
    * but the level->levels mapping looks intentional only for the null
    * surface encoding; confirm against isl_null_fill_state semantics.
    */
   emit_sized_null_surface(batch, width, height,
                           layers, level, layer,
                           out_offset);
}
4936
4937
/**
 * Fill a SURFACE_STATE at @surf_state (stream-state offset @addr_offset)
 * for @res viewed through @view.
 *
 * @adjust_surf  rewrite the surface to a single-image view for cases the
 *               HW can't address directly (3D rendering views, Gen4 cubes,
 *               1D arrays); may modify @view's base level/layer in place.
 * @writeable    marks the relocation as a GPU write.
 * @aux_usage    if not NONE, hooks up the auxiliary (MCS/HiZ/CCS) surface.
 * blend_enable/write_disables are only consumed on Gen4/5 where they live
 * in the surface state rather than blend state.
 */
static void
emit_surface_state(struct crocus_batch *batch,
                   struct crocus_resource *res,
                   const struct isl_surf *in_surf,
                   bool adjust_surf,
                   struct isl_view *view,
                   bool writeable,
                   enum isl_aux_usage aux_usage,
                   bool blend_enable,
                   uint32_t write_disables,
                   uint32_t *surf_state,
                   uint32_t addr_offset)
{
   const struct intel_device_info *devinfo = &batch->screen->devinfo;
   struct isl_device *isl_dev = &batch->screen->isl_dev;
   uint32_t reloc = RELOC_32BIT;
   uint32_t offset = res->offset, tile_x_sa = 0, tile_y_sa = 0;

   if (writeable)
      reloc |= RELOC_WRITE;

   struct isl_surf surf = *in_surf;
   if (adjust_surf) {
      /* Carve out a single-slice surface and fold the selected level/layer
       * into a byte offset (plus intra-tile x/y), then zero the view's
       * base so the state doesn't apply it twice.
       */
      if (res->base.b.target == PIPE_TEXTURE_3D && view->array_len == 1) {
         isl_surf_get_image_surf(isl_dev, in_surf,
                                 view->base_level, 0,
                                 view->base_array_layer,
                                 &surf, &offset,
                                 &tile_x_sa, &tile_y_sa);
         view->base_array_layer = 0;
         view->base_level = 0;
      } else if (res->base.b.target == PIPE_TEXTURE_CUBE && devinfo->ver == 4) {
         isl_surf_get_image_surf(isl_dev, in_surf,
                                 view->base_level, view->base_array_layer,
                                 0,
                                 &surf, &offset,
                                 &tile_x_sa, &tile_y_sa);
         view->base_array_layer = 0;
         view->base_level = 0;
      } else if (res->base.b.target == PIPE_TEXTURE_1D_ARRAY)
         surf.dim = ISL_SURF_DIM_2D;
   }

   union isl_color_value clear_color = { .u32 = { 0, 0, 0, 0 } };
   struct crocus_bo *aux_bo = NULL;
   uint32_t aux_offset = 0;
   struct isl_surf *aux_surf = NULL;
   if (aux_usage != ISL_AUX_USAGE_NONE) {
      aux_surf = &res->aux.surf;
      aux_offset = res->aux.offset;
      aux_bo = res->aux.bo;

      clear_color = crocus_resource_get_clear_color(res);
   }

   isl_surf_fill_state(isl_dev, surf_state,
                       .surf = &surf,
                       .view = view,
                       .address = crocus_state_reloc(batch,
                                                     addr_offset + isl_dev->ss.addr_offset,
                                                     res->bo, offset, reloc),
                       .aux_surf = aux_surf,
                       .aux_usage = aux_usage,
                       .aux_address = aux_offset,
                       .mocs = crocus_mocs(res->bo, isl_dev),
                       .clear_color = clear_color,
                       .use_clear_address = false,
                       .clear_address = 0,
                       .x_offset_sa = tile_x_sa,
                       .y_offset_sa = tile_y_sa,
#if GFX_VER <= 5
                       .blend_enable = blend_enable,
                       .write_disables = write_disables,
#endif
      );

   if (aux_surf) {
      /* On gen7 and prior, the upper 20 bits of surface state DWORD 6 are the
       * upper 20 bits of the GPU address of the MCS buffer; the lower 12 bits
       * contain other control information.  Since buffer addresses are always
       * on 4k boundaries (and thus have their lower 12 bits zero), we can use
       * an ordinary reloc to do the necessary address translation.
       *
       * FIXME: move to the point of assignment.
       */
      if (devinfo->ver == 8) {
         uint64_t *aux_addr = (uint64_t *)(surf_state + (isl_dev->ss.aux_addr_offset / 4));
         *aux_addr = crocus_state_reloc(batch,
                                        addr_offset + isl_dev->ss.aux_addr_offset,
                                        aux_bo, *aux_addr,
                                        reloc);
      } else {
         uint32_t *aux_addr = surf_state + (isl_dev->ss.aux_addr_offset / 4);
         *aux_addr = crocus_state_reloc(batch,
                                        addr_offset + isl_dev->ss.aux_addr_offset,
                                        aux_bo, *aux_addr,
                                        reloc);
      }
   }

}
5038
5039
/**
 * Stream out a writable SURFACE_STATE for a render-target surface and
 * return its state-buffer offset.
 */
static uint32_t
emit_surface(struct crocus_batch *batch,
             struct crocus_surface *surf,
             enum isl_aux_usage aux_usage,
             bool blend_enable,
             uint32_t write_disables)
{
   const struct intel_device_info *devinfo = &batch->screen->devinfo;
   struct isl_device *isl_dev = &batch->screen->isl_dev;
   struct crocus_resource *res = (struct crocus_resource *)surf->base.texture;
   struct isl_view *view = &surf->view;
   uint32_t offset = 0;
   enum pipe_texture_target target = res->base.b.target;
   bool adjust_surf = false;

   /* Gen4 can't render to cube faces directly; carve out a 2D image. */
   if (devinfo->ver == 4 && target == PIPE_TEXTURE_CUBE)
      adjust_surf = true;

   /* Use the re-aligned copy of the resource when one was required. */
   if (surf->align_res)
      res = (struct crocus_resource *)surf->align_res;

   uint32_t *surf_state = stream_state(batch, isl_dev->ss.size, isl_dev->ss.align, &offset);

   emit_surface_state(batch, res, &surf->surf, adjust_surf, view, true,
                      aux_usage, blend_enable,
                      write_disables,
                      surf_state, offset);
   return offset;
}
5068
5069
/**
 * Stream out a read-only SURFACE_STATE for framebuffer-fetch style
 * render-target reads (uses the surface's read view) and return its
 * state-buffer offset.
 */
static uint32_t
emit_rt_surface(struct crocus_batch *batch,
                struct crocus_surface *surf,
                enum isl_aux_usage aux_usage)
{
   struct isl_device *isl_dev = &batch->screen->isl_dev;
   struct crocus_resource *res = (struct crocus_resource *)surf->base.texture;
   struct isl_view *view = &surf->read_view;
   uint32_t offset = 0;
   uint32_t *surf_state = stream_state(batch, isl_dev->ss.size, isl_dev->ss.align, &offset);

   emit_surface_state(batch, res, &surf->surf, true, view, false,
                      aux_usage, 0, false,
                      surf_state, offset);
   return offset;
}
5085
5086
/**
 * Stream out a RAW buffer SURFACE_STATE over the compute grid-size buffer
 * (12 bytes, three 32-bit dimensions) and return its state-buffer offset.
 * Backs the gl_NumWorkGroups binding.
 */
static uint32_t
emit_grid(struct crocus_context *ice,
          struct crocus_batch *batch)
{
   UNUSED struct isl_device *isl_dev = &batch->screen->isl_dev;
   uint32_t offset = 0;
   struct crocus_state_ref *grid_ref = &ice->state.grid_size;
   uint32_t *surf_state = stream_state(batch, isl_dev->ss.size,
                                       isl_dev->ss.align, &offset);
   isl_buffer_fill_state(isl_dev, surf_state,
                         .address = crocus_state_reloc(batch, offset + isl_dev->ss.addr_offset,
                                                       crocus_resource_bo(grid_ref->res),
                                                       grid_ref->offset,
                                                       RELOC_32BIT),
                         .size_B = 12,
                         .format = ISL_FORMAT_RAW,
                         .stride_B = 1,
                         .mocs = crocus_mocs(crocus_resource_bo(grid_ref->res), isl_dev));
   return offset;
}
5106
5107
/**
 * Stream out a buffer SURFACE_STATE for a uniform (constant) buffer
 * binding and return its state-buffer offset.
 */
static uint32_t
emit_ubo_buffer(struct crocus_context *ice,
                struct crocus_batch *batch,
                struct pipe_constant_buffer *buffer)
{
   UNUSED struct isl_device *isl_dev = &batch->screen->isl_dev;
   uint32_t offset = 0;

   uint32_t *surf_state = stream_state(batch, isl_dev->ss.size,
                                       isl_dev->ss.align, &offset);
   isl_buffer_fill_state(isl_dev, surf_state,
                         .address = crocus_state_reloc(batch, offset + isl_dev->ss.addr_offset,
                                                       crocus_resource_bo(buffer->buffer),
                                                       buffer->buffer_offset,
                                                       RELOC_32BIT),
                         .size_B = buffer->buffer_size,
                         /* NOTE(review): format 0 is the first isl_format enum
                          * value (R32G32B32A32_FLOAT) — presumably intentional
                          * for constant buffers rather than RAW; confirm.
                          */
                         .format = 0,
                         .swizzle = ISL_SWIZZLE_IDENTITY,
                         .stride_B = 1,
                         .mocs = crocus_mocs(crocus_resource_bo(buffer->buffer), isl_dev));

   return offset;
}
5130
5131
/**
 * Stream out a RAW buffer SURFACE_STATE for a shader storage buffer and
 * return its state-buffer offset.  @writeable adds a write relocation.
 */
static uint32_t
emit_ssbo_buffer(struct crocus_context *ice,
                 struct crocus_batch *batch,
                 struct pipe_shader_buffer *buffer, bool writeable)
{
   UNUSED struct isl_device *isl_dev = &batch->screen->isl_dev;
   uint32_t offset = 0;
   uint32_t reloc = RELOC_32BIT;

   if (writeable)
      reloc |= RELOC_WRITE;
   uint32_t *surf_state = stream_state(batch, isl_dev->ss.size,
                                       isl_dev->ss.align, &offset);
   isl_buffer_fill_state(isl_dev, surf_state,
                         .address = crocus_state_reloc(batch, offset + isl_dev->ss.addr_offset,
                                                       crocus_resource_bo(buffer->buffer),
                                                       buffer->buffer_offset,
                                                       reloc),
                         .size_B = buffer->buffer_size,
                         .format = ISL_FORMAT_RAW,
                         .swizzle = ISL_SWIZZLE_IDENTITY,
                         .stride_B = 1,
                         .mocs = crocus_mocs(crocus_resource_bo(buffer->buffer), isl_dev));

   return offset;
}
5157
5158
/**
 * Stream out a SURFACE_STATE for a sampler view and return its
 * state-buffer offset.
 *
 * Buffer textures get a buffer surface clamped to the BO size and the
 * HW texture-buffer limit; image textures get a full surface state,
 * using the gather-specific view when @for_gather is set.
 */
static uint32_t
emit_sampler_view(struct crocus_context *ice,
                  struct crocus_batch *batch,
                  bool for_gather,
                  struct crocus_sampler_view *isv)
{
   UNUSED struct isl_device *isl_dev = &batch->screen->isl_dev;
   uint32_t offset = 0;

   uint32_t *surf_state = stream_state(batch, isl_dev->ss.size,
                                       isl_dev->ss.align, &offset);

   if (isv->base.target == PIPE_BUFFER) {
      const struct isl_format_layout *fmtl = isl_format_get_layout(isv->view.format);
      const unsigned cpp = isv->view.format == ISL_FORMAT_RAW ? 1 : fmtl->bpb / 8;
      /* Clamp to the smallest of: requested size, what the BO can hold,
       * and the HW maximum texel-buffer size.
       */
      unsigned final_size =
         MIN3(isv->base.u.buf.size, isv->res->bo->size - isv->res->offset,
              CROCUS_MAX_TEXTURE_BUFFER_SIZE * cpp);
      isl_buffer_fill_state(isl_dev, surf_state,
                            .address = crocus_state_reloc(batch, offset + isl_dev->ss.addr_offset,
                                                          isv->res->bo,
                                                          isv->res->offset + isv->base.u.buf.offset, RELOC_32BIT),
                            .size_B = final_size,
                            .format = isv->view.format,
                            .swizzle = isv->view.swizzle,
                            .stride_B = cpp,
                            .mocs = crocus_mocs(isv->res->bo, isl_dev)
         );
   } else {
      enum isl_aux_usage aux_usage =
         crocus_resource_texture_aux_usage(isv->res);

      emit_surface_state(batch, isv->res, &isv->res->surf, false,
                         for_gather ? &isv->gather_view : &isv->view,
                         false, aux_usage, false,
                         0, surf_state, offset);
   }
   return offset;
}
5197
5198
/**
 * Stream out a SURFACE_STATE for a shader image binding and return its
 * state-buffer offset.
 *
 * Buffer images and RAW-format typed images become buffer surfaces;
 * everything else gets a full image surface state.  Write access adds a
 * write relocation.
 */
static uint32_t
emit_image_view(struct crocus_context *ice,
                struct crocus_batch *batch,
                struct crocus_image_view *iv)
{
   UNUSED struct isl_device *isl_dev = &batch->screen->isl_dev;
   uint32_t offset = 0;

   struct crocus_resource *res = (struct crocus_resource *)iv->base.resource;
   uint32_t *surf_state = stream_state(batch, isl_dev->ss.size,
                                       isl_dev->ss.align, &offset);
   bool write = iv->base.shader_access & PIPE_IMAGE_ACCESS_WRITE;
   uint32_t reloc = RELOC_32BIT | (write ? RELOC_WRITE : 0);
   if (res->base.b.target == PIPE_BUFFER) {
      const struct isl_format_layout *fmtl = isl_format_get_layout(iv->view.format);
      const unsigned cpp = iv->view.format == ISL_FORMAT_RAW ? 1 : fmtl->bpb / 8;
      /* Clamp to requested size, remaining BO space, and HW buffer limit. */
      unsigned final_size =
         MIN3(iv->base.u.buf.size, res->bo->size - res->offset - iv->base.u.buf.offset,
              CROCUS_MAX_TEXTURE_BUFFER_SIZE * cpp);
      isl_buffer_fill_state(isl_dev, surf_state,
                            .address = crocus_state_reloc(batch, offset + isl_dev->ss.addr_offset,
                                                          res->bo,
                                                          res->offset + iv->base.u.buf.offset, reloc),
                            .size_B = final_size,
                            .format = iv->view.format,
                            .swizzle = iv->view.swizzle,
                            .stride_B = cpp,
                            .mocs = crocus_mocs(res->bo, isl_dev)
         );
   } else {
      if (iv->view.format == ISL_FORMAT_RAW) {
         /* Untyped (RAW) image access: expose the whole resource as a
          * byte-addressed buffer surface.
          */
         isl_buffer_fill_state(isl_dev, surf_state,
                               .address = crocus_state_reloc(batch, offset + isl_dev->ss.addr_offset,
                                                             res->bo,
                                                             res->offset, reloc),
                               .size_B = res->bo->size - res->offset,
                               .format = iv->view.format,
                               .swizzle = iv->view.swizzle,
                               .stride_B = 1,
                               .mocs = crocus_mocs(res->bo, isl_dev),
            );


      } else {
         emit_surface_state(batch, res,
                            &res->surf, false, &iv->view,
                            write, 0, false,
                            0, surf_state, offset);
      }
   }

   return offset;
}
5251
5252
#if GFX_VER == 6
5253
/**
 * Gen6 only: stream out a buffer SURFACE_STATE for one transform-feedback
 * output (the GS writes SOL data through binding-table surfaces on this
 * generation) and return its state-buffer offset, or 0 when streamout is
 * inactive or @idx is out of range.
 */
static uint32_t
emit_sol_surface(struct crocus_batch *batch,
                 struct pipe_stream_output_info *so_info,
                 uint32_t idx)
{
   struct crocus_context *ice = batch->ice;

   if (idx >= so_info->num_outputs || !ice->state.streamout_active)
      return 0;
   const struct pipe_stream_output *output = &so_info->output[idx];
   const int buffer = output->output_buffer;
   assert(output->stream == 0);

   struct crocus_resource *buf = (struct crocus_resource *)ice->state.so_target[buffer]->buffer;
   unsigned stride_dwords = so_info->stride[buffer];
   unsigned offset_dwords = ice->state.so_target[buffer]->buffer_offset / 4 + output->dst_offset;

   size_t size_dwords = (ice->state.so_target[buffer]->buffer_offset + ice->state.so_target[buffer]->buffer_size) / 4;
   unsigned num_vector_components = output->num_components;
   unsigned num_elements;
   /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
    * too big to map using a single binding table entry?
    */
   //   assert((size_dwords - offset_dwords) / stride_dwords
   //          <= BRW_MAX_NUM_BUFFER_ENTRIES);

   if (size_dwords > offset_dwords + num_vector_components) {
      /* There is room for at least 1 transform feedback output in the buffer.
       * Compute the number of additional transform feedback outputs the
       * buffer has room for.
       */
      num_elements =
         (size_dwords - offset_dwords - num_vector_components);
   } else {
      /* There isn't even room for a single transform feedback output in the
       * buffer. We can't configure the binding table entry to prevent output
       * entirely; we'll have to rely on the geometry shader to detect
       * overflow. But to minimize the damage in case of a bug, set up the
       * binding table entry to just allow a single output.
       */
      num_elements = 0;
   }
   /* NOTE(review): sizing the surface as (num_elements + stride) dwords —
    * presumably to cover the final partially-strided element; confirm
    * against the Gen6 SOL binding-table layout.
    */
   num_elements += stride_dwords;

   /* One R32-based float format per output vector width. */
   uint32_t surface_format;
   switch (num_vector_components) {
   case 1:
      surface_format = ISL_FORMAT_R32_FLOAT;
      break;
   case 2:
      surface_format = ISL_FORMAT_R32G32_FLOAT;
      break;
   case 3:
      surface_format = ISL_FORMAT_R32G32B32_FLOAT;
      break;
   case 4:
      surface_format = ISL_FORMAT_R32G32B32A32_FLOAT;
      break;
   default:
      unreachable("Invalid vector size for transform feedback output");
   }

   UNUSED struct isl_device *isl_dev = &batch->screen->isl_dev;
   uint32_t offset = 0;

   uint32_t *surf_state = stream_state(batch, isl_dev->ss.size,
                                       isl_dev->ss.align, &offset);
   isl_buffer_fill_state(isl_dev, surf_state,
                         .address = crocus_state_reloc(batch, offset + isl_dev->ss.addr_offset,
                                                       crocus_resource_bo(&buf->base.b),
                                                       offset_dwords * 4, RELOC_32BIT|RELOC_WRITE),
                         .size_B = num_elements * 4,
                         .stride_B = stride_dwords * 4,
                         .swizzle = ISL_SWIZZLE_IDENTITY,
                         .format = surface_format);
   return offset;
}
5330
#endif
5331
5332
/* Iterate "index" over every entry of binding-table group "group" that is
 * actually mapped to a binding-table index.  Requires a local "bt"
 * pointing at the shader's crocus_binding_table.
 */
#define foreach_surface_used(index, group) \
   for (int index = 0; index < bt->sizes[group]; index++) \
      if (crocus_group_index_to_bti(bt, group, index) != \
          CROCUS_SURFACE_NOT_USED)
5336
5337
static void
5338
crocus_populate_binding_table(struct crocus_context *ice,
5339
struct crocus_batch *batch,
5340
gl_shader_stage stage, bool ff_gs)
5341
{
5342
struct crocus_compiled_shader *shader = ff_gs ? ice->shaders.ff_gs_prog : ice->shaders.prog[stage];
5343
struct crocus_shader_state *shs = ff_gs ? NULL : &ice->state.shaders[stage];
5344
if (!shader)
5345
return;
5346
5347
struct crocus_binding_table *bt = &shader->bt;
5348
int s = 0;
5349
uint32_t *surf_offsets = shader->surf_offset;
5350
5351
#if GFX_VER < 8
5352
const struct shader_info *info = crocus_get_shader_info(ice, stage);
5353
#endif
5354
5355
if (stage == MESA_SHADER_FRAGMENT) {
5356
struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
5357
/* Note that cso_fb->nr_cbufs == fs_key->nr_color_regions. */
5358
if (cso_fb->nr_cbufs) {
5359
for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) {
5360
uint32_t write_disables = 0;
5361
bool blend_enable = false;
5362
#if GFX_VER <= 5
5363
const struct pipe_rt_blend_state *rt =
5364
&ice->state.cso_blend->cso.rt[ice->state.cso_blend->cso.independent_blend_enable ? i : 0];
5365
struct crocus_compiled_shader *shader = ice->shaders.prog[MESA_SHADER_FRAGMENT];
5366
struct brw_wm_prog_data *wm_prog_data = (void *) shader->prog_data;
5367
write_disables |= (rt->colormask & PIPE_MASK_A) ? 0x0 : 0x8;
5368
write_disables |= (rt->colormask & PIPE_MASK_R) ? 0x0 : 0x4;
5369
write_disables |= (rt->colormask & PIPE_MASK_G) ? 0x0 : 0x2;
5370
write_disables |= (rt->colormask & PIPE_MASK_B) ? 0x0 : 0x1;
5371
/* Gen4/5 can't handle blending off when a dual src blend wm is enabled. */
5372
blend_enable = rt->blend_enable || wm_prog_data->dual_src_blend;
5373
#endif
5374
if (cso_fb->cbufs[i]) {
5375
surf_offsets[s] = emit_surface(batch,
5376
(struct crocus_surface *)cso_fb->cbufs[i],
5377
ice->state.draw_aux_usage[i],
5378
blend_enable,
5379
write_disables);
5380
} else {
5381
emit_null_fb_surface(batch, ice, &surf_offsets[s]);
5382
}
5383
s++;
5384
}
5385
} else {
5386
emit_null_fb_surface(batch, ice, &surf_offsets[s]);
5387
s++;
5388
}
5389
5390
foreach_surface_used(i, CROCUS_SURFACE_GROUP_RENDER_TARGET_READ) {
5391
struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
5392
if (cso_fb->cbufs[i]) {
5393
surf_offsets[s++] = emit_rt_surface(batch,
5394
(struct crocus_surface *)cso_fb->cbufs[i],
5395
ice->state.draw_aux_usage[i]);
5396
}
5397
}
5398
}
5399
5400
if (stage == MESA_SHADER_COMPUTE) {
5401
foreach_surface_used(i, CROCUS_SURFACE_GROUP_CS_WORK_GROUPS) {
5402
surf_offsets[s] = emit_grid(ice, batch);
5403
s++;
5404
}
5405
}
5406
5407
#if GFX_VER == 6
5408
if (stage == MESA_SHADER_GEOMETRY) {
5409
struct pipe_stream_output_info *so_info;
5410
if (ice->shaders.uncompiled[MESA_SHADER_GEOMETRY])
5411
so_info = &ice->shaders.uncompiled[MESA_SHADER_GEOMETRY]->stream_output;
5412
else
5413
so_info = &ice->shaders.uncompiled[MESA_SHADER_VERTEX]->stream_output;
5414
5415
foreach_surface_used(i, CROCUS_SURFACE_GROUP_SOL) {
5416
surf_offsets[s] = emit_sol_surface(batch, so_info, i);
5417
s++;
5418
}
5419
}
5420
#endif
5421
5422
foreach_surface_used(i, CROCUS_SURFACE_GROUP_TEXTURE) {
5423
struct crocus_sampler_view *view = shs->textures[i];
5424
if (view)
5425
surf_offsets[s] = emit_sampler_view(ice, batch, false, view);
5426
else
5427
emit_null_surface(batch, &surf_offsets[s]);
5428
s++;
5429
}
5430
5431
#if GFX_VER < 8
5432
if (info && info->uses_texture_gather) {
5433
foreach_surface_used(i, CROCUS_SURFACE_GROUP_TEXTURE_GATHER) {
5434
struct crocus_sampler_view *view = shs->textures[i];
5435
if (view)
5436
surf_offsets[s] = emit_sampler_view(ice, batch, true, view);
5437
else
5438
emit_null_surface(batch, &surf_offsets[s]);
5439
s++;
5440
}
5441
}
5442
#endif
5443
5444
foreach_surface_used(i, CROCUS_SURFACE_GROUP_IMAGE) {
5445
struct crocus_image_view *view = &shs->image[i];
5446
if (view->base.resource)
5447
surf_offsets[s] = emit_image_view(ice, batch, view);
5448
else
5449
emit_null_surface(batch, &surf_offsets[s]);
5450
s++;
5451
}
5452
foreach_surface_used(i, CROCUS_SURFACE_GROUP_UBO) {
5453
if (shs->constbufs[i].buffer)
5454
surf_offsets[s] = emit_ubo_buffer(ice, batch, &shs->constbufs[i]);
5455
else
5456
emit_null_surface(batch, &surf_offsets[s]);
5457
s++;
5458
}
5459
foreach_surface_used(i, CROCUS_SURFACE_GROUP_SSBO) {
5460
if (shs->ssbo[i].buffer)
5461
surf_offsets[s] = emit_ssbo_buffer(ice, batch, &shs->ssbo[i],
5462
!!(shs->writable_ssbos & (1 << i)));
5463
else
5464
emit_null_surface(batch, &surf_offsets[s]);
5465
s++;
5466
}
5467
5468
}
5469
/* ------------------------------------------------------------------- */
5470
/* Upload a shader stage's binding table to the batch's state buffer.
 *
 * Returns the state-buffer offset of the uploaded table (32-byte aligned),
 * or 0 when the table is empty and nothing needs uploading.
 */
static uint32_t
crocus_upload_binding_table(struct crocus_context *ice,
                            struct crocus_batch *batch,
                            uint32_t *table,
                            uint32_t size)
{
   return size ? emit_state(batch, table, size, 32) : 0;
}
5481
5482
/**
5483
* Possibly emit STATE_BASE_ADDRESS to update Surface State Base Address.
5484
*/
5485
5486
/* Emit STATE_BASE_ADDRESS (at most once per batch) so that surface state,
 * dynamic state, and shader instructions are addressed relative to the
 * batch's state buffer / shader cache BO.  Flushes are required around the
 * packet, and on older gens some pointer packets must be re-emitted after.
 */
static void
crocus_update_surface_base_address(struct crocus_batch *batch)
{
   /* Only needs to happen once per batch; all later calls are no-ops. */
   if (batch->state_base_address_emitted)
      return;
#if GFX_VER >= 6
   uint32_t mocs = batch->screen->isl_dev.mocs.internal;
#endif
   /* SBA changes require pipeline flushes on either side. */
   flush_before_state_base_change(batch);

   crocus_emit_cmd(batch, GENX(STATE_BASE_ADDRESS), sba) {

      sba.SurfaceStateBaseAddressModifyEnable = true;
      sba.SurfaceStateBaseAddress = ro_bo(batch->state.bo, 0);

#if GFX_VER >= 5
      sba.InstructionBaseAddress = ro_bo(batch->ice->shaders.cache_bo, 0); // TODO!
#endif

      sba.GeneralStateBaseAddressModifyEnable = true;
      sba.IndirectObjectBaseAddressModifyEnable = true;
#if GFX_VER >= 5
      sba.InstructionBaseAddressModifyEnable = true;
#endif

#if GFX_VER < 8
      sba.GeneralStateAccessUpperBoundModifyEnable = true;
#endif
#if GFX_VER >= 5 && GFX_VER < 8
      sba.IndirectObjectAccessUpperBoundModifyEnable = true;
      sba.InstructionAccessUpperBoundModifyEnable = true;
#endif
#if GFX_VER <= 5
      sba.GeneralStateAccessUpperBound = ro_bo(NULL, 0xfffff000);
#endif
#if GFX_VER >= 6
      /* The hardware appears to pay attention to the MOCS fields even
       * if you don't set the "Address Modify Enable" bit for the base.
       */
      sba.GeneralStateMOCS = mocs;
      sba.StatelessDataPortAccessMOCS = mocs;
#if GFX_VER == 8
      sba.DynamicStateMOCS = mocs;
      sba.IndirectObjectMOCS = mocs;
      sba.InstructionMOCS = mocs;
      sba.SurfaceStateMOCS = mocs;
      /* Gen8 replaces the "upper bound" fields with explicit buffer sizes. */
      sba.GeneralStateBufferSize = 0xfffff;
      sba.IndirectObjectBufferSize = 0xfffff;
      sba.InstructionBufferSize = 0xfffff;
      sba.DynamicStateBufferSize = MAX_STATE_SIZE;

      sba.GeneralStateBufferSizeModifyEnable = true;
      sba.DynamicStateBufferSizeModifyEnable = true;
      sba.IndirectObjectBufferSizeModifyEnable = true;
      sba.InstructionBuffersizeModifyEnable = true;
#endif

      sba.DynamicStateBaseAddressModifyEnable = true;

      sba.DynamicStateBaseAddress = ro_bo(batch->state.bo, 0);

      /* Dynamic state upper bound.  Although the documentation says that
       * programming it to zero will cause it to be ignored, that is a lie.
       * If this isn't programmed to a real bound, the sampler border color
       * pointer is rejected, causing border color to mysteriously fail.
       */
#if GFX_VER < 8
      sba.DynamicStateAccessUpperBoundModifyEnable = true;
      sba.DynamicStateAccessUpperBound = ro_bo(NULL, 0xfffff000);
#endif

#endif
   }

   flush_after_state_base_change(batch);

   /* According to section 3.6.1 of VOL1 of the 965 PRM,
    * STATE_BASE_ADDRESS updates require a reissue of:
    *
    *    3DSTATE_PIPELINE_POINTERS
    *    3DSTATE_BINDING_TABLE_POINTERS
    *    MEDIA_STATE_POINTERS
    *
    * and this continues through Ironlake.  The Sandy Bridge PRM, vol
    * 1 part 1 says that the following packets must be reissued:
    *
    *    3DSTATE_CC_POINTERS
    *    3DSTATE_BINDING_TABLE_POINTERS
    *    3DSTATE_SAMPLER_STATE_POINTERS
    *    3DSTATE_VIEWPORT_STATE_POINTERS
    *    MEDIA_STATE_POINTERS
    *
    * Those are always reissued following SBA updates anyway (new
    * batch time), except in the case of the program cache BO
    * changing.  Having a separate state flag makes the sequence more
    * obvious.
    */
#if GFX_VER <= 5
   batch->ice->state.dirty |= CROCUS_DIRTY_GEN5_PIPELINED_POINTERS | CROCUS_DIRTY_GEN5_BINDING_TABLE_POINTERS;
#elif GFX_VER == 6
   batch->ice->state.dirty |= CROCUS_DIRTY_GEN5_BINDING_TABLE_POINTERS | CROCUS_DIRTY_GEN6_SAMPLER_STATE_POINTERS;
#endif
   batch->state_base_address_emitted = true;
}
5590
5591
/* Compute the depth range [*zmin, *zmax] for a viewport.
 *
 * With window-space positions the full [0, 1] range is used; otherwise the
 * range is derived from the viewport transform (honoring the half-z
 * convention) via the shared gallium helper.
 */
static inline void
crocus_viewport_zmin_zmax(const struct pipe_viewport_state *vp, bool halfz,
                          bool window_space_position, float *zmin, float *zmax)
{
   if (!window_space_position) {
      util_viewport_zmin_zmax(vp, halfz, zmin, zmax);
   } else {
      *zmin = 0.0f;
      *zmax = 1.0f;
   }
}
5602
5603
/* Push-constant buffer ranges collected for one shader stage.
 * Filled in by setup_constant_buffers() and consumed by
 * emit_push_constant_packets().
 */
struct push_bos {
   struct {
      struct crocus_address addr;   /* GPU address of the range start */
      uint32_t length;              /* range length; presumably in 32-byte
                                     * units (matches range->start * 32 in
                                     * setup_constant_buffers) — verify */
   } buffers[4];
   int buffer_count;                /* number of valid entries in buffers[] */
   uint32_t max_length;             /* largest length across all entries */
};
5611
5612
#if GFX_VER >= 6
5613
/* Gather the push-constant UBO ranges for a shader stage into push_bos.
 *
 * Walks the compiled shader's (up to four) ubo_ranges, maps each range's
 * binding-table index back to the gallium constant-buffer slot, and records
 * the GPU address and length of each range.  Ranges whose backing resource
 * is missing point at the context's workaround BO instead.
 */
static void
setup_constant_buffers(struct crocus_context *ice,
                       struct crocus_batch *batch,
                       int stage,
                       struct push_bos *push_bos)
{
   struct crocus_shader_state *shs = &ice->state.shaders[stage];
   struct crocus_compiled_shader *shader = ice->shaders.prog[stage];
   struct brw_stage_prog_data *prog_data = (void *) shader->prog_data;

   uint32_t push_range_sum = 0;

   int n = 0;
   for (int i = 0; i < 4; i++) {
      const struct brw_ubo_range *range = &prog_data->ubo_ranges[i];

      /* Unused ranges have zero length; skip them. */
      if (range->length == 0)
         continue;

      push_range_sum += range->length;

      if (range->length > push_bos->max_length)
         push_bos->max_length = range->length;

      /* Range block is a binding table index, map back to UBO index. */
      unsigned block_index = crocus_bti_to_group_index(
         &shader->bt, CROCUS_SURFACE_GROUP_UBO, range->block);
      assert(block_index != CROCUS_SURFACE_NOT_USED);

      struct pipe_constant_buffer *cbuf = &shs->constbufs[block_index];
      struct crocus_resource *res = (void *) cbuf->buffer;

      /* Push constant reads are in 32-byte units; offsets must align. */
      assert(cbuf->buffer_offset % 32 == 0);

      push_bos->buffers[n].length = range->length;
      push_bos->buffers[n].addr =
         res ? ro_bo(res->bo, range->start * 32 + cbuf->buffer_offset)
         : ro_bo(batch->ice->workaround_bo,
                 batch->ice->workaround_offset);
      n++;
   }

   /* From the 3DSTATE_CONSTANT_XS and 3DSTATE_CONSTANT_ALL programming notes:
    *
    *    "The sum of all four read length fields must be less than or
    *    equal to the size of 64."
    */
   assert(push_range_sum <= 64);

   push_bos->buffer_count = n;
}
5664
5665
#if GFX_VER == 7
5666
/* Gen7 VS hardware workaround: emit a PIPE_CONTROL with a depth stall and
 * an immediate write into the context's scratch workaround BO before
 * touching VS-related state.  Only valid on ver == 7 hardware.
 */
static void
gen7_emit_vs_workaround_flush(struct crocus_batch *batch)
{
   ASSERTED const struct intel_device_info *devinfo = &batch->screen->devinfo;

   assert(devinfo->ver == 7);
   crocus_emit_pipe_control_write(batch,
                                  "vs workaround",
                                  PIPE_CONTROL_WRITE_IMMEDIATE
                                  | PIPE_CONTROL_DEPTH_STALL,
                                  batch->ice->workaround_bo,
                                  batch->ice->workaround_offset, 0);
}
5679
#endif
5680
5681
static void
5682
emit_push_constant_packets(struct crocus_context *ice,
5683
struct crocus_batch *batch,
5684
int stage,
5685
const struct push_bos *push_bos)
5686
{
5687
struct crocus_compiled_shader *shader = ice->shaders.prog[stage];
5688
struct brw_stage_prog_data *prog_data = shader ? (void *) shader->prog_data : NULL;
5689
5690
#if GFX_VER == 7
5691
if (stage == MESA_SHADER_VERTEX) {
5692
if (!(GFX_VERx10 == 75) && !batch->screen->devinfo.is_baytrail)
5693
gen7_emit_vs_workaround_flush(batch);
5694
}
5695
#endif
5696
crocus_emit_cmd(batch, GENX(3DSTATE_CONSTANT_VS), pkt) {
5697
pkt._3DCommandSubOpcode = push_constant_opcodes[stage];
5698
#if GFX_VER >= 7
5699
if (prog_data) {
5700
/* The Skylake PRM contains the following restriction:
5701
*
5702
* "The driver must ensure The following case does not occur
5703
* without a flush to the 3D engine: 3DSTATE_CONSTANT_* with
5704
* buffer 3 read length equal to zero committed followed by a
5705
* 3DSTATE_CONSTANT_* with buffer 0 read length not equal to
5706
* zero committed."
5707
*
5708
* To avoid this, we program the buffers in the highest slots.
5709
* This way, slot 0 is only used if slot 3 is also used.
5710
*/
5711
int n = push_bos->buffer_count;
5712
assert(n <= 4);
5713
#if GFX_VERx10 >= 75
5714
const unsigned shift = 4 - n;
5715
#else
5716
const unsigned shift = 0;
5717
#endif
5718
for (int i = 0; i < n; i++) {
5719
pkt.ConstantBody.ReadLength[i + shift] =
5720
push_bos->buffers[i].length;
5721
pkt.ConstantBody.Buffer[i + shift] = push_bos->buffers[i].addr;
5722
}
5723
}
5724
#else
5725
if (prog_data) {
5726
int n = push_bos->buffer_count;
5727
assert (n <= 1);
5728
if (n == 1) {
5729
pkt.Buffer0Valid = true;
5730
pkt.ConstantBody.PointertoConstantBuffer0 = push_bos->buffers[0].addr.offset;
5731
pkt.ConstantBody.ConstantBuffer0ReadLength = push_bos->buffers[0].length - 1;
5732
}
5733
}
5734
#endif
5735
}
5736
}
5737
5738
#endif
5739
5740
/* Per-generation container for the shared depth/stencil bit translation in
 * set_depth_stencil_bits(): gen8 packs these bits directly into the
 * 3DSTATE_WM_DEPTH_STENCIL command, gen6/7 into DEPTH_STENCIL_STATE, and
 * gen4/5 into COLOR_CALC_STATE.
 */
#if GFX_VER == 8
typedef struct GENX(3DSTATE_WM_DEPTH_STENCIL) DEPTH_STENCIL_GENXML;
#elif GFX_VER >= 6
typedef struct GENX(DEPTH_STENCIL_STATE) DEPTH_STENCIL_GENXML;
#else
typedef struct GENX(COLOR_CALC_STATE) DEPTH_STENCIL_GENXML;
#endif
5747
5748
/* Translate the currently-bound gallium depth/stencil/alpha CSO into the
 * generation-specific depth-stencil structure (see DEPTH_STENCIL_GENXML).
 * stencil[0] holds front-face state, stencil[1] back-face state.
 */
static inline void
set_depth_stencil_bits(struct crocus_context *ice, DEPTH_STENCIL_GENXML *ds)
{
   struct crocus_depth_stencil_alpha_state *cso = ice->state.cso_zsa;
   ds->DepthTestEnable = cso->cso.depth_enabled;
   ds->DepthBufferWriteEnable = cso->cso.depth_writemask;
   ds->DepthTestFunction = translate_compare_func(cso->cso.depth_func);

   /* Front-face stencil state. */
   ds->StencilFailOp = cso->cso.stencil[0].fail_op;
   ds->StencilPassDepthFailOp = cso->cso.stencil[0].zfail_op;
   ds->StencilPassDepthPassOp = cso->cso.stencil[0].zpass_op;
   ds->StencilTestFunction = translate_compare_func(cso->cso.stencil[0].func);

   ds->StencilTestMask = cso->cso.stencil[0].valuemask;
   ds->StencilWriteMask = cso->cso.stencil[0].writemask;

   /* Back-face stencil state. */
   ds->BackfaceStencilFailOp = cso->cso.stencil[1].fail_op;
   ds->BackfaceStencilPassDepthFailOp = cso->cso.stencil[1].zfail_op;
   ds->BackfaceStencilPassDepthPassOp = cso->cso.stencil[1].zpass_op;
   ds->BackfaceStencilTestFunction = translate_compare_func(cso->cso.stencil[1].func);

   ds->BackfaceStencilTestMask = cso->cso.stencil[1].valuemask;
   ds->BackfaceStencilWriteMask = cso->cso.stencil[1].writemask;
   ds->DoubleSidedStencilEnable = cso->cso.stencil[1].enabled;
   ds->StencilTestEnable = cso->cso.stencil[0].enabled;
   /* Writes happen if either face can actually modify the buffer. */
   ds->StencilBufferWriteEnable =
      cso->cso.stencil[0].writemask != 0 ||
      (cso->cso.stencil[1].enabled && cso->cso.stencil[1].writemask != 0);
}
5777
5778
/* Pack a single VERTEX_BUFFER_STATE entry at *map and advance *map past it.
 *
 * buffer_id     - hardware vertex buffer slot index
 * bo            - buffer object backing the vertex data
 * start_offset  - byte offset of the first vertex in bo
 * end_offset    - byte offset one past the last valid byte
 * stride        - pitch between elements, in bytes
 * step_rate     - 0 for per-vertex data; non-zero selects per-instance
 *                 stepping (pre-gen8 only)
 */
static void
emit_vertex_buffer_state(struct crocus_batch *batch,
                         unsigned buffer_id,
                         struct crocus_bo *bo,
                         unsigned start_offset,
                         unsigned end_offset,
                         unsigned stride,
                         unsigned step_rate,
                         uint32_t **map)
{
   const unsigned vb_dwords = GENX(VERTEX_BUFFER_STATE_length);
   _crocus_pack_state(batch, GENX(VERTEX_BUFFER_STATE), *map, vb) {
      vb.BufferStartingAddress = ro_bo(bo, start_offset);
#if GFX_VER >= 8
      /* Gen8+ uses an explicit size instead of an end address. */
      vb.BufferSize = end_offset - start_offset;
#endif
      vb.VertexBufferIndex = buffer_id;
      vb.BufferPitch = stride;
#if GFX_VER >= 7
      vb.AddressModifyEnable = true;
#endif
#if GFX_VER >= 6
      vb.MOCS = crocus_mocs(bo, &batch->screen->isl_dev);
#endif
#if GFX_VER < 8
      vb.BufferAccessType = step_rate ? INSTANCEDATA : VERTEXDATA;
      vb.InstanceDataStepRate = step_rate;
#if GFX_VER >= 5
      /* EndAddress points at the last valid byte (inclusive). */
      vb.EndAddress = ro_bo(bo, end_offset - 1);
#endif
#endif
   }
   /* Advance the caller's write pointer past this entry. */
   *map += vb_dwords;
}
5812
5813
#if GFX_VER >= 6
5814
static uint32_t
5815
determine_sample_mask(struct crocus_context *ice)
5816
{
5817
uint32_t num_samples = ice->state.framebuffer.samples;
5818
5819
if (num_samples <= 1)
5820
return 1;
5821
5822
uint32_t fb_mask = (1 << num_samples) - 1;
5823
return ice->state.sample_mask & fb_mask;
5824
}
5825
#endif
5826
5827
static void
5828
crocus_upload_dirty_render_state(struct crocus_context *ice,
5829
struct crocus_batch *batch,
5830
const struct pipe_draw_info *draw)
5831
{
5832
uint64_t dirty = ice->state.dirty;
5833
uint64_t stage_dirty = ice->state.stage_dirty;
5834
5835
if (!(dirty & CROCUS_ALL_DIRTY_FOR_RENDER) &&
5836
!(stage_dirty & CROCUS_ALL_STAGE_DIRTY_FOR_RENDER))
5837
return;
5838
5839
if (dirty & CROCUS_DIRTY_VF_STATISTICS) {
5840
crocus_emit_cmd(batch, GENX(3DSTATE_VF_STATISTICS), vf) {
5841
vf.StatisticsEnable = true;
5842
}
5843
}
5844
5845
#if GFX_VER <= 5
5846
if (stage_dirty & (CROCUS_STAGE_DIRTY_CONSTANTS_VS |
5847
CROCUS_STAGE_DIRTY_CONSTANTS_FS)) {
5848
bool ret = calculate_curbe_offsets(batch);
5849
if (ret) {
5850
dirty |= CROCUS_DIRTY_GEN4_CURBE | CROCUS_DIRTY_WM | CROCUS_DIRTY_CLIP;
5851
stage_dirty |= CROCUS_STAGE_DIRTY_VS;
5852
}
5853
}
5854
5855
if (dirty & (CROCUS_DIRTY_GEN4_CURBE | CROCUS_DIRTY_RASTER) ||
5856
stage_dirty & CROCUS_STAGE_DIRTY_VS) {
5857
bool ret = crocus_calculate_urb_fence(batch, ice->curbe.total_size,
5858
brw_vue_prog_data(ice->shaders.prog[MESA_SHADER_VERTEX]->prog_data)->urb_entry_size,
5859
((struct brw_sf_prog_data *)ice->shaders.sf_prog->prog_data)->urb_entry_size);
5860
if (ret)
5861
dirty |= CROCUS_DIRTY_GEN5_PIPELINED_POINTERS;
5862
}
5863
#endif
5864
if (dirty & CROCUS_DIRTY_CC_VIEWPORT) {
5865
const struct crocus_rasterizer_state *cso_rast = ice->state.cso_rast;
5866
uint32_t cc_vp_address;
5867
5868
/* XXX: could avoid streaming for depth_clip [0,1] case. */
5869
uint32_t *cc_vp_map =
5870
stream_state(batch,
5871
4 * ice->state.num_viewports *
5872
GENX(CC_VIEWPORT_length), 32, &cc_vp_address);
5873
for (int i = 0; i < ice->state.num_viewports; i++) {
5874
float zmin, zmax;
5875
crocus_viewport_zmin_zmax(&ice->state.viewports[i], cso_rast->cso.clip_halfz,
5876
ice->state.window_space_position,
5877
&zmin, &zmax);
5878
if (cso_rast->cso.depth_clip_near)
5879
zmin = 0.0;
5880
if (cso_rast->cso.depth_clip_far)
5881
zmax = 1.0;
5882
5883
crocus_pack_state(GENX(CC_VIEWPORT), cc_vp_map, ccv) {
5884
ccv.MinimumDepth = zmin;
5885
ccv.MaximumDepth = zmax;
5886
}
5887
5888
cc_vp_map += GENX(CC_VIEWPORT_length);
5889
}
5890
5891
#if GFX_VER >= 7
5892
crocus_emit_cmd(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), ptr) {
5893
ptr.CCViewportPointer = cc_vp_address;
5894
}
5895
#elif GFX_VER == 6
5896
crocus_emit_cmd(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS), vp) {
5897
vp.CCViewportStateChange = 1;
5898
vp.PointertoCC_VIEWPORT = cc_vp_address;
5899
}
5900
#else
5901
ice->state.cc_vp_address = cc_vp_address;
5902
dirty |= CROCUS_DIRTY_COLOR_CALC_STATE;
5903
#endif
5904
}
5905
5906
if (dirty & CROCUS_DIRTY_SF_CL_VIEWPORT) {
5907
struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
5908
#if GFX_VER >= 7
5909
uint32_t sf_cl_vp_address;
5910
uint32_t *vp_map =
5911
stream_state(batch,
5912
4 * ice->state.num_viewports *
5913
GENX(SF_CLIP_VIEWPORT_length), 64, &sf_cl_vp_address);
5914
#else
5915
uint32_t *vp_map =
5916
stream_state(batch,
5917
4 * ice->state.num_viewports * GENX(SF_VIEWPORT_length),
5918
32, &ice->state.sf_vp_address);
5919
uint32_t *clip_map =
5920
stream_state(batch,
5921
4 * ice->state.num_viewports * GENX(CLIP_VIEWPORT_length),
5922
32, &ice->state.clip_vp_address);
5923
#endif
5924
5925
for (unsigned i = 0; i < ice->state.num_viewports; i++) {
5926
const struct pipe_viewport_state *state = &ice->state.viewports[i];
5927
float gb_xmin, gb_xmax, gb_ymin, gb_ymax;
5928
5929
#if GFX_VER == 8
5930
float vp_xmin = viewport_extent(state, 0, -1.0f);
5931
float vp_xmax = viewport_extent(state, 0, 1.0f);
5932
float vp_ymin = viewport_extent(state, 1, -1.0f);
5933
float vp_ymax = viewport_extent(state, 1, 1.0f);
5934
#endif
5935
intel_calculate_guardband_size(cso_fb->width, cso_fb->height,
5936
state->scale[0], state->scale[1],
5937
state->translate[0], state->translate[1],
5938
&gb_xmin, &gb_xmax, &gb_ymin, &gb_ymax);
5939
#if GFX_VER >= 7
5940
crocus_pack_state(GENX(SF_CLIP_VIEWPORT), vp_map, vp)
5941
#else
5942
crocus_pack_state(GENX(SF_VIEWPORT), vp_map, vp)
5943
#endif
5944
{
5945
vp.ViewportMatrixElementm00 = state->scale[0];
5946
vp.ViewportMatrixElementm11 = state->scale[1];
5947
vp.ViewportMatrixElementm22 = state->scale[2];
5948
vp.ViewportMatrixElementm30 = state->translate[0];
5949
vp.ViewportMatrixElementm31 = state->translate[1];
5950
vp.ViewportMatrixElementm32 = state->translate[2];
5951
#if GFX_VER < 6
5952
struct pipe_scissor_state scissor;
5953
crocus_fill_scissor_rect(ice, 0, &scissor);
5954
vp.ScissorRectangle.ScissorRectangleXMin = scissor.minx;
5955
vp.ScissorRectangle.ScissorRectangleXMax = scissor.maxx;
5956
vp.ScissorRectangle.ScissorRectangleYMin = scissor.miny;
5957
vp.ScissorRectangle.ScissorRectangleYMax = scissor.maxy;
5958
#endif
5959
5960
#if GFX_VER >= 7
5961
vp.XMinClipGuardband = gb_xmin;
5962
vp.XMaxClipGuardband = gb_xmax;
5963
vp.YMinClipGuardband = gb_ymin;
5964
vp.YMaxClipGuardband = gb_ymax;
5965
#endif
5966
#if GFX_VER == 8
5967
vp.XMinViewPort = MAX2(vp_xmin, 0);
5968
vp.XMaxViewPort = MIN2(vp_xmax, cso_fb->width) - 1;
5969
vp.YMinViewPort = MAX2(vp_ymin, 0);
5970
vp.YMaxViewPort = MIN2(vp_ymax, cso_fb->height) - 1;
5971
#endif
5972
}
5973
#if GFX_VER < 7
5974
crocus_pack_state(GENX(CLIP_VIEWPORT), clip_map, clip) {
5975
clip.XMinClipGuardband = gb_xmin;
5976
clip.XMaxClipGuardband = gb_xmax;
5977
clip.YMinClipGuardband = gb_ymin;
5978
clip.YMaxClipGuardband = gb_ymax;
5979
}
5980
#endif
5981
#if GFX_VER >= 7
5982
vp_map += GENX(SF_CLIP_VIEWPORT_length);
5983
#else
5984
vp_map += GENX(SF_VIEWPORT_length);
5985
clip_map += GENX(CLIP_VIEWPORT_length);
5986
#endif
5987
}
5988
#if GFX_VER >= 7
5989
crocus_emit_cmd(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), ptr) {
5990
ptr.SFClipViewportPointer = sf_cl_vp_address;
5991
}
5992
#elif GFX_VER == 6
5993
crocus_emit_cmd(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS), vp) {
5994
vp.SFViewportStateChange = 1;
5995
vp.CLIPViewportStateChange = 1;
5996
vp.PointertoCLIP_VIEWPORT = ice->state.clip_vp_address;
5997
vp.PointertoSF_VIEWPORT = ice->state.sf_vp_address;
5998
}
5999
#endif
6000
}
6001
6002
#if GFX_VER >= 6
6003
if (dirty & CROCUS_DIRTY_GEN6_URB) {
6004
#if GFX_VER == 6
6005
bool gs_present = ice->shaders.prog[MESA_SHADER_GEOMETRY] != NULL
6006
|| ice->shaders.ff_gs_prog;
6007
6008
struct brw_vue_prog_data *vue_prog_data =
6009
(void *) ice->shaders.prog[MESA_SHADER_VERTEX]->prog_data;
6010
const unsigned vs_size = vue_prog_data->urb_entry_size;
6011
unsigned gs_size = vs_size;
6012
if (ice->shaders.prog[MESA_SHADER_GEOMETRY]) {
6013
struct brw_vue_prog_data *gs_vue_prog_data =
6014
(void *) ice->shaders.prog[MESA_SHADER_GEOMETRY]->prog_data;
6015
gs_size = gs_vue_prog_data->urb_entry_size;
6016
}
6017
6018
genX(crocus_upload_urb)(batch, vs_size, gs_present, gs_size);
6019
#endif
6020
#if GFX_VER >= 7
6021
const struct intel_device_info *devinfo = &batch->screen->devinfo;
6022
bool gs_present = ice->shaders.prog[MESA_SHADER_GEOMETRY] != NULL;
6023
bool tess_present = ice->shaders.prog[MESA_SHADER_TESS_EVAL] != NULL;
6024
unsigned entry_size[4];
6025
6026
for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
6027
if (!ice->shaders.prog[i]) {
6028
entry_size[i] = 1;
6029
} else {
6030
struct brw_vue_prog_data *vue_prog_data =
6031
(void *) ice->shaders.prog[i]->prog_data;
6032
entry_size[i] = vue_prog_data->urb_entry_size;
6033
}
6034
assert(entry_size[i] != 0);
6035
}
6036
6037
/* If we're just switching between programs with the same URB requirements,
6038
* skip the rest of the logic.
6039
*/
6040
bool no_change = false;
6041
if (ice->urb.vsize == entry_size[MESA_SHADER_VERTEX] &&
6042
ice->urb.gs_present == gs_present &&
6043
ice->urb.gsize == entry_size[MESA_SHADER_GEOMETRY] &&
6044
ice->urb.tess_present == tess_present &&
6045
ice->urb.hsize == entry_size[MESA_SHADER_TESS_CTRL] &&
6046
ice->urb.dsize == entry_size[MESA_SHADER_TESS_EVAL]) {
6047
no_change = true;
6048
}
6049
6050
if (!no_change) {
6051
ice->urb.vsize = entry_size[MESA_SHADER_VERTEX];
6052
ice->urb.gs_present = gs_present;
6053
ice->urb.gsize = entry_size[MESA_SHADER_GEOMETRY];
6054
ice->urb.tess_present = tess_present;
6055
ice->urb.hsize = entry_size[MESA_SHADER_TESS_CTRL];
6056
ice->urb.dsize = entry_size[MESA_SHADER_TESS_EVAL];
6057
6058
unsigned entries[4];
6059
unsigned start[4];
6060
bool constrained;
6061
intel_get_urb_config(devinfo,
6062
batch->screen->l3_config_3d,
6063
tess_present,
6064
gs_present,
6065
entry_size,
6066
entries, start, NULL, &constrained);
6067
6068
#if GFX_VER == 7
6069
if (GFX_VERx10 < 75 && !devinfo->is_baytrail)
6070
gen7_emit_vs_workaround_flush(batch);
6071
#endif
6072
for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
6073
crocus_emit_cmd(batch, GENX(3DSTATE_URB_VS), urb) {
6074
urb._3DCommandSubOpcode += i;
6075
urb.VSURBStartingAddress = start[i];
6076
urb.VSURBEntryAllocationSize = entry_size[i] - 1;
6077
urb.VSNumberofURBEntries = entries[i];
6078
}
6079
}
6080
}
6081
#endif
6082
}
6083
6084
if (dirty & CROCUS_DIRTY_GEN6_BLEND_STATE) {
6085
struct crocus_blend_state *cso_blend = ice->state.cso_blend;
6086
struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
6087
struct crocus_depth_stencil_alpha_state *cso_zsa = ice->state.cso_zsa;
6088
6089
STATIC_ASSERT(GENX(BLEND_STATE_ENTRY_length) == 2);
6090
int rt_dwords =
6091
MAX2(cso_fb->nr_cbufs, 1) * GENX(BLEND_STATE_ENTRY_length);
6092
#if GFX_VER >= 8
6093
rt_dwords += GENX(BLEND_STATE_length);
6094
#endif
6095
uint32_t blend_offset;
6096
uint32_t *blend_map =
6097
stream_state(batch,
6098
4 * rt_dwords, 64, &blend_offset);
6099
6100
#if GFX_VER >= 8
6101
struct GENX(BLEND_STATE) be = { 0 };
6102
{
6103
#else
6104
for (int i = 0; i < BRW_MAX_DRAW_BUFFERS; i++) {
6105
struct GENX(BLEND_STATE_ENTRY) entry = { 0 };
6106
#define be entry
6107
#endif
6108
6109
be.AlphaTestEnable = cso_zsa->cso.alpha_enabled;
6110
be.AlphaTestFunction = translate_compare_func(cso_zsa->cso.alpha_func);
6111
be.AlphaToCoverageEnable = cso_blend->cso.alpha_to_coverage;
6112
be.AlphaToOneEnable = cso_blend->cso.alpha_to_one;
6113
be.AlphaToCoverageDitherEnable = GFX_VER >= 7 && cso_blend->cso.alpha_to_coverage;
6114
be.ColorDitherEnable = cso_blend->cso.dither;
6115
6116
#if GFX_VER >= 8
6117
for (int i = 0; i < BRW_MAX_DRAW_BUFFERS; i++) {
6118
struct GENX(BLEND_STATE_ENTRY) entry = { 0 };
6119
#else
6120
{
6121
#endif
6122
const struct pipe_rt_blend_state *rt =
6123
&cso_blend->cso.rt[cso_blend->cso.independent_blend_enable ? i : 0];
6124
6125
be.IndependentAlphaBlendEnable = set_blend_entry_bits(batch, &entry, cso_blend, i) ||
6126
be.IndependentAlphaBlendEnable;
6127
6128
if (GFX_VER >= 8 || can_emit_logic_op(ice)) {
6129
entry.LogicOpEnable = cso_blend->cso.logicop_enable;
6130
entry.LogicOpFunction = cso_blend->cso.logicop_func;
6131
}
6132
6133
entry.ColorClampRange = COLORCLAMP_RTFORMAT;
6134
entry.PreBlendColorClampEnable = true;
6135
entry.PostBlendColorClampEnable = true;
6136
6137
entry.WriteDisableRed = !(rt->colormask & PIPE_MASK_R);
6138
entry.WriteDisableGreen = !(rt->colormask & PIPE_MASK_G);
6139
entry.WriteDisableBlue = !(rt->colormask & PIPE_MASK_B);
6140
entry.WriteDisableAlpha = !(rt->colormask & PIPE_MASK_A);
6141
6142
#if GFX_VER >= 8
6143
GENX(BLEND_STATE_ENTRY_pack)(NULL, &blend_map[1 + i * 2], &entry);
6144
#else
6145
GENX(BLEND_STATE_ENTRY_pack)(NULL, &blend_map[i * 2], &entry);
6146
#endif
6147
}
6148
}
6149
#if GFX_VER >= 8
6150
GENX(BLEND_STATE_pack)(NULL, blend_map, &be);
6151
#endif
6152
#if GFX_VER < 7
6153
crocus_emit_cmd(batch, GENX(3DSTATE_CC_STATE_POINTERS), ptr) {
6154
ptr.PointertoBLEND_STATE = blend_offset;
6155
ptr.BLEND_STATEChange = true;
6156
}
6157
#else
6158
crocus_emit_cmd(batch, GENX(3DSTATE_BLEND_STATE_POINTERS), ptr) {
6159
ptr.BlendStatePointer = blend_offset;
6160
#if GFX_VER >= 8
6161
ptr.BlendStatePointerValid = true;
6162
#endif
6163
}
6164
#endif
6165
}
6166
#endif
6167
6168
if (dirty & CROCUS_DIRTY_COLOR_CALC_STATE) {
6169
struct crocus_depth_stencil_alpha_state *cso = ice->state.cso_zsa;
6170
UNUSED struct crocus_blend_state *cso_blend = ice->state.cso_blend;
6171
struct pipe_stencil_ref *p_stencil_refs = &ice->state.stencil_ref;
6172
uint32_t cc_offset;
6173
void *cc_map =
6174
stream_state(batch,
6175
sizeof(uint32_t) * GENX(COLOR_CALC_STATE_length),
6176
64, &cc_offset);
6177
#if GFX_VER <= 5
6178
dirty |= CROCUS_DIRTY_GEN5_PIPELINED_POINTERS;
6179
#endif
6180
_crocus_pack_state(batch, GENX(COLOR_CALC_STATE), cc_map, cc) {
6181
cc.AlphaTestFormat = ALPHATEST_FLOAT32;
6182
cc.AlphaReferenceValueAsFLOAT32 = cso->cso.alpha_ref_value;
6183
6184
#if GFX_VER <= 5
6185
6186
set_depth_stencil_bits(ice, &cc);
6187
6188
if (cso_blend->cso.logicop_enable) {
6189
if (can_emit_logic_op(ice)) {
6190
cc.LogicOpEnable = cso_blend->cso.logicop_enable;
6191
cc.LogicOpFunction = cso_blend->cso.logicop_func;
6192
}
6193
}
6194
cc.ColorDitherEnable = cso_blend->cso.dither;
6195
6196
cc.IndependentAlphaBlendEnable = set_blend_entry_bits(batch, &cc, cso_blend, 0);
6197
6198
if (cso->cso.alpha_enabled && ice->state.framebuffer.nr_cbufs <= 1) {
6199
cc.AlphaTestEnable = cso->cso.alpha_enabled;
6200
cc.AlphaTestFunction = translate_compare_func(cso->cso.alpha_func);
6201
}
6202
cc.StatisticsEnable = ice->state.stats_wm ? 1 : 0;
6203
cc.CCViewportStatePointer = ro_bo(batch->state.bo, ice->state.cc_vp_address);
6204
#else
6205
cc.AlphaTestFormat = ALPHATEST_FLOAT32;
6206
cc.AlphaReferenceValueAsFLOAT32 = cso->cso.alpha_ref_value;
6207
6208
cc.BlendConstantColorRed = ice->state.blend_color.color[0];
6209
cc.BlendConstantColorGreen = ice->state.blend_color.color[1];
6210
cc.BlendConstantColorBlue = ice->state.blend_color.color[2];
6211
cc.BlendConstantColorAlpha = ice->state.blend_color.color[3];
6212
#endif
6213
cc.StencilReferenceValue = p_stencil_refs->ref_value[0];
6214
cc.BackfaceStencilReferenceValue = p_stencil_refs->ref_value[1];
6215
}
6216
ice->shaders.cc_offset = cc_offset;
6217
#if GFX_VER >= 6
6218
crocus_emit_cmd(batch, GENX(3DSTATE_CC_STATE_POINTERS), ptr) {
6219
ptr.ColorCalcStatePointer = cc_offset;
6220
#if GFX_VER != 7
6221
ptr.ColorCalcStatePointerValid = true;
6222
#endif
6223
}
6224
#endif
6225
}
6226
#if GFX_VER <= 5
6227
if (dirty & CROCUS_DIRTY_GEN4_CONSTANT_COLOR) {
6228
crocus_emit_cmd(batch, GENX(3DSTATE_CONSTANT_COLOR), blend_cc) {
6229
blend_cc.BlendConstantColorRed = ice->state.blend_color.color[0];
6230
blend_cc.BlendConstantColorGreen = ice->state.blend_color.color[1];
6231
blend_cc.BlendConstantColorBlue = ice->state.blend_color.color[2];
6232
blend_cc.BlendConstantColorAlpha = ice->state.blend_color.color[3];
6233
}
6234
}
6235
#endif
6236
for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
6237
if (!(stage_dirty & (CROCUS_STAGE_DIRTY_CONSTANTS_VS << stage)))
6238
continue;
6239
6240
struct crocus_shader_state *shs = &ice->state.shaders[stage];
6241
struct crocus_compiled_shader *shader = ice->shaders.prog[stage];
6242
6243
if (!shader)
6244
continue;
6245
6246
if (shs->sysvals_need_upload)
6247
upload_sysvals(ice, stage);
6248
6249
#if GFX_VER <= 5
6250
dirty |= CROCUS_DIRTY_GEN4_CURBE;
6251
#endif
6252
#if GFX_VER >= 7
6253
struct push_bos push_bos = {};
6254
setup_constant_buffers(ice, batch, stage, &push_bos);
6255
6256
emit_push_constant_packets(ice, batch, stage, &push_bos);
6257
#endif
6258
}
6259
6260
for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
6261
if (stage_dirty & (CROCUS_STAGE_DIRTY_BINDINGS_VS << stage)) {
6262
if (ice->shaders.prog[stage]) {
6263
#if GFX_VER <= 6
6264
dirty |= CROCUS_DIRTY_GEN5_BINDING_TABLE_POINTERS;
6265
#endif
6266
crocus_populate_binding_table(ice, batch, stage, false);
6267
ice->shaders.prog[stage]->bind_bo_offset =
6268
crocus_upload_binding_table(ice, batch,
6269
ice->shaders.prog[stage]->surf_offset,
6270
ice->shaders.prog[stage]->bt.size_bytes);
6271
6272
#if GFX_VER >= 7
6273
crocus_emit_cmd(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_VS), ptr) {
6274
ptr._3DCommandSubOpcode = 38 + stage;
6275
ptr.PointertoVSBindingTable = ice->shaders.prog[stage]->bind_bo_offset;
6276
}
6277
#endif
6278
#if GFX_VER == 6
6279
} else if (stage == MESA_SHADER_GEOMETRY && ice->shaders.ff_gs_prog) {
6280
dirty |= CROCUS_DIRTY_GEN5_BINDING_TABLE_POINTERS;
6281
crocus_populate_binding_table(ice, batch, stage, true);
6282
ice->shaders.ff_gs_prog->bind_bo_offset =
6283
crocus_upload_binding_table(ice, batch,
6284
ice->shaders.ff_gs_prog->surf_offset,
6285
ice->shaders.ff_gs_prog->bt.size_bytes);
6286
#endif
6287
}
6288
}
6289
}
6290
#if GFX_VER <= 6
6291
if (dirty & CROCUS_DIRTY_GEN5_BINDING_TABLE_POINTERS) {
6292
struct crocus_compiled_shader *gs = ice->shaders.prog[MESA_SHADER_GEOMETRY];
6293
if (gs == NULL)
6294
gs = ice->shaders.ff_gs_prog;
6295
crocus_emit_cmd(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS), ptr) {
6296
ptr.PointertoVSBindingTable = ice->shaders.prog[MESA_SHADER_VERTEX]->bind_bo_offset;
6297
ptr.PointertoPSBindingTable = ice->shaders.prog[MESA_SHADER_FRAGMENT]->bind_bo_offset;
6298
#if GFX_VER == 6
6299
ptr.VSBindingTableChange = true;
6300
ptr.PSBindingTableChange = true;
6301
ptr.GSBindingTableChange = gs ? true : false;
6302
ptr.PointertoGSBindingTable = gs ? gs->bind_bo_offset : 0;
6303
#endif
6304
}
6305
}
6306
#endif
6307
6308
bool sampler_updates = dirty & CROCUS_DIRTY_GEN6_SAMPLER_STATE_POINTERS;
6309
for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
6310
if (!(stage_dirty & (CROCUS_STAGE_DIRTY_SAMPLER_STATES_VS << stage)) ||
6311
!ice->shaders.prog[stage])
6312
continue;
6313
6314
crocus_upload_sampler_states(ice, batch, stage);
6315
6316
sampler_updates = true;
6317
6318
#if GFX_VER >= 7
6319
struct crocus_shader_state *shs = &ice->state.shaders[stage];
6320
6321
crocus_emit_cmd(batch, GENX(3DSTATE_SAMPLER_STATE_POINTERS_VS), ptr) {
6322
ptr._3DCommandSubOpcode = 43 + stage;
6323
ptr.PointertoVSSamplerState = shs->sampler_offset;
6324
}
6325
#endif
6326
}
6327
6328
if (sampler_updates) {
6329
#if GFX_VER == 6
6330
struct crocus_shader_state *shs_vs = &ice->state.shaders[MESA_SHADER_VERTEX];
6331
struct crocus_shader_state *shs_gs = &ice->state.shaders[MESA_SHADER_GEOMETRY];
6332
struct crocus_shader_state *shs_fs = &ice->state.shaders[MESA_SHADER_FRAGMENT];
6333
crocus_emit_cmd(batch, GENX(3DSTATE_SAMPLER_STATE_POINTERS), ptr) {
6334
if (ice->shaders.prog[MESA_SHADER_VERTEX] &&
6335
(dirty & CROCUS_DIRTY_GEN6_SAMPLER_STATE_POINTERS ||
6336
stage_dirty & (CROCUS_STAGE_DIRTY_SAMPLER_STATES_VS << MESA_SHADER_VERTEX))) {
6337
ptr.VSSamplerStateChange = true;
6338
ptr.PointertoVSSamplerState = shs_vs->sampler_offset;
6339
}
6340
if (ice->shaders.prog[MESA_SHADER_GEOMETRY] &&
6341
(dirty & CROCUS_DIRTY_GEN6_SAMPLER_STATE_POINTERS ||
6342
stage_dirty & (CROCUS_STAGE_DIRTY_SAMPLER_STATES_VS << MESA_SHADER_GEOMETRY))) {
6343
ptr.GSSamplerStateChange = true;
6344
ptr.PointertoGSSamplerState = shs_gs->sampler_offset;
6345
}
6346
if (ice->shaders.prog[MESA_SHADER_FRAGMENT] &&
6347
(dirty & CROCUS_DIRTY_GEN6_SAMPLER_STATE_POINTERS ||
6348
stage_dirty & (CROCUS_STAGE_DIRTY_SAMPLER_STATES_VS << MESA_SHADER_FRAGMENT))) {
6349
ptr.PSSamplerStateChange = true;
6350
ptr.PointertoPSSamplerState = shs_fs->sampler_offset;
6351
}
6352
}
6353
#endif
6354
}
6355
6356
#if GFX_VER >= 6
6357
if (dirty & CROCUS_DIRTY_GEN6_MULTISAMPLE) {
6358
crocus_emit_cmd(batch, GENX(3DSTATE_MULTISAMPLE), ms) {
6359
ms.PixelLocation =
6360
ice->state.cso_rast->cso.half_pixel_center ? CENTER : UL_CORNER;
6361
if (ice->state.framebuffer.samples > 0)
6362
ms.NumberofMultisamples = ffs(ice->state.framebuffer.samples) - 1;
6363
#if GFX_VER == 6
6364
INTEL_SAMPLE_POS_4X(ms.Sample);
6365
#elif GFX_VER == 7
6366
switch (ice->state.framebuffer.samples) {
6367
case 1:
6368
INTEL_SAMPLE_POS_1X(ms.Sample);
6369
break;
6370
case 2:
6371
INTEL_SAMPLE_POS_2X(ms.Sample);
6372
break;
6373
case 4:
6374
INTEL_SAMPLE_POS_4X(ms.Sample);
6375
break;
6376
case 8:
6377
INTEL_SAMPLE_POS_8X(ms.Sample);
6378
break;
6379
default:
6380
break;
6381
}
6382
#endif
6383
}
6384
}
6385
6386
if (dirty & CROCUS_DIRTY_GEN6_SAMPLE_MASK) {
6387
crocus_emit_cmd(batch, GENX(3DSTATE_SAMPLE_MASK), ms) {
6388
ms.SampleMask = determine_sample_mask(ice);
6389
}
6390
}
6391
#endif
6392
6393
#if GFX_VER >= 7
6394
struct crocus_compiled_shader *shader = ice->shaders.prog[MESA_SHADER_FRAGMENT];
6395
if ((stage_dirty & CROCUS_STAGE_DIRTY_FS) && shader) {
6396
struct brw_stage_prog_data *prog_data = shader->prog_data;
6397
struct brw_wm_prog_data *wm_prog_data = (void *) shader->prog_data;
6398
6399
crocus_emit_cmd(batch, GENX(3DSTATE_PS), ps) {
6400
6401
/* Initialize the execution mask with VMask. Otherwise, derivatives are
6402
* incorrect for subspans where some of the pixels are unlit. We believe
6403
* the bit just didn't take effect in previous generations.
6404
*/
6405
ps.VectorMaskEnable = GFX_VER >= 8;
6406
6407
ps._8PixelDispatchEnable = wm_prog_data->dispatch_8;
6408
ps._16PixelDispatchEnable = wm_prog_data->dispatch_16;
6409
ps._32PixelDispatchEnable = wm_prog_data->dispatch_32;
6410
6411
ps.DispatchGRFStartRegisterForConstantSetupData0 =
6412
brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0);
6413
ps.DispatchGRFStartRegisterForConstantSetupData1 =
6414
brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 1);
6415
ps.DispatchGRFStartRegisterForConstantSetupData2 =
6416
brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2);
6417
6418
ps.KernelStartPointer0 = KSP(ice, shader) +
6419
brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0);
6420
ps.KernelStartPointer1 = KSP(ice, shader) +
6421
brw_wm_prog_data_prog_offset(wm_prog_data, ps, 1);
6422
ps.KernelStartPointer2 = KSP(ice, shader) +
6423
brw_wm_prog_data_prog_offset(wm_prog_data, ps, 2);
6424
6425
#if GFX_VERx10 == 75
6426
ps.SampleMask = determine_sample_mask(ice);
6427
#endif
6428
// XXX: WABTPPrefetchDisable, see above, drop at C0
6429
ps.BindingTableEntryCount = shader->bt.size_bytes / 4;
6430
ps.FloatingPointMode = prog_data->use_alt_mode;
6431
#if GFX_VER >= 8
6432
ps.MaximumNumberofThreadsPerPSD = 64 - 2;
6433
#else
6434
ps.MaximumNumberofThreads = batch->screen->devinfo.max_wm_threads - 1;
6435
#endif
6436
6437
ps.PushConstantEnable = prog_data->ubo_ranges[0].length > 0;
6438
6439
#if GFX_VER < 8
6440
ps.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;
6441
ps.DualSourceBlendEnable = wm_prog_data->dual_src_blend && ice->state.cso_blend->dual_color_blending;
6442
ps.AttributeEnable = (wm_prog_data->num_varying_inputs != 0);
6443
#endif
6444
/* From the documentation for this packet:
6445
* "If the PS kernel does not need the Position XY Offsets to
6446
* compute a Position Value, then this field should be programmed
6447
* to POSOFFSET_NONE."
6448
*
6449
* "SW Recommendation: If the PS kernel needs the Position Offsets
6450
* to compute a Position XY value, this field should match Position
6451
* ZW Interpolation Mode to ensure a consistent position.xyzw
6452
* computation."
6453
*
6454
* We only require XY sample offsets. So, this recommendation doesn't
6455
* look useful at the moment. We might need this in future.
6456
*/
6457
ps.PositionXYOffsetSelect =
6458
wm_prog_data->uses_pos_offset ? POSOFFSET_SAMPLE : POSOFFSET_NONE;
6459
6460
if (wm_prog_data->base.total_scratch) {
6461
struct crocus_bo *bo = crocus_get_scratch_space(ice, wm_prog_data->base.total_scratch, MESA_SHADER_FRAGMENT);
6462
ps.PerThreadScratchSpace = ffs(wm_prog_data->base.total_scratch) - 11;
6463
ps.ScratchSpaceBasePointer = rw_bo(bo, 0);
6464
}
6465
}
6466
#if GFX_VER == 8
6467
const struct shader_info *fs_info =
6468
crocus_get_shader_info(ice, MESA_SHADER_FRAGMENT);
6469
crocus_emit_cmd(batch, GENX(3DSTATE_PS_EXTRA), psx) {
6470
psx.PixelShaderValid = true;
6471
psx.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;
6472
psx.PixelShaderKillsPixel = wm_prog_data->uses_kill;
6473
psx.AttributeEnable = wm_prog_data->num_varying_inputs != 0;
6474
psx.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
6475
psx.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;
6476
psx.PixelShaderIsPerSample = wm_prog_data->persample_dispatch;
6477
6478
/* _NEW_MULTISAMPLE | BRW_NEW_CONSERVATIVE_RASTERIZATION */
6479
if (wm_prog_data->uses_sample_mask)
6480
psx.PixelShaderUsesInputCoverageMask = true;
6481
6482
psx.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;
6483
6484
/* The stricter cross-primitive coherency guarantees that the hardware
6485
* gives us with the "Accesses UAV" bit set for at least one shader stage
6486
* and the "UAV coherency required" bit set on the 3DPRIMITIVE command
6487
* are redundant within the current image, atomic counter and SSBO GL
6488
* APIs, which all have very loose ordering and coherency requirements
6489
* and generally rely on the application to insert explicit barriers when
6490
* a shader invocation is expected to see the memory writes performed by
6491
* the invocations of some previous primitive. Regardless of the value
6492
* of "UAV coherency required", the "Accesses UAV" bits will implicitly
6493
* cause an in most cases useless DC flush when the lowermost stage with
6494
* the bit set finishes execution.
6495
*
6496
* It would be nice to disable it, but in some cases we can't because on
6497
* Gfx8+ it also has an influence on rasterization via the PS UAV-only
6498
* signal (which could be set independently from the coherency mechanism
6499
* in the 3DSTATE_WM command on Gfx7), and because in some cases it will
6500
* determine whether the hardware skips execution of the fragment shader
6501
* or not via the ThreadDispatchEnable signal. However if we know that
6502
* GFX8_PS_BLEND_HAS_WRITEABLE_RT is going to be set and
6503
* GFX8_PSX_PIXEL_SHADER_NO_RT_WRITE is not set it shouldn't make any
6504
* difference so we may just disable it here.
6505
*
6506
* Gfx8 hardware tries to compute ThreadDispatchEnable for us but doesn't
6507
* take into account KillPixels when no depth or stencil writes are
6508
* enabled. In order for occlusion queries to work correctly with no
6509
* attachments, we need to force-enable here.
6510
*
6511
*/
6512
if ((wm_prog_data->has_side_effects || wm_prog_data->uses_kill) &&
6513
!(has_writeable_rt(ice->state.cso_blend, fs_info)))
6514
psx.PixelShaderHasUAV = true;
6515
}
6516
#endif
6517
}
6518
#endif
6519
6520
#if GFX_VER >= 7
6521
if (ice->state.streamout_active) {
6522
if (dirty & CROCUS_DIRTY_GEN7_SO_BUFFERS) {
6523
for (int i = 0; i < 4; i++) {
6524
struct crocus_stream_output_target *tgt =
6525
(void *) ice->state.so_target[i];
6526
6527
if (!tgt) {
6528
crocus_emit_cmd(batch, GENX(3DSTATE_SO_BUFFER), sob) {
6529
sob.SOBufferIndex = i;
6530
}
6531
continue;
6532
}
6533
struct crocus_resource *res = (void *) tgt->base.buffer;
6534
uint32_t start = tgt->base.buffer_offset;
6535
#if GFX_VER < 8
6536
uint32_t end = ALIGN(start + tgt->base.buffer_size, 4);
6537
#endif
6538
crocus_emit_cmd(batch, GENX(3DSTATE_SO_BUFFER), sob) {
6539
sob.SOBufferIndex = i;
6540
6541
sob.SurfaceBaseAddress = rw_bo(res->bo, start);
6542
#if GFX_VER < 8
6543
sob.SurfacePitch = tgt->stride;
6544
sob.SurfaceEndAddress = rw_bo(res->bo, end);
6545
#else
6546
sob.SOBufferEnable = true;
6547
sob.StreamOffsetWriteEnable = true;
6548
sob.StreamOutputBufferOffsetAddressEnable = true;
6549
sob.MOCS = crocus_mocs(res->bo, &batch->screen->isl_dev);
6550
6551
sob.SurfaceSize = MAX2(tgt->base.buffer_size / 4, 1) - 1;
6552
sob.StreamOutputBufferOffsetAddress =
6553
rw_bo(crocus_resource_bo(&tgt->offset_res->base.b), tgt->offset_offset);
6554
if (tgt->zero_offset) {
6555
sob.StreamOffset = 0;
6556
tgt->zero_offset = false;
6557
} else
6558
sob.StreamOffset = 0xFFFFFFFF; /* not offset, see above */
6559
#endif
6560
}
6561
}
6562
}
6563
6564
if ((dirty & CROCUS_DIRTY_SO_DECL_LIST) && ice->state.streamout) {
6565
uint32_t *decl_list =
6566
ice->state.streamout + GENX(3DSTATE_STREAMOUT_length);
6567
crocus_batch_emit(batch, decl_list, 4 * ((decl_list[0] & 0xff) + 2));
6568
}
6569
6570
if (dirty & CROCUS_DIRTY_STREAMOUT) {
6571
const struct crocus_rasterizer_state *cso_rast = ice->state.cso_rast;
6572
6573
uint32_t dynamic_sol[GENX(3DSTATE_STREAMOUT_length)];
6574
crocus_pack_command(GENX(3DSTATE_STREAMOUT), dynamic_sol, sol) {
6575
sol.SOFunctionEnable = true;
6576
sol.SOStatisticsEnable = true;
6577
6578
sol.RenderingDisable = cso_rast->cso.rasterizer_discard &&
6579
!ice->state.prims_generated_query_active;
6580
sol.ReorderMode = cso_rast->cso.flatshade_first ? LEADING : TRAILING;
6581
}
6582
6583
assert(ice->state.streamout);
6584
6585
crocus_emit_merge(batch, ice->state.streamout, dynamic_sol,
6586
GENX(3DSTATE_STREAMOUT_length));
6587
}
6588
} else {
6589
if (dirty & CROCUS_DIRTY_STREAMOUT) {
6590
crocus_emit_cmd(batch, GENX(3DSTATE_STREAMOUT), sol);
6591
}
6592
}
6593
#endif
6594
#if GFX_VER == 6
6595
if (ice->state.streamout_active) {
6596
if (dirty & CROCUS_DIRTY_GEN6_SVBI) {
6597
crocus_emit_so_svbi(ice);
6598
}
6599
}
6600
#endif
6601
6602
if (dirty & CROCUS_DIRTY_CLIP) {
6603
#if GFX_VER < 6
6604
const struct brw_clip_prog_data *clip_prog_data = (struct brw_clip_prog_data *)ice->shaders.clip_prog->prog_data;
6605
struct pipe_rasterizer_state *cso_state = &ice->state.cso_rast->cso;
6606
6607
uint32_t *clip_ptr = stream_state(batch, GENX(CLIP_STATE_length) * 4, 32, &ice->shaders.clip_offset);
6608
dirty |= CROCUS_DIRTY_GEN5_PIPELINED_POINTERS;
6609
_crocus_pack_state(batch, GENX(CLIP_STATE), clip_ptr, clip) {
6610
clip.KernelStartPointer = KSP(ice, ice->shaders.clip_prog);
6611
clip.FloatingPointMode = FLOATING_POINT_MODE_Alternate;
6612
clip.SingleProgramFlow = true;
6613
clip.GRFRegisterCount = DIV_ROUND_UP(clip_prog_data->total_grf, 16) - 1;
6614
6615
clip.VertexURBEntryReadLength = clip_prog_data->urb_read_length;
6616
clip.ConstantURBEntryReadLength = clip_prog_data->curb_read_length;
6617
6618
clip.DispatchGRFStartRegisterForURBData = 1;
6619
clip.VertexURBEntryReadOffset = 0;
6620
clip.ConstantURBEntryReadOffset = ice->curbe.clip_start * 2;
6621
6622
clip.NumberofURBEntries = batch->ice->urb.nr_clip_entries;
6623
clip.URBEntryAllocationSize = batch->ice->urb.vsize - 1;
6624
6625
if (batch->ice->urb.nr_clip_entries >= 10) {
6626
/* Half of the URB entries go to each thread, and it has to be an
6627
* even number.
6628
*/
6629
assert(batch->ice->urb.nr_clip_entries % 2 == 0);
6630
6631
/* Although up to 16 concurrent Clip threads are allowed on Ironlake,
6632
* only 2 threads can output VUEs at a time.
6633
*/
6634
clip.MaximumNumberofThreads = (GFX_VER == 5 ? 16 : 2) - 1;
6635
} else {
6636
assert(batch->ice->urb.nr_clip_entries >= 5);
6637
clip.MaximumNumberofThreads = 1 - 1;
6638
}
6639
clip.VertexPositionSpace = VPOS_NDCSPACE;
6640
clip.UserClipFlagsMustClipEnable = true;
6641
clip.GuardbandClipTestEnable = true;
6642
6643
clip.ClipperViewportStatePointer = ro_bo(batch->state.bo, ice->state.clip_vp_address);
6644
clip.ScreenSpaceViewportXMin = -1.0;
6645
clip.ScreenSpaceViewportXMax = 1.0;
6646
clip.ScreenSpaceViewportYMin = -1.0;
6647
clip.ScreenSpaceViewportYMax = 1.0;
6648
clip.ViewportXYClipTestEnable = true;
6649
clip.ViewportZClipTestEnable = (cso_state->depth_clip_near || cso_state->depth_clip_far);
6650
6651
#if GFX_VER == 5 || GFX_VERx10 == 45
6652
clip.UserClipDistanceClipTestEnableBitmask = cso_state->clip_plane_enable;
6653
#else
6654
/* Up to 6 actual clip flags, plus the 7th for the negative RHW
6655
* workaround.
6656
*/
6657
clip.UserClipDistanceClipTestEnableBitmask = (cso_state->clip_plane_enable & 0x3f) | 0x40;
6658
#endif
6659
6660
clip.APIMode = cso_state->clip_halfz ? APIMODE_D3D : APIMODE_OGL;
6661
clip.GuardbandClipTestEnable = true;
6662
6663
clip.ClipMode = clip_prog_data->clip_mode;
6664
#if GFX_VERx10 == 45
6665
clip.NegativeWClipTestEnable = true;
6666
#endif
6667
}
6668
6669
#else //if GFX_VER >= 6
6670
struct crocus_rasterizer_state *cso_rast = ice->state.cso_rast;
6671
const struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data );
6672
struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
6673
bool gs_or_tes = ice->shaders.prog[MESA_SHADER_GEOMETRY] ||
6674
ice->shaders.prog[MESA_SHADER_TESS_EVAL];
6675
bool points_or_lines = cso_rast->fill_mode_point_or_line ||
6676
(gs_or_tes ? ice->shaders.output_topology_is_points_or_lines
6677
: ice->state.prim_is_points_or_lines);
6678
uint32_t dynamic_clip[GENX(3DSTATE_CLIP_length)];
6679
crocus_pack_command(GENX(3DSTATE_CLIP), &dynamic_clip, cl) {
6680
cl.StatisticsEnable = ice->state.statistics_counters_enabled;
6681
if (cso_rast->cso.rasterizer_discard)
6682
cl.ClipMode = CLIPMODE_REJECT_ALL;
6683
else if (ice->state.window_space_position)
6684
cl.ClipMode = CLIPMODE_ACCEPT_ALL;
6685
else
6686
cl.ClipMode = CLIPMODE_NORMAL;
6687
6688
cl.PerspectiveDivideDisable = ice->state.window_space_position;
6689
cl.ViewportXYClipTestEnable = !points_or_lines;
6690
6691
cl.UserClipDistanceCullTestEnableBitmask =
6692
brw_vue_prog_data(ice->shaders.prog[MESA_SHADER_VERTEX]->prog_data)->cull_distance_mask;
6693
6694
if (wm_prog_data->barycentric_interp_modes &
6695
BRW_BARYCENTRIC_NONPERSPECTIVE_BITS)
6696
cl.NonPerspectiveBarycentricEnable = true;
6697
6698
cl.ForceZeroRTAIndexEnable = cso_fb->layers <= 1;
6699
cl.MaximumVPIndex = ice->state.num_viewports - 1;
6700
}
6701
crocus_emit_merge(batch, cso_rast->clip, dynamic_clip,
6702
ARRAY_SIZE(cso_rast->clip));
6703
#endif
6704
}
6705
6706
if (stage_dirty & CROCUS_STAGE_DIRTY_VS) {
6707
struct crocus_compiled_shader *shader = ice->shaders.prog[MESA_SHADER_VERTEX];
6708
const struct brw_vue_prog_data *vue_prog_data = brw_vue_prog_data(shader->prog_data);
6709
const struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
6710
#if GFX_VER == 7
6711
if (batch->screen->devinfo.is_ivybridge)
6712
gen7_emit_vs_workaround_flush(batch);
6713
#endif
6714
6715
6716
#if GFX_VER == 6
6717
struct push_bos push_bos = {};
6718
setup_constant_buffers(ice, batch, MESA_SHADER_VERTEX, &push_bos);
6719
6720
emit_push_constant_packets(ice, batch, MESA_SHADER_VERTEX, &push_bos);
6721
#endif
6722
#if GFX_VER >= 6
6723
crocus_emit_cmd(batch, GENX(3DSTATE_VS), vs)
6724
#else
6725
uint32_t *vs_ptr = stream_state(batch,
6726
GENX(VS_STATE_length) * 4, 32, &ice->shaders.vs_offset);
6727
dirty |= CROCUS_DIRTY_GEN5_PIPELINED_POINTERS;
6728
_crocus_pack_state(batch, GENX(VS_STATE), vs_ptr, vs)
6729
#endif
6730
{
6731
INIT_THREAD_DISPATCH_FIELDS(vs, Vertex, MESA_SHADER_VERTEX);
6732
6733
vs.MaximumNumberofThreads = batch->screen->devinfo.max_vs_threads - 1;
6734
6735
#if GFX_VER < 6
6736
vs.GRFRegisterCount = DIV_ROUND_UP(vue_prog_data->total_grf, 16) - 1;
6737
vs.ConstantURBEntryReadLength = vue_prog_data->base.curb_read_length;
6738
vs.ConstantURBEntryReadOffset = ice->curbe.vs_start * 2;
6739
6740
vs.NumberofURBEntries = batch->ice->urb.nr_vs_entries >> (GFX_VER == 5 ? 2 : 0);
6741
vs.URBEntryAllocationSize = batch->ice->urb.vsize - 1;
6742
6743
vs.MaximumNumberofThreads =
6744
CLAMP(batch->ice->urb.nr_vs_entries / 2, 1, batch->screen->devinfo.max_vs_threads) - 1;
6745
vs.StatisticsEnable = false;
6746
vs.SamplerStatePointer = ro_bo(batch->state.bo, ice->state.shaders[MESA_SHADER_VERTEX].sampler_offset);
6747
#endif
6748
#if GFX_VER == 5
6749
/* Force single program flow on Ironlake. We cannot reliably get
6750
* all applications working without it. See:
6751
* https://bugs.freedesktop.org/show_bug.cgi?id=29172
6752
*
6753
* The most notable and reliably failing application is the Humus
6754
* demo "CelShading"
6755
*/
6756
vs.SingleProgramFlow = true;
6757
vs.SamplerCount = 0; /* hardware requirement */
6758
6759
#endif
6760
#if GFX_VER >= 8
6761
vs.SIMD8DispatchEnable =
6762
vue_prog_data->dispatch_mode == DISPATCH_MODE_SIMD8;
6763
6764
vs.UserClipDistanceCullTestEnableBitmask =
6765
vue_prog_data->cull_distance_mask;
6766
#endif
6767
}
6768
6769
#if GFX_VER == 6
6770
crocus_emit_pipe_control_flush(batch,
6771
"post VS const",
6772
PIPE_CONTROL_DEPTH_STALL |
6773
PIPE_CONTROL_INSTRUCTION_INVALIDATE |
6774
PIPE_CONTROL_STATE_CACHE_INVALIDATE);
6775
#endif
6776
}
6777
6778
if (stage_dirty & CROCUS_STAGE_DIRTY_GS) {
6779
struct crocus_compiled_shader *shader = ice->shaders.prog[MESA_SHADER_GEOMETRY];
6780
bool active = GFX_VER >= 6 && shader;
6781
#if GFX_VER == 6
6782
struct push_bos push_bos = {};
6783
if (shader)
6784
setup_constant_buffers(ice, batch, MESA_SHADER_GEOMETRY, &push_bos);
6785
6786
emit_push_constant_packets(ice, batch, MESA_SHADER_GEOMETRY, &push_bos);
6787
#endif
6788
#if GFX_VER >= 6
6789
crocus_emit_cmd(batch, GENX(3DSTATE_GS), gs)
6790
#else
6791
uint32_t *gs_ptr = stream_state(batch,
6792
GENX(GS_STATE_length) * 4, 32, &ice->shaders.gs_offset);
6793
dirty |= CROCUS_DIRTY_GEN5_PIPELINED_POINTERS;
6794
_crocus_pack_state(batch, GENX(GS_STATE), gs_ptr, gs)
6795
#endif
6796
{
6797
#if GFX_VER >= 6
6798
if (active) {
6799
const struct brw_gs_prog_data *gs_prog_data = brw_gs_prog_data(shader->prog_data);
6800
const struct brw_vue_prog_data *vue_prog_data = brw_vue_prog_data(shader->prog_data);
6801
const struct brw_stage_prog_data *prog_data = &gs_prog_data->base.base;
6802
6803
INIT_THREAD_DISPATCH_FIELDS(gs, Vertex, MESA_SHADER_GEOMETRY);
6804
#if GFX_VER >= 7
6805
gs.OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1;
6806
gs.OutputTopology = gs_prog_data->output_topology;
6807
gs.ControlDataHeaderSize =
6808
gs_prog_data->control_data_header_size_hwords;
6809
6810
gs.InstanceControl = gs_prog_data->invocations - 1;
6811
gs.DispatchMode = vue_prog_data->dispatch_mode;
6812
6813
gs.IncludePrimitiveID = gs_prog_data->include_primitive_id;
6814
6815
gs.ControlDataFormat = gs_prog_data->control_data_format;
6816
#endif
6817
6818
/* Note: the meaning of the GEN7_GS_REORDER_TRAILING bit changes between
6819
* Ivy Bridge and Haswell.
6820
*
6821
* On Ivy Bridge, setting this bit causes the vertices of a triangle
6822
* strip to be delivered to the geometry shader in an order that does
6823
* not strictly follow the OpenGL spec, but preserves triangle
6824
* orientation. For example, if the vertices are (1, 2, 3, 4, 5), then
6825
* the geometry shader sees triangles:
6826
*
6827
* (1, 2, 3), (2, 4, 3), (3, 4, 5)
6828
*
6829
* (Clearing the bit is even worse, because it fails to preserve
6830
* orientation).
6831
*
6832
* Triangle strips with adjacency always ordered in a way that preserves
6833
* triangle orientation but does not strictly follow the OpenGL spec,
6834
* regardless of the setting of this bit.
6835
*
6836
* On Haswell, both triangle strips and triangle strips with adjacency
6837
* are always ordered in a way that preserves triangle orientation.
6838
* Setting this bit causes the ordering to strictly follow the OpenGL
6839
* spec.
6840
*
6841
* So in either case we want to set the bit. Unfortunately on Ivy
6842
* Bridge this will get the order close to correct but not perfect.
6843
*/
6844
gs.ReorderMode = TRAILING;
6845
gs.MaximumNumberofThreads =
6846
GFX_VER == 8 ? (batch->screen->devinfo.max_gs_threads / 2 - 1) :
6847
(batch->screen->devinfo.max_gs_threads - 1);
6848
#if GFX_VER < 7
6849
gs.SOStatisticsEnable = true;
6850
if (gs_prog_data->num_transform_feedback_bindings)
6851
gs.SVBIPayloadEnable = ice->state.streamout_active;
6852
6853
/* GEN6_GS_SPF_MODE and GEN6_GS_VECTOR_MASK_ENABLE are enabled as it
6854
* was previously done for gen6.
6855
*
6856
* TODO: test with both disabled to see if the HW is behaving
6857
* as expected, like in gen7.
6858
*/
6859
gs.SingleProgramFlow = true;
6860
gs.VectorMaskEnable = true;
6861
#endif
6862
#if GFX_VER >= 8
6863
gs.ExpectedVertexCount = gs_prog_data->vertices_in;
6864
6865
if (gs_prog_data->static_vertex_count != -1) {
6866
gs.StaticOutput = true;
6867
gs.StaticOutputVertexCount = gs_prog_data->static_vertex_count;
6868
}
6869
gs.IncludeVertexHandles = vue_prog_data->include_vue_handles;
6870
6871
gs.UserClipDistanceCullTestEnableBitmask =
6872
vue_prog_data->cull_distance_mask;
6873
6874
const int urb_entry_write_offset = 1;
6875
const uint32_t urb_entry_output_length =
6876
DIV_ROUND_UP(vue_prog_data->vue_map.num_slots, 2) -
6877
urb_entry_write_offset;
6878
6879
gs.VertexURBEntryOutputReadOffset = urb_entry_write_offset;
6880
gs.VertexURBEntryOutputLength = MAX2(urb_entry_output_length, 1);
6881
#endif
6882
}
6883
#endif
6884
#if GFX_VER <= 6
6885
if (!active && ice->shaders.ff_gs_prog) {
6886
const struct brw_ff_gs_prog_data *gs_prog_data = (struct brw_ff_gs_prog_data *)ice->shaders.ff_gs_prog->prog_data;
6887
/* In gen6, transform feedback for the VS stage is done with an
6888
* ad-hoc GS program. This function provides the needed 3DSTATE_GS
6889
* for this.
6890
*/
6891
gs.KernelStartPointer = KSP(ice, ice->shaders.ff_gs_prog);
6892
gs.SingleProgramFlow = true;
6893
gs.DispatchGRFStartRegisterForURBData = GFX_VER == 6 ? 2 : 1;
6894
gs.VertexURBEntryReadLength = gs_prog_data->urb_read_length;
6895
6896
#if GFX_VER <= 5
6897
gs.GRFRegisterCount =
6898
DIV_ROUND_UP(gs_prog_data->total_grf, 16) - 1;
6899
/* BRW_NEW_URB_FENCE */
6900
gs.NumberofURBEntries = batch->ice->urb.nr_gs_entries;
6901
gs.URBEntryAllocationSize = batch->ice->urb.vsize - 1;
6902
gs.MaximumNumberofThreads = batch->ice->urb.nr_gs_entries >= 8 ? 1 : 0;
6903
gs.FloatingPointMode = FLOATING_POINT_MODE_Alternate;
6904
#else
6905
gs.Enable = true;
6906
gs.VectorMaskEnable = true;
6907
gs.SVBIPayloadEnable = true;
6908
gs.SVBIPostIncrementEnable = true;
6909
gs.SVBIPostIncrementValue = gs_prog_data->svbi_postincrement_value;
6910
gs.SOStatisticsEnable = true;
6911
gs.MaximumNumberofThreads = batch->screen->devinfo.max_gs_threads - 1;
6912
#endif
6913
}
6914
#endif
6915
if (!active && !ice->shaders.ff_gs_prog) {
6916
#if GFX_VER < 8
6917
gs.DispatchGRFStartRegisterForURBData = 1;
6918
#if GFX_VER >= 7
6919
gs.IncludeVertexHandles = true;
6920
#endif
6921
#endif
6922
}
6923
#if GFX_VER >= 6
6924
gs.StatisticsEnable = true;
6925
#endif
6926
#if GFX_VER == 5 || GFX_VER == 6
6927
gs.RenderingEnabled = true;
6928
#endif
6929
#if GFX_VER <= 5
6930
gs.MaximumVPIndex = ice->state.num_viewports - 1;
6931
#endif
6932
}
6933
}
6934
6935
#if GFX_VER >= 7
6936
if (stage_dirty & CROCUS_STAGE_DIRTY_TCS) {
6937
struct crocus_compiled_shader *shader = ice->shaders.prog[MESA_SHADER_TESS_CTRL];
6938
6939
if (shader) {
6940
const struct brw_tcs_prog_data *tcs_prog_data = brw_tcs_prog_data(shader->prog_data);
6941
const struct brw_vue_prog_data *vue_prog_data = brw_vue_prog_data(shader->prog_data);
6942
const struct brw_stage_prog_data *prog_data = &tcs_prog_data->base.base;
6943
6944
crocus_emit_cmd(batch, GENX(3DSTATE_HS), hs) {
6945
INIT_THREAD_DISPATCH_FIELDS(hs, Vertex, MESA_SHADER_TESS_CTRL);
6946
hs.InstanceCount = tcs_prog_data->instances - 1;
6947
hs.IncludeVertexHandles = true;
6948
hs.MaximumNumberofThreads = batch->screen->devinfo.max_tcs_threads - 1;
6949
}
6950
} else {
6951
crocus_emit_cmd(batch, GENX(3DSTATE_HS), hs);
6952
}
6953
6954
}
6955
6956
if (stage_dirty & CROCUS_STAGE_DIRTY_TES) {
6957
struct crocus_compiled_shader *shader = ice->shaders.prog[MESA_SHADER_TESS_EVAL];
6958
if (shader) {
6959
const struct brw_tes_prog_data *tes_prog_data = brw_tes_prog_data(shader->prog_data);
6960
const struct brw_vue_prog_data *vue_prog_data = brw_vue_prog_data(shader->prog_data);
6961
const struct brw_stage_prog_data *prog_data = &tes_prog_data->base.base;
6962
6963
crocus_emit_cmd(batch, GENX(3DSTATE_TE), te) {
6964
te.Partitioning = tes_prog_data->partitioning;
6965
te.OutputTopology = tes_prog_data->output_topology;
6966
te.TEDomain = tes_prog_data->domain;
6967
te.TEEnable = true;
6968
te.MaximumTessellationFactorOdd = 63.0;
6969
te.MaximumTessellationFactorNotOdd = 64.0;
6970
};
6971
crocus_emit_cmd(batch, GENX(3DSTATE_DS), ds) {
6972
INIT_THREAD_DISPATCH_FIELDS(ds, Patch, MESA_SHADER_TESS_EVAL);
6973
6974
ds.MaximumNumberofThreads = batch->screen->devinfo.max_tes_threads - 1;
6975
ds.ComputeWCoordinateEnable =
6976
tes_prog_data->domain == BRW_TESS_DOMAIN_TRI;
6977
6978
#if GFX_VER >= 8
6979
if (vue_prog_data->dispatch_mode == DISPATCH_MODE_SIMD8)
6980
ds.DispatchMode = DISPATCH_MODE_SIMD8_SINGLE_PATCH;
6981
ds.UserClipDistanceCullTestEnableBitmask =
6982
vue_prog_data->cull_distance_mask;
6983
#endif
6984
};
6985
} else {
6986
crocus_emit_cmd(batch, GENX(3DSTATE_TE), te);
6987
crocus_emit_cmd(batch, GENX(3DSTATE_DS), ds);
6988
}
6989
}
6990
#endif
6991
if (dirty & CROCUS_DIRTY_RASTER) {
6992
6993
#if GFX_VER < 6
6994
const struct brw_sf_prog_data *sf_prog_data = (struct brw_sf_prog_data *)ice->shaders.sf_prog->prog_data;
6995
struct pipe_rasterizer_state *cso_state = &ice->state.cso_rast->cso;
6996
uint32_t *sf_ptr = stream_state(batch,
6997
GENX(SF_STATE_length) * 4, 32, &ice->shaders.sf_offset);
6998
dirty |= CROCUS_DIRTY_GEN5_PIPELINED_POINTERS;
6999
_crocus_pack_state(batch, GENX(SF_STATE), sf_ptr, sf) {
7000
sf.KernelStartPointer = KSP(ice, ice->shaders.sf_prog);
7001
sf.FloatingPointMode = FLOATING_POINT_MODE_Alternate;
7002
sf.GRFRegisterCount = DIV_ROUND_UP(sf_prog_data->total_grf, 16) - 1;
7003
sf.DispatchGRFStartRegisterForURBData = 3;
7004
sf.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET;
7005
sf.VertexURBEntryReadLength = sf_prog_data->urb_read_length;
7006
sf.URBEntryAllocationSize = batch->ice->urb.sfsize - 1;
7007
sf.NumberofURBEntries = batch->ice->urb.nr_sf_entries;
7008
sf.PointRasterizationRule = RASTRULE_UPPER_RIGHT;
7009
7010
sf.SetupViewportStateOffset = ro_bo(batch->state.bo, ice->state.sf_vp_address);
7011
7012
sf.MaximumNumberofThreads =
7013
MIN2(GFX_VER == 5 ? 48 : 24, batch->ice->urb.nr_sf_entries) - 1;
7014
7015
sf.SpritePointEnable = cso_state->point_quad_rasterization;
7016
sf.DestinationOriginHorizontalBias = 0.5;
7017
sf.DestinationOriginVerticalBias = 0.5;
7018
7019
sf.LastPixelEnable = cso_state->line_last_pixel;
7020
sf.LineWidth = get_line_width(cso_state);
7021
sf.PointWidth = cso_state->point_size;
7022
sf.PointWidthSource = cso_state->point_size_per_vertex ? Vertex : State;
7023
#if GFX_VERx10 == 45 || GFX_VER >= 5
7024
sf.AALineDistanceMode = AALINEDISTANCE_TRUE;
7025
#endif
7026
sf.ViewportTransformEnable = true;
7027
sf.FrontWinding = cso_state->front_ccw ? 1 : 0;
7028
sf.ScissorRectangleEnable = true;
7029
sf.CullMode = translate_cull_mode(cso_state->cull_face);
7030
7031
if (cso_state->flatshade_first) {
7032
sf.TriangleFanProvokingVertexSelect = 1;
7033
} else {
7034
sf.TriangleStripListProvokingVertexSelect = 2;
7035
sf.TriangleFanProvokingVertexSelect = 2;
7036
sf.LineStripListProvokingVertexSelect = 1;
7037
}
7038
}
7039
#else
7040
struct crocus_rasterizer_state *cso = ice->state.cso_rast;
7041
uint32_t dynamic_sf[GENX(3DSTATE_SF_length)];
7042
crocus_pack_command(GENX(3DSTATE_SF), &dynamic_sf, sf) {
7043
sf.ViewportTransformEnable = !ice->state.window_space_position;
7044
7045
#if GFX_VER == 6
7046
const struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data);
7047
uint32_t urb_entry_read_length;
7048
uint32_t urb_entry_read_offset;
7049
uint32_t point_sprite_enables;
7050
calculate_attr_overrides(ice, sf.Attribute, &point_sprite_enables,
7051
&urb_entry_read_length,
7052
&urb_entry_read_offset);
7053
sf.VertexURBEntryReadLength = urb_entry_read_length;
7054
sf.VertexURBEntryReadOffset = urb_entry_read_offset;
7055
sf.PointSpriteTextureCoordinateEnable = point_sprite_enables;
7056
sf.ConstantInterpolationEnable = wm_prog_data->flat_inputs;
7057
sf.NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs;
7058
#endif
7059
7060
#if GFX_VER >= 6 && GFX_VER < 8
7061
if (ice->state.framebuffer.samples > 1 && ice->state.cso_rast->cso.multisample)
7062
sf.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN;
7063
#endif
7064
#if GFX_VER == 7
7065
if (ice->state.framebuffer.zsbuf) {
7066
struct crocus_resource *zres, *sres;
7067
crocus_get_depth_stencil_resources(&batch->screen->devinfo,
7068
ice->state.framebuffer.zsbuf->texture,
7069
&zres, &sres);
7070
/* ANV thinks that the stencil-ness doesn't matter, this is just
7071
* about handling polygon offset scaling.
7072
*/
7073
sf.DepthBufferSurfaceFormat = zres ? isl_format_get_depth_format(zres->surf.format, false) : D16_UNORM;
7074
}
7075
#endif
7076
}
7077
crocus_emit_merge(batch, cso->sf, dynamic_sf,
7078
ARRAY_SIZE(dynamic_sf));
7079
#if GFX_VER == 8
7080
crocus_batch_emit(batch, cso->raster, sizeof(cso->raster));
7081
#endif
7082
#endif
7083
}
7084
7085
if (dirty & CROCUS_DIRTY_WM) {
7086
struct crocus_rasterizer_state *cso = ice->state.cso_rast;
7087
const struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data);
7088
UNUSED bool writes_depth = wm_prog_data->computed_depth_mode != BRW_PSCDEPTH_OFF;
7089
UNUSED const struct shader_info *fs_info =
7090
crocus_get_shader_info(ice, MESA_SHADER_FRAGMENT);
7091
7092
#if GFX_VER == 6
7093
struct push_bos push_bos = {};
7094
setup_constant_buffers(ice, batch, MESA_SHADER_FRAGMENT, &push_bos);
7095
7096
emit_push_constant_packets(ice, batch, MESA_SHADER_FRAGMENT, &push_bos);
7097
#endif
7098
#if GFX_VER >= 6
7099
crocus_emit_cmd(batch, GENX(3DSTATE_WM), wm)
7100
#else
7101
uint32_t *wm_ptr = stream_state(batch,
7102
GENX(WM_STATE_length) * 4, 32, &ice->shaders.wm_offset);
7103
7104
dirty |= CROCUS_DIRTY_GEN5_PIPELINED_POINTERS;
7105
7106
_crocus_pack_state(batch, GENX(WM_STATE), wm_ptr, wm)
7107
#endif
7108
{
7109
#if GFX_VER <= 6
7110
wm._8PixelDispatchEnable = wm_prog_data->dispatch_8;
7111
wm._16PixelDispatchEnable = wm_prog_data->dispatch_16;
7112
wm._32PixelDispatchEnable = wm_prog_data->dispatch_32;
7113
#endif
7114
#if GFX_VER == 4
7115
/* On gen4, we only have one shader kernel */
7116
if (brw_wm_state_has_ksp(wm, 0)) {
7117
wm.KernelStartPointer0 = KSP(ice, ice->shaders.prog[MESA_SHADER_FRAGMENT]);
7118
wm.GRFRegisterCount0 = brw_wm_prog_data_reg_blocks(wm_prog_data, wm, 0);
7119
wm.DispatchGRFStartRegisterForConstantSetupData0 =
7120
wm_prog_data->base.dispatch_grf_start_reg;
7121
}
7122
#elif GFX_VER == 5
7123
wm.KernelStartPointer0 = KSP(ice, ice->shaders.prog[MESA_SHADER_FRAGMENT]) +
7124
brw_wm_prog_data_prog_offset(wm_prog_data, wm, 0);
7125
wm.KernelStartPointer1 = KSP(ice, ice->shaders.prog[MESA_SHADER_FRAGMENT]) +
7126
brw_wm_prog_data_prog_offset(wm_prog_data, wm, 1);
7127
wm.KernelStartPointer2 = KSP(ice, ice->shaders.prog[MESA_SHADER_FRAGMENT]) +
7128
brw_wm_prog_data_prog_offset(wm_prog_data, wm, 2);
7129
7130
wm.GRFRegisterCount0 = brw_wm_prog_data_reg_blocks(wm_prog_data, wm, 0);
7131
wm.GRFRegisterCount1 = brw_wm_prog_data_reg_blocks(wm_prog_data, wm, 1);
7132
wm.GRFRegisterCount2 = brw_wm_prog_data_reg_blocks(wm_prog_data, wm, 2);
7133
7134
wm.DispatchGRFStartRegisterForConstantSetupData0 =
7135
wm_prog_data->base.dispatch_grf_start_reg;
7136
#elif GFX_VER == 6
7137
wm.KernelStartPointer0 = KSP(ice, ice->shaders.prog[MESA_SHADER_FRAGMENT]) +
7138
brw_wm_prog_data_prog_offset(wm_prog_data, wm, 0);
7139
wm.KernelStartPointer1 = KSP(ice, ice->shaders.prog[MESA_SHADER_FRAGMENT]) +
7140
brw_wm_prog_data_prog_offset(wm_prog_data, wm, 1);
7141
wm.KernelStartPointer2 = KSP(ice, ice->shaders.prog[MESA_SHADER_FRAGMENT]) +
7142
brw_wm_prog_data_prog_offset(wm_prog_data, wm, 2);
7143
7144
wm.DispatchGRFStartRegisterForConstantSetupData0 =
7145
brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, 0);
7146
wm.DispatchGRFStartRegisterForConstantSetupData1 =
7147
brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, 1);
7148
wm.DispatchGRFStartRegisterForConstantSetupData2 =
7149
brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, 2);
7150
#endif
7151
#if GFX_VER <= 5
7152
wm.ConstantURBEntryReadLength = wm_prog_data->base.curb_read_length;
7153
wm.ConstantURBEntryReadOffset = ice->curbe.wm_start * 2;
7154
wm.SetupURBEntryReadLength = wm_prog_data->num_varying_inputs * 2;
7155
wm.SetupURBEntryReadOffset = 0;
7156
wm.EarlyDepthTestEnable = true;
7157
wm.LineAntialiasingRegionWidth = _05pixels;
7158
wm.LineEndCapAntialiasingRegionWidth = _10pixels;
7159
wm.DepthCoefficientURBReadOffset = 1;
7160
7161
if (cso->cso.offset_tri) {
7162
wm.GlobalDepthOffsetEnable = true;
7163
7164
/* Something weird going on with legacy_global_depth_bias,
7165
* offset_constant, scaling and MRD. This value passes glean
7166
* but gives some odd results elsewere (eg. the
7167
* quad-offset-units test).
7168
*/
7169
wm.GlobalDepthOffsetConstant = cso->cso.offset_units * 2;
7170
wm.GlobalDepthOffsetScale = cso->cso.offset_scale;
7171
}
7172
wm.SamplerStatePointer = ro_bo(batch->state.bo,
7173
ice->state.shaders[MESA_SHADER_FRAGMENT].sampler_offset);
7174
#endif
7175
7176
wm.StatisticsEnable = (GFX_VER >= 6 || ice->state.stats_wm) ?
7177
ice->state.statistics_counters_enabled : 0;
7178
7179
#if GFX_VER >= 6
7180
wm.LineAntialiasingRegionWidth = _10pixels;
7181
wm.LineEndCapAntialiasingRegionWidth = _05pixels;
7182
7183
wm.PointRasterizationRule = RASTRULE_UPPER_RIGHT;
7184
wm.BarycentricInterpolationMode = wm_prog_data->barycentric_interp_modes;
7185
#endif
7186
#if GFX_VER == 6
7187
wm.DualSourceBlendEnable = wm_prog_data->dual_src_blend &&
7188
ice->state.cso_blend->dual_color_blending;
7189
wm.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;
7190
wm.NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs;
7191
7192
/* From the SNB PRM, volume 2 part 1, page 281:
7193
* "If the PS kernel does not need the Position XY Offsets
7194
* to compute a Position XY value, then this field should be
7195
* programmed to POSOFFSET_NONE."
7196
*
7197
* "SW Recommendation: If the PS kernel needs the Position Offsets
7198
* to compute a Position XY value, this field should match Position
7199
* ZW Interpolation Mode to ensure a consistent position.xyzw
7200
* computation."
7201
* We only require XY sample offsets. So, this recommendation doesn't
7202
* look useful at the moment. We might need this in future.
7203
*/
7204
if (wm_prog_data->uses_pos_offset)
7205
wm.PositionXYOffsetSelect = POSOFFSET_SAMPLE;
7206
else
7207
wm.PositionXYOffsetSelect = POSOFFSET_NONE;
7208
#endif
7209
wm.LineStippleEnable = cso->cso.line_stipple_enable;
7210
wm.PolygonStippleEnable = cso->cso.poly_stipple_enable;
7211
7212
#if GFX_VER < 7
7213
if (wm_prog_data->base.use_alt_mode)
7214
wm.FloatingPointMode = FLOATING_POINT_MODE_Alternate;
7215
wm.BindingTableEntryCount = ice->shaders.prog[MESA_SHADER_FRAGMENT]->bt.size_bytes / 4;
7216
wm.MaximumNumberofThreads = batch->screen->devinfo.max_wm_threads - 1;
7217
#endif
7218
7219
#if GFX_VER < 8
7220
#if GFX_VER >= 6
7221
wm.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;
7222
7223
struct pipe_framebuffer_state *fb = &ice->state.framebuffer;
7224
if (fb->samples > 1) {
7225
if (cso->cso.multisample)
7226
wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN;
7227
else
7228
wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL;
7229
7230
if (wm_prog_data->persample_dispatch)
7231
wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
7232
else
7233
wm.MultisampleDispatchMode = MSDISPMODE_PERPIXEL;
7234
} else {
7235
wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL;
7236
wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
7237
}
7238
#endif
7239
7240
wm.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
7241
7242
if (wm_prog_data->uses_kill ||
7243
ice->state.cso_zsa->cso.alpha_enabled ||
7244
ice->state.cso_blend->cso.alpha_to_coverage ||
7245
(GFX_VER >= 6 && wm_prog_data->uses_omask))
7246
wm.PixelShaderKillsPixel = true;
7247
7248
if (has_writeable_rt(ice->state.cso_blend, fs_info) ||
7249
writes_depth || wm.PixelShaderKillsPixel ||
7250
(GFX_VER >= 6 && wm_prog_data->has_side_effects))
7251
wm.ThreadDispatchEnable = true;
7252
7253
#if GFX_VER >= 7
7254
wm.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;
7255
wm.PixelShaderUsesInputCoverageMask = wm_prog_data->uses_sample_mask;
7256
#else
7257
if (wm_prog_data->base.total_scratch) {
7258
struct crocus_bo *bo = crocus_get_scratch_space(ice, wm_prog_data->base.total_scratch,
7259
MESA_SHADER_FRAGMENT);
7260
wm.PerThreadScratchSpace = ffs(wm_prog_data->base.total_scratch) - 11;
7261
wm.ScratchSpaceBasePointer = rw_bo(bo, 0);
7262
}
7263
7264
wm.PixelShaderComputedDepth = writes_depth;
7265
7266
#endif
7267
/* The "UAV access enable" bits are unnecessary on HSW because they only
7268
* seem to have an effect on the HW-assisted coherency mechanism which we
7269
* don't need, and the rasterization-related UAV_ONLY flag and the
7270
* DISPATCH_ENABLE bit can be set independently from it.
7271
* C.f. gen8_upload_ps_extra().
7272
*
7273
* BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_FS_PROG_DATA | _NEW_BUFFERS |
7274
* _NEW_COLOR
7275
*/
7276
#if GFX_VERx10 == 75
7277
if (!(has_writeable_rt(ice->state.cso_blend, fs_info) || writes_depth) &&
7278
wm_prog_data->has_side_effects)
7279
wm.PSUAVonly = ON;
7280
#endif
7281
#endif
7282
#if GFX_VER >= 7
7283
/* BRW_NEW_FS_PROG_DATA */
7284
if (wm_prog_data->early_fragment_tests)
7285
wm.EarlyDepthStencilControl = EDSC_PREPS;
7286
else if (wm_prog_data->has_side_effects)
7287
wm.EarlyDepthStencilControl = EDSC_PSEXEC;
7288
#endif
7289
#if GFX_VER == 8
7290
/* We could skip this bit if color writes are enabled. */
7291
if (wm_prog_data->has_side_effects || wm_prog_data->uses_kill)
7292
wm.ForceThreadDispatchEnable = ForceON;
7293
#endif
7294
};
7295
7296
#if GFX_VER <= 5
7297
if (ice->state.global_depth_offset_clamp != cso->cso.offset_clamp) {
7298
crocus_emit_cmd(batch, GENX(3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP), clamp) {
7299
clamp.GlobalDepthOffsetClamp = cso->cso.offset_clamp;
7300
}
7301
ice->state.global_depth_offset_clamp = cso->cso.offset_clamp;
7302
}
7303
#endif
7304
}
7305
7306
#if GFX_VER >= 7
7307
if (dirty & CROCUS_DIRTY_GEN7_SBE) {
7308
crocus_emit_sbe(batch, ice);
7309
}
7310
#endif
7311
7312
#if GFX_VER >= 8
7313
if (dirty & CROCUS_DIRTY_GEN8_PS_BLEND) {
7314
struct crocus_compiled_shader *shader = ice->shaders.prog[MESA_SHADER_FRAGMENT];
7315
struct crocus_blend_state *cso_blend = ice->state.cso_blend;
7316
struct crocus_depth_stencil_alpha_state *cso_zsa = ice->state.cso_zsa;
7317
struct brw_wm_prog_data *wm_prog_data = (void *) shader->prog_data;
7318
const struct shader_info *fs_info =
7319
crocus_get_shader_info(ice, MESA_SHADER_FRAGMENT);
7320
uint32_t dynamic_pb[GENX(3DSTATE_PS_BLEND_length)];
7321
crocus_pack_command(GENX(3DSTATE_PS_BLEND), &dynamic_pb, pb) {
7322
pb.HasWriteableRT = has_writeable_rt(cso_blend, fs_info);
7323
pb.AlphaTestEnable = cso_zsa->cso.alpha_enabled;
7324
pb.ColorBufferBlendEnable = (cso_blend->blend_enables & 1) &&
7325
(!cso_blend->dual_color_blending || wm_prog_data->dual_src_blend);
7326
}
7327
crocus_emit_merge(batch, cso_blend->ps_blend, dynamic_pb,
7328
ARRAY_SIZE(cso_blend->ps_blend));
7329
}
7330
#endif
7331
7332
#if GFX_VER >= 6
7333
if (dirty & CROCUS_DIRTY_GEN6_WM_DEPTH_STENCIL) {
7334
7335
#if GFX_VER >= 8
7336
crocus_emit_cmd(batch, GENX(3DSTATE_WM_DEPTH_STENCIL), wmds) {
7337
set_depth_stencil_bits(ice, &wmds);
7338
}
7339
#else
7340
uint32_t ds_offset;
7341
void *ds_map = stream_state(batch,
7342
sizeof(uint32_t) * GENX(DEPTH_STENCIL_STATE_length),
7343
64, &ds_offset);
7344
_crocus_pack_state(batch, GENX(DEPTH_STENCIL_STATE), ds_map, ds) {
7345
set_depth_stencil_bits(ice, &ds);
7346
}
7347
7348
#if GFX_VER == 6
7349
crocus_emit_cmd(batch, GENX(3DSTATE_CC_STATE_POINTERS), ptr) {
7350
ptr.PointertoDEPTH_STENCIL_STATE = ds_offset;
7351
ptr.DEPTH_STENCIL_STATEChange = true;
7352
}
7353
#else
7354
crocus_emit_cmd(batch, GENX(3DSTATE_DEPTH_STENCIL_STATE_POINTERS), ptr) {
7355
ptr.PointertoDEPTH_STENCIL_STATE = ds_offset;
7356
}
7357
#endif
7358
#endif
7359
}
7360
7361
if (dirty & CROCUS_DIRTY_GEN6_SCISSOR_RECT) {
7362
/* Align to 64-byte boundary as per anv. */
7363
uint32_t scissor_offset;
7364
struct pipe_scissor_state *scissor_map = (void *)
7365
stream_state(batch, sizeof(struct pipe_scissor_state) * ice->state.num_viewports,
7366
64, &scissor_offset);
7367
for (int i = 0; i < ice->state.num_viewports; i++) {
7368
struct pipe_scissor_state scissor;
7369
crocus_fill_scissor_rect(ice, i, &scissor);
7370
scissor_map[i] = scissor;
7371
}
7372
7373
crocus_emit_cmd(batch, GENX(3DSTATE_SCISSOR_STATE_POINTERS), ptr) {
7374
ptr.ScissorRectPointer = scissor_offset;
7375
}
7376
}
7377
#endif
7378
7379
if (dirty & CROCUS_DIRTY_DEPTH_BUFFER) {
7380
struct isl_device *isl_dev = &batch->screen->isl_dev;
7381
#if GFX_VER >= 6
7382
crocus_emit_depth_stall_flushes(batch);
7383
#endif
7384
void *batch_ptr;
7385
struct crocus_resource *zres, *sres;
7386
struct pipe_framebuffer_state *cso = &ice->state.framebuffer;
7387
batch_ptr = crocus_get_command_space(batch, isl_dev->ds.size);
7388
7389
struct isl_view view = {
7390
.base_level = 0,
7391
.levels = 1,
7392
.base_array_layer = 0,
7393
.array_len = 1,
7394
.swizzle = ISL_SWIZZLE_IDENTITY,
7395
};
7396
struct isl_depth_stencil_hiz_emit_info info = { .view = &view };
7397
7398
if (cso->zsbuf) {
7399
crocus_get_depth_stencil_resources(&batch->screen->devinfo, cso->zsbuf->texture, &zres, &sres);
7400
struct crocus_surface *zsbuf = (struct crocus_surface *)cso->zsbuf;
7401
if (zsbuf->align_res) {
7402
zres = (struct crocus_resource *)zsbuf->align_res;
7403
}
7404
view.base_level = cso->zsbuf->u.tex.level;
7405
view.base_array_layer = cso->zsbuf->u.tex.first_layer;
7406
view.array_len = cso->zsbuf->u.tex.last_layer - cso->zsbuf->u.tex.first_layer + 1;
7407
7408
if (zres) {
7409
view.usage |= ISL_SURF_USAGE_DEPTH_BIT;
7410
7411
info.depth_surf = &zres->surf;
7412
info.depth_address = crocus_command_reloc(batch,
7413
(batch_ptr - batch->command.map) + isl_dev->ds.depth_offset,
7414
zres->bo, 0, RELOC_32BIT);
7415
7416
info.mocs = crocus_mocs(zres->bo, isl_dev);
7417
view.format = zres->surf.format;
7418
7419
if (crocus_resource_level_has_hiz(zres, view.base_level)) {
7420
info.hiz_usage = zres->aux.usage;
7421
info.hiz_surf = &zres->aux.surf;
7422
uint32_t hiz_offset = 0;
7423
7424
#if GFX_VER == 6
7425
/* HiZ surfaces on Sandy Bridge technically don't support
7426
* mip-mapping. However, we can fake it by offsetting to the
7427
* first slice of LOD0 in the HiZ surface.
7428
*/
7429
isl_surf_get_image_offset_B_tile_sa(&zres->aux.surf,
7430
view.base_level, 0, 0,
7431
&hiz_offset, NULL, NULL);
7432
#endif
7433
info.hiz_address = crocus_command_reloc(batch,
7434
(batch_ptr - batch->command.map) + isl_dev->ds.hiz_offset,
7435
zres->aux.bo, zres->aux.offset + hiz_offset,
7436
RELOC_32BIT);
7437
info.depth_clear_value = crocus_resource_get_clear_color(zres).f32[0];
7438
}
7439
}
7440
7441
#if GFX_VER >= 6
7442
if (sres) {
7443
view.usage |= ISL_SURF_USAGE_STENCIL_BIT;
7444
info.stencil_aux_usage = sres->aux.usage;
7445
info.stencil_surf = &sres->surf;
7446
7447
uint32_t stencil_offset = 0;
7448
#if GFX_VER == 6
7449
/* Stencil surfaces on Sandy Bridge technically don't support
7450
* mip-mapping. However, we can fake it by offsetting to the
7451
* first slice of LOD0 in the stencil surface.
7452
*/
7453
isl_surf_get_image_offset_B_tile_sa(&sres->surf,
7454
view.base_level, 0, 0,
7455
&stencil_offset, NULL, NULL);
7456
#endif
7457
7458
info.stencil_address = crocus_command_reloc(batch,
7459
(batch_ptr - batch->command.map) + isl_dev->ds.stencil_offset,
7460
sres->bo, stencil_offset, RELOC_32BIT);
7461
if (!zres) {
7462
view.format = sres->surf.format;
7463
info.mocs = crocus_mocs(sres->bo, isl_dev);
7464
}
7465
}
7466
#endif
7467
}
7468
isl_emit_depth_stencil_hiz_s(isl_dev, batch_ptr, &info);
7469
}
7470
7471
/* TODO: Disable emitting this until something uses a stipple. */
7472
if (dirty & CROCUS_DIRTY_POLYGON_STIPPLE) {
7473
crocus_emit_cmd(batch, GENX(3DSTATE_POLY_STIPPLE_PATTERN), poly) {
7474
for (int i = 0; i < 32; i++) {
7475
poly.PatternRow[i] = ice->state.poly_stipple.stipple[i];
7476
}
7477
}
7478
}
7479
7480
if (dirty & CROCUS_DIRTY_LINE_STIPPLE) {
7481
struct crocus_rasterizer_state *cso = ice->state.cso_rast;
7482
crocus_batch_emit(batch, cso->line_stipple, sizeof(cso->line_stipple));
7483
}
7484
7485
#if GFX_VER >= 8
7486
if (dirty & CROCUS_DIRTY_GEN8_VF_TOPOLOGY) {
7487
crocus_emit_cmd(batch, GENX(3DSTATE_VF_TOPOLOGY), topo) {
7488
topo.PrimitiveTopologyType =
7489
translate_prim_type(draw->mode, draw->vertices_per_patch);
7490
}
7491
}
7492
#endif
7493
7494
#if GFX_VER <= 5
7495
if (dirty & CROCUS_DIRTY_GEN5_PIPELINED_POINTERS) {
7496
upload_pipelined_state_pointers(batch, ice->shaders.ff_gs_prog ? true : false, ice->shaders.gs_offset,
7497
ice->shaders.vs_offset, ice->shaders.sf_offset,
7498
ice->shaders.clip_offset, ice->shaders.wm_offset, ice->shaders.cc_offset);
7499
crocus_upload_urb_fence(batch);
7500
7501
crocus_emit_cmd(batch, GENX(CS_URB_STATE), cs) {
7502
cs.NumberofURBEntries = ice->urb.nr_cs_entries;
7503
cs.URBEntryAllocationSize = ice->urb.csize - 1;
7504
}
7505
dirty |= CROCUS_DIRTY_GEN4_CURBE;
7506
}
7507
#endif
7508
if (dirty & CROCUS_DIRTY_DRAWING_RECTANGLE) {
7509
struct pipe_framebuffer_state *fb = &ice->state.framebuffer;
7510
if (fb->width && fb->height) {
7511
crocus_emit_cmd(batch, GENX(3DSTATE_DRAWING_RECTANGLE), rect) {
7512
rect.ClippedDrawingRectangleXMax = fb->width - 1;
7513
rect.ClippedDrawingRectangleYMax = fb->height - 1;
7514
}
7515
}
7516
}
7517
7518
if (dirty & CROCUS_DIRTY_VERTEX_BUFFERS) {
7519
const uint32_t user_count = util_bitcount(ice->state.bound_vertex_buffers);
7520
const uint32_t count = user_count +
7521
ice->state.vs_uses_draw_params + ice->state.vs_uses_derived_draw_params;
7522
uint32_t dynamic_bound = ice->state.bound_vertex_buffers;
7523
7524
if (count) {
7525
const unsigned vb_dwords = GENX(VERTEX_BUFFER_STATE_length);
7526
7527
uint32_t *map =
7528
crocus_get_command_space(batch, 4 * (1 + vb_dwords * count));
7529
_crocus_pack_command(batch, GENX(3DSTATE_VERTEX_BUFFERS), map, vb) {
7530
vb.DWordLength = (vb_dwords * count + 1) - 2;
7531
}
7532
map += 1;
7533
7534
uint32_t bound = dynamic_bound;
7535
int i;
7536
while (bound) {
7537
i = u_bit_scan(&bound);
7538
struct pipe_vertex_buffer *buf = &ice->state.vertex_buffers[i];
7539
struct crocus_bo *bo = crocus_resource_bo(buf->buffer.resource);
7540
uint32_t step_rate = ice->state.cso_vertex_elements->step_rate[i];
7541
7542
emit_vertex_buffer_state(batch, i, bo,
7543
buf->buffer_offset,
7544
ice->state.vb_end[i],
7545
buf->stride,
7546
step_rate,
7547
&map);
7548
}
7549
i = user_count;
7550
if (ice->state.vs_uses_draw_params) {
7551
struct crocus_resource *res = (struct crocus_resource *)ice->draw.draw_params.res;
7552
emit_vertex_buffer_state(batch, i++,
7553
res->bo,
7554
ice->draw.draw_params.offset,
7555
ice->draw.draw_params.res->width0,
7556
0, 0, &map);
7557
}
7558
if (ice->state.vs_uses_derived_draw_params) {
7559
struct crocus_resource *res = (struct crocus_resource *)ice->draw.derived_draw_params.res;
7560
emit_vertex_buffer_state(batch, i++,
7561
res->bo,
7562
ice->draw.derived_draw_params.offset,
7563
ice->draw.derived_draw_params.res->width0,
7564
0, 0, &map);
7565
}
7566
}
7567
}
7568
7569
if (dirty & CROCUS_DIRTY_VERTEX_ELEMENTS) {
7570
struct crocus_vertex_element_state *cso = ice->state.cso_vertex_elements;
7571
const unsigned entries = MAX2(cso->count, 1);
7572
if (!(ice->state.vs_needs_sgvs_element ||
7573
ice->state.vs_uses_derived_draw_params ||
7574
ice->state.vs_needs_edge_flag)) {
7575
crocus_batch_emit(batch, cso->vertex_elements, sizeof(uint32_t) *
7576
(1 + entries * GENX(VERTEX_ELEMENT_STATE_length)));
7577
} else {
7578
uint32_t dynamic_ves[1 + 33 * GENX(VERTEX_ELEMENT_STATE_length)];
7579
const unsigned dyn_count = cso->count +
7580
ice->state.vs_needs_sgvs_element +
7581
ice->state.vs_uses_derived_draw_params;
7582
7583
crocus_pack_command(GENX(3DSTATE_VERTEX_ELEMENTS),
7584
&dynamic_ves, ve) {
7585
ve.DWordLength =
7586
1 + GENX(VERTEX_ELEMENT_STATE_length) * dyn_count - 2;
7587
}
7588
memcpy(&dynamic_ves[1], &cso->vertex_elements[1],
7589
(cso->count - ice->state.vs_needs_edge_flag) *
7590
GENX(VERTEX_ELEMENT_STATE_length) * sizeof(uint32_t));
7591
uint32_t *ve_pack_dest =
7592
&dynamic_ves[1 + (cso->count - ice->state.vs_needs_edge_flag) *
7593
GENX(VERTEX_ELEMENT_STATE_length)];
7594
7595
if (ice->state.vs_needs_sgvs_element) {
7596
uint32_t base_ctrl = ice->state.vs_uses_draw_params ?
7597
VFCOMP_STORE_SRC : VFCOMP_STORE_0;
7598
crocus_pack_state(GENX(VERTEX_ELEMENT_STATE), ve_pack_dest, ve) {
7599
ve.Valid = true;
7600
ve.VertexBufferIndex =
7601
util_bitcount64(ice->state.bound_vertex_buffers);
7602
ve.SourceElementFormat = ISL_FORMAT_R32G32_UINT;
7603
ve.Component0Control = base_ctrl;
7604
ve.Component1Control = base_ctrl;
7605
#if GFX_VER < 8
7606
ve.Component2Control = ice->state.vs_uses_vertexid ? VFCOMP_STORE_VID : VFCOMP_STORE_0;
7607
ve.Component3Control = ice->state.vs_uses_instanceid ? VFCOMP_STORE_IID : VFCOMP_STORE_0;
7608
#else
7609
ve.Component2Control = VFCOMP_STORE_0;
7610
ve.Component3Control = VFCOMP_STORE_0;
7611
#endif
7612
#if GFX_VER < 5
7613
ve.DestinationElementOffset = cso->count * 4;
7614
#endif
7615
}
7616
ve_pack_dest += GENX(VERTEX_ELEMENT_STATE_length);
7617
}
7618
if (ice->state.vs_uses_derived_draw_params) {
7619
crocus_pack_state(GENX(VERTEX_ELEMENT_STATE), ve_pack_dest, ve) {
7620
ve.Valid = true;
7621
ve.VertexBufferIndex =
7622
util_bitcount64(ice->state.bound_vertex_buffers) +
7623
ice->state.vs_uses_draw_params;
7624
ve.SourceElementFormat = ISL_FORMAT_R32G32_UINT;
7625
ve.Component0Control = VFCOMP_STORE_SRC;
7626
ve.Component1Control = VFCOMP_STORE_SRC;
7627
ve.Component2Control = VFCOMP_STORE_0;
7628
ve.Component3Control = VFCOMP_STORE_0;
7629
#if GFX_VER < 5
7630
ve.DestinationElementOffset = (cso->count + ice->state.vs_needs_sgvs_element) * 4;
7631
#endif
7632
}
7633
ve_pack_dest += GENX(VERTEX_ELEMENT_STATE_length);
7634
}
7635
if (ice->state.vs_needs_edge_flag) {
7636
for (int i = 0; i < GENX(VERTEX_ELEMENT_STATE_length); i++)
7637
ve_pack_dest[i] = cso->edgeflag_ve[i];
7638
}
7639
7640
crocus_batch_emit(batch, &dynamic_ves, sizeof(uint32_t) *
7641
(1 + dyn_count * GENX(VERTEX_ELEMENT_STATE_length)));
7642
}
7643
7644
#if GFX_VER == 8
7645
if (!ice->state.vs_needs_edge_flag) {
7646
crocus_batch_emit(batch, cso->vf_instancing, sizeof(uint32_t) *
7647
entries * GENX(3DSTATE_VF_INSTANCING_length));
7648
} else {
7649
assert(cso->count > 0);
7650
const unsigned edgeflag_index = cso->count - 1;
7651
uint32_t dynamic_vfi[33 * GENX(3DSTATE_VF_INSTANCING_length)];
7652
memcpy(&dynamic_vfi[0], cso->vf_instancing, edgeflag_index *
7653
GENX(3DSTATE_VF_INSTANCING_length) * sizeof(uint32_t));
7654
7655
uint32_t *vfi_pack_dest = &dynamic_vfi[0] +
7656
edgeflag_index * GENX(3DSTATE_VF_INSTANCING_length);
7657
crocus_pack_command(GENX(3DSTATE_VF_INSTANCING), vfi_pack_dest, vi) {
7658
vi.VertexElementIndex = edgeflag_index +
7659
ice->state.vs_needs_sgvs_element +
7660
ice->state.vs_uses_derived_draw_params;
7661
}
7662
for (int i = 0; i < GENX(3DSTATE_VF_INSTANCING_length); i++)
7663
vfi_pack_dest[i] |= cso->edgeflag_vfi[i];
7664
7665
crocus_batch_emit(batch, &dynamic_vfi[0], sizeof(uint32_t) *
7666
entries * GENX(3DSTATE_VF_INSTANCING_length));
7667
}
7668
#endif
7669
}
7670
7671
#if GFX_VER == 8
7672
if (dirty & CROCUS_DIRTY_GEN8_VF_SGVS) {
7673
const struct brw_vs_prog_data *vs_prog_data = (void *)
7674
ice->shaders.prog[MESA_SHADER_VERTEX]->prog_data;
7675
struct crocus_vertex_element_state *cso = ice->state.cso_vertex_elements;
7676
7677
crocus_emit_cmd(batch, GENX(3DSTATE_VF_SGVS), sgv) {
7678
if (vs_prog_data->uses_vertexid) {
7679
sgv.VertexIDEnable = true;
7680
sgv.VertexIDComponentNumber = 2;
7681
sgv.VertexIDElementOffset =
7682
cso->count - ice->state.vs_needs_edge_flag;
7683
}
7684
7685
if (vs_prog_data->uses_instanceid) {
7686
sgv.InstanceIDEnable = true;
7687
sgv.InstanceIDComponentNumber = 3;
7688
sgv.InstanceIDElementOffset =
7689
cso->count - ice->state.vs_needs_edge_flag;
7690
}
7691
}
7692
}
7693
#endif
7694
#if GFX_VERx10 >= 75
7695
if (dirty & CROCUS_DIRTY_GEN75_VF) {
7696
crocus_emit_cmd(batch, GENX(3DSTATE_VF), vf) {
7697
if (draw->primitive_restart) {
7698
vf.IndexedDrawCutIndexEnable = true;
7699
vf.CutIndex = draw->restart_index;
7700
}
7701
}
7702
}
7703
#endif
7704
7705
#if GFX_VER == 8
7706
if (dirty & CROCUS_DIRTY_GEN8_PMA_FIX) {
7707
bool enable = want_pma_fix(ice);
7708
genX(crocus_update_pma_fix)(ice, batch, enable);
7709
}
7710
#endif
7711
7712
#if GFX_VER <= 5
7713
if (dirty & CROCUS_DIRTY_GEN4_CURBE) {
7714
gen4_upload_curbe(batch);
7715
}
7716
#endif
7717
}
7718
7719
/**
 * Emit all state and the 3DPRIMITIVE command for one draw call.
 *
 * Flow: upload dirty render state, (re)emit 3DSTATE_INDEX_BUFFER if the
 * bound index buffer changed, program the 3DPRIM_* MMIO registers for
 * indirect draws (GFX_VER >= 7 only), then emit 3DPRIMITIVE itself.
 *
 * \param ice           context-wide state tracker
 * \param batch         command batch to emit into
 * \param draw          gallium draw info (topology, index size, restart, ...)
 * \param drawid_offset index of this draw within a multi-draw sequence; used
 *                      to predicate draws past the GPU-side draw count
 * \param indirect      indirect draw parameters, or NULL for a direct draw
 * \param sc            start/count/index-bias for this draw
 */
static void
crocus_upload_render_state(struct crocus_context *ice,
                           struct crocus_batch *batch,
                           const struct pipe_draw_info *draw,
                           unsigned drawid_offset,
                           const struct pipe_draw_indirect_info *indirect,
                           const struct pipe_draw_start_count_bias *sc)
{
#if GFX_VER >= 7
   /* Conditional rendering: 3DPRIMITIVE honors MI_PREDICATE when enabled. */
   bool use_predicate = ice->state.predicate == CROCUS_PREDICATE_STATE_USE_BIT;
#endif

   /* NOTE(review): no_wrap presumably keeps the batch from being split while
    * the draw's state packets are emitted — confirm against crocus_batch.  */
   batch->no_wrap = true;
   batch->contains_draw = true;

   crocus_update_surface_base_address(batch);

   crocus_upload_dirty_render_state(ice, batch, draw);

   batch->no_wrap = false;
   if (draw->index_size > 0) {
      unsigned offset;
      unsigned size;
      bool emit_index = false;

      if (draw->has_user_indices) {
         /* User-pointer indices: copy them into an upload buffer first.
          * offset is rewound by start_offset so that 3DPRIMITIVE can still
          * use sc->start as the starting index.  */
         unsigned start_offset = draw->index_size * sc->start;
         u_upload_data(ice->ctx.stream_uploader, 0,
                       sc->count * draw->index_size, 4,
                       (char *)draw->index.user + start_offset,
                       &offset, &ice->state.index_buffer.res);
         offset -= start_offset;
         size = start_offset + sc->count * draw->index_size;
         emit_index = true;
      } else {
         struct crocus_resource *res = (void *) draw->index.resource;

         /* GPU-resident index buffer: only re-emit if the bound resource
          * actually changed.  */
         if (ice->state.index_buffer.res != draw->index.resource) {
            res->bind_history |= PIPE_BIND_INDEX_BUFFER;
            pipe_resource_reference(&ice->state.index_buffer.res,
                                    draw->index.resource);
            emit_index = true;
         }
         offset = 0;
         size = draw->index.resource->width0;
      }

      /* Same buffer object, but size/format (or, pre-HSW, the primitive
       * restart flag baked into 3DSTATE_INDEX_BUFFER) differs → re-emit.  */
      if (!emit_index &&
          (ice->state.index_buffer.size != size ||
           ice->state.index_buffer.index_size != draw->index_size
#if GFX_VERx10 < 75
           || ice->state.index_buffer.prim_restart != draw->primitive_restart
#endif
           )
          )
         emit_index = true;

      if (emit_index) {
         struct crocus_bo *bo = crocus_resource_bo(ice->state.index_buffer.res);

         crocus_emit_cmd(batch, GENX(3DSTATE_INDEX_BUFFER), ib) {
#if GFX_VERx10 < 75
            /* Pre-Haswell has no 3DSTATE_VF; cut index lives here. */
            ib.CutIndexEnable = draw->primitive_restart;
#endif
            /* index_size is 1/2/4 bytes; >>1 maps to the hw format enum. */
            ib.IndexFormat = draw->index_size >> 1;
            ib.BufferStartingAddress = ro_bo(bo, offset);
#if GFX_VER >= 8
            ib.MOCS = crocus_mocs(bo, &batch->screen->isl_dev);
            ib.BufferSize = bo->size - offset;
#else
            /* Older gens take an inclusive end address instead of a size. */
            ib.BufferEndingAddress = ro_bo(bo, offset + size - 1);
#endif
         }
         /* Cache what we emitted so redundant packets can be skipped. */
         ice->state.index_buffer.size = size;
         ice->state.index_buffer.offset = offset;
         ice->state.index_buffer.index_size = draw->index_size;
#if GFX_VERx10 < 75
         ice->state.index_buffer.prim_restart = draw->primitive_restart;
#endif
      }
   }

/* MMIO registers feeding the 3DPRIMITIVE command's draw parameters. */
#define _3DPRIM_END_OFFSET          0x2420
#define _3DPRIM_START_VERTEX        0x2430
#define _3DPRIM_VERTEX_COUNT        0x2434
#define _3DPRIM_INSTANCE_COUNT      0x2438
#define _3DPRIM_START_INSTANCE      0x243C
#define _3DPRIM_BASE_VERTEX         0x2440

#if GFX_VER >= 7
   if (indirect && !indirect->count_from_stream_output) {
      if (indirect->indirect_draw_count) {
         /* Multi-draw-indirect with a GPU-side draw count: predicate each
          * draw on (drawid_offset < draw_count).  */
         use_predicate = true;

         struct crocus_bo *draw_count_bo =
            crocus_resource_bo(indirect->indirect_draw_count);
         unsigned draw_count_offset =
            indirect->indirect_draw_count_offset;

         crocus_emit_pipe_control_flush(batch,
                                        "ensure indirect draw buffer is flushed",
                                        PIPE_CONTROL_FLUSH_ENABLE);
         if (ice->state.predicate == CROCUS_PREDICATE_STATE_USE_BIT) {
            /* Conditional rendering is also active: AND the draw-count
             * test with the conditional-render predicate in CS_GPR(15).  */
#if GFX_VERx10 >= 75
            struct mi_builder b;
            mi_builder_init(&b, &batch->screen->devinfo, batch);

            /* comparison = draw id < draw count */
            struct mi_value comparison =
               mi_ult(&b, mi_imm(drawid_offset),
                      mi_mem32(ro_bo(draw_count_bo,
                                     draw_count_offset)));
#if GFX_VER == 8
            /* predicate = comparison & conditional rendering predicate */
            mi_store(&b, mi_reg32(MI_PREDICATE_RESULT),
                     mi_iand(&b, comparison, mi_reg32(CS_GPR(15))));
#else
            /* predicate = comparison & conditional rendering predicate */
            struct mi_value pred = mi_iand(&b, comparison,
                                           mi_reg32(CS_GPR(15)));

            /* HSW can't write MI_PREDICATE_RESULT directly; route the
             * combined value through SRC0 and an MI_PREDICATE compare.  */
            mi_store(&b, mi_reg64(MI_PREDICATE_SRC0), pred);
            mi_store(&b, mi_reg64(MI_PREDICATE_SRC1), mi_imm(0));

            unsigned mi_predicate = MI_PREDICATE | MI_PREDICATE_LOADOP_LOADINV |
               MI_PREDICATE_COMBINEOP_SET |
               MI_PREDICATE_COMPAREOP_SRCS_EQUAL;

            crocus_batch_emit(batch, &mi_predicate, sizeof(uint32_t));
#endif
#endif
         } else {
            uint32_t mi_predicate;

            /* Upload the id of the current primitive to MI_PREDICATE_SRC1. */
            crocus_load_register_imm64(batch, MI_PREDICATE_SRC1, drawid_offset);
            /* Upload the current draw count from the draw parameters buffer
             * to MI_PREDICATE_SRC0.
             */
            crocus_load_register_mem32(batch, MI_PREDICATE_SRC0,
                                       draw_count_bo, draw_count_offset);
            /* Zero the top 32-bits of MI_PREDICATE_SRC0 */
            crocus_load_register_imm32(batch, MI_PREDICATE_SRC0 + 4, 0);

            if (drawid_offset == 0) {
               /* First draw: result = !(draw_id == draw_count), i.e. draw
                * only when the count is non-zero.  */
               mi_predicate = MI_PREDICATE | MI_PREDICATE_LOADOP_LOADINV |
                  MI_PREDICATE_COMBINEOP_SET |
                  MI_PREDICATE_COMPAREOP_SRCS_EQUAL;
            } else {
               /* While draw_index < draw_count the predicate's result will be
                * (draw_index == draw_count) ^ TRUE = TRUE
                * When draw_index == draw_count the result is
                * (TRUE) ^ TRUE = FALSE
                * After this all results will be:
                * (FALSE) ^ FALSE = FALSE
                */
               mi_predicate = MI_PREDICATE | MI_PREDICATE_LOADOP_LOAD |
                  MI_PREDICATE_COMBINEOP_XOR |
                  MI_PREDICATE_COMPAREOP_SRCS_EQUAL;
            }
            crocus_batch_emit(batch, &mi_predicate, sizeof(uint32_t));
         }
      }

#if GFX_VER >= 7
      /* Load the 3DPRIM_* registers straight from the indirect buffer.
       * Indexed draws have a BaseVertex field at +12; non-indexed draws
       * have StartInstance there instead, so BaseVertex is forced to 0.  */
      struct crocus_bo *bo = crocus_resource_bo(indirect->buffer);
      assert(bo);

      crocus_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
         lrm.RegisterAddress = _3DPRIM_VERTEX_COUNT;
         lrm.MemoryAddress = ro_bo(bo, indirect->offset + 0);
      }
      crocus_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
         lrm.RegisterAddress = _3DPRIM_INSTANCE_COUNT;
         lrm.MemoryAddress = ro_bo(bo, indirect->offset + 4);
      }
      crocus_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
         lrm.RegisterAddress = _3DPRIM_START_VERTEX;
         lrm.MemoryAddress = ro_bo(bo, indirect->offset + 8);
      }
      if (draw->index_size) {
         crocus_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
            lrm.RegisterAddress = _3DPRIM_BASE_VERTEX;
            lrm.MemoryAddress = ro_bo(bo, indirect->offset + 12);
         }
         crocus_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
            lrm.RegisterAddress = _3DPRIM_START_INSTANCE;
            lrm.MemoryAddress = ro_bo(bo, indirect->offset + 16);
         }
      } else {
         crocus_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
            lrm.RegisterAddress = _3DPRIM_START_INSTANCE;
            lrm.MemoryAddress = ro_bo(bo, indirect->offset + 12);
         }
         crocus_emit_cmd(batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
            lri.RegisterOffset = _3DPRIM_BASE_VERTEX;
            lri.DataDWord = 0;
         }
      }
#endif
   } else if (indirect && indirect->count_from_stream_output) {
#if GFX_VERx10 >= 75
      /* Transform-feedback draw: derive the vertex count from the stream
       * output target's written-byte counter on the GPU.  */
      struct crocus_stream_output_target *so =
         (void *) indirect->count_from_stream_output;

      /* XXX: Replace with actual cache tracking */
      crocus_emit_pipe_control_flush(batch,
                                     "draw count from stream output stall",
                                     PIPE_CONTROL_CS_STALL);

      struct mi_builder b;
      mi_builder_init(&b, &batch->screen->devinfo, batch);

      /* vertex_count = (bytes_written - buffer_offset) / stride */
      struct crocus_address addr =
         ro_bo(crocus_resource_bo(&so->offset_res->base.b), so->offset_offset);
      struct mi_value offset =
         mi_iadd_imm(&b, mi_mem32(addr), -so->base.buffer_offset);

      mi_store(&b, mi_reg32(_3DPRIM_VERTEX_COUNT),
               mi_udiv32_imm(&b, offset, so->stride));

      _crocus_emit_lri(batch, _3DPRIM_START_VERTEX, 0);
      _crocus_emit_lri(batch, _3DPRIM_BASE_VERTEX, 0);
      _crocus_emit_lri(batch, _3DPRIM_START_INSTANCE, 0);
      _crocus_emit_lri(batch, _3DPRIM_INSTANCE_COUNT, draw->instance_count);
#endif
   }
#else
   /* Pre-gen7 has no indirect draw support; the state tracker must not
    * hand us one.  */
   assert(!indirect);
#endif

   crocus_emit_cmd(batch, GENX(3DPRIMITIVE), prim) {
      /* RANDOM = indexed access through the index buffer. */
      prim.VertexAccessType = draw->index_size > 0 ? RANDOM : SEQUENTIAL;
#if GFX_VER >= 7
      prim.PredicateEnable = use_predicate;
#endif

      prim.PrimitiveTopologyType = translate_prim_type(ice->state.prim_mode, draw->vertices_per_patch);
      if (indirect) {
         // XXX Probably have to do something for gen6 here?
#if GFX_VER >= 7
         /* Draw parameters come from the 3DPRIM_* registers loaded above. */
         prim.IndirectParameterEnable = true;
#endif
      } else {
#if GFX_VER >= 5
         prim.StartInstanceLocation = draw->start_instance;
#endif
         prim.InstanceCount = draw->instance_count;
         prim.VertexCountPerInstance = sc->count;

         prim.StartVertexLocation = sc->start;

         if (draw->index_size) {
            prim.BaseVertexLocation += sc->index_bias;
         }
      }
   }
}
7977
7978
#if GFX_VER >= 7
7979
7980
static void
7981
crocus_upload_compute_state(struct crocus_context *ice,
7982
struct crocus_batch *batch,
7983
const struct pipe_grid_info *grid)
7984
{
7985
const uint64_t stage_dirty = ice->state.stage_dirty;
7986
struct crocus_screen *screen = batch->screen;
7987
const struct intel_device_info *devinfo = &screen->devinfo;
7988
struct crocus_shader_state *shs = &ice->state.shaders[MESA_SHADER_COMPUTE];
7989
struct crocus_compiled_shader *shader =
7990
ice->shaders.prog[MESA_SHADER_COMPUTE];
7991
struct brw_stage_prog_data *prog_data = shader->prog_data;
7992
struct brw_cs_prog_data *cs_prog_data = (void *) prog_data;
7993
const struct brw_cs_dispatch_info dispatch =
7994
brw_cs_get_dispatch_info(devinfo, cs_prog_data, grid->block);
7995
7996
crocus_update_surface_base_address(batch);
7997
if ((stage_dirty & CROCUS_STAGE_DIRTY_CONSTANTS_CS) && shs->sysvals_need_upload)
7998
upload_sysvals(ice, MESA_SHADER_COMPUTE);
7999
8000
if (stage_dirty & CROCUS_STAGE_DIRTY_BINDINGS_CS) {
8001
crocus_populate_binding_table(ice, batch, MESA_SHADER_COMPUTE, false);
8002
ice->shaders.prog[MESA_SHADER_COMPUTE]->bind_bo_offset =
8003
crocus_upload_binding_table(ice, batch,
8004
ice->shaders.prog[MESA_SHADER_COMPUTE]->surf_offset,
8005
ice->shaders.prog[MESA_SHADER_COMPUTE]->bt.size_bytes);
8006
}
8007
8008
if (stage_dirty & CROCUS_STAGE_DIRTY_SAMPLER_STATES_CS)
8009
crocus_upload_sampler_states(ice, batch, MESA_SHADER_COMPUTE);
8010
8011
if ((stage_dirty & CROCUS_STAGE_DIRTY_CS) ||
8012
cs_prog_data->local_size[0] == 0 /* Variable local group size */) {
8013
/* The MEDIA_VFE_STATE documentation for Gen8+ says:
8014
*
8015
* "A stalling PIPE_CONTROL is required before MEDIA_VFE_STATE unless
8016
* the only bits that are changed are scoreboard related: Scoreboard
8017
* Enable, Scoreboard Type, Scoreboard Mask, Scoreboard Delta. For
8018
* these scoreboard related states, a MEDIA_STATE_FLUSH is
8019
* sufficient."
8020
*/
8021
crocus_emit_pipe_control_flush(batch,
8022
"workaround: stall before MEDIA_VFE_STATE",
8023
PIPE_CONTROL_CS_STALL);
8024
8025
crocus_emit_cmd(batch, GENX(MEDIA_VFE_STATE), vfe) {
8026
if (prog_data->total_scratch) {
8027
struct crocus_bo *bo =
8028
crocus_get_scratch_space(ice, prog_data->total_scratch,
8029
MESA_SHADER_COMPUTE);
8030
#if GFX_VER == 8
8031
/* Broadwell's Per Thread Scratch Space is in the range [0, 11]
8032
* where 0 = 1k, 1 = 2k, 2 = 4k, ..., 11 = 2M.
8033
*/
8034
vfe.PerThreadScratchSpace = ffs(prog_data->total_scratch) - 11;
8035
#elif GFX_VERx10 == 75
8036
/* Haswell's Per Thread Scratch Space is in the range [0, 10]
8037
* where 0 = 2k, 1 = 4k, 2 = 8k, ..., 10 = 2M.
8038
*/
8039
vfe.PerThreadScratchSpace = ffs(prog_data->total_scratch) - 12;
8040
#else
8041
/* Earlier platforms use the range [0, 11] to mean [1kB, 12kB]
8042
* where 0 = 1kB, 1 = 2kB, 2 = 3kB, ..., 11 = 12kB.
8043
*/
8044
vfe.PerThreadScratchSpace = prog_data->total_scratch / 1024 - 1;
8045
#endif
8046
vfe.ScratchSpaceBasePointer = rw_bo(bo, 0);
8047
}
8048
8049
vfe.MaximumNumberofThreads =
8050
devinfo->max_cs_threads * screen->subslice_total - 1;
8051
vfe.ResetGatewayTimer =
8052
Resettingrelativetimerandlatchingtheglobaltimestamp;
8053
vfe.BypassGatewayControl = true;
8054
#if GFX_VER == 7
8055
vfe.GPGPUMode = 1;
8056
#endif
8057
#if GFX_VER == 8
8058
vfe.BypassGatewayControl = true;
8059
#endif
8060
vfe.NumberofURBEntries = GFX_VER == 8 ? 2 : 0;
8061
vfe.URBEntryAllocationSize = GFX_VER == 8 ? 2 : 0;
8062
8063
vfe.CURBEAllocationSize =
8064
ALIGN(cs_prog_data->push.per_thread.regs * dispatch.threads +
8065
cs_prog_data->push.cross_thread.regs, 2);
8066
}
8067
}
8068
8069
/* TODO: Combine subgroup-id with cbuf0 so we can push regular uniforms */
8070
if ((stage_dirty & CROCUS_STAGE_DIRTY_CS) ||
8071
cs_prog_data->local_size[0] == 0 /* Variable local group size */) {
8072
uint32_t curbe_data_offset = 0;
8073
assert(cs_prog_data->push.cross_thread.dwords == 0 &&
8074
cs_prog_data->push.per_thread.dwords == 1 &&
8075
cs_prog_data->base.param[0] == BRW_PARAM_BUILTIN_SUBGROUP_ID);
8076
const unsigned push_const_size =
8077
brw_cs_push_const_total_size(cs_prog_data, dispatch.threads);
8078
uint32_t *curbe_data_map =
8079
stream_state(batch,
8080
ALIGN(push_const_size, 64), 64,
8081
&curbe_data_offset);
8082
assert(curbe_data_map);
8083
memset(curbe_data_map, 0x5a, ALIGN(push_const_size, 64));
8084
crocus_fill_cs_push_const_buffer(cs_prog_data, dispatch.threads,
8085
curbe_data_map);
8086
8087
crocus_emit_cmd(batch, GENX(MEDIA_CURBE_LOAD), curbe) {
8088
curbe.CURBETotalDataLength = ALIGN(push_const_size, 64);
8089
curbe.CURBEDataStartAddress = curbe_data_offset;
8090
}
8091
}
8092
8093
if (stage_dirty & (CROCUS_STAGE_DIRTY_SAMPLER_STATES_CS |
8094
CROCUS_STAGE_DIRTY_BINDINGS_CS |
8095
CROCUS_STAGE_DIRTY_CONSTANTS_CS |
8096
CROCUS_STAGE_DIRTY_CS)) {
8097
uint32_t desc[GENX(INTERFACE_DESCRIPTOR_DATA_length)];
8098
const uint64_t ksp = KSP(ice,shader) + brw_cs_prog_data_prog_offset(cs_prog_data, dispatch.simd_size);
8099
crocus_pack_state(GENX(INTERFACE_DESCRIPTOR_DATA), desc, idd) {
8100
idd.KernelStartPointer = ksp;
8101
idd.SamplerStatePointer = shs->sampler_offset;
8102
idd.BindingTablePointer = ice->shaders.prog[MESA_SHADER_COMPUTE]->bind_bo_offset;
8103
idd.BindingTableEntryCount = MIN2(shader->bt.size_bytes / 4, 31);
8104
idd.NumberofThreadsinGPGPUThreadGroup = dispatch.threads;
8105
idd.ConstantURBEntryReadLength = cs_prog_data->push.per_thread.regs;
8106
idd.BarrierEnable = cs_prog_data->uses_barrier;
8107
idd.SharedLocalMemorySize = encode_slm_size(GFX_VER,
8108
prog_data->total_shared);
8109
#if GFX_VERx10 >= 75
8110
idd.CrossThreadConstantDataReadLength = cs_prog_data->push.cross_thread.regs;
8111
#endif
8112
}
8113
8114
crocus_emit_cmd(batch, GENX(MEDIA_INTERFACE_DESCRIPTOR_LOAD), load) {
8115
load.InterfaceDescriptorTotalLength =
8116
GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t);
8117
load.InterfaceDescriptorDataStartAddress =
8118
emit_state(batch, desc, sizeof(desc), 64);
8119
}
8120
}
8121
8122
#define GPGPU_DISPATCHDIMX 0x2500
8123
#define GPGPU_DISPATCHDIMY 0x2504
8124
#define GPGPU_DISPATCHDIMZ 0x2508
8125
8126
if (grid->indirect) {
8127
struct crocus_state_ref *grid_size = &ice->state.grid_size;
8128
struct crocus_bo *bo = crocus_resource_bo(grid_size->res);
8129
crocus_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
8130
lrm.RegisterAddress = GPGPU_DISPATCHDIMX;
8131
lrm.MemoryAddress = ro_bo(bo, grid_size->offset + 0);
8132
}
8133
crocus_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
8134
lrm.RegisterAddress = GPGPU_DISPATCHDIMY;
8135
lrm.MemoryAddress = ro_bo(bo, grid_size->offset + 4);
8136
}
8137
crocus_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
8138
lrm.RegisterAddress = GPGPU_DISPATCHDIMZ;
8139
lrm.MemoryAddress = ro_bo(bo, grid_size->offset + 8);
8140
}
8141
8142
#if GFX_VER == 7
8143
/* Clear upper 32-bits of SRC0 and all 64-bits of SRC1 */
8144
_crocus_emit_lri(batch, MI_PREDICATE_SRC0 + 4, 0);
8145
crocus_load_register_imm64(batch, MI_PREDICATE_SRC1, 0);
8146
8147
/* Load compute_dispatch_indirect_x_size into SRC0 */
8148
crocus_load_register_mem32(batch, MI_PREDICATE_SRC0, bo, grid_size->offset + 0);
8149
8150
/* predicate = (compute_dispatch_indirect_x_size == 0); */
8151
crocus_emit_cmd(batch, GENX(MI_PREDICATE), mip) {
8152
mip.LoadOperation = LOAD_LOAD;
8153
mip.CombineOperation = COMBINE_SET;
8154
mip.CompareOperation = COMPARE_SRCS_EQUAL;
8155
};
8156
8157
/* Load compute_dispatch_indirect_y_size into SRC0 */
8158
crocus_load_register_mem32(batch, MI_PREDICATE_SRC0, bo, grid_size->offset + 4);
8159
8160
/* predicate = (compute_dispatch_indirect_y_size == 0); */
8161
crocus_emit_cmd(batch, GENX(MI_PREDICATE), mip) {
8162
mip.LoadOperation = LOAD_LOAD;
8163
mip.CombineOperation = COMBINE_OR;
8164
mip.CompareOperation = COMPARE_SRCS_EQUAL;
8165
};
8166
8167
/* Load compute_dispatch_indirect_z_size into SRC0 */
8168
crocus_load_register_mem32(batch, MI_PREDICATE_SRC0, bo, grid_size->offset + 8);
8169
8170
/* predicate = (compute_dispatch_indirect_z_size == 0); */
8171
crocus_emit_cmd(batch, GENX(MI_PREDICATE), mip) {
8172
mip.LoadOperation = LOAD_LOAD;
8173
mip.CombineOperation = COMBINE_OR;
8174
mip.CompareOperation = COMPARE_SRCS_EQUAL;
8175
};
8176
8177
/* predicate = !predicate; */
8178
#define COMPARE_FALSE 1
8179
crocus_emit_cmd(batch, GENX(MI_PREDICATE), mip) {
8180
mip.LoadOperation = LOAD_LOADINV;
8181
mip.CombineOperation = COMBINE_OR;
8182
mip.CompareOperation = COMPARE_FALSE;
8183
}
8184
#endif
8185
}
8186
8187
crocus_emit_cmd(batch, GENX(GPGPU_WALKER), ggw) {
8188
ggw.IndirectParameterEnable = grid->indirect != NULL;
8189
ggw.PredicateEnable = GFX_VER <= 7 && grid->indirect != NULL;
8190
ggw.SIMDSize = dispatch.simd_size / 16;
8191
ggw.ThreadDepthCounterMaximum = 0;
8192
ggw.ThreadHeightCounterMaximum = 0;
8193
ggw.ThreadWidthCounterMaximum = dispatch.threads - 1;
8194
ggw.ThreadGroupIDXDimension = grid->grid[0];
8195
ggw.ThreadGroupIDYDimension = grid->grid[1];
8196
ggw.ThreadGroupIDZDimension = grid->grid[2];
8197
ggw.RightExecutionMask = dispatch.right_mask;
8198
ggw.BottomExecutionMask = 0xffffffff;
8199
}
8200
8201
crocus_emit_cmd(batch, GENX(MEDIA_STATE_FLUSH), msf);
8202
8203
batch->contains_draw = true;
8204
}
8205
8206
#endif /* GFX_VER >= 7 */
8207
8208
/**
8209
* State module teardown.
8210
*/
8211
static void
8212
crocus_destroy_state(struct crocus_context *ice)
8213
{
8214
pipe_resource_reference(&ice->draw.draw_params.res, NULL);
8215
pipe_resource_reference(&ice->draw.derived_draw_params.res, NULL);
8216
8217
free(ice->state.genx);
8218
8219
for (int i = 0; i < 4; i++) {
8220
pipe_so_target_reference(&ice->state.so_target[i], NULL);
8221
}
8222
8223
for (unsigned i = 0; i < ice->state.framebuffer.nr_cbufs; i++) {
8224
pipe_surface_reference(&ice->state.framebuffer.cbufs[i], NULL);
8225
}
8226
pipe_surface_reference(&ice->state.framebuffer.zsbuf, NULL);
8227
8228
for (int stage = 0; stage < MESA_SHADER_STAGES; stage++) {
8229
struct crocus_shader_state *shs = &ice->state.shaders[stage];
8230
for (int i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) {
8231
pipe_resource_reference(&shs->constbufs[i].buffer, NULL);
8232
}
8233
for (int i = 0; i < PIPE_MAX_SHADER_IMAGES; i++) {
8234
pipe_resource_reference(&shs->image[i].base.resource, NULL);
8235
}
8236
for (int i = 0; i < PIPE_MAX_SHADER_BUFFERS; i++) {
8237
pipe_resource_reference(&shs->ssbo[i].buffer, NULL);
8238
}
8239
for (int i = 0; i < CROCUS_MAX_TEXTURE_SAMPLERS; i++) {
8240
pipe_sampler_view_reference((struct pipe_sampler_view **)
8241
&shs->textures[i], NULL);
8242
}
8243
}
8244
8245
for (int i = 0; i < 16; i++)
8246
pipe_resource_reference(&ice->state.vertex_buffers[i].buffer.resource, NULL);
8247
pipe_resource_reference(&ice->state.grid_size.res, NULL);
8248
8249
pipe_resource_reference(&ice->state.index_buffer.res, NULL);
8250
}
8251
8252
/* ------------------------------------------------------------------- */
8253
8254
/**
 * A buffer resource got a new backing BO (e.g. after an invalidate/realloc).
 * Walk every binding point that may still reference it and either mark the
 * relevant dirty bits so state is re-emitted, or rebind/drop the reference.
 */
static void
crocus_rebind_buffer(struct crocus_context *ice,
                     struct crocus_resource *res)
{
   struct pipe_context *ctx = &ice->ctx;

   assert(res->base.b.target == PIPE_BUFFER);

   /* Buffers can't be framebuffer attachments, nor display related,
    * and we don't have upstream Clover support.
    */
   assert(!(res->bind_history & (PIPE_BIND_DEPTH_STENCIL |
                                 PIPE_BIND_RENDER_TARGET |
                                 PIPE_BIND_BLENDABLE |
                                 PIPE_BIND_DISPLAY_TARGET |
                                 PIPE_BIND_CURSOR |
                                 PIPE_BIND_COMPUTE_RESOURCE |
                                 PIPE_BIND_GLOBAL)));

   /* Vertex buffers: just flag re-emission of 3DSTATE_VERTEX_BUFFERS. */
   if (res->bind_history & PIPE_BIND_VERTEX_BUFFER) {
      uint64_t bound_vbs = ice->state.bound_vertex_buffers;
      while (bound_vbs) {
         const int i = u_bit_scan64(&bound_vbs);
         struct pipe_vertex_buffer *buffer = &ice->state.vertex_buffers[i];

         if (!buffer->is_user_buffer && &res->base.b == buffer->buffer.resource)
            ice->state.dirty |= CROCUS_DIRTY_VERTEX_BUFFERS;
      }
   }

   /* Index buffer: drop the cached reference so it gets re-uploaded. */
   if ((res->bind_history & PIPE_BIND_INDEX_BUFFER) &&
       ice->state.index_buffer.res) {
      if (res->bo == crocus_resource_bo(ice->state.index_buffer.res))
         pipe_resource_reference(&ice->state.index_buffer.res, NULL);
   }
   /* There is no need to handle these:
    * - PIPE_BIND_COMMAND_ARGS_BUFFER (emitted for every indirect draw)
    * - PIPE_BIND_QUERY_BUFFER (no persistent state references)
    */

   if (res->bind_history & PIPE_BIND_STREAM_OUTPUT) {
      /* XXX: be careful about resetting vs appending... */
      for (int i = 0; i < 4; i++) {
         if (ice->state.so_target[i] &&
             (ice->state.so_target[i]->buffer == &res->base.b)) {
            /* Gen6 streams out via the GS binding table; Gen7+ has
             * dedicated 3DSTATE_SO_BUFFER state.
             */
#if GFX_VER == 6
            ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_BINDINGS_GS;
#else
            ice->state.dirty |= CROCUS_DIRTY_GEN7_SO_BUFFERS;
#endif
         }
      }
   }

   /* Per-stage bindings.  The CROCUS_STAGE_DIRTY_*_VS << s idiom relies on
    * the per-stage dirty bits being laid out consecutively in stage order.
    */
   for (int s = MESA_SHADER_VERTEX; s < MESA_SHADER_STAGES; s++) {
      struct crocus_shader_state *shs = &ice->state.shaders[s];
      enum pipe_shader_type p_stage = stage_to_pipe(s);

      if (!(res->bind_stages & (1 << s)))
         continue;

      if (res->bind_history & PIPE_BIND_CONSTANT_BUFFER) {
         /* Skip constant buffer 0, it's for regular uniforms, not UBOs */
         uint32_t bound_cbufs = shs->bound_cbufs & ~1u;
         while (bound_cbufs) {
            const int i = u_bit_scan(&bound_cbufs);
            struct pipe_constant_buffer *cbuf = &shs->constbufs[i];

            if (res->bo == crocus_resource_bo(cbuf->buffer)) {
               ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CONSTANTS_VS << s;
            }
         }
      }

      if (res->bind_history & PIPE_BIND_SHADER_BUFFER) {
         uint32_t bound_ssbos = shs->bound_ssbos;
         while (bound_ssbos) {
            const int i = u_bit_scan(&bound_ssbos);
            struct pipe_shader_buffer *ssbo = &shs->ssbo[i];

            if (res->bo == crocus_resource_bo(ssbo->buffer)) {
               /* Rebind through the regular set-path so surface state is
                * rebuilt against the new BO.
                */
               struct pipe_shader_buffer buf = {
                  .buffer = &res->base.b,
                  .buffer_offset = ssbo->buffer_offset,
                  .buffer_size = ssbo->buffer_size,
               };
               crocus_set_shader_buffers(ctx, p_stage, i, 1, &buf,
                                         (shs->writable_ssbos >> i) & 1);
            }
         }
      }

      if (res->bind_history & PIPE_BIND_SAMPLER_VIEW) {
         uint32_t bound_sampler_views = shs->bound_sampler_views;
         while (bound_sampler_views) {
            const int i = u_bit_scan(&bound_sampler_views);
            struct crocus_sampler_view *isv = shs->textures[i];
            struct crocus_bo *bo = isv->res->bo;

            if (res->bo == bo) {
               ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_BINDINGS_VS << s;
            }
         }
      }

      if (res->bind_history & PIPE_BIND_SHADER_IMAGE) {
         uint32_t bound_image_views = shs->bound_image_views;
         while (bound_image_views) {
            const int i = u_bit_scan(&bound_image_views);
            struct crocus_image_view *iv = &shs->image[i];
            struct crocus_bo *bo = crocus_resource_bo(iv->base.resource);

            if (res->bo == bo)
               ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_BINDINGS_VS << s;
         }
      }
   }
}
8372
8373
/* ------------------------------------------------------------------- */
8374
8375
static unsigned
8376
flags_to_post_sync_op(uint32_t flags)
8377
{
8378
if (flags & PIPE_CONTROL_WRITE_IMMEDIATE)
8379
return WriteImmediateData;
8380
8381
if (flags & PIPE_CONTROL_WRITE_DEPTH_COUNT)
8382
return WritePSDepthCount;
8383
8384
if (flags & PIPE_CONTROL_WRITE_TIMESTAMP)
8385
return WriteTimestamp;
8386
8387
return 0;
8388
}
8389
8390
/*
8391
* Do the given flags have a Post Sync or LRI Post Sync operation?
8392
*/
8393
static enum pipe_control_flags
8394
get_post_sync_flags(enum pipe_control_flags flags)
8395
{
8396
flags &= PIPE_CONTROL_WRITE_IMMEDIATE |
8397
PIPE_CONTROL_WRITE_DEPTH_COUNT |
8398
PIPE_CONTROL_WRITE_TIMESTAMP |
8399
PIPE_CONTROL_LRI_POST_SYNC_OP;
8400
8401
/* Only one "Post Sync Op" is allowed, and it's mutually exclusive with
8402
* "LRI Post Sync Operation". So more than one bit set would be illegal.
8403
*/
8404
assert(util_bitcount(flags) <= 1);
8405
8406
return flags;
8407
}
8408
8409
#define IS_COMPUTE_PIPELINE(batch) (batch->name == CROCUS_BATCH_COMPUTE)

/**
 * Emit a series of PIPE_CONTROL commands, taking into account any
 * workarounds necessary to actually accomplish the caller's request.
 *
 * Unless otherwise noted, spec quotations in this function come from:
 *
 * Synchronization of the 3D Pipeline > PIPE_CONTROL Command > Programming
 * Restrictions for PIPE_CONTROL.
 *
 * You should not use this function directly. Use the helpers in
 * crocus_pipe_control.c instead, which may split the pipe control further.
 *
 * \param reason  human-readable annotation, printed with INTEL_DEBUG=pc
 * \param flags   requested PIPE_CONTROL_* bits (may be augmented below)
 * \param bo      target BO for any post-sync write (may be NULL)
 * \param offset  write offset within \p bo
 * \param imm     payload for PIPE_CONTROL_WRITE_IMMEDIATE
 */
static void
crocus_emit_raw_pipe_control(struct crocus_batch *batch,
                             const char *reason,
                             uint32_t flags,
                             struct crocus_bo *bo,
                             uint32_t offset,
                             uint64_t imm)
{
   UNUSED const struct intel_device_info *devinfo = &batch->screen->devinfo;
   enum pipe_control_flags post_sync_flags = get_post_sync_flags(flags);
   UNUSED enum pipe_control_flags non_lri_post_sync_flags =
      post_sync_flags & ~PIPE_CONTROL_LRI_POST_SYNC_OP;

   /* Recursive PIPE_CONTROL workarounds --------------------------------
    * (http://knowyourmeme.com/memes/xzibit-yo-dawg)
    *
    * We do these first because we want to look at the original operation,
    * rather than any workarounds we set.
    */

   /* "Flush Types" workarounds ---------------------------------------------
    * We do these now because they may add post-sync operations or CS stalls.
    */

   if (GFX_VER == 6 && (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH)) {
      /* Hardware workaround: SNB B-Spec says:
       *
       *    "[Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush
       *     Enable = 1, a PIPE_CONTROL with any non-zero post-sync-op is
       *     required."
       */
      crocus_emit_post_sync_nonzero_flush(batch);
   }

#if GFX_VER == 8
   if (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE) {
      /* Project: BDW, SKL+ (stopping at CNL) / Argument: VF Invalidate
       *
       * "'Post Sync Operation' must be enabled to 'Write Immediate Data' or
       *  'Write PS Depth Count' or 'Write Timestamp'."
       */
      if (!bo) {
         /* No caller-supplied write target: scribble into the dedicated
          * workaround BO to satisfy the post-sync requirement.
          */
         flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
         post_sync_flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
         non_lri_post_sync_flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
         bo = batch->ice->workaround_bo;
         offset = batch->ice->workaround_offset;
      }
   }
#endif

#if GFX_VERx10 < 75
   if (flags & PIPE_CONTROL_DEPTH_STALL) {
      /* Project: PRE-HSW / Argument: Depth Stall
       *
       * "The following bits must be clear:
       *  - Render Target Cache Flush Enable ([12] of DW1)
       *  - Depth Cache Flush Enable ([0] of DW1)"
       */
      assert(!(flags & (PIPE_CONTROL_RENDER_TARGET_FLUSH |
                        PIPE_CONTROL_DEPTH_CACHE_FLUSH)));
   }
#endif
   if (GFX_VER >= 6 && (flags & PIPE_CONTROL_DEPTH_STALL)) {
      /* From the PIPE_CONTROL instruction table, bit 13 (Depth Stall Enable):
       *
       *    "This bit must be DISABLED for operations other than writing
       *     PS_DEPTH_COUNT."
       *
       * This seems like nonsense.  An Ivybridge workaround requires us to
       * emit a PIPE_CONTROL with a depth stall and write immediate post-sync
       * operation.  Gen8+ requires us to emit depth stalls and depth cache
       * flushes together.  So, it's hard to imagine this means anything other
       * than "we originally intended this to be used for PS_DEPTH_COUNT".
       *
       * We ignore the supposed restriction and do nothing.
       */
   }

   if (GFX_VERx10 < 75 && (flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH)) {
      /* Project: PRE-HSW / Argument: Depth Cache Flush
       *
       * "Depth Stall must be clear ([13] of DW1)."
       */
      assert(!(flags & PIPE_CONTROL_DEPTH_STALL));
   }

   if (flags & (PIPE_CONTROL_RENDER_TARGET_FLUSH |
                PIPE_CONTROL_STALL_AT_SCOREBOARD)) {
      /* From the PIPE_CONTROL instruction table, bit 12 and bit 1:
       *
       *    "This bit must be DISABLED for End-of-pipe (Read) fences,
       *     PS_DEPTH_COUNT or TIMESTAMP queries."
       *
       * TODO: Implement end-of-pipe checking.
       */
      assert(!(post_sync_flags & (PIPE_CONTROL_WRITE_DEPTH_COUNT |
                                  PIPE_CONTROL_WRITE_TIMESTAMP)));
   }

   if (flags & PIPE_CONTROL_STALL_AT_SCOREBOARD) {
      /* From the PIPE_CONTROL instruction table, bit 1:
       *
       *    "This bit is ignored if Depth Stall Enable is set.
       *     Further, the render cache is not flushed even if Write Cache
       *     Flush Enable bit is set."
       *
       * We assert that the caller doesn't do this combination, to try and
       * prevent mistakes.  It shouldn't hurt the GPU, though.
       *
       * We skip this check on Gen11+ as the "Stall at Pixel Scoreboard"
       * and "Render Target Flush" combo is explicitly required for BTI
       * update workarounds.
       */
      assert(!(flags & (PIPE_CONTROL_DEPTH_STALL |
                        PIPE_CONTROL_RENDER_TARGET_FLUSH)));
   }

   /* PIPE_CONTROL page workarounds ------------------------------------- */

   if (GFX_VER >= 7 && (flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE)) {
      /* From the PIPE_CONTROL page itself:
       *
       *    "IVB, HSW, BDW
       *     Restriction: Pipe_control with CS-stall bit set must be issued
       *     before a pipe-control command that has the State Cache
       *     Invalidate bit set."
       */
      flags |= PIPE_CONTROL_CS_STALL;
   }

   if ((GFX_VERx10 == 75)) {
      /* From the PIPE_CONTROL page itself:
       *
       *    "HSW - Programming Note: PIPECONTROL with RO Cache Invalidation:
       *     Prior to programming a PIPECONTROL command with any of the RO
       *     cache invalidation bit set, program a PIPECONTROL flush command
       *     with “CS stall” bit and “HDC Flush” bit set."
       *
       * TODO: Actually implement this.  What's an HDC Flush?
       */
   }

   if (flags & PIPE_CONTROL_FLUSH_LLC) {
      /* From the PIPE_CONTROL instruction table, bit 26 (Flush LLC):
       *
       *    "Project: ALL
       *     SW must always program Post-Sync Operation to "Write Immediate
       *     Data" when Flush LLC is set."
       *
       * For now, we just require the caller to do it.
       */
      assert(flags & PIPE_CONTROL_WRITE_IMMEDIATE);
   }

   /* "Post-Sync Operation" workarounds -------------------------------- */

   /* Project: All / Argument: Global Snapshot Count Reset [19]
    *
    * "This bit must not be exercised on any product.
    *  Requires stall bit ([20] of DW1) set."
    *
    * We don't use this, so we just assert that it isn't used.  The
    * PIPE_CONTROL instruction page indicates that they intended this
    * as a debug feature and don't think it is useful in production,
    * but it may actually be usable, should we ever want to.
    */
   assert((flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET) == 0);

   if (flags & (PIPE_CONTROL_MEDIA_STATE_CLEAR |
                PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE)) {
      /* Project: All / Arguments:
       *
       * - Generic Media State Clear [16]
       * - Indirect State Pointers Disable [16]
       *
       *    "Requires stall bit ([20] of DW1) set."
       *
       * Also, the PIPE_CONTROL instruction table, bit 16 (Generic Media
       * State Clear) says:
       *
       *    "PIPECONTROL command with “Command Streamer Stall Enable” must be
       *     programmed prior to programming a PIPECONTROL command with "Media
       *     State Clear" set in GPGPU mode of operation"
       *
       * This is a subset of the earlier rule, so there's nothing to do.
       */
      flags |= PIPE_CONTROL_CS_STALL;
   }

   if (flags & PIPE_CONTROL_STORE_DATA_INDEX) {
      /* Project: All / Argument: Store Data Index
       *
       * "Post-Sync Operation ([15:14] of DW1) must be set to something other
       *  than '0'."
       *
       * For now, we just assert that the caller does this.  We might want to
       * automatically add a write to the workaround BO...
       */
      assert(non_lri_post_sync_flags != 0);
   }

   if (flags & PIPE_CONTROL_SYNC_GFDT) {
      /* Project: All / Argument: Sync GFDT
       *
       * "Post-Sync Operation ([15:14] of DW1) must be set to something other
       *  than '0' or 0x2520[13] must be set."
       *
       * For now, we just assert that the caller does this.
       */
      assert(non_lri_post_sync_flags != 0);
   }

   if (GFX_VER >= 6 && GFX_VER < 8 && (flags & PIPE_CONTROL_TLB_INVALIDATE)) {
      /* Project: SNB, IVB, HSW / Argument: TLB inv
       *
       * "{All SKUs}{All Steppings}: Post-Sync Operation ([15:14] of DW1)
       *  must be set to something other than '0'."
       *
       * For now, we just assert that the caller does this.
       */
      assert(non_lri_post_sync_flags != 0);
   }

   if (GFX_VER >= 7 && (flags & PIPE_CONTROL_TLB_INVALIDATE)) {
      /* Project: IVB+ / Argument: TLB inv
       *
       *    "Requires stall bit ([20] of DW1) set."
       *
       * Also, from the PIPE_CONTROL instruction table:
       *
       *    "Project: SKL+
       *     Post Sync Operation or CS stall must be set to ensure a TLB
       *     invalidation occurs.  Otherwise no cycle will occur to the TLB
       *     cache to invalidate."
       *
       * This is not a subset of the earlier rule, so there's nothing to do.
       */
      flags |= PIPE_CONTROL_CS_STALL;
   }
#if GFX_VER == 8
   if (IS_COMPUTE_PIPELINE(batch)) {
      if (post_sync_flags ||
          (flags & (PIPE_CONTROL_NOTIFY_ENABLE |
                    PIPE_CONTROL_DEPTH_STALL |
                    PIPE_CONTROL_RENDER_TARGET_FLUSH |
                    PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                    PIPE_CONTROL_DATA_CACHE_FLUSH))) {
         /* Project: BDW / Arguments:
          *
          * - LRI Post Sync Operation   [23]
          * - Post Sync Op              [15:14]
          * - Notify En                 [8]
          * - Depth Stall               [13]
          * - Render Target Cache Flush [12]
          * - Depth Cache Flush         [0]
          * - DC Flush Enable           [5]
          *
          *    "Requires stall bit ([20] of DW) set for all GPGPU and Media
          *     Workloads."
          *
          * (The docs have separate table rows for each bit, with essentially
          *  the same workaround text.  We've combined them here.)
          */
         flags |= PIPE_CONTROL_CS_STALL;

         /* Also, from the PIPE_CONTROL instruction table, bit 20:
          *
          *    "Project: BDW
          *     This bit must be always set when PIPE_CONTROL command is
          *     programmed by GPGPU and MEDIA workloads, except for the cases
          *     when only Read Only Cache Invalidation bits are set (State
          *     Cache Invalidation Enable, Instruction cache Invalidation
          *     Enable, Texture Cache Invalidation Enable, Constant Cache
          *     Invalidation Enable). This is to WA FFDOP CG issue, this WA
          *     need not implemented when FF_DOP_CG is disable via "Fixed
          *     Function DOP Clock Gate Disable" bit in RC_PSMI_CTRL register."
          *
          * It sounds like we could avoid CS stalls in some cases, but we
          * don't currently bother.  This list isn't exactly the list above,
          * either...
          */
      }
   }
#endif
   /* Implement the WaCsStallAtEveryFourthPipecontrol workaround on IVB, BYT:
    *
    * "Every 4th PIPE_CONTROL command, not counting the PIPE_CONTROL with
    *  only read-cache-invalidate bit(s) set, must have a CS_STALL bit set."
    *
    * Note that the kernel does CS stalls between batches, so we only need
    * to count them within a batch.  We currently naively count every 4, and
    * don't skip the ones with only read-cache-invalidate bits set.  This
    * may or may not be a problem...
    */
   if (GFX_VER == 7 && !(GFX_VERx10 == 75)) {
      if (flags & PIPE_CONTROL_CS_STALL) {
         /* If we're doing a CS stall, reset the counter and carry on. */
         batch->pipe_controls_since_last_cs_stall = 0;
      }

      /* If this is the fourth pipe control without a CS stall, do one now. */
      if (++batch->pipe_controls_since_last_cs_stall == 4) {
         batch->pipe_controls_since_last_cs_stall = 0;
         flags |= PIPE_CONTROL_CS_STALL;
      }
   }

   /* "Stall" workarounds ----------------------------------------------
    * These have to come after the earlier ones because we may have added
    * some additional CS stalls above.
    */

   if (flags & PIPE_CONTROL_CS_STALL) {
      /* Project: PRE-SKL, VLV, CHV
       *
       * "[All Stepping][All SKUs]:
       *
       *  One of the following must also be set:
       *
       *  - Render Target Cache Flush Enable ([12] of DW1)
       *  - Depth Cache Flush Enable ([0] of DW1)
       *  - Stall at Pixel Scoreboard ([1] of DW1)
       *  - Depth Stall ([13] of DW1)
       *  - Post-Sync Operation ([13] of DW1)
       *  - DC Flush Enable ([5] of DW1)"
       *
       * If we don't already have one of those bits set, we choose to add
       * "Stall at Pixel Scoreboard".  Some of the other bits require a
       * CS stall as a workaround (see above), which would send us into
       * an infinite recursion of PIPE_CONTROLs.  "Stall at Pixel Scoreboard"
       * appears to be safe, so we choose that.
       */
      const uint32_t wa_bits = PIPE_CONTROL_RENDER_TARGET_FLUSH |
                               PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                               PIPE_CONTROL_WRITE_IMMEDIATE |
                               PIPE_CONTROL_WRITE_DEPTH_COUNT |
                               PIPE_CONTROL_WRITE_TIMESTAMP |
                               PIPE_CONTROL_STALL_AT_SCOREBOARD |
                               PIPE_CONTROL_DEPTH_STALL |
                               PIPE_CONTROL_DATA_CACHE_FLUSH;
      if (!(flags & wa_bits))
         flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
   }

   /* Emit --------------------------------------------------------------- */

   if (INTEL_DEBUG & DEBUG_PIPE_CONTROL) {
      fprintf(stderr,
              "  PC [%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%"PRIx64"]: %s\n",
              (flags & PIPE_CONTROL_FLUSH_ENABLE) ? "PipeCon " : "",
              (flags & PIPE_CONTROL_CS_STALL) ? "CS " : "",
              (flags & PIPE_CONTROL_STALL_AT_SCOREBOARD) ? "Scoreboard " : "",
              (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE) ? "VF " : "",
              (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH) ? "RT " : "",
              (flags & PIPE_CONTROL_CONST_CACHE_INVALIDATE) ? "Const " : "",
              (flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE) ? "TC " : "",
              (flags & PIPE_CONTROL_DATA_CACHE_FLUSH) ? "DC " : "",
              (flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH) ? "ZFlush " : "",
              (flags & PIPE_CONTROL_DEPTH_STALL) ? "ZStall " : "",
              (flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE) ? "State " : "",
              (flags & PIPE_CONTROL_TLB_INVALIDATE) ? "TLB " : "",
              (flags & PIPE_CONTROL_INSTRUCTION_INVALIDATE) ? "Inst " : "",
              (flags & PIPE_CONTROL_MEDIA_STATE_CLEAR) ? "MediaClear " : "",
              (flags & PIPE_CONTROL_NOTIFY_ENABLE) ? "Notify " : "",
              (flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET) ?
                 "SnapRes" : "",
              (flags & PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE) ?
                 "ISPDis" : "",
              (flags & PIPE_CONTROL_WRITE_IMMEDIATE) ? "WriteImm " : "",
              (flags & PIPE_CONTROL_WRITE_DEPTH_COUNT) ? "WriteZCount " : "",
              (flags & PIPE_CONTROL_WRITE_TIMESTAMP) ? "WriteTimestamp " : "",
              imm, reason);
   }

   crocus_emit_cmd(batch, GENX(PIPE_CONTROL), pc) {
#if GFX_VER >= 7
      pc.LRIPostSyncOperation = NoLRIOperation;
      pc.PipeControlFlushEnable = flags & PIPE_CONTROL_FLUSH_ENABLE;
      pc.DCFlushEnable = flags & PIPE_CONTROL_DATA_CACHE_FLUSH;
#endif
#if GFX_VER >= 6
      pc.StoreDataIndex = 0;
      pc.CommandStreamerStallEnable = flags & PIPE_CONTROL_CS_STALL;
      pc.GlobalSnapshotCountReset =
         flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET;
      pc.TLBInvalidate = flags & PIPE_CONTROL_TLB_INVALIDATE;
      pc.GenericMediaStateClear = flags & PIPE_CONTROL_MEDIA_STATE_CLEAR;
      pc.StallAtPixelScoreboard = flags & PIPE_CONTROL_STALL_AT_SCOREBOARD;
      pc.RenderTargetCacheFlushEnable =
         flags & PIPE_CONTROL_RENDER_TARGET_FLUSH;
      pc.DepthCacheFlushEnable = flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH;
      pc.StateCacheInvalidationEnable =
         flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE;
      pc.VFCacheInvalidationEnable = flags & PIPE_CONTROL_VF_CACHE_INVALIDATE;
      pc.ConstantCacheInvalidationEnable =
         flags & PIPE_CONTROL_CONST_CACHE_INVALIDATE;
#else
      pc.WriteCacheFlush = flags & PIPE_CONTROL_RENDER_TARGET_FLUSH;
#endif
      pc.PostSyncOperation = flags_to_post_sync_op(flags);
      pc.DepthStallEnable = flags & PIPE_CONTROL_DEPTH_STALL;
      pc.InstructionCacheInvalidateEnable =
         flags & PIPE_CONTROL_INSTRUCTION_INVALIDATE;
      pc.NotifyEnable = flags & PIPE_CONTROL_NOTIFY_ENABLE;
#if GFX_VER >= 5 || GFX_VERx10 == 45
      pc.IndirectStatePointersDisable =
         flags & PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE;
#endif
#if GFX_VER >= 6
      pc.TextureCacheInvalidationEnable =
         flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
#elif GFX_VER == 5 || GFX_VERx10 == 45
      pc.TextureCacheFlushEnable =
         flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
#endif
      pc.Address = ggtt_bo(bo, offset);
      if (GFX_VER < 7 && bo)
         pc.DestinationAddressType = DAT_GGTT;
      pc.ImmediateData = imm;
   }
}
8845
8846
#if GFX_VER == 6
8847
void
8848
genX(crocus_upload_urb)(struct crocus_batch *batch,
8849
unsigned vs_size,
8850
bool gs_present,
8851
unsigned gs_size)
8852
{
8853
struct crocus_context *ice = batch->ice;
8854
int nr_vs_entries, nr_gs_entries;
8855
int total_urb_size = ice->urb.size * 1024; /* in bytes */
8856
const struct intel_device_info *devinfo = &batch->screen->devinfo;
8857
8858
/* Calculate how many entries fit in each stage's section of the URB */
8859
if (gs_present) {
8860
nr_vs_entries = (total_urb_size/2) / (vs_size * 128);
8861
nr_gs_entries = (total_urb_size/2) / (gs_size * 128);
8862
} else {
8863
nr_vs_entries = total_urb_size / (vs_size * 128);
8864
nr_gs_entries = 0;
8865
}
8866
8867
/* Then clamp to the maximum allowed by the hardware */
8868
if (nr_vs_entries > devinfo->urb.max_entries[MESA_SHADER_VERTEX])
8869
nr_vs_entries = devinfo->urb.max_entries[MESA_SHADER_VERTEX];
8870
8871
if (nr_gs_entries > devinfo->urb.max_entries[MESA_SHADER_GEOMETRY])
8872
nr_gs_entries = devinfo->urb.max_entries[MESA_SHADER_GEOMETRY];
8873
8874
/* Finally, both must be a multiple of 4 (see 3DSTATE_URB in the PRM). */
8875
ice->urb.nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, 4);
8876
ice->urb.nr_gs_entries = ROUND_DOWN_TO(nr_gs_entries, 4);
8877
8878
assert(ice->urb.nr_vs_entries >=
8879
devinfo->urb.min_entries[MESA_SHADER_VERTEX]);
8880
assert(ice->urb.nr_vs_entries % 4 == 0);
8881
assert(ice->urb.nr_gs_entries % 4 == 0);
8882
assert(vs_size <= 5);
8883
assert(gs_size <= 5);
8884
8885
crocus_emit_cmd(batch, GENX(3DSTATE_URB), urb) {
8886
urb.VSNumberofURBEntries = ice->urb.nr_vs_entries;
8887
urb.VSURBEntryAllocationSize = vs_size - 1;
8888
8889
urb.GSNumberofURBEntries = ice->urb.nr_gs_entries;
8890
urb.GSURBEntryAllocationSize = gs_size - 1;
8891
};
8892
/* From the PRM Volume 2 part 1, section 1.4.7:
8893
*
8894
* Because of a urb corruption caused by allocating a previous gsunit’s
8895
* urb entry to vsunit software is required to send a "GS NULL
8896
* Fence"(Send URB fence with VS URB size == 1 and GS URB size == 0) plus
8897
* a dummy DRAW call before any case where VS will be taking over GS URB
8898
* space.
8899
*
8900
* It is not clear exactly what this means ("URB fence" is a command that
8901
* doesn't exist on Gen6). So for now we just do a full pipeline flush as
8902
* a workaround.
8903
*/
8904
if (ice->urb.gs_present && !gs_present)
8905
crocus_emit_mi_flush(batch);
8906
ice->urb.gs_present = gs_present;
8907
}
8908
#endif
8909
8910
/**
 * Vtable hook (screen->vtbl.lost_genx_state) called when a batch's state
 * needs to be considered lost.  Intentionally a no-op: crocus keeps no
 * generation-specific context state that must be restored here.
 */
static void
crocus_lost_genx_state(struct crocus_context *ice, struct crocus_batch *batch)
{
}
8914
8915
/**
 * Emit MI_REPORT_PERF_COUNT, writing a performance-counter snapshot tagged
 * with \p report_id to \p bo at \p offset_in_bytes.
 *
 * Only emitted on Gen7+; on earlier generations this is a no-op.
 */
static void
crocus_emit_mi_report_perf_count(struct crocus_batch *batch,
                                 struct crocus_bo *bo,
                                 uint32_t offset_in_bytes,
                                 uint32_t report_id)
{
#if GFX_VER >= 7
   crocus_emit_cmd(batch, GENX(MI_REPORT_PERF_COUNT), mi_rpc) {
      mi_rpc.MemoryAddress = rw_bo(bo, offset_in_bytes);
      mi_rpc.ReportID = report_id;
   }
#endif
}
8928
8929
/**
 * From the PRM, Volume 2a:
 *
 *    "Indirect State Pointers Disable
 *
 *    At the completion of the post-sync operation associated with this pipe
 *    control packet, the indirect state pointers in the hardware are
 *    considered invalid; the indirect pointers are not saved in the context.
 *    If any new indirect state commands are executed in the command stream
 *    while the pipe control is pending, the new indirect state commands are
 *    preserved.
 *
 *    [DevIVB+]: Using Invalidate State Pointer (ISP) only inhibits context
 *    restoring of Push Constant (3DSTATE_CONSTANT_*) commands. Push Constant
 *    commands are only considered as Indirect State Pointers. Once ISP is
 *    issued in a context, SW must initialize by programming push constant
 *    commands for all the shaders (at least to zero length) before attempting
 *    any rendering operation for the same context."
 *
 * 3DSTATE_CONSTANT_* packets are restored during a context restore,
 * even though they point to a BO that has been already unreferenced at
 * the end of the previous batch buffer. This has been fine so far since
 * we are protected by these scratch page (every address not covered by
 * a BO should be pointing to the scratch page). But on CNL, it is
 * causing a GPU hang during context restore at the 3DSTATE_CONSTANT_*
 * instruction.
 *
 * The flag "Indirect State Pointers Disable" in PIPE_CONTROL tells the
 * hardware to ignore previous 3DSTATE_CONSTANT_* packets during a
 * context restore, so the mentioned hang doesn't happen. However,
 * software must program push constant commands for all stages prior to
 * rendering anything, so we flag them as dirty.
 *
 * Finally, we also make sure to stall at pixel scoreboard to make sure the
 * constants have been loaded into the EUs prior to disable the push constants
 * so that it doesn't hang a previous 3DPRIMITIVE.
 */
#if GFX_VER >= 7
static void
gen7_emit_isp_disable(struct crocus_batch *batch)
{
   /* First stall so in-flight rendering finishes consuming the current
    * push constants, then issue the actual ISP-disable pipe control.
    */
   crocus_emit_raw_pipe_control(batch, "isp disable",
                                PIPE_CONTROL_STALL_AT_SCOREBOARD |
                                PIPE_CONTROL_CS_STALL,
                                NULL, 0, 0);
   crocus_emit_raw_pipe_control(batch, "isp disable",
                                PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE |
                                PIPE_CONTROL_CS_STALL,
                                NULL, 0, 0);

   /* Per the PRM text above, push constants must be re-programmed for all
    * stages before the next rendering, so mark them all dirty.
    */
   struct crocus_context *ice = batch->ice;
   ice->state.stage_dirty |= (CROCUS_STAGE_DIRTY_CONSTANTS_VS |
                              CROCUS_STAGE_DIRTY_CONSTANTS_TCS |
                              CROCUS_STAGE_DIRTY_CONSTANTS_TES |
                              CROCUS_STAGE_DIRTY_CONSTANTS_GS |
                              CROCUS_STAGE_DIRTY_CONSTANTS_FS);
}
#endif
8987
8988
#if GFX_VER >= 7
8989
static void
8990
crocus_state_finish_batch(struct crocus_batch *batch)
8991
{
8992
#if GFX_VERx10 == 75
8993
if (batch->name == CROCUS_BATCH_RENDER) {
8994
crocus_emit_mi_flush(batch);
8995
crocus_emit_cmd(batch, GENX(3DSTATE_CC_STATE_POINTERS), ptr) {
8996
ptr.ColorCalcStatePointer = batch->ice->shaders.cc_offset;
8997
}
8998
8999
crocus_emit_pipe_control_flush(batch, "hsw wa", PIPE_CONTROL_RENDER_TARGET_FLUSH |
9000
PIPE_CONTROL_CS_STALL);
9001
}
9002
#endif
9003
gen7_emit_isp_disable(batch);
9004
}
9005
#endif
9006
9007
/**
 * Vtable hook (screen->vtbl.batch_reset_dirty): re-flag all state that must
 * be re-emitted into a fresh batch, since anything living in (or pointing
 * into) the previous batch/state buffer is gone.
 */
static void
crocus_batch_reset_dirty(struct crocus_batch *batch)
{
   /* unreference any index buffer so it get reemitted. */
   pipe_resource_reference(&batch->ice->state.index_buffer.res, NULL);

   /* for GEN4/5 need to reemit anything that ends up in the state batch that points to anything in the state batch
    * as the old state batch won't still be available.
    */
   batch->ice->state.dirty |= CROCUS_DIRTY_DEPTH_BUFFER |
      CROCUS_DIRTY_COLOR_CALC_STATE;

   batch->ice->state.dirty |= CROCUS_DIRTY_VERTEX_ELEMENTS | CROCUS_DIRTY_VERTEX_BUFFERS;

   /* All per-stage binding tables and sampler states. */
   batch->ice->state.stage_dirty |= CROCUS_ALL_STAGE_DIRTY_BINDINGS;
   batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_SAMPLER_STATES_VS;
   batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_SAMPLER_STATES_TES;
   batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_SAMPLER_STATES_TCS;
   batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_SAMPLER_STATES_GS;
   batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_SAMPLER_STATES_PS;
   batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_SAMPLER_STATES_CS;

   /* Push constants for every stage. */
   batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CONSTANTS_VS;
   batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CONSTANTS_TES;
   batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CONSTANTS_TCS;
   batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CONSTANTS_GS;
   batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CONSTANTS_FS;
   batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CONSTANTS_CS;

   batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_VS;
   batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_GS;
   batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CS;
   batch->ice->state.dirty |= CROCUS_DIRTY_CC_VIEWPORT | CROCUS_DIRTY_SF_CL_VIEWPORT;

#if GFX_VER >= 6
   /* SCISSOR_STATE */
   batch->ice->state.dirty |= CROCUS_DIRTY_GEN6_BLEND_STATE;
   batch->ice->state.dirty |= CROCUS_DIRTY_GEN6_SCISSOR_RECT;
   batch->ice->state.dirty |= CROCUS_DIRTY_GEN6_WM_DEPTH_STENCIL;

#endif
#if GFX_VER <= 5
   /* dirty the SF state on gen4/5 */
   batch->ice->state.dirty |= CROCUS_DIRTY_RASTER;
   batch->ice->state.dirty |= CROCUS_DIRTY_GEN4_CURBE;
   batch->ice->state.dirty |= CROCUS_DIRTY_CLIP;
   batch->ice->state.dirty |= CROCUS_DIRTY_WM;
#endif
#if GFX_VER >= 7
   /* Streamout dirty */
   batch->ice->state.dirty |= CROCUS_DIRTY_STREAMOUT;
   batch->ice->state.dirty |= CROCUS_DIRTY_SO_DECL_LIST;
   batch->ice->state.dirty |= CROCUS_DIRTY_GEN7_SO_BUFFERS;
#endif
}
9062
9063
#if GFX_VERx10 == 75
9064
struct pipe_rasterizer_state *crocus_get_rast_state(struct crocus_context *ice)
9065
{
9066
return &ice->state.cso_rast->cso;
9067
}
9068
#endif
9069
9070
#if GFX_VER >= 6
/* Vtable hook (screen->vtbl.update_so_strides): record each bound
 * stream-output target's stride, converting from dwords to bytes.
 */
static void update_so_strides(struct crocus_context *ice,
                              uint16_t *strides)
{
   for (int buf = 0; buf < PIPE_MAX_SO_BUFFERS; buf++) {
      struct crocus_stream_output_target *tgt =
         (void *)ice->state.so_target[buf];
      if (!tgt)
         continue;
      tgt->stride = strides[buf] * sizeof(uint32_t);
   }
}
#endif
9081
9082
static void crocus_fill_clamp_mask(const struct crocus_sampler_state *samp,
9083
int s,
9084
uint32_t *clamp_mask)
9085
{
9086
#if GFX_VER < 8
9087
if (samp->pstate.min_img_filter != PIPE_TEX_FILTER_NEAREST &&
9088
samp->pstate.mag_img_filter != PIPE_TEX_FILTER_NEAREST) {
9089
if (samp->pstate.wrap_s == PIPE_TEX_WRAP_CLAMP)
9090
clamp_mask[0] |= (1 << s);
9091
if (samp->pstate.wrap_t == PIPE_TEX_WRAP_CLAMP)
9092
clamp_mask[1] |= (1 << s);
9093
if (samp->pstate.wrap_r == PIPE_TEX_WRAP_CLAMP)
9094
clamp_mask[2] |= (1 << s);
9095
}
9096
#endif
9097
}
9098
9099
static void
9100
crocus_set_frontend_noop(struct pipe_context *ctx, bool enable)
9101
{
9102
struct crocus_context *ice = (struct crocus_context *) ctx;
9103
9104
if (crocus_batch_prepare_noop(&ice->batches[CROCUS_BATCH_RENDER], enable)) {
9105
ice->state.dirty |= CROCUS_ALL_DIRTY_FOR_RENDER;
9106
ice->state.stage_dirty |= CROCUS_ALL_STAGE_DIRTY_FOR_RENDER;
9107
}
9108
9109
if (ice->batch_count == 1)
9110
return;
9111
9112
if (crocus_batch_prepare_noop(&ice->batches[CROCUS_BATCH_COMPUTE], enable)) {
9113
ice->state.dirty |= CROCUS_ALL_DIRTY_FOR_COMPUTE;
9114
ice->state.stage_dirty |= CROCUS_ALL_STAGE_DIRTY_FOR_COMPUTE;
9115
}
9116
}
9117
9118
/**
 * Wire up the generation-specific screen vtable.  This file is compiled
 * once per generation, so each entry points at this generation's variant;
 * entries guarded by #if only exist where the hardware supports them.
 */
void
genX(crocus_init_screen_state)(struct crocus_screen *screen)
{
   assert(screen->devinfo.verx10 == GFX_VERx10);
   screen->vtbl.destroy_state = crocus_destroy_state;
   screen->vtbl.init_render_context = crocus_init_render_context;
   screen->vtbl.upload_render_state = crocus_upload_render_state;
#if GFX_VER >= 7
   /* Compute shaders require Gen7+. */
   screen->vtbl.init_compute_context = crocus_init_compute_context;
   screen->vtbl.upload_compute_state = crocus_upload_compute_state;
#endif
   screen->vtbl.emit_raw_pipe_control = crocus_emit_raw_pipe_control;
   screen->vtbl.emit_mi_report_perf_count = crocus_emit_mi_report_perf_count;
   screen->vtbl.rebind_buffer = crocus_rebind_buffer;
#if GFX_VERx10 >= 75
   /* MI register load/store helpers (Haswell and later). */
   screen->vtbl.load_register_reg32 = crocus_load_register_reg32;
   screen->vtbl.load_register_reg64 = crocus_load_register_reg64;
   screen->vtbl.load_register_imm32 = crocus_load_register_imm32;
   screen->vtbl.load_register_imm64 = crocus_load_register_imm64;
   screen->vtbl.store_data_imm32 = crocus_store_data_imm32;
   screen->vtbl.store_data_imm64 = crocus_store_data_imm64;
#endif
#if GFX_VER >= 7
   screen->vtbl.load_register_mem32 = crocus_load_register_mem32;
   screen->vtbl.load_register_mem64 = crocus_load_register_mem64;
   screen->vtbl.copy_mem_mem = crocus_copy_mem_mem;
   screen->vtbl.create_so_decl_list = crocus_create_so_decl_list;
#endif
   screen->vtbl.update_surface_base_address = crocus_update_surface_base_address;
#if GFX_VER >= 6
   screen->vtbl.store_register_mem32 = crocus_store_register_mem32;
   screen->vtbl.store_register_mem64 = crocus_store_register_mem64;
#endif
   /* Shader compile key population, per stage. */
   screen->vtbl.populate_vs_key = crocus_populate_vs_key;
   screen->vtbl.populate_tcs_key = crocus_populate_tcs_key;
   screen->vtbl.populate_tes_key = crocus_populate_tes_key;
   screen->vtbl.populate_gs_key = crocus_populate_gs_key;
   screen->vtbl.populate_fs_key = crocus_populate_fs_key;
   screen->vtbl.populate_cs_key = crocus_populate_cs_key;
   screen->vtbl.lost_genx_state = crocus_lost_genx_state;
#if GFX_VER >= 7
   screen->vtbl.finish_batch = crocus_state_finish_batch;
#endif
#if GFX_VER <= 5
   /* Gen4/5 use URB fences instead of 3DSTATE_URB. */
   screen->vtbl.upload_urb_fence = crocus_upload_urb_fence;
   screen->vtbl.calculate_urb_fence = crocus_calculate_urb_fence;
#endif
   screen->vtbl.fill_clamp_mask = crocus_fill_clamp_mask;
   screen->vtbl.batch_reset_dirty = crocus_batch_reset_dirty;
   screen->vtbl.translate_prim_type = translate_prim_type;
#if GFX_VER >= 6
   /* Stream output requires Gen6+. */
   screen->vtbl.update_so_strides = update_so_strides;
   screen->vtbl.get_so_offset = crocus_get_so_offset;
#endif

   genX(crocus_init_blt)(screen);
}
9175
9176
/**
 * Install the gallium pipe_context state hooks and set the context's
 * initial state: everything dirty, one viewport, full sample mask, and
 * empty scissors.
 */
void
genX(crocus_init_state)(struct crocus_context *ice)
{
   struct pipe_context *ctx = &ice->ctx;

   /* CSO create/bind/delete hooks. */
   ctx->create_blend_state = crocus_create_blend_state;
   ctx->create_depth_stencil_alpha_state = crocus_create_zsa_state;
   ctx->create_rasterizer_state = crocus_create_rasterizer_state;
   ctx->create_sampler_state = crocus_create_sampler_state;
   ctx->create_sampler_view = crocus_create_sampler_view;
   ctx->create_surface = crocus_create_surface;
   ctx->create_vertex_elements_state = crocus_create_vertex_elements;
   ctx->bind_blend_state = crocus_bind_blend_state;
   ctx->bind_depth_stencil_alpha_state = crocus_bind_zsa_state;
   ctx->bind_sampler_states = crocus_bind_sampler_states;
   ctx->bind_rasterizer_state = crocus_bind_rasterizer_state;
   ctx->bind_vertex_elements_state = crocus_bind_vertex_elements_state;
   ctx->delete_blend_state = crocus_delete_state;
   ctx->delete_depth_stencil_alpha_state = crocus_delete_state;
   ctx->delete_rasterizer_state = crocus_delete_state;
   ctx->delete_sampler_state = crocus_delete_state;
   ctx->delete_vertex_elements_state = crocus_delete_state;
   /* Streamed (set_*) state hooks. */
   ctx->set_blend_color = crocus_set_blend_color;
   ctx->set_clip_state = crocus_set_clip_state;
   ctx->set_constant_buffer = crocus_set_constant_buffer;
   ctx->set_shader_buffers = crocus_set_shader_buffers;
   ctx->set_shader_images = crocus_set_shader_images;
   ctx->set_sampler_views = crocus_set_sampler_views;
   ctx->set_tess_state = crocus_set_tess_state;
   ctx->set_framebuffer_state = crocus_set_framebuffer_state;
   ctx->set_polygon_stipple = crocus_set_polygon_stipple;
   ctx->set_sample_mask = crocus_set_sample_mask;
   ctx->set_scissor_states = crocus_set_scissor_states;
   ctx->set_stencil_ref = crocus_set_stencil_ref;
   ctx->set_vertex_buffers = crocus_set_vertex_buffers;
   ctx->set_viewport_states = crocus_set_viewport_states;
   ctx->sampler_view_destroy = crocus_sampler_view_destroy;
   ctx->surface_destroy = crocus_surface_destroy;
   ctx->draw_vbo = crocus_draw_vbo;
   ctx->launch_grid = crocus_launch_grid;

   ctx->set_frontend_noop = crocus_set_frontend_noop;

#if GFX_VER >= 6
   /* Stream output requires Gen6+. */
   ctx->create_stream_output_target = crocus_create_stream_output_target;
   ctx->stream_output_target_destroy = crocus_stream_output_target_destroy;
   ctx->set_stream_output_targets = crocus_set_stream_output_targets;
#endif

   /* Flag everything dirty so the first draw emits all state. */
   ice->state.dirty = ~0ull;
   ice->state.stage_dirty = ~0ull;

   ice->state.statistics_counters_enabled = true;

   ice->state.sample_mask = 0xff;
   ice->state.num_viewports = 1;
   ice->state.prim_mode = PIPE_PRIM_MAX;
   ice->state.genx = calloc(1, sizeof(struct crocus_genx_state));
   ice->draw.derived_params.drawid = -1;

   /* Default all scissor rectangles to be empty regions. */
   for (int i = 0; i < CROCUS_MAX_VIEWPORTS; i++) {
      ice->state.scissors[i] = (struct pipe_scissor_state) {
         .minx = 1, .maxx = 0, .miny = 1, .maxy = 0,
      };
   }
}
9243
9244