CoCalc -- iris

GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/iris/iris_state.c
⁴⁵⁶⁵ views
1
/*
2
 * Copyright © 2017 Intel Corporation
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 * copy of this software and associated documentation files (the "Software"),
6
 * to deal in the Software without restriction, including without limitation
7
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
 * and/or sell copies of the Software, and to permit persons to whom the
9
 * Software is furnished to do so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice shall be included
12
 * in all copies or substantial portions of the Software.
13
 *
14
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20
 * DEALINGS IN THE SOFTWARE.
21
 */
22

23
/**
24
 * @file iris_state.c
25
 *
26
 * ============================= GENXML CODE =============================
27
 *              [This file is compiled once per generation.]
28
 * =======================================================================
29
 *
30
 * This is the main state upload code.
31
 *
32
 * Gallium uses Constant State Objects, or CSOs, for most state.  Large,
33
 * complex, or highly reusable state can be created once, and bound and
34
 * rebound multiple times.  This is modeled with the pipe->create_*_state()
35
 * and pipe->bind_*_state() hooks.  Highly dynamic or inexpensive state is
36
 * streamed out on the fly, via pipe->set_*_state() hooks.
37
 *
38
 * OpenGL involves frequently mutating context state, which is mirrored in
39
 * core Mesa by highly mutable data structures.  However, most applications
40
 * typically draw the same things over and over - from frame to frame, most
41
 * of the same objects are still visible and need to be redrawn.  So, rather
42
 * than inventing new state all the time, applications usually mutate to swap
43
 * between known states that we've seen before.
44
 *
45
 * Gallium isolates us from this mutation by tracking API state, and
46
 * distilling it into a set of Constant State Objects, or CSOs.  Large,
47
 * complex, or typically reusable state can be created once, then reused
48
 * multiple times.  Drivers can create and store their own associated data.
49
 * This create/bind model corresponds to the pipe->create_*_state() and
50
 * pipe->bind_*_state() driver hooks.
51
 *
52
 * Some state is cheap to create, or expected to be highly dynamic.  Rather
53
 * than creating and caching piles of CSOs for these, Gallium simply streams
54
 * them out, via the pipe->set_*_state() driver hooks.
55
 *
56
 * To reduce draw time overhead, we try to compute as much state at create
57
 * time as possible.  Wherever possible, we translate the Gallium pipe state
58
 * to 3DSTATE commands, and store those commands in the CSO.  At draw time,
59
 * we can simply memcpy them into a batch buffer.
60
 *
61
 * No hardware matches the abstraction perfectly, so some commands require
62
 * information from multiple CSOs.  In this case, we can store two copies
63
 * of the packet (one in each CSO), and simply | together their DWords at
64
 * draw time.  Sometimes the second set is trivial (one or two fields), so
65
 * we simply pack it at draw time.
66
 *
67
 * There are two main components in the file below.  First, the CSO hooks
68
 * create/bind/track state.  The second are the draw-time upload functions,
69
 * iris_upload_render_state() and iris_upload_compute_state(), which read
70
 * the context state and emit the commands into the actual batch.
71
 */
72

73
#include <stdio.h>
74
#include <errno.h>
75

76
#if HAVE_VALGRIND
77
#include <valgrind.h>
78
#include <memcheck.h>
79
#define VG(x) x
80
#ifdef DEBUG
81
#define __gen_validate_value(x) VALGRIND_CHECK_MEM_IS_DEFINED(&(x), sizeof(x))
82
#endif
83
#else
84
#define VG(x)
85
#endif
86

87
#include "pipe/p_defines.h"
88
#include "pipe/p_state.h"
89
#include "pipe/p_context.h"
90
#include "pipe/p_screen.h"
91
#include "util/u_dual_blend.h"
92
#include "util/u_inlines.h"
93
#include "util/format/u_format.h"
94
#include "util/u_framebuffer.h"
95
#include "util/u_transfer.h"
96
#include "util/u_upload_mgr.h"
97
#include "util/u_viewport.h"
98
#include "util/u_memory.h"
99
#include "drm-uapi/i915_drm.h"
100
#include "nir.h"
101
#include "intel/compiler/brw_compiler.h"
102
#include "intel/common/intel_aux_map.h"
103
#include "intel/common/intel_l3_config.h"
104
#include "intel/common/intel_sample_positions.h"
105
#include "iris_batch.h"
106
#include "iris_context.h"
107
#include "iris_defines.h"
108
#include "iris_pipe.h"
109
#include "iris_resource.h"
110

111
#include "iris_genx_macros.h"
112
#include "intel/common/intel_guardband.h"
113

114
/**
115
 * Statically assert that PIPE_* enums match the hardware packets.
116
 * (As long as they match, we don't need to translate them.)
117
 */
118
UNUSED static void pipe_asserts()
119
{
120
#define PIPE_ASSERT(x) STATIC_ASSERT((int)x)
121

122
   /* pipe_logicop happens to match the hardware. */
123
   PIPE_ASSERT(PIPE_LOGICOP_CLEAR == LOGICOP_CLEAR);
124
   PIPE_ASSERT(PIPE_LOGICOP_NOR == LOGICOP_NOR);
125
   PIPE_ASSERT(PIPE_LOGICOP_AND_INVERTED == LOGICOP_AND_INVERTED);
126
   PIPE_ASSERT(PIPE_LOGICOP_COPY_INVERTED == LOGICOP_COPY_INVERTED);
127
   PIPE_ASSERT(PIPE_LOGICOP_AND_REVERSE == LOGICOP_AND_REVERSE);
128
   PIPE_ASSERT(PIPE_LOGICOP_INVERT == LOGICOP_INVERT);
129
   PIPE_ASSERT(PIPE_LOGICOP_XOR == LOGICOP_XOR);
130
   PIPE_ASSERT(PIPE_LOGICOP_NAND == LOGICOP_NAND);
131
   PIPE_ASSERT(PIPE_LOGICOP_AND == LOGICOP_AND);
132
   PIPE_ASSERT(PIPE_LOGICOP_EQUIV == LOGICOP_EQUIV);
133
   PIPE_ASSERT(PIPE_LOGICOP_NOOP == LOGICOP_NOOP);
134
   PIPE_ASSERT(PIPE_LOGICOP_OR_INVERTED == LOGICOP_OR_INVERTED);
135
   PIPE_ASSERT(PIPE_LOGICOP_COPY == LOGICOP_COPY);
136
   PIPE_ASSERT(PIPE_LOGICOP_OR_REVERSE == LOGICOP_OR_REVERSE);
137
   PIPE_ASSERT(PIPE_LOGICOP_OR == LOGICOP_OR);
138
   PIPE_ASSERT(PIPE_LOGICOP_SET == LOGICOP_SET);
139

140
   /* pipe_blend_func happens to match the hardware. */
141
   PIPE_ASSERT(PIPE_BLENDFACTOR_ONE == BLENDFACTOR_ONE);
142
   PIPE_ASSERT(PIPE_BLENDFACTOR_SRC_COLOR == BLENDFACTOR_SRC_COLOR);
143
   PIPE_ASSERT(PIPE_BLENDFACTOR_SRC_ALPHA == BLENDFACTOR_SRC_ALPHA);
144
   PIPE_ASSERT(PIPE_BLENDFACTOR_DST_ALPHA == BLENDFACTOR_DST_ALPHA);
145
   PIPE_ASSERT(PIPE_BLENDFACTOR_DST_COLOR == BLENDFACTOR_DST_COLOR);
146
   PIPE_ASSERT(PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE == BLENDFACTOR_SRC_ALPHA_SATURATE);
147
   PIPE_ASSERT(PIPE_BLENDFACTOR_CONST_COLOR == BLENDFACTOR_CONST_COLOR);
148
   PIPE_ASSERT(PIPE_BLENDFACTOR_CONST_ALPHA == BLENDFACTOR_CONST_ALPHA);
149
   PIPE_ASSERT(PIPE_BLENDFACTOR_SRC1_COLOR == BLENDFACTOR_SRC1_COLOR);
150
   PIPE_ASSERT(PIPE_BLENDFACTOR_SRC1_ALPHA == BLENDFACTOR_SRC1_ALPHA);
151
   PIPE_ASSERT(PIPE_BLENDFACTOR_ZERO == BLENDFACTOR_ZERO);
152
   PIPE_ASSERT(PIPE_BLENDFACTOR_INV_SRC_COLOR == BLENDFACTOR_INV_SRC_COLOR);
153
   PIPE_ASSERT(PIPE_BLENDFACTOR_INV_SRC_ALPHA == BLENDFACTOR_INV_SRC_ALPHA);
154
   PIPE_ASSERT(PIPE_BLENDFACTOR_INV_DST_ALPHA == BLENDFACTOR_INV_DST_ALPHA);
155
   PIPE_ASSERT(PIPE_BLENDFACTOR_INV_DST_COLOR == BLENDFACTOR_INV_DST_COLOR);
156
   PIPE_ASSERT(PIPE_BLENDFACTOR_INV_CONST_COLOR == BLENDFACTOR_INV_CONST_COLOR);
157
   PIPE_ASSERT(PIPE_BLENDFACTOR_INV_CONST_ALPHA == BLENDFACTOR_INV_CONST_ALPHA);
158
   PIPE_ASSERT(PIPE_BLENDFACTOR_INV_SRC1_COLOR == BLENDFACTOR_INV_SRC1_COLOR);
159
   PIPE_ASSERT(PIPE_BLENDFACTOR_INV_SRC1_ALPHA == BLENDFACTOR_INV_SRC1_ALPHA);
160

161
   /* pipe_blend_func happens to match the hardware. */
162
   PIPE_ASSERT(PIPE_BLEND_ADD == BLENDFUNCTION_ADD);
163
   PIPE_ASSERT(PIPE_BLEND_SUBTRACT == BLENDFUNCTION_SUBTRACT);
164
   PIPE_ASSERT(PIPE_BLEND_REVERSE_SUBTRACT == BLENDFUNCTION_REVERSE_SUBTRACT);
165
   PIPE_ASSERT(PIPE_BLEND_MIN == BLENDFUNCTION_MIN);
166
   PIPE_ASSERT(PIPE_BLEND_MAX == BLENDFUNCTION_MAX);
167

168
   /* pipe_stencil_op happens to match the hardware. */
169
   PIPE_ASSERT(PIPE_STENCIL_OP_KEEP == STENCILOP_KEEP);
170
   PIPE_ASSERT(PIPE_STENCIL_OP_ZERO == STENCILOP_ZERO);
171
   PIPE_ASSERT(PIPE_STENCIL_OP_REPLACE == STENCILOP_REPLACE);
172
   PIPE_ASSERT(PIPE_STENCIL_OP_INCR == STENCILOP_INCRSAT);
173
   PIPE_ASSERT(PIPE_STENCIL_OP_DECR == STENCILOP_DECRSAT);
174
   PIPE_ASSERT(PIPE_STENCIL_OP_INCR_WRAP == STENCILOP_INCR);
175
   PIPE_ASSERT(PIPE_STENCIL_OP_DECR_WRAP == STENCILOP_DECR);
176
   PIPE_ASSERT(PIPE_STENCIL_OP_INVERT == STENCILOP_INVERT);
177

178
   /* pipe_sprite_coord_mode happens to match 3DSTATE_SBE */
179
   PIPE_ASSERT(PIPE_SPRITE_COORD_UPPER_LEFT == UPPERLEFT);
180
   PIPE_ASSERT(PIPE_SPRITE_COORD_LOWER_LEFT == LOWERLEFT);
181
#undef PIPE_ASSERT
182
}
183

184
static unsigned
185
translate_prim_type(enum pipe_prim_type prim, uint8_t verts_per_patch)
186
{
187
   static const unsigned map[] = {
188
      [PIPE_PRIM_POINTS]                   = _3DPRIM_POINTLIST,
189
      [PIPE_PRIM_LINES]                    = _3DPRIM_LINELIST,
190
      [PIPE_PRIM_LINE_LOOP]                = _3DPRIM_LINELOOP,
191
      [PIPE_PRIM_LINE_STRIP]               = _3DPRIM_LINESTRIP,
192
      [PIPE_PRIM_TRIANGLES]                = _3DPRIM_TRILIST,
193
      [PIPE_PRIM_TRIANGLE_STRIP]           = _3DPRIM_TRISTRIP,
194
      [PIPE_PRIM_TRIANGLE_FAN]             = _3DPRIM_TRIFAN,
195
      [PIPE_PRIM_QUADS]                    = _3DPRIM_QUADLIST,
196
      [PIPE_PRIM_QUAD_STRIP]               = _3DPRIM_QUADSTRIP,
197
      [PIPE_PRIM_POLYGON]                  = _3DPRIM_POLYGON,
198
      [PIPE_PRIM_LINES_ADJACENCY]          = _3DPRIM_LINELIST_ADJ,
199
      [PIPE_PRIM_LINE_STRIP_ADJACENCY]     = _3DPRIM_LINESTRIP_ADJ,
200
      [PIPE_PRIM_TRIANGLES_ADJACENCY]      = _3DPRIM_TRILIST_ADJ,
201
      [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
202
      [PIPE_PRIM_PATCHES]                  = _3DPRIM_PATCHLIST_1 - 1,
203
   };
204

205
   return map[prim] + (prim == PIPE_PRIM_PATCHES ? verts_per_patch : 0);
206
}
207

208
static unsigned
209
translate_compare_func(enum pipe_compare_func pipe_func)
210
{
211
   static const unsigned map[] = {
212
      [PIPE_FUNC_NEVER]    = COMPAREFUNCTION_NEVER,
213
      [PIPE_FUNC_LESS]     = COMPAREFUNCTION_LESS,
214
      [PIPE_FUNC_EQUAL]    = COMPAREFUNCTION_EQUAL,
215
      [PIPE_FUNC_LEQUAL]   = COMPAREFUNCTION_LEQUAL,
216
      [PIPE_FUNC_GREATER]  = COMPAREFUNCTION_GREATER,
217
      [PIPE_FUNC_NOTEQUAL] = COMPAREFUNCTION_NOTEQUAL,
218
      [PIPE_FUNC_GEQUAL]   = COMPAREFUNCTION_GEQUAL,
219
      [PIPE_FUNC_ALWAYS]   = COMPAREFUNCTION_ALWAYS,
220
   };
221
   return map[pipe_func];
222
}
223

224
static unsigned
225
translate_shadow_func(enum pipe_compare_func pipe_func)
226
{
227
   /* Gallium specifies the result of shadow comparisons as:
228
    *
229
    *    1 if ref <op> texel,
230
    *    0 otherwise.
231
    *
232
    * The hardware does:
233
    *
234
    *    0 if texel <op> ref,
235
    *    1 otherwise.
236
    *
237
    * So we need to flip the operator and also negate.
238
    */
239
   static const unsigned map[] = {
240
      [PIPE_FUNC_NEVER]    = PREFILTEROP_ALWAYS,
241
      [PIPE_FUNC_LESS]     = PREFILTEROP_LEQUAL,
242
      [PIPE_FUNC_EQUAL]    = PREFILTEROP_NOTEQUAL,
243
      [PIPE_FUNC_LEQUAL]   = PREFILTEROP_LESS,
244
      [PIPE_FUNC_GREATER]  = PREFILTEROP_GEQUAL,
245
      [PIPE_FUNC_NOTEQUAL] = PREFILTEROP_EQUAL,
246
      [PIPE_FUNC_GEQUAL]   = PREFILTEROP_GREATER,
247
      [PIPE_FUNC_ALWAYS]   = PREFILTEROP_NEVER,
248
   };
249
   return map[pipe_func];
250
}
251

252
static unsigned
253
translate_cull_mode(unsigned pipe_face)
254
{
255
   static const unsigned map[4] = {
256
      [PIPE_FACE_NONE]           = CULLMODE_NONE,
257
      [PIPE_FACE_FRONT]          = CULLMODE_FRONT,
258
      [PIPE_FACE_BACK]           = CULLMODE_BACK,
259
      [PIPE_FACE_FRONT_AND_BACK] = CULLMODE_BOTH,
260
   };
261
   return map[pipe_face];
262
}
263

264
static unsigned
265
translate_fill_mode(unsigned pipe_polymode)
266
{
267
   static const unsigned map[4] = {
268
      [PIPE_POLYGON_MODE_FILL]           = FILL_MODE_SOLID,
269
      [PIPE_POLYGON_MODE_LINE]           = FILL_MODE_WIREFRAME,
270
      [PIPE_POLYGON_MODE_POINT]          = FILL_MODE_POINT,
271
      [PIPE_POLYGON_MODE_FILL_RECTANGLE] = FILL_MODE_SOLID,
272
   };
273
   return map[pipe_polymode];
274
}
275

276
static unsigned
277
translate_mip_filter(enum pipe_tex_mipfilter pipe_mip)
278
{
279
   static const unsigned map[] = {
280
      [PIPE_TEX_MIPFILTER_NEAREST] = MIPFILTER_NEAREST,
281
      [PIPE_TEX_MIPFILTER_LINEAR]  = MIPFILTER_LINEAR,
282
      [PIPE_TEX_MIPFILTER_NONE]    = MIPFILTER_NONE,
283
   };
284
   return map[pipe_mip];
285
}
286

287
static uint32_t
288
translate_wrap(unsigned pipe_wrap)
289
{
290
   static const unsigned map[] = {
291
      [PIPE_TEX_WRAP_REPEAT]                 = TCM_WRAP,
292
      [PIPE_TEX_WRAP_CLAMP]                  = TCM_HALF_BORDER,
293
      [PIPE_TEX_WRAP_CLAMP_TO_EDGE]          = TCM_CLAMP,
294
      [PIPE_TEX_WRAP_CLAMP_TO_BORDER]        = TCM_CLAMP_BORDER,
295
      [PIPE_TEX_WRAP_MIRROR_REPEAT]          = TCM_MIRROR,
296
      [PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE]   = TCM_MIRROR_ONCE,
297

298
      /* These are unsupported. */
299
      [PIPE_TEX_WRAP_MIRROR_CLAMP]           = -1,
300
      [PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER] = -1,
301
   };
302
   return map[pipe_wrap];
303
}
304

305
/**
306
 * Allocate space for some indirect state.
307
 *
308
 * Return a pointer to the map (to fill it out) and a state ref (for
309
 * referring to the state in GPU commands).
310
 */
311
static void *
312
upload_state(struct u_upload_mgr *uploader,
313
             struct iris_state_ref *ref,
314
             unsigned size,
315
             unsigned alignment)
316
{
317
   void *p = NULL;
318
   u_upload_alloc(uploader, 0, size, alignment, &ref->offset, &ref->res, &p);
319
   return p;
320
}
321

322
/**
323
 * Stream out temporary/short-lived state.
324
 *
325
 * This allocates space, pins the BO, and includes the BO address in the
326
 * returned offset (which works because all state lives in 32-bit memory
327
 * zones).
328
 */
329
static uint32_t *
330
stream_state(struct iris_batch *batch,
331
             struct u_upload_mgr *uploader,
332
             struct pipe_resource **out_res,
333
             unsigned size,
334
             unsigned alignment,
335
             uint32_t *out_offset)
336
{
337
   void *ptr = NULL;
338

339
   u_upload_alloc(uploader, 0, size, alignment, out_offset, out_res, &ptr);
340

341
   struct iris_bo *bo = iris_resource_bo(*out_res);
342
   iris_use_pinned_bo(batch, bo, false, IRIS_DOMAIN_NONE);
343

344
   iris_record_state_size(batch->state_sizes,
345
                          bo->gtt_offset + *out_offset, size);
346

347
   *out_offset += iris_bo_offset_from_base_address(bo);
348

349
   return ptr;
350
}
351

352
/**
 * Stream out a copy of \p data: stream_state() followed by a memcpy.
 *
 * The copy is skipped when stream_state() returns no mapping.
 *
 * \return the offset produced by stream_state() (0 if it never wrote one).
 */
static uint32_t
emit_state(struct iris_batch *batch,
           struct u_upload_mgr *uploader,
           struct pipe_resource **out_res,
           const void *data,
           unsigned size,
           unsigned alignment)
{
   uint32_t state_offset = 0;
   uint32_t *dst = stream_state(batch, uploader, out_res,
                                size, alignment, &state_offset);

   if (dst != NULL)
      memcpy(dst, data, size);

   return state_offset;
}
372

373
/**
 * Did field 'x' change between 'old_cso' and 'new_cso'?
 *
 * (If so, we may want to set some dirty flags.)
 *
 * Both macros expect pointers named 'old_cso' and 'new_cso' to be in scope
 * at the point of use.  A NULL 'old_cso' always counts as "changed".
 * cso_changed() compares the field with !=, while cso_changed_memcmp()
 * compares sizeof(old_cso->x) bytes and is meant for array members.
 */
#define cso_changed(x) (!old_cso || (old_cso->x != new_cso->x))
#define cso_changed_memcmp(x) \
   (!old_cso || memcmp(old_cso->x, new_cso->x, sizeof(old_cso->x)) != 0)
381

382
static void
383
flush_before_state_base_change(struct iris_batch *batch)
384
{
385
   const struct intel_device_info *devinfo = &batch->screen->devinfo;
386

387
   /* Flush before emitting STATE_BASE_ADDRESS.
388
    *
389
    * This isn't documented anywhere in the PRM.  However, it seems to be
390
    * necessary prior to changing the surface state base address.  We've
391
    * seen issues in Vulkan where we get GPU hangs when using multi-level
392
    * command buffers which clear depth, reset state base address, and then
393
    * go render stuff.
394
    *
395
    * Normally, in GL, we would trust the kernel to do sufficient stalls
396
    * and flushes prior to executing our batch.  However, it doesn't seem
397
    * as if the kernel's flushing is always sufficient and we don't want to
398
    * rely on it.
399
    *
400
    * We make this an end-of-pipe sync instead of a normal flush because we
401
    * do not know the current status of the GPU.  On Haswell at least,
402
    * having a fast-clear operation in flight at the same time as a normal
403
    * rendering operation can cause hangs.  Since the kernel's flushing is
404
    * insufficient, we need to ensure that any rendering operations from
405
    * other processes are definitely complete before we try to do our own
406
    * rendering.  It's a bit of a big hammer but it appears to work.
407
    */
408
   iris_emit_end_of_pipe_sync(batch,
409
                              "change STATE_BASE_ADDRESS (flushes)",
410
                              PIPE_CONTROL_RENDER_TARGET_FLUSH |
411
                              PIPE_CONTROL_DEPTH_CACHE_FLUSH |
412
                              PIPE_CONTROL_DATA_CACHE_FLUSH |
413
                              /* Wa_1606662791:
414
                               *
415
                               *   Software must program PIPE_CONTROL command
416
                               *   with "HDC Pipeline Flush" prior to
417
                               *   programming of the below two non-pipeline
418
                               *   state :
419
                               *      * STATE_BASE_ADDRESS
420
                               *      * 3DSTATE_BINDING_TABLE_POOL_ALLOC
421
                               */
422
                              ((GFX_VER == 12 && devinfo->revision == 0 /* A0 */ ?
423
                                PIPE_CONTROL_FLUSH_HDC : 0)));
424
}
425

426
static void
427
flush_after_state_base_change(struct iris_batch *batch)
428
{
429
   /* After re-setting the surface state base address, we have to do some
430
    * cache flusing so that the sampler engine will pick up the new
431
    * SURFACE_STATE objects and binding tables. From the Broadwell PRM,
432
    * Shared Function > 3D Sampler > State > State Caching (page 96):
433
    *
434
    *    Coherency with system memory in the state cache, like the texture
435
    *    cache is handled partially by software. It is expected that the
436
    *    command stream or shader will issue Cache Flush operation or
437
    *    Cache_Flush sampler message to ensure that the L1 cache remains
438
    *    coherent with system memory.
439
    *
440
    *    [...]
441
    *
442
    *    Whenever the value of the Dynamic_State_Base_Addr,
443
    *    Surface_State_Base_Addr are altered, the L1 state cache must be
444
    *    invalidated to ensure the new surface or sampler state is fetched
445
    *    from system memory.
446
    *
447
    * The PIPE_CONTROL command has a "State Cache Invalidation Enable" bit
448
    * which, according the PIPE_CONTROL instruction documentation in the
449
    * Broadwell PRM:
450
    *
451
    *    Setting this bit is independent of any other bit in this packet.
452
    *    This bit controls the invalidation of the L1 and L2 state caches
453
    *    at the top of the pipe i.e. at the parsing time.
454
    *
455
    * Unfortunately, experimentation seems to indicate that state cache
456
    * invalidation through a PIPE_CONTROL does nothing whatsoever in
457
    * regards to surface state and binding tables.  In stead, it seems that
458
    * invalidating the texture cache is what is actually needed.
459
    *
460
    * XXX:  As far as we have been able to determine through
461
    * experimentation, shows that flush the texture cache appears to be
462
    * sufficient.  The theory here is that all of the sampling/rendering
463
    * units cache the binding table in the texture cache.  However, we have
464
    * yet to be able to actually confirm this.
465
    */
466
   iris_emit_end_of_pipe_sync(batch,
467
                              "change STATE_BASE_ADDRESS (invalidates)",
468
                              PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
469
                              PIPE_CONTROL_CONST_CACHE_INVALIDATE |
470
                              PIPE_CONTROL_STATE_CACHE_INVALIDATE);
471
}
472

473
static void
474
_iris_emit_lri(struct iris_batch *batch, uint32_t reg, uint32_t val)
475
{
476
   iris_emit_cmd(batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
477
      lri.RegisterOffset = reg;
478
      lri.DataDWord      = val;
479
   }
480
}
481
#define iris_emit_lri(b, r, v) _iris_emit_lri(b, GENX(r##_num), v)
482

483
static void
484
_iris_emit_lrr(struct iris_batch *batch, uint32_t dst, uint32_t src)
485
{
486
   iris_emit_cmd(batch, GENX(MI_LOAD_REGISTER_REG), lrr) {
487
      lrr.SourceRegisterAddress = src;
488
      lrr.DestinationRegisterAddress = dst;
489
   }
490
}
491

492
/**
 * Copy one 32-bit register (\p src) into another (\p dst) with a single
 * register-to-register load.
 */
static void
iris_load_register_reg32(struct iris_batch *batch, uint32_t dst,
                         uint32_t src)
{
   const uint32_t dst_reg = dst;
   const uint32_t src_reg = src;

   _iris_emit_lrr(batch, dst_reg, src_reg);
}
498

499
/**
 * Copy a 64-bit register pair from \p src to \p dst as two 32-bit
 * register-to-register loads, low DWord (offset 0) first, then offset 4.
 */
static void
iris_load_register_reg64(struct iris_batch *batch, uint32_t dst,
                         uint32_t src)
{
   for (uint32_t byte_off = 0; byte_off < 8; byte_off += 4)
      _iris_emit_lrr(batch, dst + byte_off, src + byte_off);
}
506

507
/**
 * Load the 32-bit immediate \p val into register \p reg.
 */
static void
iris_load_register_imm32(struct iris_batch *batch, uint32_t reg,
                         uint32_t val)
{
   const uint32_t reg_offset = reg;
   const uint32_t immediate = val;

   _iris_emit_lri(batch, reg_offset, immediate);
}
513

514
/**
 * Load the 64-bit immediate \p val into the register pair at \p reg:
 * the low 32 bits go to \p reg, the high 32 bits to \p reg + 4.
 */
static void
iris_load_register_imm64(struct iris_batch *batch, uint32_t reg,
                         uint64_t val)
{
   const uint32_t low_dword  = (uint32_t)(val & 0xffffffff);
   const uint32_t high_dword = (uint32_t)(val >> 32);

   _iris_emit_lri(batch, reg + 0, low_dword);
   _iris_emit_lri(batch, reg + 4, high_dword);
}
521

522
/**
523
 * Emit MI_LOAD_REGISTER_MEM to load a 32-bit MMIO register from a buffer.
524
 */
525
static void
526
iris_load_register_mem32(struct iris_batch *batch, uint32_t reg,
527
                         struct iris_bo *bo, uint32_t offset)
528
{
529
   iris_batch_sync_region_start(batch);
530
   iris_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
531
      lrm.RegisterAddress = reg;
532
      lrm.MemoryAddress = ro_bo(bo, offset);
533
   }
534
   iris_batch_sync_region_end(batch);
535
}
536

537
/**
 * Load a 64-bit MMIO register pair from a buffer using two 32-bit loads:
 * first \p reg from \p offset, then \p reg + 4 from \p offset + 4.
 */
static void
iris_load_register_mem64(struct iris_batch *batch, uint32_t reg,
                         struct iris_bo *bo, uint32_t offset)
{
   for (uint32_t byte_off = 0; byte_off < 8; byte_off += 4)
      iris_load_register_mem32(batch, reg + byte_off, bo, offset + byte_off);
}
548

549
static void
550
iris_store_register_mem32(struct iris_batch *batch, uint32_t reg,
551
                          struct iris_bo *bo, uint32_t offset,
552
                          bool predicated)
553
{
554
   iris_batch_sync_region_start(batch);
555
   iris_emit_cmd(batch, GENX(MI_STORE_REGISTER_MEM), srm) {
556
      srm.RegisterAddress = reg;
557
      srm.MemoryAddress = rw_bo(bo, offset, IRIS_DOMAIN_OTHER_WRITE);
558
      srm.PredicateEnable = predicated;
559
   }
560
   iris_batch_sync_region_end(batch);
561
}
562

563
/**
 * Store a 64-bit MMIO register pair to a buffer using two 32-bit stores:
 * first \p reg to \p offset, then \p reg + 4 to \p offset + 4.  Both
 * stores receive the same \p predicated flag.
 */
static void
iris_store_register_mem64(struct iris_batch *batch, uint32_t reg,
                          struct iris_bo *bo, uint32_t offset,
                          bool predicated)
{
   for (uint32_t byte_off = 0; byte_off < 8; byte_off += 4) {
      iris_store_register_mem32(batch, reg + byte_off,
                                bo, offset + byte_off, predicated);
   }
}
571

572
static void
573
iris_store_data_imm32(struct iris_batch *batch,
574
                      struct iris_bo *bo, uint32_t offset,
575
                      uint32_t imm)
576
{
577
   iris_batch_sync_region_start(batch);
578
   iris_emit_cmd(batch, GENX(MI_STORE_DATA_IMM), sdi) {
579
      sdi.Address = rw_bo(bo, offset, IRIS_DOMAIN_OTHER_WRITE);
580
      sdi.ImmediateData = imm;
581
   }
582
   iris_batch_sync_region_end(batch);
583
}
584

585
static void
586
iris_store_data_imm64(struct iris_batch *batch,
587
                      struct iris_bo *bo, uint32_t offset,
588
                      uint64_t imm)
589
{
590
   /* Can't use iris_emit_cmd because MI_STORE_DATA_IMM has a length of
591
    * 2 in genxml but it's actually variable length and we need 5 DWords.
592
    */
593
   void *map = iris_get_command_space(batch, 4 * 5);
594
   iris_batch_sync_region_start(batch);
595
   _iris_pack_command(batch, GENX(MI_STORE_DATA_IMM), map, sdi) {
596
      sdi.DWordLength = 5 - 2;
597
      sdi.Address = rw_bo(bo, offset, IRIS_DOMAIN_OTHER_WRITE);
598
      sdi.ImmediateData = imm;
599
   }
600
   iris_batch_sync_region_end(batch);
601
}
602

603
static void
604
iris_copy_mem_mem(struct iris_batch *batch,
605
                  struct iris_bo *dst_bo, uint32_t dst_offset,
606
                  struct iris_bo *src_bo, uint32_t src_offset,
607
                  unsigned bytes)
608
{
609
   /* MI_COPY_MEM_MEM operates on DWords. */
610
   assert(bytes % 4 == 0);
611
   assert(dst_offset % 4 == 0);
612
   assert(src_offset % 4 == 0);
613
   iris_batch_sync_region_start(batch);
614

615
   for (unsigned i = 0; i < bytes; i += 4) {
616
      iris_emit_cmd(batch, GENX(MI_COPY_MEM_MEM), cp) {
617
         cp.DestinationMemoryAddress = rw_bo(dst_bo, dst_offset + i,
618
                                             IRIS_DOMAIN_OTHER_WRITE);
619
         cp.SourceMemoryAddress = ro_bo(src_bo, src_offset + i);
620
      }
621
   }
622

623
   iris_batch_sync_region_end(batch);
624
}
625

626
static void
627
emit_pipeline_select(struct iris_batch *batch, uint32_t pipeline)
628
{
629
#if GFX_VER >= 8 && GFX_VER < 10
630
   /* From the Broadwell PRM, Volume 2a: Instructions, PIPELINE_SELECT:
631
    *
632
    *   Software must clear the COLOR_CALC_STATE Valid field in
633
    *   3DSTATE_CC_STATE_POINTERS command prior to send a PIPELINE_SELECT
634
    *   with Pipeline Select set to GPGPU.
635
    *
636
    * The internal hardware docs recommend the same workaround for Gfx9
637
    * hardware too.
638
    */
639
   if (pipeline == GPGPU)
640
      iris_emit_cmd(batch, GENX(3DSTATE_CC_STATE_POINTERS), t);
641
#endif
642

643

644
   /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction]
645
    * PIPELINE_SELECT [DevBWR+]":
646
    *
647
    *    "Project: DEVSNB+
648
    *
649
    *     Software must ensure all the write caches are flushed through a
650
    *     stalling PIPE_CONTROL command followed by another PIPE_CONTROL
651
    *     command to invalidate read only caches prior to programming
652
    *     MI_PIPELINE_SELECT command to change the Pipeline Select Mode."
653
    */
654
    iris_emit_pipe_control_flush(batch,
655
                                 "workaround: PIPELINE_SELECT flushes (1/2)",
656
                                 PIPE_CONTROL_RENDER_TARGET_FLUSH |
657
                                 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
658
                                 PIPE_CONTROL_DATA_CACHE_FLUSH |
659
                                 PIPE_CONTROL_CS_STALL);
660

661
    iris_emit_pipe_control_flush(batch,
662
                                 "workaround: PIPELINE_SELECT flushes (2/2)",
663
                                 PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
664
                                 PIPE_CONTROL_CONST_CACHE_INVALIDATE |
665
                                 PIPE_CONTROL_STATE_CACHE_INVALIDATE |
666
                                 PIPE_CONTROL_INSTRUCTION_INVALIDATE);
667

668
   iris_emit_cmd(batch, GENX(PIPELINE_SELECT), sel) {
669
#if GFX_VER >= 9
670
      sel.MaskBits = GFX_VER >= 12 ? 0x13 : 3;
671
      sel.MediaSamplerDOPClockGateEnable = GFX_VER >= 12;
672
#endif
673
      sel.PipelineSelection = pipeline;
674
   }
675
}
676

677
/**
 * Program the GLK barrier-mode chicken bit to \p value.
 *
 * Only does anything when compiled for GFX_VER == 9; on every other
 * generation the body is empty.
 */
UNUSED static void
init_glk_barrier_mode(struct iris_batch *batch, uint32_t value)
{
#if GFX_VER == 9
   /* Project: DevGLK
    *
    *    "This chicken bit works around a hardware issue with barrier
    *     logic encountered when switching between GPGPU and 3D pipelines.
    *     To workaround the issue, this mode bit should be set after a
    *     pipeline is selected."
    */
   iris_emit_reg(batch, GENX(SLICE_COMMON_ECO_CHICKEN1), chicken) {
      chicken.GLKBarrierModeMask = 1;
      chicken.GLKBarrierMode = value;
   }
#endif
}
694

695
static void
696
init_state_base_address(struct iris_batch *batch)
697
{
698
   struct isl_device *isl_dev = &batch->screen->isl_dev;
699
   uint32_t mocs = isl_mocs(isl_dev, 0, false);
700
   flush_before_state_base_change(batch);
701

702
   /* We program most base addresses once at context initialization time.
703
    * Each base address points at a 4GB memory zone, and never needs to
704
    * change.  See iris_bufmgr.h for a description of the memory zones.
705
    *
706
    * The one exception is Surface State Base Address, which needs to be
707
    * updated occasionally.  See iris_binder.c for the details there.
708
    */
709
   iris_emit_cmd(batch, GENX(STATE_BASE_ADDRESS), sba) {
710
      sba.GeneralStateMOCS            = mocs;
711
      sba.StatelessDataPortAccessMOCS = mocs;
712
      sba.DynamicStateMOCS            = mocs;
713
      sba.IndirectObjectMOCS          = mocs;
714
      sba.InstructionMOCS             = mocs;
715
      sba.SurfaceStateMOCS            = mocs;
716

717
      sba.GeneralStateBaseAddressModifyEnable   = true;
718
      sba.DynamicStateBaseAddressModifyEnable   = true;
719
      sba.IndirectObjectBaseAddressModifyEnable = true;
720
      sba.InstructionBaseAddressModifyEnable    = true;
721
      sba.GeneralStateBufferSizeModifyEnable    = true;
722
      sba.DynamicStateBufferSizeModifyEnable    = true;
723
#if (GFX_VER >= 9)
724
      sba.BindlessSurfaceStateBaseAddress = ro_bo(NULL, IRIS_MEMZONE_BINDLESS_START);
725
      sba.BindlessSurfaceStateSize = (IRIS_BINDLESS_SIZE >> 12) - 1;
726
      sba.BindlessSurfaceStateBaseAddressModifyEnable = true;
727
      sba.BindlessSurfaceStateMOCS    = mocs;
728
#endif
729
      sba.IndirectObjectBufferSizeModifyEnable  = true;
730
      sba.InstructionBuffersizeModifyEnable     = true;
731

732
      sba.InstructionBaseAddress  = ro_bo(NULL, IRIS_MEMZONE_SHADER_START);
733
      sba.DynamicStateBaseAddress = ro_bo(NULL, IRIS_MEMZONE_DYNAMIC_START);
734

735
      sba.GeneralStateBufferSize   = 0xfffff;
736
      sba.IndirectObjectBufferSize = 0xfffff;
737
      sba.InstructionBufferSize    = 0xfffff;
738
      sba.DynamicStateBufferSize   = 0xfffff;
739
   }
740

741
   flush_after_state_base_change(batch);
742
}
743

744
static void
745
iris_emit_l3_config(struct iris_batch *batch,
746
                    const struct intel_l3_config *cfg)
747
{
748
   assert(cfg || GFX_VER >= 12);
749

750
#if GFX_VER >= 12
751
#define L3_ALLOCATION_REG GENX(L3ALLOC)
752
#define L3_ALLOCATION_REG_num GENX(L3ALLOC_num)
753
#else
754
#define L3_ALLOCATION_REG GENX(L3CNTLREG)
755
#define L3_ALLOCATION_REG_num GENX(L3CNTLREG_num)
756
#endif
757

758
   iris_emit_reg(batch, L3_ALLOCATION_REG, reg) {
759
#if GFX_VER < 11
760
      reg.SLMEnable = cfg->n[INTEL_L3P_SLM] > 0;
761
#endif
762
#if GFX_VER == 11
763
      /* Wa_1406697149: Bit 9 "Error Detection Behavior Control" must be set
764
       * in L3CNTLREG register. The default setting of the bit is not the
765
       * desirable behavior.
766
       */
767
      reg.ErrorDetectionBehaviorControl = true;
768
      reg.UseFullWays = true;
769
#endif
770
      if (GFX_VER < 12 || cfg) {
771
         reg.URBAllocation = cfg->n[INTEL_L3P_URB];
772
         reg.ROAllocation = cfg->n[INTEL_L3P_RO];
773
         reg.DCAllocation = cfg->n[INTEL_L3P_DC];
774
         reg.AllAllocation = cfg->n[INTEL_L3P_ALL];
775
      } else {
776
#if GFX_VER >= 12
777
         reg.L3FullWayAllocationEnable = true;
778
#endif
779
      }
780
   }
781
}
782

783
#if GFX_VER == 9
/**
 * Turn object-level preemption on or off (Gfx9 only) by writing the
 * ReplayMode field of CS_CHICKEN1.
 */
static void
iris_enable_obj_preemption(struct iris_batch *batch, bool enable)
{
   const char *reason = enable ? "enable preemption" : "disable preemption";

   /* The field may only be modified after a fixed function pipe flush. */
   iris_emit_end_of_pipe_sync(batch, reason,
                              PIPE_CONTROL_RENDER_TARGET_FLUSH);

   /* Masked register write: only ReplayMode is updated. */
   iris_emit_reg(batch, GENX(CS_CHICKEN1), chicken) {
      chicken.ReplayMode = enable;
      chicken.ReplayModeMask = true;
   }
}
#endif
799

800
/**
 * Fill \p p with an \p n x \p m pixel hashing table (row-major, \p m entries
 * per row) usable as a slice, subslice or pixel pipe hashing table.  The
 * table cyclically repeats a fixed pattern whose periodicity is \p period:
 * the entry at row i, column j depends only on (i + j) modulo \p period.
 *
 * When \p index equals \p period no entry ever matches it, so a 2-way table
 * results in which indices 0 and 1 appear with these frequencies:
 *
 *   p_0 = ceil(period / 2) / period
 *   p_1 = floor(period / 2) / period
 *
 * When \p index is even and smaller than \p period, the matching phase is
 * assigned index 2, giving a 3-way table with these frequencies:
 *
 *   p_0 = (ceil(period / 2) - 1) / period
 *   p_1 = floor(period / 2) / period
 *   p_2 = 1 / period
 *
 * The above holds for \p flip equal to 0; with \p flip equal to 1, p_0 and
 * p_1 trade places.  For pixel pipe hashing on Gfx12 platforms \p flip can
 * always be 0, because the hardware transparently remaps the logical indices
 * in the table to physical pixel pipes ordered from highest to lowest EU
 * count.
 */
UNUSED static void
calculate_pixel_hashing_table(unsigned n, unsigned m,
                              unsigned period, unsigned index, bool flip,
                              uint32_t *p)
{
   for (unsigned row = 0; row < n; row++) {
      for (unsigned col = 0; col < m; col++) {
         const unsigned phase = (row + col) % period;
         uint32_t *entry = &p[col + m * row];

         if (phase == index)
            *entry = 2;
         else
            *entry = (phase & 1) ^ flip;
      }
   }
}
838

839
#if GFX_VER == 11
840
static void
841
gfx11_upload_pixel_hashing_tables(struct iris_batch *batch)
842
{
843
   const struct intel_device_info *devinfo = &batch->screen->devinfo;
844
   assert(devinfo->ppipe_subslices[2] == 0);
845

846
   if (devinfo->ppipe_subslices[0] == devinfo->ppipe_subslices[1])
847
      return;
848

849
   struct iris_context *ice = batch->ice;
850
   assert(&ice->batches[IRIS_BATCH_RENDER] == batch);
851

852
   unsigned size = GENX(SLICE_HASH_TABLE_length) * 4;
853
   uint32_t hash_address;
854
   struct pipe_resource *tmp = NULL;
855
   uint32_t *map =
856
      stream_state(batch, ice->state.dynamic_uploader, &tmp,
857
                   size, 64, &hash_address);
858
   pipe_resource_reference(&tmp, NULL);
859

860
   const bool flip = devinfo->ppipe_subslices[0] < devinfo->ppipe_subslices[1];
861
   struct GENX(SLICE_HASH_TABLE) table;
862
   calculate_pixel_hashing_table(16, 16, 3, 3, flip, table.Entry[0]);
863

864
   GENX(SLICE_HASH_TABLE_pack)(NULL, map, &table);
865

866
   iris_emit_cmd(batch, GENX(3DSTATE_SLICE_TABLE_STATE_POINTERS), ptr) {
867
      ptr.SliceHashStatePointerValid = true;
868
      ptr.SliceHashTableStatePointer = hash_address;
869
   }
870

871
   iris_emit_cmd(batch, GENX(3DSTATE_3D_MODE), mode) {
872
      mode.SliceHashingTableEnable = true;
873
   }
874
}
875
#elif GFX_VERx10 == 120
876
static void
877
gfx12_upload_pixel_hashing_tables(struct iris_batch *batch)
878
{
879
   const struct intel_device_info *devinfo = &batch->screen->devinfo;
880
   /* For each n calculate ppipes_of[n], equal to the number of pixel pipes
881
    * present with n active dual subslices.
882
    */
883
   unsigned ppipes_of[3] = {};
884

885
   for (unsigned n = 0; n < ARRAY_SIZE(ppipes_of); n++) {
886
      for (unsigned p = 0; p < ARRAY_SIZE(devinfo->ppipe_subslices); p++)
887
         ppipes_of[n] += (devinfo->ppipe_subslices[p] == n);
888
   }
889

890
   /* Gfx12 has three pixel pipes. */
891
   assert(ppipes_of[0] + ppipes_of[1] + ppipes_of[2] == 3);
892

893
   if (ppipes_of[2] == 3 || ppipes_of[0] == 2) {
894
      /* All three pixel pipes have the maximum number of active dual
895
       * subslices, or there is only one active pixel pipe: Nothing to do.
896
       */
897
      return;
898
   }
899

900
   iris_emit_cmd(batch, GENX(3DSTATE_SUBSLICE_HASH_TABLE), p) {
901
      p.SliceHashControl[0] = TABLE_0;
902

903
      if (ppipes_of[2] == 2 && ppipes_of[0] == 1)
904
         calculate_pixel_hashing_table(8, 16, 2, 2, 0, p.TwoWayTableEntry[0]);
905
      else if (ppipes_of[2] == 1 && ppipes_of[1] == 1 && ppipes_of[0] == 1)
906
         calculate_pixel_hashing_table(8, 16, 3, 3, 0, p.TwoWayTableEntry[0]);
907

908
      if (ppipes_of[2] == 2 && ppipes_of[1] == 1)
909
         calculate_pixel_hashing_table(8, 16, 5, 4, 0, p.ThreeWayTableEntry[0]);
910
      else if (ppipes_of[2] == 2 && ppipes_of[0] == 1)
911
         calculate_pixel_hashing_table(8, 16, 2, 2, 0, p.ThreeWayTableEntry[0]);
912
      else if (ppipes_of[2] == 1 && ppipes_of[1] == 1 && ppipes_of[0] == 1)
913
         calculate_pixel_hashing_table(8, 16, 3, 3, 0, p.ThreeWayTableEntry[0]);
914
      else
915
         unreachable("Illegal fusing.");
916
   }
917

918
   iris_emit_cmd(batch, GENX(3DSTATE_3D_MODE), p) {
919
      p.SubsliceHashingTableEnable = true;
920
      p.SubsliceHashingTableEnableMask = true;
921
   }
922
}
923
#endif
924

925
static void
926
iris_alloc_push_constants(struct iris_batch *batch)
927
{
928
   /* For now, we set a static partitioning of the push constant area,
929
    * assuming that all stages could be in use.
930
    *
931
    * TODO: Try lazily allocating the HS/DS/GS sections as needed, and
932
    *       see if that improves performance by offering more space to
933
    *       the VS/FS when those aren't in use.  Also, try dynamically
934
    *       enabling/disabling it like i965 does.  This would be more
935
    *       stalls and may not actually help; we don't know yet.
936
    */
937
   for (int i = 0; i <= MESA_SHADER_FRAGMENT; i++) {
938
      iris_emit_cmd(batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_VS), alloc) {
939
         alloc._3DCommandSubOpcode = 18 + i;
940
         alloc.ConstantBufferOffset = 6 * i;
941
         alloc.ConstantBufferSize = i == MESA_SHADER_FRAGMENT ? 8 : 6;
942
      }
943
   }
944
}
945

946
#if GFX_VER >= 12
/* Forward declaration: the context init functions call this before its
 * definition appears.
 */
static void init_aux_map_state(struct iris_batch *batch);
#endif
950

951
/**
 * Emit the initial GPU state shared by every kind of context.
 *
 * Both the render and the compute context setup call this.  It currently
 * only emits anything on Gfx11.
 */
static void
iris_init_common_context(struct iris_batch *batch)
{
#if GFX_VER == 11
   iris_emit_reg(batch, GENX(SAMPLER_MODE), sampler_mode) {
      sampler_mode.HeaderlessMessageforPreemptableContexts = 1;
      sampler_mode.HeaderlessMessageforPreemptableContextsMask = 1;
   }

   /* HALF_SLICE_CHICKEN7 requires bit 1 to be set. */
   iris_emit_reg(batch, GENX(HALF_SLICE_CHICKEN7), chicken7) {
      chicken7.EnabledTexelOffsetPrecisionFix = 1;
      chicken7.EnabledTexelOffsetPrecisionFixMask = 1;
   }
#endif
}
972

973
/**
974
 * Upload the initial GPU state for a render context.
975
 *
976
 * This sets some invariant state that needs to be programmed a particular
977
 * way, but we never actually change.
978
 */
979
static void
980
iris_init_render_context(struct iris_batch *batch)
981
{
982
   UNUSED const struct intel_device_info *devinfo = &batch->screen->devinfo;
983

984
   iris_batch_sync_region_start(batch);
985

986
   emit_pipeline_select(batch, _3D);
987

988
   iris_emit_l3_config(batch, batch->screen->l3_config_3d);
989

990
   init_state_base_address(batch);
991

992
   iris_init_common_context(batch);
993

994
#if GFX_VER >= 9
995
   iris_emit_reg(batch, GENX(CS_DEBUG_MODE2), reg) {
996
      reg.CONSTANT_BUFFERAddressOffsetDisable = true;
997
      reg.CONSTANT_BUFFERAddressOffsetDisableMask = true;
998
   }
999
#else
1000
   iris_emit_reg(batch, GENX(INSTPM), reg) {
1001
      reg.CONSTANT_BUFFERAddressOffsetDisable = true;
1002
      reg.CONSTANT_BUFFERAddressOffsetDisableMask = true;
1003
   }
1004
#endif
1005

1006
#if GFX_VER == 9
1007
   iris_emit_reg(batch, GENX(CACHE_MODE_1), reg) {
1008
      reg.FloatBlendOptimizationEnable = true;
1009
      reg.FloatBlendOptimizationEnableMask = true;
1010
      reg.MSCRAWHazardAvoidanceBit = true;
1011
      reg.MSCRAWHazardAvoidanceBitMask = true;
1012
      reg.PartialResolveDisableInVC = true;
1013
      reg.PartialResolveDisableInVCMask = true;
1014
   }
1015

1016
   if (devinfo->is_geminilake)
1017
      init_glk_barrier_mode(batch, GLK_BARRIER_MODE_3D_HULL);
1018
#endif
1019

1020
#if GFX_VER == 11
1021
   iris_emit_reg(batch, GENX(TCCNTLREG), reg) {
1022
      reg.L3DataPartialWriteMergingEnable = true;
1023
      reg.ColorZPartialWriteMergingEnable = true;
1024
      reg.URBPartialWriteMergingEnable = true;
1025
      reg.TCDisable = true;
1026
   }
1027

1028
   /* Hardware specification recommends disabling repacking for the
1029
    * compatibility with decompression mechanism in display controller.
1030
    */
1031
   if (devinfo->disable_ccs_repack) {
1032
      iris_emit_reg(batch, GENX(CACHE_MODE_0), reg) {
1033
         reg.DisableRepackingforCompression = true;
1034
         reg.DisableRepackingforCompressionMask = true;
1035
      }
1036
   }
1037

1038
   gfx11_upload_pixel_hashing_tables(batch);
1039
#endif
1040

1041
#if GFX_VERx10 == 120
1042
   gfx12_upload_pixel_hashing_tables(batch);
1043
#endif
1044

1045
   /* 3DSTATE_DRAWING_RECTANGLE is non-pipelined, so we want to avoid
1046
    * changing it dynamically.  We set it to the maximum size here, and
1047
    * instead include the render target dimensions in the viewport, so
1048
    * viewport extents clipping takes care of pruning stray geometry.
1049
    */
1050
   iris_emit_cmd(batch, GENX(3DSTATE_DRAWING_RECTANGLE), rect) {
1051
      rect.ClippedDrawingRectangleXMax = UINT16_MAX;
1052
      rect.ClippedDrawingRectangleYMax = UINT16_MAX;
1053
   }
1054

1055
   /* Set the initial MSAA sample positions. */
1056
   iris_emit_cmd(batch, GENX(3DSTATE_SAMPLE_PATTERN), pat) {
1057
      INTEL_SAMPLE_POS_1X(pat._1xSample);
1058
      INTEL_SAMPLE_POS_2X(pat._2xSample);
1059
      INTEL_SAMPLE_POS_4X(pat._4xSample);
1060
      INTEL_SAMPLE_POS_8X(pat._8xSample);
1061
#if GFX_VER >= 9
1062
      INTEL_SAMPLE_POS_16X(pat._16xSample);
1063
#endif
1064
   }
1065

1066
   /* Use the legacy AA line coverage computation. */
1067
   iris_emit_cmd(batch, GENX(3DSTATE_AA_LINE_PARAMETERS), foo);
1068

1069
   /* Disable chromakeying (it's for media) */
1070
   iris_emit_cmd(batch, GENX(3DSTATE_WM_CHROMAKEY), foo);
1071

1072
   /* We want regular rendering, not special HiZ operations. */
1073
   iris_emit_cmd(batch, GENX(3DSTATE_WM_HZ_OP), foo);
1074

1075
   /* No polygon stippling offsets are necessary. */
1076
   /* TODO: may need to set an offset for origin-UL framebuffers */
1077
   iris_emit_cmd(batch, GENX(3DSTATE_POLY_STIPPLE_OFFSET), foo);
1078

1079
   iris_alloc_push_constants(batch);
1080

1081

1082
#if GFX_VER >= 12
1083
   init_aux_map_state(batch);
1084
#endif
1085

1086
   iris_batch_sync_region_end(batch);
1087
}
1088

1089
static void
1090
iris_init_compute_context(struct iris_batch *batch)
1091
{
1092
   UNUSED const struct intel_device_info *devinfo = &batch->screen->devinfo;
1093

1094
   iris_batch_sync_region_start(batch);
1095

1096
   /* Wa_1607854226:
1097
    *
1098
    *  Start with pipeline in 3D mode to set the STATE_BASE_ADDRESS.
1099
    */
1100
#if GFX_VER == 12
1101
   emit_pipeline_select(batch, _3D);
1102
#else
1103
   emit_pipeline_select(batch, GPGPU);
1104
#endif
1105

1106
   iris_emit_l3_config(batch, batch->screen->l3_config_cs);
1107

1108
   init_state_base_address(batch);
1109

1110
   iris_init_common_context(batch);
1111

1112
#if GFX_VER == 12
1113
   emit_pipeline_select(batch, GPGPU);
1114
#endif
1115

1116
#if GFX_VER == 9
1117
   if (devinfo->is_geminilake)
1118
      init_glk_barrier_mode(batch, GLK_BARRIER_MODE_GPGPU);
1119
#endif
1120

1121
#if GFX_VER >= 12
1122
   init_aux_map_state(batch);
1123
#endif
1124

1125
   iris_batch_sync_region_end(batch);
1126
}
1127

1128
struct iris_vertex_buffer_state {
1129
   /** The VERTEX_BUFFER_STATE hardware structure. */
1130
   uint32_t state[GENX(VERTEX_BUFFER_STATE_length)];
1131

1132
   /** The resource to source vertex data from. */
1133
   struct pipe_resource *resource;
1134

1135
   int offset;
1136
};
1137

1138
struct iris_depth_buffer_state {
1139
   /* Depth/HiZ/Stencil related hardware packets. */
1140
   uint32_t packets[GENX(3DSTATE_DEPTH_BUFFER_length) +
1141
                    GENX(3DSTATE_STENCIL_BUFFER_length) +
1142
                    GENX(3DSTATE_HIER_DEPTH_BUFFER_length) +
1143
                    GENX(3DSTATE_CLEAR_PARAMS_length) +
1144
                    GENX(MI_LOAD_REGISTER_IMM_length) * 2];
1145
};
1146

1147
/**
1148
 * Generation-specific context state (ice->state.genx->...).
1149
 *
1150
 * Most state can go in iris_context directly, but these encode hardware
1151
 * packets which vary by generation.
1152
 */
1153
struct iris_genx_state {
1154
   struct iris_vertex_buffer_state vertex_buffers[33];
1155
   uint32_t last_index_buffer[GENX(3DSTATE_INDEX_BUFFER_length)];
1156

1157
   struct iris_depth_buffer_state depth_buffer;
1158

1159
   uint32_t so_buffers[4 * GENX(3DSTATE_SO_BUFFER_length)];
1160

1161
#if GFX_VER == 8
1162
   bool pma_fix_enabled;
1163
#endif
1164

1165
#if GFX_VER == 9
1166
   /* Is object level preemption enabled? */
1167
   bool object_preemption;
1168
#endif
1169

1170
   struct {
1171
#if GFX_VER == 8
1172
      struct brw_image_param image_param[PIPE_MAX_SHADER_IMAGES];
1173
#endif
1174
   } shaders[MESA_SHADER_STAGES];
1175
};
1176

1177
/**
1178
 * The pipe->set_blend_color() driver hook.
1179
 *
1180
 * This corresponds to our COLOR_CALC_STATE.
1181
 */
1182
static void
1183
iris_set_blend_color(struct pipe_context *ctx,
1184
                     const struct pipe_blend_color *state)
1185
{
1186
   struct iris_context *ice = (struct iris_context *) ctx;
1187

1188
   /* Our COLOR_CALC_STATE is exactly pipe_blend_color, so just memcpy */
1189
   memcpy(&ice->state.blend_color, state, sizeof(struct pipe_blend_color));
1190
   ice->state.dirty |= IRIS_DIRTY_COLOR_CALC_STATE;
1191
}
1192

1193
/**
1194
 * Gallium CSO for blend state (see pipe_blend_state).
1195
 */
1196
struct iris_blend_state {
1197
   /** Partial 3DSTATE_PS_BLEND */
1198
   uint32_t ps_blend[GENX(3DSTATE_PS_BLEND_length)];
1199

1200
   /** Partial BLEND_STATE */
1201
   uint32_t blend_state[GENX(BLEND_STATE_length) +
1202
                        BRW_MAX_DRAW_BUFFERS * GENX(BLEND_STATE_ENTRY_length)];
1203

1204
   bool alpha_to_coverage; /* for shader key */
1205

1206
   /** Bitfield of whether blending is enabled for RT[i] - for aux resolves */
1207
   uint8_t blend_enables;
1208

1209
   /** Bitfield of whether color writes are enabled for RT[i] */
1210
   uint8_t color_write_enables;
1211

1212
   /** Does RT[0] use dual color blending? */
1213
   bool dual_color_blending;
1214
};
1215

1216
static enum pipe_blendfactor
1217
fix_blendfactor(enum pipe_blendfactor f, bool alpha_to_one)
1218
{
1219
   if (alpha_to_one) {
1220
      if (f == PIPE_BLENDFACTOR_SRC1_ALPHA)
1221
         return PIPE_BLENDFACTOR_ONE;
1222

1223
      if (f == PIPE_BLENDFACTOR_INV_SRC1_ALPHA)
1224
         return PIPE_BLENDFACTOR_ZERO;
1225
   }
1226

1227
   return f;
1228
}
1229

1230
/**
1231
 * The pipe->create_blend_state() driver hook.
1232
 *
1233
 * Translates a pipe_blend_state into iris_blend_state.
1234
 */
1235
static void *
1236
iris_create_blend_state(struct pipe_context *ctx,
1237
                        const struct pipe_blend_state *state)
1238
{
1239
   struct iris_blend_state *cso = malloc(sizeof(struct iris_blend_state));
1240
   uint32_t *blend_entry = cso->blend_state + GENX(BLEND_STATE_length);
1241

1242
   cso->blend_enables = 0;
1243
   cso->color_write_enables = 0;
1244
   STATIC_ASSERT(BRW_MAX_DRAW_BUFFERS <= 8);
1245

1246
   cso->alpha_to_coverage = state->alpha_to_coverage;
1247

1248
   bool indep_alpha_blend = false;
1249

1250
   for (int i = 0; i < BRW_MAX_DRAW_BUFFERS; i++) {
1251
      const struct pipe_rt_blend_state *rt =
1252
         &state->rt[state->independent_blend_enable ? i : 0];
1253

1254
      enum pipe_blendfactor src_rgb =
1255
         fix_blendfactor(rt->rgb_src_factor, state->alpha_to_one);
1256
      enum pipe_blendfactor src_alpha =
1257
         fix_blendfactor(rt->alpha_src_factor, state->alpha_to_one);
1258
      enum pipe_blendfactor dst_rgb =
1259
         fix_blendfactor(rt->rgb_dst_factor, state->alpha_to_one);
1260
      enum pipe_blendfactor dst_alpha =
1261
         fix_blendfactor(rt->alpha_dst_factor, state->alpha_to_one);
1262

1263
      if (rt->rgb_func != rt->alpha_func ||
1264
          src_rgb != src_alpha || dst_rgb != dst_alpha)
1265
         indep_alpha_blend = true;
1266

1267
      if (rt->blend_enable)
1268
         cso->blend_enables |= 1u << i;
1269

1270
      if (rt->colormask)
1271
         cso->color_write_enables |= 1u << i;
1272

1273
      iris_pack_state(GENX(BLEND_STATE_ENTRY), blend_entry, be) {
1274
         be.LogicOpEnable = state->logicop_enable;
1275
         be.LogicOpFunction = state->logicop_func;
1276

1277
         be.PreBlendSourceOnlyClampEnable = false;
1278
         be.ColorClampRange = COLORCLAMP_RTFORMAT;
1279
         be.PreBlendColorClampEnable = true;
1280
         be.PostBlendColorClampEnable = true;
1281

1282
         be.ColorBufferBlendEnable = rt->blend_enable;
1283

1284
         be.ColorBlendFunction          = rt->rgb_func;
1285
         be.AlphaBlendFunction          = rt->alpha_func;
1286

1287
         /* The casts prevent warnings about implicit enum type conversions. */
1288
         be.SourceBlendFactor           = (int) src_rgb;
1289
         be.SourceAlphaBlendFactor      = (int) src_alpha;
1290
         be.DestinationBlendFactor      = (int) dst_rgb;
1291
         be.DestinationAlphaBlendFactor = (int) dst_alpha;
1292

1293
         be.WriteDisableRed   = !(rt->colormask & PIPE_MASK_R);
1294
         be.WriteDisableGreen = !(rt->colormask & PIPE_MASK_G);
1295
         be.WriteDisableBlue  = !(rt->colormask & PIPE_MASK_B);
1296
         be.WriteDisableAlpha = !(rt->colormask & PIPE_MASK_A);
1297
      }
1298
      blend_entry += GENX(BLEND_STATE_ENTRY_length);
1299
   }
1300

1301
   iris_pack_command(GENX(3DSTATE_PS_BLEND), cso->ps_blend, pb) {
1302
      /* pb.HasWriteableRT is filled in at draw time.
1303
       * pb.AlphaTestEnable is filled in at draw time.
1304
       *
1305
       * pb.ColorBufferBlendEnable is filled in at draw time so we can avoid
1306
       * setting it when dual color blending without an appropriate shader.
1307
       */
1308

1309
      pb.AlphaToCoverageEnable = state->alpha_to_coverage;
1310
      pb.IndependentAlphaBlendEnable = indep_alpha_blend;
1311

1312
      /* The casts prevent warnings about implicit enum type conversions. */
1313
      pb.SourceBlendFactor =
1314
         (int) fix_blendfactor(state->rt[0].rgb_src_factor, state->alpha_to_one);
1315
      pb.SourceAlphaBlendFactor =
1316
         (int) fix_blendfactor(state->rt[0].alpha_src_factor, state->alpha_to_one);
1317
      pb.DestinationBlendFactor =
1318
         (int) fix_blendfactor(state->rt[0].rgb_dst_factor, state->alpha_to_one);
1319
      pb.DestinationAlphaBlendFactor =
1320
         (int) fix_blendfactor(state->rt[0].alpha_dst_factor, state->alpha_to_one);
1321
   }
1322

1323
   iris_pack_state(GENX(BLEND_STATE), cso->blend_state, bs) {
1324
      bs.AlphaToCoverageEnable = state->alpha_to_coverage;
1325
      bs.IndependentAlphaBlendEnable = indep_alpha_blend;
1326
      bs.AlphaToOneEnable = state->alpha_to_one;
1327
      bs.AlphaToCoverageDitherEnable = state->alpha_to_coverage;
1328
      bs.ColorDitherEnable = state->dither;
1329
      /* bl.AlphaTestEnable and bs.AlphaTestFunction are filled in later. */
1330
   }
1331

1332
   cso->dual_color_blending = util_blend_state_is_dual(state, 0);
1333

1334
   return cso;
1335
}
1336

1337
/**
1338
 * The pipe->bind_blend_state() driver hook.
1339
 *
1340
 * Bind a blending CSO and flag related dirty bits.
1341
 */
1342
static void
1343
iris_bind_blend_state(struct pipe_context *ctx, void *state)
1344
{
1345
   struct iris_context *ice = (struct iris_context *) ctx;
1346
   struct iris_blend_state *cso = state;
1347

1348
   ice->state.cso_blend = cso;
1349

1350
   ice->state.dirty |= IRIS_DIRTY_PS_BLEND;
1351
   ice->state.dirty |= IRIS_DIRTY_BLEND_STATE;
1352
   ice->state.stage_dirty |= ice->state.stage_dirty_for_nos[IRIS_NOS_BLEND];
1353

1354
   if (GFX_VER == 8)
1355
      ice->state.dirty |= IRIS_DIRTY_PMA_FIX;
1356
}
1357

1358
/**
1359
 * Return true if the FS writes to any color outputs which are not disabled
1360
 * via color masking.
1361
 */
1362
static bool
1363
has_writeable_rt(const struct iris_blend_state *cso_blend,
1364
                 const struct shader_info *fs_info)
1365
{
1366
   if (!fs_info)
1367
      return false;
1368

1369
   unsigned rt_outputs = fs_info->outputs_written >> FRAG_RESULT_DATA0;
1370

1371
   if (fs_info->outputs_written & BITFIELD64_BIT(FRAG_RESULT_COLOR))
1372
      rt_outputs = (1 << BRW_MAX_DRAW_BUFFERS) - 1;
1373

1374
   return cso_blend->color_write_enables & rt_outputs;
1375
}
1376

1377
/**
 * Driver-side CSO for Gallium depth, stencil and alpha test state.
 */
struct iris_depth_stencil_alpha_state {
   /** Partially packed 3DSTATE_WM_DEPTH_STENCIL. */
   uint32_t wmds[GENX(3DSTATE_WM_DEPTH_STENCIL_length)];

#if GFX_VER >= 12
   /** Packed 3DSTATE_DEPTH_BOUNDS. */
   uint32_t depth_bounds[GENX(3DSTATE_DEPTH_BOUNDS_length)];
#endif

   /** Alpha test state, consumed by BLEND_STATE, 3DSTATE_PS_BLEND and
    *  COLOR_CALC_STATE.
    */
   unsigned alpha_enabled:1;
   unsigned alpha_func:3;     /**< PIPE_FUNC_x */
   float alpha_ref_value;     /**< reference value */

   /** Write enables, consumed by resolve and cache set tracking. */
   bool depth_writes_enabled;
   bool stencil_writes_enabled;

   /** Depth test enable, consumed by the Gfx8-9 PMA stall equations. */
   bool depth_test_enabled;
};
1400

1401
/**
1402
 * The pipe->create_depth_stencil_alpha_state() driver hook.
1403
 *
1404
 * We encode most of 3DSTATE_WM_DEPTH_STENCIL, and just save off the alpha
1405
 * testing state since we need pieces of it in a variety of places.
1406
 */
1407
static void *
1408
iris_create_zsa_state(struct pipe_context *ctx,
1409
                      const struct pipe_depth_stencil_alpha_state *state)
1410
{
1411
   struct iris_depth_stencil_alpha_state *cso =
1412
      malloc(sizeof(struct iris_depth_stencil_alpha_state));
1413

1414
   bool two_sided_stencil = state->stencil[1].enabled;
1415

1416
   cso->alpha_enabled = state->alpha_enabled;
1417
   cso->alpha_func = state->alpha_func;
1418
   cso->alpha_ref_value = state->alpha_ref_value;
1419
   cso->depth_writes_enabled = state->depth_writemask;
1420
   cso->depth_test_enabled = state->depth_enabled;
1421
   cso->stencil_writes_enabled =
1422
      state->stencil[0].writemask != 0 ||
1423
      (two_sided_stencil && state->stencil[1].writemask != 0);
1424

1425
   /* gallium frontends need to optimize away EQUAL writes for us. */
1426
   assert(!(state->depth_func == PIPE_FUNC_EQUAL && state->depth_writemask));
1427

1428
   iris_pack_command(GENX(3DSTATE_WM_DEPTH_STENCIL), cso->wmds, wmds) {
1429
      wmds.StencilFailOp = state->stencil[0].fail_op;
1430
      wmds.StencilPassDepthFailOp = state->stencil[0].zfail_op;
1431
      wmds.StencilPassDepthPassOp = state->stencil[0].zpass_op;
1432
      wmds.StencilTestFunction =
1433
         translate_compare_func(state->stencil[0].func);
1434
      wmds.BackfaceStencilFailOp = state->stencil[1].fail_op;
1435
      wmds.BackfaceStencilPassDepthFailOp = state->stencil[1].zfail_op;
1436
      wmds.BackfaceStencilPassDepthPassOp = state->stencil[1].zpass_op;
1437
      wmds.BackfaceStencilTestFunction =
1438
         translate_compare_func(state->stencil[1].func);
1439
      wmds.DepthTestFunction = translate_compare_func(state->depth_func);
1440
      wmds.DoubleSidedStencilEnable = two_sided_stencil;
1441
      wmds.StencilTestEnable = state->stencil[0].enabled;
1442
      wmds.StencilBufferWriteEnable =
1443
         state->stencil[0].writemask != 0 ||
1444
         (two_sided_stencil && state->stencil[1].writemask != 0);
1445
      wmds.DepthTestEnable = state->depth_enabled;
1446
      wmds.DepthBufferWriteEnable = state->depth_writemask;
1447
      wmds.StencilTestMask = state->stencil[0].valuemask;
1448
      wmds.StencilWriteMask = state->stencil[0].writemask;
1449
      wmds.BackfaceStencilTestMask = state->stencil[1].valuemask;
1450
      wmds.BackfaceStencilWriteMask = state->stencil[1].writemask;
1451
      /* wmds.[Backface]StencilReferenceValue are merged later */
1452
#if GFX_VER >= 12
1453
      wmds.StencilReferenceValueModifyDisable = true;
1454
#endif
1455
   }
1456

1457
#if GFX_VER >= 12
1458
   iris_pack_command(GENX(3DSTATE_DEPTH_BOUNDS), cso->depth_bounds, depth_bounds) {
1459
      depth_bounds.DepthBoundsTestValueModifyDisable = false;
1460
      depth_bounds.DepthBoundsTestEnableModifyDisable = false;
1461
      depth_bounds.DepthBoundsTestEnable = state->depth_bounds_test;
1462
      depth_bounds.DepthBoundsTestMinValue = state->depth_bounds_min;
1463
      depth_bounds.DepthBoundsTestMaxValue = state->depth_bounds_max;
1464
   }
1465
#endif
1466

1467
   return cso;
1468
}
1469

1470
/**
1471
 * The pipe->bind_depth_stencil_alpha_state() driver hook.
1472
 *
1473
 * Bind a depth/stencil/alpha CSO and flag related dirty bits.
1474
 */
1475
static void
1476
iris_bind_zsa_state(struct pipe_context *ctx, void *state)
1477
{
1478
   struct iris_context *ice = (struct iris_context *) ctx;
1479
   struct iris_depth_stencil_alpha_state *old_cso = ice->state.cso_zsa;
1480
   struct iris_depth_stencil_alpha_state *new_cso = state;
1481

1482
   if (new_cso) {
1483
      if (cso_changed(alpha_ref_value))
1484
         ice->state.dirty |= IRIS_DIRTY_COLOR_CALC_STATE;
1485

1486
      if (cso_changed(alpha_enabled))
1487
         ice->state.dirty |= IRIS_DIRTY_PS_BLEND | IRIS_DIRTY_BLEND_STATE;
1488

1489
      if (cso_changed(alpha_func))
1490
         ice->state.dirty |= IRIS_DIRTY_BLEND_STATE;
1491

1492
      if (cso_changed(depth_writes_enabled) || cso_changed(stencil_writes_enabled))
1493
         ice->state.dirty |= IRIS_DIRTY_RENDER_RESOLVES_AND_FLUSHES;
1494

1495
      ice->state.depth_writes_enabled = new_cso->depth_writes_enabled;
1496
      ice->state.stencil_writes_enabled = new_cso->stencil_writes_enabled;
1497

1498
#if GFX_VER >= 12
1499
      if (cso_changed(depth_bounds))
1500
         ice->state.dirty |= IRIS_DIRTY_DEPTH_BOUNDS;
1501
#endif
1502
   }
1503

1504
   ice->state.cso_zsa = new_cso;
1505
   ice->state.dirty |= IRIS_DIRTY_CC_VIEWPORT;
1506
   ice->state.dirty |= IRIS_DIRTY_WM_DEPTH_STENCIL;
1507
   ice->state.stage_dirty |=
1508
      ice->state.stage_dirty_for_nos[IRIS_NOS_DEPTH_STENCIL_ALPHA];
1509

1510
   if (GFX_VER == 8)
1511
      ice->state.dirty |= IRIS_DIRTY_PMA_FIX;
1512
}
1513

1514
#if GFX_VER == 8
/**
 * Decide whether the Gfx8 depth PMA fix should be enabled for the state
 * currently bound in \p ice.
 */
static bool
want_pma_fix(struct iris_context *ice)
{
   UNUSED struct iris_screen *screen = (void *) ice->ctx.screen;
   UNUSED const struct intel_device_info *devinfo = &screen->devinfo;
   const struct brw_wm_prog_data *wm_prog_data = (void *)
      ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data;
   const struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
   const struct iris_depth_stencil_alpha_state *cso_zsa = ice->state.cso_zsa;
   const struct iris_blend_state *cso_blend = ice->state.cso_blend;

   /* For very specific combinations of state, Gfx8-9 hardware can be told
    * not to stall at the pixel mask array.  The state equations are
    * documented here:
    *
    * - Gfx8 Depth PMA Fix:   CACHE_MODE_1::NP_PMA_FIX_ENABLE
    * - Gfx9 Stencil PMA Fix: CACHE_MODE_0::STC PMA Optimization Enable
    *
    * The two equations have these elements in common:
    *
    *    no_hiz_op =
    *       !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
    *         3DSTATE_WM_HZ_OP::DepthBufferResolve ||
    *         3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
    *         3DSTATE_WM_HZ_OP::StencilBufferClear) &&
    *
    *    killpixels =
    *       3DSTATE_WM::ForceKillPix != ForceOff &&
    *       (3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
    *        3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
    *        3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
    *        3DSTATE_PS_BLEND::AlphaTestEnable ||
    *        3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable)
    *
    *    (Technically the stencil PMA treats ForceKillPix differently,
    *     but that looks like a documentation oversight, and it is never
    *     used that way here, so it does not matter.)
    *
    *    common_pma_fix =
    *       3DSTATE_WM::ForceThreadDispatch != 1 &&
    *       3DSTATE_RASTER::ForceSampleCount == NUMRASTSAMPLES_0 &&
    *       3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL &&
    *       3DSTATE_DEPTH_BUFFER::HIZ Enable &&
    *       3DSTATE_WM::EDSC_Mode != EDSC_PREPS &&
    *       3DSTATE_PS_EXTRA::PixelShaderValid &&
    *       no_hiz_op
    *
    * The following always hold:
    *
    *    3DSTATE_RASTER::ForceSampleCount == NUMRASTSAMPLES_0
    *    3DSTATE_PS_EXTRA::PixelShaderValid
    *
    * HiZ ops never go through the normal drawing path, so this holds too:
    *
    *    !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
    *      3DSTATE_WM_HZ_OP::DepthBufferResolve ||
    *      3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
    *      3DSTATE_WM_HZ_OP::StencilBufferClear)
    *
    * This one is only true some of the time:
    *
    *    3DSTATE_WM::ForceThreadDispatch != 1
    *
    * It is deliberately ignored: either it agrees with the signal
    * (dispatch was already enabled, so nothing out of the ordinary), or
    * there are no framebuffer attachments (so no depth or HiZ anyway,
    * meaning the PMA signal will already be disabled).
    */

   /* No depth/stencil attachment at all. */
   if (!cso_fb->zsbuf)
      return false;

   struct iris_resource *zres, *sres;
   iris_get_depth_stencil_resources(cso_fb->zsbuf->texture, &zres, &sres);

   /* 3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL &&
    * 3DSTATE_DEPTH_BUFFER::HIZ Enable &&
    */
   const bool depth_has_hiz =
      zres && iris_resource_level_has_hiz(zres, cso_fb->zsbuf->u.tex.level);
   if (!depth_has_hiz)
      return false;

   /* 3DSTATE_WM::EDSC_Mode != EDSC_PREPS */
   if (wm_prog_data->early_fragment_tests)
      return false;

   /* 3DSTATE_WM::ForceKillPix != ForceOff &&
    * (3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
    *  3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
    *  3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
    *  3DSTATE_PS_BLEND::AlphaTestEnable ||
    *  3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable)
    */
   const bool killpixels =
      wm_prog_data->uses_kill || wm_prog_data->uses_omask ||
      cso_blend->alpha_to_coverage || cso_zsa->alpha_enabled;

   /* With the simplifications above, the Gfx8 depth PMA equation is:
    *
    *    depth_writes =
    *       3DSTATE_WM_DEPTH_STENCIL::DepthWriteEnable &&
    *       3DSTATE_DEPTH_BUFFER::DEPTH_WRITE_ENABLE
    *
    *    stencil_writes =
    *       3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
    *       3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE &&
    *       3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE
    *
    *    Z_PMA_OPT =
    *       common_pma_fix &&
    *       3DSTATE_WM_DEPTH_STENCIL::DepthTestEnable &&
    *       ((killpixels && (depth_writes || stencil_writes)) ||
    *        3DSTATE_PS_EXTRA::PixelShaderComputedDepthMode != PSCDEPTH_OFF)
    *
    */
   if (!cso_zsa->depth_test_enabled)
      return false;

   if (wm_prog_data->computed_depth_mode != PSCDEPTH_OFF)
      return true;

   if (!killpixels)
      return false;

   /* Stencil writes only count when a stencil resource is attached. */
   const bool stencil_writes = sres && cso_zsa->stencil_writes_enabled;

   return cso_zsa->depth_writes_enabled || stencil_writes;
}
#endif
1636

1637
void
1638
genX(update_pma_fix)(struct iris_context *ice,
1639
                     struct iris_batch *batch,
1640
                     bool enable)
1641
{
1642
#if GFX_VER == 8
1643
   struct iris_genx_state *genx = ice->state.genx;
1644

1645
   if (genx->pma_fix_enabled == enable)
1646
      return;
1647

1648
   genx->pma_fix_enabled = enable;
1649

1650
   /* According to the Broadwell PIPE_CONTROL documentation, software should
1651
    * emit a PIPE_CONTROL with the CS Stall and Depth Cache Flush bits set
1652
    * prior to the LRI.  If stencil buffer writes are enabled, then a Render        * Cache Flush is also necessary.
1653
    *
1654
    * The Gfx9 docs say to use a depth stall rather than a command streamer
1655
    * stall.  However, the hardware seems to violently disagree.  A full
1656
    * command streamer stall seems to be needed in both cases.
1657
    */
1658
   iris_emit_pipe_control_flush(batch, "PMA fix change (1/2)",
1659
                                PIPE_CONTROL_CS_STALL |
1660
                                PIPE_CONTROL_DEPTH_CACHE_FLUSH |
1661
                                PIPE_CONTROL_RENDER_TARGET_FLUSH);
1662

1663
   iris_emit_reg(batch, GENX(CACHE_MODE_1), reg) {
1664
      reg.NPPMAFixEnable = enable;
1665
      reg.NPEarlyZFailsDisable = enable;
1666
      reg.NPPMAFixEnableMask = true;
1667
      reg.NPEarlyZFailsDisableMask = true;
1668
   }
1669

1670
   /* After the LRI, a PIPE_CONTROL with both the Depth Stall and Depth Cache
1671
    * Flush bits is often necessary.  We do it regardless because it's easier.
1672
    * The render cache flush is also necessary if stencil writes are enabled.
1673
    *
1674
    * Again, the Gfx9 docs give a different set of flushes but the Broadwell
1675
    * flushes seem to work just as well.
1676
    */
1677
   iris_emit_pipe_control_flush(batch, "PMA fix change (1/2)",
1678
                                PIPE_CONTROL_DEPTH_STALL |
1679
                                PIPE_CONTROL_DEPTH_CACHE_FLUSH |
1680
                                PIPE_CONTROL_RENDER_TARGET_FLUSH);
1681
#endif
1682
}
1683

1684
/**
1685
 * Gallium CSO for rasterizer state.
1686
 */
1687
struct iris_rasterizer_state {
1688
   uint32_t sf[GENX(3DSTATE_SF_length)];
1689
   uint32_t clip[GENX(3DSTATE_CLIP_length)];
1690
   uint32_t raster[GENX(3DSTATE_RASTER_length)];
1691
   uint32_t wm[GENX(3DSTATE_WM_length)];
1692
   uint32_t line_stipple[GENX(3DSTATE_LINE_STIPPLE_length)];
1693

1694
   uint8_t num_clip_plane_consts;
1695
   bool clip_halfz; /* for CC_VIEWPORT */
1696
   bool depth_clip_near; /* for CC_VIEWPORT */
1697
   bool depth_clip_far; /* for CC_VIEWPORT */
1698
   bool flatshade; /* for shader state */
1699
   bool flatshade_first; /* for stream output */
1700
   bool clamp_fragment_color; /* for shader state */
1701
   bool light_twoside; /* for shader state */
1702
   bool rasterizer_discard; /* for 3DSTATE_STREAMOUT and 3DSTATE_CLIP */
1703
   bool half_pixel_center; /* for 3DSTATE_MULTISAMPLE */
1704
   bool line_stipple_enable;
1705
   bool poly_stipple_enable;
1706
   bool multisample;
1707
   bool force_persample_interp;
1708
   bool conservative_rasterization;
1709
   bool fill_mode_point;
1710
   bool fill_mode_line;
1711
   bool fill_mode_point_or_line;
1712
   enum pipe_sprite_coord_mode sprite_coord_mode; /* PIPE_SPRITE_* */
1713
   uint16_t sprite_coord_enable;
1714
};
1715

1716
static float
1717
get_line_width(const struct pipe_rasterizer_state *state)
1718
{
1719
   float line_width = state->line_width;
1720

1721
   /* From the OpenGL 4.4 spec:
1722
    *
1723
    * "The actual width of non-antialiased lines is determined by rounding
1724
    *  the supplied width to the nearest integer, then clamping it to the
1725
    *  implementation-dependent maximum non-antialiased line width."
1726
    */
1727
   if (!state->multisample && !state->line_smooth)
1728
      line_width = roundf(state->line_width);
1729

1730
   if (!state->multisample && state->line_smooth && line_width < 1.5f) {
1731
      /* For 1 pixel line thickness or less, the general anti-aliasing
1732
       * algorithm gives up, and a garbage line is generated.  Setting a
1733
       * Line Width of 0.0 specifies the rasterization of the "thinnest"
1734
       * (one-pixel-wide), non-antialiased lines.
1735
       *
1736
       * Lines rendered with zero Line Width are rasterized using the
1737
       * "Grid Intersection Quantization" rules as specified by the
1738
       * "Zero-Width (Cosmetic) Line Rasterization" section of the docs.
1739
       */
1740
      line_width = 0.0f;
1741
   }
1742

1743
   return line_width;
1744
}
1745

1746
/**
1747
 * The pipe->create_rasterizer_state() driver hook.
1748
 */
1749
static void *
1750
iris_create_rasterizer_state(struct pipe_context *ctx,
1751
                             const struct pipe_rasterizer_state *state)
1752
{
1753
   struct iris_rasterizer_state *cso =
1754
      malloc(sizeof(struct iris_rasterizer_state));
1755

1756
   cso->multisample = state->multisample;
1757
   cso->force_persample_interp = state->force_persample_interp;
1758
   cso->clip_halfz = state->clip_halfz;
1759
   cso->depth_clip_near = state->depth_clip_near;
1760
   cso->depth_clip_far = state->depth_clip_far;
1761
   cso->flatshade = state->flatshade;
1762
   cso->flatshade_first = state->flatshade_first;
1763
   cso->clamp_fragment_color = state->clamp_fragment_color;
1764
   cso->light_twoside = state->light_twoside;
1765
   cso->rasterizer_discard = state->rasterizer_discard;
1766
   cso->half_pixel_center = state->half_pixel_center;
1767
   cso->sprite_coord_mode = state->sprite_coord_mode;
1768
   cso->sprite_coord_enable = state->sprite_coord_enable;
1769
   cso->line_stipple_enable = state->line_stipple_enable;
1770
   cso->poly_stipple_enable = state->poly_stipple_enable;
1771
   cso->conservative_rasterization =
1772
      state->conservative_raster_mode == PIPE_CONSERVATIVE_RASTER_POST_SNAP;
1773

1774
   cso->fill_mode_point =
1775
      state->fill_front == PIPE_POLYGON_MODE_POINT ||
1776
      state->fill_back == PIPE_POLYGON_MODE_POINT;
1777
   cso->fill_mode_line =
1778
      state->fill_front == PIPE_POLYGON_MODE_LINE ||
1779
      state->fill_back == PIPE_POLYGON_MODE_LINE;
1780
   cso->fill_mode_point_or_line =
1781
      cso->fill_mode_point ||
1782
      cso->fill_mode_line;
1783

1784
   if (state->clip_plane_enable != 0)
1785
      cso->num_clip_plane_consts = util_logbase2(state->clip_plane_enable) + 1;
1786
   else
1787
      cso->num_clip_plane_consts = 0;
1788

1789
   float line_width = get_line_width(state);
1790

1791
   iris_pack_command(GENX(3DSTATE_SF), cso->sf, sf) {
1792
      sf.StatisticsEnable = true;
1793
      sf.AALineDistanceMode = AALINEDISTANCE_TRUE;
1794
      sf.LineEndCapAntialiasingRegionWidth =
1795
         state->line_smooth ? _10pixels : _05pixels;
1796
      sf.LastPixelEnable = state->line_last_pixel;
1797
      sf.LineWidth = line_width;
1798
      sf.SmoothPointEnable = (state->point_smooth || state->multisample) &&
1799
                             !state->point_quad_rasterization;
1800
      sf.PointWidthSource = state->point_size_per_vertex ? Vertex : State;
1801
      sf.PointWidth = CLAMP(state->point_size, 0.125f, 255.875f);
1802

1803
      if (state->flatshade_first) {
1804
         sf.TriangleFanProvokingVertexSelect = 1;
1805
      } else {
1806
         sf.TriangleStripListProvokingVertexSelect = 2;
1807
         sf.TriangleFanProvokingVertexSelect = 2;
1808
         sf.LineStripListProvokingVertexSelect = 1;
1809
      }
1810
   }
1811

1812
   iris_pack_command(GENX(3DSTATE_RASTER), cso->raster, rr) {
1813
      rr.FrontWinding = state->front_ccw ? CounterClockwise : Clockwise;
1814
      rr.CullMode = translate_cull_mode(state->cull_face);
1815
      rr.FrontFaceFillMode = translate_fill_mode(state->fill_front);
1816
      rr.BackFaceFillMode = translate_fill_mode(state->fill_back);
1817
      rr.DXMultisampleRasterizationEnable = state->multisample;
1818
      rr.GlobalDepthOffsetEnableSolid = state->offset_tri;
1819
      rr.GlobalDepthOffsetEnableWireframe = state->offset_line;
1820
      rr.GlobalDepthOffsetEnablePoint = state->offset_point;
1821
      rr.GlobalDepthOffsetConstant = state->offset_units * 2;
1822
      rr.GlobalDepthOffsetScale = state->offset_scale;
1823
      rr.GlobalDepthOffsetClamp = state->offset_clamp;
1824
      rr.SmoothPointEnable = state->point_smooth;
1825
      rr.AntialiasingEnable = state->line_smooth;
1826
      rr.ScissorRectangleEnable = state->scissor;
1827
#if GFX_VER >= 9
1828
      rr.ViewportZNearClipTestEnable = state->depth_clip_near;
1829
      rr.ViewportZFarClipTestEnable = state->depth_clip_far;
1830
      rr.ConservativeRasterizationEnable =
1831
         cso->conservative_rasterization;
1832
#else
1833
      rr.ViewportZClipTestEnable = (state->depth_clip_near || state->depth_clip_far);
1834
#endif
1835
   }
1836

1837
   iris_pack_command(GENX(3DSTATE_CLIP), cso->clip, cl) {
1838
      /* cl.NonPerspectiveBarycentricEnable is filled in at draw time from
1839
       * the FS program; cl.ForceZeroRTAIndexEnable is filled in from the FB.
1840
       */
1841
      cl.EarlyCullEnable = true;
1842
      cl.UserClipDistanceClipTestEnableBitmask = state->clip_plane_enable;
1843
      cl.ForceUserClipDistanceClipTestEnableBitmask = true;
1844
      cl.APIMode = state->clip_halfz ? APIMODE_D3D : APIMODE_OGL;
1845
      cl.GuardbandClipTestEnable = true;
1846
      cl.ClipEnable = true;
1847
      cl.MinimumPointWidth = 0.125;
1848
      cl.MaximumPointWidth = 255.875;
1849

1850
      if (state->flatshade_first) {
1851
         cl.TriangleFanProvokingVertexSelect = 1;
1852
      } else {
1853
         cl.TriangleStripListProvokingVertexSelect = 2;
1854
         cl.TriangleFanProvokingVertexSelect = 2;
1855
         cl.LineStripListProvokingVertexSelect = 1;
1856
      }
1857
   }
1858

1859
   iris_pack_command(GENX(3DSTATE_WM), cso->wm, wm) {
1860
      /* wm.BarycentricInterpolationMode and wm.EarlyDepthStencilControl are
1861
       * filled in at draw time from the FS program.
1862
       */
1863
      wm.LineAntialiasingRegionWidth = _10pixels;
1864
      wm.LineEndCapAntialiasingRegionWidth = _05pixels;
1865
      wm.PointRasterizationRule = RASTRULE_UPPER_RIGHT;
1866
      wm.LineStippleEnable = state->line_stipple_enable;
1867
      wm.PolygonStippleEnable = state->poly_stipple_enable;
1868
   }
1869

1870
   /* Remap from 0..255 back to 1..256 */
1871
   const unsigned line_stipple_factor = state->line_stipple_factor + 1;
1872

1873
   iris_pack_command(GENX(3DSTATE_LINE_STIPPLE), cso->line_stipple, line) {
1874
      if (state->line_stipple_enable) {
1875
         line.LineStipplePattern = state->line_stipple_pattern;
1876
         line.LineStippleInverseRepeatCount = 1.0f / line_stipple_factor;
1877
         line.LineStippleRepeatCount = line_stipple_factor;
1878
      }
1879
   }
1880

1881
   return cso;
1882
}
1883

1884
/**
1885
 * The pipe->bind_rasterizer_state() driver hook.
1886
 *
1887
 * Bind a rasterizer CSO and flag related dirty bits.
1888
 */
1889
static void
1890
iris_bind_rasterizer_state(struct pipe_context *ctx, void *state)
1891
{
1892
   struct iris_context *ice = (struct iris_context *) ctx;
1893
   struct iris_rasterizer_state *old_cso = ice->state.cso_rast;
1894
   struct iris_rasterizer_state *new_cso = state;
1895

1896
   if (new_cso) {
1897
      /* Try to avoid re-emitting 3DSTATE_LINE_STIPPLE, it's non-pipelined */
1898
      if (cso_changed_memcmp(line_stipple))
1899
         ice->state.dirty |= IRIS_DIRTY_LINE_STIPPLE;
1900

1901
      if (cso_changed(half_pixel_center))
1902
         ice->state.dirty |= IRIS_DIRTY_MULTISAMPLE;
1903

1904
      if (cso_changed(line_stipple_enable) || cso_changed(poly_stipple_enable))
1905
         ice->state.dirty |= IRIS_DIRTY_WM;
1906

1907
      if (cso_changed(rasterizer_discard))
1908
         ice->state.dirty |= IRIS_DIRTY_STREAMOUT | IRIS_DIRTY_CLIP;
1909

1910
      if (cso_changed(flatshade_first))
1911
         ice->state.dirty |= IRIS_DIRTY_STREAMOUT;
1912

1913
      if (cso_changed(depth_clip_near) || cso_changed(depth_clip_far) ||
1914
          cso_changed(clip_halfz))
1915
         ice->state.dirty |= IRIS_DIRTY_CC_VIEWPORT;
1916

1917
      if (cso_changed(sprite_coord_enable) ||
1918
          cso_changed(sprite_coord_mode) ||
1919
          cso_changed(light_twoside))
1920
         ice->state.dirty |= IRIS_DIRTY_SBE;
1921

1922
      if (cso_changed(conservative_rasterization))
1923
         ice->state.stage_dirty |= IRIS_STAGE_DIRTY_FS;
1924
   }
1925

1926
   ice->state.cso_rast = new_cso;
1927
   ice->state.dirty |= IRIS_DIRTY_RASTER;
1928
   ice->state.dirty |= IRIS_DIRTY_CLIP;
1929
   ice->state.stage_dirty |=
1930
      ice->state.stage_dirty_for_nos[IRIS_NOS_RASTERIZER];
1931
}
1932

1933
/**
1934
 * Return true if the given wrap mode requires the border color to exist.
1935
 *
1936
 * (We can skip uploading it if the sampler isn't going to use it.)
1937
 */
1938
static bool
1939
wrap_mode_needs_border_color(unsigned wrap_mode)
1940
{
1941
   return wrap_mode == TCM_CLAMP_BORDER || wrap_mode == TCM_HALF_BORDER;
1942
}
1943

1944
/**
1945
 * Gallium CSO for sampler state.
1946
 */
1947
struct iris_sampler_state {
1948
   union pipe_color_union border_color;
1949
   bool needs_border_color;
1950

1951
   uint32_t sampler_state[GENX(SAMPLER_STATE_length)];
1952
};
1953

1954
/**
1955
 * The pipe->create_sampler_state() driver hook.
1956
 *
1957
 * We fill out SAMPLER_STATE (except for the border color pointer), and
1958
 * store that on the CPU.  It doesn't make sense to upload it to a GPU
1959
 * buffer object yet, because 3DSTATE_SAMPLER_STATE_POINTERS requires
1960
 * all bound sampler states to be in contiguous memor.
1961
 */
1962
static void *
1963
iris_create_sampler_state(struct pipe_context *ctx,
1964
                          const struct pipe_sampler_state *state)
1965
{
1966
   struct iris_sampler_state *cso = CALLOC_STRUCT(iris_sampler_state);
1967

1968
   if (!cso)
1969
      return NULL;
1970

1971
   STATIC_ASSERT(PIPE_TEX_FILTER_NEAREST == MAPFILTER_NEAREST);
1972
   STATIC_ASSERT(PIPE_TEX_FILTER_LINEAR == MAPFILTER_LINEAR);
1973

1974
   unsigned wrap_s = translate_wrap(state->wrap_s);
1975
   unsigned wrap_t = translate_wrap(state->wrap_t);
1976
   unsigned wrap_r = translate_wrap(state->wrap_r);
1977

1978
   memcpy(&cso->border_color, &state->border_color, sizeof(cso->border_color));
1979

1980
   cso->needs_border_color = wrap_mode_needs_border_color(wrap_s) ||
1981
                             wrap_mode_needs_border_color(wrap_t) ||
1982
                             wrap_mode_needs_border_color(wrap_r);
1983

1984
   float min_lod = state->min_lod;
1985
   unsigned mag_img_filter = state->mag_img_filter;
1986

1987
   // XXX: explain this code ported from ilo...I don't get it at all...
1988
   if (state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE &&
1989
       state->min_lod > 0.0f) {
1990
      min_lod = 0.0f;
1991
      mag_img_filter = state->min_img_filter;
1992
   }
1993

1994
   iris_pack_state(GENX(SAMPLER_STATE), cso->sampler_state, samp) {
1995
      samp.TCXAddressControlMode = wrap_s;
1996
      samp.TCYAddressControlMode = wrap_t;
1997
      samp.TCZAddressControlMode = wrap_r;
1998
      samp.CubeSurfaceControlMode = state->seamless_cube_map;
1999
      samp.NonnormalizedCoordinateEnable = !state->normalized_coords;
2000
      samp.MinModeFilter = state->min_img_filter;
2001
      samp.MagModeFilter = mag_img_filter;
2002
      samp.MipModeFilter = translate_mip_filter(state->min_mip_filter);
2003
      samp.MaximumAnisotropy = RATIO21;
2004

2005
      if (state->max_anisotropy >= 2) {
2006
         if (state->min_img_filter == PIPE_TEX_FILTER_LINEAR) {
2007
            samp.MinModeFilter = MAPFILTER_ANISOTROPIC;
2008
            samp.AnisotropicAlgorithm = EWAApproximation;
2009
         }
2010

2011
         if (state->mag_img_filter == PIPE_TEX_FILTER_LINEAR)
2012
            samp.MagModeFilter = MAPFILTER_ANISOTROPIC;
2013

2014
         samp.MaximumAnisotropy =
2015
            MIN2((state->max_anisotropy - 2) / 2, RATIO161);
2016
      }
2017

2018
      /* Set address rounding bits if not using nearest filtering. */
2019
      if (state->min_img_filter != PIPE_TEX_FILTER_NEAREST) {
2020
         samp.UAddressMinFilterRoundingEnable = true;
2021
         samp.VAddressMinFilterRoundingEnable = true;
2022
         samp.RAddressMinFilterRoundingEnable = true;
2023
      }
2024

2025
      if (state->mag_img_filter != PIPE_TEX_FILTER_NEAREST) {
2026
         samp.UAddressMagFilterRoundingEnable = true;
2027
         samp.VAddressMagFilterRoundingEnable = true;
2028
         samp.RAddressMagFilterRoundingEnable = true;
2029
      }
2030

2031
      if (state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE)
2032
         samp.ShadowFunction = translate_shadow_func(state->compare_func);
2033

2034
      const float hw_max_lod = GFX_VER >= 7 ? 14 : 13;
2035

2036
      samp.LODPreClampMode = CLAMP_MODE_OGL;
2037
      samp.MinLOD = CLAMP(min_lod, 0, hw_max_lod);
2038
      samp.MaxLOD = CLAMP(state->max_lod, 0, hw_max_lod);
2039
      samp.TextureLODBias = CLAMP(state->lod_bias, -16, 15);
2040

2041
      /* .BorderColorPointer is filled in by iris_bind_sampler_states. */
2042
   }
2043

2044
   return cso;
2045
}
2046

2047
/**
2048
 * The pipe->bind_sampler_states() driver hook.
2049
 */
2050
static void
2051
iris_bind_sampler_states(struct pipe_context *ctx,
2052
                         enum pipe_shader_type p_stage,
2053
                         unsigned start, unsigned count,
2054
                         void **states)
2055
{
2056
   struct iris_context *ice = (struct iris_context *) ctx;
2057
   gl_shader_stage stage = stage_from_pipe(p_stage);
2058
   struct iris_shader_state *shs = &ice->state.shaders[stage];
2059

2060
   assert(start + count <= IRIS_MAX_TEXTURE_SAMPLERS);
2061

2062
   bool dirty = false;
2063

2064
   for (int i = 0; i < count; i++) {
2065
      if (shs->samplers[start + i] != states[i]) {
2066
         shs->samplers[start + i] = states[i];
2067
         dirty = true;
2068
      }
2069
   }
2070

2071
   if (dirty)
2072
      ice->state.stage_dirty |= IRIS_STAGE_DIRTY_SAMPLER_STATES_VS << stage;
2073
}
2074

2075
/**
2076
 * Upload the sampler states into a contiguous area of GPU memory, for
2077
 * for 3DSTATE_SAMPLER_STATE_POINTERS_*.
2078
 *
2079
 * Also fill out the border color state pointers.
2080
 */
2081
static void
2082
iris_upload_sampler_states(struct iris_context *ice, gl_shader_stage stage)
2083
{
2084
   struct iris_shader_state *shs = &ice->state.shaders[stage];
2085
   const struct shader_info *info = iris_get_shader_info(ice, stage);
2086

2087
   /* We assume gallium frontends will call pipe->bind_sampler_states()
2088
    * if the program's number of textures changes.
2089
    */
2090
   unsigned count = info ? BITSET_LAST_BIT(info->textures_used) : 0;
2091

2092
   if (!count)
2093
      return;
2094

2095
   /* Assemble the SAMPLER_STATEs into a contiguous table that lives
2096
    * in the dynamic state memory zone, so we can point to it via the
2097
    * 3DSTATE_SAMPLER_STATE_POINTERS_* commands.
2098
    */
2099
   unsigned size = count * 4 * GENX(SAMPLER_STATE_length);
2100
   uint32_t *map =
2101
      upload_state(ice->state.dynamic_uploader, &shs->sampler_table, size, 32);
2102
   if (unlikely(!map))
2103
      return;
2104

2105
   struct pipe_resource *res = shs->sampler_table.res;
2106
   struct iris_bo *bo = iris_resource_bo(res);
2107

2108
   iris_record_state_size(ice->state.sizes,
2109
                          bo->gtt_offset + shs->sampler_table.offset, size);
2110

2111
   shs->sampler_table.offset += iris_bo_offset_from_base_address(bo);
2112

2113
   /* Make sure all land in the same BO */
2114
   iris_border_color_pool_reserve(ice, IRIS_MAX_TEXTURE_SAMPLERS);
2115

2116
   ice->state.need_border_colors &= ~(1 << stage);
2117

2118
   for (int i = 0; i < count; i++) {
2119
      struct iris_sampler_state *state = shs->samplers[i];
2120
      struct iris_sampler_view *tex = shs->textures[i];
2121

2122
      if (!state) {
2123
         memset(map, 0, 4 * GENX(SAMPLER_STATE_length));
2124
      } else if (!state->needs_border_color) {
2125
         memcpy(map, state->sampler_state, 4 * GENX(SAMPLER_STATE_length));
2126
      } else {
2127
         ice->state.need_border_colors |= 1 << stage;
2128

2129
         /* We may need to swizzle the border color for format faking.
2130
          * A/LA formats are faked as R/RG with 000R or R00G swizzles.
2131
          * This means we need to move the border color's A channel into
2132
          * the R or G channels so that those read swizzles will move it
2133
          * back into A.
2134
          */
2135
         union pipe_color_union *color = &state->border_color;
2136
         union pipe_color_union tmp;
2137
         if (tex) {
2138
            enum pipe_format internal_format = tex->res->internal_format;
2139

2140
            if (util_format_is_alpha(internal_format)) {
2141
               unsigned char swz[4] = {
2142
                  PIPE_SWIZZLE_W, PIPE_SWIZZLE_0,
2143
                  PIPE_SWIZZLE_0, PIPE_SWIZZLE_0
2144
               };
2145
               util_format_apply_color_swizzle(&tmp, color, swz, true);
2146
               color = &tmp;
2147
            } else if (util_format_is_luminance_alpha(internal_format) &&
2148
                       internal_format != PIPE_FORMAT_L8A8_SRGB) {
2149
               unsigned char swz[4] = {
2150
                  PIPE_SWIZZLE_X, PIPE_SWIZZLE_W,
2151
                  PIPE_SWIZZLE_0, PIPE_SWIZZLE_0
2152
               };
2153
               util_format_apply_color_swizzle(&tmp, color, swz, true);
2154
               color = &tmp;
2155
            }
2156
         }
2157

2158
         /* Stream out the border color and merge the pointer. */
2159
         uint32_t offset = iris_upload_border_color(ice, color);
2160

2161
         uint32_t dynamic[GENX(SAMPLER_STATE_length)];
2162
         iris_pack_state(GENX(SAMPLER_STATE), dynamic, dyns) {
2163
            dyns.BorderColorPointer = offset;
2164
         }
2165

2166
         for (uint32_t j = 0; j < GENX(SAMPLER_STATE_length); j++)
2167
            map[j] = state->sampler_state[j] | dynamic[j];
2168
      }
2169

2170
      map += GENX(SAMPLER_STATE_length);
2171
   }
2172
}
2173

2174
static enum isl_channel_select
2175
fmt_swizzle(const struct iris_format_info *fmt, enum pipe_swizzle swz)
2176
{
2177
   switch (swz) {
2178
   case PIPE_SWIZZLE_X: return fmt->swizzle.r;
2179
   case PIPE_SWIZZLE_Y: return fmt->swizzle.g;
2180
   case PIPE_SWIZZLE_Z: return fmt->swizzle.b;
2181
   case PIPE_SWIZZLE_W: return fmt->swizzle.a;
2182
   case PIPE_SWIZZLE_1: return ISL_CHANNEL_SELECT_ONE;
2183
   case PIPE_SWIZZLE_0: return ISL_CHANNEL_SELECT_ZERO;
2184
   default: unreachable("invalid swizzle");
2185
   }
2186
}
2187

2188
static void
2189
fill_buffer_surface_state(struct isl_device *isl_dev,
2190
                          struct iris_resource *res,
2191
                          void *map,
2192
                          enum isl_format format,
2193
                          struct isl_swizzle swizzle,
2194
                          unsigned offset,
2195
                          unsigned size,
2196
                          isl_surf_usage_flags_t usage)
2197
{
2198
   const struct isl_format_layout *fmtl = isl_format_get_layout(format);
2199
   const unsigned cpp = format == ISL_FORMAT_RAW ? 1 : fmtl->bpb / 8;
2200

2201
   /* The ARB_texture_buffer_specification says:
2202
    *
2203
    *    "The number of texels in the buffer texture's texel array is given by
2204
    *
2205
    *       floor(<buffer_size> / (<components> * sizeof(<base_type>)),
2206
    *
2207
    *     where <buffer_size> is the size of the buffer object, in basic
2208
    *     machine units and <components> and <base_type> are the element count
2209
    *     and base data type for elements, as specified in Table X.1.  The
2210
    *     number of texels in the texel array is then clamped to the
2211
    *     implementation-dependent limit MAX_TEXTURE_BUFFER_SIZE_ARB."
2212
    *
2213
    * We need to clamp the size in bytes to MAX_TEXTURE_BUFFER_SIZE * stride,
2214
    * so that when ISL divides by stride to obtain the number of texels, that
2215
    * texel count is clamped to MAX_TEXTURE_BUFFER_SIZE.
2216
    */
2217
   unsigned final_size =
2218
      MIN3(size, res->bo->size - res->offset - offset,
2219
           IRIS_MAX_TEXTURE_BUFFER_SIZE * cpp);
2220

2221
   isl_buffer_fill_state(isl_dev, map,
2222
                         .address = res->bo->gtt_offset + res->offset + offset,
2223
                         .size_B = final_size,
2224
                         .format = format,
2225
                         .swizzle = swizzle,
2226
                         .stride_B = cpp,
2227
                         .mocs = iris_mocs(res->bo, isl_dev, usage));
2228
}
2229

2230
/* Alignment, in bytes, requested when uploading SURFACE_STATE structures.
 * alloc_surface_states() statically asserts that one RENDER_SURFACE_STATE
 * is exactly this many bytes, so the code in this file also uses it as the
 * stride between consecutive states in a CPU-side array.
 */
#define SURFACE_STATE_ALIGNMENT 64
2231

2232
/**
2233
 * Allocate several contiguous SURFACE_STATE structures, one for each
2234
 * supported auxiliary surface mode.  This only allocates the CPU-side
2235
 * copy, they will need to be uploaded later after they're filled in.
2236
 */
2237
static void
2238
alloc_surface_states(struct iris_surface_state *surf_state,
2239
                     unsigned aux_usages)
2240
{
2241
   const unsigned surf_size = 4 * GENX(RENDER_SURFACE_STATE_length);
2242

2243
   /* If this changes, update this to explicitly align pointers */
2244
   STATIC_ASSERT(surf_size == SURFACE_STATE_ALIGNMENT);
2245

2246
   assert(aux_usages != 0);
2247

2248
   /* In case we're re-allocating them... */
2249
   free(surf_state->cpu);
2250

2251
   surf_state->num_states = util_bitcount(aux_usages);
2252
   surf_state->cpu = calloc(surf_state->num_states, surf_size);
2253
   surf_state->ref.offset = 0;
2254
   pipe_resource_reference(&surf_state->ref.res, NULL);
2255

2256
   assert(surf_state->cpu);
2257
}
2258

2259
/**
2260
 * Upload the CPU side SURFACE_STATEs into a GPU buffer.
2261
 */
2262
static void
2263
upload_surface_states(struct u_upload_mgr *mgr,
2264
                      struct iris_surface_state *surf_state)
2265
{
2266
   const unsigned surf_size = 4 * GENX(RENDER_SURFACE_STATE_length);
2267
   const unsigned bytes = surf_state->num_states * surf_size;
2268

2269
   void *map =
2270
      upload_state(mgr, &surf_state->ref, bytes, SURFACE_STATE_ALIGNMENT);
2271

2272
   surf_state->ref.offset +=
2273
      iris_bo_offset_from_base_address(iris_resource_bo(surf_state->ref.res));
2274

2275
   if (map)
2276
      memcpy(map, surf_state->cpu, bytes);
2277
}
2278

2279
/**
2280
 * Update resource addresses in a set of SURFACE_STATE descriptors,
2281
 * and re-upload them if necessary.
2282
 */
2283
static bool
2284
update_surface_state_addrs(struct u_upload_mgr *mgr,
2285
                           struct iris_surface_state *surf_state,
2286
                           struct iris_bo *bo)
2287
{
2288
   if (surf_state->bo_address == bo->gtt_offset)
2289
      return false;
2290

2291
   STATIC_ASSERT(GENX(RENDER_SURFACE_STATE_SurfaceBaseAddress_start) % 64 == 0);
2292
   STATIC_ASSERT(GENX(RENDER_SURFACE_STATE_SurfaceBaseAddress_bits) == 64);
2293

2294
   uint64_t *ss_addr = (uint64_t *) &surf_state->cpu[GENX(RENDER_SURFACE_STATE_SurfaceBaseAddress_start) / 32];
2295

2296
   /* First, update the CPU copies.  We assume no other fields exist in
2297
    * the QWord containing Surface Base Address.
2298
    */
2299
   for (unsigned i = 0; i < surf_state->num_states; i++) {
2300
      *ss_addr = *ss_addr - surf_state->bo_address + bo->gtt_offset;
2301
      ss_addr = ((void *) ss_addr) + SURFACE_STATE_ALIGNMENT;
2302
   }
2303

2304
   /* Next, upload the updated copies to a GPU buffer. */
2305
   upload_surface_states(mgr, surf_state);
2306

2307
   surf_state->bo_address = bo->gtt_offset;
2308

2309
   return true;
2310
}
2311

2312
static void
2313
fill_surface_state(struct isl_device *isl_dev,
2314
                   void *map,
2315
                   struct iris_resource *res,
2316
                   struct isl_surf *surf,
2317
                   struct isl_view *view,
2318
                   unsigned aux_usage,
2319
                   uint32_t extra_main_offset,
2320
                   uint32_t tile_x_sa,
2321
                   uint32_t tile_y_sa)
2322
{
2323
   struct isl_surf_fill_state_info f = {
2324
      .surf = surf,
2325
      .view = view,
2326
      .mocs = iris_mocs(res->bo, isl_dev, view->usage),
2327
      .address = res->bo->gtt_offset + res->offset + extra_main_offset,
2328
      .x_offset_sa = tile_x_sa,
2329
      .y_offset_sa = tile_y_sa,
2330
   };
2331

2332
   assert(!iris_resource_unfinished_aux_import(res));
2333

2334
   if (aux_usage != ISL_AUX_USAGE_NONE) {
2335
      f.aux_surf = &res->aux.surf;
2336
      f.aux_usage = aux_usage;
2337
      f.aux_address = res->aux.bo->gtt_offset + res->aux.offset;
2338

2339
      struct iris_bo *clear_bo = NULL;
2340
      uint64_t clear_offset = 0;
2341
      f.clear_color =
2342
         iris_resource_get_clear_color(res, &clear_bo, &clear_offset);
2343
      if (clear_bo) {
2344
         f.clear_address = clear_bo->gtt_offset + clear_offset;
2345
         f.use_clear_address = isl_dev->info->ver > 9;
2346
      }
2347
   }
2348

2349
   isl_surf_fill_state_s(isl_dev, map, &f);
2350
}
2351

2352
/**
2353
 * The pipe->create_sampler_view() driver hook.
2354
 */
2355
static struct pipe_sampler_view *
2356
iris_create_sampler_view(struct pipe_context *ctx,
2357
                         struct pipe_resource *tex,
2358
                         const struct pipe_sampler_view *tmpl)
2359
{
2360
   struct iris_screen *screen = (struct iris_screen *)ctx->screen;
2361
   const struct intel_device_info *devinfo = &screen->devinfo;
2362
   struct iris_sampler_view *isv = calloc(1, sizeof(struct iris_sampler_view));
2363

2364
   if (!isv)
2365
      return NULL;
2366

2367
   /* initialize base object */
2368
   isv->base = *tmpl;
2369
   isv->base.context = ctx;
2370
   isv->base.texture = NULL;
2371
   pipe_reference_init(&isv->base.reference, 1);
2372
   pipe_resource_reference(&isv->base.texture, tex);
2373

2374
   if (util_format_is_depth_or_stencil(tmpl->format)) {
2375
      struct iris_resource *zres, *sres;
2376
      const struct util_format_description *desc =
2377
         util_format_description(tmpl->format);
2378

2379
      iris_get_depth_stencil_resources(tex, &zres, &sres);
2380

2381
      tex = util_format_has_depth(desc) ? &zres->base.b : &sres->base.b;
2382
   }
2383

2384
   isv->res = (struct iris_resource *) tex;
2385

2386
   alloc_surface_states(&isv->surface_state, isv->res->aux.sampler_usages);
2387

2388
   isv->surface_state.bo_address = isv->res->bo->gtt_offset;
2389

2390
   isl_surf_usage_flags_t usage = ISL_SURF_USAGE_TEXTURE_BIT;
2391

2392
   if (isv->base.target == PIPE_TEXTURE_CUBE ||
2393
       isv->base.target == PIPE_TEXTURE_CUBE_ARRAY)
2394
      usage |= ISL_SURF_USAGE_CUBE_BIT;
2395

2396
   const struct iris_format_info fmt =
2397
      iris_format_for_usage(devinfo, tmpl->format, usage);
2398

2399
   isv->clear_color = isv->res->aux.clear_color;
2400

2401
   isv->view = (struct isl_view) {
2402
      .format = fmt.fmt,
2403
      .swizzle = (struct isl_swizzle) {
2404
         .r = fmt_swizzle(&fmt, tmpl->swizzle_r),
2405
         .g = fmt_swizzle(&fmt, tmpl->swizzle_g),
2406
         .b = fmt_swizzle(&fmt, tmpl->swizzle_b),
2407
         .a = fmt_swizzle(&fmt, tmpl->swizzle_a),
2408
      },
2409
      .usage = usage,
2410
   };
2411

2412
   void *map = isv->surface_state.cpu;
2413

2414
   /* Fill out SURFACE_STATE for this view. */
2415
   if (tmpl->target != PIPE_BUFFER) {
2416
      isv->view.base_level = tmpl->u.tex.first_level;
2417
      isv->view.levels = tmpl->u.tex.last_level - tmpl->u.tex.first_level + 1;
2418
      // XXX: do I need to port f9fd0cf4790cb2a530e75d1a2206dbb9d8af7cb2?
2419
      isv->view.base_array_layer = tmpl->u.tex.first_layer;
2420
      isv->view.array_len =
2421
         tmpl->u.tex.last_layer - tmpl->u.tex.first_layer + 1;
2422

2423
      if (iris_resource_unfinished_aux_import(isv->res))
2424
         iris_resource_finish_aux_import(&screen->base, isv->res);
2425

2426
      unsigned aux_modes = isv->res->aux.sampler_usages;
2427
      while (aux_modes) {
2428
         enum isl_aux_usage aux_usage = u_bit_scan(&aux_modes);
2429

2430
         fill_surface_state(&screen->isl_dev, map, isv->res, &isv->res->surf,
2431
                            &isv->view, aux_usage, 0, 0, 0);
2432

2433
         map += SURFACE_STATE_ALIGNMENT;
2434
      }
2435
   } else {
2436
      fill_buffer_surface_state(&screen->isl_dev, isv->res, map,
2437
                                isv->view.format, isv->view.swizzle,
2438
                                tmpl->u.buf.offset, tmpl->u.buf.size,
2439
                                ISL_SURF_USAGE_TEXTURE_BIT);
2440
   }
2441

2442
   return &isv->base;
2443
}
2444

2445
static void
2446
iris_sampler_view_destroy(struct pipe_context *ctx,
2447
                          struct pipe_sampler_view *state)
2448
{
2449
   struct iris_sampler_view *isv = (void *) state;
2450
   pipe_resource_reference(&state->texture, NULL);
2451
   pipe_resource_reference(&isv->surface_state.ref.res, NULL);
2452
   free(isv->surface_state.cpu);
2453
   free(isv);
2454
}
2455

2456
/**
2457
 * The pipe->create_surface() driver hook.
2458
 *
2459
 * In Gallium nomenclature, "surfaces" are a view of a resource that
2460
 * can be bound as a render target or depth/stencil buffer.
2461
 */
2462
static struct pipe_surface *
2463
iris_create_surface(struct pipe_context *ctx,
2464
                    struct pipe_resource *tex,
2465
                    const struct pipe_surface *tmpl)
2466
{
2467
   struct iris_screen *screen = (struct iris_screen *)ctx->screen;
2468
   const struct intel_device_info *devinfo = &screen->devinfo;
2469

2470
   isl_surf_usage_flags_t usage = 0;
2471
   if (tmpl->writable)
2472
      usage = ISL_SURF_USAGE_STORAGE_BIT;
2473
   else if (util_format_is_depth_or_stencil(tmpl->format))
2474
      usage = ISL_SURF_USAGE_DEPTH_BIT;
2475
   else
2476
      usage = ISL_SURF_USAGE_RENDER_TARGET_BIT;
2477

2478
   const struct iris_format_info fmt =
2479
      iris_format_for_usage(devinfo, tmpl->format, usage);
2480

2481
   if ((usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) &&
2482
       !isl_format_supports_rendering(devinfo, fmt.fmt)) {
2483
      /* Framebuffer validation will reject this invalid case, but it
2484
       * hasn't had the opportunity yet.  In the meantime, we need to
2485
       * avoid hitting ISL asserts about unsupported formats below.
2486
       */
2487
      return NULL;
2488
   }
2489

2490
   struct iris_surface *surf = calloc(1, sizeof(struct iris_surface));
2491
   struct pipe_surface *psurf = &surf->base;
2492
   struct iris_resource *res = (struct iris_resource *) tex;
2493

2494
   if (!surf)
2495
      return NULL;
2496

2497
   pipe_reference_init(&psurf->reference, 1);
2498
   pipe_resource_reference(&psurf->texture, tex);
2499
   psurf->context = ctx;
2500
   psurf->format = tmpl->format;
2501
   psurf->width = tex->width0;
2502
   psurf->height = tex->height0;
2503
   psurf->texture = tex;
2504
   psurf->u.tex.first_layer = tmpl->u.tex.first_layer;
2505
   psurf->u.tex.last_layer = tmpl->u.tex.last_layer;
2506
   psurf->u.tex.level = tmpl->u.tex.level;
2507

2508
   uint32_t array_len = tmpl->u.tex.last_layer - tmpl->u.tex.first_layer + 1;
2509

2510
   struct isl_view *view = &surf->view;
2511
   *view = (struct isl_view) {
2512
      .format = fmt.fmt,
2513
      .base_level = tmpl->u.tex.level,
2514
      .levels = 1,
2515
      .base_array_layer = tmpl->u.tex.first_layer,
2516
      .array_len = array_len,
2517
      .swizzle = ISL_SWIZZLE_IDENTITY,
2518
      .usage = usage,
2519
   };
2520

2521
#if GFX_VER == 8
2522
   struct isl_view *read_view = &surf->read_view;
2523
   *read_view = (struct isl_view) {
2524
      .format = fmt.fmt,
2525
      .base_level = tmpl->u.tex.level,
2526
      .levels = 1,
2527
      .base_array_layer = tmpl->u.tex.first_layer,
2528
      .array_len = array_len,
2529
      .swizzle = ISL_SWIZZLE_IDENTITY,
2530
      .usage = ISL_SURF_USAGE_TEXTURE_BIT,
2531
   };
2532

2533
   struct isl_surf read_surf = res->surf;
2534
   uint32_t read_surf_offset_B = 0;
2535
   uint32_t read_surf_tile_x_sa = 0, read_surf_tile_y_sa = 0;
2536
   if (tex->target == PIPE_TEXTURE_3D && array_len == 1) {
2537
      /* The minimum array element field of the surface state structure is
2538
       * ignored by the sampler unit for 3D textures on some hardware.  If the
2539
       * render buffer is a single slice of a 3D texture, create a 2D texture
2540
       * covering that slice.
2541
       *
2542
       * TODO: This only handles the case where we're rendering to a single
2543
       * slice of an array texture.  If we have layered rendering combined
2544
       * with non-coherent FB fetch and a non-zero base_array_layer, then
2545
       * we're going to run into problems.
2546
       *
2547
       * See https://gitlab.freedesktop.org/mesa/mesa/-/issues/4904
2548
       */
2549
      isl_surf_get_image_surf(&screen->isl_dev, &res->surf,
2550
                              read_view->base_level,
2551
                              0, read_view->base_array_layer,
2552
                              &read_surf, &read_surf_offset_B,
2553
                              &read_surf_tile_x_sa, &read_surf_tile_y_sa);
2554
      read_view->base_level = 0;
2555
      read_view->base_array_layer = 0;
2556
      assert(read_view->array_len == 1);
2557
   } else if (tex->target == PIPE_TEXTURE_1D_ARRAY) {
2558
      /* Convert 1D array textures to 2D arrays because shaders always provide
2559
       * the array index coordinate at the Z component to avoid recompiles
2560
       * when changing the texture target of the framebuffer.
2561
       */
2562
      assert(read_surf.dim_layout == ISL_DIM_LAYOUT_GFX4_2D);
2563
      read_surf.dim = ISL_SURF_DIM_2D;
2564
   }
2565
#endif
2566

2567
   surf->clear_color = res->aux.clear_color;
2568

2569
   /* Bail early for depth/stencil - we don't want SURFACE_STATE for them. */
2570
   if (res->surf.usage & (ISL_SURF_USAGE_DEPTH_BIT |
2571
                          ISL_SURF_USAGE_STENCIL_BIT))
2572
      return psurf;
2573

2574

2575
   alloc_surface_states(&surf->surface_state, res->aux.possible_usages);
2576
   surf->surface_state.bo_address = res->bo->gtt_offset;
2577

2578
#if GFX_VER == 8
2579
   alloc_surface_states(&surf->surface_state_read, res->aux.possible_usages);
2580
   surf->surface_state_read.bo_address = res->bo->gtt_offset;
2581
#endif
2582

2583
   if (!isl_format_is_compressed(res->surf.format)) {
2584
      if (iris_resource_unfinished_aux_import(res))
2585
         iris_resource_finish_aux_import(&screen->base, res);
2586

2587
      void *map = surf->surface_state.cpu;
2588
      UNUSED void *map_read = surf->surface_state_read.cpu;
2589

2590
      /* This is a normal surface.  Fill out a SURFACE_STATE for each possible
2591
       * auxiliary surface mode and return the pipe_surface.
2592
       */
2593
      unsigned aux_modes = res->aux.possible_usages;
2594
      while (aux_modes) {
2595
         enum isl_aux_usage aux_usage = u_bit_scan(&aux_modes);
2596
         fill_surface_state(&screen->isl_dev, map, res, &res->surf,
2597
                            view, aux_usage, 0, 0, 0);
2598
         map += SURFACE_STATE_ALIGNMENT;
2599

2600
#if GFX_VER == 8
2601
         fill_surface_state(&screen->isl_dev, map_read, res,
2602
                            &read_surf, read_view, aux_usage,
2603
                            read_surf_offset_B,
2604
                            read_surf_tile_x_sa, read_surf_tile_y_sa);
2605
         map_read += SURFACE_STATE_ALIGNMENT;
2606
#endif
2607
      }
2608

2609
      return psurf;
2610
   }
2611

2612
   /* The resource has a compressed format, which is not renderable, but we
2613
    * have a renderable view format.  We must be attempting to upload blocks
2614
    * of compressed data via an uncompressed view.
2615
    *
2616
    * In this case, we can assume there are no auxiliary buffers, a single
2617
    * miplevel, and that the resource is single-sampled.  Gallium may try
2618
    * and create an uncompressed view with multiple layers, however.
2619
    */
2620
   assert(!isl_format_is_compressed(fmt.fmt));
2621
   assert(res->aux.possible_usages == 1 << ISL_AUX_USAGE_NONE);
2622
   assert(res->surf.samples == 1);
2623
   assert(view->levels == 1);
2624

2625
   struct isl_surf isl_surf;
2626
   uint32_t offset_B = 0, tile_x_el = 0, tile_y_el = 0;
2627
   bool ok = isl_surf_get_uncompressed_surf(&screen->isl_dev, &res->surf,
2628
                                            view, &isl_surf, view,
2629
                                            &offset_B, &tile_x_el, &tile_y_el);
2630
   if (!ok) {
2631
      free(surf);
2632
      return NULL;
2633
   }
2634

2635
   psurf->width = isl_surf.logical_level0_px.width;
2636
   psurf->height = isl_surf.logical_level0_px.height;
2637

2638
   struct isl_surf_fill_state_info f = {
2639
      .surf = &isl_surf,
2640
      .view = view,
2641
      .mocs = iris_mocs(res->bo, &screen->isl_dev,
2642
                        ISL_SURF_USAGE_RENDER_TARGET_BIT),
2643
      .address = res->bo->gtt_offset + offset_B,
2644
      .x_offset_sa = tile_x_el, /* Single-sampled, so el == sa */
2645
      .y_offset_sa = tile_y_el, /* Single-sampled, so el == sa */
2646
   };
2647

2648
   isl_surf_fill_state_s(&screen->isl_dev, surf->surface_state.cpu, &f);
2649

2650
   return psurf;
2651
}
2652

2653
#if GFX_VER < 9
2654
static void
2655
fill_default_image_param(struct brw_image_param *param)
2656
{
2657
   memset(param, 0, sizeof(*param));
2658
   /* Set the swizzling shifts to all-ones to effectively disable swizzling --
2659
    * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
2660
    * detailed explanation of these parameters.
2661
    */
2662
   param->swizzling[0] = 0xff;
2663
   param->swizzling[1] = 0xff;
2664
}
2665

2666
static void
2667
fill_buffer_image_param(struct brw_image_param *param,
2668
                        enum pipe_format pfmt,
2669
                        unsigned size)
2670
{
2671
   const unsigned cpp = util_format_get_blocksize(pfmt);
2672

2673
   fill_default_image_param(param);
2674
   param->size[0] = size / cpp;
2675
   param->stride[0] = cpp;
2676
}
2677
#else
2678
#define isl_surf_fill_image_param(x, ...)
2679
#define fill_default_image_param(x, ...)
2680
#define fill_buffer_image_param(x, ...)
2681
#endif
2682

2683
/**
2684
 * The pipe->set_shader_images() driver hook.
2685
 */
2686
static void
2687
iris_set_shader_images(struct pipe_context *ctx,
2688
                       enum pipe_shader_type p_stage,
2689
                       unsigned start_slot, unsigned count,
2690
                       unsigned unbind_num_trailing_slots,
2691
                       const struct pipe_image_view *p_images)
2692
{
2693
   struct iris_context *ice = (struct iris_context *) ctx;
2694
   struct iris_screen *screen = (struct iris_screen *)ctx->screen;
2695
   gl_shader_stage stage = stage_from_pipe(p_stage);
2696
   struct iris_shader_state *shs = &ice->state.shaders[stage];
2697
#if GFX_VER == 8
2698
   struct iris_genx_state *genx = ice->state.genx;
2699
   struct brw_image_param *image_params = genx->shaders[stage].image_param;
2700
#endif
2701

2702
   shs->bound_image_views &=
2703
      ~u_bit_consecutive(start_slot, count + unbind_num_trailing_slots);
2704

2705
   for (unsigned i = 0; i < count; i++) {
2706
      struct iris_image_view *iv = &shs->image[start_slot + i];
2707

2708
      if (p_images && p_images[i].resource) {
2709
         const struct pipe_image_view *img = &p_images[i];
2710
         struct iris_resource *res = (void *) img->resource;
2711

2712
         util_copy_image_view(&iv->base, img);
2713

2714
         shs->bound_image_views |= 1 << (start_slot + i);
2715

2716
         res->bind_history |= PIPE_BIND_SHADER_IMAGE;
2717
         res->bind_stages |= 1 << stage;
2718

2719
         enum isl_format isl_fmt = iris_image_view_get_format(ice, img);
2720

2721
         /* Render compression with images supported on gfx12+ only. */
2722
         unsigned aux_usages = GFX_VER >= 12 ? res->aux.possible_usages :
2723
            1 << ISL_AUX_USAGE_NONE;
2724

2725
         alloc_surface_states(&iv->surface_state, aux_usages);
2726
         iv->surface_state.bo_address = res->bo->gtt_offset;
2727

2728
         void *map = iv->surface_state.cpu;
2729

2730
         if (res->base.b.target != PIPE_BUFFER) {
2731
            struct isl_view view = {
2732
               .format = isl_fmt,
2733
               .base_level = img->u.tex.level,
2734
               .levels = 1,
2735
               .base_array_layer = img->u.tex.first_layer,
2736
               .array_len = img->u.tex.last_layer - img->u.tex.first_layer + 1,
2737
               .swizzle = ISL_SWIZZLE_IDENTITY,
2738
               .usage = ISL_SURF_USAGE_STORAGE_BIT,
2739
            };
2740

2741
            /* If using untyped fallback. */
2742
            if (isl_fmt == ISL_FORMAT_RAW) {
2743
               fill_buffer_surface_state(&screen->isl_dev, res, map,
2744
                                         isl_fmt, ISL_SWIZZLE_IDENTITY,
2745
                                         0, res->bo->size,
2746
                                         ISL_SURF_USAGE_STORAGE_BIT);
2747
            } else {
2748
               unsigned aux_modes = aux_usages;
2749
               while (aux_modes) {
2750
                  enum isl_aux_usage usage = u_bit_scan(&aux_modes);
2751

2752
                  fill_surface_state(&screen->isl_dev, map, res, &res->surf,
2753
                                     &view, usage, 0, 0, 0);
2754

2755
                  map += SURFACE_STATE_ALIGNMENT;
2756
               }
2757
            }
2758

2759
            isl_surf_fill_image_param(&screen->isl_dev,
2760
                                      &image_params[start_slot + i],
2761
                                      &res->surf, &view);
2762
         } else {
2763
            util_range_add(&res->base.b, &res->valid_buffer_range, img->u.buf.offset,
2764
                           img->u.buf.offset + img->u.buf.size);
2765

2766
            fill_buffer_surface_state(&screen->isl_dev, res, map,
2767
                                      isl_fmt, ISL_SWIZZLE_IDENTITY,
2768
                                      img->u.buf.offset, img->u.buf.size,
2769
                                      ISL_SURF_USAGE_STORAGE_BIT);
2770
            fill_buffer_image_param(&image_params[start_slot + i],
2771
                                    img->format, img->u.buf.size);
2772
         }
2773

2774
         upload_surface_states(ice->state.surface_uploader, &iv->surface_state);
2775
      } else {
2776
         pipe_resource_reference(&iv->base.resource, NULL);
2777
         pipe_resource_reference(&iv->surface_state.ref.res, NULL);
2778
         fill_default_image_param(&image_params[start_slot + i]);
2779
      }
2780
   }
2781

2782
   ice->state.stage_dirty |= IRIS_STAGE_DIRTY_BINDINGS_VS << stage;
2783
   ice->state.dirty |=
2784
      stage == MESA_SHADER_COMPUTE ? IRIS_DIRTY_COMPUTE_RESOLVES_AND_FLUSHES
2785
                                   : IRIS_DIRTY_RENDER_RESOLVES_AND_FLUSHES;
2786

2787
   /* Broadwell also needs brw_image_params re-uploaded */
2788
   if (GFX_VER < 9) {
2789
      ice->state.stage_dirty |= IRIS_STAGE_DIRTY_CONSTANTS_VS << stage;
2790
      shs->sysvals_need_upload = true;
2791
   }
2792

2793
   if (unbind_num_trailing_slots) {
2794
      iris_set_shader_images(ctx, p_stage, start_slot + count,
2795
                             unbind_num_trailing_slots, 0, NULL);
2796
   }
2797
}
2798

2799

2800
/**
2801
 * The pipe->set_sampler_views() driver hook.
2802
 */
2803
static void
2804
iris_set_sampler_views(struct pipe_context *ctx,
2805
                       enum pipe_shader_type p_stage,
2806
                       unsigned start, unsigned count,
2807
                       unsigned unbind_num_trailing_slots,
2808
                       struct pipe_sampler_view **views)
2809
{
2810
   struct iris_context *ice = (struct iris_context *) ctx;
2811
   gl_shader_stage stage = stage_from_pipe(p_stage);
2812
   struct iris_shader_state *shs = &ice->state.shaders[stage];
2813
   unsigned i;
2814

2815
   shs->bound_sampler_views &=
2816
      ~u_bit_consecutive(start, count + unbind_num_trailing_slots);
2817

2818
   for (i = 0; i < count; i++) {
2819
      struct pipe_sampler_view *pview = views ? views[i] : NULL;
2820
      pipe_sampler_view_reference((struct pipe_sampler_view **)
2821
                                  &shs->textures[start + i], pview);
2822
      struct iris_sampler_view *view = (void *) pview;
2823
      if (view) {
2824
         view->res->bind_history |= PIPE_BIND_SAMPLER_VIEW;
2825
         view->res->bind_stages |= 1 << stage;
2826

2827
         shs->bound_sampler_views |= 1 << (start + i);
2828

2829
         update_surface_state_addrs(ice->state.surface_uploader,
2830
                                    &view->surface_state, view->res->bo);
2831
      }
2832
   }
2833
   for (; i < count + unbind_num_trailing_slots; i++) {
2834
      pipe_sampler_view_reference((struct pipe_sampler_view **)
2835
                                  &shs->textures[start + i], NULL);
2836
   }
2837

2838
   ice->state.stage_dirty |= (IRIS_STAGE_DIRTY_BINDINGS_VS << stage);
2839
   ice->state.dirty |=
2840
      stage == MESA_SHADER_COMPUTE ? IRIS_DIRTY_COMPUTE_RESOLVES_AND_FLUSHES
2841
                                   : IRIS_DIRTY_RENDER_RESOLVES_AND_FLUSHES;
2842
}
2843

2844
/**
 * Compute-resource binding entry point.
 *
 * No resources are ever bound here; the only thing this does is assert
 * that the caller asked for zero of them.
 */
static void
iris_set_compute_resources(struct pipe_context *ctx,
                           unsigned start, unsigned count,
                           struct pipe_surface **resources)
{
   /* Deliberately unused. */
   (void) ctx;
   (void) start;
   (void) resources;

   assert(count == 0);
}
2851

2852
static void
2853
iris_set_global_binding(struct pipe_context *ctx,
2854
                        unsigned start_slot, unsigned count,
2855
                        struct pipe_resource **resources,
2856
                        uint32_t **handles)
2857
{
2858
   struct iris_context *ice = (struct iris_context *) ctx;
2859

2860
   assert(start_slot + count <= IRIS_MAX_GLOBAL_BINDINGS);
2861
   for (unsigned i = 0; i < count; i++) {
2862
      if (resources && resources[i]) {
2863
         pipe_resource_reference(&ice->state.global_bindings[start_slot + i],
2864
                                 resources[i]);
2865
         struct iris_resource *res = (void *) resources[i];
2866
         uint64_t addr = res->bo->gtt_offset;
2867
         memcpy(handles[i], &addr, sizeof(addr));
2868
      } else {
2869
         pipe_resource_reference(&ice->state.global_bindings[start_slot + i],
2870
                                 NULL);
2871
      }
2872
   }
2873

2874
   ice->state.stage_dirty |= IRIS_STAGE_DIRTY_BINDINGS_CS;
2875
}
2876

2877
/**
2878
 * The pipe->set_tess_state() driver hook.
2879
 */
2880
static void
2881
iris_set_tess_state(struct pipe_context *ctx,
2882
                    const float default_outer_level[4],
2883
                    const float default_inner_level[2])
2884
{
2885
   struct iris_context *ice = (struct iris_context *) ctx;
2886
   struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_TESS_CTRL];
2887

2888
   memcpy(&ice->state.default_outer_level[0], &default_outer_level[0], 4 * sizeof(float));
2889
   memcpy(&ice->state.default_inner_level[0], &default_inner_level[0], 2 * sizeof(float));
2890

2891
   ice->state.stage_dirty |= IRIS_STAGE_DIRTY_CONSTANTS_TCS;
2892
   shs->sysvals_need_upload = true;
2893
}
2894

2895
static void
2896
iris_surface_destroy(struct pipe_context *ctx, struct pipe_surface *p_surf)
2897
{
2898
   struct iris_surface *surf = (void *) p_surf;
2899
   pipe_resource_reference(&p_surf->texture, NULL);
2900
   pipe_resource_reference(&surf->surface_state.ref.res, NULL);
2901
   pipe_resource_reference(&surf->surface_state_read.ref.res, NULL);
2902
   free(surf->surface_state.cpu);
2903
   free(surf);
2904
}
2905

2906
static void
2907
iris_set_clip_state(struct pipe_context *ctx,
2908
                    const struct pipe_clip_state *state)
2909
{
2910
   struct iris_context *ice = (struct iris_context *) ctx;
2911
   struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_VERTEX];
2912
   struct iris_shader_state *gshs = &ice->state.shaders[MESA_SHADER_GEOMETRY];
2913
   struct iris_shader_state *tshs = &ice->state.shaders[MESA_SHADER_TESS_EVAL];
2914

2915
   memcpy(&ice->state.clip_planes, state, sizeof(*state));
2916

2917
   ice->state.stage_dirty |= IRIS_STAGE_DIRTY_CONSTANTS_VS |
2918
                             IRIS_STAGE_DIRTY_CONSTANTS_GS |
2919
                             IRIS_STAGE_DIRTY_CONSTANTS_TES;
2920
   shs->sysvals_need_upload = true;
2921
   gshs->sysvals_need_upload = true;
2922
   tshs->sysvals_need_upload = true;
2923
}
2924

2925
/**
2926
 * The pipe->set_polygon_stipple() driver hook.
2927
 */
2928
static void
2929
iris_set_polygon_stipple(struct pipe_context *ctx,
2930
                         const struct pipe_poly_stipple *state)
2931
{
2932
   struct iris_context *ice = (struct iris_context *) ctx;
2933
   memcpy(&ice->state.poly_stipple, state, sizeof(*state));
2934
   ice->state.dirty |= IRIS_DIRTY_POLYGON_STIPPLE;
2935
}
2936

2937
/**
2938
 * The pipe->set_sample_mask() driver hook.
2939
 */
2940
static void
2941
iris_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask)
2942
{
2943
   struct iris_context *ice = (struct iris_context *) ctx;
2944

2945
   /* We only support 16x MSAA, so we have 16 bits of sample maks.
2946
    * st/mesa may pass us 0xffffffff though, meaning "enable all samples".
2947
    */
2948
   ice->state.sample_mask = sample_mask & 0xffff;
2949
   ice->state.dirty |= IRIS_DIRTY_SAMPLE_MASK;
2950
}
2951

2952
/**
2953
 * The pipe->set_scissor_states() driver hook.
2954
 *
2955
 * This corresponds to our SCISSOR_RECT state structures.  It's an
2956
 * exact match, so we just store them, and memcpy them out later.
2957
 */
2958
static void
2959
iris_set_scissor_states(struct pipe_context *ctx,
2960
                        unsigned start_slot,
2961
                        unsigned num_scissors,
2962
                        const struct pipe_scissor_state *rects)
2963
{
2964
   struct iris_context *ice = (struct iris_context *) ctx;
2965

2966
   for (unsigned i = 0; i < num_scissors; i++) {
2967
      if (rects[i].minx == rects[i].maxx || rects[i].miny == rects[i].maxy) {
2968
         /* If the scissor was out of bounds and got clamped to 0 width/height
2969
          * at the bounds, the subtraction of 1 from maximums could produce a
2970
          * negative number and thus not clip anything.  Instead, just provide
2971
          * a min > max scissor inside the bounds, which produces the expected
2972
          * no rendering.
2973
          */
2974
         ice->state.scissors[start_slot + i] = (struct pipe_scissor_state) {
2975
            .minx = 1, .maxx = 0, .miny = 1, .maxy = 0,
2976
         };
2977
      } else {
2978
         ice->state.scissors[start_slot + i] = (struct pipe_scissor_state) {
2979
            .minx = rects[i].minx,     .miny = rects[i].miny,
2980
            .maxx = rects[i].maxx - 1, .maxy = rects[i].maxy - 1,
2981
         };
2982
      }
2983
   }
2984

2985
   ice->state.dirty |= IRIS_DIRTY_SCISSOR_RECT;
2986
}
2987

2988
/**
2989
 * The pipe->set_stencil_ref() driver hook.
2990
 *
2991
 * This is added to 3DSTATE_WM_DEPTH_STENCIL dynamically at draw time.
2992
 */
2993
static void
2994
iris_set_stencil_ref(struct pipe_context *ctx,
2995
                     const struct pipe_stencil_ref state)
2996
{
2997
   struct iris_context *ice = (struct iris_context *) ctx;
2998
   memcpy(&ice->state.stencil_ref, &state, sizeof(state));
2999
   if (GFX_VER >= 12)
3000
      ice->state.dirty |= IRIS_DIRTY_STENCIL_REF;
3001
   else if (GFX_VER >= 9)
3002
      ice->state.dirty |= IRIS_DIRTY_WM_DEPTH_STENCIL;
3003
   else
3004
      ice->state.dirty |= IRIS_DIRTY_COLOR_CALC_STATE;
3005
}
3006

3007
static float
3008
viewport_extent(const struct pipe_viewport_state *state, int axis, float sign)
3009
{
3010
   return copysignf(state->scale[axis], sign) + state->translate[axis];
3011
}
3012

3013
/**
3014
 * The pipe->set_viewport_states() driver hook.
3015
 *
3016
 * This corresponds to our SF_CLIP_VIEWPORT states.  We can't calculate
3017
 * the guardband yet, as we need the framebuffer dimensions, but we can
3018
 * at least fill out the rest.
3019
 */
3020
static void
3021
iris_set_viewport_states(struct pipe_context *ctx,
3022
                         unsigned start_slot,
3023
                         unsigned count,
3024
                         const struct pipe_viewport_state *states)
3025
{
3026
   struct iris_context *ice = (struct iris_context *) ctx;
3027

3028
   memcpy(&ice->state.viewports[start_slot], states, sizeof(*states) * count);
3029

3030
   ice->state.dirty |= IRIS_DIRTY_SF_CL_VIEWPORT;
3031

3032
   if (ice->state.cso_rast && (!ice->state.cso_rast->depth_clip_near ||
3033
                               !ice->state.cso_rast->depth_clip_far))
3034
      ice->state.dirty |= IRIS_DIRTY_CC_VIEWPORT;
3035
}
3036

3037
/**
3038
 * The pipe->set_framebuffer_state() driver hook.
3039
 *
3040
 * Sets the current draw FBO, including color render targets, depth,
3041
 * and stencil buffers.
3042
 */
3043
static void
3044
iris_set_framebuffer_state(struct pipe_context *ctx,
3045
                           const struct pipe_framebuffer_state *state)
3046
{
3047
   struct iris_context *ice = (struct iris_context *) ctx;
3048
   struct iris_screen *screen = (struct iris_screen *)ctx->screen;
3049
   struct isl_device *isl_dev = &screen->isl_dev;
3050
   struct pipe_framebuffer_state *cso = &ice->state.framebuffer;
3051
   struct iris_resource *zres;
3052
   struct iris_resource *stencil_res;
3053

3054
   unsigned samples = util_framebuffer_get_num_samples(state);
3055
   unsigned layers = util_framebuffer_get_num_layers(state);
3056

3057
   if (cso->samples != samples) {
3058
      ice->state.dirty |= IRIS_DIRTY_MULTISAMPLE;
3059

3060
      /* We need to toggle 3DSTATE_PS::32 Pixel Dispatch Enable */
3061
      if (GFX_VER >= 9 && (cso->samples == 16 || samples == 16))
3062
         ice->state.stage_dirty |= IRIS_STAGE_DIRTY_FS;
3063
   }
3064

3065
   if (cso->nr_cbufs != state->nr_cbufs) {
3066
      ice->state.dirty |= IRIS_DIRTY_BLEND_STATE;
3067
   }
3068

3069
   if ((cso->layers == 0) != (layers == 0)) {
3070
      ice->state.dirty |= IRIS_DIRTY_CLIP;
3071
   }
3072

3073
   if (cso->width != state->width || cso->height != state->height) {
3074
      ice->state.dirty |= IRIS_DIRTY_SF_CL_VIEWPORT;
3075
   }
3076

3077
   if (cso->zsbuf || state->zsbuf) {
3078
      ice->state.dirty |= IRIS_DIRTY_DEPTH_BUFFER;
3079
   }
3080

3081
   util_copy_framebuffer_state(cso, state);
3082
   cso->samples = samples;
3083
   cso->layers = layers;
3084

3085
   struct iris_depth_buffer_state *cso_z = &ice->state.genx->depth_buffer;
3086

3087
   struct isl_view view = {
3088
      .base_level = 0,
3089
      .levels = 1,
3090
      .base_array_layer = 0,
3091
      .array_len = 1,
3092
      .swizzle = ISL_SWIZZLE_IDENTITY,
3093
   };
3094

3095
   struct isl_depth_stencil_hiz_emit_info info = { .view = &view };
3096

3097
   if (cso->zsbuf) {
3098
      iris_get_depth_stencil_resources(cso->zsbuf->texture, &zres,
3099
                                       &stencil_res);
3100

3101
      view.base_level = cso->zsbuf->u.tex.level;
3102
      view.base_array_layer = cso->zsbuf->u.tex.first_layer;
3103
      view.array_len =
3104
         cso->zsbuf->u.tex.last_layer - cso->zsbuf->u.tex.first_layer + 1;
3105

3106
      if (zres) {
3107
         view.usage |= ISL_SURF_USAGE_DEPTH_BIT;
3108

3109
         info.depth_surf = &zres->surf;
3110
         info.depth_address = zres->bo->gtt_offset + zres->offset;
3111
         info.mocs = iris_mocs(zres->bo, isl_dev, view.usage);
3112

3113
         view.format = zres->surf.format;
3114

3115
         if (iris_resource_level_has_hiz(zres, view.base_level)) {
3116
            info.hiz_usage = zres->aux.usage;
3117
            info.hiz_surf = &zres->aux.surf;
3118
            info.hiz_address = zres->aux.bo->gtt_offset + zres->aux.offset;
3119
         }
3120

3121
         ice->state.hiz_usage = info.hiz_usage;
3122
      }
3123

3124
      if (stencil_res) {
3125
         view.usage |= ISL_SURF_USAGE_STENCIL_BIT;
3126
         info.stencil_aux_usage = stencil_res->aux.usage;
3127
         info.stencil_surf = &stencil_res->surf;
3128
         info.stencil_address = stencil_res->bo->gtt_offset + stencil_res->offset;
3129
         if (!zres) {
3130
            view.format = stencil_res->surf.format;
3131
            info.mocs = iris_mocs(stencil_res->bo, isl_dev, view.usage);
3132
         }
3133
      }
3134
   }
3135

3136
   isl_emit_depth_stencil_hiz_s(isl_dev, cso_z->packets, &info);
3137

3138
   /* Make a null surface for unbound buffers */
3139
   void *null_surf_map =
3140
      upload_state(ice->state.surface_uploader, &ice->state.null_fb,
3141
                   4 * GENX(RENDER_SURFACE_STATE_length), 64);
3142
   isl_null_fill_state(&screen->isl_dev, null_surf_map,
3143
                       .size = isl_extent3d(MAX2(cso->width, 1),
3144
                                            MAX2(cso->height, 1),
3145
                                            cso->layers ? cso->layers : 1));
3146
   ice->state.null_fb.offset +=
3147
      iris_bo_offset_from_base_address(iris_resource_bo(ice->state.null_fb.res));
3148

3149
   /* Render target change */
3150
   ice->state.stage_dirty |= IRIS_STAGE_DIRTY_BINDINGS_FS;
3151

3152
   ice->state.dirty |= IRIS_DIRTY_RENDER_BUFFER;
3153

3154
   ice->state.dirty |= IRIS_DIRTY_RENDER_RESOLVES_AND_FLUSHES;
3155

3156
   ice->state.stage_dirty |=
3157
      ice->state.stage_dirty_for_nos[IRIS_NOS_FRAMEBUFFER];
3158

3159
   if (GFX_VER == 8)
3160
      ice->state.dirty |= IRIS_DIRTY_PMA_FIX;
3161
}
3162

3163
/**
3164
 * The pipe->set_constant_buffer() driver hook.
3165
 *
3166
 * This uploads any constant data in user buffers, and references
3167
 * any UBO resources containing constant data.
3168
 */
3169
static void
3170
iris_set_constant_buffer(struct pipe_context *ctx,
3171
                         enum pipe_shader_type p_stage, unsigned index,
3172
                         bool take_ownership,
3173
                         const struct pipe_constant_buffer *input)
3174
{
3175
   struct iris_context *ice = (struct iris_context *) ctx;
3176
   gl_shader_stage stage = stage_from_pipe(p_stage);
3177
   struct iris_shader_state *shs = &ice->state.shaders[stage];
3178
   struct pipe_shader_buffer *cbuf = &shs->constbuf[index];
3179

3180
   /* TODO: Only do this if the buffer changes? */
3181
   pipe_resource_reference(&shs->constbuf_surf_state[index].res, NULL);
3182

3183
   if (input && input->buffer_size && (input->buffer || input->user_buffer)) {
3184
      shs->bound_cbufs |= 1u << index;
3185

3186
      if (input->user_buffer) {
3187
         void *map = NULL;
3188
         pipe_resource_reference(&cbuf->buffer, NULL);
3189
         u_upload_alloc(ice->ctx.const_uploader, 0, input->buffer_size, 64,
3190
                        &cbuf->buffer_offset, &cbuf->buffer, (void **) &map);
3191

3192
         if (!cbuf->buffer) {
3193
            /* Allocation was unsuccessful - just unbind */
3194
            iris_set_constant_buffer(ctx, p_stage, index, false, NULL);
3195
            return;
3196
         }
3197

3198
         assert(map);
3199
         memcpy(map, input->user_buffer, input->buffer_size);
3200
      } else if (input->buffer) {
3201
         if (take_ownership) {
3202
            pipe_resource_reference(&cbuf->buffer, NULL);
3203
            cbuf->buffer = input->buffer;
3204
         } else {
3205
            pipe_resource_reference(&cbuf->buffer, input->buffer);
3206
         }
3207

3208
         cbuf->buffer_offset = input->buffer_offset;
3209
      }
3210

3211
      cbuf->buffer_size =
3212
         MIN2(input->buffer_size,
3213
              iris_resource_bo(cbuf->buffer)->size - cbuf->buffer_offset);
3214

3215
      struct iris_resource *res = (void *) cbuf->buffer;
3216
      res->bind_history |= PIPE_BIND_CONSTANT_BUFFER;
3217
      res->bind_stages |= 1 << stage;
3218
   } else {
3219
      shs->bound_cbufs &= ~(1u << index);
3220
      pipe_resource_reference(&cbuf->buffer, NULL);
3221
   }
3222

3223
   ice->state.stage_dirty |= IRIS_STAGE_DIRTY_CONSTANTS_VS << stage;
3224
}
3225

3226
static void
3227
upload_sysvals(struct iris_context *ice,
3228
               gl_shader_stage stage,
3229
               const struct pipe_grid_info *grid)
3230
{
3231
   UNUSED struct iris_genx_state *genx = ice->state.genx;
3232
   struct iris_shader_state *shs = &ice->state.shaders[stage];
3233

3234
   struct iris_compiled_shader *shader = ice->shaders.prog[stage];
3235
   if (!shader || (shader->num_system_values == 0 &&
3236
                   shader->kernel_input_size == 0))
3237
      return;
3238

3239
   assert(shader->num_cbufs > 0);
3240

3241
   unsigned sysval_cbuf_index = shader->num_cbufs - 1;
3242
   struct pipe_shader_buffer *cbuf = &shs->constbuf[sysval_cbuf_index];
3243
   unsigned system_values_start =
3244
      ALIGN(shader->kernel_input_size, sizeof(uint32_t));
3245
   unsigned upload_size = system_values_start +
3246
                          shader->num_system_values * sizeof(uint32_t);
3247
   void *map = NULL;
3248

3249
   assert(sysval_cbuf_index < PIPE_MAX_CONSTANT_BUFFERS);
3250
   u_upload_alloc(ice->ctx.const_uploader, 0, upload_size, 64,
3251
                  &cbuf->buffer_offset, &cbuf->buffer, &map);
3252

3253
   if (shader->kernel_input_size > 0)
3254
      memcpy(map, grid->input, shader->kernel_input_size);
3255

3256
   uint32_t *sysval_map = map + system_values_start;
3257
   for (int i = 0; i < shader->num_system_values; i++) {
3258
      uint32_t sysval = shader->system_values[i];
3259
      uint32_t value = 0;
3260

3261
      if (BRW_PARAM_DOMAIN(sysval) == BRW_PARAM_DOMAIN_IMAGE) {
3262
#if GFX_VER == 8
3263
         unsigned img = BRW_PARAM_IMAGE_IDX(sysval);
3264
         unsigned offset = BRW_PARAM_IMAGE_OFFSET(sysval);
3265
         struct brw_image_param *param =
3266
            &genx->shaders[stage].image_param[img];
3267

3268
         assert(offset < sizeof(struct brw_image_param));
3269
         value = ((uint32_t *) param)[offset];
3270
#endif
3271
      } else if (sysval == BRW_PARAM_BUILTIN_ZERO) {
3272
         value = 0;
3273
      } else if (BRW_PARAM_BUILTIN_IS_CLIP_PLANE(sysval)) {
3274
         int plane = BRW_PARAM_BUILTIN_CLIP_PLANE_IDX(sysval);
3275
         int comp  = BRW_PARAM_BUILTIN_CLIP_PLANE_COMP(sysval);
3276
         value = fui(ice->state.clip_planes.ucp[plane][comp]);
3277
      } else if (sysval == BRW_PARAM_BUILTIN_PATCH_VERTICES_IN) {
3278
         if (stage == MESA_SHADER_TESS_CTRL) {
3279
            value = ice->state.vertices_per_patch;
3280
         } else {
3281
            assert(stage == MESA_SHADER_TESS_EVAL);
3282
            const struct shader_info *tcs_info =
3283
               iris_get_shader_info(ice, MESA_SHADER_TESS_CTRL);
3284
            if (tcs_info)
3285
               value = tcs_info->tess.tcs_vertices_out;
3286
            else
3287
               value = ice->state.vertices_per_patch;
3288
         }
3289
      } else if (sysval >= BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X &&
3290
                 sysval <= BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_W) {
3291
         unsigned i = sysval - BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X;
3292
         value = fui(ice->state.default_outer_level[i]);
3293
      } else if (sysval == BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X) {
3294
         value = fui(ice->state.default_inner_level[0]);
3295
      } else if (sysval == BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_Y) {
3296
         value = fui(ice->state.default_inner_level[1]);
3297
      } else if (sysval >= BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_X &&
3298
                 sysval <= BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_Z) {
3299
         unsigned i = sysval - BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_X;
3300
         value = ice->state.last_block[i];
3301
      } else if (sysval == BRW_PARAM_BUILTIN_WORK_DIM) {
3302
         value = grid->work_dim;
3303
      } else {
3304
         assert(!"unhandled system value");
3305
      }
3306

3307
      *sysval_map++ = value;
3308
   }
3309

3310
   cbuf->buffer_size = upload_size;
3311
   iris_upload_ubo_ssbo_surf_state(ice, cbuf,
3312
                                   &shs->constbuf_surf_state[sysval_cbuf_index],
3313
                                   ISL_SURF_USAGE_CONSTANT_BUFFER_BIT);
3314

3315
   shs->sysvals_need_upload = false;
3316
}
3317

3318
/**
3319
 * The pipe->set_shader_buffers() driver hook.
3320
 *
3321
 * This binds SSBOs and ABOs.  Unfortunately, we need to stream out
3322
 * SURFACE_STATE here, as the buffer offset may change each time.
3323
 */
3324
static void
3325
iris_set_shader_buffers(struct pipe_context *ctx,
3326
                        enum pipe_shader_type p_stage,
3327
                        unsigned start_slot, unsigned count,
3328
                        const struct pipe_shader_buffer *buffers,
3329
                        unsigned writable_bitmask)
3330
{
3331
   struct iris_context *ice = (struct iris_context *) ctx;
3332
   gl_shader_stage stage = stage_from_pipe(p_stage);
3333
   struct iris_shader_state *shs = &ice->state.shaders[stage];
3334

3335
   unsigned modified_bits = u_bit_consecutive(start_slot, count);
3336

3337
   shs->bound_ssbos &= ~modified_bits;
3338
   shs->writable_ssbos &= ~modified_bits;
3339
   shs->writable_ssbos |= writable_bitmask << start_slot;
3340

3341
   for (unsigned i = 0; i < count; i++) {
3342
      if (buffers && buffers[i].buffer) {
3343
         struct iris_resource *res = (void *) buffers[i].buffer;
3344
         struct pipe_shader_buffer *ssbo = &shs->ssbo[start_slot + i];
3345
         struct iris_state_ref *surf_state =
3346
            &shs->ssbo_surf_state[start_slot + i];
3347
         pipe_resource_reference(&ssbo->buffer, &res->base.b);
3348
         ssbo->buffer_offset = buffers[i].buffer_offset;
3349
         ssbo->buffer_size =
3350
            MIN2(buffers[i].buffer_size, res->bo->size - ssbo->buffer_offset);
3351

3352
         shs->bound_ssbos |= 1 << (start_slot + i);
3353

3354
         isl_surf_usage_flags_t usage = ISL_SURF_USAGE_STORAGE_BIT;
3355

3356
         iris_upload_ubo_ssbo_surf_state(ice, ssbo, surf_state, usage);
3357

3358
         res->bind_history |= PIPE_BIND_SHADER_BUFFER;
3359
         res->bind_stages |= 1 << stage;
3360

3361
         util_range_add(&res->base.b, &res->valid_buffer_range, ssbo->buffer_offset,
3362
                        ssbo->buffer_offset + ssbo->buffer_size);
3363
      } else {
3364
         pipe_resource_reference(&shs->ssbo[start_slot + i].buffer, NULL);
3365
         pipe_resource_reference(&shs->ssbo_surf_state[start_slot + i].res,
3366
                                 NULL);
3367
      }
3368
   }
3369

3370
   ice->state.stage_dirty |= IRIS_STAGE_DIRTY_BINDINGS_VS << stage;
3371
}
3372

3373
/**
 * Generic CSO destructor: releases a state object with free().
 *
 * The context argument is not used.
 */
static void
iris_delete_state(struct pipe_context *ctx, void *state)
{
   (void) ctx;

   free(state);
}
3378

3379
/**
3380
 * The pipe->set_vertex_buffers() driver hook.
3381
 *
3382
 * This translates pipe_vertex_buffer to our 3DSTATE_VERTEX_BUFFERS packet.
3383
 */
3384
static void
3385
iris_set_vertex_buffers(struct pipe_context *ctx,
3386
                        unsigned start_slot, unsigned count,
3387
                        unsigned unbind_num_trailing_slots,
3388
                        bool take_ownership,
3389
                        const struct pipe_vertex_buffer *buffers)
3390
{
3391
   struct iris_context *ice = (struct iris_context *) ctx;
3392
   struct iris_screen *screen = (struct iris_screen *)ctx->screen;
3393
   struct iris_genx_state *genx = ice->state.genx;
3394

3395
   ice->state.bound_vertex_buffers &=
3396
      ~u_bit_consecutive64(start_slot, count + unbind_num_trailing_slots);
3397

3398
   for (unsigned i = 0; i < count; i++) {
3399
      const struct pipe_vertex_buffer *buffer = buffers ? &buffers[i] : NULL;
3400
      struct iris_vertex_buffer_state *state =
3401
         &genx->vertex_buffers[start_slot + i];
3402

3403
      if (!buffer) {
3404
         pipe_resource_reference(&state->resource, NULL);
3405
         continue;
3406
      }
3407

3408
      /* We may see user buffers that are NULL bindings. */
3409
      assert(!(buffer->is_user_buffer && buffer->buffer.user != NULL));
3410

3411
      if (take_ownership) {
3412
         pipe_resource_reference(&state->resource, NULL);
3413
         state->resource = buffer->buffer.resource;
3414
      } else {
3415
         pipe_resource_reference(&state->resource, buffer->buffer.resource);
3416
      }
3417
      struct iris_resource *res = (void *) state->resource;
3418

3419
      state->offset = (int) buffer->buffer_offset;
3420

3421
      if (res) {
3422
         ice->state.bound_vertex_buffers |= 1ull << (start_slot + i);
3423
         res->bind_history |= PIPE_BIND_VERTEX_BUFFER;
3424
      }
3425

3426
      iris_pack_state(GENX(VERTEX_BUFFER_STATE), state->state, vb) {
3427
         vb.VertexBufferIndex = start_slot + i;
3428
         vb.AddressModifyEnable = true;
3429
         vb.BufferPitch = buffer->stride;
3430
         if (res) {
3431
            vb.BufferSize = res->base.b.width0 - (int) buffer->buffer_offset;
3432
            vb.BufferStartingAddress =
3433
               ro_bo(NULL, res->bo->gtt_offset + (int) buffer->buffer_offset);
3434
            vb.MOCS = iris_mocs(res->bo, &screen->isl_dev,
3435
                                ISL_SURF_USAGE_VERTEX_BUFFER_BIT);
3436
#if GFX_VER >= 12
3437
            vb.L3BypassDisable       = true;
3438
#endif
3439
         } else {
3440
            vb.NullVertexBuffer = true;
3441
         }
3442
      }
3443
   }
3444

3445
   for (unsigned i = 0; i < unbind_num_trailing_slots; i++) {
3446
      struct iris_vertex_buffer_state *state =
3447
         &genx->vertex_buffers[start_slot + count + i];
3448

3449
      pipe_resource_reference(&state->resource, NULL);
3450
   }
3451

3452
   ice->state.dirty |= IRIS_DIRTY_VERTEX_BUFFERS;
3453
}
3454

3455
/**
3456
 * Gallium CSO for vertex elements.
3457
 */
3458
struct iris_vertex_element_state {
3459
   uint32_t vertex_elements[1 + 33 * GENX(VERTEX_ELEMENT_STATE_length)];
3460
   uint32_t vf_instancing[33 * GENX(3DSTATE_VF_INSTANCING_length)];
3461
   uint32_t edgeflag_ve[GENX(VERTEX_ELEMENT_STATE_length)];
3462
   uint32_t edgeflag_vfi[GENX(3DSTATE_VF_INSTANCING_length)];
3463
   unsigned count;
3464
};
3465

3466
/**
3467
 * The pipe->create_vertex_elements() driver hook.
3468
 *
3469
 * This translates pipe_vertex_element to our 3DSTATE_VERTEX_ELEMENTS
3470
 * and 3DSTATE_VF_INSTANCING commands. The vertex_elements and vf_instancing
3471
 * arrays are ready to be emitted at draw time if no EdgeFlag or SGVs are
3472
 * needed. In these cases we will need information available at draw time.
3473
 * We setup edgeflag_ve and edgeflag_vfi as alternatives last
3474
 * 3DSTATE_VERTEX_ELEMENT and 3DSTATE_VF_INSTANCING that can be used at
3475
 * draw time if we detect that EdgeFlag is needed by the Vertex Shader.
3476
 */
3477
static void *
3478
iris_create_vertex_elements(struct pipe_context *ctx,
3479
                            unsigned count,
3480
                            const struct pipe_vertex_element *state)
3481
{
3482
   struct iris_screen *screen = (struct iris_screen *)ctx->screen;
3483
   const struct intel_device_info *devinfo = &screen->devinfo;
3484
   struct iris_vertex_element_state *cso =
3485
      malloc(sizeof(struct iris_vertex_element_state));
3486

3487
   cso->count = count;
3488

3489
   iris_pack_command(GENX(3DSTATE_VERTEX_ELEMENTS), cso->vertex_elements, ve) {
3490
      ve.DWordLength =
3491
         1 + GENX(VERTEX_ELEMENT_STATE_length) * MAX2(count, 1) - 2;
3492
   }
3493

3494
   uint32_t *ve_pack_dest = &cso->vertex_elements[1];
3495
   uint32_t *vfi_pack_dest = cso->vf_instancing;
3496

3497
   if (count == 0) {
3498
      iris_pack_state(GENX(VERTEX_ELEMENT_STATE), ve_pack_dest, ve) {
3499
         ve.Valid = true;
3500
         ve.SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT;
3501
         ve.Component0Control = VFCOMP_STORE_0;
3502
         ve.Component1Control = VFCOMP_STORE_0;
3503
         ve.Component2Control = VFCOMP_STORE_0;
3504
         ve.Component3Control = VFCOMP_STORE_1_FP;
3505
      }
3506

3507
      iris_pack_command(GENX(3DSTATE_VF_INSTANCING), vfi_pack_dest, vi) {
3508
      }
3509
   }
3510

3511
   for (int i = 0; i < count; i++) {
3512
      const struct iris_format_info fmt =
3513
         iris_format_for_usage(devinfo, state[i].src_format, 0);
3514
      unsigned comp[4] = { VFCOMP_STORE_SRC, VFCOMP_STORE_SRC,
3515
                           VFCOMP_STORE_SRC, VFCOMP_STORE_SRC };
3516

3517
      switch (isl_format_get_num_channels(fmt.fmt)) {
3518
      case 0: comp[0] = VFCOMP_STORE_0; FALLTHROUGH;
3519
      case 1: comp[1] = VFCOMP_STORE_0; FALLTHROUGH;
3520
      case 2: comp[2] = VFCOMP_STORE_0; FALLTHROUGH;
3521
      case 3:
3522
         comp[3] = isl_format_has_int_channel(fmt.fmt) ? VFCOMP_STORE_1_INT
3523
                                                       : VFCOMP_STORE_1_FP;
3524
         break;
3525
      }
3526
      iris_pack_state(GENX(VERTEX_ELEMENT_STATE), ve_pack_dest, ve) {
3527
         ve.EdgeFlagEnable = false;
3528
         ve.VertexBufferIndex = state[i].vertex_buffer_index;
3529
         ve.Valid = true;
3530
         ve.SourceElementOffset = state[i].src_offset;
3531
         ve.SourceElementFormat = fmt.fmt;
3532
         ve.Component0Control = comp[0];
3533
         ve.Component1Control = comp[1];
3534
         ve.Component2Control = comp[2];
3535
         ve.Component3Control = comp[3];
3536
      }
3537

3538
      iris_pack_command(GENX(3DSTATE_VF_INSTANCING), vfi_pack_dest, vi) {
3539
         vi.VertexElementIndex = i;
3540
         vi.InstancingEnable = state[i].instance_divisor > 0;
3541
         vi.InstanceDataStepRate = state[i].instance_divisor;
3542
      }
3543

3544
      ve_pack_dest += GENX(VERTEX_ELEMENT_STATE_length);
3545
      vfi_pack_dest += GENX(3DSTATE_VF_INSTANCING_length);
3546
   }
3547

3548
   /* An alternative version of the last VE and VFI is stored so it
3549
    * can be used at draw time in case Vertex Shader uses EdgeFlag
3550
    */
3551
   if (count) {
3552
      const unsigned edgeflag_index = count - 1;
3553
      const struct iris_format_info fmt =
3554
         iris_format_for_usage(devinfo, state[edgeflag_index].src_format, 0);
3555
      iris_pack_state(GENX(VERTEX_ELEMENT_STATE), cso->edgeflag_ve, ve) {
3556
         ve.EdgeFlagEnable = true ;
3557
         ve.VertexBufferIndex = state[edgeflag_index].vertex_buffer_index;
3558
         ve.Valid = true;
3559
         ve.SourceElementOffset = state[edgeflag_index].src_offset;
3560
         ve.SourceElementFormat = fmt.fmt;
3561
         ve.Component0Control = VFCOMP_STORE_SRC;
3562
         ve.Component1Control = VFCOMP_STORE_0;
3563
         ve.Component2Control = VFCOMP_STORE_0;
3564
         ve.Component3Control = VFCOMP_STORE_0;
3565
      }
3566
      iris_pack_command(GENX(3DSTATE_VF_INSTANCING), cso->edgeflag_vfi, vi) {
3567
         /* The vi.VertexElementIndex of the EdgeFlag Vertex Element is filled
3568
          * at draw time, as it should change if SGVs are emitted.
3569
          */
3570
         vi.InstancingEnable = state[edgeflag_index].instance_divisor > 0;
3571
         vi.InstanceDataStepRate = state[edgeflag_index].instance_divisor;
3572
      }
3573
   }
3574

3575
   return cso;
3576
}
3577

3578
/**
3579
 * The pipe->bind_vertex_elements_state() driver hook.
3580
 */
3581
static void
3582
iris_bind_vertex_elements_state(struct pipe_context *ctx, void *state)
3583
{
3584
   struct iris_context *ice = (struct iris_context *) ctx;
3585
   struct iris_vertex_element_state *old_cso = ice->state.cso_vertex_elements;
3586
   struct iris_vertex_element_state *new_cso = state;
3587

3588
   /* 3DSTATE_VF_SGVs overrides the last VE, so if the count is changing,
3589
    * we need to re-emit it to ensure we're overriding the right one.
3590
    */
3591
   if (new_cso && cso_changed(count))
3592
      ice->state.dirty |= IRIS_DIRTY_VF_SGVS;
3593

3594
   ice->state.cso_vertex_elements = state;
3595
   ice->state.dirty |= IRIS_DIRTY_VERTEX_ELEMENTS;
3596
}
3597

3598
/**
3599
 * The pipe->create_stream_output_target() driver hook.
3600
 *
3601
 * "Target" here refers to a destination buffer.  We translate this into
3602
 * a 3DSTATE_SO_BUFFER packet.  We can handle most fields, but don't yet
3603
 * know which buffer this represents, or whether we ought to zero the
3604
 * write-offsets, or append.  Those are handled in the set() hook.
3605
 */
3606
static struct pipe_stream_output_target *
3607
iris_create_stream_output_target(struct pipe_context *ctx,
3608
                                 struct pipe_resource *p_res,
3609
                                 unsigned buffer_offset,
3610
                                 unsigned buffer_size)
3611
{
3612
   struct iris_resource *res = (void *) p_res;
3613
   struct iris_stream_output_target *cso = calloc(1, sizeof(*cso));
3614
   if (!cso)
3615
      return NULL;
3616

3617
   res->bind_history |= PIPE_BIND_STREAM_OUTPUT;
3618

3619
   pipe_reference_init(&cso->base.reference, 1);
3620
   pipe_resource_reference(&cso->base.buffer, p_res);
3621
   cso->base.buffer_offset = buffer_offset;
3622
   cso->base.buffer_size = buffer_size;
3623
   cso->base.context = ctx;
3624

3625
   util_range_add(&res->base.b, &res->valid_buffer_range, buffer_offset,
3626
                  buffer_offset + buffer_size);
3627

3628
   return &cso->base;
3629
}
3630

3631
static void
3632
iris_stream_output_target_destroy(struct pipe_context *ctx,
3633
                                  struct pipe_stream_output_target *state)
3634
{
3635
   struct iris_stream_output_target *cso = (void *) state;
3636

3637
   pipe_resource_reference(&cso->base.buffer, NULL);
3638
   pipe_resource_reference(&cso->offset.res, NULL);
3639

3640
   free(cso);
3641
}
3642

3643
/**
3644
 * The pipe->set_stream_output_targets() driver hook.
3645
 *
3646
 * At this point, we know which targets are bound to a particular index,
3647
 * and also whether we want to append or start over.  We can finish the
3648
 * 3DSTATE_SO_BUFFER packets we started earlier.
3649
 */
3650
static void
3651
iris_set_stream_output_targets(struct pipe_context *ctx,
3652
                               unsigned num_targets,
3653
                               struct pipe_stream_output_target **targets,
3654
                               const unsigned *offsets)
3655
{
3656
   struct iris_context *ice = (struct iris_context *) ctx;
3657
   struct iris_genx_state *genx = ice->state.genx;
3658
   uint32_t *so_buffers = genx->so_buffers;
3659
   struct iris_screen *screen = (struct iris_screen *)ctx->screen;
3660

3661
   const bool active = num_targets > 0;
3662
   if (ice->state.streamout_active != active) {
3663
      ice->state.streamout_active = active;
3664
      ice->state.dirty |= IRIS_DIRTY_STREAMOUT;
3665

3666
      /* We only emit 3DSTATE_SO_DECL_LIST when streamout is active, because
3667
       * it's a non-pipelined command.  If we're switching streamout on, we
3668
       * may have missed emitting it earlier, so do so now.  (We're already
3669
       * taking a stall to update 3DSTATE_SO_BUFFERS anyway...)
3670
       */
3671
      if (active) {
3672
         ice->state.dirty |= IRIS_DIRTY_SO_DECL_LIST;
3673
      } else {
3674
         uint32_t flush = 0;
3675
         for (int i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
3676
            struct iris_stream_output_target *tgt =
3677
               (void *) ice->state.so_target[i];
3678
            if (tgt) {
3679
               struct iris_resource *res = (void *) tgt->base.buffer;
3680

3681
               flush |= iris_flush_bits_for_history(ice, res);
3682
               iris_dirty_for_history(ice, res);
3683
            }
3684
         }
3685
#if GFX_VER >= 12
3686
         /* SO draws require flushing of const cache to make SO data
3687
          * observable when VB/IB are cached in L3.
3688
          */
3689
         if (flush & PIPE_CONTROL_VF_CACHE_INVALIDATE)
3690
            flush |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
3691
#endif
3692
         iris_emit_pipe_control_flush(&ice->batches[IRIS_BATCH_RENDER],
3693
                                      "make streamout results visible", flush);
3694
      }
3695
   }
3696

3697
   for (int i = 0; i < 4; i++) {
3698
      pipe_so_target_reference(&ice->state.so_target[i],
3699
                               i < num_targets ? targets[i] : NULL);
3700
   }
3701

3702
   /* No need to update 3DSTATE_SO_BUFFER unless SOL is active. */
3703
   if (!active)
3704
      return;
3705

3706
   for (unsigned i = 0; i < 4; i++,
3707
        so_buffers += GENX(3DSTATE_SO_BUFFER_length)) {
3708

3709
      struct iris_stream_output_target *tgt = (void *) ice->state.so_target[i];
3710
      unsigned offset = offsets[i];
3711

3712
      if (!tgt) {
3713
         iris_pack_command(GENX(3DSTATE_SO_BUFFER), so_buffers, sob) {
3714
#if GFX_VER < 12
3715
            sob.SOBufferIndex = i;
3716
#else
3717
            sob._3DCommandOpcode = 0;
3718
            sob._3DCommandSubOpcode = SO_BUFFER_INDEX_0_CMD + i;
3719
#endif
3720
         }
3721
         continue;
3722
      }
3723

3724
      if (!tgt->offset.res)
3725
         upload_state(ctx->const_uploader, &tgt->offset, sizeof(uint32_t), 4);
3726

3727
      struct iris_resource *res = (void *) tgt->base.buffer;
3728

3729
      /* Note that offsets[i] will either be 0, causing us to zero
3730
       * the value in the buffer, or 0xFFFFFFFF, which happens to mean
3731
       * "continue appending at the existing offset."
3732
       */
3733
      assert(offset == 0 || offset == 0xFFFFFFFF);
3734

3735
      /* When we're first called with an offset of 0, we want the next
3736
       * 3DSTATE_SO_BUFFER packets to reset the offset to the beginning.
3737
       * Any further times we emit those packets, we want to use 0xFFFFFFFF
3738
       * to continue appending from the current offset.
3739
       *
3740
       * Note that we might be called by Begin (offset = 0), Pause, then
3741
       * Resume (offset = 0xFFFFFFFF) before ever drawing (where these
3742
       * commands will actually be sent to the GPU).  In this case, we
3743
       * don't want to append - we still want to do our initial zeroing.
3744
       */
3745
      if (offset == 0)
3746
         tgt->zero_offset = true;
3747

3748
      iris_pack_command(GENX(3DSTATE_SO_BUFFER), so_buffers, sob) {
3749
#if GFX_VER < 12
3750
         sob.SOBufferIndex = i;
3751
#else
3752
         sob._3DCommandOpcode = 0;
3753
         sob._3DCommandSubOpcode = SO_BUFFER_INDEX_0_CMD + i;
3754
#endif
3755
         sob.SurfaceBaseAddress =
3756
            rw_bo(NULL, res->bo->gtt_offset + tgt->base.buffer_offset,
3757
                  IRIS_DOMAIN_OTHER_WRITE);
3758
         sob.SOBufferEnable = true;
3759
         sob.StreamOffsetWriteEnable = true;
3760
         sob.StreamOutputBufferOffsetAddressEnable = true;
3761
         sob.MOCS = iris_mocs(res->bo, &screen->isl_dev, 0);
3762

3763
         sob.SurfaceSize = MAX2(tgt->base.buffer_size / 4, 1) - 1;
3764
         sob.StreamOutputBufferOffsetAddress =
3765
            rw_bo(NULL, iris_resource_bo(tgt->offset.res)->gtt_offset +
3766
                        tgt->offset.offset, IRIS_DOMAIN_OTHER_WRITE);
3767
         sob.StreamOffset = 0xFFFFFFFF; /* not offset, see above */
3768
      }
3769
   }
3770

3771
   ice->state.dirty |= IRIS_DIRTY_SO_BUFFERS;
3772
}
3773

3774
/**
3775
 * An iris-vtable helper for encoding the 3DSTATE_SO_DECL_LIST and
3776
 * 3DSTATE_STREAMOUT packets.
3777
 *
3778
 * 3DSTATE_SO_DECL_LIST is a list of shader outputs we want the streamout
3779
 * hardware to record.  We can create it entirely based on the shader, with
3780
 * no dynamic state dependencies.
3781
 *
3782
 * 3DSTATE_STREAMOUT is an annoying mix of shader-based information and
3783
 * state-based settings.  We capture the shader-related ones here, and merge
3784
 * the rest in at draw time.
3785
 */
3786
static uint32_t *
3787
iris_create_so_decl_list(const struct pipe_stream_output_info *info,
3788
                         const struct brw_vue_map *vue_map)
3789
{
3790
   struct GENX(SO_DECL) so_decl[MAX_VERTEX_STREAMS][128];
3791
   int buffer_mask[MAX_VERTEX_STREAMS] = {0, 0, 0, 0};
3792
   int next_offset[MAX_VERTEX_STREAMS] = {0, 0, 0, 0};
3793
   int decls[MAX_VERTEX_STREAMS] = {0, 0, 0, 0};
3794
   int max_decls = 0;
3795
   STATIC_ASSERT(ARRAY_SIZE(so_decl[0]) >= MAX_PROGRAM_OUTPUTS);
3796

3797
   memset(so_decl, 0, sizeof(so_decl));
3798

3799
   /* Construct the list of SO_DECLs to be emitted.  The formatting of the
3800
    * command feels strange -- each dword pair contains a SO_DECL per stream.
3801
    */
3802
   for (unsigned i = 0; i < info->num_outputs; i++) {
3803
      const struct pipe_stream_output *output = &info->output[i];
3804
      const int buffer = output->output_buffer;
3805
      const int varying = output->register_index;
3806
      const unsigned stream_id = output->stream;
3807
      assert(stream_id < MAX_VERTEX_STREAMS);
3808

3809
      buffer_mask[stream_id] |= 1 << buffer;
3810

3811
      assert(vue_map->varying_to_slot[varying] >= 0);
3812

3813
      /* Mesa doesn't store entries for gl_SkipComponents in the Outputs[]
3814
       * array.  Instead, it simply increments DstOffset for the following
3815
       * input by the number of components that should be skipped.
3816
       *
3817
       * Our hardware is unusual in that it requires us to program SO_DECLs
3818
       * for fake "hole" components, rather than simply taking the offset
3819
       * for each real varying.  Each hole can have size 1, 2, 3, or 4; we
3820
       * program as many size = 4 holes as we can, then a final hole to
3821
       * accommodate the final 1, 2, or 3 remaining.
3822
       */
3823
      int skip_components = output->dst_offset - next_offset[buffer];
3824

3825
      while (skip_components > 0) {
3826
         so_decl[stream_id][decls[stream_id]++] = (struct GENX(SO_DECL)) {
3827
            .HoleFlag = 1,
3828
            .OutputBufferSlot = output->output_buffer,
3829
            .ComponentMask = (1 << MIN2(skip_components, 4)) - 1,
3830
         };
3831
         skip_components -= 4;
3832
      }
3833

3834
      next_offset[buffer] = output->dst_offset + output->num_components;
3835

3836
      so_decl[stream_id][decls[stream_id]++] = (struct GENX(SO_DECL)) {
3837
         .OutputBufferSlot = output->output_buffer,
3838
         .RegisterIndex = vue_map->varying_to_slot[varying],
3839
         .ComponentMask =
3840
            ((1 << output->num_components) - 1) << output->start_component,
3841
      };
3842

3843
      if (decls[stream_id] > max_decls)
3844
         max_decls = decls[stream_id];
3845
   }
3846

3847
   unsigned dwords = GENX(3DSTATE_STREAMOUT_length) + (3 + 2 * max_decls);
3848
   uint32_t *map = ralloc_size(NULL, sizeof(uint32_t) * dwords);
3849
   uint32_t *so_decl_map = map + GENX(3DSTATE_STREAMOUT_length);
3850

3851
   iris_pack_command(GENX(3DSTATE_STREAMOUT), map, sol) {
3852
      int urb_entry_read_offset = 0;
3853
      int urb_entry_read_length = (vue_map->num_slots + 1) / 2 -
3854
         urb_entry_read_offset;
3855

3856
      /* We always read the whole vertex.  This could be reduced at some
3857
       * point by reading less and offsetting the register index in the
3858
       * SO_DECLs.
3859
       */
3860
      sol.Stream0VertexReadOffset = urb_entry_read_offset;
3861
      sol.Stream0VertexReadLength = urb_entry_read_length - 1;
3862
      sol.Stream1VertexReadOffset = urb_entry_read_offset;
3863
      sol.Stream1VertexReadLength = urb_entry_read_length - 1;
3864
      sol.Stream2VertexReadOffset = urb_entry_read_offset;
3865
      sol.Stream2VertexReadLength = urb_entry_read_length - 1;
3866
      sol.Stream3VertexReadOffset = urb_entry_read_offset;
3867
      sol.Stream3VertexReadLength = urb_entry_read_length - 1;
3868

3869
      /* Set buffer pitches; 0 means unbound. */
3870
      sol.Buffer0SurfacePitch = 4 * info->stride[0];
3871
      sol.Buffer1SurfacePitch = 4 * info->stride[1];
3872
      sol.Buffer2SurfacePitch = 4 * info->stride[2];
3873
      sol.Buffer3SurfacePitch = 4 * info->stride[3];
3874
   }
3875

3876
   iris_pack_command(GENX(3DSTATE_SO_DECL_LIST), so_decl_map, list) {
3877
      list.DWordLength = 3 + 2 * max_decls - 2;
3878
      list.StreamtoBufferSelects0 = buffer_mask[0];
3879
      list.StreamtoBufferSelects1 = buffer_mask[1];
3880
      list.StreamtoBufferSelects2 = buffer_mask[2];
3881
      list.StreamtoBufferSelects3 = buffer_mask[3];
3882
      list.NumEntries0 = decls[0];
3883
      list.NumEntries1 = decls[1];
3884
      list.NumEntries2 = decls[2];
3885
      list.NumEntries3 = decls[3];
3886
   }
3887

3888
   for (int i = 0; i < max_decls; i++) {
3889
      iris_pack_state(GENX(SO_DECL_ENTRY), so_decl_map + 3 + i * 2, entry) {
3890
         entry.Stream0Decl = so_decl[0][i];
3891
         entry.Stream1Decl = so_decl[1][i];
3892
         entry.Stream2Decl = so_decl[2][i];
3893
         entry.Stream3Decl = so_decl[3][i];
3894
      }
3895
   }
3896

3897
   return map;
3898
}
3899

3900
static void
3901
iris_compute_sbe_urb_read_interval(uint64_t fs_input_slots,
3902
                                   const struct brw_vue_map *last_vue_map,
3903
                                   bool two_sided_color,
3904
                                   unsigned *out_offset,
3905
                                   unsigned *out_length)
3906
{
3907
   /* The compiler computes the first URB slot without considering COL/BFC
3908
    * swizzling (because it doesn't know whether it's enabled), so we need
3909
    * to do that here too.  This may result in a smaller offset, which
3910
    * should be safe.
3911
    */
3912
   const unsigned first_slot =
3913
      brw_compute_first_urb_slot_required(fs_input_slots, last_vue_map);
3914

3915
   /* This becomes the URB read offset (counted in pairs of slots). */
3916
   assert(first_slot % 2 == 0);
3917
   *out_offset = first_slot / 2;
3918

3919
   /* We need to adjust the inputs read to account for front/back color
3920
    * swizzling, as it can make the URB length longer.
3921
    */
3922
   for (int c = 0; c <= 1; c++) {
3923
      if (fs_input_slots & (VARYING_BIT_COL0 << c)) {
3924
         /* If two sided color is enabled, the fragment shader's gl_Color
3925
          * (COL0) input comes from either the gl_FrontColor (COL0) or
3926
          * gl_BackColor (BFC0) input varyings.  Mark BFC as used, too.
3927
          */
3928
         if (two_sided_color)
3929
            fs_input_slots |= (VARYING_BIT_BFC0 << c);
3930

3931
         /* If front color isn't written, we opt to give them back color
3932
          * instead of an undefined value.  Switch from COL to BFC.
3933
          */
3934
         if (last_vue_map->varying_to_slot[VARYING_SLOT_COL0 + c] == -1) {
3935
            fs_input_slots &= ~(VARYING_BIT_COL0 << c);
3936
            fs_input_slots |= (VARYING_BIT_BFC0 << c);
3937
         }
3938
      }
3939
   }
3940

3941
   /* Compute the minimum URB Read Length necessary for the FS inputs.
3942
    *
3943
    * From the Sandy Bridge PRM, Volume 2, Part 1, documentation for
3944
    * 3DSTATE_SF DWord 1 bits 15:11, "Vertex URB Entry Read Length":
3945
    *
3946
    * "This field should be set to the minimum length required to read the
3947
    *  maximum source attribute.  The maximum source attribute is indicated
3948
    *  by the maximum value of the enabled Attribute # Source Attribute if
3949
    *  Attribute Swizzle Enable is set, Number of Output Attributes-1 if
3950
    *  enable is not set.
3951
    *  read_length = ceiling((max_source_attr + 1) / 2)
3952
    *
3953
    *  [errata] Corruption/Hang possible if length programmed larger than
3954
    *  recommended"
3955
    *
3956
    * Similar text exists for Ivy Bridge.
3957
    *
3958
    * We find the last URB slot that's actually read by the FS.
3959
    */
3960
   unsigned last_read_slot = last_vue_map->num_slots - 1;
3961
   while (last_read_slot > first_slot && !(fs_input_slots &
3962
          (1ull << last_vue_map->slot_to_varying[last_read_slot])))
3963
      --last_read_slot;
3964

3965
   /* The URB read length is the difference of the two, counted in pairs. */
3966
   *out_length = DIV_ROUND_UP(last_read_slot - first_slot + 1, 2);
3967
}
3968

3969
static void
3970
iris_emit_sbe_swiz(struct iris_batch *batch,
3971
                   const struct iris_context *ice,
3972
                   const struct brw_vue_map *vue_map,
3973
                   unsigned urb_read_offset,
3974
                   unsigned sprite_coord_enables)
3975
{
3976
   struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) attr_overrides[16] = {};
3977
   const struct brw_wm_prog_data *wm_prog_data = (void *)
3978
      ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data;
3979
   const struct iris_rasterizer_state *cso_rast = ice->state.cso_rast;
3980

3981
   /* XXX: this should be generated when putting programs in place */
3982

3983
   for (uint8_t idx = 0; idx < wm_prog_data->urb_setup_attribs_count; idx++) {
3984
      const uint8_t fs_attr = wm_prog_data->urb_setup_attribs[idx];
3985
      const int input_index = wm_prog_data->urb_setup[fs_attr];
3986
      if (input_index < 0 || input_index >= 16)
3987
         continue;
3988

3989
      struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) *attr =
3990
         &attr_overrides[input_index];
3991
      int slot = vue_map->varying_to_slot[fs_attr];
3992

3993
      /* Viewport and Layer are stored in the VUE header.  We need to override
3994
       * them to zero if earlier stages didn't write them, as GL requires that
3995
       * they read back as zero when not explicitly set.
3996
       */
3997
      switch (fs_attr) {
3998
      case VARYING_SLOT_VIEWPORT:
3999
      case VARYING_SLOT_LAYER:
4000
         attr->ComponentOverrideX = true;
4001
         attr->ComponentOverrideW = true;
4002
         attr->ConstantSource = CONST_0000;
4003

4004
         if (!(vue_map->slots_valid & VARYING_BIT_LAYER))
4005
            attr->ComponentOverrideY = true;
4006
         if (!(vue_map->slots_valid & VARYING_BIT_VIEWPORT))
4007
            attr->ComponentOverrideZ = true;
4008
         continue;
4009

4010
      case VARYING_SLOT_PRIMITIVE_ID:
4011
         /* Override if the previous shader stage didn't write gl_PrimitiveID. */
4012
         if (slot == -1) {
4013
            attr->ComponentOverrideX = true;
4014
            attr->ComponentOverrideY = true;
4015
            attr->ComponentOverrideZ = true;
4016
            attr->ComponentOverrideW = true;
4017
            attr->ConstantSource = PRIM_ID;
4018
            continue;
4019
         }
4020
         break;
4021

4022
      default:
4023
         break;
4024
      }
4025

4026
      if (sprite_coord_enables & (1 << input_index))
4027
         continue;
4028

4029
      /* If there was only a back color written but not front, use back
4030
       * as the color instead of undefined.
4031
       */
4032
      if (slot == -1 && fs_attr == VARYING_SLOT_COL0)
4033
         slot = vue_map->varying_to_slot[VARYING_SLOT_BFC0];
4034
      if (slot == -1 && fs_attr == VARYING_SLOT_COL1)
4035
         slot = vue_map->varying_to_slot[VARYING_SLOT_BFC1];
4036

4037
      /* Not written by the previous stage - undefined. */
4038
      if (slot == -1) {
4039
         attr->ComponentOverrideX = true;
4040
         attr->ComponentOverrideY = true;
4041
         attr->ComponentOverrideZ = true;
4042
         attr->ComponentOverrideW = true;
4043
         attr->ConstantSource = CONST_0001_FLOAT;
4044
         continue;
4045
      }
4046

4047
      /* Compute the location of the attribute relative to the read offset,
4048
       * which is counted in 256-bit increments (two 128-bit VUE slots).
4049
       */
4050
      const int source_attr = slot - 2 * urb_read_offset;
4051
      assert(source_attr >= 0 && source_attr <= 32);
4052
      attr->SourceAttribute = source_attr;
4053

4054
      /* If we are doing two-sided color, and the VUE slot following this one
4055
       * represents a back-facing color, then we need to instruct the SF unit
4056
       * to do back-facing swizzling.
4057
       */
4058
      if (cso_rast->light_twoside &&
4059
          ((vue_map->slot_to_varying[slot] == VARYING_SLOT_COL0 &&
4060
            vue_map->slot_to_varying[slot+1] == VARYING_SLOT_BFC0) ||
4061
           (vue_map->slot_to_varying[slot] == VARYING_SLOT_COL1 &&
4062
            vue_map->slot_to_varying[slot+1] == VARYING_SLOT_BFC1)))
4063
         attr->SwizzleSelect = INPUTATTR_FACING;
4064
   }
4065

4066
   iris_emit_cmd(batch, GENX(3DSTATE_SBE_SWIZ), sbes) {
4067
      for (int i = 0; i < 16; i++)
4068
         sbes.Attribute[i] = attr_overrides[i];
4069
   }
4070
}
4071

4072
static bool
4073
iris_is_drawing_points(const struct iris_context *ice)
4074
{
4075
   const struct iris_rasterizer_state *cso_rast = ice->state.cso_rast;
4076

4077
   if (cso_rast->fill_mode_point) {
4078
      return true;
4079
   }
4080

4081
   if (ice->shaders.prog[MESA_SHADER_GEOMETRY]) {
4082
      const struct brw_gs_prog_data *gs_prog_data =
4083
         (void *) ice->shaders.prog[MESA_SHADER_GEOMETRY]->prog_data;
4084
      return gs_prog_data->output_topology == _3DPRIM_POINTLIST;
4085
   } else if (ice->shaders.prog[MESA_SHADER_TESS_EVAL]) {
4086
      const struct brw_tes_prog_data *tes_data =
4087
         (void *) ice->shaders.prog[MESA_SHADER_TESS_EVAL]->prog_data;
4088
      return tes_data->output_topology == BRW_TESS_OUTPUT_TOPOLOGY_POINT;
4089
   } else {
4090
      return ice->state.prim_mode == PIPE_PRIM_POINTS;
4091
   }
4092
}
4093

4094
static unsigned
4095
iris_calculate_point_sprite_overrides(const struct brw_wm_prog_data *prog_data,
4096
                                      const struct iris_rasterizer_state *cso)
4097
{
4098
   unsigned overrides = 0;
4099

4100
   if (prog_data->urb_setup[VARYING_SLOT_PNTC] != -1)
4101
      overrides |= 1 << prog_data->urb_setup[VARYING_SLOT_PNTC];
4102

4103
   for (int i = 0; i < 8; i++) {
4104
      if ((cso->sprite_coord_enable & (1 << i)) &&
4105
          prog_data->urb_setup[VARYING_SLOT_TEX0 + i] != -1)
4106
         overrides |= 1 << prog_data->urb_setup[VARYING_SLOT_TEX0 + i];
4107
   }
4108

4109
   return overrides;
4110
}
4111

4112
static void
4113
iris_emit_sbe(struct iris_batch *batch, const struct iris_context *ice)
4114
{
4115
   const struct iris_rasterizer_state *cso_rast = ice->state.cso_rast;
4116
   const struct brw_wm_prog_data *wm_prog_data = (void *)
4117
      ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data;
4118
   const struct shader_info *fs_info =
4119
      iris_get_shader_info(ice, MESA_SHADER_FRAGMENT);
4120
   const struct brw_vue_map *last_vue_map =
4121
      &brw_vue_prog_data(ice->shaders.last_vue_shader->prog_data)->vue_map;
4122

4123
   unsigned urb_read_offset, urb_read_length;
4124
   iris_compute_sbe_urb_read_interval(fs_info->inputs_read,
4125
                                      last_vue_map,
4126
                                      cso_rast->light_twoside,
4127
                                      &urb_read_offset, &urb_read_length);
4128

4129
   unsigned sprite_coord_overrides =
4130
      iris_is_drawing_points(ice) ?
4131
      iris_calculate_point_sprite_overrides(wm_prog_data, cso_rast) : 0;
4132

4133
   iris_emit_cmd(batch, GENX(3DSTATE_SBE), sbe) {
4134
      sbe.AttributeSwizzleEnable = true;
4135
      sbe.NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs;
4136
      sbe.PointSpriteTextureCoordinateOrigin = cso_rast->sprite_coord_mode;
4137
      sbe.VertexURBEntryReadOffset = urb_read_offset;
4138
      sbe.VertexURBEntryReadLength = urb_read_length;
4139
      sbe.ForceVertexURBEntryReadOffset = true;
4140
      sbe.ForceVertexURBEntryReadLength = true;
4141
      sbe.ConstantInterpolationEnable = wm_prog_data->flat_inputs;
4142
      sbe.PointSpriteTextureCoordinateEnable = sprite_coord_overrides;
4143
#if GFX_VER >= 9
4144
      for (int i = 0; i < 32; i++) {
4145
         sbe.AttributeActiveComponentFormat[i] = ACTIVE_COMPONENT_XYZW;
4146
      }
4147
#endif
4148
   }
4149

4150
   iris_emit_sbe_swiz(batch, ice, last_vue_map, urb_read_offset,
4151
                      sprite_coord_overrides);
4152
}
4153

4154
/* ------------------------------------------------------------------- */
4155

4156
/**
4157
 * Populate VS program key fields based on the current state.
4158
 */
4159
static void
4160
iris_populate_vs_key(const struct iris_context *ice,
4161
                     const struct shader_info *info,
4162
                     gl_shader_stage last_stage,
4163
                     struct iris_vs_prog_key *key)
4164
{
4165
   const struct iris_rasterizer_state *cso_rast = ice->state.cso_rast;
4166

4167
   if (info->clip_distance_array_size == 0 &&
4168
       (info->outputs_written & (VARYING_BIT_POS | VARYING_BIT_CLIP_VERTEX)) &&
4169
       last_stage == MESA_SHADER_VERTEX)
4170
      key->vue.nr_userclip_plane_consts = cso_rast->num_clip_plane_consts;
4171
}
4172

4173
/**
 * Populate TCS program key fields based on the current state.
 *
 * No TCS key field currently depends on context state, so this does nothing.
 */
static void
iris_populate_tcs_key(const struct iris_context *ice,
                      struct iris_tcs_prog_key *key)
{
   /* Intentionally empty. */
}
4181

4182
/**
4183
 * Populate TES program key fields based on the current state.
4184
 */
4185
static void
4186
iris_populate_tes_key(const struct iris_context *ice,
4187
                      const struct shader_info *info,
4188
                      gl_shader_stage last_stage,
4189
                      struct iris_tes_prog_key *key)
4190
{
4191
   const struct iris_rasterizer_state *cso_rast = ice->state.cso_rast;
4192

4193
   if (info->clip_distance_array_size == 0 &&
4194
       (info->outputs_written & (VARYING_BIT_POS | VARYING_BIT_CLIP_VERTEX)) &&
4195
       last_stage == MESA_SHADER_TESS_EVAL)
4196
      key->vue.nr_userclip_plane_consts = cso_rast->num_clip_plane_consts;
4197
}
4198

4199
/**
4200
 * Populate GS program key fields based on the current state.
4201
 */
4202
static void
4203
iris_populate_gs_key(const struct iris_context *ice,
4204
                     const struct shader_info *info,
4205
                     gl_shader_stage last_stage,
4206
                     struct iris_gs_prog_key *key)
4207
{
4208
   const struct iris_rasterizer_state *cso_rast = ice->state.cso_rast;
4209

4210
   if (info->clip_distance_array_size == 0 &&
4211
       (info->outputs_written & (VARYING_BIT_POS | VARYING_BIT_CLIP_VERTEX)) &&
4212
       last_stage == MESA_SHADER_GEOMETRY)
4213
      key->vue.nr_userclip_plane_consts = cso_rast->num_clip_plane_consts;
4214
}
4215

4216
/**
4217
 * Populate FS program key fields based on the current state.
4218
 */
4219
static void
4220
iris_populate_fs_key(const struct iris_context *ice,
4221
                     const struct shader_info *info,
4222
                     struct iris_fs_prog_key *key)
4223
{
4224
   struct iris_screen *screen = (void *) ice->ctx.screen;
4225
   const struct pipe_framebuffer_state *fb = &ice->state.framebuffer;
4226
   const struct iris_depth_stencil_alpha_state *zsa = ice->state.cso_zsa;
4227
   const struct iris_rasterizer_state *rast = ice->state.cso_rast;
4228
   const struct iris_blend_state *blend = ice->state.cso_blend;
4229

4230
   key->nr_color_regions = fb->nr_cbufs;
4231

4232
   key->clamp_fragment_color = rast->clamp_fragment_color;
4233

4234
   key->alpha_to_coverage = blend->alpha_to_coverage;
4235

4236
   key->alpha_test_replicate_alpha = fb->nr_cbufs > 1 && zsa->alpha_enabled;
4237

4238
   key->flat_shade = rast->flatshade &&
4239
      (info->inputs_read & (VARYING_BIT_COL0 | VARYING_BIT_COL1));
4240

4241
   key->persample_interp = rast->force_persample_interp;
4242
   key->multisample_fbo = rast->multisample && fb->samples > 1;
4243

4244
   key->coherent_fb_fetch = GFX_VER >= 9;
4245

4246
   key->force_dual_color_blend =
4247
      screen->driconf.dual_color_blend_by_location &&
4248
      (blend->blend_enables & 1) && blend->dual_color_blending;
4249

4250
   /* TODO: Respect glHint for key->high_quality_derivatives */
4251
}
4252

4253
/**
 * Populate CS program key fields based on the current state.
 *
 * No CS key field currently depends on context state, so this does nothing.
 */
static void
iris_populate_cs_key(const struct iris_context *ice,
                     struct iris_cs_prog_key *key)
{
   /* Intentionally empty. */
}
4258

4259
static uint64_t
4260
KSP(const struct iris_compiled_shader *shader)
4261
{
4262
   struct iris_resource *res = (void *) shader->assembly.res;
4263
   return iris_bo_offset_from_base_address(res->bo) + shader->assembly.offset;
4264
}
4265

4266
/**
 * Fill out the thread dispatch fields shared by the 3DSTATE_VS, 3DSTATE_HS,
 * 3DSTATE_DS and 3DSTATE_GS packets.
 *
 * Expands to a sequence of statements and expects `shader`, `prog_data` and
 * `vue_prog_data` to be in scope where it is used.  `prefix` selects the
 * packet-specific URB read field names (prefix##URBEntryReadLength and
 * prefix##URBEntryReadOffset).  `stage` is not referenced by the expansion.
 */
#define INIT_THREAD_DISPATCH_FIELDS(pkt, prefix, stage)                   \
   pkt.Enable           = true;                                           \
   pkt.StatisticsEnable = true;                                           \
                                                                          \
   pkt.KernelStartPointer     = KSP(shader);                              \
   pkt.FloatingPointMode      = prog_data->use_alt_mode;                  \
   pkt.BindingTableEntryCount = shader->bt.size_bytes / 4;                \
                                                                          \
   pkt.prefix##URBEntryReadOffset = 0;                                    \
   pkt.prefix##URBEntryReadLength = vue_prog_data->urb_read_length;       \
   pkt.DispatchGRFStartRegisterForURBData =                               \
      prog_data->dispatch_grf_start_reg;                                  \
                                                                          \
   if (prog_data->total_scratch) {                                        \
      INIT_THREAD_SCRATCH_SIZE(pkt)                                       \
   }
4282

4283
/*
 * Scratch space helpers.
 *
 * INIT_THREAD_SCRATCH_SIZE(pkt) programs the per-thread scratch size into a
 * packet and expects `prog_data` to be in scope.  It expands to nothing on
 * GFX_VERx10 >= 125.
 *
 * MERGE_SCRATCH_ADDR(name) packs a second copy of the `name` packet that
 * contains only the scratch address, then merges it with the already packed
 * dwords.  It expects `batch`, `pkt` and `scratch_addr` to be in scope.
 *
 * ffs(total_scratch) - 11 evaluates to 0 for a 1024-byte scratch size and
 * grows by one for each doubling (presumably total_scratch is always a
 * power of two -- confirm against the compiler).
 */
#if GFX_VERx10 < 125
#define INIT_THREAD_SCRATCH_SIZE(pkt)                                     \
   pkt.PerThreadScratchSpace = ffs(prog_data->total_scratch) - 11;
#define MERGE_SCRATCH_ADDR(name)                                          \
{                                                                         \
   uint32_t pkt2[GENX(name##_length)] = {0};                              \
   _iris_pack_command(batch, GENX(name), pkt2, p) {                       \
      p.ScratchSpaceBasePointer =                                         \
         rw_bo(NULL, scratch_addr, IRIS_DOMAIN_NONE);                     \
   }                                                                      \
   iris_emit_merge(batch, pkt, pkt2, GENX(name##_length));                \
}
#else
#define INIT_THREAD_SCRATCH_SIZE(pkt)
#define MERGE_SCRATCH_ADDR(name)                                          \
{                                                                         \
   uint32_t pkt2[GENX(name##_length)] = {0};                              \
   _iris_pack_command(batch, GENX(name), pkt2, p) {                       \
      p.ScratchSpaceBuffer = scratch_addr >> 4;                           \
   }                                                                      \
   iris_emit_merge(batch, pkt, pkt2, GENX(name##_length));                \
}
#endif
4306

4307

4308
/**
4309
 * Encode most of 3DSTATE_VS based on the compiled shader.
4310
 */
4311
static void
4312
iris_store_vs_state(const struct intel_device_info *devinfo,
4313
                    struct iris_compiled_shader *shader)
4314
{
4315
   struct brw_stage_prog_data *prog_data = shader->prog_data;
4316
   struct brw_vue_prog_data *vue_prog_data = (void *) prog_data;
4317

4318
   iris_pack_command(GENX(3DSTATE_VS), shader->derived_data, vs) {
4319
      INIT_THREAD_DISPATCH_FIELDS(vs, Vertex, MESA_SHADER_VERTEX);
4320
      vs.MaximumNumberofThreads = devinfo->max_vs_threads - 1;
4321
      vs.SIMD8DispatchEnable = true;
4322
      vs.UserClipDistanceCullTestEnableBitmask =
4323
         vue_prog_data->cull_distance_mask;
4324
   }
4325
}
4326

4327
/**
4328
 * Encode most of 3DSTATE_HS based on the compiled shader.
4329
 */
4330
static void
4331
iris_store_tcs_state(const struct intel_device_info *devinfo,
4332
                     struct iris_compiled_shader *shader)
4333
{
4334
   struct brw_stage_prog_data *prog_data = shader->prog_data;
4335
   struct brw_vue_prog_data *vue_prog_data = (void *) prog_data;
4336
   struct brw_tcs_prog_data *tcs_prog_data = (void *) prog_data;
4337

4338
   iris_pack_command(GENX(3DSTATE_HS), shader->derived_data, hs) {
4339
      INIT_THREAD_DISPATCH_FIELDS(hs, Vertex, MESA_SHADER_TESS_CTRL);
4340

4341
#if GFX_VER >= 12
4342
      /* Wa_1604578095:
4343
       *
4344
       *    Hang occurs when the number of max threads is less than 2 times
4345
       *    the number of instance count. The number of max threads must be
4346
       *    more than 2 times the number of instance count.
4347
       */
4348
      assert((devinfo->max_tcs_threads / 2) > tcs_prog_data->instances);
4349
      hs.DispatchGRFStartRegisterForURBData = prog_data->dispatch_grf_start_reg & 0x1f;
4350
      hs.DispatchGRFStartRegisterForURBData5 = prog_data->dispatch_grf_start_reg >> 5;
4351
#endif
4352

4353
      hs.InstanceCount = tcs_prog_data->instances - 1;
4354
      hs.MaximumNumberofThreads = devinfo->max_tcs_threads - 1;
4355
      hs.IncludeVertexHandles = true;
4356

4357
#if GFX_VER == 12
4358
      /* Patch Count threshold specifies the maximum number of patches that
4359
       * will be accumulated before a thread dispatch is forced.
4360
       */
4361
      hs.PatchCountThreshold = tcs_prog_data->patch_count_threshold;
4362
#endif
4363

4364
#if GFX_VER >= 9
4365
      hs.DispatchMode = vue_prog_data->dispatch_mode;
4366
      hs.IncludePrimitiveID = tcs_prog_data->include_primitive_id;
4367
#endif
4368
   }
4369
}
4370

4371
/**
4372
 * Encode 3DSTATE_TE and most of 3DSTATE_DS based on the compiled shader.
4373
 */
4374
static void
4375
iris_store_tes_state(const struct intel_device_info *devinfo,
4376
                     struct iris_compiled_shader *shader)
4377
{
4378
   struct brw_stage_prog_data *prog_data = shader->prog_data;
4379
   struct brw_vue_prog_data *vue_prog_data = (void *) prog_data;
4380
   struct brw_tes_prog_data *tes_prog_data = (void *) prog_data;
4381

4382
   uint32_t *te_state = (void *) shader->derived_data;
4383
   uint32_t *ds_state = te_state + GENX(3DSTATE_TE_length);
4384

4385
   iris_pack_command(GENX(3DSTATE_TE), te_state, te) {
4386
      te.Partitioning = tes_prog_data->partitioning;
4387
      te.OutputTopology = tes_prog_data->output_topology;
4388
      te.TEDomain = tes_prog_data->domain;
4389
      te.TEEnable = true;
4390
      te.MaximumTessellationFactorOdd = 63.0;
4391
      te.MaximumTessellationFactorNotOdd = 64.0;
4392
   }
4393

4394
   iris_pack_command(GENX(3DSTATE_DS), ds_state, ds) {
4395
      INIT_THREAD_DISPATCH_FIELDS(ds, Patch, MESA_SHADER_TESS_EVAL);
4396

4397
      ds.DispatchMode = DISPATCH_MODE_SIMD8_SINGLE_PATCH;
4398
      ds.MaximumNumberofThreads = devinfo->max_tes_threads - 1;
4399
      ds.ComputeWCoordinateEnable =
4400
         tes_prog_data->domain == BRW_TESS_DOMAIN_TRI;
4401

4402
      ds.UserClipDistanceCullTestEnableBitmask =
4403
         vue_prog_data->cull_distance_mask;
4404
   }
4405

4406
}
4407

4408
/**
4409
 * Encode most of 3DSTATE_GS based on the compiled shader.
4410
 */
4411
static void
4412
iris_store_gs_state(const struct intel_device_info *devinfo,
4413
                    struct iris_compiled_shader *shader)
4414
{
4415
   struct brw_stage_prog_data *prog_data = shader->prog_data;
4416
   struct brw_vue_prog_data *vue_prog_data = (void *) prog_data;
4417
   struct brw_gs_prog_data *gs_prog_data = (void *) prog_data;
4418

4419
   iris_pack_command(GENX(3DSTATE_GS), shader->derived_data, gs) {
4420
      INIT_THREAD_DISPATCH_FIELDS(gs, Vertex, MESA_SHADER_GEOMETRY);
4421

4422
      gs.OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1;
4423
      gs.OutputTopology = gs_prog_data->output_topology;
4424
      gs.ControlDataHeaderSize =
4425
         gs_prog_data->control_data_header_size_hwords;
4426
      gs.InstanceControl = gs_prog_data->invocations - 1;
4427
      gs.DispatchMode = DISPATCH_MODE_SIMD8;
4428
      gs.IncludePrimitiveID = gs_prog_data->include_primitive_id;
4429
      gs.ControlDataFormat = gs_prog_data->control_data_format;
4430
      gs.ReorderMode = TRAILING;
4431
      gs.ExpectedVertexCount = gs_prog_data->vertices_in;
4432
      gs.MaximumNumberofThreads =
4433
         GFX_VER == 8 ? (devinfo->max_gs_threads / 2 - 1)
4434
                      : (devinfo->max_gs_threads - 1);
4435

4436
      if (gs_prog_data->static_vertex_count != -1) {
4437
         gs.StaticOutput = true;
4438
         gs.StaticOutputVertexCount = gs_prog_data->static_vertex_count;
4439
      }
4440
      gs.IncludeVertexHandles = vue_prog_data->include_vue_handles;
4441

4442
      gs.UserClipDistanceCullTestEnableBitmask =
4443
         vue_prog_data->cull_distance_mask;
4444

4445
      const int urb_entry_write_offset = 1;
4446
      const uint32_t urb_entry_output_length =
4447
         DIV_ROUND_UP(vue_prog_data->vue_map.num_slots, 2) -
4448
         urb_entry_write_offset;
4449

4450
      gs.VertexURBEntryOutputReadOffset = urb_entry_write_offset;
4451
      gs.VertexURBEntryOutputLength = MAX2(urb_entry_output_length, 1);
4452
   }
4453
}
4454

4455
/**
4456
 * Encode most of 3DSTATE_PS and 3DSTATE_PS_EXTRA based on the shader.
4457
 */
4458
static void
4459
iris_store_fs_state(const struct intel_device_info *devinfo,
4460
                    struct iris_compiled_shader *shader)
4461
{
4462
   struct brw_stage_prog_data *prog_data = shader->prog_data;
4463
   struct brw_wm_prog_data *wm_prog_data = (void *) shader->prog_data;
4464

4465
   uint32_t *ps_state = (void *) shader->derived_data;
4466
   uint32_t *psx_state = ps_state + GENX(3DSTATE_PS_length);
4467

4468
   iris_pack_command(GENX(3DSTATE_PS), ps_state, ps) {
4469
      ps.VectorMaskEnable = true;
4470
      ps.BindingTableEntryCount = shader->bt.size_bytes / 4;
4471
      ps.FloatingPointMode = prog_data->use_alt_mode;
4472
      ps.MaximumNumberofThreadsPerPSD = 64 - (GFX_VER == 8 ? 2 : 1);
4473

4474
      ps.PushConstantEnable = prog_data->ubo_ranges[0].length > 0;
4475

4476
      /* From the documentation for this packet:
4477
       * "If the PS kernel does not need the Position XY Offsets to
4478
       *  compute a Position Value, then this field should be programmed
4479
       *  to POSOFFSET_NONE."
4480
       *
4481
       * "SW Recommendation: If the PS kernel needs the Position Offsets
4482
       *  to compute a Position XY value, this field should match Position
4483
       *  ZW Interpolation Mode to ensure a consistent position.xyzw
4484
       *  computation."
4485
       *
4486
       * We only require XY sample offsets. So, this recommendation doesn't
4487
       * look useful at the moment.  We might need this in future.
4488
       */
4489
      ps.PositionXYOffsetSelect =
4490
         wm_prog_data->uses_pos_offset ? POSOFFSET_SAMPLE : POSOFFSET_NONE;
4491

4492
      if (prog_data->total_scratch) {
4493
         INIT_THREAD_SCRATCH_SIZE(ps);
4494
      }
4495
   }
4496

4497
   iris_pack_command(GENX(3DSTATE_PS_EXTRA), psx_state, psx) {
4498
      psx.PixelShaderValid = true;
4499
      psx.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;
4500
      psx.PixelShaderKillsPixel = wm_prog_data->uses_kill;
4501
      psx.AttributeEnable = wm_prog_data->num_varying_inputs != 0;
4502
      psx.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
4503
      psx.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;
4504
      psx.PixelShaderIsPerSample = wm_prog_data->persample_dispatch;
4505
      psx.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;
4506

4507
#if GFX_VER >= 9
4508
      psx.PixelShaderPullsBary = wm_prog_data->pulls_bary;
4509
      psx.PixelShaderComputesStencil = wm_prog_data->computed_stencil;
4510
#endif
4511
   }
4512
}
4513

4514
/**
4515
 * Compute the size of the derived data (shader command packets).
4516
 *
4517
 * This must match the data written by the iris_store_xs_state() functions.
4518
 */
4519
static void
4520
iris_store_cs_state(const struct intel_device_info *devinfo,
4521
                    struct iris_compiled_shader *shader)
4522
{
4523
   struct brw_cs_prog_data *cs_prog_data = (void *) shader->prog_data;
4524
   void *map = shader->derived_data;
4525

4526
   iris_pack_state(GENX(INTERFACE_DESCRIPTOR_DATA), map, desc) {
4527
#if GFX_VERx10 < 125
4528
      desc.ConstantURBEntryReadLength = cs_prog_data->push.per_thread.regs;
4529
      desc.CrossThreadConstantDataReadLength =
4530
         cs_prog_data->push.cross_thread.regs;
4531
#else
4532
      assert(cs_prog_data->push.per_thread.regs == 0);
4533
      assert(cs_prog_data->push.cross_thread.regs == 0);
4534
#endif
4535
      desc.BarrierEnable = cs_prog_data->uses_barrier;
4536
#if GFX_VER >= 12
4537
      /* TODO: Check if we are missing workarounds and enable mid-thread
4538
       * preemption.
4539
       *
4540
       * We still have issues with mid-thread preemption (it was already
4541
       * disabled by the kernel on gfx11, due to missing workarounds). It's
4542
       * possible that we are just missing some workarounds, and could enable
4543
       * it later, but for now let's disable it to fix a GPU in compute in Car
4544
       * Chase (and possibly more).
4545
       */
4546
      desc.ThreadPreemptionDisable = true;
4547
#endif
4548
   }
4549
}
4550

4551
static unsigned
4552
iris_derived_program_state_size(enum iris_program_cache_id cache_id)
4553
{
4554
   assert(cache_id <= IRIS_CACHE_BLORP);
4555

4556
   static const unsigned dwords[] = {
4557
      [IRIS_CACHE_VS] = GENX(3DSTATE_VS_length),
4558
      [IRIS_CACHE_TCS] = GENX(3DSTATE_HS_length),
4559
      [IRIS_CACHE_TES] = GENX(3DSTATE_TE_length) + GENX(3DSTATE_DS_length),
4560
      [IRIS_CACHE_GS] = GENX(3DSTATE_GS_length),
4561
      [IRIS_CACHE_FS] =
4562
         GENX(3DSTATE_PS_length) + GENX(3DSTATE_PS_EXTRA_length),
4563
      [IRIS_CACHE_CS] = GENX(INTERFACE_DESCRIPTOR_DATA_length),
4564
      [IRIS_CACHE_BLORP] = 0,
4565
   };
4566

4567
   return sizeof(uint32_t) * dwords[cache_id];
4568
}
4569

4570
/**
4571
 * Create any state packets corresponding to the given shader stage
4572
 * (i.e. 3DSTATE_VS) and save them as "derived data" in the shader variant.
4573
 * This means that we can look up a program in the in-memory cache and
4574
 * get most of the state packet without having to reconstruct it.
4575
 */
4576
static void
4577
iris_store_derived_program_state(const struct intel_device_info *devinfo,
4578
                                 enum iris_program_cache_id cache_id,
4579
                                 struct iris_compiled_shader *shader)
4580
{
4581
   switch (cache_id) {
4582
   case IRIS_CACHE_VS:
4583
      iris_store_vs_state(devinfo, shader);
4584
      break;
4585
   case IRIS_CACHE_TCS:
4586
      iris_store_tcs_state(devinfo, shader);
4587
      break;
4588
   case IRIS_CACHE_TES:
4589
      iris_store_tes_state(devinfo, shader);
4590
      break;
4591
   case IRIS_CACHE_GS:
4592
      iris_store_gs_state(devinfo, shader);
4593
      break;
4594
   case IRIS_CACHE_FS:
4595
      iris_store_fs_state(devinfo, shader);
4596
      break;
4597
   case IRIS_CACHE_CS:
4598
      iris_store_cs_state(devinfo, shader);
4599
      break;
4600
   case IRIS_CACHE_BLORP:
4601
      break;
4602
   }
4603
}
4604

4605
/* ------------------------------------------------------------------- */
4606

4607
static const uint32_t push_constant_opcodes[] = {
4608
   [MESA_SHADER_VERTEX]    = 21,
4609
   [MESA_SHADER_TESS_CTRL] = 25, /* HS */
4610
   [MESA_SHADER_TESS_EVAL] = 26, /* DS */
4611
   [MESA_SHADER_GEOMETRY]  = 22,
4612
   [MESA_SHADER_FRAGMENT]  = 23,
4613
   [MESA_SHADER_COMPUTE]   = 0,
4614
};
4615

4616
static uint32_t
4617
use_null_surface(struct iris_batch *batch, struct iris_context *ice)
4618
{
4619
   struct iris_bo *state_bo = iris_resource_bo(ice->state.unbound_tex.res);
4620

4621
   iris_use_pinned_bo(batch, state_bo, false, IRIS_DOMAIN_NONE);
4622

4623
   return ice->state.unbound_tex.offset;
4624
}
4625

4626
static uint32_t
4627
use_null_fb_surface(struct iris_batch *batch, struct iris_context *ice)
4628
{
4629
   /* If set_framebuffer_state() was never called, fall back to 1x1x1 */
4630
   if (!ice->state.null_fb.res)
4631
      return use_null_surface(batch, ice);
4632

4633
   struct iris_bo *state_bo = iris_resource_bo(ice->state.null_fb.res);
4634

4635
   iris_use_pinned_bo(batch, state_bo, false, IRIS_DOMAIN_NONE);
4636

4637
   return ice->state.null_fb.offset;
4638
}
4639

4640
static uint32_t
4641
surf_state_offset_for_aux(struct iris_resource *res,
4642
                          unsigned aux_modes,
4643
                          enum isl_aux_usage aux_usage)
4644
{
4645
   assert(aux_modes & (1 << aux_usage));
4646
   return SURFACE_STATE_ALIGNMENT *
4647
          util_bitcount(aux_modes & ((1 << aux_usage) - 1));
4648
}
4649

4650
#if GFX_VER == 9
/**
 * Rewrite the clear value stored in an already uploaded surface state by
 * emitting PIPE_CONTROL immediate writes, then invalidate the state cache.
 *
 * \p state locates the group of surface states for \p res, and
 * \p aux_modes / \p aux_usage select which entry of the group to patch.
 * HiZ writes only the first clear color dword; every other aux usage
 * writes all four dwords as two 64-bit immediates.
 */
static void
surf_state_update_clear_value(struct iris_batch *batch,
                              struct iris_resource *res,
                              struct iris_state_ref *state,
                              unsigned aux_modes,
                              enum isl_aux_usage aux_usage)
{
   struct isl_device *isl_dev = &batch->screen->isl_dev;
   struct iris_bo *state_bo = iris_resource_bo(state->res);
   uint32_t *color = res->aux.clear_color.u32;

   /* state->offset is relative to the binder memory zone; convert it to an
    * offset within state_bo.
    */
   uint64_t real_offset = state->offset + IRIS_MEMZONE_BINDER_START;
   uint32_t offset_into_bo = real_offset - state_bo->gtt_offset;

   /* Then step to the right surface state and to its clear value field. */
   uint32_t clear_offset = offset_into_bo +
      isl_dev->ss.clear_value_offset +
      surf_state_offset_for_aux(res, aux_modes, aux_usage);

   assert(isl_dev->ss.clear_value_size == 16);

   if (aux_usage != ISL_AUX_USAGE_HIZ) {
      iris_emit_pipe_control_write(batch, "update fast clear color (RG__)",
                                   PIPE_CONTROL_WRITE_IMMEDIATE,
                                   state_bo, clear_offset,
                                   (uint64_t) color[0] |
                                   ((uint64_t) color[1] << 32));
      iris_emit_pipe_control_write(batch, "update fast clear color (__BA)",
                                   PIPE_CONTROL_WRITE_IMMEDIATE,
                                   state_bo, clear_offset + 8,
                                   (uint64_t) color[2] |
                                   ((uint64_t) color[3] << 32));
   } else {
      iris_emit_pipe_control_write(batch, "update fast clear value (Z)",
                                   PIPE_CONTROL_WRITE_IMMEDIATE,
                                   state_bo, clear_offset, color[0]);
   }

   iris_emit_pipe_control_flush(batch,
                                "update fast clear: state cache invalidate",
                                PIPE_CONTROL_FLUSH_ENABLE |
                                PIPE_CONTROL_STATE_CACHE_INVALIDATE);
}
#endif
4692

4693
static void
4694
update_clear_value(struct iris_context *ice,
4695
                   struct iris_batch *batch,
4696
                   struct iris_resource *res,
4697
                   struct iris_surface_state *surf_state,
4698
                   unsigned all_aux_modes,
4699
                   struct isl_view *view)
4700
{
4701
   UNUSED struct isl_device *isl_dev = &batch->screen->isl_dev;
4702
   UNUSED unsigned aux_modes = all_aux_modes;
4703

4704
   /* We only need to update the clear color in the surface state for gfx8 and
4705
    * gfx9. Newer gens can read it directly from the clear color state buffer.
4706
    */
4707
#if GFX_VER == 9
4708
   /* Skip updating the ISL_AUX_USAGE_NONE surface state */
4709
   aux_modes &= ~(1 << ISL_AUX_USAGE_NONE);
4710

4711
   while (aux_modes) {
4712
      enum isl_aux_usage aux_usage = u_bit_scan(&aux_modes);
4713

4714
      surf_state_update_clear_value(batch, res, &surf_state->ref,
4715
                                    all_aux_modes, aux_usage);
4716
   }
4717
#elif GFX_VER == 8
4718
   /* TODO: Could update rather than re-filling */
4719
   alloc_surface_states(surf_state, all_aux_modes);
4720

4721
   void *map = surf_state->cpu;
4722

4723
   while (aux_modes) {
4724
      enum isl_aux_usage aux_usage = u_bit_scan(&aux_modes);
4725
      fill_surface_state(isl_dev, map, res, &res->surf, view, aux_usage,
4726
                         0, 0, 0);
4727
      map += SURFACE_STATE_ALIGNMENT;
4728
   }
4729

4730
   upload_surface_states(ice->state.surface_uploader, surf_state);
4731
#endif
4732
}
4733

4734
/**
4735
 * Add a surface to the validation list, as well as the buffer containing
4736
 * the corresponding SURFACE_STATE.
4737
 *
4738
 * Returns the binding table entry (offset to SURFACE_STATE).
4739
 */
4740
static uint32_t
4741
use_surface(struct iris_context *ice,
4742
            struct iris_batch *batch,
4743
            struct pipe_surface *p_surf,
4744
            bool writeable,
4745
            enum isl_aux_usage aux_usage,
4746
            bool is_read_surface,
4747
            enum iris_domain access)
4748
{
4749
   struct iris_surface *surf = (void *) p_surf;
4750
   struct iris_resource *res = (void *) p_surf->texture;
4751
   uint32_t offset = 0;
4752

4753
   if (GFX_VER == 8 && is_read_surface && !surf->surface_state_read.ref.res) {
4754
      upload_surface_states(ice->state.surface_uploader,
4755
                            &surf->surface_state_read);
4756
   }
4757

4758
   if (!surf->surface_state.ref.res) {
4759
      upload_surface_states(ice->state.surface_uploader,
4760
                            &surf->surface_state);
4761
   }
4762

4763
   if (res->aux.bo) {
4764
      iris_use_pinned_bo(batch, res->aux.bo, writeable, access);
4765
      if (res->aux.clear_color_bo)
4766
         iris_use_pinned_bo(batch, res->aux.clear_color_bo, false, access);
4767

4768
      if (memcmp(&res->aux.clear_color, &surf->clear_color,
4769
                 sizeof(surf->clear_color)) != 0) {
4770
         update_clear_value(ice, batch, res, &surf->surface_state,
4771
                            res->aux.possible_usages, &surf->view);
4772
         if (GFX_VER == 8) {
4773
            update_clear_value(ice, batch, res, &surf->surface_state_read,
4774
                               res->aux.possible_usages, &surf->read_view);
4775
         }
4776
         surf->clear_color = res->aux.clear_color;
4777
      }
4778
   }
4779

4780
   iris_use_pinned_bo(batch, iris_resource_bo(p_surf->texture),
4781
                      writeable, access);
4782
   if (GFX_VER == 8 && is_read_surface) {
4783
      iris_use_pinned_bo(batch, iris_resource_bo(surf->surface_state_read.ref.res), false,
4784
                         IRIS_DOMAIN_NONE);
4785
   } else {
4786
      iris_use_pinned_bo(batch, iris_resource_bo(surf->surface_state.ref.res), false,
4787
                         IRIS_DOMAIN_NONE);
4788
   }
4789

4790
   offset = (GFX_VER == 8 && is_read_surface)
4791
               ? surf->surface_state_read.ref.offset
4792
               : surf->surface_state.ref.offset;
4793

4794
   return offset +
4795
          surf_state_offset_for_aux(res, res->aux.possible_usages, aux_usage);
4796
}
4797

4798
static uint32_t
4799
use_sampler_view(struct iris_context *ice,
4800
                 struct iris_batch *batch,
4801
                 struct iris_sampler_view *isv)
4802
{
4803
   enum isl_aux_usage aux_usage =
4804
      iris_resource_texture_aux_usage(ice, isv->res, isv->view.format);
4805

4806
   if (!isv->surface_state.ref.res)
4807
      upload_surface_states(ice->state.surface_uploader, &isv->surface_state);
4808

4809
   if (isv->res->aux.bo) {
4810
      iris_use_pinned_bo(batch, isv->res->aux.bo,
4811
                         false, IRIS_DOMAIN_OTHER_READ);
4812
      if (isv->res->aux.clear_color_bo)
4813
         iris_use_pinned_bo(batch, isv->res->aux.clear_color_bo,
4814
                            false, IRIS_DOMAIN_OTHER_READ);
4815
      if (memcmp(&isv->res->aux.clear_color, &isv->clear_color,
4816
                 sizeof(isv->clear_color)) != 0) {
4817
         update_clear_value(ice, batch, isv->res, &isv->surface_state,
4818
                            isv->res->aux.sampler_usages, &isv->view);
4819
         isv->clear_color = isv->res->aux.clear_color;
4820
      }
4821
   }
4822

4823
   iris_use_pinned_bo(batch, isv->res->bo, false, IRIS_DOMAIN_OTHER_READ);
4824
   iris_use_pinned_bo(batch, iris_resource_bo(isv->surface_state.ref.res), false,
4825
                      IRIS_DOMAIN_NONE);
4826

4827
   return isv->surface_state.ref.offset +
4828
          surf_state_offset_for_aux(isv->res, isv->res->aux.sampler_usages,
4829
                                    aux_usage);
4830
}
4831

4832
static uint32_t
4833
use_ubo_ssbo(struct iris_batch *batch,
4834
             struct iris_context *ice,
4835
             struct pipe_shader_buffer *buf,
4836
             struct iris_state_ref *surf_state,
4837
             bool writable, enum iris_domain access)
4838
{
4839
   if (!buf->buffer || !surf_state->res)
4840
      return use_null_surface(batch, ice);
4841

4842
   iris_use_pinned_bo(batch, iris_resource_bo(buf->buffer), writable, access);
4843
   iris_use_pinned_bo(batch, iris_resource_bo(surf_state->res), false,
4844
                      IRIS_DOMAIN_NONE);
4845

4846
   return surf_state->offset;
4847
}
4848

4849
static uint32_t
4850
use_image(struct iris_batch *batch, struct iris_context *ice,
4851
          struct iris_shader_state *shs, const struct shader_info *info,
4852
          int i)
4853
{
4854
   struct iris_image_view *iv = &shs->image[i];
4855
   struct iris_resource *res = (void *) iv->base.resource;
4856

4857
   if (!res)
4858
      return use_null_surface(batch, ice);
4859

4860
   bool write = iv->base.shader_access & PIPE_IMAGE_ACCESS_WRITE;
4861

4862
   iris_use_pinned_bo(batch, res->bo, write, IRIS_DOMAIN_NONE);
4863
   iris_use_pinned_bo(batch, iris_resource_bo(iv->surface_state.ref.res),
4864
                      false, IRIS_DOMAIN_NONE);
4865

4866
   if (res->aux.bo)
4867
      iris_use_pinned_bo(batch, res->aux.bo, write, IRIS_DOMAIN_NONE);
4868

4869
   enum isl_aux_usage aux_usage =
4870
      iris_image_view_aux_usage(ice, &iv->base, info);
4871

4872
   return iv->surface_state.ref.offset +
4873
      surf_state_offset_for_aux(res, res->aux.possible_usages, aux_usage);
4874
}
4875

4876
/**
 * Append one entry to the binding table being built.
 *
 * Relies on these names being in scope at the expansion site: shader,
 * binder_addr, bt_map, s and pin_only.  The entry written is addr relative
 * to binder_addr; when pin_only is set nothing is written and s does not
 * advance.
 *
 * Wrapped in do { } while (0) so the expansion is a single statement and
 * stays correct under an unbraced if/else.  Note that addr is evaluated
 * more than once, so it must not have side effects.
 */
#define push_bt_entry(addr) \
   do { \
      assert((addr) >= binder_addr); \
      assert(s < shader->bt.size_bytes / sizeof(uint32_t)); \
      if (!pin_only) \
         bt_map[s++] = (addr) - binder_addr; \
   } while (0)
4880

4881
/**
 * When actually writing the table (not pin_only) and the given surface
 * group is used by the shader, assert that the next slot to be written (s)
 * is where the binding table layout says the group starts.
 *
 * Relies on shader, s and pin_only being in scope at the expansion site.
 * Wrapped in do { } while (0) so the bare `if` cannot capture a following
 * `else` at the call site.
 */
#define bt_assert(section) \
   do { \
      if (!pin_only && shader->bt.used_mask[section] != 0) \
         assert(shader->bt.offsets[section] == s); \
   } while (0)
4884

4885
/**
4886
 * Populate the binding table for a given shader stage.
4887
 *
4888
 * This fills out the table of pointers to surfaces required by the shader,
4889
 * and also adds those buffers to the validation list so the kernel can make
4890
 * resident before running our batch.
4891
 */
4892
static void
4893
iris_populate_binding_table(struct iris_context *ice,
4894
                            struct iris_batch *batch,
4895
                            gl_shader_stage stage,
4896
                            bool pin_only)
4897
{
4898
   const struct iris_binder *binder = &ice->state.binder;
4899
   struct iris_compiled_shader *shader = ice->shaders.prog[stage];
4900
   if (!shader)
4901
      return;
4902

4903
   struct iris_binding_table *bt = &shader->bt;
4904
   UNUSED struct brw_stage_prog_data *prog_data = shader->prog_data;
4905
   struct iris_shader_state *shs = &ice->state.shaders[stage];
4906
   uint32_t binder_addr = binder->bo->gtt_offset;
4907

4908
   uint32_t *bt_map = binder->map + binder->bt_offset[stage];
4909
   int s = 0;
4910

4911
   const struct shader_info *info = iris_get_shader_info(ice, stage);
4912
   if (!info) {
4913
      /* TCS passthrough doesn't need a binding table. */
4914
      assert(stage == MESA_SHADER_TESS_CTRL);
4915
      return;
4916
   }
4917

4918
   if (stage == MESA_SHADER_COMPUTE &&
4919
       shader->bt.used_mask[IRIS_SURFACE_GROUP_CS_WORK_GROUPS]) {
4920
      /* surface for gl_NumWorkGroups */
4921
      struct iris_state_ref *grid_data = &ice->state.grid_size;
4922
      struct iris_state_ref *grid_state = &ice->state.grid_surf_state;
4923
      iris_use_pinned_bo(batch, iris_resource_bo(grid_data->res), false,
4924
                         IRIS_DOMAIN_OTHER_READ);
4925
      iris_use_pinned_bo(batch, iris_resource_bo(grid_state->res), false,
4926
                         IRIS_DOMAIN_NONE);
4927
      push_bt_entry(grid_state->offset);
4928
   }
4929

4930
   if (stage == MESA_SHADER_FRAGMENT) {
4931
      struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
4932
      /* Note that cso_fb->nr_cbufs == fs_key->nr_color_regions. */
4933
      if (cso_fb->nr_cbufs) {
4934
         for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) {
4935
            uint32_t addr;
4936
            if (cso_fb->cbufs[i]) {
4937
               addr = use_surface(ice, batch, cso_fb->cbufs[i], true,
4938
                                  ice->state.draw_aux_usage[i], false,
4939
                                  IRIS_DOMAIN_RENDER_WRITE);
4940
            } else {
4941
               addr = use_null_fb_surface(batch, ice);
4942
            }
4943
            push_bt_entry(addr);
4944
         }
4945
      } else if (GFX_VER < 11) {
4946
         uint32_t addr = use_null_fb_surface(batch, ice);
4947
         push_bt_entry(addr);
4948
      }
4949
   }
4950

4951
#define foreach_surface_used(index, group) \
4952
   bt_assert(group); \
4953
   for (int index = 0; index < bt->sizes[group]; index++) \
4954
      if (iris_group_index_to_bti(bt, group, index) != \
4955
          IRIS_SURFACE_NOT_USED)
4956

4957
   foreach_surface_used(i, IRIS_SURFACE_GROUP_RENDER_TARGET_READ) {
4958
      struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
4959
      uint32_t addr;
4960
      if (cso_fb->cbufs[i]) {
4961
         addr = use_surface(ice, batch, cso_fb->cbufs[i],
4962
                            false, ice->state.draw_aux_usage[i], true,
4963
                            IRIS_DOMAIN_OTHER_READ);
4964
         push_bt_entry(addr);
4965
      }
4966
   }
4967

4968
   foreach_surface_used(i, IRIS_SURFACE_GROUP_TEXTURE) {
4969
      struct iris_sampler_view *view = shs->textures[i];
4970
      uint32_t addr = view ? use_sampler_view(ice, batch, view)
4971
                           : use_null_surface(batch, ice);
4972
      push_bt_entry(addr);
4973
   }
4974

4975
   foreach_surface_used(i, IRIS_SURFACE_GROUP_IMAGE) {
4976
      uint32_t addr = use_image(batch, ice, shs, info, i);
4977
      push_bt_entry(addr);
4978
   }
4979

4980
   foreach_surface_used(i, IRIS_SURFACE_GROUP_UBO) {
4981
      uint32_t addr = use_ubo_ssbo(batch, ice, &shs->constbuf[i],
4982
                                   &shs->constbuf_surf_state[i], false,
4983
                                   IRIS_DOMAIN_OTHER_READ);
4984
      push_bt_entry(addr);
4985
   }
4986

4987
   foreach_surface_used(i, IRIS_SURFACE_GROUP_SSBO) {
4988
      uint32_t addr =
4989
         use_ubo_ssbo(batch, ice, &shs->ssbo[i], &shs->ssbo_surf_state[i],
4990
                      shs->writable_ssbos & (1u << i), IRIS_DOMAIN_NONE);
4991
      push_bt_entry(addr);
4992
   }
4993

4994
#if 0
4995
      /* XXX: YUV surfaces not implemented yet */
4996
      bt_assert(plane_start[1], ...);
4997
      bt_assert(plane_start[2], ...);
4998
#endif
4999
}
5000

5001
static void
5002
iris_use_optional_res(struct iris_batch *batch,
5003
                      struct pipe_resource *res,
5004
                      bool writeable,
5005
                      enum iris_domain access)
5006
{
5007
   if (res) {
5008
      struct iris_bo *bo = iris_resource_bo(res);
5009
      iris_use_pinned_bo(batch, bo, writeable, access);
5010
   }
5011
}
5012

5013
static void
5014
pin_depth_and_stencil_buffers(struct iris_batch *batch,
5015
                              struct pipe_surface *zsbuf,
5016
                              struct iris_depth_stencil_alpha_state *cso_zsa)
5017
{
5018
   if (!zsbuf)
5019
      return;
5020

5021
   struct iris_resource *zres, *sres;
5022
   iris_get_depth_stencil_resources(zsbuf->texture, &zres, &sres);
5023

5024
   if (zres) {
5025
      const enum iris_domain access = cso_zsa->depth_writes_enabled ?
5026
         IRIS_DOMAIN_DEPTH_WRITE : IRIS_DOMAIN_OTHER_READ;
5027
      iris_use_pinned_bo(batch, zres->bo, cso_zsa->depth_writes_enabled,
5028
                         access);
5029
      if (zres->aux.bo) {
5030
         iris_use_pinned_bo(batch, zres->aux.bo,
5031
                            cso_zsa->depth_writes_enabled, access);
5032
      }
5033
   }
5034

5035
   if (sres) {
5036
      const enum iris_domain access = cso_zsa->stencil_writes_enabled ?
5037
         IRIS_DOMAIN_DEPTH_WRITE : IRIS_DOMAIN_OTHER_READ;
5038
      iris_use_pinned_bo(batch, sres->bo, cso_zsa->stencil_writes_enabled,
5039
                         access);
5040
   }
5041
}
5042

5043
static uint32_t
5044
pin_scratch_space(struct iris_context *ice,
5045
                  struct iris_batch *batch,
5046
                  const struct brw_stage_prog_data *prog_data,
5047
                  gl_shader_stage stage)
5048
{
5049
   uint32_t scratch_addr = 0;
5050

5051
   if (prog_data->total_scratch > 0) {
5052
      struct iris_bo *scratch_bo =
5053
         iris_get_scratch_space(ice, prog_data->total_scratch, stage);
5054
      iris_use_pinned_bo(batch, scratch_bo, true, IRIS_DOMAIN_NONE);
5055

5056
#if GFX_VERx10 >= 125
5057
      const struct iris_state_ref *ref =
5058
         iris_get_scratch_surf(ice, prog_data->total_scratch);
5059
      iris_use_pinned_bo(batch, iris_resource_bo(ref->res),
5060
                         false, IRIS_DOMAIN_NONE);
5061
      scratch_addr = ref->offset +
5062
                     iris_resource_bo(ref->res)->gtt_offset -
5063
                     IRIS_MEMZONE_BINDLESS_START;
5064
      assert((scratch_addr & 0x3f) == 0 && scratch_addr < (1 << 26));
5065
#else
5066
      scratch_addr = scratch_bo->gtt_offset;
5067
#endif
5068
   }
5069

5070
   return scratch_addr;
5071
}
5072

5073
/* ------------------------------------------------------------------- */
5074

5075
/**
5076
 * Pin any BOs which were installed by a previous batch, and restored
5077
 * via the hardware logical context mechanism.
5078
 *
5079
 * We don't need to re-emit all state every batch - the hardware context
5080
 * mechanism will save and restore it for us.  This includes pointers to
5081
 * various BOs...which won't exist unless we ask the kernel to pin them
5082
 * by adding them to the validation list.
5083
 *
5084
 * We can skip buffers if we've re-emitted those packets, as we're
5085
 * overwriting those stale pointers with new ones, and don't actually
5086
 * refer to the old BOs.
5087
 */
5088
static void
5089
iris_restore_render_saved_bos(struct iris_context *ice,
5090
                              struct iris_batch *batch,
5091
                              const struct pipe_draw_info *draw)
5092
{
5093
   struct iris_genx_state *genx = ice->state.genx;
5094

5095
   const uint64_t clean = ~ice->state.dirty;
5096
   const uint64_t stage_clean = ~ice->state.stage_dirty;
5097

5098
   if (clean & IRIS_DIRTY_CC_VIEWPORT) {
5099
      iris_use_optional_res(batch, ice->state.last_res.cc_vp, false,
5100
                            IRIS_DOMAIN_NONE);
5101
   }
5102

5103
   if (clean & IRIS_DIRTY_SF_CL_VIEWPORT) {
5104
      iris_use_optional_res(batch, ice->state.last_res.sf_cl_vp, false,
5105
                            IRIS_DOMAIN_NONE);
5106
   }
5107

5108
   if (clean & IRIS_DIRTY_BLEND_STATE) {
5109
      iris_use_optional_res(batch, ice->state.last_res.blend, false,
5110
                            IRIS_DOMAIN_NONE);
5111
   }
5112

5113
   if (clean & IRIS_DIRTY_COLOR_CALC_STATE) {
5114
      iris_use_optional_res(batch, ice->state.last_res.color_calc, false,
5115
                            IRIS_DOMAIN_NONE);
5116
   }
5117

5118
   if (clean & IRIS_DIRTY_SCISSOR_RECT) {
5119
      iris_use_optional_res(batch, ice->state.last_res.scissor, false,
5120
                            IRIS_DOMAIN_NONE);
5121
   }
5122

5123
   if (ice->state.streamout_active && (clean & IRIS_DIRTY_SO_BUFFERS)) {
5124
      for (int i = 0; i < 4; i++) {
5125
         struct iris_stream_output_target *tgt =
5126
            (void *) ice->state.so_target[i];
5127
         if (tgt) {
5128
            iris_use_pinned_bo(batch, iris_resource_bo(tgt->base.buffer),
5129
                               true, IRIS_DOMAIN_OTHER_WRITE);
5130
            iris_use_pinned_bo(batch, iris_resource_bo(tgt->offset.res),
5131
                               true, IRIS_DOMAIN_OTHER_WRITE);
5132
         }
5133
      }
5134
   }
5135

5136
   for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
5137
      if (!(stage_clean & (IRIS_STAGE_DIRTY_CONSTANTS_VS << stage)))
5138
         continue;
5139

5140
      struct iris_shader_state *shs = &ice->state.shaders[stage];
5141
      struct iris_compiled_shader *shader = ice->shaders.prog[stage];
5142

5143
      if (!shader)
5144
         continue;
5145

5146
      struct brw_stage_prog_data *prog_data = (void *) shader->prog_data;
5147

5148
      for (int i = 0; i < 4; i++) {
5149
         const struct brw_ubo_range *range = &prog_data->ubo_ranges[i];
5150

5151
         if (range->length == 0)
5152
            continue;
5153

5154
         /* Range block is a binding table index, map back to UBO index. */
5155
         unsigned block_index = iris_bti_to_group_index(
5156
            &shader->bt, IRIS_SURFACE_GROUP_UBO, range->block);
5157
         assert(block_index != IRIS_SURFACE_NOT_USED);
5158

5159
         struct pipe_shader_buffer *cbuf = &shs->constbuf[block_index];
5160
         struct iris_resource *res = (void *) cbuf->buffer;
5161

5162
         if (res)
5163
            iris_use_pinned_bo(batch, res->bo, false, IRIS_DOMAIN_OTHER_READ);
5164
         else
5165
            iris_use_pinned_bo(batch, batch->screen->workaround_bo, false,
5166
                               IRIS_DOMAIN_OTHER_READ);
5167
      }
5168
   }
5169

5170
   for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
5171
      if (stage_clean & (IRIS_STAGE_DIRTY_BINDINGS_VS << stage)) {
5172
         /* Re-pin any buffers referred to by the binding table. */
5173
         iris_populate_binding_table(ice, batch, stage, true);
5174
      }
5175
   }
5176

5177
   for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
5178
      struct iris_shader_state *shs = &ice->state.shaders[stage];
5179
      struct pipe_resource *res = shs->sampler_table.res;
5180
      if (res)
5181
         iris_use_pinned_bo(batch, iris_resource_bo(res), false,
5182
                            IRIS_DOMAIN_NONE);
5183
   }
5184

5185
   for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
5186
      if (stage_clean & (IRIS_STAGE_DIRTY_VS << stage)) {
5187
         struct iris_compiled_shader *shader = ice->shaders.prog[stage];
5188

5189
         if (shader) {
5190
            struct iris_bo *bo = iris_resource_bo(shader->assembly.res);
5191
            iris_use_pinned_bo(batch, bo, false, IRIS_DOMAIN_NONE);
5192

5193
            pin_scratch_space(ice, batch, shader->prog_data, stage);
5194
         }
5195
      }
5196
   }
5197

5198
   if ((clean & IRIS_DIRTY_DEPTH_BUFFER) &&
5199
       (clean & IRIS_DIRTY_WM_DEPTH_STENCIL)) {
5200
      struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
5201
      pin_depth_and_stencil_buffers(batch, cso_fb->zsbuf, ice->state.cso_zsa);
5202
   }
5203

5204
   iris_use_optional_res(batch, ice->state.last_res.index_buffer, false,
5205
                         IRIS_DOMAIN_OTHER_READ);
5206

5207
   if (clean & IRIS_DIRTY_VERTEX_BUFFERS) {
5208
      uint64_t bound = ice->state.bound_vertex_buffers;
5209
      while (bound) {
5210
         const int i = u_bit_scan64(&bound);
5211
         struct pipe_resource *res = genx->vertex_buffers[i].resource;
5212
         iris_use_pinned_bo(batch, iris_resource_bo(res), false,
5213
                            IRIS_DOMAIN_OTHER_READ);
5214
      }
5215
   }
5216
}
5217

5218
static void
5219
iris_restore_compute_saved_bos(struct iris_context *ice,
5220
                               struct iris_batch *batch,
5221
                               const struct pipe_grid_info *grid)
5222
{
5223
   const uint64_t stage_clean = ~ice->state.stage_dirty;
5224

5225
   const int stage = MESA_SHADER_COMPUTE;
5226
   struct iris_shader_state *shs = &ice->state.shaders[stage];
5227

5228
   if (stage_clean & IRIS_STAGE_DIRTY_BINDINGS_CS) {
5229
      /* Re-pin any buffers referred to by the binding table. */
5230
      iris_populate_binding_table(ice, batch, stage, true);
5231
   }
5232

5233
   struct pipe_resource *sampler_res = shs->sampler_table.res;
5234
   if (sampler_res)
5235
      iris_use_pinned_bo(batch, iris_resource_bo(sampler_res), false,
5236
                         IRIS_DOMAIN_NONE);
5237

5238
   if ((stage_clean & IRIS_STAGE_DIRTY_SAMPLER_STATES_CS) &&
5239
       (stage_clean & IRIS_STAGE_DIRTY_BINDINGS_CS) &&
5240
       (stage_clean & IRIS_STAGE_DIRTY_CONSTANTS_CS) &&
5241
       (stage_clean & IRIS_STAGE_DIRTY_CS)) {
5242
      iris_use_optional_res(batch, ice->state.last_res.cs_desc, false,
5243
                            IRIS_DOMAIN_NONE);
5244
   }
5245

5246
   if (stage_clean & IRIS_STAGE_DIRTY_CS) {
5247
      struct iris_compiled_shader *shader = ice->shaders.prog[stage];
5248

5249
      if (shader) {
5250
         struct iris_bo *bo = iris_resource_bo(shader->assembly.res);
5251
         iris_use_pinned_bo(batch, bo, false, IRIS_DOMAIN_NONE);
5252

5253
         if (GFX_VERx10 < 125) {
5254
            struct iris_bo *curbe_bo =
5255
               iris_resource_bo(ice->state.last_res.cs_thread_ids);
5256
            iris_use_pinned_bo(batch, curbe_bo, false, IRIS_DOMAIN_NONE);
5257
         }
5258

5259
         pin_scratch_space(ice, batch, shader->prog_data, stage);
5260
      }
5261
   }
5262
}
5263

5264
/**
5265
 * Possibly emit STATE_BASE_ADDRESS to update Surface State Base Address.
5266
 */
5267
static void
5268
iris_update_surface_base_address(struct iris_batch *batch,
5269
                                 struct iris_binder *binder)
5270
{
5271
   if (batch->last_surface_base_address == binder->bo->gtt_offset)
5272
      return;
5273

5274
   struct isl_device *isl_dev = &batch->screen->isl_dev;
5275
   uint32_t mocs = isl_mocs(isl_dev, 0, false);
5276

5277
   iris_batch_sync_region_start(batch);
5278

5279
   flush_before_state_base_change(batch);
5280

5281
#if GFX_VER == 12
5282
   /* Wa_1607854226:
5283
    *
5284
    *  Workaround the non pipelined state not applying in MEDIA/GPGPU pipeline
5285
    *  mode by putting the pipeline temporarily in 3D mode..
5286
    */
5287
   if (batch->name == IRIS_BATCH_COMPUTE)
5288
      emit_pipeline_select(batch, _3D);
5289
#endif
5290

5291
   iris_emit_cmd(batch, GENX(STATE_BASE_ADDRESS), sba) {
5292
      sba.SurfaceStateBaseAddressModifyEnable = true;
5293
      sba.SurfaceStateBaseAddress = ro_bo(binder->bo, 0);
5294

5295
      /* The hardware appears to pay attention to the MOCS fields even
5296
       * if you don't set the "Address Modify Enable" bit for the base.
5297
       */
5298
      sba.GeneralStateMOCS            = mocs;
5299
      sba.StatelessDataPortAccessMOCS = mocs;
5300
      sba.DynamicStateMOCS            = mocs;
5301
      sba.IndirectObjectMOCS          = mocs;
5302
      sba.InstructionMOCS             = mocs;
5303
      sba.SurfaceStateMOCS            = mocs;
5304
#if GFX_VER >= 9
5305
      sba.BindlessSurfaceStateMOCS    = mocs;
5306
#endif
5307
   }
5308

5309
#if GFX_VER == 12
5310
   /* Wa_1607854226:
5311
    *
5312
    *  Put the pipeline back into compute mode.
5313
    */
5314
   if (batch->name == IRIS_BATCH_COMPUTE)
5315
      emit_pipeline_select(batch, GPGPU);
5316
#endif
5317

5318
   flush_after_state_base_change(batch);
5319
   iris_batch_sync_region_end(batch);
5320

5321
   batch->last_surface_base_address = binder->bo->gtt_offset;
5322
}
5323

5324
/**
 * Compute the depth range of a viewport.
 *
 * With window_space_position set the range is fixed at [0, 1]; otherwise
 * it is whatever util_viewport_zmin_zmax() derives from the viewport.
 */
static inline void
iris_viewport_zmin_zmax(const struct pipe_viewport_state *vp, bool halfz,
                        bool window_space_position, float *zmin, float *zmax)
{
   if (!window_space_position) {
      util_viewport_zmin_zmax(vp, halfz, zmin, zmax);
   } else {
      *zmin = 0.f;
      *zmax = 1.f;
   }
}
5335

5336
#if GFX_VER >= 12
5337
void
5338
genX(invalidate_aux_map_state)(struct iris_batch *batch)
5339
{
5340
   struct iris_screen *screen = batch->screen;
5341
   void *aux_map_ctx = iris_bufmgr_get_aux_map_context(screen->bufmgr);
5342
   if (!aux_map_ctx)
5343
      return;
5344
   uint32_t aux_map_state_num = intel_aux_map_get_state_num(aux_map_ctx);
5345
   if (batch->last_aux_map_state != aux_map_state_num) {
5346
      /* HSD 1209978178: docs say that before programming the aux table:
5347
       *
5348
       *    "Driver must ensure that the engine is IDLE but ensure it doesn't
5349
       *    add extra flushes in the case it knows that the engine is already
5350
       *    IDLE."
5351
       *
5352
       * An end of pipe sync is needed here, otherwise we see GPU hangs in
5353
       * dEQP-GLES31.functional.copy_image.* tests.
5354
       */
5355
      iris_emit_end_of_pipe_sync(batch, "Invalidate aux map table",
5356
                                 PIPE_CONTROL_CS_STALL);
5357

5358
      /* If the aux-map state number increased, then we need to rewrite the
5359
       * register. Rewriting the register is used to both set the aux-map
5360
       * translation table address, and also to invalidate any previously
5361
       * cached translations.
5362
       */
5363
      iris_load_register_imm32(batch, GENX(GFX_CCS_AUX_INV_num), 1);
5364
      batch->last_aux_map_state = aux_map_state_num;
5365
   }
5366
}
5367

5368
static void
5369
init_aux_map_state(struct iris_batch *batch)
5370
{
5371
   struct iris_screen *screen = batch->screen;
5372
   void *aux_map_ctx = iris_bufmgr_get_aux_map_context(screen->bufmgr);
5373
   if (!aux_map_ctx)
5374
      return;
5375

5376
   uint64_t base_addr = intel_aux_map_get_base(aux_map_ctx);
5377
   assert(base_addr != 0 && align64(base_addr, 32 * 1024) == base_addr);
5378
   iris_load_register_imm64(batch, GENX(GFX_AUX_TABLE_BASE_ADDR_num),
5379
                            base_addr);
5380
}
5381
#endif
5382

5383
struct push_bos {
5384
   struct {
5385
      struct iris_address addr;
5386
      uint32_t length;
5387
   } buffers[4];
5388
   int buffer_count;
5389
   uint32_t max_length;
5390
};
5391

5392
static void
5393
setup_constant_buffers(struct iris_context *ice,
5394
                       struct iris_batch *batch,
5395
                       int stage,
5396
                       struct push_bos *push_bos)
5397
{
5398
   struct iris_shader_state *shs = &ice->state.shaders[stage];
5399
   struct iris_compiled_shader *shader = ice->shaders.prog[stage];
5400
   struct brw_stage_prog_data *prog_data = (void *) shader->prog_data;
5401

5402
   uint32_t push_range_sum = 0;
5403

5404
   int n = 0;
5405
   for (int i = 0; i < 4; i++) {
5406
      const struct brw_ubo_range *range = &prog_data->ubo_ranges[i];
5407

5408
      if (range->length == 0)
5409
         continue;
5410

5411
      push_range_sum += range->length;
5412

5413
      if (range->length > push_bos->max_length)
5414
         push_bos->max_length = range->length;
5415

5416
      /* Range block is a binding table index, map back to UBO index. */
5417
      unsigned block_index = iris_bti_to_group_index(
5418
         &shader->bt, IRIS_SURFACE_GROUP_UBO, range->block);
5419
      assert(block_index != IRIS_SURFACE_NOT_USED);
5420

5421
      struct pipe_shader_buffer *cbuf = &shs->constbuf[block_index];
5422
      struct iris_resource *res = (void *) cbuf->buffer;
5423

5424
      assert(cbuf->buffer_offset % 32 == 0);
5425

5426
      push_bos->buffers[n].length = range->length;
5427
      push_bos->buffers[n].addr =
5428
         res ? ro_bo(res->bo, range->start * 32 + cbuf->buffer_offset)
5429
         : batch->screen->workaround_address;
5430
      n++;
5431
   }
5432

5433
   /* From the 3DSTATE_CONSTANT_XS and 3DSTATE_CONSTANT_ALL programming notes:
5434
    *
5435
    *    "The sum of all four read length fields must be less than or
5436
    *    equal to the size of 64."
5437
    */
5438
   assert(push_range_sum <= 64);
5439

5440
   push_bos->buffer_count = n;
5441
}
5442

5443
static void
5444
emit_push_constant_packets(struct iris_context *ice,
5445
                           struct iris_batch *batch,
5446
                           int stage,
5447
                           const struct push_bos *push_bos)
5448
{
5449
   UNUSED struct isl_device *isl_dev = &batch->screen->isl_dev;
5450
   struct iris_compiled_shader *shader = ice->shaders.prog[stage];
5451
   struct brw_stage_prog_data *prog_data = (void *) shader->prog_data;
5452

5453
   iris_emit_cmd(batch, GENX(3DSTATE_CONSTANT_VS), pkt) {
5454
      pkt._3DCommandSubOpcode = push_constant_opcodes[stage];
5455
#if GFX_VER >= 12
5456
      pkt.MOCS = isl_mocs(isl_dev, 0, false);
5457
#endif
5458
      if (prog_data) {
5459
         /* The Skylake PRM contains the following restriction:
5460
          *
5461
          *    "The driver must ensure The following case does not occur
5462
          *     without a flush to the 3D engine: 3DSTATE_CONSTANT_* with
5463
          *     buffer 3 read length equal to zero committed followed by a
5464
          *     3DSTATE_CONSTANT_* with buffer 0 read length not equal to
5465
          *     zero committed."
5466
          *
5467
          * To avoid this, we program the buffers in the highest slots.
5468
          * This way, slot 0 is only used if slot 3 is also used.
5469
          */
5470
         int n = push_bos->buffer_count;
5471
         assert(n <= 4);
5472
         const unsigned shift = 4 - n;
5473
         for (int i = 0; i < n; i++) {
5474
            pkt.ConstantBody.ReadLength[i + shift] =
5475
               push_bos->buffers[i].length;
5476
            pkt.ConstantBody.Buffer[i + shift] = push_bos->buffers[i].addr;
5477
         }
5478
      }
5479
   }
5480
}
5481

5482
#if GFX_VER >= 12
5483
static void
5484
emit_push_constant_packet_all(struct iris_context *ice,
5485
                              struct iris_batch *batch,
5486
                              uint32_t shader_mask,
5487
                              const struct push_bos *push_bos)
5488
{
5489
   struct isl_device *isl_dev = &batch->screen->isl_dev;
5490

5491
   if (!push_bos) {
5492
      iris_emit_cmd(batch, GENX(3DSTATE_CONSTANT_ALL), pc) {
5493
         pc.ShaderUpdateEnable = shader_mask;
5494
      }
5495
      return;
5496
   }
5497

5498
   const uint32_t n = push_bos->buffer_count;
5499
   const uint32_t max_pointers = 4;
5500
   const uint32_t num_dwords = 2 + 2 * n;
5501
   uint32_t const_all[2 + 2 * max_pointers];
5502
   uint32_t *dw = &const_all[0];
5503

5504
   assert(n <= max_pointers);
5505
   iris_pack_command(GENX(3DSTATE_CONSTANT_ALL), dw, all) {
5506
      all.DWordLength = num_dwords - 2;
5507
      all.MOCS = isl_mocs(isl_dev, 0, false);
5508
      all.ShaderUpdateEnable = shader_mask;
5509
      all.PointerBufferMask = (1 << n) - 1;
5510
   }
5511
   dw += 2;
5512

5513
   for (int i = 0; i < n; i++) {
5514
      _iris_pack_state(batch, GENX(3DSTATE_CONSTANT_ALL_DATA),
5515
                       dw + i * 2, data) {
5516
         data.PointerToConstantBuffer = push_bos->buffers[i].addr;
5517
         data.ConstantBufferReadLength = push_bos->buffers[i].length;
5518
      }
5519
   }
5520
   iris_batch_emit(batch, const_all, sizeof(uint32_t) * num_dwords);
5521
}
5522
#endif
5523

5524
static void
5525
iris_upload_dirty_render_state(struct iris_context *ice,
5526
                               struct iris_batch *batch,
5527
                               const struct pipe_draw_info *draw)
5528
{
5529
   const uint64_t dirty = ice->state.dirty;
5530
   const uint64_t stage_dirty = ice->state.stage_dirty;
5531

5532
   if (!(dirty & IRIS_ALL_DIRTY_FOR_RENDER) &&
5533
       !(stage_dirty & IRIS_ALL_STAGE_DIRTY_FOR_RENDER))
5534
      return;
5535

5536
   struct iris_genx_state *genx = ice->state.genx;
5537
   struct iris_binder *binder = &ice->state.binder;
5538
   struct brw_wm_prog_data *wm_prog_data = (void *)
5539
      ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data;
5540

5541
   if (dirty & IRIS_DIRTY_CC_VIEWPORT) {
5542
      const struct iris_rasterizer_state *cso_rast = ice->state.cso_rast;
5543
      uint32_t cc_vp_address;
5544

5545
      /* XXX: could avoid streaming for depth_clip [0,1] case. */
5546
      uint32_t *cc_vp_map =
5547
         stream_state(batch, ice->state.dynamic_uploader,
5548
                      &ice->state.last_res.cc_vp,
5549
                      4 * ice->state.num_viewports *
5550
                      GENX(CC_VIEWPORT_length), 32, &cc_vp_address);
5551
      for (int i = 0; i < ice->state.num_viewports; i++) {
5552
         float zmin, zmax;
5553
         iris_viewport_zmin_zmax(&ice->state.viewports[i], cso_rast->clip_halfz,
5554
                                 ice->state.window_space_position,
5555
                                 &zmin, &zmax);
5556
         if (cso_rast->depth_clip_near)
5557
            zmin = 0.0;
5558
         if (cso_rast->depth_clip_far)
5559
            zmax = 1.0;
5560

5561
         iris_pack_state(GENX(CC_VIEWPORT), cc_vp_map, ccv) {
5562
            ccv.MinimumDepth = zmin;
5563
            ccv.MaximumDepth = zmax;
5564
         }
5565

5566
         cc_vp_map += GENX(CC_VIEWPORT_length);
5567
      }
5568

5569
      iris_emit_cmd(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), ptr) {
5570
         ptr.CCViewportPointer = cc_vp_address;
5571
      }
5572
   }
5573

5574
   if (dirty & IRIS_DIRTY_SF_CL_VIEWPORT) {
5575
      struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
5576
      uint32_t sf_cl_vp_address;
5577
      uint32_t *vp_map =
5578
         stream_state(batch, ice->state.dynamic_uploader,
5579
                      &ice->state.last_res.sf_cl_vp,
5580
                      4 * ice->state.num_viewports *
5581
                      GENX(SF_CLIP_VIEWPORT_length), 64, &sf_cl_vp_address);
5582

5583
      for (unsigned i = 0; i < ice->state.num_viewports; i++) {
5584
         const struct pipe_viewport_state *state = &ice->state.viewports[i];
5585
         float gb_xmin, gb_xmax, gb_ymin, gb_ymax;
5586

5587
         float vp_xmin = viewport_extent(state, 0, -1.0f);
5588
         float vp_xmax = viewport_extent(state, 0,  1.0f);
5589
         float vp_ymin = viewport_extent(state, 1, -1.0f);
5590
         float vp_ymax = viewport_extent(state, 1,  1.0f);
5591

5592
         intel_calculate_guardband_size(cso_fb->width, cso_fb->height,
5593
                                        state->scale[0], state->scale[1],
5594
                                        state->translate[0], state->translate[1],
5595
                                        &gb_xmin, &gb_xmax, &gb_ymin, &gb_ymax);
5596

5597
         iris_pack_state(GENX(SF_CLIP_VIEWPORT), vp_map, vp) {
5598
            vp.ViewportMatrixElementm00 = state->scale[0];
5599
            vp.ViewportMatrixElementm11 = state->scale[1];
5600
            vp.ViewportMatrixElementm22 = state->scale[2];
5601
            vp.ViewportMatrixElementm30 = state->translate[0];
5602
            vp.ViewportMatrixElementm31 = state->translate[1];
5603
            vp.ViewportMatrixElementm32 = state->translate[2];
5604
            vp.XMinClipGuardband = gb_xmin;
5605
            vp.XMaxClipGuardband = gb_xmax;
5606
            vp.YMinClipGuardband = gb_ymin;
5607
            vp.YMaxClipGuardband = gb_ymax;
5608
            vp.XMinViewPort = MAX2(vp_xmin, 0);
5609
            vp.XMaxViewPort = MIN2(vp_xmax, cso_fb->width) - 1;
5610
            vp.YMinViewPort = MAX2(vp_ymin, 0);
5611
            vp.YMaxViewPort = MIN2(vp_ymax, cso_fb->height) - 1;
5612
         }
5613

5614
         vp_map += GENX(SF_CLIP_VIEWPORT_length);
5615
      }
5616

5617
      iris_emit_cmd(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), ptr) {
5618
         ptr.SFClipViewportPointer = sf_cl_vp_address;
5619
      }
5620
   }
5621

5622
   if (dirty & IRIS_DIRTY_URB) {
5623
      for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
5624
         if (!ice->shaders.prog[i]) {
5625
            ice->shaders.urb.size[i] = 1;
5626
         } else {
5627
            struct brw_vue_prog_data *vue_prog_data =
5628
               (void *) ice->shaders.prog[i]->prog_data;
5629
            ice->shaders.urb.size[i] = vue_prog_data->urb_entry_size;
5630
         }
5631
         assert(ice->shaders.urb.size[i] != 0);
5632
      }
5633

5634
      intel_get_urb_config(&batch->screen->devinfo,
5635
                           batch->screen->l3_config_3d,
5636
                           ice->shaders.prog[MESA_SHADER_TESS_EVAL] != NULL,
5637
                           ice->shaders.prog[MESA_SHADER_GEOMETRY] != NULL,
5638
                           ice->shaders.urb.size,
5639
                           ice->shaders.urb.entries,
5640
                           ice->shaders.urb.start,
5641
                           &ice->state.urb_deref_block_size,
5642
                           &ice->shaders.urb.constrained);
5643

5644
      for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
5645
         iris_emit_cmd(batch, GENX(3DSTATE_URB_VS), urb) {
5646
            urb._3DCommandSubOpcode += i;
5647
            urb.VSURBStartingAddress     = ice->shaders.urb.start[i];
5648
            urb.VSURBEntryAllocationSize = ice->shaders.urb.size[i] - 1;
5649
            urb.VSNumberofURBEntries     = ice->shaders.urb.entries[i];
5650
         }
5651
      }
5652
   }
5653

5654
   if (dirty & IRIS_DIRTY_BLEND_STATE) {
5655
      struct iris_blend_state *cso_blend = ice->state.cso_blend;
5656
      struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
5657
      struct iris_depth_stencil_alpha_state *cso_zsa = ice->state.cso_zsa;
5658
      const int header_dwords = GENX(BLEND_STATE_length);
5659

5660
      /* Always write at least one BLEND_STATE - the final RT message will
5661
       * reference BLEND_STATE[0] even if there aren't color writes.  There
5662
       * may still be alpha testing, computed depth, and so on.
5663
       */
5664
      const int rt_dwords =
5665
         MAX2(cso_fb->nr_cbufs, 1) * GENX(BLEND_STATE_ENTRY_length);
5666

5667
      uint32_t blend_offset;
5668
      uint32_t *blend_map =
5669
         stream_state(batch, ice->state.dynamic_uploader,
5670
                      &ice->state.last_res.blend,
5671
                      4 * (header_dwords + rt_dwords), 64, &blend_offset);
5672

5673
      uint32_t blend_state_header;
5674
      iris_pack_state(GENX(BLEND_STATE), &blend_state_header, bs) {
5675
         bs.AlphaTestEnable = cso_zsa->alpha_enabled;
5676
         bs.AlphaTestFunction = translate_compare_func(cso_zsa->alpha_func);
5677
      }
5678

5679
      blend_map[0] = blend_state_header | cso_blend->blend_state[0];
5680
      memcpy(&blend_map[1], &cso_blend->blend_state[1], 4 * rt_dwords);
5681

5682
      iris_emit_cmd(batch, GENX(3DSTATE_BLEND_STATE_POINTERS), ptr) {
5683
         ptr.BlendStatePointer = blend_offset;
5684
         ptr.BlendStatePointerValid = true;
5685
      }
5686
   }
5687

5688
   if (dirty & IRIS_DIRTY_COLOR_CALC_STATE) {
5689
      struct iris_depth_stencil_alpha_state *cso = ice->state.cso_zsa;
5690
#if GFX_VER == 8
5691
      struct pipe_stencil_ref *p_stencil_refs = &ice->state.stencil_ref;
5692
#endif
5693
      uint32_t cc_offset;
5694
      void *cc_map =
5695
         stream_state(batch, ice->state.dynamic_uploader,
5696
                      &ice->state.last_res.color_calc,
5697
                      sizeof(uint32_t) * GENX(COLOR_CALC_STATE_length),
5698
                      64, &cc_offset);
5699
      iris_pack_state(GENX(COLOR_CALC_STATE), cc_map, cc) {
5700
         cc.AlphaTestFormat = ALPHATEST_FLOAT32;
5701
         cc.AlphaReferenceValueAsFLOAT32 = cso->alpha_ref_value;
5702
         cc.BlendConstantColorRed   = ice->state.blend_color.color[0];
5703
         cc.BlendConstantColorGreen = ice->state.blend_color.color[1];
5704
         cc.BlendConstantColorBlue  = ice->state.blend_color.color[2];
5705
         cc.BlendConstantColorAlpha = ice->state.blend_color.color[3];
5706
#if GFX_VER == 8
5707
	 cc.StencilReferenceValue = p_stencil_refs->ref_value[0];
5708
	 cc.BackfaceStencilReferenceValue = p_stencil_refs->ref_value[1];
5709
#endif
5710
      }
5711
      iris_emit_cmd(batch, GENX(3DSTATE_CC_STATE_POINTERS), ptr) {
5712
         ptr.ColorCalcStatePointer = cc_offset;
5713
         ptr.ColorCalcStatePointerValid = true;
5714
      }
5715
   }
5716

5717
   /* Wa_1604061319
5718
    *
5719
    *    3DSTATE_CONSTANT_* needs to be programmed before BTP_*
5720
    *
5721
    * Testing shows that all the 3DSTATE_CONSTANT_XS need to be emitted if
5722
    * any stage has a dirty binding table.
5723
    */
5724
   const bool emit_const_wa = GFX_VER >= 11 &&
5725
      ((dirty & IRIS_DIRTY_RENDER_BUFFER) ||
5726
       (stage_dirty & IRIS_ALL_STAGE_DIRTY_BINDINGS_FOR_RENDER));
5727

5728
#if GFX_VER >= 12
5729
   uint32_t nobuffer_stages = 0;
5730
#endif
5731

5732
   for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
5733
      if (!(stage_dirty & (IRIS_STAGE_DIRTY_CONSTANTS_VS << stage)) &&
5734
          !emit_const_wa)
5735
         continue;
5736

5737
      struct iris_shader_state *shs = &ice->state.shaders[stage];
5738
      struct iris_compiled_shader *shader = ice->shaders.prog[stage];
5739

5740
      if (!shader)
5741
         continue;
5742

5743
      if (shs->sysvals_need_upload)
5744
         upload_sysvals(ice, stage, NULL);
5745

5746
      struct push_bos push_bos = {};
5747
      setup_constant_buffers(ice, batch, stage, &push_bos);
5748

5749
#if GFX_VER >= 12
5750
      /* If this stage doesn't have any push constants, emit it later in a
5751
       * single CONSTANT_ALL packet with all the other stages.
5752
       */
5753
      if (push_bos.buffer_count == 0) {
5754
         nobuffer_stages |= 1 << stage;
5755
         continue;
5756
      }
5757

5758
      /* The Constant Buffer Read Length field from 3DSTATE_CONSTANT_ALL
5759
       * contains only 5 bits, so we can only use it for buffers smaller than
5760
       * 32.
5761
       */
5762
      if (push_bos.max_length < 32) {
5763
         emit_push_constant_packet_all(ice, batch, 1 << stage, &push_bos);
5764
         continue;
5765
      }
5766
#endif
5767
      emit_push_constant_packets(ice, batch, stage, &push_bos);
5768
   }
5769

5770
#if GFX_VER >= 12
5771
   if (nobuffer_stages)
5772
      emit_push_constant_packet_all(ice, batch, nobuffer_stages, NULL);
5773
#endif
5774

5775
   for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
5776
      /* Gfx9 requires 3DSTATE_BINDING_TABLE_POINTERS_XS to be re-emitted
5777
       * in order to commit constants.  TODO: Investigate "Disable Gather
5778
       * at Set Shader" to go back to legacy mode...
5779
       */
5780
      if (stage_dirty & ((IRIS_STAGE_DIRTY_BINDINGS_VS |
5781
                          (GFX_VER == 9 ? IRIS_STAGE_DIRTY_CONSTANTS_VS : 0))
5782
                            << stage)) {
5783
         iris_emit_cmd(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_VS), ptr) {
5784
            ptr._3DCommandSubOpcode = 38 + stage;
5785
            ptr.PointertoVSBindingTable = binder->bt_offset[stage];
5786
         }
5787
      }
5788
   }
5789

5790
   if (GFX_VER >= 11 && (dirty & IRIS_DIRTY_RENDER_BUFFER)) {
5791
      // XXX: we may want to flag IRIS_DIRTY_MULTISAMPLE (or SAMPLE_MASK?)
5792
      // XXX: see commit 979fc1bc9bcc64027ff2cfafd285676f31b930a6
5793

5794
      /* The PIPE_CONTROL command description says:
5795
       *
5796
       *   "Whenever a Binding Table Index (BTI) used by a Render Target
5797
       *    Message points to a different RENDER_SURFACE_STATE, SW must issue a
5798
       *    Render Target Cache Flush by enabling this bit. When render target
5799
       *    flush is set due to new association of BTI, PS Scoreboard Stall bit
5800
       *    must be set in this packet."
5801
       */
5802
      // XXX: does this need to happen at 3DSTATE_BTP_PS time?
5803
      iris_emit_pipe_control_flush(batch, "workaround: RT BTI change [draw]",
5804
                                   PIPE_CONTROL_RENDER_TARGET_FLUSH |
5805
                                   PIPE_CONTROL_STALL_AT_SCOREBOARD);
5806
   }
5807

5808
   for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
5809
      if (stage_dirty & (IRIS_STAGE_DIRTY_BINDINGS_VS << stage)) {
5810
         iris_populate_binding_table(ice, batch, stage, false);
5811
      }
5812
   }
5813

5814
   for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
5815
      if (!(stage_dirty & (IRIS_STAGE_DIRTY_SAMPLER_STATES_VS << stage)) ||
5816
          !ice->shaders.prog[stage])
5817
         continue;
5818

5819
      iris_upload_sampler_states(ice, stage);
5820

5821
      struct iris_shader_state *shs = &ice->state.shaders[stage];
5822
      struct pipe_resource *res = shs->sampler_table.res;
5823
      if (res)
5824
         iris_use_pinned_bo(batch, iris_resource_bo(res), false,
5825
                            IRIS_DOMAIN_NONE);
5826

5827
      iris_emit_cmd(batch, GENX(3DSTATE_SAMPLER_STATE_POINTERS_VS), ptr) {
5828
         ptr._3DCommandSubOpcode = 43 + stage;
5829
         ptr.PointertoVSSamplerState = shs->sampler_table.offset;
5830
      }
5831
   }
5832

5833
   if (ice->state.need_border_colors)
5834
      iris_use_pinned_bo(batch, ice->state.border_color_pool.bo, false,
5835
                         IRIS_DOMAIN_NONE);
5836

5837
   if (dirty & IRIS_DIRTY_MULTISAMPLE) {
5838
      iris_emit_cmd(batch, GENX(3DSTATE_MULTISAMPLE), ms) {
5839
         ms.PixelLocation =
5840
            ice->state.cso_rast->half_pixel_center ? CENTER : UL_CORNER;
5841
         if (ice->state.framebuffer.samples > 0)
5842
            ms.NumberofMultisamples = ffs(ice->state.framebuffer.samples) - 1;
5843
      }
5844
   }
5845

5846
   if (dirty & IRIS_DIRTY_SAMPLE_MASK) {
5847
      iris_emit_cmd(batch, GENX(3DSTATE_SAMPLE_MASK), ms) {
5848
         ms.SampleMask = ice->state.sample_mask;
5849
      }
5850
   }
5851

5852
   for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
5853
      if (!(stage_dirty & (IRIS_STAGE_DIRTY_VS << stage)))
5854
         continue;
5855

5856
      struct iris_compiled_shader *shader = ice->shaders.prog[stage];
5857

5858
      if (shader) {
5859
         struct brw_stage_prog_data *prog_data = shader->prog_data;
5860
         struct iris_resource *cache = (void *) shader->assembly.res;
5861
         iris_use_pinned_bo(batch, cache->bo, false, IRIS_DOMAIN_NONE);
5862

5863
         uint32_t scratch_addr =
5864
            pin_scratch_space(ice, batch, prog_data, stage);
5865

5866
         if (stage == MESA_SHADER_FRAGMENT) {
5867
            UNUSED struct iris_rasterizer_state *cso = ice->state.cso_rast;
5868
            struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
5869

5870
            uint32_t ps_state[GENX(3DSTATE_PS_length)] = {0};
5871
            _iris_pack_command(batch, GENX(3DSTATE_PS), ps_state, ps) {
5872
               ps._8PixelDispatchEnable = wm_prog_data->dispatch_8;
5873
               ps._16PixelDispatchEnable = wm_prog_data->dispatch_16;
5874
               ps._32PixelDispatchEnable = wm_prog_data->dispatch_32;
5875

5876
              /* The docs for 3DSTATE_PS::32 Pixel Dispatch Enable say:
5877
               *
5878
               *    "When NUM_MULTISAMPLES = 16 or FORCE_SAMPLE_COUNT = 16,
5879
               *     SIMD32 Dispatch must not be enabled for PER_PIXEL dispatch
5880
               *     mode."
5881
               *
5882
               * 16x MSAA only exists on Gfx9+, so we can skip this on Gfx8.
5883
               */
5884
               if (GFX_VER >= 9 && cso_fb->samples == 16 &&
5885
                   !wm_prog_data->persample_dispatch) {
5886
                  assert(ps._8PixelDispatchEnable || ps._16PixelDispatchEnable);
5887
                  ps._32PixelDispatchEnable = false;
5888
               }
5889

5890
               ps.DispatchGRFStartRegisterForConstantSetupData0 =
5891
                  brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0);
5892
               ps.DispatchGRFStartRegisterForConstantSetupData1 =
5893
                  brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 1);
5894
               ps.DispatchGRFStartRegisterForConstantSetupData2 =
5895
                  brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2);
5896

5897
               ps.KernelStartPointer0 = KSP(shader) +
5898
                  brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0);
5899
               ps.KernelStartPointer1 = KSP(shader) +
5900
                  brw_wm_prog_data_prog_offset(wm_prog_data, ps, 1);
5901
               ps.KernelStartPointer2 = KSP(shader) +
5902
                  brw_wm_prog_data_prog_offset(wm_prog_data, ps, 2);
5903

5904
#if GFX_VERx10 >= 125
5905
               ps.ScratchSpaceBuffer = scratch_addr >> 4;
5906
#else
5907
               ps.ScratchSpaceBasePointer =
5908
                  rw_bo(NULL, scratch_addr, IRIS_DOMAIN_NONE);
5909
#endif
5910
            }
5911

5912
            uint32_t psx_state[GENX(3DSTATE_PS_EXTRA_length)] = {0};
5913
            iris_pack_command(GENX(3DSTATE_PS_EXTRA), psx_state, psx) {
5914
#if GFX_VER >= 9
5915
               if (!wm_prog_data->uses_sample_mask)
5916
                  psx.InputCoverageMaskState  = ICMS_NONE;
5917
               else if (wm_prog_data->post_depth_coverage)
5918
                  psx.InputCoverageMaskState = ICMS_DEPTH_COVERAGE;
5919
               else if (wm_prog_data->inner_coverage &&
5920
                        cso->conservative_rasterization)
5921
                  psx.InputCoverageMaskState = ICMS_INNER_CONSERVATIVE;
5922
               else
5923
                  psx.InputCoverageMaskState = ICMS_NORMAL;
5924
#else
5925
               psx.PixelShaderUsesInputCoverageMask =
5926
                  wm_prog_data->uses_sample_mask;
5927
#endif
5928
            }
5929

5930
            uint32_t *shader_ps = (uint32_t *) shader->derived_data;
5931
            uint32_t *shader_psx = shader_ps + GENX(3DSTATE_PS_length);
5932
            iris_emit_merge(batch, shader_ps, ps_state,
5933
                            GENX(3DSTATE_PS_length));
5934
            iris_emit_merge(batch, shader_psx, psx_state,
5935
                            GENX(3DSTATE_PS_EXTRA_length));
5936
         } else if (scratch_addr) {
5937
            uint32_t *pkt = (uint32_t *) shader->derived_data;
5938
            switch (stage) {
5939
            case MESA_SHADER_VERTEX:    MERGE_SCRATCH_ADDR(3DSTATE_VS); break;
5940
            case MESA_SHADER_TESS_CTRL: MERGE_SCRATCH_ADDR(3DSTATE_HS); break;
5941
            case MESA_SHADER_TESS_EVAL: MERGE_SCRATCH_ADDR(3DSTATE_DS); break;
5942
            case MESA_SHADER_GEOMETRY:  MERGE_SCRATCH_ADDR(3DSTATE_GS); break;
5943
            }
5944
         } else {
5945
            iris_batch_emit(batch, shader->derived_data,
5946
                            iris_derived_program_state_size(stage));
5947
         }
5948
      } else {
5949
         if (stage == MESA_SHADER_TESS_EVAL) {
5950
            iris_emit_cmd(batch, GENX(3DSTATE_HS), hs);
5951
            iris_emit_cmd(batch, GENX(3DSTATE_TE), te);
5952
            iris_emit_cmd(batch, GENX(3DSTATE_DS), ds);
5953
         } else if (stage == MESA_SHADER_GEOMETRY) {
5954
            iris_emit_cmd(batch, GENX(3DSTATE_GS), gs);
5955
         }
5956
      }
5957
   }
5958

5959
   if (ice->state.streamout_active) {
5960
      if (dirty & IRIS_DIRTY_SO_BUFFERS) {
5961
         for (int i = 0; i < 4; i++) {
5962
            struct iris_stream_output_target *tgt =
5963
               (void *) ice->state.so_target[i];
5964
            const uint32_t dwords = GENX(3DSTATE_SO_BUFFER_length);
5965
            uint32_t *so_buffers = genx->so_buffers + i * dwords;
5966
            bool zero_offset = false;
5967

5968
            if (tgt) {
5969
               zero_offset = tgt->zero_offset;
5970
               iris_use_pinned_bo(batch, iris_resource_bo(tgt->base.buffer),
5971
                                  true, IRIS_DOMAIN_OTHER_WRITE);
5972
               iris_use_pinned_bo(batch, iris_resource_bo(tgt->offset.res),
5973
                                  true, IRIS_DOMAIN_OTHER_WRITE);
5974
            }
5975

5976
            if (zero_offset) {
5977
               /* Skip the last DWord which contains "Stream Offset" of
5978
                * 0xFFFFFFFF and instead emit a dword of zero directly.
5979
                */
5980
               STATIC_ASSERT(GENX(3DSTATE_SO_BUFFER_StreamOffset_start) ==
5981
                             32 * (dwords - 1));
5982
               const uint32_t zero = 0;
5983
               iris_batch_emit(batch, so_buffers, 4 * (dwords - 1));
5984
               iris_batch_emit(batch, &zero, sizeof(zero));
5985
               tgt->zero_offset = false;
5986
            } else {
5987
               iris_batch_emit(batch, so_buffers, 4 * dwords);
5988
            }
5989
         }
5990
      }
5991

5992
      if ((dirty & IRIS_DIRTY_SO_DECL_LIST) && ice->state.streamout) {
5993
         uint32_t *decl_list =
5994
            ice->state.streamout + GENX(3DSTATE_STREAMOUT_length);
5995
         iris_batch_emit(batch, decl_list, 4 * ((decl_list[0] & 0xff) + 2));
5996
      }
5997

5998
      if (dirty & IRIS_DIRTY_STREAMOUT) {
5999
         const struct iris_rasterizer_state *cso_rast = ice->state.cso_rast;
6000

6001
         uint32_t dynamic_sol[GENX(3DSTATE_STREAMOUT_length)];
6002
         iris_pack_command(GENX(3DSTATE_STREAMOUT), dynamic_sol, sol) {
6003
            sol.SOFunctionEnable = true;
6004
            sol.SOStatisticsEnable = true;
6005

6006
            sol.RenderingDisable = cso_rast->rasterizer_discard &&
6007
                                   !ice->state.prims_generated_query_active;
6008
            sol.ReorderMode = cso_rast->flatshade_first ? LEADING : TRAILING;
6009
         }
6010

6011
         assert(ice->state.streamout);
6012

6013
         iris_emit_merge(batch, ice->state.streamout, dynamic_sol,
6014
                         GENX(3DSTATE_STREAMOUT_length));
6015
      }
6016
   } else {
6017
      if (dirty & IRIS_DIRTY_STREAMOUT) {
6018
         iris_emit_cmd(batch, GENX(3DSTATE_STREAMOUT), sol);
6019
      }
6020
   }
6021

6022
   if (dirty & IRIS_DIRTY_CLIP) {
6023
      struct iris_rasterizer_state *cso_rast = ice->state.cso_rast;
6024
      struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
6025

6026
      bool gs_or_tes = ice->shaders.prog[MESA_SHADER_GEOMETRY] ||
6027
                       ice->shaders.prog[MESA_SHADER_TESS_EVAL];
6028
      bool points_or_lines = cso_rast->fill_mode_point_or_line ||
6029
         (gs_or_tes ? ice->shaders.output_topology_is_points_or_lines
6030
                    : ice->state.prim_is_points_or_lines);
6031

6032
      uint32_t dynamic_clip[GENX(3DSTATE_CLIP_length)];
6033
      iris_pack_command(GENX(3DSTATE_CLIP), &dynamic_clip, cl) {
6034
         cl.StatisticsEnable = ice->state.statistics_counters_enabled;
6035
         if (cso_rast->rasterizer_discard)
6036
            cl.ClipMode = CLIPMODE_REJECT_ALL;
6037
         else if (ice->state.window_space_position)
6038
            cl.ClipMode = CLIPMODE_ACCEPT_ALL;
6039
         else
6040
            cl.ClipMode = CLIPMODE_NORMAL;
6041

6042
         cl.PerspectiveDivideDisable = ice->state.window_space_position;
6043
         cl.ViewportXYClipTestEnable = !points_or_lines;
6044

6045
         if (wm_prog_data->barycentric_interp_modes &
6046
             BRW_BARYCENTRIC_NONPERSPECTIVE_BITS)
6047
            cl.NonPerspectiveBarycentricEnable = true;
6048

6049
         cl.ForceZeroRTAIndexEnable = cso_fb->layers <= 1;
6050
         cl.MaximumVPIndex = ice->state.num_viewports - 1;
6051
      }
6052
      iris_emit_merge(batch, cso_rast->clip, dynamic_clip,
6053
                      ARRAY_SIZE(cso_rast->clip));
6054
   }
6055

6056
   if (dirty & (IRIS_DIRTY_RASTER | IRIS_DIRTY_URB)) {
6057
      struct iris_rasterizer_state *cso = ice->state.cso_rast;
6058
      iris_batch_emit(batch, cso->raster, sizeof(cso->raster));
6059

6060
      uint32_t dynamic_sf[GENX(3DSTATE_SF_length)];
6061
      iris_pack_command(GENX(3DSTATE_SF), &dynamic_sf, sf) {
6062
         sf.ViewportTransformEnable = !ice->state.window_space_position;
6063

6064
#if GFX_VER >= 12
6065
         sf.DerefBlockSize = ice->state.urb_deref_block_size;
6066
#endif
6067
      }
6068
      iris_emit_merge(batch, cso->sf, dynamic_sf,
6069
                      ARRAY_SIZE(dynamic_sf));
6070
   }
6071

6072
   if (dirty & IRIS_DIRTY_WM) {
6073
      struct iris_rasterizer_state *cso = ice->state.cso_rast;
6074
      uint32_t dynamic_wm[GENX(3DSTATE_WM_length)];
6075

6076
      iris_pack_command(GENX(3DSTATE_WM), &dynamic_wm, wm) {
6077
         wm.StatisticsEnable = ice->state.statistics_counters_enabled;
6078

6079
         wm.BarycentricInterpolationMode =
6080
            wm_prog_data->barycentric_interp_modes;
6081

6082
         if (wm_prog_data->early_fragment_tests)
6083
            wm.EarlyDepthStencilControl = EDSC_PREPS;
6084
         else if (wm_prog_data->has_side_effects)
6085
            wm.EarlyDepthStencilControl = EDSC_PSEXEC;
6086

6087
         /* We could skip this bit if color writes are enabled. */
6088
         if (wm_prog_data->has_side_effects || wm_prog_data->uses_kill)
6089
            wm.ForceThreadDispatchEnable = ForceON;
6090
      }
6091
      iris_emit_merge(batch, cso->wm, dynamic_wm, ARRAY_SIZE(cso->wm));
6092
   }
6093

6094
   if (dirty & IRIS_DIRTY_SBE) {
6095
      iris_emit_sbe(batch, ice);
6096
   }
6097

6098
   if (dirty & IRIS_DIRTY_PS_BLEND) {
6099
      struct iris_blend_state *cso_blend = ice->state.cso_blend;
6100
      struct iris_depth_stencil_alpha_state *cso_zsa = ice->state.cso_zsa;
6101
      const struct shader_info *fs_info =
6102
         iris_get_shader_info(ice, MESA_SHADER_FRAGMENT);
6103

6104
      uint32_t dynamic_pb[GENX(3DSTATE_PS_BLEND_length)];
6105
      iris_pack_command(GENX(3DSTATE_PS_BLEND), &dynamic_pb, pb) {
6106
         pb.HasWriteableRT = has_writeable_rt(cso_blend, fs_info);
6107
         pb.AlphaTestEnable = cso_zsa->alpha_enabled;
6108

6109
         /* The dual source blending docs caution against using SRC1 factors
6110
          * when the shader doesn't use a dual source render target write.
6111
          * Empirically, this can lead to GPU hangs, and the results are
6112
          * undefined anyway, so simply disable blending to avoid the hang.
6113
          */
6114
         pb.ColorBufferBlendEnable = (cso_blend->blend_enables & 1) &&
6115
            (!cso_blend->dual_color_blending || wm_prog_data->dual_src_blend);
6116
      }
6117

6118
      iris_emit_merge(batch, cso_blend->ps_blend, dynamic_pb,
6119
                      ARRAY_SIZE(cso_blend->ps_blend));
6120
   }
6121

6122
   if (dirty & IRIS_DIRTY_WM_DEPTH_STENCIL) {
6123
      struct iris_depth_stencil_alpha_state *cso = ice->state.cso_zsa;
6124
#if GFX_VER >= 9 && GFX_VER < 12
6125
      struct pipe_stencil_ref *p_stencil_refs = &ice->state.stencil_ref;
6126
      uint32_t stencil_refs[GENX(3DSTATE_WM_DEPTH_STENCIL_length)];
6127
      iris_pack_command(GENX(3DSTATE_WM_DEPTH_STENCIL), &stencil_refs, wmds) {
6128
         wmds.StencilReferenceValue = p_stencil_refs->ref_value[0];
6129
         wmds.BackfaceStencilReferenceValue = p_stencil_refs->ref_value[1];
6130
      }
6131
      iris_emit_merge(batch, cso->wmds, stencil_refs, ARRAY_SIZE(cso->wmds));
6132
#else
6133
      /* Use modify disable fields which allow us to emit packets
6134
       * directly instead of merging them later.
6135
       */
6136
      iris_batch_emit(batch, cso->wmds, sizeof(cso->wmds));
6137
#endif
6138

6139
#if GFX_VER >= 12
6140
      iris_batch_emit(batch, cso->depth_bounds, sizeof(cso->depth_bounds));
6141
#endif
6142
   }
6143

6144
   if (dirty & IRIS_DIRTY_STENCIL_REF) {
6145
#if GFX_VER >= 12
6146
      /* Use modify disable fields which allow us to emit packets
6147
       * directly instead of merging them later.
6148
       */
6149
      struct pipe_stencil_ref *p_stencil_refs = &ice->state.stencil_ref;
6150
      uint32_t stencil_refs[GENX(3DSTATE_WM_DEPTH_STENCIL_length)];
6151
      iris_pack_command(GENX(3DSTATE_WM_DEPTH_STENCIL), &stencil_refs, wmds) {
6152
         wmds.StencilReferenceValue = p_stencil_refs->ref_value[0];
6153
         wmds.BackfaceStencilReferenceValue = p_stencil_refs->ref_value[1];
6154
         wmds.StencilTestMaskModifyDisable = true;
6155
         wmds.StencilWriteMaskModifyDisable = true;
6156
         wmds.StencilStateModifyDisable = true;
6157
         wmds.DepthStateModifyDisable = true;
6158
      }
6159
      iris_batch_emit(batch, stencil_refs, sizeof(stencil_refs));
6160
#endif
6161
   }
6162

6163
   if (dirty & IRIS_DIRTY_SCISSOR_RECT) {
6164
      /* Wa_1409725701:
6165
       *    "The viewport-specific state used by the SF unit (SCISSOR_RECT) is
6166
       *    stored as an array of up to 16 elements. The location of first
6167
       *    element of the array, as specified by Pointer to SCISSOR_RECT,
6168
       *    should be aligned to a 64-byte boundary."
6169
       */
6170
      uint32_t alignment = 64;
6171
      uint32_t scissor_offset =
6172
         emit_state(batch, ice->state.dynamic_uploader,
6173
                    &ice->state.last_res.scissor,
6174
                    ice->state.scissors,
6175
                    sizeof(struct pipe_scissor_state) *
6176
                    ice->state.num_viewports, alignment);
6177

6178
      iris_emit_cmd(batch, GENX(3DSTATE_SCISSOR_STATE_POINTERS), ptr) {
6179
         ptr.ScissorRectPointer = scissor_offset;
6180
      }
6181
   }
6182

6183
   if (dirty & IRIS_DIRTY_DEPTH_BUFFER) {
6184
      struct iris_depth_buffer_state *cso_z = &ice->state.genx->depth_buffer;
6185

6186
      /* Do not emit the clear params yet. We need to update the clear value
6187
       * first.
6188
       */
6189
      uint32_t clear_length = GENX(3DSTATE_CLEAR_PARAMS_length) * 4;
6190
      uint32_t cso_z_size = batch->screen->isl_dev.ds.size - clear_length;;
6191

6192
#if GFX_VERx10 == 120
6193
      /* Wa_14010455700
6194
       *
6195
       * ISL will change some CHICKEN registers depending on the depth surface
6196
       * format, along with emitting the depth and stencil packets. In that
6197
       * case, we want to do a depth flush and stall, so the pipeline is not
6198
       * using these settings while we change the registers.
6199
       */
6200
      iris_emit_end_of_pipe_sync(batch,
6201
                                 "Workaround: Stop pipeline for 14010455700",
6202
                                 PIPE_CONTROL_DEPTH_STALL |
6203
                                 PIPE_CONTROL_DEPTH_CACHE_FLUSH);
6204
#endif
6205

6206
      iris_batch_emit(batch, cso_z->packets, cso_z_size);
6207
      if (GFX_VER >= 12) {
6208
         /* Wa_1408224581
6209
          *
6210
          * Workaround: Gfx12LP Astep only An additional pipe control with
6211
          * post-sync = store dword operation would be required.( w/a is to
6212
          * have an additional pipe control after the stencil state whenever
6213
          * the surface state bits of this state is changing).
6214
          */
6215
         iris_emit_pipe_control_write(batch, "WA for stencil state",
6216
                                      PIPE_CONTROL_WRITE_IMMEDIATE,
6217
                                      batch->screen->workaround_address.bo,
6218
                                      batch->screen->workaround_address.offset, 0);
6219
      }
6220

6221
      union isl_color_value clear_value = { .f32 = { 0, } };
6222

6223
      struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
6224
      if (cso_fb->zsbuf) {
6225
         struct iris_resource *zres, *sres;
6226
         iris_get_depth_stencil_resources(cso_fb->zsbuf->texture,
6227
                                          &zres, &sres);
6228
         if (zres && zres->aux.bo)
6229
            clear_value = iris_resource_get_clear_color(zres, NULL, NULL);
6230
      }
6231

6232
      uint32_t clear_params[GENX(3DSTATE_CLEAR_PARAMS_length)];
6233
      iris_pack_command(GENX(3DSTATE_CLEAR_PARAMS), clear_params, clear) {
6234
         clear.DepthClearValueValid = true;
6235
         clear.DepthClearValue = clear_value.f32[0];
6236
      }
6237
      iris_batch_emit(batch, clear_params, clear_length);
6238
   }
6239

6240
   if (dirty & (IRIS_DIRTY_DEPTH_BUFFER | IRIS_DIRTY_WM_DEPTH_STENCIL)) {
6241
      /* Listen for buffer changes, and also write enable changes. */
6242
      struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
6243
      pin_depth_and_stencil_buffers(batch, cso_fb->zsbuf, ice->state.cso_zsa);
6244
   }
6245

6246
   if (dirty & IRIS_DIRTY_POLYGON_STIPPLE) {
6247
      iris_emit_cmd(batch, GENX(3DSTATE_POLY_STIPPLE_PATTERN), poly) {
6248
         for (int i = 0; i < 32; i++) {
6249
            poly.PatternRow[i] = ice->state.poly_stipple.stipple[i];
6250
         }
6251
      }
6252
   }
6253

6254
   if (dirty & IRIS_DIRTY_LINE_STIPPLE) {
6255
      struct iris_rasterizer_state *cso = ice->state.cso_rast;
6256
      iris_batch_emit(batch, cso->line_stipple, sizeof(cso->line_stipple));
6257
   }
6258

6259
   if (dirty & IRIS_DIRTY_VF_TOPOLOGY) {
6260
      iris_emit_cmd(batch, GENX(3DSTATE_VF_TOPOLOGY), topo) {
6261
         topo.PrimitiveTopologyType =
6262
            translate_prim_type(draw->mode, draw->vertices_per_patch);
6263
      }
6264
   }
6265

6266
   if (dirty & IRIS_DIRTY_VERTEX_BUFFERS) {
6267
      int count = util_bitcount64(ice->state.bound_vertex_buffers);
6268
      uint64_t dynamic_bound = ice->state.bound_vertex_buffers;
6269

6270
      if (ice->state.vs_uses_draw_params) {
6271
         assert(ice->draw.draw_params.res);
6272

6273
         struct iris_vertex_buffer_state *state =
6274
            &(ice->state.genx->vertex_buffers[count]);
6275
         pipe_resource_reference(&state->resource, ice->draw.draw_params.res);
6276
         struct iris_resource *res = (void *) state->resource;
6277

6278
         iris_pack_state(GENX(VERTEX_BUFFER_STATE), state->state, vb) {
6279
            vb.VertexBufferIndex = count;
6280
            vb.AddressModifyEnable = true;
6281
            vb.BufferPitch = 0;
6282
            vb.BufferSize = res->bo->size - ice->draw.draw_params.offset;
6283
            vb.BufferStartingAddress =
6284
               ro_bo(NULL, res->bo->gtt_offset +
6285
                           (int) ice->draw.draw_params.offset);
6286
            vb.MOCS = iris_mocs(res->bo, &batch->screen->isl_dev,
6287
                                ISL_SURF_USAGE_VERTEX_BUFFER_BIT);
6288
#if GFX_VER >= 12
6289
            vb.L3BypassDisable       = true;
6290
#endif
6291
         }
6292
         dynamic_bound |= 1ull << count;
6293
         count++;
6294
      }
6295

6296
      if (ice->state.vs_uses_derived_draw_params) {
6297
         struct iris_vertex_buffer_state *state =
6298
            &(ice->state.genx->vertex_buffers[count]);
6299
         pipe_resource_reference(&state->resource,
6300
                                 ice->draw.derived_draw_params.res);
6301
         struct iris_resource *res = (void *) ice->draw.derived_draw_params.res;
6302

6303
         iris_pack_state(GENX(VERTEX_BUFFER_STATE), state->state, vb) {
6304
             vb.VertexBufferIndex = count;
6305
            vb.AddressModifyEnable = true;
6306
            vb.BufferPitch = 0;
6307
            vb.BufferSize =
6308
               res->bo->size - ice->draw.derived_draw_params.offset;
6309
            vb.BufferStartingAddress =
6310
               ro_bo(NULL, res->bo->gtt_offset +
6311
                           (int) ice->draw.derived_draw_params.offset);
6312
            vb.MOCS = iris_mocs(res->bo, &batch->screen->isl_dev,
6313
                                ISL_SURF_USAGE_VERTEX_BUFFER_BIT);
6314
#if GFX_VER >= 12
6315
            vb.L3BypassDisable       = true;
6316
#endif
6317
         }
6318
         dynamic_bound |= 1ull << count;
6319
         count++;
6320
      }
6321

6322
      if (count) {
6323
#if GFX_VER >= 11
6324
         /* Gfx11+ doesn't need the cache workaround below */
6325
         uint64_t bound = dynamic_bound;
6326
         while (bound) {
6327
            const int i = u_bit_scan64(&bound);
6328
            iris_use_optional_res(batch, genx->vertex_buffers[i].resource,
6329
                                  false, IRIS_DOMAIN_OTHER_READ);
6330
         }
6331
#else
6332
         /* The VF cache designers cut corners, and made the cache key's
6333
          * <VertexBufferIndex, Memory Address> tuple only consider the bottom
6334
          * 32 bits of the address.  If you have two vertex buffers which get
6335
          * placed exactly 4 GiB apart and use them in back-to-back draw calls,
6336
          * you can get collisions (even within a single batch).
6337
          *
6338
          * So, we need to do a VF cache invalidate if the buffer for a VB
6339
          * slot changes [48:32] address bits from the previous time.
6340
          */
6341
         unsigned flush_flags = 0;
6342

6343
         uint64_t bound = dynamic_bound;
6344
         while (bound) {
6345
            const int i = u_bit_scan64(&bound);
6346
            uint16_t high_bits = 0;
6347

6348
            struct iris_resource *res =
6349
               (void *) genx->vertex_buffers[i].resource;
6350
            if (res) {
6351
               iris_use_pinned_bo(batch, res->bo, false, IRIS_DOMAIN_OTHER_READ);
6352

6353
               high_bits = res->bo->gtt_offset >> 32ull;
6354
               if (high_bits != ice->state.last_vbo_high_bits[i]) {
6355
                  flush_flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE |
6356
                                 PIPE_CONTROL_CS_STALL;
6357
                  ice->state.last_vbo_high_bits[i] = high_bits;
6358
               }
6359
            }
6360
         }
6361

6362
         if (flush_flags) {
6363
            iris_emit_pipe_control_flush(batch,
6364
                                         "workaround: VF cache 32-bit key [VB]",
6365
                                         flush_flags);
6366
         }
6367
#endif
6368

6369
         const unsigned vb_dwords = GENX(VERTEX_BUFFER_STATE_length);
6370

6371
         uint32_t *map =
6372
            iris_get_command_space(batch, 4 * (1 + vb_dwords * count));
6373
         _iris_pack_command(batch, GENX(3DSTATE_VERTEX_BUFFERS), map, vb) {
6374
            vb.DWordLength = (vb_dwords * count + 1) - 2;
6375
         }
6376
         map += 1;
6377

6378
         bound = dynamic_bound;
6379
         while (bound) {
6380
            const int i = u_bit_scan64(&bound);
6381
            memcpy(map, genx->vertex_buffers[i].state,
6382
                   sizeof(uint32_t) * vb_dwords);
6383
            map += vb_dwords;
6384
         }
6385
      }
6386
   }
6387

6388
   if (dirty & IRIS_DIRTY_VERTEX_ELEMENTS) {
6389
      struct iris_vertex_element_state *cso = ice->state.cso_vertex_elements;
6390
      const unsigned entries = MAX2(cso->count, 1);
6391
      if (!(ice->state.vs_needs_sgvs_element ||
6392
            ice->state.vs_uses_derived_draw_params ||
6393
            ice->state.vs_needs_edge_flag)) {
6394
         iris_batch_emit(batch, cso->vertex_elements, sizeof(uint32_t) *
6395
                         (1 + entries * GENX(VERTEX_ELEMENT_STATE_length)));
6396
      } else {
6397
         uint32_t dynamic_ves[1 + 33 * GENX(VERTEX_ELEMENT_STATE_length)];
6398
         const unsigned dyn_count = cso->count +
6399
            ice->state.vs_needs_sgvs_element +
6400
            ice->state.vs_uses_derived_draw_params;
6401

6402
         iris_pack_command(GENX(3DSTATE_VERTEX_ELEMENTS),
6403
                           &dynamic_ves, ve) {
6404
            ve.DWordLength =
6405
               1 + GENX(VERTEX_ELEMENT_STATE_length) * dyn_count - 2;
6406
         }
6407
         memcpy(&dynamic_ves[1], &cso->vertex_elements[1],
6408
                (cso->count - ice->state.vs_needs_edge_flag) *
6409
                GENX(VERTEX_ELEMENT_STATE_length) * sizeof(uint32_t));
6410
         uint32_t *ve_pack_dest =
6411
            &dynamic_ves[1 + (cso->count - ice->state.vs_needs_edge_flag) *
6412
                         GENX(VERTEX_ELEMENT_STATE_length)];
6413

6414
         if (ice->state.vs_needs_sgvs_element) {
6415
            uint32_t base_ctrl = ice->state.vs_uses_draw_params ?
6416
                                 VFCOMP_STORE_SRC : VFCOMP_STORE_0;
6417
            iris_pack_state(GENX(VERTEX_ELEMENT_STATE), ve_pack_dest, ve) {
6418
               ve.Valid = true;
6419
               ve.VertexBufferIndex =
6420
                  util_bitcount64(ice->state.bound_vertex_buffers);
6421
               ve.SourceElementFormat = ISL_FORMAT_R32G32_UINT;
6422
               ve.Component0Control = base_ctrl;
6423
               ve.Component1Control = base_ctrl;
6424
               ve.Component2Control = VFCOMP_STORE_0;
6425
               ve.Component3Control = VFCOMP_STORE_0;
6426
            }
6427
            ve_pack_dest += GENX(VERTEX_ELEMENT_STATE_length);
6428
         }
6429
         if (ice->state.vs_uses_derived_draw_params) {
6430
            iris_pack_state(GENX(VERTEX_ELEMENT_STATE), ve_pack_dest, ve) {
6431
               ve.Valid = true;
6432
               ve.VertexBufferIndex =
6433
                  util_bitcount64(ice->state.bound_vertex_buffers) +
6434
                  ice->state.vs_uses_draw_params;
6435
               ve.SourceElementFormat = ISL_FORMAT_R32G32_UINT;
6436
               ve.Component0Control = VFCOMP_STORE_SRC;
6437
               ve.Component1Control = VFCOMP_STORE_SRC;
6438
               ve.Component2Control = VFCOMP_STORE_0;
6439
               ve.Component3Control = VFCOMP_STORE_0;
6440
            }
6441
            ve_pack_dest += GENX(VERTEX_ELEMENT_STATE_length);
6442
         }
6443
         if (ice->state.vs_needs_edge_flag) {
6444
            for (int i = 0; i < GENX(VERTEX_ELEMENT_STATE_length);  i++)
6445
               ve_pack_dest[i] = cso->edgeflag_ve[i];
6446
         }
6447

6448
         iris_batch_emit(batch, &dynamic_ves, sizeof(uint32_t) *
6449
                         (1 + dyn_count * GENX(VERTEX_ELEMENT_STATE_length)));
6450
      }
6451

6452
      if (!ice->state.vs_needs_edge_flag) {
6453
         iris_batch_emit(batch, cso->vf_instancing, sizeof(uint32_t) *
6454
                         entries * GENX(3DSTATE_VF_INSTANCING_length));
6455
      } else {
6456
         assert(cso->count > 0);
6457
         const unsigned edgeflag_index = cso->count - 1;
6458
         uint32_t dynamic_vfi[33 * GENX(3DSTATE_VF_INSTANCING_length)];
6459
         memcpy(&dynamic_vfi[0], cso->vf_instancing, edgeflag_index *
6460
                GENX(3DSTATE_VF_INSTANCING_length) * sizeof(uint32_t));
6461

6462
         uint32_t *vfi_pack_dest = &dynamic_vfi[0] +
6463
            edgeflag_index * GENX(3DSTATE_VF_INSTANCING_length);
6464
         iris_pack_command(GENX(3DSTATE_VF_INSTANCING), vfi_pack_dest, vi) {
6465
            vi.VertexElementIndex = edgeflag_index +
6466
               ice->state.vs_needs_sgvs_element +
6467
               ice->state.vs_uses_derived_draw_params;
6468
         }
6469
         for (int i = 0; i < GENX(3DSTATE_VF_INSTANCING_length);  i++)
6470
            vfi_pack_dest[i] |= cso->edgeflag_vfi[i];
6471

6472
         iris_batch_emit(batch, &dynamic_vfi[0], sizeof(uint32_t) *
6473
                         entries * GENX(3DSTATE_VF_INSTANCING_length));
6474
      }
6475
   }
6476

6477
   if (dirty & IRIS_DIRTY_VF_SGVS) {
6478
      const struct brw_vs_prog_data *vs_prog_data = (void *)
6479
         ice->shaders.prog[MESA_SHADER_VERTEX]->prog_data;
6480
      struct iris_vertex_element_state *cso = ice->state.cso_vertex_elements;
6481

6482
      iris_emit_cmd(batch, GENX(3DSTATE_VF_SGVS), sgv) {
6483
         if (vs_prog_data->uses_vertexid) {
6484
            sgv.VertexIDEnable = true;
6485
            sgv.VertexIDComponentNumber = 2;
6486
            sgv.VertexIDElementOffset =
6487
               cso->count - ice->state.vs_needs_edge_flag;
6488
         }
6489

6490
         if (vs_prog_data->uses_instanceid) {
6491
            sgv.InstanceIDEnable = true;
6492
            sgv.InstanceIDComponentNumber = 3;
6493
            sgv.InstanceIDElementOffset =
6494
               cso->count - ice->state.vs_needs_edge_flag;
6495
         }
6496
      }
6497
   }
6498

6499
   if (dirty & IRIS_DIRTY_VF) {
6500
      iris_emit_cmd(batch, GENX(3DSTATE_VF), vf) {
6501
         if (draw->primitive_restart) {
6502
            vf.IndexedDrawCutIndexEnable = true;
6503
            vf.CutIndex = draw->restart_index;
6504
         }
6505
      }
6506
   }
6507

6508
   if (dirty & IRIS_DIRTY_VF_STATISTICS) {
6509
      iris_emit_cmd(batch, GENX(3DSTATE_VF_STATISTICS), vf) {
6510
         vf.StatisticsEnable = true;
6511
      }
6512
   }
6513

6514
#if GFX_VER == 8
6515
   if (dirty & IRIS_DIRTY_PMA_FIX) {
6516
      bool enable = want_pma_fix(ice);
6517
      genX(update_pma_fix)(ice, batch, enable);
6518
   }
6519
#endif
6520

6521
   if (ice->state.current_hash_scale != 1)
6522
      genX(emit_hashing_mode)(ice, batch, UINT_MAX, UINT_MAX, 1);
6523

6524
#if GFX_VER >= 12
6525
   genX(invalidate_aux_map_state)(batch);
6526
#endif
6527
}
6528

6529
static void
6530
iris_upload_render_state(struct iris_context *ice,
6531
                         struct iris_batch *batch,
6532
                         const struct pipe_draw_info *draw,
6533
                         unsigned drawid_offset,
6534
                         const struct pipe_draw_indirect_info *indirect,
6535
                         const struct pipe_draw_start_count_bias *sc)
6536
{
6537
   bool use_predicate = ice->state.predicate == IRIS_PREDICATE_STATE_USE_BIT;
6538

6539
   iris_batch_sync_region_start(batch);
6540

6541
   /* Always pin the binder.  If we're emitting new binding table pointers,
6542
    * we need it.  If not, we're probably inheriting old tables via the
6543
    * context, and need it anyway.  Since true zero-bindings cases are
6544
    * practically non-existent, just pin it and avoid last_res tracking.
6545
    */
6546
   iris_use_pinned_bo(batch, ice->state.binder.bo, false,
6547
                      IRIS_DOMAIN_NONE);
6548

6549
   if (!batch->contains_draw) {
6550
      if (GFX_VER == 12) {
6551
         /* Re-emit constants when starting a new batch buffer in order to
6552
          * work around push constant corruption on context switch.
6553
          *
6554
          * XXX - Provide hardware spec quotation when available.
6555
          */
6556
         ice->state.stage_dirty |= (IRIS_STAGE_DIRTY_CONSTANTS_VS  |
6557
                                    IRIS_STAGE_DIRTY_CONSTANTS_TCS |
6558
                                    IRIS_STAGE_DIRTY_CONSTANTS_TES |
6559
                                    IRIS_STAGE_DIRTY_CONSTANTS_GS  |
6560
                                    IRIS_STAGE_DIRTY_CONSTANTS_FS);
6561
      }
6562
      batch->contains_draw = true;
6563
   }
6564

6565
   if (!batch->contains_draw_with_next_seqno) {
6566
      iris_restore_render_saved_bos(ice, batch, draw);
6567
      batch->contains_draw_with_next_seqno = true;
6568
   }
6569

6570
   iris_upload_dirty_render_state(ice, batch, draw);
6571

6572
   if (draw->index_size > 0) {
6573
      unsigned offset;
6574

6575
      if (draw->has_user_indices) {
6576
         unsigned start_offset = draw->index_size * sc->start;
6577

6578
         u_upload_data(ice->ctx.const_uploader, start_offset,
6579
                       sc->count * draw->index_size, 4,
6580
                       (char*)draw->index.user + start_offset,
6581
                       &offset, &ice->state.last_res.index_buffer);
6582
         offset -= start_offset;
6583
      } else {
6584
         struct iris_resource *res = (void *) draw->index.resource;
6585
         res->bind_history |= PIPE_BIND_INDEX_BUFFER;
6586

6587
         pipe_resource_reference(&ice->state.last_res.index_buffer,
6588
                                 draw->index.resource);
6589
         offset = 0;
6590
      }
6591

6592
      struct iris_genx_state *genx = ice->state.genx;
6593
      struct iris_bo *bo = iris_resource_bo(ice->state.last_res.index_buffer);
6594

6595
      uint32_t ib_packet[GENX(3DSTATE_INDEX_BUFFER_length)];
6596
      iris_pack_command(GENX(3DSTATE_INDEX_BUFFER), ib_packet, ib) {
6597
         ib.IndexFormat = draw->index_size >> 1;
6598
         ib.MOCS = iris_mocs(bo, &batch->screen->isl_dev,
6599
                             ISL_SURF_USAGE_INDEX_BUFFER_BIT);
6600
         ib.BufferSize = bo->size - offset;
6601
         ib.BufferStartingAddress = ro_bo(NULL, bo->gtt_offset + offset);
6602
#if GFX_VER >= 12
6603
         ib.L3BypassDisable       = true;
6604
#endif
6605
      }
6606

6607
      if (memcmp(genx->last_index_buffer, ib_packet, sizeof(ib_packet)) != 0) {
6608
         memcpy(genx->last_index_buffer, ib_packet, sizeof(ib_packet));
6609
         iris_batch_emit(batch, ib_packet, sizeof(ib_packet));
6610
         iris_use_pinned_bo(batch, bo, false, IRIS_DOMAIN_OTHER_READ);
6611
      }
6612

6613
#if GFX_VER < 11
6614
      /* The VF cache key only uses 32-bits, see vertex buffer comment above */
6615
      uint16_t high_bits = bo->gtt_offset >> 32ull;
6616
      if (high_bits != ice->state.last_index_bo_high_bits) {
6617
         iris_emit_pipe_control_flush(batch,
6618
                                      "workaround: VF cache 32-bit key [IB]",
6619
                                      PIPE_CONTROL_VF_CACHE_INVALIDATE |
6620
                                      PIPE_CONTROL_CS_STALL);
6621
         ice->state.last_index_bo_high_bits = high_bits;
6622
      }
6623
#endif
6624
   }
6625

6626
#define _3DPRIM_END_OFFSET          0x2420
6627
#define _3DPRIM_START_VERTEX        0x2430
6628
#define _3DPRIM_VERTEX_COUNT        0x2434
6629
#define _3DPRIM_INSTANCE_COUNT      0x2438
6630
#define _3DPRIM_START_INSTANCE      0x243C
6631
#define _3DPRIM_BASE_VERTEX         0x2440
6632

6633
   if (indirect && !indirect->count_from_stream_output) {
6634
      if (indirect->indirect_draw_count) {
6635
         use_predicate = true;
6636

6637
         struct iris_bo *draw_count_bo =
6638
            iris_resource_bo(indirect->indirect_draw_count);
6639
         unsigned draw_count_offset =
6640
            indirect->indirect_draw_count_offset;
6641

6642
         iris_emit_pipe_control_flush(batch,
6643
                                      "ensure indirect draw buffer is flushed",
6644
                                      PIPE_CONTROL_FLUSH_ENABLE);
6645

6646
         if (ice->state.predicate == IRIS_PREDICATE_STATE_USE_BIT) {
6647
            struct mi_builder b;
6648
            mi_builder_init(&b, &batch->screen->devinfo, batch);
6649

6650
            /* comparison = draw id < draw count */
6651
            struct mi_value comparison =
6652
               mi_ult(&b, mi_imm(drawid_offset),
6653
                          mi_mem32(ro_bo(draw_count_bo, draw_count_offset)));
6654

6655
            /* predicate = comparison & conditional rendering predicate */
6656
            mi_store(&b, mi_reg32(MI_PREDICATE_RESULT),
6657
                         mi_iand(&b, comparison, mi_reg32(CS_GPR(15))));
6658
         } else {
6659
            uint32_t mi_predicate;
6660

6661
            /* Upload the id of the current primitive to MI_PREDICATE_SRC1. */
6662
            iris_load_register_imm64(batch, MI_PREDICATE_SRC1, drawid_offset);
6663
            /* Upload the current draw count from the draw parameters buffer
6664
             * to MI_PREDICATE_SRC0.
6665
             */
6666
            iris_load_register_mem32(batch, MI_PREDICATE_SRC0,
6667
                                     draw_count_bo, draw_count_offset);
6668
            /* Zero the top 32-bits of MI_PREDICATE_SRC0 */
6669
            iris_load_register_imm32(batch, MI_PREDICATE_SRC0 + 4, 0);
6670

6671
            if (drawid_offset == 0) {
6672
               mi_predicate = MI_PREDICATE | MI_PREDICATE_LOADOP_LOADINV |
6673
                              MI_PREDICATE_COMBINEOP_SET |
6674
                              MI_PREDICATE_COMPAREOP_SRCS_EQUAL;
6675
            } else {
6676
               /* While draw_index < draw_count the predicate's result will be
6677
                *  (draw_index == draw_count) ^ TRUE = TRUE
6678
                * When draw_index == draw_count the result is
6679
                *  (TRUE) ^ TRUE = FALSE
6680
                * After this all results will be:
6681
                *  (FALSE) ^ FALSE = FALSE
6682
                */
6683
               mi_predicate = MI_PREDICATE | MI_PREDICATE_LOADOP_LOAD |
6684
                              MI_PREDICATE_COMBINEOP_XOR |
6685
                              MI_PREDICATE_COMPAREOP_SRCS_EQUAL;
6686
            }
6687
            iris_batch_emit(batch, &mi_predicate, sizeof(uint32_t));
6688
         }
6689
      }
6690
      struct iris_bo *bo = iris_resource_bo(indirect->buffer);
6691
      assert(bo);
6692

6693
      iris_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
6694
         lrm.RegisterAddress = _3DPRIM_VERTEX_COUNT;
6695
         lrm.MemoryAddress = ro_bo(bo, indirect->offset + 0);
6696
      }
6697
      iris_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
6698
         lrm.RegisterAddress = _3DPRIM_INSTANCE_COUNT;
6699
         lrm.MemoryAddress = ro_bo(bo, indirect->offset + 4);
6700
      }
6701
      iris_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
6702
         lrm.RegisterAddress = _3DPRIM_START_VERTEX;
6703
         lrm.MemoryAddress = ro_bo(bo, indirect->offset + 8);
6704
      }
6705
      if (draw->index_size) {
6706
         iris_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
6707
            lrm.RegisterAddress = _3DPRIM_BASE_VERTEX;
6708
            lrm.MemoryAddress = ro_bo(bo, indirect->offset + 12);
6709
         }
6710
         iris_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
6711
            lrm.RegisterAddress = _3DPRIM_START_INSTANCE;
6712
            lrm.MemoryAddress = ro_bo(bo, indirect->offset + 16);
6713
         }
6714
      } else {
6715
         iris_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
6716
            lrm.RegisterAddress = _3DPRIM_START_INSTANCE;
6717
            lrm.MemoryAddress = ro_bo(bo, indirect->offset + 12);
6718
         }
6719
         iris_emit_cmd(batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
6720
            lri.RegisterOffset = _3DPRIM_BASE_VERTEX;
6721
            lri.DataDWord = 0;
6722
         }
6723
      }
6724
   } else if (indirect && indirect->count_from_stream_output) {
6725
      struct iris_stream_output_target *so =
6726
         (void *) indirect->count_from_stream_output;
6727

6728
      /* XXX: Replace with actual cache tracking */
6729
      iris_emit_pipe_control_flush(batch,
6730
                                   "draw count from stream output stall",
6731
                                   PIPE_CONTROL_CS_STALL);
6732

6733
      struct mi_builder b;
6734
      mi_builder_init(&b, &batch->screen->devinfo, batch);
6735

6736
      struct iris_address addr =
6737
         ro_bo(iris_resource_bo(so->offset.res), so->offset.offset);
6738
      struct mi_value offset =
6739
         mi_iadd_imm(&b, mi_mem32(addr), -so->base.buffer_offset);
6740

6741
      mi_store(&b, mi_reg32(_3DPRIM_VERTEX_COUNT),
6742
                   mi_udiv32_imm(&b, offset, so->stride));
6743

6744
      _iris_emit_lri(batch, _3DPRIM_START_VERTEX, 0);
6745
      _iris_emit_lri(batch, _3DPRIM_BASE_VERTEX, 0);
6746
      _iris_emit_lri(batch, _3DPRIM_START_INSTANCE, 0);
6747
      _iris_emit_lri(batch, _3DPRIM_INSTANCE_COUNT, draw->instance_count);
6748
   }
6749

6750
   iris_measure_snapshot(ice, batch, INTEL_SNAPSHOT_DRAW, draw, indirect, sc);
6751

6752
   iris_emit_cmd(batch, GENX(3DPRIMITIVE), prim) {
6753
      prim.VertexAccessType = draw->index_size > 0 ? RANDOM : SEQUENTIAL;
6754
      prim.PredicateEnable = use_predicate;
6755

6756
      if (indirect) {
6757
         prim.IndirectParameterEnable = true;
6758
      } else {
6759
         prim.StartInstanceLocation = draw->start_instance;
6760
         prim.InstanceCount = draw->instance_count;
6761
         prim.VertexCountPerInstance = sc->count;
6762

6763
         prim.StartVertexLocation = sc->start;
6764

6765
         if (draw->index_size) {
6766
            prim.BaseVertexLocation += sc->index_bias;
6767
         }
6768
      }
6769
   }
6770

6771
   iris_batch_sync_region_end(batch);
6772
}
6773

6774
static void
6775
iris_load_indirect_location(struct iris_context *ice,
6776
                            struct iris_batch *batch,
6777
                            const struct pipe_grid_info *grid)
6778
{
6779
#define GPGPU_DISPATCHDIMX 0x2500
6780
#define GPGPU_DISPATCHDIMY 0x2504
6781
#define GPGPU_DISPATCHDIMZ 0x2508
6782

6783
   assert(grid->indirect);
6784

6785
   struct iris_state_ref *grid_size = &ice->state.grid_size;
6786
   struct iris_bo *bo = iris_resource_bo(grid_size->res);
6787
   iris_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
6788
      lrm.RegisterAddress = GPGPU_DISPATCHDIMX;
6789
      lrm.MemoryAddress = ro_bo(bo, grid_size->offset + 0);
6790
   }
6791
   iris_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
6792
      lrm.RegisterAddress = GPGPU_DISPATCHDIMY;
6793
      lrm.MemoryAddress = ro_bo(bo, grid_size->offset + 4);
6794
   }
6795
   iris_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
6796
      lrm.RegisterAddress = GPGPU_DISPATCHDIMZ;
6797
      lrm.MemoryAddress = ro_bo(bo, grid_size->offset + 8);
6798
   }
6799
}
6800

6801
#if GFX_VERx10 >= 125
6802

6803
static void
6804
iris_upload_compute_walker(struct iris_context *ice,
6805
                           struct iris_batch *batch,
6806
                           const struct pipe_grid_info *grid)
6807
{
6808
   const uint64_t stage_dirty = ice->state.stage_dirty;
6809
   struct iris_screen *screen = batch->screen;
6810
   const struct intel_device_info *devinfo = &screen->devinfo;
6811
   struct iris_binder *binder = &ice->state.binder;
6812
   struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_COMPUTE];
6813
   struct iris_compiled_shader *shader =
6814
      ice->shaders.prog[MESA_SHADER_COMPUTE];
6815
   struct brw_stage_prog_data *prog_data = shader->prog_data;
6816
   struct brw_cs_prog_data *cs_prog_data = (void *) prog_data;
6817
   const struct brw_cs_dispatch_info dispatch =
6818
      brw_cs_get_dispatch_info(devinfo, cs_prog_data, grid->block);
6819

6820
   if (stage_dirty & IRIS_STAGE_DIRTY_CS) {
6821
      iris_emit_cmd(batch, GENX(CFE_STATE), cfe) {
6822
         cfe.MaximumNumberofThreads =
6823
            devinfo->max_cs_threads * screen->subslice_total - 1;
6824
         if (prog_data->total_scratch > 0) {
6825
            cfe.ScratchSpaceBuffer =
6826
               iris_get_scratch_surf(ice, prog_data->total_scratch)->offset >> 4;
6827
         }
6828
      }
6829
   }
6830

6831
   if (grid->indirect)
6832
      iris_load_indirect_location(ice, batch, grid);
6833

6834
   iris_emit_cmd(batch, GENX(COMPUTE_WALKER), cw) {
6835
      cw.IndirectParameterEnable        = grid->indirect;
6836
      cw.SIMDSize                       = dispatch.simd_size / 16;
6837
      cw.LocalXMaximum                  = grid->block[0] - 1;
6838
      cw.LocalYMaximum                  = grid->block[1] - 1;
6839
      cw.LocalZMaximum                  = grid->block[2] - 1;
6840
      cw.ThreadGroupIDXDimension        = grid->grid[0];
6841
      cw.ThreadGroupIDYDimension        = grid->grid[1];
6842
      cw.ThreadGroupIDZDimension        = grid->grid[2];
6843
      cw.ExecutionMask                  = dispatch.right_mask;
6844

6845
      cw.InterfaceDescriptor = (struct GENX(INTERFACE_DESCRIPTOR_DATA)) {
6846
         .KernelStartPointer = KSP(shader),
6847
         .NumberofThreadsinGPGPUThreadGroup = dispatch.threads,
6848
         .SharedLocalMemorySize =
6849
            encode_slm_size(GFX_VER, prog_data->total_shared),
6850
         .BarrierEnable = cs_prog_data->uses_barrier,
6851
         .SamplerStatePointer = shs->sampler_table.offset,
6852
         .BindingTablePointer = binder->bt_offset[MESA_SHADER_COMPUTE],
6853
      };
6854

6855
      assert(brw_cs_push_const_total_size(cs_prog_data, dispatch.threads) == 0);
6856
   }
6857

6858
}
6859

6860
#else /* #if GFX_VERx10 >= 125 */
6861

6862
static void
6863
iris_upload_gpgpu_walker(struct iris_context *ice,
6864
                         struct iris_batch *batch,
6865
                         const struct pipe_grid_info *grid)
6866
{
6867
   const uint64_t stage_dirty = ice->state.stage_dirty;
6868
   struct iris_screen *screen = batch->screen;
6869
   const struct intel_device_info *devinfo = &screen->devinfo;
6870
   struct iris_binder *binder = &ice->state.binder;
6871
   struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_COMPUTE];
6872
   struct iris_uncompiled_shader *ish =
6873
      ice->shaders.uncompiled[MESA_SHADER_COMPUTE];
6874
   struct iris_compiled_shader *shader =
6875
      ice->shaders.prog[MESA_SHADER_COMPUTE];
6876
   struct brw_stage_prog_data *prog_data = shader->prog_data;
6877
   struct brw_cs_prog_data *cs_prog_data = (void *) prog_data;
6878
   const struct brw_cs_dispatch_info dispatch =
6879
      brw_cs_get_dispatch_info(devinfo, cs_prog_data, grid->block);
6880

6881
   if ((stage_dirty & IRIS_STAGE_DIRTY_CS) ||
6882
       cs_prog_data->local_size[0] == 0 /* Variable local group size */) {
6883
      /* The MEDIA_VFE_STATE documentation for Gfx8+ says:
6884
       *
6885
       *   "A stalling PIPE_CONTROL is required before MEDIA_VFE_STATE unless
6886
       *    the only bits that are changed are scoreboard related: Scoreboard
6887
       *    Enable, Scoreboard Type, Scoreboard Mask, Scoreboard Delta.  For
6888
       *    these scoreboard related states, a MEDIA_STATE_FLUSH is
6889
       *    sufficient."
6890
       */
6891
      iris_emit_pipe_control_flush(batch,
6892
                                   "workaround: stall before MEDIA_VFE_STATE",
6893
                                   PIPE_CONTROL_CS_STALL);
6894

6895
      iris_emit_cmd(batch, GENX(MEDIA_VFE_STATE), vfe) {
6896
         if (prog_data->total_scratch) {
6897
            uint32_t scratch_addr =
6898
               pin_scratch_space(ice, batch, prog_data, MESA_SHADER_COMPUTE);
6899

6900
            vfe.PerThreadScratchSpace = ffs(prog_data->total_scratch) - 11;
6901
            vfe.ScratchSpaceBasePointer =
6902
               rw_bo(NULL, scratch_addr, IRIS_DOMAIN_NONE);
6903
         }
6904

6905
         vfe.MaximumNumberofThreads =
6906
            devinfo->max_cs_threads * screen->subslice_total - 1;
6907
#if GFX_VER < 11
6908
         vfe.ResetGatewayTimer =
6909
            Resettingrelativetimerandlatchingtheglobaltimestamp;
6910
#endif
6911
#if GFX_VER == 8
6912
         vfe.BypassGatewayControl = true;
6913
#endif
6914
         vfe.NumberofURBEntries = 2;
6915
         vfe.URBEntryAllocationSize = 2;
6916

6917
         vfe.CURBEAllocationSize =
6918
            ALIGN(cs_prog_data->push.per_thread.regs * dispatch.threads +
6919
                  cs_prog_data->push.cross_thread.regs, 2);
6920
      }
6921
   }
6922

6923
   /* TODO: Combine subgroup-id with cbuf0 so we can push regular uniforms */
6924
   if ((stage_dirty & IRIS_STAGE_DIRTY_CS) ||
6925
       cs_prog_data->local_size[0] == 0 /* Variable local group size */) {
6926
      uint32_t curbe_data_offset = 0;
6927
      assert(cs_prog_data->push.cross_thread.dwords == 0 &&
6928
             cs_prog_data->push.per_thread.dwords == 1 &&
6929
             cs_prog_data->base.param[0] == BRW_PARAM_BUILTIN_SUBGROUP_ID);
6930
      const unsigned push_const_size =
6931
         brw_cs_push_const_total_size(cs_prog_data, dispatch.threads);
6932
      uint32_t *curbe_data_map =
6933
         stream_state(batch, ice->state.dynamic_uploader,
6934
                      &ice->state.last_res.cs_thread_ids,
6935
                      ALIGN(push_const_size, 64), 64,
6936
                      &curbe_data_offset);
6937
      assert(curbe_data_map);
6938
      memset(curbe_data_map, 0x5a, ALIGN(push_const_size, 64));
6939
      iris_fill_cs_push_const_buffer(cs_prog_data, dispatch.threads,
6940
                                     curbe_data_map);
6941

6942
      iris_emit_cmd(batch, GENX(MEDIA_CURBE_LOAD), curbe) {
6943
         curbe.CURBETotalDataLength = ALIGN(push_const_size, 64);
6944
         curbe.CURBEDataStartAddress = curbe_data_offset;
6945
      }
6946
   }
6947

6948
   for (unsigned i = 0; i < IRIS_MAX_GLOBAL_BINDINGS; i++) {
6949
      struct pipe_resource *res = ice->state.global_bindings[i];
6950
      if (!res)
6951
         continue;
6952

6953
      iris_use_pinned_bo(batch, iris_resource_bo(res),
6954
                         true, IRIS_DOMAIN_NONE);
6955
   }
6956

6957
   if (stage_dirty & (IRIS_STAGE_DIRTY_SAMPLER_STATES_CS |
6958
                      IRIS_STAGE_DIRTY_BINDINGS_CS |
6959
                      IRIS_STAGE_DIRTY_CONSTANTS_CS |
6960
                      IRIS_STAGE_DIRTY_CS)) {
6961
      uint32_t desc[GENX(INTERFACE_DESCRIPTOR_DATA_length)];
6962

6963
      iris_pack_state(GENX(INTERFACE_DESCRIPTOR_DATA), desc, idd) {
6964
         idd.SharedLocalMemorySize =
6965
            encode_slm_size(GFX_VER, ish->kernel_shared_size);
6966
         idd.KernelStartPointer =
6967
            KSP(shader) + brw_cs_prog_data_prog_offset(cs_prog_data,
6968
                                                       dispatch.simd_size);
6969
         idd.SamplerStatePointer = shs->sampler_table.offset;
6970
         idd.BindingTablePointer = binder->bt_offset[MESA_SHADER_COMPUTE];
6971
         idd.NumberofThreadsinGPGPUThreadGroup = dispatch.threads;
6972
      }
6973

6974
      for (int i = 0; i < GENX(INTERFACE_DESCRIPTOR_DATA_length); i++)
6975
         desc[i] |= ((uint32_t *) shader->derived_data)[i];
6976

6977
      iris_emit_cmd(batch, GENX(MEDIA_INTERFACE_DESCRIPTOR_LOAD), load) {
6978
         load.InterfaceDescriptorTotalLength =
6979
            GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t);
6980
         load.InterfaceDescriptorDataStartAddress =
6981
            emit_state(batch, ice->state.dynamic_uploader,
6982
                       &ice->state.last_res.cs_desc, desc, sizeof(desc), 64);
6983
      }
6984
   }
6985

6986
   if (grid->indirect)
6987
      iris_load_indirect_location(ice, batch, grid);
6988

6989
   iris_measure_snapshot(ice, batch, INTEL_SNAPSHOT_COMPUTE, NULL, NULL, NULL);
6990

6991
   iris_emit_cmd(batch, GENX(GPGPU_WALKER), ggw) {
6992
      ggw.IndirectParameterEnable    = grid->indirect != NULL;
6993
      ggw.SIMDSize                   = dispatch.simd_size / 16;
6994
      ggw.ThreadDepthCounterMaximum  = 0;
6995
      ggw.ThreadHeightCounterMaximum = 0;
6996
      ggw.ThreadWidthCounterMaximum  = dispatch.threads - 1;
6997
      ggw.ThreadGroupIDXDimension    = grid->grid[0];
6998
      ggw.ThreadGroupIDYDimension    = grid->grid[1];
6999
      ggw.ThreadGroupIDZDimension    = grid->grid[2];
7000
      ggw.RightExecutionMask         = dispatch.right_mask;
7001
      ggw.BottomExecutionMask        = 0xffffffff;
7002
   }
7003

7004
   iris_emit_cmd(batch, GENX(MEDIA_STATE_FLUSH), msf);
7005
}
7006

7007
#endif /* #if GFX_VERx10 >= 125 */
7008

7009
static void
7010
iris_upload_compute_state(struct iris_context *ice,
7011
                          struct iris_batch *batch,
7012
                          const struct pipe_grid_info *grid)
7013
{
7014
   const uint64_t stage_dirty = ice->state.stage_dirty;
7015
   struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_COMPUTE];
7016
   struct iris_compiled_shader *shader =
7017
      ice->shaders.prog[MESA_SHADER_COMPUTE];
7018

7019
   iris_batch_sync_region_start(batch);
7020

7021
   /* Always pin the binder.  If we're emitting new binding table pointers,
7022
    * we need it.  If not, we're probably inheriting old tables via the
7023
    * context, and need it anyway.  Since true zero-bindings cases are
7024
    * practically non-existent, just pin it and avoid last_res tracking.
7025
    */
7026
   iris_use_pinned_bo(batch, ice->state.binder.bo, false, IRIS_DOMAIN_NONE);
7027

7028
   if (((stage_dirty & IRIS_STAGE_DIRTY_CONSTANTS_CS) &&
7029
        shs->sysvals_need_upload) ||
7030
       shader->kernel_input_size > 0)
7031
      upload_sysvals(ice, MESA_SHADER_COMPUTE, grid);
7032

7033
   if (stage_dirty & IRIS_STAGE_DIRTY_BINDINGS_CS)
7034
      iris_populate_binding_table(ice, batch, MESA_SHADER_COMPUTE, false);
7035

7036
   if (stage_dirty & IRIS_STAGE_DIRTY_SAMPLER_STATES_CS)
7037
      iris_upload_sampler_states(ice, MESA_SHADER_COMPUTE);
7038

7039
   iris_use_optional_res(batch, shs->sampler_table.res, false,
7040
                         IRIS_DOMAIN_NONE);
7041
   iris_use_pinned_bo(batch, iris_resource_bo(shader->assembly.res), false,
7042
                      IRIS_DOMAIN_NONE);
7043

7044
   if (ice->state.need_border_colors)
7045
      iris_use_pinned_bo(batch, ice->state.border_color_pool.bo, false,
7046
                         IRIS_DOMAIN_NONE);
7047

7048
#if GFX_VER >= 12
7049
   genX(invalidate_aux_map_state)(batch);
7050
#endif
7051

7052
#if GFX_VERx10 >= 125
7053
   iris_upload_compute_walker(ice, batch, grid);
7054
#else
7055
   iris_upload_gpgpu_walker(ice, batch, grid);
7056
#endif
7057

7058
   if (!batch->contains_draw_with_next_seqno) {
7059
      iris_restore_compute_saved_bos(ice, batch, grid);
7060
      batch->contains_draw_with_next_seqno = batch->contains_draw = true;
7061
   }
7062

7063
   iris_batch_sync_region_end(batch);
7064
}
7065

7066
/**
7067
 * State module teardown.
7068
 */
7069
static void
7070
iris_destroy_state(struct iris_context *ice)
7071
{
7072
   struct iris_genx_state *genx = ice->state.genx;
7073

7074
   pipe_resource_reference(&ice->draw.draw_params.res, NULL);
7075
   pipe_resource_reference(&ice->draw.derived_draw_params.res, NULL);
7076

7077
   /* Loop over all VBOs, including ones for draw parameters */
7078
   for (unsigned i = 0; i < ARRAY_SIZE(genx->vertex_buffers); i++) {
7079
      pipe_resource_reference(&genx->vertex_buffers[i].resource, NULL);
7080
   }
7081

7082
   free(ice->state.genx);
7083

7084
   for (int i = 0; i < 4; i++) {
7085
      pipe_so_target_reference(&ice->state.so_target[i], NULL);
7086
   }
7087

7088
   for (unsigned i = 0; i < ice->state.framebuffer.nr_cbufs; i++) {
7089
      pipe_surface_reference(&ice->state.framebuffer.cbufs[i], NULL);
7090
   }
7091
   pipe_surface_reference(&ice->state.framebuffer.zsbuf, NULL);
7092

7093
   for (int stage = 0; stage < MESA_SHADER_STAGES; stage++) {
7094
      struct iris_shader_state *shs = &ice->state.shaders[stage];
7095
      pipe_resource_reference(&shs->sampler_table.res, NULL);
7096
      for (int i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) {
7097
         pipe_resource_reference(&shs->constbuf[i].buffer, NULL);
7098
         pipe_resource_reference(&shs->constbuf_surf_state[i].res, NULL);
7099
      }
7100
      for (int i = 0; i < PIPE_MAX_SHADER_IMAGES; i++) {
7101
         pipe_resource_reference(&shs->image[i].base.resource, NULL);
7102
         pipe_resource_reference(&shs->image[i].surface_state.ref.res, NULL);
7103
         free(shs->image[i].surface_state.cpu);
7104
      }
7105
      for (int i = 0; i < PIPE_MAX_SHADER_BUFFERS; i++) {
7106
         pipe_resource_reference(&shs->ssbo[i].buffer, NULL);
7107
         pipe_resource_reference(&shs->ssbo_surf_state[i].res, NULL);
7108
      }
7109
      for (int i = 0; i < IRIS_MAX_TEXTURE_SAMPLERS; i++) {
7110
         pipe_sampler_view_reference((struct pipe_sampler_view **)
7111
                                     &shs->textures[i], NULL);
7112
      }
7113
   }
7114

7115
   pipe_resource_reference(&ice->state.grid_size.res, NULL);
7116
   pipe_resource_reference(&ice->state.grid_surf_state.res, NULL);
7117

7118
   pipe_resource_reference(&ice->state.null_fb.res, NULL);
7119
   pipe_resource_reference(&ice->state.unbound_tex.res, NULL);
7120

7121
   pipe_resource_reference(&ice->state.last_res.cc_vp, NULL);
7122
   pipe_resource_reference(&ice->state.last_res.sf_cl_vp, NULL);
7123
   pipe_resource_reference(&ice->state.last_res.color_calc, NULL);
7124
   pipe_resource_reference(&ice->state.last_res.scissor, NULL);
7125
   pipe_resource_reference(&ice->state.last_res.blend, NULL);
7126
   pipe_resource_reference(&ice->state.last_res.index_buffer, NULL);
7127
   pipe_resource_reference(&ice->state.last_res.cs_thread_ids, NULL);
7128
   pipe_resource_reference(&ice->state.last_res.cs_desc, NULL);
7129
}
7130

7131
/* ------------------------------------------------------------------- */
7132

7133
static void
7134
iris_rebind_buffer(struct iris_context *ice,
7135
                   struct iris_resource *res)
7136
{
7137
   struct pipe_context *ctx = &ice->ctx;
7138
   struct iris_genx_state *genx = ice->state.genx;
7139

7140
   assert(res->base.b.target == PIPE_BUFFER);
7141

7142
   /* Buffers can't be framebuffer attachments, nor display related,
7143
    * and we don't have upstream Clover support.
7144
    */
7145
   assert(!(res->bind_history & (PIPE_BIND_DEPTH_STENCIL |
7146
                                 PIPE_BIND_RENDER_TARGET |
7147
                                 PIPE_BIND_BLENDABLE |
7148
                                 PIPE_BIND_DISPLAY_TARGET |
7149
                                 PIPE_BIND_CURSOR |
7150
                                 PIPE_BIND_COMPUTE_RESOURCE |
7151
                                 PIPE_BIND_GLOBAL)));
7152

7153
   if (res->bind_history & PIPE_BIND_VERTEX_BUFFER) {
7154
      uint64_t bound_vbs = ice->state.bound_vertex_buffers;
7155
      while (bound_vbs) {
7156
         const int i = u_bit_scan64(&bound_vbs);
7157
         struct iris_vertex_buffer_state *state = &genx->vertex_buffers[i];
7158

7159
         /* Update the CPU struct */
7160
         STATIC_ASSERT(GENX(VERTEX_BUFFER_STATE_BufferStartingAddress_start) == 32);
7161
         STATIC_ASSERT(GENX(VERTEX_BUFFER_STATE_BufferStartingAddress_bits) == 64);
7162
         uint64_t *addr = (uint64_t *) &state->state[1];
7163
         struct iris_bo *bo = iris_resource_bo(state->resource);
7164

7165
         if (*addr != bo->gtt_offset + state->offset) {
7166
            *addr = bo->gtt_offset + state->offset;
7167
            ice->state.dirty |= IRIS_DIRTY_VERTEX_BUFFERS;
7168
         }
7169
      }
7170
   }
7171

7172
   /* We don't need to handle PIPE_BIND_INDEX_BUFFER here: we re-emit
7173
    * the 3DSTATE_INDEX_BUFFER packet whenever the address changes.
7174
    *
7175
    * There is also no need to handle these:
7176
    * - PIPE_BIND_COMMAND_ARGS_BUFFER (emitted for every indirect draw)
7177
    * - PIPE_BIND_QUERY_BUFFER (no persistent state references)
7178
    */
7179

7180
   if (res->bind_history & PIPE_BIND_STREAM_OUTPUT) {
7181
      uint32_t *so_buffers = genx->so_buffers;
7182
      for (unsigned i = 0; i < 4; i++,
7183
           so_buffers += GENX(3DSTATE_SO_BUFFER_length)) {
7184

7185
         /* There are no other fields in bits 127:64 */
7186
         uint64_t *addr = (uint64_t *) &so_buffers[2];
7187
         STATIC_ASSERT(GENX(3DSTATE_SO_BUFFER_SurfaceBaseAddress_start) == 66);
7188
         STATIC_ASSERT(GENX(3DSTATE_SO_BUFFER_SurfaceBaseAddress_bits) == 46);
7189

7190
         struct pipe_stream_output_target *tgt = ice->state.so_target[i];
7191
         if (tgt) {
7192
            struct iris_bo *bo = iris_resource_bo(tgt->buffer);
7193
            if (*addr != bo->gtt_offset + tgt->buffer_offset) {
7194
               *addr = bo->gtt_offset + tgt->buffer_offset;
7195
               ice->state.dirty |= IRIS_DIRTY_SO_BUFFERS;
7196
            }
7197
         }
7198
      }
7199
   }
7200

7201
   for (int s = MESA_SHADER_VERTEX; s < MESA_SHADER_STAGES; s++) {
7202
      struct iris_shader_state *shs = &ice->state.shaders[s];
7203
      enum pipe_shader_type p_stage = stage_to_pipe(s);
7204

7205
      if (!(res->bind_stages & (1 << s)))
7206
         continue;
7207

7208
      if (res->bind_history & PIPE_BIND_CONSTANT_BUFFER) {
7209
         /* Skip constant buffer 0, it's for regular uniforms, not UBOs */
7210
         uint32_t bound_cbufs = shs->bound_cbufs & ~1u;
7211
         while (bound_cbufs) {
7212
            const int i = u_bit_scan(&bound_cbufs);
7213
            struct pipe_shader_buffer *cbuf = &shs->constbuf[i];
7214
            struct iris_state_ref *surf_state = &shs->constbuf_surf_state[i];
7215

7216
            if (res->bo == iris_resource_bo(cbuf->buffer)) {
7217
               pipe_resource_reference(&surf_state->res, NULL);
7218
               ice->state.stage_dirty |= IRIS_STAGE_DIRTY_CONSTANTS_VS << s;
7219
            }
7220
         }
7221
      }
7222

7223
      if (res->bind_history & PIPE_BIND_SHADER_BUFFER) {
7224
         uint32_t bound_ssbos = shs->bound_ssbos;
7225
         while (bound_ssbos) {
7226
            const int i = u_bit_scan(&bound_ssbos);
7227
            struct pipe_shader_buffer *ssbo = &shs->ssbo[i];
7228

7229
            if (res->bo == iris_resource_bo(ssbo->buffer)) {
7230
               struct pipe_shader_buffer buf = {
7231
                  .buffer = &res->base.b,
7232
                  .buffer_offset = ssbo->buffer_offset,
7233
                  .buffer_size = ssbo->buffer_size,
7234
               };
7235
               iris_set_shader_buffers(ctx, p_stage, i, 1, &buf,
7236
                                       (shs->writable_ssbos >> i) & 1);
7237
            }
7238
         }
7239
      }
7240

7241
      if (res->bind_history & PIPE_BIND_SAMPLER_VIEW) {
7242
         uint32_t bound_sampler_views = shs->bound_sampler_views;
7243
         while (bound_sampler_views) {
7244
            const int i = u_bit_scan(&bound_sampler_views);
7245
            struct iris_sampler_view *isv = shs->textures[i];
7246
            struct iris_bo *bo = isv->res->bo;
7247

7248
            if (update_surface_state_addrs(ice->state.surface_uploader,
7249
                                           &isv->surface_state, bo)) {
7250
               ice->state.stage_dirty |= IRIS_STAGE_DIRTY_BINDINGS_VS << s;
7251
            }
7252
         }
7253
      }
7254

7255
      if (res->bind_history & PIPE_BIND_SHADER_IMAGE) {
7256
         uint32_t bound_image_views = shs->bound_image_views;
7257
         while (bound_image_views) {
7258
            const int i = u_bit_scan(&bound_image_views);
7259
            struct iris_image_view *iv = &shs->image[i];
7260
            struct iris_bo *bo = iris_resource_bo(iv->base.resource);
7261

7262
            if (update_surface_state_addrs(ice->state.surface_uploader,
7263
                                           &iv->surface_state, bo)) {
7264
               ice->state.stage_dirty |= IRIS_STAGE_DIRTY_BINDINGS_VS << s;
7265
            }
7266
         }
7267
      }
7268
   }
7269
}
7270

7271
/* ------------------------------------------------------------------- */
7272

7273
/**
7274
 * Introduce a batch synchronization boundary, and update its cache coherency
7275
 * status to reflect the execution of a PIPE_CONTROL command with the
7276
 * specified flags.
7277
 */
7278
static void
7279
batch_mark_sync_for_pipe_control(struct iris_batch *batch, uint32_t flags)
7280
{
7281
   iris_batch_sync_boundary(batch);
7282

7283
   if ((flags & PIPE_CONTROL_CS_STALL)) {
7284
      if ((flags & PIPE_CONTROL_RENDER_TARGET_FLUSH))
7285
         iris_batch_mark_flush_sync(batch, IRIS_DOMAIN_RENDER_WRITE);
7286

7287
      if ((flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH))
7288
         iris_batch_mark_flush_sync(batch, IRIS_DOMAIN_DEPTH_WRITE);
7289

7290
      if ((flags & PIPE_CONTROL_FLUSH_ENABLE))
7291
         iris_batch_mark_flush_sync(batch, IRIS_DOMAIN_OTHER_WRITE);
7292

7293
      if ((flags & (PIPE_CONTROL_CACHE_FLUSH_BITS |
7294
                    PIPE_CONTROL_STALL_AT_SCOREBOARD)))
7295
         iris_batch_mark_flush_sync(batch, IRIS_DOMAIN_OTHER_READ);
7296
   }
7297

7298
   if ((flags & PIPE_CONTROL_RENDER_TARGET_FLUSH))
7299
      iris_batch_mark_invalidate_sync(batch, IRIS_DOMAIN_RENDER_WRITE);
7300

7301
   if ((flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH))
7302
      iris_batch_mark_invalidate_sync(batch, IRIS_DOMAIN_DEPTH_WRITE);
7303

7304
   if ((flags & PIPE_CONTROL_FLUSH_ENABLE))
7305
      iris_batch_mark_invalidate_sync(batch, IRIS_DOMAIN_OTHER_WRITE);
7306

7307
   if ((flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE) &&
7308
       (flags & PIPE_CONTROL_CONST_CACHE_INVALIDATE))
7309
      iris_batch_mark_invalidate_sync(batch, IRIS_DOMAIN_OTHER_READ);
7310
}
7311

7312
static unsigned
7313
flags_to_post_sync_op(uint32_t flags)
7314
{
7315
   if (flags & PIPE_CONTROL_WRITE_IMMEDIATE)
7316
      return WriteImmediateData;
7317

7318
   if (flags & PIPE_CONTROL_WRITE_DEPTH_COUNT)
7319
      return WritePSDepthCount;
7320

7321
   if (flags & PIPE_CONTROL_WRITE_TIMESTAMP)
7322
      return WriteTimestamp;
7323

7324
   return 0;
7325
}
7326

7327
/**
7328
 * Do the given flags have a Post Sync or LRI Post Sync operation?
7329
 */
7330
static enum pipe_control_flags
7331
get_post_sync_flags(enum pipe_control_flags flags)
7332
{
7333
   flags &= PIPE_CONTROL_WRITE_IMMEDIATE |
7334
            PIPE_CONTROL_WRITE_DEPTH_COUNT |
7335
            PIPE_CONTROL_WRITE_TIMESTAMP |
7336
            PIPE_CONTROL_LRI_POST_SYNC_OP;
7337

7338
   /* Only one "Post Sync Op" is allowed, and it's mutually exclusive with
7339
    * "LRI Post Sync Operation".  So more than one bit set would be illegal.
7340
    */
7341
   assert(util_bitcount(flags) <= 1);
7342

7343
   return flags;
7344
}
7345

7346
/* True when the batch's name is IRIS_BATCH_COMPUTE.  The argument is
 * parenthesized so that any pointer-valued expression (e.g. `&b` or
 * `p + 1`) expands correctly, not just a plain identifier.
 */
#define IS_COMPUTE_PIPELINE(batch) ((batch)->name == IRIS_BATCH_COMPUTE)
7347

7348
/**
7349
 * Emit a series of PIPE_CONTROL commands, taking into account any
7350
 * workarounds necessary to actually accomplish the caller's request.
7351
 *
7352
 * Unless otherwise noted, spec quotations in this function come from:
7353
 *
7354
 * Synchronization of the 3D Pipeline > PIPE_CONTROL Command > Programming
7355
 * Restrictions for PIPE_CONTROL.
7356
 *
7357
 * You should not use this function directly.  Use the helpers in
7358
 * iris_pipe_control.c instead, which may split the pipe control further.
7359
 */
7360
static void
7361
iris_emit_raw_pipe_control(struct iris_batch *batch,
7362
                           const char *reason,
7363
                           uint32_t flags,
7364
                           struct iris_bo *bo,
7365
                           uint32_t offset,
7366
                           uint64_t imm)
7367
{
7368
   UNUSED const struct intel_device_info *devinfo = &batch->screen->devinfo;
7369
   enum pipe_control_flags post_sync_flags = get_post_sync_flags(flags);
7370
   enum pipe_control_flags non_lri_post_sync_flags =
7371
      post_sync_flags & ~PIPE_CONTROL_LRI_POST_SYNC_OP;
7372

7373
   /* Recursive PIPE_CONTROL workarounds --------------------------------
7374
    * (http://knowyourmeme.com/memes/xzibit-yo-dawg)
7375
    *
7376
    * We do these first because we want to look at the original operation,
7377
    * rather than any workarounds we set.
7378
    */
7379
   if (GFX_VER == 9 && (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE)) {
7380
      /* The PIPE_CONTROL "VF Cache Invalidation Enable" bit description
7381
       * lists several workarounds:
7382
       *
7383
       *    "Project: SKL, KBL, BXT
7384
       *
7385
       *     If the VF Cache Invalidation Enable is set to a 1 in a
7386
       *     PIPE_CONTROL, a separate Null PIPE_CONTROL, all bitfields
7387
       *     sets to 0, with the VF Cache Invalidation Enable set to 0
7388
       *     needs to be sent prior to the PIPE_CONTROL with VF Cache
7389
       *     Invalidation Enable set to a 1."
7390
       */
7391
      iris_emit_raw_pipe_control(batch,
7392
                                 "workaround: recursive VF cache invalidate",
7393
                                 0, NULL, 0, 0);
7394
   }
7395

7396
   /* Wa_1409226450, Wait for EU to be idle before pipe control which
7397
    * invalidates the instruction cache
7398
    */
7399
   if (GFX_VER == 12 && (flags & PIPE_CONTROL_INSTRUCTION_INVALIDATE)) {
7400
      iris_emit_raw_pipe_control(batch,
7401
                                 "workaround: CS stall before instruction "
7402
                                 "cache invalidate",
7403
                                 PIPE_CONTROL_CS_STALL |
7404
                                 PIPE_CONTROL_STALL_AT_SCOREBOARD, bo, offset,
7405
                                 imm);
7406
   }
7407

7408
   if ((GFX_VER == 9 || (GFX_VER == 12 && devinfo->revision == 0 /* A0*/)) &&
7409
        IS_COMPUTE_PIPELINE(batch) && post_sync_flags) {
7410
      /* Project: SKL / Argument: LRI Post Sync Operation [23]
7411
       *
7412
       * "PIPECONTROL command with “Command Streamer Stall Enable” must be
7413
       *  programmed prior to programming a PIPECONTROL command with "LRI
7414
       *  Post Sync Operation" in GPGPU mode of operation (i.e when
7415
       *  PIPELINE_SELECT command is set to GPGPU mode of operation)."
7416
       *
7417
       * The same text exists a few rows below for Post Sync Op.
7418
       *
7419
       * On Gfx12 this is Wa_1607156449.
7420
       */
7421
      iris_emit_raw_pipe_control(batch,
7422
                                 "workaround: CS stall before gpgpu post-sync",
7423
                                 PIPE_CONTROL_CS_STALL, bo, offset, imm);
7424
   }
7425

7426
   /* "Flush Types" workarounds ---------------------------------------------
7427
    * We do these now because they may add post-sync operations or CS stalls.
7428
    */
7429

7430
   if (GFX_VER < 11 && flags & PIPE_CONTROL_VF_CACHE_INVALIDATE) {
7431
      /* Project: BDW, SKL+ (stopping at CNL) / Argument: VF Invalidate
7432
       *
7433
       * "'Post Sync Operation' must be enabled to 'Write Immediate Data' or
7434
       *  'Write PS Depth Count' or 'Write Timestamp'."
7435
       */
7436
      if (!bo) {
7437
         flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
7438
         post_sync_flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
7439
         non_lri_post_sync_flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
7440
         bo = batch->screen->workaround_address.bo;
7441
         offset = batch->screen->workaround_address.offset;
7442
      }
7443
   }
7444

7445
   if (flags & PIPE_CONTROL_DEPTH_STALL) {
7446
      /* From the PIPE_CONTROL instruction table, bit 13 (Depth Stall Enable):
7447
       *
7448
       *    "This bit must be DISABLED for operations other than writing
7449
       *     PS_DEPTH_COUNT."
7450
       *
7451
       * This seems like nonsense.  An Ivybridge workaround requires us to
7452
       * emit a PIPE_CONTROL with a depth stall and write immediate post-sync
7453
       * operation.  Gfx8+ requires us to emit depth stalls and depth cache
7454
       * flushes together.  So, it's hard to imagine this means anything other
7455
       * than "we originally intended this to be used for PS_DEPTH_COUNT".
7456
       *
7457
       * We ignore the supposed restriction and do nothing.
7458
       */
7459
   }
7460

7461
   if (flags & (PIPE_CONTROL_RENDER_TARGET_FLUSH |
7462
                PIPE_CONTROL_STALL_AT_SCOREBOARD)) {
7463
      /* From the PIPE_CONTROL instruction table, bit 12 and bit 1:
7464
       *
7465
       *    "This bit must be DISABLED for End-of-pipe (Read) fences,
7466
       *     PS_DEPTH_COUNT or TIMESTAMP queries."
7467
       *
7468
       * TODO: Implement end-of-pipe checking.
7469
       */
7470
      assert(!(post_sync_flags & (PIPE_CONTROL_WRITE_DEPTH_COUNT |
7471
                                  PIPE_CONTROL_WRITE_TIMESTAMP)));
7472
   }
7473

7474
   if (GFX_VER < 11 && (flags & PIPE_CONTROL_STALL_AT_SCOREBOARD)) {
7475
      /* From the PIPE_CONTROL instruction table, bit 1:
7476
       *
7477
       *    "This bit is ignored if Depth Stall Enable is set.
7478
       *     Further, the render cache is not flushed even if Write Cache
7479
       *     Flush Enable bit is set."
7480
       *
7481
       * We assert that the caller doesn't do this combination, to try and
7482
       * prevent mistakes.  It shouldn't hurt the GPU, though.
7483
       *
7484
       * We skip this check on Gfx11+ as the "Stall at Pixel Scoreboard"
7485
       * and "Render Target Flush" combo is explicitly required for BTI
7486
       * update workarounds.
7487
       */
7488
      assert(!(flags & (PIPE_CONTROL_DEPTH_STALL |
7489
                        PIPE_CONTROL_RENDER_TARGET_FLUSH)));
7490
   }
7491

7492
   /* PIPE_CONTROL page workarounds ------------------------------------- */
7493

7494
   if (GFX_VER <= 8 && (flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE)) {
7495
      /* From the PIPE_CONTROL page itself:
7496
       *
7497
       *    "IVB, HSW, BDW
7498
       *     Restriction: Pipe_control with CS-stall bit set must be issued
7499
       *     before a pipe-control command that has the State Cache
7500
       *     Invalidate bit set."
7501
       */
7502
      flags |= PIPE_CONTROL_CS_STALL;
7503
   }
7504

7505
   if (flags & PIPE_CONTROL_FLUSH_LLC) {
7506
      /* From the PIPE_CONTROL instruction table, bit 26 (Flush LLC):
7507
       *
7508
       *    "Project: ALL
7509
       *     SW must always program Post-Sync Operation to "Write Immediate
7510
       *     Data" when Flush LLC is set."
7511
       *
7512
       * For now, we just require the caller to do it.
7513
       */
7514
      assert(flags & PIPE_CONTROL_WRITE_IMMEDIATE);
7515
   }
7516

7517
   /* "Post-Sync Operation" workarounds -------------------------------- */
7518

7519
   /* Project: All / Argument: Global Snapshot Count Reset [19]
7520
    *
7521
    * "This bit must not be exercised on any product.
7522
    *  Requires stall bit ([20] of DW1) set."
7523
    *
7524
    * We don't use this, so we just assert that it isn't used.  The
7525
    * PIPE_CONTROL instruction page indicates that they intended this
7526
    * as a debug feature and don't think it is useful in production,
7527
    * but it may actually be usable, should we ever want to.
7528
    */
7529
   assert((flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET) == 0);
7530

7531
   if (flags & (PIPE_CONTROL_MEDIA_STATE_CLEAR |
7532
                PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE)) {
7533
      /* Project: All / Arguments:
7534
       *
7535
       * - Generic Media State Clear [16]
7536
       * - Indirect State Pointers Disable [16]
7537
       *
7538
       *    "Requires stall bit ([20] of DW1) set."
7539
       *
7540
       * Also, the PIPE_CONTROL instruction table, bit 16 (Generic Media
7541
       * State Clear) says:
7542
       *
7543
       *    "PIPECONTROL command with “Command Streamer Stall Enable” must be
7544
       *     programmed prior to programming a PIPECONTROL command with "Media
7545
       *     State Clear" set in GPGPU mode of operation"
7546
       *
7547
       * This is a subset of the earlier rule, so there's nothing to do.
7548
       */
7549
      flags |= PIPE_CONTROL_CS_STALL;
7550
   }
7551

7552
   if (flags & PIPE_CONTROL_STORE_DATA_INDEX) {
7553
      /* Project: All / Argument: Store Data Index
7554
       *
7555
       * "Post-Sync Operation ([15:14] of DW1) must be set to something other
7556
       *  than '0'."
7557
       *
7558
       * For now, we just assert that the caller does this.  We might want to
7559
       * automatically add a write to the workaround BO...
7560
       */
7561
      assert(non_lri_post_sync_flags != 0);
7562
   }
7563

7564
   if (flags & PIPE_CONTROL_SYNC_GFDT) {
7565
      /* Project: All / Argument: Sync GFDT
7566
       *
7567
       * "Post-Sync Operation ([15:14] of DW1) must be set to something other
7568
       *  than '0' or 0x2520[13] must be set."
7569
       *
7570
       * For now, we just assert that the caller does this.
7571
       */
7572
      assert(non_lri_post_sync_flags != 0);
7573
   }
7574

7575
   if (flags & PIPE_CONTROL_TLB_INVALIDATE) {
7576
      /* Project: IVB+ / Argument: TLB inv
7577
       *
7578
       *    "Requires stall bit ([20] of DW1) set."
7579
       *
7580
       * Also, from the PIPE_CONTROL instruction table:
7581
       *
7582
       *    "Project: SKL+
7583
       *     Post Sync Operation or CS stall must be set to ensure a TLB
7584
       *     invalidation occurs.  Otherwise no cycle will occur to the TLB
7585
       *     cache to invalidate."
7586
       *
7587
       * This is not a subset of the earlier rule, so there's nothing to do.
7588
       */
7589
      flags |= PIPE_CONTROL_CS_STALL;
7590
   }
7591

7592
   if (GFX_VER == 9 && devinfo->gt == 4) {
7593
      /* TODO: The big Skylake GT4 post sync op workaround */
7594
   }
7595

7596
   /* "GPGPU specific workarounds" (both post-sync and flush) ------------ */
7597

7598
   if (IS_COMPUTE_PIPELINE(batch)) {
7599
      if (GFX_VER >= 9 && (flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE)) {
7600
         /* Project: SKL+ / Argument: Tex Invalidate
7601
          * "Requires stall bit ([20] of DW) set for all GPGPU Workloads."
7602
          */
7603
         flags |= PIPE_CONTROL_CS_STALL;
7604
      }
7605

7606
      if (GFX_VER == 8 && (post_sync_flags ||
7607
                           (flags & (PIPE_CONTROL_NOTIFY_ENABLE |
7608
                                     PIPE_CONTROL_DEPTH_STALL |
7609
                                     PIPE_CONTROL_RENDER_TARGET_FLUSH |
7610
                                     PIPE_CONTROL_DEPTH_CACHE_FLUSH |
7611
                                     PIPE_CONTROL_DATA_CACHE_FLUSH)))) {
7612
         /* Project: BDW / Arguments:
7613
          *
7614
          * - LRI Post Sync Operation   [23]
7615
          * - Post Sync Op              [15:14]
7616
          * - Notify En                 [8]
7617
          * - Depth Stall               [13]
7618
          * - Render Target Cache Flush [12]
7619
          * - Depth Cache Flush         [0]
7620
          * - DC Flush Enable           [5]
7621
          *
7622
          *    "Requires stall bit ([20] of DW) set for all GPGPU and Media
7623
          *     Workloads."
7624
          */
7625
         flags |= PIPE_CONTROL_CS_STALL;
7626

7627
         /* Also, from the PIPE_CONTROL instruction table, bit 20:
7628
          *
7629
          *    "Project: BDW
7630
          *     This bit must be always set when PIPE_CONTROL command is
7631
          *     programmed by GPGPU and MEDIA workloads, except for the cases
7632
          *     when only Read Only Cache Invalidation bits are set (State
7633
          *     Cache Invalidation Enable, Instruction cache Invalidation
7634
          *     Enable, Texture Cache Invalidation Enable, Constant Cache
7635
          *     Invalidation Enable). This is to WA FFDOP CG issue, this WA
7636
          *     need not implemented when FF_DOP_CG is disable via "Fixed
7637
          *     Function DOP Clock Gate Disable" bit in RC_PSMI_CTRL register."
7638
          *
7639
          * It sounds like we could avoid CS stalls in some cases, but we
7640
          * don't currently bother.  This list isn't exactly the list above,
7641
          * either...
7642
          */
7643
      }
7644
   }
7645

7646
   /* "Stall" workarounds ----------------------------------------------
7647
    * These have to come after the earlier ones because we may have added
7648
    * some additional CS stalls above.
7649
    */
7650

7651
   if (GFX_VER < 9 && (flags & PIPE_CONTROL_CS_STALL)) {
7652
      /* Project: PRE-SKL, VLV, CHV
7653
       *
7654
       * "[All Stepping][All SKUs]:
7655
       *
7656
       *  One of the following must also be set:
7657
       *
7658
       *  - Render Target Cache Flush Enable ([12] of DW1)
7659
       *  - Depth Cache Flush Enable ([0] of DW1)
7660
       *  - Stall at Pixel Scoreboard ([1] of DW1)
7661
       *  - Depth Stall ([13] of DW1)
7662
       *  - Post-Sync Operation ([13] of DW1)
7663
       *  - DC Flush Enable ([5] of DW1)"
7664
       *
7665
       * If we don't already have one of those bits set, we choose to add
7666
       * "Stall at Pixel Scoreboard".  Some of the other bits require a
7667
       * CS stall as a workaround (see above), which would send us into
7668
       * an infinite recursion of PIPE_CONTROLs.  "Stall at Pixel Scoreboard"
7669
       * appears to be safe, so we choose that.
7670
       */
7671
      const uint32_t wa_bits = PIPE_CONTROL_RENDER_TARGET_FLUSH |
7672
                               PIPE_CONTROL_DEPTH_CACHE_FLUSH |
7673
                               PIPE_CONTROL_WRITE_IMMEDIATE |
7674
                               PIPE_CONTROL_WRITE_DEPTH_COUNT |
7675
                               PIPE_CONTROL_WRITE_TIMESTAMP |
7676
                               PIPE_CONTROL_STALL_AT_SCOREBOARD |
7677
                               PIPE_CONTROL_DEPTH_STALL |
7678
                               PIPE_CONTROL_DATA_CACHE_FLUSH;
7679
      if (!(flags & wa_bits))
7680
         flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
7681
   }
7682

7683
   if (GFX_VER >= 12 && (flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH)) {
7684
      /* Wa_1409600907:
7685
       *
7686
       * "PIPE_CONTROL with Depth Stall Enable bit must be set
7687
       * with any PIPE_CONTROL with Depth Flush Enable bit set.
7688
       */
7689
      flags |= PIPE_CONTROL_DEPTH_STALL;
7690
   }
7691

7692
   /* Emit --------------------------------------------------------------- */
7693

7694
   if (INTEL_DEBUG & DEBUG_PIPE_CONTROL) {
7695
      fprintf(stderr,
7696
              "  PC [%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%"PRIx64"]: %s\n",
7697
              (flags & PIPE_CONTROL_FLUSH_ENABLE) ? "PipeCon " : "",
7698
              (flags & PIPE_CONTROL_CS_STALL) ? "CS " : "",
7699
              (flags & PIPE_CONTROL_STALL_AT_SCOREBOARD) ? "Scoreboard " : "",
7700
              (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE) ? "VF " : "",
7701
              (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH) ? "RT " : "",
7702
              (flags & PIPE_CONTROL_CONST_CACHE_INVALIDATE) ? "Const " : "",
7703
              (flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE) ? "TC " : "",
7704
              (flags & PIPE_CONTROL_DATA_CACHE_FLUSH) ? "DC " : "",
7705
              (flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH) ? "ZFlush " : "",
7706
              (flags & PIPE_CONTROL_TILE_CACHE_FLUSH) ? "Tile " : "",
7707
              (flags & PIPE_CONTROL_DEPTH_STALL) ? "ZStall " : "",
7708
              (flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE) ? "State " : "",
7709
              (flags & PIPE_CONTROL_TLB_INVALIDATE) ? "TLB " : "",
7710
              (flags & PIPE_CONTROL_INSTRUCTION_INVALIDATE) ? "Inst " : "",
7711
              (flags & PIPE_CONTROL_MEDIA_STATE_CLEAR) ? "MediaClear " : "",
7712
              (flags & PIPE_CONTROL_NOTIFY_ENABLE) ? "Notify " : "",
7713
              (flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET) ?
7714
                 "SnapRes" : "",
7715
              (flags & PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE) ?
7716
                  "ISPDis" : "",
7717
              (flags & PIPE_CONTROL_WRITE_IMMEDIATE) ? "WriteImm " : "",
7718
              (flags & PIPE_CONTROL_WRITE_DEPTH_COUNT) ? "WriteZCount " : "",
7719
              (flags & PIPE_CONTROL_WRITE_TIMESTAMP) ? "WriteTimestamp " : "",
7720
              (flags & PIPE_CONTROL_FLUSH_HDC) ? "HDC " : "",
7721
              imm, reason);
7722
   }
7723

7724
   batch_mark_sync_for_pipe_control(batch, flags);
7725
   iris_batch_sync_region_start(batch);
7726

7727
   iris_emit_cmd(batch, GENX(PIPE_CONTROL), pc) {
7728
#if GFX_VER >= 12
7729
      pc.TileCacheFlushEnable = flags & PIPE_CONTROL_TILE_CACHE_FLUSH;
7730
#endif
7731
#if GFX_VER >= 11
7732
      pc.HDCPipelineFlushEnable = flags & PIPE_CONTROL_FLUSH_HDC;
7733
#endif
7734
      pc.LRIPostSyncOperation = NoLRIOperation;
7735
      pc.PipeControlFlushEnable = flags & PIPE_CONTROL_FLUSH_ENABLE;
7736
      pc.DCFlushEnable = flags & PIPE_CONTROL_DATA_CACHE_FLUSH;
7737
      pc.StoreDataIndex = 0;
7738
      pc.CommandStreamerStallEnable = flags & PIPE_CONTROL_CS_STALL;
7739
      pc.GlobalSnapshotCountReset =
7740
         flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET;
7741
      pc.TLBInvalidate = flags & PIPE_CONTROL_TLB_INVALIDATE;
7742
      pc.GenericMediaStateClear = flags & PIPE_CONTROL_MEDIA_STATE_CLEAR;
7743
      pc.StallAtPixelScoreboard = flags & PIPE_CONTROL_STALL_AT_SCOREBOARD;
7744
      pc.RenderTargetCacheFlushEnable =
7745
         flags & PIPE_CONTROL_RENDER_TARGET_FLUSH;
7746
      pc.DepthCacheFlushEnable = flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH;
7747
      pc.StateCacheInvalidationEnable =
7748
         flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE;
7749
      pc.VFCacheInvalidationEnable = flags & PIPE_CONTROL_VF_CACHE_INVALIDATE;
7750
      pc.ConstantCacheInvalidationEnable =
7751
         flags & PIPE_CONTROL_CONST_CACHE_INVALIDATE;
7752
      pc.PostSyncOperation = flags_to_post_sync_op(flags);
7753
      pc.DepthStallEnable = flags & PIPE_CONTROL_DEPTH_STALL;
7754
      pc.InstructionCacheInvalidateEnable =
7755
         flags & PIPE_CONTROL_INSTRUCTION_INVALIDATE;
7756
      pc.NotifyEnable = flags & PIPE_CONTROL_NOTIFY_ENABLE;
7757
      pc.IndirectStatePointersDisable =
7758
         flags & PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE;
7759
      pc.TextureCacheInvalidationEnable =
7760
         flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
7761
      pc.Address = rw_bo(bo, offset, IRIS_DOMAIN_OTHER_WRITE);
7762
      pc.ImmediateData = imm;
7763
   }
7764

7765
   iris_batch_sync_region_end(batch);
7766
}
7767

7768
#if GFX_VER == 9
7769
/**
7770
 * Preemption on Gfx9 has to be enabled or disabled in various cases.
7771
 *
7772
 * See these workarounds for preemption:
7773
 *  - WaDisableMidObjectPreemptionForGSLineStripAdj
7774
 *  - WaDisableMidObjectPreemptionForTrifanOrPolygon
7775
 *  - WaDisableMidObjectPreemptionForLineLoop
7776
 *  - WA#0798
7777
 *
7778
 * We don't put this in the vtable because it's only used on Gfx9.
7779
 */
7780
void
7781
gfx9_toggle_preemption(struct iris_context *ice,
7782
                       struct iris_batch *batch,
7783
                       const struct pipe_draw_info *draw)
7784
{
7785
   struct iris_genx_state *genx = ice->state.genx;
7786
   bool object_preemption = true;
7787

7788
   /* WaDisableMidObjectPreemptionForGSLineStripAdj
7789
    *
7790
    *    "WA: Disable mid-draw preemption when draw-call is a linestrip_adj
7791
    *     and GS is enabled."
7792
    */
7793
   if (draw->mode == PIPE_PRIM_LINE_STRIP_ADJACENCY &&
7794
       ice->shaders.prog[MESA_SHADER_GEOMETRY])
7795
      object_preemption = false;
7796

7797
   /* WaDisableMidObjectPreemptionForTrifanOrPolygon
7798
    *
7799
    *    "TriFan miscompare in Execlist Preemption test. Cut index that is
7800
    *     on a previous context. End the previous, the resume another context
7801
    *     with a tri-fan or polygon, and the vertex count is corrupted. If we
7802
    *     prempt again we will cause corruption.
7803
    *
7804
    *     WA: Disable mid-draw preemption when draw-call has a tri-fan."
7805
    */
7806
   if (draw->mode == PIPE_PRIM_TRIANGLE_FAN)
7807
      object_preemption = false;
7808

7809
   /* WaDisableMidObjectPreemptionForLineLoop
7810
    *
7811
    *    "VF Stats Counters Missing a vertex when preemption enabled.
7812
    *
7813
    *     WA: Disable mid-draw preemption when the draw uses a lineloop
7814
    *     topology."
7815
    */
7816
   if (draw->mode == PIPE_PRIM_LINE_LOOP)
7817
      object_preemption = false;
7818

7819
   /* WA#0798
7820
    *
7821
    *    "VF is corrupting GAFS data when preempted on an instance boundary
7822
    *     and replayed with instancing enabled.
7823
    *
7824
    *     WA: Disable preemption when using instanceing."
7825
    */
7826
   if (draw->instance_count > 1)
7827
      object_preemption = false;
7828

7829
   if (genx->object_preemption != object_preemption) {
7830
      iris_enable_obj_preemption(batch, object_preemption);
7831
      genx->object_preemption = object_preemption;
7832
   }
7833
}
7834
#endif
7835

7836
static void
7837
iris_lost_genx_state(struct iris_context *ice, struct iris_batch *batch)
7838
{
7839
   struct iris_genx_state *genx = ice->state.genx;
7840

7841
   memset(genx->last_index_buffer, 0, sizeof(genx->last_index_buffer));
7842
}
7843

7844
static void
7845
iris_emit_mi_report_perf_count(struct iris_batch *batch,
7846
                               struct iris_bo *bo,
7847
                               uint32_t offset_in_bytes,
7848
                               uint32_t report_id)
7849
{
7850
   iris_batch_sync_region_start(batch);
7851
   iris_emit_cmd(batch, GENX(MI_REPORT_PERF_COUNT), mi_rpc) {
7852
      mi_rpc.MemoryAddress = rw_bo(bo, offset_in_bytes,
7853
                                   IRIS_DOMAIN_OTHER_WRITE);
7854
      mi_rpc.ReportID = report_id;
7855
   }
7856
   iris_batch_sync_region_end(batch);
7857
}
7858

7859
/**
7860
 * Update the pixel hashing modes that determine the balancing of PS threads
7861
 * across subslices and slices.
7862
 *
7863
 * \param width Width bound of the rendering area (already scaled down if \p
7864
 *              scale is greater than 1).
7865
 * \param height Height bound of the rendering area (already scaled down if \p
7866
 *               scale is greater than 1).
7867
 * \param scale The number of framebuffer samples that could potentially be
7868
 *              affected by an individual channel of the PS thread.  This is
7869
 *              typically one for single-sampled rendering, but for operations
7870
 *              like CCS resolves and fast clears a single PS invocation may
7871
 *              update a huge number of pixels, in which case a finer
7872
 *              balancing is desirable in order to maximally utilize the
7873
 *              bandwidth available.  UINT_MAX can be used as shorthand for
7874
 *              "finest hashing mode available".
7875
 */
7876
void
7877
genX(emit_hashing_mode)(struct iris_context *ice, struct iris_batch *batch,
7878
                        unsigned width, unsigned height, unsigned scale)
7879
{
7880
#if GFX_VER == 9
7881
   const struct intel_device_info *devinfo = &batch->screen->devinfo;
7882
   const unsigned slice_hashing[] = {
7883
      /* Because all Gfx9 platforms with more than one slice require
7884
       * three-way subslice hashing, a single "normal" 16x16 slice hashing
7885
       * block is guaranteed to suffer from substantial imbalance, with one
7886
       * subslice receiving twice as much work as the other two in the
7887
       * slice.
7888
       *
7889
       * The performance impact of that would be particularly severe when
7890
       * three-way hashing is also in use for slice balancing (which is the
7891
       * case for all Gfx9 GT4 platforms), because one of the slices
7892
       * receives one every three 16x16 blocks in either direction, which
7893
       * is roughly the periodicity of the underlying subslice imbalance
7894
       * pattern ("roughly" because in reality the hardware's
7895
       * implementation of three-way hashing doesn't do exact modulo 3
7896
       * arithmetic, which somewhat decreases the magnitude of this effect
7897
       * in practice).  This leads to a systematic subslice imbalance
7898
       * within that slice regardless of the size of the primitive.  The
7899
       * 32x32 hashing mode guarantees that the subslice imbalance within a
7900
       * single slice hashing block is minimal, largely eliminating this
7901
       * effect.
7902
       */
7903
      _32x32,
7904
      /* Finest slice hashing mode available. */
7905
      NORMAL
7906
   };
7907
   const unsigned subslice_hashing[] = {
7908
      /* 16x16 would provide a slight cache locality benefit especially
7909
       * visible in the sampler L1 cache efficiency of low-bandwidth
7910
       * non-LLC platforms, but it comes at the cost of greater subslice
7911
       * imbalance for primitives of dimensions approximately intermediate
7912
       * between 16x4 and 16x16.
7913
       */
7914
      _16x4,
7915
      /* Finest subslice hashing mode available. */
7916
      _8x4
7917
   };
7918
   /* Dimensions of the smallest hashing block of a given hashing mode.  If
7919
    * the rendering area is smaller than this there can't possibly be any
7920
    * benefit from switching to this mode, so we optimize out the
7921
    * transition.
7922
    */
7923
   const unsigned min_size[][2] = {
7924
      { 16, 4 },
7925
      { 8, 4 }
7926
   };
7927
   const unsigned idx = scale > 1;
7928

7929
   if (width > min_size[idx][0] || height > min_size[idx][1]) {
7930
      iris_emit_raw_pipe_control(batch,
7931
                                 "workaround: CS stall before GT_MODE LRI",
7932
                                 PIPE_CONTROL_STALL_AT_SCOREBOARD |
7933
                                 PIPE_CONTROL_CS_STALL,
7934
                                 NULL, 0, 0);
7935

7936
      iris_emit_reg(batch, GENX(GT_MODE), reg) {
7937
         reg.SliceHashing = (devinfo->num_slices > 1 ? slice_hashing[idx] : 0);
7938
         reg.SliceHashingMask = (devinfo->num_slices > 1 ? -1 : 0);
7939
         reg.SubsliceHashing = subslice_hashing[idx];
7940
         reg.SubsliceHashingMask = -1;
7941
      };
7942

7943
      ice->state.current_hash_scale = scale;
7944
   }
7945
#endif
7946
}
7947

7948
static void
7949
iris_set_frontend_noop(struct pipe_context *ctx, bool enable)
7950
{
7951
   struct iris_context *ice = (struct iris_context *) ctx;
7952

7953
   if (iris_batch_prepare_noop(&ice->batches[IRIS_BATCH_RENDER], enable)) {
7954
      ice->state.dirty |= IRIS_ALL_DIRTY_FOR_RENDER;
7955
      ice->state.stage_dirty |= IRIS_ALL_STAGE_DIRTY_FOR_RENDER;
7956
   }
7957

7958
   if (iris_batch_prepare_noop(&ice->batches[IRIS_BATCH_COMPUTE], enable)) {
7959
      ice->state.dirty |= IRIS_ALL_DIRTY_FOR_COMPUTE;
7960
      ice->state.stage_dirty |= IRIS_ALL_STAGE_DIRTY_FOR_COMPUTE;
7961
   }
7962
}
7963

7964
void
7965
genX(init_screen_state)(struct iris_screen *screen)
7966
{
7967
   assert(screen->devinfo.verx10 == GFX_VERx10);
7968
   screen->vtbl.destroy_state = iris_destroy_state;
7969
   screen->vtbl.init_render_context = iris_init_render_context;
7970
   screen->vtbl.init_compute_context = iris_init_compute_context;
7971
   screen->vtbl.upload_render_state = iris_upload_render_state;
7972
   screen->vtbl.update_surface_base_address = iris_update_surface_base_address;
7973
   screen->vtbl.upload_compute_state = iris_upload_compute_state;
7974
   screen->vtbl.emit_raw_pipe_control = iris_emit_raw_pipe_control;
7975
   screen->vtbl.emit_mi_report_perf_count = iris_emit_mi_report_perf_count;
7976
   screen->vtbl.rebind_buffer = iris_rebind_buffer;
7977
   screen->vtbl.load_register_reg32 = iris_load_register_reg32;
7978
   screen->vtbl.load_register_reg64 = iris_load_register_reg64;
7979
   screen->vtbl.load_register_imm32 = iris_load_register_imm32;
7980
   screen->vtbl.load_register_imm64 = iris_load_register_imm64;
7981
   screen->vtbl.load_register_mem32 = iris_load_register_mem32;
7982
   screen->vtbl.load_register_mem64 = iris_load_register_mem64;
7983
   screen->vtbl.store_register_mem32 = iris_store_register_mem32;
7984
   screen->vtbl.store_register_mem64 = iris_store_register_mem64;
7985
   screen->vtbl.store_data_imm32 = iris_store_data_imm32;
7986
   screen->vtbl.store_data_imm64 = iris_store_data_imm64;
7987
   screen->vtbl.copy_mem_mem = iris_copy_mem_mem;
7988
   screen->vtbl.derived_program_state_size = iris_derived_program_state_size;
7989
   screen->vtbl.store_derived_program_state = iris_store_derived_program_state;
7990
   screen->vtbl.create_so_decl_list = iris_create_so_decl_list;
7991
   screen->vtbl.populate_vs_key = iris_populate_vs_key;
7992
   screen->vtbl.populate_tcs_key = iris_populate_tcs_key;
7993
   screen->vtbl.populate_tes_key = iris_populate_tes_key;
7994
   screen->vtbl.populate_gs_key = iris_populate_gs_key;
7995
   screen->vtbl.populate_fs_key = iris_populate_fs_key;
7996
   screen->vtbl.populate_cs_key = iris_populate_cs_key;
7997
   screen->vtbl.lost_genx_state = iris_lost_genx_state;
7998
}
7999

8000
void
8001
genX(init_state)(struct iris_context *ice)
8002
{
8003
   struct pipe_context *ctx = &ice->ctx;
8004
   struct iris_screen *screen = (struct iris_screen *)ctx->screen;
8005

8006
   ctx->create_blend_state = iris_create_blend_state;
8007
   ctx->create_depth_stencil_alpha_state = iris_create_zsa_state;
8008
   ctx->create_rasterizer_state = iris_create_rasterizer_state;
8009
   ctx->create_sampler_state = iris_create_sampler_state;
8010
   ctx->create_sampler_view = iris_create_sampler_view;
8011
   ctx->create_surface = iris_create_surface;
8012
   ctx->create_vertex_elements_state = iris_create_vertex_elements;
8013
   ctx->bind_blend_state = iris_bind_blend_state;
8014
   ctx->bind_depth_stencil_alpha_state = iris_bind_zsa_state;
8015
   ctx->bind_sampler_states = iris_bind_sampler_states;
8016
   ctx->bind_rasterizer_state = iris_bind_rasterizer_state;
8017
   ctx->bind_vertex_elements_state = iris_bind_vertex_elements_state;
8018
   ctx->delete_blend_state = iris_delete_state;
8019
   ctx->delete_depth_stencil_alpha_state = iris_delete_state;
8020
   ctx->delete_rasterizer_state = iris_delete_state;
8021
   ctx->delete_sampler_state = iris_delete_state;
8022
   ctx->delete_vertex_elements_state = iris_delete_state;
8023
   ctx->set_blend_color = iris_set_blend_color;
8024
   ctx->set_clip_state = iris_set_clip_state;
8025
   ctx->set_constant_buffer = iris_set_constant_buffer;
8026
   ctx->set_shader_buffers = iris_set_shader_buffers;
8027
   ctx->set_shader_images = iris_set_shader_images;
8028
   ctx->set_sampler_views = iris_set_sampler_views;
8029
   ctx->set_compute_resources = iris_set_compute_resources;
8030
   ctx->set_global_binding = iris_set_global_binding;
8031
   ctx->set_tess_state = iris_set_tess_state;
8032
   ctx->set_framebuffer_state = iris_set_framebuffer_state;
8033
   ctx->set_polygon_stipple = iris_set_polygon_stipple;
8034
   ctx->set_sample_mask = iris_set_sample_mask;
8035
   ctx->set_scissor_states = iris_set_scissor_states;
8036
   ctx->set_stencil_ref = iris_set_stencil_ref;
8037
   ctx->set_vertex_buffers = iris_set_vertex_buffers;
8038
   ctx->set_viewport_states = iris_set_viewport_states;
8039
   ctx->sampler_view_destroy = iris_sampler_view_destroy;
8040
   ctx->surface_destroy = iris_surface_destroy;
8041
   ctx->draw_vbo = iris_draw_vbo;
8042
   ctx->launch_grid = iris_launch_grid;
8043
   ctx->create_stream_output_target = iris_create_stream_output_target;
8044
   ctx->stream_output_target_destroy = iris_stream_output_target_destroy;
8045
   ctx->set_stream_output_targets = iris_set_stream_output_targets;
8046
   ctx->set_frontend_noop = iris_set_frontend_noop;
8047

8048
   ice->state.dirty = ~0ull;
8049
   ice->state.stage_dirty = ~0ull;
8050

8051
   ice->state.statistics_counters_enabled = true;
8052

8053
   ice->state.sample_mask = 0xffff;
8054
   ice->state.num_viewports = 1;
8055
   ice->state.prim_mode = PIPE_PRIM_MAX;
8056
   ice->state.genx = calloc(1, sizeof(struct iris_genx_state));
8057
   ice->draw.derived_params.drawid = -1;
8058

8059
   /* Make a 1x1x1 null surface for unbound textures */
8060
   void *null_surf_map =
8061
      upload_state(ice->state.surface_uploader, &ice->state.unbound_tex,
8062
                   4 * GENX(RENDER_SURFACE_STATE_length), 64);
8063
   isl_null_fill_state(&screen->isl_dev, null_surf_map,
8064
                       .size = isl_extent3d(1, 1, 1));
8065
   ice->state.unbound_tex.offset +=
8066
      iris_bo_offset_from_base_address(iris_resource_bo(ice->state.unbound_tex.res));
8067

8068
   /* Default all scissor rectangles to be empty regions. */
8069
   for (int i = 0; i < IRIS_MAX_VIEWPORTS; i++) {
8070
      ice->state.scissors[i] = (struct pipe_scissor_state) {
8071
         .minx = 1, .maxx = 0, .miny = 1, .maxy = 0,
8072
      };
8073
   }
8074
}
8075

8076
Product

Resources

Company