/* NOTE(review): removed web-viewer chrome captured during extraction.
 * Origin: PojavLauncherTeam/mesa, branch 21.2-virgl,
 * src/intel/vulkan/anv_private.h
 */
1
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
23
24
#ifndef ANV_PRIVATE_H
25
#define ANV_PRIVATE_H
26
27
#include <stdlib.h>
28
#include <stdio.h>
29
#include <stdbool.h>
30
#include <pthread.h>
31
#include <assert.h>
32
#include <stdint.h>
33
#include "drm-uapi/i915_drm.h"
34
35
#ifdef HAVE_VALGRIND
36
#include <valgrind.h>
37
#include <memcheck.h>
38
#define VG(x) x
39
#ifndef NDEBUG
40
#define __gen_validate_value(x) VALGRIND_CHECK_MEM_IS_DEFINED(&(x), sizeof(x))
41
#endif
42
#else
43
#define VG(x) ((void)0)
44
#endif
45
46
#include "common/intel_clflush.h"
47
#include "common/intel_decoder.h"
48
#include "common/intel_gem.h"
49
#include "common/intel_l3_config.h"
50
#include "common/intel_measure.h"
51
#include "dev/intel_device_info.h"
52
#include "blorp/blorp.h"
53
#include "compiler/brw_compiler.h"
54
#include "compiler/brw_rt.h"
55
#include "util/bitset.h"
56
#include "util/bitscan.h"
57
#include "util/macros.h"
58
#include "util/hash_table.h"
59
#include "util/list.h"
60
#include "util/sparse_array.h"
61
#include "util/u_atomic.h"
62
#include "util/u_vector.h"
63
#include "util/u_math.h"
64
#include "util/vma.h"
65
#include "util/xmlconfig.h"
66
#include "vk_alloc.h"
67
#include "vk_debug_report.h"
68
#include "vk_device.h"
69
#include "vk_instance.h"
70
#include "vk_physical_device.h"
71
#include "vk_shader_module.h"
72
#include "vk_util.h"
73
74
/* Pre-declarations needed for WSI entrypoints */
75
struct wl_surface;
76
struct wl_display;
77
typedef struct xcb_connection_t xcb_connection_t;
78
typedef uint32_t xcb_visualid_t;
79
typedef uint32_t xcb_window_t;
80
81
struct anv_batch;
82
struct anv_buffer;
83
struct anv_buffer_view;
84
struct anv_image_view;
85
struct anv_acceleration_structure;
86
struct anv_instance;
87
88
struct intel_aux_map_context;
89
struct intel_perf_config;
90
struct intel_perf_counter_pass;
91
struct intel_perf_query_result;
92
93
#include <vulkan/vulkan.h>
94
#include <vulkan/vk_icd.h>
95
96
#include "anv_android.h"
97
#include "anv_entrypoints.h"
98
#include "isl/isl.h"
99
100
#include "dev/intel_debug.h"
101
#undef MESA_LOG_TAG
102
#define MESA_LOG_TAG "MESA-INTEL"
103
#include "util/log.h"
104
#include "wsi_common.h"
105
106
#define NSEC_PER_SEC 1000000000ull
107
108
/* anv Virtual Memory Layout
109
* =========================
110
*
111
* When the anv driver is determining the virtual graphics addresses of memory
112
* objects itself using the softpin mechanism, the following memory ranges
113
* will be used.
114
*
115
* Three special considerations to notice:
116
*
117
* (1) the dynamic state pool is located within the same 4 GiB as the low
118
* heap. This is to work around a VF cache issue described in a comment in
119
* anv_physical_device_init_heaps.
120
*
121
* (2) the binding table pool is located at lower addresses than the surface
122
* state pool, within a 4 GiB range. This allows surface state base addresses
123
* to cover both binding tables (16 bit offsets) and surface states (32 bit
124
* offsets).
125
*
126
* (3) the last 4 GiB of the address space is withheld from the high
127
* heap. Various hardware units will read past the end of an object for
128
* various reasons. This healthy margin prevents reads from wrapping around
129
* 48-bit addresses.
130
*/
131
#define GENERAL_STATE_POOL_MIN_ADDRESS 0x000000010000ULL /* 64 KiB */
132
#define GENERAL_STATE_POOL_MAX_ADDRESS 0x00003fffffffULL
133
#define LOW_HEAP_MIN_ADDRESS 0x000040000000ULL /* 1 GiB */
134
#define LOW_HEAP_MAX_ADDRESS 0x00007fffffffULL
135
#define DYNAMIC_STATE_POOL_MIN_ADDRESS 0x0000c0000000ULL /* 3 GiB */
136
#define DYNAMIC_STATE_POOL_MAX_ADDRESS 0x0000ffffffffULL
137
#define BINDING_TABLE_POOL_MIN_ADDRESS 0x000100000000ULL /* 4 GiB */
138
#define BINDING_TABLE_POOL_MAX_ADDRESS 0x00013fffffffULL
139
#define SURFACE_STATE_POOL_MIN_ADDRESS 0x000140000000ULL /* 5 GiB */
140
#define SURFACE_STATE_POOL_MAX_ADDRESS 0x00017fffffffULL
141
#define INSTRUCTION_STATE_POOL_MIN_ADDRESS 0x000180000000ULL /* 6 GiB */
142
#define INSTRUCTION_STATE_POOL_MAX_ADDRESS 0x0001bfffffffULL
143
#define CLIENT_VISIBLE_HEAP_MIN_ADDRESS 0x0001c0000000ULL /* 7 GiB */
144
#define CLIENT_VISIBLE_HEAP_MAX_ADDRESS 0x0002bfffffffULL
145
#define HIGH_HEAP_MIN_ADDRESS 0x0002c0000000ULL /* 11 GiB */
146
147
#define GENERAL_STATE_POOL_SIZE \
148
(GENERAL_STATE_POOL_MAX_ADDRESS - GENERAL_STATE_POOL_MIN_ADDRESS + 1)
149
#define LOW_HEAP_SIZE \
150
(LOW_HEAP_MAX_ADDRESS - LOW_HEAP_MIN_ADDRESS + 1)
151
#define DYNAMIC_STATE_POOL_SIZE \
152
(DYNAMIC_STATE_POOL_MAX_ADDRESS - DYNAMIC_STATE_POOL_MIN_ADDRESS + 1)
153
#define BINDING_TABLE_POOL_SIZE \
154
(BINDING_TABLE_POOL_MAX_ADDRESS - BINDING_TABLE_POOL_MIN_ADDRESS + 1)
155
#define SURFACE_STATE_POOL_SIZE \
156
(SURFACE_STATE_POOL_MAX_ADDRESS - SURFACE_STATE_POOL_MIN_ADDRESS + 1)
157
#define INSTRUCTION_STATE_POOL_SIZE \
158
(INSTRUCTION_STATE_POOL_MAX_ADDRESS - INSTRUCTION_STATE_POOL_MIN_ADDRESS + 1)
159
#define CLIENT_VISIBLE_HEAP_SIZE \
160
(CLIENT_VISIBLE_HEAP_MAX_ADDRESS - CLIENT_VISIBLE_HEAP_MIN_ADDRESS + 1)
161
162
/* Allowing different clear colors requires us to perform a depth resolve at
163
* the end of certain render passes. This is because while slow clears store
164
* the clear color in the HiZ buffer, fast clears (without a resolve) don't.
165
* See the PRMs for examples describing when additional resolves would be
166
* necessary. To enable fast clears without requiring extra resolves, we set
167
* the clear value to a globally-defined one. We could allow different values
168
* if the user doesn't expect coherent data during or after a render passes
169
* (VK_ATTACHMENT_STORE_OP_DONT_CARE), but such users (aside from the CTS)
170
* don't seem to exist yet. In almost all Vulkan applications tested thus far,
171
* 1.0f seems to be the only value used. The only application that doesn't set
172
* this value does so through the usage of an seemingly uninitialized clear
173
* value.
174
*/
175
#define ANV_HZ_FC_VAL 1.0f
176
177
#define MAX_VBS 28
178
#define MAX_XFB_BUFFERS 4
179
#define MAX_XFB_STREAMS 4
180
#define MAX_SETS 8
181
#define MAX_RTS 8
182
#define MAX_VIEWPORTS 16
183
#define MAX_SCISSORS 16
184
#define MAX_PUSH_CONSTANTS_SIZE 128
185
#define MAX_DYNAMIC_BUFFERS 16
186
#define MAX_IMAGES 64
187
#define MAX_PUSH_DESCRIPTORS 32 /* Minimum requirement */
188
#define MAX_INLINE_UNIFORM_BLOCK_SIZE 4096
189
#define MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS 32
190
/* We need 16 for UBO block reads to work and 32 for push UBOs. However, we
191
* use 64 here to avoid cache issues. This could most likely bring it back to
192
* 32 if we had different virtual addresses for the different views on a given
193
* GEM object.
194
*/
195
#define ANV_UBO_ALIGNMENT 64
196
#define ANV_SSBO_ALIGNMENT 4
197
#define ANV_SSBO_BOUNDS_CHECK_ALIGNMENT 4
198
#define MAX_VIEWS_FOR_PRIMITIVE_REPLICATION 16
199
#define MAX_SAMPLE_LOCATIONS 16
200
201
/* From the Skylake PRM Vol. 7 "Binding Table Surface State Model":
202
*
203
* "The surface state model is used when a Binding Table Index (specified
204
* in the message descriptor) of less than 240 is specified. In this model,
205
* the Binding Table Index is used to index into the binding table, and the
206
* binding table entry contains a pointer to the SURFACE_STATE."
207
*
208
* Binding table values above 240 are used for various things in the hardware
209
* such as stateless, stateless with incoherent cache, SLM, and bindless.
210
*/
211
#define MAX_BINDING_TABLE_SIZE 240
212
213
/* The kernel relocation API has a limitation of a 32-bit delta value
214
* applied to the address before it is written which, in spite of it being
215
* unsigned, is treated as signed . Because of the way that this maps to
216
* the Vulkan API, we cannot handle an offset into a buffer that does not
217
* fit into a signed 32 bits. The only mechanism we have for dealing with
218
* this at the moment is to limit all VkDeviceMemory objects to a maximum
219
* of 2GB each. The Vulkan spec allows us to do this:
220
*
221
* "Some platforms may have a limit on the maximum size of a single
222
* allocation. For example, certain systems may fail to create
223
* allocations with a size greater than or equal to 4GB. Such a limit is
224
* implementation-dependent, and if such a failure occurs then the error
225
* VK_ERROR_OUT_OF_DEVICE_MEMORY should be returned."
226
*
227
* We don't use vk_error here because it's not an error so much as an
228
* indication to the application that the allocation is too large.
229
*/
230
#define MAX_MEMORY_ALLOCATION_SIZE (1ull << 31)
231
232
#define ANV_SVGS_VB_INDEX MAX_VBS
233
#define ANV_DRAWID_VB_INDEX (MAX_VBS + 1)
234
235
/* We reserve this MI ALU register for the purpose of handling predication.
236
* Other code which uses the MI ALU should leave it alone.
237
*/
238
#define ANV_PREDICATE_RESULT_REG 0x2678 /* MI_ALU_REG15 */
239
240
/* We reserve this MI ALU register to pass around an offset computed from
241
* VkPerformanceQuerySubmitInfoKHR::counterPassIndex VK_KHR_performance_query.
242
* Other code which uses the MI ALU should leave it alone.
243
*/
244
#define ANV_PERF_QUERY_OFFSET_REG 0x2670 /* MI_ALU_REG14 */
245
246
/* For gfx12 we set the streamout buffers using 4 separate commands
247
* (3DSTATE_SO_BUFFER_INDEX_*) instead of 3DSTATE_SO_BUFFER. However the layout
248
* of the 3DSTATE_SO_BUFFER_INDEX_* commands is identical to that of
249
* 3DSTATE_SO_BUFFER apart from the SOBufferIndex field, so for now we use the
250
* 3DSTATE_SO_BUFFER command, but change the 3DCommandSubOpcode.
251
* SO_BUFFER_INDEX_0_CMD is actually the 3DCommandSubOpcode for
252
* 3DSTATE_SO_BUFFER_INDEX_0.
253
*/
254
#define SO_BUFFER_INDEX_0_CMD 0x60
255
#define anv_printflike(a, b) __attribute__((__format__(__printf__, a, b)))
256
257
/* Round v down to the nearest multiple of a.  Unlike align_down_u32(),
 * the alignment a does not have to be a power of two (hence "npot").
 * a must be non-zero (division by zero otherwise).
 */
static inline uint32_t
align_down_npot_u32(uint32_t v, uint32_t a)
{
   return v - (v % a);
}
262
263
/* Round v down to the nearest multiple of a; a must be a non-zero power
 * of two (asserted via the a == (a & -a) trick).
 */
static inline uint32_t
align_down_u32(uint32_t v, uint32_t a)
{
   assert(a != 0 && a == (a & -a));
   return v & ~(a - 1);
}
269
270
/* Round v up to the nearest multiple of a; a must be a non-zero power of
 * two.  Equivalent to align_down_u32(v + a - 1, a), written out inline so
 * the function stands alone.  v + a - 1 may wrap for v near UINT32_MAX.
 */
static inline uint32_t
align_u32(uint32_t v, uint32_t a)
{
   assert(a != 0 && a == (a & -a));
   return (v + a - 1) & ~(a - 1);
}
276
277
/* 64-bit variant of align_down_u32(): round v down to a multiple of a,
 * where a is a non-zero power of two.
 */
static inline uint64_t
align_down_u64(uint64_t v, uint64_t a)
{
   assert(a != 0 && a == (a & -a));
   return v & ~(a - 1);
}
283
284
/* Round v up to the nearest multiple of a; a must be a non-zero power of
 * two.  Inlined form of align_down_u64(v + a - 1, a); the power-of-two
 * assert the callee performed is kept here so checking is unchanged.
 */
static inline uint64_t
align_u64(uint64_t v, uint64_t a)
{
   assert(a != 0 && a == (a & -a));
   return (v + a - 1) & ~(a - 1);
}
289
290
/* Round v up to the nearest multiple of a; a must be a positive power of
 * two.  Works for negative v on two's-complement targets (rounds toward
 * positive infinity).
 */
static inline int32_t
align_i32(int32_t v, int32_t a)
{
   assert(a != 0 && a == (a & -a));
   return (v + a - 1) & ~(a - 1);
}
296
297
/** Return true if n is a multiple of a.  Alignment must be a power of 2
 *  (asserted); works for any integer width via uintmax_t.
 */
static inline bool
anv_is_aligned(uintmax_t n, uintmax_t a)
{
   assert(a == (a & -a));
   return (n & (a - 1)) == 0;
}
304
305
/* Compute the dimension of mip level `levels` from a base dimension n:
 * each level halves the size, clamped to a minimum of 1.  A base size of
 * 0 stays 0.  Identical behavior to the MAX2(n >> levels, 1) form, spelled
 * out without the util macro.
 */
static inline uint32_t
anv_minify(uint32_t n, uint32_t levels)
{
   if (n == 0)
      return 0;

   const uint32_t minified = n >> levels;
   return minified > 0 ? minified : 1;
}
313
314
/* Clamp f to the closed interval [min, max].  Requires min < max
 * (asserted).  NOTE(review): a NaN input fails both comparisons and is
 * returned unchanged — callers relying on a finite result must check.
 */
static inline float
anv_clamp_f(float f, float min, float max)
{
   assert(min < max);

   if (f > max)
      return max;
   else if (f < min)
      return min;
   else
      return f;
}
326
327
/* Clear the bits of clear_mask out of *inout_mask.  Returns true if any
 * of those bits were set before the call, false otherwise (in which case
 * *inout_mask is untouched).
 */
static inline bool
anv_clear_mask(uint32_t *inout_mask, uint32_t clear_mask)
{
   if (*inout_mask & clear_mask) {
      *inout_mask &= ~clear_mask;
      return true;
   } else {
      return false;
   }
}
337
338
static inline union isl_color_value
339
vk_to_isl_color(VkClearColorValue color)
340
{
341
return (union isl_color_value) {
342
.u32 = {
343
color.uint32[0],
344
color.uint32[1],
345
color.uint32[2],
346
color.uint32[3],
347
},
348
};
349
}
350
351
/* Split a tagged pointer: the low `bits` bits are written to *flags and
 * the remaining high bits are returned as the pointer.  Inverse of
 * anv_pack_ptr().
 */
static inline void *
anv_unpack_ptr(uintptr_t ptr, int bits, int *flags)
{
   const uintptr_t mask = (1ull << bits) - 1;
   *flags = ptr & mask;
   return (void *)(ptr & ~mask);
}
357
358
/* Pack `flags` into the low `bits` bits of a pointer value.  The pointer
 * must be aligned so its low `bits` bits are zero; flags outside the mask
 * are silently dropped.  Inverse of anv_unpack_ptr().
 */
static inline uintptr_t
anv_pack_ptr(void *ptr, int bits, int flags)
{
   const uintptr_t value = (uintptr_t)ptr;
   const uintptr_t mask = (1ull << bits) - 1;
   return value | (mask & flags);
}
364
365
/* Whenever we generate an error, pass it through this function. Useful for
366
* debugging, where we can break on it. Only call at error site, not when
367
* propagating errors. Might be useful to plug in a stack trace here.
368
*/
369
370
VkResult __vk_errorv(struct anv_instance *instance,
371
const struct vk_object_base *object, VkResult error,
372
const char *file, int line, const char *format,
373
va_list args);
374
375
VkResult __vk_errorf(struct anv_instance *instance,
376
const struct vk_object_base *object, VkResult error,
377
const char *file, int line, const char *format, ...)
378
anv_printflike(6, 7);
379
380
#ifdef DEBUG
381
#define vk_error(error) __vk_errorf(NULL, NULL, error, __FILE__, __LINE__, NULL)
382
#define vk_errorfi(instance, obj, error, format, ...)\
383
__vk_errorf(instance, obj, error,\
384
__FILE__, __LINE__, format, ## __VA_ARGS__)
385
#define vk_errorf(device, obj, error, format, ...)\
386
vk_errorfi(anv_device_instance_or_null(device),\
387
obj, error, format, ## __VA_ARGS__)
388
#else
389
390
static inline VkResult __dummy_vk_error(VkResult error, UNUSED const void *ignored)
391
{
392
return error;
393
}
394
395
#define vk_error(error) __dummy_vk_error(error, NULL)
396
#define vk_errorfi(instance, obj, error, format, ...) __dummy_vk_error(error, instance)
397
#define vk_errorf(device, obj, error, format, ...) __dummy_vk_error(error, device)
398
#endif
399
400
/**
401
* Warn on ignored extension structs.
402
*
403
* The Vulkan spec requires us to ignore unsupported or unknown structs in
404
* a pNext chain. In debug mode, emitting warnings for ignored structs may
405
* help us discover structs that we should not have ignored.
406
*
407
*
408
* From the Vulkan 1.0.38 spec:
409
*
410
* Any component of the implementation (the loader, any enabled layers,
411
* and drivers) must skip over, without processing (other than reading the
412
* sType and pNext members) any chained structures with sType values not
413
* defined by extensions supported by that component.
414
*/
415
#define anv_debug_ignored_stype(sType) \
416
mesa_logd("%s: ignored VkStructureType %u\n", __func__, (sType))
417
418
void __anv_perf_warn(struct anv_device *device,
419
const struct vk_object_base *object,
420
const char *file, int line, const char *format, ...)
421
anv_printflike(5, 6);
422
void anv_loge(const char *format, ...) anv_printflike(1, 2);
423
void anv_loge_v(const char *format, va_list va);
424
425
/**
426
* Print a FINISHME message, including its source location.
427
*/
428
#define anv_finishme(format, ...) \
429
do { \
430
static bool reported = false; \
431
if (!reported) { \
432
mesa_logw("%s:%d: FINISHME: " format, __FILE__, __LINE__, \
433
##__VA_ARGS__); \
434
reported = true; \
435
} \
436
} while (0)
437
438
/**
439
* Print a perf warning message. Set INTEL_DEBUG=perf to see these.
440
*/
441
#define anv_perf_warn(instance, obj, format, ...) \
442
do { \
443
static bool reported = false; \
444
if (!reported && (INTEL_DEBUG & DEBUG_PERF)) { \
445
__anv_perf_warn(instance, obj, __FILE__, __LINE__,\
446
format, ##__VA_ARGS__); \
447
reported = true; \
448
} \
449
} while (0)
450
451
/* A non-fatal assert. Useful for debugging. */
452
#ifdef DEBUG
453
#define anv_assert(x) ({ \
454
if (unlikely(!(x))) \
455
mesa_loge("%s:%d ASSERT: %s", __FILE__, __LINE__, #x); \
456
})
457
#else
458
#define anv_assert(x)
459
#endif
460
461
/** A GEM buffer object plus driver book-keeping. */
struct anv_bo {
   const char *name;

   uint32_t gem_handle;

   uint32_t refcount;

   /* Index into the current validation list.  This is used by the
    * validation list building algorithm to track which buffers are already
    * in the validation list so that we can ensure uniqueness.
    */
   uint32_t index;

   /* Index for use with util_sparse_array_free_list */
   uint32_t free_index;

   /* Last known offset.  This value is provided by the kernel when we
    * execbuf and is used as the presumed offset for the next bunch of
    * relocations.
    */
   uint64_t offset;

   /** Size of the buffer not including implicit aux */
   uint64_t size;

   /* Map for internally mapped BOs.
    *
    * If ANV_BO_WRAPPER is set in flags, map points to the wrapped BO.
    */
   void *map;

   /** Size of the implicit CCS range at the end of the buffer
    *
    * On Gfx12, CCS data is always a direct 1/256 scale-down.  A single 64K
    * page of main surface data maps to a 256B chunk of CCS data and that
    * mapping is provided on TGL-LP by the AUX table which maps virtual
    * memory addresses in the main surface to virtual memory addresses for
    * CCS data.
    *
    * Because we can't change these maps around easily and because Vulkan
    * allows two VkImages to be bound to overlapping memory regions (as
    * long as the app is careful), it's not feasible to make this mapping
    * part of the image.  (On Gfx11 and earlier, the mapping was provided
    * via RENDER_SURFACE_STATE so each image had its own main -> CCS
    * mapping.)  Instead, we attach the CCS data directly to the buffer
    * object and setup the AUX table mapping at BO creation time.
    *
    * This field is for internal tracking use by the BO allocator only and
    * should not be touched by other parts of the code.  If something wants
    * to know if a BO has implicit CCS data, it should instead look at the
    * has_implicit_ccs boolean below.
    *
    * This data is not included in maps of this buffer.
    */
   uint32_t _ccs_size;

   /** Flags to pass to the kernel through drm_i915_exec_object2::flags */
   uint32_t flags;

   /** True if this BO may be shared with other processes */
   bool is_external:1;

   /** True if this BO is a wrapper
    *
    * When set to true, none of the fields in this BO are meaningful except
    * for anv_bo::is_wrapper and anv_bo::map which points to the actual BO.
    * See also anv_bo_unwrap().  Wrapper BOs are not allowed when
    * use_softpin is set in the physical device.
    */
   bool is_wrapper:1;

   /** See also ANV_BO_ALLOC_FIXED_ADDRESS */
   bool has_fixed_address:1;

   /** True if this BO wraps a host pointer */
   bool from_host_ptr:1;

   /** See also ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS */
   bool has_client_visible_address:1;

   /** True if this BO has implicit CCS data attached to it */
   bool has_implicit_ccs:1;
};
543
544
static inline struct anv_bo *
545
anv_bo_ref(struct anv_bo *bo)
546
{
547
p_atomic_inc(&bo->refcount);
548
return bo;
549
}
550
551
static inline struct anv_bo *
552
anv_bo_unwrap(struct anv_bo *bo)
553
{
554
while (bo->is_wrapper)
555
bo = bo->map;
556
return bo;
557
}
558
559
/* Represents a lock-free linked list of "free" things.  This is used by
 * both the block pool and the state pools.  Unfortunately, in order to
 * solve the ABA problem, we can't use a single uint32_t head.
 */
union anv_free_list {
   struct {
      uint32_t offset;

      /* A simple count that is incremented every time the head changes. */
      uint32_t count;
   };
   /* Make sure it's aligned to 64 bits.  This will make atomic operations
    * faster on 32 bit platforms.
    */
   uint64_t u64 __attribute__ ((aligned (8)));
};
575
576
#define ANV_FREE_LIST_EMPTY ((union anv_free_list) { { UINT32_MAX, 0 } })
577
578
/* Allocation cursor for a block pool: `next` is the offset of the next
 * free byte, `end` the end of the currently mapped range.  The u64 view
 * lets both fields be updated in one atomic operation.
 */
struct anv_block_state {
   union {
      struct {
         uint32_t next;
         uint32_t end;
      };
      /* Make sure it's aligned to 64 bits.  This will make atomic
       * operations faster on 32 bit platforms.
       */
      uint64_t u64 __attribute__ ((aligned (8)));
   };
};
590
591
#define anv_block_pool_foreach_bo(bo, pool) \
592
for (struct anv_bo **_pp_bo = (pool)->bos, *bo; \
593
_pp_bo != &(pool)->bos[(pool)->nbos] && (bo = *_pp_bo, true); \
594
_pp_bo++)
595
596
#define ANV_MAX_BLOCK_POOL_BOS 20
597
598
struct anv_block_pool {
599
const char *name;
600
601
struct anv_device *device;
602
bool use_softpin;
603
604
/* Wrapper BO for use in relocation lists. This BO is simply a wrapper
605
* around the actual BO so that we grow the pool after the wrapper BO has
606
* been put in a relocation list. This is only used in the non-softpin
607
* case.
608
*/
609
struct anv_bo wrapper_bo;
610
611
struct anv_bo *bos[ANV_MAX_BLOCK_POOL_BOS];
612
struct anv_bo *bo;
613
uint32_t nbos;
614
615
uint64_t size;
616
617
/* The address where the start of the pool is pinned. The various bos that
618
* are created as the pool grows will have addresses in the range
619
* [start_address, start_address + BLOCK_POOL_MEMFD_SIZE).
620
*/
621
uint64_t start_address;
622
623
/* The offset from the start of the bo to the "center" of the block
624
* pool. Pointers to allocated blocks are given by
625
* bo.map + center_bo_offset + offsets.
626
*/
627
uint32_t center_bo_offset;
628
629
/* Current memory map of the block pool. This pointer may or may not
630
* point to the actual beginning of the block pool memory. If
631
* anv_block_pool_alloc_back has ever been called, then this pointer
632
* will point to the "center" position of the buffer and all offsets
633
* (negative or positive) given out by the block pool alloc functions
634
* will be valid relative to this pointer.
635
*
636
* In particular, map == bo.map + center_offset
637
*
638
* DO NOT access this pointer directly. Use anv_block_pool_map() instead,
639
* since it will handle the softpin case as well, where this points to NULL.
640
*/
641
void *map;
642
int fd;
643
644
/**
645
* Array of mmaps and gem handles owned by the block pool, reclaimed when
646
* the block pool is destroyed.
647
*/
648
struct u_vector mmap_cleanups;
649
650
struct anv_block_state state;
651
652
struct anv_block_state back_state;
653
};
654
655
/* Block pools are backed by a fixed-size 1GB memfd */
656
#define BLOCK_POOL_MEMFD_SIZE (1ul << 30)
657
658
/* The center of the block pool is also the middle of the memfd. This may
659
* change in the future if we decide differently for some reason.
660
*/
661
#define BLOCK_POOL_MEMFD_CENTER (BLOCK_POOL_MEMFD_SIZE / 2)
662
663
static inline uint32_t
664
anv_block_pool_size(struct anv_block_pool *pool)
665
{
666
return pool->state.end + pool->back_state.end;
667
}
668
669
/** A piece of GPU-visible state allocated from a state pool. */
struct anv_state {
   /* Offset from the state pool's base address; may be negative for
    * "back" allocations.
    */
   int32_t offset;
   /* Size of the allocation; 0 means ANV_STATE_NULL. */
   uint32_t alloc_size;
   /* CPU mapping of this state. */
   void *map;
   /* Index into the owning anv_state_table. */
   uint32_t idx;
};
675
676
#define ANV_STATE_NULL ((struct anv_state) { .alloc_size = 0 })
677
678
struct anv_fixed_size_state_pool {
679
union anv_free_list free_list;
680
struct anv_block_state block;
681
};
682
683
#define ANV_MIN_STATE_SIZE_LOG2 6
684
#define ANV_MAX_STATE_SIZE_LOG2 21
685
686
#define ANV_STATE_BUCKETS (ANV_MAX_STATE_SIZE_LOG2 - ANV_MIN_STATE_SIZE_LOG2 + 1)
687
688
struct anv_free_entry {
689
uint32_t next;
690
struct anv_state state;
691
};
692
693
struct anv_state_table {
694
struct anv_device *device;
695
int fd;
696
struct anv_free_entry *map;
697
uint32_t size;
698
struct anv_block_state state;
699
struct u_vector cleanups;
700
};
701
702
struct anv_state_pool {
703
struct anv_block_pool block_pool;
704
705
/* Offset into the relevant state base address where the state pool starts
706
* allocating memory.
707
*/
708
int32_t start_offset;
709
710
struct anv_state_table table;
711
712
/* The size of blocks which will be allocated from the block pool */
713
uint32_t block_size;
714
715
/** Free list for "back" allocations */
716
union anv_free_list back_alloc_free_list;
717
718
struct anv_fixed_size_state_pool buckets[ANV_STATE_BUCKETS];
719
};
720
721
struct anv_state_reserved_pool {
722
struct anv_state_pool *pool;
723
union anv_free_list reserved_blocks;
724
uint32_t count;
725
};
726
727
struct anv_state_stream {
728
struct anv_state_pool *state_pool;
729
730
/* The size of blocks to allocate from the state pool */
731
uint32_t block_size;
732
733
/* Current block we're allocating from */
734
struct anv_state block;
735
736
/* Offset into the current block at which to allocate the next state */
737
uint32_t next;
738
739
/* List of all blocks allocated from this pool */
740
struct util_dynarray all_blocks;
741
};
742
743
/* The block_pool functions exported for testing only. The block pool should
744
* only be used via a state pool (see below).
745
*/
746
VkResult anv_block_pool_init(struct anv_block_pool *pool,
747
struct anv_device *device,
748
const char *name,
749
uint64_t start_address,
750
uint32_t initial_size);
751
void anv_block_pool_finish(struct anv_block_pool *pool);
752
int32_t anv_block_pool_alloc(struct anv_block_pool *pool,
753
uint32_t block_size, uint32_t *padding);
754
int32_t anv_block_pool_alloc_back(struct anv_block_pool *pool,
755
uint32_t block_size);
756
void* anv_block_pool_map(struct anv_block_pool *pool, int32_t offset, uint32_t
757
size);
758
759
VkResult anv_state_pool_init(struct anv_state_pool *pool,
760
struct anv_device *device,
761
const char *name,
762
uint64_t base_address,
763
int32_t start_offset,
764
uint32_t block_size);
765
void anv_state_pool_finish(struct anv_state_pool *pool);
766
struct anv_state anv_state_pool_alloc(struct anv_state_pool *pool,
767
uint32_t state_size, uint32_t alignment);
768
struct anv_state anv_state_pool_alloc_back(struct anv_state_pool *pool);
769
void anv_state_pool_free(struct anv_state_pool *pool, struct anv_state state);
770
void anv_state_stream_init(struct anv_state_stream *stream,
771
struct anv_state_pool *state_pool,
772
uint32_t block_size);
773
void anv_state_stream_finish(struct anv_state_stream *stream);
774
struct anv_state anv_state_stream_alloc(struct anv_state_stream *stream,
775
uint32_t size, uint32_t alignment);
776
777
void anv_state_reserved_pool_init(struct anv_state_reserved_pool *pool,
778
struct anv_state_pool *parent,
779
uint32_t count, uint32_t size,
780
uint32_t alignment);
781
void anv_state_reserved_pool_finish(struct anv_state_reserved_pool *pool);
782
struct anv_state anv_state_reserved_pool_alloc(struct anv_state_reserved_pool *pool);
783
void anv_state_reserved_pool_free(struct anv_state_reserved_pool *pool,
784
struct anv_state state);
785
786
VkResult anv_state_table_init(struct anv_state_table *table,
787
struct anv_device *device,
788
uint32_t initial_entries);
789
void anv_state_table_finish(struct anv_state_table *table);
790
VkResult anv_state_table_add(struct anv_state_table *table, uint32_t *idx,
791
uint32_t count);
792
void anv_free_list_push(union anv_free_list *list,
793
struct anv_state_table *table,
794
uint32_t idx, uint32_t count);
795
struct anv_state* anv_free_list_pop(union anv_free_list *list,
796
struct anv_state_table *table);
797
798
799
static inline struct anv_state *
800
anv_state_table_get(struct anv_state_table *table, uint32_t idx)
801
{
802
return &table->map[idx].state;
803
}
804
/**
805
* Implements a pool of re-usable BOs. The interface is identical to that
806
* of block_pool except that each block is its own BO.
807
*/
808
struct anv_bo_pool {
809
const char *name;
810
811
struct anv_device *device;
812
813
struct util_sparse_array_free_list free_list[16];
814
};
815
816
void anv_bo_pool_init(struct anv_bo_pool *pool, struct anv_device *device,
817
const char *name);
818
void anv_bo_pool_finish(struct anv_bo_pool *pool);
819
VkResult anv_bo_pool_alloc(struct anv_bo_pool *pool, uint32_t size,
820
struct anv_bo **bo_out);
821
void anv_bo_pool_free(struct anv_bo_pool *pool, struct anv_bo *bo);
822
823
struct anv_scratch_pool {
824
/* Indexed by Per-Thread Scratch Space number (the hardware value) and stage */
825
struct anv_bo *bos[16][MESA_SHADER_STAGES];
826
uint32_t surfs[16];
827
struct anv_state surf_states[16];
828
};
829
830
void anv_scratch_pool_init(struct anv_device *device,
831
struct anv_scratch_pool *pool);
832
void anv_scratch_pool_finish(struct anv_device *device,
833
struct anv_scratch_pool *pool);
834
struct anv_bo *anv_scratch_pool_alloc(struct anv_device *device,
835
struct anv_scratch_pool *pool,
836
gl_shader_stage stage,
837
unsigned per_thread_scratch);
838
uint32_t anv_scratch_pool_get_surf(struct anv_device *device,
839
struct anv_scratch_pool *pool,
840
unsigned per_thread_scratch);
841
842
/** Implements a BO cache that ensures a 1-1 mapping of GEM BOs to anv_bos */
843
struct anv_bo_cache {
844
struct util_sparse_array bo_map;
845
pthread_mutex_t mutex;
846
};
847
848
VkResult anv_bo_cache_init(struct anv_bo_cache *cache);
849
void anv_bo_cache_finish(struct anv_bo_cache *cache);
850
851
struct anv_queue_family {
852
/* Standard bits passed on to the client */
853
VkQueueFlags queueFlags;
854
uint32_t queueCount;
855
856
/* Driver internal information */
857
enum drm_i915_gem_engine_class engine_class;
858
};
859
860
#define ANV_MAX_QUEUE_FAMILIES 3
861
862
struct anv_memory_type {
863
/* Standard bits passed on to the client */
864
VkMemoryPropertyFlags propertyFlags;
865
uint32_t heapIndex;
866
};
867
868
struct anv_memory_heap {
869
/* Standard bits passed on to the client */
870
VkDeviceSize size;
871
VkMemoryHeapFlags flags;
872
873
/** Driver-internal book-keeping.
874
*
875
* Align it to 64 bits to make atomic operations faster on 32 bit platforms.
876
*/
877
VkDeviceSize used __attribute__ ((aligned (8)));
878
879
bool is_local_mem;
880
};
881
882
struct anv_memregion {
883
struct drm_i915_gem_memory_class_instance region;
884
uint64_t size;
885
};
886
887
/** Per-GPU (per DRM device node) state shared by all logical devices. */
struct anv_physical_device {
   struct vk_physical_device                   vk;

   /* Link in anv_instance::physical_devices */
   struct list_head                            link;

   struct anv_instance *                       instance;
   /* True when running without real hardware (no execbuf is performed). */
   bool                                        no_hw;
   /* Path of the DRM device node this physical device was opened from. */
   char                                        path[20];
   const char *                                name;
   struct {
      uint16_t                                 domain;
      uint8_t                                  bus;
      uint8_t                                  device;
      uint8_t                                  function;
   } pci_info;
   struct intel_device_info                    info;
   /** Amount of "GPU memory" we want to advertise
    *
    * Clearly, this value is bogus since Intel is a UMA architecture. On
    * gfx7 platforms, we are limited by GTT size unless we want to implement
    * fine-grained tracking and GTT splitting. On Broadwell and above we are
    * practically unlimited. However, we will never report more than 3/4 of
    * the total system ram to try and avoid running out of RAM.
    *
    * NOTE(review): the field this comment describes is not present below;
    * it appears orphaned — confirm against git history.
    */
   bool                                        supports_48bit_addresses;
   struct brw_compiler *                       compiler;
   struct isl_device                           isl_dev;
   struct intel_perf_config *                  perf;
   /*
    * Number of commands required to implement a performance query begin +
    * end.
    */
   uint32_t                                    n_perf_query_commands;
   /* Kernel features probed at device-open time. */
   int                                         cmd_parser_version;
   bool                                        has_exec_async;
   bool                                        has_exec_capture;
   bool                                        has_exec_fence;
   bool                                        has_syncobj;
   bool                                        has_syncobj_wait;
   bool                                        has_syncobj_wait_available;
   bool                                        has_context_priority;
   bool                                        has_context_isolation;
   bool                                        has_thread_submit;
   bool                                        has_mem_available;
   bool                                        has_mmap_offset;
   uint64_t                                    gtt_size;

   bool                                        use_softpin;
   bool                                        always_use_bindless;
   bool                                        use_call_secondary;

   /** True if we can access buffers using A64 messages */
   bool                                        has_a64_buffer_access;
   /** True if we can use bindless access for images */
   bool                                        has_bindless_images;
   /** True if we can use bindless access for samplers */
   bool                                        has_bindless_samplers;
   /** True if we can use timeline semaphores through execbuf */
   bool                                        has_exec_timeline;

   /** True if we can read the GPU timestamp register
    *
    * When running in a virtual context, the timestamp register is unreadable
    * on Gfx12+.
    */
   bool                                        has_reg_timestamp;

   /** True if this device has implicit AUX
    *
    * If true, CCS is handled as an implicit attachment to the BO rather than
    * as an explicitly bound surface.
    */
   bool                                        has_implicit_ccs;

   /* Debug knob: flush caches after every batch when set. */
   bool                                        always_flush_cache;

   uint32_t                                    eu_total;
   uint32_t                                    subslice_total;

   /* Queue families advertised for this device. */
   struct {
      uint32_t                                 family_count;
      struct anv_queue_family                  families[ANV_MAX_QUEUE_FAMILIES];
   } queue;

   /* Memory types/heaps advertised for this device. */
   struct {
      uint32_t                                 type_count;
      struct anv_memory_type                   types[VK_MAX_MEMORY_TYPES];
      uint32_t                                 heap_count;
      struct anv_memory_heap                   heaps[VK_MAX_MEMORY_HEAPS];
      /* True when mapped memory needs explicit clflushing for coherency. */
      bool                                     need_clflush;
   } memory;

   /* Kernel-reported memory regions (device-local and system). */
   struct anv_memregion                        vram;
   struct anv_memregion                        sys;

   /* Identity used for pipeline-cache keys and Vulkan UUID queries. */
   uint8_t                                     driver_build_sha1[20];
   uint8_t                                     pipeline_cache_uuid[VK_UUID_SIZE];
   uint8_t                                     driver_uuid[VK_UUID_SIZE];
   uint8_t                                     device_uuid[VK_UUID_SIZE];

   struct disk_cache *                         disk_cache;

   struct wsi_device                           wsi_device;
   /* Render node fd and its device numbers. */
   int                                         local_fd;
   bool                                        has_local;
   int64_t                                     local_major;
   int64_t                                     local_minor;
   /* Primary (master) node fd and its device numbers. */
   int                                         master_fd;
   bool                                        has_master;
   int64_t                                     master_major;
   int64_t                                     master_minor;
   struct drm_i915_query_engine_info *         engine_info;

   /* Gen-specific hook used to emit a timestamp write into a batch. */
   void (*cmd_emit_timestamp)(struct anv_batch *, struct anv_bo *, uint32_t );
   struct intel_measure_device                 measure_device;
};
1003
1004
/** Application/engine identity captured from VkApplicationInfo. */
struct anv_app_info {
   const char*        app_name;
   uint32_t           app_version;
   const char*        engine_name;
   uint32_t           engine_version;
   uint32_t           api_version;
};
1011
1012
/** Implementation of VkInstance. */
struct anv_instance {
    struct vk_instance                          vk;

    /* Lazily-populated device list; see physical_devices_enumerated. */
    bool                                        physical_devices_enumerated;
    struct list_head                            physical_devices;

    bool                                        pipeline_cache_enabled;

    /* driconf option state for this instance. */
    struct driOptionCache                       dri_options;
    struct driOptionCache                       available_dri_options;
};

/* Set up / tear down window-system-integration state on a physical device. */
VkResult anv_init_wsi(struct anv_physical_device *physical_device);
void anv_finish_wsi(struct anv_physical_device *physical_device);
1026
1027
/** All the state for one submission to i915: command buffers, fences,
 * semaphores, timelines and the BOs they depend on.
 */
struct anv_queue_submit {
   struct anv_cmd_buffer **                  cmd_buffers;
   uint32_t                                  cmd_buffer_count;
   uint32_t                                  cmd_buffer_array_length;

   /* drm_i915_gem_exec_fence array (with per-entry timeline values). */
   uint32_t                                  fence_count;
   uint32_t                                  fence_array_length;
   struct drm_i915_gem_exec_fence *          fences;
   uint64_t *                                fence_values;

   /* Semaphore impls whose lifetime is tied to this submit. */
   uint32_t                                  temporary_semaphore_count;
   uint32_t                                  temporary_semaphore_array_length;
   struct anv_semaphore_impl *               temporary_semaphores;

   /* Semaphores to be signaled with a SYNC_FD. */
   struct anv_semaphore **                   sync_fd_semaphores;
   uint32_t                                  sync_fd_semaphore_count;
   uint32_t                                  sync_fd_semaphore_array_length;

   /* Allocated only with non shareable timelines. */
   union {
      struct anv_timeline **                 wait_timelines;
      uint32_t *                             wait_timeline_syncobjs;
   };
   uint32_t                                  wait_timeline_count;
   uint32_t                                  wait_timeline_array_length;
   uint64_t *                                wait_timeline_values;

   struct anv_timeline **                    signal_timelines;
   uint32_t                                  signal_timeline_count;
   uint32_t                                  signal_timeline_array_length;
   uint64_t *                                signal_timeline_values;

   /* Sync-file fds for in/out fencing of the execbuf. */
   int                                       in_fence;
   bool                                      need_out_fence;
   int                                       out_fence;

   uint32_t                                  fence_bo_count;
   uint32_t                                  fence_bo_array_length;
   /* An array of struct anv_bo pointers with lower bit used as a flag to
    * signal we will wait on that BO (see anv_(un)pack_ptr).
    */
   uintptr_t *                               fence_bos;

   /* Performance query pass this submit belongs to (or negative if none —
    * TODO confirm sentinel convention at the call sites).
    */
   int                                       perf_query_pass;
   struct anv_query_pool *                   perf_query_pool;

   /* Allocator used for all the arrays above. */
   const VkAllocationCallbacks *             alloc;
   VkSystemAllocationScope                   alloc_scope;

   /* Optional standalone batch BO submitted instead of command buffers. */
   struct anv_bo *                           simple_bo;
   uint32_t                                  simple_bo_size;

   /* Link in anv_queue::queued_submits */
   struct list_head                          link;
};
1082
1083
/** Implementation of VkQueue. */
struct anv_queue {
   struct vk_object_base                     base;

   struct anv_device *                       device;

   VkDeviceQueueCreateFlags                  flags;
   const struct anv_queue_family *           family;

   /* execbuf flags selecting the engine for this queue. */
   uint32_t                                  exec_flags;

   /* Set once from the device api calls. */
   bool                                      lost_signaled;

   /* Only set once atomically by the queue */
   int                                       lost;
   int                                       error_line;
   const char *                              error_file;
   char                                      error_msg[80];

   /*
    * This mutex protects the variables below.
    */
   pthread_mutex_t                           mutex;

   /* Submission thread and the condition used to wake it. */
   pthread_t                                 thread;
   pthread_cond_t                            cond;

   /*
    * A list of struct anv_queue_submit to be submitted to i915.
    */
   struct list_head                          queued_submits;

   /* Set to true to stop the submission thread */
   bool                                      quit;
};
1118
1119
/** Implementation of VkPipelineCache: caches compiled shaders and NIR. */
struct anv_pipeline_cache {
   struct vk_object_base                        base;
   struct anv_device *                          device;
   pthread_mutex_t                              mutex;

   /* Serialized-NIR cache keyed by SHA1 (see anv_device_search_for_nir). */
   struct hash_table *                          nir_cache;

   /* Compiled anv_shader_bin cache. */
   struct hash_table *                          cache;

   /* When true the client guarantees external synchronization; the mutex is
    * not relied upon — TODO confirm exact locking behavior in anv_pipeline_cache.c.
    */
   bool                                         external_sync;
};

struct nir_xfb_info;
struct anv_pipeline_bind_map;

void anv_pipeline_cache_init(struct anv_pipeline_cache *cache,
                             struct anv_device *device,
                             bool cache_enabled,
                             bool external_sync);
void anv_pipeline_cache_finish(struct anv_pipeline_cache *cache);
1139
1140
/* Look up a compiled shader by opaque key; returns NULL on miss —
 * TODO confirm NULL-on-miss against the implementation.
 */
struct anv_shader_bin *
anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
                          const void *key, uint32_t key_size);
/* Insert a compiled kernel (plus metadata) into a specific pipeline cache. */
struct anv_shader_bin *
anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
                                 gl_shader_stage stage,
                                 const void *key_data, uint32_t key_size,
                                 const void *kernel_data, uint32_t kernel_size,
                                 const struct brw_stage_prog_data *prog_data,
                                 uint32_t prog_data_size,
                                 const struct brw_compile_stats *stats,
                                 uint32_t num_stats,
                                 const struct nir_xfb_info *xfb_info,
                                 const struct anv_pipeline_bind_map *bind_map);

/* Device-level lookup; user_cache_bit reports whether the hit came from the
 * user-supplied cache.
 */
struct anv_shader_bin *
anv_device_search_for_kernel(struct anv_device *device,
                             struct anv_pipeline_cache *cache,
                             const void *key_data, uint32_t key_size,
                             bool *user_cache_bit);

/* Device-level insert; cache may select where the kernel is stored. */
struct anv_shader_bin *
anv_device_upload_kernel(struct anv_device *device,
                         struct anv_pipeline_cache *cache,
                         gl_shader_stage stage,
                         const void *key_data, uint32_t key_size,
                         const void *kernel_data, uint32_t kernel_size,
                         const struct brw_stage_prog_data *prog_data,
                         uint32_t prog_data_size,
                         const struct brw_compile_stats *stats,
                         uint32_t num_stats,
                         const struct nir_xfb_info *xfb_info,
                         const struct anv_pipeline_bind_map *bind_map);

struct nir_shader;
struct nir_shader_compiler_options;

/* NIR-level cache: look up / store NIR keyed by a SHA1 of the source. */
struct nir_shader *
anv_device_search_for_nir(struct anv_device *device,
                          struct anv_pipeline_cache *cache,
                          const struct nir_shader_compiler_options *nir_options,
                          unsigned char sha1_key[20],
                          void *mem_ctx);

void
anv_device_upload_nir(struct anv_device *device,
                      struct anv_pipeline_cache *cache,
                      const struct nir_shader *nir,
                      unsigned char sha1_key[20]);
1189
1190
/** A GPU address: an offset relative to a BO, or an absolute address when
 * bo is NULL (see anv_address_physical()).
 */
struct anv_address {
   struct anv_bo *bo;
   int64_t offset;
};
1194
1195
/** Implementation of VkDevice. */
struct anv_device {
    struct vk_device                            vk;

    struct anv_physical_device *                physical;
    bool                                        no_hw;
    struct intel_device_info                    info;
    struct isl_device                           isl_dev;
    /* i915 hardware-context id for this logical device. */
    int                                         context_id;
    int                                         fd;
    bool                                        can_chain_batches;
    bool                                        robust_buffer_access;
    bool                                        has_thread_submit;

    /* Virtual-address heaps used under softpin; guarded by vma_mutex. */
    pthread_mutex_t                             vma_mutex;
    struct util_vma_heap                        vma_lo;
    struct util_vma_heap                        vma_cva;
    struct util_vma_heap                        vma_hi;

    /** List of all anv_device_memory objects */
    struct list_head                            memory_objects;

    struct anv_bo_pool                          batch_bo_pool;

    struct anv_bo_cache                         bo_cache;

    /* State pools backing the various GPU state base addresses. */
    struct anv_state_pool                       general_state_pool;
    struct anv_state_pool                       dynamic_state_pool;
    struct anv_state_pool                       instruction_state_pool;
    struct anv_state_pool                       binding_table_pool;
    struct anv_state_pool                       surface_state_pool;

    struct anv_state_reserved_pool              custom_border_colors;

    /** BO used for various workarounds
     *
     * There are a number of workarounds on our hardware which require writing
     * data somewhere and it doesn't really matter where.  For that, we use
     * this BO and just write to the first dword or so.
     *
     * We also need to be able to handle NULL buffers bound as pushed UBOs.
     * For that, we use the high bytes (>= 1024) of the workaround BO.
     */
    struct anv_bo *                             workaround_bo;
    struct anv_address                          workaround_address;

    struct anv_bo *                             trivial_batch_bo;
    struct anv_state                            null_surface_state;

    struct anv_pipeline_cache                   default_pipeline_cache;
    struct blorp_context                        blorp;

    struct anv_state                            border_colors;

    struct anv_state                            slice_hash;

    uint32_t                                    queue_count;
    struct anv_queue  *                         queues;

    struct anv_scratch_pool                     scratch_pool;
    /* Ray-tracing scratch BOs, lazily allocated per size class —
     * TODO confirm indexing scheme at the allocation site.
     */
    struct anv_bo                              *rt_scratch_bos[16];

    struct anv_shader_bin                      *rt_trampoline;
    struct anv_shader_bin                      *rt_trivial_return;

    /* Guards device-lost state; queue_submit is signaled on submission. */
    pthread_mutex_t                             mutex;
    pthread_cond_t                              queue_submit;
    int                                         _lost;
    int                                         lost_reported;

    struct intel_batch_decode_ctx               decoder_ctx;
    /*
     * When decoding a anv_cmd_buffer, we might need to search for BOs through
     * the cmd_buffer's list.
     */
    struct anv_cmd_buffer                      *cmd_buffer_being_decoded;

    int                                         perf_fd; /* -1 if no opened */
    uint64_t                                    perf_metric; /* 0 if unset */

    struct intel_aux_map_context                *aux_map_ctx;

    const struct intel_l3_config                *l3_config;

    struct intel_debug_block_frame              *debug_frame_desc;
};
1280
1281
/* Compile-time constant: gfx9+ builds always softpin; otherwise the runtime
 * check in anv_use_softpin() decides.
 */
#if defined(GFX_VERx10) && GFX_VERx10 >= 90
#define ANV_ALWAYS_SOFTPIN true
#else
#define ANV_ALWAYS_SOFTPIN false
#endif
1286
1287
/** Whether this physical device uses softpin (driver-assigned GPU virtual
 * addresses).  Resolved at compile time when GFX_VERx10 is known.
 */
static inline bool
anv_use_softpin(const struct anv_physical_device *pdevice)
{
#if defined(GFX_VERx10) && GFX_VERx10 >= 90
   /* Sky Lake and later always uses softpin */
   assert(pdevice->use_softpin);
   return true;
#elif defined(GFX_VERx10) && GFX_VERx10 < 80
   /* Haswell and earlier never use softpin */
   assert(!pdevice->use_softpin);
   return false;
#else
   /* If we don't have a GFX_VERx10 #define, we need to look at the physical
    * device.  Also, for GFX version 8, we need to look at the physical
    * device because Broadwell softpins but Cherryview doesn't.
    */
   return pdevice->use_softpin;
#endif
}
1306
1307
static inline struct anv_instance *
1308
anv_device_instance_or_null(const struct anv_device *device)
1309
{
1310
return device ? device->physical->instance : NULL;
1311
}
1312
1313
static inline struct anv_state_pool *
1314
anv_binding_table_pool(struct anv_device *device)
1315
{
1316
if (anv_use_softpin(device->physical))
1317
return &device->binding_table_pool;
1318
else
1319
return &device->surface_state_pool;
1320
}
1321
1322
static inline struct anv_state
1323
anv_binding_table_pool_alloc(struct anv_device *device)
1324
{
1325
if (anv_use_softpin(device->physical))
1326
return anv_state_pool_alloc(&device->binding_table_pool,
1327
device->binding_table_pool.block_size, 0);
1328
else
1329
return anv_state_pool_alloc_back(&device->surface_state_pool);
1330
}
1331
1332
static inline void
1333
anv_binding_table_pool_free(struct anv_device *device, struct anv_state state) {
1334
anv_state_pool_free(anv_binding_table_pool(device), state);
1335
}
1336
1337
/** Select the MOCS (memory object cache control) value for @usage.
 *
 * A NULL @bo is treated as non-external; externally shared BOs get the
 * external MOCS setting from isl.
 */
static inline uint32_t
anv_mocs(const struct anv_device *device,
         const struct anv_bo *bo,
         isl_surf_usage_flags_t usage)
{
   return isl_mocs(&device->isl_dev, usage, bo && bo->is_external);
}
1344
1345
void anv_device_init_blorp(struct anv_device *device);
void anv_device_finish_blorp(struct anv_device *device);

/* Device/queue "lost" machinery.  Prefer the anv_device_set_lost() /
 * anv_queue_set_lost() macros so __FILE__/__LINE__ are captured.
 */
void _anv_device_report_lost(struct anv_device *device);
VkResult _anv_device_set_lost(struct anv_device *device,
                              const char *file, int line,
                              const char *msg, ...)
   anv_printflike(4, 5);
VkResult _anv_queue_set_lost(struct anv_queue *queue,
                             const char *file, int line,
                             const char *msg, ...)
   anv_printflike(4, 5);
1357
/* Mark the device lost, recording the call site. */
#define anv_device_set_lost(dev, ...) \
   _anv_device_set_lost((dev), __FILE__, __LINE__, __VA_ARGS__)
/* Route the loss to the queue when a submission thread is in use, otherwise
 * to the whole device.  The whole expansion and every argument use is
 * parenthesized so the macro is a single well-behaved expression regardless
 * of the precedence context it is used in.
 */
#define anv_queue_set_lost(queue, ...) \
   ((queue)->device->has_thread_submit ? \
    _anv_queue_set_lost((queue), __FILE__, __LINE__, __VA_ARGS__) : \
    _anv_device_set_lost((queue)->device, __FILE__, __LINE__, __VA_ARGS__))
1363
1364
/** Whether the device has been marked lost.
 *
 * As a side effect, reports the loss to the user the first time it is
 * observed (see lost_reported).
 */
static inline bool
anv_device_is_lost(struct anv_device *device)
{
   int lost = p_atomic_read(&device->_lost);
   if (unlikely(lost && !device->lost_reported))
      _anv_device_report_lost(device);
   return lost;
}

VkResult anv_device_query_status(struct anv_device *device);
1374
1375
1376
/** Flags controlling BO allocation in anv_device_alloc_bo() and friends. */
enum anv_bo_alloc_flags {
   /** Specifies that the BO must have a 32-bit address
    *
    * This is the opposite of EXEC_OBJECT_SUPPORTS_48B_ADDRESS.
    */
   ANV_BO_ALLOC_32BIT_ADDRESS =  (1 << 0),

   /** Specifies that the BO may be shared externally */
   ANV_BO_ALLOC_EXTERNAL =       (1 << 1),

   /** Specifies that the BO should be mapped */
   ANV_BO_ALLOC_MAPPED =         (1 << 2),

   /** Specifies that the BO should be snooped so we get coherency */
   ANV_BO_ALLOC_SNOOPED =        (1 << 3),

   /** Specifies that the BO should be captured in error states */
   ANV_BO_ALLOC_CAPTURE =        (1 << 4),

   /** Specifies that the BO will have an address assigned by the caller
    *
    * Such BOs do not exist in any VMA heap.
    */
   ANV_BO_ALLOC_FIXED_ADDRESS = (1 << 5),

   /** Enables implicit synchronization on the BO
    *
    * This is the opposite of EXEC_OBJECT_ASYNC.
    */
   ANV_BO_ALLOC_IMPLICIT_SYNC =  (1 << 6),

   /** Marks the BO as written for implicit synchronization
    *
    * This is equivalent to EXEC_OBJECT_WRITE.
    */
   ANV_BO_ALLOC_IMPLICIT_WRITE = (1 << 7),

   /** Has an address which is visible to the client */
   ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS = (1 << 8),

   /** This buffer has implicit CCS data attached to it */
   ANV_BO_ALLOC_IMPLICIT_CCS = (1 << 9),

   /** This buffer is allocated from local memory */
   ANV_BO_ALLOC_LOCAL_MEM = (1 << 10),
};
1422
1423
/* BO lifetime management.  All of these go through the device BO cache so
 * a given GEM handle maps to exactly one anv_bo.
 */
VkResult anv_device_alloc_bo(struct anv_device *device,
                             const char *name, uint64_t size,
                             enum anv_bo_alloc_flags alloc_flags,
                             uint64_t explicit_address,
                             struct anv_bo **bo);
/* Wrap client memory (userptr) in a BO. */
VkResult anv_device_import_bo_from_host_ptr(struct anv_device *device,
                                            void *host_ptr, uint32_t size,
                                            enum anv_bo_alloc_flags alloc_flags,
                                            uint64_t client_address,
                                            struct anv_bo **bo_out);
/* Import a dma-buf fd as a BO. */
VkResult anv_device_import_bo(struct anv_device *device, int fd,
                              enum anv_bo_alloc_flags alloc_flags,
                              uint64_t client_address,
                              struct anv_bo **bo);
/* Export a BO as a dma-buf fd. */
VkResult anv_device_export_bo(struct anv_device *device,
                              struct anv_bo *bo, int *fd_out);
/* Drop a reference; frees the BO when the last reference goes away —
 * TODO confirm refcounting semantics in anv_allocator.c.
 */
void anv_device_release_bo(struct anv_device *device,
                           struct anv_bo *bo);
1441
1442
static inline struct anv_bo *
1443
anv_device_lookup_bo(struct anv_device *device, uint32_t gem_handle)
1444
{
1445
return util_sparse_array_get(&device->bo_cache.bo_map, gem_handle);
1446
}
1447
1448
/* Busy/wait queries on a single BO. */
VkResult anv_device_bo_busy(struct anv_device *device, struct anv_bo *bo);
VkResult anv_device_wait(struct anv_device *device, struct anv_bo *bo,
                         int64_t timeout);

/* Queue setup/teardown and submission entry points. */
VkResult anv_queue_init(struct anv_device *device, struct anv_queue *queue,
                        uint32_t exec_flags,
                        const VkDeviceQueueCreateInfo *pCreateInfo);
void anv_queue_finish(struct anv_queue *queue);

VkResult anv_queue_execbuf_locked(struct anv_queue *queue, struct anv_queue_submit *submit);
VkResult anv_queue_submit_simple_batch(struct anv_queue *queue,
                                       struct anv_batch *batch);
1460
1461
/* Monotonic time helpers used for timeouts. */
uint64_t anv_gettime_ns(void);
uint64_t anv_get_absolute_timeout(uint64_t timeout);

/* Thin wrappers over the i915 GEM ioctls (see drm-uapi/i915_drm.h).  Unless
 * noted, the int-returning ones follow the ioctl convention of 0 on success
 * and negative on error — TODO confirm per-wrapper in anv_gem.c.
 */
void* anv_gem_mmap(struct anv_device *device,
                   uint32_t gem_handle, uint64_t offset, uint64_t size, uint32_t flags);
void anv_gem_munmap(struct anv_device *device, void *p, uint64_t size);
uint32_t anv_gem_create(struct anv_device *device, uint64_t size);
void anv_gem_close(struct anv_device *device, uint32_t gem_handle);
uint32_t anv_gem_create_regions(struct anv_device *device, uint64_t anv_bo_size,
                                uint32_t num_regions,
                                struct drm_i915_gem_memory_class_instance *regions);
uint32_t anv_gem_userptr(struct anv_device *device, void *mem, size_t size);
int anv_gem_busy(struct anv_device *device, uint32_t gem_handle);
int anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns);
int anv_gem_execbuffer(struct anv_device *device,
                       struct drm_i915_gem_execbuffer2 *execbuf);
int anv_gem_set_tiling(struct anv_device *device, uint32_t gem_handle,
                       uint32_t stride, uint32_t tiling);

/* Hardware-context management. */
int anv_gem_create_context(struct anv_device *device);
int anv_gem_create_context_engines(struct anv_device *device,
                                   const struct drm_i915_query_engine_info *info,
                                   int num_engines,
                                   uint16_t *engine_classes);
bool anv_gem_has_context_priority(int fd);
int anv_gem_destroy_context(struct anv_device *device, int context);
int anv_gem_set_context_param(int fd, int context, uint32_t param,
                              uint64_t value);
int anv_gem_get_context_param(int fd, int context, uint32_t param,
                              uint64_t *value);
int anv_gem_get_param(int fd, uint32_t param);
uint64_t anv_gem_get_drm_cap(int fd, uint32_t capability);
int anv_gem_get_tiling(struct anv_device *device, uint32_t gem_handle);
bool anv_gem_get_bit6_swizzle(int fd, uint32_t tiling);
int anv_gem_context_get_reset_stats(int fd, int context,
                                    uint32_t *active, uint32_t *pending);

/* dma-buf / prime fd conversions. */
int anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle);
int anv_gem_reg_read(int fd, uint32_t offset, uint64_t *result);
uint32_t anv_gem_fd_to_handle(struct anv_device *device, int fd);
int anv_gem_set_caching(struct anv_device *device, uint32_t gem_handle, uint32_t caching);
int anv_gem_set_domain(struct anv_device *device, uint32_t gem_handle,
                       uint32_t read_domains, uint32_t write_domain);
int anv_gem_sync_file_merge(struct anv_device *device, int fd1, int fd2);

/* DRM syncobj wrappers (binary and timeline). */
uint32_t anv_gem_syncobj_create(struct anv_device *device, uint32_t flags);
void anv_gem_syncobj_destroy(struct anv_device *device, uint32_t handle);
int anv_gem_syncobj_handle_to_fd(struct anv_device *device, uint32_t handle);
uint32_t anv_gem_syncobj_fd_to_handle(struct anv_device *device, int fd);
int anv_gem_syncobj_export_sync_file(struct anv_device *device,
                                     uint32_t handle);
int anv_gem_syncobj_import_sync_file(struct anv_device *device,
                                     uint32_t handle, int fd);
void anv_gem_syncobj_reset(struct anv_device *device, uint32_t handle);
bool anv_gem_supports_syncobj_wait(int fd);
int anv_gem_syncobj_wait(struct anv_device *device,
                         const uint32_t *handles, uint32_t num_handles,
                         int64_t abs_timeout_ns, bool wait_all);
int anv_gem_syncobj_timeline_wait(struct anv_device *device,
                                  const uint32_t *handles, const uint64_t *points,
                                  uint32_t num_items, int64_t abs_timeout_ns,
                                  bool wait_all, bool wait_materialize);
int anv_gem_syncobj_timeline_signal(struct anv_device *device,
                                    const uint32_t *handles, const uint64_t *points,
                                    uint32_t num_items);
int anv_gem_syncobj_timeline_query(struct anv_device *device,
                                   const uint32_t *handles, uint64_t *points,
                                   uint32_t num_items);

/* DRM_IOCTL_I915_QUERY helpers. */
int anv_i915_query(int fd, uint64_t query_id, void *buffer,
                   int32_t *buffer_len);
struct drm_i915_query_engine_info *anv_gem_get_engine_info(int fd);
int anv_gem_count_engines(const struct drm_i915_query_engine_info *info,
                          uint16_t engine_class);

/* Softpin virtual-address allocation (see anv_device's vma heaps). */
uint64_t anv_vma_alloc(struct anv_device *device,
                       uint64_t size, uint64_t align,
                       enum anv_bo_alloc_flags alloc_flags,
                       uint64_t client_address);
void anv_vma_free(struct anv_device *device,
                  uint64_t address, uint64_t size);
1538
1539
/** A growable list of i915 relocations plus BO dependencies.
 *
 * Under softpin only the dependency bitset (deps) is used; otherwise
 * relocs/reloc_bos hold actual relocation entries.
 */
struct anv_reloc_list {
   uint32_t                                     num_relocs;
   uint32_t                                     array_length;
   struct drm_i915_gem_relocation_entry *       relocs;
   struct anv_bo **                             reloc_bos;
   /* Bitset of BO gem handles this batch depends on. */
   uint32_t                                     dep_words;
   BITSET_WORD *                                deps;
};

VkResult anv_reloc_list_init(struct anv_reloc_list *list,
                             const VkAllocationCallbacks *alloc);
void anv_reloc_list_finish(struct anv_reloc_list *list,
                           const VkAllocationCallbacks *alloc);

/* Append a relocation entry; the resolved address is also returned through
 * address_u64_out when non-NULL.
 */
VkResult anv_reloc_list_add(struct anv_reloc_list *list,
                            const VkAllocationCallbacks *alloc,
                            uint32_t offset, struct anv_bo *target_bo,
                            uint32_t delta, uint64_t *address_u64_out);

/* Record a plain BO dependency (softpin path — no relocation entry). */
VkResult anv_reloc_list_add_bo(struct anv_reloc_list *list,
                               const VkAllocationCallbacks *alloc,
                               struct anv_bo *target_bo);
1561
1562
/** One BO's worth of batch buffer, owned by a command buffer. */
struct anv_batch_bo {
   /* Link in the anv_cmd_buffer.owned_batch_bos list */
   struct list_head                             link;

   struct anv_bo *                              bo;

   /* Bytes actually consumed in this batch BO */
   uint32_t                                     length;

   /* When this batch BO is used as part of a primary batch buffer, this
    * tracks whether it is chained to another primary batch buffer.
    *
    * If this is the case, the relocation list's last entry points to the
    * location of the MI_BATCH_BUFFER_START chaining to the next batch.
    */
   bool                                         chained;

   struct anv_reloc_list                        relocs;
};
1581
1582
/** A stream of GPU commands being written into memory.
 *
 * start/next/end are CPU pointers into the current storage; start_addr is
 * the corresponding GPU address of `start`.
 */
struct anv_batch {
   const VkAllocationCallbacks *                alloc;

   struct anv_address                           start_addr;

   void *                                       start;
   void *                                       end;
   void *                                       next;

   struct anv_reloc_list *                      relocs;

   /* This callback is called (with the associated user data) in the event
    * that the batch runs out of space.
    */
   VkResult (*extend_cb)(struct anv_batch *, void *);
   void *                                       user_data;

   /**
    * Current error status of the command buffer. Used to track inconsistent
    * or incomplete command buffer states that are the consequence of run-time
    * errors such as out of memory scenarios. We want to track this in the
    * batch because the command buffer object is not visible to some parts
    * of the driver.
    */
   VkResult                                     status;
};

/* Reserve num_dwords in the batch; returns NULL (and records the error via
 * anv_batch_set_error) when the batch cannot be extended.
 */
void *anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords);
/* Copy another batch's contents into this one. */
void anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other);
/* GPU address corresponding to a CPU pointer inside the batch. */
struct anv_address anv_batch_address(struct anv_batch *batch, void *batch_location);
1612
1613
static inline void
1614
anv_batch_set_storage(struct anv_batch *batch, struct anv_address addr,
1615
void *map, size_t size)
1616
{
1617
batch->start_addr = addr;
1618
batch->next = batch->start = map;
1619
batch->end = map + size;
1620
}
1621
1622
static inline VkResult
1623
anv_batch_set_error(struct anv_batch *batch, VkResult error)
1624
{
1625
assert(error != VK_SUCCESS);
1626
if (batch->status == VK_SUCCESS)
1627
batch->status = error;
1628
return batch->status;
1629
}
1630
1631
static inline bool
1632
anv_batch_has_error(struct anv_batch *batch)
1633
{
1634
return batch->status != VK_SUCCESS;
1635
}
1636
1637
/** Record a reference from @location in the batch to @bo + @delta.
 *
 * With always-softpin builds the BO already has its final address, so only
 * a dependency is recorded and the address computed directly; otherwise an
 * i915 relocation entry is appended.  On failure the error is recorded on
 * the batch and 0 is returned.
 */
static inline uint64_t
anv_batch_emit_reloc(struct anv_batch *batch,
                     void *location, struct anv_bo *bo, uint32_t delta)
{
   uint64_t address_u64 = 0;
   VkResult result;

   if (ANV_ALWAYS_SOFTPIN) {
      address_u64 = bo->offset + delta;
      result = anv_reloc_list_add_bo(batch->relocs, batch->alloc, bo);
   } else {
      result = anv_reloc_list_add(batch->relocs, batch->alloc,
                                  location - batch->start, bo, delta,
                                  &address_u64);
   }
   if (unlikely(result != VK_SUCCESS)) {
      anv_batch_set_error(batch, result);
      return 0;
   }

   return address_u64;
}
1659
1660
1661
/* The all-zero address: no BO, zero offset. */
#define ANV_NULL_ADDRESS ((struct anv_address) { NULL, 0 })

/** Wrap an absolute (BO-less) GPU address.
 *
 * The address must already be in canonical (sign-extended) form.
 */
static inline struct anv_address
anv_address_from_u64(uint64_t addr_u64)
{
   assert(addr_u64 == intel_canonical_address(addr_u64));
   return (struct anv_address) {
      .bo = NULL,
      .offset = addr_u64,
   };
}
1672
1673
static inline bool
1674
anv_address_is_null(struct anv_address addr)
1675
{
1676
return addr.bo == NULL && addr.offset == 0;
1677
}
1678
1679
/** Resolve @addr to a canonical 64-bit GPU address.
 *
 * Only valid for BO-relative addresses when the BO is pinned (softpin);
 * BO-less addresses are returned as-is (canonicalized).
 */
static inline uint64_t
anv_address_physical(struct anv_address addr)
{
   if (addr.bo && (ANV_ALWAYS_SOFTPIN ||
                   (addr.bo->flags & EXEC_OBJECT_PINNED))) {
      assert(addr.bo->flags & EXEC_OBJECT_PINNED);
      return intel_canonical_address(addr.bo->offset + addr.offset);
   } else {
      return intel_canonical_address(addr.offset);
   }
}
1690
1691
static inline struct anv_address
1692
anv_address_add(struct anv_address addr, uint64_t offset)
1693
{
1694
addr.offset += offset;
1695
return addr;
1696
}
1697
1698
/** Write a GPU address @v into batch/state memory at @p.
 *
 * Gfx8+ writes a 64-bit canonical address; earlier gens write 32 bits.
 * When @flush is set and the device has no LLC, the written range is
 * flushed from the CPU caches.
 */
static inline void
write_reloc(const struct anv_device *device, void *p, uint64_t v, bool flush)
{
   unsigned reloc_size = 0;
   if (device->info.ver >= 8) {
      reloc_size = sizeof(uint64_t);
      *(uint64_t *)p = intel_canonical_address(v);
   } else {
      reloc_size = sizeof(uint32_t);
      *(uint32_t *)p = v;
   }

   if (flush && !device->info.has_llc)
      intel_flush_range(p, reloc_size);
}
1713
1714
/** __gen_combine_address() hook used by the genxml pack functions.
 *
 * A NULL bo means the offset is already absolute.  A NULL batch means the
 * caller wants the physical address without recording anything (requires a
 * pinned BO).  Otherwise a relocation/dependency is recorded in the batch.
 */
static inline uint64_t
_anv_combine_address(struct anv_batch *batch, void *location,
                     const struct anv_address address, uint32_t delta)
{
   if (address.bo == NULL) {
      return address.offset + delta;
   } else if (batch == NULL) {
      assert(address.bo->flags & EXEC_OBJECT_PINNED);
      return anv_address_physical(anv_address_add(address, delta));
   } else {
      assert(batch->start <= location && location < batch->end);
      /* i915 relocations are signed. */
      assert(INT32_MIN <= address.offset && address.offset <= INT32_MAX);
      return anv_batch_emit_reloc(batch, location, address.bo, address.offset + delta);
   }
}
1730
1731
/* Glue types/hooks consumed by the generated genxml pack functions. */
#define __gen_address_type struct anv_address
#define __gen_user_data struct anv_batch
#define __gen_combine_address _anv_combine_address

/* Wrapper macros needed to work around preprocessor argument issues. In
 * particular, arguments don't get pre-evaluated if they are concatenated.
 * This means that, if you pass GENX(3DSTATE_PS) into the emit macro, the
 * GENX macro won't get evaluated if the emit macro contains "cmd ## foo".
 * We can work around this easily enough with these helpers.
 */
#define __anv_cmd_length(cmd) cmd ## _length
#define __anv_cmd_length_bias(cmd) cmd ## _length_bias
#define __anv_cmd_header(cmd) cmd ## _header
#define __anv_cmd_pack(cmd) cmd ## _pack
#define __anv_reg_num(reg) reg ## _num
1747
/* Pack a genxml struct built from designated initializers into dst.  The
 * Valgrind check catches any dword left undefined by the pack function.
 */
#define anv_pack_struct(dst, struc, ...) do {                              \
      struct struc __template = {                                          \
         __VA_ARGS__                                                       \
      };                                                                   \
      __anv_cmd_pack(struc)(NULL, dst, &__template);                       \
      VG(VALGRIND_CHECK_MEM_IS_DEFINED(dst, __anv_cmd_length(struc) * 4)); \
   } while (0)
1754
1755
/* Emit a variable-length command of n dwords.  Evaluates to a pointer to
 * the emitted dwords, or NULL when the batch could not be extended (the
 * error is recorded on the batch by anv_batch_emit_dwords).
 */
#define anv_batch_emitn(batch, n, cmd, ...) ({             \
      void *__dst = anv_batch_emit_dwords(batch, n);       \
      if (__dst) {                                         \
         struct cmd __template = {                         \
            __anv_cmd_header(cmd),                         \
           .DWordLength = n - __anv_cmd_length_bias(cmd),  \
            __VA_ARGS__                                    \
         };                                                \
         __anv_cmd_pack(cmd)(batch, __dst, &__template);   \
      }                                                    \
      __dst;                                               \
   })
1767
1768
/* Emit the bitwise OR of two equal-sized pre-packed dword arrays.  Used to
 * combine a partially-packed command with per-draw state.
 */
#define anv_batch_emit_merge(batch, dwords0, dwords1)                   \
   do {                                                                 \
      uint32_t *dw;                                                     \
                                                                        \
      STATIC_ASSERT(ARRAY_SIZE(dwords0) == ARRAY_SIZE(dwords1));        \
      dw = anv_batch_emit_dwords((batch), ARRAY_SIZE(dwords0));         \
      if (!dw)                                                          \
         break;                                                         \
      for (uint32_t i = 0; i < ARRAY_SIZE(dwords0); i++)                \
         dw[i] = (dwords0)[i] | (dwords1)[i];                           \
      VG(VALGRIND_CHECK_MEM_IS_DEFINED(dw, ARRAY_SIZE(dwords0) * 4));   \
   } while (0)
1780
1781
/* Emit a fixed-length command.  The for-statement trick gives the caller a
 * block in which to fill in fields of `name`; the increment expression then
 * packs the struct into the reserved dwords and ends the loop.  When the
 * batch cannot be extended (_dst == NULL) the caller's block is skipped.
 */
#define anv_batch_emit(batch, cmd, name)                            \
   for (struct cmd name = { __anv_cmd_header(cmd) },                \
        *_dst = anv_batch_emit_dwords(batch, __anv_cmd_length(cmd)); \
        __builtin_expect(_dst != NULL, 1);                          \
        ({ __anv_cmd_pack(cmd)(batch, _dst, &name);                 \
           VG(VALGRIND_CHECK_MEM_IS_DEFINED(_dst, __anv_cmd_length(cmd) * 4)); \
           _dst = NULL;                                             \
         }))
1789
1790
/* Write a register via MI_LOAD_REGISTER_IMM.  The caller's block fills in
 * `name`, which is then packed and emitted one dword at a time.
 *
 * NOTE(review): RegisterOffset is not advanced by i * 4 inside the loop, so
 * this looks correct only for single-dword registers — confirm all users.
 */
#define anv_batch_write_reg(batch, reg, name)                           \
   for (struct reg name = {}, *_cont = (struct reg *)1; _cont != NULL;  \
        ({                                                              \
            uint32_t _dw[__anv_cmd_length(reg)];                        \
            __anv_cmd_pack(reg)(NULL, _dw, &name);                      \
            for (unsigned i = 0; i < __anv_cmd_length(reg); i++) {      \
               anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), lri) { \
                  lri.RegisterOffset   = __anv_reg_num(reg);            \
                  lri.DataDWord        = _dw[i];                        \
               }                                                        \
            }                                                           \
           _cont = NULL;                                                \
         }))
1803
1804
/* #define __gen_get_batch_dwords anv_batch_emit_dwords */
1805
/* #define __gen_get_batch_address anv_batch_address */
1806
/* #define __gen_address_value anv_address_physical */
1807
/* #define __gen_address_offset anv_address_add */
1808
1809
/** Implementation of VkDeviceMemory. */
struct anv_device_memory {
   struct vk_object_base                        base;

   /* Link in anv_device::memory_objects */
   struct list_head                             link;

   struct anv_bo *                              bo;
   struct anv_memory_type *                     type;
   /* CPU mapping state (vkMapMemory). */
   VkDeviceSize                                 map_size;
   void *                                       map;

   /* If set, we are holding reference to AHardwareBuffer
    * which we must release when memory is freed.
    */
   struct AHardwareBuffer *                     ahw;

   /* If set, this memory comes from a host pointer. */
   void *                                       host_ptr;
};
1827
1828
/**
 * Header for Vertex URB Entry (VUE)
 */
struct anv_vue_header {
   uint32_t Reserved;
   uint32_t RTAIndex; /* RenderTargetArrayIndex */
   uint32_t ViewportIndex;
   float PointWidth;
};
1837
1838
/** Struct representing a sampled image descriptor
 *
 * This descriptor layout is used for sampled images, bare sampler, and
 * combined image/sampler descriptors.
 */
struct anv_sampled_image_descriptor {
   /** Bindless image handle
    *
    * This is expected to already be shifted such that the 20-bit
    * SURFACE_STATE table index is in the top 20 bits.
    */
   uint32_t image;

   /** Bindless sampler handle
    *
    * This is assumed to be a 32B-aligned SAMPLER_STATE pointer relative
    * to the dynamic state base address.
    */
   uint32_t sampler;
};
1858
1859
/** Per-texture channel-swizzle descriptor. */
struct anv_texture_swizzle_descriptor {
   /** Texture swizzle
    *
    * See also nir_intrinsic_channel_select_intel
    */
   uint8_t swizzle[4];

   /** Unused padding to ensure the struct is a multiple of 64 bits */
   uint32_t _pad;
};
1869
1870
/** Struct representing a storage image descriptor */
struct anv_storage_image_descriptor {
   /** Bindless image handles
    *
    * These are expected to already be shifted such that the 20-bit
    * SURFACE_STATE table index is in the top 20 bits.
    */
   uint32_t read_write;
   uint32_t write_only;
};
1880
1881
/** Struct representing an address/range descriptor
 *
 * The fields of this struct correspond directly to the data layout of
 * nir_address_format_64bit_bounded_global addresses.  The last field is the
 * offset in the NIR address so it must be zero so that when you load the
 * descriptor you get a pointer to the start of the range.
 */
struct anv_address_range_descriptor {
   uint64_t address;
   uint32_t range;
   uint32_t zero;
};
1893
1894
/** Bitfield describing what kinds of data a descriptor binding carries. */
enum anv_descriptor_data {
   /** The descriptor contains a BTI reference to a surface state */
   ANV_DESCRIPTOR_SURFACE_STATE  = (1 << 0),
   /** The descriptor contains a BTI reference to a sampler state */
   ANV_DESCRIPTOR_SAMPLER_STATE  = (1 << 1),
   /** The descriptor contains an actual buffer view */
   ANV_DESCRIPTOR_BUFFER_VIEW    = (1 << 2),
   /** The descriptor contains auxiliary image layout data */
   ANV_DESCRIPTOR_IMAGE_PARAM    = (1 << 3),
   /** The descriptor contains inline uniform data */
   ANV_DESCRIPTOR_INLINE_UNIFORM = (1 << 4),
   /** anv_address_range_descriptor with a buffer address and range */
   ANV_DESCRIPTOR_ADDRESS_RANGE  = (1 << 5),
   /** Bindless surface handle */
   ANV_DESCRIPTOR_SAMPLED_IMAGE  = (1 << 6),
   /** Storage image handles */
   ANV_DESCRIPTOR_STORAGE_IMAGE  = (1 << 7),
   /** Texture swizzle (anv_texture_swizzle_descriptor) */
   ANV_DESCRIPTOR_TEXTURE_SWIZZLE  = (1 << 8),
};
1914
1915
/** Layout information for a single binding within a descriptor set. */
struct anv_descriptor_set_binding_layout {
   /* The type of the descriptors in this binding */
   VkDescriptorType type;

   /* Flags provided when this binding was created */
   VkDescriptorBindingFlagsEXT flags;

   /* Bitfield representing the type of data this descriptor contains */
   enum anv_descriptor_data data;

   /* Maximum number of YCbCr texture/sampler planes */
   uint8_t max_plane_count;

   /* Number of array elements in this binding (or size in bytes for inline
    * uniform data)
    */
   uint32_t array_size;

   /* Index into the flattened descriptor set */
   uint32_t descriptor_index;

   /* Index into the dynamic state array for a dynamic buffer */
   int16_t dynamic_offset_index;

   /* Index into the descriptor set buffer views */
   int32_t buffer_view_index;

   /* Offset into the descriptor buffer where this descriptor lives */
   uint32_t descriptor_offset;

   /* Immutable samplers (or NULL if no immutable samplers) */
   struct anv_sampler **immutable_samplers;
};
1948
1949
/* Size, in the descriptor buffer, of one descriptor with this binding layout. */
unsigned anv_descriptor_size(const struct anv_descriptor_set_binding_layout *layout);

/* Size, in the descriptor buffer, of a descriptor of the given type on this
 * physical device.
 */
unsigned anv_descriptor_type_size(const struct anv_physical_device *pdevice,
                                  VkDescriptorType type);

/* Whether this binding (image or, if sampler==true, sampler) can be accessed
 * bindlessly on this physical device.
 */
bool anv_descriptor_supports_bindless(const struct anv_physical_device *pdevice,
                                      const struct anv_descriptor_set_binding_layout *binding,
                                      bool sampler);

/* Whether this binding must be accessed bindlessly (no binding-table slot). */
bool anv_descriptor_requires_bindless(const struct anv_physical_device *pdevice,
                                      const struct anv_descriptor_set_binding_layout *binding,
                                      bool sampler);
1961
1962
/** Reference-counted layout of a whole descriptor set. */
struct anv_descriptor_set_layout {
   struct vk_object_base base;

   /* Descriptor set layouts can be destroyed at almost any time */
   uint32_t ref_cnt;

   /* Number of bindings in this descriptor set */
   uint32_t binding_count;

   /* Total number of descriptors */
   uint32_t descriptor_count;

   /* Shader stages affected by this descriptor set */
   uint16_t shader_stages;

   /* Number of buffer views in this descriptor set */
   uint32_t buffer_view_count;

   /* Number of dynamic offsets used by this descriptor set */
   uint16_t dynamic_offset_count;

   /* For each dynamic buffer, which VkShaderStageFlagBits stages are using
    * this buffer
    */
   VkShaderStageFlags dynamic_offset_stages[MAX_DYNAMIC_BUFFERS];

   /* Size of the descriptor buffer for this descriptor set */
   uint32_t descriptor_buffer_size;

   /* Bindings in this descriptor set (trailing variable-length array) */
   struct anv_descriptor_set_binding_layout binding[0];
};
1994
1995
/* Frees the layout.  Called when its reference count drops to zero (see
 * anv_descriptor_set_layout_unref).
 */
void anv_descriptor_set_layout_destroy(struct anv_device *device,
                                       struct anv_descriptor_set_layout *layout);
1997
1998
static inline void
1999
anv_descriptor_set_layout_ref(struct anv_descriptor_set_layout *layout)
2000
{
2001
assert(layout && layout->ref_cnt >= 1);
2002
p_atomic_inc(&layout->ref_cnt);
2003
}
2004
2005
static inline void
2006
anv_descriptor_set_layout_unref(struct anv_device *device,
2007
struct anv_descriptor_set_layout *layout)
2008
{
2009
assert(layout && layout->ref_cnt >= 1);
2010
if (p_atomic_dec_zero(&layout->ref_cnt))
2011
anv_descriptor_set_layout_destroy(device, layout);
2012
}
2013
2014
/** A single written descriptor.
 *
 * Which union member is valid depends on @type: image/sampler descriptors
 * use the first struct, (dynamic) buffer descriptors the second, and
 * texel-buffer descriptors the bare buffer_view pointer.
 */
struct anv_descriptor {
   VkDescriptorType type;

   union {
      struct {
         VkImageLayout layout;
         struct anv_image_view *image_view;
         struct anv_sampler *sampler;
      };

      struct {
         struct anv_buffer *buffer;
         uint64_t offset;
         uint64_t range;
      };

      struct anv_buffer_view *buffer_view;
   };
};
2033
2034
/** An allocated descriptor set. */
struct anv_descriptor_set {
   struct vk_object_base base;

   /* Owning pool; NULL for push descriptor sets (see
    * anv_descriptor_set_is_push).
    */
   struct anv_descriptor_pool *pool;
   struct anv_descriptor_set_layout *layout;

   /* Amount of space occupied in the pool by this descriptor set. It can
    * be larger than the size of the descriptor set.
    */
   uint32_t size;

   /* State relative to anv_descriptor_pool::bo */
   struct anv_state desc_mem;
   /* Surface state for the descriptor buffer */
   struct anv_state desc_surface_state;

   /* Descriptor set address. */
   struct anv_address desc_addr;

   uint32_t buffer_view_count;
   struct anv_buffer_view *buffer_views;

   /* Link to descriptor pool's desc_sets list. */
   struct list_head pool_link;

   uint32_t descriptor_count;
   /* Trailing variable-length array of written descriptors */
   struct anv_descriptor descriptors[0];
};
2062
2063
static inline bool
2064
anv_descriptor_set_is_push(struct anv_descriptor_set *set)
2065
{
2066
return set->pool == NULL;
2067
}
2068
2069
/** Driver object backing VkBufferView. */
struct anv_buffer_view {
   struct vk_object_base base;

   enum isl_format format; /**< VkBufferViewCreateInfo::format */
   uint64_t range; /**< VkBufferViewCreateInfo::range */

   struct anv_address address;

   struct anv_state surface_state;
   struct anv_state storage_surface_state;
   struct anv_state writeonly_storage_surface_state;

   struct brw_image_param storage_image_param;
};
2083
2084
/** Storage for a push descriptor set (vkCmdPushDescriptorSetKHR). */
struct anv_push_descriptor_set {
   struct anv_descriptor_set set;

   /* Put this field right behind anv_descriptor_set so it fills up the
    * descriptors[0] field. */
   struct anv_descriptor descriptors[MAX_PUSH_DESCRIPTORS];

   /** True if the descriptor set buffer has been referenced by a draw or
    * dispatch command.
    */
   bool set_used_on_gpu;

   struct anv_buffer_view buffer_views[MAX_PUSH_DESCRIPTORS];
};
2098
2099
static inline struct anv_address
2100
anv_descriptor_set_address(struct anv_descriptor_set *set)
2101
{
2102
if (anv_descriptor_set_is_push(set)) {
2103
/* We have to flag push descriptor set as used on the GPU
2104
* so that the next time we push descriptors, we grab a new memory.
2105
*/
2106
struct anv_push_descriptor_set *push_set =
2107
(struct anv_push_descriptor_set *)set;
2108
push_set->set_used_on_gpu = true;
2109
}
2110
2111
return set->desc_addr;
2112
}
2113
2114
/** Driver object backing VkDescriptorPool. */
struct anv_descriptor_pool {
   struct vk_object_base base;

   /* Host allocation: total bytes, bump-allocation cursor, and head of the
    * free list within data[].
    */
   uint32_t size;
   uint32_t next;
   uint32_t free_list;

   /* GPU-side descriptor buffer and its sub-allocation heap */
   struct anv_bo *bo;
   struct util_vma_heap bo_heap;

   struct anv_state_stream surface_state_stream;
   void *surface_state_free_list;

   /* All sets allocated from this pool (anv_descriptor_set::pool_link) */
   struct list_head desc_sets;

   /* Trailing host storage the sets are carved out of */
   char data[0];
};
2131
2132
/** Which union of VkDescriptorUpdateTemplate data an entry consumes. */
enum anv_descriptor_template_entry_type {
   ANV_DESCRIPTOR_TEMPLATE_ENTRY_TYPE_IMAGE,
   ANV_DESCRIPTOR_TEMPLATE_ENTRY_TYPE_BUFFER,
   ANV_DESCRIPTOR_TEMPLATE_ENTRY_TYPE_BUFFER_VIEW
};
2137
2138
/** One entry of a descriptor update template. */
struct anv_descriptor_template_entry {
   /* The type of descriptor in this entry */
   VkDescriptorType type;

   /* Binding in the descriptor set */
   uint32_t binding;

   /* Offset at which to write into the descriptor set binding */
   uint32_t array_element;

   /* Number of elements to write into the descriptor set binding */
   uint32_t array_count;

   /* Offset into the user provided data */
   size_t offset;

   /* Stride between elements into the user provided data */
   size_t stride;
};
2157
2158
/** Driver object backing VkDescriptorUpdateTemplate. */
struct anv_descriptor_update_template {
   struct vk_object_base base;

   VkPipelineBindPoint bind_point;

   /* The descriptor set this template corresponds to. This value is only
    * valid if the template was created with the templateType
    * VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET.
    */
   uint8_t set;

   /* Number of entries in this template */
   uint32_t entry_count;

   /* Entries of the template (trailing variable-length array) */
   struct anv_descriptor_template_entry entries[0];
};
2175
2176
/* Host-memory size needed for a set with this layout and variable
 * descriptor count.
 */
size_t
anv_descriptor_set_layout_size(const struct anv_descriptor_set_layout *layout,
                               uint32_t var_desc_count);

/* GPU descriptor-buffer size needed for a set with this layout and variable
 * descriptor count.
 */
uint32_t
anv_descriptor_set_layout_descriptor_buffer_size(const struct anv_descriptor_set_layout *set_layout,
                                                 uint32_t var_desc_count);

/* Write one image/sampler descriptor at (binding, element). */
void
anv_descriptor_set_write_image_view(struct anv_device *device,
                                    struct anv_descriptor_set *set,
                                    const VkDescriptorImageInfo * const info,
                                    VkDescriptorType type,
                                    uint32_t binding,
                                    uint32_t element);

/* Write one texel-buffer-view descriptor at (binding, element). */
void
anv_descriptor_set_write_buffer_view(struct anv_device *device,
                                     struct anv_descriptor_set *set,
                                     VkDescriptorType type,
                                     struct anv_buffer_view *buffer_view,
                                     uint32_t binding,
                                     uint32_t element);

/* Write one (dynamic) buffer descriptor at (binding, element).  Surface
 * states may be allocated from alloc_stream (used for push descriptors).
 */
void
anv_descriptor_set_write_buffer(struct anv_device *device,
                                struct anv_descriptor_set *set,
                                struct anv_state_stream *alloc_stream,
                                VkDescriptorType type,
                                struct anv_buffer *buffer,
                                uint32_t binding,
                                uint32_t element,
                                VkDeviceSize offset,
                                VkDeviceSize range);

/* Write one acceleration-structure descriptor at (binding, element). */
void
anv_descriptor_set_write_acceleration_structure(struct anv_device *device,
                                                struct anv_descriptor_set *set,
                                                struct anv_acceleration_structure *accel,
                                                uint32_t binding,
                                                uint32_t element);

/* Copy inline uniform bytes into the binding's descriptor-buffer storage. */
void
anv_descriptor_set_write_inline_uniform_data(struct anv_device *device,
                                             struct anv_descriptor_set *set,
                                             uint32_t binding,
                                             const void *data,
                                             size_t offset,
                                             size_t size);

/* Apply a full descriptor update template to @set. */
void
anv_descriptor_set_write_template(struct anv_device *device,
                                  struct anv_descriptor_set *set,
                                  struct anv_state_stream *alloc_stream,
                                  const struct anv_descriptor_update_template *template,
                                  const void *data);

/* Allocate a descriptor set from @pool with the given layout. */
VkResult
anv_descriptor_set_create(struct anv_device *device,
                          struct anv_descriptor_pool *pool,
                          struct anv_descriptor_set_layout *layout,
                          uint32_t var_desc_count,
                          struct anv_descriptor_set **out_set);

/* Return a descriptor set's storage to @pool. */
void
anv_descriptor_set_destroy(struct anv_device *device,
                           struct anv_descriptor_pool *pool,
                           struct anv_descriptor_set *set);
2244
2245
/* Virtual descriptor-set indices.  These values are stored in
 * anv_pipeline_binding::set to mark bindings that do not come from a normal
 * descriptor set.
 */
#define ANV_DESCRIPTOR_SET_NULL             (UINT8_MAX - 5)
#define ANV_DESCRIPTOR_SET_PUSH_CONSTANTS   (UINT8_MAX - 4)
#define ANV_DESCRIPTOR_SET_DESCRIPTORS      (UINT8_MAX - 3)
#define ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS  (UINT8_MAX - 2)
#define ANV_DESCRIPTOR_SET_SHADER_CONSTANTS (UINT8_MAX - 1)
#define ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS UINT8_MAX
2251
2252
/** Maps one shader binding-table slot back to its descriptor. */
struct anv_pipeline_binding {
   /** Index in the descriptor set
    *
    * This is a flattened index; the descriptor set layout is already taken
    * into account.
    */
   uint32_t index;

   /** The descriptor set this surface corresponds to.
    *
    * The special ANV_DESCRIPTOR_SET_* values above indicate that this
    * binding is not a normal descriptor set but something else.
    */
   uint8_t set;

   union {
      /** Plane in the binding index for images */
      uint8_t plane;

      /** Input attachment index (relative to the subpass) */
      uint8_t input_attachment_index;

      /** Dynamic offset index (for dynamic UBOs and SSBOs) */
      uint8_t dynamic_offset_index;
   };

   /** For a storage image, whether it is write-only */
   uint8_t write_only;

   /** Pad to 64 bits so that there are no holes and we can safely memcmp
    * assuming POD zero-initialization.
    */
   uint8_t pad;
};
2286
2287
/** One range of descriptor-set data pushed as push constants. */
struct anv_push_range {
   /** Index in the descriptor set */
   uint32_t index;

   /** Descriptor set index */
   uint8_t set;

   /** Dynamic offset index (for dynamic UBOs) */
   uint8_t dynamic_offset_index;

   /** Start offset in units of 32B */
   uint8_t start;

   /** Range in units of 32B */
   uint8_t length;
};
2303
2304
/** Driver object backing VkPipelineLayout. */
struct anv_pipeline_layout {
   struct vk_object_base base;

   struct {
      struct anv_descriptor_set_layout *layout;
      /* First dynamic-offset slot used by this set */
      uint32_t dynamic_offset_start;
   } set[MAX_SETS];

   uint32_t num_sets;

   /* Hash of the layout, used for pipeline cache keys */
   unsigned char sha1[20];
};
2316
2317
/** Driver object backing VkBuffer. */
struct anv_buffer {
   struct vk_object_base base;

   struct anv_device *                          device;
   VkDeviceSize                                 size;

   VkBufferCreateFlags                          create_flags;
   VkBufferUsageFlags                           usage;

   /* Set when bound */
   struct anv_address                           address;
};
2329
2330
static inline uint64_t
2331
anv_buffer_get_range(struct anv_buffer *buffer, uint64_t offset, uint64_t range)
2332
{
2333
assert(offset <= buffer->size);
2334
if (range == VK_WHOLE_SIZE) {
2335
return buffer->size - offset;
2336
} else {
2337
assert(range + offset >= range);
2338
assert(range + offset <= buffer->size);
2339
return range;
2340
}
2341
}
2342
2343
/** Bits tracking which pieces of command-buffer state need re-emission. */
enum anv_cmd_dirty_bits {
   ANV_CMD_DIRTY_DYNAMIC_VIEWPORT                    = 1 << 0, /* VK_DYNAMIC_STATE_VIEWPORT */
   ANV_CMD_DIRTY_DYNAMIC_SCISSOR                     = 1 << 1, /* VK_DYNAMIC_STATE_SCISSOR */
   ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH                  = 1 << 2, /* VK_DYNAMIC_STATE_LINE_WIDTH */
   ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS                  = 1 << 3, /* VK_DYNAMIC_STATE_DEPTH_BIAS */
   ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS             = 1 << 4, /* VK_DYNAMIC_STATE_BLEND_CONSTANTS */
   ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS                = 1 << 5, /* VK_DYNAMIC_STATE_DEPTH_BOUNDS */
   ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK        = 1 << 6, /* VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK */
   ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK          = 1 << 7, /* VK_DYNAMIC_STATE_STENCIL_WRITE_MASK */
   ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE           = 1 << 8, /* VK_DYNAMIC_STATE_STENCIL_REFERENCE */
   ANV_CMD_DIRTY_PIPELINE                            = 1 << 9,
   ANV_CMD_DIRTY_INDEX_BUFFER                        = 1 << 10,
   ANV_CMD_DIRTY_RENDER_TARGETS                      = 1 << 11,
   ANV_CMD_DIRTY_XFB_ENABLE                          = 1 << 12,
   ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE                = 1 << 13, /* VK_DYNAMIC_STATE_LINE_STIPPLE_EXT */
   ANV_CMD_DIRTY_DYNAMIC_CULL_MODE                   = 1 << 14, /* VK_DYNAMIC_STATE_CULL_MODE_EXT */
   ANV_CMD_DIRTY_DYNAMIC_FRONT_FACE                  = 1 << 15, /* VK_DYNAMIC_STATE_FRONT_FACE_EXT */
   ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY          = 1 << 16, /* VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY_EXT */
   ANV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE = 1 << 17, /* VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT */
   ANV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE           = 1 << 18, /* VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT */
   ANV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE          = 1 << 19, /* VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT */
   ANV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP            = 1 << 20, /* VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT */
   ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE    = 1 << 21, /* VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT */
   ANV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE         = 1 << 22, /* VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT */
   ANV_CMD_DIRTY_DYNAMIC_STENCIL_OP                  = 1 << 23, /* VK_DYNAMIC_STATE_STENCIL_OP_EXT */
   ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS            = 1 << 24, /* VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT */
   ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE           = 1 << 25, /* VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT */
   ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE                = 1 << 26, /* VK_DYNAMIC_STATE_FRAGMENT_SHADING_RATE_KHR */
   ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE   = 1 << 27, /* VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE_EXT */
   ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS_ENABLE           = 1 << 28, /* VK_DYNAMIC_STATE_DEPTH_BIAS_ENABLE_EXT */
   ANV_CMD_DIRTY_DYNAMIC_LOGIC_OP                    = 1 << 29, /* VK_DYNAMIC_STATE_LOGIC_OP_EXT */
   ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_RESTART_ENABLE    = 1 << 30, /* VK_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE_EXT */
};
typedef uint32_t anv_cmd_dirty_mask_t;
2377
2378
/* Union of every ANV_CMD_DIRTY_DYNAMIC_* bit.  Keep in sync with
 * enum anv_cmd_dirty_bits when adding new dynamic states.
 */
#define ANV_CMD_DIRTY_DYNAMIC_ALL                       \
   (ANV_CMD_DIRTY_DYNAMIC_VIEWPORT |                    \
    ANV_CMD_DIRTY_DYNAMIC_SCISSOR |                     \
    ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH |                  \
    ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS |                  \
    ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS |             \
    ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS |                \
    ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK |        \
    ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK |          \
    ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE |           \
    ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE |                \
    ANV_CMD_DIRTY_DYNAMIC_CULL_MODE |                   \
    ANV_CMD_DIRTY_DYNAMIC_FRONT_FACE |                  \
    ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY |          \
    ANV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE | \
    ANV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE |           \
    ANV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE |          \
    ANV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP |            \
    ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE |    \
    ANV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE |         \
    ANV_CMD_DIRTY_DYNAMIC_STENCIL_OP |                  \
    ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS |            \
    ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE |           \
    ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE |                \
    ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE |   \
    ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS_ENABLE |           \
    ANV_CMD_DIRTY_DYNAMIC_LOGIC_OP |                    \
    ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_RESTART_ENABLE)
2406
2407
/** Map a VkDynamicState value to the corresponding dirty bit.
 *
 * Asserts (debug builds) and returns 0 for states the driver does not
 * support.
 */
static inline enum anv_cmd_dirty_bits
anv_cmd_dirty_bit_for_vk_dynamic_state(VkDynamicState vk_state)
{
   switch (vk_state) {
   case VK_DYNAMIC_STATE_VIEWPORT:
   case VK_DYNAMIC_STATE_VIEWPORT_WITH_COUNT_EXT:
      return ANV_CMD_DIRTY_DYNAMIC_VIEWPORT;
   case VK_DYNAMIC_STATE_SCISSOR:
   case VK_DYNAMIC_STATE_SCISSOR_WITH_COUNT_EXT:
      return ANV_CMD_DIRTY_DYNAMIC_SCISSOR;
   case VK_DYNAMIC_STATE_LINE_WIDTH:
      return ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH;
   case VK_DYNAMIC_STATE_DEPTH_BIAS:
      return ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS;
   case VK_DYNAMIC_STATE_BLEND_CONSTANTS:
      return ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS;
   case VK_DYNAMIC_STATE_DEPTH_BOUNDS:
      return ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS;
   case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK:
      return ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK;
   case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK:
      return ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK;
   case VK_DYNAMIC_STATE_STENCIL_REFERENCE:
      return ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE;
   case VK_DYNAMIC_STATE_LINE_STIPPLE_EXT:
      return ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE;
   case VK_DYNAMIC_STATE_CULL_MODE_EXT:
      return ANV_CMD_DIRTY_DYNAMIC_CULL_MODE;
   case VK_DYNAMIC_STATE_FRONT_FACE_EXT:
      return ANV_CMD_DIRTY_DYNAMIC_FRONT_FACE;
   case VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY_EXT:
      return ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY;
   case VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT:
      return ANV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE;
   case VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT:
      return ANV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE;
   case VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT:
      return ANV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE;
   case VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT:
      return ANV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP;
   case VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT:
      return ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE;
   case VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT:
      return ANV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE;
   case VK_DYNAMIC_STATE_STENCIL_OP_EXT:
      return ANV_CMD_DIRTY_DYNAMIC_STENCIL_OP;
   case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT:
      return ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS;
   case VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT:
      return ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE;
   case VK_DYNAMIC_STATE_FRAGMENT_SHADING_RATE_KHR:
      return ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE;
   case VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE_EXT:
      return ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE;
   case VK_DYNAMIC_STATE_DEPTH_BIAS_ENABLE_EXT:
      return ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS_ENABLE;
   case VK_DYNAMIC_STATE_LOGIC_OP_EXT:
      return ANV_CMD_DIRTY_DYNAMIC_LOGIC_OP;
   case VK_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE_EXT:
      return ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_RESTART_ENABLE;
   default:
      assert(!"Unsupported dynamic state");
      return 0;
   }
}
2472
2473
2474
/** Flags describing PIPE_CONTROL operations the driver may need to emit.
 *
 * Most bits map to actual PIPE_CONTROL fields; the ones that don't are
 * documented individually below.
 */
enum anv_pipe_bits {
   ANV_PIPE_DEPTH_CACHE_FLUSH_BIT            = (1 << 0),
   ANV_PIPE_STALL_AT_SCOREBOARD_BIT          = (1 << 1),
   ANV_PIPE_STATE_CACHE_INVALIDATE_BIT       = (1 << 2),
   ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT    = (1 << 3),
   ANV_PIPE_VF_CACHE_INVALIDATE_BIT          = (1 << 4),
   ANV_PIPE_DATA_CACHE_FLUSH_BIT             = (1 << 5),
   ANV_PIPE_TILE_CACHE_FLUSH_BIT             = (1 << 6),
   ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT     = (1 << 10),
   ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT = (1 << 11),
   ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT    = (1 << 12),
   ANV_PIPE_DEPTH_STALL_BIT                  = (1 << 13),

   /* ANV_PIPE_HDC_PIPELINE_FLUSH_BIT is a precise way to ensure prior data
    * cache work has completed.  Available on Gfx12+.  For earlier Gfx we
    * must reinterpret this flush as ANV_PIPE_DATA_CACHE_FLUSH_BIT.
    */
   ANV_PIPE_HDC_PIPELINE_FLUSH_BIT           = (1 << 14),
   ANV_PIPE_CS_STALL_BIT                     = (1 << 20),
   ANV_PIPE_END_OF_PIPE_SYNC_BIT             = (1 << 21),

   /* This bit does not exist directly in PIPE_CONTROL. Instead it means that
    * a flush has happened but not a CS stall. The next time we do any sort
    * of invalidation we need to insert a CS stall at that time. Otherwise,
    * we would have to CS stall on every flush which could be bad.
    */
   ANV_PIPE_NEEDS_END_OF_PIPE_SYNC_BIT       = (1 << 22),

   /* This bit does not exist directly in PIPE_CONTROL. It means that render
    * target operations related to transfer commands with VkBuffer as
    * destination are ongoing. Some operations like copies on the command
    * streamer might need to be aware of this to trigger the appropriate stall
    * before they can proceed with the copy.
    */
   ANV_PIPE_RENDER_TARGET_BUFFER_WRITES      = (1 << 23),

   /* This bit does not exist directly in PIPE_CONTROL. It means that Gfx12
    * AUX-TT data has changed and we need to invalidate AUX-TT data.  This is
    * done by writing the AUX-TT register.
    */
   ANV_PIPE_AUX_TABLE_INVALIDATE_BIT         = (1 << 24),

   /* This bit does not exist directly in PIPE_CONTROL. It means that a
    * PIPE_CONTROL with a post-sync operation will follow. This is used to
    * implement a workaround for Gfx9.
    */
   ANV_PIPE_POST_SYNC_BIT                    = (1 << 25),
};
2522
2523
/* All cache-flush bits. */
#define ANV_PIPE_FLUSH_BITS ( \
   ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | \
   ANV_PIPE_DATA_CACHE_FLUSH_BIT | \
   ANV_PIPE_HDC_PIPELINE_FLUSH_BIT | \
   ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | \
   ANV_PIPE_TILE_CACHE_FLUSH_BIT)

/* All stall bits. */
#define ANV_PIPE_STALL_BITS ( \
   ANV_PIPE_STALL_AT_SCOREBOARD_BIT | \
   ANV_PIPE_DEPTH_STALL_BIT | \
   ANV_PIPE_CS_STALL_BIT)

/* All invalidation bits.  Note HDC_PIPELINE_FLUSH also appears in
 * ANV_PIPE_FLUSH_BITS above.
 */
#define ANV_PIPE_INVALIDATE_BITS ( \
   ANV_PIPE_STATE_CACHE_INVALIDATE_BIT | \
   ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT | \
   ANV_PIPE_VF_CACHE_INVALIDATE_BIT | \
   ANV_PIPE_HDC_PIPELINE_FLUSH_BIT | \
   ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT | \
   ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT | \
   ANV_PIPE_AUX_TABLE_INVALIDATE_BIT)
2543
2544
/** Translate a memory-dependency source access mask (srcAccessMask) into the
 * PIPE_CONTROL flush bits needed to make those prior writes visible.
 *
 * @device is currently unused here but kept for symmetry with
 * anv_pipe_invalidate_bits_for_access_flags.
 */
static inline enum anv_pipe_bits
anv_pipe_flush_bits_for_access_flags(struct anv_device *device,
                                     VkAccessFlags flags)
{
   enum anv_pipe_bits pipe_bits = 0;

   u_foreach_bit(b, flags) {
      switch ((VkAccessFlagBits)(1 << b)) {
      case VK_ACCESS_SHADER_WRITE_BIT:
         /* We're transitioning a buffer that was previously used as write
          * destination through the data port. To make its content available
          * to future operations, flush the hdc pipeline.
          */
         pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT;
         break;
      case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT:
         /* We're transitioning a buffer that was previously used as render
          * target. To make its content available to future operations, flush
          * the render target cache.
          */
         pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
         break;
      case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT:
         /* We're transitioning a buffer that was previously used as depth
          * buffer. To make its content available to future operations, flush
          * the depth cache.
          */
         pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
         break;
      case VK_ACCESS_TRANSFER_WRITE_BIT:
         /* We're transitioning a buffer that was previously used as a
          * transfer write destination. Generic write operations include color
          * & depth operations as well as buffer operations like :
          *     - vkCmdClearColorImage()
          *     - vkCmdClearDepthStencilImage()
          *     - vkCmdBlitImage()
          *     - vkCmdCopy*(), vkCmdUpdate*(), vkCmdFill*()
          *
          * Most of these operations are implemented using Blorp which writes
          * through the render target, so flush that cache to make it visible
          * to future operations. And for depth related operations we also
          * need to flush the depth cache.
          */
         pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
         pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
         break;
      case VK_ACCESS_MEMORY_WRITE_BIT:
         /* We're transitioning a buffer for generic write operations. Flush
          * all the caches.
          */
         pipe_bits |= ANV_PIPE_FLUSH_BITS;
         break;
      case VK_ACCESS_HOST_WRITE_BIT:
         /* We're transitioning a buffer for access by CPU. Invalidate
          * all the caches. Since data and tile caches don't have invalidate,
          * we are forced to flush those as well.
          */
         pipe_bits |= ANV_PIPE_FLUSH_BITS;
         pipe_bits |= ANV_PIPE_INVALIDATE_BITS;
         break;
      default:
         break; /* Nothing to do */
      }
   }

   return pipe_bits;
}
2611
2612
/** Translate a memory-dependency destination access mask (dstAccessMask)
 * into the PIPE_CONTROL invalidation/stall bits needed before those reads.
 */
static inline enum anv_pipe_bits
anv_pipe_invalidate_bits_for_access_flags(struct anv_device *device,
                                          VkAccessFlags flags)
{
   enum anv_pipe_bits pipe_bits = 0;

   u_foreach_bit(b, flags) {
      switch ((VkAccessFlagBits)(1 << b)) {
      case VK_ACCESS_INDIRECT_COMMAND_READ_BIT:
         /* Indirect draw commands take a buffer as input that we're going to
          * read from the command streamer to load some of the HW registers
          * (see genX_cmd_buffer.c:load_indirect_parameters). This requires a
          * command streamer stall so that all the cache flushes have
          * completed before the command streamer loads from memory.
          */
         pipe_bits |=  ANV_PIPE_CS_STALL_BIT;
         /* Indirect draw commands also set gl_BaseVertex & gl_BaseIndex
          * through a vertex buffer, so invalidate that cache.
          */
         pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
         /* For CmdDispatchIndirect, we also load gl_NumWorkGroups through a
          * UBO from the buffer, so we need to invalidate constant cache.
          */
         pipe_bits |= ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT;
         pipe_bits |= ANV_PIPE_DATA_CACHE_FLUSH_BIT;
         /* Tile cache flush needed for CmdDispatchIndirect since command
          * streamer and vertex fetch aren't L3 coherent.
          */
         pipe_bits |= ANV_PIPE_TILE_CACHE_FLUSH_BIT;
         break;
      case VK_ACCESS_INDEX_READ_BIT:
      case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT:
         /* We're transitioning a buffer to be used as input for vkCmdDraw*
          * commands, so we invalidate the VF cache to make sure there is no
          * stale data when we start rendering.
          */
         pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
         break;
      case VK_ACCESS_UNIFORM_READ_BIT:
         /* We're transitioning a buffer to be used as uniform data. Because
          * uniform is accessed through the data port & sampler, we need to
          * invalidate the texture cache (sampler) & constant cache (data
          * port) to avoid stale data.
          */
         pipe_bits |= ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT;
         if (device->physical->compiler->indirect_ubos_use_sampler)
            pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
         else
            pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT;
         break;
      case VK_ACCESS_SHADER_READ_BIT:
      case VK_ACCESS_INPUT_ATTACHMENT_READ_BIT:
      case VK_ACCESS_TRANSFER_READ_BIT:
         /* Transitioning a buffer to be read through the sampler, so
          * invalidate the texture cache, we don't want any stale data.
          */
         pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
         break;
      case VK_ACCESS_MEMORY_READ_BIT:
         /* Transitioning a buffer for generic read, invalidate all the
          * caches.
          */
         pipe_bits |= ANV_PIPE_INVALIDATE_BITS;
         break;
      case VK_ACCESS_MEMORY_WRITE_BIT:
         /* Generic write, make sure all previously written things land in
          * memory.
          */
         pipe_bits |= ANV_PIPE_FLUSH_BITS;
         break;
      case VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT:
         /* Transitioning a buffer for conditional rendering. We'll load the
          * content of this buffer into HW registers using the command
          * streamer, so we need to stall the command streamer to make sure
          * any in-flight flush operations have completed. Needs tile cache
          * and data cache flush because command stream isn't L3 coherent yet.
          */
         pipe_bits |= ANV_PIPE_CS_STALL_BIT;
         pipe_bits |= ANV_PIPE_TILE_CACHE_FLUSH_BIT;
         pipe_bits |= ANV_PIPE_DATA_CACHE_FLUSH_BIT;
         break;
      case VK_ACCESS_HOST_READ_BIT:
         /* We're transitioning a buffer that was written by CPU.  Flush
          * all the caches.
          */
         pipe_bits |= ANV_PIPE_FLUSH_BITS;
         break;
      default:
         break; /* Nothing to do */
      }
   }

   return pipe_bits;
}
2706
2707
/* All aspects that count as "color": the plain color aspect plus the three
 * multi-planar (YCbCr) plane aspects.
 */
#define VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV (         \
   VK_IMAGE_ASPECT_COLOR_BIT | \
   VK_IMAGE_ASPECT_PLANE_0_BIT | \
   VK_IMAGE_ASPECT_PLANE_1_BIT | \
   VK_IMAGE_ASPECT_PLANE_2_BIT)
/* Just the multi-planar plane aspects. */
#define VK_IMAGE_ASPECT_PLANES_BITS_ANV ( \
   VK_IMAGE_ASPECT_PLANE_0_BIT | \
   VK_IMAGE_ASPECT_PLANE_1_BIT | \
   VK_IMAGE_ASPECT_PLANE_2_BIT)
2716
2717
/** A bound vertex buffer (vkCmdBindVertexBuffers*). */
struct anv_vertex_binding {
   struct anv_buffer *                          buffer;
   VkDeviceSize                                 offset;
   VkDeviceSize                                 stride;
   VkDeviceSize                                 size;
};
2723
2724
/** A bound transform-feedback buffer. */
struct anv_xfb_binding {
   struct anv_buffer *                          buffer;
   VkDeviceSize                                 offset;
   VkDeviceSize                                 size;
};
2729
2730
/** CPU-side layout of the push-constant data uploaded for each stage. */
struct anv_push_constants {
   /** Push constant data provided by the client through vkPushConstants */
   uint8_t client_data[MAX_PUSH_CONSTANTS_SIZE];

   /** Dynamic offsets for dynamic UBOs and SSBOs */
   uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS];

   /* Robust access pushed registers. */
   uint64_t push_reg_mask[MESA_SHADER_STAGES];

   /** Pad out to a multiple of 32 bytes */
   uint32_t pad[2];

   /* Base addresses for descriptor sets */
   uint64_t desc_sets[MAX_SETS];

   struct {
      /** Base workgroup ID
       *
       * Used for vkCmdDispatchBase.
       */
      uint32_t base_work_group_id[3];

      /** Subgroup ID
       *
       * This is never set by software but is implicitly filled out when
       * uploading the push constants for compute shaders.
       */
      uint32_t subgroup_id;
   } cs;
};
2761
2762
/** Snapshot of all dynamic state tracked per command buffer.
 *
 * One field (or group) per VK_DYNAMIC_STATE_*; see enum anv_cmd_dirty_bits
 * for the corresponding dirty flags.
 */
struct anv_dynamic_state {
   struct {
      uint32_t                                  count;
      VkViewport                                viewports[MAX_VIEWPORTS];
   } viewport;

   struct {
      uint32_t                                  count;
      VkRect2D                                  scissors[MAX_SCISSORS];
   } scissor;

   float                                        line_width;

   struct {
      float                                     bias;
      float                                     clamp;
      float                                     slope;
   } depth_bias;

   float                                        blend_constants[4];

   struct {
      float                                     min;
      float                                     max;
   } depth_bounds;

   struct {
      uint32_t                                  front;
      uint32_t                                  back;
   } stencil_compare_mask;

   struct {
      uint32_t                                  front;
      uint32_t                                  back;
   } stencil_write_mask;

   struct {
      uint32_t                                  front;
      uint32_t                                  back;
   } stencil_reference;

   struct {
      struct {
         VkStencilOp fail_op;
         VkStencilOp pass_op;
         VkStencilOp depth_fail_op;
         VkCompareOp compare_op;
      } front;
      struct {
         VkStencilOp fail_op;
         VkStencilOp pass_op;
         VkStencilOp depth_fail_op;
         VkCompareOp compare_op;
      } back;
   } stencil_op;

   struct {
      uint32_t                                  factor;
      uint16_t                                  pattern;
   } line_stipple;

   struct {
      uint32_t                                  samples;
      VkSampleLocationEXT                       locations[MAX_SAMPLE_LOCATIONS];
   } sample_locations;

   VkExtent2D                                   fragment_shading_rate;

   VkCullModeFlags                              cull_mode;
   VkFrontFace                                  front_face;
   VkPrimitiveTopology                          primitive_topology;
   bool                                         depth_test_enable;
   bool                                         depth_write_enable;
   VkCompareOp                                  depth_compare_op;
   bool                                         depth_bounds_test_enable;
   bool                                         stencil_test_enable;
   bool                                         raster_discard;
   bool                                         depth_bias_enable;
   bool                                         primitive_restart_enable;
   VkLogicOp                                    logic_op;
   /* Whether vertex-buffer stride/size come from the dynamic state rather
    * than the pipeline.
    */
   bool                                         dyn_vbo_stride;
   bool                                         dyn_vbo_size;

   /* Bitfield, one bit per render target */
   uint8_t                                      color_writes;
};
2848
2849
extern const struct anv_dynamic_state default_dynamic_state;
2850
2851
uint32_t anv_dynamic_state_copy(struct anv_dynamic_state *dest,
2852
const struct anv_dynamic_state *src,
2853
uint32_t copy_mask);
2854
2855
/* A filled-out HW SURFACE_STATE together with the GPU addresses it
 * references, kept so relocations/dirty tracking can be applied later.
 */
struct anv_surface_state {
   struct anv_state state;
   /** Address of the surface referred to by this state
    *
    * This address is relative to the start of the BO.
    */
   struct anv_address address;
   /* Address of the aux surface, if any
    *
    * This field is ANV_NULL_ADDRESS if and only if no aux surface exists.
    *
    * With the exception of gfx8, the bottom 12 bits of this address' offset
    * include extra aux information.
    */
   struct anv_address aux_address;
   /* Address of the clear color, if any
    *
    * This address is relative to the start of the BO.
    */
   struct anv_address clear_address;
};
2876
2877
/**
 * Attachment state when recording a renderpass instance.
 *
 * The clear value is valid only if there exists a pending clear.
 */
struct anv_attachment_state {
   enum isl_aux_usage aux_usage;
   /* Surface states for use as a color attachment / input attachment. */
   struct anv_surface_state color;
   struct anv_surface_state input;

   VkImageLayout current_layout;
   VkImageLayout current_stencil_layout;
   /* Aspects with a clear/load op still outstanding for this attachment. */
   VkImageAspectFlags pending_clear_aspects;
   VkImageAspectFlags pending_load_aspects;
   bool fast_clear;
   VkClearValue clear_value;

   /* When multiview is active, attachments with a renderpass clear
    * operation have their respective layers cleared on the first
    * subpass that uses them, and only in that subpass. We keep track
    * of this using a bitfield to indicate which layers of an attachment
    * have not been cleared yet when multiview is active.
    */
   uint32_t pending_clear_views;
   struct anv_image_view * image_view;
};
2903
2904
/** State tracking for vertex buffer flushes
 *
 * On Gfx8-9, the VF cache only considers the bottom 32 bits of memory
 * addresses. If you happen to have two vertex buffers which get placed
 * exactly 4 GiB apart and use them in back-to-back draw calls, you can get
 * collisions. In order to solve this problem, we track vertex address ranges
 * which are live in the cache and invalidate the cache if one ever exceeds 32
 * bits.
 */
struct anv_vb_cache_range {
   /* Virtual address at which the live vertex buffer cache range starts for
    * this vertex buffer index.
    */
   uint64_t start;

   /* Virtual address of the byte after where vertex buffer cache range ends.
    * This is exclusive such that end - start is the size of the range.
    */
   uint64_t end;
};
2924
2925
/** State tracking for particular pipeline bind point
 *
 * This struct is the base struct for anv_cmd_graphics_state and
 * anv_cmd_compute_state. These are used to track state which is bound to a
 * particular type of pipeline. Generic state that applies per-stage such as
 * binding table offsets and push constants is tracked generically with a
 * per-stage array in anv_cmd_state.
 */
struct anv_cmd_pipeline_state {
   /* Descriptor sets currently bound at this bind point, by set index. */
   struct anv_descriptor_set *descriptors[MAX_SETS];
   struct anv_push_descriptor_set *push_descriptors[MAX_SETS];

   struct anv_push_constants push_constants;

   /* Push constant state allocated when flushing push constants. */
   struct anv_state push_constants_state;
};
2942
2943
/** State tracking for graphics pipeline
 *
 * This has anv_cmd_pipeline_state as a base struct to track things which get
 * bound to a graphics pipeline. Along with general pipeline bind point state
 * which is in the anv_cmd_pipeline_state base struct, it also contains other
 * state which is graphics-specific.
 */
struct anv_cmd_graphics_state {
   struct anv_cmd_pipeline_state base;

   struct anv_graphics_pipeline *pipeline;

   anv_cmd_dirty_mask_t dirty;
   /* Bitmask of vertex buffer slots needing re-emission. */
   uint32_t vb_dirty;

   /* VF cache ranges for the index buffer and each vertex buffer; see
    * struct anv_vb_cache_range for why these are tracked.
    */
   struct anv_vb_cache_range ib_bound_range;
   struct anv_vb_cache_range ib_dirty_range;
   struct anv_vb_cache_range vb_bound_ranges[33];
   struct anv_vb_cache_range vb_dirty_ranges[33];

   VkShaderStageFlags push_constant_stages;

   struct anv_dynamic_state dynamic;

   uint32_t primitive_topology;

   /* Gfx7 emits the index buffer at draw time, so it is cached here. */
   struct {
      struct anv_buffer *index_buffer;
      uint32_t index_type; /**< 3DSTATE_INDEX_BUFFER.IndexFormat */
      uint32_t index_offset;
   } gfx7;
};
2975
2976
/** State tracking for compute pipeline
 *
 * This has anv_cmd_pipeline_state as a base struct to track things which get
 * bound to a compute pipeline. Along with general pipeline bind point state
 * which is in the anv_cmd_pipeline_state base struct, it also contains other
 * state which is compute-specific.
 */
struct anv_cmd_compute_state {
   struct anv_cmd_pipeline_state base;

   struct anv_compute_pipeline *pipeline;

   bool pipeline_dirty;

   struct anv_state push_data;

   /* Address holding the dispatch dimensions (for indirect dispatch and
    * the num-workgroups system value).
    */
   struct anv_address num_workgroups;
};
2994
2995
/* State tracking for the ray-tracing pipeline bind point; see the graphics
 * and compute variants above for the general pattern.
 */
struct anv_cmd_ray_tracing_state {
   struct anv_cmd_pipeline_state base;

   struct anv_ray_tracing_pipeline *pipeline;

   bool pipeline_dirty;

   /* Scratch BO and its layout used for RT shader stacks. */
   struct {
      struct anv_bo *bo;
      struct brw_rt_scratch_layout layout;
   } scratch;
};
3007
3008
/** State required while building cmd buffer */
struct anv_cmd_state {
   /* PIPELINE_SELECT.PipelineSelection */
   uint32_t current_pipeline;
   const struct intel_l3_config * current_l3_config;
   uint32_t last_aux_map_state;

   /* Per-bind-point state. */
   struct anv_cmd_graphics_state gfx;
   struct anv_cmd_compute_state compute;
   struct anv_cmd_ray_tracing_state rt;

   /* Pipe-control flushes/invalidations queued but not yet emitted. */
   enum anv_pipe_bits pending_pipe_bits;
   VkShaderStageFlags descriptors_dirty;
   VkShaderStageFlags push_constants_dirty;

   /* Current render pass instance. */
   struct anv_framebuffer * framebuffer;
   struct anv_render_pass * pass;
   struct anv_subpass * subpass;
   VkRect2D render_area;
   uint32_t restart_index;
   struct anv_vertex_binding vertex_bindings[MAX_VBS];
   bool xfb_enabled;
   struct anv_xfb_binding xfb_bindings[MAX_XFB_BUFFERS];
   /* Per-stage binding table / sampler state allocations. */
   struct anv_state binding_tables[MESA_VULKAN_SHADER_STAGES];
   struct anv_state samplers[MESA_VULKAN_SHADER_STAGES];

   /* SHA1s of the last-emitted per-stage state, used to skip re-emission
    * when nothing changed.
    */
   unsigned char sampler_sha1s[MESA_SHADER_STAGES][20];
   unsigned char surface_sha1s[MESA_SHADER_STAGES][20];
   unsigned char push_sha1s[MESA_SHADER_STAGES][20];

   /**
    * Whether or not the gfx8 PMA fix is enabled. We ensure that, at the top
    * of any command buffer it is disabled by disabling it in EndCommandBuffer
    * and before invoking the secondary in ExecuteCommands.
    */
   bool pma_fix_enabled;

   /**
    * Whether or not we know for certain that HiZ is enabled for the current
    * subpass. If, for whatever reason, we are unsure as to whether HiZ is
    * enabled or not, this will be false.
    */
   bool hiz_enabled;

   bool conditional_render_enabled;

   /**
    * Last rendering scale argument provided to
    * genX(cmd_buffer_emit_hashing_mode)().
    */
   unsigned current_hash_scale;

   /**
    * Array length is anv_cmd_state::pass::attachment_count. Array content is
    * valid only when recording a render pass instance.
    */
   struct anv_attachment_state * attachments;

   /**
    * Surface states for color render targets. These are stored in a single
    * flat array. For depth-stencil attachments, the surface state is simply
    * left blank.
    */
   struct anv_state attachment_states;

   /**
    * A null surface state of the right size to match the framebuffer. This
    * is one of the states in attachment_states.
    */
   struct anv_state null_surface_state;
};
3079
3080
/* Implementation of VkCommandPool: owns the allocator and the list of
 * command buffers created from it.
 */
struct anv_cmd_pool {
   struct vk_object_base base;
   VkAllocationCallbacks alloc;
   /* List of anv_cmd_buffer::pool_link. */
   struct list_head cmd_buffers;

   VkCommandPoolCreateFlags flags;
};
3087
3088
/* Bounds on the size of a single batch BO in a command buffer's chain. */
#define ANV_MIN_CMD_BUFFER_BATCH_SIZE 8192
#define ANV_MAX_CMD_BUFFER_BATCH_SIZE (16 * 1024 * 1024)

/* How a secondary command buffer gets executed from a primary (or a
 * primary from submission): emitted inline, chained into, copied, or
 * called with a return.
 */
enum anv_cmd_buffer_exec_mode {
   ANV_CMD_BUFFER_EXEC_MODE_PRIMARY,
   ANV_CMD_BUFFER_EXEC_MODE_EMIT,
   ANV_CMD_BUFFER_EXEC_MODE_GROW_AND_EMIT,
   ANV_CMD_BUFFER_EXEC_MODE_CHAIN,
   ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN,
   ANV_CMD_BUFFER_EXEC_MODE_CALL_AND_RETURN,
};
3099
3100
struct anv_measure_batch;
3101
3102
/* Implementation of VkCommandBuffer. */
struct anv_cmd_buffer {
   struct vk_object_base base;

   struct anv_device * device;

   struct anv_cmd_pool * pool;
   /* Link in anv_cmd_pool::cmd_buffers. */
   struct list_head pool_link;

   struct anv_batch batch;

   /* Pointer to the location in the batch where MI_BATCH_BUFFER_END was
    * recorded upon calling vkEndCommandBuffer(). This is useful if we need to
    * rewrite the end to chain multiple batch together at vkQueueSubmit().
    */
   void * batch_end;

   /* Fields required for the actual chain of anv_batch_bo's.
    *
    * These fields are initialized by anv_cmd_buffer_init_batch_bo_chain().
    */
   struct list_head batch_bos;
   enum anv_cmd_buffer_exec_mode exec_mode;

   /* A vector of anv_batch_bo pointers for every batch or surface buffer
    * referenced by this command buffer
    *
    * initialized by anv_cmd_buffer_init_batch_bo_chain()
    */
   struct u_vector seen_bbos;

   /* A vector of int32_t's for every block of binding tables.
    *
    * initialized by anv_cmd_buffer_init_batch_bo_chain()
    */
   struct u_vector bt_block_states;
   struct anv_state bt_next;

   struct anv_reloc_list surface_relocs;
   /** Last seen surface state block pool center bo offset */
   uint32_t last_ss_pool_center;

   /* Serial for tracking buffer completion */
   uint32_t serial;

   /* Stream objects for storing temporary data */
   struct anv_state_stream surface_state_stream;
   struct anv_state_stream dynamic_state_stream;
   struct anv_state_stream general_state_stream;

   VkCommandBufferUsageFlags usage_flags;
   VkCommandBufferLevel level;

   struct anv_query_pool *perf_query_pool;

   struct anv_cmd_state state;

   struct anv_address return_addr;

   /* Set by SetPerformanceMarkerINTEL, written into queries by CmdBeginQuery */
   uint64_t intel_perf_marker;

   struct anv_measure_batch *measure;

   /**
    * KHR_performance_query requires self modifying command buffers and this
    * array has the location of modifying commands to the query begin and end
    * instructions storing performance counters. The array length is
    * anv_physical_device::n_perf_query_commands.
    */
   struct mi_address_token *self_mod_locations;

   /**
    * Index tracking which of the self_mod_locations items have already been
    * used.
    */
   uint32_t perf_reloc_idx;

   /**
    * Sum of all the anv_batch_bo sizes allocated for this command buffer.
    * Used to increase allocation size for long command buffers.
    */
   uint32_t total_batch_size;
};
3185
3186
/* Determine whether we can chain a given cmd_buffer to another one. We need
3187
* softpin and we also need to make sure that we can edit the end of the batch
3188
* to point to next one, which requires the command buffer to not be used
3189
* simultaneously.
3190
*/
3191
static inline bool
3192
anv_cmd_buffer_is_chainable(struct anv_cmd_buffer *cmd_buffer)
3193
{
3194
return anv_use_softpin(cmd_buffer->device->physical) &&
3195
!(cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT);
3196
}
3197
3198
VkResult anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer);
3199
void anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer);
3200
void anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer);
3201
void anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer);
3202
void anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary,
3203
struct anv_cmd_buffer *secondary);
3204
void anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer);
3205
VkResult anv_cmd_buffer_execbuf(struct anv_queue *queue,
3206
struct anv_cmd_buffer *cmd_buffer,
3207
const VkSemaphore *in_semaphores,
3208
const uint64_t *in_wait_values,
3209
uint32_t num_in_semaphores,
3210
const VkSemaphore *out_semaphores,
3211
const uint64_t *out_signal_values,
3212
uint32_t num_out_semaphores,
3213
VkFence fence,
3214
int perf_query_pass);
3215
3216
VkResult anv_cmd_buffer_reset(struct anv_cmd_buffer *cmd_buffer);
3217
3218
struct anv_state anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer,
3219
const void *data, uint32_t size, uint32_t alignment);
3220
struct anv_state anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer,
3221
uint32_t *a, uint32_t *b,
3222
uint32_t dwords, uint32_t alignment);
3223
3224
struct anv_address
3225
anv_cmd_buffer_surface_base_address(struct anv_cmd_buffer *cmd_buffer);
3226
struct anv_state
3227
anv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer *cmd_buffer,
3228
uint32_t entries, uint32_t *state_offset);
3229
struct anv_state
3230
anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer);
3231
struct anv_state
3232
anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer,
3233
uint32_t size, uint32_t alignment);
3234
3235
VkResult
3236
anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer);
3237
3238
void gfx8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer);
3239
void gfx8_cmd_buffer_emit_depth_viewport(struct anv_cmd_buffer *cmd_buffer,
3240
bool depth_clamp_enable);
3241
void gfx7_cmd_buffer_emit_scissor(struct anv_cmd_buffer *cmd_buffer);
3242
3243
void anv_cmd_buffer_setup_attachments(struct anv_cmd_buffer *cmd_buffer,
3244
struct anv_render_pass *pass,
3245
struct anv_framebuffer *framebuffer,
3246
const VkClearValue *clear_values);
3247
3248
void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer);
3249
3250
struct anv_state
3251
anv_cmd_buffer_gfx_push_constants(struct anv_cmd_buffer *cmd_buffer);
3252
struct anv_state
3253
anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer);
3254
3255
const struct anv_image_view *
3256
anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer);
3257
3258
VkResult
3259
anv_cmd_buffer_alloc_blorp_binding_table(struct anv_cmd_buffer *cmd_buffer,
3260
uint32_t num_entries,
3261
uint32_t *state_offset,
3262
struct anv_state *bt_state);
3263
3264
void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer);
3265
3266
void anv_cmd_emit_conditional_render_predicate(struct anv_cmd_buffer *cmd_buffer);
3267
3268
/* Which backing implementation a VkFence payload uses; see
 * struct anv_fence_impl.
 */
enum anv_fence_type {
   ANV_FENCE_TYPE_NONE = 0,
   ANV_FENCE_TYPE_BO,
   ANV_FENCE_TYPE_WSI_BO,
   ANV_FENCE_TYPE_SYNCOBJ,
   ANV_FENCE_TYPE_WSI,
};
3275
3276
/* CPU-tracked lifecycle of a BO-backed fence. */
enum anv_bo_fence_state {
   /** Indicates that this is a new (or newly reset fence) */
   ANV_BO_FENCE_STATE_RESET,

   /** Indicates that this fence has been submitted to the GPU but is still
    * (as far as we know) in use by the GPU.
    */
   ANV_BO_FENCE_STATE_SUBMITTED,

   /* The GPU has finished with the fence BO. */
   ANV_BO_FENCE_STATE_SIGNALED,
};
3287
3288
/* One concrete fence payload; which union member is valid is selected by
 * `type`.
 */
struct anv_fence_impl {
   enum anv_fence_type type;

   union {
      /** Fence implementation for BO fences
       *
       * These fences use a BO and a set of CPU-tracked state flags. The BO
       * is added to the object list of the last execbuf call in a QueueSubmit
       * and is marked EXEC_WRITE. The state flags track when the BO has been
       * submitted to the kernel. We need to do this because Vulkan lets you
       * wait on a fence that has not yet been submitted and I915_GEM_BUSY
       * will say it's idle in this case.
       */
      struct {
         struct anv_bo *bo;
         enum anv_bo_fence_state state;
      } bo;

      /** DRM syncobj handle for syncobj-based fences */
      uint32_t syncobj;

      /** WSI fence */
      struct wsi_fence *fence_wsi;
   };
};
3313
3314
/* Implementation of VkFence: a permanent payload plus an optional
 * temporary payload installed by an import.
 */
struct anv_fence {
   struct vk_object_base base;

   /* Permanent fence state. Every fence has some form of permanent state
    * (type != ANV_FENCE_TYPE_NONE). This may be a BO to fence on (for
    * cross-process fences) or it could just be a dummy for use internally.
    */
   struct anv_fence_impl permanent;

   /* Temporary fence state. A fence *may* have temporary state. That state
    * is added to the fence by an import operation and is reset back to
    * ANV_FENCE_TYPE_NONE when the fence is reset. A fence with temporary
    * state cannot be signaled because the fence must already be signaled
    * before the temporary state can be exported from the fence in the other
    * process and imported here.
    */
   struct anv_fence_impl temporary;
};
3332
3333
void anv_fence_reset_temporary(struct anv_device *device,
3334
struct anv_fence *fence);
3335
3336
/* Implementation of VkEvent: a small GPU-visible state allocation holding
 * the set/reset value.
 */
struct anv_event {
   struct vk_object_base base;
   uint64_t semaphore;
   struct anv_state state;
};
3341
3342
/* Which backing implementation a VkSemaphore payload uses; see
 * struct anv_semaphore_impl.
 */
enum anv_semaphore_type {
   ANV_SEMAPHORE_TYPE_NONE = 0,
   ANV_SEMAPHORE_TYPE_DUMMY,
   ANV_SEMAPHORE_TYPE_BO,
   ANV_SEMAPHORE_TYPE_WSI_BO,
   ANV_SEMAPHORE_TYPE_SYNC_FILE,
   ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ,
   ANV_SEMAPHORE_TYPE_TIMELINE,
   ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE,
};
3352
3353
/* One pending value on a userspace-emulated timeline semaphore. */
struct anv_timeline_point {
   /* Link in anv_timeline::points or anv_timeline::free_points. */
   struct list_head link;

   /* The timeline value this point represents. */
   uint64_t serial;

   /* Number of waiters on this point; when > 0 the point should not be
    * garbage collected.
    */
   int waiting;

   /* BO used for synchronization. */
   struct anv_bo *bo;
};
3366
3367
/* Userspace emulation of a timeline semaphore, used when the kernel lacks
 * native timeline syncobj support.  Protected by `mutex`; waiters sleep on
 * `cond`.
 */
struct anv_timeline {
   pthread_mutex_t mutex;
   pthread_cond_t cond;

   /* Highest value known signaled / known submitted. */
   uint64_t highest_past;
   uint64_t highest_pending;

   /* In-flight points and a free list for recycling them. */
   struct list_head points;
   struct list_head free_points;
};
3377
3378
/* One concrete semaphore payload; which union member is valid is selected
 * by `type`.
 */
struct anv_semaphore_impl {
   enum anv_semaphore_type type;

   union {
      /* A BO representing this semaphore when type == ANV_SEMAPHORE_TYPE_BO
       * or type == ANV_SEMAPHORE_TYPE_WSI_BO. This BO will be added to the
       * object list on any execbuf2 calls for which this semaphore is used as
       * a wait or signal fence. When used as a signal fence or when type ==
       * ANV_SEMAPHORE_TYPE_WSI_BO, the EXEC_OBJECT_WRITE flag will be set.
       */
      struct anv_bo *bo;

      /* The sync file descriptor when type == ANV_SEMAPHORE_TYPE_SYNC_FILE.
       * If the semaphore is in the unsignaled state due to either just being
       * created or because it has been used for a wait, fd will be -1.
       */
      int fd;

      /* Sync object handle when type == ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ.
       * Unlike GEM BOs, DRM sync objects aren't deduplicated by the kernel on
       * import so we don't need to bother with a userspace cache.
       */
      uint32_t syncobj;

      /* Non shareable timeline semaphore
       *
       * Used when the kernel doesn't have support for timeline semaphores.
       */
      struct anv_timeline timeline;
   };
};
3409
3410
/* Implementation of VkSemaphore: refcounted, with a permanent payload plus
 * an optional temporary payload installed by an import.
 */
struct anv_semaphore {
   struct vk_object_base base;

   uint32_t refcount;

   /* Permanent semaphore state. Every semaphore has some form of permanent
    * state (type != ANV_SEMAPHORE_TYPE_NONE). This may be a BO to fence on
    * (for cross-process semaphores) or it could just be a dummy for use
    * internally.
    */
   struct anv_semaphore_impl permanent;

   /* Temporary semaphore state. A semaphore *may* have temporary state.
    * That state is added to the semaphore by an import operation and is reset
    * back to ANV_SEMAPHORE_TYPE_NONE when the semaphore is waited on. A
    * semaphore with temporary state cannot be signaled because the semaphore
    * must already be signaled before the temporary state can be exported from
    * the semaphore in the other process and imported here.
    */
   struct anv_semaphore_impl temporary;
};
3431
3432
void anv_semaphore_reset_temporary(struct anv_device *device,
3433
struct anv_semaphore *semaphore);
3434
3435
/* Bitmask covering every shader stage. */
#define ANV_STAGE_MASK ((1 << MESA_VULKAN_SHADER_STAGES) - 1)

/* Iterate `stage` over the stages set in `stage_bits`.  Implemented by
 * repeatedly taking the lowest set bit of a working copy (__tmp) and
 * clearing it; the loop ends when no bits remain.
 */
#define anv_foreach_stage(stage, stage_bits) \
   for (gl_shader_stage stage, \
        __tmp = (gl_shader_stage)((stage_bits) & ANV_STAGE_MASK); \
        stage = __builtin_ffs(__tmp) - 1, __tmp; \
        __tmp &= ~(1 << (stage)))
3442
3443
/* Mapping from a shader's binding table / sampler slots back to the
 * descriptor-set bindings that feed them, plus the push-constant ranges.
 */
struct anv_pipeline_bind_map {
   /* Hashes used to detect when the corresponding tables must be re-emitted. */
   unsigned char surface_sha1[20];
   unsigned char sampler_sha1[20];
   unsigned char push_sha1[20];

   uint32_t surface_count;
   uint32_t sampler_count;

   struct anv_pipeline_binding * surface_to_descriptor;
   struct anv_pipeline_binding * sampler_to_descriptor;

   struct anv_push_range push_ranges[4];
};
3456
3457
/* Cache key for a compiled shader: a variable-length blob of `size` bytes.
 * Allocated as sizeof(struct anv_shader_bin_key) + size.
 */
struct anv_shader_bin_key {
   uint32_t size;
   /* C99 flexible array member instead of the GNU zero-length-array
    * extension (`data[0]`); identical layout, standard-conforming.
    */
   uint8_t data[];
};
3461
3462
/* A compiled shader as stored in the pipeline cache: the kernel binary,
 * its prog_data, compile statistics, and binding information.  Refcounted
 * via anv_shader_bin_ref()/anv_shader_bin_unref().
 */
struct anv_shader_bin {
   uint32_t ref_cnt;

   gl_shader_stage stage;

   const struct anv_shader_bin_key *key;

   /* GPU code allocation and its size in bytes. */
   struct anv_state kernel;
   uint32_t kernel_size;

   const struct brw_stage_prog_data *prog_data;
   uint32_t prog_data_size;

   struct brw_compile_stats stats[3];
   uint32_t num_stats;

   struct nir_xfb_info *xfb_info;

   struct anv_pipeline_bind_map bind_map;
};
3482
3483
struct anv_shader_bin *
3484
anv_shader_bin_create(struct anv_device *device,
3485
gl_shader_stage stage,
3486
const void *key, uint32_t key_size,
3487
const void *kernel, uint32_t kernel_size,
3488
const struct brw_stage_prog_data *prog_data,
3489
uint32_t prog_data_size,
3490
const struct brw_compile_stats *stats, uint32_t num_stats,
3491
const struct nir_xfb_info *xfb_info,
3492
const struct anv_pipeline_bind_map *bind_map);
3493
3494
void
3495
anv_shader_bin_destroy(struct anv_device *device, struct anv_shader_bin *shader);
3496
3497
static inline void
3498
anv_shader_bin_ref(struct anv_shader_bin *shader)
3499
{
3500
assert(shader && shader->ref_cnt >= 1);
3501
p_atomic_inc(&shader->ref_cnt);
3502
}
3503
3504
static inline void
3505
anv_shader_bin_unref(struct anv_device *device, struct anv_shader_bin *shader)
3506
{
3507
assert(shader && shader->ref_cnt >= 1);
3508
if (p_atomic_dec_zero(&shader->ref_cnt))
3509
anv_shader_bin_destroy(device, shader);
3510
}
3511
3512
/* Build a GFX_BINDLESS_SHADER_RECORD for a ray-tracing shader binary.
 * local_arg_offset must be 8-byte aligned; the dispatch mode is derived
 * from the compiled SIMD width (8 -> 0, 16 -> 1).
 */
#define anv_shader_bin_get_bsr(bin, local_arg_offset) ({ \
   assert((local_arg_offset) % 8 == 0); \
   const struct brw_bs_prog_data *prog_data = \
      brw_bs_prog_data_const(bin->prog_data); \
   assert(prog_data->simd_size == 8 || prog_data->simd_size == 16); \
\
   (struct GFX_BINDLESS_SHADER_RECORD) { \
      .OffsetToLocalArguments = (local_arg_offset) / 8, \
      .BindlessShaderDispatchMode = prog_data->simd_size / 16, \
      .KernelStartPointer = bin->kernel.offset, \
   }; \
})
3524
3525
/* Per-shader data exposed through VK_KHR_pipeline_executable_properties:
 * compile statistics plus optional NIR and disassembly dumps.
 */
struct anv_pipeline_executable {
   gl_shader_stage stage;

   struct brw_compile_stats stats;

   /* Heap-allocated textual representations; may be NULL if not captured. */
   char *nir;
   char *disasm;
};
3533
3534
/* Discriminator for the anv_pipeline downcasts; see
 * ANV_DECL_PIPELINE_DOWNCAST below.
 */
enum anv_pipeline_type {
   ANV_PIPELINE_GRAPHICS,
   ANV_PIPELINE_COMPUTE,
   ANV_PIPELINE_RAY_TRACING,
};
3539
3540
/* Base struct shared by the graphics, compute, and ray-tracing pipeline
 * implementations of VkPipeline.
 */
struct anv_pipeline {
   struct vk_object_base base;

   struct anv_device * device;

   /* Pre-baked batch commands emitted when the pipeline is bound. */
   struct anv_batch batch;
   struct anv_reloc_list batch_relocs;

   /* ralloc context owning the pipeline's allocations. */
   void * mem_ctx;

   enum anv_pipeline_type type;
   VkPipelineCreateFlags flags;

   /* Array of struct anv_pipeline_executable. */
   struct util_dynarray executables;

   const struct intel_l3_config * l3_config;
};
3557
3558
/* Implementation of a graphics VkPipeline. */
struct anv_graphics_pipeline {
   struct anv_pipeline base;

   /* Backing storage for base.batch. */
   uint32_t batch_data[512];

   /* States that are part of batch_data and should be not emitted
    * dynamically.
    */
   anv_cmd_dirty_mask_t static_state_mask;

   /* States that need to be reemitted in cmd_buffer_flush_dynamic_state().
    * This might cover more than the dynamic states specified at pipeline
    * creation.
    */
   anv_cmd_dirty_mask_t dynamic_state_mask;

   struct anv_dynamic_state dynamic_state;

   /* States declared dynamic at pipeline creation. */
   anv_cmd_dirty_mask_t dynamic_states;

   uint32_t topology;

   /* These fields are required with dynamic primitive topology,
    * rasterization_samples used only with gen < 8.
    */
   VkLineRasterizationModeEXT line_mode;
   VkPolygonMode polygon_mode;
   uint32_t rasterization_samples;

   struct anv_subpass * subpass;

   struct anv_shader_bin * shaders[MESA_SHADER_STAGES];

   VkShaderStageFlags active_stages;

   /* Derived depth/stencil/rasterization properties baked at create time. */
   bool writes_depth;
   bool depth_test_enable;
   bool writes_stencil;
   bool stencil_test_enable;
   bool depth_clamp_enable;
   bool depth_clip_enable;
   bool sample_shading_enable;
   bool kill_pixel;
   bool depth_bounds_test_enable;
   bool force_fragment_thread_dispatch;

   /* When primitive replication is used, subpass->view_mask will describe what
    * views to replicate.
    */
   bool use_primitive_replication;

   struct anv_state blend_state;

   struct anv_state cps_state;

   /* Bitmask of vertex buffer slots used, and per-slot binding info. */
   uint32_t vb_used;
   struct anv_pipeline_vertex_binding {
      uint32_t stride;
      bool instanced;
      uint32_t instance_divisor;
   } vb[MAX_VBS];

   /* Pre-packed, generation-specific state dwords. */
   struct {
      uint32_t sf[7];
      uint32_t depth_stencil_state[3];
      uint32_t clip[4];
      uint32_t xfb_bo_pitch[4];
      uint32_t wm[3];
      uint32_t blend_state[MAX_RTS * 2];
      uint32_t streamout_state[3];
   } gfx7;

   struct {
      uint32_t sf[4];
      uint32_t raster[5];
      uint32_t wm_depth_stencil[3];
      uint32_t wm[2];
      uint32_t ps_blend[2];
      uint32_t blend_state[1 + MAX_RTS * 2];
      uint32_t streamout_state[5];
   } gfx8;

   struct {
      uint32_t wm_depth_stencil[4];
   } gfx9;
};
3645
3646
/* Implementation of a compute VkPipeline. */
struct anv_compute_pipeline {
   struct anv_pipeline base;

   /* The single compute shader binary. */
   struct anv_shader_bin * cs;
   /* Backing storage for base.batch. */
   uint32_t batch_data[9];
   uint32_t interface_descriptor_data[8];
};
3653
3654
/* One shader group of a ray-tracing pipeline; which of the shader-bin
 * pointers are set depends on the group type.
 */
struct anv_rt_shader_group {
   VkRayTracingShaderGroupTypeKHR type;

   struct anv_shader_bin *general;
   struct anv_shader_bin *closest_hit;
   struct anv_shader_bin *any_hit;
   struct anv_shader_bin *intersection;

   /* VK_KHR_ray_tracing requires shaderGroupHandleSize == 32 */
   uint32_t handle[8];
};
3665
3666
/* Implementation of a ray-tracing VkPipeline. */
struct anv_ray_tracing_pipeline {
   struct anv_pipeline base;

   /* All shaders in the pipeline */
   struct util_dynarray shaders;

   uint32_t group_count;
   struct anv_rt_shader_group * groups;

   /* If non-zero, this is the default computed stack size as per the stack
    * size computation in the Vulkan spec. If zero, that indicates that the
    * client has requested a dynamic stack size.
    */
   uint32_t stack_size;
};
3681
3682
/* Declare a checked downcast from the base anv_pipeline to one of its
 * concrete subtypes; asserts the discriminator matches.
 */
#define ANV_DECL_PIPELINE_DOWNCAST(pipe_type, pipe_enum) \
   static inline struct anv_##pipe_type##_pipeline * \
   anv_pipeline_to_##pipe_type(struct anv_pipeline *pipeline) \
   { \
      assert(pipeline->type == pipe_enum); \
      return (struct anv_##pipe_type##_pipeline *) pipeline; \
   }

ANV_DECL_PIPELINE_DOWNCAST(graphics, ANV_PIPELINE_GRAPHICS)
ANV_DECL_PIPELINE_DOWNCAST(compute, ANV_PIPELINE_COMPUTE)
ANV_DECL_PIPELINE_DOWNCAST(ray_tracing, ANV_PIPELINE_RAY_TRACING)
3693
3694
static inline bool
3695
anv_pipeline_has_stage(const struct anv_graphics_pipeline *pipeline,
3696
gl_shader_stage stage)
3697
{
3698
return (pipeline->active_stages & mesa_to_vk_shader_stage(stage)) != 0;
3699
}
3700
3701
/* Declare get_<prefix>_prog_data(): returns the stage's prog_data cast to
 * the stage-specific brw type, or NULL when the stage is not present.
 */
#define ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(prefix, stage) \
   static inline const struct brw_##prefix##_prog_data * \
   get_##prefix##_prog_data(const struct anv_graphics_pipeline *pipeline) \
   { \
      if (anv_pipeline_has_stage(pipeline, stage)) { \
         return (const struct brw_##prefix##_prog_data *) \
            pipeline->shaders[stage]->prog_data; \
      } else { \
         return NULL; \
      } \
   }

ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(vs, MESA_SHADER_VERTEX)
ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(tcs, MESA_SHADER_TESS_CTRL)
ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(tes, MESA_SHADER_TESS_EVAL)
ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(gs, MESA_SHADER_GEOMETRY)
ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(wm, MESA_SHADER_FRAGMENT)
3718
3719
static inline const struct brw_cs_prog_data *
3720
get_cs_prog_data(const struct anv_compute_pipeline *pipeline)
3721
{
3722
assert(pipeline->cs);
3723
return (const struct brw_cs_prog_data *) pipeline->cs->prog_data;
3724
}
3725
3726
static inline const struct brw_vue_prog_data *
3727
anv_pipeline_get_last_vue_prog_data(const struct anv_graphics_pipeline *pipeline)
3728
{
3729
if (anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY))
3730
return &get_gs_prog_data(pipeline)->base;
3731
else if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
3732
return &get_tes_prog_data(pipeline)->base;
3733
else
3734
return &get_vs_prog_data(pipeline)->base;
3735
}
3736
3737
VkResult
3738
anv_device_init_rt_shaders(struct anv_device *device);
3739
3740
void
3741
anv_device_finish_rt_shaders(struct anv_device *device);
3742
3743
VkResult
3744
anv_pipeline_init(struct anv_pipeline *pipeline,
3745
struct anv_device *device,
3746
enum anv_pipeline_type type,
3747
VkPipelineCreateFlags flags,
3748
const VkAllocationCallbacks *pAllocator);
3749
3750
void
3751
anv_pipeline_finish(struct anv_pipeline *pipeline,
3752
struct anv_device *device,
3753
const VkAllocationCallbacks *pAllocator);
3754
3755
VkResult
3756
anv_graphics_pipeline_init(struct anv_graphics_pipeline *pipeline, struct anv_device *device,
3757
struct anv_pipeline_cache *cache,
3758
const VkGraphicsPipelineCreateInfo *pCreateInfo,
3759
const VkAllocationCallbacks *alloc);
3760
3761
VkResult
3762
anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
3763
struct anv_pipeline_cache *cache,
3764
const VkComputePipelineCreateInfo *info,
3765
const struct vk_shader_module *module,
3766
const char *entrypoint,
3767
const VkSpecializationInfo *spec_info);
3768
3769
VkResult
3770
anv_ray_tracing_pipeline_init(struct anv_ray_tracing_pipeline *pipeline,
3771
struct anv_device *device,
3772
struct anv_pipeline_cache *cache,
3773
const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
3774
const VkAllocationCallbacks *alloc);
3775
3776
/* Per-plane description of how a VkFormat maps onto an ISL format. */
struct anv_format_plane {
   enum isl_format isl_format:16;
   struct isl_swizzle swizzle;

   /* Whether this plane contains chroma channels */
   bool has_chroma;

   /* For downscaling of YUV planes */
   uint8_t denominator_scales[2];

   /* How to map sampled ycbcr planes to a single 4 component element. */
   struct isl_swizzle ycbcr_swizzle;

   /* What aspect is associated to this plane */
   VkImageAspectFlags aspect;
};
3792
3793
3794
struct anv_format {
3795
struct anv_format_plane planes[3];
3796
VkFormat vk_format;
3797
uint8_t n_planes;
3798
bool can_ycbcr;
3799
};
3800
3801
/**
 * Return the aspect's _format_ plane, not its _memory_ plane (using the
 * vocabulary of VK_EXT_image_drm_format_modifier). As a consequence, \a
 * aspect_mask may contain VK_IMAGE_ASPECT_PLANE_*, but must not contain
 * VK_IMAGE_ASPECT_MEMORY_PLANE_* .
 */
static inline uint32_t
anv_image_aspect_to_plane(VkImageAspectFlags image_aspects,
                          VkImageAspectFlags aspect_mask)
{
   switch (aspect_mask) {
   case VK_IMAGE_ASPECT_COLOR_BIT:
   case VK_IMAGE_ASPECT_DEPTH_BIT:
   case VK_IMAGE_ASPECT_PLANE_0_BIT:
      return 0;
   case VK_IMAGE_ASPECT_STENCIL_BIT:
      /* Stencil-only images keep stencil in plane 0; combined depth/stencil
       * images place stencil in plane 1 (fall through below).
       */
      if ((image_aspects & VK_IMAGE_ASPECT_DEPTH_BIT) == 0)
         return 0;
      FALLTHROUGH;
   case VK_IMAGE_ASPECT_PLANE_1_BIT:
      return 1;
   case VK_IMAGE_ASPECT_PLANE_2_BIT:
      return 2;
   default:
      /* Purposefully assert with depth/stencil aspects. */
      unreachable("invalid image aspect");
   }
}
3829
3830
static inline VkImageAspectFlags
3831
anv_plane_to_aspect(VkImageAspectFlags image_aspects,
3832
uint32_t plane)
3833
{
3834
if (image_aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) {
3835
if (util_bitcount(image_aspects) > 1)
3836
return VK_IMAGE_ASPECT_PLANE_0_BIT << plane;
3837
return VK_IMAGE_ASPECT_COLOR_BIT;
3838
}
3839
if (image_aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
3840
return VK_IMAGE_ASPECT_DEPTH_BIT << plane;
3841
assert(image_aspects == VK_IMAGE_ASPECT_STENCIL_BIT);
3842
return VK_IMAGE_ASPECT_STENCIL_BIT;
3843
}
3844
3845
/* Iterate `b` over each aspect bit in `aspects`, after expanding a plain
 * COLOR aspect into the image's per-plane aspects
 * (see anv_image_expand_aspects()).
 */
#define anv_foreach_image_aspect_bit(b, image, aspects) \
   u_foreach_bit(b, anv_image_expand_aspects(image, aspects))
3847
3848
const struct anv_format *
3849
anv_get_format(VkFormat format);
3850
3851
static inline uint32_t
3852
anv_get_format_planes(VkFormat vk_format)
3853
{
3854
const struct anv_format *format = anv_get_format(vk_format);
3855
3856
return format != NULL ? format->n_planes : 0;
3857
}
3858
3859
struct anv_format_plane
3860
anv_get_format_plane(const struct intel_device_info *devinfo,
3861
VkFormat vk_format,
3862
VkImageAspectFlagBits aspect, VkImageTiling tiling);
3863
3864
/* Convenience wrapper around anv_get_format_plane() for callers that only
 * need the ISL surface format.
 */
static inline enum isl_format
anv_get_isl_format(const struct intel_device_info *devinfo, VkFormat vk_format,
                   VkImageAspectFlags aspect, VkImageTiling tiling)
{
   return anv_get_format_plane(devinfo, vk_format, aspect, tiling).isl_format;
}
3870
3871
bool anv_formats_ccs_e_compatible(const struct intel_device_info *devinfo,
3872
VkImageCreateFlags create_flags,
3873
VkFormat vk_format,
3874
VkImageTiling vk_tiling,
3875
const VkImageFormatListCreateInfoKHR *fmt_list);
3876
3877
extern VkFormat
3878
vk_format_from_android(unsigned android_format, unsigned android_usage);
3879
3880
static inline struct isl_swizzle
3881
anv_swizzle_for_render(struct isl_swizzle swizzle)
3882
{
3883
/* Sometimes the swizzle will have alpha map to one. We do this to fake
3884
* RGB as RGBA for texturing
3885
*/
3886
assert(swizzle.a == ISL_CHANNEL_SELECT_ONE ||
3887
swizzle.a == ISL_CHANNEL_SELECT_ALPHA);
3888
3889
/* But it doesn't matter what we render to that channel */
3890
swizzle.a = ISL_CHANNEL_SELECT_ALPHA;
3891
3892
return swizzle;
3893
}
3894
3895
void
3896
anv_pipeline_setup_l3_config(struct anv_pipeline *pipeline, bool needs_slm);
3897
3898
/**
 * Describes how each part of anv_image will be bound to memory.
 */
struct anv_image_memory_range {
   /**
    * Disjoint bindings into which each portion of the image will be bound.
    *
    * Binding images to memory can be complicated and involve binding different
    * portions of the image to different memory objects or regions. For most
    * images, everything lives in the MAIN binding and gets bound by
    * vkBindImageMemory. For disjoint multi-planar images, each plane has
    * a unique, disjoint binding and gets bound by vkBindImageMemory2 with
    * VkBindImagePlaneMemoryInfo. There may also exist bits of memory which are
    * implicit or driver-managed and live in special-case bindings.
    */
   enum anv_image_memory_binding {
      /**
       * Used if and only if image is not multi-planar disjoint. Bound by
       * vkBindImageMemory2 without VkBindImagePlaneMemoryInfo.
       */
      ANV_IMAGE_MEMORY_BINDING_MAIN,

      /**
       * Used if and only if image is multi-planar disjoint. Bound by
       * vkBindImageMemory2 with VkBindImagePlaneMemoryInfo.
       */
      ANV_IMAGE_MEMORY_BINDING_PLANE_0,
      ANV_IMAGE_MEMORY_BINDING_PLANE_1,
      ANV_IMAGE_MEMORY_BINDING_PLANE_2,

      /**
       * Driver-private bo. In special cases we may store the aux surface and/or
       * aux state in this binding.
       */
      ANV_IMAGE_MEMORY_BINDING_PRIVATE,

      /** Sentinel */
      ANV_IMAGE_MEMORY_BINDING_END,
   } binding;

   /**
    * Offset is relative to the start of the binding created by
    * vkBindImageMemory, not to the start of the bo.
    */
   uint64_t offset;

   uint64_t size;
   uint32_t alignment;
};
3947
3948
/**
 * Subsurface of an anv_image.
 *
 * Pairs an ISL surface layout with the memory range it occupies inside one
 * of the image's bindings.
 */
struct anv_surface {
   struct isl_surf isl;
   struct anv_image_memory_range memory_range;
};
3955
3956
static inline bool MUST_CHECK
3957
anv_surface_is_valid(const struct anv_surface *surface)
3958
{
3959
return surface->isl.size_B > 0 && surface->memory_range.size > 0;
3960
}
3961
3962
struct anv_image {
   struct vk_object_base base;

   VkImageType type; /**< VkImageCreateInfo::imageType */
   /* The original VkFormat provided by the client.  This may not match any
    * of the actual surface formats.
    */
   VkFormat vk_format;
   const struct anv_format *format;

   VkImageAspectFlags aspects;
   VkExtent3D extent;
   uint32_t levels;      /* mip level count */
   uint32_t array_size;  /* array layer count */
   uint32_t samples; /**< VkImageCreateInfo::samples */
   uint32_t n_planes;
   VkImageUsageFlags usage; /**< VkImageCreateInfo::usage. */
   VkImageUsageFlags stencil_usage;
   VkImageCreateFlags create_flags; /* Flags used when creating image. */
   VkImageTiling tiling; /** VkImageCreateInfo::tiling */

   /** True if this needs to be bound to an appropriately tiled BO.
    *
    * When not using modifiers, consumers such as X11, Wayland, and KMS need
    * the tiling passed via I915_GEM_SET_TILING.  When exporting these buffers
    * we require a dedicated allocation so that we can know to allocate a
    * tiled buffer.
    */
   bool needs_set_tiling;

   /**
    * Must be DRM_FORMAT_MOD_INVALID unless tiling is
    * VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT.
    */
   uint64_t drm_format_mod;

   /**
    * Image has multi-planar format and was created with
    * VK_IMAGE_CREATE_DISJOINT_BIT.
    */
   bool disjoint;

   /* Image was created with external format. */
   bool external_format;

   /**
    * Image was imported from gralloc with VkNativeBufferANDROID. The gralloc bo
    * must be released when the image is destroyed.
    */
   bool from_gralloc;

   /**
    * The memory bindings created by vkCreateImage and vkBindImageMemory.
    *
    * For details on the image's memory layout, see check_memory_bindings().
    *
    * vkCreateImage constructs the `memory_range` for each
    * anv_image_memory_binding.  After vkCreateImage, each binding is valid if
    * and only if `memory_range::size > 0`.
    *
    * vkBindImageMemory binds each valid `memory_range` to an `address`.
    * Usually, the app will provide the address via the parameters of
    * vkBindImageMemory.  However, special-case bindings may be bound to
    * driver-private memory.
    */
   struct anv_image_binding {
      struct anv_image_memory_range memory_range;
      struct anv_address address;
   } bindings[ANV_IMAGE_MEMORY_BINDING_END];

   /**
    * Image subsurfaces
    *
    * For each foo, anv_image::planes[x].surface is valid if and only if
    * anv_image::aspects has a x aspect. Refer to anv_image_aspect_to_plane()
    * to figure the number associated with a given aspect.
    *
    * The hardware requires that the depth buffer and stencil buffer be
    * separate surfaces.  From Vulkan's perspective, though, depth and stencil
    * reside in the same VkImage.  To satisfy both the hardware and Vulkan, we
    * allocate the depth and stencil buffers as separate surfaces in the same
    * bo.
    */
   struct anv_image_plane {
      struct anv_surface primary_surface;

      /**
       * A surface which shadows the main surface and may have different
       * tiling. This is used for sampling using a tiling that isn't supported
       * for other operations.
       */
      struct anv_surface shadow_surface;

      /**
       * The base aux usage for this image.  For color images, this can be
       * either CCS_E or CCS_D depending on whether or not we can reliably
       * leave CCS on all the time.
       */
      enum isl_aux_usage aux_usage;

      struct anv_surface aux_surface;

      /** Location of the fast clear state. */
      struct anv_image_memory_range fast_clear_memory_range;
   } planes[3];
};
4068
4069
/* The ordering of this enum is important: values form an increasing scale of
 * fast-clear capability (NOTE(review): users appear to compare these values
 * numerically — confirm before reordering).
 */
enum anv_fast_clear_type {
   /** Image does not have/support any fast-clear blocks */
   ANV_FAST_CLEAR_NONE = 0,
   /** Image has/supports fast-clear but only to the default value */
   ANV_FAST_CLEAR_DEFAULT_VALUE = 1,
   /** Image has/supports fast-clear with an arbitrary fast-clear value */
   ANV_FAST_CLEAR_ANY = 2,
};
4078
4079
/* Returns the number of auxiliary buffer levels attached to an image. */
4080
static inline uint8_t
4081
anv_image_aux_levels(const struct anv_image * const image,
4082
VkImageAspectFlagBits aspect)
4083
{
4084
uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect);
4085
if (image->planes[plane].aux_usage == ISL_AUX_USAGE_NONE)
4086
return 0;
4087
4088
return image->levels;
4089
}
4090
4091
/* Returns the number of auxiliary buffer layers attached to an image. */
static inline uint32_t
anv_image_aux_layers(const struct anv_image * const image,
                     VkImageAspectFlagBits aspect,
                     const uint8_t miplevel)
{
   assert(image);

   /* The miplevel must exist in the main buffer. */
   assert(miplevel < image->levels);

   if (miplevel >= anv_image_aux_levels(image, aspect)) {
      /* There are no layers with auxiliary data because the miplevel has no
       * auxiliary data.
       */
      return 0;
   }

   /* For 3D images the layer count at a level is the minified depth; the
    * MAX2 covers the 2D-array case (array_size) and the 3D case (depth) in
    * one expression.
    */
   return MAX2(image->array_size, image->extent.depth >> miplevel);
}
4111
4112
/* Resolve an image memory range to an absolute address, or ANV_NULL_ADDRESS
 * for an unused (zero-sized) range.
 */
static inline struct anv_address MUST_CHECK
anv_image_address(const struct anv_image *image,
                  const struct anv_image_memory_range *mem_range)
{
   const struct anv_image_binding *binding = &image->bindings[mem_range->binding];
   /* Binding offsets are expected to already be folded into the address. */
   assert(binding->memory_range.offset == 0);

   if (mem_range->size == 0)
      return ANV_NULL_ADDRESS;

   return anv_address_add(binding->address, mem_range->offset);
}
4124
4125
/* Address of the plane's fast-clear color value (start of the plane's
 * fast-clear memory range).  Valid only for color and depth aspects.
 */
static inline struct anv_address
anv_image_get_clear_color_addr(UNUSED const struct anv_device *device,
                               const struct anv_image *image,
                               VkImageAspectFlagBits aspect)
{
   assert(image->aspects & (VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV |
                            VK_IMAGE_ASPECT_DEPTH_BIT));

   uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect);
   const struct anv_image_memory_range *mem_range =
      &image->planes[plane].fast_clear_memory_range;

   return anv_image_address(image, mem_range);
}
4139
4140
/* Address of the anv_fast_clear_type word, which lives immediately after the
 * clear color state within the plane's fast-clear memory range.
 */
static inline struct anv_address
anv_image_get_fast_clear_type_addr(const struct anv_device *device,
                                   const struct anv_image *image,
                                   VkImageAspectFlagBits aspect)
{
   struct anv_address addr =
      anv_image_get_clear_color_addr(device, image, aspect);

   /* Gfx10+ uses the larger clear-color-state layout; skip past whichever
    * one this device stores.
    */
   const unsigned clear_color_state_size = device->info.ver >= 10 ?
      device->isl_dev.ss.clear_color_state_size :
      device->isl_dev.ss.clear_value_size;
   return anv_address_add(addr, clear_color_state_size);
}
4153
4154
/* Address of the per-(level, layer) compression-state dword for a CCS_E
 * plane.  The dwords are laid out after the fast clear type: levels in
 * order, each level holding one dword per layer (2D arrays) or per depth
 * slice (3D, minified per level).
 */
static inline struct anv_address
anv_image_get_compression_state_addr(const struct anv_device *device,
                                     const struct anv_image *image,
                                     VkImageAspectFlagBits aspect,
                                     uint32_t level, uint32_t array_layer)
{
   assert(level < anv_image_aux_levels(image, aspect));
   assert(array_layer < anv_image_aux_layers(image, aspect, level));
   UNUSED uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect);
   assert(image->planes[plane].aux_usage == ISL_AUX_USAGE_CCS_E);

   /* Relative to start of the plane's fast clear memory range */
   uint32_t offset;

   offset = 4; /* Go past the fast clear type */

   if (image->type == VK_IMAGE_TYPE_3D) {
      /* 3D images: sum the (minified) depth of every preceding level. */
      for (uint32_t l = 0; l < level; l++)
         offset += anv_minify(image->extent.depth, l) * 4;
   } else {
      offset += level * image->array_size * 4;
   }

   offset += array_layer * 4;

   assert(offset < image->planes[plane].fast_clear_memory_range.size);

   return anv_address_add(
      anv_image_get_fast_clear_type_addr(device, image, aspect),
      offset);
}
4185
4186
/* Returns true if a HiZ-enabled depth buffer can be sampled from. */
static inline bool
anv_can_sample_with_hiz(const struct intel_device_info * const devinfo,
                        const struct anv_image *image)
{
   /* HiZ only applies to the depth aspect. */
   if (!(image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT))
      return false;

   /* For Gfx8-11, there are some restrictions around sampling from HiZ.
    * The Skylake PRM docs for RENDER_SURFACE_STATE::AuxiliarySurfaceMode
    * say:
    *
    *    "If this field is set to AUX_HIZ, Number of Multisamples must
    *    be MULTISAMPLECOUNT_1, and Surface Type cannot be SURFTYPE_3D."
    */
   if (image->type == VK_IMAGE_TYPE_3D)
      return false;

   /* Allow this feature on BDW even though it is disabled in the BDW devinfo
    * struct. There's documentation which suggests that this feature actually
    * reduces performance on BDW, but it has only been observed to help so
    * far. Sampling fast-cleared blocks on BDW must also be handled with care
    * (see depth_stencil_attachment_compute_aux_usage() for more info).
    */
   if (devinfo->ver != 8 && !devinfo->has_sample_with_hiz)
      return false;

   /* Single-sampled only, per the PRM restriction quoted above. */
   return image->samples == 1;
}
4215
4216
/* Returns true if an MCS-enabled buffer can be sampled from. */
static inline bool
anv_can_sample_mcs_with_clear(const struct intel_device_info * const devinfo,
                              const struct anv_image *image)
{
   /* MCS only exists on single-plane color images. */
   assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
   const uint32_t plane =
      anv_image_aspect_to_plane(image->aspects, VK_IMAGE_ASPECT_COLOR_BIT);

   assert(isl_aux_usage_has_mcs(image->planes[plane].aux_usage));

   const struct anv_surface *anv_surf = &image->planes[plane].primary_surface;

   /* On TGL, the sampler has an issue with some 8 and 16bpp MSAA fast clears.
    * See HSD 1707282275, wa_14013111325. Due to the use of
    * format-reinterpretation, a simplified workaround is implemented.
    */
   if (devinfo->ver >= 12 &&
       isl_format_get_layout(anv_surf->isl.format)->bpb <= 16) {
      return false;
   }

   return true;
}
4240
4241
static inline bool
4242
anv_image_plane_uses_aux_map(const struct anv_device *device,
4243
const struct anv_image *image,
4244
uint32_t plane)
4245
{
4246
return device->info.has_aux_map &&
4247
isl_aux_usage_has_ccs(image->planes[plane].aux_usage);
4248
}
4249
4250
void
4251
anv_cmd_buffer_mark_image_written(struct anv_cmd_buffer *cmd_buffer,
4252
const struct anv_image *image,
4253
VkImageAspectFlagBits aspect,
4254
enum isl_aux_usage aux_usage,
4255
uint32_t level,
4256
uint32_t base_layer,
4257
uint32_t layer_count);
4258
4259
void
4260
anv_image_clear_color(struct anv_cmd_buffer *cmd_buffer,
4261
const struct anv_image *image,
4262
VkImageAspectFlagBits aspect,
4263
enum isl_aux_usage aux_usage,
4264
enum isl_format format, struct isl_swizzle swizzle,
4265
uint32_t level, uint32_t base_layer, uint32_t layer_count,
4266
VkRect2D area, union isl_color_value clear_color);
4267
void
4268
anv_image_clear_depth_stencil(struct anv_cmd_buffer *cmd_buffer,
4269
const struct anv_image *image,
4270
VkImageAspectFlags aspects,
4271
enum isl_aux_usage depth_aux_usage,
4272
uint32_t level,
4273
uint32_t base_layer, uint32_t layer_count,
4274
VkRect2D area,
4275
float depth_value, uint8_t stencil_value);
4276
void
4277
anv_image_msaa_resolve(struct anv_cmd_buffer *cmd_buffer,
4278
const struct anv_image *src_image,
4279
enum isl_aux_usage src_aux_usage,
4280
uint32_t src_level, uint32_t src_base_layer,
4281
const struct anv_image *dst_image,
4282
enum isl_aux_usage dst_aux_usage,
4283
uint32_t dst_level, uint32_t dst_base_layer,
4284
VkImageAspectFlagBits aspect,
4285
uint32_t src_x, uint32_t src_y,
4286
uint32_t dst_x, uint32_t dst_y,
4287
uint32_t width, uint32_t height,
4288
uint32_t layer_count,
4289
enum blorp_filter filter);
4290
void
4291
anv_image_hiz_op(struct anv_cmd_buffer *cmd_buffer,
4292
const struct anv_image *image,
4293
VkImageAspectFlagBits aspect, uint32_t level,
4294
uint32_t base_layer, uint32_t layer_count,
4295
enum isl_aux_op hiz_op);
4296
void
4297
anv_image_hiz_clear(struct anv_cmd_buffer *cmd_buffer,
4298
const struct anv_image *image,
4299
VkImageAspectFlags aspects,
4300
uint32_t level,
4301
uint32_t base_layer, uint32_t layer_count,
4302
VkRect2D area, uint8_t stencil_value);
4303
void
4304
anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer,
4305
const struct anv_image *image,
4306
enum isl_format format, struct isl_swizzle swizzle,
4307
VkImageAspectFlagBits aspect,
4308
uint32_t base_layer, uint32_t layer_count,
4309
enum isl_aux_op mcs_op, union isl_color_value *clear_value,
4310
bool predicate);
4311
void
4312
anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer,
4313
const struct anv_image *image,
4314
enum isl_format format, struct isl_swizzle swizzle,
4315
VkImageAspectFlagBits aspect, uint32_t level,
4316
uint32_t base_layer, uint32_t layer_count,
4317
enum isl_aux_op ccs_op, union isl_color_value *clear_value,
4318
bool predicate);
4319
4320
void
4321
anv_image_copy_to_shadow(struct anv_cmd_buffer *cmd_buffer,
4322
const struct anv_image *image,
4323
VkImageAspectFlagBits aspect,
4324
uint32_t base_level, uint32_t level_count,
4325
uint32_t base_layer, uint32_t layer_count);
4326
4327
enum isl_aux_state ATTRIBUTE_PURE
4328
anv_layout_to_aux_state(const struct intel_device_info * const devinfo,
4329
const struct anv_image *image,
4330
const VkImageAspectFlagBits aspect,
4331
const VkImageLayout layout);
4332
4333
enum isl_aux_usage ATTRIBUTE_PURE
4334
anv_layout_to_aux_usage(const struct intel_device_info * const devinfo,
4335
const struct anv_image *image,
4336
const VkImageAspectFlagBits aspect,
4337
const VkImageUsageFlagBits usage,
4338
const VkImageLayout layout);
4339
4340
enum anv_fast_clear_type ATTRIBUTE_PURE
4341
anv_layout_to_fast_clear_type(const struct intel_device_info * const devinfo,
4342
const struct anv_image * const image,
4343
const VkImageAspectFlagBits aspect,
4344
const VkImageLayout layout);
4345
4346
/* This is defined as a macro so that it works for both
 * VkImageSubresourceRange and VkImageSubresourceLayers
 *
 * NOTE: evaluates `_range` (and possibly `_image`) more than once — avoid
 * passing arguments with side effects.
 */
#define anv_get_layerCount(_image, _range) \
   ((_range)->layerCount == VK_REMAINING_ARRAY_LAYERS ? \
    (_image)->array_size - (_range)->baseArrayLayer : (_range)->layerCount)
4352
4353
static inline uint32_t
4354
anv_get_levelCount(const struct anv_image *image,
4355
const VkImageSubresourceRange *range)
4356
{
4357
return range->levelCount == VK_REMAINING_MIP_LEVELS ?
4358
image->levels - range->baseMipLevel : range->levelCount;
4359
}
4360
4361
static inline VkImageAspectFlags
4362
anv_image_expand_aspects(const struct anv_image *image,
4363
VkImageAspectFlags aspects)
4364
{
4365
/* If the underlying image has color plane aspects and
4366
* VK_IMAGE_ASPECT_COLOR_BIT has been requested, then return the aspects of
4367
* the underlying image. */
4368
if ((image->aspects & VK_IMAGE_ASPECT_PLANES_BITS_ANV) != 0 &&
4369
aspects == VK_IMAGE_ASPECT_COLOR_BIT)
4370
return image->aspects;
4371
4372
return aspects;
4373
}
4374
4375
static inline bool
4376
anv_image_aspects_compatible(VkImageAspectFlags aspects1,
4377
VkImageAspectFlags aspects2)
4378
{
4379
if (aspects1 == aspects2)
4380
return true;
4381
4382
/* Only 1 color aspects are compatibles. */
4383
if ((aspects1 & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) != 0 &&
4384
(aspects2 & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) != 0 &&
4385
util_bitcount(aspects1) == util_bitcount(aspects2))
4386
return true;
4387
4388
return false;
4389
}
4390
4391
/* Driver state backing a VkImageView, with per-plane surface states. */
struct anv_image_view {
   struct vk_object_base base;

   const struct anv_image *image; /**< VkImageViewCreateInfo::image */

   VkImageAspectFlags aspect_mask;
   VkFormat vk_format;
   VkExtent3D extent; /**< Extent of VkImageViewCreateInfo::baseMipLevel. */

   unsigned n_planes;
   struct {
      /* Index into anv_image::planes[] this view plane reads from. */
      uint32_t image_plane;

      struct isl_view isl;

      /**
       * RENDER_SURFACE_STATE when using image as a sampler surface with an
       * image layout of SHADER_READ_ONLY_OPTIMAL or
       * DEPTH_STENCIL_READ_ONLY_OPTIMAL.
       */
      struct anv_surface_state optimal_sampler_surface_state;

      /**
       * RENDER_SURFACE_STATE when using image as a sampler surface with an
       * image layout of GENERAL.
       */
      struct anv_surface_state general_sampler_surface_state;

      /**
       * RENDER_SURFACE_STATE when using image as a storage image. Separate
       * states for write-only and readable, using the real format for
       * write-only and the lowered format for readable.
       */
      struct anv_surface_state storage_surface_state;
      struct anv_surface_state writeonly_storage_surface_state;

      struct brw_image_param storage_image_param;
   } planes[3];
};
4430
4431
/* Flags passed to anv_image_fill_surface_state() to select which variant of
 * surface state to emit.
 */
enum anv_image_view_state_flags {
   ANV_IMAGE_VIEW_STATE_STORAGE_WRITE_ONLY   = (1 << 0),
   ANV_IMAGE_VIEW_STATE_TEXTURE_OPTIMAL      = (1 << 1),
};
4435
4436
void anv_image_fill_surface_state(struct anv_device *device,
4437
const struct anv_image *image,
4438
VkImageAspectFlagBits aspect,
4439
const struct isl_view *view,
4440
isl_surf_usage_flags_t view_usage,
4441
enum isl_aux_usage aux_usage,
4442
const union isl_color_value *clear_color,
4443
enum anv_image_view_state_flags flags,
4444
struct anv_surface_state *state_inout,
4445
struct brw_image_param *image_param_out);
4446
4447
/* Driver-internal parameters layered on top of VkImageCreateInfo for
 * anv_image_create().
 */
struct anv_image_create_info {
   const VkImageCreateInfo *vk_info;

   /** An opt-in bitmask which filters an ISL-mapping of the Vulkan tiling. */
   isl_tiling_flags_t isl_tiling_flags;

   /** These flags will be added to any derived from VkImageCreateInfo. */
   isl_surf_usage_flags_t isl_extra_usage_flags;

   bool external_format;
};
4458
4459
VkResult anv_image_create(VkDevice _device,
4460
const struct anv_image_create_info *info,
4461
const VkAllocationCallbacks* alloc,
4462
VkImage *pImage);
4463
4464
enum isl_format
4465
anv_isl_format_for_descriptor_type(const struct anv_device *device,
4466
VkDescriptorType type);
4467
4468
static inline VkExtent3D
4469
anv_sanitize_image_extent(const VkImageType imageType,
4470
const VkExtent3D imageExtent)
4471
{
4472
switch (imageType) {
4473
case VK_IMAGE_TYPE_1D:
4474
return (VkExtent3D) { imageExtent.width, 1, 1 };
4475
case VK_IMAGE_TYPE_2D:
4476
return (VkExtent3D) { imageExtent.width, imageExtent.height, 1 };
4477
case VK_IMAGE_TYPE_3D:
4478
return imageExtent;
4479
default:
4480
unreachable("invalid image type");
4481
}
4482
}
4483
4484
static inline VkOffset3D
4485
anv_sanitize_image_offset(const VkImageType imageType,
4486
const VkOffset3D imageOffset)
4487
{
4488
switch (imageType) {
4489
case VK_IMAGE_TYPE_1D:
4490
return (VkOffset3D) { imageOffset.x, 0, 0 };
4491
case VK_IMAGE_TYPE_2D:
4492
return (VkOffset3D) { imageOffset.x, imageOffset.y, 0 };
4493
case VK_IMAGE_TYPE_3D:
4494
return imageOffset;
4495
default:
4496
unreachable("invalid image type");
4497
}
4498
}
4499
4500
static inline uint32_t
4501
anv_rasterization_aa_mode(VkPolygonMode raster_mode,
4502
VkLineRasterizationModeEXT line_mode)
4503
{
4504
if (raster_mode == VK_POLYGON_MODE_LINE &&
4505
line_mode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT)
4506
return true;
4507
return false;
4508
}
4509
4510
VkFormatFeatureFlags
4511
anv_get_image_format_features(const struct intel_device_info *devinfo,
4512
VkFormat vk_format,
4513
const struct anv_format *anv_format,
4514
VkImageTiling vk_tiling,
4515
const struct isl_drm_modifier_info *isl_mod_info);
4516
4517
void anv_fill_buffer_surface_state(struct anv_device *device,
4518
struct anv_state state,
4519
enum isl_format format,
4520
isl_surf_usage_flags_t usage,
4521
struct anv_address address,
4522
uint32_t range, uint32_t stride);
4523
4524
/* Copy the attachment's clear color into `clear_color`, channel by channel,
 * skipping channels the view's format does not actually store.
 */
static inline void
anv_clear_color_from_att_state(union isl_color_value *clear_color,
                               const struct anv_attachment_state *att_state,
                               const struct anv_image_view *iview)
{
   const struct isl_format_layout *view_fmtl =
      isl_format_get_layout(iview->planes[0].isl.format);

/* Copy channel `c` into slot `i` only if the format has bits for it. */
#define COPY_CLEAR_COLOR_CHANNEL(c, i) \
   if (view_fmtl->channels.c.bits) \
      clear_color->u32[i] = att_state->clear_value.color.uint32[i]

   COPY_CLEAR_COLOR_CHANNEL(r, 0);
   COPY_CLEAR_COLOR_CHANNEL(g, 1);
   COPY_CLEAR_COLOR_CHANNEL(b, 2);
   COPY_CLEAR_COLOR_CHANNEL(a, 3);

#undef COPY_CLEAR_COLOR_CHANNEL
}
4543
4544
4545
/* Haswell border color is a bit of a disaster.  Float and unorm formats use a
 * straightforward 32-bit float color in the first 64 bytes.  Instead of using
 * a nice float/integer union like Gfx8+, Haswell specifies the integer border
 * color as a separate entry /after/ the float color.  The layout of this entry
 * also depends on the format's bpp (with extra hacks for RG32), and overlaps.
 *
 * Since we don't know the format/bpp, we can't make any of the border colors
 * containing '1' work for all formats, as it would be in the wrong place for
 * some of them.  We opt to make 32-bit integers work as this seems like the
 * most common option.  Fortunately, transparent black works regardless, as
 * all zeroes is the same in every bit-size.
 */
struct hsw_border_color {
   float float32[4];
   uint32_t _pad0[12];
   /* 32-bit integer border color, at the offset HW expects for 32bpp. */
   uint32_t uint32[4];
   uint32_t _pad1[108];
};
4563
4564
/* Gfx8+ border color: float and integer values share the same slot. */
struct gfx8_border_color {
   union {
      float float32[4];
      uint32_t uint32[4];
   };
   /* Pad out to 64 bytes */
   uint32_t _pad[12];
};
4572
4573
/* Sampler YCbCr conversion state: color model, range, component mapping and
 * chroma siting/filtering.
 */
struct anv_ycbcr_conversion {
   struct vk_object_base base;

   const struct anv_format *        format;
   VkSamplerYcbcrModelConversion    ycbcr_model;
   VkSamplerYcbcrRange              ycbcr_range;
   VkComponentSwizzle               mapping[4];
   VkChromaLocation                 chroma_offsets[2];
   VkFilter                         chroma_filter;
   bool                             chroma_reconstruction;
};
4584
4585
struct anv_sampler {
   struct vk_object_base base;

   /* Per-plane sampler state dwords; only the first n_planes entries are
    * meaningful.
    */
   uint32_t                     state[3][4];
   uint32_t                     n_planes;
   struct anv_ycbcr_conversion *conversion;

   /* Blob of sampler state data which is guaranteed to be 32-byte aligned
    * and with a 32-byte stride for use as bindless samplers.
    */
   struct anv_state             bindless_state;

   struct anv_state             custom_border_color;
};
4599
4600
struct anv_framebuffer {
   struct vk_object_base base;

   uint32_t                                     width;
   uint32_t                                     height;
   uint32_t                                     layers;

   uint32_t                                     attachment_count;
   /* Trailing array of attachment_count views, allocated with the struct. */
   struct anv_image_view *                      attachments[0];
};
4610
4611
/* One attachment reference within a subpass. */
struct anv_subpass_attachment {
   VkImageUsageFlagBits usage;
   /* Index into the render pass attachment array. */
   uint32_t attachment;
   VkImageLayout layout;

   /* Used only with attachment containing stencil data. */
   VkImageLayout stencil_layout;
};
4619
4620
struct anv_subpass {
   uint32_t                                     attachment_count;

   /**
    * A pointer to all attachment references used in this subpass.
    * Only valid if ::attachment_count > 0.
    */
   struct anv_subpass_attachment *              attachments;
   uint32_t                                     input_count;
   struct anv_subpass_attachment *              input_attachments;
   uint32_t                                     color_count;
   struct anv_subpass_attachment *              color_attachments;
   struct anv_subpass_attachment *              resolve_attachments;

   struct anv_subpass_attachment *              depth_stencil_attachment;
   struct anv_subpass_attachment *              ds_resolve_attachment;
   VkResolveModeFlagBitsKHR                     depth_resolve_mode;
   VkResolveModeFlagBitsKHR                     stencil_resolve_mode;

   /* Multiview view mask; 0 when multiview is not in use. */
   uint32_t                                     view_mask;

   /** Subpass has a depth/stencil self-dependency */
   bool                                         has_ds_self_dep;

   /** Subpass has at least one color resolve attachment */
   bool                                         has_color_resolve;
};
4647
4648
static inline unsigned
4649
anv_subpass_view_count(const struct anv_subpass *subpass)
4650
{
4651
return MAX2(1, util_bitcount(subpass->view_mask));
4652
}
4653
4654
struct anv_render_pass_attachment {
   /* TODO: Consider using VkAttachmentDescription instead of storing each of
    * its members individually.
    */
   VkFormat                                     format;
   uint32_t                                     samples;
   VkImageUsageFlags                            usage;
   VkAttachmentLoadOp                           load_op;
   VkAttachmentStoreOp                          store_op;
   VkAttachmentLoadOp                           stencil_load_op;
   VkImageLayout                                initial_layout;
   VkImageLayout                                final_layout;
   /* Layout the attachment is in during the first subpass that uses it. */
   VkImageLayout                                first_subpass_layout;

   VkImageLayout                                stencil_initial_layout;
   VkImageLayout                                stencil_final_layout;

   /* The subpass id in which the attachment will be used last. */
   uint32_t                                     last_subpass_idx;
};
4674
4675
struct anv_render_pass {
   struct vk_object_base                        base;

   uint32_t                                     attachment_count;
   uint32_t                                     subpass_count;
   /* An array of subpass_count+1 flushes, one per subpass boundary */
   enum anv_pipe_bits *                         subpass_flushes;
   struct anv_render_pass_attachment *          attachments;
   /* Trailing array of subpass_count subpasses, allocated with the struct. */
   struct anv_subpass                           subpasses[0];
};
4685
4686
#define ANV_PIPELINE_STATISTICS_MASK 0x000007ff
4687
4688
struct anv_query_pool {
   struct vk_object_base                        base;

   VkQueryType                                  type;
   VkQueryPipelineStatisticFlags                pipeline_statistics;
   /** Stride between slots, in bytes */
   uint32_t                                     stride;
   /** Number of slots in this query pool */
   uint32_t                                     slots;
   /* Buffer object backing the query results. */
   struct anv_bo *                              bo;

   /* KHR perf queries : */
   uint32_t                                     pass_size;
   uint32_t                                     data_offset;
   uint32_t                                     snapshot_size;
   uint32_t                                     n_counters;
   struct intel_perf_counter_pass                *counter_pass;
   uint32_t                                     n_passes;
   struct intel_perf_query_info                 **pass_query;
};
4708
4709
/* Byte offset of a perf-query pass's preamble: each pass occupies pass_size
 * bytes, and the preamble lives 8 bytes into the pass.
 */
static inline uint32_t khr_perf_query_preamble_offset(const struct anv_query_pool *pool,
                                                      uint32_t pass)
{
   const uint32_t pass_base = pool->pass_size * pass;
   return pass_base + 8;
}
4714
4715
/* Acceleration structure object: its size and GPU address. */
struct anv_acceleration_structure {
   struct vk_object_base                        base;

   VkDeviceSize                                 size;
   struct anv_address                           address;
};
4721
4722
/* Lookups into the generated entrypoint tables: name -> table index and
 * index -> name, for the instance, physical-device and device levels.
 */
int anv_get_instance_entrypoint_index(const char *name);
int anv_get_device_entrypoint_index(const char *name);
int anv_get_physical_device_entrypoint_index(const char *name);

const char *anv_get_instance_entry_name(int index);
const char *anv_get_physical_device_entry_name(int index);
const char *anv_get_device_entry_name(int index);

/* Whether the entrypoint at `index` is exposed given the core API version
 * and the set of enabled extensions.
 */
bool
anv_instance_entrypoint_is_enabled(int index, uint32_t core_version,
                                   const struct vk_instance_extension_table *instance);
bool
anv_physical_device_entrypoint_is_enabled(int index, uint32_t core_version,
                                          const struct vk_instance_extension_table *instance);
bool
anv_device_entrypoint_is_enabled(int index, uint32_t core_version,
                                 const struct vk_instance_extension_table *instance,
                                 const struct vk_device_extension_table *device);

/* Device-level dispatch table specialized for the given hardware. */
const struct vk_device_dispatch_table *
anv_get_device_dispatch_table(const struct intel_device_info *devinfo);

/* Print a human-readable decode of `bits` to stderr (used by the
 * INTEL_DEBUG=pc tracing below).
 */
void
anv_dump_pipe_bits(enum anv_pipe_bits bits);
4746
4747
static inline void
4748
anv_add_pending_pipe_bits(struct anv_cmd_buffer* cmd_buffer,
4749
enum anv_pipe_bits bits,
4750
const char* reason)
4751
{
4752
cmd_buffer->state.pending_pipe_bits |= bits;
4753
if (unlikely(INTEL_DEBUG & DEBUG_PIPE_CONTROL) && bits)
4754
{
4755
fputs("pc: add ", stderr);
4756
anv_dump_pipe_bits(bits);
4757
fprintf(stderr, "reason: %s\n", reason);
4758
}
4759
}
4760
4761
static inline uint32_t
4762
anv_get_subpass_id(const struct anv_cmd_state * const cmd_state)
4763
{
4764
/* This function must be called from within a subpass. */
4765
assert(cmd_state->pass && cmd_state->subpass);
4766
4767
const uint32_t subpass_id = cmd_state->subpass - cmd_state->pass->subpasses;
4768
4769
/* The id of this subpass shouldn't exceed the number of subpasses in this
4770
* render pass minus 1.
4771
*/
4772
assert(subpass_id < cmd_state->pass->subpass_count);
4773
return subpass_id;
4774
}
4775
4776
/* Driver representation of a VkPerformanceConfigurationINTEL. */
struct anv_performance_configuration_intel {
   struct vk_object_base      base;

   /* Register values to program for this perf configuration */
   struct intel_perf_registers *register_config;

   /* Kernel identifier for the loaded configuration */
   uint64_t                   config_id;
};
4783
4784
/* Performance-query support: per-device init and result conversion from the
 * accumulated HW counters into the VK_KHR_performance_query result union.
 */
void anv_physical_device_init_perf(struct anv_physical_device *device, int fd);
void anv_device_perf_init(struct anv_device *device);
void anv_perf_write_pass_results(struct intel_perf_config *perf,
                                 struct anv_query_pool *pool, uint32_t pass,
                                 const struct intel_perf_query_result *accumulated_results,
                                 union VkPerformanceCounterResultKHR *results);
4790
4791
#define ANV_FROM_HANDLE(__anv_type, __name, __handle) \
4792
VK_FROM_HANDLE(__anv_type, __name, __handle)
4793
4794
VK_DEFINE_HANDLE_CASTS(anv_cmd_buffer, base, VkCommandBuffer,
4795
VK_OBJECT_TYPE_COMMAND_BUFFER)
4796
VK_DEFINE_HANDLE_CASTS(anv_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
4797
VK_DEFINE_HANDLE_CASTS(anv_instance, vk.base, VkInstance, VK_OBJECT_TYPE_INSTANCE)
4798
VK_DEFINE_HANDLE_CASTS(anv_physical_device, vk.base, VkPhysicalDevice,
4799
VK_OBJECT_TYPE_PHYSICAL_DEVICE)
4800
VK_DEFINE_HANDLE_CASTS(anv_queue, base, VkQueue, VK_OBJECT_TYPE_QUEUE)
4801
4802
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_acceleration_structure, base,
4803
VkAccelerationStructureKHR,
4804
VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR)
4805
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_cmd_pool, base, VkCommandPool,
4806
VK_OBJECT_TYPE_COMMAND_POOL)
4807
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer, base, VkBuffer,
4808
VK_OBJECT_TYPE_BUFFER)
4809
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer_view, base, VkBufferView,
4810
VK_OBJECT_TYPE_BUFFER_VIEW)
4811
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_pool, base, VkDescriptorPool,
4812
VK_OBJECT_TYPE_DESCRIPTOR_POOL)
4813
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set, base, VkDescriptorSet,
4814
VK_OBJECT_TYPE_DESCRIPTOR_SET)
4815
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set_layout, base,
4816
VkDescriptorSetLayout,
4817
VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT)
4818
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_update_template, base,
4819
VkDescriptorUpdateTemplate,
4820
VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE)
4821
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_device_memory, base, VkDeviceMemory,
4822
VK_OBJECT_TYPE_DEVICE_MEMORY)
4823
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_fence, base, VkFence, VK_OBJECT_TYPE_FENCE)
4824
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_event, base, VkEvent, VK_OBJECT_TYPE_EVENT)
4825
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_framebuffer, base, VkFramebuffer,
4826
VK_OBJECT_TYPE_FRAMEBUFFER)
4827
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_image, base, VkImage, VK_OBJECT_TYPE_IMAGE)
4828
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_image_view, base, VkImageView,
4829
VK_OBJECT_TYPE_IMAGE_VIEW);
4830
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline_cache, base, VkPipelineCache,
4831
VK_OBJECT_TYPE_PIPELINE_CACHE)
4832
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline, base, VkPipeline,
4833
VK_OBJECT_TYPE_PIPELINE)
4834
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline_layout, base, VkPipelineLayout,
4835
VK_OBJECT_TYPE_PIPELINE_LAYOUT)
4836
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_query_pool, base, VkQueryPool,
4837
VK_OBJECT_TYPE_QUERY_POOL)
4838
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_render_pass, base, VkRenderPass,
4839
VK_OBJECT_TYPE_RENDER_PASS)
4840
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_sampler, base, VkSampler,
4841
VK_OBJECT_TYPE_SAMPLER)
4842
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_semaphore, base, VkSemaphore,
4843
VK_OBJECT_TYPE_SEMAPHORE)
4844
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_ycbcr_conversion, base,
4845
VkSamplerYcbcrConversion,
4846
VK_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION)
4847
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_performance_configuration_intel, base,
4848
VkPerformanceConfigurationINTEL,
4849
VK_OBJECT_TYPE_PERFORMANCE_CONFIGURATION_INTEL)
4850
4851
/* Select the per-hardware-generation implementation of `thing` at runtime:
 * anv_genX(devinfo, foo) evaluates to a pointer to gfx{7,75,8,...}_foo based
 * on devinfo->verx10.  Uses GNU statement expressions and __typeof; the gfx9
 * variant only supplies the pointer type, all branches must share it.
 */
#define anv_genX(devinfo, thing) ({             \
   __typeof(&gfx9_##thing) genX_thing;          \
   switch ((devinfo)->verx10) {                 \
   case 70:                                     \
      genX_thing = &gfx7_##thing;               \
      break;                                    \
   case 75:                                     \
      genX_thing = &gfx75_##thing;              \
      break;                                    \
   case 80:                                     \
      genX_thing = &gfx8_##thing;               \
      break;                                    \
   case 90:                                     \
      genX_thing = &gfx9_##thing;               \
      break;                                    \
   case 110:                                    \
      genX_thing = &gfx11_##thing;              \
      break;                                    \
   case 120:                                    \
      genX_thing = &gfx12_##thing;              \
      break;                                    \
   case 125:                                    \
      genX_thing = &gfx125_##thing;             \
      break;                                    \
   default:                                     \
      unreachable("Unknown hardware generation"); \
   }                                            \
   genX_thing;                                  \
})
4880
4881
/* Gen-specific function declarations.
 *
 * When compiled inside a per-generation translation unit, genX is already
 * defined and anv_genX.h is pulled in once for that generation.  Otherwise,
 * include it once per supported generation with genX() remapped each time,
 * so all gfx*_-prefixed prototypes become visible (as used by anv_genX()
 * above).
 */
#ifdef genX
#  include "anv_genX.h"
#else
#  define genX(x) gfx7_##x
#  include "anv_genX.h"
#  undef genX
#  define genX(x) gfx75_##x
#  include "anv_genX.h"
#  undef genX
#  define genX(x) gfx8_##x
#  include "anv_genX.h"
#  undef genX
#  define genX(x) gfx9_##x
#  include "anv_genX.h"
#  undef genX
#  define genX(x) gfx11_##x
#  include "anv_genX.h"
#  undef genX
#  define genX(x) gfx12_##x
#  include "anv_genX.h"
#  undef genX
#  define genX(x) gfx125_##x
#  include "anv_genX.h"
#  undef genX
#endif
4907
4908
#endif /* ANV_PRIVATE_H */
4909
4910