GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/freedreno/vulkan/tu_private.h
/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef TU_PRIVATE_H
#define TU_PRIVATE_H

#include <assert.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef HAVE_VALGRIND
#include <memcheck.h>
#include <valgrind.h>
#define VG(x) x
#else
#define VG(x) ((void)0)
#endif
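
/* Illustrative usage sketch: VG() guards Valgrind client requests so that they
 * compile away when HAVE_VALGRIND is not defined, e.g. on a freshly mapped BO:
 *
 *    VG(VALGRIND_MAKE_MEM_UNDEFINED(bo->map, bo->size));
 */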

#define MESA_LOG_TAG "TU"

#include "c11/threads.h"
#include "main/macros.h"
#include "util/bitscan.h"
#include "util/list.h"
#include "util/log.h"
#include "util/macros.h"
#include "util/u_atomic.h"
#include "util/u_dynarray.h"
#include "vk_alloc.h"
#include "vk_debug_report.h"
#include "vk_device.h"
#include "vk_dispatch_table.h"
#include "vk_extensions.h"
#include "vk_instance.h"
#include "vk_physical_device.h"
#include "vk_shader_module.h"
#include "wsi_common.h"

#include "ir3/ir3_compiler.h"
#include "ir3/ir3_shader.h"

#include "adreno_common.xml.h"
#include "adreno_pm4.xml.h"
#include "a6xx.xml.h"
#include "fdl/freedreno_layout.h"
#include "common/freedreno_dev_info.h"
#include "perfcntrs/freedreno_perfcntr.h"

#include "tu_descriptor_set.h"
#include "tu_util.h"

/* Pre-declarations needed for WSI entrypoints */
struct wl_surface;
struct wl_display;
typedef struct xcb_connection_t xcb_connection_t;
typedef uint32_t xcb_visualid_t;
typedef uint32_t xcb_window_t;

#include <vulkan/vk_android_native_buffer.h>
#include <vulkan/vk_icd.h>
#include <vulkan/vulkan.h>

#include "tu_entrypoints.h"

#include "vk_format.h"

#define MAX_VBS 32
#define MAX_VERTEX_ATTRIBS 32
#define MAX_RTS 8
#define MAX_VSC_PIPES 32
#define MAX_VIEWPORTS 16
#define MAX_SCISSORS 16
#define MAX_DISCARD_RECTANGLES 4
#define MAX_PUSH_CONSTANTS_SIZE 128
#define MAX_PUSH_DESCRIPTORS 32
#define MAX_DYNAMIC_UNIFORM_BUFFERS 16
#define MAX_DYNAMIC_STORAGE_BUFFERS 8
#define MAX_DYNAMIC_BUFFERS \
   (MAX_DYNAMIC_UNIFORM_BUFFERS + MAX_DYNAMIC_STORAGE_BUFFERS)
#define TU_MAX_DRM_DEVICES 8
#define MAX_VIEWS 16
#define MAX_BIND_POINTS 2 /* compute + graphics */
/* The Qualcomm driver exposes 0x20000058 */
#define MAX_STORAGE_BUFFER_RANGE 0x20000000
/* We use ldc for uniform buffer loads, just like the Qualcomm driver, so
 * expose the same maximum range.
 * TODO: The SIZE bitfield is 15 bits, and in 4-dword units, so the actual
 * range might be higher.
 */
#define MAX_UNIFORM_BUFFER_RANGE 0x10000

#define A6XX_TEX_CONST_DWORDS 16
#define A6XX_TEX_SAMP_DWORDS 4

#define COND(bool, val) ((bool) ? (val) : 0)
#define BIT(bit) (1u << (bit))

/* Whenever we generate an error, pass it through this function. Useful for
 * debugging, where we can break on it. Only call at error site, not when
 * propagating errors. Might be useful to plug in a stack trace here.
 */

struct tu_instance;

VkResult
__vk_errorf(struct tu_instance *instance,
            VkResult error,
            bool force_print,
            const char *file,
            int line,
            const char *format,
            ...) PRINTFLIKE(6, 7);

#define vk_error(instance, error) \
   __vk_errorf(instance, error, false, __FILE__, __LINE__, NULL);
#define vk_errorf(instance, error, format, ...) \
   __vk_errorf(instance, error, false, __FILE__, __LINE__, format, ##__VA_ARGS__);

/* Prints startup errors if TU_DEBUG=startup is set or on a debug driver
 * build.
 */
#define vk_startup_errorf(instance, error, format, ...) \
   __vk_errorf(instance, error, instance->debug_flags & TU_DEBUG_STARTUP, \
               __FILE__, __LINE__, format, ##__VA_ARGS__)

void
__tu_finishme(const char *file, int line, const char *format, ...)
   PRINTFLIKE(3, 4);

/**
 * Print a FINISHME message, including its source location.
 */
#define tu_finishme(format, ...) \
   do { \
      static bool reported = false; \
      if (!reported) { \
         __tu_finishme(__FILE__, __LINE__, format, ##__VA_ARGS__); \
         reported = true; \
      } \
   } while (0)

#define tu_stub() \
   do { \
      tu_finishme("stub %s", __func__); \
   } while (0)
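
/* Illustrative usage sketch for the error/FINISHME helpers above (hypothetical
 * call sites, not taken from the driver):
 *
 *    if (result != VK_SUCCESS)
 *       return vk_errorf(instance, result, "BO allocation failed: %u", flags);
 *
 *    tu_finishme("unhandled tiling mode %u", tile_mode);
 *
 * tu_finishme() reports each call site only once; tu_stub() is the variant for
 * entirely unimplemented entrypoints.
 */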

struct tu_memory_heap {
   /* Standard bits passed on to the client */
   VkDeviceSize size;
   VkMemoryHeapFlags flags;

   /** Copied from ANV:
    *
    * Driver-internal book-keeping.
    *
    * Align it to 64 bits to make atomic operations faster on 32 bit platforms.
    */
   VkDeviceSize used __attribute__ ((aligned (8)));
};

uint64_t
tu_get_system_heap_size(void);

struct tu_physical_device
{
   struct vk_physical_device vk;

   struct tu_instance *instance;

   const char *name;
   uint8_t driver_uuid[VK_UUID_SIZE];
   uint8_t device_uuid[VK_UUID_SIZE];
   uint8_t cache_uuid[VK_UUID_SIZE];

   struct wsi_device wsi_device;

   int local_fd;
   int master_fd;

   unsigned gpu_id;
   uint32_t gmem_size;
   uint64_t gmem_base;
   uint32_t ccu_offset_gmem;
   uint32_t ccu_offset_bypass;

   const struct fd_dev_info *info;

   int msm_major_version;
   int msm_minor_version;

   /* This is the driver's on-disk cache used as a fallback as opposed to
    * the pipeline cache defined by apps.
    */
   struct disk_cache *disk_cache;

   struct tu_memory_heap heap;
};

enum tu_debug_flags
{
   TU_DEBUG_STARTUP = 1 << 0,
   TU_DEBUG_NIR = 1 << 1,
   TU_DEBUG_NOBIN = 1 << 3,
   TU_DEBUG_SYSMEM = 1 << 4,
   TU_DEBUG_FORCEBIN = 1 << 5,
   TU_DEBUG_NOUBWC = 1 << 6,
   TU_DEBUG_NOMULTIPOS = 1 << 7,
   TU_DEBUG_NOLRZ = 1 << 8,
   TU_DEBUG_PERFC = 1 << 9,
};

struct tu_instance
{
   struct vk_instance vk;

   uint32_t api_version;
   int physical_device_count;
   struct tu_physical_device physical_devices[TU_MAX_DRM_DEVICES];

   enum tu_debug_flags debug_flags;
};

VkResult
tu_wsi_init(struct tu_physical_device *physical_device);
void
tu_wsi_finish(struct tu_physical_device *physical_device);

bool
tu_instance_extension_supported(const char *name);
uint32_t
tu_physical_device_api_version(struct tu_physical_device *dev);
bool
tu_physical_device_extension_supported(struct tu_physical_device *dev,
                                       const char *name);

struct cache_entry;

struct tu_pipeline_cache
{
   struct vk_object_base base;

   struct tu_device *device;
   pthread_mutex_t mutex;

   uint32_t total_size;
   uint32_t table_size;
   uint32_t kernel_count;
   struct cache_entry **hash_table;
   bool modified;

   VkAllocationCallbacks alloc;
};

struct tu_pipeline_key
{
};


/* queue types */
#define TU_QUEUE_GENERAL 0

#define TU_MAX_QUEUE_FAMILIES 1

struct tu_syncobj;

struct tu_queue
{
   struct vk_object_base base;

   struct tu_device *device;
   uint32_t queue_family_index;
   int queue_idx;
   VkDeviceQueueCreateFlags flags;

   uint32_t msm_queue_id;
   int fence;

   /* Queue containing deferred submits */
   struct list_head queued_submits;
};

struct tu_bo
{
   uint32_t gem_handle;
   uint64_t size;
   uint64_t iova;
   void *map;
};

enum global_shader {
   GLOBAL_SH_VS,
   GLOBAL_SH_FS_BLIT,
   GLOBAL_SH_FS_BLIT_ZSCALE,
   GLOBAL_SH_FS_CLEAR0,
   GLOBAL_SH_FS_CLEAR_MAX = GLOBAL_SH_FS_CLEAR0 + MAX_RTS,
   GLOBAL_SH_COUNT,
};

#define TU_BORDER_COLOR_COUNT 4096
#define TU_BORDER_COLOR_BUILTIN 6

/* This struct defines the layout of the global_bo */
struct tu6_global
{
   /* clear/blit shaders, all <= 16 instrs (16 instr = 1 instrlen unit) */
   instr_t shaders[GLOBAL_SH_COUNT][16];

   uint32_t seqno_dummy; /* dummy seqno for CP_EVENT_WRITE */
   uint32_t _pad0;
   volatile uint32_t vsc_draw_overflow;
   uint32_t _pad1;
   volatile uint32_t vsc_prim_overflow;
   uint32_t _pad2;
   uint64_t predicate;

   /* scratch space for VPC_SO[i].FLUSH_BASE_LO/HI, start on 32 byte boundary. */
   struct {
      uint32_t offset;
      uint32_t pad[7];
   } flush_base[4];

   ALIGN16 uint32_t cs_indirect_xyz[3];

   /* note: larger global bo will be used for customBorderColors */
   struct bcolor_entry bcolor_builtin[TU_BORDER_COLOR_BUILTIN], bcolor[];
};
#define gb_offset(member) offsetof(struct tu6_global, member)
#define global_iova(cmd, member) ((cmd)->device->global_bo.iova + gb_offset(member))
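
/* Illustrative sketch: gb_offset()/global_iova() resolve a member of
 * struct tu6_global to its byte offset within, or GPU address inside, the
 * device's global BO. A hypothetical use when a packet needs the address of
 * the dummy seqno and the offset of a streamout flush slot:
 *
 *    uint64_t seqno_iova = global_iova(cmd, seqno_dummy);
 *    uint32_t flush_off  = gb_offset(flush_base[0].offset);
 */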

void tu_init_clear_blit_shaders(struct tu6_global *global);

/* extra space in vsc draw/prim streams */
#define VSC_PAD 0x40

struct tu_device
{
   struct vk_device vk;
   struct tu_instance *instance;

   struct tu_queue *queues[TU_MAX_QUEUE_FAMILIES];
   int queue_count[TU_MAX_QUEUE_FAMILIES];

   struct tu_physical_device *physical_device;
   int fd;
   int _lost;

   struct ir3_compiler *compiler;

   /* Backup in-memory cache to be used if the app doesn't provide one */
   struct tu_pipeline_cache *mem_cache;

#define MIN_SCRATCH_BO_SIZE_LOG2 12 /* A page */

   /* Currently the kernel driver uses a 32-bit GPU address space, but it
    * should be impossible to go beyond 48 bits.
    */
   struct {
      struct tu_bo bo;
      mtx_t construct_mtx;
      bool initialized;
   } scratch_bos[48 - MIN_SCRATCH_BO_SIZE_LOG2];

   struct tu_bo global_bo;

   uint32_t vsc_draw_strm_pitch;
   uint32_t vsc_prim_strm_pitch;
   BITSET_DECLARE(custom_border_color, TU_BORDER_COLOR_COUNT);
   mtx_t mutex;

   /* bo list for submits: */
   struct drm_msm_gem_submit_bo *bo_list;
   /* map bo handles to bo list index: */
   uint32_t *bo_idx;
   uint32_t bo_count, bo_list_size, bo_idx_size;
   mtx_t bo_mutex;

   /* Command streams to set pass index to a scratch reg */
   struct tu_cs *perfcntrs_pass_cs;
   struct tu_cs_entry *perfcntrs_pass_cs_entries;

   /* Condition variable for timeline semaphore to notify waiters when a
    * new submit is executed. */
   pthread_cond_t timeline_cond;
   pthread_mutex_t submit_mutex;

#ifdef ANDROID
   const void *gralloc;
   enum {
      TU_GRALLOC_UNKNOWN,
      TU_GRALLOC_CROS,
      TU_GRALLOC_OTHER,
   } gralloc_type;
#endif
};

VkResult _tu_device_set_lost(struct tu_device *device,
                             const char *msg, ...) PRINTFLIKE(2, 3);
#define tu_device_set_lost(dev, ...) \
   _tu_device_set_lost(dev, __VA_ARGS__)

static inline bool
tu_device_is_lost(struct tu_device *device)
{
   return unlikely(p_atomic_read(&device->_lost));
}
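
/* Illustrative sketch of the intended lost-device pattern (hypothetical call
 * sites): mark the device lost when a submission fails irrecoverably, and have
 * later entrypoints bail out early:
 *
 *    if (ret)
 *       return tu_device_set_lost(dev, "submit failed: %d", ret);
 *
 *    if (tu_device_is_lost(dev))
 *       return VK_ERROR_DEVICE_LOST;
 */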

VkResult
tu_device_submit_deferred_locked(struct tu_device *dev);

enum tu_bo_alloc_flags
{
   TU_BO_ALLOC_NO_FLAGS = 0,
   TU_BO_ALLOC_ALLOW_DUMP = 1 << 0,
   TU_BO_ALLOC_GPU_READ_ONLY = 1 << 1,
};

VkResult
tu_bo_init_new(struct tu_device *dev, struct tu_bo *bo, uint64_t size,
               enum tu_bo_alloc_flags flags);
VkResult
tu_bo_init_dmabuf(struct tu_device *dev,
                  struct tu_bo *bo,
                  uint64_t size,
                  int fd);
int
tu_bo_export_dmabuf(struct tu_device *dev, struct tu_bo *bo);
void
tu_bo_finish(struct tu_device *dev, struct tu_bo *bo);
VkResult
tu_bo_map(struct tu_device *dev, struct tu_bo *bo);

/* Get a scratch bo for use inside a command buffer. This will always return
 * the same bo given the same size or similar sizes, so only one scratch bo
 * can be used at the same time. It's meant for short-lived things where we
 * need to write to some piece of memory, read from it, and then immediately
 * discard it.
 */
VkResult
tu_get_scratch_bo(struct tu_device *dev, uint64_t size, struct tu_bo **bo);
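
/* Illustrative sketch of the scratch-BO contract described above (hypothetical
 * call site): request a BO at least as large as needed, use it right away, and
 * do not hold on to it across other scratch users:
 *
 *    struct tu_bo *scratch;
 *    VkResult result = tu_get_scratch_bo(dev, size, &scratch);
 *    if (result != VK_SUCCESS)
 *       return result;
 *    ... emit packets that write to and then read back scratch->iova ...
 */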

struct tu_cs_entry
{
   /* No ownership */
   const struct tu_bo *bo;

   uint32_t size;
   uint32_t offset;
};

struct tu_cs_memory {
   uint32_t *map;
   uint64_t iova;
};

struct tu_draw_state {
   uint64_t iova : 48;
   uint32_t size : 16;
};

enum tu_dynamic_state
{
   /* re-use VK_DYNAMIC_STATE_ enums for non-extended dynamic states */
   TU_DYNAMIC_STATE_SAMPLE_LOCATIONS = VK_DYNAMIC_STATE_STENCIL_REFERENCE + 1,
   TU_DYNAMIC_STATE_RB_DEPTH_CNTL,
   TU_DYNAMIC_STATE_RB_STENCIL_CNTL,
   TU_DYNAMIC_STATE_VB_STRIDE,
   TU_DYNAMIC_STATE_COUNT,
   /* no associated draw state: */
   TU_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY = TU_DYNAMIC_STATE_COUNT,
   /* re-use the line width enum as it uses GRAS_SU_CNTL: */
   TU_DYNAMIC_STATE_GRAS_SU_CNTL = VK_DYNAMIC_STATE_LINE_WIDTH,
};

enum tu_draw_state_group_id
{
   TU_DRAW_STATE_PROGRAM_CONFIG,
   TU_DRAW_STATE_PROGRAM,
   TU_DRAW_STATE_PROGRAM_BINNING,
   TU_DRAW_STATE_TESS,
   TU_DRAW_STATE_VB,
   TU_DRAW_STATE_VI,
   TU_DRAW_STATE_VI_BINNING,
   TU_DRAW_STATE_RAST,
   TU_DRAW_STATE_BLEND,
   TU_DRAW_STATE_SHADER_GEOM_CONST,
   TU_DRAW_STATE_FS_CONST,
   TU_DRAW_STATE_DESC_SETS,
   TU_DRAW_STATE_DESC_SETS_LOAD,
   TU_DRAW_STATE_VS_PARAMS,
   TU_DRAW_STATE_INPUT_ATTACHMENTS_GMEM,
   TU_DRAW_STATE_INPUT_ATTACHMENTS_SYSMEM,
   TU_DRAW_STATE_LRZ,
   TU_DRAW_STATE_DEPTH_PLANE,

   /* dynamic state related draw states */
   TU_DRAW_STATE_DYNAMIC,
   TU_DRAW_STATE_COUNT = TU_DRAW_STATE_DYNAMIC + TU_DYNAMIC_STATE_COUNT,
};

enum tu_cs_mode
{

   /*
    * A command stream in TU_CS_MODE_GROW mode grows automatically whenever it
    * is full. tu_cs_begin must be called before command packet emission and
    * tu_cs_end must be called after.
    *
    * This mode may create multiple entries internally. The entries must be
    * submitted together.
    */
   TU_CS_MODE_GROW,

   /*
    * A command stream in TU_CS_MODE_EXTERNAL mode wraps an external,
    * fixed-size buffer. tu_cs_begin and tu_cs_end are optional and have no
    * effect on it.
    *
    * This mode does not create any entry or any BO.
    */
   TU_CS_MODE_EXTERNAL,

   /*
    * A command stream in TU_CS_MODE_SUB_STREAM mode does not support direct
    * command packet emission. tu_cs_begin_sub_stream must be called to get a
    * sub-stream to emit command packets to. When done with the sub-stream,
    * tu_cs_end_sub_stream must be called.
    *
    * This mode does not create any entry internally.
    */
   TU_CS_MODE_SUB_STREAM,
};
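
/* Illustrative sketch of the TU_CS_MODE_GROW contract described above
 * (assumes the tu_cs_init() helper declared in tu_cs.h):
 *
 *    struct tu_cs cs;
 *    tu_cs_init(&cs, device, TU_CS_MODE_GROW, 4096);
 *    tu_cs_begin(&cs);
 *    ... emit command packets ...
 *    tu_cs_end(&cs);
 *
 * For TU_CS_MODE_SUB_STREAM, tu_cs_begin_sub_stream() hands back a sub-stream
 * to emit into and tu_cs_end_sub_stream() turns it into a tu_cs_entry.
 */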

struct tu_cs
{
   uint32_t *start;
   uint32_t *cur;
   uint32_t *reserved_end;
   uint32_t *end;

   struct tu_device *device;
   enum tu_cs_mode mode;
   uint32_t next_bo_size;

   struct tu_cs_entry *entries;
   uint32_t entry_count;
   uint32_t entry_capacity;

   struct tu_bo **bos;
   uint32_t bo_count;
   uint32_t bo_capacity;

   /* state for cond_exec_start/cond_exec_end */
   uint32_t cond_flags;
   uint32_t *cond_dwords;
};

struct tu_device_memory
{
   struct vk_object_base base;

   struct tu_bo bo;
};

struct tu_descriptor_range
{
   uint64_t va;
   uint32_t size;
};

struct tu_descriptor_set
{
   struct vk_object_base base;

   const struct tu_descriptor_set_layout *layout;
   struct tu_descriptor_pool *pool;
   uint32_t size;

   uint64_t va;
   uint32_t *mapped_ptr;

   uint32_t *dynamic_descriptors;
};

struct tu_descriptor_pool_entry
{
   uint32_t offset;
   uint32_t size;
   struct tu_descriptor_set *set;
};

struct tu_descriptor_pool
{
   struct vk_object_base base;

   struct tu_bo bo;
   uint64_t current_offset;
   uint64_t size;

   uint8_t *host_memory_base;
   uint8_t *host_memory_ptr;
   uint8_t *host_memory_end;

   uint32_t entry_count;
   uint32_t max_entry_count;
   struct tu_descriptor_pool_entry entries[0];
};

struct tu_descriptor_update_template_entry
{
   VkDescriptorType descriptor_type;

   /* The number of descriptors to update */
   uint32_t descriptor_count;

   /* Into mapped_ptr or dynamic_descriptors, in units of the respective array
    */
   uint32_t dst_offset;

   /* In dwords. Not valid/used for dynamic descriptors */
   uint32_t dst_stride;

   uint32_t buffer_offset;

   /* Only valid for combined image samplers and samplers */
   uint16_t has_sampler;

   /* In bytes */
   size_t src_offset;
   size_t src_stride;

   /* For push descriptors */
   const struct tu_sampler *immutable_samplers;
};

struct tu_descriptor_update_template
{
   struct vk_object_base base;

   uint32_t entry_count;
   VkPipelineBindPoint bind_point;
   struct tu_descriptor_update_template_entry entry[0];
};

struct tu_buffer
{
   struct vk_object_base base;

   VkDeviceSize size;

   VkBufferUsageFlags usage;
   VkBufferCreateFlags flags;

   struct tu_bo *bo;
   VkDeviceSize bo_offset;
};

static inline uint64_t
tu_buffer_iova(struct tu_buffer *buffer)
{
   return buffer->bo->iova + buffer->bo_offset;
}

const char *
tu_get_debug_option_name(int id);

const char *
tu_get_perftest_option_name(int id);

struct tu_descriptor_state
{
   struct tu_descriptor_set *sets[MAX_SETS];
   struct tu_descriptor_set push_set;
   uint32_t dynamic_descriptors[MAX_DYNAMIC_BUFFERS * A6XX_TEX_CONST_DWORDS];
};

enum tu_cmd_dirty_bits
{
   TU_CMD_DIRTY_VERTEX_BUFFERS = BIT(0),
   TU_CMD_DIRTY_VB_STRIDE = BIT(1),
   TU_CMD_DIRTY_GRAS_SU_CNTL = BIT(2),
   TU_CMD_DIRTY_RB_DEPTH_CNTL = BIT(3),
   TU_CMD_DIRTY_RB_STENCIL_CNTL = BIT(4),
   TU_CMD_DIRTY_DESC_SETS_LOAD = BIT(5),
   TU_CMD_DIRTY_COMPUTE_DESC_SETS_LOAD = BIT(6),
   TU_CMD_DIRTY_SHADER_CONSTS = BIT(7),
   TU_CMD_DIRTY_LRZ = BIT(8),
   TU_CMD_DIRTY_VS_PARAMS = BIT(9),
   /* all draw states were disabled and need to be re-enabled: */
   TU_CMD_DIRTY_DRAW_STATE = BIT(10)
};

/* There are only three cache domains we have to care about: the CCU, or
 * color cache unit, which is used for color and depth/stencil attachments
 * and copy/blit destinations, and is split conceptually into color and depth,
 * and the universal cache or UCHE which is used for pretty much everything
 * else, except for the CP (uncached) and host. We need to flush whenever data
 * crosses these boundaries.
 */

enum tu_cmd_access_mask {
   TU_ACCESS_UCHE_READ = 1 << 0,
   TU_ACCESS_UCHE_WRITE = 1 << 1,
   TU_ACCESS_CCU_COLOR_READ = 1 << 2,
   TU_ACCESS_CCU_COLOR_WRITE = 1 << 3,
   TU_ACCESS_CCU_DEPTH_READ = 1 << 4,
   TU_ACCESS_CCU_DEPTH_WRITE = 1 << 5,

   /* Experiments have shown that while it's safe to avoid flushing the CCU
    * after each blit/renderpass, it's not safe to assume that subsequent
    * lookups with a different attachment state will hit unflushed cache
    * entries. That is, the CCU needs to be flushed and possibly invalidated
    * when accessing memory with a different attachment state. Writing to an
    * attachment under the following conditions after clearing using the
    * normal 2d engine path is known to have issues:
    *
    * - It isn't the 0'th layer.
    * - There is more than one attachment, and this isn't the 0'th attachment
    *   (this seems to also depend on the cpp of the attachments).
    *
    * Our best guess is that the layer/MRT state is used when computing
    * the location of a cache entry in CCU, to avoid conflicts. We assume that
    * any access in a renderpass after or before an access by a transfer needs
    * a flush/invalidate, and use the _INCOHERENT variants to represent access
    * by a transfer.
    */
   TU_ACCESS_CCU_COLOR_INCOHERENT_READ = 1 << 6,
   TU_ACCESS_CCU_COLOR_INCOHERENT_WRITE = 1 << 7,
   TU_ACCESS_CCU_DEPTH_INCOHERENT_READ = 1 << 8,
   TU_ACCESS_CCU_DEPTH_INCOHERENT_WRITE = 1 << 9,

   /* Accesses by the host */
   TU_ACCESS_HOST_READ = 1 << 10,
   TU_ACCESS_HOST_WRITE = 1 << 11,

   /* Accesses by a GPU engine which bypasses any cache. e.g. writes via
    * CP_EVENT_WRITE::BLIT and the CP are SYSMEM_WRITE.
    */
   TU_ACCESS_SYSMEM_READ = 1 << 12,
   TU_ACCESS_SYSMEM_WRITE = 1 << 13,

   /* Set if a WFI is required. This can be required for:
    * - 2D engine which (on some models) doesn't wait for flushes to complete
    *   before starting
    * - CP draw indirect opcodes, where we need to wait for any flushes to
    *   complete but the CP implicitly waits for WFI's to complete and
    *   therefore we only need a WFI after the flushes.
    */
   TU_ACCESS_WFI_READ = 1 << 14,

   /* Set if a CP_WAIT_FOR_ME is required due to the data being read by the CP
    * without it waiting for any WFI.
    */
   TU_ACCESS_WFM_READ = 1 << 15,

   /* Memory writes from the CP start in-order with draws and event writes,
    * but execute asynchronously and hence need a CP_WAIT_MEM_WRITES if read.
    */
   TU_ACCESS_CP_WRITE = 1 << 16,

   TU_ACCESS_READ =
      TU_ACCESS_UCHE_READ |
      TU_ACCESS_CCU_COLOR_READ |
      TU_ACCESS_CCU_DEPTH_READ |
      TU_ACCESS_CCU_COLOR_INCOHERENT_READ |
      TU_ACCESS_CCU_DEPTH_INCOHERENT_READ |
      TU_ACCESS_HOST_READ |
      TU_ACCESS_SYSMEM_READ |
      TU_ACCESS_WFI_READ |
      TU_ACCESS_WFM_READ,

   TU_ACCESS_WRITE =
      TU_ACCESS_UCHE_WRITE |
      TU_ACCESS_CCU_COLOR_WRITE |
      TU_ACCESS_CCU_COLOR_INCOHERENT_WRITE |
      TU_ACCESS_CCU_DEPTH_WRITE |
      TU_ACCESS_CCU_DEPTH_INCOHERENT_WRITE |
      TU_ACCESS_HOST_WRITE |
      TU_ACCESS_SYSMEM_WRITE |
      TU_ACCESS_CP_WRITE,

   TU_ACCESS_ALL =
      TU_ACCESS_READ |
      TU_ACCESS_WRITE,
};

enum tu_cmd_flush_bits {
   TU_CMD_FLAG_CCU_FLUSH_DEPTH = 1 << 0,
   TU_CMD_FLAG_CCU_FLUSH_COLOR = 1 << 1,
   TU_CMD_FLAG_CCU_INVALIDATE_DEPTH = 1 << 2,
   TU_CMD_FLAG_CCU_INVALIDATE_COLOR = 1 << 3,
   TU_CMD_FLAG_CACHE_FLUSH = 1 << 4,
   TU_CMD_FLAG_CACHE_INVALIDATE = 1 << 5,
   TU_CMD_FLAG_WAIT_MEM_WRITES = 1 << 6,
   TU_CMD_FLAG_WAIT_FOR_IDLE = 1 << 7,
   TU_CMD_FLAG_WAIT_FOR_ME = 1 << 8,

   TU_CMD_FLAG_ALL_FLUSH =
      TU_CMD_FLAG_CCU_FLUSH_DEPTH |
      TU_CMD_FLAG_CCU_FLUSH_COLOR |
      TU_CMD_FLAG_CACHE_FLUSH |
      /* Treat the CP as a sort of "cache" which may need to be "flushed" via
       * waiting for writes to land with WAIT_FOR_MEM_WRITES.
       */
      TU_CMD_FLAG_WAIT_MEM_WRITES,

   TU_CMD_FLAG_GPU_INVALIDATE =
      TU_CMD_FLAG_CCU_INVALIDATE_DEPTH |
      TU_CMD_FLAG_CCU_INVALIDATE_COLOR |
      TU_CMD_FLAG_CACHE_INVALIDATE,

   TU_CMD_FLAG_ALL_INVALIDATE =
      TU_CMD_FLAG_GPU_INVALIDATE |
      /* Treat the CP as a sort of "cache" which may need to be "invalidated"
       * via waiting for UCHE/CCU flushes to land with WFI/WFM.
       */
      TU_CMD_FLAG_WAIT_FOR_IDLE |
      TU_CMD_FLAG_WAIT_FOR_ME,
};

/* Changing the CCU from sysmem mode to gmem mode or vice-versa is pretty
 * heavy, involving a CCU cache flush/invalidate and a WFI in order to change
 * which part of the gmem is used by the CCU. Here we keep track of the
 * state of the CCU.
 */
enum tu_cmd_ccu_state {
   TU_CMD_CCU_SYSMEM,
   TU_CMD_CCU_GMEM,
   TU_CMD_CCU_UNKNOWN,
};

struct tu_cache_state {
   /* Caches which must be made available (flushed) eventually if there are
    * any users outside that cache domain, and caches which must be
    * invalidated eventually if there are any reads.
    */
   enum tu_cmd_flush_bits pending_flush_bits;
   /* Pending flushes */
   enum tu_cmd_flush_bits flush_bits;
};

enum tu_lrz_force_disable_mask {
   TU_LRZ_FORCE_DISABLE_LRZ = 1 << 0,
   TU_LRZ_FORCE_DISABLE_WRITE = 1 << 1,
};

enum tu_lrz_direction {
   TU_LRZ_UNKNOWN,
   /* Depth func less/less-than: */
   TU_LRZ_LESS,
   /* Depth func greater/greater-than: */
   TU_LRZ_GREATER,
};

struct tu_lrz_pipeline
{
   uint32_t force_disable_mask;
   bool fs_has_kill;
   bool force_late_z;
   bool early_fragment_tests;
};

struct tu_lrz_state
{
   /* Depth/Stencil image currently in use to do LRZ */
   struct tu_image *image;
   bool valid : 1;
   struct tu_draw_state state;
   enum tu_lrz_direction prev_direction;
};

struct tu_vs_params {
   uint32_t params_offset;
   uint32_t vertex_offset;
   uint32_t first_instance;
};

struct tu_cmd_state
{
   uint32_t dirty;

   struct tu_pipeline *pipeline;
   struct tu_pipeline *compute_pipeline;

   /* Vertex buffers, viewports, and scissors:
    * the state for these can be updated partially, so we need to save it
    * to be able to emit a complete draw state
    */
   struct {
      uint64_t base;
      uint32_t size;
      uint32_t stride;
   } vb[MAX_VBS];
   VkViewport viewport[MAX_VIEWPORTS];
   VkRect2D scissor[MAX_SCISSORS];
   uint32_t max_viewport, max_scissor;

   /* for dynamic states that can't be emitted directly */
   uint32_t dynamic_stencil_mask;
   uint32_t dynamic_stencil_wrmask;
   uint32_t dynamic_stencil_ref;

   uint32_t gras_su_cntl, rb_depth_cntl, rb_stencil_cntl;
   enum pc_di_primtype primtype;

   /* saved states to re-emit in TU_CMD_DIRTY_DRAW_STATE case */
   struct tu_draw_state dynamic_state[TU_DYNAMIC_STATE_COUNT];
   struct tu_draw_state vertex_buffers;
   struct tu_draw_state shader_const[2];
   struct tu_draw_state desc_sets;

   struct tu_draw_state vs_params;

   /* Index buffer */
   uint64_t index_va;
   uint32_t max_index_count;
   uint8_t index_size;

   /* because streamout base has to be 32-byte aligned
    * there is an extra offset to deal with when it is
    * unaligned
    */
   uint8_t streamout_offset[IR3_MAX_SO_BUFFERS];

   /* Renderpasses are tricky, because we may need to flush differently if
    * using sysmem vs. gmem and therefore we have to delay any flushing that
    * happens before a renderpass. So we have to have two copies of the flush
    * state, one for intra-renderpass flushes (i.e. renderpass dependencies)
    * and one for outside a renderpass.
    */
   struct tu_cache_state cache;
   struct tu_cache_state renderpass_cache;

   enum tu_cmd_ccu_state ccu_state;

   const struct tu_render_pass *pass;
   const struct tu_subpass *subpass;
   const struct tu_framebuffer *framebuffer;
   VkRect2D render_area;

   struct tu_cs_entry tile_store_ib;

   bool xfb_used;
   bool has_tess;
   bool has_subpass_predication;
   bool predication_active;

   struct tu_lrz_state lrz;

   struct tu_draw_state depth_plane_state;

   struct tu_vs_params last_vs_params;
};

struct tu_cmd_pool
{
   struct vk_object_base base;

   VkAllocationCallbacks alloc;
   struct list_head cmd_buffers;
   struct list_head free_cmd_buffers;
   uint32_t queue_family_index;
};

enum tu_cmd_buffer_status
{
   TU_CMD_BUFFER_STATUS_INVALID,
   TU_CMD_BUFFER_STATUS_INITIAL,
   TU_CMD_BUFFER_STATUS_RECORDING,
   TU_CMD_BUFFER_STATUS_EXECUTABLE,
   TU_CMD_BUFFER_STATUS_PENDING,
};

struct tu_cmd_buffer
{
   struct vk_object_base base;

   struct tu_device *device;

   struct tu_cmd_pool *pool;
   struct list_head pool_link;

   VkCommandBufferUsageFlags usage_flags;
   VkCommandBufferLevel level;
   enum tu_cmd_buffer_status status;

   struct tu_cmd_state state;
   uint32_t queue_family_index;

   uint32_t push_constants[MAX_PUSH_CONSTANTS_SIZE / 4];
   VkShaderStageFlags push_constant_stages;
   struct tu_descriptor_set meta_push_descriptors;

   struct tu_descriptor_state descriptors[MAX_BIND_POINTS];

   VkResult record_result;

   struct tu_cs cs;
   struct tu_cs draw_cs;
   struct tu_cs draw_epilogue_cs;
   struct tu_cs sub_cs;

   uint32_t vsc_draw_strm_pitch;
   uint32_t vsc_prim_strm_pitch;
};

/* Temporary struct for tracking a register state to be written, used by
 * a6xx-pack.h and tu_cs_emit_regs()
 */
struct tu_reg_value {
   uint32_t reg;
   uint64_t value;
   bool is_address;
   struct tu_bo *bo;
   bool bo_write;
   uint32_t bo_offset;
   uint32_t bo_shift;
};
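
/* Illustrative sketch (assumes the tu_cs_emit_regs() helper from tu_cs.h and
 * the register builders generated into a6xx-pack.h; the field name is
 * hypothetical): the packing macros build one struct tu_reg_value per
 * register, e.g.
 *
 *    tu_cs_emit_regs(cs, A6XX_GRAS_SU_CNTL(.linehalfwidth = 0.5f));
 *
 * which tu_cs_emit_regs() then writes out to the command stream.
 */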


void tu_emit_cache_flush_renderpass(struct tu_cmd_buffer *cmd_buffer,
                                    struct tu_cs *cs);

void tu_emit_cache_flush_ccu(struct tu_cmd_buffer *cmd_buffer,
                             struct tu_cs *cs,
                             enum tu_cmd_ccu_state ccu_state);

void
tu6_emit_event_write(struct tu_cmd_buffer *cmd,
                     struct tu_cs *cs,
                     enum vgt_event_type event);

static inline struct tu_descriptor_state *
tu_get_descriptors_state(struct tu_cmd_buffer *cmd_buffer,
                         VkPipelineBindPoint bind_point)
{
   return &cmd_buffer->descriptors[bind_point];
}

struct tu_event
{
   struct vk_object_base base;
   struct tu_bo bo;
};

struct tu_push_constant_range
{
   uint32_t lo;
   uint32_t count;
};

struct tu_shader
{
   struct ir3_shader *ir3_shader;

   struct tu_push_constant_range push_consts;
   uint8_t active_desc_sets;
   bool multi_pos_output;
};

bool
tu_nir_lower_multiview(nir_shader *nir, uint32_t mask, bool *multi_pos_output,
                       struct tu_device *dev);

nir_shader *
tu_spirv_to_nir(struct tu_device *dev,
                const VkPipelineShaderStageCreateInfo *stage_info,
                gl_shader_stage stage);

struct tu_shader *
tu_shader_create(struct tu_device *dev,
                 nir_shader *nir,
                 unsigned multiview_mask,
                 struct tu_pipeline_layout *layout,
                 const VkAllocationCallbacks *alloc);

void
tu_shader_destroy(struct tu_device *dev,
                  struct tu_shader *shader,
                  const VkAllocationCallbacks *alloc);

struct tu_program_descriptor_linkage
{
   struct ir3_const_state const_state;

   uint32_t constlen;

   struct tu_push_constant_range push_consts;
};

struct tu_pipeline_executable {
   gl_shader_stage stage;

   struct ir3_info stats;
   bool is_binning;

   char *nir_from_spirv;
   char *nir_final;
   char *disasm;
};

struct tu_pipeline
{
   struct vk_object_base base;

   struct tu_cs cs;

   /* Separate BO for private memory since it should be GPU writable */
   struct tu_bo pvtmem_bo;

   struct tu_pipeline_layout *layout;

   bool need_indirect_descriptor_sets;
   VkShaderStageFlags active_stages;
   uint32_t active_desc_sets;

   /* mask of enabled dynamic states
    * if BIT(i) is set, pipeline->dynamic_state[i] is *NOT* used
    */
   uint32_t dynamic_state_mask;
   struct tu_draw_state dynamic_state[TU_DYNAMIC_STATE_COUNT];

   /* for dynamic states which use the same register: */
   uint32_t gras_su_cntl, gras_su_cntl_mask;
   uint32_t rb_depth_cntl, rb_depth_cntl_mask;
   uint32_t rb_stencil_cntl, rb_stencil_cntl_mask;
   uint32_t stencil_wrmask;

   bool rb_depth_cntl_disable;

   /* draw states for the pipeline */
   struct tu_draw_state load_state, rast_state, blend_state;

   /* for vertex buffers state */
   uint32_t num_vbs;

   struct
   {
      struct tu_draw_state config_state;
      struct tu_draw_state state;
      struct tu_draw_state binning_state;

      struct tu_program_descriptor_linkage link[MESA_SHADER_STAGES];
   } program;

   struct
   {
      struct tu_draw_state state;
      struct tu_draw_state binning_state;
   } vi;

   struct
   {
      enum pc_di_primtype primtype;
      bool primitive_restart;
   } ia;

   struct
   {
      uint32_t patch_type;
      uint32_t param_stride;
      uint32_t hs_bo_regid;
      uint32_t ds_bo_regid;
      bool upper_left_domain_origin;
   } tess;

   struct
   {
      uint32_t local_size[3];
      uint32_t subgroup_size;
   } compute;

   bool provoking_vertex_last;

   struct tu_lrz_pipeline lrz;

   void *executables_mem_ctx;
   /* tu_pipeline_executable */
   struct util_dynarray executables;
};

void
tu6_emit_viewport(struct tu_cs *cs, const VkViewport *viewport, uint32_t num_viewport);

void
tu6_emit_scissor(struct tu_cs *cs, const VkRect2D *scs, uint32_t scissor_count);

void
tu6_clear_lrz(struct tu_cmd_buffer *cmd, struct tu_cs *cs, struct tu_image* image, const VkClearValue *value);

void
tu6_emit_sample_locations(struct tu_cs *cs, const VkSampleLocationsInfoEXT *samp_loc);

void
tu6_emit_depth_bias(struct tu_cs *cs,
                    float constant_factor,
                    float clamp,
                    float slope_factor);

void tu6_emit_msaa(struct tu_cs *cs, VkSampleCountFlagBits samples);

void tu6_emit_window_scissor(struct tu_cs *cs, uint32_t x1, uint32_t y1, uint32_t x2, uint32_t y2);

void tu6_emit_window_offset(struct tu_cs *cs, uint32_t x1, uint32_t y1);

struct tu_pvtmem_config {
   uint64_t iova;
   uint32_t per_fiber_size;
   uint32_t per_sp_size;
   bool per_wave;
};

void
tu6_emit_xs_config(struct tu_cs *cs,
                   gl_shader_stage stage,
                   const struct ir3_shader_variant *xs);

void
tu6_emit_xs(struct tu_cs *cs,
            gl_shader_stage stage,
            const struct ir3_shader_variant *xs,
            const struct tu_pvtmem_config *pvtmem,
            uint64_t binary_iova);

void
tu6_emit_vpc(struct tu_cs *cs,
             const struct ir3_shader_variant *vs,
             const struct ir3_shader_variant *hs,
             const struct ir3_shader_variant *ds,
             const struct ir3_shader_variant *gs,
             const struct ir3_shader_variant *fs,
             uint32_t patch_control_points);

void
tu6_emit_fs_inputs(struct tu_cs *cs, const struct ir3_shader_variant *fs);

struct tu_image_view;

void
tu_resolve_sysmem(struct tu_cmd_buffer *cmd,
                  struct tu_cs *cs,
                  struct tu_image_view *src,
                  struct tu_image_view *dst,
                  uint32_t layer_mask,
                  uint32_t layers,
                  const VkRect2D *rect);

void
tu_clear_sysmem_attachment(struct tu_cmd_buffer *cmd,
                           struct tu_cs *cs,
                           uint32_t a,
                           const VkRenderPassBeginInfo *info);

void
tu_clear_gmem_attachment(struct tu_cmd_buffer *cmd,
                         struct tu_cs *cs,
                         uint32_t a,
                         const VkRenderPassBeginInfo *info);

void
tu_load_gmem_attachment(struct tu_cmd_buffer *cmd,
                        struct tu_cs *cs,
                        uint32_t a,
                        bool force_load);

/* expose this function to be able to emit load without checking LOAD_OP */
void
tu_emit_load_gmem_attachment(struct tu_cmd_buffer *cmd, struct tu_cs *cs, uint32_t a);

/* note: gmem store can also resolve */
void
tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
                         struct tu_cs *cs,
                         uint32_t a,
                         uint32_t gmem_a);

enum tu_supported_formats {
   FMT_VERTEX = 1,
   FMT_TEXTURE = 2,
   FMT_COLOR = 4,
};

struct tu_native_format
{
   enum a6xx_format fmt : 8;
   enum a3xx_color_swap swap : 8;
   enum a6xx_tile_mode tile_mode : 8;
   enum tu_supported_formats supported : 8;
};

struct tu_native_format tu6_format_vtx(VkFormat format);
struct tu_native_format tu6_format_color(VkFormat format, enum a6xx_tile_mode tile_mode);
struct tu_native_format tu6_format_texture(VkFormat format, enum a6xx_tile_mode tile_mode);

static inline enum a6xx_format
tu6_base_format(VkFormat format)
{
   /* note: tu6_format_color doesn't care about tiling for .fmt field */
   return tu6_format_color(format, TILE6_LINEAR).fmt;
}

struct tu_image
{
   struct vk_object_base base;

   /* The original VkFormat provided by the client. This may not match any
    * of the actual surface formats.
    */
   VkFormat vk_format;
   uint32_t level_count;
   uint32_t layer_count;

   struct fdl_layout layout[3];
   uint32_t total_size;

#ifdef ANDROID
   /* For VK_ANDROID_native_buffer, the WSI image owns the memory. */
   VkDeviceMemory owned_memory;
#endif

   /* Set when bound */
   struct tu_bo *bo;
   VkDeviceSize bo_offset;

   uint32_t lrz_height;
   uint32_t lrz_pitch;
   uint32_t lrz_offset;

   bool shareable;
};

static inline uint32_t
tu_get_layerCount(const struct tu_image *image,
                  const VkImageSubresourceRange *range)
{
   return range->layerCount == VK_REMAINING_ARRAY_LAYERS
             ? image->layer_count - range->baseArrayLayer
             : range->layerCount;
}

static inline uint32_t
tu_get_levelCount(const struct tu_image *image,
                  const VkImageSubresourceRange *range)
{
   return range->levelCount == VK_REMAINING_MIP_LEVELS
             ? image->level_count - range->baseMipLevel
             : range->levelCount;
}

struct tu_image_view
{
   struct vk_object_base base;

   struct tu_image *image; /**< VkImageViewCreateInfo::image */

   uint64_t base_addr;
   uint64_t ubwc_addr;
   uint32_t layer_size;
   uint32_t ubwc_layer_size;

   /* used to determine if fast gmem store path can be used */
   VkExtent2D extent;
   bool need_y2_align;

   bool ubwc_enabled;

   uint32_t descriptor[A6XX_TEX_CONST_DWORDS];

   /* Descriptor for use as a storage image as opposed to a sampled image.
    * This has a few differences for cube maps (e.g. type).
    */
   uint32_t storage_descriptor[A6XX_TEX_CONST_DWORDS];

   /* pre-filled register values */
   uint32_t PITCH;
   uint32_t FLAG_BUFFER_PITCH;

   uint32_t RB_MRT_BUF_INFO;
   uint32_t SP_FS_MRT_REG;

   uint32_t SP_PS_2D_SRC_INFO;
   uint32_t SP_PS_2D_SRC_SIZE;

   uint32_t RB_2D_DST_INFO;

   uint32_t RB_BLIT_DST_INFO;

   /* for d32s8 separate stencil */
   uint64_t stencil_base_addr;
   uint32_t stencil_layer_size;
   uint32_t stencil_PITCH;
};

struct tu_sampler_ycbcr_conversion {
   struct vk_object_base base;

   VkFormat format;
   VkSamplerYcbcrModelConversion ycbcr_model;
   VkSamplerYcbcrRange ycbcr_range;
   VkComponentMapping components;
   VkChromaLocation chroma_offsets[2];
   VkFilter chroma_filter;
};

struct tu_sampler {
   struct vk_object_base base;

   uint32_t descriptor[A6XX_TEX_SAMP_DWORDS];
   struct tu_sampler_ycbcr_conversion *ycbcr_sampler;
};

void
tu_cs_image_ref(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer);

void
tu_cs_image_ref_2d(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer, bool src);

void
tu_cs_image_flag_ref(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer);

void
tu_cs_image_stencil_ref(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer);

#define tu_image_view_stencil(iview, x) \
   ((iview->x & ~A6XX_##x##_COLOR_FORMAT__MASK) | A6XX_##x##_COLOR_FORMAT(FMT6_8_UINT))
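
/* Illustrative sketch: tu_image_view_stencil() rewrites one of the pre-filled
 * register values in tu_image_view so the stencil aspect is treated as
 * FMT6_8_UINT, e.g. (hypothetical call site):
 *
 *    uint32_t mrt_buf_info = tu_image_view_stencil(iview, RB_MRT_BUF_INFO);
 */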

VkResult
tu_gralloc_info(struct tu_device *device,
                const VkNativeBufferANDROID *gralloc_info,
                int *dma_buf,
                uint64_t *modifier);

VkResult
tu_import_memory_from_gralloc_handle(VkDevice device_h,
                                     int dma_buf,
                                     const VkAllocationCallbacks *alloc,
                                     VkImage image_h);

void
tu_image_view_init(struct tu_image_view *iview,
                   const VkImageViewCreateInfo *pCreateInfo,
                   bool limited_z24s8);

bool
ubwc_possible(VkFormat format, VkImageType type, VkImageUsageFlags usage, VkImageUsageFlags stencil_usage,
              const struct fd_dev_info *info, VkSampleCountFlagBits samples);

struct tu_buffer_view
{
   struct vk_object_base base;

   uint32_t descriptor[A6XX_TEX_CONST_DWORDS];

   struct tu_buffer *buffer;
};
void
tu_buffer_view_init(struct tu_buffer_view *view,
                    struct tu_device *device,
                    const VkBufferViewCreateInfo *pCreateInfo);

struct tu_attachment_info
{
   struct tu_image_view *attachment;
};

struct tu_framebuffer
{
   struct vk_object_base base;

   uint32_t width;
   uint32_t height;
   uint32_t layers;

   /* size of the first tile */
   VkExtent2D tile0;
   /* number of tiles */
   VkExtent2D tile_count;

   /* size of the first VSC pipe */
   VkExtent2D pipe0;
   /* number of VSC pipes */
   VkExtent2D pipe_count;

   /* pipe register values */
   uint32_t pipe_config[MAX_VSC_PIPES];
   uint32_t pipe_sizes[MAX_VSC_PIPES];

   uint32_t attachment_count;
   struct tu_attachment_info attachments[0];
};

void
tu_framebuffer_tiling_config(struct tu_framebuffer *fb,
                             const struct tu_device *device,
                             const struct tu_render_pass *pass);

struct tu_subpass_barrier {
   VkPipelineStageFlags src_stage_mask;
   VkAccessFlags src_access_mask;
   VkAccessFlags dst_access_mask;
   bool incoherent_ccu_color, incoherent_ccu_depth;
};

struct tu_subpass_attachment
{
   uint32_t attachment;
};

struct tu_subpass
{
   uint32_t input_count;
   uint32_t color_count;
   uint32_t resolve_count;
   bool resolve_depth_stencil;
   struct tu_subpass_attachment *input_attachments;
   struct tu_subpass_attachment *color_attachments;
   struct tu_subpass_attachment *resolve_attachments;
   struct tu_subpass_attachment depth_stencil_attachment;

   VkSampleCountFlagBits samples;

   uint32_t srgb_cntl;
   uint32_t multiview_mask;

   struct tu_subpass_barrier start_barrier;
};

struct tu_render_pass_attachment
{
   VkFormat format;
   uint32_t samples;
   uint32_t cpp;
   VkImageAspectFlags clear_mask;
   uint32_t clear_views;
   bool load;
   bool store;
   int32_t gmem_offset;
   /* for D32S8 separate stencil: */
   bool load_stencil;
   bool store_stencil;
   int32_t gmem_offset_stencil;
};

struct tu_render_pass
{
   struct vk_object_base base;

   uint32_t attachment_count;
   uint32_t subpass_count;
   uint32_t gmem_pixels;
   uint32_t tile_align_w;
   struct tu_subpass_attachment *subpass_attachments;
   struct tu_render_pass_attachment *attachments;
   struct tu_subpass_barrier end_barrier;
   struct tu_subpass subpasses[0];
};

#define PERF_CNTRS_REG 4

struct tu_perf_query_data
{
   uint32_t gid;      /* group-id */
   uint32_t cid;      /* countable-id within the group */
   uint32_t cntr_reg; /* counter register within the group */
   uint32_t pass;     /* pass index in which countables can be requested */
   uint32_t app_idx;  /* index provided by apps */
};

struct tu_query_pool
{
   struct vk_object_base base;

   VkQueryType type;
   uint32_t stride;
   uint64_t size;
   uint32_t pipeline_statistics;
   struct tu_bo bo;

   /* For performance query */
   const struct fd_perfcntr_group *perf_group;
   uint32_t perf_group_count;
   uint32_t counter_index_count;
   struct tu_perf_query_data perf_query_data[0];
};

uint32_t
tu_subpass_get_attachment_to_resolve(const struct tu_subpass *subpass, uint32_t index);

void
tu_update_descriptor_sets(const struct tu_device *device,
                          VkDescriptorSet overrideSet,
                          uint32_t descriptorWriteCount,
                          const VkWriteDescriptorSet *pDescriptorWrites,
                          uint32_t descriptorCopyCount,
                          const VkCopyDescriptorSet *pDescriptorCopies);

void
tu_update_descriptor_set_with_template(
   const struct tu_device *device,
   struct tu_descriptor_set *set,
   VkDescriptorUpdateTemplate descriptorUpdateTemplate,
   const void *pData);

VkResult
tu_physical_device_init(struct tu_physical_device *device,
                        struct tu_instance *instance);
VkResult
tu_enumerate_devices(struct tu_instance *instance);

int
tu_drm_submitqueue_new(const struct tu_device *dev,
                       int priority,
                       uint32_t *queue_id);

void
tu_drm_submitqueue_close(const struct tu_device *dev, uint32_t queue_id);

int
tu_signal_fences(struct tu_device *device, struct tu_syncobj *fence1, struct tu_syncobj *fence2);

int
tu_syncobj_to_fd(struct tu_device *device, struct tu_syncobj *sync);

#define TU_DEFINE_HANDLE_CASTS(__tu_type, __VkType) \
   \
   static inline struct __tu_type *__tu_type##_from_handle(__VkType _handle) \
   { \
      return (struct __tu_type *) _handle; \
   } \
   \
   static inline __VkType __tu_type##_to_handle(struct __tu_type *_obj) \
   { \
      return (__VkType) _obj; \
   }

#define TU_DEFINE_NONDISP_HANDLE_CASTS(__tu_type, __VkType) \
   \
   static inline struct __tu_type *__tu_type##_from_handle(__VkType _handle) \
   { \
      return (struct __tu_type *) (uintptr_t) _handle; \
   } \
   \
   static inline __VkType __tu_type##_to_handle(struct __tu_type *_obj) \
   { \
      return (__VkType)(uintptr_t) _obj; \
   }

#define TU_FROM_HANDLE(__tu_type, __name, __handle) \
   struct __tu_type *__name = __tu_type##_from_handle(__handle)
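
/* Illustrative sketch: entrypoints use TU_FROM_HANDLE() to recover the driver
 * structs behind the Vulkan handles instantiated below, e.g. (hypothetical
 * entrypoint body):
 *
 *    TU_FROM_HANDLE(tu_device, device, _device);
 *    TU_FROM_HANDLE(tu_buffer, buffer, pInfo->buffer);
 */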

TU_DEFINE_HANDLE_CASTS(tu_cmd_buffer, VkCommandBuffer)
TU_DEFINE_HANDLE_CASTS(tu_device, VkDevice)
TU_DEFINE_HANDLE_CASTS(tu_instance, VkInstance)
TU_DEFINE_HANDLE_CASTS(tu_physical_device, VkPhysicalDevice)
TU_DEFINE_HANDLE_CASTS(tu_queue, VkQueue)

TU_DEFINE_NONDISP_HANDLE_CASTS(tu_cmd_pool, VkCommandPool)
TU_DEFINE_NONDISP_HANDLE_CASTS(tu_buffer, VkBuffer)
TU_DEFINE_NONDISP_HANDLE_CASTS(tu_buffer_view, VkBufferView)
TU_DEFINE_NONDISP_HANDLE_CASTS(tu_descriptor_pool, VkDescriptorPool)
TU_DEFINE_NONDISP_HANDLE_CASTS(tu_descriptor_set, VkDescriptorSet)
TU_DEFINE_NONDISP_HANDLE_CASTS(tu_descriptor_set_layout,
                               VkDescriptorSetLayout)
TU_DEFINE_NONDISP_HANDLE_CASTS(tu_descriptor_update_template,
                               VkDescriptorUpdateTemplate)
TU_DEFINE_NONDISP_HANDLE_CASTS(tu_device_memory, VkDeviceMemory)
TU_DEFINE_NONDISP_HANDLE_CASTS(tu_event, VkEvent)
TU_DEFINE_NONDISP_HANDLE_CASTS(tu_framebuffer, VkFramebuffer)
TU_DEFINE_NONDISP_HANDLE_CASTS(tu_image, VkImage)
TU_DEFINE_NONDISP_HANDLE_CASTS(tu_image_view, VkImageView);
TU_DEFINE_NONDISP_HANDLE_CASTS(tu_pipeline_cache, VkPipelineCache)
TU_DEFINE_NONDISP_HANDLE_CASTS(tu_pipeline, VkPipeline)
TU_DEFINE_NONDISP_HANDLE_CASTS(tu_pipeline_layout, VkPipelineLayout)
TU_DEFINE_NONDISP_HANDLE_CASTS(tu_query_pool, VkQueryPool)
TU_DEFINE_NONDISP_HANDLE_CASTS(tu_render_pass, VkRenderPass)
TU_DEFINE_NONDISP_HANDLE_CASTS(tu_sampler, VkSampler)
TU_DEFINE_NONDISP_HANDLE_CASTS(tu_sampler_ycbcr_conversion, VkSamplerYcbcrConversion)

/* for TU_FROM_HANDLE with both VkFence and VkSemaphore: */
#define tu_syncobj_from_handle(x) ((struct tu_syncobj*) (uintptr_t) (x))

void
update_stencil_mask(uint32_t *value, VkStencilFaceFlags face, uint32_t mask);

#endif /* TU_PRIVATE_H */