GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/broadcom/vulkan/v3dv_private.h
/*
 * Copyright © 2019 Raspberry Pi
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * based in part on radv driver which is:
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#ifndef V3DV_PRIVATE_H
#define V3DV_PRIVATE_H

#include <stdio.h>
#include <string.h>
#include <vulkan/vulkan.h>
#include <vulkan/vk_icd.h>
#include <vk_enum_to_str.h>

#include "vk_device.h"
#include "vk_instance.h"
#include "vk_physical_device.h"
#include "vk_shader_module.h"
#include "vk_util.h"

#include <xf86drm.h>

#ifdef HAVE_VALGRIND
#include <valgrind.h>
#include <memcheck.h>
#define VG(x) x
#else
#define VG(x) ((void)0)
#endif

#include "v3dv_limits.h"

#include "common/v3d_device_info.h"
#include "common/v3d_limits.h"
#include "common/v3d_tiling.h"
#include "common/v3d_util.h"

#include "compiler/shader_enums.h"
#include "compiler/spirv/nir_spirv.h"

#include "compiler/v3d_compiler.h"

#include "vk_debug_report.h"
#include "util/set.h"
#include "util/hash_table.h"
#include "util/xmlconfig.h"
#include "u_atomic.h"

#include "v3dv_entrypoints.h"
#include "v3dv_bo.h"

#include "drm-uapi/v3d_drm.h"

#include "vk_alloc.h"
#include "simulator/v3d_simulator.h"

#include "v3dv_cl.h"

#include "wsi_common.h"

/* A non-fatal assert. Useful for debugging. */
#ifdef DEBUG
#define v3dv_assert(x) ({ \
   if (unlikely(!(x))) \
      fprintf(stderr, "%s:%d ASSERT: %s", __FILE__, __LINE__, #x); \
})
#else
#define v3dv_assert(x)
#endif

#define perf_debug(...) do {                       \
   if (unlikely(V3D_DEBUG & V3D_DEBUG_PERF))       \
      fprintf(stderr, __VA_ARGS__);                \
} while (0)
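
/* Illustrative usage of the two macros above (a sketch, not part of the
 * original header): v3dv_assert() only logs on failure in DEBUG builds
 * instead of aborting, and perf_debug() only prints when V3D_DEBUG_PERF is
 * set in V3D_DEBUG, so both are cheap enough to leave in hot paths:
 *
 *    v3dv_assert(job->bcl.bo != NULL);
 *    perf_debug("Flushing job early for debugging purposes\n");
 */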

struct v3dv_instance;

#ifdef USE_V3D_SIMULATOR
#define using_v3d_simulator true
#else
#define using_v3d_simulator false
#endif

struct v3d_simulator_file;

/* Minimum required by the Vulkan 1.1 spec */
#define MAX_MEMORY_ALLOCATION_SIZE (1ull << 30)

struct v3dv_physical_device {
   struct vk_physical_device vk;

   char *name;
   int32_t render_fd;
   int32_t display_fd;
   int32_t master_fd;

   uint8_t driver_build_sha1[20];
   uint8_t pipeline_cache_uuid[VK_UUID_SIZE];
   uint8_t device_uuid[VK_UUID_SIZE];
   uint8_t driver_uuid[VK_UUID_SIZE];

   struct disk_cache *disk_cache;

   mtx_t mutex;

   struct wsi_device wsi_device;

   VkPhysicalDeviceMemoryProperties memory;

   struct v3d_device_info devinfo;

   struct v3d_simulator_file *sim_file;

   const struct v3d_compiler *compiler;
   uint32_t next_program_id;

   struct {
      bool merge_jobs;
   } options;
};

VkResult v3dv_physical_device_acquire_display(struct v3dv_instance *instance,
                                              struct v3dv_physical_device *pdevice,
                                              VkIcdSurfaceBase *surface);

VkResult v3dv_wsi_init(struct v3dv_physical_device *physical_device);
void v3dv_wsi_finish(struct v3dv_physical_device *physical_device);
struct v3dv_image *v3dv_wsi_get_image_from_swapchain(VkSwapchainKHR swapchain,
                                                     uint32_t index);

void v3dv_meta_clear_init(struct v3dv_device *device);
void v3dv_meta_clear_finish(struct v3dv_device *device);

void v3dv_meta_blit_init(struct v3dv_device *device);
void v3dv_meta_blit_finish(struct v3dv_device *device);

void v3dv_meta_texel_buffer_copy_init(struct v3dv_device *device);
void v3dv_meta_texel_buffer_copy_finish(struct v3dv_device *device);

struct v3dv_instance {
   struct vk_instance vk;

   int physicalDeviceCount;
   struct v3dv_physical_device physicalDevice;

   bool pipeline_cache_enabled;
   bool default_pipeline_cache_enabled;
};

/* Tracks wait threads spawned from a single vkQueueSubmit call */
struct v3dv_queue_submit_wait_info {
   /* struct vk_object_base base; ?*/
   struct list_head list_link;

   struct v3dv_device *device;

   /* List of wait threads spawned for any command buffers in a particular
    * call to vkQueueSubmit.
    */
   uint32_t wait_thread_count;
   struct {
      pthread_t thread;
      bool finished;
   } wait_threads[16];

   /* The master wait thread for the entire submit. This will wait for all
    * other threads in this submit to complete before processing signal
    * semaphores and fences.
    */
   pthread_t master_wait_thread;

   /* List of semaphores (and fence) to signal after all wait threads have
    * completed and all command buffer jobs in the submission have been sent
    * to the GPU.
    */
   uint32_t signal_semaphore_count;
   VkSemaphore *signal_semaphores;
   VkFence fence;
};

struct v3dv_queue {
   struct vk_object_base base;

   struct v3dv_device *device;
   VkDeviceQueueCreateFlags flags;

   /* A list of active v3dv_queue_submit_wait_info */
   struct list_head submit_wait_list;

   /* A mutex to prevent concurrent access to the list of wait threads */
   mtx_t mutex;

   struct v3dv_job *noop_job;
};

#define V3DV_META_BLIT_CACHE_KEY_SIZE (4 * sizeof(uint32_t))
#define V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE (3 * sizeof(uint32_t) + \
                                                    sizeof(VkComponentMapping))

struct v3dv_meta_color_clear_pipeline {
   VkPipeline pipeline;
   VkRenderPass pass;
   bool cached;
   uint64_t key;
};

struct v3dv_meta_depth_clear_pipeline {
   VkPipeline pipeline;
   uint64_t key;
};

struct v3dv_meta_blit_pipeline {
   VkPipeline pipeline;
   VkRenderPass pass;
   VkRenderPass pass_no_load;
   uint8_t key[V3DV_META_BLIT_CACHE_KEY_SIZE];
};

struct v3dv_meta_texel_buffer_copy_pipeline {
   VkPipeline pipeline;
   VkRenderPass pass;
   VkRenderPass pass_no_load;
   uint8_t key[V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE];
};

struct v3dv_pipeline_key {
   bool robust_buffer_access;
   uint8_t topology;
   uint8_t logicop_func;
   bool msaa;
   bool sample_coverage;
   bool sample_alpha_to_coverage;
   bool sample_alpha_to_one;
   uint8_t cbufs;
   struct {
      enum pipe_format format;
      const uint8_t *swizzle;
   } color_fmt[V3D_MAX_DRAW_BUFFERS];
   uint8_t f32_color_rb;
   uint32_t va_swap_rb_mask;
};

struct v3dv_pipeline_cache_stats {
   uint32_t miss;
   uint32_t hit;
   uint32_t count;
};

/* Equivalent to gl_shader_stage, but including the coordinate shaders
 *
 * FIXME: perhaps move to common
 */
enum broadcom_shader_stage {
   BROADCOM_SHADER_VERTEX,
   BROADCOM_SHADER_VERTEX_BIN,
   BROADCOM_SHADER_GEOMETRY,
   BROADCOM_SHADER_GEOMETRY_BIN,
   BROADCOM_SHADER_FRAGMENT,
   BROADCOM_SHADER_COMPUTE,
};

#define BROADCOM_SHADER_STAGES (BROADCOM_SHADER_COMPUTE + 1)

/* Assumes that coordinate shaders will be custom-handled by the caller */
static inline enum broadcom_shader_stage
gl_shader_stage_to_broadcom(gl_shader_stage stage)
{
   switch (stage) {
   case MESA_SHADER_VERTEX:
      return BROADCOM_SHADER_VERTEX;
   case MESA_SHADER_GEOMETRY:
      return BROADCOM_SHADER_GEOMETRY;
   case MESA_SHADER_FRAGMENT:
      return BROADCOM_SHADER_FRAGMENT;
   case MESA_SHADER_COMPUTE:
      return BROADCOM_SHADER_COMPUTE;
   default:
      unreachable("Unknown gl shader stage");
   }
}

static inline gl_shader_stage
broadcom_shader_stage_to_gl(enum broadcom_shader_stage stage)
{
   switch (stage) {
   case BROADCOM_SHADER_VERTEX:
   case BROADCOM_SHADER_VERTEX_BIN:
      return MESA_SHADER_VERTEX;
   case BROADCOM_SHADER_GEOMETRY:
   case BROADCOM_SHADER_GEOMETRY_BIN:
      return MESA_SHADER_GEOMETRY;
   case BROADCOM_SHADER_FRAGMENT:
      return MESA_SHADER_FRAGMENT;
   case BROADCOM_SHADER_COMPUTE:
      return MESA_SHADER_COMPUTE;
   default:
      unreachable("Unknown broadcom shader stage");
   }
}

static inline bool
broadcom_shader_stage_is_binning(enum broadcom_shader_stage stage)
{
   switch (stage) {
   case BROADCOM_SHADER_VERTEX_BIN:
   case BROADCOM_SHADER_GEOMETRY_BIN:
      return true;
   default:
      return false;
   }
}

static inline bool
broadcom_shader_stage_is_render_with_binning(enum broadcom_shader_stage stage)
{
   switch (stage) {
   case BROADCOM_SHADER_VERTEX:
   case BROADCOM_SHADER_GEOMETRY:
      return true;
   default:
      return false;
   }
}

static inline enum broadcom_shader_stage
broadcom_binning_shader_stage_for_render_stage(enum broadcom_shader_stage stage)
{
   switch (stage) {
   case BROADCOM_SHADER_VERTEX:
      return BROADCOM_SHADER_VERTEX_BIN;
   case BROADCOM_SHADER_GEOMETRY:
      return BROADCOM_SHADER_GEOMETRY_BIN;
   default:
      unreachable("Invalid shader stage");
   }
}

static inline const char *
broadcom_shader_stage_name(enum broadcom_shader_stage stage)
{
   switch (stage) {
   case BROADCOM_SHADER_VERTEX_BIN:
      return "MESA_SHADER_VERTEX_BIN";
   case BROADCOM_SHADER_GEOMETRY_BIN:
      return "MESA_SHADER_GEOMETRY_BIN";
   default:
      return gl_shader_stage_name(broadcom_shader_stage_to_gl(stage));
   }
}
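
/* Illustrative round trip through the helpers above (a sketch derived from
 * their definitions, not part of the original header): binning stages map
 * back to the gl_shader_stage of their render stage:
 *
 *    enum broadcom_shader_stage bs =
 *       broadcom_binning_shader_stage_for_render_stage(BROADCOM_SHADER_VERTEX);
 *    assert(bs == BROADCOM_SHADER_VERTEX_BIN);
 *    assert(broadcom_shader_stage_is_binning(bs));
 *    assert(broadcom_shader_stage_to_gl(bs) == MESA_SHADER_VERTEX);
 */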

struct v3dv_pipeline_cache {
   struct vk_object_base base;

   struct v3dv_device *device;
   mtx_t mutex;

   struct hash_table *nir_cache;
   struct v3dv_pipeline_cache_stats nir_stats;

   struct hash_table *cache;
   struct v3dv_pipeline_cache_stats stats;
};

struct v3dv_device {
   struct vk_device vk;

   struct v3dv_instance *instance;
   struct v3dv_physical_device *pdevice;

   struct v3d_device_info devinfo;
   struct v3dv_queue queue;

   /* A sync object to track the last job submitted to the GPU. */
   uint32_t last_job_sync;

   /* A mutex to prevent concurrent access to last_job_sync from the queue */
   mtx_t mutex;

   /* Resources used for meta operations */
   struct {
      mtx_t mtx;
      struct {
         VkPipelineLayout p_layout;
         struct hash_table *cache; /* v3dv_meta_color_clear_pipeline */
      } color_clear;
      struct {
         VkPipelineLayout p_layout;
         struct hash_table *cache; /* v3dv_meta_depth_clear_pipeline */
      } depth_clear;
      struct {
         VkDescriptorSetLayout ds_layout;
         VkPipelineLayout p_layout;
         struct hash_table *cache[3]; /* v3dv_meta_blit_pipeline for 1d, 2d, 3d */
      } blit;
      struct {
         VkDescriptorSetLayout ds_layout;
         VkPipelineLayout p_layout;
         struct hash_table *cache[3]; /* v3dv_meta_texel_buffer_copy_pipeline for 1d, 2d, 3d */
      } texel_buffer_copy;
   } meta;

   struct v3dv_bo_cache {
      /** List of struct v3d_bo freed, by age. */
      struct list_head time_list;
      /** List of struct v3d_bo freed, per size, by age. */
      struct list_head *size_list;
      uint32_t size_list_size;

      mtx_t lock;

      uint32_t cache_size;
      uint32_t cache_count;
      uint32_t max_cache_size;
   } bo_cache;

   uint32_t bo_size;
   uint32_t bo_count;

   struct v3dv_pipeline_cache default_pipeline_cache;

   /* GL_SHADER_STATE_RECORD needs to specify default attribute values. The
    * following covers the most common case, that is, all attribute formats
    * being float, allowing us to reuse the same BO for all pipelines
    * matching this requirement. Pipelines that need integer attributes
    * will create their own BO.
    */
   struct v3dv_bo *default_attribute_float;
   VkPhysicalDeviceFeatures features;
};

struct v3dv_device_memory {
   struct vk_object_base base;

   struct v3dv_bo *bo;
   const VkMemoryType *type;
   bool has_bo_ownership;
   bool is_for_wsi;
};

#define V3D_OUTPUT_IMAGE_FORMAT_NO 255
#define TEXTURE_DATA_FORMAT_NO 255

struct v3dv_format {
   bool supported;

   /* One of V3D33_OUTPUT_IMAGE_FORMAT_*, or OUTPUT_IMAGE_FORMAT_NO */
   uint8_t rt_type;

   /* One of V3D33_TEXTURE_DATA_FORMAT_*. */
   uint8_t tex_type;

   /* Swizzle to apply to the RGBA shader output for storing to the tile
    * buffer, to the RGBA tile buffer to produce shader input (for
    * blending), and for turning the rgba8888 texture sampler return
    * value into shader rgba values.
    */
   uint8_t swizzle[4];

   /* Whether the return value is 16F/I/UI or 32F/I/UI. */
   uint8_t return_size;

   /* If the format supports (linear) filtering when texturing. */
   bool supports_filtering;
};

struct v3d_resource_slice {
   uint32_t offset;
   uint32_t stride;
   uint32_t padded_height;
   /* Size of a single pane of the slice. For 3D textures, there will be
    * a number of panes equal to the minified, power-of-two-aligned
    * depth.
    */
   uint32_t size;
   uint8_t ub_pad;
   enum v3d_tiling_mode tiling;
   uint32_t padded_height_of_output_image_in_uif_blocks;
};

struct v3dv_image {
   struct vk_object_base base;

   VkImageType type;
   VkImageAspectFlags aspects;

   VkExtent3D extent;
   uint32_t levels;
   uint32_t array_size;
   uint32_t samples;
   VkImageUsageFlags usage;
   VkImageCreateFlags flags;
   VkImageTiling tiling;

   VkFormat vk_format;
   const struct v3dv_format *format;

   uint32_t cpp;

   uint64_t drm_format_mod;
   bool tiled;
   bool external;

   struct v3d_resource_slice slices[V3D_MAX_MIP_LEVELS];
   uint64_t size; /* Total size in bytes */
   uint32_t cube_map_stride;
   uint32_t alignment;

   struct v3dv_device_memory *mem;
   VkDeviceSize mem_offset;
};

VkImageViewType v3dv_image_type_to_view_type(VkImageType type);

/* Pre-generating packets needs to consider changes in packet sizes across hw
 * versions. Keep things simple and allocate enough space for any supported
 * version. We ensure the size is large enough through static asserts.
 */
#define V3DV_TEXTURE_SHADER_STATE_LENGTH 32
#define V3DV_SAMPLER_STATE_LENGTH 24
#define V3DV_BLEND_CFG_LENGTH 5
#define V3DV_CFG_BITS_LENGTH 4
#define V3DV_GL_SHADER_STATE_RECORD_LENGTH 36
#define V3DV_VCM_CACHE_SIZE_LENGTH 2
#define V3DV_GL_SHADER_STATE_ATTRIBUTE_RECORD_LENGTH 16
#define V3DV_STENCIL_CFG_LENGTH 6

struct v3dv_image_view {
   struct vk_object_base base;

   struct v3dv_image *image;
   VkImageAspectFlags aspects;
   VkExtent3D extent;
   VkImageViewType type;

   VkFormat vk_format;
   const struct v3dv_format *format;
   bool swap_rb;
   uint32_t internal_bpp;
   uint32_t internal_type;

   uint32_t base_level;
   uint32_t max_level;
   uint32_t first_layer;
   uint32_t last_layer;
   uint32_t offset;

   /* Precomputed (composed from createinfo->components and format swizzle)
    * swizzles to pass in to the shader key.
    *
    * This could also be included in the descriptor bo, but the shader state
    * packet doesn't need it on a bo, so we can just avoid a memory copy.
    */
   uint8_t swizzle[4];

   /* Prepacked TEXTURE_SHADER_STATE. It will be copied to the descriptor info
    * during UpdateDescriptorSets.
    *
    * Empirical tests show that cube arrays need a different shader state
    * depending on whether they are used with a sampler or not, so for these
    * we generate two states and select the one to use based on the descriptor
    * type.
    */
   uint8_t texture_shader_state[2][V3DV_TEXTURE_SHADER_STATE_LENGTH];
};

uint32_t v3dv_layer_offset(const struct v3dv_image *image, uint32_t level, uint32_t layer);

struct v3dv_buffer {
   struct vk_object_base base;

   VkDeviceSize size;
   VkBufferUsageFlags usage;
   uint32_t alignment;

   struct v3dv_device_memory *mem;
   VkDeviceSize mem_offset;
};

struct v3dv_buffer_view {
   struct vk_object_base base;

   struct v3dv_buffer *buffer;

   VkFormat vk_format;
   const struct v3dv_format *format;
   uint32_t internal_bpp;
   uint32_t internal_type;

   uint32_t offset;
   uint32_t size;
   uint32_t num_elements;

   /* Prepacked TEXTURE_SHADER_STATE. */
   uint8_t texture_shader_state[V3DV_TEXTURE_SHADER_STATE_LENGTH];
};

struct v3dv_subpass_attachment {
   uint32_t attachment;
   VkImageLayout layout;
};

struct v3dv_subpass {
   uint32_t input_count;
   struct v3dv_subpass_attachment *input_attachments;

   uint32_t color_count;
   struct v3dv_subpass_attachment *color_attachments;
   struct v3dv_subpass_attachment *resolve_attachments;

   struct v3dv_subpass_attachment ds_attachment;

   /* If we need to emit the clear of the depth/stencil attachment using a
    * draw call instead of using the TLB (GFXH-1461).
    */
   bool do_depth_clear_with_draw;
   bool do_stencil_clear_with_draw;
};

struct v3dv_render_pass_attachment {
   VkAttachmentDescription desc;
   uint32_t first_subpass;
   uint32_t last_subpass;

   /* If this is a multisampled attachment that is going to be resolved,
    * whether we can use the TLB resolve on store.
    */
   bool use_tlb_resolve;
};

struct v3dv_render_pass {
   struct vk_object_base base;

   uint32_t attachment_count;
   struct v3dv_render_pass_attachment *attachments;

   uint32_t subpass_count;
   struct v3dv_subpass *subpasses;

   struct v3dv_subpass_attachment *subpass_attachments;
};

struct v3dv_framebuffer {
   struct vk_object_base base;

   uint32_t width;
   uint32_t height;
   uint32_t layers;

   /* Typically, edge tiles in the framebuffer have padding depending on the
    * underlying tiling layout. One consequence of this is that when the
    * framebuffer dimensions are not aligned to tile boundaries, tile stores
    * would still write full tiles on the edges and write to the padded area.
    * If the framebuffer is aliasing a smaller region of a larger image, then
    * we need to be careful with this though, as we won't have padding on the
    * edge tiles (which typically means that we need to load the tile buffer
    * before we store).
    */
   bool has_edge_padding;

   uint32_t attachment_count;
   uint32_t color_attachment_count;
   struct v3dv_image_view *attachments[0];
};

struct v3dv_frame_tiling {
   uint32_t width;
   uint32_t height;
   uint32_t layers;
   uint32_t render_target_count;
   uint32_t internal_bpp;
   bool msaa;
   uint32_t tile_width;
   uint32_t tile_height;
   uint32_t draw_tiles_x;
   uint32_t draw_tiles_y;
   uint32_t supertile_width;
   uint32_t supertile_height;
   uint32_t frame_width_in_supertiles;
   uint32_t frame_height_in_supertiles;
};

void v3dv_framebuffer_compute_internal_bpp_msaa(const struct v3dv_framebuffer *framebuffer,
                                                const struct v3dv_subpass *subpass,
                                                uint8_t *max_bpp, bool *msaa);

bool v3dv_subpass_area_is_tile_aligned(struct v3dv_device *device,
                                       const VkRect2D *area,
                                       struct v3dv_framebuffer *fb,
                                       struct v3dv_render_pass *pass,
                                       uint32_t subpass_idx);

struct v3dv_cmd_pool {
   struct vk_object_base base;

   VkAllocationCallbacks alloc;
   struct list_head cmd_buffers;
};

enum v3dv_cmd_buffer_status {
   V3DV_CMD_BUFFER_STATUS_NEW = 0,
   V3DV_CMD_BUFFER_STATUS_INITIALIZED = 1,
   V3DV_CMD_BUFFER_STATUS_RECORDING = 2,
   V3DV_CMD_BUFFER_STATUS_EXECUTABLE = 3
};

union v3dv_clear_value {
   uint32_t color[4];
   struct {
      float z;
      uint8_t s;
   };
};
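
/* Illustrative use of the union above (a sketch, not part of the original
 * header): color clears fill the four 32-bit words with the clear color
 * packed for the render target format, while depth/stencil clears use the
 * z/s pair:
 *
 *    union v3dv_clear_value cv;
 *    cv.color[0] = 0xffffffffu;  // packed per-RT format (assumed layout)
 *    ...
 *    cv.z = 1.0f;
 *    cv.s = 0;
 */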

struct v3dv_cmd_buffer_attachment_state {
   /* The original clear value as provided by the Vulkan API */
   VkClearValue vk_clear_value;

   /* The hardware clear value */
   union v3dv_clear_value clear_value;
};

struct v3dv_viewport_state {
   uint32_t count;
   VkViewport viewports[MAX_VIEWPORTS];
   float translate[MAX_VIEWPORTS][3];
   float scale[MAX_VIEWPORTS][3];
};

struct v3dv_scissor_state {
   uint32_t count;
   VkRect2D scissors[MAX_SCISSORS];
};

/* Mostly a v3dv mapping of VkDynamicState, used to track which state is
 * defined as dynamic
 */
enum v3dv_dynamic_state_bits {
   V3DV_DYNAMIC_VIEWPORT = 1 << 0,
   V3DV_DYNAMIC_SCISSOR = 1 << 1,
   V3DV_DYNAMIC_STENCIL_COMPARE_MASK = 1 << 2,
   V3DV_DYNAMIC_STENCIL_WRITE_MASK = 1 << 3,
   V3DV_DYNAMIC_STENCIL_REFERENCE = 1 << 4,
   V3DV_DYNAMIC_BLEND_CONSTANTS = 1 << 5,
   V3DV_DYNAMIC_DEPTH_BIAS = 1 << 6,
   V3DV_DYNAMIC_LINE_WIDTH = 1 << 7,
   V3DV_DYNAMIC_ALL = (1 << 8) - 1,
};

/* Flags for dirty pipeline state.
 */
enum v3dv_cmd_dirty_bits {
   V3DV_CMD_DIRTY_VIEWPORT = 1 << 0,
   V3DV_CMD_DIRTY_SCISSOR = 1 << 1,
   V3DV_CMD_DIRTY_STENCIL_COMPARE_MASK = 1 << 2,
   V3DV_CMD_DIRTY_STENCIL_WRITE_MASK = 1 << 3,
   V3DV_CMD_DIRTY_STENCIL_REFERENCE = 1 << 4,
   V3DV_CMD_DIRTY_PIPELINE = 1 << 5,
   V3DV_CMD_DIRTY_COMPUTE_PIPELINE = 1 << 6,
   V3DV_CMD_DIRTY_VERTEX_BUFFER = 1 << 7,
   V3DV_CMD_DIRTY_INDEX_BUFFER = 1 << 8,
   V3DV_CMD_DIRTY_DESCRIPTOR_SETS = 1 << 9,
   V3DV_CMD_DIRTY_COMPUTE_DESCRIPTOR_SETS = 1 << 10,
   V3DV_CMD_DIRTY_PUSH_CONSTANTS = 1 << 11,
   V3DV_CMD_DIRTY_BLEND_CONSTANTS = 1 << 12,
   V3DV_CMD_DIRTY_OCCLUSION_QUERY = 1 << 13,
   V3DV_CMD_DIRTY_DEPTH_BIAS = 1 << 14,
   V3DV_CMD_DIRTY_LINE_WIDTH = 1 << 15,
};

struct v3dv_dynamic_state {
   /**
    * Bitmask of (1 << VK_DYNAMIC_STATE_*).
    * Defines the set of saved dynamic state.
    */
   uint32_t mask;

   struct v3dv_viewport_state viewport;

   struct v3dv_scissor_state scissor;

   struct {
      uint32_t front;
      uint32_t back;
   } stencil_compare_mask;

   struct {
      uint32_t front;
      uint32_t back;
   } stencil_write_mask;

   struct {
      uint32_t front;
      uint32_t back;
   } stencil_reference;

   float blend_constants[4];

   struct {
      float constant_factor;
      float depth_bias_clamp;
      float slope_factor;
   } depth_bias;

   float line_width;
};

extern const struct v3dv_dynamic_state default_dynamic_state;

void v3dv_viewport_compute_xform(const VkViewport *viewport,
                                 float scale[3],
                                 float translate[3]);

enum v3dv_ez_state {
   V3D_EZ_UNDECIDED = 0,
   V3D_EZ_GT_GE,
   V3D_EZ_LT_LE,
   V3D_EZ_DISABLED,
};

enum v3dv_job_type {
   V3DV_JOB_TYPE_GPU_CL = 0,
   V3DV_JOB_TYPE_GPU_CL_SECONDARY,
   V3DV_JOB_TYPE_GPU_TFU,
   V3DV_JOB_TYPE_GPU_CSD,
   V3DV_JOB_TYPE_CPU_RESET_QUERIES,
   V3DV_JOB_TYPE_CPU_END_QUERY,
   V3DV_JOB_TYPE_CPU_COPY_QUERY_RESULTS,
   V3DV_JOB_TYPE_CPU_SET_EVENT,
   V3DV_JOB_TYPE_CPU_WAIT_EVENTS,
   V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE,
   V3DV_JOB_TYPE_CPU_CSD_INDIRECT,
   V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY,
};

struct v3dv_reset_query_cpu_job_info {
   struct v3dv_query_pool *pool;
   uint32_t first;
   uint32_t count;
};

struct v3dv_end_query_cpu_job_info {
   struct v3dv_query_pool *pool;
   uint32_t query;
};

struct v3dv_copy_query_results_cpu_job_info {
   struct v3dv_query_pool *pool;
   uint32_t first;
   uint32_t count;
   struct v3dv_buffer *dst;
   uint32_t offset;
   uint32_t stride;
   VkQueryResultFlags flags;
};

struct v3dv_event_set_cpu_job_info {
   struct v3dv_event *event;
   int state;
};

struct v3dv_event_wait_cpu_job_info {
   /* List of events to wait on */
   uint32_t event_count;
   struct v3dv_event **events;

   /* Whether any postponed jobs after the wait should wait on semaphores */
   bool sem_wait;
};

struct v3dv_copy_buffer_to_image_cpu_job_info {
   struct v3dv_image *image;
   struct v3dv_buffer *buffer;
   uint32_t buffer_offset;
   uint32_t buffer_stride;
   uint32_t buffer_layer_stride;
   VkOffset3D image_offset;
   VkExtent3D image_extent;
   uint32_t mip_level;
   uint32_t base_layer;
   uint32_t layer_count;
};

struct v3dv_csd_indirect_cpu_job_info {
   struct v3dv_buffer *buffer;
   uint32_t offset;
   struct v3dv_job *csd_job;
   uint32_t wg_size;
   uint32_t *wg_uniform_offsets[3];
   bool needs_wg_uniform_rewrite;
};

struct v3dv_timestamp_query_cpu_job_info {
   struct v3dv_query_pool *pool;
   uint32_t query;
};

struct v3dv_job {
   struct list_head list_link;

   /* We only create job clones when executing secondary command buffers into
    * primaries. These clones don't make deep copies of the original object,
    * so we want to flag them to avoid freeing resources they don't own.
    */
   bool is_clone;

   enum v3dv_job_type type;

   struct v3dv_device *device;

   struct v3dv_cmd_buffer *cmd_buffer;

   struct v3dv_cl bcl;
   struct v3dv_cl rcl;
   struct v3dv_cl indirect;

   /* Set of all BOs referenced by the job. This will be used for making
    * the list of BOs that the kernel will need to have paged in to
    * execute our job.
    */
   struct set *bos;
   uint32_t bo_count;
   uint64_t bo_handle_mask;

   struct v3dv_bo *tile_alloc;
   struct v3dv_bo *tile_state;

   bool tmu_dirty_rcl;

   uint32_t first_subpass;

   /* When the current subpass is split into multiple jobs, this flag is set
    * to true for any jobs after the first in the same subpass.
    */
   bool is_subpass_continue;

   /* If this job is the last job emitted for a subpass. */
   bool is_subpass_finish;

   struct v3dv_frame_tiling frame_tiling;

   enum v3dv_ez_state ez_state;
   enum v3dv_ez_state first_ez_state;

   /* If we have already decided if we need to disable Early Z/S completely
    * for this job.
    */
   bool decided_global_ez_enable;

   /* If this job has been configured to use early Z/S clear */
   bool early_zs_clear;

   /* Number of draw calls recorded into the job */
   uint32_t draw_count;

   /* A flag indicating whether we want to flush every draw separately. This
    * can be used for debugging, or for cases where special circumstances
    * require this behavior.
    */
   bool always_flush;

   /* Whether we need to serialize this job in our command stream */
   bool serialize;

   /* If this is a CL job, whether we should sync before binning */
   bool needs_bcl_sync;

   /* Job specs for CPU jobs */
   union {
      struct v3dv_reset_query_cpu_job_info query_reset;
      struct v3dv_end_query_cpu_job_info query_end;
      struct v3dv_copy_query_results_cpu_job_info query_copy_results;
      struct v3dv_event_set_cpu_job_info event_set;
      struct v3dv_event_wait_cpu_job_info event_wait;
      struct v3dv_copy_buffer_to_image_cpu_job_info copy_buffer_to_image;
      struct v3dv_csd_indirect_cpu_job_info csd_indirect;
      struct v3dv_timestamp_query_cpu_job_info query_timestamp;
   } cpu;

   /* Job specs for TFU jobs */
   struct drm_v3d_submit_tfu tfu;

   /* Job specs for CSD jobs */
   struct {
      struct v3dv_bo *shared_memory;
      uint32_t wg_count[3];
      uint32_t wg_base[3];
      struct drm_v3d_submit_csd submit;
   } csd;
};

void v3dv_job_init(struct v3dv_job *job,
                   enum v3dv_job_type type,
                   struct v3dv_device *device,
                   struct v3dv_cmd_buffer *cmd_buffer,
                   int32_t subpass_idx);
void v3dv_job_destroy(struct v3dv_job *job);

void v3dv_job_add_bo(struct v3dv_job *job, struct v3dv_bo *bo);
void v3dv_job_add_bo_unchecked(struct v3dv_job *job, struct v3dv_bo *bo);

void v3dv_job_start_frame(struct v3dv_job *job,
                          uint32_t width,
                          uint32_t height,
                          uint32_t layers,
                          uint32_t render_target_count,
                          uint8_t max_internal_bpp,
                          bool msaa);

struct v3dv_job *
v3dv_job_clone_in_cmd_buffer(struct v3dv_job *job,
                             struct v3dv_cmd_buffer *cmd_buffer);

struct v3dv_job *v3dv_cmd_buffer_create_cpu_job(struct v3dv_device *device,
                                                enum v3dv_job_type type,
                                                struct v3dv_cmd_buffer *cmd_buffer,
                                                uint32_t subpass_idx);

void
v3dv_cmd_buffer_ensure_array_state(struct v3dv_cmd_buffer *cmd_buffer,
                                   uint32_t slot_size,
                                   uint32_t used_count,
                                   uint32_t *alloc_count,
                                   void **ptr);

void v3dv_cmd_buffer_emit_pre_draw(struct v3dv_cmd_buffer *cmd_buffer);

/* FIXME: only used on v3dv_cmd_buffer and v3dvx_cmd_buffer, perhaps move to a
 * cmd_buffer specific header?
 */
struct v3dv_draw_info {
   uint32_t vertex_count;
   uint32_t instance_count;
   uint32_t first_vertex;
   uint32_t first_instance;
};

struct v3dv_vertex_binding {
   struct v3dv_buffer *buffer;
   VkDeviceSize offset;
};

struct v3dv_descriptor_state {
   struct v3dv_descriptor_set *descriptor_sets[MAX_SETS];
   uint32_t valid;
   uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS];
};

struct v3dv_cmd_pipeline_state {
   struct v3dv_pipeline *pipeline;

   struct v3dv_descriptor_state descriptor_state;
};

struct v3dv_cmd_buffer_state {
   struct v3dv_render_pass *pass;
   struct v3dv_framebuffer *framebuffer;
   VkRect2D render_area;

   /* Current job being recorded */
   struct v3dv_job *job;

   uint32_t subpass_idx;

   struct v3dv_cmd_pipeline_state gfx;
   struct v3dv_cmd_pipeline_state compute;

   struct v3dv_dynamic_state dynamic;

   uint32_t dirty;
   VkShaderStageFlagBits dirty_descriptor_stages;
   VkShaderStageFlagBits dirty_push_constants_stages;

   /* Current clip window. We use this to check whether we have an active
    * scissor, since in that case we can't use TLB clears and need to fall
    * back to drawing rects.
    */
   VkRect2D clip_window;

   /* Whether our render area is aligned to tile boundaries. If this is false
    * then we have tiles that are only partially covered by the render area,
    * and therefore, we need to be careful with our loads and stores so we don't
    * modify pixels for the tile area that is not covered by the render area.
    * This means, for example, that we can't use the TLB to clear, since that
    * always clears full tiles.
    */
   bool tile_aligned_render_area;

   uint32_t attachment_alloc_count;
   struct v3dv_cmd_buffer_attachment_state *attachments;

   struct v3dv_vertex_binding vertex_bindings[MAX_VBS];

   struct {
      VkBuffer buffer;
      VkDeviceSize offset;
      uint8_t index_size;
   } index_buffer;

   /* Current uniforms */
   struct {
      struct v3dv_cl_reloc vs_bin;
      struct v3dv_cl_reloc vs;
      struct v3dv_cl_reloc gs_bin;
      struct v3dv_cl_reloc gs;
      struct v3dv_cl_reloc fs;
   } uniforms;

   /* Used to flag OOM conditions during command buffer recording */
   bool oom;

   /* Whether we have recorded a pipeline barrier that we still need to
    * process.
    */
   bool has_barrier;
   bool has_bcl_barrier;

   /* Secondary command buffer state */
   struct {
      bool occlusion_query_enable;
   } inheritance;

   /* Command buffer state saved during a meta operation */
   struct {
      uint32_t subpass_idx;
      VkRenderPass pass;
      VkFramebuffer framebuffer;

      uint32_t attachment_alloc_count;
      uint32_t attachment_count;
      struct v3dv_cmd_buffer_attachment_state *attachments;

      bool tile_aligned_render_area;
      VkRect2D render_area;

      struct v3dv_dynamic_state dynamic;

      struct v3dv_cmd_pipeline_state gfx;
      bool has_descriptor_state;

      uint32_t push_constants[MAX_PUSH_CONSTANTS_SIZE / 4];
   } meta;

   /* Command buffer state for queries */
   struct {
      /* A list of vkCmdQueryEnd commands recorded in the command buffer during
       * a render pass. We queue these here and then schedule the corresponding
       * CPU jobs for them at the time we finish the GPU job in which they have
       * been recorded.
       */
      struct {
         uint32_t used_count;
         uint32_t alloc_count;
         struct v3dv_end_query_cpu_job_info *states;
      } end;

      /* This BO is not NULL if we have an active query, that is, we have
       * called vkCmdBeginQuery but not vkCmdEndQuery.
       */
      struct {
         struct v3dv_bo *bo;
         uint32_t offset;
      } active_query;
   } query;
};

/* The following struct represents the info from a descriptor that we store in
 * host memory. They are mostly links to other existing vulkan objects, like
 * the image_view in order to access swizzle info, or the buffer used for a
 * UBO/SSBO, for example.
 *
 * FIXME: revisit if it makes sense to just move everything that would be
 * needed from a descriptor to the bo.
 */
struct v3dv_descriptor {
   VkDescriptorType type;

   union {
      struct {
         struct v3dv_image_view *image_view;
         struct v3dv_sampler *sampler;
      };

      struct {
         struct v3dv_buffer *buffer;
         uint32_t offset;
         uint32_t range;
      };

      struct v3dv_buffer_view *buffer_view;
   };
};

struct v3dv_query {
   bool maybe_available;
   union {
      /* Used by GPU queries (occlusion) */
      struct {
         struct v3dv_bo *bo;
         uint32_t offset;
      };
      /* Used by CPU queries (timestamp) */
      uint64_t value;
   };
};

struct v3dv_query_pool {
   struct vk_object_base base;

   struct v3dv_bo *bo; /* Only used with GPU queries (occlusion) */

   VkQueryType query_type;
   uint32_t query_count;
   struct v3dv_query *queries;
};

VkResult v3dv_get_query_pool_results_cpu(struct v3dv_device *device,
                                         struct v3dv_query_pool *pool,
                                         uint32_t first,
                                         uint32_t count,
                                         void *data,
                                         VkDeviceSize stride,
                                         VkQueryResultFlags flags);

typedef void (*v3dv_cmd_buffer_private_obj_destroy_cb)(VkDevice device,
                                                       uint64_t pobj,
                                                       VkAllocationCallbacks *alloc);
struct v3dv_cmd_buffer_private_obj {
   struct list_head list_link;
   uint64_t obj;
   v3dv_cmd_buffer_private_obj_destroy_cb destroy_cb;
};

struct v3dv_cmd_buffer {
   struct vk_object_base base;

   struct v3dv_device *device;

   struct v3dv_cmd_pool *pool;
   struct list_head pool_link;

   /* Used at submit time to link command buffers in the submission that have
    * spawned wait threads, so we can then wait on all of them to complete
    * before we process any signal semaphores or fences.
    */
   struct list_head list_link;

   VkCommandBufferUsageFlags usage_flags;
   VkCommandBufferLevel level;

   enum v3dv_cmd_buffer_status status;

   struct v3dv_cmd_buffer_state state;

   /* FIXME: we have just one client-side buffer and BO for the push constants,
    * independently of the stageFlags in vkCmdPushConstants, and the
    * pipelineBindPoint in vkCmdBindPipeline. We could probably do more stage
    * tuning in the future if it makes sense.
    */
   uint32_t push_constants_data[MAX_PUSH_CONSTANTS_SIZE / 4];
   struct v3dv_cl_reloc push_constants_resource;

   /* Collection of Vulkan objects created internally by the driver (typically
    * during recording of meta operations) that are part of the command buffer
    * and should be destroyed with it.
    */
   struct list_head private_objs; /* v3dv_cmd_buffer_private_obj */

   /* Per-command buffer resources for meta operations. */
   struct {
      struct {
         /* The current descriptor pool for blit sources */
         VkDescriptorPool dspool;
      } blit;
      struct {
         /* The current descriptor pool for texel buffer copy sources */
         VkDescriptorPool dspool;
      } texel_buffer_copy;
   } meta;

   /* List of jobs in the command buffer. For primary command buffers it
    * represents the jobs we want to submit to the GPU. For secondary command
    * buffers it represents jobs that will be merged into a primary command
    * buffer via vkCmdExecuteCommands.
    */
   struct list_head jobs;
};

struct v3dv_job *v3dv_cmd_buffer_start_job(struct v3dv_cmd_buffer *cmd_buffer,
                                           int32_t subpass_idx,
                                           enum v3dv_job_type type);
void v3dv_cmd_buffer_finish_job(struct v3dv_cmd_buffer *cmd_buffer);

struct v3dv_job *v3dv_cmd_buffer_subpass_start(struct v3dv_cmd_buffer *cmd_buffer,
                                               uint32_t subpass_idx);
struct v3dv_job *v3dv_cmd_buffer_subpass_resume(struct v3dv_cmd_buffer *cmd_buffer,
                                                uint32_t subpass_idx);

void v3dv_cmd_buffer_subpass_finish(struct v3dv_cmd_buffer *cmd_buffer);

void v3dv_cmd_buffer_meta_state_push(struct v3dv_cmd_buffer *cmd_buffer,
                                     bool push_descriptor_state);
void v3dv_cmd_buffer_meta_state_pop(struct v3dv_cmd_buffer *cmd_buffer,
                                    uint32_t dirty_dynamic_state,
                                    bool needs_subpass_resume);

void v3dv_cmd_buffer_reset_queries(struct v3dv_cmd_buffer *cmd_buffer,
                                   struct v3dv_query_pool *pool,
                                   uint32_t first,
                                   uint32_t count);

void v3dv_cmd_buffer_begin_query(struct v3dv_cmd_buffer *cmd_buffer,
                                 struct v3dv_query_pool *pool,
                                 uint32_t query,
                                 VkQueryControlFlags flags);

void v3dv_cmd_buffer_end_query(struct v3dv_cmd_buffer *cmd_buffer,
                               struct v3dv_query_pool *pool,
                               uint32_t query);

void v3dv_cmd_buffer_copy_query_results(struct v3dv_cmd_buffer *cmd_buffer,
                                        struct v3dv_query_pool *pool,
                                        uint32_t first,
                                        uint32_t count,
                                        struct v3dv_buffer *dst,
                                        uint32_t offset,
                                        uint32_t stride,
                                        VkQueryResultFlags flags);

void v3dv_cmd_buffer_add_tfu_job(struct v3dv_cmd_buffer *cmd_buffer,
                                 struct drm_v3d_submit_tfu *tfu);

void v3dv_cmd_buffer_rewrite_indirect_csd_job(struct v3dv_csd_indirect_cpu_job_info *info,
                                              const uint32_t *wg_counts);

void v3dv_cmd_buffer_add_private_obj(struct v3dv_cmd_buffer *cmd_buffer,
                                     uint64_t obj,
                                     v3dv_cmd_buffer_private_obj_destroy_cb destroy_cb);

struct v3dv_semaphore {
   struct vk_object_base base;

   /* A syncobject handle associated with this semaphore */
   uint32_t sync;

   /* A temporary syncobject handle produced from a vkImportSemaphoreFd. */
   uint32_t temp_sync;
};

struct v3dv_fence {
   struct vk_object_base base;

   /* A syncobject handle associated with this fence */
   uint32_t sync;

   /* A temporary syncobject handle produced from a vkImportFenceFd. */
   uint32_t temp_sync;
};

struct v3dv_event {
   struct vk_object_base base;
   int state;
};

struct v3dv_shader_variant {
   enum broadcom_shader_stage stage;

   union {
      struct v3d_prog_data *base;
      struct v3d_vs_prog_data *vs;
      struct v3d_gs_prog_data *gs;
      struct v3d_fs_prog_data *fs;
      struct v3d_compute_prog_data *cs;
   } prog_data;

   /* We explicitly save the prog_data_size as it makes it easier to
    * serialize
    */
   uint32_t prog_data_size;

   /* The assembly for this variant will be uploaded to a BO shared with all
    * other shader stages in that pipeline. This is the offset in that BO.
    */
   uint32_t assembly_offset;

   /* Note: it is really likely that qpu_insts would be NULL, as it will be
    * used only temporarily, to upload it to the shared bo, as we compile the
    * different stages individually.
    */
   uint64_t *qpu_insts;
   uint32_t qpu_insts_size;
};

/*
 * Per-stage info for each stage, useful so shader_module_compile_to_nir and
 * other methods don't have so many parameters.
 *
 * FIXME: for the case of the coordinate shader and the vertex shader, module,
 * entrypoint, spec_info and nir are the same. There is also info only
 * relevant to some stages. But it seemed too much of a hassle to create a new
 * struct only to handle that. Revisit if this kind of info starts to grow.
 */
struct v3dv_pipeline_stage {
   struct v3dv_pipeline *pipeline;

   enum broadcom_shader_stage stage;

   const struct vk_shader_module *module;
   const char *entrypoint;
   const VkSpecializationInfo *spec_info;

   nir_shader *nir;

   /* The following is the combined hash of module+entrypoint+spec_info+nir */
   unsigned char shader_sha1[20];

   /** A name for this program, so you can track it in shader-db output. */
   uint32_t program_id;
};

/* We are using the descriptor pool entry for two things:
 * * Track the allocated sets, so we can properly free them if needed
 * * Track the suballocated pool bo regions, so if some descriptor set is
 *   freed, the gap could be reallocated later.
 *
 * Those only make sense if the pool was not created with the flag
 * VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT
 */
struct v3dv_descriptor_pool_entry
{
   struct v3dv_descriptor_set *set;
   /* Offset and size of the subregion allocated for this entry from the
    * pool->bo
    */
   uint32_t offset;
   uint32_t size;
};

struct v3dv_descriptor_pool {
   struct vk_object_base base;

   /* If this descriptor pool has been allocated by the driver for internal
    * use, typically to implement meta operations.
    */
   bool is_driver_internal;

   struct v3dv_bo *bo;
   /* Current offset at the descriptor bo. 0 means that we didn't use it for
    * any descriptor. If the descriptor bo is NULL, current offset is
    * meaningless.
    */
   uint32_t current_offset;

   /* If VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT is not set, the
    * descriptor sets are handled as a whole as pool memory and handled by the
    * following pointers. If set, they are not used, and descriptor sets are
    * allocated/freed individually.
    */
   uint8_t *host_memory_base;
   uint8_t *host_memory_ptr;
   uint8_t *host_memory_end;

   uint32_t entry_count;
   uint32_t max_entry_count;
   struct v3dv_descriptor_pool_entry entries[0];
};

struct v3dv_descriptor_set {
   struct vk_object_base base;

   struct v3dv_descriptor_pool *pool;

   const struct v3dv_descriptor_set_layout *layout;

   /* Offset relative to the descriptor pool bo for this set */
   uint32_t base_offset;

   /* The descriptors below can be indexed (set/binding) using the set_layout
    */
   struct v3dv_descriptor descriptors[0];
};

struct v3dv_descriptor_set_binding_layout {
   VkDescriptorType type;

   /* Number of array elements in this binding */
   uint32_t array_size;

   /* Index into the flattened descriptor set */
   uint32_t descriptor_index;

   uint32_t dynamic_offset_count;
   uint32_t dynamic_offset_index;

   /* Offset into the descriptor set where this descriptor lives (the final
    * offset on the descriptor bo needs to take into account set->base_offset)
    */
   uint32_t descriptor_offset;

   /* Offset in the v3dv_descriptor_set_layout of the immutable samplers, or 0
    * if there are no immutable samplers.
    */
   uint32_t immutable_samplers_offset;
};

struct v3dv_descriptor_set_layout {
   struct vk_object_base base;

   VkDescriptorSetLayoutCreateFlags flags;

   /* Number of bindings in this descriptor set */
   uint32_t binding_count;

   /* Total bo size needed for this descriptor set
    */
   uint32_t bo_size;

   /* Shader stages affected by this descriptor set */
   uint16_t shader_stages;

   /* Number of descriptors in this descriptor set */
   uint32_t descriptor_count;

   /* Number of dynamic offsets used by this descriptor set */
   uint16_t dynamic_offset_count;

   /* Bindings in this descriptor set */
   struct v3dv_descriptor_set_binding_layout binding[0];
};

struct v3dv_pipeline_layout {
   struct vk_object_base base;

   struct {
      struct v3dv_descriptor_set_layout *layout;
      uint32_t dynamic_offset_start;
   } set[MAX_SETS];

   uint32_t num_sets;

   /* Shader stages that are declared to use descriptors from this layout */
   uint32_t shader_stages;

   uint32_t dynamic_offset_count;
   uint32_t push_constant_size;
};

/*
 * We are using descriptor maps for ubo/ssbo and texture/samplers, so we need
 * them to be big enough to include the max value for all of them.
 *
 * FIXME: one alternative would be to allocate the map as big as you need for
 * each descriptor type. That would mean more individual allocations.
 */
#define DESCRIPTOR_MAP_SIZE MAX3(V3D_MAX_TEXTURE_SAMPLERS, \
                                 MAX_UNIFORM_BUFFERS, \
                                 MAX_STORAGE_BUFFERS)


struct v3dv_descriptor_map {
   /* TODO: avoid fixed size array/justify the size */
   unsigned num_desc; /* Number of descriptors */
   int set[DESCRIPTOR_MAP_SIZE];
   int binding[DESCRIPTOR_MAP_SIZE];
   int array_index[DESCRIPTOR_MAP_SIZE];
   int array_size[DESCRIPTOR_MAP_SIZE];

   /* NOTE: the following is only for samplers, but this is the easiest place
    * to put it.
    */
   uint8_t return_size[DESCRIPTOR_MAP_SIZE];
};

struct v3dv_sampler {
   struct vk_object_base base;

   bool compare_enable;
   bool unnormalized_coordinates;
   bool clamp_to_transparent_black_border;

   /* Prepacked SAMPLER_STATE, that is referenced as part of the tmu
    * configuration. If needed it will be copied to the descriptor info during
    * UpdateDescriptorSets.
    */
   uint8_t sampler_state[V3DV_SAMPLER_STATE_LENGTH];
};

struct v3dv_descriptor_template_entry {
   /* The type of descriptor in this entry */
   VkDescriptorType type;

   /* Binding in the descriptor set */
   uint32_t binding;

   /* Offset at which to write into the descriptor set binding */
   uint32_t array_element;

   /* Number of elements to write into the descriptor set binding */
   uint32_t array_count;

   /* Offset into the user provided data */
   size_t offset;

   /* Stride between elements into the user provided data */
   size_t stride;
};

struct v3dv_descriptor_update_template {
   struct vk_object_base base;

   VkPipelineBindPoint bind_point;

   /* The descriptor set this template corresponds to. This value is only
    * valid if the template was created with the templateType
    * VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET.
    */
   uint8_t set;

   /* Number of entries in this template */
   uint32_t entry_count;

   /* Entries of the template */
   struct v3dv_descriptor_template_entry entries[0];
};


/* We keep two special values for the sampler idx that represent exactly when
 * a sampler is not needed/provided. The main use is that even if we don't
 * have a sampler, we still need to do the output unpacking (through
 * nir_lower_tex). The easiest way to do this is to add those special "no
 * sampler" entries in the sampler_map, and then use the proper unpacking for
 * that case.
 *
 * We have one for when we want a 16-bit output size, and another for when we
 * want a 32-bit output size. We use the info coming from the RelaxedPrecision
 * decoration to decide between one and the other.
 */
#define V3DV_NO_SAMPLER_16BIT_IDX 0
#define V3DV_NO_SAMPLER_32BIT_IDX 1

/*
 * The following two methods are used with the combined texture/sampler index
 * maps in v3dv_pipeline.
 */
static inline uint32_t
v3dv_pipeline_combined_index_key_create(uint32_t texture_index,
                                        uint32_t sampler_index)
{
   return texture_index << 24 | sampler_index;
}

static inline void
v3dv_pipeline_combined_index_key_unpack(uint32_t combined_index_key,
                                        uint32_t *texture_index,
                                        uint32_t *sampler_index)
{
   uint32_t texture = combined_index_key >> 24;
   uint32_t sampler = combined_index_key & 0xffffff;

   if (texture_index)
      *texture_index = texture;

   if (sampler_index)
      *sampler_index = sampler;
}
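
/* Example (derived from the helpers above): packing and unpacking are
 * symmetric, with the texture index in the top 8 bits and the sampler index
 * in the low 24 bits:
 *
 *    uint32_t key = v3dv_pipeline_combined_index_key_create(2, 5);
 *    uint32_t tex, samp;
 *    v3dv_pipeline_combined_index_key_unpack(key, &tex, &samp);
 *    assert(tex == 2 && samp == 5);
 */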

struct v3dv_descriptor_maps {
   struct v3dv_descriptor_map ubo_map;
   struct v3dv_descriptor_map ssbo_map;
   struct v3dv_descriptor_map sampler_map;
   struct v3dv_descriptor_map texture_map;
};

/* This structure represents data shared between different objects, like the
 * pipeline and the pipeline cache, so we ref count it to know when it should
 * be freed.
 */
struct v3dv_pipeline_shared_data {
   uint32_t ref_cnt;

   unsigned char sha1_key[20];

   struct v3dv_descriptor_maps *maps[BROADCOM_SHADER_STAGES];
   struct v3dv_shader_variant *variants[BROADCOM_SHADER_STAGES];

   struct v3dv_bo *assembly_bo;
};

struct v3dv_pipeline {
   struct vk_object_base base;

   struct v3dv_device *device;

   VkShaderStageFlags active_stages;

   struct v3dv_render_pass *pass;
   struct v3dv_subpass *subpass;

   /* Note: We can't use just a MESA_SHADER_STAGES array because we also need
    * to track binning shaders. Note these will be freed once the pipeline
    * has been compiled.
    */
   struct v3dv_pipeline_stage *vs;
   struct v3dv_pipeline_stage *vs_bin;
   struct v3dv_pipeline_stage *gs;
   struct v3dv_pipeline_stage *gs_bin;
   struct v3dv_pipeline_stage *fs;
   struct v3dv_pipeline_stage *cs;

   /* Flags for whether optional pipeline stages are present, for convenience */
   bool has_gs;

   /* Spilling memory requirements */
   struct {
      struct v3dv_bo *bo;
      uint32_t size_per_thread;
   } spill;

   struct v3dv_dynamic_state dynamic_state;

   struct v3dv_pipeline_layout *layout;

   /* Whether this pipeline enables depth writes */
   bool z_updates_enable;

   enum v3dv_ez_state ez_state;

   bool msaa;
   bool sample_rate_shading;
   uint32_t sample_mask;

   bool primitive_restart;

   /* Accessed by binding. So vb[binding].stride is the stride of the vertex
    * array with such binding
    */
   struct v3dv_pipeline_vertex_binding {
      uint32_t stride;
      uint32_t instance_divisor;
   } vb[MAX_VBS];
   uint32_t vb_count;

   /* Note that a lot of info from VkVertexInputAttributeDescription is
    * already prepacked, so here we are only storing those that need to be
    * rechecked later. The array must be indexed by driver location, since
    * that is the order in which we need to emit the attributes.
    */
   struct v3dv_pipeline_vertex_attrib {
      uint32_t binding;
      uint32_t offset;
      VkFormat vk_format;
   } va[MAX_VERTEX_ATTRIBS];
   uint32_t va_count;

   enum pipe_prim_type topology;

   struct v3dv_pipeline_shared_data *shared_data;

   /* In general we can reuse v3dv_device->default_attribute_float, so note
    * that the following can be NULL.
    *
    * FIXME: the content of this BO will be small, so it could be improved to
    * be uploaded to a common BO. But as in most cases it will be NULL, it is
    * not a priority.
    */
   struct v3dv_bo *default_attribute_values;

   struct vpm_config vpm_cfg;
   struct vpm_config vpm_cfg_bin;

   /* If the pipeline should emit any of the stencil configuration packets */
   bool emit_stencil_cfg[2];

   /* Blend state */
   struct {
      /* Per-RT bit mask with blend enables */
      uint8_t enables;
      /* Per-RT prepacked blend config packets */
      uint8_t cfg[V3D_MAX_DRAW_BUFFERS][V3DV_BLEND_CFG_LENGTH];
      /* Flag indicating whether the blend factors in use require
       * color constants.
       */
      bool needs_color_constants;
      /* Mask with enabled color channels for each RT (4 bits per RT) */
      uint32_t color_write_masks;
   } blend;

   /* Depth bias */
   struct {
      bool enabled;
      bool is_z16;
   } depth_bias;

   /* Packets prepacked during pipeline creation
    */
   uint8_t cfg_bits[V3DV_CFG_BITS_LENGTH];
   uint8_t shader_state_record[V3DV_GL_SHADER_STATE_RECORD_LENGTH];
   uint8_t vcm_cache_size[V3DV_VCM_CACHE_SIZE_LENGTH];
   uint8_t vertex_attrs[V3DV_GL_SHADER_STATE_ATTRIBUTE_RECORD_LENGTH *
                        MAX_VERTEX_ATTRIBS];
   uint8_t stencil_cfg[2][V3DV_STENCIL_CFG_LENGTH];
};

static inline VkPipelineBindPoint
v3dv_pipeline_get_binding_point(struct v3dv_pipeline *pipeline)
{
   assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT ||
          !(pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT));
   return pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT ?
      VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS;
}

static inline struct v3dv_descriptor_state*
v3dv_cmd_buffer_get_descriptor_state(struct v3dv_cmd_buffer *cmd_buffer,
                                     struct v3dv_pipeline *pipeline)
{
   if (v3dv_pipeline_get_binding_point(pipeline) == VK_PIPELINE_BIND_POINT_COMPUTE)
      return &cmd_buffer->state.compute.descriptor_state;
   else
      return &cmd_buffer->state.gfx.descriptor_state;
}
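
/* Illustrative usage (a sketch, not part of the original header): the two
 * helpers above let code that handles both graphics and compute pick the
 * right descriptor state without open-coding the pipeline type check. The
 * set_index update below is hypothetical:
 *
 *    struct v3dv_descriptor_state *ds =
 *       v3dv_cmd_buffer_get_descriptor_state(cmd_buffer, pipeline);
 *    ds->valid |= 1 << set_index;
 */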
1844
1845
const nir_shader_compiler_options *v3dv_pipeline_get_nir_options(void);
1846
1847
uint32_t v3dv_physical_device_vendor_id(struct v3dv_physical_device *dev);
1848
uint32_t v3dv_physical_device_device_id(struct v3dv_physical_device *dev);
1849
1850
VkResult __vk_errorf(struct v3dv_instance *instance, VkResult error,
1851
const char *file, int line,
1852
const char *format, ...);
1853
1854
#define vk_error(instance, error) __vk_errorf(instance, error, __FILE__, __LINE__, NULL);
1855
#define vk_errorf(instance, error, format, ...) __vk_errorf(instance, error, __FILE__, __LINE__, format, ## __VA_ARGS__);
1856
1857
#ifdef DEBUG
1858
#define v3dv_debug_ignored_stype(sType) \
1859
fprintf(stderr, "%s: ignored VkStructureType %u:%s\n\n", __func__, (sType), vk_StructureType_to_str(sType))
1860
#else
1861
#define v3dv_debug_ignored_stype(sType)
1862
#endif
1863
1864
const uint8_t *v3dv_get_format_swizzle(struct v3dv_device *device, VkFormat f);
1865
uint8_t v3dv_get_tex_return_size(const struct v3dv_format *vf, bool compare_enable);
1866
const struct v3dv_format *
1867
v3dv_get_compatible_tfu_format(struct v3dv_device *device,
1868
uint32_t bpp, VkFormat *out_vk_format);
1869
bool v3dv_buffer_format_supports_features(struct v3dv_device *device,
1870
VkFormat vk_format,
1871
VkFormatFeatureFlags features);
1872
1873
struct v3dv_cl_reloc v3dv_write_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
1874
struct v3dv_pipeline *pipeline,
1875
struct v3dv_shader_variant *variant);
1876
1877
struct v3dv_cl_reloc v3dv_write_uniforms_wg_offsets(struct v3dv_cmd_buffer *cmd_buffer,
1878
struct v3dv_pipeline *pipeline,
1879
struct v3dv_shader_variant *variant,
1880
uint32_t **wg_count_offsets);

struct v3dv_shader_variant *
v3dv_get_shader_variant(struct v3dv_pipeline_stage *p_stage,
                        struct v3dv_pipeline_cache *cache,
                        struct v3d_key *key,
                        size_t key_size,
                        const VkAllocationCallbacks *pAllocator,
                        VkResult *out_vk_result);

struct v3dv_shader_variant *
v3dv_shader_variant_create(struct v3dv_device *device,
                           enum broadcom_shader_stage stage,
                           struct v3d_prog_data *prog_data,
                           uint32_t prog_data_size,
                           uint32_t assembly_offset,
                           uint64_t *qpu_insts,
                           uint32_t qpu_insts_size,
                           VkResult *out_vk_result);

void
v3dv_shader_variant_destroy(struct v3dv_device *device,
                            struct v3dv_shader_variant *variant);

static inline void
v3dv_pipeline_shared_data_ref(struct v3dv_pipeline_shared_data *shared_data)
{
   assert(shared_data && shared_data->ref_cnt >= 1);
   p_atomic_inc(&shared_data->ref_cnt);
}

void
v3dv_pipeline_shared_data_destroy(struct v3dv_device *device,
                                  struct v3dv_pipeline_shared_data *shared_data);

static inline void
v3dv_pipeline_shared_data_unref(struct v3dv_device *device,
                                struct v3dv_pipeline_shared_data *shared_data)
{
   assert(shared_data && shared_data->ref_cnt >= 1);
   if (p_atomic_dec_zero(&shared_data->ref_cnt))
      v3dv_pipeline_shared_data_destroy(device, shared_data);
}
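
/* Illustrative sketch (editorial, not part of the driver): the helpers above
 * implement standard atomic reference counting. A hypothetical second owner
 * of the shared data would do:
 *
 *    v3dv_pipeline_shared_data_ref(shared_data);
 *    ...use shared_data...
 *    v3dv_pipeline_shared_data_unref(device, shared_data);
 *
 * and the unref that drops ref_cnt to zero triggers
 * v3dv_pipeline_shared_data_destroy().
 */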

struct v3dv_descriptor *
v3dv_descriptor_map_get_descriptor(struct v3dv_descriptor_state *descriptor_state,
                                   struct v3dv_descriptor_map *map,
                                   struct v3dv_pipeline_layout *pipeline_layout,
                                   uint32_t index,
                                   uint32_t *dynamic_offset);

const struct v3dv_sampler *
v3dv_descriptor_map_get_sampler(struct v3dv_descriptor_state *descriptor_state,
                                struct v3dv_descriptor_map *map,
                                struct v3dv_pipeline_layout *pipeline_layout,
                                uint32_t index);

struct v3dv_cl_reloc
v3dv_descriptor_map_get_sampler_state(struct v3dv_device *device,
                                      struct v3dv_descriptor_state *descriptor_state,
                                      struct v3dv_descriptor_map *map,
                                      struct v3dv_pipeline_layout *pipeline_layout,
                                      uint32_t index);

struct v3dv_cl_reloc
v3dv_descriptor_map_get_texture_shader_state(struct v3dv_device *device,
                                             struct v3dv_descriptor_state *descriptor_state,
                                             struct v3dv_descriptor_map *map,
                                             struct v3dv_pipeline_layout *pipeline_layout,
                                             uint32_t index);

const struct v3dv_format *
v3dv_descriptor_map_get_texture_format(struct v3dv_descriptor_state *descriptor_state,
                                       struct v3dv_descriptor_map *map,
                                       struct v3dv_pipeline_layout *pipeline_layout,
                                       uint32_t index,
                                       VkFormat *out_vk_format);

struct v3dv_bo *
v3dv_descriptor_map_get_texture_bo(struct v3dv_descriptor_state *descriptor_state,
                                   struct v3dv_descriptor_map *map,
                                   struct v3dv_pipeline_layout *pipeline_layout,
                                   uint32_t index);

static inline const struct v3dv_sampler *
v3dv_immutable_samplers(const struct v3dv_descriptor_set_layout *set,
                        const struct v3dv_descriptor_set_binding_layout *binding)
{
   assert(binding->immutable_samplers_offset);
   return (const struct v3dv_sampler *) ((const char *) set + binding->immutable_samplers_offset);
}
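
/* Editorial note: the cast above implies that immutable samplers live in the
 * same allocation as the descriptor set layout, with
 * immutable_samplers_offset giving the byte offset from the start of the
 * layout:
 *
 *    [ v3dv_descriptor_set_layout | ... | v3dv_sampler[] ]
 *                                         ^-- immutable_samplers_offset
 */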

void v3dv_pipeline_cache_init(struct v3dv_pipeline_cache *cache,
                              struct v3dv_device *device,
                              bool cache_enabled);

void v3dv_pipeline_cache_finish(struct v3dv_pipeline_cache *cache);

void v3dv_pipeline_cache_upload_nir(struct v3dv_pipeline *pipeline,
                                    struct v3dv_pipeline_cache *cache,
                                    nir_shader *nir,
                                    unsigned char sha1_key[20]);

nir_shader *v3dv_pipeline_cache_search_for_nir(struct v3dv_pipeline *pipeline,
                                               struct v3dv_pipeline_cache *cache,
                                               const nir_shader_compiler_options *nir_options,
                                               unsigned char sha1_key[20]);

struct v3dv_pipeline_shared_data *
v3dv_pipeline_cache_search_for_pipeline(struct v3dv_pipeline_cache *cache,
                                        unsigned char sha1_key[20]);

void
v3dv_pipeline_cache_upload_pipeline(struct v3dv_pipeline *pipeline,
                                    struct v3dv_pipeline_cache *cache);
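
/* Illustrative sketch (editorial, not part of the driver): the cache API
 * above follows a search-then-upload pattern keyed by a 20-byte SHA-1 of the
 * relevant state:
 *
 *    unsigned char sha1_key[20];
 *    ...hash the shader/pipeline state into sha1_key...
 *    nir_shader *nir =
 *       v3dv_pipeline_cache_search_for_nir(pipeline, cache,
 *                                          nir_options, sha1_key);
 *    if (!nir) {
 *       nir = ...build the shader...;
 *       v3dv_pipeline_cache_upload_nir(pipeline, cache, nir, sha1_key);
 *    }
 */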

struct v3dv_bo *
v3dv_pipeline_create_default_attribute_values(struct v3dv_device *device,
                                              struct v3dv_pipeline *pipeline);

void v3dv_shader_module_internal_init(struct v3dv_device *device,
                                      struct vk_shader_module *module,
                                      nir_shader *nir);

#define V3DV_DEFINE_HANDLE_CASTS(__v3dv_type, __VkType)   \
                                                          \
   static inline struct __v3dv_type *                     \
   __v3dv_type ## _from_handle(__VkType _handle)          \
   {                                                      \
      return (struct __v3dv_type *) _handle;              \
   }                                                      \
                                                          \
   static inline __VkType                                 \
   __v3dv_type ## _to_handle(struct __v3dv_type *_obj)    \
   {                                                      \
      return (__VkType) _obj;                             \
   }

#define V3DV_DEFINE_NONDISP_HANDLE_CASTS(__v3dv_type, __VkType)   \
                                                                  \
   static inline struct __v3dv_type *                             \
   __v3dv_type ## _from_handle(__VkType _handle)                  \
   {                                                              \
      return (struct __v3dv_type *)(uintptr_t) _handle;           \
   }                                                              \
                                                                  \
   static inline __VkType                                         \
   __v3dv_type ## _to_handle(struct __v3dv_type *_obj)            \
   {                                                              \
      return (__VkType)(uintptr_t) _obj;                          \
   }

#define V3DV_FROM_HANDLE(__v3dv_type, __name, __handle) \
   struct __v3dv_type *__name = __v3dv_type ## _from_handle(__handle)
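
/* Illustrative sketch (editorial, not part of the driver): entry points
 * typically open with V3DV_FROM_HANDLE() to recover driver structs from API
 * handles ("v3dv_SomeEntryPoint" is a made-up name):
 *
 *    VKAPI_ATTR void VKAPI_CALL
 *    v3dv_SomeEntryPoint(VkDevice _device, VkBuffer _buffer)
 *    {
 *       V3DV_FROM_HANDLE(v3dv_device, device, _device);
 *       V3DV_FROM_HANDLE(v3dv_buffer, buffer, _buffer);
 *       ...
 *    }
 */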

V3DV_DEFINE_HANDLE_CASTS(v3dv_cmd_buffer, VkCommandBuffer)
V3DV_DEFINE_HANDLE_CASTS(v3dv_device, VkDevice)
V3DV_DEFINE_HANDLE_CASTS(v3dv_instance, VkInstance)
V3DV_DEFINE_HANDLE_CASTS(v3dv_physical_device, VkPhysicalDevice)
V3DV_DEFINE_HANDLE_CASTS(v3dv_queue, VkQueue)

V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_cmd_pool, VkCommandPool)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_buffer, VkBuffer)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_buffer_view, VkBufferView)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_device_memory, VkDeviceMemory)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_pool, VkDescriptorPool)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_set, VkDescriptorSet)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_set_layout, VkDescriptorSetLayout)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_update_template, VkDescriptorUpdateTemplate)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_event, VkEvent)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_fence, VkFence)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_framebuffer, VkFramebuffer)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_image, VkImage)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_image_view, VkImageView)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline, VkPipeline)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline_cache, VkPipelineCache)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline_layout, VkPipelineLayout)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_query_pool, VkQueryPool)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_render_pass, VkRenderPass)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_sampler, VkSampler)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_semaphore, VkSemaphore)

/* This is defined as a macro so that it works for both
 * VkImageSubresourceRange and VkImageSubresourceLayers
 */
#define v3dv_layer_count(_image, _range) \
   ((_range)->layerCount == VK_REMAINING_ARRAY_LAYERS ? \
    (_image)->array_size - (_range)->baseArrayLayer : (_range)->layerCount)

#define v3dv_level_count(_image, _range) \
   ((_range)->levelCount == VK_REMAINING_MIP_LEVELS ? \
    (_image)->levels - (_range)->baseMipLevel : (_range)->levelCount)
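
/* Illustrative sketch (editorial, not part of the driver): both macros
 * resolve the VK_REMAINING_* sentinels against the image dimensions, e.g.:
 *
 *    VkImageSubresourceRange range = {
 *       .baseMipLevel = 1,
 *       .levelCount = VK_REMAINING_MIP_LEVELS,
 *    };
 *    uint32_t levels = v3dv_level_count(image, &range);
 *
 * yields image->levels - 1 rather than the sentinel value.
 */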

static inline int
v3dv_ioctl(int fd, unsigned long request, void *arg)
{
   if (using_v3d_simulator)
      return v3d_simulator_ioctl(fd, request, arg);
   else
      return drmIoctl(fd, request, arg);
}

/* Flags OOM conditions in command buffer state.
 *
 * Note that no-op jobs don't have a command buffer reference.
 */
static inline void
v3dv_flag_oom(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_job *job)
{
   if (cmd_buffer) {
      cmd_buffer->state.oom = true;
   } else {
      assert(job);
      if (job->cmd_buffer)
         job->cmd_buffer->state.oom = true;
   }
}

#define v3dv_return_if_oom(_cmd_buffer, _job) do {                  \
   const struct v3dv_cmd_buffer *__cmd_buffer = _cmd_buffer;        \
   if (__cmd_buffer && __cmd_buffer->state.oom)                     \
      return;                                                       \
   const struct v3dv_job *__job = _job;                             \
   if (__job && __job->cmd_buffer && __job->cmd_buffer->state.oom)  \
      return;                                                       \
} while (0)

static inline uint32_t
u64_hash(const void *key)
{
   return _mesa_hash_data(key, sizeof(uint64_t));
}

static inline bool
u64_compare(const void *key1, const void *key2)
{
   return memcmp(key1, key2, sizeof(uint64_t)) == 0;
}
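
/* Illustrative sketch (editorial, not part of the driver): these two helpers
 * match the key-hash/key-equals callback signatures of util/hash_table, so a
 * table keyed by 64-bit values can be built as:
 *
 *    struct hash_table *ht =
 *       _mesa_hash_table_create(NULL, u64_hash, u64_compare);
 */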

/* Helper to call hardware-version-specific functions */
#define v3dv_X(device, thing) ({                         \
   __typeof(&v3d42_##thing) v3d_X_thing;                 \
   switch (device->devinfo.ver) {                        \
   case 42:                                              \
      v3d_X_thing = &v3d42_##thing;                      \
      break;                                             \
   default:                                              \
      unreachable("Unsupported hardware generation");    \
   }                                                     \
   v3d_X_thing;                                          \
})
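
/* Illustrative sketch (editorial, not part of the driver): v3dv_X() evaluates
 * to a pointer to the per-generation variant of "thing", so a
 * version-specific function would be called as, e.g.:
 *
 *    v3dv_X(device, some_hw_specific_emit)(job);
 *
 * where "some_hw_specific_emit" is a made-up name standing in for any
 * function that v3dvx_private.h exposes with a v3d42_ prefix.
 */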


/* The v3d_macros from common require the v3dX and V3DX definitions. Below we
 * define v3dX for each supported version, because when we compile code that
 * is not version-specific, all version-specific macros need to be defined
 * already.
 */
#ifdef v3dX
# include "v3dvx_private.h"
#else
# define v3dX(x) v3d42_##x
# include "v3dvx_private.h"
# undef v3dX
#endif

#endif /* V3DV_PRIVATE_H */