GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/broadcom/vulkan/v3dv_pipeline_cache.c

/*
 * Copyright © 2019 Raspberry Pi
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "v3dv_private.h"
#include "vulkan/util/vk_util.h"
#include "util/blob.h"
#include "nir/nir_serialize.h"

static const bool debug_cache = false;
static const bool dump_stats = false;
static const bool dump_stats_on_destroy = false;

/* Shared for nir/variants */
#define V3DV_MAX_PIPELINE_CACHE_ENTRIES 4096

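/* Cache keys are raw 20-byte SHA-1 digests, not NUL-terminated strings, so
 * the hash table callbacks below hash and compare a fixed 20 bytes.
 */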
static uint32_t
sha1_hash_func(const void *sha1)
{
   return _mesa_hash_data(sha1, 20);
}

static bool
sha1_compare_func(const void *sha1_a, const void *sha1_b)
{
   return memcmp(sha1_a, sha1_b, 20) == 0;
}

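/* A NIR cache entry: the 20-byte SHA-1 key plus the nir_serialize() payload
 * in a trailing array, so a single ralloc from the nir_cache context owns
 * the whole entry.
 */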
struct serialized_nir {
   unsigned char sha1_key[20];
   size_t size;
   char data[0];
};

static void
cache_dump_stats(struct v3dv_pipeline_cache *cache)
{
   fprintf(stderr, " NIR cache entries: %d\n", cache->nir_stats.count);
   fprintf(stderr, " NIR cache miss count: %d\n", cache->nir_stats.miss);
   fprintf(stderr, " NIR cache hit count: %d\n", cache->nir_stats.hit);

   fprintf(stderr, " cache entries: %d\n", cache->stats.count);
   fprintf(stderr, " cache miss count: %d\n", cache->stats.miss);
   fprintf(stderr, " cache hit count: %d\n", cache->stats.hit);
}

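/* Stores a serialized copy of 'nir' in the NIR cache, keyed by sha1_key.
 * Serialization happens outside the cache lock; the table is searched again
 * under the lock before inserting, so a concurrent upload of the same key
 * simply drops its duplicate blob.
 */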
void
v3dv_pipeline_cache_upload_nir(struct v3dv_pipeline *pipeline,
                               struct v3dv_pipeline_cache *cache,
                               nir_shader *nir,
                               unsigned char sha1_key[20])
{
   if (!cache || !cache->nir_cache)
      return;

   if (cache->nir_stats.count > V3DV_MAX_PIPELINE_CACHE_ENTRIES)
      return;

   pthread_mutex_lock(&cache->mutex);
   struct hash_entry *entry =
      _mesa_hash_table_search(cache->nir_cache, sha1_key);
   pthread_mutex_unlock(&cache->mutex);
   if (entry)
      return;

   struct blob blob;
   blob_init(&blob);

   nir_serialize(&blob, nir, false);
   if (blob.out_of_memory) {
      blob_finish(&blob);
      return;
   }

   pthread_mutex_lock(&cache->mutex);
   /* Because ralloc isn't thread-safe, we have to do all this inside the
    * lock. We could unlock for the big memcpy but it's probably not worth
    * the hassle.
    */
   entry = _mesa_hash_table_search(cache->nir_cache, sha1_key);
   if (entry) {
      blob_finish(&blob);
      pthread_mutex_unlock(&cache->mutex);
      return;
   }

   struct serialized_nir *snir =
      ralloc_size(cache->nir_cache, sizeof(*snir) + blob.size);
   memcpy(snir->sha1_key, sha1_key, 20);
   snir->size = blob.size;
   memcpy(snir->data, blob.data, blob.size);

   blob_finish(&blob);

   cache->nir_stats.count++;
   if (debug_cache) {
      char sha1buf[41];
      _mesa_sha1_format(sha1buf, snir->sha1_key);
      fprintf(stderr, "pipeline cache %p, new nir entry %s\n", cache, sha1buf);
      if (dump_stats)
         cache_dump_stats(cache);
   }

   _mesa_hash_table_insert(cache->nir_cache, snir->sha1_key, snir);

   pthread_mutex_unlock(&cache->mutex);
}

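/* Returns a freshly deserialized nir_shader for sha1_key, or NULL on a
 * miss. A caller would typically pair this with the upload path above,
 * roughly like this (a sketch only; compile_spirv_to_nir is a hypothetical
 * helper, not a real v3dv function):
 *
 *    nir_shader *nir =
 *       v3dv_pipeline_cache_search_for_nir(pipeline, cache, options, sha1);
 *    if (!nir) {
 *       nir = compile_spirv_to_nir(pipeline, options);
 *       v3dv_pipeline_cache_upload_nir(pipeline, cache, nir, sha1);
 *    }
 */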
nir_shader*
v3dv_pipeline_cache_search_for_nir(struct v3dv_pipeline *pipeline,
                                   struct v3dv_pipeline_cache *cache,
                                   const nir_shader_compiler_options *nir_options,
                                   unsigned char sha1_key[20])
{
   if (!cache || !cache->nir_cache)
      return NULL;

   if (debug_cache) {
      char sha1buf[41];
      _mesa_sha1_format(sha1buf, sha1_key);

      fprintf(stderr, "pipeline cache %p, search for nir %s\n", cache, sha1buf);
   }

   const struct serialized_nir *snir = NULL;

   pthread_mutex_lock(&cache->mutex);
   struct hash_entry *entry =
      _mesa_hash_table_search(cache->nir_cache, sha1_key);
   if (entry)
      snir = entry->data;
   pthread_mutex_unlock(&cache->mutex);

   if (snir) {
      struct blob_reader blob;
      blob_reader_init(&blob, snir->data, snir->size);

      /* We use a NULL ralloc context because we want the p_stage to keep
       * the reference to the nir, as we keep open the possibility of
       * providing a shader variant after cache creation.
       */
      nir_shader *nir = nir_deserialize(NULL, nir_options, &blob);
      if (blob.overrun) {
         ralloc_free(nir);
      } else {
         cache->nir_stats.hit++;
         if (debug_cache) {
            fprintf(stderr, "\tnir cache hit: %p\n", nir);
            if (dump_stats)
               cache_dump_stats(cache);
         }
         return nir;
      }
   }

   cache->nir_stats.miss++;
   if (debug_cache) {
      fprintf(stderr, "\tnir cache miss\n");
      if (dump_stats)
         cache_dump_stats(cache);
   }

   return NULL;
}

void
v3dv_pipeline_cache_init(struct v3dv_pipeline_cache *cache,
                         struct v3dv_device *device,
                         bool cache_enabled)
{
   cache->device = device;
   pthread_mutex_init(&cache->mutex, NULL);

   if (cache_enabled) {
      cache->nir_cache = _mesa_hash_table_create(NULL, sha1_hash_func,
                                                 sha1_compare_func);
      cache->nir_stats.miss = 0;
      cache->nir_stats.hit = 0;
      cache->nir_stats.count = 0;

      cache->cache = _mesa_hash_table_create(NULL, sha1_hash_func,
                                             sha1_compare_func);
      cache->stats.miss = 0;
      cache->stats.hit = 0;
      cache->stats.count = 0;
   } else {
      cache->nir_cache = NULL;
      cache->cache = NULL;
   }
}

static struct v3dv_pipeline_shared_data *
v3dv_pipeline_shared_data_create_from_blob(struct v3dv_pipeline_cache *cache,
                                           struct blob_reader *blob);

static void
pipeline_cache_upload_shared_data(struct v3dv_pipeline_cache *cache,
                                  struct v3dv_pipeline_shared_data *shared_data,
                                  bool from_disk_cache);

static bool
v3dv_pipeline_shared_data_write_to_blob(const struct v3dv_pipeline_shared_data *cache_entry,
                                        struct blob *blob);

/**
 * Searches for cached pipeline data and returns a v3dv_pipeline_shared_data
 * with it, or NULL if it is not cached. On a hit this increases the
 * ref_count, so the caller is responsible for unreffing it.
 */
struct v3dv_pipeline_shared_data *
v3dv_pipeline_cache_search_for_pipeline(struct v3dv_pipeline_cache *cache,
                                        unsigned char sha1_key[20])
{
   if (!cache || !cache->cache)
      return NULL;

   if (debug_cache) {
      char sha1buf[41];
      _mesa_sha1_format(sha1buf, sha1_key);

      fprintf(stderr, "pipeline cache %p, search pipeline with key %s\n", cache, sha1buf);
   }

   pthread_mutex_lock(&cache->mutex);

   struct hash_entry *entry =
      _mesa_hash_table_search(cache->cache, sha1_key);

   if (entry) {
      struct v3dv_pipeline_shared_data *cache_entry =
         (struct v3dv_pipeline_shared_data *) entry->data;
      assert(cache_entry);

      cache->stats.hit++;
      if (debug_cache) {
         fprintf(stderr, "\tcache hit: %p\n", cache_entry);
         if (dump_stats)
            cache_dump_stats(cache);
      }

      v3dv_pipeline_shared_data_ref(cache_entry);

      pthread_mutex_unlock(&cache->mutex);

      return cache_entry;
   }

   cache->stats.miss++;
   if (debug_cache) {
      fprintf(stderr, "\tcache miss\n");
      if (dump_stats)
         cache_dump_stats(cache);
   }

   pthread_mutex_unlock(&cache->mutex);

#ifdef ENABLE_SHADER_CACHE
   struct v3dv_device *device = cache->device;
   struct disk_cache *disk_cache = device->pdevice->disk_cache;
   /* Note that the on-disk cache can be independently disabled, while
    * keeping the in-memory pipeline cache working, by setting the
    * environment variable MESA_GLSL_CACHE_DISABLE. In that case the calls
    * to disk_cache_put/get will not do anything.
    */
   if (disk_cache && device->instance->pipeline_cache_enabled) {
      cache_key cache_key;
      disk_cache_compute_key(disk_cache, sha1_key, 20, cache_key);

      size_t buffer_size;
      uint8_t *buffer = disk_cache_get(disk_cache, cache_key, &buffer_size);
      if (buffer) {
         struct blob_reader blob;
         struct v3dv_pipeline_shared_data *shared_data;

         if (debug_cache)
            fprintf(stderr, "\ton-disk-cache hit\n");

         blob_reader_init(&blob, buffer, buffer_size);
         shared_data = v3dv_pipeline_shared_data_create_from_blob(cache, &blob);
         free(buffer);

         if (shared_data) {
            /* Also store the deserialized data in the in-memory cache, so
             * the next lookup doesn't have to hit the disk.
             */
            pipeline_cache_upload_shared_data(cache, shared_data, true);
            return shared_data;
         }
      } else {
         if (debug_cache)
            fprintf(stderr, "\ton-disk-cache miss\n");
      }
   }
#endif

   return NULL;
}

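/* Frees everything owned by a shared_data entry: the variants, the
 * non-binning descriptor maps and the assembly BO. Callers are expected to
 * reach this through v3dv_pipeline_shared_data_unref() once the last
 * reference is gone, hence the assert below.
 */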
void
v3dv_pipeline_shared_data_destroy(struct v3dv_device *device,
                                  struct v3dv_pipeline_shared_data *shared_data)
{
   assert(shared_data->ref_cnt == 0);

   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
      if (shared_data->variants[stage] != NULL)
         v3dv_shader_variant_destroy(device, shared_data->variants[stage]);

      /* We don't free binning descriptor maps as we are sharing them
       * with the render shaders.
       */
      if (shared_data->maps[stage] != NULL &&
          !broadcom_shader_stage_is_binning(stage)) {
         vk_free(&device->vk.alloc, shared_data->maps[stage]);
      }
   }

   if (shared_data->assembly_bo)
      v3dv_bo_free(device, shared_data->assembly_bo);

   vk_free(&device->vk.alloc, shared_data);
}

static struct v3dv_pipeline_shared_data *
v3dv_pipeline_shared_data_new(struct v3dv_pipeline_cache *cache,
                              const unsigned char sha1_key[20],
                              struct v3dv_descriptor_maps **maps,
                              struct v3dv_shader_variant **variants,
                              const uint64_t *total_assembly,
                              const uint32_t total_assembly_size)
{
   size_t size = sizeof(struct v3dv_pipeline_shared_data);
   /* We create new_entry using the device alloc. Right now shared_data is
    * reffed and unreffed by both the pipeline and the pipeline cache, so we
    * can't ensure that the cache or pipeline alloc will still be available
    * on the last unref.
    */
   struct v3dv_pipeline_shared_data *new_entry =
      vk_zalloc2(&cache->device->vk.alloc, NULL, size, 8,
                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

   if (new_entry == NULL)
      return NULL;

   new_entry->ref_cnt = 1;
   memcpy(new_entry->sha1_key, sha1_key, 20);

   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
      new_entry->maps[stage] = maps[stage];
      new_entry->variants[stage] = variants[stage];
   }

   struct v3dv_bo *bo = v3dv_bo_alloc(cache->device, total_assembly_size,
                                      "pipeline shader assembly", true);
   if (!bo) {
      fprintf(stderr, "failed to allocate memory for shader assembly\n");
      v3dv_pipeline_shared_data_unref(cache->device, new_entry);
      return NULL;
   }

   /* Set assembly_bo before trying to map so that, if the map fails, the
    * unref below also releases the BO instead of leaking it.
    */
   new_entry->assembly_bo = bo;

   bool ok = v3dv_bo_map(cache->device, bo, total_assembly_size);
   if (!ok) {
      fprintf(stderr, "failed to map source shader buffer\n");
      v3dv_pipeline_shared_data_unref(cache->device, new_entry);
      return NULL;
   }

   memcpy(bo->map, total_assembly, total_assembly_size);

   return new_entry;
}

static void
pipeline_cache_upload_shared_data(struct v3dv_pipeline_cache *cache,
                                  struct v3dv_pipeline_shared_data *shared_data,
                                  bool from_disk_cache)
{
   assert(shared_data);

   if (!cache || !cache->cache)
      return;

   if (cache->stats.count > V3DV_MAX_PIPELINE_CACHE_ENTRIES)
      return;

   pthread_mutex_lock(&cache->mutex);
   struct hash_entry *entry =
      _mesa_hash_table_search(cache->cache, shared_data->sha1_key);

   if (entry) {
      pthread_mutex_unlock(&cache->mutex);
      return;
   }

   v3dv_pipeline_shared_data_ref(shared_data);
   _mesa_hash_table_insert(cache->cache, shared_data->sha1_key, shared_data);
   cache->stats.count++;
   if (debug_cache) {
      char sha1buf[41];
      _mesa_sha1_format(sha1buf, shared_data->sha1_key);

      fprintf(stderr, "pipeline cache %p, new cache entry with sha1 key %s:%p\n\n",
              cache, sha1buf, shared_data);
      if (dump_stats)
         cache_dump_stats(cache);
   }

   pthread_mutex_unlock(&cache->mutex);

#ifdef ENABLE_SHADER_CACHE
   /* If we are being called from an on-disk-cache hit, we can skip writing
    * back to the disk cache.
    */
   if (from_disk_cache)
      return;

   struct v3dv_device *device = cache->device;
   struct disk_cache *disk_cache = device->pdevice->disk_cache;
   if (disk_cache) {
      struct blob binary;
      blob_init(&binary);
      if (v3dv_pipeline_shared_data_write_to_blob(shared_data, &binary)) {
         cache_key cache_key;
         disk_cache_compute_key(disk_cache, shared_data->sha1_key, 20, cache_key);

         disk_cache_put(disk_cache, cache_key, binary.data, binary.size, NULL);
         if (debug_cache) {
            char sha1buf[41];
            _mesa_sha1_format(sha1buf, shared_data->sha1_key);

            fprintf(stderr, "on-disk-cache, new cache entry with sha1 key %s:%p\n\n",
                    sha1buf, shared_data);
         }
      }

      blob_finish(&binary);
   }
#endif
}

/* Uploads all the "cacheable" or shared data from the pipeline */
void
v3dv_pipeline_cache_upload_pipeline(struct v3dv_pipeline *pipeline,
                                    struct v3dv_pipeline_cache *cache)
{
   pipeline_cache_upload_shared_data(cache, pipeline->shared_data, false);
}

static struct serialized_nir*
serialized_nir_create_from_blob(struct v3dv_pipeline_cache *cache,
                                struct blob_reader *blob)
{
   const unsigned char *sha1_key = blob_read_bytes(blob, 20);
   uint32_t snir_size = blob_read_uint32(blob);
   const char *snir_data = blob_read_bytes(blob, snir_size);
   if (blob->overrun)
      return NULL;

   struct serialized_nir *snir =
      ralloc_size(cache->nir_cache, sizeof(*snir) + snir_size);
   memcpy(snir->sha1_key, sha1_key, 20);
   snir->size = snir_size;
   memcpy(snir->data, snir_data, snir_size);

   return snir;
}

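/* Reads back one shader variant as written by shader_variant_write_to_blob()
 * below. The serialized layout is:
 *
 *    uint32 stage | uint32 prog_data_size | prog_data bytes |
 *    uint32 uniform count | quniform_contents array | uniform data array |
 *    uint32 assembly_offset | uint32 qpu_insts_size
 */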
static struct v3dv_shader_variant*
shader_variant_create_from_blob(struct v3dv_device *device,
                                struct blob_reader *blob)
{
   VkResult result;

   enum broadcom_shader_stage stage = blob_read_uint32(blob);

   uint32_t prog_data_size = blob_read_uint32(blob);
   /* FIXME: as we include the stage, perhaps we can avoid prog_data_size? */
   assert(prog_data_size == v3d_prog_data_size(broadcom_shader_stage_to_gl(stage)));

   const void *prog_data = blob_read_bytes(blob, prog_data_size);
   if (blob->overrun)
      return NULL;

   uint32_t ulist_count = blob_read_uint32(blob);
   uint32_t contents_size = sizeof(enum quniform_contents) * ulist_count;
   const void *contents_data = blob_read_bytes(blob, contents_size);
   if (blob->overrun)
      return NULL;

   uint32_t ulist_data_size = sizeof(uint32_t) * ulist_count;
   const void *ulist_data_data = blob_read_bytes(blob, ulist_data_size);
   if (blob->overrun)
      return NULL;

   uint32_t assembly_offset = blob_read_uint32(blob);
   uint32_t qpu_insts_size = blob_read_uint32(blob);

   /* shader_variant_create expects a newly created prog_data of its own,
    * as that is what the v3d compiler returns, so we allocate one here
    * (including the uniform list) and fill it with the data we read from
    * the blob.
    */
   struct v3d_prog_data *new_prog_data = rzalloc_size(NULL, prog_data_size);
   memcpy(new_prog_data, prog_data, prog_data_size);
   struct v3d_uniform_list *ulist = &new_prog_data->uniforms;
   ulist->count = ulist_count;
   ulist->contents = ralloc_array(new_prog_data, enum quniform_contents, ulist->count);
   memcpy(ulist->contents, contents_data, contents_size);
   ulist->data = ralloc_array(new_prog_data, uint32_t, ulist->count);
   memcpy(ulist->data, ulist_data_data, ulist_data_size);

   return v3dv_shader_variant_create(device, stage,
                                     new_prog_data, prog_data_size,
                                     assembly_offset,
                                     NULL, qpu_insts_size,
                                     &result);
}

static struct v3dv_pipeline_shared_data *
v3dv_pipeline_shared_data_create_from_blob(struct v3dv_pipeline_cache *cache,
                                           struct blob_reader *blob)
{
   const unsigned char *sha1_key = blob_read_bytes(blob, 20);

   struct v3dv_descriptor_maps *maps[BROADCOM_SHADER_STAGES] = { 0 };

   uint8_t descriptor_maps_count = blob_read_uint8(blob);
   for (uint8_t count = 0; count < descriptor_maps_count; count++) {
      uint8_t stage = blob_read_uint8(blob);

      const struct v3dv_descriptor_maps *current_maps =
         blob_read_bytes(blob, sizeof(struct v3dv_descriptor_maps));

      if (blob->overrun)
         return NULL;

      maps[stage] = vk_zalloc2(&cache->device->vk.alloc, NULL,
                               sizeof(struct v3dv_descriptor_maps), 8,
                               VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

      if (maps[stage] == NULL)
         return NULL;

      memcpy(maps[stage], current_maps, sizeof(struct v3dv_descriptor_maps));
      if (broadcom_shader_stage_is_render_with_binning(stage)) {
         enum broadcom_shader_stage bin_stage =
            broadcom_binning_shader_stage_for_render_stage(stage);
         maps[bin_stage] = maps[stage];
      }
   }

   uint8_t variant_count = blob_read_uint8(blob);

   struct v3dv_shader_variant *variants[BROADCOM_SHADER_STAGES] = { 0 };

   for (uint8_t count = 0; count < variant_count; count++) {
      uint8_t stage = blob_read_uint8(blob);
      struct v3dv_shader_variant *variant =
         shader_variant_create_from_blob(cache->device, blob);
      variants[stage] = variant;
   }

   uint32_t total_assembly_size = blob_read_uint32(blob);
   const uint64_t *total_assembly =
      blob_read_bytes(blob, total_assembly_size);

   if (blob->overrun)
      return NULL;

   return v3dv_pipeline_shared_data_new(cache, sha1_key, maps, variants,
                                        total_assembly, total_assembly_size);
}

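/* Parses a cache blob with the layout produced by v3dv_GetPipelineCacheData():
 *
 *    vk_pipeline_cache_header |
 *    uint32 nir_count   | nir_count   x (sha1 | uint32 size | serialized NIR) |
 *    uint32 entry_count | entry_count x shared-data entry
 *
 * where each shared-data entry is:
 *
 *    sha1 | uint8 map_count | map_count x (uint8 stage | v3dv_descriptor_maps) |
 *    uint8 variant_count | variant_count x (uint8 stage | shader variant) |
 *    uint32 total_assembly_size | assembly bytes
 */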
static void
pipeline_cache_load(struct v3dv_pipeline_cache *cache,
                    size_t size,
                    const void *data)
{
   struct v3dv_device *device = cache->device;
   struct v3dv_physical_device *pdevice = &device->instance->physicalDevice;
   struct vk_pipeline_cache_header header;

   if (cache->cache == NULL || cache->nir_cache == NULL)
      return;

   if (size < sizeof(header))
      return;

   struct blob_reader blob;
   blob_reader_init(&blob, data, size);

   /* Read the header and reject any blob that wasn't produced by this
    * driver for this device.
    */
   blob_copy_bytes(&blob, &header, sizeof(header));
   if (header.header_size < sizeof(header))
      return;
   if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
      return;
   if (header.vendor_id != v3dv_physical_device_vendor_id(pdevice))
      return;
   if (header.device_id != v3dv_physical_device_device_id(pdevice))
      return;
   if (memcmp(header.uuid, pdevice->pipeline_cache_uuid, VK_UUID_SIZE) != 0)
      return;

   uint32_t nir_count = blob_read_uint32(&blob);
   if (blob.overrun)
      return;

   for (uint32_t i = 0; i < nir_count; i++) {
      struct serialized_nir *snir =
         serialized_nir_create_from_blob(cache, &blob);

      if (!snir)
         break;

      _mesa_hash_table_insert(cache->nir_cache, snir->sha1_key, snir);
      cache->nir_stats.count++;
   }

   uint32_t count = blob_read_uint32(&blob);
   if (blob.overrun)
      return;

   for (uint32_t i = 0; i < count; i++) {
      struct v3dv_pipeline_shared_data *cache_entry =
         v3dv_pipeline_shared_data_create_from_blob(cache, &blob);
      if (!cache_entry)
         break;

      _mesa_hash_table_insert(cache->cache, cache_entry->sha1_key, cache_entry);
      cache->stats.count++;
   }

   if (debug_cache) {
      fprintf(stderr, "pipeline cache %p, loaded %i nir shaders and "
              "%i entries\n", cache, nir_count, count);
      if (dump_stats)
         cache_dump_stats(cache);
   }
}

VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreatePipelineCache(VkDevice _device,
                         const VkPipelineCacheCreateInfo *pCreateInfo,
                         const VkAllocationCallbacks *pAllocator,
                         VkPipelineCache *pPipelineCache)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   struct v3dv_pipeline_cache *cache;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
   assert(pCreateInfo->flags == 0);

   cache = vk_object_zalloc(&device->vk, pAllocator,
                            sizeof(*cache),
                            VK_OBJECT_TYPE_PIPELINE_CACHE);

   if (cache == NULL)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   v3dv_pipeline_cache_init(cache, device,
                            device->instance->pipeline_cache_enabled);

   if (pCreateInfo->initialDataSize > 0) {
      pipeline_cache_load(cache,
                          pCreateInfo->initialDataSize,
                          pCreateInfo->pInitialData);
   }

   *pPipelineCache = v3dv_pipeline_cache_to_handle(cache);

   return VK_SUCCESS;
}

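/* For reference, the application side of this is plain Vulkan (nothing
 * v3dv-specific): a cold start passes no initial data, while a warm start
 * replays a blob saved from a previous run, e.g.
 *
 *    VkPipelineCacheCreateInfo info = {
 *       .sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO,
 *       .initialDataSize = saved_size,   // 0 on a cold start
 *       .pInitialData = saved_data,      // NULL on a cold start
 *    };
 *    vkCreatePipelineCache(device, &info, NULL, &cache);
 *
 * pipeline_cache_load() above silently ignores blobs from a different
 * driver or device, as the spec requires.
 */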
void
v3dv_pipeline_cache_finish(struct v3dv_pipeline_cache *cache)
{
   pthread_mutex_destroy(&cache->mutex);

   if (dump_stats_on_destroy)
      cache_dump_stats(cache);

   if (cache->nir_cache) {
      hash_table_foreach(cache->nir_cache, entry)
         ralloc_free(entry->data);

      _mesa_hash_table_destroy(cache->nir_cache, NULL);
   }

   if (cache->cache) {
      hash_table_foreach(cache->cache, entry) {
         struct v3dv_pipeline_shared_data *cache_entry = entry->data;
         if (cache_entry)
            v3dv_pipeline_shared_data_unref(cache->device, cache_entry);
      }

      _mesa_hash_table_destroy(cache->cache, NULL);
   }
}

VKAPI_ATTR void VKAPI_CALL
v3dv_DestroyPipelineCache(VkDevice _device,
                          VkPipelineCache _cache,
                          const VkAllocationCallbacks *pAllocator)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);

   if (!cache)
      return;

   v3dv_pipeline_cache_finish(cache);

   vk_object_free(&device->vk, pAllocator, cache);
}

VKAPI_ATTR VkResult VKAPI_CALL
v3dv_MergePipelineCaches(VkDevice device,
                         VkPipelineCache dstCache,
                         uint32_t srcCacheCount,
                         const VkPipelineCache *pSrcCaches)
{
   V3DV_FROM_HANDLE(v3dv_pipeline_cache, dst, dstCache);

   if (!dst->cache || !dst->nir_cache)
      return VK_SUCCESS;

   for (uint32_t i = 0; i < srcCacheCount; i++) {
      V3DV_FROM_HANDLE(v3dv_pipeline_cache, src, pSrcCaches[i]);
      if (!src->cache || !src->nir_cache)
         continue;

      hash_table_foreach(src->nir_cache, entry) {
         struct serialized_nir *src_snir = entry->data;
         assert(src_snir);

         if (_mesa_hash_table_search(dst->nir_cache, src_snir->sha1_key))
            continue;

         /* FIXME: we are using serialized NIR shaders because they are
          * convenient to create and store in the cache, but this requires
          * copying the serialized NIR here (and in some other places).
          * Perhaps it would make sense to handle the NIR shaders with
          * ref-counted shared structures, like the variants.
          */
         struct serialized_nir *snir_dst =
            ralloc_size(dst->nir_cache, sizeof(*snir_dst) + src_snir->size);
         memcpy(snir_dst->sha1_key, src_snir->sha1_key, 20);
         snir_dst->size = src_snir->size;
         memcpy(snir_dst->data, src_snir->data, src_snir->size);

         _mesa_hash_table_insert(dst->nir_cache, snir_dst->sha1_key, snir_dst);
         dst->nir_stats.count++;
         if (debug_cache) {
            char sha1buf[41];
            _mesa_sha1_format(sha1buf, snir_dst->sha1_key);

            fprintf(stderr, "pipeline cache %p, added nir entry %s "
                    "from pipeline cache %p\n",
                    dst, sha1buf, src);
            if (dump_stats)
               cache_dump_stats(dst);
         }
      }

      hash_table_foreach(src->cache, entry) {
         struct v3dv_pipeline_shared_data *cache_entry = entry->data;
         assert(cache_entry);

         if (_mesa_hash_table_search(dst->cache, cache_entry->sha1_key))
            continue;

         v3dv_pipeline_shared_data_ref(cache_entry);
         _mesa_hash_table_insert(dst->cache, cache_entry->sha1_key, cache_entry);

         dst->stats.count++;
         if (debug_cache) {
            char sha1buf[41];
            _mesa_sha1_format(sha1buf, cache_entry->sha1_key);

            fprintf(stderr, "pipeline cache %p, added entry %s "
                    "from pipeline cache %p\n",
                    dst, sha1buf, src);
            if (dump_stats)
               cache_dump_stats(dst);
         }
      }
   }

   return VK_SUCCESS;
}

static bool
shader_variant_write_to_blob(const struct v3dv_shader_variant *variant,
                             struct blob *blob)
{
   blob_write_uint32(blob, variant->stage);

   blob_write_uint32(blob, variant->prog_data_size);
   blob_write_bytes(blob, variant->prog_data.base, variant->prog_data_size);

   struct v3d_uniform_list *ulist = &variant->prog_data.base->uniforms;
   blob_write_uint32(blob, ulist->count);
   blob_write_bytes(blob, ulist->contents, sizeof(enum quniform_contents) * ulist->count);
   blob_write_bytes(blob, ulist->data, sizeof(uint32_t) * ulist->count);

   blob_write_uint32(blob, variant->assembly_offset);
   blob_write_uint32(blob, variant->qpu_insts_size);

   return !blob->out_of_memory;
}

static bool
v3dv_pipeline_shared_data_write_to_blob(const struct v3dv_pipeline_shared_data *cache_entry,
                                        struct blob *blob)
{
   blob_write_bytes(blob, cache_entry->sha1_key, 20);

   uint8_t descriptor_maps_count = 0;
   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
      if (broadcom_shader_stage_is_binning(stage))
         continue;
      if (cache_entry->maps[stage] == NULL)
         continue;
      descriptor_maps_count++;
   }

   /* Compute pipelines only have one descriptor map, while graphics
    * pipelines may have 2 (VS+FS) or 3 (VS+GS+FS), since the binning
    * stages take the descriptor map from their render stage.
    */
   assert((descriptor_maps_count >= 2 && descriptor_maps_count <= 3) ||
          (descriptor_maps_count == 1 && cache_entry->variants[BROADCOM_SHADER_COMPUTE]));
   blob_write_uint8(blob, descriptor_maps_count);

   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
      if (cache_entry->maps[stage] == NULL)
         continue;
      if (broadcom_shader_stage_is_binning(stage))
         continue;

      blob_write_uint8(blob, stage);
      blob_write_bytes(blob, cache_entry->maps[stage],
                       sizeof(struct v3dv_descriptor_maps));
   }

   uint8_t variant_count = 0;
   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
      if (cache_entry->variants[stage] == NULL)
         continue;
      variant_count++;
   }

   /* Graphics pipelines with VS+FS have 3 variants (render VS, binning VS
    * and FS), VS+GS+FS have 5, and compute pipelines have just 1.
    */
   assert((variant_count == 5 || variant_count == 3) ||
          (variant_count == 1 && cache_entry->variants[BROADCOM_SHADER_COMPUTE]));
   blob_write_uint8(blob, variant_count);

   uint32_t total_assembly_size = 0;
   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
      if (cache_entry->variants[stage] == NULL)
         continue;

      blob_write_uint8(blob, stage);
      if (!shader_variant_write_to_blob(cache_entry->variants[stage], blob))
         return false;

      total_assembly_size += cache_entry->variants[stage]->qpu_insts_size;
   }
   blob_write_uint32(blob, total_assembly_size);

   assert(cache_entry->assembly_bo->map);
   assert(cache_entry->assembly_bo->size >= total_assembly_size);
   blob_write_bytes(blob, cache_entry->assembly_bo->map, total_assembly_size);

   return !blob->out_of_memory;
}

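/* vkGetPipelineCacheData backend. This follows the standard Vulkan two-call
 * idiom: with pData == NULL we serialize into a fixed blob of SIZE_MAX just
 * to measure the size; with a real buffer we stop as soon as an entry no
 * longer fits and return VK_INCOMPLETE. Caller-side sketch (plain Vulkan):
 *
 *    size_t size = 0;
 *    vkGetPipelineCacheData(device, cache, &size, NULL);
 *    void *data = malloc(size);
 *    vkGetPipelineCacheData(device, cache, &size, data);
 */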
VKAPI_ATTR VkResult VKAPI_CALL
v3dv_GetPipelineCacheData(VkDevice _device,
                          VkPipelineCache _cache,
                          size_t *pDataSize,
                          void *pData)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);

   struct blob blob;
   if (pData) {
      blob_init_fixed(&blob, pData, *pDataSize);
   } else {
      blob_init_fixed(&blob, NULL, SIZE_MAX);
   }

   struct v3dv_physical_device *pdevice = &device->instance->physicalDevice;
   VkResult result = VK_INCOMPLETE;

   pthread_mutex_lock(&cache->mutex);

   struct vk_pipeline_cache_header header = {
      .header_size = sizeof(struct vk_pipeline_cache_header),
      .header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE,
      .vendor_id = v3dv_physical_device_vendor_id(pdevice),
      .device_id = v3dv_physical_device_device_id(pdevice),
   };
   memcpy(header.uuid, pdevice->pipeline_cache_uuid, VK_UUID_SIZE);
   blob_write_bytes(&blob, &header, sizeof(header));

   uint32_t nir_count = 0;
   intptr_t nir_count_offset = blob_reserve_uint32(&blob);
   if (nir_count_offset < 0) {
      *pDataSize = 0;
      goto done;
   }

   if (cache->nir_cache) {
      hash_table_foreach(cache->nir_cache, entry) {
         const struct serialized_nir *snir = entry->data;

         size_t save_size = blob.size;

         blob_write_bytes(&blob, snir->sha1_key, 20);
         blob_write_uint32(&blob, snir->size);
         blob_write_bytes(&blob, snir->data, snir->size);

         if (blob.out_of_memory) {
            /* The entry didn't fit: roll back to the previous size and
             * stop serializing, returning VK_INCOMPLETE.
             */
            blob.size = save_size;
            goto done;
         }

         nir_count++;
      }
   }
   blob_overwrite_uint32(&blob, nir_count_offset, nir_count);

   uint32_t count = 0;
   intptr_t count_offset = blob_reserve_uint32(&blob);
   if (count_offset < 0) {
      *pDataSize = 0;
      goto done;
   }

   if (cache->cache) {
      hash_table_foreach(cache->cache, entry) {
         struct v3dv_pipeline_shared_data *cache_entry = entry->data;

         size_t save_size = blob.size;
         if (!v3dv_pipeline_shared_data_write_to_blob(cache_entry, &blob)) {
            /* If it fails, reset to the previous size and bail */
            blob.size = save_size;
            goto done;
         }

         count++;
      }
   }

   blob_overwrite_uint32(&blob, count_offset, count);

   *pDataSize = blob.size;

   result = VK_SUCCESS;

   if (debug_cache) {
      assert(count <= cache->stats.count);
      fprintf(stderr, "GetPipelineCacheData: serializing cache %p, "
              "%i nir shader entries "
              "%i entries, %u DataSize\n",
              cache, nir_count, count, (uint32_t) *pDataSize);
   }

done:
   blob_finish(&blob);

   pthread_mutex_unlock(&cache->mutex);

   return result;
}