CoCalc -- v3dv_pipeline

GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/broadcom/vulkan/v3dv_pipeline_cache.c
⁴⁵⁶⁰ views
1
/*
2
 * Copyright © 2019 Raspberry Pi
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 * copy of this software and associated documentation files (the "Software"),
6
 * to deal in the Software without restriction, including without limitation
7
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
 * and/or sell copies of the Software, and to permit persons to whom the
9
 * Software is furnished to do so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice (including the next
12
 * paragraph) shall be included in all copies or substantial portions of the
13
 * Software.
14
 *
15
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21
 * IN THE SOFTWARE.
22
 */
23

24
#include "v3dv_private.h"
25
#include "vulkan/util/vk_util.h"
26
#include "util/blob.h"
27
#include "nir/nir_serialize.h"
28

29
/* Compile-time debugging switches, all disabled by default. */

/* Trace cache activity (searches, hits, misses, insertions) on stderr. */
static const bool debug_cache = false;
/* When tracing is enabled, also print the hit/miss/entry counters. */
static const bool dump_stats = false;
/* Print the hit/miss/entry counters when a cache is torn down. */
static const bool dump_stats_on_destroy = false;

/* Entry limit; the same value is applied to the NIR table and to the
 * pipeline shared-data table.
 */
#define V3DV_MAX_PIPELINE_CACHE_ENTRIES 4096
35

36
/* Hash callback for the cache hash tables: hashes the 20 bytes of a SHA-1
 * key.
 */
static uint32_t
sha1_hash_func(const void *sha1)
{
   const size_t sha1_length = 20;

   return _mesa_hash_data(sha1, sha1_length);
}
41

42
/* Equality callback for the cache hash tables: two keys match when their
 * first 20 bytes (one SHA-1 digest) are identical.
 */
static bool
sha1_compare_func(const void *sha1_a, const void *sha1_b)
{
   const int difference = memcmp(sha1_a, sha1_b, 20);

   return !difference;
}
47

48
/* A serialized NIR shader as stored in the NIR cache: the SHA-1 key it is
 * filed under, followed by `size` bytes of serialized data. Instances are
 * allocated with room for the payload after the fixed fields
 * (sizeof(struct serialized_nir) + size).
 */
struct serialized_nir {
   unsigned char sha1_key[20];
   size_t size;
   /* C99 flexible array member (instead of the GNU zero-length array). */
   char data[];
};
53

54
static void
55
cache_dump_stats(struct v3dv_pipeline_cache *cache)
56
{
57
   fprintf(stderr, "  NIR cache entries:      %d\n", cache->nir_stats.count);
58
   fprintf(stderr, "  NIR cache miss count:   %d\n", cache->nir_stats.miss);
59
   fprintf(stderr, "  NIR cache hit  count:   %d\n", cache->nir_stats.hit);
60

61
   fprintf(stderr, "  cache entries:      %d\n", cache->stats.count);
62
   fprintf(stderr, "  cache miss count:   %d\n", cache->stats.miss);
63
   fprintf(stderr, "  cache hit  count:   %d\n", cache->stats.hit);
64
}
65

66
void
67
v3dv_pipeline_cache_upload_nir(struct v3dv_pipeline *pipeline,
68
                               struct v3dv_pipeline_cache *cache,
69
                               nir_shader *nir,
70
                               unsigned char sha1_key[20])
71
{
72
   if (!cache || !cache->nir_cache)
73
      return;
74

75
   if (cache->nir_stats.count > V3DV_MAX_PIPELINE_CACHE_ENTRIES)
76
      return;
77

78
   pthread_mutex_lock(&cache->mutex);
79
   struct hash_entry *entry =
80
      _mesa_hash_table_search(cache->nir_cache, sha1_key);
81
   pthread_mutex_unlock(&cache->mutex);
82
   if (entry)
83
      return;
84

85
   struct blob blob;
86
   blob_init(&blob);
87

88
   nir_serialize(&blob, nir, false);
89
   if (blob.out_of_memory) {
90
      blob_finish(&blob);
91
      return;
92
   }
93

94
   pthread_mutex_lock(&cache->mutex);
95
   /* Because ralloc isn't thread-safe, we have to do all this inside the
96
    * lock.  We could unlock for the big memcpy but it's probably not worth
97
    * the hassle.
98
    */
99
   entry = _mesa_hash_table_search(cache->nir_cache, sha1_key);
100
   if (entry) {
101
      blob_finish(&blob);
102
      pthread_mutex_unlock(&cache->mutex);
103
      return;
104
   }
105

106
   struct serialized_nir *snir =
107
      ralloc_size(cache->nir_cache, sizeof(*snir) + blob.size);
108
   memcpy(snir->sha1_key, sha1_key, 20);
109
   snir->size = blob.size;
110
   memcpy(snir->data, blob.data, blob.size);
111

112
   blob_finish(&blob);
113

114
   cache->nir_stats.count++;
115
   if (debug_cache) {
116
      char sha1buf[41];
117
      _mesa_sha1_format(sha1buf, snir->sha1_key);
118
      fprintf(stderr, "pipeline cache %p, new nir entry %s\n", cache, sha1buf);
119
      if (dump_stats)
120
         cache_dump_stats(cache);
121
   }
122

123
   _mesa_hash_table_insert(cache->nir_cache, snir->sha1_key, snir);
124

125
   pthread_mutex_unlock(&cache->mutex);
126
}
127

128
nir_shader*
129
v3dv_pipeline_cache_search_for_nir(struct v3dv_pipeline *pipeline,
130
                                   struct v3dv_pipeline_cache *cache,
131
                                   const nir_shader_compiler_options *nir_options,
132
                                   unsigned char sha1_key[20])
133
{
134
   if (!cache || !cache->nir_cache)
135
      return NULL;
136

137
   if (debug_cache) {
138
      char sha1buf[41];
139
      _mesa_sha1_format(sha1buf, sha1_key);
140

141
      fprintf(stderr, "pipeline cache %p, search for nir %s\n", cache, sha1buf);
142
   }
143

144
   const struct serialized_nir *snir = NULL;
145

146
   pthread_mutex_lock(&cache->mutex);
147
   struct hash_entry *entry =
148
      _mesa_hash_table_search(cache->nir_cache, sha1_key);
149
   if (entry)
150
      snir = entry->data;
151
   pthread_mutex_unlock(&cache->mutex);
152

153
   if (snir) {
154
      struct blob_reader blob;
155
      blob_reader_init(&blob, snir->data, snir->size);
156

157
      /* We use context NULL as we want the p_stage to keep the reference to
158
       * nir, as we keep open the possibility of provide a shader variant
159
       * after cache creation
160
       */
161
      nir_shader *nir = nir_deserialize(NULL, nir_options, &blob);
162
      if (blob.overrun) {
163
         ralloc_free(nir);
164
      } else {
165
         cache->nir_stats.hit++;
166
         if (debug_cache) {
167
            fprintf(stderr, "\tnir cache hit: %p\n", nir);
168
            if (dump_stats)
169
               cache_dump_stats(cache);
170
         }
171
         return nir;
172
      }
173
   }
174

175
   cache->nir_stats.miss++;
176
   if (debug_cache) {
177
      fprintf(stderr, "\tnir cache miss\n");
178
      if (dump_stats)
179
         cache_dump_stats(cache);
180
   }
181

182
   return NULL;
183
}
184

185
void
186
v3dv_pipeline_cache_init(struct v3dv_pipeline_cache *cache,
187
                         struct v3dv_device *device,
188
                         bool cache_enabled)
189
{
190
   cache->device = device;
191
   pthread_mutex_init(&cache->mutex, NULL);
192

193
   if (cache_enabled) {
194
      cache->nir_cache = _mesa_hash_table_create(NULL, sha1_hash_func,
195
                                                 sha1_compare_func);
196
      cache->nir_stats.miss = 0;
197
      cache->nir_stats.hit = 0;
198
      cache->nir_stats.count = 0;
199

200
      cache->cache = _mesa_hash_table_create(NULL, sha1_hash_func,
201
                                             sha1_compare_func);
202
      cache->stats.miss = 0;
203
      cache->stats.hit = 0;
204
      cache->stats.count = 0;
205
   } else {
206
      cache->nir_cache = NULL;
207
      cache->cache = NULL;
208
   }
209

210
}
211

212
/* Forward declarations for helpers that are used before their definition. */

/* Rebuilds a shared-data object from its serialized form. */
static struct v3dv_pipeline_shared_data *
v3dv_pipeline_shared_data_create_from_blob(struct v3dv_pipeline_cache *cache,
                                           struct blob_reader *blob);

/* Adds a shared-data object to the cache. */
static void
pipeline_cache_upload_shared_data(struct v3dv_pipeline_cache *cache,
                                  struct v3dv_pipeline_shared_data *shared_data,
                                  bool from_disk_cache);

/* Serializes a shared-data object into a blob. */
static bool
v3dv_pipeline_shared_data_write_to_blob(const struct v3dv_pipeline_shared_data *cache_entry,
                                        struct blob *blob);
224

225
/**
226
 * It searchs for pipeline cached data, and returns a v3dv_pipeline_shared_data with
227
 * it, or NULL if doesn't have it cached. On the former, it will increases the
228
 * ref_count, so caller is responsible to unref it.
229
 */
230
struct v3dv_pipeline_shared_data *
231
v3dv_pipeline_cache_search_for_pipeline(struct v3dv_pipeline_cache *cache,
232
                                        unsigned char sha1_key[20])
233
{
234
   if (!cache || !cache->cache)
235
      return NULL;
236

237
   if (debug_cache) {
238
      char sha1buf[41];
239
      _mesa_sha1_format(sha1buf, sha1_key);
240

241
      fprintf(stderr, "pipeline cache %p, search pipeline with key %s\n", cache, sha1buf);
242
   }
243

244
   pthread_mutex_lock(&cache->mutex);
245

246
   struct hash_entry *entry =
247
      _mesa_hash_table_search(cache->cache, sha1_key);
248

249
   if (entry) {
250
      struct v3dv_pipeline_shared_data *cache_entry =
251
         (struct v3dv_pipeline_shared_data *) entry->data;
252
      assert(cache_entry);
253

254
      cache->stats.hit++;
255
      if (debug_cache) {
256
         fprintf(stderr, "\tcache hit: %p\n", cache_entry);
257
         if (dump_stats)
258
            cache_dump_stats(cache);
259
      }
260

261

262
      v3dv_pipeline_shared_data_ref(cache_entry);
263

264
      pthread_mutex_unlock(&cache->mutex);
265

266
      return cache_entry;
267
   }
268

269
   cache->stats.miss++;
270
   if (debug_cache) {
271
      fprintf(stderr, "\tcache miss\n");
272
      if (dump_stats)
273
         cache_dump_stats(cache);
274
   }
275

276
   pthread_mutex_unlock(&cache->mutex);
277

278
#ifdef ENABLE_SHADER_CACHE
279
   struct v3dv_device *device = cache->device;
280
   struct disk_cache *disk_cache = device->pdevice->disk_cache;
281
   /* Note that the on-disk-cache can be independently disabled, while keeping
282
    * the pipeline cache working, by using the environment variable
283
    * MESA_GLSL_CACHE_DISABLE.  In that case the calls to disk_cache_put/get
284
    * will not do anything.
285
    */
286
   if (disk_cache && device->instance->pipeline_cache_enabled) {
287
      cache_key cache_key;
288
      disk_cache_compute_key(disk_cache, sha1_key, 20, cache_key);
289

290
      size_t buffer_size;
291
      uint8_t *buffer = disk_cache_get(disk_cache, cache_key, &buffer_size);
292
      if (buffer) {
293
         struct blob_reader blob;
294
         struct v3dv_pipeline_shared_data *shared_data;
295

296
         if (debug_cache)
297
            fprintf(stderr, "\ton-disk-cache hit\n");
298

299
         blob_reader_init(&blob, buffer, buffer_size);
300
         shared_data = v3dv_pipeline_shared_data_create_from_blob(cache, &blob);
301
         free(buffer);
302

303
         if (shared_data) {
304
            if (cache)
305
               pipeline_cache_upload_shared_data(cache, shared_data, true);
306
            return shared_data;
307
         }
308
      } else {
309
         if (debug_cache)
310
            fprintf(stderr, "\ton-disk-cache miss\n");
311
      }
312
   }
313
#endif
314

315
   return NULL;
316
}
317

318
void
319
v3dv_pipeline_shared_data_destroy(struct v3dv_device *device,
320
                                  struct v3dv_pipeline_shared_data *shared_data)
321
{
322
   assert(shared_data->ref_cnt == 0);
323

324
   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
325
      if (shared_data->variants[stage] != NULL)
326
         v3dv_shader_variant_destroy(device, shared_data->variants[stage]);
327

328
      /* We don't free binning descriptor maps as we are sharing them
329
       * with the render shaders.
330
       */
331
      if (shared_data->maps[stage] != NULL &&
332
          !broadcom_shader_stage_is_binning(stage)) {
333
         vk_free(&device->vk.alloc, shared_data->maps[stage]);
334
      }
335
   }
336

337
   if (shared_data->assembly_bo)
338
      v3dv_bo_free(device, shared_data->assembly_bo);
339

340
   vk_free(&device->vk.alloc, shared_data);
341
}
342

343
static struct v3dv_pipeline_shared_data *
344
v3dv_pipeline_shared_data_new(struct v3dv_pipeline_cache *cache,
345
                              const unsigned char sha1_key[20],
346
                              struct v3dv_descriptor_maps **maps,
347
                              struct v3dv_shader_variant **variants,
348
                              const uint64_t *total_assembly,
349
                              const uint32_t total_assembly_size)
350
{
351
   size_t size = sizeof(struct v3dv_pipeline_shared_data);
352
   /* We create new_entry using the device alloc. Right now shared_data is ref
353
    * and unref by both the pipeline and the pipeline cache, so we can't
354
    * ensure that the cache or pipeline alloc will be available on the last
355
    * unref.
356
    */
357
   struct v3dv_pipeline_shared_data *new_entry =
358
      vk_zalloc2(&cache->device->vk.alloc, NULL, size, 8,
359
                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
360

361
   if (new_entry == NULL)
362
      return NULL;
363

364
   new_entry->ref_cnt = 1;
365
   memcpy(new_entry->sha1_key, sha1_key, 20);
366

367
   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
368
      new_entry->maps[stage] = maps[stage];
369
      new_entry->variants[stage] = variants[stage];
370
   }
371

372
   struct v3dv_bo *bo = v3dv_bo_alloc(cache->device, total_assembly_size,
373
                                      "pipeline shader assembly", true);
374
   if (!bo) {
375
      fprintf(stderr, "failed to allocate memory for shaders assembly\n");
376
      v3dv_pipeline_shared_data_unref(cache->device, new_entry);
377
      return NULL;
378
   }
379

380
   bool ok = v3dv_bo_map(cache->device, bo, total_assembly_size);
381
   if (!ok) {
382
      fprintf(stderr, "failed to map source shader buffer\n");
383
      v3dv_pipeline_shared_data_unref(cache->device, new_entry);
384
      return NULL;
385
   }
386

387
   memcpy(bo->map, total_assembly, total_assembly_size);
388

389
   new_entry->assembly_bo = bo;
390

391
   return new_entry;
392
}
393

394
static void
395
pipeline_cache_upload_shared_data(struct v3dv_pipeline_cache *cache,
396
                                  struct v3dv_pipeline_shared_data *shared_data,
397
                                  bool from_disk_cache)
398
{
399
   assert(shared_data);
400

401
   if (!cache || !cache->cache)
402
      return;
403

404
   if (cache->stats.count > V3DV_MAX_PIPELINE_CACHE_ENTRIES)
405
      return;
406

407
   pthread_mutex_lock(&cache->mutex);
408
   struct hash_entry *entry =
409
      _mesa_hash_table_search(cache->cache, shared_data->sha1_key);
410

411
   if (entry) {
412
      pthread_mutex_unlock(&cache->mutex);
413
      return;
414
   }
415

416
   v3dv_pipeline_shared_data_ref(shared_data);
417
   _mesa_hash_table_insert(cache->cache, shared_data->sha1_key, shared_data);
418
   cache->stats.count++;
419
   if (debug_cache) {
420
      char sha1buf[41];
421
      _mesa_sha1_format(sha1buf, shared_data->sha1_key);
422

423
      fprintf(stderr, "pipeline cache %p, new cache entry with sha1 key %s:%p\n\n",
424
              cache, sha1buf, shared_data);
425
      if (dump_stats)
426
         cache_dump_stats(cache);
427
   }
428

429
   pthread_mutex_unlock(&cache->mutex);
430

431
#ifdef ENABLE_SHADER_CACHE
432
   /* If we are being called from a on-disk-cache hit, we can skip writing to
433
    * the disk cache
434
    */
435
   if (from_disk_cache)
436
      return;
437

438
   struct v3dv_device *device = cache->device;
439
   struct disk_cache *disk_cache = device->pdevice->disk_cache;
440
   if (disk_cache) {
441
      struct blob binary;
442
      blob_init(&binary);
443
      if (v3dv_pipeline_shared_data_write_to_blob(shared_data, &binary)) {
444
         cache_key cache_key;
445
         disk_cache_compute_key(disk_cache, shared_data->sha1_key, 20, cache_key);
446

447
         disk_cache_put(disk_cache, cache_key, binary.data, binary.size, NULL);
448
         if (debug_cache) {
449
            char sha1buf[41];
450
            _mesa_sha1_format(sha1buf, shared_data->sha1_key);
451

452
            fprintf(stderr, "on-disk-cache, new cache entry with sha1 key %s:%p\n\n",
453
                    sha1buf, shared_data);
454
         }
455
      }
456

457
      blob_finish(&binary);
458
   }
459
#endif
460
}
461

462
/* Uploads all the "cacheable" or shared data from the pipeline */
463
void
464
v3dv_pipeline_cache_upload_pipeline(struct v3dv_pipeline *pipeline,
465
                                    struct v3dv_pipeline_cache *cache)
466
{
467
   pipeline_cache_upload_shared_data(cache, pipeline->shared_data, false);
468
}
469

470
static struct serialized_nir*
471
serialized_nir_create_from_blob(struct v3dv_pipeline_cache *cache,
472
                                struct blob_reader *blob)
473
{
474
   const unsigned char *sha1_key = blob_read_bytes(blob, 20);
475
   uint32_t snir_size = blob_read_uint32(blob);
476
   const char* snir_data = blob_read_bytes(blob, snir_size);
477
   if (blob->overrun)
478
      return NULL;
479

480
   struct serialized_nir *snir =
481
      ralloc_size(cache->nir_cache, sizeof(*snir) + snir_size);
482
   memcpy(snir->sha1_key, sha1_key, 20);
483
   snir->size = snir_size;
484
   memcpy(snir->data, snir_data, snir_size);
485

486
   return snir;
487
}
488

489
static struct v3dv_shader_variant*
490
shader_variant_create_from_blob(struct v3dv_device *device,
491
                                struct blob_reader *blob)
492
{
493
   VkResult result;
494

495
   enum broadcom_shader_stage stage = blob_read_uint32(blob);
496

497
   uint32_t prog_data_size = blob_read_uint32(blob);
498
   /* FIXME: as we include the stage perhaps we can avoid prog_data_size? */
499
   assert(prog_data_size == v3d_prog_data_size(broadcom_shader_stage_to_gl(stage)));
500

501
   const void *prog_data = blob_read_bytes(blob, prog_data_size);
502
   if (blob->overrun)
503
      return NULL;
504

505
   uint32_t ulist_count = blob_read_uint32(blob);
506
   uint32_t contents_size = sizeof(enum quniform_contents) * ulist_count;
507
   const void *contents_data = blob_read_bytes(blob, contents_size);
508
   if (blob->overrun)
509
      return NULL;
510

511
   uint ulist_data_size = sizeof(uint32_t) * ulist_count;
512
   const void *ulist_data_data = blob_read_bytes(blob, ulist_data_size);
513
   if (blob->overrun)
514
      return NULL;
515

516
   uint32_t assembly_offset = blob_read_uint32(blob);
517
   uint32_t qpu_insts_size = blob_read_uint32(blob);
518

519
   /* shader_variant_create expects a newly created prog_data for their own,
520
    * as it is what the v3d compiler returns. So we are also allocating one
521
    * (including the uniform list) and filled it up with the data that we read
522
    * from the blob
523
    */
524
   struct v3d_prog_data *new_prog_data = rzalloc_size(NULL, prog_data_size);
525
   memcpy(new_prog_data, prog_data, prog_data_size);
526
   struct v3d_uniform_list *ulist = &new_prog_data->uniforms;
527
   ulist->count = ulist_count;
528
   ulist->contents = ralloc_array(new_prog_data, enum quniform_contents, ulist->count);
529
   memcpy(ulist->contents, contents_data, contents_size);
530
   ulist->data = ralloc_array(new_prog_data, uint32_t, ulist->count);
531
   memcpy(ulist->data, ulist_data_data, ulist_data_size);
532

533
   return v3dv_shader_variant_create(device, stage,
534
                                     new_prog_data, prog_data_size,
535
                                     assembly_offset,
536
                                     NULL, qpu_insts_size,
537
                                     &result);
538
}
539

540
static struct v3dv_pipeline_shared_data *
541
v3dv_pipeline_shared_data_create_from_blob(struct v3dv_pipeline_cache *cache,
542
                                           struct blob_reader *blob)
543
{
544
   const unsigned char *sha1_key = blob_read_bytes(blob, 20);
545

546
   struct v3dv_descriptor_maps *maps[BROADCOM_SHADER_STAGES] = { 0 };
547

548
   uint8_t descriptor_maps_count = blob_read_uint8(blob);
549
   for (uint8_t count = 0; count < descriptor_maps_count; count++) {
550
      uint8_t stage = blob_read_uint8(blob);
551

552
      const struct v3dv_descriptor_maps *current_maps =
553
         blob_read_bytes(blob, sizeof(struct v3dv_descriptor_maps));
554

555
      if (blob->overrun)
556
         return NULL;
557

558
      maps[stage] = vk_zalloc2(&cache->device->vk.alloc, NULL,
559
                               sizeof(struct v3dv_descriptor_maps), 8,
560
                               VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
561

562
      if (maps[stage] == NULL)
563
         return NULL;
564

565
      memcpy(maps[stage], current_maps, sizeof(struct v3dv_descriptor_maps));
566
      if (broadcom_shader_stage_is_render_with_binning(stage)) {
567
         enum broadcom_shader_stage bin_stage =
568
            broadcom_binning_shader_stage_for_render_stage(stage);
569
            maps[bin_stage] = maps[stage];
570
      }
571
   }
572

573
   uint8_t variant_count = blob_read_uint8(blob);
574

575
   struct v3dv_shader_variant *variants[BROADCOM_SHADER_STAGES] = { 0 };
576

577
   for (uint8_t count = 0; count < variant_count; count++) {
578
      uint8_t stage = blob_read_uint8(blob);
579
      struct v3dv_shader_variant *variant =
580
         shader_variant_create_from_blob(cache->device, blob);
581
      variants[stage] = variant;
582
   }
583

584
   uint32_t total_assembly_size = blob_read_uint32(blob);
585
   const uint64_t *total_assembly =
586
      blob_read_bytes(blob, total_assembly_size);
587

588
   if (blob->overrun)
589
      return NULL;
590

591
   return v3dv_pipeline_shared_data_new(cache, sha1_key, maps, variants,
592
                                        total_assembly, total_assembly_size);
593
}
594

595
static void
596
pipeline_cache_load(struct v3dv_pipeline_cache *cache,
597
                    size_t size,
598
                    const void *data)
599
{
600
   struct v3dv_device *device = cache->device;
601
   struct v3dv_physical_device *pdevice = &device->instance->physicalDevice;
602
   struct vk_pipeline_cache_header header;
603

604
   if (cache->cache == NULL || cache->nir_cache == NULL)
605
      return;
606

607
   struct blob_reader blob;
608
   blob_reader_init(&blob, data, size);
609

610
   blob_copy_bytes(&blob, &header, sizeof(header));
611
   if (size < sizeof(header))
612
      return;
613
   memcpy(&header, data, sizeof(header));
614
   if (header.header_size < sizeof(header))
615
      return;
616
   if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
617
      return;
618
   if (header.vendor_id != v3dv_physical_device_vendor_id(pdevice))
619
      return;
620
   if (header.device_id != v3dv_physical_device_device_id(pdevice))
621
      return;
622
   if (memcmp(header.uuid, pdevice->pipeline_cache_uuid, VK_UUID_SIZE) != 0)
623
      return;
624

625
   uint32_t nir_count = blob_read_uint32(&blob);
626
   if (blob.overrun)
627
      return;
628

629
   for (uint32_t i = 0; i < nir_count; i++) {
630
      struct serialized_nir *snir =
631
         serialized_nir_create_from_blob(cache, &blob);
632

633
      if (!snir)
634
         break;
635

636
      _mesa_hash_table_insert(cache->nir_cache, snir->sha1_key, snir);
637
      cache->nir_stats.count++;
638
   }
639

640
   uint32_t count = blob_read_uint32(&blob);
641
   if (blob.overrun)
642
      return;
643

644
   for (uint32_t i = 0; i < count; i++) {
645
      struct v3dv_pipeline_shared_data *cache_entry =
646
         v3dv_pipeline_shared_data_create_from_blob(cache, &blob);
647
      if (!cache_entry)
648
         break;
649

650
      _mesa_hash_table_insert(cache->cache, cache_entry->sha1_key, cache_entry);
651
      cache->stats.count++;
652
   }
653

654
   if (debug_cache) {
655
      fprintf(stderr, "pipeline cache %p, loaded %i nir shaders and "
656
              "%i entries\n", cache, nir_count, count);
657
      if (dump_stats)
658
         cache_dump_stats(cache);
659
   }
660
}
661

662
VKAPI_ATTR VkResult VKAPI_CALL
663
v3dv_CreatePipelineCache(VkDevice _device,
664
                         const VkPipelineCacheCreateInfo *pCreateInfo,
665
                         const VkAllocationCallbacks *pAllocator,
666
                         VkPipelineCache *pPipelineCache)
667
{
668
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
669
   struct v3dv_pipeline_cache *cache;
670

671
   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
672
   assert(pCreateInfo->flags == 0);
673

674
   cache = vk_object_zalloc(&device->vk, pAllocator,
675
                            sizeof(*cache),
676
                            VK_OBJECT_TYPE_PIPELINE_CACHE);
677

678
   if (cache == NULL)
679
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
680

681
   v3dv_pipeline_cache_init(cache, device,
682
                            device->instance->pipeline_cache_enabled);
683

684
   if (pCreateInfo->initialDataSize > 0) {
685
      pipeline_cache_load(cache,
686
                          pCreateInfo->initialDataSize,
687
                          pCreateInfo->pInitialData);
688
   }
689

690
   *pPipelineCache = v3dv_pipeline_cache_to_handle(cache);
691

692
   return VK_SUCCESS;
693
}
694

695
void
696
v3dv_pipeline_cache_finish(struct v3dv_pipeline_cache *cache)
697
{
698
   pthread_mutex_destroy(&cache->mutex);
699

700
   if (dump_stats_on_destroy)
701
      cache_dump_stats(cache);
702

703
   if (cache->nir_cache) {
704
      hash_table_foreach(cache->nir_cache, entry)
705
         ralloc_free(entry->data);
706

707
      _mesa_hash_table_destroy(cache->nir_cache, NULL);
708
   }
709

710
   if (cache->cache) {
711
      hash_table_foreach(cache->cache, entry) {
712
         struct v3dv_pipeline_shared_data *cache_entry = entry->data;
713
         if (cache_entry)
714
            v3dv_pipeline_shared_data_unref(cache->device, cache_entry);
715
      }
716

717
      _mesa_hash_table_destroy(cache->cache, NULL);
718
   }
719
}
720

721
VKAPI_ATTR void VKAPI_CALL
722
v3dv_DestroyPipelineCache(VkDevice _device,
723
                          VkPipelineCache _cache,
724
                          const VkAllocationCallbacks *pAllocator)
725
{
726
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
727
   V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);
728

729
   if (!cache)
730
      return;
731

732
   v3dv_pipeline_cache_finish(cache);
733

734
   vk_object_free(&device->vk, pAllocator, cache);
735
}
736

737
VKAPI_ATTR VkResult VKAPI_CALL
738
v3dv_MergePipelineCaches(VkDevice device,
739
                         VkPipelineCache dstCache,
740
                         uint32_t srcCacheCount,
741
                         const VkPipelineCache *pSrcCaches)
742
{
743
   V3DV_FROM_HANDLE(v3dv_pipeline_cache, dst, dstCache);
744

745
   if (!dst->cache || !dst->nir_cache)
746
      return VK_SUCCESS;
747

748
   for (uint32_t i = 0; i < srcCacheCount; i++) {
749
      V3DV_FROM_HANDLE(v3dv_pipeline_cache, src, pSrcCaches[i]);
750
      if (!src->cache || !src->nir_cache)
751
         continue;
752

753
      hash_table_foreach(src->nir_cache, entry) {
754
         struct serialized_nir *src_snir = entry->data;
755
         assert(src_snir);
756

757
         if (_mesa_hash_table_search(dst->nir_cache, src_snir->sha1_key))
758
            continue;
759

760
         /* FIXME: we are using serialized nir shaders because they are
761
          * convenient to create and store on the cache, but requires to do a
762
          * copy here (and some other places) of the serialized NIR. Perhaps
763
          * it would make sense to move to handle the NIR shaders with shared
764
          * structures with ref counts, as the variants.
765
          */
766
         struct serialized_nir *snir_dst =
767
            ralloc_size(dst->nir_cache, sizeof(*snir_dst) + src_snir->size);
768
         memcpy(snir_dst->sha1_key, src_snir->sha1_key, 20);
769
         snir_dst->size = src_snir->size;
770
         memcpy(snir_dst->data, src_snir->data, src_snir->size);
771

772
         _mesa_hash_table_insert(dst->nir_cache, snir_dst->sha1_key, snir_dst);
773
         dst->nir_stats.count++;
774
         if (debug_cache) {
775
            char sha1buf[41];
776
            _mesa_sha1_format(sha1buf, snir_dst->sha1_key);
777

778
            fprintf(stderr, "pipeline cache %p, added nir entry %s "
779
                    "from pipeline cache %p\n",
780
                    dst, sha1buf, src);
781
            if (dump_stats)
782
               cache_dump_stats(dst);
783
         }
784
      }
785

786
      hash_table_foreach(src->cache, entry) {
787
         struct v3dv_pipeline_shared_data *cache_entry = entry->data;
788
         assert(cache_entry);
789

790
         if (_mesa_hash_table_search(dst->cache, cache_entry->sha1_key))
791
            continue;
792

793
         v3dv_pipeline_shared_data_ref(cache_entry);
794
         _mesa_hash_table_insert(dst->cache, cache_entry->sha1_key, cache_entry);
795

796
         dst->stats.count++;
797
         if (debug_cache) {
798
            char sha1buf[41];
799
            _mesa_sha1_format(sha1buf, cache_entry->sha1_key);
800

801
            fprintf(stderr, "pipeline cache %p, added entry %s "
802
                    "from pipeline cache %p\n",
803
                    dst, sha1buf, src);
804
            if (dump_stats)
805
               cache_dump_stats(dst);
806
         }
807
      }
808
   }
809

810
   return VK_SUCCESS;
811
}
812

813
static bool
814
shader_variant_write_to_blob(const struct v3dv_shader_variant *variant,
815
                             struct blob *blob)
816
{
817
   blob_write_uint32(blob, variant->stage);
818

819
   blob_write_uint32(blob, variant->prog_data_size);
820
   blob_write_bytes(blob, variant->prog_data.base, variant->prog_data_size);
821

822
   struct v3d_uniform_list *ulist = &variant->prog_data.base->uniforms;
823
   blob_write_uint32(blob, ulist->count);
824
   blob_write_bytes(blob, ulist->contents, sizeof(enum quniform_contents) * ulist->count);
825
   blob_write_bytes(blob, ulist->data, sizeof(uint32_t) * ulist->count);
826

827
   blob_write_uint32(blob, variant->assembly_offset);
828
   blob_write_uint32(blob, variant->qpu_insts_size);
829

830
   return !blob->out_of_memory;
831
}
832

833
static bool
834
v3dv_pipeline_shared_data_write_to_blob(const struct v3dv_pipeline_shared_data *cache_entry,
835
                                        struct blob *blob)
836
{
837
   blob_write_bytes(blob, cache_entry->sha1_key, 20);
838

839
   uint8_t descriptor_maps_count = 0;
840
   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
841
      if (broadcom_shader_stage_is_binning(stage))
842
         continue;
843
      if (cache_entry->maps[stage] == NULL)
844
         continue;
845
      descriptor_maps_count++;
846
   }
847

848
   /* Compute pipelines only have one descriptor map,
849
    * graphics pipelines may have 2 (VS+FS) or 3 (VS+GS+FS), since the binning
850
    * stages take the descriptor map from the render stage.
851
    */
852
   assert((descriptor_maps_count >= 2 && descriptor_maps_count <= 3) ||
853
          (descriptor_maps_count == 1 && cache_entry->variants[BROADCOM_SHADER_COMPUTE]));
854
   blob_write_uint8(blob, descriptor_maps_count);
855

856
   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
857
      if (cache_entry->maps[stage] == NULL)
858
         continue;
859
      if (broadcom_shader_stage_is_binning(stage))
860
         continue;
861

862
      blob_write_uint8(blob, stage);
863
      blob_write_bytes(blob, cache_entry->maps[stage],
864
                       sizeof(struct v3dv_descriptor_maps));
865
   }
866

867
   uint8_t variant_count = 0;
868
   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
869
      if (cache_entry->variants[stage] == NULL)
870
         continue;
871
      variant_count++;
872
   }
873

874
   /* Graphics pipelines with VS+FS have 3 variants, VS+GS+FS will have 5 and
875
    * compute pipelines only have 1.
876
    */
877
   assert((variant_count == 5  || variant_count == 3) ||
878
          (variant_count == 1 && cache_entry->variants[BROADCOM_SHADER_COMPUTE]));
879
   blob_write_uint8(blob, variant_count);
880

881
   uint32_t total_assembly_size = 0;
882
   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
883
      if (cache_entry->variants[stage] == NULL)
884
         continue;
885

886
      blob_write_uint8(blob, stage);
887
      if (!shader_variant_write_to_blob(cache_entry->variants[stage], blob))
888
         return false;
889

890
      total_assembly_size += cache_entry->variants[stage]->qpu_insts_size;
891
   }
892
   blob_write_uint32(blob, total_assembly_size);
893

894
   assert(cache_entry->assembly_bo->map);
895
   assert(cache_entry->assembly_bo->size >= total_assembly_size);
896
   blob_write_bytes(blob, cache_entry->assembly_bo->map, total_assembly_size);
897

898
   return !blob->out_of_memory;
899
}
900

901

902
VKAPI_ATTR VkResult VKAPI_CALL
903
v3dv_GetPipelineCacheData(VkDevice _device,
904
                          VkPipelineCache _cache,
905
                          size_t *pDataSize,
906
                          void *pData)
907
{
908
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
909
   V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);
910

911
   struct blob blob;
912
   if (pData) {
913
      blob_init_fixed(&blob, pData, *pDataSize);
914
   } else {
915
      blob_init_fixed(&blob, NULL, SIZE_MAX);
916
   }
917

918
   struct v3dv_physical_device *pdevice = &device->instance->physicalDevice;
919
   VkResult result = VK_INCOMPLETE;
920

921
   pthread_mutex_lock(&cache->mutex);
922

923
   struct vk_pipeline_cache_header header = {
924
      .header_size = sizeof(struct vk_pipeline_cache_header),
925
      .header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE,
926
      .vendor_id = v3dv_physical_device_vendor_id(pdevice),
927
      .device_id = v3dv_physical_device_device_id(pdevice),
928
   };
929
   memcpy(header.uuid, pdevice->pipeline_cache_uuid, VK_UUID_SIZE);
930
   blob_write_bytes(&blob, &header, sizeof(header));
931

932
   uint32_t nir_count = 0;
933
   intptr_t nir_count_offset = blob_reserve_uint32(&blob);
934
   if (nir_count_offset < 0) {
935
      *pDataSize = 0;
936
      goto done;
937
   }
938

939
   if (cache->nir_cache) {
940
      hash_table_foreach(cache->nir_cache, entry) {
941
         const struct serialized_nir *snir = entry->data;
942

943
         size_t save_size = blob.size;
944

945
         blob_write_bytes(&blob, snir->sha1_key, 20);
946
         blob_write_uint32(&blob, snir->size);
947
         blob_write_bytes(&blob, snir->data, snir->size);
948

949
         if (blob.out_of_memory) {
950
            blob.size = save_size;
951
            goto done;
952
         }
953

954
         nir_count++;
955
      }
956
   }
957
   blob_overwrite_uint32(&blob, nir_count_offset, nir_count);
958

959
   uint32_t count = 0;
960
   intptr_t count_offset = blob_reserve_uint32(&blob);
961
   if (count_offset < 0) {
962
      *pDataSize = 0;
963
      goto done;
964
   }
965

966
   if (cache->cache) {
967
      hash_table_foreach(cache->cache, entry) {
968
         struct v3dv_pipeline_shared_data *cache_entry = entry->data;
969

970
         size_t save_size = blob.size;
971
         if (!v3dv_pipeline_shared_data_write_to_blob(cache_entry, &blob)) {
972
            /* If it fails reset to the previous size and bail */
973
            blob.size = save_size;
974
            goto done;
975
         }
976

977
         count++;
978
      }
979
   }
980

981
   blob_overwrite_uint32(&blob, count_offset, count);
982

983
   *pDataSize = blob.size;
984

985
   result = VK_SUCCESS;
986

987
   if (debug_cache) {
988
      assert(count <= cache->stats.count);
989
      fprintf(stderr, "GetPipelineCacheData: serializing cache %p, "
990
              "%i nir shader entries "
991
              "%i entries, %u DataSize\n",
992
              cache, nir_count, count, (uint32_t) *pDataSize);
993
   }
994

995
 done:
996
   blob_finish(&blob);
997

998
   pthread_mutex_unlock(&cache->mutex);
999

1000
   return result;
1001
}
1002

1003
Product

Resources

Company