Path: blob/21.2-virgl/src/broadcom/vulkan/v3dv_pipeline_cache.c
4560 views
/*1* Copyright © 2019 Raspberry Pi2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*/2223#include "v3dv_private.h"24#include "vulkan/util/vk_util.h"25#include "util/blob.h"26#include "nir/nir_serialize.h"2728static const bool debug_cache = false;29static const bool dump_stats = false;30static const bool dump_stats_on_destroy = false;3132/* Shared for nir/variants */33#define V3DV_MAX_PIPELINE_CACHE_ENTRIES 40963435static uint32_t36sha1_hash_func(const void *sha1)37{38return _mesa_hash_data(sha1, 20);39}4041static bool42sha1_compare_func(const void *sha1_a, const void *sha1_b)43{44return memcmp(sha1_a, sha1_b, 20) == 0;45}4647struct serialized_nir {48unsigned char sha1_key[20];49size_t size;50char data[0];51};5253static void54cache_dump_stats(struct v3dv_pipeline_cache *cache)55{56fprintf(stderr, " NIR cache entries: %d\n", cache->nir_stats.count);57fprintf(stderr, " NIR cache miss count: %d\n", cache->nir_stats.miss);58fprintf(stderr, " NIR cache hit count: %d\n", cache->nir_stats.hit);5960fprintf(stderr, " cache entries: %d\n", cache->stats.count);61fprintf(stderr, " cache miss count: %d\n", cache->stats.miss);62fprintf(stderr, " cache hit count: %d\n", cache->stats.hit);63}6465void66v3dv_pipeline_cache_upload_nir(struct v3dv_pipeline *pipeline,67struct v3dv_pipeline_cache *cache,68nir_shader *nir,69unsigned char sha1_key[20])70{71if (!cache || !cache->nir_cache)72return;7374if (cache->nir_stats.count > V3DV_MAX_PIPELINE_CACHE_ENTRIES)75return;7677pthread_mutex_lock(&cache->mutex);78struct hash_entry *entry =79_mesa_hash_table_search(cache->nir_cache, sha1_key);80pthread_mutex_unlock(&cache->mutex);81if (entry)82return;8384struct blob blob;85blob_init(&blob);8687nir_serialize(&blob, nir, false);88if (blob.out_of_memory) {89blob_finish(&blob);90return;91}9293pthread_mutex_lock(&cache->mutex);94/* Because ralloc isn't thread-safe, we have to do all this inside the95* lock. We could unlock for the big memcpy but it's probably not worth96* the hassle.97*/98entry = _mesa_hash_table_search(cache->nir_cache, sha1_key);99if (entry) {100blob_finish(&blob);101pthread_mutex_unlock(&cache->mutex);102return;103}104105struct serialized_nir *snir =106ralloc_size(cache->nir_cache, sizeof(*snir) + blob.size);107memcpy(snir->sha1_key, sha1_key, 20);108snir->size = blob.size;109memcpy(snir->data, blob.data, blob.size);110111blob_finish(&blob);112113cache->nir_stats.count++;114if (debug_cache) {115char sha1buf[41];116_mesa_sha1_format(sha1buf, snir->sha1_key);117fprintf(stderr, "pipeline cache %p, new nir entry %s\n", cache, sha1buf);118if (dump_stats)119cache_dump_stats(cache);120}121122_mesa_hash_table_insert(cache->nir_cache, snir->sha1_key, snir);123124pthread_mutex_unlock(&cache->mutex);125}126127nir_shader*128v3dv_pipeline_cache_search_for_nir(struct v3dv_pipeline *pipeline,129struct v3dv_pipeline_cache *cache,130const nir_shader_compiler_options *nir_options,131unsigned char sha1_key[20])132{133if (!cache || !cache->nir_cache)134return NULL;135136if (debug_cache) {137char sha1buf[41];138_mesa_sha1_format(sha1buf, sha1_key);139140fprintf(stderr, "pipeline cache %p, search for nir %s\n", cache, sha1buf);141}142143const struct serialized_nir *snir = NULL;144145pthread_mutex_lock(&cache->mutex);146struct hash_entry *entry =147_mesa_hash_table_search(cache->nir_cache, sha1_key);148if (entry)149snir = entry->data;150pthread_mutex_unlock(&cache->mutex);151152if (snir) {153struct blob_reader blob;154blob_reader_init(&blob, snir->data, snir->size);155156/* We use context NULL as we want the p_stage to keep the reference to157* nir, as we keep open the possibility of provide a shader variant158* after cache creation159*/160nir_shader *nir = nir_deserialize(NULL, nir_options, &blob);161if (blob.overrun) {162ralloc_free(nir);163} else {164cache->nir_stats.hit++;165if (debug_cache) {166fprintf(stderr, "\tnir cache hit: %p\n", nir);167if (dump_stats)168cache_dump_stats(cache);169}170return nir;171}172}173174cache->nir_stats.miss++;175if (debug_cache) {176fprintf(stderr, "\tnir cache miss\n");177if (dump_stats)178cache_dump_stats(cache);179}180181return NULL;182}183184void185v3dv_pipeline_cache_init(struct v3dv_pipeline_cache *cache,186struct v3dv_device *device,187bool cache_enabled)188{189cache->device = device;190pthread_mutex_init(&cache->mutex, NULL);191192if (cache_enabled) {193cache->nir_cache = _mesa_hash_table_create(NULL, sha1_hash_func,194sha1_compare_func);195cache->nir_stats.miss = 0;196cache->nir_stats.hit = 0;197cache->nir_stats.count = 0;198199cache->cache = _mesa_hash_table_create(NULL, sha1_hash_func,200sha1_compare_func);201cache->stats.miss = 0;202cache->stats.hit = 0;203cache->stats.count = 0;204} else {205cache->nir_cache = NULL;206cache->cache = NULL;207}208209}210211static struct v3dv_pipeline_shared_data *212v3dv_pipeline_shared_data_create_from_blob(struct v3dv_pipeline_cache *cache,213struct blob_reader *blob);214215static void216pipeline_cache_upload_shared_data(struct v3dv_pipeline_cache *cache,217struct v3dv_pipeline_shared_data *shared_data,218bool from_disk_cache);219220static bool221v3dv_pipeline_shared_data_write_to_blob(const struct v3dv_pipeline_shared_data *cache_entry,222struct blob *blob);223224/**225* It searchs for pipeline cached data, and returns a v3dv_pipeline_shared_data with226* it, or NULL if doesn't have it cached. On the former, it will increases the227* ref_count, so caller is responsible to unref it.228*/229struct v3dv_pipeline_shared_data *230v3dv_pipeline_cache_search_for_pipeline(struct v3dv_pipeline_cache *cache,231unsigned char sha1_key[20])232{233if (!cache || !cache->cache)234return NULL;235236if (debug_cache) {237char sha1buf[41];238_mesa_sha1_format(sha1buf, sha1_key);239240fprintf(stderr, "pipeline cache %p, search pipeline with key %s\n", cache, sha1buf);241}242243pthread_mutex_lock(&cache->mutex);244245struct hash_entry *entry =246_mesa_hash_table_search(cache->cache, sha1_key);247248if (entry) {249struct v3dv_pipeline_shared_data *cache_entry =250(struct v3dv_pipeline_shared_data *) entry->data;251assert(cache_entry);252253cache->stats.hit++;254if (debug_cache) {255fprintf(stderr, "\tcache hit: %p\n", cache_entry);256if (dump_stats)257cache_dump_stats(cache);258}259260261v3dv_pipeline_shared_data_ref(cache_entry);262263pthread_mutex_unlock(&cache->mutex);264265return cache_entry;266}267268cache->stats.miss++;269if (debug_cache) {270fprintf(stderr, "\tcache miss\n");271if (dump_stats)272cache_dump_stats(cache);273}274275pthread_mutex_unlock(&cache->mutex);276277#ifdef ENABLE_SHADER_CACHE278struct v3dv_device *device = cache->device;279struct disk_cache *disk_cache = device->pdevice->disk_cache;280/* Note that the on-disk-cache can be independently disabled, while keeping281* the pipeline cache working, by using the environment variable282* MESA_GLSL_CACHE_DISABLE. In that case the calls to disk_cache_put/get283* will not do anything.284*/285if (disk_cache && device->instance->pipeline_cache_enabled) {286cache_key cache_key;287disk_cache_compute_key(disk_cache, sha1_key, 20, cache_key);288289size_t buffer_size;290uint8_t *buffer = disk_cache_get(disk_cache, cache_key, &buffer_size);291if (buffer) {292struct blob_reader blob;293struct v3dv_pipeline_shared_data *shared_data;294295if (debug_cache)296fprintf(stderr, "\ton-disk-cache hit\n");297298blob_reader_init(&blob, buffer, buffer_size);299shared_data = v3dv_pipeline_shared_data_create_from_blob(cache, &blob);300free(buffer);301302if (shared_data) {303if (cache)304pipeline_cache_upload_shared_data(cache, shared_data, true);305return shared_data;306}307} else {308if (debug_cache)309fprintf(stderr, "\ton-disk-cache miss\n");310}311}312#endif313314return NULL;315}316317void318v3dv_pipeline_shared_data_destroy(struct v3dv_device *device,319struct v3dv_pipeline_shared_data *shared_data)320{321assert(shared_data->ref_cnt == 0);322323for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {324if (shared_data->variants[stage] != NULL)325v3dv_shader_variant_destroy(device, shared_data->variants[stage]);326327/* We don't free binning descriptor maps as we are sharing them328* with the render shaders.329*/330if (shared_data->maps[stage] != NULL &&331!broadcom_shader_stage_is_binning(stage)) {332vk_free(&device->vk.alloc, shared_data->maps[stage]);333}334}335336if (shared_data->assembly_bo)337v3dv_bo_free(device, shared_data->assembly_bo);338339vk_free(&device->vk.alloc, shared_data);340}341342static struct v3dv_pipeline_shared_data *343v3dv_pipeline_shared_data_new(struct v3dv_pipeline_cache *cache,344const unsigned char sha1_key[20],345struct v3dv_descriptor_maps **maps,346struct v3dv_shader_variant **variants,347const uint64_t *total_assembly,348const uint32_t total_assembly_size)349{350size_t size = sizeof(struct v3dv_pipeline_shared_data);351/* We create new_entry using the device alloc. Right now shared_data is ref352* and unref by both the pipeline and the pipeline cache, so we can't353* ensure that the cache or pipeline alloc will be available on the last354* unref.355*/356struct v3dv_pipeline_shared_data *new_entry =357vk_zalloc2(&cache->device->vk.alloc, NULL, size, 8,358VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);359360if (new_entry == NULL)361return NULL;362363new_entry->ref_cnt = 1;364memcpy(new_entry->sha1_key, sha1_key, 20);365366for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {367new_entry->maps[stage] = maps[stage];368new_entry->variants[stage] = variants[stage];369}370371struct v3dv_bo *bo = v3dv_bo_alloc(cache->device, total_assembly_size,372"pipeline shader assembly", true);373if (!bo) {374fprintf(stderr, "failed to allocate memory for shaders assembly\n");375v3dv_pipeline_shared_data_unref(cache->device, new_entry);376return NULL;377}378379bool ok = v3dv_bo_map(cache->device, bo, total_assembly_size);380if (!ok) {381fprintf(stderr, "failed to map source shader buffer\n");382v3dv_pipeline_shared_data_unref(cache->device, new_entry);383return NULL;384}385386memcpy(bo->map, total_assembly, total_assembly_size);387388new_entry->assembly_bo = bo;389390return new_entry;391}392393static void394pipeline_cache_upload_shared_data(struct v3dv_pipeline_cache *cache,395struct v3dv_pipeline_shared_data *shared_data,396bool from_disk_cache)397{398assert(shared_data);399400if (!cache || !cache->cache)401return;402403if (cache->stats.count > V3DV_MAX_PIPELINE_CACHE_ENTRIES)404return;405406pthread_mutex_lock(&cache->mutex);407struct hash_entry *entry =408_mesa_hash_table_search(cache->cache, shared_data->sha1_key);409410if (entry) {411pthread_mutex_unlock(&cache->mutex);412return;413}414415v3dv_pipeline_shared_data_ref(shared_data);416_mesa_hash_table_insert(cache->cache, shared_data->sha1_key, shared_data);417cache->stats.count++;418if (debug_cache) {419char sha1buf[41];420_mesa_sha1_format(sha1buf, shared_data->sha1_key);421422fprintf(stderr, "pipeline cache %p, new cache entry with sha1 key %s:%p\n\n",423cache, sha1buf, shared_data);424if (dump_stats)425cache_dump_stats(cache);426}427428pthread_mutex_unlock(&cache->mutex);429430#ifdef ENABLE_SHADER_CACHE431/* If we are being called from a on-disk-cache hit, we can skip writing to432* the disk cache433*/434if (from_disk_cache)435return;436437struct v3dv_device *device = cache->device;438struct disk_cache *disk_cache = device->pdevice->disk_cache;439if (disk_cache) {440struct blob binary;441blob_init(&binary);442if (v3dv_pipeline_shared_data_write_to_blob(shared_data, &binary)) {443cache_key cache_key;444disk_cache_compute_key(disk_cache, shared_data->sha1_key, 20, cache_key);445446disk_cache_put(disk_cache, cache_key, binary.data, binary.size, NULL);447if (debug_cache) {448char sha1buf[41];449_mesa_sha1_format(sha1buf, shared_data->sha1_key);450451fprintf(stderr, "on-disk-cache, new cache entry with sha1 key %s:%p\n\n",452sha1buf, shared_data);453}454}455456blob_finish(&binary);457}458#endif459}460461/* Uploads all the "cacheable" or shared data from the pipeline */462void463v3dv_pipeline_cache_upload_pipeline(struct v3dv_pipeline *pipeline,464struct v3dv_pipeline_cache *cache)465{466pipeline_cache_upload_shared_data(cache, pipeline->shared_data, false);467}468469static struct serialized_nir*470serialized_nir_create_from_blob(struct v3dv_pipeline_cache *cache,471struct blob_reader *blob)472{473const unsigned char *sha1_key = blob_read_bytes(blob, 20);474uint32_t snir_size = blob_read_uint32(blob);475const char* snir_data = blob_read_bytes(blob, snir_size);476if (blob->overrun)477return NULL;478479struct serialized_nir *snir =480ralloc_size(cache->nir_cache, sizeof(*snir) + snir_size);481memcpy(snir->sha1_key, sha1_key, 20);482snir->size = snir_size;483memcpy(snir->data, snir_data, snir_size);484485return snir;486}487488static struct v3dv_shader_variant*489shader_variant_create_from_blob(struct v3dv_device *device,490struct blob_reader *blob)491{492VkResult result;493494enum broadcom_shader_stage stage = blob_read_uint32(blob);495496uint32_t prog_data_size = blob_read_uint32(blob);497/* FIXME: as we include the stage perhaps we can avoid prog_data_size? */498assert(prog_data_size == v3d_prog_data_size(broadcom_shader_stage_to_gl(stage)));499500const void *prog_data = blob_read_bytes(blob, prog_data_size);501if (blob->overrun)502return NULL;503504uint32_t ulist_count = blob_read_uint32(blob);505uint32_t contents_size = sizeof(enum quniform_contents) * ulist_count;506const void *contents_data = blob_read_bytes(blob, contents_size);507if (blob->overrun)508return NULL;509510uint ulist_data_size = sizeof(uint32_t) * ulist_count;511const void *ulist_data_data = blob_read_bytes(blob, ulist_data_size);512if (blob->overrun)513return NULL;514515uint32_t assembly_offset = blob_read_uint32(blob);516uint32_t qpu_insts_size = blob_read_uint32(blob);517518/* shader_variant_create expects a newly created prog_data for their own,519* as it is what the v3d compiler returns. So we are also allocating one520* (including the uniform list) and filled it up with the data that we read521* from the blob522*/523struct v3d_prog_data *new_prog_data = rzalloc_size(NULL, prog_data_size);524memcpy(new_prog_data, prog_data, prog_data_size);525struct v3d_uniform_list *ulist = &new_prog_data->uniforms;526ulist->count = ulist_count;527ulist->contents = ralloc_array(new_prog_data, enum quniform_contents, ulist->count);528memcpy(ulist->contents, contents_data, contents_size);529ulist->data = ralloc_array(new_prog_data, uint32_t, ulist->count);530memcpy(ulist->data, ulist_data_data, ulist_data_size);531532return v3dv_shader_variant_create(device, stage,533new_prog_data, prog_data_size,534assembly_offset,535NULL, qpu_insts_size,536&result);537}538539static struct v3dv_pipeline_shared_data *540v3dv_pipeline_shared_data_create_from_blob(struct v3dv_pipeline_cache *cache,541struct blob_reader *blob)542{543const unsigned char *sha1_key = blob_read_bytes(blob, 20);544545struct v3dv_descriptor_maps *maps[BROADCOM_SHADER_STAGES] = { 0 };546547uint8_t descriptor_maps_count = blob_read_uint8(blob);548for (uint8_t count = 0; count < descriptor_maps_count; count++) {549uint8_t stage = blob_read_uint8(blob);550551const struct v3dv_descriptor_maps *current_maps =552blob_read_bytes(blob, sizeof(struct v3dv_descriptor_maps));553554if (blob->overrun)555return NULL;556557maps[stage] = vk_zalloc2(&cache->device->vk.alloc, NULL,558sizeof(struct v3dv_descriptor_maps), 8,559VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);560561if (maps[stage] == NULL)562return NULL;563564memcpy(maps[stage], current_maps, sizeof(struct v3dv_descriptor_maps));565if (broadcom_shader_stage_is_render_with_binning(stage)) {566enum broadcom_shader_stage bin_stage =567broadcom_binning_shader_stage_for_render_stage(stage);568maps[bin_stage] = maps[stage];569}570}571572uint8_t variant_count = blob_read_uint8(blob);573574struct v3dv_shader_variant *variants[BROADCOM_SHADER_STAGES] = { 0 };575576for (uint8_t count = 0; count < variant_count; count++) {577uint8_t stage = blob_read_uint8(blob);578struct v3dv_shader_variant *variant =579shader_variant_create_from_blob(cache->device, blob);580variants[stage] = variant;581}582583uint32_t total_assembly_size = blob_read_uint32(blob);584const uint64_t *total_assembly =585blob_read_bytes(blob, total_assembly_size);586587if (blob->overrun)588return NULL;589590return v3dv_pipeline_shared_data_new(cache, sha1_key, maps, variants,591total_assembly, total_assembly_size);592}593594static void595pipeline_cache_load(struct v3dv_pipeline_cache *cache,596size_t size,597const void *data)598{599struct v3dv_device *device = cache->device;600struct v3dv_physical_device *pdevice = &device->instance->physicalDevice;601struct vk_pipeline_cache_header header;602603if (cache->cache == NULL || cache->nir_cache == NULL)604return;605606struct blob_reader blob;607blob_reader_init(&blob, data, size);608609blob_copy_bytes(&blob, &header, sizeof(header));610if (size < sizeof(header))611return;612memcpy(&header, data, sizeof(header));613if (header.header_size < sizeof(header))614return;615if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)616return;617if (header.vendor_id != v3dv_physical_device_vendor_id(pdevice))618return;619if (header.device_id != v3dv_physical_device_device_id(pdevice))620return;621if (memcmp(header.uuid, pdevice->pipeline_cache_uuid, VK_UUID_SIZE) != 0)622return;623624uint32_t nir_count = blob_read_uint32(&blob);625if (blob.overrun)626return;627628for (uint32_t i = 0; i < nir_count; i++) {629struct serialized_nir *snir =630serialized_nir_create_from_blob(cache, &blob);631632if (!snir)633break;634635_mesa_hash_table_insert(cache->nir_cache, snir->sha1_key, snir);636cache->nir_stats.count++;637}638639uint32_t count = blob_read_uint32(&blob);640if (blob.overrun)641return;642643for (uint32_t i = 0; i < count; i++) {644struct v3dv_pipeline_shared_data *cache_entry =645v3dv_pipeline_shared_data_create_from_blob(cache, &blob);646if (!cache_entry)647break;648649_mesa_hash_table_insert(cache->cache, cache_entry->sha1_key, cache_entry);650cache->stats.count++;651}652653if (debug_cache) {654fprintf(stderr, "pipeline cache %p, loaded %i nir shaders and "655"%i entries\n", cache, nir_count, count);656if (dump_stats)657cache_dump_stats(cache);658}659}660661VKAPI_ATTR VkResult VKAPI_CALL662v3dv_CreatePipelineCache(VkDevice _device,663const VkPipelineCacheCreateInfo *pCreateInfo,664const VkAllocationCallbacks *pAllocator,665VkPipelineCache *pPipelineCache)666{667V3DV_FROM_HANDLE(v3dv_device, device, _device);668struct v3dv_pipeline_cache *cache;669670assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);671assert(pCreateInfo->flags == 0);672673cache = vk_object_zalloc(&device->vk, pAllocator,674sizeof(*cache),675VK_OBJECT_TYPE_PIPELINE_CACHE);676677if (cache == NULL)678return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);679680v3dv_pipeline_cache_init(cache, device,681device->instance->pipeline_cache_enabled);682683if (pCreateInfo->initialDataSize > 0) {684pipeline_cache_load(cache,685pCreateInfo->initialDataSize,686pCreateInfo->pInitialData);687}688689*pPipelineCache = v3dv_pipeline_cache_to_handle(cache);690691return VK_SUCCESS;692}693694void695v3dv_pipeline_cache_finish(struct v3dv_pipeline_cache *cache)696{697pthread_mutex_destroy(&cache->mutex);698699if (dump_stats_on_destroy)700cache_dump_stats(cache);701702if (cache->nir_cache) {703hash_table_foreach(cache->nir_cache, entry)704ralloc_free(entry->data);705706_mesa_hash_table_destroy(cache->nir_cache, NULL);707}708709if (cache->cache) {710hash_table_foreach(cache->cache, entry) {711struct v3dv_pipeline_shared_data *cache_entry = entry->data;712if (cache_entry)713v3dv_pipeline_shared_data_unref(cache->device, cache_entry);714}715716_mesa_hash_table_destroy(cache->cache, NULL);717}718}719720VKAPI_ATTR void VKAPI_CALL721v3dv_DestroyPipelineCache(VkDevice _device,722VkPipelineCache _cache,723const VkAllocationCallbacks *pAllocator)724{725V3DV_FROM_HANDLE(v3dv_device, device, _device);726V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);727728if (!cache)729return;730731v3dv_pipeline_cache_finish(cache);732733vk_object_free(&device->vk, pAllocator, cache);734}735736VKAPI_ATTR VkResult VKAPI_CALL737v3dv_MergePipelineCaches(VkDevice device,738VkPipelineCache dstCache,739uint32_t srcCacheCount,740const VkPipelineCache *pSrcCaches)741{742V3DV_FROM_HANDLE(v3dv_pipeline_cache, dst, dstCache);743744if (!dst->cache || !dst->nir_cache)745return VK_SUCCESS;746747for (uint32_t i = 0; i < srcCacheCount; i++) {748V3DV_FROM_HANDLE(v3dv_pipeline_cache, src, pSrcCaches[i]);749if (!src->cache || !src->nir_cache)750continue;751752hash_table_foreach(src->nir_cache, entry) {753struct serialized_nir *src_snir = entry->data;754assert(src_snir);755756if (_mesa_hash_table_search(dst->nir_cache, src_snir->sha1_key))757continue;758759/* FIXME: we are using serialized nir shaders because they are760* convenient to create and store on the cache, but requires to do a761* copy here (and some other places) of the serialized NIR. Perhaps762* it would make sense to move to handle the NIR shaders with shared763* structures with ref counts, as the variants.764*/765struct serialized_nir *snir_dst =766ralloc_size(dst->nir_cache, sizeof(*snir_dst) + src_snir->size);767memcpy(snir_dst->sha1_key, src_snir->sha1_key, 20);768snir_dst->size = src_snir->size;769memcpy(snir_dst->data, src_snir->data, src_snir->size);770771_mesa_hash_table_insert(dst->nir_cache, snir_dst->sha1_key, snir_dst);772dst->nir_stats.count++;773if (debug_cache) {774char sha1buf[41];775_mesa_sha1_format(sha1buf, snir_dst->sha1_key);776777fprintf(stderr, "pipeline cache %p, added nir entry %s "778"from pipeline cache %p\n",779dst, sha1buf, src);780if (dump_stats)781cache_dump_stats(dst);782}783}784785hash_table_foreach(src->cache, entry) {786struct v3dv_pipeline_shared_data *cache_entry = entry->data;787assert(cache_entry);788789if (_mesa_hash_table_search(dst->cache, cache_entry->sha1_key))790continue;791792v3dv_pipeline_shared_data_ref(cache_entry);793_mesa_hash_table_insert(dst->cache, cache_entry->sha1_key, cache_entry);794795dst->stats.count++;796if (debug_cache) {797char sha1buf[41];798_mesa_sha1_format(sha1buf, cache_entry->sha1_key);799800fprintf(stderr, "pipeline cache %p, added entry %s "801"from pipeline cache %p\n",802dst, sha1buf, src);803if (dump_stats)804cache_dump_stats(dst);805}806}807}808809return VK_SUCCESS;810}811812static bool813shader_variant_write_to_blob(const struct v3dv_shader_variant *variant,814struct blob *blob)815{816blob_write_uint32(blob, variant->stage);817818blob_write_uint32(blob, variant->prog_data_size);819blob_write_bytes(blob, variant->prog_data.base, variant->prog_data_size);820821struct v3d_uniform_list *ulist = &variant->prog_data.base->uniforms;822blob_write_uint32(blob, ulist->count);823blob_write_bytes(blob, ulist->contents, sizeof(enum quniform_contents) * ulist->count);824blob_write_bytes(blob, ulist->data, sizeof(uint32_t) * ulist->count);825826blob_write_uint32(blob, variant->assembly_offset);827blob_write_uint32(blob, variant->qpu_insts_size);828829return !blob->out_of_memory;830}831832static bool833v3dv_pipeline_shared_data_write_to_blob(const struct v3dv_pipeline_shared_data *cache_entry,834struct blob *blob)835{836blob_write_bytes(blob, cache_entry->sha1_key, 20);837838uint8_t descriptor_maps_count = 0;839for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {840if (broadcom_shader_stage_is_binning(stage))841continue;842if (cache_entry->maps[stage] == NULL)843continue;844descriptor_maps_count++;845}846847/* Compute pipelines only have one descriptor map,848* graphics pipelines may have 2 (VS+FS) or 3 (VS+GS+FS), since the binning849* stages take the descriptor map from the render stage.850*/851assert((descriptor_maps_count >= 2 && descriptor_maps_count <= 3) ||852(descriptor_maps_count == 1 && cache_entry->variants[BROADCOM_SHADER_COMPUTE]));853blob_write_uint8(blob, descriptor_maps_count);854855for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {856if (cache_entry->maps[stage] == NULL)857continue;858if (broadcom_shader_stage_is_binning(stage))859continue;860861blob_write_uint8(blob, stage);862blob_write_bytes(blob, cache_entry->maps[stage],863sizeof(struct v3dv_descriptor_maps));864}865866uint8_t variant_count = 0;867for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {868if (cache_entry->variants[stage] == NULL)869continue;870variant_count++;871}872873/* Graphics pipelines with VS+FS have 3 variants, VS+GS+FS will have 5 and874* compute pipelines only have 1.875*/876assert((variant_count == 5 || variant_count == 3) ||877(variant_count == 1 && cache_entry->variants[BROADCOM_SHADER_COMPUTE]));878blob_write_uint8(blob, variant_count);879880uint32_t total_assembly_size = 0;881for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {882if (cache_entry->variants[stage] == NULL)883continue;884885blob_write_uint8(blob, stage);886if (!shader_variant_write_to_blob(cache_entry->variants[stage], blob))887return false;888889total_assembly_size += cache_entry->variants[stage]->qpu_insts_size;890}891blob_write_uint32(blob, total_assembly_size);892893assert(cache_entry->assembly_bo->map);894assert(cache_entry->assembly_bo->size >= total_assembly_size);895blob_write_bytes(blob, cache_entry->assembly_bo->map, total_assembly_size);896897return !blob->out_of_memory;898}899900901VKAPI_ATTR VkResult VKAPI_CALL902v3dv_GetPipelineCacheData(VkDevice _device,903VkPipelineCache _cache,904size_t *pDataSize,905void *pData)906{907V3DV_FROM_HANDLE(v3dv_device, device, _device);908V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);909910struct blob blob;911if (pData) {912blob_init_fixed(&blob, pData, *pDataSize);913} else {914blob_init_fixed(&blob, NULL, SIZE_MAX);915}916917struct v3dv_physical_device *pdevice = &device->instance->physicalDevice;918VkResult result = VK_INCOMPLETE;919920pthread_mutex_lock(&cache->mutex);921922struct vk_pipeline_cache_header header = {923.header_size = sizeof(struct vk_pipeline_cache_header),924.header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE,925.vendor_id = v3dv_physical_device_vendor_id(pdevice),926.device_id = v3dv_physical_device_device_id(pdevice),927};928memcpy(header.uuid, pdevice->pipeline_cache_uuid, VK_UUID_SIZE);929blob_write_bytes(&blob, &header, sizeof(header));930931uint32_t nir_count = 0;932intptr_t nir_count_offset = blob_reserve_uint32(&blob);933if (nir_count_offset < 0) {934*pDataSize = 0;935goto done;936}937938if (cache->nir_cache) {939hash_table_foreach(cache->nir_cache, entry) {940const struct serialized_nir *snir = entry->data;941942size_t save_size = blob.size;943944blob_write_bytes(&blob, snir->sha1_key, 20);945blob_write_uint32(&blob, snir->size);946blob_write_bytes(&blob, snir->data, snir->size);947948if (blob.out_of_memory) {949blob.size = save_size;950goto done;951}952953nir_count++;954}955}956blob_overwrite_uint32(&blob, nir_count_offset, nir_count);957958uint32_t count = 0;959intptr_t count_offset = blob_reserve_uint32(&blob);960if (count_offset < 0) {961*pDataSize = 0;962goto done;963}964965if (cache->cache) {966hash_table_foreach(cache->cache, entry) {967struct v3dv_pipeline_shared_data *cache_entry = entry->data;968969size_t save_size = blob.size;970if (!v3dv_pipeline_shared_data_write_to_blob(cache_entry, &blob)) {971/* If it fails reset to the previous size and bail */972blob.size = save_size;973goto done;974}975976count++;977}978}979980blob_overwrite_uint32(&blob, count_offset, count);981982*pDataSize = blob.size;983984result = VK_SUCCESS;985986if (debug_cache) {987assert(count <= cache->stats.count);988fprintf(stderr, "GetPipelineCacheData: serializing cache %p, "989"%i nir shader entries "990"%i entries, %u DataSize\n",991cache, nir_count, count, (uint32_t) *pDataSize);992}993994done:995blob_finish(&blob);996997pthread_mutex_unlock(&cache->mutex);998999return result;1000}100110021003