Path: blob/21.2-virgl/src/amd/vulkan/radv_pipeline_cache.c
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "util/debug.h"
#include "util/disk_cache.h"
#include "util/macros.h"
#include "util/mesa-sha1.h"
#include "util/u_atomic.h"
#include "vulkan/util/vk_util.h"
#include "radv_debug.h"
#include "radv_private.h"
#include "radv_shader.h"

struct cache_entry {
   union {
      unsigned char sha1[20];
      uint32_t sha1_dw[5];
   };
   uint32_t binary_sizes[MESA_SHADER_STAGES];
   struct radv_shader_variant *variants[MESA_SHADER_STAGES];
   char code[0];
};

static void
radv_pipeline_cache_lock(struct radv_pipeline_cache *cache)
{
   if (cache->flags & VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT_EXT)
      return;

   mtx_lock(&cache->mutex);
}

static void
radv_pipeline_cache_unlock(struct radv_pipeline_cache *cache)
{
   if (cache->flags & VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT_EXT)
      return;

   mtx_unlock(&cache->mutex);
}

void
radv_pipeline_cache_init(struct radv_pipeline_cache *cache, struct radv_device *device)
{
   cache->device = device;
   mtx_init(&cache->mutex, mtx_plain);
   cache->flags = 0;

   cache->modified = false;
   cache->kernel_count = 0;
   cache->total_size = 0;
   cache->table_size = 1024;
   const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]);
   cache->hash_table = malloc(byte_size);

   /* We don't consider allocation failure fatal, we just start with a 0-sized
    * cache. Disable caching when we want to keep shader debug info, since
    * we don't get the debug info on cached shaders.
    */
   if (cache->hash_table == NULL || (device->instance->debug_flags & RADV_DEBUG_NO_CACHE))
      cache->table_size = 0;
   else
      memset(cache->hash_table, 0, byte_size);
}
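
/* Entry layout sketch (illustrative sizes, not part of the driver logic): a
 * cache_entry is one allocation whose flexible "code" array holds the
 * serialized shader binaries back to back, e.g. for a pipeline with a
 * 100-byte VS binary and a 200-byte FS binary:
 *
 *    [ sha1/sha1_dw | binary_sizes[] | variants[] | VS (100 B) | FS (200 B) | pad ]
 *
 * entry_size() below recomputes the total from binary_sizes[], so whole
 * entries can be memcpy'd between the in-memory table, the on-disk cache and
 * the VkPipelineCacheData blob without per-stage bookkeeping. The variants[]
 * pointers are only meaningful in memory and are zeroed before an entry is
 * serialized.
 */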

void
radv_pipeline_cache_finish(struct radv_pipeline_cache *cache)
{
   for (unsigned i = 0; i < cache->table_size; ++i)
      if (cache->hash_table[i]) {
         for (int j = 0; j < MESA_SHADER_STAGES; ++j) {
            if (cache->hash_table[i]->variants[j])
               radv_shader_variant_destroy(cache->device, cache->hash_table[i]->variants[j]);
         }
         vk_free(&cache->alloc, cache->hash_table[i]);
      }
   mtx_destroy(&cache->mutex);
   free(cache->hash_table);
}

static uint32_t
entry_size(struct cache_entry *entry)
{
   size_t ret = sizeof(*entry);
   for (int i = 0; i < MESA_SHADER_STAGES; ++i)
      if (entry->binary_sizes[i])
         ret += entry->binary_sizes[i];
   ret = align(ret, alignof(struct cache_entry));
   return ret;
}

void
radv_hash_shaders(unsigned char *hash, const VkPipelineShaderStageCreateInfo **stages,
                  const struct radv_pipeline_layout *layout, const struct radv_pipeline_key *key,
                  uint32_t flags)
{
   struct mesa_sha1 ctx;

   _mesa_sha1_init(&ctx);
   if (key)
      _mesa_sha1_update(&ctx, key, sizeof(*key));
   if (layout)
      _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));

   for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
      if (stages[i]) {
         RADV_FROM_HANDLE(vk_shader_module, module, stages[i]->module);
         const VkSpecializationInfo *spec_info = stages[i]->pSpecializationInfo;

         _mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
         _mesa_sha1_update(&ctx, stages[i]->pName, strlen(stages[i]->pName));
         if (spec_info && spec_info->mapEntryCount) {
            _mesa_sha1_update(&ctx, spec_info->pMapEntries,
                              spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]);
            _mesa_sha1_update(&ctx, spec_info->pData, spec_info->dataSize);
         }
      }
   }
   _mesa_sha1_update(&ctx, &flags, 4);
   _mesa_sha1_final(&ctx, hash);
}

static struct cache_entry *
radv_pipeline_cache_search_unlocked(struct radv_pipeline_cache *cache, const unsigned char *sha1)
{
   const uint32_t mask = cache->table_size - 1;
   const uint32_t start = (*(uint32_t *)sha1);

   if (cache->table_size == 0)
      return NULL;

   for (uint32_t i = 0; i < cache->table_size; i++) {
      const uint32_t index = (start + i) & mask;
      struct cache_entry *entry = cache->hash_table[index];

      if (!entry)
         return NULL;

      if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) {
         return entry;
      }
   }

   unreachable("hash table should never be full");
}

static struct cache_entry *
radv_pipeline_cache_search(struct radv_pipeline_cache *cache, const unsigned char *sha1)
{
   struct cache_entry *entry;

   radv_pipeline_cache_lock(cache);

   entry = radv_pipeline_cache_search_unlocked(cache, sha1);

   radv_pipeline_cache_unlock(cache);

   return entry;
}
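
/* Probing sketch: table_size is always a power of two (1024 initially,
 * doubled on growth), so "(start + i) & mask" is a cheap modulo and the
 * linear probe visits every slot exactly once. The first 32 bits of the
 * SHA-1 seed the start index; e.g. with table_size = 8, an entry whose low
 * start bits are 6 probes slots 6, 7, 0, 1, ... The load factor is capped at
 * 50% by radv_pipeline_cache_add_entry() below, so a lookup hits an empty
 * slot long before wrapping around.
 */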

static void
radv_pipeline_cache_set_entry(struct radv_pipeline_cache *cache, struct cache_entry *entry)
{
   const uint32_t mask = cache->table_size - 1;
   const uint32_t start = entry->sha1_dw[0];

   /* We'll always be able to insert when we get here. */
   assert(cache->kernel_count < cache->table_size / 2);

   for (uint32_t i = 0; i < cache->table_size; i++) {
      const uint32_t index = (start + i) & mask;
      if (!cache->hash_table[index]) {
         cache->hash_table[index] = entry;
         break;
      }
   }

   cache->total_size += entry_size(entry);
   cache->kernel_count++;
}

static VkResult
radv_pipeline_cache_grow(struct radv_pipeline_cache *cache)
{
   const uint32_t table_size = cache->table_size * 2;
   const uint32_t old_table_size = cache->table_size;
   const size_t byte_size = table_size * sizeof(cache->hash_table[0]);
   struct cache_entry **table;
   struct cache_entry **old_table = cache->hash_table;

   table = malloc(byte_size);
   if (table == NULL)
      return vk_error(cache->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   cache->hash_table = table;
   cache->table_size = table_size;
   cache->kernel_count = 0;
   cache->total_size = 0;

   memset(cache->hash_table, 0, byte_size);
   for (uint32_t i = 0; i < old_table_size; i++) {
      struct cache_entry *entry = old_table[i];
      if (!entry)
         continue;

      radv_pipeline_cache_set_entry(cache, entry);
   }

   free(old_table);

   return VK_SUCCESS;
}

static void
radv_pipeline_cache_add_entry(struct radv_pipeline_cache *cache, struct cache_entry *entry)
{
   if (cache->kernel_count == cache->table_size / 2)
      radv_pipeline_cache_grow(cache);

   /* Failing to grow the hash table isn't fatal, but may mean we don't
    * have enough space to add this new kernel. Only add it if there's room.
    */
   if (cache->kernel_count < cache->table_size / 2)
      radv_pipeline_cache_set_entry(cache, entry);
}

static bool
radv_is_cache_disabled(struct radv_device *device)
{
   /* Pipeline caches can be disabled with RADV_DEBUG=nocache, with
    * MESA_GLSL_CACHE_DISABLE=1, and when VK_AMD_shader_info is requested.
    */
   return (device->instance->debug_flags & RADV_DEBUG_NO_CACHE);
}

bool
radv_create_shader_variants_from_pipeline_cache(struct radv_device *device,
                                                struct radv_pipeline_cache *cache,
                                                const unsigned char *sha1,
                                                struct radv_shader_variant **variants,
                                                bool *found_in_application_cache)
{
   struct cache_entry *entry;

   if (!cache) {
      cache = device->mem_cache;
      *found_in_application_cache = false;
   }

   radv_pipeline_cache_lock(cache);

   entry = radv_pipeline_cache_search_unlocked(cache, sha1);

   if (!entry) {
      *found_in_application_cache = false;

      /* Don't cache when we want debug info, since this isn't
       * present in the cache.
       */
      if (radv_is_cache_disabled(device) || !device->physical_device->disk_cache) {
         radv_pipeline_cache_unlock(cache);
         return false;
      }

      uint8_t disk_sha1[20];
      disk_cache_compute_key(device->physical_device->disk_cache, sha1, 20, disk_sha1);

      entry =
         (struct cache_entry *)disk_cache_get(device->physical_device->disk_cache, disk_sha1, NULL);
      if (!entry) {
         radv_pipeline_cache_unlock(cache);
         return false;
      } else {
         size_t size = entry_size(entry);
         struct cache_entry *new_entry =
            vk_alloc(&cache->alloc, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
         if (!new_entry) {
            free(entry);
            radv_pipeline_cache_unlock(cache);
            return false;
         }

         memcpy(new_entry, entry, entry_size(entry));
         free(entry);
         entry = new_entry;

         if (!(device->instance->debug_flags & RADV_DEBUG_NO_MEMORY_CACHE) ||
             cache != device->mem_cache)
            radv_pipeline_cache_add_entry(cache, new_entry);
      }
   }

   char *p = entry->code;
   for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
      if (!entry->variants[i] && entry->binary_sizes[i]) {
         struct radv_shader_binary *binary = calloc(1, entry->binary_sizes[i]);
         memcpy(binary, p, entry->binary_sizes[i]);
         p += entry->binary_sizes[i];

         entry->variants[i] = radv_shader_variant_create(device, binary, false);
         free(binary);
      } else if (entry->binary_sizes[i]) {
         p += entry->binary_sizes[i];
      }
   }

   memcpy(variants, entry->variants, sizeof(entry->variants));

   if (device->instance->debug_flags & RADV_DEBUG_NO_MEMORY_CACHE && cache == device->mem_cache)
      vk_free(&cache->alloc, entry);
   else {
      for (int i = 0; i < MESA_SHADER_STAGES; ++i)
         if (entry->variants[i])
            p_atomic_inc(&entry->variants[i]->ref_count);
   }

   radv_pipeline_cache_unlock(cache);
   return true;
}
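
/* Usage sketch (hypothetical caller, for illustration only; the real call
 * sites live in radv_pipeline.c): pipeline compilation hashes its inputs and
 * tries the cache before invoking the compiler.
 *
 *    unsigned char hash[20];
 *    bool found_in_app_cache = true;
 *    uint32_t hash_flags = 0;      (hypothetical value; real callers pass driver flags)
 *
 *    radv_hash_shaders(hash, pStages, pipeline->layout, &key, hash_flags);
 *    if (radv_create_shader_variants_from_pipeline_cache(device, cache, hash,
 *                                                        pipeline->shaders,
 *                                                        &found_in_app_cache))
 *       return VK_SUCCESS;         (cache hit: no compilation needed)
 *
 * On a hit the caller receives its own reference on each returned variant,
 * so it releases them exactly as it would release freshly compiled ones.
 */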

void
radv_pipeline_cache_insert_shaders(struct radv_device *device, struct radv_pipeline_cache *cache,
                                   const unsigned char *sha1, struct radv_shader_variant **variants,
                                   struct radv_shader_binary *const *binaries)
{
   if (!cache)
      cache = device->mem_cache;

   radv_pipeline_cache_lock(cache);
   struct cache_entry *entry = radv_pipeline_cache_search_unlocked(cache, sha1);
   if (entry) {
      for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
         if (entry->variants[i]) {
            radv_shader_variant_destroy(cache->device, variants[i]);
            variants[i] = entry->variants[i];
         } else {
            entry->variants[i] = variants[i];
         }
         if (variants[i])
            p_atomic_inc(&variants[i]->ref_count);
      }
      radv_pipeline_cache_unlock(cache);
      return;
   }

   /* Don't cache when we want debug info, since this isn't
    * present in the cache.
    */
   if (radv_is_cache_disabled(device)) {
      radv_pipeline_cache_unlock(cache);
      return;
   }

   size_t size = sizeof(*entry);
   for (int i = 0; i < MESA_SHADER_STAGES; ++i)
      if (variants[i])
         size += binaries[i]->total_size;
   const size_t size_without_align = size;
   size = align(size_without_align, alignof(struct cache_entry));

   entry = vk_alloc(&cache->alloc, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
   if (!entry) {
      radv_pipeline_cache_unlock(cache);
      return;
   }

   memset(entry, 0, sizeof(*entry));
   memcpy(entry->sha1, sha1, 20);

   char *p = entry->code;

   for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
      if (!variants[i])
         continue;

      entry->binary_sizes[i] = binaries[i]->total_size;

      memcpy(p, binaries[i], binaries[i]->total_size);
      p += binaries[i]->total_size;
   }

   /* Make valgrind happy by filling the alignment hole at the end. */
   assert(p == (char *)entry + size_without_align);
   assert(sizeof(*entry) + (p - entry->code) == size_without_align);
   memset((char *)entry + size_without_align, 0, size - size_without_align);

   /* Always add cache items to disk. This will allow collection of
    * compiled shaders by third parties such as Steam, even if the app
    * implements its own pipeline cache.
    */
   if (device->physical_device->disk_cache) {
      uint8_t disk_sha1[20];
      disk_cache_compute_key(device->physical_device->disk_cache, sha1, 20, disk_sha1);

      disk_cache_put(device->physical_device->disk_cache, disk_sha1, entry, entry_size(entry),
                     NULL);
   }

   if (device->instance->debug_flags & RADV_DEBUG_NO_MEMORY_CACHE && cache == device->mem_cache) {
      vk_free2(&cache->alloc, NULL, entry);
      radv_pipeline_cache_unlock(cache);
      return;
   }

   /* We delay setting the variants so we have reproducible disk cache
    * items.
    */
   for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
      if (!variants[i])
         continue;

      entry->variants[i] = variants[i];
      p_atomic_inc(&variants[i]->ref_count);
   }

   radv_pipeline_cache_add_entry(cache, entry);

   cache->modified = true;
   radv_pipeline_cache_unlock(cache);
   return;
}
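
/* Usage sketch (hypothetical caller, for illustration): on a cache miss the
 * compile path builds the binaries, then hands them to the cache, which may
 * substitute already-cached variants if another thread raced and inserted
 * the same pipeline first:
 *
 *    struct radv_shader_binary *binaries[MESA_SHADER_STAGES] = {0};
 *    compile_all_stages(device, pStages, pipeline->shaders, binaries);   (hypothetical helper)
 *    radv_pipeline_cache_insert_shaders(device, cache, hash,
 *                                       pipeline->shaders, binaries);
 *
 * Because the insert may swap entries of pipeline->shaders[] for the
 * previously cached variants, callers must read the array only after this
 * call returns.
 */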

bool
radv_pipeline_cache_load(struct radv_pipeline_cache *cache, const void *data, size_t size)
{
   struct radv_device *device = cache->device;
   struct vk_pipeline_cache_header header;

   if (size < sizeof(header))
      return false;
   memcpy(&header, data, sizeof(header));
   if (header.header_size < sizeof(header))
      return false;
   if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
      return false;
   if (header.vendor_id != ATI_VENDOR_ID)
      return false;
   if (header.device_id != device->physical_device->rad_info.pci_id)
      return false;
   if (memcmp(header.uuid, device->physical_device->cache_uuid, VK_UUID_SIZE) != 0)
      return false;

   char *end = (char *)data + size;
   char *p = (char *)data + header.header_size;

   while (end - p >= sizeof(struct cache_entry)) {
      struct cache_entry *entry = (struct cache_entry *)p;
      struct cache_entry *dest_entry;
      size_t size_of_entry = entry_size(entry);
      if (end - p < size_of_entry)
         break;

      dest_entry = vk_alloc(&cache->alloc, size_of_entry, 8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
      if (dest_entry) {
         memcpy(dest_entry, entry, size_of_entry);
         for (int i = 0; i < MESA_SHADER_STAGES; ++i)
            dest_entry->variants[i] = NULL;
         radv_pipeline_cache_add_entry(cache, dest_entry);
      }
      p += size_of_entry;
   }

   return true;
}

VkResult
radv_CreatePipelineCache(VkDevice _device, const VkPipelineCacheCreateInfo *pCreateInfo,
                         const VkAllocationCallbacks *pAllocator, VkPipelineCache *pPipelineCache)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   struct radv_pipeline_cache *cache;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);

   cache = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*cache), 8,
                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (cache == NULL)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk, &cache->base, VK_OBJECT_TYPE_PIPELINE_CACHE);

   if (pAllocator)
      cache->alloc = *pAllocator;
   else
      cache->alloc = device->vk.alloc;

   radv_pipeline_cache_init(cache, device);
   cache->flags = pCreateInfo->flags;

   if (pCreateInfo->initialDataSize > 0) {
      radv_pipeline_cache_load(cache, pCreateInfo->pInitialData, pCreateInfo->initialDataSize);
   }

   *pPipelineCache = radv_pipeline_cache_to_handle(cache);

   return VK_SUCCESS;
}

void
radv_DestroyPipelineCache(VkDevice _device, VkPipelineCache _cache,
                          const VkAllocationCallbacks *pAllocator)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);

   if (!cache)
      return;
   radv_pipeline_cache_finish(cache);

   vk_object_base_finish(&cache->base);
   vk_free2(&device->vk.alloc, pAllocator, cache);
}
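
/* Application-side sketch (illustrative, standard Vulkan usage rather than
 * driver code): radv_pipeline_cache_load() is what validates a blob an app
 * preloads from disk. A mismatched vendor/device id or cache UUID makes the
 * load a silent no-op, which is why apps can safely feed back stale blobs:
 *
 *    VkPipelineCacheCreateInfo info = {
 *       .sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO,
 *       .initialDataSize = blob_size,     (blob read back from a previous run)
 *       .pInitialData = blob,
 *    };
 *    VkPipelineCache pc;
 *    vkCreatePipelineCache(device, &info, NULL, &pc);
 */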

VkResult
radv_GetPipelineCacheData(VkDevice _device, VkPipelineCache _cache, size_t *pDataSize, void *pData)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);
   struct vk_pipeline_cache_header *header;
   VkResult result = VK_SUCCESS;

   radv_pipeline_cache_lock(cache);

   const size_t size = sizeof(*header) + cache->total_size;
   if (pData == NULL) {
      radv_pipeline_cache_unlock(cache);
      *pDataSize = size;
      return VK_SUCCESS;
   }
   if (*pDataSize < sizeof(*header)) {
      radv_pipeline_cache_unlock(cache);
      *pDataSize = 0;
      return VK_INCOMPLETE;
   }
   void *p = pData, *end = (char *)pData + *pDataSize;
   header = p;
   header->header_size = align(sizeof(*header), alignof(struct cache_entry));
   header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
   header->vendor_id = ATI_VENDOR_ID;
   header->device_id = device->physical_device->rad_info.pci_id;
   memcpy(header->uuid, device->physical_device->cache_uuid, VK_UUID_SIZE);
   p = (char *)p + header->header_size;

   struct cache_entry *entry;
   for (uint32_t i = 0; i < cache->table_size; i++) {
      if (!cache->hash_table[i])
         continue;
      entry = cache->hash_table[i];
      const uint32_t size_of_entry = entry_size(entry);
      if ((char *)end < (char *)p + size_of_entry) {
         result = VK_INCOMPLETE;
         break;
      }

      memcpy(p, entry, size_of_entry);
      for (int j = 0; j < MESA_SHADER_STAGES; ++j)
         ((struct cache_entry *)p)->variants[j] = NULL;
      p = (char *)p + size_of_entry;
   }
   *pDataSize = (char *)p - (char *)pData;

   radv_pipeline_cache_unlock(cache);
   return result;
}

static void
radv_pipeline_cache_merge(struct radv_pipeline_cache *dst, struct radv_pipeline_cache *src)
{
   for (uint32_t i = 0; i < src->table_size; i++) {
      struct cache_entry *entry = src->hash_table[i];
      if (!entry || radv_pipeline_cache_search(dst, entry->sha1))
         continue;

      radv_pipeline_cache_add_entry(dst, entry);

      src->hash_table[i] = NULL;
   }
}

VkResult
radv_MergePipelineCaches(VkDevice _device, VkPipelineCache destCache, uint32_t srcCacheCount,
                         const VkPipelineCache *pSrcCaches)
{
   RADV_FROM_HANDLE(radv_pipeline_cache, dst, destCache);

   for (uint32_t i = 0; i < srcCacheCount; i++) {
      RADV_FROM_HANDLE(radv_pipeline_cache, src, pSrcCaches[i]);

      radv_pipeline_cache_merge(dst, src);
   }

   return VK_SUCCESS;
}
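
/* Application-side sketch (illustrative): the standard two-call idiom for
 * serializing a cache, which pairs with radv_pipeline_cache_load() above.
 * Entries are written with their variants[] pointers zeroed, so the blob is
 * position-independent and safe to store on disk:
 *
 *    size_t blob_size = 0;
 *    vkGetPipelineCacheData(device, pc, &blob_size, NULL);   (query the size)
 *    void *blob = malloc(blob_size);
 *    vkGetPipelineCacheData(device, pc, &blob_size, blob);   (fill the data)
 *    write_file("pipeline_cache.bin", blob, blob_size);      (hypothetical helper)
 */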