/* Source: src/freedreno/vulkan/tu_pipeline_cache.c (mesa, 21.2-virgl branch) */
/*1* Copyright © 2015 Intel Corporation2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER20* DEALINGS IN THE SOFTWARE.21*/2223#include "tu_private.h"2425#include "util/debug.h"26#include "util/disk_cache.h"27#include "util/mesa-sha1.h"28#include "util/u_atomic.h"29#include "vulkan/util/vk_util.h"3031struct cache_entry_variant_info32{33};3435struct cache_entry36{37union {38unsigned char sha1[20];39uint32_t sha1_dw[5];40};41uint32_t code_sizes[MESA_SHADER_STAGES];42struct tu_shader_variant *variants[MESA_SHADER_STAGES];43char code[0];44};4546static void47tu_pipeline_cache_init(struct tu_pipeline_cache *cache,48struct tu_device *device)49{50cache->device = device;51pthread_mutex_init(&cache->mutex, NULL);5253cache->modified = false;54cache->kernel_count = 0;55cache->total_size = 0;56cache->table_size = 1024;57const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]);58cache->hash_table = malloc(byte_size);5960/* We don't consider allocation failure 
fatal, we just start with a 0-sized61* cache. Disable caching when we want to keep shader debug info, since62* we don't get the debug info on cached shaders. */63if (cache->hash_table == NULL)64cache->table_size = 0;65else66memset(cache->hash_table, 0, byte_size);67}6869static void70tu_pipeline_cache_finish(struct tu_pipeline_cache *cache)71{72for (unsigned i = 0; i < cache->table_size; ++i)73if (cache->hash_table[i]) {74vk_free(&cache->alloc, cache->hash_table[i]);75}76pthread_mutex_destroy(&cache->mutex);77free(cache->hash_table);78}7980static uint32_t81entry_size(struct cache_entry *entry)82{83size_t ret = sizeof(*entry);84for (int i = 0; i < MESA_SHADER_STAGES; ++i)85if (entry->code_sizes[i])86ret +=87sizeof(struct cache_entry_variant_info) + entry->code_sizes[i];88return ret;89}9091static struct cache_entry *92tu_pipeline_cache_search_unlocked(struct tu_pipeline_cache *cache,93const unsigned char *sha1)94{95const uint32_t mask = cache->table_size - 1;96const uint32_t start = (*(uint32_t *) sha1);9798if (cache->table_size == 0)99return NULL;100101for (uint32_t i = 0; i < cache->table_size; i++) {102const uint32_t index = (start + i) & mask;103struct cache_entry *entry = cache->hash_table[index];104105if (!entry)106return NULL;107108if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) {109return entry;110}111}112113unreachable("hash table should never be full");114}115116static struct cache_entry *117tu_pipeline_cache_search(struct tu_pipeline_cache *cache,118const unsigned char *sha1)119{120struct cache_entry *entry;121122pthread_mutex_lock(&cache->mutex);123124entry = tu_pipeline_cache_search_unlocked(cache, sha1);125126pthread_mutex_unlock(&cache->mutex);127128return entry;129}130131static void132tu_pipeline_cache_set_entry(struct tu_pipeline_cache *cache,133struct cache_entry *entry)134{135const uint32_t mask = cache->table_size - 1;136const uint32_t start = entry->sha1_dw[0];137138/* We'll always be able to insert when we get here. 
*/139assert(cache->kernel_count < cache->table_size / 2);140141for (uint32_t i = 0; i < cache->table_size; i++) {142const uint32_t index = (start + i) & mask;143if (!cache->hash_table[index]) {144cache->hash_table[index] = entry;145break;146}147}148149cache->total_size += entry_size(entry);150cache->kernel_count++;151}152153static VkResult154tu_pipeline_cache_grow(struct tu_pipeline_cache *cache)155{156const uint32_t table_size = cache->table_size * 2;157const uint32_t old_table_size = cache->table_size;158const size_t byte_size = table_size * sizeof(cache->hash_table[0]);159struct cache_entry **table;160struct cache_entry **old_table = cache->hash_table;161162table = malloc(byte_size);163if (table == NULL)164return vk_error(cache->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);165166cache->hash_table = table;167cache->table_size = table_size;168cache->kernel_count = 0;169cache->total_size = 0;170171memset(cache->hash_table, 0, byte_size);172for (uint32_t i = 0; i < old_table_size; i++) {173struct cache_entry *entry = old_table[i];174if (!entry)175continue;176177tu_pipeline_cache_set_entry(cache, entry);178}179180free(old_table);181182return VK_SUCCESS;183}184185static void186tu_pipeline_cache_add_entry(struct tu_pipeline_cache *cache,187struct cache_entry *entry)188{189if (cache->kernel_count == cache->table_size / 2)190tu_pipeline_cache_grow(cache);191192/* Failing to grow that hash table isn't fatal, but may mean we don't193* have enough space to add this new kernel. 
Only add it if there's room.194*/195if (cache->kernel_count < cache->table_size / 2)196tu_pipeline_cache_set_entry(cache, entry);197}198199static void200tu_pipeline_cache_load(struct tu_pipeline_cache *cache,201const void *data,202size_t size)203{204struct tu_device *device = cache->device;205struct vk_pipeline_cache_header header;206207if (size < sizeof(header))208return;209memcpy(&header, data, sizeof(header));210if (header.header_size < sizeof(header))211return;212if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)213return;214if (header.vendor_id != 0 /* TODO */)215return;216if (header.device_id != 0 /* TODO */)217return;218if (memcmp(header.uuid, device->physical_device->cache_uuid,219VK_UUID_SIZE) != 0)220return;221222char *end = (void *) data + size;223char *p = (void *) data + header.header_size;224225while (end - p >= sizeof(struct cache_entry)) {226struct cache_entry *entry = (struct cache_entry *) p;227struct cache_entry *dest_entry;228size_t size = entry_size(entry);229if (end - p < size)230break;231232dest_entry =233vk_alloc(&cache->alloc, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE);234if (dest_entry) {235memcpy(dest_entry, entry, size);236for (int i = 0; i < MESA_SHADER_STAGES; ++i)237dest_entry->variants[i] = NULL;238tu_pipeline_cache_add_entry(cache, dest_entry);239}240p += size;241}242}243244VKAPI_ATTR VkResult VKAPI_CALL245tu_CreatePipelineCache(VkDevice _device,246const VkPipelineCacheCreateInfo *pCreateInfo,247const VkAllocationCallbacks *pAllocator,248VkPipelineCache *pPipelineCache)249{250TU_FROM_HANDLE(tu_device, device, _device);251struct tu_pipeline_cache *cache;252253assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);254assert(pCreateInfo->flags == 0);255256cache = vk_object_alloc(&device->vk, pAllocator, sizeof(*cache),257VK_OBJECT_TYPE_PIPELINE_CACHE);258if (cache == NULL)259return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);260261if (pAllocator)262cache->alloc = 
*pAllocator;263else264cache->alloc = device->vk.alloc;265266tu_pipeline_cache_init(cache, device);267268if (pCreateInfo->initialDataSize > 0) {269tu_pipeline_cache_load(cache, pCreateInfo->pInitialData,270pCreateInfo->initialDataSize);271}272273*pPipelineCache = tu_pipeline_cache_to_handle(cache);274275return VK_SUCCESS;276}277278VKAPI_ATTR void VKAPI_CALL279tu_DestroyPipelineCache(VkDevice _device,280VkPipelineCache _cache,281const VkAllocationCallbacks *pAllocator)282{283TU_FROM_HANDLE(tu_device, device, _device);284TU_FROM_HANDLE(tu_pipeline_cache, cache, _cache);285286if (!cache)287return;288tu_pipeline_cache_finish(cache);289290vk_object_free(&device->vk, pAllocator, cache);291}292293VKAPI_ATTR VkResult VKAPI_CALL294tu_GetPipelineCacheData(VkDevice _device,295VkPipelineCache _cache,296size_t *pDataSize,297void *pData)298{299TU_FROM_HANDLE(tu_device, device, _device);300TU_FROM_HANDLE(tu_pipeline_cache, cache, _cache);301struct vk_pipeline_cache_header *header;302VkResult result = VK_SUCCESS;303304pthread_mutex_lock(&cache->mutex);305306const size_t size = sizeof(*header) + cache->total_size;307if (pData == NULL) {308pthread_mutex_unlock(&cache->mutex);309*pDataSize = size;310return VK_SUCCESS;311}312if (*pDataSize < sizeof(*header)) {313pthread_mutex_unlock(&cache->mutex);314*pDataSize = 0;315return VK_INCOMPLETE;316}317void *p = pData, *end = pData + *pDataSize;318header = p;319header->header_size = sizeof(*header);320header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;321header->vendor_id = 0 /* TODO */;322header->device_id = 0 /* TODO */;323memcpy(header->uuid, device->physical_device->cache_uuid, VK_UUID_SIZE);324p += header->header_size;325326struct cache_entry *entry;327for (uint32_t i = 0; i < cache->table_size; i++) {328if (!cache->hash_table[i])329continue;330entry = cache->hash_table[i];331const uint32_t size = entry_size(entry);332if (end < p + size) {333result = VK_INCOMPLETE;334break;335}336337memcpy(p, entry, size);338for (int j = 0; j 
< MESA_SHADER_STAGES; ++j)339((struct cache_entry *) p)->variants[j] = NULL;340p += size;341}342*pDataSize = p - pData;343344pthread_mutex_unlock(&cache->mutex);345return result;346}347348static void349tu_pipeline_cache_merge(struct tu_pipeline_cache *dst,350struct tu_pipeline_cache *src)351{352for (uint32_t i = 0; i < src->table_size; i++) {353struct cache_entry *entry = src->hash_table[i];354if (!entry || tu_pipeline_cache_search(dst, entry->sha1))355continue;356357tu_pipeline_cache_add_entry(dst, entry);358359src->hash_table[i] = NULL;360}361}362363VKAPI_ATTR VkResult VKAPI_CALL364tu_MergePipelineCaches(VkDevice _device,365VkPipelineCache destCache,366uint32_t srcCacheCount,367const VkPipelineCache *pSrcCaches)368{369TU_FROM_HANDLE(tu_pipeline_cache, dst, destCache);370371for (uint32_t i = 0; i < srcCacheCount; i++) {372TU_FROM_HANDLE(tu_pipeline_cache, src, pSrcCaches[i]);373374tu_pipeline_cache_merge(dst, src);375}376377return VK_SUCCESS;378}379380381