Diffstat (limited to 'src/amd/vulkan/radv_pipeline_cache.c')
-rw-r--r-- | src/amd/vulkan/radv_pipeline_cache.c | 475
1 file changed, 475 insertions, 0 deletions
diff --git a/src/amd/vulkan/radv_pipeline_cache.c b/src/amd/vulkan/radv_pipeline_cache.c
new file mode 100644
index 00000000000..032a7e46040
--- /dev/null
+++ b/src/amd/vulkan/radv_pipeline_cache.c
@@ -0,0 +1,475 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "util/mesa-sha1.h"
+#include "util/debug.h"
+#include "radv_private.h"
+
+#include "ac_nir_to_llvm.h"
+
+struct cache_entry {
+        unsigned char sha1[20];
+        uint32_t code_size;
+        struct ac_shader_variant_info variant_info;
+        struct ac_shader_config config;
+        uint32_t rsrc1, rsrc2;
+        struct radv_shader_variant *variant;
+        uint32_t code[0];
+};
+
+void
+radv_pipeline_cache_init(struct radv_pipeline_cache *cache,
+                         struct radv_device *device)
+{
+        cache->device = device;
+        pthread_mutex_init(&cache->mutex, NULL);
+
+        cache->modified = false;
+        cache->kernel_count = 0;
+        cache->total_size = 0;
+        cache->table_size = 1024;
+        const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]);
+        cache->hash_table = malloc(byte_size);
+
+        /* We don't consider allocation failure fatal, we just start with a 0-sized
+         * cache. */
+        if (cache->hash_table == NULL ||
+            !env_var_as_boolean("RADV_ENABLE_PIPELINE_CACHE", true))
+                cache->table_size = 0;
+        else
+                memset(cache->hash_table, 0, byte_size);
+}
+
+void
+radv_pipeline_cache_finish(struct radv_pipeline_cache *cache)
+{
+        for (unsigned i = 0; i < cache->table_size; ++i)
+                if (cache->hash_table[i]) {
+                        if (cache->hash_table[i]->variant)
+                                radv_shader_variant_destroy(cache->device,
+                                                            cache->hash_table[i]->variant);
+                        radv_free(&cache->alloc, cache->hash_table[i]);
+                }
+        pthread_mutex_destroy(&cache->mutex);
+        free(cache->hash_table);
+}
+
+static uint32_t
+entry_size(struct cache_entry *entry)
+{
+        return sizeof(*entry) + entry->code_size;
+}
+
+void
+radv_hash_shader(unsigned char *hash, struct radv_shader_module *module,
+                 const char *entrypoint,
+                 const VkSpecializationInfo *spec_info,
+                 const struct radv_pipeline_layout *layout,
+                 const union ac_shader_variant_key *key)
+{
+        struct mesa_sha1 *ctx;
+
+        ctx = _mesa_sha1_init();
+        if (key)
+                _mesa_sha1_update(ctx, key, sizeof(*key));
+        _mesa_sha1_update(ctx, module->sha1, sizeof(module->sha1));
+        _mesa_sha1_update(ctx, entrypoint, strlen(entrypoint));
+        if (layout)
+                _mesa_sha1_update(ctx, layout->sha1, sizeof(layout->sha1));
+        if (spec_info) {
+                _mesa_sha1_update(ctx, spec_info->pMapEntries,
+                                  spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]);
+                _mesa_sha1_update(ctx, spec_info->pData, spec_info->dataSize);
+        }
+        _mesa_sha1_final(ctx, hash);
+}
+
+
+static struct cache_entry *
+radv_pipeline_cache_search_unlocked(struct radv_pipeline_cache *cache,
+                                    const unsigned char *sha1)
+{
+        const uint32_t mask = cache->table_size - 1;
+        const uint32_t start = (*(uint32_t *) sha1);
+
+        for (uint32_t i = 0; i < cache->table_size; i++) {
+                const uint32_t index = (start + i) & mask;
+                struct cache_entry *entry = cache->hash_table[index];
+
+                if (!entry)
+                        return NULL;
+
+                if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) {
+                        return entry;
+                }
+        }
+
+        unreachable("hash table should never be full");
+}
+
+static struct cache_entry *
+radv_pipeline_cache_search(struct radv_pipeline_cache *cache,
+                           const unsigned char *sha1)
+{
+        struct cache_entry *entry;
+
+        pthread_mutex_lock(&cache->mutex);
+
+        entry = radv_pipeline_cache_search_unlocked(cache, sha1);
+
+        pthread_mutex_unlock(&cache->mutex);
+
+        return entry;
+}
+
+struct radv_shader_variant *
+radv_create_shader_variant_from_pipeline_cache(struct radv_device *device,
+                                               struct radv_pipeline_cache *cache,
+                                               const unsigned char *sha1)
+{
+        struct cache_entry *entry = radv_pipeline_cache_search(cache, sha1);
+
+        if (!entry)
+                return NULL;
+
+        if (!entry->variant) {
+                struct radv_shader_variant *variant;
+
+                variant = calloc(1, sizeof(struct radv_shader_variant));
+                if (!variant)
+                        return NULL;
+
+                variant->config = entry->config;
+                variant->info = entry->variant_info;
+                variant->rsrc1 = entry->rsrc1;
+                variant->rsrc2 = entry->rsrc2;
+                variant->ref_count = 1;
+
+                variant->bo = device->ws->buffer_create(device->ws, entry->code_size, 256,
+                                                        RADEON_DOMAIN_GTT, RADEON_FLAG_CPU_ACCESS);
+
+                void *ptr = device->ws->buffer_map(variant->bo);
+                memcpy(ptr, entry->code, entry->code_size);
+                device->ws->buffer_unmap(variant->bo);
+
+                entry->variant = variant;
+        }
+
+        __sync_fetch_and_add(&entry->variant->ref_count, 1);
+        return entry->variant;
+}
+
+
+static void
+radv_pipeline_cache_set_entry(struct radv_pipeline_cache *cache,
+                              struct cache_entry *entry)
+{
+        const uint32_t mask = cache->table_size - 1;
+        const uint32_t start = (*(uint32_t *) entry->sha1);
+
+        /* We'll always be able to insert when we get here. */
+        assert(cache->kernel_count < cache->table_size / 2);
+
+        for (uint32_t i = 0; i < cache->table_size; i++) {
+                const uint32_t index = (start + i) & mask;
+                if (!cache->hash_table[index]) {
+                        cache->hash_table[index] = entry;
+                        break;
+                }
+        }
+
+        cache->total_size += entry_size(entry);
+        cache->kernel_count++;
+}
+
+
+static VkResult
+radv_pipeline_cache_grow(struct radv_pipeline_cache *cache)
+{
+        const uint32_t table_size = cache->table_size * 2;
+        const uint32_t old_table_size = cache->table_size;
+        const size_t byte_size = table_size * sizeof(cache->hash_table[0]);
+        struct cache_entry **table;
+        struct cache_entry **old_table = cache->hash_table;
+
+        table = malloc(byte_size);
+        if (table == NULL)
+                return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+        cache->hash_table = table;
+        cache->table_size = table_size;
+        cache->kernel_count = 0;
+        cache->total_size = 0;
+
+        memset(cache->hash_table, 0, byte_size);
+        for (uint32_t i = 0; i < old_table_size; i++) {
+                struct cache_entry *entry = old_table[i];
+                if (!entry)
+                        continue;
+
+                radv_pipeline_cache_set_entry(cache, entry);
+        }
+
+        free(old_table);
+
+        return VK_SUCCESS;
+}
+
+static void
+radv_pipeline_cache_add_entry(struct radv_pipeline_cache *cache,
+                              struct cache_entry *entry)
+{
+        if (cache->kernel_count == cache->table_size / 2)
+                radv_pipeline_cache_grow(cache);
+
+        /* Failing to grow that hash table isn't fatal, but may mean we don't
+         * have enough space to add this new kernel. Only add it if there's room.
+         */
+        if (cache->kernel_count < cache->table_size / 2)
+                radv_pipeline_cache_set_entry(cache, entry);
+}
+
+struct radv_shader_variant *
+radv_pipeline_cache_insert_shader(struct radv_pipeline_cache *cache,
+                                  const unsigned char *sha1,
+                                  struct radv_shader_variant *variant,
+                                  const void *code, unsigned code_size)
+{
+        pthread_mutex_lock(&cache->mutex);
+        struct cache_entry *entry = radv_pipeline_cache_search_unlocked(cache, sha1);
+        if (entry) {
+                if (entry->variant) {
+                        radv_shader_variant_destroy(cache->device, variant);
+                        variant = entry->variant;
+                } else {
+                        entry->variant = variant;
+                }
+                __sync_fetch_and_add(&variant->ref_count, 1);
+                pthread_mutex_unlock(&cache->mutex);
+                return variant;
+        }
+
+        entry = radv_alloc(&cache->alloc, sizeof(*entry) + code_size, 8,
+                           VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
+        if (!entry) {
+                pthread_mutex_unlock(&cache->mutex);
+                return variant;
+        }
+
+        memcpy(entry->sha1, sha1, 20);
+        memcpy(entry->code, code, code_size);
+        entry->config = variant->config;
+        entry->variant_info = variant->info;
+        entry->rsrc1 = variant->rsrc1;
+        entry->rsrc2 = variant->rsrc2;
+        entry->code_size = code_size;
+        entry->variant = variant;
+        __sync_fetch_and_add(&variant->ref_count, 1);
+
+        radv_pipeline_cache_add_entry(cache, entry);
+
+        cache->modified = true;
+        pthread_mutex_unlock(&cache->mutex);
+        return variant;
+}
+
+struct cache_header {
+        uint32_t header_size;
+        uint32_t header_version;
+        uint32_t vendor_id;
+        uint32_t device_id;
+        uint8_t  uuid[VK_UUID_SIZE];
+};
+
+void
+radv_pipeline_cache_load(struct radv_pipeline_cache *cache,
+                         const void *data, size_t size)
+{
+        struct radv_device *device = cache->device;
+        struct cache_header header;
+        uint8_t uuid[VK_UUID_SIZE];
+
+        if (size < sizeof(header))
+                return;
+        memcpy(&header, data, sizeof(header));
+        if (header.header_size < sizeof(header))
+                return;
+        if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
+                return;
+        if (header.vendor_id != 0x1002)
+                return;
+        if (header.device_id != device->instance->physicalDevice.rad_info.pci_id)
+                return;
+        radv_device_get_cache_uuid(uuid);
+        if (memcmp(header.uuid, uuid, VK_UUID_SIZE) != 0)
+                return;
+
+        char *end = (void *) data + size;
+        char *p = (void *) data + header.header_size;
+
+        while (end - p >= sizeof(struct cache_entry)) {
+                struct cache_entry *entry = (struct cache_entry*)p;
+                struct cache_entry *dest_entry;
+                if (end - p < sizeof(*entry) + entry->code_size)
+                        break;
+
+                dest_entry = radv_alloc(&cache->alloc, sizeof(*entry) + entry->code_size,
+                                        8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
+                if (dest_entry) {
+                        memcpy(dest_entry, entry, sizeof(*entry) + entry->code_size);
+                        dest_entry->variant = NULL;
+                        radv_pipeline_cache_add_entry(cache, dest_entry);
+                }
+                p += sizeof (*entry) + entry->code_size;
+        }
+}
+
+VkResult radv_CreatePipelineCache(
+        VkDevice                                    _device,
+        const VkPipelineCacheCreateInfo*            pCreateInfo,
+        const VkAllocationCallbacks*                pAllocator,
+        VkPipelineCache*                            pPipelineCache)
+{
+        RADV_FROM_HANDLE(radv_device, device, _device);
+        struct radv_pipeline_cache *cache;
+
+        assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
+        assert(pCreateInfo->flags == 0);
+
+        cache = radv_alloc2(&device->alloc, pAllocator,
+                            sizeof(*cache), 8,
+                            VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+        if (cache == NULL)
+                return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+        if (pAllocator)
+                cache->alloc = *pAllocator;
+        else
+                cache->alloc = device->alloc;
+
+        radv_pipeline_cache_init(cache, device);
+
+        if (pCreateInfo->initialDataSize > 0) {
+                radv_pipeline_cache_load(cache,
+                                         pCreateInfo->pInitialData,
+                                         pCreateInfo->initialDataSize);
+        }
+
+        *pPipelineCache = radv_pipeline_cache_to_handle(cache);
+
+        return VK_SUCCESS;
+}
+
+void radv_DestroyPipelineCache(
+        VkDevice                                    _device,
+        VkPipelineCache                             _cache,
+        const VkAllocationCallbacks*                pAllocator)
+{
+        RADV_FROM_HANDLE(radv_device, device, _device);
+        RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);
+
+        if (!cache)
+                return;
+        radv_pipeline_cache_finish(cache);
+
+        radv_free2(&device->alloc, pAllocator, cache);
+}
+
+VkResult radv_GetPipelineCacheData(
+        VkDevice                                    _device,
+        VkPipelineCache                             _cache,
+        size_t*                                     pDataSize,
+        void*                                       pData)
+{
+        RADV_FROM_HANDLE(radv_device, device, _device);
+        RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);
+        struct cache_header *header;
+        VkResult result = VK_SUCCESS;
+        const size_t size = sizeof(*header) + cache->total_size;
+        if (pData == NULL) {
+                *pDataSize = size;
+                return VK_SUCCESS;
+        }
+        if (*pDataSize < sizeof(*header)) {
+                *pDataSize = 0;
+                return VK_INCOMPLETE;
+        }
+        void *p = pData, *end = pData + *pDataSize;
+        header = p;
+        header->header_size = sizeof(*header);
+        header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
+        header->vendor_id = 0x1002;
+        header->device_id = device->instance->physicalDevice.rad_info.pci_id;
+        radv_device_get_cache_uuid(header->uuid);
+        p += header->header_size;
+
+        struct cache_entry *entry;
+        for (uint32_t i = 0; i < cache->table_size; i++) {
+                if (!cache->hash_table[i])
+                        continue;
+                entry = cache->hash_table[i];
+                const uint32_t size = entry_size(entry);
+                if (end < p + size) {
+                        result = VK_INCOMPLETE;
+                        break;
+                }
+
+                memcpy(p, entry, size);
+                ((struct cache_entry*)p)->variant = NULL;
+                p += size;
+        }
+        *pDataSize = p - pData;
+
+        return result;
+}
+
+static void
+radv_pipeline_cache_merge(struct radv_pipeline_cache *dst,
+                          struct radv_pipeline_cache *src)
+{
+        for (uint32_t i = 0; i < src->table_size; i++) {
+                struct cache_entry *entry = src->hash_table[i];
+                if (!entry || radv_pipeline_cache_search(dst, entry->sha1))
+                        continue;
+
+                radv_pipeline_cache_add_entry(dst, entry);
+
+                src->hash_table[i] = NULL;
+        }
+}
+
+VkResult radv_MergePipelineCaches(
+        VkDevice                                    _device,
+        VkPipelineCache                             destCache,
+        uint32_t                                    srcCacheCount,
+        const VkPipelineCache*                      pSrcCaches)
+{
+        RADV_FROM_HANDLE(radv_pipeline_cache, dst, destCache);
+
+        for (uint32_t i = 0; i < srcCacheCount; i++) {
+                RADV_FROM_HANDLE(radv_pipeline_cache, src, pSrcCaches[i]);
+
+                radv_pipeline_cache_merge(dst, src);
+        }
+
+        return VK_SUCCESS;
+}
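
For context only (not part of the patch): the entry points above implement the standard Vulkan pipeline-cache round trip. radv_GetPipelineCacheData() serializes the cache as a cache_header followed by packed cache_entry records, and the same blob can later be handed back through VkPipelineCacheCreateInfo::pInitialData, where radv_pipeline_cache_load() validates the header (vendor id 0x1002, device id, cache UUID) before re-adding the entries; a mismatched blob is silently ignored and the cache simply starts empty. The sketch below shows how an application might persist and restore such a blob using only core Vulkan calls; the helper names and file handling are illustrative assumptions, not part of radv.

/* Illustrative sketch, not part of this commit: persisting a pipeline
 * cache blob to disk and feeding it back on the next run. */
#include <stdio.h>
#include <stdlib.h>
#include <vulkan/vulkan.h>

static VkPipelineCache
load_cache_from_disk(VkDevice device, const char *path)  /* hypothetical helper */
{
        size_t size = 0;
        void *blob = NULL;

        FILE *f = fopen(path, "rb");
        if (f) {
                fseek(f, 0, SEEK_END);
                long len = ftell(f);
                fseek(f, 0, SEEK_SET);
                if (len > 0) {
                        size = (size_t)len;
                        blob = malloc(size);
                        if (!blob || fread(blob, 1, size, f) != size) {
                                free(blob);
                                blob = NULL;
                                size = 0;
                        }
                }
                fclose(f);
        }

        /* A stale or foreign blob is not an error: the driver's header
         * checks reject it and the cache starts out empty. */
        VkPipelineCacheCreateInfo info = {
                .sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO,
                .initialDataSize = size,
                .pInitialData = blob,
        };
        VkPipelineCache cache = VK_NULL_HANDLE;
        vkCreatePipelineCache(device, &info, NULL, &cache);
        free(blob);
        return cache;
}

static void
save_cache_to_disk(VkDevice device, VkPipelineCache cache, const char *path)  /* hypothetical helper */
{
        /* Standard two-call idiom: first call queries the size,
         * second call fills the blob. */
        size_t size = 0;
        if (vkGetPipelineCacheData(device, cache, &size, NULL) != VK_SUCCESS || size == 0)
                return;
        void *blob = malloc(size);
        if (!blob)
                return;
        if (vkGetPipelineCacheData(device, cache, &size, blob) == VK_SUCCESS) {
                FILE *f = fopen(path, "wb");
                if (f) {
                        fwrite(blob, 1, size, f);
                        fclose(f);
                }
        }
        free(blob);
}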