/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "util/mesa-sha1.h"
#include "util/debug.h"
#include "util/u_atomic.h"
#include "radv_private.h"
#include "ac_nir_to_llvm.h"

/* A cache entry is both the in-memory hash table slot and the serialized
 * record: everything except the variant pointer is copied verbatim by
 * radv_GetPipelineCacheData and read back by radv_pipeline_cache_load.
 */
struct cache_entry {
	union {
		unsigned char sha1[20];
		uint32_t sha1_dw[5];
	};
	uint32_t code_size;
	struct ac_shader_variant_info variant_info;
	struct ac_shader_config config;
	uint32_t rsrc1, rsrc2;
	struct radv_shader_variant *variant;
	uint32_t code[0];
};

void
radv_pipeline_cache_init(struct radv_pipeline_cache *cache,
			 struct radv_device *device)
{
	cache->device = device;
	pthread_mutex_init(&cache->mutex, NULL);

	cache->modified = false;
	cache->kernel_count = 0;
	cache->total_size = 0;
	cache->table_size = 1024;
	const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]);
	cache->hash_table = malloc(byte_size);

	/* We don't consider allocation failure fatal, we just start with a 0-sized
	 * cache.
	 */
	if (cache->hash_table == NULL ||
	    (device->debug_flags & RADV_DEBUG_NO_CACHE))
		cache->table_size = 0;
	else
		memset(cache->hash_table, 0, byte_size);
}

void
radv_pipeline_cache_finish(struct radv_pipeline_cache *cache)
{
	for (unsigned i = 0; i < cache->table_size; ++i)
		if (cache->hash_table[i]) {
			if (cache->hash_table[i]->variant)
				radv_shader_variant_destroy(cache->device,
							    cache->hash_table[i]->variant);
			vk_free(&cache->alloc, cache->hash_table[i]);
		}
	pthread_mutex_destroy(&cache->mutex);
	free(cache->hash_table);
}

static uint32_t
entry_size(struct cache_entry *entry)
{
	return sizeof(*entry) + entry->code_size;
}

void
radv_hash_shader(unsigned char *hash, struct radv_shader_module *module,
		 const char *entrypoint,
		 const VkSpecializationInfo *spec_info,
		 const struct radv_pipeline_layout *layout,
		 const union ac_shader_variant_key *key,
		 uint32_t is_geom_copy_shader)
{
	struct mesa_sha1 ctx;

	_mesa_sha1_init(&ctx);
	if (key)
		_mesa_sha1_update(&ctx, key, sizeof(*key));
	_mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
	_mesa_sha1_update(&ctx, entrypoint, strlen(entrypoint));
	if (layout)
		_mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
	if (spec_info) {
		_mesa_sha1_update(&ctx, spec_info->pMapEntries,
				  spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]);
		_mesa_sha1_update(&ctx, spec_info->pData, spec_info->dataSize);
	}
	_mesa_sha1_update(&ctx, &is_geom_copy_shader, 4);
	_mesa_sha1_final(&ctx, hash);
}

static struct cache_entry *
radv_pipeline_cache_search_unlocked(struct radv_pipeline_cache *cache,
				    const unsigned char *sha1)
{
	const uint32_t mask = cache->table_size - 1;
	const uint32_t start = (*(uint32_t *) sha1);

	/* Linear probing: the table is kept at most half full, so we always
	 * hit an empty slot or a match before wrapping all the way around.
	 */
	for (uint32_t i = 0; i < cache->table_size; i++) {
		const uint32_t index = (start + i) & mask;
		struct cache_entry *entry = cache->hash_table[index];

		if (!entry)
			return NULL;

		if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) {
			return entry;
		}
	}

	unreachable("hash table should never be full");
}

static struct cache_entry *
radv_pipeline_cache_search(struct radv_pipeline_cache *cache,
			   const unsigned char *sha1)
{
	struct cache_entry *entry;

	pthread_mutex_lock(&cache->mutex);

	entry = radv_pipeline_cache_search_unlocked(cache, sha1);

	pthread_mutex_unlock(&cache->mutex);

	return entry;
}

struct radv_shader_variant *
radv_create_shader_variant_from_pipeline_cache(struct radv_device *device,
					       struct radv_pipeline_cache *cache,
					       const unsigned char *sha1)
{
	struct cache_entry *entry = NULL;
	if (cache)
		entry = radv_pipeline_cache_search(cache, sha1);

	if (!entry)
		return NULL;

	if (!entry->variant) {
		struct radv_shader_variant *variant;

		variant = calloc(1, sizeof(struct radv_shader_variant));
		if (!variant)
			return NULL;

		variant->config = entry->config;
		variant->info = entry->variant_info;
		variant->rsrc1 = entry->rsrc1;
		variant->rsrc2 = entry->rsrc2;
		variant->code_size = entry->code_size;
		variant->ref_count = 1;

		variant->bo = device->ws->buffer_create(device->ws, entry->code_size, 256,
							RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS);

		void *ptr = device->ws->buffer_map(variant->bo);
		memcpy(ptr, entry->code, entry->code_size);
		device->ws->buffer_unmap(variant->bo);

		entry->variant = variant;
	}

	p_atomic_inc(&entry->variant->ref_count);
	return entry->variant;
}

static void
radv_pipeline_cache_set_entry(struct radv_pipeline_cache *cache,
			      struct cache_entry *entry)
{
	const uint32_t mask = cache->table_size - 1;
	const uint32_t start = entry->sha1_dw[0];

	/* We'll always be able to insert when we get here.
	 */
	assert(cache->kernel_count < cache->table_size / 2);

	for (uint32_t i = 0; i < cache->table_size; i++) {
		const uint32_t index = (start + i) & mask;
		if (!cache->hash_table[index]) {
			cache->hash_table[index] = entry;
			break;
		}
	}

	cache->total_size += entry_size(entry);
	cache->kernel_count++;
}

static VkResult
radv_pipeline_cache_grow(struct radv_pipeline_cache *cache)
{
	const uint32_t table_size = cache->table_size * 2;
	const uint32_t old_table_size = cache->table_size;
	const size_t byte_size = table_size * sizeof(cache->hash_table[0]);
	struct cache_entry **table;
	struct cache_entry **old_table = cache->hash_table;

	table = malloc(byte_size);
	if (table == NULL)
		return VK_ERROR_OUT_OF_HOST_MEMORY;

	cache->hash_table = table;
	cache->table_size = table_size;
	cache->kernel_count = 0;
	cache->total_size = 0;

	memset(cache->hash_table, 0, byte_size);
	for (uint32_t i = 0; i < old_table_size; i++) {
		struct cache_entry *entry = old_table[i];
		if (!entry)
			continue;

		radv_pipeline_cache_set_entry(cache, entry);
	}

	free(old_table);

	return VK_SUCCESS;
}

static void
radv_pipeline_cache_add_entry(struct radv_pipeline_cache *cache,
			      struct cache_entry *entry)
{
	if (cache->kernel_count == cache->table_size / 2)
		radv_pipeline_cache_grow(cache);

	/* Failing to grow that hash table isn't fatal, but may mean we don't
	 * have enough space to add this new kernel. Only add it if there's room.
	 */
	if (cache->kernel_count < cache->table_size / 2)
		radv_pipeline_cache_set_entry(cache, entry);
}

struct radv_shader_variant *
radv_pipeline_cache_insert_shader(struct radv_pipeline_cache *cache,
				  const unsigned char *sha1,
				  struct radv_shader_variant *variant,
				  const void *code, unsigned code_size)
{
	if (!cache)
		return variant;

	pthread_mutex_lock(&cache->mutex);
	struct cache_entry *entry = radv_pipeline_cache_search_unlocked(cache, sha1);
	if (entry) {
		if (entry->variant) {
			radv_shader_variant_destroy(cache->device, variant);
			variant = entry->variant;
		} else {
			entry->variant = variant;
		}
		p_atomic_inc(&variant->ref_count);
		pthread_mutex_unlock(&cache->mutex);
		return variant;
	}

	entry = vk_alloc(&cache->alloc, sizeof(*entry) + code_size, 8,
			 VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
	if (!entry) {
		pthread_mutex_unlock(&cache->mutex);
		return variant;
	}

	memcpy(entry->sha1, sha1, 20);
	memcpy(entry->code, code, code_size);
	entry->config = variant->config;
	entry->variant_info = variant->info;
	entry->rsrc1 = variant->rsrc1;
	entry->rsrc2 = variant->rsrc2;
	entry->code_size = code_size;
	entry->variant = variant;
	p_atomic_inc(&variant->ref_count);

	radv_pipeline_cache_add_entry(cache, entry);

	cache->modified = true;
	pthread_mutex_unlock(&cache->mutex);
	return variant;
}

struct cache_header {
	uint32_t header_size;
	uint32_t header_version;
	uint32_t vendor_id;
	uint32_t device_id;
	uint8_t  uuid[VK_UUID_SIZE];
};

void
radv_pipeline_cache_load(struct radv_pipeline_cache *cache,
			 const void *data, size_t size)
{
	struct radv_device *device = cache->device;
	struct cache_header header;

	if (size < sizeof(header))
		return;
	memcpy(&header, data, sizeof(header));
	if (header.header_size < sizeof(header))
		return;
	if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
		return;
	if (header.vendor_id != 0x1002)
		return;
	if (header.device_id != device->physical_device->rad_info.pci_id)
		return;
	if (memcmp(header.uuid, device->physical_device->uuid, VK_UUID_SIZE) != 0)
		return;

	char *end = (void *) data + size;
	char *p = (void *) data + header.header_size;

	while (end - p >= sizeof(struct cache_entry)) {
		struct cache_entry *entry = (struct cache_entry*)p;
		struct cache_entry *dest_entry;
		if (end - p < sizeof(*entry) + entry->code_size)
			break;

		dest_entry = vk_alloc(&cache->alloc, sizeof(*entry) + entry->code_size,
				      8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
		if (dest_entry) {
			memcpy(dest_entry, entry, sizeof(*entry) + entry->code_size);
			dest_entry->variant = NULL;
			radv_pipeline_cache_add_entry(cache, dest_entry);
		}
		p += sizeof(*entry) + entry->code_size;
	}
}

VkResult radv_CreatePipelineCache(
	VkDevice                                    _device,
	const VkPipelineCacheCreateInfo*            pCreateInfo,
	const VkAllocationCallbacks*                pAllocator,
	VkPipelineCache*                            pPipelineCache)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_pipeline_cache *cache;

	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
	assert(pCreateInfo->flags == 0);

	cache = vk_alloc2(&device->alloc, pAllocator,
			  sizeof(*cache), 8,
			  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (cache == NULL)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	if (pAllocator)
		cache->alloc = *pAllocator;
	else
		cache->alloc = device->alloc;

	radv_pipeline_cache_init(cache, device);

	if (pCreateInfo->initialDataSize > 0) {
		radv_pipeline_cache_load(cache,
					 pCreateInfo->pInitialData,
					 pCreateInfo->initialDataSize);
	}

	*pPipelineCache = radv_pipeline_cache_to_handle(cache);

	return VK_SUCCESS;
}

void radv_DestroyPipelineCache(
	VkDevice                                    _device,
	VkPipelineCache                             _cache,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);

	if (!cache)
		return;
	radv_pipeline_cache_finish(cache);

	vk_free2(&device->alloc, pAllocator, cache);
}

VkResult radv_GetPipelineCacheData(
	VkDevice                                    _device,
	VkPipelineCache                             _cache,
	size_t*                                     pDataSize,
	void*                                       pData)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);
	struct cache_header *header;
	VkResult result = VK_SUCCESS;

	const size_t size = sizeof(*header) + cache->total_size;
	if (pData == NULL) {
		*pDataSize = size;
		return VK_SUCCESS;
	}
	if (*pDataSize < sizeof(*header)) {
		*pDataSize = 0;
		return VK_INCOMPLETE;
	}

	void *p = pData, *end = pData + *pDataSize;
	header = p;
	header->header_size = sizeof(*header);
	header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
	header->vendor_id = 0x1002;
	header->device_id = device->physical_device->rad_info.pci_id;
	memcpy(header->uuid, device->physical_device->uuid, VK_UUID_SIZE);
	p += header->header_size;

	struct cache_entry *entry;
	for (uint32_t i = 0; i < cache->table_size; i++) {
		if (!cache->hash_table[i])
			continue;
		entry = cache->hash_table[i];
		const uint32_t size = entry_size(entry);
		if (end < p + size) {
			result = VK_INCOMPLETE;
			break;
		}

		memcpy(p, entry, size);
		/* The variant pointer is meaningless outside this process;
		 * clear it in the serialized copy. */
		((struct cache_entry*)p)->variant = NULL;
		p += size;
	}
	*pDataSize = p - pData;

	return result;
}

static void
radv_pipeline_cache_merge(struct radv_pipeline_cache *dst,
			  struct radv_pipeline_cache *src)
{
	for (uint32_t i = 0; i < src->table_size; i++) {
		struct cache_entry *entry = src->hash_table[i];
		if (!entry || radv_pipeline_cache_search(dst, entry->sha1))
			continue;

		radv_pipeline_cache_add_entry(dst, entry);

		src->hash_table[i] = NULL;
	}
}

VkResult radv_MergePipelineCaches(
	VkDevice                                    _device,
	VkPipelineCache                             destCache,
	uint32_t                                    srcCacheCount,
	const VkPipelineCache*                      pSrcCaches)
{
	RADV_FROM_HANDLE(radv_pipeline_cache, dst, destCache);

	for (uint32_t i = 0; i < srcCacheCount; i++) {
		RADV_FROM_HANDLE(radv_pipeline_cache, src, pSrcCaches[i]);

		radv_pipeline_cache_merge(dst, src);
	}

	return VK_SUCCESS;
}