/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "util/mesa-sha1.h"
#include "util/hash_table.h"
#include "util/debug.h"
#include "anv_private.h"

static size_t
anv_shader_bin_size(uint32_t prog_data_size, uint32_t nr_params,
                    uint32_t key_size,
                    uint32_t surface_count, uint32_t sampler_count)
{
   const uint32_t binding_data_size =
      (surface_count + sampler_count) * sizeof(struct anv_pipeline_binding);

   return align_u32(sizeof(struct anv_shader_bin), 8) +
          align_u32(prog_data_size, 8) +
          align_u32(nr_params * sizeof(void *), 8) +
          align_u32(sizeof(uint32_t) + key_size, 8) +
          align_u32(binding_data_size, 8);
}

struct anv_shader_bin *
anv_shader_bin_create(struct anv_device *device,
                      const void *key_data, uint32_t key_size,
                      const void *kernel_data, uint32_t kernel_size,
                      const struct brw_stage_prog_data *prog_data,
                      uint32_t prog_data_size, const void *prog_data_param,
                      const struct anv_pipeline_bind_map *bind_map)
{
   const size_t size =
      anv_shader_bin_size(prog_data_size, prog_data->nr_params,
                          key_size, bind_map->surface_count,
                          bind_map->sampler_count);

   struct anv_shader_bin *shader =
      vk_alloc(&device->alloc, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!shader)
      return NULL;

   shader->ref_cnt = 1;

   shader->kernel =
      anv_state_pool_alloc(&device->instruction_state_pool, kernel_size, 64);
   memcpy(shader->kernel.map, kernel_data, kernel_size);
   shader->kernel_size = kernel_size;
   shader->bind_map = *bind_map;
   shader->prog_data_size = prog_data_size;

   /* Now we fill out the floating data at the end */
   void *data = shader;
   data += align_u32(sizeof(struct anv_shader_bin), 8);

   shader->prog_data = data;
   struct brw_stage_prog_data *new_prog_data = data;
   memcpy(data, prog_data, prog_data_size);
   data += align_u32(prog_data_size, 8);

   assert(prog_data->nr_pull_params == 0);
   assert(prog_data->nr_image_params == 0);

   new_prog_data->param = data;
   uint32_t param_size = prog_data->nr_params * sizeof(void *);
   memcpy(data, prog_data_param, param_size);
   data += align_u32(param_size, 8);

   shader->key = data;
   struct anv_shader_bin_key *key = data;
   key->size = key_size;
   memcpy(key->data, key_data, key_size);
   data += align_u32(sizeof(*key) + key_size, 8);

   shader->bind_map.surface_to_descriptor = data;
   memcpy(data, bind_map->surface_to_descriptor,
          bind_map->surface_count * sizeof(struct anv_pipeline_binding));
   data += bind_map->surface_count * sizeof(struct anv_pipeline_binding);

   shader->bind_map.sampler_to_descriptor = data;
   memcpy(data, bind_map->sampler_to_descriptor,
          bind_map->sampler_count * sizeof(struct anv_pipeline_binding));

   return shader;
}
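/* For reference, anv_shader_bin_create() above builds a single allocation
 * with all of the variable-length data living after the struct:
 *
 *    struct anv_shader_bin            (padded to 8 bytes)
 *    brw_stage_prog_data              (prog_data_size bytes, padded to 8)
 *    prog_data params                 (nr_params pointers, padded to 8)
 *    anv_shader_bin_key               (uint32_t size + key_size, padded to 8)
 *    surface_to_descriptor bindings   (surface_count entries)
 *    sampler_to_descriptor bindings   (sampler_count entries, packed back to
 *                                      back with the surface table)
 *
 * anv_shader_bin_size() computes exactly this footprint, which also makes
 * the whole object trivially serializable; see anv_shader_bin_write_data()
 * below.
 */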
void
anv_shader_bin_destroy(struct anv_device *device,
                       struct anv_shader_bin *shader)
{
   assert(shader->ref_cnt == 0);
   anv_state_pool_free(&device->instruction_state_pool, shader->kernel);
   vk_free(&device->alloc, shader);
}

static size_t
anv_shader_bin_data_size(const struct anv_shader_bin *shader)
{
   return anv_shader_bin_size(shader->prog_data_size,
                              shader->prog_data->nr_params, shader->key->size,
                              shader->bind_map.surface_count,
                              shader->bind_map.sampler_count) +
          align_u32(shader->kernel_size, 8);
}

static void
anv_shader_bin_write_data(const struct anv_shader_bin *shader, void *data)
{
   size_t struct_size =
      anv_shader_bin_size(shader->prog_data_size,
                          shader->prog_data->nr_params, shader->key->size,
                          shader->bind_map.surface_count,
                          shader->bind_map.sampler_count);

   memcpy(data, shader, struct_size);
   data += struct_size;

   memcpy(data, shader->kernel.map, shader->kernel_size);
}

/* Remaining work:
 *
 * - Compact binding table layout so it's tight and not dependent on
 *   descriptor set layout.
 *
 * - Review prog_data struct for size and cacheability: struct
 *   brw_stage_prog_data has a binding_table which uses a lot of uint32_t
 *   for 8-bit quantities etc.; param, pull_param, and image_param are
 *   pointers, we just need the compaction map.  Use bit fields for all
 *   bools, e.g. dual_src_blend.
 */

static uint32_t
shader_bin_key_hash_func(const void *void_key)
{
   const struct anv_shader_bin_key *key = void_key;
   return _mesa_hash_data(key->data, key->size);
}

static bool
shader_bin_key_compare_func(const void *void_a, const void *void_b)
{
   const struct anv_shader_bin_key *a = void_a, *b = void_b;
   if (a->size != b->size)
      return false;

   return memcmp(a->data, b->data, a->size) == 0;
}

void
anv_pipeline_cache_init(struct anv_pipeline_cache *cache,
                        struct anv_device *device,
                        bool cache_enabled)
{
   cache->device = device;
   pthread_mutex_init(&cache->mutex, NULL);

   if (cache_enabled) {
      cache->cache = _mesa_hash_table_create(NULL, shader_bin_key_hash_func,
                                             shader_bin_key_compare_func);
   } else {
      cache->cache = NULL;
   }
}

void
anv_pipeline_cache_finish(struct anv_pipeline_cache *cache)
{
   pthread_mutex_destroy(&cache->mutex);

   if (cache->cache) {
      /* This is a bit unfortunate.  In order to keep things from randomly
       * going away, the shader cache has to hold a reference to all shader
       * binaries it contains.  We unref them when we destroy the cache.
       */
      struct hash_entry *entry;
      hash_table_foreach(cache->cache, entry)
         anv_shader_bin_unref(cache->device, entry->data);

      _mesa_hash_table_destroy(cache->cache, NULL);
   }
}

void
anv_hash_shader(unsigned char *hash, const void *key, size_t key_size,
                struct anv_shader_module *module,
                const char *entrypoint,
                const struct anv_pipeline_layout *pipeline_layout,
                const VkSpecializationInfo *spec_info)
{
   struct mesa_sha1 *ctx;

   ctx = _mesa_sha1_init();
   _mesa_sha1_update(ctx, key, key_size);
   _mesa_sha1_update(ctx, module->sha1, sizeof(module->sha1));
   _mesa_sha1_update(ctx, entrypoint, strlen(entrypoint));
   if (pipeline_layout) {
      _mesa_sha1_update(ctx, pipeline_layout->sha1,
                        sizeof(pipeline_layout->sha1));
   }
   /* TODO: Hash in shader stage, pipeline layout? */
   if (spec_info) {
      _mesa_sha1_update(ctx, spec_info->pMapEntries,
                        spec_info->mapEntryCount *
                        sizeof(spec_info->pMapEntries[0]));
      _mesa_sha1_update(ctx, spec_info->pData, spec_info->dataSize);
   }
   _mesa_sha1_final(ctx, hash);
}
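/* Illustrative only (not part of the driver): a typical compile path uses
 * the helpers in this file roughly like this, with the actual compile step
 * elided:
 *
 *    unsigned char sha1[20];
 *    anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint,
 *                    pipeline_layout, spec_info);
 *
 *    struct anv_shader_bin *bin =
 *       anv_pipeline_cache_search(cache, sha1, sizeof(sha1));
 *    if (bin == NULL) {
 *       ... compile the shader ...
 *       bin = anv_pipeline_cache_upload_kernel(cache, sha1, sizeof(sha1),
 *                                              kernel, kernel_size,
 *                                              prog_data, prog_data_size,
 *                                              bind_map);
 *    }
 *
 * Both paths hand back a reference the caller must eventually release with
 * anv_shader_bin_unref().
 */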
static struct anv_shader_bin *
anv_pipeline_cache_search_locked(struct anv_pipeline_cache *cache,
                                 const void *key_data, uint32_t key_size)
{
   uint32_t vla[1 + DIV_ROUND_UP(key_size, sizeof(uint32_t))];
   struct anv_shader_bin_key *key = (void *)vla;
   key->size = key_size;
   memcpy(key->data, key_data, key_size);

   struct hash_entry *entry = _mesa_hash_table_search(cache->cache, key);
   if (entry)
      return entry->data;
   else
      return NULL;
}

struct anv_shader_bin *
anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
                          const void *key_data, uint32_t key_size)
{
   if (!cache->cache)
      return NULL;

   pthread_mutex_lock(&cache->mutex);

   struct anv_shader_bin *shader =
      anv_pipeline_cache_search_locked(cache, key_data, key_size);

   pthread_mutex_unlock(&cache->mutex);

   /* We increment refcount before handing it to the caller */
   if (shader)
      anv_shader_bin_ref(shader);

   return shader;
}

static struct anv_shader_bin *
anv_pipeline_cache_add_shader(struct anv_pipeline_cache *cache,
                              const void *key_data, uint32_t key_size,
                              const void *kernel_data, uint32_t kernel_size,
                              const struct brw_stage_prog_data *prog_data,
                              uint32_t prog_data_size,
                              const void *prog_data_param,
                              const struct anv_pipeline_bind_map *bind_map)
{
   struct anv_shader_bin *shader =
      anv_pipeline_cache_search_locked(cache, key_data, key_size);
   if (shader)
      return shader;

   struct anv_shader_bin *bin =
      anv_shader_bin_create(cache->device, key_data, key_size,
                            kernel_data, kernel_size, prog_data,
                            prog_data_size, prog_data_param, bind_map);
   if (!bin)
      return NULL;

   _mesa_hash_table_insert(cache->cache, bin->key, bin);

   return bin;
}

struct anv_shader_bin *
anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
                                 const void *key_data, uint32_t key_size,
                                 const void *kernel_data, uint32_t kernel_size,
                                 const struct brw_stage_prog_data *prog_data,
                                 uint32_t prog_data_size,
                                 const struct anv_pipeline_bind_map *bind_map)
{
   if (cache->cache) {
      pthread_mutex_lock(&cache->mutex);

      struct anv_shader_bin *bin =
         anv_pipeline_cache_add_shader(cache, key_data, key_size,
                                       kernel_data, kernel_size,
                                       prog_data, prog_data_size,
                                       prog_data->param, bind_map);

      pthread_mutex_unlock(&cache->mutex);

      /* add_shader can fail on allocation failure; don't ref a NULL bin. */
      if (bin == NULL)
         return NULL;

      /* We increment refcount before handing it to the caller */
      anv_shader_bin_ref(bin);

      return bin;
   } else {
      /* In this case, we're not caching it so the caller owns it entirely */
      return anv_shader_bin_create(cache->device, key_data, key_size,
                                   kernel_data, kernel_size, prog_data,
                                   prog_data_size, prog_data->param,
                                   bind_map);
   }
}

struct cache_header {
   uint32_t header_size;
   uint32_t header_version;
   uint32_t vendor_id;
   uint32_t device_id;
   uint8_t  uuid[VK_UUID_SIZE];
};
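/* The blob produced by anv_GetPipelineCacheData() and consumed by
 * anv_pipeline_cache_load() below is laid out as:
 *
 *    struct cache_header              (padded to 8 bytes)
 *    uint32_t count                   (padded to 8 bytes)
 *    count entries, each the anv_shader_bin image written by
 *    anv_shader_bin_write_data(): the struct plus its trailing
 *    prog_data/params/key/bindings, followed by the kernel binary
 *    (kernel_size bytes, padded to 8).
 *
 * Pointers embedded in the serialized structs are stale and are re-derived
 * from offsets while parsing.
 */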
static void
anv_pipeline_cache_load(struct anv_pipeline_cache *cache,
                        const void *data, size_t size)
{
   struct anv_device *device = cache->device;
   struct anv_physical_device *pdevice = &device->instance->physicalDevice;
   struct cache_header header;

   if (cache->cache == NULL)
      return;

   if (size < sizeof(header))
      return;
   memcpy(&header, data, sizeof(header));
   if (header.header_size < sizeof(header))
      return;
   if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
      return;
   if (header.vendor_id != 0x8086)
      return;
   if (header.device_id != device->chipset_id)
      return;
   if (memcmp(header.uuid, pdevice->uuid, VK_UUID_SIZE) != 0)
      return;

   const void *end = data + size;
   const void *p = data + header.header_size;

   /* Count is the total number of valid entries */
   uint32_t count;
   if (p + sizeof(count) >= end)
      return;
   memcpy(&count, p, sizeof(count));
   p += align_u32(sizeof(count), 8);

   for (uint32_t i = 0; i < count; i++) {
      struct anv_shader_bin bin;
      if (p + sizeof(bin) > end)
         break;
      memcpy(&bin, p, sizeof(bin));
      p += align_u32(sizeof(struct anv_shader_bin), 8);

      const struct brw_stage_prog_data *prog_data = p;
      p += align_u32(bin.prog_data_size, 8);
      if (p > end)
         break;

      uint32_t param_size = prog_data->nr_params * sizeof(void *);
      const void *prog_data_param = p;
      p += align_u32(param_size, 8);

      struct anv_shader_bin_key key;
      if (p + sizeof(key) > end)
         break;
      memcpy(&key, p, sizeof(key));
      const void *key_data = p + sizeof(key);
      p += align_u32(sizeof(key) + key.size, 8);

      /* We're going to memcpy this so getting rid of const is fine */
      struct anv_pipeline_binding *bindings = (void *)p;
      p += align_u32((bin.bind_map.surface_count + bin.bind_map.sampler_count) *
                     sizeof(struct anv_pipeline_binding), 8);
      bin.bind_map.surface_to_descriptor = bindings;
      bin.bind_map.sampler_to_descriptor = bindings + bin.bind_map.surface_count;

      const void *kernel_data = p;
      p += align_u32(bin.kernel_size, 8);

      if (p > end)
         break;

      anv_pipeline_cache_add_shader(cache, key_data, key.size,
                                    kernel_data, bin.kernel_size,
                                    prog_data, bin.prog_data_size,
                                    prog_data_param, &bin.bind_map);
   }
}

static bool
pipeline_cache_enabled(void)
{
   static int enabled = -1;
   if (enabled < 0)
      enabled = env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", true);

   return enabled;
}

VkResult anv_CreatePipelineCache(
    VkDevice                                    _device,
    const VkPipelineCacheCreateInfo*            pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkPipelineCache*                            pPipelineCache)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_pipeline_cache *cache;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
   assert(pCreateInfo->flags == 0);

   cache = vk_alloc2(&device->alloc, pAllocator,
                     sizeof(*cache), 8,
                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (cache == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   anv_pipeline_cache_init(cache, device, pipeline_cache_enabled());

   if (pCreateInfo->initialDataSize > 0)
      anv_pipeline_cache_load(cache,
                              pCreateInfo->pInitialData,
                              pCreateInfo->initialDataSize);

   *pPipelineCache = anv_pipeline_cache_to_handle(cache);

   return VK_SUCCESS;
}

void anv_DestroyPipelineCache(
    VkDevice                                    _device,
    VkPipelineCache                             _cache,
    const VkAllocationCallbacks*                pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);

   if (!cache)
      return;

   anv_pipeline_cache_finish(cache);

   vk_free2(&device->alloc, pAllocator, cache);
}
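/* anv_GetPipelineCacheData() below implements the standard Vulkan two-call
 * idiom: called with pData == NULL it only reports the required size;
 * called with a buffer it writes as many whole entries as fit and returns
 * VK_INCOMPLETE if any were dropped.  An illustrative (hypothetical)
 * application-side caller:
 *
 *    size_t size;
 *    vkGetPipelineCacheData(device, cache, &size, NULL);
 *    void *blob = malloc(size);
 *    VkResult res = vkGetPipelineCacheData(device, cache, &size, blob);
 *    // blob can later be handed back through
 *    // VkPipelineCacheCreateInfo::pInitialData / initialDataSize.
 */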
VkResult anv_GetPipelineCacheData(
    VkDevice                                    _device,
    VkPipelineCache                             _cache,
    size_t*                                     pDataSize,
    void*                                       pData)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);
   struct anv_physical_device *pdevice = &device->instance->physicalDevice;
   struct cache_header *header;

   if (pData == NULL) {
      size_t size = align_u32(sizeof(*header), 8) +
                    align_u32(sizeof(uint32_t), 8);

      if (cache->cache) {
         struct hash_entry *entry;
         hash_table_foreach(cache->cache, entry)
            size += anv_shader_bin_data_size(entry->data);
      }

      *pDataSize = size;
      return VK_SUCCESS;
   }

   if (*pDataSize < sizeof(*header)) {
      *pDataSize = 0;
      return VK_INCOMPLETE;
   }

   void *p = pData, *end = pData + *pDataSize;
   header = p;
   header->header_size = sizeof(*header);
   header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
   header->vendor_id = 0x8086;
   header->device_id = device->chipset_id;
   memcpy(header->uuid, pdevice->uuid, VK_UUID_SIZE);
   p += align_u32(header->header_size, 8);

   uint32_t *count = p;
   p += align_u32(sizeof(*count), 8);
   *count = 0;

   VkResult result = VK_SUCCESS;
   if (cache->cache) {
      struct hash_entry *entry;
      hash_table_foreach(cache->cache, entry) {
         struct anv_shader_bin *shader = entry->data;
         size_t data_size = anv_shader_bin_data_size(entry->data);
         if (p + data_size > end) {
            result = VK_INCOMPLETE;
            break;
         }

         anv_shader_bin_write_data(shader, p);
         p += data_size;

         (*count)++;
      }
   }

   *pDataSize = p - pData;

   return result;
}

VkResult anv_MergePipelineCaches(
    VkDevice                                    _device,
    VkPipelineCache                             destCache,
    uint32_t                                    srcCacheCount,
    const VkPipelineCache*                      pSrcCaches)
{
   ANV_FROM_HANDLE(anv_pipeline_cache, dst, destCache);

   if (!dst->cache)
      return VK_SUCCESS;

   for (uint32_t i = 0; i < srcCacheCount; i++) {
      ANV_FROM_HANDLE(anv_pipeline_cache, src, pSrcCaches[i]);
      if (!src->cache)
         continue;

      struct hash_entry *entry;
      hash_table_foreach(src->cache, entry) {
         struct anv_shader_bin *bin = entry->data;
         if (_mesa_hash_table_search(dst->cache, bin->key))
            continue;

         anv_shader_bin_ref(bin);
         _mesa_hash_table_insert(dst->cache, bin->key, bin);
      }
   }

   return VK_SUCCESS;
}
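/* Note that anv_MergePipelineCaches() above shares entries by reference
 * rather than deep-copying them: each bin added to dst picks up an extra
 * reference, and every cache holding a bin drops its reference in
 * anv_pipeline_cache_finish().  A bin is only truly destroyed once the
 * last holder lets go.
 */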