diff options
author | Jason Ekstrand <[email protected]> | 2016-08-25 01:49:49 -0700 |
---|---|---|
committer | Jason Ekstrand <[email protected]> | 2016-08-30 15:08:23 -0700 |
commit | 10f9901bcef7724cb72fb2fe7e3dd8d6660d2f34 (patch) | |
tree | 007712bd01daea07fe9389b2aad6de234d45df77 /src/intel/vulkan/anv_pipeline_cache.c | |
parent | 689971847005219178f5a484dffecf9e5e515192 (diff) |
anv: Rework pipeline caching
The original pipeline cache the Kristian wrote was based on a now-false
premise that the shaders can be stored in the pipeline cache. The Vulkan
1.0 spec explicitly states that the pipeline cache object is transiant and
you are allowed to delete it after using it to create a pipeline with no
ill effects. As nice as Kristian's design was, it doesn't jive with the
expectation provided by the Vulkan spec.
The new pipeline cache uses reference-counted anv_shader_bin objects that
are backed by a large state pool. The cache itself is just a hash table
mapping keys hashes to anv_shader_bin objects. This has the added
advantage of removing one more hand-rolled hash table from mesa.
Signed-off-by: Jason Ekstrand <[email protected]>
Cc: "12.0" <[email protected]>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97476
Acked-by: Kristian Høgsberg Kristensen <[email protected]>
Diffstat (limited to 'src/intel/vulkan/anv_pipeline_cache.c')
-rw-r--r-- | src/intel/vulkan/anv_pipeline_cache.c | 504 |
1 files changed, 192 insertions, 312 deletions
diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c index abca9fe3313..2753c46fbde 100644 --- a/src/intel/vulkan/anv_pipeline_cache.c +++ b/src/intel/vulkan/anv_pipeline_cache.c @@ -22,6 +22,7 @@ */ #include "util/mesa-sha1.h" +#include "util/hash_table.h" #include "util/debug.h" #include "anv_private.h" @@ -147,67 +148,55 @@ anv_shader_bin_write_data(const struct anv_shader_bin *shader, void *data) * dual_src_blend. */ +static uint32_t +shader_bin_key_hash_func(const void *void_key) +{ + const struct shader_bin_key *key = void_key; + return _mesa_hash_data(key->data, key->size); +} + +static bool +shader_bin_key_compare_func(const void *void_a, const void *void_b) +{ + const struct shader_bin_key *a = void_a, *b = void_b; + if (a->size != b->size) + return false; + + return memcmp(a->data, b->data, a->size) == 0; +} + void anv_pipeline_cache_init(struct anv_pipeline_cache *cache, - struct anv_device *device) + struct anv_device *device, + bool cache_enabled) { cache->device = device; - anv_state_stream_init(&cache->program_stream, - &device->instruction_block_pool); pthread_mutex_init(&cache->mutex, NULL); - cache->kernel_count = 0; - cache->total_size = 0; - cache->table_size = 1024; - const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]); - cache->hash_table = malloc(byte_size); - - /* We don't consider allocation failure fatal, we just start with a 0-sized - * cache. */ - if (cache->hash_table == NULL || - !env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", true)) - cache->table_size = 0; - else - memset(cache->hash_table, 0xff, byte_size); + if (cache_enabled) { + cache->cache = _mesa_hash_table_create(NULL, shader_bin_key_hash_func, + shader_bin_key_compare_func); + } else { + cache->cache = NULL; + } } void anv_pipeline_cache_finish(struct anv_pipeline_cache *cache) { - anv_state_stream_finish(&cache->program_stream); pthread_mutex_destroy(&cache->mutex); - free(cache->hash_table); -} - -struct cache_entry { - unsigned char sha1[20]; - uint32_t prog_data_size; - uint32_t kernel_size; - uint32_t surface_count; - uint32_t sampler_count; - uint32_t image_count; - - char prog_data[0]; - - /* kernel follows prog_data at next 64 byte aligned address */ -}; - -static uint32_t -entry_size(struct cache_entry *entry) -{ - /* This returns the number of bytes needed to serialize an entry, which - * doesn't include the alignment padding bytes. - */ - struct brw_stage_prog_data *prog_data = (void *)entry->prog_data; - const uint32_t param_size = - prog_data->nr_params * sizeof(*prog_data->param); + if (cache->cache) { + /* This is a bit unfortunate. In order to keep things from randomly + * going away, the shader cache has to hold a reference to all shader + * binaries it contains. We unref them when we destroy the cache. + */ + struct hash_entry *entry; + hash_table_foreach(cache->cache, entry) + anv_shader_bin_unref(cache->device, entry->data); - const uint32_t map_size = - entry->surface_count * sizeof(struct anv_pipeline_binding) + - entry->sampler_count * sizeof(struct anv_pipeline_binding); - - return sizeof(*entry) + entry->prog_data_size + param_size + map_size; + _mesa_hash_table_destroy(cache->cache, NULL); + } } void @@ -236,221 +225,94 @@ anv_hash_shader(unsigned char *hash, const void *key, size_t key_size, _mesa_sha1_final(ctx, hash); } -static uint32_t -anv_pipeline_cache_search_unlocked(struct anv_pipeline_cache *cache, - const unsigned char *sha1, - const struct brw_stage_prog_data **prog_data, - struct anv_pipeline_bind_map *map) +static struct anv_shader_bin * +anv_pipeline_cache_search_locked(struct anv_pipeline_cache *cache, + const void *key_data, uint32_t key_size) { - const uint32_t mask = cache->table_size - 1; - const uint32_t start = (*(uint32_t *) sha1); - - for (uint32_t i = 0; i < cache->table_size; i++) { - const uint32_t index = (start + i) & mask; - const uint32_t offset = cache->hash_table[index]; - - if (offset == ~0) - return NO_KERNEL; - - struct cache_entry *entry = - cache->program_stream.block_pool->map + offset; - if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) { - if (prog_data) { - assert(map); - void *p = entry->prog_data; - *prog_data = p; - p += entry->prog_data_size; - p += (*prog_data)->nr_params * sizeof(*(*prog_data)->param); - map->surface_count = entry->surface_count; - map->sampler_count = entry->sampler_count; - map->image_count = entry->image_count; - map->surface_to_descriptor = p; - p += map->surface_count * sizeof(struct anv_pipeline_binding); - map->sampler_to_descriptor = p; - } - - return offset + align_u32(entry_size(entry), 64); - } - } + uint32_t vla[1 + DIV_ROUND_UP(key_size, sizeof(uint32_t))]; + struct shader_bin_key *key = (void *)vla; + key->size = key_size; + memcpy(key->data, key_data, key_size); - /* This can happen if the pipeline cache is disabled via - * ANV_ENABLE_PIPELINE_CACHE=false - */ - return NO_KERNEL; + struct hash_entry *entry = _mesa_hash_table_search(cache->cache, key); + if (entry) + return entry->data; + else + return NULL; } -uint32_t +struct anv_shader_bin * anv_pipeline_cache_search(struct anv_pipeline_cache *cache, - const unsigned char *sha1, - const struct brw_stage_prog_data **prog_data, - struct anv_pipeline_bind_map *map) + const void *key_data, uint32_t key_size) { - uint32_t kernel; + if (!cache->cache) + return NULL; pthread_mutex_lock(&cache->mutex); - kernel = anv_pipeline_cache_search_unlocked(cache, sha1, prog_data, map); + struct anv_shader_bin *shader = + anv_pipeline_cache_search_locked(cache, key_data, key_size); pthread_mutex_unlock(&cache->mutex); - return kernel; -} - -static void -anv_pipeline_cache_set_entry(struct anv_pipeline_cache *cache, - struct cache_entry *entry, uint32_t entry_offset) -{ - const uint32_t mask = cache->table_size - 1; - const uint32_t start = (*(uint32_t *) entry->sha1); - - /* We'll always be able to insert when we get here. */ - assert(cache->kernel_count < cache->table_size / 2); + /* We increment refcount before handing it to the caller */ + if (shader) + anv_shader_bin_ref(shader); - for (uint32_t i = 0; i < cache->table_size; i++) { - const uint32_t index = (start + i) & mask; - if (cache->hash_table[index] == ~0) { - cache->hash_table[index] = entry_offset; - break; - } - } - - cache->total_size += entry_size(entry) + entry->kernel_size; - cache->kernel_count++; + return shader; } -static VkResult -anv_pipeline_cache_grow(struct anv_pipeline_cache *cache) +static struct anv_shader_bin * +anv_pipeline_cache_add_shader(struct anv_pipeline_cache *cache, + const void *key_data, uint32_t key_size, + const void *kernel_data, uint32_t kernel_size, + const void *prog_data, uint32_t prog_data_size, + const struct anv_pipeline_bind_map *bind_map) { - const uint32_t table_size = cache->table_size * 2; - const uint32_t old_table_size = cache->table_size; - const size_t byte_size = table_size * sizeof(cache->hash_table[0]); - uint32_t *table; - uint32_t *old_table = cache->hash_table; - - table = malloc(byte_size); - if (table == NULL) - return VK_ERROR_OUT_OF_HOST_MEMORY; - - cache->hash_table = table; - cache->table_size = table_size; - cache->kernel_count = 0; - cache->total_size = 0; - - memset(cache->hash_table, 0xff, byte_size); - for (uint32_t i = 0; i < old_table_size; i++) { - const uint32_t offset = old_table[i]; - if (offset == ~0) - continue; - - struct cache_entry *entry = - cache->program_stream.block_pool->map + offset; - anv_pipeline_cache_set_entry(cache, entry, offset); - } + struct anv_shader_bin *shader = + anv_pipeline_cache_search_locked(cache, key_data, key_size); + if (shader) + return shader; + + struct anv_shader_bin *bin = + anv_shader_bin_create(cache->device, key_data, key_size, + kernel_data, kernel_size, + prog_data, prog_data_size, bind_map); + if (!bin) + return NULL; - free(old_table); + _mesa_hash_table_insert(cache->cache, anv_shader_bin_get_key(bin), bin); - return VK_SUCCESS; -} - -static void -anv_pipeline_cache_add_entry(struct anv_pipeline_cache *cache, - struct cache_entry *entry, uint32_t entry_offset) -{ - if (cache->kernel_count == cache->table_size / 2) - anv_pipeline_cache_grow(cache); - - /* Failing to grow that hash table isn't fatal, but may mean we don't - * have enough space to add this new kernel. Only add it if there's room. - */ - if (cache->kernel_count < cache->table_size / 2) - anv_pipeline_cache_set_entry(cache, entry, entry_offset); + return bin; } -uint32_t +struct anv_shader_bin * anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, - const unsigned char *sha1, - const void *kernel, size_t kernel_size, - const struct brw_stage_prog_data **prog_data, - size_t prog_data_size, - struct anv_pipeline_bind_map *map) + const void *key_data, uint32_t key_size, + const void *kernel_data, uint32_t kernel_size, + const void *prog_data, uint32_t prog_data_size, + const struct anv_pipeline_bind_map *bind_map) { - pthread_mutex_lock(&cache->mutex); - - /* Before uploading, check again that another thread didn't upload this - * shader while we were compiling it. - */ - if (sha1) { - uint32_t cached_kernel = - anv_pipeline_cache_search_unlocked(cache, sha1, prog_data, map); - if (cached_kernel != NO_KERNEL) { - pthread_mutex_unlock(&cache->mutex); - return cached_kernel; - } - } - - struct cache_entry *entry; + if (cache->cache) { + pthread_mutex_lock(&cache->mutex); - assert((*prog_data)->nr_pull_params == 0); - assert((*prog_data)->nr_image_params == 0); + struct anv_shader_bin *bin = + anv_pipeline_cache_add_shader(cache, key_data, key_size, + kernel_data, kernel_size, + prog_data, prog_data_size, bind_map); - const uint32_t param_size = - (*prog_data)->nr_params * sizeof(*(*prog_data)->param); + pthread_mutex_unlock(&cache->mutex); - const uint32_t map_size = - map->surface_count * sizeof(struct anv_pipeline_binding) + - map->sampler_count * sizeof(struct anv_pipeline_binding); + /* We increment refcount before handing it to the caller */ + anv_shader_bin_ref(bin); - const uint32_t preamble_size = - align_u32(sizeof(*entry) + prog_data_size + param_size + map_size, 64); - - const uint32_t size = preamble_size + kernel_size; - - assert(size < cache->program_stream.block_pool->block_size); - const struct anv_state state = - anv_state_stream_alloc(&cache->program_stream, size, 64); - - entry = state.map; - entry->prog_data_size = prog_data_size; - entry->surface_count = map->surface_count; - entry->sampler_count = map->sampler_count; - entry->image_count = map->image_count; - entry->kernel_size = kernel_size; - - void *p = entry->prog_data; - memcpy(p, *prog_data, prog_data_size); - p += prog_data_size; - - memcpy(p, (*prog_data)->param, param_size); - ((struct brw_stage_prog_data *)entry->prog_data)->param = p; - p += param_size; - - memcpy(p, map->surface_to_descriptor, - map->surface_count * sizeof(struct anv_pipeline_binding)); - map->surface_to_descriptor = p; - p += map->surface_count * sizeof(struct anv_pipeline_binding); - - memcpy(p, map->sampler_to_descriptor, - map->sampler_count * sizeof(struct anv_pipeline_binding)); - map->sampler_to_descriptor = p; - - if (sha1) { - assert(anv_pipeline_cache_search_unlocked(cache, sha1, - NULL, NULL) == NO_KERNEL); - - memcpy(entry->sha1, sha1, sizeof(entry->sha1)); - anv_pipeline_cache_add_entry(cache, entry, state.offset); + return bin; + } else { + /* In this case, we're not caching it so the caller owns it entirely */ + return anv_shader_bin_create(cache->device, key_data, key_size, + kernel_data, kernel_size, + prog_data, prog_data_size, bind_map); } - - pthread_mutex_unlock(&cache->mutex); - - memcpy(state.map + preamble_size, kernel, kernel_size); - - if (!cache->device->info.has_llc) - anv_state_clflush(state); - - *prog_data = (const struct brw_stage_prog_data *) entry->prog_data; - - return state.offset + preamble_size; } struct cache_header { @@ -469,6 +331,9 @@ anv_pipeline_cache_load(struct anv_pipeline_cache *cache, struct cache_header header; uint8_t uuid[VK_UUID_SIZE]; + if (cache->cache == NULL) + return; + if (size < sizeof(header)) return; memcpy(&header, data, sizeof(header)); @@ -484,48 +349,62 @@ anv_pipeline_cache_load(struct anv_pipeline_cache *cache, if (memcmp(header.uuid, uuid, VK_UUID_SIZE) != 0) return; - void *end = (void *) data + size; - void *p = (void *) data + header.header_size; - - while (p < end) { - struct cache_entry *entry = p; - - void *data = entry->prog_data; - - /* Make a copy of prog_data so that it's mutable */ - uint8_t prog_data_tmp[512]; - assert(entry->prog_data_size <= sizeof(prog_data_tmp)); - memcpy(prog_data_tmp, data, entry->prog_data_size); - struct brw_stage_prog_data *prog_data = (void *)prog_data_tmp; - data += entry->prog_data_size; - - prog_data->param = data; - data += prog_data->nr_params * sizeof(*prog_data->param); - - struct anv_pipeline_binding *surface_to_descriptor = data; - data += entry->surface_count * sizeof(struct anv_pipeline_binding); - struct anv_pipeline_binding *sampler_to_descriptor = data; - data += entry->sampler_count * sizeof(struct anv_pipeline_binding); - void *kernel = data; - - struct anv_pipeline_bind_map map = { - .surface_count = entry->surface_count, - .sampler_count = entry->sampler_count, - .image_count = entry->image_count, - .surface_to_descriptor = surface_to_descriptor, - .sampler_to_descriptor = sampler_to_descriptor - }; - - const struct brw_stage_prog_data *const_prog_data = prog_data; - - anv_pipeline_cache_upload_kernel(cache, entry->sha1, - kernel, entry->kernel_size, - &const_prog_data, - entry->prog_data_size, &map); - p = kernel + entry->kernel_size; + const void *end = data + size; + const void *p = data + header.header_size; + + /* Count is the total number of valid entries */ + uint32_t count; + if (p + sizeof(count) >= end) + return; + memcpy(&count, p, sizeof(count)); + p += align_u32(sizeof(count), 8); + + for (uint32_t i = 0; i < count; i++) { + struct anv_shader_bin bin; + if (p + sizeof(bin) > end) + break; + memcpy(&bin, p, sizeof(bin)); + p += align_u32(sizeof(struct anv_shader_bin), 8); + + const void *prog_data = p; + p += align_u32(bin.prog_data_size, 8); + + struct shader_bin_key key; + if (p + sizeof(key) > end) + break; + memcpy(&key, p, sizeof(key)); + const void *key_data = p + sizeof(key); + p += align_u32(sizeof(key) + key.size, 8); + + /* We're going to memcpy this so getting rid of const is fine */ + struct anv_pipeline_binding *bindings = (void *)p; + p += align_u32((bin.bind_map.surface_count + bin.bind_map.sampler_count) * + sizeof(struct anv_pipeline_binding), 8); + bin.bind_map.surface_to_descriptor = bindings; + bin.bind_map.sampler_to_descriptor = bindings + bin.bind_map.surface_count; + + const void *kernel_data = p; + p += align_u32(bin.kernel_size, 8); + + if (p > end) + break; + + anv_pipeline_cache_add_shader(cache, key_data, key.size, + kernel_data, bin.kernel_size, + prog_data, bin.prog_data_size, + &bin.bind_map); } } +static bool +pipeline_cache_enabled() +{ + static int enabled = -1; + if (enabled < 0) + enabled = env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", true); + return enabled; +} + VkResult anv_CreatePipelineCache( VkDevice _device, const VkPipelineCacheCreateInfo* pCreateInfo, @@ -544,7 +423,7 @@ VkResult anv_CreatePipelineCache( if (cache == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - anv_pipeline_cache_init(cache, device); + anv_pipeline_cache_init(cache, device, pipeline_cache_enabled()); if (pCreateInfo->initialDataSize > 0) anv_pipeline_cache_load(cache, @@ -579,9 +458,16 @@ VkResult anv_GetPipelineCacheData( ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache); struct cache_header *header; - const size_t size = sizeof(*header) + cache->total_size; - if (pData == NULL) { + size_t size = align_u32(sizeof(*header), 8) + + align_u32(sizeof(uint32_t), 8); + + if (cache->cache) { + struct hash_entry *entry; + hash_table_foreach(cache->cache, entry) + size += anv_shader_bin_data_size(entry->data); + } + *pDataSize = size; return VK_SUCCESS; } @@ -598,25 +484,25 @@ VkResult anv_GetPipelineCacheData( header->vendor_id = 0x8086; header->device_id = device->chipset_id; anv_device_get_cache_uuid(header->uuid); - p += header->header_size; - - struct cache_entry *entry; - for (uint32_t i = 0; i < cache->table_size; i++) { - if (cache->hash_table[i] == ~0) - continue; + p += align_u32(header->header_size, 8); - entry = cache->program_stream.block_pool->map + cache->hash_table[i]; - const uint32_t size = entry_size(entry); - if (end < p + size + entry->kernel_size) - break; + uint32_t *count = p; + p += align_u32(sizeof(*count), 8); + *count = 0; - memcpy(p, entry, size); - p += size; + if (cache->cache) { + struct hash_entry *entry; + hash_table_foreach(cache->cache, entry) { + struct anv_shader_bin *shader = entry->data; + size_t data_size = anv_shader_bin_data_size(entry->data); + if (p + data_size > end) + break; - void *kernel = (void *) entry + align_u32(size, 64); + anv_shader_bin_write_data(shader, p); + p += data_size; - memcpy(p, kernel, entry->kernel_size); - p += entry->kernel_size; + (*count)++; + } } *pDataSize = p - pData; @@ -624,25 +510,6 @@ VkResult anv_GetPipelineCacheData( return VK_SUCCESS; } -static void -anv_pipeline_cache_merge(struct anv_pipeline_cache *dst, - struct anv_pipeline_cache *src) -{ - for (uint32_t i = 0; i < src->table_size; i++) { - const uint32_t offset = src->hash_table[i]; - if (offset == ~0) - continue; - - struct cache_entry *entry = - src->program_stream.block_pool->map + offset; - - if (anv_pipeline_cache_search(dst, entry->sha1, NULL, NULL) != NO_KERNEL) - continue; - - anv_pipeline_cache_add_entry(dst, entry, offset); - } -} - VkResult anv_MergePipelineCaches( VkDevice _device, VkPipelineCache destCache, @@ -651,10 +518,23 @@ VkResult anv_MergePipelineCaches( { ANV_FROM_HANDLE(anv_pipeline_cache, dst, destCache); + if (!dst->cache) + return VK_SUCCESS; + for (uint32_t i = 0; i < srcCacheCount; i++) { ANV_FROM_HANDLE(anv_pipeline_cache, src, pSrcCaches[i]); + if (!src->cache) + continue; + + struct hash_entry *entry; + hash_table_foreach(src->cache, entry) { + struct anv_shader_bin *bin = entry->data; + if (_mesa_hash_table_search(dst->cache, anv_shader_bin_get_key(bin))) + continue; - anv_pipeline_cache_merge(dst, src); + anv_shader_bin_ref(bin); + _mesa_hash_table_insert(dst->cache, anv_shader_bin_get_key(bin), bin); + } } return VK_SUCCESS; |