Diffstat (limited to 'src')
-rw-r--r--   src/intel/vulkan/anv_cmd_buffer.c     |   6
-rw-r--r--   src/intel/vulkan/anv_device.c         |   1
-rw-r--r--   src/intel/vulkan/anv_pipeline.c       | 161
-rw-r--r--   src/intel/vulkan/anv_pipeline_cache.c | 504
-rw-r--r--   src/intel/vulkan/anv_private.h        |  72
-rw-r--r--   src/intel/vulkan/genX_l3.c            |   3
-rw-r--r--   src/intel/vulkan/genX_pipeline.c      |   3
-rw-r--r--   src/intel/vulkan/genX_pipeline_util.h |   2
8 files changed, 320 insertions, 432 deletions
diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c
index 6c082aa77ef..9dedde8f4e6 100644
--- a/src/intel/vulkan/anv_cmd_buffer.c
+++ b/src/intel/vulkan/anv_cmd_buffer.c
@@ -757,7 +757,7 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
       return VK_SUCCESS;
    }

-   struct anv_pipeline_bind_map *map = &pipeline->bindings[stage];
+   struct anv_pipeline_bind_map *map = &pipeline->shaders[stage]->bind_map;
    if (bias + map->surface_count == 0) {
       *bt_state = (struct anv_state) { 0, };
       return VK_SUCCESS;
@@ -922,7 +922,7 @@ anv_cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer,
       return VK_SUCCESS;
    }

-   struct anv_pipeline_bind_map *map = &pipeline->bindings[stage];
+   struct anv_pipeline_bind_map *map = &pipeline->shaders[stage]->bind_map;
    if (map->sampler_count == 0) {
       *state = (struct anv_state) { 0, };
       return VK_SUCCESS;
@@ -1096,7 +1096,7 @@ anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer,
    struct anv_push_constants *data =
       cmd_buffer->state.push_constants[stage];
    const struct brw_stage_prog_data *prog_data =
-      cmd_buffer->state.pipeline->prog_data[stage];
+      anv_shader_bin_get_prog_data(cmd_buffer->state.pipeline->shaders[stage]);

    /* If we don't actually have any push constants, bail. */
    if (data == NULL || prog_data == NULL || prog_data->nr_params == 0)
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 765dc6e38c9..cf6399364c5 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -881,7 +881,6 @@ VkResult anv_CreateDevice(
    anv_block_pool_init(&device->instruction_block_pool, device, 128 * 1024);
    anv_state_pool_init(&device->instruction_state_pool,
                        &device->instruction_block_pool);
-   anv_pipeline_cache_init(&device->default_pipeline_cache, device);

    anv_block_pool_init(&device->surface_state_block_pool, device, 4096);
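The anv_pipeline.c hunks below retire the pipeline's raw prog_data/bindings arrays in favor of refcounted anv_shader_bin objects that both pipelines and caches can hold. A minimal sketch of the ref/unref idiom this relies on; the type layout and field name here are illustrative assumptions, not the driver's actual definition:

    #include <stdint.h>
    #include <stdlib.h>

    /* Hypothetical refcounted shader binary -- illustration only. */
    struct shader_bin {
       uint32_t ref_cnt;        /* assumed field name */
       /* ... kernel offset, prog data, bind map ... */
    };

    static inline void
    shader_bin_ref(struct shader_bin *bin)
    {
       bin->ref_cnt++;          /* a real driver would make this atomic */
    }

    static inline void
    shader_bin_unref(struct shader_bin *bin)
    {
       /* Last owner, pipeline or cache, frees the binary; free() stands
        * in for the driver's allocator. */
       if (--bin->ref_cnt == 0)
          free(bin);
    }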
diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index e6afdc1a4b3..52ab7d0bf2d 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -205,6 +205,12 @@ void anv_DestroyPipeline(
                          pAllocator ? pAllocator : &device->alloc);
    if (pipeline->blend_state.map)
       anv_state_pool_free(&device->dynamic_state_pool, pipeline->blend_state);
+
+   for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
+      if (pipeline->shaders[s])
+         anv_shader_bin_unref(device, pipeline->shaders[s]);
+   }
+
    anv_free2(&device->alloc, pAllocator, pipeline);
 }

@@ -394,15 +400,34 @@ anv_fill_binding_table(struct brw_stage_prog_data *prog_data, unsigned bias)
    prog_data->binding_table.image_start = bias;
 }

+static struct anv_shader_bin *
+anv_pipeline_upload_kernel(struct anv_pipeline *pipeline,
+                           struct anv_pipeline_cache *cache,
+                           const void *key_data, uint32_t key_size,
+                           const void *kernel_data, uint32_t kernel_size,
+                           const void *prog_data, uint32_t prog_data_size,
+                           const struct anv_pipeline_bind_map *bind_map)
+{
+   if (cache) {
+      return anv_pipeline_cache_upload_kernel(cache, key_data, key_size,
+                                              kernel_data, kernel_size,
+                                              prog_data, prog_data_size,
+                                              bind_map);
+   } else {
+      return anv_shader_bin_create(pipeline->device, key_data, key_size,
+                                   kernel_data, kernel_size,
+                                   prog_data, prog_data_size, bind_map);
+   }
+}
+
 static void
 anv_pipeline_add_compiled_stage(struct anv_pipeline *pipeline,
                                 gl_shader_stage stage,
-                                const struct brw_stage_prog_data *prog_data,
-                                struct anv_pipeline_bind_map *map)
+                                struct anv_shader_bin *shader)
 {
-   pipeline->prog_data[stage] = prog_data;
+   pipeline->shaders[stage] = shader;
    pipeline->active_stages |= mesa_to_vk_shader_stage(stage);
-   pipeline->bindings[stage] = *map;
 }

 static VkResult
@@ -415,21 +440,20 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline,
 {
    const struct brw_compiler *compiler =
       pipeline->device->instance->physicalDevice.compiler;
-   const struct brw_stage_prog_data *stage_prog_data;
    struct anv_pipeline_bind_map map;
    struct brw_vs_prog_key key;
-   uint32_t kernel = NO_KERNEL;
+   struct anv_shader_bin *bin = NULL;
    unsigned char sha1[20];

    populate_vs_prog_key(&pipeline->device->info, &key);

-   if (module->size > 0) {
+   if (cache) {
       anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint,
                       pipeline->layout, spec_info);
-      kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data, &map);
+      bin = anv_pipeline_cache_search(cache, sha1, 20);
    }

-   if (kernel == NO_KERNEL) {
+   if (bin == NULL) {
       struct brw_vs_prog_data prog_data = { 0, };
       struct anv_pipeline_binding surface_to_descriptor[256];
       struct anv_pipeline_binding sampler_to_descriptor[256];
@@ -468,28 +492,29 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline,
          return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
       }

-      stage_prog_data = &prog_data.base.base;
-      kernel = anv_pipeline_cache_upload_kernel(cache,
-                                                module->size > 0 ? sha1 : NULL,
-                                                shader_code, code_size,
-                                                &stage_prog_data, sizeof(prog_data),
-                                                &map);
+      bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20,
+                                       shader_code, code_size,
+                                       &prog_data, sizeof(prog_data), &map);
+      if (!bin) {
+         ralloc_free(mem_ctx);
+         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+      }
+
       ralloc_free(mem_ctx);
    }

    const struct brw_vs_prog_data *vs_prog_data =
-      (const struct brw_vs_prog_data *) stage_prog_data;
+      (const struct brw_vs_prog_data *)anv_shader_bin_get_prog_data(bin);

    if (vs_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8) {
-      pipeline->vs_simd8 = kernel;
+      pipeline->vs_simd8 = bin->kernel.offset;
       pipeline->vs_vec4 = NO_KERNEL;
    } else {
       pipeline->vs_simd8 = NO_KERNEL;
-      pipeline->vs_vec4 = kernel;
+      pipeline->vs_vec4 = bin->kernel.offset;
    }

-   anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_VERTEX,
-                                   stage_prog_data, &map);
+   anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_VERTEX, bin);

    return VK_SUCCESS;
 }
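All four compile paths (vertex above; geometry, fragment, and compute below) now follow the same shape: hash the module, key, layout, and specialization info, probe the cache, and compile and upload only on a miss. A condensed sketch of that flow with the compile step elided and its outputs stubbed out as locals; this is an illustration of the pattern, not a function from the diff:

    /* Shape shared by anv_pipeline_compile_{vs,gs,fs,cs}; the
     * brw_compile_*() call and its outputs are stubbed here. */
    static struct anv_shader_bin *
    compile_or_lookup(struct anv_pipeline *pipeline,
                      struct anv_pipeline_cache *cache,
                      const unsigned char sha1[20])
    {
       struct anv_shader_bin *bin = NULL;

       if (cache)                  /* probe before compiling */
          bin = anv_pipeline_cache_search(cache, sha1, 20);

       if (bin == NULL) {
          const void *shader_code = NULL;   /* would come from the backend */
          uint32_t code_size = 0;
          struct brw_stage_prog_data prog_data = { 0, };
          struct anv_pipeline_bind_map map = { 0, };

          /* ... run the backend compiler here ... */

          bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20,
                                           shader_code, code_size,
                                           &prog_data, sizeof(prog_data),
                                           &map);
       }

       return bin;   /* the pipeline keeps this reference until destroyed */
    }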
@@ -504,21 +529,20 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline,
 {
    const struct brw_compiler *compiler =
       pipeline->device->instance->physicalDevice.compiler;
-   const struct brw_stage_prog_data *stage_prog_data;
    struct anv_pipeline_bind_map map;
    struct brw_gs_prog_key key;
-   uint32_t kernel = NO_KERNEL;
+   struct anv_shader_bin *bin = NULL;
    unsigned char sha1[20];

    populate_gs_prog_key(&pipeline->device->info, &key);

-   if (module->size > 0) {
+   if (cache) {
       anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint,
                       pipeline->layout, spec_info);
-      kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data, &map);
+      bin = anv_pipeline_cache_search(cache, sha1, 20);
    }

-   if (kernel == NO_KERNEL) {
+   if (bin == NULL) {
       struct brw_gs_prog_data prog_data = { 0, };
       struct anv_pipeline_binding surface_to_descriptor[256];
       struct anv_pipeline_binding sampler_to_descriptor[256];
@@ -556,20 +580,20 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline,
       }

       /* TODO: SIMD8 GS */
-      stage_prog_data = &prog_data.base.base;
-      kernel = anv_pipeline_cache_upload_kernel(cache,
-                                                module->size > 0 ? sha1 : NULL,
-                                                shader_code, code_size,
-                                                &stage_prog_data, sizeof(prog_data),
-                                                &map);
+      bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20,
+                                       shader_code, code_size,
+                                       &prog_data, sizeof(prog_data), &map);
+      if (!bin) {
+         ralloc_free(mem_ctx);
+         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+      }

       ralloc_free(mem_ctx);
    }

-   pipeline->gs_kernel = kernel;
+   pipeline->gs_kernel = bin->kernel.offset;

-   anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_GEOMETRY,
-                                   stage_prog_data, &map);
+   anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_GEOMETRY, bin);

    return VK_SUCCESS;
 }

@@ -585,21 +609,20 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline,
 {
    const struct brw_compiler *compiler =
       pipeline->device->instance->physicalDevice.compiler;
-   const struct brw_stage_prog_data *stage_prog_data;
    struct anv_pipeline_bind_map map;
    struct brw_wm_prog_key key;
+   struct anv_shader_bin *bin = NULL;
    unsigned char sha1[20];

    populate_wm_prog_key(&pipeline->device->info, info, extra, &key);

-   if (module->size > 0) {
+   if (cache) {
       anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint,
                       pipeline->layout, spec_info);
-      pipeline->ps_ksp0 =
-         anv_pipeline_cache_search(cache, sha1, &stage_prog_data, &map);
+      bin = anv_pipeline_cache_search(cache, sha1, 20);
    }

-   if (pipeline->ps_ksp0 == NO_KERNEL) {
+   if (bin == NULL) {
       struct brw_wm_prog_data prog_data = { 0, };
       struct anv_pipeline_binding surface_to_descriptor[256];
       struct anv_pipeline_binding sampler_to_descriptor[256];
@@ -688,19 +711,20 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline,
          return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
       }

-      stage_prog_data = &prog_data.base;
-      pipeline->ps_ksp0 =
-         anv_pipeline_cache_upload_kernel(cache,
-                                          module->size > 0 ? sha1 : NULL,
-                                          shader_code, code_size,
-                                          &stage_prog_data, sizeof(prog_data),
-                                          &map);
+      bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20,
+                                       shader_code, code_size,
+                                       &prog_data, sizeof(prog_data), &map);
+      if (!bin) {
+         ralloc_free(mem_ctx);
+         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+      }

       ralloc_free(mem_ctx);
    }

-   anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_FRAGMENT,
-                                   stage_prog_data, &map);
+   pipeline->ps_ksp0 = bin->kernel.offset;
+
+   anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_FRAGMENT, bin);

    return VK_SUCCESS;
 }

@@ -715,21 +739,20 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline,
 {
    const struct brw_compiler *compiler =
       pipeline->device->instance->physicalDevice.compiler;
-   const struct brw_stage_prog_data *stage_prog_data;
    struct anv_pipeline_bind_map map;
    struct brw_cs_prog_key key;
-   uint32_t kernel = NO_KERNEL;
+   struct anv_shader_bin *bin = NULL;
    unsigned char sha1[20];

    populate_cs_prog_key(&pipeline->device->info, &key);

-   if (module->size > 0) {
+   if (cache) {
       anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint,
                       pipeline->layout, spec_info);
-      kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data, &map);
+      bin = anv_pipeline_cache_search(cache, sha1, 20);
    }

-   if (module->size == 0 || kernel == NO_KERNEL) {
+   if (bin == NULL) {
       struct brw_cs_prog_data prog_data = { 0, };
       struct anv_pipeline_binding surface_to_descriptor[256];
       struct anv_pipeline_binding sampler_to_descriptor[256];
@@ -761,20 +784,20 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline,
         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
      }

-      stage_prog_data = &prog_data.base;
-      kernel = anv_pipeline_cache_upload_kernel(cache,
-                                                module->size > 0 ? sha1 : NULL,
-                                                shader_code, code_size,
-                                                &stage_prog_data, sizeof(prog_data),
-                                                &map);
+      bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20,
+                                       shader_code, code_size,
+                                       &prog_data, sizeof(prog_data), &map);
+      if (!bin) {
+         ralloc_free(mem_ctx);
+         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+      }

       ralloc_free(mem_ctx);
    }

-   pipeline->cs_simd = kernel;
+   pipeline->cs_simd = bin->kernel.offset;

-   anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_COMPUTE,
-                                   stage_prog_data, &map);
+   anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_COMPUTE, bin);

    return VK_SUCCESS;
 }

@@ -1168,8 +1191,7 @@ anv_pipeline_init(struct anv_pipeline *pipeline,
    /* When we free the pipeline, we detect stages based on the NULL status
     * of various prog_data pointers.  Make them NULL by default.
     */
-   memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data));
-   memset(pipeline->bindings, 0, sizeof(pipeline->bindings));
+   memset(pipeline->shaders, 0, sizeof(pipeline->shaders));

    pipeline->vs_simd8 = NO_KERNEL;
    pipeline->vs_vec4 = NO_KERNEL;
@@ -1278,6 +1300,11 @@ anv_pipeline_init(struct anv_pipeline *pipeline,
    return VK_SUCCESS;

 compile_fail:
+   for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
+      if (pipeline->shaders[s])
+         anv_shader_bin_unref(device, pipeline->shaders[s]);
+   }
+
    anv_reloc_list_finish(&pipeline->batch_relocs, alloc);

    return result;
@@ -1295,9 +1322,6 @@ anv_graphics_pipeline_create(
    ANV_FROM_HANDLE(anv_device, device, _device);
    ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);

-   if (cache == NULL)
-      cache = &device->default_pipeline_cache;
-
    switch (device->info.gen) {
    case 7:
       if (device->info.is_haswell)
@@ -1351,9 +1375,6 @@ static VkResult anv_compute_pipeline_create(
    ANV_FROM_HANDLE(anv_device, device, _device);
    ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);

-   if (cache == NULL)
-      cache = &device->default_pipeline_cache;
-
    switch (device->info.gen) {
    case 7:
       if (device->info.is_haswell)
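The anv_pipeline_cache.c rewrite that follows swaps the hand-rolled open-addressing table for util/hash_table, keyed by variable-length blobs (a 20-byte SHA-1 today, but any byte string works). The trick it uses for temporary lookup keys, overlaying a flexible-array struct on a stack VLA to avoid a heap allocation, is easiest to see in isolation. A standalone sketch with generic names; blob_key mirrors the diff's shader_bin_key:

    #include <stdbool.h>
    #include <stdint.h>
    #include <string.h>

    struct blob_key {
       uint32_t size;
       uint8_t data[];          /* key bytes follow the header */
    };

    static bool
    blob_key_equals(const void *void_a, const void *void_b)
    {
       const struct blob_key *a = void_a, *b = void_b;
       return a->size == b->size && memcmp(a->data, b->data, a->size) == 0;
    }

    static bool
    probe_example(const void *key_data, uint32_t key_size,
                  const struct blob_key *stored)
    {
       /* Round up to whole uint32_t words, as DIV_ROUND_UP does in the
        * diff, and build the lookup key on the stack. */
       uint32_t vla[1 + (key_size + sizeof(uint32_t) - 1) / sizeof(uint32_t)];
       struct blob_key *key = (void *)vla;

       key->size = key_size;
       memcpy(key->data, key_data, key_size);

       return blob_key_equals(key, stored);
    }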
diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c
index abca9fe3313..2753c46fbde 100644
--- a/src/intel/vulkan/anv_pipeline_cache.c
+++ b/src/intel/vulkan/anv_pipeline_cache.c
@@ -22,6 +22,7 @@
  */

 #include "util/mesa-sha1.h"
+#include "util/hash_table.h"
 #include "util/debug.h"
 #include "anv_private.h"

@@ -147,67 +148,55 @@ anv_shader_bin_write_data(const struct anv_shader_bin *shader, void *data)
  * dual_src_blend.
  */

+static uint32_t
+shader_bin_key_hash_func(const void *void_key)
+{
+   const struct shader_bin_key *key = void_key;
+   return _mesa_hash_data(key->data, key->size);
+}
+
+static bool
+shader_bin_key_compare_func(const void *void_a, const void *void_b)
+{
+   const struct shader_bin_key *a = void_a, *b = void_b;
+   if (a->size != b->size)
+      return false;
+
+   return memcmp(a->data, b->data, a->size) == 0;
+}
+
 void
 anv_pipeline_cache_init(struct anv_pipeline_cache *cache,
-                        struct anv_device *device)
+                        struct anv_device *device,
+                        bool cache_enabled)
 {
    cache->device = device;
-   anv_state_stream_init(&cache->program_stream,
-                         &device->instruction_block_pool);
    pthread_mutex_init(&cache->mutex, NULL);

-   cache->kernel_count = 0;
-   cache->total_size = 0;
-   cache->table_size = 1024;
-   const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]);
-   cache->hash_table = malloc(byte_size);
-
-   /* We don't consider allocation failure fatal, we just start with a 0-sized
-    * cache.
-    */
-   if (cache->hash_table == NULL ||
-       !env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", true))
-      cache->table_size = 0;
-   else
-      memset(cache->hash_table, 0xff, byte_size);
+   if (cache_enabled) {
+      cache->cache = _mesa_hash_table_create(NULL, shader_bin_key_hash_func,
+                                             shader_bin_key_compare_func);
+   } else {
+      cache->cache = NULL;
+   }
 }

 void
 anv_pipeline_cache_finish(struct anv_pipeline_cache *cache)
 {
-   anv_state_stream_finish(&cache->program_stream);
    pthread_mutex_destroy(&cache->mutex);
-   free(cache->hash_table);
-}
-
-struct cache_entry {
-   unsigned char sha1[20];
-   uint32_t prog_data_size;
-   uint32_t kernel_size;
-   uint32_t surface_count;
-   uint32_t sampler_count;
-   uint32_t image_count;
-
-   char prog_data[0];
-
-   /* kernel follows prog_data at next 64 byte aligned address */
-};
-
-static uint32_t
-entry_size(struct cache_entry *entry)
-{
-   /* This returns the number of bytes needed to serialize an entry, which
-    * doesn't include the alignment padding bytes.
-    */
-   struct brw_stage_prog_data *prog_data = (void *)entry->prog_data;
-   const uint32_t param_size =
-      prog_data->nr_params * sizeof(*prog_data->param);
+   if (cache->cache) {
+      /* This is a bit unfortunate.  In order to keep things from randomly
+       * going away, the shader cache has to hold a reference to all shader
+       * binaries it contains.  We unref them when we destroy the cache.
+       */
+      struct hash_entry *entry;
+      hash_table_foreach(cache->cache, entry)
+         anv_shader_bin_unref(cache->device, entry->data);

-   const uint32_t map_size =
-      entry->surface_count * sizeof(struct anv_pipeline_binding) +
-      entry->sampler_count * sizeof(struct anv_pipeline_binding);
-
-   return sizeof(*entry) + entry->prog_data_size + param_size + map_size;
+      _mesa_hash_table_destroy(cache->cache, NULL);
+   }
 }

 void
@@ -236,221 +225,94 @@ anv_hash_shader(unsigned char *hash, const void *key, size_t key_size,
    _mesa_sha1_final(ctx, hash);
 }

-static uint32_t
-anv_pipeline_cache_search_unlocked(struct anv_pipeline_cache *cache,
-                                   const unsigned char *sha1,
-                                   const struct brw_stage_prog_data **prog_data,
-                                   struct anv_pipeline_bind_map *map)
+static struct anv_shader_bin *
+anv_pipeline_cache_search_locked(struct anv_pipeline_cache *cache,
+                                 const void *key_data, uint32_t key_size)
 {
-   const uint32_t mask = cache->table_size - 1;
-   const uint32_t start = (*(uint32_t *) sha1);
-
-   for (uint32_t i = 0; i < cache->table_size; i++) {
-      const uint32_t index = (start + i) & mask;
-      const uint32_t offset = cache->hash_table[index];
-
-      if (offset == ~0)
-         return NO_KERNEL;
-
-      struct cache_entry *entry =
-         cache->program_stream.block_pool->map + offset;
-      if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) {
-         if (prog_data) {
-            assert(map);
-            void *p = entry->prog_data;
-            *prog_data = p;
-            p += entry->prog_data_size;
-            p += (*prog_data)->nr_params * sizeof(*(*prog_data)->param);
-            map->surface_count = entry->surface_count;
-            map->sampler_count = entry->sampler_count;
-            map->image_count = entry->image_count;
-            map->surface_to_descriptor = p;
-            p += map->surface_count * sizeof(struct anv_pipeline_binding);
-            map->sampler_to_descriptor = p;
-         }
-
-         return offset + align_u32(entry_size(entry), 64);
-      }
-   }
+   uint32_t vla[1 + DIV_ROUND_UP(key_size, sizeof(uint32_t))];
+   struct shader_bin_key *key = (void *)vla;
+   key->size = key_size;
+   memcpy(key->data, key_data, key_size);

-   /* This can happen if the pipeline cache is disabled via
-    * ANV_ENABLE_PIPELINE_CACHE=false
-    */
-   return NO_KERNEL;
+   struct hash_entry *entry = _mesa_hash_table_search(cache->cache, key);
+   if (entry)
+      return entry->data;
+   else
+      return NULL;
 }

-uint32_t
+struct anv_shader_bin *
 anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
-                          const unsigned char *sha1,
-                          const struct brw_stage_prog_data **prog_data,
-                          struct anv_pipeline_bind_map *map)
+                          const void *key_data, uint32_t key_size)
 {
-   uint32_t kernel;
+   if (!cache->cache)
+      return NULL;

    pthread_mutex_lock(&cache->mutex);

-   kernel = anv_pipeline_cache_search_unlocked(cache, sha1, prog_data, map);
+   struct anv_shader_bin *shader =
+      anv_pipeline_cache_search_locked(cache, key_data, key_size);

    pthread_mutex_unlock(&cache->mutex);

-   return kernel;
-}
-
-static void
-anv_pipeline_cache_set_entry(struct anv_pipeline_cache *cache,
-                             struct cache_entry *entry, uint32_t entry_offset)
-{
-   const uint32_t mask = cache->table_size - 1;
-   const uint32_t start = (*(uint32_t *) entry->sha1);
-
-   /* We'll always be able to insert when we get here. */
-   assert(cache->kernel_count < cache->table_size / 2);
+   /* We increment refcount before handing it to the caller */
+   if (shader)
+      anv_shader_bin_ref(shader);

-   for (uint32_t i = 0; i < cache->table_size; i++) {
-      const uint32_t index = (start + i) & mask;
-      if (cache->hash_table[index] == ~0) {
-         cache->hash_table[index] = entry_offset;
-         break;
-      }
-   }
-
-   cache->total_size += entry_size(entry) + entry->kernel_size;
-   cache->kernel_count++;
+   return shader;
 }

-static VkResult
-anv_pipeline_cache_grow(struct anv_pipeline_cache *cache)
+static struct anv_shader_bin *
+anv_pipeline_cache_add_shader(struct anv_pipeline_cache *cache,
+                              const void *key_data, uint32_t key_size,
+                              const void *kernel_data, uint32_t kernel_size,
+                              const void *prog_data, uint32_t prog_data_size,
+                              const struct anv_pipeline_bind_map *bind_map)
 {
-   const uint32_t table_size = cache->table_size * 2;
-   const uint32_t old_table_size = cache->table_size;
-   const size_t byte_size = table_size * sizeof(cache->hash_table[0]);
-   uint32_t *table;
-   uint32_t *old_table = cache->hash_table;
-
-   table = malloc(byte_size);
-   if (table == NULL)
-      return VK_ERROR_OUT_OF_HOST_MEMORY;
-
-   cache->hash_table = table;
-   cache->table_size = table_size;
-   cache->kernel_count = 0;
-   cache->total_size = 0;
-
-   memset(cache->hash_table, 0xff, byte_size);
-   for (uint32_t i = 0; i < old_table_size; i++) {
-      const uint32_t offset = old_table[i];
-      if (offset == ~0)
-         continue;
-
-      struct cache_entry *entry =
-         cache->program_stream.block_pool->map + offset;
-      anv_pipeline_cache_set_entry(cache, entry, offset);
-   }
+   struct anv_shader_bin *shader =
+      anv_pipeline_cache_search_locked(cache, key_data, key_size);
+   if (shader)
+      return shader;
+
+   struct anv_shader_bin *bin =
+      anv_shader_bin_create(cache->device, key_data, key_size,
+                            kernel_data, kernel_size,
+                            prog_data, prog_data_size, bind_map);
+   if (!bin)
+      return NULL;

-   free(old_table);
+   _mesa_hash_table_insert(cache->cache, anv_shader_bin_get_key(bin), bin);

-   return VK_SUCCESS;
-}
-
-static void
-anv_pipeline_cache_add_entry(struct anv_pipeline_cache *cache,
-                             struct cache_entry *entry, uint32_t entry_offset)
-{
-   if (cache->kernel_count == cache->table_size / 2)
-      anv_pipeline_cache_grow(cache);
-
-   /* Failing to grow that hash table isn't fatal, but may mean we don't
-    * have enough space to add this new kernel.  Only add it if there's room.
-    */
-   if (cache->kernel_count < cache->table_size / 2)
-      anv_pipeline_cache_set_entry(cache, entry, entry_offset);
+   return bin;
 }

-uint32_t
+struct anv_shader_bin *
 anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
-                                 const unsigned char *sha1,
-                                 const void *kernel, size_t kernel_size,
-                                 const struct brw_stage_prog_data **prog_data,
-                                 size_t prog_data_size,
-                                 struct anv_pipeline_bind_map *map)
+                                 const void *key_data, uint32_t key_size,
+                                 const void *kernel_data, uint32_t kernel_size,
+                                 const void *prog_data, uint32_t prog_data_size,
+                                 const struct anv_pipeline_bind_map *bind_map)
 {
-   pthread_mutex_lock(&cache->mutex);
-
-   /* Before uploading, check again that another thread didn't upload this
-    * shader while we were compiling it.
-    */
-   if (sha1) {
-      uint32_t cached_kernel =
-         anv_pipeline_cache_search_unlocked(cache, sha1, prog_data, map);
-      if (cached_kernel != NO_KERNEL) {
-         pthread_mutex_unlock(&cache->mutex);
-         return cached_kernel;
-      }
-   }
-
-   struct cache_entry *entry;
+   if (cache->cache) {
+      pthread_mutex_lock(&cache->mutex);

-   assert((*prog_data)->nr_pull_params == 0);
-   assert((*prog_data)->nr_image_params == 0);
+      struct anv_shader_bin *bin =
+         anv_pipeline_cache_add_shader(cache, key_data, key_size,
+                                       kernel_data, kernel_size,
+                                       prog_data, prog_data_size, bind_map);

-   const uint32_t param_size =
-      (*prog_data)->nr_params * sizeof(*(*prog_data)->param);
+      pthread_mutex_unlock(&cache->mutex);

-   const uint32_t map_size =
-      map->surface_count * sizeof(struct anv_pipeline_binding) +
-      map->sampler_count * sizeof(struct anv_pipeline_binding);
+      /* We increment refcount before handing it to the caller */
+      anv_shader_bin_ref(bin);

-   const uint32_t preamble_size =
-      align_u32(sizeof(*entry) + prog_data_size + param_size + map_size, 64);
-
-   const uint32_t size = preamble_size + kernel_size;
-
-   assert(size < cache->program_stream.block_pool->block_size);
-   const struct anv_state state =
-      anv_state_stream_alloc(&cache->program_stream, size, 64);
-
-   entry = state.map;
-   entry->prog_data_size = prog_data_size;
-   entry->surface_count = map->surface_count;
-   entry->sampler_count = map->sampler_count;
-   entry->image_count = map->image_count;
-   entry->kernel_size = kernel_size;
-
-   void *p = entry->prog_data;
-   memcpy(p, *prog_data, prog_data_size);
-   p += prog_data_size;
-
-   memcpy(p, (*prog_data)->param, param_size);
-   ((struct brw_stage_prog_data *)entry->prog_data)->param = p;
-   p += param_size;
-
-   memcpy(p, map->surface_to_descriptor,
-          map->surface_count * sizeof(struct anv_pipeline_binding));
-   map->surface_to_descriptor = p;
-   p += map->surface_count * sizeof(struct anv_pipeline_binding);
-
-   memcpy(p, map->sampler_to_descriptor,
-          map->sampler_count * sizeof(struct anv_pipeline_binding));
-   map->sampler_to_descriptor = p;
-
-   if (sha1) {
-      assert(anv_pipeline_cache_search_unlocked(cache, sha1,
-                                                NULL, NULL) == NO_KERNEL);
-
-      memcpy(entry->sha1, sha1, sizeof(entry->sha1));
-      anv_pipeline_cache_add_entry(cache, entry, state.offset);
+      return bin;
+   } else {
+      /* In this case, we're not caching it so the caller owns it entirely */
+      return anv_shader_bin_create(cache->device, key_data, key_size,
+                                   kernel_data, kernel_size,
+                                   prog_data, prog_data_size, bind_map);
    }
-
-   pthread_mutex_unlock(&cache->mutex);
-
-   memcpy(state.map + preamble_size, kernel, kernel_size);
-
-   if (!cache->device->info.has_llc)
-      anv_state_clflush(state);
-
-   *prog_data = (const struct brw_stage_prog_data *) entry->prog_data;
-
-   return state.offset + preamble_size;
 }

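The locking discipline above is worth noting: the expensive compile happens outside the mutex, and anv_pipeline_cache_add_shader re-searches under the lock before inserting, so two threads racing on the same shader converge on a single anv_shader_bin. The same double-check pattern with the driver types pared away; the tiny_cache struct and its search/insert hooks are hypothetical stand-ins for the Mesa hash-table calls:

    #include <pthread.h>
    #include <stddef.h>

    /* Hypothetical minimal cache, for illustration only. */
    struct tiny_cache {
       pthread_mutex_t mutex;
       void *(*search)(struct tiny_cache *c, const void *key);
       void  (*insert)(struct tiny_cache *c, const void *key, void *value);
    };

    static void *
    add_or_reuse(struct tiny_cache *c, const void *key, void *freshly_built)
    {
       pthread_mutex_lock(&c->mutex);

       /* Re-check under the lock: another thread may have inserted the
        * same entry while we were compiling outside the lock. */
       void *winner = c->search(c, key);
       if (winner == NULL) {
          c->insert(c, key, freshly_built);
          winner = freshly_built;
       }

       pthread_mutex_unlock(&c->mutex);
       return winner;   /* caller takes a reference on the winner */
    }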
 struct cache_header {
@@ -469,6 +331,9 @@ anv_pipeline_cache_load(struct anv_pipeline_cache *cache,
    struct cache_header header;
    uint8_t uuid[VK_UUID_SIZE];

+   if (cache->cache == NULL)
+      return;
+
    if (size < sizeof(header))
       return;
    memcpy(&header, data, sizeof(header));
@@ -484,48 +349,62 @@ anv_pipeline_cache_load(struct anv_pipeline_cache *cache,
    if (memcmp(header.uuid, uuid, VK_UUID_SIZE) != 0)
       return;

-   void *end = (void *) data + size;
-   void *p = (void *) data + header.header_size;
-
-   while (p < end) {
-      struct cache_entry *entry = p;
-
-      void *data = entry->prog_data;
-
-      /* Make a copy of prog_data so that it's mutable */
-      uint8_t prog_data_tmp[512];
-      assert(entry->prog_data_size <= sizeof(prog_data_tmp));
-      memcpy(prog_data_tmp, data, entry->prog_data_size);
-      struct brw_stage_prog_data *prog_data = (void *)prog_data_tmp;
-      data += entry->prog_data_size;
-
-      prog_data->param = data;
-      data += prog_data->nr_params * sizeof(*prog_data->param);
-
-      struct anv_pipeline_binding *surface_to_descriptor = data;
-      data += entry->surface_count * sizeof(struct anv_pipeline_binding);
-      struct anv_pipeline_binding *sampler_to_descriptor = data;
-      data += entry->sampler_count * sizeof(struct anv_pipeline_binding);
-      void *kernel = data;
-
-      struct anv_pipeline_bind_map map = {
-         .surface_count = entry->surface_count,
-         .sampler_count = entry->sampler_count,
-         .image_count = entry->image_count,
-         .surface_to_descriptor = surface_to_descriptor,
-         .sampler_to_descriptor = sampler_to_descriptor
-      };
-
-      const struct brw_stage_prog_data *const_prog_data = prog_data;
-
-      anv_pipeline_cache_upload_kernel(cache, entry->sha1,
-                                       kernel, entry->kernel_size,
-                                       &const_prog_data,
-                                       entry->prog_data_size, &map);
-      p = kernel + entry->kernel_size;
+   const void *end = data + size;
+   const void *p = data + header.header_size;
+
+   /* Count is the total number of valid entries */
+   uint32_t count;
+   if (p + sizeof(count) >= end)
+      return;
+   memcpy(&count, p, sizeof(count));
+   p += align_u32(sizeof(count), 8);
+
+   for (uint32_t i = 0; i < count; i++) {
+      struct anv_shader_bin bin;
+      if (p + sizeof(bin) > end)
+         break;
+      memcpy(&bin, p, sizeof(bin));
+      p += align_u32(sizeof(struct anv_shader_bin), 8);
+
+      const void *prog_data = p;
+      p += align_u32(bin.prog_data_size, 8);
+
+      struct shader_bin_key key;
+      if (p + sizeof(key) > end)
+         break;
+      memcpy(&key, p, sizeof(key));
+      const void *key_data = p + sizeof(key);
+      p += align_u32(sizeof(key) + key.size, 8);
+
+      /* We're going to memcpy this so getting rid of const is fine */
+      struct anv_pipeline_binding *bindings = (void *)p;
+      p += align_u32((bin.bind_map.surface_count + bin.bind_map.sampler_count) *
+                     sizeof(struct anv_pipeline_binding), 8);
+      bin.bind_map.surface_to_descriptor = bindings;
+      bin.bind_map.sampler_to_descriptor = bindings + bin.bind_map.surface_count;
+
+      const void *kernel_data = p;
+      p += align_u32(bin.kernel_size, 8);
+
+      if (p > end)
+         break;
+
+      anv_pipeline_cache_add_shader(cache, key_data, key.size,
+                                    kernel_data, bin.kernel_size,
+                                    prog_data, bin.prog_data_size,
+                                    &bin.bind_map);
    }
 }

+static bool
+pipeline_cache_enabled()
+{
+   static int enabled = -1;
+   if (enabled < 0)
+      enabled = env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", true);
+   return enabled;
+}
+
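pipeline_cache_enabled() above memoizes the environment lookup in a function-local static, so the cost is paid once per process rather than once per cache. The same memoization pattern in a self-contained form; MY_FEATURE and the accepted spellings are illustrative, while the driver itself uses Mesa's env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", true):

    #include <stdbool.h>
    #include <stdlib.h>
    #include <string.h>

    static bool
    feature_enabled(void)
    {
       static int enabled = -1;            /* -1 means "not queried yet" */
       if (enabled < 0) {
          const char *v = getenv("MY_FEATURE");   /* hypothetical variable */
          if (v == NULL)
             enabled = 1;                  /* default on, as in the diff */
          else
             enabled = strcmp(v, "false") != 0 && strcmp(v, "0") != 0;
       }
       return enabled;
    }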
 VkResult anv_CreatePipelineCache(
     VkDevice                                    _device,
     const VkPipelineCacheCreateInfo*            pCreateInfo,
@@ -544,7 +423,7 @@ VkResult anv_CreatePipelineCache(
    if (cache == NULL)
       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

-   anv_pipeline_cache_init(cache, device);
+   anv_pipeline_cache_init(cache, device, pipeline_cache_enabled());

    if (pCreateInfo->initialDataSize > 0)
       anv_pipeline_cache_load(cache,
@@ -579,9 +458,16 @@ VkResult anv_GetPipelineCacheData(
    ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);
    struct cache_header *header;

-   const size_t size = sizeof(*header) + cache->total_size;
-
    if (pData == NULL) {
+      size_t size = align_u32(sizeof(*header), 8) +
+                    align_u32(sizeof(uint32_t), 8);
+
+      if (cache->cache) {
+         struct hash_entry *entry;
+         hash_table_foreach(cache->cache, entry)
+            size += anv_shader_bin_data_size(entry->data);
+      }
+
       *pDataSize = size;
       return VK_SUCCESS;
    }
@@ -598,25 +484,25 @@ VkResult anv_GetPipelineCacheData(
    header->vendor_id = 0x8086;
    header->device_id = device->chipset_id;
    anv_device_get_cache_uuid(header->uuid);
-   p += header->header_size;
-
-   struct cache_entry *entry;
-   for (uint32_t i = 0; i < cache->table_size; i++) {
-      if (cache->hash_table[i] == ~0)
-         continue;
+   p += align_u32(header->header_size, 8);

-      entry = cache->program_stream.block_pool->map + cache->hash_table[i];
-      const uint32_t size = entry_size(entry);
-      if (end < p + size + entry->kernel_size)
-         break;
+   uint32_t *count = p;
+   p += align_u32(sizeof(*count), 8);
+   *count = 0;

-      memcpy(p, entry, size);
-      p += size;
+   if (cache->cache) {
+      struct hash_entry *entry;
+      hash_table_foreach(cache->cache, entry) {
+         struct anv_shader_bin *shader = entry->data;
+         size_t data_size = anv_shader_bin_data_size(entry->data);
+         if (p + data_size > end)
+            break;

-      void *kernel = (void *) entry + align_u32(size, 64);
+         anv_shader_bin_write_data(shader, p);
+         p += data_size;

-      memcpy(p, kernel, entry->kernel_size);
-      p += entry->kernel_size;
+         (*count)++;
+      }
    }

    *pDataSize = p - pData;
@@ -624,25 +510,6 @@ VkResult anv_GetPipelineCacheData(
    return VK_SUCCESS;
 }

-static void
-anv_pipeline_cache_merge(struct anv_pipeline_cache *dst,
-                         struct anv_pipeline_cache *src)
-{
-   for (uint32_t i = 0; i < src->table_size; i++) {
-      const uint32_t offset = src->hash_table[i];
-      if (offset == ~0)
-         continue;
-
-      struct cache_entry *entry =
-         src->program_stream.block_pool->map + offset;
-
-      if (anv_pipeline_cache_search(dst, entry->sha1, NULL, NULL) != NO_KERNEL)
-         continue;
-
-      anv_pipeline_cache_add_entry(dst, entry, offset);
-   }
-}
-
 VkResult anv_MergePipelineCaches(
     VkDevice                                    _device,
     VkPipelineCache                             destCache,
@@ -651,10 +518,23 @@ VkResult anv_MergePipelineCaches(
 {
    ANV_FROM_HANDLE(anv_pipeline_cache, dst, destCache);

+   if (!dst->cache)
+      return VK_SUCCESS;
+
    for (uint32_t i = 0; i < srcCacheCount; i++) {
       ANV_FROM_HANDLE(anv_pipeline_cache, src, pSrcCaches[i]);
+      if (!src->cache)
+         continue;
+
+      struct hash_entry *entry;
+      hash_table_foreach(src->cache, entry) {
+         struct anv_shader_bin *bin = entry->data;
+         if (_mesa_hash_table_search(dst->cache, anv_shader_bin_get_key(bin)))
+            continue;

-      anv_pipeline_cache_merge(dst, src);
+         anv_shader_bin_ref(bin);
+         _mesa_hash_table_insert(dst->cache, anv_shader_bin_get_key(bin), bin);
+      }
    }

    return VK_SUCCESS;
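The serialized format that anv_GetPipelineCacheData and anv_pipeline_cache_load now agree on is: an 8-aligned header, an in-stream entry count, then one 8-aligned blob per shader, truncating cleanly when the caller's buffer is too small. A sketch of that two-pass size-then-write walk; entry_data_size/entry_write_data are hypothetical stand-ins for anv_shader_bin_data_size() and anv_shader_bin_write_data():

    #include <stddef.h>
    #include <stdint.h>

    #define ALIGN8(v) (((v) + 7) & ~(size_t)7)   /* mirrors align_u32(v, 8) */

    size_t entry_data_size(const void *entry);           /* assumption */
    void   entry_write_data(const void *entry, void *out); /* assumption */

    static size_t
    serialize_entries(void *out, size_t capacity,
                      const void **entries, uint32_t n)
    {
       if (out == NULL) {                    /* pass 1: report the size */
          size_t size = ALIGN8(sizeof(uint32_t));
          for (uint32_t i = 0; i < n; i++)
             size += entry_data_size(entries[i]);
          return size;
       }

       uint8_t *p = out;                     /* pass 2: write what fits */
       uint8_t *end = p + capacity;

       uint32_t *count = (uint32_t *)p;      /* count lives in the stream */
       p += ALIGN8(sizeof(*count));
       *count = 0;

       for (uint32_t i = 0; i < n; i++) {
          size_t sz = entry_data_size(entries[i]);
          if (p + sz > end)
             break;            /* truncate cleanly; count stays accurate */
          entry_write_data(entries[i], p);
          p += sz;
          (*count)++;
       }

       return p - (uint8_t *)out;
    }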
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 878bbaa7678..f24020cb810 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -400,7 +400,7 @@ struct anv_fixed_size_state_pool {
 };

 #define ANV_MIN_STATE_SIZE_LOG2 6
-#define ANV_MAX_STATE_SIZE_LOG2 10
+#define ANV_MAX_STATE_SIZE_LOG2 17

 #define ANV_STATE_BUCKETS (ANV_MAX_STATE_SIZE_LOG2 - ANV_MIN_STATE_SIZE_LOG2 + 1)

@@ -658,31 +658,27 @@ struct anv_queue {

 struct anv_pipeline_cache {
    struct anv_device *                          device;
-   struct anv_state_stream                      program_stream;
    pthread_mutex_t                              mutex;

-   uint32_t                                     total_size;
-   uint32_t                                     table_size;
-   uint32_t                                     kernel_count;
-   uint32_t *                                   hash_table;
+   struct hash_table *                          cache;
 };

 struct anv_pipeline_bind_map;

 void anv_pipeline_cache_init(struct anv_pipeline_cache *cache,
-                             struct anv_device *device);
+                             struct anv_device *device,
+                             bool cache_enabled);
 void anv_pipeline_cache_finish(struct anv_pipeline_cache *cache);
-uint32_t anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
-                                   const unsigned char *sha1,
-                                   const struct brw_stage_prog_data **prog_data,
-                                   struct anv_pipeline_bind_map *map);
-uint32_t anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
-                                          const unsigned char *sha1,
-                                          const void *kernel,
-                                          size_t kernel_size,
-                                          const struct brw_stage_prog_data **prog_data,
-                                          size_t prog_data_size,
-                                          struct anv_pipeline_bind_map *map);
+
+struct anv_shader_bin *
+anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
+                          const void *key, uint32_t key_size);
+struct anv_shader_bin *
+anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
+                                 const void *key_data, uint32_t key_size,
+                                 const void *kernel_data, uint32_t kernel_size,
+                                 const void *prog_data, uint32_t prog_data_size,
+                                 const struct anv_pipeline_bind_map *bind_map);

 struct anv_device {
    VK_LOADER_DATA                              _loader_data;
@@ -705,7 +701,6 @@ struct anv_device {

    struct anv_block_pool                       instruction_block_pool;
    struct anv_state_pool                       instruction_state_pool;
-   struct anv_pipeline_cache                   default_pipeline_cache;

    struct anv_block_pool                       surface_state_block_pool;
    struct anv_state_pool                       surface_state_pool;
@@ -1519,12 +1514,12 @@ struct anv_pipeline {
    struct anv_dynamic_state                     dynamic_state;

    struct anv_pipeline_layout *                 layout;
-   struct anv_pipeline_bind_map                 bindings[MESA_SHADER_STAGES];

    bool                                         use_repclear;
    bool                                         needs_data_cache;

-   const struct brw_stage_prog_data *           prog_data[MESA_SHADER_STAGES];
+   struct anv_shader_bin *                      shaders[MESA_SHADER_STAGES];
+
    struct {
       uint32_t                                  start[MESA_SHADER_GEOMETRY + 1];
       uint32_t                                  size[MESA_SHADER_GEOMETRY + 1];
@@ -1574,29 +1569,22 @@ anv_pipeline_has_stage(const struct anv_pipeline *pipeline,
    return (pipeline->active_stages & mesa_to_vk_shader_stage(stage)) != 0;
 }

-static inline const struct brw_vs_prog_data *
-get_vs_prog_data(struct anv_pipeline *pipeline)
-{
-   return (const struct brw_vs_prog_data *) pipeline->prog_data[MESA_SHADER_VERTEX];
+#define ANV_DECL_GET_PROG_DATA_FUNC(prefix, stage)                   \
+static inline const struct brw_##prefix##_prog_data *               \
+get_##prefix##_prog_data(struct anv_pipeline *pipeline)             \
+{                                                                    \
+   if (anv_pipeline_has_stage(pipeline, stage)) {                    \
+      return (const struct brw_##prefix##_prog_data *)               \
+             anv_shader_bin_get_prog_data(pipeline->shaders[stage]); \
+   } else {                                                          \
+      return NULL;                                                   \
+   }                                                                 \
 }

-static inline const struct brw_gs_prog_data *
-get_gs_prog_data(struct anv_pipeline *pipeline)
-{
-   return (const struct brw_gs_prog_data *) pipeline->prog_data[MESA_SHADER_GEOMETRY];
-}
-
-static inline const struct brw_wm_prog_data *
-get_wm_prog_data(struct anv_pipeline *pipeline)
-{
-   return (const struct brw_wm_prog_data *) pipeline->prog_data[MESA_SHADER_FRAGMENT];
-}
-
-static inline const struct brw_cs_prog_data *
-get_cs_prog_data(struct anv_pipeline *pipeline)
-{
-   return (const struct brw_cs_prog_data *) pipeline->prog_data[MESA_SHADER_COMPUTE];
-}
+ANV_DECL_GET_PROG_DATA_FUNC(vs, MESA_SHADER_VERTEX)
+ANV_DECL_GET_PROG_DATA_FUNC(gs, MESA_SHADER_GEOMETRY)
+ANV_DECL_GET_PROG_DATA_FUNC(wm, MESA_SHADER_FRAGMENT)
+ANV_DECL_GET_PROG_DATA_FUNC(cs, MESA_SHADER_COMPUTE)

 struct anv_graphics_pipeline_create_info {
    /**
diff --git a/src/intel/vulkan/genX_l3.c b/src/intel/vulkan/genX_l3.c
index 8b3b8acb098..a74071cf124 100644
--- a/src/intel/vulkan/genX_l3.c
+++ b/src/intel/vulkan/genX_l3.c
@@ -318,7 +318,8 @@ get_pipeline_state_l3_weights(const struct anv_pipeline *pipeline)
       if (!anv_pipeline_has_stage(pipeline, i))
          continue;

-      const struct brw_stage_prog_data *prog_data = pipeline->prog_data[i];
+      const struct brw_stage_prog_data *prog_data =
+         anv_shader_bin_get_prog_data(pipeline->shaders[i]);

       needs_dc |= pipeline->needs_data_cache;
       needs_slm |= prog_data->total_shared;
diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c
index 8cf801fe1f9..7d8129de9e9 100644
--- a/src/intel/vulkan/genX_pipeline.c
+++ b/src/intel/vulkan/genX_pipeline.c
@@ -63,8 +63,7 @@ genX(compute_pipeline_create)(
    /* When we free the pipeline, we detect stages based on the NULL status
     * of various prog_data pointers.  Make them NULL by default.
     */
-   memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data));
-   memset(pipeline->bindings, 0, sizeof(pipeline->bindings));
+   memset(pipeline->shaders, 0, sizeof(pipeline->shaders));

    pipeline->vs_simd8 = NO_KERNEL;
    pipeline->vs_vec4 = NO_KERNEL;
diff --git a/src/intel/vulkan/genX_pipeline_util.h b/src/intel/vulkan/genX_pipeline_util.h
index 62fd01cd4b7..6518fae7988 100644
--- a/src/intel/vulkan/genX_pipeline_util.h
+++ b/src/intel/vulkan/genX_pipeline_util.h
@@ -671,7 +671,7 @@ emit_cb_state(struct anv_pipeline *pipeline,
    uint32_t surface_count = 0;
    struct anv_pipeline_bind_map *map;
    if (anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
-      map = &pipeline->bindings[MESA_SHADER_FRAGMENT];
+      map = &pipeline->shaders[MESA_SHADER_FRAGMENT]->bind_map;
       surface_count = map->surface_count;
    }
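A closing note on the anv_private.h change: each ANV_DECL_GET_PROG_DATA_FUNC expansion (get_vs_prog_data, get_gs_prog_data, get_wm_prog_data, get_cs_prog_data) now checks active_stages and returns NULL for inactive stages rather than dereferencing a NULL shaders[] slot, so callers can test the result directly. An illustrative caller, not taken from the diff:

    /* Assumes an already-built struct anv_pipeline. */
    static bool
    pipeline_has_fragment_shader(struct anv_pipeline *pipeline)
    {
       const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);

       /* NULL means the fragment stage is inactive; non-NULL prog data is
        * safe to dereference. */
       return wm_prog_data != NULL;
    }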