author | Bas Nieuwenhuizen <[email protected]> | 2018-08-14 00:07:57 +0200
---|---|---
committer | Bas Nieuwenhuizen <[email protected]> | 2018-08-14 10:26:24 +0200
commit | fbcd1673144facf0f4037330ba3d6b176dad955b (patch) |
tree | a7ad865c25efd51f3b261d096ec27103b0d6d461 /src/amd/vulkan/radv_meta_blit2d.c |
parent | 24a9033d6f7eb88a760d382ace64bffa65d14cdc (diff) |
radv: Add on-demand compilation of built-in shaders.
In environments where we cannot cache, e.g. Android (no homedir),
ChromeOS (read-only rootfs) or sandboxes (cannot open the cache), the
startup cost of creating a device in radv is rather high, because all
possible built-in pipelines are compiled up front. Depending on the
CPU, this meant a 1-4 second cost for creating a device.
For CTS this cost is unacceptable, and it is likely too high for
starting ordinary apps as well.
With this patch, radv compiles built-in shaders on demand when there
is no cache (the pattern is sketched below the commit message). Once a
cache exists after the first run, even an incomplete one, the driver
knows it can likely write the cache and precompiles everything.
Note that I did not switch the buffer and itob/btoi compute pipelines
to on-demand, since you cannot really do anything in Vulkan without
them and there are only a few.
This reduces the CTS runtime for the no-cache scenario on my
Threadripper from 32 minutes to 8 minutes.
Reviewed-by: Dave Airlie <[email protected]>
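
For readers skimming the diff: the change follows a lazy-initialization pattern in which the caller does a cheap unlocked check for an already-built pipeline and only falls into a locked, re-checked compile path on first use. The sketch below is a minimal, self-contained illustration of that pattern using C11 threads; all names (demo_device, compile_pipeline, use_color_pipeline) are hypothetical stand-ins, not the radv API.

```c
/* Minimal sketch of the on-demand pattern used in the diff below.
 * All names here are hypothetical; in radv the "compilation" step
 * builds NIR shaders and a VkPipeline under device->meta_state.mtx. */
#include <stdio.h>
#include <stdlib.h>
#include <threads.h>

struct demo_device {
	mtx_t mtx;            /* protects lazy pipeline creation */
	void *color_pipeline; /* NULL until compiled on demand   */
};

/* Stand-in for pipeline compilation: just return something non-NULL. */
static void *compile_pipeline(void)
{
	return malloc(1);
}

/* Slow path: take the lock, re-check, then compile. The re-check makes
 * this safe when several threads hit the first use at the same time. */
static int init_color_pipeline(struct demo_device *dev)
{
	mtx_lock(&dev->mtx);
	if (dev->color_pipeline) {  /* another thread won the race */
		mtx_unlock(&dev->mtx);
		return 0;
	}
	dev->color_pipeline = compile_pipeline();
	mtx_unlock(&dev->mtx);
	return dev->color_pipeline ? 0 : -1;
}

/* Fast path at draw time: cheap unlocked check, slow path only once. */
static int use_color_pipeline(struct demo_device *dev)
{
	if (dev->color_pipeline == NULL) {
		if (init_color_pipeline(dev) != 0)
			return -1;  /* analogous to setting record_result */
	}
	/* ... bind dev->color_pipeline and draw ... */
	return 0;
}

int main(void)
{
	struct demo_device dev = { .color_pipeline = NULL };
	mtx_init(&dev.mtx, mtx_plain);
	printf("first use: %d\n", use_color_pipeline(&dev));  /* compiles */
	printf("second use: %d\n", use_color_pipeline(&dev)); /* cached   */
	free(dev.color_pipeline);
	mtx_destroy(&dev.mtx);
	return 0;
}
```

The unlocked check keeps the common already-compiled case free of lock traffic, while the locked re-check handles concurrent first uses; the blit2d hunks below have the same structure.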
Diffstat (limited to 'src/amd/vulkan/radv_meta_blit2d.c')
-rw-r--r-- | src/amd/vulkan/radv_meta_blit2d.c | 69
1 file changed, 68 insertions(+), 1 deletion(-)
diff --git a/src/amd/vulkan/radv_meta_blit2d.c b/src/amd/vulkan/radv_meta_blit2d.c
index 79652856942..d2975532d4b 100644
--- a/src/amd/vulkan/radv_meta_blit2d.c
+++ b/src/amd/vulkan/radv_meta_blit2d.c
@@ -35,6 +35,22 @@ enum blit2d_src_type {
 	BLIT2D_NUM_SRC_TYPES,
 };
 
+static VkResult
+blit2d_init_color_pipeline(struct radv_device *device,
+			   enum blit2d_src_type src_type,
+			   VkFormat format,
+			   uint32_t log2_samples);
+
+static VkResult
+blit2d_init_depth_only_pipeline(struct radv_device *device,
+				enum blit2d_src_type src_type,
+				uint32_t log2_samples);
+
+static VkResult
+blit2d_init_stencil_only_pipeline(struct radv_device *device,
+				  enum blit2d_src_type src_type,
+				  uint32_t log2_samples);
+
 static void
 create_iview(struct radv_cmd_buffer *cmd_buffer,
              struct radv_meta_blit2d_surf *surf,
@@ -268,6 +284,14 @@ radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer,
 			unsigned fs_key = radv_format_meta_fs_key(dst_temps.iview.vk_format);
 			unsigned dst_layout = radv_meta_dst_layout_from_layout(dst->current_layout);
 
+			if (device->meta_state.blit2d[log2_samples].pipelines[src_type][fs_key] == VK_NULL_HANDLE) {
+				VkResult ret = blit2d_init_color_pipeline(device, src_type, radv_fs_key_format_exemplars[fs_key], log2_samples);
+				if (ret != VK_SUCCESS) {
+					cmd_buffer->record_result = ret;
+					goto fail_pipeline;
+				}
+			}
+
 			radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
 					&(VkRenderPassBeginInfo) {
 						.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
@@ -285,6 +309,15 @@ radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer,
 			bind_pipeline(cmd_buffer, src_type, fs_key, log2_samples);
 		} else if (aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) {
 			enum radv_blit_ds_layout ds_layout = radv_meta_blit_ds_to_type(dst->current_layout);
+
+			if (device->meta_state.blit2d[log2_samples].depth_only_pipeline[src_type] == VK_NULL_HANDLE) {
+				VkResult ret = blit2d_init_depth_only_pipeline(device, src_type, log2_samples);
+				if (ret != VK_SUCCESS) {
+					cmd_buffer->record_result = ret;
+					goto fail_pipeline;
+				}
+			}
+
 			radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
 					&(VkRenderPassBeginInfo) {
 						.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
@@ -303,6 +336,15 @@ radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer,
 		} else if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
 			enum radv_blit_ds_layout ds_layout = radv_meta_blit_ds_to_type(dst->current_layout);
+
+			if (device->meta_state.blit2d[log2_samples].stencil_only_pipeline[src_type] == VK_NULL_HANDLE) {
+				VkResult ret = blit2d_init_stencil_only_pipeline(device, src_type, log2_samples);
+				if (ret != VK_SUCCESS) {
+					cmd_buffer->record_result = ret;
+					goto fail_pipeline;
+				}
+			}
+
 			radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
 					&(VkRenderPassBeginInfo) {
 						.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
@@ -357,6 +399,7 @@ radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer,
 		radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
 		radv_CmdEndRenderPass(radv_cmd_buffer_to_handle(cmd_buffer));
 
+fail_pipeline:
 		/* At the point where we emit the draw call, all data from the
 		 * descriptor sets, etc. has been used. We are free to delete it.
 		 */
@@ -737,6 +780,12 @@ blit2d_init_color_pipeline(struct radv_device *device,
 	unsigned fs_key = radv_format_meta_fs_key(format);
 	const char *name;
 
+	mtx_lock(&device->meta_state.mtx);
+	if (device->meta_state.blit2d[log2_samples].pipelines[src_type][fs_key]) {
+		mtx_unlock(&device->meta_state.mtx);
+		return VK_SUCCESS;
+	}
+
 	texel_fetch_build_func src_func;
 	switch(src_type) {
 	case BLIT2D_SRC_TYPE_IMAGE:
@@ -894,6 +943,7 @@ blit2d_init_color_pipeline(struct radv_device *device,
 
 	ralloc_free(vs.nir);
 	ralloc_free(fs.nir);
 
+	mtx_unlock(&device->meta_state.mtx);
 	return result;
 }
@@ -905,6 +955,12 @@ blit2d_init_depth_only_pipeline(struct radv_device *device,
 	VkResult result;
 	const char *name;
 
+	mtx_lock(&device->meta_state.mtx);
+	if (device->meta_state.blit2d[log2_samples].depth_only_pipeline[src_type]) {
+		mtx_unlock(&device->meta_state.mtx);
+		return VK_SUCCESS;
+	}
+
 	texel_fetch_build_func src_func;
 	switch(src_type) {
 	case BLIT2D_SRC_TYPE_IMAGE:
@@ -1057,6 +1113,7 @@ blit2d_init_depth_only_pipeline(struct radv_device *device,
 
 	ralloc_free(vs.nir);
 	ralloc_free(fs.nir);
 
+	mtx_unlock(&device->meta_state.mtx);
 	return result;
 }
@@ -1068,6 +1125,12 @@ blit2d_init_stencil_only_pipeline(struct radv_device *device,
 	VkResult result;
 	const char *name;
 
+	mtx_lock(&device->meta_state.mtx);
+	if (device->meta_state.blit2d[log2_samples].stencil_only_pipeline[src_type]) {
+		mtx_unlock(&device->meta_state.mtx);
+		return VK_SUCCESS;
+	}
+
 	texel_fetch_build_func src_func;
 	switch(src_type) {
 	case BLIT2D_SRC_TYPE_IMAGE:
@@ -1236,6 +1299,7 @@ blit2d_init_stencil_only_pipeline(struct radv_device *device,
 
 	ralloc_free(vs.nir);
 	ralloc_free(fs.nir);
 
+	mtx_unlock(&device->meta_state.mtx);
 	return result;
 }
@@ -1287,7 +1351,7 @@ fail:
 }
 
 VkResult
-radv_device_init_meta_blit2d_state(struct radv_device *device)
+radv_device_init_meta_blit2d_state(struct radv_device *device, bool on_demand)
 {
 	VkResult result;
 	bool create_3d = device->physical_device->rad_info.chip_class >= GFX9;
@@ -1305,6 +1369,9 @@ radv_device_init_meta_blit2d_state(struct radv_device *device)
 			if (result != VK_SUCCESS)
 				goto fail;
 
+			if (on_demand)
+				continue;
+
 			for (unsigned j = 0; j < NUM_META_FS_KEYS; ++j) {
 				result = blit2d_init_color_pipeline(device, src, radv_fs_key_format_exemplars[j], log2_samples);
 				if (result != VK_SUCCESS)