diff options
author | Bas Nieuwenhuizen <[email protected]> | 2018-08-14 00:07:57 +0200 |
---|---|---|
committer | Bas Nieuwenhuizen <[email protected]> | 2018-08-14 10:26:24 +0200 |
commit | fbcd1673144facf0f4037330ba3d6b176dad955b (patch) | |
tree | a7ad865c25efd51f3b261d096ec27103b0d6d461 /src/amd/vulkan/radv_meta_clear.c | |
parent | 24a9033d6f7eb88a760d382ace64bffa65d14cdc (diff) |
radv: Add on-demand compilation of built-in shaders.
In environments where we cannot cache, e.g. Android (no homedir),
ChromeOS (readonly rootfs) or sandboxes (cannot open cache), the
startup cost of creating a device in radv is rather high, due
to compiling all possible built-in pipelines up front. This meant,
depending on the CPU, a 1-4 sec cost of creating a Device.
For CTS this cost is unacceptable, and likely for starting random
apps too.
So if there is no cache, with this patch radv will compile shaders
on demand. Once there is a cache from the first run, even if
incomplete, the driver knows that it can likely write the cache
and precompiles everything.
Note that I did not switch the buffer and itob/btoi compute pipelines
to on-demand, since you cannot really do anything in Vulkan without
them and there are only a few.
This reduces the CTS runtime for the no-cache scenario on my
Threadripper from 32 minutes to 8 minutes.
Reviewed-by: Dave Airlie <[email protected]>
Diffstat (limited to 'src/amd/vulkan/radv_meta_clear.c')
-rw-r--r-- | src/amd/vulkan/radv_meta_clear.c | 96 |
1 files changed, 89 insertions, 7 deletions
diff --git a/src/amd/vulkan/radv_meta_clear.c b/src/amd/vulkan/radv_meta_clear.c index 4f77e32b83f..0ae7191f17d 100644 --- a/src/amd/vulkan/radv_meta_clear.c +++ b/src/amd/vulkan/radv_meta_clear.c @@ -200,7 +200,13 @@ create_color_renderpass(struct radv_device *device, uint32_t samples, VkRenderPass *pass) { - return radv_CreateRenderPass(radv_device_to_handle(device), + mtx_lock(&device->meta_state.mtx); + if (*pass) { + mtx_unlock (&device->meta_state.mtx); + return VK_SUCCESS; + } + + VkResult result = radv_CreateRenderPass(radv_device_to_handle(device), &(VkRenderPassCreateInfo) { .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, .attachmentCount = 1, @@ -231,6 +237,8 @@ create_color_renderpass(struct radv_device *device, }, .dependencyCount = 0, }, &device->meta_state.alloc, pass); + mtx_unlock(&device->meta_state.mtx); + return result; } static VkResult @@ -243,6 +251,13 @@ create_color_pipeline(struct radv_device *device, struct nir_shader *vs_nir; struct nir_shader *fs_nir; VkResult result; + + mtx_lock(&device->meta_state.mtx); + if (*pipeline) { + mtx_unlock(&device->meta_state.mtx); + return VK_SUCCESS; + } + build_color_shaders(&vs_nir, &fs_nir, frag_output); const VkPipelineVertexInputStateCreateInfo vi_state = { @@ -284,6 +299,7 @@ create_color_pipeline(struct radv_device *device, device->meta_state.clear_color_p_layout, &extra, &device->meta_state.alloc, pipeline); + mtx_unlock(&device->meta_state.mtx); return result; } @@ -349,6 +365,26 @@ emit_color_clear(struct radv_cmd_buffer *cmd_buffer, return; } + if (device->meta_state.clear[samples_log2].render_pass[fs_key] == VK_NULL_HANDLE) { + VkResult ret = create_color_renderpass(device, radv_fs_key_format_exemplars[fs_key], + samples, + &device->meta_state.clear[samples_log2].render_pass[fs_key]); + if (ret != VK_SUCCESS) { + cmd_buffer->record_result = ret; + return; + } + } + + if (device->meta_state.clear[samples_log2].color_pipelines[fs_key] == VK_NULL_HANDLE) { + VkResult ret = 
create_color_pipeline(device, samples, 0, + &device->meta_state.clear[samples_log2].color_pipelines[fs_key], + device->meta_state.clear[samples_log2].render_pass[fs_key]); + if (ret != VK_SUCCESS) { + cmd_buffer->record_result = ret; + return; + } + } + pipeline = device->meta_state.clear[samples_log2].color_pipelines[fs_key]; if (!pipeline) { radv_finishme("color clears incomplete"); @@ -449,7 +485,13 @@ create_depthstencil_renderpass(struct radv_device *device, uint32_t samples, VkRenderPass *render_pass) { - return radv_CreateRenderPass(radv_device_to_handle(device), + mtx_lock(&device->meta_state.mtx); + if (*render_pass) { + mtx_unlock(&device->meta_state.mtx); + return VK_SUCCESS; + } + + VkResult result = radv_CreateRenderPass(radv_device_to_handle(device), &(VkRenderPassCreateInfo) { .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, .attachmentCount = 1, @@ -477,6 +519,8 @@ create_depthstencil_renderpass(struct radv_device *device, }, .dependencyCount = 0, }, &device->meta_state.alloc, render_pass); + mtx_unlock(&device->meta_state.mtx); + return result; } static VkResult @@ -489,6 +533,13 @@ create_depthstencil_pipeline(struct radv_device *device, { struct nir_shader *vs_nir, *fs_nir; VkResult result; + + mtx_lock(&device->meta_state.mtx); + if (*pipeline) { + mtx_unlock(&device->meta_state.mtx); + return VK_SUCCESS; + } + build_depthstencil_shader(&vs_nir, &fs_nir); const VkPipelineVertexInputStateCreateInfo vi_state = { @@ -536,6 +587,8 @@ create_depthstencil_pipeline(struct radv_device *device, samples, vs_nir, fs_nir, &vi_state, &ds_state, &cb_state, device->meta_state.clear_depth_p_layout, &extra, &device->meta_state.alloc, pipeline); + + mtx_unlock(&device->meta_state.mtx); return result; } @@ -579,6 +632,7 @@ pick_depthstencil_pipeline(struct radv_cmd_buffer *cmd_buffer, { bool fast = depth_view_can_fast_clear(cmd_buffer, iview, aspects, layout, clear_rect, clear_value); int index = DEPTH_CLEAR_SLOW; + VkPipeline *pipeline; if (fast) { /* we don't 
know the previous clear values, so we always have @@ -588,13 +642,36 @@ pick_depthstencil_pipeline(struct radv_cmd_buffer *cmd_buffer, switch (aspects) { case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT: - return meta_state->clear[samples_log2].depthstencil_pipeline[index]; + pipeline = &meta_state->clear[samples_log2].depthstencil_pipeline[index]; + break; case VK_IMAGE_ASPECT_DEPTH_BIT: - return meta_state->clear[samples_log2].depth_only_pipeline[index]; + pipeline = &meta_state->clear[samples_log2].depth_only_pipeline[index]; + break; case VK_IMAGE_ASPECT_STENCIL_BIT: - return meta_state->clear[samples_log2].stencil_only_pipeline[index]; + pipeline = &meta_state->clear[samples_log2].stencil_only_pipeline[index]; + break; + default: + unreachable("expected depth or stencil aspect"); + } + + if (cmd_buffer->device->meta_state.clear[samples_log2].depthstencil_rp == VK_NULL_HANDLE) { + VkResult ret = create_depthstencil_renderpass(cmd_buffer->device, 1u << samples_log2, + &cmd_buffer->device->meta_state.clear[samples_log2].depthstencil_rp); + if (ret != VK_SUCCESS) { + cmd_buffer->record_result = ret; + return VK_NULL_HANDLE; + } } - unreachable("expected depth or stencil aspect"); + + if (*pipeline == VK_NULL_HANDLE) { + VkResult ret = create_depthstencil_pipeline(cmd_buffer->device, aspects, 1u << samples_log2, index, + pipeline, cmd_buffer->device->meta_state.clear[samples_log2].depthstencil_rp); + if (ret != VK_SUCCESS) { + cmd_buffer->record_result = ret; + return VK_NULL_HANDLE; + } + } + return *pipeline; } static void @@ -638,6 +715,8 @@ emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer, subpass->depth_stencil_attachment.layout, clear_rect, clear_value); + if (!pipeline) + return; radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); @@ -758,7 +837,7 @@ fail: } VkResult -radv_device_init_meta_clear_state(struct radv_device *device) +radv_device_init_meta_clear_state(struct radv_device *device, bool on_demand) { 
VkResult res; struct radv_meta_state *state = &device->meta_state; @@ -791,6 +870,9 @@ radv_device_init_meta_clear_state(struct radv_device *device) if (res != VK_SUCCESS) goto fail; + if (on_demand) + return VK_SUCCESS; + for (uint32_t i = 0; i < ARRAY_SIZE(state->clear); ++i) { uint32_t samples = 1 << i; for (uint32_t j = 0; j < NUM_META_FS_KEYS; ++j) { |