diff options
-rw-r--r-- | src/amd/vulkan/radv_meta.c | 36 | ||||
-rw-r--r-- | src/amd/vulkan/radv_meta.h | 18 | ||||
-rw-r--r-- | src/amd/vulkan/radv_meta_blit.c | 86 | ||||
-rw-r--r-- | src/amd/vulkan/radv_meta_blit2d.c | 69 | ||||
-rw-r--r-- | src/amd/vulkan/radv_meta_clear.c | 96 | ||||
-rw-r--r-- | src/amd/vulkan/radv_meta_decompress.c | 32 | ||||
-rw-r--r-- | src/amd/vulkan/radv_meta_fast_clear.c | 29 | ||||
-rw-r--r-- | src/amd/vulkan/radv_meta_resolve.c | 47 | ||||
-rw-r--r-- | src/amd/vulkan/radv_meta_resolve_cs.c | 34 | ||||
-rw-r--r-- | src/amd/vulkan/radv_meta_resolve_fs.c | 31 | ||||
-rw-r--r-- | src/amd/vulkan/radv_pipeline_cache.c | 16 | ||||
-rw-r--r-- | src/amd/vulkan/radv_private.h | 8 | ||||
-rw-r--r-- | src/amd/vulkan/radv_query.c | 32 |
13 files changed, 454 insertions, 80 deletions
diff --git a/src/amd/vulkan/radv_meta.c b/src/amd/vulkan/radv_meta.c index a59f38fb21d..ccdcb9ceb07 100644 --- a/src/amd/vulkan/radv_meta.c +++ b/src/amd/vulkan/radv_meta.c @@ -257,7 +257,7 @@ radv_builtin_cache_path(char *path) return true; } -static void +static bool radv_load_meta_pipeline(struct radv_device *device) { char path[PATH_MAX + 1]; @@ -265,11 +265,11 @@ radv_load_meta_pipeline(struct radv_device *device) void *data = NULL; if (!radv_builtin_cache_path(path)) - return; + return false; int fd = open(path, O_RDONLY); if (fd < 0) - return; + return false; if (fstat(fd, &st)) goto fail; data = malloc(st.st_size); @@ -278,10 +278,11 @@ radv_load_meta_pipeline(struct radv_device *device) if(read(fd, data, st.st_size) == -1) goto fail; - radv_pipeline_cache_load(&device->meta_state.cache, data, st.st_size); + return radv_pipeline_cache_load(&device->meta_state.cache, data, st.st_size); fail: free(data); close(fd); + return false; } static void @@ -330,6 +331,8 @@ radv_device_init_meta(struct radv_device *device) { VkResult result; + memset(&device->meta_state, 0, sizeof(device->meta_state)); + device->meta_state.alloc = (VkAllocationCallbacks) { .pUserData = device, .pfnAllocation = meta_alloc, @@ -339,21 +342,24 @@ radv_device_init_meta(struct radv_device *device) device->meta_state.cache.alloc = device->meta_state.alloc; radv_pipeline_cache_init(&device->meta_state.cache, device); - radv_load_meta_pipeline(device); + bool loaded_cache = radv_load_meta_pipeline(device); + bool on_demand = !loaded_cache; + + mtx_init(&device->meta_state.mtx, mtx_plain); - result = radv_device_init_meta_clear_state(device); + result = radv_device_init_meta_clear_state(device, on_demand); if (result != VK_SUCCESS) goto fail_clear; - result = radv_device_init_meta_resolve_state(device); + result = radv_device_init_meta_resolve_state(device, on_demand); if (result != VK_SUCCESS) goto fail_resolve; - result = radv_device_init_meta_blit_state(device); + result = radv_device_init_meta_blit_state(device, on_demand); if (result != VK_SUCCESS) goto fail_blit; - result = radv_device_init_meta_blit2d_state(device); + result = radv_device_init_meta_blit2d_state(device, on_demand); if (result != VK_SUCCESS) goto fail_blit2d; @@ -361,7 +367,7 @@ radv_device_init_meta(struct radv_device *device) if (result != VK_SUCCESS) goto fail_bufimage; - result = radv_device_init_meta_depth_decomp_state(device); + result = radv_device_init_meta_depth_decomp_state(device, on_demand); if (result != VK_SUCCESS) goto fail_depth_decomp; @@ -369,19 +375,19 @@ radv_device_init_meta(struct radv_device *device) if (result != VK_SUCCESS) goto fail_buffer; - result = radv_device_init_meta_query_state(device); + result = radv_device_init_meta_query_state(device, on_demand); if (result != VK_SUCCESS) goto fail_query; - result = radv_device_init_meta_fast_clear_flush_state(device); + result = radv_device_init_meta_fast_clear_flush_state(device, on_demand); if (result != VK_SUCCESS) goto fail_fast_clear; - result = radv_device_init_meta_resolve_compute_state(device); + result = radv_device_init_meta_resolve_compute_state(device, on_demand); if (result != VK_SUCCESS) goto fail_resolve_compute; - result = radv_device_init_meta_resolve_fragment_state(device); + result = radv_device_init_meta_resolve_fragment_state(device, on_demand); if (result != VK_SUCCESS) goto fail_resolve_fragment; return VK_SUCCESS; @@ -407,6 +413,7 @@ fail_blit: fail_resolve: radv_device_finish_meta_clear_state(device); fail_clear: + mtx_destroy(&device->meta_state.mtx); radv_pipeline_cache_finish(&device->meta_state.cache); return result; } @@ -428,6 +435,7 @@ radv_device_finish_meta(struct radv_device *device) radv_store_meta_pipeline(device); radv_pipeline_cache_finish(&device->meta_state.cache); + mtx_destroy(&device->meta_state.mtx); } nir_ssa_def *radv_meta_gen_rect_vertices_comp2(nir_builder *vs_b, nir_ssa_def *comp2) diff --git a/src/amd/vulkan/radv_meta.h b/src/amd/vulkan/radv_meta.h index 35067f67124..f8d48f4d791 100644 --- a/src/amd/vulkan/radv_meta.h +++ b/src/amd/vulkan/radv_meta.h @@ -58,34 +58,34 @@ struct radv_meta_saved_state { VkRect2D render_area; }; -VkResult radv_device_init_meta_clear_state(struct radv_device *device); +VkResult radv_device_init_meta_clear_state(struct radv_device *device, bool on_demand); void radv_device_finish_meta_clear_state(struct radv_device *device); -VkResult radv_device_init_meta_resolve_state(struct radv_device *device); +VkResult radv_device_init_meta_resolve_state(struct radv_device *device, bool on_demand); void radv_device_finish_meta_resolve_state(struct radv_device *device); -VkResult radv_device_init_meta_depth_decomp_state(struct radv_device *device); +VkResult radv_device_init_meta_depth_decomp_state(struct radv_device *device, bool on_demand); void radv_device_finish_meta_depth_decomp_state(struct radv_device *device); -VkResult radv_device_init_meta_fast_clear_flush_state(struct radv_device *device); +VkResult radv_device_init_meta_fast_clear_flush_state(struct radv_device *device, bool on_demand); void radv_device_finish_meta_fast_clear_flush_state(struct radv_device *device); -VkResult radv_device_init_meta_blit_state(struct radv_device *device); +VkResult radv_device_init_meta_blit_state(struct radv_device *device, bool on_demand); void radv_device_finish_meta_blit_state(struct radv_device *device); -VkResult radv_device_init_meta_blit2d_state(struct radv_device *device); +VkResult radv_device_init_meta_blit2d_state(struct radv_device *device, bool on_demand); void radv_device_finish_meta_blit2d_state(struct radv_device *device); VkResult radv_device_init_meta_buffer_state(struct radv_device *device); void radv_device_finish_meta_buffer_state(struct radv_device *device); -VkResult radv_device_init_meta_query_state(struct radv_device *device); +VkResult radv_device_init_meta_query_state(struct radv_device *device, bool on_demand); void radv_device_finish_meta_query_state(struct radv_device *device); -VkResult radv_device_init_meta_resolve_compute_state(struct radv_device *device); +VkResult radv_device_init_meta_resolve_compute_state(struct radv_device *device, bool on_demand); void radv_device_finish_meta_resolve_compute_state(struct radv_device *device); -VkResult radv_device_init_meta_resolve_fragment_state(struct radv_device *device); +VkResult radv_device_init_meta_resolve_fragment_state(struct radv_device *device, bool on_demand); void radv_device_finish_meta_resolve_fragment_state(struct radv_device *device); void radv_meta_save(struct radv_meta_saved_state *saved_state, diff --git a/src/amd/vulkan/radv_meta_blit.c b/src/amd/vulkan/radv_meta_blit.c index 370d4fcd831..a205686e553 100644 --- a/src/amd/vulkan/radv_meta_blit.c +++ b/src/amd/vulkan/radv_meta_blit.c @@ -31,6 +31,13 @@ struct blit_region { VkExtent3D dest_extent; }; +static VkResult +build_pipeline(struct radv_device *device, + VkImageAspectFlagBits aspect, + enum glsl_sampler_dim tex_dim, + unsigned fs_key, + VkPipeline *pipeline); + static nir_shader * build_nir_vertex_shader(void) { @@ -273,6 +280,20 @@ build_nir_copy_fragment_shader_stencil(enum glsl_sampler_dim tex_dim) return b.shader; } +static enum glsl_sampler_dim +translate_sampler_dim(VkImageType type) { + switch(type) { + case VK_IMAGE_TYPE_1D: + return GLSL_SAMPLER_DIM_1D; + case VK_IMAGE_TYPE_2D: + return GLSL_SAMPLER_DIM_2D; + case VK_IMAGE_TYPE_3D: + return GLSL_SAMPLER_DIM_3D; + default: + unreachable("Unhandled image type"); + } +} + static void meta_emit_blit(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, @@ -333,11 +354,12 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer, .height = dst_height, .layers = 1, }, &cmd_buffer->pool->alloc, &fb); - VkPipeline pipeline; + VkPipeline* pipeline = NULL; + unsigned fs_key = 0; switch (src_iview->aspect_mask) { case VK_IMAGE_ASPECT_COLOR_BIT: { - unsigned fs_key = radv_format_meta_fs_key(dest_image->vk_format); unsigned dst_layout = radv_meta_dst_layout_from_layout(dest_image_layout); + fs_key = radv_format_meta_fs_key(dest_image->vk_format); radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer), &(VkRenderPassBeginInfo) { @@ -353,13 +375,13 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer, }, VK_SUBPASS_CONTENTS_INLINE); switch (src_image->type) { case VK_IMAGE_TYPE_1D: - pipeline = device->meta_state.blit.pipeline_1d_src[fs_key]; + pipeline = &device->meta_state.blit.pipeline_1d_src[fs_key]; break; case VK_IMAGE_TYPE_2D: - pipeline = device->meta_state.blit.pipeline_2d_src[fs_key]; + pipeline = &device->meta_state.blit.pipeline_2d_src[fs_key]; break; case VK_IMAGE_TYPE_3D: - pipeline = device->meta_state.blit.pipeline_3d_src[fs_key]; + pipeline = &device->meta_state.blit.pipeline_3d_src[fs_key]; break; default: unreachable(!"bad VkImageType"); @@ -382,13 +404,13 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer, }, VK_SUBPASS_CONTENTS_INLINE); switch (src_image->type) { case VK_IMAGE_TYPE_1D: - pipeline = device->meta_state.blit.depth_only_1d_pipeline; + pipeline = &device->meta_state.blit.depth_only_1d_pipeline; break; case VK_IMAGE_TYPE_2D: - pipeline = device->meta_state.blit.depth_only_2d_pipeline; + pipeline = &device->meta_state.blit.depth_only_2d_pipeline; break; case VK_IMAGE_TYPE_3D: - pipeline = device->meta_state.blit.depth_only_3d_pipeline; + pipeline = &device->meta_state.blit.depth_only_3d_pipeline; break; default: unreachable(!"bad VkImageType"); @@ -411,13 +433,13 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer, }, VK_SUBPASS_CONTENTS_INLINE); switch (src_image->type) { case VK_IMAGE_TYPE_1D: - pipeline = device->meta_state.blit.stencil_only_1d_pipeline; + pipeline = &device->meta_state.blit.stencil_only_1d_pipeline; break; case VK_IMAGE_TYPE_2D: - pipeline = device->meta_state.blit.stencil_only_2d_pipeline; + pipeline = &device->meta_state.blit.stencil_only_2d_pipeline; break; case VK_IMAGE_TYPE_3D: - pipeline = device->meta_state.blit.stencil_only_3d_pipeline; + pipeline = &device->meta_state.blit.stencil_only_3d_pipeline; break; default: unreachable(!"bad VkImageType"); @@ -428,8 +450,16 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer, unreachable(!"bad VkImageType"); } + if (!*pipeline) { + VkResult ret = build_pipeline(device, src_iview->aspect_mask, translate_sampler_dim(src_image->type), fs_key, pipeline); + if (ret != VK_SUCCESS) { + cmd_buffer->record_result = ret; + goto fail_pipeline; + } + } + radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), - VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, device->meta_state.blit.pipeline_layout, @@ -471,6 +501,7 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer, radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0); +fail_pipeline: radv_CmdEndRenderPass(radv_cmd_buffer_to_handle(cmd_buffer)); /* At the point where we emit the draw call, all data from the @@ -722,6 +753,14 @@ build_pipeline(struct radv_device *device, VkPipeline *pipeline) { VkResult result = VK_SUCCESS; + + mtx_lock(&device->meta_state.mtx); + + if (*pipeline) { + mtx_unlock(&device->meta_state.mtx); + return VK_SUCCESS; + } + struct radv_shader_module fs = {0}; struct radv_shader_module vs = {.nir = build_nir_vertex_shader()}; VkRenderPass rp; @@ -871,11 +910,12 @@ build_pipeline(struct radv_device *device, &device->meta_state.alloc, pipeline); ralloc_free(vs.nir); ralloc_free(fs.nir); + mtx_unlock(&device->meta_state.mtx); return result; } static VkResult -radv_device_init_meta_blit_color(struct radv_device *device) +radv_device_init_meta_blit_color(struct radv_device *device, bool on_demand) { VkResult result; @@ -917,6 +957,9 @@ radv_device_init_meta_blit_color(struct radv_device *device) goto fail; } + if (on_demand) + continue; + result = build_pipeline(device, VK_IMAGE_ASPECT_COLOR_BIT, GLSL_SAMPLER_DIM_1D, key, &device->meta_state.blit.pipeline_1d_src[key]); if (result != VK_SUCCESS) goto fail; @@ -937,7 +980,7 @@ fail: } static VkResult -radv_device_init_meta_blit_depth(struct radv_device *device) +radv_device_init_meta_blit_depth(struct radv_device *device, bool on_demand) { VkResult result; @@ -974,6 +1017,9 @@ radv_device_init_meta_blit_depth(struct radv_device *device) goto fail; } + if (on_demand) + return VK_SUCCESS; + result = build_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT, GLSL_SAMPLER_DIM_1D, 0, &device->meta_state.blit.depth_only_1d_pipeline); if (result != VK_SUCCESS) goto fail; @@ -991,7 +1037,7 @@ fail: } static VkResult -radv_device_init_meta_blit_stencil(struct radv_device *device) +radv_device_init_meta_blit_stencil(struct radv_device *device, bool on_demand) { VkResult result; @@ -1028,6 +1074,8 @@ radv_device_init_meta_blit_stencil(struct radv_device *device) if (result != VK_SUCCESS) goto fail; + if (on_demand) + return VK_SUCCESS; result = build_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, GLSL_SAMPLER_DIM_1D, 0, &device->meta_state.blit.stencil_only_1d_pipeline); if (result != VK_SUCCESS) @@ -1047,7 +1095,7 @@ fail: } VkResult -radv_device_init_meta_blit_state(struct radv_device *device) +radv_device_init_meta_blit_state(struct radv_device *device, bool on_demand) { VkResult result; @@ -1086,15 +1134,15 @@ radv_device_init_meta_blit_state(struct radv_device *device) if (result != VK_SUCCESS) goto fail; - result = radv_device_init_meta_blit_color(device); + result = radv_device_init_meta_blit_color(device, on_demand); if (result != VK_SUCCESS) goto fail; - result = radv_device_init_meta_blit_depth(device); + result = radv_device_init_meta_blit_depth(device, on_demand); if (result != VK_SUCCESS) goto fail; - result = radv_device_init_meta_blit_stencil(device); + result = radv_device_init_meta_blit_stencil(device, on_demand); fail: if (result != VK_SUCCESS) diff --git a/src/amd/vulkan/radv_meta_blit2d.c b/src/amd/vulkan/radv_meta_blit2d.c index 79652856942..d2975532d4b 100644 --- a/src/amd/vulkan/radv_meta_blit2d.c +++ b/src/amd/vulkan/radv_meta_blit2d.c @@ -35,6 +35,22 @@ enum blit2d_src_type { BLIT2D_NUM_SRC_TYPES, }; +static VkResult +blit2d_init_color_pipeline(struct radv_device *device, + enum blit2d_src_type src_type, + VkFormat format, + uint32_t log2_samples); + +static VkResult +blit2d_init_depth_only_pipeline(struct radv_device *device, + enum blit2d_src_type src_type, + uint32_t log2_samples); + +static VkResult +blit2d_init_stencil_only_pipeline(struct radv_device *device, + enum blit2d_src_type src_type, + uint32_t log2_samples); + static void create_iview(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *surf, @@ -268,6 +284,14 @@ radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer, unsigned fs_key = radv_format_meta_fs_key(dst_temps.iview.vk_format); unsigned dst_layout = radv_meta_dst_layout_from_layout(dst->current_layout); + if (device->meta_state.blit2d[log2_samples].pipelines[src_type][fs_key] == VK_NULL_HANDLE) { + VkResult ret = blit2d_init_color_pipeline(device, src_type, radv_fs_key_format_exemplars[fs_key], log2_samples); + if (ret != VK_SUCCESS) { + cmd_buffer->record_result = ret; + goto fail_pipeline; + } + } + radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer), &(VkRenderPassBeginInfo) { .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, @@ -285,6 +309,15 @@ radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer, bind_pipeline(cmd_buffer, src_type, fs_key, log2_samples); } else if (aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) { enum radv_blit_ds_layout ds_layout = radv_meta_blit_ds_to_type(dst->current_layout); + + if (device->meta_state.blit2d[log2_samples].depth_only_pipeline[src_type] == VK_NULL_HANDLE) { + VkResult ret = blit2d_init_depth_only_pipeline(device, src_type, log2_samples); + if (ret != VK_SUCCESS) { + cmd_buffer->record_result = ret; + goto fail_pipeline; + } + } + radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer), &(VkRenderPassBeginInfo) { .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, @@ -303,6 +336,15 @@ radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer, } else if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) { enum radv_blit_ds_layout ds_layout = radv_meta_blit_ds_to_type(dst->current_layout); + + if (device->meta_state.blit2d[log2_samples].stencil_only_pipeline[src_type] == VK_NULL_HANDLE) { + VkResult ret = blit2d_init_stencil_only_pipeline(device, src_type, log2_samples); + if (ret != VK_SUCCESS) { + cmd_buffer->record_result = ret; + goto fail_pipeline; + } + } + radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer), &(VkRenderPassBeginInfo) { .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, @@ -357,6 +399,7 @@ radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer, radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0); radv_CmdEndRenderPass(radv_cmd_buffer_to_handle(cmd_buffer)); +fail_pipeline: /* At the point where we emit the draw call, all data from the * descriptor sets, etc. has been used. We are free to delete it. */ @@ -737,6 +780,12 @@ blit2d_init_color_pipeline(struct radv_device *device, unsigned fs_key = radv_format_meta_fs_key(format); const char *name; + mtx_lock(&device->meta_state.mtx); + if (device->meta_state.blit2d[log2_samples].pipelines[src_type][fs_key]) { + mtx_unlock(&device->meta_state.mtx); + return VK_SUCCESS; + } + texel_fetch_build_func src_func; switch(src_type) { case BLIT2D_SRC_TYPE_IMAGE: @@ -894,6 +943,7 @@ blit2d_init_color_pipeline(struct radv_device *device, ralloc_free(vs.nir); ralloc_free(fs.nir); + mtx_unlock(&device->meta_state.mtx); return result; } @@ -905,6 +955,12 @@ blit2d_init_depth_only_pipeline(struct radv_device *device, VkResult result; const char *name; + mtx_lock(&device->meta_state.mtx); + if (device->meta_state.blit2d[log2_samples].depth_only_pipeline[src_type]) { + mtx_unlock(&device->meta_state.mtx); + return VK_SUCCESS; + } + texel_fetch_build_func src_func; switch(src_type) { case BLIT2D_SRC_TYPE_IMAGE: @@ -1057,6 +1113,7 @@ blit2d_init_depth_only_pipeline(struct radv_device *device, ralloc_free(vs.nir); ralloc_free(fs.nir); + mtx_unlock(&device->meta_state.mtx); return result; } @@ -1068,6 +1125,12 @@ blit2d_init_stencil_only_pipeline(struct radv_device *device, VkResult result; const char *name; + mtx_lock(&device->meta_state.mtx); + if (device->meta_state.blit2d[log2_samples].stencil_only_pipeline[src_type]) { + mtx_unlock(&device->meta_state.mtx); + return VK_SUCCESS; + } + texel_fetch_build_func src_func; switch(src_type) { case BLIT2D_SRC_TYPE_IMAGE: @@ -1236,6 +1299,7 @@ blit2d_init_stencil_only_pipeline(struct radv_device *device, ralloc_free(vs.nir); ralloc_free(fs.nir); + mtx_unlock(&device->meta_state.mtx); return result; } @@ -1287,7 +1351,7 @@ fail: } VkResult -radv_device_init_meta_blit2d_state(struct radv_device *device) +radv_device_init_meta_blit2d_state(struct radv_device *device, bool on_demand) { VkResult result; bool create_3d = device->physical_device->rad_info.chip_class >= GFX9; @@ -1305,6 +1369,9 @@ radv_device_init_meta_blit2d_state(struct radv_device *device) if (result != VK_SUCCESS) goto fail; + if (on_demand) + continue; + for (unsigned j = 0; j < NUM_META_FS_KEYS; ++j) { result = blit2d_init_color_pipeline(device, src, radv_fs_key_format_exemplars[j], log2_samples); if (result != VK_SUCCESS) diff --git a/src/amd/vulkan/radv_meta_clear.c b/src/amd/vulkan/radv_meta_clear.c index 4f77e32b83f..0ae7191f17d 100644 --- a/src/amd/vulkan/radv_meta_clear.c +++ b/src/amd/vulkan/radv_meta_clear.c @@ -200,7 +200,13 @@ create_color_renderpass(struct radv_device *device, uint32_t samples, VkRenderPass *pass) { - return radv_CreateRenderPass(radv_device_to_handle(device), + mtx_lock(&device->meta_state.mtx); + if (*pass) { + mtx_unlock (&device->meta_state.mtx); + return VK_SUCCESS; + } + + VkResult result = radv_CreateRenderPass(radv_device_to_handle(device), &(VkRenderPassCreateInfo) { .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, .attachmentCount = 1, @@ -231,6 +237,8 @@ create_color_renderpass(struct radv_device *device, }, .dependencyCount = 0, }, &device->meta_state.alloc, pass); + mtx_unlock(&device->meta_state.mtx); + return result; } static VkResult @@ -243,6 +251,13 @@ create_color_pipeline(struct radv_device *device, struct nir_shader *vs_nir; struct nir_shader *fs_nir; VkResult result; + + mtx_lock(&device->meta_state.mtx); + if (*pipeline) { + mtx_unlock(&device->meta_state.mtx); + return VK_SUCCESS; + } + build_color_shaders(&vs_nir, &fs_nir, frag_output); const VkPipelineVertexInputStateCreateInfo vi_state = { @@ -284,6 +299,7 @@ create_color_pipeline(struct radv_device *device, device->meta_state.clear_color_p_layout, &extra, &device->meta_state.alloc, pipeline); + mtx_unlock(&device->meta_state.mtx); return result; } @@ -349,6 +365,26 @@ emit_color_clear(struct radv_cmd_buffer *cmd_buffer, return; } + if (device->meta_state.clear[samples_log2].render_pass[fs_key] == VK_NULL_HANDLE) { + VkResult ret = create_color_renderpass(device, radv_fs_key_format_exemplars[fs_key], + samples, + &device->meta_state.clear[samples_log2].render_pass[fs_key]); + if (ret != VK_SUCCESS) { + cmd_buffer->record_result = ret; + return; + } + } + + if (device->meta_state.clear[samples_log2].color_pipelines[fs_key] == VK_NULL_HANDLE) { + VkResult ret = create_color_pipeline(device, samples, 0, + &device->meta_state.clear[samples_log2].color_pipelines[fs_key], + device->meta_state.clear[samples_log2].render_pass[fs_key]); + if (ret != VK_SUCCESS) { + cmd_buffer->record_result = ret; + return; + } + } + pipeline = device->meta_state.clear[samples_log2].color_pipelines[fs_key]; if (!pipeline) { radv_finishme("color clears incomplete"); @@ -449,7 +485,13 @@ create_depthstencil_renderpass(struct radv_device *device, uint32_t samples, VkRenderPass *render_pass) { - return radv_CreateRenderPass(radv_device_to_handle(device), + mtx_lock(&device->meta_state.mtx); + if (*render_pass) { + mtx_unlock(&device->meta_state.mtx); + return VK_SUCCESS; + } + + VkResult result = radv_CreateRenderPass(radv_device_to_handle(device), &(VkRenderPassCreateInfo) { .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, .attachmentCount = 1, @@ -477,6 +519,8 @@ create_depthstencil_renderpass(struct radv_device *device, }, .dependencyCount = 0, }, &device->meta_state.alloc, render_pass); + mtx_unlock(&device->meta_state.mtx); + return result; } static VkResult @@ -489,6 +533,13 @@ create_depthstencil_pipeline(struct radv_device *device, { struct nir_shader *vs_nir, *fs_nir; VkResult result; + + mtx_lock(&device->meta_state.mtx); + if (*pipeline) { + mtx_unlock(&device->meta_state.mtx); + return VK_SUCCESS; + } + build_depthstencil_shader(&vs_nir, &fs_nir); const VkPipelineVertexInputStateCreateInfo vi_state = { @@ -536,6 +587,8 @@ create_depthstencil_pipeline(struct radv_device *device, samples, vs_nir, fs_nir, &vi_state, &ds_state, &cb_state, device->meta_state.clear_depth_p_layout, &extra, &device->meta_state.alloc, pipeline); + + mtx_unlock(&device->meta_state.mtx); return result; } @@ -579,6 +632,7 @@ pick_depthstencil_pipeline(struct radv_cmd_buffer *cmd_buffer, { bool fast = depth_view_can_fast_clear(cmd_buffer, iview, aspects, layout, clear_rect, clear_value); int index = DEPTH_CLEAR_SLOW; + VkPipeline *pipeline; if (fast) { /* we don't know the previous clear values, so we always have @@ -588,13 +642,36 @@ pick_depthstencil_pipeline(struct radv_cmd_buffer *cmd_buffer, switch (aspects) { case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT: - return meta_state->clear[samples_log2].depthstencil_pipeline[index]; + pipeline = &meta_state->clear[samples_log2].depthstencil_pipeline[index]; + break; case VK_IMAGE_ASPECT_DEPTH_BIT: - return meta_state->clear[samples_log2].depth_only_pipeline[index]; + pipeline = &meta_state->clear[samples_log2].depth_only_pipeline[index]; + break; case VK_IMAGE_ASPECT_STENCIL_BIT: - return meta_state->clear[samples_log2].stencil_only_pipeline[index]; + pipeline = &meta_state->clear[samples_log2].stencil_only_pipeline[index]; + break; + default: + unreachable("expected depth or stencil aspect"); + } + + if (cmd_buffer->device->meta_state.clear[samples_log2].depthstencil_rp == VK_NULL_HANDLE) { + VkResult ret = create_depthstencil_renderpass(cmd_buffer->device, 1u << samples_log2, + &cmd_buffer->device->meta_state.clear[samples_log2].depthstencil_rp); + if (ret != VK_SUCCESS) { + cmd_buffer->record_result = ret; + return VK_NULL_HANDLE; + } } - unreachable("expected depth or stencil aspect"); + + if (*pipeline == VK_NULL_HANDLE) { + VkResult ret = create_depthstencil_pipeline(cmd_buffer->device, aspects, 1u << samples_log2, index, + pipeline, cmd_buffer->device->meta_state.clear[samples_log2].depthstencil_rp); + if (ret != VK_SUCCESS) { + cmd_buffer->record_result = ret; + return VK_NULL_HANDLE; + } + } + return *pipeline; } static void @@ -638,6 +715,8 @@ emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer, subpass->depth_stencil_attachment.layout, clear_rect, clear_value); + if (!pipeline) + return; radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); @@ -758,7 +837,7 @@ fail: } VkResult -radv_device_init_meta_clear_state(struct radv_device *device) +radv_device_init_meta_clear_state(struct radv_device *device, bool on_demand) { VkResult res; struct radv_meta_state *state = &device->meta_state; @@ -791,6 +870,9 @@ radv_device_init_meta_clear_state(struct radv_device *device) if (res != VK_SUCCESS) goto fail; + if (on_demand) + return VK_SUCCESS; + for (uint32_t i = 0; i < ARRAY_SIZE(state->clear); ++i) { uint32_t samples = 1 << i; for (uint32_t j = 0; j < NUM_META_FS_KEYS; ++j) { diff --git a/src/amd/vulkan/radv_meta_decompress.c b/src/amd/vulkan/radv_meta_decompress.c index 1a8058c7cc5..41ed7b6d043 100644 --- a/src/amd/vulkan/radv_meta_decompress.c +++ b/src/amd/vulkan/radv_meta_decompress.c @@ -103,6 +103,18 @@ create_pipeline(struct radv_device *device, { VkResult result; VkDevice device_h = radv_device_to_handle(device); + struct radv_shader_module vs_module = {0}; + + mtx_lock(&device->meta_state.mtx); + if (*decompress_pipeline) { + mtx_unlock(&device->meta_state.mtx); + return VK_SUCCESS; + } + + if (!vs_module_h) { + vs_module.nir = radv_meta_build_nir_vs_generate_vertices(); + vs_module_h = radv_shader_module_to_handle(&vs_module); + } struct radv_shader_module fs_module = { .nir = radv_meta_build_nir_fs_noop(), @@ -219,6 +231,9 @@ create_pipeline(struct radv_device *device, cleanup: ralloc_free(fs_module.nir); + if (vs_module.nir) + ralloc_free(vs_module.nir); + mtx_unlock(&device->meta_state.mtx); return result; } @@ -244,7 +259,7 @@ radv_device_finish_meta_depth_decomp_state(struct radv_device *device) } VkResult -radv_device_init_meta_depth_decomp_state(struct radv_device *device) +radv_device_init_meta_depth_decomp_state(struct radv_device *device, bool on_demand) { struct radv_meta_state *state = &device->meta_state; VkResult res = VK_SUCCESS; @@ -270,6 +285,9 @@ radv_device_init_meta_depth_decomp_state(struct radv_device *device) if (res != VK_SUCCESS) goto fail; + if (on_demand) + continue; + res = create_pipeline(device, vs_module_h, samples, state->depth_decomp[i].pass, state->depth_decomp[i].p_layout, @@ -343,6 +361,18 @@ static void radv_process_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer, if (!radv_image_has_htile(image)) return; + if (!meta_state->depth_decomp[samples_log2].decompress_pipeline) { + VkResult ret = create_pipeline(cmd_buffer->device, NULL, samples, + meta_state->depth_decomp[samples_log2].pass, + meta_state->depth_decomp[samples_log2].p_layout, + &meta_state->depth_decomp[samples_log2].decompress_pipeline, + &meta_state->depth_decomp[samples_log2].resummarize_pipeline); + if (ret != VK_SUCCESS) { + cmd_buffer->record_result = ret; + return; + } + } + radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_PASS); diff --git a/src/amd/vulkan/radv_meta_fast_clear.c b/src/amd/vulkan/radv_meta_fast_clear.c index b42a6783fd2..f469a9ee8f8 100644 --- a/src/amd/vulkan/radv_meta_fast_clear.c +++ b/src/amd/vulkan/radv_meta_fast_clear.c @@ -489,11 +489,17 @@ radv_device_finish_meta_fast_clear_flush_state(struct radv_device *device) &state->alloc); } -VkResult -radv_device_init_meta_fast_clear_flush_state(struct radv_device *device) +static VkResult +radv_device_init_meta_fast_clear_flush_state_internal(struct radv_device *device) { VkResult res = VK_SUCCESS; + mtx_lock(&device->meta_state.mtx); + if (device->meta_state.fast_clear_flush.cmask_eliminate_pipeline) { + mtx_unlock(&device->meta_state.mtx); + return VK_SUCCESS; + } + struct radv_shader_module vs_module = { .nir = radv_meta_build_nir_vs_generate_vertices() }; if (!vs_module.nir) { /* XXX: Need more accurate error */ @@ -527,10 +533,21 @@ fail: cleanup: ralloc_free(vs_module.nir); + mtx_unlock(&device->meta_state.mtx); return res; } + +VkResult +radv_device_init_meta_fast_clear_flush_state(struct radv_device *device, bool on_demand) +{ + if (on_demand) + return VK_SUCCESS; + + return radv_device_init_meta_fast_clear_flush_state_internal(device); +} + static void emit_fast_clear_flush(struct radv_cmd_buffer *cmd_buffer, const VkExtent2D *resolve_extent, @@ -591,6 +608,14 @@ radv_emit_color_decompress(struct radv_cmd_buffer *cmd_buffer, assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL); + if (!cmd_buffer->device->meta_state.fast_clear_flush.cmask_eliminate_pipeline) { + VkResult ret = radv_device_init_meta_fast_clear_flush_state_internal(cmd_buffer->device); + if (ret != VK_SUCCESS) { + cmd_buffer->record_result = ret; + return; + } + } + radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_PASS); diff --git a/src/amd/vulkan/radv_meta_resolve.c b/src/amd/vulkan/radv_meta_resolve.c index 30fed974414..309c7a5be0d 100644 --- a/src/amd/vulkan/radv_meta_resolve.c +++ b/src/amd/vulkan/radv_meta_resolve.c @@ -252,8 +252,11 @@ radv_device_finish_meta_resolve_state(struct radv_device *device) } VkResult -radv_device_init_meta_resolve_state(struct radv_device *device) +radv_device_init_meta_resolve_state(struct radv_device *device, bool on_demand) { + if (on_demand) + return VK_SUCCESS; + VkResult res = VK_SUCCESS; struct radv_meta_state *state = &device->meta_state; struct radv_shader_module vs_module = { .nir = radv_meta_build_nir_vs_generate_vertices() }; @@ -353,6 +356,36 @@ static void radv_pick_resolve_method_images(struct radv_image *src_image, } } +static VkResult +build_resolve_pipeline(struct radv_device *device, + unsigned fs_key) +{ + VkResult result = VK_SUCCESS; + + if (device->meta_state.resolve.pipeline[fs_key]) + return result; + + mtx_lock(&device->meta_state.mtx); + if (device->meta_state.resolve.pipeline[fs_key]) { + mtx_unlock(&device->meta_state.mtx); + return result; + } + + struct radv_shader_module vs_module = { .nir = radv_meta_build_nir_vs_generate_vertices() }; + + result = create_pass(device, radv_fs_key_format_exemplars[fs_key], &device->meta_state.resolve.pass[fs_key]); + if (result != VK_SUCCESS) + goto fail; + + VkShaderModule vs_module_h = radv_shader_module_to_handle(&vs_module); + result = create_pipeline(device, vs_module_h, &device->meta_state.resolve.pipeline[fs_key], device->meta_state.resolve.pass[fs_key]); + +fail: + ralloc_free(vs_module.nir); + mtx_unlock(&device->meta_state.mtx); + return result; +} + void radv_CmdResolveImage( VkCommandBuffer cmd_buffer_h, VkImage src_image_h, @@ -483,6 +516,12 @@ void radv_CmdResolveImage( for (uint32_t layer = 0; layer < region->srcSubresource.layerCount; ++layer) { + VkResult ret = build_resolve_pipeline(device, fs_key); + if (ret != VK_SUCCESS) { + cmd_buffer->record_result = ret; + break; + } + struct radv_image_view src_iview; radv_image_view_init(&src_iview, cmd_buffer->device, &(VkImageViewCreateInfo) { @@ -648,6 +687,12 @@ radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer) radv_cmd_buffer_set_subpass(cmd_buffer, &resolve_subpass, false); + VkResult ret = build_resolve_pipeline(cmd_buffer->device, radv_format_meta_fs_key(dst_img->vk_format)); + if (ret != VK_SUCCESS) { + cmd_buffer->record_result = ret; + continue; + } + emit_resolve(cmd_buffer, dst_img->vk_format, &(VkOffset2D) { 0, 0 }, diff --git a/src/amd/vulkan/radv_meta_resolve_cs.c b/src/amd/vulkan/radv_meta_resolve_cs.c index 2d79cb09fec..fca49a01bb0 100644 --- a/src/amd/vulkan/radv_meta_resolve_cs.c +++ b/src/amd/vulkan/radv_meta_resolve_cs.c @@ -212,6 +212,12 @@ create_resolve_pipeline(struct radv_device *device, VkResult result; struct radv_shader_module cs = { .nir = NULL }; + mtx_lock(&device->meta_state.mtx); + if (*pipeline) { + mtx_unlock(&device->meta_state.mtx); + return VK_SUCCESS; + } + cs.nir = build_resolve_compute_shader(device, is_integer, is_srgb, samples); /* compute shader */ @@ -239,14 +245,16 @@ create_resolve_pipeline(struct radv_device *device, goto fail; ralloc_free(cs.nir); + mtx_unlock(&device->meta_state.mtx); return VK_SUCCESS; fail: ralloc_free(cs.nir); + mtx_unlock(&device->meta_state.mtx); return result; } VkResult -radv_device_init_meta_resolve_compute_state(struct radv_device *device) +radv_device_init_meta_resolve_compute_state(struct radv_device *device, bool on_demand) { struct radv_meta_state *state = &device->meta_state; VkResult res; @@ -255,6 +263,9 @@ radv_device_init_meta_resolve_compute_state(struct radv_device *device) if (res != VK_SUCCESS) goto fail; + if (on_demand) + return VK_SUCCESS; + for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) { uint32_t samples = 1 << i; @@ -353,16 +364,27 @@ emit_resolve(struct radv_cmd_buffer *cmd_buffer, } }); - VkPipeline pipeline; + VkPipeline *pipeline; if (vk_format_is_int(src_iview->image->vk_format)) - pipeline = device->meta_state.resolve_compute.rc[samples_log2].i_pipeline; + pipeline = &device->meta_state.resolve_compute.rc[samples_log2].i_pipeline; else if (vk_format_is_srgb(src_iview->image->vk_format)) - pipeline = device->meta_state.resolve_compute.rc[samples_log2].srgb_pipeline; + pipeline = &device->meta_state.resolve_compute.rc[samples_log2].srgb_pipeline; else - pipeline = device->meta_state.resolve_compute.rc[samples_log2].pipeline; + pipeline = &device->meta_state.resolve_compute.rc[samples_log2].pipeline; + + if (!*pipeline) { + VkResult ret = create_resolve_pipeline(device, samples, + vk_format_is_int(src_iview->image->vk_format), + vk_format_is_srgb(src_iview->image->vk_format), + pipeline); + if (ret != VK_SUCCESS) { + cmd_buffer->record_result = ret; + return; + } + } radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), - VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); + VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); unsigned push_constants[4] = { src_offset->x, diff --git a/src/amd/vulkan/radv_meta_resolve_fs.c b/src/amd/vulkan/radv_meta_resolve_fs.c index 6013503b837..21a5922f5df 100644 --- a/src/amd/vulkan/radv_meta_resolve_fs.c +++ b/src/amd/vulkan/radv_meta_resolve_fs.c @@ -161,10 +161,18 @@ create_resolve_pipeline(struct radv_device *device, int samples_log2, VkFormat format) { + mtx_lock(&device->meta_state.mtx); + + unsigned fs_key = radv_format_meta_fs_key(format); + VkPipeline *pipeline = &device->meta_state.resolve_fragment.rc[samples_log2].pipeline[fs_key]; + if (*pipeline) { + mtx_unlock(&device->meta_state.mtx); + return VK_SUCCESS; + } + VkResult result; bool is_integer = false; uint32_t samples = 1 << samples_log2; - unsigned fs_key = radv_format_meta_fs_key(format); const VkPipelineVertexInputStateCreateInfo *vi_create_info; vi_create_info = &normal_vi_create_info; if (vk_format_is_int(format)) @@ -180,9 +188,6 @@ create_resolve_pipeline(struct radv_device *device, assert(!*rp); - VkPipeline *pipeline = &device->meta_state.resolve_fragment.rc[samples_log2].pipeline[fs_key]; - assert(!*pipeline); - VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = { { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, @@ -307,11 +312,12 @@ create_resolve_pipeline(struct radv_device *device, ralloc_free(vs.nir); ralloc_free(fs.nir); + mtx_unlock(&device->meta_state.mtx); return result; } VkResult -radv_device_init_meta_resolve_fragment_state(struct radv_device *device) +radv_device_init_meta_resolve_fragment_state(struct radv_device *device, bool on_demand) { VkResult res; @@ -319,6 +325,9 @@ radv_device_init_meta_resolve_fragment_state(struct radv_device *device) if (res != VK_SUCCESS) goto fail; + if (on_demand) + return VK_SUCCESS; + for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) { for (unsigned j = 0; j < NUM_META_FS_KEYS; ++j) { res = create_resolve_pipeline(device, i, radv_fs_key_format_exemplars[j]); @@ -404,10 +413,18 @@ emit_resolve(struct radv_cmd_buffer *cmd_buffer, push_constants); unsigned fs_key = radv_format_meta_fs_key(dest_iview->vk_format); - VkPipeline pipeline_h = device->meta_state.resolve_fragment.rc[samples_log2].pipeline[fs_key]; + VkPipeline* pipeline = &device->meta_state.resolve_fragment.rc[samples_log2].pipeline[fs_key]; + + if (*pipeline == VK_NULL_HANDLE) { + VkResult ret = create_resolve_pipeline(device, samples_log2, radv_fs_key_format_exemplars[fs_key]); + if (ret != VK_SUCCESS) { + cmd_buffer->record_result = ret; + return; + } + } radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, - pipeline_h); + *pipeline); radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) { .x = dest_offset->x, diff --git a/src/amd/vulkan/radv_pipeline_cache.c b/src/amd/vulkan/radv_pipeline_cache.c index 7e2c305b1a0..9f01191a2f6 100644 --- a/src/amd/vulkan/radv_pipeline_cache.c +++ b/src/amd/vulkan/radv_pipeline_cache.c @@ -455,7 +455,7 @@ struct cache_header { uint8_t uuid[VK_UUID_SIZE]; }; -void +bool radv_pipeline_cache_load(struct radv_pipeline_cache *cache, const void *data, size_t size) { @@ -463,18 +463,18 @@ radv_pipeline_cache_load(struct radv_pipeline_cache *cache, struct cache_header header; if (size < sizeof(header)) - return; + return false; memcpy(&header, data, sizeof(header)); if (header.header_size < sizeof(header)) - return; + return false; if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE) - return; + return false; if (header.vendor_id != ATI_VENDOR_ID) - return; + return false; if (header.device_id != device->physical_device->rad_info.pci_id) - return; + return false; if (memcmp(header.uuid, device->physical_device->cache_uuid, VK_UUID_SIZE) != 0) - return; + return false; char *end = (void *) data + size; char *p = (void *) data + header.header_size; @@ -496,6 +496,8 @@ radv_pipeline_cache_load(struct radv_pipeline_cache *cache, } p += size; } + + return true; } VkResult radv_CreatePipelineCache( diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 9374b730b52..01a5a698a0d 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -372,7 +372,7 @@ radv_pipeline_cache_init(struct radv_pipeline_cache *cache, struct radv_device *device); void radv_pipeline_cache_finish(struct radv_pipeline_cache *cache); -void +bool radv_pipeline_cache_load(struct radv_pipeline_cache *cache, const void *data, size_t size); @@ -429,6 +429,12 @@ struct radv_meta_state { struct radv_pipeline_cache cache; + /* + * For on-demand pipeline creation, makes sure that + * only one thread tries to build a pipeline at the same time. + */ + mtx_t mtx; + /** * Use array element `i` for images with `2^i` samples. */ diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c index e3229ab59bb..bdfd7620cfc 100644 --- a/src/amd/vulkan/radv_query.c +++ b/src/amd/vulkan/radv_query.c @@ -511,12 +511,17 @@ build_pipeline_statistics_query_shader(struct radv_device *device) { return b.shader; } -VkResult radv_device_init_meta_query_state(struct radv_device *device) +static VkResult radv_device_init_meta_query_state_internal(struct radv_device *device) { VkResult result; struct radv_shader_module occlusion_cs = { .nir = NULL }; struct radv_shader_module pipeline_statistics_cs = { .nir = NULL }; + mtx_lock(&device->meta_state.mtx); + if (device->meta_state.query.pipeline_statistics_query_pipeline) { + mtx_unlock(&device->meta_state.mtx); + return VK_SUCCESS; + } occlusion_cs.nir = build_occlusion_query_shader(device); pipeline_statistics_cs.nir = build_pipeline_statistics_query_shader(device); @@ -611,9 +616,18 @@ fail: radv_device_finish_meta_query_state(device); ralloc_free(occlusion_cs.nir); ralloc_free(pipeline_statistics_cs.nir); + mtx_unlock(&device->meta_state.mtx); return result; } +VkResult radv_device_init_meta_query_state(struct radv_device *device, bool on_demand) +{ + if (on_demand) + return VK_SUCCESS; + + return radv_device_init_meta_query_state_internal(device); +} + void radv_device_finish_meta_query_state(struct radv_device *device) { if (device->meta_state.query.pipeline_statistics_query_pipeline) @@ -638,7 +652,7 @@ void radv_device_finish_meta_query_state(struct radv_device *device) } static void radv_query_shader(struct radv_cmd_buffer *cmd_buffer, - VkPipeline pipeline, + VkPipeline *pipeline, struct radeon_winsys_bo *src_bo, struct radeon_winsys_bo *dst_bo, uint64_t src_offset, uint64_t dst_offset, @@ -649,6 +663,14 @@ static void radv_query_shader(struct radv_cmd_buffer *cmd_buffer, struct radv_device *device = cmd_buffer->device; struct radv_meta_saved_state saved_state; + if (!*pipeline) { + VkResult ret = radv_device_init_meta_query_state_internal(device); + if (ret != VK_SUCCESS) { + cmd_buffer->record_result = ret; + return; + } + } + radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | @@ -667,7 +689,7 @@ static void radv_query_shader(struct radv_cmd_buffer *cmd_buffer, }; radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), - VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); + VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, @@ -974,7 +996,7 @@ void radv_CmdCopyQueryPoolResults( radeon_emit(cs, 4); /* poll interval */ } } - radv_query_shader(cmd_buffer, cmd_buffer->device->meta_state.query.occlusion_query_pipeline, + radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.occlusion_query_pipeline, pool->bo, dst_buffer->bo, firstQuery * pool->stride, dst_buffer->offset + dstOffset, get_max_db(cmd_buffer->device) * 16, stride, @@ -993,7 +1015,7 @@ void radv_CmdCopyQueryPoolResults( si_emit_wait_fence(cs, avail_va, 1, 0xffffffff); } } - radv_query_shader(cmd_buffer, cmd_buffer->device->meta_state.query.pipeline_statistics_query_pipeline, + radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.pipeline_statistics_query_pipeline, pool->bo, dst_buffer->bo, firstQuery * pool->stride, dst_buffer->offset + dstOffset, pipelinestat_block_size * 2, stride, queryCount, flags, |