summaryrefslogtreecommitdiffstats
path: root/src/amd/vulkan
diff options
context:
space:
mode:
author Samuel Pitoiset <[email protected]> 2019-05-22 09:42:12 +0200
committer Samuel Pitoiset <[email protected]> 2019-06-21 14:50:19 +0200
commit 5cf350f56511e94caccdab4afafadca7c6f316f2 (patch)
tree 488c6c983ee812c99464391a5750867ee778a17c /src/amd/vulkan
parent cdc6efddf918bc07d30db779f3b0675192a77333 (diff)
radv: implement all depth/stencil resolve modes using compute
This path supports layers, but it requires decompressing HTILE before resolving. The driver also needs to fix up HTILE after the resolve. This path is probably slower than the graphics one. Signed-off-by: Samuel Pitoiset <[email protected]> Reviewed-by: Bas Nieuwenhuizen <[email protected]>
Diffstat (limited to 'src/amd/vulkan')
-rw-r--r-- src/amd/vulkan/radv_meta_resolve_cs.c 506
-rw-r--r-- src/amd/vulkan/radv_private.h 16
2 files changed, 522 insertions, 0 deletions
diff --git a/src/amd/vulkan/radv_meta_resolve_cs.c b/src/amd/vulkan/radv_meta_resolve_cs.c
index fc4bcf27bb9..c06f0f2c5ce 100644
--- a/src/amd/vulkan/radv_meta_resolve_cs.c
+++ b/src/amd/vulkan/radv_meta_resolve_cs.c
@@ -139,6 +139,165 @@ build_resolve_compute_shader(struct radv_device *dev, bool is_integer, bool is_s
return b.shader;
}
+enum { /* aspect selector passed as `index` to the depth/stencil resolve shader and pipeline builders */
+ DEPTH_RESOLVE, /* depth aspect: samples are fetched as floats */
+ STENCIL_RESOLVE, /* stencil aspect: samples are fetched as unsigned integers */
+};
+
+/**
+ * Translate a resolve mode into the short tag used when naming the
+ * depth/stencil resolve meta shaders.
+ *
+ * \param resolve_mode one of the SAMPLE_ZERO, AVERAGE, MIN or MAX mode bits
+ * \return a static string ("zero", "average", "min" or "max"); any other
+ *         value is treated as unreachable.
+ */
+static const char *
+get_resolve_mode_str(VkResolveModeFlagBitsKHR resolve_mode)
+{
+	if (resolve_mode == VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR)
+		return "zero";
+	if (resolve_mode == VK_RESOLVE_MODE_AVERAGE_BIT_KHR)
+		return "average";
+	if (resolve_mode == VK_RESOLVE_MODE_MIN_BIT_KHR)
+		return "min";
+	if (resolve_mode == VK_RESOLVE_MODE_MAX_BIT_KHR)
+		return "max";
+
+	unreachable("invalid resolve mode");
+}
+
+/**
+ * Emit a load_push_constant that reads a two-component, 32-bit offset.
+ *
+ * \param b           builder the instructions are appended to
+ * \param byte_offset byte offset of the vec2 inside the 16-byte push-constant range
+ * \param name        debug name given to the SSA destination
+ * \return the loaded two-component value
+ */
+static nir_ssa_def *
+build_ds_resolve_load_offset(nir_builder *b, int byte_offset, const char *name)
+{
+	nir_intrinsic_instr *load =
+		nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
+
+	nir_intrinsic_set_base(load, 0);
+	nir_intrinsic_set_range(load, 16);
+	load->src[0] = nir_src_for_ssa(nir_imm_int(b, byte_offset));
+	load->num_components = 2;
+	nir_ssa_dest_init(&load->instr, &load->dest, 2, 32, name);
+	nir_builder_instr_insert(b, &load->instr);
+
+	return &load->dest.ssa;
+}
+
+/**
+ * Emit a txf_ms fetch of a single sample from the multisampled source.
+ *
+ * \param b         builder the instructions are appended to
+ * \param coord     2D integer texel coordinate
+ * \param tex_deref deref of the multisampled source texture variable
+ * \param sample    index of the sample to fetch
+ * \param dest_type ALU type of the fetched value
+ * \return the four-component result of the fetch
+ */
+static nir_ssa_def *
+build_ds_resolve_fetch_sample(nir_builder *b, nir_ssa_def *coord,
+			      nir_ssa_def *tex_deref, int sample,
+			      nir_alu_type dest_type)
+{
+	nir_tex_instr *fetch = nir_tex_instr_create(b->shader, 3);
+
+	fetch->sampler_dim = GLSL_SAMPLER_DIM_MS;
+	fetch->op = nir_texop_txf_ms;
+	fetch->src[0].src_type = nir_tex_src_coord;
+	fetch->src[0].src = nir_src_for_ssa(coord);
+	fetch->src[1].src_type = nir_tex_src_ms_index;
+	fetch->src[1].src = nir_src_for_ssa(nir_imm_int(b, sample));
+	fetch->src[2].src_type = nir_tex_src_texture_deref;
+	fetch->src[2].src = nir_src_for_ssa(tex_deref);
+	fetch->dest_type = dest_type;
+	fetch->is_array = false;
+	fetch->coord_components = 2;
+
+	nir_ssa_dest_init(&fetch->instr, &fetch->dest, 4, 32, "tex");
+	nir_builder_instr_insert(b, &fetch->instr);
+
+	return &fetch->dest.ssa;
+}
+
+/**
+ * Build the NIR compute shader that resolves one aspect of a multisampled
+ * depth/stencil image.
+ *
+ * The shader uses 16x16x1 workgroups. Each invocation reads the source texel
+ * at its global id plus the source offset (push-constant bytes 0-7), combines
+ * the samples according to \p resolve_mode, and stores the result to the
+ * destination image at its global id plus the destination offset
+ * (push-constant bytes 8-15). The source is bound at set 0 / binding 0 and
+ * the destination at set 0 / binding 1.
+ *
+ * \param dev          device the shader is built for (not referenced here)
+ * \param samples      number of samples combined for AVERAGE/MIN/MAX; only
+ *                     used for the shader name in SAMPLE_ZERO mode
+ * \param index        DEPTH_RESOLVE or STENCIL_RESOLVE
+ * \param resolve_mode how the samples are combined; AVERAGE is only valid
+ *                     for depth
+ * \return the newly built shader
+ */
+static nir_shader *
+build_depth_stencil_resolve_compute_shader(struct radv_device *dev, int samples,
+					   int index,
+					   VkResolveModeFlagBitsKHR resolve_mode)
+{
+	const bool is_depth = index == DEPTH_RESOLVE;
+	/* Depth is fetched as float, stencil as unsigned integer. */
+	const nir_alu_type type = is_depth ? nir_type_float : nir_type_uint;
+	nir_builder b;
+	char name[64];
+	const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS,
+								 false,
+								 false,
+								 GLSL_TYPE_FLOAT);
+	const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
+							     false,
+							     false,
+							     GLSL_TYPE_FLOAT);
+
+	snprintf(name, sizeof(name), "meta_resolve_cs_%s-%s-%d",
+		 is_depth ? "depth" : "stencil",
+		 get_resolve_mode_str(resolve_mode), samples);
+
+	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
+	b.shader->info.name = ralloc_strdup(b.shader, name);
+	b.shader->info.cs.local_size[0] = 16;
+	b.shader->info.cs.local_size[1] = 16;
+	b.shader->info.cs.local_size[2] = 1;
+
+	nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
+						      sampler_type, "s_tex");
+	input_img->data.descriptor_set = 0;
+	input_img->data.binding = 0;
+
+	nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
+						       img_type, "out_img");
+	output_img->data.descriptor_set = 0;
+	output_img->data.binding = 1;
+
+	/* global_id = workgroup id * workgroup size + local invocation id */
+	nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
+	nir_ssa_def *wg_id = nir_load_work_group_id(&b);
+	nir_ssa_def *block_size = nir_imm_ivec4(&b,
+						b.shader->info.cs.local_size[0],
+						b.shader->info.cs.local_size[1],
+						b.shader->info.cs.local_size[2], 0);
+
+	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
+
+	nir_ssa_def *src_offset = build_ds_resolve_load_offset(&b, 0, "src_offset");
+	nir_ssa_def *dst_offset = build_ds_resolve_load_offset(&b, 8, "dst_offset");
+
+	nir_ssa_def *img_coord = nir_channels(&b, nir_iadd(&b, global_id, src_offset), 0x3);
+
+	nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
+
+	/* Sample 0 is always fetched; it is the whole result in SAMPLE_ZERO mode. */
+	nir_ssa_def *outval = build_ds_resolve_fetch_sample(&b, img_coord,
+							    input_img_deref, 0, type);
+
+	if (resolve_mode != VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR) {
+		for (int i = 1; i < samples; i++) {
+			nir_ssa_def *sample =
+				build_ds_resolve_fetch_sample(&b, img_coord,
+							      input_img_deref, i, type);
+
+			switch (resolve_mode) {
+			case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
+				assert(index == DEPTH_RESOLVE);
+				outval = nir_fadd(&b, outval, sample);
+				break;
+			case VK_RESOLVE_MODE_MIN_BIT_KHR:
+				if (is_depth)
+					outval = nir_fmin(&b, outval, sample);
+				else
+					outval = nir_umin(&b, outval, sample);
+				break;
+			case VK_RESOLVE_MODE_MAX_BIT_KHR:
+				if (is_depth)
+					outval = nir_fmax(&b, outval, sample);
+				else
+					outval = nir_umax(&b, outval, sample);
+				break;
+			default:
+				unreachable("invalid resolve mode");
+			}
+		}
+
+		/* The loop accumulated a sum; turn it into the mean. */
+		if (resolve_mode == VK_RESOLVE_MODE_AVERAGE_BIT_KHR)
+			outval = nir_fdiv(&b, outval, nir_imm_float(&b, samples));
+	}
+
+	nir_ssa_def *coord = nir_iadd(&b, global_id, dst_offset);
+	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
+	store->num_components = 4;
+	store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
+	store->src[1] = nir_src_for_ssa(coord);
+	store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
+	store->src[3] = nir_src_for_ssa(outval);
+	nir_builder_instr_insert(&b, &store->instr);
+	return b.shader;
+}
static VkResult
create_layout(struct radv_device *device)
@@ -248,6 +407,57 @@ fail:
return result;
}
+/**
+ * Create one depth/stencil resolve compute pipeline unless it already exists.
+ *
+ * The check and the creation both happen with the meta-state mutex held, so
+ * concurrent callers end up with a single pipeline in \p pipeline.
+ *
+ * \param device       device owning the meta state
+ * \param samples      sample count forwarded to the shader builder
+ * \param index        DEPTH_RESOLVE or STENCIL_RESOLVE
+ * \param resolve_mode resolve mode forwarded to the shader builder
+ * \param pipeline     slot to fill; left untouched when already non-null
+ * \return VK_SUCCESS when the slot was already populated or creation
+ *         succeeded, otherwise the error from radv_CreateComputePipelines
+ */
+static VkResult
+create_depth_stencil_resolve_pipeline(struct radv_device *device,
+				      int samples,
+				      int index,
+				      VkResolveModeFlagBitsKHR resolve_mode,
+				      VkPipeline *pipeline)
+{
+	struct radv_shader_module cs = { .nir = NULL };
+	VkResult result = VK_SUCCESS;
+
+	mtx_lock(&device->meta_state.mtx);
+
+	if (!*pipeline) {
+		cs.nir = build_depth_stencil_resolve_compute_shader(device, samples,
+								    index, resolve_mode);
+
+		/* compute shader */
+		VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
+			.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+			.stage = VK_SHADER_STAGE_COMPUTE_BIT,
+			.module = radv_shader_module_to_handle(&cs),
+			.pName = "main",
+			.pSpecializationInfo = NULL,
+		};
+
+		VkComputePipelineCreateInfo vk_pipeline_info = {
+			.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+			.stage = pipeline_shader_stage,
+			.flags = 0,
+			.layout = device->meta_state.resolve_compute.p_layout,
+		};
+
+		result = radv_CreateComputePipelines(radv_device_to_handle(device),
+						     radv_pipeline_cache_to_handle(&device->meta_state.cache),
+						     1, &vk_pipeline_info, NULL,
+						     pipeline);
+
+		/* The NIR is released whether or not creation succeeded. */
+		ralloc_free(cs.nir);
+	}
+
+	mtx_unlock(&device->meta_state.mtx);
+	return result;
+}
+
VkResult
radv_device_init_meta_resolve_compute_state(struct radv_device *device, bool on_demand)
{
@@ -279,8 +489,56 @@ radv_device_init_meta_resolve_compute_state(struct radv_device *device, bool on_
if (res != VK_SUCCESS)
goto fail;
+ res = create_depth_stencil_resolve_pipeline(device, samples,
+ DEPTH_RESOLVE,
+ VK_RESOLVE_MODE_AVERAGE_BIT_KHR,
+ &state->resolve_compute.depth[i].average_pipeline);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = create_depth_stencil_resolve_pipeline(device, samples,
+ DEPTH_RESOLVE,
+ VK_RESOLVE_MODE_MAX_BIT_KHR,
+ &state->resolve_compute.depth[i].max_pipeline);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = create_depth_stencil_resolve_pipeline(device, samples,
+ DEPTH_RESOLVE,
+ VK_RESOLVE_MODE_MIN_BIT_KHR,
+ &state->resolve_compute.depth[i].min_pipeline);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = create_depth_stencil_resolve_pipeline(device, samples,
+ STENCIL_RESOLVE,
+ VK_RESOLVE_MODE_MAX_BIT_KHR,
+ &state->resolve_compute.stencil[i].max_pipeline);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = create_depth_stencil_resolve_pipeline(device, samples,
+ STENCIL_RESOLVE,
+ VK_RESOLVE_MODE_MIN_BIT_KHR,
+ &state->resolve_compute.stencil[i].min_pipeline);
+ if (res != VK_SUCCESS)
+ goto fail;
}
+ res = create_depth_stencil_resolve_pipeline(device, 0,
+ DEPTH_RESOLVE,
+ VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR,
+ &state->resolve_compute.depth_zero_pipeline);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = create_depth_stencil_resolve_pipeline(device, 0,
+ STENCIL_RESOLVE,
+ VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR,
+ &state->resolve_compute.stencil_zero_pipeline);
+ if (res != VK_SUCCESS)
+ goto fail;
+
return VK_SUCCESS;
fail:
radv_device_finish_meta_resolve_compute_state(device);
@@ -303,8 +561,36 @@ radv_device_finish_meta_resolve_compute_state(struct radv_device *device)
radv_DestroyPipeline(radv_device_to_handle(device),
state->resolve_compute.rc[i].srgb_pipeline,
&state->alloc);
+
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->resolve_compute.depth[i].average_pipeline,
+ &state->alloc);
+
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->resolve_compute.depth[i].max_pipeline,
+ &state->alloc);
+
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->resolve_compute.depth[i].min_pipeline,
+ &state->alloc);
+
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->resolve_compute.stencil[i].max_pipeline,
+ &state->alloc);
+
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->resolve_compute.stencil[i].min_pipeline,
+ &state->alloc);
}
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->resolve_compute.depth_zero_pipeline,
+ &state->alloc);
+
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->resolve_compute.stencil_zero_pipeline,
+ &state->alloc);
+
radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
state->resolve_compute.ds_layout,
&state->alloc);
@@ -411,6 +697,113 @@ emit_resolve(struct radv_cmd_buffer *cmd_buffer,
}
+/**
+ * Record one compute dispatch that resolves a single aspect of a
+ * multisampled depth/stencil view into a destination view.
+ *
+ * Pushes the source (sampled image, binding 0) and destination (storage
+ * image, binding 1) descriptors, selects the pipeline matching the aspect,
+ * resolve mode and sample count (creating it first if its slot is still
+ * empty), pushes the source/destination offsets as push constants and
+ * dispatches one invocation per texel of \p resolve_extent.
+ *
+ * If the pipeline has to be created and that fails, the error is stored in
+ * cmd_buffer->record_result and nothing is dispatched.
+ *
+ * \param cmd_buffer     command buffer being recorded
+ * \param src_iview      multisampled source view
+ * \param dest_iview     destination view
+ * \param src_offset     texel offset applied when reading the source
+ * \param dest_offset    texel offset applied when writing the destination
+ * \param resolve_extent size of the region to resolve
+ * \param aspects        VK_IMAGE_ASPECT_DEPTH_BIT selects the depth
+ *                       pipelines; any other value selects the stencil ones
+ * \param resolve_mode   how the samples are combined
+ */
+static void
+emit_depth_stencil_resolve(struct radv_cmd_buffer *cmd_buffer,
+			   struct radv_image_view *src_iview,
+			   struct radv_image_view *dest_iview,
+			   const VkOffset2D *src_offset,
+			   const VkOffset2D *dest_offset,
+			   const VkExtent2D *resolve_extent,
+			   VkImageAspectFlags aspects,
+			   VkResolveModeFlagBitsKHR resolve_mode)
+{
+	struct radv_device *device = cmd_buffer->device;
+	struct radv_meta_state *meta = &device->meta_state;
+	const uint32_t samples = src_iview->image->info.samples;
+	const uint32_t samples_log2 = ffs(samples) - 1;
+	const bool is_depth = aspects == VK_IMAGE_ASPECT_DEPTH_BIT;
+	VkPipeline *pipeline;
+
+	const VkDescriptorImageInfo src_info = {
+		.sampler = VK_NULL_HANDLE,
+		.imageView = radv_image_view_to_handle(src_iview),
+		.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+	};
+	const VkDescriptorImageInfo dst_info = {
+		.sampler = VK_NULL_HANDLE,
+		.imageView = radv_image_view_to_handle(dest_iview),
+		.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+	};
+	VkWriteDescriptorSet writes[2] = {
+		{
+			.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+			.dstBinding = 0,
+			.dstArrayElement = 0,
+			.descriptorCount = 1,
+			.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+			.pImageInfo = &src_info,
+		},
+		{
+			.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+			.dstBinding = 1,
+			.dstArrayElement = 0,
+			.descriptorCount = 1,
+			.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+			.pImageInfo = &dst_info,
+		},
+	};
+
+	radv_meta_push_descriptor_set(cmd_buffer,
+				      VK_PIPELINE_BIND_POINT_COMPUTE,
+				      meta->resolve_compute.p_layout,
+				      0, /* set */
+				      2, /* descriptorWriteCount */
+				      writes);
+
+	/* SAMPLE_ZERO has one pipeline per aspect; the other modes are
+	 * additionally indexed by log2 of the sample count.
+	 */
+	switch (resolve_mode) {
+	case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR:
+		pipeline = is_depth ? &meta->resolve_compute.depth_zero_pipeline
+				    : &meta->resolve_compute.stencil_zero_pipeline;
+		break;
+	case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
+		assert(aspects == VK_IMAGE_ASPECT_DEPTH_BIT);
+		pipeline = &meta->resolve_compute.depth[samples_log2].average_pipeline;
+		break;
+	case VK_RESOLVE_MODE_MIN_BIT_KHR:
+		pipeline = is_depth ? &meta->resolve_compute.depth[samples_log2].min_pipeline
+				    : &meta->resolve_compute.stencil[samples_log2].min_pipeline;
+		break;
+	case VK_RESOLVE_MODE_MAX_BIT_KHR:
+		pipeline = is_depth ? &meta->resolve_compute.depth[samples_log2].max_pipeline
+				    : &meta->resolve_compute.stencil[samples_log2].max_pipeline;
+		break;
+	default:
+		unreachable("invalid resolve mode");
+	}
+
+	/* Create the pipeline now if its slot is still empty. */
+	if (!*pipeline) {
+		const int index = is_depth ? DEPTH_RESOLVE : STENCIL_RESOLVE;
+		VkResult ret = create_depth_stencil_resolve_pipeline(device, samples,
+								     index, resolve_mode,
+								     pipeline);
+		if (ret != VK_SUCCESS) {
+			cmd_buffer->record_result = ret;
+			return;
+		}
+	}
+
+	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
+			     VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
+
+	/* Layout read by the shader: source offset first, then destination. */
+	unsigned push_constants[4] = {
+		src_offset->x,
+		src_offset->y,
+		dest_offset->x,
+		dest_offset->y,
+	};
+	radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+			      meta->resolve_compute.p_layout,
+			      VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
+			      push_constants);
+
+	radv_unaligned_dispatch(cmd_buffer, resolve_extent->width, resolve_extent->height, 1);
+}
+
void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer,
struct radv_image *src_image,
VkFormat src_format,
@@ -561,3 +954,116 @@ radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer)
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
RADV_CMD_FLAG_INV_VMEM_L1;
}
+
+/**
+ * Resolve the current subpass' depth/stencil attachment into its
+ * depth/stencil resolve attachment using the compute path.
+ *
+ * One dispatch is recorded per layer, covering the whole framebuffer. The
+ * layer count is the framebuffer's layer count, or the index of the highest
+ * set view-mask bit plus one when the subpass has a view mask. After the
+ * dispatches, the destination's HTILE is cleared when the destination has
+ * HTILE and the depth aspect was resolved.
+ *
+ * \param cmd_buffer   command buffer being recorded, inside a subpass that
+ *                     has both a depth/stencil and a resolve attachment
+ * \param aspects      aspect to resolve (used as the view aspect mask)
+ * \param resolve_mode how the samples are combined
+ */
+void
+radv_depth_stencil_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer,
+				      VkImageAspectFlags aspects,
+				      VkResolveModeFlagBitsKHR resolve_mode)
+{
+	struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
+	const struct radv_subpass *subpass = cmd_buffer->state.subpass;
+	struct radv_meta_saved_state saved_state;
+	uint32_t layer_count = fb->layers;
+
+	if (subpass->view_mask)
+		layer_count = util_last_bit(subpass->view_mask);
+
+	/* Resolves happen before the end-of-subpass barriers get executed, so
+	 * we have to make the attachment shader-readable.
+	 */
+	struct radv_subpass_barrier barrier = {
+		.src_stage_mask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
+		.src_access_mask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
+		.dst_access_mask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT,
+	};
+	radv_subpass_barrier(cmd_buffer, &barrier);
+
+	radv_decompress_resolve_subpass_src(cmd_buffer);
+
+	radv_meta_save(&saved_state, cmd_buffer,
+		       RADV_META_SAVE_COMPUTE_PIPELINE |
+		       RADV_META_SAVE_CONSTANTS |
+		       RADV_META_SAVE_DESCRIPTORS);
+
+	const uint32_t src_idx = subpass->depth_stencil_attachment->attachment;
+	const uint32_t dst_idx = subpass->ds_resolve_attachment->attachment;
+
+	struct radv_image_view *src_iview = fb->attachments[src_idx].attachment;
+	struct radv_image_view *dst_iview = fb->attachments[dst_idx].attachment;
+
+	struct radv_image *src_image = src_iview->image;
+	struct radv_image *dst_image = dst_iview->image;
+
+	for (uint32_t layer = 0; layer < layer_count; layer++) {
+		/* Temporary single-layer, single-level views restricted to
+		 * the requested aspect.
+		 */
+		struct radv_image_view tsrc_iview;
+		radv_image_view_init(&tsrc_iview, cmd_buffer->device,
+				     &(VkImageViewCreateInfo) {
+					     .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+					     .image = radv_image_to_handle(src_image),
+					     .viewType = radv_meta_get_view_type(src_image),
+					     .format = src_iview->vk_format,
+					     .subresourceRange = {
+						     .aspectMask = aspects,
+						     .baseMipLevel = src_iview->base_mip,
+						     .levelCount = 1,
+						     .baseArrayLayer = src_iview->base_layer + layer,
+						     .layerCount = 1,
+					     },
+				     });
+
+		struct radv_image_view tdst_iview;
+		radv_image_view_init(&tdst_iview, cmd_buffer->device,
+				     &(VkImageViewCreateInfo) {
+					     .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+					     .image = radv_image_to_handle(dst_image),
+					     .viewType = radv_meta_get_view_type(dst_image),
+					     .format = dst_iview->vk_format,
+					     .subresourceRange = {
+						     .aspectMask = aspects,
+						     .baseMipLevel = dst_iview->base_mip,
+						     .levelCount = 1,
+						     .baseArrayLayer = dst_iview->base_layer + layer,
+						     .layerCount = 1,
+					     },
+				     });
+
+		emit_depth_stencil_resolve(cmd_buffer, &tsrc_iview, &tdst_iview,
+					   &(VkOffset2D) { 0, 0 },
+					   &(VkOffset2D) { 0, 0 },
+					   &(VkExtent2D) { fb->width, fb->height },
+					   aspects,
+					   resolve_mode);
+	}
+
+	cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
+					RADV_CMD_FLAG_INV_VMEM_L1;
+
+	/* The compute shader wrote the destination directly, so its HTILE
+	 * has to be fixed up. This is only done for depth resolves.
+	 */
+	if (radv_image_has_htile(dst_image) &&
+	    aspects == VK_IMAGE_ASPECT_DEPTH_BIT) {
+		VkImageSubresourceRange range = {
+			.aspectMask = aspects,
+			.baseMipLevel = dst_iview->base_mip,
+			.levelCount = 1,
+			.baseArrayLayer = dst_iview->base_layer,
+			.layerCount = layer_count,
+		};
+
+		uint32_t clear_value = 0xfffc000f;
+
+		if (vk_format_is_stencil(dst_image->vk_format) &&
+		    subpass->stencil_resolve_mode != VK_RESOLVE_MODE_NONE_KHR) {
+			/* Only clear the stencil part of the HTILE
+			 * buffer if it's resolved, otherwise this
+			 * might break if the stencil has been cleared.
+			 */
+			clear_value = 0xfffff30f;
+		}
+
+		cmd_buffer->state.flush_bits |=
+			radv_clear_htile(cmd_buffer, dst_image, &range,
+					 clear_value);
+	}
+
+	radv_meta_restore(&saved_state, cmd_buffer);
+}
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 66483e306d9..9de46494454 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -577,6 +577,19 @@ struct radv_meta_state {
VkPipeline i_pipeline;
VkPipeline srgb_pipeline;
} rc[MAX_SAMPLES_LOG2];
+
+ VkPipeline depth_zero_pipeline;
+ struct {
+ VkPipeline average_pipeline;
+ VkPipeline max_pipeline;
+ VkPipeline min_pipeline;
+ } depth[MAX_SAMPLES_LOG2];
+
+ VkPipeline stencil_zero_pipeline;
+ struct {
+ VkPipeline max_pipeline;
+ VkPipeline min_pipeline;
+ } stencil[MAX_SAMPLES_LOG2];
} resolve_compute;
struct {
@@ -1256,6 +1269,9 @@ radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer,
void radv_cmd_buffer_clear_subpass(struct radv_cmd_buffer *cmd_buffer);
void radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer);
void radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer);
+void radv_depth_stencil_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer, /* compute-shader variant of the depth/stencil subpass resolve */
+ VkImageAspectFlags aspects, /* aspect to resolve */
+ VkResolveModeFlagBitsKHR resolve_mode); /* how the samples are combined */
void radv_cmd_buffer_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer);
void radv_depth_stencil_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer,
VkImageAspectFlags aspects,