summary | refs | log | tree | commit | diff | stats
path: root/src/amd
diff options
context:
space:
mode:
author Samuel Pitoiset <[email protected]> 2017-11-15 15:44:01 +0100
committer Samuel Pitoiset <[email protected]> 2017-11-20 10:45:27 +0100
commit cf54ea155eb408a37a27ab61a8315d857cd0250f (patch)
tree 50eee2826c281e551bdac7418c697523da8c5dbc /src/amd
parent e55b7609fa2b2c692d5cf56732b198ccf89fde2e (diff)
radv: only load needed depth clear regs for fast depth clears
This is similar to how the driver sets the depth clear regs after a fast depth clear: only the clear registers for the aspects present in the image format are loaded. Most of the time, this will copy a single 32-bit reg instead of a 64-bit pair of regs.

Signed-off-by: Samuel Pitoiset <[email protected]>
Reviewed-by: Bas Nieuwenhuizen <[email protected]>
Diffstat (limited to 'src/amd')
-rw-r--r-- src/amd/vulkan/radv_cmd_buffer.c 14
1 file changed, 12 insertions, 2 deletions
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index ae522d2088f..7d86eee9791 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -1331,20 +1331,30 @@ static void
radv_load_depth_clear_regs(struct radv_cmd_buffer *cmd_buffer,
struct radv_image *image)
{
+ VkImageAspectFlags aspects = vk_format_aspects(image->vk_format);
uint64_t va = radv_buffer_get_va(image->bo);
va += image->offset + image->clear_value_offset;
+ unsigned reg_offset = 0, reg_count = 0;
if (!image->surface.htile_size)
return;
+ if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
+ ++reg_count;
+ } else {
+ ++reg_offset;
+ va += 4;
+ }
+ if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
+ ++reg_count;
radeon_emit(cmd_buffer->cs, PKT3(PKT3_COPY_DATA, 4, 0));
radeon_emit(cmd_buffer->cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) |
COPY_DATA_DST_SEL(COPY_DATA_REG) |
- COPY_DATA_COUNT_SEL);
+ (reg_count == 2 ? COPY_DATA_COUNT_SEL : 0));
radeon_emit(cmd_buffer->cs, va);
radeon_emit(cmd_buffer->cs, va >> 32);
- radeon_emit(cmd_buffer->cs, R_028028_DB_STENCIL_CLEAR >> 2);
+ radeon_emit(cmd_buffer->cs, (R_028028_DB_STENCIL_CLEAR + 4 * reg_offset) >> 2);
radeon_emit(cmd_buffer->cs, 0);
radeon_emit(cmd_buffer->cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));