From f56aa25ac5d2509392177bfa503aa52a565b9a85 Mon Sep 17 00:00:00 2001 From: Grazvydas Ignotas Date: Sat, 10 Jun 2017 18:34:22 +0300 Subject: radv: don't even attempt to prefetch on SI Before bcae327469 this was emitting CP DMA packet even on SI, but apparently hasn't caused too many problems. After that commit the CP DMA code now always sets the CIK+ only bit for prefetch. Just follow radeonsi there and don't try to prefetch at all. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101334 Fixes: bcae327469 "radv: realign cp dma code with radeonsi" Signed-off-by: Grazvydas Ignotas Reviewed-by: Bas Nieuwenhuizen --- src/amd/vulkan/radv_cmd_buffer.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) (limited to 'src/amd/vulkan') diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 1ac9de139f1..b08f218549a 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -531,6 +531,14 @@ radv_emit_graphics_raster_state(struct radv_cmd_buffer *cmd_buffer, raster->pa_su_sc_mode_cntl); } +static inline void +radv_emit_prefetch(struct radv_cmd_buffer *cmd_buffer, uint64_t va, + unsigned size) +{ + if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) + si_cp_dma_prefetch(cmd_buffer, va, size); +} + static void radv_emit_hw_vs(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeline *pipeline, @@ -542,7 +550,7 @@ radv_emit_hw_vs(struct radv_cmd_buffer *cmd_buffer, unsigned export_count; ws->cs_add_buffer(cmd_buffer->cs, shader->bo, 8); - si_cp_dma_prefetch(cmd_buffer, va, shader->code_size); + radv_emit_prefetch(cmd_buffer, va, shader->code_size); export_count = MAX2(1, outinfo->param_exports); radeon_set_context_reg(cmd_buffer->cs, R_0286C4_SPI_VS_OUT_CONFIG, @@ -591,7 +599,7 @@ radv_emit_hw_es(struct radv_cmd_buffer *cmd_buffer, uint64_t va = ws->buffer_get_va(shader->bo); ws->cs_add_buffer(cmd_buffer->cs, shader->bo, 8); - si_cp_dma_prefetch(cmd_buffer, va, shader->code_size); + radv_emit_prefetch(cmd_buffer, va, shader->code_size); radeon_set_context_reg(cmd_buffer->cs, R_028AAC_VGT_ESGS_RING_ITEMSIZE, outinfo->esgs_itemsize / 4); @@ -611,7 +619,7 @@ radv_emit_hw_ls(struct radv_cmd_buffer *cmd_buffer, uint32_t rsrc2 = shader->rsrc2; ws->cs_add_buffer(cmd_buffer->cs, shader->bo, 8); - si_cp_dma_prefetch(cmd_buffer, va, shader->code_size); + radv_emit_prefetch(cmd_buffer, va, shader->code_size); radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B520_SPI_SHADER_PGM_LO_LS, 2); radeon_emit(cmd_buffer->cs, va >> 8); @@ -635,7 +643,7 @@ radv_emit_hw_hs(struct radv_cmd_buffer *cmd_buffer, uint64_t va = ws->buffer_get_va(shader->bo); ws->cs_add_buffer(cmd_buffer->cs, shader->bo, 8); - si_cp_dma_prefetch(cmd_buffer, va, shader->code_size); + radv_emit_prefetch(cmd_buffer, va, shader->code_size); radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B420_SPI_SHADER_PGM_LO_HS, 4); radeon_emit(cmd_buffer->cs, va >> 8); @@ -769,7 +777,8 @@ radv_emit_geometry_shader(struct radv_cmd_buffer *cmd_buffer, va = ws->buffer_get_va(gs->bo); ws->cs_add_buffer(cmd_buffer->cs, gs->bo, 8); - si_cp_dma_prefetch(cmd_buffer, va, gs->code_size); + radv_emit_prefetch(cmd_buffer, va, gs->code_size); + radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B220_SPI_SHADER_PGM_LO_GS, 4); radeon_emit(cmd_buffer->cs, va >> 8); radeon_emit(cmd_buffer->cs, va >> 40); @@ -810,7 +819,7 @@ radv_emit_fragment_shader(struct radv_cmd_buffer *cmd_buffer, va = ws->buffer_get_va(ps->bo); ws->cs_add_buffer(cmd_buffer->cs, ps->bo, 8); - si_cp_dma_prefetch(cmd_buffer, va, ps->code_size); + radv_emit_prefetch(cmd_buffer, va, ps->code_size); radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B020_SPI_SHADER_PGM_LO_PS, 4); radeon_emit(cmd_buffer->cs, va >> 8); @@ -2215,7 +2224,7 @@ radv_emit_compute_pipeline(struct radv_cmd_buffer *cmd_buffer) va = ws->buffer_get_va(compute_shader->bo); ws->cs_add_buffer(cmd_buffer->cs, compute_shader->bo, 8); - si_cp_dma_prefetch(cmd_buffer, va, compute_shader->code_size); + radv_emit_prefetch(cmd_buffer, va, compute_shader->code_size); MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 16); -- cgit v1.2.3