summaryrefslogtreecommitdiffstats
path: root/src/amd/vulkan/radv_cmd_buffer.c
diff options
context:
space:
mode:
authorBas Nieuwenhuizen <[email protected]>2018-01-14 21:20:20 +0100
committerBas Nieuwenhuizen <[email protected]>2018-01-30 22:00:34 +0100
commitf0c9ef410adb52167509e62b5d4a4dfeb602c163 (patch)
tree0daef9054d17fc2726120eeb0276eb2181fe84be /src/amd/vulkan/radv_cmd_buffer.c
parentbeeab44190a7d73d989e8dac413b4c0201822986 (diff)
radv: Add PM4 pregeneration for compute pipelines.
Reviewed-by: Dave Airlie <[email protected]> Reviewed-by: Samuel Pitoiset <[email protected]>
Diffstat (limited to 'src/amd/vulkan/radv_cmd_buffer.c')
-rw-r--r--src/amd/vulkan/radv_cmd_buffer.c60
1 files changed, 2 insertions, 58 deletions
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index f3f765a96e0..1280a186525 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -2180,76 +2180,20 @@ VkResult radv_EndCommandBuffer(
static void
radv_emit_compute_pipeline(struct radv_cmd_buffer *cmd_buffer)
{
- struct radv_shader_variant *compute_shader;
struct radv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
- struct radv_device *device = cmd_buffer->device;
- unsigned compute_resource_limits;
- unsigned waves_per_threadgroup;
- uint64_t va;
if (!pipeline || pipeline == cmd_buffer->state.emitted_compute_pipeline)
return;
cmd_buffer->state.emitted_compute_pipeline = pipeline;
- compute_shader = pipeline->shaders[MESA_SHADER_COMPUTE];
- va = radv_buffer_get_va(compute_shader->bo) + compute_shader->bo_offset;
-
- MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws,
- cmd_buffer->cs, 19);
-
- radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B830_COMPUTE_PGM_LO, 2);
- radeon_emit(cmd_buffer->cs, va >> 8);
- radeon_emit(cmd_buffer->cs, va >> 40);
-
- radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B848_COMPUTE_PGM_RSRC1, 2);
- radeon_emit(cmd_buffer->cs, compute_shader->rsrc1);
- radeon_emit(cmd_buffer->cs, compute_shader->rsrc2);
-
+ radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, pipeline->cs.cdw);
+ radeon_emit_array(cmd_buffer->cs, pipeline->cs.buf, pipeline->cs.cdw);
cmd_buffer->compute_scratch_size_needed =
MAX2(cmd_buffer->compute_scratch_size_needed,
pipeline->max_waves * pipeline->scratch_bytes_per_wave);
- /* change these once we have scratch support */
- radeon_set_sh_reg(cmd_buffer->cs, R_00B860_COMPUTE_TMPRING_SIZE,
- S_00B860_WAVES(pipeline->max_waves) |
- S_00B860_WAVESIZE(pipeline->scratch_bytes_per_wave >> 10));
-
- /* Calculate best compute resource limits. */
- waves_per_threadgroup =
- DIV_ROUND_UP(compute_shader->info.cs.block_size[0] *
- compute_shader->info.cs.block_size[1] *
- compute_shader->info.cs.block_size[2], 64);
- compute_resource_limits =
- S_00B854_SIMD_DEST_CNTL(waves_per_threadgroup % 4 == 0);
-
- if (device->physical_device->rad_info.chip_class >= CIK) {
- unsigned num_cu_per_se =
- device->physical_device->rad_info.num_good_compute_units /
- device->physical_device->rad_info.max_se;
-
- /* Force even distribution on all SIMDs in CU if the workgroup
- * size is 64. This has shown some good improvements if # of
- * CUs per SE is not a multiple of 4.
- */
- if (num_cu_per_se % 4 && waves_per_threadgroup == 1)
- compute_resource_limits |= S_00B854_FORCE_SIMD_DIST(1);
- }
-
- radeon_set_sh_reg(cmd_buffer->cs, R_00B854_COMPUTE_RESOURCE_LIMITS,
- compute_resource_limits);
-
- radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3);
- radeon_emit(cmd_buffer->cs,
- S_00B81C_NUM_THREAD_FULL(compute_shader->info.cs.block_size[0]));
- radeon_emit(cmd_buffer->cs,
- S_00B81C_NUM_THREAD_FULL(compute_shader->info.cs.block_size[1]));
- radeon_emit(cmd_buffer->cs,
- S_00B81C_NUM_THREAD_FULL(compute_shader->info.cs.block_size[2]));
-
- assert(cmd_buffer->cs->cdw <= cdw_max);
-
if (unlikely(cmd_buffer->device->trace_bo))
radv_save_pipeline(cmd_buffer, pipeline, RING_COMPUTE);
}