diff options
author | Bas Nieuwenhuizen <[email protected]> | 2018-01-14 21:20:20 +0100 |
---|---|---|
committer | Bas Nieuwenhuizen <[email protected]> | 2018-01-30 22:00:34 +0100 |
commit | f0c9ef410adb52167509e62b5d4a4dfeb602c163 (patch) | |
tree | 0daef9054d17fc2726120eeb0276eb2181fe84be /src/amd/vulkan/radv_cmd_buffer.c | |
parent | beeab44190a7d73d989e8dac413b4c0201822986 (diff) |
radv: Add PM4 pregeneration for compute pipelines.
Reviewed-by: Dave Airlie <[email protected]>
Reviewed-by: Samuel Pitoiset <[email protected]>
Diffstat (limited to 'src/amd/vulkan/radv_cmd_buffer.c')
-rw-r--r-- | src/amd/vulkan/radv_cmd_buffer.c | 60 |
1 files changed, 2 insertions, 58 deletions
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index f3f765a96e0..1280a186525 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -2180,76 +2180,20 @@ VkResult radv_EndCommandBuffer( static void radv_emit_compute_pipeline(struct radv_cmd_buffer *cmd_buffer) { - struct radv_shader_variant *compute_shader; struct radv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; - struct radv_device *device = cmd_buffer->device; - unsigned compute_resource_limits; - unsigned waves_per_threadgroup; - uint64_t va; if (!pipeline || pipeline == cmd_buffer->state.emitted_compute_pipeline) return; cmd_buffer->state.emitted_compute_pipeline = pipeline; - compute_shader = pipeline->shaders[MESA_SHADER_COMPUTE]; - va = radv_buffer_get_va(compute_shader->bo) + compute_shader->bo_offset; - - MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, - cmd_buffer->cs, 19); - - radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B830_COMPUTE_PGM_LO, 2); - radeon_emit(cmd_buffer->cs, va >> 8); - radeon_emit(cmd_buffer->cs, va >> 40); - - radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B848_COMPUTE_PGM_RSRC1, 2); - radeon_emit(cmd_buffer->cs, compute_shader->rsrc1); - radeon_emit(cmd_buffer->cs, compute_shader->rsrc2); - + radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, pipeline->cs.cdw); + radeon_emit_array(cmd_buffer->cs, pipeline->cs.buf, pipeline->cs.cdw); cmd_buffer->compute_scratch_size_needed = MAX2(cmd_buffer->compute_scratch_size_needed, pipeline->max_waves * pipeline->scratch_bytes_per_wave); - /* change these once we have scratch support */ - radeon_set_sh_reg(cmd_buffer->cs, R_00B860_COMPUTE_TMPRING_SIZE, - S_00B860_WAVES(pipeline->max_waves) | - S_00B860_WAVESIZE(pipeline->scratch_bytes_per_wave >> 10)); - - /* Calculate best compute resource limits. */ - waves_per_threadgroup = - DIV_ROUND_UP(compute_shader->info.cs.block_size[0] * - compute_shader->info.cs.block_size[1] * - compute_shader->info.cs.block_size[2], 64); - compute_resource_limits = - S_00B854_SIMD_DEST_CNTL(waves_per_threadgroup % 4 == 0); - - if (device->physical_device->rad_info.chip_class >= CIK) { - unsigned num_cu_per_se = - device->physical_device->rad_info.num_good_compute_units / - device->physical_device->rad_info.max_se; - - /* Force even distribution on all SIMDs in CU if the workgroup - * size is 64. This has shown some good improvements if # of - * CUs per SE is not a multiple of 4. - */ - if (num_cu_per_se % 4 && waves_per_threadgroup == 1) - compute_resource_limits |= S_00B854_FORCE_SIMD_DIST(1); - } - - radeon_set_sh_reg(cmd_buffer->cs, R_00B854_COMPUTE_RESOURCE_LIMITS, - compute_resource_limits); - - radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3); - radeon_emit(cmd_buffer->cs, - S_00B81C_NUM_THREAD_FULL(compute_shader->info.cs.block_size[0])); - radeon_emit(cmd_buffer->cs, - S_00B81C_NUM_THREAD_FULL(compute_shader->info.cs.block_size[1])); - radeon_emit(cmd_buffer->cs, - S_00B81C_NUM_THREAD_FULL(compute_shader->info.cs.block_size[2])); - - assert(cmd_buffer->cs->cdw <= cdw_max); - if (unlikely(cmd_buffer->device->trace_bo)) radv_save_pipeline(cmd_buffer, pipeline, RING_COMPUTE); } |