diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/amd/vulkan/radv_cmd_buffer.c | 41 | ||||
-rw-r--r-- | src/amd/vulkan/radv_device.c | 3 |
2 files changed, 41 insertions, 3 deletions
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 9238f76305d..1e7d035d344 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -3182,6 +3182,11 @@ struct radv_dispatch_info { uint32_t blocks[3]; /** + * A starting offset for the grid. If unaligned is set, the offset + * must still be aligned. + */ + uint32_t offsets[3]; + /** * Whether it's an unaligned compute dispatch. */ bool unaligned; @@ -3249,6 +3254,7 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, } } else { unsigned blocks[3] = { info->blocks[0], info->blocks[1], info->blocks[2] }; + unsigned offsets[3] = { info->offsets[0], info->offsets[1], info->offsets[2] }; if (info->unaligned) { unsigned *cs_block_size = compute_shader->info.cs.block_size; @@ -3268,6 +3274,11 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, blocks[1] = round_up_u32(blocks[1], cs_block_size[1]); blocks[2] = round_up_u32(blocks[2], cs_block_size[2]); + for(unsigned i = 0; i < 3; ++i) { + assert(offsets[i] % cs_block_size[i] == 0); + offsets[i] /= cs_block_size[i]; + } + radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3); radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(cs_block_size[0]) | @@ -3293,6 +3304,19 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, radeon_emit(cs, blocks[2]); } + if (offsets[0] || offsets[1] || offsets[2]) { + radeon_set_sh_reg_seq(cs, R_00B810_COMPUTE_START_X, 3); + radeon_emit(cs, offsets[0]); + radeon_emit(cs, offsets[1]); + radeon_emit(cs, offsets[2]); + + /* The blocks in the packet are not counts but end values. */ + for (unsigned i = 0; i < 3; ++i) + blocks[i] += offsets[i]; + } else { + dispatch_initiator |= S_00B800_FORCE_START_AT_000(1); + } + radeon_emit(cs, PKT3(PKT3_DISPATCH_DIRECT, 3, 0) | PKT3_SHADER_TYPE_S(1)); radeon_emit(cs, blocks[0]); @@ -3368,8 +3392,11 @@ radv_dispatch(struct radv_cmd_buffer *cmd_buffer, radv_cmd_buffer_after_draw(cmd_buffer, RADV_CMD_FLAG_CS_PARTIAL_FLUSH); } -void radv_CmdDispatch( +void radv_CmdDispatchBase( VkCommandBuffer commandBuffer, + uint32_t base_x, + uint32_t base_y, + uint32_t base_z, uint32_t x, uint32_t y, uint32_t z) @@ -3381,9 +3408,21 @@ void radv_CmdDispatch( info.blocks[1] = y; info.blocks[2] = z; + info.offsets[0] = base_x; + info.offsets[1] = base_y; + info.offsets[2] = base_z; radv_dispatch(cmd_buffer, &info); } +void radv_CmdDispatch( + VkCommandBuffer commandBuffer, + uint32_t x, + uint32_t y, + uint32_t z) +{ + radv_CmdDispatchBase(commandBuffer, 0, 0, 0, x, y, z); +} + void radv_CmdDispatchIndirect( VkCommandBuffer commandBuffer, VkBuffer _buffer, diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 586a6e6b7cd..f672daff842 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -1254,8 +1254,7 @@ VkResult radv_CreateDevice( device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units, max_threads_per_block / 64); - device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1) | - S_00B800_FORCE_START_AT_000(1); + device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1); if (device->physical_device->rad_info.chip_class >= CIK) { /* If the KMD allows it (there is a KMD hw register for it), |