summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers
diff options
context:
space:
mode:
authorBas Nieuwenhuizen <[email protected]>2016-03-19 15:15:20 +0100
committerBas Nieuwenhuizen <[email protected]>2016-04-19 18:10:31 +0200
commitc3083d841e04e14d3682e55cf5d1004f5310e9d4 (patch)
tree0f1d7c7a8165dcdb73e404f317c2065ea2f26f5e /src/gallium/drivers
parent1349dd16ff4f0e40e9ecefcb818302752cd0bf38 (diff)
radeonsi: implement TGSI compute dispatch
v2: - Use radeon_set_sh_reg_seq. - Set predicate bit for conditional rendering. Signed-off-by: Bas Nieuwenhuizen <[email protected]> Reviewed-by: Marek Olšák <[email protected]> Reviewed-by: Nicolai Hähnle <[email protected]> Reviewed-by: Edward O'Callaghan <[email protected]>
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r--src/gallium/drivers/radeonsi/si_compute.c104
1 files changed, 77 insertions, 27 deletions
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index 12cbaedb866..1cdaf6c9ecf 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -354,13 +354,85 @@ static void si_upload_compute_input(struct si_context *sctx,
pipe_resource_reference((struct pipe_resource**)&input_buffer, NULL);
}
+static void si_setup_tgsi_grid(struct si_context *sctx,
+ const struct pipe_grid_info *info)
+{ /* Emits packets that load the three grid dimensions into three consecutive registers starting at grid_size_reg. */
+ struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
+ unsigned grid_size_reg = R_00B900_COMPUTE_USER_DATA_0 +
+ 4 * SI_SGPR_GRID_SIZE;
+ /* grid_size_reg is shifted right by 2 below, presumably turning a byte offset into a dword register index. */
+ if (info->indirect) {
+ uint64_t base_va = r600_resource(info->indirect)->gpu_address;
+ uint64_t va = base_va + info->indirect_offset; /* address of the first grid dimension in the indirect buffer */
+ int i;
+ /* Add the indirect buffer to the command stream's buffer list with read usage. */
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
+ (struct r600_resource *)info->indirect,
+ RADEON_USAGE_READ, RADEON_PRIO_DRAW_INDIRECT);
+ /* One COPY_DATA packet per dimension: memory at va + 4 * i -> register (grid_size_reg >> 2) + i. */
+ for (i = 0; i < 3; ++i) {
+ radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+ radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) |
+ COPY_DATA_DST_SEL(COPY_DATA_REG)); /* source is memory, destination is a register */
+ radeon_emit(cs, (va + 4 * i)); /* source address, low dword (assumes radeon_emit keeps only 32 bits) */
+ radeon_emit(cs, (va + 4 * i) >> 32); /* source address, bits 63:32 */
+ radeon_emit(cs, (grid_size_reg >> 2) + i); /* destination register for dimension i */
+ radeon_emit(cs, 0);
+ }
+ } else {
+ /* Direct dispatch: write the three registers in one sequence straight from info->grid[]. */
+ radeon_set_sh_reg_seq(cs, grid_size_reg, 3);
+ radeon_emit(cs, info->grid[0]);
+ radeon_emit(cs, info->grid[1]);
+ radeon_emit(cs, info->grid[2]);
+ }
+}
+
+static void si_emit_dispatch_packets(struct si_context *sctx,
+ const struct pipe_grid_info *info)
+{ /* Emits the thread-block size registers, then either an indirect or a direct dispatch packet. */
+ struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
+ bool render_cond_bit = sctx->b.render_cond && !sctx->b.render_cond_force_off; /* predicate bit for conditional rendering; third PKT3() argument of the dispatch packets */
+ /* Program the three consecutive NUM_THREAD registers with the block dimensions. */
+ radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3);
+ radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(info->block[0]));
+ radeon_emit(cs, S_00B820_NUM_THREAD_FULL(info->block[1]));
+ radeon_emit(cs, S_00B824_NUM_THREAD_FULL(info->block[2]));
+ /* Indirect path: the grid size is not passed here; the packets reference the indirect buffer instead. */
+ if (info->indirect) {
+ uint64_t base_va = r600_resource(info->indirect)->gpu_address;
+ /* Add the indirect buffer to the command stream's buffer list with read usage. */
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
+ (struct r600_resource *)info->indirect,
+ RADEON_USAGE_READ, RADEON_PRIO_DRAW_INDIRECT);
+ /* SET_BASE carries the buffer's base address; DISPATCH_INDIRECT below carries only info->indirect_offset. */
+ radeon_emit(cs, PKT3(PKT3_SET_BASE, 2, 0) |
+ PKT3_SHADER_TYPE_S(1));
+ radeon_emit(cs, 1); /* NOTE(review): presumably the base index selecting which base is set -- confirm */
+ radeon_emit(cs, base_va); /* base address, low dword (assumes radeon_emit keeps only 32 bits) */
+ radeon_emit(cs, base_va >> 32); /* base address, bits 63:32 */
+
+ radeon_emit(cs, PKT3(PKT3_DISPATCH_INDIRECT, 1, render_cond_bit) |
+ PKT3_SHADER_TYPE_S(1));
+ radeon_emit(cs, info->indirect_offset);
+ radeon_emit(cs, 1); /* NOTE(review): presumably DISPATCH_INITIATOR, as in the direct path -- confirm */
+ } else {
+ radeon_emit(cs, PKT3(PKT3_DISPATCH_DIRECT, 3, render_cond_bit) |
+ PKT3_SHADER_TYPE_S(1));
+ radeon_emit(cs, info->grid[0]); /* Thread groups DIM_X */
+ radeon_emit(cs, info->grid[1]); /* Thread groups DIM_Y */
+ radeon_emit(cs, info->grid[2]); /* Thread groups DIM_Z */
+ radeon_emit(cs, 1); /* DISPATCH_INITIATOR */
+ }
+}
+
+
static void si_launch_grid(
struct pipe_context *ctx, const struct pipe_grid_info *info)
{
struct si_context *sctx = (struct si_context*)ctx;
struct si_compute *program = sctx->cs_shader_state.program;
- struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
- unsigned i;
+ int i;
si_need_cs_space(sctx);
@@ -375,21 +447,12 @@ static void si_launch_grid(
SI_CONTEXT_FLAG_COMPUTE;
si_emit_cache_flush(sctx, NULL);
- pm4->compute_pkt = true;
-
if (!si_switch_compute_shader(sctx, program, &program->shader, info->pc))
return;
if (program->input_size || program->ir_type == PIPE_SHADER_IR_NATIVE)
si_upload_compute_input(sctx, info);
- si_pm4_set_reg(pm4, R_00B81C_COMPUTE_NUM_THREAD_X,
- S_00B81C_NUM_THREAD_FULL(info->block[0]));
- si_pm4_set_reg(pm4, R_00B820_COMPUTE_NUM_THREAD_Y,
- S_00B820_NUM_THREAD_FULL(info->block[1]));
- si_pm4_set_reg(pm4, R_00B824_COMPUTE_NUM_THREAD_Z,
- S_00B824_NUM_THREAD_FULL(info->block[2]));
-
/* Global buffers */
for (i = 0; i < MAX_GLOBAL_BUFFERS; i++) {
struct r600_resource *buffer =
@@ -402,23 +465,10 @@ static void si_launch_grid(
RADEON_PRIO_COMPUTE_GLOBAL);
}
- si_pm4_cmd_begin(pm4, PKT3_DISPATCH_DIRECT);
- si_pm4_cmd_add(pm4, info->grid[0]); /* Thread groups DIM_X */
- si_pm4_cmd_add(pm4, info->grid[1]); /* Thread groups DIM_Y */
- si_pm4_cmd_add(pm4, info->grid[2]); /* Thread gropus DIM_Z */
- si_pm4_cmd_add(pm4, 1); /* DISPATCH_INITIATOR */
- si_pm4_cmd_end(pm4, false);
-
- si_pm4_emit(sctx, pm4);
-
-#if 0
- fprintf(stderr, "cdw: %i\n", sctx->cs->cdw);
- for (i = 0; i < sctx->cs->cdw; i++) {
- fprintf(stderr, "%4i : 0x%08X\n", i, sctx->cs->buf[i]);
- }
-#endif
+ if (program->ir_type == PIPE_SHADER_IR_TGSI)
+ si_setup_tgsi_grid(sctx, info);
- si_pm4_free_state(sctx, pm4, ~0);
+ si_emit_dispatch_packets(sctx, info);
sctx->b.flags |= SI_CONTEXT_CS_PARTIAL_FLUSH |
SI_CONTEXT_INV_VMEM_L1 |