summary | refs | log | tree | commit | diff | stats
path: root/src/gallium/drivers/r600
diff options
context:
space:
mode:
author: Tom Stellard <[email protected]> 2012-07-24 16:59:05 +0000
committer: Tom Stellard <[email protected]> 2012-07-27 17:08:09 +0000
commit: fdd8df20e4a730f80bf4c331012d832bffd7072e (patch)
tree: 3944202a4e905ccbb6e8fae4c103a02cfcad7542 /src/gallium/drivers/r600
parent: dc0b8a46289d0e6b10c542df0856d51a0aabf9b0 (diff)
r600g: Emit dispatch state for compute directly to the cs
We no longer rely on an evergreen_compute_resource for emitting dispatch state.

Reviewed-by: Marek Olšák <[email protected]>
Diffstat (limited to 'src/gallium/drivers/r600')
-rw-r--r-- src/gallium/drivers/r600/evergreen_compute.c | 108
-rw-r--r-- src/gallium/drivers/r600/evergreen_compute.h | 1
-rw-r--r-- src/gallium/drivers/r600/evergreen_compute_internal.c | 19
-rw-r--r-- src/gallium/drivers/r600/evergreen_compute_internal.h | 1
4 files changed, 60 insertions, 69 deletions
diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c
index 5e43faebeb1..0d6eb4e6f54 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -249,37 +249,21 @@ void evergreen_compute_upload_input(
shader->input_size, 0);
}
-void evergreen_direct_dispatch(
- struct pipe_context *ctx_,
+static void evergreen_emit_direct_dispatch(
+ struct r600_context *rctx,
const uint *block_layout, const uint *grid_layout)
{
- /* This struct r600_context* must be called rctx, because the
- * r600_pipe_state_add_reg macro assumes there is a local variable
- * of type struct r600_context* called rctx.
- */
- struct r600_context *rctx = (struct r600_context *)ctx_;
- struct r600_pipe_compute *shader = rctx->cs_shader_state.shader;
-
int i;
-
- struct evergreen_compute_resource* res = get_empty_res(shader,
- COMPUTE_RESOURCE_DISPATCH, 0);
-
- /* Set CB_TARGET_MASK */
- evergreen_reg_set(res, R_028238_CB_TARGET_MASK, rctx->compute_cb_target_mask);
-
- evergreen_reg_set(res, R_00899C_VGT_COMPUTE_START_X, 0);
- evergreen_reg_set(res, R_0089A0_VGT_COMPUTE_START_Y, 0);
- evergreen_reg_set(res, R_0089A4_VGT_COMPUTE_START_Z, 0);
-
- evergreen_reg_set(res, R_0286EC_SPI_COMPUTE_NUM_THREAD_X, block_layout[0]);
- evergreen_reg_set(res, R_0286F0_SPI_COMPUTE_NUM_THREAD_Y, block_layout[1]);
- evergreen_reg_set(res, R_0286F4_SPI_COMPUTE_NUM_THREAD_Z, block_layout[2]);
-
+ struct radeon_winsys_cs *cs = rctx->cs;
+ unsigned num_waves;
+ unsigned num_pipes = rctx->screen->info.r600_max_pipes;
+ unsigned wave_divisor = (16 * num_pipes);
int group_size = 1;
-
int grid_size = 1;
+ /* XXX: Enable lds and get size from cs_shader_state */
+ unsigned lds_size = 0;
+ /* Calculate group_size/grid_size */
for (i = 0; i < 3; i++) {
group_size *= block_layout[i];
}
@@ -288,18 +272,50 @@ void evergreen_direct_dispatch(
grid_size *= grid_layout[i];
}
- evergreen_reg_set(res, R_008970_VGT_NUM_INDICES, group_size);
- evergreen_reg_set(res, R_0089AC_VGT_COMPUTE_THREAD_GROUP_SIZE, group_size);
+ /* num_waves = ceil((tg_size.x * tg_size.y * tg_size.z) / (16 * num_pipes)) */
+ num_waves = (block_layout[0] * block_layout[1] * block_layout[2] +
+ wave_divisor - 1) / wave_divisor;
+
+ COMPUTE_DBG("Using %u pipes, there are %u wavefronts per thread block\n",
+ num_pipes, num_waves);
- evergreen_emit_raw_value(res, PKT3C(PKT3_DISPATCH_DIRECT, 3, 0));
- evergreen_emit_raw_value(res, grid_layout[0]);
- evergreen_emit_raw_value(res, grid_layout[1]);
- evergreen_emit_raw_value(res, grid_layout[2]);
- ///VGT_DISPATCH_INITIATOR = COMPUTE_SHADER_EN
- evergreen_emit_raw_value(res, 1);
+ /* XXX: Partition the LDS between PS/CS. By default half (4096 dwords
+ * on Evergreen) goes to Pixel Shaders and half goes to Compute Shaders.
+ * We may need to allocate the entire LDS space for Compute Shaders.
+ *
+ * EG: R_008E2C_SQ_LDS_RESOURCE_MGMT := S_008E2C_NUM_LS_LDS(lds_dwords)
+ * CM: CM_R_0286FC_SPI_LDS_MGMT := S_0286FC_NUM_LS_LDS(lds_dwords)
+ */
+
+ r600_write_config_reg(cs, R_008970_VGT_NUM_INDICES, group_size);
+
+ r600_write_config_reg_seq(cs, R_00899C_VGT_COMPUTE_START_X, 3);
+ r600_write_value(cs, 0); /* R_00899C_VGT_COMPUTE_START_X */
+ r600_write_value(cs, 0); /* R_0089A0_VGT_COMPUTE_START_Y */
+ r600_write_value(cs, 0); /* R_0089A4_VGT_COMPUTE_START_Z */
+
+ r600_write_config_reg(cs, R_0089AC_VGT_COMPUTE_THREAD_GROUP_SIZE,
+ group_size);
+
+ r600_write_compute_context_reg_seq(cs, R_0286EC_SPI_COMPUTE_NUM_THREAD_X, 3);
+ r600_write_value(cs, block_layout[0]); /* R_0286EC_SPI_COMPUTE_NUM_THREAD_X */
+ r600_write_value(cs, block_layout[1]); /* R_0286F0_SPI_COMPUTE_NUM_THREAD_Y */
+ r600_write_value(cs, block_layout[2]); /* R_0286F4_SPI_COMPUTE_NUM_THREAD_Z */
+
+ r600_write_compute_context_reg(cs, CM_R_0288E8_SQ_LDS_ALLOC,
+ lds_size | (num_waves << 14));
+
+ /* Dispatch packet */
+ r600_write_value(cs, PKT3C(PKT3_DISPATCH_DIRECT, 3, 0));
+ r600_write_value(cs, grid_layout[0]);
+ r600_write_value(cs, grid_layout[1]);
+ r600_write_value(cs, grid_layout[2]);
+ /* VGT_DISPATCH_INITIATOR = COMPUTE_SHADER_EN */
+ r600_write_value(cs, 1);
}
-static void compute_emit_cs(struct r600_context *ctx)
+static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
+ const uint *grid_layout)
{
struct radeon_winsys_cs *cs = ctx->cs;
int i;
@@ -330,6 +346,11 @@ static void compute_emit_cs(struct r600_context *ctx)
cb_state = ctx->states[R600_PIPE_STATE_FRAMEBUFFER];
r600_context_pipe_state_emit(ctx, cb_state, RADEON_CP_PACKET3_COMPUTE_MODE);
+ /* Set CB_TARGET_MASK XXX: Use cb_misc_state */
+ r600_write_compute_context_reg(cs, R_028238_CB_TARGET_MASK,
+ ctx->compute_cb_target_mask);
+
+
/* Emit vertex buffer state */
ctx->cs_vertex_buffer_state.atom.num_dw = 12 * util_bitcount(ctx->cs_vertex_buffer_state.dirty_mask);
r600_emit_atom(ctx, &ctx->cs_vertex_buffer_state.atom);
@@ -370,6 +391,9 @@ static void compute_emit_cs(struct r600_context *ctx)
}
}
+ /* Emit dispatch state and dispatch packet */
+ evergreen_emit_direct_dispatch(ctx, block_layout, grid_layout);
+
/* r600_flush_framebuffer() updates the cb_flush_flags and then
* calls r600_emit_atom() on the ctx->surface_sync_cmd.atom, which emits
* a SURFACE_SYNC packet via r600_emit_surface_sync().
@@ -438,24 +462,12 @@ static void evergreen_launch_grid(
const uint *block_layout, const uint *grid_layout,
uint32_t pc, const void *input)
{
- COMPUTE_DBG("PC: %i\n", pc);
-
struct r600_context *ctx = (struct r600_context *)ctx_;
- unsigned num_waves;
- unsigned num_pipes = ctx->screen->info.r600_max_pipes;
- unsigned wave_divisor = (16 * num_pipes);
-
- /* num_waves = ceil((tg_size.x * tg_size.y, tg_size.z) / (16 * num_pipes)) */
- num_waves = (block_layout[0] * block_layout[1] * block_layout[2] +
- wave_divisor - 1) / wave_divisor;
- COMPUTE_DBG("Using %u pipes, there are %u wavefronts per thread block\n",
- num_pipes, num_waves);
+ COMPUTE_DBG("PC: %i\n", pc);
- evergreen_set_lds(ctx->cs_shader_state.shader, 0, 0, num_waves);
evergreen_compute_upload_input(ctx_, block_layout, grid_layout, input);
- evergreen_direct_dispatch(ctx_, block_layout, grid_layout);
- compute_emit_cs(ctx);
+ compute_emit_cs(ctx, block_layout, grid_layout);
}
static void evergreen_set_compute_resources(struct pipe_context * ctx_,
diff --git a/src/gallium/drivers/r600/evergreen_compute.h b/src/gallium/drivers/r600/evergreen_compute.h
index df3fb83a317..f29d91b9349 100644
--- a/src/gallium/drivers/r600/evergreen_compute.h
+++ b/src/gallium/drivers/r600/evergreen_compute.h
@@ -34,7 +34,6 @@ struct evergreen_compute_resource;
void *evergreen_create_compute_state(struct pipe_context *ctx, const const struct pipe_compute_state *cso);
void evergreen_delete_compute_state(struct pipe_context *ctx, void *state);
-void evergreen_direct_dispatch( struct pipe_context *context, const uint *block_layout, const uint *grid_layout);
void evergreen_compute_upload_input(struct pipe_context *context, const uint *block_layout, const uint *grid_layout, const void *input);
void evergreen_init_atom_start_compute_cs(struct r600_context *rctx);
void evergreen_init_compute_state_functions(struct r600_context *rctx);
diff --git a/src/gallium/drivers/r600/evergreen_compute_internal.c b/src/gallium/drivers/r600/evergreen_compute_internal.c
index 1d11bab8274..ac884b4e3fb 100644
--- a/src/gallium/drivers/r600/evergreen_compute_internal.c
+++ b/src/gallium/drivers/r600/evergreen_compute_internal.c
@@ -301,25 +301,6 @@ void evergreen_set_rat(
r600_context_pipe_state_set(pipe->ctx, state);
}
-void evergreen_set_lds(
- struct r600_pipe_compute *pipe,
- int num_lds,
- int size,
- int num_waves)
-{
- struct evergreen_compute_resource* res =
- get_empty_res(pipe, COMPUTE_RESOURCE_LDS, 0);
-
- if (pipe->ctx->chip_class < CAYMAN) {
- evergreen_reg_set(res, R_008E2C_SQ_LDS_RESOURCE_MGMT,
- S_008E2C_NUM_LS_LDS(num_lds));
- } else {
- evergreen_reg_set(res, CM_R_0286FC_SPI_LDS_MGMT,
- S_0286FC_NUM_LS_LDS(num_lds));
- }
- evergreen_reg_set(res, CM_R_0288E8_SQ_LDS_ALLOC, size | num_waves << 14);
-}
-
void evergreen_set_gds(
struct r600_pipe_compute *pipe,
uint32_t addr,
diff --git a/src/gallium/drivers/r600/evergreen_compute_internal.h b/src/gallium/drivers/r600/evergreen_compute_internal.h
index 5fa9c48fb5e..2bef261ebb5 100644
--- a/src/gallium/drivers/r600/evergreen_compute_internal.h
+++ b/src/gallium/drivers/r600/evergreen_compute_internal.h
@@ -102,7 +102,6 @@ int get_compute_resource_num(void);
#define evergreen_mult_reg_set(res, index, array) evergreen_mult_reg_set_(res, index, array, sizeof(array))
void evergreen_set_rat(struct r600_pipe_compute *pipe, int id, struct r600_resource* bo, int start, int size);
-void evergreen_set_lds(struct r600_pipe_compute *pipe, int num_lds, int size, int num_waves);
void evergreen_set_gds(struct r600_pipe_compute *pipe, uint32_t addr, uint32_t size);
void evergreen_set_export(struct r600_pipe_compute *pipe, struct r600_resource* bo, int offset, int size);
void evergreen_set_loop_const(struct r600_pipe_compute *pipe, int id, int count, int init, int inc);