summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Bas Nieuwenhuizen <[email protected]> 2016-04-02 12:35:36 +0200
committer: Bas Nieuwenhuizen <[email protected]> 2016-04-19 18:10:31 +0200
commit: 107f4d3538e6eeab396bf41a4d4334950adf81ac (patch)
tree: 16869d65877bdcf1af234d889681ff5837cc3cca
parent: 52d3584decfa00f3f96633edd7f2b6c4a0febabf (diff)
radeonsi: do per cs setup for compute shaders once per cs
Also removes PKT3_CONTEXT_CONTROL as that is already being done by
si_begin_new_cs, when emitting init_config.

v2: - Use radeon_set_sh_reg_seq.
    - Also set COMPUTE_STATIC_THREAD_MGMT_SE2 / SE3 for CIK+

Signed-off-by: Bas Nieuwenhuizen <[email protected]>
Reviewed-by: Nicolai Hähnle <[email protected]>
Reviewed-by: Marek Olšák <[email protected]>
-rw-r--r-- src/gallium/drivers/radeonsi/si_compute.c 77
-rw-r--r-- src/gallium/drivers/radeonsi/si_hw_context.c 2
-rw-r--r-- src/gallium/drivers/radeonsi/si_pipe.h 1
3 files changed, 48 insertions, 32 deletions
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index a3ba450bdad..2ce9306d069 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -230,6 +230,47 @@ static unsigned compute_num_waves_for_scratch(
return scratch_waves;
}
+static void si_initialize_compute(struct si_context *sctx)
+{
+ struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
+ /* Called from si_launch_grid only while cs_shader_state.initialized is false. */
+ radeon_set_sh_reg_seq(cs, R_00B810_COMPUTE_START_X, 3);
+ radeon_emit(cs, 0); /* R_00B810_COMPUTE_START_X */
+ radeon_emit(cs, 0); /* R_00B814_COMPUTE_START_Y */
+ radeon_emit(cs, 0); /* R_00B818_COMPUTE_START_Z */
+
+ radeon_set_sh_reg_seq(cs, R_00B854_COMPUTE_RESOURCE_LIMITS, 3);
+ radeon_emit(cs, 0); /* R_00B854_COMPUTE_RESOURCE_LIMITS */
+ /* R_00B858 / R_00B85C: COMPUTE_STATIC_THREAD_MGMT_SE0 / SE1 */
+ radeon_emit(cs, S_00B858_SH0_CU_EN(0xffff) | S_00B858_SH1_CU_EN(0xffff));
+ radeon_emit(cs, S_00B85C_SH0_CU_EN(0xffff) | S_00B85C_SH1_CU_EN(0xffff));
+
+ if (sctx->b.chip_class >= CIK) {
+ /* Also set R_00B864_COMPUTE_STATIC_THREAD_MGMT_SE2 / SE3 */
+ radeon_set_sh_reg_seq(cs,
+ R_00B864_COMPUTE_STATIC_THREAD_MGMT_SE2, 2);
+ radeon_emit(cs, S_00B864_SH0_CU_EN(0xffff) |
+ S_00B864_SH1_CU_EN(0xffff));
+ radeon_emit(cs, S_00B868_SH0_CU_EN(0xffff) |
+ S_00B868_SH1_CU_EN(0xffff));
+ }
+
+ /* R_00B82C_COMPUTE_MAX_WAVE_ID is only programmed for chip_class <= SI.
+ * Elsewhere the register has been moved to R_00CD20_COMPUTE_MAX_WAVE_ID
+ * and is now per pipe, so it should be handled in the kernel if we want
+ * to use something other than the default value, which is now 0x22f.
+ */
+ if (sctx->b.chip_class <= SI) {
+ /* XXX: This should be:
+ * (number of compute units) * 4 * (waves per simd) - 1 */
+
+ radeon_set_sh_reg(cs, R_00B82C_COMPUTE_MAX_WAVE_ID,
+ 0x190 /* Default value */);
+ }
+
+ sctx->cs_shader_state.initialized = true;
+}
+
static void si_upload_compute_input(struct si_context *sctx,
const struct pipe_grid_info *info)
{
@@ -284,7 +325,6 @@ static void si_launch_grid(
struct pipe_context *ctx, const struct pipe_grid_info *info)
{
struct si_context *sctx = (struct si_context*)ctx;
- struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
struct si_compute *program = sctx->cs_shader_state.program;
struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
uint64_t shader_va;
@@ -293,9 +333,10 @@ static void si_launch_grid(
unsigned lds_blocks;
unsigned num_waves_for_scratch;
- radeon_emit(cs, PKT3(PKT3_CONTEXT_CONTROL, 1, 0) | PKT3_SHADER_TYPE_S(1));
- radeon_emit(cs, 0x80000000);
- radeon_emit(cs, 0x80000000);
+ si_need_cs_space(sctx);
+
+ if (!sctx->cs_shader_state.initialized)
+ si_initialize_compute(sctx);
sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
SI_CONTEXT_INV_GLOBAL_L2 |
@@ -330,10 +371,6 @@ static void si_launch_grid(
RADEON_PRIO_SCRATCH_BUFFER);
}
- si_pm4_set_reg(pm4, R_00B810_COMPUTE_START_X, 0);
- si_pm4_set_reg(pm4, R_00B814_COMPUTE_START_Y, 0);
- si_pm4_set_reg(pm4, R_00B818_COMPUTE_START_Z, 0);
-
si_pm4_set_reg(pm4, R_00B81C_COMPUTE_NUM_THREAD_X,
S_00B81C_NUM_THREAD_FULL(info->block[0]));
si_pm4_set_reg(pm4, R_00B820_COMPUTE_NUM_THREAD_Y,
@@ -353,19 +390,6 @@ static void si_launch_grid(
RADEON_PRIO_COMPUTE_GLOBAL);
}
- /* This register has been moved to R_00CD20_COMPUTE_MAX_WAVE_ID
- * and is now per pipe, so it should be handled in the
- * kernel if we want to use something other than the default value,
- * which is now 0x22f.
- */
- if (sctx->b.chip_class <= SI) {
- /* XXX: This should be:
- * (number of compute units) * 4 * (waves per simd) - 1 */
-
- si_pm4_set_reg(pm4, R_00B82C_COMPUTE_MAX_WAVE_ID,
- 0x190 /* Default value */);
- }
-
shader_va = shader->bo->gpu_address;
shader_va += info->pc;
@@ -394,17 +418,6 @@ static void si_launch_grid(
shader->config.rsrc2 |= S_00B84C_LDS_SIZE(lds_blocks);
si_pm4_set_reg(pm4, R_00B84C_COMPUTE_PGM_RSRC2, shader->config.rsrc2);
- si_pm4_set_reg(pm4, R_00B854_COMPUTE_RESOURCE_LIMITS, 0);
-
- si_pm4_set_reg(pm4, R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0,
- S_00B858_SH0_CU_EN(0xffff /* Default value */)
- | S_00B858_SH1_CU_EN(0xffff /* Default value */))
- ;
-
- si_pm4_set_reg(pm4, R_00B85C_COMPUTE_STATIC_THREAD_MGMT_SE1,
- S_00B85C_SH0_CU_EN(0xffff /* Default value */)
- | S_00B85C_SH1_CU_EN(0xffff /* Default value */))
- ;
num_waves_for_scratch =
MIN2(num_waves_for_scratch,
diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c
index 5294898aa7f..69fecce0308 100644
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c
@@ -244,4 +244,6 @@ void si_begin_new_cs(struct si_context *ctx)
ctx->last_tcs = NULL;
ctx->last_tes_sh_base = -1;
ctx->last_num_tcs_input_cp = -1;
+
+ ctx->cs_shader_state.initialized = false;
}
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 1540c7f1014..91ccbea6ba3 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -133,6 +133,7 @@ struct si_sampler_state {
struct si_cs_shader_state {
struct si_compute *program;
+ bool initialized; /* set by si_initialize_compute, cleared by si_begin_new_cs */
};
struct si_textures_info {