summary | refs | log | tree | commit | diff | stats
path: root/src/gallium/drivers/radeonsi/radeonsi_compute.c
diff options
context:
space:
mode:
author: Tom Stellard <[email protected]> 2013-08-22 11:22:58 -0400
committer: Tom Stellard <[email protected]> 2013-11-21 16:14:58 -0800
commit: 1bdb99330ac68003a9ee6c963f46bddb03b9073d (patch)
tree: 2f82b3343c6a6d8817df8e48c27732fe53a35eba /src/gallium/drivers/radeonsi/radeonsi_compute.c
parent: 7a30cd7085b6879d3858f5c1a6945fbe30c818a6 (diff)
radeonsi/compute: Fix LDS size calculation
We need to include the number of LDS bytes allocated by the state tracker.

CC: "10.0" <[email protected]>
Diffstat (limited to 'src/gallium/drivers/radeonsi/radeonsi_compute.c')
-rw-r--r-- src/gallium/drivers/radeonsi/radeonsi_compute.c | 17
1 files changed, 16 insertions, 1 deletions
diff --git a/src/gallium/drivers/radeonsi/radeonsi_compute.c b/src/gallium/drivers/radeonsi/radeonsi_compute.c
index 5df972fb20f..2d53f2d9864 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_compute.c
+++ b/src/gallium/drivers/radeonsi/radeonsi_compute.c
@@ -103,6 +103,7 @@ static void radeonsi_launch_grid(
unsigned arg_user_sgpr_count = 2;
unsigned i;
struct si_pipe_shader *shader = &program->kernels[pc];
+ unsigned lds_blocks;
pm4->compute_pkt = true;
si_cmd_context_control(pm4);
@@ -194,6 +195,20 @@ static void radeonsi_launch_grid(
shader->num_sgprs)) - 1) / 8))
;
+ lds_blocks = shader->lds_size;
+ /* XXX: We are over allocating LDS. For SI, the shader reports LDS in
+ * blocks of 256 bytes, so if there are 4 bytes lds allocated in
+ * the shader and 4 bytes allocated by the state tracker, then
+ * we will set LDS_SIZE to 512 bytes rather than 256.
+ */
+ if (rctx->b.chip_class <= SI) {
+ lds_blocks += align(program->local_size, 256) >> 8;
+ } else {
+ lds_blocks += align(program->local_size, 512) >> 9;
+ }
+
+ assert(lds_blocks <= 0xFF);
+
si_pm4_set_reg(pm4, R_00B84C_COMPUTE_PGM_RSRC2,
S_00B84C_SCRATCH_EN(0)
| S_00B84C_USER_SGPR(arg_user_sgpr_count)
@@ -202,7 +217,7 @@ static void radeonsi_launch_grid(
| S_00B84C_TGID_Z_EN(1)
| S_00B84C_TG_SIZE_EN(1)
| S_00B84C_TIDIG_COMP_CNT(2)
- | S_00B84C_LDS_SIZE(shader->lds_size)
+ | S_00B84C_LDS_SIZE(lds_blocks)
| S_00B84C_EXCP_EN(0))
;
si_pm4_set_reg(pm4, R_00B854_COMPUTE_RESOURCE_LIMITS, 0);