From 620d11aed40088a33011eb62dee911ac5b5a6985 Mon Sep 17 00:00:00 2001
From: Tom Stellard
Date: Thu, 22 Aug 2013 11:22:58 -0400
Subject: radeonsi/compute: Fix LDS size calculation

We need to include the number of LDS bytes allocated by the state tracker.

CC: "10.0"
(cherry picked from commit 1bdb99330ac68003a9ee6c963f46bddb03b9073d)
---
 src/gallium/drivers/radeonsi/radeonsi_compute.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/radeonsi_compute.c b/src/gallium/drivers/radeonsi/radeonsi_compute.c
index 5df972fb20f..2d53f2d9864 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_compute.c
+++ b/src/gallium/drivers/radeonsi/radeonsi_compute.c
@@ -103,6 +103,7 @@ static void radeonsi_launch_grid(
 	unsigned arg_user_sgpr_count = 2;
 	unsigned i;
 	struct si_pipe_shader *shader = &program->kernels[pc];
+	unsigned lds_blocks;
 
 	pm4->compute_pkt = true;
 	si_cmd_context_control(pm4);
@@ -194,6 +195,20 @@ static void radeonsi_launch_grid(
 		                        shader->num_sgprs)) - 1) / 8))
 		;
 
+	lds_blocks = shader->lds_size;
+	/* XXX: We are over allocating LDS. For SI, the shader reports LDS in
+	 * blocks of 256 bytes, so if there are 4 bytes lds allocated in
+	 * the shader and 4 bytes allocated by the state tracker, then
+	 * we will set LDS_SIZE to 512 bytes rather than 256.
+	 */
+	if (rctx->b.chip_class <= SI) {
+		lds_blocks += align(program->local_size, 256) >> 8;
+	} else {
+		lds_blocks += align(program->local_size, 512) >> 9;
+	}
+
+	assert(lds_blocks <= 0xFF);
+
 	si_pm4_set_reg(pm4, R_00B84C_COMPUTE_PGM_RSRC2,
 		S_00B84C_SCRATCH_EN(0)
 		| S_00B84C_USER_SGPR(arg_user_sgpr_count)
@@ -202,7 +217,7 @@ static void radeonsi_launch_grid(
 		| S_00B84C_TGID_Z_EN(1)
 		| S_00B84C_TG_SIZE_EN(1)
 		| S_00B84C_TIDIG_COMP_CNT(2)
-		| S_00B84C_LDS_SIZE(shader->lds_size)
+		| S_00B84C_LDS_SIZE(lds_blocks)
 		| S_00B84C_EXCP_EN(0))
 		;
 	si_pm4_set_reg(pm4, R_00B854_COMPUTE_RESOURCE_LIMITS, 0);
-- 
cgit v1.2.3