summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMarek Olšák <[email protected]>2018-07-13 00:23:36 -0400
committerMarek Olšák <[email protected]>2018-07-23 20:23:52 -0400
commit86b52d42368ac496fe24bc6674e754c323381635 (patch)
tree755cd4b6dad029f8882473bca8fdcdc78a42f78e
parent0866edede0116e33b4bed28737e4d242ad0da2ad (diff)
radeonsi: reduce LDS stalls by 40% for tessellation
40% is the decrease in the LGKM counter (which includes SMEM too) for the GFX9 LSHS stage. This will make the LDS size slightly larger, but I wasn't able to increase the patch stride without corruption, so I'm increasing the vertex stride.
-rw-r--r--src/gallium/drivers/radeonsi/si_shader.c8
-rw-r--r--src/gallium/drivers/radeonsi/si_shader.h3
-rw-r--r--src/gallium/drivers/radeonsi/si_state_draw.c2
-rw-r--r--src/gallium/drivers/radeonsi/si_state_shaders.c7
4 files changed, 14 insertions, 6 deletions
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 5dc12d87243..43ba23ff494 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -417,14 +417,14 @@ static LLVMValueRef get_tcs_in_vertex_dw_stride(struct si_shader_context *ctx)
switch (ctx->type) {
case PIPE_SHADER_VERTEX:
- stride = util_last_bit64(ctx->shader->selector->outputs_written);
- return LLVMConstInt(ctx->i32, stride * 4, 0);
+ stride = ctx->shader->selector->lshs_vertex_stride / 4;
+ return LLVMConstInt(ctx->i32, stride, 0);
case PIPE_SHADER_TESS_CTRL:
if (ctx->screen->info.chip_class >= GFX9 &&
ctx->shader->is_monolithic) {
- stride = util_last_bit64(ctx->shader->key.part.tcs.ls->outputs_written);
- return LLVMConstInt(ctx->i32, stride * 4, 0);
+ stride = ctx->shader->key.part.tcs.ls->lshs_vertex_stride / 4;
+ return LLVMConstInt(ctx->i32, stride, 0);
}
return si_unpack_param(ctx, ctx->param_vs_state_bits, 24, 8);
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index a2fb48ab023..2dc4bc7e787 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -368,7 +368,8 @@ struct si_shader_selector {
ubyte culldist_mask;
/* ES parameters. */
- unsigned esgs_itemsize;
+ unsigned esgs_itemsize; /* vertex stride */
+ unsigned lshs_vertex_stride;
/* GS parameters. */
unsigned gs_input_verts_per_prim;
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index f35f73a37ce..d901401f0bb 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -134,7 +134,7 @@ static bool si_emit_derived_tess_state(struct si_context *sctx,
num_tcs_patch_outputs = 2; /* TESSINNER + TESSOUTER */
}
- input_vertex_size = num_tcs_inputs * 16;
+ input_vertex_size = ls->lshs_vertex_stride;
output_vertex_size = num_tcs_outputs * 16;
input_patch_size = num_tcs_input_cp * input_vertex_size;
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 4e0320a226d..de33d250301 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -2160,6 +2160,13 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
}
}
sel->esgs_itemsize = util_last_bit64(sel->outputs_written) * 16;
+ sel->lshs_vertex_stride = sel->esgs_itemsize;
+
+ /* Add 1 dword to reduce LDS bank conflicts, so that each vertex
+ * will start on a different bank. (except for the maximum 32*16).
+ */
+ if (sel->lshs_vertex_stride < 32*16)
+ sel->lshs_vertex_stride += 4;
/* For the ESGS ring in LDS, add 1 dword to reduce LDS bank
* conflicts, i.e. each vertex will start at a different bank.