summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMarek Olšák <[email protected]>2017-07-05 23:33:13 +0200
committerMarek Olšák <[email protected]>2017-07-17 10:50:39 -0400
commitfacfab28fe47467d3adb3eae179075e4424c3b82 (patch)
treee1455ff7da705d59e55c074bfbcadde07cd6f454
parent93391ac47895f2235a6102e7a923dfea3fb44fc4 (diff)
radeonsi/gfx9: add workarounds to avoid VGPR indexing completely
For inputs and outputs, indirect indexing is lowered by the GLSL compiler. For temporaries, use alloca and disable the "promote-alloca" pass. In the future, we could switch all codepaths to alloca permanently and just rely on the "promote-alloca" pass. Reviewed-by: Nicolai Hähnle <[email protected]>
-rw-r--r--src/gallium/drivers/radeonsi/si_pipe.c25
-rw-r--r--src/gallium/drivers/radeonsi/si_pipe.h1
-rw-r--r--src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c3
3 files changed, 21 insertions, 8 deletions
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index afb2bcbf078..8a4bc41a4ed 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -141,8 +141,9 @@ si_create_llvm_target_machine(struct si_screen *sscreen)
char features[256];
snprintf(features, sizeof(features),
- "+DumpCode,+vgpr-spilling,-fp32-denormals,+fp64-denormals%s%s",
+ "+DumpCode,+vgpr-spilling,-fp32-denormals,+fp64-denormals%s%s%s",
sscreen->b.chip_class >= GFX9 ? ",+xnack" : ",-xnack",
+ sscreen->llvm_has_working_vgpr_indexing ? "" : ",-promote-alloca",
sscreen->b.debug_flags & DBG_SI_SCHED ? ",+si-scheduler" : "");
return LLVMCreateTargetMachine(ac_get_llvm_target(triple), triple,
@@ -757,7 +758,6 @@ static int si_get_shader_param(struct pipe_screen* pscreen,
/* Supported boolean features. */
case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
- case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
case PIPE_SHADER_CAP_INTEGERS:
@@ -767,10 +767,18 @@ static int si_get_shader_param(struct pipe_screen* pscreen,
return 1;
case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
- /* TODO: Indirection of geometry shader input dimension is not
- * handled yet
- */
- return shader != PIPE_SHADER_GEOMETRY;
+ /* TODO: Indirect indexing of GS inputs is unimplemented. */
+ return shader != PIPE_SHADER_GEOMETRY &&
+ (sscreen->llvm_has_working_vgpr_indexing ||
+ /* TCS and TES load inputs directly from LDS or
+ * offchip memory, so indirect indexing is trivial. */
+ shader == PIPE_SHADER_TESS_CTRL ||
+ shader == PIPE_SHADER_TESS_EVAL);
+
+ case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
+ return sscreen->llvm_has_working_vgpr_indexing ||
+ /* TCS stores outputs directly to memory. */
+ shader == PIPE_SHADER_TESS_CTRL;
/* Unsupported boolean features. */
case PIPE_SHADER_CAP_SUBROUTINES:
@@ -1006,6 +1014,11 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
sscreen->b.family <= CHIP_POLARIS12) ||
sscreen->b.family == CHIP_VEGA10 ||
sscreen->b.family == CHIP_RAVEN;
+ /* While it would be nice not to have this flag, we are constrained
+ * by the reality that LLVM 5.0 doesn't have working VGPR indexing
+ * on GFX9.
+ */
+ sscreen->llvm_has_working_vgpr_indexing = sscreen->b.chip_class <= VI;
sscreen->b.has_cp_dma = true;
sscreen->b.has_streamout = true;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index bd724e80a06..c028aba3081 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -83,6 +83,7 @@ struct si_screen {
bool has_draw_indirect_multi;
bool has_ds_bpermute;
bool has_msaa_sample_loc_bug;
+ bool llvm_has_working_vgpr_indexing;
/* Whether shaders are monolithic (1-part) or separate (3-part). */
bool use_monolithic_shaders;
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
index b37d4b232b1..9c4a2343f54 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
@@ -755,8 +755,7 @@ static void emit_declaration(struct lp_build_tgsi_context *bld_base,
* promote allocas into registers when profitable.
*/
if (array_size > 16 ||
- /* TODO: VGPR indexing is buggy on GFX9. */
- ctx->screen->b.chip_class == GFX9) {
+ !ctx->screen->llvm_has_working_vgpr_indexing) {
array_alloca = LLVMBuildAlloca(builder,
LLVMArrayType(ctx->f32,
array_size), "array");