summary | refs | log | tree | commit | diff | stats
path: root/src/gallium
diff options
context:
space:
mode:
author: Marek Olšák <[email protected]> 2018-07-19 22:55:49 -0400
committer: Marek Olšák <[email protected]> 2018-08-01 15:25:18 -0400
commit: cb6b241c301d5352a5bcaab52bbfaf89e700b2b2 (patch)
tree: ff4e139b3e472f4b1afc4ca6eb9f2700db79c3e4 /src/gallium
parent: c2eab33b088f90cb66802a9e96e92305cccebdc7 (diff)
ac,radeonsi: reduce optimizations for complex compute shaders on older APUs (v2)
This makes dEQP-GLES31.functional.ssbo.layout.random.all_shared_buffer.23 finish sooner on the older CPUs (otherwise the test gets killed and we fail it).

Acked-by: Dave Airlie <[email protected]>
Diffstat (limited to 'src/gallium')
-rw-r--r-- src/gallium/drivers/radeonsi/si_pipe.c | 12
-rw-r--r-- src/gallium/drivers/radeonsi/si_shader.c | 29
-rw-r--r-- src/gallium/drivers/radeonsi/si_shader_internal.h | 3
-rw-r--r-- src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c | 8
4 files changed, 43 insertions, 9 deletions
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 9e3a579d743..cc05d2f8de3 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -108,22 +108,32 @@ static const struct debug_named_value debug_options[] = {
static void si_init_compiler(struct si_screen *sscreen,
struct ac_llvm_compiler *compiler)
{
+ /* Only create the less-optimizing version of the compiler on APUs
+ * predating Ryzen (Raven). */
+ bool create_low_opt_compiler = !sscreen->info.has_dedicated_vram &&
+ sscreen->info.chip_class <= VI;
+
enum ac_target_machine_options tm_options =
(sscreen->debug_flags & DBG(SI_SCHED) ? AC_TM_SISCHED : 0) |
(sscreen->debug_flags & DBG(GISEL) ? AC_TM_ENABLE_GLOBAL_ISEL : 0) |
(sscreen->info.chip_class >= GFX9 ? AC_TM_FORCE_ENABLE_XNACK : 0) |
(sscreen->info.chip_class < GFX9 ? AC_TM_FORCE_DISABLE_XNACK : 0) |
(!sscreen->llvm_has_working_vgpr_indexing ? AC_TM_PROMOTE_ALLOCA_TO_SCRATCH : 0) |
- (sscreen->debug_flags & DBG(CHECK_IR) ? AC_TM_CHECK_IR : 0);
+ (sscreen->debug_flags & DBG(CHECK_IR) ? AC_TM_CHECK_IR : 0) |
+ (create_low_opt_compiler ? AC_TM_CREATE_LOW_OPT : 0);
ac_init_llvm_once();
ac_init_llvm_compiler(compiler, true, sscreen->info.family, tm_options);
compiler->passes = ac_create_llvm_passes(compiler->tm);
+
+ if (compiler->low_opt_tm)
+ compiler->low_opt_passes = ac_create_llvm_passes(compiler->low_opt_tm);
}
static void si_destroy_compiler(struct ac_llvm_compiler *compiler)
{
ac_destroy_llvm_passes(compiler->passes);
+ ac_destroy_llvm_passes(compiler->low_opt_passes);
ac_destroy_llvm_compiler(compiler);
}
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 43ba23ff494..405833d3ba7 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5645,7 +5645,8 @@ static int si_compile_llvm(struct si_screen *sscreen,
LLVMModuleRef mod,
struct pipe_debug_callback *debug,
unsigned processor,
- const char *name)
+ const char *name,
+ bool less_optimized)
{
int r = 0;
unsigned count = p_atomic_inc_return(&sscreen->num_compilations);
@@ -5667,7 +5668,8 @@ static int si_compile_llvm(struct si_screen *sscreen,
}
if (!si_replace_shader(count, binary)) {
- r = si_llvm_compile(mod, binary, compiler, debug);
+ r = si_llvm_compile(mod, binary, compiler, debug,
+ less_optimized);
if (r)
return r;
}
@@ -5884,7 +5886,7 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
&ctx.shader->config, ctx.compiler,
ctx.ac.module,
debug, PIPE_SHADER_GEOMETRY,
- "GS Copy Shader");
+ "GS Copy Shader", false);
if (!r) {
if (si_can_dump_shader(sscreen, PIPE_SHADER_GEOMETRY))
fprintf(stderr, "GS Copy Shader:\n");
@@ -6790,6 +6792,22 @@ static void si_build_wrapper_function(struct si_shader_context *ctx,
LLVMBuildRetVoid(builder);
}
+static bool si_should_optimize_less(struct ac_llvm_compiler *compiler,
+ struct si_shader_selector *sel)
+{
+ if (!compiler->low_opt_passes)
+ return false;
+
+ /* Assume a slow CPU. */
+ assert(!sel->screen->info.has_dedicated_vram &&
+ sel->screen->info.chip_class <= VI);
+
+ /* For a crazy dEQP test containing 2597 memory opcodes, mostly
+ * buffer stores. */
+ return sel->type == PIPE_SHADER_COMPUTE &&
+ sel->info.num_memory_instructions > 1000;
+}
+
int si_compile_tgsi_shader(struct si_screen *sscreen,
struct ac_llvm_compiler *compiler,
struct si_shader *shader,
@@ -7022,7 +7040,8 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
/* Compile to bytecode. */
r = si_compile_llvm(sscreen, &shader->binary, &shader->config, compiler,
- ctx.ac.module, debug, ctx.type, "TGSI shader");
+ ctx.ac.module, debug, ctx.type, "TGSI shader",
+ si_should_optimize_less(compiler, shader->selector));
si_llvm_dispose(&ctx);
if (r) {
fprintf(stderr, "LLVM failed to compile shader\n");
@@ -7189,7 +7208,7 @@ si_get_shader_part(struct si_screen *sscreen,
si_llvm_optimize_module(&ctx);
if (si_compile_llvm(sscreen, &result->binary, &result->config, compiler,
- ctx.ac.module, debug, ctx.type, name)) {
+ ctx.ac.module, debug, ctx.type, name, false)) {
FREE(result);
result = NULL;
goto out;
diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h
index 21e325c2d82..36351391d95 100644
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -217,7 +217,8 @@ si_shader_context_from_abi(struct ac_shader_abi *abi)
unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary,
struct ac_llvm_compiler *compiler,
- struct pipe_debug_callback *debug);
+ struct pipe_debug_callback *debug,
+ bool less_optimized);
LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
enum tgsi_opcode_type type);
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
index b486be25749..b9ed0fc3ab0 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
@@ -82,8 +82,12 @@ static void si_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
*/
unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary,
struct ac_llvm_compiler *compiler,
- struct pipe_debug_callback *debug)
+ struct pipe_debug_callback *debug,
+ bool less_optimized)
{
+ struct ac_compiler_passes *passes =
+ less_optimized && compiler->low_opt_passes ?
+ compiler->low_opt_passes : compiler->passes;
struct si_llvm_diagnostics diag;
LLVMContextRef llvm_ctx;
@@ -96,7 +100,7 @@ unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary,
LLVMContextSetDiagnosticHandler(llvm_ctx, si_diagnostic_handler, &diag);
/* Compile IR. */
- if (!ac_compile_module_to_binary(compiler->passes, M, binary))
+ if (!ac_compile_module_to_binary(passes, M, binary))
diag.retval = 1;
if (diag.retval != 0)