From cb6b241c301d5352a5bcaab52bbfaf89e700b2b2 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Thu, 19 Jul 2018 22:55:49 -0400 Subject: ac,radeonsi: reduce optimizations for complex compute shaders on older APUs (v2) To make dEQP-GLES31.functional.ssbo.layout.random.all_shared_buffer.23 finish sooner on the older CPUs. (otherwise it gets killed and we fail the test) Acked-by: Dave Airlie --- src/gallium/drivers/radeonsi/si_pipe.c | 12 ++++++++- src/gallium/drivers/radeonsi/si_shader.c | 29 ++++++++++++++++++---- src/gallium/drivers/radeonsi/si_shader_internal.h | 3 ++- .../drivers/radeonsi/si_shader_tgsi_setup.c | 8 ++++-- 4 files changed, 43 insertions(+), 9 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 9e3a579d743..cc05d2f8de3 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -108,22 +108,32 @@ static const struct debug_named_value debug_options[] = { static void si_init_compiler(struct si_screen *sscreen, struct ac_llvm_compiler *compiler) { + /* Only create the less-optimizing version of the compiler on APUs + * predating Ryzen (Raven). */ + bool create_low_opt_compiler = !sscreen->info.has_dedicated_vram && + sscreen->info.chip_class <= VI; + enum ac_target_machine_options tm_options = (sscreen->debug_flags & DBG(SI_SCHED) ? AC_TM_SISCHED : 0) | (sscreen->debug_flags & DBG(GISEL) ? AC_TM_ENABLE_GLOBAL_ISEL : 0) | (sscreen->info.chip_class >= GFX9 ? AC_TM_FORCE_ENABLE_XNACK : 0) | (sscreen->info.chip_class < GFX9 ? AC_TM_FORCE_DISABLE_XNACK : 0) | (!sscreen->llvm_has_working_vgpr_indexing ? AC_TM_PROMOTE_ALLOCA_TO_SCRATCH : 0) | - (sscreen->debug_flags & DBG(CHECK_IR) ? AC_TM_CHECK_IR : 0); + (sscreen->debug_flags & DBG(CHECK_IR) ? AC_TM_CHECK_IR : 0) | + (create_low_opt_compiler ? AC_TM_CREATE_LOW_OPT : 0); ac_init_llvm_once(); ac_init_llvm_compiler(compiler, true, sscreen->info.family, tm_options); compiler->passes = ac_create_llvm_passes(compiler->tm); + + if (compiler->low_opt_tm) + compiler->low_opt_passes = ac_create_llvm_passes(compiler->low_opt_tm); } static void si_destroy_compiler(struct ac_llvm_compiler *compiler) { ac_destroy_llvm_passes(compiler->passes); + ac_destroy_llvm_passes(compiler->low_opt_passes); ac_destroy_llvm_compiler(compiler); } diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 43ba23ff494..405833d3ba7 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -5645,7 +5645,8 @@ static int si_compile_llvm(struct si_screen *sscreen, LLVMModuleRef mod, struct pipe_debug_callback *debug, unsigned processor, - const char *name) + const char *name, + bool less_optimized) { int r = 0; unsigned count = p_atomic_inc_return(&sscreen->num_compilations); @@ -5667,7 +5668,8 @@ static int si_compile_llvm(struct si_screen *sscreen, } if (!si_replace_shader(count, binary)) { - r = si_llvm_compile(mod, binary, compiler, debug); + r = si_llvm_compile(mod, binary, compiler, debug, + less_optimized); if (r) return r; } @@ -5884,7 +5886,7 @@ si_generate_gs_copy_shader(struct si_screen *sscreen, &ctx.shader->config, ctx.compiler, ctx.ac.module, debug, PIPE_SHADER_GEOMETRY, - "GS Copy Shader"); + "GS Copy Shader", false); if (!r) { if (si_can_dump_shader(sscreen, PIPE_SHADER_GEOMETRY)) fprintf(stderr, "GS Copy Shader:\n"); @@ -6790,6 +6792,22 @@ static void si_build_wrapper_function(struct si_shader_context *ctx, LLVMBuildRetVoid(builder); } +static bool si_should_optimize_less(struct ac_llvm_compiler *compiler, + struct si_shader_selector *sel) +{ + if (!compiler->low_opt_passes) + return false; + + /* Assume a slow CPU. */ + assert(!sel->screen->info.has_dedicated_vram && + sel->screen->info.chip_class <= VI); + + /* For a crazy dEQP test containing 2597 memory opcodes, mostly + * buffer stores. */ + return sel->type == PIPE_SHADER_COMPUTE && + sel->info.num_memory_instructions > 1000; +} + int si_compile_tgsi_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compiler, struct si_shader *shader, @@ -7022,7 +7040,8 @@ int si_compile_tgsi_shader(struct si_screen *sscreen, /* Compile to bytecode. */ r = si_compile_llvm(sscreen, &shader->binary, &shader->config, compiler, - ctx.ac.module, debug, ctx.type, "TGSI shader"); + ctx.ac.module, debug, ctx.type, "TGSI shader", + si_should_optimize_less(compiler, shader->selector)); si_llvm_dispose(&ctx); if (r) { fprintf(stderr, "LLVM failed to compile shader\n"); @@ -7189,7 +7208,7 @@ si_get_shader_part(struct si_screen *sscreen, si_llvm_optimize_module(&ctx); if (si_compile_llvm(sscreen, &result->binary, &result->config, compiler, - ctx.ac.module, debug, ctx.type, name)) { + ctx.ac.module, debug, ctx.type, name, false)) { FREE(result); result = NULL; goto out; diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index 21e325c2d82..36351391d95 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -217,7 +217,8 @@ si_shader_context_from_abi(struct ac_shader_abi *abi) unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary, struct ac_llvm_compiler *compiler, - struct pipe_debug_callback *debug); + struct pipe_debug_callback *debug, + bool less_optimized); LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base, enum tgsi_opcode_type type); diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c index b486be25749..b9ed0fc3ab0 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c @@ -82,8 +82,12 @@ static void si_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context) */ unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary, struct ac_llvm_compiler *compiler, - struct pipe_debug_callback *debug) + struct pipe_debug_callback *debug, + bool less_optimized) { + struct ac_compiler_passes *passes = + less_optimized && compiler->low_opt_passes ? + compiler->low_opt_passes : compiler->passes; struct si_llvm_diagnostics diag; LLVMContextRef llvm_ctx; @@ -96,7 +100,7 @@ unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary, LLVMContextSetDiagnosticHandler(llvm_ctx, si_diagnostic_handler, &diag); /* Compile IR. */ - if (!ac_compile_module_to_binary(compiler->passes, M, binary)) + if (!ac_compile_module_to_binary(passes, M, binary)) diag.retval = 1; if (diag.retval != 0) -- cgit v1.2.3