summaryrefslogtreecommitdiffstats
path: root/src/amd
diff options
context:
space:
mode:
author: Marek Olšák <[email protected]> 2018-07-19 22:55:49 -0400
committer: Marek Olšák <[email protected]> 2018-08-01 15:25:18 -0400
commit: cb6b241c301d5352a5bcaab52bbfaf89e700b2b2 (patch)
tree: ff4e139b3e472f4b1afc4ca6eb9f2700db79c3e4 /src/amd
parent: c2eab33b088f90cb66802a9e96e92305cccebdc7 (diff)
ac,radeonsi: reduce optimizations for complex compute shaders on older APUs (v2)
To make dEQP-GLES31.functional.ssbo.layout.random.all_shared_buffer.23 finish sooner on the older CPUs (otherwise it gets killed and we fail the test).

Acked-by: Dave Airlie <[email protected]>
Diffstat (limited to 'src/amd')
-rw-r--r-- src/amd/common/ac_llvm_util.c 18
-rw-r--r-- src/amd/common/ac_llvm_util.h 11
2 files changed, 25 insertions, 4 deletions
diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c
index 678bc34e6f8..10e1ca99d41 100644
--- a/src/amd/common/ac_llvm_util.c
+++ b/src/amd/common/ac_llvm_util.c
@@ -142,6 +142,7 @@ const char *ac_get_llvm_processor_name(enum radeon_family family)
static LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family,
enum ac_target_machine_options tm_options,
+ LLVMCodeGenOptLevel level,
const char **out_triple)
{
assert(family >= CHIP_TAHITI);
@@ -163,7 +164,7 @@ static LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family,
triple,
ac_get_llvm_processor_name(family),
features,
- LLVMCodeGenLevelDefault,
+ level,
LLVMRelocDefault,
LLVMCodeModelDefault);
@@ -308,11 +309,20 @@ ac_init_llvm_compiler(struct ac_llvm_compiler *compiler,
const char *triple;
memset(compiler, 0, sizeof(*compiler));
- compiler->tm = ac_create_target_machine(family,
- tm_options, &triple);
+ compiler->tm = ac_create_target_machine(family, tm_options,
+ LLVMCodeGenLevelDefault,
+ &triple);
if (!compiler->tm)
return false;
+ if (tm_options & AC_TM_CREATE_LOW_OPT) {
+ compiler->low_opt_tm =
+ ac_create_target_machine(family, tm_options,
+ LLVMCodeGenLevelLess, NULL);
+ if (!compiler->low_opt_tm)
+ goto fail;
+ }
+
if (okay_to_leak_target_library_info || (HAVE_LLVM >= 0x0700)) {
compiler->target_library_info =
ac_create_target_library_info(triple);
@@ -341,6 +351,8 @@ ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler)
if (compiler->target_library_info)
ac_dispose_target_library_info(compiler->target_library_info);
#endif
+ if (compiler->low_opt_tm)
+ LLVMDisposeTargetMachine(compiler->low_opt_tm);
if (compiler->tm)
LLVMDisposeTargetMachine(compiler->tm);
}
diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h
index d4dea4dfde6..eaf5f21876b 100644
--- a/src/amd/common/ac_llvm_util.h
+++ b/src/amd/common/ac_llvm_util.h
@@ -64,6 +64,7 @@ enum ac_target_machine_options {
AC_TM_PROMOTE_ALLOCA_TO_SCRATCH = (1 << 4),
AC_TM_CHECK_IR = (1 << 5),
AC_TM_ENABLE_GLOBAL_ISEL = (1 << 6),
+ AC_TM_CREATE_LOW_OPT = (1 << 7),
};
enum ac_float_mode {
@@ -74,10 +75,18 @@ enum ac_float_mode {
/* Per-thread persistent LLVM objects. */
struct ac_llvm_compiler {
- LLVMTargetMachineRef tm;
LLVMTargetLibraryInfoRef target_library_info;
LLVMPassManagerRef passmgr;
+
+ /* Default compiler. */
+ LLVMTargetMachineRef tm;
struct ac_compiler_passes *passes;
+
+ /* Optional compiler for faster compilation with fewer optimizations.
+ * LLVM modules can be created with "tm" too. There is no difference.
+ */
+ LLVMTargetMachineRef low_opt_tm; /* uses -O1 instead of -O2 */
+ struct ac_compiler_passes *low_opt_passes;
};
const char *ac_get_llvm_processor_name(enum radeon_family family);