summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/amd/common/ac_llvm_util.c16
-rw-r--r--src/amd/common/ac_llvm_util.h5
2 files changed, 20 insertions, 1 deletions
diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c
index 1cf51b79c76..e4a353a4967 100644
--- a/src/amd/common/ac_llvm_util.c
+++ b/src/amd/common/ac_llvm_util.c
@@ -157,7 +157,8 @@ static LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family,
snprintf(features, sizeof(features),
"+DumpCode,-fp32-denormals,+fp64-denormals%s%s%s%s%s%s%s",
HAVE_LLVM >= 0x0800 ? "" : ",+vgpr-spilling",
- family >= CHIP_NAVI10 ? ",+wavefrontsize64,-wavefrontsize32" : "",
+ family >= CHIP_NAVI10 && !(tm_options & AC_TM_WAVE32) ?
+ ",+wavefrontsize64,-wavefrontsize32" : "",
tm_options & AC_TM_SISCHED ? ",+si-scheduler" : "",
tm_options & AC_TM_FORCE_ENABLE_XNACK ? ",+xnack" : "",
tm_options & AC_TM_FORCE_DISABLE_XNACK ? ",-xnack" : "",
@@ -337,6 +338,16 @@ ac_init_llvm_compiler(struct ac_llvm_compiler *compiler,
goto fail;
}
+ if (family >= CHIP_NAVI10) {
+ assert(!(tm_options & AC_TM_CREATE_LOW_OPT));
+ compiler->tm_wave32 = ac_create_target_machine(family,
+ tm_options | AC_TM_WAVE32,
+ LLVMCodeGenLevelDefault,
+ NULL);
+ if (!compiler->tm_wave32)
+ goto fail;
+ }
+
compiler->target_library_info =
ac_create_target_library_info(triple);
if (!compiler->target_library_info)
@@ -357,6 +368,7 @@ void
ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler)
{
ac_destroy_llvm_passes(compiler->passes);
+ ac_destroy_llvm_passes(compiler->passes_wave32);
ac_destroy_llvm_passes(compiler->low_opt_passes);
if (compiler->passmgr)
@@ -367,4 +379,6 @@ ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler)
LLVMDisposeTargetMachine(compiler->low_opt_tm);
if (compiler->tm)
LLVMDisposeTargetMachine(compiler->tm);
+ if (compiler->tm_wave32)
+ LLVMDisposeTargetMachine(compiler->tm_wave32);
}
diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h
index 0c900885de5..8209b6dcabc 100644
--- a/src/amd/common/ac_llvm_util.h
+++ b/src/amd/common/ac_llvm_util.h
@@ -65,6 +65,7 @@ enum ac_target_machine_options {
AC_TM_ENABLE_GLOBAL_ISEL = (1 << 6),
AC_TM_CREATE_LOW_OPT = (1 << 7),
AC_TM_NO_LOAD_STORE_OPT = (1 << 8),
+ AC_TM_WAVE32 = (1 << 9),
};
enum ac_float_mode {
@@ -82,6 +83,10 @@ struct ac_llvm_compiler {
LLVMTargetMachineRef tm;
struct ac_compiler_passes *passes;
+ /* Wave32 compiler for GFX10. */
+ LLVMTargetMachineRef tm_wave32;
+ struct ac_compiler_passes *passes_wave32;
+
/* Optional compiler for faster compilation with fewer optimizations.
* LLVM modules can be created with "tm" too. There is no difference.
*/