aboutsummaryrefslogtreecommitdiffstats
path: root/src/amd/common/ac_llvm_helper.cpp
diff options
context:
space:
mode:
author Rhys Perry <[email protected]> 2019-04-25 14:44:40 +0100
committer Rhys Perry <[email protected]> 2019-04-29 18:20:44 +0100
commit bd4c661ad08e772fdccb562ffbb2f45705c4fec8 (patch)
tree 5bdb078e9cd7a79d4b2cd1a46dc69ec7bfe867fb /src/amd/common/ac_llvm_helper.cpp
parent e91ee763c378d03883eb88cf0eadd8aa916f7878 (diff)
ac,ac/nir: use a better sync scope for shared atomics
https://reviews.llvm.org/rL356946 (present in LLVM 9 and later) changed the meaning of the "system" sync scope, making it no longer restricted to the memory operation's address space. So a single address space sync scope is needed for shared atomic operations (such as "system-one-as" or "workgroup-one-as") otherwise buffer_wbinvl1 and s_waitcnt instructions can be created at each shared atomic operation.

This mostly reimplements LLVMBuildAtomicRMW and LLVMBuildAtomicCmpXchg to allow for more sync scopes and uses the new functions in ac->nir with the "workgroup-one-as" or "workgroup" sync scopes.

F1 2017 (4K, Ultra High settings, TAA), avg FPS : 59 -> 59.67 (+1.14%)
Strange Brigade (4K, ~highest settings), avg FPS : 51.5 -> 51.6 (+0.19%)
RotTR/mountain (4K, VeryHigh settings, FXAA), avg FPS : 57.2 -> 57.2 (+0.0%)
RotTR/tomb (4K, VeryHigh settings, FXAA), avg FPS : 42.5 -> 43.0 (+1.17%)
RotTR/valley (4K, VeryHigh settings, FXAA), avg FPS : 40.7 -> 41.6 (+2.21%)
Warhammer II/fallen, avg FPS : 31.63 -> 31.83 (+0.63%)
Warhammer II/skaven, avg FPS : 37.77 -> 38.07 (+0.79%)

Signed-off-by: Rhys Perry <[email protected]>
Reviewed-by: Samuel Pitoiset <[email protected]>
Reviewed-by: Bas Nieuwenhuizen <[email protected]>
Diffstat (limited to 'src/amd/common/ac_llvm_helper.cpp')
-rw-r--r-- src/amd/common/ac_llvm_helper.cpp 59
1 files changed, 59 insertions, 0 deletions
diff --git a/src/amd/common/ac_llvm_helper.cpp b/src/amd/common/ac_llvm_helper.cpp
index dcfb8008546..e5030c6f472 100644
--- a/src/amd/common/ac_llvm_helper.cpp
+++ b/src/amd/common/ac_llvm_helper.cpp
@@ -31,6 +31,7 @@
#include "ac_binary.h"
#include "ac_llvm_util.h"
+#include "ac_llvm_build.h"
#include <llvm-c/Core.h>
#include <llvm/Target/TargetMachine.h>
@@ -167,3 +168,61 @@ void ac_enable_global_isel(LLVMTargetMachineRef tm)
{
reinterpret_cast<llvm::TargetMachine*>(tm)->setGlobalISel(true);
}
+
+/**
+ * Build an LLVM atomicrmw instruction with a caller-chosen sync scope.
+ *
+ * \param ctx         context providing the LLVM context and IR builder
+ * \param op          LLVM-C read-modify-write operation to translate
+ * \param ptr         pointer operand of the atomic operation
+ * \param val         value operand of the atomic operation
+ * \param sync_scope  sync scope name; looked up in (or added to) the LLVM
+ *                    context via getOrInsertSyncScopeID()
+ * \return the created instruction, always built with sequentially
+ *         consistent ordering
+ */
+LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp op,
+				 LLVMValueRef ptr, LLVMValueRef val,
+				 const char *sync_scope) {
+	/* Translate the C API opcode into its C++ counterpart. */
+	llvm::AtomicRMWInst::BinOp binop;
+	switch (op) {
+	case LLVMAtomicRMWBinOpXchg:
+		binop = llvm::AtomicRMWInst::Xchg;
+		break;
+	case LLVMAtomicRMWBinOpAdd:
+		binop = llvm::AtomicRMWInst::Add;
+		break;
+	case LLVMAtomicRMWBinOpSub:
+		binop = llvm::AtomicRMWInst::Sub;
+		break;
+	case LLVMAtomicRMWBinOpAnd:
+		binop = llvm::AtomicRMWInst::And;
+		break;
+	case LLVMAtomicRMWBinOpNand:
+		binop = llvm::AtomicRMWInst::Nand;
+		break;
+	case LLVMAtomicRMWBinOpOr:
+		binop = llvm::AtomicRMWInst::Or;
+		break;
+	case LLVMAtomicRMWBinOpXor:
+		binop = llvm::AtomicRMWInst::Xor;
+		break;
+	case LLVMAtomicRMWBinOpMax:
+		binop = llvm::AtomicRMWInst::Max;
+		break;
+	case LLVMAtomicRMWBinOpMin:
+		binop = llvm::AtomicRMWInst::Min;
+		break;
+	case LLVMAtomicRMWBinOpUMax:
+		binop = llvm::AtomicRMWInst::UMax;
+		break;
+	case LLVMAtomicRMWBinOpUMin:
+		binop = llvm::AtomicRMWInst::UMin;
+		break;
+	default:
+		/* unreachable() negates its argument inside its own assert, so
+		 * pass the bare message: a leading '!' would turn the check into
+		 * an assertion that always passes and hide invalid opcodes in
+		 * debug builds.
+		 */
+		unreachable("invalid LLVMAtomicRMWBinOp");
+		break;
+	}
+	unsigned SSID = llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
+	return llvm::wrap(llvm::unwrap(ctx->builder)->CreateAtomicRMW(
+				binop, llvm::unwrap(ptr), llvm::unwrap(val),
+				llvm::AtomicOrdering::SequentiallyConsistent, SSID));
+}
+
+/**
+ * Build an LLVM cmpxchg instruction with a caller-chosen sync scope.
+ *
+ * \param ctx         context providing the LLVM context and IR builder
+ * \param ptr         pointer operand
+ * \param cmp         value the current contents are compared against
+ * \param val         replacement value
+ * \param sync_scope  sync scope name; looked up in (or added to) the LLVM
+ *                    context via getOrInsertSyncScopeID()
+ * \return the created instruction; both the success and the failure
+ *         ordering are sequentially consistent
+ */
+LLVMValueRef ac_build_atomic_cmp_xchg(struct ac_llvm_context *ctx, LLVMValueRef ptr,
+				      LLVMValueRef cmp, LLVMValueRef val,
+				      const char *sync_scope)
+{
+	const auto seq_cst = llvm::AtomicOrdering::SequentiallyConsistent;
+
+	/* Resolve the scope name to its ID before emitting the instruction. */
+	unsigned scope_id =
+		llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
+
+	auto *builder = llvm::unwrap(ctx->builder);
+	auto *inst = builder->CreateAtomicCmpXchg(llvm::unwrap(ptr),
+						  llvm::unwrap(cmp),
+						  llvm::unwrap(val),
+						  seq_cst, seq_cst, scope_id);
+	return llvm::wrap(inst);
+}