summaryrefslogtreecommitdiffstats
path: root/src/amd/common/ac_llvm_build.h
diff options
context:
space:
mode:
authorNicolai Hähnle <[email protected]>2018-05-23 22:09:27 +0200
committerNicolai Hähnle <[email protected]>2018-12-19 12:01:17 +0100
commit300876a9a7f849a2b165360e19ec1708a342b68c (patch)
treec9924080eed77b534a5cba637e5c8de5c3ecc6de /src/amd/common/ac_llvm_build.h
parent3963402fd343dd4cd7bef0f6b64e51e029798944 (diff)
amd/common: scan/reduce across waves of a workgroup
Order-aware scan/reduce can trade off LDS traffic for external atomics memory traffic in producer/consumer compute shaders. Reviewed-by: Marek Olšák <[email protected]>
Diffstat (limited to 'src/amd/common/ac_llvm_build.h')
-rw-r--r--src/amd/common/ac_llvm_build.h36
1 files changed, 36 insertions, 0 deletions
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index ab063546338..e47893bbbe6 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -524,6 +524,42 @@ ac_build_exclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op
LLVMValueRef
ac_build_reduce(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op, unsigned cluster_size);
+/**
+ * Common arguments for a scan/reduce operation that accumulates per-wave
+ * values across an entire workgroup, while respecting the order of waves.
+ */
+struct ac_wg_scan {
+ bool enable_reduce; /* NOTE(review): presumably requests result_reduce -- confirm in ac_llvm_build.c */
+ bool enable_exclusive; /* NOTE(review): presumably requests result_exclusive -- confirm */
+ bool enable_inclusive; /* NOTE(review): presumably requests result_inclusive -- confirm */
+ nir_op op; /* operator to apply; same nir_op type that ac_build_reduce() takes */
+ LLVMValueRef src; /* input value; clobbered! (do not rely on it after the call) */
+ LLVMValueRef result_reduce; /* NOTE(review): presumably an output, valid only if enable_reduce -- confirm */
+ LLVMValueRef result_exclusive; /* NOTE(review): presumably an output, valid only if enable_exclusive -- confirm */
+ LLVMValueRef result_inclusive; /* NOTE(review): presumably an output, valid only if enable_inclusive -- confirm */
+ LLVMValueRef extra; /* NOTE(review): purpose not evident from this header -- TODO document */
+ LLVMValueRef waveidx; /* NOTE(review): presumably this wave's index within the workgroup -- confirm */
+ LLVMValueRef numwaves; /* only needed for "reduce" operations */
+
+ /* Pointer in the LDS address space to elements of the same type T as the value; must hold at least maxwaves entries */
+ LLVMValueRef scratch;
+ unsigned maxwaves; /* minimum number of entries that scratch must provide */
+};
+
+void
+ac_build_wg_wavescan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws); /* NOTE(review): presumably the first phase of ac_build_wg_wavescan -- confirm */
+void
+ac_build_wg_wavescan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws); /* NOTE(review): presumably the second phase of ac_build_wg_wavescan -- confirm */
+void
+ac_build_wg_wavescan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws); /* returns void; outputs are presumably written to ws->result_* -- confirm */
+
+void
+ac_build_wg_scan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws); /* NOTE(review): presumably the first phase of ac_build_wg_scan -- confirm */
+void
+ac_build_wg_scan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws); /* NOTE(review): presumably the second phase of ac_build_wg_scan -- confirm */
+void
+ac_build_wg_scan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws); /* returns void; outputs are presumably written to ws->result_* -- confirm */
+
LLVMValueRef
ac_build_quad_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src,
unsigned lane0, unsigned lane1, unsigned lane2, unsigned lane3);