diff options
author | Nicolai Hähnle <[email protected]> | 2018-05-23 22:09:27 +0200 |
---|---|---|
committer | Nicolai Hähnle <[email protected]> | 2018-12-19 12:01:17 +0100 |
commit | 300876a9a7f849a2b165360e19ec1708a342b68c (patch) | |
tree | c9924080eed77b534a5cba637e5c8de5c3ecc6de /src/amd/common/ac_llvm_build.h | |
parent | 3963402fd343dd4cd7bef0f6b64e51e029798944 (diff) |
amd/common: scan/reduce across waves of a workgroup
Order-aware scan/reduce can trade-off LDS traffic for external atomics
memory traffic in producer/consumer compute shaders.
Reviewed-by: Marek Olšák <[email protected]>
Diffstat (limited to 'src/amd/common/ac_llvm_build.h')
-rw-r--r-- | src/amd/common/ac_llvm_build.h | 36 |
1 files changed, 36 insertions, 0 deletions
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h index ab063546338..e47893bbbe6 100644 --- a/src/amd/common/ac_llvm_build.h +++ b/src/amd/common/ac_llvm_build.h @@ -524,6 +524,42 @@ ac_build_exclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op LLVMValueRef ac_build_reduce(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op, unsigned cluster_size); +/** + * Common arguments for a scan/reduce operation that accumulates per-wave + * values across an entire workgroup, while respecting the order of waves. + */ +struct ac_wg_scan { + bool enable_reduce; + bool enable_exclusive; + bool enable_inclusive; + nir_op op; + LLVMValueRef src; /* clobbered! */ + LLVMValueRef result_reduce; + LLVMValueRef result_exclusive; + LLVMValueRef result_inclusive; + LLVMValueRef extra; + LLVMValueRef waveidx; + LLVMValueRef numwaves; /* only needed for "reduce" operations */ + + /* T addrspace(LDS) pointer to the same type as value, at least maxwaves entries */ + LLVMValueRef scratch; + unsigned maxwaves; +}; + +void +ac_build_wg_wavescan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws); +void +ac_build_wg_wavescan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws); +void +ac_build_wg_wavescan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws); + +void +ac_build_wg_scan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws); +void +ac_build_wg_scan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws); +void +ac_build_wg_scan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws); + LLVMValueRef ac_build_quad_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned lane0, unsigned lane1, unsigned lane2, unsigned lane3); |