aboutsummaryrefslogtreecommitdiffstats
path: root/src/amd
diff options
context:
space:
mode:
author Timur Kristóf <[email protected]> 2020-01-10 12:06:53 +0100
committer Timur Kristóf <[email protected]> 2020-01-10 12:30:44 +0100
commit eccac46cdc6086b7a10adff2ea95de986d70a85d (patch)
tree e8649066abdae325e004c561c916f7cfd4909e65 /src/amd
parent a5fe84aefb6858bee123f60b330db1e0287f9cc0 (diff)
ac/llvm: Fix ac_build_reduce in wave32 mode.
Previously, when cluster_size was set to 0, ac_build_reduce always behaved as if the cluster size were 64. This commit fixes that in wave32 mode, where it now behaves as if the cluster size were 32.

Signed-off-by: Timur Kristóf <[email protected]>
Reviewed-by: Samuel Pitoiset <[email protected]>
Diffstat (limited to 'src/amd')
-rw-r--r-- src/amd/llvm/ac_llvm_build.c 15
1 file changed, 9 insertions, 6 deletions
diff --git a/src/amd/llvm/ac_llvm_build.c b/src/amd/llvm/ac_llvm_build.c
index 79831260066..e97eb919c6a 100644
--- a/src/amd/llvm/ac_llvm_build.c
+++ b/src/amd/llvm/ac_llvm_build.c
@@ -4344,12 +4344,15 @@ ac_build_reduce(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op, unsign
if (cluster_size == 32) return ac_build_wwm(ctx, result);
if (ctx->chip_class >= GFX8) {
- if (ctx->chip_class >= GFX10)
- swap = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 31, false));
- else
- swap = ac_build_dpp(ctx, identity, result, dpp_row_bcast31, 0xc, 0xf, false);
- result = ac_build_alu_op(ctx, result, swap, op);
- result = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 63, 0));
+ if (ctx->wave_size == 64) {
+ if (ctx->chip_class >= GFX10)
+ swap = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 31, false));
+ else
+ swap = ac_build_dpp(ctx, identity, result, dpp_row_bcast31, 0xc, 0xf, false);
+ result = ac_build_alu_op(ctx, result, swap, op);
+ result = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 63, 0));
+ }
+
return ac_build_wwm(ctx, result);
} else {
swap = ac_build_readlane(ctx, result, ctx->i32_0);