about | summary | refs | log | tree | commit | diff | stats
path: root/src/amd/llvm
diff options
context:
space:
mode:
author: Samuel Pitoiset <[email protected]> 2020-03-23 12:02:15 +0100
committer: Marge Bot <[email protected]> 2020-03-23 14:19:03 +0000
commit: 7ac8bb33cd6025f805a390e7647506e932f4db0d (patch)
tree: 4207815186c10d6e4352e2f05e5ccc8b4d9d4aa6 /src/amd/llvm
parent: 2a70a1d69d3151e6c95111a297e715e887692ce3 (diff)
radv/llvm: fix subgroup shuffle for chips without bpermute
bpermute only exists on GFX8+ and only with Wave32 on GFX10. Instead, we have to use readlane with a waterfall loop to defeat the LLVM backend.

This fixes DOOM Eternal, which requires subgroup shuffle.

Cc: <[email protected]>
Signed-off-by: Samuel Pitoiset <[email protected]>
Reviewed-by: Bas Nieuwenhuizen <[email protected]>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4284>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4284>
Diffstat (limited to 'src/amd/llvm')
-rw-r--r-- src/amd/llvm/ac_nir_to_llvm.c | 29
1 files changed, 27 insertions, 2 deletions
diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c
index 49627990163..871c6abc17f 100644
--- a/src/amd/llvm/ac_nir_to_llvm.c
+++ b/src/amd/llvm/ac_nir_to_llvm.c
@@ -3950,8 +3950,33 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
break;
}
case nir_intrinsic_shuffle:
- result = ac_build_shuffle(&ctx->ac, get_src(ctx, instr->src[0]),
- get_src(ctx, instr->src[1]));
+ if (ctx->ac.chip_class == GFX8 ||
+ ctx->ac.chip_class == GFX9 ||
+ (ctx->ac.chip_class == GFX10 && ctx->ac.wave_size == 32)) {
+ result = ac_build_shuffle(&ctx->ac, get_src(ctx, instr->src[0]),
+ get_src(ctx, instr->src[1]));
+ } else {
+ LLVMValueRef src = get_src(ctx, instr->src[0]);
+ LLVMValueRef index = get_src(ctx, instr->src[1]);
+ LLVMTypeRef type = LLVMTypeOf(src);
+ struct waterfall_context wctx;
+ LLVMValueRef index_val;
+
+ index_val = enter_waterfall(ctx, &wctx, index, true);
+
+ src = LLVMBuildZExt(ctx->ac.builder, src,
+ ctx->ac.i32, "");
+
+ result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.readlane",
+ ctx->ac.i32,
+ (LLVMValueRef []) { src, index_val }, 2,
+ AC_FUNC_ATTR_READNONE |
+ AC_FUNC_ATTR_CONVERGENT);
+
+ result = LLVMBuildTrunc(ctx->ac.builder, result, type, "");
+
+ result = exit_waterfall(ctx, &wctx, result);
+ }
break;
case nir_intrinsic_reduce:
result = ac_build_reduce(&ctx->ac,