diff options
author | Marek Olšák <[email protected]> | 2019-07-16 00:55:46 -0400 |
---|---|---|
committer | Marek Olšák <[email protected]> | 2019-07-19 20:16:19 -0400 |
commit | 54e6900eded795fa6b63add92f31ee88674f783a (patch) | |
tree | 7cde7025cb8fb57e739f64adf0f41dd8707e2cfb /src/amd/common | |
parent | 81091a5183fb853d2de37ea82b51122601a36c93 (diff) |
radeonsi/gfx10: use 32-bit wavemasks for Wave32
Acked-by: Pierre-Eric Pelloux-Prayer <[email protected]>
Reviewed-by: Samuel Pitoiset <[email protected]>
Diffstat (limited to 'src/amd/common')
-rw-r--r-- | src/amd/common/ac_llvm_build.c | 24 | +++++++++++++++++++----- |
-rw-r--r-- | src/amd/common/ac_llvm_build.h | 1 | + |
-rw-r--r-- | src/amd/common/ac_nir_to_llvm.c | 6 | +++--- |
3 files changed, 23 insertions, 8 deletions
diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 855ebb3d3dd..250bfc5229e 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -92,6 +92,7 @@ ac_llvm_context_init(struct ac_llvm_context *ctx,
 	ctx->v3f32 = LLVMVectorType(ctx->f32, 3);
 	ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
 	ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
+	ctx->iN_wavemask = LLVMIntTypeInContext(ctx->context, ctx->wave_size);
 
 	ctx->i8_0 = LLVMConstInt(ctx->i8, 0, false);
 	ctx->i8_1 = LLVMConstInt(ctx->i8, 1, false);
@@ -447,7 +448,16 @@ LLVMValueRef
 ac_build_ballot(struct ac_llvm_context *ctx,
 		LLVMValueRef value)
 {
-	const char *name = HAVE_LLVM >= 0x900 ? "llvm.amdgcn.icmp.i64.i32" : "llvm.amdgcn.icmp.i32";
+	const char *name;
+
+	if (HAVE_LLVM >= 0x900) {
+		if (ctx->wave_size == 64)
+			name = "llvm.amdgcn.icmp.i64.i32";
+		else
+			name = "llvm.amdgcn.icmp.i32.i32";
+	} else {
+		name = "llvm.amdgcn.icmp.i32";
+	}
 	LLVMValueRef args[3] = {
 		value,
 		ctx->i32_0,
@@ -461,8 +471,7 @@ ac_build_ballot(struct ac_llvm_context *ctx,
 
 	args[0] = ac_to_integer(ctx, args[0]);
 
-	return ac_build_intrinsic(ctx, name,
-				  ctx->i64, args, 3,
+	return ac_build_intrinsic(ctx, name, ctx->iN_wavemask, args, 3,
 				  AC_FUNC_ATTR_NOUNWIND |
 				  AC_FUNC_ATTR_READNONE |
 				  AC_FUNC_ATTR_CONVERGENT);
@@ -498,7 +507,7 @@ ac_build_vote_any(struct ac_llvm_context *ctx, LLVMValueRef value)
 {
 	LLVMValueRef vote_set = ac_build_ballot(ctx, value);
 	return LLVMBuildICmp(ctx->builder, LLVMIntNE, vote_set,
-			     LLVMConstInt(ctx->i64, 0, 0), "");
+			     LLVMConstInt(ctx->iN_wavemask, 0, 0), "");
 }
 
 LLVMValueRef
@@ -511,7 +520,7 @@ ac_build_vote_eq(struct ac_llvm_context *ctx, LLVMValueRef value)
 					 vote_set, active_set, "");
 	LLVMValueRef none = LLVMBuildICmp(ctx->builder, LLVMIntEQ,
 					  vote_set,
-					  LLVMConstInt(ctx->i64, 0, 0), "");
+					  LLVMConstInt(ctx->iN_wavemask, 0, 0), "");
 	return LLVMBuildOr(ctx->builder, all, none, "");
 }
 
@@ -3848,6 +3857,11 @@ ac_build_writelane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef v
 LLVMValueRef
 ac_build_mbcnt(struct ac_llvm_context *ctx, LLVMValueRef mask)
 {
+	if (ctx->wave_size == 32) {
+		return ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.lo", ctx->i32,
+					  (LLVMValueRef []) { mask, ctx->i32_0 },
+					  2, AC_FUNC_ATTR_READNONE);
+	}
 	LLVMValueRef mask_vec = LLVMBuildBitCast(ctx->builder, mask,
 						 LLVMVectorType(ctx->i32, 2),
 						 "");
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index cc1807221b2..8fcede66fb2 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -74,6 +74,7 @@ struct ac_llvm_context {
 	LLVMTypeRef v3f32;
 	LLVMTypeRef v4f32;
 	LLVMTypeRef v8i32;
+	LLVMTypeRef iN_wavemask;
 
 	LLVMValueRef i8_0;
 	LLVMValueRef i8_1;
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 96bf89a8bf9..f69e02f6d0a 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -2809,12 +2809,12 @@ static LLVMValueRef
 visit_first_invocation(struct ac_nir_context *ctx)
 {
 	LLVMValueRef active_set = ac_build_ballot(&ctx->ac, ctx->ac.i32_1);
+	const char *intr = ctx->ac.wave_size == 32 ? "llvm.cttz.i32" : "llvm.cttz.i64";
 
 	/* The second argument is whether cttz(0) should be defined, but we do not care. */
 	LLVMValueRef args[] = {active_set, ctx->ac.i1false};
-	LLVMValueRef result = ac_build_intrinsic(&ctx->ac,
-						  "llvm.cttz.i64",
-						  ctx->ac.i64, args, 2,
+	LLVMValueRef result = ac_build_intrinsic(&ctx->ac, intr,
+						  ctx->ac.iN_wavemask, args, 2,
 						  AC_FUNC_ATTR_NOUNWIND |
 						  AC_FUNC_ATTR_READNONE);
 