summary | refs | log | tree | commit | diff | stats
path: root/src/amd/common
diff options
context:
space:
mode:
author: Marek Olšák <[email protected]> 2019-07-16 00:55:46 -0400
committer: Marek Olšák <[email protected]> 2019-07-19 20:16:19 -0400
commit: 54e6900eded795fa6b63add92f31ee88674f783a (patch)
tree: 7cde7025cb8fb57e739f64adf0f41dd8707e2cfb /src/amd/common
parent: 81091a5183fb853d2de37ea82b51122601a36c93 (diff)
radeonsi/gfx10: use 32-bit wavemasks for Wave32
Acked-by: Pierre-Eric Pelloux-Prayer <[email protected]>
Reviewed-by: Samuel Pitoiset <[email protected]>
Diffstat (limited to 'src/amd/common')
-rw-r--r-- src/amd/common/ac_llvm_build.c 24
-rw-r--r-- src/amd/common/ac_llvm_build.h 1
-rw-r--r-- src/amd/common/ac_nir_to_llvm.c 6
3 files changed, 23 insertions, 8 deletions
diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 855ebb3d3dd..250bfc5229e 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -92,6 +92,7 @@ ac_llvm_context_init(struct ac_llvm_context *ctx,
ctx->v3f32 = LLVMVectorType(ctx->f32, 3);
ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
+ ctx->iN_wavemask = LLVMIntTypeInContext(ctx->context, ctx->wave_size);
ctx->i8_0 = LLVMConstInt(ctx->i8, 0, false);
ctx->i8_1 = LLVMConstInt(ctx->i8, 1, false);
@@ -447,7 +448,16 @@ LLVMValueRef
ac_build_ballot(struct ac_llvm_context *ctx,
LLVMValueRef value)
{
- const char *name = HAVE_LLVM >= 0x900 ? "llvm.amdgcn.icmp.i64.i32" : "llvm.amdgcn.icmp.i32";
+ const char *name;
+
+ if (HAVE_LLVM >= 0x900) {
+ if (ctx->wave_size == 64)
+ name = "llvm.amdgcn.icmp.i64.i32";
+ else
+ name = "llvm.amdgcn.icmp.i32.i32";
+ } else {
+ name = "llvm.amdgcn.icmp.i32";
+ }
LLVMValueRef args[3] = {
value,
ctx->i32_0,
@@ -461,8 +471,7 @@ ac_build_ballot(struct ac_llvm_context *ctx,
args[0] = ac_to_integer(ctx, args[0]);
- return ac_build_intrinsic(ctx, name,
- ctx->i64, args, 3,
+ return ac_build_intrinsic(ctx, name, ctx->iN_wavemask, args, 3,
AC_FUNC_ATTR_NOUNWIND |
AC_FUNC_ATTR_READNONE |
AC_FUNC_ATTR_CONVERGENT);
@@ -498,7 +507,7 @@ ac_build_vote_any(struct ac_llvm_context *ctx, LLVMValueRef value)
{
LLVMValueRef vote_set = ac_build_ballot(ctx, value);
return LLVMBuildICmp(ctx->builder, LLVMIntNE, vote_set,
- LLVMConstInt(ctx->i64, 0, 0), "");
+ LLVMConstInt(ctx->iN_wavemask, 0, 0), "");
}
LLVMValueRef
@@ -511,7 +520,7 @@ ac_build_vote_eq(struct ac_llvm_context *ctx, LLVMValueRef value)
vote_set, active_set, "");
LLVMValueRef none = LLVMBuildICmp(ctx->builder, LLVMIntEQ,
vote_set,
- LLVMConstInt(ctx->i64, 0, 0), "");
+ LLVMConstInt(ctx->iN_wavemask, 0, 0), "");
return LLVMBuildOr(ctx->builder, all, none, "");
}
@@ -3848,6 +3857,11 @@ ac_build_writelane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef v
LLVMValueRef
ac_build_mbcnt(struct ac_llvm_context *ctx, LLVMValueRef mask)
{
+ if (ctx->wave_size == 32) {
+ return ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.lo", ctx->i32,
+ (LLVMValueRef []) { mask, ctx->i32_0 },
+ 2, AC_FUNC_ATTR_READNONE);
+ }
LLVMValueRef mask_vec = LLVMBuildBitCast(ctx->builder, mask,
LLVMVectorType(ctx->i32, 2),
"");
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index cc1807221b2..8fcede66fb2 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -74,6 +74,7 @@ struct ac_llvm_context {
LLVMTypeRef v3f32;
LLVMTypeRef v4f32;
LLVMTypeRef v8i32;
+ LLVMTypeRef iN_wavemask;
LLVMValueRef i8_0;
LLVMValueRef i8_1;
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 96bf89a8bf9..f69e02f6d0a 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -2809,12 +2809,12 @@ static LLVMValueRef
visit_first_invocation(struct ac_nir_context *ctx)
{
LLVMValueRef active_set = ac_build_ballot(&ctx->ac, ctx->ac.i32_1);
+ const char *intr = ctx->ac.wave_size == 32 ? "llvm.cttz.i32" : "llvm.cttz.i64";
/* The second argument is whether cttz(0) should be defined, but we do not care. */
LLVMValueRef args[] = {active_set, ctx->ac.i1false};
- LLVMValueRef result = ac_build_intrinsic(&ctx->ac,
- "llvm.cttz.i64",
- ctx->ac.i64, args, 2,
+ LLVMValueRef result = ac_build_intrinsic(&ctx->ac, intr,
+ ctx->ac.iN_wavemask, args, 2,
AC_FUNC_ATTR_NOUNWIND |
AC_FUNC_ATTR_READNONE);