summary | refs | log | tree | commit | diff | stats
path: root/src/amd/common
diff options
context:
space:
mode:
author: Marek Olšák <[email protected]> 2019-06-24 16:13:24 -0400
committer: Marek Olšák <[email protected]> 2019-07-03 15:51:13 -0400
commit: 969e5176c23252b92fbeca0dc4109a1b06de06c5 (patch)
tree: 65deab045607a76765626f430e9aa18c3b997afd /src/amd/common
parent: 214ddfb6884879a73168b55c306680c7d49d19f5 (diff)
ac: rework ac_build_waitcnt for gfx10
Acked-by: Bas Nieuwenhuizen <[email protected]>
Diffstat (limited to 'src/amd/common')
-rw-r--r-- src/amd/common/ac_llvm_build.c 38
-rw-r--r-- src/amd/common/ac_llvm_build.h 11
-rw-r--r-- src/amd/common/ac_nir_to_llvm.c 14
3 files changed, 49 insertions, 14 deletions
diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index eb71a69eebd..4c48fe1d1d7 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -2882,13 +2882,49 @@ LLVMValueRef ac_build_fmad(struct ac_llvm_context *ctx, LLVMValueRef s0,
LLVMBuildFMul(ctx->builder, s0, s1, ""), s2, "");
}
-void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned simm16)
+void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned wait_flags)
{
+ if (!wait_flags)
+ return;
+
+ unsigned lgkmcnt = 63;
+ unsigned expcnt = 7;
+ unsigned vmcnt = ctx->chip_class >= GFX9 ? 63 : 15;
+ unsigned vscnt = 63;
+
+ if (wait_flags & AC_WAIT_LGKM)
+ lgkmcnt = 0;
+ if (wait_flags & AC_WAIT_EXP)
+ expcnt = 0;
+ if (wait_flags & AC_WAIT_VLOAD)
+ vmcnt = 0;
+
+ if (wait_flags & AC_WAIT_VSTORE) {
+ if (ctx->chip_class >= GFX10)
+ vscnt = 0;
+ else
+ vmcnt = 0;
+ }
+
+ unsigned simm16 = (lgkmcnt << 8) |
+ (expcnt << 4) |
+ (vmcnt & 0xf) |
+ ((vmcnt >> 4) << 14);
+
LLVMValueRef args[1] = {
LLVMConstInt(ctx->i32, simm16, false),
};
ac_build_intrinsic(ctx, "llvm.amdgcn.s.waitcnt",
ctx->voidt, args, 1, 0);
+
+ /* TODO: add llvm.amdgcn.s.waitcnt.vscnt into LLVM: */
+ if (0 && ctx->chip_class >= GFX10 && vscnt == 0) {
+ LLVMValueRef args[1] = {
+ LLVMConstInt(ctx->i32, vscnt, false),
+ };
+ ac_build_intrinsic(ctx, "llvm.amdgcn.s.waitcnt.vscnt",
+ ctx->voidt, args, 1, 0);
+ }
}
LLVMValueRef ac_build_fmed3(struct ac_llvm_context *ctx, LLVMValueRef src0,
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index 4917315cc50..eba01e51616 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -43,11 +43,10 @@ enum {
AC_ADDR_SPACE_CONST_32BIT = 6, /* same as CONST, but the pointer type has 32 bits */
};
-/* Combine these with & instead of |. */
-#define NOOP_WAITCNT 0xcf7f
-#define LGKM_CNT 0xc07f
-#define EXP_CNT 0xcf0f
-#define VM_CNT 0x0f70 /* On GFX9, vmcnt has 6 bits in [0:3] and [14:15] */
+#define AC_WAIT_LGKM (1 << 0) /* LDS, GDS, constant, message */
+#define AC_WAIT_EXP (1 << 1) /* exports */
+#define AC_WAIT_VLOAD (1 << 2) /* VMEM load/sample instructions */
+#define AC_WAIT_VSTORE (1 << 3) /* VMEM store instructions */
struct ac_llvm_flow;
@@ -575,7 +574,7 @@ LLVMValueRef ac_build_imad(struct ac_llvm_context *ctx, LLVMValueRef s0,
LLVMValueRef ac_build_fmad(struct ac_llvm_context *ctx, LLVMValueRef s0,
LLVMValueRef s1, LLVMValueRef s2);
-void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned simm16);
+void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned wait_flags);
LLVMValueRef ac_build_fract(struct ac_llvm_context *ctx, LLVMValueRef src0,
unsigned bitsize);
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 73941ba6f45..e5a9389e7bf 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -2741,26 +2741,26 @@ static LLVMValueRef visit_image_size(struct ac_nir_context *ctx,
static void emit_membar(struct ac_llvm_context *ac,
const nir_intrinsic_instr *instr)
{
- unsigned waitcnt = NOOP_WAITCNT;
+ unsigned wait_flags = 0;
switch (instr->intrinsic) {
case nir_intrinsic_memory_barrier:
case nir_intrinsic_group_memory_barrier:
- waitcnt &= VM_CNT & LGKM_CNT;
+ wait_flags = AC_WAIT_LGKM | AC_WAIT_VLOAD | AC_WAIT_VSTORE;
break;
case nir_intrinsic_memory_barrier_atomic_counter:
case nir_intrinsic_memory_barrier_buffer:
case nir_intrinsic_memory_barrier_image:
- waitcnt &= VM_CNT;
+ wait_flags = AC_WAIT_VLOAD | AC_WAIT_VSTORE;
break;
case nir_intrinsic_memory_barrier_shared:
- waitcnt &= LGKM_CNT;
+ wait_flags = AC_WAIT_LGKM;
break;
default:
break;
}
- if (waitcnt != NOOP_WAITCNT)
- ac_build_waitcnt(ac, waitcnt);
+
+ ac_build_waitcnt(ac, wait_flags);
}
void ac_emit_barrier(struct ac_llvm_context *ac, gl_shader_stage stage)
@@ -2770,7 +2770,7 @@ void ac_emit_barrier(struct ac_llvm_context *ac, gl_shader_stage stage)
* always fits into a single wave.
*/
if (ac->chip_class == GFX6 && stage == MESA_SHADER_TESS_CTRL) {
- ac_build_waitcnt(ac, LGKM_CNT & VM_CNT);
+ ac_build_waitcnt(ac, AC_WAIT_LGKM | AC_WAIT_VLOAD | AC_WAIT_VSTORE);
return;
}
ac_build_s_barrier(ac);