author     Samuel Pitoiset <[email protected]>  2019-05-03 11:45:34 +0200
committer  Samuel Pitoiset <[email protected]>  2019-05-03 17:59:12 +0200
commit     4f18c43d1df64135e8968a7d4fbfd2c9918b76ae
tree       efd2296ec3caf159bf2a2102478594ab145c2ed2 /src/amd/common
parent     e340d7beef9573facc53f3eeece640b2a14a15f5
radv: apply the indexing workaround for atomic buffer operations on GFX9
Because the new raw/struct intrinsics are buggy with LLVM 8
(they weren't marked as a source of divergence), we fall back to the
old intrinsics for atomic buffer operations only. This means we need
to apply the indexing workaround for GFX9. The load/store operations
still use the new LLVM 8 intrinsics.
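For illustration, here is a minimal CPU-side sketch of the descriptor
fix-up that get_image_buffer_descriptor() emits as LLVM IR in the diff
below. The field positions (stride in the upper half of dword 1,
NUM_RECORDS in dword 2) mirror the extractions visible in the diff; the
max()-style replacement value and the helper name are assumptions for
illustration, not taken from this commit:

    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical model of the GFX9 stride/size workaround on a
     * buffer descriptor (V#). Mirrors the IR in the diff: extract
     * dword 2 (NUM_RECORDS) and dword 1 >> 16 (stride), then patch
     * dword 2. The max() choice is an assumption. */
    static void gfx9_fixup_buffer_descriptor(uint32_t rsrc[4])
    {
        uint32_t stride = rsrc[1] >> 16;   /* matches the LShr by 16 */
        uint32_t num_records = rsrc[2];

        rsrc[2] = num_records > stride ? num_records : stride;
    }

    int main(void)
    {
        /* stride = 16 bytes, NUM_RECORDS = 4 */
        uint32_t rsrc[4] = { 0, 16u << 16, 4, 0 };
        gfx9_fixup_buffer_descriptor(rsrc);
        printf("patched NUM_RECORDS = %u\n", rsrc[2]);
        return 0;
    }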
Needing yet another workaround is painful, but we should be able to
clean this up a bit once LLVM 7 support is dropped.
This fixes a GPU hang with AC Odyssey and some rendering problems
with Nioh.
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=110573
Fixes: 31164cf5f70 ("ac/nir: only use the new raw/struct image atomic intrinsics with LLVM 9+")
Signed-off-by: Samuel Pitoiset <[email protected]>
Reviewed-by: Bas Nieuwenhuizen <[email protected]>
Diffstat (limited to 'src/amd/common')
 src/amd/common/ac_nir_to_llvm.c | 12 +++++++-----
 src/amd/common/ac_shader_abi.h  |  1 +
 2 files changed, 8 insertions(+), 5 deletions(-)
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index c92eaaca31d..151e0d0f961 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -2417,10 +2417,12 @@ static void get_image_coords(struct ac_nir_context *ctx,
 }
 
 static LLVMValueRef get_image_buffer_descriptor(struct ac_nir_context *ctx,
-                                                const nir_intrinsic_instr *instr, bool write)
+                                                const nir_intrinsic_instr *instr,
+                                                bool write, bool atomic)
 {
 	LLVMValueRef rsrc = get_image_descriptor(ctx, instr, AC_DESC_BUFFER, write);
-	if (ctx->abi->gfx9_stride_size_workaround) {
+	if (ctx->abi->gfx9_stride_size_workaround ||
+	    (ctx->abi->gfx9_stride_size_workaround_for_atomic && atomic)) {
 		LLVMValueRef elem_count = LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 2, 0), "");
 		LLVMValueRef stride = LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 1, 0), "");
 		stride = LLVMBuildLShr(ctx->ac.builder, stride, LLVMConstInt(ctx->ac.i32, 16, 0), "");
@@ -2466,7 +2468,7 @@ static LLVMValueRef visit_image_load(struct ac_nir_context *ctx,
 		unsigned num_channels = util_last_bit(mask);
 		LLVMValueRef rsrc, vindex;
 
-		rsrc = get_image_buffer_descriptor(ctx, instr, false);
+		rsrc = get_image_buffer_descriptor(ctx, instr, false, false);
 		vindex = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]),
 						 ctx->ac.i32_0, "");
 
@@ -2520,7 +2522,7 @@ static void visit_image_store(struct ac_nir_context *ctx,
 	args.cache_policy = get_cache_policy(ctx, access, true, writeonly_memory);
 
 	if (dim == GLSL_SAMPLER_DIM_BUF) {
-		LLVMValueRef rsrc = get_image_buffer_descriptor(ctx, instr, true);
+		LLVMValueRef rsrc = get_image_buffer_descriptor(ctx, instr, true, false);
 		LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[3]));
 		unsigned src_channels = ac_get_llvm_num_components(src);
 		LLVMValueRef vindex;
@@ -2632,7 +2634,7 @@ static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx,
 		params[param_count++] = get_src(ctx, instr->src[3]);
 
 	if (dim == GLSL_SAMPLER_DIM_BUF) {
-		params[param_count++] = get_image_buffer_descriptor(ctx, instr, true);
+		params[param_count++] = get_image_buffer_descriptor(ctx, instr, true, true);
 		params[param_count++] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]),
 								ctx->ac.i32_0, ""); /* vindex */
 		params[param_count++] = ctx->ac.i32_0; /* voffset */
diff --git a/src/amd/common/ac_shader_abi.h b/src/amd/common/ac_shader_abi.h
index 108fe58ce57..8debb1ff986 100644
--- a/src/amd/common/ac_shader_abi.h
+++ b/src/amd/common/ac_shader_abi.h
@@ -203,6 +203,7 @@ struct ac_shader_abi {
 	/* Whether to workaround GFX9 ignoring the stride for the buffer size if IDXEN=0
 	 * and LLVM optimizes an indexed load with constant index to IDXEN=0. */
 	bool gfx9_stride_size_workaround;
+	bool gfx9_stride_size_workaround_for_atomic;
 };
 
 #endif /* AC_SHADER_ABI_H */
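The new flag is only declared in the common code above; the driver side
sits outside this diffstat (which is limited to 'src/amd/common'). A
hypothetical sketch of how radv might wire it up, assuming the HAVE_LLVM
version macro and chip_class field of that era; the exact condition is
an assumption, not taken from this commit:

    /* Hypothetical: the old buffer intrinsics, and thus the atomic-only
     * workaround, would only be needed on GFX9 while LLVM is older
     * than 9. */
    ctx.abi.gfx9_stride_size_workaround_for_atomic =
        ctx.ac.chip_class == GFX9 && HAVE_LLVM < 0x0900;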