author     Samuel Pitoiset <[email protected]>	2019-05-03 11:45:34 +0200
committer  Samuel Pitoiset <[email protected]>	2019-05-03 17:59:12 +0200
commit     4f18c43d1df64135e8968a7d4fbfd2c9918b76ae (patch)
tree       efd2296ec3caf159bf2a2102478594ab145c2ed2
parent     e340d7beef9573facc53f3eeece640b2a14a15f5 (diff)
radv: apply the indexing workaround for atomic buffer operations on GFX9

Because the new raw/struct intrinsics are buggy with LLVM 8
(they weren't marked as a source of divergence), we fall back to the
old intrinsics for atomic buffer operations only. This means we need
to apply the indexing workaround for GFX9. The load/store
operations still use the new LLVM 8 intrinsics.

The fact that we need another workaround is painful, but we should
be able to clean that up a bit once LLVM 7 support is dropped.

This fixes a GPU hang with AC Odyssey and some rendering problems
with Nioh.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=110573
Fixes: 31164cf5f70 ("ac/nir: only use the new raw/struct image atomic intrinsics with LLVM 9+")
Signed-off-by: Samuel Pitoiset <[email protected]>
Reviewed-by: Bas Nieuwenhuizen <[email protected]>
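
For context, the workaround this patch extends rewrites the V# buffer
descriptor so the bounds check still covers the buffer when LLVM folds a
constant index into IDXEN=0. Below is a minimal plain-C sketch of the
fix-up that get_image_buffer_descriptor emits as LLVM IR in the first hunk
of this patch; the helper name and the raw uint32_t[4] view of the
descriptor are illustrative assumptions, not code from the commit:

#include <stdint.h>

/* Minimal sketch, assuming the V# buffer descriptor is viewed as four raw
 * dwords; the real code builds equivalent arithmetic as LLVM IR (the
 * extract and shift steps are visible in the first hunk below). */
static void gfx9_fixup_buffer_descriptor(uint32_t desc[4])
{
	uint32_t stride      = desc[1] >> 16; /* stride sits in the high half of
	                                       * dword1 (mirrors the plain
	                                       * 16-bit shift in the hunk) */
	uint32_t num_records = desc[2];       /* dword2 holds num_records */

	/* GFX9 ignores the stride for the buffer-size check when IDXEN=0, so
	 * take the larger of the two values to keep a valid access inside the
	 * checked range: num_records = max(num_records, stride). */
	if (stride > num_records)
		desc[2] = stride;
}
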
-rw-r--r--	src/amd/common/ac_nir_to_llvm.c	| 12
-rw-r--r--	src/amd/common/ac_shader_abi.h	| 1
-rw-r--r--	src/amd/vulkan/radv_nir_to_llvm.c	| 6
3 files changed, 14 insertions, 5 deletions
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index c92eaaca31d..151e0d0f961 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -2417,10 +2417,12 @@ static void get_image_coords(struct ac_nir_context *ctx,
 }
 
 static LLVMValueRef get_image_buffer_descriptor(struct ac_nir_context *ctx,
-                                                const nir_intrinsic_instr *instr, bool write)
+                                                const nir_intrinsic_instr *instr,
+                                                bool write, bool atomic)
 {
 	LLVMValueRef rsrc = get_image_descriptor(ctx, instr, AC_DESC_BUFFER, write);
-	if (ctx->abi->gfx9_stride_size_workaround) {
+	if (ctx->abi->gfx9_stride_size_workaround ||
+	    (ctx->abi->gfx9_stride_size_workaround_for_atomic && atomic)) {
 		LLVMValueRef elem_count = LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 2, 0), "");
 		LLVMValueRef stride = LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 1, 0), "");
 		stride = LLVMBuildLShr(ctx->ac.builder, stride, LLVMConstInt(ctx->ac.i32, 16, 0), "");
@@ -2466,7 +2468,7 @@ static LLVMValueRef visit_image_load(struct ac_nir_context *ctx,
 		unsigned num_channels = util_last_bit(mask);
 		LLVMValueRef rsrc, vindex;
 
-		rsrc = get_image_buffer_descriptor(ctx, instr, false);
+		rsrc = get_image_buffer_descriptor(ctx, instr, false, false);
 		vindex = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]),
 						 ctx->ac.i32_0, "");
 
@@ -2520,7 +2522,7 @@ static void visit_image_store(struct ac_nir_context *ctx,
 	args.cache_policy = get_cache_policy(ctx, access, true, writeonly_memory);
 
 	if (dim == GLSL_SAMPLER_DIM_BUF) {
-		LLVMValueRef rsrc = get_image_buffer_descriptor(ctx, instr, true);
+		LLVMValueRef rsrc = get_image_buffer_descriptor(ctx, instr, true, false);
 		LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[3]));
 		unsigned src_channels = ac_get_llvm_num_components(src);
 		LLVMValueRef vindex;
@@ -2632,7 +2634,7 @@ static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx,
 	params[param_count++] = get_src(ctx, instr->src[3]);
 
 	if (dim == GLSL_SAMPLER_DIM_BUF) {
-		params[param_count++] = get_image_buffer_descriptor(ctx, instr, true);
+		params[param_count++] = get_image_buffer_descriptor(ctx, instr, true, true);
 		params[param_count++] = LLVMBuildExtractElement(ctx->ac.builder,
 								get_src(ctx, instr->src[1]), ctx->ac.i32_0, ""); /* vindex */
 		params[param_count++] = ctx->ac.i32_0; /* voffset */
diff --git a/src/amd/common/ac_shader_abi.h b/src/amd/common/ac_shader_abi.h
index 108fe58ce57..8debb1ff986 100644
--- a/src/amd/common/ac_shader_abi.h
+++ b/src/amd/common/ac_shader_abi.h
@@ -203,6 +203,7 @@ struct ac_shader_abi {
 	/* Whether to workaround GFX9 ignoring the stride for the buffer size if IDXEN=0
 	 * and LLVM optimizes an indexed load with constant index to IDXEN=0. */
 	bool gfx9_stride_size_workaround;
+	bool gfx9_stride_size_workaround_for_atomic;
 };
 
 #endif /* AC_SHADER_ABI_H */
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
index 796d78e34f4..d83f0bd547f 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -3687,6 +3687,12 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
 
 	ctx.abi.clamp_shadow_reference = false;
 	ctx.abi.gfx9_stride_size_workaround = ctx.ac.chip_class == GFX9 && HAVE_LLVM < 0x800;
+	/* Because the new raw/struct atomic intrinsics are buggy with LLVM 8,
+	 * we fallback to the old intrinsics for atomic buffer image operations
+	 * and thus we need to apply the indexing workaround...
+	 */
+	ctx.abi.gfx9_stride_size_workaround_for_atomic = ctx.ac.chip_class == GFX9 && HAVE_LLVM < 0x900;
+
 	if (shader_count >= 2)
 		ac_init_exec_full_mask(&ctx.ac);
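
The split the commit message describes (loads/stores keep the new
raw/struct intrinsics on LLVM 8, while atomics fall back to the legacy
ones until LLVM 9) boils down to picking an intrinsic name family by LLVM
version. A hypothetical sketch of that selection follows; the helper and
its signature are illustrative and not Mesa code, though the
llvm.amdgcn.* families are the real LLVM intrinsic names, with the
concrete op appended:

#include <stdio.h>

/* Hypothetical helper, not from this commit: chooses the buffer-atomic
 * intrinsic family by LLVM version, using Mesa's HAVE_LLVM encoding
 * (0x800 == LLVM 8.0, 0x900 == LLVM 9.0). */
static void buffer_atomic_intrinsic_name(char *buf, size_t size,
                                         unsigned have_llvm, const char *op)
{
	snprintf(buf, size,
	         have_llvm >= 0x900
	             ? "llvm.amdgcn.struct.buffer.atomic.%s" /* new, divergence-safe from LLVM 9 */
	             : "llvm.amdgcn.buffer.atomic.%s",       /* legacy fallback */
	         op); /* op is e.g. "add", "smin", "umax" or "cmpswap" */
}

Only the legacy path is subject to LLVM folding a constant index into
IDXEN=0, which is presumably why the new
gfx9_stride_size_workaround_for_atomic flag gates the descriptor fix-up to
the atomic case.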