diff options
Diffstat (limited to 'src/amd')
-rw-r--r-- | src/amd/common/ac_llvm_build.c | 24 | ||||
-rw-r--r-- | src/amd/common/ac_llvm_build.h | 10 | ||||
-rw-r--r-- | src/amd/common/ac_nir_to_llvm.c | 38 | ||||
-rw-r--r-- | src/amd/common/ac_shader_abi.h | 4 | ||||
-rw-r--r-- | src/amd/vulkan/radv_nir_to_llvm.c | 1 |
5 files changed, 70 insertions, 7 deletions
diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index 1ae2b9dd170..6f577cdac7f 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -1082,6 +1082,30 @@ LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx, can_speculate, true); } +LLVMValueRef ac_build_buffer_load_format_gfx9_safe(struct ac_llvm_context *ctx, + LLVMValueRef rsrc, + LLVMValueRef vindex, + LLVMValueRef voffset, + unsigned num_channels, + bool glc, + bool can_speculate) +{ + LLVMValueRef elem_count = LLVMBuildExtractElement(ctx->builder, rsrc, LLVMConstInt(ctx->i32, 2, 0), ""); + LLVMValueRef stride = LLVMBuildExtractElement(ctx->builder, rsrc, LLVMConstInt(ctx->i32, 1, 0), ""); + stride = LLVMBuildLShr(ctx->builder, stride, LLVMConstInt(ctx->i32, 16, 0), ""); + + LLVMValueRef new_elem_count = LLVMBuildSelect(ctx->builder, + LLVMBuildICmp(ctx->builder, LLVMIntUGT, elem_count, stride, ""), + elem_count, stride, ""); + + LLVMValueRef new_rsrc = LLVMBuildInsertElement(ctx->builder, rsrc, new_elem_count, + LLVMConstInt(ctx->i32, 2, 0), ""); + + return ac_build_buffer_load_common(ctx, new_rsrc, vindex, voffset, + num_channels, glc, false, + can_speculate, true); +} + /** * Set range metadata on an instruction. This can only be used on load and * call instructions. If you know an instruction can only produce the values diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h index 6adcc11448c..f901f336857 100644 --- a/src/amd/common/ac_llvm_build.h +++ b/src/amd/common/ac_llvm_build.h @@ -242,6 +242,16 @@ LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx, bool glc, bool can_speculate); +/* load_format that handles the stride & element count better if idxen is + * disabled by LLVM. */ +LLVMValueRef ac_build_buffer_load_format_gfx9_safe(struct ac_llvm_context *ctx, + LLVMValueRef rsrc, + LLVMValueRef vindex, + LLVMValueRef voffset, + unsigned num_channels, + bool glc, + bool can_speculate); + LLVMValueRef ac_get_thread_id(struct ac_llvm_context *ctx); diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 7de59efcfff..2da73a7bfb1 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -1175,12 +1175,21 @@ static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx, if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) { unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa); - return ac_build_buffer_load_format(&ctx->ac, - args->resource, - args->addr, - ctx->ac.i32_0, - util_last_bit(mask), - false, true); + if (ctx->abi->gfx9_stride_size_workaround) { + return ac_build_buffer_load_format_gfx9_safe(&ctx->ac, + args->resource, + args->addr, + ctx->ac.i32_0, + util_last_bit(mask), + false, true); + } else { + return ac_build_buffer_load_format(&ctx->ac, + args->resource, + args->addr, + ctx->ac.i32_0, + util_last_bit(mask), + false, true); + } } args->opcode = ac_image_sample; @@ -2198,8 +2207,23 @@ static void visit_image_store(struct ac_nir_context *ctx, glc = ctx->ac.i1true; if (dim == GLSL_SAMPLER_DIM_BUF) { + LLVMValueRef rsrc = get_sampler_desc(ctx, instr->variables[0], AC_DESC_BUFFER, NULL, true, true); + + if (ctx->abi->gfx9_stride_size_workaround) { + LLVMValueRef elem_count = LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 2, 0), ""); + LLVMValueRef stride = LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 1, 0), ""); + stride = LLVMBuildLShr(ctx->ac.builder, stride, LLVMConstInt(ctx->ac.i32, 16, 0), ""); + + LLVMValueRef new_elem_count = LLVMBuildSelect(ctx->ac.builder, + LLVMBuildICmp(ctx->ac.builder, LLVMIntUGT, elem_count, stride, ""), + elem_count, stride, ""); + + rsrc = LLVMBuildInsertElement(ctx->ac.builder, rsrc, new_elem_count, + LLVMConstInt(ctx->ac.i32, 2, 0), ""); + } + params[0] = ac_to_float(&ctx->ac, get_src(ctx, instr->src[2])); /* data */ - params[1] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_BUFFER, NULL, true, true); + params[1] = rsrc; params[2] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[0]), ctx->ac.i32_0, ""); /* vindex */ params[3] = ctx->ac.i32_0; /* voffset */ diff --git a/src/amd/common/ac_shader_abi.h b/src/amd/common/ac_shader_abi.h index 2f222cf8d61..6b9a91c92a9 100644 --- a/src/amd/common/ac_shader_abi.h +++ b/src/amd/common/ac_shader_abi.h @@ -188,6 +188,10 @@ struct ac_shader_abi { /* Whether to clamp the shadow reference value to [0,1]on VI. Radeonsi currently * uses it due to promoting D16 to D32, but radv needs it off. */ bool clamp_shadow_reference; + + /* Whether to workaround GFX9 ignoring the stride for the buffer size if IDXEN=0 + * and LLVM optimizes an indexed load with constant index to IDXEN=0. */ + bool gfx9_stride_size_workaround; }; #endif /* AC_SHADER_ABI_H */ diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index 23b58c37b23..c6b4e8b5328 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -3068,6 +3068,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm, ctx.abi.load_sampler_desc = radv_get_sampler_desc; ctx.abi.load_resource = radv_load_resource; ctx.abi.clamp_shadow_reference = false; + ctx.abi.gfx9_stride_size_workaround = ctx.ac.chip_class == GFX9; if (shader_count >= 2) ac_init_exec_full_mask(&ctx.ac); |