about summary refs log tree commit diff stats
path: root/src/amd
diff options
context:
space:
mode:
Diffstat (limited to 'src/amd')
-rw-r--r-- src/amd/common/ac_llvm_build.c 24
-rw-r--r-- src/amd/common/ac_llvm_build.h 10
-rw-r--r-- src/amd/common/ac_nir_to_llvm.c 38
-rw-r--r-- src/amd/common/ac_shader_abi.h 4
-rw-r--r-- src/amd/vulkan/radv_nir_to_llvm.c 1
5 files changed, 70 insertions, 7 deletions
diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 1ae2b9dd170..6f577cdac7f 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1082,6 +1082,30 @@ LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
can_speculate, true);
}
+LLVMValueRef ac_build_buffer_load_format_gfx9_safe(struct ac_llvm_context *ctx,
+ LLVMValueRef rsrc,
+ LLVMValueRef vindex,
+ LLVMValueRef voffset,
+ unsigned num_channels,
+ bool glc,
+ bool can_speculate)
+{ /* Emit a buffer format load via ac_build_buffer_load_common, using a copy of rsrc whose element 2 is raised to at least the stride. */
+ LLVMValueRef elem_count = LLVMBuildExtractElement(ctx->builder, rsrc, LLVMConstInt(ctx->i32, 2, 0), ""); /* element 2 of rsrc, used here as the element count */
+ LLVMValueRef stride = LLVMBuildExtractElement(ctx->builder, rsrc, LLVMConstInt(ctx->i32, 1, 0), ""); /* element 1 of rsrc; the stride is read from the bits above bit 15 */
+ stride = LLVMBuildLShr(ctx->builder, stride, LLVMConstInt(ctx->i32, 16, 0), ""); /* shift right by 16; no further masking is applied */
+ /* Unsigned max(elem_count, stride): the select keeps elem_count only when it is strictly greater than stride. */
+ LLVMValueRef new_elem_count = LLVMBuildSelect(ctx->builder,
+ LLVMBuildICmp(ctx->builder, LLVMIntUGT, elem_count, stride, ""),
+ elem_count, stride, "");
+ /* Descriptor actually used for the load: rsrc with element 2 replaced by the adjusted count. */
+ LLVMValueRef new_rsrc = LLVMBuildInsertElement(ctx->builder, rsrc, new_elem_count,
+ LLVMConstInt(ctx->i32, 2, 0), "");
+ /* vindex, voffset, num_channels, glc and can_speculate are forwarded as given; the literal false and true arguments are fixed by this wrapper. */
+ return ac_build_buffer_load_common(ctx, new_rsrc, vindex, voffset,
+ num_channels, glc, false,
+ can_speculate, true);
+}
+
/**
* Set range metadata on an instruction. This can only be used on load and
* call instructions. If you know an instruction can only produce the values
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index 6adcc11448c..f901f336857 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -242,6 +242,16 @@ LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
bool glc,
bool can_speculate);
+/* Variant of ac_build_buffer_load_format for GFX9: raises the element count in
+ * rsrc to at least the stride before loading, for when LLVM disables idxen. */
+LLVMValueRef ac_build_buffer_load_format_gfx9_safe(struct ac_llvm_context *ctx,
+ LLVMValueRef rsrc,
+ LLVMValueRef vindex,
+ LLVMValueRef voffset,
+ unsigned num_channels,
+ bool glc,
+ bool can_speculate);
+
LLVMValueRef
ac_get_thread_id(struct ac_llvm_context *ctx);
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 7de59efcfff..2da73a7bfb1 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1175,12 +1175,21 @@ static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx,
if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);
- return ac_build_buffer_load_format(&ctx->ac,
- args->resource,
- args->addr,
- ctx->ac.i32_0,
- util_last_bit(mask),
- false, true);
+ if (ctx->abi->gfx9_stride_size_workaround) {
+ return ac_build_buffer_load_format_gfx9_safe(&ctx->ac,
+ args->resource,
+ args->addr,
+ ctx->ac.i32_0,
+ util_last_bit(mask),
+ false, true);
+ } else {
+ return ac_build_buffer_load_format(&ctx->ac,
+ args->resource,
+ args->addr,
+ ctx->ac.i32_0,
+ util_last_bit(mask),
+ false, true);
+ }
}
args->opcode = ac_image_sample;
@@ -2198,8 +2207,23 @@ static void visit_image_store(struct ac_nir_context *ctx,
glc = ctx->ac.i1true;
if (dim == GLSL_SAMPLER_DIM_BUF) {
+ LLVMValueRef rsrc = get_sampler_desc(ctx, instr->variables[0], AC_DESC_BUFFER, NULL, true, true);
+
+ if (ctx->abi->gfx9_stride_size_workaround) {
+ LLVMValueRef elem_count = LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 2, 0), "");
+ LLVMValueRef stride = LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 1, 0), "");
+ stride = LLVMBuildLShr(ctx->ac.builder, stride, LLVMConstInt(ctx->ac.i32, 16, 0), "");
+
+ LLVMValueRef new_elem_count = LLVMBuildSelect(ctx->ac.builder,
+ LLVMBuildICmp(ctx->ac.builder, LLVMIntUGT, elem_count, stride, ""),
+ elem_count, stride, "");
+
+ rsrc = LLVMBuildInsertElement(ctx->ac.builder, rsrc, new_elem_count,
+ LLVMConstInt(ctx->ac.i32, 2, 0), "");
+ }
+
params[0] = ac_to_float(&ctx->ac, get_src(ctx, instr->src[2])); /* data */
- params[1] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_BUFFER, NULL, true, true);
+ params[1] = rsrc;
params[2] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[0]),
ctx->ac.i32_0, ""); /* vindex */
params[3] = ctx->ac.i32_0; /* voffset */
diff --git a/src/amd/common/ac_shader_abi.h b/src/amd/common/ac_shader_abi.h
index 2f222cf8d61..6b9a91c92a9 100644
--- a/src/amd/common/ac_shader_abi.h
+++ b/src/amd/common/ac_shader_abi.h
@@ -188,6 +188,10 @@ struct ac_shader_abi {
/* Whether to clamp the shadow reference value to [0,1]on VI. Radeonsi currently
* uses it due to promoting D16 to D32, but radv needs it off. */
bool clamp_shadow_reference;
+
+ /* Whether to work around GFX9 ignoring the stride for the buffer size when IDXEN=0,
+ * which can happen when LLVM optimizes an indexed load with a constant index to IDXEN=0. */
+ bool gfx9_stride_size_workaround;
};
#endif /* AC_SHADER_ABI_H */
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
index 23b58c37b23..c6b4e8b5328 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -3068,6 +3068,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
ctx.abi.load_sampler_desc = radv_get_sampler_desc;
ctx.abi.load_resource = radv_load_resource;
ctx.abi.clamp_shadow_reference = false;
+ ctx.abi.gfx9_stride_size_workaround = ctx.ac.chip_class == GFX9;
if (shader_count >= 2)
ac_init_exec_full_mask(&ctx.ac);