summary | refs | log | tree | commit | diff | stats
path: root/src/amd
diff options
context:
space:
mode:
Diffstat (limited to 'src/amd')
-rw-r--r-- src/amd/common/ac_llvm_build.c 46
-rw-r--r-- src/amd/common/ac_llvm_build.h 3
-rw-r--r-- src/amd/common/ac_nir_to_llvm.c 2
3 files changed, 37 insertions, 14 deletions
diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 3df9f53ed7a..237e9291d41 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -633,14 +633,46 @@ ac_build_buffer_load(struct ac_llvm_context *ctx,
unsigned inst_offset,
unsigned glc,
unsigned slc,
- bool can_speculate)
+ bool can_speculate,
+ bool allow_smem)
{
+ LLVMValueRef offset = LLVMConstInt(ctx->i32, inst_offset, 0);
+ if (voffset)
+ offset = LLVMBuildAdd(ctx->builder, offset, voffset, "");
+ if (soffset)
+ offset = LLVMBuildAdd(ctx->builder, offset, soffset, "");
+
+ /* TODO: VI and later generations can use SMEM with GLC=1.*/
+ if (allow_smem && !glc && !slc) {
+ assert(vindex == NULL);
+
+ LLVMValueRef result[4];
+
+ for (int i = 0; i < num_channels; i++) {
+ if (i) {
+ offset = LLVMBuildAdd(ctx->builder, offset,
+ LLVMConstInt(ctx->i32, 4, 0), "");
+ }
+ LLVMValueRef args[2] = {rsrc, offset};
+ result[i] = ac_build_intrinsic(ctx, "llvm.SI.load.const.v4i32",
+ ctx->f32, args, 2,
+ AC_FUNC_ATTR_READNONE |
+ AC_FUNC_ATTR_LEGACY);
+ }
+ if (num_channels == 1)
+ return result[0];
+
+ if (num_channels == 3)
+ result[num_channels++] = LLVMGetUndef(ctx->f32);
+ return ac_build_gather_values(ctx, result, num_channels);
+ }
+
unsigned func = CLAMP(num_channels, 1, 3) - 1;
LLVMValueRef args[] = {
LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
vindex ? vindex : LLVMConstInt(ctx->i32, 0, 0),
- LLVMConstInt(ctx->i32, inst_offset, 0),
+ offset,
LLVMConstInt(ctx->i1, glc, 0),
LLVMConstInt(ctx->i1, slc, 0)
};
@@ -650,16 +682,6 @@ ac_build_buffer_load(struct ac_llvm_context *ctx,
const char *type_names[] = {"f32", "v2f32", "v4f32"};
char name[256];
- if (voffset) {
- args[2] = LLVMBuildAdd(ctx->builder, args[2], voffset,
- "");
- }
-
- if (soffset) {
- args[2] = LLVMBuildAdd(ctx->builder, args[2], soffset,
- "");
- }
-
snprintf(name, sizeof(name), "llvm.amdgcn.buffer.load.%s",
type_names[func]);
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index c1b5f3d228e..ebb78fbd79b 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -143,7 +143,8 @@ ac_build_buffer_load(struct ac_llvm_context *ctx,
unsigned inst_offset,
unsigned glc,
unsigned slc,
- bool can_speculate);
+ bool can_speculate,
+ bool allow_smem);
LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
LLVMValueRef rsrc,
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 8ae0a75fd04..28ba47d502f 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -2823,7 +2823,7 @@ load_tes_input(struct nir_to_llvm_context *ctx,
is_compact, vertex_index, indir_index);
result = ac_build_buffer_load(&ctx->ac, ctx->hs_ring_tess_offchip, instr->num_components, NULL,
- buf_addr, ctx->oc_lds, is_compact ? (4 * const_index) : 0, 1, 0, true);
+ buf_addr, ctx->oc_lds, is_compact ? (4 * const_index) : 0, 1, 0, true, false);
result = trim_vector(ctx, result, instr->num_components);
result = LLVMBuildBitCast(ctx->builder, result, get_def_type(ctx, &instr->dest.ssa), "");
return result;