summaryrefslogtreecommitdiffstats
path: root/src/amd
diff options
context:
space:
mode:
authorMarek Olšák <[email protected]>2017-02-24 02:09:47 +0100
committerMarek Olšák <[email protected]>2017-03-03 15:29:30 +0100
commit97e21cfa257292ea57b1d5f02a63908b5e373836 (patch)
tree9b4d983c1e52cbabccc062d7c75a7f52dff67212 /src/amd
parent684339827c357801ea80cd19367124236dd2fc92 (diff)
ac: replace llvm.SI.tbuffer.store with llvm.amdgcn.buffer.store if ADD_TID=0
ADD_TID doesn't work; it needs more investigation. v2: remove leftover dead code. Reviewed-by: Dave Airlie <[email protected]> (v1)
Diffstat (limited to 'src/amd')
-rw-r--r--src/amd/common/ac_llvm_build.c58
-rw-r--r--src/amd/common/ac_llvm_build.h4
-rw-r--r--src/amd/common/ac_nir_to_llvm.c4
3 files changed, 62 insertions, 4 deletions
diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 08fedc7bf41..9435b189de4 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -551,8 +551,64 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
LLVMValueRef soffset,
unsigned inst_offset,
bool glc,
- bool slc)
+ bool slc,
+ bool writeonly_memory,
+ bool has_add_tid)
{
+ /* TODO: Fix stores with ADD_TID and remove the "has_add_tid" flag. */
+ if (HAVE_LLVM >= 0x0309 && !has_add_tid) {
+ /* Split 3 channel stores, because LLVM doesn't support 3-channel
+ * intrinsics. */
+ if (num_channels == 3) {
+ LLVMValueRef v[3], v01;
+
+ for (int i = 0; i < 3; i++) {
+ v[i] = LLVMBuildExtractElement(ctx->builder, vdata,
+ LLVMConstInt(ctx->i32, i, 0), "");
+ }
+ v01 = ac_build_gather_values(ctx, v, 2);
+
+ ac_build_buffer_store_dword(ctx, rsrc, v01, 2, voffset,
+ soffset, inst_offset, glc, slc,
+ writeonly_memory, has_add_tid);
+ ac_build_buffer_store_dword(ctx, rsrc, v[2], 1, voffset,
+ soffset, inst_offset + 8,
+ glc, slc,
+ writeonly_memory, has_add_tid);
+ return;
+ }
+
+ unsigned func = CLAMP(num_channels, 1, 3) - 1;
+ static const char *types[] = {"f32", "v2f32", "v4f32"};
+ char name[256];
+ LLVMValueRef offset = soffset;
+
+ if (inst_offset)
+ offset = LLVMBuildAdd(ctx->builder, offset,
+ LLVMConstInt(ctx->i32, inst_offset, 0), "");
+ if (voffset)
+ offset = LLVMBuildAdd(ctx->builder, offset, voffset, "");
+
+ LLVMValueRef args[] = {
+ bitcast_to_float(ctx, vdata),
+ LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
+ LLVMConstInt(ctx->i32, 0, 0),
+ offset,
+ LLVMConstInt(ctx->i1, glc, 0),
+ LLVMConstInt(ctx->i1, slc, 0),
+ };
+
+ snprintf(name, sizeof(name), "llvm.amdgcn.buffer.store.%s",
+ types[func]);
+
+ ac_emit_llvm_intrinsic(ctx, name, ctx->voidt,
+ args, ARRAY_SIZE(args),
+ writeonly_memory ?
+ AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY :
+ AC_FUNC_ATTR_WRITEONLY);
+ return;
+ }
+
static unsigned dfmt[] = {
V_008F0C_BUF_DATA_FORMAT_32,
V_008F0C_BUF_DATA_FORMAT_32_32,
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index 78df441b6dc..aa99e92e256 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -130,7 +130,9 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
LLVMValueRef soffset,
unsigned inst_offset,
bool glc,
- bool slc);
+ bool slc,
+ bool writeonly_memory,
+ bool has_add_tid);
LLVMValueRef
ac_build_buffer_load(struct ac_llvm_context *ctx,
LLVMValueRef rsrc,
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index c3634701dad..2c9ef4916ba 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -3159,7 +3159,7 @@ visit_emit_vertex(struct nir_to_llvm_context *ctx,
ac_build_buffer_store_dword(&ctx->ac, ctx->gsvs_ring,
out_val, 1,
voffset, ctx->gs2vs_offset, 0,
- 1, 1);
+ 1, 1, true, true);
}
idx += slot_inc;
}
@@ -4675,7 +4675,7 @@ handle_es_outputs_post(struct nir_to_llvm_context *ctx)
out_val, 1,
NULL, ctx->es2gs_offset,
(4 * param_index + j + start) * 4,
- 1, 1);
+ 1, 1, true, true);
}
}
ctx->shader_info->vs.esgs_itemsize = (max_output_written + 1) * 16;