diff options
Diffstat (limited to 'src/amd')
-rw-r--r-- | src/amd/common/ac_llvm_build.c | 58 | ||||
-rw-r--r-- | src/amd/common/ac_llvm_build.h | 4 | ||||
-rw-r--r-- | src/amd/common/ac_nir_to_llvm.c | 4 |
3 files changed, 62 insertions, 4 deletions
diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index 08fedc7bf41..9435b189de4 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -551,8 +551,64 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx, LLVMValueRef soffset, unsigned inst_offset, bool glc, - bool slc) + bool slc, + bool writeonly_memory, + bool has_add_tid) { + /* TODO: Fix stores with ADD_TID and remove the "has_add_tid" flag. */ + if (HAVE_LLVM >= 0x0309 && !has_add_tid) { + /* Split 3 channel stores, because LLVM doesn't support 3-channel + * intrinsics. */ + if (num_channels == 3) { + LLVMValueRef v[3], v01; + + for (int i = 0; i < 3; i++) { + v[i] = LLVMBuildExtractElement(ctx->builder, vdata, + LLVMConstInt(ctx->i32, i, 0), ""); + } + v01 = ac_build_gather_values(ctx, v, 2); + + ac_build_buffer_store_dword(ctx, rsrc, v01, 2, voffset, + soffset, inst_offset, glc, slc, + writeonly_memory, has_add_tid); + ac_build_buffer_store_dword(ctx, rsrc, v[2], 1, voffset, + soffset, inst_offset + 8, + glc, slc, + writeonly_memory, has_add_tid); + return; + } + + unsigned func = CLAMP(num_channels, 1, 3) - 1; + static const char *types[] = {"f32", "v2f32", "v4f32"}; + char name[256]; + LLVMValueRef offset = soffset; + + if (inst_offset) + offset = LLVMBuildAdd(ctx->builder, offset, + LLVMConstInt(ctx->i32, inst_offset, 0), ""); + if (voffset) + offset = LLVMBuildAdd(ctx->builder, offset, voffset, ""); + + LLVMValueRef args[] = { + bitcast_to_float(ctx, vdata), + LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""), + LLVMConstInt(ctx->i32, 0, 0), + offset, + LLVMConstInt(ctx->i1, glc, 0), + LLVMConstInt(ctx->i1, slc, 0), + }; + + snprintf(name, sizeof(name), "llvm.amdgcn.buffer.store.%s", + types[func]); + + ac_emit_llvm_intrinsic(ctx, name, ctx->voidt, + args, ARRAY_SIZE(args), + writeonly_memory ? 
+ AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY : + AC_FUNC_ATTR_WRITEONLY); + return; + } + static unsigned dfmt[] = { V_008F0C_BUF_DATA_FORMAT_32, V_008F0C_BUF_DATA_FORMAT_32_32, diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h index 78df441b6dc..aa99e92e256 100644 --- a/src/amd/common/ac_llvm_build.h +++ b/src/amd/common/ac_llvm_build.h @@ -130,7 +130,9 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx, LLVMValueRef soffset, unsigned inst_offset, bool glc, - bool slc); + bool slc, + bool writeonly_memory, + bool has_add_tid); LLVMValueRef ac_build_buffer_load(struct ac_llvm_context *ctx, LLVMValueRef rsrc, diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index c3634701dad..2c9ef4916ba 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -3159,7 +3159,7 @@ visit_emit_vertex(struct nir_to_llvm_context *ctx, ac_build_buffer_store_dword(&ctx->ac, ctx->gsvs_ring, out_val, 1, voffset, ctx->gs2vs_offset, 0, - 1, 1); + 1, 1, true, true); } idx += slot_inc; } @@ -4675,7 +4675,7 @@ handle_es_outputs_post(struct nir_to_llvm_context *ctx) out_val, 1, NULL, ctx->es2gs_offset, (4 * param_index + j + start) * 4, - 1, 1); + 1, 1, true, true); } } ctx->shader_info->vs.esgs_itemsize = (max_output_written + 1) * 16; |