ac/nir: replace SI.buffer.load.dword with amdgcn.buffer.load

The old intrinsic (llvm.SI.buffer.load.dword) generates useless instructions; this was found while comparing geometry shaders between RadeonSI and RADV. The change improves all Vulkan demos that use geometry shaders: +4% for deferredshadows, +9% for viewportarray, and +7% for geometryshader on Polaris10. It also seems to improve DOW3 slightly (+1%).

Signed-off-by: Samuel Pitoiset <[email protected]>
Reviewed-by: Bas Nieuwenhuizen <[email protected]>
author: Samuel Pitoiset <[email protected]> 2018-02-01 16:37:15 +0100
committer: Samuel Pitoiset <[email protected]> 2018-02-02 12:32:21 +0100
commit: df1d5174fccc6771e24ef09e0cd77dfa377a7b6a (patch)
tree: 07a37c56b2b215a7a538ea26e11c17d38a1b178b /src/amd/common
parent: f9c121c420eb3d4b39aad3635b63cd48fe268783 (diff)
1 files changed, 20 insertions, 32 deletions
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 05b937803f0..0f7d6258acd 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -3047,7 +3047,6 @@ load_gs_input(struct ac_shader_abi *abi,
 {
 	struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi);
 	LLVMValueRef vtx_offset;
-	LLVMValueRef args[9];
 	unsigned param, vtx_offset_param;
 	LLVMValueRef value[4], result;
 
@@ -3065,20 +3064,16 @@ load_gs_input(struct ac_shader_abi *abi,
 			                       LLVMConstInt(ctx->ac.i32, param * 4 + i + const_index, 0), "");
 			value[i] = ac_lds_load(&ctx->ac, dw_addr);
 		} else {
-			args[0] = ctx->esgs_ring;
-			args[1] = vtx_offset;
-			args[2] = LLVMConstInt(ctx->ac.i32, (param * 4 + i + const_index) * 256, false);
-			args[3] = ctx->ac.i32_0;
-			args[4] = ctx->ac.i32_1; /* OFFEN */
-			args[5] = ctx->ac.i32_0; /* IDXEN */
-			args[6] = ctx->ac.i32_1; /* GLC */
-			args[7] = ctx->ac.i32_0; /* SLC */
-			args[8] = ctx->ac.i32_0; /* TFE */
-
-			value[i] = ac_build_intrinsic(&ctx->ac, "llvm.SI.buffer.load.dword.i32.i32",
-			                              ctx->ac.i32, args, 9,
-			                              AC_FUNC_ATTR_READONLY |
-			                              AC_FUNC_ATTR_LEGACY);
+			LLVMValueRef soffset =
+				LLVMConstInt(ctx->ac.i32,
+					     (param * 4 + i + const_index) * 256,
+					     false);
+
+			value[i] = ac_build_buffer_load(&ctx->ac,
+							ctx->esgs_ring, 1,
+							ctx->ac.i32_0,
+							vtx_offset, soffset,
+							0, 1, 0, true, false);
 		}
 	}
 	result = ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
@@ -7166,16 +7161,9 @@ void ac_compile_nir_shader(LLVMTargetMachineRef tm,
 static void
 ac_gs_copy_shader_emit(struct nir_to_llvm_context *ctx)
 {
-	LLVMValueRef args[9];
-	args[0] = ctx->gsvs_ring;
-	args[1] = LLVMBuildMul(ctx->builder, ctx->abi.vertex_id, LLVMConstInt(ctx->ac.i32, 4, false), "");
-	args[3] = ctx->ac.i32_0;
-	args[4] = ctx->ac.i32_1;  /* OFFEN */
-	args[5] = ctx->ac.i32_0; /* IDXEN */
-	args[6] = ctx->ac.i32_1;  /* GLC */
-	args[7] = ctx->ac.i32_1;  /* SLC */
-	args[8] = ctx->ac.i32_0; /* TFE */
-
+	LLVMValueRef vtx_offset =
+		LLVMBuildMul(ctx->builder, ctx->abi.vertex_id,
+			     LLVMConstInt(ctx->ac.i32, 4, false), "");
 	int idx = 0;
 
 	for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
@@ -7193,16 +7181,16 @@ ac_gs_copy_shader_emit(struct nir_to_llvm_context *ctx)
 		}
 
 		for (unsigned j = 0; j < length; j++) {
-			LLVMValueRef value;
-			args[2] = LLVMConstInt(ctx->ac.i32,
+			LLVMValueRef value, soffset;
+
+			soffset = LLVMConstInt(ctx->ac.i32,
 					       (slot * 4 + j) *
 					       ctx->gs_max_out_vertices * 16 * 4, false);
 
-			value = ac_build_intrinsic(&ctx->ac,
-						   "llvm.SI.buffer.load.dword.i32.i32",
-						   ctx->ac.i32, args, 9,
-						   AC_FUNC_ATTR_READONLY |
-						   AC_FUNC_ATTR_LEGACY);
+			value = ac_build_buffer_load(&ctx->ac, ctx->gsvs_ring,
+						     1, ctx->ac.i32_0,
+						     vtx_offset, soffset,
+						     0, 1, 1, true, false);
 
 			LLVMBuildStore(ctx->builder,
 				       ac_to_float(&ctx->ac, value), ctx->nir->outputs[radeon_llvm_reg_index_soa(i, j)]);
author	Samuel Pitoiset <[email protected]>	2018-02-01 16:37:15 +0100
committer	Samuel Pitoiset <[email protected]>	2018-02-02 12:32:21 +0100
commit	df1d5174fccc6771e24ef09e0cd77dfa377a7b6a (patch)
tree	07a37c56b2b215a7a538ea26e11c17d38a1b178b /src/amd/common
parent	f9c121c420eb3d4b39aad3635b63cd48fe268783 (diff)