radeonsi/nir: gather buffers declared more accurately and use const fast path

For now we skip the const buffer fast path on SI when HAVE_LLVM < 0x0600, for simplicity. We also skip setting the more accurate masks for builtin uniforms for now, as doing so causes some piglit regressions.

Reviewed-by: Marek Olšák <[email protected]>
author: Timothy Arceri <[email protected]> 2018-03-27 10:39:49 +1100
committer: Timothy Arceri <[email protected]> 2018-04-02 14:56:00 +1000
commit: dd3d3cc877423027562087261a6e47226b69df84 (patch)
tree: 7fd3cce25d210069e7bc7b5e6d262e637bb91c43
parent: 56017d81004072fd00979f3a987ff2e94e3b6315 (diff)
2 files changed, 90 insertions, 6 deletions
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 1661b54d056..18d5afe6ff6 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2374,8 +2374,15 @@ static LLVMValueRef load_const_buffer_desc(struct si_shader_context *ctx, int i)
 static LLVMValueRef load_ubo(struct ac_shader_abi *abi, LLVMValueRef index)
 {
 	struct si_shader_context *ctx = si_shader_context_from_abi(abi);
+	struct si_shader_selector *sel = ctx->shader->selector;
+
 	LLVMValueRef ptr = LLVMGetParam(ctx->main_fn, ctx->param_const_and_shader_buffers);
 
+	if (sel->info.const_buffers_declared == 1 &&
+	    sel->info.shader_buffers_declared == 0) {
+		return load_const_buffer_desc_fast_path(ctx);
+	}
+
 	index = si_llvm_bound_index(ctx, index, ctx->num_const_buffers);
 	index = LLVMBuildAdd(ctx->ac.builder, index,
 			     LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS, 0), "");
diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c
index 437eefc54d0..7dd8314d4b5 100644
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -600,23 +600,97 @@ void si_nir_scan_shader(const struct nir_shader *nir,
 
 	info->num_outputs = num_outputs;
 
+	struct set *ubo_set = _mesa_set_create(NULL, _mesa_hash_pointer,
+					       _mesa_key_pointer_equal);
+
+	/* Initialise const_file_max[0] */
+	info->const_file_max[0] = -1;
+
+	unsigned ubo_idx = 1;
 	nir_foreach_variable(variable, &nir->uniforms) {
 		const struct glsl_type *type = variable->type;
 		enum glsl_base_type base_type =
 			glsl_get_base_type(glsl_without_array(type));
 		unsigned aoa_size = MAX2(1, glsl_get_aoa_size(type));
 
+		/* Gather buffers declared bitmasks. Note: radeonsi doesn't
+		 * really use the mask (other than ubo_idx == 1 for regular
+		 * uniforms); it's really only used for getting the buffer count,
+		 * so we don't need to worry about the ordering.
+		 */
+		if (variable->interface_type != NULL) {
+			if (variable->data.mode == nir_var_uniform) {
+
+				unsigned block_count;
+				if (base_type != GLSL_TYPE_INTERFACE) {
+					struct set_entry *entry =
+						_mesa_set_search(ubo_set, variable->interface_type);
+
+					/* Check if we have already processed
+					 * a member from this ubo.
+					 */
+					if (entry)
+						continue;
+
+					block_count = 1;
+				} else {
+					block_count = aoa_size;
+				}
+
+				info->const_buffers_declared |= u_bit_consecutive(ubo_idx, block_count);
+				ubo_idx += block_count;
+
+				_mesa_set_add(ubo_set, variable->interface_type);
+			}
+
+			if (variable->data.mode == nir_var_shader_storage) {
+				/* TODO: make this more accurate */
+				info->shader_buffers_declared =
+					u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS);
+			}
+
+			continue;
+		}
+
 		/* We rely on the fact that nir_lower_samplers_as_deref has
 		 * eliminated struct dereferences.
 		 */
-		if (base_type == GLSL_TYPE_SAMPLER)
+		if (base_type == GLSL_TYPE_SAMPLER) {
 			info->samplers_declared |=
 				u_bit_consecutive(variable->data.binding, aoa_size);
-		else if (base_type == GLSL_TYPE_IMAGE)
+
+			if (variable->data.bindless) {
+				info->const_buffers_declared |= 1;
+				info->const_file_max[0] +=
+					glsl_count_attribute_slots(type, false);
+			}
+		} else if (base_type == GLSL_TYPE_IMAGE) {
 			info->images_declared |=
 				u_bit_consecutive(variable->data.binding, aoa_size);
+
+			if (variable->data.bindless) {
+				info->const_buffers_declared |= 1;
+				info->const_file_max[0] +=
+					glsl_count_attribute_slots(type, false);
+			}
+		} else if (base_type != GLSL_TYPE_ATOMIC_UINT) {
+			if (strncmp(variable->name, "state.", 6) == 0 ||
+			    strncmp(variable->name, "gl_", 3) == 0) {
+				/* FIXME: figure out why piglit tests with builtin
+				 * uniforms are failing without this.
+				 */
+				info->const_buffers_declared =
+					u_bit_consecutive(0, SI_NUM_CONST_BUFFERS);
+			} else {
+				info->const_buffers_declared |= 1;
+				info->const_file_max[0] +=
+					glsl_count_attribute_slots(type, false);
+			}
+		}
 	}
 
+	_mesa_set_destroy(ubo_set, NULL);
+
 	info->num_written_clipdistance = nir->info.clip_distance_array_size;
 	info->num_written_culldistance = nir->info.cull_distance_array_size;
 	info->clipdist_writemask = u_bit_consecutive(0, info->num_written_clipdistance);
@@ -625,10 +699,6 @@ void si_nir_scan_shader(const struct nir_shader *nir,
 	if (info->processor == PIPE_SHADER_FRAGMENT)
 		info->uses_kill = nir->info.fs.uses_discard;
 
-	/* TODO make this more accurate */
-	info->const_buffers_declared = u_bit_consecutive(0, SI_NUM_CONST_BUFFERS);
-	info->shader_buffers_declared = u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS);
-
 	func = (struct nir_function *)exec_list_get_head_const(&nir->functions);
 	nir_foreach_block(block, func->impl) {
 		nir_foreach_instr(instr, block)
@@ -643,6 +713,13 @@ void si_nir_scan_shader(const struct nir_shader *nir,
 void
 si_lower_nir(struct si_shader_selector* sel)
 {
+	/* Disable const buffer fast path for old LLVM versions */
+	if (sel->screen->info.chip_class == SI && HAVE_LLVM < 0x0600 &&
+	    sel->info.const_buffers_declared == 1 &&
+	    sel->info.shader_buffers_declared == 0) {
+		sel->info.const_buffers_declared |= 0x2;
+	}
+
 	/* Adjust the driver location of inputs and outputs. The state tracker
 	 * interprets them as slots, while the ac/nir backend interprets them
 	 * as individual components.
author	Timothy Arceri <[email protected]>	2018-03-27 10:39:49 +1100
committer	Timothy Arceri <[email protected]>	2018-04-02 14:56:00 +1000
commit	dd3d3cc877423027562087261a6e47226b69df84 (patch)
tree	7fd3cce25d210069e7bc7b5e6d262e637bb91c43
parent	56017d81004072fd00979f3a987ff2e94e3b6315 (diff)