author     Iago Toral Quiroga <[email protected]>   2015-11-23 13:53:08 +0100
committer  Iago Toral Quiroga <[email protected]>   2015-12-01 13:29:57 +0100
commit     867c436ca841b4196b4dde4786f5086c76b20dd7
tree       4581d299302d7b8902b375d40fbd3151353e7f16 /src/glsl/lower_ubo_reference.cpp
parent     e483cba9f5ff45395fdb1cd40a796799707eb1e0
glsl/lower_ubo_reference: split array copies into element copies
Improves register pressure, since otherwise we end up emitting loads for
all the elements in the RHS and then emitting stores for all elements in
the LHS.

v2:
- Mark progress properly. This also fixes some instances where the added
  nodes with individual element copies were not being lowered, which is
  expected behavior as explained in the documentation for
  visit_list_elements.
- Only need to do this if the RHS is a buffer-backed variable.
- We can also have arrays inside structs. A later patch will make it so
  we also split struct copies and end up with multiple
  ir_dereference_record assignments, so make sure that if any of these is
  an array copy, we also split it.

Fixes the following piglit tests:
tests/spec/arb_shader_storage_buffer_object/execution/large-field-copy.shader_test
tests/spec/arb_shader_storage_buffer_object/linker/copy-large-array.shader_test

Reviewed-by: Jordan Justen <[email protected]>
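To make the lowering concrete, here is a sketch of the kind of copy it targets (a hypothetical GLSL 4.30 shader written for illustration; the block and variable names are invented, and it is not taken from the patch or the piglit tests):

#version 430

buffer Src { vec4 src_data[16]; };
buffer Dst { vec4 dst_data[16]; };

void main()
{
   /* A whole-array copy whose RHS is buffer-backed. Without the split,
    * lowering emits 16 SSBO loads followed by 16 SSBO stores, keeping
    * all 16 values live at the same time.
    */
   dst_data = src_data;

   /* After this patch the assignment is split into element copies
    * before lowering, so each value can be stored and its registers
    * reused before the next load:
    *
    *    dst_data[0] = src_data[0];
    *    dst_data[1] = src_data[1];
    *    ...
    */
}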
Diffstat (limited to 'src/glsl/lower_ubo_reference.cpp')
-rw-r--r--   src/glsl/lower_ubo_reference.cpp   67
1 file changed, 67 insertions, 0 deletions
diff --git a/src/glsl/lower_ubo_reference.cpp b/src/glsl/lower_ubo_reference.cpp
index b74aa3d0630..b82d800eefb 100644
--- a/src/glsl/lower_ubo_reference.cpp
+++ b/src/glsl/lower_ubo_reference.cpp
@@ -154,6 +154,7 @@ public:
    ir_call *ssbo_load(const struct glsl_type *type,
                       ir_rvalue *offset);
 
+   bool check_for_buffer_array_copy(ir_assignment *ir);
    void check_for_ssbo_store(ir_assignment *ir);
    void write_to_memory(ir_dereference *deref,
                         ir_variable *var,
@@ -1132,10 +1133,76 @@ lower_ubo_reference_visitor::check_for_ssbo_store(ir_assignment *ir)
    progress = true;
 }
 
+static bool
+is_buffer_backed_variable(ir_variable *var)
+{
+   return var->is_in_buffer_block() ||
+          var->data.mode == ir_var_shader_shared;
+}
+
+bool
+lower_ubo_reference_visitor::check_for_buffer_array_copy(ir_assignment *ir)
+{
+   if (!ir || !ir->lhs || !ir->rhs)
+      return false;
+
+   /* LHS and RHS must be arrays
+    * FIXME: arrays of arrays?
+    */
+   if (!ir->lhs->type->is_array() || !ir->rhs->type->is_array())
+      return false;
+
+   /* RHS must be a buffer-backed variable. This is what can cause the problem
+    * since it would lead to a series of loads that need to live until we
+    * see the writes to the LHS.
+    */
+   ir_variable *rhs_var = ir->rhs->variable_referenced();
+   if (!rhs_var || !is_buffer_backed_variable(rhs_var))
+      return false;
+
+   /* Split the array copy into individual element copies to reduce
+    * register pressure
+    */
+   ir_dereference *rhs_deref = ir->rhs->as_dereference();
+   if (!rhs_deref)
+      return false;
+
+   ir_dereference *lhs_deref = ir->lhs->as_dereference();
+   if (!lhs_deref)
+      return false;
+
+   assert(lhs_deref->type->length == rhs_deref->type->length);
+   mem_ctx = ralloc_parent(shader->ir);
+
+   for (unsigned i = 0; i < lhs_deref->type->length; i++) {
+      ir_dereference *lhs_i =
+         new(mem_ctx) ir_dereference_array(lhs_deref->clone(mem_ctx, NULL),
+                                           new(mem_ctx) ir_constant(i));
+
+      ir_dereference *rhs_i =
+         new(mem_ctx) ir_dereference_array(rhs_deref->clone(mem_ctx, NULL),
+                                           new(mem_ctx) ir_constant(i));
+      ir->insert_after(assign(lhs_i, rhs_i));
+   }
+
+   ir->remove();
+   progress = true;
+   return true;
+}
 ir_visitor_status
 lower_ubo_reference_visitor::visit_enter(ir_assignment *ir)
 {
+   /* Array copies could involve large amounts of load/store
+    * operations. To improve register pressure we want to special-case
+    * these and split array copies into individual element copies.
+    * This way we avoid emitting all the loads for the RHS first and
+    * all the writes for the LHS second and register usage is more
+    * efficient.
+    */
+   if (check_for_buffer_array_copy(ir))
+      return visit_continue_with_parent;
+
    check_ssbo_unsized_array_length_assignment(ir);
    check_for_ssbo_store(ir);
    return rvalue_visit(ir);
 }