aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGert Wollny <[email protected]>2018-06-05 22:26:36 +0200
committerGert Wollny <[email protected]>2018-08-11 12:32:42 +0200
commit568bda2f2d3a3c1bef172563941d36d35a91b5b7 (patch)
tree864c0e06221ce2c8139b4153d5633cdfe6c0060c
parentb1cead3add3eb47be2505430051c51f5a7a803b8 (diff)
mesa/st/glsl_to_tgsi: Split arrays whose elements are only accessed directly
Arrays whose elements are only accessed directly are replaced by the corresponding number of temporary registers. By doing so, the otherwise reserved register range becomes subject to further optimizations like copy propagation and register merging. Thanks to the resulting reduced register pressure, this patch makes the piglit tests spec/glsl-1.50/execution/variable-indexing/vs-output-array-vec3-index-wr-before-gs and spec/glsl-1.50/execution/geometry/max-input-components pass on r600 (barts), where they would previously fail with a "GPR limit exceeded" error (even with the spilling that was recently added). v2: * rename method dissolve_arrays to split_arrays * unify the tracking and remapping methods for src and dst registers * also track access to arrays via reladdr* v3: * enable this optimization only if the driver requests register merge v4: * Correct comments * Also update inst->resource if it is an array element (thanks: Benedikt Schemmer for testing the patches on radeonsi, which revealed that I was missing tracking this) Signed-off-by: Gert Wollny <[email protected]> Acked-by: Dave Airlie <[email protected]>
-rw-r--r--src/mesa/state_tracker/st_glsl_to_tgsi.cpp113
1 files changed, 112 insertions, 1 deletions
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index a865379d703..2d8913541f2 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -383,6 +383,7 @@ public:
void copy_propagate(void);
int eliminate_dead_code(void);
+ void split_arrays(void);
void merge_two_dsts(void);
void merge_registers(void);
void renumber_registers(void);
@@ -5483,6 +5484,107 @@ glsl_to_tgsi_visitor::merge_two_dsts(void)
}
}
+/* Record whether a register accesses a PROGRAM_ARRAY indirectly.
+ *
+ * If reg lives in the PROGRAM_ARRAY file and has reladdr, reladdr2 or
+ * has_index2 set, the entry of has_indirect_access indexed by its array_id
+ * is set to true. Both address registers are then inspected recursively,
+ * so arrays that are themselves used for relative addressing get the same
+ * treatment. Registers in any other file are ignored.
+ *
+ * \param reg                  source or destination register to inspect
+ * \param has_indirect_access  table indexed by array ID; entries are only
+ *                             ever set to true here, never cleared
+ */
+template <typename st_reg>
+void test_indirect_access(const st_reg& reg, bool *has_indirect_access)
+{
+   if (reg.file != PROGRAM_ARRAY)
+      return;
+
+   if (!reg.reladdr && !reg.reladdr2 && !reg.has_index2)
+      return;
+
+   has_indirect_access[reg.array_id] = true;
+
+   if (reg.reladdr)
+      test_indirect_access(*reg.reladdr, has_indirect_access);
+
+   /* Inspect the second address register itself. Dereferencing reladdr
+    * here would crash when only reladdr2 is set and would leave reladdr2
+    * unchecked when both are set.
+    */
+   if (reg.reladdr2)
+      test_indirect_access(*reg.reladdr2, has_indirect_access);
+}
+
+/* Rewrite a single register according to the array remapping tables.
+ *
+ * Only registers in the PROGRAM_ARRAY file are touched:
+ *  - if the array has no indirect access, the register is moved to
+ *    PROGRAM_TEMPORARY, its index is offset by the remap value and its
+ *    array_id is cleared;
+ *  - otherwise the array_id is replaced by the remap value.
+ * Afterwards the register's reladdr and reladdr2 (when set) are remapped
+ * the same way.
+ *
+ * \param reg                  register to rewrite in place
+ * \param array_remap_info     per array ID: temporary base index for split
+ *                             arrays, new array ID for remaining arrays
+ * \param has_indirect_access  per array ID: true if the array must remain
+ */
+template <typename st_reg>
+void remap_array(st_reg& reg, const int *array_remap_info,
+                 const bool *has_indirect_access)
+{
+   if (reg.file != PROGRAM_ARRAY)
+      return;
+
+   const int remap_value = array_remap_info[reg.array_id];
+
+   if (has_indirect_access[reg.array_id]) {
+      /* The array remains; it only receives its compacted ID. */
+      reg.array_id = remap_value;
+   } else {
+      /* The array is split; address the element as a plain temporary. */
+      reg.file = PROGRAM_TEMPORARY;
+      reg.index = reg.index + remap_value;
+      reg.array_id = 0;
+   }
+
+   if (reg.reladdr)
+      remap_array(*reg.reladdr, array_remap_info, has_indirect_access);
+
+   if (reg.reladdr2)
+      remap_array(*reg.reladdr2, array_remap_info, has_indirect_access);
+}
+
+/* Arrays whose elements are never accessed indirectly are replaced by a
+ * matching range of temporary registers, which makes them available to
+ * later optimization steps such as copy propagation and register merging.
+ *
+ * The pass works in three steps: scan all instructions to find the arrays
+ * that are accessed indirectly, compute the remapping table (compacting
+ * array_sizes for the arrays that remain), and finally rewrite all
+ * registers if at least one array was split. next_temp and next_array are
+ * updated to reflect the new register and array counts.
+ */
+void
+glsl_to_tgsi_visitor::split_arrays(void)
+{
+   if (!next_array)
+      return;
+
+   /* Indexed by array ID; the loop below uses IDs 1..next_array, hence
+    * the additional slot.
+    */
+   bool *indirect_use = rzalloc_array(mem_ctx, bool, next_array + 1);
+
+   /* Step 1: collect the arrays that are accessed indirectly. */
+   foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
+      for (unsigned s = 0; s < num_inst_src_regs(inst); s++)
+         test_indirect_access(inst->src[s], indirect_use);
+
+      for (unsigned t = 0; t < inst->tex_offset_num_offset; t++)
+         test_indirect_access(inst->tex_offsets[t], indirect_use);
+
+      for (unsigned d = 0; d < num_inst_dst_regs(inst); d++)
+         test_indirect_access(inst->dst[d], indirect_use);
+
+      test_indirect_access(inst->resource, indirect_use);
+   }
+
+   unsigned new_temps = 0;
+   unsigned kept_arrays = 0;
+
+   /* Step 2: build the remapping table. The table serves two purposes:
+    * for an array that gets split, the entry is the index of the first
+    * temporary register replacing it; for an array that remains, the
+    * entry is its new array ID. The table is allocated as a child of
+    * indirect_use so that one ralloc_free releases both.
+    */
+   int *remap = rzalloc_array(indirect_use, int, next_array + 1);
+
+   for (unsigned id = 1; id <= next_array; ++id) {
+      const unsigned size_slot = id - 1;
+
+      if (indirect_use[id]) {
+         /* Compact the size table in place and hand out the next ID. */
+         array_sizes[kept_arrays] = array_sizes[size_slot];
+         ++kept_arrays;
+         remap[id] = kept_arrays;
+      } else {
+         remap[id] = this->next_temp + new_temps;
+         new_temps += array_sizes[size_slot];
+      }
+   }
+
+   /* Step 3: rewrite the registers, but only if something was split. */
+   if (next_array != kept_arrays) {
+      foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
+         for (unsigned s = 0; s < num_inst_src_regs(inst); s++)
+            remap_array(inst->src[s], remap, indirect_use);
+
+         for (unsigned t = 0; t < inst->tex_offset_num_offset; t++)
+            remap_array(inst->tex_offsets[t], remap, indirect_use);
+
+         for (unsigned d = 0; d < num_inst_dst_regs(inst); d++)
+            remap_array(inst->dst[d], remap, indirect_use);
+
+         remap_array(inst->resource, remap, indirect_use);
+      }
+   }
+
+   ralloc_free(indirect_use);
+
+   this->next_temp += new_temps;
+   next_array = kept_arrays;
+}
+
/* Merges temporary registers together where possible to reduce the number of
* registers needed to run a program.
*
@@ -6981,8 +7083,17 @@ get_mesa_program_tgsi(struct gl_context *ctx,
while (v->eliminate_dead_code());
v->merge_two_dsts();
- if (!skip_merge_registers)
+
+ if (!skip_merge_registers) {
+ v->split_arrays();
+ v->copy_propagate();
+ while (v->eliminate_dead_code());
+
v->merge_registers();
+ v->copy_propagate();
+ while (v->eliminate_dead_code());
+ }
+
v->renumber_registers();
/* Write the END instruction. */