diff options
author | Samuel Pitoiset <[email protected]> | 2017-05-04 10:35:29 +0200 |
---|---|---|
committer | Samuel Pitoiset <[email protected]> | 2017-05-05 09:48:01 +0200 |
commit | 92ab06e782c31fe0209e5d0181967a2ff6739c9b (patch) | |
tree | 3b7a6872549b649428989e665a2bc9a0814cf7c2 | |
parent | 7761cf6d01e97aeb80606e51c11e4885a278ed54 (diff) |
st/glsl_to_tgsi: fix renumber_registers() in presence of dead code
The TGSI DCE pass doesn't eliminate dead assignments like
MOV TEMP[0], TEMP[1] in presence of loops because it assumes
that the visitor doesn't emit dead code. This assumption is
actually wrong and this situation happens.
However, it appears that the merge_registers() pass accidentally
takes care of this for some weird reasons. But since this pass has
been disabled for RadeonSI and Nouveau, the renumber_registers()
pass, which is called *after*, can't do its job correctly.
This is because it assumes that no dead code is present. But if
there is still a dead assignment, it might re-use the TEMP
register id incorrectly and emit wrong code.
This patch fixes the issue by recording writes instead of reads,
which also has the advantage of being faster.
This should fix Unigine Heaven on RadeonSI and Nouveau.
shader-db results with RadeonSI:
47109 shaders in 29632 tests
Totals:
SGPRS: 1923308 -> 1923316 (0.00 %)
VGPRS: 1133843 -> 1133847 (0.00 %)
Spilled SGPRs: 2516 -> 2518 (0.08 %)
Spilled VGPRs: 65 -> 65 (0.00 %)
Private memory VGPRs: 1184 -> 1184 (0.00 %)
Scratch size: 1308 -> 1308 (0.00 %) dwords per thread
Code Size: 60095968 -> 60096256 (0.00 %) bytes
LDS: 1077 -> 1077 (0.00 %) blocks
Max Waves: 431889 -> 431889 (0.00 %)
Wait states: 0 -> 0 (0.00 %)
It's still interesting to disable the merge_registers() pass.
Signed-off-by: Samuel Pitoiset <[email protected]>
Reviewed-by: Nicolai Hähnle <[email protected]>
-rw-r--r-- | src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 39 |
1 files changed, 34 insertions, 5 deletions
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 9858673ff44..81c1d00dfbc 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -559,6 +559,7 @@ public: void rename_temp_registers(int num_renames, struct rename_reg_pair *renames); void get_first_temp_read(int *first_reads); + void get_first_temp_write(int *first_writes); void get_last_temp_read_first_temp_write(int *last_reads, int *first_writes); void get_last_temp_write(int *last_writes); @@ -4759,6 +4760,33 @@ glsl_to_tgsi_visitor::rename_temp_registers(int num_renames, struct rename_reg_p } void +glsl_to_tgsi_visitor::get_first_temp_write(int *first_writes) +{ + int depth = 0; /* loop depth */ + int loop_start = -1; /* index of the first active BGNLOOP (if any) */ + unsigned i = 0, j; + + foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { + for (j = 0; j < num_inst_dst_regs(inst); j++) { + if (inst->dst[j].file == PROGRAM_TEMPORARY) { + if (first_writes[inst->dst[j].index] == -1) + first_writes[inst->dst[j].index] = (depth == 0) ? 
i : loop_start; + } + } + + if (inst->op == TGSI_OPCODE_BGNLOOP) { + if(depth++ == 0) + loop_start = i; + } else if (inst->op == TGSI_OPCODE_ENDLOOP) { + if (--depth == 0) + loop_start = -1; + } + assert(depth >= 0); + i++; + } +} + +void glsl_to_tgsi_visitor::get_first_temp_read(int *first_reads) { int depth = 0; /* loop depth */ @@ -5347,16 +5375,17 @@ glsl_to_tgsi_visitor::renumber_registers(void) { int i = 0; int new_index = 0; - int *first_reads = rzalloc_array(mem_ctx, int, this->next_temp); + int *first_writes = ralloc_array(mem_ctx, int, this->next_temp); struct rename_reg_pair *renames = rzalloc_array(mem_ctx, struct rename_reg_pair, this->next_temp); int num_renames = 0; + for (i = 0; i < this->next_temp; i++) { - first_reads[i] = -1; + first_writes[i] = -1; } - get_first_temp_read(first_reads); + get_first_temp_write(first_writes); for (i = 0; i < this->next_temp; i++) { - if (first_reads[i] < 0) continue; + if (first_writes[i] < 0) continue; if (i != new_index) { renames[num_renames].old_reg = i; renames[num_renames].new_reg = new_index; @@ -5368,7 +5397,7 @@ glsl_to_tgsi_visitor::renumber_registers(void) rename_temp_registers(num_renames, renames); this->next_temp = new_index; ralloc_free(renames); - ralloc_free(first_reads); + ralloc_free(first_writes); } /* ------------------------- TGSI conversion stuff -------------------------- */ |