summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSamuel Pitoiset <[email protected]>2017-05-04 10:35:29 +0200
committerSamuel Pitoiset <[email protected]>2017-05-05 09:48:01 +0200
commit92ab06e782c31fe0209e5d0181967a2ff6739c9b (patch)
tree3b7a6872549b649428989e665a2bc9a0814cf7c2
parent7761cf6d01e97aeb80606e51c11e4885a278ed54 (diff)
st/glsl_to_tgsi: fix renumber_registers() in presence of dead code
The TGSI DCE pass doesn't eliminate dead assignments like MOV TEMP[0], TEMP[1] in presence of loops because it assumes that the visitor doesn't emit dead code. This assumption is actually wrong and this situation happens. However, it appears that the merge_registers() pass accidentally takes care of this for some weird reasons. But since this pass has been disabled for RadeonSI and Nouveau, the renumber_registers() pass which is called *after*, can't do its job correctly. This is because it assumes that no dead code is present. But if there is still a dead assignment, it might re-use the TEMP register id incorrectly and emits wrong code. This patches fixes the issue by recording writes instead of reads, and this has the advantage to be faster. This should fix Unigine Heaven on RadeonSI and Nouveau. shader-db results with RadeonSI: 47109 shaders in 29632 tests Totals: SGPRS: 1923308 -> 1923316 (0.00 %) VGPRS: 1133843 -> 1133847 (0.00 %) Spilled SGPRs: 2516 -> 2518 (0.08 %) Spilled VGPRs: 65 -> 65 (0.00 %) Private memory VGPRs: 1184 -> 1184 (0.00 %) Scratch size: 1308 -> 1308 (0.00 %) dwords per thread Code Size: 60095968 -> 60096256 (0.00 %) bytes LDS: 1077 -> 1077 (0.00 %) blocks Max Waves: 431889 -> 431889 (0.00 %) Wait states: 0 -> 0 (0.00 %) It's still interesting to disable the merge_registers() pass. Signed-off-by: Samuel Pitoiset <[email protected]> Reviewed-by: Nicolai Hähnle <[email protected]>
-rw-r--r--src/mesa/state_tracker/st_glsl_to_tgsi.cpp39
1 files changed, 34 insertions, 5 deletions
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 9858673ff44..81c1d00dfbc 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -559,6 +559,7 @@ public:
void rename_temp_registers(int num_renames, struct rename_reg_pair *renames);
void get_first_temp_read(int *first_reads);
+ void get_first_temp_write(int *first_writes);
void get_last_temp_read_first_temp_write(int *last_reads, int *first_writes);
void get_last_temp_write(int *last_writes);
@@ -4759,6 +4760,33 @@ glsl_to_tgsi_visitor::rename_temp_registers(int num_renames, struct rename_reg_p
}
void
+glsl_to_tgsi_visitor::get_first_temp_write(int *first_writes)
+{
+ int depth = 0; /* loop depth */
+ int loop_start = -1; /* index of the first active BGNLOOP (if any) */
+ unsigned i = 0, j;
+
+ foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
+ for (j = 0; j < num_inst_dst_regs(inst); j++) {
+ if (inst->dst[j].file == PROGRAM_TEMPORARY) {
+ if (first_writes[inst->dst[j].index] == -1)
+ first_writes[inst->dst[j].index] = (depth == 0) ? i : loop_start;
+ }
+ }
+
+ if (inst->op == TGSI_OPCODE_BGNLOOP) {
+ if(depth++ == 0)
+ loop_start = i;
+ } else if (inst->op == TGSI_OPCODE_ENDLOOP) {
+ if (--depth == 0)
+ loop_start = -1;
+ }
+ assert(depth >= 0);
+ i++;
+ }
+}
+
+void
glsl_to_tgsi_visitor::get_first_temp_read(int *first_reads)
{
int depth = 0; /* loop depth */
@@ -5347,16 +5375,17 @@ glsl_to_tgsi_visitor::renumber_registers(void)
{
int i = 0;
int new_index = 0;
- int *first_reads = rzalloc_array(mem_ctx, int, this->next_temp);
+ int *first_writes = ralloc_array(mem_ctx, int, this->next_temp);
struct rename_reg_pair *renames = rzalloc_array(mem_ctx, struct rename_reg_pair, this->next_temp);
int num_renames = 0;
+
for (i = 0; i < this->next_temp; i++) {
- first_reads[i] = -1;
+ first_writes[i] = -1;
}
- get_first_temp_read(first_reads);
+ get_first_temp_write(first_writes);
for (i = 0; i < this->next_temp; i++) {
- if (first_reads[i] < 0) continue;
+ if (first_writes[i] < 0) continue;
if (i != new_index) {
renames[num_renames].old_reg = i;
renames[num_renames].new_reg = new_index;
@@ -5368,7 +5397,7 @@ glsl_to_tgsi_visitor::renumber_registers(void)
rename_temp_registers(num_renames, renames);
this->next_temp = new_index;
ralloc_free(renames);
- ralloc_free(first_reads);
+ ralloc_free(first_writes);
}
/* ------------------------- TGSI conversion stuff -------------------------- */