aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDave Airlie <[email protected]>2015-08-27 01:46:33 +0100
committerDave Airlie <[email protected]>2015-08-31 11:27:18 +0100
commitaee73f2942eff2ffb4a0497ac81f01a3b00294b8 (patch)
tree8f0a1090e492fce218bf79aea75c8f3684db3dae
parent46968c114003b30be335adccbc30445aca9b5dea (diff)
st/mesa: reduce time spent in calculating temp read/writes
The glsl->tgsi convertor does some temporary register reduction however in profiling shader-db this shows up quite highly, so optimise things to reduce the number of loops through all the instructions we do. This drops merge_registers from 4-5% on the profile to 1%. I think this can be reduced further by possibly optimising the renumber pass. Acked-by: Marek Olšák <[email protected] Signed-off-by: Dave Airlie <[email protected]>
-rw-r--r--src/mesa/state_tracker/st_glsl_to_tgsi.cpp153
1 files changed, 79 insertions, 74 deletions
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index db6a5a417ea..9174b41d8c9 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -480,10 +480,9 @@ public:
void simplify_cmp(void);
void rename_temp_register(int index, int new_index);
- int get_first_temp_read(int index);
- int get_first_temp_write(int index);
- int get_last_temp_read(int index);
- int get_last_temp_write(int index);
+ void get_first_temp_read(int *first_reads);
+ void get_last_temp_read_first_temp_write(int *last_reads, int *first_writes);
+ void get_last_temp_write(int *last_writes);
void copy_propagate(void);
int eliminate_dead_code(void);
@@ -3714,8 +3713,8 @@ glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index)
}
}
-int
-glsl_to_tgsi_visitor::get_first_temp_read(int index)
+void
+glsl_to_tgsi_visitor::get_first_temp_read(int *first_reads)
{
int depth = 0; /* loop depth */
int loop_start = -1; /* index of the first active BGNLOOP (if any) */
@@ -3723,15 +3722,15 @@ glsl_to_tgsi_visitor::get_first_temp_read(int index)
foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
for (j = 0; j < num_inst_src_regs(inst); j++) {
- if (inst->src[j].file == PROGRAM_TEMPORARY &&
- inst->src[j].index == index) {
- return (depth == 0) ? i : loop_start;
+ if (inst->src[j].file == PROGRAM_TEMPORARY) {
+ if (first_reads[inst->src[j].index] == -1)
+ first_reads[inst->src[j].index] = (depth == 0) ? i : loop_start;
}
}
for (j = 0; j < inst->tex_offset_num_offset; j++) {
- if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY &&
- inst->tex_offsets[j].index == index) {
- return (depth == 0) ? i : loop_start;
+ if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY) {
+ if (first_reads[inst->tex_offsets[j].index] == -1)
+ first_reads[inst->tex_offsets[j].index] = (depth == 0) ? i : loop_start;
}
}
if (inst->op == TGSI_OPCODE_BGNLOOP) {
@@ -3744,91 +3743,73 @@ glsl_to_tgsi_visitor::get_first_temp_read(int index)
assert(depth >= 0);
i++;
}
- return -1;
}
-int
-glsl_to_tgsi_visitor::get_first_temp_write(int index)
+void
+glsl_to_tgsi_visitor::get_last_temp_read_first_temp_write(int *last_reads, int *first_writes)
{
int depth = 0; /* loop depth */
int loop_start = -1; /* index of the first active BGNLOOP (if any) */
- int i = 0;
- unsigned j;
-
+ unsigned i = 0, j;
+ int k;
foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
+ for (j = 0; j < num_inst_src_regs(inst); j++) {
+ if (inst->src[j].file == PROGRAM_TEMPORARY)
+ last_reads[inst->src[j].index] = (depth == 0) ? i : -2;
+ }
for (j = 0; j < num_inst_dst_regs(inst); j++) {
- if (inst->dst[j].file == PROGRAM_TEMPORARY && inst->dst[j].index == index) {
- return (depth == 0) ? i : loop_start;
- }
+ if (inst->dst[j].file == PROGRAM_TEMPORARY)
+ if (first_writes[inst->dst[j].index] == -1)
+ first_writes[inst->dst[j].index] = (depth == 0) ? i : loop_start;
+ }
+ for (j = 0; j < inst->tex_offset_num_offset; j++) {
+ if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY)
+ last_reads[inst->tex_offsets[j].index] = (depth == 0) ? i : -2;
}
if (inst->op == TGSI_OPCODE_BGNLOOP) {
if(depth++ == 0)
loop_start = i;
} else if (inst->op == TGSI_OPCODE_ENDLOOP) {
- if (--depth == 0)
+ if (--depth == 0) {
loop_start = -1;
- }
- assert(depth >= 0);
- i++;
- }
- return -1;
-}
-
-int
-glsl_to_tgsi_visitor::get_last_temp_read(int index)
-{
- int depth = 0; /* loop depth */
- int last = -1; /* index of last instruction that reads the temporary */
- unsigned i = 0, j;
-
- foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
- for (j = 0; j < num_inst_src_regs(inst); j++) {
- if (inst->src[j].file == PROGRAM_TEMPORARY &&
- inst->src[j].index == index) {
- last = (depth == 0) ? i : -2;
+ for (k = 0; k < this->next_temp; k++) {
+ if (last_reads[k] == -2) {
+ last_reads[k] = i;
+ }
+ }
}
}
- for (j = 0; j < inst->tex_offset_num_offset; j++) {
- if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY &&
- inst->tex_offsets[j].index == index)
- last = (depth == 0) ? i : -2;
- }
- if (inst->op == TGSI_OPCODE_BGNLOOP)
- depth++;
- else if (inst->op == TGSI_OPCODE_ENDLOOP)
- if (--depth == 0 && last == -2)
- last = i;
assert(depth >= 0);
i++;
}
- assert(last >= -1);
- return last;
}
-int
-glsl_to_tgsi_visitor::get_last_temp_write(int index)
+void
+glsl_to_tgsi_visitor::get_last_temp_write(int *last_writes)
{
int depth = 0; /* loop depth */
- int last = -1; /* index of last instruction that writes to the temporary */
- int i = 0;
+ int i = 0, k;
unsigned j;
foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
for (j = 0; j < num_inst_dst_regs(inst); j++) {
- if (inst->dst[j].file == PROGRAM_TEMPORARY && inst->dst[j].index == index)
- last = (depth == 0) ? i : -2;
+ if (inst->dst[j].file == PROGRAM_TEMPORARY)
+ last_writes[inst->dst[j].index] = (depth == 0) ? i : -2;
}
if (inst->op == TGSI_OPCODE_BGNLOOP)
depth++;
else if (inst->op == TGSI_OPCODE_ENDLOOP)
- if (--depth == 0 && last == -2)
- last = i;
+ if (--depth == 0) {
+ for (k = 0; k < this->next_temp; k++) {
+ if (last_writes[k] == -2) {
+ last_writes[k] = i;
+ }
+ }
+ }
assert(depth >= 0);
i++;
}
- assert(last >= -1);
- return last;
}
/*
@@ -4264,9 +4245,10 @@ glsl_to_tgsi_visitor::merge_registers(void)
* into an array so that we don't have to traverse the instruction list as
* much. */
for (i = 0; i < this->next_temp; i++) {
- last_reads[i] = get_last_temp_read(i);
- first_writes[i] = get_first_temp_write(i);
+ last_reads[i] = -1;
+ first_writes[i] = -1;
}
+ get_last_temp_read_first_temp_write(last_reads, first_writes);
/* Start looking for registers with non-overlapping usages that can be
* merged together. */
@@ -4307,15 +4289,21 @@ glsl_to_tgsi_visitor::renumber_registers(void)
{
int i = 0;
int new_index = 0;
+ int *first_reads = rzalloc_array(mem_ctx, int, this->next_temp);
+
+ for (i = 0; i < this->next_temp; i++)
+ first_reads[i] = -1;
+ get_first_temp_read(first_reads);
for (i = 0; i < this->next_temp; i++) {
- if (get_first_temp_read(i) < 0) continue;
+ if (first_reads[i] < 0) continue;
if (i != new_index)
rename_temp_register(i, new_index);
new_index++;
}
this->next_temp = new_index;
+ ralloc_free(first_reads);
}
/**
@@ -5790,14 +5778,31 @@ get_mesa_program(struct gl_context *ctx,
#if 0
/* Print out some information (for debugging purposes) used by the
* optimization passes. */
- for (i = 0; i < v->next_temp; i++) {
- int fr = v->get_first_temp_read(i);
- int fw = v->get_first_temp_write(i);
- int lr = v->get_last_temp_read(i);
- int lw = v->get_last_temp_write(i);
-
- printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, fr, fw, lr, lw);
- assert(fw <= fr);
+ {
+ int i;
+ int *first_writes = rzalloc_array(v->mem_ctx, int, v->next_temp);
+ int *first_reads = rzalloc_array(v->mem_ctx, int, v->next_temp);
+ int *last_writes = rzalloc_array(v->mem_ctx, int, v->next_temp);
+ int *last_reads = rzalloc_array(v->mem_ctx, int, v->next_temp);
+
+ for (i = 0; i < v->next_temp; i++) {
+ first_writes[i] = -1;
+ first_reads[i] = -1;
+ last_writes[i] = -1;
+ last_reads[i] = -1;
+ }
+ v->get_first_temp_read(first_reads);
+ v->get_last_temp_read_first_temp_write(last_reads, first_writes);
+ v->get_last_temp_write(last_writes);
+ for (i = 0; i < v->next_temp; i++)
+ printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, first_reads[i],
+ first_writes[i],
+ last_reads[i],
+ last_writes[i]);
+ ralloc_free(first_writes);
+ ralloc_free(first_reads);
+ ralloc_free(last_writes);
+ ralloc_free(last_reads);
}
#endif