aboutsummaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/vc4/vc4_qir_schedule.c
diff options
context:
space:
mode:
author: Eric Anholt <[email protected]> 2016-11-30 11:52:06 -0800
committer: Eric Anholt <[email protected]> 2016-11-30 19:58:09 -0800
commit: d182740ac8bf2fc3aefe331235bfe72bbdd9987d (patch)
tree: dea5aa69162b710cb1fc1d2ab7577e0e5fd4acc1 /src/gallium/drivers/vc4/vc4_qir_schedule.c
parent: 1f9daf7cd1cf3d31571776a0142e9e339168a8df (diff)
vc4: Improve interleaving of texture coordinates vs results.
The latency_between() call was meant to handle the delay between the coordinate write ("before") and the corresponding sample read ("after"), but we were passing in the two instructions swapped. This meant that we tried to fit things between a tex_s and its *preceding* tex_result. As a result, we only interleaved normal texture coordinates by accident, and we pessimized UBO reads by pushing the tex_result collection earlier until there was nothing left to schedule but it (and then its preceding coordinate setup).

In addition to the latency reduction, things end up packing better (probably due to reduced live ranges of the texture results):

total instructions in shared programs: 98121 -> 94775 (-3.41%)
instructions in affected programs: 91196 -> 87850 (-3.67%)

3DMMES performance +1.15569% +/- 0.124714% (n=8,10)
Diffstat (limited to 'src/gallium/drivers/vc4/vc4_qir_schedule.c')
-rw-r--r-- src/gallium/drivers/vc4/vc4_qir_schedule.c | 6
1 file changed, 3 insertions, 3 deletions
diff --git a/src/gallium/drivers/vc4/vc4_qir_schedule.c b/src/gallium/drivers/vc4/vc4_qir_schedule.c
index a8ef189e583..ea48a858337 100644
--- a/src/gallium/drivers/vc4/vc4_qir_schedule.c
+++ b/src/gallium/drivers/vc4/vc4_qir_schedule.c
@@ -593,7 +593,7 @@ compute_delay(struct schedule_node *n)
compute_delay(n->children[i]);
n->delay = MAX2(n->delay,
n->children[i]->delay +
- latency_between(n, n->children[i]));
+ latency_between(n->children[i], n));
}
}
}
@@ -644,8 +644,8 @@ schedule_instructions(struct vc4_compile *c,
child->unblocked_time = MAX2(child->unblocked_time,
state->time +
- latency_between(chosen,
- child));
+ latency_between(child,
+ chosen));
child->parent_count--;
if (child->parent_count == 0)
list_add(&child->link, &state->worklist);