summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author: Eric Anholt <[email protected]> 2016-11-30 12:19:38 -0800
committer: Eric Anholt <[email protected]> 2016-11-30 19:58:09 -0800
commit: 6c34084d8eafff3a764cd1ad49afacd211470f7b (patch)
tree: 49c8fa14aa8360196a45abc3bdd885a517796e9b /src
parent: d182740ac8bf2fc3aefe331235bfe72bbdd9987d (diff)
vc4: Try to schedule QIR instructions between writing to and reading math.
This helps us get the delay slots between SFU writes and reads filled.

total instructions in shared programs: 94494 -> 93970 (-0.55%)
instructions in affected programs: 59206 -> 58682 (-0.89%)
3DMMES performance +1.89967% +/- 0.157611% (n=10,9)
Diffstat (limited to 'src')
-rw-r--r-- src/gallium/drivers/vc4/vc4_qir_schedule.c 22
1 files changed, 22 insertions, 0 deletions
diff --git a/src/gallium/drivers/vc4/vc4_qir_schedule.c b/src/gallium/drivers/vc4/vc4_qir_schedule.c
index ea48a858337..89e6d1d0d60 100644
--- a/src/gallium/drivers/vc4/vc4_qir_schedule.c
+++ b/src/gallium/drivers/vc4/vc4_qir_schedule.c
@@ -569,6 +569,28 @@ latency_between(struct schedule_node *before, struct schedule_node *after)
after->inst->op == QOP_TEX_RESULT)
return 100;
+ switch (before->inst->op) {
+ case QOP_RCP:
+ case QOP_RSQ:
+ case QOP_EXP2:
+ case QOP_LOG2:
+ for (int i = 0; i < qir_get_nsrc(after->inst); i++) {
+ if (after->inst->src[i].file ==
+ before->inst->dst.file &&
+ after->inst->src[i].index ==
+ before->inst->dst.index) {
+ /* There are two QPU delay slots before we can
+ * read a math result, which could be up to 4
+ * QIR instructions if they packed well.
+ */
+ return 4;
+ }
+ }
+ break;
+ default:
+ break;
+ }
+
return 1;
}