summary | refs | log | tree | commit | diff | stats
path: root/src/gallium/drivers/vc4/vc4_qir.h
diff options
context:
space:
mode:
author: Eric Anholt <[email protected]> 2016-11-21 23:52:37 -0800
committer: Eric Anholt <[email protected]> 2016-11-22 16:46:03 -0800
commit: 414dbb2d5c48b7e9dc0dc8b14583f91415ca3960 (patch)
tree: 51ddf15c8fb26855552a9455568d5eecf4b42c71 /src/gallium/drivers/vc4/vc4_qir.h
parent: 1f0ba902f0ef87e182e50b7e19a406b0936f5293 (diff)
vc4: Don't conditionalize the src1 mov of qir_SEL().
My thought in having both arguments conditionally moved was that it should theoretically save some power by not doing work in those channels. However, it ends up costing us instructions because we can't register-coalesce the first of the MOVs, and it also introduces extra scheduling dependencies. The instruction cost would swamp whatever power benefit I was hoping for.

shader-db results:
total instructions in shared programs: 100548 -> 99741 (-0.80%)
instructions in affected programs: 42450 -> 41643 (-1.90%)

With obvious outliers removed (I had an X11 emacs running over the network in the "after" case), 3DMMES Taiji showed 1.07231% +/- 0.488241% fps improvement (n=18, 30).
Diffstat (limited to 'src/gallium/drivers/vc4/vc4_qir.h')
-rw-r--r-- src/gallium/drivers/vc4/vc4_qir.h 6
1 files changed, 2 insertions, 4 deletions
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index eebfdf047df..ba3fbd7b283 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -757,10 +757,8 @@ static inline struct qreg
qir_SEL(struct vc4_compile *c, uint8_t cond, struct qreg src0, struct qreg src1)
{
struct qreg t = qir_get_temp(c);
- struct qinst *a = qir_MOV_dest(c, t, src0);
- struct qinst *b = qir_MOV_dest(c, t, src1);
- a->cond = cond;
- b->cond = qpu_cond_complement(cond);
+ qir_MOV_dest(c, t, src1);
+ qir_MOV_dest(c, t, src0)->cond = cond;
return t;
}