summaryrefslogtreecommitdiffstats
path: root/src/broadcom/compiler/qpu_schedule.c
diff options
context:
space:
mode:
author: Eric Anholt <[email protected]> 2018-07-20 14:27:09 -0700
committer: Eric Anholt <[email protected]> 2018-07-23 10:21:43 -0700
commit: 6b73a97f84f86f4c3d3bbbbadf963c20b8e52b57 (patch)
tree: 6c8701d6c84c1478f7e829a08e5bb141777846e5 /src/broadcom/compiler/qpu_schedule.c
parent: 79e0f042bcd6a1956015cf332c2232ade2c2321f (diff)
v3d: Implement a small immediates optimization, based on VC4's.
We can do one per instruction, and we have to be careful not to overwrite raddr_b, but this greatly reduces the pressure on uniform loads (particularly around ldvpm/stvpm instructions).

total instructions in shared programs: 90768 -> 88220 (-2.81%)
instructions in affected programs: 82711 -> 80163 (-3.08%)
Diffstat (limited to 'src/broadcom/compiler/qpu_schedule.c')
-rw-r--r-- src/broadcom/compiler/qpu_schedule.c 3
1 files changed, 2 insertions, 1 deletions
diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
index af0b9b86b1c..441b6327825 100644
--- a/src/broadcom/compiler/qpu_schedule.c
+++ b/src/broadcom/compiler/qpu_schedule.c
@@ -670,7 +670,8 @@ qpu_merge_inst(const struct v3d_device_info *devinfo,
if (v3d_qpu_uses_mux(b, V3D_QPU_MUX_B)) {
if (v3d_qpu_uses_mux(a, V3D_QPU_MUX_B) &&
- a->raddr_b != b->raddr_b) {
+ (a->raddr_b != b->raddr_b ||
+ a->sig.small_imm != b->sig.small_imm)) {
return false;
}
merge.raddr_b = b->raddr_b;