summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author: Eric Anholt <[email protected]> 2015-03-29 21:26:16 -0700
committer: Eric Anholt <[email protected]> 2015-03-30 09:23:39 -0700
commit: c519c4d85e7b4f9cad4e51dc08e8ae99bf3c810d (patch)
tree: 8a2e3dd9b37453f23a4cd49b4cc08891d65506d8 /src
parent: 5df8bf86fe40ae95ad3888cb167ce80c710af227 (diff)
vc4: Don't bother masking out the low 24 bits for integer multiplies
The hardware multiplier only uses the low 24 bits of each operand, saving us an AND to drop the high bits.

total uniforms in shared programs: 13433 -> 13423 (-0.07%)
uniforms in affected programs: 356 -> 346 (-2.81%)
total instructions in shared programs: 40003 -> 39989 (-0.03%)
instructions in affected programs: 910 -> 896 (-1.54%)
Diffstat (limited to 'src')
-rw-r--r-- src/gallium/drivers/vc4/vc4_program.c | 20
1 file changed, 8 insertions, 12 deletions
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 49b94666b88..9e145e54ccd 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -366,18 +366,14 @@ tgsi_to_qir_umul(struct vc4_compile *c,
struct tgsi_full_instruction *tgsi_inst,
enum qop op, struct qreg *src, int i)
{
- struct qreg src0_hi = qir_SHR(c, src[0 * 4 + i],
- qir_uniform_ui(c, 24));
- struct qreg src0_lo = qir_AND(c, src[0 * 4 + i],
- qir_uniform_ui(c, 0xffffff));
- struct qreg src1_hi = qir_SHR(c, src[1 * 4 + i],
- qir_uniform_ui(c, 24));
- struct qreg src1_lo = qir_AND(c, src[1 * 4 + i],
- qir_uniform_ui(c, 0xffffff));
-
- struct qreg hilo = qir_MUL24(c, src0_hi, src1_lo);
- struct qreg lohi = qir_MUL24(c, src0_lo, src1_hi);
- struct qreg lolo = qir_MUL24(c, src0_lo, src1_lo);
+ struct qreg src0 = src[0 * 4 + i];
+ struct qreg src0_hi = qir_SHR(c, src0, qir_uniform_ui(c, 24));
+ struct qreg src1 = src[1 * 4 + i];
+ struct qreg src1_hi = qir_SHR(c, src1, qir_uniform_ui(c, 24));
+
+ struct qreg hilo = qir_MUL24(c, src0_hi, src1);
+ struct qreg lohi = qir_MUL24(c, src0, src1_hi);
+ struct qreg lolo = qir_MUL24(c, src0, src1);
return qir_ADD(c, lolo, qir_SHL(c,
qir_ADD(c, hilo, lohi),