diff options
author | Eric Anholt <[email protected]> | 2015-03-29 21:26:16 -0700 |
---|---|---|
committer | Eric Anholt <[email protected]> | 2015-03-30 09:23:39 -0700 |
commit | c519c4d85e7b4f9cad4e51dc08e8ae99bf3c810d (patch) | |
tree | 8a2e3dd9b37453f23a4cd49b4cc08891d65506d8 /src/gallium | |
parent | 5df8bf86fe40ae95ad3888cb167ce80c710af227 (diff) |
vc4: Don't bother masking out the low 24 bits for integer multiplies
The hardware multiplier only uses the low 24 bits of each operand, which saves
us the AND that was masking off the high bits.
total uniforms in shared programs: 13433 -> 13423 (-0.07%)
uniforms in affected programs: 356 -> 346 (-2.81%)
total instructions in shared programs: 40003 -> 39989 (-0.03%)
instructions in affected programs: 910 -> 896 (-1.54%)
Diffstat (limited to 'src/gallium')
-rw-r--r-- | src/gallium/drivers/vc4/vc4_program.c | 20 |
1 file changed, 8 insertions, 12 deletions
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 49b94666b88..9e145e54ccd 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -366,18 +366,14 @@ tgsi_to_qir_umul(struct vc4_compile *c,
                  struct tgsi_full_instruction *tgsi_inst,
                  enum qop op, struct qreg *src, int i)
 {
-        struct qreg src0_hi = qir_SHR(c, src[0 * 4 + i],
-                                      qir_uniform_ui(c, 24));
-        struct qreg src0_lo = qir_AND(c, src[0 * 4 + i],
-                                      qir_uniform_ui(c, 0xffffff));
-        struct qreg src1_hi = qir_SHR(c, src[1 * 4 + i],
-                                      qir_uniform_ui(c, 24));
-        struct qreg src1_lo = qir_AND(c, src[1 * 4 + i],
-                                      qir_uniform_ui(c, 0xffffff));
-
-        struct qreg hilo = qir_MUL24(c, src0_hi, src1_lo);
-        struct qreg lohi = qir_MUL24(c, src0_lo, src1_hi);
-        struct qreg lolo = qir_MUL24(c, src0_lo, src1_lo);
+        struct qreg src0 = src[0 * 4 + i];
+        struct qreg src0_hi = qir_SHR(c, src0, qir_uniform_ui(c, 24));
+        struct qreg src1 = src[1 * 4 + i];
+        struct qreg src1_hi = qir_SHR(c, src1, qir_uniform_ui(c, 24));
+
+        struct qreg hilo = qir_MUL24(c, src0_hi, src1);
+        struct qreg lohi = qir_MUL24(c, src0, src1_hi);
+        struct qreg lolo = qir_MUL24(c, src0, src1);
 
         return qir_ADD(c, lolo,
                        qir_SHL(c, qir_ADD(c, hilo, lohi),