diff options
author | Eric Anholt <[email protected]> | 2014-10-17 14:01:15 +0100 |
---|---|---|
committer | Eric Anholt <[email protected]> | 2014-10-18 10:08:59 +0100 |
commit | 1fc124b80f228319ded06f80a51681c75dc0a4f3 (patch) | |
tree | 2fb51ea58ba7611d2467de209b9de53e98386d32 /src | |
parent | 0fdc5111b4e659de8258ae8f3eb8e33ef466beb3 (diff) |
vc4: Apply a Newton-Raphson step to improve RCP.
Fixes all the piglit floating-point *-op-div tests, among others.
Diffstat (limited to 'src')
-rw-r--r-- | src/gallium/drivers/vc4/vc4_program.c | 18 |
1 file changed, 17 insertions, 1 deletion
```diff
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 70a2b867ad8..0046b2262da 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -297,6 +297,22 @@ tgsi_to_qir_scalar(struct vc4_compile *c,
 }

 static struct qreg
+tgsi_to_qir_rcp(struct vc4_compile *c,
+                struct tgsi_full_instruction *tgsi_inst,
+                enum qop op, struct qreg *src, int i)
+{
+        struct qreg x = src[0 * 4 + 0];
+        struct qreg r = qir_RCP(c, x);
+
+        /* Apply a Newton-Raphson step to improve the accuracy. */
+        r = qir_FMUL(c, r, qir_FSUB(c,
+                                    qir_uniform_f(c, 2.0),
+                                    qir_FMUL(c, x, r)));
+
+        return r;
+}
+
+static struct qreg
 qir_srgb_decode(struct vc4_compile *c, struct qreg srgb)
 {
         struct qreg low = qir_FMUL(c, srgb, qir_uniform_f(c, 1.0 / 12.92));
@@ -1165,7 +1181,7 @@ emit_tgsi_instruction(struct vc4_compile *c,

                 [TGSI_OPCODE_CMP] = { 0, tgsi_to_qir_cmp },
                 [TGSI_OPCODE_MAD] = { 0, tgsi_to_qir_mad },
-                [TGSI_OPCODE_RCP] = { QOP_RCP, tgsi_to_qir_scalar },
+                [TGSI_OPCODE_RCP] = { QOP_RCP, tgsi_to_qir_rcp },
                 [TGSI_OPCODE_RSQ] = { QOP_RSQ, tgsi_to_qir_scalar },
                 [TGSI_OPCODE_EX2] = { QOP_EXP2, tgsi_to_qir_scalar },
                 [TGSI_OPCODE_LG2] = { QOP_LOG2, tgsi_to_qir_scalar },
```