summary refs log tree commit diff stats
path: root/src/gallium
diff options
context:
space:
mode:
author Eric Anholt <[email protected]> 2014-10-17 15:28:02 +0100
committer Eric Anholt <[email protected]> 2014-10-18 10:08:59 +0100
commit 15eb4c59f6504473989e6a064fda11d6c009ed8f (patch)
tree d4f83ab6ee983003142a2c1c5ae956c064fb0c10 /src/gallium
parent 1fc124b80f228319ded06f80a51681c75dc0a4f3 (diff)
vc4: Apply a Newton-Raphson step to improve RSQ
Fixes all the piglit built-in-functions/*sqrt tests, among others.
Diffstat (limited to 'src/gallium')
-rw-r--r-- src/gallium/drivers/vc4/vc4_program.c 22
1 files changed, 20 insertions, 2 deletions
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 0046b2262da..66dff974a71 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -313,6 +313,25 @@ tgsi_to_qir_rcp(struct vc4_compile *c,
}
static struct qreg
+tgsi_to_qir_rsq(struct vc4_compile *c,
+ struct tgsi_full_instruction *tgsi_inst,
+ enum qop op, struct qreg *src, int i)
+{
+ struct qreg x = src[0 * 4 + 0];
+ struct qreg r = qir_RSQ(c, x);
+
+ /* Apply a Newton-Raphson step to improve the accuracy. */
+ r = qir_FMUL(c, r, qir_FSUB(c,
+ qir_uniform_f(c, 1.5),
+ qir_FMUL(c,
+ qir_uniform_f(c, 0.5),
+ qir_FMUL(c, x,
+ qir_FMUL(c, r, r)))));
+
+ return r;
+}
+
+static struct qreg
qir_srgb_decode(struct vc4_compile *c, struct qreg srgb)
{
struct qreg low = qir_FMUL(c, srgb, qir_uniform_f(c, 1.0 / 12.92));
@@ -1165,7 +1184,6 @@ emit_tgsi_instruction(struct vc4_compile *c,
[TGSI_OPCODE_IDIV] = { 0, tgsi_to_qir_idiv },
[TGSI_OPCODE_INEG] = { 0, tgsi_to_qir_ineg },
- [TGSI_OPCODE_RSQ] = { QOP_RSQ, tgsi_to_qir_alu },
[TGSI_OPCODE_SEQ] = { 0, tgsi_to_qir_seq },
[TGSI_OPCODE_SNE] = { 0, tgsi_to_qir_sne },
[TGSI_OPCODE_SGE] = { 0, tgsi_to_qir_sge },
@@ -1182,7 +1200,7 @@ emit_tgsi_instruction(struct vc4_compile *c,
[TGSI_OPCODE_CMP] = { 0, tgsi_to_qir_cmp },
[TGSI_OPCODE_MAD] = { 0, tgsi_to_qir_mad },
[TGSI_OPCODE_RCP] = { QOP_RCP, tgsi_to_qir_rcp },
- [TGSI_OPCODE_RSQ] = { QOP_RSQ, tgsi_to_qir_scalar },
+ [TGSI_OPCODE_RSQ] = { QOP_RSQ, tgsi_to_qir_rsq },
[TGSI_OPCODE_EX2] = { QOP_EXP2, tgsi_to_qir_scalar },
[TGSI_OPCODE_LG2] = { QOP_LOG2, tgsi_to_qir_scalar },
[TGSI_OPCODE_LRP] = { 0, tgsi_to_qir_lrp },