diff options
-rw-r--r-- | src/gallium/drivers/vc4/vc4_program.c | 22 |
1 files changed, 20 insertions, 2 deletions
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 0046b2262da..66dff974a71 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -313,6 +313,25 @@ tgsi_to_qir_rcp(struct vc4_compile *c,
 }

 static struct qreg
+tgsi_to_qir_rsq(struct vc4_compile *c,
+                struct tgsi_full_instruction *tgsi_inst,
+                enum qop op, struct qreg *src, int i)
+{
+        struct qreg x = src[0 * 4 + 0]; /* only src[0] is read; tgsi_inst, op and i are unused */
+        struct qreg r = qir_RSQ(c, x); /* starting estimate, refined below */
+
+        /* Apply a Newton-Raphson step to improve the accuracy: r = r * (1.5 - 0.5 * x * r * r). */
+        r = qir_FMUL(c, r, qir_FSUB(c,
+                                    qir_uniform_f(c, 1.5),
+                                    qir_FMUL(c,
+                                             qir_uniform_f(c, 0.5),
+                                             qir_FMUL(c, x,
+                                                      qir_FMUL(c, r, r)))));
+
+        return r;
+}
+
+static struct qreg
 qir_srgb_decode(struct vc4_compile *c, struct qreg srgb)
 {
         struct qreg low = qir_FMUL(c, srgb, qir_uniform_f(c, 1.0 / 12.92));
@@ -1165,7 +1184,6 @@ emit_tgsi_instruction(struct vc4_compile *c,
                 [TGSI_OPCODE_IDIV] = { 0, tgsi_to_qir_idiv },
                 [TGSI_OPCODE_INEG] = { 0, tgsi_to_qir_ineg },

-                [TGSI_OPCODE_RSQ] = { QOP_RSQ, tgsi_to_qir_alu },
                 [TGSI_OPCODE_SEQ] = { 0, tgsi_to_qir_seq },
                 [TGSI_OPCODE_SNE] = { 0, tgsi_to_qir_sne },
                 [TGSI_OPCODE_SGE] = { 0, tgsi_to_qir_sge },
@@ -1182,7 +1200,7 @@ emit_tgsi_instruction(struct vc4_compile *c,
                 [TGSI_OPCODE_CMP] = { 0, tgsi_to_qir_cmp },
                 [TGSI_OPCODE_MAD] = { 0, tgsi_to_qir_mad },
                 [TGSI_OPCODE_RCP] = { QOP_RCP, tgsi_to_qir_rcp },
-                [TGSI_OPCODE_RSQ] = { QOP_RSQ, tgsi_to_qir_scalar },
+                [TGSI_OPCODE_RSQ] = { QOP_RSQ, tgsi_to_qir_rsq }, /* helper ignores its op argument and calls qir_RSQ directly */
                 [TGSI_OPCODE_EX2] = { QOP_EXP2, tgsi_to_qir_scalar },
                 [TGSI_OPCODE_LG2] = { QOP_LOG2, tgsi_to_qir_scalar },
                 [TGSI_OPCODE_LRP] = { 0, tgsi_to_qir_lrp }, |