diff options
author | Eric Anholt <[email protected]> | 2008-02-06 11:34:14 -0800 |
---|---|---|
committer | Xiang, Haihao <[email protected]> | 2008-06-10 14:22:36 +0800 |
commit | db5f206c002c2306ca0f138198fca22e454ea4f9 (patch) | |
tree | d1d0ee706653a8837f7462070752c39304e3cf37 /src | |
parent | 9dface8347773bc7de27c93cddda05b32d2c6b81 (diff) |
[915] Fix fp SIN function, and use a quadratic approximation instead of Taylor.
The Taylor series notably fails at producing sin(pi) == 0, which leads to
a discontinuity every 2*pi. The quadratic gets us sin(pi) == 0 behavior, at the
expense of going from 2.4% THD with a working Taylor series to 3.8% THD (easily
seen on comparative graphs of the two). However, our previous implementation
was producing sin(pi) < -1 and worse, so any reasonable approximation is an
improvement. This also fixes the repeating behavior, where the previous
implementation would repeat sin(x) for x>pi as sin(x % pi) and the opposite
for x < -pi.
Diffstat (limited to 'src')
-rw-r--r-- | src/mesa/drivers/dri/i915/i915_fragprog.c | 108 |
1 files changed, 56 insertions, 52 deletions
diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c index c43824871d7..7e690b413bc 100644 --- a/src/mesa/drivers/dri/i915/i915_fragprog.c +++ b/src/mesa/drivers/dri/i915/i915_fragprog.c @@ -42,7 +42,12 @@ #include "program.h" #include "programopt.h" - +static const GLfloat sin_quad_constants[4] = { + 4.0, + -4.0, + 2.0, + -1.0 +}; /* 1, -1/3!, 1/5!, -1/7! */ static const GLfloat sin_constants[4] = { 1.0, @@ -269,7 +274,7 @@ static void upload_program( struct i915_fragment_program *p ) while (1) { GLuint src0, src1, src2, flags; - GLuint tmp = 0; + GLuint tmp = 0, consts = 0; switch (inst->Opcode) { case OPCODE_ABS: @@ -638,62 +643,61 @@ static void upload_program( struct i915_fragment_program *p ) break; case OPCODE_SIN: - src0 = src_vector( p, &inst->SrcReg[0], program); - tmp = i915_get_utemp( p ); - - i915_emit_arith( p, - A0_MUL, - tmp, A0_DEST_CHANNEL_X, 0, - src0, - i915_emit_const1f(p, 1.0/(M_PI * 2)), - 0); - - i915_emit_arith( p, - A0_MOD, + src0 = src_vector(p, &inst->SrcReg[0], program); + tmp = i915_get_utemp(p); + consts = i915_emit_const4fv(p, sin_quad_constants); + + /* Reduce range from repeating about [-pi,pi] to [-1,1] */ + i915_emit_arith(p, + A0_MAD, + tmp, A0_DEST_CHANNEL_X, 0, + src0, + i915_emit_const1f(p, 1.0 / (2.0 * M_PI)), + i915_emit_const1f(p, .5)); + i915_emit_arith(p, A0_FRC, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0); + i915_emit_arith(p, + A0_MAD, tmp, A0_DEST_CHANNEL_X, 0, - tmp, - 0, 0 ); - - /* By choosing different taylor constants, could get rid of this mul: + tmp, + swizzle(consts, Z, ZERO, ZERO, ZERO), /* 2 */ + swizzle(consts, W, ZERO, ZERO, ZERO)); /* -1 */ + /* Compute sin using a quadratic. While it has increased total + * error over the range, it does give continuity that the 4-component + * Taylor series lacks when repeating the range due to its + * sin(PI) != 0 behavior. 
+ * + * The idea was described at: + * http://www.devmaster.net/forums/showthread.php?t=5784 + * + * If we're concerned about the error of this approximation, we should + * probably incorporate a second pass to include a x**4 factor. */ - i915_emit_arith( p, - A0_MUL, - tmp, A0_DEST_CHANNEL_X, 0, - tmp, - i915_emit_const1f(p, (M_PI * 2)), + /* tmp.y = abs(tmp.x); {x, abs(x), 0, 0} */ + i915_emit_arith(p, + A0_MAX, + tmp, A0_DEST_CHANNEL_Y, 0, + swizzle(tmp, ZERO, X, ZERO, ZERO), + negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0), 0); - /* - * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1 - * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x - * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x - * result = DP4 t1.wzyx, sin_constants - */ - i915_emit_arith( p, - A0_MUL, - tmp, A0_DEST_CHANNEL_XY, 0, - swizzle(tmp, X,X,ONE,ONE), - swizzle(tmp, X,ONE,ONE,ONE), 0); - - i915_emit_arith( p, + /* tmp.y = tmp.y * tmp.x; {x, x * abs(x), 0, 0} */ + i915_emit_arith(p, A0_MUL, - tmp, A0_DEST_CHANNEL_ALL, 0, - swizzle(tmp, X,Y,X,Y), - swizzle(tmp, X,X,ONE,ONE), 0); + tmp, A0_DEST_CHANNEL_Y, 0, + swizzle(tmp, ZERO, X, ZERO, ZERO), + tmp, + 0); - i915_emit_arith( p, - A0_MUL, - tmp, A0_DEST_CHANNEL_ALL, 0, - swizzle(tmp, X,Y,Y,W), - swizzle(tmp, X,Z,ONE,ONE), 0); - - i915_emit_arith( p, - A0_DP4, - get_result_vector( p, inst ), - get_result_flags( inst ), 0, - swizzle(tmp, W, Z, Y, X ), - i915_emit_const4fv( p, sin_constants ), 0); - break; + /* result = tmp.xy DP sin_quad_constants.xy */ + i915_emit_arith(p, + A0_DP3, + get_result_vector(p, inst), + get_result_flags(inst), 0, + tmp, + swizzle(i915_emit_const4fv(p, sin_quad_constants), + X, Y, ZERO, ZERO), + 0); + break; case OPCODE_SLT: EMIT_2ARG_ARITH( A0_SLT ); |