[915] Fix fp SIN function, and use a quadratic approximation instead of Taylor.

The Taylor series notably fails at producing sin(pi) == 0, which leads to discontinuity every 2*pi. The quadratic gets us sin(pi) == 0 behavior, at the expense of going from 2.4% THD with working Taylor series to 3.8% THD (easily seen on comparative graphs of the two). However, our previous implementation was producing sin(pi) < -1 and worse, so any reasonable approximation is an improvement. This also fixes the repeating behavior, where the previous implementation would repeat sin(x) for x>pi as sin(x % pi) and the opposite for x < -pi.
author: Eric Anholt <[email protected]> 2008-02-06 11:34:14 -0800
committer: Xiang, Haihao <[email protected]> 2008-06-10 14:22:36 +0800
commit: db5f206c002c2306ca0f138198fca22e454ea4f9 (patch)
tree: d1d0ee706653a8837f7462070752c39304e3cf37 /src
parent: 9dface8347773bc7de27c93cddda05b32d2c6b81 (diff)
1 files changed, 56 insertions, 52 deletions
diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c
index c43824871d7..7e690b413bc 100644
--- a/src/mesa/drivers/dri/i915/i915_fragprog.c
+++ b/src/mesa/drivers/dri/i915/i915_fragprog.c
@@ -42,7 +42,12 @@
 #include "program.h"
 #include "programopt.h"
 
-
+static const GLfloat sin_quad_constants[4] = {
+   4.0,
+   -4.0,
+   2.0,
+   -1.0
+};
 
 /* 1, -1/3!, 1/5!, -1/7! */
 static const GLfloat sin_constants[4] = {  1.0, 
@@ -269,7 +274,7 @@ static void upload_program( struct i915_fragment_program *p )
 
    while (1) {
       GLuint src0, src1, src2, flags;
-      GLuint tmp = 0;
+      GLuint tmp = 0, consts = 0;
 
       switch (inst->Opcode) {
       case OPCODE_ABS: 
@@ -638,62 +643,61 @@ static void upload_program( struct i915_fragment_program *p )
 	 break;
 
       case OPCODE_SIN:
-	 src0 = src_vector( p, &inst->SrcReg[0], program);
-	 tmp = i915_get_utemp( p );
-
-	 i915_emit_arith( p, 
-			 A0_MUL,
-			 tmp, A0_DEST_CHANNEL_X, 0,
-			 src0, 
-			 i915_emit_const1f(p, 1.0/(M_PI * 2)),
-			 0);
-
-	 i915_emit_arith( p, 
-			 A0_MOD,
+         src0 = src_vector(p, &inst->SrcReg[0], program);
+         tmp = i915_get_utemp(p);
+	 consts = i915_emit_const4fv(p, sin_quad_constants);
+
+	 /* Reduce range from repeating about [-pi,pi] to [-1,1] */
+         i915_emit_arith(p,
+                         A0_MAD,
+                         tmp, A0_DEST_CHANNEL_X, 0,
+                         src0,
+			 i915_emit_const1f(p, 1.0 / (2.0 * M_PI)),
+			 i915_emit_const1f(p, .5));
+         i915_emit_arith(p, A0_FRC, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0);
+	 i915_emit_arith(p,
+			 A0_MAD,
 			 tmp, A0_DEST_CHANNEL_X, 0,
-			 tmp, 
-			 0, 0 );
-
-	 /* By choosing different taylor constants, could get rid of this mul:
+			 tmp,
+			 swizzle(consts, Z, ZERO, ZERO, ZERO), /* 2 */
+			 swizzle(consts, W, ZERO, ZERO, ZERO)); /* -1 */
+	 /* Compute sin using a quadratic.  While it has increased total
+	  * error over the range, it does give continuity that the 4-component
+	  * Taylor series lacks when repeating the range due to its
+	  * sin(PI) != 0 behavior.
+	  *
+	  * The idea was described at:
+	  * http://www.devmaster.net/forums/showthread.php?t=5784
+	  *
+	  * If we're concerned about the error of this approximation, we should
+	  * probably incorporate a second pass to include a x**4 factor.
 	  */
-	 i915_emit_arith( p, 
-			 A0_MUL,
-			 tmp, A0_DEST_CHANNEL_X, 0,
-			 tmp, 
-			 i915_emit_const1f(p, (M_PI * 2)),
+	 /* tmp.y = abs(tmp.x); {x, abs(x), 0, 0} */
+	 i915_emit_arith(p,
+                         A0_MAX,
+			 tmp, A0_DEST_CHANNEL_Y, 0,
+			 swizzle(tmp, ZERO, X, ZERO, ZERO),
+			 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0),
 			 0);
 
-	 /* 
-	  * t0.xy = MUL x.xx11, x.x1111  ; x^2, x, 1, 1
-	  * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
-	  * t1 = MUL t0.xyyw t0.yz11    ; x^7 x^5 x^3 x
-	  * result = DP4 t1.wzyx, sin_constants
-	  */
-	 i915_emit_arith( p, 
-			 A0_MUL,
-			 tmp, A0_DEST_CHANNEL_XY, 0,
-			 swizzle(tmp, X,X,ONE,ONE), 
-			 swizzle(tmp, X,ONE,ONE,ONE), 0);
-
-	 i915_emit_arith( p, 
+	 /* tmp.y = tmp.y * tmp.x; {x, x * abs(x), 0, 0} */
+	 i915_emit_arith(p,
 			 A0_MUL,
-			 tmp, A0_DEST_CHANNEL_ALL, 0,
-			 swizzle(tmp, X,Y,X,Y), 
-			 swizzle(tmp, X,X,ONE,ONE), 0);
+			 tmp, A0_DEST_CHANNEL_Y, 0,
+			 swizzle(tmp, ZERO, X, ZERO, ZERO),
+			 tmp,
+			 0);
 
-	 i915_emit_arith( p, 
-			 A0_MUL,
-			 tmp, A0_DEST_CHANNEL_ALL, 0,
-			 swizzle(tmp, X,Y,Y,W), 
-			 swizzle(tmp, X,Z,ONE,ONE), 0);
-	    
-	 i915_emit_arith( p, 
-			 A0_DP4,
-			 get_result_vector( p, inst ), 
-			 get_result_flags( inst ), 0,
-			 swizzle(tmp, W, Z, Y, X ),
-			 i915_emit_const4fv( p, sin_constants ), 0);
-	 break;
+	 /* result = tmp.xy DP sin_quad_constants.xy */
+         i915_emit_arith(p,
+                         A0_DP3,
+                         get_result_vector(p, inst),
+                         get_result_flags(inst), 0,
+                         tmp,
+                         swizzle(i915_emit_const4fv(p, sin_quad_constants),
+				 X, Y, ZERO, ZERO),
+			 0);
+         break;
 
       case OPCODE_SLT: 
 	 EMIT_2ARG_ARITH( A0_SLT );
author	Eric Anholt <[email protected]>	2008-02-06 11:34:14 -0800
committer	Xiang, Haihao <[email protected]>	2008-06-10 14:22:36 +0800
commit	db5f206c002c2306ca0f138198fca22e454ea4f9 (patch)
tree	d1d0ee706653a8837f7462070752c39304e3cf37 /src
parent	9dface8347773bc7de27c93cddda05b32d2c6b81 (diff)