llvmpipe: calculate more accurate interpolation value at origin

Some rounding errors could crop up when calculating a0. Use a more accurate method (essentially barycentric interpolation) to fix this. Note that this does absolutely nothing to fix the REAL problem, which is that our interpolation gives very bad results for small triangles with steep gradients far away from the origin (it actually makes that worse). (To fix the real problem, we would either need to use a vertex corner (or some other point inside the tri) as the starting point value instead of the fb origin and pass that down to interpolation, or mimic what hw does and use barycentric interpolation (using the coordinates extracted from the rasterizer edge functions) - maybe another time.) Some (silly) tests, though, really want high accuracy at the fb origin and don't care much about anything else (Just. Don't. Ask.). Reviewed-by: Jose Fonseca <[email protected]>
author: Roland Scheidegger <[email protected]> 2013-11-20 05:17:56 +0000
committer: Roland Scheidegger <[email protected]> 2013-11-21 20:39:19 +0000
commit: 28d7b4147d4048031dd1a99c0858472912ea7e7e (patch)
tree: 67ba605999e435c82113162352fa66ae6e4e0790 /src
parent: 9d1c71e34d5ec225f1d4f12f6d7dad5148ab0e8b (diff)
1 files changed, 82 insertions, 6 deletions
diff --git a/src/gallium/drivers/llvmpipe/lp_state_setup.c b/src/gallium/drivers/llvmpipe/lp_state_setup.c
index 59ab467fb28..ef000fb380e 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_setup.c
@@ -49,6 +49,15 @@
 #include "lp_state_fs.h"
 #include "lp_state_setup.h"
 
+/*
+ * Set if the start point for interpolation should be calculated with a
+ * more accurate method (barycentric interpolation).
+ * Unfortunately, actual interpolation results of small tris with steep
+ * gradients far away from the origin are still very busted; this does
+ * nothing to change that (in fact it may make it worse). But some tests
+ * (don't ask) really want accurate values at the origin (and ONLY there).
+ */
+#define ACCURATE_A0 1
 
 
 /* currently organized to interpolate full float[4] attributes even
@@ -77,6 +86,9 @@ struct lp_setup_args
    LLVMValueRef dy01_ooa;
    LLVMValueRef dx20_ooa;
    LLVMValueRef dx01_ooa;
+   LLVMValueRef e01o;
+   LLVMValueRef e20o;
+   LLVMValueRef e12o;
    struct lp_build_context bld;
 };
 
@@ -376,6 +388,19 @@ load_attribute(struct gallivm_state *gallivm,
    }
 }
 
+/*
+ * FIXME: interpolation is always done wrt fb origin (0/0).
+ * However, if some (small) tri is far away from the origin and gradients
+ * are large, this can lead to HUGE errors, since the a0 value calculated
+ * here can get very large (with the actual values inside the triangle way
+ * smaller), leading to complete loss of accuracy. This could be prevented
+ * by using some point inside (or at a corner of) the tri as interpolation
+ * origin, or by just using barycentric interpolation (which GL suggests and
+ * is what real hw does - you can get the barycentric coordinates from the
+ * edge functions in rasterization in principle (though we sometimes skip
+ * these completely in case of tris covering a block fully, in which case
+ * that obviously wouldn't work)).
+ */
 static void 
 emit_coef4( struct gallivm_state *gallivm,
             struct lp_setup_args *args,
@@ -385,6 +410,8 @@ emit_coef4( struct gallivm_state *gallivm,
             LLVMValueRef a2)
 {
    LLVMBuilderRef b = gallivm->builder;
+   bool accurate_a0 = ACCURATE_A0;
+   LLVMValueRef attr_0;
    LLVMValueRef dy20_ooa = args->dy20_ooa;
    LLVMValueRef dy01_ooa = args->dy01_ooa;
    LLVMValueRef dx20_ooa = args->dx20_ooa;
@@ -408,10 +435,19 @@ emit_coef4( struct gallivm_state *gallivm,
 
    /* Calculate a0 - the attribute value at the origin
     */
-   LLVMValueRef dadx_x0       = LLVMBuildFMul(b, dadx, x0_center, "dadx_x0");
-   LLVMValueRef dady_y0       = LLVMBuildFMul(b, dady, y0_center, "dady_y0"); 
-   LLVMValueRef attr_v0       = LLVMBuildFAdd(b, dadx_x0, dady_y0, "attr_v0");
-   LLVMValueRef attr_0        = LLVMBuildFSub(b, a0, attr_v0, "attr_0");
+   if (!accurate_a0) {
+      LLVMValueRef dadx_x0    = LLVMBuildFMul(b, dadx, x0_center, "dadx_x0");
+      LLVMValueRef dady_y0    = LLVMBuildFMul(b, dady, y0_center, "dady_y0");
+      LLVMValueRef attr_v0    = LLVMBuildFAdd(b, dadx_x0, dady_y0, "attr_v0");
+      attr_0                  = LLVMBuildFSub(b, a0, attr_v0, "attr_0");
+   }
+   else {
+      LLVMValueRef ao2 = LLVMBuildFMul(b, args->e01o, a2, "");
+      LLVMValueRef ao1 = LLVMBuildFMul(b, args->e20o, a1, "");
+      LLVMValueRef ao0 = LLVMBuildFMul(b, args->e12o, a0, "");
+      attr_0 = LLVMBuildFAdd(b, ao0, ao1, "");
+      attr_0 = LLVMBuildFAdd(b, attr_0, ao2, "");
+   }
 
    store_coef(gallivm, args, slot, attr_0, dadx, dady);
 }
@@ -623,10 +659,11 @@ init_args(struct gallivm_state *gallivm,
    LLVMValueRef zeroi = lp_build_const_int32(gallivm, 0);
    LLVMValueRef pixel_center, xy0_center, dxy01, dxy20, dyx20;
    LLVMValueRef e, f, ef, ooa;
-   LLVMValueRef shuffles[4];
+   LLVMValueRef shuffles[4], shuf10;
    LLVMValueRef attr_pos[3];
    struct lp_type typef4 = lp_type_float_vec(32, 128);
    struct lp_build_context bld;
+   bool accurate_a0 = ACCURATE_A0;
 
    lp_build_context_init(&bld, gallivm, typef4);
    args->bld = bld;
@@ -651,8 +688,9 @@ init_args(struct gallivm_state *gallivm,
    shuffles[1] = zeroi;
    shuffles[2] = LLVMGetUndef(shuf_type);
    shuffles[3] = LLVMGetUndef(shuf_type);
+   shuf10 = LLVMConstVector(shuffles, 4);
 
-   dyx20 = LLVMBuildShuffleVector(b, dxy20, dxy20, LLVMConstVector(shuffles, 4), "");
+   dyx20 = LLVMBuildShuffleVector(b, dxy20, dxy20, shuf10, "");
 
    ef = LLVMBuildFMul(b, dxy01, dyx20, "ef");
    e = LLVMBuildExtractElement(b, ef, zeroi, "");
@@ -670,6 +708,44 @@ init_args(struct gallivm_state *gallivm,
    dxy20 = LLVMBuildFMul(b, dxy20, ooa, "");
    dxy01 = LLVMBuildFMul(b, dxy01, ooa, "");
 
+   if (accurate_a0) {
+      LLVMValueRef xy1xy2, xy1xy2_center, dxy12, dyx01, dyx12yx20;
+      LLVMValueRef p0, p1p2, tmp0, tmp1, shuf0145, shuf1054, shuf1u3u;
+
+      shuffles[0] = zeroi;
+      shuffles[1] = onei;
+      shuffles[2] = lp_build_const_int32(gallivm, 4);
+      shuffles[3] = lp_build_const_int32(gallivm, 5);
+      shuf0145 = LLVMConstVector(shuffles, 4);
+      shuffles[0] = onei;
+      shuffles[1] = zeroi;
+      shuffles[2] = lp_build_const_int32(gallivm, 5);
+      shuffles[3] = lp_build_const_int32(gallivm, 4);
+      shuf1054 = LLVMConstVector(shuffles, 4);
+      shuffles[0] = onei;
+      shuffles[1] = LLVMGetUndef(shuf_type);
+      shuffles[2] = lp_build_const_int32(gallivm, 3);
+      shuffles[3] = LLVMGetUndef(shuf_type);
+      shuf1u3u = LLVMConstVector(shuffles, 4);
+
+      xy1xy2 = LLVMBuildShuffleVector(b, attr_pos[1], attr_pos[2], shuf0145, "");
+      xy1xy2_center = LLVMBuildFSub(b, xy1xy2, pixel_center, "");
+      dxy12 = LLVMBuildFSub(b, attr_pos[1], attr_pos[2], "dxy12");
+      dxy12 = LLVMBuildFMul(b, dxy12, ooa, "");
+      dyx12yx20 = LLVMBuildShuffleVector(b, dxy12, dxy20, shuf1054, "dyx12yx20");
+      dyx01 = LLVMBuildShuffleVector(b, dxy01, dxy01, shuf10, "");
+      p0 = LLVMBuildFMul(b, dyx01, xy0_center, "");
+      p1p2 = LLVMBuildFMul(b, dyx12yx20, xy1xy2_center, "");
+      tmp0 = LLVMBuildExtractElement(b, p0, zeroi, "");
+      tmp1 = LLVMBuildExtractElement(b, p0, onei, "");
+      args->e01o = lp_build_broadcast_scalar(&bld, LLVMBuildFSub(b, tmp0, tmp1, "e01o"));
+      tmp1 = LLVMBuildShuffleVector(b, p1p2, p1p2, shuf1u3u, "");
+      tmp0 = LLVMBuildFSub(b, p1p2, tmp1, "e12o20o");
+      args->e12o = lp_build_extract_broadcast(gallivm, typef4, typef4, tmp0, zeroi);
+      args->e20o = lp_build_extract_broadcast(gallivm, typef4, typef4, tmp0,
+                                              lp_build_const_int32(gallivm, 2));
+   }
+
    args->dy20_ooa  = lp_build_extract_broadcast(gallivm, typef4, typef4, dxy20, onei);
    args->dy01_ooa  = lp_build_extract_broadcast(gallivm, typef4, typef4, dxy01, onei);
author	Roland Scheidegger <[email protected]>	2013-11-20 05:17:56 +0000
committer	Roland Scheidegger <[email protected]>	2013-11-21 20:39:19 +0000
commit	28d7b4147d4048031dd1a99c0858472912ea7e7e (patch)
tree	67ba605999e435c82113162352fa66ae6e4e0790 /src
parent	9d1c71e34d5ec225f1d4f12f6d7dad5148ab0e8b (diff)