llvmpipe: Call lp_rast_shade_quads from tri rasterizer.

author: José Fonseca <jfonseca@vmware.com> 2009-10-08 19:53:14 +0100
committer: José Fonseca <jfonseca@vmware.com> 2009-10-08 19:53:14 +0100
commit: 7ef36171d5d58b97ffa179f824d77a9c339a7ae4 (patch)
tree: 6319279c2e8a31c2ff4c5e28d50bcd2afce93cc9
parent: 07ee87e6645318a34a395a50f4e8d554d118d24f (diff)
2 files changed, 90 insertions, 189 deletions
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h
index 0aa111b4723..41a7f5ebbf3 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast.h
@@ -92,6 +92,9 @@ struct lp_rast_triangle {
    float dx12;
    float dx23;
    float dx31;
+
+   /* inputs for the shader */
+   struct lp_rast_shader_inputs *inputs;
 };
 
 struct clear_tile {
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
index 896ac253a02..138d6f55e00 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
@@ -29,80 +29,46 @@
  * Rasterization for binned triangles within a tile
  */
 
+#include "util/u_math.h"
 #include "lp_quad.h"
-#include "lp_quad_pipe.h"
 #include "lp_rast_priv.h"
+#include "lp_tile_soa.h"
 
 
-#define BLOCKSIZE 4
+#define BLOCKSIZE 8
 
 
 /* Convert 8x8 block into four runs of quads and render each in turn.
  */
 #if (BLOCKSIZE == 8)
-static void block_full( struct lp_rast_triangle *tri, int x, int y )
+static void block_full( struct lp_rasterizer *rast,
+                        const struct lp_rast_triangle *tri,
+                        int x, int y )
 {
-   struct quad_header *ptrs[4];
-   int i;
-
-   tri->quad[0].input.x0 = x + 0;
-   tri->quad[1].input.x0 = x + 2;
-   tri->quad[2].input.x0 = x + 4;
-   tri->quad[3].input.x0 = x + 6;
-
-   for (i = 0; i < 4; i++, y += 2) {
-      tri->quad[0].inout.mask = 0xf;
-      tri->quad[1].inout.mask = 0xf;
-      tri->quad[2].inout.mask = 0xf;
-      tri->quad[3].inout.mask = 0xf;
-
-      tri->quad[0].input.y0 = y;
-      tri->quad[1].input.y0 = y;
-      tri->quad[2].input.y0 = y;
-      tri->quad[3].input.y0 = y;
-
-      /* XXX: don't bother with this ptrs business */
-      ptrs[0] = &tri->quad[0];
-      ptrs[1] = &tri->quad[1];
-      ptrs[2] = &tri->quad[2];
-      ptrs[3] = &tri->quad[3];
-
-      tri->llvmpipe->quad.first->run( tri->llvmpipe->quad.first, ptrs, 4 );
-   }
+   const unsigned masks[4] = {~0, ~0, ~0, ~0};
+   int iy;
+
+   for (iy = 0; iy < 8; iy += 2)
+      lp_rast_shade_quads(rast, tri->inputs, x, y + iy, masks);
 }
 #else
-static void block_full( struct lp_rast_triangle *tri, int x, int y )
+static void block_full( struct lp_rasterizer *rast,
+                        const struct lp_rast_triangle *tri,
+                        int x, int y )
 {
-   struct quad_header *ptrs[4];
+   const unsigned masks[4] = {~0, ~0, 0, 0}; /* FIXME: Wasting quads!!! */
    int iy;
 
-   tri->quad[0].input.x0 = x + 0;
-   tri->quad[1].input.x0 = x + 2;
-
-   for (iy = 0; iy < 4; iy += 2) {
-      tri->quad[0].inout.mask = 0xf;
-      tri->quad[1].inout.mask = 0xf;
-
-      tri->quad[0].input.y0 = y + iy;
-      tri->quad[1].input.y0 = y + iy;
-
-      /* XXX: don't bother with this ptrs business */
-      ptrs[0] = &tri->quad[0];
-      ptrs[1] = &tri->quad[1];
-
-      tri->llvmpipe->quad.first->run( tri->llvmpipe->quad.first, ptrs, 2 );
-   }
+   for (iy = 0; iy < 4; iy += 2)
+      lp_rast_shade_quads(rast, tri->inputs, x, y + iy, masks);
 }
 #endif
 
-static void
-do_quad( struct lp_rasterizer *rast,
+static INLINE unsigned
+do_quad( const struct lp_rast_triangle *tri,
 	 int x, int y,
 	 float c1, float c2, float c3 )
 {
-   struct lp_rast_triangle *tri = rast->tri;
-   struct quad_header *quad = &rast->quad[0];
-
    float xstep1 = -tri->dy12;
    float xstep2 = -tri->dy23;
    float xstep3 = -tri->dy31;
@@ -111,43 +77,41 @@ do_quad( struct lp_rasterizer *rast,
    float ystep2 = tri->dx23;
    float ystep3 = tri->dx31;
 
-   quad->input.x0 = x;
-   quad->input.y0 = y;
-   quad->inout.mask = 0;
+   unsigned mask = 0;
 
    if (c1 > 0 &&
        c2 > 0 &&
        c3 > 0)
-      quad->inout.mask |= 1;
+      mask |= 1;
 	 
    if (c1 + xstep1 > 0 && 
        c2 + xstep2 > 0 && 
        c3 + xstep3 > 0)
-      quad->inout.mask |= 2;
+      mask |= 2;
 
    if (c1 + ystep1 > 0 && 
        c2 + ystep2 > 0 && 
        c3 + ystep3 > 0)
-      quad->inout.mask |= 4;
+      mask |= 4;
 
    if (c1 + ystep1 + xstep1 > 0 && 
        c2 + ystep2 + xstep2 > 0 && 
        c3 + ystep3 + xstep3 > 0)
-      quad->inout.mask |= 8;
+      mask |= 8;
 
-   if (quad->inout.mask)
-      rast->state->run( rast->state->state, &quad, 1 );
+   return mask;
 }
 
 /* Evaluate each pixel in a block, generate a mask and possibly render
  * the quad:
  */
 static void
-do_block( struct lp_rast_triangle *tri,
-	 int x, int y,
-	 float c1,
-	 float c2,
-	 float c3 )
+do_block( struct lp_rasterizer *rast,
+          const struct lp_rast_triangle *tri,
+          int x, int y,
+          float c1,
+          float c2,
+          float c3 )
 {
    const int step = 2;
 
@@ -166,19 +130,24 @@ do_block( struct lp_rast_triangle *tri,
       float cx2 = c2;
       float cx3 = c3;
 
+      unsigned masks[4] = {0, 0, 0, 0};
+
       for (ix = 0; ix < BLOCKSIZE; ix += 2) {
 
-	 do_quad(tri, x+ix, y+iy, cx1, cx2, cx3);
+	 masks[ix >> 1] = do_quad(tri, x + ix, y + iy, cx1, cx2, cx3);
 
 	 cx1 += xstep1;
 	 cx2 += xstep2;
 	 cx3 += xstep3;
       }
 
+      lp_rast_shade_quads(rast, tri->inputs, x, y + iy, masks);
+
       c1 += ystep1;
       c2 += ystep2;
       c3 += ystep3;
    }
+
 }
 
 
@@ -187,8 +156,9 @@ do_block( struct lp_rast_triangle *tri,
  * for this triangle:
  */
 void lp_rast_triangle( struct lp_rasterizer *rast,
-		       const struct lp_rast_triangle *tri )
+                       const union lp_rast_cmd_arg *arg )
 {
+   const struct lp_rast_triangle *tri = arg->triangle;
    int minx, maxx, miny, maxy;
 
    /* Clamp to tile dimensions:
@@ -205,136 +175,64 @@ void lp_rast_triangle( struct lp_rasterizer *rast,
       return;
    }
 
-   /* Bind parameter interpolants:
-    */
-   for (i = 0; i < Elements(rast->quad); i++) {
-      rast->quad[i].coef = tri->coef;
-      rast->quad[i].posCoef = &tri->position_coef;
-   }
-
-   /* Small area?
-    */
-   if (miny + 16 > maxy &&
-       minx + 16 > maxx)
-   {
-      const int step = 2;
+   const int step = BLOCKSIZE;
 
-      float xstep1 = -step * tri->dy12;
-      float xstep2 = -step * tri->dy23;
-      float xstep3 = -step * tri->dy31;
+   float ei1 = tri->ei1 * step;
+   float ei2 = tri->ei2 * step;
+   float ei3 = tri->ei3 * step;
 
-      float ystep1 = step * tri->dx12;
-      float ystep2 = step * tri->dx23;
-      float ystep3 = step * tri->dx31;
+   float eo1 = tri->eo1 * step;
+   float eo2 = tri->eo2 * step;
+   float eo3 = tri->eo3 * step;
 
-      float eo1 = tri->eo1 * step;
-      float eo2 = tri->eo2 * step;
-      float eo3 = tri->eo3 * step;
+   float xstep1 = -step * tri->dy12;
+   float xstep2 = -step * tri->dy23;
+   float xstep3 = -step * tri->dy31;
 
-      int x, y;
+   float ystep1 = step * tri->dx12;
+   float ystep2 = step * tri->dx23;
+   float ystep3 = step * tri->dx31;
+   int x, y;
 
-      minx &= ~(step-1);
-      maxx &= ~(step-1);
+   minx &= ~(step-1);
+   miny &= ~(step-1);
 
-      /* Subdivide space into NxM blocks, where each block is square and
-       * power-of-four in dimension.
-       *
-       * Trivially accept or reject blocks, else jump to per-pixel
-       * examination above.
-       */
-      for (y = miny; y < maxy; y += step)
-      {
-	 float cx1 = c1;
-	 float cx2 = c2;
-	 float cx3 = c3;
-
-	 for (x = minx; x < maxx; x += step)
-	 {
-	    if (cx1 + eo1 < 0 || 
-		cx2 + eo2 < 0 ||
-		cx3 + eo3 < 0) 
-	    {
-	    }
-	    else 
-	    {
-	       do_quad(&tri, x, y, cx1, cx2, cx3);
-	    }
-
-	    /* Iterate cx values across the region:
-	     */
-	    cx1 += xstep1;
-	    cx2 += xstep2;
-	    cx3 += xstep3;
-	 }
-      
-	 /* Iterate c values down the region:
-	  */
-	 c1 += ystep1;
-	 c2 += ystep2;
-	 c3 += ystep3;    
-      }
-   }
-   else 
+   for (y = miny; y < maxy; y += step)
    {
-      const int step = BLOCKSIZE;
-
-      float ei1 = tri->ei1 * step;
-      float ei2 = tri->ei2 * step;
-      float ei3 = tri->ei3 * step;
-
-      float eo1 = tri->eo1 * step;
-      float eo2 = tri->eo2 * step;
-      float eo3 = tri->eo3 * step;
-
-      float xstep1 = -step * tri->dy12;
-      float xstep2 = -step * tri->dy23;
-      float xstep3 = -step * tri->dy31;
-
-      float ystep1 = step * tri->dx12;
-      float ystep2 = step * tri->dx23;
-      float ystep3 = step * tri->dx31;
-      int x, y;
-
-      minx &= ~(step-1);
-      miny &= ~(step-1);
+      float cx1 = c1;
+      float cx2 = c2;
+      float cx3 = c3;
 
-      for (y = miny; y < maxy; y += step)
+      for (x = minx; x < maxx; x += step)
       {
-	 float cx1 = c1;
-	 float cx2 = c2;
-	 float cx3 = c3;
-
-	 for (x = minx; x < maxx; x += step)
-	 {
-	    if (cx1 + eo1 < 0 || 
-		cx2 + eo2 < 0 ||
-		cx3 + eo3 < 0) 
-	    {
-	    }
-	    else if (cx1 + ei1 > 0 &&
-		     cx2 + ei2 > 0 &&
-		     cx3 + ei3 > 0) 
-	    {
-	       block_full(&tri, x, y); /* trivial accept */
-	    }
-	    else 
-	    {
-	       do_block(&tri, x, y, cx1, cx2, cx3);
-	    }
-
-	    /* Iterate cx values across the region:
-	     */
-	    cx1 += xstep1;
-	    cx2 += xstep2;
-	    cx3 += xstep3;
-	 }
-      
-	 /* Iterate c values down the region:
-	  */
-	 c1 += ystep1;
-	 c2 += ystep2;
-	 c3 += ystep3;    
+         if (cx1 + eo1 < 0 ||
+             cx2 + eo2 < 0 ||
+             cx3 + eo3 < 0)
+         {
+         }
+         else if (cx1 + ei1 > 0 &&
+                  cx2 + ei2 > 0 &&
+                  cx3 + ei3 > 0)
+         {
+            block_full(rast, tri, x, y); /* trivial accept */
+         }
+         else
+         {
+            do_block(rast, tri, x, y, cx1, cx2, cx3);
+         }
+
+         /* Iterate cx values across the region:
+          */
+         cx1 += xstep1;
+         cx2 += xstep2;
+         cx3 += xstep3;
       }
+
+      /* Iterate c values down the region:
+       */
+      c1 += ystep1;
+      c2 += ystep2;
+      c3 += ystep3;
    }
 }
author	José Fonseca <jfonseca@vmware.com>	2009-10-08 19:53:14 +0100
committer	José Fonseca <jfonseca@vmware.com>	2009-10-08 19:53:14 +0100
commit	7ef36171d5d58b97ffa179f824d77a9c339a7ae4 (patch)
tree	6319279c2e8a31c2ff4c5e28d50bcd2afce93cc9
parent	07ee87e6645318a34a395a50f4e8d554d118d24f (diff)