summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorBrian Paul <[email protected]>2009-12-16 16:02:59 -0700
committerBrian Paul <[email protected]>2009-12-16 16:10:05 -0700
commitab9438193083b7f9a3180cb9cea45e269131048a (patch)
tree56affd185611182349f1db88cc2a49aa7d431e08 /src
parente288796c92bb7d75cd6dfee968804c6230ef38d7 (diff)
llvmpipe: do the final pixel in/out triangle test in the fragment shader
The test to determine which of the pixels in a 2x2 quad lie inside the triangle is now done in the fragment shader rather than in the calling C code. This is a little faster, but there are a few more things to do. Note that the step[] array elements are in a different order now. Rather than being in row-major order for the 4x4 grid, they're in "quad-major" order. The setup of the step arrays is a little more complicated now. So is the coarse/intermediate tile test code, but some lookup tables help with that. Next steps: - early-cull 2x2 quads which are totally outside the triangle. - skip the in/out test for fully contained quads. - make the in/out comparison code tighter/faster.
Diffstat (limited to 'src')
-rw-r--r--src/gallium/drivers/llvmpipe/lp_jit.h9
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast.c76
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast.h11
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast_priv.h11
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast_tri.c222
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_tri.c49
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_fs.c144
7 files changed, 302 insertions, 220 deletions
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h
index 7eccb5da859..e8fb7d990f8 100644
--- a/src/gallium/drivers/llvmpipe/lp_jit.h
+++ b/src/gallium/drivers/llvmpipe/lp_jit.h
@@ -114,9 +114,14 @@ typedef void
const void *a0,
const void *dadx,
const void *dady,
- const uint32_t *mask,
void *color,
- void *depth);
+ void *depth,
+ const int32_t c1,
+ const int32_t c2,
+ const int32_t c3,
+ const int32_t *step1,
+ const int32_t *step2,
+ const int32_t *step3);
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c
index ec87d907b81..b1bd27d3406 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast.c
@@ -25,6 +25,7 @@
*
**************************************************************************/
+#include <limits.h>
#include "util/u_memory.h"
#include "util/u_math.h"
#include "util/u_cpu_detect.h"
@@ -279,6 +280,8 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast,
unsigned thread_index,
const union lp_rast_cmd_arg arg )
{
+ /* Set c1,c2,c3 to large values so the in/out test always passes */
+ const int32_t c1 = INT_MAX/2, c2 = INT_MAX/2, c3 = INT_MAX/2;
const struct lp_rast_shader_inputs *inputs = arg.shade_tile;
const unsigned tile_x = rast->tasks[thread_index].x;
const unsigned tile_y = rast->tasks[thread_index].y;
@@ -296,7 +299,7 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast,
inputs,
tile_x + x,
tile_y + y,
- mask);
+ c1, c2, c3);
}
@@ -308,58 +311,25 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast,
unsigned thread_index,
const struct lp_rast_shader_inputs *inputs,
unsigned x, unsigned y,
- unsigned mask)
+ int32_t c1, int32_t c2, int32_t c3)
{
-#if 1
const struct lp_rast_state *state = rast->tasks[thread_index].current_state;
struct lp_rast_tile *tile = &rast->tasks[thread_index].tile;
void *color;
void *depth;
- uint32_t ALIGN16_ATTRIB masks[2][2][2][2];
unsigned ix, iy;
int block_offset;
+#ifdef DEBUG
assert(state);
/* Sanity checks */
assert(x % TILE_VECTOR_WIDTH == 0);
assert(y % TILE_VECTOR_HEIGHT == 0);
- /* mask: the rasterizer wants to treat pixels in 4x4 blocks, but
- * the pixel shader wants to swizzle them into 4 2x2 quads.
- *
- * Additionally, the pixel shader wants masks as full dword ~0,
- * while the rasterizer wants to pack per-pixel bits tightly.
- */
-#if 0
- unsigned qx, qy;
- for (qy = 0; qy < 2; ++qy)
- for (qx = 0; qx < 2; ++qx)
- for (iy = 0; iy < 2; ++iy)
- for (ix = 0; ix < 2; ++ix)
- masks[qy][qx][iy][ix] = mask & (1 << (qy*8+iy*4+qx*2+ix)) ? ~0 : 0;
-#else
- masks[0][0][0][0] = mask & (1 << (0*8+0*4+0*2+0)) ? ~0 : 0;
- masks[0][0][0][1] = mask & (1 << (0*8+0*4+0*2+1)) ? ~0 : 0;
- masks[0][0][1][0] = mask & (1 << (0*8+1*4+0*2+0)) ? ~0 : 0;
- masks[0][0][1][1] = mask & (1 << (0*8+1*4+0*2+1)) ? ~0 : 0;
- masks[0][1][0][0] = mask & (1 << (0*8+0*4+1*2+0)) ? ~0 : 0;
- masks[0][1][0][1] = mask & (1 << (0*8+0*4+1*2+1)) ? ~0 : 0;
- masks[0][1][1][0] = mask & (1 << (0*8+1*4+1*2+0)) ? ~0 : 0;
- masks[0][1][1][1] = mask & (1 << (0*8+1*4+1*2+1)) ? ~0 : 0;
-
- masks[1][0][0][0] = mask & (1 << (1*8+0*4+0*2+0)) ? ~0 : 0;
- masks[1][0][0][1] = mask & (1 << (1*8+0*4+0*2+1)) ? ~0 : 0;
- masks[1][0][1][0] = mask & (1 << (1*8+1*4+0*2+0)) ? ~0 : 0;
- masks[1][0][1][1] = mask & (1 << (1*8+1*4+0*2+1)) ? ~0 : 0;
- masks[1][1][0][0] = mask & (1 << (1*8+0*4+1*2+0)) ? ~0 : 0;
- masks[1][1][0][1] = mask & (1 << (1*8+0*4+1*2+1)) ? ~0 : 0;
- masks[1][1][1][0] = mask & (1 << (1*8+1*4+1*2+0)) ? ~0 : 0;
- masks[1][1][1][1] = mask & (1 << (1*8+1*4+1*2+1)) ? ~0 : 0;
-#endif
-
assert((x % 4) == 0);
assert((y % 4) == 0);
+#endif
ix = x % TILE_SIZE;
iy = y % TILE_SIZE;
@@ -373,39 +343,27 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast,
/* depth buffer */
depth = tile->depth + block_offset;
- /* XXX: This will most likely fail on 32bit x86 without -mstackrealign */
- assert(lp_check_alignment(masks, 16));
-
+#ifdef DEBUG
assert(lp_check_alignment(depth, 16));
assert(lp_check_alignment(color, 16));
assert(lp_check_alignment(state->jit_context.blend_color, 16));
+ assert(lp_check_alignment(inputs->step[0], 16));
+ assert(lp_check_alignment(inputs->step[1], 16));
+ assert(lp_check_alignment(inputs->step[2], 16));
+#endif
+
/* run shader */
state->jit_function( &state->jit_context,
x, y,
inputs->a0,
inputs->dadx,
inputs->dady,
- &masks[0][0][0][0],
color,
- depth);
-#else
- struct lp_rast_tile *tile = &rast->tile;
- unsigned chan_index;
- unsigned q, ix, iy;
-
- x %= TILE_SIZE;
- y %= TILE_SIZE;
-
- /* mask */
- for (q = 0; q < 4; ++q)
- for(iy = 0; iy < 2; ++iy)
- for(ix = 0; ix < 2; ++ix)
- if(masks[q] & (1 << (iy*2 + ix)))
- for (chan_index = 0; chan_index < NUM_CHANNELS; ++chan_index)
- TILE_PIXEL(tile->color, x + q*2 + ix, y + iy, chan_index) = 0xff;
-
-#endif
+ depth,
+ c1, c2, c3,
+ inputs->step[0], inputs->step[1], inputs->step[2]
+ );
}
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h
index 2dd0193d8dc..46e22f69a61 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast.h
@@ -80,6 +80,9 @@ struct lp_rast_shader_inputs {
float (*a0)[4];
float (*dadx)[4];
float (*dady)[4];
+
+ /* edge/step info for 3 edges and 4x4 block of pixels */
+ int ALIGN16_ATTRIB step[3][16];
};
@@ -117,14 +120,10 @@ struct lp_rast_triangle {
int dx31;
/* edge function values at minx,miny ?? */
- int c1;
- int c2;
- int c3;
-
- int step[3][16];
+ int c1, c2, c3;
/* inputs for the shader */
- struct lp_rast_shader_inputs inputs;
+ struct lp_rast_shader_inputs ALIGN16_ATTRIB inputs;
};
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
index 79a90f6610c..cd72d7e69d8 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
@@ -61,15 +61,6 @@ struct lp_rasterizer_task
unsigned x, y; /**< Pos of this tile in framebuffer, in pixels */
- /* Pixel blocks produced during rasterization
- */
- unsigned nr_blocks;
- struct {
- unsigned x;
- unsigned y;
- unsigned mask;
- } blocks[256];
-
const struct lp_rast_state *current_state;
/** "back" pointer */
@@ -133,6 +124,6 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast,
unsigned thread_index,
const struct lp_rast_shader_inputs *inputs,
unsigned x, unsigned y,
- unsigned masks);
+ int32_t c1, int32_t c2, int32_t c3);
#endif
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
index 6c96010c52f..9b1861223ae 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
@@ -29,6 +29,7 @@
* Rasterization for binned triangles within a tile
*/
+#include <limits.h>
#include "util/u_math.h"
#include "lp_debug.h"
#include "lp_rast_priv.h"
@@ -36,42 +37,89 @@
/**
- * Add a 4x4 block of pixels to the block list.
- * All pixels are known to be inside the triangle's bounds.
+ * Map an index in [0,15] to an x,y position, multiplied by 4.
+ * This is used to get the position of each subtile in a 4x4
+ * grid of edge step values.
+ */
+static const int pos_table4[16][2] = {
+ { 0, 0 },
+ { 4, 0 },
+ { 0, 4 },
+ { 4, 4 },
+ { 8, 0 },
+ { 12, 0 },
+ { 8, 4 },
+ { 12, 4 },
+ { 0, 8 },
+ { 4, 8 },
+ { 0, 12 },
+ { 4, 12 },
+ { 8, 8 },
+ { 12, 8 },
+ { 8, 12 },
+ { 12, 12 }
+};
+
+
+static const int pos_table16[16][2] = {
+ { 0, 0 },
+ { 16, 0 },
+ { 0, 16 },
+ { 16, 16 },
+ { 32, 0 },
+ { 48, 0 },
+ { 32, 16 },
+ { 48, 16 },
+ { 0, 32 },
+ { 16, 32 },
+ { 0, 48 },
+ { 16, 48 },
+ { 32, 32 },
+ { 48, 32 },
+ { 32, 48 },
+ { 48, 48 }
+};
+
+
+/**
+ * Shade all pixels in a 4x4 block.
*/
static void
-block_full_4( struct lp_rasterizer_task *rast_task, int x, int y )
+block_full_4( struct lp_rasterizer_task *rast_task,
+ const struct lp_rast_triangle *tri,
+ int x, int y )
{
- const unsigned i = rast_task->nr_blocks;
- assert(x % 4 == 0);
- assert(y % 4 == 0);
- rast_task->blocks[i].x = x;
- rast_task->blocks[i].y = y;
- rast_task->blocks[i].mask = ~0;
- rast_task->nr_blocks++;
+ /* Set c1,c2,c3 to large values so the in/out test always passes */
+ const int32_t c1 = INT_MAX/2, c2 = INT_MAX/2, c3 = INT_MAX/2;
+ lp_rast_shade_quads(rast_task->rast,
+ rast_task->thread_index,
+ &tri->inputs,
+ x, y,
+ c1, c2, c3);
}
/**
- * Add a 16x16 block of pixels to the block list.
- * All pixels are known to be inside the triangle's bounds.
+ * Shade all pixels in a 16x16 block.
*/
static void
-block_full_16( struct lp_rasterizer_task *rast_task, int x, int y )
+block_full_16( struct lp_rasterizer_task *rast_task,
+ const struct lp_rast_triangle *tri,
+ int x, int y )
{
unsigned ix, iy;
assert(x % 16 == 0);
assert(y % 16 == 0);
for (iy = 0; iy < 16; iy += 4)
for (ix = 0; ix < 16; ix += 4)
- block_full_4(rast_task, x + ix, y + iy);
+ block_full_4(rast_task, tri, x + ix, y + iy);
}
/**
- * Evaluate each pixel in a 4x4 block to determine if it lies within
- * the triangle's bounds.
- * Generate a mask of in/out flags and add the block to the blocks list.
+ * Pass the 4x4 pixel block to the shader function.
+ * Determination of which of the 16 pixels lies inside the triangle
+ * will be done as part of the fragment shader.
*/
static void
do_block_4( struct lp_rasterizer_task *rast_task,
@@ -81,28 +129,11 @@ do_block_4( struct lp_rasterizer_task *rast_task,
int c2,
int c3 )
{
- int i;
- unsigned mask = 0;
-
- assert(x % 4 == 0);
- assert(y % 4 == 0);
-
- for (i = 0; i < 16; i++) {
- int any_negative = ((c1 + tri->step[0][i]) |
- (c2 + tri->step[1][i]) |
- (c3 + tri->step[2][i])) >> 31;
- mask |= (~any_negative) & (1 << i);
- }
-
- /* As we do trivial reject already, masks should rarely be all zero:
- */
- if (mask) {
- const unsigned i = rast_task->nr_blocks;
- rast_task->blocks[i].x = x;
- rast_task->blocks[i].y = y;
- rast_task->blocks[i].mask = mask;
- rast_task->nr_blocks++;
- }
+ lp_rast_shade_quads(rast_task->rast,
+ rast_task->thread_index,
+ &tri->inputs,
+ x, y,
+ c1, c2, c3);
}
@@ -118,40 +149,42 @@ do_block_16( struct lp_rasterizer_task *rast_task,
int c2,
int c3 )
{
- int ix, iy, i = 0;
+ const int ei1 = tri->ei1 * 4;
+ const int ei2 = tri->ei2 * 4;
+ const int ei3 = tri->ei3 * 4;
- int ei1 = tri->ei1 * 4;
- int ei2 = tri->ei2 * 4;
- int ei3 = tri->ei3 * 4;
+ const int eo1 = tri->eo1 * 4;
+ const int eo2 = tri->eo2 * 4;
+ const int eo3 = tri->eo3 * 4;
- int eo1 = tri->eo1 * 4;
- int eo2 = tri->eo2 * 4;
- int eo3 = tri->eo3 * 4;
+ int i;
assert(x % 16 == 0);
assert(y % 16 == 0);
- for (iy = 0; iy < 16; iy+=4) {
- for (ix = 0; ix < 16; ix+=4, i++) {
- int cx1 = c1 + (tri->step[0][i] * 4);
- int cx2 = c2 + (tri->step[1][i] * 4);
- int cx3 = c3 + (tri->step[2][i] * 4);
-
- if (cx1 + eo1 < 0 ||
- cx2 + eo2 < 0 ||
- cx3 + eo3 < 0) {
- /* the block is completely outside the triangle - nop */
- }
- else if (cx1 + ei1 > 0 &&
- cx2 + ei2 > 0 &&
- cx3 + ei3 > 0) {
+ for (i = 0; i < 16; i++) {
+ int cx1 = c1 + (tri->inputs.step[0][i] * 4);
+ int cx2 = c2 + (tri->inputs.step[1][i] * 4);
+ int cx3 = c3 + (tri->inputs.step[2][i] * 4);
+
+ if (cx1 + eo1 < 0 ||
+ cx2 + eo2 < 0 ||
+ cx3 + eo3 < 0) {
+ /* the block is completely outside the triangle - nop */
+ }
+ else {
+ int px = x + pos_table4[i][0];
+ int py = y + pos_table4[i][1];
+ if (cx1 + ei1 > 0 &&
+ cx2 + ei2 > 0 &&
+ cx3 + ei3 > 0) {
/* the block is completely inside the triangle */
- block_full_4(rast_task, x+ix, y+iy);
- }
- else {
+ block_full_4(rast_task, tri, px, py);
+ }
+ else {
/* the block is partially in/out of the triangle */
- do_block_4(rast_task, tri, x+ix, y+iy, cx1, cx2, cx3);
- }
+ do_block_4(rast_task, tri, px, py, cx1, cx2, cx3);
+ }
}
}
}
@@ -171,8 +204,7 @@ lp_rast_triangle( struct lp_rasterizer *rast,
int x = rast_task->x;
int y = rast_task->y;
- int ix, iy;
- unsigned i = 0;
+ unsigned i;
int c1 = tri->c1 + tri->dx12 * y - tri->dy12 * x;
int c2 = tri->c2 + tri->dx23 * y - tri->dy23 * x;
@@ -186,48 +218,36 @@ lp_rast_triangle( struct lp_rasterizer *rast,
int eo2 = tri->eo2 * 16;
int eo3 = tri->eo3 * 16;
- assert(Elements(rast_task->blocks) == (TILE_SIZE * TILE_SIZE) / (4*4));
-
LP_DBG(DEBUG_RAST, "lp_rast_triangle\n");
- rast_task->nr_blocks = 0;
-
/* Walk over the tile to build a list of 4x4 pixel blocks which will
* be filled/shaded. We do this at two granularities: 16x16 blocks
* and then 4x4 blocks.
*/
- for (iy = 0; iy < TILE_SIZE; iy += 16) {
- for (ix = 0; ix < TILE_SIZE; ix += 16, i++) {
- int cx1 = c1 + (tri->step[0][i] * 16);
- int cx2 = c2 + (tri->step[1][i] * 16);
- int cx3 = c3 + (tri->step[2][i] * 16);
-
- if (cx1 + eo1 < 0 ||
- cx2 + eo2 < 0 ||
- cx3 + eo3 < 0) {
- /* the block is completely outside the triangle - nop */
- }
- else if (cx1 + ei1 > 0 &&
- cx2 + ei2 > 0 &&
- cx3 + ei3 > 0) {
+ for (i = 0; i < 16; i++) {
+ int cx1 = c1 + (tri->inputs.step[0][i] * 16);
+ int cx2 = c2 + (tri->inputs.step[1][i] * 16);
+ int cx3 = c3 + (tri->inputs.step[2][i] * 16);
+
+ if (cx1 + eo1 < 0 ||
+ cx2 + eo2 < 0 ||
+ cx3 + eo3 < 0) {
+ /* the block is completely outside the triangle - nop */
+ }
+ else {
+ int px = x + pos_table16[i][0];
+ int py = y + pos_table16[i][1];
+
+ if (cx1 + ei1 > 0 &&
+ cx2 + ei2 > 0 &&
+ cx3 + ei3 > 0) {
/* the block is completely inside the triangle */
- block_full_16(rast_task, x+ix, y+iy);
- }
- else {
+ block_full_16(rast_task, tri, px, py);
+ }
+ else {
/* the block is partially in/out of the triangle */
- do_block_16(rast_task, tri, x+ix, y+iy, cx1, cx2, cx3);
- }
+ do_block_16(rast_task, tri, px, py, cx1, cx2, cx3);
+ }
}
}
-
- assert(rast_task->nr_blocks <= Elements(rast_task->blocks));
-
- /* Shade the 4x4 pixel blocks */
- for (i = 0; i < rast_task->nr_blocks; i++)
- lp_rast_shade_quads(rast,
- thread_index,
- &tri->inputs,
- rast_task->blocks[i].x,
- rast_task->blocks[i].y,
- rast_task->blocks[i].mask);
}
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
index aeaf260af27..e15b987767c 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
@@ -265,7 +265,7 @@ do_triangle_ccw(struct setup_context *setup,
const int y3 = subpixel_snap(v3[0][1]);
struct lp_scene *scene = lp_setup_get_current_scene(setup);
- struct lp_rast_triangle *tri = lp_scene_alloc( scene, sizeof *tri );
+ struct lp_rast_triangle *tri = lp_scene_alloc_aligned( scene, sizeof *tri, 16 );
float area, oneoverarea;
int minx, maxx, miny, maxy;
@@ -354,38 +354,29 @@ do_triangle_ccw(struct setup_context *setup,
tri->ei3 = tri->dx31 - tri->dy31 - tri->eo3;
{
- int xstep1 = -tri->dy12;
- int xstep2 = -tri->dy23;
- int xstep3 = -tri->dy31;
+ const int xstep1 = -tri->dy12;
+ const int xstep2 = -tri->dy23;
+ const int xstep3 = -tri->dy31;
- int ystep1 = tri->dx12;
- int ystep2 = tri->dx23;
- int ystep3 = tri->dx31;
+ const int ystep1 = tri->dx12;
+ const int ystep2 = tri->dx23;
+ const int ystep3 = tri->dx31;
- int ix, iy;
+ int qx, qy, ix, iy;
int i = 0;
- int c1 = 0;
- int c2 = 0;
- int c3 = 0;
-
- for (iy = 0; iy < 4; iy++) {
- int cx1 = c1;
- int cx2 = c2;
- int cx3 = c3;
-
- for (ix = 0; ix < 4; ix++, i++) {
- tri->step[0][i] = cx1;
- tri->step[1][i] = cx2;
- tri->step[2][i] = cx3;
- cx1 += xstep1;
- cx2 += xstep2;
- cx3 += xstep3;
- }
-
- c1 += ystep1;
- c2 += ystep2;
- c3 += ystep3;
+ for (qy = 0; qy < 2; qy++) {
+ for (qx = 0; qx < 2; qx++) {
+ for (iy = 0; iy < 2; iy++) {
+ for (ix = 0; ix < 2; ix++, i++) {
+ int x = qx * 2 + ix;
+ int y = qy * 2 + iy;
+ tri->inputs.step[0][i] = x * xstep1 + y * ystep1;
+ tri->inputs.step[1][i] = x * xstep2 + y * ystep2;
+ tri->inputs.step[2][i] = x * xstep3 + y * ystep3;
+ }
+ }
+ }
}
}
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index c0d5a70a553..4af37e365ec 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -176,7 +176,92 @@ generate_depth(LLVMBuilderRef builder,
/**
+ * Generate the code to do inside/outside triangle testing for the
+ * four pixels in a 2x2 quad. This will set the four elements of the
+ * quad mask vector to 0 or ~0.
+ * \param i which quad of the quad group to test, in [0,3]
+ */
+static void
+generate_tri_edge_mask(LLVMBuilderRef builder,
+ unsigned i,
+ LLVMValueRef *mask, /* ivec4, out */
+ LLVMValueRef c0, /* int32 */
+ LLVMValueRef c1, /* int32 */
+ LLVMValueRef c2, /* int32 */
+ LLVMValueRef step0_ptr, /* ivec4 */
+ LLVMValueRef step1_ptr, /* ivec4 */
+ LLVMValueRef step2_ptr) /* ivec4 */
+{
+ /*
+ c0_vec = splat(c0)
+ c1_vec = splat(c1)
+ c2_vec = splat(c2)
+ s0_vec = c0_vec + step0_ptr[i]
+ s1_vec = c1_vec + step1_ptr[i]
+ s2_vec = c2_vec + step2_ptr[i]
+ m0_vec = s0_vec > {0,0,0,0}
+ m1_vec = s1_vec > {0,0,0,0}
+ m2_vec = s2_vec > {0,0,0,0}
+ mask = m0_vec & m1_vec & m2_vec
+ */
+ struct lp_type i32_type;
+ LLVMTypeRef i32vec4_type;
+
+ LLVMValueRef index;
+ LLVMValueRef c0_vec, c1_vec, c2_vec;
+ LLVMValueRef step0_vec, step1_vec, step2_vec;
+ LLVMValueRef m0_vec, m1_vec, m2_vec;
+ LLVMValueRef s0_vec, s1_vec, s2_vec;
+ LLVMValueRef m;
+
+ LLVMValueRef zeros;
+
+ assert(i < 4);
+
+ /* int32 vector type */
+ memset(&i32_type, 0, sizeof i32_type);
+ i32_type.floating = FALSE; /* values are integers */
+ i32_type.sign = TRUE; /* values are signed */
+ i32_type.norm = FALSE; /* values are not normalized */
+ i32_type.width = 32; /* 32-bit int values */
+ i32_type.length = 4; /* 4 elements per vector */
+
+ i32vec4_type = lp_build_int32_vec4_type();
+
+ /* int32_vec4 zero = {0,0,0,0} */
+ zeros = LLVMConstNull(i32vec4_type);
+
+ c0_vec = lp_build_broadcast(builder, i32vec4_type, c0);
+ c1_vec = lp_build_broadcast(builder, i32vec4_type, c1);
+ c2_vec = lp_build_broadcast(builder, i32vec4_type, c2);
+
+ index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ step0_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step0_ptr, &index, 1, ""), "");
+ step1_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step1_ptr, &index, 1, ""), "");
+ step2_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step2_ptr, &index, 1, ""), "");
+
+ /** XXX with a little work, we could remove the add here and just
+ * compare c0_vec > step0_vec.
+ */
+ s0_vec = LLVMBuildAdd(builder, c0_vec, step0_vec, "");
+ s1_vec = LLVMBuildAdd(builder, c1_vec, step1_vec, "");
+ s2_vec = LLVMBuildAdd(builder, c2_vec, step2_vec, "");
+ m0_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, s0_vec, zeros);
+ m1_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, s1_vec, zeros);
+ m2_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, s2_vec, zeros);
+
+ m = LLVMBuildAnd(builder, m0_vec, m1_vec, "");
+ m = LLVMBuildAnd(builder, m, m2_vec, "");
+
+ lp_build_name(m, "m");
+
+ *mask = m;
+}
+
+
+/**
* Generate the fragment shader, depth/stencil test, and alpha tests.
+ * \param i which quad in the tile, in range [0,3]
*/
static void
generate_fs(struct llvmpipe_context *lp,
@@ -190,7 +275,13 @@ generate_fs(struct llvmpipe_context *lp,
struct lp_build_sampler_soa *sampler,
LLVMValueRef *pmask,
LLVMValueRef *color,
- LLVMValueRef depth_ptr)
+ LLVMValueRef depth_ptr,
+ LLVMValueRef c0,
+ LLVMValueRef c1,
+ LLVMValueRef c2,
+ LLVMValueRef step0_ptr,
+ LLVMValueRef step1_ptr,
+ LLVMValueRef step2_ptr)
{
const struct tgsi_token *tokens = shader->base.tokens;
LLVMTypeRef elem_type;
@@ -205,6 +296,8 @@ generate_fs(struct llvmpipe_context *lp,
unsigned attrib;
unsigned chan;
+ assert(i < 4);
+
elem_type = lp_build_elem_type(type);
vec_type = lp_build_vec_type(type);
int_vec_type = lp_build_int_vec_type(type);
@@ -224,8 +317,13 @@ generate_fs(struct llvmpipe_context *lp,
}
lp_build_flow_scope_declare(flow, &z);
+ /* do triangle edge testing */
+ generate_tri_edge_mask(builder, i, pmask,
+ c0, c1, c2, step0_ptr, step1_ptr, step2_ptr);
+
lp_build_mask_begin(&mask, flow, type, *pmask);
+
early_depth_test =
key->depth.enabled &&
!key->alpha.enabled &&
@@ -376,17 +474,18 @@ generate_fragment(struct llvmpipe_context *lp,
LLVMTypeRef fs_int_vec_type;
LLVMTypeRef blend_vec_type;
LLVMTypeRef blend_int_vec_type;
- LLVMTypeRef arg_types[9];
+ LLVMTypeRef arg_types[14];
LLVMTypeRef func_type;
+ LLVMTypeRef int32_vec4_type = lp_build_int32_vec4_type();
LLVMValueRef context_ptr;
LLVMValueRef x;
LLVMValueRef y;
LLVMValueRef a0_ptr;
LLVMValueRef dadx_ptr;
LLVMValueRef dady_ptr;
- LLVMValueRef mask_ptr;
LLVMValueRef color_ptr;
LLVMValueRef depth_ptr;
+ LLVMValueRef c0, c1, c2, step0_ptr, step1_ptr, step2_ptr;
LLVMBasicBlockRef block;
LLVMBuilderRef builder;
LLVMValueRef x0;
@@ -468,9 +567,17 @@ generate_fragment(struct llvmpipe_context *lp,
arg_types[3] = LLVMPointerType(fs_elem_type, 0); /* a0 */
arg_types[4] = LLVMPointerType(fs_elem_type, 0); /* dadx */
arg_types[5] = LLVMPointerType(fs_elem_type, 0); /* dady */
- arg_types[6] = LLVMPointerType(fs_int_vec_type, 0); /* mask */
- arg_types[7] = LLVMPointerType(blend_vec_type, 0); /* color */
- arg_types[8] = LLVMPointerType(fs_int_vec_type, 0); /* depth */
+ arg_types[6] = LLVMPointerType(blend_vec_type, 0); /* color */
+ arg_types[7] = LLVMPointerType(fs_int_vec_type, 0); /* depth */
+ arg_types[8] = LLVMInt32Type(); /* c0 */
+ arg_types[9] = LLVMInt32Type(); /* c1 */
+ arg_types[10] = LLVMInt32Type(); /* c2 */
+ /* Note: the step arrays are built as int32[16] but we interpret
+ * them here as int32_vec4[4].
+ */
+ arg_types[11] = LLVMPointerType(int32_vec4_type, 0);/* step0 */
+ arg_types[12] = LLVMPointerType(int32_vec4_type, 0);/* step1 */
+ arg_types[13] = LLVMPointerType(int32_vec4_type, 0);/* step2 */
func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0);
@@ -486,9 +593,14 @@ generate_fragment(struct llvmpipe_context *lp,
a0_ptr = LLVMGetParam(variant->function, 3);
dadx_ptr = LLVMGetParam(variant->function, 4);
dady_ptr = LLVMGetParam(variant->function, 5);
- mask_ptr = LLVMGetParam(variant->function, 6);
- color_ptr = LLVMGetParam(variant->function, 7);
- depth_ptr = LLVMGetParam(variant->function, 8);
+ color_ptr = LLVMGetParam(variant->function, 6);
+ depth_ptr = LLVMGetParam(variant->function, 7);
+ c0 = LLVMGetParam(variant->function, 8);
+ c1 = LLVMGetParam(variant->function, 9);
+ c2 = LLVMGetParam(variant->function, 10);
+ step0_ptr = LLVMGetParam(variant->function, 11);
+ step1_ptr = LLVMGetParam(variant->function, 12);
+ step2_ptr = LLVMGetParam(variant->function, 13);
lp_build_name(context_ptr, "context");
lp_build_name(x, "x");
@@ -496,9 +608,14 @@ generate_fragment(struct llvmpipe_context *lp,
lp_build_name(a0_ptr, "a0");
lp_build_name(dadx_ptr, "dadx");
lp_build_name(dady_ptr, "dady");
- lp_build_name(mask_ptr, "mask");
lp_build_name(color_ptr, "color");
lp_build_name(depth_ptr, "depth");
+ lp_build_name(c0, "c0");
+ lp_build_name(c1, "c1");
+ lp_build_name(c2, "c2");
+ lp_build_name(step0_ptr, "step0");
+ lp_build_name(step1_ptr, "step1");
+ lp_build_name(step2_ptr, "step2");
/*
* Function body
@@ -526,7 +643,6 @@ generate_fragment(struct llvmpipe_context *lp,
if(i != 0)
lp_build_interp_soa_update(&interp, i);
- fs_mask[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, mask_ptr, &index, 1, ""), "");
depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &index, 1, "");
generate_fs(lp, shader, key,
@@ -536,9 +652,11 @@ generate_fragment(struct llvmpipe_context *lp,
i,
&interp,
sampler,
- &fs_mask[i],
+ &fs_mask[i], /* output */
out_color,
- depth_ptr_i);
+ depth_ptr_i,
+ c0, c1, c2,
+ step0_ptr, step1_ptr, step2_ptr);
for(chan = 0; chan < NUM_CHANNELS; ++chan)
fs_out_color[chan][i] = out_color[chan];