diff options
| field | value | date |
|---|---|---|
| author | Keith Whitwell <[email protected]> | 2009-10-20 02:46:00 +0100 |
| committer | Keith Whitwell <[email protected]> | 2009-10-20 02:46:00 +0100 |
| commit | 7670628061c2a6ce0a1a787556b0e33a38fd3049 (patch) | |
| tree | 7aed343a2d002847cb33e0b6ec61454fcdb34156 | |
| parent | 5b07d4de38b732f99237161d940f40e3ce6e29c3 (diff) | |
llvmpipe: precalculate some offsets
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_rast.c | 20 |
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_rast.h | 2 |
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_rast_priv.h | 2 |
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_rast_tri.c | 80 |
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_setup_tri.c | 26 |
5 files changed, 51 insertions, 79 deletions
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 85b756e4535..39fb8cdb6ba 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -193,12 +193,7 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, const union lp_rast_cmd_arg arg ) { const struct lp_rast_shader_inputs *inputs = arg.shade_tile; - static const uint32_t ALIGN16_ATTRIB masks[4][4] = - { {~0, ~0, ~0, ~0}, - {~0, ~0, ~0, ~0}, - {~0, ~0, ~0, ~0}, - {~0, ~0, ~0, ~0} }; - + const unsigned mask = ~0; unsigned x, y; RAST_DEBUG("%s\n", __FUNCTION__); @@ -207,26 +202,31 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, */ for (y = 0; y < TILE_SIZE; y += 4) for (x = 0; x < TILE_SIZE; x += 4) - lp_rast_shade_quads( rast, inputs, rast->x + x, rast->y + y, &masks[0][0]); + lp_rast_shade_quads( rast, inputs, rast->x + x, rast->y + y, mask); } void lp_rast_shade_quads( struct lp_rasterizer *rast, const struct lp_rast_shader_inputs *inputs, unsigned x, unsigned y, - const uint32_t *masks) + unsigned mask) { #if 1 const struct lp_rast_state *state = inputs->state; struct lp_rast_tile *tile = &rast->tile; void *color; void *depth; - unsigned ix, iy; + uint32_t ALIGN16_ATTRIB masks[16]; + unsigned ix, iy, i; /* Sanity checks */ assert(x % TILE_VECTOR_WIDTH == 0); assert(y % TILE_VECTOR_HEIGHT == 0); + /* mask */ + for (i = 0; i < 16; ++i) + masks[i] = mask & (1 << i) ? 
~0 : 0; + ix = x % TILE_SIZE; iy = y % TILE_SIZE; @@ -251,7 +251,7 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, inputs->a0, inputs->dadx, inputs->dady, - masks, + &masks[0], color, depth); #else diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 97250071199..318bf73b159 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -111,6 +111,8 @@ struct lp_rast_triangle { int c2; int c3; + int step[3][16]; + /* XXX: this is only used inside lp_setup_tri.c, don't really * need it here: */ diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index f438faaf36c..2333729807e 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -80,6 +80,6 @@ struct lp_rasterizer { void lp_rast_shade_quads( struct lp_rasterizer *rast, const struct lp_rast_shader_inputs *inputs, unsigned x, unsigned y, - const uint32_t *masks); + unsigned masks); #endif diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 5f22aca668a..b5a3753a881 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -37,100 +37,44 @@ #define BLOCKSIZE 4 + /* Render a 4x4 unmasked block: */ static void block_full( struct lp_rasterizer *rast, const struct lp_rast_triangle *tri, int x, int y ) { - static const uint32_t ALIGN16_ATTRIB masks[4][4] = - { {~0, ~0, ~0, ~0}, - {~0, ~0, ~0, ~0}, - {~0, ~0, ~0, ~0}, - {~0, ~0, ~0, ~0} }; + unsigned mask = ~0; - lp_rast_shade_quads(rast, &tri->inputs, x, y, &masks[0][0]); + lp_rast_shade_quads(rast, &tri->inputs, x, y, mask); } -static INLINE void -do_quad( const struct lp_rast_triangle *tri, - int c1, int c2, int c3, - int32_t *mask ) -{ - const int xstep1 = -tri->dy12 ; - const int xstep2 = -tri->dy23 ; - const int xstep3 = -tri->dy31 ; - - const int ystep1 = tri->dx12 ; - 
const int ystep2 = tri->dx23 ; - const int ystep3 = tri->dx31 ; - - mask[0] = ~(((c1) | - (c2) | - (c3)) >> 31); - - mask[1] = ~(((c1 + xstep1) | - (c2 + xstep2) | - (c3 + xstep3)) >> 31); - - mask[2] = ~(((c1 + ystep1) | - (c2 + ystep2) | - (c3 + ystep3)) >> 31); - - mask[3] = ~(((c1 + ystep1 + xstep1) | - (c2 + ystep2 + xstep2) | - (c3 + ystep3 + xstep3)) >> 31); -} /* Evaluate each pixel in a block, generate a mask and possibly render * the quad: */ static void do_block( struct lp_rasterizer *rast, - const struct lp_rast_triangle *tri, + const struct lp_rast_triangle *tri, int x, int y, int c1, int c2, int c3 ) { - const int step = 2 ; - - const int xstep1 = -step * tri->dy12; - const int xstep2 = -step * tri->dy23; - const int xstep3 = -step * tri->dy31; - - const int ystep1 = step * tri->dx12; - const int ystep2 = step * tri->dx23; - const int ystep3 = step * tri->dx31; + int i; + unsigned mask = 0; - int ix, iy; - uint32_t ALIGN16_ATTRIB mask[4][4]; - - - for (iy = 0; iy < 4; iy += 2) { - int cx1 = c1; - int cx2 = c2; - int cx3 = c3; - - for (ix = 0; ix < 2; ix ++) { - - do_quad(tri, cx1, cx2, cx3, (int32_t *)mask[iy+ix]); - - cx1 += xstep1; - cx2 += xstep2; - cx3 += xstep3; - } - - c1 += ystep1; - c2 += ystep2; - c3 += ystep3; - } + for (i = 0; i < 16; i++) + mask |= (~(((c1 + tri->step[0][i]) | + (c2 + tri->step[1][i]) | + (c3 + tri->step[2][i])) >> 31)) & (1 << i); + /* As we do trivial reject already, masks should rarely be all * zero: */ - lp_rast_shade_quads(rast, &tri->inputs, x, y, &mask[0][0] ); + lp_rast_shade_quads(rast, &tri->inputs, x, y, mask ); } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 6c9f75e90ce..a5a0407a57e 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -359,6 +359,32 @@ do_triangle_ccw(struct setup_context *setup, maxx = tri->maxx / TILESIZE; maxy = tri->maxy / TILESIZE; + { + int xstep1 = -tri->dy12; + int 
xstep2 = -tri->dy23; + int xstep3 = -tri->dy31; + + int ystep1 = tri->dx12; + int ystep2 = tri->dx23; + int ystep3 = tri->dx31; + + int ix, iy; + int qx, qy; + int i = 0; + + for (qy = 0; qy < 4; qy += 2) { + for (qx = 0; qx < 4; qx += 2) { + for (iy = 0; iy < 2; iy++) { + for (ix = 0; ix < 2; ix++, i++) { + tri->step[0][i] = (xstep1 * (qx+ix)) + (ystep1 * (qy+iy)); + tri->step[1][i] = (xstep2 * (qx+ix)) + (ystep2 * (qy+iy)); + tri->step[2][i] = (xstep3 * (qx+ix)) + (ystep3 * (qy+iy)); + } + } + } + } + } + /* Convert to tile coordinates: */ if (miny == maxy && minx == maxx) |