summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBrian Paul <[email protected]>2009-12-02 15:13:45 -0700
committerBrian Paul <[email protected]>2009-12-02 15:13:47 -0700
commit866e6856d39efe9b1ec739587f420a640ad8618e (patch)
tree66e8ca0bd3e48ae46bd73a7d568e64ae115c3679
parent5750a6426bc8d47f9801be5896b2d0f5ae3a5b12 (diff)
llvmpipe: execute shaders on 4x4 blocks instead of 8x2
This matches the convention used by the recursive rasterizer. Also fixed assorted typos, comments, etc. Now tri-z.c, gears.c, etc look basically right but there's still some cracks in triangle rasterization.
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_interp.c135
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_interp.h10
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast.c22
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_fs.c12
4 files changed, 116 insertions, 63 deletions
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
index 338dbca6d1e..affeeca6ff9 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
@@ -45,6 +45,36 @@
#include "lp_bld_interp.h"
+/*
+ * The shader JIT function operates on blocks of quads.
+ * Each block has 2x2 quads and each quad has 2x2 pixels.
+ *
+ * We iterate over the quads in order 0, 1, 2, 3:
+ *
+ * #################
+ * # | # | #
+ * #---0---#---1---#
+ * # | # | #
+ * #################
+ * # | # | #
+ * #---2---#---3---#
+ * # | # | #
+ * #################
+ *
+ * Within each quad, we have four pixels which are represented in SOA
+ * order:
+ *
+ * #########
+ * # 0 | 1 #
+ * #---+---#
+ * # 2 | 3 #
+ * #########
+ *
+ * So the green channel (for example) of the four pixels is stored in
+ * a single vector register: {g0, g1, g2, g3}.
+ */
+
+
static void
attrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix)
{
@@ -55,6 +85,10 @@ attrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix
}
+/**
+ * Initialize the bld->a0, dadx, dady fields. This involves fetching
+ * those values from the arrays which are passed into the JIT function.
+ */
static void
coeffs_init(struct lp_build_interp_soa_context *bld,
LLVMValueRef a0_ptr,
@@ -91,7 +125,7 @@ coeffs_init(struct lp_build_interp_soa_context *bld,
case TGSI_INTERPOLATE_CONSTANT:
a0 = LLVMBuildLoad(builder, LLVMBuildGEP(builder, a0_ptr, &index, 1, ""), "");
a0 = lp_build_broadcast_scalar(&bld->base, a0);
- attrib_name(a0, attrib, chan, ".dady");
+ attrib_name(a0, attrib, chan, ".a0");
break;
default:
@@ -135,30 +169,13 @@ coeff_multiply(struct lp_build_interp_soa_context *bld,
/**
- * Multiply the dadx and dady with the xstep and ystep respectively.
+ * Emit LLVM code to compute the fragment shader input attribute values.
+ * For example, for a color input, we'll compute red, green, blue and alpha
+ * values for the four pixels in a quad.
+ * Recall that we're operating on 4-element vectors so each arithmetic
+ * operation is operating on the four pixels in a quad.
*/
static void
-coeffs_update(struct lp_build_interp_soa_context *bld)
-{
- unsigned attrib;
- unsigned chan;
-
- for(attrib = 0; attrib < bld->num_attribs; ++attrib) {
- unsigned mask = bld->mask[attrib];
- unsigned mode = bld->mode[attrib];
- if (mode != TGSI_INTERPOLATE_CONSTANT) {
- for(chan = 0; chan < NUM_CHANNELS; ++chan) {
- if(mask & (1 << chan)) {
- bld->dadx[attrib][chan] = coeff_multiply(bld, bld->dadx[attrib][chan], bld->xstep);
- bld->dady[attrib][chan] = coeff_multiply(bld, bld->dady[attrib][chan], bld->ystep);
- }
- }
- }
- }
-}
-
-
-static void
attribs_init(struct lp_build_interp_soa_context *bld)
{
LLVMValueRef x = bld->pos[0];
@@ -180,7 +197,9 @@ attribs_init(struct lp_build_interp_soa_context *bld)
res = a0;
if (mode != TGSI_INTERPOLATE_CONSTANT) {
+ /* res = res + x * dadx */
res = lp_build_add(&bld->base, res, lp_build_mul(&bld->base, x, dadx));
+ /* res = res + y * dady */
res = lp_build_add(&bld->base, res, lp_build_mul(&bld->base, y, dady));
}
@@ -204,13 +223,19 @@ attribs_init(struct lp_build_interp_soa_context *bld)
}
+/**
+ * Increment the shader input attribute values.
+ * This is called when we move from one quad to the next.
+ */
static void
-attribs_update(struct lp_build_interp_soa_context *bld)
+attribs_update(struct lp_build_interp_soa_context *bld, int quad_index)
{
LLVMValueRef oow = NULL;
unsigned attrib;
unsigned chan;
+ assert(quad_index < 4);
+
for(attrib = 0; attrib < bld->num_attribs; ++attrib) {
unsigned mask = bld->mask[attrib];
unsigned mode = bld->mode[attrib];
@@ -224,13 +249,21 @@ attribs_update(struct lp_build_interp_soa_context *bld)
res = bld->attribs_pre[attrib][chan];
- if(bld->xstep)
+ if (quad_index == 1 || quad_index == 3) {
+ /* top-right or bottom-right quad */
+ /* build res = res + dadx + dadx */
res = lp_build_add(&bld->base, res, dadx);
+ res = lp_build_add(&bld->base, res, dadx);
+ }
- if(bld->ystep)
+ if (quad_index == 2 || quad_index == 3) {
+ /* bottom-left or bottom-right quad */
+ /* build res = res + dady + dady */
res = lp_build_add(&bld->base, res, dady);
+ res = lp_build_add(&bld->base, res, dady);
+ }
- bld->attribs_pre[attrib][chan] = res;
+ //XXX bld->attribs_pre[attrib][chan] = res;
if (mode == TGSI_INTERPOLATE_PERSPECTIVE) {
LLVMValueRef w = bld->pos[3];
@@ -268,17 +301,32 @@ pos_init(struct lp_build_interp_soa_context *bld,
}
+/**
+ * Update quad position values when moving to the next quad.
+ */
static void
-pos_update(struct lp_build_interp_soa_context *bld)
+pos_update(struct lp_build_interp_soa_context *bld, int quad_index)
{
LLVMValueRef x = bld->attribs[0][0];
LLVMValueRef y = bld->attribs[0][1];
+ const int xstep = 2, ystep = 2;
- if(bld->xstep)
- x = lp_build_add(&bld->base, x, lp_build_const_scalar(bld->base.type, bld->xstep));
+ if (quad_index == 1 || quad_index == 3) {
+ /* top-right or bottom-right quad in block */
+ /* build x += xstep */
+ x = lp_build_add(&bld->base, x,
+ lp_build_const_scalar(bld->base.type, xstep));
+ }
- if(bld->ystep)
- y = lp_build_add(&bld->base, y, lp_build_const_scalar(bld->base.type, bld->ystep));
+ if (quad_index == 2) {
+ /* bottom-left quad in block */
+ /* build y += ystep */
+ y = lp_build_add(&bld->base, y,
+ lp_build_const_scalar(bld->base.type, ystep));
+ /* build x -= xstep */
+ x = lp_build_sub(&bld->base, x,
+ lp_build_const_scalar(bld->base.type, xstep));
+ }
lp_build_name(x, "pos.x");
lp_build_name(y, "pos.y");
@@ -288,6 +336,9 @@ pos_update(struct lp_build_interp_soa_context *bld)
}
+/**
+ * Initialize fragment shader input attribute info.
+ */
void
lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
const struct tgsi_token *tokens,
@@ -297,9 +348,7 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
LLVMValueRef dadx_ptr,
LLVMValueRef dady_ptr,
LLVMValueRef x0,
- LLVMValueRef y0,
- int xstep,
- int ystep)
+ LLVMValueRef y0)
{
struct tgsi_parse_context parse;
struct tgsi_full_declaration *decl;
@@ -357,21 +406,19 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
pos_init(bld, x0, y0);
attribs_init(bld);
-
- bld->xstep = xstep;
- bld->ystep = ystep;
-
- coeffs_update(bld);
}
/**
- * Advance the position and inputs with the xstep and ystep.
+ * Advance the position and inputs to the given quad within the block.
*/
void
-lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld)
+lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld,
+ int quad_index)
{
- pos_update(bld);
+ assert(quad_index < 4);
+
+ pos_update(bld, quad_index);
- attribs_update(bld);
+ attribs_update(bld, quad_index);
}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.h b/src/gallium/drivers/llvmpipe/lp_bld_interp.h
index 9c57a10879b..e2b3bc1bf0b 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_interp.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.h
@@ -63,9 +63,6 @@ struct lp_build_interp_soa_context
LLVMValueRef dadx[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
LLVMValueRef dady[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
- int xstep;
- int ystep;
-
/* Attribute values before perspective divide */
LLVMValueRef attribs_pre[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
@@ -88,12 +85,11 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
LLVMValueRef dadx_ptr,
LLVMValueRef dady_ptr,
LLVMValueRef x0,
- LLVMValueRef y0,
- int xstep,
- int ystep);
+ LLVMValueRef y0);
void
-lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld);
+lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld,
+ int quad_index);
#endif /* LP_BLD_INTERP_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c
index 09495f6288e..f88dd4ae680 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast.c
@@ -126,8 +126,6 @@ void lp_rast_end( struct lp_rasterizer *rast )
}
-
-
/**
* Begining rasterization of a tile.
* \param x window X position of the tile, in pixels
@@ -152,7 +150,7 @@ void lp_rast_clear_color( struct lp_rasterizer *rast,
{
const uint8_t *clear_color = arg.clear_color;
- RAST_DEBUG("%s %x,%x,%x,%x\n", __FUNCTION__,
+ RAST_DEBUG("%s 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__,
clear_color[0],
clear_color[1],
clear_color[2],
@@ -181,7 +179,7 @@ void lp_rast_clear_zstencil( struct lp_rasterizer *rast,
{
unsigned i, j;
- RAST_DEBUG("%s\n", __FUNCTION__);
+ RAST_DEBUG("%s 0x%x\n", __FUNCTION__, arg.clear_zstencil);
for (i = 0; i < TILE_SIZE; i++)
for (j = 0; j < TILE_SIZE; j++)
@@ -225,6 +223,9 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast,
}
+/**
+ * Compute shading for a 4x4 block of pixels.
+ */
void lp_rast_shade_quads( struct lp_rasterizer *rast,
const struct lp_rast_shader_inputs *inputs,
unsigned x, unsigned y,
@@ -237,6 +238,7 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast,
void *depth;
uint32_t ALIGN16_ATTRIB masks[2][2][2][2];
unsigned ix, iy;
+ int block_offset;
/* Sanity checks */
assert(x % TILE_VECTOR_WIDTH == 0);
@@ -275,16 +277,20 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast,
masks[1][1][1][1] = mask & (1 << (1*8+1*4+1*2+1)) ? ~0 : 0;
#endif
+ assert((x % 2) == 0);
+ assert((y % 2) == 0);
+
ix = x % TILE_SIZE;
iy = y % TILE_SIZE;
+ /* offset of the 16x16 pixel block within the tile */
+ block_offset = ((iy/4)*(16*16) + (ix/4)*16);
+
/* color buffer */
- color = &TILE_PIXEL(tile->color, ix, iy, 0);
+ color = tile->color + 4 * block_offset;
/* depth buffer */
- assert((x % 2) == 0);
- assert((y % 2) == 0);
- depth = tile->depth + (iy/4)*(16*16) + (ix/4)*16;
+ depth = tile->depth + block_offset;
/* XXX: This will most likely fail on 32bit x86 without -mstackrealign */
assert(lp_check_alignment(masks, 16));
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 0541d36580c..aa9c0066333 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -358,6 +358,9 @@ generate_blend(const struct pipe_blend_state *blend,
/**
* Generate the runtime callable function for the whole fragment pipeline.
+ * Note that the function which we generate operates on a block of 16
+ * pixels at at time. The block contains 2x2 quads. Each quad contains
+ * 2x2 pixels.
*/
static struct lp_fragment_shader_variant *
generate_fragment(struct llvmpipe_context *lp,
@@ -437,8 +440,8 @@ generate_fragment(struct llvmpipe_context *lp,
fs_type.sign = TRUE; /* values are signed */
fs_type.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */
fs_type.width = 32; /* 32-bit float */
- fs_type.length = 4; /* 4 element per vector */
- num_fs = 4;
+ fs_type.length = 4; /* 4 elements per vector */
+ num_fs = 4; /* number of quads per block */
memset(&blend_type, 0, sizeof blend_type);
blend_type.floating = FALSE; /* values are integers */
@@ -509,18 +512,19 @@ generate_fragment(struct llvmpipe_context *lp,
lp_build_interp_soa_init(&interp, shader->base.tokens, builder, fs_type,
a0_ptr, dadx_ptr, dady_ptr,
- x0, y0, 2, 0);
+ x0, y0);
/* code generated texture sampling */
sampler = lp_llvm_sampler_soa_create(key->sampler, context_ptr);
+ /* loop over quads in the block */
for(i = 0; i < num_fs; ++i) {
LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
LLVMValueRef out_color[NUM_CHANNELS];
LLVMValueRef depth_ptr_i;
if(i != 0)
- lp_build_interp_soa_update(&interp);
+ lp_build_interp_soa_update(&interp, i);
fs_mask[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, mask_ptr, &index, 1, ""), "");
depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &index, 1, "");