summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers
diff options
context:
space:
mode:
authorBrian Paul <brianp@vmware.com>2010-01-15 11:21:16 -0700
committerBrian Paul <brianp@vmware.com>2010-01-15 11:21:16 -0700
commit2797f2bf57562c95a601a67edca3089641215cc4 (patch)
treeff273ffd3535b8cd2805b20524054b89687fd020 /src/gallium/drivers
parent3b1920a34903dfb753bc2a0461fef204d39846c6 (diff)
llvmpipe: generate two shader varients, one omits triangle in/out testing
When we know that a 4x4 pixel block is entirely inside of a triangle use the jit function which omits the in/out test code. Results in a few percent speedup in many tests.
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast.c52
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast.h6
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast_priv.h43
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast_tri.c11
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup.c12
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup.h7
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state.h4
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_fs.c59
8 files changed, 142 insertions, 52 deletions
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c
index 75562bf62dc..d03ba1752d6 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast.c
@@ -344,9 +344,6 @@ void lp_rast_set_state( struct lp_rasterizer *rast,
-/* Within a tile:
- */
-
/**
* Run the shader on all blocks in a tile. This is used when a tile is
* completely contained inside a triangle.
@@ -356,8 +353,8 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast,
unsigned thread_index,
const union lp_rast_cmd_arg arg )
{
- /* Set c1,c2,c3 to large values so the in/out test always passes */
- const int32_t c1 = INT_MIN, c2 = INT_MIN, c3 = INT_MIN;
+ const struct lp_rast_state *state = rast->tasks[thread_index].current_state;
+ struct lp_rast_tile *tile = &rast->tasks[thread_index].tile;
const struct lp_rast_shader_inputs *inputs = arg.shade_tile;
const unsigned tile_x = rast->tasks[thread_index].x;
const unsigned tile_y = rast->tasks[thread_index].y;
@@ -365,16 +362,35 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast,
LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
- /* Use the existing preference for 4x4 (four quads) shading:
- */
- for (y = 0; y < TILE_SIZE; y += 4)
- for (x = 0; x < TILE_SIZE; x += 4)
- lp_rast_shade_quads( rast,
- thread_index,
- inputs,
- tile_x + x,
- tile_y + y,
- c1, c2, c3);
+ /* render the whole 64x64 tile in 4x4 chunks */
+ for (y = 0; y < TILE_SIZE; y += 4){
+ for (x = 0; x < TILE_SIZE; x += 4) {
+ uint8_t *color[PIPE_MAX_COLOR_BUFS];
+ uint32_t *depth;
+ unsigned block_offset, i;
+
+ /* offset of the 16x16 pixel block within the tile */
+ block_offset = ((y / 4) * (16 * 16) + (x / 4) * 16);
+
+ /* color buffer */
+ for (i = 0; i < rast->state.fb.nr_cbufs; i++)
+ color[i] = tile->color[i] + 4 * block_offset;
+
+ /* depth buffer */
+ depth = tile->depth + block_offset;
+
+ /* run shader */
+ state->jit_function[0]( &state->jit_context,
+ tile_x + x, tile_y + y,
+ inputs->a0,
+ inputs->dadx,
+ inputs->dady,
+ color,
+ depth,
+ INT_MIN, INT_MIN, INT_MIN,
+ NULL, NULL, NULL );
+ }
+ }
}
@@ -411,7 +427,7 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast,
iy = y % TILE_SIZE;
/* offset of the 16x16 pixel block within the tile */
- block_offset = ((iy/4)*(16*16) + (ix/4)*16);
+ block_offset = ((iy / 4) * (16 * 16) + (ix / 4) * 16);
/* color buffer */
for (i = 0; i < rast->state.fb.nr_cbufs; i++)
@@ -433,7 +449,7 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast,
#endif
/* run shader */
- state->jit_function( &state->jit_context,
+ state->jit_function[1]( &state->jit_context,
x, y,
inputs->a0,
inputs->dadx,
@@ -445,8 +461,6 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast,
}
-/* End of tile:
- */
/**
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h
index d926adb6b22..2a97fe4c67b 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast.h
@@ -66,8 +66,10 @@ struct lp_rast_state {
/* The shader itself. Probably we also need to pass a pointer to
* the tile color/z/stencil data somehow:
- */
- lp_jit_frag_func jit_function;
+ * jit_function[0] skips the triangle in/out test code
+ * jit_function[1] does triangle in/out testing
+ */
+ lp_jit_frag_func jit_function[2];
boolean opaque;
};
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
index 5afdeab049c..607968e3459 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
@@ -30,6 +30,7 @@
#include "pipe/p_thread.h"
#include "lp_rast.h"
+#include "lp_tile_soa.h"
#define MAX_THREADS 8 /* XXX probably temporary here */
@@ -126,4 +127,46 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast,
unsigned x, unsigned y,
int32_t c1, int32_t c2, int32_t c3);
+
+/**
+ * Shade all pixels in a 4x4 block. The fragment code omits the
+ * triangle in/out tests.
+ * \param x, y location of 4x4 block in window coords
+ */
+static INLINE void
+lp_rast_shade_quads_all( struct lp_rasterizer *rast,
+ unsigned thread_index,
+ const struct lp_rast_shader_inputs *inputs,
+ unsigned x, unsigned y )
+{
+ const struct lp_rast_state *state = rast->tasks[thread_index].current_state;
+ struct lp_rast_tile *tile = &rast->tasks[thread_index].tile;
+ const unsigned ix = x % TILE_SIZE, iy = y % TILE_SIZE;
+ uint8_t *color[PIPE_MAX_COLOR_BUFS];
+ void *depth;
+ unsigned block_offset, i;
+
+ /* offset of the containing 16x16 pixel block within the tile */
+ block_offset = (iy / 4) * (16 * 16) + (ix / 4) * 16;
+
+ /* color buffer */
+ for (i = 0; i < rast->state.fb.nr_cbufs; i++)
+ color[i] = tile->color[i] + 4 * block_offset;
+
+ /* depth buffer */
+ depth = tile->depth + block_offset;
+
+ /* run shader */
+ state->jit_function[0]( &state->jit_context,
+ x, y,
+ inputs->a0,
+ inputs->dadx,
+ inputs->dady,
+ color,
+ depth,
+ INT_MIN, INT_MIN, INT_MIN,
+ NULL, NULL, NULL );
+}
+
+
#endif
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
index bc7397f50c5..9c3f699ec71 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
@@ -89,13 +89,10 @@ block_full_4( struct lp_rasterizer_task *rast_task,
const struct lp_rast_triangle *tri,
int x, int y )
{
- /* Set c1,c2,c3 to large values so the in/out test always passes */
- const int32_t c1 = INT_MIN, c2 = INT_MIN, c3 = INT_MIN;
- lp_rast_shade_quads(rast_task->rast,
- rast_task->thread_index,
- &tri->inputs,
- x, y,
- c1, c2, c3);
+ lp_rast_shade_quads_all(rast_task->rast,
+ rast_task->thread_index,
+ &tri->inputs,
+ x, y);
}
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c
index 284337e8252..355c0518372 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -362,14 +362,16 @@ lp_setup_set_fs_inputs( struct setup_context *setup,
}
void
-lp_setup_set_fs_function( struct setup_context *setup,
- lp_jit_frag_func jit_function,
- boolean opaque )
+lp_setup_set_fs_functions( struct setup_context *setup,
+ lp_jit_frag_func jit_function0,
+ lp_jit_frag_func jit_function1,
+ boolean opaque )
{
- LP_DBG(DEBUG_SETUP, "%s %p\n", __FUNCTION__, (void *) jit_function);
+ LP_DBG(DEBUG_SETUP, "%s %p\n", __FUNCTION__, (void *) jit_function0);
/* FIXME: reference count */
- setup->fs.current.jit_function = jit_function;
+ setup->fs.current.jit_function[0] = jit_function0;
+ setup->fs.current.jit_function[1] = jit_function1;
setup->fs.current.opaque = opaque;
setup->dirty |= LP_SETUP_NEW_FS;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h
index c7ef3d394a4..407f7527770 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.h
+++ b/src/gallium/drivers/llvmpipe/lp_setup.h
@@ -96,9 +96,10 @@ lp_setup_set_fs_inputs( struct setup_context *setup,
unsigned nr );
void
-lp_setup_set_fs_function( struct setup_context *setup,
- lp_jit_frag_func jit_function,
- boolean opaque );
+lp_setup_set_fs_functions( struct setup_context *setup,
+ lp_jit_frag_func jit_function0,
+ lp_jit_frag_func jit_function1,
+ boolean opaque );
void
lp_setup_set_fs_constants(struct setup_context *setup,
diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h
index ddb152c0740..224b6e523c3 100644
--- a/src/gallium/drivers/llvmpipe/lp_state.h
+++ b/src/gallium/drivers/llvmpipe/lp_state.h
@@ -88,9 +88,9 @@ struct lp_fragment_shader_variant
struct lp_fragment_shader_variant_key key;
- LLVMValueRef function;
+ LLVMValueRef function[2];
- lp_jit_frag_func jit_function;
+ lp_jit_frag_func jit_function[2];
struct lp_fragment_shader_variant *next;
};
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index f15fca293bc..a8f4a4ed463 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -349,9 +349,26 @@ generate_scissor_test(LLVMBuilderRef builder,
}
+static LLVMValueRef
+build_int32_vec_const(int value)
+{
+ struct lp_type i32_type;
+
+ memset(&i32_type, 0, sizeof i32_type);
+ i32_type.floating = FALSE; /* values are integers */
+ i32_type.sign = TRUE; /* values are signed */
+ i32_type.norm = FALSE; /* values are not normalized */
+ i32_type.width = 32; /* 32-bit int values */
+ i32_type.length = 4; /* 4 elements per vector */
+ return lp_build_int_const_scalar(i32_type, value);
+}
+
+
+
/**
* Generate the fragment shader, depth/stencil test, and alpha tests.
* \param i which quad in the tile, in range [0,3]
+ * \param do_tri_test if 1, do triangle edge in/out testing
*/
static void
generate_fs(struct llvmpipe_context *lp,
@@ -366,6 +383,7 @@ generate_fs(struct llvmpipe_context *lp,
LLVMValueRef *pmask,
LLVMValueRef (*color)[4],
LLVMValueRef depth_ptr,
+ unsigned do_tri_test,
LLVMValueRef c0,
LLVMValueRef c1,
LLVMValueRef c2,
@@ -411,8 +429,13 @@ generate_fs(struct llvmpipe_context *lp,
lp_build_flow_scope_declare(flow, &z);
/* do triangle edge testing */
- generate_tri_edge_mask(builder, i, pmask,
- c0, c1, c2, step0_ptr, step1_ptr, step2_ptr);
+ if (do_tri_test) {
+ generate_tri_edge_mask(builder, i, pmask,
+ c0, c1, c2, step0_ptr, step1_ptr, step2_ptr);
+ }
+ else {
+ *pmask = build_int32_vec_const(~0);
+ }
/* 'mask' will control execution based on quad's pixel alive/killed state */
lp_build_mask_begin(&mask, flow, type, *pmask);
@@ -563,7 +586,8 @@ generate_blend(const struct pipe_blend_state *blend,
static void
generate_fragment(struct llvmpipe_context *lp,
struct lp_fragment_shader *shader,
- struct lp_fragment_shader_variant *variant)
+ struct lp_fragment_shader_variant *variant,
+ unsigned do_tri_test)
{
struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen);
const struct lp_fragment_shader_variant_key *key = &variant->key;
@@ -656,7 +680,7 @@ generate_fragment(struct llvmpipe_context *lp,
function = LLVMAddFunction(screen->module, "shader", func_type);
LLVMSetFunctionCallConv(function, LLVMCCallConv);
- variant->function = function;
+ variant->function[do_tri_test] = function;
/* XXX: need to propagate noalias down into color param now we are
@@ -738,6 +762,7 @@ generate_fragment(struct llvmpipe_context *lp,
&fs_mask[i], /* output */
out_color,
depth_ptr_i,
+ do_tri_test,
c0, c1, c2,
step0_ptr, step1_ptr, step2_ptr);
@@ -812,10 +837,10 @@ generate_fragment(struct llvmpipe_context *lp,
/*
* Translate the LLVM IR into machine code.
*/
- variant->jit_function = (lp_jit_frag_func)LLVMGetPointerToGlobal(screen->engine, function);
+ variant->jit_function[do_tri_test] = (lp_jit_frag_func)LLVMGetPointerToGlobal(screen->engine, function);
if (LP_DEBUG & DEBUG_ASM)
- lp_disassemble(variant->jit_function);
+ lp_disassemble(variant->jit_function[do_tri_test]);
}
@@ -887,7 +912,8 @@ generate_variant(struct llvmpipe_context *lp,
variant->shader = shader;
memcpy(&variant->key, key, sizeof *key);
- generate_fragment(lp, shader, variant);
+ generate_fragment(lp, shader, variant, 0);
+ generate_fragment(lp, shader, variant, 1);
/* insert new variant into linked list */
variant->next = shader->variants;
@@ -947,11 +973,15 @@ llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs)
variant = shader->variants;
while(variant) {
struct lp_fragment_shader_variant *next = variant->next;
+ unsigned i;
- if(variant->function) {
- if(variant->jit_function)
- LLVMFreeMachineCodeForFunction(screen->engine, variant->function);
- LLVMDeleteFunction(variant->function);
+ for (i = 0; i < Elements(variant->function); i++) {
+ if (variant->function[i]) {
+ if (variant->jit_function[i])
+ LLVMFreeMachineCodeForFunction(screen->engine,
+ variant->function[i]);
+ LLVMDeleteFunction(variant->function[i]);
+ }
}
FREE(variant);
@@ -1093,7 +1123,8 @@ llvmpipe_update_fs(struct llvmpipe_context *lp)
!shader->info.uses_kill
? TRUE : FALSE;
- lp_setup_set_fs_function(lp->setup,
- shader->current->jit_function,
- opaque);
+ lp_setup_set_fs_functions(lp->setup,
+ shader->current->jit_function[0],
+ shader->current->jit_function[1],
+ opaque);
}