author    Brian Paul <[email protected]>    2010-01-08 14:49:34 -0700
committer Brian Paul <[email protected]>    2010-01-08 14:49:34 -0700
commit    f4321fbd961a0a891c7f40b16efc61aa791e03a9 (patch)
tree      9749777e97806cb35fefcd128cc549bb3753e737    /src/gallium/drivers
parent    5208af7853989c30bea6ce8c4ac659a2f2304225 (diff)
llvmpipe: optimize case when all four pixels are inside the triangle
When the incoming c0, c1, c2 values are equal to INT_MIN, it means that all pixels are inside the triangle, so we can skip the detailed per-pixel inside/outside tests. Use the new lp_build_if()/lp_build_endif() functions to generate the branching code. The code is disabled for now, however, because it's actually a little slower than the original code. A little more tuning may fix that.
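
Conceptually, the shortcut amounts to the scalar sketch below (the real code operates on 4-wide vectors and emits LLVM IR at shader compile time; detailed_in_out_test is a hypothetical stand-in for the step/compare sequence in the patch):

   /* Scalar sketch of the idea, not part of the patch: triangle setup
    * signals "this quad is fully inside" by passing INT_MIN as the edge
    * constants, so the per-pixel edge tests can be skipped entirely. */
   if (c0 == INT_MIN && c1 == INT_MIN && c2 == INT_MIN) {
      mask = ~0u;   /* every pixel in the quad is covered */
   }
   else {
      mask = detailed_in_out_test(c0, c1, c2, step0, step1, step2);
   }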
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r--   src/gallium/drivers/llvmpipe/lp_state_fs.c | 106
1 file changed, 77 insertions(+), 29 deletions(-)
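
For reference, the patch uses the new gallivm flow helpers roughly in this shape (a minimal sketch distilled from the hunk below, not a complete function):

   struct lp_build_flow_context *flow = lp_build_flow_create(builder);
   struct lp_build_if_state ifctx;

   lp_build_flow_scope_begin(flow);
   lp_build_flow_scope_declare(flow, &in_out_mask);   /* written inside the branch */
   lp_build_if(&ifctx, flow, builder, not_draw_all);  /* emit the conditional */
   {
      /* ... emit the detailed per-pixel tests and assign in_out_mask ... */
   }
   lp_build_endif(&ifctx);
   lp_build_flow_scope_end(flow);
   lp_build_flow_destroy(flow);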
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 3a669ba859a..293535387ab 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -58,6 +58,7 @@
* @author Jose Fonseca <[email protected]>
*/
+#include <limits.h>
#include "pipe/p_defines.h"
#include "util/u_memory.h"
#include "util/u_format.h"
@@ -212,14 +213,16 @@ generate_tri_edge_mask(LLVMBuilderRef builder,
m2_vec = step2_ptr[i] > c2_vec
mask = m0_vec & m1_vec & m2_vec
*/
+ struct lp_build_flow_context *flow;
+ struct lp_build_if_state ifctx;
struct lp_type i32_type;
- LLVMTypeRef i32vec4_type;
+ LLVMTypeRef i32vec4_type, mask_type;
- LLVMValueRef index;
LLVMValueRef c0_vec, c1_vec, c2_vec;
- LLVMValueRef step0_vec, step1_vec, step2_vec;
- LLVMValueRef m0_vec, m1_vec, m2_vec;
- LLVMValueRef m;
+
+ LLVMValueRef int_min_vec;
+ LLVMValueRef not_draw_all;
+ LLVMValueRef in_out_mask;
assert(i < 4);
@@ -233,6 +236,12 @@ generate_tri_edge_mask(LLVMBuilderRef builder,
i32vec4_type = lp_build_int32_vec4_type();
+ mask_type = LLVMIntType(32 * 4);
+
+ /* int_min_vec = {INT_MIN, INT_MIN, INT_MIN, INT_MIN} */
+ int_min_vec = lp_build_int_const_scalar(i32_type, INT_MIN);
+
+
/* c0_vec = {c0, c0, c0, c0}
* Note that we emit this code four times but LLVM optimizes away
* three instances of it.
@@ -240,34 +249,66 @@ generate_tri_edge_mask(LLVMBuilderRef builder,
c0_vec = lp_build_broadcast(builder, i32vec4_type, c0);
c1_vec = lp_build_broadcast(builder, i32vec4_type, c1);
c2_vec = lp_build_broadcast(builder, i32vec4_type, c2);
-
lp_build_name(c0_vec, "edgeconst0vec");
lp_build_name(c1_vec, "edgeconst1vec");
lp_build_name(c2_vec, "edgeconst2vec");
- index = LLVMConstInt(LLVMInt32Type(), i, 0);
- step0_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step0_ptr, &index, 1, ""), "");
- step1_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step1_ptr, &index, 1, ""), "");
- step2_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step2_ptr, &index, 1, ""), "");
+ /*
+ * Use a conditional here to do detailed pixel in/out testing.
+ * We only have to do this if c0 != {INT_MIN, INT_MIN, INT_MIN, INT_MIN}
+ */
+ flow = lp_build_flow_create(builder);
+ lp_build_flow_scope_begin(flow);
- lp_build_name(step0_vec, "step0vec");
- lp_build_name(step1_vec, "step1vec");
- lp_build_name(step2_vec, "step2vec");
+#define OPTIMIZE_IN_OUT_TEST 0
+#if OPTIMIZE_IN_OUT_TEST
+ in_out_mask = lp_build_compare(builder, i32_type, PIPE_FUNC_EQUAL, c0_vec, int_min_vec);
+ lp_build_name(in_out_mask, "inoutmaskvec");
- m0_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step0_vec, c0_vec);
- m1_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step1_vec, c1_vec);
- m2_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step2_vec, c2_vec);
+ not_draw_all = LLVMBuildICmp(builder,
+ LLVMIntEQ,
+ LLVMBuildBitCast(builder, in_out_mask, mask_type, ""),
+ LLVMConstNull(mask_type),
+ "");
- m = LLVMBuildAnd(builder, m0_vec, m1_vec, "");
- m = LLVMBuildAnd(builder, m, m2_vec, "");
+ lp_build_flow_scope_declare(flow, &in_out_mask);
- lp_build_name(m, "inoutmaskvec");
+ lp_build_if(&ifctx, flow, builder, not_draw_all);
+#endif
+ {
+ LLVMValueRef step0_vec, step1_vec, step2_vec;
+ LLVMValueRef m0_vec, m1_vec, m2_vec;
+ LLVMValueRef index, m;
+
+ index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ step0_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step0_ptr, &index, 1, ""), "");
+ step1_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step1_ptr, &index, 1, ""), "");
+ step2_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step2_ptr, &index, 1, ""), "");
+
+ lp_build_name(step0_vec, "step0vec");
+ lp_build_name(step1_vec, "step1vec");
+ lp_build_name(step2_vec, "step2vec");
+
+ m0_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step0_vec, c0_vec);
+ m1_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step1_vec, c1_vec);
+ m2_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step2_vec, c2_vec);
+
+ m = LLVMBuildAnd(builder, m0_vec, m1_vec, "");
+ in_out_mask = LLVMBuildAnd(builder, m, m2_vec, "");
+ lp_build_name(in_out_mask, "inoutmaskvec");
+
+ /* This is the initial alive/dead pixel mask. Additional bits will get cleared
+ * when the Z test fails, etc.
+ */
+ }
+#if OPTIMIZE_IN_OUT_TEST
+ lp_build_endif(&ifctx);
+#endif
- *mask = m;
+ lp_build_flow_scope_end(flow);
+ lp_build_flow_destroy(flow);
- /*
- * if mask = {0,0,0,0} skip quad
- */
+ *mask = in_out_mask;
}
@@ -432,6 +473,8 @@ generate_blend(const struct pipe_blend_state *blend,
lp_build_context_init(&bld, builder, type);
flow = lp_build_flow_create(builder);
+
+ /* we'll use this mask context to skip blending if all pixels are dead */
lp_build_mask_begin(&mask_ctx, flow, type, mask);
vec_type = lp_build_vec_type(type);
@@ -737,24 +780,29 @@ generate_fragment(struct llvmpipe_context *lp,
LLVMDisposeBuilder(builder);
- /*
- * Translate the LLVM IR into machine code.
- */
+ /* Verify the LLVM IR. If invalid, dump and abort */
#ifdef DEBUG
if(LLVMVerifyFunction(variant->function, LLVMPrintMessageAction)) {
- LLVMDumpValue(variant->function);
- assert(0);
+ if (1)
+ LLVMDumpValue(variant->function);
+ abort();
}
#endif
- LLVMRunFunctionPassManager(screen->pass, variant->function);
+ /* Apply optimizations to LLVM IR */
+ if (1)
+ LLVMRunFunctionPassManager(screen->pass, variant->function);
if (LP_DEBUG & DEBUG_JIT) {
+ /* Print the LLVM IR to stderr */
LLVMDumpValue(variant->function);
debug_printf("\n");
}
+ /*
+ * Translate the LLVM IR into machine code.
+ */
variant->jit_function = (lp_jit_frag_func)LLVMGetPointerToGlobal(screen->engine, variant->function);
if (LP_DEBUG & DEBUG_ASM)