diff options
Diffstat (limited to 'src/gallium/drivers/llvmpipe/lp_state_fs.c')
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_state_fs.c | 874 |
1 files changed, 463 insertions, 411 deletions
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 965777b6411..3ce8be5a0a9 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -31,9 +31,6 @@ * Code generate the whole fragment pipeline. * * The fragment pipeline consists of the following stages: - * - triangle edge in/out testing - * - scissor test - * - stipple (TBI) * - early depth test * - fragment shader * - alpha test @@ -64,8 +61,11 @@ #include "pipe/p_defines.h" #include "util/u_inlines.h" #include "util/u_memory.h" +#include "util/u_pointer.h" #include "util/u_format.h" #include "util/u_dump.h" +#include "util/u_string.h" +#include "util/u_simple_list.h" #include "os/os_time.h" #include "pipe/p_shader_tokens.h" #include "draw/draw_context.h" @@ -75,6 +75,7 @@ #include "gallivm/lp_bld_type.h" #include "gallivm/lp_bld_const.h" #include "gallivm/lp_bld_conv.h" +#include "gallivm/lp_bld_init.h" #include "gallivm/lp_bld_intr.h" #include "gallivm/lp_bld_logic.h" #include "gallivm/lp_bld_tgsi.h" @@ -93,48 +94,14 @@ #include "lp_setup.h" #include "lp_state.h" #include "lp_tex_sample.h" +#include "lp_flush.h" +#include "lp_state_fs.h" #include <llvm-c/Analysis.h> -static const unsigned char quad_offset_x[4] = {0, 1, 0, 1}; -static const unsigned char quad_offset_y[4] = {0, 0, 1, 1}; - - -/* - * Derive from the quad's upper left scalar coordinates the coordinates for - * all other quad pixels - */ -static void -generate_pos0(LLVMBuilderRef builder, - LLVMValueRef x, - LLVMValueRef y, - LLVMValueRef *x0, - LLVMValueRef *y0) -{ - LLVMTypeRef int_elem_type = LLVMInt32Type(); - LLVMTypeRef int_vec_type = LLVMVectorType(int_elem_type, QUAD_SIZE); - LLVMTypeRef elem_type = LLVMFloatType(); - LLVMTypeRef vec_type = LLVMVectorType(elem_type, QUAD_SIZE); - LLVMValueRef x_offsets[QUAD_SIZE]; - LLVMValueRef y_offsets[QUAD_SIZE]; - unsigned i; - - x = lp_build_broadcast(builder, int_vec_type, x); - y = 
lp_build_broadcast(builder, int_vec_type, y); - - for(i = 0; i < QUAD_SIZE; ++i) { - x_offsets[i] = LLVMConstInt(int_elem_type, quad_offset_x[i], 0); - y_offsets[i] = LLVMConstInt(int_elem_type, quad_offset_y[i], 0); - } - - x = LLVMBuildAdd(builder, x, LLVMConstVector(x_offsets, QUAD_SIZE), ""); - y = LLVMBuildAdd(builder, y, LLVMConstVector(y_offsets, QUAD_SIZE), ""); - - *x0 = LLVMBuildSIToFP(builder, x, vec_type, ""); - *y0 = LLVMBuildSIToFP(builder, y, vec_type, ""); -} +static unsigned fs_no = 0; /** @@ -148,7 +115,8 @@ generate_depth_stencil(LLVMBuilderRef builder, LLVMValueRef stencil_refs[2], LLVMValueRef src, LLVMValueRef dst_ptr, - LLVMValueRef facing) + LLVMValueRef facing, + LLVMValueRef counter) { const struct util_format_description *format_desc; struct lp_type dst_type; @@ -195,182 +163,88 @@ generate_depth_stencil(LLVMBuilderRef builder, stencil_refs, src, dst_ptr, - facing); + facing, + counter); } /** - * Generate the code to do inside/outside triangle testing for the + * Expand the relevant bits of mask_input to a 4-dword mask for the * four pixels in a 2x2 quad. This will set the four elements of the * quad mask vector to 0 or ~0. 
- * \param i which quad of the quad group to test, in [0,3] + * + * \param quad which quad of the quad group to test, in [0,3] + * \param mask_input bitwise mask for the whole 4x4 stamp */ -static void -generate_tri_edge_mask(LLVMBuilderRef builder, - unsigned i, - LLVMValueRef *mask, /* ivec4, out */ - LLVMValueRef c0, /* int32 */ - LLVMValueRef c1, /* int32 */ - LLVMValueRef c2, /* int32 */ - LLVMValueRef step0_ptr, /* ivec4 */ - LLVMValueRef step1_ptr, /* ivec4 */ - LLVMValueRef step2_ptr) /* ivec4 */ +static LLVMValueRef +generate_quad_mask(LLVMBuilderRef builder, + struct lp_type fs_type, + unsigned quad, + LLVMValueRef mask_input) /* int32 */ { -#define OPTIMIZE_IN_OUT_TEST 0 -#if OPTIMIZE_IN_OUT_TEST - struct lp_build_if_state ifctx; - LLVMValueRef not_draw_all; -#endif - struct lp_build_flow_context *flow; - struct lp_type i32_type; - LLVMTypeRef i32vec4_type; - LLVMValueRef c0_vec, c1_vec, c2_vec; - LLVMValueRef in_out_mask; - - assert(i < 4); - - /* int32 vector type */ - memset(&i32_type, 0, sizeof i32_type); - i32_type.floating = FALSE; /* values are integers */ - i32_type.sign = TRUE; /* values are signed */ - i32_type.norm = FALSE; /* values are not normalized */ - i32_type.width = 32; /* 32-bit int values */ - i32_type.length = 4; /* 4 elements per vector */ - - i32vec4_type = lp_build_int32_vec4_type(); + struct lp_type mask_type; + LLVMTypeRef i32t = LLVMInt32Type(); + LLVMValueRef bits[4]; + LLVMValueRef mask; + int shift; /* - * Use a conditional here to do detailed pixel in/out testing. - * We only have to do this if c0 != INT_MIN. 
+ * XXX: We'll need a different path for 16 x u8 */ - flow = lp_build_flow_create(builder); - lp_build_flow_scope_begin(flow); + assert(fs_type.width == 32); + assert(fs_type.length == 4); + mask_type = lp_int_type(fs_type); - { -#if OPTIMIZE_IN_OUT_TEST - /* not_draw_all = (c0 != INT_MIN) */ - not_draw_all = LLVMBuildICmp(builder, - LLVMIntNE, - c0, - LLVMConstInt(LLVMInt32Type(), INT_MIN, 0), - ""); - - in_out_mask = lp_build_const_int_vec(i32_type, ~0); - - - lp_build_flow_scope_declare(flow, &in_out_mask); - - /* if (not_draw_all) {... */ - lp_build_if(&ifctx, flow, builder, not_draw_all); -#endif - { - LLVMValueRef step0_vec, step1_vec, step2_vec; - LLVMValueRef m0_vec, m1_vec, m2_vec; - LLVMValueRef index, m; - - /* c0_vec = {c0, c0, c0, c0} - * Note that we emit this code four times but LLVM optimizes away - * three instances of it. - */ - c0_vec = lp_build_broadcast(builder, i32vec4_type, c0); - c1_vec = lp_build_broadcast(builder, i32vec4_type, c1); - c2_vec = lp_build_broadcast(builder, i32vec4_type, c2); - lp_build_name(c0_vec, "edgeconst0vec"); - lp_build_name(c1_vec, "edgeconst1vec"); - lp_build_name(c2_vec, "edgeconst2vec"); - - /* load step0vec, step1, step2 vec from memory */ - index = LLVMConstInt(LLVMInt32Type(), i, 0); - step0_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step0_ptr, &index, 1, ""), ""); - step1_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step1_ptr, &index, 1, ""), ""); - step2_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step2_ptr, &index, 1, ""), ""); - lp_build_name(step0_vec, "step0vec"); - lp_build_name(step1_vec, "step1vec"); - lp_build_name(step2_vec, "step2vec"); - - /* m0_vec = step0_ptr[i] > c0_vec */ - m0_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step0_vec, c0_vec); - m1_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step1_vec, c1_vec); - m2_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step2_vec, c2_vec); - - /* in_out_mask = m0_vec & m1_vec & m2_vec */ 
- m = LLVMBuildAnd(builder, m0_vec, m1_vec, ""); - in_out_mask = LLVMBuildAnd(builder, m, m2_vec, ""); - lp_build_name(in_out_mask, "inoutmaskvec"); - } -#if OPTIMIZE_IN_OUT_TEST - lp_build_endif(&ifctx); -#endif - - } - lp_build_flow_scope_end(flow); - lp_build_flow_destroy(flow); - - /* This is the initial alive/dead pixel mask for a quad of four pixels. - * It's an int[4] vector with each word set to 0 or ~0. - * Words will get cleared when pixels faile the Z test, etc. + /* + * mask_input >>= (quad * 4) */ - *mask = in_out_mask; -} - - -static LLVMValueRef -generate_scissor_test(LLVMBuilderRef builder, - LLVMValueRef context_ptr, - const struct lp_build_interp_soa_context *interp, - struct lp_type type) -{ - LLVMTypeRef vec_type = lp_build_vec_type(type); - LLVMValueRef xpos = interp->pos[0], ypos = interp->pos[1]; - LLVMValueRef xmin, ymin, xmax, ymax; - LLVMValueRef m0, m1, m2, m3, m; - - /* xpos, ypos contain the window coords for the four pixels in the quad */ - assert(xpos); - assert(ypos); - - /* get the current scissor bounds, convert to vectors */ - xmin = lp_jit_context_scissor_xmin_value(builder, context_ptr); - xmin = lp_build_broadcast(builder, vec_type, xmin); - - ymin = lp_jit_context_scissor_ymin_value(builder, context_ptr); - ymin = lp_build_broadcast(builder, vec_type, ymin); - - xmax = lp_jit_context_scissor_xmax_value(builder, context_ptr); - xmax = lp_build_broadcast(builder, vec_type, xmax); + + switch (quad) { + case 0: + shift = 0; + break; + case 1: + shift = 2; + break; + case 2: + shift = 8; + break; + case 3: + shift = 10; + break; + default: + assert(0); + shift = 0; + } - ymax = lp_jit_context_scissor_ymax_value(builder, context_ptr); - ymax = lp_build_broadcast(builder, vec_type, ymax); + mask_input = LLVMBuildLShr(builder, + mask_input, + LLVMConstInt(i32t, shift, 0), + ""); - /* compare the fragment's position coordinates against the scissor bounds */ - m0 = lp_build_compare(builder, type, PIPE_FUNC_GEQUAL, xpos, xmin); - m1 = 
lp_build_compare(builder, type, PIPE_FUNC_GEQUAL, ypos, ymin); - m2 = lp_build_compare(builder, type, PIPE_FUNC_LESS, xpos, xmax); - m3 = lp_build_compare(builder, type, PIPE_FUNC_LESS, ypos, ymax); + /* + * mask = { mask_input & (1 << i), for i in [0,3] } + */ - /* AND all the masks together */ - m = LLVMBuildAnd(builder, m0, m1, ""); - m = LLVMBuildAnd(builder, m, m2, ""); - m = LLVMBuildAnd(builder, m, m3, ""); + mask = lp_build_broadcast(builder, lp_build_vec_type(mask_type), mask_input); - lp_build_name(m, "scissormask"); + bits[0] = LLVMConstInt(i32t, 1 << 0, 0); + bits[1] = LLVMConstInt(i32t, 1 << 1, 0); + bits[2] = LLVMConstInt(i32t, 1 << 4, 0); + bits[3] = LLVMConstInt(i32t, 1 << 5, 0); + + mask = LLVMBuildAnd(builder, mask, LLVMConstVector(bits, 4), ""); - return m; -} + /* + * mask = mask != 0 ? ~0 : 0 + */ + mask = lp_build_compare(builder, + mask_type, PIPE_FUNC_NOTEQUAL, + mask, + lp_build_const_int_vec(mask_type, 0)); -static LLVMValueRef -build_int32_vec_const(int value) -{ - struct lp_type i32_type; - - memset(&i32_type, 0, sizeof i32_type); - i32_type.floating = FALSE; /* values are integers */ - i32_type.sign = TRUE; /* values are signed */ - i32_type.norm = FALSE; /* values are not normalized */ - i32_type.width = 32; /* 32-bit int values */ - i32_type.length = 4; /* 4 elements per vector */ - return lp_build_const_int_vec(i32_type, value); + return mask; } @@ -378,7 +252,7 @@ build_int32_vec_const(int value) /** * Generate the fragment shader, depth/stencil test, and alpha tests. 
* \param i which quad in the tile, in range [0,3] - * \param do_tri_test if 1, do triangle edge in/out testing + * \param partial_mask if 1, do mask_input testing */ static void generate_fs(struct llvmpipe_context *lp, @@ -394,13 +268,9 @@ generate_fs(struct llvmpipe_context *lp, LLVMValueRef (*color)[4], LLVMValueRef depth_ptr, LLVMValueRef facing, - unsigned do_tri_test, - LLVMValueRef c0, - LLVMValueRef c1, - LLVMValueRef c2, - LLVMValueRef step0_ptr, - LLVMValueRef step1_ptr, - LLVMValueRef step2_ptr) + unsigned partial_mask, + LLVMValueRef mask_input, + LLVMValueRef counter) { const struct tgsi_token *tokens = shader->base.tokens; LLVMTypeRef vec_type; @@ -440,23 +310,17 @@ generate_fs(struct llvmpipe_context *lp, lp_build_flow_scope_declare(flow, &z); /* do triangle edge testing */ - if (do_tri_test) { - generate_tri_edge_mask(builder, i, pmask, - c0, c1, c2, step0_ptr, step1_ptr, step2_ptr); + if (partial_mask) { + *pmask = generate_quad_mask(builder, type, + i, mask_input); } else { - *pmask = build_int32_vec_const(~0); + *pmask = lp_build_const_int_vec(type, ~0); } /* 'mask' will control execution based on quad's pixel alive/killed state */ lp_build_mask_begin(&mask, flow, type, *pmask); - if (key->scissor) { - LLVMValueRef smask = - generate_scissor_test(builder, context_ptr, interp, type); - lp_build_mask_update(&mask, smask); - } - early_depth_stencil_test = (key->depth.enabled || key->stencil[0].enabled) && !key->alpha.enabled && @@ -466,12 +330,13 @@ generate_fs(struct llvmpipe_context *lp, if (early_depth_stencil_test) generate_depth_stencil(builder, key, type, &mask, - stencil_refs, z, depth_ptr, facing); + stencil_refs, z, depth_ptr, facing, counter); lp_build_tgsi_soa(builder, tokens, type, &mask, consts_ptr, interp->pos, interp->inputs, outputs, sampler, &shader->info); + /* loop over fragment shader outputs/results */ for (attrib = 0; attrib < shader->info.num_outputs; ++attrib) { for(chan = 0; chan < NUM_CHANNELS; ++chan) { 
if(outputs[attrib][chan]) { @@ -486,14 +351,13 @@ generate_fs(struct llvmpipe_context *lp, lp_build_name(out, "color%u.%u.%c", i, attrib, "rgba"[chan]); /* Alpha test */ - /* XXX: should the alpha reference value be passed separately? */ /* XXX: should only test the final assignment to alpha */ - if(cbuf == 0 && chan == 3) { + if (cbuf == 0 && chan == 3 && key->alpha.enabled) { LLVMValueRef alpha = out; LLVMValueRef alpha_ref_value; alpha_ref_value = lp_jit_context_alpha_ref_value(builder, context_ptr); alpha_ref_value = lp_build_broadcast(builder, vec_type, alpha_ref_value); - lp_build_alpha_test(builder, &key->alpha, type, + lp_build_alpha_test(builder, key->alpha.func, type, &mask, alpha, alpha_ref_value); } @@ -513,7 +377,7 @@ generate_fs(struct llvmpipe_context *lp, if (!early_depth_stencil_test) generate_depth_stencil(builder, key, type, &mask, - stencil_refs, z, depth_ptr, facing); + stencil_refs, z, depth_ptr, facing, counter); lp_build_mask_end(&mask); @@ -528,9 +392,16 @@ generate_fs(struct llvmpipe_context *lp, /** * Generate color blending and color output. 
+ * \param rt the render target index (to index blend, colormask state) + * \param type the pixel color type + * \param context_ptr pointer to the runtime JIT context + * \param mask execution mask (active fragment/pixel mask) + * \param src colors from the fragment shader + * \param dst_ptr the destination color buffer pointer */ static void generate_blend(const struct pipe_blend_state *blend, + unsigned rt, LLVMBuilderRef builder, struct lp_type type, LLVMValueRef context_ptr, @@ -561,6 +432,7 @@ generate_blend(const struct pipe_blend_state *blend, const_ptr = LLVMBuildBitCast(builder, const_ptr, LLVMPointerType(vec_type, 0), ""); + /* load constant blend color and colors from the dest color buffer */ for(chan = 0; chan < 4; ++chan) { LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0); con[chan] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, const_ptr, &index, 1, ""), ""); @@ -571,10 +443,12 @@ generate_blend(const struct pipe_blend_state *blend, lp_build_name(dst[chan], "dst.%c", "rgba"[chan]); } - lp_build_blend_soa(builder, blend, type, src, dst, con, res); + /* do blend */ + lp_build_blend_soa(builder, blend, type, rt, src, dst, con, res); + /* store results to color buffer */ for(chan = 0; chan < 4; ++chan) { - if(blend->rt[0].colormask & (1 << chan)) { + if(blend->rt[rt].colormask & (1 << chan)) { LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0); lp_build_name(res[chan], "res.%c", "rgba"[chan]); res[chan] = lp_build_select(&bld, mask, res[chan], dst[chan]); @@ -587,20 +461,6 @@ generate_blend(const struct pipe_blend_state *blend, } -/** casting function to avoid compiler warnings */ -static lp_jit_frag_func -cast_voidptr_to_lp_jit_frag_func(void *p) -{ - union { - void *v; - lp_jit_frag_func f; - } tmp; - assert(sizeof(tmp.v) == sizeof(tmp.f)); - tmp.v = p; - return tmp.f; -} - - /** * Generate the runtime callable function for the whole fragment pipeline. 
* Note that the function which we generate operates on a block of 16 @@ -611,19 +471,18 @@ static void generate_fragment(struct llvmpipe_context *lp, struct lp_fragment_shader *shader, struct lp_fragment_shader_variant *variant, - unsigned do_tri_test) + unsigned partial_mask) { struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen); const struct lp_fragment_shader_variant_key *key = &variant->key; + char func_name[256]; struct lp_type fs_type; struct lp_type blend_type; LLVMTypeRef fs_elem_type; - LLVMTypeRef fs_vec_type; LLVMTypeRef fs_int_vec_type; LLVMTypeRef blend_vec_type; - LLVMTypeRef arg_types[15]; + LLVMTypeRef arg_types[11]; LLVMTypeRef func_type; - LLVMTypeRef int32_vec4_type = lp_build_int32_vec4_type(); LLVMValueRef context_ptr; LLVMValueRef x; LLVMValueRef y; @@ -632,17 +491,15 @@ generate_fragment(struct llvmpipe_context *lp, LLVMValueRef dady_ptr; LLVMValueRef color_ptr_ptr; LLVMValueRef depth_ptr; - LLVMValueRef c0, c1, c2, step0_ptr, step1_ptr, step2_ptr; + LLVMValueRef mask_input; + LLVMValueRef counter = NULL; LLVMBasicBlockRef block; LLVMBuilderRef builder; - LLVMValueRef x0; - LLVMValueRef y0; struct lp_build_sampler_soa *sampler; struct lp_build_interp_soa_context interp; LLVMValueRef fs_mask[LP_MAX_VECTOR_LENGTH]; LLVMValueRef fs_out_color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][LP_MAX_VECTOR_LENGTH]; LLVMValueRef blend_mask; - LLVMValueRef blend_in_color[NUM_CHANNELS]; LLVMValueRef function; LLVMValueRef facing; unsigned num_fs; @@ -675,11 +532,13 @@ generate_fragment(struct llvmpipe_context *lp, */ fs_elem_type = lp_build_elem_type(fs_type); - fs_vec_type = lp_build_vec_type(fs_type); fs_int_vec_type = lp_build_int_vec_type(fs_type); blend_vec_type = lp_build_vec_type(blend_type); + util_snprintf(func_name, sizeof(func_name), "fs%u_variant%u_%s", + shader->no, variant->no, partial_mask ? 
"partial" : "whole"); + arg_types[0] = screen->context_ptr_type; /* context */ arg_types[1] = LLVMInt32Type(); /* x */ arg_types[2] = LLVMInt32Type(); /* y */ @@ -689,22 +548,15 @@ generate_fragment(struct llvmpipe_context *lp, arg_types[6] = LLVMPointerType(fs_elem_type, 0); /* dady */ arg_types[7] = LLVMPointerType(LLVMPointerType(blend_vec_type, 0), 0); /* color */ arg_types[8] = LLVMPointerType(fs_int_vec_type, 0); /* depth */ - arg_types[9] = LLVMInt32Type(); /* c0 */ - arg_types[10] = LLVMInt32Type(); /* c1 */ - arg_types[11] = LLVMInt32Type(); /* c2 */ - /* Note: the step arrays are built as int32[16] but we interpret - * them here as int32_vec4[4]. - */ - arg_types[12] = LLVMPointerType(int32_vec4_type, 0);/* step0 */ - arg_types[13] = LLVMPointerType(int32_vec4_type, 0);/* step1 */ - arg_types[14] = LLVMPointerType(int32_vec4_type, 0);/* step2 */ + arg_types[9] = LLVMInt32Type(); /* mask_input */ + arg_types[10] = LLVMPointerType(LLVMInt32Type(), 0);/* counter */ func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); - function = LLVMAddFunction(screen->module, "shader", func_type); + function = LLVMAddFunction(screen->module, func_name, func_type); LLVMSetFunctionCallConv(function, LLVMCCallConv); - variant->function[do_tri_test] = function; + variant->function[partial_mask] = function; /* XXX: need to propagate noalias down into color param now we are @@ -723,12 +575,7 @@ generate_fragment(struct llvmpipe_context *lp, dady_ptr = LLVMGetParam(function, 6); color_ptr_ptr = LLVMGetParam(function, 7); depth_ptr = LLVMGetParam(function, 8); - c0 = LLVMGetParam(function, 9); - c1 = LLVMGetParam(function, 10); - c2 = LLVMGetParam(function, 11); - step0_ptr = LLVMGetParam(function, 12); - step1_ptr = LLVMGetParam(function, 13); - step2_ptr = LLVMGetParam(function, 14); + mask_input = LLVMGetParam(function, 9); lp_build_name(context_ptr, "context"); lp_build_name(x, "x"); @@ -736,14 +583,14 @@ generate_fragment(struct llvmpipe_context 
*lp, lp_build_name(a0_ptr, "a0"); lp_build_name(dadx_ptr, "dadx"); lp_build_name(dady_ptr, "dady"); - lp_build_name(color_ptr_ptr, "color_ptr"); + lp_build_name(color_ptr_ptr, "color_ptr_ptr"); lp_build_name(depth_ptr, "depth"); - lp_build_name(c0, "c0"); - lp_build_name(c1, "c1"); - lp_build_name(c2, "c2"); - lp_build_name(step0_ptr, "step0"); - lp_build_name(step1_ptr, "step1"); - lp_build_name(step2_ptr, "step2"); + lp_build_name(mask_input, "mask_input"); + + if (key->occlusion_count) { + counter = LLVMGetParam(function, 10); + lp_build_name(counter, "counter"); + } /* * Function body @@ -753,14 +600,17 @@ generate_fragment(struct llvmpipe_context *lp, builder = LLVMCreateBuilder(); LLVMPositionBuilderAtEnd(builder, block); - generate_pos0(builder, x, y, &x0, &y0); - + /* + * The shader input interpolation info is not explicitly baked in the + * shader key, but everything it derives from (TGSI, and flatshade) is + * already included in the shader key. + */ lp_build_interp_soa_init(&interp, - shader->base.tokens, - key->flatshade, + lp->num_inputs, + lp->inputs, builder, fs_type, a0_ptr, dadx_ptr, dady_ptr, - x0, y0); + x, y); /* code generated texture sampling */ sampler = lp_llvm_sampler_soa_create(key->sampler, context_ptr); @@ -787,9 +637,9 @@ generate_fragment(struct llvmpipe_context *lp, out_color, depth_ptr_i, facing, - do_tri_test, - c0, c1, c2, - step0_ptr, step1_ptr, step2_ptr); + partial_mask, + mask_input, + counter); for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) for(chan = 0; chan < NUM_CHANNELS; ++chan) @@ -803,6 +653,8 @@ generate_fragment(struct llvmpipe_context *lp, for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) { LLVMValueRef color_ptr; LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), cbuf, 0); + LLVMValueRef blend_in_color[NUM_CHANNELS]; + unsigned rt; /* * Convert the fs's output color and mask to fit to the blending type. 
@@ -814,19 +666,27 @@ generate_fragment(struct llvmpipe_context *lp, lp_build_name(blend_in_color[chan], "color%d.%c", cbuf, "rgba"[chan]); } - lp_build_conv_mask(builder, fs_type, blend_type, - fs_mask, num_fs, - &blend_mask, 1); + if (partial_mask || !variant->opaque) { + lp_build_conv_mask(builder, fs_type, blend_type, + fs_mask, num_fs, + &blend_mask, 1); + } else { + blend_mask = lp_build_const_int_vec(blend_type, ~0); + } color_ptr = LLVMBuildLoad(builder, LLVMBuildGEP(builder, color_ptr_ptr, &index, 1, ""), ""); lp_build_name(color_ptr, "color_ptr%d", cbuf); + /* which blend/colormask state to use */ + rt = key->blend.independent_blend_enable ? cbuf : 0; + /* * Blending. */ generate_blend(&key->blend, + rt, builder, blend_type, context_ptr, @@ -835,6 +695,11 @@ generate_fragment(struct llvmpipe_context *lp, color_ptr); } +#ifdef PIPE_ARCH_X86 + /* Avoid corrupting the FPU stack on 32bit OSes. */ + lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0); +#endif + LLVMBuildRetVoid(builder); LLVMDisposeBuilder(builder); @@ -844,18 +709,17 @@ generate_fragment(struct llvmpipe_context *lp, #ifdef DEBUG if(LLVMVerifyFunction(function, LLVMPrintMessageAction)) { if (1) - LLVMDumpValue(function); + lp_debug_dump_value(function); abort(); } #endif /* Apply optimizations to LLVM IR */ - if (1) - LLVMRunFunctionPassManager(screen->pass, function); + LLVMRunFunctionPassManager(screen->pass, function); - if (LP_DEBUG & DEBUG_JIT) { + if (gallivm_debug & GALLIVM_DEBUG_IR) { /* Print the LLVM IR to stderr */ - LLVMDumpValue(function); + lp_debug_dump_value(function); debug_printf("\n"); } @@ -865,86 +729,163 @@ generate_fragment(struct llvmpipe_context *lp, { void *f = LLVMGetPointerToGlobal(screen->engine, function); - variant->jit_function[do_tri_test] = cast_voidptr_to_lp_jit_frag_func(f); + variant->jit_function[partial_mask] = (lp_jit_frag_func)pointer_to_func(f); - if (LP_DEBUG & DEBUG_ASM) + if (gallivm_debug & GALLIVM_DEBUG_ASM) { 
lp_disassemble(f); + } + lp_func_delete_body(function); } } +static void +dump_fs_variant_key(const struct lp_fragment_shader_variant_key *key) +{ + unsigned i; + + debug_printf("fs variant %p:\n", (void *) key); + + if (key->flatshade) { + debug_printf("flatshade = 1\n"); + } + for (i = 0; i < key->nr_cbufs; ++i) { + debug_printf("cbuf_format[%u] = %s\n", i, util_format_name(key->cbuf_format[i])); + } + if (key->depth.enabled) { + debug_printf("depth.format = %s\n", util_format_name(key->zsbuf_format)); + debug_printf("depth.func = %s\n", util_dump_func(key->depth.func, TRUE)); + debug_printf("depth.writemask = %u\n", key->depth.writemask); + } + + for (i = 0; i < 2; ++i) { + if (key->stencil[i].enabled) { + debug_printf("stencil[%u].func = %s\n", i, util_dump_func(key->stencil[i].func, TRUE)); + debug_printf("stencil[%u].fail_op = %s\n", i, util_dump_stencil_op(key->stencil[i].fail_op, TRUE)); + debug_printf("stencil[%u].zpass_op = %s\n", i, util_dump_stencil_op(key->stencil[i].zpass_op, TRUE)); + debug_printf("stencil[%u].zfail_op = %s\n", i, util_dump_stencil_op(key->stencil[i].zfail_op, TRUE)); + debug_printf("stencil[%u].valuemask = 0x%x\n", i, key->stencil[i].valuemask); + debug_printf("stencil[%u].writemask = 0x%x\n", i, key->stencil[i].writemask); + } + } + + if (key->alpha.enabled) { + debug_printf("alpha.func = %s\n", util_dump_func(key->alpha.func, TRUE)); + } + + if (key->occlusion_count) { + debug_printf("occlusion_count = 1\n"); + } + + if (key->blend.logicop_enable) { + debug_printf("blend.logicop_func = %s\n", util_dump_logicop(key->blend.logicop_func, TRUE)); + } + else if (key->blend.rt[0].blend_enable) { + debug_printf("blend.rgb_func = %s\n", util_dump_blend_func (key->blend.rt[0].rgb_func, TRUE)); + debug_printf("blend.rgb_src_factor = %s\n", util_dump_blend_factor(key->blend.rt[0].rgb_src_factor, TRUE)); + debug_printf("blend.rgb_dst_factor = %s\n", util_dump_blend_factor(key->blend.rt[0].rgb_dst_factor, TRUE)); + 
debug_printf("blend.alpha_func = %s\n", util_dump_blend_func (key->blend.rt[0].alpha_func, TRUE)); + debug_printf("blend.alpha_src_factor = %s\n", util_dump_blend_factor(key->blend.rt[0].alpha_src_factor, TRUE)); + debug_printf("blend.alpha_dst_factor = %s\n", util_dump_blend_factor(key->blend.rt[0].alpha_dst_factor, TRUE)); + } + debug_printf("blend.colormask = 0x%x\n", key->blend.rt[0].colormask); + for (i = 0; i < key->nr_samplers; ++i) { + debug_printf("sampler[%u] = \n", i); + debug_printf(" .format = %s\n", + util_format_name(key->sampler[i].format)); + debug_printf(" .target = %s\n", + util_dump_tex_target(key->sampler[i].target, TRUE)); + debug_printf(" .pot = %u %u %u\n", + key->sampler[i].pot_width, + key->sampler[i].pot_height, + key->sampler[i].pot_depth); + debug_printf(" .wrap = %s %s %s\n", + util_dump_tex_wrap(key->sampler[i].wrap_s, TRUE), + util_dump_tex_wrap(key->sampler[i].wrap_t, TRUE), + util_dump_tex_wrap(key->sampler[i].wrap_r, TRUE)); + debug_printf(" .min_img_filter = %s\n", + util_dump_tex_filter(key->sampler[i].min_img_filter, TRUE)); + debug_printf(" .min_mip_filter = %s\n", + util_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE)); + debug_printf(" .mag_img_filter = %s\n", + util_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE)); + if (key->sampler[i].compare_mode != PIPE_TEX_COMPARE_NONE) + debug_printf(" .compare_func = %s\n", util_dump_func(key->sampler[i].compare_func, TRUE)); + debug_printf(" .normalized_coords = %u\n", key->sampler[i].normalized_coords); + debug_printf(" .min_max_lod_equal = %u\n", key->sampler[i].min_max_lod_equal); + debug_printf(" .lod_bias_non_zero = %u\n", key->sampler[i].lod_bias_non_zero); + debug_printf(" .apply_min_lod = %u\n", key->sampler[i].apply_min_lod); + debug_printf(" .apply_max_lod = %u\n", key->sampler[i].apply_max_lod); + } +} + + +void +lp_debug_fs_variant(const struct lp_fragment_shader_variant *variant) +{ + debug_printf("llvmpipe: Fragment shader #%u variant #%u:\n", + 
variant->shader->no, variant->no); + tgsi_dump(variant->shader->base.tokens, 0); + dump_fs_variant_key(&variant->key); + debug_printf("variant->opaque = %u\n", variant->opaque); + debug_printf("\n"); +} + static struct lp_fragment_shader_variant * generate_variant(struct llvmpipe_context *lp, struct lp_fragment_shader *shader, const struct lp_fragment_shader_variant_key *key) { struct lp_fragment_shader_variant *variant; + boolean fullcolormask; - if (LP_DEBUG & DEBUG_JIT) { - unsigned i; + variant = CALLOC_STRUCT(lp_fragment_shader_variant); + if(!variant) + return NULL; - tgsi_dump(shader->base.tokens, 0); - if(key->depth.enabled) { - debug_printf("depth.format = %s\n", util_format_name(key->zsbuf_format)); - debug_printf("depth.func = %s\n", util_dump_func(key->depth.func, TRUE)); - debug_printf("depth.writemask = %u\n", key->depth.writemask); - } - if(key->alpha.enabled) { - debug_printf("alpha.func = %s\n", util_dump_func(key->alpha.func, TRUE)); - debug_printf("alpha.ref_value = %f\n", key->alpha.ref_value); - } - if(key->blend.logicop_enable) { - debug_printf("blend.logicop_func = %u\n", key->blend.logicop_func); - } - else if(key->blend.rt[0].blend_enable) { - debug_printf("blend.rgb_func = %s\n", util_dump_blend_func (key->blend.rt[0].rgb_func, TRUE)); - debug_printf("rgb_src_factor = %s\n", util_dump_blend_factor(key->blend.rt[0].rgb_src_factor, TRUE)); - debug_printf("rgb_dst_factor = %s\n", util_dump_blend_factor(key->blend.rt[0].rgb_dst_factor, TRUE)); - debug_printf("alpha_func = %s\n", util_dump_blend_func (key->blend.rt[0].alpha_func, TRUE)); - debug_printf("alpha_src_factor = %s\n", util_dump_blend_factor(key->blend.rt[0].alpha_src_factor, TRUE)); - debug_printf("alpha_dst_factor = %s\n", util_dump_blend_factor(key->blend.rt[0].alpha_dst_factor, TRUE)); - } - debug_printf("blend.colormask = 0x%x\n", key->blend.rt[0].colormask); - for(i = 0; i < PIPE_MAX_SAMPLERS; ++i) { - if(key->sampler[i].format) { - debug_printf("sampler[%u] = \n", i); - 
debug_printf(" .format = %s\n", - util_format_name(key->sampler[i].format)); - debug_printf(" .target = %s\n", - util_dump_tex_target(key->sampler[i].target, TRUE)); - debug_printf(" .pot = %u %u %u\n", - key->sampler[i].pot_width, - key->sampler[i].pot_height, - key->sampler[i].pot_depth); - debug_printf(" .wrap = %s %s %s\n", - util_dump_tex_wrap(key->sampler[i].wrap_s, TRUE), - util_dump_tex_wrap(key->sampler[i].wrap_t, TRUE), - util_dump_tex_wrap(key->sampler[i].wrap_r, TRUE)); - debug_printf(" .min_img_filter = %s\n", - util_dump_tex_filter(key->sampler[i].min_img_filter, TRUE)); - debug_printf(" .min_mip_filter = %s\n", - util_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE)); - debug_printf(" .mag_img_filter = %s\n", - util_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE)); - if(key->sampler[i].compare_mode != PIPE_TEX_COMPARE_NONE) - debug_printf(" .compare_func = %s\n", util_dump_func(key->sampler[i].compare_func, TRUE)); - debug_printf(" .normalized_coords = %u\n", key->sampler[i].normalized_coords); - } + variant->shader = shader; + variant->list_item_global.base = variant; + variant->list_item_local.base = variant; + variant->no = shader->variants_created++; + + memcpy(&variant->key, key, shader->variant_key_size); + + /* + * Determine whether we are touching all channels in the color buffer. + */ + fullcolormask = FALSE; + if (key->nr_cbufs == 1) { + const struct util_format_description *format_desc; + format_desc = util_format_description(key->cbuf_format[0]); + if ((~key->blend.rt[0].colormask & + util_format_colormask(format_desc)) == 0) { + fullcolormask = TRUE; } } - variant = CALLOC_STRUCT(lp_fragment_shader_variant); - if(!variant) - return NULL; + variant->opaque = + !key->blend.logicop_enable && + !key->blend.rt[0].blend_enable && + fullcolormask && + !key->stencil[0].enabled && + !key->alpha.enabled && + !key->depth.enabled && + !shader->info.uses_kill + ? 
TRUE : FALSE; - memcpy(&variant->key, key, sizeof *key); - generate_fragment(lp, shader, variant, 0); - generate_fragment(lp, shader, variant, 1); + if ((LP_DEBUG & DEBUG_FS) || (gallivm_debug & GALLIVM_DEBUG_IR)) { + lp_debug_fs_variant(variant); + } - /* insert new variant into linked list */ - variant->next = shader->variants; - shader->variants = variant; + generate_fragment(lp, shader, variant, RAST_EDGE_TEST); + + if (variant->opaque) { + /* Specialized shader, which doesn't need to read the color buffer. */ + generate_fragment(lp, shader, variant, RAST_WHOLE); + } else { + variant->jit_function[RAST_WHOLE] = variant->jit_function[RAST_EDGE_TEST]; + } return variant; } @@ -954,21 +895,50 @@ static void * llvmpipe_create_fs_state(struct pipe_context *pipe, const struct pipe_shader_state *templ) { + struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); struct lp_fragment_shader *shader; + int nr_samplers; shader = CALLOC_STRUCT(lp_fragment_shader); if (!shader) return NULL; + shader->no = fs_no++; + make_empty_list(&shader->variants); + /* get/save the summary info for this shader */ tgsi_scan_shader(templ->tokens, &shader->info); /* we need to keep a local copy of the tokens */ shader->base.tokens = tgsi_dup_tokens(templ->tokens); + shader->draw_data = draw_create_fragment_shader(llvmpipe->draw, templ); + if (shader->draw_data == NULL) { + FREE((void *) shader->base.tokens); + FREE(shader); + return NULL; + } + + nr_samplers = shader->info.file_max[TGSI_FILE_SAMPLER] + 1; + + shader->variant_key_size = Offset(struct lp_fragment_shader_variant_key, + sampler[nr_samplers]); + if (LP_DEBUG & DEBUG_TGSI) { - debug_printf("llvmpipe: Create fragment shader %p:\n", (void *) shader); + unsigned attrib; + debug_printf("llvmpipe: Create fragment shader #%u %p:\n", shader->no, (void *) shader); tgsi_dump(templ->tokens, 0); + debug_printf("usage masks:\n"); + for (attrib = 0; attrib < shader->info.num_inputs; ++attrib) { + unsigned usage_mask = 
shader->info.input_usage_mask[attrib]; + debug_printf(" IN[%u].%s%s%s%s\n", + attrib, + usage_mask & TGSI_WRITEMASK_X ? "x" : "", + usage_mask & TGSI_WRITEMASK_Y ? "y" : "", + usage_mask & TGSI_WRITEMASK_Z ? "z" : "", + usage_mask & TGSI_WRITEMASK_W ? "w" : ""); + } + debug_printf("\n"); } return shader; @@ -985,19 +955,47 @@ llvmpipe_bind_fs_state(struct pipe_context *pipe, void *fs) draw_flush(llvmpipe->draw); + draw_bind_fragment_shader(llvmpipe->draw, + (llvmpipe->fs ? llvmpipe->fs->draw_data : NULL)); + llvmpipe->fs = fs; llvmpipe->dirty |= LP_NEW_FS; } +static void +remove_shader_variant(struct llvmpipe_context *lp, + struct lp_fragment_shader_variant *variant) +{ + struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen); + unsigned i; + + if (gallivm_debug & GALLIVM_DEBUG_IR) { + debug_printf("llvmpipe: del fs #%u var #%u v created #%u v cached #%u v total cached #%u\n", + variant->shader->no, variant->no, variant->shader->variants_created, + variant->shader->variants_cached, lp->nr_fs_variants); + } + for (i = 0; i < Elements(variant->function); i++) { + if (variant->function[i]) { + if (variant->jit_function[i]) + LLVMFreeMachineCodeForFunction(screen->engine, + variant->function[i]); + LLVMDeleteFunction(variant->function[i]); + } + } + remove_from_list(&variant->list_item_local); + variant->shader->variants_cached--; + remove_from_list(&variant->list_item_global); + lp->nr_fs_variants--; + FREE(variant); +} static void llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); - struct llvmpipe_screen *screen = llvmpipe_screen(pipe->screen); struct lp_fragment_shader *shader = fs; - struct lp_fragment_shader_variant *variant; + struct lp_fs_variant_list_item *li; assert(fs != llvmpipe->fs); (void) llvmpipe; @@ -1005,29 +1003,21 @@ llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs) /* * XXX: we need to flush the context until we have some sort of reference * counting in 
fragment shaders as they may still be binned + * Flushing alone might not be sufficient we need to wait on it too. */ - draw_flush(llvmpipe->draw); - lp_setup_flush(llvmpipe->setup, 0); - - variant = shader->variants; - while(variant) { - struct lp_fragment_shader_variant *next = variant->next; - unsigned i; - - for (i = 0; i < Elements(variant->function); i++) { - if (variant->function[i]) { - if (variant->jit_function[i]) - LLVMFreeMachineCodeForFunction(screen->engine, - variant->function[i]); - LLVMDeleteFunction(variant->function[i]); - } - } - FREE(variant); + llvmpipe_finish(pipe, __FUNCTION__); - variant = next; + li = first_elem(&shader->variants); + while(!at_end(&shader->variants, li)) { + struct lp_fs_variant_list_item *next = next_elem(li); + remove_shader_variant(llvmpipe, li->base); + li = next; } + draw_delete_fragment_shader(llvmpipe->draw, shader->draw_data); + + assert(shader->variants_cached == 0); FREE((void *) shader->base.tokens); FREE(shader); } @@ -1044,19 +1034,20 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe, const void *data = constants ? 
llvmpipe_resource_data(constants) : NULL; assert(shader < PIPE_SHADER_TYPES); - assert(index == 0); + assert(index < PIPE_MAX_CONSTANT_BUFFERS); - if(llvmpipe->constants[shader] == constants) + if(llvmpipe->constants[shader][index] == constants) return; draw_flush(llvmpipe->draw); /* note: reference counting */ - pipe_resource_reference(&llvmpipe->constants[shader], constants); + pipe_resource_reference(&llvmpipe->constants[shader][index], constants); - if(shader == PIPE_SHADER_VERTEX) { - draw_set_mapped_constant_buffer(llvmpipe->draw, PIPE_SHADER_VERTEX, 0, - data, size); + if(shader == PIPE_SHADER_VERTEX || + shader == PIPE_SHADER_GEOMETRY) { + draw_set_mapped_constant_buffer(llvmpipe->draw, shader, + index, data, size); } llvmpipe->dirty |= LP_NEW_CONSTANTS; @@ -1064,6 +1055,25 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe, /** + * Return the blend factor equivalent to a destination alpha of one. + */ +static INLINE unsigned +force_dst_alpha_one(unsigned factor) +{ + switch(factor) { + case PIPE_BLENDFACTOR_DST_ALPHA: + return PIPE_BLENDFACTOR_ONE; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + return PIPE_BLENDFACTOR_ZERO; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + return PIPE_BLENDFACTOR_ZERO; + } + + return factor; +} + + +/** * We need to generate several variants of the fragment pipeline to match * all the combinations of the contributing state atoms. 
* @@ -1077,7 +1087,7 @@ make_variant_key(struct llvmpipe_context *lp, { unsigned i; - memset(key, 0, sizeof *key); + memset(key, 0, shader->variant_key_size); if (lp->framebuffer.zsbuf) { if (lp->depth_stencil->depth.enabled) { @@ -1096,7 +1106,9 @@ make_variant_key(struct llvmpipe_context *lp, /* alpha.ref_value is passed in jit_context */ key->flatshade = lp->rasterizer->flatshade; - key->scissor = lp->rasterizer->scissor; + if (lp->active_query_count) { + key->occlusion_count = TRUE; + } if (lp->framebuffer.nr_cbufs) { memcpy(&key->blend, lp->blend, sizeof key->blend); @@ -1104,31 +1116,59 @@ make_variant_key(struct llvmpipe_context *lp, key->nr_cbufs = lp->framebuffer.nr_cbufs; for (i = 0; i < lp->framebuffer.nr_cbufs; i++) { + enum pipe_format format = lp->framebuffer.cbufs[i]->format; + struct pipe_rt_blend_state *blend_rt = &key->blend.rt[i]; const struct util_format_description *format_desc; - unsigned chan; - format_desc = util_format_description(lp->framebuffer.cbufs[i]->format); + key->cbuf_format[i] = format; + + format_desc = util_format_description(format); assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB || format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB); - key->blend.rt[i].colormask = lp->blend->rt[i].colormask; + blend_rt->colormask = lp->blend->rt[i].colormask; - /* mask out color channels not present in the color buffer. - * Should be simple to incorporate per-cbuf writemasks: + /* + * Mask out color channels not present in the color buffer. */ - for(chan = 0; chan < 4; ++chan) { - enum util_format_swizzle swizzle = format_desc->swizzle[chan]; + blend_rt->colormask &= util_format_colormask(format_desc); - if(swizzle > UTIL_FORMAT_SWIZZLE_W) - key->blend.rt[i].colormask &= ~(1 << chan); + /* + * Our swizzled render tiles always have an alpha channel, but the linear + * render target format often does not, so force here the dst alpha to be + * one. + * + * This is not a mere optimization. 
Wrong results will be produced if the + * dst alpha is used, the dst format does not have alpha, and the previous + * rendering was not flushed from the swizzled to linear buffer. For + * example, NonPowTwo DCT. + * + * TODO: This should be generalized to all channels for better + * performance, but only alpha causes correctness issues. + * + * Also, force rgb/alpha func/factors match, to make AoS blending easier. + */ + if (format_desc->swizzle[3] > UTIL_FORMAT_SWIZZLE_W) { + blend_rt->rgb_src_factor = force_dst_alpha_one(blend_rt->rgb_src_factor); + blend_rt->rgb_dst_factor = force_dst_alpha_one(blend_rt->rgb_dst_factor); + blend_rt->alpha_func = blend_rt->rgb_func; + blend_rt->alpha_src_factor = blend_rt->rgb_src_factor; + blend_rt->alpha_dst_factor = blend_rt->rgb_dst_factor; } } - for(i = 0; i < PIPE_MAX_SAMPLERS; ++i) - if(shader->info.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) - lp_sampler_static_state(&key->sampler[i], lp->fragment_sampler_views[i], lp->sampler[i]); -} + /* This value will be the same for all the variants of a given shader: + */ + key->nr_samplers = shader->info.file_max[TGSI_FILE_SAMPLER] + 1; + for(i = 0; i < key->nr_samplers; ++i) { + if(shader->info.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) { + lp_sampler_static_state(&key->sampler[i], + lp->fragment_sampler_views[i], + lp->sampler[i]); + } + } +} /** * Update fragment state. 
This is called just prior to drawing @@ -1139,22 +1179,42 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) { struct lp_fragment_shader *shader = lp->fs; struct lp_fragment_shader_variant_key key; - struct lp_fragment_shader_variant *variant; - boolean opaque; + struct lp_fragment_shader_variant *variant = NULL; + struct lp_fs_variant_list_item *li; make_variant_key(lp, shader, &key); - variant = shader->variants; - while(variant) { - if(memcmp(&variant->key, &key, sizeof key) == 0) + li = first_elem(&shader->variants); + while(!at_end(&shader->variants, li)) { + if(memcmp(&li->base->key, &key, shader->variant_key_size) == 0) { + variant = li->base; break; - - variant = variant->next; + } + li = next_elem(li); } - if (!variant) { + if (variant) { + move_to_head(&lp->fs_variants_list, &variant->list_item_global); + } + else { int64_t t0, t1; int64_t dt; + unsigned i; + if (lp->nr_fs_variants >= LP_MAX_SHADER_VARIANTS) { + struct pipe_context *pipe = &lp->pipe; + + /* + * XXX: we need to flush the context until we have some sort of reference + * counting in fragment shaders as they may still be binned + * Flushing alone might not be sufficient we need to wait on it too. + */ + llvmpipe_finish(pipe, __FUNCTION__); + + for (i = 0; i < LP_MAX_SHADER_VARIANTS / 4; i++) { + struct lp_fs_variant_list_item *item = last_elem(&lp->fs_variants_list); + remove_shader_variant(lp, item->base); + } + } t0 = os_time_get(); variant = generate_variant(lp, shader, &key); @@ -1163,24 +1223,16 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) dt = t1 - t0; LP_COUNT_ADD(llvm_compile_time, dt); LP_COUNT_ADD(nr_llvm_compiles, 2); /* emit vs. 
omit in/out test */ + + if (variant) { + insert_at_head(&shader->variants, &variant->list_item_local); + insert_at_head(&lp->fs_variants_list, &variant->list_item_global); + lp->nr_fs_variants++; + shader->variants_cached++; + } } - /* TODO: put this in the variant */ - /* TODO: most of these can be relaxed, in particular the colormask */ - opaque = !key.blend.logicop_enable && - !key.blend.rt[0].blend_enable && - key.blend.rt[0].colormask == 0xf && - !key.stencil[0].enabled && - !key.alpha.enabled && - !key.depth.enabled && - !key.scissor && - !shader->info.uses_kill - ? TRUE : FALSE; - - lp_setup_set_fs_functions(lp->setup, - variant->jit_function[RAST_WHOLE], - variant->jit_function[RAST_EDGE_TEST], - opaque); + lp_setup_set_fs_variant(lp->setup, variant); } |