diff options
Diffstat (limited to 'src/gallium/drivers/llvmpipe')
37 files changed, 1028 insertions, 430 deletions
diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 4ea367597e1..526e85c82e1 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -59,7 +59,7 @@ lp_tile_soa.c: lp_tile_soa.py ../../auxiliary/util/u_format_parse.py ../../auxil python lp_tile_soa.py ../../auxiliary/util/u_format.csv > $@ LDFLAGS += $(LLVM_LDFLAGS) -LIBS += $(GL_LIB_DEPS) -L../../auxiliary/ -lgallium libllvmpipe.a $(LLVM_LIBS) +LIBS += -L../../auxiliary/ -lgallium libllvmpipe.a $(LLVM_LIBS) $(GL_LIB_DEPS) LD=g++ $(PROGS): lp_test_main.o libllvmpipe.a diff --git a/src/gallium/drivers/llvmpipe/README b/src/gallium/drivers/llvmpipe/README index 3c3fd386b52..9c874acdedc 100644 --- a/src/gallium/drivers/llvmpipe/README +++ b/src/gallium/drivers/llvmpipe/README @@ -69,7 +69,15 @@ Requirements http://people.freedesktop.org/~jrfonseca/llvm/ and set the LLVM environment variable to the extracted path. - The version of LLVM from SVN ("2.7svn") from mid-March 2010 seems pretty + For MSVC there are two set of binaries: llvm-x.x-msvc32mt.7z and + llvm-x.x-msvc32mtd.7z . + + You have to set the LLVM=/path/to/llvm-x.x-msvc32mtd env var when passing + debug=yes to scons, and LLVM=/path/to/llvm-x.x-msvc32mt when building with + debug=no. This is necessary as LLVM builds as static library so the chosen + MS CRT must match. + + The version of LLVM from SVN ("2.7svn") from mid-March 2010 is pretty stable and has some features not in version 2.6. 
- scons (optional) diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index 2911cf2179a..c155558aa54 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -85,4 +85,4 @@ if env['platform'] != 'embedded': ) env.InstallProgram(target) - Export('llvmpipe') +Export('llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend.h b/src/gallium/drivers/llvmpipe/lp_bld_blend.h index ebbdb1a604c..5cecec3d7f9 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend.h @@ -29,17 +29,6 @@ #define LP_BLD_BLEND_H -/** - * @file - * LLVM IR building helpers interfaces. - * - * We use LLVM-C bindings for now. They are not documented, but follow the C++ - * interfaces very closely, and appear to be complete enough for code - * genration. See - * http://npcontemplation.blogspot.com/2008/06/secret-of-llvm-c-bindings.html - * for a standalone example. - */ - #include "gallivm/lp_bld.h" #include "pipe/p_format.h" @@ -75,6 +64,7 @@ LLVMValueRef lp_build_blend_aos(LLVMBuilderRef builder, const struct pipe_blend_state *blend, struct lp_type type, + unsigned rt, LLVMValueRef src, LLVMValueRef dst, LLVMValueRef const_, @@ -85,6 +75,7 @@ void lp_build_blend_soa(LLVMBuilderRef builder, const struct pipe_blend_state *blend, struct lp_type type, + unsigned rt, LLVMValueRef src[4], LLVMValueRef dst[4], LLVMValueRef const_[4], diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c index 3fa5e51cac5..70d08e71f6e 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c @@ -243,6 +243,9 @@ lp_build_blend_factor(struct lp_build_blend_aos_context *bld, } +/** + * Is (a OP b) == (b OP a)? 
+ */ boolean lp_build_blend_func_commutative(unsigned func) { @@ -305,6 +308,7 @@ LLVMValueRef lp_build_blend_aos(LLVMBuilderRef builder, const struct pipe_blend_state *blend, struct lp_type type, + unsigned rt, LLVMValueRef src, LLVMValueRef dst, LLVMValueRef const_, @@ -314,11 +318,10 @@ lp_build_blend_aos(LLVMBuilderRef builder, LLVMValueRef src_term; LLVMValueRef dst_term; - /* FIXME */ - assert(blend->independent_blend_enable == 0); - assert(blend->rt[0].colormask == 0xf); + /* FIXME: color masking not implemented yet */ + assert(blend->rt[rt].colormask == 0xf); - if(!blend->rt[0].blend_enable) + if(!blend->rt[rt].blend_enable) return src; /* It makes no sense to blend unless values are normalized */ @@ -335,16 +338,16 @@ lp_build_blend_aos(LLVMBuilderRef builder, * combinations it is possible to reorder the operations and therefore saving * some instructions. */ - src_term = lp_build_blend_factor(&bld, src, blend->rt[0].rgb_src_factor, - blend->rt[0].alpha_src_factor, alpha_swizzle); - dst_term = lp_build_blend_factor(&bld, dst, blend->rt[0].rgb_dst_factor, - blend->rt[0].alpha_dst_factor, alpha_swizzle); + src_term = lp_build_blend_factor(&bld, src, blend->rt[rt].rgb_src_factor, + blend->rt[rt].alpha_src_factor, alpha_swizzle); + dst_term = lp_build_blend_factor(&bld, dst, blend->rt[rt].rgb_dst_factor, + blend->rt[rt].alpha_dst_factor, alpha_swizzle); lp_build_name(src_term, "src_term"); lp_build_name(dst_term, "dst_term"); - if(blend->rt[0].rgb_func == blend->rt[0].alpha_func) { - return lp_build_blend_func(&bld.base, blend->rt[0].rgb_func, src_term, dst_term); + if(blend->rt[rt].rgb_func == blend->rt[rt].alpha_func) { + return lp_build_blend_func(&bld.base, blend->rt[rt].rgb_func, src_term, dst_term); } else { /* Seperate RGB / A functions */ @@ -352,8 +355,8 @@ lp_build_blend_aos(LLVMBuilderRef builder, LLVMValueRef rgb; LLVMValueRef alpha; - rgb = lp_build_blend_func(&bld.base, blend->rt[0].rgb_func, src_term, dst_term); - alpha = 
lp_build_blend_func(&bld.base, blend->rt[0].alpha_func, src_term, dst_term); + rgb = lp_build_blend_func(&bld.base, blend->rt[rt].rgb_func, src_term, dst_term); + alpha = lp_build_blend_func(&bld.base, blend->rt[rt].alpha_func, src_term, dst_term); return lp_build_blend_swizzle(&bld, rgb, alpha, LP_BUILD_BLEND_SWIZZLE_RGBA, alpha_swizzle); } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c index b7523eb9c13..b9c7a6ceed6 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c @@ -33,8 +33,8 @@ * Blending in SoA is much faster than AoS, especially when separate rgb/alpha * factors/functions are used, since no channel masking/shuffling is necessary * and we can achieve the full throughput of the SIMD operations. Furthermore - * the fragment shader output is also in SoA, so it fits nicely with the rest of - * the fragment pipeline. + * the fragment shader output is also in SoA, so it fits nicely with the rest + * of the fragment pipeline. * * The drawback is that to be displayed the color buffer needs to be in AoS * layout, so we need to tile/untile the color buffer before/after rendering. @@ -77,7 +77,7 @@ /** - * We may the same values several times, so we keep them here to avoid + * We may use the same values several times, so we keep them here to avoid * recomputing them. Also reusing the values allows us to do simplifications * that LLVM optimization passes wouldn't normally be able to do. */ @@ -98,16 +98,22 @@ struct lp_build_blend_soa_context /** * We store all factors in a table in order to eliminate redundant * multiplications later. + * Indexes are: factor[src,dst][color,term][r,g,b,a] */ LLVMValueRef factor[2][2][4]; /** * Table with all terms. + * Indexes are: term[src,dst][r,g,b,a] */ LLVMValueRef term[2][4]; }; +/** + * Build a single SOA blend factor for a color channel. 
+ * \param i the color channel in [0,3] + */ static LLVMValueRef lp_build_blend_soa_factor(struct lp_build_blend_soa_context *bld, unsigned factor, unsigned i) @@ -191,6 +197,7 @@ lp_build_blend_soa_factor(struct lp_build_blend_soa_context *bld, /** * Generate blend code in SOA mode. + * \param rt render target index (to index the blend / colormask state) * \param src src/fragment color * \param dst dst/framebuffer color * \param con constant blend color @@ -200,6 +207,7 @@ void lp_build_blend_soa(LLVMBuilderRef builder, const struct pipe_blend_state *blend, struct lp_type type, + unsigned rt, LLVMValueRef src[4], LLVMValueRef dst[4], LLVMValueRef con[4], @@ -208,6 +216,8 @@ lp_build_blend_soa(LLVMBuilderRef builder, struct lp_build_blend_soa_context bld; unsigned i, j, k; + assert(rt < PIPE_MAX_COLOR_BUFS); + /* Setup build context */ memset(&bld, 0, sizeof bld); lp_build_context_init(&bld.base, builder, type); @@ -218,7 +228,8 @@ lp_build_blend_soa(LLVMBuilderRef builder, } for (i = 0; i < 4; ++i) { - if (blend->rt[0].colormask & (1 << i)) { + /* only compute blending for the color channels enabled for writing */ + if (blend->rt[rt].colormask & (1 << i)) { if (blend->logicop_enable) { if(!type.floating) { res[i] = lp_build_logicop(builder, blend->logicop_func, src[i], dst[i]); @@ -226,10 +237,10 @@ lp_build_blend_soa(LLVMBuilderRef builder, else res[i] = dst[i]; } - else if (blend->rt[0].blend_enable) { - unsigned src_factor = i < 3 ? blend->rt[0].rgb_src_factor : blend->rt[0].alpha_src_factor; - unsigned dst_factor = i < 3 ? blend->rt[0].rgb_dst_factor : blend->rt[0].alpha_dst_factor; - unsigned func = i < 3 ? blend->rt[0].rgb_func : blend->rt[0].alpha_func; + else if (blend->rt[rt].blend_enable) { + unsigned src_factor = i < 3 ? blend->rt[rt].rgb_src_factor : blend->rt[rt].alpha_src_factor; + unsigned dst_factor = i < 3 ? blend->rt[rt].rgb_dst_factor : blend->rt[rt].alpha_dst_factor; + unsigned func = i < 3 ? 
blend->rt[rt].rgb_func : blend->rt[rt].alpha_func; boolean func_commutative = lp_build_blend_func_commutative(func); /* It makes no sense to blend unless values are normalized */ @@ -269,9 +280,9 @@ lp_build_blend_soa(LLVMBuilderRef builder, /* XXX special case these combos to work around an apparent * bug in LLVM. * This hack disables the check for multiplication by zero - * in lp_bld_mul(). When we optimize away the multiplication, - * something goes wrong during code generation and we segfault - * at runtime. + * in lp_bld_mul(). When we optimize away the + * multiplication, something goes wrong during code + * generation and we segfault at runtime. */ LLVMValueRef zeroSave = bld.base.zero; bld.base.zero = NULL; @@ -287,7 +298,7 @@ lp_build_blend_soa(LLVMBuilderRef builder, /* See if this function has been previously applied */ for(j = 0; j < i; ++j) { - unsigned prev_func = j < 3 ? blend->rt[0].rgb_func : blend->rt[0].alpha_func; + unsigned prev_func = j < 3 ? blend->rt[rt].rgb_func : blend->rt[rt].alpha_func; unsigned func_reverse = lp_build_blend_func_reverse(func, prev_func); if((!func_reverse && diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c index 1b59a13c946..e05bbe5011a 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c @@ -72,6 +72,7 @@ #include "gallivm/lp_bld_const.h" #include "gallivm/lp_bld_logic.h" #include "gallivm/lp_bld_flow.h" +#include "gallivm/lp_bld_intr.h" #include "gallivm/lp_bld_debug.h" #include "gallivm/lp_bld_swizzle.h" @@ -445,6 +446,42 @@ get_s_shift_and_mask(const struct util_format_description *format_desc, } +/** + * Perform the occlusion test and increase the counter. + * Test the depth mask. Add the number of channel which has none zero mask + * into the occlusion counter. e.g. maskvalue is {-1, -1, -1, -1}. + * The counter will add 4. + * + * \param type holds element type of the mask vector. 
+ * \param maskvalue is the depth test mask. + * \param counter is a pointer to the uint32 counter. + */ +static void +lp_build_occlusion_count(LLVMBuilderRef builder, + struct lp_type type, + LLVMValueRef maskvalue, + LLVMValueRef counter) +{ + LLVMValueRef countmask = lp_build_const_int_vec(type, 1); + LLVMValueRef countv = LLVMBuildAnd(builder, maskvalue, countmask, "countv"); + LLVMTypeRef i8v16 = LLVMVectorType(LLVMInt8Type(), 16); + LLVMValueRef counti = LLVMBuildBitCast(builder, countv, i8v16, "counti"); + LLVMValueRef maskarray[4] = { + LLVMConstInt(LLVMInt32Type(), 0, 0), + LLVMConstInt(LLVMInt32Type(), 4, 0), + LLVMConstInt(LLVMInt32Type(), 8, 0), + LLVMConstInt(LLVMInt32Type(), 12, 0), + }; + LLVMValueRef shufflemask = LLVMConstVector(maskarray, 4); + LLVMValueRef shufflev = LLVMBuildShuffleVector(builder, counti, LLVMGetUndef(i8v16), shufflemask, "shufflev"); + LLVMValueRef shuffle = LLVMBuildBitCast(builder, shufflev, LLVMInt32Type(), "shuffle"); + LLVMValueRef count = lp_build_intrinsic_unary(builder, "llvm.ctpop.i32", LLVMInt32Type(), shuffle); + LLVMValueRef orig = LLVMBuildLoad(builder, counter, "orig"); + LLVMValueRef incr = LLVMBuildAdd(builder, orig, count, "incr"); + LLVMBuildStore(builder, incr, counter); +} + + /** * Generate code for performing depth and/or stencil tests. 
@@ -470,7 +507,8 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, LLVMValueRef stencil_refs[2], LLVMValueRef z_src, LLVMValueRef zs_dst_ptr, - LLVMValueRef face) + LLVMValueRef face, + LLVMValueRef counter) { struct lp_build_context bld; struct lp_build_context sbld; @@ -682,4 +720,7 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, if (depth->enabled && stencil[0].enabled) lp_build_mask_update(mask, z_pass); + + if (counter) + lp_build_occlusion_count(builder, type, mask->value, counter); } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.h b/src/gallium/drivers/llvmpipe/lp_bld_depth.h index 27dd46b625d..e257a5bd7d0 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.h @@ -60,7 +60,8 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, LLVMValueRef stencil_refs[2], LLVMValueRef zs_src, LLVMValueRef zs_dst_ptr, - LLVMValueRef facing); + LLVMValueRef facing, + LLVMValueRef counter); #endif /* !LP_BLD_DEPTH_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index 4e597b24796..de7fe7a1796 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -82,11 +82,7 @@ struct llvmpipe_context { unsigned dirty; /**< Mask of LP_NEW_x flags */ - /* Counter for occlusion queries. Note this supports overlapping - * queries. 
- */ - uint64_t occlusion_count; - unsigned active_query_count; + int active_query_count; /** Mapped vertex buffers */ ubyte *mapped_vbuffer[PIPE_MAX_ATTRIBS]; diff --git a/src/gallium/drivers/llvmpipe/lp_debug.h b/src/gallium/drivers/llvmpipe/lp_debug.h index ee818143610..92fb2b3ee5b 100644 --- a/src/gallium/drivers/llvmpipe/lp_debug.h +++ b/src/gallium/drivers/llvmpipe/lp_debug.h @@ -39,16 +39,13 @@ st_print_current(void); #define DEBUG_PIPE 0x1 #define DEBUG_TGSI 0x2 #define DEBUG_TEX 0x4 -#define DEBUG_ASM 0x8 #define DEBUG_SETUP 0x10 #define DEBUG_RAST 0x20 #define DEBUG_QUERY 0x40 #define DEBUG_SCREEN 0x80 -#define DEBUG_JIT 0x100 #define DEBUG_SHOW_TILES 0x200 #define DEBUG_SHOW_SUBTILES 0x400 #define DEBUG_COUNTERS 0x800 -#define DEBUG_NO_LLVM_OPT 0x1000 #ifdef DEBUG diff --git a/src/gallium/drivers/llvmpipe/lp_flush.c b/src/gallium/drivers/llvmpipe/lp_flush.c index 644b821957a..e1425435e19 100644 --- a/src/gallium/drivers/llvmpipe/lp_flush.c +++ b/src/gallium/drivers/llvmpipe/lp_flush.c @@ -31,6 +31,7 @@ #include "pipe/p_defines.h" +#include "util/u_string.h" #include "draw/draw_context.h" #include "lp_flush.h" #include "lp_context.h" @@ -71,25 +72,25 @@ llvmpipe_flush( struct pipe_context *pipe, } /* Enable to dump BMPs of the color/depth buffers each frame */ -#if 0 - if (flags & PIPE_FLUSH_FRAME) { - static unsigned frame_no = 1; - char filename[256]; - unsigned i; - - for (i = 0; i < llvmpipe->framebuffer.nr_cbufs; i++) { - util_snprintf(filename, sizeof(filename), "cbuf%u_%u", i, frame_no); - debug_dump_surface_bmp(&llvmpipe->pipe, filename, llvmpipe->framebuffer.cbufs[0]); - } + if (0) { + if (flags & PIPE_FLUSH_FRAME) { + static unsigned frame_no = 1; + char filename[256]; + unsigned i; + + for (i = 0; i < llvmpipe->framebuffer.nr_cbufs; i++) { + util_snprintf(filename, sizeof(filename), "cbuf%u_%u", i, frame_no); + debug_dump_surface_bmp(&llvmpipe->pipe, filename, llvmpipe->framebuffer.cbufs[0]); + } - if (0) { - util_snprintf(filename, 
sizeof(filename), "zsbuf_%u", frame_no); - debug_dump_surface_bmp(&llvmpipe->pipe, filename, llvmpipe->framebuffer.zsbuf); - } + if (0) { + util_snprintf(filename, sizeof(filename), "zsbuf_%u", frame_no); + debug_dump_surface_bmp(&llvmpipe->pipe, filename, llvmpipe->framebuffer.zsbuf); + } - ++frame_no; + ++frame_no; + } } -#endif } diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index 30e206a2b42..23aa34ddec1 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -38,7 +38,7 @@ #include "util/u_memory.h" #include "util/u_cpu_detect.h" #include "gallivm/lp_bld_init.h" -#include "lp_debug.h" +#include "gallivm/lp_bld_debug.h" #include "lp_screen.h" #include "gallivm/lp_bld_intr.h" #include "lp_jit.h" @@ -151,9 +151,9 @@ lp_jit_init_globals(struct llvmpipe_screen *screen) screen->context_ptr_type = LLVMPointerType(context_type, 0); } -#ifdef DEBUG - LLVMDumpModule(screen->module); -#endif + if (gallivm_debug & GALLIVM_DEBUG_IR) { + LLVMDumpModule(screen->module); + } } @@ -181,7 +181,7 @@ lp_jit_screen_init(struct llvmpipe_screen *screen) screen->pass = LLVMCreateFunctionPassManager(screen->provider); LLVMAddTargetData(screen->target, screen->pass); - if ((LP_DEBUG & DEBUG_NO_LLVM_OPT) == 0) { + if ((gallivm_debug & GALLIVM_DEBUG_NO_OPT) == 0) { /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, * but there are more on SVN. */ /* TODO: Add more passes */ @@ -196,6 +196,11 @@ lp_jit_screen_init(struct llvmpipe_screen *screen) LLVMAddInstructionCombiningPass(screen->pass); } LLVMAddGVNPass(screen->pass); + } else { + /* We need at least this pass to prevent the backends to fail in + * unexpected ways. 
+ */ + LLVMAddPromoteMemoryToRegisterPass(screen->pass); } lp_jit_init_globals(screen); diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h index 5d0268c68c4..8dee0413019 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.h +++ b/src/gallium/drivers/llvmpipe/lp_jit.h @@ -146,13 +146,9 @@ enum { lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_BLEND_COLOR, "blend_color") #define lp_jit_context_textures(_builder, _ptr) \ - lp_build_struct_get_ptr(_builder, _ptr, LP_JIT_CONTEXT_TEXTURES, "textures") + lp_build_struct_get_ptr(_builder, _ptr, LP_JIT_CTX_TEXTURES, "textures") -/** Indexes into jit_function[] array */ -#define RAST_WHOLE 0 -#define RAST_EDGE_TEST 1 - typedef void (*lp_jit_frag_func)(const struct lp_jit_context *context, @@ -169,7 +165,38 @@ typedef void const int32_t c3, const int32_t *step1, const int32_t *step2, - const int32_t *step3); + const int32_t *step3, + uint32_t *counter); + + +/** cast wrapper to avoid compiler warnings */ +static INLINE lp_jit_frag_func +cast_voidptr_to_lp_jit_frag_func(void *v) +{ + union { + void *v; + lp_jit_frag_func f; + } u; + assert(sizeof(u.v) == sizeof(u.f)); + u.v = v; + return u.f; +} + + +/** cast wrapper */ +static INLINE void * +cast_lp_jit_frag_func_to_voidptr(lp_jit_frag_func f) +{ + union { + void *v; + lp_jit_frag_func f; + } u; + assert(sizeof(u.v) == sizeof(u.f)); + u.f = f; + return u.v; +} + + void diff --git a/src/gallium/drivers/llvmpipe/lp_query.c b/src/gallium/drivers/llvmpipe/lp_query.c index 5554285425d..c23e9839063 100644 --- a/src/gallium/drivers/llvmpipe/lp_query.c +++ b/src/gallium/drivers/llvmpipe/lp_query.c @@ -1,6 +1,7 @@ /************************************************************************** * * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 2010 VMware, Inc. * All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a @@ -18,15 +19,15 @@ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * **************************************************************************/ -/* Author: - * Keith Whitwell <[email protected]> +/* Authors: + * Keith Whitwell, Qicheng Christopher Li, Brian Paul */ #include "draw/draw_context.h" @@ -34,12 +35,10 @@ #include "util/u_memory.h" #include "lp_context.h" #include "lp_query.h" +#include "lp_rast.h" +#include "lp_rast_priv.h" #include "lp_state.h" - -struct llvmpipe_query { - uint64_t start; - uint64_t end; -}; +#include "lp_setup_context.h" static struct llvmpipe_query *llvmpipe_query( struct pipe_query *p ) @@ -51,15 +50,46 @@ static struct pipe_query * llvmpipe_create_query(struct pipe_context *pipe, unsigned type) { + struct llvmpipe_query *pq; + assert(type == PIPE_QUERY_OCCLUSION_COUNTER); - return (struct pipe_query *)CALLOC_STRUCT( llvmpipe_query ); + + pq = CALLOC_STRUCT( llvmpipe_query ); + if (pq) { + pipe_mutex_init(pq->mutex); + } + + return (struct pipe_query *) pq; } static void llvmpipe_destroy_query(struct pipe_context *pipe, struct pipe_query *q) { - FREE(q); + struct llvmpipe_query *pq = llvmpipe_query(q); + pipe_mutex_destroy(pq->mutex); + FREE(pq); +} + + +static boolean +llvmpipe_get_query_result(struct pipe_context *pipe, + struct pipe_query *q, + boolean wait, + uint64_t *result ) +{ + struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe ); + struct 
llvmpipe_query *pq = llvmpipe_query(q); + + if (!pq->done) { + lp_setup_flush(llvmpipe->setup, TRUE); + } + + if (pq->done) { + *result = pq->result; + } + + return pq->done; } @@ -67,9 +97,23 @@ static void llvmpipe_begin_query(struct pipe_context *pipe, struct pipe_query *q) { struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe ); - struct llvmpipe_query *sq = llvmpipe_query(q); - - sq->start = llvmpipe->occlusion_count; + struct llvmpipe_query *pq = llvmpipe_query(q); + + /* Check if the query is already in the scene. If so, we need to + * flush the scene now. Real apps shouldn't re-use a query in a + * frame of rendering. + */ + if (pq->binned) { + struct pipe_fence_handle *fence; + pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, &fence); + if (fence) { + pipe->screen->fence_finish(pipe->screen, fence, 0); + pipe->screen->fence_reference(pipe->screen, &fence, NULL); + } + } + + lp_setup_begin_query(llvmpipe->setup, pq); + llvmpipe->active_query_count++; llvmpipe->dirty |= LP_NEW_QUERY; } @@ -79,26 +123,16 @@ static void llvmpipe_end_query(struct pipe_context *pipe, struct pipe_query *q) { struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe ); - struct llvmpipe_query *sq = llvmpipe_query(q); + struct llvmpipe_query *pq = llvmpipe_query(q); + lp_setup_end_query(llvmpipe->setup, pq); + + assert(llvmpipe->active_query_count); llvmpipe->active_query_count--; - sq->end = llvmpipe->occlusion_count; llvmpipe->dirty |= LP_NEW_QUERY; } -static boolean -llvmpipe_get_query_result(struct pipe_context *pipe, - struct pipe_query *q, - boolean wait, - uint64_t *result ) -{ - struct llvmpipe_query *sq = llvmpipe_query(q); - *result = sq->end - sq->start; - return TRUE; -} - - void llvmpipe_init_query_funcs(struct llvmpipe_context *llvmpipe ) { llvmpipe->pipe.create_query = llvmpipe_create_query; diff --git a/src/gallium/drivers/llvmpipe/lp_query.h b/src/gallium/drivers/llvmpipe/lp_query.h index fa9fcd87139..721c41cb5c9 100644 --- 
a/src/gallium/drivers/llvmpipe/lp_query.h +++ b/src/gallium/drivers/llvmpipe/lp_query.h @@ -1,6 +1,7 @@ /************************************************************************** * * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 2010 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -25,14 +26,33 @@ * **************************************************************************/ -/* Author: - * Keith Whitwell +/* Authors: + * Keith Whitwell, Qicheng Christopher Li, Brian Paul */ #ifndef LP_QUERY_H #define LP_QUERY_H +#include <limits.h> +#include "os/os_thread.h" +#include "lp_limits.h" + + struct llvmpipe_context; + + +struct llvmpipe_query { + uint64_t count[LP_MAX_THREADS]; /**< a counter for each thread */ + uint64_t result; /**< total of all counters */ + + pipe_mutex mutex; + unsigned num_tiles, tile_count; + + boolean done; + boolean binned; /**< has this query been binned in the scene? */ +}; + + extern void llvmpipe_init_query_funcs(struct llvmpipe_context * ); diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index a00a592f2fe..6bb868bf1a9 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -34,6 +34,7 @@ #include "lp_debug.h" #include "lp_fence.h" #include "lp_perf.h" +#include "lp_query.h" #include "lp_rast.h" #include "lp_rast_priv.h" #include "lp_tile_soa.h" @@ -442,7 +443,7 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task, color, depth, INT_MIN, INT_MIN, INT_MIN, - NULL, NULL, NULL ); + NULL, NULL, NULL, &task->vis_counter); } } } @@ -502,7 +503,8 @@ void lp_rast_shade_quads( struct lp_rasterizer_task *task, c1, c2, c3, inputs->step[0], inputs->step[1], - inputs->step[2]); + inputs->step[2], + &task->vis_counter); } @@ -602,6 +604,60 @@ lp_rast_fence(struct lp_rasterizer_task *task, } +/** + * Begin a new occlusion query. + * This is a bin command put in all bins. 
+ * Called per thread. + */ +void +lp_rast_begin_query(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) +{ + /* Reset the per-task counter */ + task->vis_counter = 0; +} + + +/** + * End the current occlusion query. + * This is a bin command put in all bins. + * Called per thread. + */ +void +lp_rast_end_query(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) +{ + struct llvmpipe_query *pq = arg.query_obj; + + pipe_mutex_lock(pq->mutex); + { + /* Accumulate the visible fragment counter from this tile in + * the query object. + */ + pq->count[task->thread_index] += task->vis_counter; + + /* check if this is the last tile in the scene */ + pq->tile_count++; + if (pq->tile_count == pq->num_tiles) { + uint i; + + /* sum the per-thread counters for the query */ + pq->result = 0; + for (i = 0; i < LP_MAX_THREADS; i++) { + pq->result += pq->count[i]; + } + + /* reset counters (in case this query is re-used in the scene) */ + memset(pq->count, 0, sizeof(pq->count)); + + pq->tile_count = 0; + pq->binned = FALSE; + pq->done = TRUE; + } + } + pipe_mutex_unlock(pq->mutex); +} + /** @@ -650,6 +706,8 @@ static struct { RAST(set_state), RAST(store_color), RAST(fence), + RAST(begin_query), + RAST(end_query), }; static void @@ -956,3 +1014,5 @@ lp_rast_get_num_threads( struct lp_rasterizer *rast ) { return rast->num_threads; } + + diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index e2f6f926779..881f475189e 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -157,6 +157,7 @@ union lp_rast_cmd_arg { uint8_t clear_color[4]; unsigned clear_zstencil; struct lp_fence *fence; + struct llvmpipe_query *query_obj; }; @@ -233,4 +234,11 @@ void lp_rast_store_color( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); +void lp_rast_begin_query(struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); + +void lp_rast_end_query(struct 
lp_rasterizer_task *, + const union lp_rast_cmd_arg ); + + #endif diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 5884d12721e..efc013ff3f0 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -33,6 +33,7 @@ #include "gallivm/lp_bld_debug.h" #include "lp_rast.h" #include "lp_scene.h" +#include "lp_state.h" #include "lp_texture.h" #include "lp_tile_soa.h" #include "lp_limits.h" @@ -59,6 +60,9 @@ struct lp_rasterizer_task /** "my" index */ unsigned thread_index; + /* occlusion counter for visible pixels */ + uint32_t vis_counter; + pipe_semaphore work_ready; pipe_semaphore work_done; }; @@ -221,7 +225,7 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task, color, depth, INT_MIN, INT_MIN, INT_MIN, - NULL, NULL, NULL ); + NULL, NULL, NULL, &task->vis_counter ); } diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c index 887f2dbad91..59e4c18ad05 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.c +++ b/src/gallium/drivers/llvmpipe/lp_scene.c @@ -30,7 +30,6 @@ #include "util/u_memory.h" #include "util/u_inlines.h" #include "util/u_simple_list.h" -#include "util/u_surface.h" #include "lp_scene.h" #include "lp_scene_queue.h" @@ -210,25 +209,31 @@ lp_scene_reset(struct lp_scene *scene ) -void +struct cmd_block * lp_bin_new_cmd_block( struct cmd_block_list *list ) { struct cmd_block *block = MALLOC_STRUCT(cmd_block); - list->tail->next = block; - list->tail = block; - block->next = NULL; - block->count = 0; + if (block) { + list->tail->next = block; + list->tail = block; + block->next = NULL; + block->count = 0; + } + return block; } -void +struct data_block * lp_bin_new_data_block( struct data_block_list *list ) { struct data_block *block = MALLOC_STRUCT(data_block); - list->tail->next = block; - list->tail = block; - block->next = NULL; - block->used = 0; + if (block) { + list->tail->next = block; + 
list->tail = block; + block->next = NULL; + block->used = 0; + } + return block; } diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h index 9467cd6f16d..22d619fdbc3 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.h +++ b/src/gallium/drivers/llvmpipe/lp_scene.h @@ -158,9 +158,9 @@ boolean lp_scene_is_empty(struct lp_scene *scene ); void lp_scene_reset(struct lp_scene *scene ); -void lp_bin_new_data_block( struct data_block_list *list ); +struct data_block *lp_bin_new_data_block( struct data_block_list *list ); -void lp_bin_new_cmd_block( struct cmd_block_list *list ); +struct cmd_block *lp_bin_new_cmd_block( struct cmd_block_list *list ); unsigned lp_scene_data_size( const struct lp_scene *scene ); @@ -181,15 +181,19 @@ static INLINE void * lp_scene_alloc( struct lp_scene *scene, unsigned size) { struct data_block_list *list = &scene->data; - - if (list->tail->used + size > DATA_BLOCK_SIZE) { - lp_bin_new_data_block( list ); + struct data_block *tail = list->tail; + + if (tail->used + size > DATA_BLOCK_SIZE) { + tail = lp_bin_new_data_block( list ); + if (!tail) { + /* out of memory */ + return NULL; + } } scene->scene_size += size; { - struct data_block *tail = list->tail; ubyte *data = tail->data + tail->used; tail->used += size; return data; @@ -205,15 +209,17 @@ lp_scene_alloc_aligned( struct lp_scene *scene, unsigned size, unsigned alignment ) { struct data_block_list *list = &scene->data; + struct data_block *tail = list->tail; - if (list->tail->used + size + alignment - 1 > DATA_BLOCK_SIZE) { - lp_bin_new_data_block( list ); + if (tail->used + size + alignment - 1 > DATA_BLOCK_SIZE) { + tail = lp_bin_new_data_block( list ); + if (!tail) + return NULL; } scene->scene_size += size; { - struct data_block *tail = list->tail; ubyte *data = tail->data + tail->used; unsigned offset = (((uintptr_t)data + alignment - 1) & ~(alignment - 1)) - (uintptr_t)data; tail->used += offset + size; @@ -257,16 +263,21 @@ 
lp_scene_bin_command( struct lp_scene *scene, { struct cmd_bin *bin = lp_scene_get_bin(scene, x, y); struct cmd_block_list *list = &bin->commands; + struct cmd_block *tail = list->tail; assert(x < scene->tiles_x); assert(y < scene->tiles_y); - if (list->tail->count == CMD_BLOCK_MAX) { - lp_bin_new_cmd_block( list ); + if (tail->count == CMD_BLOCK_MAX) { + tail = lp_bin_new_cmd_block( list ); + if (!tail) { + /* out of memory - simply ignore this command (for now) */ + return; + } + assert(tail->count == 0); } { - struct cmd_block *tail = list->tail; unsigned i = tail->count; tail->cmd[i] = cmd; tail->arg[i] = arg; diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 5f50446f846..cedc08e9292 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -34,6 +34,7 @@ #include "pipe/p_defines.h" #include "pipe/p_screen.h" +#include "gallivm/lp_bld_limits.h" #include "lp_texture.h" #include "lp_fence.h" #include "lp_jit.h" @@ -52,16 +53,13 @@ static const struct debug_named_value lp_debug_flags[] = { { "pipe", DEBUG_PIPE }, { "tgsi", DEBUG_TGSI }, { "tex", DEBUG_TEX }, - { "asm", DEBUG_ASM }, { "setup", DEBUG_SETUP }, { "rast", DEBUG_RAST }, { "query", DEBUG_QUERY }, { "screen", DEBUG_SCREEN }, - { "jit", DEBUG_JIT }, { "show_tiles", DEBUG_SHOW_TILES }, { "show_subtiles", DEBUG_SHOW_SUBTILES }, { "counters", DEBUG_COUNTERS }, - { "nopt", DEBUG_NO_LLVM_OPT }, {NULL, 0} }; #endif @@ -82,7 +80,7 @@ llvmpipe_get_name(struct pipe_screen *screen) static int -llvmpipe_get_param(struct pipe_screen *screen, int param) +llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) { switch (param) { case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: @@ -107,6 +105,8 @@ llvmpipe_get_param(struct pipe_screen *screen, int param) return PIPE_MAX_COLOR_BUFS; case PIPE_CAP_OCCLUSION_QUERY: return 1; + case PIPE_CAP_TIMER_QUERY: + return 0; case PIPE_CAP_TEXTURE_MIRROR_CLAMP: return 1; case 
PIPE_CAP_TEXTURE_MIRROR_REPEAT: @@ -124,7 +124,7 @@ llvmpipe_get_param(struct pipe_screen *screen, int param) case PIPE_CAP_BLEND_EQUATION_SEPARATE: return 1; case PIPE_CAP_INDEP_BLEND_ENABLE: - return 0; + return 1; case PIPE_CAP_INDEP_BLEND_FUNC: return 0; case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: @@ -133,14 +133,44 @@ llvmpipe_get_param(struct pipe_screen *screen, int param) case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: return 0; + case PIPE_CAP_MAX_VS_INSTRUCTIONS: + case PIPE_CAP_MAX_FS_INSTRUCTIONS: + case PIPE_CAP_MAX_VS_ALU_INSTRUCTIONS: + case PIPE_CAP_MAX_FS_ALU_INSTRUCTIONS: + case PIPE_CAP_MAX_VS_TEX_INSTRUCTIONS: + case PIPE_CAP_MAX_FS_TEX_INSTRUCTIONS: + case PIPE_CAP_MAX_VS_TEX_INDIRECTIONS: + case PIPE_CAP_MAX_FS_TEX_INDIRECTIONS: + /* There is no limit in number of instructions beyond available memory */ + return 32768; + case PIPE_CAP_MAX_VS_CONTROL_FLOW_DEPTH: + case PIPE_CAP_MAX_FS_CONTROL_FLOW_DEPTH: + return LP_MAX_TGSI_NESTING; + case PIPE_CAP_MAX_VS_INPUTS: + case PIPE_CAP_MAX_FS_INPUTS: + return PIPE_MAX_ATTRIBS; + case PIPE_CAP_MAX_FS_CONSTS: + case PIPE_CAP_MAX_VS_CONSTS: + /* There is no limit in number of constants beyond available memory */ + return 32768; + case PIPE_CAP_MAX_VS_TEMPS: + case PIPE_CAP_MAX_FS_TEMPS: + return LP_MAX_TGSI_TEMPS; + case PIPE_CAP_MAX_VS_ADDRS: + case PIPE_CAP_MAX_FS_ADDRS: + return LP_MAX_TGSI_ADDRS; + case PIPE_CAP_MAX_VS_PREDS: + case PIPE_CAP_MAX_FS_PREDS: + return LP_MAX_TGSI_PREDS; default: + assert(0); return 0; } } static float -llvmpipe_get_paramf(struct pipe_screen *screen, int param) +llvmpipe_get_paramf(struct pipe_screen *screen, enum pipe_cap param) { switch (param) { case PIPE_CAP_MAX_LINE_WIDTH: @@ -155,7 +185,13 @@ llvmpipe_get_paramf(struct pipe_screen *screen, int param) return 16.0; /* not actually signficant at this time */ case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: return 16.0; /* arbitrary */ + case PIPE_CAP_GUARD_BAND_LEFT: + case 
PIPE_CAP_GUARD_BAND_TOP: + case PIPE_CAP_GUARD_BAND_RIGHT: + case PIPE_CAP_GUARD_BAND_BOTTOM: + return 0.0; default: + assert(0); return 0; } } @@ -270,7 +306,16 @@ llvmpipe_destroy_screen( struct pipe_screen *_screen ) struct pipe_screen * llvmpipe_create_screen(struct sw_winsys *winsys) { - struct llvmpipe_screen *screen = CALLOC_STRUCT(llvmpipe_screen); + struct llvmpipe_screen *screen; + +#ifdef PIPE_ARCH_X86 + /* require SSE2 due to LLVM PR6960. */ + util_cpu_detect(); + if (!util_cpu_caps.has_sse2) + return NULL; +#endif + + screen = CALLOC_STRUCT(llvmpipe_screen); #ifdef DEBUG LP_DEBUG = debug_get_flags_option("LP_DEBUG", lp_debug_flags, 0 ); diff --git a/src/gallium/drivers/llvmpipe/lp_screen.h b/src/gallium/drivers/llvmpipe/lp_screen.h index 4f394326103..eb40f6823f5 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.h +++ b/src/gallium/drivers/llvmpipe/lp_screen.h @@ -60,10 +60,9 @@ struct llvmpipe_screen unsigned num_threads; - /* Increments whenever textures are modified. Contexts can track - * this. + /* Increments whenever textures are modified. Contexts can track this. */ - unsigned timestamp; + unsigned timestamp; }; diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 1a2cd55b164..656e6cc38a6 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -32,18 +32,20 @@ * lp_setup_flush(). 
*/ +#include <limits.h> + #include "pipe/p_defines.h" #include "util/u_framebuffer.h" #include "util/u_inlines.h" #include "util/u_memory.h" #include "util/u_pack_color.h" -#include "util/u_surface.h" #include "lp_context.h" #include "lp_scene.h" #include "lp_scene_queue.h" #include "lp_texture.h" #include "lp_debug.h" #include "lp_fence.h" +#include "lp_query.h" #include "lp_rast.h" #include "lp_setup_context.h" #include "lp_screen.h" @@ -418,7 +420,8 @@ lp_setup_set_fs_functions( struct lp_setup_context *setup, lp_jit_frag_func jit_function1, boolean opaque ) { - LP_DBG(DEBUG_SETUP, "%s %p\n", __FUNCTION__, (void *) jit_function0); + LP_DBG(DEBUG_SETUP, "%s %p\n", __FUNCTION__, + cast_lp_jit_frag_func_to_voidptr(jit_function0)); /* FIXME: reference count */ setup->fs.current.jit_function[0] = jit_function0; @@ -644,16 +647,19 @@ lp_setup_update_state( struct lp_setup_context *setup ) stored = lp_scene_alloc_aligned(scene, 4 * 16, 16); - /* smear each blend color component across 16 ubyte elements */ - for (i = 0; i < 4; ++i) { - uint8_t c = float_to_ubyte(setup->blend_color.current.color[i]); - for (j = 0; j < 16; ++j) - stored[i*16 + j] = c; - } + if (stored) { + /* smear each blend color component across 16 ubyte elements */ + for (i = 0; i < 4; ++i) { + uint8_t c = float_to_ubyte(setup->blend_color.current.color[i]); + for (j = 0; j < 16; ++j) + stored[i*16 + j] = c; + } - setup->blend_color.stored = stored; + setup->blend_color.stored = stored; + + setup->fs.current.jit_context.blend_color = setup->blend_color.stored; + } - setup->fs.current.jit_context.blend_color = setup->blend_color.stored; setup->dirty |= LP_SETUP_NEW_FS; } @@ -662,17 +668,19 @@ lp_setup_update_state( struct lp_setup_context *setup ) stored = lp_scene_alloc_aligned(scene, 4 * sizeof(int32_t), 16); - stored[0] = (float) setup->scissor.current.minx; - stored[1] = (float) setup->scissor.current.miny; - stored[2] = (float) setup->scissor.current.maxx; - stored[3] = (float) 
setup->scissor.current.maxy; + if (stored) { + stored[0] = (float) setup->scissor.current.minx; + stored[1] = (float) setup->scissor.current.miny; + stored[2] = (float) setup->scissor.current.maxx; + stored[3] = (float) setup->scissor.current.maxy; - setup->scissor.stored = stored; + setup->scissor.stored = stored; - setup->fs.current.jit_context.scissor_xmin = stored[0]; - setup->fs.current.jit_context.scissor_ymin = stored[1]; - setup->fs.current.jit_context.scissor_xmax = stored[2]; - setup->fs.current.jit_context.scissor_ymax = stored[3]; + setup->fs.current.jit_context.scissor_xmin = stored[0]; + setup->fs.current.jit_context.scissor_ymin = stored[1]; + setup->fs.current.jit_context.scissor_xmax = stored[2]; + setup->fs.current.jit_context.scissor_ymax = stored[3]; + } setup->dirty |= LP_SETUP_NEW_FS; } @@ -852,3 +860,40 @@ fail: return NULL; } + +/** + * Put a BeginQuery command into all bins. + */ +void +lp_setup_begin_query(struct lp_setup_context *setup, + struct llvmpipe_query *pq) +{ + struct lp_scene * scene = lp_setup_get_current_scene(setup); + union lp_rast_cmd_arg cmd_arg; + + /* init the query to its beginning state */ + pq->done = FALSE; + pq->tile_count = 0; + pq->num_tiles = scene->tiles_x * scene->tiles_y; + assert(pq->num_tiles > 0); + + memset(pq->count, 0, sizeof(pq->count)); /* reset all counters */ + + cmd_arg.query_obj = pq; + lp_scene_bin_everywhere(scene, lp_rast_begin_query, cmd_arg); + pq->binned = TRUE; +} + + +/** + * Put an EndQuery command into all bins. 
+ */ +void +lp_setup_end_query(struct lp_setup_context *setup, struct llvmpipe_query *pq) +{ + struct lp_scene * scene = lp_setup_get_current_scene(setup); + union lp_rast_cmd_arg cmd_arg; + + cmd_arg.query_obj = pq; + lp_scene_bin_everywhere(scene, lp_rast_end_query, cmd_arg); +} diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index e10d37d8d04..10db03b9c69 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -53,12 +53,15 @@ struct lp_shader_input { }; struct pipe_resource; +struct pipe_query; struct pipe_surface; struct pipe_blend_color; struct pipe_screen; struct pipe_framebuffer_state; struct lp_fragment_shader; struct lp_jit_context; +struct llvmpipe_query; + struct lp_setup_context * lp_setup_create( struct pipe_context *pipe, @@ -140,5 +143,12 @@ void lp_setup_set_vertex_info( struct lp_setup_context *setup, struct vertex_info *info ); +void +lp_setup_begin_query(struct lp_setup_context *setup, + struct llvmpipe_query *pq); + +void +lp_setup_end_query(struct lp_setup_context *setup, + struct llvmpipe_query *pq); #endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index f8a58165733..306cb6e27d2 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -205,8 +205,14 @@ static void setup_tri_coefficients( struct lp_setup_context *setup, switch (setup->fs.input[slot].interp) { case LP_INTERP_CONSTANT: - for (i = 0; i < NUM_CHANNELS; i++) - constant_coef(setup, tri, slot+1, v3[vert_attr][i], i); + if (setup->flatshade_first) { + for (i = 0; i < NUM_CHANNELS; i++) + constant_coef(setup, tri, slot+1, v1[vert_attr][i], i); + } + else { + for (i = 0; i < NUM_CHANNELS; i++) + constant_coef(setup, tri, slot+1, v3[vert_attr][i], i); + } break; case LP_INTERP_LINEAR: @@ -665,14 +671,14 @@ void lp_setup_choose_triangle( struct lp_setup_context *setup ) { switch 
(setup->cullmode) { - case PIPE_WINDING_NONE: + case PIPE_FACE_NONE: setup->triangle = triangle_both; break; - case PIPE_WINDING_CCW: - setup->triangle = triangle_cw; + case PIPE_FACE_BACK: + setup->triangle = setup->ccw_is_frontface ? triangle_ccw : triangle_cw; break; - case PIPE_WINDING_CW: - setup->triangle = triangle_ccw; + case PIPE_FACE_FRONT: + setup->triangle = setup->ccw_is_frontface ? triangle_cw : triangle_ccw; break; default: setup->triangle = triangle_nop; diff --git a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c index 5d3122e8ba2..f6a424f25a8 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c @@ -131,11 +131,12 @@ static INLINE const_float4_ptr get_vert( const void *vertex_buffer, * draw elements / indexed primitives */ static void -lp_setup_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) +lp_setup_draw_elements(struct vbuf_render *vbr, const ushort *indices, uint nr) { struct lp_setup_context *setup = lp_setup_context(vbr); const unsigned stride = setup->vertex_info->size * sizeof(float); const void *vertex_buffer = setup->vertex_buffer; + const boolean flatshade_first = setup->flatshade_first; unsigned i; lp_setup_update_state(setup); @@ -178,35 +179,28 @@ lp_setup_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) break; case PIPE_PRIM_TRIANGLES: - if (setup->flatshade_first) { - for (i = 2; i < nr; i += 3) { - setup->triangle( setup, - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, indices[i-0], stride), - get_vert(vertex_buffer, indices[i-2], stride) ); - } - } - else { - for (i = 2; i < nr; i += 3) { - setup->triangle( setup, - get_vert(vertex_buffer, indices[i-2], stride), - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, indices[i-0], stride) ); - } + for (i = 2; i < nr; i += 3) { + setup->triangle( setup, + get_vert(vertex_buffer, indices[i-2], stride), + 
get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); } break; case PIPE_PRIM_TRIANGLE_STRIP: - if (setup->flatshade_first) { + if (flatshade_first) { for (i = 2; i < nr; i += 1) { + /* emit first triangle vertex as first triangle vertex */ setup->triangle( setup, + get_vert(vertex_buffer, indices[i-2], stride), get_vert(vertex_buffer, indices[i+(i&1)-1], stride), - get_vert(vertex_buffer, indices[i-(i&1)], stride), - get_vert(vertex_buffer, indices[i-2], stride) ); + get_vert(vertex_buffer, indices[i-(i&1)], stride) ); + } } else { for (i = 2; i < nr; i += 1) { + /* emit last triangle vertex as last triangle vertex */ setup->triangle( setup, get_vert(vertex_buffer, indices[i+(i&1)-2], stride), get_vert(vertex_buffer, indices[i-(i&1)-1], stride), @@ -216,16 +210,18 @@ lp_setup_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) break; case PIPE_PRIM_TRIANGLE_FAN: - if (setup->flatshade_first) { + if (flatshade_first) { for (i = 2; i < nr; i += 1) { + /* emit first non-spoke vertex as first vertex */ setup->triangle( setup, + get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-0], stride), - get_vert(vertex_buffer, indices[0], stride), - get_vert(vertex_buffer, indices[i-1], stride) ); + get_vert(vertex_buffer, indices[0], stride) ); } } else { for (i = 2; i < nr; i += 1) { + /* emit last non-spoke vertex as last vertex */ setup->triangle( setup, get_vert(vertex_buffer, indices[0], stride), get_vert(vertex_buffer, indices[i-1], stride), @@ -235,43 +231,88 @@ lp_setup_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) break; case PIPE_PRIM_QUADS: - for (i = 3; i < nr; i += 4) { - setup->triangle( setup, + /* GL quads don't follow provoking vertex convention */ + if (flatshade_first) { + /* emit last quad vertex as first triangle vertex */ + for (i = 3; i < nr; i += 4) { + setup->triangle( setup, + get_vert(vertex_buffer, indices[i-0], stride), + get_vert(vertex_buffer, 
indices[i-3], stride), + get_vert(vertex_buffer, indices[i-2], stride) ); + + setup->triangle( setup, + get_vert(vertex_buffer, indices[i-0], stride), + get_vert(vertex_buffer, indices[i-2], stride), + get_vert(vertex_buffer, indices[i-1], stride) ); + } + } + else { + /* emit last quad vertex as last triangle vertex */ + for (i = 3; i < nr; i += 4) { + setup->triangle( setup, get_vert(vertex_buffer, indices[i-3], stride), get_vert(vertex_buffer, indices[i-2], stride), get_vert(vertex_buffer, indices[i-0], stride) ); - setup->triangle( setup, - get_vert(vertex_buffer, indices[i-2], stride), - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, indices[i-0], stride) ); + setup->triangle( setup, + get_vert(vertex_buffer, indices[i-2], stride), + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); + } } break; case PIPE_PRIM_QUAD_STRIP: - for (i = 3; i < nr; i += 2) { - setup->triangle( setup, - get_vert(vertex_buffer, indices[i-3], stride), - get_vert(vertex_buffer, indices[i-2], stride), - get_vert(vertex_buffer, indices[i-0], stride) ); - setup->triangle( setup, - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, indices[i-3], stride), - get_vert(vertex_buffer, indices[i-0], stride) ); + /* GL quad strips don't follow provoking vertex convention */ + if (flatshade_first) { + /* emit last quad vertex as first triangle vertex */ + for (i = 3; i < nr; i += 2) { + setup->triangle( setup, + get_vert(vertex_buffer, indices[i-0], stride), + get_vert(vertex_buffer, indices[i-3], stride), + get_vert(vertex_buffer, indices[i-2], stride) ); + setup->triangle( setup, + get_vert(vertex_buffer, indices[i-0], stride), + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-3], stride) ); + } + } + else { + /* emit last quad vertex as last triangle vertex */ + for (i = 3; i < nr; i += 2) { + setup->triangle( setup, + get_vert(vertex_buffer, indices[i-3], stride), 
+ get_vert(vertex_buffer, indices[i-2], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); + setup->triangle( setup, + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-3], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); + } } break; case PIPE_PRIM_POLYGON: /* Almost same as tri fan but the _first_ vertex specifies the flat - * shading color. Note that the first polygon vertex is passed as - * the last triangle vertex here. - * flatshade_first state makes no difference. + * shading color. */ - for (i = 2; i < nr; i += 1) { - setup->triangle( setup, - get_vert(vertex_buffer, indices[i-0], stride), - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, indices[0], stride) ); + if (flatshade_first) { + /* emit first polygon vertex as first triangle vertex */ + for (i = 2; i < nr; i += 1) { + setup->triangle( setup, + get_vert(vertex_buffer, indices[0], stride), + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); + } + } + else { + /* emit first polygon vertex as last triangle vertex */ + for (i = 2; i < nr; i += 1) { + setup->triangle( setup, + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-0], stride), + get_vert(vertex_buffer, indices[0], stride) ); + } } break; @@ -292,6 +333,7 @@ lp_setup_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) const unsigned stride = setup->vertex_info->size * sizeof(float); const void *vertex_buffer = (void *) get_vert(setup->vertex_buffer, start, stride); + const boolean flatshade_first = setup->flatshade_first; unsigned i; lp_setup_update_state(setup); @@ -334,35 +376,27 @@ lp_setup_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) break; case PIPE_PRIM_TRIANGLES: - if (setup->flatshade_first) { - for (i = 2; i < nr; i += 3) { - setup->triangle( setup, - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-0, stride), - get_vert(vertex_buffer, i-2, 
stride) ); - } - } - else { - for (i = 2; i < nr; i += 3) { - setup->triangle( setup, - get_vert(vertex_buffer, i-2, stride), - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-0, stride) ); - } + for (i = 2; i < nr; i += 3) { + setup->triangle( setup, + get_vert(vertex_buffer, i-2, stride), + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride) ); } break; case PIPE_PRIM_TRIANGLE_STRIP: - if (setup->flatshade_first) { + if (flatshade_first) { for (i = 2; i < nr; i++) { + /* emit first triangle vertex as first triangle vertex */ setup->triangle( setup, + get_vert(vertex_buffer, i-2, stride), get_vert(vertex_buffer, i+(i&1)-1, stride), - get_vert(vertex_buffer, i-(i&1), stride), - get_vert(vertex_buffer, i-2, stride) ); + get_vert(vertex_buffer, i-(i&1), stride) ); } } else { for (i = 2; i < nr; i++) { + /* emit last triangle vertex as last triangle vertex */ setup->triangle( setup, get_vert(vertex_buffer, i+(i&1)-2, stride), get_vert(vertex_buffer, i-(i&1)-1, stride), @@ -372,16 +406,18 @@ lp_setup_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) break; case PIPE_PRIM_TRIANGLE_FAN: - if (setup->flatshade_first) { + if (flatshade_first) { for (i = 2; i < nr; i += 1) { + /* emit first non-spoke vertex as first vertex */ setup->triangle( setup, + get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-0, stride), - get_vert(vertex_buffer, 0, stride), - get_vert(vertex_buffer, i-1, stride) ); + get_vert(vertex_buffer, 0, stride) ); } } else { for (i = 2; i < nr; i += 1) { + /* emit last non-spoke vertex as last vertex */ setup->triangle( setup, get_vert(vertex_buffer, 0, stride), get_vert(vertex_buffer, i-1, stride), @@ -391,42 +427,86 @@ lp_setup_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) break; case PIPE_PRIM_QUADS: - for (i = 3; i < nr; i += 4) { - setup->triangle( setup, - get_vert(vertex_buffer, i-3, stride), - get_vert(vertex_buffer, i-2, stride), - get_vert(vertex_buffer, i-0, stride) ); - 
setup->triangle( setup, - get_vert(vertex_buffer, i-2, stride), - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-0, stride) ); + /* GL quads don't follow provoking vertex convention */ + if (flatshade_first) { + /* emit last quad vertex as first triangle vertex */ + for (i = 3; i < nr; i += 4) { + setup->triangle( setup, + get_vert(vertex_buffer, i-0, stride), + get_vert(vertex_buffer, i-3, stride), + get_vert(vertex_buffer, i-2, stride) ); + setup->triangle( setup, + get_vert(vertex_buffer, i-0, stride), + get_vert(vertex_buffer, i-2, stride), + get_vert(vertex_buffer, i-1, stride) ); + } + } + else { + /* emit last quad vertex as last triangle vertex */ + for (i = 3; i < nr; i += 4) { + setup->triangle( setup, + get_vert(vertex_buffer, i-3, stride), + get_vert(vertex_buffer, i-2, stride), + get_vert(vertex_buffer, i-0, stride) ); + setup->triangle( setup, + get_vert(vertex_buffer, i-2, stride), + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride) ); + } } break; case PIPE_PRIM_QUAD_STRIP: - for (i = 3; i < nr; i += 2) { - setup->triangle( setup, - get_vert(vertex_buffer, i-3, stride), - get_vert(vertex_buffer, i-2, stride), - get_vert(vertex_buffer, i-0, stride) ); - setup->triangle( setup, - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-3, stride), - get_vert(vertex_buffer, i-0, stride) ); + /* GL quad strips don't follow provoking vertex convention */ + if (flatshade_first) { + /* emit last quad vertex as first triangle vertex */ + for (i = 3; i < nr; i += 2) { + setup->triangle( setup, + get_vert(vertex_buffer, i-0, stride), + get_vert(vertex_buffer, i-3, stride), + get_vert(vertex_buffer, i-2, stride) ); + setup->triangle( setup, + get_vert(vertex_buffer, i-0, stride), + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-3, stride) ); + } + } + else { + /* emit last quad vertex as last triangle vertex */ + for (i = 3; i < nr; i += 2) { + setup->triangle( setup, + 
get_vert(vertex_buffer, i-3, stride), + get_vert(vertex_buffer, i-2, stride), + get_vert(vertex_buffer, i-0, stride) ); + setup->triangle( setup, + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-3, stride), + get_vert(vertex_buffer, i-0, stride) ); + } } break; case PIPE_PRIM_POLYGON: /* Almost same as tri fan but the _first_ vertex specifies the flat - * shading color. Note that the first polygon vertex is passed as - * the last triangle vertex here. - * flatshade_first state makes no difference. + * shading color. */ - for (i = 2; i < nr; i += 1) { - setup->triangle( setup, - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-0, stride), - get_vert(vertex_buffer, 0, stride) ); + if (flatshade_first) { + /* emit first polygon vertex as first triangle vertex */ + for (i = 2; i < nr; i += 1) { + setup->triangle( setup, + get_vert(vertex_buffer, 0, stride), + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride) ); + } + } + else { + /* emit first polygon vertex as last triangle vertex */ + for (i = 2; i < nr; i += 1) { + setup->triangle( setup, + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride), + get_vert(vertex_buffer, 0, stride) ); + } } break; @@ -463,7 +543,7 @@ lp_setup_init_vbuf(struct lp_setup_context *setup) setup->base.map_vertices = lp_setup_map_vertices; setup->base.unmap_vertices = lp_setup_unmap_vertices; setup->base.set_primitive = lp_setup_set_primitive; - setup->base.draw = lp_setup_draw; + setup->base.draw_elements = lp_setup_draw_elements; setup->base.draw_arrays = lp_setup_draw_arrays; setup->base.release_vertices = lp_setup_release_vertices; setup->base.destroy = lp_setup_vbuf_destroy; diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h index 18143807c91..bae5de0cb35 100644 --- a/src/gallium/drivers/llvmpipe/lp_state.h +++ b/src/gallium/drivers/llvmpipe/lp_state.h @@ -63,6 +63,11 @@ struct llvmpipe_context; struct 
lp_fragment_shader; +/** Indexes into jit_function[] array */ +#define RAST_WHOLE 0 +#define RAST_EDGE_TEST 1 + + struct lp_fragment_shader_variant_key { struct pipe_depth_state depth; @@ -73,6 +78,7 @@ struct lp_fragment_shader_variant_key unsigned nr_cbufs:8; unsigned flatshade:1; unsigned scissor:1; + unsigned occlusion_count:1; struct { ubyte colormask; @@ -86,6 +92,8 @@ struct lp_fragment_shader_variant { struct lp_fragment_shader_variant_key key; + boolean opaque; + LLVMValueRef function[2]; lp_jit_frag_func jit_function[2]; diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index 113d77ab788..2edfcb28ce6 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -163,7 +163,8 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ) LP_NEW_DEPTH_STENCIL_ALPHA | LP_NEW_RASTERIZER | LP_NEW_SAMPLER | - LP_NEW_SAMPLER_VIEW)) + LP_NEW_SAMPLER_VIEW | + LP_NEW_QUERY)) llvmpipe_update_fs( llvmpipe ); if (llvmpipe->dirty & LP_NEW_BLEND_COLOR) diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 5f861d6ca4d..9ef78e6badf 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -87,7 +87,6 @@ #include "lp_bld_depth.h" #include "lp_bld_interp.h" #include "lp_context.h" -#include "lp_debug.h" #include "lp_perf.h" #include "lp_screen.h" #include "lp_setup.h" @@ -148,7 +147,8 @@ generate_depth_stencil(LLVMBuilderRef builder, LLVMValueRef stencil_refs[2], LLVMValueRef src, LLVMValueRef dst_ptr, - LLVMValueRef facing) + LLVMValueRef facing, + LLVMValueRef counter) { const struct util_format_description *format_desc; struct lp_type dst_type; @@ -195,7 +195,8 @@ generate_depth_stencil(LLVMBuilderRef builder, stencil_refs, src, dst_ptr, - facing); + facing, + counter); } @@ -400,7 +401,8 @@ generate_fs(struct llvmpipe_context *lp, LLVMValueRef c2, 
LLVMValueRef step0_ptr, LLVMValueRef step1_ptr, - LLVMValueRef step2_ptr) + LLVMValueRef step2_ptr, + LLVMValueRef counter) { const struct tgsi_token *tokens = shader->base.tokens; LLVMTypeRef vec_type; @@ -466,12 +468,13 @@ generate_fs(struct llvmpipe_context *lp, if (early_depth_stencil_test) generate_depth_stencil(builder, key, type, &mask, - stencil_refs, z, depth_ptr, facing); + stencil_refs, z, depth_ptr, facing, counter); lp_build_tgsi_soa(builder, tokens, type, &mask, consts_ptr, interp->pos, interp->inputs, outputs, sampler, &shader->info); + /* loop over fragment shader outputs/results */ for (attrib = 0; attrib < shader->info.num_outputs; ++attrib) { for(chan = 0; chan < NUM_CHANNELS; ++chan) { if(outputs[attrib][chan]) { @@ -513,7 +516,7 @@ generate_fs(struct llvmpipe_context *lp, if (!early_depth_stencil_test) generate_depth_stencil(builder, key, type, &mask, - stencil_refs, z, depth_ptr, facing); + stencil_refs, z, depth_ptr, facing, counter); lp_build_mask_end(&mask); @@ -528,9 +531,16 @@ generate_fs(struct llvmpipe_context *lp, /** * Generate color blending and color output. 
+ * \param rt the render target index (to index blend, colormask state) + * \param type the pixel color type + * \param context_ptr pointer to the runtime JIT context + * \param mask execution mask (active fragment/pixel mask) + * \param src colors from the fragment shader + * \param dst_ptr the destination color buffer pointer */ static void generate_blend(const struct pipe_blend_state *blend, + unsigned rt, LLVMBuilderRef builder, struct lp_type type, LLVMValueRef context_ptr, @@ -561,6 +571,7 @@ generate_blend(const struct pipe_blend_state *blend, const_ptr = LLVMBuildBitCast(builder, const_ptr, LLVMPointerType(vec_type, 0), ""); + /* load constant blend color and colors from the dest color buffer */ for(chan = 0; chan < 4; ++chan) { LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0); con[chan] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, const_ptr, &index, 1, ""), ""); @@ -571,10 +582,12 @@ generate_blend(const struct pipe_blend_state *blend, lp_build_name(dst[chan], "dst.%c", "rgba"[chan]); } - lp_build_blend_soa(builder, blend, type, src, dst, con, res); + /* do blend */ + lp_build_blend_soa(builder, blend, type, rt, src, dst, con, res); + /* store results to color buffer */ for(chan = 0; chan < 4; ++chan) { - if(blend->rt[0].colormask & (1 << chan)) { + if(blend->rt[rt].colormask & (1 << chan)) { LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0); lp_build_name(res[chan], "res.%c", "rgba"[chan]); res[chan] = lp_build_select(&bld, mask, res[chan], dst[chan]); @@ -587,20 +600,6 @@ generate_blend(const struct pipe_blend_state *blend, } -/** casting function to avoid compiler warnings */ -static lp_jit_frag_func -cast_voidptr_to_lp_jit_frag_func(void *p) -{ - union { - void *v; - lp_jit_frag_func f; - } tmp; - assert(sizeof(tmp.v) == sizeof(tmp.f)); - tmp.v = p; - return tmp.f; -} - - /** * Generate the runtime callable function for the whole fragment pipeline. 
* Note that the function which we generate operates on a block of 16 @@ -620,7 +619,7 @@ generate_fragment(struct llvmpipe_context *lp, LLVMTypeRef fs_elem_type; LLVMTypeRef fs_int_vec_type; LLVMTypeRef blend_vec_type; - LLVMTypeRef arg_types[15]; + LLVMTypeRef arg_types[16]; LLVMTypeRef func_type; LLVMTypeRef int32_vec4_type = lp_build_int32_vec4_type(); LLVMValueRef context_ptr; @@ -631,7 +630,7 @@ generate_fragment(struct llvmpipe_context *lp, LLVMValueRef dady_ptr; LLVMValueRef color_ptr_ptr; LLVMValueRef depth_ptr; - LLVMValueRef c0, c1, c2, step0_ptr, step1_ptr, step2_ptr; + LLVMValueRef c0, c1, c2, step0_ptr, step1_ptr, step2_ptr, counter = NULL; LLVMBasicBlockRef block; LLVMBuilderRef builder; LLVMValueRef x0; @@ -641,7 +640,6 @@ generate_fragment(struct llvmpipe_context *lp, LLVMValueRef fs_mask[LP_MAX_VECTOR_LENGTH]; LLVMValueRef fs_out_color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][LP_MAX_VECTOR_LENGTH]; LLVMValueRef blend_mask; - LLVMValueRef blend_in_color[NUM_CHANNELS]; LLVMValueRef function; LLVMValueRef facing; unsigned num_fs; @@ -696,6 +694,7 @@ generate_fragment(struct llvmpipe_context *lp, arg_types[12] = LLVMPointerType(int32_vec4_type, 0);/* step0 */ arg_types[13] = LLVMPointerType(int32_vec4_type, 0);/* step1 */ arg_types[14] = LLVMPointerType(int32_vec4_type, 0);/* step2 */ + arg_types[15] = LLVMPointerType(LLVMInt32Type(), 0);/* counter */ func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); @@ -734,7 +733,7 @@ generate_fragment(struct llvmpipe_context *lp, lp_build_name(a0_ptr, "a0"); lp_build_name(dadx_ptr, "dadx"); lp_build_name(dady_ptr, "dady"); - lp_build_name(color_ptr_ptr, "color_ptr"); + lp_build_name(color_ptr_ptr, "color_ptr_ptr"); lp_build_name(depth_ptr, "depth"); lp_build_name(c0, "c0"); lp_build_name(c1, "c1"); @@ -743,6 +742,11 @@ generate_fragment(struct llvmpipe_context *lp, lp_build_name(step1_ptr, "step1"); lp_build_name(step2_ptr, "step2"); + if (key->occlusion_count) { + counter = 
LLVMGetParam(function, 15); + lp_build_name(counter, "counter"); + } + /* * Function body */ @@ -787,7 +791,7 @@ generate_fragment(struct llvmpipe_context *lp, facing, do_tri_test, c0, c1, c2, - step0_ptr, step1_ptr, step2_ptr); + step0_ptr, step1_ptr, step2_ptr, counter); for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) for(chan = 0; chan < NUM_CHANNELS; ++chan) @@ -801,6 +805,8 @@ generate_fragment(struct llvmpipe_context *lp, for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) { LLVMValueRef color_ptr; LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), cbuf, 0); + LLVMValueRef blend_in_color[NUM_CHANNELS]; + unsigned rt; /* * Convert the fs's output color and mask to fit to the blending type. @@ -821,10 +827,14 @@ generate_fragment(struct llvmpipe_context *lp, ""); lp_build_name(color_ptr, "color_ptr%d", cbuf); + /* which blend/colormask state to use */ + rt = key->blend.independent_blend_enable ? cbuf : 0; + /* * Blending. */ generate_blend(&key->blend, + rt, builder, blend_type, context_ptr, @@ -842,7 +852,7 @@ generate_fragment(struct llvmpipe_context *lp, #ifdef DEBUG if(LLVMVerifyFunction(function, LLVMPrintMessageAction)) { if (1) - LLVMDumpValue(function); + lp_debug_dump_value(function); abort(); } #endif @@ -851,9 +861,9 @@ generate_fragment(struct llvmpipe_context *lp, if (1) LLVMRunFunctionPassManager(screen->pass, function); - if (LP_DEBUG & DEBUG_JIT) { + if (gallivm_debug & GALLIVM_DEBUG_IR) { /* Print the LLVM IR to stderr */ - LLVMDumpValue(function); + lp_debug_dump_value(function); debug_printf("\n"); } @@ -865,12 +875,84 @@ generate_fragment(struct llvmpipe_context *lp, variant->jit_function[do_tri_test] = cast_voidptr_to_lp_jit_frag_func(f); - if (LP_DEBUG & DEBUG_ASM) + if (gallivm_debug & GALLIVM_DEBUG_ASM) { lp_disassemble(f); + } } } +static void +dump_fs_variant_key(const struct lp_fragment_shader_variant_key *key) +{ + unsigned i; + + debug_printf("fs variant %p:\n", (void *) key); + + if (key->depth.enabled) { + debug_printf("depth.format = %s\n", 
util_format_name(key->zsbuf_format)); + debug_printf("depth.func = %s\n", util_dump_func(key->depth.func, TRUE)); + debug_printf("depth.writemask = %u\n", key->depth.writemask); + } + + for (i = 0; i < 2; ++i) { + if (key->stencil[i].enabled) { + debug_printf("stencil[%u].func = %s\n", i, util_dump_func(key->stencil[i].func, TRUE)); + debug_printf("stencil[%u].fail_op = %s\n", i, util_dump_stencil_op(key->stencil[i].fail_op, TRUE)); + debug_printf("stencil[%u].zpass_op = %s\n", i, util_dump_stencil_op(key->stencil[i].zpass_op, TRUE)); + debug_printf("stencil[%u].zfail_op = %s\n", i, util_dump_stencil_op(key->stencil[i].zfail_op, TRUE)); + debug_printf("stencil[%u].valuemask = 0x%x\n", i, key->stencil[i].valuemask); + debug_printf("stencil[%u].writemask = 0x%x\n", i, key->stencil[i].writemask); + } + } + + if (key->alpha.enabled) { + debug_printf("alpha.func = %s\n", util_dump_func(key->alpha.func, TRUE)); + debug_printf("alpha.ref_value = %f\n", key->alpha.ref_value); + } + + if (key->blend.logicop_enable) { + debug_printf("blend.logicop_func = %s\n", util_dump_logicop(key->blend.logicop_func, TRUE)); + } + else if (key->blend.rt[0].blend_enable) { + debug_printf("blend.rgb_func = %s\n", util_dump_blend_func (key->blend.rt[0].rgb_func, TRUE)); + debug_printf("blend.rgb_src_factor = %s\n", util_dump_blend_factor(key->blend.rt[0].rgb_src_factor, TRUE)); + debug_printf("blend.rgb_dst_factor = %s\n", util_dump_blend_factor(key->blend.rt[0].rgb_dst_factor, TRUE)); + debug_printf("blend.alpha_func = %s\n", util_dump_blend_func (key->blend.rt[0].alpha_func, TRUE)); + debug_printf("blend.alpha_src_factor = %s\n", util_dump_blend_factor(key->blend.rt[0].alpha_src_factor, TRUE)); + debug_printf("blend.alpha_dst_factor = %s\n", util_dump_blend_factor(key->blend.rt[0].alpha_dst_factor, TRUE)); + } + debug_printf("blend.colormask = 0x%x\n", key->blend.rt[0].colormask); + for (i = 0; i < PIPE_MAX_SAMPLERS; ++i) { + if (key->sampler[i].format) { + debug_printf("sampler[%u] = \n", 
i); + debug_printf(" .format = %s\n", + util_format_name(key->sampler[i].format)); + debug_printf(" .target = %s\n", + util_dump_tex_target(key->sampler[i].target, TRUE)); + debug_printf(" .pot = %u %u %u\n", + key->sampler[i].pot_width, + key->sampler[i].pot_height, + key->sampler[i].pot_depth); + debug_printf(" .wrap = %s %s %s\n", + util_dump_tex_wrap(key->sampler[i].wrap_s, TRUE), + util_dump_tex_wrap(key->sampler[i].wrap_t, TRUE), + util_dump_tex_wrap(key->sampler[i].wrap_r, TRUE)); + debug_printf(" .min_img_filter = %s\n", + util_dump_tex_filter(key->sampler[i].min_img_filter, TRUE)); + debug_printf(" .min_mip_filter = %s\n", + util_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE)); + debug_printf(" .mag_img_filter = %s\n", + util_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE)); + if (key->sampler[i].compare_mode != PIPE_TEX_COMPARE_NONE) + debug_printf(" .compare_func = %s\n", util_dump_func(key->sampler[i].compare_func, TRUE)); + debug_printf(" .normalized_coords = %u\n", key->sampler[i].normalized_coords); + } + } +} + + + static struct lp_fragment_shader_variant * generate_variant(struct llvmpipe_context *lp, struct lp_fragment_shader *shader, @@ -878,57 +960,9 @@ generate_variant(struct llvmpipe_context *lp, { struct lp_fragment_shader_variant *variant; - if (LP_DEBUG & DEBUG_JIT) { - unsigned i; - + if (gallivm_debug & GALLIVM_DEBUG_IR) { tgsi_dump(shader->base.tokens, 0); - if(key->depth.enabled) { - debug_printf("depth.format = %s\n", util_format_name(key->zsbuf_format)); - debug_printf("depth.func = %s\n", util_dump_func(key->depth.func, TRUE)); - debug_printf("depth.writemask = %u\n", key->depth.writemask); - } - if(key->alpha.enabled) { - debug_printf("alpha.func = %s\n", util_dump_func(key->alpha.func, TRUE)); - debug_printf("alpha.ref_value = %f\n", key->alpha.ref_value); - } - if(key->blend.logicop_enable) { - debug_printf("blend.logicop_func = %u\n", key->blend.logicop_func); - } - else if(key->blend.rt[0].blend_enable) { - 
debug_printf("blend.rgb_func = %s\n", util_dump_blend_func (key->blend.rt[0].rgb_func, TRUE)); - debug_printf("rgb_src_factor = %s\n", util_dump_blend_factor(key->blend.rt[0].rgb_src_factor, TRUE)); - debug_printf("rgb_dst_factor = %s\n", util_dump_blend_factor(key->blend.rt[0].rgb_dst_factor, TRUE)); - debug_printf("alpha_func = %s\n", util_dump_blend_func (key->blend.rt[0].alpha_func, TRUE)); - debug_printf("alpha_src_factor = %s\n", util_dump_blend_factor(key->blend.rt[0].alpha_src_factor, TRUE)); - debug_printf("alpha_dst_factor = %s\n", util_dump_blend_factor(key->blend.rt[0].alpha_dst_factor, TRUE)); - } - debug_printf("blend.colormask = 0x%x\n", key->blend.rt[0].colormask); - for(i = 0; i < PIPE_MAX_SAMPLERS; ++i) { - if(key->sampler[i].format) { - debug_printf("sampler[%u] = \n", i); - debug_printf(" .format = %s\n", - util_format_name(key->sampler[i].format)); - debug_printf(" .target = %s\n", - util_dump_tex_target(key->sampler[i].target, TRUE)); - debug_printf(" .pot = %u %u %u\n", - key->sampler[i].pot_width, - key->sampler[i].pot_height, - key->sampler[i].pot_depth); - debug_printf(" .wrap = %s %s %s\n", - util_dump_tex_wrap(key->sampler[i].wrap_s, TRUE), - util_dump_tex_wrap(key->sampler[i].wrap_t, TRUE), - util_dump_tex_wrap(key->sampler[i].wrap_r, TRUE)); - debug_printf(" .min_img_filter = %s\n", - util_dump_tex_filter(key->sampler[i].min_img_filter, TRUE)); - debug_printf(" .min_mip_filter = %s\n", - util_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE)); - debug_printf(" .mag_img_filter = %s\n", - util_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE)); - if(key->sampler[i].compare_mode != PIPE_TEX_COMPARE_NONE) - debug_printf(" .compare_func = %s\n", util_dump_func(key->sampler[i].compare_func, TRUE)); - debug_printf(" .normalized_coords = %u\n", key->sampler[i].normalized_coords); - } - } + dump_fs_variant_key(key); } variant = CALLOC_STRUCT(lp_fragment_shader_variant); @@ -937,8 +971,20 @@ generate_variant(struct llvmpipe_context 
*lp, memcpy(&variant->key, key, sizeof *key); - generate_fragment(lp, shader, variant, 0); - generate_fragment(lp, shader, variant, 1); + generate_fragment(lp, shader, variant, RAST_WHOLE); + generate_fragment(lp, shader, variant, RAST_EDGE_TEST); + + /* TODO: most of these can be relaxed, in particular the colormask */ + variant->opaque = + !key->blend.logicop_enable && + !key->blend.rt[0].blend_enable && + key->blend.rt[0].colormask == 0xf && + !key->stencil[0].enabled && + !key->alpha.enabled && + !key->depth.enabled && + !key->scissor && + !shader->info.uses_kill + ? TRUE : FALSE; /* insert new variant into linked list */ variant->next = shader->variants; @@ -964,7 +1010,7 @@ llvmpipe_create_fs_state(struct pipe_context *pipe, /* we need to keep a local copy of the tokens */ shader->base.tokens = tgsi_dup_tokens(templ->tokens); - if (LP_DEBUG & DEBUG_TGSI) { + if (gallivm_debug & GALLIVM_DEBUG_TGSI) { debug_printf("llvmpipe: Create fragment shader %p:\n", (void *) shader); tgsi_dump(templ->tokens, 0); } @@ -1123,6 +1169,9 @@ make_variant_key(struct llvmpipe_context *lp, key->flatshade = lp->rasterizer->flatshade; key->scissor = lp->rasterizer->scissor; + if (lp->active_query_count) { + key->occlusion_count = TRUE; + } if (lp->framebuffer.nr_cbufs) { memcpy(&key->blend, lp->blend, sizeof key->blend); @@ -1187,7 +1236,6 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) struct lp_fragment_shader *shader = lp->fs; struct lp_fragment_shader_variant_key key; struct lp_fragment_shader_variant *variant; - boolean opaque; make_variant_key(lp, shader, &key); @@ -1212,22 +1260,10 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) LP_COUNT_ADD(nr_llvm_compiles, 2); /* emit vs. 
omit in/out test */ } - /* TODO: put this in the variant */ - /* TODO: most of these can be relaxed, in particular the colormask */ - opaque = !key.blend.logicop_enable && - !key.blend.rt[0].blend_enable && - key.blend.rt[0].colormask == 0xf && - !key.stencil[0].enabled && - !key.alpha.enabled && - !key.depth.enabled && - !key.scissor && - !shader->info.uses_kill - ? TRUE : FALSE; - lp_setup_set_fs_functions(lp->setup, variant->jit_function[RAST_WHOLE], variant->jit_function[RAST_EDGE_TEST], - opaque); + variant->opaque); } diff --git a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c index 622eb47ff45..afd3e0b21c9 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c +++ b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c @@ -67,8 +67,8 @@ llvmpipe_bind_rasterizer_state(struct pipe_context *pipe, void *handle) */ if (llvmpipe->rasterizer) { lp_setup_set_triangle_state( llvmpipe->setup, - llvmpipe->rasterizer->cull_mode, - llvmpipe->rasterizer->front_winding == PIPE_WINDING_CCW, + llvmpipe->rasterizer->cull_face, + llvmpipe->rasterizer->front_ccw, llvmpipe->rasterizer->scissor, llvmpipe->rasterizer->gl_rasterization_rules); lp_setup_set_flatshade_first( llvmpipe->setup, diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c index fae7bf3fcf2..072d699666b 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c @@ -52,6 +52,19 @@ enum vector_mode typedef void (*blend_test_ptr_t)(const void *src, const void *dst, const void *con, void *res); +/** cast wrapper */ +static blend_test_ptr_t +voidptr_to_blend_test_ptr_t(void *p) +{ + union { + void *v; + blend_test_ptr_t f; + } u; + u.v = p; + return u.f; +} + + void write_tsv_header(FILE *fp) @@ -163,6 +176,7 @@ add_blend_test(LLVMModuleRef module, LLVMValueRef res_ptr; LLVMBasicBlockRef block; LLVMBuilderRef builder; + const unsigned rt = 0; vec_type = 
lp_build_vec_type(type); @@ -188,7 +202,7 @@ add_blend_test(LLVMModuleRef module, dst = LLVMBuildLoad(builder, dst_ptr, "dst"); con = LLVMBuildLoad(builder, const_ptr, "const"); - res = lp_build_blend_aos(builder, blend, type, src, dst, con, 3); + res = lp_build_blend_aos(builder, blend, type, rt, src, dst, con, 3); lp_build_name(res, "res"); @@ -212,7 +226,7 @@ add_blend_test(LLVMModuleRef module, lp_build_name(dst[i], "dst.%c", "rgba"[i]); } - lp_build_blend_soa(builder, blend, type, src, dst, con, res); + lp_build_blend_soa(builder, blend, type, rt, src, dst, con, res); for(i = 0; i < 4; ++i) { LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); @@ -481,6 +495,7 @@ test_one(unsigned verbose, int64_t cycles[LP_TEST_NUM_SAMPLES]; double cycles_avg = 0.0; unsigned i, j; + void *code; if(verbose >= 1) dump_blend_type(stdout, blend, mode, type); @@ -522,10 +537,11 @@ test_one(unsigned verbose, if(verbose >= 2) LLVMDumpModule(module); - blend_test_ptr = (blend_test_ptr_t)LLVMGetPointerToGlobal(engine, func); + code = LLVMGetPointerToGlobal(engine, func); + blend_test_ptr = voidptr_to_blend_test_ptr_t(code); if(verbose >= 2) - lp_disassemble(blend_test_ptr); + lp_disassemble(code); success = TRUE; for(i = 0; i < n && success; ++i) { diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c index 958cc40538e..254f0daea3b 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_conv.c +++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c @@ -43,6 +43,17 @@ typedef void (*conv_test_ptr_t)(const void *src, const void *dst); +/** cast wrapper */ +static conv_test_ptr_t +voidptr_to_conv_test_ptr_t(void *p) +{ + union { + void *v; + conv_test_ptr_t f; + } u; + u.v = p; + return u.f; +} void write_tsv_header(FILE *fp) @@ -164,6 +175,7 @@ test_one(unsigned verbose, unsigned num_dsts; double eps; unsigned i, j; + void *code; if(verbose >= 1) dump_conv_types(stdout, src_type, dst_type); @@ -221,10 +233,11 @@ test_one(unsigned verbose, 
if(verbose >= 2) LLVMDumpModule(module); - conv_test_ptr = (conv_test_ptr_t)LLVMGetPointerToGlobal(engine, func); + code = LLVMGetPointerToGlobal(engine, func); + conv_test_ptr = voidptr_to_conv_test_ptr_t(code); if(verbose >= 2) - lp_disassemble(conv_test_ptr); + lp_disassemble(code); success = TRUE; for(i = 0; i < n && success; ++i) { @@ -384,7 +397,7 @@ test_all(unsigned verbose, FILE *fp) { const struct lp_type *src_type; const struct lp_type *dst_type; - bool success = TRUE; + boolean success = TRUE; for(src_type = conv_types; src_type < &conv_types[num_types]; ++src_type) { for(dst_type = conv_types; dst_type < &conv_types[num_types]; ++dst_type) { @@ -411,7 +424,7 @@ test_some(unsigned verbose, FILE *fp, unsigned long n) const struct lp_type *src_type; const struct lp_type *dst_type; unsigned long i; - bool success = TRUE; + boolean success = TRUE; for(i = 0; i < n; ++i) { src_type = &conv_types[rand() % num_types]; diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c index fbac815d107..267f1487bb8 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_format.c +++ b/src/gallium/drivers/llvmpipe/lp_test_format.c @@ -73,6 +73,19 @@ typedef void (*fetch_ptr_t)(float *, const void *packed, unsigned i, unsigned j); +/** cast wrapper to avoid warnings */ +static fetch_ptr_t +void_to_fetch_ptr_t(void *p) +{ + union { + void *v; + fetch_ptr_t f; + } u; + u.v = p; + return u.f; +} + + static LLVMValueRef add_fetch_rgba_test(LLVMModuleRef lp_build_module, @@ -125,7 +138,7 @@ test_format(unsigned verbose, FILE *fp, fetch_ptr_t fetch_ptr; float unpacked[4]; boolean success; - unsigned i; + unsigned i, j, k; fetch = add_fetch_rgba_test(lp_build_module, desc); @@ -149,31 +162,39 @@ test_format(unsigned verbose, FILE *fp, (void)pass; #endif - fetch_ptr = (fetch_ptr_t) LLVMGetPointerToGlobal(lp_build_engine, fetch); + fetch_ptr = void_to_fetch_ptr_t(LLVMGetPointerToGlobal(lp_build_engine, fetch)); - memset(unpacked, 0, 
sizeof unpacked); + for (i = 0; i < desc->block.height; ++i) { + for (j = 0; j < desc->block.width; ++j) { - fetch_ptr(unpacked, test->packed, 0, 0); + memset(unpacked, 0, sizeof unpacked); - success = TRUE; - for(i = 0; i < 4; ++i) - if (fabs((float)test->unpacked[0][0][i] - unpacked[i]) > FLT_EPSILON) - success = FALSE; + fetch_ptr(unpacked, test->packed, j, i); - if (!success) { - printf("FAILED\n"); - printf(" Packed: %02x %02x %02x %02x\n", - test->packed[0], test->packed[1], test->packed[2], test->packed[3]); - printf(" Unpacked: %f %f %f %f obtained\n", - unpacked[0], unpacked[1], unpacked[2], unpacked[3]); - printf(" %f %f %f %f expected\n", - test->unpacked[0][0][0], - test->unpacked[0][0][1], - test->unpacked[0][0][2], - test->unpacked[0][0][3]); - LLVMDumpValue(fetch); + success = TRUE; + for(k = 0; k < 4; ++k) + if (fabs((float)test->unpacked[i][j][k] - unpacked[k]) > FLT_EPSILON) + success = FALSE; + + if (!success) { + printf("FAILED\n"); + printf(" Packed: %02x %02x %02x %02x\n", + test->packed[0], test->packed[1], test->packed[2], test->packed[3]); + printf(" Unpacked (%u,%u): %f %f %f %f obtained\n", + j, i, + unpacked[0], unpacked[1], unpacked[2], unpacked[3]); + printf(" %f %f %f %f expected\n", + test->unpacked[i][j][0], + test->unpacked[i][j][1], + test->unpacked[i][j][2], + test->unpacked[i][j][3]); + } + } } + if (!success) + LLVMDumpValue(fetch); + LLVMFreeMachineCodeForFunction(lp_build_engine, fetch); LLVMDeleteFunction(fetch); @@ -193,20 +214,23 @@ test_one(unsigned verbose, FILE *fp, const struct util_format_description *format_desc) { unsigned i; - bool success = TRUE; - - printf("Testing %s ...\n", - format_desc->name); + boolean first = TRUE; + boolean success = TRUE; for (i = 0; i < util_format_nr_test_cases; ++i) { const struct util_format_test_case *test = &util_format_test_cases[i]; if (test->format == format_desc->format) { + if (first) { + printf("Testing %s ...\n", + format_desc->name); + first = FALSE; + } + if 
(!test_format(verbose, fp, format_desc, test)) { success = FALSE; } - } } @@ -218,7 +242,7 @@ boolean test_all(unsigned verbose, FILE *fp) { enum pipe_format format; - bool success = TRUE; + boolean success = TRUE; for (format = 1; format < PIPE_FORMAT_COUNT; ++format) { const struct util_format_description *format_desc; @@ -232,9 +256,7 @@ test_all(unsigned verbose, FILE *fp) * TODO: test more */ - if (format_desc->block.width != 1 || - format_desc->block.height != 1 || - format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { + if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { continue; } diff --git a/src/gallium/drivers/llvmpipe/lp_test_printf.c b/src/gallium/drivers/llvmpipe/lp_test_printf.c index e5e5925012a..13485c37748 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_printf.c +++ b/src/gallium/drivers/llvmpipe/lp_test_printf.c @@ -41,6 +41,7 @@ struct printf_test_case { + int foo; }; void @@ -57,6 +58,19 @@ write_tsv_header(FILE *fp) typedef void (*test_printf_t)(int i); +/** cast wrapper */ +static test_printf_t +voidptr_to_test_printf_t(void *p) +{ + union { + void *v; + test_printf_t f; + } u; + u.v = p; + return u.f; +} + + static LLVMValueRef add_printf_test(LLVMModuleRef module) { @@ -91,6 +105,7 @@ test_printf(unsigned verbose, FILE *fp, const struct printf_test_case *testcase) float unpacked[4]; unsigned packed; boolean success = TRUE; + void *code; module = LLVMModuleCreateWithName("test"); @@ -124,7 +139,8 @@ test_printf(unsigned verbose, FILE *fp, const struct printf_test_case *testcase) (void)pass; #endif - test_printf = (test_printf_t)LLVMGetPointerToGlobal(engine, test); + code = LLVMGetPointerToGlobal(engine, test); + test_printf = voidptr_to_test_printf_t(code); memset(unpacked, 0, sizeof unpacked); packed = 0; @@ -147,7 +163,7 @@ test_printf(unsigned verbose, FILE *fp, const struct printf_test_case *testcase) boolean test_all(unsigned verbose, FILE *fp) { - bool success = TRUE; + boolean success = TRUE; test_printf(verbose, fp, 
NULL); diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.c b/src/gallium/drivers/llvmpipe/lp_tex_sample.c index d3a9d39f616..65208dd5d5c 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample.c +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.c @@ -87,7 +87,7 @@ struct lp_llvm_sampler_soa * @sa http://llvm.org/docs/GetElementPtr.html */ static LLVMValueRef -lp_llvm_texture_member(struct lp_sampler_dynamic_state *base, +lp_llvm_texture_member(const struct lp_sampler_dynamic_state *base, LLVMBuilderRef builder, unsigned unit, unsigned member_index, @@ -135,7 +135,7 @@ lp_llvm_texture_member(struct lp_sampler_dynamic_state *base, */ #define LP_LLVM_TEXTURE_MEMBER(_name, _index, _emit_load) \ static LLVMValueRef \ - lp_llvm_texture_##_name( struct lp_sampler_dynamic_state *base, \ + lp_llvm_texture_##_name( const struct lp_sampler_dynamic_state *base, \ LLVMBuilderRef builder, \ unsigned unit) \ { \ @@ -164,7 +164,7 @@ lp_llvm_sampler_soa_destroy(struct lp_build_sampler_soa *sampler) * The 'texel' parameter returns four vectors corresponding to R, G, B, A. 
*/ static void -lp_llvm_sampler_soa_emit_fetch_texel(struct lp_build_sampler_soa *base, +lp_llvm_sampler_soa_emit_fetch_texel(const struct lp_build_sampler_soa *base, LLVMBuilderRef builder, struct lp_type type, unsigned unit, diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c index 2f41d620c8a..4eed687ac71 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.c +++ b/src/gallium/drivers/llvmpipe/lp_texture.c @@ -39,6 +39,7 @@ #include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_simple_list.h" #include "util/u_transfer.h" #include "lp_context.h" @@ -51,6 +52,11 @@ #include "state_tracker/sw_winsys.h" +#ifdef DEBUG +static struct llvmpipe_resource resource_list; +#endif + + static INLINE boolean resource_is_texture(const struct pipe_resource *resource) { @@ -107,32 +113,55 @@ llvmpipe_texture_layout(struct llvmpipe_screen *screen, assert(LP_MAX_TEXTURE_3D_LEVELS <= LP_MAX_TEXTURE_LEVELS); for (level = 0; level <= pt->last_level; level++) { - const unsigned width_t = align(width, TILE_SIZE) / TILE_SIZE; - const unsigned height_t = align(height, TILE_SIZE) / TILE_SIZE; - unsigned nblocksx, num_slices; - - if (lpr->base.target == PIPE_TEXTURE_CUBE) - num_slices = 6; - else if (lpr->base.target == PIPE_TEXTURE_3D) - num_slices = depth; - else - num_slices = 1; - - /* Allocate storage for whole quads. This is particularly important - * for depth surfaces, which are currently stored in a swizzled format. - */ - nblocksx = util_format_get_nblocksx(pt->format, align(width, TILE_SIZE)); - lpr->row_stride[level] = - align(nblocksx * util_format_get_blocksize(pt->format), 16); + /* Row stride and image stride (for linear layout) */ + { + unsigned alignment, nblocksx, nblocksy, block_size; + + /* For non-compressed formats we need to align the texture size + * to the tile size to facilitate render-to-texture. 
+ */ + if (util_format_is_compressed(pt->format)) + alignment = 1; + else + alignment = TILE_SIZE; + + nblocksx = util_format_get_nblocksx(pt->format, + align(width, alignment)); + nblocksy = util_format_get_nblocksy(pt->format, + align(height, alignment)); + block_size = util_format_get_blocksize(pt->format); - lpr->img_stride[level] = lpr->row_stride[level] * align(height, TILE_SIZE); + lpr->row_stride[level] = align(nblocksx * block_size, 16); - lpr->tiles_per_row[level] = width_t; - lpr->tiles_per_image[level] = width_t * height_t; - lpr->num_slices_faces[level] = num_slices; - lpr->layout[level] = alloc_layout_array(num_slices, width, height); + lpr->img_stride[level] = lpr->row_stride[level] * nblocksy; + } + /* Size of the image in tiles (for tiled layout) */ + { + const unsigned width_t = align(width, TILE_SIZE) / TILE_SIZE; + const unsigned height_t = align(height, TILE_SIZE) / TILE_SIZE; + lpr->tiles_per_row[level] = width_t; + lpr->tiles_per_image[level] = width_t * height_t; + } + + /* Number of 3D image slices or cube faces */ + { + unsigned num_slices; + + if (lpr->base.target == PIPE_TEXTURE_CUBE) + num_slices = 6; + else if (lpr->base.target == PIPE_TEXTURE_3D) + num_slices = depth; + else + num_slices = 1; + + lpr->num_slices_faces[level] = num_slices; + + lpr->layout[level] = alloc_layout_array(num_slices, width, height); + } + + /* Compute size of next mipmap level */ width = u_minify(width, 1); height = u_minify(height, 1); depth = u_minify(depth, 1); @@ -222,6 +251,10 @@ llvmpipe_resource_create(struct pipe_screen *_screen, lpr->id = id_counter++; +#ifdef DEBUG + insert_at_tail(&resource_list, lpr); +#endif + return &lpr->base; fail: @@ -280,6 +313,11 @@ llvmpipe_resource_destroy(struct pipe_screen *pscreen, align_free(lpr->data); } +#ifdef DEBUG + if (lpr->next) + remove_from_list(lpr); +#endif + FREE(lpr); } @@ -450,7 +488,7 @@ static struct pipe_surface * llvmpipe_get_tex_surface(struct pipe_screen *screen, struct pipe_resource *pt, unsigned 
face, unsigned level, unsigned zslice, - enum lp_texture_usage usage) + unsigned usage) { struct pipe_surface *ps; @@ -698,11 +736,8 @@ tex_image_face_size(const struct llvmpipe_resource *lpr, unsigned level, return buffer_size; } else { - const enum pipe_format format = lpr->base.format; - const unsigned nblocksy = - util_format_get_nblocksy(format, align(height, TILE_SIZE)); - const unsigned buffer_size = nblocksy * lpr->row_stride[level]; - return buffer_size; + /* we already computed this */ + return lpr->img_stride[level]; } } @@ -1188,9 +1223,43 @@ llvmpipe_resource_size(const struct pipe_resource *resource) } +#ifdef DEBUG +void +llvmpipe_print_resources(void) +{ + struct llvmpipe_resource *lpr; + unsigned n = 0, total = 0; + + debug_printf("LLVMPIPE: current resources:\n"); + foreach(lpr, &resource_list) { + unsigned size = llvmpipe_resource_size(&lpr->base); + debug_printf("resource %u at %p, size %ux%ux%u: %u bytes, refcount %u\n", + lpr->id, (void *) lpr, + lpr->base.width0, lpr->base.height0, lpr->base.depth0, + size, lpr->base.reference.count); + total += size; + n++; + } + debug_printf("LLVMPIPE: total size of %u resources: %u\n", n, total); +} +#endif + + void llvmpipe_init_screen_resource_funcs(struct pipe_screen *screen) { +#ifdef DEBUG + /* init linked list for tracking resources */ + { + static boolean first_call = TRUE; + if (first_call) { + memset(&resource_list, 0, sizeof(resource_list)); + make_empty_list(&resource_list); + first_call = FALSE; + } + } +#endif + screen->resource_create = llvmpipe_resource_create; screen->resource_destroy = llvmpipe_resource_destroy; screen->resource_from_handle = llvmpipe_resource_from_handle; diff --git a/src/gallium/drivers/llvmpipe/lp_texture.h b/src/gallium/drivers/llvmpipe/lp_texture.h index a8d08d6247f..503b6a19a8d 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.h +++ b/src/gallium/drivers/llvmpipe/lp_texture.h @@ -119,6 +119,11 @@ struct llvmpipe_resource unsigned timestamp; unsigned id; /**< 
temporary, for debugging */ + +#ifdef DEBUG + /** for linked list */ + struct llvmpipe_resource *prev, *next; +#endif }; @@ -220,6 +225,10 @@ llvmpipe_get_texture_tile(struct llvmpipe_resource *lpr, extern void +llvmpipe_print_resources(void); + + +extern void llvmpipe_init_screen_texture_funcs(struct pipe_screen *screen); extern void |