diff options
author | Nicolai Hähnle <[email protected]> | 2009-10-03 19:30:48 +0200 |
---|---|---|
committer | Nicolai Hähnle <[email protected]> | 2009-10-03 19:30:48 +0200 |
commit | 6d25b9125ec1e66e0e255b0ee20fe18dfe1076fa (patch) | |
tree | c376951940eac2875567979a81e6a03a019942b7 /src/gallium/drivers/llvmpipe | |
parent | 81c7561d9d3faf70ac22c6a5f3fbea18f53eed92 (diff) | |
parent | 7d2699aedc084d9cb9c2bd2f8bdb5f038271ac1e (diff) |
Merge branch 'master' into r300-compiler
Diffstat (limited to 'src/gallium/drivers/llvmpipe')
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_bld_arit.c | 276 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_bld_arit.h | 13 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_bld_conv.c | 7 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_bld_depth.c | 3 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_bld_logic.c | 6 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c | 4 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_context.c | 9 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_jit.c | 23 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_screen.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_state_fs.c | 11 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_state_surface.c | 4 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_test_format.c | 5 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_test_main.c | 5 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_texture.c | 14 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_tile_cache.c | 43 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_tile_cache.h | 37 |
16 files changed, 302 insertions, 160 deletions
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c index 0b115fc9b07..d27ef0de041 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c @@ -48,11 +48,13 @@ #include "util/u_memory.h" #include "util/u_debug.h" #include "util/u_string.h" +#include "util/u_cpu_detect.h" #include "lp_bld_type.h" #include "lp_bld_const.h" #include "lp_bld_intr.h" #include "lp_bld_logic.h" +#include "lp_bld_debug.h" #include "lp_bld_arit.h" @@ -71,30 +73,28 @@ lp_build_min_simple(struct lp_build_context *bld, /* TODO: optimize the constant case */ -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) if(type.width * type.length == 128) { if(type.floating) { - if(type.width == 32) + if(type.width == 32 && util_cpu_caps.has_sse) intrinsic = "llvm.x86.sse.min.ps"; - if(type.width == 64) + if(type.width == 64 && util_cpu_caps.has_sse2) intrinsic = "llvm.x86.sse2.min.pd"; } else { - if(type.width == 8 && !type.sign) + if(type.width == 8 && !type.sign && util_cpu_caps.has_sse2) intrinsic = "llvm.x86.sse2.pminu.b"; - if(type.width == 8 && type.sign) + if(type.width == 8 && type.sign && util_cpu_caps.has_sse4_1) intrinsic = "llvm.x86.sse41.pminsb"; - if(type.width == 16 && !type.sign) + if(type.width == 16 && !type.sign && util_cpu_caps.has_sse4_1) intrinsic = "llvm.x86.sse41.pminuw"; - if(type.width == 16 && type.sign) + if(type.width == 16 && type.sign && util_cpu_caps.has_sse2) intrinsic = "llvm.x86.sse2.pmins.w"; - if(type.width == 32 && !type.sign) + if(type.width == 32 && !type.sign && util_cpu_caps.has_sse4_1) intrinsic = "llvm.x86.sse41.pminud"; - if(type.width == 32 && type.sign) + if(type.width == 32 && type.sign && util_cpu_caps.has_sse4_1) intrinsic = "llvm.x86.sse41.pminsd"; } } -#endif if(intrinsic) return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b); @@ -119,30 +119,28 @@ lp_build_max_simple(struct lp_build_context *bld, /* TODO: optimize the constant case */ -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) if(type.width * type.length == 128) { if(type.floating) { - if(type.width == 32) + if(type.width == 32 && util_cpu_caps.has_sse) intrinsic = "llvm.x86.sse.max.ps"; - if(type.width == 64) + if(type.width == 64 && util_cpu_caps.has_sse2) intrinsic = "llvm.x86.sse2.max.pd"; } else { - if(type.width == 8 && !type.sign) + if(type.width == 8 && !type.sign && util_cpu_caps.has_sse2) intrinsic = "llvm.x86.sse2.pmaxu.b"; - if(type.width == 8 && type.sign) + if(type.width == 8 && type.sign && util_cpu_caps.has_sse4_1) intrinsic = "llvm.x86.sse41.pmaxsb"; - if(type.width == 16 && !type.sign) + if(type.width == 16 && !type.sign && util_cpu_caps.has_sse4_1) intrinsic = "llvm.x86.sse41.pmaxuw"; - if(type.width == 16 && type.sign) + if(type.width == 16 && type.sign && util_cpu_caps.has_sse2) intrinsic = "llvm.x86.sse2.pmaxs.w"; - if(type.width == 32 && !type.sign) + if(type.width == 32 && !type.sign && util_cpu_caps.has_sse4_1) intrinsic = "llvm.x86.sse41.pmaxud"; - if(type.width == 32 && type.sign) + if(type.width == 32 && type.sign && util_cpu_caps.has_sse4_1) intrinsic = "llvm.x86.sse41.pmaxsd"; } } -#endif if(intrinsic) return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b); @@ -204,15 +202,14 @@ lp_build_add(struct lp_build_context *bld, if(a == bld->one || b == bld->one) return bld->one; -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - if(type.width * type.length == 128 && + if(util_cpu_caps.has_sse2 && + type.width * type.length == 128 && !type.floating && !type.fixed) { if(type.width == 8) intrinsic = type.sign ? "llvm.x86.sse2.padds.b" : "llvm.x86.sse2.paddus.b"; if(type.width == 16) intrinsic = type.sign ? "llvm.x86.sse2.padds.w" : "llvm.x86.sse2.paddus.w"; } -#endif if(intrinsic) return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b); @@ -257,15 +254,14 @@ lp_build_sub(struct lp_build_context *bld, if(b == bld->one) return bld->zero; -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - if(type.width * type.length == 128 && + if(util_cpu_caps.has_sse2 && + type.width * type.length == 128 && !type.floating && !type.fixed) { if(type.width == 8) intrinsic = type.sign ? "llvm.x86.sse2.psubs.b" : "llvm.x86.sse2.psubus.b"; if(type.width == 16) intrinsic = type.sign ? "llvm.x86.sse2.psubs.w" : "llvm.x86.sse2.psubus.w"; } -#endif if(intrinsic) return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b); @@ -419,8 +415,7 @@ lp_build_mul(struct lp_build_context *bld, return bld->undef; if(!type.floating && !type.fixed && type.norm) { -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - if(type.width == 8 && type.length == 16) { + if(util_cpu_caps.has_sse2 && type.width == 8 && type.length == 16) { LLVMTypeRef i16x8 = LLVMVectorType(LLVMInt16Type(), 8); LLVMTypeRef i8x16 = LLVMVectorType(LLVMInt8Type(), 16); static LLVMValueRef ml = NULL; @@ -456,7 +451,6 @@ lp_build_mul(struct lp_build_context *bld, return ab; } -#endif /* FIXME */ assert(0); @@ -493,10 +487,8 @@ lp_build_div(struct lp_build_context *bld, if(LLVMIsConstant(a) && LLVMIsConstant(b)) return LLVMConstFDiv(a, b); -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - if(type.width == 32 && type.length == 4) + if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) return lp_build_mul(bld, a, lp_build_rcp(bld, b)); -#endif return LLVMBuildFDiv(bld->builder, a, b, ""); } @@ -606,8 +598,7 @@ lp_build_abs(struct lp_build_context *bld, return a; } -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - if(type.width*type.length == 128) { + if(type.width*type.length == 128 && util_cpu_caps.has_ssse3) { switch(type.width) { case 8: return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.b.128", vec_type, a); @@ -617,7 +608,6 @@ lp_build_abs(struct lp_build_context *bld, return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.d.128", vec_type, a); } } -#endif return lp_build_max(bld, a, LLVMBuildNeg(bld->builder, a, "")); } @@ -684,6 +674,8 @@ lp_build_round_sse41(struct lp_build_context *bld, assert(type.floating); assert(type.width*type.length == 128); + assert(lp_check_value(type, a)); + assert(util_cpu_caps.has_sse4_1); switch(type.width) { case 32: @@ -703,20 +695,45 @@ lp_build_round_sse41(struct lp_build_context *bld, LLVMValueRef -lp_build_round(struct lp_build_context *bld, +lp_build_trunc(struct lp_build_context *bld, LLVMValueRef a) { const struct lp_type type = bld->type; assert(type.floating); + assert(lp_check_value(type, a)); + + if(util_cpu_caps.has_sse4_1) + return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_TRUNCATE); + else { + LLVMTypeRef vec_type = lp_build_vec_type(type); + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + LLVMValueRef res; + res = LLVMBuildFPToSI(bld->builder, a, int_vec_type, ""); + res = LLVMBuildSIToFP(bld->builder, res, vec_type, ""); + return res; + } +} -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST); -#endif - /* FIXME */ - assert(0); - return bld->undef; +LLVMValueRef +lp_build_round(struct lp_build_context *bld, + LLVMValueRef a) +{ + const struct lp_type type = bld->type; + + assert(type.floating); + assert(lp_check_value(type, a)); + + if(util_cpu_caps.has_sse4_1) + return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST); + else { + LLVMTypeRef vec_type = lp_build_vec_type(type); + LLVMValueRef res; + res = lp_build_iround(bld, a); + res = LLVMBuildSIToFP(bld->builder, res, vec_type, ""); + return res; + } } @@ -728,13 +745,15 @@ lp_build_floor(struct lp_build_context *bld, assert(type.floating); -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR); -#endif - - /* FIXME */ - assert(0); - return bld->undef; + if(util_cpu_caps.has_sse4_1) + return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR); + else { + LLVMTypeRef vec_type = lp_build_vec_type(type); + LLVMValueRef res; + res = lp_build_ifloor(bld, a); + res = LLVMBuildSIToFP(bld->builder, res, vec_type, ""); + return res; + } } @@ -745,59 +764,143 @@ lp_build_ceil(struct lp_build_context *bld, const struct lp_type type = bld->type; assert(type.floating); + assert(lp_check_value(type, a)); + + if(util_cpu_caps.has_sse4_1) + return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL); + else { + LLVMTypeRef vec_type = lp_build_vec_type(type); + LLVMValueRef res; + res = lp_build_iceil(bld, a); + res = LLVMBuildSIToFP(bld->builder, res, vec_type, ""); + return res; + } +} -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL); -#endif - /* FIXME */ - assert(0); - return bld->undef; +/** + * Convert to integer, through whichever rounding method that's fastest, + * typically truncating to zero. + */ +LLVMValueRef +lp_build_itrunc(struct lp_build_context *bld, + LLVMValueRef a) +{ + const struct lp_type type = bld->type; + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + + assert(type.floating); + assert(lp_check_value(type, a)); + + return LLVMBuildFPToSI(bld->builder, a, int_vec_type, ""); } LLVMValueRef -lp_build_trunc(struct lp_build_context *bld, - LLVMValueRef a) +lp_build_iround(struct lp_build_context *bld, + LLVMValueRef a) { const struct lp_type type = bld->type; + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + LLVMValueRef res; assert(type.floating); + assert(lp_check_value(type, a)); -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_TRUNCATE); -#endif + if(util_cpu_caps.has_sse4_1) { + res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST); + } + else { + LLVMTypeRef vec_type = lp_build_vec_type(type); + LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1)); + LLVMValueRef sign; + LLVMValueRef half; + + /* get sign bit */ + sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); + sign = LLVMBuildAnd(bld->builder, sign, mask, ""); + + /* sign * 0.5 */ + half = lp_build_const_scalar(type, 0.5); + half = LLVMBuildBitCast(bld->builder, half, int_vec_type, ""); + half = LLVMBuildOr(bld->builder, sign, half, ""); + half = LLVMBuildBitCast(bld->builder, half, vec_type, ""); - /* FIXME */ - assert(0); - return bld->undef; + res = LLVMBuildAdd(bld->builder, a, half, ""); + } + + res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, ""); + + return res; } -/** - * Convert to integer, through whichever rounding method that's fastest, - * typically truncating to zero. - */ LLVMValueRef -lp_build_int(struct lp_build_context *bld, - LLVMValueRef a) +lp_build_ifloor(struct lp_build_context *bld, + LLVMValueRef a) { const struct lp_type type = bld->type; LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + LLVMValueRef res; assert(type.floating); + assert(lp_check_value(type, a)); - return LLVMBuildFPToSI(bld->builder, a, int_vec_type, ""); + if(util_cpu_caps.has_sse4_1) { + res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR); + } + else { + /* Take the sign bit and add it to 1 constant */ + LLVMTypeRef vec_type = lp_build_vec_type(type); + unsigned mantissa = lp_mantissa(type); + LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1)); + LLVMValueRef sign; + LLVMValueRef offset; + + /* sign = a < 0 ? ~0 : 0 */ + sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); + sign = LLVMBuildAnd(bld->builder, sign, mask, ""); + sign = LLVMBuildAShr(bld->builder, sign, lp_build_int_const_scalar(type, type.width - 1), ""); + + /* offset = -0.99999(9)f */ + offset = lp_build_const_scalar(type, -(double)(((unsigned long long)1 << mantissa) - 1)/((unsigned long long)1 << mantissa)); + offset = LLVMConstBitCast(offset, int_vec_type); + + /* offset = a < 0 ? -0.99999(9)f : 0.0f */ + offset = LLVMBuildAnd(bld->builder, offset, sign, ""); + offset = LLVMBuildBitCast(bld->builder, offset, vec_type, ""); + + res = LLVMBuildAdd(bld->builder, a, offset, ""); + } + + res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, ""); + + return res; } LLVMValueRef -lp_build_ifloor(struct lp_build_context *bld, - LLVMValueRef a) +lp_build_iceil(struct lp_build_context *bld, + LLVMValueRef a) { - a = lp_build_floor(bld, a); - a = lp_build_int(bld, a); - return a; + const struct lp_type type = bld->type; + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + LLVMValueRef res; + + assert(type.floating); + assert(lp_check_value(type, a)); + + if(util_cpu_caps.has_sse4_1) { + res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL); + } + else { + assert(0); + res = bld->undef; + } + + res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, ""); + + return res; } @@ -837,11 +940,9 @@ lp_build_rcp(struct lp_build_context *bld, if(LLVMIsConstant(a)) return LLVMConstFDiv(bld->one, a); - /* XXX: is this really necessary? */ -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - if(type.width == 32 && type.length == 4) + if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) + /* FIXME: improve precision */ return lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", lp_build_vec_type(type), a); -#endif return LLVMBuildFDiv(bld->builder, bld->one, a, ""); } @@ -858,11 +959,8 @@ lp_build_rsqrt(struct lp_build_context *bld, assert(type.floating); - /* XXX: is this really necessary? */ -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - if(type.width == 32 && type.length == 4) + if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) return lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rsqrt.ps", lp_build_vec_type(type), a); -#endif return lp_build_rcp(bld, lp_build_sqrt(bld, a)); } @@ -918,7 +1016,8 @@ lp_build_pow(struct lp_build_context *bld, { /* TODO: optimize the constant case */ if(LLVMIsConstant(x) && LLVMIsConstant(y)) - debug_printf("%s: inefficient/imprecise constant arithmetic\n"); + debug_printf("%s: inefficient/imprecise constant arithmetic\n", + __FUNCTION__); return lp_build_exp2(bld, lp_build_mul(bld, lp_build_log2(bld, x), y)); } @@ -972,7 +1071,8 @@ lp_build_polynomial(struct lp_build_context *bld, /* TODO: optimize the constant case */ if(LLVMIsConstant(x)) - debug_printf("%s: inefficient/imprecise constant arithmetic\n"); + debug_printf("%s: inefficient/imprecise constant arithmetic\n", + __FUNCTION__); for (i = num_coeffs; i--; ) { LLVMValueRef coeff = lp_build_const_scalar(type, coeffs[i]); @@ -1026,7 +1126,8 @@ lp_build_exp2_approx(struct lp_build_context *bld, if(p_exp2_int_part || p_frac_part || p_exp2) { /* TODO: optimize the constant case */ if(LLVMIsConstant(x)) - debug_printf("%s: inefficient/imprecise constant arithmetic\n"); + debug_printf("%s: inefficient/imprecise constant arithmetic\n", + __FUNCTION__); assert(type.floating && type.width == 32); @@ -1125,7 +1226,8 @@ lp_build_log2_approx(struct lp_build_context *bld, if(p_exp || p_floor_log2 || p_log2) { /* TODO: optimize the constant case */ if(LLVMIsConstant(x)) - debug_printf("%s: inefficient/imprecise constant arithmetic\n"); + debug_printf("%s: inefficient/imprecise constant arithmetic\n", + __FUNCTION__); assert(type.floating && type.width == 32); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.h b/src/gallium/drivers/llvmpipe/lp_bld_arit.h index d68a97c4b87..095a8e1cabe 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.h @@ -126,11 +126,18 @@ lp_build_trunc(struct lp_build_context *bld, LLVMValueRef a); LLVMValueRef -lp_build_int(struct lp_build_context *bld, - LLVMValueRef a); +lp_build_ifloor(struct lp_build_context *bld, + LLVMValueRef a); +LLVMValueRef +lp_build_iceil(struct lp_build_context *bld, + LLVMValueRef a); LLVMValueRef -lp_build_ifloor(struct lp_build_context *bld, +lp_build_iround(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef +lp_build_itrunc(struct lp_build_context *bld, LLVMValueRef a); LLVMValueRef diff --git a/src/gallium/drivers/llvmpipe/lp_bld_conv.c b/src/gallium/drivers/llvmpipe/lp_bld_conv.c index 186cac70f62..20c8710214b 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_conv.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_conv.c @@ -63,6 +63,7 @@ #include "util/u_debug.h" #include "util/u_math.h" +#include "util/u_cpu_detect.h" #include "lp_bld_type.h" #include "lp_bld_const.h" @@ -334,8 +335,7 @@ lp_build_pack2(LLVMBuilderRef builder, assert(!src_type.floating); assert(!dst_type.floating); -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - if(src_type.width * src_type.length == 128) { + if(util_cpu_caps.has_sse2 && src_type.width * src_type.length == 128) { /* All X86 non-interleaved pack instructions all take signed inputs and * saturate them, so saturate beforehand. */ if(!src_type.sign && !clamped) { @@ -349,7 +349,7 @@ lp_build_pack2(LLVMBuilderRef builder, switch(src_type.width) { case 32: - if(dst_type.sign) + if(dst_type.sign || !util_cpu_caps.has_sse4_1) res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packssdw.128", src_vec_type, lo, hi); else /* PACKUSDW is the only instrinsic with a consistent signature */ @@ -372,7 +372,6 @@ lp_build_pack2(LLVMBuilderRef builder, res = LLVMBuildBitCast(builder, res, dst_vec_type, ""); return res; } -#endif lo = LLVMBuildBitCast(builder, lo, dst_vec_type, ""); hi = LLVMBuildBitCast(builder, hi, dst_vec_type, ""); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c index 21c665c4d4c..98ec1cb1b9d 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c @@ -210,7 +210,4 @@ lp_build_depth_test(LLVMBuilderRef builder, dst = lp_build_select(&bld, z_bitmask, src, dst); LLVMBuildStore(builder, dst, dst_ptr); } - - /* FIXME */ - assert(!state->occlusion_count); } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_logic.c b/src/gallium/drivers/llvmpipe/lp_bld_logic.c index 6b6f8207697..db22a8028a6 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_logic.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_logic.c @@ -33,6 +33,8 @@ */ +#include "util/u_cpu_detect.h" + #include "lp_bld_type.h" #include "lp_bld_const.h" #include "lp_bld_intr.h" @@ -65,7 +67,7 @@ lp_build_cmp(struct lp_build_context *bld, #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) if(type.width * type.length == 128) { - if(type.floating) { + if(type.floating && util_cpu_caps.has_sse) { LLVMValueRef args[3]; unsigned cc; boolean swap; @@ -114,7 +116,7 @@ lp_build_cmp(struct lp_build_context *bld, res = LLVMBuildBitCast(bld->builder, res, int_vec_type, ""); return res; } - else { + else if(util_cpu_caps.has_sse2) { static const struct { unsigned swap:1; unsigned eq:1; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c index 8ca1be6f1be..1a47ca32d2d 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c @@ -274,8 +274,8 @@ lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld, s_fpart = lp_build_sub(&bld->coord_bld, s, s_ipart); t_fpart = lp_build_sub(&bld->coord_bld, t, t_ipart); - x0 = lp_build_int(&bld->coord_bld, s_ipart); - y0 = lp_build_int(&bld->coord_bld, t_ipart); + x0 = lp_build_itrunc(&bld->coord_bld, s_ipart); + y0 = lp_build_itrunc(&bld->coord_bld, t_ipart); x0 = lp_build_sample_wrap(bld, x0, width, bld->static_state->pot_width, bld->static_state->wrap_s); y0 = lp_build_sample_wrap(bld, y0, height, bld->static_state->pot_height, bld->static_state->wrap_t); diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index a4b2bd8c2ad..202cb8ef439 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -107,11 +107,16 @@ static void llvmpipe_destroy( struct pipe_context *pipe ) if (llvmpipe->draw) draw_destroy( llvmpipe->draw ); - for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { lp_destroy_tile_cache(llvmpipe->cbuf_cache[i]); + pipe_surface_reference(&llvmpipe->framebuffer.cbufs[i], NULL); + } + pipe_surface_reference(&llvmpipe->framebuffer.zsbuf, NULL); - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { lp_destroy_tex_tile_cache(llvmpipe->tex_cache[i]); + pipe_texture_reference(&llvmpipe->texture[i], NULL); + } for (i = 0; i < Elements(llvmpipe->constants); i++) { if (llvmpipe->constants[i].buffer) { diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index b4a22ff4a97..1126bf90b96 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -36,6 +36,7 @@ #include <llvm-c/Transforms/Scalar.h> #include "util/u_memory.h" +#include "util/u_cpu_detect.h" #include "lp_screen.h" #include "lp_bld_intr.h" #include "lp_jit.h" @@ -147,6 +148,19 @@ lp_jit_screen_init(struct llvmpipe_screen *screen) { char *error = NULL; + util_cpu_detect(); + +#if 0 + /* For simulating less capable machines */ + util_cpu_caps.has_sse3 = 0; + util_cpu_caps.has_sse4_1 = 0; +#endif + +#ifdef LLVM_NATIVE_ARCH + LLVMLinkInJIT(); + LLVMInitializeNativeTarget(); +#endif + screen->module = LLVMModuleCreateWithName("llvmpipe"); screen->provider = LLVMCreateModuleProviderForExistingModule(screen->module); @@ -163,8 +177,15 @@ lp_jit_screen_init(struct llvmpipe_screen *screen) LLVMAddTargetData(screen->target, screen->pass); /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, * but there are more on SVN. */ + /* TODO: Add more passes */ LLVMAddConstantPropagationPass(screen->pass); - LLVMAddInstructionCombiningPass(screen->pass); + if(util_cpu_caps.has_sse4_1) { + /* FIXME: There is a bug in this pass, whereby the combination of fptosi + * and sitofp (necessary for trunc/floor/ceil/round implementation) + * somehow becomes invalid code. + */ + LLVMAddInstructionCombiningPass(screen->pass); + } LLVMAddPromoteMemoryToRegisterPass(screen->pass); LLVMAddGVNPass(screen->pass); LLVMAddCFGSimplificationPass(screen->pass); diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index ff7ef8658a9..05189274589 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -82,7 +82,7 @@ llvmpipe_get_param(struct pipe_screen *screen, int param) case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: return 13; /* max 4Kx4K */ case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - return 8; /* max 128x128x128 */ + return 9; /* max 256x256x256 */ case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: return 13; /* max 4Kx4K */ case PIPE_CAP_TGSI_CONT_SUPPORTED: diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 9faed5a0b18..b00be0cc32a 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -401,7 +401,6 @@ generate_fragment(struct llvmpipe_context *lp, if(key->depth.enabled) { debug_printf("depth.func = %s\n", debug_dump_func(key->depth.func, TRUE)); debug_printf("depth.writemask = %u\n", key->depth.writemask); - debug_printf("depth.occlusion_count = %u\n", key->depth.occlusion_count); } if(key->alpha.enabled) { debug_printf("alpha.func = %s\n", debug_dump_func(key->alpha.func, TRUE)); @@ -582,6 +581,11 @@ generate_fragment(struct llvmpipe_context *lp, * Translate the LLVM IR into machine code. */ + if(LLVMVerifyFunction(variant->function, LLVMPrintMessageAction)) { + LLVMDumpValue(variant->function); + abort(); + } + LLVMRunFunctionPassManager(screen->pass, variant->function); #ifdef DEBUG @@ -589,11 +593,6 @@ generate_fragment(struct llvmpipe_context *lp, debug_printf("\n"); #endif - if(LLVMVerifyFunction(variant->function, LLVMPrintMessageAction)) { - LLVMDumpValue(variant->function); - abort(); - } - variant->jit_function = (lp_jit_frag_func)LLVMGetPointerToGlobal(screen->engine, variant->function); #ifdef DEBUG diff --git a/src/gallium/drivers/llvmpipe/lp_state_surface.c b/src/gallium/drivers/llvmpipe/lp_state_surface.c index 177a26b7b1f..2c29144c039 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_surface.c +++ b/src/gallium/drivers/llvmpipe/lp_state_surface.c @@ -56,7 +56,7 @@ llvmpipe_set_framebuffer_state(struct pipe_context *pipe, lp_flush_tile_cache(lp->cbuf_cache[i]); /* assign new */ - lp->framebuffer.cbufs[i] = fb->cbufs[i]; + pipe_surface_reference(&lp->framebuffer.cbufs[i], fb->cbufs[i]); /* update cache */ lp_tile_cache_set_surface(lp->cbuf_cache[i], fb->cbufs[i]); @@ -81,7 +81,7 @@ llvmpipe_set_framebuffer_state(struct pipe_context *pipe, } /* assign new */ - lp->framebuffer.zsbuf = fb->zsbuf; + pipe_surface_reference(&lp->framebuffer.zsbuf, fb->zsbuf); /* Tell draw module how deep the Z/depth buffer is */ if (lp->framebuffer.zsbuf) { diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c index d8455e56490..7d83f899e6a 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_format.c +++ b/src/gallium/drivers/llvmpipe/lp_test_format.c @@ -264,6 +264,11 @@ int main(int argc, char **argv) unsigned i; int ret; +#ifdef LLVM_NATIVE_ARCH + LLVMLinkInJIT(); + LLVMInitializeNativeTarget(); +#endif + for (i = 0; i < sizeof(test_cases)/sizeof(test_cases[0]); ++i) if(!test_format(&test_cases[i])) ret = 1; diff --git a/src/gallium/drivers/llvmpipe/lp_test_main.c b/src/gallium/drivers/llvmpipe/lp_test_main.c index 4592dc0b2d0..f07fa256f16 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_main.c +++ b/src/gallium/drivers/llvmpipe/lp_test_main.c @@ -365,6 +365,11 @@ int main(int argc, char **argv) n = atoi(argv[i]); } +#ifdef LLVM_NATIVE_ARCH + LLVMLinkInJIT(); + LLVMInitializeNativeTarget(); +#endif + if(fp) { /* Warm up the caches */ test_some(0, NULL, 100); diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c index 724d4378336..08f0950d475 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.c +++ b/src/gallium/drivers/llvmpipe/lp_texture.c @@ -353,17 +353,9 @@ llvmpipe_transfer_map( struct pipe_screen *_screen, if(lpt->dt) { struct llvmpipe_winsys *winsys = screen->winsys; - unsigned flags = 0; - if (transfer->usage != PIPE_TRANSFER_READ) { - flags |= PIPE_BUFFER_USAGE_CPU_WRITE; - } - - if (transfer->usage != PIPE_TRANSFER_WRITE) { - flags |= PIPE_BUFFER_USAGE_CPU_READ; - } - - map = winsys->displaytarget_map(winsys, lpt->dt, flags); + map = winsys->displaytarget_map(winsys, lpt->dt, + pipe_transfer_buffer_flags(transfer)); if (map == NULL) return NULL; } @@ -373,7 +365,7 @@ llvmpipe_transfer_map( struct pipe_screen *_screen, /* May want to different things here depending on read/write nature * of the map: */ - if (transfer->texture && transfer->usage != PIPE_TRANSFER_READ) + if (transfer->texture && (transfer->usage & PIPE_TRANSFER_WRITE)) { /* Do something to notify sharing contexts of a texture change. * In llvmpipe, that would mean flushing the texture cache. diff --git a/src/gallium/drivers/llvmpipe/lp_tile_cache.c b/src/gallium/drivers/llvmpipe/lp_tile_cache.c index 2e576e6039d..0c06b659a1f 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_cache.c +++ b/src/gallium/drivers/llvmpipe/lp_tile_cache.c @@ -44,10 +44,53 @@ #include "lp_tile_cache.h" +#define MAX_WIDTH 4096 +#define MAX_HEIGHT 4096 + + +enum llvmpipe_tile_status +{ + LP_TILE_STATUS_UNDEFINED = 0, + LP_TILE_STATUS_CLEAR = 1, + LP_TILE_STATUS_DEFINED = 2 +}; + + +struct llvmpipe_cached_tile +{ + enum llvmpipe_tile_status status; + + /** color in SOA format */ + uint8_t *color; +}; + + +struct llvmpipe_tile_cache +{ + struct pipe_screen *screen; + struct pipe_surface *surface; /**< the surface we're caching */ + struct pipe_transfer *transfer; + void *transfer_map; + + struct llvmpipe_cached_tile entries[MAX_WIDTH/TILE_SIZE][MAX_HEIGHT/TILE_SIZE]; + + uint8_t clear_color[4]; /**< for color bufs */ + uint clear_val; /**< for z+stencil, or packed color clear value */ + + struct llvmpipe_cached_tile *last_tile; /**< most recently retrieved tile */ +}; + + struct llvmpipe_tile_cache * lp_create_tile_cache( struct pipe_screen *screen ) { struct llvmpipe_tile_cache *tc; + int maxLevels, maxTexSize; + + /* sanity checking: max sure MAX_WIDTH/HEIGHT >= largest texture image */ + maxLevels = screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS); + maxTexSize = 1 << (maxLevels - 1); + assert(MAX_WIDTH >= maxTexSize); tc = CALLOC_STRUCT( llvmpipe_tile_cache ); if(!tc) diff --git a/src/gallium/drivers/llvmpipe/lp_tile_cache.h b/src/gallium/drivers/llvmpipe/lp_tile_cache.h index 6d8ba5ece7a..161bab37991 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_cache.h +++ b/src/gallium/drivers/llvmpipe/lp_tile_cache.h @@ -33,42 +33,7 @@ #include "lp_tile_soa.h" -enum llvmpipe_tile_status -{ - LP_TILE_STATUS_UNDEFINED = 0, - LP_TILE_STATUS_CLEAR = 1, - LP_TILE_STATUS_DEFINED = 2 -}; - - -struct llvmpipe_cached_tile -{ - enum llvmpipe_tile_status status; - - /** color in SOA format */ - uint8_t *color; -}; - - -/** XXX move these */ -#define MAX_WIDTH 2048 -#define MAX_HEIGHT 2048 - - -struct llvmpipe_tile_cache -{ - struct pipe_screen *screen; - struct pipe_surface *surface; /**< the surface we're caching */ - struct pipe_transfer *transfer; - void *transfer_map; - - struct llvmpipe_cached_tile entries[MAX_WIDTH/TILE_SIZE][MAX_HEIGHT/TILE_SIZE]; - - uint8_t clear_color[4]; /**< for color bufs */ - uint clear_val; /**< for z+stencil, or packed color clear value */ - - struct llvmpipe_cached_tile *last_tile; /**< most recently retrieved tile */ -}; +struct llvmpipe_tile_cache; /* opaque */ extern struct llvmpipe_tile_cache * |