diff options
author | José Fonseca <[email protected]> | 2010-01-08 15:42:57 +0000 |
---|---|---|
committer | José Fonseca <[email protected]> | 2010-01-08 15:42:57 +0000 |
commit | 080c40ab32b2abd6d8381b4a0cc143d36a1652b2 (patch) | |
tree | e173767ebc5a82d81b9fc086449d915e29348976 /src/gallium/drivers/llvmpipe | |
parent | 9cdf6f025b2ed55cfb13dd09f870f01d0c7947d3 (diff) | |
parent | a1de400e8de06a80ab140bb0fa950e990607572d (diff) |
Merge remote branch 'origin/master' into lp-binning
Conflicts:
src/gallium/auxiliary/util/u_surface.c
src/gallium/drivers/llvmpipe/Makefile
src/gallium/drivers/llvmpipe/SConscript
src/gallium/drivers/llvmpipe/lp_bld_arit.c
src/gallium/drivers/llvmpipe/lp_bld_flow.c
src/gallium/drivers/llvmpipe/lp_bld_interp.c
src/gallium/drivers/llvmpipe/lp_clear.c
src/gallium/drivers/llvmpipe/lp_context.c
src/gallium/drivers/llvmpipe/lp_context.h
src/gallium/drivers/llvmpipe/lp_draw_arrays.c
src/gallium/drivers/llvmpipe/lp_jit.c
src/gallium/drivers/llvmpipe/lp_jit.h
src/gallium/drivers/llvmpipe/lp_prim_vbuf.c
src/gallium/drivers/llvmpipe/lp_setup.c
src/gallium/drivers/llvmpipe/lp_setup_point.c
src/gallium/drivers/llvmpipe/lp_state.h
src/gallium/drivers/llvmpipe/lp_state_blend.c
src/gallium/drivers/llvmpipe/lp_state_derived.c
src/gallium/drivers/llvmpipe/lp_state_fs.c
src/gallium/drivers/llvmpipe/lp_state_sampler.c
src/gallium/drivers/llvmpipe/lp_state_surface.c
src/gallium/drivers/llvmpipe/lp_tex_cache.c
src/gallium/drivers/llvmpipe/lp_tex_cache.h
src/gallium/drivers/llvmpipe/lp_tex_sample.h
src/gallium/drivers/llvmpipe/lp_tile_cache.c
Diffstat (limited to 'src/gallium/drivers/llvmpipe')
55 files changed, 2178 insertions, 987 deletions
diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 6ec97046e15..264999a7cea 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -19,10 +19,13 @@ C_SOURCES = \ lp_bld_depth.c \ lp_bld_flow.c \ lp_bld_format_aos.c \ + lp_bld_format_query.c \ lp_bld_format_soa.c \ lp_bld_interp.c \ lp_bld_intr.c \ lp_bld_logic.c \ + lp_bld_pack.c \ + lp_bld_sample.c \ lp_bld_sample_soa.c \ lp_bld_swizzle.c \ lp_bld_struct.c \ @@ -58,6 +61,9 @@ C_SOURCES = \ lp_texture.c \ lp_tile_soa.c +CPP_SOURCES = \ + lp_bld_misc.cpp + include ../../Makefile.template lp_tile_soa.c: lp_tile_soa.py ../../auxiliary/util/u_format_parse.py ../../auxiliary/util/u_format_access.py ../../auxiliary/util/u_format.csv diff --git a/src/gallium/drivers/llvmpipe/README b/src/gallium/drivers/llvmpipe/README index 89d08834a3c..0c3f00fd58f 100644 --- a/src/gallium/drivers/llvmpipe/README +++ b/src/gallium/drivers/llvmpipe/README @@ -51,21 +51,22 @@ Requirements - Linux - - udis86, http://udis86.sourceforge.net/ . Use my repository, which decodes - opcodes not yet supported by upstream. + - A x86 or amd64 processor. 64bit mode is preferred. - git clone git://people.freedesktop.org/~jrfonseca/udis86 - cd udis86 - ./configure --with-pic - make - sudo make install + Support for sse2 is strongly encouraged. Support for ssse3, and sse4.1 will + yield the most efficient code. The less features the CPU has the more + likely is that you ran into underperforming, buggy, or incomplete code. + + See /proc/cpuinfo to know what your CPU supports. + + - LLVM 2.5 or greater. LLVM 2.6 is preferred. - - LLVM 2.5. On Debian based distributions do: + On Debian based distributions do: aptitude install llvm-dev - There is a typo in one of the llvm-dev 2.5 headers, that causes compilation - errors in the debug build: + There is a typo in one of the llvm 2.5 headers, that may cause compilation + errors. To fix it apply the change: --- /usr/include/llvm-c/Core.h.orig 2009-08-10 15:38:54.000000000 +0100 +++ /usr/include/llvm-c/Core.h 2009-08-10 15:38:25.000000000 +0100 @@ -79,12 +80,17 @@ Requirements #endif return reinterpret_cast<T**>(Vals); - - A x86 or amd64 processor with support for sse2, sse3, and sse4.1 SIMD - instructions. This is necessary because we emit several SSE intrinsics for - convenience. See /proc/cpuinfo to know what your CPU supports. - - - scons + - scons (optional) + - udis86, http://udis86.sourceforge.net/ (optional): + + git clone git://udis86.git.sourceforge.net/gitroot/udis86/udis86 + cd udis86 + ./autogen.sh + ./configure --with-pic + make + sudo make install + Building ======== diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index ae4303bd24f..5af77c4a12d 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -9,6 +9,8 @@ if not env.has_key('LLVM_VERSION'): env.Tool('udis86') +env.Append(CPPPATH = ['.']) + env.CodeGenerate( target = 'lp_tile_soa.c', script = 'lp_tile_soa.py', @@ -30,10 +32,14 @@ llvmpipe = env.ConvenienceLibrary( 'lp_bld_depth.c', 'lp_bld_flow.c', 'lp_bld_format_aos.c', + 'lp_bld_format_query.c', 'lp_bld_format_soa.c', 'lp_bld_interp.c', 'lp_bld_intr.c', 'lp_bld_logic.c', + 'lp_bld_misc.cpp', + 'lp_bld_pack.c', + 'lp_bld_sample.c', 'lp_bld_sample_soa.c', 'lp_bld_struct.c', 'lp_bld_swizzle.c', @@ -75,21 +81,19 @@ llvmpipe = env.ConvenienceLibrary( env = env.Clone() -env.Prepend(LIBS = [llvmpipe] + auxiliaries) +env.Prepend(LIBS = [llvmpipe] + gallium) -env.Program( - target = 'lp_test_format', - source = ['lp_test_format.c'], -) +tests = [ + 'format', + 'blend', + 'conv', +] -env.Program( - target = 'lp_test_blend', - source = ['lp_test_blend.c', 'lp_test_main.c'], -) - -env.Program( - target = 'lp_test_conv', - source = ['lp_test_conv.c', 'lp_test_main.c'], -) +for test in tests: + target = env.Program( + target = 'lp_test_' + test, + source = ['lp_test_' + test + '.c', 'lp_test_main.c'], + ) + env.InstallProgram(target) Export('llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c index f8260938f5a..2df86dd32e5 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c @@ -47,6 +47,7 @@ #include "util/u_memory.h" #include "util/u_debug.h" +#include "util/u_math.h" #include "util/u_string.h" #include "util/u_cpu_detect.h" @@ -54,6 +55,7 @@ #include "lp_bld_const.h" #include "lp_bld_intr.h" #include "lp_bld_logic.h" +#include "lp_bld_pack.h" #include "lp_bld_debug.h" #include "lp_bld_arit.h" @@ -280,45 +282,6 @@ lp_build_sub(struct lp_build_context *bld, /** - * Build shuffle vectors that match PUNPCKLxx and PUNPCKHxx instructions. - */ -static LLVMValueRef -lp_build_unpack_shuffle(unsigned n, unsigned lo_hi) -{ - LLVMValueRef elems[LP_MAX_VECTOR_LENGTH]; - unsigned i, j; - - assert(n <= LP_MAX_VECTOR_LENGTH); - assert(lo_hi < 2); - - for(i = 0, j = lo_hi*n/2; i < n; i += 2, ++j) { - elems[i + 0] = LLVMConstInt(LLVMInt32Type(), 0 + j, 0); - elems[i + 1] = LLVMConstInt(LLVMInt32Type(), n + j, 0); - } - - return LLVMConstVector(elems, n); -} - - -/** - * Build constant int vector of width 'n' and value 'c'. - */ -static LLVMValueRef -lp_build_const_vec(LLVMTypeRef type, unsigned n, long long c) -{ - LLVMValueRef elems[LP_MAX_VECTOR_LENGTH]; - unsigned i; - - assert(n <= LP_MAX_VECTOR_LENGTH); - - for(i = 0; i < n; ++i) - elems[i] = LLVMConstInt(type, c, 0); - - return LLVMConstVector(elems, n); -} - - -/** * Normalized 8bit multiplication. * * - alpha plus one @@ -361,33 +324,30 @@ lp_build_const_vec(LLVMTypeRef type, unsigned n, long long c) */ static LLVMValueRef lp_build_mul_u8n(LLVMBuilderRef builder, + struct lp_type i16_type, LLVMValueRef a, LLVMValueRef b) { - static LLVMValueRef c01 = NULL; - static LLVMValueRef c08 = NULL; - static LLVMValueRef c80 = NULL; + LLVMValueRef c8; LLVMValueRef ab; - if(!c01) c01 = lp_build_const_vec(LLVMInt16Type(), 8, 0x01); - if(!c08) c08 = lp_build_const_vec(LLVMInt16Type(), 8, 0x08); - if(!c80) c80 = lp_build_const_vec(LLVMInt16Type(), 8, 0x80); + c8 = lp_build_int_const_scalar(i16_type, 8); #if 0 /* a*b/255 ~= (a*(b + 1)) >> 256 */ - b = LLVMBuildAdd(builder, b, c01, ""); + b = LLVMBuildAdd(builder, b, lp_build_int_const_scalar(i16_type, 1), ""); ab = LLVMBuildMul(builder, a, b, ""); #else - /* t/255 ~= (t + (t >> 8) + 0x80) >> 8 */ + /* ab/255 ~= (ab + (ab >> 8) + 0x80) >> 8 */ ab = LLVMBuildMul(builder, a, b, ""); - ab = LLVMBuildAdd(builder, ab, LLVMBuildLShr(builder, ab, c08, ""), ""); - ab = LLVMBuildAdd(builder, ab, c80, ""); + ab = LLVMBuildAdd(builder, ab, LLVMBuildLShr(builder, ab, c8, ""), ""); + ab = LLVMBuildAdd(builder, ab, lp_build_int_const_scalar(i16_type, 0x80), ""); #endif - ab = LLVMBuildLShr(builder, ab, c08, ""); + ab = LLVMBuildLShr(builder, ab, c8, ""); return ab; } @@ -402,6 +362,8 @@ lp_build_mul(struct lp_build_context *bld, LLVMValueRef b) { const struct lp_type type = bld->type; + LLVMValueRef shift; + LLVMValueRef res; if(a == bld->zero) return bld->zero; @@ -415,39 +377,18 @@ lp_build_mul(struct lp_build_context *bld, return bld->undef; if(!type.floating && !type.fixed && type.norm) { - if(util_cpu_caps.has_sse2 && type.width == 8 && type.length == 16) { - LLVMTypeRef i16x8 = LLVMVectorType(LLVMInt16Type(), 8); - LLVMTypeRef i8x16 = LLVMVectorType(LLVMInt8Type(), 16); - static LLVMValueRef ml = NULL; - static LLVMValueRef mh = NULL; - LLVMValueRef al, ah, bl, bh; - LLVMValueRef abl, abh; - LLVMValueRef ab; - - if(!ml) ml = lp_build_unpack_shuffle(16, 0); - if(!mh) mh = lp_build_unpack_shuffle(16, 1); - - /* PUNPCKLBW, PUNPCKHBW */ - al = LLVMBuildShuffleVector(bld->builder, a, bld->zero, ml, ""); - bl = LLVMBuildShuffleVector(bld->builder, b, bld->zero, ml, ""); - ah = LLVMBuildShuffleVector(bld->builder, a, bld->zero, mh, ""); - bh = LLVMBuildShuffleVector(bld->builder, b, bld->zero, mh, ""); + if(type.width == 8) { + struct lp_type i16_type = lp_wider_type(type); + LLVMValueRef al, ah, bl, bh, abl, abh, ab; - /* NOP */ - al = LLVMBuildBitCast(bld->builder, al, i16x8, ""); - bl = LLVMBuildBitCast(bld->builder, bl, i16x8, ""); - ah = LLVMBuildBitCast(bld->builder, ah, i16x8, ""); - bh = LLVMBuildBitCast(bld->builder, bh, i16x8, ""); + lp_build_unpack2(bld->builder, type, i16_type, a, &al, &ah); + lp_build_unpack2(bld->builder, type, i16_type, b, &bl, &bh); /* PMULLW, PSRLW, PADDW */ - abl = lp_build_mul_u8n(bld->builder, al, bl); - abh = lp_build_mul_u8n(bld->builder, ah, bh); + abl = lp_build_mul_u8n(bld->builder, i16_type, al, bl); + abh = lp_build_mul_u8n(bld->builder, i16_type, ah, bh); - /* PACKUSWB */ - ab = lp_build_intrinsic_binary(bld->builder, "llvm.x86.sse2.packuswb.128" , i16x8, abl, abh); - - /* NOP */ - ab = LLVMBuildBitCast(bld->builder, ab, i8x16, ""); + ab = lp_build_pack2(bld->builder, i16_type, type, abl, abh); return ab; } @@ -456,10 +397,84 @@ lp_build_mul(struct lp_build_context *bld, assert(0); } - if(LLVMIsConstant(a) && LLVMIsConstant(b)) - return LLVMConstMul(a, b); + if(type.fixed) + shift = lp_build_int_const_scalar(type, type.width/2); + else + shift = NULL; + + if(LLVMIsConstant(a) && LLVMIsConstant(b)) { + res = LLVMConstMul(a, b); + if(shift) { + if(type.sign) + res = LLVMConstAShr(res, shift); + else + res = LLVMConstLShr(res, shift); + } + } + else { + res = LLVMBuildMul(bld->builder, a, b, ""); + if(shift) { + if(type.sign) + res = LLVMBuildAShr(bld->builder, res, shift, ""); + else + res = LLVMBuildLShr(bld->builder, res, shift, ""); + } + } + + return res; +} + + +/** + * Small vector x scale multiplication optimization. + */ +LLVMValueRef +lp_build_mul_imm(struct lp_build_context *bld, + LLVMValueRef a, + int b) +{ + LLVMValueRef factor; + + if(b == 0) + return bld->zero; - return LLVMBuildMul(bld->builder, a, b, ""); + if(b == 1) + return a; + + if(b == -1) + return LLVMBuildNeg(bld->builder, a, ""); + + if(b == 2 && bld->type.floating) + return lp_build_add(bld, a, a); + + if(util_is_pot(b)) { + unsigned shift = ffs(b) - 1; + + if(bld->type.floating) { +#if 0 + /* + * Power of two multiplication by directly manipulating the mantissa. + * + * XXX: This might not be always faster, it will introduce a small error + * for multiplication by zero, and it will produce wrong results + * for Inf and NaN. + */ + unsigned mantissa = lp_mantissa(bld->type); + factor = lp_build_int_const_scalar(bld->type, (unsigned long long)shift << mantissa); + a = LLVMBuildBitCast(bld->builder, a, lp_build_int_vec_type(bld->type), ""); + a = LLVMBuildAdd(bld->builder, a, factor, ""); + a = LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(bld->type), ""); + return a; +#endif + } + else { + factor = lp_build_const_scalar(bld->type, shift); + return LLVMBuildShl(bld->builder, a, factor, ""); + } + } + + factor = lp_build_const_scalar(bld->type, (double)b); + return lp_build_mul(bld, a, factor); } @@ -494,13 +509,36 @@ lp_build_div(struct lp_build_context *bld, } +/** + * Linear interpolation. + * + * This also works for integer values with a few caveats. + * + * @sa http://www.stereopsis.com/doubleblend.html + */ LLVMValueRef lp_build_lerp(struct lp_build_context *bld, LLVMValueRef x, LLVMValueRef v0, LLVMValueRef v1) { - return lp_build_add(bld, v0, lp_build_mul(bld, x, lp_build_sub(bld, v1, v0))); + LLVMValueRef delta; + LLVMValueRef res; + + delta = lp_build_sub(bld, v1, v0); + + res = lp_build_mul(bld, x, delta); + + res = lp_build_add(bld, v0, res); + + if(bld->type.fixed) + /* XXX: This step is necessary for lerping 8bit colors stored on 16bits, + * but it will be wrong for other uses. Basically we need a more + * powerful lp_type, capable of further distinguishing the values + * interpretation from the value storage. */ + res = LLVMBuildAnd(bld->builder, res, lp_build_int_const_scalar(bld->type, (1 << bld->type.width/2) - 1), ""); + + return res; } @@ -1046,7 +1084,7 @@ lp_build_log(struct lp_build_context *bld, LLVMValueRef x) { /* log(2) */ - LLVMValueRef log2 = lp_build_const_scalar(bld->type, 1.4426950408889634); + LLVMValueRef log2 = lp_build_const_scalar(bld->type, 0.69314718055994529); return lp_build_mul(bld, log2, lp_build_exp2(bld, x)); } @@ -1058,7 +1096,7 @@ lp_build_log(struct lp_build_context *bld, /** * Generate polynomial. - * Ex: x^2 * coeffs[0] + x * coeffs[1] + coeffs[2]. + * Ex: coeffs[0] + x * coeffs[1] + x^2 * coeffs[2]. */ static LLVMValueRef lp_build_polynomial(struct lp_build_context *bld, @@ -1248,13 +1286,13 @@ lp_build_log2_approx(struct lp_build_context *bld, /* mant = (float) mantissa(x) */ mant = LLVMBuildAnd(bld->builder, i, mantmask, ""); mant = LLVMBuildOr(bld->builder, mant, one, ""); - mant = LLVMBuildSIToFP(bld->builder, mant, vec_type, ""); + mant = LLVMBuildBitCast(bld->builder, mant, vec_type, ""); logmant = lp_build_polynomial(bld, mant, lp_build_log2_polynomial, Elements(lp_build_log2_polynomial)); /* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0*/ - logmant = LLVMBuildMul(bld->builder, logmant, LLVMBuildMul(bld->builder, mant, bld->one, ""), ""); + logmant = LLVMBuildMul(bld->builder, logmant, LLVMBuildSub(bld->builder, mant, bld->one, ""), ""); res = LLVMBuildAdd(bld->builder, logmant, logexp, ""); } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.h b/src/gallium/drivers/llvmpipe/lp_bld_arit.h index 095a8e1cabe..62be4b9aee1 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.h @@ -40,7 +40,7 @@ #include <llvm-c/Core.h> -struct lp_type type; +struct lp_type; struct lp_build_context; @@ -67,6 +67,11 @@ lp_build_mul(struct lp_build_context *bld, LLVMValueRef b); LLVMValueRef +lp_build_mul_imm(struct lp_build_context *bld, + LLVMValueRef a, + int b); + +LLVMValueRef lp_build_div(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c index d14f468ba93..ced7b9c11d7 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c @@ -142,7 +142,7 @@ lp_build_blend_factor_unswizzled(struct lp_build_blend_aos_context *bld, enum lp_build_blend_swizzle { LP_BUILD_BLEND_SWIZZLE_RGBA = 0, - LP_BUILD_BLEND_SWIZZLE_AAAA = 1, + LP_BUILD_BLEND_SWIZZLE_AAAA = 1 }; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_const.h b/src/gallium/drivers/llvmpipe/lp_bld_const.h index ffb302f7366..cb8e1c7b006 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_const.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_const.h @@ -42,7 +42,7 @@ #include <pipe/p_compiler.h> -struct lp_type type; +struct lp_type; unsigned diff --git a/src/gallium/drivers/llvmpipe/lp_bld_conv.c b/src/gallium/drivers/llvmpipe/lp_bld_conv.c index 1df938529c4..9fa88372022 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_conv.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_conv.c @@ -69,6 +69,7 @@ #include "lp_bld_const.h" #include "lp_bld_intr.h" #include "lp_bld_arit.h" +#include "lp_bld_pack.h" #include "lp_bld_conv.h" @@ -203,241 +204,6 @@ lp_build_unsigned_norm_to_float(LLVMBuilderRef builder, /** - * Build shuffle vectors that match PUNPCKLxx and PUNPCKHxx instructions. - */ -static LLVMValueRef -lp_build_const_unpack_shuffle(unsigned n, unsigned lo_hi) -{ - LLVMValueRef elems[LP_MAX_VECTOR_LENGTH]; - unsigned i, j; - - assert(n <= LP_MAX_VECTOR_LENGTH); - assert(lo_hi < 2); - - /* TODO: cache results in a static table */ - - for(i = 0, j = lo_hi*n/2; i < n; i += 2, ++j) { - elems[i + 0] = LLVMConstInt(LLVMInt32Type(), 0 + j, 0); - elems[i + 1] = LLVMConstInt(LLVMInt32Type(), n + j, 0); - } - - return LLVMConstVector(elems, n); -} - - -/** - * Build shuffle vectors that match PACKxx instructions. - */ -static LLVMValueRef -lp_build_const_pack_shuffle(unsigned n) -{ - LLVMValueRef elems[LP_MAX_VECTOR_LENGTH]; - unsigned i; - - assert(n <= LP_MAX_VECTOR_LENGTH); - - /* TODO: cache results in a static table */ - - for(i = 0; i < n; ++i) - elems[i] = LLVMConstInt(LLVMInt32Type(), 2*i, 0); - - return LLVMConstVector(elems, n); -} - - -/** - * Expand the bit width. - * - * This will only change the number of bits the values are represented, not the - * values themselves. - */ -static void -lp_build_expand(LLVMBuilderRef builder, - struct lp_type src_type, - struct lp_type dst_type, - LLVMValueRef src, - LLVMValueRef *dst, unsigned num_dsts) -{ - unsigned num_tmps; - unsigned i; - - /* Register width must remain constant */ - assert(src_type.width * src_type.length == dst_type.width * dst_type.length); - - /* We must not loose or gain channels. Only precision */ - assert(src_type.length == dst_type.length * num_dsts); - - num_tmps = 1; - dst[0] = src; - - while(src_type.width < dst_type.width) { - struct lp_type new_type = src_type; - LLVMTypeRef new_vec_type; - - new_type.width *= 2; - new_type.length /= 2; - new_vec_type = lp_build_vec_type(new_type); - - for(i = num_tmps; i--; ) { - LLVMValueRef zero; - LLVMValueRef shuffle_lo; - LLVMValueRef shuffle_hi; - LLVMValueRef lo; - LLVMValueRef hi; - - zero = lp_build_zero(src_type); - shuffle_lo = lp_build_const_unpack_shuffle(src_type.length, 0); - shuffle_hi = lp_build_const_unpack_shuffle(src_type.length, 1); - - /* PUNPCKLBW, PUNPCKHBW */ - lo = LLVMBuildShuffleVector(builder, dst[i], zero, shuffle_lo, ""); - hi = LLVMBuildShuffleVector(builder, dst[i], zero, shuffle_hi, ""); - - dst[2*i + 0] = LLVMBuildBitCast(builder, lo, new_vec_type, ""); - dst[2*i + 1] = LLVMBuildBitCast(builder, hi, new_vec_type, ""); - } - - src_type = new_type; - - num_tmps *= 2; - } - - assert(num_tmps == num_dsts); -} - - -/** - * Non-interleaved pack. - * - * This will move values as - * - * lo = __ l0 __ l1 __ l2 __.. __ ln - * hi = __ h0 __ h1 __ h2 __.. __ hn - * res = l0 l1 l2 .. ln h0 h1 h2 .. hn - * - * TODO: handle saturation consistently. - */ -static LLVMValueRef -lp_build_pack2(LLVMBuilderRef builder, - struct lp_type src_type, - struct lp_type dst_type, - boolean clamped, - LLVMValueRef lo, - LLVMValueRef hi) -{ - LLVMTypeRef src_vec_type = lp_build_vec_type(src_type); - LLVMTypeRef dst_vec_type = lp_build_vec_type(dst_type); - LLVMValueRef shuffle; - LLVMValueRef res; - - /* Register width must remain constant */ - assert(src_type.width * src_type.length == dst_type.width * dst_type.length); - - /* We must not loose or gain channels. Only precision */ - assert(src_type.length * 2 == dst_type.length); - - assert(!src_type.floating); - assert(!dst_type.floating); - - if(util_cpu_caps.has_sse2 && src_type.width * src_type.length == 128) { - /* All X86 non-interleaved pack instructions all take signed inputs and - * saturate them, so saturate beforehand. */ - if(!src_type.sign && !clamped) { - struct lp_build_context bld; - unsigned dst_bits = dst_type.sign ? dst_type.width - 1 : dst_type.width; - LLVMValueRef dst_max = lp_build_int_const_scalar(src_type, ((unsigned long long)1 << dst_bits) - 1); - lp_build_context_init(&bld, builder, src_type); - lo = lp_build_min(&bld, lo, dst_max); - hi = lp_build_min(&bld, hi, dst_max); - } - - switch(src_type.width) { - case 32: - if(dst_type.sign || !util_cpu_caps.has_sse4_1) - res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packssdw.128", src_vec_type, lo, hi); - else - /* PACKUSDW is the only instrinsic with a consistent signature */ - return lp_build_intrinsic_binary(builder, "llvm.x86.sse41.packusdw", dst_vec_type, lo, hi); - break; - - case 16: - if(dst_type.sign) - res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packsswb.128", src_vec_type, lo, hi); - else - res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packuswb.128", src_vec_type, lo, hi); - break; - - default: - assert(0); - return LLVMGetUndef(dst_vec_type); - break; - } - - res = LLVMBuildBitCast(builder, res, dst_vec_type, ""); - return res; - } - - lo = LLVMBuildBitCast(builder, lo, dst_vec_type, ""); - hi = LLVMBuildBitCast(builder, hi, dst_vec_type, ""); - - shuffle = lp_build_const_pack_shuffle(dst_type.length); - - res = LLVMBuildShuffleVector(builder, lo, hi, shuffle, ""); - - return res; -} - - -/** - * Truncate the bit width. - * - * TODO: Handle saturation consistently. - */ -static LLVMValueRef -lp_build_pack(LLVMBuilderRef builder, - struct lp_type src_type, - struct lp_type dst_type, - boolean clamped, - const LLVMValueRef *src, unsigned num_srcs) -{ - LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH]; - unsigned i; - - /* Register width must remain constant */ - assert(src_type.width * src_type.length == dst_type.width * dst_type.length); - - /* We must not loose or gain channels. Only precision */ - assert(src_type.length * num_srcs == dst_type.length); - - for(i = 0; i < num_srcs; ++i) - tmp[i] = src[i]; - - while(src_type.width > dst_type.width) { - struct lp_type new_type = src_type; - - new_type.width /= 2; - new_type.length *= 2; - - /* Take in consideration the sign changes only in the last step */ - if(new_type.width == dst_type.width) - new_type.sign = dst_type.sign; - - num_srcs /= 2; - - for(i = 0; i < num_srcs; ++i) - tmp[i] = lp_build_pack2(builder, src_type, new_type, clamped, - tmp[2*i + 0], tmp[2*i + 1]); - - src_type = new_type; - } - - assert(num_srcs == 1); - - return tmp[0]; -} - - -/** * Generic type conversion. * * TODO: Take a precision argument, or even better, add a new precision member @@ -576,7 +342,7 @@ lp_build_conv(LLVMBuilderRef builder, if(tmp_type.width < dst_type.width) { assert(num_tmps == 1); - lp_build_expand(builder, tmp_type, dst_type, tmp[0], tmp, num_dsts); + lp_build_unpack(builder, tmp_type, dst_type, tmp[0], tmp, num_dsts); tmp_type.width = dst_type.width; tmp_type.length = dst_type.length; num_tmps = num_dsts; @@ -696,7 +462,7 @@ lp_build_conv_mask(LLVMBuilderRef builder, } else if(src_type.width < dst_type.width) { assert(num_srcs == 1); - lp_build_expand(builder, src_type, dst_type, src[0], dst, num_dsts); + lp_build_unpack(builder, src_type, dst_type, src[0], dst, num_dsts); } else { assert(num_srcs == num_dsts); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_conv.h b/src/gallium/drivers/llvmpipe/lp_bld_conv.h index ca378804d2a..948e68fae4f 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_conv.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_conv.h @@ -40,7 +40,7 @@ #include <llvm-c/Core.h> -struct lp_type type; +struct lp_type; LLVMValueRef diff --git a/src/gallium/drivers/llvmpipe/lp_bld_debug.c b/src/gallium/drivers/llvmpipe/lp_bld_debug.c index 59d8f492e60..39dfc51e503 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_debug.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_debug.c @@ -77,10 +77,10 @@ lp_disassemble(const void* func) while (ud_disassemble(&ud_obj)) { #ifdef PIPE_ARCH_X86 - debug_printf("%08lx: ", (unsigned long)ud_insn_off(&ud_obj)); + debug_printf("0x%08lx:\t", (unsigned long)ud_insn_off(&ud_obj)); #endif #ifdef PIPE_ARCH_X86_64 - debug_printf("%016llx: ", (unsigned long long)ud_insn_off(&ud_obj)); + debug_printf("0x%016llx:\t", (unsigned long long)ud_insn_off(&ud_obj)); #endif #if 0 @@ -115,9 +115,16 @@ lp_disassemble(const void* func) } } - if (ud_insn_off(&ud_obj) >= max_jmp_pc && ud_obj.mnemonic == UD_Iret) + if ((ud_insn_off(&ud_obj) >= max_jmp_pc && ud_obj.mnemonic == UD_Iret) || + ud_obj.mnemonic == UD_Iinvalid) break; } + +#if 0 + /* Print GDB command, useful to verify udis86 output */ + debug_printf("disassemble %p %p\n", func, (void*)(uintptr_t)ud_obj.pc); +#endif + debug_printf("\n"); #else (void)func; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c index 98ec1cb1b9d..d438c0e63d7 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c @@ -90,7 +90,7 @@ lp_depth_type(const struct util_format_description *format_desc, if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_FLOAT) { type.floating = TRUE; - assert(swizzle = 0); + assert(swizzle == 0); assert(format_desc->channel[swizzle].size == format_desc->block.bits); } else if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED) { diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format.h b/src/gallium/drivers/llvmpipe/lp_bld_format.h index c087fc986ed..970bee379f5 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_format.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_format.h @@ -42,62 +42,34 @@ struct util_format_description; struct lp_type; -/** - * Unpack a pixel into its RGBA components. - * - * @param packed integer. - * - * @return RGBA in a 4 floats vector. - */ -LLVMValueRef -lp_build_unpack_rgba_aos(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef packed); +boolean +lp_format_is_rgba8(const struct util_format_description *desc); -/** - * Pack a pixel. - * - * @param rgba 4 float vector with the unpacked components. - */ -LLVMValueRef -lp_build_pack_rgba_aos(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef rgba); +void +lp_build_format_swizzle_soa(const struct util_format_description *format_desc, + struct lp_type type, + const LLVMValueRef *unswizzled, + LLVMValueRef *swizzled); -/** - * Load a pixel into its RGBA components. - * - * @param ptr value with the pointer to the packed pixel. Pointer type is - * irrelevant. - * - * @return RGBA in a 4 floats vector. - */ LLVMValueRef -lp_build_load_rgba_aos(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef ptr); +lp_build_unpack_rgba_aos(LLVMBuilderRef builder, + const struct util_format_description *desc, + LLVMValueRef packed); -/** - * Store a pixel. - * - * @param rgba 4 float vector with the unpacked components. - */ -void -lp_build_store_rgba_aos(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef ptr, - LLVMValueRef rgba); +LLVMValueRef +lp_build_unpack_rgba8_aos(LLVMBuilderRef builder, + const struct util_format_description *desc, + struct lp_type type, + LLVMValueRef packed); + LLVMValueRef -lp_build_gather(LLVMBuilderRef builder, - unsigned length, - unsigned src_width, - unsigned dst_width, - LLVMValueRef base_ptr, - LLVMValueRef offsets); +lp_build_pack_rgba_aos(LLVMBuilderRef builder, + const struct util_format_description *desc, + LLVMValueRef rgba); void @@ -108,12 +80,4 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder, LLVMValueRef *rgba); -void -lp_build_load_rgba_soa(LLVMBuilderRef builder, - const struct util_format_description *format_desc, - struct lp_type type, - LLVMValueRef base_ptr, - LLVMValueRef offsets, - LLVMValueRef *rgba); - #endif /* !LP_BLD_FORMAT_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c index b9b5d84bed5..10e82f120bb 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c @@ -25,18 +25,39 @@ * **************************************************************************/ +/** + * @file + * AoS pixel format manipulation. + * + * @author Jose Fonseca <[email protected]> + */ + +#include "util/u_cpu_detect.h" #include "util/u_format.h" +#include "lp_bld_type.h" +#include "lp_bld_const.h" +#include "lp_bld_logic.h" +#include "lp_bld_swizzle.h" #include "lp_bld_format.h" +/** + * Unpack a single pixel into its RGBA components. + * + * @param packed integer. + * + * @return RGBA in a 4 floats vector. + * + * XXX: This is mostly for reference and testing -- operating a single pixel at + * a time is rarely if ever needed. + */ LLVMValueRef lp_build_unpack_rgba_aos(LLVMBuilderRef builder, - enum pipe_format format, + const struct util_format_description *desc, LLVMValueRef packed) { - const struct util_format_description *desc; LLVMTypeRef type; LLVMValueRef shifted, casted, scaled, masked; LLVMValueRef shifts[4]; @@ -49,8 +70,6 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder, unsigned shift; unsigned i; - desc = util_format_description(format); - /* FIXME: Support more formats */ assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH); assert(desc->block.width == 1); @@ -111,7 +130,7 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder, shifted = LLVMBuildLShr(builder, packed, LLVMConstVector(shifts, 4), ""); masked = LLVMBuildAnd(builder, shifted, LLVMConstVector(masks, 4), ""); - // UIToFP can't be expressed in SSE2 + /* UIToFP can't be expressed in SSE2 */ casted = LLVMBuildSIToFP(builder, masked, LLVMVectorType(LLVMFloatType(), 4), ""); if (normalized) @@ -151,12 +170,130 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder, } +/** + * Take a vector with packed pixels and unpack into a rgba8 vector. + * + * Formats with bit depth smaller than 32bits are accepted, but they must be + * padded to 32bits. + */ +LLVMValueRef +lp_build_unpack_rgba8_aos(LLVMBuilderRef builder, + const struct util_format_description *desc, + struct lp_type type, + LLVMValueRef packed) +{ + struct lp_build_context bld; + bool rgba8; + LLVMValueRef res; + unsigned i; + + lp_build_context_init(&bld, builder, type); + + /* FIXME: Support more formats */ + assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH); + assert(desc->block.width == 1); + assert(desc->block.height == 1); + assert(desc->block.bits <= 32); + + assert(!type.floating); + assert(!type.fixed); + assert(type.norm); + assert(type.width == 8); + assert(type.length % 4 == 0); + + rgba8 = TRUE; + for(i = 0; i < 4; ++i) { + assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED || + desc->channel[i].type == UTIL_FORMAT_TYPE_VOID); + if(desc->channel[0].size != 8) + rgba8 = FALSE; + } + + if(rgba8) { + /* + * The pixel is already in a rgba8 format variant. All it is necessary + * is to swizzle the channels. + */ + + unsigned char swizzles[4]; + boolean zeros[4]; /* bitwise AND mask */ + boolean ones[4]; /* bitwise OR mask */ + boolean swizzles_needed = FALSE; + boolean zeros_needed = FALSE; + boolean ones_needed = FALSE; + + for(i = 0; i < 4; ++i) { + enum util_format_swizzle swizzle = desc->swizzle[i]; + + /* Initialize with the no-op case */ + swizzles[i] = util_cpu_caps.little_endian ? 3 - i : i; + zeros[i] = TRUE; + ones[i] = FALSE; + + switch (swizzle) { + case UTIL_FORMAT_SWIZZLE_X: + case UTIL_FORMAT_SWIZZLE_Y: + case UTIL_FORMAT_SWIZZLE_Z: + case UTIL_FORMAT_SWIZZLE_W: + if(swizzle != swizzles[i]) { + swizzles[i] = swizzle; + swizzles_needed = TRUE; + } + break; + case UTIL_FORMAT_SWIZZLE_0: + zeros[i] = FALSE; + zeros_needed = TRUE; + break; + case UTIL_FORMAT_SWIZZLE_1: + ones[i] = TRUE; + ones_needed = TRUE; + break; + case UTIL_FORMAT_SWIZZLE_NONE: + assert(0); + break; + } + } + + res = packed; + + if(swizzles_needed) + res = lp_build_swizzle1_aos(&bld, res, swizzles); + + if(zeros_needed) { + /* Mask out zero channels */ + LLVMValueRef mask = lp_build_const_mask_aos(type, zeros); + res = LLVMBuildAnd(builder, res, mask, ""); + } + + if(ones_needed) { + /* Or one channels */ + LLVMValueRef mask = lp_build_const_mask_aos(type, ones); + res = LLVMBuildOr(builder, res, mask, ""); + } + } + else { + /* FIXME */ + assert(0); + res = lp_build_undef(type); + } + + return res; +} + + +/** + * Pack a single pixel. + * + * @param rgba 4 float vector with the unpacked components. + * + * XXX: This is mostly for reference and testing -- operating a single pixel at + * a time is rarely if ever needed. + */ LLVMValueRef lp_build_pack_rgba_aos(LLVMBuilderRef builder, - enum pipe_format format, + const struct util_format_description *desc, LLVMValueRef rgba) { - const struct util_format_description *desc; LLVMTypeRef type; LLVMValueRef packed = NULL; LLVMValueRef swizzles[4]; @@ -167,8 +304,6 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder, unsigned shift; unsigned i, j; - desc = util_format_description(format); - assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH); assert(desc->block.width == 1); assert(desc->block.height == 1); @@ -247,57 +382,3 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder, return packed; } - - -LLVMValueRef -lp_build_load_rgba_aos(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef ptr) -{ - const struct util_format_description *desc; - LLVMTypeRef type; - LLVMValueRef packed; - - desc = util_format_description(format); - - /* FIXME: Support more formats */ - assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH); - assert(desc->block.width == 1); - assert(desc->block.height == 1); - assert(desc->block.bits <= 32); - - type = LLVMIntType(desc->block.bits); - - ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(type, 0), ""); - - packed = LLVMBuildLoad(builder, ptr, ""); - - return lp_build_unpack_rgba_aos(builder, format, packed); -} - - -void -lp_build_store_rgba_aos(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef ptr, - LLVMValueRef rgba) -{ - const struct util_format_description *desc; - LLVMTypeRef type; - LLVMValueRef packed; - - desc = util_format_description(format); - - assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH); - assert(desc->block.width == 1); - assert(desc->block.height == 1); - - type = LLVMIntType(desc->block.bits); - - packed = lp_build_pack_rgba_aos(builder, format, rgba); - - ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(type, 0), ""); - - LLVMBuildStore(builder, packed, ptr); -} - diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_query.c b/src/gallium/drivers/llvmpipe/lp_bld_format_query.c new file mode 100644 index 00000000000..f3832d07ff9 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_format_query.c @@ -0,0 +1,72 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Utility functions to make assertions about formats. + * + * This module centralizes most of logic used when determining what algorithm + * is most suitable (i.e., most efficient yet correct) for a given format. + * + * It might be possible to move some of these functions to u_format module, + * but since tiny differences in the format my render it more/less + * appropriate to a given algorithm it is impossible to make any long term + * guarantee about the semantics of these functions. + * + * @author Jose Fonseca <[email protected]> + */ + + +#include "util/u_format.h" + +#include "lp_bld_format.h" + + +/** + * Whether this format is a 4 rgba8 variant + */ +boolean +lp_format_is_rgba8(const struct util_format_description *desc) +{ + unsigned chan; + + if(desc->block.width != 1 || + desc->block.height != 1 || + desc->block.bits != 32) + return FALSE; + + for(chan = 0; chan < 4; ++chan) { + if(desc->channel[chan].type != UTIL_FORMAT_TYPE_UNSIGNED && + desc->channel[chan].type != UTIL_FORMAT_TYPE_SIGNED && + desc->channel[chan].type != UTIL_FORMAT_TYPE_VOID) + return FALSE; + if(desc->channel[chan].size != 8) + return FALSE; + } + + return TRUE; +} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c index 66bebdcdec8..64151d169da 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c @@ -34,66 +34,17 @@ #include "lp_bld_format.h" -/** - * Gather elements from scatter positions in memory into a single vector. - * - * @param src_width src element width - * @param dst_width result element width (source will be expanded to fit) - * @param length length of the offsets, - * @param base_ptr base pointer, should be a i8 pointer type. - * @param offsets vector with offsets - */ -LLVMValueRef -lp_build_gather(LLVMBuilderRef builder, - unsigned length, - unsigned src_width, - unsigned dst_width, - LLVMValueRef base_ptr, - LLVMValueRef offsets) -{ - LLVMTypeRef src_type = LLVMIntType(src_width); - LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0); - LLVMTypeRef dst_elem_type = LLVMIntType(dst_width); - LLVMTypeRef dst_vec_type = LLVMVectorType(dst_elem_type, length); - LLVMValueRef res; - unsigned i; - - res = LLVMGetUndef(dst_vec_type); - for(i = 0; i < length; ++i) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - LLVMValueRef elem_offset; - LLVMValueRef elem_ptr; - LLVMValueRef elem; - - elem_offset = LLVMBuildExtractElement(builder, offsets, index, ""); - elem_ptr = LLVMBuildGEP(builder, base_ptr, &elem_offset, 1, ""); - elem_ptr = LLVMBuildBitCast(builder, elem_ptr, src_ptr_type, ""); - elem = LLVMBuildLoad(builder, elem_ptr, ""); - - assert(src_width <= dst_width); - if(src_width > dst_width) - elem = LLVMBuildTrunc(builder, elem, dst_elem_type, ""); - if(src_width < dst_width) - elem = LLVMBuildZExt(builder, elem, dst_elem_type, ""); - - res = LLVMBuildInsertElement(builder, res, elem, index, ""); - } - - return res; -} - - static LLVMValueRef -lp_build_format_swizzle(struct lp_type type, - const LLVMValueRef *inputs, - enum util_format_swizzle swizzle) +lp_build_format_swizzle_chan_soa(struct lp_type type, + const LLVMValueRef *unswizzled, + enum util_format_swizzle swizzle) { switch (swizzle) { case UTIL_FORMAT_SWIZZLE_X: case UTIL_FORMAT_SWIZZLE_Y: case UTIL_FORMAT_SWIZZLE_Z: case UTIL_FORMAT_SWIZZLE_W: - return inputs[swizzle]; + return unswizzled[swizzle]; case UTIL_FORMAT_SWIZZLE_0: return lp_build_zero(type); case UTIL_FORMAT_SWIZZLE_1: @@ -108,6 +59,28 @@ lp_build_format_swizzle(struct lp_type type, void +lp_build_format_swizzle_soa(const struct util_format_description *format_desc, + struct lp_type type, + const LLVMValueRef *unswizzled, + LLVMValueRef *swizzled) +{ + if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { + enum util_format_swizzle swizzle = format_desc->swizzle[0]; + LLVMValueRef depth = lp_build_format_swizzle_chan_soa(type, unswizzled, swizzle); + swizzled[2] = swizzled[1] = swizzled[0] = depth; + swizzled[3] = lp_build_one(type); + } + else { + unsigned chan; + for (chan = 0; chan < 4; ++chan) { + enum util_format_swizzle swizzle = format_desc->swizzle[chan]; + swizzled[chan] = lp_build_format_swizzle_chan_soa(type, unswizzled, swizzle); + } + } +} + + +void lp_build_unpack_rgba_soa(LLVMBuilderRef builder, const struct util_format_description *format_desc, struct lp_type type, @@ -172,38 +145,5 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder, start = stop; } - if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { - enum util_format_swizzle swizzle = format_desc->swizzle[0]; - LLVMValueRef depth = lp_build_format_swizzle(type, inputs, swizzle); - rgba[2] = rgba[1] = rgba[0] = depth; - rgba[3] = lp_build_one(type); - } - else { - for (chan = 0; chan < 4; ++chan) { - enum util_format_swizzle swizzle = format_desc->swizzle[chan]; - rgba[chan] = lp_build_format_swizzle(type, inputs, swizzle); - } - } -} - - -void -lp_build_load_rgba_soa(LLVMBuilderRef builder, - const struct util_format_description *format_desc, - struct lp_type type, - LLVMValueRef base_ptr, - LLVMValueRef offsets, - LLVMValueRef *rgba) -{ - LLVMValueRef packed; - - assert(format_desc->block.width == 1); - assert(format_desc->block.height == 1); - assert(format_desc->block.bits <= type.width); - - packed = lp_build_gather(builder, - type.length, format_desc->block.bits, type.width, - base_ptr, offsets); - - lp_build_unpack_rgba_soa(builder, format_desc, type, packed, rgba); + lp_build_format_swizzle_soa(format_desc, type, inputs, rgba); } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c index affeeca6ff9..daedf40d558 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c @@ -143,32 +143,6 @@ coeffs_init(struct lp_build_interp_soa_context *bld, /** - * Small vector x scale multiplication optimization. - * - * TODO: Should be elsewhere. - */ -static LLVMValueRef -coeff_multiply(struct lp_build_interp_soa_context *bld, - LLVMValueRef coeff, - int step) -{ - LLVMValueRef factor; - - switch(step) { - case 0: - return bld->base.zero; - case 1: - return coeff; - case 2: - return lp_build_add(&bld->base, coeff, coeff); - default: - factor = lp_build_const_scalar(bld->base.type, (double)step); - return lp_build_mul(&bld->base, coeff, factor); - } -} - - -/** * Emit LLVM code to compute the fragment shader input attribute values. * For example, for a color input, we'll compute red, green, blue and alpha * values for the four pixels in a quad. @@ -378,8 +352,8 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, unsigned first, last, mask; unsigned attrib; - first = decl->DeclarationRange.First; - last = decl->DeclarationRange.Last; + first = decl->Range.First; + last = decl->Range.Last; mask = decl->Declaration.UsageMask; for( attrib = first; attrib <= last; ++attrib ) { diff --git a/src/gallium/drivers/llvmpipe/lp_bld_logic.h b/src/gallium/drivers/llvmpipe/lp_bld_logic.h index d6876366561..40d64eb2c19 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_logic.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_logic.h @@ -42,7 +42,7 @@ #include "pipe/p_defines.h" /* For PIPE_FUNC_xxx */ -struct lp_type type; +struct lp_type; struct lp_build_context; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp b/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp new file mode 100644 index 00000000000..d3f78c06d92 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp @@ -0,0 +1,61 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "pipe/p_config.h" + +#include "lp_bld_misc.h" + + +#ifndef LLVM_NATIVE_ARCH + +namespace llvm { + extern void LinkInJIT(); +} + + +void +LLVMLinkInJIT(void) +{ + llvm::LinkInJIT(); +} + + +extern "C" int X86TargetMachineModule; + + +int +LLVMInitializeNativeTarget(void) +{ +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) + X86TargetMachineModule = 1; +#endif + return 0; +} + + +#endif diff --git a/src/gallium/drivers/llvmpipe/lp_bld_misc.h b/src/gallium/drivers/llvmpipe/lp_bld_misc.h new file mode 100644 index 00000000000..0e787e0b9cb --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_misc.h @@ -0,0 +1,56 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef LP_BLD_MISC_H +#define LP_BLD_MISC_H + + +#include "llvm/Config/config.h" + +#ifdef __cplusplus +extern "C" { +#endif + + +#ifndef LLVM_NATIVE_ARCH + +void +LLVMLinkInJIT(void); + +int +LLVMInitializeNativeTarget(void); + +#endif /* !LLVM_NATIVE_ARCH */ + + +#ifdef __cplusplus +} +#endif + + +#endif /* !LP_BLD_MISC_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_pack.c b/src/gallium/drivers/llvmpipe/lp_bld_pack.c new file mode 100644 index 00000000000..bc360ad77ad --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_pack.c @@ -0,0 +1,418 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * @file + * Helper functions for packing/unpacking. + * + * Pack/unpacking is necessary for conversion between types of different + * bit width. + * + * They are also commonly used when an computation needs higher + * precision for the intermediate values. For example, if one needs the + * function: + * + * c = compute(a, b); + * + * to use more precision for intermediate results then one should implement it + * as: + * + * LLVMValueRef + * compute(LLVMBuilderRef builder struct lp_type type, LLVMValueRef a, LLVMValueRef b) + * { + * struct lp_type wide_type = lp_wider_type(type); + * LLVMValueRef al, ah, bl, bh, cl, ch, c; + * + * lp_build_unpack2(builder, type, wide_type, a, &al, &ah); + * lp_build_unpack2(builder, type, wide_type, b, &bl, &bh); + * + * cl = compute_half(al, bl); + * ch = compute_half(ah, bh); + * + * c = lp_build_pack2(bld->builder, wide_type, type, cl, ch); + * + * return c; + * } + * + * where compute_half() would do the computation for half the elements with + * twice the precision. + * + * @author Jose Fonseca <[email protected]> + */ + + +#include "util/u_debug.h" +#include "util/u_math.h" +#include "util/u_cpu_detect.h" + +#include "lp_bld_type.h" +#include "lp_bld_const.h" +#include "lp_bld_intr.h" +#include "lp_bld_arit.h" +#include "lp_bld_pack.h" + + +/** + * Build shuffle vectors that match PUNPCKLxx and PUNPCKHxx instructions. + */ +static LLVMValueRef +lp_build_const_unpack_shuffle(unsigned n, unsigned lo_hi) +{ + LLVMValueRef elems[LP_MAX_VECTOR_LENGTH]; + unsigned i, j; + + assert(n <= LP_MAX_VECTOR_LENGTH); + assert(lo_hi < 2); + + /* TODO: cache results in a static table */ + + for(i = 0, j = lo_hi*n/2; i < n; i += 2, ++j) { + elems[i + 0] = LLVMConstInt(LLVMInt32Type(), 0 + j, 0); + elems[i + 1] = LLVMConstInt(LLVMInt32Type(), n + j, 0); + } + + return LLVMConstVector(elems, n); +} + + +/** + * Build shuffle vectors that match PACKxx instructions. + */ +static LLVMValueRef +lp_build_const_pack_shuffle(unsigned n) +{ + LLVMValueRef elems[LP_MAX_VECTOR_LENGTH]; + unsigned i; + + assert(n <= LP_MAX_VECTOR_LENGTH); + + /* TODO: cache results in a static table */ + + for(i = 0; i < n; ++i) + elems[i] = LLVMConstInt(LLVMInt32Type(), 2*i, 0); + + return LLVMConstVector(elems, n); +} + + +/** + * Interleave vector elements. + * + * Matches the PUNPCKLxx and PUNPCKHxx SSE instructions. + */ +LLVMValueRef +lp_build_interleave2(LLVMBuilderRef builder, + struct lp_type type, + LLVMValueRef a, + LLVMValueRef b, + unsigned lo_hi) +{ + LLVMValueRef shuffle; + + shuffle = lp_build_const_unpack_shuffle(type.length, lo_hi); + + return LLVMBuildShuffleVector(builder, a, b, shuffle, ""); +} + + +/** + * Double the bit width. + * + * This will only change the number of bits the values are represented, not the + * values themselves. + */ +void +lp_build_unpack2(LLVMBuilderRef builder, + struct lp_type src_type, + struct lp_type dst_type, + LLVMValueRef src, + LLVMValueRef *dst_lo, + LLVMValueRef *dst_hi) +{ + LLVMValueRef msb; + LLVMTypeRef dst_vec_type; + + assert(!src_type.floating); + assert(!dst_type.floating); + assert(dst_type.width == src_type.width * 2); + assert(dst_type.length * 2 == src_type.length); + + if(dst_type.sign && src_type.sign) { + /* Replicate the sign bit in the most significant bits */ + msb = LLVMBuildAShr(builder, src, lp_build_int_const_scalar(src_type, src_type.width - 1), ""); + } + else + /* Most significant bits always zero */ + msb = lp_build_zero(src_type); + + /* Interleave bits */ + if(util_cpu_caps.little_endian) { + *dst_lo = lp_build_interleave2(builder, src_type, src, msb, 0); + *dst_hi = lp_build_interleave2(builder, src_type, src, msb, 1); + } + else { + *dst_lo = lp_build_interleave2(builder, src_type, msb, src, 0); + *dst_hi = lp_build_interleave2(builder, src_type, msb, src, 1); + } + + /* Cast the result into the new type (twice as wide) */ + + dst_vec_type = lp_build_vec_type(dst_type); + + *dst_lo = LLVMBuildBitCast(builder, *dst_lo, dst_vec_type, ""); + *dst_hi = LLVMBuildBitCast(builder, *dst_hi, dst_vec_type, ""); +} + + +/** + * Expand the bit width. + * + * This will only change the number of bits the values are represented, not the + * values themselves. + */ +void +lp_build_unpack(LLVMBuilderRef builder, + struct lp_type src_type, + struct lp_type dst_type, + LLVMValueRef src, + LLVMValueRef *dst, unsigned num_dsts) +{ + unsigned num_tmps; + unsigned i; + + /* Register width must remain constant */ + assert(src_type.width * src_type.length == dst_type.width * dst_type.length); + + /* We must not loose or gain channels. Only precision */ + assert(src_type.length == dst_type.length * num_dsts); + + num_tmps = 1; + dst[0] = src; + + while(src_type.width < dst_type.width) { + struct lp_type tmp_type = src_type; + + tmp_type.width *= 2; + tmp_type.length /= 2; + + for(i = num_tmps; i--; ) { + lp_build_unpack2(builder, src_type, tmp_type, dst[i], &dst[2*i + 0], &dst[2*i + 1]); + } + + src_type = tmp_type; + + num_tmps *= 2; + } + + assert(num_tmps == num_dsts); +} + + +/** + * Non-interleaved pack. + * + * This will move values as + * + * lo = __ l0 __ l1 __ l2 __.. __ ln + * hi = __ h0 __ h1 __ h2 __.. __ hn + * res = l0 l1 l2 .. ln h0 h1 h2 .. hn + * + * This will only change the number of bits the values are represented, not the + * values themselves. + * + * It is assumed the values are already clamped into the destination type range. + * Values outside that range will produce undefined results. Use + * lp_build_packs2 instead. + */ +LLVMValueRef +lp_build_pack2(LLVMBuilderRef builder, + struct lp_type src_type, + struct lp_type dst_type, + LLVMValueRef lo, + LLVMValueRef hi) +{ + LLVMTypeRef src_vec_type = lp_build_vec_type(src_type); + LLVMTypeRef dst_vec_type = lp_build_vec_type(dst_type); + LLVMValueRef shuffle; + LLVMValueRef res; + + dst_vec_type = lp_build_vec_type(dst_type); + + assert(!src_type.floating); + assert(!dst_type.floating); + assert(src_type.width == dst_type.width * 2); + assert(src_type.length * 2 == dst_type.length); + + if(util_cpu_caps.has_sse2 && src_type.width * src_type.length == 128) { + switch(src_type.width) { + case 32: + if(dst_type.sign) { + res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packssdw.128", src_vec_type, lo, hi); + } + else { + if (util_cpu_caps.has_sse4_1) { + /* PACKUSDW is the only instrinsic with a consistent signature */ + return lp_build_intrinsic_binary(builder, "llvm.x86.sse41.packusdw", dst_vec_type, lo, hi); + } + else { + assert(0); + return LLVMGetUndef(dst_vec_type); + } + } + break; + + case 16: + if(dst_type.sign) + res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packsswb.128", src_vec_type, lo, hi); + else + res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packuswb.128", src_vec_type, lo, hi); + break; + + default: + assert(0); + return LLVMGetUndef(dst_vec_type); + break; + } + + res = LLVMBuildBitCast(builder, res, dst_vec_type, ""); + return res; + } + + lo = LLVMBuildBitCast(builder, lo, dst_vec_type, ""); + hi = LLVMBuildBitCast(builder, hi, dst_vec_type, ""); + + shuffle = lp_build_const_pack_shuffle(dst_type.length); + + res = LLVMBuildShuffleVector(builder, lo, hi, shuffle, ""); + + return res; +} + + + +/** + * Non-interleaved pack and saturate. + * + * Same as lp_build_pack2 but will saturate values so that they fit into the + * destination type. + */ +LLVMValueRef +lp_build_packs2(LLVMBuilderRef builder, + struct lp_type src_type, + struct lp_type dst_type, + LLVMValueRef lo, + LLVMValueRef hi) +{ + boolean clamp; + + assert(!src_type.floating); + assert(!dst_type.floating); + assert(src_type.sign == dst_type.sign); + assert(src_type.width == dst_type.width * 2); + assert(src_type.length * 2 == dst_type.length); + + clamp = TRUE; + + /* All X86 SSE non-interleaved pack instructions take signed inputs and + * saturate them, so no need to clamp for those cases. */ + if(util_cpu_caps.has_sse2 && + src_type.width * src_type.length == 128 && + src_type.sign) + clamp = FALSE; + + if(clamp) { + struct lp_build_context bld; + unsigned dst_bits = dst_type.sign ? dst_type.width - 1 : dst_type.width; + LLVMValueRef dst_max = lp_build_int_const_scalar(src_type, ((unsigned long long)1 << dst_bits) - 1); + lp_build_context_init(&bld, builder, src_type); + lo = lp_build_min(&bld, lo, dst_max); + hi = lp_build_min(&bld, hi, dst_max); + /* FIXME: What about lower bound? */ + } + + return lp_build_pack2(builder, src_type, dst_type, lo, hi); +} + + +/** + * Truncate the bit width. + * + * TODO: Handle saturation consistently. + */ +LLVMValueRef +lp_build_pack(LLVMBuilderRef builder, + struct lp_type src_type, + struct lp_type dst_type, + boolean clamped, + const LLVMValueRef *src, unsigned num_srcs) +{ + LLVMValueRef (*pack2)(LLVMBuilderRef builder, + struct lp_type src_type, + struct lp_type dst_type, + LLVMValueRef lo, + LLVMValueRef hi); + LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH]; + unsigned i; + + + /* Register width must remain constant */ + assert(src_type.width * src_type.length == dst_type.width * dst_type.length); + + /* We must not loose or gain channels. Only precision */ + assert(src_type.length * num_srcs == dst_type.length); + + if(clamped) + pack2 = &lp_build_pack2; + else + pack2 = &lp_build_packs2; + + for(i = 0; i < num_srcs; ++i) + tmp[i] = src[i]; + + while(src_type.width > dst_type.width) { + struct lp_type tmp_type = src_type; + + tmp_type.width /= 2; + tmp_type.length *= 2; + + /* Take in consideration the sign changes only in the last step */ + if(tmp_type.width == dst_type.width) + tmp_type.sign = dst_type.sign; + + num_srcs /= 2; + + for(i = 0; i < num_srcs; ++i) + tmp[i] = pack2(builder, src_type, tmp_type, tmp[2*i + 0], tmp[2*i + 1]); + + src_type = tmp_type; + } + + assert(num_srcs == 1); + + return tmp[0]; +} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_pack.h b/src/gallium/drivers/llvmpipe/lp_bld_pack.h new file mode 100644 index 00000000000..fb2a34984a4 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_pack.h @@ -0,0 +1,95 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Helper functions for packing/unpacking conversions. + * + * @author Jose Fonseca <[email protected]> + */ + + +#ifndef LP_BLD_PACK_H +#define LP_BLD_PACK_H + + +#include <llvm-c/Core.h> + + +struct lp_type; + + +LLVMValueRef +lp_build_interleave2(LLVMBuilderRef builder, + struct lp_type type, + LLVMValueRef a, + LLVMValueRef b, + unsigned lo_hi); + + +void +lp_build_unpack2(LLVMBuilderRef builder, + struct lp_type src_type, + struct lp_type dst_type, + LLVMValueRef src, + LLVMValueRef *dst_lo, + LLVMValueRef *dst_hi); + + +void +lp_build_unpack(LLVMBuilderRef builder, + struct lp_type src_type, + struct lp_type dst_type, + LLVMValueRef src, + LLVMValueRef *dst, unsigned num_dsts); + + +LLVMValueRef +lp_build_packs2(LLVMBuilderRef builder, + struct lp_type src_type, + struct lp_type dst_type, + LLVMValueRef lo, + LLVMValueRef hi); + + +LLVMValueRef +lp_build_pack2(LLVMBuilderRef builder, + struct lp_type src_type, + struct lp_type dst_type, + LLVMValueRef lo, + LLVMValueRef hi); + + +LLVMValueRef +lp_build_pack(LLVMBuilderRef builder, + struct lp_type src_type, + struct lp_type dst_type, + boolean clamped, + const LLVMValueRef *src, unsigned num_srcs); + + +#endif /* !LP_BLD_PACK_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample.c b/src/gallium/drivers/llvmpipe/lp_bld_sample.c new file mode 100644 index 00000000000..9003e108c1c --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample.c @@ -0,0 +1,190 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Texture sampling -- common code. + * + * @author Jose Fonseca <[email protected]> + */ + +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "util/u_format.h" +#include "util/u_math.h" +#include "lp_bld_debug.h" +#include "lp_bld_const.h" +#include "lp_bld_arit.h" +#include "lp_bld_type.h" +#include "lp_bld_format.h" +#include "lp_bld_sample.h" + + +void +lp_sampler_static_state(struct lp_sampler_static_state *state, + const struct pipe_texture *texture, + const struct pipe_sampler_state *sampler) +{ + memset(state, 0, sizeof *state); + + if(!texture) + return; + + if(!sampler) + return; + + state->format = texture->format; + state->target = texture->target; + state->pot_width = util_is_pot(texture->width0); + state->pot_height = util_is_pot(texture->height0); + state->pot_depth = util_is_pot(texture->depth0); + + state->wrap_s = sampler->wrap_s; + state->wrap_t = sampler->wrap_t; + state->wrap_r = sampler->wrap_r; + state->min_img_filter = sampler->min_img_filter; + state->min_mip_filter = sampler->min_mip_filter; + state->mag_img_filter = sampler->mag_img_filter; + state->compare_mode = sampler->compare_mode; + if(sampler->compare_mode != PIPE_TEX_COMPARE_NONE) { + state->compare_func = sampler->compare_func; + } + state->normalized_coords = sampler->normalized_coords; + state->prefilter = sampler->prefilter; +} + + +/** + * Gather elements from scatter positions in memory into a single vector. + * + * @param src_width src element width + * @param dst_width result element width (source will be expanded to fit) + * @param length length of the offsets, + * @param base_ptr base pointer, should be a i8 pointer type. + * @param offsets vector with offsets + */ +LLVMValueRef +lp_build_gather(LLVMBuilderRef builder, + unsigned length, + unsigned src_width, + unsigned dst_width, + LLVMValueRef base_ptr, + LLVMValueRef offsets) +{ + LLVMTypeRef src_type = LLVMIntType(src_width); + LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0); + LLVMTypeRef dst_elem_type = LLVMIntType(dst_width); + LLVMTypeRef dst_vec_type = LLVMVectorType(dst_elem_type, length); + LLVMValueRef res; + unsigned i; + + res = LLVMGetUndef(dst_vec_type); + for(i = 0; i < length; ++i) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef elem_offset; + LLVMValueRef elem_ptr; + LLVMValueRef elem; + + elem_offset = LLVMBuildExtractElement(builder, offsets, index, ""); + elem_ptr = LLVMBuildGEP(builder, base_ptr, &elem_offset, 1, ""); + elem_ptr = LLVMBuildBitCast(builder, elem_ptr, src_ptr_type, ""); + elem = LLVMBuildLoad(builder, elem_ptr, ""); + + assert(src_width <= dst_width); + if(src_width > dst_width) + elem = LLVMBuildTrunc(builder, elem, dst_elem_type, ""); + if(src_width < dst_width) + elem = LLVMBuildZExt(builder, elem, dst_elem_type, ""); + + res = LLVMBuildInsertElement(builder, res, elem, index, ""); + } + + return res; +} + + +/** + * Compute the offset of a pixel. + * + * x, y, y_stride are vectors + */ +LLVMValueRef +lp_build_sample_offset(struct lp_build_context *bld, + const struct util_format_description *format_desc, + LLVMValueRef x, + LLVMValueRef y, + LLVMValueRef y_stride, + LLVMValueRef data_ptr) +{ + LLVMValueRef x_stride; + LLVMValueRef offset; + + x_stride = lp_build_const_scalar(bld->type, format_desc->block.bits/8); + + if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { + LLVMValueRef x_lo, x_hi; + LLVMValueRef y_lo, y_hi; + LLVMValueRef x_stride_lo, x_stride_hi; + LLVMValueRef y_stride_lo, y_stride_hi; + LLVMValueRef x_offset_lo, x_offset_hi; + LLVMValueRef y_offset_lo, y_offset_hi; + LLVMValueRef offset_lo, offset_hi; + + x_lo = LLVMBuildAnd(bld->builder, x, bld->one, ""); + y_lo = LLVMBuildAnd(bld->builder, y, bld->one, ""); + + x_hi = LLVMBuildLShr(bld->builder, x, bld->one, ""); + y_hi = LLVMBuildLShr(bld->builder, y, bld->one, ""); + + x_stride_lo = x_stride; + y_stride_lo = lp_build_const_scalar(bld->type, 2*format_desc->block.bits/8); + + x_stride_hi = lp_build_const_scalar(bld->type, 4*format_desc->block.bits/8); + y_stride_hi = LLVMBuildShl(bld->builder, y_stride, bld->one, ""); + + x_offset_lo = lp_build_mul(bld, x_lo, x_stride_lo); + y_offset_lo = lp_build_mul(bld, y_lo, y_stride_lo); + offset_lo = lp_build_add(bld, x_offset_lo, y_offset_lo); + + x_offset_hi = lp_build_mul(bld, x_hi, x_stride_hi); + y_offset_hi = lp_build_mul(bld, y_hi, y_stride_hi); + offset_hi = lp_build_add(bld, x_offset_hi, y_offset_hi); + + offset = lp_build_add(bld, offset_hi, offset_lo); + } + else { + LLVMValueRef x_offset; + LLVMValueRef y_offset; + + x_offset = lp_build_mul(bld, x, x_stride); + y_offset = lp_build_mul(bld, y, y_stride); + + offset = lp_build_add(bld, x_offset, y_offset); + } + + return offset; +} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample.h b/src/gallium/drivers/llvmpipe/lp_bld_sample.h index 403d0e48367..8cb8210ca76 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_sample.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample.h @@ -40,7 +40,9 @@ struct pipe_texture; struct pipe_sampler_state; +struct util_format_description; struct lp_type; +struct lp_build_context; /** @@ -119,6 +121,24 @@ lp_sampler_static_state(struct lp_sampler_static_state *state, const struct pipe_sampler_state *sampler); +LLVMValueRef +lp_build_gather(LLVMBuilderRef builder, + unsigned length, + unsigned src_width, + unsigned dst_width, + LLVMValueRef base_ptr, + LLVMValueRef offsets); + + +LLVMValueRef +lp_build_sample_offset(struct lp_build_context *bld, + const struct util_format_description *format_desc, + LLVMValueRef x, + LLVMValueRef y, + LLVMValueRef y_stride, + LLVMValueRef data_ptr); + + void lp_build_sample_soa(LLVMBuilderRef builder, const struct lp_sampler_static_state *static_state, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c index 1a47ca32d2d..5ee8d556a68 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c @@ -27,7 +27,7 @@ /** * @file - * Texture sampling. + * Texture sampling -- SoA. * * @author Jose Fonseca <[email protected]> */ @@ -35,54 +35,23 @@ #include "pipe/p_defines.h" #include "pipe/p_state.h" #include "util/u_debug.h" +#include "util/u_debug_dump.h" #include "util/u_memory.h" #include "util/u_math.h" #include "util/u_format.h" +#include "util/u_cpu_detect.h" #include "lp_bld_debug.h" #include "lp_bld_type.h" #include "lp_bld_const.h" +#include "lp_bld_conv.h" #include "lp_bld_arit.h" #include "lp_bld_logic.h" #include "lp_bld_swizzle.h" +#include "lp_bld_pack.h" #include "lp_bld_format.h" #include "lp_bld_sample.h" -void -lp_sampler_static_state(struct lp_sampler_static_state *state, - const struct pipe_texture *texture, - const struct pipe_sampler_state *sampler) -{ - memset(state, 0, sizeof *state); - - if(!texture) - return; - - if(!sampler) - return; - - state->format = texture->format; - state->target = texture->target; - state->pot_width = util_is_pot(texture->width[0]); - state->pot_height = util_is_pot(texture->height[0]); - state->pot_depth = util_is_pot(texture->depth[0]); - - state->wrap_s = sampler->wrap_s; - state->wrap_t = sampler->wrap_t; - state->wrap_r = sampler->wrap_r; - state->min_img_filter = sampler->min_img_filter; - state->min_mip_filter = sampler->min_mip_filter; - state->mag_img_filter = sampler->mag_img_filter; - if(sampler->compare_mode) { - state->compare_mode = sampler->compare_mode; - state->compare_func = sampler->compare_func; - } - state->normalized_coords = sampler->normalized_coords; - state->prefilter = sampler->prefilter; -} - - - /** * Keep all information for sampling code generation in a single place. */ @@ -111,66 +80,61 @@ struct lp_build_sample_context static void -lp_build_sample_texel(struct lp_build_sample_context *bld, - LLVMValueRef x, - LLVMValueRef y, - LLVMValueRef y_stride, - LLVMValueRef data_ptr, - LLVMValueRef *texel) +lp_build_sample_texel_soa(struct lp_build_sample_context *bld, + LLVMValueRef x, + LLVMValueRef y, + LLVMValueRef y_stride, + LLVMValueRef data_ptr, + LLVMValueRef *texel) { - struct lp_build_context *int_coord_bld = &bld->int_coord_bld; - LLVMValueRef x_stride; LLVMValueRef offset; + LLVMValueRef packed; + + offset = lp_build_sample_offset(&bld->int_coord_bld, + bld->format_desc, + x, y, y_stride, + data_ptr); + + assert(bld->format_desc->block.width == 1); + assert(bld->format_desc->block.height == 1); + assert(bld->format_desc->block.bits <= bld->texel_type.width); + + packed = lp_build_gather(bld->builder, + bld->texel_type.length, + bld->format_desc->block.bits, + bld->texel_type.width, + data_ptr, offset); + + lp_build_unpack_rgba_soa(bld->builder, + bld->format_desc, + bld->texel_type, + packed, texel); +} - x_stride = lp_build_const_scalar(bld->int_coord_type, bld->format_desc->block.bits/8); - - if(bld->format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { - LLVMValueRef x_lo, x_hi; - LLVMValueRef y_lo, y_hi; - LLVMValueRef x_stride_lo, x_stride_hi; - LLVMValueRef y_stride_lo, y_stride_hi; - LLVMValueRef x_offset_lo, x_offset_hi; - LLVMValueRef y_offset_lo, y_offset_hi; - LLVMValueRef offset_lo, offset_hi; - - x_lo = LLVMBuildAnd(bld->builder, x, int_coord_bld->one, ""); - y_lo = LLVMBuildAnd(bld->builder, y, int_coord_bld->one, ""); - - x_hi = LLVMBuildLShr(bld->builder, x, int_coord_bld->one, ""); - y_hi = LLVMBuildLShr(bld->builder, y, int_coord_bld->one, ""); - - x_stride_lo = x_stride; - y_stride_lo = lp_build_const_scalar(bld->int_coord_type, 2*bld->format_desc->block.bits/8); - - x_stride_hi = lp_build_const_scalar(bld->int_coord_type, 4*bld->format_desc->block.bits/8); - y_stride_hi = LLVMBuildShl(bld->builder, y_stride, int_coord_bld->one, ""); - - x_offset_lo = lp_build_mul(int_coord_bld, x_lo, x_stride_lo); - y_offset_lo = lp_build_mul(int_coord_bld, y_lo, y_stride_lo); - offset_lo = lp_build_add(int_coord_bld, x_offset_lo, y_offset_lo); - - x_offset_hi = lp_build_mul(int_coord_bld, x_hi, x_stride_hi); - y_offset_hi = lp_build_mul(int_coord_bld, y_hi, y_stride_hi); - offset_hi = lp_build_add(int_coord_bld, x_offset_hi, y_offset_hi); - offset = lp_build_add(int_coord_bld, offset_hi, offset_lo); - } - else { - LLVMValueRef x_offset; - LLVMValueRef y_offset; +static LLVMValueRef +lp_build_sample_packed(struct lp_build_sample_context *bld, + LLVMValueRef x, + LLVMValueRef y, + LLVMValueRef y_stride, + LLVMValueRef data_ptr) +{ + LLVMValueRef offset; - x_offset = lp_build_mul(int_coord_bld, x, x_stride); - y_offset = lp_build_mul(int_coord_bld, y, y_stride); + offset = lp_build_sample_offset(&bld->int_coord_bld, + bld->format_desc, + x, y, y_stride, + data_ptr); - offset = lp_build_add(int_coord_bld, x_offset, y_offset); - } + assert(bld->format_desc->block.width == 1); + assert(bld->format_desc->block.height == 1); + assert(bld->format_desc->block.bits <= bld->texel_type.width); - lp_build_load_rgba_soa(bld->builder, - bld->format_desc, - bld->texel_type, - data_ptr, - offset, - texel); + return lp_build_gather(bld->builder, + bld->texel_type.length, + bld->format_desc->block.bits, + bld->texel_type.width, + data_ptr, offset); } @@ -208,7 +172,8 @@ lp_build_sample_wrap(struct lp_build_sample_context *bld, case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: /* FIXME */ - _debug_printf("warning: failed to translate texture wrap mode %u\n", wrap_mode); + _debug_printf("warning: failed to translate texture wrap mode %s\n", + debug_dump_tex_wrap(wrap_mode, TRUE)); coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero); coord = lp_build_min(int_coord_bld, coord, length_minus_one); break; @@ -240,7 +205,7 @@ lp_build_sample_2d_nearest_soa(struct lp_build_sample_context *bld, x = lp_build_sample_wrap(bld, x, width, bld->static_state->pot_width, bld->static_state->wrap_s); y = lp_build_sample_wrap(bld, y, height, bld->static_state->pot_height, bld->static_state->wrap_t); - lp_build_sample_texel(bld, x, y, stride, data_ptr, texel); + lp_build_sample_texel_soa(bld, x, y, stride, data_ptr, texel); } @@ -286,10 +251,10 @@ lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld, x1 = lp_build_sample_wrap(bld, x1, width, bld->static_state->pot_width, bld->static_state->wrap_s); y1 = lp_build_sample_wrap(bld, y1, height, bld->static_state->pot_height, bld->static_state->wrap_t); - lp_build_sample_texel(bld, x0, y0, stride, data_ptr, neighbors[0][0]); - lp_build_sample_texel(bld, x1, y0, stride, data_ptr, neighbors[0][1]); - lp_build_sample_texel(bld, x0, y1, stride, data_ptr, neighbors[1][0]); - lp_build_sample_texel(bld, x1, y1, stride, data_ptr, neighbors[1][1]); + lp_build_sample_texel_soa(bld, x0, y0, stride, data_ptr, neighbors[0][0]); + lp_build_sample_texel_soa(bld, x1, y0, stride, data_ptr, neighbors[0][1]); + lp_build_sample_texel_soa(bld, x0, y1, stride, data_ptr, neighbors[1][0]); + lp_build_sample_texel_soa(bld, x1, y1, stride, data_ptr, neighbors[1][1]); /* TODO: Don't interpolate missing channels */ for(chan = 0; chan < 4; ++chan) { @@ -304,6 +269,217 @@ lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld, static void +lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder, + struct lp_type dst_type, + LLVMValueRef packed, + LLVMValueRef *rgba) +{ + LLVMValueRef mask = lp_build_int_const_scalar(dst_type, 0xff); + unsigned chan; + + /* Decode the input vector components */ + for (chan = 0; chan < 4; ++chan) { + unsigned start = chan*8; + unsigned stop = start + 8; + LLVMValueRef input; + + input = packed; + + if(start) + input = LLVMBuildLShr(builder, input, lp_build_int_const_scalar(dst_type, start), ""); + + if(stop < 32) + input = LLVMBuildAnd(builder, input, mask, ""); + + input = lp_build_unsigned_norm_to_float(builder, 8, dst_type, input); + + rgba[chan] = input; + } +} + + +static void +lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, + LLVMValueRef s, + LLVMValueRef t, + LLVMValueRef width, + LLVMValueRef height, + LLVMValueRef stride, + LLVMValueRef data_ptr, + LLVMValueRef *texel) +{ + LLVMBuilderRef builder = bld->builder; + struct lp_build_context i32, h16, u8n; + LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type; + LLVMValueRef i32_c8, i32_c128, i32_c255; + LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi; + LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi; + LLVMValueRef x0, x1; + LLVMValueRef y0, y1; + LLVMValueRef neighbors[2][2]; + LLVMValueRef neighbors_lo[2][2]; + LLVMValueRef neighbors_hi[2][2]; + LLVMValueRef packed, packed_lo, packed_hi; + LLVMValueRef unswizzled[4]; + + lp_build_context_init(&i32, builder, lp_type_int(32)); + lp_build_context_init(&h16, builder, lp_type_ufixed(16)); + lp_build_context_init(&u8n, builder, lp_type_unorm(8)); + + i32_vec_type = lp_build_vec_type(i32.type); + h16_vec_type = lp_build_vec_type(h16.type); + u8n_vec_type = lp_build_vec_type(u8n.type); + + s = lp_build_mul_imm(&bld->coord_bld, s, 256); + t = lp_build_mul_imm(&bld->coord_bld, t, 256); + + s = LLVMBuildFPToSI(builder, s, i32_vec_type, ""); + t = LLVMBuildFPToSI(builder, t, i32_vec_type, ""); + + i32_c128 = lp_build_int_const_scalar(i32.type, -128); + s = LLVMBuildAdd(builder, s, i32_c128, ""); + t = LLVMBuildAdd(builder, t, i32_c128, ""); + + i32_c8 = lp_build_int_const_scalar(i32.type, 8); + s_ipart = LLVMBuildAShr(builder, s, i32_c8, ""); + t_ipart = LLVMBuildAShr(builder, t, i32_c8, ""); + + i32_c255 = lp_build_int_const_scalar(i32.type, 255); + s_fpart = LLVMBuildAnd(builder, s, i32_c255, ""); + t_fpart = LLVMBuildAnd(builder, t, i32_c255, ""); + + x0 = s_ipart; + y0 = t_ipart; + + x0 = lp_build_sample_wrap(bld, x0, width, bld->static_state->pot_width, bld->static_state->wrap_s); + y0 = lp_build_sample_wrap(bld, y0, height, bld->static_state->pot_height, bld->static_state->wrap_t); + + x1 = lp_build_add(&bld->int_coord_bld, x0, bld->int_coord_bld.one); + y1 = lp_build_add(&bld->int_coord_bld, y0, bld->int_coord_bld.one); + + x1 = lp_build_sample_wrap(bld, x1, width, bld->static_state->pot_width, bld->static_state->wrap_s); + y1 = lp_build_sample_wrap(bld, y1, height, bld->static_state->pot_height, bld->static_state->wrap_t); + + /* + * Transform 4 x i32 in + * + * s_fpart = {s0, s1, s2, s3} + * + * into 8 x i16 + * + * s_fpart = {00, s0, 00, s1, 00, s2, 00, s3} + * + * into two 8 x i16 + * + * s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1} + * s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3} + * + * and likewise for t_fpart. There is no risk of loosing precision here + * since the fractional parts only use the lower 8bits. + */ + + s_fpart = LLVMBuildBitCast(builder, s_fpart, h16_vec_type, ""); + t_fpart = LLVMBuildBitCast(builder, t_fpart, h16_vec_type, ""); + + { + LLVMTypeRef elem_type = LLVMInt32Type(); + LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH]; + LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH]; + LLVMValueRef shuffle_lo; + LLVMValueRef shuffle_hi; + unsigned i, j; + + for(j = 0; j < h16.type.length; j += 4) { + unsigned subindex = util_cpu_caps.little_endian ? 0 : 1; + LLVMValueRef index; + + index = LLVMConstInt(elem_type, j/2 + subindex, 0); + for(i = 0; i < 4; ++i) + shuffles_lo[j + i] = index; + + index = LLVMConstInt(elem_type, h16.type.length/2 + j/2 + subindex, 0); + for(i = 0; i < 4; ++i) + shuffles_hi[j + i] = index; + } + + shuffle_lo = LLVMConstVector(shuffles_lo, h16.type.length); + shuffle_hi = LLVMConstVector(shuffles_hi, h16.type.length); + + s_fpart_lo = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_lo, ""); + t_fpart_lo = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_lo, ""); + s_fpart_hi = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_hi, ""); + t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_hi, ""); + } + + /* + * Fetch the pixels as 4 x 32bit (rgba order might differ): + * + * rgba0 rgba1 rgba2 rgba3 + * + * bit cast them into 16 x u8 + * + * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3 + * + * unpack them into two 8 x i16: + * + * r0 g0 b0 a0 r1 g1 b1 a1 + * r2 g2 b2 a2 r3 g3 b3 a3 + * + * The higher 8 bits of the resulting elements will be zero. + */ + + neighbors[0][0] = lp_build_sample_packed(bld, x0, y0, stride, data_ptr); + neighbors[0][1] = lp_build_sample_packed(bld, x1, y0, stride, data_ptr); + neighbors[1][0] = lp_build_sample_packed(bld, x0, y1, stride, data_ptr); + neighbors[1][1] = lp_build_sample_packed(bld, x1, y1, stride, data_ptr); + + neighbors[0][0] = LLVMBuildBitCast(builder, neighbors[0][0], u8n_vec_type, ""); + neighbors[0][1] = LLVMBuildBitCast(builder, neighbors[0][1], u8n_vec_type, ""); + neighbors[1][0] = LLVMBuildBitCast(builder, neighbors[1][0], u8n_vec_type, ""); + neighbors[1][1] = LLVMBuildBitCast(builder, neighbors[1][1], u8n_vec_type, ""); + + lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][0], &neighbors_lo[0][0], &neighbors_hi[0][0]); + lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][1], &neighbors_lo[0][1], &neighbors_hi[0][1]); + lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][0], &neighbors_lo[1][0], &neighbors_hi[1][0]); + lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][1], &neighbors_lo[1][1], &neighbors_hi[1][1]); + + /* + * Linear interpolate with 8.8 fixed point. + */ + + packed_lo = lp_build_lerp_2d(&h16, + s_fpart_lo, t_fpart_lo, + neighbors_lo[0][0], + neighbors_lo[0][1], + neighbors_lo[1][0], + neighbors_lo[1][1]); + + packed_hi = lp_build_lerp_2d(&h16, + s_fpart_hi, t_fpart_hi, + neighbors_hi[0][0], + neighbors_hi[0][1], + neighbors_hi[1][0], + neighbors_hi[1][1]); + + packed = lp_build_pack2(builder, h16.type, u8n.type, packed_lo, packed_hi); + + /* + * Convert to SoA and swizzle. + */ + + packed = LLVMBuildBitCast(builder, packed, i32_vec_type, ""); + + lp_build_rgba8_to_f32_soa(bld->builder, + bld->texel_type, + packed, unswizzled); + + lp_build_format_swizzle_soa(bld->format_desc, + bld->texel_type, unswizzled, + texel); +} + + +static void lp_build_sample_compare(struct lp_build_sample_context *bld, LLVMValueRef p, LLVMValueRef *texel) @@ -312,7 +488,7 @@ lp_build_sample_compare(struct lp_build_sample_context *bld, LLVMValueRef res; unsigned chan; - if(!bld->static_state->compare_mode) + if(bld->static_state->compare_mode == PIPE_TEX_COMPARE_NONE) return; /* TODO: Compare before swizzling, to avoid redundant computations */ @@ -401,8 +577,10 @@ lp_build_sample_soa(LLVMBuilderRef builder, lp_build_sample_2d_nearest_soa(&bld, s, t, width, height, stride, data_ptr, texel); break; case PIPE_TEX_FILTER_LINEAR: - case PIPE_TEX_FILTER_ANISO: - lp_build_sample_2d_linear_soa(&bld, s, t, width, height, stride, data_ptr, texel); + if(lp_format_is_rgba8(bld.format_desc)) + lp_build_sample_2d_linear_aos(&bld, s, t, width, height, stride, data_ptr, texel); + else + lp_build_sample_2d_linear_soa(&bld, s, t, width, height, stride, data_ptr, texel); break; default: assert(0); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h index 1f6da804488..b9472127a63 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h @@ -40,7 +40,7 @@ #include <llvm-c/Core.h> -struct lp_type type; +struct lp_type; struct lp_build_context; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c index 83ac25bb200..fb1eda4423b 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c @@ -64,7 +64,7 @@ for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++) #define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ - ((INST)->FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) + ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN))) #define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ if (IS_DST0_CHANNEL_ENABLED( INST, CHAN )) @@ -157,19 +157,19 @@ emit_fetch( unsigned index, const unsigned chan_index ) { - const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[index]; - unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index ); + const struct tgsi_full_src_register *reg = &inst->Src[index]; + unsigned swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); LLVMValueRef res; switch (swizzle) { - case TGSI_EXTSWIZZLE_X: - case TGSI_EXTSWIZZLE_Y: - case TGSI_EXTSWIZZLE_Z: - case TGSI_EXTSWIZZLE_W: + case TGSI_SWIZZLE_X: + case TGSI_SWIZZLE_Y: + case TGSI_SWIZZLE_Z: + case TGSI_SWIZZLE_W: - switch (reg->SrcRegister.File) { + switch (reg->Register.File) { case TGSI_FILE_CONSTANT: { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->SrcRegister.Index*4 + swizzle, 0); + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->Register.Index*4 + swizzle, 0); LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, ""); LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, ""); res = lp_build_broadcast_scalar(&bld->base, scalar); @@ -177,17 +177,17 @@ emit_fetch( } case TGSI_FILE_IMMEDIATE: - res = bld->immediates[reg->SrcRegister.Index][swizzle]; + res = bld->immediates[reg->Register.Index][swizzle]; assert(res); break; case TGSI_FILE_INPUT: - res = bld->inputs[reg->SrcRegister.Index][swizzle]; + res = bld->inputs[reg->Register.Index][swizzle]; assert(res); break; case TGSI_FILE_TEMPORARY: - res = bld->temps[reg->SrcRegister.Index][swizzle]; + res = bld->temps[reg->Register.Index][swizzle]; if(!res) return bld->base.undef; break; @@ -198,14 +198,6 @@ emit_fetch( } break; - case TGSI_EXTSWIZZLE_ZERO: - res = bld->base.zero; - break; - - case TGSI_EXTSWIZZLE_ONE: - res = bld->base.one; - break; - default: assert( 0 ); return bld->base.undef; @@ -275,7 +267,7 @@ emit_store( unsigned chan_index, LLVMValueRef value) { - const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[index]; + const struct tgsi_full_dst_register *reg = &inst->Dst[index]; switch( inst->Instruction.Saturate ) { case TGSI_SAT_NONE: @@ -295,13 +287,13 @@ emit_store( assert(0); } - switch( reg->DstRegister.File ) { + switch( reg->Register.File ) { case TGSI_FILE_OUTPUT: - bld->outputs[reg->DstRegister.Index][chan_index] = value; + bld->outputs[reg->Register.Index][chan_index] = value; break; case TGSI_FILE_TEMPORARY: - bld->temps[reg->DstRegister.Index][chan_index] = value; + bld->temps[reg->Register.Index][chan_index] = value; break; case TGSI_FILE_ADDRESS: @@ -327,14 +319,14 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, boolean projected, LLVMValueRef *texel) { - const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; + const uint unit = inst->Src[1].Register.Index; LLVMValueRef lodbias; - LLVMValueRef oow; + LLVMValueRef oow = NULL; LLVMValueRef coords[3]; unsigned num_coords; unsigned i; - switch (inst->InstructionExtTexture.Texture) { + switch (inst->Texture.Texture) { case TGSI_TEXTURE_1D: num_coords = 1; break; @@ -369,6 +361,9 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, if (projected) coords[i] = lp_build_mul(&bld->base, coords[i], oow); } + for (i = num_coords; i < 3; i++) { + coords[i] = bld->base.undef; + } bld->sampler->emit_fetch_texel(bld->sampler, bld->base.builder, @@ -383,7 +378,7 @@ emit_kil( struct lp_build_tgsi_soa_context *bld, const struct tgsi_full_instruction *inst ) { - const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[0]; + const struct tgsi_full_src_register *reg = &inst->Src[0]; LLVMValueRef terms[NUM_CHANNELS]; LLVMValueRef mask; unsigned chan_index; @@ -394,12 +389,7 @@ emit_kil( unsigned swizzle; /* Unswizzle channel */ - swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index ); - - /* Note that we test if the value is less than zero, so 1.0 and 0.0 need - * not to be tested. */ - if(swizzle == TGSI_EXTSWIZZLE_ZERO || swizzle == TGSI_EXTSWIZZLE_ONE) - continue; + swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); /* Check if the component has not been already tested. */ assert(swizzle < NUM_CHANNELS); @@ -436,15 +426,15 @@ indirect_temp_reference(const struct tgsi_full_instruction *inst) { uint i; for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[i]; - if (reg->SrcRegister.File == TGSI_FILE_TEMPORARY && - reg->SrcRegister.Indirect) + const struct tgsi_full_src_register *reg = &inst->Src[i]; + if (reg->Register.File == TGSI_FILE_TEMPORARY && + reg->Register.Indirect) return TRUE; } for (i = 0; i < inst->Instruction.NumDstRegs; i++) { - const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[i]; - if (reg->DstRegister.File == TGSI_FILE_TEMPORARY && - reg->DstRegister.Indirect) + const struct tgsi_full_dst_register *reg = &inst->Dst[i]; + if (reg->Register.File == TGSI_FILE_TEMPORARY && + reg->Register.Indirect) return TRUE; } return FALSE; @@ -459,7 +449,12 @@ emit_instruction( { unsigned chan_index; LLVMValueRef src0, src1, src2; - LLVMValueRef tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + LLVMValueRef tmp0, tmp1, tmp2; + LLVMValueRef tmp3 = NULL; + LLVMValueRef tmp4 = NULL; + LLVMValueRef tmp5 = NULL; + LLVMValueRef tmp6 = NULL; + LLVMValueRef tmp7 = NULL; LLVMValueRef res; LLVMValueRef dst0[NUM_CHANNELS]; @@ -488,7 +483,6 @@ emit_instruction( #endif case TGSI_OPCODE_MOV: - case TGSI_OPCODE_SWZ: FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index ); } @@ -574,9 +568,9 @@ emit_instruction( if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) { - LLVMValueRef *p_floor_log2; - LLVMValueRef *p_exp; - LLVMValueRef *p_log2; + LLVMValueRef *p_floor_log2 = NULL; + LLVMValueRef *p_exp = NULL; + LLVMValueRef *p_log2 = NULL; src0 = emit_fetch( bld, inst, 0, CHAN_X ); src0 = lp_build_abs( &bld->base, src0 ); @@ -1324,7 +1318,7 @@ emit_instruction( return 0; break; - case TGSI_OPCODE_SHR: + case TGSI_OPCODE_ISHR: /* deprecated? */ assert(0); return 0; @@ -1386,15 +1380,6 @@ emit_instruction( return 0; break; - case TGSI_OPCODE_NOISE1: - case TGSI_OPCODE_NOISE2: - case TGSI_OPCODE_NOISE3: - case TGSI_OPCODE_NOISE4: - FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - dst0[chan_index] = bld->base.zero; - } - break; - case TGSI_OPCODE_NOP: break; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.c b/src/gallium/drivers/llvmpipe/lp_bld_type.c index e8cf7256c0e..8270cd057f6 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_type.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_type.c @@ -161,7 +161,7 @@ lp_build_int_vec_type(struct lp_type type) * Build int32[4] vector type */ LLVMTypeRef -lp_build_int32_vec4_type() +lp_build_int32_vec4_type(void) { struct lp_type t; LLVMTypeRef type; @@ -181,12 +181,31 @@ lp_build_int32_vec4_type() struct lp_type lp_int_type(struct lp_type type) { - struct lp_type int_type; + struct lp_type res_type; - memset(&int_type, 0, sizeof int_type); - int_type.width = type.width; - int_type.length = type.length; - return int_type; + memset(&res_type, 0, sizeof res_type); + res_type.width = type.width; + res_type.length = type.length; + + return res_type; +} + + +/** + * Return the type with twice the bit width (hence half the number of elements). + */ +struct lp_type +lp_wider_type(struct lp_type type) +{ + struct lp_type res_type; + + memcpy(&res_type, &type, sizeof res_type); + res_type.width *= 2; + res_type.length /= 2; + + assert(res_type.length); + + return res_type; } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.h b/src/gallium/drivers/llvmpipe/lp_bld_type.h index 118fb339089..b7d8aed396d 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_type.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_type.h @@ -43,13 +43,18 @@ /** + * Native SIMD register width. + * + * 128 for all architectures we care about. + */ +#define LP_NATIVE_VECTOR_WIDTH 128 + +/** * Several functions can only cope with vectors of length up to this value. * You may need to increase that value if you want to represent bigger vectors. */ #define LP_MAX_VECTOR_LENGTH 16 -#define LP_MAX_TYPE_WIDTH 64 - /** * The LLVM type system can't conveniently express all the things we care about @@ -134,6 +139,91 @@ struct lp_build_context }; +static INLINE struct lp_type +lp_type_float(unsigned width) +{ + struct lp_type res_type; + + memset(&res_type, 0, sizeof res_type); + res_type.floating = TRUE; + res_type.sign = TRUE; + res_type.width = width; + res_type.length = LP_NATIVE_VECTOR_WIDTH / width; + + return res_type; +} + + +static INLINE struct lp_type +lp_type_int(unsigned width) +{ + struct lp_type res_type; + + memset(&res_type, 0, sizeof res_type); + res_type.sign = TRUE; + res_type.width = width; + res_type.length = LP_NATIVE_VECTOR_WIDTH / width; + + return res_type; +} + + +static INLINE struct lp_type +lp_type_uint(unsigned width) +{ + struct lp_type res_type; + + memset(&res_type, 0, sizeof res_type); + res_type.width = width; + res_type.length = LP_NATIVE_VECTOR_WIDTH / width; + + return res_type; +} + + +static INLINE struct lp_type +lp_type_unorm(unsigned width) +{ + struct lp_type res_type; + + memset(&res_type, 0, sizeof res_type); + res_type.norm = TRUE; + res_type.width = width; + res_type.length = LP_NATIVE_VECTOR_WIDTH / width; + + return res_type; +} + + +static INLINE struct lp_type +lp_type_fixed(unsigned width) +{ + struct lp_type res_type; + + memset(&res_type, 0, sizeof res_type); + res_type.sign = TRUE; + res_type.fixed = TRUE; + res_type.width = width; + res_type.length = LP_NATIVE_VECTOR_WIDTH / width; + + return res_type; +} + + +static INLINE struct lp_type +lp_type_ufixed(unsigned width) +{ + struct lp_type res_type; + + memset(&res_type, 0, sizeof res_type); + res_type.fixed = TRUE; + res_type.width = width; + res_type.length = LP_NATIVE_VECTOR_WIDTH / width; + + return res_type; +} + + LLVMTypeRef lp_build_elem_type(struct lp_type type); @@ -170,6 +260,10 @@ struct lp_type lp_int_type(struct lp_type type); +struct lp_type +lp_wider_type(struct lp_type type); + + void lp_build_context_init(struct lp_build_context *bld, LLVMBuilderRef builder, diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index 696a9d5f6a8..8d965175f8c 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -31,6 +31,7 @@ */ #include "draw/draw_context.h" +#include "draw/draw_vbuf.h" #include "pipe/p_defines.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -68,6 +69,10 @@ static void llvmpipe_destroy( struct pipe_context *pipe ) pipe_texture_reference(&llvmpipe->texture[i], NULL); } + for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { + pipe_texture_reference(&llvmpipe->vertex_textures[i], NULL); + } + for (i = 0; i < Elements(llvmpipe->constants); i++) { if (llvmpipe->constants[i].buffer) { pipe_buffer_reference(&llvmpipe->constants[i].buffer, NULL); @@ -117,7 +122,8 @@ llvmpipe_create( struct pipe_screen *screen ) llvmpipe->pipe.delete_blend_state = llvmpipe_delete_blend_state; llvmpipe->pipe.create_sampler_state = llvmpipe_create_sampler_state; - llvmpipe->pipe.bind_sampler_states = llvmpipe_bind_sampler_states; + llvmpipe->pipe.bind_fragment_sampler_states = llvmpipe_bind_sampler_states; + llvmpipe->pipe.bind_vertex_sampler_states = llvmpipe_bind_vertex_sampler_states; llvmpipe->pipe.delete_sampler_state = llvmpipe_delete_sampler_state; llvmpipe->pipe.create_depth_stencil_alpha_state = llvmpipe_create_depth_stencil_state; @@ -142,7 +148,8 @@ llvmpipe_create( struct pipe_screen *screen ) llvmpipe->pipe.set_framebuffer_state = llvmpipe_set_framebuffer_state; llvmpipe->pipe.set_polygon_stipple = llvmpipe_set_polygon_stipple; llvmpipe->pipe.set_scissor_state = llvmpipe_set_scissor_state; - llvmpipe->pipe.set_sampler_textures = llvmpipe_set_sampler_textures; + llvmpipe->pipe.set_fragment_sampler_textures = llvmpipe_set_sampler_textures; + llvmpipe->pipe.set_vertex_sampler_textures = llvmpipe_set_vertex_sampler_textures; llvmpipe->pipe.set_viewport_state = llvmpipe_set_viewport_state; llvmpipe->pipe.set_vertex_buffers = llvmpipe_set_vertex_buffers; @@ -151,8 +158,6 @@ llvmpipe_create( struct pipe_screen *screen ) llvmpipe->pipe.draw_arrays = llvmpipe_draw_arrays; llvmpipe->pipe.draw_elements = llvmpipe_draw_elements; llvmpipe->pipe.draw_range_elements = llvmpipe_draw_range_elements; - llvmpipe->pipe.set_edgeflags = llvmpipe_set_edgeflags; - llvmpipe->pipe.clear = llvmpipe_clear; llvmpipe->pipe.flush = llvmpipe_flush; @@ -170,8 +175,7 @@ llvmpipe_create( struct pipe_screen *screen ) if (!llvmpipe->draw) goto fail; - /* FIXME: vertex sampler state - */ + /* FIXME: devise alternative to draw_texture_samplers */ if (debug_get_bool_option( "LP_NO_RAST", FALSE )) llvmpipe->no_rast = TRUE; diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index 194692045dc..1ede6a6a72f 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -53,6 +53,7 @@ struct llvmpipe_context { /** Constant state objects */ const struct pipe_blend_state *blend; const struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS]; + struct pipe_sampler_state *vertex_samplers[PIPE_MAX_VERTEX_SAMPLERS]; const struct pipe_depth_stencil_alpha_state *depth_stencil; const struct pipe_rasterizer_state *rasterizer; struct lp_fragment_shader *fs; @@ -66,12 +67,15 @@ struct llvmpipe_context { struct pipe_poly_stipple poly_stipple; struct pipe_scissor_state scissor; struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; + struct pipe_texture *vertex_textures[PIPE_MAX_VERTEX_SAMPLERS]; struct pipe_viewport_state viewport; struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; unsigned num_samplers; unsigned num_textures; + unsigned num_vertex_samplers; + unsigned num_vertex_textures; unsigned num_vertex_elements; unsigned num_vertex_buffers; diff --git a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c index 91fcbc01c6d..3989cce7445 100644 --- a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c +++ b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c @@ -45,11 +45,11 @@ -boolean +void llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { - return llvmpipe_draw_elements(pipe, NULL, 0, mode, start, count); + llvmpipe_draw_elements(pipe, NULL, 0, mode, start, count); } @@ -58,7 +58,7 @@ llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, * Basically, map the vertex buffers (and drawing surfaces), then hand off * the drawing to the 'draw' module. */ -boolean +void llvmpipe_draw_range_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, @@ -99,7 +99,7 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe, draw_arrays(draw, mode, start, count); /* - * unmap vertex/index buffers - will cause draw module to flush + * unmap vertex/index buffers */ for (i = 0; i < lp->num_vertex_buffers; i++) { draw_set_mapped_vertex_buffer(draw, i, NULL); @@ -108,26 +108,24 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe, draw_set_mapped_element_buffer(draw, 0, NULL); } - return TRUE; + /* + * TODO: Flush only when a user vertex/index buffer is present + * (or even better, modify draw module to do this + * internally when this condition is seen?) + */ + draw_flush(draw); } -boolean +void llvmpipe_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, unsigned count) { - return llvmpipe_draw_range_elements( pipe, indexBuffer, - indexSize, - 0, 0xffffffff, - mode, start, count ); + llvmpipe_draw_range_elements( pipe, indexBuffer, + indexSize, + 0, 0xffffffff, + mode, start, count ); } - -void -llvmpipe_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags) -{ - struct llvmpipe_context *lp = llvmpipe_context(pipe); - draw_set_edgeflags(lp->draw, edgeflags); -} diff --git a/src/gallium/drivers/llvmpipe/lp_fence.c b/src/gallium/drivers/llvmpipe/lp_fence.c index 14fbea6d993..97c46087da0 100644 --- a/src/gallium/drivers/llvmpipe/lp_fence.c +++ b/src/gallium/drivers/llvmpipe/lp_fence.c @@ -64,7 +64,7 @@ llvmpipe_fence_reference(struct pipe_screen *screen, struct lp_fence *old = (struct lp_fence *) *ptr; struct lp_fence *f = (struct lp_fence *) fence; - if (pipe_reference((struct pipe_reference**)ptr, &f->reference)) { + if (pipe_reference(&old->reference, &f->reference)) { lp_fence_destroy(old); } } diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index fb6ec9bb37a..4ef0783f3e2 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -39,6 +39,7 @@ #include "util/u_cpu_detect.h" #include "lp_screen.h" #include "lp_bld_intr.h" +#include "lp_bld_misc.h" #include "lp_jit.h" @@ -78,25 +79,22 @@ lp_jit_init_globals(struct llvmpipe_screen *screen) /* struct lp_jit_context */ { - LLVMTypeRef elem_types[5]; + LLVMTypeRef elem_types[4]; LLVMTypeRef context_type; elem_types[0] = LLVMPointerType(LLVMFloatType(), 0); /* constants */ - elem_types[1] = LLVMPointerType(LLVMInt8Type(), 0); /* samplers */ - elem_types[2] = LLVMFloatType(); /* alpha_ref_value */ - elem_types[3] = LLVMPointerType(LLVMInt8Type(), 0); /* blend_color */ - elem_types[4] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); /* textures */ + elem_types[1] = LLVMFloatType(); /* alpha_ref_value */ + elem_types[2] = LLVMPointerType(LLVMInt8Type(), 0); /* blend_color */ + elem_types[3] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); /* textures */ context_type = LLVMStructType(elem_types, Elements(elem_types), 0); LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, constants, screen->target, context_type, 0); - LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, dummy, - screen->target, context_type, 1); LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, alpha_ref_value, - screen->target, context_type, 2); + screen->target, context_type, 1); LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, blend_color, - screen->target, context_type, 3); + screen->target, context_type, 2); LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, textures, screen->target, context_type, LP_JIT_CONTEXT_TEXTURES_INDEX); @@ -135,13 +133,12 @@ lp_jit_screen_init(struct llvmpipe_screen *screen) #if 0 /* For simulating less capable machines */ util_cpu_caps.has_sse3 = 0; + util_cpu_caps.has_ssse3 = 0; util_cpu_caps.has_sse4_1 = 0; #endif -#ifdef LLVM_NATIVE_ARCH LLVMLinkInJIT(); LLVMInitializeNativeTarget(); -#endif screen->module = LLVMModuleCreateWithName("llvmpipe"); @@ -150,7 +147,7 @@ lp_jit_screen_init(struct llvmpipe_screen *screen) if (LLVMCreateJITCompiler(&screen->engine, screen->provider, 1, &error)) { _debug_printf("%s\n", error); LLVMDisposeMessage(error); - abort(); + assert(0); } screen->target = LLVMGetExecutionEngineTargetData(screen->engine); diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h index e8fb7d990f8..1a6e939aa24 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.h +++ b/src/gallium/drivers/llvmpipe/lp_jit.h @@ -41,7 +41,6 @@ #include "pipe/p_state.h" -struct tgsi_sampler; struct llvmpipe_screen; @@ -78,8 +77,6 @@ struct lp_jit_context { const float *constants; - void *dummy; /* remove me */ - float alpha_ref_value; /* FIXME: store (also?) in floats */ @@ -92,16 +89,13 @@ struct lp_jit_context #define lp_jit_context_constants(_builder, _ptr) \ lp_build_struct_get(_builder, _ptr, 0, "constants") -#define lp_jit_context_samplers(_builder, _ptr) \ - lp_build_struct_get(_builder, _ptr, 1, "samplers") - #define lp_jit_context_alpha_ref_value(_builder, _ptr) \ - lp_build_struct_get(_builder, _ptr, 2, "alpha_ref_value") + lp_build_struct_get(_builder, _ptr, 1, "alpha_ref_value") #define lp_jit_context_blend_color(_builder, _ptr) \ - lp_build_struct_get(_builder, _ptr, 3, "blend_color") + lp_build_struct_get(_builder, _ptr, 2, "blend_color") -#define LP_JIT_CONTEXT_TEXTURES_INDEX 4 +#define LP_JIT_CONTEXT_TEXTURES_INDEX 3 #define lp_jit_context_textures(_builder, _ptr) \ lp_build_struct_get_ptr(_builder, _ptr, LP_JIT_CONTEXT_TEXTURES_INDEX, "textures") @@ -124,7 +118,6 @@ typedef void const int32_t *step3); - void lp_jit_screen_cleanup(struct llvmpipe_screen *screen); diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 6772ff332ba..6535e693089 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -230,7 +230,7 @@ void lp_rast_load_color( struct lp_rasterizer *rast, assert(w <= TILE_SIZE); assert(h <= TILE_SIZE); - lp_tile_read_4ub(rast->cbuf_transfer->format, + lp_tile_read_4ub(rast->cbuf_transfer->texture->format, rast->tasks[thread_index].tile.color, rast->cbuf_map, rast->cbuf_transfer->stride, @@ -394,7 +394,7 @@ static void lp_rast_store_color( struct lp_rasterizer *rast, LP_DBG(DEBUG_RAST, "%s [%u] %d,%d %dx%d\n", __FUNCTION__, thread_index, x, y, w, h); - lp_tile_write_4ub(rast->cbuf_transfer->format, + lp_tile_write_4ub(rast->cbuf_transfer->texture->format, rast->tasks[thread_index].tile.color, rast->cbuf_map, rast->cbuf_transfer->stride, @@ -437,7 +437,7 @@ static void lp_rast_store_zstencil( struct lp_rasterizer *rast, LP_DBG(DEBUG_RAST, "%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h); - assert(rast->zsbuf_transfer->format == PIPE_FORMAT_Z32_UNORM); + assert(rast->zsbuf_transfer->texture->format == PIPE_FORMAT_Z32_UNORM); lp_tile_write_z32(rast->tasks[thread_index].tile.depth, rast->zsbuf_map, rast->zsbuf_transfer->stride, diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 017496ea5fc..a28f6935b68 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -27,6 +27,7 @@ #include "util/u_memory.h" +#include "util/u_format.h" #include "pipe/p_defines.h" #include "pipe/p_screen.h" @@ -77,7 +78,9 @@ llvmpipe_get_param(struct pipe_screen *screen, int param) case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: return PIPE_MAX_SAMPLERS; case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: - return PIPE_MAX_SAMPLERS; + return PIPE_MAX_VERTEX_SAMPLERS; + case PIPE_CAP_MAX_COMBINED_SAMPLERS: + return PIPE_MAX_SAMPLERS + PIPE_MAX_VERTEX_SAMPLERS; case PIPE_CAP_NPOT_TEXTURES: return 1; case PIPE_CAP_TWO_SIDED_STENCIL: @@ -150,17 +153,17 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen, { struct llvmpipe_screen *screen = llvmpipe_screen(_screen); struct llvmpipe_winsys *winsys = screen->winsys; + const struct util_format_description *format_desc; + + format_desc = util_format_description(format); + if(!format_desc) + return FALSE; assert(target == PIPE_TEXTURE_1D || target == PIPE_TEXTURE_2D || target == PIPE_TEXTURE_3D || target == PIPE_TEXTURE_CUBE); - if(format == PIPE_FORMAT_Z16_UNORM) - return FALSE; - if(format == PIPE_FORMAT_S8_UNORM) - return FALSE; - switch(format) { case PIPE_FORMAT_DXT1_RGB: case PIPE_FORMAT_DXT1_RGBA: @@ -171,8 +174,51 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen, break; } - if(tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET) - return winsys->is_displaytarget_format_supported(winsys, format); + if(tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) { + if(format_desc->block.width != 1 || + format_desc->block.height != 1) + return FALSE; + + if(format_desc->layout != UTIL_FORMAT_LAYOUT_SCALAR && + format_desc->layout != UTIL_FORMAT_LAYOUT_ARITH && + format_desc->layout != UTIL_FORMAT_LAYOUT_ARRAY) + return FALSE; + + if(format_desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB && + format_desc->colorspace != UTIL_FORMAT_COLORSPACE_SRGB) + return FALSE; + } + + if(tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET) { + if(!winsys->is_displaytarget_format_supported(winsys, format)) + return FALSE; + } + + if(tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL) { + if(format_desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) + return FALSE; + + /* FIXME: Temporary restriction. See lp_state_fs.c. */ + if(format_desc->block.bits != 32) + return FALSE; + } + + /* FIXME: Temporary restrictions. See lp_bld_sample_soa.c */ + if(tex_usage & PIPE_TEXTURE_USAGE_SAMPLER) { + if(format_desc->block.width != 1 || + format_desc->block.height != 1) + return FALSE; + + if(format_desc->layout != UTIL_FORMAT_LAYOUT_SCALAR && + format_desc->layout != UTIL_FORMAT_LAYOUT_ARITH && + format_desc->layout != UTIL_FORMAT_LAYOUT_ARRAY) + return FALSE; + + if(format_desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB && + format_desc->colorspace != UTIL_FORMAT_COLORSPACE_SRGB && + format_desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) + return FALSE; + } return TRUE; } diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 1eb944a0de7..5cdcf4ecc98 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -451,8 +451,8 @@ lp_setup_set_sampler_textures( struct setup_context *setup, struct llvmpipe_texture *lp_tex = llvmpipe_texture(tex); struct lp_jit_texture *jit_tex; jit_tex = &setup->fs.current.jit_context.textures[i]; - jit_tex->width = tex->width[0]; - jit_tex->height = tex->height[0]; + jit_tex->width = tex->width0; + jit_tex->height = tex->height0; jit_tex->stride = lp_tex->stride[0]; if(!lp_tex->dt) jit_tex->data = lp_tex->data; diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index e15b987767c..fe34903cf32 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -241,9 +241,6 @@ static inline int subpixel_snap( float a ) } -#define MIN3(a,b,c) MIN2(MIN2(a,b),c) -#define MAX3(a,b,c) MAX2(MAX2(a,b),c) - /** * Do basic setup for triangle rasterization and determine which * framebuffer tiles are touched. Put the triangle in the scene's diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h index 6017dc553a6..25d13536741 100644 --- a/src/gallium/drivers/llvmpipe/lp_state.h +++ b/src/gallium/drivers/llvmpipe/lp_state.h @@ -57,7 +57,6 @@ #define LP_NEW_BLEND_COLOR 0x8000 -struct tgsi_sampler; struct vertex_info; struct pipe_context; struct llvmpipe_context; @@ -127,6 +126,10 @@ void * llvmpipe_create_sampler_state(struct pipe_context *, const struct pipe_sampler_state *); void llvmpipe_bind_sampler_states(struct pipe_context *, unsigned, void **); +void +llvmpipe_bind_vertex_sampler_states(struct pipe_context *, + unsigned num_samplers, + void **samplers); void llvmpipe_delete_sampler_state(struct pipe_context *, void *); void * @@ -173,6 +176,11 @@ void llvmpipe_set_sampler_textures( struct pipe_context *, unsigned num, struct pipe_texture ** ); +void +llvmpipe_set_vertex_sampler_textures(struct pipe_context *, + unsigned num_textures, + struct pipe_texture **); + void llvmpipe_set_viewport_state( struct pipe_context *, const struct pipe_viewport_state * ); @@ -189,14 +197,14 @@ void llvmpipe_update_fs(struct llvmpipe_context *lp); void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ); -boolean llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, +void llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count); -boolean llvmpipe_draw_elements(struct pipe_context *pipe, +void llvmpipe_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, unsigned count); -boolean +void llvmpipe_draw_range_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, @@ -205,10 +213,6 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count); void -llvmpipe_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags); - - -void llvmpipe_map_texture_surfaces(struct llvmpipe_context *lp); void diff --git a/src/gallium/drivers/llvmpipe/lp_state_blend.c b/src/gallium/drivers/llvmpipe/lp_state_blend.c index 48afe5f5242..a10c5918df3 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_state_blend.c @@ -34,6 +34,7 @@ #include "util/u_memory.h" #include "util/u_math.h" #include "util/u_debug_dump.h" +#include "draw/draw_context.h" #include "lp_screen.h" #include "lp_context.h" #include "lp_state.h" @@ -51,6 +52,11 @@ void llvmpipe_bind_blend_state( struct pipe_context *pipe, { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + if (llvmpipe->blend == blend) + return; + + draw_flush(llvmpipe->draw); + llvmpipe->blend = blend; llvmpipe->dirty |= LP_NEW_BLEND; @@ -74,6 +80,11 @@ void llvmpipe_set_blend_color( struct pipe_context *pipe, if(memcmp(&llvmpipe->blend_color, blend_color, sizeof *blend_color) == 0) return; + if(memcmp(&llvmpipe->blend_color, blend_color, sizeof *blend_color) == 0) + return; + + draw_flush(llvmpipe->draw); + memcpy(&llvmpipe->blend_color, blend_color, sizeof *blend_color); llvmpipe->dirty |= LP_NEW_BLEND_COLOR; @@ -98,7 +109,12 @@ llvmpipe_bind_depth_stencil_state(struct pipe_context *pipe, { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); - llvmpipe->depth_stencil = (const struct pipe_depth_stencil_alpha_state *)depth_stencil; + if (llvmpipe->depth_stencil == depth_stencil) + return; + + draw_flush(llvmpipe->draw); + + llvmpipe->depth_stencil = depth_stencil; llvmpipe->dirty |= LP_NEW_DEPTH_STENCIL_ALPHA; } diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index cc7b09fd4d1..78d046985b9 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -50,7 +50,7 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe) { const struct lp_fragment_shader *lpfs = llvmpipe->fs; struct vertex_info *vinfo = &llvmpipe->vertex_info; - const uint num = draw_num_vs_outputs(llvmpipe->draw); + const uint num = draw_num_shader_outputs(llvmpipe->draw); uint i; /* Tell setup to tell the draw module to simply emit the whole @@ -123,9 +123,9 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe) /* Search for each input in current vs output: */ inputs[i].src_index = - draw_find_vs_output(llvmpipe->draw, - lpfs->info.input_semantic_name[i], - lpfs->info.input_semantic_index[i]); + draw_find_shader_output(llvmpipe->draw, + lpfs->info.input_semantic_name[i], + lpfs->info.input_semantic_index[i]); } lp_setup_set_fs_inputs(llvmpipe->setup, @@ -170,26 +170,6 @@ compute_cliprect(struct llvmpipe_context *lp) } -#if 0 -static void -update_culling(struct llvmpipe_context *lp) -{ - struct lp_setup_context *setup = lp->setup; - - if (lp->reduced_api_prim == PIPE_PRIM_TRIANGLES && - lp->rasterizer->fill_cw == PIPE_POLYGON_MODE_FILL && - lp->rasterizer->fill_ccw == PIPE_POLYGON_MODE_FILL) { - /* we'll do culling */ - setup->winding = lp->rasterizer->cull_mode; - } - else { - /* 'draw' will do culling */ - setup->winding = PIPE_WINDING_NONE; - } -} -#endif - - /* Hopefully this will remain quite simple, otherwise need to pull in * something like the state tracker mechanism. */ diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 3ad58415e39..3a669ba859a 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -149,6 +149,20 @@ generate_depth(LLVMBuilderRef builder, format_desc = util_format_description(key->zsbuf_format); assert(format_desc); + /* + * Depths are expected to be between 0 and 1, even if they are stored in + * floats. Setting these bits here will ensure that the lp_build_conv() call + * below won't try to unnecessarily clamp the incoming values. + */ + if(src_type.floating) { + src_type.sign = FALSE; + src_type.norm = TRUE; + } + else { + assert(!src_type.sign); + assert(src_type.norm); + } + /* Pick the depth type. */ dst_type = lp_depth_type(format_desc, src_type.width*src_type.length); @@ -156,14 +170,11 @@ generate_depth(LLVMBuilderRef builder, assert(dst_type.width == src_type.width); assert(dst_type.length == src_type.length); -#if 1 - src = lp_build_clamped_float_to_unsigned_norm(builder, - src_type, - dst_type.width, - src); -#else lp_build_conv(builder, src_type, dst_type, &src, 1, &src, 1); -#endif + + dst_ptr = LLVMBuildBitCast(builder, + dst_ptr, + LLVMPointerType(lp_build_vec_type(dst_type), 0), ""); lp_build_depth_test(builder, &key->depth, @@ -505,6 +516,7 @@ generate_fragment(struct llvmpipe_context *lp, if (LP_DEBUG & DEBUG_JIT) { tgsi_dump(shader->base.tokens, 0); if(key->depth.enabled) { + debug_printf("depth.format = %s\n", pf_name(key->zsbuf_format)); debug_printf("depth.func = %s\n", debug_dump_func(key->depth.func, TRUE)); debug_printf("depth.writemask = %u\n", key->depth.writemask); } @@ -517,13 +529,40 @@ generate_fragment(struct llvmpipe_context *lp, } else if(key->blend.blend_enable) { debug_printf("blend.rgb_func = %s\n", debug_dump_blend_func (key->blend.rgb_func, TRUE)); - debug_printf("blend.rgb_src_factor = %s\n", debug_dump_blend_factor(key->blend.rgb_src_factor, TRUE)); - debug_printf("blend.rgb_dst_factor = %s\n", debug_dump_blend_factor(key->blend.rgb_dst_factor, TRUE)); - debug_printf("blend.alpha_func = %s\n", debug_dump_blend_func (key->blend.alpha_func, TRUE)); - debug_printf("blend.alpha_src_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_src_factor, TRUE)); - debug_printf("blend.alpha_dst_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_dst_factor, TRUE)); + debug_printf("rgb_src_factor = %s\n", debug_dump_blend_factor(key->blend.rgb_src_factor, TRUE)); + debug_printf("rgb_dst_factor = %s\n", debug_dump_blend_factor(key->blend.rgb_dst_factor, TRUE)); + debug_printf("alpha_func = %s\n", debug_dump_blend_func (key->blend.alpha_func, TRUE)); + debug_printf("alpha_src_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_src_factor, TRUE)); + debug_printf("alpha_dst_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_dst_factor, TRUE)); } debug_printf("blend.colormask = 0x%x\n", key->blend.colormask); + for(i = 0; i < PIPE_MAX_SAMPLERS; ++i) { + if(key->sampler[i].format) { + debug_printf("sampler[%u] = \n", i); + debug_printf(" .format = %s\n", + pf_name(key->sampler[i].format)); + debug_printf(" .target = %s\n", + debug_dump_tex_target(key->sampler[i].target, TRUE)); + debug_printf(" .pot = %u %u %u\n", + key->sampler[i].pot_width, + key->sampler[i].pot_height, + key->sampler[i].pot_depth); + debug_printf(" .wrap = %s %s %s\n", + debug_dump_tex_wrap(key->sampler[i].wrap_s, TRUE), + debug_dump_tex_wrap(key->sampler[i].wrap_t, TRUE), + debug_dump_tex_wrap(key->sampler[i].wrap_r, TRUE)); + debug_printf(" .min_img_filter = %s\n", + debug_dump_tex_filter(key->sampler[i].min_img_filter, TRUE)); + debug_printf(" .min_mip_filter = %s\n", + debug_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE)); + debug_printf(" .mag_img_filter = %s\n", + debug_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE)); + if(key->sampler[i].compare_mode != PIPE_TEX_COMPARE_NONE) + debug_printf(" .compare_func = %s\n", debug_dump_func(key->sampler[i].compare_func, TRUE)); + debug_printf(" .normalized_coords = %u\n", key->sampler[i].normalized_coords); + debug_printf(" .prefilter = %u\n", key->sampler[i].prefilter); + } + } } variant = CALLOC_STRUCT(lp_fragment_shader_variant); @@ -702,10 +741,12 @@ generate_fragment(struct llvmpipe_context *lp, * Translate the LLVM IR into machine code. */ +#ifdef DEBUG if(LLVMVerifyFunction(variant->function, LLVMPrintMessageAction)) { LLVMDumpValue(variant->function); - abort(); + assert(0); } +#endif LLVMRunFunctionPassManager(screen->pass, variant->function); @@ -751,7 +792,12 @@ llvmpipe_bind_fs_state(struct pipe_context *pipe, void *fs) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); - llvmpipe->fs = (struct lp_fragment_shader *) fs; + if (llvmpipe->fs == fs) + return; + + draw_flush(llvmpipe->draw); + + llvmpipe->fs = fs; llvmpipe->dirty |= LP_NEW_FS; } @@ -766,6 +812,7 @@ llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs) struct lp_fragment_shader_variant *variant; assert(fs != llvmpipe->fs); + (void) llvmpipe; variant = shader->variants; while(variant) { @@ -804,14 +851,14 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe, if(llvmpipe->constants[shader].buffer == buffer) return; - if(shader == PIPE_SHADER_VERTEX) - draw_flush(llvmpipe->draw); + draw_flush(llvmpipe->draw); /* note: reference counting */ pipe_buffer_reference(&llvmpipe->constants[shader].buffer, buffer); if(shader == PIPE_SHADER_VERTEX) { - draw_set_mapped_constant_buffer(llvmpipe->draw, data, size); + draw_set_mapped_constant_buffer(llvmpipe->draw, PIPE_SHADER_VERTEX, + data, size); } llvmpipe->dirty |= LP_NEW_CONSTANTS; diff --git a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c index 282ed2e9ea3..7d4c310aae8 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c +++ b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c @@ -42,14 +42,17 @@ llvmpipe_create_rasterizer_state(struct pipe_context *pipe, } void llvmpipe_bind_rasterizer_state(struct pipe_context *pipe, - void *setup) + void *rasterizer) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + if (llvmpipe->rasterizer == rasterizer) + return; + /* pass-through to draw module */ - draw_set_rasterizer_state(llvmpipe->draw, setup); + draw_set_rasterizer_state(llvmpipe->draw, rasterizer); - llvmpipe->rasterizer = (struct pipe_rasterizer_state *)setup; + llvmpipe->rasterizer = rasterizer; /* Note: we can immediately set the triangle state here and * not worry about binning because we handle culling during diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/src/gallium/drivers/llvmpipe/lp_state_sampler.c index e19394a4c92..976f81113fd 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c +++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c @@ -77,6 +77,34 @@ llvmpipe_bind_sampler_states(struct pipe_context *pipe, void +llvmpipe_bind_vertex_sampler_states(struct pipe_context *pipe, + unsigned num_samplers, + void **samplers) +{ + struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + unsigned i; + + assert(num_samplers <= PIPE_MAX_VERTEX_SAMPLERS); + + /* Check for no-op */ + if (num_samplers == llvmpipe->num_vertex_samplers && + !memcmp(llvmpipe->vertex_samplers, samplers, num_samplers * sizeof(void *))) + return; + + draw_flush(llvmpipe->draw); + + for (i = 0; i < num_samplers; ++i) + llvmpipe->vertex_samplers[i] = samplers[i]; + for (i = num_samplers; i < PIPE_MAX_VERTEX_SAMPLERS; ++i) + llvmpipe->vertex_samplers[i] = NULL; + + llvmpipe->num_vertex_samplers = num_samplers; + + llvmpipe->dirty |= LP_NEW_SAMPLER; +} + + +void llvmpipe_set_sampler_textures(struct pipe_context *pipe, unsigned num, struct pipe_texture **texture) { @@ -105,6 +133,36 @@ llvmpipe_set_sampler_textures(struct pipe_context *pipe, void +llvmpipe_set_vertex_sampler_textures(struct pipe_context *pipe, + unsigned num_textures, + struct pipe_texture **textures) +{ + struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + uint i; + + assert(num_textures <= PIPE_MAX_VERTEX_SAMPLERS); + + /* Check for no-op */ + if (num_textures == llvmpipe->num_vertex_textures && + !memcmp(llvmpipe->vertex_textures, textures, num_textures * sizeof(struct pipe_texture *))) { + return; + } + + draw_flush(llvmpipe->draw); + + for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { + struct pipe_texture *tex = i < num_textures ? textures[i] : NULL; + + pipe_texture_reference(&llvmpipe->vertex_textures[i], tex); + } + + llvmpipe->num_vertex_textures = num_textures; + + llvmpipe->dirty |= LP_NEW_TEXTURE; +} + + +void llvmpipe_delete_sampler_state(struct pipe_context *pipe, void *sampler) { diff --git a/src/gallium/drivers/llvmpipe/lp_state_surface.c b/src/gallium/drivers/llvmpipe/lp_state_surface.c index 957e947fe02..0e9f03b90b8 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_surface.c +++ b/src/gallium/drivers/llvmpipe/lp_state_surface.c @@ -37,6 +37,8 @@ #include "draw/draw_context.h" +#include "util/u_format.h" + /** * Set the framebuffer surface info: color buffers, zbuffer, stencil buffer. @@ -57,8 +59,9 @@ llvmpipe_set_framebuffer_state(struct pipe_context *pipe, if (lp->framebuffer.zsbuf) { int depth_bits; double mrd; - depth_bits = pf_get_component_bits(lp->framebuffer.zsbuf->format, - PIPE_FORMAT_COMP_Z); + depth_bits = util_format_get_component_bits(lp->framebuffer.zsbuf->format, + UTIL_FORMAT_COLORSPACE_ZS, + 0); if (depth_bits > 16) { mrd = 0.0000001; } diff --git a/src/gallium/drivers/llvmpipe/lp_state_vs.c b/src/gallium/drivers/llvmpipe/lp_state_vs.c index 15c30296144..884e3878e62 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_vs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_vs.c @@ -70,14 +70,18 @@ fail: void -llvmpipe_bind_vs_state(struct pipe_context *pipe, void *vs) +llvmpipe_bind_vs_state(struct pipe_context *pipe, void *_vs) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + const struct lp_vertex_shader *vs = (const struct lp_vertex_shader *)_vs; - llvmpipe->vs = (const struct lp_vertex_shader *)vs; + if (llvmpipe->vs == vs) + return; - draw_bind_vertex_shader(llvmpipe->draw, - (llvmpipe->vs ? llvmpipe->vs->draw_data : NULL)); + draw_bind_vertex_shader(llvmpipe->draw, + vs ? vs->draw_data : NULL); + + llvmpipe->vs = vs; llvmpipe->dirty |= LP_NEW_VS; } @@ -92,5 +96,6 @@ llvmpipe_delete_vs_state(struct pipe_context *pipe, void *vs) (struct lp_vertex_shader *)vs; draw_delete_vertex_shader(llvmpipe->draw, state->draw_data); + FREE( (void *)state->shader.tokens ); FREE( state ); } diff --git a/src/gallium/drivers/llvmpipe/lp_test.h b/src/gallium/drivers/llvmpipe/lp_test.h index a88e110c663..39d80726e65 100644 --- a/src/gallium/drivers/llvmpipe/lp_test.h +++ b/src/gallium/drivers/llvmpipe/lp_test.h @@ -56,6 +56,9 @@ #include "lp_bld_type.h" +#define LP_TEST_NUM_SAMPLES 32 + + void write_tsv_header(FILE *fp); @@ -68,17 +71,28 @@ boolean test_all(unsigned verbose, FILE *fp); +#if defined(PIPE_CC_MSVC) + +unsigned __int64 __rdtsc(); +#pragma intrinsic(__rdtsc) +#define rdtsc() __rdtsc() + +#elif defined(PIPE_CC_GCC) && (defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)) + static INLINE uint64_t rdtsc(void) { -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) uint32_t hi, lo; __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi)); return ((uint64_t)lo) | (((uint64_t)hi) << 32); +} + #else - return 0; + +#define rdtsc() 0 + #endif -} + float diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c index 94b661dcba4..29fff91981a 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c @@ -462,6 +462,7 @@ compute_blend_ref(const struct pipe_blend_state *blend, } +ALIGN_STACK static boolean test_one(unsigned verbose, FILE *fp, @@ -477,8 +478,8 @@ test_one(unsigned verbose, char *error = NULL; blend_test_ptr_t blend_test_ptr; boolean success; - const unsigned n = 32; - int64_t cycles[n]; + const unsigned n = LP_TEST_NUM_SAMPLES; + int64_t cycles[LP_TEST_NUM_SAMPLES]; double cycles_avg = 0.0; unsigned i, j; @@ -530,11 +531,11 @@ test_one(unsigned verbose, success = TRUE; for(i = 0; i < n && success; ++i) { if(mode == AoS) { - uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; - uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; - uint8_t con[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; - uint8_t res[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; - uint8_t ref[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; + ALIGN16_ATTRIB uint8_t src[LP_NATIVE_VECTOR_WIDTH/8]; + ALIGN16_ATTRIB uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8]; + ALIGN16_ATTRIB uint8_t con[LP_NATIVE_VECTOR_WIDTH/8]; + ALIGN16_ATTRIB uint8_t res[LP_NATIVE_VECTOR_WIDTH/8]; + ALIGN16_ATTRIB uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8]; int64_t start_counter = 0; int64_t end_counter = 0; @@ -595,11 +596,11 @@ test_one(unsigned verbose, if(mode == SoA) { const unsigned stride = type.length*type.width/8; - uint8_t src[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; - uint8_t dst[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; - uint8_t con[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; - uint8_t res[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; - uint8_t ref[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; + ALIGN16_ATTRIB uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8]; + ALIGN16_ATTRIB uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8]; + ALIGN16_ATTRIB uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8]; + ALIGN16_ATTRIB uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8]; + ALIGN16_ATTRIB uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8]; int64_t start_counter = 0; int64_t end_counter = 0; boolean mismatch; diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c index 9dcf58e5dcd..faddfb96779 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_conv.c +++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c @@ -142,6 +142,7 @@ add_conv_test(LLVMModuleRef module, } +ALIGN_STACK static boolean test_one(unsigned verbose, FILE *fp, @@ -156,8 +157,8 @@ test_one(unsigned verbose, char *error = NULL; conv_test_ptr_t conv_test_ptr; boolean success; - const unsigned n = 32; - int64_t cycles[n]; + const unsigned n = LP_TEST_NUM_SAMPLES; + int64_t cycles[LP_TEST_NUM_SAMPLES]; double cycles_avg = 0.0; unsigned num_srcs; unsigned num_dsts; @@ -229,8 +230,8 @@ test_one(unsigned verbose, for(i = 0; i < n && success; ++i) { unsigned src_stride = src_type.length*src_type.width/8; unsigned dst_stride = dst_type.length*dst_type.width/8; - uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; - uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; + ALIGN16_ATTRIB uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; + ALIGN16_ATTRIB uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; double fref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; uint8_t ref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; int64_t start_counter = 0; @@ -329,7 +330,7 @@ test_one(unsigned verbose, fprintf(stderr, "conv.bc written\n"); fprintf(stderr, "Invoke as \"llc -o - conv.bc\"\n"); firsttime = FALSE; - //abort(); + /* abort(); */ } } diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c index ab80c0143f9..23ea9ebbe7d 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_format.c +++ b/src/gallium/drivers/llvmpipe/lp_test_format.c @@ -35,9 +35,11 @@ #include <llvm-c/Target.h> #include <llvm-c/Transforms/Scalar.h> +#include "util/u_cpu_detect.h" #include "util/u_format.h" #include "lp_bld_format.h" +#include "lp_test.h" struct pixel_test_case @@ -88,34 +90,62 @@ struct pixel_test_case test_cases[] = }; -typedef void (*load_ptr_t)(const void *, float *); +void +write_tsv_header(FILE *fp) +{ + fprintf(fp, + "result\t" + "format\n"); + + fflush(fp); +} + + +static void +write_tsv_row(FILE *fp, + const struct util_format_description *desc, + boolean success) +{ + fprintf(fp, "%s\t", success ? "pass" : "fail"); + + fprintf(fp, "%s\n", desc->name); + + fflush(fp); +} + + +typedef void (*load_ptr_t)(const uint32_t packed, float *); static LLVMValueRef add_load_rgba_test(LLVMModuleRef module, - enum pipe_format format) + const struct util_format_description *desc) { LLVMTypeRef args[2]; LLVMValueRef func; - LLVMValueRef ptr; + LLVMValueRef packed; LLVMValueRef rgba_ptr; LLVMBasicBlockRef block; LLVMBuilderRef builder; LLVMValueRef rgba; - args[0] = LLVMPointerType(LLVMInt8Type(), 0); + args[0] = LLVMInt32Type(); args[1] = LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0); func = LLVMAddFunction(module, "load", LLVMFunctionType(LLVMVoidType(), args, 2, 0)); LLVMSetFunctionCallConv(func, LLVMCCallConv); - ptr = LLVMGetParam(func, 0); + packed = LLVMGetParam(func, 0); rgba_ptr = LLVMGetParam(func, 1); block = LLVMAppendBasicBlock(func, "entry"); builder = LLVMCreateBuilder(); LLVMPositionBuilderAtEnd(builder, block); - rgba = lp_build_load_rgba_aos(builder, format, ptr); + if(desc->block.bits < 32) + packed = LLVMBuildTrunc(builder, packed, LLVMIntType(desc->block.bits), ""); + + rgba = lp_build_unpack_rgba_aos(builder, desc, packed); + LLVMBuildStore(builder, rgba, rgba_ptr); LLVMBuildRetVoid(builder); @@ -125,27 +155,28 @@ add_load_rgba_test(LLVMModuleRef module, } -typedef void (*store_ptr_t)(void *, const float *); +typedef void (*store_ptr_t)(uint32_t *, const float *); static LLVMValueRef add_store_rgba_test(LLVMModuleRef module, - enum pipe_format format) + const struct util_format_description *desc) { LLVMTypeRef args[2]; LLVMValueRef func; - LLVMValueRef ptr; + LLVMValueRef packed_ptr; LLVMValueRef rgba_ptr; LLVMBasicBlockRef block; LLVMBuilderRef builder; LLVMValueRef rgba; + LLVMValueRef packed; - args[0] = LLVMPointerType(LLVMInt8Type(), 0); + args[0] = LLVMPointerType(LLVMInt32Type(), 0); args[1] = LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0); func = LLVMAddFunction(module, "store", LLVMFunctionType(LLVMVoidType(), args, 2, 0)); LLVMSetFunctionCallConv(func, LLVMCCallConv); - ptr = LLVMGetParam(func, 0); + packed_ptr = LLVMGetParam(func, 0); rgba_ptr = LLVMGetParam(func, 1); block = LLVMAppendBasicBlock(func, "entry"); @@ -154,7 +185,12 @@ add_store_rgba_test(LLVMModuleRef module, rgba = LLVMBuildLoad(builder, rgba_ptr, ""); - lp_build_store_rgba_aos(builder, format, ptr, rgba); + packed = lp_build_pack_rgba_aos(builder, desc, rgba); + + if(desc->block.bits < 32) + packed = LLVMBuildZExt(builder, packed, LLVMInt32Type(), ""); + + LLVMBuildStore(builder, packed, packed_ptr); LLVMBuildRetVoid(builder); @@ -163,8 +199,9 @@ add_store_rgba_test(LLVMModuleRef module, } +ALIGN_STACK static boolean -test_format(const struct pixel_test_case *test) +test_format(unsigned verbose, FILE *fp, const struct pixel_test_case *test) { LLVMModuleRef module = NULL; LLVMValueRef load = NULL; @@ -186,8 +223,8 @@ test_format(const struct pixel_test_case *test) module = LLVMModuleCreateWithName("test"); - load = add_load_rgba_test(module, test->format); - store = add_store_rgba_test(module, test->format); + load = add_load_rgba_test(module, desc); + store = add_store_rgba_test(module, desc); if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) { LLVMDumpModule(module); @@ -223,7 +260,7 @@ test_format(const struct pixel_test_case *test) memset(unpacked, 0, sizeof unpacked); packed = 0; - load_ptr(&test->packed, unpacked); + load_ptr(test->packed, unpacked); store_ptr(&packed, unpacked); success = TRUE; @@ -249,23 +286,29 @@ test_format(const struct pixel_test_case *test) if(pass) LLVMDisposePassManager(pass); + if(fp) + write_tsv_row(fp, desc, success); + return success; } -int main(int argc, char **argv) +boolean +test_all(unsigned verbose, FILE *fp) { unsigned i; - int ret; - -#ifdef LLVM_NATIVE_ARCH - LLVMLinkInJIT(); - LLVMInitializeNativeTarget(); -#endif + bool success = TRUE; for (i = 0; i < sizeof(test_cases)/sizeof(test_cases[0]); ++i) - if(!test_format(&test_cases[i])) - ret = 1; + if(!test_format(verbose, fp, &test_cases[i])) + success = FALSE; - return ret; + return success; +} + + +boolean +test_some(unsigned verbose, FILE *fp, unsigned long n) +{ + return test_all(verbose, fp); } diff --git a/src/gallium/drivers/llvmpipe/lp_test_main.c b/src/gallium/drivers/llvmpipe/lp_test_main.c index f07fa256f16..314544aa9a6 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_main.c +++ b/src/gallium/drivers/llvmpipe/lp_test_main.c @@ -34,10 +34,25 @@ */ +#include "util/u_cpu_detect.h" + #include "lp_bld_const.h" +#include "lp_bld_misc.h" #include "lp_test.h" +#ifdef PIPE_CC_MSVC +static INLINE double +round(double x) +{ + if (x >= 0.0) + return floor(x + 0.5); + else + return ceil(x - 0.5); +} +#endif + + void dump_type(FILE *fp, struct lp_type type) @@ -365,10 +380,10 @@ int main(int argc, char **argv) n = atoi(argv[i]); } -#ifdef LLVM_NATIVE_ARCH LLVMLinkInJIT(); LLVMInitializeNativeTarget(); -#endif + + util_cpu_detect(); if(fp) { /* Warm up the caches */ diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.h b/src/gallium/drivers/llvmpipe/lp_tex_sample.h index dfc9c0e6f04..cb59a94464a 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample.h +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.h @@ -31,32 +31,10 @@ #include <llvm-c/Core.h> -#include "tgsi/tgsi_exec.h" - struct lp_sampler_static_state; - -extern void -lp_get_samples(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]); - - -/** - * Texture sampling code generator that just calls lp_get_samples C function - * for the actual sampling computation. - * - * @param context_ptr LLVM value with the pointer to the struct lp_jit_context. - */ -struct lp_build_sampler_soa * -lp_c_sampler_soa_create(LLVMValueRef context_ptr); - - /** * Pure-LLVM texture sampling code generator. * diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c index 1682e37354a..2462378152a 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.c +++ b/src/gallium/drivers/llvmpipe/lp_texture.c @@ -34,6 +34,8 @@ #include "pipe/p_defines.h" #include "pipe/p_inlines.h" #include "pipe/internal/p_winsys_screen.h" + +#include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -47,7 +49,6 @@ /* Simple, maximally packed layout. */ - /* Conventional allocation path for non-display textures: */ static boolean @@ -56,31 +57,31 @@ llvmpipe_texture_layout(struct llvmpipe_screen *screen, { struct pipe_texture *pt = &lpt->base; unsigned level; - unsigned width = pt->width[0]; - unsigned height = pt->height[0]; - unsigned depth = pt->depth[0]; + unsigned width = pt->width0; + unsigned height = pt->height0; + unsigned depth = pt->depth0; unsigned buffer_size = 0; - pf_get_block(lpt->base.format, &lpt->base.block); - for (level = 0; level <= pt->last_level; level++) { - pt->width[level] = width; - pt->height[level] = height; - pt->depth[level] = depth; - pt->nblocksx[level] = pf_get_nblocksx(&pt->block, width); - pt->nblocksy[level] = pf_get_nblocksy(&pt->block, height); - lpt->stride[level] = align(pt->nblocksx[level]*pt->block.size, 16); + unsigned nblocksx, nblocksy; + + /* Allocate storage for whole quads. This is particularly important + * for depth surfaces, which are currently stored in a swizzled format. */ + nblocksx = util_format_get_nblocksx(pt->format, align(width, 2)); + nblocksy = util_format_get_nblocksy(pt->format, align(height, 2)); + + lpt->stride[level] = align(nblocksx * util_format_get_blocksize(pt->format), 16); lpt->level_offset[level] = buffer_size; - buffer_size += (pt->nblocksy[level] * + buffer_size += (nblocksy * ((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) * lpt->stride[level]); - width = minify(width); - height = minify(height); - depth = minify(depth); + width = u_minify(width, 1); + height = u_minify(height, 1); + depth = u_minify(depth, 1); } lpt->data = align_malloc(buffer_size, 16); @@ -94,14 +95,10 @@ llvmpipe_displaytarget_layout(struct llvmpipe_screen *screen, { struct llvmpipe_winsys *winsys = screen->winsys; - pf_get_block(lpt->base.format, &lpt->base.block); - lpt->base.nblocksx[0] = pf_get_nblocksx(&lpt->base.block, lpt->base.width[0]); - lpt->base.nblocksy[0] = pf_get_nblocksy(&lpt->base.block, lpt->base.height[0]); - lpt->dt = winsys->displaytarget_create(winsys, lpt->base.format, - lpt->base.width[0], - lpt->base.height[0], + lpt->base.width0, + lpt->base.height0, 16, &lpt->stride[0] ); @@ -163,7 +160,7 @@ llvmpipe_texture_blanket(struct pipe_screen * screen, /* Only supports one type */ if (base->target != PIPE_TEXTURE_2D || base->last_level != 0 || - base->depth[0] != 1) { + base->depth0 != 1) { return NULL; } @@ -174,8 +171,6 @@ llvmpipe_texture_blanket(struct pipe_screen * screen, lpt->base = *base; pipe_reference_init(&lpt->base.reference, 1); lpt->base.screen = screen; - lpt->base.nblocksx[0] = pf_get_nblocksx(&lpt->base.block, lpt->base.width[0]); - lpt->base.nblocksy[0] = pf_get_nblocksy(&lpt->base.block, lpt->base.height[0]); lpt->stride[0] = stride[0]; pipe_buffer_reference(&lpt->buffer, buffer); @@ -220,8 +215,8 @@ llvmpipe_get_tex_surface(struct pipe_screen *screen, pipe_reference_init(&ps->reference, 1); pipe_texture_reference(&ps->texture, pt); ps->format = pt->format; - ps->width = pt->width[level]; - ps->height = pt->height[level]; + ps->width = u_minify(pt->width0, level); + ps->height = u_minify(pt->height0, level); ps->offset = lpt->level_offset[level]; ps->usage = usage; @@ -249,11 +244,17 @@ llvmpipe_get_tex_surface(struct pipe_screen *screen, ps->level = level; ps->zslice = zslice; + /* XXX shouldn't that rather be + tex_height = align(ps->height, 2); + to account for alignment done in llvmpipe_texture_layout ? + */ if (pt->target == PIPE_TEXTURE_CUBE) { - ps->offset += face * pt->nblocksy[level] * lpt->stride[level]; + unsigned tex_height = ps->height; + ps->offset += face * util_format_get_nblocksy(pt->format, tex_height) * lpt->stride[level]; } else if (pt->target == PIPE_TEXTURE_3D) { - ps->offset += zslice * pt->nblocksy[level] * lpt->stride[level]; + unsigned tex_height = ps->height; + ps->offset += zslice * util_format_get_nblocksy(pt->format, tex_height) * lpt->stride[level]; } else { assert(face == 0); @@ -294,14 +295,10 @@ llvmpipe_get_tex_transfer(struct pipe_screen *screen, if (lpt) { struct pipe_transfer *pt = &lpt->base; pipe_texture_reference(&pt->texture, texture); - pt->format = texture->format; - pt->block = texture->block; pt->x = x; pt->y = y; pt->width = w; pt->height = h; - pt->nblocksx = texture->nblocksx[level]; - pt->nblocksy = texture->nblocksy[level]; pt->stride = lptex->stride[level]; pt->usage = usage; pt->face = face; @@ -310,11 +307,17 @@ llvmpipe_get_tex_transfer(struct pipe_screen *screen, lpt->offset = lptex->level_offset[level]; + /* XXX shouldn't that rather be + tex_height = align(u_minify(texture->height0, level), 2) + to account for alignment done in llvmpipe_texture_layout ? + */ if (texture->target == PIPE_TEXTURE_CUBE) { - lpt->offset += face * pt->nblocksy * pt->stride; + unsigned tex_height = u_minify(texture->height0, level); + lpt->offset += face * util_format_get_nblocksy(texture->format, tex_height) * pt->stride; } else if (texture->target == PIPE_TEXTURE_3D) { - lpt->offset += zslice * pt->nblocksy * pt->stride; + unsigned tex_height = u_minify(texture->height0, level); + lpt->offset += zslice * util_format_get_nblocksy(texture->format, tex_height) * pt->stride; } else { assert(face == 0); @@ -346,9 +349,11 @@ llvmpipe_transfer_map( struct pipe_screen *_screen, struct llvmpipe_screen *screen = llvmpipe_screen(_screen); ubyte *map, *xfer_map; struct llvmpipe_texture *lpt; + enum pipe_format format; assert(transfer->texture); lpt = llvmpipe_texture(transfer->texture); + format = lpt->base.format; if(lpt->dt) { struct llvmpipe_winsys *winsys = screen->winsys; @@ -372,8 +377,8 @@ llvmpipe_transfer_map( struct pipe_screen *_screen, } xfer_map = map + llvmpipe_transfer(transfer)->offset + - transfer->y / transfer->block.height * transfer->stride + - transfer->x / transfer->block.width * transfer->block.size; + transfer->y / util_format_get_blockheight(format) * transfer->stride + + transfer->x / util_format_get_blockwidth(format) * util_format_get_blocksize(format); /*printf("map = %p xfer map = %p\n", map, xfer_map);*/ return xfer_map; } diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.h b/src/gallium/drivers/llvmpipe/lp_tile_soa.h index 660cc30c820..1b7be3cce0d 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_soa.h +++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.h @@ -29,7 +29,7 @@ #define LP_TILE_SOA_H #include "pipe/p_compiler.h" -#include "tgsi/tgsi_exec.h" // for NUM_CHANNELS +#include "tgsi/tgsi_exec.h" /* for NUM_CHANNELS */ #ifdef __cplusplus diff --git a/src/gallium/drivers/llvmpipe/lp_winsys.h b/src/gallium/drivers/llvmpipe/lp_winsys.h index 595481c2cbc..74b472b6531 100644 --- a/src/gallium/drivers/llvmpipe/lp_winsys.h +++ b/src/gallium/drivers/llvmpipe/lp_winsys.h @@ -35,7 +35,7 @@ #define LP_WINSYS_H -#include "pipe/p_compiler.h" // for boolean +#include "pipe/p_compiler.h" /* for boolean */ #include "pipe/p_format.h" |