diff options
author | Keith Whitwell <[email protected]> | 2009-12-21 19:18:41 +0000 |
---|---|---|
committer | Keith Whitwell <[email protected]> | 2009-12-21 19:18:41 +0000 |
commit | a5585cb533af3d4e5d5324d5f526447b98597402 (patch) | |
tree | 6706dbb8b4f994b919e247647c3e8853d067b45c /src/gallium/drivers/llvmpipe | |
parent | d288a30610767f87e3e7c069730d4bc255246568 (diff) | |
parent | 574715d8368f99c0a5720a9676385d58d6cfdf30 (diff) |
Merge commit 'origin/master' into i965g-restart
Conflicts:
SConstruct
configs/default
configs/linux-dri
Diffstat (limited to 'src/gallium/drivers/llvmpipe')
40 files changed, 1456 insertions, 902 deletions
diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index b96ee23a993..e038a5229e5 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -17,11 +17,13 @@ C_SOURCES = \ lp_bld_depth.c \ lp_bld_flow.c \ lp_bld_format_aos.c \ + lp_bld_format_query.c \ lp_bld_format_soa.c \ lp_bld_interp.c \ lp_bld_intr.c \ lp_bld_logic.c \ lp_bld_pack.c \ + lp_bld_sample.c \ lp_bld_sample_soa.c \ lp_bld_swizzle.c \ lp_bld_struct.c \ @@ -33,7 +35,6 @@ C_SOURCES = \ lp_draw_arrays.c \ lp_flush.c \ lp_jit.c \ - lp_prim_setup.c \ lp_prim_vbuf.c \ lp_setup.c \ lp_query.c \ @@ -55,6 +56,9 @@ C_SOURCES = \ lp_tile_cache.c \ lp_tile_soa.c +CPP_SOURCES = \ + lp_bld_misc.cpp + include ../../Makefile.template lp_tile_soa.c: lp_tile_soa.py ../../auxiliary/util/u_format_parse.py ../../auxiliary/util/u_format_access.py ../../auxiliary/util/u_format.csv diff --git a/src/gallium/drivers/llvmpipe/README b/src/gallium/drivers/llvmpipe/README index 89d08834a3c..0c3f00fd58f 100644 --- a/src/gallium/drivers/llvmpipe/README +++ b/src/gallium/drivers/llvmpipe/README @@ -51,21 +51,22 @@ Requirements - Linux - - udis86, http://udis86.sourceforge.net/ . Use my repository, which decodes - opcodes not yet supported by upstream. + - A x86 or amd64 processor. 64bit mode is preferred. - git clone git://people.freedesktop.org/~jrfonseca/udis86 - cd udis86 - ./configure --with-pic - make - sudo make install + Support for sse2 is strongly encouraged. Support for ssse3, and sse4.1 will + yield the most efficient code. The less features the CPU has the more + likely is that you ran into underperforming, buggy, or incomplete code. + + See /proc/cpuinfo to know what your CPU supports. + + - LLVM 2.5 or greater. LLVM 2.6 is preferred. - - LLVM 2.5. On Debian based distributions do: + On Debian based distributions do: aptitude install llvm-dev - There is a typo in one of the llvm-dev 2.5 headers, that causes compilation - errors in the debug build: + There is a typo in one of the llvm 2.5 headers, that may cause compilation + errors. To fix it apply the change: --- /usr/include/llvm-c/Core.h.orig 2009-08-10 15:38:54.000000000 +0100 +++ /usr/include/llvm-c/Core.h 2009-08-10 15:38:25.000000000 +0100 @@ -79,12 +80,17 @@ Requirements #endif return reinterpret_cast<T**>(Vals); - - A x86 or amd64 processor with support for sse2, sse3, and sse4.1 SIMD - instructions. This is necessary because we emit several SSE intrinsics for - convenience. See /proc/cpuinfo to know what your CPU supports. - - - scons + - scons (optional) + - udis86, http://udis86.sourceforge.net/ (optional): + + git clone git://udis86.git.sourceforge.net/gitroot/udis86/udis86 + cd udis86 + ./autogen.sh + ./configure --with-pic + make + sudo make install + Building ======== diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index 403e4daa436..3bd2e700138 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -30,10 +30,13 @@ llvmpipe = env.ConvenienceLibrary( 'lp_bld_depth.c', 'lp_bld_flow.c', 'lp_bld_format_aos.c', + 'lp_bld_format_query.c', 'lp_bld_format_soa.c', 'lp_bld_interp.c', 'lp_bld_intr.c', + 'lp_bld_misc.cpp', 'lp_bld_pack.c', + 'lp_bld_sample.c', 'lp_bld_sample_soa.c', 'lp_bld_struct.c', 'lp_bld_logic.c', @@ -46,7 +49,6 @@ llvmpipe = env.ConvenienceLibrary( 'lp_draw_arrays.c', 'lp_flush.c', 'lp_jit.c', - 'lp_prim_setup.c', 'lp_prim_vbuf.c', 'lp_setup.c', 'lp_query.c', @@ -76,7 +78,7 @@ env.Prepend(LIBS = [llvmpipe] + auxiliaries) env.Program( target = 'lp_test_format', - source = ['lp_test_format.c'], + source = ['lp_test_format.c', 'lp_test_main.c'], ) env.Program( diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c index 83ca06acf86..9c59677a741 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c @@ -47,6 +47,7 @@ #include "util/u_memory.h" #include "util/u_debug.h" +#include "util/u_math.h" #include "util/u_string.h" #include "util/u_cpu_detect.h" @@ -361,6 +362,8 @@ lp_build_mul(struct lp_build_context *bld, LLVMValueRef b) { const struct lp_type type = bld->type; + LLVMValueRef shift; + LLVMValueRef res; if(a == bld->zero) return bld->zero; @@ -394,10 +397,84 @@ lp_build_mul(struct lp_build_context *bld, assert(0); } - if(LLVMIsConstant(a) && LLVMIsConstant(b)) - return LLVMConstMul(a, b); + if(type.fixed) + shift = lp_build_int_const_scalar(type, type.width/2); + else + shift = NULL; + + if(LLVMIsConstant(a) && LLVMIsConstant(b)) { + res = LLVMConstMul(a, b); + if(shift) { + if(type.sign) + res = LLVMConstAShr(res, shift); + else + res = LLVMConstLShr(res, shift); + } + } + else { + res = LLVMBuildMul(bld->builder, a, b, ""); + if(shift) { + if(type.sign) + res = LLVMBuildAShr(bld->builder, res, shift, ""); + else + res = LLVMBuildLShr(bld->builder, res, shift, ""); + } + } - return LLVMBuildMul(bld->builder, a, b, ""); + return res; +} + + +/** + * Small vector x scale multiplication optimization. + */ +LLVMValueRef +lp_build_mul_imm(struct lp_build_context *bld, + LLVMValueRef a, + int b) +{ + LLVMValueRef factor; + + if(b == 0) + return bld->zero; + + if(b == 1) + return a; + + if(b == -1) + return LLVMBuildNeg(bld->builder, a, ""); + + if(b == 2 && bld->type.floating) + return lp_build_add(bld, a, a); + + if(util_is_pot(b)) { + unsigned shift = ffs(b) - 1; + + if(bld->type.floating) { +#if 0 + /* + * Power of two multiplication by directly manipulating the mantissa. + * + * XXX: This might not be always faster, it will introduce a small error + * for multiplication by zero, and it will produce wrong results + * for Inf and NaN. + */ + unsigned mantissa = lp_mantissa(bld->type); + factor = lp_build_int_const_scalar(bld->type, (unsigned long long)shift << mantissa); + a = LLVMBuildBitCast(bld->builder, a, lp_build_int_vec_type(bld->type), ""); + a = LLVMBuildAdd(bld->builder, a, factor, ""); + a = LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(bld->type), ""); + return a; +#endif + } + else { + factor = lp_build_const_scalar(bld->type, shift); + return LLVMBuildShl(bld->builder, a, factor, ""); + } + } + + factor = lp_build_const_scalar(bld->type, (double)b); + return lp_build_mul(bld, a, factor); } @@ -432,13 +509,36 @@ lp_build_div(struct lp_build_context *bld, } +/** + * Linear interpolation. + * + * This also works for integer values with a few caveats. + * + * @sa http://www.stereopsis.com/doubleblend.html + */ LLVMValueRef lp_build_lerp(struct lp_build_context *bld, LLVMValueRef x, LLVMValueRef v0, LLVMValueRef v1) { - return lp_build_add(bld, v0, lp_build_mul(bld, x, lp_build_sub(bld, v1, v0))); + LLVMValueRef delta; + LLVMValueRef res; + + delta = lp_build_sub(bld, v1, v0); + + res = lp_build_mul(bld, x, delta); + + res = lp_build_add(bld, v0, res); + + if(bld->type.fixed) + /* XXX: This step is necessary for lerping 8bit colors stored on 16bits, + * but it will be wrong for other uses. Basically we need a more + * powerful lp_type, capable of further distinguishing the values + * interpretation from the value storage. */ + res = LLVMBuildAnd(bld->builder, res, lp_build_int_const_scalar(bld->type, (1 << bld->type.width/2) - 1), ""); + + return res; } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.h b/src/gallium/drivers/llvmpipe/lp_bld_arit.h index 4e568c055e2..62be4b9aee1 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.h @@ -67,6 +67,11 @@ lp_build_mul(struct lp_build_context *bld, LLVMValueRef b); LLVMValueRef +lp_build_mul_imm(struct lp_build_context *bld, + LLVMValueRef a, + int b); + +LLVMValueRef lp_build_div(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c index 98ec1cb1b9d..d438c0e63d7 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c @@ -90,7 +90,7 @@ lp_depth_type(const struct util_format_description *format_desc, if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_FLOAT) { type.floating = TRUE; - assert(swizzle = 0); + assert(swizzle == 0); assert(format_desc->channel[swizzle].size == format_desc->block.bits); } else if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED) { diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format.h b/src/gallium/drivers/llvmpipe/lp_bld_format.h index c087fc986ed..970bee379f5 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_format.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_format.h @@ -42,62 +42,34 @@ struct util_format_description; struct lp_type; -/** - * Unpack a pixel into its RGBA components. - * - * @param packed integer. - * - * @return RGBA in a 4 floats vector. - */ -LLVMValueRef -lp_build_unpack_rgba_aos(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef packed); +boolean +lp_format_is_rgba8(const struct util_format_description *desc); -/** - * Pack a pixel. - * - * @param rgba 4 float vector with the unpacked components. - */ -LLVMValueRef -lp_build_pack_rgba_aos(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef rgba); +void +lp_build_format_swizzle_soa(const struct util_format_description *format_desc, + struct lp_type type, + const LLVMValueRef *unswizzled, + LLVMValueRef *swizzled); -/** - * Load a pixel into its RGBA components. - * - * @param ptr value with the pointer to the packed pixel. Pointer type is - * irrelevant. - * - * @return RGBA in a 4 floats vector. - */ LLVMValueRef -lp_build_load_rgba_aos(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef ptr); +lp_build_unpack_rgba_aos(LLVMBuilderRef builder, + const struct util_format_description *desc, + LLVMValueRef packed); -/** - * Store a pixel. - * - * @param rgba 4 float vector with the unpacked components. - */ -void -lp_build_store_rgba_aos(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef ptr, - LLVMValueRef rgba); +LLVMValueRef +lp_build_unpack_rgba8_aos(LLVMBuilderRef builder, + const struct util_format_description *desc, + struct lp_type type, + LLVMValueRef packed); + LLVMValueRef -lp_build_gather(LLVMBuilderRef builder, - unsigned length, - unsigned src_width, - unsigned dst_width, - LLVMValueRef base_ptr, - LLVMValueRef offsets); +lp_build_pack_rgba_aos(LLVMBuilderRef builder, + const struct util_format_description *desc, + LLVMValueRef rgba); void @@ -108,12 +80,4 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder, LLVMValueRef *rgba); -void -lp_build_load_rgba_soa(LLVMBuilderRef builder, - const struct util_format_description *format_desc, - struct lp_type type, - LLVMValueRef base_ptr, - LLVMValueRef offsets, - LLVMValueRef *rgba); - #endif /* !LP_BLD_FORMAT_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c index b9b5d84bed5..5836e0173f9 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c @@ -25,18 +25,39 @@ * **************************************************************************/ +/** + * @file + * AoS pixel format manipulation. + * + * @author Jose Fonseca <[email protected]> + */ + +#include "util/u_cpu_detect.h" #include "util/u_format.h" +#include "lp_bld_type.h" +#include "lp_bld_const.h" +#include "lp_bld_logic.h" +#include "lp_bld_swizzle.h" #include "lp_bld_format.h" +/** + * Unpack a single pixel into its RGBA components. + * + * @param packed integer. + * + * @return RGBA in a 4 floats vector. + * + * XXX: This is mostly for reference and testing -- operating a single pixel at + * a time is rarely if ever needed. + */ LLVMValueRef lp_build_unpack_rgba_aos(LLVMBuilderRef builder, - enum pipe_format format, + const struct util_format_description *desc, LLVMValueRef packed) { - const struct util_format_description *desc; LLVMTypeRef type; LLVMValueRef shifted, casted, scaled, masked; LLVMValueRef shifts[4]; @@ -49,8 +70,6 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder, unsigned shift; unsigned i; - desc = util_format_description(format); - /* FIXME: Support more formats */ assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH); assert(desc->block.width == 1); @@ -151,12 +170,130 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder, } +/** + * Take a vector with packed pixels and unpack into a rgba8 vector. + * + * Formats with bit depth smaller than 32bits are accepted, but they must be + * padded to 32bits. + */ +LLVMValueRef +lp_build_unpack_rgba8_aos(LLVMBuilderRef builder, + const struct util_format_description *desc, + struct lp_type type, + LLVMValueRef packed) +{ + struct lp_build_context bld; + bool rgba8; + LLVMValueRef res; + unsigned i; + + lp_build_context_init(&bld, builder, type); + + /* FIXME: Support more formats */ + assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH); + assert(desc->block.width == 1); + assert(desc->block.height == 1); + assert(desc->block.bits <= 32); + + assert(!type.floating); + assert(!type.fixed); + assert(type.norm); + assert(type.width == 8); + assert(type.length % 4 == 0); + + rgba8 = TRUE; + for(i = 0; i < 4; ++i) { + assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED || + desc->channel[i].type == UTIL_FORMAT_TYPE_VOID); + if(desc->channel[0].size != 8) + rgba8 = FALSE; + } + + if(rgba8) { + /* + * The pixel is already in a rgba8 format variant. All it is necessary + * is to swizzle the channels. + */ + + unsigned char swizzles[4]; + boolean zeros[4]; /* bitwise AND mask */ + boolean ones[4]; /* bitwise OR mask */ + boolean swizzles_needed = FALSE; + boolean zeros_needed = FALSE; + boolean ones_needed = FALSE; + + for(i = 0; i < 4; ++i) { + enum util_format_swizzle swizzle = desc->swizzle[i]; + + /* Initialize with the no-op case */ + swizzles[i] = util_cpu_caps.little_endian ? 3 - i : i; + zeros[i] = TRUE; + ones[i] = FALSE; + + switch (swizzle) { + case UTIL_FORMAT_SWIZZLE_X: + case UTIL_FORMAT_SWIZZLE_Y: + case UTIL_FORMAT_SWIZZLE_Z: + case UTIL_FORMAT_SWIZZLE_W: + if(swizzle != swizzles[i]) { + swizzles[i] = swizzle; + swizzles_needed = TRUE; + } + break; + case UTIL_FORMAT_SWIZZLE_0: + zeros[i] = FALSE; + zeros_needed = TRUE; + break; + case UTIL_FORMAT_SWIZZLE_1: + ones[i] = TRUE; + ones_needed = TRUE; + break; + case UTIL_FORMAT_SWIZZLE_NONE: + assert(0); + break; + } + } + + res = packed; + + if(swizzles_needed) + res = lp_build_swizzle1_aos(&bld, res, swizzles); + + if(zeros_needed) { + /* Mask out zero channels */ + LLVMValueRef mask = lp_build_const_mask_aos(type, zeros); + res = LLVMBuildAnd(builder, res, mask, ""); + } + + if(ones_needed) { + /* Or one channels */ + LLVMValueRef mask = lp_build_const_mask_aos(type, ones); + res = LLVMBuildOr(builder, res, mask, ""); + } + } + else { + /* FIXME */ + assert(0); + res = lp_build_undef(type); + } + + return res; +} + + +/** + * Pack a single pixel. + * + * @param rgba 4 float vector with the unpacked components. + * + * XXX: This is mostly for reference and testing -- operating a single pixel at + * a time is rarely if ever needed. + */ LLVMValueRef lp_build_pack_rgba_aos(LLVMBuilderRef builder, - enum pipe_format format, + const struct util_format_description *desc, LLVMValueRef rgba) { - const struct util_format_description *desc; LLVMTypeRef type; LLVMValueRef packed = NULL; LLVMValueRef swizzles[4]; @@ -167,8 +304,6 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder, unsigned shift; unsigned i, j; - desc = util_format_description(format); - assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH); assert(desc->block.width == 1); assert(desc->block.height == 1); @@ -247,57 +382,3 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder, return packed; } - - -LLVMValueRef -lp_build_load_rgba_aos(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef ptr) -{ - const struct util_format_description *desc; - LLVMTypeRef type; - LLVMValueRef packed; - - desc = util_format_description(format); - - /* FIXME: Support more formats */ - assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH); - assert(desc->block.width == 1); - assert(desc->block.height == 1); - assert(desc->block.bits <= 32); - - type = LLVMIntType(desc->block.bits); - - ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(type, 0), ""); - - packed = LLVMBuildLoad(builder, ptr, ""); - - return lp_build_unpack_rgba_aos(builder, format, packed); -} - - -void -lp_build_store_rgba_aos(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef ptr, - LLVMValueRef rgba) -{ - const struct util_format_description *desc; - LLVMTypeRef type; - LLVMValueRef packed; - - desc = util_format_description(format); - - assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH); - assert(desc->block.width == 1); - assert(desc->block.height == 1); - - type = LLVMIntType(desc->block.bits); - - packed = lp_build_pack_rgba_aos(builder, format, rgba); - - ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(type, 0), ""); - - LLVMBuildStore(builder, packed, ptr); -} - diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_query.c b/src/gallium/drivers/llvmpipe/lp_bld_format_query.c new file mode 100644 index 00000000000..f3832d07ff9 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_format_query.c @@ -0,0 +1,72 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Utility functions to make assertions about formats. + * + * This module centralizes most of logic used when determining what algorithm + * is most suitable (i.e., most efficient yet correct) for a given format. + * + * It might be possible to move some of these functions to u_format module, + * but since tiny differences in the format my render it more/less + * appropriate to a given algorithm it is impossible to make any long term + * guarantee about the semantics of these functions. + * + * @author Jose Fonseca <[email protected]> + */ + + +#include "util/u_format.h" + +#include "lp_bld_format.h" + + +/** + * Whether this format is a 4 rgba8 variant + */ +boolean +lp_format_is_rgba8(const struct util_format_description *desc) +{ + unsigned chan; + + if(desc->block.width != 1 || + desc->block.height != 1 || + desc->block.bits != 32) + return FALSE; + + for(chan = 0; chan < 4; ++chan) { + if(desc->channel[chan].type != UTIL_FORMAT_TYPE_UNSIGNED && + desc->channel[chan].type != UTIL_FORMAT_TYPE_SIGNED && + desc->channel[chan].type != UTIL_FORMAT_TYPE_VOID) + return FALSE; + if(desc->channel[chan].size != 8) + return FALSE; + } + + return TRUE; +} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c index 66bebdcdec8..64151d169da 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c @@ -34,66 +34,17 @@ #include "lp_bld_format.h" -/** - * Gather elements from scatter positions in memory into a single vector. - * - * @param src_width src element width - * @param dst_width result element width (source will be expanded to fit) - * @param length length of the offsets, - * @param base_ptr base pointer, should be a i8 pointer type. - * @param offsets vector with offsets - */ -LLVMValueRef -lp_build_gather(LLVMBuilderRef builder, - unsigned length, - unsigned src_width, - unsigned dst_width, - LLVMValueRef base_ptr, - LLVMValueRef offsets) -{ - LLVMTypeRef src_type = LLVMIntType(src_width); - LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0); - LLVMTypeRef dst_elem_type = LLVMIntType(dst_width); - LLVMTypeRef dst_vec_type = LLVMVectorType(dst_elem_type, length); - LLVMValueRef res; - unsigned i; - - res = LLVMGetUndef(dst_vec_type); - for(i = 0; i < length; ++i) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - LLVMValueRef elem_offset; - LLVMValueRef elem_ptr; - LLVMValueRef elem; - - elem_offset = LLVMBuildExtractElement(builder, offsets, index, ""); - elem_ptr = LLVMBuildGEP(builder, base_ptr, &elem_offset, 1, ""); - elem_ptr = LLVMBuildBitCast(builder, elem_ptr, src_ptr_type, ""); - elem = LLVMBuildLoad(builder, elem_ptr, ""); - - assert(src_width <= dst_width); - if(src_width > dst_width) - elem = LLVMBuildTrunc(builder, elem, dst_elem_type, ""); - if(src_width < dst_width) - elem = LLVMBuildZExt(builder, elem, dst_elem_type, ""); - - res = LLVMBuildInsertElement(builder, res, elem, index, ""); - } - - return res; -} - - static LLVMValueRef -lp_build_format_swizzle(struct lp_type type, - const LLVMValueRef *inputs, - enum util_format_swizzle swizzle) +lp_build_format_swizzle_chan_soa(struct lp_type type, + const LLVMValueRef *unswizzled, + enum util_format_swizzle swizzle) { switch (swizzle) { case UTIL_FORMAT_SWIZZLE_X: case UTIL_FORMAT_SWIZZLE_Y: case UTIL_FORMAT_SWIZZLE_Z: case UTIL_FORMAT_SWIZZLE_W: - return inputs[swizzle]; + return unswizzled[swizzle]; case UTIL_FORMAT_SWIZZLE_0: return lp_build_zero(type); case UTIL_FORMAT_SWIZZLE_1: @@ -108,6 +59,28 @@ lp_build_format_swizzle(struct lp_type type, void +lp_build_format_swizzle_soa(const struct util_format_description *format_desc, + struct lp_type type, + const LLVMValueRef *unswizzled, + LLVMValueRef *swizzled) +{ + if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { + enum util_format_swizzle swizzle = format_desc->swizzle[0]; + LLVMValueRef depth = lp_build_format_swizzle_chan_soa(type, unswizzled, swizzle); + swizzled[2] = swizzled[1] = swizzled[0] = depth; + swizzled[3] = lp_build_one(type); + } + else { + unsigned chan; + for (chan = 0; chan < 4; ++chan) { + enum util_format_swizzle swizzle = format_desc->swizzle[chan]; + swizzled[chan] = lp_build_format_swizzle_chan_soa(type, unswizzled, swizzle); + } + } +} + + +void lp_build_unpack_rgba_soa(LLVMBuilderRef builder, const struct util_format_description *format_desc, struct lp_type type, @@ -172,38 +145,5 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder, start = stop; } - if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { - enum util_format_swizzle swizzle = format_desc->swizzle[0]; - LLVMValueRef depth = lp_build_format_swizzle(type, inputs, swizzle); - rgba[2] = rgba[1] = rgba[0] = depth; - rgba[3] = lp_build_one(type); - } - else { - for (chan = 0; chan < 4; ++chan) { - enum util_format_swizzle swizzle = format_desc->swizzle[chan]; - rgba[chan] = lp_build_format_swizzle(type, inputs, swizzle); - } - } -} - - -void -lp_build_load_rgba_soa(LLVMBuilderRef builder, - const struct util_format_description *format_desc, - struct lp_type type, - LLVMValueRef base_ptr, - LLVMValueRef offsets, - LLVMValueRef *rgba) -{ - LLVMValueRef packed; - - assert(format_desc->block.width == 1); - assert(format_desc->block.height == 1); - assert(format_desc->block.bits <= type.width); - - packed = lp_build_gather(builder, - type.length, format_desc->block.bits, type.width, - base_ptr, offsets); - - lp_build_unpack_rgba_soa(builder, format_desc, type, packed, rgba); + lp_build_format_swizzle_soa(format_desc, type, inputs, rgba); } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c index 338dbca6d1e..49dab8ab61e 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c @@ -109,32 +109,6 @@ coeffs_init(struct lp_build_interp_soa_context *bld, /** - * Small vector x scale multiplication optimization. - * - * TODO: Should be elsewhere. - */ -static LLVMValueRef -coeff_multiply(struct lp_build_interp_soa_context *bld, - LLVMValueRef coeff, - int step) -{ - LLVMValueRef factor; - - switch(step) { - case 0: - return bld->base.zero; - case 1: - return coeff; - case 2: - return lp_build_add(&bld->base, coeff, coeff); - default: - factor = lp_build_const_scalar(bld->base.type, (double)step); - return lp_build_mul(&bld->base, coeff, factor); - } -} - - -/** * Multiply the dadx and dady with the xstep and ystep respectively. */ static void @@ -149,8 +123,8 @@ coeffs_update(struct lp_build_interp_soa_context *bld) if (mode != TGSI_INTERPOLATE_CONSTANT) { for(chan = 0; chan < NUM_CHANNELS; ++chan) { if(mask & (1 << chan)) { - bld->dadx[attrib][chan] = coeff_multiply(bld, bld->dadx[attrib][chan], bld->xstep); - bld->dady[attrib][chan] = coeff_multiply(bld, bld->dady[attrib][chan], bld->ystep); + bld->dadx[attrib][chan] = lp_build_mul_imm(&bld->base, bld->dadx[attrib][chan], bld->xstep); + bld->dady[attrib][chan] = lp_build_mul_imm(&bld->base, bld->dady[attrib][chan], bld->ystep); } } } @@ -329,8 +303,8 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, unsigned first, last, mask; unsigned attrib; - first = decl->DeclarationRange.First; - last = decl->DeclarationRange.Last; + first = decl->Range.First; + last = decl->Range.Last; mask = decl->Declaration.UsageMask; for( attrib = first; attrib <= last; ++attrib ) { diff --git a/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp b/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp new file mode 100644 index 00000000000..d3f78c06d92 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp @@ -0,0 +1,61 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "pipe/p_config.h" + +#include "lp_bld_misc.h" + + +#ifndef LLVM_NATIVE_ARCH + +namespace llvm { + extern void LinkInJIT(); +} + + +void +LLVMLinkInJIT(void) +{ + llvm::LinkInJIT(); +} + + +extern "C" int X86TargetMachineModule; + + +int +LLVMInitializeNativeTarget(void) +{ +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) + X86TargetMachineModule = 1; +#endif + return 0; +} + + +#endif diff --git a/src/gallium/drivers/llvmpipe/lp_bld_misc.h b/src/gallium/drivers/llvmpipe/lp_bld_misc.h new file mode 100644 index 00000000000..0e787e0b9cb --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_misc.h @@ -0,0 +1,56 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef LP_BLD_MISC_H +#define LP_BLD_MISC_H + + +#include "llvm/Config/config.h" + +#ifdef __cplusplus +extern "C" { +#endif + + +#ifndef LLVM_NATIVE_ARCH + +void +LLVMLinkInJIT(void); + +int +LLVMInitializeNativeTarget(void); + +#endif /* !LLVM_NATIVE_ARCH */ + + +#ifdef __cplusplus +} +#endif + + +#endif /* !LP_BLD_MISC_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_pack.c b/src/gallium/drivers/llvmpipe/lp_bld_pack.c index fe82fda0392..bc360ad77ad 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_pack.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_pack.c @@ -159,11 +159,10 @@ lp_build_unpack2(LLVMBuilderRef builder, assert(!src_type.floating); assert(!dst_type.floating); - assert(dst_type.sign == src_type.sign); assert(dst_type.width == src_type.width * 2); assert(dst_type.length * 2 == src_type.length); - if(src_type.sign) { + if(dst_type.sign && src_type.sign) { /* Replicate the sign bit in the most significant bits */ msb = LLVMBuildAShr(builder, src, lp_build_int_const_scalar(src_type, src_type.width - 1), ""); } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample.c b/src/gallium/drivers/llvmpipe/lp_bld_sample.c new file mode 100644 index 00000000000..af70ddc6ab9 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample.c @@ -0,0 +1,190 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Texture sampling -- common code. + * + * @author Jose Fonseca <[email protected]> + */ + +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "util/u_format.h" +#include "util/u_math.h" +#include "lp_bld_debug.h" +#include "lp_bld_const.h" +#include "lp_bld_arit.h" +#include "lp_bld_type.h" +#include "lp_bld_format.h" +#include "lp_bld_sample.h" + + +void +lp_sampler_static_state(struct lp_sampler_static_state *state, + const struct pipe_texture *texture, + const struct pipe_sampler_state *sampler) +{ + memset(state, 0, sizeof *state); + + if(!texture) + return; + + if(!sampler) + return; + + state->format = texture->format; + state->target = texture->target; + state->pot_width = util_is_pot(texture->width0); + state->pot_height = util_is_pot(texture->height0); + state->pot_depth = util_is_pot(texture->depth0); + + state->wrap_s = sampler->wrap_s; + state->wrap_t = sampler->wrap_t; + state->wrap_r = sampler->wrap_r; + state->min_img_filter = sampler->min_img_filter; + state->min_mip_filter = sampler->min_mip_filter; + state->mag_img_filter = sampler->mag_img_filter; + if(sampler->compare_mode) { + state->compare_mode = sampler->compare_mode; + state->compare_func = sampler->compare_func; + } + state->normalized_coords = sampler->normalized_coords; + state->prefilter = sampler->prefilter; +} + + +/** + * Gather elements from scatter positions in memory into a single vector. + * + * @param src_width src element width + * @param dst_width result element width (source will be expanded to fit) + * @param length length of the offsets, + * @param base_ptr base pointer, should be a i8 pointer type. + * @param offsets vector with offsets + */ +LLVMValueRef +lp_build_gather(LLVMBuilderRef builder, + unsigned length, + unsigned src_width, + unsigned dst_width, + LLVMValueRef base_ptr, + LLVMValueRef offsets) +{ + LLVMTypeRef src_type = LLVMIntType(src_width); + LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0); + LLVMTypeRef dst_elem_type = LLVMIntType(dst_width); + LLVMTypeRef dst_vec_type = LLVMVectorType(dst_elem_type, length); + LLVMValueRef res; + unsigned i; + + res = LLVMGetUndef(dst_vec_type); + for(i = 0; i < length; ++i) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef elem_offset; + LLVMValueRef elem_ptr; + LLVMValueRef elem; + + elem_offset = LLVMBuildExtractElement(builder, offsets, index, ""); + elem_ptr = LLVMBuildGEP(builder, base_ptr, &elem_offset, 1, ""); + elem_ptr = LLVMBuildBitCast(builder, elem_ptr, src_ptr_type, ""); + elem = LLVMBuildLoad(builder, elem_ptr, ""); + + assert(src_width <= dst_width); + if(src_width > dst_width) + elem = LLVMBuildTrunc(builder, elem, dst_elem_type, ""); + if(src_width < dst_width) + elem = LLVMBuildZExt(builder, elem, dst_elem_type, ""); + + res = LLVMBuildInsertElement(builder, res, elem, index, ""); + } + + return res; +} + + +/** + * Compute the offset of a pixel. + * + * x, y, y_stride are vectors + */ +LLVMValueRef +lp_build_sample_offset(struct lp_build_context *bld, + const struct util_format_description *format_desc, + LLVMValueRef x, + LLVMValueRef y, + LLVMValueRef y_stride, + LLVMValueRef data_ptr) +{ + LLVMValueRef x_stride; + LLVMValueRef offset; + + x_stride = lp_build_const_scalar(bld->type, format_desc->block.bits/8); + + if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { + LLVMValueRef x_lo, x_hi; + LLVMValueRef y_lo, y_hi; + LLVMValueRef x_stride_lo, x_stride_hi; + LLVMValueRef y_stride_lo, y_stride_hi; + LLVMValueRef x_offset_lo, x_offset_hi; + LLVMValueRef y_offset_lo, y_offset_hi; + LLVMValueRef offset_lo, offset_hi; + + x_lo = LLVMBuildAnd(bld->builder, x, bld->one, ""); + y_lo = LLVMBuildAnd(bld->builder, y, bld->one, ""); + + x_hi = LLVMBuildLShr(bld->builder, x, bld->one, ""); + y_hi = LLVMBuildLShr(bld->builder, y, bld->one, ""); + + x_stride_lo = x_stride; + y_stride_lo = lp_build_const_scalar(bld->type, 2*format_desc->block.bits/8); + + x_stride_hi = lp_build_const_scalar(bld->type, 4*format_desc->block.bits/8); + y_stride_hi = LLVMBuildShl(bld->builder, y_stride, bld->one, ""); + + x_offset_lo = lp_build_mul(bld, x_lo, x_stride_lo); + y_offset_lo = lp_build_mul(bld, y_lo, y_stride_lo); + offset_lo = lp_build_add(bld, x_offset_lo, y_offset_lo); + + x_offset_hi = lp_build_mul(bld, x_hi, x_stride_hi); + y_offset_hi = lp_build_mul(bld, y_hi, y_stride_hi); + offset_hi = lp_build_add(bld, x_offset_hi, y_offset_hi); + + offset = lp_build_add(bld, offset_hi, offset_lo); + } + else { + LLVMValueRef x_offset; + LLVMValueRef y_offset; + + x_offset = lp_build_mul(bld, x, x_stride); + y_offset = lp_build_mul(bld, y, y_stride); + + offset = lp_build_add(bld, x_offset, y_offset); + } + + return offset; +} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample.h b/src/gallium/drivers/llvmpipe/lp_bld_sample.h index 403d0e48367..8cb8210ca76 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_sample.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample.h @@ -40,7 +40,9 @@ struct pipe_texture; struct pipe_sampler_state; +struct util_format_description; struct lp_type; +struct lp_build_context; /** @@ -119,6 +121,24 @@ lp_sampler_static_state(struct lp_sampler_static_state *state, const struct pipe_sampler_state *sampler); +LLVMValueRef +lp_build_gather(LLVMBuilderRef builder, + unsigned length, + unsigned src_width, + unsigned dst_width, + LLVMValueRef base_ptr, + LLVMValueRef offsets); + + +LLVMValueRef +lp_build_sample_offset(struct lp_build_context *bld, + const struct util_format_description *format_desc, + LLVMValueRef x, + LLVMValueRef y, + LLVMValueRef y_stride, + LLVMValueRef data_ptr); + + void lp_build_sample_soa(LLVMBuilderRef builder, const struct lp_sampler_static_state *static_state, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c index 1a47ca32d2d..47b68b71e25 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c @@ -27,7 +27,7 @@ /** * @file - * Texture sampling. + * Texture sampling -- SoA. * * @author Jose Fonseca <[email protected]> */ @@ -35,54 +35,23 @@ #include "pipe/p_defines.h" #include "pipe/p_state.h" #include "util/u_debug.h" +#include "util/u_debug_dump.h" #include "util/u_memory.h" #include "util/u_math.h" #include "util/u_format.h" +#include "util/u_cpu_detect.h" #include "lp_bld_debug.h" #include "lp_bld_type.h" #include "lp_bld_const.h" +#include "lp_bld_conv.h" #include "lp_bld_arit.h" #include "lp_bld_logic.h" #include "lp_bld_swizzle.h" +#include "lp_bld_pack.h" #include "lp_bld_format.h" #include "lp_bld_sample.h" -void -lp_sampler_static_state(struct lp_sampler_static_state *state, - const struct pipe_texture *texture, - const struct pipe_sampler_state *sampler) -{ - memset(state, 0, sizeof *state); - - if(!texture) - return; - - if(!sampler) - return; - - state->format = texture->format; - state->target = texture->target; - state->pot_width = util_is_pot(texture->width[0]); - state->pot_height = util_is_pot(texture->height[0]); - state->pot_depth = util_is_pot(texture->depth[0]); - - state->wrap_s = sampler->wrap_s; - state->wrap_t = sampler->wrap_t; - state->wrap_r = sampler->wrap_r; - state->min_img_filter = sampler->min_img_filter; - state->min_mip_filter = sampler->min_mip_filter; - state->mag_img_filter = sampler->mag_img_filter; - if(sampler->compare_mode) { - state->compare_mode = sampler->compare_mode; - state->compare_func = sampler->compare_func; - } - state->normalized_coords = sampler->normalized_coords; - state->prefilter = sampler->prefilter; -} - - - /** * Keep all information for sampling code generation in a single place. */ @@ -111,66 +80,61 @@ struct lp_build_sample_context static void -lp_build_sample_texel(struct lp_build_sample_context *bld, - LLVMValueRef x, - LLVMValueRef y, - LLVMValueRef y_stride, - LLVMValueRef data_ptr, - LLVMValueRef *texel) +lp_build_sample_texel_soa(struct lp_build_sample_context *bld, + LLVMValueRef x, + LLVMValueRef y, + LLVMValueRef y_stride, + LLVMValueRef data_ptr, + LLVMValueRef *texel) { - struct lp_build_context *int_coord_bld = &bld->int_coord_bld; - LLVMValueRef x_stride; LLVMValueRef offset; + LLVMValueRef packed; + + offset = lp_build_sample_offset(&bld->int_coord_bld, + bld->format_desc, + x, y, y_stride, + data_ptr); + + assert(bld->format_desc->block.width == 1); + assert(bld->format_desc->block.height == 1); + assert(bld->format_desc->block.bits <= bld->texel_type.width); + + packed = lp_build_gather(bld->builder, + bld->texel_type.length, + bld->format_desc->block.bits, + bld->texel_type.width, + data_ptr, offset); + + lp_build_unpack_rgba_soa(bld->builder, + bld->format_desc, + bld->texel_type, + packed, texel); +} - x_stride = lp_build_const_scalar(bld->int_coord_type, bld->format_desc->block.bits/8); - - if(bld->format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { - LLVMValueRef x_lo, x_hi; - LLVMValueRef y_lo, y_hi; - LLVMValueRef x_stride_lo, x_stride_hi; - LLVMValueRef y_stride_lo, y_stride_hi; - LLVMValueRef x_offset_lo, x_offset_hi; - LLVMValueRef y_offset_lo, y_offset_hi; - LLVMValueRef offset_lo, offset_hi; - - x_lo = LLVMBuildAnd(bld->builder, x, int_coord_bld->one, ""); - y_lo = LLVMBuildAnd(bld->builder, y, int_coord_bld->one, ""); - - x_hi = LLVMBuildLShr(bld->builder, x, int_coord_bld->one, ""); - y_hi = LLVMBuildLShr(bld->builder, y, int_coord_bld->one, ""); - - x_stride_lo = x_stride; - y_stride_lo = lp_build_const_scalar(bld->int_coord_type, 2*bld->format_desc->block.bits/8); - - x_stride_hi = lp_build_const_scalar(bld->int_coord_type, 4*bld->format_desc->block.bits/8); - y_stride_hi = LLVMBuildShl(bld->builder, y_stride, int_coord_bld->one, ""); - - x_offset_lo = lp_build_mul(int_coord_bld, x_lo, x_stride_lo); - y_offset_lo = lp_build_mul(int_coord_bld, y_lo, y_stride_lo); - offset_lo = lp_build_add(int_coord_bld, x_offset_lo, y_offset_lo); - - x_offset_hi = lp_build_mul(int_coord_bld, x_hi, x_stride_hi); - y_offset_hi = lp_build_mul(int_coord_bld, y_hi, y_stride_hi); - offset_hi = lp_build_add(int_coord_bld, x_offset_hi, y_offset_hi); - offset = lp_build_add(int_coord_bld, offset_hi, offset_lo); - } - else { - LLVMValueRef x_offset; - LLVMValueRef y_offset; +static LLVMValueRef +lp_build_sample_packed(struct lp_build_sample_context *bld, + LLVMValueRef x, + LLVMValueRef y, + LLVMValueRef y_stride, + LLVMValueRef data_ptr) +{ + LLVMValueRef offset; - x_offset = lp_build_mul(int_coord_bld, x, x_stride); - y_offset = lp_build_mul(int_coord_bld, y, y_stride); + offset = lp_build_sample_offset(&bld->int_coord_bld, + bld->format_desc, + x, y, y_stride, + data_ptr); - offset = lp_build_add(int_coord_bld, x_offset, y_offset); - } + assert(bld->format_desc->block.width == 1); + assert(bld->format_desc->block.height == 1); + assert(bld->format_desc->block.bits <= bld->texel_type.width); - lp_build_load_rgba_soa(bld->builder, - bld->format_desc, - bld->texel_type, - data_ptr, - offset, - texel); + return lp_build_gather(bld->builder, + bld->texel_type.length, + bld->format_desc->block.bits, + bld->texel_type.width, + data_ptr, offset); } @@ -208,7 +172,8 @@ lp_build_sample_wrap(struct lp_build_sample_context *bld, case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: /* FIXME */ - _debug_printf("warning: failed to translate texture wrap mode %u\n", wrap_mode); + _debug_printf("warning: failed to translate texture wrap mode %s\n", + debug_dump_tex_wrap(wrap_mode, TRUE)); coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero); coord = lp_build_min(int_coord_bld, coord, length_minus_one); break; @@ -240,7 +205,7 @@ lp_build_sample_2d_nearest_soa(struct lp_build_sample_context *bld, x = lp_build_sample_wrap(bld, x, width, bld->static_state->pot_width, bld->static_state->wrap_s); y = lp_build_sample_wrap(bld, y, height, bld->static_state->pot_height, bld->static_state->wrap_t); - lp_build_sample_texel(bld, x, y, stride, data_ptr, texel); + lp_build_sample_texel_soa(bld, x, y, stride, data_ptr, texel); } @@ -286,10 +251,10 @@ lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld, x1 = lp_build_sample_wrap(bld, x1, width, bld->static_state->pot_width, bld->static_state->wrap_s); y1 = lp_build_sample_wrap(bld, y1, height, bld->static_state->pot_height, bld->static_state->wrap_t); - lp_build_sample_texel(bld, x0, y0, stride, data_ptr, neighbors[0][0]); - lp_build_sample_texel(bld, x1, y0, stride, data_ptr, neighbors[0][1]); - lp_build_sample_texel(bld, x0, y1, stride, data_ptr, neighbors[1][0]); - lp_build_sample_texel(bld, x1, y1, stride, data_ptr, neighbors[1][1]); + lp_build_sample_texel_soa(bld, x0, y0, stride, data_ptr, neighbors[0][0]); + lp_build_sample_texel_soa(bld, x1, y0, stride, data_ptr, neighbors[0][1]); + lp_build_sample_texel_soa(bld, x0, y1, stride, data_ptr, neighbors[1][0]); + lp_build_sample_texel_soa(bld, x1, y1, stride, data_ptr, neighbors[1][1]); /* TODO: Don't interpolate missing channels */ for(chan = 0; chan < 4; ++chan) { @@ -304,6 +269,217 @@ lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld, static void +lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder, + struct lp_type dst_type, + LLVMValueRef packed, + LLVMValueRef *rgba) +{ + LLVMValueRef mask = lp_build_int_const_scalar(dst_type, 0xff); + unsigned chan; + + /* Decode the input vector components */ + for (chan = 0; chan < 4; ++chan) { + unsigned start = chan*8; + unsigned stop = start + 8; + LLVMValueRef input; + + input = packed; + + if(start) + input = LLVMBuildLShr(builder, input, lp_build_int_const_scalar(dst_type, start), ""); + + if(stop < 32) + input = LLVMBuildAnd(builder, input, mask, ""); + + input = lp_build_unsigned_norm_to_float(builder, 8, dst_type, input); + + rgba[chan] = input; + } +} + + +static void +lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, + LLVMValueRef s, + LLVMValueRef t, + LLVMValueRef width, + LLVMValueRef height, + LLVMValueRef stride, + LLVMValueRef data_ptr, + LLVMValueRef *texel) +{ + LLVMBuilderRef builder = bld->builder; + struct lp_build_context i32, h16, u8n; + LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type; + LLVMValueRef i32_c8, i32_c128, i32_c255; + LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi; + LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi; + LLVMValueRef x0, x1; + LLVMValueRef y0, y1; + LLVMValueRef neighbors[2][2]; + LLVMValueRef neighbors_lo[2][2]; + LLVMValueRef neighbors_hi[2][2]; + LLVMValueRef packed, packed_lo, packed_hi; + LLVMValueRef unswizzled[4]; + + lp_build_context_init(&i32, builder, lp_type_int(32)); + lp_build_context_init(&h16, builder, lp_type_ufixed(16)); + lp_build_context_init(&u8n, builder, lp_type_unorm(8)); + + i32_vec_type = lp_build_vec_type(i32.type); + h16_vec_type = lp_build_vec_type(h16.type); + u8n_vec_type = lp_build_vec_type(u8n.type); + + s = lp_build_mul_imm(&bld->coord_bld, s, 256); + t = lp_build_mul_imm(&bld->coord_bld, t, 256); + + s = LLVMBuildFPToSI(builder, s, i32_vec_type, ""); + t = LLVMBuildFPToSI(builder, t, i32_vec_type, ""); + + i32_c128 = lp_build_int_const_scalar(i32.type, -128); + s = LLVMBuildAdd(builder, s, i32_c128, ""); + t = LLVMBuildAdd(builder, t, i32_c128, ""); + + i32_c8 = lp_build_int_const_scalar(i32.type, 8); + s_ipart = LLVMBuildAShr(builder, s, i32_c8, ""); + t_ipart = LLVMBuildAShr(builder, t, i32_c8, ""); + + i32_c255 = lp_build_int_const_scalar(i32.type, 255); + s_fpart = LLVMBuildAnd(builder, s, i32_c255, ""); + t_fpart = LLVMBuildAnd(builder, t, i32_c255, ""); + + x0 = s_ipart; + y0 = t_ipart; + + x0 = lp_build_sample_wrap(bld, x0, width, bld->static_state->pot_width, bld->static_state->wrap_s); + y0 = lp_build_sample_wrap(bld, y0, height, bld->static_state->pot_height, bld->static_state->wrap_t); + + x1 = lp_build_add(&bld->int_coord_bld, x0, bld->int_coord_bld.one); + y1 = lp_build_add(&bld->int_coord_bld, y0, bld->int_coord_bld.one); + + x1 = lp_build_sample_wrap(bld, x1, width, bld->static_state->pot_width, bld->static_state->wrap_s); + y1 = lp_build_sample_wrap(bld, y1, height, bld->static_state->pot_height, bld->static_state->wrap_t); + + /* + * Transform 4 x i32 in + * + * s_fpart = {s0, s1, s2, s3} + * + * into 8 x i16 + * + * s_fpart = {00, s0, 00, s1, 00, s2, 00, s3} + * + * into two 8 x i16 + * + * s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1} + * s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3} + * + * and likewise for t_fpart. There is no risk of loosing precision here + * since the fractional parts only use the lower 8bits. + */ + + s_fpart = LLVMBuildBitCast(builder, s_fpart, h16_vec_type, ""); + t_fpart = LLVMBuildBitCast(builder, t_fpart, h16_vec_type, ""); + + { + LLVMTypeRef elem_type = LLVMInt32Type(); + LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH]; + LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH]; + LLVMValueRef shuffle_lo; + LLVMValueRef shuffle_hi; + unsigned i, j; + + for(j = 0; j < h16.type.length; j += 4) { + unsigned subindex = util_cpu_caps.little_endian ? 0 : 1; + LLVMValueRef index; + + index = LLVMConstInt(elem_type, j/2 + subindex, 0); + for(i = 0; i < 4; ++i) + shuffles_lo[j + i] = index; + + index = LLVMConstInt(elem_type, h16.type.length/2 + j/2 + subindex, 0); + for(i = 0; i < 4; ++i) + shuffles_hi[j + i] = index; + } + + shuffle_lo = LLVMConstVector(shuffles_lo, h16.type.length); + shuffle_hi = LLVMConstVector(shuffles_hi, h16.type.length); + + s_fpart_lo = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_lo, ""); + t_fpart_lo = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_lo, ""); + s_fpart_hi = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_hi, ""); + t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_hi, ""); + } + + /* + * Fetch the pixels as 4 x 32bit (rgba order might differ): + * + * rgba0 rgba1 rgba2 rgba3 + * + * bit cast them into 16 x u8 + * + * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3 + * + * unpack them into two 8 x i16: + * + * r0 g0 b0 a0 r1 g1 b1 a1 + * r2 g2 b2 a2 r3 g3 b3 a3 + * + * The higher 8 bits of the resulting elements will be zero. + */ + + neighbors[0][0] = lp_build_sample_packed(bld, x0, y0, stride, data_ptr); + neighbors[0][1] = lp_build_sample_packed(bld, x1, y0, stride, data_ptr); + neighbors[1][0] = lp_build_sample_packed(bld, x0, y1, stride, data_ptr); + neighbors[1][1] = lp_build_sample_packed(bld, x1, y1, stride, data_ptr); + + neighbors[0][0] = LLVMBuildBitCast(builder, neighbors[0][0], u8n_vec_type, ""); + neighbors[0][1] = LLVMBuildBitCast(builder, neighbors[0][1], u8n_vec_type, ""); + neighbors[1][0] = LLVMBuildBitCast(builder, neighbors[1][0], u8n_vec_type, ""); + neighbors[1][1] = LLVMBuildBitCast(builder, neighbors[1][1], u8n_vec_type, ""); + + lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][0], &neighbors_lo[0][0], &neighbors_hi[0][0]); + lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][1], &neighbors_lo[0][1], &neighbors_hi[0][1]); + lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][0], &neighbors_lo[1][0], &neighbors_hi[1][0]); + lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][1], &neighbors_lo[1][1], &neighbors_hi[1][1]); + + /* + * Linear interpolate with 8.8 fixed point. + */ + + packed_lo = lp_build_lerp_2d(&h16, + s_fpart_lo, t_fpart_lo, + neighbors_lo[0][0], + neighbors_lo[0][1], + neighbors_lo[1][0], + neighbors_lo[1][1]); + + packed_hi = lp_build_lerp_2d(&h16, + s_fpart_hi, t_fpart_hi, + neighbors_hi[0][0], + neighbors_hi[0][1], + neighbors_hi[1][0], + neighbors_hi[1][1]); + + packed = lp_build_pack2(builder, h16.type, u8n.type, packed_lo, packed_hi); + + /* + * Convert to SoA and swizzle. + */ + + packed = LLVMBuildBitCast(builder, packed, i32_vec_type, ""); + + lp_build_rgba8_to_f32_soa(bld->builder, + bld->texel_type, + packed, unswizzled); + + lp_build_format_swizzle_soa(bld->format_desc, + bld->texel_type, unswizzled, + texel); +} + + +static void lp_build_sample_compare(struct lp_build_sample_context *bld, LLVMValueRef p, LLVMValueRef *texel) @@ -402,7 +578,10 @@ lp_build_sample_soa(LLVMBuilderRef builder, break; case PIPE_TEX_FILTER_LINEAR: case PIPE_TEX_FILTER_ANISO: - lp_build_sample_2d_linear_soa(&bld, s, t, width, height, stride, data_ptr, texel); + if(lp_format_is_rgba8(bld.format_desc)) + lp_build_sample_2d_linear_aos(&bld, s, t, width, height, stride, data_ptr, texel); + else + lp_build_sample_2d_linear_soa(&bld, s, t, width, height, stride, data_ptr, texel); break; default: assert(0); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c index 64027de6aa9..fe2db04d8fa 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c @@ -64,7 +64,7 @@ for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++) #define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ - ((INST)->FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) + ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN))) #define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ if (IS_DST0_CHANNEL_ENABLED( INST, CHAN )) @@ -157,7 +157,7 @@ emit_fetch( unsigned index, const unsigned chan_index ) { - const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[index]; + const struct tgsi_full_src_register *reg = &inst->Src[index]; unsigned swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); LLVMValueRef res; @@ -167,9 +167,9 @@ emit_fetch( case TGSI_SWIZZLE_Z: case TGSI_SWIZZLE_W: - switch (reg->SrcRegister.File) { + switch (reg->Register.File) { case TGSI_FILE_CONSTANT: { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->SrcRegister.Index*4 + swizzle, 0); + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->Register.Index*4 + swizzle, 0); LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, ""); LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, ""); res = lp_build_broadcast_scalar(&bld->base, scalar); @@ -177,17 +177,17 @@ emit_fetch( } case TGSI_FILE_IMMEDIATE: - res = bld->immediates[reg->SrcRegister.Index][swizzle]; + res = bld->immediates[reg->Register.Index][swizzle]; assert(res); break; case TGSI_FILE_INPUT: - res = bld->inputs[reg->SrcRegister.Index][swizzle]; + res = bld->inputs[reg->Register.Index][swizzle]; assert(res); break; case TGSI_FILE_TEMPORARY: - res = bld->temps[reg->SrcRegister.Index][swizzle]; + res = bld->temps[reg->Register.Index][swizzle]; if(!res) return bld->base.undef; break; @@ -267,7 +267,7 @@ emit_store( unsigned chan_index, LLVMValueRef value) { - const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[index]; + const struct tgsi_full_dst_register *reg = &inst->Dst[index]; switch( inst->Instruction.Saturate ) { case TGSI_SAT_NONE: @@ -287,13 +287,13 @@ emit_store( assert(0); } - switch( reg->DstRegister.File ) { + switch( reg->Register.File ) { case TGSI_FILE_OUTPUT: - bld->outputs[reg->DstRegister.Index][chan_index] = value; + bld->outputs[reg->Register.Index][chan_index] = value; break; case TGSI_FILE_TEMPORARY: - bld->temps[reg->DstRegister.Index][chan_index] = value; + bld->temps[reg->Register.Index][chan_index] = value; break; case TGSI_FILE_ADDRESS: @@ -319,14 +319,14 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, boolean projected, LLVMValueRef *texel) { - const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; + const uint unit = inst->Src[1].Register.Index; LLVMValueRef lodbias; LLVMValueRef oow; LLVMValueRef coords[3]; unsigned num_coords; unsigned i; - switch (inst->InstructionExtTexture.Texture) { + switch (inst->Texture.Texture) { case TGSI_TEXTURE_1D: num_coords = 1; break; @@ -375,7 +375,7 @@ emit_kil( struct lp_build_tgsi_soa_context *bld, const struct tgsi_full_instruction *inst ) { - const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[0]; + const struct tgsi_full_src_register *reg = &inst->Src[0]; LLVMValueRef terms[NUM_CHANNELS]; LLVMValueRef mask; unsigned chan_index; @@ -423,15 +423,15 @@ indirect_temp_reference(const struct tgsi_full_instruction *inst) { uint i; for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[i]; - if (reg->SrcRegister.File == TGSI_FILE_TEMPORARY && - reg->SrcRegister.Indirect) + const struct tgsi_full_src_register *reg = &inst->Src[i]; + if (reg->Register.File == TGSI_FILE_TEMPORARY && + reg->Register.Indirect) return TRUE; } for (i = 0; i < inst->Instruction.NumDstRegs; i++) { - const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[i]; - if (reg->DstRegister.File == TGSI_FILE_TEMPORARY && - reg->DstRegister.Indirect) + const struct tgsi_full_dst_register *reg = &inst->Dst[i]; + if (reg->Register.File == TGSI_FILE_TEMPORARY && + reg->Register.Indirect) return TRUE; } return FALSE; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.h b/src/gallium/drivers/llvmpipe/lp_bld_type.h index 46c298fa206..2fb233d335f 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_type.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_type.h @@ -43,13 +43,18 @@ /** + * Native SIMD register width. + * + * 128 for all architectures we care about. + */ +#define LP_NATIVE_VECTOR_WIDTH 128 + +/** * Several functions can only cope with vectors of length up to this value. * You may need to increase that value if you want to represent bigger vectors. */ #define LP_MAX_VECTOR_LENGTH 16 -#define LP_MAX_TYPE_WIDTH 64 - /** * The LLVM type system can't conveniently express all the things we care about @@ -134,6 +139,91 @@ struct lp_build_context }; +static INLINE struct lp_type +lp_type_float(unsigned width) +{ + struct lp_type res_type; + + memset(&res_type, 0, sizeof res_type); + res_type.floating = TRUE; + res_type.sign = TRUE; + res_type.width = width; + res_type.length = LP_NATIVE_VECTOR_WIDTH / width; + + return res_type; +} + + +static INLINE struct lp_type +lp_type_int(unsigned width) +{ + struct lp_type res_type; + + memset(&res_type, 0, sizeof res_type); + res_type.sign = TRUE; + res_type.width = width; + res_type.length = LP_NATIVE_VECTOR_WIDTH / width; + + return res_type; +} + + +static INLINE struct lp_type +lp_type_uint(unsigned width) +{ + struct lp_type res_type; + + memset(&res_type, 0, sizeof res_type); + res_type.width = width; + res_type.length = LP_NATIVE_VECTOR_WIDTH / width; + + return res_type; +} + + +static INLINE struct lp_type +lp_type_unorm(unsigned width) +{ + struct lp_type res_type; + + memset(&res_type, 0, sizeof res_type); + res_type.norm = TRUE; + res_type.width = width; + res_type.length = LP_NATIVE_VECTOR_WIDTH / width; + + return res_type; +} + + +static INLINE struct lp_type +lp_type_fixed(unsigned width) +{ + struct lp_type res_type; + + memset(&res_type, 0, sizeof res_type); + res_type.sign = TRUE; + res_type.fixed = TRUE; + res_type.width = width; + res_type.length = LP_NATIVE_VECTOR_WIDTH / width; + + return res_type; +} + + +static INLINE struct lp_type +lp_type_ufixed(unsigned width) +{ + struct lp_type res_type; + + memset(&res_type, 0, sizeof res_type); + res_type.fixed = TRUE; + res_type.width = width; + res_type.length = LP_NATIVE_VECTOR_WIDTH / width; + + return res_type; +} + + LLVMTypeRef lp_build_elem_type(struct lp_type type); diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index 202cb8ef439..c081f6de036 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -31,13 +31,13 @@ */ #include "draw/draw_context.h" +#include "draw/draw_vbuf.h" #include "pipe/p_defines.h" #include "util/u_math.h" #include "util/u_memory.h" #include "lp_clear.h" #include "lp_context.h" #include "lp_flush.h" -#include "lp_prim_setup.h" #include "lp_prim_vbuf.h" #include "lp_state.h" #include "lp_surface.h" @@ -180,7 +180,7 @@ llvmpipe_create( struct pipe_screen *screen ) llvmpipe->pipe.delete_blend_state = llvmpipe_delete_blend_state; llvmpipe->pipe.create_sampler_state = llvmpipe_create_sampler_state; - llvmpipe->pipe.bind_sampler_states = llvmpipe_bind_sampler_states; + llvmpipe->pipe.bind_fragment_sampler_states = llvmpipe_bind_sampler_states; llvmpipe->pipe.delete_sampler_state = llvmpipe_delete_sampler_state; llvmpipe->pipe.create_depth_stencil_alpha_state = llvmpipe_create_depth_stencil_state; @@ -205,7 +205,7 @@ llvmpipe_create( struct pipe_screen *screen ) llvmpipe->pipe.set_framebuffer_state = llvmpipe_set_framebuffer_state; llvmpipe->pipe.set_polygon_stipple = llvmpipe_set_polygon_stipple; llvmpipe->pipe.set_scissor_state = llvmpipe_set_scissor_state; - llvmpipe->pipe.set_sampler_textures = llvmpipe_set_sampler_textures; + llvmpipe->pipe.set_fragment_sampler_textures = llvmpipe_set_sampler_textures; llvmpipe->pipe.set_viewport_state = llvmpipe_set_viewport_state; llvmpipe->pipe.set_vertex_buffers = llvmpipe_set_vertex_buffers; @@ -264,21 +264,21 @@ llvmpipe_create( struct pipe_screen *screen ) (struct tgsi_sampler **) llvmpipe->tgsi.vert_samplers_list); - llvmpipe->setup = lp_draw_render_stage(llvmpipe); - if (!llvmpipe->setup) - goto fail; - if (debug_get_bool_option( "LP_NO_RAST", FALSE )) llvmpipe->no_rast = TRUE; - if (debug_get_bool_option( "LP_NO_VBUF", FALSE )) { - /* Deprecated path -- vbuf is the intended interface to the draw module: - */ - draw_set_rasterize_stage(llvmpipe->draw, llvmpipe->setup); - } - else { - lp_init_vbuf(llvmpipe); - } + llvmpipe->vbuf_backend = lp_create_vbuf_backend(llvmpipe); + if (!llvmpipe->vbuf_backend) + goto fail; + + llvmpipe->vbuf = draw_vbuf_stage(llvmpipe->draw, llvmpipe->vbuf_backend); + if (!llvmpipe->vbuf) + goto fail; + + draw_set_rasterize_stage(llvmpipe->draw, llvmpipe->vbuf); + draw_set_render(llvmpipe->draw, llvmpipe->vbuf_backend); + + /* plug in AA line/point stages */ draw_install_aaline_stage(llvmpipe->draw, &llvmpipe->pipe); diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index 7df340554e0..3ad95d0bfc2 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -121,9 +121,10 @@ struct llvmpipe_context { /** The primitive drawing context */ struct draw_context *draw; - struct draw_stage *setup; + + /** Draw module backend */ + struct vbuf_render *vbuf_backend; struct draw_stage *vbuf; - struct llvmpipe_vbuf_render *vbuf_render; boolean dirty_render_cache; diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index 1126bf90b96..bce3baec164 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -39,6 +39,7 @@ #include "util/u_cpu_detect.h" #include "lp_screen.h" #include "lp_bld_intr.h" +#include "lp_bld_misc.h" #include "lp_jit.h" @@ -153,13 +154,12 @@ lp_jit_screen_init(struct llvmpipe_screen *screen) #if 0 /* For simulating less capable machines */ util_cpu_caps.has_sse3 = 0; + util_cpu_caps.has_ssse3 = 0; util_cpu_caps.has_sse4_1 = 0; #endif -#ifdef LLVM_NATIVE_ARCH LLVMLinkInJIT(); LLVMInitializeNativeTarget(); -#endif screen->module = LLVMModuleCreateWithName("llvmpipe"); @@ -168,7 +168,7 @@ lp_jit_screen_init(struct llvmpipe_screen *screen) if (LLVMCreateJITCompiler(&screen->engine, screen->provider, 1, &error)) { _debug_printf("%s\n", error); LLVMDisposeMessage(error); - abort(); + assert(0); } screen->target = LLVMGetExecutionEngineTargetData(screen->engine); diff --git a/src/gallium/drivers/llvmpipe/lp_prim_setup.c b/src/gallium/drivers/llvmpipe/lp_prim_setup.c deleted file mode 100644 index b14f8fb99d9..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_prim_setup.c +++ /dev/null @@ -1,190 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * \brief A draw stage that drives our triangle setup routines from - * within the draw pipeline. One of two ways to drive setup, the - * other being in lp_prim_vbuf.c. - * - * \author Keith Whitwell <[email protected]> - * \author Brian Paul - */ - - -#include "lp_context.h" -#include "lp_setup.h" -#include "lp_state.h" -#include "lp_prim_setup.h" -#include "draw/draw_pipe.h" -#include "draw/draw_vertex.h" -#include "util/u_memory.h" - -/** - * Triangle setup info (derived from draw_stage). - * Also used for line drawing (taking some liberties). - */ -struct setup_stage { - struct draw_stage stage; /**< This must be first (base class) */ - - struct setup_context *setup; -}; - - - -/** - * Basically a cast wrapper. - */ -static INLINE struct setup_stage *setup_stage( struct draw_stage *stage ) -{ - return (struct setup_stage *)stage; -} - - -typedef const float (*cptrf4)[4]; - -static void -do_tri(struct draw_stage *stage, struct prim_header *prim) -{ - struct setup_stage *setup = setup_stage( stage ); - - llvmpipe_setup_tri( setup->setup, - (cptrf4)prim->v[0]->data, - (cptrf4)prim->v[1]->data, - (cptrf4)prim->v[2]->data ); -} - -static void -do_line(struct draw_stage *stage, struct prim_header *prim) -{ - struct setup_stage *setup = setup_stage( stage ); - - llvmpipe_setup_line( setup->setup, - (cptrf4)prim->v[0]->data, - (cptrf4)prim->v[1]->data ); -} - -static void -do_point(struct draw_stage *stage, struct prim_header *prim) -{ - struct setup_stage *setup = setup_stage( stage ); - - llvmpipe_setup_point( setup->setup, - (cptrf4)prim->v[0]->data ); -} - - - - -static void setup_begin( struct draw_stage *stage ) -{ - struct setup_stage *setup = setup_stage(stage); - - llvmpipe_setup_prepare( setup->setup ); - - stage->point = do_point; - stage->line = do_line; - stage->tri = do_tri; -} - - -static void setup_first_point( struct draw_stage *stage, - struct prim_header *header ) -{ - setup_begin(stage); - stage->point( stage, header ); -} - -static void setup_first_line( struct draw_stage *stage, - struct prim_header *header ) -{ - setup_begin(stage); - stage->line( stage, header ); -} - - -static void setup_first_tri( struct draw_stage *stage, - struct prim_header *header ) -{ - setup_begin(stage); - stage->tri( stage, header ); -} - - - -static void setup_flush( struct draw_stage *stage, - unsigned flags ) -{ - stage->point = setup_first_point; - stage->line = setup_first_line; - stage->tri = setup_first_tri; -} - - -static void reset_stipple_counter( struct draw_stage *stage ) -{ -} - - -static void render_destroy( struct draw_stage *stage ) -{ - struct setup_stage *ssetup = setup_stage(stage); - llvmpipe_setup_destroy_context(ssetup->setup); - FREE( stage ); -} - - -/** - * Create a new primitive setup/render stage. - */ -struct draw_stage *lp_draw_render_stage( struct llvmpipe_context *llvmpipe ) -{ - struct setup_stage *sstage = CALLOC_STRUCT(setup_stage); - - sstage->setup = llvmpipe_setup_create_context(llvmpipe); - sstage->stage.draw = llvmpipe->draw; - sstage->stage.point = setup_first_point; - sstage->stage.line = setup_first_line; - sstage->stage.tri = setup_first_tri; - sstage->stage.flush = setup_flush; - sstage->stage.reset_stipple_counter = reset_stipple_counter; - sstage->stage.destroy = render_destroy; - - return (struct draw_stage *)sstage; -} - -struct setup_context * -lp_draw_setup_context( struct draw_stage *stage ) -{ - struct setup_stage *ssetup = setup_stage(stage); - return ssetup->setup; -} - -void -lp_draw_flush( struct draw_stage *stage ) -{ - stage->flush( stage, 0 ); -} diff --git a/src/gallium/drivers/llvmpipe/lp_prim_setup.h b/src/gallium/drivers/llvmpipe/lp_prim_setup.h deleted file mode 100644 index da6cae63751..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_prim_setup.h +++ /dev/null @@ -1,85 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#ifndef LP_PRIM_SETUP_H -#define LP_PRIM_SETUP_H - - -/** - * vbuf is a special stage to gather the stream of triangles, lines, points - * together and reconstruct vertex buffers for hardware upload. - * - * First attempt, work in progress. - * - * TODO: - * - separate out vertex buffer building and primitive emit, ie >1 draw per vb. - * - tell vbuf stage how to build hw vertices directly - * - pass vbuf stage a buffer pointer for direct emit to agp/vram. - * - * - * - * Vertices are just an array of floats, with all the attributes - * packed. We currently assume a layout like: - * - * attr[0][0..3] - window position - * attr[1..n][0..3] - remaining attributes. - * - * Attributes are assumed to be 4 floats wide but are packed so that - * all the enabled attributes run contiguously. - */ - - -struct draw_stage; -struct llvmpipe_context; - - -typedef void (*vbuf_draw_func)( struct pipe_context *pipe, - unsigned prim, - const ushort *elements, - unsigned nr_elements, - const void *vertex_buffer, - unsigned nr_vertices ); - - -extern struct draw_stage * -lp_draw_render_stage( struct llvmpipe_context *llvmpipe ); - -extern struct setup_context * -lp_draw_setup_context( struct draw_stage * ); - -extern void -lp_draw_flush( struct draw_stage * ); - - -extern struct draw_stage * -lp_draw_vbuf_stage( struct draw_context *draw_context, - struct pipe_context *pipe, - vbuf_draw_func draw ); - - -#endif /* LP_PRIM_SETUP_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c index c394dcb61d0..4abff4ecccc 100644 --- a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c +++ b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c @@ -37,10 +37,9 @@ #include "lp_context.h" +#include "lp_setup.h" #include "lp_state.h" #include "lp_prim_vbuf.h" -#include "lp_prim_setup.h" -#include "lp_setup.h" #include "draw/draw_context.h" #include "draw/draw_vbuf.h" #include "util/u_memory.h" @@ -59,6 +58,8 @@ struct llvmpipe_vbuf_render { struct vbuf_render base; struct llvmpipe_context *llvmpipe; + struct setup_context *setup; + uint prim; uint vertex_size; uint nr_vertices; @@ -75,6 +76,11 @@ llvmpipe_vbuf_render(struct vbuf_render *vbr) } + + + + + static const struct vertex_info * lp_vbuf_get_vertex_info(struct vbuf_render *vbr) { @@ -105,36 +111,6 @@ lp_vbuf_allocate_vertices(struct vbuf_render *vbr, static void lp_vbuf_release_vertices(struct vbuf_render *vbr) { -#if 0 - { - struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); - const struct vertex_info *info = - llvmpipe_get_vbuf_vertex_info(cvbr->llvmpipe); - const float *vtx = (const float *) cvbr->vertex_buffer; - uint i, j; - debug_printf("%s (vtx_size = %u, vtx_used = %u)\n", - __FUNCTION__, cvbr->vertex_size, cvbr->nr_vertices); - for (i = 0; i < cvbr->nr_vertices; i++) { - for (j = 0; j < info->num_attribs; j++) { - uint k; - switch (info->attrib[j].emit) { - case EMIT_4F: k = 4; break; - case EMIT_3F: k = 3; break; - case EMIT_2F: k = 2; break; - case EMIT_1F: k = 1; break; - default: assert(0); - } - debug_printf("Vert %u attr %u: ", i, j); - while (k-- > 0) { - debug_printf("%g ", vtx[0]); - vtx++; - } - debug_printf("\n"); - } - } - } -#endif - /* keep the old allocation for next time */ } @@ -160,11 +136,7 @@ static boolean lp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) { struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); - - /* XXX: break this dependency - make setup_context live under - * llvmpipe, rename the old "setup" draw stage to something else. - */ - struct setup_context *setup_ctx = lp_draw_setup_context(cvbr->llvmpipe->setup); + struct setup_context *setup_ctx = cvbr->setup; llvmpipe_setup_prepare( setup_ctx ); @@ -193,14 +165,9 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) struct llvmpipe_context *llvmpipe = cvbr->llvmpipe; const unsigned stride = llvmpipe->vertex_info_vbuf.size * sizeof(float); const void *vertex_buffer = cvbr->vertex_buffer; + struct setup_context *setup_ctx = cvbr->setup; unsigned i; - /* XXX: break this dependency - make setup_context live under - * llvmpipe, rename the old "setup" draw stage to something else. - */ - struct draw_stage *setup = llvmpipe->setup; - struct setup_context *setup_ctx = lp_draw_setup_context(setup); - switch (cvbr->prim) { case PIPE_PRIM_POINTS: for (i = 0; i < nr; i++) { @@ -367,11 +334,6 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) default: assert(0); } - - /* XXX: why are we calling this??? If we had to call something, it - * would be a function in lp_setup.c: - */ - lp_draw_flush( setup ); } @@ -384,17 +346,12 @@ lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) { struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); struct llvmpipe_context *llvmpipe = cvbr->llvmpipe; + struct setup_context *setup_ctx = cvbr->setup; const unsigned stride = llvmpipe->vertex_info_vbuf.size * sizeof(float); const void *vertex_buffer = (void *) get_vert(cvbr->vertex_buffer, start, stride); unsigned i; - /* XXX: break this dependency - make setup_context live under - * llvmpipe, rename the old "setup" draw stage to something else. - */ - struct draw_stage *setup = llvmpipe->setup; - struct setup_context *setup_ctx = lp_draw_setup_context(setup); - switch (cvbr->prim) { case PIPE_PRIM_POINTS: for (i = 0; i < nr; i++) { @@ -568,40 +525,38 @@ static void lp_vbuf_destroy(struct vbuf_render *vbr) { struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); - cvbr->llvmpipe->vbuf_render = NULL; + llvmpipe_setup_destroy_context(cvbr->setup); FREE(cvbr); } /** - * Initialize the post-transform vertex buffer information for the given - * context. + * Create the post-transform vertex handler for the given context. */ -void -lp_init_vbuf(struct llvmpipe_context *lp) +struct vbuf_render * +lp_create_vbuf_backend(struct llvmpipe_context *lp) { - assert(lp->draw); + struct llvmpipe_vbuf_render *cvbr = CALLOC_STRUCT(llvmpipe_vbuf_render); - lp->vbuf_render = CALLOC_STRUCT(llvmpipe_vbuf_render); + assert(lp->draw); - lp->vbuf_render->base.max_indices = LP_MAX_VBUF_INDEXES; - lp->vbuf_render->base.max_vertex_buffer_bytes = LP_MAX_VBUF_SIZE; - lp->vbuf_render->base.get_vertex_info = lp_vbuf_get_vertex_info; - lp->vbuf_render->base.allocate_vertices = lp_vbuf_allocate_vertices; - lp->vbuf_render->base.map_vertices = lp_vbuf_map_vertices; - lp->vbuf_render->base.unmap_vertices = lp_vbuf_unmap_vertices; - lp->vbuf_render->base.set_primitive = lp_vbuf_set_primitive; - lp->vbuf_render->base.draw = lp_vbuf_draw; - lp->vbuf_render->base.draw_arrays = lp_vbuf_draw_arrays; - lp->vbuf_render->base.release_vertices = lp_vbuf_release_vertices; - lp->vbuf_render->base.destroy = lp_vbuf_destroy; + cvbr->base.max_indices = LP_MAX_VBUF_INDEXES; + cvbr->base.max_vertex_buffer_bytes = LP_MAX_VBUF_SIZE; - lp->vbuf_render->llvmpipe = lp; + cvbr->base.get_vertex_info = lp_vbuf_get_vertex_info; + cvbr->base.allocate_vertices = lp_vbuf_allocate_vertices; + cvbr->base.map_vertices = lp_vbuf_map_vertices; + cvbr->base.unmap_vertices = lp_vbuf_unmap_vertices; + cvbr->base.set_primitive = lp_vbuf_set_primitive; + cvbr->base.draw = lp_vbuf_draw; + cvbr->base.draw_arrays = lp_vbuf_draw_arrays; + cvbr->base.release_vertices = lp_vbuf_release_vertices; + cvbr->base.destroy = lp_vbuf_destroy; - lp->vbuf = draw_vbuf_stage(lp->draw, &lp->vbuf_render->base); + cvbr->llvmpipe = lp; - draw_set_rasterize_stage(lp->draw, lp->vbuf); + cvbr->setup = llvmpipe_setup_create_context(cvbr->llvmpipe); - draw_set_render(lp->draw, &lp->vbuf_render->base); + return &cvbr->base; } diff --git a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.h b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.h index 6c4e6063e6d..0676e2f42ac 100644 --- a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.h +++ b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.h @@ -31,8 +31,8 @@ struct llvmpipe_context; -extern void -lp_init_vbuf(struct llvmpipe_context *llvmpipe); +extern struct vbuf_render * +lp_create_vbuf_backend(struct llvmpipe_context *llvmpipe); #endif /* LP_VBUF_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 05189274589..0fb133486aa 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -27,6 +27,7 @@ #include "util/u_memory.h" +#include "util/u_format.h" #include "pipe/p_defines.h" #include "pipe/p_screen.h" @@ -131,17 +132,17 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen, { struct llvmpipe_screen *screen = llvmpipe_screen(_screen); struct llvmpipe_winsys *winsys = screen->winsys; + const struct util_format_description *format_desc; + + format_desc = util_format_description(format); + if(!format_desc) + return FALSE; assert(target == PIPE_TEXTURE_1D || target == PIPE_TEXTURE_2D || target == PIPE_TEXTURE_3D || target == PIPE_TEXTURE_CUBE); - if(format == PIPE_FORMAT_Z16_UNORM) - return FALSE; - if(format == PIPE_FORMAT_S8_UNORM) - return FALSE; - switch(format) { case PIPE_FORMAT_DXT1_RGB: case PIPE_FORMAT_DXT1_RGBA: @@ -152,8 +153,51 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen, break; } - if(tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET) - return winsys->is_displaytarget_format_supported(winsys, format); + if(tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) { + if(format_desc->block.width != 1 || + format_desc->block.height != 1) + return FALSE; + + if(format_desc->layout != UTIL_FORMAT_LAYOUT_SCALAR && + format_desc->layout != UTIL_FORMAT_LAYOUT_ARITH && + format_desc->layout != UTIL_FORMAT_LAYOUT_ARRAY) + return FALSE; + + if(format_desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB && + format_desc->colorspace != UTIL_FORMAT_COLORSPACE_SRGB) + return FALSE; + } + + if(tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET) { + if(!winsys->is_displaytarget_format_supported(winsys, format)) + return FALSE; + } + + if(tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL) { + if(format_desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) + return FALSE; + + /* FIXME: Temporary restriction. See lp_state_fs.c. */ + if(format_desc->block.bits != 32) + return FALSE; + } + + /* FIXME: Temporary restrictions. See lp_bld_sample_soa.c */ + if(tex_usage & PIPE_TEXTURE_USAGE_SAMPLER) { + if(format_desc->block.width != 1 || + format_desc->block.height != 1) + return FALSE; + + if(format_desc->layout != UTIL_FORMAT_LAYOUT_SCALAR && + format_desc->layout != UTIL_FORMAT_LAYOUT_ARITH && + format_desc->layout != UTIL_FORMAT_LAYOUT_ARRAY) + return FALSE; + + if(format_desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB && + format_desc->colorspace != UTIL_FORMAT_COLORSPACE_SRGB && + format_desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) + return FALSE; + } return TRUE; } diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index c43b3da4508..ffcbc9a379f 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -33,7 +33,6 @@ */ #include "lp_context.h" -#include "lp_prim_setup.h" #include "lp_quad.h" #include "lp_setup.h" #include "lp_state.h" @@ -90,6 +89,8 @@ struct setup_context { float oneoverarea; int facing; + float pixel_offset; + struct quad_header quad[MAX_QUADS]; struct quad_header *quad_ptrs[MAX_QUADS]; unsigned count; @@ -483,6 +484,16 @@ static boolean setup_sort_vertices( struct setup_context *setup, ((det > 0.0) ^ (setup->llvmpipe->rasterizer->front_winding == PIPE_WINDING_CW)); + /* Prepare pixel offset for rasterisation: + * - pixel center (0.5, 0.5) for GL, or + * - assume (0.0, 0.0) for other APIs. + */ + if (setup->llvmpipe->rasterizer->gl_rasterization_rules) { + setup->pixel_offset = 0.5f; + } else { + setup->pixel_offset = 0.0f; + } + return TRUE; } @@ -508,7 +519,7 @@ static void tri_pos_coeff( struct setup_context *setup, /* calculate a0 as the value which would be sampled for the * fragment at (0,0), taking into account that we want to sample at - * pixel centers, in other words (0.5, 0.5). + * pixel centers, in other words (pixel_offset, pixel_offset). * * this is neat but unfortunately not a good way to do things for * triangles with very large values of dadx or dady as it will @@ -519,8 +530,8 @@ static void tri_pos_coeff( struct setup_context *setup, * instead - i'll switch to this later. */ setup->coef.a0[0][i] = (setup->vmin[vertSlot][i] - - (dadx * (setup->vmin[0][0] - 0.5f) + - dady * (setup->vmin[0][1] - 0.5f))); + (dadx * (setup->vmin[0][0] - setup->pixel_offset) + + dady * (setup->vmin[0][1] - setup->pixel_offset))); /* debug_printf("attr[%d].%c: %f dx:%f dy:%f\n", @@ -609,8 +620,8 @@ static void tri_linear_coeff( struct setup_context *setup, * instead - i'll switch to this later. */ setup->coef.a0[1 + attrib][i] = (setup->vmin[vertSlot][i] - - (dadx * (setup->vmin[0][0] - 0.5f) + - dady * (setup->vmin[0][1] - 0.5f))); + (dadx * (setup->vmin[0][0] - setup->pixel_offset) + + dady * (setup->vmin[0][1] - setup->pixel_offset))); /* debug_printf("attr[%d].%c: %f dx:%f dy:%f\n", @@ -661,8 +672,8 @@ static void tri_persp_coeff( struct setup_context *setup, setup->coef.dadx[1 + attrib][i] = dadx; setup->coef.dady[1 + attrib][i] = dady; setup->coef.a0[1 + attrib][i] = (mina - - (dadx * (setup->vmin[0][0] - 0.5f) + - dady * (setup->vmin[0][1] - 0.5f))); + (dadx * (setup->vmin[0][0] - setup->pixel_offset) + + dady * (setup->vmin[0][1] - setup->pixel_offset))); } } @@ -746,12 +757,12 @@ static void setup_tri_coefficients( struct setup_context *setup ) static void setup_tri_edges( struct setup_context *setup ) { - float vmin_x = setup->vmin[0][0] + 0.5f; - float vmid_x = setup->vmid[0][0] + 0.5f; + float vmin_x = setup->vmin[0][0] + setup->pixel_offset; + float vmid_x = setup->vmid[0][0] + setup->pixel_offset; - float vmin_y = setup->vmin[0][1] - 0.5f; - float vmid_y = setup->vmid[0][1] - 0.5f; - float vmax_y = setup->vmax[0][1] - 0.5f; + float vmin_y = setup->vmin[0][1] - setup->pixel_offset; + float vmid_y = setup->vmid[0][1] - setup->pixel_offset; + float vmax_y = setup->vmax[0][1] - setup->pixel_offset; setup->emaj.sy = ceilf(vmin_y); setup->emaj.lines = (int) ceilf(vmax_y - setup->emaj.sy); @@ -950,8 +961,8 @@ linear_pos_coeff(struct setup_context *setup, setup->coef.dadx[0][i] = dadx; setup->coef.dady[0][i] = dady; setup->coef.a0[0][i] = (setup->vmin[vertSlot][i] - - (dadx * (setup->vmin[0][0] - 0.5f) + - dady * (setup->vmin[0][1] - 0.5f))); + (dadx * (setup->vmin[0][0] - setup->pixel_offset) + + dady * (setup->vmin[0][1] - setup->pixel_offset))); } @@ -972,8 +983,8 @@ line_linear_coeff(struct setup_context *setup, setup->coef.dadx[1 + attrib][i] = dadx; setup->coef.dady[1 + attrib][i] = dady; setup->coef.a0[1 + attrib][i] = (setup->vmin[vertSlot][i] - - (dadx * (setup->vmin[0][0] - 0.5f) + - dady * (setup->vmin[0][1] - 0.5f))); + (dadx * (setup->vmin[0][0] - setup->pixel_offset) + + dady * (setup->vmin[0][1] - setup->pixel_offset))); } } @@ -998,8 +1009,8 @@ line_persp_coeff(struct setup_context *setup, setup->coef.dadx[1 + attrib][i] = dadx; setup->coef.dady[1 + attrib][i] = dady; setup->coef.a0[1 + attrib][i] = (setup->vmin[vertSlot][i] - - (dadx * (setup->vmin[0][0] - 0.5f) + - dady * (setup->vmin[0][1] - 0.5f))); + (dadx * (setup->vmin[0][0] - setup->pixel_offset) + + dady * (setup->vmin[0][1] - setup->pixel_offset))); } } diff --git a/src/gallium/drivers/llvmpipe/lp_state_blend.c b/src/gallium/drivers/llvmpipe/lp_state_blend.c index 3f03bd00571..b2e75d3b14e 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_state_blend.c @@ -76,7 +76,7 @@ void llvmpipe_set_blend_color( struct pipe_context *pipe, for (i = 0; i < 4; ++i) { uint8_t c = float_to_ubyte(blend_color->color[i]); for (j = 0; j < 16; ++j) - llvmpipe->jit_context.blend_color[i*4 + j] = c; + llvmpipe->jit_context.blend_color[i*16 + j] = c; } } diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index 30fb41ea65d..c753b183c0c 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -65,26 +65,19 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe) if (vinfo->num_attribs == 0) { /* compute vertex layout now */ const struct lp_fragment_shader *lpfs = llvmpipe->fs; - const enum interp_mode colorInterp - = llvmpipe->rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR; + struct vertex_info *vinfo_vbuf = &llvmpipe->vertex_info_vbuf; + const uint num = draw_num_vs_outputs(llvmpipe->draw); uint i; - if (llvmpipe->vbuf) { - /* if using the post-transform vertex buffer, tell draw_vbuf to - * simply emit the whole post-xform vertex as-is: - */ - struct vertex_info *vinfo_vbuf = &llvmpipe->vertex_info_vbuf; - const uint num = draw_num_vs_outputs(llvmpipe->draw); - uint i; - - /* No longer any need to try and emit draw vertex_header info. - */ - vinfo_vbuf->num_attribs = 0; - for (i = 0; i < num; i++) { - draw_emit_vertex_attr(vinfo_vbuf, EMIT_4F, INTERP_PERSPECTIVE, i); - } - draw_compute_vertex_size(vinfo_vbuf); + /* Tell draw_vbuf to simply emit the whole post-xform vertex + * as-is. No longer any need to try and emit draw vertex_header + * info. + */ + vinfo_vbuf->num_attribs = 0; + for (i = 0; i < num; i++) { + draw_emit_vertex_attr(vinfo_vbuf, EMIT_4F, INTERP_PERSPECTIVE, i); } + draw_compute_vertex_size(vinfo_vbuf); /* * Loop over fragment shader inputs, searching for the matching output @@ -112,33 +105,21 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe) switch (lpfs->info.input_semantic_name[i]) { case TGSI_SEMANTIC_POSITION: - src = draw_find_vs_output(llvmpipe->draw, - TGSI_SEMANTIC_POSITION, 0); - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_POS, src); + interp = INTERP_POS; break; case TGSI_SEMANTIC_COLOR: - src = draw_find_vs_output(llvmpipe->draw, TGSI_SEMANTIC_COLOR, - lpfs->info.input_semantic_index[i]); - draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); - break; - - case TGSI_SEMANTIC_FOG: - src = draw_find_vs_output(llvmpipe->draw, TGSI_SEMANTIC_FOG, 0); - draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src); - break; - - case TGSI_SEMANTIC_GENERIC: - case TGSI_SEMANTIC_FACE: - /* this includes texcoords and varying vars */ - src = draw_find_vs_output(llvmpipe->draw, TGSI_SEMANTIC_GENERIC, - lpfs->info.input_semantic_index[i]); - draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src); + if (llvmpipe->rasterizer->flatshade) { + interp = INTERP_CONSTANT; + } break; - - default: - assert(0); } + + /* this includes texcoords and varying vars */ + src = draw_find_vs_output(llvmpipe->draw, + lpfs->info.input_semantic_name[i], + lpfs->info.input_semantic_index[i]); + draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src); } llvmpipe->psize_slot = draw_find_vs_output(llvmpipe->draw, diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 7728ba60763..ee0f69b2af9 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -148,6 +148,20 @@ generate_depth(LLVMBuilderRef builder, format_desc = util_format_description(key->zsbuf_format); assert(format_desc); + /* + * Depths are expected to be between 0 and 1, even if they are stored in + * floats. Setting these bits here will ensure that the lp_build_conv() call + * below won't try to unnecessarily clamp the incoming values. + */ + if(src_type.floating) { + src_type.sign = FALSE; + src_type.norm = TRUE; + } + else { + assert(!src_type.sign); + assert(src_type.norm); + } + /* Pick the depth type. */ dst_type = lp_depth_type(format_desc, src_type.width*src_type.length); @@ -155,14 +169,11 @@ generate_depth(LLVMBuilderRef builder, assert(dst_type.width == src_type.width); assert(dst_type.length == src_type.length); -#if 1 - src = lp_build_clamped_float_to_unsigned_norm(builder, - src_type, - dst_type.width, - src); -#else lp_build_conv(builder, src_type, dst_type, &src, 1, &src, 1); -#endif + + dst_ptr = LLVMBuildBitCast(builder, + dst_ptr, + LLVMPointerType(lp_build_vec_type(dst_type), 0), ""); lp_build_depth_test(builder, &key->depth, @@ -400,6 +411,7 @@ generate_fragment(struct llvmpipe_context *lp, #ifdef DEBUG tgsi_dump(shader->base.tokens, 0); if(key->depth.enabled) { + debug_printf("depth.format = %s\n", pf_name(key->zsbuf_format)); debug_printf("depth.func = %s\n", debug_dump_func(key->depth.func, TRUE)); debug_printf("depth.writemask = %u\n", key->depth.writemask); } @@ -419,6 +431,34 @@ generate_fragment(struct llvmpipe_context *lp, debug_printf("alpha_dst_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_dst_factor, TRUE)); } debug_printf("blend.colormask = 0x%x\n", key->blend.colormask); + for(i = 0; i < PIPE_MAX_SAMPLERS; ++i) { + if(key->sampler[i].format) { + debug_printf("sampler[%u] = \n", i); + debug_printf(" .format = %s\n", + pf_name(key->sampler[i].format)); + debug_printf(" .target = %s\n", + debug_dump_tex_target(key->sampler[i].target, TRUE)); + debug_printf(" .pot = %u %u %u\n", + key->sampler[i].pot_width, + key->sampler[i].pot_height, + key->sampler[i].pot_depth); + debug_printf(" .wrap = %s %s %s\n", + debug_dump_tex_wrap(key->sampler[i].wrap_s, TRUE), + debug_dump_tex_wrap(key->sampler[i].wrap_t, TRUE), + debug_dump_tex_wrap(key->sampler[i].wrap_r, TRUE)); + debug_printf(" .min_img_filter = %s\n", + debug_dump_tex_filter(key->sampler[i].min_img_filter, TRUE)); + debug_printf(" .min_mip_filter = %s\n", + debug_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE)); + debug_printf(" .mag_img_filter = %s\n", + debug_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE)); + if(key->sampler[i].compare_mode) + debug_printf(" .compare_mode = %s\n", debug_dump_func(key->sampler[i].compare_func, TRUE)); + debug_printf(" .normalized_coords = %u\n", key->sampler[i].normalized_coords); + debug_printf(" .prefilter = %u\n", key->sampler[i].prefilter); + } + } + #endif variant = CALLOC_STRUCT(lp_fragment_shader_variant); @@ -582,10 +622,12 @@ generate_fragment(struct llvmpipe_context *lp, * Translate the LLVM IR into machine code. */ +#ifdef DEBUG if(LLVMVerifyFunction(variant->function, LLVMPrintMessageAction)) { LLVMDumpValue(variant->function); - abort(); + assert(0); } +#endif LLVMRunFunctionPassManager(screen->pass, variant->function); diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/src/gallium/drivers/llvmpipe/lp_state_sampler.c index c69d90c723a..8333805a3fd 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c +++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c @@ -102,8 +102,8 @@ llvmpipe_set_sampler_textures(struct pipe_context *pipe, if(tex) { struct llvmpipe_texture *lp_tex = llvmpipe_texture(tex); struct lp_jit_texture *jit_tex = &llvmpipe->jit_context.textures[i]; - jit_tex->width = tex->width[0]; - jit_tex->height = tex->height[0]; + jit_tex->width = tex->width0; + jit_tex->height = tex->height0; jit_tex->stride = lp_tex->stride[0]; if(!lp_tex->dt) jit_tex->data = lp_tex->data; diff --git a/src/gallium/drivers/llvmpipe/lp_state_vs.c b/src/gallium/drivers/llvmpipe/lp_state_vs.c index 15c30296144..8a761648e7e 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_vs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_vs.c @@ -92,5 +92,6 @@ llvmpipe_delete_vs_state(struct pipe_context *pipe, void *vs) (struct lp_vertex_shader *)vs; draw_delete_vertex_shader(llvmpipe->draw, state->draw_data); + FREE( (void *)state->shader.tokens ); FREE( state ); } diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c index e3af81cffb0..29fff91981a 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c @@ -462,6 +462,7 @@ compute_blend_ref(const struct pipe_blend_state *blend, } +ALIGN_STACK static boolean test_one(unsigned verbose, FILE *fp, @@ -530,11 +531,11 @@ test_one(unsigned verbose, success = TRUE; for(i = 0; i < n && success; ++i) { if(mode == AoS) { - uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; - uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; - uint8_t con[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; - uint8_t res[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; - uint8_t ref[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; + ALIGN16_ATTRIB uint8_t src[LP_NATIVE_VECTOR_WIDTH/8]; + ALIGN16_ATTRIB uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8]; + ALIGN16_ATTRIB uint8_t con[LP_NATIVE_VECTOR_WIDTH/8]; + ALIGN16_ATTRIB uint8_t res[LP_NATIVE_VECTOR_WIDTH/8]; + ALIGN16_ATTRIB uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8]; int64_t start_counter = 0; int64_t end_counter = 0; @@ -595,11 +596,11 @@ test_one(unsigned verbose, if(mode == SoA) { const unsigned stride = type.length*type.width/8; - uint8_t src[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; - uint8_t dst[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; - uint8_t con[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; - uint8_t res[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; - uint8_t ref[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; + ALIGN16_ATTRIB uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8]; + ALIGN16_ATTRIB uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8]; + ALIGN16_ATTRIB uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8]; + ALIGN16_ATTRIB uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8]; + ALIGN16_ATTRIB uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8]; int64_t start_counter = 0; int64_t end_counter = 0; boolean mismatch; diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c index ac2a6d05e32..968c7a2d4aa 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_conv.c +++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c @@ -142,6 +142,7 @@ add_conv_test(LLVMModuleRef module, } +ALIGN_STACK static boolean test_one(unsigned verbose, FILE *fp, @@ -229,8 +230,8 @@ test_one(unsigned verbose, for(i = 0; i < n && success; ++i) { unsigned src_stride = src_type.length*src_type.width/8; unsigned dst_stride = dst_type.length*dst_type.width/8; - uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; - uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; + ALIGN16_ATTRIB uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; + ALIGN16_ATTRIB uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; double fref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; uint8_t ref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; int64_t start_counter = 0; diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c index 5dc8297fe90..23ea9ebbe7d 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_format.c +++ b/src/gallium/drivers/llvmpipe/lp_test_format.c @@ -39,6 +39,7 @@ #include "util/u_format.h" #include "lp_bld_format.h" +#include "lp_test.h" struct pixel_test_case @@ -89,34 +90,62 @@ struct pixel_test_case test_cases[] = }; -typedef void (*load_ptr_t)(const void *, float *); +void +write_tsv_header(FILE *fp) +{ + fprintf(fp, + "result\t" + "format\n"); + + fflush(fp); +} + + +static void +write_tsv_row(FILE *fp, + const struct util_format_description *desc, + boolean success) +{ + fprintf(fp, "%s\t", success ? "pass" : "fail"); + + fprintf(fp, "%s\n", desc->name); + + fflush(fp); +} + + +typedef void (*load_ptr_t)(const uint32_t packed, float *); static LLVMValueRef add_load_rgba_test(LLVMModuleRef module, - enum pipe_format format) + const struct util_format_description *desc) { LLVMTypeRef args[2]; LLVMValueRef func; - LLVMValueRef ptr; + LLVMValueRef packed; LLVMValueRef rgba_ptr; LLVMBasicBlockRef block; LLVMBuilderRef builder; LLVMValueRef rgba; - args[0] = LLVMPointerType(LLVMInt8Type(), 0); + args[0] = LLVMInt32Type(); args[1] = LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0); func = LLVMAddFunction(module, "load", LLVMFunctionType(LLVMVoidType(), args, 2, 0)); LLVMSetFunctionCallConv(func, LLVMCCallConv); - ptr = LLVMGetParam(func, 0); + packed = LLVMGetParam(func, 0); rgba_ptr = LLVMGetParam(func, 1); block = LLVMAppendBasicBlock(func, "entry"); builder = LLVMCreateBuilder(); LLVMPositionBuilderAtEnd(builder, block); - rgba = lp_build_load_rgba_aos(builder, format, ptr); + if(desc->block.bits < 32) + packed = LLVMBuildTrunc(builder, packed, LLVMIntType(desc->block.bits), ""); + + rgba = lp_build_unpack_rgba_aos(builder, desc, packed); + LLVMBuildStore(builder, rgba, rgba_ptr); LLVMBuildRetVoid(builder); @@ -126,27 +155,28 @@ add_load_rgba_test(LLVMModuleRef module, } -typedef void (*store_ptr_t)(void *, const float *); +typedef void (*store_ptr_t)(uint32_t *, const float *); static LLVMValueRef add_store_rgba_test(LLVMModuleRef module, - enum pipe_format format) + const struct util_format_description *desc) { LLVMTypeRef args[2]; LLVMValueRef func; - LLVMValueRef ptr; + LLVMValueRef packed_ptr; LLVMValueRef rgba_ptr; LLVMBasicBlockRef block; LLVMBuilderRef builder; LLVMValueRef rgba; + LLVMValueRef packed; - args[0] = LLVMPointerType(LLVMInt8Type(), 0); + args[0] = LLVMPointerType(LLVMInt32Type(), 0); args[1] = LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0); func = LLVMAddFunction(module, "store", LLVMFunctionType(LLVMVoidType(), args, 2, 0)); LLVMSetFunctionCallConv(func, LLVMCCallConv); - ptr = LLVMGetParam(func, 0); + packed_ptr = LLVMGetParam(func, 0); rgba_ptr = LLVMGetParam(func, 1); block = LLVMAppendBasicBlock(func, "entry"); @@ -155,7 +185,12 @@ add_store_rgba_test(LLVMModuleRef module, rgba = LLVMBuildLoad(builder, rgba_ptr, ""); - lp_build_store_rgba_aos(builder, format, ptr, rgba); + packed = lp_build_pack_rgba_aos(builder, desc, rgba); + + if(desc->block.bits < 32) + packed = LLVMBuildZExt(builder, packed, LLVMInt32Type(), ""); + + LLVMBuildStore(builder, packed, packed_ptr); LLVMBuildRetVoid(builder); @@ -164,8 +199,9 @@ add_store_rgba_test(LLVMModuleRef module, } +ALIGN_STACK static boolean -test_format(const struct pixel_test_case *test) +test_format(unsigned verbose, FILE *fp, const struct pixel_test_case *test) { LLVMModuleRef module = NULL; LLVMValueRef load = NULL; @@ -187,8 +223,8 @@ test_format(const struct pixel_test_case *test) module = LLVMModuleCreateWithName("test"); - load = add_load_rgba_test(module, test->format); - store = add_store_rgba_test(module, test->format); + load = add_load_rgba_test(module, desc); + store = add_store_rgba_test(module, desc); if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) { LLVMDumpModule(module); @@ -224,7 +260,7 @@ test_format(const struct pixel_test_case *test) memset(unpacked, 0, sizeof unpacked); packed = 0; - load_ptr(&test->packed, unpacked); + load_ptr(test->packed, unpacked); store_ptr(&packed, unpacked); success = TRUE; @@ -250,25 +286,29 @@ test_format(const struct pixel_test_case *test) if(pass) LLVMDisposePassManager(pass); + if(fp) + write_tsv_row(fp, desc, success); + return success; } -int main(int argc, char **argv) +boolean +test_all(unsigned verbose, FILE *fp) { unsigned i; - int ret; + bool success = TRUE; -#ifdef LLVM_NATIVE_ARCH - LLVMLinkInJIT(); - LLVMInitializeNativeTarget(); -#endif + for (i = 0; i < sizeof(test_cases)/sizeof(test_cases[0]); ++i) + if(!test_format(verbose, fp, &test_cases[i])) + success = FALSE; - util_cpu_detect(); + return success; +} - for (i = 0; i < sizeof(test_cases)/sizeof(test_cases[0]); ++i) - if(!test_format(&test_cases[i])) - ret = 1; - return ret; +boolean +test_some(unsigned verbose, FILE *fp, unsigned long n) +{ + return test_all(verbose, fp); } diff --git a/src/gallium/drivers/llvmpipe/lp_test_main.c b/src/gallium/drivers/llvmpipe/lp_test_main.c index d4767ff52ba..314544aa9a6 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_main.c +++ b/src/gallium/drivers/llvmpipe/lp_test_main.c @@ -37,9 +37,22 @@ #include "util/u_cpu_detect.h" #include "lp_bld_const.h" +#include "lp_bld_misc.h" #include "lp_test.h" +#ifdef PIPE_CC_MSVC +static INLINE double +round(double x) +{ + if (x >= 0.0) + return floor(x + 0.5); + else + return ceil(x - 0.5); +} +#endif + + void dump_type(FILE *fp, struct lp_type type) @@ -367,10 +380,8 @@ int main(int argc, char **argv) n = atoi(argv[i]); } -#ifdef LLVM_NATIVE_ARCH LLVMLinkInJIT(); LLVMInitializeNativeTarget(); -#endif util_cpu_detect(); diff --git a/src/gallium/drivers/llvmpipe/lp_tex_cache.c b/src/gallium/drivers/llvmpipe/lp_tex_cache.c index 773e8482425..c7c4143bc62 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_cache.c +++ b/src/gallium/drivers/llvmpipe/lp_tex_cache.c @@ -36,6 +36,7 @@ #include "util/u_memory.h" #include "util/u_tile.h" #include "util/u_format.h" +#include "util/u_math.h" #include "lp_context.h" #include "lp_surface.h" #include "lp_texture.h" @@ -270,8 +271,8 @@ lp_find_cached_tex_tile(struct llvmpipe_tex_tile_cache *tc, addr.bits.level, addr.bits.z, PIPE_TRANSFER_READ, 0, 0, - tc->texture->width[addr.bits.level], - tc->texture->height[addr.bits.level]); + u_minify(tc->texture->width0, addr.bits.level), + u_minify(tc->texture->height0, addr.bits.level)); tc->tex_trans_map = screen->transfer_map(screen, tc->tex_trans); diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c index a1365a045f1..0d01c07fb5e 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c @@ -544,7 +544,7 @@ compute_lambda(struct tgsi_sampler *tgsi_sampler, float dsdy = s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]; dsdx = fabsf(dsdx); dsdy = fabsf(dsdy); - rho = MAX2(dsdx, dsdy) * texture->width[0]; + rho = MAX2(dsdx, dsdy) * texture->width0; } if (t) { float dtdx = t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]; @@ -552,7 +552,7 @@ compute_lambda(struct tgsi_sampler *tgsi_sampler, float max; dtdx = fabsf(dtdx); dtdy = fabsf(dtdy); - max = MAX2(dtdx, dtdy) * texture->height[0]; + max = MAX2(dtdx, dtdy) * texture->height0; rho = MAX2(rho, max); } if (p) { @@ -561,7 +561,7 @@ compute_lambda(struct tgsi_sampler *tgsi_sampler, float max; dpdx = fabsf(dpdx); dpdy = fabsf(dpdy); - max = MAX2(dpdx, dpdy) * texture->depth[0]; + max = MAX2(dpdx, dpdy) * texture->depth0; rho = MAX2(rho, max); } @@ -726,9 +726,9 @@ get_texel(const struct tgsi_sampler *tgsi_sampler, const struct pipe_texture *texture = samp->texture; const struct pipe_sampler_state *sampler = samp->sampler; - if (x < 0 || x >= (int) texture->width[level] || - y < 0 || y >= (int) texture->height[level] || - z < 0 || z >= (int) texture->depth[level]) { + if (x < 0 || x >= (int) u_minify(texture->width0, level) || + y < 0 || y >= (int) u_minify(texture->height0, level) || + z < 0 || z >= (int) u_minify(texture->depth0, level)) { rgba[0][j] = sampler->border_color[0]; rgba[1][j] = sampler->border_color[1]; rgba[2][j] = sampler->border_color[2]; @@ -1093,8 +1093,8 @@ lp_get_samples_2d_common(struct tgsi_sampler *tgsi_sampler, assert(sampler->normalized_coords); - width = texture->width[level0]; - height = texture->height[level0]; + width = u_minify(texture->width0, level0); + height = u_minify(texture->height0, level0); assert(width > 0); @@ -1250,9 +1250,9 @@ lp_get_samples_3d(struct tgsi_sampler *tgsi_sampler, assert(sampler->normalized_coords); - width = texture->width[level0]; - height = texture->height[level0]; - depth = texture->depth[level0]; + width = u_minify(texture->width0, level0); + height = u_minify(texture->height0, level0); + depth = u_minify(texture->depth0, level0); assert(width > 0); assert(height > 0); @@ -1394,8 +1394,8 @@ lp_get_samples_rect(struct tgsi_sampler *tgsi_sampler, /* texture RECTS cannot be mipmapped */ assert(level0 == level1); - width = texture->width[level0]; - height = texture->height[level0]; + width = u_minify(texture->width0, level0); + height = u_minify(texture->height0, level0); assert(width > 0); @@ -1513,8 +1513,8 @@ lp_get_samples(struct tgsi_sampler *tgsi_sampler, /* Do this elsewhere: */ - samp->xpot = util_unsigned_logbase2( samp->texture->width[0] ); - samp->ypot = util_unsigned_logbase2( samp->texture->height[0] ); + samp->xpot = util_unsigned_logbase2( samp->texture->width0 ); + samp->ypot = util_unsigned_logbase2( samp->texture->height0 ); /* Try to hook in a faster sampler. Ultimately we'll have to * code-generate these. Luckily most of this looks like it is diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c index a00f2495dfc..65d62fd0723 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.c +++ b/src/gallium/drivers/llvmpipe/lp_texture.c @@ -57,9 +57,9 @@ llvmpipe_texture_layout(struct llvmpipe_screen *screen, { struct pipe_texture *pt = &lpt->base; unsigned level; - unsigned width = pt->width[0]; - unsigned height = pt->height[0]; - unsigned depth = pt->depth[0]; + unsigned width = pt->width0; + unsigned height = pt->height0; + unsigned depth = pt->depth0; unsigned buffer_size = 0; @@ -68,9 +68,6 @@ llvmpipe_texture_layout(struct llvmpipe_screen *screen, for (level = 0; level <= pt->last_level; level++) { unsigned nblocksx, nblocksy; - pt->width[level] = width; - pt->height[level] = height; - pt->depth[level] = depth; pt->nblocksx[level] = pf_get_nblocksx(&pt->block, width); pt->nblocksy[level] = pf_get_nblocksy(&pt->block, height); @@ -87,9 +84,9 @@ llvmpipe_texture_layout(struct llvmpipe_screen *screen, ((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) * lpt->stride[level]); - width = minify(width); - height = minify(height); - depth = minify(depth); + width = u_minify(width, 1); + height = u_minify(height, 1); + depth = u_minify(depth, 1); } lpt->data = align_malloc(buffer_size, 16); @@ -104,13 +101,13 @@ llvmpipe_displaytarget_layout(struct llvmpipe_screen *screen, struct llvmpipe_winsys *winsys = screen->winsys; pf_get_block(lpt->base.format, &lpt->base.block); - lpt->base.nblocksx[0] = pf_get_nblocksx(&lpt->base.block, lpt->base.width[0]); - lpt->base.nblocksy[0] = pf_get_nblocksy(&lpt->base.block, lpt->base.height[0]); + lpt->base.nblocksx[0] = pf_get_nblocksx(&lpt->base.block, lpt->base.width0); + lpt->base.nblocksy[0] = pf_get_nblocksy(&lpt->base.block, lpt->base.height0); lpt->dt = winsys->displaytarget_create(winsys, lpt->base.format, - lpt->base.width[0], - lpt->base.height[0], + lpt->base.width0, + lpt->base.height0, 16, &lpt->stride[0] ); @@ -172,7 +169,7 @@ llvmpipe_texture_blanket(struct pipe_screen * screen, /* Only supports one type */ if (base->target != PIPE_TEXTURE_2D || base->last_level != 0 || - base->depth[0] != 1) { + base->depth0 != 1) { return NULL; } @@ -183,8 +180,8 @@ llvmpipe_texture_blanket(struct pipe_screen * screen, lpt->base = *base; pipe_reference_init(&lpt->base.reference, 1); lpt->base.screen = screen; - lpt->base.nblocksx[0] = pf_get_nblocksx(&lpt->base.block, lpt->base.width[0]); - lpt->base.nblocksy[0] = pf_get_nblocksy(&lpt->base.block, lpt->base.height[0]); + lpt->base.nblocksx[0] = pf_get_nblocksx(&lpt->base.block, lpt->base.width0); + lpt->base.nblocksy[0] = pf_get_nblocksy(&lpt->base.block, lpt->base.height0); lpt->stride[0] = stride[0]; pipe_buffer_reference(&lpt->buffer, buffer); @@ -229,8 +226,8 @@ llvmpipe_get_tex_surface(struct pipe_screen *screen, pipe_reference_init(&ps->reference, 1); pipe_texture_reference(&ps->texture, pt); ps->format = pt->format; - ps->width = pt->width[level]; - ps->height = pt->height[level]; + ps->width = u_minify(pt->width0, level); + ps->height = u_minify(pt->height0, level); ps->offset = lpt->level_offset[level]; ps->usage = usage; |