diff options
19 files changed, 730 insertions, 18 deletions
diff --git a/src/gallium/auxiliary/Makefile.sources b/src/gallium/auxiliary/Makefile.sources index 9df4e265b5b..6e22ced4e41 100644 --- a/src/gallium/auxiliary/Makefile.sources +++ b/src/gallium/auxiliary/Makefile.sources @@ -378,7 +378,9 @@ GALLIVM_SOURCES := \ gallivm/lp_bld_flow.h \ gallivm/lp_bld_format_aos_array.c \ gallivm/lp_bld_format_aos.c \ + gallivm/lp_bld_format_cached.c \ gallivm/lp_bld_format_float.c \ + gallivm/lp_bld_format.c \ gallivm/lp_bld_format.h \ gallivm/lp_bld_format_soa.c \ gallivm/lp_bld_format_srgb.c \ diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index b1e1bcbee04..8435991fb6b 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -625,6 +625,7 @@ generate_vs(struct draw_llvm_variant *variant, inputs, outputs, context_ptr, + NULL, draw_sampler, &llvm->draw->vs.vertex_shader->info, NULL); @@ -749,7 +750,8 @@ generate_fetch(struct gallivm_state *gallivm, lp_float32_vec4_type(), FALSE, map_ptr, - zero, zero, zero); + zero, zero, zero, + NULL); LLVMBuildStore(builder, val, temp_ptr); } lp_build_endif(&if_ctx); @@ -2193,6 +2195,7 @@ draw_gs_llvm_generate(struct draw_llvm *llvm, NULL, outputs, context_ptr, + NULL, sampler, &llvm->draw->gs.geometry_shader->info, (const struct lp_build_tgsi_gs_iface *)&gs_iface); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format.c b/src/gallium/auxiliary/gallivm/lp_bld_format.c new file mode 100644 index 00000000000..a82fd8feee8 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_format.c @@ -0,0 +1,56 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + **************************************************************************/ + + +#include "lp_bld_format.h" + + + +LLVMTypeRef +lp_build_format_cache_type(struct gallivm_state *gallivm) +{ + LLVMTypeRef elem_types[LP_BUILD_FORMAT_CACHE_MEMBER_COUNT]; + LLVMTypeRef s; + + elem_types[LP_BUILD_FORMAT_CACHE_MEMBER_DATA] = + LLVMArrayType(LLVMInt32TypeInContext(gallivm->context), + LP_BUILD_FORMAT_CACHE_SIZE * 16); + elem_types[LP_BUILD_FORMAT_CACHE_MEMBER_TAGS] = + LLVMArrayType(LLVMInt64TypeInContext(gallivm->context), + LP_BUILD_FORMAT_CACHE_SIZE); +#if LP_BUILD_FORMAT_CACHE_DEBUG + elem_types[LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_TOTAL] = + LLVMInt64TypeInContext(gallivm->context); + elem_types[LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS] = + LLVMInt64TypeInContext(gallivm->context); +#endif + + s = LLVMStructTypeInContext(gallivm->context, elem_types, + LP_BUILD_FORMAT_CACHE_MEMBER_COUNT, 0); + + return s; +} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format.h b/src/gallium/auxiliary/gallivm/lp_bld_format.h index 969f1f6cc94..5c866f420bd 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_format.h @@ -44,6 +44,45 @@ struct lp_type; struct lp_build_context; +#define LP_BUILD_FORMAT_CACHE_DEBUG 0 +/* + * Block cache + * + * Optional block cache to be used when unpacking big pixel blocks. + * Must be a power of 2 + */ + +#define LP_BUILD_FORMAT_CACHE_SIZE 128 + +/* + * Note: cache_data needs 16 byte alignment. + */ +struct lp_build_format_cache +{ + PIPE_ALIGN_VAR(16) uint32_t cache_data[LP_BUILD_FORMAT_CACHE_SIZE][4][4]; + uint64_t cache_tags[LP_BUILD_FORMAT_CACHE_SIZE]; +#if LP_BUILD_FORMAT_CACHE_DEBUG + uint64_t cache_access_total; + uint64_t cache_access_miss; +#endif +}; + + +enum { + LP_BUILD_FORMAT_CACHE_MEMBER_DATA = 0, + LP_BUILD_FORMAT_CACHE_MEMBER_TAGS, +#if LP_BUILD_FORMAT_CACHE_DEBUG + LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_TOTAL, + LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS, +#endif + LP_BUILD_FORMAT_CACHE_MEMBER_COUNT +}; + + +LLVMTypeRef +lp_build_format_cache_type(struct gallivm_state *gallivm); + + /* * AoS */ @@ -66,7 +105,8 @@ lp_build_fetch_rgba_aos(struct gallivm_state *gallivm, LLVMValueRef base_ptr, LLVMValueRef offset, LLVMValueRef i, - LLVMValueRef j); + LLVMValueRef j, + LLVMValueRef cache); LLVMValueRef lp_build_fetch_rgba_aos_array(struct gallivm_state *gallivm, @@ -107,13 +147,13 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm, LLVMValueRef offsets, LLVMValueRef i, LLVMValueRef j, + LLVMValueRef cache, LLVMValueRef rgba_out[4]); /* * YUV */ - LLVMValueRef lp_build_fetch_subsampled_rgba_aos(struct gallivm_state *gallivm, const struct util_format_description *format_desc, @@ -123,6 +163,18 @@ lp_build_fetch_subsampled_rgba_aos(struct gallivm_state *gallivm, LLVMValueRef i, LLVMValueRef j); + +LLVMValueRef +lp_build_fetch_cached_texels(struct gallivm_state *gallivm, + const struct util_format_description *format_desc, + unsigned n, + LLVMValueRef base_ptr, + LLVMValueRef offset, + LLVMValueRef i, + LLVMValueRef j, + LLVMValueRef cache); + + /* * special float formats */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c index ddf3ad1dfc6..a41b30bbb96 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c @@ -370,7 +370,8 @@ lp_build_fetch_rgba_aos(struct gallivm_state *gallivm, LLVMValueRef base_ptr, LLVMValueRef offset, LLVMValueRef i, - LLVMValueRef j) + LLVMValueRef j, + LLVMValueRef cache) { LLVMBuilderRef builder = gallivm->builder; unsigned num_pixels = type.length / 4; @@ -503,6 +504,34 @@ lp_build_fetch_rgba_aos(struct gallivm_state *gallivm, } /* + * s3tc rgb formats + */ + + if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC && cache) { + struct lp_type tmp_type; + LLVMValueRef tmp; + + memset(&tmp_type, 0, sizeof tmp_type); + tmp_type.width = 8; + tmp_type.length = num_pixels * 4; + tmp_type.norm = TRUE; + + tmp = lp_build_fetch_cached_texels(gallivm, + format_desc, + num_pixels, + base_ptr, + offset, + i, j, + cache); + + lp_build_conv(gallivm, + tmp_type, type, + &tmp, 1, &tmp, 1); + + return tmp; + } + + /* * Fallback to util_format_description::fetch_rgba_8unorm(). */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_cached.c b/src/gallium/auxiliary/gallivm/lp_bld_format_cached.c new file mode 100644 index 00000000000..b683e7f960c --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_cached.c @@ -0,0 +1,374 @@ +/************************************************************************** + * + * Copyright 2015 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "lp_bld_format.h" +#include "lp_bld_type.h" +#include "lp_bld_struct.h" +#include "lp_bld_const.h" +#include "lp_bld_flow.h" +#include "lp_bld_swizzle.h" + +#include "util/u_math.h" + + +/** + * @file + * Complex block-compression based formats are handled here by using a cache, + * so re-decoding of every pixel is not required. + * Especially for bilinear filtering, texel reuse is very high hence even + * a small cache helps. + * The elements in the cache are the decoded blocks - currently things + * are restricted to formats which are 4x4 block based, and the decoded + * texels must fit into 4x8 bits. + * The cache is direct mapped so hitrates aren't all that great and cache + * thrashing could happen. + * + * @author Roland Scheidegger <[email protected]> + */ + + +#if LP_BUILD_FORMAT_CACHE_DEBUG +static void +update_cache_access(struct gallivm_state *gallivm, + LLVMValueRef ptr, + unsigned count, + unsigned index) +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef member_ptr, cache_access; + + assert(index == LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_TOTAL || + index == LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS); + + member_ptr = lp_build_struct_get_ptr(gallivm, ptr, index, ""); + cache_access = LLVMBuildLoad(builder, member_ptr, "cache_access"); + cache_access = LLVMBuildAdd(builder, cache_access, + LLVMConstInt(LLVMInt64TypeInContext(gallivm->context), + count, 0), ""); + LLVMBuildStore(builder, cache_access, member_ptr); +} +#endif + + +static void +store_cached_block(struct gallivm_state *gallivm, + LLVMValueRef *col, + LLVMValueRef tag_value, + LLVMValueRef hash_index, + LLVMValueRef cache) +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef ptr, indices[3]; + LLVMTypeRef type_ptr4x32; + unsigned count; + + type_ptr4x32 = LLVMPointerType(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4), 0); + indices[0] = lp_build_const_int32(gallivm, 0); + indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_TAGS); + indices[2] = hash_index; + ptr = LLVMBuildGEP(builder, cache, indices, Elements(indices), ""); + LLVMBuildStore(builder, tag_value, ptr); + + indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_DATA); + hash_index = LLVMBuildMul(builder, hash_index, + lp_build_const_int32(gallivm, 16), ""); + for (count = 0; count < 4; count++) { + indices[2] = hash_index; + ptr = LLVMBuildGEP(builder, cache, indices, Elements(indices), ""); + ptr = LLVMBuildBitCast(builder, ptr, type_ptr4x32, ""); + LLVMBuildStore(builder, col[count], ptr); + hash_index = LLVMBuildAdd(builder, hash_index, + lp_build_const_int32(gallivm, 4), ""); + } +} + + +static LLVMValueRef +lookup_cached_pixel(struct gallivm_state *gallivm, + LLVMValueRef ptr, + LLVMValueRef index) +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef member_ptr, indices[3]; + + indices[0] = lp_build_const_int32(gallivm, 0); + indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_DATA); + indices[2] = index; + member_ptr = LLVMBuildGEP(builder, ptr, indices, Elements(indices), ""); + return LLVMBuildLoad(builder, member_ptr, "cache_data"); +} + + +static LLVMValueRef +lookup_tag_data(struct gallivm_state *gallivm, + LLVMValueRef ptr, + LLVMValueRef index) +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef member_ptr, indices[3]; + + indices[0] = lp_build_const_int32(gallivm, 0); + indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_TAGS); + indices[2] = index; + member_ptr = LLVMBuildGEP(builder, ptr, indices, Elements(indices), ""); + return LLVMBuildLoad(builder, member_ptr, "tag_data"); +} + + +static void +update_cached_block(struct gallivm_state *gallivm, + const struct util_format_description *format_desc, + LLVMValueRef ptr_addr, + LLVMValueRef hash_index, + LLVMValueRef cache) + +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context); + LLVMTypeRef pi8t = LLVMPointerType(i8t, 0); + LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); + LLVMTypeRef i32x4 = LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4); + LLVMValueRef function; + LLVMValueRef tag_value, tmp_ptr; + LLVMValueRef col[4]; + unsigned i, j; + + /* + * Use format_desc->fetch_rgba_8unorm() for each pixel in the block. + * This doesn't actually make any sense whatsoever, someone would need + * to write a function doing this for all pixels in a block (either as + * an external c function or with generated code). Don't ask. + */ + + { + /* + * Function to call looks like: + * fetch(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j) + */ + LLVMTypeRef ret_type; + LLVMTypeRef arg_types[4]; + LLVMTypeRef function_type; + + assert(format_desc->fetch_rgba_8unorm); + + ret_type = LLVMVoidTypeInContext(gallivm->context); + arg_types[0] = pi8t; + arg_types[1] = pi8t; + arg_types[2] = i32t; + arg_types[3] = i32t; + function_type = LLVMFunctionType(ret_type, arg_types, + Elements(arg_types), 0); + + /* make const pointer for the C fetch_rgba_8unorm function */ + function = lp_build_const_int_pointer(gallivm, + func_to_pointer((func_pointer) format_desc->fetch_rgba_8unorm)); + + /* cast the callee pointer to the function's type */ + function = LLVMBuildBitCast(builder, function, + LLVMPointerType(function_type, 0), + "cast callee"); + } + + tmp_ptr = lp_build_array_alloca(gallivm, i32x4, + lp_build_const_int32(gallivm, 16), + "tmp_decode_store"); + tmp_ptr = LLVMBuildBitCast(builder, tmp_ptr, pi8t, ""); + + /* + * Invoke format_desc->fetch_rgba_8unorm() for each pixel. + * This is going to be really really slow. + * Note: the block store format is actually + * x0y0x0y1x0y2x0y3 x1y0x1y1x1y2x1y3 ... + */ + for (i = 0; i < 4; ++i) { + for (j = 0; j < 4; ++j) { + LLVMValueRef args[4]; + LLVMValueRef dst_offset = lp_build_const_int32(gallivm, (i * 4 + j) * 4); + + /* + * Note we actually supply a pointer to the start of the block, + * not the start of the texture. + */ + args[0] = LLVMBuildGEP(gallivm->builder, tmp_ptr, &dst_offset, 1, ""); + args[1] = ptr_addr; + args[2] = LLVMConstInt(i32t, i, 0); + args[3] = LLVMConstInt(i32t, j, 0); + LLVMBuildCall(builder, function, args, Elements(args), ""); + } + } + + /* Finally store the block - pointless mem copy + update tag. */ + tmp_ptr = LLVMBuildBitCast(builder, tmp_ptr, LLVMPointerType(i32x4, 0), ""); + for (i = 0; i < 4; ++i) { + LLVMValueRef tmp_offset = lp_build_const_int32(gallivm, i); + LLVMValueRef ptr = LLVMBuildGEP(gallivm->builder, tmp_ptr, &tmp_offset, 1, ""); + col[i] = LLVMBuildLoad(builder, ptr, ""); + } + + tag_value = LLVMBuildPtrToInt(gallivm->builder, ptr_addr, + LLVMInt64TypeInContext(gallivm->context), ""); + store_cached_block(gallivm, col, tag_value, hash_index, cache); +} + + +/* + * Do a cached lookup. + * + * Returns (vectors of) 4x8 rgba aos value + */ +LLVMValueRef +lp_build_fetch_cached_texels(struct gallivm_state *gallivm, + const struct util_format_description *format_desc, + unsigned n, + LLVMValueRef base_ptr, + LLVMValueRef offset, + LLVMValueRef i, + LLVMValueRef j, + LLVMValueRef cache) + +{ + LLVMBuilderRef builder = gallivm->builder; + unsigned count, low_bit, log2size; + LLVMValueRef color, offset_stored, addr, ptr_addrtrunc, tmp; + LLVMValueRef ij_index, hash_index, hash_mask, block_index; + LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context); + LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); + LLVMTypeRef i64t = LLVMInt64TypeInContext(gallivm->context); + struct lp_type type; + struct lp_build_context bld32; + memset(&type, 0, sizeof type); + type.width = 32; + type.length = n; + + assert(format_desc->block.width == 4); + assert(format_desc->block.height == 4); + + lp_build_context_init(&bld32, gallivm, type); + + /* + * compute hash - we use direct mapped cache, the hash function could + * be better but it needs to be simple + * per-element: + * compare offset with offset stored at tag (hash) + * if not equal decode/store block, update tag + * extract color from cache + * assemble result vector + */ + + /* TODO: not ideal with 32bit pointers... */ + + low_bit = util_logbase2(format_desc->block.bits / 8); + log2size = util_logbase2(LP_BUILD_FORMAT_CACHE_SIZE); + addr = LLVMBuildPtrToInt(builder, base_ptr, i64t, ""); + ptr_addrtrunc = LLVMBuildPtrToInt(builder, base_ptr, i32t, ""); + ptr_addrtrunc = lp_build_broadcast_scalar(&bld32, ptr_addrtrunc); + /* For the hash function, first mask off the unused lowest bits. Then just + do some xor with address bits - only use lower 32bits */ + ptr_addrtrunc = LLVMBuildAdd(builder, offset, ptr_addrtrunc, ""); + ptr_addrtrunc = LLVMBuildLShr(builder, ptr_addrtrunc, + lp_build_const_int_vec(gallivm, type, low_bit), ""); + /* This only really makes sense for size 64,128,256 */ + hash_index = ptr_addrtrunc; + ptr_addrtrunc = LLVMBuildLShr(builder, ptr_addrtrunc, + lp_build_const_int_vec(gallivm, type, 2*log2size), ""); + hash_index = LLVMBuildXor(builder, ptr_addrtrunc, hash_index, ""); + tmp = LLVMBuildLShr(builder, hash_index, + lp_build_const_int_vec(gallivm, type, log2size), ""); + hash_index = LLVMBuildXor(builder, hash_index, tmp, ""); + + hash_mask = lp_build_const_int_vec(gallivm, type, LP_BUILD_FORMAT_CACHE_SIZE - 1); + hash_index = LLVMBuildAnd(builder, hash_index, hash_mask, ""); + ij_index = LLVMBuildShl(builder, i, lp_build_const_int_vec(gallivm, type, 2), ""); + ij_index = LLVMBuildAdd(builder, ij_index, j, ""); + block_index = LLVMBuildShl(builder, hash_index, + lp_build_const_int_vec(gallivm, type, 4), ""); + block_index = LLVMBuildAdd(builder, ij_index, block_index, ""); + + if (n > 1) { + color = LLVMGetUndef(LLVMVectorType(i32t, n)); + for (count = 0; count < n; count++) { + LLVMValueRef index, cond, colorx; + LLVMValueRef block_indexx, hash_indexx, addrx, offsetx, ptr_addrx; + struct lp_build_if_state if_ctx; + + index = lp_build_const_int32(gallivm, count); + offsetx = LLVMBuildExtractElement(builder, offset, index, ""); + addrx = LLVMBuildZExt(builder, offsetx, i64t, ""); + addrx = LLVMBuildAdd(builder, addrx, addr, ""); + block_indexx = LLVMBuildExtractElement(builder, block_index, index, ""); + hash_indexx = LLVMBuildLShr(builder, block_indexx, + lp_build_const_int32(gallivm, 4), ""); + offset_stored = lookup_tag_data(gallivm, cache, hash_indexx); + cond = LLVMBuildICmp(builder, LLVMIntNE, offset_stored, addrx, ""); + + lp_build_if(&if_ctx, gallivm, cond); + { + ptr_addrx = LLVMBuildIntToPtr(builder, addrx, + LLVMPointerType(i8t, 0), ""); + update_cached_block(gallivm, format_desc, ptr_addrx, hash_indexx, cache); +#if LP_BUILD_FORMAT_CACHE_DEBUG + update_cache_access(gallivm, cache, 1, + LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS); +#endif + } + lp_build_endif(&if_ctx); + + colorx = lookup_cached_pixel(gallivm, cache, block_indexx); + + color = LLVMBuildInsertElement(builder, color, colorx, + lp_build_const_int32(gallivm, count), ""); + } + } + else { + LLVMValueRef cond; + struct lp_build_if_state if_ctx; + + tmp = LLVMBuildZExt(builder, offset, i64t, ""); + addr = LLVMBuildAdd(builder, tmp, addr, ""); + offset_stored = lookup_tag_data(gallivm, cache, hash_index); + cond = LLVMBuildICmp(builder, LLVMIntNE, offset_stored, addr, ""); + + lp_build_if(&if_ctx, gallivm, cond); + { + tmp = LLVMBuildIntToPtr(builder, addr, LLVMPointerType(i8t, 0), ""); + update_cached_block(gallivm, format_desc, tmp, hash_index, cache); +#if LP_BUILD_FORMAT_CACHE_DEBUG + update_cache_access(gallivm, cache, 1, + LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS); +#endif + } + lp_build_endif(&if_ctx); + + color = lookup_cached_pixel(gallivm, cache, block_index); + } +#if LP_BUILD_FORMAT_CACHE_DEBUG + update_cache_access(gallivm, cache, n, + LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_TOTAL); +#endif + return LLVMBuildBitCast(builder, color, LLVMVectorType(i8t, n * 4), ""); +} + diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c index afaabc08790..42aef8376f8 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c @@ -346,6 +346,7 @@ lp_build_rgba8_to_fi32_soa(struct gallivm_state *gallivm, * \param i, j the sub-block pixel coordinates. For non-compressed formats * these will always be (0,0). For compressed formats, i will * be in [0, block_width-1] and j will be in [0, block_height-1]. + * \param cache optional value pointing to a lp_build_format_cache structure */ void lp_build_fetch_rgba_soa(struct gallivm_state *gallivm, @@ -355,6 +356,7 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm, LLVMValueRef offset, LLVMValueRef i, LLVMValueRef j, + LLVMValueRef cache, LLVMValueRef rgba_out[4]) { LLVMBuilderRef builder = gallivm->builder; @@ -473,7 +475,7 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm, tmp_type.norm = TRUE; tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type, - TRUE, base_ptr, offset, i, j); + TRUE, base_ptr, offset, i, j, cache); lp_build_rgba8_to_fi32_soa(gallivm, type, @@ -483,6 +485,37 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm, return; } + if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC && + /* non-srgb case is already handled above */ + format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB && + type.floating && type.width == 32 && + (type.length == 1 || (type.length % 4 == 0)) && + cache) { + const struct util_format_description *format_decompressed; + LLVMValueRef packed; + packed = lp_build_fetch_cached_texels(gallivm, + format_desc, + type.length, + base_ptr, + offset, + i, j, + cache); + packed = LLVMBuildBitCast(builder, packed, + lp_build_int_vec_type(gallivm, type), ""); + /* + * The values are now packed so they match ordinary srgb RGBA8 format, + * hence need to use matching format for unpack. + */ + format_decompressed = util_format_description(PIPE_FORMAT_R8G8B8A8_SRGB); + + lp_build_unpack_rgba_soa(gallivm, + format_decompressed, + type, + packed, rgba_out); + + return; + } + /* * Fallback to calling lp_build_fetch_rgba_aos for each pixel. * @@ -524,7 +557,7 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm, /* Get a single float[4]={R,G,B,A} pixel */ tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type, TRUE, base_ptr, offset_elem, - i_elem, j_elem); + i_elem, j_elem, cache); /* * Insert the AoS tmp value channels into the SoA result vectors at diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h index eba758da6ae..a6f0eff42f6 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h @@ -99,6 +99,7 @@ struct lp_sampler_params unsigned sampler_index; unsigned sample_key; LLVMValueRef context_ptr; + LLVMValueRef thread_data_ptr; const LLVMValueRef *coords; const LLVMValueRef *offsets; LLVMValueRef lod; @@ -267,6 +268,17 @@ struct lp_sampler_dynamic_state struct gallivm_state *gallivm, LLVMValueRef context_ptr, unsigned sampler_unit); + + /** + * Obtain texture cache (returns ptr to lp_build_format_cache). + * + * It's optional: no caching will be done if it's NULL. + */ + LLVMValueRef + (*cache_ptr)(const struct lp_sampler_dynamic_state *state, + struct gallivm_state *gallivm, + LLVMValueRef thread_data_ptr, + unsigned unit); }; @@ -356,6 +368,7 @@ struct lp_build_sample_context LLVMValueRef img_stride_array; LLVMValueRef base_ptr; LLVMValueRef mip_offsets; + LLVMValueRef cache; /** Integer vector with texture width, height, depth */ LLVMValueRef int_size; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c index d7fde810a76..729c5b8f6ef 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c @@ -593,7 +593,8 @@ lp_build_sample_fetch_image_nearest(struct lp_build_sample_context *bld, TRUE, data_ptr, offset, x_subcoord, - y_subcoord); + y_subcoord, + bld->cache); } *colors = rgba8; @@ -933,7 +934,8 @@ lp_build_sample_fetch_image_linear(struct lp_build_sample_context *bld, TRUE, data_ptr, offset[k][j][i], x_subcoord[i], - y_subcoord[j]); + y_subcoord[j], + bld->cache); } neighbors[k][j][i] = rgba8; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index 26bfa0d2677..e21933ffc85 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -161,6 +161,7 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, bld->texel_type, data_ptr, offset, i, j, + bld->cache, texel_out); /* @@ -2389,6 +2390,7 @@ lp_build_fetch_texel(struct lp_build_sample_context *bld, bld->texel_type, bld->base_ptr, offset, i, j, + bld->cache, colors_out); if (out_of_bound_ret_zero) { @@ -2442,6 +2444,7 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm, unsigned texture_index, unsigned sampler_index, LLVMValueRef context_ptr, + LLVMValueRef thread_data_ptr, const LLVMValueRef *coords, const LLVMValueRef *offsets, const struct lp_derivatives *derivs, /* optional */ @@ -2707,6 +2710,11 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm, context_ptr, texture_index); /* Note that mip_offsets is an array[level] of offsets to texture images */ + if (dynamic_state->cache_ptr && thread_data_ptr) { + bld.cache = dynamic_state->cache_ptr(dynamic_state, gallivm, + thread_data_ptr, texture_index); + } + /* width, height, depth as single int vector */ if (dims <= 1) { bld.int_size = tex_width; @@ -2883,6 +2891,7 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm, bld4.base_ptr = bld.base_ptr; bld4.mip_offsets = bld.mip_offsets; bld4.int_size = bld.int_size; + bld4.cache = bld.cache; bld4.vector_width = lp_type_width(type4); @@ -3081,12 +3090,14 @@ lp_build_sample_gen_func(struct gallivm_state *gallivm, LLVMValueRef offsets[3] = { NULL }; LLVMValueRef lod = NULL; LLVMValueRef context_ptr; + LLVMValueRef thread_data_ptr = NULL; LLVMValueRef texel_out[4]; struct lp_derivatives derivs; struct lp_derivatives *deriv_ptr = NULL; unsigned num_param = 0; unsigned i, num_coords, num_derivs, num_offsets, layer; enum lp_sampler_lod_control lod_control; + boolean need_cache = FALSE; lod_control = (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >> LP_SAMPLER_LOD_CONTROL_SHIFT; @@ -3094,8 +3105,19 @@ lp_build_sample_gen_func(struct gallivm_state *gallivm, get_target_info(static_texture_state->target, &num_coords, &num_derivs, &num_offsets, &layer); + if (dynamic_state->cache_ptr) { + const struct util_format_description *format_desc; + format_desc = util_format_description(static_texture_state->format); + if (format_desc && format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { + need_cache = TRUE; + } + } + /* "unpack" arguments */ context_ptr = LLVMGetParam(function, num_param++); + if (need_cache) { + thread_data_ptr = LLVMGetParam(function, num_param++); + } for (i = 0; i < num_coords; i++) { coords[i] = LLVMGetParam(function, num_param++); } @@ -3146,6 +3168,7 @@ lp_build_sample_gen_func(struct gallivm_state *gallivm, texture_index, sampler_index, context_ptr, + thread_data_ptr, coords, offsets, deriv_ptr, @@ -3189,6 +3212,7 @@ lp_build_sample_soa_func(struct gallivm_state *gallivm, const LLVMValueRef *offsets = params->offsets; const struct lp_derivatives *derivs = params->derivs; enum lp_sampler_lod_control lod_control; + boolean need_cache = FALSE; lod_control = (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >> LP_SAMPLER_LOD_CONTROL_SHIFT; @@ -3196,6 +3220,17 @@ lp_build_sample_soa_func(struct gallivm_state *gallivm, get_target_info(static_texture_state->target, &num_coords, &num_derivs, &num_offsets, &layer); + if (dynamic_state->cache_ptr) { + const struct util_format_description *format_desc; + format_desc = util_format_description(static_texture_state->format); + if (format_desc && format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { + /* + * This is not 100% correct, if we have cache but the + * util_format_s3tc_prefer is true the cache won't get used + * regardless (could hook up the block decode there...) */ + need_cache = TRUE; + } + } /* * texture function matches are found by name. * Thus the name has to include both the texture and sampler unit @@ -3221,6 +3256,9 @@ lp_build_sample_soa_func(struct gallivm_state *gallivm, */ arg_types[num_param++] = LLVMTypeOf(params->context_ptr); + if (need_cache) { + arg_types[num_param++] = LLVMTypeOf(params->thread_data_ptr); + } for (i = 0; i < num_coords; i++) { arg_types[num_param++] = LLVMTypeOf(coords[0]); assert(LLVMTypeOf(coords[0]) == LLVMTypeOf(coords[i])); @@ -3280,6 +3318,9 @@ lp_build_sample_soa_func(struct gallivm_state *gallivm, num_args = 0; args[num_args++] = params->context_ptr; + if (need_cache) { + args[num_args++] = params->thread_data_ptr; + } for (i = 0; i < num_coords; i++) { args[num_args++] = coords[i]; } @@ -3384,6 +3425,7 @@ lp_build_sample_soa(const struct lp_static_texture_state *static_texture_state, params->texture_index, params->sampler_index, params->context_ptr, + params->thread_data_ptr, params->coords, params->offsets, params->derivs, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h index 2ca9c6194b3..cc4549778a3 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h @@ -230,6 +230,7 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm, const LLVMValueRef (*inputs)[4], LLVMValueRef (*outputs)[4], LLVMValueRef context_ptr, + LLVMValueRef thread_data_ptr, struct lp_build_sampler_soa *sampler, const struct tgsi_shader_info *info, const struct lp_build_tgsi_gs_iface *gs_iface); @@ -447,6 +448,7 @@ struct lp_build_tgsi_soa_context const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS]; LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS]; LLVMValueRef context_ptr; + LLVMValueRef thread_data_ptr; const struct lp_build_sampler_soa *sampler; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index fae604e2f9c..7d2cd9a9e73 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -2321,6 +2321,7 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, params.texture_index = unit; params.sampler_index = unit; params.context_ptr = bld->context_ptr; + params.thread_data_ptr = bld->thread_data_ptr; params.coords = coords; params.offsets = offsets; params.lod = lod; @@ -2488,6 +2489,7 @@ emit_sample(struct lp_build_tgsi_soa_context *bld, params.texture_index = texture_unit; params.sampler_index = sampler_unit; params.context_ptr = bld->context_ptr; + params.thread_data_ptr = bld->thread_data_ptr; params.coords = coords; params.offsets = offsets; params.lod = lod; @@ -2608,6 +2610,7 @@ emit_fetch_texels( struct lp_build_tgsi_soa_context *bld, params.texture_index = unit; params.sampler_index = unit; params.context_ptr = bld->context_ptr; + params.thread_data_ptr = bld->thread_data_ptr; params.coords = coords; params.offsets = offsets; params.derivs = NULL; @@ -3858,6 +3861,7 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm, const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS], LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS], LLVMValueRef context_ptr, + LLVMValueRef thread_data_ptr, struct lp_build_sampler_soa *sampler, const struct tgsi_shader_info *info, const struct lp_build_tgsi_gs_iface *gs_iface) @@ -3893,6 +3897,7 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm, bld.bld_base.info = info; bld.indirect_files = info->indirect_files; bld.context_ptr = context_ptr; + bld.thread_data_ptr = thread_data_ptr; /* * If the number of temporaries is rather large then we just diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index 9acde4f1b06..b915c1d64ff 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -36,6 +36,7 @@ #include "util/u_memory.h" #include "gallivm/lp_bld_init.h" #include "gallivm/lp_bld_debug.h" +#include "gallivm/lp_bld_format.h" #include "lp_context.h" #include "lp_jit.h" @@ -208,6 +209,8 @@ lp_jit_create_types(struct lp_fragment_shader_variant *lp) LLVMTypeRef elem_types[LP_JIT_THREAD_DATA_COUNT]; LLVMTypeRef thread_data_type; + elem_types[LP_JIT_THREAD_DATA_CACHE] = + LLVMPointerType(lp_build_format_cache_type(gallivm), 0); elem_types[LP_JIT_THREAD_DATA_COUNTER] = LLVMInt64TypeInContext(lc); elem_types[LP_JIT_THREAD_DATA_RASTER_STATE_VIEWPORT_INDEX] = LLVMInt32TypeInContext(lc); diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h index 097fa7dce7c..9db26f2cba9 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.h +++ b/src/gallium/drivers/llvmpipe/lp_jit.h @@ -43,6 +43,7 @@ #include "lp_texture.h" +struct lp_build_format_cache; struct lp_fragment_shader_variant; struct llvmpipe_screen; @@ -189,6 +190,7 @@ enum { struct lp_jit_thread_data { + struct lp_build_format_cache *cache; uint64_t vis_counter; /* @@ -201,12 +203,16 @@ struct lp_jit_thread_data enum { - LP_JIT_THREAD_DATA_COUNTER = 0, + LP_JIT_THREAD_DATA_CACHE = 0, + LP_JIT_THREAD_DATA_COUNTER, LP_JIT_THREAD_DATA_RASTER_STATE_VIEWPORT_INDEX, LP_JIT_THREAD_DATA_COUNT }; +#define lp_jit_thread_data_cache(_gallivm, _ptr) \ + lp_build_struct_get(_gallivm, _ptr, LP_JIT_THREAD_DATA_CACHE, "cache") + #define lp_jit_thread_data_counter(_gallivm, _ptr) \ lp_build_struct_get_ptr(_gallivm, _ptr, LP_JIT_THREAD_DATA_COUNTER, "counter") diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index c726707c062..d22e50777fa 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -43,6 +43,7 @@ #include "lp_query.h" #include "lp_rast.h" #include "lp_rast_priv.h" +#include "gallivm/lp_bld_format.h" #include "gallivm/lp_bld_debug.h" #include "lp_scene.h" #include "lp_tex_sample.h" @@ -664,6 +665,17 @@ rasterize_scene(struct lp_rasterizer_task *task, { task->scene = scene; + /* Clear the cache tags. This should not always be necessary but + simpler for now. */ +#if LP_USE_TEXTURE_CACHE + memset(task->thread_data.cache->cache_tags, 0, + sizeof(task->thread_data.cache->cache_tags)); +#if LP_BUILD_FORMAT_CACHE_DEBUG + task->thread_data.cache->cache_access_total = 0; + task->thread_data.cache->cache_access_miss = 0; +#endif +#endif + if (!task->rast->no_rast && !scene->discard) { /* loop over scene bins, rasterize each */ { @@ -679,6 +691,20 @@ rasterize_scene(struct lp_rasterizer_task *task, } +#if LP_BUILD_FORMAT_CACHE_DEBUG + { + uint64_t total, miss; + total = task->thread_data.cache->cache_access_total; + miss = task->thread_data.cache->cache_access_miss; + if (total) { + debug_printf("thread %d cache access %llu miss %llu hit rate %f\n", + task->thread_index, (long long unsigned)total, + (long long unsigned)miss, + (float)(total - miss)/(float)total); + } + } +#endif + if (scene->fence) { lp_fence_signal(scene->fence); } @@ -866,10 +892,15 @@ lp_rast_create( unsigned num_threads ) goto no_full_scenes; } - for (i = 0; i < Elements(rast->tasks); i++) { + for (i = 0; i < MAX2(1, num_threads); i++) { struct lp_rasterizer_task *task = &rast->tasks[i]; task->rast = rast; task->thread_index = i; + task->thread_data.cache = align_malloc(sizeof(struct lp_build_format_cache), + 16); + if (!task->thread_data.cache) { + goto no_thread_data_cache; + } } rast->num_threads = num_threads; @@ -885,6 +916,14 @@ lp_rast_create( unsigned num_threads ) return rast; +no_thread_data_cache: + for (i = 0; i < MAX2(1, rast->num_threads); i++) { + if (rast->tasks[i].thread_data.cache) { + align_free(rast->tasks[i].thread_data.cache); + } + } + + lp_scene_queue_destroy(rast->full_scenes); no_full_scenes: FREE(rast); no_rast: @@ -923,6 +962,9 @@ void lp_rast_destroy( struct lp_rasterizer *rast ) pipe_semaphore_destroy(&rast->tasks[i].work_ready); pipe_semaphore_destroy(&rast->tasks[i].work_done); } + for (i = 0; i < MAX2(1, rast->num_threads); i++) { + align_free(rast->tasks[i].thread_data.cache); + } /* for synchronizing rasterization threads */ pipe_barrier_destroy( &rast->barrier ); diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index fd6c49aacd8..f55f6b4fa4f 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -421,7 +421,7 @@ generate_fs_loop(struct gallivm_state *gallivm, lp_build_tgsi_soa(gallivm, tokens, type, &mask, consts_ptr, num_consts_ptr, &system_values, interp->inputs, - outputs, context_ptr, + outputs, context_ptr, thread_data_ptr, sampler, &shader->info.base, NULL); /* Alpha test */ @@ -2303,8 +2303,8 @@ generate_fragment(struct llvmpipe_context *lp, lp_build_name(dady_ptr, "dady"); lp_build_name(color_ptr_ptr, "color_ptr_ptr"); lp_build_name(depth_ptr, "depth"); - lp_build_name(thread_data_ptr, "thread_data"); lp_build_name(mask_input, "mask_input"); + lp_build_name(thread_data_ptr, "thread_data"); lp_build_name(stride_ptr, "stride_ptr"); lp_build_name(depth_stride, "depth_stride"); diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c index d9abd1ae37c..0640a217874 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_format.c +++ b/src/gallium/drivers/llvmpipe/lp_test_format.c @@ -44,6 +44,9 @@ #include "lp_test.h" +#define USE_TEXTURE_CACHE 1 + +static struct lp_build_format_cache *cache_ptr; void write_tsv_header(FILE *fp) @@ -71,7 +74,7 @@ write_tsv_row(FILE *fp, typedef void (*fetch_ptr_t)(void *unpacked, const void *packed, - unsigned i, unsigned j); + unsigned i, unsigned j, struct lp_build_format_cache *cache); static LLVMValueRef @@ -83,7 +86,7 @@ add_fetch_rgba_test(struct gallivm_state *gallivm, unsigned verbose, LLVMContextRef context = gallivm->context; LLVMModuleRef module = gallivm->module; LLVMBuilderRef builder = gallivm->builder; - LLVMTypeRef args[4]; + LLVMTypeRef args[5]; LLVMValueRef func; LLVMValueRef packed_ptr; LLVMValueRef offset = LLVMConstNull(LLVMInt32TypeInContext(context)); @@ -92,6 +95,7 @@ add_fetch_rgba_test(struct gallivm_state *gallivm, unsigned verbose, LLVMValueRef j; LLVMBasicBlockRef block; LLVMValueRef rgba; + LLVMValueRef cache = NULL; util_snprintf(name, sizeof name, "fetch_%s_%s", desc->short_name, type.floating ? "float" : "unorm8"); @@ -99,6 +103,7 @@ add_fetch_rgba_test(struct gallivm_state *gallivm, unsigned verbose, args[0] = LLVMPointerType(lp_build_vec_type(gallivm, type), 0); args[1] = LLVMPointerType(LLVMInt8TypeInContext(context), 0); args[3] = args[2] = LLVMInt32TypeInContext(context); + args[4] = LLVMPointerType(lp_build_format_cache_type(gallivm), 0); func = LLVMAddFunction(module, name, LLVMFunctionType(LLVMVoidTypeInContext(context), @@ -109,11 +114,15 @@ add_fetch_rgba_test(struct gallivm_state *gallivm, unsigned verbose, i = LLVMGetParam(func, 2); j = LLVMGetParam(func, 3); + if (cache_ptr) { + cache = LLVMGetParam(func, 4); + } + block = LLVMAppendBasicBlockInContext(context, func, "entry"); LLVMPositionBuilderAtEnd(builder, block); rgba = lp_build_fetch_rgba_aos(gallivm, desc, type, TRUE, - packed_ptr, offset, i, j); + packed_ptr, offset, i, j, cache); LLVMBuildStore(builder, rgba, rgba_ptr); @@ -170,7 +179,7 @@ test_format_float(unsigned verbose, FILE *fp, memset(unpacked, 0, sizeof unpacked); - fetch_ptr(unpacked, packed, j, i); + fetch_ptr(unpacked, packed, j, i, cache_ptr); for(k = 0; k < 4; ++k) { if (util_double_inf_sign(test->unpacked[i][j][k]) != util_inf_sign(unpacked[k])) { @@ -187,6 +196,11 @@ test_format_float(unsigned verbose, FILE *fp, } } + /* Ignore errors in S3TC for now */ + if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { + match = TRUE; + } + if (!match) { printf("FAILED\n"); printf(" Packed: %02x %02x %02x %02x\n", @@ -261,7 +275,7 @@ test_format_unorm8(unsigned verbose, FILE *fp, memset(unpacked, 0, sizeof unpacked); - fetch_ptr(unpacked, packed, j, i); + fetch_ptr(unpacked, packed, j, i, cache_ptr); match = TRUE; for(k = 0; k < 4; ++k) { @@ -277,6 +291,11 @@ test_format_unorm8(unsigned verbose, FILE *fp, match = FALSE; } + /* Ignore errors in S3TC as we only implement a poor man approach */ + if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { + match = TRUE; + } + if (!match) { printf("FAILED\n"); printf(" Packed: %02x %02x %02x %02x\n", @@ -334,6 +353,10 @@ test_all(unsigned verbose, FILE *fp) util_format_s3tc_init(); +#if USE_TEXTURE_CACHE + cache_ptr = align_malloc(sizeof(struct lp_build_format_cache), 16); +#endif + for (format = 1; format < PIPE_FORMAT_COUNT; ++format) { const struct util_format_description *format_desc; @@ -363,6 +386,9 @@ test_all(unsigned verbose, FILE *fp) success = FALSE; } } +#if USE_TEXTURE_CACHE + align_free(cache_ptr); +#endif return success; } diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.c b/src/gallium/drivers/llvmpipe/lp_tex_sample.c index 316d1c55082..217abe963b7 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample.c +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.c @@ -221,6 +221,21 @@ LP_LLVM_SAMPLER_MEMBER(lod_bias, LP_JIT_SAMPLER_LOD_BIAS, TRUE) LP_LLVM_SAMPLER_MEMBER(border_color, LP_JIT_SAMPLER_BORDER_COLOR, FALSE) +#if LP_USE_TEXTURE_CACHE +static LLVMValueRef +lp_llvm_texture_cache_ptr(const struct lp_sampler_dynamic_state *base, + struct gallivm_state *gallivm, + LLVMValueRef thread_data_ptr, + unsigned unit) +{ + /* We use the same cache for all units */ + (void)unit; + + return lp_jit_thread_data_cache(gallivm, thread_data_ptr); +} +#endif + + static void lp_llvm_sampler_soa_destroy(struct lp_build_sampler_soa *sampler) { @@ -314,6 +329,10 @@ lp_llvm_sampler_soa_create(const struct lp_sampler_static_state *static_state) sampler->dynamic_state.base.lod_bias = lp_llvm_sampler_lod_bias; sampler->dynamic_state.base.border_color = lp_llvm_sampler_border_color; +#if LP_USE_TEXTURE_CACHE + sampler->dynamic_state.base.cache_ptr = lp_llvm_texture_cache_ptr; +#endif + sampler->dynamic_state.static_state = static_state; return &sampler->base; diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.h b/src/gallium/drivers/llvmpipe/lp_tex_sample.h index f4aff226ce1..939131e7975 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample.h +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.h @@ -34,6 +34,10 @@ struct lp_sampler_static_state; +/** + * Whether texture cache is used for s3tc textures. + */ +#define LP_USE_TEXTURE_CACHE 1 /** * Pure-LLVM texture sampling code generator. @@ -42,5 +46,4 @@ struct lp_sampler_static_state; struct lp_build_sampler_soa * lp_llvm_sampler_soa_create(const struct lp_sampler_static_state *key); - #endif /* LP_TEX_SAMPLE_H */ |