diff options
Diffstat (limited to 'src/gallium/auxiliary/gallivm')
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_format.c | 56 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_format.h | 56 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_format_aos.c | 31 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_format_cached.c | 374 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_format_soa.c | 37 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_sample.h | 13 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c | 6 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 42 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_tgsi.h | 2 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 5 |
10 files changed, 615 insertions, 7 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format.c b/src/gallium/auxiliary/gallivm/lp_bld_format.c new file mode 100644 index 00000000000..a82fd8feee8 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_format.c @@ -0,0 +1,56 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + **************************************************************************/ + + +#include "lp_bld_format.h" + + + +LLVMTypeRef +lp_build_format_cache_type(struct gallivm_state *gallivm) +{ + LLVMTypeRef elem_types[LP_BUILD_FORMAT_CACHE_MEMBER_COUNT]; + LLVMTypeRef s; + + elem_types[LP_BUILD_FORMAT_CACHE_MEMBER_DATA] = + LLVMArrayType(LLVMInt32TypeInContext(gallivm->context), + LP_BUILD_FORMAT_CACHE_SIZE * 16); + elem_types[LP_BUILD_FORMAT_CACHE_MEMBER_TAGS] = + LLVMArrayType(LLVMInt64TypeInContext(gallivm->context), + LP_BUILD_FORMAT_CACHE_SIZE); +#if LP_BUILD_FORMAT_CACHE_DEBUG + elem_types[LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_TOTAL] = + LLVMInt64TypeInContext(gallivm->context); + elem_types[LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS] = + LLVMInt64TypeInContext(gallivm->context); +#endif + + s = LLVMStructTypeInContext(gallivm->context, elem_types, + LP_BUILD_FORMAT_CACHE_MEMBER_COUNT, 0); + + return s; +} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format.h b/src/gallium/auxiliary/gallivm/lp_bld_format.h index 969f1f6cc94..5c866f420bd 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_format.h @@ -44,6 +44,45 @@ struct lp_type; struct lp_build_context; +#define LP_BUILD_FORMAT_CACHE_DEBUG 0 +/* + * Block cache + * + * Optional block cache to be used when unpacking big pixel blocks. + * Must be a power of 2 + */ + +#define LP_BUILD_FORMAT_CACHE_SIZE 128 + +/* + * Note: cache_data needs 16 byte alignment. + */ +struct lp_build_format_cache +{ + PIPE_ALIGN_VAR(16) uint32_t cache_data[LP_BUILD_FORMAT_CACHE_SIZE][4][4]; + uint64_t cache_tags[LP_BUILD_FORMAT_CACHE_SIZE]; +#if LP_BUILD_FORMAT_CACHE_DEBUG + uint64_t cache_access_total; + uint64_t cache_access_miss; +#endif +}; + + +enum { + LP_BUILD_FORMAT_CACHE_MEMBER_DATA = 0, + LP_BUILD_FORMAT_CACHE_MEMBER_TAGS, +#if LP_BUILD_FORMAT_CACHE_DEBUG + LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_TOTAL, + LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS, +#endif + LP_BUILD_FORMAT_CACHE_MEMBER_COUNT +}; + + +LLVMTypeRef +lp_build_format_cache_type(struct gallivm_state *gallivm); + + /* * AoS */ @@ -66,7 +105,8 @@ lp_build_fetch_rgba_aos(struct gallivm_state *gallivm, LLVMValueRef base_ptr, LLVMValueRef offset, LLVMValueRef i, - LLVMValueRef j); + LLVMValueRef j, + LLVMValueRef cache); LLVMValueRef lp_build_fetch_rgba_aos_array(struct gallivm_state *gallivm, @@ -107,13 +147,13 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm, LLVMValueRef offsets, LLVMValueRef i, LLVMValueRef j, + LLVMValueRef cache, LLVMValueRef rgba_out[4]); /* * YUV */ - LLVMValueRef lp_build_fetch_subsampled_rgba_aos(struct gallivm_state *gallivm, const struct util_format_description *format_desc, @@ -123,6 +163,18 @@ lp_build_fetch_subsampled_rgba_aos(struct gallivm_state *gallivm, LLVMValueRef i, LLVMValueRef j); + +LLVMValueRef +lp_build_fetch_cached_texels(struct gallivm_state *gallivm, + const struct util_format_description *format_desc, + unsigned n, + LLVMValueRef base_ptr, + LLVMValueRef offset, + LLVMValueRef i, + LLVMValueRef j, + LLVMValueRef cache); + + /* * special float formats */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c index ddf3ad1dfc6..a41b30bbb96 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c @@ -370,7 +370,8 @@ lp_build_fetch_rgba_aos(struct gallivm_state *gallivm, LLVMValueRef base_ptr, LLVMValueRef offset, LLVMValueRef i, - LLVMValueRef j) + LLVMValueRef j, + LLVMValueRef cache) { LLVMBuilderRef builder = gallivm->builder; unsigned num_pixels = type.length / 4; @@ -503,6 +504,34 @@ lp_build_fetch_rgba_aos(struct gallivm_state *gallivm, } /* + * s3tc rgb formats + */ + + if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC && cache) { + struct lp_type tmp_type; + LLVMValueRef tmp; + + memset(&tmp_type, 0, sizeof tmp_type); + tmp_type.width = 8; + tmp_type.length = num_pixels * 4; + tmp_type.norm = TRUE; + + tmp = lp_build_fetch_cached_texels(gallivm, + format_desc, + num_pixels, + base_ptr, + offset, + i, j, + cache); + + lp_build_conv(gallivm, + tmp_type, type, + &tmp, 1, &tmp, 1); + + return tmp; + } + + /* * Fallback to util_format_description::fetch_rgba_8unorm(). */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_cached.c b/src/gallium/auxiliary/gallivm/lp_bld_format_cached.c new file mode 100644 index 00000000000..b683e7f960c --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_cached.c @@ -0,0 +1,374 @@ +/************************************************************************** + * + * Copyright 2015 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "lp_bld_format.h" +#include "lp_bld_type.h" +#include "lp_bld_struct.h" +#include "lp_bld_const.h" +#include "lp_bld_flow.h" +#include "lp_bld_swizzle.h" + +#include "util/u_math.h" + + +/** + * @file + * Complex block-compression based formats are handled here by using a cache, + * so re-decoding of every pixel is not required. + * Especially for bilinear filtering, texel reuse is very high hence even + * a small cache helps. + * The elements in the cache are the decoded blocks - currently things + * are restricted to formats which are 4x4 block based, and the decoded + * texels must fit into 4x8 bits. + * The cache is direct mapped so hitrates aren't all that great and cache + * thrashing could happen. + * + * @author Roland Scheidegger <[email protected]> + */ + + +#if LP_BUILD_FORMAT_CACHE_DEBUG +static void +update_cache_access(struct gallivm_state *gallivm, + LLVMValueRef ptr, + unsigned count, + unsigned index) +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef member_ptr, cache_access; + + assert(index == LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_TOTAL || + index == LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS); + + member_ptr = lp_build_struct_get_ptr(gallivm, ptr, index, ""); + cache_access = LLVMBuildLoad(builder, member_ptr, "cache_access"); + cache_access = LLVMBuildAdd(builder, cache_access, + LLVMConstInt(LLVMInt64TypeInContext(gallivm->context), + count, 0), ""); + LLVMBuildStore(builder, cache_access, member_ptr); +} +#endif + + +static void +store_cached_block(struct gallivm_state *gallivm, + LLVMValueRef *col, + LLVMValueRef tag_value, + LLVMValueRef hash_index, + LLVMValueRef cache) +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef ptr, indices[3]; + LLVMTypeRef type_ptr4x32; + unsigned count; + + type_ptr4x32 = LLVMPointerType(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4), 0); + indices[0] = lp_build_const_int32(gallivm, 0); + indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_TAGS); + indices[2] = hash_index; + ptr = LLVMBuildGEP(builder, cache, indices, Elements(indices), ""); + LLVMBuildStore(builder, tag_value, ptr); + + indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_DATA); + hash_index = LLVMBuildMul(builder, hash_index, + lp_build_const_int32(gallivm, 16), ""); + for (count = 0; count < 4; count++) { + indices[2] = hash_index; + ptr = LLVMBuildGEP(builder, cache, indices, Elements(indices), ""); + ptr = LLVMBuildBitCast(builder, ptr, type_ptr4x32, ""); + LLVMBuildStore(builder, col[count], ptr); + hash_index = LLVMBuildAdd(builder, hash_index, + lp_build_const_int32(gallivm, 4), ""); + } +} + + +static LLVMValueRef +lookup_cached_pixel(struct gallivm_state *gallivm, + LLVMValueRef ptr, + LLVMValueRef index) +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef member_ptr, indices[3]; + + indices[0] = lp_build_const_int32(gallivm, 0); + indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_DATA); + indices[2] = index; + member_ptr = LLVMBuildGEP(builder, ptr, indices, Elements(indices), ""); + return LLVMBuildLoad(builder, member_ptr, "cache_data"); +} + + +static LLVMValueRef +lookup_tag_data(struct gallivm_state *gallivm, + LLVMValueRef ptr, + LLVMValueRef index) +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef member_ptr, indices[3]; + + indices[0] = lp_build_const_int32(gallivm, 0); + indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_TAGS); + indices[2] = index; + member_ptr = LLVMBuildGEP(builder, ptr, indices, Elements(indices), ""); + return LLVMBuildLoad(builder, member_ptr, "tag_data"); +} + + +static void +update_cached_block(struct gallivm_state *gallivm, + const struct util_format_description *format_desc, + LLVMValueRef ptr_addr, + LLVMValueRef hash_index, + LLVMValueRef cache) + +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context); + LLVMTypeRef pi8t = LLVMPointerType(i8t, 0); + LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); + LLVMTypeRef i32x4 = LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4); + LLVMValueRef function; + LLVMValueRef tag_value, tmp_ptr; + LLVMValueRef col[4]; + unsigned i, j; + + /* + * Use format_desc->fetch_rgba_8unorm() for each pixel in the block. + * This doesn't actually make any sense whatsoever, someone would need + * to write a function doing this for all pixels in a block (either as + * an external c function or with generated code). Don't ask. + */ + + { + /* + * Function to call looks like: + * fetch(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j) + */ + LLVMTypeRef ret_type; + LLVMTypeRef arg_types[4]; + LLVMTypeRef function_type; + + assert(format_desc->fetch_rgba_8unorm); + + ret_type = LLVMVoidTypeInContext(gallivm->context); + arg_types[0] = pi8t; + arg_types[1] = pi8t; + arg_types[2] = i32t; + arg_types[3] = i32t; + function_type = LLVMFunctionType(ret_type, arg_types, + Elements(arg_types), 0); + + /* make const pointer for the C fetch_rgba_8unorm function */ + function = lp_build_const_int_pointer(gallivm, + func_to_pointer((func_pointer) format_desc->fetch_rgba_8unorm)); + + /* cast the callee pointer to the function's type */ + function = LLVMBuildBitCast(builder, function, + LLVMPointerType(function_type, 0), + "cast callee"); + } + + tmp_ptr = lp_build_array_alloca(gallivm, i32x4, + lp_build_const_int32(gallivm, 16), + "tmp_decode_store"); + tmp_ptr = LLVMBuildBitCast(builder, tmp_ptr, pi8t, ""); + + /* + * Invoke format_desc->fetch_rgba_8unorm() for each pixel. + * This is going to be really really slow. + * Note: the block store format is actually + * x0y0x0y1x0y2x0y3 x1y0x1y1x1y2x1y3 ... + */ + for (i = 0; i < 4; ++i) { + for (j = 0; j < 4; ++j) { + LLVMValueRef args[4]; + LLVMValueRef dst_offset = lp_build_const_int32(gallivm, (i * 4 + j) * 4); + + /* + * Note we actually supply a pointer to the start of the block, + * not the start of the texture. + */ + args[0] = LLVMBuildGEP(gallivm->builder, tmp_ptr, &dst_offset, 1, ""); + args[1] = ptr_addr; + args[2] = LLVMConstInt(i32t, i, 0); + args[3] = LLVMConstInt(i32t, j, 0); + LLVMBuildCall(builder, function, args, Elements(args), ""); + } + } + + /* Finally store the block - pointless mem copy + update tag. */ + tmp_ptr = LLVMBuildBitCast(builder, tmp_ptr, LLVMPointerType(i32x4, 0), ""); + for (i = 0; i < 4; ++i) { + LLVMValueRef tmp_offset = lp_build_const_int32(gallivm, i); + LLVMValueRef ptr = LLVMBuildGEP(gallivm->builder, tmp_ptr, &tmp_offset, 1, ""); + col[i] = LLVMBuildLoad(builder, ptr, ""); + } + + tag_value = LLVMBuildPtrToInt(gallivm->builder, ptr_addr, + LLVMInt64TypeInContext(gallivm->context), ""); + store_cached_block(gallivm, col, tag_value, hash_index, cache); +} + + +/* + * Do a cached lookup. + * + * Returns (vectors of) 4x8 rgba aos value + */ +LLVMValueRef +lp_build_fetch_cached_texels(struct gallivm_state *gallivm, + const struct util_format_description *format_desc, + unsigned n, + LLVMValueRef base_ptr, + LLVMValueRef offset, + LLVMValueRef i, + LLVMValueRef j, + LLVMValueRef cache) + +{ + LLVMBuilderRef builder = gallivm->builder; + unsigned count, low_bit, log2size; + LLVMValueRef color, offset_stored, addr, ptr_addrtrunc, tmp; + LLVMValueRef ij_index, hash_index, hash_mask, block_index; + LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context); + LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); + LLVMTypeRef i64t = LLVMInt64TypeInContext(gallivm->context); + struct lp_type type; + struct lp_build_context bld32; + memset(&type, 0, sizeof type); + type.width = 32; + type.length = n; + + assert(format_desc->block.width == 4); + assert(format_desc->block.height == 4); + + lp_build_context_init(&bld32, gallivm, type); + + /* + * compute hash - we use direct mapped cache, the hash function could + * be better but it needs to be simple + * per-element: + * compare offset with offset stored at tag (hash) + * if not equal decode/store block, update tag + * extract color from cache + * assemble result vector + */ + + /* TODO: not ideal with 32bit pointers... */ + + low_bit = util_logbase2(format_desc->block.bits / 8); + log2size = util_logbase2(LP_BUILD_FORMAT_CACHE_SIZE); + addr = LLVMBuildPtrToInt(builder, base_ptr, i64t, ""); + ptr_addrtrunc = LLVMBuildPtrToInt(builder, base_ptr, i32t, ""); + ptr_addrtrunc = lp_build_broadcast_scalar(&bld32, ptr_addrtrunc); + /* For the hash function, first mask off the unused lowest bits. Then just + do some xor with address bits - only use lower 32bits */ + ptr_addrtrunc = LLVMBuildAdd(builder, offset, ptr_addrtrunc, ""); + ptr_addrtrunc = LLVMBuildLShr(builder, ptr_addrtrunc, + lp_build_const_int_vec(gallivm, type, low_bit), ""); + /* This only really makes sense for size 64,128,256 */ + hash_index = ptr_addrtrunc; + ptr_addrtrunc = LLVMBuildLShr(builder, ptr_addrtrunc, + lp_build_const_int_vec(gallivm, type, 2*log2size), ""); + hash_index = LLVMBuildXor(builder, ptr_addrtrunc, hash_index, ""); + tmp = LLVMBuildLShr(builder, hash_index, + lp_build_const_int_vec(gallivm, type, log2size), ""); + hash_index = LLVMBuildXor(builder, hash_index, tmp, ""); + + hash_mask = lp_build_const_int_vec(gallivm, type, LP_BUILD_FORMAT_CACHE_SIZE - 1); + hash_index = LLVMBuildAnd(builder, hash_index, hash_mask, ""); + ij_index = LLVMBuildShl(builder, i, lp_build_const_int_vec(gallivm, type, 2), ""); + ij_index = LLVMBuildAdd(builder, ij_index, j, ""); + block_index = LLVMBuildShl(builder, hash_index, + lp_build_const_int_vec(gallivm, type, 4), ""); + block_index = LLVMBuildAdd(builder, ij_index, block_index, ""); + + if (n > 1) { + color = LLVMGetUndef(LLVMVectorType(i32t, n)); + for (count = 0; count < n; count++) { + LLVMValueRef index, cond, colorx; + LLVMValueRef block_indexx, hash_indexx, addrx, offsetx, ptr_addrx; + struct lp_build_if_state if_ctx; + + index = lp_build_const_int32(gallivm, count); + offsetx = LLVMBuildExtractElement(builder, offset, index, ""); + addrx = LLVMBuildZExt(builder, offsetx, i64t, ""); + addrx = LLVMBuildAdd(builder, addrx, addr, ""); + block_indexx = LLVMBuildExtractElement(builder, block_index, index, ""); + hash_indexx = LLVMBuildLShr(builder, block_indexx, + lp_build_const_int32(gallivm, 4), ""); + offset_stored = lookup_tag_data(gallivm, cache, hash_indexx); + cond = LLVMBuildICmp(builder, LLVMIntNE, offset_stored, addrx, ""); + + lp_build_if(&if_ctx, gallivm, cond); + { + ptr_addrx = LLVMBuildIntToPtr(builder, addrx, + LLVMPointerType(i8t, 0), ""); + update_cached_block(gallivm, format_desc, ptr_addrx, hash_indexx, cache); +#if LP_BUILD_FORMAT_CACHE_DEBUG + update_cache_access(gallivm, cache, 1, + LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS); +#endif + } + lp_build_endif(&if_ctx); + + colorx = lookup_cached_pixel(gallivm, cache, block_indexx); + + color = LLVMBuildInsertElement(builder, color, colorx, + lp_build_const_int32(gallivm, count), ""); + } + } + else { + LLVMValueRef cond; + struct lp_build_if_state if_ctx; + + tmp = LLVMBuildZExt(builder, offset, i64t, ""); + addr = LLVMBuildAdd(builder, tmp, addr, ""); + offset_stored = lookup_tag_data(gallivm, cache, hash_index); + cond = LLVMBuildICmp(builder, LLVMIntNE, offset_stored, addr, ""); + + lp_build_if(&if_ctx, gallivm, cond); + { + tmp = LLVMBuildIntToPtr(builder, addr, LLVMPointerType(i8t, 0), ""); + update_cached_block(gallivm, format_desc, tmp, hash_index, cache); +#if LP_BUILD_FORMAT_CACHE_DEBUG + update_cache_access(gallivm, cache, 1, + LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS); +#endif + } + lp_build_endif(&if_ctx); + + color = lookup_cached_pixel(gallivm, cache, block_index); + } +#if LP_BUILD_FORMAT_CACHE_DEBUG + update_cache_access(gallivm, cache, n, + LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_TOTAL); +#endif + return LLVMBuildBitCast(builder, color, LLVMVectorType(i8t, n * 4), ""); +} + diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c index afaabc08790..42aef8376f8 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c @@ -346,6 +346,7 @@ lp_build_rgba8_to_fi32_soa(struct gallivm_state *gallivm, * \param i, j the sub-block pixel coordinates. For non-compressed formats * these will always be (0,0). For compressed formats, i will * be in [0, block_width-1] and j will be in [0, block_height-1]. + * \param cache optional value pointing to a lp_build_format_cache structure */ void lp_build_fetch_rgba_soa(struct gallivm_state *gallivm, @@ -355,6 +356,7 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm, LLVMValueRef offset, LLVMValueRef i, LLVMValueRef j, + LLVMValueRef cache, LLVMValueRef rgba_out[4]) { LLVMBuilderRef builder = gallivm->builder; @@ -473,7 +475,7 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm, tmp_type.norm = TRUE; tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type, - TRUE, base_ptr, offset, i, j); + TRUE, base_ptr, offset, i, j, cache); lp_build_rgba8_to_fi32_soa(gallivm, type, @@ -483,6 +485,37 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm, return; } + if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC && + /* non-srgb case is already handled above */ + format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB && + type.floating && type.width == 32 && + (type.length == 1 || (type.length % 4 == 0)) && + cache) { + const struct util_format_description *format_decompressed; + LLVMValueRef packed; + packed = lp_build_fetch_cached_texels(gallivm, + format_desc, + type.length, + base_ptr, + offset, + i, j, + cache); + packed = LLVMBuildBitCast(builder, packed, + lp_build_int_vec_type(gallivm, type), ""); + /* + * The values are now packed so they match ordinary srgb RGBA8 format, + * hence need to use matching format for unpack. + */ + format_decompressed = util_format_description(PIPE_FORMAT_R8G8B8A8_SRGB); + + lp_build_unpack_rgba_soa(gallivm, + format_decompressed, + type, + packed, rgba_out); + + return; + } + /* * Fallback to calling lp_build_fetch_rgba_aos for each pixel. * @@ -524,7 +557,7 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm, /* Get a single float[4]={R,G,B,A} pixel */ tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type, TRUE, base_ptr, offset_elem, - i_elem, j_elem); + i_elem, j_elem, cache); /* * Insert the AoS tmp value channels into the SoA result vectors at diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h index eba758da6ae..a6f0eff42f6 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h @@ -99,6 +99,7 @@ struct lp_sampler_params unsigned sampler_index; unsigned sample_key; LLVMValueRef context_ptr; + LLVMValueRef thread_data_ptr; const LLVMValueRef *coords; const LLVMValueRef *offsets; LLVMValueRef lod; @@ -267,6 +268,17 @@ struct lp_sampler_dynamic_state struct gallivm_state *gallivm, LLVMValueRef context_ptr, unsigned sampler_unit); + + /** + * Obtain texture cache (returns ptr to lp_build_format_cache). + * + * It's optional: no caching will be done if it's NULL. + */ + LLVMValueRef + (*cache_ptr)(const struct lp_sampler_dynamic_state *state, + struct gallivm_state *gallivm, + LLVMValueRef thread_data_ptr, + unsigned unit); }; @@ -356,6 +368,7 @@ struct lp_build_sample_context LLVMValueRef img_stride_array; LLVMValueRef base_ptr; LLVMValueRef mip_offsets; + LLVMValueRef cache; /** Integer vector with texture width, height, depth */ LLVMValueRef int_size; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c index d7fde810a76..729c5b8f6ef 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c @@ -593,7 +593,8 @@ lp_build_sample_fetch_image_nearest(struct lp_build_sample_context *bld, TRUE, data_ptr, offset, x_subcoord, - y_subcoord); + y_subcoord, + bld->cache); } *colors = rgba8; @@ -933,7 +934,8 @@ lp_build_sample_fetch_image_linear(struct lp_build_sample_context *bld, TRUE, data_ptr, offset[k][j][i], x_subcoord[i], - y_subcoord[j]); + y_subcoord[j], + bld->cache); } neighbors[k][j][i] = rgba8; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index 26bfa0d2677..e21933ffc85 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -161,6 +161,7 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, bld->texel_type, data_ptr, offset, i, j, + bld->cache, texel_out); /* @@ -2389,6 +2390,7 @@ lp_build_fetch_texel(struct lp_build_sample_context *bld, bld->texel_type, bld->base_ptr, offset, i, j, + bld->cache, colors_out); if (out_of_bound_ret_zero) { @@ -2442,6 +2444,7 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm, unsigned texture_index, unsigned sampler_index, LLVMValueRef context_ptr, + LLVMValueRef thread_data_ptr, const LLVMValueRef *coords, const LLVMValueRef *offsets, const struct lp_derivatives *derivs, /* optional */ @@ -2707,6 +2710,11 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm, context_ptr, texture_index); /* Note that mip_offsets is an array[level] of offsets to texture images */ + if (dynamic_state->cache_ptr && thread_data_ptr) { + bld.cache = dynamic_state->cache_ptr(dynamic_state, gallivm, + thread_data_ptr, texture_index); + } + /* width, height, depth as single int vector */ if (dims <= 1) { bld.int_size = tex_width; @@ -2883,6 +2891,7 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm, bld4.base_ptr = bld.base_ptr; bld4.mip_offsets = bld.mip_offsets; bld4.int_size = bld.int_size; + bld4.cache = bld.cache; bld4.vector_width = lp_type_width(type4); @@ -3081,12 +3090,14 @@ lp_build_sample_gen_func(struct gallivm_state *gallivm, LLVMValueRef offsets[3] = { NULL }; LLVMValueRef lod = NULL; LLVMValueRef context_ptr; + LLVMValueRef thread_data_ptr = NULL; LLVMValueRef texel_out[4]; struct lp_derivatives derivs; struct lp_derivatives *deriv_ptr = NULL; unsigned num_param = 0; unsigned i, num_coords, num_derivs, num_offsets, layer; enum lp_sampler_lod_control lod_control; + boolean need_cache = FALSE; lod_control = (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >> LP_SAMPLER_LOD_CONTROL_SHIFT; @@ -3094,8 +3105,19 @@ lp_build_sample_gen_func(struct gallivm_state *gallivm, get_target_info(static_texture_state->target, &num_coords, &num_derivs, &num_offsets, &layer); + if (dynamic_state->cache_ptr) { + const struct util_format_description *format_desc; + format_desc = util_format_description(static_texture_state->format); + if (format_desc && format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { + need_cache = TRUE; + } + } + /* "unpack" arguments */ context_ptr = LLVMGetParam(function, num_param++); + if (need_cache) { + thread_data_ptr = LLVMGetParam(function, num_param++); + } for (i = 0; i < num_coords; i++) { coords[i] = LLVMGetParam(function, num_param++); } @@ -3146,6 +3168,7 @@ lp_build_sample_gen_func(struct gallivm_state *gallivm, texture_index, sampler_index, context_ptr, + thread_data_ptr, coords, offsets, deriv_ptr, @@ -3189,6 +3212,7 @@ lp_build_sample_soa_func(struct gallivm_state *gallivm, const LLVMValueRef *offsets = params->offsets; const struct lp_derivatives *derivs = params->derivs; enum lp_sampler_lod_control lod_control; + boolean need_cache = FALSE; lod_control = (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >> LP_SAMPLER_LOD_CONTROL_SHIFT; @@ -3196,6 +3220,17 @@ lp_build_sample_soa_func(struct gallivm_state *gallivm, get_target_info(static_texture_state->target, &num_coords, &num_derivs, &num_offsets, &layer); + if (dynamic_state->cache_ptr) { + const struct util_format_description *format_desc; + format_desc = util_format_description(static_texture_state->format); + if (format_desc && format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { + /* + * This is not 100% correct, if we have cache but the + * util_format_s3tc_prefer is true the cache won't get used + * regardless (could hook up the block decode there...) */ + need_cache = TRUE; + } + } /* * texture function matches are found by name. * Thus the name has to include both the texture and sampler unit @@ -3221,6 +3256,9 @@ lp_build_sample_soa_func(struct gallivm_state *gallivm, */ arg_types[num_param++] = LLVMTypeOf(params->context_ptr); + if (need_cache) { + arg_types[num_param++] = LLVMTypeOf(params->thread_data_ptr); + } for (i = 0; i < num_coords; i++) { arg_types[num_param++] = LLVMTypeOf(coords[0]); assert(LLVMTypeOf(coords[0]) == LLVMTypeOf(coords[i])); @@ -3280,6 +3318,9 @@ lp_build_sample_soa_func(struct gallivm_state *gallivm, num_args = 0; args[num_args++] = params->context_ptr; + if (need_cache) { + args[num_args++] = params->thread_data_ptr; + } for (i = 0; i < num_coords; i++) { args[num_args++] = coords[i]; } @@ -3384,6 +3425,7 @@ lp_build_sample_soa(const struct lp_static_texture_state *static_texture_state, params->texture_index, params->sampler_index, params->context_ptr, + params->thread_data_ptr, params->coords, params->offsets, params->derivs, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h index 2ca9c6194b3..cc4549778a3 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h @@ -230,6 +230,7 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm, const LLVMValueRef (*inputs)[4], LLVMValueRef (*outputs)[4], LLVMValueRef context_ptr, + LLVMValueRef thread_data_ptr, struct lp_build_sampler_soa *sampler, const struct tgsi_shader_info *info, const struct lp_build_tgsi_gs_iface *gs_iface); @@ -447,6 +448,7 @@ struct lp_build_tgsi_soa_context const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS]; LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS]; LLVMValueRef context_ptr; + LLVMValueRef thread_data_ptr; const struct lp_build_sampler_soa *sampler; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index fae604e2f9c..7d2cd9a9e73 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -2321,6 +2321,7 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, params.texture_index = unit; params.sampler_index = unit; params.context_ptr = bld->context_ptr; + params.thread_data_ptr = bld->thread_data_ptr; params.coords = coords; params.offsets = offsets; params.lod = lod; @@ -2488,6 +2489,7 @@ emit_sample(struct lp_build_tgsi_soa_context *bld, params.texture_index = texture_unit; params.sampler_index = sampler_unit; params.context_ptr = bld->context_ptr; + params.thread_data_ptr = bld->thread_data_ptr; params.coords = coords; params.offsets = offsets; params.lod = lod; @@ -2608,6 +2610,7 @@ emit_fetch_texels( struct lp_build_tgsi_soa_context *bld, params.texture_index = unit; params.sampler_index = unit; params.context_ptr = bld->context_ptr; + params.thread_data_ptr = bld->thread_data_ptr; params.coords = coords; params.offsets = offsets; params.derivs = NULL; @@ -3858,6 +3861,7 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm, const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS], LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS], LLVMValueRef context_ptr, + LLVMValueRef thread_data_ptr, struct lp_build_sampler_soa *sampler, const struct tgsi_shader_info *info, const struct lp_build_tgsi_gs_iface *gs_iface) @@ -3893,6 +3897,7 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm, bld.bld_base.info = info; bld.indirect_files = info->indirect_files; bld.context_ptr = context_ptr; + bld.thread_data_ptr = thread_data_ptr; /* * If the number of temporaries is rather large then we just |