diff options
author | Roland Scheidegger <[email protected]> | 2015-10-27 05:34:00 +0100 |
---|---|---|
committer | Roland Scheidegger <[email protected]> | 2015-11-04 02:51:02 +0100 |
commit | 9285ed98f7557722fbb94f47c5bc138ef5dd9c70 (patch) | |
tree | 830ed38086de1317dac5991f88cb3990a79b9acd /src/gallium/auxiliary/gallivm/lp_bld_format.h | |
parent | 39b4dfe6ab1003863778a25c091c080e098833ec (diff) |
llvmpipe: add cache for compressed textures
compressed textures are very slow because decoding is rather complex
(and because there's no jit code code to decode them too for non-technical
reasons).
Thus, add some texture cache which holds a couple of decoded blocks.
Right now this handles only s3tc format albeit it could be extended to work
with other formats rather trivially as long as the result of decode fits into
32bit per texel (ideally, rgtc actually would decode to more than 8 bits
per channel, but even then making it work for it shouldn't be too difficult).
This can improve performance noticeably but don't expect wonders (uncompressed
is unsurprisingly still faster). It's also possible it might be slower in
some cases (using nearest filtering for example or if there's otherwise not
many cache hits, the cache is only direct mapped which isn't great).
Also, actual decode of a block relies on util code, thus even though always
full blocks are decoded it is done texel by texel - this could obviously
benefit greatly from simd-optimized code decoding full blocks at once...
Note the cache is per (raster) thread, and currently only used for fragment
shaders.
Reviewed-by: Jose Fonseca <[email protected]>
Diffstat (limited to 'src/gallium/auxiliary/gallivm/lp_bld_format.h')
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_format.h | 56 |
1 files changed, 54 insertions, 2 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format.h b/src/gallium/auxiliary/gallivm/lp_bld_format.h index 969f1f6cc94..5c866f420bd 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_format.h @@ -44,6 +44,45 @@ struct lp_type; struct lp_build_context; +#define LP_BUILD_FORMAT_CACHE_DEBUG 0 +/* + * Block cache + * + * Optional block cache to be used when unpacking big pixel blocks. + * Must be a power of 2 + */ + +#define LP_BUILD_FORMAT_CACHE_SIZE 128 + +/* + * Note: cache_data needs 16 byte alignment. + */ +struct lp_build_format_cache +{ + PIPE_ALIGN_VAR(16) uint32_t cache_data[LP_BUILD_FORMAT_CACHE_SIZE][4][4]; + uint64_t cache_tags[LP_BUILD_FORMAT_CACHE_SIZE]; +#if LP_BUILD_FORMAT_CACHE_DEBUG + uint64_t cache_access_total; + uint64_t cache_access_miss; +#endif +}; + + +enum { + LP_BUILD_FORMAT_CACHE_MEMBER_DATA = 0, + LP_BUILD_FORMAT_CACHE_MEMBER_TAGS, +#if LP_BUILD_FORMAT_CACHE_DEBUG + LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_TOTAL, + LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS, +#endif + LP_BUILD_FORMAT_CACHE_MEMBER_COUNT +}; + + +LLVMTypeRef +lp_build_format_cache_type(struct gallivm_state *gallivm); + + /* * AoS */ @@ -66,7 +105,8 @@ lp_build_fetch_rgba_aos(struct gallivm_state *gallivm, LLVMValueRef base_ptr, LLVMValueRef offset, LLVMValueRef i, - LLVMValueRef j); + LLVMValueRef j, + LLVMValueRef cache); LLVMValueRef lp_build_fetch_rgba_aos_array(struct gallivm_state *gallivm, @@ -107,13 +147,13 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm, LLVMValueRef offsets, LLVMValueRef i, LLVMValueRef j, + LLVMValueRef cache, LLVMValueRef rgba_out[4]); /* * YUV */ - LLVMValueRef lp_build_fetch_subsampled_rgba_aos(struct gallivm_state *gallivm, const struct util_format_description *format_desc, @@ -123,6 +163,18 @@ lp_build_fetch_subsampled_rgba_aos(struct gallivm_state *gallivm, LLVMValueRef i, LLVMValueRef j); + +LLVMValueRef +lp_build_fetch_cached_texels(struct gallivm_state *gallivm, + const struct util_format_description *format_desc, + unsigned n, + LLVMValueRef base_ptr, + LLVMValueRef offset, + LLVMValueRef i, + LLVMValueRef j, + LLVMValueRef cache); + + /* * special float formats */ |