aboutsummaryrefslogtreecommitdiffstats
path: root/src/gallium/auxiliary/gallivm/lp_bld_format.h
diff options
context:
space:
mode:
authorRoland Scheidegger <[email protected]>2015-10-27 05:34:00 +0100
committerRoland Scheidegger <[email protected]>2015-11-04 02:51:02 +0100
commit9285ed98f7557722fbb94f47c5bc138ef5dd9c70 (patch)
tree830ed38086de1317dac5991f88cb3990a79b9acd /src/gallium/auxiliary/gallivm/lp_bld_format.h
parent39b4dfe6ab1003863778a25c091c080e098833ec (diff)
llvmpipe: add cache for compressed textures
Compressed textures are very slow because decoding is rather complex (and because there is no JIT code to decode them either, for non-technical reasons). Thus, add a texture cache which holds a couple of decoded blocks. Right now this handles only s3tc formats, although it could be extended to work with other formats rather trivially as long as the result of the decode fits into 32 bits per texel (ideally, rgtc would actually decode to more than 8 bits per channel, but even then making it work for it shouldn't be too difficult). This can improve performance noticeably, but don't expect wonders (uncompressed is unsurprisingly still faster). It's also possible it might be slower in some cases (using nearest filtering, for example, or if there are otherwise not many cache hits; the cache is only direct-mapped, which isn't great). Also, the actual decode of a block relies on util code, so even though full blocks are always decoded, it is done texel by texel - this could obviously benefit greatly from SIMD-optimized code decoding full blocks at once... Note the cache is per (raster) thread, and currently only used for fragment shaders. Reviewed-by: Jose Fonseca <[email protected]>
Diffstat (limited to 'src/gallium/auxiliary/gallivm/lp_bld_format.h')
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_format.h56
1 files changed, 54 insertions, 2 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format.h b/src/gallium/auxiliary/gallivm/lp_bld_format.h
index 969f1f6cc94..5c866f420bd 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format.h
@@ -44,6 +44,45 @@ struct lp_type;
struct lp_build_context;
+#define LP_BUILD_FORMAT_CACHE_DEBUG 0
+/*
+ * Block cache
+ *
+ * Optional block cache to be used when unpacking big pixel blocks.
+ * LP_BUILD_FORMAT_CACHE_SIZE (the number of cache entries) must be a power of 2.
+ */
+
+#define LP_BUILD_FORMAT_CACHE_SIZE 128
+
+/*
+ * Note: cache_data needs 16 byte alignment.
+ */
+struct lp_build_format_cache
+{
+ PIPE_ALIGN_VAR(16) uint32_t cache_data[LP_BUILD_FORMAT_CACHE_SIZE][4][4];
+ uint64_t cache_tags[LP_BUILD_FORMAT_CACHE_SIZE];
+#if LP_BUILD_FORMAT_CACHE_DEBUG
+ uint64_t cache_access_total;
+ uint64_t cache_access_miss;
+#endif
+};
+
+
+enum {
+ LP_BUILD_FORMAT_CACHE_MEMBER_DATA = 0,
+ LP_BUILD_FORMAT_CACHE_MEMBER_TAGS,
+#if LP_BUILD_FORMAT_CACHE_DEBUG
+ LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_TOTAL,
+ LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS,
+#endif
+ LP_BUILD_FORMAT_CACHE_MEMBER_COUNT
+};
+
+
+LLVMTypeRef
+lp_build_format_cache_type(struct gallivm_state *gallivm);
+
+
/*
* AoS
*/
@@ -66,7 +105,8 @@ lp_build_fetch_rgba_aos(struct gallivm_state *gallivm,
LLVMValueRef base_ptr,
LLVMValueRef offset,
LLVMValueRef i,
- LLVMValueRef j);
+ LLVMValueRef j,
+ LLVMValueRef cache);
LLVMValueRef
lp_build_fetch_rgba_aos_array(struct gallivm_state *gallivm,
@@ -107,13 +147,13 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
LLVMValueRef offsets,
LLVMValueRef i,
LLVMValueRef j,
+ LLVMValueRef cache,
LLVMValueRef rgba_out[4]);
/*
* YUV
*/
-
LLVMValueRef
lp_build_fetch_subsampled_rgba_aos(struct gallivm_state *gallivm,
const struct util_format_description *format_desc,
@@ -123,6 +163,18 @@ lp_build_fetch_subsampled_rgba_aos(struct gallivm_state *gallivm,
LLVMValueRef i,
LLVMValueRef j);
+
+LLVMValueRef
+lp_build_fetch_cached_texels(struct gallivm_state *gallivm,
+ const struct util_format_description *format_desc,
+ unsigned n,
+ LLVMValueRef base_ptr,
+ LLVMValueRef offset,
+ LLVMValueRef i,
+ LLVMValueRef j,
+ LLVMValueRef cache);
+
+
/*
* special float formats
*/