diff options
author | Roland Scheidegger <[email protected]> | 2015-10-27 05:34:00 +0100 |
---|---|---|
committer | Roland Scheidegger <[email protected]> | 2015-11-04 02:51:02 +0100 |
commit | 9285ed98f7557722fbb94f47c5bc138ef5dd9c70 (patch) | |
tree | 830ed38086de1317dac5991f88cb3990a79b9acd /src/gallium/drivers | |
parent | 39b4dfe6ab1003863778a25c091c080e098833ec (diff) |
llvmpipe: add cache for compressed textures
compressed textures are very slow because decoding is rather complex
(and because there's no jit code code to decode them too for non-technical
reasons).
Thus, add some texture cache which holds a couple of decoded blocks.
Right now this handles only s3tc format albeit it could be extended to work
with other formats rather trivially as long as the result of decode fits into
32bit per texel (ideally, rgtc actually would decode to more than 8 bits
per channel, but even then making it work for it shouldn't be too difficult).
This can improve performance noticeably but don't expect wonders (uncompressed
is unsurprisingly still faster). It's also possible it might be slower in
some cases (using nearest filtering for example or if there's otherwise not
many cache hits, the cache is only direct mapped which isn't great).
Also, actual decode of a block relies on util code, thus even though always
full blocks are decoded it is done texel by texel - this could obviously
benefit greatly from simd-optimized code decoding full blocks at once...
Note the cache is per (raster) thread, and currently only used for fragment
shaders.
Reviewed-by: Jose Fonseca <[email protected]>
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_jit.c | 3 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_jit.h | 8 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_rast.c | 44 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_state_fs.c | 4 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_test_format.c | 36 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_tex_sample.c | 19 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_tex_sample.h | 5 |
7 files changed, 109 insertions, 10 deletions
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index 9acde4f1b06..b915c1d64ff 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -36,6 +36,7 @@ #include "util/u_memory.h" #include "gallivm/lp_bld_init.h" #include "gallivm/lp_bld_debug.h" +#include "gallivm/lp_bld_format.h" #include "lp_context.h" #include "lp_jit.h" @@ -208,6 +209,8 @@ lp_jit_create_types(struct lp_fragment_shader_variant *lp) LLVMTypeRef elem_types[LP_JIT_THREAD_DATA_COUNT]; LLVMTypeRef thread_data_type; + elem_types[LP_JIT_THREAD_DATA_CACHE] = + LLVMPointerType(lp_build_format_cache_type(gallivm), 0); elem_types[LP_JIT_THREAD_DATA_COUNTER] = LLVMInt64TypeInContext(lc); elem_types[LP_JIT_THREAD_DATA_RASTER_STATE_VIEWPORT_INDEX] = LLVMInt32TypeInContext(lc); diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h index 097fa7dce7c..9db26f2cba9 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.h +++ b/src/gallium/drivers/llvmpipe/lp_jit.h @@ -43,6 +43,7 @@ #include "lp_texture.h" +struct lp_build_format_cache; struct lp_fragment_shader_variant; struct llvmpipe_screen; @@ -189,6 +190,7 @@ enum { struct lp_jit_thread_data { + struct lp_build_format_cache *cache; uint64_t vis_counter; /* @@ -201,12 +203,16 @@ struct lp_jit_thread_data enum { - LP_JIT_THREAD_DATA_COUNTER = 0, + LP_JIT_THREAD_DATA_CACHE = 0, + LP_JIT_THREAD_DATA_COUNTER, LP_JIT_THREAD_DATA_RASTER_STATE_VIEWPORT_INDEX, LP_JIT_THREAD_DATA_COUNT }; +#define lp_jit_thread_data_cache(_gallivm, _ptr) \ + lp_build_struct_get(_gallivm, _ptr, LP_JIT_THREAD_DATA_CACHE, "cache") + #define lp_jit_thread_data_counter(_gallivm, _ptr) \ lp_build_struct_get_ptr(_gallivm, _ptr, LP_JIT_THREAD_DATA_COUNTER, "counter") diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index c726707c062..d22e50777fa 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -43,6 +43,7 @@ #include "lp_query.h" #include "lp_rast.h" #include "lp_rast_priv.h" +#include "gallivm/lp_bld_format.h" #include "gallivm/lp_bld_debug.h" #include "lp_scene.h" #include "lp_tex_sample.h" @@ -664,6 +665,17 @@ rasterize_scene(struct lp_rasterizer_task *task, { task->scene = scene; + /* Clear the cache tags. This should not always be necessary but + simpler for now. */ +#if LP_USE_TEXTURE_CACHE + memset(task->thread_data.cache->cache_tags, 0, + sizeof(task->thread_data.cache->cache_tags)); +#if LP_BUILD_FORMAT_CACHE_DEBUG + task->thread_data.cache->cache_access_total = 0; + task->thread_data.cache->cache_access_miss = 0; +#endif +#endif + if (!task->rast->no_rast && !scene->discard) { /* loop over scene bins, rasterize each */ { @@ -679,6 +691,20 @@ rasterize_scene(struct lp_rasterizer_task *task, } +#if LP_BUILD_FORMAT_CACHE_DEBUG + { + uint64_t total, miss; + total = task->thread_data.cache->cache_access_total; + miss = task->thread_data.cache->cache_access_miss; + if (total) { + debug_printf("thread %d cache access %llu miss %llu hit rate %f\n", + task->thread_index, (long long unsigned)total, + (long long unsigned)miss, + (float)(total - miss)/(float)total); + } + } +#endif + if (scene->fence) { lp_fence_signal(scene->fence); } @@ -866,10 +892,15 @@ lp_rast_create( unsigned num_threads ) goto no_full_scenes; } - for (i = 0; i < Elements(rast->tasks); i++) { + for (i = 0; i < MAX2(1, num_threads); i++) { struct lp_rasterizer_task *task = &rast->tasks[i]; task->rast = rast; task->thread_index = i; + task->thread_data.cache = align_malloc(sizeof(struct lp_build_format_cache), + 16); + if (!task->thread_data.cache) { + goto no_thread_data_cache; + } } rast->num_threads = num_threads; @@ -885,6 +916,14 @@ lp_rast_create( unsigned num_threads ) return rast; +no_thread_data_cache: + for (i = 0; i < MAX2(1, rast->num_threads); i++) { + if (rast->tasks[i].thread_data.cache) { + align_free(rast->tasks[i].thread_data.cache); + } + } + + lp_scene_queue_destroy(rast->full_scenes); no_full_scenes: FREE(rast); no_rast: @@ -923,6 +962,9 @@ void lp_rast_destroy( struct lp_rasterizer *rast ) pipe_semaphore_destroy(&rast->tasks[i].work_ready); pipe_semaphore_destroy(&rast->tasks[i].work_done); } + for (i = 0; i < MAX2(1, rast->num_threads); i++) { + align_free(rast->tasks[i].thread_data.cache); + } /* for synchronizing rasterization threads */ pipe_barrier_destroy( &rast->barrier ); diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index fd6c49aacd8..f55f6b4fa4f 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -421,7 +421,7 @@ generate_fs_loop(struct gallivm_state *gallivm, lp_build_tgsi_soa(gallivm, tokens, type, &mask, consts_ptr, num_consts_ptr, &system_values, interp->inputs, - outputs, context_ptr, + outputs, context_ptr, thread_data_ptr, sampler, &shader->info.base, NULL); /* Alpha test */ @@ -2303,8 +2303,8 @@ generate_fragment(struct llvmpipe_context *lp, lp_build_name(dady_ptr, "dady"); lp_build_name(color_ptr_ptr, "color_ptr_ptr"); lp_build_name(depth_ptr, "depth"); - lp_build_name(thread_data_ptr, "thread_data"); lp_build_name(mask_input, "mask_input"); + lp_build_name(thread_data_ptr, "thread_data"); lp_build_name(stride_ptr, "stride_ptr"); lp_build_name(depth_stride, "depth_stride"); diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c index d9abd1ae37c..0640a217874 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_format.c +++ b/src/gallium/drivers/llvmpipe/lp_test_format.c @@ -44,6 +44,9 @@ #include "lp_test.h" +#define USE_TEXTURE_CACHE 1 + +static struct lp_build_format_cache *cache_ptr; void write_tsv_header(FILE *fp) @@ -71,7 +74,7 @@ write_tsv_row(FILE *fp, typedef void (*fetch_ptr_t)(void *unpacked, const void *packed, - unsigned i, unsigned j); + unsigned i, unsigned j, struct lp_build_format_cache *cache); static LLVMValueRef @@ -83,7 +86,7 @@ add_fetch_rgba_test(struct gallivm_state *gallivm, unsigned verbose, LLVMContextRef context = gallivm->context; LLVMModuleRef module = gallivm->module; LLVMBuilderRef builder = gallivm->builder; - LLVMTypeRef args[4]; + LLVMTypeRef args[5]; LLVMValueRef func; LLVMValueRef packed_ptr; LLVMValueRef offset = LLVMConstNull(LLVMInt32TypeInContext(context)); @@ -92,6 +95,7 @@ add_fetch_rgba_test(struct gallivm_state *gallivm, unsigned verbose, LLVMValueRef j; LLVMBasicBlockRef block; LLVMValueRef rgba; + LLVMValueRef cache = NULL; util_snprintf(name, sizeof name, "fetch_%s_%s", desc->short_name, type.floating ? "float" : "unorm8"); @@ -99,6 +103,7 @@ add_fetch_rgba_test(struct gallivm_state *gallivm, unsigned verbose, args[0] = LLVMPointerType(lp_build_vec_type(gallivm, type), 0); args[1] = LLVMPointerType(LLVMInt8TypeInContext(context), 0); args[3] = args[2] = LLVMInt32TypeInContext(context); + args[4] = LLVMPointerType(lp_build_format_cache_type(gallivm), 0); func = LLVMAddFunction(module, name, LLVMFunctionType(LLVMVoidTypeInContext(context), @@ -109,11 +114,15 @@ add_fetch_rgba_test(struct gallivm_state *gallivm, unsigned verbose, i = LLVMGetParam(func, 2); j = LLVMGetParam(func, 3); + if (cache_ptr) { + cache = LLVMGetParam(func, 4); + } + block = LLVMAppendBasicBlockInContext(context, func, "entry"); LLVMPositionBuilderAtEnd(builder, block); rgba = lp_build_fetch_rgba_aos(gallivm, desc, type, TRUE, - packed_ptr, offset, i, j); + packed_ptr, offset, i, j, cache); LLVMBuildStore(builder, rgba, rgba_ptr); @@ -170,7 +179,7 @@ test_format_float(unsigned verbose, FILE *fp, memset(unpacked, 0, sizeof unpacked); - fetch_ptr(unpacked, packed, j, i); + fetch_ptr(unpacked, packed, j, i, cache_ptr); for(k = 0; k < 4; ++k) { if (util_double_inf_sign(test->unpacked[i][j][k]) != util_inf_sign(unpacked[k])) { @@ -187,6 +196,11 @@ test_format_float(unsigned verbose, FILE *fp, } } + /* Ignore errors in S3TC for now */ + if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { + match = TRUE; + } + if (!match) { printf("FAILED\n"); printf(" Packed: %02x %02x %02x %02x\n", @@ -261,7 +275,7 @@ test_format_unorm8(unsigned verbose, FILE *fp, memset(unpacked, 0, sizeof unpacked); - fetch_ptr(unpacked, packed, j, i); + fetch_ptr(unpacked, packed, j, i, cache_ptr); match = TRUE; for(k = 0; k < 4; ++k) { @@ -277,6 +291,11 @@ test_format_unorm8(unsigned verbose, FILE *fp, match = FALSE; } + /* Ignore errors in S3TC as we only implement a poor man approach */ + if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { + match = TRUE; + } + if (!match) { printf("FAILED\n"); printf(" Packed: %02x %02x %02x %02x\n", @@ -334,6 +353,10 @@ test_all(unsigned verbose, FILE *fp) util_format_s3tc_init(); +#if USE_TEXTURE_CACHE + cache_ptr = align_malloc(sizeof(struct lp_build_format_cache), 16); +#endif + for (format = 1; format < PIPE_FORMAT_COUNT; ++format) { const struct util_format_description *format_desc; @@ -363,6 +386,9 @@ test_all(unsigned verbose, FILE *fp) success = FALSE; } } +#if USE_TEXTURE_CACHE + align_free(cache_ptr); +#endif return success; } diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.c b/src/gallium/drivers/llvmpipe/lp_tex_sample.c index 316d1c55082..217abe963b7 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample.c +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.c @@ -221,6 +221,21 @@ LP_LLVM_SAMPLER_MEMBER(lod_bias, LP_JIT_SAMPLER_LOD_BIAS, TRUE) LP_LLVM_SAMPLER_MEMBER(border_color, LP_JIT_SAMPLER_BORDER_COLOR, FALSE) +#if LP_USE_TEXTURE_CACHE +static LLVMValueRef +lp_llvm_texture_cache_ptr(const struct lp_sampler_dynamic_state *base, + struct gallivm_state *gallivm, + LLVMValueRef thread_data_ptr, + unsigned unit) +{ + /* We use the same cache for all units */ + (void)unit; + + return lp_jit_thread_data_cache(gallivm, thread_data_ptr); +} +#endif + + static void lp_llvm_sampler_soa_destroy(struct lp_build_sampler_soa *sampler) { @@ -314,6 +329,10 @@ lp_llvm_sampler_soa_create(const struct lp_sampler_static_state *static_state) sampler->dynamic_state.base.lod_bias = lp_llvm_sampler_lod_bias; sampler->dynamic_state.base.border_color = lp_llvm_sampler_border_color; +#if LP_USE_TEXTURE_CACHE + sampler->dynamic_state.base.cache_ptr = lp_llvm_texture_cache_ptr; +#endif + sampler->dynamic_state.static_state = static_state; return &sampler->base; diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.h b/src/gallium/drivers/llvmpipe/lp_tex_sample.h index f4aff226ce1..939131e7975 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample.h +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.h @@ -34,6 +34,10 @@ struct lp_sampler_static_state; +/** + * Whether texture cache is used for s3tc textures. + */ +#define LP_USE_TEXTURE_CACHE 1 /** * Pure-LLVM texture sampling code generator. @@ -42,5 +46,4 @@ struct lp_sampler_static_state; struct lp_build_sampler_soa * lp_llvm_sampler_soa_create(const struct lp_sampler_static_state *key); - #endif /* LP_TEX_SAMPLE_H */ |