summary | refs | log | tree | commit | diff | stats
path: root/src/gallium/drivers/llvmpipe
diff options
context:
space:
mode:
author: Roland Scheidegger <[email protected]> 2016-12-21 04:56:01 +0100
committer: Roland Scheidegger <[email protected]> 2017-01-05 23:59:38 +0100
commit: 4634cb5921b985f04f2daf00cda2d28036143bd3 (patch)
tree: 71d18016633856cfbb77d20f265384d5e36e1e15 /src/gallium/drivers/llvmpipe
parent: bc86e829a5c87714a7f3798fe9096c75692e5157 (diff)
gallivm: implement aos unpack (to unorm8) for small unorm formats
Using bit replication. This path now resembles something which might make sense. (The logic was mostly copied from llvmpipe fs backend.)

I am not convinced though it is actually faster than SoA sampling (actually I'm quite certain it's always a loss with AVX).

With SoA it's just shift/mask/cvt/mul for getting the colors, whereas there's still roughly 3 shifts, 3 or/and per channel for AoS (i.e. for SoA it's exactly the same as it would be for a rgba8 format, whereas the extra effort for AoS is significant). The filtering might still be faster (albeit with FMA the instruction count gets down quite a bit there on the SoA float filtering path on new cpus).

And those small unorm formats often don't have an alpha channel (which makes things worse relatively for AoS path).

(This also fixes a trivial bug in the llvmpipe fs code this was derived from, albeit it was only relevant for 4-bit channels.)

Reviewed-by: Jose Fonseca <[email protected]>
Diffstat (limited to 'src/gallium/drivers/llvmpipe')
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_state_fs.c | 8
1 files changed, 3 insertions, 5 deletions
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index a36389ccc32..e56ce1dc8df 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -1096,7 +1096,7 @@ scale_bits(struct gallivm_state *gallivm,
lp_build_const_int_vec(gallivm, src_type, db),
"");
- if (db < src_bits) {
+ if (db <= src_bits) {
/* Enough bits in src to fill the remainder */
LLVMValueRef lower = LLVMBuildLShr(builder,
src,
@@ -1154,7 +1154,7 @@ convert_to_blend_type(struct gallivm_state *gallivm,
LLVMBuilderRef builder = gallivm->builder;
struct lp_type blend_type;
struct lp_type mem_type;
- unsigned i, j, k;
+ unsigned i, j;
unsigned pixels = block_size / num_srcs;
bool is_arith;
@@ -1267,9 +1267,7 @@ convert_to_blend_type(struct gallivm_state *gallivm,
unsigned from_lsb = src_fmt->nr_channels - j - 1;
#endif
- for (k = 0; k < src_fmt->channel[j].size; ++k) {
- mask |= 1 << k;
- }
+ mask = (1 << src_fmt->channel[j].size) - 1;
/* Extract bits from source */
chans[j] = LLVMBuildLShr(builder,