diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_format_s3tc.c | 32 |
1 files changed, 15 insertions, 17 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_s3tc.c b/src/gallium/auxiliary/gallivm/lp_bld_format_s3tc.c index 90b2be981e4..9020aabf337 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_s3tc.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_s3tc.c @@ -77,24 +77,17 @@ lp_build_uninterleave2_half(struct gallivm_state *gallivm, unsigned lo_hi) { LLVMValueRef shuffle, elems[LP_MAX_VECTOR_LENGTH]; - unsigned i, j; + unsigned i; assert(type.length <= LP_MAX_VECTOR_LENGTH); assert(lo_hi < 2); if (type.length * type.width == 256) { - assert(type.length >= 4); - for (i = 0, j = 0; i < type.length; ++i) { - if (i == type.length / 4) { - j = type.length; - } else if (i == type.length / 2) { - j = type.length / 2; - } else if (i == 3 * type.length / 4) { - j = 3 * type.length / 4; - } else { - j += 2; - } - elems[i] = lp_build_const_int32(gallivm, j + lo_hi); + assert(type.length == 8); + assert(type.width == 32); + static const unsigned shufvals[8] = {0, 2, 8, 10, 4, 6, 12, 14}; + for (i = 0; i < type.length; ++i) { + elems[i] = lp_build_const_int32(gallivm, shufvals[i] + lo_hi); } } else { for (i = 0; i < type.length; ++i) { @@ -277,7 +270,7 @@ lp_build_gather_s3tc(struct gallivm_state *gallivm, } else { LLVMValueRef tmp[4], cc01, cc23; - struct lp_type lp_type32, lp_type64, lp_type32dxt; + struct lp_type lp_type32, lp_type64; memset(&lp_type32, 0, sizeof lp_type32); lp_type32.width = 32; lp_type32.length = length; @@ -309,10 +302,13 @@ lp_build_gather_s3tc(struct gallivm_state *gallivm, lp_build_const_extend_shuffle(gallivm, 2, 4), ""); } if (length == 8) { + struct lp_type lp_type32_4 = {0}; + lp_type32_4.width = 32; + lp_type32_4.length = 4; for (i = 0; i < 4; ++i) { tmp[0] = elems[i]; tmp[1] = elems[i+4]; - elems[i] = lp_build_concat(gallivm, tmp, lp_type32, 2); + elems[i] = lp_build_concat(gallivm, tmp, lp_type32_4, 2); } } cc01 = lp_build_interleave2_half(gallivm, lp_type32, elems[0], elems[1], 0); @@ -811,7 +807,7 @@ s3tc_dxt3_to_rgba_aos(struct gallivm_state *gallivm, tmp = lp_build_select(&bld, sel_mask, alpha_low, alpha_hi); bit_pos = LLVMBuildAnd(builder, bit_pos, lp_build_const_int_vec(gallivm, type, 0xffffffdf), ""); - /* Warning: slow shift with per element count */ + /* Warning: slow shift with per element count (without avx2) */ /* * Could do pshufb here as well - just use appropriate 2 bits in bit_pos * to select the right byte with pshufb. Then for the remaining one bit @@ -1640,7 +1636,6 @@ s3tc_decode_block_dxt5(struct gallivm_state *gallivm, lp_build_const_int_vec(gallivm, type16, 8), ""); alpha = LLVMBuildBitCast(builder, alpha, i64t, ""); shuffle1 = lp_build_const_shuffle1(gallivm, 0, 8); - /* XXX this shuffle broken with LLVM 2.8 */ alpha0 = LLVMBuildShuffleVector(builder, alpha0, alpha0, shuffle1, ""); alpha1 = LLVMBuildShuffleVector(builder, alpha1, alpha1, shuffle1, ""); @@ -2176,6 +2171,9 @@ lp_build_fetch_s3tc_rgba_aos(struct gallivm_state *gallivm, return rgba; } + /* + * Could use n > 8 here with avx2, but doesn't seem faster. + */ if (n > 4) { unsigned count; LLVMTypeRef i8_vectype = LLVMVectorType(i8t, 4 * n); |