author | Roland Scheidegger <[email protected]> | 2016-05-13 01:44:39 +0200
committer | Roland Scheidegger <[email protected]> | 2016-05-27 19:11:28 +0200
commit | 9247570d42600fd01a8afbf6ae0bf6c7008e717a (patch)
tree | 9b8e9bc20a3bf8a4af9e29e7ec39c315091bbd8e /src/gallium/auxiliary
parent | 17d685c4267887661181209e47031bff9c85606b (diff)
gallivm: eliminate an unnecessary AND with unorm lerps
Instead of doing an add and then masking out the upper bits, we can
simply do the add with a half-wide type (this, of course, assumes
the hw can actually do it...), so we'll get the required zero
in the upper bits automatically.
Reviewed-by: Jose Fonseca <[email protected]>
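In other words: when both addends already have zero upper halves, a wrapping add at half the lane width leaves those upper halves zero by construction, which is exactly what the wide add followed by an AND used to guarantee. Below is a minimal scalar sketch of that equivalence for one 16-bit lane; it is plain C with hypothetical helper names (lerp_add_masked, lerp_add_narrow), not gallivm code.

```c
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Old shape: add in the wide (16-bit) type, then mask off the high bits. */
static uint16_t lerp_add_masked(uint16_t v0, uint16_t res)
{
   return (uint16_t)((v0 + res) & 0xff);
}

/* New shape: add in the half-wide (8-bit) type; the upper byte of the
 * 16-bit container stays zero on its own, so no AND is needed. */
static uint16_t lerp_add_narrow(uint16_t v0, uint16_t res)
{
   uint8_t lo = (uint8_t)((uint8_t)v0 + (uint8_t)res); /* wraps mod 256 */
   return (uint16_t)lo;                                /* upper byte is zero */
}

int main(void)
{
   /* Exhaustive check over all inputs whose upper byte is already zero,
    * which is the precondition stated in the patch. */
   for (unsigned v0 = 0; v0 < 256; v0++) {
      for (unsigned res = 0; res < 256; res++) {
         assert(lerp_add_masked((uint16_t)v0, (uint16_t)res) ==
                lerp_add_narrow((uint16_t)v0, (uint16_t)res));
      }
   }
   printf("add+mask and half-wide add agree on all zero-upper-byte inputs\n");
   return 0;
}
```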
Diffstat (limited to 'src/gallium/auxiliary')
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_arit.c | 45 |
1 file changed, 35 insertions, 10 deletions
```diff
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 9c78837d786..11a1e7d002a 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -1182,16 +1182,41 @@ lp_build_lerp_simple(struct lp_build_context *bld,
       res = lp_build_mul(bld, x, delta);
    }
 
-   res = lp_build_add(bld, v0, res);
-
-   if (((flags & LP_BLD_LERP_WIDE_NORMALIZED) && !bld->type.sign) ||
-       bld->type.fixed) {
-      /* We need to mask out the high order bits when lerping 8bit normalized colors stored on 16bits */
-      /* XXX: This step is necessary for lerping 8bit colors stored on 16bits,
-       * but it will be wrong for true fixed point use cases. Basically we need
-       * a more powerful lp_type, capable of further distinguishing the values
-       * interpretation from the value storage. */
-      res = LLVMBuildAnd(builder, res, lp_build_const_int_vec(bld->gallivm, bld->type, (1 << half_width) - 1), "");
+   if ((flags & LP_BLD_LERP_WIDE_NORMALIZED) && !bld->type.sign) {
+      /*
+       * At this point both res and v0 only use the lower half of the bits,
+       * the rest is zero. Instead of add / mask, do add with half wide type.
+       */
+      struct lp_type narrow_type;
+      struct lp_build_context narrow_bld;
+
+      memset(&narrow_type, 0, sizeof narrow_type);
+      narrow_type.sign = bld->type.sign;
+      narrow_type.width = bld->type.width/2;
+      narrow_type.length = bld->type.length*2;
+
+      lp_build_context_init(&narrow_bld, bld->gallivm, narrow_type);
+      res = LLVMBuildBitCast(builder, res, narrow_bld.vec_type, "");
+      v0 = LLVMBuildBitCast(builder, v0, narrow_bld.vec_type, "");
+      res = lp_build_add(&narrow_bld, v0, res);
+      res = LLVMBuildBitCast(builder, res, bld->vec_type, "");
+   } else {
+      res = lp_build_add(bld, v0, res);
+
+      if (bld->type.fixed) {
+         /*
+          * We need to mask out the high order bits when lerping 8bit
+          * normalized colors stored on 16bits
+          */
+         /* XXX: This step is necessary for lerping 8bit colors stored on
+          * 16bits, but it will be wrong for true fixed point use cases.
+          * Basically we need a more powerful lp_type, capable of further
+          * distinguishing the values interpretation from the value storage.
+          */
+         LLVMValueRef low_bits;
+         low_bits = lp_build_const_int_vec(bld->gallivm, bld->type, (1 << half_width) - 1);
+         res = LLVMBuildAnd(builder, res, low_bits, "");
+      }
    }
 
    return res;
```
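On x86-style SIMD, the saving is roughly the difference between a 16-bit add plus a mask (paddw + pand) and a single 8-bit add (paddb). The following SSE2 sketch illustrates that shape for the "8bit colors stored on 16bits" case described in the removed comment; it is only an illustration under that assumption, not a claim about the exact code LLVM emits for these gallivm vector types.

```c
#include <emmintrin.h>  /* SSE2 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
   /* Eight 16-bit lanes, each holding an 8-bit value with a zero upper byte. */
   __m128i v0  = _mm_setr_epi16(0x00ff, 0x0080, 0x0001, 0x00c0,
                                0x0040, 0x00aa, 0x0055, 0x0000);
   __m128i res = _mm_setr_epi16(0x0010, 0x00f0, 0x00ff, 0x0080,
                                0x0001, 0x0077, 0x00ab, 0x00ff);

   /* Old shape: wide add, then AND away the high byte of every lane. */
   __m128i wide = _mm_and_si128(_mm_add_epi16(v0, res),
                                _mm_set1_epi16(0x00ff));

   /* New shape: treat the same bits as sixteen 8-bit lanes and add once;
    * the upper bytes are 0 + 0, so no mask is needed. */
   __m128i narrow = _mm_add_epi8(v0, res);

   uint16_t a[8], b[8];
   memcpy(a, &wide, sizeof a);
   memcpy(b, &narrow, sizeof b);
   for (int i = 0; i < 8; i++)
      printf("lane %d: add+mask=0x%04x  half-wide add=0x%04x\n", i, a[i], b[i]);
   return 0;
}
```

Both variants print identical lanes, which is the equivalence the patch relies on: the bitcast to the narrow type costs nothing, and the AND instruction disappears.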