author | Roland Scheidegger <[email protected]> | 2016-05-13 01:44:39 +0200
committer | Roland Scheidegger <[email protected]> | 2016-05-27 19:11:28 +0200
commit | 9247570d42600fd01a8afbf6ae0bf6c7008e717a (patch)
tree | 9b8e9bc20a3bf8a4af9e29e7ec39c315091bbd8e /src/gallium/auxiliary
parent | 17d685c4267887661181209e47031bff9c85606b (diff)
gallivm: eliminate an unnecessary AND with unorm lerps
Instead of doing an add and then masking out the upper bits, we can
simply do the add with a half-wide type (this, of course, assumes
the hw can actually do it...), so we'll get the required zero
in the upper bits automatically.
Reviewed-by: Jose Fonseca <[email protected]>
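In other words: when both addends already have zero upper halves, a wrapping add at half the lane width leaves those upper halves zero by construction, which is exactly what the wide add followed by an AND used to guarantee. Below is a minimal scalar sketch of that equivalence for one 16-bit lane; it is plain C with hypothetical helper names (lerp_add_masked, lerp_add_narrow), not gallivm code.

```c
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Old shape: add in the wide (16-bit) type, then mask off the high bits. */
static uint16_t lerp_add_masked(uint16_t v0, uint16_t res)
{
   return (uint16_t)((v0 + res) & 0xff);
}

/* New shape: add in the half-wide (8-bit) type; the upper byte of the
 * 16-bit container stays zero on its own, so no AND is needed. */
static uint16_t lerp_add_narrow(uint16_t v0, uint16_t res)
{
   uint8_t lo = (uint8_t)((uint8_t)v0 + (uint8_t)res); /* wraps mod 256 */
   return (uint16_t)lo;                                /* upper byte is zero */
}

int main(void)
{
   /* Exhaustive check over all inputs whose upper byte is already zero,
    * which is the precondition stated in the patch. */
   for (unsigned v0 = 0; v0 < 256; v0++) {
      for (unsigned res = 0; res < 256; res++) {
         assert(lerp_add_masked((uint16_t)v0, (uint16_t)res) ==
                lerp_add_narrow((uint16_t)v0, (uint16_t)res));
      }
   }
   printf("add+mask and half-wide add agree on all zero-upper-byte inputs\n");
   return 0;
}
```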
Diffstat (limited to 'src/gallium/auxiliary')
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_arit.c | 45 |
1 file changed, 35 insertions, 10 deletions
```diff
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 9c78837d786..11a1e7d002a 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -1182,16 +1182,41 @@ lp_build_lerp_simple(struct lp_build_context *bld,
       res = lp_build_mul(bld, x, delta);
    }
 
-   res = lp_build_add(bld, v0, res);
-
-   if (((flags & LP_BLD_LERP_WIDE_NORMALIZED) && !bld->type.sign) ||
-       bld->type.fixed) {
-      /* We need to mask out the high order bits when lerping 8bit normalized colors stored on 16bits */
-      /* XXX: This step is necessary for lerping 8bit colors stored on 16bits,
-       * but it will be wrong for true fixed point use cases. Basically we need
-       * a more powerful lp_type, capable of further distinguishing the values
-       * interpretation from the value storage. */
-      res = LLVMBuildAnd(builder, res, lp_build_const_int_vec(bld->gallivm, bld->type, (1 << half_width) - 1), "");
+   if ((flags & LP_BLD_LERP_WIDE_NORMALIZED) && !bld->type.sign) {
+      /*
+       * At this point both res and v0 only use the lower half of the bits,
+       * the rest is zero. Instead of add / mask, do add with half wide type.
+       */
+      struct lp_type narrow_type;
+      struct lp_build_context narrow_bld;
+
+      memset(&narrow_type, 0, sizeof narrow_type);
+      narrow_type.sign = bld->type.sign;
+      narrow_type.width = bld->type.width/2;
+      narrow_type.length = bld->type.length*2;
+
+      lp_build_context_init(&narrow_bld, bld->gallivm, narrow_type);
+      res = LLVMBuildBitCast(builder, res, narrow_bld.vec_type, "");
+      v0 = LLVMBuildBitCast(builder, v0, narrow_bld.vec_type, "");
+      res = lp_build_add(&narrow_bld, v0, res);
+      res = LLVMBuildBitCast(builder, res, bld->vec_type, "");
+   } else {
+      res = lp_build_add(bld, v0, res);
+
+      if (bld->type.fixed) {
+         /*
+          * We need to mask out the high order bits when lerping 8bit
+          * normalized colors stored on 16bits
+          */
+         /* XXX: This step is necessary for lerping 8bit colors stored on
+          * 16bits, but it will be wrong for true fixed point use cases.
+          * Basically we need a more powerful lp_type, capable of further
+          * distinguishing the values interpretation from the value storage.
+          */
+         LLVMValueRef low_bits;
+         low_bits = lp_build_const_int_vec(bld->gallivm, bld->type, (1 << half_width) - 1);
+         res = LLVMBuildAnd(builder, res, low_bits, "");
+      }
    }
 
    return res;
```
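On x86-style SIMD, the saving is roughly the difference between a 16-bit add plus a mask (paddw + pand) and a single 8-bit add (paddb). The following SSE2 sketch illustrates that shape for the "8bit colors stored on 16bits" case described in the removed comment; it is only an illustration under that assumption, not a claim about the exact code LLVM emits for these gallivm vector types.

```c
#include <emmintrin.h>  /* SSE2 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
   /* Eight 16-bit lanes, each holding an 8-bit value with a zero upper byte. */
   __m128i v0  = _mm_setr_epi16(0x00ff, 0x0080, 0x0001, 0x00c0,
                                0x0040, 0x00aa, 0x0055, 0x0000);
   __m128i res = _mm_setr_epi16(0x0010, 0x00f0, 0x00ff, 0x0080,
                                0x0001, 0x0077, 0x00ab, 0x00ff);

   /* Old shape: wide add, then AND away the high byte of every lane. */
   __m128i wide = _mm_and_si128(_mm_add_epi16(v0, res),
                                _mm_set1_epi16(0x00ff));

   /* New shape: treat the same bits as sixteen 8-bit lanes and add once;
    * the upper bytes are 0 + 0, so no mask is needed. */
   __m128i narrow = _mm_add_epi8(v0, res);

   uint16_t a[8], b[8];
   memcpy(a, &wide, sizeof a);
   memcpy(b, &narrow, sizeof b);
   for (int i = 0; i < 8; i++)
      printf("lane %d: add+mask=0x%04x  half-wide add=0x%04x\n", i, a[i], b[i]);
   return 0;
}
```

Both variants print identical lanes, which is the equivalence the patch relies on: the bitcast to the narrow type costs nothing, and the AND instruction disappears.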