llvmpipe: use alpha from already converted color if possible

For rgbx formats, there is no point in doing the alpha conversion again (and with a different transpose even, so llvm can't eliminate it). It looks like some minimal changes are needed in the blend code if we do this (found by code inspection; no test seemed to complain). The blend factors are already sanitized if we have no destination alpha; however, for src_alpha_saturate it looks like it might still make a difference (note that we forced has_alpha to true before for some formats and nothing complained, but this seems safer).

Reviewed-by: Jose Fonseca <[email protected]>
author: Roland Scheidegger <[email protected]> 2016-12-22 03:49:22 +0100
committer: Roland Scheidegger <[email protected]> 2017-01-06 23:13:34 +0100
commit: 04480a04b10524e156cb01d5f7bd8ec02fcc4a67 (patch)
tree: fda9490bf6687184492195178ea3d1c0b0217674 /src/gallium/drivers/llvmpipe
parent: 53c2d24a24a631a5be0a9c4df88f23dda1e8685c (diff)
2 files changed, 54 insertions, 18 deletions
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
index a57670d4956..45c5c2bb65e 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
@@ -74,6 +74,7 @@ struct lp_build_blend_aos_context
    LLVMValueRef dst;
    LLVMValueRef const_;
    LLVMValueRef const_alpha;
+   boolean has_dst_alpha;
 
    LLVMValueRef inv_src;
    LLVMValueRef inv_src_alpha;
@@ -115,10 +116,10 @@ lp_build_blend_factor_unswizzled(struct lp_build_blend_aos_context *bld,
          return bld->base.one;
       else {
          /*
-          * if there's separate src_alpha there's no dst alpha hence the complement
-          * is zero but for unclamped float inputs min can be non-zero (negative).
+          * If there's no dst alpha the complement is zero but for unclamped
+          * float inputs min can be non-zero (negative).
           */
-         if (bld->src_alpha) {
+         if (!bld->has_dst_alpha) {
             if (!bld->saturate)
                bld->saturate = lp_build_min(&bld->base, src_alpha, bld->base.zero);
          }
@@ -264,7 +265,8 @@ lp_build_blend_factor(struct lp_build_blend_aos_context *bld,
    if (alpha_swizzle != PIPE_SWIZZLE_NONE) {
       rgb_swizzle   = lp_build_blend_factor_swizzle(rgb_factor);
       alpha_factor_ = lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE);
-      return lp_build_blend_swizzle(bld, rgb_factor_, alpha_factor_, rgb_swizzle, alpha_swizzle, num_channels);
+      return lp_build_blend_swizzle(bld, rgb_factor_, alpha_factor_, rgb_swizzle,
+                                    alpha_swizzle, num_channels);
    } else {
       return rgb_factor_;
    }
@@ -327,6 +329,7 @@ lp_build_blend_aos(struct gallivm_state *gallivm,
    bld.src_alpha = src_alpha;
    bld.src1_alpha = src1_alpha;
    bld.const_alpha = const_alpha;
+   bld.has_dst_alpha = FALSE;
 
    /* Find the alpha channel if not provided seperately */
    if (!src_alpha) {
@@ -335,6 +338,14 @@ lp_build_blend_aos(struct gallivm_state *gallivm,
             alpha_swizzle = i;
          }
       }
+      /*
+       * Note that we may get src_alpha included from source (and 4 channels)
+       * even if the destination doesn't have an alpha channel (for rgbx
+       * formats). Generally this shouldn't make much of a difference (we're
+       * relying on blend factors being sanitized already if there's no
+       * dst alpha).
+       */
+      bld.has_dst_alpha = desc->swizzle[3] <= PIPE_SWIZZLE_W;
    }
 
    if (blend->logicop_enable) {
@@ -347,7 +358,9 @@ lp_build_blend_aos(struct gallivm_state *gallivm,
    } else if (!state->blend_enable) {
       result = src;
    } else {
-      boolean rgb_alpha_same = (state->rgb_src_factor == state->rgb_dst_factor && state->alpha_src_factor == state->alpha_dst_factor) || nr_channels == 1;
+      boolean rgb_alpha_same = (state->rgb_src_factor == state->rgb_dst_factor &&
+                                state->alpha_src_factor == state->alpha_dst_factor) ||
+                               nr_channels == 1;
 
       src_factor = lp_build_blend_factor(&bld, state->rgb_src_factor,
                                          state->alpha_src_factor,
@@ -370,7 +383,8 @@ lp_build_blend_aos(struct gallivm_state *gallivm,
                               rgb_alpha_same,
                               false);
 
-      if(state->rgb_func != state->alpha_func && nr_channels > 1 && alpha_swizzle != PIPE_SWIZZLE_NONE) {
+      if(state->rgb_func != state->alpha_func && nr_channels > 1 &&
+                            alpha_swizzle != PIPE_SWIZZLE_NONE) {
          LLVMValueRef alpha;
 
          alpha = lp_build_blend(&bld.base,
@@ -397,7 +411,8 @@ lp_build_blend_aos(struct gallivm_state *gallivm,
    if (!util_format_colormask_full(desc, state->colormask)) {
       LLVMValueRef color_mask;
 
-      color_mask = lp_build_const_mask_aos_swizzled(gallivm, bld.base.type, state->colormask, nr_channels, swizzle);
+      color_mask = lp_build_const_mask_aos_swizzled(gallivm, bld.base.type,
+                                                    state->colormask, nr_channels, swizzle);
       lp_build_name(color_mask, "color_mask");
 
       /* Combine with input mask if necessary */
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index b6f4c2a36c9..2c0339cad60 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -1460,7 +1460,8 @@ convert_from_blend_type(struct gallivm_state *gallivm,
          /* Extract bits */
          chans[j] = LLVMBuildLShr(builder,
                                   dst[i],
-                                  lp_build_const_int_vec(gallivm, src_type, from_lsb * blend_type.width),
+                                  lp_build_const_int_vec(gallivm, src_type,
+                                                         from_lsb * blend_type.width),
                                   "");
 
          chans[j] = LLVMBuildAnd(builder,
@@ -1548,7 +1549,8 @@ convert_alpha(struct gallivm_state *gallivm,
       /* If there is a src for each pixel broadcast the alpha across whole row */
       if (src_count == block_size) {
          for (i = 0; i < src_count; ++i) {
-            src_alpha[i] = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, row_type), src_alpha[i]);
+            src_alpha[i] = lp_build_broadcast(gallivm,
+                              lp_build_vec_type(gallivm, row_type), src_alpha[i]);
          }
       } else {
          unsigned pixels = block_size / src_count;
@@ -1749,13 +1751,23 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
    }
 
    /* If 3 channels then pad to include alpha for 4 element transpose */
-   if (dst_channels == 3 && !has_alpha) {
+   if (dst_channels == 3) {
+      assert (!has_alpha);
       for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
          if (swizzle[i] > TGSI_NUM_CHANNELS)
             swizzle[i] = 3;
       }
       if (out_format_desc->nr_channels == 4) {
          dst_channels = 4;
+         /*
+          * We use alpha from the color conversion, not separate one.
+          * We had to include it for transpose, hence it will get converted
+          * too (albeit when doing transpose after conversion, that would
+          * no longer be the case necessarily).
+          * (It works only with 4 channel dsts, e.g. rgbx formats, because
+          * otherwise we really have padding, not alpha, included.)
+          */
+         has_alpha = true;
       }
    }
 
@@ -1787,6 +1799,7 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
       /*
        * XXX If we include that here maybe could actually use it instead of
        * separate alpha for blending?
+       * (Difficult though we actually convert pad channels, not alpha.)
        */
       if (dst_channels == 3 && !has_alpha) {
          fs_src[i][3] = alpha;
@@ -1794,11 +1807,14 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
 
       /* We split the row_mask and row_alpha as we want 128bit interleave */
       if (fs_type.length == 8) {
-         src_mask[i*2 + 0]  = lp_build_extract_range(gallivm, fs_mask[i], 0, src_channels);
-         src_mask[i*2 + 1]  = lp_build_extract_range(gallivm, fs_mask[i], src_channels, src_channels);
+         src_mask[i*2 + 0]  = lp_build_extract_range(gallivm, fs_mask[i],
+                                                     0, src_channels);
+         src_mask[i*2 + 1]  = lp_build_extract_range(gallivm, fs_mask[i],
+                                                     src_channels, src_channels);
 
          src_alpha[i*2 + 0] = lp_build_extract_range(gallivm, alpha, 0, src_channels);
-         src_alpha[i*2 + 1] = lp_build_extract_range(gallivm, alpha, src_channels, src_channels);
+         src_alpha[i*2 + 1] = lp_build_extract_range(gallivm, alpha,
+                                                     src_channels, src_channels);
       } else {
          src_mask[i] = fs_mask[i];
          src_alpha[i] = alpha;
@@ -1829,7 +1845,8 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
          }
          if (fs_type.length == 8) {
             src1_alpha[i*2 + 0] = lp_build_extract_range(gallivm, alpha, 0, src_channels);
-            src1_alpha[i*2 + 1] = lp_build_extract_range(gallivm, alpha, src_channels, src_channels);
+            src1_alpha[i*2 + 1] = lp_build_extract_range(gallivm, alpha,
+                                                         src_channels, src_channels);
          } else {
             src1_alpha[i] = alpha;
          }
@@ -1911,8 +1928,10 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
     * Blend Colour conversion
     */
    blend_color = lp_jit_context_f_blend_color(gallivm, context_ptr);
-   blend_color = LLVMBuildPointerCast(builder, blend_color, LLVMPointerType(lp_build_vec_type(gallivm, fs_type), 0), "");
-   blend_color = LLVMBuildLoad(builder, LLVMBuildGEP(builder, blend_color, &i32_zero, 1, ""), "");
+   blend_color = LLVMBuildPointerCast(builder, blend_color,
+                    LLVMPointerType(lp_build_vec_type(gallivm, fs_type), 0), "");
+   blend_color = LLVMBuildLoad(builder, LLVMBuildGEP(builder, blend_color,
+                               &i32_zero, 1, ""), "");
 
    /* Convert */
    lp_build_conv(gallivm, fs_type, blend_type, &blend_color, 1, &blend_color, 1);
@@ -2141,7 +2160,8 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
     * It seems some cleanup could be done here (like skipping conversion/blend
     * when not needed).
     */
-   convert_to_blend_type(gallivm, block_size, out_format_desc, dst_type, row_type, dst, src_count);
+   convert_to_blend_type(gallivm, block_size, out_format_desc, dst_type,
+                         row_type, dst, src_count);
 
    /*
     * FIXME: Really should get logic ops / masks out of generic blend / row
@@ -2167,7 +2187,8 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
                                   pad_inline ? 4 : dst_channels);
    }
 
-   convert_from_blend_type(gallivm, block_size, out_format_desc, row_type, dst_type, dst, src_count);
+   convert_from_blend_type(gallivm, block_size, out_format_desc,
+                           row_type, dst_type, dst, src_count);
 
    /* Split the blend rows back to memory rows */
    if (dst_count > src_count) {
author	Roland Scheidegger <[email protected]>	2016-12-22 03:49:22 +0100
committer	Roland Scheidegger <[email protected]>	2017-01-06 23:13:34 +0100
commit	04480a04b10524e156cb01d5f7bd8ec02fcc4a67 (patch)
tree	fda9490bf6687184492195178ea3d1c0b0217674 /src/gallium/drivers/llvmpipe
parent	53c2d24a24a631a5be0a9c4df88f23dda1e8685c (diff)