i965,iris,anv: Make alpha to coverage work with sample mask

From "Alpha Coverage" section of SKL PRM Volume 7: "If Pixel Shader outputs oMask, AlphaToCoverage is disabled in hardware, regardless of the state setting for this feature." From OpenGL spec 4.6, "15.2 Shader Execution": "The built-in integer array gl_SampleMask can be used to change the sample coverage for a fragment from within the shader." From OpenGL spec 4.6, "17.3.1 Alpha To Coverage": "If SAMPLE_ALPHA_TO_COVERAGE is enabled, a temporary coverage value is generated where each bit is determined by the alpha value at the corresponding sample location. The temporary coverage value is then ANDed with the fragment coverage value to generate a new fragment coverage value." Similar wording could be found in Vulkan spec 1.1.100 "25.6. Multisample Coverage" Thus we need to compute alpha to coverage dithering manually in shader and replace sample mask store with the bitwise-AND of sample mask and alpha to coverage dithering. The following formula is used to compute final sample mask: m = int(16.0 * clamp(src0_alpha, 0.0, 1.0)) dither_mask = 0x1111 * ((0xfea80 >> (m & ~3)) & 0xf) | 0x0808 * (m & 2) | 0x0100 * (m & 1) sample_mask = sample_mask & dither_mask Credits to Francisco Jerez <[email protected]> for creating it. It gives a number of ones proportional to the alpha for 2, 4, 8 or 16 least significant bits of the result. GEN6 hardware does not have issue with simultaneous usage of sample mask and alpha to coverage however due to the wrong sending order of oMask and src0_alpha it is still affected by it. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109743 Signed-off-by: Danylo Piliaiev <[email protected]> Reviewed-by: Francisco Jerez <[email protected]>
author: Danylo Piliaiev <[email protected]> 2019-02-20 19:39:18 +0200
committer: Francisco Jerez <[email protected]> 2019-03-25 13:54:55 -0700
commit: c8abe03f3b65505d2c1c165d88efb3bb62e06db1 (patch)
tree: 46249f3280a70e6d703589fd54561df6d32b69d2 /src/intel
parent: 3bd54576415130465f096d73b7940dfbe02bb71b (diff)
5 files changed, 108 insertions, 6 deletions
diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h
index 375705821c9..fb7ab289723 100644
--- a/src/intel/compiler/brw_compiler.h
+++ b/src/intel/compiler/brw_compiler.h
@@ -398,7 +398,8 @@ struct brw_wm_prog_key {
    bool stats_wm:1;
    bool flat_shade:1;
    unsigned nr_color_regions:5;
-   bool replicate_alpha:1;
+   bool alpha_test_replicate_alpha:1;
+   bool alpha_to_coverage:1;
    bool clamp_fragment_color:1;
    bool persample_interp:1;
    bool multisample_fbo:1;
@@ -707,6 +708,7 @@ struct brw_wm_prog_data {
    bool dispatch_16;
    bool dispatch_32;
    bool dual_src_blend;
+   bool replicate_alpha;
    bool persample_dispatch;
    bool uses_pos_offset;
    bool uses_omask;
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 964cde07215..635f72721d9 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -4237,7 +4237,7 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
       /* Set "Source0 Alpha Present to RenderTarget" bit in message
        * header.
        */
-      if (inst->target > 0 && key->replicate_alpha)
+      if (inst->target > 0 && prog_data->replicate_alpha)
          g00_bits |= 1 << 11;
 
       /* Set computes stencil to render target */
@@ -4314,7 +4314,7 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
        */
       setup_color_payload(bld, key, &sources[length], src0_alpha, 1);
       length++;
-   } else if (key->replicate_alpha && inst->target != 0) {
+   } else if (prog_data->replicate_alpha && inst->target != 0) {
       /* Handle the case when fragment shader doesn't write to draw buffer
        * zero. No need to call setup_color_payload() for src0_alpha because
        * alpha value will be undefined.
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index 97956003973..391d46d5471 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -259,6 +259,7 @@ public:
    fs_inst *emit_single_fb_write(const brw::fs_builder &bld,
                                  fs_reg color1, fs_reg color2,
                                  fs_reg src0_alpha, unsigned components);
+   void emit_alpha_to_coverage_workaround(const fs_reg &src0_alpha);
    void emit_fb_writes();
    fs_inst *emit_non_coherent_fb_read(const brw::fs_builder &bld,
                                       const fs_reg &dst, unsigned target);
diff --git a/src/intel/compiler/brw_fs_visitor.cpp b/src/intel/compiler/brw_fs_visitor.cpp
index 741b226b150..f8e8d36360e 100644
--- a/src/intel/compiler/brw_fs_visitor.cpp
+++ b/src/intel/compiler/brw_fs_visitor.cpp
@@ -400,6 +400,82 @@ fs_visitor::emit_single_fb_write(const fs_builder &bld,
 }
 
 void
+fs_visitor::emit_alpha_to_coverage_workaround(const fs_reg &src0_alpha)
+{
+   /* We need to compute alpha to coverage dithering manually in shader
+    * and replace sample mask store with the bitwise-AND of sample mask and
+    * alpha to coverage dithering.
+    *
+    * The following formula is used to compute final sample mask:
+    *  m = int(16.0 * clamp(src0_alpha, 0.0, 1.0))
+    *  dither_mask = 0x1111 * ((0xfea80 >> (m & ~3)) & 0xf) |
+    *     0x0808 * (m & 2) | 0x0100 * (m & 1)
+    *  sample_mask = sample_mask & dither_mask
+    *
+    * It gives a number of ones proportional to the alpha for 2, 4, 8 or 16
+    * least significant bits of the result:
+    *  0.0000 0000000000000000
+    *  0.0625 0000000100000000
+    *  0.1250 0001000000010000
+    *  0.1875 0001000100010000
+    *  0.2500 1000100010001000
+    *  0.3125 1000100110001000
+    *  0.3750 1001100010011000
+    *  0.4375 1001100110011000
+    *  0.5000 1010101010101010
+    *  0.5625 1010101110101010
+    *  0.6250 1011101010111010
+    *  0.6875 1011101110111010
+    *  0.7500 1110111011101110
+    *  0.8125 1110111111101110
+    *  0.8750 1111111011111110
+    *  0.9375 1111111111111110
+    *  1.0000 1111111111111111
+    */
+   const fs_builder abld = bld.annotate("compute alpha_to_coverage & "
+      "sample_mask");
+
+   /* clamp(src0_alpha, 0.f, 1.f) */
+   const fs_reg float_tmp = abld.vgrf(BRW_REGISTER_TYPE_F);
+   set_saturate(true, abld.MOV(float_tmp, src0_alpha));
+
+   /* 16.0 * clamp(src0_alpha, 0.0, 1.0) */
+   abld.MUL(float_tmp, float_tmp, brw_imm_f(16.0));
+
+   /* m = int(16.0 * clamp(src0_alpha, 0.0, 1.0)) */
+   const fs_reg m = abld.vgrf(BRW_REGISTER_TYPE_UW);
+   abld.MOV(m, float_tmp);
+
+   /* 0x1111 * ((0xfea80 >> (m & ~3)) & 0xf) */
+   const fs_reg int_tmp_1 = abld.vgrf(BRW_REGISTER_TYPE_UW);
+   const fs_reg shift_const = abld.vgrf(BRW_REGISTER_TYPE_UD);
+   abld.MOV(shift_const, brw_imm_d(0xfea80));
+   abld.AND(int_tmp_1, m, brw_imm_uw(~3));
+   abld.SHR(int_tmp_1, shift_const, int_tmp_1);
+   abld.AND(int_tmp_1, int_tmp_1, brw_imm_uw(0xf));
+   abld.MUL(int_tmp_1, int_tmp_1, brw_imm_uw(0x1111));
+
+   /* 0x0808 * (m & 2) */
+   const fs_reg int_tmp_2 = abld.vgrf(BRW_REGISTER_TYPE_UW);
+   abld.AND(int_tmp_2, m, brw_imm_uw(2));
+   abld.MUL(int_tmp_2, int_tmp_2, brw_imm_uw(0x0808));
+
+   abld.OR(int_tmp_1, int_tmp_1, int_tmp_2);
+
+   /* 0x0100 * (m & 1) */
+   const fs_reg int_tmp_3 = abld.vgrf(BRW_REGISTER_TYPE_UW);
+   abld.AND(int_tmp_3, m, brw_imm_uw(1));
+   abld.MUL(int_tmp_3, int_tmp_3, brw_imm_uw(0x0100));
+
+   abld.OR(int_tmp_1, int_tmp_1, int_tmp_3);
+
+   /* sample_mask = sample_mask & dither_mask */
+   const fs_reg mask = abld.vgrf(BRW_REGISTER_TYPE_UD);
+   abld.AND(mask, sample_mask, int_tmp_1);
+   sample_mask = mask;
+}
+
+void
 fs_visitor::emit_fb_writes()
 {
    assert(stage == MESA_SHADER_FRAGMENT);
@@ -427,6 +503,22 @@ fs_visitor::emit_fb_writes()
                            "in SIMD16+ mode.\n");
    }
 
+   /* ANV doesn't know about sample mask output during the wm key creation
+    * so we compute if we need replicate alpha and emit alpha to coverage
+    * workaround here.
+    */
+   prog_data->replicate_alpha = key->alpha_test_replicate_alpha ||
+      (key->nr_color_regions > 1 && key->alpha_to_coverage &&
+       (sample_mask.file == BAD_FILE || devinfo->gen == 6));
+
+   /* From the SKL PRM, Volume 7, "Alpha Coverage":
+    *  "If Pixel Shader outputs oMask, AlphaToCoverage is disabled in
+    *   hardware, regardless of the state setting for this feature."
+    */
+   if (devinfo->gen > 6 && key->alpha_to_coverage &&
+       sample_mask.file != BAD_FILE && this->outputs[0].file != BAD_FILE)
+      emit_alpha_to_coverage_workaround(offset(this->outputs[0], bld, 3));
+
    for (int target = 0; target < key->nr_color_regions; target++) {
       /* Skip over outputs that weren't written. */
       if (this->outputs[target].file == BAD_FILE)
@@ -436,7 +528,7 @@ fs_visitor::emit_fb_writes()
          ralloc_asprintf(this->mem_ctx, "FB write target %d", target));
 
       fs_reg src0_alpha;
-      if (devinfo->gen >= 6 && key->replicate_alpha && target != 0)
+      if (devinfo->gen >= 6 && prog_data->replicate_alpha && target != 0)
          src0_alpha = offset(outputs[0], bld, 3);
 
       inst = emit_single_fb_write(abld, this->outputs[target],
diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index 90942a4524a..1c51b83b5ba 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -375,8 +375,15 @@ populate_wm_prog_key(const struct gen_device_info *devinfo,
 
    key->nr_color_regions = util_bitcount(key->color_outputs_valid);
 
-   key->replicate_alpha = key->nr_color_regions > 1 &&
-                          ms_info && ms_info->alphaToCoverageEnable;
+   /* To reduce possible shader recompilations we would need to know if
+    * there is a SampleMask output variable to compute if we should emit
+    * code to workaround the issue that hardware disables alpha to coverage
+    * when there is SampleMask output.
+    */
+   key->alpha_to_coverage = ms_info && ms_info->alphaToCoverageEnable;
+
+   /* Vulkan doesn't support fixed-function alpha test */
+   key->alpha_test_replicate_alpha = false;
 
    if (ms_info) {
       /* We should probably pull this out of the shader, but it's fairly
author	Danylo Piliaiev <[email protected]>	2019-02-20 19:39:18 +0200
committer	Francisco Jerez <[email protected]>	2019-03-25 13:54:55 -0700
commit	c8abe03f3b65505d2c1c165d88efb3bb62e06db1 (patch)
tree	46249f3280a70e6d703589fd54561df6d32b69d2 /src/intel
parent	3bd54576415130465f096d73b7940dfbe02bb71b (diff)