diff options
-rw-r--r-- | src/gallium/drivers/iris/iris_state.c | 5 | ||||
-rw-r--r-- | src/intel/compiler/brw_compiler.h | 4 | ||||
-rw-r--r-- | src/intel/compiler/brw_fs.cpp | 4 | ||||
-rw-r--r-- | src/intel/compiler/brw_fs.h | 1 | ||||
-rw-r--r-- | src/intel/compiler/brw_fs_visitor.cpp | 94 | ||||
-rw-r--r-- | src/intel/vulkan/anv_pipeline.c | 11 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm.c | 18 |
7 files changed, 123 insertions, 14 deletions
diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c index 4ab3662da7b..1ae9c557a27 100644 --- a/src/gallium/drivers/iris/iris_state.c +++ b/src/gallium/drivers/iris/iris_state.c @@ -3335,8 +3335,9 @@ iris_populate_fs_key(const struct iris_context *ice, key->clamp_fragment_color = rast->clamp_fragment_color; - key->replicate_alpha = fb->nr_cbufs > 1 && - (zsa->alpha.enabled || blend->alpha_to_coverage); + key->alpha_to_coverage = blend->alpha_to_coverage; + + key->alpha_test_replicate_alpha = fb->nr_cbufs > 1 && zsa->alpha.enabled; /* XXX: only bother if COL0/1 are read */ key->flat_shade = rast->flatshade; diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h index 375705821c9..fb7ab289723 100644 --- a/src/intel/compiler/brw_compiler.h +++ b/src/intel/compiler/brw_compiler.h @@ -398,7 +398,8 @@ struct brw_wm_prog_key { bool stats_wm:1; bool flat_shade:1; unsigned nr_color_regions:5; - bool replicate_alpha:1; + bool alpha_test_replicate_alpha:1; + bool alpha_to_coverage:1; bool clamp_fragment_color:1; bool persample_interp:1; bool multisample_fbo:1; @@ -707,6 +708,7 @@ struct brw_wm_prog_data { bool dispatch_16; bool dispatch_32; bool dual_src_blend; + bool replicate_alpha; bool persample_dispatch; bool uses_pos_offset; bool uses_omask; diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 964cde07215..635f72721d9 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -4237,7 +4237,7 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst, /* Set "Source0 Alpha Present to RenderTarget" bit in message * header. */ - if (inst->target > 0 && key->replicate_alpha) + if (inst->target > 0 && prog_data->replicate_alpha) g00_bits |= 1 << 11; /* Set computes stencil to render target */ @@ -4314,7 +4314,7 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst, */ setup_color_payload(bld, key, &sources[length], src0_alpha, 1); length++; - } else if (key->replicate_alpha && inst->target != 0) { + } else if (prog_data->replicate_alpha && inst->target != 0) { /* Handle the case when fragment shader doesn't write to draw buffer * zero. No need to call setup_color_payload() for src0_alpha because * alpha value will be undefined. diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index 97956003973..391d46d5471 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -259,6 +259,7 @@ public: fs_inst *emit_single_fb_write(const brw::fs_builder &bld, fs_reg color1, fs_reg color2, fs_reg src0_alpha, unsigned components); + void emit_alpha_to_coverage_workaround(const fs_reg &src0_alpha); void emit_fb_writes(); fs_inst *emit_non_coherent_fb_read(const brw::fs_builder &bld, const fs_reg &dst, unsigned target); diff --git a/src/intel/compiler/brw_fs_visitor.cpp b/src/intel/compiler/brw_fs_visitor.cpp index 741b226b150..f8e8d36360e 100644 --- a/src/intel/compiler/brw_fs_visitor.cpp +++ b/src/intel/compiler/brw_fs_visitor.cpp @@ -400,6 +400,82 @@ fs_visitor::emit_single_fb_write(const fs_builder &bld, } void +fs_visitor::emit_alpha_to_coverage_workaround(const fs_reg &src0_alpha) +{ + /* We need to compute alpha to coverage dithering manually in shader + * and replace sample mask store with the bitwise-AND of sample mask and + * alpha to coverage dithering. + * + * The following formula is used to compute final sample mask: + * m = int(16.0 * clamp(src0_alpha, 0.0, 1.0)) + * dither_mask = 0x1111 * ((0xfea80 >> (m & ~3)) & 0xf) | + * 0x0808 * (m & 2) | 0x0100 * (m & 1) + * sample_mask = sample_mask & dither_mask + * + * It gives a number of ones proportional to the alpha for 2, 4, 8 or 16 + * least significant bits of the result: + * 0.0000 0000000000000000 + * 0.0625 0000000100000000 + * 0.1250 0001000000010000 + * 0.1875 0001000100010000 + * 0.2500 1000100010001000 + * 0.3125 1000100110001000 + * 0.3750 1001100010011000 + * 0.4375 1001100110011000 + * 0.5000 1010101010101010 + * 0.5625 1010101110101010 + * 0.6250 1011101010111010 + * 0.6875 1011101110111010 + * 0.7500 1110111011101110 + * 0.8125 1110111111101110 + * 0.8750 1111111011111110 + * 0.9375 1111111111111110 + * 1.0000 1111111111111111 + */ + const fs_builder abld = bld.annotate("compute alpha_to_coverage & " + "sample_mask"); + + /* clamp(src0_alpha, 0.f, 1.f) */ + const fs_reg float_tmp = abld.vgrf(BRW_REGISTER_TYPE_F); + set_saturate(true, abld.MOV(float_tmp, src0_alpha)); + + /* 16.0 * clamp(src0_alpha, 0.0, 1.0) */ + abld.MUL(float_tmp, float_tmp, brw_imm_f(16.0)); + + /* m = int(16.0 * clamp(src0_alpha, 0.0, 1.0)) */ + const fs_reg m = abld.vgrf(BRW_REGISTER_TYPE_UW); + abld.MOV(m, float_tmp); + + /* 0x1111 * ((0xfea80 >> (m & ~3)) & 0xf) */ + const fs_reg int_tmp_1 = abld.vgrf(BRW_REGISTER_TYPE_UW); + const fs_reg shift_const = abld.vgrf(BRW_REGISTER_TYPE_UD); + abld.MOV(shift_const, brw_imm_d(0xfea80)); + abld.AND(int_tmp_1, m, brw_imm_uw(~3)); + abld.SHR(int_tmp_1, shift_const, int_tmp_1); + abld.AND(int_tmp_1, int_tmp_1, brw_imm_uw(0xf)); + abld.MUL(int_tmp_1, int_tmp_1, brw_imm_uw(0x1111)); + + /* 0x0808 * (m & 2) */ + const fs_reg int_tmp_2 = abld.vgrf(BRW_REGISTER_TYPE_UW); + abld.AND(int_tmp_2, m, brw_imm_uw(2)); + abld.MUL(int_tmp_2, int_tmp_2, brw_imm_uw(0x0808)); + + abld.OR(int_tmp_1, int_tmp_1, int_tmp_2); + + /* 0x0100 * (m & 1) */ + const fs_reg int_tmp_3 = abld.vgrf(BRW_REGISTER_TYPE_UW); + abld.AND(int_tmp_3, m, brw_imm_uw(1)); + abld.MUL(int_tmp_3, int_tmp_3, brw_imm_uw(0x0100)); + + abld.OR(int_tmp_1, int_tmp_1, int_tmp_3); + + /* sample_mask = sample_mask & dither_mask */ + const fs_reg mask = abld.vgrf(BRW_REGISTER_TYPE_UD); + abld.AND(mask, sample_mask, int_tmp_1); + sample_mask = mask; +} + +void fs_visitor::emit_fb_writes() { assert(stage == MESA_SHADER_FRAGMENT); @@ -427,6 +503,22 @@ fs_visitor::emit_fb_writes() "in SIMD16+ mode.\n"); } + /* ANV doesn't know about sample mask output during the wm key creation + * so we compute if we need replicate alpha and emit alpha to coverage + * workaround here. + */ + prog_data->replicate_alpha = key->alpha_test_replicate_alpha || + (key->nr_color_regions > 1 && key->alpha_to_coverage && + (sample_mask.file == BAD_FILE || devinfo->gen == 6)); + + /* From the SKL PRM, Volume 7, "Alpha Coverage": + * "If Pixel Shader outputs oMask, AlphaToCoverage is disabled in + * hardware, regardless of the state setting for this feature." + */ + if (devinfo->gen > 6 && key->alpha_to_coverage && + sample_mask.file != BAD_FILE && this->outputs[0].file != BAD_FILE) + emit_alpha_to_coverage_workaround(offset(this->outputs[0], bld, 3)); + for (int target = 0; target < key->nr_color_regions; target++) { /* Skip over outputs that weren't written. */ if (this->outputs[target].file == BAD_FILE) @@ -436,7 +528,7 @@ fs_visitor::emit_fb_writes() ralloc_asprintf(this->mem_ctx, "FB write target %d", target)); fs_reg src0_alpha; - if (devinfo->gen >= 6 && key->replicate_alpha && target != 0) + if (devinfo->gen >= 6 && prog_data->replicate_alpha && target != 0) src0_alpha = offset(outputs[0], bld, 3); inst = emit_single_fb_write(abld, this->outputs[target], diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 90942a4524a..1c51b83b5ba 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -375,8 +375,15 @@ populate_wm_prog_key(const struct gen_device_info *devinfo, key->nr_color_regions = util_bitcount(key->color_outputs_valid); - key->replicate_alpha = key->nr_color_regions > 1 && - ms_info && ms_info->alphaToCoverageEnable; + /* To reduce possible shader recompilations we would need to know if + * there is a SampleMask output variable to compute if we should emit + * code to workaround the issue that hardware disables alpha to coverage + * when there is SampleMask output. + */ + key->alpha_to_coverage = ms_info && ms_info->alphaToCoverageEnable; + + /* Vulkan doesn't support fixed-function alpha test */ + key->alpha_test_replicate_alpha = false; if (ms_info) { /* We should probably pull this out of the shader, but it's fairly diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index 47905ca5549..d2d7974e841 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -93,8 +93,11 @@ brw_wm_debug_recompile(struct brw_context *brw, struct gl_program *prog, old_key->flat_shade, key->flat_shade); found |= key_debug(brw, "number of color buffers", old_key->nr_color_regions, key->nr_color_regions); - found |= key_debug(brw, "MRT alpha test or alpha-to-coverage", - old_key->replicate_alpha, key->replicate_alpha); + found |= key_debug(brw, "MRT alpha test", + old_key->alpha_test_replicate_alpha, + key->alpha_test_replicate_alpha); + found |= key_debug(brw, "alpha to coverage", + old_key->alpha_to_coverage, key->alpha_to_coverage); found |= key_debug(brw, "fragment color clamping", old_key->clamp_fragment_color, key->clamp_fragment_color); found |= key_debug(brw, "per-sample interpolation", @@ -569,10 +572,13 @@ brw_wm_populate_key(struct brw_context *brw, struct brw_wm_prog_key *key) key->force_dual_color_blend = brw->dual_color_blend_by_location && (ctx->Color.BlendEnabled & 1) && ctx->Color.Blend[0]._UsesDualSrc; - /* _NEW_MULTISAMPLE, _NEW_COLOR, _NEW_BUFFERS */ - key->replicate_alpha = ctx->DrawBuffer->_NumColorDrawBuffers > 1 && - (_mesa_is_alpha_test_enabled(ctx) || - _mesa_is_alpha_to_coverage_enabled(ctx)); + /* _NEW_MULTISAMPLE, _NEW_BUFFERS */ + key->alpha_to_coverage = _mesa_is_alpha_to_coverage_enabled(ctx); + + /* _NEW_COLOR, _NEW_BUFFERS */ + key->alpha_test_replicate_alpha = + ctx->DrawBuffer->_NumColorDrawBuffers > 1 && + _mesa_is_alpha_test_enabled(ctx); /* _NEW_BUFFERS _NEW_MULTISAMPLE */ /* Ignore sample qualifier while computing this flag. */ |