diff options
-rw-r--r-- | src/intel/vulkan/anv_pipeline.c | 5 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_compiler.h | 4 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.cpp | 40 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm.c | 19 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/gen6_wm_state.c | 6 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/gen7_wm_state.c | 20 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/gen8_ps_state.c | 22 |
7 files changed, 60 insertions, 56 deletions
diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 5800e683d39..f55069ee747 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -286,8 +286,9 @@ populate_wm_prog_key(const struct brw_device_info *devinfo, /* We should probably pull this out of the shader, but it's fairly * harmless to compute it and then let dead-code take care of it. */ - key->persample_shading = info->pMultisampleState->sampleShadingEnable; - key->compute_pos_offset = info->pMultisampleState->sampleShadingEnable; + key->persample_interp = + (info->pMultisampleState->minSampleShading * + info->pMultisampleState->rasterizationSamples) > 1; key->multisample_fbo = true; } } diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h index 3d1dc88eebc..3fcd7e87c4e 100644 --- a/src/mesa/drivers/dri/i965/brw_compiler.h +++ b/src/mesa/drivers/dri/i965/brw_compiler.h @@ -242,12 +242,11 @@ struct brw_wm_prog_key { uint8_t iz_lookup; bool stats_wm:1; bool flat_shade:1; - bool persample_shading:1; unsigned nr_color_regions:5; bool replicate_alpha:1; bool render_to_fbo:1; bool clamp_fragment_color:1; - bool compute_pos_offset:1; + bool persample_interp:1; bool multisample_fbo:1; unsigned line_aa:2; bool high_quality_derivatives:1; @@ -386,6 +385,7 @@ struct brw_wm_prog_data { bool early_fragment_tests; bool no_8; bool dual_src_blend; + bool persample_dispatch; bool uses_pos_offset; bool uses_omask; bool uses_kill; diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 89b30c7bfb8..5e08d7120b5 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1195,8 +1195,8 @@ fs_visitor::emit_general_interpolation(fs_reg *attr, const char *name, inst->no_dd_clear = true; inst = emit_linterp(*attr, fs_reg(interp), interpolation_mode, - mod_centroid && !key->persample_shading, - mod_sample || key->persample_shading); + mod_centroid && !key->persample_interp, + mod_sample || key->persample_interp); inst->predicate = BRW_PREDICATE_NORMAL; inst->predicate_inverse = false; if (devinfo->has_pln) @@ -1204,8 +1204,8 @@ fs_visitor::emit_general_interpolation(fs_reg *attr, const char *name, } else { emit_linterp(*attr, fs_reg(interp), interpolation_mode, - mod_centroid && !key->persample_shading, - mod_sample || key->persample_shading); + mod_centroid && !key->persample_interp, + mod_sample || key->persample_interp); } if (devinfo->gen < 6 && interpolation_mode == INTERP_QUALIFIER_SMOOTH) { bld.MUL(*attr, *attr, this->pixel_w); @@ -1262,10 +1262,10 @@ void fs_visitor::compute_sample_position(fs_reg dst, fs_reg int_sample_pos) { assert(stage == MESA_SHADER_FRAGMENT); - brw_wm_prog_key *key = (brw_wm_prog_key*) this->key; + brw_wm_prog_data *wm_prog_data = (brw_wm_prog_data *) this->prog_data; assert(dst.type == BRW_REGISTER_TYPE_F); - if (key->compute_pos_offset) { + if (wm_prog_data->persample_dispatch) { /* Convert int_sample_pos to floating point */ bld.MOV(dst, int_sample_pos); /* Scale to the range [0, 1] */ @@ -1430,7 +1430,7 @@ fs_reg * fs_visitor::emit_samplemaskin_setup() { assert(stage == MESA_SHADER_FRAGMENT); - brw_wm_prog_key *key = (brw_wm_prog_key *) this->key; + brw_wm_prog_data *wm_prog_data = (brw_wm_prog_data *) this->prog_data; assert(devinfo->gen >= 6); fs_reg *reg = new(this->mem_ctx) fs_reg(vgrf(glsl_type::int_type)); @@ -1438,7 +1438,7 @@ fs_visitor::emit_samplemaskin_setup() fs_reg coverage_mask(retype(brw_vec8_grf(payload.sample_mask_in_reg, 0), BRW_REGISTER_TYPE_D)); - if (key->persample_shading) { + if (wm_prog_data->persample_dispatch) { /* gl_SampleMaskIn[] comes from two sources: the input coverage mask, * and a mask representing which sample is being processed by the * current shader invocation. @@ -5098,7 +5098,6 @@ fs_visitor::setup_fs_payload_gen6() { assert(stage == MESA_SHADER_FRAGMENT); brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data; - brw_wm_prog_key *key = (brw_wm_prog_key*) this->key; unsigned barycentric_interp_modes = (stage == MESA_SHADER_FRAGMENT) ? @@ -5151,9 +5150,19 @@ fs_visitor::setup_fs_payload_gen6() } } - prog_data->uses_pos_offset = key->compute_pos_offset; /* R31: MSAA position offsets. */ - if (prog_data->uses_pos_offset) { + if (prog_data->persample_dispatch && + (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS)) { + /* From the Ivy Bridge PRM documentation for 3DSTATE_PS: + * + * "MSDISPMODE_PERSAMPLE is required in order to select + * POSOFFSET_SAMPLE" + * + * So we can only really get sample positions if we are doing real + * per-sample dispatch. If we need gl_SamplePosition and we don't have + * persample dispatch, we hard-code it to 0.5. + */ + prog_data->uses_pos_offset = true; payload.sample_pos_reg = payload.num_regs; payload.num_regs++; } @@ -5993,12 +6002,19 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data, prog_data->computed_stencil = shader->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL); + prog_data->persample_dispatch = + key->multisample_fbo && + (key->persample_interp || + (shader->info.system_values_read & (SYSTEM_BIT_SAMPLE_ID | + SYSTEM_BIT_SAMPLE_POS)) || + shader->info.fs.uses_sample_qualifier); + prog_data->early_fragment_tests = shader->info.fs.early_fragment_tests; prog_data->barycentric_interp_modes = brw_compute_barycentric_interp_modes(compiler->devinfo, key->flat_shade, - key->persample_shading, + key->persample_interp, shader); fs_visitor v(compiler, log_data, mem_ctx, key, diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index ced97089fac..395b0b8b1e8 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -250,8 +250,8 @@ brw_wm_debug_recompile(struct brw_context *brw, old_key->stats_wm, key->stats_wm); found |= key_debug(brw, "flat shading", old_key->flat_shade, key->flat_shade); - found |= key_debug(brw, "per-sample shading", - old_key->persample_shading, key->persample_shading); + found |= key_debug(brw, "per-sample interpolation", + old_key->persample_interp, key->persample_interp); found |= key_debug(brw, "number of color buffers", old_key->nr_color_regions, key->nr_color_regions); found |= key_debug(brw, "MRT alpha test or alpha-to-coverage", @@ -528,15 +528,14 @@ brw_wm_populate_key(struct brw_context *brw, struct brw_wm_prog_key *key) /* _NEW_BUFFERS _NEW_MULTISAMPLE */ /* Ignore sample qualifier while computing this flag. */ - key->persample_shading = - _mesa_get_min_invocations_per_fragment(ctx, &fp->program, true) > 1; + if (ctx->Multisample.Enabled) { + key->persample_interp = + ctx->Multisample.SampleShading && + (ctx->Multisample.MinSampleShadingValue * + _mesa_geometric_samples(ctx->DrawBuffer) > 1); - key->compute_pos_offset = - _mesa_get_min_invocations_per_fragment(ctx, &fp->program, false) > 1 && - fp->program.Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_POS; - - key->multisample_fbo = ctx->Multisample.Enabled && - _mesa_geometric_samples(ctx->DrawBuffer) > 1; + key->multisample_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1; + } /* BRW_NEW_VUE_MAP_GEOM_OUT */ if (brw->gen < 6 || _mesa_bitcount_64(fp->program.Base.InputsRead & diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c index 335920cb20f..dd33926dc82 100644 --- a/src/mesa/drivers/dri/i965/gen6_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c @@ -130,12 +130,10 @@ gen6_upload_wm_state(struct brw_context *brw, dw5 |= (brw->max_wm_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT; - assert(min_inv_per_frag >= 1); - if (prog_data->prog_offset_16 || prog_data->no_8) { dw5 |= GEN6_WM_16_DISPATCH_ENABLE; - if (!prog_data->no_8 && min_inv_per_frag == 1) { + if (!prog_data->no_8 && !prog_data->persample_dispatch) { dw5 |= GEN6_WM_8_DISPATCH_ENABLE; dw4 |= (prog_data->base.dispatch_grf_start_reg << GEN6_WM_DISPATCH_START_GRF_SHIFT_0); @@ -198,7 +196,7 @@ gen6_upload_wm_state(struct brw_context *brw, else dw6 |= GEN6_WM_MSRAST_OFF_PIXEL; - if (min_inv_per_frag > 1) + if (prog_data->persample_dispatch) dw6 |= GEN6_WM_MSDISPMODE_PERSAMPLE; else { dw6 |= GEN6_WM_MSDISPMODE_PERPIXEL; diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c index 2c3930f4046..945fbbdaa2b 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c @@ -91,7 +91,7 @@ upload_wm_state(struct brw_context *brw) else dw1 |= GEN7_WM_MSRAST_OFF_PIXEL; - if (_mesa_get_min_invocations_per_fragment(ctx, brw->fragment_program, false) > 1) + if (prog_data->persample_dispatch) dw2 |= GEN7_WM_MSDISPMODE_PERSAMPLE; else dw2 |= GEN7_WM_MSDISPMODE_PERPIXEL; @@ -152,7 +152,6 @@ gen7_upload_ps_state(struct brw_context *brw, bool enable_dual_src_blend, unsigned sample_mask, unsigned fast_clear_op) { - struct gl_context *ctx = &brw->ctx; uint32_t dw2, dw4, dw5, ksp0, ksp2; const int max_threads_shift = brw->is_haswell ? HSW_PS_MAX_THREADS_SHIFT : IVB_PS_MAX_THREADS_SHIFT; @@ -216,18 +215,15 @@ gen7_upload_ps_state(struct brw_context *brw, if (prog_data->num_varying_inputs != 0) dw4 |= GEN7_PS_ATTRIBUTE_ENABLE; - /* In case of non 1x per sample shading, only one of SIMD8 and SIMD16 - * should be enabled. We do 'SIMD16 only' dispatch if a SIMD16 shader - * is successfully compiled. In majority of the cases that bring us - * better performance than 'SIMD8 only' dispatch. - */ - int min_inv_per_frag = - _mesa_get_min_invocations_per_fragment(ctx, fp, false); - assert(min_inv_per_frag >= 1); - if (prog_data->prog_offset_16 || prog_data->no_8) { dw4 |= GEN7_PS_16_DISPATCH_ENABLE; - if (!prog_data->no_8 && min_inv_per_frag == 1) { + + /* In case of non 1x per sample shading, only one of SIMD8 and SIMD16 + * should be enabled. We do 'SIMD16 only' dispatch if a SIMD16 shader + * is successfully compiled. In majority of the cases that bring us + * better performance than 'SIMD8 only' dispatch. + */ + if (!prog_data->no_8 && !prog_data->persample_dispatch) { dw4 |= GEN7_PS_8_DISPATCH_ENABLE; dw5 |= (prog_data->base.dispatch_grf_start_reg << GEN7_PS_DISPATCH_START_GRF_SHIFT_0); diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c b/src/mesa/drivers/dri/i965/gen8_ps_state.c index 9269a796604..d3e1ca38c75 100644 --- a/src/mesa/drivers/dri/i965/gen8_ps_state.c +++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c @@ -52,8 +52,7 @@ gen8_upload_ps_extra(struct brw_context *brw, if (prog_data->uses_src_w) dw1 |= GEN8_PSX_USES_SOURCE_W; - if (multisampled_fbo && - _mesa_get_min_invocations_per_fragment(ctx, fp, false) > 1) + if (prog_data->persample_dispatch) dw1 |= GEN8_PSX_SHADER_IS_PER_SAMPLE; if (prog_data->uses_sample_mask) { @@ -192,7 +191,6 @@ gen8_upload_ps_state(struct brw_context *brw, const struct brw_wm_prog_data *prog_data, uint32_t fast_clear_op) { - struct gl_context *ctx = &brw->ctx; uint32_t dw3 = 0, dw6 = 0, dw7 = 0, ksp0, ksp2 = 0; /* Initialize the execution mask with VMask. Otherwise, derivatives are @@ -246,19 +244,15 @@ gen8_upload_ps_state(struct brw_context *brw, dw6 |= fast_clear_op; - /* _NEW_MULTISAMPLE - * In case of non 1x per sample shading, only one of SIMD8 and SIMD16 - * should be enabled. We do 'SIMD16 only' dispatch if a SIMD16 shader - * is successfully compiled. In majority of the cases that bring us - * better performance than 'SIMD8 only' dispatch. - */ - int min_invocations_per_fragment = - _mesa_get_min_invocations_per_fragment(ctx, fp, false); - assert(min_invocations_per_fragment >= 1); - if (prog_data->prog_offset_16 || prog_data->no_8) { dw6 |= GEN7_PS_16_DISPATCH_ENABLE; - if (!prog_data->no_8 && min_invocations_per_fragment == 1) { + + /* In case of non 1x per sample shading, only one of SIMD8 and SIMD16 + * should be enabled. We do 'SIMD16 only' dispatch if a SIMD16 shader + * is successfully compiled. In majority of the cases that bring us + * better performance than 'SIMD8 only' dispatch. + */ + if (!prog_data->no_8 && !prog_data->persample_dispatch) { dw6 |= GEN7_PS_8_DISPATCH_ENABLE; dw7 |= (prog_data->base.dispatch_grf_start_reg << GEN7_PS_DISPATCH_START_GRF_SHIFT_0); |