summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/intel/vulkan/anv_pipeline.c5
-rw-r--r--src/mesa/drivers/dri/i965/brw_compiler.h4
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.cpp40
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm.c19
-rw-r--r--src/mesa/drivers/dri/i965/gen6_wm_state.c6
-rw-r--r--src/mesa/drivers/dri/i965/gen7_wm_state.c20
-rw-r--r--src/mesa/drivers/dri/i965/gen8_ps_state.c22
7 files changed, 60 insertions, 56 deletions
diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index 5800e683d39..f55069ee747 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -286,8 +286,9 @@ populate_wm_prog_key(const struct brw_device_info *devinfo,
/* We should probably pull this out of the shader, but it's fairly
* harmless to compute it and then let dead-code take care of it.
*/
- key->persample_shading = info->pMultisampleState->sampleShadingEnable;
- key->compute_pos_offset = info->pMultisampleState->sampleShadingEnable;
+ key->persample_interp =
+ (info->pMultisampleState->minSampleShading *
+ info->pMultisampleState->rasterizationSamples) > 1;
key->multisample_fbo = true;
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h
index 3d1dc88eebc..3fcd7e87c4e 100644
--- a/src/mesa/drivers/dri/i965/brw_compiler.h
+++ b/src/mesa/drivers/dri/i965/brw_compiler.h
@@ -242,12 +242,11 @@ struct brw_wm_prog_key {
uint8_t iz_lookup;
bool stats_wm:1;
bool flat_shade:1;
- bool persample_shading:1;
unsigned nr_color_regions:5;
bool replicate_alpha:1;
bool render_to_fbo:1;
bool clamp_fragment_color:1;
- bool compute_pos_offset:1;
+ bool persample_interp:1;
bool multisample_fbo:1;
unsigned line_aa:2;
bool high_quality_derivatives:1;
@@ -386,6 +385,7 @@ struct brw_wm_prog_data {
bool early_fragment_tests;
bool no_8;
bool dual_src_blend;
+ bool persample_dispatch;
bool uses_pos_offset;
bool uses_omask;
bool uses_kill;
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 89b30c7bfb8..5e08d7120b5 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1195,8 +1195,8 @@ fs_visitor::emit_general_interpolation(fs_reg *attr, const char *name,
inst->no_dd_clear = true;
inst = emit_linterp(*attr, fs_reg(interp), interpolation_mode,
- mod_centroid && !key->persample_shading,
- mod_sample || key->persample_shading);
+ mod_centroid && !key->persample_interp,
+ mod_sample || key->persample_interp);
inst->predicate = BRW_PREDICATE_NORMAL;
inst->predicate_inverse = false;
if (devinfo->has_pln)
@@ -1204,8 +1204,8 @@ fs_visitor::emit_general_interpolation(fs_reg *attr, const char *name,
} else {
emit_linterp(*attr, fs_reg(interp), interpolation_mode,
- mod_centroid && !key->persample_shading,
- mod_sample || key->persample_shading);
+ mod_centroid && !key->persample_interp,
+ mod_sample || key->persample_interp);
}
if (devinfo->gen < 6 && interpolation_mode == INTERP_QUALIFIER_SMOOTH) {
bld.MUL(*attr, *attr, this->pixel_w);
@@ -1262,10 +1262,10 @@ void
fs_visitor::compute_sample_position(fs_reg dst, fs_reg int_sample_pos)
{
assert(stage == MESA_SHADER_FRAGMENT);
- brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
+ brw_wm_prog_data *wm_prog_data = (brw_wm_prog_data *) this->prog_data;
assert(dst.type == BRW_REGISTER_TYPE_F);
- if (key->compute_pos_offset) {
+ if (wm_prog_data->persample_dispatch) {
/* Convert int_sample_pos to floating point */
bld.MOV(dst, int_sample_pos);
/* Scale to the range [0, 1] */
@@ -1430,7 +1430,7 @@ fs_reg *
fs_visitor::emit_samplemaskin_setup()
{
assert(stage == MESA_SHADER_FRAGMENT);
- brw_wm_prog_key *key = (brw_wm_prog_key *) this->key;
+ brw_wm_prog_data *wm_prog_data = (brw_wm_prog_data *) this->prog_data;
assert(devinfo->gen >= 6);
fs_reg *reg = new(this->mem_ctx) fs_reg(vgrf(glsl_type::int_type));
@@ -1438,7 +1438,7 @@ fs_visitor::emit_samplemaskin_setup()
fs_reg coverage_mask(retype(brw_vec8_grf(payload.sample_mask_in_reg, 0),
BRW_REGISTER_TYPE_D));
- if (key->persample_shading) {
+ if (wm_prog_data->persample_dispatch) {
/* gl_SampleMaskIn[] comes from two sources: the input coverage mask,
* and a mask representing which sample is being processed by the
* current shader invocation.
@@ -5098,7 +5098,6 @@ fs_visitor::setup_fs_payload_gen6()
{
assert(stage == MESA_SHADER_FRAGMENT);
brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data;
- brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
unsigned barycentric_interp_modes =
(stage == MESA_SHADER_FRAGMENT) ?
@@ -5151,9 +5150,19 @@ fs_visitor::setup_fs_payload_gen6()
}
}
- prog_data->uses_pos_offset = key->compute_pos_offset;
/* R31: MSAA position offsets. */
- if (prog_data->uses_pos_offset) {
+ if (prog_data->persample_dispatch &&
+ (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS)) {
+ /* From the Ivy Bridge PRM documentation for 3DSTATE_PS:
+ *
+ * "MSDISPMODE_PERSAMPLE is required in order to select
+ * POSOFFSET_SAMPLE"
+ *
+ * So we can only really get sample positions if we are doing real
+ * per-sample dispatch. If we need gl_SamplePosition and we don't have
+ * persample dispatch, we hard-code it to 0.5.
+ */
+ prog_data->uses_pos_offset = true;
payload.sample_pos_reg = payload.num_regs;
payload.num_regs++;
}
@@ -5993,12 +6002,19 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
prog_data->computed_stencil =
shader->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL);
+ prog_data->persample_dispatch =
+ key->multisample_fbo &&
+ (key->persample_interp ||
+ (shader->info.system_values_read & (SYSTEM_BIT_SAMPLE_ID |
+ SYSTEM_BIT_SAMPLE_POS)) ||
+ shader->info.fs.uses_sample_qualifier);
+
prog_data->early_fragment_tests = shader->info.fs.early_fragment_tests;
prog_data->barycentric_interp_modes =
brw_compute_barycentric_interp_modes(compiler->devinfo,
key->flat_shade,
- key->persample_shading,
+ key->persample_interp,
shader);
fs_visitor v(compiler, log_data, mem_ctx, key,
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
index ced97089fac..395b0b8b1e8 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -250,8 +250,8 @@ brw_wm_debug_recompile(struct brw_context *brw,
old_key->stats_wm, key->stats_wm);
found |= key_debug(brw, "flat shading",
old_key->flat_shade, key->flat_shade);
- found |= key_debug(brw, "per-sample shading",
- old_key->persample_shading, key->persample_shading);
+ found |= key_debug(brw, "per-sample interpolation",
+ old_key->persample_interp, key->persample_interp);
found |= key_debug(brw, "number of color buffers",
old_key->nr_color_regions, key->nr_color_regions);
found |= key_debug(brw, "MRT alpha test or alpha-to-coverage",
@@ -528,15 +528,14 @@ brw_wm_populate_key(struct brw_context *brw, struct brw_wm_prog_key *key)
/* _NEW_BUFFERS _NEW_MULTISAMPLE */
/* Ignore sample qualifier while computing this flag. */
- key->persample_shading =
- _mesa_get_min_invocations_per_fragment(ctx, &fp->program, true) > 1;
+ if (ctx->Multisample.Enabled) {
+ key->persample_interp =
+ ctx->Multisample.SampleShading &&
+ (ctx->Multisample.MinSampleShadingValue *
+ _mesa_geometric_samples(ctx->DrawBuffer) > 1);
- key->compute_pos_offset =
- _mesa_get_min_invocations_per_fragment(ctx, &fp->program, false) > 1 &&
- fp->program.Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_POS;
-
- key->multisample_fbo = ctx->Multisample.Enabled &&
- _mesa_geometric_samples(ctx->DrawBuffer) > 1;
+ key->multisample_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1;
+ }
/* BRW_NEW_VUE_MAP_GEOM_OUT */
if (brw->gen < 6 || _mesa_bitcount_64(fp->program.Base.InputsRead &
diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c
index 335920cb20f..dd33926dc82 100644
--- a/src/mesa/drivers/dri/i965/gen6_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c
@@ -130,12 +130,10 @@ gen6_upload_wm_state(struct brw_context *brw,
dw5 |= (brw->max_wm_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT;
- assert(min_inv_per_frag >= 1);
-
if (prog_data->prog_offset_16 || prog_data->no_8) {
dw5 |= GEN6_WM_16_DISPATCH_ENABLE;
- if (!prog_data->no_8 && min_inv_per_frag == 1) {
+ if (!prog_data->no_8 && !prog_data->persample_dispatch) {
dw5 |= GEN6_WM_8_DISPATCH_ENABLE;
dw4 |= (prog_data->base.dispatch_grf_start_reg <<
GEN6_WM_DISPATCH_START_GRF_SHIFT_0);
@@ -198,7 +196,7 @@ gen6_upload_wm_state(struct brw_context *brw,
else
dw6 |= GEN6_WM_MSRAST_OFF_PIXEL;
- if (min_inv_per_frag > 1)
+ if (prog_data->persample_dispatch)
dw6 |= GEN6_WM_MSDISPMODE_PERSAMPLE;
else {
dw6 |= GEN6_WM_MSDISPMODE_PERPIXEL;
diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c
index 2c3930f4046..945fbbdaa2b 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c
@@ -91,7 +91,7 @@ upload_wm_state(struct brw_context *brw)
else
dw1 |= GEN7_WM_MSRAST_OFF_PIXEL;
- if (_mesa_get_min_invocations_per_fragment(ctx, brw->fragment_program, false) > 1)
+ if (prog_data->persample_dispatch)
dw2 |= GEN7_WM_MSDISPMODE_PERSAMPLE;
else
dw2 |= GEN7_WM_MSDISPMODE_PERPIXEL;
@@ -152,7 +152,6 @@ gen7_upload_ps_state(struct brw_context *brw,
bool enable_dual_src_blend, unsigned sample_mask,
unsigned fast_clear_op)
{
- struct gl_context *ctx = &brw->ctx;
uint32_t dw2, dw4, dw5, ksp0, ksp2;
const int max_threads_shift = brw->is_haswell ?
HSW_PS_MAX_THREADS_SHIFT : IVB_PS_MAX_THREADS_SHIFT;
@@ -216,18 +215,15 @@ gen7_upload_ps_state(struct brw_context *brw,
if (prog_data->num_varying_inputs != 0)
dw4 |= GEN7_PS_ATTRIBUTE_ENABLE;
- /* In case of non 1x per sample shading, only one of SIMD8 and SIMD16
- * should be enabled. We do 'SIMD16 only' dispatch if a SIMD16 shader
- * is successfully compiled. In majority of the cases that bring us
- * better performance than 'SIMD8 only' dispatch.
- */
- int min_inv_per_frag =
- _mesa_get_min_invocations_per_fragment(ctx, fp, false);
- assert(min_inv_per_frag >= 1);
-
if (prog_data->prog_offset_16 || prog_data->no_8) {
dw4 |= GEN7_PS_16_DISPATCH_ENABLE;
- if (!prog_data->no_8 && min_inv_per_frag == 1) {
+
+ /* In case of non 1x per sample shading, only one of SIMD8 and SIMD16
+ * should be enabled. We do 'SIMD16 only' dispatch if a SIMD16 shader
+ * is successfully compiled. In majority of the cases that bring us
+ * better performance than 'SIMD8 only' dispatch.
+ */
+ if (!prog_data->no_8 && !prog_data->persample_dispatch) {
dw4 |= GEN7_PS_8_DISPATCH_ENABLE;
dw5 |= (prog_data->base.dispatch_grf_start_reg <<
GEN7_PS_DISPATCH_START_GRF_SHIFT_0);
diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c b/src/mesa/drivers/dri/i965/gen8_ps_state.c
index 9269a796604..d3e1ca38c75 100644
--- a/src/mesa/drivers/dri/i965/gen8_ps_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c
@@ -52,8 +52,7 @@ gen8_upload_ps_extra(struct brw_context *brw,
if (prog_data->uses_src_w)
dw1 |= GEN8_PSX_USES_SOURCE_W;
- if (multisampled_fbo &&
- _mesa_get_min_invocations_per_fragment(ctx, fp, false) > 1)
+ if (prog_data->persample_dispatch)
dw1 |= GEN8_PSX_SHADER_IS_PER_SAMPLE;
if (prog_data->uses_sample_mask) {
@@ -192,7 +191,6 @@ gen8_upload_ps_state(struct brw_context *brw,
const struct brw_wm_prog_data *prog_data,
uint32_t fast_clear_op)
{
- struct gl_context *ctx = &brw->ctx;
uint32_t dw3 = 0, dw6 = 0, dw7 = 0, ksp0, ksp2 = 0;
/* Initialize the execution mask with VMask. Otherwise, derivatives are
@@ -246,19 +244,15 @@ gen8_upload_ps_state(struct brw_context *brw,
dw6 |= fast_clear_op;
- /* _NEW_MULTISAMPLE
- * In case of non 1x per sample shading, only one of SIMD8 and SIMD16
- * should be enabled. We do 'SIMD16 only' dispatch if a SIMD16 shader
- * is successfully compiled. In majority of the cases that bring us
- * better performance than 'SIMD8 only' dispatch.
- */
- int min_invocations_per_fragment =
- _mesa_get_min_invocations_per_fragment(ctx, fp, false);
- assert(min_invocations_per_fragment >= 1);
-
if (prog_data->prog_offset_16 || prog_data->no_8) {
dw6 |= GEN7_PS_16_DISPATCH_ENABLE;
- if (!prog_data->no_8 && min_invocations_per_fragment == 1) {
+
+ /* In case of non 1x per sample shading, only one of SIMD8 and SIMD16
+ * should be enabled. We do 'SIMD16 only' dispatch if a SIMD16 shader
+ * is successfully compiled. In majority of the cases that bring us
+ * better performance than 'SIMD8 only' dispatch.
+ */
+ if (!prog_data->no_8 && !prog_data->persample_dispatch) {
dw6 |= GEN7_PS_8_DISPATCH_ENABLE;
dw7 |= (prog_data->base.dispatch_grf_start_reg <<
GEN7_PS_DISPATCH_START_GRF_SHIFT_0);