aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/mesa/drivers/dri/i965/gen7_gs_state.c4
-rw-r--r--src/mesa/drivers/dri/i965/gen7_vs_state.c4
-rw-r--r--src/mesa/drivers/dri/i965/gen7_wm_state.c12
-rw-r--r--src/mesa/drivers/dri/i965/gen8_gs_state.c4
-rw-r--r--src/mesa/drivers/dri/i965/gen8_ps_state.c32
-rw-r--r--src/mesa/drivers/dri/i965/gen8_vs_state.c4
6 files changed, 41 insertions, 19 deletions
diff --git a/src/mesa/drivers/dri/i965/gen7_gs_state.c b/src/mesa/drivers/dri/i965/gen7_gs_state.c
index 497ecec8e45..8d6d3fe1d34 100644
--- a/src/mesa/drivers/dri/i965/gen7_gs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_gs_state.c
@@ -59,9 +59,7 @@ upload_gs_state(struct brw_context *brw)
OUT_BATCH(((ALIGN(stage_state->sampler_count, 4)/4) <<
GEN6_GS_SAMPLER_COUNT_SHIFT) |
((brw->gs.prog_data->base.base.binding_table.size_bytes / 4) <<
- GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT) |
- (brw->is_haswell && prog_data->base.nr_image_params ?
- HSW_GS_UAV_ACCESS_ENABLE : 0));
+ GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
if (brw->gs.prog_data->base.base.total_scratch) {
OUT_RELOC(stage_state->scratch_bo,
diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c
index b7e48585482..a18dc697651 100644
--- a/src/mesa/drivers/dri/i965/gen7_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c
@@ -126,9 +126,7 @@ upload_vs_state(struct brw_context *brw)
((ALIGN(stage_state->sampler_count, 4)/4) <<
GEN6_VS_SAMPLER_COUNT_SHIFT) |
((brw->vs.prog_data->base.base.binding_table.size_bytes / 4) <<
- GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT) |
- (brw->is_haswell && prog_data->base.nr_image_params ?
- HSW_VS_UAV_ACCESS_ENABLE : 0));
+ GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
if (prog_data->base.total_scratch) {
OUT_RELOC(stage_state->scratch_bo,
diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c
index fd6dab5be8b..06d5e65786b 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c
@@ -113,7 +113,14 @@ upload_wm_state(struct brw_context *brw)
else if (prog_data->base.nr_image_params)
dw1 |= GEN7_WM_EARLY_DS_CONTROL_PSEXEC;
- /* _NEW_BUFFERS | _NEW_COLOR */
+ /* The "UAV access enable" bits are unnecessary on HSW because they only
+ * seem to have an effect on the HW-assisted coherency mechanism which we
+ * don't need, and the rasterization-related UAV_ONLY flag and the
+ * DISPATCH_ENABLE bit can be set independently from it.
+ * Cf. gen8_upload_ps_extra().
+ *
+ * BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_FS_PROG_DATA | _NEW_BUFFERS | _NEW_COLOR
+ */
if (brw->is_haswell &&
!(brw_color_buffer_write_enabled(brw) || writes_depth) &&
prog_data->base.nr_image_params)
@@ -221,9 +228,6 @@ gen7_upload_ps_state(struct brw_context *brw,
_mesa_get_min_invocations_per_fragment(ctx, fp, false);
assert(min_inv_per_frag >= 1);
- if (brw->is_haswell && prog_data->base.nr_image_params)
- dw4 |= HSW_PS_UAV_ACCESS_ENABLE;
-
if (prog_data->prog_offset_16 || prog_data->no_8) {
dw4 |= GEN7_PS_16_DISPATCH_ENABLE;
if (!prog_data->no_8 && min_inv_per_frag == 1) {
diff --git a/src/mesa/drivers/dri/i965/gen8_gs_state.c b/src/mesa/drivers/dri/i965/gen8_gs_state.c
index 81bd3b21778..26a02d3b045 100644
--- a/src/mesa/drivers/dri/i965/gen8_gs_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_gs_state.c
@@ -52,9 +52,7 @@ gen8_upload_gs_state(struct brw_context *brw)
((ALIGN(stage_state->sampler_count, 4)/4) <<
GEN6_GS_SAMPLER_COUNT_SHIFT) |
((prog_data->base.binding_table.size_bytes / 4) <<
- GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT) |
- (prog_data->base.nr_image_params ?
- HSW_GS_UAV_ACCESS_ENABLE : 0));
+ GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
if (brw->gs.prog_data->base.base.total_scratch) {
OUT_RELOC64(stage_state->scratch_bo,
diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c b/src/mesa/drivers/dri/i965/gen8_ps_state.c
index ae18f0f162c..a6c9ab32dce 100644
--- a/src/mesa/drivers/dri/i965/gen8_ps_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c
@@ -25,6 +25,7 @@
#include "program/program.h"
#include "brw_state.h"
#include "brw_defines.h"
+#include "brw_wm.h"
#include "intel_batchbuffer.h"
void
@@ -61,8 +62,33 @@ gen8_upload_ps_extra(struct brw_context *brw,
if (brw->gen >= 9 && prog_data->pulls_bary)
dw1 |= GEN9_PSX_SHADER_PULLS_BARY;
- if (_mesa_active_fragment_shader_has_atomic_ops(&brw->ctx) ||
- prog_data->base.nr_image_params)
+ /* The stricter cross-primitive coherency guarantees that the hardware
+ * gives us with the "Accesses UAV" bit set for at least one shader stage
+ * and the "UAV coherency required" bit set on the 3DPRIMITIVE command are
+ * redundant within the current image, atomic counter and SSBO GL APIs,
+ * which all have very loose ordering and coherency requirements and
+ * generally rely on the application to insert explicit barriers when a
+ * shader invocation is expected to see the memory writes performed by the
+ * invocations of some previous primitive. Regardless of the value of "UAV
+ * coherency required", the "Accesses UAV" bits will implicitly cause a DC
+ * flush (useless in most cases) when the lowermost stage with the bit set
+ * finishes execution.
+ *
+ * It would be nice to disable it, but in some cases we can't because on
+ * Gen8+ it also has an influence on rasterization via the PS UAV-only
+ * signal (which could be set independently from the coherency mechanism in
+ * the 3DSTATE_WM command on Gen7), and because in some cases it will
+ * determine whether the hardware skips execution of the fragment shader or
+ * not via the ThreadDispatchEnable signal. However, if we know that
+ * GEN8_PS_BLEND_HAS_WRITEABLE_RT is going to be set and
+ * GEN8_PSX_PIXEL_SHADER_NO_RT_WRITE is not set, it shouldn't make any
+ * difference, so we may just disable it here.
+ *
+ * BRW_NEW_FS_PROG_DATA | BRW_NEW_FRAGMENT_PROGRAM | _NEW_BUFFERS | _NEW_COLOR
+ */
+ if ((_mesa_active_fragment_shader_has_atomic_ops(&brw->ctx) ||
+ prog_data->base.nr_image_params) &&
+ !brw_color_buffer_write_enabled(brw))
dw1 |= GEN8_PSX_SHADER_HAS_UAV;
BEGIN_BATCH(2);
@@ -87,7 +113,7 @@ upload_ps_extra(struct brw_context *brw)
const struct brw_tracked_state gen8_ps_extra = {
.dirty = {
- .mesa = 0,
+ .mesa = _NEW_BUFFERS | _NEW_COLOR,
.brw = BRW_NEW_CONTEXT |
BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_FS_PROG_DATA |
diff --git a/src/mesa/drivers/dri/i965/gen8_vs_state.c b/src/mesa/drivers/dri/i965/gen8_vs_state.c
index 8b5048bee7e..28f5adddf14 100644
--- a/src/mesa/drivers/dri/i965/gen8_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_vs_state.c
@@ -53,9 +53,7 @@ upload_vs_state(struct brw_context *brw)
((ALIGN(stage_state->sampler_count, 4) / 4) <<
GEN6_VS_SAMPLER_COUNT_SHIFT) |
((prog_data->base.binding_table.size_bytes / 4) <<
- GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT) |
- (prog_data->base.nr_image_params ?
- HSW_VS_UAV_ACCESS_ENABLE : 0));
+ GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
if (prog_data->base.total_scratch) {
OUT_RELOC64(stage_state->scratch_bo,