summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/mesa/drivers/dri/i965/brw_defines.h | 4
-rw-r--r-- src/mesa/drivers/dri/i965/gen7_gs_state.c | 4
-rw-r--r-- src/mesa/drivers/dri/i965/gen7_vs_state.c | 13
-rw-r--r-- src/mesa/drivers/dri/i965/gen7_wm_state.c | 9
-rw-r--r-- src/mesa/drivers/dri/i965/gen8_gs_state.c | 4
-rw-r--r-- src/mesa/drivers/dri/i965/gen8_ps_state.c | 3
-rw-r--r-- src/mesa/drivers/dri/i965/gen8_vs_state.c | 4
7 files changed, 32 insertions, 9 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 9c232c46ff3..7fa7c5f06f9 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1855,6 +1855,7 @@ enum brw_message_target {
# define GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
# define GEN6_VS_FLOATING_POINT_MODE_IEEE_754 (0 << 16)
# define GEN6_VS_FLOATING_POINT_MODE_ALT (1 << 16)
+# define HSW_VS_UAV_ACCESS_ENABLE (1 << 12)
/* DW4 */
# define GEN6_VS_DISPATCH_START_GRF_SHIFT 20
# define GEN6_VS_URB_READ_LENGTH_SHIFT 11
@@ -1880,6 +1881,7 @@ enum brw_message_target {
# define GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
# define GEN6_GS_FLOATING_POINT_MODE_IEEE_754 (0 << 16)
# define GEN6_GS_FLOATING_POINT_MODE_ALT (1 << 16)
+# define HSW_GS_UAV_ACCESS_ENABLE (1 << 12)
/* DW4 */
# define GEN7_GS_OUTPUT_VERTEX_SIZE_SHIFT 23
# define GEN7_GS_OUTPUT_TOPOLOGY_SHIFT 17
@@ -2406,6 +2408,7 @@ enum brw_wm_barycentric_interp_mode {
/* DW2 */
# define GEN7_WM_MSDISPMODE_PERSAMPLE (0 << 31)
# define GEN7_WM_MSDISPMODE_PERPIXEL (1 << 31)
+# define HSW_WM_UAV_ONLY (1 << 30)
#define _3DSTATE_PS 0x7820 /* GEN7+ */
/* DW1: kernel pointer */
@@ -2429,6 +2432,7 @@ enum brw_wm_barycentric_interp_mode {
# define GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE (1 << 8)
# define GEN7_PS_DUAL_SOURCE_BLEND_ENABLE (1 << 7)
# define GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE (1 << 6)
+# define HSW_PS_UAV_ACCESS_ENABLE (1 << 5)
# define GEN7_PS_POSOFFSET_NONE (0 << 3)
# define GEN7_PS_POSOFFSET_CENTROID (2 << 3)
# define GEN7_PS_POSOFFSET_SAMPLE (3 << 3)
diff --git a/src/mesa/drivers/dri/i965/gen7_gs_state.c b/src/mesa/drivers/dri/i965/gen7_gs_state.c
index 8d6d3fe1d34..497ecec8e45 100644
--- a/src/mesa/drivers/dri/i965/gen7_gs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_gs_state.c
@@ -59,7 +59,9 @@ upload_gs_state(struct brw_context *brw)
OUT_BATCH(((ALIGN(stage_state->sampler_count, 4)/4) <<
GEN6_GS_SAMPLER_COUNT_SHIFT) |
((brw->gs.prog_data->base.base.binding_table.size_bytes / 4) <<
- GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+ GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT) |
+ (brw->is_haswell && prog_data->base.nr_image_params ?
+ HSW_GS_UAV_ACCESS_ENABLE : 0));
if (brw->gs.prog_data->base.base.total_scratch) {
OUT_RELOC(stage_state->scratch_bo,
diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c
index 00bc6f24dbe..b7e48585482 100644
--- a/src/mesa/drivers/dri/i965/gen7_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c
@@ -111,6 +111,7 @@ upload_vs_state(struct brw_context *brw)
uint32_t floating_point_mode = 0;
const int max_threads_shift = brw->is_haswell ?
HSW_VS_MAX_THREADS_SHIFT : GEN6_VS_MAX_THREADS_SHIFT;
+ const struct brw_vue_prog_data *prog_data = &brw->vs.prog_data->base;
if (!brw->is_haswell && !brw->is_baytrail)
gen7_emit_vs_workaround_flush(brw);
@@ -125,19 +126,21 @@ upload_vs_state(struct brw_context *brw)
((ALIGN(stage_state->sampler_count, 4)/4) <<
GEN6_VS_SAMPLER_COUNT_SHIFT) |
((brw->vs.prog_data->base.base.binding_table.size_bytes / 4) <<
- GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+ GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT) |
+ (brw->is_haswell && prog_data->base.nr_image_params ?
+ HSW_VS_UAV_ACCESS_ENABLE : 0));
- if (brw->vs.prog_data->base.base.total_scratch) {
+ if (prog_data->base.total_scratch) {
OUT_RELOC(stage_state->scratch_bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
- ffs(brw->vs.prog_data->base.base.total_scratch) - 11);
+ ffs(prog_data->base.total_scratch) - 11);
} else {
OUT_BATCH(0);
}
- OUT_BATCH((brw->vs.prog_data->base.base.dispatch_grf_start_reg <<
+ OUT_BATCH((prog_data->base.dispatch_grf_start_reg <<
GEN6_VS_DISPATCH_START_GRF_SHIFT) |
- (brw->vs.prog_data->base.urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) |
+ (prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) |
(0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT));
OUT_BATCH(((brw->max_vs_threads - 1) << max_threads_shift) |
diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c
index aa47421844e..285311ef53c 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c
@@ -107,6 +107,12 @@ upload_wm_state(struct brw_context *brw)
dw1 |= GEN7_WM_USES_INPUT_COVERAGE_MASK;
}
+ /* _NEW_BUFFERS | _NEW_COLOR */
+ if (brw->is_haswell &&
+ !(brw_color_buffer_write_enabled(brw) || writes_depth) &&
+ prog_data->base.nr_image_params)
+ dw2 |= HSW_WM_UAV_ONLY;
+
BEGIN_BATCH(3);
OUT_BATCH(_3DSTATE_WM << 16 | (3 - 2));
OUT_BATCH(dw1);
@@ -209,6 +215,9 @@ gen7_upload_ps_state(struct brw_context *brw,
_mesa_get_min_invocations_per_fragment(ctx, fp, false);
assert(min_inv_per_frag >= 1);
+ if (brw->is_haswell && prog_data->base.nr_image_params)
+ dw4 |= HSW_PS_UAV_ACCESS_ENABLE;
+
if (prog_data->prog_offset_16 || prog_data->no_8) {
dw4 |= GEN7_PS_16_DISPATCH_ENABLE;
if (!prog_data->no_8 && min_inv_per_frag == 1) {
diff --git a/src/mesa/drivers/dri/i965/gen8_gs_state.c b/src/mesa/drivers/dri/i965/gen8_gs_state.c
index 26a02d3b045..81bd3b21778 100644
--- a/src/mesa/drivers/dri/i965/gen8_gs_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_gs_state.c
@@ -52,7 +52,9 @@ gen8_upload_gs_state(struct brw_context *brw)
((ALIGN(stage_state->sampler_count, 4)/4) <<
GEN6_GS_SAMPLER_COUNT_SHIFT) |
((prog_data->base.binding_table.size_bytes / 4) <<
- GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+ GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT) |
+ (prog_data->base.nr_image_params ?
+ HSW_GS_UAV_ACCESS_ENABLE : 0));
if (brw->gs.prog_data->base.base.total_scratch) {
OUT_RELOC64(stage_state->scratch_bo,
diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c b/src/mesa/drivers/dri/i965/gen8_ps_state.c
index d5445093e67..f84fbe1864d 100644
--- a/src/mesa/drivers/dri/i965/gen8_ps_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c
@@ -61,7 +61,8 @@ gen8_upload_ps_extra(struct brw_context *brw,
if (brw->gen >= 9 && prog_data->pulls_bary)
dw1 |= GEN9_PSX_SHADER_PULLS_BARY;
- if (_mesa_active_fragment_shader_has_atomic_ops(&brw->ctx))
+ if (_mesa_active_fragment_shader_has_atomic_ops(&brw->ctx) ||
+ prog_data->base.nr_image_params)
dw1 |= GEN8_PSX_SHADER_HAS_UAV;
BEGIN_BATCH(2);
diff --git a/src/mesa/drivers/dri/i965/gen8_vs_state.c b/src/mesa/drivers/dri/i965/gen8_vs_state.c
index 28f5adddf14..8b5048bee7e 100644
--- a/src/mesa/drivers/dri/i965/gen8_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_vs_state.c
@@ -53,7 +53,9 @@ upload_vs_state(struct brw_context *brw)
((ALIGN(stage_state->sampler_count, 4) / 4) <<
GEN6_VS_SAMPLER_COUNT_SHIFT) |
((prog_data->base.binding_table.size_bytes / 4) <<
- GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+ GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT) |
+ (prog_data->base.nr_image_params ?
+ HSW_VS_UAV_ACCESS_ENABLE : 0));
if (prog_data->base.total_scratch) {
OUT_RELOC64(stage_state->scratch_bo,