aboutsummaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers
diff options
context:
space:
mode:
author: Chia-I Wu <[email protected]> 2014-09-22 23:59:53 +0800
committer: Chia-I Wu <[email protected]> 2014-09-23 10:08:05 +0800
commit: 6c9d67118a21e3713e006da4a03a584fb8268d92 (patch)
tree: ab8bd8e73672b02ae9e3f5d83b9c20eeba0a4076 /src/gallium/drivers
parent: 34e807817fb06b800f588b7e966e579110afeb2d (diff)
ilo: rework pipeline workarounds
Add current_pipe_control_dw1 and deferred_pipe_control_dw1 to track what has been done since the last 3DPRIMITIVE and what needs to be done before the next 3DPRIMITIVE. Based on them, we can emit WAs more smartly. Signed-off-by: Chia-I Wu <[email protected]>
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r-- src/gallium/drivers/ilo/ilo_3d_pipeline.h 18
-rw-r--r-- src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c 303
-rw-r--r-- src/gallium/drivers/ilo/ilo_3d_pipeline_gen7.c 246
3 files changed, 339 insertions, 228 deletions
diff --git a/src/gallium/drivers/ilo/ilo_3d_pipeline.h b/src/gallium/drivers/ilo/ilo_3d_pipeline.h
index 5556edb4c69..e85bb8aee28 100644
--- a/src/gallium/drivers/ilo/ilo_3d_pipeline.h
+++ b/src/gallium/drivers/ilo/ilo_3d_pipeline.h
@@ -87,7 +87,19 @@ struct ilo_3d_pipeline {
* HW states.
*/
struct ilo_3d_pipeline_state {
- bool has_gen6_wa_pipe_control;
+ /*
+ * When a WA is needed before some command, we always emit the WA right
+ * before the command. Knowing what has already been done since last
+ * 3DPRIMITIVE allows us to skip some WAs.
+ */
+ uint32_t current_pipe_control_dw1;
+
+ /*
+ * When a WA is needed after some command, we may have the WA follow the
+ * command immediately or defer it. If this is non-zero, a PIPE_CONTROL
+ * will be emitted before 3DPRIMITIVE.
+ */
+ uint32_t deferred_pipe_control_dw1;
bool primitive_restart;
int reduced_prim;
@@ -144,7 +156,9 @@ static inline void
ilo_3d_pipeline_invalidate(struct ilo_3d_pipeline *p, uint32_t flags)
{
p->invalidate_flags |= flags;
- p->state.has_gen6_wa_pipe_control = false;
+
+ /* Kernel flushes everything. Shouldn't we set all bits here? */
+ p->state.current_pipe_control_dw1 = 0;
}
/**
diff --git a/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c b/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c
index 7ae6cc124e8..e5cd937ee94 100644
--- a/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c
+++ b/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c
@@ -40,42 +40,37 @@
#include "ilo_3d_pipeline_gen6.h"
/**
- * This should be called before any depth stall flush (including those
- * produced by non-pipelined state commands) or cache flush on GEN6.
- *
- * \see intel_emit_post_sync_nonzero_flush()
+ * A wrapper for gen6_PIPE_CONTROL().
*/
-static void
-gen6_wa_pipe_control_post_sync(struct ilo_3d_pipeline *p,
- bool caller_post_sync)
+static inline void
+gen6_pipe_control(struct ilo_3d_pipeline *p, uint32_t dw1)
{
- assert(ilo_dev_gen(p->dev) == ILO_GEN(6));
+ struct intel_bo *bo = (dw1 & GEN6_PIPE_CONTROL_WRITE__MASK) ?
+ p->workaround_bo : NULL;
- /* emit once */
- if (p->state.has_gen6_wa_pipe_control)
- return;
+ ILO_DEV_ASSERT(p->dev, 6, 6);
+
+ gen6_PIPE_CONTROL(p->builder, dw1, bo, 0, false);
+
+ p->state.current_pipe_control_dw1 |= dw1;
- p->state.has_gen6_wa_pipe_control = true;
+ assert(!p->state.deferred_pipe_control_dw1);
+}
+/**
+ * This should be called before PIPE_CONTROL.
+ */
+static void
+gen6_wa_pre_pipe_control(struct ilo_3d_pipeline *p, uint32_t dw1)
+{
/*
* From the Sandy Bridge PRM, volume 2 part 1, page 60:
*
* "Pipe-control with CS-stall bit set must be sent BEFORE the
* pipe-control with a post-sync op and no write-cache flushes."
*
- * The workaround below necessitates this workaround.
- */
- gen6_PIPE_CONTROL(p->builder,
- GEN6_PIPE_CONTROL_CS_STALL |
- GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL,
- NULL, 0, false);
-
- /* the caller will emit the post-sync op */
- if (caller_post_sync)
- return;
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 60:
+ * This WA may also be triggered indirectly by the other two WAs on the
+ * same page:
*
* "Before any depth stall flush (including those produced by
* non-pipelined state commands), software needs to first send a
@@ -84,66 +79,78 @@ gen6_wa_pipe_control_post_sync(struct ilo_3d_pipeline *p,
* "Before a PIPE_CONTROL with Write Cache Flush Enable =1, a
* PIPE_CONTROL with any non-zero post-sync-op is required."
*/
- gen6_PIPE_CONTROL(p->builder,
- GEN6_PIPE_CONTROL_WRITE_IMM,
- p->workaround_bo, 0, false);
-}
+ const bool direct_wa_cond = (dw1 & GEN6_PIPE_CONTROL_WRITE__MASK) &&
+ !(dw1 & GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH);
+ const bool indirect_wa_cond = (dw1 & GEN6_PIPE_CONTROL_DEPTH_STALL) |
+ (dw1 & GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH);
-static void
-gen6_wa_pipe_control_wm_multisample_flush(struct ilo_3d_pipeline *p)
-{
- assert(ilo_dev_gen(p->dev) == ILO_GEN(6));
+ ILO_DEV_ASSERT(p->dev, 6, 6);
+
+ if (!direct_wa_cond && !indirect_wa_cond)
+ return;
- gen6_wa_pipe_control_post_sync(p, false);
+ if (!(p->state.current_pipe_control_dw1 & GEN6_PIPE_CONTROL_CS_STALL)) {
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 73:
+ *
+ * "1 of the following must also be set (when CS stall is set):
+ *
+ * - Depth Cache Flush Enable ([0] of DW1)
+ * - Stall at Pixel Scoreboard ([1] of DW1)
+ * - Depth Stall ([13] of DW1)
+ * - Post-Sync Operation ([13] of DW1)
+ * - Render Target Cache Flush Enable ([12] of DW1)
+ * - Notify Enable ([8] of DW1)"
+ *
+ * Because of the WAs above, we have to pick Stall at Pixel Scoreboard.
+ */
+ const uint32_t direct_wa = GEN6_PIPE_CONTROL_CS_STALL |
+ GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL;
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 305:
- *
- * "Driver must guarentee that all the caches in the depth pipe are
- * flushed before this command (3DSTATE_MULTISAMPLE) is parsed. This
- * requires driver to send a PIPE_CONTROL with a CS stall along with a
- * Depth Flush prior to this command."
- */
- gen6_PIPE_CONTROL(p->builder,
- GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH |
- GEN6_PIPE_CONTROL_CS_STALL,
- 0, 0, false);
+ gen6_pipe_control(p, direct_wa);
+ }
+
+ if (indirect_wa_cond &&
+ !(p->state.current_pipe_control_dw1 & GEN6_PIPE_CONTROL_WRITE__MASK)) {
+ const uint32_t indirect_wa = GEN6_PIPE_CONTROL_WRITE_IMM;
+
+ gen6_pipe_control(p, indirect_wa);
+ }
}
+/**
+ * This should be called before any non-pipelined state command.
+ */
static void
-gen6_wa_pipe_control_wm_depth_flush(struct ilo_3d_pipeline *p)
+gen6_wa_pre_non_pipelined(struct ilo_3d_pipeline *p)
{
- assert(ilo_dev_gen(p->dev) == ILO_GEN(6));
+ ILO_DEV_ASSERT(p->dev, 6, 6);
- gen6_wa_pipe_control_post_sync(p, false);
+ /* non-pipelined state commands produce depth stall */
+ gen6_wa_pre_pipe_control(p, GEN6_PIPE_CONTROL_DEPTH_STALL);
+}
+static void
+gen6_wa_post_3dstate_constant_vs(struct ilo_3d_pipeline *p)
+{
/*
- * According to intel_emit_depth_stall_flushes() of classic i965, we need
- * to emit a sequence of PIPE_CONTROLs prior to emitting depth related
- * commands.
+ * According to upload_vs_state() of the classic driver, we need to emit a
+ * PIPE_CONTROL after 3DSTATE_CONSTANT_VS, otherwise the command is kept
+ * being buffered by VS FF, to the point that the FF dies.
*/
- gen6_PIPE_CONTROL(p->builder,
- GEN6_PIPE_CONTROL_DEPTH_STALL,
- NULL, 0, false);
+ const uint32_t dw1 = GEN6_PIPE_CONTROL_DEPTH_STALL |
+ GEN6_PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE |
+ GEN6_PIPE_CONTROL_STATE_CACHE_INVALIDATE;
- gen6_PIPE_CONTROL(p->builder,
- GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH,
- NULL, 0, false);
+ gen6_wa_pre_pipe_control(p, dw1);
- gen6_PIPE_CONTROL(p->builder,
- GEN6_PIPE_CONTROL_DEPTH_STALL,
- NULL, 0, false);
+ if ((p->state.current_pipe_control_dw1 & dw1) != dw1)
+ gen6_pipe_control(p, dw1);
}
static void
-gen6_wa_pipe_control_wm_max_threads_stall(struct ilo_3d_pipeline *p)
+gen6_wa_pre_3dstate_wm_max_threads(struct ilo_3d_pipeline *p)
{
- assert(ilo_dev_gen(p->dev) == ILO_GEN(6));
-
- /* the post-sync workaround should cover this already */
- if (p->state.has_gen6_wa_pipe_control)
- return;
-
/*
* From the Sandy Bridge PRM, volume 2 part 1, page 274:
*
@@ -151,29 +158,64 @@ gen6_wa_pipe_control_wm_max_threads_stall(struct ilo_3d_pipeline *p)
* field set (DW1 Bit 1), must be issued prior to any change to the
* value in this field (Maximum Number of Threads in 3DSTATE_WM)"
*/
- gen6_PIPE_CONTROL(p->builder,
- GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL,
- NULL, 0, false);
+ const uint32_t dw1 = GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL;
+
+ ILO_DEV_ASSERT(p->dev, 6, 6);
+
+ gen6_wa_pre_pipe_control(p, dw1);
+ if ((p->state.current_pipe_control_dw1 & dw1) != dw1)
+ gen6_pipe_control(p, dw1);
}
static void
-gen6_wa_pipe_control_vs_const_flush(struct ilo_3d_pipeline *p)
+gen6_wa_pre_3dstate_multisample(struct ilo_3d_pipeline *p)
{
- assert(ilo_dev_gen(p->dev) == ILO_GEN(6));
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 305:
+ *
+ * "Driver must guarentee that all the caches in the depth pipe are
+ * flushed before this command (3DSTATE_MULTISAMPLE) is parsed. This
+ * requires driver to send a PIPE_CONTROL with a CS stall along with a
+ * Depth Flush prior to this command."
+ */
+ const uint32_t dw1 = GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+ GEN6_PIPE_CONTROL_CS_STALL;
+
+ ILO_DEV_ASSERT(p->dev, 6, 6);
+
+ gen6_wa_pre_pipe_control(p, dw1);
+
+ if ((p->state.current_pipe_control_dw1 & dw1) != dw1)
+ gen6_pipe_control(p, dw1);
+}
- gen6_wa_pipe_control_post_sync(p, false);
+static void
+gen6_wa_pre_depth(struct ilo_3d_pipeline *p)
+{
+ ILO_DEV_ASSERT(p->dev, 6, 6);
/*
- * According to upload_vs_state() of classic i965, we need to emit
- * PIPE_CONTROL after 3DSTATE_CONSTANT_VS so that the command is kept being
- * buffered by VS FF, to the point that the FF dies.
+ * From the Ivy Bridge PRM, volume 2 part 1, page 315:
+ *
+ * "Restriction: Prior to changing Depth/Stencil Buffer state (i.e.,
+ * any combination of 3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS,
+ * 3DSTATE_STENCIL_BUFFER, 3DSTATE_HIER_DEPTH_BUFFER) SW must first
+ * issue a pipelined depth stall (PIPE_CONTROL with Depth Stall bit
+ * set), followed by a pipelined depth cache flush (PIPE_CONTROL with
+ * Depth Flush Bit set, followed by another pipelined depth stall
+ * (PIPE_CONTROL with Depth Stall Bit set), unless SW can otherwise
+ * guarantee that the pipeline from WM onwards is already flushed
+ * (e.g., via a preceding MI_FLUSH)."
+ *
+ * According to the classic driver, it also applies to GEN6.
*/
- gen6_PIPE_CONTROL(p->builder,
- GEN6_PIPE_CONTROL_DEPTH_STALL |
- GEN6_PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE |
- GEN6_PIPE_CONTROL_STATE_CACHE_INVALIDATE,
- NULL, 0, false);
+ gen6_wa_pre_pipe_control(p, GEN6_PIPE_CONTROL_DEPTH_STALL |
+ GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH);
+
+ gen6_pipe_control(p, GEN6_PIPE_CONTROL_DEPTH_STALL);
+ gen6_pipe_control(p, GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH);
+ gen6_pipe_control(p, GEN6_PIPE_CONTROL_DEPTH_STALL);
}
#define DIRTY(state) (session->pipe_dirty & ILO_DIRTY_ ## state)
@@ -186,7 +228,7 @@ gen6_pipeline_common_select(struct ilo_3d_pipeline *p,
/* PIPELINE_SELECT */
if (session->hw_ctx_changed) {
if (ilo_dev_gen(p->dev) == ILO_GEN(6))
- gen6_wa_pipe_control_post_sync(p, false);
+ gen6_wa_pre_non_pipelined(p);
gen6_PIPELINE_SELECT(p->builder, 0x0);
}
@@ -200,7 +242,7 @@ gen6_pipeline_common_sip(struct ilo_3d_pipeline *p,
/* STATE_SIP */
if (session->hw_ctx_changed) {
if (ilo_dev_gen(p->dev) == ILO_GEN(6))
- gen6_wa_pipe_control_post_sync(p, false);
+ gen6_wa_pre_non_pipelined(p);
gen6_STATE_SIP(p->builder, 0);
}
@@ -215,7 +257,7 @@ gen6_pipeline_common_base_address(struct ilo_3d_pipeline *p,
if (session->state_bo_changed || session->kernel_bo_changed ||
session->batch_bo_changed) {
if (ilo_dev_gen(p->dev) == ILO_GEN(6))
- gen6_wa_pipe_control_post_sync(p, false);
+ gen6_wa_pre_non_pipelined(p);
gen6_state_base_address(p->builder, session->hw_ctx_changed);
@@ -468,7 +510,9 @@ gen6_pipeline_vf_draw(struct ilo_3d_pipeline *p,
{
/* 3DPRIMITIVE */
gen6_3DPRIMITIVE(p->builder, vec->draw, &vec->ib);
- p->state.has_gen6_wa_pipe_control = false;
+
+ p->state.current_pipe_control_dw1 = 0;
+ assert(!p->state.deferred_pipe_control_dw1);
}
void
@@ -485,7 +529,7 @@ gen6_pipeline_vs(struct ilo_3d_pipeline *p,
* cannot find
*/
if (emit_3dstate_vs && ilo_dev_gen(p->dev) == ILO_GEN(6))
- gen6_wa_pipe_control_post_sync(p, false);
+ gen6_wa_pre_non_pipelined(p);
/* 3DSTATE_CONSTANT_VS */
if (emit_3dstate_constant_vs) {
@@ -503,7 +547,7 @@ gen6_pipeline_vs(struct ilo_3d_pipeline *p,
}
if (emit_3dstate_constant_vs && ilo_dev_gen(p->dev) == ILO_GEN(6))
- gen6_wa_pipe_control_vs_const_flush(p);
+ gen6_wa_post_3dstate_constant_vs(p);
}
static void
@@ -578,7 +622,7 @@ gen6_pipeline_gs_svbi(struct ilo_3d_pipeline *p,
/* 3DSTATE_GS_SVB_INDEX */
if (emit) {
if (ilo_dev_gen(p->dev) == ILO_GEN(6))
- gen6_wa_pipe_control_post_sync(p, false);
+ gen6_wa_pre_non_pipelined(p);
gen6_3DSTATE_GS_SVB_INDEX(p->builder,
0, 0, p->state.so_max_vertices,
@@ -651,7 +695,7 @@ gen6_pipeline_sf_rect(struct ilo_3d_pipeline *p,
/* 3DSTATE_DRAWING_RECTANGLE */
if (DIRTY(FB)) {
if (ilo_dev_gen(p->dev) == ILO_GEN(6))
- gen6_wa_pipe_control_post_sync(p, false);
+ gen6_wa_pre_non_pipelined(p);
gen6_3DSTATE_DRAWING_RECTANGLE(p->builder, 0, 0,
vec->fb.state.width, vec->fb.state.height);
@@ -680,7 +724,7 @@ gen6_pipeline_wm(struct ilo_3d_pipeline *p,
vec->blend->alpha_to_coverage);
if (ilo_dev_gen(p->dev) == ILO_GEN(6) && session->hw_ctx_changed)
- gen6_wa_pipe_control_wm_max_threads_stall(p);
+ gen6_wa_pre_3dstate_wm_max_threads(p);
gen6_3DSTATE_WM(p->builder, vec->fs, num_samplers,
vec->rasterizer, dual_blend, cc_may_kill, 0);
@@ -700,8 +744,8 @@ gen6_pipeline_wm_multisample(struct ilo_3d_pipeline *p,
&p->packed_sample_position_4x : &p->packed_sample_position_1x;
if (ilo_dev_gen(p->dev) == ILO_GEN(6)) {
- gen6_wa_pipe_control_post_sync(p, false);
- gen6_wa_pipe_control_wm_multisample_flush(p);
+ gen6_wa_pre_non_pipelined(p);
+ gen6_wa_pre_3dstate_multisample(p);
}
gen6_3DSTATE_MULTISAMPLE(p->builder,
@@ -741,8 +785,8 @@ gen6_pipeline_wm_depth(struct ilo_3d_pipeline *p,
}
if (ilo_dev_gen(p->dev) == ILO_GEN(6)) {
- gen6_wa_pipe_control_post_sync(p, false);
- gen6_wa_pipe_control_wm_depth_flush(p);
+ gen6_wa_pre_non_pipelined(p);
+ gen6_wa_pre_depth(p);
}
gen6_3DSTATE_DEPTH_BUFFER(p->builder, zs);
@@ -761,7 +805,7 @@ gen6_pipeline_wm_raster(struct ilo_3d_pipeline *p,
if ((DIRTY(RASTERIZER) || DIRTY(POLY_STIPPLE)) &&
vec->rasterizer->state.poly_stipple_enable) {
if (ilo_dev_gen(p->dev) == ILO_GEN(6))
- gen6_wa_pipe_control_post_sync(p, false);
+ gen6_wa_pre_non_pipelined(p);
gen6_3DSTATE_POLY_STIPPLE_PATTERN(p->builder,
&vec->poly_stipple);
@@ -772,7 +816,7 @@ gen6_pipeline_wm_raster(struct ilo_3d_pipeline *p,
/* 3DSTATE_LINE_STIPPLE */
if (DIRTY(RASTERIZER) && vec->rasterizer->state.line_stipple_enable) {
if (ilo_dev_gen(p->dev) == ILO_GEN(6))
- gen6_wa_pipe_control_post_sync(p, false);
+ gen6_wa_pre_non_pipelined(p);
gen6_3DSTATE_LINE_STIPPLE(p->builder,
vec->rasterizer->state.line_stipple_pattern,
@@ -782,7 +826,7 @@ gen6_pipeline_wm_raster(struct ilo_3d_pipeline *p,
/* 3DSTATE_AA_LINE_PARAMETERS */
if (DIRTY(RASTERIZER) && vec->rasterizer->state.line_smooth) {
if (ilo_dev_gen(p->dev) == ILO_GEN(6))
- gen6_wa_pipe_control_post_sync(p, false);
+ gen6_wa_pre_non_pipelined(p);
gen6_3DSTATE_AA_LINE_PARAMETERS(p->builder);
}
@@ -1437,18 +1481,22 @@ ilo_3d_pipeline_emit_draw_gen6(struct ilo_3d_pipeline *p,
void
ilo_3d_pipeline_emit_flush_gen6(struct ilo_3d_pipeline *p)
{
+ const uint32_t dw1 = GEN6_PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE |
+ GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH |
+ GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+ GEN6_PIPE_CONTROL_VF_CACHE_INVALIDATE |
+ GEN6_PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
+ GEN6_PIPE_CONTROL_CS_STALL;
+
+ ILO_DEV_ASSERT(p->dev, 6, 7.5);
+
if (ilo_dev_gen(p->dev) == ILO_GEN(6))
- gen6_wa_pipe_control_post_sync(p, false);
-
- gen6_PIPE_CONTROL(p->builder,
- GEN6_PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE |
- GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH |
- GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH |
- GEN6_PIPE_CONTROL_VF_CACHE_INVALIDATE |
- GEN6_PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
- GEN6_PIPE_CONTROL_WRITE_NONE |
- GEN6_PIPE_CONTROL_CS_STALL,
- 0, 0, false);
+ gen6_wa_pre_pipe_control(p, dw1);
+
+ gen6_PIPE_CONTROL(p->builder, dw1, NULL, 0, false);
+
+ p->state.current_pipe_control_dw1 |= dw1;
+ p->state.deferred_pipe_control_dw1 &= ~dw1;
}
void
@@ -1478,27 +1526,18 @@ ilo_3d_pipeline_emit_query_gen6(struct ilo_3d_pipeline *p,
GEN6_REG_SO_NUM_PRIMS_WRITTEN;
const uint32_t *regs;
int reg_count = 0, i;
+ uint32_t pipe_control_dw1 = 0;
ILO_DEV_ASSERT(p->dev, 6, 7.5);
switch (q->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
- if (ilo_dev_gen(p->dev) == ILO_GEN(6))
- gen6_wa_pipe_control_post_sync(p, false);
-
- gen6_PIPE_CONTROL(p->builder,
- GEN6_PIPE_CONTROL_DEPTH_STALL |
- GEN6_PIPE_CONTROL_WRITE_PS_DEPTH_COUNT,
- q->bo, offset, true);
+ pipe_control_dw1 = GEN6_PIPE_CONTROL_DEPTH_STALL |
+ GEN6_PIPE_CONTROL_WRITE_PS_DEPTH_COUNT;
break;
case PIPE_QUERY_TIMESTAMP:
case PIPE_QUERY_TIME_ELAPSED:
- if (ilo_dev_gen(p->dev) == ILO_GEN(6))
- gen6_wa_pipe_control_post_sync(p, true);
-
- gen6_PIPE_CONTROL(p->builder,
- GEN6_PIPE_CONTROL_WRITE_TIMESTAMP,
- q->bo, offset, true);
+ pipe_control_dw1 = GEN6_PIPE_CONTROL_WRITE_TIMESTAMP;
break;
case PIPE_QUERY_PRIMITIVES_GENERATED:
regs = &primitives_generated_reg;
@@ -1516,6 +1555,16 @@ ilo_3d_pipeline_emit_query_gen6(struct ilo_3d_pipeline *p,
break;
}
+ if (pipe_control_dw1) {
+ if (ilo_dev_gen(p->dev) == ILO_GEN(6))
+ gen6_wa_pre_pipe_control(p, pipe_control_dw1);
+
+ gen6_PIPE_CONTROL(p->builder, pipe_control_dw1, q->bo, offset, true);
+
+ p->state.current_pipe_control_dw1 |= pipe_control_dw1;
+ p->state.deferred_pipe_control_dw1 &= ~pipe_control_dw1;
+ }
+
if (!reg_count)
return;
@@ -1544,7 +1593,7 @@ gen6_rectlist_vs_to_sf(struct ilo_3d_pipeline *p,
gen6_3DSTATE_CONSTANT_VS(p->builder, NULL, NULL, 0);
gen6_3DSTATE_VS(p->builder, NULL, 0);
- gen6_wa_pipe_control_vs_const_flush(p);
+ gen6_wa_post_3dstate_constant_vs(p);
gen6_3DSTATE_CONSTANT_GS(p->builder, NULL, NULL, 0);
gen6_3DSTATE_GS(p->builder, NULL, NULL, 0);
@@ -1577,7 +1626,7 @@ gen6_rectlist_wm(struct ilo_3d_pipeline *p,
gen6_3DSTATE_CONSTANT_PS(p->builder, NULL, NULL, 0);
- gen6_wa_pipe_control_wm_max_threads_stall(p);
+ gen6_wa_pre_3dstate_wm_max_threads(p);
gen6_3DSTATE_WM(p->builder, NULL, 0, NULL, false, false, hiz_op);
}
@@ -1586,7 +1635,7 @@ gen6_rectlist_wm_depth(struct ilo_3d_pipeline *p,
const struct ilo_blitter *blitter,
struct gen6_rectlist_session *session)
{
- gen6_wa_pipe_control_wm_depth_flush(p);
+ gen6_wa_pre_depth(p);
if (blitter->uses & (ILO_BLITTER_USE_FB_DEPTH |
ILO_BLITTER_USE_FB_STENCIL)) {
@@ -1616,7 +1665,7 @@ gen6_rectlist_wm_multisample(struct ilo_3d_pipeline *p,
const uint32_t *packed_sample_pos = (blitter->fb.num_samples > 1) ?
&p->packed_sample_position_4x : &p->packed_sample_position_1x;
- gen6_wa_pipe_control_wm_multisample_flush(p);
+ gen6_wa_pre_3dstate_multisample(p);
gen6_3DSTATE_MULTISAMPLE(p->builder, blitter->fb.num_samples,
packed_sample_pos, true);
@@ -1630,7 +1679,7 @@ gen6_rectlist_commands(struct ilo_3d_pipeline *p,
const struct ilo_blitter *blitter,
struct gen6_rectlist_session *session)
{
- gen6_wa_pipe_control_post_sync(p, false);
+ gen6_wa_pre_non_pipelined(p);
gen6_rectlist_wm_multisample(p, blitter, session);
diff --git a/src/gallium/drivers/ilo/ilo_3d_pipeline_gen7.c b/src/gallium/drivers/ilo/ilo_3d_pipeline_gen7.c
index 2e3c2ec6ac7..51f663b8e82 100644
--- a/src/gallium/drivers/ilo/ilo_3d_pipeline_gen7.c
+++ b/src/gallium/drivers/ilo/ilo_3d_pipeline_gen7.c
@@ -37,66 +37,55 @@
#include "ilo_3d_pipeline_gen6.h"
#include "ilo_3d_pipeline_gen7.h"
-static void
-gen7_wa_pipe_control_cs_stall(struct ilo_3d_pipeline *p,
- bool change_multisample_state,
- bool change_depth_state)
+/**
+ * A wrapper for gen6_PIPE_CONTROL().
+ */
+static inline void
+gen7_pipe_control(struct ilo_3d_pipeline *p, uint32_t dw1)
{
- struct intel_bo *bo = NULL;
- uint32_t dw1 = GEN6_PIPE_CONTROL_CS_STALL;
+ struct intel_bo *bo = (dw1 & GEN6_PIPE_CONTROL_WRITE__MASK) ?
+ p->workaround_bo : NULL;
+
+ ILO_DEV_ASSERT(p->dev, 7, 7.5);
+
+ if (dw1 & GEN6_PIPE_CONTROL_CS_STALL) {
+ /* CS stall cannot be set alone */
+ const uint32_t mask = GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH |
+ GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+ GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL |
+ GEN6_PIPE_CONTROL_DEPTH_STALL |
+ GEN6_PIPE_CONTROL_WRITE__MASK;
+ if (!(dw1 & mask))
+ dw1 |= GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL;
+ }
+
+ gen6_PIPE_CONTROL(p->builder, dw1, bo, 0, false);
- assert(ilo_dev_gen(p->dev) == ILO_GEN(7) ||
- ilo_dev_gen(p->dev) == ILO_GEN(7.5));
- /* emit once */
- if (p->state.has_gen6_wa_pipe_control)
- return;
- p->state.has_gen6_wa_pipe_control = true;
+ p->state.current_pipe_control_dw1 |= dw1;
+ p->state.deferred_pipe_control_dw1 &= ~dw1;
+}
+static void
+gen7_wa_post_3dstate_push_constant_alloc_ps(struct ilo_3d_pipeline *p)
+{
/*
- * From the Ivy Bridge PRM, volume 2 part 1, page 258:
- *
- * "Due to an HW issue driver needs to send a pipe control with stall
- * when ever there is state change in depth bias related state"
- *
* From the Ivy Bridge PRM, volume 2 part 1, page 292:
*
* "A PIPE_CONTOL command with the CS Stall bit set must be programmed
* in the ring after this instruction
* (3DSTATE_PUSH_CONSTANT_ALLOC_PS)."
- *
- * From the Ivy Bridge PRM, volume 2 part 1, page 304:
- *
- * "Driver must ierarchi that all the caches in the depth pipe are
- * flushed before this command (3DSTATE_MULTISAMPLE) is parsed. This
- * requires driver to send a PIPE_CONTROL with a CS stall along with a
- * Depth Flush prior to this command.
- *
- * From the Ivy Bridge PRM, volume 2 part 1, page 315:
- *
- * "Driver must send a least one PIPE_CONTROL command with CS Stall and
- * a post sync operation prior to the group of depth
- * commands(3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS,
- * 3DSTATE_STENCIL_BUFFER, and 3DSTATE_HIER_DEPTH_BUFFER)."
*/
+ const uint32_t dw1 = GEN6_PIPE_CONTROL_CS_STALL;
- if (change_multisample_state)
- dw1 |= GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH;
-
- if (change_depth_state) {
- dw1 |= GEN6_PIPE_CONTROL_WRITE_IMM;
- bo = p->workaround_bo;
- }
+ ILO_DEV_ASSERT(p->dev, 7, 7.5);
- gen6_PIPE_CONTROL(p->builder, dw1, bo, 0, false);
+ p->state.deferred_pipe_control_dw1 |= dw1;
}
static void
-gen7_wa_pipe_control_vs_depth_stall(struct ilo_3d_pipeline *p)
+gen7_wa_pre_vs(struct ilo_3d_pipeline *p)
{
- assert(ilo_dev_gen(p->dev) == ILO_GEN(7) ||
- ilo_dev_gen(p->dev) == ILO_GEN(7.5));
-
/*
* From the Ivy Bridge PRM, volume 2 part 1, page 106:
*
@@ -106,34 +95,73 @@ gen7_wa_pipe_control_vs_depth_stall(struct ilo_3d_pipeline *p)
* 3DSTATE_SAMPLER_STATE_POINTER_VS command. Only one PIPE_CONTROL
* needs to be sent before any combination of VS associated 3DSTATE."
*/
- gen6_PIPE_CONTROL(p->builder,
- GEN6_PIPE_CONTROL_DEPTH_STALL |
- GEN6_PIPE_CONTROL_WRITE_IMM,
- p->workaround_bo, 0, false);
+ const uint32_t dw1 = GEN6_PIPE_CONTROL_DEPTH_STALL |
+ GEN6_PIPE_CONTROL_WRITE_IMM;
+
+ ILO_DEV_ASSERT(p->dev, 7, 7.5);
+
+ if ((p->state.current_pipe_control_dw1 & dw1) != dw1)
+ gen7_pipe_control(p, dw1);
}
static void
-gen7_wa_pipe_control_wm_depth_stall(struct ilo_3d_pipeline *p,
- bool change_depth_buffer)
+gen7_wa_pre_3dstate_sf_depth_bias(struct ilo_3d_pipeline *p)
{
- assert(ilo_dev_gen(p->dev) == ILO_GEN(7) ||
- ilo_dev_gen(p->dev) == ILO_GEN(7.5));
-
/*
- * From the Ivy Bridge PRM, volume 2 part 1, page 276:
+ * From the Ivy Bridge PRM, volume 2 part 1, page 258:
*
- * "The driver must make sure a PIPE_CONTROL with the Depth Stall
- * Enable bit set after all the following states are programmed:
+ * "Due to an HW issue driver needs to send a pipe control with stall
+ * when ever there is state change in depth bias related state (in
+ * 3DSTATE_SF)"
+ */
+ const uint32_t dw1 = GEN6_PIPE_CONTROL_CS_STALL;
+
+ ILO_DEV_ASSERT(p->dev, 7, 7.5);
+
+ if ((p->state.current_pipe_control_dw1 & dw1) != dw1)
+ gen7_pipe_control(p, dw1);
+}
+
+static void
+gen7_wa_pre_3dstate_multisample(struct ilo_3d_pipeline *p)
+{
+ /*
+ * From the Ivy Bridge PRM, volume 2 part 1, page 304:
*
- * * 3DSTATE_PS
- * * 3DSTATE_VIEWPORT_STATE_POINTERS_CC
- * * 3DSTATE_CONSTANT_PS
- * * 3DSTATE_BINDING_TABLE_POINTERS_PS
- * * 3DSTATE_SAMPLER_STATE_POINTERS_PS
- * * 3DSTATE_CC_STATE_POINTERS
- * * 3DSTATE_BLEND_STATE_POINTERS
- * * 3DSTATE_DEPTH_STENCIL_STATE_POINTERS"
+ * "Driver must guarantee that all the caches in the depth pipe are
+ * flushed before this command (3DSTATE_MULTISAMPLE) is parsed. This
+ * requires driver to send a PIPE_CONTROL with a CS stall along with a
+ * Depth Flush prior to this command."
+ */
+ const uint32_t dw1 = GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+ GEN6_PIPE_CONTROL_CS_STALL;
+
+ ILO_DEV_ASSERT(p->dev, 7, 7.5);
+
+ if ((p->state.current_pipe_control_dw1 & dw1) != dw1)
+ gen7_pipe_control(p, dw1);
+}
+
+static void
+gen7_wa_pre_depth(struct ilo_3d_pipeline *p)
+{
+ /*
+ * From the Ivy Bridge PRM, volume 2 part 1, page 315:
*
+ * "Driver must send a least one PIPE_CONTROL command with CS Stall and
+ * a post sync operation prior to the group of depth
+ * commands(3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS,
+ * 3DSTATE_STENCIL_BUFFER, and 3DSTATE_HIER_DEPTH_BUFFER)."
+ */
+ const uint32_t dw1 = GEN6_PIPE_CONTROL_CS_STALL |
+ GEN6_PIPE_CONTROL_WRITE_IMM;
+
+ ILO_DEV_ASSERT(p->dev, 7, 7.5);
+
+ if ((p->state.current_pipe_control_dw1 & dw1) != dw1)
+ gen7_pipe_control(p, dw1);
+
+ /*
* From the Ivy Bridge PRM, volume 2 part 1, page 315:
*
* "Restriction: Prior to changing Depth/Stencil Buffer state (i.e.,
@@ -146,28 +174,14 @@ gen7_wa_pipe_control_wm_depth_stall(struct ilo_3d_pipeline *p,
* guarantee that the pipeline from WM onwards is already flushed
* (e.g., via a preceding MI_FLUSH)."
*/
- gen6_PIPE_CONTROL(p->builder,
- GEN6_PIPE_CONTROL_DEPTH_STALL,
- NULL, 0, false);
-
- if (!change_depth_buffer)
- return;
-
- gen6_PIPE_CONTROL(p->builder,
- GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH,
- NULL, 0, false);
-
- gen6_PIPE_CONTROL(p->builder,
- GEN6_PIPE_CONTROL_DEPTH_STALL,
- NULL, 0, false);
+ gen7_pipe_control(p, GEN6_PIPE_CONTROL_DEPTH_STALL);
+ gen7_pipe_control(p, GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH);
+ gen7_pipe_control(p, GEN6_PIPE_CONTROL_DEPTH_STALL);
}
static void
-gen7_wa_pipe_control_ps_max_threads_stall(struct ilo_3d_pipeline *p)
+gen7_wa_pre_3dstate_ps_max_threads(struct ilo_3d_pipeline *p)
{
- assert(ilo_dev_gen(p->dev) == ILO_GEN(7) ||
- ilo_dev_gen(p->dev) == ILO_GEN(7.5));
-
/*
* From the Ivy Bridge PRM, volume 2 part 1, page 286:
*
@@ -175,10 +189,37 @@ gen7_wa_pipe_control_ps_max_threads_stall(struct ilo_3d_pipeline *p)
* between 3DPRIMITIVE commands, a PIPE_CONTROL command with Stall at
* Pixel Scoreboard set is required to be issued."
*/
- gen6_PIPE_CONTROL(p->builder,
- GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL,
- NULL, 0, false);
+ const uint32_t dw1 = GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL;
+ ILO_DEV_ASSERT(p->dev, 7, 7.5);
+
+ if ((p->state.current_pipe_control_dw1 & dw1) != dw1)
+ gen7_pipe_control(p, dw1);
+}
+
+static void
+gen7_wa_post_ps_and_later(struct ilo_3d_pipeline *p)
+{
+ /*
+ * From the Ivy Bridge PRM, volume 2 part 1, page 276:
+ *
+ * "The driver must make sure a PIPE_CONTROL with the Depth Stall
+ * Enable bit set after all the following states are programmed:
+ *
+ * - 3DSTATE_PS
+ * - 3DSTATE_VIEWPORT_STATE_POINTERS_CC
+ * - 3DSTATE_CONSTANT_PS
+ * - 3DSTATE_BINDING_TABLE_POINTERS_PS
+ * - 3DSTATE_SAMPLER_STATE_POINTERS_PS
+ * - 3DSTATE_CC_STATE_POINTERS
+ * - 3DSTATE_BLEND_STATE_POINTERS
+ * - 3DSTATE_DEPTH_STENCIL_STATE_POINTERS"
+ */
+ const uint32_t dw1 = GEN6_PIPE_CONTROL_DEPTH_STALL;
+
+ ILO_DEV_ASSERT(p->dev, 7, 7.5);
+
+ p->state.deferred_pipe_control_dw1 |= dw1;
}
#define DIRTY(state) (session->pipe_dirty & ILO_DIRTY_ ## state)
@@ -212,7 +253,7 @@ gen7_pipeline_common_urb(struct ilo_3d_pipeline *p,
vs_entry_size *= sizeof(float) * 4;
vs_total_size = p->dev->urb_size - offset;
- gen7_wa_pipe_control_vs_depth_stall(p);
+ gen7_wa_pre_vs(p);
gen7_3DSTATE_URB_VS(p->builder,
offset, vs_total_size, vs_entry_size);
@@ -245,7 +286,7 @@ gen7_pipeline_common_pcb_alloc(struct ilo_3d_pipeline *p,
gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(p->builder, offset, size);
if (ilo_dev_gen(p->dev) == ILO_GEN(7))
- gen7_wa_pipe_control_cs_stall(p, true, true);
+ gen7_wa_post_3dstate_push_constant_alloc_ps(p);
}
}
@@ -303,7 +344,7 @@ gen7_pipeline_vs(struct ilo_3d_pipeline *p,
/* emit depth stall before any of the VS commands */
if (emit_3dstate_binding_table || emit_3dstate_sampler_state ||
emit_3dstate_constant_vs || emit_3dstate_vs)
- gen7_wa_pipe_control_vs_depth_stall(p);
+ gen7_wa_pre_vs(p);
/* 3DSTATE_BINDING_TABLE_POINTERS_VS */
if (emit_3dstate_binding_table) {
@@ -459,7 +500,7 @@ gen7_pipeline_sf(struct ilo_3d_pipeline *p,
if (DIRTY(RASTERIZER) || DIRTY(FB)) {
struct pipe_surface *zs = vec->fb.state.zsbuf;
- gen7_wa_pipe_control_cs_stall(p, true, true);
+ gen7_wa_pre_3dstate_sf_depth_bias(p);
gen7_3DSTATE_SF(p->builder, vec->rasterizer,
(zs) ? zs->format : PIPE_FORMAT_NONE);
}
@@ -508,7 +549,7 @@ gen7_pipeline_wm(struct ilo_3d_pipeline *p,
if ((ilo_dev_gen(p->dev) == ILO_GEN(7) ||
ilo_dev_gen(p->dev) == ILO_GEN(7.5)) &&
session->hw_ctx_changed)
- gen7_wa_pipe_control_ps_max_threads_stall(p);
+ gen7_wa_pre_3dstate_ps_max_threads(p);
gen7_3DSTATE_PS(p->builder, vec->fs, num_samplers, dual_blend);
}
@@ -527,7 +568,6 @@ gen7_pipeline_wm(struct ilo_3d_pipeline *p,
(DIRTY(FB) || DIRTY(DSA) || session->state_bo_changed);
if (emit_3dstate_ps ||
- emit_3dstate_depth_buffer ||
session->pcb_state_fs_changed ||
session->viewport_state_changed ||
session->binding_table_fs_changed ||
@@ -535,7 +575,10 @@ gen7_pipeline_wm(struct ilo_3d_pipeline *p,
session->cc_state_cc_changed ||
session->cc_state_blend_changed ||
session->cc_state_dsa_changed)
- gen7_wa_pipe_control_wm_depth_stall(p, emit_3dstate_depth_buffer);
+ gen7_wa_post_ps_and_later(p);
+
+ if (emit_3dstate_depth_buffer)
+ gen7_wa_pre_depth(p);
}
/* 3DSTATE_DEPTH_BUFFER and 3DSTATE_CLEAR_PARAMS */
@@ -575,7 +618,7 @@ gen7_pipeline_wm_multisample(struct ilo_3d_pipeline *p,
if (DIRTY(SAMPLE_MASK) || DIRTY(FB)) {
const uint32_t *packed_sample_pos;
- gen7_wa_pipe_control_cs_stall(p, true, true);
+ gen7_wa_pre_3dstate_multisample(p);
packed_sample_pos =
(vec->fb.num_samples > 4) ? p->packed_sample_position_8x :
@@ -597,9 +640,14 @@ gen7_pipeline_vf_draw(struct ilo_3d_pipeline *p,
const struct ilo_state_vector *vec,
struct gen6_pipeline_session *session)
{
+ if (p->state.deferred_pipe_control_dw1)
+ gen7_pipe_control(p, p->state.deferred_pipe_control_dw1);
+
/* 3DPRIMITIVE */
gen7_3DPRIMITIVE(p->builder, vec->draw, &vec->ib);
- p->state.has_gen6_wa_pipe_control = false;
+
+ p->state.current_pipe_control_dw1 = 0;
+ p->state.deferred_pipe_control_dw1 = 0;
}
static void
@@ -670,7 +718,7 @@ gen7_rectlist_pcb_alloc(struct ilo_3d_pipeline *p,
gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(p->builder, offset, size);
- gen7_wa_pipe_control_cs_stall(p, true, true);
+ gen7_wa_post_3dstate_push_constant_alloc_ps(p);
}
static void
@@ -713,7 +761,7 @@ gen7_rectlist_vs_to_sf(struct ilo_3d_pipeline *p,
gen6_3DSTATE_CLIP(p->builder, NULL, NULL, false, 0);
- gen7_wa_pipe_control_cs_stall(p, true, true);
+ gen7_wa_pre_3dstate_sf_depth_bias(p);
gen7_3DSTATE_SF(p->builder, NULL, blitter->fb.dst.base.format);
gen7_3DSTATE_SBE(p->builder, NULL, NULL);
@@ -745,7 +793,7 @@ gen7_rectlist_wm(struct ilo_3d_pipeline *p,
gen7_3DSTATE_CONSTANT_PS(p->builder, NULL, NULL, 0);
- gen7_wa_pipe_control_ps_max_threads_stall(p);
+ gen7_wa_pre_3dstate_ps_max_threads(p);
gen7_3DSTATE_PS(p->builder, NULL, 0, false);
}
@@ -754,7 +802,7 @@ gen7_rectlist_wm_depth(struct ilo_3d_pipeline *p,
const struct ilo_blitter *blitter,
struct gen6_rectlist_session *session)
{
- gen7_wa_pipe_control_wm_depth_stall(p, true);
+ gen7_wa_pre_depth(p);
if (blitter->uses & (ILO_BLITTER_USE_FB_DEPTH |
ILO_BLITTER_USE_FB_STENCIL)) {
@@ -786,7 +834,7 @@ gen7_rectlist_wm_multisample(struct ilo_3d_pipeline *p,
(blitter->fb.num_samples > 1) ? &p->packed_sample_position_4x :
&p->packed_sample_position_1x;
- gen7_wa_pipe_control_cs_stall(p, true, true);
+ gen7_wa_pre_3dstate_multisample(p);
gen6_3DSTATE_MULTISAMPLE(p->builder, blitter->fb.num_samples,
packed_sample_pos, true);
@@ -813,7 +861,7 @@ gen7_rectlist_commands(struct ilo_3d_pipeline *p,
gen7_rectlist_pcb_alloc(p, blitter, session);
/* needed for any VS-related commands */
- gen7_wa_pipe_control_vs_depth_stall(p);
+ gen7_wa_pre_vs(p);
gen7_rectlist_urb(p, blitter, session);