diff options
author | Paul Berry <[email protected]> | 2013-11-04 20:06:48 -0800 |
---|---|---|
committer | Paul Berry <[email protected]> | 2013-11-18 10:09:11 -0800 |
commit | 7dfb4b2d00ddb8e5ee24d4c58eb9415dc4ccc21c (patch) | |
tree | 79601460f1cdc347b8c9f9e957f2e56a67da7bf2 /src | |
parent | d22220219347689c51134e4a5650d75143748017 (diff) |
i965/gen7: Emit workaround flush when changing GS enable state.
v2: Don't go to extra work to avoid extraneous flushes. (Previous
experiments in the kernel have suggested that flushing the pipeline
when it is already empty is extremely cheap).
Cc: "10.0" <[email protected]>
Reviewed-by: Eric Anholt <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_context.c | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_context.h | 6 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/gen7_blorp.cpp | 16 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/gen7_gs_state.c | 16 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/gen7_urb.c | 24 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/intel_batchbuffer.c | 30 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/intel_batchbuffer.h | 1 |
7 files changed, 72 insertions, 22 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index bee98e38996..0399ec04132 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -764,6 +764,7 @@ brwCreateContext(gl_api api, brw->prim_restart.in_progress = false; brw->prim_restart.enable_cut_index = false; + brw->gs.enabled = false; if (brw->gen < 6) { brw->curbe.last_buf = calloc(1, 4096); diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 8b1cbb34d7b..4a089868339 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -1300,6 +1300,12 @@ struct brw_context struct { struct brw_stage_state base; struct brw_gs_prog_data *prog_data; + + /** + * True if the 3DSTATE_GS command most recently emitted to the 3D + * pipeline enabled the GS; false otherwise. + */ + bool enabled; } gs; struct { diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp b/src/mesa/drivers/dri/i965/gen7_blorp.cpp index 540c46dcb40..d48153824de 100644 --- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp +++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp @@ -402,6 +402,21 @@ gen7_blorp_emit_gs_disable(struct brw_context *brw, OUT_BATCH(0); ADVANCE_BATCH(); + /** + * From Graphics BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages > + * Geometry > Geometry Shader > State: + * + * "Note: Because of corruption in IVB:GT2, software needs to flush the + * whole fixed function pipeline when the GS enable changes value in + * the 3DSTATE_GS." + * + * The hardware architects have clarified that in this context "flush the + * whole fixed function pipeline" means to emit a PIPE_CONTROL with the "CS + * Stall" bit set. + */ + if (!brw->is_haswell && brw->gt == 2 && brw->gs.enabled) + gen7_emit_cs_stall_flush(brw); + BEGIN_BATCH(7); OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2)); OUT_BATCH(0); @@ -411,6 +426,7 @@ gen7_blorp_emit_gs_disable(struct brw_context *brw, OUT_BATCH(0); OUT_BATCH(0); ADVANCE_BATCH(); + brw->gs.enabled = false; } /* 3DSTATE_STREAMOUT diff --git a/src/mesa/drivers/dri/i965/gen7_gs_state.c b/src/mesa/drivers/dri/i965/gen7_gs_state.c index 584f2db8f8e..d2ba354e2c4 100644 --- a/src/mesa/drivers/dri/i965/gen7_gs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_gs_state.c @@ -80,6 +80,21 @@ upload_gs_state(struct brw_context *brw) gen7_upload_constant_state(brw, stage_state, active, _3DSTATE_CONSTANT_GS); + /** + * From Graphics BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages > + * Geometry > Geometry Shader > State: + * + * "Note: Because of corruption in IVB:GT2, software needs to flush the + * whole fixed function pipeline when the GS enable changes value in + * the 3DSTATE_GS." + * + * The hardware architects have clarified that in this context "flush the + * whole fixed function pipeline" means to emit a PIPE_CONTROL with the "CS + * Stall" bit set. + */ + if (!brw->is_haswell && brw->gt == 2 && brw->gs.enabled != active) + gen7_emit_cs_stall_flush(brw); + if (active) { BEGIN_BATCH(7); OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2)); @@ -176,6 +191,7 @@ upload_gs_state(struct brw_context *brw) OUT_BATCH(0); ADVANCE_BATCH(); } + brw->gs.enabled = active; } const struct brw_tracked_state gen7_gs_state = { diff --git a/src/mesa/drivers/dri/i965/gen7_urb.c b/src/mesa/drivers/dri/i965/gen7_urb.c index 6dcdfe4fa44..c6385862b92 100644 --- a/src/mesa/drivers/dri/i965/gen7_urb.c +++ b/src/mesa/drivers/dri/i965/gen7_urb.c @@ -122,28 +122,8 @@ gen7_emit_push_constant_state(struct brw_context *brw, unsigned vs_size, * * No such restriction exists for Haswell. */ - if (!brw->is_haswell) { - BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2)); - /* From p61 of the Ivy Bridge PRM (1.10.4 PIPE_CONTROL Command: DW1[20] - * CS Stall): - * - * One of the following must also be set: - * - Render Target Cache Flush Enable ([12] of DW1) - * - Depth Cache Flush Enable ([0] of DW1) - * - Stall at Pixel Scoreboard ([1] of DW1) - * - Depth Stall ([13] of DW1) - * - Post-Sync Operation ([13] of DW1) - * - * We choose to do a Post-Sync Operation (Write Immediate Data), since - * it seems like it will incur the least additional performance penalty. - */ - OUT_BATCH(PIPE_CONTROL_CS_STALL | PIPE_CONTROL_WRITE_IMMEDIATE); - OUT_RELOC(brw->batch.workaround_bo, - I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } + if (!brw->is_haswell) + gen7_emit_cs_stall_flush(brw); } const struct brw_tracked_state gen7_push_constant_space = { diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c index fb0b45bc3b7..babe9ea1ad9 100644 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c @@ -511,6 +511,36 @@ gen7_emit_vs_workaround_flush(struct brw_context *brw) ADVANCE_BATCH(); } + +/** + * Emit a PIPE_CONTROL command for gen7 with the CS Stall bit set. + */ +void +gen7_emit_cs_stall_flush(struct brw_context *brw) +{ + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2)); + /* From p61 of the Ivy Bridge PRM (1.10.4 PIPE_CONTROL Command: DW1[20] + * CS Stall): + * + * One of the following must also be set: + * - Render Target Cache Flush Enable ([12] of DW1) + * - Depth Cache Flush Enable ([0] of DW1) + * - Stall at Pixel Scoreboard ([1] of DW1) + * - Depth Stall ([13] of DW1) + * - Post-Sync Operation ([13] of DW1) + * + * We choose to do a Post-Sync Operation (Write Immediate Data), since + * it seems like it will incur the least additional performance penalty. + */ + OUT_BATCH(PIPE_CONTROL_CS_STALL | PIPE_CONTROL_WRITE_IMMEDIATE); + OUT_RELOC(brw->batch.workaround_bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); + OUT_BATCH(0); + ADVANCE_BATCH(); +} + + /** * Emits a PIPE_CONTROL with a non-zero post-sync operation, for * implementing two workarounds on gen6. From section 1.4.7.1 diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.h b/src/mesa/drivers/dri/i965/intel_batchbuffer.h index d46f48e20ef..cabbb69a63c 100644 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.h +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.h @@ -59,6 +59,7 @@ void intel_batchbuffer_emit_mi_flush(struct brw_context *brw); void intel_emit_post_sync_nonzero_flush(struct brw_context *brw); void intel_emit_depth_stall_flushes(struct brw_context *brw); void gen7_emit_vs_workaround_flush(struct brw_context *brw); +void gen7_emit_cs_stall_flush(struct brw_context *brw); static INLINE uint32_t float_as_int(float f) { |