summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Kenneth Graunke <[email protected]> 2014-01-13 16:00:18 -0800
committer: Kenneth Graunke <[email protected]> 2014-01-20 15:38:23 -0800
commit: f5dd608db2d6a67cfe27efed948408414a057fe3 (patch)
tree: 6176e8df8ae38ba9b23b2b168214158f40aa23ad
parent: 35458a99c0940ec29503fa02134ec3ed9de363f9 (diff)
i965: Create a helper function for emitting PIPE_CONTROL writes.
There are a lot of places that use PIPE_CONTROL to write a value to a buffer (either an immediate write, TIMESTAMP, or PS_DEPTH_COUNT). Creating a single function to do this seems convenient.

As part of this refactor, we now set the PPGTT/GTT selection bit correctly on Gen7+. Previously, we set bit 2 of DW2 on all platforms. This is correct for Sandybridge, but actually part of the address on Ivybridge and later!

Broadwell will also increase the length of these packets by 1; with the refactoring, we should have to adjust that in substantially fewer places, giving us confidence that we've hit them all.

Signed-off-by: Kenneth Graunke <[email protected]>
Reviewed-by: Eric Anholt <[email protected]>
Reviewed-by: Matt Turner <[email protected]>
-rw-r--r-- src/mesa/drivers/dri/i965/brw_queryobj.c 56
-rw-r--r-- src/mesa/drivers/dri/i965/gen6_queryobj.c 15
-rw-r--r-- src/mesa/drivers/dri/i965/intel_batchbuffer.c 88
-rw-r--r-- src/mesa/drivers/dri/i965/intel_batchbuffer.h 3
4 files changed, 69 insertions, 93 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_queryobj.c b/src/mesa/drivers/dri/i965/brw_queryobj.c
index 9f839379bff..dc26c0864e1 100644
--- a/src/mesa/drivers/dri/i965/brw_queryobj.c
+++ b/src/mesa/drivers/dri/i965/brw_queryobj.c
@@ -49,36 +49,15 @@
void
brw_write_timestamp(struct brw_context *brw, drm_intel_bo *query_bo, int idx)
{
- if (brw->gen >= 6) {
- /* Emit workaround flushes: */
- if (brw->gen == 6) {
- brw_emit_pipe_control_flush(brw,
- PIPE_CONTROL_CS_STALL |
- PIPE_CONTROL_STALL_AT_SCOREBOARD);
- }
-
- BEGIN_BATCH(5);
- OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2));
- OUT_BATCH(PIPE_CONTROL_WRITE_TIMESTAMP);
- OUT_RELOC(query_bo,
- I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
- PIPE_CONTROL_GLOBAL_GTT_WRITE |
- idx * sizeof(uint64_t));
- OUT_BATCH(0);
- OUT_BATCH(0);
- ADVANCE_BATCH();
- } else {
- BEGIN_BATCH(4);
- OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2) |
- PIPE_CONTROL_WRITE_TIMESTAMP);
- OUT_RELOC(query_bo,
- I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
- PIPE_CONTROL_GLOBAL_GTT_WRITE |
- idx * sizeof(uint64_t));
- OUT_BATCH(0);
- OUT_BATCH(0);
- ADVANCE_BATCH();
+ if (brw->gen == 6) {
+ /* Emit Sandybridge workaround flush: */
+ brw_emit_pipe_control_flush(brw,
+ PIPE_CONTROL_CS_STALL |
+ PIPE_CONTROL_STALL_AT_SCOREBOARD);
}
+
+ brw_emit_pipe_control_write(brw, PIPE_CONTROL_WRITE_TIMESTAMP,
+ query_bo, idx * sizeof(uint64_t), 0, 0);
}
/**
@@ -89,21 +68,10 @@ write_depth_count(struct brw_context *brw, drm_intel_bo *query_bo, int idx)
{
assert(brw->gen < 6);
- BEGIN_BATCH(4);
- OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2) |
- PIPE_CONTROL_DEPTH_STALL | PIPE_CONTROL_WRITE_DEPTH_COUNT);
- /* This object could be mapped cacheable, but we don't have an exposed
- * mechanism to support that. Since it's going uncached, tell GEM that
- * we're writing to it. The usual clflush should be all that's required
- * to pick up the results.
- */
- OUT_RELOC(query_bo,
- I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
- PIPE_CONTROL_GLOBAL_GTT_WRITE |
- (idx * sizeof(uint64_t)));
- OUT_BATCH(0);
- OUT_BATCH(0);
- ADVANCE_BATCH();
+ brw_emit_pipe_control_write(brw,
+ PIPE_CONTROL_WRITE_DEPTH_COUNT
+ | PIPE_CONTROL_DEPTH_STALL,
+ query_bo, idx * sizeof(uint64_t), 0, 0);
}
/**
diff --git a/src/mesa/drivers/dri/i965/gen6_queryobj.c b/src/mesa/drivers/dri/i965/gen6_queryobj.c
index 16d0c101432..e76562393de 100644
--- a/src/mesa/drivers/dri/i965/gen6_queryobj.c
+++ b/src/mesa/drivers/dri/i965/gen6_queryobj.c
@@ -49,17 +49,10 @@ write_depth_count(struct brw_context *brw, drm_intel_bo *query_bo, int idx)
if (brw->gen == 6)
intel_emit_post_sync_nonzero_flush(brw);
- BEGIN_BATCH(5);
- OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2));
- OUT_BATCH(PIPE_CONTROL_DEPTH_STALL |
- PIPE_CONTROL_WRITE_DEPTH_COUNT);
- OUT_RELOC(query_bo,
- I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
- PIPE_CONTROL_GLOBAL_GTT_WRITE |
- (idx * sizeof(uint64_t)));
- OUT_BATCH(0);
- OUT_BATCH(0);
- ADVANCE_BATCH();
+ brw_emit_pipe_control_write(brw,
+ PIPE_CONTROL_WRITE_DEPTH_COUNT
+ | PIPE_CONTROL_DEPTH_STALL,
+ query_bo, idx * sizeof(uint64_t), 0, 0);
}
/*
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index f54ca9b30e4..d1587cba10c 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -459,6 +459,44 @@ brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags)
}
/**
+ * Emit a PIPE_CONTROL that writes to a buffer object.
+ *
+ * \p flags should contain one of the following items:
+ * - PIPE_CONTROL_WRITE_IMMEDIATE
+ * - PIPE_CONTROL_WRITE_TIMESTAMP
+ * - PIPE_CONTROL_WRITE_DEPTH_COUNT
+ *
+ * On Gen6+ this emits a 5-dword packet with \p flags in a dword of their
+ * own; on earlier generations it emits a 4-dword packet with \p flags OR'd
+ * into the header dword.
+ *
+ * \param brw context; brw->gen selects which packet layout is emitted
+ * \param flags PIPE_CONTROL flags, emitted unmodified (see the list above)
+ * \param bo buffer object that the emitted relocation points at
+ * \param offset value OR'd into the relocation delta alongside the GTT
+ * selection bit. NOTE(review): since it shares the dword with
+ * PIPE_CONTROL_GLOBAL_GTT_WRITE on Gen6 and older, it presumably
+ * must leave that bit clear (e.g. be 8-byte aligned) - confirm.
+ * \param imm_lower dword emitted immediately after the relocation
+ * \param imm_upper last dword of the packet. NOTE(review): imm_lower and
+ * imm_upper are presumably the low/high halves of the 64-bit
+ * immediate for PIPE_CONTROL_WRITE_IMMEDIATE - confirm against
+ * the PRM.
+ */
+void
+brw_emit_pipe_control_write(struct brw_context *brw, uint32_t flags,
+ drm_intel_bo *bo, uint32_t offset,
+ uint32_t imm_lower, uint32_t imm_upper)
+{
+ if (brw->gen >= 6) {
+ /* PPGTT/GGTT is selected by DW2 bit 2 on Sandybridge, but DW1 bit 24
+ * on later platforms. We always use PPGTT on Gen7+.
+ */
+ unsigned gen6_gtt = brw->gen == 6 ? PIPE_CONTROL_GLOBAL_GTT_WRITE : 0;
+
+ BEGIN_BATCH(5);
+ OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2));
+ OUT_BATCH(flags); /* DW1: flags get their own dword on Gen6+ */
+ OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ gen6_gtt | offset);
+ OUT_BATCH(imm_lower);
+ OUT_BATCH(imm_upper);
+ ADVANCE_BATCH();
+ } else {
+ BEGIN_BATCH(4);
+ OUT_BATCH(_3DSTATE_PIPE_CONTROL | flags | (4 - 2)); /* pre-Gen6: flags share DW0 with the header */
+ OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ PIPE_CONTROL_GLOBAL_GTT_WRITE | offset);
+ OUT_BATCH(imm_lower);
+ OUT_BATCH(imm_upper);
+ ADVANCE_BATCH();
+ }
+}
+
+/**
* Restriction [DevSNB, DevIVB]:
*
* Prior to changing Depth/Stencil Buffer state (i.e. any combination of
@@ -492,15 +530,11 @@ void
gen7_emit_vs_workaround_flush(struct brw_context *brw)
{
assert(brw->gen == 7);
-
- BEGIN_BATCH(5);
- OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2));
- OUT_BATCH(PIPE_CONTROL_DEPTH_STALL | PIPE_CONTROL_WRITE_IMMEDIATE);
- OUT_RELOC(brw->batch.workaround_bo,
- I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
- OUT_BATCH(0); /* write data */
- OUT_BATCH(0); /* write data */
- ADVANCE_BATCH();
+ brw_emit_pipe_control_write(brw,
+ PIPE_CONTROL_WRITE_IMMEDIATE
+ | PIPE_CONTROL_DEPTH_STALL,
+ brw->batch.workaround_bo, 0,
+ 0, 0);
}
@@ -510,27 +544,11 @@ gen7_emit_vs_workaround_flush(struct brw_context *brw)
void
gen7_emit_cs_stall_flush(struct brw_context *brw)
{
- BEGIN_BATCH(5);
- OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2));
- /* From p61 of the Ivy Bridge PRM (1.10.4 PIPE_CONTROL Command: DW1[20]
- * CS Stall):
- *
- * One of the following must also be set:
- * - Render Target Cache Flush Enable ([12] of DW1)
- * - Depth Cache Flush Enable ([0] of DW1)
- * - Stall at Pixel Scoreboard ([1] of DW1)
- * - Depth Stall ([13] of DW1)
- * - Post-Sync Operation ([13] of DW1)
- *
- * We choose to do a Post-Sync Operation (Write Immediate Data), since
- * it seems like it will incur the least additional performance penalty.
- */
- OUT_BATCH(PIPE_CONTROL_CS_STALL | PIPE_CONTROL_WRITE_IMMEDIATE);
- OUT_RELOC(brw->batch.workaround_bo,
- I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- ADVANCE_BATCH();
+ brw_emit_pipe_control_write(brw,
+ PIPE_CONTROL_CS_STALL
+ | PIPE_CONTROL_WRITE_IMMEDIATE,
+ brw->batch.workaround_bo, 0,
+ 0, 0);
}
@@ -581,14 +599,8 @@ intel_emit_post_sync_nonzero_flush(struct brw_context *brw)
PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_STALL_AT_SCOREBOARD);
- BEGIN_BATCH(5);
- OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2));
- OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE);
- OUT_RELOC(brw->batch.workaround_bo,
- I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
- OUT_BATCH(0); /* write data */
- OUT_BATCH(0); /* write data */
- ADVANCE_BATCH();
+ brw_emit_pipe_control_write(brw, PIPE_CONTROL_WRITE_IMMEDIATE,
+ brw->batch.workaround_bo, 0, 0, 0);
brw->batch.need_workaround_flush = false;
}
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.h b/src/mesa/drivers/dri/i965/intel_batchbuffer.h
index 5ed2089049b..779a7ccd05c 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.h
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.h
@@ -65,6 +65,9 @@ bool intel_batchbuffer_emit_reloc64(struct brw_context *brw,
uint32_t write_domain,
uint32_t offset);
void brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags);
+void brw_emit_pipe_control_write(struct brw_context *brw, uint32_t flags,
+ drm_intel_bo *bo, uint32_t offset,
+ uint32_t imm_lower, uint32_t imm_upper);
void intel_batchbuffer_emit_mi_flush(struct brw_context *brw);
void intel_emit_post_sync_nonzero_flush(struct brw_context *brw);
void intel_emit_depth_stall_flushes(struct brw_context *brw);