summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJason Ekstrand <[email protected]>2018-05-11 18:16:48 -0700
committerJason Ekstrand <[email protected]>2018-05-22 15:46:39 -0700
commita347a5a12c2ed98c5959ab2da9ec4c0fcd365aeb (patch)
tree15726f5eb05a184793598f07070c553e6d5f294f
parentb499b85b0f2cc0c82b7c9af91502c2814fdc8e67 (diff)
i965: Remove ring switching entirely
Reviewed-by: Topi Pohjolainen <[email protected]> Reviewed-by: Kenneth Graunke <[email protected]>
-rw-r--r--src/mesa/drivers/dri/i965/brw_compute.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.h7
-rw-r--r--src/mesa/drivers/dri/i965/brw_draw.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_misc_state.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_pipe_control.c32
-rw-r--r--src/mesa/drivers/dri/i965/brw_urb.c2
-rw-r--r--src/mesa/drivers/dri/i965/genX_blorp_exec.c4
-rw-r--r--src/mesa/drivers/dri/i965/genX_state_upload.c2
-rw-r--r--src/mesa/drivers/dri/i965/intel_batchbuffer.c92
-rw-r--r--src/mesa/drivers/dri/i965/intel_batchbuffer.h15
-rw-r--r--src/mesa/drivers/dri/i965/intel_blit.c6
11 files changed, 61 insertions, 105 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_compute.c b/src/mesa/drivers/dri/i965/brw_compute.c
index 5ce899bcbcc..de08fc3ac16 100644
--- a/src/mesa/drivers/dri/i965/brw_compute.c
+++ b/src/mesa/drivers/dri/i965/brw_compute.c
@@ -182,7 +182,7 @@ brw_dispatch_compute_common(struct gl_context *ctx)
/* Flush the batch if the batch/state buffers are nearly full. We can
* grow them if needed, but this is not free, so we'd like to avoid it.
*/
- intel_batchbuffer_require_space(brw, 600, RENDER_RING);
+ intel_batchbuffer_require_space(brw, 600);
brw_require_statebuffer_space(brw, 2500);
intel_batchbuffer_save_state(brw);
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 773f104824d..2613b9fda22 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -461,12 +461,6 @@ struct brw_query_object {
bool flushed;
};
-enum brw_gpu_ring {
- UNKNOWN_RING,
- RENDER_RING,
- BLT_RING,
-};
-
struct brw_reloc_list {
struct drm_i915_gem_relocation_entry *relocs;
int reloc_count;
@@ -497,7 +491,6 @@ struct intel_batchbuffer {
uint32_t *map_next;
uint32_t state_used;
- enum brw_gpu_ring ring;
bool use_shadow_copy;
bool use_batch_first;
bool needs_sol_reset;
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index ae3b7be2ddd..18aa12feaef 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -798,7 +798,7 @@ brw_draw_single_prim(struct gl_context *ctx,
/* Flush the batch if the batch/state buffers are nearly full. We can
* grow them if needed, but this is not free, so we'd like to avoid it.
*/
- intel_batchbuffer_require_space(brw, 1500, RENDER_RING);
+ intel_batchbuffer_require_space(brw, 1500);
brw_require_statebuffer_space(brw, 2400);
intel_batchbuffer_save_state(brw);
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index 6d7ab92cf61..9a663b1d61c 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -348,7 +348,7 @@ brw_emit_depthbuffer(struct brw_context *brw)
brw_emit_depth_stall_flushes(brw);
const unsigned ds_dwords = brw->isl_dev.ds.size / 4;
- intel_batchbuffer_begin(brw, ds_dwords, RENDER_RING);
+ intel_batchbuffer_begin(brw, ds_dwords);
uint32_t *ds_map = brw->batch.map_next;
const uint32_t ds_offset = (char *)ds_map - (char *)brw->batch.batch.map;
diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.c b/src/mesa/drivers/dri/i965/brw_pipe_control.c
index cbd2853f58c..122ac260703 100644
--- a/src/mesa/drivers/dri/i965/brw_pipe_control.c
+++ b/src/mesa/drivers/dri/i965/brw_pipe_control.c
@@ -544,29 +544,17 @@ brw_emit_mi_flush(struct brw_context *brw)
{
const struct gen_device_info *devinfo = &brw->screen->devinfo;
- if (brw->batch.ring == BLT_RING && devinfo->gen >= 6) {
- const unsigned n_dwords = devinfo->gen >= 8 ? 5 : 4;
- BEGIN_BATCH_BLT(n_dwords);
- OUT_BATCH(MI_FLUSH_DW | (n_dwords - 2));
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- if (n_dwords == 5)
- OUT_BATCH(0);
- ADVANCE_BATCH();
- } else {
- int flags = PIPE_CONTROL_RENDER_TARGET_FLUSH;
- if (devinfo->gen >= 6) {
- flags |= PIPE_CONTROL_INSTRUCTION_INVALIDATE |
- PIPE_CONTROL_CONST_CACHE_INVALIDATE |
- PIPE_CONTROL_DATA_CACHE_FLUSH |
- PIPE_CONTROL_DEPTH_CACHE_FLUSH |
- PIPE_CONTROL_VF_CACHE_INVALIDATE |
- PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
- PIPE_CONTROL_CS_STALL;
- }
- brw_emit_pipe_control_flush(brw, flags);
+ int flags = PIPE_CONTROL_RENDER_TARGET_FLUSH;
+ if (devinfo->gen >= 6) {
+ flags |= PIPE_CONTROL_INSTRUCTION_INVALIDATE |
+ PIPE_CONTROL_CONST_CACHE_INVALIDATE |
+ PIPE_CONTROL_DATA_CACHE_FLUSH |
+ PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+ PIPE_CONTROL_VF_CACHE_INVALIDATE |
+ PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
+ PIPE_CONTROL_CS_STALL;
}
+ brw_emit_pipe_control_flush(brw, flags);
}
int
diff --git a/src/mesa/drivers/dri/i965/brw_urb.c b/src/mesa/drivers/dri/i965/brw_urb.c
index a86fa78acaf..d34240ee1b7 100644
--- a/src/mesa/drivers/dri/i965/brw_urb.c
+++ b/src/mesa/drivers/dri/i965/brw_urb.c
@@ -264,5 +264,5 @@ void brw_upload_urb_fence(struct brw_context *brw)
while (--pad);
}
- intel_batchbuffer_data(brw, &uf, sizeof(uf), RENDER_RING);
+ intel_batchbuffer_data(brw, &uf, sizeof(uf));
}
diff --git a/src/mesa/drivers/dri/i965/genX_blorp_exec.c b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
index 581438966e5..808bff0db85 100644
--- a/src/mesa/drivers/dri/i965/genX_blorp_exec.c
+++ b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
@@ -44,7 +44,7 @@ blorp_emit_dwords(struct blorp_batch *batch, unsigned n)
assert(batch->blorp->driver_ctx == batch->driver_batch);
struct brw_context *brw = batch->driver_batch;
- intel_batchbuffer_begin(brw, n, RENDER_RING);
+ intel_batchbuffer_begin(brw, n);
uint32_t *map = brw->batch.map_next;
brw->batch.map_next += n;
intel_batchbuffer_advance(brw);
@@ -277,7 +277,7 @@ genX(blorp_exec)(struct blorp_batch *batch,
brw_select_pipeline(brw, BRW_RENDER_PIPELINE);
retry:
- intel_batchbuffer_require_space(brw, 1400, RENDER_RING);
+ intel_batchbuffer_require_space(brw, 1400);
brw_require_statebuffer_space(brw, 600);
intel_batchbuffer_save_state(brw);
brw->batch.no_wrap = true;
diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c
index 4f44b9965e6..b485e2cf811 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -59,7 +59,7 @@
UNUSED static void *
emit_dwords(struct brw_context *brw, unsigned n)
{
- intel_batchbuffer_begin(brw, n, RENDER_RING);
+ intel_batchbuffer_begin(brw, n);
uint32_t *map = brw->batch.map_next;
brw->batch.map_next += n;
intel_batchbuffer_advance(brw);
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index 4f78d8d0508..8f47e613df8 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -274,11 +274,6 @@ intel_batchbuffer_reset(struct brw_context *brw)
batch->needs_sol_reset = false;
batch->state_base_address_emitted = false;
- /* We don't know what ring the new batch will be sent to until we see the
- * first BEGIN_BATCH or BEGIN_BATCH_BLT. Mark it as unknown.
- */
- batch->ring = UNKNOWN_RING;
-
if (batch->state_batch_sizes)
_mesa_hash_table_clear(batch->state_batch_sizes, NULL);
}
@@ -311,8 +306,6 @@ intel_batchbuffer_reset_to_saved(struct brw_context *brw)
brw->batch.exec_count = brw->batch.saved.exec_count;
brw->batch.map_next = brw->batch.saved.map_next;
- if (USED_BATCH(brw->batch) == 0)
- brw->batch.ring = UNKNOWN_RING;
}
void
@@ -507,18 +500,10 @@ grow_buffer(struct brw_context *brw,
}
void
-intel_batchbuffer_require_space(struct brw_context *brw, GLuint sz,
- enum brw_gpu_ring ring)
+intel_batchbuffer_require_space(struct brw_context *brw, GLuint sz)
{
- const struct gen_device_info *devinfo = &brw->screen->devinfo;
struct intel_batchbuffer *batch = &brw->batch;
- /* If we're switching rings, implicitly flush the batch. */
- if (unlikely(ring != brw->batch.ring) && brw->batch.ring != UNKNOWN_RING &&
- devinfo->gen >= 6) {
- intel_batchbuffer_flush(brw);
- }
-
const unsigned batch_used = USED_BATCH(*batch) * 4;
if (batch_used + sz >= BATCH_SZ && !batch->no_wrap) {
intel_batchbuffer_flush(brw);
@@ -530,11 +515,6 @@ intel_batchbuffer_require_space(struct brw_context *brw, GLuint sz,
batch->map_next = (void *) batch->batch.map + batch_used;
assert(batch_used + sz < batch->batch.bo->size);
}
-
- /* The intel_batchbuffer_flush() calls above might have changed
- * brw->batch.ring to UNKNOWN_RING, so we need to set it here at the end.
- */
- brw->batch.ring = ring;
}
/**
@@ -601,46 +581,44 @@ brw_finish_batch(struct brw_context *brw)
*/
brw_emit_query_end(brw);
- if (brw->batch.ring == RENDER_RING) {
- /* Work around L3 state leaks into contexts set MI_RESTORE_INHIBIT which
- * assume that the L3 cache is configured according to the hardware
- * defaults. On Kernel 4.16+, we no longer need to do this.
- */
- if (devinfo->gen >= 7 &&
- !(brw->screen->kernel_features & KERNEL_ALLOWS_CONTEXT_ISOLATION))
- gen7_restore_default_l3_config(brw);
-
- if (devinfo->is_haswell) {
- /* From the Haswell PRM, Volume 2b, Command Reference: Instructions,
- * 3DSTATE_CC_STATE_POINTERS > "Note":
- *
- * "SW must program 3DSTATE_CC_STATE_POINTERS command at the end of every
- * 3D batch buffer followed by a PIPE_CONTROL with RC flush and CS stall."
- *
- * From the example in the docs, it seems to expect a regular pipe control
- * flush here as well. We may have done it already, but meh.
- *
- * See also WaAvoidRCZCounterRollover.
- */
- brw_emit_mi_flush(brw);
- BEGIN_BATCH(2);
- OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (2 - 2));
- OUT_BATCH(brw->cc.state_offset | 1);
- ADVANCE_BATCH();
- brw_emit_pipe_control_flush(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH |
- PIPE_CONTROL_CS_STALL);
- }
+ /* Work around L3 state leaks into contexts set MI_RESTORE_INHIBIT which
+ * assume that the L3 cache is configured according to the hardware
+ * defaults. On Kernel 4.16+, we no longer need to do this.
+ */
+ if (devinfo->gen >= 7 &&
+ !(brw->screen->kernel_features & KERNEL_ALLOWS_CONTEXT_ISOLATION))
+ gen7_restore_default_l3_config(brw);
- /* Do not restore push constant packets during context restore. */
- if (devinfo->gen >= 7)
- gen10_emit_isp_disable(brw);
+ if (devinfo->is_haswell) {
+ /* From the Haswell PRM, Volume 2b, Command Reference: Instructions,
+ * 3DSTATE_CC_STATE_POINTERS > "Note":
+ *
+ * "SW must program 3DSTATE_CC_STATE_POINTERS command at the end of every
+ * 3D batch buffer followed by a PIPE_CONTROL with RC flush and CS stall."
+ *
+ * From the example in the docs, it seems to expect a regular pipe control
+ * flush here as well. We may have done it already, but meh.
+ *
+ * See also WaAvoidRCZCounterRollover.
+ */
+ brw_emit_mi_flush(brw);
+ BEGIN_BATCH(2);
+ OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (2 - 2));
+ OUT_BATCH(brw->cc.state_offset | 1);
+ ADVANCE_BATCH();
+ brw_emit_pipe_control_flush(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH |
+ PIPE_CONTROL_CS_STALL);
}
+ /* Do not restore push constant packets during context restore. */
+ if (devinfo->gen >= 7)
+ gen10_emit_isp_disable(brw);
+
/* Emit MI_BATCH_BUFFER_END to finish our batch. Note that execbuf2
* requires our batch size to be QWord aligned, so we pad it out if
* necessary by emitting an extra MI_NOOP after the end.
*/
- intel_batchbuffer_require_space(brw, 8, brw->batch.ring);
+ intel_batchbuffer_require_space(brw, 8);
*brw->batch.map_next++ = MI_BATCH_BUFFER_END;
if (USED_BATCH(brw->batch) & 1) {
*brw->batch.map_next++ = MI_NOOP;
@@ -747,7 +725,6 @@ execbuffer(int fd,
static int
submit_batch(struct brw_context *brw, int in_fence_fd, int *out_fence_fd)
{
- const struct gen_device_info *devinfo = &brw->screen->devinfo;
__DRIscreen *dri_screen = brw->screen->driScrnPriv;
struct intel_batchbuffer *batch = &brw->batch;
int ret = 0;
@@ -776,7 +753,6 @@ submit_batch(struct brw_context *brw, int in_fence_fd, int *out_fence_fd)
* To avoid stalling, execobject.offset should match the current
* address of that object within the active context.
*/
- assert(devinfo->gen < 6 || batch->ring == RENDER_RING);
int flags = I915_EXEC_NO_RELOC | I915_EXEC_RENDER;
if (batch->needs_sol_reset)
@@ -1045,10 +1021,10 @@ brw_state_batch(struct brw_context *brw,
void
intel_batchbuffer_data(struct brw_context *brw,
- const void *data, GLuint bytes, enum brw_gpu_ring ring)
+ const void *data, GLuint bytes)
{
assert((bytes & 3) == 0);
- intel_batchbuffer_require_space(brw, bytes, ring);
+ intel_batchbuffer_require_space(brw, bytes);
memcpy(brw->batch.map_next, data, bytes);
brw->batch.map_next += bytes >> 2;
}
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.h b/src/mesa/drivers/dri/i965/intel_batchbuffer.h
index 7be5b10f3ab..bd07bef9deb 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.h
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.h
@@ -25,8 +25,7 @@ void intel_batchbuffer_init(struct brw_context *brw);
void intel_batchbuffer_free(struct intel_batchbuffer *batch);
void intel_batchbuffer_save_state(struct brw_context *brw);
void intel_batchbuffer_reset_to_saved(struct brw_context *brw);
-void intel_batchbuffer_require_space(struct brw_context *brw, GLuint sz,
- enum brw_gpu_ring ring);
+void intel_batchbuffer_require_space(struct brw_context *brw, GLuint sz);
int _intel_batchbuffer_flush_fence(struct brw_context *brw,
int in_fence_fd, int *out_fence_fd,
const char *file, int line);
@@ -43,8 +42,7 @@ int _intel_batchbuffer_flush_fence(struct brw_context *brw,
* intel_buffer_dword() calls.
*/
void intel_batchbuffer_data(struct brw_context *brw,
- const void *data, GLuint bytes,
- enum brw_gpu_ring ring);
+ const void *data, GLuint bytes);
bool brw_batch_has_aperture_space(struct brw_context *brw,
unsigned extra_space_in_bytes);
@@ -81,9 +79,9 @@ static inline uint32_t float_as_int(float f)
}
static inline void
-intel_batchbuffer_begin(struct brw_context *brw, int n, enum brw_gpu_ring ring)
+intel_batchbuffer_begin(struct brw_context *brw, int n)
{
- intel_batchbuffer_require_space(brw, n * 4, ring);
+ intel_batchbuffer_require_space(brw, n * 4);
#ifdef DEBUG
brw->batch.emit = USED_BATCH(brw->batch);
@@ -117,12 +115,13 @@ brw_ptr_in_state_buffer(struct intel_batchbuffer *batch, void *p)
}
#define BEGIN_BATCH(n) do { \
- intel_batchbuffer_begin(brw, (n), RENDER_RING); \
+ intel_batchbuffer_begin(brw, (n)); \
uint32_t *__map = brw->batch.map_next; \
brw->batch.map_next += (n)
#define BEGIN_BATCH_BLT(n) do { \
- intel_batchbuffer_begin(brw, (n), BLT_RING); \
+ assert(brw->screen->devinfo.gen < 6); \
+ intel_batchbuffer_begin(brw, (n)); \
uint32_t *__map = brw->batch.map_next; \
brw->batch.map_next += (n)
diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c
index 5ef78584cab..90784c5b195 100644
--- a/src/mesa/drivers/dri/i965/intel_blit.c
+++ b/src/mesa/drivers/dri/i965/intel_blit.c
@@ -288,7 +288,7 @@ emit_copy_blit(struct brw_context *brw,
unsigned length = devinfo->gen >= 8 ? 10 : 8;
- intel_batchbuffer_require_space(brw, length * 4, BLT_RING);
+ intel_batchbuffer_require_space(brw, length * 4);
DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
__func__,
src_buffer, src_pitch, src_offset, src_x, src_y,
@@ -661,7 +661,7 @@ intelEmitImmediateColorExpandBlit(struct brw_context *brw,
unsigned xy_setup_blt_length = devinfo->gen >= 8 ? 10 : 8;
intel_batchbuffer_require_space(brw, (xy_setup_blt_length * 4) +
- (3 * 4) + dwords * 4, BLT_RING);
+ (3 * 4) + dwords * 4);
opcode = XY_SETUP_BLT_CMD;
if (cpp == 4)
@@ -699,7 +699,7 @@ intelEmitImmediateColorExpandBlit(struct brw_context *brw,
OUT_BATCH(SET_FIELD(y + h, BLT_Y) | SET_FIELD(x + w, BLT_X));
ADVANCE_BATCH();
- intel_batchbuffer_data(brw, src_bits, dwords * 4, BLT_RING);
+ intel_batchbuffer_data(brw, src_bits, dwords * 4);
brw_emit_mi_flush(brw);