summaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers/dri/i965
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers/dri/i965')
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.h6
-rw-r--r--src/mesa/drivers/dri/i965/genX_state_upload.c63
2 files changed, 69 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 0844400bc53..773f104824d 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -966,6 +966,9 @@ struct brw_context
* These bitfields indicate which workarounds are needed.
*/
uint8_t attrib_wa_flags[VERT_ATTRIB_MAX];
+
+ /* High bits of the last seen vertex buffer address (for workarounds). */
+ uint16_t last_bo_high_bits[33];
} vb;
struct {
@@ -986,6 +989,9 @@ struct brw_context
* referencing the same index buffer.
*/
unsigned int start_vertex_offset;
+
+ /* High bits of the last seen index buffer address (for workarounds). */
+ uint16_t last_bo_high_bits;
} ib;
/* Active vertex program:
diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c
index 6178bfa3f88..4f44b9965e6 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -480,6 +480,65 @@ upload_format_size(uint32_t upload_format)
}
}
+static UNUSED uint16_t
+pinned_bo_high_bits(struct brw_bo *bo)
+{
+ return (bo->kflags & EXEC_OBJECT_PINNED) ? bo->gtt_offset >> 32ull : 0;
+}
+
+/* The VF cache designers apparently cut corners, and made the cache key's
+ * <VertexBufferIndex, Memory Address> tuple only consider the bottom 32 bits
+ * of the address. If you happen to have two vertex buffers which get placed
+ * exactly 4 GiB apart and use them in back-to-back draw calls, you can get
+ * collisions. (These collisions can happen within a single batch.)
+ *
+ * In the soft-pin world, we'd like to assign addresses up front, and never
+ * move buffers. So, we need to do a VF cache invalidate if the buffer for
+ * a particular VB slot has different [48:32] address bits than the last one.
+ *
+ * In the relocation world, we have no idea what the addresses will be, so
+ * we can't apply this workaround. Instead, we tell the kernel to move it
+ * to the low 4GB regardless.
+ */
+static void
+vf_invalidate_for_vb_48bit_transitions(struct brw_context *brw)
+{
+#if GEN_GEN >= 8
+ bool need_invalidate = true;
+ unsigned i;
+
+ for (i = 0; i < brw->vb.nr_buffers; i++) {
+ uint16_t high_bits = pinned_bo_high_bits(brw->vb.buffers[i].bo);
+
+ if (high_bits != brw->vb.last_bo_high_bits[i]) {
+ need_invalidate = true;
+ brw->vb.last_bo_high_bits[i] = high_bits;
+ }
+ }
+
+ /* Don't bother with draw parameter buffers - those are generated by
+ * the driver so we can select a consistent memory zone.
+ */
+
+ if (need_invalidate) {
+ brw_emit_pipe_control_flush(brw, PIPE_CONTROL_VF_CACHE_INVALIDATE);
+ }
+#endif
+}
+
+static void
+vf_invalidate_for_ib_48bit_transition(struct brw_context *brw)
+{
+#if GEN_GEN >= 8
+ uint16_t high_bits = pinned_bo_high_bits(brw->ib.bo);
+
+ if (high_bits != brw->ib.last_bo_high_bits) {
+ brw_emit_pipe_control_flush(brw, PIPE_CONTROL_VF_CACHE_INVALIDATE);
+ brw->ib.last_bo_high_bits = high_bits;
+ }
+#endif
+}
+
static void
genX(emit_vertices)(struct brw_context *brw)
{
@@ -594,6 +653,8 @@ genX(emit_vertices)(struct brw_context *brw)
const unsigned nr_buffers = brw->vb.nr_buffers +
uses_draw_params + uses_derived_draw_params;
+ vf_invalidate_for_vb_48bit_transitions(brw);
+
if (nr_buffers) {
assert(nr_buffers <= (GEN_GEN >= 6 ? 33 : 17));
@@ -886,6 +947,8 @@ genX(emit_index_buffer)(struct brw_context *brw)
if (index_buffer == NULL)
return;
+ vf_invalidate_for_ib_48bit_transition(brw);
+
brw_batch_emit(brw, GENX(3DSTATE_INDEX_BUFFER), ib) {
#if GEN_GEN < 8 && !GEN_IS_HASWELL
ib.CutIndexEnable = brw->prim_restart.enable_cut_index;