Diffstat (limited to 'src/mesa/drivers/dri/i965')
-rw-r--r--  src/mesa/drivers/dri/i965/brw_context.h       |  6
-rw-r--r--  src/mesa/drivers/dri/i965/genX_state_upload.c | 63
2 files changed, 69 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 0844400bc53..773f104824d 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -966,6 +966,9 @@ struct brw_context
        * These bitfields indicate which workarounds are needed.
        */
       uint8_t attrib_wa_flags[VERT_ATTRIB_MAX];
+
+      /* High bits of the last seen vertex buffer address (for workarounds). */
+      uint16_t last_bo_high_bits[33];
    } vb;
 
    struct {
@@ -986,6 +989,9 @@ struct brw_context
        * referencing the same index buffer.
        */
       unsigned int start_vertex_offset;
+
+      /* High bits of the last seen index buffer address (for workarounds). */
+      uint16_t last_bo_high_bits;
    } ib;
 
    /* Active vertex program:
diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c
index 6178bfa3f88..4f44b9965e6 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -480,6 +480,65 @@ upload_format_size(uint32_t upload_format)
    }
 }
 
+static UNUSED uint16_t
+pinned_bo_high_bits(struct brw_bo *bo)
+{
+   return (bo->kflags & EXEC_OBJECT_PINNED) ? bo->gtt_offset >> 32ull : 0;
+}
+
+/* The VF cache designers apparently cut corners, and made the cache key's
+ * <VertexBufferIndex, Memory Address> tuple only consider the bottom 32 bits
+ * of the address.  If you happen to have two vertex buffers which get placed
+ * exactly 4 GiB apart and use them in back-to-back draw calls, you can get
+ * collisions.  (These collisions can happen within a single batch.)
+ *
+ * In the soft-pin world, we'd like to assign addresses up front, and never
+ * move buffers.  So, we need to do a VF cache invalidate if the buffer for
+ * a particular VB slot has different [48:32] address bits than the last one.
+ *
+ * In the relocation world, we have no idea what the addresses will be, so
+ * we can't apply this workaround.  Instead, we tell the kernel to move it
+ * to the low 4GB regardless.
+ */
+static void
+vf_invalidate_for_vb_48bit_transitions(struct brw_context *brw)
+{
+#if GEN_GEN >= 8
+   bool need_invalidate = false;
+   unsigned i;
+
+   for (i = 0; i < brw->vb.nr_buffers; i++) {
+      uint16_t high_bits = pinned_bo_high_bits(brw->vb.buffers[i].bo);
+
+      if (high_bits != brw->vb.last_bo_high_bits[i]) {
+         need_invalidate = true;
+         brw->vb.last_bo_high_bits[i] = high_bits;
+      }
+   }
+
+   /* Don't bother with draw parameter buffers - those are generated by
+    * the driver so we can select a consistent memory zone.
+    */
+
+   if (need_invalidate) {
+      brw_emit_pipe_control_flush(brw, PIPE_CONTROL_VF_CACHE_INVALIDATE);
+   }
+#endif
+}
+
+static void
+vf_invalidate_for_ib_48bit_transition(struct brw_context *brw)
+{
+#if GEN_GEN >= 8
+   uint16_t high_bits = pinned_bo_high_bits(brw->ib.bo);
+
+   if (high_bits != brw->ib.last_bo_high_bits) {
+      brw_emit_pipe_control_flush(brw, PIPE_CONTROL_VF_CACHE_INVALIDATE);
+      brw->ib.last_bo_high_bits = high_bits;
+   }
+#endif
+}
+
 static void
 genX(emit_vertices)(struct brw_context *brw)
 {
@@ -594,6 +653,8 @@ genX(emit_vertices)(struct brw_context *brw)
    const unsigned nr_buffers = brw->vb.nr_buffers +
       uses_draw_params + uses_derived_draw_params;
 
+   vf_invalidate_for_vb_48bit_transitions(brw);
+
    if (nr_buffers) {
       assert(nr_buffers <= (GEN_GEN >= 6 ? 33 : 17));
@@ -886,6 +947,8 @@ genX(emit_index_buffer)(struct brw_context *brw)
    if (index_buffer == NULL)
       return;
 
+   vf_invalidate_for_ib_48bit_transition(brw);
+
    brw_batch_emit(brw, GENX(3DSTATE_INDEX_BUFFER), ib) {
 #if GEN_GEN < 8 && !GEN_IS_HASWELL
       ib.CutIndexEnable = brw->prim_restart.enable_cut_index;
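
The collision scenario described in the new comment can be reproduced outside the driver: with only the low 32 address bits in the cache key, two buffers placed exactly 4 GiB apart produce identical keys, while their upper address bits differ, which is the condition the new last_bo_high_bits tracking detects. The snippet below is an illustrative standalone sketch, not part of the patch; the addresses are invented for the example.

   /* Standalone illustration (not driver code): two made-up GTT offsets
    * exactly 4 GiB apart share the same low 32 bits -- the only address
    * bits the VF cache key uses -- while their upper bits differ, which
    * is what the pinned_bo_high_bits() comparisons in the patch catch.
    */
   #include <inttypes.h>
   #include <stdint.h>
   #include <stdio.h>

   int
   main(void)
   {
      uint64_t bo_a = 0x000100001000ull;    /* hypothetical pinned offset */
      uint64_t bo_b = bo_a + (1ull << 32);  /* exactly 4 GiB higher */

      uint32_t key_a = (uint32_t) bo_a;     /* low 32 bits: the cached part */
      uint32_t key_b = (uint32_t) bo_b;

      uint16_t high_a = bo_a >> 32;         /* upper bits, as in pinned_bo_high_bits() */
      uint16_t high_b = bo_b >> 32;

      printf("low 32 bits: 0x%08" PRIx32 " vs 0x%08" PRIx32 " -> %s\n",
             key_a, key_b, key_a == key_b ? "collision" : "distinct");
      printf("upper bits:  0x%04" PRIx16 " vs 0x%04" PRIx16 " -> %s\n",
             high_a, high_b,
             high_a != high_b ? "invalidate VF cache" : "no flush needed");
      return 0;
   }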