summary | refs | log | tree | commit | diff | stats
path: root/src/gallium
diff options
context:
space:
mode:
author: Kenneth Graunke <[email protected]> 2018-12-02 14:16:08 -0800
committer: Kenneth Graunke <[email protected]> 2019-02-21 10:26:10 -0800
commit: 1114f0c1cecc70925c3933ff360e3bcbe994ae9d (patch)
tree: 6192634549721502396e9e0ccc8636d43360828c /src/gallium
parent: c03fbb41aa3c880daae74b9c7a3684565b0bef9d (diff)
iris: CS stall for stream out -> VB
i965 doesn't do this, but I suspect it just stalls a lot and doesn't hit this. Fixes ext_transform_feedback-position render among others.
Diffstat (limited to 'src/gallium')
-rw-r--r-- src/gallium/drivers/iris/iris_state.c 19
1 files changed, 13 insertions, 6 deletions
diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c
index 9ef6df7770e..d5cd6ae8201 100644
--- a/src/gallium/drivers/iris/iris_state.c
+++ b/src/gallium/drivers/iris/iris_state.c
@@ -4368,7 +4368,7 @@ iris_upload_dirty_render_state(struct iris_context *ice,
* So, we need to do a VF cache invalidate if the buffer for a VB
* slot slot changes [48:32] address bits from the previous time.
*/
- bool need_invalidate = false;
+ unsigned flush_flags = 0;
for (unsigned i = 0; i < cso->num_buffers; i++) {
uint16_t high_bits = 0;
@@ -4379,16 +4379,23 @@ iris_upload_dirty_render_state(struct iris_context *ice,
high_bits = res->bo->gtt_offset >> 32ull;
if (high_bits != ice->state.last_vbo_high_bits[i]) {
- need_invalidate = true;
+ flush_flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
ice->state.last_vbo_high_bits[i] = high_bits;
}
+
+ /* If the buffer was written to by streamout, we may need
+ * to stall so those writes land and become visible to the
+ * vertex fetcher.
+ *
+ * TODO: This may stall more than necessary.
+ */
+ if (res->bind_history & PIPE_BIND_STREAM_OUTPUT)
+ flush_flags |= PIPE_CONTROL_CS_STALL;
}
}
- if (need_invalidate) {
- iris_emit_pipe_control_flush(batch,
- PIPE_CONTROL_VF_CACHE_INVALIDATE);
- }
+ if (flush_flags)
+ iris_emit_pipe_control_flush(batch, flush_flags);
iris_batch_emit(batch, cso->vertex_buffers, sizeof(uint32_t) *
(1 + vb_dwords * cso->num_buffers));