diff options
author | Kenneth Graunke <[email protected]> | 2018-12-02 14:16:08 -0800 |
---|---|---|
committer | Kenneth Graunke <[email protected]> | 2019-02-21 10:26:10 -0800 |
commit | 1114f0c1cecc70925c3933ff360e3bcbe994ae9d (patch) | |
tree | 6192634549721502396e9e0ccc8636d43360828c /src/gallium | |
parent | c03fbb41aa3c880daae74b9c7a3684565b0bef9d (diff) |
iris: CS stall for stream out -> VB
i965 doesn't do this, but I suspect it just stalls so often that it never
hits this case. Fixes the ext_transform_feedback-position render test, among others.
Diffstat (limited to 'src/gallium')
-rw-r--r-- | src/gallium/drivers/iris/iris_state.c | 19 |
1 file changed, 13 insertions, 6 deletions
diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c index 9ef6df7770e..d5cd6ae8201 100644 --- a/src/gallium/drivers/iris/iris_state.c +++ b/src/gallium/drivers/iris/iris_state.c @@ -4368,7 +4368,7 @@ iris_upload_dirty_render_state(struct iris_context *ice, * So, we need to do a VF cache invalidate if the buffer for a VB * slot slot changes [48:32] address bits from the previous time. */ - bool need_invalidate = false; + unsigned flush_flags = 0; for (unsigned i = 0; i < cso->num_buffers; i++) { uint16_t high_bits = 0; @@ -4379,16 +4379,23 @@ iris_upload_dirty_render_state(struct iris_context *ice, high_bits = res->bo->gtt_offset >> 32ull; if (high_bits != ice->state.last_vbo_high_bits[i]) { - need_invalidate = true; + flush_flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; ice->state.last_vbo_high_bits[i] = high_bits; } + + /* If the buffer was written to by streamout, we may need + * to stall so those writes land and become visible to the + * vertex fetcher. + * + * TODO: This may stall more than necessary. + */ + if (res->bind_history & PIPE_BIND_STREAM_OUTPUT) + flush_flags |= PIPE_CONTROL_CS_STALL; } } - if (need_invalidate) { - iris_emit_pipe_control_flush(batch, - PIPE_CONTROL_VF_CACHE_INVALIDATE); - } + if (flush_flags) + iris_emit_pipe_control_flush(batch, flush_flags); iris_batch_emit(batch, cso->vertex_buffers, sizeof(uint32_t) * (1 + vb_dwords * cso->num_buffers)); |