From 53dddab78c9bc7fbfd78bf23284ec6d92b70e93b Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Wed, 9 Dec 2015 19:53:18 +0100 Subject: nv50,nvc0: optimize coherent buffer checking at draw time Instead of iterating over all the buffer resources looking for coherent buffers, we keep track of a context-wide count. This will save some iterations (and CPU cycles) in 99.99% case because usually coherent buffers are not so used. Signed-off-by: Samuel Pitoiset Reviewed-by: Ilia Mirkin --- src/gallium/drivers/nouveau/nv50/nv50_context.h | 3 ++ src/gallium/drivers/nouveau/nv50/nv50_state.c | 25 +++++++++++++++ src/gallium/drivers/nouveau/nv50/nv50_vbo.c | 42 +++++-------------------- src/gallium/drivers/nouveau/nvc0/nvc0_context.h | 3 ++ src/gallium/drivers/nouveau/nvc0/nvc0_state.c | 36 +++++++++++++++++++++ src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c | 41 +++++------------------- 6 files changed, 82 insertions(+), 68 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.h b/src/gallium/drivers/nouveau/nv50/nv50_context.h index 2cebcd99423..712d00ed2d3 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_context.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h @@ -134,9 +134,11 @@ struct nv50_context { struct nv50_constbuf constbuf[3][NV50_MAX_PIPE_CONSTBUFS]; uint16_t constbuf_dirty[3]; uint16_t constbuf_valid[3]; + uint16_t constbuf_coherent[3]; struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; unsigned num_vtxbufs; + uint32_t vtxbufs_coherent; struct pipe_index_buffer idxbuf; uint32_t vbo_fifo; /* bitmask of vertex elements to be pushed to FIFO */ uint32_t vbo_user; /* bitmask of vertex buffers pointing to user memory */ @@ -148,6 +150,7 @@ struct nv50_context { struct pipe_sampler_view *textures[3][PIPE_MAX_SAMPLERS]; unsigned num_textures[3]; + uint32_t textures_coherent[3]; struct nv50_tsc_entry *samplers[3][PIPE_MAX_SAMPLERS]; unsigned num_samplers[3]; diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c b/src/gallium/drivers/nouveau/nv50/nv50_state.c index de655971b66..cb040439139 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_state.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c @@ -664,6 +664,17 @@ nv50_stage_set_sampler_views(struct nv50_context *nv50, int s, if (old) nv50_screen_tic_unlock(nv50->screen, old); + if (views[i] && views[i]->texture) { + struct pipe_resource *res = views[i]->texture; + if (res->target == PIPE_BUFFER && + (res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)) + nv50->textures_coherent[s] |= 1 << i; + else + nv50->textures_coherent[s] &= ~(1 << i); + } else { + nv50->textures_coherent[s] &= ~(1 << i); + } + pipe_sampler_view_reference(&nv50->textures[s][i], views[i]); } @@ -847,13 +858,19 @@ nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, nv50->constbuf[s][i].u.data = cb->user_buffer; nv50->constbuf[s][i].size = MIN2(cb->buffer_size, 0x10000); nv50->constbuf_valid[s] |= 1 << i; + nv50->constbuf_coherent[s] &= ~(1 << i); } else if (res) { nv50->constbuf[s][i].offset = cb->buffer_offset; nv50->constbuf[s][i].size = MIN2(align(cb->buffer_size, 0x100), 0x10000); nv50->constbuf_valid[s] |= 1 << i; + if (res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT) + nv50->constbuf_coherent[s] |= 1 << i; + else + nv50->constbuf_coherent[s] &= ~(1 << i); } else { nv50->constbuf_valid[s] &= ~(1 << i); + nv50->constbuf_coherent[s] &= ~(1 << i); } nv50->constbuf_dirty[s] |= 1 << i; @@ -1003,6 +1020,7 @@ nv50_set_vertex_buffers(struct pipe_context *pipe, if (!vb) { nv50->vbo_user &= ~(((1ull << count) - 1) << start_slot); nv50->vbo_constant &= ~(((1ull << count) - 1) << start_slot); + nv50->vtxbufs_coherent &= ~(((1ull << count) - 1) << start_slot); return; } @@ -1015,9 +1033,16 @@ nv50_set_vertex_buffers(struct pipe_context *pipe, nv50->vbo_constant |= 1 << dst_index; else nv50->vbo_constant &= ~(1 << dst_index); + nv50->vtxbufs_coherent &= ~(1 << dst_index); } else { nv50->vbo_user &= ~(1 << dst_index); nv50->vbo_constant &= ~(1 << dst_index); + + if (vb[i].buffer && + vb[i].buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT) + nv50->vtxbufs_coherent |= (1 << dst_index); + else + nv50->vtxbufs_coherent &= ~(1 << dst_index); } } } diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c index 2d1aa6abcd2..60fa2bc06a8 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c @@ -765,7 +765,7 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) struct nv50_context *nv50 = nv50_context(pipe); struct nouveau_pushbuf *push = nv50->base.pushbuf; bool tex_dirty = false; - int i, s; + int s; /* NOTE: caller must ensure that (min_index + index_bias) is >= 0 */ nv50->vb_elt_first = info->min_index + info->index_bias; @@ -794,27 +794,9 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) push->kick_notify = nv50_draw_vbo_kick_notify; - /* TODO: Instead of iterating over all the buffer resources looking for - * coherent buffers, keep track of a context-wide count. - */ for (s = 0; s < 3 && !nv50->cb_dirty; ++s) { - uint32_t valid = nv50->constbuf_valid[s]; - - while (valid && !nv50->cb_dirty) { - const unsigned i = ffs(valid) - 1; - struct pipe_resource *res; - - valid &= ~(1 << i); - if (nv50->constbuf[s][i].user) - continue; - - res = nv50->constbuf[s][i].u.buf; - if (!res) - continue; - - if (res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT) - nv50->cb_dirty = true; - } + if (nv50->constbuf_coherent[s]) + nv50->cb_dirty = true; } /* If there are any coherent constbufs, flush the cache */ @@ -825,15 +807,10 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) } for (s = 0; s < 3 && !tex_dirty; ++s) { - for (i = 0; i < nv50->num_textures[s] && !tex_dirty; ++i) { - if (!nv50->textures[s][i] || - nv50->textures[s][i]->texture->target != PIPE_BUFFER) - continue; - if (nv50->textures[s][i]->texture->flags & - PIPE_RESOURCE_FLAG_MAP_COHERENT) - tex_dirty = true; - } + if (nv50->textures_coherent[s]) + tex_dirty = true; } + if (tex_dirty) { BEGIN_NV04(push, NV50_3D(TEX_CACHE_CTL), 1); PUSH_DATA (push, 0x20); @@ -853,12 +830,7 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) PUSH_DATA (push, info->start_instance); } - for (i = 0; i < nv50->num_vtxbufs && !nv50->base.vbo_dirty; ++i) { - if (!nv50->vtxbuf[i].buffer) - continue; - if (nv50->vtxbuf[i].buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT) - nv50->base.vbo_dirty = true; - } + nv50->base.vbo_dirty |= !!nv50->vtxbufs_coherent; if (nv50->base.vbo_dirty) { BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FLUSH), 1); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h index 39b73ecb0c2..12195489691 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h @@ -134,10 +134,12 @@ struct nvc0_context { struct nvc0_constbuf constbuf[6][NVC0_MAX_PIPE_CONSTBUFS]; uint16_t constbuf_dirty[6]; uint16_t constbuf_valid[6]; + uint16_t constbuf_coherent[6]; bool cb_dirty; struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; unsigned num_vtxbufs; + uint32_t vtxbufs_coherent; struct pipe_index_buffer idxbuf; uint32_t constant_vbos; uint32_t vbo_user; /* bitmask of vertex buffers pointing to user memory */ @@ -149,6 +151,7 @@ struct nvc0_context { struct pipe_sampler_view *textures[6][PIPE_MAX_SAMPLERS]; unsigned num_textures[6]; uint32_t textures_dirty[6]; + uint32_t textures_coherent[6]; struct nv50_tsc_entry *samplers[6][PIPE_MAX_SAMPLERS]; unsigned num_samplers[6]; uint16_t samplers_dirty[6]; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c index 41a824a97a0..24a6c222dd5 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c @@ -554,6 +554,17 @@ nvc0_stage_set_sampler_views(struct nvc0_context *nvc0, int s, continue; nvc0->textures_dirty[s] |= 1 << i; + if (views[i] && views[i]->texture) { + struct pipe_resource *res = views[i]->texture; + if (res->target == PIPE_BUFFER && + (res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)) + nvc0->textures_coherent[s] |= 1 << i; + else + nvc0->textures_coherent[s] &= ~(1 << i); + } else { + nvc0->textures_coherent[s] &= ~(1 << i); + } + if (old) { nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(s, i)); nvc0_screen_tic_unlock(nvc0->screen, old); @@ -596,6 +607,17 @@ nvc0_stage_set_sampler_views_range(struct nvc0_context *nvc0, const unsigned s, continue; nvc0->textures_dirty[s] |= 1 << i; + if (views[p] && views[p]->texture) { + struct pipe_resource *res = views[p]->texture; + if (res->target == PIPE_BUFFER && + (res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)) + nvc0->textures_coherent[s] |= 1 << i; + else + nvc0->textures_coherent[s] &= ~(1 << i); + } else { + nvc0->textures_coherent[s] &= ~(1 << i); + } + if (nvc0->textures[s][i]) { struct nv50_tic_entry *old = nv50_tic_entry(nvc0->textures[s][i]); nouveau_bufctx_reset(bctx, bin + i); @@ -842,14 +864,20 @@ nvc0_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, nvc0->constbuf[s][i].u.data = cb->user_buffer; nvc0->constbuf[s][i].size = MIN2(cb->buffer_size, 0x10000); nvc0->constbuf_valid[s] |= 1 << i; + nvc0->constbuf_coherent[s] &= ~(1 << i); } else if (cb) { nvc0->constbuf[s][i].offset = cb->buffer_offset; nvc0->constbuf[s][i].size = MIN2(align(cb->buffer_size, 0x100), 0x10000); nvc0->constbuf_valid[s] |= 1 << i; + if (res && res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT) + nvc0->constbuf_coherent[s] |= 1 << i; + else + nvc0->constbuf_coherent[s] &= ~(1 << i); } else { nvc0->constbuf_valid[s] &= ~(1 << i); + nvc0->constbuf_coherent[s] &= ~(1 << i); } } @@ -1009,6 +1037,7 @@ nvc0_set_vertex_buffers(struct pipe_context *pipe, if (!vb) { nvc0->vbo_user &= ~(((1ull << count) - 1) << start_slot); nvc0->constant_vbos &= ~(((1ull << count) - 1) << start_slot); + nvc0->vtxbufs_coherent &= ~(((1ull << count) - 1) << start_slot); return; } @@ -1021,9 +1050,16 @@ nvc0_set_vertex_buffers(struct pipe_context *pipe, nvc0->constant_vbos |= 1 << dst_index; else nvc0->constant_vbos &= ~(1 << dst_index); + nvc0->vtxbufs_coherent &= ~(1 << dst_index); } else { nvc0->vbo_user &= ~(1 << dst_index); nvc0->constant_vbos &= ~(1 << dst_index); + + if (vb[i].buffer && + vb[i].buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT) + nvc0->vtxbufs_coherent |= (1 << dst_index); + else + nvc0->vtxbufs_coherent &= ~(1 << dst_index); } } } diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c index 235b1afc24b..251753357eb 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c @@ -871,7 +871,7 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) { struct nvc0_context *nvc0 = nvc0_context(pipe); struct nouveau_pushbuf *push = nvc0->base.pushbuf; - int i, s; + int s; /* NOTE: caller must ensure that (min_index + index_bias) is >= 0 */ nvc0->vb_elt_first = info->min_index + info->index_bias; @@ -922,27 +922,9 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) push->kick_notify = nvc0_draw_vbo_kick_notify; - /* TODO: Instead of iterating over all the buffer resources looking for - * coherent buffers, keep track of a context-wide count. - */ for (s = 0; s < 5 && !nvc0->cb_dirty; ++s) { - uint32_t valid = nvc0->constbuf_valid[s]; - - while (valid && !nvc0->cb_dirty) { - const unsigned i = ffs(valid) - 1; - struct pipe_resource *res; - - valid &= ~(1 << i); - if (nvc0->constbuf[s][i].user) - continue; - - res = nvc0->constbuf[s][i].u.buf; - if (!res) - continue; - - if (res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT) - nvc0->cb_dirty = true; - } + if (nvc0->constbuf_coherent[s]) + nvc0->cb_dirty = true; } if (nvc0->cb_dirty) { @@ -951,14 +933,12 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) } for (s = 0; s < 5; ++s) { + if (!nvc0->textures_coherent[s]) + continue; + for (int i = 0; i < nvc0->num_textures[s]; ++i) { struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]); - struct pipe_resource *res; - if (!tic) - continue; - res = nvc0->textures[s][i]->texture; - if (res->target != PIPE_BUFFER || - !(res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)) + if (!(nvc0->textures_coherent[s] & (1 << i))) continue; BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1); @@ -984,12 +964,7 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) PUSH_DATA (push, info->start_instance); } - for (i = 0; i < nvc0->num_vtxbufs && !nvc0->base.vbo_dirty; ++i) { - if (!nvc0->vtxbuf[i].buffer) - continue; - if (nvc0->vtxbuf[i].buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT) - nvc0->base.vbo_dirty = true; - } + nvc0->base.vbo_dirty |= !!nvc0->vtxbufs_coherent; if (!nvc0->base.vbo_dirty && nvc0->idxbuf.buffer && nvc0->idxbuf.buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT) -- cgit v1.2.3