diff options
author | Samuel Pitoiset <[email protected]> | 2015-12-09 19:53:18 +0100 |
---|---|---|
committer | Samuel Pitoiset <[email protected]> | 2016-01-03 12:17:05 +0100 |
commit | 53dddab78c9bc7fbfd78bf23284ec6d92b70e93b (patch) | |
tree | 91724b558e504d80dba49c0ab68c8d9f8bd2cca8 /src/gallium/drivers/nouveau/nv50 | |
parent | 28dea2662699072715f67ef34e910d278f88f3b1 (diff) |
nv50,nvc0: optimize coherent buffer checking at draw time
Instead of iterating over all the buffer resources looking for coherent
buffers, we keep track of a context-wide count. This will save some
iterations (and CPU cycles) in 99.99% of cases, because coherent
buffers are rarely used.
Signed-off-by: Samuel Pitoiset <[email protected]>
Reviewed-by: Ilia Mirkin <[email protected]>
Diffstat (limited to 'src/gallium/drivers/nouveau/nv50')
-rw-r--r-- | src/gallium/drivers/nouveau/nv50/nv50_context.h | 3 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/nv50/nv50_state.c | 25 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/nv50/nv50_vbo.c | 42 |
3 files changed, 35 insertions, 35 deletions
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.h b/src/gallium/drivers/nouveau/nv50/nv50_context.h index 2cebcd99423..712d00ed2d3 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_context.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h @@ -134,9 +134,11 @@ struct nv50_context { struct nv50_constbuf constbuf[3][NV50_MAX_PIPE_CONSTBUFS]; uint16_t constbuf_dirty[3]; uint16_t constbuf_valid[3]; + uint16_t constbuf_coherent[3]; struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; unsigned num_vtxbufs; + uint32_t vtxbufs_coherent; struct pipe_index_buffer idxbuf; uint32_t vbo_fifo; /* bitmask of vertex elements to be pushed to FIFO */ uint32_t vbo_user; /* bitmask of vertex buffers pointing to user memory */ @@ -148,6 +150,7 @@ struct nv50_context { struct pipe_sampler_view *textures[3][PIPE_MAX_SAMPLERS]; unsigned num_textures[3]; + uint32_t textures_coherent[3]; struct nv50_tsc_entry *samplers[3][PIPE_MAX_SAMPLERS]; unsigned num_samplers[3]; diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c b/src/gallium/drivers/nouveau/nv50/nv50_state.c index de655971b66..cb040439139 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_state.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c @@ -664,6 +664,17 @@ nv50_stage_set_sampler_views(struct nv50_context *nv50, int s, if (old) nv50_screen_tic_unlock(nv50->screen, old); + if (views[i] && views[i]->texture) { + struct pipe_resource *res = views[i]->texture; + if (res->target == PIPE_BUFFER && + (res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)) + nv50->textures_coherent[s] |= 1 << i; + else + nv50->textures_coherent[s] &= ~(1 << i); + } else { + nv50->textures_coherent[s] &= ~(1 << i); + } + pipe_sampler_view_reference(&nv50->textures[s][i], views[i]); } @@ -847,13 +858,19 @@ nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, nv50->constbuf[s][i].u.data = cb->user_buffer; nv50->constbuf[s][i].size = MIN2(cb->buffer_size, 0x10000); nv50->constbuf_valid[s] |= 1 << i; + 
nv50->constbuf_coherent[s] &= ~(1 << i); } else if (res) { nv50->constbuf[s][i].offset = cb->buffer_offset; nv50->constbuf[s][i].size = MIN2(align(cb->buffer_size, 0x100), 0x10000); nv50->constbuf_valid[s] |= 1 << i; + if (res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT) + nv50->constbuf_coherent[s] |= 1 << i; + else + nv50->constbuf_coherent[s] &= ~(1 << i); } else { nv50->constbuf_valid[s] &= ~(1 << i); + nv50->constbuf_coherent[s] &= ~(1 << i); } nv50->constbuf_dirty[s] |= 1 << i; @@ -1003,6 +1020,7 @@ nv50_set_vertex_buffers(struct pipe_context *pipe, if (!vb) { nv50->vbo_user &= ~(((1ull << count) - 1) << start_slot); nv50->vbo_constant &= ~(((1ull << count) - 1) << start_slot); + nv50->vtxbufs_coherent &= ~(((1ull << count) - 1) << start_slot); return; } @@ -1015,9 +1033,16 @@ nv50_set_vertex_buffers(struct pipe_context *pipe, nv50->vbo_constant |= 1 << dst_index; else nv50->vbo_constant &= ~(1 << dst_index); + nv50->vtxbufs_coherent &= ~(1 << dst_index); } else { nv50->vbo_user &= ~(1 << dst_index); nv50->vbo_constant &= ~(1 << dst_index); + + if (vb[i].buffer && + vb[i].buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT) + nv50->vtxbufs_coherent |= (1 << dst_index); + else + nv50->vtxbufs_coherent &= ~(1 << dst_index); } } } diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c index 2d1aa6abcd2..60fa2bc06a8 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c @@ -765,7 +765,7 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) struct nv50_context *nv50 = nv50_context(pipe); struct nouveau_pushbuf *push = nv50->base.pushbuf; bool tex_dirty = false; - int i, s; + int s; /* NOTE: caller must ensure that (min_index + index_bias) is >= 0 */ nv50->vb_elt_first = info->min_index + info->index_bias; @@ -794,27 +794,9 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) push->kick_notify = nv50_draw_vbo_kick_notify; - 
/* TODO: Instead of iterating over all the buffer resources looking for - * coherent buffers, keep track of a context-wide count. - */ for (s = 0; s < 3 && !nv50->cb_dirty; ++s) { - uint32_t valid = nv50->constbuf_valid[s]; - - while (valid && !nv50->cb_dirty) { - const unsigned i = ffs(valid) - 1; - struct pipe_resource *res; - - valid &= ~(1 << i); - if (nv50->constbuf[s][i].user) - continue; - - res = nv50->constbuf[s][i].u.buf; - if (!res) - continue; - - if (res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT) - nv50->cb_dirty = true; - } + if (nv50->constbuf_coherent[s]) + nv50->cb_dirty = true; } /* If there are any coherent constbufs, flush the cache */ @@ -825,15 +807,10 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) } for (s = 0; s < 3 && !tex_dirty; ++s) { - for (i = 0; i < nv50->num_textures[s] && !tex_dirty; ++i) { - if (!nv50->textures[s][i] || - nv50->textures[s][i]->texture->target != PIPE_BUFFER) - continue; - if (nv50->textures[s][i]->texture->flags & - PIPE_RESOURCE_FLAG_MAP_COHERENT) - tex_dirty = true; - } + if (nv50->textures_coherent[s]) + tex_dirty = true; } + if (tex_dirty) { BEGIN_NV04(push, NV50_3D(TEX_CACHE_CTL), 1); PUSH_DATA (push, 0x20); @@ -853,12 +830,7 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) PUSH_DATA (push, info->start_instance); } - for (i = 0; i < nv50->num_vtxbufs && !nv50->base.vbo_dirty; ++i) { - if (!nv50->vtxbuf[i].buffer) - continue; - if (nv50->vtxbuf[i].buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT) - nv50->base.vbo_dirty = true; - } + nv50->base.vbo_dirty |= !!nv50->vtxbufs_coherent; if (nv50->base.vbo_dirty) { BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FLUSH), 1); |