diff options
author | Marek Olšák <[email protected]> | 2012-03-29 17:51:50 +0200 |
---|---|---|
committer | Marek Olšák <[email protected]> | 2012-10-31 00:55:13 +0100 |
commit | e73bf3b805de78299f1a652668ba4e6eab9bac94 (patch) | |
tree | 11839d343c6aad3a8fb5c594cadf961288adbea0 /src/gallium/auxiliary/util/u_vbuf.c | |
parent | a7c5be098aee3a8228cbd95558bac29cb7ff6a3d (diff) |
gallium: add start_slot parameter to set_vertex_buffers
This allows updating only a subrange of buffer bindings.
set_vertex_buffers(pipe, start_slot, count, NULL) unbinds buffers in that
range. Binding NULL resources unbinds buffers too (both buffer and user_buffer
must be NULL).
The meta ops are adapted to only save, change, and restore the single slot
they use. The cso_context can save and restore only one vertex buffer slot.
The clients can query which one it is using cso_get_aux_vertex_buffer_slot.
It's currently set to 0. (the Draw module breaks if it's set to non-zero)
It should decrease the CPU overhead when using a lot of meta ops, but
the drivers must be able to treat each vertex buffer slot as a separate
state (only r600g does so at the moment).
I can imagine this also being useful for optimizing some OpenGL use cases.
Reviewed-by: Brian Paul <[email protected]>
Diffstat (limited to 'src/gallium/auxiliary/util/u_vbuf.c')
-rw-r--r-- | src/gallium/auxiliary/util/u_vbuf.c | 223 |
1 files changed, 137 insertions, 86 deletions
diff --git a/src/gallium/auxiliary/util/u_vbuf.c b/src/gallium/auxiliary/util/u_vbuf.c index 1cc83c3dd39..7fb492882f5 100644 --- a/src/gallium/auxiliary/util/u_vbuf.c +++ b/src/gallium/auxiliary/util/u_vbuf.c @@ -89,17 +89,17 @@ struct u_vbuf { /* This is what was set in set_vertex_buffers. * May contain user buffers. */ struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; - unsigned nr_vertex_buffers; + uint32_t enabled_vb_mask; - /* Saved vertex buffers. */ - struct pipe_vertex_buffer vertex_buffer_saved[PIPE_MAX_ATTRIBS]; - unsigned nr_vertex_buffers_saved; + /* Saved vertex buffer. */ + unsigned aux_vertex_buffer_slot; + struct pipe_vertex_buffer aux_vertex_buffer_saved; /* Vertex buffers for the driver. - * There are no user buffers. */ + * There are usually no user buffers. */ struct pipe_vertex_buffer real_vertex_buffer[PIPE_MAX_ATTRIBS]; - unsigned nr_real_vertex_buffers; - boolean vertex_buffers_dirty; + uint32_t dirty_real_vb_mask; /* which buffers are dirty since the last + call of set_vertex_buffers */ /* The index buffer. */ struct pipe_index_buffer index_buffer; @@ -174,11 +174,12 @@ void u_vbuf_get_caps(struct pipe_screen *screen, struct u_vbuf_caps *caps) struct u_vbuf * u_vbuf_create(struct pipe_context *pipe, - struct u_vbuf_caps *caps) + struct u_vbuf_caps *caps, unsigned aux_vertex_buffer_index) { struct u_vbuf *mgr = CALLOC_STRUCT(u_vbuf); mgr->caps = *caps; + mgr->aux_vertex_buffer_slot = aux_vertex_buffer_index; mgr->pipe = pipe; mgr->cso_cache = cso_cache_create(); mgr->translate_cache = translate_cache_create(); @@ -239,16 +240,20 @@ void u_vbuf_set_vertex_elements(struct u_vbuf *mgr, unsigned count, void u_vbuf_destroy(struct u_vbuf *mgr) { + struct pipe_screen *screen = mgr->pipe->screen; unsigned i; + unsigned num_vb = screen->get_shader_param(screen, PIPE_SHADER_VERTEX, + PIPE_SHADER_CAP_MAX_INPUTS); - mgr->pipe->set_vertex_buffers(mgr->pipe, 0, NULL); + mgr->pipe->set_vertex_buffers(mgr->pipe, 0, num_vb, NULL); - for (i = 0; i < mgr->nr_vertex_buffers; i++) { + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { pipe_resource_reference(&mgr->vertex_buffer[i].buffer, NULL); } - for (i = 0; i < mgr->nr_real_vertex_buffers; i++) { + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { pipe_resource_reference(&mgr->real_vertex_buffer[i].buffer, NULL); } + pipe_resource_reference(&mgr->aux_vertex_buffer_saved.buffer, NULL); translate_cache_destroy(mgr->translate_cache); u_upload_destroy(mgr->uploader); @@ -267,39 +272,42 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key, struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0}; struct pipe_resource *out_buffer = NULL; uint8_t *out_map; - unsigned i, out_offset; + unsigned out_offset, mask; /* Get a translate object. */ tr = translate_cache_find(mgr->translate_cache, key); /* Map buffers we want to translate. */ - for (i = 0; i < mgr->nr_vertex_buffers; i++) { - if (vb_mask & (1 << i)) { - struct pipe_vertex_buffer *vb = &mgr->vertex_buffer[i]; - unsigned offset = vb->buffer_offset + vb->stride * start_vertex; - uint8_t *map; + mask = vb_mask; + while (mask) { + struct pipe_vertex_buffer *vb; + unsigned offset; + uint8_t *map; + unsigned i = u_bit_scan(&mask); - if (vb->user_buffer) { - map = (uint8_t*)vb->user_buffer + offset; - } else { - unsigned size = vb->stride ? num_vertices * vb->stride - : sizeof(double)*4; + vb = &mgr->vertex_buffer[i]; + offset = vb->buffer_offset + vb->stride * start_vertex; - if (offset+size > vb->buffer->width0) { - size = vb->buffer->width0 - offset; - } + if (vb->user_buffer) { + map = (uint8_t*)vb->user_buffer + offset; + } else { + unsigned size = vb->stride ? num_vertices * vb->stride + : sizeof(double)*4; - map = pipe_buffer_map_range(mgr->pipe, vb->buffer, offset, size, - PIPE_TRANSFER_READ, &vb_transfer[i]); + if (offset+size > vb->buffer->width0) { + size = vb->buffer->width0 - offset; } - /* Subtract min_index so that indexing with the index buffer works. */ - if (unroll_indices) { - map -= vb->stride * min_index; - } + map = pipe_buffer_map_range(mgr->pipe, vb->buffer, offset, size, + PIPE_TRANSFER_READ, &vb_transfer[i]); + } - tr->set_buffer(tr, i, map, vb->stride, ~0); + /* Subtract min_index so that indexing with the index buffer works. */ + if (unroll_indices) { + map -= vb->stride * min_index; } + + tr->set_buffer(tr, i, map, vb->stride, ~0); } /* Translate. */ @@ -354,7 +362,10 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key, } /* Unmap all buffers. */ - for (i = 0; i < mgr->nr_vertex_buffers; i++) { + mask = vb_mask; + while (mask) { + unsigned i = u_bit_scan(&mask); + if (vb_transfer[i]) { pipe_buffer_unmap(mgr->pipe, vb_transfer[i]); } @@ -379,7 +390,7 @@ u_vbuf_translate_find_free_vb_slots(struct u_vbuf *mgr, /* Set the bit for each buffer which is incompatible, or isn't set. */ uint32_t unused_vb_mask = mgr->ve->incompatible_vb_mask_all | mgr->incompatible_vb_mask | - ~((1 << mgr->nr_vertex_buffers) - 1); + ~mgr->enabled_vb_mask; memset(fallback_vbs, ~0, sizeof(fallback_vbs)); @@ -389,20 +400,21 @@ u_vbuf_translate_find_free_vb_slots(struct u_vbuf *mgr, uint32_t index; if (!unused_vb_mask) { - /* fail, reset the number to its original value */ - mgr->nr_real_vertex_buffers = mgr->nr_vertex_buffers; return FALSE; } index = ffs(unused_vb_mask) - 1; fallback_vbs[type] = index; - if (index >= mgr->nr_real_vertex_buffers) { - mgr->nr_real_vertex_buffers = index + 1; - } /*printf("found slot=%i for type=%i\n", index, type);*/ } } + for (type = 0; type < VB_NUM; type++) { + if (mask[type]) { + mgr->dirty_real_vb_mask |= 1 << fallback_vbs[type]; + } + } + memcpy(mgr->fallback_vbs, fallback_vbs, sizeof(fallback_vbs)); return TRUE; } @@ -568,9 +580,11 @@ static void u_vbuf_translate_end(struct u_vbuf *mgr) if (vb != ~0) { pipe_resource_reference(&mgr->real_vertex_buffer[vb].buffer, NULL); mgr->fallback_vbs[i] = ~0; + + /* This will cause the buffer to be unbound in the driver later. */ + mgr->dirty_real_vb_mask |= 1 << vb; } } - mgr->nr_real_vertex_buffers = mgr->nr_vertex_buffers; } #define FORMAT_REPLACE(what, withwhat) \ @@ -700,45 +714,77 @@ static void u_vbuf_delete_vertex_elements(struct u_vbuf *mgr, void *cso) FREE(ve); } -void u_vbuf_set_vertex_buffers(struct u_vbuf *mgr, unsigned count, +void u_vbuf_set_vertex_buffers(struct u_vbuf *mgr, + unsigned start_slot, unsigned count, const struct pipe_vertex_buffer *bufs) { unsigned i; + /* which buffers are enabled */ + uint32_t enabled_vb_mask = 0; + /* which buffers are in user memory */ + uint32_t user_vb_mask = 0; + /* which buffers are incompatible with the driver */ + uint32_t incompatible_vb_mask = 0; + /* which buffers have a non-zero stride */ + uint32_t nonzero_stride_vb_mask = 0; + uint32_t mask = ~(((1ull << count) - 1) << start_slot); + + /* Zero out the bits we are going to rewrite completely. */ + mgr->user_vb_mask &= mask; + mgr->incompatible_vb_mask &= mask; + mgr->nonzero_stride_vb_mask &= mask; + mgr->enabled_vb_mask &= mask; + + if (!bufs) { + struct pipe_context *pipe = mgr->pipe; + /* Unbind. */ + mgr->dirty_real_vb_mask &= mask; - mgr->user_vb_mask = 0; - mgr->incompatible_vb_mask = 0; - mgr->nonzero_stride_vb_mask = 0; + for (i = 0; i < count; i++) { + unsigned dst_index = start_slot + i; + + pipe_resource_reference(&mgr->vertex_buffer[dst_index].buffer, NULL); + pipe_resource_reference(&mgr->real_vertex_buffer[dst_index].buffer, + NULL); + } + + pipe->set_vertex_buffers(pipe, start_slot, count, NULL); + return; + } for (i = 0; i < count; i++) { + unsigned dst_index = start_slot + i; const struct pipe_vertex_buffer *vb = &bufs[i]; - struct pipe_vertex_buffer *orig_vb = &mgr->vertex_buffer[i]; - struct pipe_vertex_buffer *real_vb = &mgr->real_vertex_buffer[i]; + struct pipe_vertex_buffer *orig_vb = &mgr->vertex_buffer[dst_index]; + struct pipe_vertex_buffer *real_vb = &mgr->real_vertex_buffer[dst_index]; + + if (!vb->buffer && !vb->user_buffer) { + pipe_resource_reference(&orig_vb->buffer, NULL); + pipe_resource_reference(&real_vb->buffer, NULL); + real_vb->user_buffer = NULL; + continue; + } pipe_resource_reference(&orig_vb->buffer, vb->buffer); orig_vb->user_buffer = vb->user_buffer; real_vb->buffer_offset = orig_vb->buffer_offset = vb->buffer_offset; real_vb->stride = orig_vb->stride = vb->stride; - real_vb->user_buffer = NULL; if (vb->stride) { - mgr->nonzero_stride_vb_mask |= 1 << i; - } - - if (!vb->buffer && !vb->user_buffer) { - pipe_resource_reference(&real_vb->buffer, NULL); - continue; + nonzero_stride_vb_mask |= 1 << dst_index; } + enabled_vb_mask |= 1 << dst_index; if ((!mgr->caps.buffer_offset_unaligned && vb->buffer_offset % 4 != 0) || (!mgr->caps.buffer_stride_unaligned && vb->stride % 4 != 0)) { - mgr->incompatible_vb_mask |= 1 << i; + incompatible_vb_mask |= 1 << dst_index; pipe_resource_reference(&real_vb->buffer, NULL); continue; } if (!mgr->caps.user_vertex_buffers && vb->user_buffer) { - mgr->user_vb_mask |= 1 << i; + user_vb_mask |= 1 << dst_index; pipe_resource_reference(&real_vb->buffer, NULL); continue; } @@ -747,16 +793,14 @@ void u_vbuf_set_vertex_buffers(struct u_vbuf *mgr, unsigned count, real_vb->user_buffer = vb->user_buffer; } - for (i = count; i < mgr->nr_vertex_buffers; i++) { - pipe_resource_reference(&mgr->vertex_buffer[i].buffer, NULL); - } - for (i = count; i < mgr->nr_real_vertex_buffers; i++) { - pipe_resource_reference(&mgr->real_vertex_buffer[i].buffer, NULL); - } + mgr->user_vb_mask |= user_vb_mask; + mgr->incompatible_vb_mask |= incompatible_vb_mask; + mgr->nonzero_stride_vb_mask |= nonzero_stride_vb_mask; + mgr->enabled_vb_mask |= enabled_vb_mask; - mgr->nr_vertex_buffers = count; - mgr->nr_real_vertex_buffers = count; - mgr->vertex_buffers_dirty = TRUE; + /* All changed buffers are marked as dirty, even the NULL ones, + * which will cause the NULL buffers to be unbound in the driver later. */ + mgr->dirty_real_vb_mask |= ~mask; } void u_vbuf_set_index_buffer(struct u_vbuf *mgr, @@ -782,7 +826,7 @@ u_vbuf_upload_buffers(struct u_vbuf *mgr, { unsigned i; unsigned nr_velems = mgr->ve->count; - unsigned nr_vbufs = mgr->nr_vertex_buffers; + unsigned nr_vbufs = util_last_bit(mgr->enabled_vb_mask); struct pipe_vertex_element *velems = mgr->using_translate ? mgr->fallback_velems : mgr->ve->ve; unsigned start_offset[PIPE_MAX_ATTRIBS]; @@ -978,6 +1022,19 @@ static void u_vbuf_get_minmax_index(struct pipe_context *pipe, } } +static void u_vbuf_set_driver_vertex_buffers(struct u_vbuf *mgr) +{ + struct pipe_context *pipe = mgr->pipe; + unsigned start_slot, count; + + start_slot = ffs(mgr->dirty_real_vb_mask) - 1; + count = util_last_bit(mgr->dirty_real_vb_mask >> start_slot); + + pipe->set_vertex_buffers(pipe, start_slot, count, + mgr->real_vertex_buffer + start_slot); + mgr->dirty_real_vb_mask = 0; +} + void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info) { struct pipe_context *pipe = mgr->pipe; @@ -990,11 +1047,10 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info) if (!mgr->incompatible_vb_mask && !mgr->ve->incompatible_elem_mask && !user_vb_mask) { + /* Set vertex buffers if needed. */ - if (mgr->vertex_buffers_dirty) { - pipe->set_vertex_buffers(pipe, mgr->nr_real_vertex_buffers, - mgr->real_vertex_buffer); - mgr->vertex_buffers_dirty = FALSE; + if (mgr->dirty_real_vb_mask) { + u_vbuf_set_driver_vertex_buffers(mgr); } pipe->draw_vbo(pipe, info); @@ -1062,6 +1118,7 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info) if (user_vb_mask) { u_vbuf_upload_buffers(mgr, start_vertex, num_vertices, info->start_instance, info->instance_count); + mgr->dirty_real_vb_mask |= user_vb_mask; } /* @@ -1086,8 +1143,7 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info) */ u_upload_unmap(mgr->uploader); - pipe->set_vertex_buffers(pipe, mgr->nr_real_vertex_buffers, - mgr->real_vertex_buffer); + u_vbuf_set_driver_vertex_buffers(mgr); if (unlikely(unroll_indices)) { struct pipe_draw_info new_info = *info; @@ -1105,7 +1161,6 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info) if (mgr->using_translate) { u_vbuf_translate_end(mgr); } - mgr->vertex_buffers_dirty = TRUE; } void u_vbuf_save_vertex_elements(struct u_vbuf *mgr) @@ -1126,22 +1181,18 @@ void u_vbuf_restore_vertex_elements(struct u_vbuf *mgr) mgr->ve_saved = NULL; } -void u_vbuf_save_vertex_buffers(struct u_vbuf *mgr) +void u_vbuf_save_aux_vertex_buffer_slot(struct u_vbuf *mgr) { - util_copy_vertex_buffers(mgr->vertex_buffer_saved, - &mgr->nr_vertex_buffers_saved, - mgr->vertex_buffer, - mgr->nr_vertex_buffers); + struct pipe_vertex_buffer *vb = + &mgr->vertex_buffer[mgr->aux_vertex_buffer_slot]; + + pipe_resource_reference(&mgr->aux_vertex_buffer_saved.buffer, vb->buffer); + memcpy(&mgr->aux_vertex_buffer_saved, vb, sizeof(*vb)); } -void u_vbuf_restore_vertex_buffers(struct u_vbuf *mgr) +void u_vbuf_restore_aux_vertex_buffer_slot(struct u_vbuf *mgr) { - unsigned i; - - u_vbuf_set_vertex_buffers(mgr, mgr->nr_vertex_buffers_saved, - mgr->vertex_buffer_saved); - for (i = 0; i < mgr->nr_vertex_buffers_saved; i++) { - pipe_resource_reference(&mgr->vertex_buffer_saved[i].buffer, NULL); - } - mgr->nr_vertex_buffers_saved = 0; + u_vbuf_set_vertex_buffers(mgr, mgr->aux_vertex_buffer_slot, 1, + &mgr->aux_vertex_buffer_saved); + pipe_resource_reference(&mgr->aux_vertex_buffer_saved.buffer, NULL); } |