summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Marek Olšák <[email protected]> 2012-01-03 22:01:03 +0100
committer: Marek Olšák <[email protected]> 2012-01-05 18:29:11 +0100
commit: ce44bae366ade59fb2dbdfbfe5a1ab8d24518a57 (patch)
tree: 8c1e2ff281a99acf07364de436b2958db7dedf26
parent: 2b851526c1c047bba7ebb7e51706b1694f027947 (diff)
u_vbuf: implement another upload codepath which unrolls indices
Improves performance from approximately 1 fps to 23 fps in Cogs. This new codepath is not always used; instead, a heuristic determines whether to use it. Using translate for uploads is generally slower than the existing upload path, so it is a win only in a few cases.
-rw-r--r-- src/gallium/auxiliary/util/u_vbuf.c 145
-rw-r--r-- src/gallium/auxiliary/util/u_vbuf.h 2
-rw-r--r-- src/gallium/drivers/r600/r600_state_common.c 2
3 files changed, 121 insertions, 28 deletions
diff --git a/src/gallium/auxiliary/util/u_vbuf.c b/src/gallium/auxiliary/util/u_vbuf.c
index 5dfee42bcbd..08f5c627a25 100644
--- a/src/gallium/auxiliary/util/u_vbuf.c
+++ b/src/gallium/auxiliary/util/u_vbuf.c
@@ -166,7 +166,9 @@ void u_vbuf_destroy(struct u_vbuf *mgrb)
static void
u_vbuf_translate_buffers(struct u_vbuf_priv *mgr, struct translate_key *key,
unsigned vb_mask, unsigned out_vb,
- int start_vertex, unsigned num_vertices)
+ int start_vertex, unsigned num_vertices,
+ int start_index, unsigned num_indices, int min_index,
+ bool unroll_indices)
{
struct translate *tr;
struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0};
@@ -198,21 +200,65 @@ u_vbuf_translate_buffers(struct u_vbuf_priv *mgr, struct translate_key *key,
PIPE_TRANSFER_READ, &vb_transfer[i]);
}
+ /* Subtract min_index so that indexing with the index buffer works. */
+ if (unroll_indices) {
+ map -= vb->stride * min_index;
+ }
+
tr->set_buffer(tr, i, map, vb->stride, ~0);
}
}
- /* Create and map the output buffer. */
- u_upload_alloc(mgr->b.uploader,
- key->output_stride * start_vertex,
- key->output_stride * num_vertices,
- &out_offset, &out_buffer,
- (void**)&out_map);
+ /* Translate. */
+ if (unroll_indices) {
+ struct pipe_index_buffer *ib = &mgr->b.index_buffer;
+ struct pipe_transfer *transfer = NULL;
+ unsigned offset = ib->offset + start_index * ib->index_size;
+ uint8_t *map;
- out_offset -= key->output_stride * start_vertex;
+ assert(ib->buffer && ib->index_size);
- /* Translate. */
- tr->run(tr, 0, num_vertices, 0, out_map);
+ if (u_vbuf_resource(ib->buffer)->user_ptr) {
+ map = u_vbuf_resource(ib->buffer)->user_ptr + offset;
+ } else {
+ map = pipe_buffer_map_range(mgr->pipe, ib->buffer, offset,
+ num_indices * ib->index_size,
+ PIPE_TRANSFER_READ, &transfer);
+ }
+
+ /* Create and map the output buffer. */
+ u_upload_alloc(mgr->b.uploader, 0,
+ key->output_stride * num_indices,
+ &out_offset, &out_buffer,
+ (void**)&out_map);
+
+ switch (ib->index_size) {
+ case 4:
+ tr->run_elts(tr, (unsigned*)map, num_indices, 0, out_map);
+ break;
+ case 2:
+ tr->run_elts16(tr, (uint16_t*)map, num_indices, 0, out_map);
+ break;
+ case 1:
+ tr->run_elts8(tr, map, num_indices, 0, out_map);
+ break;
+ }
+
+ if (transfer) {
+ pipe_buffer_unmap(mgr->pipe, transfer);
+ }
+ } else {
+ /* Create and map the output buffer. */
+ u_upload_alloc(mgr->b.uploader,
+ key->output_stride * start_vertex,
+ key->output_stride * num_vertices,
+ &out_offset, &out_buffer,
+ (void**)&out_map);
+
+ out_offset -= key->output_stride * start_vertex;
+
+ tr->run(tr, 0, num_vertices, 0, out_map);
+ }
/* Unmap all buffers. */
for (i = 0; i < mgr->b.nr_vertex_buffers; i++) {
@@ -283,7 +329,9 @@ u_vbuf_translate_find_free_vb_slots(struct u_vbuf_priv *mgr,
static boolean
u_vbuf_translate_begin(struct u_vbuf_priv *mgr,
int start_vertex, unsigned num_vertices,
- int start_instance, unsigned num_instances)
+ int start_instance, unsigned num_instances,
+ int start_index, unsigned num_indices, int min_index,
+ bool unroll_indices)
{
unsigned mask[VB_NUM] = {0};
struct translate_key key[VB_NUM];
@@ -310,16 +358,24 @@ u_vbuf_translate_begin(struct u_vbuf_priv *mgr,
for (i = 0; i < mgr->ve->count; i++) {
unsigned vb_index = mgr->ve->ve[i].vertex_buffer_index;
- if (!mgr->ve->incompatible_layout_elem[i] &&
- !mgr->incompatible_vb[vb_index]) {
- continue;
- }
-
if (!mgr->b.vertex_buffer[vb_index].stride) {
+ if (!mgr->ve->incompatible_layout_elem[i] &&
+ !mgr->incompatible_vb[vb_index]) {
+ continue;
+ }
mask[VB_CONST] |= 1 << vb_index;
} else if (mgr->ve->ve[i].instance_divisor) {
+ if (!mgr->ve->incompatible_layout_elem[i] &&
+ !mgr->incompatible_vb[vb_index]) {
+ continue;
+ }
mask[VB_INSTANCE] |= 1 << vb_index;
} else {
+ if (!unroll_indices &&
+ !mgr->ve->incompatible_layout_elem[i] &&
+ !mgr->incompatible_vb[vb_index]) {
+ continue;
+ }
mask[VB_VERTEX] |= 1 << vb_index;
}
}
@@ -336,15 +392,16 @@ u_vbuf_translate_begin(struct u_vbuf_priv *mgr,
struct translate_key *k;
struct translate_element *te;
unsigned bit, vb_index = mgr->ve->ve[i].vertex_buffer_index;
+ bit = 1 << vb_index;
if (!mgr->ve->incompatible_layout_elem[i] &&
- !mgr->incompatible_vb[vb_index]) {
+ !mgr->incompatible_vb[vb_index] &&
+ (!unroll_indices || !(mask[VB_VERTEX] & bit))) {
continue;
}
/* Set type to what we will translate.
* Whether vertex, instance, or constant attribs. */
- bit = 1 << vb_index;
for (type = 0; type < VB_NUM; type++) {
if (mask[type] & bit) {
break;
@@ -376,7 +433,9 @@ u_vbuf_translate_begin(struct u_vbuf_priv *mgr,
if (key[type].nr_elements) {
u_vbuf_translate_buffers(mgr, &key[type], mask[type],
mgr->fallback_vbs[type],
- start[type], num[type]);
+ start[type], num[type],
+ start_index, num_indices, min_index,
+ unroll_indices && type == VB_VERTEX);
/* Fixup the stride for constant attribs. */
if (type == VB_CONST) {
@@ -917,11 +976,12 @@ static void u_vbuf_get_minmax_index(struct pipe_context *pipe,
enum u_vbuf_return_flags
u_vbuf_draw_begin(struct u_vbuf *mgrb,
- const struct pipe_draw_info *info)
+ struct pipe_draw_info *info)
{
struct u_vbuf_priv *mgr = (struct u_vbuf_priv*)mgrb;
- int start_vertex;
+ int start_vertex, min_index;
unsigned num_vertices;
+ bool unroll_indices = false;
if (!mgr->incompatible_vb_layout &&
!mgr->ve->incompatible_layout &&
@@ -930,7 +990,7 @@ u_vbuf_draw_begin(struct u_vbuf *mgrb,
}
if (info->indexed) {
- int min_index, max_index;
+ int max_index;
bool index_bounds_valid = false;
if (info->max_index != ~0) {
@@ -950,6 +1010,17 @@ u_vbuf_draw_begin(struct u_vbuf *mgrb,
start_vertex = min_index + info->index_bias;
num_vertices = max_index + 1 - min_index;
+
+ /* Primitive restart doesn't work when unrolling indices.
+ * We would have to break this drawing operation into several ones. */
+ /* Use some heuristic to see if unrolling indices improves
+ * performance. */
+ if (!info->primitive_restart &&
+ num_vertices > info->count*2 &&
+ num_vertices-info->count > 32) {
+ /*printf("num_vertices=%i count=%i\n", num_vertices, info->count);*/
+ unroll_indices = true;
+ }
} else {
/* Nothing to do for per-vertex attribs. */
start_vertex = 0;
@@ -959,13 +1030,18 @@ u_vbuf_draw_begin(struct u_vbuf *mgrb,
} else {
start_vertex = info->start;
num_vertices = info->count;
+ min_index = 0;
}
/* Translate vertices with non-native layouts or formats. */
- if (mgr->incompatible_vb_layout || mgr->ve->incompatible_layout) {
+ if (unroll_indices ||
+ mgr->incompatible_vb_layout ||
+ mgr->ve->incompatible_layout) {
/* XXX check the return value */
u_vbuf_translate_begin(mgr, start_vertex, num_vertices,
- info->start_instance, info->instance_count);
+ info->start_instance, info->instance_count,
+ info->start, info->count, min_index,
+ unroll_indices);
}
/* Upload user buffers. */
@@ -974,7 +1050,15 @@ u_vbuf_draw_begin(struct u_vbuf *mgrb,
info->start_instance, info->instance_count);
}
- /*unsigned i;
+ /*
+ if (unroll_indices) {
+ printf("unrolling indices: start_vertex = %i, num_vertices = %i\n",
+ start_vertex, num_vertices);
+ util_dump_draw_info(stdout, info);
+ printf("\n");
+ }
+
+ unsigned i;
for (i = 0; i < mgr->b.nr_vertex_buffers; i++) {
printf("input %i: ", i);
util_dump_vertex_buffer(stdout, mgr->b.vertex_buffer+i);
@@ -984,7 +1068,16 @@ u_vbuf_draw_begin(struct u_vbuf *mgrb,
printf("real %i: ", i);
util_dump_vertex_buffer(stdout, mgr->b.real_vertex_buffer+i);
printf("\n");
- }*/
+ }
+ */
+
+ if (unroll_indices) {
+ info->indexed = FALSE;
+ info->index_bias = 0;
+ info->min_index = 0;
+ info->max_index = info->count - 1;
+ info->start = 0;
+ }
return U_VBUF_BUFFERS_UPDATED;
}
diff --git a/src/gallium/auxiliary/util/u_vbuf.h b/src/gallium/auxiliary/util/u_vbuf.h
index 57b93ddea6b..3669c9b874a 100644
--- a/src/gallium/auxiliary/util/u_vbuf.h
+++ b/src/gallium/auxiliary/util/u_vbuf.h
@@ -130,7 +130,7 @@ void u_vbuf_set_index_buffer(struct u_vbuf *mgr,
const struct pipe_index_buffer *ib);
enum u_vbuf_return_flags u_vbuf_draw_begin(struct u_vbuf *mgr,
- const struct pipe_draw_info *info);
+ struct pipe_draw_info *info);
unsigned u_vbuf_draw_max_vertex_count(struct u_vbuf *mgr);
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 9f6f5142c09..054ab90595c 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -605,7 +605,7 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo)
r600_update_derived_state(rctx);
- u_vbuf_draw_begin(rctx->vbuf_mgr, dinfo);
+ u_vbuf_draw_begin(rctx->vbuf_mgr, &info);
r600_vertex_buffer_update(rctx);
rdraw.vgt_num_indices = info.count;