summaryrefslogtreecommitdiffstats
path: root/src/mesa
diff options
context:
space:
mode:
authorFrancisco Jerez <[email protected]>2010-10-31 23:51:30 +0100
committerFrancisco Jerez <[email protected]>2010-11-06 01:59:59 +0100
commit2e64c2209e7f9d5acbcc9d70bf315732f3c403b3 (patch)
treedb64ff8d903ccee8c3cf0c8b894d5b313cc96fce /src/mesa
parentf1600d3a9725803f0526fb3fd673787307539d27 (diff)
vbo: Avoid unnecessary copy to/from current in vertex format upgrade.
Rebuilding the vertex format from scratch every time we see a new vertex attribute is rather costly, new attributes can be appended at the end avoiding a copy to current and then back again, and the full attr pointer recalculation. In the not so likely case of an already existing attribute having its size increased the old behavior is preserved, this could be optimized more, not sure if it's worth it. It's a modest improvement in FlightGear (that game punishes the VBO module pretty hard in general, framerate goes from some 46 FPS to 50 FPS with the nouveau classic driver). Signed-off-by: Brian Paul <[email protected]>
Diffstat (limited to 'src/mesa')
-rw-r--r--src/mesa/vbo/vbo_exec_api.c98
-rw-r--r--src/mesa/vbo/vbo_exec_draw.c13
2 files changed, 63 insertions, 48 deletions
diff --git a/src/mesa/vbo/vbo_exec_api.c b/src/mesa/vbo/vbo_exec_api.c
index 4988c51d44e..9b2d59f9e4c 100644
--- a/src/mesa/vbo/vbo_exec_api.c
+++ b/src/mesa/vbo/vbo_exec_api.c
@@ -220,8 +220,9 @@ static void vbo_exec_wrap_upgrade_vertex( struct vbo_exec_context *exec,
struct gl_context *ctx = exec->ctx;
struct vbo_context *vbo = vbo_context(ctx);
GLint lastcount = exec->vtx.vert_count;
- GLfloat *tmp;
- GLuint oldsz;
+ GLfloat *old_attrptr[VBO_ATTRIB_MAX];
+ GLuint old_vtx_size = exec->vtx.vertex_size;
+ GLuint oldsz = exec->vtx.attrsz[attr];
GLuint i;
/* Run pipeline on current vertices, copy wrapped vertices
@@ -229,86 +230,103 @@ static void vbo_exec_wrap_upgrade_vertex( struct vbo_exec_context *exec,
*/
vbo_exec_wrap_buffers( exec );
+ if (unlikely(exec->vtx.copied.nr)) {
+ /* We're in the middle of a primitive, keep the old vertex
+ * format around to be able to translate the copied vertices to
+ * the new format.
+ */
+ memcpy(old_attrptr, exec->vtx.attrptr, sizeof(old_attrptr));
+ }
- /* Do a COPY_TO_CURRENT to ensure back-copying works for the case
- * when the attribute already exists in the vertex and is having
- * its size increased.
- */
- vbo_exec_copy_to_current( exec );
-
+ if (unlikely(oldsz)) {
+ /* Do a COPY_TO_CURRENT to ensure back-copying works for the
+ * case when the attribute already exists in the vertex and is
+ * having its size increased.
+ */
+ vbo_exec_copy_to_current( exec );
+ }
/* Heuristic: Attempt to isolate attributes received outside
* begin/end so that they don't bloat the vertices.
*/
if (ctx->Driver.CurrentExecPrimitive == PRIM_OUTSIDE_BEGIN_END &&
- exec->vtx.attrsz[attr] == 0 &&
- lastcount > 8 &&
- exec->vtx.vertex_size) {
+ !oldsz && lastcount > 8 && exec->vtx.vertex_size) {
+ vbo_exec_copy_to_current( exec );
reset_attrfv( exec );
}
/* Fix up sizes:
*/
- oldsz = exec->vtx.attrsz[attr];
exec->vtx.attrsz[attr] = newsz;
-
exec->vtx.vertex_size += newsz - oldsz;
exec->vtx.max_vert = ((VBO_VERT_BUFFER_SIZE - exec->vtx.buffer_used) /
(exec->vtx.vertex_size * sizeof(GLfloat)));
exec->vtx.vert_count = 0;
exec->vtx.buffer_ptr = exec->vtx.buffer_map;
-
- /* Recalculate all the attrptr[] values
- */
- for (i = 0, tmp = exec->vtx.vertex ; i < VBO_ATTRIB_MAX ; i++) {
- if (exec->vtx.attrsz[i]) {
- exec->vtx.attrptr[i] = tmp;
- tmp += exec->vtx.attrsz[i];
+ if (unlikely(oldsz)) {
+ /* Size changed, recalculate all the attrptr[] values
+ */
+ GLfloat *tmp = exec->vtx.vertex;
+
+ for (i = 0 ; i < VBO_ATTRIB_MAX ; i++) {
+ if (exec->vtx.attrsz[i]) {
+ exec->vtx.attrptr[i] = tmp;
+ tmp += exec->vtx.attrsz[i];
+ }
+ else
+ exec->vtx.attrptr[i] = NULL; /* will not be dereferenced */
}
- else
- exec->vtx.attrptr[i] = NULL; /* will not be dereferenced */
- }
- /* Copy from current to repopulate the vertex with correct values.
- */
- vbo_exec_copy_from_current( exec );
+ /* Copy from current to repopulate the vertex with correct
+ * values.
+ */
+ vbo_exec_copy_from_current( exec );
+
+ } else {
+ /* Just have to append the new attribute at the end */
+ exec->vtx.attrptr[attr] = exec->vtx.vertex +
+ exec->vtx.vertex_size - newsz;
+ }
/* Replay stored vertices to translate them
* to new format here.
*
* -- No need to replay - just copy piecewise
*/
- if (exec->vtx.copied.nr)
- {
+ if (unlikely(exec->vtx.copied.nr)) {
GLfloat *data = exec->vtx.copied.buffer;
GLfloat *dest = exec->vtx.buffer_ptr;
GLuint j;
assert(exec->vtx.buffer_ptr == exec->vtx.buffer_map);
-
+
for (i = 0 ; i < exec->vtx.copied.nr ; i++) {
for (j = 0 ; j < VBO_ATTRIB_MAX ; j++) {
- if (exec->vtx.attrsz[j]) {
+ GLuint sz = exec->vtx.attrsz[j];
+
+ if (sz) {
+ GLint old_offset = old_attrptr[j] - exec->vtx.vertex;
+ GLint new_offset = exec->vtx.attrptr[j] - exec->vtx.vertex;
+
if (j == attr) {
if (oldsz) {
- COPY_CLEAN_4V( dest, oldsz, data );
- data += oldsz;
- dest += newsz;
+ GLfloat tmp[4];
+ COPY_CLEAN_4V(tmp, oldsz, data + old_offset);
+ COPY_SZ_4V(dest + new_offset, newsz, tmp);
} else {
- const GLfloat *current = (const GLfloat *)vbo->currval[j].Ptr;
- COPY_SZ_4V( dest, newsz, current );
- dest += newsz;
+ GLfloat *current = (GLfloat *)vbo->currval[j].Ptr;
+ COPY_SZ_4V(dest + new_offset, sz, current);
}
}
else {
- GLuint sz = exec->vtx.attrsz[j];
- COPY_SZ_4V( dest, sz, data );
- dest += sz;
- data += sz;
+ COPY_SZ_4V(dest + new_offset, sz, data + old_offset);
}
}
}
+
+ data += old_vtx_size;
+ dest += exec->vtx.vertex_size;
}
exec->vtx.buffer_ptr = dest;
diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c
index 71ac0066cac..94aa021ac9a 100644
--- a/src/mesa/vbo/vbo_exec_draw.c
+++ b/src/mesa/vbo/vbo_exec_draw.c
@@ -160,7 +160,6 @@ vbo_exec_bind_arrays( struct gl_context *ctx )
struct vbo_exec_context *exec = &vbo->exec;
struct gl_client_array *arrays = exec->vtx.arrays;
const GLuint count = exec->vtx.vert_count;
- const GLubyte *data = (GLubyte *) exec->vtx.buffer_map;
const GLuint *map;
GLuint attr;
GLbitfield varying_inputs = 0x0;
@@ -215,6 +214,9 @@ vbo_exec_bind_arrays( struct gl_context *ctx )
const GLuint src = map[attr];
if (exec->vtx.attrsz[src]) {
+ GLsizeiptr offset = (GLbyte *)exec->vtx.attrptr[src] -
+ (GLbyte *)exec->vtx.vertex;
+
/* override the default array set above */
ASSERT(attr < Elements(exec->vtx.inputs));
ASSERT(attr < Elements(exec->vtx.arrays)); /* arrays[] */
@@ -222,17 +224,13 @@ vbo_exec_bind_arrays( struct gl_context *ctx )
if (_mesa_is_bufferobj(exec->vtx.bufferobj)) {
/* a real buffer obj: Ptr is an offset, not a pointer*/
- GLsizeiptr offset;
assert(exec->vtx.bufferobj->Pointer); /* buf should be mapped */
- offset = (GLbyte *) data -
- (GLbyte *) exec->vtx.bufferobj->Pointer +
- exec->vtx.bufferobj->Offset;
assert(offset >= 0);
- arrays[attr].Ptr = (void *) offset;
+ arrays[attr].Ptr = (GLubyte *)exec->vtx.bufferobj->Offset + offset;
}
else {
/* Ptr into ordinary app memory */
- arrays[attr].Ptr = (void *) data;
+ arrays[attr].Ptr = (GLubyte *)exec->vtx.buffer_map + offset;
}
arrays[attr].Size = exec->vtx.attrsz[src];
arrays[attr].StrideB = exec->vtx.vertex_size * sizeof(GLfloat);
@@ -245,7 +243,6 @@ vbo_exec_bind_arrays( struct gl_context *ctx )
exec->vtx.bufferobj);
arrays[attr]._MaxElement = count; /* ??? */
- data += exec->vtx.attrsz[src] * sizeof(GLfloat);
varying_inputs |= 1 << attr;
}
}