19 files changed, 1382 insertions, 464 deletions
diff --git a/src/mesa/drivers/dri/r200/r200_context.c b/src/mesa/drivers/dri/r200/r200_context.c
index 02651587a6f..a1533d7f3e4 100644
--- a/src/mesa/drivers/dri/r200/r200_context.c
+++ b/src/mesa/drivers/dri/r200/r200_context.c
@@ -442,7 +442,6 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
     */
    _tnl_destroy_pipeline( ctx );
    _tnl_install_pipeline( ctx, r200_pipeline );
-   ctx->Driver.FlushVertices = r200FlushVertices;
 
    /* Try and keep materials and vertices separate:
     */
diff --git a/src/mesa/drivers/dri/r200/r200_swtcl.h b/src/mesa/drivers/dri/r200/r200_swtcl.h
index ce2b6b5f06b..ccf817988c5 100644
--- a/src/mesa/drivers/dri/r200/r200_swtcl.h
+++ b/src/mesa/drivers/dri/r200/r200_swtcl.h
@@ -42,7 +42,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 extern void r200InitSwtcl( GLcontext *ctx );
 extern void r200DestroySwtcl( GLcontext *ctx );
 
-extern void r200FlushVertices( GLcontext *ctx, GLuint flags );
 extern void r200ChooseRenderState( GLcontext *ctx );
 extern void r200ChooseVertexState( GLcontext *ctx );
 
diff --git a/src/mesa/drivers/dri/r300/radeon_vtxfmt_a.c b/src/mesa/drivers/dri/r300/radeon_vtxfmt_a.c
index 72c03c53ad9..0625e5bc571 100644
--- a/src/mesa/drivers/dri/r300/radeon_vtxfmt_a.c
+++ b/src/mesa/drivers/dri/r300/radeon_vtxfmt_a.c
@@ -46,6 +46,8 @@
 #include "state.h"
 #include "image.h"
 
+#include "vbo/vbo_context.h"
+
 #define CONV_VB(a, b) rvb->AttribPtr[(a)].size = vb->b->size, \
 			rvb->AttribPtr[(a)].type = GL_FLOAT, \
 			rvb->AttribPtr[(a)].stride = vb->b->stride, \
@@ -129,15 +131,7 @@ static int setup_arrays(r300ContextPtr rmesa, GLint start)
 			CONV(i, VertexAttrib[i]);
 	
 	for (i=0; i < VERT_ATTRIB_MAX; i++) {
-		if (enabled & (1 << i)) {
-			rmesa->state.VB.AttribPtr[i].data += rmesa->state.VB.AttribPtr[i].stride * start;
-		} else {
-			def.data = ctx->Current.Attrib[i];
-			memcpy(&rmesa->state.VB.AttribPtr[i], &def, sizeof(struct dt));
-		}
-		
-		/*if(rmesa->state.VB.AttribPtr[i].data == ctx->Current.Attrib[i])
-			fprintf(stderr, "%d is default coord\n", i);*/
+	   rmesa->state.VB.AttribPtr[i].data += rmesa->state.VB.AttribPtr[i].stride * start;
 	}
 	
 	for(i=0; i < VERT_ATTRIB_MAX; i++){
@@ -177,177 +171,18 @@ static int setup_arrays(r300ContextPtr rmesa, GLint start)
 
 void radeon_init_vtxfmt_a(r300ContextPtr rmesa);
 
-static void radeonDrawElements( GLenum mode, GLsizei count, GLenum type, const GLvoid *c_indices )
-{
-	GET_CURRENT_CONTEXT(ctx);
-	r300ContextPtr rmesa = R300_CONTEXT(ctx);
-	int elt_size;
-	int i;
-	unsigned int min = ~0, max = 0;
-	struct tnl_prim prim;
-	static void *ptr = NULL;
-	struct r300_dma_region rvb;
-	const GLvoid *indices = c_indices;
-	
-	if (count > 65535) {
-		WARN_ONCE("Too many verts!\n");
-		goto fallback;
-	}
-	
-	if (ctx->Array.ElementArrayBufferObj->Name) {
-		/* use indices in the buffer object */
-		if (!ctx->Array.ElementArrayBufferObj->Data) {
-			_mesa_warning(ctx, "DrawRangeElements with empty vertex elements buffer!");
-			return;
-		}
-		/* actual address is the sum of pointers */
-		indices = (GLvoid *)
-		ADD_POINTERS(ctx->Array.ElementArrayBufferObj->Data, (const GLubyte *) c_indices);
-	}
-	
-	if (!_mesa_validate_DrawElements( ctx, mode, count, type, indices ))
-		return;
-	
-	FLUSH_CURRENT( ctx, 0 );
-	
-	memset(&rvb, 0, sizeof(rvb));
-	switch (type) {
-	case GL_UNSIGNED_BYTE:
-		for (i=0; i < count; i++) {
-			if(((unsigned char *)indices)[i] < min)
-				min = ((unsigned char *)indices)[i];
-			if(((unsigned char *)indices)[i] > max)
-				max = ((unsigned char *)indices)[i];
-		}
-		
-#ifdef FORCE_32BITS_ELTS
-		elt_size = 4;
-#else
-		elt_size = 2;
-#endif		
-		r300AllocDmaRegion(rmesa, &rvb, count * elt_size, elt_size);
-		rvb.aos_offset = GET_START(&rvb);
-		ptr = rvb.address + rvb.start;
-			
-#ifdef FORCE_32BITS_ELTS
-		for (i=0; i < count; i++)
-			((unsigned int *)ptr)[i] = ((unsigned char *)indices)[i] - min;
-#else
-		for (i=0; i < count; i++)
-			((unsigned short int *)ptr)[i] = ((unsigned char *)indices)[i] - min;
-#endif
-	break;
-		
-	case GL_UNSIGNED_SHORT:
-		for (i=0; i < count; i++) {
-			if(((unsigned short int *)indices)[i] < min)
-				min = ((unsigned short int *)indices)[i];
-			if(((unsigned short int *)indices)[i] > max)
-				max = ((unsigned short int *)indices)[i];
-		}
-		
-#ifdef FORCE_32BITS_ELTS
-		elt_size = 4;
-#else
-		elt_size = 2;
-#endif
-		
-		r300AllocDmaRegion(rmesa, &rvb, count * elt_size, elt_size);
-		rvb.aos_offset = GET_START(&rvb);
-		ptr = rvb.address + rvb.start;
-		
-#ifdef FORCE_32BITS_ELTS
-		for (i=0; i < count; i++)
-			((unsigned int *)ptr)[i] = ((unsigned short int *)indices)[i] - min;
-#else
-		for (i=0; i < count; i++)
-			((unsigned short int *)ptr)[i] = ((unsigned short int *)indices)[i] - min;
-#endif
-	break;
-	
-	case GL_UNSIGNED_INT:
-		for (i=0; i < count; i++) {
-			if(((unsigned int *)indices)[i] < min)
-				min = ((unsigned int *)indices)[i];
-			if(((unsigned int *)indices)[i] > max)
-				max = ((unsigned int *)indices)[i];
-		}
-		
-#ifdef FORCE_32BITS_ELTS
-		elt_size = 4;
-#else
-		if (max - min <= 65535)
-			elt_size = 2;
-		else 
-			elt_size = 4;
-#endif
-		r300AllocDmaRegion(rmesa, &rvb, count * elt_size, elt_size);
-		rvb.aos_offset = GET_START(&rvb);
-		ptr = rvb.address + rvb.start;
-		
-		
-		if (elt_size == 2)
-			for (i=0; i < count; i++)
-				((unsigned short int *)ptr)[i] = ((unsigned int *)indices)[i] - min;
-		else
-			for (i=0; i < count; i++)
-				((unsigned int *)ptr)[i] = ((unsigned int *)indices)[i] - min;
-	break;
-	
-	default:
-		WARN_ONCE("Unknown elt type!\n");
-	goto fallback;
-	}
-	
-	if (ctx->NewState) 
-		_mesa_update_state( ctx );
-	
-	r300UpdateShaders(rmesa);
-	
-	if (setup_arrays(rmesa, min) >= R300_FALLBACK_TCL) {
-		r300ReleaseDmaRegion(rmesa, &rvb, __FUNCTION__);
-		goto fallback;
-	}
-	
-	rmesa->state.VB.Count = max - min + 1;
-	
-	r300UpdateShaderStates(rmesa);
-	
-	rmesa->state.VB.Primitive = &prim;
-	rmesa->state.VB.PrimitiveCount = 1;
-	
-	prim.mode = mode | PRIM_BEGIN | PRIM_END;
-	if (rmesa->state.VB.LockCount)
-		prim.start = min - rmesa->state.VB.LockFirst;
-	else
-		prim.start = 0;
-	prim.count = count;
-	
-	rmesa->state.VB.Elts = ptr;
-	rmesa->state.VB.elt_size = elt_size;
-	
-	if (r300_run_vb_render(ctx, NULL)) {
-		r300ReleaseDmaRegion(rmesa, &rvb, __FUNCTION__);
-		goto fallback;
-	}
-	
-	if(rvb.buf)
-		radeon_mm_use(rmesa, rvb.buf->id);
-	
-	r300ReleaseDmaRegion(rmesa, &rvb, __FUNCTION__);
-	return;
-	
-	fallback:
-	_tnl_array_init(ctx);
-	_mesa_install_exec_vtxfmt( ctx, &TNL_CONTEXT(ctx)->exec_vtxfmt );
-	CALL_DrawElements(GET_DISPATCH(), (mode, count, type, c_indices));
-	radeon_init_vtxfmt_a(rmesa);
-	_mesa_install_exec_vtxfmt( ctx, &TNL_CONTEXT(ctx)->exec_vtxfmt );
-}
 
-static void radeonDrawRangeElements(GLenum mode, GLuint min, GLuint max, GLsizei count, GLenum type, const GLvoid *c_indices)
+/* Returns GL_TRUE when the draw needs no further handling and GL_FALSE
+ * when the caller has to fall back (radeon_draw_prims tests the result).
+ */
+static GLboolean radeonDrawRangeElements(GLcontext *ctx, GLenum mode,
+					 GLuint min, GLuint max,
+					 GLsizei count, GLenum type,
+					 const GLvoid *c_indices)
 {
-	GET_CURRENT_CONTEXT(ctx);
+#if 1
+	return GL_FALSE;
+#else
 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
 	struct tnl_prim prim;
 	int elt_size;
@@ -371,26 +206,23 @@ static void radeonDrawRangeElements(GLenum mode, GLuint min, GLuint max, GLsizei
 				indices += i * _mesa_sizeof_type(type);
 				count -= i;
 			}
-			return ;
+			return GL_TRUE;
 		}
 		WARN_ONCE("Too many verts!\n");
-		goto fallback;
+		return GL_FALSE;
 	}
 	
 	if (ctx->Array.ElementArrayBufferObj->Name) {
 		/* use indices in the buffer object */
 		if (!ctx->Array.ElementArrayBufferObj->Data) {
 			_mesa_warning(ctx, "DrawRangeElements with empty vertex elements buffer!");
-			return;
+			return GL_TRUE;
 		}
 		/* actual address is the sum of pointers */
 		indices = (GLvoid *)
 		ADD_POINTERS(ctx->Array.ElementArrayBufferObj->Data, (const GLubyte *) c_indices);
 	}
 	
-	if (!_mesa_validate_DrawRangeElements( ctx, mode, min, max, count, type, indices ))
-		return;
-	
 	FLUSH_CURRENT( ctx, 0 );
 #ifdef OPTIMIZE_ELTS
 	min = 0;
@@ -465,7 +297,7 @@ static void radeonDrawRangeElements(GLenum mode, GLuint min, GLuint max, GLsizei
 	
 	default:
 		WARN_ONCE("Unknown elt type!\n");
-	goto fallback;
+		return GL_FALSE;
 	}
 	
 	/* XXX: setup_arrays before state update? */
@@ -477,7 +309,7 @@ static void radeonDrawRangeElements(GLenum mode, GLuint min, GLuint max, GLsizei
 
 	if (setup_arrays(rmesa, min) >= R300_FALLBACK_TCL) {
 		r300ReleaseDmaRegion(rmesa, &rvb, __FUNCTION__);
-		goto fallback;
+		return GL_FALSE;
 	}
 
 	rmesa->state.VB.Count = max - min + 1;
@@ -501,37 +333,34 @@ static void radeonDrawRangeElements(GLenum mode, GLuint min, GLuint max, GLsizei
 	
 	if (r300_run_vb_render(ctx, NULL)) {
 		r300ReleaseDmaRegion(rmesa, &rvb, __FUNCTION__);
-		goto fallback;
+		return GL_FALSE;
 	}
 	
 	if(rvb.buf)
 		radeon_mm_use(rmesa, rvb.buf->id);
 	
 	r300ReleaseDmaRegion(rmesa, &rvb, __FUNCTION__);
-	return ;
-	
-	fallback:
-	_tnl_array_init(ctx);
-	_mesa_install_exec_vtxfmt( ctx, &TNL_CONTEXT(ctx)->exec_vtxfmt );
-	CALL_DrawRangeElements(GET_DISPATCH(), (mode, min, max, count, type, c_indices));
-	radeon_init_vtxfmt_a(rmesa);
-	_mesa_install_exec_vtxfmt( ctx, &TNL_CONTEXT(ctx)->exec_vtxfmt );
+	return GL_TRUE;
+#endif
 }
 
-static void radeonDrawArrays( GLenum mode, GLint start, GLsizei count )
+static GLboolean radeonDrawArrays( GLcontext *ctx,
+				   GLenum mode, GLint start, GLsizei count )
 {
+#if 1
+	return GL_FALSE;
+#else
 	GET_CURRENT_CONTEXT(ctx);
 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
 	struct tnl_prim prim;
 	
 	if (count > 65535) {
+	   /* TODO: split into multiple draws.
+	    */
 		WARN_ONCE("Too many verts!\n");
-		goto fallback;
+		return GL_FALSE;
 	}
 	
-	if (!_mesa_validate_DrawArrays( ctx, mode, start, count ))
-		return;
-	
 	FLUSH_CURRENT( ctx, 0 );
 	
 	if (ctx->NewState) 
@@ -542,7 +371,7 @@ static void radeonDrawArrays( GLenum mode, GLint start, GLsizei count )
 	r300UpdateShaders(rmesa);
 
 	if (setup_arrays(rmesa, start) >= R300_FALLBACK_TCL)
-		goto fallback;
+		return GL_FALSE;
 
 	rmesa->state.VB.Count = count;
 
@@ -564,31 +393,70 @@ static void radeonDrawArrays( GLenum mode, GLint start, GLsizei count )
 	rmesa->state.VB.elt_max = 0;
 	
 	if (r300_run_vb_render(ctx, NULL))
-		goto fallback;
+	   return GL_FALSE;
 
-	return ;
-	
-	fallback:
-	_tnl_array_init(ctx);
-	_mesa_install_exec_vtxfmt( ctx, &TNL_CONTEXT(ctx)->exec_vtxfmt );
-	CALL_DrawArrays(GET_DISPATCH(), (mode, start, count));
-	radeon_init_vtxfmt_a(rmesa);
-	_mesa_install_exec_vtxfmt( ctx, &TNL_CONTEXT(ctx)->exec_vtxfmt );
+	return GL_TRUE;
+#endif
 }
 
+/* vbo draw_prims hook.  Each primitive is first offered to radeonDrawArrays
+ * (no index buffer) or radeonDrawRangeElements; when that returns GL_FALSE,
+ * that primitive and all remaining ones go to _tnl_draw_prims instead.
+ */
+static void radeon_draw_prims( GLcontext *ctx,
+			       const struct gl_client_array *arrays[],
+			       const struct _mesa_prim *prim,
+			       GLuint nr_prims,
+			       const struct _mesa_index_buffer *ib,
+			       GLuint min_index,
+			       GLuint max_index)
+{
+	GLuint i;
+
+	if (ib == NULL) {
+		for (i = 0; i < nr_prims; i++) {
+			if (!radeonDrawArrays(ctx,
+					      prim[i].mode,
+					      prim[i].start,
+					      prim[i].count)) {
+				/* Fallback */
+				_tnl_draw_prims(ctx,
+						arrays,
+						prim + i,
+						nr_prims - i,
+						ib, min_index, max_index);
+				return;
+			}
+		}
+	} else {
+		for (i = 0; i < nr_prims; i++) {
+			if (!radeonDrawRangeElements(ctx,
+						     prim[i].mode,
+						     min_index,
+						     max_index,
+						     prim[i].count,
+						     ib->type,
+						     ib->ptr)) {
+				/* Fallback */
+				_tnl_draw_prims(ctx,
+						arrays,
+						prim + i,
+						nr_prims - i,
+						ib, min_index, max_index);
+				return;
+			}
+		}
+	}
+}
+	   
 void radeon_init_vtxfmt_a(r300ContextPtr rmesa)
 {
 	GLcontext *ctx;
-	GLvertexformat *vfmt;
-	
-	ctx = rmesa->radeon.glCtx; 
-	vfmt = (GLvertexformat *)ctx->TnlModule.Current;
-   
-	vfmt->DrawElements = radeonDrawElements;
-	vfmt->DrawArrays = radeonDrawArrays;
-	vfmt->DrawRangeElements = radeonDrawRangeElements;
+	struct vbo_context *vbo = vbo_context(rmesa->radeon.glCtx); /* local ctx is never assigned */
 	
+	vbo->draw_prims = radeon_draw_prims;
 }
+
 #endif
 
 #ifdef HW_VBOS
diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c
index 91f3af3c911..94d0ff63079 100644
--- a/src/mesa/main/context.c
+++ b/src/mesa/main/context.c
@@ -970,7 +970,6 @@ _mesa_init_current( GLcontext *ctx )
    ASSIGN_4V( ctx->Current.Attrib[VERT_ATTRIB_NORMAL], 0.0, 0.0, 1.0, 1.0 );
    ASSIGN_4V( ctx->Current.Attrib[VERT_ATTRIB_COLOR0], 1.0, 1.0, 1.0, 1.0 );
    ASSIGN_4V( ctx->Current.Attrib[VERT_ATTRIB_COLOR1], 0.0, 0.0, 0.0, 1.0 );
-   ASSIGN_4V( ctx->Current.Attrib[VERT_ATTRIB_FOG], 0.0, 0.0, 0.0, 0.0 );
    ASSIGN_4V( ctx->Current.Attrib[VERT_ATTRIB_COLOR_INDEX], 1.0, 0.0, 0.0, 1.0 );
    ASSIGN_4V( ctx->Current.Attrib[VERT_ATTRIB_EDGEFLAG], 1.0, 0.0, 0.0, 1.0 );
 }
diff --git a/src/mesa/sources b/src/mesa/sources
index b589111a19b..a32ecaf0278 100644
--- a/src/mesa/sources
+++ b/src/mesa/sources
@@ -143,6 +143,9 @@ VBO_SOURCES = \
 	vbo/vbo_exec_array.c \
 	vbo/vbo_exec_draw.c \
 	vbo/vbo_exec_eval.c \
+	vbo/vbo_split.c \
+	vbo/vbo_split_copy.c \
+	vbo/vbo_split_inplace.c \
 	vbo/vbo_save.c \
 	vbo/vbo_save_api.c \
 	vbo/vbo_save_draw.c \
diff --git a/src/mesa/tnl/t_draw.c b/src/mesa/tnl/t_draw.c
index c84a10856e0..0e7c2b6a689 100644
--- a/src/mesa/tnl/t_draw.c
+++ b/src/mesa/tnl/t_draw.c
@@ -32,6 +32,7 @@
 #include "state.h"
 #include "mtypes.h"
 #include "macros.h"
+#include "enums.h"
 
 #include "t_context.h"
 #include "t_pipeline.h"
@@ -41,13 +42,13 @@
 
 
 
-static GLfloat *get_space(GLcontext *ctx, GLuint bytes)
+static GLubyte *get_space(GLcontext *ctx, GLuint bytes)
 {
    TNLcontext *tnl = TNL_CONTEXT(ctx);
    GLubyte *space = _mesa_malloc(bytes);
    
    tnl->block[tnl->nr_blocks++] = space;
-   return (GLfloat *)space;
+   return space;
 }
 
 
@@ -97,7 +98,7 @@ static void _tnl_import_array( GLcontext *ctx,
 			       GLuint start,
 			       GLuint end,
 			       const struct gl_client_array *input,
-			       const char *ptr )
+			       const GLubyte *ptr )
 {
    TNLcontext *tnl = TNL_CONTEXT(ctx);
    struct vertex_buffer *VB = &tnl->vb;
@@ -108,7 +109,8 @@ static void _tnl_import_array( GLcontext *ctx,
 
    if (input->Type != GL_FLOAT) {
       const GLuint sz = input->Size;
-      GLfloat *fptr = get_space(ctx, count * sz * sizeof(GLfloat));
+      GLubyte *buf = get_space(ctx, count * sz * sizeof(GLfloat));
+      GLfloat *fptr = (GLfloat *)buf;
 
       switch (input->Type) {
       case GL_BYTE: 
@@ -137,7 +139,7 @@ static void _tnl_import_array( GLcontext *ctx,
 	 break;
       }
 
-      ptr = (const char *)fptr;
+      ptr = buf;
       stride = sz * sizeof(GLfloat);
    }
 
@@ -181,7 +183,7 @@ static GLboolean *_tnl_import_edgeflag( GLcontext *ctx,
 
 static void bind_inputs( GLcontext *ctx, 
 			 const struct gl_client_array *inputs[],
-			 GLint start, GLint end,
+			 GLint min_index, GLint max_index,
 			 struct gl_buffer_object **bo,
 			 GLuint *nr_bo )
 {
@@ -197,7 +199,7 @@ static void bind_inputs( GLcontext *ctx,
       if (inputs[i]->BufferObj->Name) { 
 	 if (!inputs[i]->BufferObj->Pointer) {
 	    bo[*nr_bo] = inputs[i]->BufferObj;
-	    *nr_bo++;
+	    (*nr_bo)++;
 	    ctx->Driver.MapBuffer(ctx, 
 				  GL_ARRAY_BUFFER,
 				  GL_READ_ONLY_ARB,
@@ -213,15 +215,20 @@ static void bind_inputs( GLcontext *ctx,
 	 ptr = inputs[i]->Ptr;
 
       /* Just make sure the array is floating point, otherwise convert to
-       * temporary storage.  Rebase arrays so that 'start' becomes
+       * temporary storage.  Rebase arrays so that 'min_index' becomes
        * element zero.
        *
        * XXX: remove the GLvector4f type at some stage and just use
        * client arrays.
        */
-      _tnl_import_array(ctx, i, start, end, inputs[i], ptr);
+      _tnl_import_array(ctx, i, min_index, max_index, inputs[i], ptr);
    }
 
+   /* We process only the vertices between min & max index:
+    */
+   VB->Count = max_index - min_index;
+
+
    /* Legacy pointers -- remove one day.
     */
    VB->ObjPtr = VB->AttribPtr[_TNL_ATTRIB_POS];
@@ -255,20 +262,23 @@ static void bind_inputs( GLcontext *ctx,
 
 /* Translate indices to GLuints and store in VB->Elts.
  */
-static void bind_indicies( GLcontext *ctx,
-			   const struct _mesa_index_buffer *ib,
-			   struct gl_buffer_object **bo,
-			   GLuint *nr_bo)
+static void bind_indices( GLcontext *ctx,
+			  const struct _mesa_index_buffer *ib,
+			  GLuint min_index,
+			  struct gl_buffer_object **bo,
+			  GLuint *nr_bo)
 {
    TNLcontext *tnl = TNL_CONTEXT(ctx);
    struct vertex_buffer *VB = &tnl->vb;
+   GLuint i;
+   void *ptr;
 
    if (!ib)
       return;
 
    if (ib->obj->Name && !ib->obj->Pointer) {
       bo[*nr_bo] = ib->obj;
-      *nr_bo++;
+      (*nr_bo)++;
       ctx->Driver.MapBuffer(ctx, 
 			    GL_ELEMENT_ARRAY_BUFFER,
 			    GL_READ_ONLY_ARB,
@@ -277,19 +287,63 @@ static void bind_indicies( GLcontext *ctx,
       assert(ib->obj->Pointer);
    }
 
-   VB->Elts = (GLuint *)ADD_POINTERS(ib->obj->Pointer, 
-				     ib->ptr);
-   
-   VB->Elts += ib->rebase;
+   ptr = ADD_POINTERS(ib->obj->Pointer, ib->ptr);
 
-   switch (ib->type) {
-   case GL_UNSIGNED_INT:
-      return;
-   case GL_UNSIGNED_SHORT:
-      break;
-   case GL_UNSIGNED_BYTE:
-      break;
+   if (ib->type == GL_UNSIGNED_INT && min_index == 0) {
+      VB->Elts = (GLuint *) ptr;
+      VB->Elts += ib->rebase;
+   }
+   else {
+      GLuint *elts = (GLuint *)get_space(ctx, ib->count * sizeof(GLuint));
+      VB->Elts = elts;
+
+      switch (ib->type) {
+      case GL_UNSIGNED_INT: {
+	 const GLuint *in = ((GLuint *)ptr) + ib->rebase;
+	 for (i = 0; i < ib->count; i++) 
+	    *elts++ = *in++ - min_index;
+	 break;
+      }
+      case GL_UNSIGNED_SHORT: {
+	 const GLushort *in = ((GLushort *)ptr) + ib->rebase;
+	 for (i = 0; i < ib->count; i++) 
+	    *elts++ = (GLuint)(*in++) - min_index;
+	 break;
+      }
+      case GL_UNSIGNED_BYTE: {
+	 const GLubyte *in = ((GLubyte *)ptr) + ib->rebase;
+	 for (i = 0; i < ib->count; i++) 
+	    *elts++ = (GLuint)(*in++) - min_index;
+	 break;
+      }
+      }      
+   }
+}
+
+/* Install the primitive list in the VB; when min_index is non-zero use a
+ * get_space() copy whose start values are reduced by min_index.
+ */
+static void bind_prims( GLcontext *ctx,
+			const struct _mesa_prim *prim,
+			GLuint nr_prims,
+			GLuint min_index )
+{
+   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
+
+   VB->PrimitiveCount = nr_prims;
+   if (min_index == 0) {
+      VB->Primitive = prim;
+   }
+   else {
+      const GLuint bytes = nr_prims * sizeof(*prim);
+      struct _mesa_prim *rebased = (struct _mesa_prim *)get_space(ctx, bytes);
+      GLuint n;
+      for (n = 0; n < nr_prims; n++) {
+	 rebased[n] = prim[n];
+	 rebased[n].start -= min_index;
+      }
+      VB->Primitive = rebased;
+   }
+}
 
 static void unmap_vbos( GLcontext *ctx,
@@ -320,26 +374,65 @@ void _tnl_draw_prims( GLcontext *ctx,
 {
    TNLcontext *tnl = TNL_CONTEXT(ctx);
    struct vertex_buffer *VB = &tnl->vb;
+   GLint max = VB->Size;
 
-   /* May need to map a vertex buffer object for every attribute plus
-    * one for the index buffer.
-    */
-   struct gl_buffer_object *bo[VERT_ATTRIB_MAX + 1];
-   GLuint nr_bo = 0;
+#ifdef TEST_SPLIT
+   max = 8 + MAX_CLIPPED_VERTICES;
+#endif
+   
+   assert(max_index > min_index);
+   assert(!(max_index & 0x80000000));
+
+   VB->Elts = NULL;
 
-   /* Binding inputs may imply mapping some vertex buffer objects.
-    * They will need to be unmapped below.
+#if 0
+   {
+      GLuint i;
+      _mesa_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index);
+      for (i = 0; i < nr_prims; i++)
+	 _mesa_printf("prim %d: %s start %d count %d\n", i, 
+		      _mesa_lookup_enum_by_nr(prim[i].mode),
+		      prim[i].start,
+		      prim[i].count);
+   }
+#endif
+
+   /* The software TNL pipeline has a fixed amount of storage for
+    * vertices and it is necessary to split incoming drawing commands
+    * if they exceed that limit.
     */
-   bind_inputs(ctx, arrays, min_index, max_index, bo, &nr_bo);
-   bind_indicies(ctx, ib, bo, &nr_bo);
+   if (max_index - min_index >= max - MAX_CLIPPED_VERTICES) {
+      struct split_limits limits;
+      limits.max_verts = max - MAX_CLIPPED_VERTICES;
+      limits.max_vb_size = ~0;
+      limits.max_indices = ~0;
+
+      /* This will split the buffers one way or another and
+       * recursively call back into this function.
+       */
+      vbo_split_prims( ctx, arrays, prim, nr_prims, ib, 
+		       min_index, max_index,
+		       _tnl_draw_prims,
+		       &limits );
+   }
+   else {
+      /* May need to map a vertex buffer object for every attribute plus
+       * one for the index buffer.
+       */
+      struct gl_buffer_object *bo[VERT_ATTRIB_MAX + 1];
+      GLuint nr_bo = 0;
 
-   VB->Primitive = prim;
-   VB->PrimitiveCount = nr_prims;
-   VB->Count = max_index - min_index;
+      /* Binding inputs may imply mapping some vertex buffer objects.
+       * They will need to be unmapped below.
+       */
+      bind_inputs(ctx, arrays, min_index, max_index+1, bo, &nr_bo);
+      bind_indices(ctx, ib, min_index, bo, &nr_bo);
+      bind_prims(ctx, prim, nr_prims, VB->Elts ? 0 : min_index );
 
-   TNL_CONTEXT(ctx)->Driver.RunPipeline(ctx);
+      TNL_CONTEXT(ctx)->Driver.RunPipeline(ctx);
 
-   unmap_vbos(ctx, bo, nr_bo);
-   free_space(ctx);
+      unmap_vbos(ctx, bo, nr_bo);
+      free_space(ctx);
+   }
 }
 
diff --git a/src/mesa/vbo/vbo.h b/src/mesa/vbo/vbo.h
index 80f7a3322bf..c81d83f9b61 100644
--- a/src/mesa/vbo/vbo.h
+++ b/src/mesa/vbo/vbo.h
@@ -65,4 +65,40 @@ void _vbo_DestroyContext( GLcontext *ctx );
 void _vbo_InvalidateState( GLcontext *ctx, GLuint new_state );
 
 
+typedef void (*vbo_draw_func)( GLcontext *ctx,
+			       const struct gl_client_array **arrays,
+			       const struct _mesa_prim *prims,
+			       GLuint nr_prims,
+			       const struct _mesa_index_buffer *ib,
+			       GLuint min_index,
+			       GLuint max_index );
+
+
+
+
+/* Utility function to cope with various constraints on tnl modules or
+ * hardware.  This can be used to split an incoming set of arrays and
+ * primitives against the following constraints:
+ *    - Maximum number of indices in index buffer.
+ *    - Maximum number of vertices referenced by index buffer.
+ *    - Maximum hardware vertex buffer size.
+ */
+struct split_limits {
+   GLuint max_verts;
+   GLuint max_indices;
+   GLuint max_vb_size;		/* bytes */
+};
+
+
+void vbo_split_prims( GLcontext *ctx,
+		      const struct gl_client_array *arrays[],
+		      const struct _mesa_prim *prim,
+		      GLuint nr_prims,
+		      const struct _mesa_index_buffer *ib,
+		      GLuint min_index,
+		      GLuint max_index,
+		      vbo_draw_func draw,
+		      const struct split_limits *limits );
+
+
 #endif
diff --git a/src/mesa/vbo/vbo_attrib_tmp.h b/src/mesa/vbo/vbo_attrib_tmp.h
index 72a8b04aca1..ff11c7d59a7 100644
--- a/src/mesa/vbo/vbo_attrib_tmp.h
+++ b/src/mesa/vbo/vbo_attrib_tmp.h
@@ -357,9 +357,10 @@ static void GLAPIENTRY TAG(VertexAttrib4fvARB)( GLuint index,
 }
 
 
-/* Although we don't export NV_vertex_program, these entrypoints are
+/* In addition to supporting NV_vertex_program, these entrypoints are
  * used by the display list and other code specifically because of
- * their property of aliasing with other attributes.
+ * their property of aliasing with other attributes.  (See
+ * vbo_save_loopback.c)
  */
 static void GLAPIENTRY TAG(VertexAttrib1fNV)( GLuint index, GLfloat x )
 {
diff --git a/src/mesa/vbo/vbo_context.c b/src/mesa/vbo/vbo_context.c
index 165e32da8a8..65998e7ff75 100644
--- a/src/mesa/vbo/vbo_context.c
+++ b/src/mesa/vbo/vbo_context.c
@@ -47,6 +47,14 @@ extern void _tnl_draw_prims( GLcontext *ctx,
 #define NR_GENERIC_ATTRIBS 16
 #define NR_MAT_ATTRIBS 12
 
+/* Returns 4 if attr[3] differs from 1.0, otherwise 3 if attr[2] differs
+ * from 0.0, otherwise 2 if attr[1] differs from 0.0, otherwise 1.
+ */
+static GLuint check_size( const GLfloat *attr )
+{
+   return (attr[3] != 1.0) ? 4 : (attr[2] != 0.0) ? 3 : (attr[1] != 0.0) ? 2 : 1;
+}
+
 static void init_legacy_currval(GLcontext *ctx)
 {
    struct vbo_context *vbo = vbo_context(ctx);
@@ -63,7 +71,7 @@ static void init_legacy_currval(GLcontext *ctx)
 
       /* Size will have to be determined at runtime:
        */
-      cl->Size = 1;
+      cl->Size = check_size(ctx->Current.Attrib[i]);
       cl->Stride = 0;
       cl->StrideB = 0;
       cl->Enabled = 1;
@@ -88,7 +96,6 @@ static void init_generic_currval(GLcontext *ctx)
       /* This will have to be determined at runtime:
        */
       cl->Size = 1;
-
       cl->Type = GL_FLOAT;
       cl->Ptr = (const void *)ctx->Current.Attrib[VERT_ATTRIB_GENERIC0 + i];
       cl->Stride = 0;
diff --git a/src/mesa/vbo/vbo_context.h b/src/mesa/vbo/vbo_context.h
index 4c5ed96765b..0dc1019b39f 100644
--- a/src/mesa/vbo/vbo_context.h
+++ b/src/mesa/vbo/vbo_context.h
@@ -79,13 +79,7 @@ struct vbo_context {
    /* Callback into the driver.  This must always succeed, the driver
     * is responsible for initiating any fallback actions required:
     */
-   void (*draw_prims)( GLcontext *ctx,
-		       const struct gl_client_array *arrays[],
-		       const struct _mesa_prim *prims,
-		       GLuint nr_prims,
-		       const struct _mesa_index_buffer *ib,
-		       GLuint min_index,
-		       GLuint max_index );
+   vbo_draw_func draw_prims;
 };
 
 
diff --git a/src/mesa/vbo/vbo_exec.h b/src/mesa/vbo/vbo_exec.h
index e28913b22af..a9b01e08e6a 100644
--- a/src/mesa/vbo/vbo_exec.h
+++ b/src/mesa/vbo/vbo_exec.h
@@ -130,9 +130,6 @@ struct vbo_exec_context
        * programs:
        */
       const struct gl_client_array *inputs[VERT_ATTRIB_MAX];
-
-
-      struct gl_buffer_object *index_obj;
    } array;
 };
 
diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c
index b3650e26978..cec353cf43b 100644
--- a/src/mesa/vbo/vbo_exec_array.c
+++ b/src/mesa/vbo/vbo_exec_array.c
@@ -81,7 +81,8 @@ static GLuint get_max_index( GLuint count, GLuint type,
  */
 static void bind_array_obj( GLcontext *ctx )
 {
-   struct vbo_exec_context *exec = &vbo_context(ctx)->exec;
+   struct vbo_context *vbo = vbo_context(ctx);
+   struct vbo_exec_context *exec = &vbo->exec;
    GLuint i;
 
    /* TODO: Fix the ArrayObj struct to keep legacy arrays in an array
@@ -89,15 +90,16 @@ static void bind_array_obj( GLcontext *ctx )
     * go away.
     */
    exec->array.legacy_array[VERT_ATTRIB_POS] = &ctx->Array.ArrayObj->Vertex;
+   exec->array.legacy_array[VERT_ATTRIB_WEIGHT] = &vbo->legacy_currval[VERT_ATTRIB_WEIGHT];
    exec->array.legacy_array[VERT_ATTRIB_NORMAL] = &ctx->Array.ArrayObj->Normal;
    exec->array.legacy_array[VERT_ATTRIB_COLOR0] = &ctx->Array.ArrayObj->Color;
    exec->array.legacy_array[VERT_ATTRIB_COLOR1] = &ctx->Array.ArrayObj->SecondaryColor;
    exec->array.legacy_array[VERT_ATTRIB_FOG] = &ctx->Array.ArrayObj->FogCoord;
    exec->array.legacy_array[VERT_ATTRIB_COLOR_INDEX] = &ctx->Array.ArrayObj->Index;
-   exec->array.legacy_array[VBO_ATTRIB_EDGEFLAG] = &ctx->Array.ArrayObj->EdgeFlag;
+   exec->array.legacy_array[VERT_ATTRIB_EDGEFLAG] = &ctx->Array.ArrayObj->EdgeFlag;
 
    for (i = 0; i < 8; i++)
-      exec->array.legacy_array[VBO_ATTRIB_TEX0 + i] = &ctx->Array.ArrayObj->TexCoord[i];
+      exec->array.legacy_array[VERT_ATTRIB_TEX0 + i] = &ctx->Array.ArrayObj->TexCoord[i];
 
    for (i = 0; i < VERT_ATTRIB_MAX; i++)
       exec->array.generic_array[i] = &ctx->Array.ArrayObj->VertexAttrib[i];
@@ -115,8 +117,6 @@ static void recalculate_input_bindings( GLcontext *ctx )
    exec->array.program_mode = get_program_mode(ctx);
    exec->array.enabled_flags = ctx->Array.ArrayObj->_Enabled;
 
-   /* TODO:  Get rid of NV_program (please!).
-    */
    switch (exec->array.program_mode) {
    case VP_NONE:
       /* When no vertex program is active, we put the material values
@@ -133,6 +133,13 @@ static void recalculate_input_bindings( GLcontext *ctx )
       for (i = 0; i < MAT_ATTRIB_MAX; i++) {
 	 inputs[VERT_ATTRIB_GENERIC0 + i] = &vbo->mat_currval[i];
       }
+
+      /* Fill the generic slots that follow the materials; just about any
+       * array could be used here:
+       */
+      for (i = MAT_ATTRIB_MAX; i < VERT_ATTRIB_MAX - VERT_ATTRIB_GENERIC0; i++)
+	 inputs[VERT_ATTRIB_GENERIC0 + i] = &vbo->generic_currval[i];
+
       break;
    case VP_NV:
       /* NV_vertex_program - attribute arrays alias and override
@@ -147,6 +154,13 @@ static void recalculate_input_bindings( GLcontext *ctx )
 	 else
 	    inputs[i] = &vbo->legacy_currval[i];
       }
+
+      /* Could use just about anything, just to fill in the empty
+       * slots:
+       */
+      for (i = VERT_ATTRIB_GENERIC0; i < VERT_ATTRIB_MAX; i++)
+	 inputs[i] = &vbo->generic_currval[i - VERT_ATTRIB_GENERIC0];
+
       break;
    case VP_ARB:
       /* ARB_vertex_program - Only the attribute zero (position) array
@@ -274,7 +288,9 @@ vbo_exec_DrawRangeElements(GLenum mode,
 
    if (ctx->NewState)
       _mesa_update_state( ctx );
-      
+
+   bind_arrays( ctx );
+
    ib.count = count;
    ib.type = type; 
    ib.obj = ctx->Array.ElementArrayBufferObj;
@@ -344,8 +360,6 @@ vbo_exec_DrawElements(GLenum mode, GLsizei count, GLenum type, const GLvoid *ind
 
 void vbo_exec_array_init( struct vbo_exec_context *exec )
 {
-   GLcontext *ctx = exec->ctx;
-
 #if 1
    exec->vtxfmt.DrawArrays = vbo_exec_DrawArrays;
    exec->vtxfmt.DrawElements = vbo_exec_DrawElements;
@@ -355,14 +369,10 @@ void vbo_exec_array_init( struct vbo_exec_context *exec )
    exec->vtxfmt.DrawElements = _mesa_noop_DrawElements;
    exec->vtxfmt.DrawRangeElements = _mesa_noop_DrawRangeElements;
 #endif
-
-   exec->array.index_obj = ctx->Driver.NewBufferObject(ctx, 1, GL_ARRAY_BUFFER_ARB);
 }
 
 
 void vbo_exec_array_destroy( struct vbo_exec_context *exec )
 {
-   GLcontext *ctx = exec->ctx;
-
-   ctx->Driver.DeleteBuffer(ctx, exec->array.index_obj);
+   /* nothing to do */
 }
diff --git a/src/mesa/vbo/vbo_save.h b/src/mesa/vbo/vbo_save.h
index 3051f5c59a1..b81f275a602 100644
--- a/src/mesa/vbo/vbo_save.h
+++ b/src/mesa/vbo/vbo_save.h
@@ -142,8 +142,6 @@ struct vbo_save_context {
 
    struct vbo_save_copied_vtx copied;
    
-   GLfloat CurrentFloatEdgeFlag;
-
    GLfloat *current[VBO_ATTRIB_MAX]; /* points into ctx->ListState */
    GLubyte *currentsz[VBO_ATTRIB_MAX];
 };
diff --git a/src/mesa/vbo/vbo_save_api.c b/src/mesa/vbo/vbo_save_api.c
index 8ceba2b832b..ade48d220e6 100644
--- a/src/mesa/vbo/vbo_save_api.c
+++ b/src/mesa/vbo/vbo_save_api.c
@@ -415,26 +415,14 @@ static void _save_copy_to_current( GLcontext *ctx )
    struct vbo_save_context *save = &vbo_context(ctx)->save; 
    GLuint i;
 
-   for (i = VBO_ATTRIB_POS+1 ; i <= VBO_ATTRIB_INDEX ; i++) {
+   for (i = VBO_ATTRIB_POS+1 ; i < VBO_ATTRIB_MAX ; i++) {
       if (save->attrsz[i]) {
 	 save->currentsz[i][0] = save->attrsz[i];
 	 COPY_CLEAN_4V(save->current[i], 
-		    save->attrsz[i], 
-		    save->attrptr[i]);
+		       save->attrsz[i], 
+		       save->attrptr[i]);
       }
    }
-
-   /* Edgeflag requires special treatment: 
-    *
-    * TODO: change edgeflag to GLfloat in Mesa.
-    */
-   if (save->attrsz[VBO_ATTRIB_EDGEFLAG]) {
-      ctx->ListState.ActiveEdgeFlag = 1;
-      save->CurrentFloatEdgeFlag = 
-	 save->attrptr[VBO_ATTRIB_EDGEFLAG][0];
-      ctx->ListState.CurrentEdgeFlag = 
-	 (save->CurrentFloatEdgeFlag == 1.0);
-   }
 }
 
 
@@ -443,7 +431,7 @@ static void _save_copy_from_current( GLcontext *ctx )
    struct vbo_save_context *save = &vbo_context(ctx)->save; 
    GLint i;
 
-   for (i = VBO_ATTRIB_POS+1 ; i <= VBO_ATTRIB_INDEX ; i++) 
+   for (i = VBO_ATTRIB_POS+1 ; i < VBO_ATTRIB_MAX ; i++) {
       switch (save->attrsz[i]) {
       case 4: save->attrptr[i][3] = save->current[i][3];
       case 3: save->attrptr[i][2] = save->current[i][2];
@@ -451,12 +439,6 @@ static void _save_copy_from_current( GLcontext *ctx )
       case 1: save->attrptr[i][0] = save->current[i][0];
       case 0: break;
       }
-
-   /* Edgeflag requires special treatment:
-    */
-   if (save->attrsz[VBO_ATTRIB_EDGEFLAG]) {
-      save->CurrentFloatEdgeFlag = (GLfloat)ctx->ListState.CurrentEdgeFlag;
-      save->attrptr[VBO_ATTRIB_EDGEFLAG][0] = save->CurrentFloatEdgeFlag;
    }
 }
 
@@ -527,7 +509,7 @@ static void _save_upgrade_vertex( GLcontext *ctx,
 
       /* Need to note this and fix up at runtime (or loopback):
        */
-      if (save->currentsz[attr][0] == 0) {
+      if (attr != VBO_ATTRIB_POS && save->currentsz[attr][0] == 0) {
 	 assert(oldsz == 0);
 	 save->dangling_attr_ref = GL_TRUE;
       }
@@ -1106,23 +1088,19 @@ static void _save_current_init( GLcontext *ctx )
    struct vbo_save_context *save = &vbo_context(ctx)->save;
    GLint i;
 
-   for (i = 0; i < VBO_ATTRIB_FIRST_MATERIAL; i++) {
-      save->currentsz[i] = &ctx->ListState.ActiveAttribSize[i];
-      save->current[i] = ctx->ListState.CurrentAttrib[i];
+   for (i = VBO_ATTRIB_POS; i <= VBO_ATTRIB_GENERIC15; i++) {
+      const GLuint j = i - VBO_ATTRIB_POS;
+      ASSERT(j < VERT_ATTRIB_MAX);
+      save->currentsz[i] = &ctx->ListState.ActiveAttribSize[j];
+      save->current[i] = ctx->ListState.CurrentAttrib[j];
    }
 
-   for (i = VBO_ATTRIB_FIRST_MATERIAL; i < VBO_ATTRIB_INDEX; i++) {
+   for (i = VBO_ATTRIB_FIRST_MATERIAL; i <= VBO_ATTRIB_MAT_FRONT_AMBIENT; i++) {
       const GLuint j = i - VBO_ATTRIB_FIRST_MATERIAL;
       ASSERT(j < MAT_ATTRIB_MAX);
       save->currentsz[i] = &ctx->ListState.ActiveMaterialSize[j];
       save->current[i] = ctx->ListState.CurrentMaterial[j];
    }
-
-   save->currentsz[VBO_ATTRIB_INDEX] = &ctx->ListState.ActiveIndex;
-   save->current[VBO_ATTRIB_INDEX] = &ctx->ListState.CurrentIndex;
-
-   save->currentsz[VBO_ATTRIB_EDGEFLAG] = &ctx->ListState.ActiveEdgeFlag;
-   save->current[VBO_ATTRIB_EDGEFLAG] = &save->CurrentFloatEdgeFlag;
 }
 
 /**
diff --git a/src/mesa/vbo/vbo_save_loopback.c b/src/mesa/vbo/vbo_save_loopback.c
index 941c4beea94..430333b84dd 100644
--- a/src/mesa/vbo/vbo_save_loopback.c
+++ b/src/mesa/vbo/vbo_save_loopback.c
@@ -44,7 +44,10 @@
 typedef void (*attr_func)( GLcontext *ctx, GLint target, const GLfloat * );
 
 
-/* Wrapper functions in case glVertexAttrib*fvNV doesn't exist */
+/* This file makes heavy use of the aliasing of NV vertex attributes
+ * with the legacy attributes, and also with ARB and Material
+ * attributes as currently implemented.
+ */
 static void VertexAttrib1fvNV(GLcontext *ctx, GLint target, const GLfloat *v)
 {
    CALL_VertexAttrib1fvNV(ctx->Exec, (target, v));
@@ -72,118 +75,6 @@ static attr_func vert_attrfunc[4] = {
    VertexAttrib4fvNV
 };
 
-#if 0
-static void VertexAttrib1fvARB(GLcontext *ctx, GLint target, const GLfloat *v)
-{
-   CALL_VertexAttrib1fvARB(ctx->Exec, (target, v));
-}
-
-static void VertexAttrib2fvARB(GLcontext *ctx, GLint target, const GLfloat *v)
-{
-   CALL_VertexAttrib2fvARB(ctx->Exec, (target, v));
-}
-
-static void VertexAttrib3fvARB(GLcontext *ctx, GLint target, const GLfloat *v)
-{
-   CALL_VertexAttrib3fvARB(ctx->Exec, (target, v));
-}
-
-static void VertexAttrib4fvARB(GLcontext *ctx, GLint target, const GLfloat *v)
-{
-   CALL_VertexAttrib4fvARB(ctx->Exec, (target, v));
-}
-
-
-static attr_func vert_attrfunc_arb[4] = {
-   VertexAttrib1fvARB,
-   VertexAttrib2fvARB,
-   VertexAttrib3fvARB,
-   VertexAttrib4fvARB
-};
-#endif
-
-
-
-
-
-
-static void mat_attr1fv( GLcontext *ctx, GLint target, const GLfloat *v )
-{
-   switch (target) {
-   case VBO_ATTRIB_MAT_FRONT_SHININESS:
-      CALL_Materialfv(ctx->Exec, ( GL_FRONT, GL_SHININESS, v ));
-      break;
-   case VBO_ATTRIB_MAT_BACK_SHININESS:
-      CALL_Materialfv(ctx->Exec, ( GL_BACK, GL_SHININESS, v ));
-      break;
-   }
-}
-
-
-static void mat_attr3fv( GLcontext *ctx, GLint target, const GLfloat *v )
-{
-   switch (target) {
-   case VBO_ATTRIB_MAT_FRONT_INDEXES:
-      CALL_Materialfv(ctx->Exec, ( GL_FRONT, GL_COLOR_INDEXES, v ));
-      break;
-   case VBO_ATTRIB_MAT_BACK_INDEXES:
-      CALL_Materialfv(ctx->Exec, ( GL_BACK, GL_COLOR_INDEXES, v ));
-      break;
-   }
-}
-
-
-static void mat_attr4fv( GLcontext *ctx, GLint target, const GLfloat *v )
-{
-   switch (target) {
-   case VBO_ATTRIB_MAT_FRONT_EMISSION:
-      CALL_Materialfv(ctx->Exec, ( GL_FRONT, GL_EMISSION, v ));
-      break;
-   case VBO_ATTRIB_MAT_BACK_EMISSION:
-      CALL_Materialfv(ctx->Exec, ( GL_BACK, GL_EMISSION, v ));
-      break;
-   case VBO_ATTRIB_MAT_FRONT_AMBIENT:
-      CALL_Materialfv(ctx->Exec, ( GL_FRONT, GL_AMBIENT, v ));
-      break;
-   case VBO_ATTRIB_MAT_BACK_AMBIENT:
-      CALL_Materialfv(ctx->Exec, ( GL_BACK, GL_AMBIENT, v ));
-      break;
-   case VBO_ATTRIB_MAT_FRONT_DIFFUSE:
-      CALL_Materialfv(ctx->Exec, ( GL_FRONT, GL_DIFFUSE, v ));
-      break;
-   case VBO_ATTRIB_MAT_BACK_DIFFUSE:
-      CALL_Materialfv(ctx->Exec, ( GL_BACK, GL_DIFFUSE, v ));
-      break;
-   case VBO_ATTRIB_MAT_FRONT_SPECULAR:
-      CALL_Materialfv(ctx->Exec, ( GL_FRONT, GL_SPECULAR, v ));
-      break;
-   case VBO_ATTRIB_MAT_BACK_SPECULAR:
-      CALL_Materialfv(ctx->Exec, ( GL_BACK, GL_SPECULAR, v ));
-      break;
-   }
-}
-
-
-static attr_func mat_attrfunc[4] = {
-   mat_attr1fv,
-   NULL,
-   mat_attr3fv,
-   mat_attr4fv
-};
-
-
-static void index_attr1fv(GLcontext *ctx, GLint target, const GLfloat *v)
-{
-   (void) target;
-   CALL_Indexf(ctx->Exec, (v[0]));
-}
-
-static void edgeflag_attr1fv(GLcontext *ctx, GLint target, const GLfloat *v)
-{
-   (void) target;
-   CALL_EdgeFlag(ctx->Exec, ((GLboolean)(v[0] == 1.0)));
-}
-
 struct loopback_attr {
    GLint target;
    GLint sz;
@@ -277,7 +168,10 @@ void vbo_loopback_vertex_list( GLcontext *ctx,
    struct loopback_attr la[VBO_ATTRIB_MAX];
    GLuint i, nr = 0;
 
-   for (i = 0 ; i <= VBO_ATTRIB_TEX7 ; i++) {
+   /* All Legacy, NV, ARB and Material attributes are routed through
+    * the NV attributes entrypoints:
+    */
+   for (i = 0 ; i < VBO_ATTRIB_MAX ; i++) {
       if (attrsz[i]) {
 	 la[nr].target = i;
 	 la[nr].sz = attrsz[i];
@@ -286,33 +180,6 @@ void vbo_loopback_vertex_list( GLcontext *ctx,
       }
    }
 
-   for (i = VBO_ATTRIB_MAT_FRONT_AMBIENT ; 
-	i <= VBO_ATTRIB_MAT_BACK_INDEXES ; 
-	i++) {
-      if (attrsz[i]) {
-	 la[nr].target = i;
-	 la[nr].sz = attrsz[i];
-	 la[nr].func = mat_attrfunc[attrsz[i]-1];
-	 nr++;
-      }
-   }
-
-   if (attrsz[VBO_ATTRIB_EDGEFLAG]) {
-      la[nr].target = VBO_ATTRIB_EDGEFLAG;
-      la[nr].sz = attrsz[VBO_ATTRIB_EDGEFLAG];
-      la[nr].func = edgeflag_attr1fv;
-      nr++;
-   }
-
-   if (attrsz[VBO_ATTRIB_INDEX]) {
-      la[nr].target = VBO_ATTRIB_INDEX;
-      la[nr].sz = attrsz[VBO_ATTRIB_INDEX];
-      la[nr].func = index_attr1fv;
-      nr++;
-   }
-
-   /* XXX ARB vertex attribs */
-
    for (i = 0 ; i < prim_count ; i++) {
       if ((prim[i].mode & VBO_SAVE_PRIM_WEAK) &&
 	  (ctx->Driver.CurrentExecPrimitive != PRIM_OUTSIDE_BEGIN_END))
diff --git a/src/mesa/vbo/vbo_split.c b/src/mesa/vbo/vbo_split.c
new file mode 100644
index 00000000000..171859a18e0
--- /dev/null
+++ b/src/mesa/vbo/vbo_split.c
@@ -0,0 +1,161 @@
+
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.5
+ *
+ * Copyright (C) 1999-2006  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Keith Whitwell <[email protected]>
+ */
+
+/* Deal with hardware and/or swtnl maximums:
+ * - maximum number of vertices in buffer
+ * - maximum number of elements (maybe zero)
+ *
+ * The maximums may vary with opengl state (eg if a larger hardware
+ * vertex is required in this state, the maximum number of vertices
+ * may be smaller than in another state).
+ *
+ * We want buffer splitting to be a convenience function for the code
+ * actually drawing the primitives rather than a system-wide maximum,
+ * otherwise it is hard to avoid pessimism.  
+ *
+ * For instance, if a driver has no hardware limits on vertex buffer
+ * dimensions, it would not ordinarily want to split vbos.  But if
+ * there is an unexpected fallback, eg memory manager fails to upload
+ * textures, it will want to pass the drawing commands onto swtnl,
+ * which does have limitations.  A convenience function allows swtnl
+ * to split the drawing and vbos internally without imposing its
+ * limitations on drivers which want to use it as a fallback path.
+ */
+
+#include "glheader.h"
+#include "imports.h"
+#include "mtypes.h"
+
+#include "vbo_split.h"
+#include "vbo.h"
+
+/* Tell whether 'mode' can be split without copying vertices.  *first
+ * and *incr are always written (0 and 1 when the answer is GL_FALSE). */
+GLboolean split_prim_inplace(GLenum mode, GLuint *first, GLuint *incr)
+{
+   GLboolean inplace = GL_TRUE;
+
+   switch (mode) {
+   case GL_POINTS:
+      *first = *incr = 1;
+      break;
+   case GL_LINES:
+      *first = *incr = 2;
+      break;
+   case GL_LINE_STRIP:
+      *first = 2;
+      *incr = 1;
+      break;
+   case GL_TRIANGLES:
+      *first = *incr = 3;
+      break;
+   case GL_TRIANGLE_STRIP:
+      *first = 3;
+      *incr = 1;
+      break;
+   case GL_QUADS:
+      *first = *incr = 4;
+      break;
+   case GL_QUAD_STRIP:
+      *first = 4;
+      *incr = 2;
+      break;
+   default:
+      *first = 0;
+      *incr = 1;		/* so that count % incr works */
+      inplace = GL_FALSE;
+      break;
+   }
+
+   return inplace;
+}
+
+/* Split a draw that exceeds 'limits' into several calls to 'draw',
+ * choosing the copying or the in-place strategy as explained below. */
+void vbo_split_prims( GLcontext *ctx,
+		      const struct gl_client_array *arrays[],
+		      const struct _mesa_prim *prim,
+		      GLuint nr_prims,
+		      const struct _mesa_index_buffer *ib,
+		      GLuint min_index,
+		      GLuint max_index,
+		      vbo_draw_func draw,
+		      const struct split_limits *limits )
+{
+  
+   if (ib) {
+      if (limits->max_indices == 0) {
+	 /* Could traverse the indices, re-emitting vertices in turn.
+	  * But it's hard to see why this case would be needed - for
+	  * software tnl, it is better to convert to non-indexed
+	  * rendering after transformation is complete, as is done in
+	  * the t_dd_rendertmp.h templates.  Are there any devices
+	  * with hardware tnl that cannot do indexed rendering?
+	  *
+	  * For now, this path is disabled.
+	  */
+	 assert(0);
+      }
+      else if (max_index - min_index >= limits->max_verts) {
+	 /* The referenced range (max_index - min_index + 1 vertices)
+	  * is too large for hardware or the swtnl module.  Traverse
+	  * the indices, re-emitting vertices in turn, with a vertex
+	  * cache to preserve some of the original sharing.
+	  */
+	 vbo_split_copy(ctx, arrays, prim, nr_prims, ib,
+			draw, limits );
+      }
+      else if (ib->count > limits->max_indices) {
+	 /* The index buffer is too large for hardware.  Try to split
+	  * on whole-primitive boundaries, otherwise try to split the
+	  * individual primitives.
+	  */
+	 vbo_split_inplace(ctx, arrays, prim, nr_prims, ib,
+			   min_index, max_index, draw, limits );
+      }
+      else {
+	 /* Why were we called? */
+	 assert(0);
+      }
+   }
+   else {
+      if (max_index - min_index >= limits->max_verts) {
+	 /* The vertex buffer is too large for hardware (or the swtnl
+	  * module).  Try to split on whole-primitive boundaries,
+	  * otherwise try to split the individual primitives.
+	  */
+	 vbo_split_inplace(ctx, arrays, prim, nr_prims, ib,
+			   min_index, max_index, draw, limits );
+      }
+      else {
+	 /* Why were we called? */
+	 assert(0);
+      }
+   }
+}
+
diff --git a/src/mesa/vbo/vbo_split.h b/src/mesa/vbo/vbo_split.h
new file mode 100644
index 00000000000..05888d048cb
--- /dev/null
+++ b/src/mesa/vbo/vbo_split.h
@@ -0,0 +1,72 @@
+/*
+ * mesa 3-D graphics library
+ * Version:  6.5
+ *
+ * Copyright (C) 1999-2006  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file vbo_split.h
+ * \brief Internal declarations shared by the vbo_split*.c files.
+ * \author Keith Whitwell
+ */
+
+
+/**
+ * \mainpage The VBO splitter
+ *
+ * These are the private declarations used internally by the
+ * vbo_split_prims() helper function.  Nobody outside the vbo_split*
+ * files needs to include or know about them.
+ */
+
+
+#ifndef _VBO_SPLIT_H
+#define _VBO_SPLIT_H
+
+#include "vbo.h"
+
+
+/* True if a primitive can be split without copying of vertices, false
+ * otherwise.
+ */
+GLboolean split_prim_inplace(GLenum mode, GLuint *first, GLuint *incr);
+
+void vbo_split_inplace( GLcontext *ctx,
+			const struct gl_client_array *arrays[],
+			const struct _mesa_prim *prim,
+			GLuint nr_prims,
+			const struct _mesa_index_buffer *ib,
+			GLuint min_index,
+			GLuint max_index,
+			vbo_draw_func draw,
+			const struct split_limits *limits );
+
+/* Requires ib != NULL:
+ */
+void vbo_split_copy( GLcontext *ctx,
+		     const struct gl_client_array *arrays[],
+		     const struct _mesa_prim *prim,
+		     GLuint nr_prims,
+		     const struct _mesa_index_buffer *ib,
+		     vbo_draw_func draw,
+		     const struct split_limits *limits );
+
+#endif
diff --git a/src/mesa/vbo/vbo_split_copy.c b/src/mesa/vbo/vbo_split_copy.c
new file mode 100644
index 00000000000..0adad71732f
--- /dev/null
+++ b/src/mesa/vbo/vbo_split_copy.c
@@ -0,0 +1,549 @@
+
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.5
+ *
+ * Copyright (C) 1999-2006  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Keith Whitwell <[email protected]>
+ */
+
+/* Split indexed primitives with per-vertex copying.
+ */
+
+#include "glheader.h"
+#include "imports.h"
+#include "macros.h"
+#include "enums.h"
+#include "mtypes.h"
+
+#include "vbo_split.h"
+#include "vbo.h"
+
+
+#define ELT_TABLE_SIZE 16
+
+/* Used for vertex-level splitting of indexed buffers.  Note that
+ * non-indexed primitives may be converted to indexed in some cases
+ * (eg loops, fans) in order to use this splitting path.
+ */
+struct copy_context {
+
+   GLcontext *ctx;
+   const struct gl_client_array **array;	/* caller's source arrays */
+   const struct _mesa_prim *prim;		/* caller's source prims */
+   GLuint nr_prims;
+   const struct _mesa_index_buffer *ib;	/* caller's source indices */
+   vbo_draw_func draw;			/* invoked by flush() */
+
+   const struct split_limits *limits;
+
+   struct {
+      GLuint attr;		/* index into array[] */
+      GLuint size;		/* bytes per vertex, from attr_size() */
+      const struct gl_client_array *array;
+      const GLubyte *src_ptr;	/* buffer mapping + array->Ptr */
+
+      struct gl_client_array dstarray;	/* describes the copy in dstbuf */
+
+   } varying[VERT_ATTRIB_MAX];	/* arrays with StrideB != 0 */
+   GLuint nr_varying;
+
+   const struct gl_client_array *dstarray_ptr[VERT_ATTRIB_MAX]; /* passed to draw */
+   struct _mesa_index_buffer dstib;	/* wraps dstelt, passed to draw */
+
+   GLuint *translated_elt_buf;	/* GLuint copy of ubyte/ushort indices, else NULL */
+   const GLuint *srcelt;	/* source indices as GLuint */
+
+   /* A baby hash table to avoid re-emitting (some) duplicate
+    * vertices when splitting indexed primitives.
+    */
+   struct { 
+      GLuint in;		/* source index, ~0 when the slot is cleared */
+      GLuint out;		/* index of the copy in dstbuf */
+   } vert_cache[ELT_TABLE_SIZE];
+      
+
+   GLuint vertex_size;		/* bytes, sum of varying[].size */
+   GLubyte *dstbuf;
+   GLubyte *dstptr;		/* dstptr == dstbuf + dstbuf_nr * vertex_size */
+   GLuint dstbuf_size;	/* in vertices */
+   GLuint dstbuf_nr;		/* count of emitted vertices, one more
+				 * than the largest value in dstelt.
+				 * Last argument of draw (our MaxIndex).
+				 */
+
+   GLuint *dstelt;		/* output element list */
+   GLuint dstelt_nr;		/* elements written so far */
+   GLuint dstelt_size;		/* capacity tested by check_flush() */
+
+#define MAX_PRIM 32
+   struct _mesa_prim dstprim[MAX_PRIM];	/* output prims awaiting flush() */
+   GLuint dstprim_nr;
+
+};
+
+
+static GLuint type_size( GLenum type )
+{
+   /* Width in bytes of a single component of the given GL data type;
+    * types not listed below yield 0. */
+   if (type == GL_BYTE)           return sizeof(GLbyte);
+   if (type == GL_UNSIGNED_BYTE)  return sizeof(GLubyte);
+   if (type == GL_SHORT)          return sizeof(GLshort);
+   if (type == GL_UNSIGNED_SHORT) return sizeof(GLushort);
+   if (type == GL_INT)            return sizeof(GLint);
+   if (type == GL_UNSIGNED_INT)   return sizeof(GLuint);
+   if (type == GL_FLOAT)          return sizeof(GLfloat);
+   if (type == GL_DOUBLE)         return sizeof(GLdouble);
+   return 0;
+}
+
+static GLuint attr_size( const struct gl_client_array *array )
+{
+   const GLuint component_bytes = type_size(array->Type);
+   return array->Size * component_bytes;	/* bytes per array element */
+}
+
+/* Reports whether the output buffers are nearly full.  The test trips
+ * four slots before either the vertex buffer or the element list is
+ * actually exhausted, leaving room to finish off the current prim.
+ */
+static GLboolean check_flush( struct copy_context *copy )
+{
+   const GLboolean verts_low = copy->dstbuf_nr + 4 > copy->dstbuf_size;
+   const GLboolean elts_low = copy->dstelt_nr + 4 > copy->dstelt_size;
+
+   if (verts_low || elts_low)
+      return GL_TRUE;
+
+   return GL_FALSE;
+}
+
+/* Draw everything accumulated so far, then reset the output state. */
+static void flush( struct copy_context *copy )
+{
+   GLuint slot = ELT_TABLE_SIZE;
+
+   /* Hand the pending prims, indices and vertices to the draw
+    * callback; the final two arguments are 0 and dstbuf_nr.
+    */
+   copy->dstib.count = copy->dstelt_nr;
+   copy->draw( copy->ctx, copy->dstarray_ptr,
+	       copy->dstprim, copy->dstprim_nr,
+	       &copy->dstib,
+	       0, copy->dstbuf_nr );
+
+   /* Invalidate every vertex-cache slot: the cached output indices
+    * refer to vertices that are about to be overwritten.
+    */
+   while (slot-- > 0)
+      copy->vert_cache[slot].in = ~0;
+
+   /* Rewind the output cursors so the buffers refill from the
+    * start.
+    */
+   copy->dstptr = copy->dstbuf;
+   copy->dstbuf_nr = 0;
+   copy->dstelt_nr = 0;
+   copy->dstprim_nr = 0;
+}
+
+/* Open the next output prim, starting at the current end of dstelt. */
+static void begin( struct copy_context *copy, GLenum mode, GLboolean begin_flag )
+{
+   struct _mesa_prim *prim = &copy->dstprim[copy->dstprim_nr];
+
+   _mesa_printf("begin %s (%d)\n", _mesa_lookup_enum_by_nr(mode), begin_flag);
+
+   prim->mode = mode;
+   prim->begin = begin_flag;
+   prim->start = copy->dstelt_nr;	/* end() computes count relative to this */
+}
+
+
+/* Emit source element elt_idx.  A small cache keyed on the low bits of
+ * the source index avoids re-copying recently-emitted vertices.
+ * Returns check_flush(): nonzero once the output is nearly full. */
+static GLuint elt(struct copy_context *copy, GLuint elt_idx)
+{
+   GLuint elt = copy->srcelt[elt_idx];
+   GLuint slot = elt & (ELT_TABLE_SIZE-1);	/* one slot per low-bit pattern */
+
+   _mesa_printf("elt %d\n", elt);	/* NOTE(review): unconditional debug output */
+
+   /* Look up the incoming element in the vertex cache.  On a miss,
+    * copy each varying attribute to dstptr and claim the slot.
+    */
+   if (copy->vert_cache[slot].in != elt) {
+      GLubyte *csr = copy->dstptr;
+      GLuint i;
+
+      _mesa_printf("  --> emit to dstelt %d\n", copy->dstbuf_nr);
+
+      for (i = 0; i < copy->nr_varying; i++) {
+	 const struct gl_client_array *srcarray = copy->varying[i].array;
+	 const GLubyte *srcptr = copy->varying[i].src_ptr + elt * srcarray->StrideB;
+
+	 memcpy(csr, srcptr, copy->varying[i].size);
+	 csr += copy->varying[i].size;	/* attributes are packed back to back */
+
+	 {
+	    const GLuint *f = (const GLuint *)srcptr;	/* debug dump as 32-bit words */
+	    GLuint j;
+	    _mesa_printf("  varying %d: ", i);
+	    for(j = 0; j < copy->varying[i].size / 4; j++)
+	       _mesa_printf("%x ", f[j]);
+	    _mesa_printf("\n");
+	 }
+	       
+      }
+
+      copy->vert_cache[slot].in = elt;
+      copy->vert_cache[slot].out = copy->dstbuf_nr++;	/* index of the new copy */
+      copy->dstptr += copy->vertex_size;
+
+      assert(csr == copy->dstptr);	/* copied sizes must sum to vertex_size */
+      assert(copy->dstptr == (copy->dstbuf + 
+				    copy->dstbuf_nr * 
+				    copy->vertex_size));
+   }
+   else
+      _mesa_printf("  --> reuse vertex\n");
+   
+   _mesa_printf("  --> emit %d\n", copy->vert_cache[slot].out);
+   copy->dstelt[copy->dstelt_nr++] = copy->vert_cache[slot].out;	/* append output index */
+   return check_flush(copy);
+}
+
+/* Close the current output prim; flush when the prim table or buffers fill. */
+static void end( struct copy_context *copy, GLboolean end_flag )
+{
+   struct _mesa_prim *out = &copy->dstprim[copy->dstprim_nr];
+
+   _mesa_printf("end (%d)\n", end_flag);
+   out->end = end_flag;
+   out->count = copy->dstelt_nr - out->start;
+
+   copy->dstprim_nr++;
+   if (copy->dstprim_nr == MAX_PRIM || check_flush(copy))
+      flush(copy);
+}
+
+/* Walk every source primitive, re-emitting its elements through elt()
+ * and closing/reopening the output prim whenever a split is requested. */
+static void replay_elts( struct copy_context *copy )
+{
+   GLuint i, j, k;
+   GLboolean split;
+
+   for (i = 0; i < copy->nr_prims; i++) {
+      const struct _mesa_prim *prim = &copy->prim[i];
+      const GLuint start = prim->start;
+      GLuint first, incr;
+
+      switch (prim->mode) {
+	 
+      case GL_LINE_LOOP:
+	 /* Convert to linestrip and emit the final vertex explicitly,
+	  * but only in the resultant strip that requires it.
+	  */
+	 j = 0;
+	 while (j != prim->count) {
+	    begin(copy, GL_LINE_STRIP, prim->begin && j == 0);
+
+	    for (split = GL_FALSE; j != prim->count && !split; j++)
+	       split = elt(copy, start + j);
+
+	    if (j == prim->count) {
+	       /* Done, emit final line.  Split doesn't matter as
+		* it is always raised a bit early so we can emit
+		* the last verts if necessary!
+		*/
+	       if (prim->end) 
+		  (void)elt(copy, start + 0);
+
+	       end(copy, prim->end);
+	    }
+	    else {
+	       /* Wrap: close this strip and step back one vertex so
+		* the next strip starts where this one stopped. */
+	       assert(split);
+	       end(copy, 0);
+	       j--;
+	    }
+	 }
+	 break;
+
+      case GL_TRIANGLE_FAN:
+      case GL_POLYGON:
+	 j = 2;			/* vertices 0 and j-1 are emitted per piece */
+	 while (j < prim->count) {	/* '<' also skips count < 2 */
+	    begin(copy, prim->mode, prim->begin && j == 2); /* first piece only */
+
+	    split = elt(copy, start+0); 
+	    assert(!split);
+
+	    split = elt(copy, start+j-1); 
+	    assert(!split);
+
+	    for (; j != prim->count && !split; j++)
+	       split = elt(copy, start+j);
+
+	    end(copy, prim->end && j == prim->count);
+
+	    if (j != prim->count) {
+	       /* Wrapped the primitive, need to repeat some vertices:
+		*/
+	       j -= 1;
+	    }
+	 }
+	 break;
+
+      default:
+	 (void)split_prim_inplace(prim->mode, &first, &incr);
+	 
+	 j = 0;
+	 while (j != prim->count) {
+
+	    begin(copy, prim->mode, prim->begin && j == 0);
+
+	    split = 0;
+	    for (k = 0; k < first; k++, j++)
+	       split |= elt(copy, start+j);
+
+	    assert(!split);
+
+	    for (; j != prim->count && !split; )
+	       for (k = 0; k < incr; k++, j++)
+		  split |= elt(copy, start+j);
+
+	    end(copy, prim->end && j == prim->count);
+
+	    if (j != prim->count) {
+	       /* Wrapped the primitive, need to repeat some vertices:
+		*/
+	       assert(j > first - incr);
+	       j -= (first - incr);
+	    }
+	 }
+	 break;
+      }
+   }
+
+   if (copy->dstprim_nr)
+      flush(copy);
+}
+
+/* Prepare for replay: list the varying arrays, map their buffer
+ * objects and the index buffer, widen the indices to GLuint and
+ * allocate the output vertex and element buffers. */
+static void replay_init( struct copy_context *copy )
+{
+   GLcontext *ctx = copy->ctx;
+   const GLubyte *srcidx;
+   GLuint i, offset;
+
+   /* Make a list of varying attributes and their vbo's.  Also
+    * calculate vertex size.
+    */
+   copy->vertex_size = 0;
+   for (i = 0; i < VERT_ATTRIB_MAX; i++) {
+      struct gl_buffer_object *vbo = copy->array[i]->BufferObj;
+
+      if (copy->array[i]->StrideB == 0) {
+	 copy->dstarray_ptr[i] = copy->array[i];
+      }
+      else {
+	 GLuint j = copy->nr_varying++;
+	 
+	 copy->varying[j].attr = i;
+	 copy->varying[j].array = copy->array[i];
+	 copy->varying[j].size = attr_size(copy->array[i]);
+	 copy->vertex_size += attr_size(copy->array[i]);
+      
+	 if (vbo->Name && !vbo->Pointer) 
+	    ctx->Driver.MapBuffer(ctx,
+				  GL_ARRAY_BUFFER_ARB, 
+				  GL_DYNAMIC_DRAW_ARB, /* XXX */
+				  vbo);
+
+	 copy->varying[j].src_ptr = ADD_POINTERS(vbo->Pointer,
+						 copy->array[i]->Ptr);
+
+	 copy->dstarray_ptr[i] = &copy->varying[j].dstarray;
+      }
+   }
+
+   /* There must always be an index buffer.  Currently require the
+    * caller convert non-indexed prims to indexed.  Could alternately
+    * do it internally.
+    */
+   if (copy->ib->obj->Name && !copy->ib->obj->Pointer) 
+      ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER_ARB, /* XXX */
+			    GL_DYNAMIC_DRAW_ARB, /* XXX */
+			    copy->ib->obj);
+
+   /* Resolve the indices against the buffer mapping for every index
+    * type, exactly as is done for the vertex arrays above: */
+   srcidx = ADD_POINTERS(copy->ib->obj->Pointer, copy->ib->ptr);
+   switch (copy->ib->type) {
+   case GL_UNSIGNED_BYTE:
+      copy->translated_elt_buf = _mesa_malloc(sizeof(GLuint) * copy->ib->count);
+      copy->srcelt = copy->translated_elt_buf;
+      for (i = 0; i < copy->ib->count; i++)
+	 copy->translated_elt_buf[i] = srcidx[i];
+      break;
+
+   case GL_UNSIGNED_SHORT:
+      copy->translated_elt_buf = _mesa_malloc(sizeof(GLuint) * copy->ib->count);
+      copy->srcelt = copy->translated_elt_buf;
+      for (i = 0; i < copy->ib->count; i++)
+	 copy->translated_elt_buf[i] = ((const GLushort *)srcidx)[i];
+      break;
+
+   case GL_UNSIGNED_INT:
+      copy->translated_elt_buf = NULL;
+      copy->srcelt = (const GLuint *)srcidx;
+      break;
+   }
+
+   /* Figure out the maximum allowed vertex buffer size:
+    */
+   if (copy->vertex_size * copy->limits->max_verts <= copy->limits->max_vb_size) {
+      copy->dstbuf_size = copy->limits->max_verts;
+   }
+   else {
+      copy->dstbuf_size = copy->limits->max_vb_size / copy->vertex_size;
+   }
+
+   /* Allocate an output vertex buffer:
+    *
+    * XXX:  This should be a VBO!
+    */
+   copy->dstbuf = _mesa_malloc(copy->dstbuf_size * 
+			       copy->vertex_size);   
+   copy->dstptr = copy->dstbuf;
+
+   /* Setup new vertex arrays to point into the output buffer: 
+    */
+   for (offset = 0, i = 0; i < copy->nr_varying; i++) {
+      const struct gl_client_array *src = copy->varying[i].array;
+      struct gl_client_array *dst = &copy->varying[i].dstarray;
+
+      dst->Size = src->Size;
+      dst->Type = src->Type;
+      dst->Stride = copy->vertex_size;
+      dst->StrideB = copy->vertex_size;
+      dst->Ptr = copy->dstbuf + offset;
+      dst->Enabled = GL_TRUE;
+      dst->Normalized = src->Normalized; /* data is copied verbatim */
+      dst->BufferObj = ctx->Array.NullBufferObj;
+      dst->_MaxElement = copy->dstbuf_size; /* may be less! */
+
+      offset += copy->varying[i].size;
+   }
+
+   /* Allocate an output element list.  dstelt_size counts GLuint
+    * elements, so scale it to bytes for the allocation: */
+   copy->dstelt_size = MIN2(65536,
+			    copy->ib->count * 2);
+   copy->dstelt_size = MIN2(copy->dstelt_size,
+			    copy->limits->max_indices);
+   copy->dstelt = _mesa_malloc(sizeof(GLuint) * copy->dstelt_size);
+   copy->dstelt_nr = 0;
+
+   /* Setup the new index buffer to point to the allocated element
+    * list:
+    */
+   copy->dstib.count = 0;	/* duplicates dstelt_nr */
+   copy->dstib.type = GL_UNSIGNED_INT;
+   copy->dstib.obj = ctx->Array.NullBufferObj;
+   copy->dstib.ptr = copy->dstelt;
+   copy->dstib.rebase = 0;	
+}
+
+
+/* Undo replay_init(): free the scratch buffers and unmap any buffer
+ * objects that are still mapped. */
+static void replay_finish( struct copy_context *copy )
+{
+   GLcontext *ctx = copy->ctx;
+   struct gl_buffer_object *ibo = copy->ib->obj;
+   GLuint v = 0;
+
+   /* Scratch memory: widened indices, output vertices, output elements. */
+   _mesa_free(copy->translated_elt_buf);
+   _mesa_free(copy->dstbuf);
+   _mesa_free(copy->dstelt);
+
+   /* Unmap the buffer object behind each varying array, skipping
+    * objects with Name == 0 and ones that are not currently mapped.
+    */
+   while (v < copy->nr_varying) {
+      struct gl_buffer_object *vbo = copy->varying[v].array->BufferObj;
+
+      if (vbo->Name && vbo->Pointer)
+	 ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB, vbo);
+      v++;
+   }
+
+   /* Same treatment for the index buffer: */
+   if (ibo->Name && ibo->Pointer)
+      ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB, /* XXX */ ibo);
+}
+
+void vbo_split_copy( GLcontext *ctx,
+		     const struct gl_client_array *arrays[],
+		     const struct _mesa_prim *prim,
+		     GLuint nr_prims,
+		     const struct _mesa_index_buffer *ib,
+		     vbo_draw_func draw,
+		     const struct split_limits *limits )
+{
+   struct copy_context state;
+   GLuint slot;
+
+   /* This path only handles indexed primitives: */
+   assert(ib);
+
+   /* Zero the whole context so that every counter and pointer not
+    * assigned below starts out as 0/NULL: */
+   memset(&state, 0, sizeof(state));
+
+   /* Record the caller's draw request: */
+   state.limits = limits;
+   state.draw = draw;
+   state.ib = ib;
+   state.nr_prims = nr_prims;
+   state.prim = prim;
+   state.array = arrays;
+   state.ctx = ctx;
+
+   /* Reset the source index of every vertex-cache slot to ~0: */
+   for (slot = ELT_TABLE_SIZE; slot-- > 0; )
+      state.vert_cache[slot].in = ~0;
+
+   /* Set up, replay the primitives, tear down: */
+   replay_init(&state);
+   replay_elts(&state);
+   replay_finish(&state);
+}
diff --git a/src/mesa/vbo/vbo_split_inplace.c b/src/mesa/vbo/vbo_split_inplace.c
new file mode 100644
index 00000000000..d3649c59db3
--- /dev/null
+++ b/src/mesa/vbo/vbo_split_inplace.c
@@ -0,0 +1,287 @@
+
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.5
+ *
+ * Copyright (C) 1999-2006  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Keith Whitwell <[email protected]>
+ */
+
+
+#include "mtypes.h"
+#include "macros.h"
+#include "enums.h"
+#include "vbo_split.h"
+
+
+#define MAX_PRIM 32  /* capacity of split_context.dstprim[] */
+
+/* Used for splitting without copying.
+ */
+struct split_context {
+   GLcontext *ctx;  /* handed to the draw callback */
+   const struct gl_client_array **array;  /* vertex arrays, passed through to draw */
+   const struct _mesa_prim *prim;  /* incoming primitives to split */
+   GLuint nr_prims;  /* number of entries in prim[] */
+   const struct _mesa_index_buffer *ib;  /* NULL for non-indexed drawing */
+   GLuint min_index;  /* with max_index: vertex range reported to draw ... */
+   GLuint max_index;  /* ... when ib is non-NULL */
+   vbo_draw_func draw;  /* callback invoked by flush_vertex() */
+
+   const struct split_limits *limits;  /* splitting limits; max_verts is read here */
+
+   struct _mesa_prim dstprim[MAX_PRIM];  /* queued output primitives */
+   GLuint dstprim_nr;  /* number of valid dstprim[] entries */
+};
+
+
+
+
+/* Emit the queued destination primitives through split->draw() and
+ * empty the queue.  Does nothing when no primitives are queued.
+ */
+static void flush_vertex( struct split_context *split )
+{
+   GLint min_index, max_index;
+
+   if (!split->dstprim_nr)
+      return;
+
+   if (split->ib) {
+      /* This should basically be multipass rendering over the same
+       * unchanging set of VBO's.  Would like the driver not to
+       * re-upload the data, or swtnl not to re-transform the
+       * vertices.
+       */
+      assert(split->max_index - split->min_index < split->limits->max_verts);
+      min_index = split->min_index;
+      max_index = split->max_index;
+   }
+   else {
+      /* Non-indexed rendering.  Cannot assume that the primitives are
+       * ordered by increasing vertex, because of entrypoints like
+       * MultiDrawArrays.
+       */
+      GLuint i;
+      min_index = split->dstprim[0].start;
+      max_index = min_index + split->dstprim[0].count - 1;
+
+      for (i = 1; i < split->dstprim_nr; i++) {
+	 GLuint tmp_min = split->dstprim[i].start;
+	 GLuint tmp_max = tmp_min + split->dstprim[i].count - 1;
+	 if (tmp_min < min_index)
+	    min_index = tmp_min;
+	 if (tmp_max > max_index)
+	    max_index = tmp_max;
+      }
+   }
+
+   assert(max_index >= min_index);
+
+   /* Indexed primitives address the index buffer, so pass it along;
+    * split->ib is NULL for non-indexed draws. */
+   split->draw( split->ctx, split->array,
+		split->dstprim, split->dstprim_nr,
+		split->ib,
+		min_index, max_index);
+
+   split->dstprim_nr = 0;
+}
+
+
+/* Flush if MAX_PRIM-1 primitives are queued, then hand out the next
+ * dstprim[] slot, zeroed. */
+static struct _mesa_prim *next_outprim( struct split_context *split )
+{
+   struct _mesa_prim *slot;
+   if (split->dstprim_nr == MAX_PRIM-1)
+      flush_vertex(split);
+
+   slot = &split->dstprim[split->dstprim_nr++];
+   memset(slot, 0, sizeof(*slot));
+   return slot;
+}
+
+/* Round value up to a multiple of alignment (must be a power of two). */
+static int align(int value, int alignment)
+{
+   const int mask = alignment - 1;
+   return (value + mask) & ~mask;
+}
+
+/* Break large primitives into smaller ones.  If not possible, convert
+ * the primitive to indexed and pass it to vbo_split_copy().
+ */
+static void split_prims( struct split_context *split)
+{
+   GLuint csr = 0;		/* vertices consumed from the max_verts budget */
+   GLuint i;
+
+   for (i = 0; i < split->nr_prims; i++) {
+      const struct _mesa_prim *prim = &split->prim[i];
+      GLuint first, incr;
+      GLboolean split_inplace = split_prim_inplace(prim->mode, &first, &incr);
+      GLuint count;
+
+      /* Always wrap on an even numbered vertex to avoid problems with
+       * triangle strips.
+       */
+      GLuint available = align(split->limits->max_verts - csr - 1, 2);
+      assert(split->limits->max_verts >= csr);
+
+      if (prim->count < first)
+	 continue;
+
+      count = prim->count - (prim->count - first) % incr;
+
+      if ((available < count && !split_inplace) ||
+	  (available < first && split_inplace)) {
+	 flush_vertex(split);
+	 csr = 0;
+	 available = align(split->limits->max_verts - csr - 1, 2);
+      }
+
+      if (available >= count) {
+	 struct _mesa_prim *outprim = next_outprim(split);
+	 *outprim = *prim;
+	 csr += prim->count;
+	 available = align(split->limits->max_verts - csr - 1, 2);
+      }
+      else if (split_inplace) {
+	 GLuint j, nr;
+
+	 for (j = 0 ; j < count ; ) {
+	    GLuint remaining = count - j;
+	    struct _mesa_prim *outprim = next_outprim(split);
+
+	    nr = MIN2( available, remaining );
+	    nr -= (nr - first) % incr;
+	    outprim->mode = prim->mode;
+	    outprim->begin = (j == 0 && prim->begin);
+	    outprim->end = (nr == remaining && prim->end);
+	    outprim->start = prim->start + j;
+	    outprim->count = nr;
+
+	    if (nr == remaining) {
+	       /* Finished.
+		*/
+	       j += nr;
+	       csr += nr;
+	       available = align(split->limits->max_verts - csr - 1, 2);
+	    }
+	    else {
+	       /* Wrapped the primitive; the next piece starts
+		* (first - incr) vertices before the end of this one. */
+	       j += nr - (first - incr);
+	       flush_vertex(split);
+	       csr = 0;
+	       available = align(split->limits->max_verts - csr - 1, 2);
+	    }
+	 }
+      }
+      else if (split->ib == NULL) {
+	 /* XXX: could at least send the first max_verts off from the
+	  * inplace buffers.
+	  */
+	 /* else convert to indexed primitive and pass to vbo_split_copy,
+	  * which will do the necessary copying and turn it back into a
+	  * vertex primitive for rendering...
+	  */
+	 struct _mesa_index_buffer ib;
+	 struct _mesa_prim tmpprim;
+	 GLuint *elts = malloc(count * sizeof(GLuint));
+	 GLuint j;
+
+	 /* Out of memory: drop this primitive rather than write the
+	  * element list through a NULL pointer. */
+	 if (elts == NULL)
+	    continue;
+
+	 for (j = 0; j < count; j++)
+	    elts[j] = prim->start + j;
+
+	 ib.count = count;
+	 ib.type = GL_UNSIGNED_INT;
+	 ib.obj = split->ctx->Array.NullBufferObj;
+	 ib.ptr = elts;
+	 ib.rebase = 0;		/* ? */
+
+	 tmpprim = *prim;
+	 tmpprim.indexed = 1;
+	 tmpprim.start = 0;
+	 tmpprim.count = count;
+
+	 flush_vertex(split);
+
+	 vbo_split_copy(split->ctx,
+			split->array,
+			&tmpprim, 1,
+			&ib,
+			split->draw,
+			split->limits);
+
+	 free(elts);
+      }
+      else {
+	 flush_vertex(split);
+
+	 vbo_split_copy(split->ctx,
+			split->array,
+			prim, 1,
+			split->ib,
+			split->draw,
+			split->limits);
+      }
+   }
+
+   flush_vertex(split);
+}
+
+
+/* Entry point for splitting without copying: package the arguments in
+ * a zeroed split_context and run split_prims() over it. */
+void vbo_split_inplace( GLcontext *ctx,
+			const struct gl_client_array *arrays[],
+			const struct _mesa_prim *prim,
+			GLuint nr_prims,
+			const struct _mesa_index_buffer *ib,
+			GLuint min_index,
+			GLuint max_index,
+			vbo_draw_func draw,
+			const struct split_limits *limits )
+{
+   struct split_context split;
+
+   memset(&split, 0, sizeof(split));
+   split.limits = limits;
+   split.draw = draw;
+   split.max_index = max_index;
+   split.min_index = min_index;
+   split.ib = ib;
+   split.nr_prims = nr_prims;
+   split.prim = prim;
+   split.array = arrays;
+   split.ctx = ctx;
+   split_prims( &split );
+}
+
+