6 files changed, 208 insertions, 71 deletions
diff --git a/src/gallium/drivers/svga/svga_draw.c b/src/gallium/drivers/svga/svga_draw.c
index 28ba470d8c7..aa096692888 100644
--- a/src/gallium/drivers/svga/svga_draw.c
+++ b/src/gallium/drivers/svga/svga_draw.c
@@ -242,6 +242,11 @@ svga_hwtnl_flush( struct svga_hwtnl *hwtnl )
 }
 
 
+void svga_hwtnl_set_index_bias( struct svga_hwtnl *hwtnl,
+				int index_bias)
+{
+   hwtnl->index_bias = index_bias;
+}
 
 
 
@@ -265,15 +270,16 @@ enum pipe_error svga_hwtnl_prim( struct svga_hwtnl *hwtnl,
          unsigned size = vb ? vb->width0 : 0;
          unsigned offset = hwtnl->cmd.vdecl[i].array.offset;
          unsigned stride = hwtnl->cmd.vdecl[i].array.stride;
-         unsigned index_bias = range->indexBias;
+         int index_bias = (int) range->indexBias + hwtnl->index_bias;
          unsigned width;
 
          assert(vb);
          assert(size);
          assert(offset < size);
-         assert(index_bias >= 0);
          assert(min_index <= max_index);
-         assert(offset + index_bias*stride < size);
+         if (index_bias >= 0) {
+            assert(offset + index_bias*stride < size);
+         }
          if (min_index != ~0) {
             assert(offset + (index_bias + min_index) * stride < size);
          }
@@ -394,6 +400,7 @@ enum pipe_error svga_hwtnl_prim( struct svga_hwtnl *hwtnl,
    hwtnl->cmd.max_index[hwtnl->cmd.prim_count] = max_index;
 
    hwtnl->cmd.prim[hwtnl->cmd.prim_count] = *range;
+   hwtnl->cmd.prim[hwtnl->cmd.prim_count].indexBias += hwtnl->index_bias;
 
    pipe_resource_reference(&hwtnl->cmd.prim_ib[hwtnl->cmd.prim_count], ib);
    hwtnl->cmd.prim_count++;
diff --git a/src/gallium/drivers/svga/svga_draw.h b/src/gallium/drivers/svga/svga_draw.h
index a2403d802be..1dac17421e1 100644
--- a/src/gallium/drivers/svga/svga_draw.h
+++ b/src/gallium/drivers/svga/svga_draw.h
@@ -79,5 +79,8 @@ svga_hwtnl_draw_range_elements( struct svga_hwtnl *hwtnl,
 enum pipe_error
 svga_hwtnl_flush( struct svga_hwtnl *hwtnl );
 
+void svga_hwtnl_set_index_bias( struct svga_hwtnl *hwtnl,
+                                int index_bias);
+
 
 #endif /* SVGA_DRAW_H_ */
diff --git a/src/gallium/drivers/svga/svga_draw_private.h b/src/gallium/drivers/svga/svga_draw_private.h
index ca658ac6745..8126f7ee23c 100644
--- a/src/gallium/drivers/svga/svga_draw_private.h
+++ b/src/gallium/drivers/svga/svga_draw_private.h
@@ -116,6 +116,13 @@ struct draw_cmd {
 struct svga_hwtnl {
    struct svga_context *svga;
    struct u_upload_mgr *upload_ib;
+
+   /* Additional negative index bias due to partial buffer uploads
+    * This is compensated for in the offset associated with all
+    * vertex buffers.
+    */
+
+   int index_bias;
    
    /* Flatshade information:
     */
diff --git a/src/gallium/drivers/svga/svga_pipe_draw.c b/src/gallium/drivers/svga/svga_pipe_draw.c
index a632fb12c94..8e1c764ef5f 100644
--- a/src/gallium/drivers/svga/svga_pipe_draw.c
+++ b/src/gallium/drivers/svga/svga_pipe_draw.c
@@ -37,6 +37,116 @@
 #include "svga_state.h"
 #include "svga_swtnl.h"
 #include "svga_debug.h"
+#include "svga_resource_buffer.h"
+#include "util/u_upload_mgr.h"
+
+/**
+ * svga_upload_user_buffers - upload parts of user buffers
+ *
+ * This function streams a part of a user buffer to hw and sets
+ * svga_buffer::source_offset to the first byte uploaded. After upload
+ * also svga_buffer::uploaded::buffer is set to !NULL
+ */
+
+static int
+svga_upload_user_buffers(struct svga_context *svga,
+                         unsigned start,
+                         unsigned count,
+                         unsigned instance_count)
+{
+   const struct pipe_vertex_element *ve = svga->curr.velems->velem;
+   unsigned i;
+   int ret;
+
+   for (i=0; i < svga->curr.velems->count; i++) {
+      struct pipe_vertex_buffer *vb =
+         &svga->curr.vb[ve[i].vertex_buffer_index];
+
+      if (vb->buffer && svga_buffer_is_user_buffer(vb->buffer)) {
+         struct svga_buffer *buffer = svga_buffer(vb->buffer);
+         unsigned first, size;
+         boolean flushed;
+         unsigned instance_div = ve[i].instance_divisor;
+
+         svga->dirty |= SVGA_NEW_VBUFFER;
+
+         if (instance_div) {
+            first = 0;
+            size = vb->stride *
+               (instance_count + instance_div - 1) / instance_div;
+         } else if (vb->stride) {
+            first = vb->stride * start;
+            size = vb->stride * count;
+         } else {
+            /* Only a single vertex!
+             * Upload with the largest vertex size the hw supports,
+             * if possible.
+             */
+            first = 0;
+            size = MIN2(16, vb->buffer->width0);
+         }
+
+         ret = u_upload_buffer( svga->upload_vb,
+                                0, first, size,
+                                &buffer->b.b,
+                                &buffer->uploaded.offset,
+                                &buffer->uploaded.buffer,
+                                &flushed);
+
+         if (ret)
+            return ret;
+
+         if (0)
+            debug_printf("%s: %d: orig buf %p upl buf %p ofs %d sofs %d"
+                         " sz %d\n",
+                         __FUNCTION__,
+                         i,
+                         buffer,
+                         buffer->uploaded.buffer,
+                         buffer->uploaded.offset,
+                         first,
+                         size);
+
+         vb->buffer_offset = buffer->uploaded.offset;
+         buffer->source_offset = first;
+      }
+   }
+
+   return PIPE_OK;
+}
+
+/**
+ * svga_release_user_upl_buffers - release uploaded parts of user buffers
+ *
+ * This function releases the hw copy of the uploaded fraction of the
+ * user-buffer. It's important to do this as soon as all draw calls
+ * affecting the uploaded fraction are issued, as this allows for
+ * efficient reuse of the hardware surface backing the uploaded fraction.
+ *
+ * svga_buffer::source_offset is set to 0, and svga_buffer::uploaded::buffer
+ * is set to 0.
+ */
+
+static void
+svga_release_user_upl_buffers(struct svga_context *svga)
+{
+   unsigned i;
+   unsigned nr;
+
+   nr = svga->curr.num_vertex_buffers;
+
+   for (i = 0; i < nr; ++i) {
+      struct pipe_vertex_buffer *vb = &svga->curr.vb[i];
+
+      if (vb->buffer && svga_buffer_is_user_buffer(vb->buffer)) {
+         struct svga_buffer *buffer = svga_buffer(vb->buffer);
+
+         buffer->source_offset = 0;
+         if (buffer->uploaded.buffer)
+            pipe_resource_reference(&buffer->uploaded.buffer, NULL);
+      }
+   }
+}
 
 
 
@@ -50,6 +160,7 @@ retry_draw_range_elements( struct svga_context *svga,
                            unsigned prim, 
                            unsigned start, 
                            unsigned count,
+                           unsigned instance_count,
                            boolean do_retry )
 {
    enum pipe_error ret = 0;
@@ -61,6 +172,10 @@ retry_draw_range_elements( struct svga_context *svga,
                              svga->curr.rast->templ.flatshade,
                              svga->curr.rast->templ.flatshade_first );
 
+   ret = svga_upload_user_buffers( svga, min_index + index_bias,
+                                   max_index - min_index + 1, instance_count );
+   if (ret != PIPE_OK)
+      goto retry;
 
    ret = svga_update_state( svga, SVGA_STATE_HW_DRAW );
    if (ret)
@@ -84,7 +199,7 @@ retry:
                                         index_buffer, index_size, index_bias,
                                         min_index, max_index,
                                         prim, start, count,
-                                        FALSE );
+                                        instance_count, FALSE );
    }
 
    return ret;
@@ -96,6 +211,7 @@ retry_draw_arrays( struct svga_context *svga,
                    unsigned prim, 
                    unsigned start, 
                    unsigned count,
+                   unsigned instance_count,
                    boolean do_retry )
 {
    enum pipe_error ret;
@@ -107,6 +223,11 @@ retry_draw_arrays( struct svga_context *svga,
                              svga->curr.rast->templ.flatshade,
                              svga->curr.rast->templ.flatshade_first );
 
+   ret = svga_upload_user_buffers( svga, start, count, instance_count );
+
+   if (ret != PIPE_OK)
+      goto retry;
+
    ret = svga_update_state( svga, SVGA_STATE_HW_DRAW );
    if (ret)
       goto retry;
@@ -127,6 +248,7 @@ retry:
                                 prim,
                                 start,
                                 count,
+                                instance_count,
                                 FALSE );
    }
 
@@ -183,6 +305,8 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
          svga_context_flush(svga, NULL);
       }
 
+      /* Avoid leaking the previous hwtnl bias to swtnl */
+      svga_hwtnl_set_index_bias( svga->hwtnl, 0 );
       ret = svga_swtnl_draw_vbo( svga, info );
    }
    else {
@@ -201,6 +325,7 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
                                           info->mode,
                                           info->start + offset,
                                           info->count,
+                                          info->instance_count,
                                           TRUE );
       }
       else {
@@ -208,10 +333,13 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
                                   info->mode,
                                   info->start,
                                   info->count,
+                                  info->instance_count,
                                   TRUE );
       }
    }
 
+   svga_release_user_upl_buffers( svga );
+
    if (SVGA_DEBUG & DEBUG_FLUSH) {
       svga_hwtnl_flush_retry( svga );
       svga_context_flush(svga, NULL);
diff --git a/src/gallium/drivers/svga/svga_resource_buffer.h b/src/gallium/drivers/svga/svga_resource_buffer.h
index 95032213fa5..2ae44d2a5e9 100644
--- a/src/gallium/drivers/svga/svga_resource_buffer.h
+++ b/src/gallium/drivers/svga/svga_resource_buffer.h
@@ -132,6 +132,13 @@ struct svga_buffer
    } uploaded;
 
    /**
+    * For user buffers, this is the offset to the data about to be
+    * referenced by the next draw command, and hence the data that needs
+    * to be uploaded.
+    */
+   unsigned source_offset;
+
+   /**
     * DMA'ble memory.
     *
     * A piece of GMR memory, with the same size of the buffer. It is created
diff --git a/src/gallium/drivers/svga/svga_state_vdecl.c b/src/gallium/drivers/svga/svga_state_vdecl.c
index 7c393a1da8d..2375a022f97 100644
--- a/src/gallium/drivers/svga/svga_state_vdecl.c
+++ b/src/gallium/drivers/svga/svga_state_vdecl.c
@@ -38,57 +38,6 @@
 #include "svga_hw_reg.h"
 
 
-static int
-upload_user_buffers( struct svga_context *svga )
-{
-   enum pipe_error ret = PIPE_OK;
-   int i;
-   int nr;
-
-   if (0) 
-      debug_printf("%s: %d\n", __FUNCTION__, svga->curr.num_vertex_buffers);
-
-   nr = svga->curr.num_vertex_buffers;
-
-   for (i = 0; i < nr; i++) 
-   {
-      if (svga_buffer_is_user_buffer(svga->curr.vb[i].buffer))
-      {
-         struct svga_buffer *buffer = svga_buffer(svga->curr.vb[i].buffer);
-
-         if (!buffer->uploaded.buffer) {
-            boolean flushed;
-            ret = u_upload_buffer( svga->upload_vb,
-                                   0, 0,
-                                   buffer->b.b.width0,
-                                   &buffer->b.b,
-                                   &buffer->uploaded.offset,
-                                   &buffer->uploaded.buffer,
-                                   &flushed);
-            if (ret)
-               return ret;
-
-            if (0)
-               debug_printf("%s: %d: orig buf %p upl buf %p ofs %d sz %d\n",
-                            __FUNCTION__,
-                            i,
-                            buffer,
-                            buffer->uploaded.buffer,
-                            buffer->uploaded.offset,
-                            buffer->b.b.width0);
-         }
-
-         svga->curr.vb[i].buffer_offset = buffer->uploaded.offset;
-      }
-   }
-
-   if (0)
-      debug_printf("%s: DONE\n", __FUNCTION__);
-
-   return ret;
-}
-
-
 /***********************************************************************
  */
 
@@ -99,6 +48,7 @@ static int emit_hw_vs_vdecl( struct svga_context *svga,
    const struct pipe_vertex_element *ve = svga->curr.velems->velem;
    SVGA3dVertexDecl decl;
    unsigned i;
+   unsigned neg_bias = 0;
 
    assert(svga->curr.velems->count >=
           svga->curr.vs->base.info.file_count[TGSI_FILE_INPUT]);
@@ -106,12 +56,50 @@ static int emit_hw_vs_vdecl( struct svga_context *svga,
    svga_hwtnl_reset_vdecl( svga->hwtnl, 
                            svga->curr.velems->count );
 
+   /**
+    * We can't set the VDECL offset to something negative, so we
+    * must calculate a common negative additional index bias, and modify
+    * the VDECL offsets accordingly so they *all* end up positive.
+    *
+    * Note that the exact value of the negative index bias is not that
+    * important, since we compensate for it when we calculate the vertex
+    * buffer offset below. The important thing is that all vertex buffer
+    * offsets remain positive.
+    *
+    * Note that we use a negative bias variable in order to make the
+    * rounding maths more easy to follow, and to avoid int / unsigned
+    * confusion.
+    */
+
    for (i = 0; i < svga->curr.velems->count; i++) {
-      const struct pipe_vertex_buffer *vb = &svga->curr.vb[ve[i].vertex_buffer_index];
+      const struct pipe_vertex_buffer *vb =
+         &svga->curr.vb[ve[i].vertex_buffer_index];
+      struct svga_buffer *buffer;
+      unsigned int offset = vb->buffer_offset + ve[i].src_offset;
+      unsigned tmp_neg_bias = 0;
+
+      if (!vb->buffer)
+         continue;
+
+      buffer = svga_buffer(vb->buffer);
+      if (buffer->source_offset > offset) {
+         tmp_neg_bias = buffer->source_offset - offset;
+         if (vb->stride)
+            tmp_neg_bias = (tmp_neg_bias + vb->stride - 1) / vb->stride;
+         neg_bias = MAX2(neg_bias, tmp_neg_bias);
+      }
+   }
+
+   for (i = 0; i < svga->curr.velems->count; i++) {
+      const struct pipe_vertex_buffer *vb =
+         &svga->curr.vb[ve[i].vertex_buffer_index];
       unsigned usage, index;
-      struct svga_buffer *buffer = svga_buffer(vb->buffer);
+      struct svga_buffer *buffer;
 
+      if (!vb->buffer)
+         continue;
 
+      buffer= svga_buffer(vb->buffer);
       svga_generate_vdecl_semantics( i, &usage, &index );
 
       /* SVGA_NEW_VELEMENT
@@ -121,8 +109,16 @@ static int emit_hw_vs_vdecl( struct svga_context *svga,
       decl.identity.usage = usage;
       decl.identity.usageIndex = index;
       decl.array.stride = vb->stride;
-      decl.array.offset = (vb->buffer_offset +
-                           ve[i].src_offset);
+
+      /* Compensate for partially uploaded vbo, and
+       * for the negative index bias.
+       */
+      decl.array.offset = (vb->buffer_offset
+                           + ve[i].src_offset
+			   + neg_bias * vb->stride
+			   - buffer->source_offset);
+
+      assert(decl.array.offset >= 0);
 
       svga_hwtnl_vdecl( svga->hwtnl,
                         i,
@@ -131,6 +127,7 @@ static int emit_hw_vs_vdecl( struct svga_context *svga,
                         vb->buffer );
    }
 
+   svga_hwtnl_set_index_bias( svga->hwtnl, -neg_bias );
    return 0;
 }
 
@@ -138,23 +135,11 @@ static int emit_hw_vs_vdecl( struct svga_context *svga,
 static int emit_hw_vdecl( struct svga_context *svga,
                           unsigned dirty )
 {
-   int ret = 0;
-
    /* SVGA_NEW_NEED_SWTNL
     */
    if (svga->state.sw.need_swtnl)
       return 0; /* Do not emit during swtnl */
 
-   /* If we get to here, we know that we're going to draw.  Upload
-    * userbuffers now and try to combine multiple userbuffers from
-    * multiple draw calls into a single host buffer for performance.
-    */
-   if (svga->curr.any_user_vertex_buffers) {
-      ret = upload_user_buffers( svga );
-      if (ret)
-         return ret;
-   }
-
    return emit_hw_vs_vdecl( svga, dirty );
 }