8 files changed, 186 insertions, 68 deletions
diff --git a/src/gallium/winsys/drm/intel/gem/intel_drm_api.c b/src/gallium/winsys/drm/intel/gem/intel_drm_api.c
index 450ae09b345..8c8176e44ac 100644
--- a/src/gallium/winsys/drm/intel/gem/intel_drm_api.c
+++ b/src/gallium/winsys/drm/intel/gem/intel_drm_api.c
@@ -196,6 +196,7 @@ destroy(struct drm_api *api)
 struct drm_api intel_drm_api =
 {
    .name = "i915",
+   .driver_name = "i915",
    .create_context = intel_drm_create_context,
    .create_screen = intel_drm_create_screen,
    .texture_from_shared_handle = intel_drm_texture_from_shared_handle,
diff --git a/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c b/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c
index 4b2c6a1025e..c9f39d815d8 100644
--- a/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c
+++ b/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c
@@ -255,6 +255,7 @@ nouveau_drm_handle_from_pt(struct drm_api *api, struct pipe_screen *pscreen,
 
 struct drm_api drm_api_hooks = {
 	.name = "nouveau",
+	.driver_name = "nouveau",
 	.create_screen = nouveau_drm_create_screen,
 	.create_context = nouveau_drm_create_context,
 	.texture_from_shared_handle = nouveau_drm_pt_from_name,
diff --git a/src/gallium/winsys/drm/radeon/core/radeon_drm.c b/src/gallium/winsys/drm/radeon/core/radeon_drm.c
index 9552f0ad6a9..bff6fdc1ad0 100644
--- a/src/gallium/winsys/drm/radeon/core/radeon_drm.c
+++ b/src/gallium/winsys/drm/radeon/core/radeon_drm.c
@@ -270,6 +270,7 @@ static boolean radeon_local_handle_from_texture(struct drm_api *api,
 
 struct drm_api drm_api_hooks = {
     .name = "radeon",
+    .driver_name = "radeon",
     .create_screen = radeon_create_screen,
     .create_context = radeon_create_context,
     .texture_from_shared_handle = radeon_texture_from_shared_handle,
diff --git a/src/gallium/winsys/drm/vmware/core/vmw_context.c b/src/gallium/winsys/drm/vmware/core/vmw_context.c
index b6997588de4..b5fd4f5a6a1 100644
--- a/src/gallium/winsys/drm/vmware/core/vmw_context.c
+++ b/src/gallium/winsys/drm/vmware/core/vmw_context.c
@@ -41,9 +41,18 @@
 
 #define VMW_COMMAND_SIZE (64*1024)
 #define VMW_SURFACE_RELOCS (1024)
+#define VMW_REGION_RELOCS (512)
 
 #define VMW_MUST_FLUSH_STACK 8
 
+struct vmw_region_relocation
+{
+   struct SVGAGuestPtr *where;
+   struct pb_buffer *buffer;
+   /* TODO: put offset info inside where */
+   uint32 offset;
+};
+
 struct vmw_svga_winsys_context
 {
    struct svga_winsys_context base;
@@ -69,10 +78,31 @@ struct vmw_svga_winsys_context
       uint32_t staged;
       uint32_t reserved;
    } surface;
+   
+   struct {
+      struct vmw_region_relocation relocs[VMW_REGION_RELOCS];
+      uint32_t size;
+      uint32_t used;
+      uint32_t staged;
+      uint32_t reserved;
+   } region;
 
    struct pb_validate *validate;
 
    uint32_t last_fence;
+
+   /**
+    * The amount of GMR that is referenced by the commands currently batched
+    * in the context.
+    */
+   uint32_t seen_regions;
+
+   /**
+    * Whether this context should fail to reserve more commands, not because it
+    * ran out of command space, but because a substantial amount of GMR was
+    * referred.
+    */
+   boolean preemptive_flush;
 };
 
 
@@ -96,6 +126,19 @@ vmw_swc_flush(struct svga_winsys_context *swc,
    ret = pb_validate_validate(vswc->validate);
    assert(ret == PIPE_OK);
    if(ret == PIPE_OK) {
+   
+      /* Apply relocations */
+      for(i = 0; i < vswc->region.used; ++i) {
+         struct vmw_region_relocation *reloc = &vswc->region.relocs[i];
+         struct SVGAGuestPtr ptr;
+
+         if(!vmw_gmr_bufmgr_region_ptr(reloc->buffer, &ptr))
+            assert(0);
+
+         ptr.offset += reloc->offset;
+
+         *reloc->where = ptr;
+      }
 
       if (vswc->command.used)
          vmw_ioctl_command(vswc->vws,
@@ -121,9 +164,18 @@ vmw_swc_flush(struct svga_winsys_context *swc,
    vswc->surface.used = 0;
    vswc->surface.reserved = 0;
 
+   for(i = 0; i < vswc->region.used + vswc->region.staged; ++i) {
+      pb_reference(&vswc->region.relocs[i].buffer, NULL);
+   }
+
+   vswc->region.used = 0;
+   vswc->region.reserved = 0;
+
 #ifdef DEBUG
    vswc->must_flush = FALSE;
 #endif
+   vswc->preemptive_flush = FALSE;
+   vswc->seen_regions = 0;
 
    if(pfence)
       *pfence = fence;
@@ -151,8 +203,10 @@ vmw_swc_reserve(struct svga_winsys_context *swc,
    if(nr_bytes > vswc->command.size)
       return NULL;
 
-   if(vswc->command.used + nr_bytes > vswc->command.size ||
-      vswc->surface.used + nr_relocs > vswc->surface.size) {
+   if(vswc->preemptive_flush ||
+      vswc->command.used + nr_bytes > vswc->command.size ||
+      vswc->surface.used + nr_relocs > vswc->surface.size ||
+      vswc->region.used + nr_relocs > vswc->region.size) {
 #ifdef DEBUG
       vswc->must_flush = TRUE;
       debug_backtrace_capture(vswc->must_flush_stack, 1,
@@ -163,11 +217,14 @@ vmw_swc_reserve(struct svga_winsys_context *swc,
 
    assert(vswc->command.used + nr_bytes <= vswc->command.size);
    assert(vswc->surface.used + nr_relocs <= vswc->surface.size);
-
+   assert(vswc->region.used + nr_relocs <= vswc->region.size);
+   
    vswc->command.reserved = nr_bytes;
    vswc->surface.reserved = nr_relocs;
    vswc->surface.staged = 0;
-
+   vswc->region.reserved = nr_relocs;
+   vswc->region.staged = 0;
+   
    return vswc->command.buffer + vswc->command.used;
 }
 
@@ -206,20 +263,41 @@ vmw_swc_region_relocation(struct svga_winsys_context *swc,
                           unsigned flags)
 {
    struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc);
-   struct SVGAGuestPtr ptr;
-   struct pb_buffer *buf = vmw_pb_buffer(buffer);
+   struct vmw_region_relocation *reloc;
    enum pipe_error ret;
+   
+   assert(vswc->region.staged < vswc->region.reserved);
 
-   if(!vmw_gmr_bufmgr_region_ptr(buf, &ptr))
-      assert(0);
-
-   ptr.offset += offset;
+   reloc = &vswc->region.relocs[vswc->region.used + vswc->region.staged];
+   reloc->where = where;
+   pb_reference(&reloc->buffer, vmw_pb_buffer(buffer));
+   reloc->offset = offset;
 
-   *where = ptr;
+   ++vswc->region.staged;
 
-   ret = pb_validate_add_buffer(vswc->validate, buf, flags);
+   ret = pb_validate_add_buffer(vswc->validate, reloc->buffer, flags);
    /* TODO: Update pipebuffer to reserve buffers and not fail here */
    assert(ret == PIPE_OK);
+
+   /*
+    * Flush preemptively the FIFO commands to keep the GMR working set within
+    * the GMR pool size.
+    *
+    * This is necessary for applications like SPECviewperf that generate huge
+    * amounts of immediate vertex data, so that we don't pile up too much of
+    * that vertex data either in the guest or in the host.
+    *
+    * Note that in the current implementation if a region is referred twice in
+    * a command stream, it will be accounted twice. We could detect repeated
+    * regions and count only once, but there is no incentive to do that, since
+    * regions are typically short-lived; always referred in a single command;
+    * and at the worst we just flush the commands a bit sooner, which for the
+    * SVGA virtual device is not a performance issue since flushing commands
+    * to the FIFO won't cause flushing in the host.
+    */
+   vswc->seen_regions += reloc->buffer->base.size;
+   if(vswc->seen_regions >= VMW_GMR_POOL_SIZE/2)
+      vswc->preemptive_flush = TRUE;
 }
 
 
@@ -238,6 +316,12 @@ vmw_swc_commit(struct svga_winsys_context *swc)
    vswc->surface.used += vswc->surface.staged;
    vswc->surface.staged = 0;
    vswc->surface.reserved = 0;
+
+   assert(vswc->region.staged <= vswc->region.reserved);
+   assert(vswc->region.used + vswc->region.staged <= vswc->region.size);
+   vswc->region.used += vswc->region.staged;
+   vswc->region.staged = 0;
+   vswc->region.reserved = 0;
 }
 
 
@@ -246,6 +330,11 @@ vmw_swc_destroy(struct svga_winsys_context *swc)
 {
    struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc);
    unsigned i;
+
+   for(i = 0; i < vswc->region.used; ++i) {
+      pb_reference(&vswc->region.relocs[i].buffer, NULL);
+   }
+
    for(i = 0; i < vswc->surface.used; ++i) {
       p_atomic_dec(&vswc->surface.handles[i]->validated);
       vmw_svga_winsys_surface_reference(&vswc->surface.handles[i], NULL);
@@ -279,6 +368,7 @@ vmw_svga_winsys_context_create(struct svga_winsys_screen *sws)
 
    vswc->command.size = VMW_COMMAND_SIZE;
    vswc->surface.size = VMW_SURFACE_RELOCS;
+   vswc->region.size = VMW_REGION_RELOCS;
 
    vswc->validate = pb_validate_create();
    if(!vswc->validate) {
diff --git a/src/gallium/winsys/drm/vmware/core/vmw_screen.h b/src/gallium/winsys/drm/vmware/core/vmw_screen.h
index a875107370c..f1d69865e77 100644
--- a/src/gallium/winsys/drm/vmware/core/vmw_screen.h
+++ b/src/gallium/winsys/drm/vmware/core/vmw_screen.h
@@ -40,6 +40,10 @@
 
 #include "svga_winsys.h"
 
+
+#define VMW_GMR_POOL_SIZE (16*1024*1024)
+
+
 struct pb_manager;
 struct vmw_region;
 
diff --git a/src/gallium/winsys/drm/vmware/core/vmw_screen_dri.c b/src/gallium/winsys/drm/vmware/core/vmw_screen_dri.c
index 4f5ccea4677..8be9d74bde3 100644
--- a/src/gallium/winsys/drm/vmware/core/vmw_screen_dri.c
+++ b/src/gallium/winsys/drm/vmware/core/vmw_screen_dri.c
@@ -85,6 +85,23 @@ vmw_drm_create_screen(struct drm_api *drm_api,
    struct pipe_screen *screen;
    struct dri1_create_screen_arg *dri1;
 
+   if (!arg || arg->mode == DRM_CREATE_NORMAL) {
+      struct dri1_api_version drm_ver;
+      drmVersionPtr ver;
+
+      ver = drmGetVersion(fd);
+      if (ver == NULL)
+	 return NULL;
+
+      drm_ver.major = ver->version_major;
+      drm_ver.minor = ver->version_minor;
+
+      drmFreeVersion(ver);
+      if (!vmw_dri1_check_version(&drm_ver, &drm_required,
+				  &drm_compat, "vmwgfx drm driver"))
+	 return NULL;
+   }
+
    if (arg != NULL) {
       switch (arg->mode) {
       case DRM_CREATE_NORMAL:
@@ -220,22 +237,19 @@ vmw_dri1_present_locked(struct pipe_context *locked_pipe,
    vmw_svga_winsys_surface_reference(&vsrf, NULL);
 }
 
-/**
- * FIXME: We'd probably want to cache these buffers in the
- * screen, based on handle.
- */
-
-static struct pipe_buffer *
-vmw_drm_buffer_from_handle(struct drm_api *drm_api,
-                           struct pipe_screen *screen,
-			   const char *name,
-			   unsigned handle)
+static struct pipe_texture *
+vmw_drm_texture_from_handle(struct drm_api *drm_api,
+			    struct pipe_screen *screen,
+			    struct pipe_texture *templat,
+			    const char *name,
+			    unsigned stride,
+			    unsigned handle)
 {
     struct vmw_svga_winsys_surface *vsrf;
     struct svga_winsys_surface *ssrf;
     struct vmw_winsys_screen *vws =
 	vmw_winsys_screen(svga_winsys_screen(screen));
-    struct pipe_buffer *buf;
+    struct pipe_texture *tex;
     union drm_vmw_surface_reference_arg arg;
     struct drm_vmw_surface_arg *req = &arg.req;
     struct drm_vmw_surface_create_req *rep = &arg.rep;
@@ -282,43 +296,28 @@ vmw_drm_buffer_from_handle(struct drm_api *drm_api,
 
     pipe_reference_init(&vsrf->refcnt, 1);
     p_atomic_set(&vsrf->validated, 0);
+    vsrf->screen = vws;
     vsrf->sid = handle;
     ssrf = svga_winsys_surface(vsrf);
-    buf = svga_screen_buffer_wrap_surface(screen, rep->format, ssrf);
-    if (!buf)
+    tex = svga_screen_texture_wrap_surface(screen, templat, rep->format, ssrf);
+    if (!tex)
 	vmw_svga_winsys_surface_reference(&vsrf, NULL);
 
-    return buf;
+    return tex;
   out_mip:
     vmw_ioctl_surface_destroy(vws, handle);
     return NULL;
 }
 
-static struct pipe_texture *
-vmw_drm_texture_from_handle(struct drm_api *drm_api,
-			    struct pipe_screen *screen,
-			    struct pipe_texture *templat,
-			    const char *name,
-			    unsigned stride,
-			    unsigned handle)
-{
-    struct pipe_buffer *buffer;
-    buffer = vmw_drm_buffer_from_handle(drm_api, screen, name, handle);
-
-    if (!buffer)
-	return NULL;
-
-    return screen->texture_blanket(screen, templat, &stride, buffer);
-}
-
 static boolean
-vmw_drm_handle_from_buffer(struct drm_api *drm_api,
+vmw_drm_handle_from_texture(struct drm_api *drm_api,
                            struct pipe_screen *screen,
-			   struct pipe_buffer *buffer,
+			   struct pipe_texture *texture,
+			   unsigned *stride,
 			   unsigned *handle)
 {
     struct svga_winsys_surface *surface =
-	svga_screen_buffer_get_winsys_surface(buffer);
+	svga_screen_texture_get_winsys_surface(texture);
     struct vmw_svga_winsys_surface *vsrf;
 
     if (!surface)
@@ -326,25 +325,13 @@ vmw_drm_handle_from_buffer(struct drm_api *drm_api,
 
     vsrf = vmw_svga_winsys_surface(surface);
     *handle = vsrf->sid;
+    *stride = pf_get_nblocksx(&texture->block, texture->width[0]) *
+	texture->block.size;
+
     vmw_svga_winsys_surface_reference(&vsrf, NULL);
     return TRUE;
 }
 
-static boolean
-vmw_drm_handle_from_texture(struct drm_api *drm_api,
-			    struct pipe_screen *screen,
-			    struct pipe_texture *texture,
-			    unsigned *stride,
-			    unsigned *handle)
-{
-    struct pipe_buffer *buffer;
-
-    if (!svga_screen_buffer_from_texture(texture, &buffer, stride))
-	return FALSE;
-
-    return vmw_drm_handle_from_buffer(drm_api, screen, buffer, handle);
-}
-
 static struct pipe_context*
 vmw_drm_create_context(struct drm_api *drm_api,
                        struct pipe_screen *screen)
@@ -359,6 +346,7 @@ static struct dri1_api dri1_api_hooks = {
 
 static struct drm_api vmw_drm_api_hooks = {
    .name = "vmwgfx",
+   .driver_name = "vmwgfx",
    .create_screen = vmw_drm_create_screen,
    .create_context = vmw_drm_create_context,
    .texture_from_shared_handle = vmw_drm_texture_from_handle,
diff --git a/src/gallium/winsys/drm/vmware/core/vmw_screen_pools.c b/src/gallium/winsys/drm/vmware/core/vmw_screen_pools.c
index b1c24b0cb6a..b9823d78575 100644
--- a/src/gallium/winsys/drm/vmware/core/vmw_screen_pools.c
+++ b/src/gallium/winsys/drm/vmware/core/vmw_screen_pools.c
@@ -53,14 +53,32 @@ vmw_pools_init(struct vmw_winsys_screen *vws)
       goto error;
 
    vws->pools.gmr_mm = mm_bufmgr_create(vws->pools.gmr,
-                                        16*1024*1024,
+                                        VMW_GMR_POOL_SIZE,
                                         12 /* 4096 alignment */);
    if(!vws->pools.gmr_mm)
       goto error;
 
+   /*
+    * GMR buffers are typically short-lived, but it's possible that at a given
+    * instant a buffer is mapped. So to avoid stalling we tell pipebuffer to
+    * forbid creation of buffers beyond half the GMR pool size.
+    *
+    * XXX: It is unclear whether we want to limit the total amount of temporary
+    * malloc memory used to back up unvalidated GMR buffers. On one hand it is
+    * preferable to fail an allocation than to exhaust the guest memory with
+    * temporary data, but on the other hand it is possible that a stupid
+    * application creates large vertex buffers and does not use them for a long
+    * time -- since the svga pipe driver only emits the DMA uploads when a
+    * buffer is used for drawing this would effectively disable swapping GMR
+    * buffers to memory. So far, the preemptive flush already seems to keep
+    * total allocated memory within relatively small numbers, so we don't
+    * limit.
+    */
    vws->pools.gmr_fenced = fenced_bufmgr_create(
       vws->pools.gmr_mm,
-      vmw_fence_ops_create(vws));
+      vmw_fence_ops_create(vws),
+      VMW_GMR_POOL_SIZE/2,
+      ~0);
 
 #ifdef DEBUG
    vws->pools.gmr_fenced = pb_debug_manager_create(vws->pools.gmr_fenced,
diff --git a/src/gallium/winsys/xlib/xlib_softpipe.c b/src/gallium/winsys/xlib/xlib_softpipe.c
index f7c0099584e..49dbf443ca3 100644
--- a/src/gallium/winsys/xlib/xlib_softpipe.c
+++ b/src/gallium/winsys/xlib/xlib_softpipe.c
@@ -204,6 +204,14 @@ xm_buffer_destroy(struct pipe_buffer *buf)
 {
    struct xm_buffer *oldBuf = xm_buffer(buf);
 
+   /*
+    * Note oldBuf->data may point to one of three things:
+    * 1. XShm shared memory image data
+    * 2. User-provided (wrapped) memory, see xm_user_buffer_create()
+    * 3. Regular, malloc'd memory
+    * We need to be careful with freeing that data now.
+    */
+
    if (oldBuf->data) {
 #ifdef USE_XSHM
       if (oldBuf->shminfo.shmid >= 0) {
@@ -213,12 +221,19 @@ xm_buffer_destroy(struct pipe_buffer *buf)
          oldBuf->shminfo.shmid = -1;
          oldBuf->shminfo.shmaddr = (char *) -1;
       }
-      else
-#endif
-      {
-         if (!oldBuf->userBuffer) {
-            align_free(oldBuf->data);
+
+      if (oldBuf->tempImage) {
+         if (oldBuf->data == oldBuf->tempImage->data) {
+            /* oldBuf->data points at the xshm memory which we'll now free */
+            oldBuf->data = NULL;
          }
+         XDestroyImage(oldBuf->tempImage);
+      }
+#endif
+
+      if (oldBuf->data && !oldBuf->userBuffer) {
+         /* this was regular malloc'd memory */
+         align_free(oldBuf->data);
       }
 
       oldBuf->data = NULL;