diff options
author | Brian Paul <[email protected]> | 2014-02-08 09:51:15 -0800 |
---|---|---|
committer | Brian Paul <[email protected]> | 2014-02-14 08:21:44 -0700 |
commit | fe6a854477c2ed30c37c200668a4dc86512120f7 (patch) | |
tree | 9ad955f1e6c293bb28082f9d9dc1de9cf328dbad /src/gallium/winsys | |
parent | 59e7c596215155b556ba8cf06233b621b88f49c6 (diff) |
svga/winsys: implement GBS support
This is a squash commit of many commits by Thomas Hellstrom.
Reviewed-by: Thomas Hellstrom <[email protected]>
Cc: "10.1" <[email protected]>
Diffstat (limited to 'src/gallium/winsys')
19 files changed, 3064 insertions, 323 deletions
diff --git a/src/gallium/winsys/svga/drm/Makefile.sources b/src/gallium/winsys/svga/drm/Makefile.sources index c300860ff79..08aecd6bfaf 100644 --- a/src/gallium/winsys/svga/drm/Makefile.sources +++ b/src/gallium/winsys/svga/drm/Makefile.sources @@ -7,4 +7,6 @@ C_SOURCES := \ vmw_screen_ioctl.c \ vmw_screen_pools.c \ vmw_screen_svga.c \ - vmw_surface.c + vmw_surface.c \ + vmw_shader.c \ + pb_buffer_simple_fenced.c diff --git a/src/gallium/winsys/svga/drm/pb_buffer_simple_fenced.c b/src/gallium/winsys/svga/drm/pb_buffer_simple_fenced.c new file mode 100644 index 00000000000..888aebb2bdc --- /dev/null +++ b/src/gallium/winsys/svga/drm/pb_buffer_simple_fenced.c @@ -0,0 +1,844 @@ +/************************************************************************** + * + * Copyright 2007-2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/** + * \file + * Implementation of fenced buffers. + * + * \author Jose Fonseca <jfonseca-at-vmware-dot-com> + * \author Thomas Hellström <thellstrom-at-vmware-dot-com> + */ + + +#include "pipe/p_config.h" + +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) +#include <unistd.h> +#include <sched.h> +#endif + +#include "pipe/p_compiler.h" +#include "pipe/p_defines.h" +#include "util/u_debug.h" +#include "os/os_thread.h" +#include "util/u_memory.h" +#include "util/u_double_list.h" + +#include "pipebuffer/pb_buffer.h" +#include "pipebuffer/pb_bufmgr.h" +#include "pipebuffer/pb_buffer_fenced.h" +#include "vmw_screen.h" + + +/** + * Convenience macro (type safe). + */ +#define SUPER(__derived) (&(__derived)->base) + + +struct fenced_manager +{ + struct pb_manager base; + struct pb_manager *provider; + struct pb_fence_ops *ops; + + /** + * Following members are mutable and protected by this mutex. + */ + pipe_mutex mutex; + + /** + * Fenced buffer list. + * + * All fenced buffers are placed in this list, ordered from the oldest + * fence to the newest fence. + */ + struct list_head fenced; + pb_size num_fenced; + + /** + * Unfenced buffer list: buffers that currently have no fence attached. + */ + struct list_head unfenced; + pb_size num_unfenced; + +}; + + +/** + * Fenced buffer. + * + * Wrapper around a pipe buffer which adds fencing and reference counting. + */ +struct fenced_buffer +{ + /* + * Immutable members. + */ + + struct pb_buffer base; + struct fenced_manager *mgr; + + /* + * Following members are mutable and protected by fenced_manager::mutex. + */ + + struct list_head head; + + /** + * Buffer with storage. + */ + struct pb_buffer *buffer; + pb_size size; + + /** + * A bitmask of PB_USAGE_CPU/GPU_READ/WRITE describing the current + * buffer usage. 
+ */ + unsigned flags; + + unsigned mapcount; + + struct pb_validate *vl; + unsigned validation_flags; + + struct pipe_fence_handle *fence; +}; + + +static INLINE struct fenced_manager * +fenced_manager(struct pb_manager *mgr) +{ + assert(mgr); + return (struct fenced_manager *)mgr; +} + + +static INLINE struct fenced_buffer * +fenced_buffer(struct pb_buffer *buf) +{ + assert(buf); + return (struct fenced_buffer *)buf; +} + + +static void +fenced_buffer_destroy_gpu_storage_locked(struct fenced_buffer *fenced_buf); + +static enum pipe_error +fenced_buffer_create_gpu_storage_locked(struct fenced_manager *fenced_mgr, + struct fenced_buffer *fenced_buf, + const struct pb_desc *desc, + boolean wait); +/** + * Dump the unfenced and fenced buffer lists. + * + * Useful to understand failures to allocate buffers. + */ +static void +fenced_manager_dump_locked(struct fenced_manager *fenced_mgr) +{ +#ifdef DEBUG + struct pb_fence_ops *ops = fenced_mgr->ops; + struct list_head *curr, *next; + struct fenced_buffer *fenced_buf; + + debug_printf("%10s %7s %8s %7s %10s %s\n", + "buffer", "size", "refcount", "storage", "fence", "signalled"); + + curr = fenced_mgr->unfenced.next; + next = curr->next; + while(curr != &fenced_mgr->unfenced) { + fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head); + assert(!fenced_buf->fence); + debug_printf("%10p %7u %8u %7s\n", + (void *) fenced_buf, + fenced_buf->base.size, + p_atomic_read(&fenced_buf->base.reference.count), + fenced_buf->buffer ? 
"gpu" : "none"); + curr = next; + next = curr->next; + } + + curr = fenced_mgr->fenced.next; + next = curr->next; + while(curr != &fenced_mgr->fenced) { + int signaled; + fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head); + assert(fenced_buf->buffer); + signaled = ops->fence_signalled(ops, fenced_buf->fence, 0); + debug_printf("%10p %7u %8u %7s %10p %s\n", + (void *) fenced_buf, + fenced_buf->base.size, + p_atomic_read(&fenced_buf->base.reference.count), + "gpu", + (void *) fenced_buf->fence, + signaled == 0 ? "y" : "n"); + curr = next; + next = curr->next; + } +#else + (void)fenced_mgr; +#endif +} + + +static INLINE void +fenced_buffer_destroy_locked(struct fenced_manager *fenced_mgr, + struct fenced_buffer *fenced_buf) +{ + assert(!pipe_is_referenced(&fenced_buf->base.reference)); + + assert(!fenced_buf->fence); + assert(fenced_buf->head.prev); + assert(fenced_buf->head.next); + LIST_DEL(&fenced_buf->head); + assert(fenced_mgr->num_unfenced); + --fenced_mgr->num_unfenced; + + fenced_buffer_destroy_gpu_storage_locked(fenced_buf); + + FREE(fenced_buf); +} + + +/** + * Add the buffer to the fenced list. + * + * The buffer must already be referenced; an extra reference is taken here. + */ +static INLINE void +fenced_buffer_add_locked(struct fenced_manager *fenced_mgr, + struct fenced_buffer *fenced_buf) +{ + assert(pipe_is_referenced(&fenced_buf->base.reference)); + assert(fenced_buf->flags & PB_USAGE_GPU_READ_WRITE); + assert(fenced_buf->fence); + + p_atomic_inc(&fenced_buf->base.reference.count); + + LIST_DEL(&fenced_buf->head); + assert(fenced_mgr->num_unfenced); + --fenced_mgr->num_unfenced; + LIST_ADDTAIL(&fenced_buf->head, &fenced_mgr->fenced); + ++fenced_mgr->num_fenced; +} + + +/** + * Remove the buffer from the fenced list, and potentially destroy the buffer + * if the reference count reaches zero. + * + * Returns TRUE if the buffer was destroyed. 
+ */ +static INLINE boolean +fenced_buffer_remove_locked(struct fenced_manager *fenced_mgr, + struct fenced_buffer *fenced_buf) +{ + struct pb_fence_ops *ops = fenced_mgr->ops; + + assert(fenced_buf->fence); + assert(fenced_buf->mgr == fenced_mgr); + + ops->fence_reference(ops, &fenced_buf->fence, NULL); + fenced_buf->flags &= ~PB_USAGE_GPU_READ_WRITE; + + assert(fenced_buf->head.prev); + assert(fenced_buf->head.next); + + LIST_DEL(&fenced_buf->head); + assert(fenced_mgr->num_fenced); + --fenced_mgr->num_fenced; + + LIST_ADDTAIL(&fenced_buf->head, &fenced_mgr->unfenced); + ++fenced_mgr->num_unfenced; + + if (p_atomic_dec_zero(&fenced_buf->base.reference.count)) { + fenced_buffer_destroy_locked(fenced_mgr, fenced_buf); + return TRUE; + } + + return FALSE; +} + + +/** + * Wait for the fence to expire, and remove it from the fenced list. + * + * This function will release and re-acquire the mutex, so any copy of mutable + * state must be discarded after calling it. + * + * Returns PIPE_OK if the fence expired and the buffer was moved to the + * unfenced list, PIPE_ERROR otherwise. + */ +static INLINE enum pipe_error +fenced_buffer_finish_locked(struct fenced_manager *fenced_mgr, + struct fenced_buffer *fenced_buf) +{ + struct pb_fence_ops *ops = fenced_mgr->ops; + enum pipe_error ret = PIPE_ERROR; + +#if 0 + debug_warning("waiting for GPU"); +#endif + + assert(pipe_is_referenced(&fenced_buf->base.reference)); + assert(fenced_buf->fence); + + if(fenced_buf->fence) { + struct pipe_fence_handle *fence = NULL; + int finished; + boolean proceed; + + ops->fence_reference(ops, &fence, fenced_buf->fence); + + pipe_mutex_unlock(fenced_mgr->mutex); + + /* + * Wait on our private reference: fenced_buf->fence is mutable state + * that another thread may clear or replace while the mutex is released. + */ + finished = ops->fence_finish(ops, fence, 0); + + pipe_mutex_lock(fenced_mgr->mutex); + + assert(pipe_is_referenced(&fenced_buf->base.reference)); + + /* + * Only proceed if the fence object didn't change in the meanwhile. + * Otherwise assume the work has been already carried out by another + * thread that re-acquired the lock before us. + */ + proceed = fence == fenced_buf->fence ? 
TRUE : FALSE; + + ops->fence_reference(ops, &fence, NULL); + + if(proceed && finished == 0) { + /* + * Remove from the fenced list + */ + + boolean destroyed; + + destroyed = fenced_buffer_remove_locked(fenced_mgr, fenced_buf); + + /* TODO: remove subsequent buffers with the same fence? */ + + assert(!destroyed); + + fenced_buf->flags &= ~PB_USAGE_GPU_READ_WRITE; + + ret = PIPE_OK; + } + } + + return ret; +} + + +/** + * Remove as many fenced buffers from the fenced list as possible. + * + * Returns TRUE if at least one buffer was removed. + */ +static boolean +fenced_manager_check_signalled_locked(struct fenced_manager *fenced_mgr, + boolean wait) +{ + struct pb_fence_ops *ops = fenced_mgr->ops; + struct list_head *curr, *next; + struct fenced_buffer *fenced_buf; + struct pipe_fence_handle *prev_fence = NULL; + boolean ret = FALSE; + + curr = fenced_mgr->fenced.next; + next = curr->next; + while(curr != &fenced_mgr->fenced) { + fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head); + + if(fenced_buf->fence != prev_fence) { + int signaled; + + if (wait) { + signaled = ops->fence_finish(ops, fenced_buf->fence, 0); + + /* + * Don't return just now. Instead preemptively check if the + * following buffers' fences already expired, + * without further waits. + */ + wait = FALSE; + } + else { + signaled = ops->fence_signalled(ops, fenced_buf->fence, 0); + } + + if (signaled != 0) { + return ret; + } + + prev_fence = fenced_buf->fence; + } + else { + /* This buffer's fence object is identical to the previous buffer's + * fence object, so no need to check the fence again. + */ + assert(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0); + } + + fenced_buffer_remove_locked(fenced_mgr, fenced_buf); + + ret = TRUE; + + curr = next; + next = curr->next; + } + + return ret; +} + + +/** + * Destroy the GPU storage. 
+ */ +static void +fenced_buffer_destroy_gpu_storage_locked(struct fenced_buffer *fenced_buf) +{ + if(fenced_buf->buffer) { + pb_reference(&fenced_buf->buffer, NULL); + } +} + + +/** + * Try to create GPU storage for this buffer. + * + * This function is a shorthand around pb_manager::create_buffer for + * fenced_buffer_create_gpu_storage_locked()'s benefit. + */ +static INLINE boolean +fenced_buffer_try_create_gpu_storage_locked(struct fenced_manager *fenced_mgr, + struct fenced_buffer *fenced_buf, + const struct pb_desc *desc) +{ + struct pb_manager *provider = fenced_mgr->provider; + + assert(!fenced_buf->buffer); + + fenced_buf->buffer = provider->create_buffer(fenced_mgr->provider, + fenced_buf->size, desc); + return fenced_buf->buffer ? TRUE : FALSE; +} + + +/** + * Create GPU storage for this buffer. + * + * Expired fences are retired between allocation attempts; when \p wait is + * TRUE the retry loop may additionally block on a pending fence. + * + * Returns PIPE_OK on success, PIPE_ERROR_OUT_OF_MEMORY if no storage could + * be allocated. + */ +static enum pipe_error +fenced_buffer_create_gpu_storage_locked(struct fenced_manager *fenced_mgr, + struct fenced_buffer *fenced_buf, + const struct pb_desc *desc, + boolean wait) +{ + assert(!fenced_buf->buffer); + + /* + * Check for signaled buffers before trying to allocate. + */ + fenced_manager_check_signalled_locked(fenced_mgr, FALSE); + + fenced_buffer_try_create_gpu_storage_locked(fenced_mgr, fenced_buf, desc); + + /* + * Keep trying while there is some sort of progress, i.e. while + * fenced_manager_check_signalled_locked() reports that at least one + * buffer with an expired fence was removed from the fenced list. + */ + while(!fenced_buf->buffer && + (fenced_manager_check_signalled_locked(fenced_mgr, FALSE))) { + fenced_buffer_try_create_gpu_storage_locked(fenced_mgr, fenced_buf, + desc); + } + + if(!fenced_buf->buffer && wait) { + /* + * Same as before, but this time around, wait to free buffers if + * necessary. 
+ */ + while(!fenced_buf->buffer && + (fenced_manager_check_signalled_locked(fenced_mgr, TRUE))) { + fenced_buffer_try_create_gpu_storage_locked(fenced_mgr, fenced_buf, + desc); + } + } + + if(!fenced_buf->buffer) { + if(0) + fenced_manager_dump_locked(fenced_mgr); + + /* give up */ + return PIPE_ERROR_OUT_OF_MEMORY; + } + + return PIPE_OK; +} + + +static void +fenced_buffer_destroy(struct pb_buffer *buf) +{ + struct fenced_buffer *fenced_buf = fenced_buffer(buf); + struct fenced_manager *fenced_mgr = fenced_buf->mgr; + + assert(!pipe_is_referenced(&fenced_buf->base.reference)); + + pipe_mutex_lock(fenced_mgr->mutex); + + fenced_buffer_destroy_locked(fenced_mgr, fenced_buf); + + pipe_mutex_unlock(fenced_mgr->mutex); +} + + +static void * +fenced_buffer_map(struct pb_buffer *buf, + unsigned flags, void *flush_ctx) +{ + struct fenced_buffer *fenced_buf = fenced_buffer(buf); + struct fenced_manager *fenced_mgr = fenced_buf->mgr; + struct pb_fence_ops *ops = fenced_mgr->ops; + void *map = NULL; + + pipe_mutex_lock(fenced_mgr->mutex); + + assert(!(flags & PB_USAGE_GPU_READ_WRITE)); + + /* + * Serialize writes. + */ + while((fenced_buf->flags & PB_USAGE_GPU_WRITE) || + ((fenced_buf->flags & PB_USAGE_GPU_READ) && + (flags & PB_USAGE_CPU_WRITE))) { + + /* + * Don't wait for the GPU to finish accessing it, + * if blocking is forbidden. + */ + if((flags & PB_USAGE_DONTBLOCK) && + ops->fence_signalled(ops, fenced_buf->fence, 0) != 0) { + goto done; + } + + if (flags & PB_USAGE_UNSYNCHRONIZED) { + break; + } + + /* + * Wait for the GPU to finish accessing. This will release and re-acquire + * the mutex, so all copies of mutable state must be discarded. 
+ */ + fenced_buffer_finish_locked(fenced_mgr, fenced_buf); + } + + map = pb_map(fenced_buf->buffer, flags, flush_ctx); + + if(map) { + ++fenced_buf->mapcount; + fenced_buf->flags |= flags & PB_USAGE_CPU_READ_WRITE; + } + +done: + pipe_mutex_unlock(fenced_mgr->mutex); + + return map; +} + + +static void +fenced_buffer_unmap(struct pb_buffer *buf) +{ + struct fenced_buffer *fenced_buf = fenced_buffer(buf); + struct fenced_manager *fenced_mgr = fenced_buf->mgr; + + pipe_mutex_lock(fenced_mgr->mutex); + + assert(fenced_buf->mapcount); + if(fenced_buf->mapcount) { + if (fenced_buf->buffer) + pb_unmap(fenced_buf->buffer); + --fenced_buf->mapcount; + if(!fenced_buf->mapcount) + fenced_buf->flags &= ~PB_USAGE_CPU_READ_WRITE; + } + + pipe_mutex_unlock(fenced_mgr->mutex); +} + + +static enum pipe_error +fenced_buffer_validate(struct pb_buffer *buf, + struct pb_validate *vl, + unsigned flags) +{ + struct fenced_buffer *fenced_buf = fenced_buffer(buf); + struct fenced_manager *fenced_mgr = fenced_buf->mgr; + enum pipe_error ret; + + pipe_mutex_lock(fenced_mgr->mutex); + + if(!vl) { + /* invalidate */ + fenced_buf->vl = NULL; + fenced_buf->validation_flags = 0; + ret = PIPE_OK; + goto done; + } + + assert(flags & PB_USAGE_GPU_READ_WRITE); + assert(!(flags & ~PB_USAGE_GPU_READ_WRITE)); + flags &= PB_USAGE_GPU_READ_WRITE; + + /* Buffer cannot be validated in two different lists */ + if(fenced_buf->vl && fenced_buf->vl != vl) { + ret = PIPE_ERROR_RETRY; + goto done; + } + + if(fenced_buf->vl == vl && + (fenced_buf->validation_flags & flags) == flags) { + /* Nothing to do -- buffer already validated */ + ret = PIPE_OK; + goto done; + } + + ret = pb_validate(fenced_buf->buffer, vl, flags); + if (ret != PIPE_OK) + goto done; + + fenced_buf->vl = vl; + fenced_buf->validation_flags |= flags; + +done: + pipe_mutex_unlock(fenced_mgr->mutex); + + return ret; +} + + +static void +fenced_buffer_fence(struct pb_buffer *buf, + struct pipe_fence_handle *fence) +{ + struct fenced_buffer 
*fenced_buf = fenced_buffer(buf); + struct fenced_manager *fenced_mgr = fenced_buf->mgr; + struct pb_fence_ops *ops = fenced_mgr->ops; + + pipe_mutex_lock(fenced_mgr->mutex); + + assert(pipe_is_referenced(&fenced_buf->base.reference)); + assert(fenced_buf->buffer); + + if(fence != fenced_buf->fence) { + assert(fenced_buf->vl); + assert(fenced_buf->validation_flags); + + if (fenced_buf->fence) { + boolean destroyed; + destroyed = fenced_buffer_remove_locked(fenced_mgr, fenced_buf); + assert(!destroyed); + } + if (fence) { + ops->fence_reference(ops, &fenced_buf->fence, fence); + fenced_buf->flags |= fenced_buf->validation_flags; + fenced_buffer_add_locked(fenced_mgr, fenced_buf); + } + + pb_fence(fenced_buf->buffer, fence); + + fenced_buf->vl = NULL; + fenced_buf->validation_flags = 0; + } + + pipe_mutex_unlock(fenced_mgr->mutex); +} + + +static void +fenced_buffer_get_base_buffer(struct pb_buffer *buf, + struct pb_buffer **base_buf, + pb_size *offset) +{ + struct fenced_buffer *fenced_buf = fenced_buffer(buf); + struct fenced_manager *fenced_mgr = fenced_buf->mgr; + + pipe_mutex_lock(fenced_mgr->mutex); + + assert(fenced_buf->buffer); + + if(fenced_buf->buffer) + pb_get_base_buffer(fenced_buf->buffer, base_buf, offset); + else { + *base_buf = buf; + *offset = 0; + } + + pipe_mutex_unlock(fenced_mgr->mutex); +} + + +static const struct pb_vtbl +fenced_buffer_vtbl = { + fenced_buffer_destroy, + fenced_buffer_map, + fenced_buffer_unmap, + fenced_buffer_validate, + fenced_buffer_fence, + fenced_buffer_get_base_buffer +}; + + +/** + * Wrap a buffer in a fenced buffer. 
+ */ +static struct pb_buffer * +fenced_bufmgr_create_buffer(struct pb_manager *mgr, + pb_size size, + const struct pb_desc *desc) +{ + struct fenced_manager *fenced_mgr = fenced_manager(mgr); + struct fenced_buffer *fenced_buf; + enum pipe_error ret; + + fenced_buf = CALLOC_STRUCT(fenced_buffer); + if(!fenced_buf) + goto no_buffer; + + pipe_reference_init(&fenced_buf->base.reference, 1); + fenced_buf->base.alignment = desc->alignment; + fenced_buf->base.usage = desc->usage; + fenced_buf->base.size = size; + fenced_buf->size = size; + + fenced_buf->base.vtbl = &fenced_buffer_vtbl; + fenced_buf->mgr = fenced_mgr; + + pipe_mutex_lock(fenced_mgr->mutex); + + /* + * Try to create GPU storage, waiting for fences to expire if necessary + * (wait == TRUE). + */ + ret = fenced_buffer_create_gpu_storage_locked(fenced_mgr, fenced_buf, + desc, TRUE); + + /* + * Give up. + */ + if(ret != PIPE_OK) { + goto no_storage; + } + + assert(fenced_buf->buffer); + + LIST_ADDTAIL(&fenced_buf->head, &fenced_mgr->unfenced); + ++fenced_mgr->num_unfenced; + pipe_mutex_unlock(fenced_mgr->mutex); + + return &fenced_buf->base; + +no_storage: + pipe_mutex_unlock(fenced_mgr->mutex); + FREE(fenced_buf); +no_buffer: + return NULL; +} + + +/** + * Wait for fenced buffers to expire (for as long as progress is made), then + * flush the provider. + */ +static void +fenced_bufmgr_flush(struct pb_manager *mgr) +{ + struct fenced_manager *fenced_mgr = fenced_manager(mgr); + + pipe_mutex_lock(fenced_mgr->mutex); + while(fenced_manager_check_signalled_locked(fenced_mgr, TRUE)) + ; + pipe_mutex_unlock(fenced_mgr->mutex); + + assert(fenced_mgr->provider->flush); + if(fenced_mgr->provider->flush) + fenced_mgr->provider->flush(fenced_mgr->provider); +} + + +static void +fenced_bufmgr_destroy(struct pb_manager *mgr) +{ + struct fenced_manager *fenced_mgr = fenced_manager(mgr); + + pipe_mutex_lock(fenced_mgr->mutex); + + /* Wait on outstanding fences */ + while (fenced_mgr->num_fenced) { + pipe_mutex_unlock(fenced_mgr->mutex); +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) + sched_yield(); +#endif + pipe_mutex_lock(fenced_mgr->mutex); 
+ while(fenced_manager_check_signalled_locked(fenced_mgr, TRUE)) + ; + } + +#ifdef DEBUG + /*assert(!fenced_mgr->num_unfenced);*/ +#endif + + pipe_mutex_unlock(fenced_mgr->mutex); + pipe_mutex_destroy(fenced_mgr->mutex); + + FREE(fenced_mgr); +} + + +struct pb_manager * +simple_fenced_bufmgr_create(struct pb_manager *provider, + struct pb_fence_ops *ops) +{ + struct fenced_manager *fenced_mgr; + + if(!provider) + return NULL; + + fenced_mgr = CALLOC_STRUCT(fenced_manager); + if (!fenced_mgr) + return NULL; + + fenced_mgr->base.destroy = fenced_bufmgr_destroy; + fenced_mgr->base.create_buffer = fenced_bufmgr_create_buffer; + fenced_mgr->base.flush = fenced_bufmgr_flush; + + fenced_mgr->provider = provider; + fenced_mgr->ops = ops; + + LIST_INITHEAD(&fenced_mgr->fenced); + fenced_mgr->num_fenced = 0; + + LIST_INITHEAD(&fenced_mgr->unfenced); + fenced_mgr->num_unfenced = 0; + + pipe_mutex_init(fenced_mgr->mutex); + + return &fenced_mgr->base; +} diff --git a/src/gallium/winsys/svga/drm/vmw_buffer.c b/src/gallium/winsys/svga/drm/vmw_buffer.c index 66ab80848ec..c516054b7fc 100644 --- a/src/gallium/winsys/svga/drm/vmw_buffer.c +++ b/src/gallium/winsys/svga/drm/vmw_buffer.c @@ -51,7 +51,6 @@ #include "vmw_screen.h" #include "vmw_buffer.h" - struct vmw_gmr_bufmgr; @@ -63,6 +62,7 @@ struct vmw_gmr_buffer struct vmw_region *region; void *map; + unsigned map_flags; }; @@ -113,6 +113,25 @@ vmw_gmr_buffer_map(struct pb_buffer *_buf, void *flush_ctx) { struct vmw_gmr_buffer *buf = vmw_gmr_buffer(_buf); + int ret; + + if (!buf->map) + buf->map = vmw_ioctl_region_map(buf->region); + + if (!buf->map) + return NULL; + + + if ((_buf->usage & VMW_BUFFER_USAGE_SYNC) && + !(flags & PB_USAGE_UNSYNCHRONIZED)) { + ret = vmw_ioctl_syncforcpu(buf->region, + !!(flags & PB_USAGE_DONTBLOCK), + !(flags & PB_USAGE_CPU_WRITE), + FALSE); + if (ret) + return NULL; + } + return buf->map; } @@ -120,8 +139,15 @@ vmw_gmr_buffer_map(struct pb_buffer *_buf, static void vmw_gmr_buffer_unmap(struct 
pb_buffer *_buf) { - /* Do nothing */ - (void)_buf; + struct vmw_gmr_buffer *buf = vmw_gmr_buffer(_buf); + unsigned flags = buf->map_flags; + + if ((_buf->usage & VMW_BUFFER_USAGE_SYNC) && + !(flags & PB_USAGE_UNSYNCHRONIZED)) { + vmw_ioctl_releasefromcpu(buf->region, + !(flags & PB_USAGE_CPU_WRITE), + FALSE); + } } @@ -167,35 +193,33 @@ const struct pb_vtbl vmw_gmr_buffer_vtbl = { static struct pb_buffer * vmw_gmr_bufmgr_create_buffer(struct pb_manager *_mgr, pb_size size, - const struct pb_desc *desc) + const struct pb_desc *pb_desc) { struct vmw_gmr_bufmgr *mgr = vmw_gmr_bufmgr(_mgr); struct vmw_winsys_screen *vws = mgr->vws; struct vmw_gmr_buffer *buf; + const struct vmw_buffer_desc *desc = + (const struct vmw_buffer_desc *) pb_desc; buf = CALLOC_STRUCT(vmw_gmr_buffer); if(!buf) goto error1; pipe_reference_init(&buf->base.reference, 1); - buf->base.alignment = desc->alignment; - buf->base.usage = desc->usage; - buf->base.size = size; + buf->base.alignment = pb_desc->alignment; + buf->base.usage = pb_desc->usage & ~VMW_BUFFER_USAGE_SHARED; buf->base.vtbl = &vmw_gmr_buffer_vtbl; buf->mgr = mgr; - - buf->region = vmw_ioctl_region_create(vws, size); - if(!buf->region) - goto error2; + buf->base.size = size; + if ((pb_desc->usage & VMW_BUFFER_USAGE_SHARED) && desc->region) { + buf->region = desc->region; + } else { + buf->region = vmw_ioctl_region_create(vws, size); + if(!buf->region) + goto error2; + } - buf->map = vmw_ioctl_region_map(buf->region); - if(!buf->map) - goto error3; - return &buf->base; - -error3: - vmw_ioctl_region_destroy(buf->region); error2: FREE(buf); error1: @@ -257,3 +281,91 @@ vmw_gmr_bufmgr_region_ptr(struct pb_buffer *buf, return TRUE; } + +#ifdef DEBUG +struct svga_winsys_buffer { + struct pb_buffer *pb_buf; + struct debug_flush_buf *fbuf; +}; + +struct pb_buffer * +vmw_pb_buffer(struct svga_winsys_buffer *buffer) +{ + assert(buffer); + return buffer->pb_buf; +} + +struct svga_winsys_buffer * +vmw_svga_winsys_buffer_wrap(struct pb_buffer 
*buffer) +{ + struct svga_winsys_buffer *buf; + + if (!buffer) + return NULL; + + buf = CALLOC_STRUCT(svga_winsys_buffer); + if (!buf) { + pb_reference(&buffer, NULL); + return NULL; + } + + buf->pb_buf = buffer; + buf->fbuf = debug_flush_buf_create(TRUE, VMW_DEBUG_FLUSH_STACK); + return buf; +} + +struct debug_flush_buf * +vmw_debug_flush_buf(struct svga_winsys_buffer *buffer) +{ + return buffer->fbuf; +} + +#endif + +void +vmw_svga_winsys_buffer_destroy(struct svga_winsys_screen *sws, + struct svga_winsys_buffer *buf) +{ + struct pb_buffer *pbuf = vmw_pb_buffer(buf); + (void)sws; + pb_reference(&pbuf, NULL); +#ifdef DEBUG + debug_flush_buf_reference(&buf->fbuf, NULL); + FREE(buf); +#endif +} + +void * +vmw_svga_winsys_buffer_map(struct svga_winsys_screen *sws, + struct svga_winsys_buffer *buf, + unsigned flags) +{ + void *map; + + (void)sws; + if (flags & PIPE_TRANSFER_UNSYNCHRONIZED) + flags &= ~PIPE_TRANSFER_DONTBLOCK; + + map = pb_map(vmw_pb_buffer(buf), flags, NULL); + +#ifdef DEBUG + if (map != NULL) + debug_flush_map(buf->fbuf, flags); +#endif + + return map; +} + + +void +vmw_svga_winsys_buffer_unmap(struct svga_winsys_screen *sws, + struct svga_winsys_buffer *buf) +{ + (void)sws; + +#ifdef DEBUG + debug_flush_unmap(buf->fbuf); +#endif + + pb_unmap(vmw_pb_buffer(buf)); +} diff --git a/src/gallium/winsys/svga/drm/vmw_buffer.h b/src/gallium/winsys/svga/drm/vmw_buffer.h index 41fb4476da5..e0bb8085a48 100644 --- a/src/gallium/winsys/svga/drm/vmw_buffer.h +++ b/src/gallium/winsys/svga/drm/vmw_buffer.h @@ -29,6 +29,12 @@ #include <assert.h> #include "pipe/p_compiler.h" +#include "pipebuffer/pb_bufmgr.h" +#include "util/u_debug_flush.h" + + +#define VMW_BUFFER_USAGE_SHARED (1 << 20) +#define VMW_BUFFER_USAGE_SYNC (1 << 21) struct SVGAGuestPtr; struct pb_buffer; @@ -37,7 +43,22 @@ struct svga_winsys_buffer; struct svga_winsys_surface; struct vmw_winsys_screen; +struct vmw_buffer_desc { + struct pb_desc pb_desc; + struct vmw_region *region; +}; + + +#ifdef DEBUG 
+struct pb_buffer * +vmw_pb_buffer(struct svga_winsys_buffer *buffer); +struct svga_winsys_buffer * +vmw_svga_winsys_buffer_wrap(struct pb_buffer *buffer); +struct debug_flush_buf * +vmw_debug_flush_buf(struct svga_winsys_buffer *buffer); + +#else static INLINE struct pb_buffer * vmw_pb_buffer(struct svga_winsys_buffer *buffer) { @@ -47,12 +68,23 @@ vmw_pb_buffer(struct svga_winsys_buffer *buffer) static INLINE struct svga_winsys_buffer * -vmw_svga_winsys_buffer(struct pb_buffer *buffer) +vmw_svga_winsys_buffer_wrap(struct pb_buffer *buffer) { - assert(buffer); return (struct svga_winsys_buffer *)buffer; } +#endif + +void +vmw_svga_winsys_buffer_destroy(struct svga_winsys_screen *sws, + struct svga_winsys_buffer *buf); +void * +vmw_svga_winsys_buffer_map(struct svga_winsys_screen *sws, + struct svga_winsys_buffer *buf, + unsigned flags); +void +vmw_svga_winsys_buffer_unmap(struct svga_winsys_screen *sws, + struct svga_winsys_buffer *buf); struct pb_manager * vmw_gmr_bufmgr_create(struct vmw_winsys_screen *vws); diff --git a/src/gallium/winsys/svga/drm/vmw_context.c b/src/gallium/winsys/svga/drm/vmw_context.c index eb89272a5e6..1234a5edce3 100644 --- a/src/gallium/winsys/svga/drm/vmw_context.c +++ b/src/gallium/winsys/svga/drm/vmw_context.c @@ -29,6 +29,8 @@ #include "util/u_debug.h" #include "util/u_memory.h" #include "util/u_debug_stack.h" +#include "util/u_debug_flush.h" +#include "util/u_hash_table.h" #include "pipebuffer/pb_buffer.h" #include "pipebuffer/pb_validate.h" @@ -38,19 +40,38 @@ #include "vmw_buffer.h" #include "vmw_surface.h" #include "vmw_fence.h" +#include "vmw_shader.h" #define VMW_COMMAND_SIZE (64*1024) #define VMW_SURFACE_RELOCS (1024) +#define VMW_SHADER_RELOCS (1024) #define VMW_REGION_RELOCS (512) #define VMW_MUST_FLUSH_STACK 8 -struct vmw_region_relocation +struct vmw_buffer_relocation { - struct SVGAGuestPtr *where; struct pb_buffer *buffer; - /* TODO: put offset info inside where */ + boolean is_mob; uint32 offset; + + union { + struct { + 
struct SVGAGuestPtr *where; + } region; + struct { + SVGAMobId *id; + uint32 *offset_into_mob; + } mob; + }; +}; + +struct vmw_ctx_validate_item { + union { + struct vmw_svga_winsys_surface *vsurf; + struct vmw_svga_winsys_shader *vshader; + }; + boolean referenced; }; struct vmw_svga_winsys_context @@ -58,10 +79,12 @@ struct vmw_svga_winsys_context struct svga_winsys_context base; struct vmw_winsys_screen *vws; + struct util_hash_table *hash; #ifdef DEBUG boolean must_flush; struct debug_stack_frame must_flush_stack[VMW_MUST_FLUSH_STACK]; + struct debug_flush_ctx *fctx; #endif struct { @@ -72,7 +95,7 @@ struct vmw_svga_winsys_context } command; struct { - struct vmw_svga_winsys_surface *handles[VMW_SURFACE_RELOCS]; + struct vmw_ctx_validate_item items[VMW_SURFACE_RELOCS]; uint32_t size; uint32_t used; uint32_t staged; @@ -80,20 +103,30 @@ struct vmw_svga_winsys_context } surface; struct { - struct vmw_region_relocation relocs[VMW_REGION_RELOCS]; + struct vmw_buffer_relocation relocs[VMW_REGION_RELOCS]; uint32_t size; uint32_t used; uint32_t staged; uint32_t reserved; } region; + struct { + struct vmw_ctx_validate_item items[VMW_SHADER_RELOCS]; + uint32_t size; + uint32_t used; + uint32_t staged; + uint32_t reserved; + } shader; + struct pb_validate *validate; /** - * The amount of GMR that is referred by the commands currently batched - * in the context. + * The amount of surface, GMR or MOB memory that is referred by the commands + * currently batched in the context command buffer. 
*/ - uint32_t seen_regions; + uint64_t seen_surfaces; + uint64_t seen_regions; + uint64_t seen_mobs; /** * Whether this context should fail to reserve more commands, not because it @@ -140,7 +173,7 @@ vmw_swc_flush(struct svga_winsys_context *swc, /* Apply relocations */ for(i = 0; i < vswc->region.used; ++i) { - struct vmw_region_relocation *reloc = &vswc->region.relocs[i]; + struct vmw_buffer_relocation *reloc = &vswc->region.relocs[i]; struct SVGAGuestPtr ptr; if(!vmw_gmr_bufmgr_region_ptr(reloc->buffer, &ptr)) @@ -148,7 +181,16 @@ vmw_swc_flush(struct svga_winsys_context *swc, ptr.offset += reloc->offset; - *reloc->where = ptr; + if (reloc->is_mob) { + if (reloc->mob.id) + *reloc->mob.id = ptr.gmrId; + if (reloc->mob.offset_into_mob) + *reloc->mob.offset_into_mob = ptr.offset; + else { + assert(ptr.offset == 0); + } + } else + *reloc->region.where = ptr; } if (vswc->command.used || pfence != NULL) @@ -166,27 +208,37 @@ vmw_swc_flush(struct svga_winsys_context *swc, vswc->command.reserved = 0; for(i = 0; i < vswc->surface.used + vswc->surface.staged; ++i) { - struct vmw_svga_winsys_surface *vsurf = - vswc->surface.handles[i]; - p_atomic_dec(&vsurf->validated); - vmw_svga_winsys_surface_reference(&vswc->surface.handles[i], NULL); + struct vmw_ctx_validate_item *isurf = &vswc->surface.items[i]; + if (isurf->referenced) + p_atomic_dec(&isurf->vsurf->validated); + vmw_svga_winsys_surface_reference(&isurf->vsurf, NULL); } + util_hash_table_clear(vswc->hash); vswc->surface.used = 0; vswc->surface.reserved = 0; - for(i = 0; i < vswc->region.used + vswc->region.staged; ++i) { - pb_reference(&vswc->region.relocs[i].buffer, NULL); + for(i = 0; i < vswc->shader.used + vswc->shader.staged; ++i) { + struct vmw_ctx_validate_item *ishader = &vswc->shader.items[i]; + if (ishader->referenced) + p_atomic_dec(&ishader->vshader->validated); + vmw_svga_winsys_shader_reference(&ishader->vshader, NULL); } + vswc->shader.used = 0; + vswc->shader.reserved = 0; + vswc->region.used = 0; 
vswc->region.reserved = 0; #ifdef DEBUG vswc->must_flush = FALSE; + debug_flush_flush(vswc->fctx); #endif vswc->preemptive_flush = FALSE; + vswc->seen_surfaces = 0; vswc->seen_regions = 0; + vswc->seen_mobs = 0; if(pfence) vmw_fence_reference(vswc->vws, pfence, fence); @@ -210,6 +262,7 @@ vmw_swc_reserve(struct svga_winsys_context *swc, debug_backtrace_dump(vswc->must_flush_stack, VMW_MUST_FLUSH_STACK); assert(!vswc->must_flush); } + debug_flush_might_flush(vswc->fctx); #endif assert(nr_bytes <= vswc->command.size); @@ -219,6 +272,7 @@ vmw_swc_reserve(struct svga_winsys_context *swc, if(vswc->preemptive_flush || vswc->command.used + nr_bytes > vswc->command.size || vswc->surface.used + nr_relocs > vswc->surface.size || + vswc->shader.used + nr_relocs > vswc->shader.size || vswc->region.used + nr_relocs > vswc->region.size) { #ifdef DEBUG vswc->must_flush = TRUE; @@ -230,17 +284,190 @@ vmw_swc_reserve(struct svga_winsys_context *swc, assert(vswc->command.used + nr_bytes <= vswc->command.size); assert(vswc->surface.used + nr_relocs <= vswc->surface.size); + assert(vswc->shader.used + nr_relocs <= vswc->shader.size); assert(vswc->region.used + nr_relocs <= vswc->region.size); vswc->command.reserved = nr_bytes; vswc->surface.reserved = nr_relocs; vswc->surface.staged = 0; + vswc->shader.reserved = nr_relocs; + vswc->shader.staged = 0; vswc->region.reserved = nr_relocs; vswc->region.staged = 0; return vswc->command.buffer + vswc->command.used; } +static void +vmw_swc_context_relocation(struct svga_winsys_context *swc, + uint32 *cid) +{ + *cid = swc->cid; +} + +static boolean +vmw_swc_add_validate_buffer(struct vmw_svga_winsys_context *vswc, + struct pb_buffer *pb_buf, + unsigned flags) +{ + enum pipe_error ret; + unsigned translated_flags; + + /* + * TODO: Update pb_validate to provide a similar functionality + * (Check buffer already present before adding) + */ + if (util_hash_table_get(vswc->hash, pb_buf) != pb_buf) { + translated_flags = 
vmw_translate_to_pb_flags(flags); + ret = pb_validate_add_buffer(vswc->validate, pb_buf, translated_flags); + /* TODO: Update pipebuffer to reserve buffers and not fail here */ + assert(ret == PIPE_OK); + (void)ret; + (void)util_hash_table_set(vswc->hash, pb_buf, pb_buf); + return TRUE; + } + + return FALSE; +} + +static void +vmw_swc_region_relocation(struct svga_winsys_context *swc, + struct SVGAGuestPtr *where, + struct svga_winsys_buffer *buffer, + uint32 offset, + unsigned flags) +{ + struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc); + struct vmw_buffer_relocation *reloc; + + assert(vswc->region.staged < vswc->region.reserved); + + reloc = &vswc->region.relocs[vswc->region.used + vswc->region.staged]; + reloc->region.where = where; + + /* + * pb_validate holds a refcount to the buffer, so no need to + * refcount it again in the relocation. + */ + reloc->buffer = vmw_pb_buffer(buffer); + reloc->offset = offset; + reloc->is_mob = FALSE; + ++vswc->region.staged; + + if (vmw_swc_add_validate_buffer(vswc, reloc->buffer, flags)) { + vswc->seen_regions += reloc->buffer->size; + if(vswc->seen_regions >= VMW_GMR_POOL_SIZE/5) + vswc->preemptive_flush = TRUE; + } + +#ifdef DEBUG + if (!(flags & SVGA_RELOC_INTERNAL)) + debug_flush_cb_reference(vswc->fctx, vmw_debug_flush_buf(buffer)); +#endif +} + +static void +vmw_swc_mob_relocation(struct svga_winsys_context *swc, + SVGAMobId *id, + uint32 *offset_into_mob, + struct svga_winsys_buffer *buffer, + uint32 offset, + unsigned flags) +{ + struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc); + struct vmw_buffer_relocation *reloc; + + assert(vswc->region.staged < vswc->region.reserved); + + reloc = &vswc->region.relocs[vswc->region.used + vswc->region.staged]; + reloc->mob.id = id; + reloc->mob.offset_into_mob = offset_into_mob; + + /* + * pb_validate holds a refcount to the buffer, so no need to + * refcount it again in the relocation. 
+ */ + reloc->buffer = vmw_pb_buffer(buffer); + reloc->offset = offset; + reloc->is_mob = TRUE; + ++vswc->region.staged; + + if (vmw_swc_add_validate_buffer(vswc, reloc->buffer, flags)) { + vswc->seen_mobs += reloc->buffer->size; + /* divide by 5, tested for best performance */ + if (vswc->seen_mobs >= vswc->vws->ioctl.max_mob_memory / 5) + vswc->preemptive_flush = TRUE; + } + +#ifdef DEBUG + if (!(flags & SVGA_RELOC_INTERNAL)) + debug_flush_cb_reference(vswc->fctx, vmw_debug_flush_buf(buffer)); +#endif +} + + +/** + * vmw_swc_surface_clear_reference - Clear referenced info for a surface + * + * @swc: Pointer to an svga_winsys_context + * @vsurf: Pointer to a vmw_svga_winsys_surface, the referenced info of which + * we want to clear + * + * This is primarily used by a discard surface map to indicate that the + * surface data is no longer referenced by a draw call, and mapping it + * should therefore no longer cause a flush. + */ +void +vmw_swc_surface_clear_reference(struct svga_winsys_context *swc, + struct vmw_svga_winsys_surface *vsurf) +{ + struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc); + struct vmw_ctx_validate_item *isrf = + util_hash_table_get(vswc->hash, vsurf); + + if (isrf && isrf->referenced) { + isrf->referenced = FALSE; + p_atomic_dec(&vsurf->validated); + } +} + +static void +vmw_swc_surface_only_relocation(struct svga_winsys_context *swc, + uint32 *where, + struct vmw_svga_winsys_surface *vsurf, + unsigned flags) +{ + struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc); + struct vmw_ctx_validate_item *isrf; + + assert(vswc->surface.staged < vswc->surface.reserved); + isrf = util_hash_table_get(vswc->hash, vsurf); + + if (isrf == NULL) { + isrf = &vswc->surface.items[vswc->surface.used + vswc->surface.staged]; + vmw_svga_winsys_surface_reference(&isrf->vsurf, vsurf); + isrf->referenced = FALSE; + /* + * Note that a failure here may just fall back to unhashed behavior + * and potentially cause unnecessary flushing, 
so ignore the + * return code. + */ + (void) util_hash_table_set(vswc->hash, vsurf, isrf); + ++vswc->surface.staged; + + vswc->seen_surfaces += vsurf->size; + /* divide by 5 not well tuned for performance */ + if (vswc->seen_surfaces >= vswc->vws->ioctl.max_surface_memory / 5) + vswc->preemptive_flush = TRUE; + } + + if (!(flags & SVGA_RELOC_INTERNAL) && !isrf->referenced) { + isrf->referenced = TRUE; + p_atomic_inc(&vsurf->validated); + } + + *where = vsurf->sid; +} static void vmw_swc_surface_relocation(struct svga_winsys_context *swc, @@ -249,75 +476,79 @@ vmw_swc_surface_relocation(struct svga_winsys_context *swc, struct svga_winsys_surface *surface, unsigned flags) { - struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc); struct vmw_svga_winsys_surface *vsurf; + assert(swc->have_gb_objects || mobid == NULL); + if(!surface) { *where = SVGA3D_INVALID_ID; + if (mobid) + *mobid = SVGA3D_INVALID_ID; return; } - assert(vswc->surface.staged < vswc->surface.reserved); - vsurf = vmw_svga_winsys_surface(surface); + vmw_swc_surface_only_relocation(swc, where, vsurf, flags); - *where = vsurf->sid; + if (swc->have_gb_objects && vsurf->buf != NULL) { - vmw_svga_winsys_surface_reference(&vswc->surface.handles[vswc->surface.used + vswc->surface.staged], vsurf); - p_atomic_inc(&vsurf->validated); - ++vswc->surface.staged; -} + /* + * Make sure backup buffer ends up fenced. 
+ */ + pipe_mutex_lock(vsurf->mutex); + assert(vsurf->buf != NULL); + + vmw_swc_mob_relocation(swc, mobid, NULL, (struct svga_winsys_buffer *) + vsurf->buf, 0, flags); + pipe_mutex_unlock(vsurf->mutex); + } +} static void -vmw_swc_region_relocation(struct svga_winsys_context *swc, - struct SVGAGuestPtr *where, - struct svga_winsys_buffer *buffer, - uint32 offset, - unsigned flags) +vmw_swc_shader_relocation(struct svga_winsys_context *swc, + uint32 *shid, + uint32 *mobid, + uint32 *offset, + struct svga_winsys_gb_shader *shader) { struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc); - struct vmw_region_relocation *reloc; - unsigned translated_flags; - enum pipe_error ret; - - assert(vswc->region.staged < vswc->region.reserved); + struct vmw_svga_winsys_shader *vshader; + struct vmw_ctx_validate_item *ishader; + if(!shader) { + *shid = SVGA3D_INVALID_ID; + return; + } - reloc = &vswc->region.relocs[vswc->region.used + vswc->region.staged]; - reloc->where = where; - pb_reference(&reloc->buffer, vmw_pb_buffer(buffer)); - reloc->offset = offset; + assert(vswc->shader.staged < vswc->shader.reserved); + vshader = vmw_svga_winsys_shader(shader); + ishader = util_hash_table_get(vswc->hash, vshader); + + if (ishader == NULL) { + ishader = &vswc->shader.items[vswc->shader.used + vswc->shader.staged]; + vmw_svga_winsys_shader_reference(&ishader->vshader, vshader); + ishader->referenced = FALSE; + /* + * Note that a failure here may just fall back to unhashed behavior + * and potentially cause unnecessary flushing, so ignore the + * return code. 
+ */ + (void) util_hash_table_set(vswc->hash, vshader, ishader); + ++vswc->shader.staged; + } - ++vswc->region.staged; + if (!ishader->referenced) { + ishader->referenced = TRUE; + p_atomic_inc(&vshader->validated); + } - translated_flags = vmw_translate_to_pb_flags(flags); - ret = pb_validate_add_buffer(vswc->validate, reloc->buffer, translated_flags); - /* TODO: Update pipebuffer to reserve buffers and not fail here */ - assert(ret == PIPE_OK); - (void)ret; + *shid = vshader->shid; - /* - * Flush preemptively the FIFO commands to keep the GMR working set within - * the GMR pool size. - * - * This is necessary for applications like SPECviewperf that generate huge - * amounts of immediate vertex data, so that we don't pile up too much of - * that vertex data neither in the guest nor in the host. - * - * Note that in the current implementation if a region is referred twice in - * a command stream, it will be accounted twice. We could detect repeated - * regions and count only once, but there is no incentive to do that, since - * regions are typically short-lived; always referred in a single command; - * and at the worst we just flush the commands a bit sooner, which for the - * SVGA virtual device it's not a performance issue since flushing commands - * to the FIFO won't cause flushing in the host. 
- */ - vswc->seen_regions += reloc->buffer->size; - if(vswc->seen_regions >= VMW_GMR_POOL_SIZE/3) - vswc->preemptive_flush = TRUE; + if (mobid != NULL && vshader->buf) + vmw_swc_mob_relocation(swc, mobid, offset, vshader->buf, + 0, SVGA_RELOC_READ); } - static void vmw_swc_commit(struct svga_winsys_context *swc) { @@ -334,6 +565,12 @@ vmw_swc_commit(struct svga_winsys_context *swc) vswc->surface.staged = 0; vswc->surface.reserved = 0; + assert(vswc->shader.staged <= vswc->shader.reserved); + assert(vswc->shader.used + vswc->shader.staged <= vswc->shader.size); + vswc->shader.used += vswc->shader.staged; + vswc->shader.staged = 0; + vswc->shader.reserved = 0; + assert(vswc->region.staged <= vswc->region.reserved); assert(vswc->region.used + vswc->region.staged <= vswc->region.size); vswc->region.used += vswc->region.staged; @@ -348,19 +585,38 @@ vmw_swc_destroy(struct svga_winsys_context *swc) struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc); unsigned i; - for(i = 0; i < vswc->region.used; ++i) { - pb_reference(&vswc->region.relocs[i].buffer, NULL); + for(i = 0; i < vswc->surface.used; ++i) { + struct vmw_ctx_validate_item *isurf = &vswc->surface.items[i]; + if (isurf->referenced) + p_atomic_dec(&isurf->vsurf->validated); + vmw_svga_winsys_surface_reference(&isurf->vsurf, NULL); } - for(i = 0; i < vswc->surface.used; ++i) { - p_atomic_dec(&vswc->surface.handles[i]->validated); - vmw_svga_winsys_surface_reference(&vswc->surface.handles[i], NULL); + for(i = 0; i < vswc->shader.used; ++i) { + struct vmw_ctx_validate_item *ishader = &vswc->shader.items[i]; + if (ishader->referenced) + p_atomic_dec(&ishader->vshader->validated); + vmw_svga_winsys_shader_reference(&ishader->vshader, NULL); } + + util_hash_table_destroy(vswc->hash); pb_validate_destroy(vswc->validate); vmw_ioctl_context_destroy(vswc->vws, swc->cid); +#ifdef DEBUG + debug_flush_ctx_destroy(vswc->fctx); +#endif FREE(vswc); } +static unsigned vmw_hash_ptr(void *p) +{ + return 
(unsigned)(unsigned long)p; +} + +static int vmw_ptr_compare(void *key1, void *key2) +{ + return (key1 == key2) ? 0 : 1; +} struct svga_winsys_context * vmw_svga_winsys_context_create(struct svga_winsys_screen *sws) @@ -376,22 +632,41 @@ vmw_svga_winsys_context_create(struct svga_winsys_screen *sws) vswc->base.reserve = vmw_swc_reserve; vswc->base.surface_relocation = vmw_swc_surface_relocation; vswc->base.region_relocation = vmw_swc_region_relocation; + vswc->base.mob_relocation = vmw_swc_mob_relocation; + vswc->base.context_relocation = vmw_swc_context_relocation; + vswc->base.shader_relocation = vmw_swc_shader_relocation; vswc->base.commit = vmw_swc_commit; vswc->base.flush = vmw_swc_flush; + vswc->base.surface_map = vmw_svga_winsys_surface_map; + vswc->base.surface_unmap = vmw_svga_winsys_surface_unmap; vswc->base.cid = vmw_ioctl_context_create(vws); + vswc->base.have_gb_objects = sws->have_gb_objects; vswc->vws = vws; vswc->command.size = VMW_COMMAND_SIZE; vswc->surface.size = VMW_SURFACE_RELOCS; + vswc->shader.size = VMW_SHADER_RELOCS; vswc->region.size = VMW_REGION_RELOCS; vswc->validate = pb_validate_create(); - if(!vswc->validate) { - FREE(vswc); - return NULL; - } + if(!vswc->validate) + goto out_no_validate; + + vswc->hash = util_hash_table_create(vmw_hash_ptr, vmw_ptr_compare); + if (!vswc->hash) + goto out_no_hash; + +#ifdef DEBUG + vswc->fctx = debug_flush_ctx_create(TRUE, VMW_DEBUG_FLUSH_STACK); +#endif return &vswc->base; + +out_no_hash: + pb_validate_destroy(vswc->validate); +out_no_validate: + FREE(vswc); + return NULL; } diff --git a/src/gallium/winsys/svga/drm/vmw_context.h b/src/gallium/winsys/svga/drm/vmw_context.h index 3d383ccb1ed..2c2fb415782 100644 --- a/src/gallium/winsys/svga/drm/vmw_context.h +++ b/src/gallium/winsys/svga/drm/vmw_context.h @@ -62,4 +62,12 @@ struct pipe_screen; struct svga_winsys_context * vmw_svga_winsys_context_create(struct svga_winsys_screen *sws); +struct vmw_svga_winsys_surface; + + +void 
+vmw_swc_surface_clear_reference(struct svga_winsys_context *swc, + struct vmw_svga_winsys_surface *vsurf); + + #endif /* VMW_CONTEXT_H_ */ diff --git a/src/gallium/winsys/svga/drm/vmw_fence.c b/src/gallium/winsys/svga/drm/vmw_fence.c index 754f8a666df..8af2250b7ea 100644 --- a/src/gallium/winsys/svga/drm/vmw_fence.c +++ b/src/gallium/winsys/svga/drm/vmw_fence.c @@ -22,20 +22,10 @@ * SOFTWARE. * **********************************************************/ -/* - * TODO: - * - * Fencing is currently a bit inefficient, since we need to call the - * kernel do determine a fence object signaled status if the fence is not - * signaled. This can be greatly improved upon by using the fact that the - * execbuf ioctl returns the last signaled fence seqno, as does the - * fence signaled ioctl. We should set up a ring of fence objects and - * walk through them checking for signaled status each time we receive a - * new passed fence seqno. - */ - #include "util/u_memory.h" #include "util/u_atomic.h" +#include "util/u_double_list.h" +#include "os/os_thread.h" #include "pipebuffer/pb_buffer_fenced.h" @@ -44,20 +34,129 @@ struct vmw_fence_ops { + /* + * Immutable members. + */ struct pb_fence_ops base; - struct vmw_winsys_screen *vws; + + pipe_mutex mutex; + + /* + * Protected by mutex; + */ + struct list_head not_signaled; + uint32_t last_signaled; + uint32_t last_emitted; }; struct vmw_fence { + struct list_head ops_list; int32_t refcount; uint32_t handle; uint32_t mask; int32_t signalled; + uint32_t seqno; }; /** + * vmw_fence_seq_is_signaled - Check whether a fence seqno is + * signaled. + * + * @ops: Pointer to a struct pb_fence_ops. + * + */ +static INLINE boolean +vmw_fence_seq_is_signaled(uint32_t seq, uint32_t last, uint32_t cur) +{ + return (cur - last <= cur - seq); +} + + +/** + * vmw_fence_ops - Return the vmw_fence_ops structure backing a + * struct pb_fence_ops pointer. + * + * @ops: Pointer to a struct pb_fence_ops. 
+ * + */ +static INLINE struct vmw_fence_ops * +vmw_fence_ops(struct pb_fence_ops *ops) +{ + assert(ops); + return (struct vmw_fence_ops *)ops; +} + + +/** + * vmw_fences_release - Release all fences from the not_signaled + * list. + * + * @ops: Pointer to a struct vmw_fence_ops. + * + */ +static void +vmw_fences_release(struct vmw_fence_ops *ops) +{ + struct vmw_fence *fence, *n; + + pipe_mutex_lock(ops->mutex); + LIST_FOR_EACH_ENTRY_SAFE(fence, n, &ops->not_signaled, ops_list) + LIST_DELINIT(&fence->ops_list); + pipe_mutex_unlock(ops->mutex); +} + +/** + * vmw_fences_signal - Traverse the not_signaled list and try to + * signal unsignaled fences. + * + * @ops: Pointer to a struct pb_fence_ops. + * @signaled: Seqno that has signaled. + * @emitted: Last seqno emitted by the kernel. + * @has_emitted: Whether we provide the emitted value. + * + */ +void +vmw_fences_signal(struct pb_fence_ops *fence_ops, + uint32_t signaled, + uint32_t emitted, + boolean has_emitted) +{ + struct vmw_fence_ops *ops = NULL; + struct vmw_fence *fence, *n; + + if (fence_ops == NULL) + return; + + ops = vmw_fence_ops(fence_ops); + pipe_mutex_lock(ops->mutex); + + if (!has_emitted) { + emitted = ops->last_emitted; + if (emitted - signaled > (1 << 30)) + emitted = signaled; + } + + if (signaled == ops->last_signaled && emitted == ops->last_emitted) + goto out_unlock; + + LIST_FOR_EACH_ENTRY_SAFE(fence, n, &ops->not_signaled, ops_list) { + if (!vmw_fence_seq_is_signaled(fence->seqno, signaled, emitted)) + break; + + p_atomic_set(&fence->signalled, 1); + LIST_DELINIT(&fence->ops_list); + } + ops->last_signaled = signaled; + ops->last_emitted = emitted; + +out_unlock: + pipe_mutex_unlock(ops->mutex); +} + + +/** * vmw_fence - return the vmw_fence object identified by a * struct pipe_fence_handle * * @@ -69,18 +168,22 @@ vmw_fence(struct pipe_fence_handle *fence) return (struct vmw_fence *) fence; } + /** * vmw_fence_create - Create a user-space fence object. 
* + * @fence_ops: The fence_ops manager to register with. * @handle: Handle identifying the kernel fence object. * @mask: Mask of flags that this fence object may signal. * * Returns NULL on failure. */ struct pipe_fence_handle * -vmw_fence_create(uint32_t handle, uint32_t mask) +vmw_fence_create(struct pb_fence_ops *fence_ops, uint32_t handle, + uint32_t seqno, uint32_t mask) { struct vmw_fence *fence = CALLOC_STRUCT(vmw_fence); + struct vmw_fence_ops *ops = vmw_fence_ops(fence_ops); if (!fence) return NULL; @@ -88,27 +191,24 @@ vmw_fence_create(uint32_t handle, uint32_t mask) p_atomic_set(&fence->refcount, 1); fence->handle = handle; fence->mask = mask; + fence->seqno = seqno; p_atomic_set(&fence->signalled, 0); + pipe_mutex_lock(ops->mutex); + + if (vmw_fence_seq_is_signaled(seqno, ops->last_signaled, seqno)) { + p_atomic_set(&fence->signalled, 1); + LIST_INITHEAD(&fence->ops_list); + } else { + p_atomic_set(&fence->signalled, 0); + LIST_ADDTAIL(&fence->ops_list, &ops->not_signaled); + } - return (struct pipe_fence_handle *) fence; -} + pipe_mutex_unlock(ops->mutex); -/** - * vmw_fence_ops - Return the vmw_fence_ops structure backing a - * struct pb_fence_ops pointer. - * - * @ops: Pointer to a struct pb_fence_ops. - * - */ -static INLINE struct vmw_fence_ops * -vmw_fence_ops(struct pb_fence_ops *ops) -{ - assert(ops); - return (struct vmw_fence_ops *)ops; + return (struct pipe_fence_handle *) fence; } - /** * vmw_fence_reference - Reference / unreference a vmw fence object. 
* @@ -125,7 +225,14 @@ vmw_fence_reference(struct vmw_winsys_screen *vws, struct vmw_fence *vfence = vmw_fence(*ptr); if (p_atomic_dec_zero(&vfence->refcount)) { + struct vmw_fence_ops *ops = vmw_fence_ops(vws->fence_ops); + vmw_ioctl_fence_unref(vws, vfence->handle); + + pipe_mutex_lock(ops->mutex); + LIST_DELINIT(&vfence->ops_list); + pipe_mutex_unlock(ops->mutex); + FREE(vfence); } } @@ -171,18 +278,21 @@ vmw_fence_signalled(struct vmw_winsys_screen *vws, if ((old & vflags) == vflags) return 0; + /* + * Currently we update signaled fences on each execbuf call. + * That should really be sufficient, and we can avoid + * a lot of kernel calls this way. + */ +#if 1 ret = vmw_ioctl_fence_signalled(vws, vfence->handle, vflags); - if (ret == 0) { - int32_t prev = old; - - do { - old = prev; - prev = p_atomic_cmpxchg(&vfence->signalled, old, old | vflags); - } while (prev != old); - } - + if (ret == 0) + p_atomic_set(&vfence->signalled, 1); return ret; +#else + (void) ret; + return -1; +#endif } /** @@ -287,6 +397,7 @@ vmw_fence_ops_fence_finish(struct pb_fence_ops *ops, static void vmw_fence_ops_destroy(struct pb_fence_ops *ops) { + vmw_fences_release(vmw_fence_ops(ops)); FREE(ops); } @@ -310,6 +421,8 @@ vmw_fence_ops_create(struct vmw_winsys_screen *vws) if(!ops) return NULL; + pipe_mutex_init(ops->mutex); + LIST_INITHEAD(&ops->not_signaled); ops->base.destroy = &vmw_fence_ops_destroy; ops->base.fence_reference = &vmw_fence_ops_fence_reference; ops->base.fence_signalled = &vmw_fence_ops_fence_signalled; @@ -319,5 +432,3 @@ vmw_fence_ops_create(struct vmw_winsys_screen *vws) return &ops->base; } - - diff --git a/src/gallium/winsys/svga/drm/vmw_fence.h b/src/gallium/winsys/svga/drm/vmw_fence.h index 403ae266d5e..56f1a0ab05d 100644 --- a/src/gallium/winsys/svga/drm/vmw_fence.h +++ b/src/gallium/winsys/svga/drm/vmw_fence.h @@ -29,7 +29,7 @@ #include "pipe/p_compiler.h" - +#include "pipebuffer/pb_buffer_fenced.h" struct pipe_fence_handle; struct pb_fence_ops; @@ -37,7 
+37,8 @@ struct vmw_winsys_screen; struct pipe_fence_handle * -vmw_fence_create(uint32_t handle, uint32_t mask); +vmw_fence_create(struct pb_fence_ops *fence_ops, + uint32_t handle, uint32_t seqno, uint32_t mask); int vmw_fence_finish(struct vmw_winsys_screen *vws, diff --git a/src/gallium/winsys/svga/drm/vmw_screen.c b/src/gallium/winsys/svga/drm/vmw_screen.c index f7799cec32d..0c343cc7bdd 100644 --- a/src/gallium/winsys/svga/drm/vmw_screen.c +++ b/src/gallium/winsys/svga/drm/vmw_screen.c @@ -25,42 +25,94 @@ #include "vmw_screen.h" - +#include "vmw_fence.h" #include "vmw_context.h" #include "util/u_memory.h" #include "pipe/p_compiler.h" +#include "util/u_hash_table.h" +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +static struct util_hash_table *dev_hash = NULL; + +static int vmw_dev_compare(void *key1, void *key2) +{ + return (major(*(dev_t *)key1) == major(*(dev_t *)key2) && + minor(*(dev_t *)key1) == minor(*(dev_t *)key2)) ? 0 : 1; +} + +static unsigned vmw_dev_hash(void *key) +{ + return (major(*(dev_t *) key) << 16) | minor(*(dev_t *) key); +} /* Called from vmw_drm_create_screen(), creates and initializes the * vmw_winsys_screen structure, which is the main entity in this * module. + * First, check whether a vmw_winsys_screen object already exists for + * this device, and in that case return that one, making sure that we + * have our own file descriptor open to DRM. 
*/ + struct vmw_winsys_screen * vmw_winsys_create( int fd, boolean use_old_scanout_flag ) { - struct vmw_winsys_screen *vws = CALLOC_STRUCT(vmw_winsys_screen); + struct vmw_winsys_screen *vws; + struct stat stat_buf; + + if (dev_hash == NULL) { + dev_hash = util_hash_table_create(vmw_dev_hash, vmw_dev_compare); + if (dev_hash == NULL) + return NULL; + } + + if (fstat(fd, &stat_buf)) + return NULL; + + vws = util_hash_table_get(dev_hash, &stat_buf.st_rdev); + if (vws) { + vws->open_count++; + return vws; + } + + vws = CALLOC_STRUCT(vmw_winsys_screen); if (!vws) goto out_no_vws; - vws->ioctl.drm_fd = fd; + vws->device = stat_buf.st_rdev; + vws->open_count = 1; + vws->ioctl.drm_fd = dup(fd); vws->use_old_scanout_flag = use_old_scanout_flag; + vws->base.have_gb_dma = TRUE; if (!vmw_ioctl_init(vws)) goto out_no_ioctl; + vws->fence_ops = vmw_fence_ops_create(vws); + if (!vws->fence_ops) + goto out_no_fence_ops; + if(!vmw_pools_init(vws)) goto out_no_pools; if (!vmw_winsys_screen_init_svga(vws)) goto out_no_svga; + if (util_hash_table_set(dev_hash, &vws->device, vws) != PIPE_OK) + goto out_no_hash_insert; + return vws; +out_no_hash_insert: out_no_svga: vmw_pools_cleanup(vws); out_no_pools: + vws->fence_ops->destroy(vws->fence_ops); +out_no_fence_ops: vmw_ioctl_cleanup(vws); out_no_ioctl: + close(vws->ioctl.drm_fd); FREE(vws); out_no_vws: return NULL; @@ -69,7 +121,12 @@ out_no_vws: void vmw_winsys_destroy(struct vmw_winsys_screen *vws) { - vmw_pools_cleanup(vws); - vmw_ioctl_cleanup(vws); - FREE(vws); + if (--vws->open_count == 0) { + util_hash_table_remove(dev_hash, &vws->device); + vmw_pools_cleanup(vws); + vws->fence_ops->destroy(vws->fence_ops); + vmw_ioctl_cleanup(vws); + close(vws->ioctl.drm_fd); + FREE(vws); + } } diff --git a/src/gallium/winsys/svga/drm/vmw_screen.h b/src/gallium/winsys/svga/drm/vmw_screen.h index bdc1c7ba3de..f9da3693c58 100644 --- a/src/gallium/winsys/svga/drm/vmw_screen.h +++ b/src/gallium/winsys/svga/drm/vmw_screen.h @@ -39,10 +39,13 @@ 
#include "pipe/p_state.h" #include "svga_winsys.h" +#include "pipebuffer/pb_buffer_fenced.h" #define VMW_GMR_POOL_SIZE (16*1024*1024) #define VMW_QUERY_POOL_SIZE (8192) +#define VMW_DEBUG_FLUSH_STACK 10 + /* * Something big, but arbitrary. The kernel reports an error if it can't * handle this, and the svga driver will resort to multiple partial @@ -53,6 +56,10 @@ struct pb_manager; struct vmw_region; +struct vmw_cap_3d { + boolean has_cap; + SVGA3dDevCapResult result; +}; struct vmw_winsys_screen { @@ -63,7 +70,10 @@ struct vmw_winsys_screen struct { int drm_fd; uint32_t hwversion; - uint32_t *buffer; + uint32_t num_cap_3d; + struct vmw_cap_3d *cap_3d; + uint64_t max_mob_memory; + uint64_t max_surface_memory; } ioctl; struct { @@ -72,10 +82,21 @@ struct vmw_winsys_screen struct pb_manager *gmr_fenced; struct pb_manager *gmr_slab; struct pb_manager *gmr_slab_fenced; - struct pb_manager *query; struct pb_manager *query_mm; struct pb_manager *query_fenced; + struct pb_manager *mob_fenced; + struct pb_manager *mob_cache; + struct pb_manager *mob_shader_slab; + struct pb_manager *mob_shader_slab_fenced; } pools; + + struct pb_fence_ops *fence_ops; + + /* + * Screen instances + */ + dev_t device; + int open_count; }; @@ -86,6 +107,9 @@ vmw_winsys_screen(struct svga_winsys_screen *base) } /* */ +uint32_t +vmw_region_size(struct vmw_region *region); + uint32 vmw_ioctl_context_create(struct vmw_winsys_screen *vws); @@ -100,6 +124,23 @@ vmw_ioctl_surface_create(struct vmw_winsys_screen *vws, SVGA3dSize size, uint32 numFaces, uint32 numMipLevels); +uint32 +vmw_ioctl_gb_surface_create(struct vmw_winsys_screen *vws, + SVGA3dSurfaceFlags flags, + SVGA3dSurfaceFormat format, + SVGA3dSize size, + uint32 numFaces, + uint32 numMipLevels, + uint32 buffer_handle, + struct vmw_region **p_region); + +int +vmw_ioctl_gb_surface_ref(struct vmw_winsys_screen *vws, + uint32_t handle, + SVGA3dSurfaceFlags *flags, + SVGA3dSurfaceFormat *format, + uint32_t *numMipLevels, + struct vmw_region 
**p_region); void vmw_ioctl_surface_destroy(struct vmw_winsys_screen *vws, @@ -140,12 +181,28 @@ void vmw_ioctl_fence_unref(struct vmw_winsys_screen *vws, uint32_t handle); +uint32 +vmw_ioctl_shader_create(struct vmw_winsys_screen *vws, + SVGA3dShaderType type, + uint32 code_len); +void +vmw_ioctl_shader_destroy(struct vmw_winsys_screen *vws, uint32 shid); +int +vmw_ioctl_syncforcpu(struct vmw_region *region, + boolean dont_block, + boolean readonly, + boolean allow_cs); +void +vmw_ioctl_releasefromcpu(struct vmw_region *region, + boolean readonly, + boolean allow_cs); /* Initialize parts of vmw_winsys_screen at startup: */ boolean vmw_ioctl_init(struct vmw_winsys_screen *vws); boolean vmw_pools_init(struct vmw_winsys_screen *vws); boolean vmw_query_pools_init(struct vmw_winsys_screen *vws); +boolean vmw_mob_pools_init(struct vmw_winsys_screen *vws); boolean vmw_winsys_screen_init_svga(struct vmw_winsys_screen *vws); void vmw_ioctl_cleanup(struct vmw_winsys_screen *vws); @@ -156,4 +213,13 @@ void vmw_winsys_destroy(struct vmw_winsys_screen *sws); void vmw_winsys_screen_set_throttling(struct pipe_screen *screen, uint32_t throttle_us); +struct pb_manager * +simple_fenced_bufmgr_create(struct pb_manager *provider, + struct pb_fence_ops *ops); +void +vmw_fences_signal(struct pb_fence_ops *fence_ops, + uint32_t signaled, + uint32_t emitted, + boolean has_emitted); + #endif /* VMW_SCREEN_H_ */ diff --git a/src/gallium/winsys/svga/drm/vmw_screen_dri.c b/src/gallium/winsys/svga/drm/vmw_screen_dri.c index a17cdf7cf92..0f5a9552d85 100644 --- a/src/gallium/winsys/svga/drm/vmw_screen_dri.c +++ b/src/gallium/winsys/svga/drm/vmw_screen_dri.c @@ -32,7 +32,9 @@ #include "vmw_context.h" #include "vmw_screen.h" #include "vmw_surface.h" +#include "vmw_buffer.h" #include "svga_drm_public.h" +#include "svga3d_surfacedefs.h" #include "state_tracker/drm_driver.h" @@ -52,6 +54,11 @@ static struct svga_winsys_surface * vmw_drm_surface_from_handle(struct svga_winsys_screen *sws, struct 
winsys_handle *whandle, SVGA3dSurfaceFormat *format); + +static struct svga_winsys_surface * +vmw_drm_gb_surface_from_handle(struct svga_winsys_screen *sws, + struct winsys_handle *whandle, + SVGA3dSurfaceFormat *format); static boolean vmw_drm_surface_get_handle(struct svga_winsys_screen *sws, struct svga_winsys_surface *surface, @@ -109,7 +116,8 @@ svga_drm_winsys_screen_create(int fd) goto out_no_vws; /* XXX do this properly */ - vws->base.surface_from_handle = vmw_drm_surface_from_handle; + vws->base.surface_from_handle = vws->base.have_gb_objects ? + vmw_drm_gb_surface_from_handle : vmw_drm_surface_from_handle; vws->base.surface_get_handle = vmw_drm_surface_get_handle; return &vws->base; @@ -150,6 +158,83 @@ vmw_dri1_intersect_src_bbox(struct drm_clip_rect *dst, return TRUE; } +/** + * vmw_drm_gb_surface_from_handle - Create a shared surface + * + * @sws: Screen to register the surface with. + * @whandle: struct winsys_handle identifying the kernel surface object + * @format: On successful return points to a value describing the + * surface format. + * + * Returns a refcounted pointer to a struct svga_winsys_surface + * embedded in a struct vmw_svga_winsys_surface on success or NULL + * on failure. + */ +static struct svga_winsys_surface * +vmw_drm_gb_surface_from_handle(struct svga_winsys_screen *sws, + struct winsys_handle *whandle, + SVGA3dSurfaceFormat *format) +{ + struct vmw_svga_winsys_surface *vsrf; + struct svga_winsys_surface *ssrf; + struct vmw_winsys_screen *vws = vmw_winsys_screen(sws); + SVGA3dSurfaceFlags flags; + uint32_t mip_levels; + struct vmw_buffer_desc desc; + struct pb_manager *provider = vws->pools.gmr; + struct pb_buffer *pb_buf; + int ret; + + ret = vmw_ioctl_gb_surface_ref(vws, whandle->handle, &flags, format, + &mip_levels, &desc.region); + + if (ret) { + fprintf(stderr, "Failed referencing shared surface. 
SID %d.\n" + "Error %d (%s).\n", + whandle->handle, ret, strerror(-ret)); + return NULL; + } + + if (mip_levels != 1) { + fprintf(stderr, "Incorrect number of mipmap levels on shared surface." + " SID %d, levels %d\n", + whandle->handle, mip_levels); + goto out_mip; + } + + vsrf = CALLOC_STRUCT(vmw_svga_winsys_surface); + if (!vsrf) + goto out_mip; + + pipe_reference_init(&vsrf->refcnt, 1); + p_atomic_set(&vsrf->validated, 0); + vsrf->screen = vws; + vsrf->sid = whandle->handle; + vsrf->size = vmw_region_size(desc.region); + + /* + * Synchronize backing buffers of shared surfaces using the + * kernel, since we don't pass fence objects around between + * processes. + */ + desc.pb_desc.alignment = 4096; + desc.pb_desc.usage = VMW_BUFFER_USAGE_SHARED | VMW_BUFFER_USAGE_SYNC; + pb_buf = provider->create_buffer(provider, vsrf->size, &desc.pb_desc); + vsrf->buf = vmw_svga_winsys_buffer_wrap(pb_buf); + if (!vsrf->buf) + goto out_no_buf; + ssrf = svga_winsys_surface(vsrf); + + return ssrf; + +out_no_buf: + FREE(vsrf); +out_mip: + vmw_ioctl_region_destroy(desc.region); + vmw_ioctl_surface_destroy(vws, whandle->handle); + return NULL; +} + static struct svga_winsys_surface * vmw_drm_surface_from_handle(struct svga_winsys_screen *sws, struct winsys_handle *whandle, @@ -162,6 +247,7 @@ vmw_drm_surface_from_handle(struct svga_winsys_screen *sws, struct drm_vmw_surface_arg *req = &arg.req; struct drm_vmw_surface_create_req *rep = &arg.rep; uint32_t handle = 0; + SVGA3dSize size; int ret; int i; @@ -187,6 +273,7 @@ vmw_drm_surface_from_handle(struct svga_winsys_screen *sws, memset(&arg, 0, sizeof(arg)); req->sid = handle; + rep->size_addr = (size_t)&size; ret = drmCommandWriteRead(vws->ioctl.drm_fd, DRM_VMW_REF_SURFACE, &arg, sizeof(arg)); @@ -235,6 +322,11 @@ vmw_drm_surface_from_handle(struct svga_winsys_screen *sws, ssrf = svga_winsys_surface(vsrf); *format = rep->format; + /* Estimate usage, for early flushing. 
*/ + vsrf->size = svga3dsurface_get_serialized_size(rep->format, size, + rep->mip_levels[0], + FALSE); + return ssrf; out_mip: diff --git a/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c b/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c index 36888dc2cce..501c047c32a 100644 --- a/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c +++ b/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c @@ -43,6 +43,7 @@ #include "xf86drm.h" #include "vmwgfx_drm.h" #include "svga3d_caps.h" +#include "svga3d_reg.h" #include "os/os_mman.h" @@ -51,7 +52,6 @@ struct vmw_region { - SVGAGuestPtr ptr; uint32_t handle; uint64_t map_handle; void *data; @@ -66,6 +66,13 @@ struct vmw_region */ #define SVGA3D_SURFACE_HINT_SCANOUT (1 << 9) + +uint32_t +vmw_region_size(struct vmw_region *region) +{ + return region->size; +} + uint32 vmw_ioctl_context_create(struct vmw_winsys_screen *vws) { @@ -168,6 +175,139 @@ vmw_ioctl_surface_create(struct vmw_winsys_screen *vws, return rep->sid; } + +uint32 +vmw_ioctl_gb_surface_create(struct vmw_winsys_screen *vws, + SVGA3dSurfaceFlags flags, + SVGA3dSurfaceFormat format, + SVGA3dSize size, + uint32_t numFaces, + uint32_t numMipLevels, + uint32_t buffer_handle, + struct vmw_region **p_region) +{ + union drm_vmw_gb_surface_create_arg s_arg; + struct drm_vmw_gb_surface_create_req *req = &s_arg.req; + struct drm_vmw_gb_surface_create_rep *rep = &s_arg.rep; + struct vmw_region *region = NULL; + int ret; + + vmw_printf("%s flags %d format %d\n", __FUNCTION__, flags, format); + + if (p_region) { + region = CALLOC_STRUCT(vmw_region); + if (!region) + return SVGA3D_INVALID_ID; + } + + memset(&s_arg, 0, sizeof(s_arg)); + if (flags & SVGA3D_SURFACE_HINT_SCANOUT) { + req->svga3d_flags = (uint32_t) (flags & ~SVGA3D_SURFACE_HINT_SCANOUT); + req->drm_surface_flags = drm_vmw_surface_flag_scanout; + } else { + req->svga3d_flags = (uint32_t) flags; + } + req->format = (uint32_t) format; + req->drm_surface_flags |= drm_vmw_surface_flag_shareable; + req->drm_surface_flags |= 
drm_vmw_surface_flag_create_buffer; + + assert(numFaces * numMipLevels < DRM_VMW_MAX_SURFACE_FACES* + DRM_VMW_MAX_MIP_LEVELS); + req->base_size.width = size.width; + req->base_size.height = size.height; + req->base_size.depth = size.depth; + req->mip_levels = numMipLevels; + req->multisample_count = 0; + req->autogen_filter = SVGA3D_TEX_FILTER_NONE; + if (buffer_handle) + req->buffer_handle = buffer_handle; + else + req->buffer_handle = SVGA3D_INVALID_ID; + + ret = drmCommandWriteRead(vws->ioctl.drm_fd, DRM_VMW_GB_SURFACE_CREATE, + &s_arg, sizeof(s_arg)); + + if (ret) + goto out_fail_create; + + if (p_region) { + region->handle = rep->buffer_handle; + region->map_handle = rep->buffer_map_handle; + region->drm_fd = vws->ioctl.drm_fd; + region->size = rep->backup_size; + *p_region = region; + } + + vmw_printf("Surface id is %d\n", rep->sid); + return rep->handle; + +out_fail_create: + if (region) + FREE(region); + return SVGA3D_INVALID_ID; +} + +/** + * vmw_ioctl_gb_surface_ref - Put a reference on a guest-backed surface and + * get surface information + * + * @vws: Screen to register the reference on + * @handle: Kernel handle of the guest-backed surface + * @flags: flags used when the surface was created + * @format: Format used when the surface was created + * @numMipLevels: Number of mipmap levels of the surface + * @p_region: On successful return points to a newly allocated + * struct vmw_region holding a reference to the surface backup buffer. + * + * Returns 0 on success, a system error on failure. 
+ */ +int +vmw_ioctl_gb_surface_ref(struct vmw_winsys_screen *vws, + uint32_t handle, + SVGA3dSurfaceFlags *flags, + SVGA3dSurfaceFormat *format, + uint32_t *numMipLevels, + struct vmw_region **p_region) +{ + union drm_vmw_gb_surface_reference_arg s_arg; + struct drm_vmw_surface_arg *req = &s_arg.req; + struct drm_vmw_gb_surface_ref_rep *rep = &s_arg.rep; + struct vmw_region *region = NULL; + int ret; + + vmw_printf("%s flags %d format %d\n", __FUNCTION__, flags, format); + + assert(p_region != NULL); + region = CALLOC_STRUCT(vmw_region); + if (!region) + return -ENOMEM; + + memset(&s_arg, 0, sizeof(s_arg)); + req->sid = handle; + + ret = drmCommandWriteRead(vws->ioctl.drm_fd, DRM_VMW_GB_SURFACE_REF, + &s_arg, sizeof(s_arg)); + + if (ret) + goto out_fail_ref; + + region->handle = rep->crep.buffer_handle; + region->map_handle = rep->crep.buffer_map_handle; + region->drm_fd = vws->ioctl.drm_fd; + region->size = rep->crep.backup_size; + *p_region = region; + + *flags = rep->creq.svga3d_flags; + *format = rep->creq.format; + *numMipLevels = rep->creq.mip_levels; + + return 0; +out_fail_ref: + if (region) + FREE(region); + return ret; +} + void vmw_ioctl_surface_destroy(struct vmw_winsys_screen *vws, uint32 sid) { @@ -238,8 +378,11 @@ vmw_ioctl_command(struct vmw_winsys_screen *vws, int32_t cid, *pfence = NULL; } else { if (pfence) { - *pfence = vmw_fence_create(rep.handle, rep.mask); + vmw_fences_signal(vws->fence_ops, rep.passed_seqno, rep.seqno, + TRUE); + *pfence = vmw_fence_create(vws->fence_ops, rep.handle, + rep.seqno, rep.mask); if (*pfence == NULL) { /* * Fence creation failed. Need to sync. 
@@ -279,8 +422,6 @@ vmw_ioctl_region_create(struct vmw_winsys_screen *vws, uint32_t size) goto out_err1; } - region->ptr.gmrId = rep->cur_gmr_id; - region->ptr.offset = rep->cur_gmr_offset; region->data = NULL; region->handle = rep->handle; region->map_handle = rep->map_handle; @@ -321,7 +462,8 @@ vmw_ioctl_region_destroy(struct vmw_region *region) SVGAGuestPtr vmw_ioctl_region_ptr(struct vmw_region *region) { - return region->ptr; + SVGAGuestPtr ptr = {region->handle, 0}; + return ptr; } void * @@ -356,6 +498,69 @@ vmw_ioctl_region_unmap(struct vmw_region *region) --region->map_count; } +/** + * vmw_ioctl_syncforcpu - Synchronize a buffer object for CPU usage + * + * @region: Pointer to a struct vmw_region representing the buffer object. + * @dont_block: Don't wait for GPU idle, but rather return -EBUSY if the + * GPU is busy with the buffer object. + * @readonly: Hint that the CPU access is read-only. + * @allow_cs: Allow concurrent command submission while the buffer is + * synchronized for CPU. If FALSE, command submissions referencing the + * buffer will block until a corresponding call to vmw_ioctl_releasefromcpu. + * + * This function idles any GPU activities touching the buffer and blocks + * command submission of commands referencing the buffer, even from + * other processes. + */ +int +vmw_ioctl_syncforcpu(struct vmw_region *region, + boolean dont_block, + boolean readonly, + boolean allow_cs) +{ + struct drm_vmw_synccpu_arg arg; + + memset(&arg, 0, sizeof(arg)); + arg.op = drm_vmw_synccpu_grab; + arg.handle = region->handle; + arg.flags = drm_vmw_synccpu_read; + if (!readonly) + arg.flags |= drm_vmw_synccpu_write; + if (dont_block) + arg.flags |= drm_vmw_synccpu_dontblock; + if (allow_cs) + arg.flags |= drm_vmw_synccpu_allow_cs; + + return drmCommandWrite(region->drm_fd, DRM_VMW_SYNCCPU, &arg, sizeof(arg)); +} + +/** + * vmw_ioctl_releasefromcpu - Undo a previous syncforcpu. + * + * @region: Pointer to a struct vmw_region representing the buffer object. 
+ * @readonly: Should hold the same value as the matching syncforcpu call. + * @allow_cs: Should hold the same value as the matching syncforcpu call. + */ +void +vmw_ioctl_releasefromcpu(struct vmw_region *region, + boolean readonly, + boolean allow_cs) +{ + struct drm_vmw_synccpu_arg arg; + + memset(&arg, 0, sizeof(arg)); + arg.op = drm_vmw_synccpu_release; + arg.handle = region->handle; + arg.flags = drm_vmw_synccpu_read; + if (!readonly) + arg.flags |= drm_vmw_synccpu_write; + if (allow_cs) + arg.flags |= drm_vmw_synccpu_allow_cs; + + (void) drmCommandWrite(region->drm_fd, DRM_VMW_SYNCCPU, &arg, sizeof(arg)); +} + void vmw_ioctl_fence_unref(struct vmw_winsys_screen *vws, uint32_t handle) @@ -405,6 +610,8 @@ vmw_ioctl_fence_signalled(struct vmw_winsys_screen *vws, if (ret != 0) return ret; + vmw_fences_signal(vws->fence_ops, arg.passed_seqno, 0, FALSE); + return (arg.signaled) ? 0 : -1; } @@ -435,6 +642,113 @@ vmw_ioctl_fence_finish(struct vmw_winsys_screen *vws, return 0; } +uint32 +vmw_ioctl_shader_create(struct vmw_winsys_screen *vws, + SVGA3dShaderType type, + uint32 code_len) +{ + struct drm_vmw_shader_create_arg sh_arg; + int ret; + + VMW_FUNC; + + memset(&sh_arg, 0, sizeof(sh_arg)); + + sh_arg.size = code_len; + sh_arg.buffer_handle = SVGA3D_INVALID_ID; + sh_arg.shader_handle = SVGA3D_INVALID_ID; + switch (type) { + case SVGA3D_SHADERTYPE_VS: + sh_arg.shader_type = drm_vmw_shader_type_vs; + break; + case SVGA3D_SHADERTYPE_PS: + sh_arg.shader_type = drm_vmw_shader_type_ps; + break; + default: + assert(!"Invalid shader type."); + break; + } + + ret = drmCommandWriteRead(vws->ioctl.drm_fd, DRM_VMW_CREATE_SHADER, + &sh_arg, sizeof(sh_arg)); + + if (ret) + return SVGA3D_INVALID_ID; + + return sh_arg.shader_handle; +} + +void +vmw_ioctl_shader_destroy(struct vmw_winsys_screen *vws, uint32 shid) +{ + struct drm_vmw_shader_arg sh_arg; + + VMW_FUNC; + + memset(&sh_arg, 0, sizeof(sh_arg)); + sh_arg.handle = shid; + + (void)drmCommandWrite(vws->ioctl.drm_fd, 
DRM_VMW_UNREF_SHADER, + &sh_arg, sizeof(sh_arg)); + +} + +static int +vmw_ioctl_parse_caps(struct vmw_winsys_screen *vws, + const uint32_t *cap_buffer) +{ + int i; + + if (vws->base.have_gb_objects) { + for (i = 0; i < vws->ioctl.num_cap_3d; ++i) { + vws->ioctl.cap_3d[i].has_cap = TRUE; + vws->ioctl.cap_3d[i].result.u = cap_buffer[i]; + } + return 0; + } else { + const uint32 *capsBlock; + const SVGA3dCapsRecord *capsRecord = NULL; + uint32 offset; + const SVGA3dCapPair *capArray; + int numCaps, index; + + /* + * Search linearly through the caps block records for the specified type. + */ + capsBlock = cap_buffer; + for (offset = 0; capsBlock[offset] != 0; offset += capsBlock[offset]) { + const SVGA3dCapsRecord *record; + assert(offset < SVGA_FIFO_3D_CAPS_SIZE); + record = (const SVGA3dCapsRecord *) (capsBlock + offset); + if ((record->header.type >= SVGA3DCAPS_RECORD_DEVCAPS_MIN) && + (record->header.type <= SVGA3DCAPS_RECORD_DEVCAPS_MAX) && + (!capsRecord || (record->header.type > capsRecord->header.type))) { + capsRecord = record; + } + } + + if(!capsRecord) + return -1; + + /* + * Calculate the number of caps from the size of the record. 
+ */ + capArray = (const SVGA3dCapPair *) capsRecord->data; + numCaps = (int) ((capsRecord->header.length * sizeof(uint32) - + sizeof capsRecord->header) / (2 * sizeof(uint32))); + + for (i = 0; i < numCaps; i++) { + index = capArray[i][0]; + if (index < vws->ioctl.num_cap_3d) { + vws->ioctl.cap_3d[index].has_cap = TRUE; + vws->ioctl.cap_3d[index].result.u = capArray[i][1]; + } else { + debug_printf("Unknown devcaps seen: %d\n", index); + } + } + } + return 0; +} boolean vmw_ioctl_init(struct vmw_winsys_screen *vws) @@ -443,9 +757,19 @@ vmw_ioctl_init(struct vmw_winsys_screen *vws) struct drm_vmw_get_3d_cap_arg cap_arg; unsigned int size; int ret; + uint32_t *cap_buffer; + drmVersionPtr version; + boolean drm_gb_capable; VMW_FUNC; + version = drmGetVersion(vws->ioctl.drm_fd); + if (!version) + goto out_no_version; + + drm_gb_capable = version->version_major > 2 || + (version->version_major == 2 && version->version_minor > 4); + memset(&gp_arg, 0, sizeof(gp_arg)); gp_arg.param = DRM_VMW_PARAM_3D; ret = drmCommandWriteRead(vws->ioctl.drm_fd, DRM_VMW_GET_PARAM, @@ -466,15 +790,78 @@ vmw_ioctl_init(struct vmw_winsys_screen *vws) } vws->ioctl.hwversion = gp_arg.value; - size = SVGA_FIFO_3D_CAPS_SIZE * sizeof(uint32_t); - vws->ioctl.buffer = calloc(1, size); - if (!vws->ioctl.buffer) { + memset(&gp_arg, 0, sizeof(gp_arg)); + gp_arg.param = DRM_VMW_PARAM_HW_CAPS; + ret = drmCommandWriteRead(vws->ioctl.drm_fd, DRM_VMW_GET_PARAM, + &gp_arg, sizeof(gp_arg)); + if (ret) + vws->base.have_gb_objects = FALSE; + else + vws->base.have_gb_objects = + !!(gp_arg.value & (uint64_t) SVGA_CAP_GBOBJECTS); + + if (vws->base.have_gb_objects && !drm_gb_capable) + goto out_no_3d; + + if (vws->base.have_gb_objects) { + memset(&gp_arg, 0, sizeof(gp_arg)); + gp_arg.param = DRM_VMW_PARAM_3D_CAPS_SIZE; + ret = drmCommandWriteRead(vws->ioctl.drm_fd, DRM_VMW_GET_PARAM, + &gp_arg, sizeof(gp_arg)); + if (ret) + size = SVGA_FIFO_3D_CAPS_SIZE * sizeof(uint32_t); + else + size = gp_arg.value; + + if 
(vws->base.have_gb_objects) + vws->ioctl.num_cap_3d = size / sizeof(uint32_t); + else + vws->ioctl.num_cap_3d = SVGA3D_DEVCAP_MAX; + + + memset(&gp_arg, 0, sizeof(gp_arg)); + gp_arg.param = DRM_VMW_PARAM_MAX_MOB_MEMORY; + ret = drmCommandWriteRead(vws->ioctl.drm_fd, DRM_VMW_GET_PARAM, + &gp_arg, sizeof(gp_arg)); + if (ret) { + /* Just guess a large enough value. */ + vws->ioctl.max_mob_memory = 256*1024*1024; + } else { + vws->ioctl.max_mob_memory = gp_arg.value; + } + /* Never early flush surfaces, mobs do accounting. */ + vws->ioctl.max_surface_memory = -1; + } else { + vws->ioctl.num_cap_3d = SVGA3D_DEVCAP_MAX; + + memset(&gp_arg, 0, sizeof(gp_arg)); + gp_arg.param = DRM_VMW_PARAM_MAX_SURF_MEMORY; + ret = drmCommandWriteRead(vws->ioctl.drm_fd, DRM_VMW_GET_PARAM, + &gp_arg, sizeof(gp_arg)); + if (ret) { + /* Just guess a large enough value, around 800mb. */ + vws->ioctl.max_surface_memory = 0x300000000; + } else { + vws->ioctl.max_surface_memory = gp_arg.value; + } + size = SVGA_FIFO_3D_CAPS_SIZE * sizeof(uint32_t); + } + + cap_buffer = calloc(1, size); + if (!cap_buffer) { debug_printf("Failed alloc fifo 3D caps buffer.\n"); goto out_no_3d; } + vws->ioctl.cap_3d = calloc(vws->ioctl.num_cap_3d, + sizeof(*vws->ioctl.cap_3d)); + if (!vws->ioctl.cap_3d) { + debug_printf("Failed alloc fifo 3D caps buffer.\n"); + goto out_no_caparray; + } + memset(&cap_arg, 0, sizeof(cap_arg)); - cap_arg.buffer = (uint64_t) (unsigned long) (vws->ioctl.buffer); + cap_arg.buffer = (uint64_t) (unsigned long) (cap_buffer); cap_arg.max_size = size; ret = drmCommandWrite(vws->ioctl.drm_fd, DRM_VMW_GET_3D_CAP, @@ -486,11 +873,24 @@ vmw_ioctl_init(struct vmw_winsys_screen *vws) goto out_no_caps; } + ret = vmw_ioctl_parse_caps(vws, cap_buffer); + if (ret) { + debug_printf("Failed to parse 3D capabilities" + " (%i, %s).\n", ret, strerror(-ret)); + goto out_no_caps; + } + free(cap_buffer); + drmFreeVersion(version); vmw_printf("%s OK\n", __FUNCTION__); return TRUE; out_no_caps: - 
free(vws->ioctl.buffer); + free(vws->ioctl.cap_3d); + out_no_caparray: + free(cap_buffer); out_no_3d: + drmFreeVersion(version); + out_no_version: + vws->ioctl.num_cap_3d = 0; debug_printf("%s Failed\n", __FUNCTION__); return FALSE; } diff --git a/src/gallium/winsys/svga/drm/vmw_screen_pools.c b/src/gallium/winsys/svga/drm/vmw_screen_pools.c index 2525604f952..7f7b77982af 100644 --- a/src/gallium/winsys/svga/drm/vmw_screen_pools.c +++ b/src/gallium/winsys/svga/drm/vmw_screen_pools.c @@ -32,13 +32,6 @@ #include "pipebuffer/pb_buffer.h" #include "pipebuffer/pb_bufmgr.h" -/* - * TODO: Have the query pool always ask the fence manager for - * SVGA_FENCE_FLAG_QUERY signaled. Unfortunately, pb_fenced doesn't - * support that currently, so we'd have to create a separate - * pb_fence_ops wrapper that does this implicitly. - */ - /** * vmw_pools_cleanup - Destroy the buffer pools. * @@ -47,20 +40,32 @@ void vmw_pools_cleanup(struct vmw_winsys_screen *vws) { - if(vws->pools.gmr_fenced) - vws->pools.gmr_fenced->destroy(vws->pools.gmr_fenced); + if (vws->pools.mob_shader_slab_fenced) + vws->pools.mob_shader_slab_fenced->destroy + (vws->pools.mob_shader_slab_fenced); + if (vws->pools.mob_shader_slab) + vws->pools.mob_shader_slab->destroy(vws->pools.mob_shader_slab); + if (vws->pools.mob_fenced) + vws->pools.mob_fenced->destroy(vws->pools.mob_fenced); + if (vws->pools.mob_cache) + vws->pools.mob_cache->destroy(vws->pools.mob_cache); + if (vws->pools.query_fenced) vws->pools.query_fenced->destroy(vws->pools.query_fenced); + if (vws->pools.query_mm) + vws->pools.query_mm->destroy(vws->pools.query_mm); - /* gmr_mm pool is already destroyed above */ - + if(vws->pools.gmr_fenced) + vws->pools.gmr_fenced->destroy(vws->pools.gmr_fenced); + if (vws->pools.gmr_mm) + vws->pools.gmr_mm->destroy(vws->pools.gmr_mm); if (vws->pools.gmr_slab_fenced) vws->pools.gmr_slab_fenced->destroy(vws->pools.gmr_slab_fenced); + if (vws->pools.gmr_slab) + vws->pools.gmr_slab->destroy(vws->pools.gmr_slab); 
if(vws->pools.gmr) vws->pools.gmr->destroy(vws->pools.gmr); - if(vws->pools.query) - vws->pools.query->destroy(vws->pools.query); } @@ -77,21 +82,14 @@ vmw_pools_cleanup(struct vmw_winsys_screen *vws) boolean vmw_query_pools_init(struct vmw_winsys_screen *vws) { - vws->pools.query = vmw_gmr_bufmgr_create(vws); - if(!vws->pools.query) - return FALSE; - - vws->pools.query_mm = mm_bufmgr_create(vws->pools.query, + vws->pools.query_mm = mm_bufmgr_create(vws->pools.gmr, VMW_QUERY_POOL_SIZE, 3 /* 8 alignment */); - if(!vws->pools.query_mm) - goto out_no_query_mm; + if (!vws->pools.query_mm) + return FALSE; - vws->pools.query_fenced = fenced_bufmgr_create( - vws->pools.query_mm, - vmw_fence_ops_create(vws), - VMW_QUERY_POOL_SIZE, - ~0); + vws->pools.query_fenced = simple_fenced_bufmgr_create( + vws->pools.query_mm, vws->fence_ops); if(!vws->pools.query_fenced) goto out_no_query_fenced; @@ -100,8 +98,60 @@ vmw_query_pools_init(struct vmw_winsys_screen *vws) out_no_query_fenced: vws->pools.query_mm->destroy(vws->pools.query_mm); - out_no_query_mm: - vws->pools.query->destroy(vws->pools.query); + return FALSE; +} + +/** + * vmw_mob_pools_init - Create a pool of fenced kernel buffers. + * + * @vws: Pointer to a struct vmw_winsys_screen. + * + * Typically this pool should be created on demand when we + * detect that the app will be using MOB buffers. 
+ */ +boolean +vmw_mob_pools_init(struct vmw_winsys_screen *vws) +{ + struct pb_desc desc; + + vws->pools.mob_cache = + pb_cache_manager_create(vws->pools.gmr, 100000, 2, + VMW_BUFFER_USAGE_SHARED); + if (!vws->pools.mob_cache) + return FALSE; + + vws->pools.mob_fenced = + simple_fenced_bufmgr_create(vws->pools.mob_cache, + vws->fence_ops); + if(!vws->pools.mob_fenced) + goto out_no_mob_fenced; + + desc.alignment = 64; + desc.usage = ~(SVGA_BUFFER_USAGE_PINNED | VMW_BUFFER_USAGE_SHARED | + VMW_BUFFER_USAGE_SYNC); + vws->pools.mob_shader_slab = + pb_slab_range_manager_create(vws->pools.mob_cache, + 64, + 8192, + 16384, + &desc); + if(!vws->pools.mob_shader_slab) + goto out_no_mob_shader_slab; + + vws->pools.mob_shader_slab_fenced = + simple_fenced_bufmgr_create(vws->pools.mob_shader_slab, + vws->fence_ops); + if(!vws->pools.mob_fenced) + goto out_no_mob_shader_slab_fenced; + + return TRUE; + + out_no_mob_shader_slab_fenced: + vws->pools.mob_shader_slab->destroy(vws->pools.mob_shader_slab); + out_no_mob_shader_slab: + vws->pools.mob_fenced->destroy(vws->pools.mob_fenced); + out_no_mob_fenced: + vws->pools.mob_cache->destroy(vws->pools.mob_cache); return FALSE; } @@ -119,33 +169,27 @@ vmw_pools_init(struct vmw_winsys_screen *vws) if(!vws->pools.gmr) goto error; - vws->pools.gmr_mm = mm_bufmgr_create(vws->pools.gmr, - VMW_GMR_POOL_SIZE, - 12 /* 4096 alignment */); - if(!vws->pools.gmr_mm) - goto error; + if ((vws->base.have_gb_objects && vws->base.have_gb_dma) || + !vws->base.have_gb_objects) { + /* + * A managed pool for DMA buffers. + */ + vws->pools.gmr_mm = mm_bufmgr_create(vws->pools.gmr, + VMW_GMR_POOL_SIZE, + 12 /* 4096 alignment */); + if(!vws->pools.gmr_mm) + goto error; - /* - * We disallow "CPU" buffers to be created by the fenced_bufmgr_create, - * because that defers "GPU" buffer creation to buffer validation, - * and at buffer validation we have no means of handling failures - * due to pools space shortage or fragmentation. 
Effectively this - * makes sure all failures are reported immediately on buffer allocation, - * and we can revert to allocating directly from the kernel. - */ - vws->pools.gmr_fenced = fenced_bufmgr_create( - vws->pools.gmr_mm, - vmw_fence_ops_create(vws), - VMW_GMR_POOL_SIZE, - 0); + vws->pools.gmr_fenced = simple_fenced_bufmgr_create + (vws->pools.gmr_mm, vws->fence_ops); #ifdef DEBUG - vws->pools.gmr_fenced = pb_debug_manager_create(vws->pools.gmr_fenced, - 4096, - 4096); + vws->pools.gmr_fenced = pb_debug_manager_create(vws->pools.gmr_fenced, + 4096, + 4096); #endif - if(!vws->pools.gmr_fenced) - goto error; + if(!vws->pools.gmr_fenced) + goto error; /* * The slab pool allocates buffers directly from the kernel except @@ -153,30 +197,33 @@ vmw_pools_init(struct vmw_winsys_screen *vws) * not to waste memory, since a kernel buffer is a minimum 4096 bytes. * * Here we use it only for emergency in the case our pre-allocated - * buffer pool runs out of memory. + * managed buffer pool runs out of memory. 
*/ - desc.alignment = 64; - desc.usage = ~0; - vws->pools.gmr_slab = pb_slab_range_manager_create(vws->pools.gmr, - 64, - 8192, - 16384, - &desc); - if (!vws->pools.gmr_slab) - goto error; - - vws->pools.gmr_slab_fenced = - fenced_bufmgr_create(vws->pools.gmr_slab, - vmw_fence_ops_create(vws), - VMW_MAX_BUFFER_SIZE, - 0); - - if (!vws->pools.gmr_slab_fenced) - goto error; + + desc.alignment = 64; + desc.usage = ~(SVGA_BUFFER_USAGE_PINNED | SVGA_BUFFER_USAGE_SHADER | + VMW_BUFFER_USAGE_SHARED | VMW_BUFFER_USAGE_SYNC); + vws->pools.gmr_slab = pb_slab_range_manager_create(vws->pools.gmr, + 64, + 8192, + 16384, + &desc); + if (!vws->pools.gmr_slab) + goto error; + + vws->pools.gmr_slab_fenced = + simple_fenced_bufmgr_create(vws->pools.gmr_slab, vws->fence_ops); + + if (!vws->pools.gmr_slab_fenced) + goto error; + } vws->pools.query_fenced = NULL; vws->pools.query_mm = NULL; - vws->pools.query = NULL; + vws->pools.mob_cache = NULL; + + if (vws->base.have_gb_objects && !vmw_mob_pools_init(vws)) + goto error; return TRUE; @@ -184,4 +231,3 @@ error: vmw_pools_cleanup(vws); return FALSE; } - diff --git a/src/gallium/winsys/svga/drm/vmw_screen_svga.c b/src/gallium/winsys/svga/drm/vmw_screen_svga.c index bf817ca88d8..bfba2038559 100644 --- a/src/gallium/winsys/svga/drm/vmw_screen_svga.c +++ b/src/gallium/winsys/svga/drm/vmw_screen_svga.c @@ -47,7 +47,14 @@ #include "vmw_surface.h" #include "vmw_buffer.h" #include "vmw_fence.h" +#include "vmw_shader.h" +#include "svga3d_surfacedefs.h" +/** + * Try to get a surface backing buffer from the cache + * if it's this size or smaller. 
+ */ +#define VMW_TRY_CACHED_SIZE (2*1024*1024) static struct svga_winsys_buffer * vmw_svga_winsys_buffer_create(struct svga_winsys_screen *sws, @@ -56,64 +63,37 @@ vmw_svga_winsys_buffer_create(struct svga_winsys_screen *sws, unsigned size) { struct vmw_winsys_screen *vws = vmw_winsys_screen(sws); - struct pb_desc desc; + struct vmw_buffer_desc desc; struct pb_manager *provider; struct pb_buffer *buffer; memset(&desc, 0, sizeof desc); - desc.alignment = alignment; - desc.usage = usage; + desc.pb_desc.alignment = alignment; + desc.pb_desc.usage = usage; if (usage == SVGA_BUFFER_USAGE_PINNED) { if (vws->pools.query_fenced == NULL && !vmw_query_pools_init(vws)) return NULL; provider = vws->pools.query_fenced; + } else if (usage == SVGA_BUFFER_USAGE_SHADER) { + provider = vws->pools.mob_shader_slab_fenced; } else provider = vws->pools.gmr_fenced; assert(provider); - buffer = provider->create_buffer(provider, size, &desc); + buffer = provider->create_buffer(provider, size, &desc.pb_desc); if(!buffer && provider == vws->pools.gmr_fenced) { assert(provider); provider = vws->pools.gmr_slab_fenced; - buffer = provider->create_buffer(provider, size, &desc); + buffer = provider->create_buffer(provider, size, &desc.pb_desc); } if (!buffer) return NULL; - return vmw_svga_winsys_buffer(buffer); -} - - -static void * -vmw_svga_winsys_buffer_map(struct svga_winsys_screen *sws, - struct svga_winsys_buffer *buf, - unsigned flags) -{ - (void)sws; - return pb_map(vmw_pb_buffer(buf), flags, NULL); -} - - -static void -vmw_svga_winsys_buffer_unmap(struct svga_winsys_screen *sws, - struct svga_winsys_buffer *buf) -{ - (void)sws; - pb_unmap(vmw_pb_buffer(buf)); -} - - -static void -vmw_svga_winsys_buffer_destroy(struct svga_winsys_screen *sws, - struct svga_winsys_buffer *buf) -{ - struct pb_buffer *pbuf = vmw_pb_buffer(buf); - (void)sws; - pb_reference(&pbuf, NULL); + return vmw_svga_winsys_buffer_wrap(buffer); } @@ -161,7 +141,12 @@ vmw_svga_winsys_surface_create(struct 
svga_winsys_screen *sws, { struct vmw_winsys_screen *vws = vmw_winsys_screen(sws); struct vmw_svga_winsys_surface *surface; + struct vmw_buffer_desc desc; + struct pb_manager *provider = vws->pools.mob_fenced; + uint32_t buffer_size; + + memset(&desc, 0, sizeof(desc)); surface = CALLOC_STRUCT(vmw_svga_winsys_surface); if(!surface) goto no_surface; @@ -169,15 +154,96 @@ vmw_svga_winsys_surface_create(struct svga_winsys_screen *sws, pipe_reference_init(&surface->refcnt, 1); p_atomic_set(&surface->validated, 0); surface->screen = vws; - surface->sid = vmw_ioctl_surface_create(vws, - flags, format, size, - numFaces, numMipLevels); - if(surface->sid == SVGA3D_INVALID_ID) - goto no_sid; + pipe_mutex_init(surface->mutex); + + /* + * Used for the backing buffer GB surfaces, and to approximate + * when to flush on non-GB hosts. + */ + buffer_size = svga3dsurface_get_serialized_size(format, size, numMipLevels, (numFaces == 6)); + if (sws->have_gb_objects) { + SVGAGuestPtr ptr = {0,0}; + + /* + * If the backing buffer size is small enough, try to allocate a + * buffer out of the buffer cache. Otherwise, let the kernel allocate + * a suitable buffer for us. + */ + if (buffer_size < VMW_TRY_CACHED_SIZE) { + struct pb_buffer *pb_buf; + + surface->size = buffer_size; + desc.pb_desc.alignment = 4096; + desc.pb_desc.usage = 0; + pb_buf = provider->create_buffer(provider, buffer_size, &desc.pb_desc); + surface->buf = vmw_svga_winsys_buffer_wrap(pb_buf); + if (surface->buf && !vmw_gmr_bufmgr_region_ptr(pb_buf, &ptr)) + assert(0); + } + + surface->sid = vmw_ioctl_gb_surface_create(vws, + flags, format, size, + numFaces, numMipLevels, + ptr.gmrId, + surface->buf ? NULL : + &desc.region); + + if (surface->sid == SVGA3D_INVALID_ID && surface->buf) { + + /* + * Kernel refused to allocate a surface for us. + * Perhaps something was wrong with our buffer? + * This is really a guard against future new size requirements + * on the backing buffers. 
+ */ + vmw_svga_winsys_buffer_destroy(sws, surface->buf); + surface->buf = NULL; + surface->sid = vmw_ioctl_gb_surface_create(vws, + flags, format, size, + numFaces, numMipLevels, + 0, + &desc.region); + if (surface->sid == SVGA3D_INVALID_ID) + goto no_sid; + } + + /* + * If the kernel created the buffer for us, wrap it into a + * vmw_svga_winsys_buffer. + */ + if (surface->buf == NULL) { + struct pb_buffer *pb_buf; + + surface->size = vmw_region_size(desc.region); + desc.pb_desc.alignment = 4096; + desc.pb_desc.usage = VMW_BUFFER_USAGE_SHARED; + pb_buf = provider->create_buffer(provider, surface->size, + &desc.pb_desc); + surface->buf = vmw_svga_winsys_buffer_wrap(pb_buf); + if (surface->buf == NULL) { + vmw_ioctl_region_destroy(desc.region); + vmw_ioctl_surface_destroy(vws, surface->sid); + goto no_sid; + } + } + } else { + surface->sid = vmw_ioctl_surface_create(vws, + flags, format, size, + numFaces, numMipLevels); + if(surface->sid == SVGA3D_INVALID_ID) + goto no_sid; + + /* Best estimate for surface size, used for early flushing. 
*/ + surface->size = buffer_size; + surface->buf = NULL; + } return svga_winsys_surface(surface); no_sid: + if (surface->buf) + vmw_svga_winsys_buffer_destroy(sws, surface->buf); + FREE(surface); no_surface: return NULL; @@ -220,6 +286,9 @@ vmw_svga_winsys_get_hw_version(struct svga_winsys_screen *sws) { struct vmw_winsys_screen *vws = vmw_winsys_screen(sws); + if (sws->have_gb_objects) + return SVGA3D_HWVERSION_WS8_B1; + return (SVGA3dHardwareVersion) vws->ioctl.hwversion; } @@ -228,70 +297,70 @@ static boolean vmw_svga_winsys_get_cap(struct svga_winsys_screen *sws, SVGA3dDevCapIndex index, SVGA3dDevCapResult *result) -{ +{ struct vmw_winsys_screen *vws = vmw_winsys_screen(sws); - const uint32 *capsBlock; - const SVGA3dCapsRecord *capsRecord = NULL; - uint32 offset; - const SVGA3dCapPair *capArray; - int numCaps, first, last; - - if(vws->ioctl.hwversion < SVGA3D_HWVERSION_WS6_B1) - return FALSE; - - /* - * Search linearly through the caps block records for the specified type. - */ - capsBlock = (const uint32 *)vws->ioctl.buffer; - for (offset = 0; capsBlock[offset] != 0; offset += capsBlock[offset]) { - const SVGA3dCapsRecord *record; - assert(offset < SVGA_FIFO_3D_CAPS_SIZE); - record = (const SVGA3dCapsRecord *) (capsBlock + offset); - if ((record->header.type >= SVGA3DCAPS_RECORD_DEVCAPS_MIN) && - (record->header.type <= SVGA3DCAPS_RECORD_DEVCAPS_MAX) && - (!capsRecord || (record->header.type > capsRecord->header.type))) { - capsRecord = record; - } - } - if(!capsRecord) + if (index > vws->ioctl.num_cap_3d || !vws->ioctl.cap_3d[index].has_cap) return FALSE; - /* - * Calculate the number of caps from the size of the record. - */ - capArray = (const SVGA3dCapPair *) capsRecord->data; - numCaps = (int) ((capsRecord->header.length * sizeof(uint32) - - sizeof capsRecord->header) / (2 * sizeof(uint32))); - - /* - * Binary-search for the cap with the specified index. 
- */ - for (first = 0, last = numCaps - 1; first <= last; ) { - int mid = (first + last) / 2; + *result = vws->ioctl.cap_3d[index].result; + return TRUE; +} - if ((SVGA3dDevCapIndex) capArray[mid][0] == index) { - /* - * Found it. - */ - result->u = capArray[mid][1]; - return TRUE; - } +static struct svga_winsys_gb_shader * +vmw_svga_winsys_shader_create(struct svga_winsys_screen *sws, + SVGA3dShaderType type, + const uint32 *bytecode, + uint32 bytecodeLen) +{ + struct vmw_winsys_screen *vws = vmw_winsys_screen(sws); + struct vmw_svga_winsys_shader *shader; + void *code; + + shader = CALLOC_STRUCT(vmw_svga_winsys_shader); + if(!shader) + goto out_no_shader; + + pipe_reference_init(&shader->refcnt, 1); + p_atomic_set(&shader->validated, 0); + shader->screen = vws; + shader->buf = vmw_svga_winsys_buffer_create(sws, 64, + SVGA_BUFFER_USAGE_SHADER, + bytecodeLen); + if (!shader->buf) + goto out_no_buf; + + code = vmw_svga_winsys_buffer_map(sws, shader->buf, PIPE_TRANSFER_WRITE); + if (!code) + goto out_no_buf; + + memcpy(code, bytecode, bytecodeLen); + vmw_svga_winsys_buffer_unmap(sws, shader->buf); + + shader->shid = vmw_ioctl_shader_create(vws, type, bytecodeLen); + if(shader->shid == SVGA3D_INVALID_ID) + goto out_no_shid; + + return svga_winsys_shader(shader); + +out_no_shid: + vmw_svga_winsys_buffer_destroy(sws, shader->buf); +out_no_buf: + FREE(shader); +out_no_shader: + return NULL; +} - /* - * Divide and conquer. 
- */ - if ((SVGA3dDevCapIndex) capArray[mid][0] > index) { - last = mid - 1; - } else { - first = mid + 1; - } - } +static void +vmw_svga_winsys_shader_destroy(struct svga_winsys_screen *sws, + struct svga_winsys_gb_shader *shader) +{ + struct vmw_svga_winsys_shader *d_shader = + vmw_svga_winsys_shader(shader); - return FALSE; + vmw_svga_winsys_shader_reference(&d_shader, NULL); } - boolean vmw_winsys_screen_init_svga(struct vmw_winsys_screen *vws) { @@ -308,6 +377,8 @@ vmw_winsys_screen_init_svga(struct vmw_winsys_screen *vws) vws->base.buffer_destroy = vmw_svga_winsys_buffer_destroy; vws->base.fence_reference = vmw_svga_winsys_fence_reference; vws->base.fence_signalled = vmw_svga_winsys_fence_signalled; + vws->base.shader_create = vmw_svga_winsys_shader_create; + vws->base.shader_destroy = vmw_svga_winsys_shader_destroy; vws->base.fence_finish = vmw_svga_winsys_fence_finish; return TRUE; diff --git a/src/gallium/winsys/svga/drm/vmw_shader.c b/src/gallium/winsys/svga/drm/vmw_shader.c new file mode 100644 index 00000000000..e82486ab608 --- /dev/null +++ b/src/gallium/winsys/svga/drm/vmw_shader.c @@ -0,0 +1,64 @@ +/********************************************************** + * Copyright 2009-2012 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + + +#include "svga_cmd.h" +#include "util/u_debug.h" +#include "util/u_memory.h" + +#include "vmw_shader.h" +#include "vmw_screen.h" + +void +vmw_svga_winsys_shader_reference(struct vmw_svga_winsys_shader **pdst, + struct vmw_svga_winsys_shader *src) +{ + struct pipe_reference *src_ref; + struct pipe_reference *dst_ref; + struct vmw_svga_winsys_shader *dst; + + if(pdst == NULL || *pdst == src) + return; + + dst = *pdst; + + src_ref = src ? &src->refcnt : NULL; + dst_ref = dst ? &dst->refcnt : NULL; + + if (pipe_reference(dst_ref, src_ref)) { + struct svga_winsys_screen *sws = &dst->screen->base; + + vmw_ioctl_shader_destroy(dst->screen, dst->shid); +#ifdef DEBUG + /* to detect dangling pointers */ + assert(p_atomic_read(&dst->validated) == 0); + dst->shid = SVGA3D_INVALID_ID; +#endif + sws->buffer_destroy(sws, dst->buf); + FREE(dst); + } + + *pdst = src; +} diff --git a/src/gallium/winsys/svga/drm/vmw_shader.h b/src/gallium/winsys/svga/drm/vmw_shader.h new file mode 100644 index 00000000000..1fd8c3311f9 --- /dev/null +++ b/src/gallium/winsys/svga/drm/vmw_shader.h @@ -0,0 +1,67 @@ +/********************************************************** + * Copyright 2009-2012 VMware, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +/** + * @file + * Shaders for VMware SVGA winsys. 
+ * + * @author Jose Fonseca <[email protected]> + * @author Thomas Hellstrom <[email protected]> + */ + +#ifndef VMW_SHADER_H_ + +#include "pipe/p_compiler.h" +#include "util/u_atomic.h" +#include "util/u_inlines.h" + +struct vmw_svga_winsys_shader +{ + int32_t validated; + struct pipe_reference refcnt; + + struct vmw_winsys_screen *screen; + struct svga_winsys_buffer *buf; + uint32_t shid; +}; + +static INLINE struct svga_winsys_gb_shader * +svga_winsys_shader(struct vmw_svga_winsys_shader *shader) +{ + assert(!shader || shader->shid != SVGA3D_INVALID_ID); + return (struct svga_winsys_gb_shader *)shader; +} + +static INLINE struct vmw_svga_winsys_shader * +vmw_svga_winsys_shader(struct svga_winsys_gb_shader *shader) +{ + return (struct vmw_svga_winsys_shader *)shader; +} + +void +vmw_svga_winsys_shader_reference(struct vmw_svga_winsys_shader **pdst, + struct vmw_svga_winsys_shader *src); + +#endif /* VMW_SHADER_H_ */ diff --git a/src/gallium/winsys/svga/drm/vmw_surface.c b/src/gallium/winsys/svga/drm/vmw_surface.c index 5f1b9ad5770..cf648b4dd93 100644 --- a/src/gallium/winsys/svga/drm/vmw_surface.c +++ b/src/gallium/winsys/svga/drm/vmw_surface.c @@ -27,9 +27,152 @@ #include "svga_cmd.h" #include "util/u_debug.h" #include "util/u_memory.h" - +#include "pipe/p_defines.h" #include "vmw_surface.h" #include "vmw_screen.h" +#include "vmw_buffer.h" +#include "vmw_context.h" +#include "pipebuffer/pb_bufmgr.h" + + +void * +vmw_svga_winsys_surface_map(struct svga_winsys_context *swc, + struct svga_winsys_surface *srf, + unsigned flags, boolean *retry) +{ + struct vmw_svga_winsys_surface *vsrf = vmw_svga_winsys_surface(srf); + void *data = NULL; + struct pb_buffer *pb_buf; + uint32_t pb_flags; + struct vmw_winsys_screen *vws = vsrf->screen; + + *retry = FALSE; + assert((flags & (PIPE_TRANSFER_READ | PIPE_TRANSFER_WRITE)) != 0); + pipe_mutex_lock(vsrf->mutex); + + if (vsrf->mapcount) { + /* + * Only allow multiple readers to map. 
+ */ + if ((flags & PIPE_TRANSFER_WRITE) || + (vsrf->map_mode & PIPE_TRANSFER_WRITE)) + goto out_unlock; + + data = vsrf->data; + goto out_mapped; + } + + vsrf->rebind = FALSE; + + /* + * If we intend to read, there's no point discarding the + * data if busy. + */ + if (flags & PIPE_TRANSFER_READ || vsrf->shared) + flags &= ~PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE; + + /* + * Discard is a hint to a synchronized map. + */ + if (flags & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) + flags &= ~PIPE_TRANSFER_UNSYNCHRONIZED; + + /* + * The surface is allowed to be referenced on the command stream iff + * we're mapping unsynchronized or discard. This is an early check. + * We need to recheck after a failing discard map. + */ + if (!(flags & (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE | + PIPE_TRANSFER_UNSYNCHRONIZED)) && + p_atomic_read(&vsrf->validated)) { + *retry = TRUE; + goto out_unlock; + } + + pb_flags = flags & (PIPE_TRANSFER_READ_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED); + + if (flags & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) { + struct pb_manager *provider; + struct pb_desc desc; + + /* + * First, if possible, try to map existing storage with DONTBLOCK. + */ + if (!p_atomic_read(&vsrf->validated)) { + data = vmw_svga_winsys_buffer_map(&vws->base, vsrf->buf, + PIPE_TRANSFER_DONTBLOCK | pb_flags); + if (data) + goto out_mapped; + } + + /* + * Attempt to get a new buffer. + */ + provider = vws->pools.mob_fenced; + memset(&desc, 0, sizeof(desc)); + desc.alignment = 4096; + pb_buf = provider->create_buffer(provider, vsrf->size, &desc); + if (pb_buf != NULL) { + struct svga_winsys_buffer *vbuf = + vmw_svga_winsys_buffer_wrap(pb_buf); + + data = vmw_svga_winsys_buffer_map(&vws->base, vbuf, pb_flags); + if (data) { + vsrf->rebind = TRUE; + /* + * We've discarded data on this surface and thus + * it's data is no longer consider referenced. 
+ */ + vmw_swc_surface_clear_reference(swc, vsrf); + if (vsrf->buf) + vmw_svga_winsys_buffer_destroy(&vws->base, vsrf->buf); + vsrf->buf = vbuf; + goto out_mapped; + } else + vmw_svga_winsys_buffer_destroy(&vws->base, vbuf); + } + /* + * We couldn't get and map a new buffer for some reason. + * Fall through to an ordinary map. + * But tell pipe driver to flush now if already on validate list, + * Otherwise we'll overwrite previous contents. + */ + if (!(flags & PIPE_TRANSFER_UNSYNCHRONIZED) && + p_atomic_read(&vsrf->validated)) { + *retry = TRUE; + goto out_unlock; + } + } + + pb_flags |= (flags & PIPE_TRANSFER_DONTBLOCK); + data = vmw_svga_winsys_buffer_map(&vws->base, vsrf->buf, pb_flags); + if (data == NULL) + goto out_unlock; + +out_mapped: + ++vsrf->mapcount; + vsrf->data = data; + vsrf->map_mode = flags & (PIPE_TRANSFER_READ | PIPE_TRANSFER_WRITE); +out_unlock: + pipe_mutex_unlock(vsrf->mutex); + return data; +} + + +void +vmw_svga_winsys_surface_unmap(struct svga_winsys_context *swc, + struct svga_winsys_surface *srf, + boolean *rebind) +{ + struct vmw_svga_winsys_surface *vsrf = vmw_svga_winsys_surface(srf); + pipe_mutex_lock(vsrf->mutex); + if (--vsrf->mapcount == 0) { + *rebind = vsrf->rebind; + vsrf->rebind = FALSE; + vmw_svga_winsys_buffer_unmap(&vsrf->screen->base, vsrf->buf); + } + pipe_mutex_unlock(vsrf->mutex); +} void vmw_svga_winsys_surface_reference(struct vmw_svga_winsys_surface **pdst, @@ -48,12 +191,15 @@ vmw_svga_winsys_surface_reference(struct vmw_svga_winsys_surface **pdst, dst_ref = dst ? 
&dst->refcnt : NULL; if (pipe_reference(dst_ref, src_ref)) { + if (dst->buf) + vmw_svga_winsys_buffer_destroy(&dst->screen->base, dst->buf); vmw_ioctl_surface_destroy(dst->screen, dst->sid); #ifdef DEBUG /* to detect dangling pointers */ assert(p_atomic_read(&dst->validated) == 0); dst->sid = SVGA3D_INVALID_ID; #endif + pipe_mutex_destroy(dst->mutex); FREE(dst); } diff --git a/src/gallium/winsys/svga/drm/vmw_surface.h b/src/gallium/winsys/svga/drm/vmw_surface.h index 3d61595c288..e44d0554fbc 100644 --- a/src/gallium/winsys/svga/drm/vmw_surface.h +++ b/src/gallium/winsys/svga/drm/vmw_surface.h @@ -38,6 +38,8 @@ #include "pipe/p_compiler.h" #include "util/u_atomic.h" #include "util/u_inlines.h" +#include "os/os_thread.h" +#include "pipebuffer/pb_buffer.h" #define VMW_MAX_PRESENTS 3 @@ -54,6 +56,15 @@ struct vmw_svga_winsys_surface /* FIXME: make this thread safe */ unsigned next_present_no; uint32_t present_fences[VMW_MAX_PRESENTS]; + + pipe_mutex mutex; + struct svga_winsys_buffer *buf; /* Current backing guest buffer */ + uint32_t mapcount; /* Number of mappers */ + uint32_t map_mode; /* PIPE_TRANSFER_[READ|WRITE] */ + void *data; /* Pointer to data if mapcount != 0*/ + boolean shared; /* Shared surface. 
Never discard */ + uint32_t size; /* Size of backing buffer */ + boolean rebind; /* Surface needs a rebind after next unmap */ }; @@ -75,5 +86,13 @@ vmw_svga_winsys_surface(struct svga_winsys_surface *surf) void vmw_svga_winsys_surface_reference(struct vmw_svga_winsys_surface **pdst, struct vmw_svga_winsys_surface *src); +void * +vmw_svga_winsys_surface_map(struct svga_winsys_context *swc, + struct svga_winsys_surface *srf, + unsigned flags, boolean *retry); +void +vmw_svga_winsys_surface_unmap(struct svga_winsys_context *swc, + struct svga_winsys_surface *srf, + boolean *rebind); #endif /* VMW_SURFACE_H_ */ diff --git a/src/gallium/winsys/svga/drm/vmwgfx_drm.h b/src/gallium/winsys/svga/drm/vmwgfx_drm.h index bad47a702f2..e42b3f5bf1d 100644 --- a/src/gallium/winsys/svga/drm/vmwgfx_drm.h +++ b/src/gallium/winsys/svga/drm/vmwgfx_drm.h @@ -28,6 +28,10 @@ #ifndef __VMWGFX_DRM_H__ #define __VMWGFX_DRM_H__ +#ifndef __KERNEL__ +#include <drm.h> +#endif + #define DRM_VMW_MAX_SURFACE_FACES 6 #define DRM_VMW_MAX_MIP_LEVELS 24 @@ -54,7 +58,12 @@ #define DRM_VMW_FENCE_EVENT 17 #define DRM_VMW_PRESENT 18 #define DRM_VMW_PRESENT_READBACK 19 - +#define DRM_VMW_UPDATE_LAYOUT 20 +#define DRM_VMW_CREATE_SHADER 21 +#define DRM_VMW_UNREF_SHADER 22 +#define DRM_VMW_GB_SURFACE_CREATE 23 +#define DRM_VMW_GB_SURFACE_REF 24 +#define DRM_VMW_SYNCCPU 25 /*************************************************************************/ /** @@ -75,6 +84,9 @@ #define DRM_VMW_PARAM_FIFO_CAPS 4 #define DRM_VMW_PARAM_MAX_FB_SIZE 5 #define DRM_VMW_PARAM_FIFO_HW_VERSION 6 +#define DRM_VMW_PARAM_MAX_SURF_MEMORY 7 +#define DRM_VMW_PARAM_3D_CAPS_SIZE 8 +#define DRM_VMW_PARAM_MAX_MOB_MEMORY 9 /** * struct drm_vmw_getparam_arg @@ -661,6 +673,51 @@ struct drm_vmw_fence_arg { /*************************************************************************/ /** + * DRM_VMW_FENCE_EVENT + * + * Queues an event on a fence to be delivered on the drm character device + * when the fence has signaled the 
DRM_VMW_FENCE_FLAG_EXEC flag. + * Optionally the approximate time when the fence signaled is + * given by the event. + */ + +/* + * The event type + */ +#define DRM_VMW_EVENT_FENCE_SIGNALED 0x80000000 + +struct drm_vmw_event_fence { + struct drm_event base; + uint64_t user_data; + uint32_t tv_sec; + uint32_t tv_usec; +}; + +/* + * Flags that may be given to the command. + */ +/* Request fence signaled time on the event. */ +#define DRM_VMW_FE_FLAG_REQ_TIME (1 << 0) + +/** + * struct drm_vmw_fence_event_arg + * + * @fence_rep: Pointer to fence_rep structure cast to uint64_t or 0 if + * the fence is not supposed to be referenced by user-space. + * @user_data: Info to be delivered with the event. + * @handle: Attach the event to this fence only. + * @flags: A set of flags as defined above. + */ +struct drm_vmw_fence_event_arg { + uint64_t fence_rep; + uint64_t user_data; + uint32_t handle; + uint32_t flags; +}; + + +/*************************************************************************/ +/** * DRM_VMW_PRESENT * * Executes an SVGA present on a given fb for a given surface. The surface @@ -720,5 +777,276 @@ struct drm_vmw_present_readback_arg { uint64_t fence_rep; }; +/*************************************************************************/ +/** + * DRM_VMW_UPDATE_LAYOUT - Update layout + * + * Updates the preferred modes and connection status for connectors. The + * command consists of one drm_vmw_update_layout_arg pointing to an array + * of num_outputs drm_vmw_rect's. + */ + +/** + * struct drm_vmw_update_layout_arg + * + * @num_outputs: number of active connectors + * @rects: pointer to array of drm_vmw_rect cast to an uint64_t + * + * Input argument to the DRM_VMW_UPDATE_LAYOUT Ioctl. 
+ */ +struct drm_vmw_update_layout_arg { + uint32_t num_outputs; + uint32_t pad64; + uint64_t rects; +}; + + +/*************************************************************************/ +/** + * DRM_VMW_CREATE_SHADER - Create shader + * + * Creates a shader and optionally binds it to a dma buffer containing + * the shader byte-code. + */ + +/** + * enum drm_vmw_shader_type - Shader types + */ +enum drm_vmw_shader_type { + drm_vmw_shader_type_vs = 0, + drm_vmw_shader_type_ps, + drm_vmw_shader_type_gs +}; + + +/** + * struct drm_vmw_shader_create_arg + * + * @shader_type: Shader type of the shader to create. + * @size: Size of the byte-code in bytes. + * @buffer_handle: Buffer handle identifying the buffer containing the + * shader byte-code + * @shader_handle: On successful completion contains a handle that + * can be used to subsequently identify the shader. + * @offset: Offset in bytes into the buffer given by @buffer_handle, + * where the shader byte-code starts. + * + * Input / Output argument to the DRM_VMW_CREATE_SHADER Ioctl. + */ +struct drm_vmw_shader_create_arg { + enum drm_vmw_shader_type shader_type; + uint32_t size; + uint32_t buffer_handle; + uint32_t shader_handle; + uint64_t offset; +}; + +/*************************************************************************/ +/** + * DRM_VMW_UNREF_SHADER - Unreferences a shader + * + * Destroys a user-space reference to a shader, optionally destroying + * it. + */ + +/** + * struct drm_vmw_shader_arg + * + * @handle: Handle identifying the shader to destroy. + * + * Input argument to the DRM_VMW_UNREF_SHADER ioctl. + */ +struct drm_vmw_shader_arg { + uint32_t handle; + uint32_t pad64; +}; + +/*************************************************************************/ +/** + * DRM_VMW_GB_SURFACE_CREATE - Create a host guest-backed surface. + * + * Allocates a surface handle and queues a create surface command + * for the host on the first use of the surface. 
The surface ID can + * be used as the surface ID in commands referencing the surface. + */ + +/** + * enum drm_vmw_surface_flags + * + * @drm_vmw_surface_flag_shareable: Whether the surface is shareable + * @drm_vmw_surface_flag_scanout: Whether the surface is a scanout + * surface. + * @drm_vmw_surface_flag_create_buffer: Create a backup buffer if none is + * given. + */ +enum drm_vmw_surface_flags { + drm_vmw_surface_flag_shareable = (1 << 0), + drm_vmw_surface_flag_scanout = (1 << 1), + drm_vmw_surface_flag_create_buffer = (1 << 2) +}; + +/** + * struct drm_vmw_gb_surface_create_req + * + * @svga3d_flags: SVGA3d surface flags for the device. + * @format: SVGA3d format. + * @mip_levels: Number of mip levels for all faces. + * @drm_surface_flags: Flags as described above. + * @multisample_count: Future use. Set to 0. + * @autogen_filter: Future use. Set to 0. + * @buffer_handle: Buffer handle of backup buffer. SVGA3D_INVALID_ID + * if none. + * @base_size: Size of the base mip level for all faces. + * + * Input argument to the DRM_VMW_GB_SURFACE_CREATE Ioctl. + * Part of output argument for the DRM_VMW_GB_SURFACE_REF Ioctl. + */ +struct drm_vmw_gb_surface_create_req { + uint32_t svga3d_flags; + uint32_t format; + uint32_t mip_levels; + enum drm_vmw_surface_flags drm_surface_flags; + uint32_t multisample_count; + uint32_t autogen_filter; + uint32_t buffer_handle; + uint32_t pad64; + struct drm_vmw_size base_size; +}; + +/** + * struct drm_vmw_gb_surface_create_rep + * + * @handle: Surface handle. + * @backup_size: Size of backup buffers for this surface. + * @buffer_handle: Handle of backup buffer. SVGA3D_INVALID_ID if none. + * @buffer_size: Actual size of the buffer identified by + * @buffer_handle + * @buffer_map_handle: Offset into device address space for the buffer + * identified by @buffer_handle. + * + * Part of output argument for the DRM_VMW_GB_SURFACE_REF ioctl. + * Output argument for the DRM_VMW_GB_SURFACE_CREATE ioctl. 
+ */ +struct drm_vmw_gb_surface_create_rep { + uint32_t handle; + uint32_t backup_size; + uint32_t buffer_handle; + uint32_t buffer_size; + uint64_t buffer_map_handle; +}; + +/** + * union drm_vmw_gb_surface_create_arg + * + * @req: Input argument as described above. + * @rep: Output argument as described above. + * + * Argument to the DRM_VMW_GB_SURFACE_CREATE ioctl. + */ +union drm_vmw_gb_surface_create_arg { + struct drm_vmw_gb_surface_create_rep rep; + struct drm_vmw_gb_surface_create_req req; +}; + +/*************************************************************************/ +/** + * DRM_VMW_GB_SURFACE_REF - Reference a host surface. + * + * Puts a reference on a host surface with a given handle, as previously + * returned by the DRM_VMW_GB_SURFACE_CREATE ioctl. + * A reference will make sure the surface isn't destroyed while we hold + * it and will allow the calling client to use the surface handle in + * the command stream. + * + * On successful return, the Ioctl returns the surface information given + * to and returned from the DRM_VMW_GB_SURFACE_CREATE ioctl. + */ + +/** + * struct drm_vmw_gb_surface_ref_rep + * + * @creq: The data used as input when the surface was created, as described + * above at "struct drm_vmw_gb_surface_create_req" + * @crep: Additional data output when the surface was created, as described + * above at "struct drm_vmw_gb_surface_create_rep" + * + * Output Argument to the DRM_VMW_GB_SURFACE_REF ioctl. + */ +struct drm_vmw_gb_surface_ref_rep { + struct drm_vmw_gb_surface_create_req creq; + struct drm_vmw_gb_surface_create_rep crep; +}; + +/** + * union drm_vmw_gb_surface_reference_arg + * + * @req: Input data as described above at "struct drm_vmw_surface_arg" + * @rep: Output data as described above at "struct drm_vmw_gb_surface_ref_rep" + * + * Argument to the DRM_VMW_GB_SURFACE_REF Ioctl. 
+ */ +union drm_vmw_gb_surface_reference_arg { + struct drm_vmw_gb_surface_ref_rep rep; + struct drm_vmw_surface_arg req; +}; + + +/*************************************************************************/ +/** + * DRM_VMW_SYNCCPU - Sync a DMA buffer / MOB for CPU access. + * + * Idles any previously submitted GPU operations on the buffer and + * by default blocks command submissions that reference the buffer. + * If the file descriptor used to grab a blocking CPU sync is closed, the + * cpu sync is released. + * The flags argument indicates how the grab / release operation should be + * performed: + */ + +/** + * enum drm_vmw_synccpu_flags - Synccpu flags: + * + * @drm_vmw_synccpu_read: Sync for read. If sync is done for read only, it's a + * hint to the kernel to allow command submissions that reference the buffer + * for read-only. + * @drm_vmw_synccpu_write: Sync for write. Block all command submissions + * referencing this buffer. + * @drm_vmw_synccpu_dontblock: Don't wait for GPU idle, but rather return + * -EBUSY should the buffer be busy. + * @drm_vmw_synccpu_allow_cs: Allow command submission that touches the buffer + * while the buffer is synced for CPU. This is similar to the GEM bo idle + * behavior. + */ +enum drm_vmw_synccpu_flags { + drm_vmw_synccpu_read = (1 << 0), + drm_vmw_synccpu_write = (1 << 1), + drm_vmw_synccpu_dontblock = (1 << 2), + drm_vmw_synccpu_allow_cs = (1 << 3) +}; + +/** + * enum drm_vmw_synccpu_op - Synccpu operations: + * + * @drm_vmw_synccpu_grab: Grab the buffer for CPU operations + * @drm_vmw_synccpu_release: Release a previous grab. + */ +enum drm_vmw_synccpu_op { + drm_vmw_synccpu_grab, + drm_vmw_synccpu_release +}; + +/** + * struct drm_vmw_synccpu_arg + * + * @op: The synccpu operation as described above. + * @handle: Handle identifying the buffer object. + * @flags: Flags as described above. 
+ */ +struct drm_vmw_synccpu_arg { + enum drm_vmw_synccpu_op op; + enum drm_vmw_synccpu_flags flags; + uint32_t handle; + uint32_t pad64; +}; #endif |