summaryrefslogtreecommitdiffstats
path: root/src/gallium
diff options
context:
space:
mode:
authorChristoph Bumiller <[email protected]>2013-01-08 16:13:11 +0100
committerChristoph Bumiller <[email protected]>2013-01-08 16:13:51 +0100
commit48a45ec24ae74c00d1487552e94d9f824a428f58 (patch)
treec2a160cb708b7a05f3340d889d6a0e676eb9d727 /src/gallium
parenta75ddfd55d24363046f11b2fd2de25563698fa39 (diff)
nouveau: improve buffer transfers
Save double memcpy on uploads to VRAM in most cases. Properly handle FLUSH_EXPLICIT. Reallocate on DISCARD_WHOLE_RESOURCE to avoid sync.
Diffstat (limited to 'src/gallium')
-rw-r--r--src/gallium/drivers/nouveau/nouveau_buffer.c399
-rw-r--r--src/gallium/drivers/nouveau/nouveau_buffer.h3
-rw-r--r--src/gallium/drivers/nouveau/nouveau_context.h23
-rw-r--r--src/gallium/drivers/nouveau/nouveau_screen.h4
-rw-r--r--src/gallium/drivers/nouveau/nouveau_winsys.h3
-rw-r--r--src/gallium/drivers/nv30/nv30_context.c78
-rw-r--r--src/gallium/drivers/nv50/nv50_context.c82
-rw-r--r--src/gallium/drivers/nv50/nv50_screen.c2
-rw-r--r--src/gallium/drivers/nv50/nv50_screen.h3
-rw-r--r--src/gallium/drivers/nvc0/nvc0_context.c86
-rw-r--r--src/gallium/drivers/nvc0/nvc0_screen.c2
-rw-r--r--src/gallium/drivers/nvc0/nvc0_screen.h3
12 files changed, 551 insertions, 137 deletions
diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.c b/src/gallium/drivers/nouveau/nouveau_buffer.c
index 0ecd53af67c..fdeeee53869 100644
--- a/src/gallium/drivers/nouveau/nouveau_buffer.c
+++ b/src/gallium/drivers/nouveau/nouveau_buffer.c
@@ -10,8 +10,15 @@
#include "nouveau_buffer.h"
#include "nouveau_mm.h"
+#define NOUVEAU_TRANSFER_PUSHBUF_THRESHOLD 192
+
struct nouveau_transfer {
struct pipe_transfer base;
+
+ uint8_t *map;
+ struct nouveau_bo *bo;
+ struct nouveau_mm_allocation *mm;
+ uint32_t offset;
};
static INLINE struct nouveau_transfer *
@@ -21,6 +28,14 @@ nouveau_transfer(struct pipe_transfer *transfer)
}
static INLINE boolean
+nouveau_buffer_malloc(struct nv04_resource *buf)
+{
+ if (!buf->data)
+ buf->data = align_malloc(buf->base.width0, NOUVEAU_MIN_BUFFER_MAP_ALIGN);
+ return !!buf->data;
+}
+
+static INLINE boolean
nouveau_buffer_allocate(struct nouveau_screen *screen,
struct nv04_resource *buf, unsigned domain)
{
@@ -40,13 +55,10 @@ nouveau_buffer_allocate(struct nouveau_screen *screen,
&buf->bo, &buf->offset);
if (!buf->bo)
return FALSE;
- }
- if (domain != NOUVEAU_BO_GART) {
- if (!buf->data) {
- buf->data = align_malloc(buf->base.width0, 64);
- if (!buf->data)
- return FALSE;
- }
+ } else {
+ assert(domain == 0);
+ if (!nouveau_buffer_malloc(buf))
+ return FALSE;
}
buf->domain = domain;
if (buf->bo)
@@ -80,6 +92,11 @@ nouveau_buffer_reallocate(struct nouveau_screen *screen,
{
nouveau_buffer_release_gpu_storage(buf);
+ nouveau_fence_ref(NULL, &buf->fence);
+ nouveau_fence_ref(NULL, &buf->fence_wr);
+
+ buf->status &= NOUVEAU_BUFFER_STATUS_REALLOC_MASK;
+
return nouveau_buffer_allocate(screen, buf, domain);
}
@@ -100,73 +117,75 @@ nouveau_buffer_destroy(struct pipe_screen *pscreen,
FREE(res);
}
-/* Maybe just migrate to GART right away if we actually need to do this. */
-boolean
-nouveau_buffer_download(struct nouveau_context *nv, struct nv04_resource *buf,
- unsigned start, unsigned size)
+static uint8_t *
+nouveau_transfer_staging(struct nouveau_context *nv,
+ struct nouveau_transfer *tx, boolean permit_pb)
{
- struct nouveau_mm_allocation *mm;
- struct nouveau_bo *bounce = NULL;
- uint32_t offset;
+ const unsigned adj = tx->base.box.x & NOUVEAU_MIN_BUFFER_MAP_ALIGN_MASK;
+ const unsigned size = align(tx->base.box.width, 4) + adj;
- assert(buf->domain == NOUVEAU_BO_VRAM);
+ if ((size <= NOUVEAU_TRANSFER_PUSHBUF_THRESHOLD) && permit_pb) {
+ tx->map = align_malloc(size, NOUVEAU_MIN_BUFFER_MAP_ALIGN);
+ if (tx->map)
+ tx->map += adj;
+ } else {
+ tx->mm =
+ nouveau_mm_allocate(nv->screen->mm_GART, size, &tx->bo, &tx->offset);
+ if (tx->bo) {
+ tx->offset += adj;
+ if (!nouveau_bo_map(tx->bo, 0, NULL))
+ tx->map = (uint8_t *)tx->bo->map + tx->offset;
+ }
+ }
+ return tx->map;
+}
- mm = nouveau_mm_allocate(nv->screen->mm_GART, size, &bounce, &offset);
- if (!bounce)
- return FALSE;
+/* Maybe just migrate to GART right away if we actually need to do this. */
+static boolean
+nouveau_transfer_read(struct nouveau_context *nv, struct nouveau_transfer *tx)
+{
+ struct nv04_resource *buf = nv04_resource(tx->base.resource);
+ const unsigned base = tx->base.box.x;
+ const unsigned size = tx->base.box.width;
- nv->copy_data(nv, bounce, offset, NOUVEAU_BO_GART,
- buf->bo, buf->offset + start, NOUVEAU_BO_VRAM, size);
+ nv->copy_data(nv, tx->bo, tx->offset, NOUVEAU_BO_GART,
+ buf->bo, buf->offset + base, buf->domain, size);
- if (nouveau_bo_map(bounce, NOUVEAU_BO_RD, nv->screen->client))
+ if (nouveau_bo_wait(tx->bo, NOUVEAU_BO_RD, nv->client))
return FALSE;
- memcpy(buf->data + start, (uint8_t *)bounce->map + offset, size);
- buf->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+ if (buf->data)
+ memcpy(buf->data + base, tx->map, size);
- nouveau_bo_ref(NULL, &bounce);
- if (mm)
- nouveau_mm_free(mm);
return TRUE;
}
-static boolean
-nouveau_buffer_upload(struct nouveau_context *nv, struct nv04_resource *buf,
- unsigned start, unsigned size)
+static void
+nouveau_transfer_write(struct nouveau_context *nv, struct nouveau_transfer *tx,
+ unsigned offset, unsigned size)
{
- struct nouveau_mm_allocation *mm;
- struct nouveau_bo *bounce = NULL;
- uint32_t offset;
-
- if (size <= 192 && (nv->push_data || nv->push_cb)) {
- if (buf->base.bind & PIPE_BIND_CONSTANT_BUFFER)
- nv->push_cb(nv, buf->bo, buf->domain, buf->offset, buf->base.width0,
- start, size / 4, (const uint32_t *)(buf->data + start));
- else
- nv->push_data(nv, buf->bo, buf->offset + start, buf->domain,
- size, buf->data + start);
- return TRUE;
- }
-
- mm = nouveau_mm_allocate(nv->screen->mm_GART, size, &bounce, &offset);
- if (!bounce)
- return FALSE;
-
- nouveau_bo_map(bounce, 0, nv->screen->client);
- memcpy((uint8_t *)bounce->map + offset, buf->data + start, size);
+ struct nv04_resource *buf = nv04_resource(tx->base.resource);
+ uint8_t *data = tx->map + offset;
+ const unsigned base = tx->base.box.x + offset;
+ const boolean can_cb = !((base | size) & 3);
- nv->copy_data(nv, buf->bo, buf->offset + start, NOUVEAU_BO_VRAM,
- bounce, offset, NOUVEAU_BO_GART, size);
-
- nouveau_bo_ref(NULL, &bounce);
- if (mm)
- release_allocation(&mm, nv->screen->fence.current);
+ if (buf->data)
+ memcpy(data, buf->data + base, size);
+ else
+ buf->status |= NOUVEAU_BUFFER_STATUS_DIRTY;
- if (start == 0 && size == buf->base.width0)
- buf->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
- return TRUE;
+ if (tx->bo)
+ nv->copy_data(nv, buf->bo, buf->offset + base, buf->domain,
+ tx->bo, tx->offset + offset, NOUVEAU_BO_GART, size);
+ else
+ if ((buf->base.bind & PIPE_BIND_CONSTANT_BUFFER) && nv->push_cb && can_cb)
+ nv->push_cb(nv, buf->bo, buf->domain, buf->offset, buf->base.width0,
+ base, size / 4, (const uint32_t *)data);
+ else
+ nv->push_data(nv, buf->bo, buf->offset + base, buf->domain, size, data);
}
+
static INLINE boolean
nouveau_buffer_sync(struct nv04_resource *buf, unsigned rw)
{
@@ -197,6 +216,87 @@ nouveau_buffer_busy(struct nv04_resource *buf, unsigned rw)
return (buf->fence && !nouveau_fence_signalled(buf->fence));
}
+static INLINE void
+nouveau_buffer_transfer_init(struct nouveau_transfer *tx,
+ struct pipe_resource *resource,
+ const struct pipe_box *box,
+ unsigned usage)
+{
+ tx->base.resource = resource;
+ tx->base.level = 0;
+ tx->base.usage = usage;
+ tx->base.box.x = box->x;
+ tx->base.box.y = 0;
+ tx->base.box.z = 0;
+ tx->base.box.width = box->width;
+ tx->base.box.height = 1;
+ tx->base.box.depth = 1;
+ tx->base.stride = 0;
+ tx->base.layer_stride = 0;
+
+ tx->bo = NULL;
+ tx->map = NULL;
+}
+
+static INLINE void
+nouveau_buffer_transfer_del(struct nouveau_context *nv,
+ struct nouveau_transfer *tx)
+{
+ if (tx->map) {
+ if (likely(tx->bo)) {
+ nouveau_bo_ref(NULL, &tx->bo);
+ if (tx->mm)
+ release_allocation(&tx->mm, nv->screen->fence.current);
+ } else {
+ align_free(tx->map -
+ (tx->base.box.x & NOUVEAU_MIN_BUFFER_MAP_ALIGN_MASK));
+ }
+ }
+}
+
+static boolean
+nouveau_buffer_cache(struct nouveau_context *nv, struct nv04_resource *buf)
+{
+ struct nouveau_transfer tx;
+ boolean ret;
+ tx.base.resource = &buf->base;
+ tx.base.box.x = 0;
+ tx.base.box.width = buf->base.width0;
+ tx.bo = NULL;
+
+ if (!buf->data)
+ if (!nouveau_buffer_malloc(buf))
+ return FALSE;
+ if (!(buf->status & NOUVEAU_BUFFER_STATUS_DIRTY))
+ return TRUE;
+ nv->stats.buf_cache_count++;
+
+ if (!nouveau_transfer_staging(nv, &tx, FALSE))
+ return FALSE;
+
+ ret = nouveau_transfer_read(nv, &tx);
+ if (ret) {
+ buf->status &= ~NOUVEAU_BUFFER_STATUS_DIRTY;
+ memcpy(buf->data, tx.map, buf->base.width0);
+ }
+ nouveau_buffer_transfer_del(nv, &tx);
+ return ret;
+}
+
+
+#define NOUVEAU_TRANSFER_DISCARD \
+ (PIPE_TRANSFER_DISCARD_RANGE | PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE)
+
+static INLINE boolean
+nouveau_buffer_should_discard(struct nv04_resource *buf, unsigned usage)
+{
+ if (!(usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE))
+ return FALSE;
+ if (unlikely(buf->base.bind & PIPE_BIND_SHARED))
+ return FALSE;
+ return buf->mm && nouveau_buffer_busy(buf, PIPE_TRANSFER_WRITE);
+}
+
static void *
nouveau_buffer_transfer_map(struct pipe_context *pipe,
struct pipe_resource *resource,
@@ -204,59 +304,87 @@ nouveau_buffer_transfer_map(struct pipe_context *pipe,
const struct pipe_box *box,
struct pipe_transfer **ptransfer)
{
- struct nv04_resource *buf = nv04_resource(resource);
struct nouveau_context *nv = nouveau_context(pipe);
- struct nouveau_transfer *xfr = CALLOC_STRUCT(nouveau_transfer);
- struct nouveau_bo *bo = buf->bo;
+ struct nv04_resource *buf = nv04_resource(resource);
+ struct nouveau_transfer *tx = MALLOC_STRUCT(nouveau_transfer);
uint8_t *map;
int ret;
- uint32_t offset = box->x;
- uint32_t flags = 0;
- if (!xfr)
+ if (!tx)
return NULL;
-
- xfr->base.resource = resource;
- xfr->base.box.x = box->x;
- xfr->base.box.width = box->width;
- xfr->base.usage = usage;
+ nouveau_buffer_transfer_init(tx, resource, box, usage);
+ *ptransfer = &tx->base;
if (buf->domain == NOUVEAU_BO_VRAM) {
- if (usage & PIPE_TRANSFER_READ) {
- if (buf->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING)
- nouveau_buffer_download(nv, buf, 0, buf->base.width0);
+ if (usage & NOUVEAU_TRANSFER_DISCARD) {
+ if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE)
+ buf->status &= NOUVEAU_BUFFER_STATUS_REALLOC_MASK;
+ nouveau_transfer_staging(nv, tx, TRUE);
+ } else {
+ if (buf->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
+ if (buf->data) {
+ align_free(buf->data);
+ buf->data = NULL;
+ }
+ nouveau_transfer_staging(nv, tx, FALSE);
+ nouveau_transfer_read(nv, tx);
+ } else {
+ if (usage & PIPE_TRANSFER_WRITE)
+ nouveau_transfer_staging(nv, tx, TRUE);
+ if (!buf->data)
+ nouveau_buffer_cache(nv, buf);
+ }
}
+ return buf->data ? (buf->data + box->x) : tx->map;
+ } else
+ if (unlikely(buf->domain == 0)) {
+ return buf->data + box->x;
}
- if (buf->domain != NOUVEAU_BO_GART) {
- *ptransfer = &xfr->base;
- return buf->data + offset;
+ if (nouveau_buffer_should_discard(buf, usage)) {
+ int ref = buf->base.reference.count - 1;
+ nouveau_buffer_reallocate(nv->screen, buf, buf->domain);
+ if (ref > 0) /* any references inside context possible ? */
+ nv->invalidate_resource_storage(nv, &buf->base, ref);
}
- if (!buf->mm)
- flags = nouveau_screen_transfer_flags(xfr->base.usage);
-
- offset += buf->offset;
-
- ret = nouveau_bo_map(buf->bo, flags, nv->screen->client);
+ ret = nouveau_bo_map(buf->bo,
+ buf->mm ? 0 : nouveau_screen_transfer_flags(usage),
+ nv->client);
if (ret) {
- FREE(xfr);
+ FREE(tx);
return NULL;
}
- map = (uint8_t *)bo->map + offset;
+ map = (uint8_t *)buf->bo->map + buf->offset + box->x;
- if (buf->mm) {
- if (xfr->base.usage & PIPE_TRANSFER_DONTBLOCK) {
- if (nouveau_buffer_busy(buf, xfr->base.usage & PIPE_TRANSFER_READ_WRITE)) {
- FREE(xfr);
- return NULL;
- }
+ /* using kernel fences only if !buf->mm */
+ if ((usage & PIPE_TRANSFER_UNSYNCHRONIZED) || !buf->mm)
+ return map;
+
+ if (nouveau_buffer_busy(buf, usage & PIPE_TRANSFER_READ_WRITE)) {
+ if (unlikely(usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE)) {
+ /* Discarding was not possible, must sync because
+ * subsequent transfers might use UNSYNCHRONIZED. */
+ nouveau_buffer_sync(buf, usage & PIPE_TRANSFER_READ_WRITE);
} else
- if (!(xfr->base.usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
- nouveau_buffer_sync(buf, xfr->base.usage & PIPE_TRANSFER_READ_WRITE);
+ if (usage & PIPE_TRANSFER_DISCARD_RANGE) {
+ nouveau_transfer_staging(nv, tx, TRUE);
+ map = tx->map;
+ } else
+ if (nouveau_buffer_busy(buf, PIPE_TRANSFER_READ)) {
+ if (usage & PIPE_TRANSFER_DONTBLOCK)
+ map = NULL;
+ else
+ nouveau_buffer_sync(buf, usage & PIPE_TRANSFER_READ_WRITE);
+ } else {
+ nouveau_transfer_staging(nv, tx, TRUE);
+ if (tx->map)
+ memcpy(tx->map, map, box->width);
+ map = tx->map;
}
}
- *ptransfer = &xfr->base;
+ if (!map)
+ FREE(tx);
return map;
}
@@ -267,38 +395,35 @@ nouveau_buffer_transfer_flush_region(struct pipe_context *pipe,
struct pipe_transfer *transfer,
const struct pipe_box *box)
{
-#if 0
- struct nv04_resource *res = nv04_resource(transfer->resource);
- struct nouveau_bo *bo = res->bo;
- unsigned offset = res->offset + transfer->box.x + box->x;
-
- /* not using non-snoop system memory yet, no need for cflush */
- if (1)
- return;
-
- /* XXX: maybe need to upload for VRAM buffers here */
-#endif
+ struct nouveau_transfer *tx = nouveau_transfer(transfer);
+ if (tx->map)
+ nouveau_transfer_write(nouveau_context(pipe), tx, box->x, box->width);
}
static void
nouveau_buffer_transfer_unmap(struct pipe_context *pipe,
struct pipe_transfer *transfer)
{
- struct nv04_resource *buf = nv04_resource(transfer->resource);
- struct nouveau_transfer *xfr = nouveau_transfer(transfer);
struct nouveau_context *nv = nouveau_context(pipe);
+ struct nouveau_transfer *tx = nouveau_transfer(transfer);
+ struct nv04_resource *buf = nv04_resource(transfer->resource);
- if (xfr->base.usage & PIPE_TRANSFER_WRITE) {
- if (buf->domain == NOUVEAU_BO_VRAM) {
- nouveau_buffer_upload(nv, buf, transfer->box.x, transfer->box.width);
+ if (tx->base.usage & PIPE_TRANSFER_WRITE) {
+ if (!(tx->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT) && tx->map)
+ nouveau_transfer_write(nv, tx, 0, tx->base.box.width);
+
+ if (likely(buf->domain)) {
+ const uint8_t bind = buf->base.bind;
+ /* make sure we invalidate dedicated caches */
+ if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER))
+ nv->vbo_dirty = TRUE;
+ if (bind & (PIPE_BIND_CONSTANT_BUFFER))
+ nv->cb_dirty = TRUE;
}
-
- if (buf->domain != 0 && (buf->base.bind & (PIPE_BIND_VERTEX_BUFFER |
- PIPE_BIND_INDEX_BUFFER)))
- nouveau_context(pipe)->vbo_dirty = TRUE;
}
- FREE(xfr);
+ nouveau_buffer_transfer_del(nv, tx);
+ FREE(tx);
}
@@ -307,12 +432,14 @@ nouveau_resource_map_offset(struct nouveau_context *nv,
struct nv04_resource *res, uint32_t offset,
uint32_t flags)
{
- if ((res->domain == NOUVEAU_BO_VRAM) &&
- (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING))
- nouveau_buffer_download(nv, res, 0, res->base.width0);
+ if (unlikely(res->status & NOUVEAU_BUFFER_STATUS_USER_MEMORY))
+ return res->data + offset;
- if ((res->domain != NOUVEAU_BO_GART) ||
- (res->status & NOUVEAU_BUFFER_STATUS_USER_MEMORY))
+ if (res->domain == NOUVEAU_BO_VRAM) {
+ if (!res->data || (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING))
+ nouveau_buffer_cache(nv, res);
+ }
+ if (res->domain != NOUVEAU_BO_GART)
return res->data + offset;
if (res->mm) {
@@ -322,7 +449,7 @@ nouveau_resource_map_offset(struct nouveau_context *nv,
if (nouveau_bo_map(res->bo, 0, NULL))
return NULL;
} else {
- if (nouveau_bo_map(res->bo, flags, nv->screen->client))
+ if (nouveau_bo_map(res->bo, flags, nv->client))
return NULL;
}
return (uint8_t *)res->bo->map + res->offset + offset;
@@ -365,6 +492,11 @@ nouveau_buffer_create(struct pipe_screen *pscreen,
buffer->domain = NOUVEAU_BO_VRAM;
break;
case PIPE_USAGE_DYNAMIC:
+ /* For most apps, we'd have to do staging transfers to avoid sync
+ * with this usage, and GART -> GART copies would be suboptimal.
+ */
+ buffer->domain = NOUVEAU_BO_VRAM;
+ break;
case PIPE_USAGE_STAGING:
case PIPE_USAGE_STREAM:
buffer->domain = NOUVEAU_BO_GART;
@@ -385,6 +517,9 @@ nouveau_buffer_create(struct pipe_screen *pscreen,
if (ret == FALSE)
goto fail;
+ if (buffer->domain == NOUVEAU_BO_VRAM && screen->hint_buf_keep_sysmem_copy)
+ nouveau_buffer_cache(NULL, buffer);
+
return &buffer->base;
fail:
@@ -419,20 +554,15 @@ nouveau_user_buffer_create(struct pipe_screen *pscreen, void *ptr,
return &buffer->base;
}
-/* Like download, but for GART buffers. Merge ? */
static INLINE boolean
nouveau_buffer_data_fetch(struct nouveau_context *nv, struct nv04_resource *buf,
struct nouveau_bo *bo, unsigned offset, unsigned size)
{
- if (!buf->data) {
- buf->data = MALLOC(size);
- if (!buf->data)
- return FALSE;
- }
- if (nouveau_bo_map(bo, NOUVEAU_BO_RD, nv->screen->client))
+ if (!nouveau_buffer_malloc(buf))
+ return FALSE;
+ if (nouveau_bo_map(bo, NOUVEAU_BO_RD, nv->client))
return FALSE;
memcpy(buf->data, (uint8_t *)bo->map + offset, size);
-
return TRUE;
}
@@ -453,7 +583,7 @@ nouveau_buffer_migrate(struct nouveau_context *nv,
if (new_domain == NOUVEAU_BO_GART && old_domain == 0) {
if (!nouveau_buffer_allocate(screen, buf, new_domain))
return FALSE;
- ret = nouveau_bo_map(buf->bo, 0, nv->screen->client);
+ ret = nouveau_bo_map(buf->bo, 0, nv->client);
if (ret)
return ret;
memcpy((uint8_t *)buf->bo->map + buf->offset, buf->data, size);
@@ -484,10 +614,17 @@ nouveau_buffer_migrate(struct nouveau_context *nv,
release_allocation(&mm, screen->fence.current);
} else
if (new_domain == NOUVEAU_BO_VRAM && old_domain == 0) {
+ struct nouveau_transfer tx;
if (!nouveau_buffer_allocate(screen, buf, NOUVEAU_BO_VRAM))
return FALSE;
- if (!nouveau_buffer_upload(nv, buf, 0, buf->base.width0))
+ tx.base.resource = &buf->base;
+ tx.base.box.x = 0;
+ tx.base.box.width = buf->base.width0;
+ tx.bo = NULL;
+ if (!nouveau_transfer_staging(nv, &tx, FALSE))
return FALSE;
+ nouveau_transfer_write(nv, &tx, 0, tx.base.box.width);
+ nouveau_buffer_transfer_del(nv, &tx);
} else
return FALSE;
@@ -513,7 +650,7 @@ nouveau_user_buffer_upload(struct nouveau_context *nv,
if (!nouveau_buffer_reallocate(screen, buf, NOUVEAU_BO_GART))
return FALSE;
- ret = nouveau_bo_map(buf->bo, 0, nv->screen->client);
+ ret = nouveau_bo_map(buf->bo, 0, nv->client);
if (ret)
return FALSE;
memcpy((uint8_t *)buf->bo->map + buf->offset + base, buf->data + base, size);
@@ -601,7 +738,7 @@ nouveau_scratch_next(struct nouveau_context *nv, unsigned size)
nv->scratch.offset = 0;
nv->scratch.end = nv->scratch.bo_size;
- ret = nouveau_bo_map(bo, NOUVEAU_BO_WR, nv->screen->client);
+ ret = nouveau_bo_map(bo, NOUVEAU_BO_WR, nv->client);
if (!ret)
nv->scratch.map = bo->map;
return !ret;
diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.h b/src/gallium/drivers/nouveau/nouveau_buffer.h
index 3b8ee72e72a..aafc84293a7 100644
--- a/src/gallium/drivers/nouveau/nouveau_buffer.h
+++ b/src/gallium/drivers/nouveau/nouveau_buffer.h
@@ -16,8 +16,11 @@ struct nouveau_bo;
*/
#define NOUVEAU_BUFFER_STATUS_GPU_READING (1 << 0)
#define NOUVEAU_BUFFER_STATUS_GPU_WRITING (1 << 1)
+#define NOUVEAU_BUFFER_STATUS_DIRTY (1 << 2)
#define NOUVEAU_BUFFER_STATUS_USER_MEMORY (1 << 7)
+#define NOUVEAU_BUFFER_STATUS_REALLOC_MASK NOUVEAU_BUFFER_STATUS_USER_MEMORY
+
/* Resources, if mapped into the GPU's address space, are guaranteed to
* have constant virtual addresses (nv50+).
*
diff --git a/src/gallium/drivers/nouveau/nouveau_context.h b/src/gallium/drivers/nouveau/nouveau_context.h
index b3fe05b1019..bad5ab7319b 100644
--- a/src/gallium/drivers/nouveau/nouveau_context.h
+++ b/src/gallium/drivers/nouveau/nouveau_context.h
@@ -28,6 +28,11 @@ struct nouveau_context {
unsigned base, unsigned size,
unsigned offset, unsigned words, const uint32_t *);
+ /* @return: @ref reduced by nr of references found in context */
+ int (*invalidate_resource_storage)(struct nouveau_context *,
+ struct pipe_resource *,
+ int ref);
+
struct {
uint8_t *map;
unsigned id;
@@ -40,6 +45,11 @@ struct nouveau_context {
unsigned nr_runout;
unsigned bo_size;
} scratch;
+
+ struct {
+ uint32_t buf_cache_count;
+ uint32_t buf_cache_frame;
+ } stats;
};
static INLINE struct nouveau_context *
@@ -84,4 +94,17 @@ nouveau_context_destroy(struct nouveau_context *ctx)
FREE(ctx);
}
+
+static INLINE void
+nouveau_context_update_frame_stats(struct nouveau_context *nv)
+{
+ nv->stats.buf_cache_frame <<= 1;
+ if (nv->stats.buf_cache_count) {
+ nv->stats.buf_cache_count = 0;
+ nv->stats.buf_cache_frame |= 1;
+ if ((nv->stats.buf_cache_frame & 0xf) == 0xf)
+ nv->screen->hint_buf_keep_sysmem_copy = TRUE;
+ }
+}
+
#endif
diff --git a/src/gallium/drivers/nouveau/nouveau_screen.h b/src/gallium/drivers/nouveau/nouveau_screen.h
index 335b95820ab..1de3fa65f5d 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.h
+++ b/src/gallium/drivers/nouveau/nouveau_screen.h
@@ -21,7 +21,7 @@ struct nouveau_screen {
unsigned sysmem_bindings; /* PIPE_BIND_* where GART placement is desired */
unsigned lowmem_bindings; /* PIPE_BIND_* that require an address < 4 GiB */
/*
- * For bindings with (vidmem & sysmem) bits set set, PIPE_USAGE_* decides
+ * For bindings with (vidmem & sysmem) bits set, PIPE_USAGE_* decides
* placement.
*/
@@ -41,6 +41,8 @@ struct nouveau_screen {
struct nouveau_mman *mm_GART;
int64_t cpu_gpu_time_delta;
+
+ boolean hint_buf_keep_sysmem_copy;
};
static INLINE struct nouveau_screen *
diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h
index faaa5243f62..9993ed6ee72 100644
--- a/src/gallium/drivers/nouveau/nouveau_winsys.h
+++ b/src/gallium/drivers/nouveau/nouveau_winsys.h
@@ -12,6 +12,9 @@
#define NV04_PFIFO_MAX_PACKET_LEN 2047
#endif
+#define NOUVEAU_MIN_BUFFER_MAP_ALIGN 64
+#define NOUVEAU_MIN_BUFFER_MAP_ALIGN_MASK (NOUVEAU_MIN_BUFFER_MAP_ALIGN - 1)
+
static INLINE uint32_t
PUSH_AVAIL(struct nouveau_pushbuf *push)
{
diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c
index b0aee8d5755..66ffff350a1 100644
--- a/src/gallium/drivers/nv30/nv30_context.c
+++ b/src/gallium/drivers/nv30/nv30_context.c
@@ -59,7 +59,8 @@ nv30_context_kick_notify(struct nouveau_pushbuf *push)
if (bref->flags & NOUVEAU_BO_WR) {
nouveau_fence_ref(screen->fence.current, &res->fence_wr);
- res->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+ res->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING |
+ NOUVEAU_BUFFER_STATUS_DIRTY;
}
}
}
@@ -78,6 +79,79 @@ nv30_context_flush(struct pipe_context *pipe, struct pipe_fence_handle **fence,
(struct nouveau_fence **)fence);
PUSH_KICK(push);
+
+ nouveau_context_update_frame_stats(&nv30->base);
+}
+
+static int
+nv30_invalidate_resource_storage(struct nouveau_context *nv,
+ struct pipe_resource *res,
+ int ref)
+{
+ struct nv30_context *nv30 = nv30_context(&nv->pipe);
+ unsigned i;
+
+ if (res->bind & PIPE_BIND_RENDER_TARGET) {
+ for (i = 0; i < nv30->framebuffer.nr_cbufs; ++i) {
+ if (nv30->framebuffer.cbufs[i] &&
+ nv30->framebuffer.cbufs[i]->texture == res) {
+ nv30->dirty |= NV30_NEW_FRAMEBUFFER;
+ nouveau_bufctx_reset(nv30->bufctx, BUFCTX_FB);
+ if (!--ref)
+ return ref;
+ }
+ }
+ }
+ if (res->bind & PIPE_BIND_DEPTH_STENCIL) {
+ if (nv30->framebuffer.zsbuf &&
+ nv30->framebuffer.zsbuf->texture == res) {
+ nv30->dirty |= NV30_NEW_FRAMEBUFFER;
+ nouveau_bufctx_reset(nv30->bufctx, BUFCTX_FB);
+ if (!--ref)
+ return ref;
+ }
+ }
+
+ if (res->bind & PIPE_BIND_VERTEX_BUFFER) {
+ for (i = 0; i < nv30->num_vtxbufs; ++i) {
+ if (nv30->vtxbuf[i].buffer == res) {
+ nv30->dirty |= NV30_NEW_ARRAYS;
+ nouveau_bufctx_reset(nv30->bufctx, BUFCTX_VTXBUF);
+ if (!--ref)
+ return ref;
+ }
+ }
+ }
+ if (res->bind & PIPE_BIND_INDEX_BUFFER) {
+ if (nv30->idxbuf.buffer == res) {
+ nouveau_bufctx_reset(nv30->bufctx, BUFCTX_IDXBUF);
+ if (!--ref)
+ return ref;
+ }
+ }
+
+ if (res->bind & PIPE_BIND_SAMPLER_VIEW) {
+ for (i = 0; i < nv30->fragprog.num_textures; ++i) {
+ if (nv30->fragprog.textures[i] &&
+ nv30->fragprog.textures[i]->texture == res) {
+ nv30->dirty |= NV30_NEW_FRAGTEX;
+ nouveau_bufctx_reset(nv30->bufctx, BUFCTX_FRAGTEX(i));
+ if (!--ref)
+ return ref;
+ }
+ }
+ for (i = 0; i < nv30->vertprog.num_textures; ++i) {
+ if (nv30->vertprog.textures[i] &&
+ nv30->vertprog.textures[i]->texture == res) {
+ nv30->dirty |= NV30_NEW_VERTTEX;
+ nouveau_bufctx_reset(nv30->bufctx, BUFCTX_VERTTEX(i));
+ if (!--ref)
+ return ref;
+ }
+ }
+ }
+
+ return ref;
}
static void
@@ -138,6 +212,8 @@ nv30_context_create(struct pipe_screen *pscreen, void *priv)
nv30->base.pushbuf->rsvd_kick = 16; /* hack in screen before first space */
nv30->base.pushbuf->kick_notify = nv30_context_kick_notify;
+ nv30->base.invalidate_resource_storage = nv30_invalidate_resource_storage;
+
ret = nouveau_bufctx_new(nv30->base.client, 64, &nv30->bufctx);
if (ret) {
nv30_context_destroy(pipe);
diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c
index 08e576a214e..b8b29a3b5d7 100644
--- a/src/gallium/drivers/nv50/nv50_context.c
+++ b/src/gallium/drivers/nv50/nv50_context.c
@@ -42,6 +42,8 @@ nv50_flush(struct pipe_context *pipe,
nouveau_fence_ref(screen->fence.current, (struct nouveau_fence **)fence);
PUSH_KICK(screen->pushbuf);
+
+ nouveau_context_update_frame_stats(nouveau_context(pipe));
}
static void
@@ -115,6 +117,83 @@ nv50_destroy(struct pipe_context *pipe)
nouveau_context_destroy(&nv50->base);
}
+static int
+nv50_invalidate_resource_storage(struct nouveau_context *ctx,
+ struct pipe_resource *res,
+ int ref)
+{
+ struct nv50_context *nv50 = nv50_context(&ctx->pipe);
+ unsigned s, i;
+
+ if (res->bind & PIPE_BIND_RENDER_TARGET) {
+ for (i = 0; i < nv50->framebuffer.nr_cbufs; ++i) {
+ if (nv50->framebuffer.cbufs[i] &&
+ nv50->framebuffer.cbufs[i]->texture == res) {
+ nv50->dirty |= NV50_NEW_FRAMEBUFFER;
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB);
+ if (!--ref)
+ return ref;
+ }
+ }
+ }
+ if (res->bind & PIPE_BIND_DEPTH_STENCIL) {
+ if (nv50->framebuffer.zsbuf &&
+ nv50->framebuffer.zsbuf->texture == res) {
+ nv50->dirty |= NV50_NEW_FRAMEBUFFER;
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB);
+ if (!--ref)
+ return ref;
+ }
+ }
+
+ if (res->bind & PIPE_BIND_VERTEX_BUFFER) {
+ for (i = 0; i < nv50->num_vtxbufs; ++i) {
+ if (nv50->vtxbuf[i].buffer == res) {
+ nv50->dirty |= NV50_NEW_ARRAYS;
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_VERTEX);
+ if (!--ref)
+ return ref;
+ }
+ }
+ }
+ if (res->bind & PIPE_BIND_INDEX_BUFFER) {
+ if (nv50->idxbuf.buffer == res)
+ if (!--ref)
+ return ref;
+ }
+
+ if (res->bind & PIPE_BIND_SAMPLER_VIEW) {
+ for (s = 0; s < 5; ++s) {
+ for (i = 0; i < nv50->num_textures[s]; ++i) {
+ if (nv50->textures[s][i] &&
+ nv50->textures[s][i]->texture == res) {
+ nv50->dirty |= NV50_NEW_TEXTURES;
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TEXTURES);
+ if (!--ref)
+ return ref;
+ }
+ }
+ }
+ }
+
+ if (res->bind & PIPE_BIND_CONSTANT_BUFFER) {
+ for (s = 0; s < 5; ++s) {
+ for (i = 0; i < nv50->num_vtxbufs; ++i) {
+ if (!nv50->constbuf[s][i].user &&
+ nv50->constbuf[s][i].u.buf == res) {
+ nv50->dirty |= NV50_NEW_CONSTBUF;
+ nv50->constbuf_dirty[s] |= 1 << i;
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_CB(s, i));
+ if (!--ref)
+ return ref;
+ }
+ }
+ }
+ }
+
+ return ref;
+}
+
struct pipe_context *
nv50_create(struct pipe_screen *pscreen, void *priv)
{
@@ -133,6 +212,7 @@ nv50_create(struct pipe_screen *pscreen, void *priv)
goto out_err;
nv50->base.pushbuf = screen->base.pushbuf;
+ nv50->base.client = screen->base.client;
ret = nouveau_bufctx_new(screen->base.client, NV50_BIND_COUNT,
&nv50->bufctx_3d);
@@ -168,6 +248,8 @@ nv50_create(struct pipe_screen *pscreen, void *priv)
nv50_init_state_functions(nv50);
nv50_init_resource_functions(pipe);
+ nv50->base.invalidate_resource_storage = nv50_invalidate_resource_storage;
+
#ifdef NV50_WITH_DRAW_MODULE
/* no software fallbacks implemented */
nv50->draw = draw_create(pipe);
diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
index fb0d94b5b01..b6e53f6fdea 100644
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -176,7 +176,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
return 256;
case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
- return 64;
+ return NOUVEAU_MIN_BUFFER_MAP_ALIGN;
case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h
index 8784f36d417..2e8af436e36 100644
--- a/src/gallium/drivers/nv50/nv50_screen.h
+++ b/src/gallium/drivers/nv50/nv50_screen.h
@@ -98,7 +98,8 @@ nv50_resource_validate(struct nv04_resource *res, uint32_t flags)
{
if (likely(res->bo)) {
if (flags & NOUVEAU_BO_WR)
- res->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+ res->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING |
+ NOUVEAU_BUFFER_STATUS_DIRTY;
if (flags & NOUVEAU_BO_RD)
res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;
diff --git a/src/gallium/drivers/nvc0/nvc0_context.c b/src/gallium/drivers/nvc0/nvc0_context.c
index 6325ac859f4..23059a097b9 100644
--- a/src/gallium/drivers/nvc0/nvc0_context.c
+++ b/src/gallium/drivers/nvc0/nvc0_context.c
@@ -43,6 +43,8 @@ nvc0_flush(struct pipe_context *pipe,
nouveau_fence_ref(screen->fence.current, (struct nouveau_fence **)fence);
PUSH_KICK(nvc0->base.pushbuf); /* fencing handled in kick_notify */
+
+ nouveau_context_update_frame_stats(&nvc0->base);
}
static void
@@ -116,6 +118,87 @@ nvc0_default_kick_notify(struct nouveau_pushbuf *push)
}
}
+static int
+nvc0_invalidate_resource_storage(struct nouveau_context *ctx,
+ struct pipe_resource *res,
+ int ref)
+{
+ struct nvc0_context *nvc0 = nvc0_context(&ctx->pipe);
+ unsigned s, i;
+
+ if (res->bind & PIPE_BIND_RENDER_TARGET) {
+ for (i = 0; i < nvc0->framebuffer.nr_cbufs; ++i) {
+ if (nvc0->framebuffer.cbufs[i] &&
+ nvc0->framebuffer.cbufs[i]->texture == res) {
+ nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_FB);
+ if (!--ref)
+ return ref;
+ }
+ }
+ }
+ if (res->bind & PIPE_BIND_DEPTH_STENCIL) {
+ if (nvc0->framebuffer.zsbuf &&
+ nvc0->framebuffer.zsbuf->texture == res) {
+ nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_FB);
+ if (!--ref)
+ return ref;
+ }
+ }
+
+ if (res->bind & PIPE_BIND_VERTEX_BUFFER) {
+ for (i = 0; i < nvc0->num_vtxbufs; ++i) {
+ if (nvc0->vtxbuf[i].buffer == res) {
+ nvc0->dirty |= NVC0_NEW_ARRAYS;
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX);
+ if (!--ref)
+ return ref;
+ }
+ }
+ }
+ if (res->bind & PIPE_BIND_INDEX_BUFFER) {
+ if (nvc0->idxbuf.buffer == res) {
+ nvc0->dirty |= NVC0_NEW_IDXBUF;
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_IDX);
+ if (!--ref)
+ return ref;
+ }
+ }
+
+ if (res->bind & PIPE_BIND_SAMPLER_VIEW) {
+ for (s = 0; s < 5; ++s) {
+ for (i = 0; i < nvc0->num_textures[s]; ++i) {
+ if (nvc0->textures[s][i] &&
+ nvc0->textures[s][i]->texture == res) {
+ nvc0->textures_dirty[s] |= 1 << i;
+ nvc0->dirty |= NVC0_NEW_TEXTURES;
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(s, i));
+ if (!--ref)
+ return ref;
+ }
+ }
+ }
+ }
+
+ if (res->bind & PIPE_BIND_CONSTANT_BUFFER) {
+ for (s = 0; s < 5; ++s) {
+ for (i = 0; i < nvc0->num_vtxbufs; ++i) {
+ if (!nvc0->constbuf[s][i].user &&
+ nvc0->constbuf[s][i].u.buf == res) {
+ nvc0->dirty |= NVC0_NEW_CONSTBUF;
+ nvc0->constbuf_dirty[s] |= 1 << i;
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_CB(s, i));
+ if (!--ref)
+ return ref;
+ }
+ }
+ }
+ }
+
+ return ref;
+}
+
struct pipe_context *
nvc0_create(struct pipe_screen *pscreen, void *priv)
{
@@ -134,6 +217,7 @@ nvc0_create(struct pipe_screen *pscreen, void *priv)
goto out_err;
nvc0->base.pushbuf = screen->base.pushbuf;
+ nvc0->base.client = screen->base.client;
ret = nouveau_bufctx_new(screen->base.client, NVC0_BIND_COUNT,
&nvc0->bufctx_3d);
@@ -168,6 +252,8 @@ nvc0_create(struct pipe_screen *pscreen, void *priv)
nvc0_init_transfer_functions(nvc0);
nvc0_init_resource_functions(pipe);
+ nvc0->base.invalidate_resource_storage = nvc0_invalidate_resource_storage;
+
#ifdef NVC0_WITH_DRAW_MODULE
/* no software fallbacks implemented */
nvc0->draw = draw_create(pipe);
diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c
index 95e6efc7d0d..d116be9dc46 100644
--- a/src/gallium/drivers/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nvc0/nvc0_screen.c
@@ -150,7 +150,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
return 256;
case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
- return 64;
+ return NOUVEAU_MIN_BUFFER_MAP_ALIGN;
case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
diff --git a/src/gallium/drivers/nvc0/nvc0_screen.h b/src/gallium/drivers/nvc0/nvc0_screen.h
index 1387d67a273..2adcfeac3ef 100644
--- a/src/gallium/drivers/nvc0/nvc0_screen.h
+++ b/src/gallium/drivers/nvc0/nvc0_screen.h
@@ -97,7 +97,8 @@ nvc0_resource_validate(struct nv04_resource *res, uint32_t flags)
{
if (likely(res->bo)) {
if (flags & NOUVEAU_BO_WR)
- res->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+ res->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING |
+ NOUVEAU_BUFFER_STATUS_DIRTY;
if (flags & NOUVEAU_BO_RD)
res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;