summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/nvfx/nvfx_transfer.c
diff options
context:
space:
mode:
authorLuca Barbieri <[email protected]>2010-08-07 05:39:18 +0200
committerLuca Barbieri <[email protected]>2010-08-21 20:42:14 +0200
commit8eb0fc430a8c1687627156a06faf5762144022f3 (patch)
treefaddeeecd24e26c1d92d9aeeeb5e4ba0dd276e96 /src/gallium/drivers/nvfx/nvfx_transfer.c
parent73b7c6fb336ad3e717f8e961f4e2df761e94cd2f (diff)
nvfx: rewrite draw code and buffer code
This is a full rewrite of the drawing and buffer management logic. It offers a lot of improvements: 1. A copy of buffers is now always kept in system memory. This is necessary to allow software processing of them, which is necessary or improves performance in many cases. 2. Support for pushing vertices on the FIFO, with index lookup if necessary. 3. "Smart" draw code that tries to intelligently choose the cheapest way to draw something: whether to use inline vertices or hardware vertex buffer, and whether to use hardware index buffers 4. Support for all vertex formats supported by the hardware 5. Usage of translate to push vertices, supporting all formats that are sensible to use as vertex formats 6. Support for base vertex 7. Usage of Ben Skeggs' primitive splitter originally for nv50, allowing correct splitting of line loops, triangle fans, etc. 8. Support for instancing 9. Precomputation using the vertex elements CSO Thanks to Ben Skeggs for his primitive splitter originally for nv50. Thanks to Christoph Bumiller for his nv50 push code, that was the basis of this work, even though I changed his code dramatically, in particular to replace his ad-hoc vertex data emitter with translate. The changes could also go into nv50 too, but there are substantial differences due to the additional nv50 hardware features.
Diffstat (limited to 'src/gallium/drivers/nvfx/nvfx_transfer.c')
-rw-r--r--src/gallium/drivers/nvfx/nvfx_transfer.c173
1 files changed, 149 insertions, 24 deletions
diff --git a/src/gallium/drivers/nvfx/nvfx_transfer.c b/src/gallium/drivers/nvfx/nvfx_transfer.c
index e9c3dd7e551..ca4462ef9dc 100644
--- a/src/gallium/drivers/nvfx/nvfx_transfer.c
+++ b/src/gallium/drivers/nvfx/nvfx_transfer.c
@@ -26,25 +26,44 @@ nvfx_transfer_new(struct pipe_context *pipe,
unsigned usage,
const struct pipe_box *box)
{
- struct nvfx_staging_transfer* tx;
- bool direct = !nvfx_resource_on_gpu(pt) && pt->flags & NVFX_RESOURCE_FLAG_LINEAR;
-
- tx = CALLOC_STRUCT(nvfx_staging_transfer);
- if(!tx)
- return NULL;
-
- util_staging_transfer_init(pipe, pt, sr, usage, box, direct, tx);
+ if((usage & (PIPE_TRANSFER_UNSYNCHRONIZED | PIPE_TRANSFER_DONTBLOCK)) == PIPE_TRANSFER_DONTBLOCK)
+ {
+ struct nouveau_bo* bo = ((struct nvfx_resource*)pt)->bo;
+ if(bo && nouveau_bo_busy(bo, NOUVEAU_BO_WR))
+ return NULL;
+ }
if(pt->target == PIPE_BUFFER)
{
- tx->base.base.slice_stride = tx->base.base.stride = ((struct nvfx_resource*)tx->base.staging_resource)->bo->size;
- if(direct)
- tx->offset = util_format_get_stride(pt->format, box->x);
- else
- tx->offset = 0;
+ // it would be nice if we could avoid all this ridiculous overhead...
+ struct pipe_transfer* tx;
+ struct nvfx_buffer* buffer = nvfx_buffer(pt);
+
+ tx = CALLOC_STRUCT(pipe_transfer);
+ if (!tx)
+ return NULL;
+
+ pipe_resource_reference(&tx->resource, pt);
+ tx->sr = sr;
+ tx->usage = usage;
+ tx->box = *box;
+
+ tx->slice_stride = tx->stride = util_format_get_stride(pt->format, box->width);
+ tx->data = buffer->data + util_format_get_stride(pt->format, box->x);
+
+ return tx;
}
else
{
+ struct nvfx_staging_transfer* tx;
+ bool direct = !nvfx_resource_on_gpu(pt) && pt->flags & NVFX_RESOURCE_FLAG_LINEAR;
+
+ tx = CALLOC_STRUCT(nvfx_staging_transfer);
+ if(!tx)
+ return NULL;
+
+ util_staging_transfer_init(pipe, pt, sr, usage, box, direct, &tx->base);
+
if(direct)
{
tx->base.base.stride = nvfx_subresource_pitch(pt, sr.level);
@@ -66,26 +85,132 @@ nvfx_transfer_new(struct pipe_context *pipe,
}
}
+static void nvfx_buffer_dirty_interval(struct nvfx_buffer* buffer, unsigned begin, unsigned size, boolean unsynchronized)
+{
+ struct nvfx_screen* screen = nvfx_screen(buffer->base.base.screen);
+ buffer->last_update_static = buffer->bytes_to_draw_until_static < 0;
+ if(buffer->dirty_begin == buffer->dirty_end)
+ {
+ buffer->dirty_begin = begin;
+ buffer->dirty_end = begin + size;
+ buffer->dirty_unsynchronized = unsynchronized;
+ }
+ else
+ {
+ buffer->dirty_begin = MIN2(buffer->dirty_begin, begin);
+ buffer->dirty_end = MAX2(buffer->dirty_end, begin + size);
+ buffer->dirty_unsynchronized &= unsynchronized;
+ }
+
+ if(unsynchronized)
+ {
+ // TODO: revisit this, it doesn't seem quite right
+ //printf("UNSYNC UPDATE %p %u %u\n", buffer, begin, size);
+ buffer->bytes_to_draw_until_static += size * screen->static_reuse_threshold;
+ }
+ else
+ buffer->bytes_to_draw_until_static = buffer->size * screen->static_reuse_threshold;
+}
+
+static void nvfx_transfer_flush_region( struct pipe_context *pipe,
+ struct pipe_transfer *ptx,
+ const struct pipe_box *box)
+{
+ if(ptx->resource->target == PIPE_BUFFER && (ptx->usage & PIPE_TRANSFER_FLUSH_EXPLICIT))
+ {
+ struct nvfx_buffer* buffer = nvfx_buffer(ptx->resource);
+ nvfx_buffer_dirty_interval(buffer,
+ (uint8_t*)ptx->data - buffer->data + util_format_get_stride(buffer->base.base.format, box->x),
+ util_format_get_stride(buffer->base.base.format, box->width),
+ !!(ptx->usage & PIPE_TRANSFER_UNSYNCHRONIZED));
+ }
+}
+
+static void
+nvfx_transfer_destroy(struct pipe_context *pipe, struct pipe_transfer *ptx)
+{
+ if(ptx->resource->target == PIPE_BUFFER)
+ {
+ struct nvfx_buffer* buffer = nvfx_buffer(ptx->resource);
+ if((ptx->usage & (PIPE_TRANSFER_WRITE | PIPE_TRANSFER_FLUSH_EXPLICIT)) == PIPE_TRANSFER_WRITE)
+ nvfx_buffer_dirty_interval(buffer,
+ (uint8_t*)ptx->data - buffer->data,
+ ptx->stride,
+ !!(ptx->usage & PIPE_TRANSFER_UNSYNCHRONIZED));
+ pipe_resource_reference(&ptx->resource, 0);
+ FREE(ptx);
+ }
+ else
+ util_staging_transfer_destroy(pipe, ptx);
+}
+
void *
nvfx_transfer_map(struct pipe_context *pipe, struct pipe_transfer *ptx)
{
- struct nvfx_staging_transfer *tx = (struct nvfx_staging_transfer *)ptx;
- if(!ptx->data)
+ if(ptx->resource->target == PIPE_BUFFER)
+ return ptx->data;
+ else
{
- struct nvfx_miptree *mt = (struct nvfx_miptree *)tx->base.staging_resource;
- uint8_t *map = nouveau_screen_bo_map(pipe->screen, mt->base.bo, nouveau_screen_transfer_flags(ptx->usage));
- ptx->data = map + tx->offset;
+ struct nvfx_staging_transfer *tx = (struct nvfx_staging_transfer *)ptx;
+ if(!ptx->data)
+ {
+ struct nvfx_miptree *mt = (struct nvfx_miptree *)tx->base.staging_resource;
+ uint8_t *map = nouveau_screen_bo_map(pipe->screen, mt->base.bo, nouveau_screen_transfer_flags(ptx->usage));
+ ptx->data = map + tx->offset;
+ }
+
+ ++tx->map_count;
+ return ptx->data;
}
- ++tx->map_count;
- return ptx->data;
}
void
nvfx_transfer_unmap(struct pipe_context *pipe, struct pipe_transfer *ptx)
{
- struct nvfx_staging_transfer *tx = (struct nvfx_staging_transfer *)ptx;
- struct nvfx_miptree *mt = (struct nvfx_miptree *)tx->base.staging_resource;
+ if(ptx->resource->target != PIPE_BUFFER)
+ {
+ struct nvfx_staging_transfer *tx = (struct nvfx_staging_transfer *)ptx;
+ struct nvfx_miptree *mt = (struct nvfx_miptree *)tx->base.staging_resource;
+
+ if(!--tx->map_count)
+ {
+ nouveau_screen_bo_unmap(pipe->screen, mt->base.bo);
+ ptx->data = 0;
+ }
+ }
+}
+
+static void nvfx_transfer_inline_write( struct pipe_context *pipe,
+ struct pipe_resource *pr,
+ struct pipe_subresource sr,
+ unsigned usage,
+ const struct pipe_box *box,
+ const void *data,
+ unsigned stride,
+ unsigned slice_stride)
+{
+ if(pr->target != PIPE_BUFFER)
+ {
+ u_default_transfer_inline_write(pipe, pr, sr, usage, box, data, stride, slice_stride);
+ }
+ else
+ {
+ struct nvfx_buffer* buffer = nvfx_buffer(pr);
+ unsigned begin = util_format_get_stride(pr->format, box->x);
+ unsigned size = util_format_get_stride(pr->format, box->width);
+ memcpy(buffer->data + begin, data, size);
+ nvfx_buffer_dirty_interval(buffer, begin, size,
+ !!(pr->flags & PIPE_TRANSFER_UNSYNCHRONIZED));
+ }
+}
- if(!--tx->map_count)
- nouveau_screen_bo_unmap(pipe->screen, mt->base.bo);
+void
+nvfx_init_transfer_functions(struct pipe_context *pipe)
+{
+ pipe->get_transfer = nvfx_transfer_new;
+ pipe->transfer_map = nvfx_transfer_map;
+ pipe->transfer_flush_region = nvfx_transfer_flush_region;
+ pipe->transfer_unmap = nvfx_transfer_unmap;
+ pipe->transfer_destroy = nvfx_transfer_destroy;
+ pipe->transfer_inline_write = nvfx_transfer_inline_write;
}