diff options
author | Marek Olšák <[email protected]> | 2014-03-08 15:15:41 +0100 |
---|---|---|
committer | Marek Olšák <[email protected]> | 2014-03-11 19:18:02 +0100 |
commit | 4ca3486b195653f875003d335921fd4e7d7c2c4a (patch) | |
tree | 71d066d3bf42403a51176692966f0b552bb913dc /src/gallium/drivers/radeon | |
parent | de5094d102da0ffd8adef606b89e7a40d5843141 (diff) |
r600g,radeonsi: use a fallback in dma_copy instead of failing
v2: - allow byte-aligned DMA buffer copies on Evergreen
    - fix piglit/texsubimage regression
    - use the fallback for 3D copies (depth > 1) as well
Diffstat (limited to 'src/gallium/drivers/radeon')
-rw-r--r-- | src/gallium/drivers/radeon/r600_buffer_common.c | 58 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/r600_pipe_common.h | 17 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/r600_texture.c | 18 |
3 files changed, 39 insertions, 54 deletions
diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c index 90ca8cb35e6..a7ecfb3e440 100644 --- a/src/gallium/drivers/radeon/r600_buffer_common.c +++ b/src/gallium/drivers/radeon/r600_buffer_common.c @@ -190,6 +190,17 @@ static void *r600_buffer_get_transfer(struct pipe_context *ctx, return data; } +static bool r600_can_dma_copy_buffer(struct r600_common_context *rctx, + unsigned dstx, unsigned srcx, unsigned size) +{ + bool dword_aligned = !(dstx % 4) && !(srcx % 4) && !(size % 4); + + return rctx->screen->has_cp_dma || + (dword_aligned && (rctx->rings.dma.cs || + rctx->screen->has_streamout)); + +} + static void *r600_buffer_transfer_map(struct pipe_context *ctx, struct pipe_resource *resource, unsigned level, @@ -233,10 +244,7 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx, else if ((usage & PIPE_TRANSFER_DISCARD_RANGE) && !(usage & PIPE_TRANSFER_UNSYNCHRONIZED) && !(rscreen->debug_flags & DBG_NO_DISCARD_RANGE) && - (rscreen->has_cp_dma || - (rscreen->has_streamout && - /* The buffer range must be aligned to 4 with streamout. */ - box->x % 4 == 0 && box->width % 4 == 0))) { + r600_can_dma_copy_buffer(rctx, box->x, 0, box->width)) { assert(usage & PIPE_TRANSFER_WRITE); /* Check if mapping this buffer would cause waiting for the GPU. */ @@ -260,10 +268,11 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx, /* At this point, the buffer is always idle (we checked it above). */ usage |= PIPE_TRANSFER_UNSYNCHRONIZED; } - /* Using DMA for larger reads is much faster */ + /* Using a staging buffer in GTT for larger reads is much faster. 
*/ else if ((usage & PIPE_TRANSFER_READ) && !(usage & PIPE_TRANSFER_WRITE) && - (rbuffer->domains == RADEON_DOMAIN_VRAM)) { + rbuffer->domains == RADEON_DOMAIN_VRAM && + r600_can_dma_copy_buffer(rctx, 0, box->x, box->width)) { unsigned offset; struct r600_resource *staging = NULL; @@ -274,26 +283,16 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx, if (staging) { data += box->x % R600_MAP_BUFFER_ALIGNMENT; - /* Copy the staging buffer into the original one. */ - if (rctx->dma_copy(ctx, (struct pipe_resource*)staging, 0, - box->x % R600_MAP_BUFFER_ALIGNMENT, - 0, 0, resource, level, box)) { - rctx->rings.gfx.flush(rctx, 0); - if (rctx->rings.dma.cs) - rctx->rings.dma.flush(rctx, 0); - - /* Wait for any offloaded CS flush to complete - * to avoid busy-waiting in the winsys. */ - rctx->ws->cs_sync_flush(rctx->rings.gfx.cs); - if (rctx->rings.dma.cs) - rctx->ws->cs_sync_flush(rctx->rings.dma.cs); - - rctx->ws->buffer_wait(staging->buf, RADEON_USAGE_WRITE); - return r600_buffer_get_transfer(ctx, resource, level, usage, box, - ptransfer, data, staging, offset); - } else { - pipe_resource_reference((struct pipe_resource**)&staging, NULL); - } + /* Copy the VRAM buffer to the staging buffer. */ + rctx->dma_copy(ctx, &staging->b.b, 0, + box->x % R600_MAP_BUFFER_ALIGNMENT, + 0, 0, resource, level, box); + + /* Just do the synchronization. The buffer is mapped already. */ + r600_buffer_map_sync_with_rings(rctx, staging, PIPE_TRANSFER_READ); + + return r600_buffer_get_transfer(ctx, resource, level, usage, box, + ptransfer, data, staging, offset); } } @@ -329,12 +328,7 @@ static void r600_buffer_transfer_unmap(struct pipe_context *ctx, u_box_1d(soffset, size, &box); /* Copy the staging buffer into the original one. */ - if (!(size % 4) && !(doffset % 4) && !(soffset % 4) && - rctx->dma_copy(ctx, dst, 0, doffset, 0, 0, src, 0, &box)) { - /* DONE. 
*/ - } else { - ctx->resource_copy_region(ctx, dst, 0, doffset, 0, 0, src, 0, &box); - } + rctx->dma_copy(ctx, dst, 0, doffset, 0, 0, src, 0, &box); } pipe_resource_reference((struct pipe_resource**)&rtransfer->staging, NULL); } diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index da953a2ef1e..86de6f879aa 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -346,15 +346,14 @@ struct r600_common_context { boolean current_render_cond_cond; boolean predicate_drawing; - /* Copy one resource to another using async DMA. - * False is returned if the copy couldn't be done. */ - boolean (*dma_copy)(struct pipe_context *ctx, - struct pipe_resource *dst, - unsigned dst_level, - unsigned dst_x, unsigned dst_y, unsigned dst_z, - struct pipe_resource *src, - unsigned src_level, - const struct pipe_box *src_box); + /* Copy one resource to another using async DMA. */ + void (*dma_copy)(struct pipe_context *ctx, + struct pipe_resource *dst, + unsigned dst_level, + unsigned dst_x, unsigned dst_y, unsigned dst_z, + struct pipe_resource *src, + unsigned src_level, + const struct pipe_box *src_box); void (*clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst, unsigned offset, unsigned size, unsigned value); diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c index 8eb48678f07..be10b46c0f1 100644 --- a/src/gallium/drivers/radeon/r600_texture.c +++ b/src/gallium/drivers/radeon/r600_texture.c @@ -80,12 +80,8 @@ static void r600_copy_to_staging_texture(struct pipe_context *ctx, struct r600_t return; } - if (!rctx->dma_copy(ctx, dst, 0, 0, 0, 0, - src, transfer->level, - &transfer->box)) { - ctx->resource_copy_region(ctx, dst, 0, 0, 0, 0, - src, transfer->level, &transfer->box); - } + rctx->dma_copy(ctx, dst, 0, 0, 0, 0, src, transfer->level, + &transfer->box); } /* Copy from a transfer's staging texture to a 
full GPU one. */ @@ -106,13 +102,9 @@ static void r600_copy_from_staging_texture(struct pipe_context *ctx, struct r600 return; } - if (!rctx->dma_copy(ctx, dst, transfer->level, - transfer->box.x, transfer->box.y, transfer->box.z, - src, 0, &sbox)) { - ctx->resource_copy_region(ctx, dst, transfer->level, - transfer->box.x, transfer->box.y, transfer->box.z, - src, 0, &sbox); - } + rctx->dma_copy(ctx, dst, transfer->level, + transfer->box.x, transfer->box.y, transfer->box.z, + src, 0, &sbox); } static unsigned r600_texture_get_offset(struct r600_texture *rtex, unsigned level, |