diff options
Diffstat (limited to 'src/gallium/drivers/radeonsi')
-rw-r--r-- | src/gallium/drivers/radeonsi/si_clear.c | 6 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_cp_dma.c | 88 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_pipe.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_pipe.h | 11 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_test_clearbuffer.c | 4 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_test_dma.c | 2 |
6 files changed, 60 insertions, 53 deletions
diff --git a/src/gallium/drivers/radeonsi/si_clear.c b/src/gallium/drivers/radeonsi/si_clear.c index 654ff0ace78..4e07de81bac 100644 --- a/src/gallium/drivers/radeonsi/si_clear.c +++ b/src/gallium/drivers/radeonsi/si_clear.c @@ -256,7 +256,7 @@ void vi_dcc_clear_level(struct si_context *sctx, } si_clear_buffer(sctx, dcc_buffer, dcc_offset, clear_size, - clear_value, SI_COHERENCY_CB_META, SI_METHOD_BEST); + clear_value, SI_COHERENCY_CB_META); } /* Set the same micro tile mode as the destination of the last MSAA resolve. @@ -489,7 +489,7 @@ static void si_do_fast_color_clear(struct si_context *sctx, si_clear_buffer(sctx, &tex->cmask_buffer->b.b, tex->cmask_offset, tex->surface.cmask_size, - 0xCCCCCCCC, SI_COHERENCY_CB_META, SI_METHOD_BEST); + 0xCCCCCCCC, SI_COHERENCY_CB_META); need_decompress_pass = true; } @@ -520,7 +520,7 @@ static void si_do_fast_color_clear(struct si_context *sctx, /* Do the fast clear. */ si_clear_buffer(sctx, &tex->cmask_buffer->b.b, tex->cmask_offset, tex->surface.cmask_size, 0, - SI_COHERENCY_CB_META, SI_METHOD_BEST); + SI_COHERENCY_CB_META); need_decompress_pass = true; } diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c index 86eb3529d9b..bae592a4f7d 100644 --- a/src/gallium/drivers/radeonsi/si_cp_dma.c +++ b/src/gallium/drivers/radeonsi/si_cp_dma.c @@ -224,28 +224,63 @@ static void si_cp_dma_prepare(struct si_context *sctx, struct pipe_resource *dst } } +void si_cp_dma_clear_buffer(struct si_context *sctx, struct pipe_resource *dst, + uint64_t offset, uint64_t size, unsigned value, + enum si_coherency coher, + enum si_cache_policy cache_policy) +{ + struct r600_resource *rdst = r600_resource(dst); + uint64_t va = rdst->gpu_address + offset; + bool is_first = true; + + assert(size && size % 4 == 0); + + /* Mark the buffer range of destination as valid (initialized), + * so that transfer_map knows it should wait for the GPU when mapping + * that range. */ + util_range_add(&rdst->valid_buffer_range, offset, offset + size); + + /* Flush the caches. */ + sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | + SI_CONTEXT_CS_PARTIAL_FLUSH | + get_flush_flags(sctx, coher, cache_policy); + + while (size) { + unsigned byte_count = MIN2(size, cp_dma_max_byte_count(sctx)); + unsigned dma_flags = CP_DMA_CLEAR; + + si_cp_dma_prepare(sctx, dst, NULL, byte_count, size, 0, coher, + &is_first, &dma_flags); + + /* Emit the clear packet. */ + si_emit_cp_dma(sctx, va, value, byte_count, dma_flags, cache_policy); + + size -= byte_count; + va += byte_count; + } + + if (cache_policy != L2_BYPASS) + rdst->TC_L2_dirty = true; + + /* If it's not a framebuffer fast clear... */ + if (coher == SI_COHERENCY_SHADER) + sctx->num_cp_dma_calls++; +} + void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst, uint64_t offset, uint64_t size, unsigned value, - enum si_coherency coher, enum si_method xfer) + enum si_coherency coher) { struct radeon_winsys *ws = sctx->ws; struct r600_resource *rdst = r600_resource(dst); enum si_cache_policy cache_policy = get_cache_policy(sctx, coher); - unsigned flush_flags = get_flush_flags(sctx, coher, cache_policy); uint64_t dma_clear_size; - bool is_first = true; if (!size) return; dma_clear_size = size & ~3ull; - /* Mark the buffer range of destination as valid (initialized), - * so that transfer_map knows it should wait for the GPU when mapping - * that range. */ - util_range_add(&rdst->valid_buffer_range, offset, - offset + dma_clear_size); - /* dma_clear_buffer can use clear_buffer on failure. Make sure that * doesn't happen. We don't want an infinite recursion: */ if (sctx->dma_cs && @@ -261,44 +296,17 @@ void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst, * For example, DeusEx:MD has 21 buffer clears per frame and all * of them are moved to SDMA thanks to this. */ !ws->cs_is_buffer_referenced(sctx->gfx_cs, rdst->buf, - RADEON_USAGE_READWRITE)) && - /* bypass sdma transfer with param xfer */ - (xfer != SI_METHOD_CP_DMA)) { + RADEON_USAGE_READWRITE))) { sctx->dma_clear_buffer(sctx, dst, offset, dma_clear_size, value); offset += dma_clear_size; size -= dma_clear_size; } else if (dma_clear_size >= 4) { - uint64_t va = rdst->gpu_address + offset; + si_cp_dma_clear_buffer(sctx, dst, offset, dma_clear_size, value, + coher, cache_policy); offset += dma_clear_size; size -= dma_clear_size; - - /* Flush the caches. */ - sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | - SI_CONTEXT_CS_PARTIAL_FLUSH | flush_flags; - - while (dma_clear_size) { - unsigned byte_count = MIN2(dma_clear_size, cp_dma_max_byte_count(sctx)); - unsigned dma_flags = CP_DMA_CLEAR; - - si_cp_dma_prepare(sctx, dst, NULL, byte_count, dma_clear_size, 0, - coher, &is_first, &dma_flags); - - /* Emit the clear packet. */ - si_emit_cp_dma(sctx, va, value, byte_count, dma_flags, - cache_policy); - - dma_clear_size -= byte_count; - va += byte_count; - } - - if (cache_policy != L2_BYPASS) - rdst->TC_L2_dirty = true; - - /* If it's not a framebuffer fast clear... */ - if (coher == SI_COHERENCY_SHADER) - sctx->num_cp_dma_calls++; } if (size) { @@ -370,7 +378,7 @@ static void si_pipe_clear_buffer(struct pipe_context *ctx, } si_clear_buffer(sctx, dst, offset, size, dword_value, - SI_COHERENCY_SHADER, SI_METHOD_BEST); + SI_COHERENCY_SHADER); } /** diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index c1983b86661..3ca53dfed7c 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -546,7 +546,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, /* Clear the NULL constant buffer, because loads should return zeros. */ si_clear_buffer(sctx, sctx->null_const_buf.buffer, 0, sctx->null_const_buf.buffer->width0, 0, - SI_COHERENCY_SHADER, SI_METHOD_BEST); + SI_COHERENCY_SHADER); } uint64_t max_threads_per_block; diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 557f708d250..7773cb02d7c 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -1119,15 +1119,14 @@ enum si_coherency { SI_COHERENCY_CB_META, }; -enum si_method { - SI_METHOD_CP_DMA, - SI_METHOD_BEST, -}; - void si_cp_dma_wait_for_idle(struct si_context *sctx); +void si_cp_dma_clear_buffer(struct si_context *sctx, struct pipe_resource *dst, + uint64_t offset, uint64_t size, unsigned value, + enum si_coherency coher, + enum si_cache_policy cache_policy); void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst, uint64_t offset, uint64_t size, unsigned value, - enum si_coherency coher, enum si_method xfer); + enum si_coherency coher); void si_copy_buffer(struct si_context *sctx, struct pipe_resource *dst, struct pipe_resource *src, uint64_t dst_offset, uint64_t src_offset, unsigned size, diff --git a/src/gallium/drivers/radeonsi/si_test_clearbuffer.c b/src/gallium/drivers/radeonsi/si_test_clearbuffer.c index c0696da26db..e863381fd15 100644 --- a/src/gallium/drivers/radeonsi/si_test_clearbuffer.c +++ b/src/gallium/drivers/radeonsi/si_test_clearbuffer.c @@ -48,8 +48,8 @@ measure_clearbuf_time(struct pipe_context *ctx, ctx->begin_query(ctx, query_te); /* operation */ - si_clear_buffer(sctx, buf, 0, memory_size, 0x00, - SI_COHERENCY_SHADER, SI_METHOD_CP_DMA); + si_cp_dma_clear_buffer(sctx, buf, 0, memory_size, 0x00, + SI_COHERENCY_SHADER, L2_LRU); ctx->end_query(ctx, query_te); ctx->get_query_result(ctx, query_te, true, &qresult); diff --git a/src/gallium/drivers/radeonsi/si_test_dma.c b/src/gallium/drivers/radeonsi/si_test_dma.c index f125769d1cf..c81ec75dde2 100644 --- a/src/gallium/drivers/radeonsi/si_test_dma.c +++ b/src/gallium/drivers/radeonsi/si_test_dma.c @@ -308,7 +308,7 @@ void si_test_dma(struct si_screen *sscreen) /* clear dst pixels */ si_clear_buffer(sctx, dst, 0, sdst->surface.surf_size, 0, - SI_COHERENCY_SHADER, SI_METHOD_BEST); + SI_COHERENCY_SHADER); memset(dst_cpu.ptr, 0, dst_cpu.layer_stride * tdst.array_size); /* preparation */ |