diff options
author | Pierre-Eric Pelloux-Prayer <[email protected]> | 2019-10-15 15:19:22 +0200 |
---|---|---|
committer | Pierre-Eric Pelloux-Prayer <[email protected]> | 2019-10-30 18:03:14 +0100 |
commit | 21b9a6b59019fe232beb8e82fc0eb231e56df268 (patch) | |
tree | 7961294d2281262d9a76264f45f91a9e57e3ef9c /src/gallium | |
parent | f53811aeace20530a502ea1ead3d4d2230dc1945 (diff) |
radeonsi: align sdma byte count to dw
If the src/dst addresses are dword (dw) aligned and the size is > 4 bytes,
then we align the byte count to a dword as well.
The PAL implementation works like this.
Reviewed-by: Marek Olšák <[email protected]>
Diffstat (limited to 'src/gallium')
-rw-r--r-- | src/gallium/drivers/radeonsi/cik_sdma.c | 13 |
1 file changed, 12 insertions, 1 deletion
diff --git a/src/gallium/drivers/radeonsi/cik_sdma.c b/src/gallium/drivers/radeonsi/cik_sdma.c index f45903affa3..af905f66c99 100644 --- a/src/gallium/drivers/radeonsi/cik_sdma.c +++ b/src/gallium/drivers/radeonsi/cik_sdma.c @@ -35,6 +35,7 @@ static void cik_sdma_copy_buffer(struct si_context *ctx, { struct radeon_cmdbuf *cs = ctx->dma_cs; unsigned i, ncopy, csize; + unsigned align = ~0u; struct si_resource *sdst = si_resource(dst); struct si_resource *ssrc = si_resource(src); @@ -48,10 +49,20 @@ static void cik_sdma_copy_buffer(struct si_context *ctx, src_offset += ssrc->gpu_address; ncopy = DIV_ROUND_UP(size, CIK_SDMA_COPY_MAX_SIZE); + + /* Align copy size to dw if src/dst address are dw aligned */ + if ((src_offset & 0x3) == 0 && + (dst_offset & 0x3) == 0 && + size > 4 && + (size & 3) != 0) { + align = ~0x3u; + ncopy++; + } + si_need_dma_space(ctx, ncopy * 7, sdst, ssrc); for (i = 0; i < ncopy; i++) { - csize = MIN2(size, CIK_SDMA_COPY_MAX_SIZE); + csize = size >= 4 ? MIN2(size & align, CIK_SDMA_COPY_MAX_SIZE) : size; radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY, CIK_SDMA_COPY_SUB_OPCODE_LINEAR, 0)); |