diff options
author | Jerome Glisse <[email protected]> | 2013-01-07 17:45:59 -0500 |
---|---|---|
committer | Jerome Glisse <[email protected]> | 2013-01-28 11:30:35 -0500 |
commit | 325422c49449acdd8df1eb2ca8ed81f7696c38cc (patch) | |
tree | 7cbc1aafe166dd925c5626c7118f523c8b75ae6d /src/gallium/drivers/r600/evergreen_state.c | |
parent | bff07638a86d36ac826fb287214eda9ce31c02ad (diff) |
r600g: add async for staging buffer upload v2
v2: Add virtual address to dma src/dst offset for cayman
Signed-off-by: Jerome Glisse <[email protected]>
Diffstat (limited to 'src/gallium/drivers/r600/evergreen_state.c')
-rw-r--r-- | src/gallium/drivers/r600/evergreen_state.c | 201 |
1 files changed, 201 insertions, 0 deletions
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index d03c376f0fc..be1c427ce29 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -30,6 +30,20 @@ #include "util/u_framebuffer.h" #include "util/u_dual_blend.h" #include "evergreen_compute.h" +#include "util/u_math.h" + +static INLINE unsigned evergreen_array_mode(unsigned mode) +{ + switch (mode) { + case RADEON_SURF_MODE_LINEAR_ALIGNED: return V_028C70_ARRAY_LINEAR_ALIGNED; + break; + case RADEON_SURF_MODE_1D: return V_028C70_ARRAY_1D_TILED_THIN1; + break; + case RADEON_SURF_MODE_2D: return V_028C70_ARRAY_2D_TILED_THIN1; + default: + case RADEON_SURF_MODE_LINEAR: return V_028C70_ARRAY_LINEAR_GENERAL; + } +} static uint32_t eg_num_banks(uint32_t nbanks) { @@ -3445,3 +3459,190 @@ void evergreen_update_db_shader_control(struct r600_context * rctx) rctx->db_misc_state.atom.dirty = true; } } + +static void evergreen_dma_copy_tile(struct r600_context *rctx, + struct pipe_resource *dst, + unsigned dst_level, + unsigned dst_x, + unsigned dst_y, + unsigned dst_z, + struct pipe_resource *src, + unsigned src_level, + unsigned src_x, + unsigned src_y, + unsigned src_z, + unsigned copy_height, + unsigned pitch, + unsigned bpp) +{ + struct radeon_winsys_cs *cs = rctx->rings.dma.cs; + struct r600_texture *rsrc = (struct r600_texture*)src; + struct r600_texture *rdst = (struct r600_texture*)dst; + unsigned array_mode, lbpp, pitch_tile_max, slice_tile_max, size; + unsigned ncopy, height, cheight, detile, i, x, y, z, src_mode, dst_mode; + unsigned sub_cmd, bank_h, bank_w, mt_aspect, nbanks, tile_split; + unsigned long base, addr; + + /* make sure that the dma ring is only one active */ + rctx->rings.gfx.flush(rctx, RADEON_FLUSH_ASYNC); + + dst_mode = rdst->surface.level[dst_level].mode; + src_mode = rsrc->surface.level[src_level].mode; + /* downcast linear aligned to linear to simplify test */ + src_mode = src_mode == RADEON_SURF_MODE_LINEAR_ALIGNED ? RADEON_SURF_MODE_LINEAR : src_mode; + dst_mode = dst_mode == RADEON_SURF_MODE_LINEAR_ALIGNED ? RADEON_SURF_MODE_LINEAR : dst_mode; + assert(dst_mode != src_mode); + + y = 0; + sub_cmd = 0x8; + lbpp = util_logbase2(bpp); + pitch_tile_max = ((pitch / bpp) >> 3) - 1; + nbanks = eg_num_banks(rctx->screen->tiling_info.num_banks); + + if (dst_mode == RADEON_SURF_MODE_LINEAR) { + /* T2L */ + array_mode = evergreen_array_mode(src_mode); + slice_tile_max = (((pitch * rsrc->surface.level[src_level].npix_y) >> 6) / bpp) - 1; + /* linear height must be the same as the slice tile max height, it's ok even + * if the linear destination/source have smaller heigh as the size of the + * dma packet will be using the copy_height which is always smaller or equal + * to the linear height + */ + height = rsrc->surface.level[src_level].npix_y; + detile = 1; + x = src_x; + y = src_y; + z = src_z; + base = rsrc->surface.level[src_level].offset; + addr = rdst->surface.level[dst_level].offset; + addr += rdst->surface.level[dst_level].slice_size * dst_z; + addr += dst_y * pitch + dst_x * bpp; + bank_h = eg_bank_wh(rsrc->surface.bankh); + bank_w = eg_bank_wh(rsrc->surface.bankw); + mt_aspect = eg_macro_tile_aspect(rsrc->surface.mtilea); + tile_split = eg_tile_split(rsrc->surface.tile_split); + base += r600_resource_va(&rctx->screen->screen, src); + addr += r600_resource_va(&rctx->screen->screen, dst); + } else { + /* L2T */ + array_mode = evergreen_array_mode(dst_mode); + slice_tile_max = (((pitch * rdst->surface.level[dst_level].npix_y) >> 6) / bpp) - 1; + /* linear height must be the same as the slice tile max height, it's ok even + * if the linear destination/source have smaller heigh as the size of the + * dma packet will be using the copy_height which is always smaller or equal + * to the linear height + */ + height = rdst->surface.level[dst_level].npix_y; + detile = 0; + x = dst_x; + y = dst_y; + z = dst_z; + base = rdst->surface.level[dst_level].offset; + addr = rsrc->surface.level[src_level].offset; + addr += rsrc->surface.level[src_level].slice_size * src_z; + addr += src_y * pitch + src_x * bpp; + bank_h = eg_bank_wh(rdst->surface.bankh); + bank_w = eg_bank_wh(rdst->surface.bankw); + mt_aspect = eg_macro_tile_aspect(rdst->surface.mtilea); + tile_split = eg_tile_split(rdst->surface.tile_split); + base += r600_resource_va(&rctx->screen->screen, dst); + addr += r600_resource_va(&rctx->screen->screen, src); + } + + size = (copy_height * pitch) >> 2; + ncopy = (size / 0x000fffff) + !!(size % 0x000fffff); + r600_need_dma_space(rctx, ncopy * 9); + + for (i = 0; i < ncopy; i++) { + cheight = copy_height; + if (((cheight * pitch) >> 2) > 0x000fffff) { + cheight = (0x000fffff << 2) / pitch; + } + size = (cheight * pitch) >> 2; + /* emit reloc before writting cs so that cs is always in consistent state */ + r600_context_bo_reloc(rctx, &rctx->rings.dma, &rsrc->resource, RADEON_USAGE_READ); + r600_context_bo_reloc(rctx, &rctx->rings.dma, &rdst->resource, RADEON_USAGE_WRITE); + cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, sub_cmd, size); + cs->buf[cs->cdw++] = base >> 8; + cs->buf[cs->cdw++] = (detile << 31) | (array_mode << 27) | + (lbpp << 24) | (bank_h << 21) | + (bank_w << 18) | (mt_aspect << 16); + cs->buf[cs->cdw++] = (pitch_tile_max << 0) | ((height - 1) << 16); + cs->buf[cs->cdw++] = (slice_tile_max << 0); + cs->buf[cs->cdw++] = (x << 0) | (z << 18); + cs->buf[cs->cdw++] = (y << 0) | (tile_split << 21) | (nbanks << 25); + cs->buf[cs->cdw++] = addr & 0xfffffffc; + cs->buf[cs->cdw++] = (addr >> 32UL) & 0xff; + copy_height -= cheight; + addr += cheight * pitch; + y += cheight; + } +} + +boolean evergreen_dma_blit(struct pipe_context *ctx, + struct pipe_resource *dst, + unsigned dst_level, + unsigned dst_x, unsigned dst_y, unsigned dst_z, + struct pipe_resource *src, + unsigned src_level, + const struct pipe_box *src_box) +{ + struct r600_context *rctx = (struct r600_context *)ctx; + struct r600_texture *rsrc = (struct r600_texture*)src; + struct r600_texture *rdst = (struct r600_texture*)dst; + unsigned dst_pitch, src_pitch, bpp, dst_mode, src_mode, copy_height; + unsigned src_w, dst_w; + + if (rctx->rings.dma.cs == NULL) { + return FALSE; + } + if (src->format != dst->format) { + return FALSE; + } + + bpp = rdst->surface.bpe; + dst_pitch = rdst->surface.level[dst_level].pitch_bytes; + src_pitch = rsrc->surface.level[src_level].pitch_bytes; + src_w = rsrc->surface.level[src_level].npix_x; + dst_w = rdst->surface.level[dst_level].npix_x; + copy_height = src_box->height / rsrc->surface.blk_h; + + dst_mode = rdst->surface.level[dst_level].mode; + src_mode = rsrc->surface.level[src_level].mode; + /* downcast linear aligned to linear to simplify test */ + src_mode = src_mode == RADEON_SURF_MODE_LINEAR_ALIGNED ? RADEON_SURF_MODE_LINEAR : src_mode; + dst_mode = dst_mode == RADEON_SURF_MODE_LINEAR_ALIGNED ? RADEON_SURF_MODE_LINEAR : dst_mode; + + if (src_pitch != dst_pitch || src_box->x || dst_x || src_w != dst_w) { + /* FIXME evergreen can do partial blit */ + return FALSE; + } + /* the x test here are currently useless (because we don't support partial blit) + * but keep them around so we don't forget about those + */ + if ((src_pitch & 0x7) || (src_box->x & 0x7) || (dst_x & 0x7) || (src_box->y & 0x7) || (dst_y & 0x7)) { + return FALSE; + } + + if (src_mode == dst_mode) { + unsigned long dst_offset, src_offset; + /* simple dma blit would do NOTE code here assume : + * src_box.x/y == 0 + * dst_x/y == 0 + * dst_pitch == src_pitch + */ + src_offset= rsrc->surface.level[src_level].offset; + src_offset += rsrc->surface.level[src_level].slice_size * src_box->z; + src_offset += src_box->y * src_pitch + src_box->x * bpp; + dst_offset = rdst->surface.level[dst_level].offset; + dst_offset += rdst->surface.level[dst_level].slice_size * dst_z; + dst_offset += dst_y * dst_pitch + dst_x * bpp; + evergreen_dma_copy(rctx, dst, src, dst_offset, src_offset, + src_box->height * src_pitch); + } else { + evergreen_dma_copy_tile(rctx, dst, dst_level, dst_x, dst_y, dst_z, + src, src_level, src_box->x, src_box->y, src_box->z, + copy_height, dst_pitch, bpp); + } + return TRUE; +} |