diff options
-rw-r--r-- | src/gallium/drivers/r600/evergreen_compute.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/r600/evergreen_state.c | 4 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600.h | 11 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_blit.c | 13 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_hw_context.c | 85 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_pipe.h | 3 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_state.c | 6 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_state_common.c | 6 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600d.h | 34 |
9 files changed, 140 insertions, 24 deletions
diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index 8d3050beff0..ed5055b950f 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -329,7 +329,7 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout, */ r600_emit_command_buffer(ctx->cs, &ctx->start_compute_cs_cmd); - ctx->flags |= R600_CONTEXT_WAIT_IDLE | R600_CONTEXT_FLUSH_AND_INV; + ctx->flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV; r600_flush_emit(ctx); /* Emit colorbuffers. */ diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 7635f867210..d0402c219fb 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -1570,14 +1570,14 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, uint32_t i, log_samples; if (rctx->framebuffer.state.nr_cbufs) { - rctx->flags |= R600_CONTEXT_WAIT_IDLE | R600_CONTEXT_FLUSH_AND_INV; + rctx->flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV; if (rctx->framebuffer.state.cbufs[0]->texture->nr_samples > 1) { rctx->flags |= R600_CONTEXT_FLUSH_AND_INV_CB_META; } } if (rctx->framebuffer.state.zsbuf) { - rctx->flags |= R600_CONTEXT_WAIT_IDLE | R600_CONTEXT_FLUSH_AND_INV; + rctx->flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV; } util_copy_framebuffer_state(&rctx->framebuffer.state, state); diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index 260536ecb44..93604fbe7b5 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -147,9 +147,10 @@ struct r600_so_target { #define R600_CONTEXT_INVAL_READ_CACHES (1 << 0) #define R600_CONTEXT_STREAMOUT_FLUSH (1 << 1) -#define R600_CONTEXT_WAIT_IDLE (1 << 2) -#define R600_CONTEXT_FLUSH_AND_INV (1 << 3) -#define R600_CONTEXT_FLUSH_AND_INV_CB_META (1 << 4) +#define R600_CONTEXT_WAIT_3D_IDLE (1 << 2) +#define R600_CONTEXT_WAIT_CP_DMA_IDLE (1 << 3) +#define R600_CONTEXT_FLUSH_AND_INV (1 << 4) +#define R600_CONTEXT_FLUSH_AND_INV_CB_META (1 << 5) struct r600_context; struct r600_screen; @@ -170,6 +171,10 @@ void r600_context_streamout_begin(struct r600_context *ctx); void r600_context_streamout_end(struct r600_context *ctx); void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, boolean count_draw_in); void r600_context_block_emit_dirty(struct r600_context *ctx, struct r600_block *block, unsigned pkt_flags); +void r600_cp_dma_copy_buffer(struct r600_context *rctx, + struct pipe_resource *dst, unsigned dst_offset, + struct pipe_resource *src, unsigned src_offset, + unsigned size); int evergreen_context_init(struct r600_context *ctx); diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index b348aa728b0..c4ce7f7652b 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -503,15 +503,18 @@ static void r600_clear_depth_stencil(struct pipe_context *ctx, r600_blitter_end(ctx); } -void r600_copy_buffer(struct pipe_context *ctx, struct - pipe_resource *dst, unsigned dstx, +void r600_copy_buffer(struct pipe_context *ctx, struct pipe_resource *dst, unsigned dstx, struct pipe_resource *src, const struct pipe_box *src_box) { struct r600_context *rctx = (struct r600_context*)ctx; - if (rctx->screen->has_streamout && - /* Require dword alignment. */ - dstx % 4 == 0 && src_box->x % 4 == 0 && src_box->width % 4 == 0) { + /* CP DMA doesn't work on R600 (flushing seems to be unreliable). */ + if (rctx->screen->info.drm_minor >= 27 && rctx->chip_class >= R700) { + r600_cp_dma_copy_buffer(rctx, dst, dstx, src, src_box->x, src_box->width); + } + else if (rctx->screen->has_streamout && + /* Require 4-byte alignment. */ + dstx % 4 == 0 && src_box->x % 4 == 0 && src_box->width % 4 == 0) { r600_blitter_begin(ctx, R600_COPY_BUFFER); util_blitter_copy_buffer(rctx->blitter, dst, dstx, src, src_box->x, src_box->width); r600_blitter_end(ctx); diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c index 1506b393ce1..caebf5c7a54 100644 --- a/src/gallium/drivers/r600/r600_hw_context.c +++ b/src/gallium/drivers/r600/r600_hw_context.c @@ -434,7 +434,7 @@ void r600_context_dirty_block(struct r600_context *ctx, LIST_ADDTAIL(&block->list,&ctx->dirty); if (block->flags & REG_FLAG_FLUSH_CHANGE) { - ctx->flags |= R600_CONTEXT_WAIT_IDLE; + ctx->flags |= R600_CONTEXT_WAIT_3D_IDLE; } } } @@ -606,6 +606,7 @@ void r600_flush_emit(struct r600_context *rctx) { struct radeon_winsys_cs *cs = rctx->cs; unsigned cp_coher_cntl = 0; + unsigned wait_until = 0; unsigned emit_flush = 0; if (!rctx->flags) { @@ -674,9 +675,15 @@ void r600_flush_emit(struct r600_context *rctx) cs->buf[cs->cdw++] = 0x0000000A; /* POLL_INTERVAL */ } - if (rctx->flags & R600_CONTEXT_WAIT_IDLE) { + if (rctx->flags & R600_CONTEXT_WAIT_3D_IDLE) { + wait_until |= S_008040_WAIT_3D_IDLE(1); + } + if (rctx->flags & R600_CONTEXT_WAIT_CP_DMA_IDLE) { + wait_until |= S_008040_WAIT_CP_DMA_IDLE(1); + } + if (wait_until) { /* wait for things to settle */ - r600_write_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1)); + r600_write_config_reg(cs, R_008040_WAIT_UNTIL, wait_until); } /* everything is properly flushed */ @@ -709,7 +716,8 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags) */ ctx->flags |= R600_CONTEXT_FLUSH_AND_INV | R600_CONTEXT_FLUSH_AND_INV_CB_META | - R600_CONTEXT_WAIT_IDLE; + R600_CONTEXT_WAIT_3D_IDLE | + R600_CONTEXT_WAIT_CP_DMA_IDLE; r600_flush_emit(ctx); @@ -1049,6 +1057,73 @@ void r600_context_streamout_end(struct r600_context *ctx) } r600_set_streamout_enable(ctx, 0); } - ctx->flags |= R600_CONTEXT_WAIT_IDLE | R600_CONTEXT_FLUSH_AND_INV; + ctx->flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV; ctx->num_cs_dw_streamout_end = 0; } + +/* The max number of bytes to copy per packet. */ +#define CP_DMA_MAX_BYTE_COUNT ((1 << 21) - 8) + +void r600_cp_dma_copy_buffer(struct r600_context *rctx, + struct pipe_resource *dst, unsigned dst_offset, + struct pipe_resource *src, unsigned src_offset, + unsigned size) +{ + struct radeon_winsys_cs *cs = rctx->cs; + + assert(size); + assert(rctx->chip_class != R600); + + /* CP DMA doesn't work on R600 (flushing seems to be unreliable). */ + if (rctx->chip_class == R600) { + return; + } + + /* We flush the caches, because we might read from or write + * to resources which are bound right now. */ + rctx->flags |= R600_CONTEXT_INVAL_READ_CACHES | + R600_CONTEXT_FLUSH_AND_INV | + R600_CONTEXT_FLUSH_AND_INV_CB_META | + R600_CONTEXT_STREAMOUT_FLUSH | + R600_CONTEXT_WAIT_3D_IDLE; + + /* There are differences between R700 and EG in CP DMA, + * but we only use the common bits here. */ + while (size) { + unsigned sync = 0; + unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT); + unsigned src_reloc, dst_reloc; + + r600_need_cs_space(rctx, 10 + (rctx->flags ? R600_MAX_FLUSH_CS_DWORDS : 0), FALSE); + + /* Flush the caches for the first copy only. */ + if (rctx->flags) { + r600_flush_emit(rctx); + } + + /* Do the synchronization after the last copy, so that all data is written to memory. */ + if (size == byte_count) { + sync = PKT3_CP_DMA_CP_SYNC; + } + + /* This must be done after r600_need_cs_space. */ + src_reloc = r600_context_bo_reloc(rctx, (struct r600_resource*)src, RADEON_USAGE_READ); + dst_reloc = r600_context_bo_reloc(rctx, (struct r600_resource*)dst, RADEON_USAGE_WRITE); + + r600_write_value(cs, PKT3(PKT3_CP_DMA, 4, 0)); + r600_write_value(cs, src_offset); /* SRC_ADDR_LO [31:0] */ + r600_write_value(cs, sync); /* CP_SYNC [31] | SRC_ADDR_HI [7:0] */ + r600_write_value(cs, dst_offset); /* DST_ADDR_LO [31:0] */ + r600_write_value(cs, 0); /* DST_ADDR_HI [7:0] */ + r600_write_value(cs, byte_count); /* COMMAND [29:22] | BYTE_COUNT [20:0] */ + + r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); + r600_write_value(cs, src_reloc); + r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); + r600_write_value(cs, dst_reloc); + + size -= byte_count; + src_offset += byte_count; + dst_offset += byte_count; + } +} diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 934a6f547c0..5d22c93cf7d 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -598,8 +598,7 @@ void evergreen_init_color_surface_rat(struct r600_context *rctx, void evergreen_update_db_shader_control(struct r600_context * rctx); /* r600_blit.c */ -void r600_copy_buffer(struct pipe_context *ctx, struct - pipe_resource *dst, unsigned dstx, +void r600_copy_buffer(struct pipe_context *ctx, struct pipe_resource *dst, unsigned dstx, struct pipe_resource *src, const struct pipe_box *src_box); void r600_init_blit_functions(struct r600_context *rctx); void r600_blit_decompress_depth(struct pipe_context *ctx, diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 0cfc4e4ee68..e2d0f7544c1 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -1465,7 +1465,7 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, unsigned i; if (rctx->framebuffer.state.nr_cbufs) { - rctx->flags |= R600_CONTEXT_WAIT_IDLE | R600_CONTEXT_FLUSH_AND_INV; + rctx->flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV; if (rctx->chip_class >= R700 && rctx->framebuffer.state.cbufs[0]->texture->nr_samples > 1) { @@ -1473,7 +1473,7 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, } } if (rctx->framebuffer.state.zsbuf) { - rctx->flags |= R600_CONTEXT_WAIT_IDLE | R600_CONTEXT_FLUSH_AND_INV; + rctx->flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV; } /* Set the new state. */ @@ -2299,7 +2299,7 @@ bool r600_adjust_gprs(struct r600_context *rctx) if (rctx->config_state.sq_gpr_resource_mgmt_1 != tmp) { rctx->config_state.sq_gpr_resource_mgmt_1 = tmp; rctx->config_state.atom.dirty = true; - rctx->flags |= R600_CONTEXT_WAIT_IDLE; + rctx->flags |= R600_CONTEXT_WAIT_3D_IDLE; } return true; } diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index bbcfc4f5062..3b61413f84e 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -88,7 +88,7 @@ static void r600_texture_barrier(struct pipe_context *ctx) { struct r600_context *rctx = (struct r600_context *)ctx; - rctx->flags |= R600_CONTEXT_WAIT_IDLE; + rctx->flags |= R600_CONTEXT_WAIT_3D_IDLE; rctx->flags |= R600_CONTEXT_INVAL_READ_CACHES; rctx->flags |= R600_CONTEXT_FLUSH_AND_INV; } @@ -357,7 +357,7 @@ void r600_sampler_states_dirty(struct r600_context *rctx, { if (state->dirty_mask) { if (state->dirty_mask & state->has_bordercolor_mask) { - rctx->flags |= R600_CONTEXT_WAIT_IDLE; + rctx->flags |= R600_CONTEXT_WAIT_3D_IDLE; } state->atom.num_dw = util_bitcount(state->dirty_mask & state->has_bordercolor_mask) * 11 + @@ -420,7 +420,7 @@ static void r600_bind_sampler_states(struct pipe_context *pipe, seamless_cube_map != -1 && seamless_cube_map != rctx->seamless_cube_map.enabled) { /* change in TA_CNTL_AUX need a pipeline flush */ - rctx->flags |= R600_CONTEXT_WAIT_IDLE; + rctx->flags |= R600_CONTEXT_WAIT_3D_IDLE; rctx->seamless_cube_map.enabled = seamless_cube_map; rctx->seamless_cube_map.atom.dirty = true; } diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index 69bfd7a2f87..dd64aca3d51 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -159,6 +159,40 @@ #define PKT3_PRED_S(x) (((x) >> 0) & 0x1) #define PKT0(index, count) (PKT_TYPE_S(0) | PKT0_BASE_INDEX_S(index) | PKT_COUNT_S(count)) +#define PKT3_CP_DMA 0x41 +/* 1. header + * 2. SRC_ADDR_LO [31:0] + * 3. CP_SYNC [31] | SRC_ADDR_HI [7:0] + * 4. DST_ADDR_LO [31:0] + * 5. DST_ADDR_HI [7:0] + * 6. COMMAND [29:22] | BYTE_COUNT [20:0] + */ +#define PKT3_CP_DMA_CP_SYNC (1 << 31) +/* COMMAND */ +#define PKT3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 23) +/* 0 - none + * 1 - 8 in 16 + * 2 - 8 in 32 + * 3 - 8 in 64 + */ +#define PKT3_CP_DMA_CMD_DST_SWAP(x) ((x) << 24) +/* 0 - none + * 1 - 8 in 16 + * 2 - 8 in 32 + * 3 - 8 in 64 + */ +#define PKT3_CP_DMA_CMD_SAS (1 << 26) +/* 0 - memory + * 1 - register + */ +#define PKT3_CP_DMA_CMD_DAS (1 << 27) +/* 0 - memory + * 1 - register + */ +#define PKT3_CP_DMA_CMD_SAIC (1 << 28) +#define PKT3_CP_DMA_CMD_DAIC (1 << 29) + + /* Registers */ #define R_008490_CP_STRMOUT_CNTL 0x008490 #define S_008490_OFFSET_UPDATE_DONE(x) (((x) & 0x1) << 0) |