summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/gallium/drivers/r600/evergreen_compute.c 2
-rw-r--r-- src/gallium/drivers/r600/evergreen_state.c 4
-rw-r--r-- src/gallium/drivers/r600/r600.h 11
-rw-r--r-- src/gallium/drivers/r600/r600_blit.c 13
-rw-r--r-- src/gallium/drivers/r600/r600_hw_context.c 85
-rw-r--r-- src/gallium/drivers/r600/r600_pipe.h 3
-rw-r--r-- src/gallium/drivers/r600/r600_state.c 6
-rw-r--r-- src/gallium/drivers/r600/r600_state_common.c 6
-rw-r--r-- src/gallium/drivers/r600/r600d.h 34
9 files changed, 140 insertions, 24 deletions
diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c
index 8d3050beff0..ed5055b950f 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -329,7 +329,7 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
*/
r600_emit_command_buffer(ctx->cs, &ctx->start_compute_cs_cmd);
- ctx->flags |= R600_CONTEXT_WAIT_IDLE | R600_CONTEXT_FLUSH_AND_INV;
+ ctx->flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV;
r600_flush_emit(ctx);
/* Emit colorbuffers. */
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 7635f867210..d0402c219fb 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1570,14 +1570,14 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
uint32_t i, log_samples;
if (rctx->framebuffer.state.nr_cbufs) {
- rctx->flags |= R600_CONTEXT_WAIT_IDLE | R600_CONTEXT_FLUSH_AND_INV;
+ rctx->flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV;
if (rctx->framebuffer.state.cbufs[0]->texture->nr_samples > 1) {
rctx->flags |= R600_CONTEXT_FLUSH_AND_INV_CB_META;
}
}
if (rctx->framebuffer.state.zsbuf) {
- rctx->flags |= R600_CONTEXT_WAIT_IDLE | R600_CONTEXT_FLUSH_AND_INV;
+ rctx->flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV;
}
util_copy_framebuffer_state(&rctx->framebuffer.state, state);
diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
index 260536ecb44..93604fbe7b5 100644
--- a/src/gallium/drivers/r600/r600.h
+++ b/src/gallium/drivers/r600/r600.h
@@ -147,9 +147,10 @@ struct r600_so_target {
#define R600_CONTEXT_INVAL_READ_CACHES (1 << 0)
#define R600_CONTEXT_STREAMOUT_FLUSH (1 << 1)
-#define R600_CONTEXT_WAIT_IDLE (1 << 2)
-#define R600_CONTEXT_FLUSH_AND_INV (1 << 3)
-#define R600_CONTEXT_FLUSH_AND_INV_CB_META (1 << 4)
+#define R600_CONTEXT_WAIT_3D_IDLE (1 << 2)
+#define R600_CONTEXT_WAIT_CP_DMA_IDLE (1 << 3)
+#define R600_CONTEXT_FLUSH_AND_INV (1 << 4)
+#define R600_CONTEXT_FLUSH_AND_INV_CB_META (1 << 5)
struct r600_context;
struct r600_screen;
@@ -170,6 +171,10 @@ void r600_context_streamout_begin(struct r600_context *ctx);
void r600_context_streamout_end(struct r600_context *ctx);
void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, boolean count_draw_in);
void r600_context_block_emit_dirty(struct r600_context *ctx, struct r600_block *block, unsigned pkt_flags);
+void r600_cp_dma_copy_buffer(struct r600_context *rctx,
+ struct pipe_resource *dst, unsigned dst_offset,
+ struct pipe_resource *src, unsigned src_offset,
+ unsigned size);
int evergreen_context_init(struct r600_context *ctx);
diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
index b348aa728b0..c4ce7f7652b 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -503,15 +503,18 @@ static void r600_clear_depth_stencil(struct pipe_context *ctx,
r600_blitter_end(ctx);
}
-void r600_copy_buffer(struct pipe_context *ctx, struct
- pipe_resource *dst, unsigned dstx,
+void r600_copy_buffer(struct pipe_context *ctx, struct pipe_resource *dst, unsigned dstx,
struct pipe_resource *src, const struct pipe_box *src_box)
{
struct r600_context *rctx = (struct r600_context*)ctx;
- if (rctx->screen->has_streamout &&
- /* Require dword alignment. */
- dstx % 4 == 0 && src_box->x % 4 == 0 && src_box->width % 4 == 0) {
+ /* CP DMA doesn't work on R600 (flushing seems to be unreliable). */
+ if (rctx->screen->info.drm_minor >= 27 && rctx->chip_class >= R700) {
+ r600_cp_dma_copy_buffer(rctx, dst, dstx, src, src_box->x, src_box->width);
+ }
+ else if (rctx->screen->has_streamout &&
+ /* Require 4-byte alignment. */
+ dstx % 4 == 0 && src_box->x % 4 == 0 && src_box->width % 4 == 0) {
r600_blitter_begin(ctx, R600_COPY_BUFFER);
util_blitter_copy_buffer(rctx->blitter, dst, dstx, src, src_box->x, src_box->width);
r600_blitter_end(ctx);
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index 1506b393ce1..caebf5c7a54 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -434,7 +434,7 @@ void r600_context_dirty_block(struct r600_context *ctx,
LIST_ADDTAIL(&block->list,&ctx->dirty);
if (block->flags & REG_FLAG_FLUSH_CHANGE) {
- ctx->flags |= R600_CONTEXT_WAIT_IDLE;
+ ctx->flags |= R600_CONTEXT_WAIT_3D_IDLE;
}
}
}
@@ -606,6 +606,7 @@ void r600_flush_emit(struct r600_context *rctx)
{
struct radeon_winsys_cs *cs = rctx->cs;
unsigned cp_coher_cntl = 0;
+ unsigned wait_until = 0;
unsigned emit_flush = 0;
if (!rctx->flags) {
@@ -674,9 +675,15 @@ void r600_flush_emit(struct r600_context *rctx)
cs->buf[cs->cdw++] = 0x0000000A; /* POLL_INTERVAL */
}
- if (rctx->flags & R600_CONTEXT_WAIT_IDLE) {
+ if (rctx->flags & R600_CONTEXT_WAIT_3D_IDLE) {
+ wait_until |= S_008040_WAIT_3D_IDLE(1);
+ }
+ if (rctx->flags & R600_CONTEXT_WAIT_CP_DMA_IDLE) {
+ wait_until |= S_008040_WAIT_CP_DMA_IDLE(1);
+ }
+ if (wait_until) {
/* wait for things to settle */
- r600_write_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1));
+ r600_write_config_reg(cs, R_008040_WAIT_UNTIL, wait_until);
}
/* everything is properly flushed */
@@ -709,7 +716,8 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags)
*/
ctx->flags |= R600_CONTEXT_FLUSH_AND_INV |
R600_CONTEXT_FLUSH_AND_INV_CB_META |
- R600_CONTEXT_WAIT_IDLE;
+ R600_CONTEXT_WAIT_3D_IDLE |
+ R600_CONTEXT_WAIT_CP_DMA_IDLE;
r600_flush_emit(ctx);
@@ -1049,6 +1057,73 @@ void r600_context_streamout_end(struct r600_context *ctx)
}
r600_set_streamout_enable(ctx, 0);
}
- ctx->flags |= R600_CONTEXT_WAIT_IDLE | R600_CONTEXT_FLUSH_AND_INV;
+ ctx->flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV;
ctx->num_cs_dw_streamout_end = 0;
}
+
+/* The max number of bytes to copy per packet; the value fits the 21-bit BYTE_COUNT [20:0] field of the CP_DMA packet. */
+#define CP_DMA_MAX_BYTE_COUNT ((1 << 21) - 8)
+/* Copy "size" bytes from "src" at src_offset to "dst" at dst_offset with CP DMA, at most CP_DMA_MAX_BYTE_COUNT bytes per packet. */
+void r600_cp_dma_copy_buffer(struct r600_context *rctx,
+ struct pipe_resource *dst, unsigned dst_offset,
+ struct pipe_resource *src, unsigned src_offset,
+ unsigned size)
+{
+ struct radeon_winsys_cs *cs = rctx->cs;
+
+ assert(size); /* with size == 0 the loop below would not emit any packet */
+ assert(rctx->chip_class != R600);
+
+ /* CP DMA doesn't work on R600 (flushing seems to be unreliable). Bail out here as well, for builds where assert() is compiled out. */
+ if (rctx->chip_class == R600) {
+ return;
+ }
+
+ /* We flush the caches, because we might read from or write
+ * to resources which are bound right now. */
+ rctx->flags |= R600_CONTEXT_INVAL_READ_CACHES |
+ R600_CONTEXT_FLUSH_AND_INV |
+ R600_CONTEXT_FLUSH_AND_INV_CB_META |
+ R600_CONTEXT_STREAMOUT_FLUSH |
+ R600_CONTEXT_WAIT_3D_IDLE;
+
+ /* There are differences between R700 and EG in CP DMA,
+ * but we only use the common bits here. */
+ while (size) { /* one CP_DMA packet is emitted per iteration */
+ unsigned sync = 0;
+ unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT); /* bytes covered by this packet */
+ unsigned src_reloc, dst_reloc;
+
+ r600_need_cs_space(rctx, 10 + (rctx->flags ? R600_MAX_FLUSH_CS_DWORDS : 0), FALSE); /* 10 = 6 dwords of CP_DMA + 2 NOP packets of 2 dwords each */
+
+ /* Flush the caches for the first copy only (presumably r600_flush_emit resets rctx->flags, so later iterations skip this; TODO confirm). */
+ if (rctx->flags) {
+ r600_flush_emit(rctx);
+ }
+
+ /* Do the synchronization after the last copy, so that all data is written to memory. size == byte_count identifies the last packet. */
+ if (size == byte_count) {
+ sync = PKT3_CP_DMA_CP_SYNC;
+ }
+
+ /* This must be done after r600_need_cs_space. */
+ src_reloc = r600_context_bo_reloc(rctx, (struct r600_resource*)src, RADEON_USAGE_READ);
+ dst_reloc = r600_context_bo_reloc(rctx, (struct r600_resource*)dst, RADEON_USAGE_WRITE);
+
+ r600_write_value(cs, PKT3(PKT3_CP_DMA, 4, 0));
+ r600_write_value(cs, src_offset); /* SRC_ADDR_LO [31:0] */
+ r600_write_value(cs, sync); /* CP_SYNC [31] | SRC_ADDR_HI [7:0] */
+ r600_write_value(cs, dst_offset); /* DST_ADDR_LO [31:0] */
+ r600_write_value(cs, 0); /* DST_ADDR_HI [7:0] */
+ r600_write_value(cs, byte_count); /* COMMAND [29:22] | BYTE_COUNT [20:0]; COMMAND bits are left 0 (SAS/DAS = memory, no byte swap) */
+
+ r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); /* NOP packet carrying the src relocation */
+ r600_write_value(cs, src_reloc);
+ r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); /* NOP packet carrying the dst relocation */
+ r600_write_value(cs, dst_reloc);
+
+ size -= byte_count; /* advance to the next chunk */
+ src_offset += byte_count;
+ dst_offset += byte_count;
+ }
+}
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 934a6f547c0..5d22c93cf7d 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -598,8 +598,7 @@ void evergreen_init_color_surface_rat(struct r600_context *rctx,
void evergreen_update_db_shader_control(struct r600_context * rctx);
/* r600_blit.c */
-void r600_copy_buffer(struct pipe_context *ctx, struct
- pipe_resource *dst, unsigned dstx,
+void r600_copy_buffer(struct pipe_context *ctx, struct pipe_resource *dst, unsigned dstx,
struct pipe_resource *src, const struct pipe_box *src_box);
void r600_init_blit_functions(struct r600_context *rctx);
void r600_blit_decompress_depth(struct pipe_context *ctx,
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 0cfc4e4ee68..e2d0f7544c1 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -1465,7 +1465,7 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
unsigned i;
if (rctx->framebuffer.state.nr_cbufs) {
- rctx->flags |= R600_CONTEXT_WAIT_IDLE | R600_CONTEXT_FLUSH_AND_INV;
+ rctx->flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV;
if (rctx->chip_class >= R700 &&
rctx->framebuffer.state.cbufs[0]->texture->nr_samples > 1) {
@@ -1473,7 +1473,7 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
}
}
if (rctx->framebuffer.state.zsbuf) {
- rctx->flags |= R600_CONTEXT_WAIT_IDLE | R600_CONTEXT_FLUSH_AND_INV;
+ rctx->flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV;
}
/* Set the new state. */
@@ -2299,7 +2299,7 @@ bool r600_adjust_gprs(struct r600_context *rctx)
if (rctx->config_state.sq_gpr_resource_mgmt_1 != tmp) {
rctx->config_state.sq_gpr_resource_mgmt_1 = tmp;
rctx->config_state.atom.dirty = true;
- rctx->flags |= R600_CONTEXT_WAIT_IDLE;
+ rctx->flags |= R600_CONTEXT_WAIT_3D_IDLE;
}
return true;
}
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index bbcfc4f5062..3b61413f84e 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -88,7 +88,7 @@ static void r600_texture_barrier(struct pipe_context *ctx)
{
struct r600_context *rctx = (struct r600_context *)ctx;
- rctx->flags |= R600_CONTEXT_WAIT_IDLE;
+ rctx->flags |= R600_CONTEXT_WAIT_3D_IDLE;
rctx->flags |= R600_CONTEXT_INVAL_READ_CACHES;
rctx->flags |= R600_CONTEXT_FLUSH_AND_INV;
}
@@ -357,7 +357,7 @@ void r600_sampler_states_dirty(struct r600_context *rctx,
{
if (state->dirty_mask) {
if (state->dirty_mask & state->has_bordercolor_mask) {
- rctx->flags |= R600_CONTEXT_WAIT_IDLE;
+ rctx->flags |= R600_CONTEXT_WAIT_3D_IDLE;
}
state->atom.num_dw =
util_bitcount(state->dirty_mask & state->has_bordercolor_mask) * 11 +
@@ -420,7 +420,7 @@ static void r600_bind_sampler_states(struct pipe_context *pipe,
seamless_cube_map != -1 &&
seamless_cube_map != rctx->seamless_cube_map.enabled) {
/* change in TA_CNTL_AUX need a pipeline flush */
- rctx->flags |= R600_CONTEXT_WAIT_IDLE;
+ rctx->flags |= R600_CONTEXT_WAIT_3D_IDLE;
rctx->seamless_cube_map.enabled = seamless_cube_map;
rctx->seamless_cube_map.atom.dirty = true;
}
diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h
index 69bfd7a2f87..dd64aca3d51 100644
--- a/src/gallium/drivers/r600/r600d.h
+++ b/src/gallium/drivers/r600/r600d.h
@@ -159,6 +159,40 @@
#define PKT3_PRED_S(x) (((x) >> 0) & 0x1)
#define PKT0(index, count) (PKT_TYPE_S(0) | PKT0_BASE_INDEX_S(index) | PKT_COUNT_S(count))
+#define PKT3_CP_DMA 0x41
+/* 1. header
+ * 2. SRC_ADDR_LO [31:0]
+ * 3. CP_SYNC [31] | SRC_ADDR_HI [7:0]
+ * 4. DST_ADDR_LO [31:0]
+ * 5. DST_ADDR_HI [7:0]
+ * 6. COMMAND [29:22] | BYTE_COUNT [20:0]
+ */
+#define PKT3_CP_DMA_CP_SYNC (1 << 31)
+/* COMMAND */
+#define PKT3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 23)
+/* 0 - none
+ * 1 - 8 in 16
+ * 2 - 8 in 32
+ * 3 - 8 in 64
+ */
+#define PKT3_CP_DMA_CMD_DST_SWAP(x) ((x) << 24)
+/* 0 - none
+ * 1 - 8 in 16
+ * 2 - 8 in 32
+ * 3 - 8 in 64
+ */
+#define PKT3_CP_DMA_CMD_SAS (1 << 26)
+/* 0 - memory
+ * 1 - register
+ */
+#define PKT3_CP_DMA_CMD_DAS (1 << 27)
+/* 0 - memory
+ * 1 - register
+ */
+#define PKT3_CP_DMA_CMD_SAIC (1 << 28)
+#define PKT3_CP_DMA_CMD_DAIC (1 << 29)
+
+
/* Registers */
#define R_008490_CP_STRMOUT_CNTL 0x008490
#define S_008490_OFFSET_UPDATE_DONE(x) (((x) & 0x1) << 0)