summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorMarek Olšák <marek.olsak@amd.com>2014-12-29 14:45:49 +0100
committerMarek Olšák <marek.olsak@amd.com>2015-01-07 12:06:43 +0100
commitedf18da85dd3b1865c4faaba650a8fa371b7103c (patch)
tree5cf04bf87559911e77dd5c57093bb2d64503e390 /src
parent73c2b0d18c51459697d8ec194ecfc4438c98c139 (diff)
radeonsi: only flush the right set of caches for CP DMA operations
That's either framebuffer caches or caches for shader resources. The motivation is that framebuffer caches need to be flushed very rarely here. Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
Diffstat (limited to 'src')
-rw-r--r--src/gallium/drivers/r600/r600_blit.c3
-rw-r--r--src/gallium/drivers/radeon/r600_pipe_common.c5
-rw-r--r--src/gallium/drivers/radeon/r600_pipe_common.h6
-rw-r--r--src/gallium/drivers/radeon/r600_texture.c8
-rw-r--r--src/gallium/drivers/radeon/radeon_video.c3
-rw-r--r--src/gallium/drivers/radeonsi/si_blit.c2
-rw-r--r--src/gallium/drivers/radeonsi/si_descriptors.c51
-rw-r--r--src/gallium/drivers/radeonsi/si_pipe.c2
-rw-r--r--src/gallium/drivers/radeonsi/si_state.h2
9 files changed, 48 insertions, 34 deletions
diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
index bdc5f9f9bc2..01262a59e90 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -565,7 +565,8 @@ static void r600_copy_global_buffer(struct pipe_context *ctx,
}
static void r600_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
- unsigned offset, unsigned size, unsigned value)
+ unsigned offset, unsigned size, unsigned value,
+ bool is_framebuffer)
{
struct r600_context *rctx = (struct r600_context*)ctx;
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index d46b3b38f86..04fc9c59c73 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -912,12 +912,13 @@ bool r600_can_dump_shader(struct r600_common_screen *rscreen,
}
void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
- unsigned offset, unsigned size, unsigned value)
+ unsigned offset, unsigned size, unsigned value,
+ bool is_framebuffer)
{
struct r600_common_context *rctx = (struct r600_common_context*)rscreen->aux_context;
pipe_mutex_lock(rscreen->aux_context_lock);
- rctx->clear_buffer(&rctx->b, dst, offset, size, value);
+ rctx->clear_buffer(&rctx->b, dst, offset, size, value, is_framebuffer);
rscreen->aux_context->flush(rscreen->aux_context, NULL, 0);
pipe_mutex_unlock(rscreen->aux_context_lock);
}
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index 15736d7b1c9..a9416b686ed 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -388,7 +388,8 @@ struct r600_common_context {
const struct pipe_box *src_box);
void (*clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst,
- unsigned offset, unsigned size, unsigned value);
+ unsigned offset, unsigned size, unsigned value,
+ bool is_framebuffer);
void (*blit_decompress_depth)(struct pipe_context *ctx,
struct r600_texture *texture,
@@ -441,7 +442,8 @@ void r600_context_add_resource_size(struct pipe_context *ctx, struct pipe_resour
bool r600_can_dump_shader(struct r600_common_screen *rscreen,
const struct tgsi_token *tokens);
void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
- unsigned offset, unsigned size, unsigned value);
+ unsigned offset, unsigned size, unsigned value,
+ bool is_framebuffer);
struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen,
const struct pipe_resource *templ);
const char *r600_get_llvm_processor_name(enum radeon_family family);
diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
index fdf4d763f2d..ab8ce7bd713 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -559,7 +559,8 @@ static void r600_texture_allocate_htile(struct r600_common_screen *rscreen,
* without htile buffer */
R600_ERR("Failed to create buffer object for htile buffer.\n");
} else {
- r600_screen_clear_buffer(rscreen, &rtex->htile_buffer->b.b, 0, htile_size, 0);
+ r600_screen_clear_buffer(rscreen, &rtex->htile_buffer->b.b, 0,
+ htile_size, 0, true);
}
}
@@ -638,7 +639,8 @@ r600_texture_create_object(struct pipe_screen *screen,
if (rtex->cmask.size) {
/* Initialize the cmask to 0xCC (= compressed state). */
r600_screen_clear_buffer(rscreen, &rtex->cmask_buffer->b.b,
- rtex->cmask.offset, rtex->cmask.size, 0xCCCCCCCC);
+ rtex->cmask.offset, rtex->cmask.size,
+ 0xCCCCCCCC, true);
}
/* Initialize the CMASK base register value. */
@@ -1273,7 +1275,7 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
/* Do the fast clear. */
evergreen_set_clear_color(tex, fb->cbufs[i]->format, color);
rctx->clear_buffer(&rctx->b, &tex->cmask_buffer->b.b,
- tex->cmask.offset, tex->cmask.size, 0);
+ tex->cmask.offset, tex->cmask.size, 0, true);
tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
fb_state->dirty = true;
diff --git a/src/gallium/drivers/radeon/radeon_video.c b/src/gallium/drivers/radeon/radeon_video.c
index f6cfdff7aa2..14207989325 100644
--- a/src/gallium/drivers/radeon/radeon_video.c
+++ b/src/gallium/drivers/radeon/radeon_video.c
@@ -122,7 +122,8 @@ void rvid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffer)
{
struct r600_common_context *rctx = (struct r600_common_context*)context;
- rctx->clear_buffer(context, &buffer->res->b.b, 0, buffer->res->buf->size, 0);
+ rctx->clear_buffer(context, &buffer->res->b.b, 0, buffer->res->buf->size,
+ 0, false);
context->flush(context, NULL, 0);
}
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index 4744154c7e2..1f2c4082dbc 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -556,7 +556,7 @@ void si_resource_copy_region(struct pipe_context *ctx,
/* Fallback for buffers. */
if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
- si_copy_buffer(sctx, dst, src, dstx, src_box->x, src_box->width);
+ si_copy_buffer(sctx, dst, src, dstx, src_box->x, src_box->width, false);
return;
}
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index d46f4e522e6..c9599617ede 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -1054,9 +1054,11 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource
#define CP_DMA_MAX_BYTE_COUNT ((1 << 21) - 8)
static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
- unsigned offset, unsigned size, unsigned value)
+ unsigned offset, unsigned size, unsigned value,
+ bool is_framebuffer)
{
struct si_context *sctx = (struct si_context*)ctx;
+ unsigned flush_flags;
if (!size)
return;
@@ -1081,12 +1083,15 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
uint64_t va = r600_resource(dst)->gpu_address + offset;
/* Flush the caches where the resource is bound. */
- /* XXX only flush the caches where the buffer is bound. */
- sctx->b.flags |= SI_CONTEXT_INV_TC_L1 |
- SI_CONTEXT_INV_TC_L2 |
- SI_CONTEXT_INV_KCACHE |
- SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
- sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH;
+ if (is_framebuffer)
+ flush_flags = SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
+ else
+ flush_flags = SI_CONTEXT_INV_TC_L1 |
+ SI_CONTEXT_INV_TC_L2 |
+ SI_CONTEXT_INV_KCACHE;
+
+ sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
+ flush_flags;
while (size) {
unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
@@ -1120,17 +1125,16 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
/* Flush the caches again in case the 3D engine has been prefetching
* the resource. */
- /* XXX only flush the caches where the buffer is bound. */
- sctx->b.flags |= SI_CONTEXT_INV_TC_L1 |
- SI_CONTEXT_INV_TC_L2 |
- SI_CONTEXT_INV_KCACHE |
- SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
+ sctx->b.flags |= flush_flags;
}
void si_copy_buffer(struct si_context *sctx,
struct pipe_resource *dst, struct pipe_resource *src,
- uint64_t dst_offset, uint64_t src_offset, unsigned size)
+ uint64_t dst_offset, uint64_t src_offset, unsigned size,
+ bool is_framebuffer)
{
+ unsigned flush_flags;
+
if (!size)
return;
@@ -1144,11 +1148,15 @@ void si_copy_buffer(struct si_context *sctx,
src_offset += r600_resource(src)->gpu_address;
/* Flush the caches where the resource is bound. */
- sctx->b.flags |= SI_CONTEXT_INV_TC_L1 |
- SI_CONTEXT_INV_TC_L2 |
- SI_CONTEXT_INV_KCACHE |
- SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER |
- SI_CONTEXT_PS_PARTIAL_FLUSH;
+ if (is_framebuffer)
+ flush_flags = SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
+ else
+ flush_flags = SI_CONTEXT_INV_TC_L1 |
+ SI_CONTEXT_INV_TC_L2 |
+ SI_CONTEXT_INV_KCACHE;
+
+ sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
+ flush_flags;
while (size) {
unsigned sync_flags = 0;
@@ -1180,10 +1188,9 @@ void si_copy_buffer(struct si_context *sctx,
dst_offset += byte_count;
}
- sctx->b.flags |= SI_CONTEXT_INV_TC_L1 |
- SI_CONTEXT_INV_TC_L2 |
- SI_CONTEXT_INV_KCACHE |
- SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
+ /* Flush the caches again in case the 3D engine has been prefetching
+ * the resource. */
+ sctx->b.flags |= flush_flags;
}
/* INIT/DEINIT */
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 4d8fd65d981..38bff31e005 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -155,7 +155,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, void *
/* Clear the NULL constant buffer, because loads should return zeros. */
sctx->b.clear_buffer(&sctx->b.b, sctx->null_const_buf.buffer, 0,
- sctx->null_const_buf.buffer->width0, 0);
+ sctx->null_const_buf.buffer->width0, 0, false);
}
return &sctx->b.b;
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 8927e50644b..3cd252c0e64 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -238,7 +238,7 @@ void si_release_all_descriptors(struct si_context *sctx);
void si_all_descriptors_begin_new_cs(struct si_context *sctx);
void si_copy_buffer(struct si_context *sctx,
struct pipe_resource *dst, struct pipe_resource *src,
- uint64_t dst_offset, uint64_t src_offset, unsigned size);
+ uint64_t dst_offset, uint64_t src_offset, unsigned size, bool is_framebuffer);
void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuffer,
const uint8_t *ptr, unsigned size, uint32_t *const_offset);