diff options
author | Marek Olšák <[email protected]> | 2013-08-26 17:19:39 +0200 |
---|---|---|
committer | Marek Olšák <[email protected]> | 2013-08-31 01:34:30 +0200 |
commit | a77ee8b548d83614b11bbfb654b031b7d464c3e3 (patch) | |
tree | 08509657f61e58f5563c45b43f264018190e2c2c /src/gallium/drivers/radeonsi/si_state.c | |
parent | aa5c40f97cf5d0609dfb8c0792eca5f6d5108579 (diff) |
radeonsi: simplify and improve flushing
This mimics r600g. The R600_CONTEXT_xxx flags are added to rctx->b.flags
and si_emit_cache_flush emits the packets. That's it. The shared radeon code
tells us when the streamout cache should be flushed, so we have to check
the flags anyway.
There is a new atom "cache_flush", because caches must be flushed *after*
resource descriptors are changed in memory.
Functional changes:
* Write caches are flushed at the end of CS and read caches are flushed
at its beginning.
* Sampler view states are removed from si_state, they only held the flush
flags.
* Every time a shader is changed, the I cache is flushed. Is this needed?
Due to a hw bug, this also flushes the K cache.
* The WRITE_DATA packet is changed to use TC, which fixes a rendering issue
in openarena. I'm not sure how TC interacts with CP DMA, but for now it
seems to work better than any other solution I tried. (BTW CIK allows us
to use TC for CP DMA.)
* Flush the K cache instead of the texture cache when updating resource
descriptors (due to a hw bug, this also flushes the I cache).
I think the K cache flush is correct here, but I'm not sure if the texture
cache should be flushed too (probably not, considering we use TC
for WRITE_DATA, but we don't use TC for CP DMA).
* The number of resource contexts is decreased to 16. With all of these cache
changes, 4 doesn't work, but 8 works, which suggests I'm actually doing
the right thing here and the pipeline isn't drained during flushes.
Reviewed-by: Michel Dänzer <[email protected]>
Reviewed-by: Christian König <[email protected]>
Tested-by: Tom Stellard <[email protected]>
Diffstat (limited to 'src/gallium/drivers/radeonsi/si_state.c')
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state.c | 46 |
1 files changed, 20 insertions, 26 deletions
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 5ac55f22a5b..3c4197ca548 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -2238,11 +2238,13 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, if (pm4 == NULL) return; - si_pm4_inval_fb_cache(pm4, state->nr_cbufs); - rctx->flush_and_inv_cb_meta = true; - - if (state->zsbuf) - si_pm4_inval_zsbuf_cache(pm4); + if (rctx->framebuffer.nr_cbufs) { + rctx->b.flags |= R600_CONTEXT_FLUSH_AND_INV_CB | + R600_CONTEXT_FLUSH_AND_INV_CB_META; + } + if (rctx->framebuffer.zsbuf) { + rctx->b.flags |= R600_CONTEXT_FLUSH_AND_INV_DB; + } util_copy_framebuffer_state(&rctx->framebuffer, state); @@ -2468,6 +2470,8 @@ static void si_bind_vs_shader(struct pipe_context *ctx, void *state) si_pm4_bind_state(rctx, vs, sel->current->pm4); else si_pm4_bind_state(rctx, vs, rctx->dummy_pixel_shader->pm4); + + rctx->b.flags |= R600_CONTEXT_INV_SHADER_CACHE; } static void si_bind_ps_shader(struct pipe_context *ctx, void *state) @@ -2484,6 +2488,8 @@ static void si_bind_ps_shader(struct pipe_context *ctx, void *state) si_pm4_bind_state(rctx, ps, sel->current->pm4); else si_pm4_bind_state(rctx, ps, rctx->dummy_pixel_shader->pm4); + + rctx->b.flags |= R600_CONTEXT_INV_SHADER_CACHE; } static void si_delete_shader_selector(struct pipe_context *ctx, @@ -2826,17 +2832,14 @@ static void *si_create_sampler_state(struct pipe_context *ctx, /* XXX consider moving this function to si_descriptors.c for gcc to inline * the si_set_sampler_view calls. LTO might help too. 
*/ -static struct si_pm4_state *si_set_sampler_views(struct r600_context *rctx, - unsigned shader, unsigned count, - struct pipe_sampler_view **views) +static void si_set_sampler_views(struct r600_context *rctx, + unsigned shader, unsigned count, + struct pipe_sampler_view **views) { struct r600_textures_info *samplers = &rctx->samplers[shader]; struct si_pipe_sampler_view **rviews = (struct si_pipe_sampler_view **)views; - struct si_pm4_state *pm4 = si_pm4_alloc_state(rctx); int i; - si_pm4_inval_texture_cache(pm4); - for (i = 0; i < count; i++) { if (views[i]) { struct r600_texture *rtex = @@ -2879,27 +2882,23 @@ static struct si_pm4_state *si_set_sampler_views(struct r600_context *rctx, } samplers->n_views = count; - return pm4; + rctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE; } static void si_set_vs_sampler_views(struct pipe_context *ctx, unsigned count, struct pipe_sampler_view **views) { struct r600_context *rctx = (struct r600_context *)ctx; - struct si_pm4_state *pm4; - pm4 = si_set_sampler_views(rctx, PIPE_SHADER_VERTEX, count, views); - si_pm4_set_state(rctx, vs_sampler_views, pm4); + si_set_sampler_views(rctx, PIPE_SHADER_VERTEX, count, views); } static void si_set_ps_sampler_views(struct pipe_context *ctx, unsigned count, struct pipe_sampler_view **views) { struct r600_context *rctx = (struct r600_context *)ctx; - struct si_pm4_state *pm4; - pm4 = si_set_sampler_views(rctx, PIPE_SHADER_FRAGMENT, count, views); - si_pm4_set_state(rctx, ps_sampler_views, pm4); + si_set_sampler_views(rctx, PIPE_SHADER_FRAGMENT, count, views); } static struct si_pm4_state *si_bind_sampler_states(struct r600_context *rctx, unsigned count, @@ -2915,7 +2914,7 @@ static struct si_pm4_state *si_bind_sampler_states(struct r600_context *rctx, un if (!count) goto out; - si_pm4_inval_texture_cache(pm4); + rctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE; si_pm4_sh_data_begin(pm4); for (i = 0; i < count; i++) { @@ -3128,14 +3127,9 @@ static void si_set_polygon_stipple(struct pipe_context 
*ctx, static void si_texture_barrier(struct pipe_context *ctx) { struct r600_context *rctx = (struct r600_context *)ctx; - struct si_pm4_state *pm4 = si_pm4_alloc_state(rctx); - - if (pm4 == NULL) - return; - si_pm4_inval_texture_cache(pm4); - si_pm4_inval_fb_cache(pm4, rctx->framebuffer.nr_cbufs); - si_pm4_set_state(rctx, texture_barrier, pm4); + rctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE | + R600_CONTEXT_FLUSH_AND_INV_CB; } static void *si_create_blend_custom(struct r600_context *rctx, unsigned mode) |