author    Jason Ekstrand <[email protected]>  2017-11-03 16:11:54 -0700
committer Jason Ekstrand <[email protected]>  2017-11-13 21:51:59 -0800
commit    fb0e9b5197cb65bde1e116d89acd5deb32f9132c (patch)
tree      5587da4bf48642f3fc7e68f4a92b0053bfbcfa72 /src/mesa/drivers
parent    d6d0ac95d5d77bd18b2064c3ed9aad70cf38cb6f (diff)
i965: Track the depth and render caches separately
Previously, we just had one hash set for tracking depth and render
caches called brw_context::render_cache. This is less than ideal
because the depth and render caches are separate and we can't track
moves between the depth and the render caches. This limitation led
to some unnecessary flushing around the depth cache. There are cases
(mostly with BLORP) where we can end up touching a depth or stencil
buffer through the render cache. To guard against this, BLORP would
unconditionally do a render_cache_set_check_flush on its destination
which meant that if you did any rendering (including a BLORP operation)
to a given surface and then used it as a blorp destination, you would
end up flushing it out of the render cache before rendering into it.
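
For reference, the old single-set scheme boils down to the sketch below. It is
condensed from the pre-patch code removed in the diff further down, with the
gen-specific flush emission elided. Because one set tracks writes through
either cache, a hit cannot be attributed to the render or the depth cache, so
BLORP had to invoke this unconditionally on params->dst.addr.buffer:

   void
   brw_render_cache_set_check_flush(struct brw_context *brw, struct brw_bo *bo)
   {
      /* One set for both caches: a hit tells us the BO is dirty *somewhere*,
       * but not in which cache, so the only safe answer is a full flush. */
      if (!_mesa_set_search(brw->render_cache, bo))
         return;

      /* gen-specific PIPE_CONTROL / MI_FLUSH emission elided */

      brw_render_cache_set_clear(brw);
   }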
Things get worse when you dig into the depth/stencil state code for
regular GL draw calls. Because we may end up rendering to a depth
or stencil buffer via BLORP, we did a render_cache_set_check_flush on
all depth and stencil buffers in brw_emit_depthbuffer to ensure that
they got flushed out of the render cache prior to using them for depth
or stencil testing. However, because we also need to track dirtiness
for depth and stencil so that we can implement depth and stencil
texturing correctly, we were adding all depth and stencil buffers to the
render cache set in brw_postdraw_set_buffers_need_resolve. This meant
that, if anything caused 3DSTATE_DEPTH_BUFFER to get re-emitted
(currently _NEW_BUFFERS, BRW_NEW_BATCH, and BRW_NEW_BLORP), we would
almost always do a full pipeline stall and render/depth cache flush.
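
In outline, that round trip looked roughly like this; the sketch uses an
illustrative depth_bo variable rather than the driver's real expressions:

   /* brw_postdraw_set_buffers_need_resolve(): mark depth/stencil BOs dirty
    * by adding them to the single render cache set, so depth/stencil
    * texturing can see they need flushing... */
   brw_render_cache_set_add_bo(brw, depth_bo);

   /* brw_emit_depthbuffer(): ...then check-flush those same BOs on every
    * 3DSTATE_DEPTH_BUFFER re-emit (_NEW_BUFFERS, BRW_NEW_BATCH,
    * BRW_NEW_BLORP), which almost always hit the set and cost a full
    * pipeline stall plus render/depth cache flush. */
   brw_render_cache_set_check_flush(brw, depth_bo);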
The root cause of both of these problems is that we can't tell the
difference between the render and depth caches in our tracking. This
commit splits our cache tracking into two sets, one for render and one
for depth, and properly handles transitioning between the two. We still
flush all the caches whenever anything needs to be flushed. The idea is
that if we're going to take the hit of a flush and stall, we may as well
flush everything in the hope that we can avoid a later flush triggered by
something else.
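
Condensed from the intel_fbo.c changes in the diff below, the new per-use
flush rules are:

   /* Reading (e.g. sampling): a hit in either cache means the data may be
    * sitting somewhere incoherent with the sampler, so flush. */
   void brw_cache_flush_for_read(struct brw_context *brw, struct brw_bo *bo)
   {
      if (_mesa_set_search(brw->render_cache, bo) ||
          _mesa_set_search(brw->depth_cache, bo))
         flush_depth_and_render_caches(brw, bo);
   }

   /* Rendering: only a depth-cache hit is incoherent; a render-cache hit
    * is coherent with more rendering and needs no flush. */
   void brw_cache_flush_for_render(struct brw_context *brw, struct brw_bo *bo)
   {
      if (_mesa_set_search(brw->depth_cache, bo))
         flush_depth_and_render_caches(brw, bo);
   }

   /* Depth/stencil use: symmetrically, only a render-cache hit matters. */
   void brw_cache_flush_for_depth(struct brw_context *brw, struct brw_bo *bo)
   {
      if (_mesa_set_search(brw->render_cache, bo))
         flush_depth_and_render_caches(brw, bo);
   }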
Reviewed-by: Kenneth Graunke <[email protected]>
Diffstat (limited to 'src/mesa/drivers')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_context.h       |  7
-rw-r--r-- | src/mesa/drivers/dri/i965/genX_blorp_exec.c   |  1
-rw-r--r-- | src/mesa/drivers/dri/i965/intel_batchbuffer.c |  2
-rw-r--r-- | src/mesa/drivers/dri/i965/intel_fbo.c         | 33
-rw-r--r-- | src/mesa/drivers/dri/i965/intel_fbo.h         |  5
5 files changed, 26 insertions, 22 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 8aa0c5ff64c..7457aba54a3 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -703,6 +703,13 @@ struct brw_context
    struct set *render_cache;
 
    /**
+    * Set of struct brw_bo * that have been used as a depth buffer within this
+    * batchbuffer and would need flushing before being used from another cache
+    * domain that isn't coherent with it (i.e. the sampler).
+    */
+   struct set *depth_cache;
+
+   /**
     * Number of resets observed in the system at context creation.
     *
     * This is tracked in the context so that we can determine that another
diff --git a/src/mesa/drivers/dri/i965/genX_blorp_exec.c b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
index 84117531410..f824c127e40 100644
--- a/src/mesa/drivers/dri/i965/genX_blorp_exec.c
+++ b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
@@ -232,7 +232,6 @@ genX(blorp_exec)(struct blorp_batch *batch,
       brw_cache_flush_for_depth(brw, params->depth.addr.buffer);
    if (params->stencil.enabled)
       brw_cache_flush_for_depth(brw, params->stencil.addr.buffer);
-   brw_render_cache_set_check_flush(brw, params->dst.addr.buffer);
 
    brw_select_pipeline(brw, BRW_RENDER_PIPELINE);
 
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index 1a366c78b00..33c79a2b54d 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -220,7 +220,7 @@ static void
 intel_batchbuffer_reset_and_clear_render_cache(struct brw_context *brw)
 {
    intel_batchbuffer_reset(brw);
-   brw_render_cache_set_clear(brw);
+   brw_cache_sets_clear(brw);
 }
 
 void
diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c
index 927f589321e..75c85ecb639 100644
--- a/src/mesa/drivers/dri/i965/intel_fbo.c
+++ b/src/mesa/drivers/dri/i965/intel_fbo.c
@@ -970,19 +970,16 @@ intel_renderbuffer_move_to_temp(struct brw_context *brw,
 }
 
 void
-brw_render_cache_set_clear(struct brw_context *brw)
+brw_cache_sets_clear(struct brw_context *brw)
 {
    struct set_entry *entry;
 
    set_foreach(brw->render_cache, entry) {
       _mesa_set_remove(brw->render_cache, entry);
    }
-}
 
-void
-brw_render_cache_set_add_bo(struct brw_context *brw, struct brw_bo *bo)
-{
-   _mesa_set_add(brw->render_cache, bo);
+   set_foreach(brw->depth_cache, entry)
+      _mesa_set_remove(brw->depth_cache, entry);
 }
 
 /**
@@ -997,14 +994,11 @@ brw_render_cache_set_add_bo(struct brw_context *brw, struct brw_bo *bo)
  * necessary is flushed before another use of that BO, but for reuse from
  * different caches within a batchbuffer, it's all our responsibility.
  */
-void
-brw_render_cache_set_check_flush(struct brw_context *brw, struct brw_bo *bo)
+static void
+flush_depth_and_render_caches(struct brw_context *brw, struct brw_bo *bo)
 {
    const struct gen_device_info *devinfo = &brw->screen->devinfo;
 
-   if (!_mesa_set_search(brw->render_cache, bo))
-      return;
-
    if (devinfo->gen >= 6) {
       brw_emit_pipe_control_flush(brw,
                                   PIPE_CONTROL_DEPTH_CACHE_FLUSH |
@@ -1018,36 +1012,41 @@ brw_render_cache_set_check_flush(struct brw_context *brw, struct brw_bo *bo)
       brw_emit_mi_flush(brw);
    }
 
-   brw_render_cache_set_clear(brw);
+   brw_cache_sets_clear(brw);
 }
 
 void
 brw_cache_flush_for_read(struct brw_context *brw, struct brw_bo *bo)
 {
-   brw_render_cache_set_check_flush(brw, bo);
+   if (_mesa_set_search(brw->render_cache, bo) ||
+       _mesa_set_search(brw->depth_cache, bo))
+      flush_depth_and_render_caches(brw, bo);
 }
 
 void
 brw_cache_flush_for_render(struct brw_context *brw, struct brw_bo *bo)
 {
+   if (_mesa_set_search(brw->depth_cache, bo))
+      flush_depth_and_render_caches(brw, bo);
 }
 
 void
 brw_render_cache_add_bo(struct brw_context *brw, struct brw_bo *bo)
 {
-   brw_render_cache_set_add_bo(brw, bo);
+   _mesa_set_add(brw->render_cache, bo);
 }
 
 void
 brw_cache_flush_for_depth(struct brw_context *brw, struct brw_bo *bo)
 {
-   brw_render_cache_set_check_flush(brw, bo);
+   if (_mesa_set_search(brw->render_cache, bo))
+      flush_depth_and_render_caches(brw, bo);
 }
 
 void
 brw_depth_cache_add_bo(struct brw_context *brw, struct brw_bo *bo)
 {
-   brw_render_cache_set_add_bo(brw, bo);
+   _mesa_set_add(brw->depth_cache, bo);
 }
 
 /**
@@ -1069,4 +1068,6 @@ intel_fbo_init(struct brw_context *brw)
 
    brw->render_cache = _mesa_set_create(brw, _mesa_hash_pointer,
                                         _mesa_key_pointer_equal);
+   brw->depth_cache = _mesa_set_create(brw, _mesa_hash_pointer,
+                                       _mesa_key_pointer_equal);
 }
diff --git a/src/mesa/drivers/dri/i965/intel_fbo.h b/src/mesa/drivers/dri/i965/intel_fbo.h
index d06a1e8fe99..10be4bbc7dc 100644
--- a/src/mesa/drivers/dri/i965/intel_fbo.h
+++ b/src/mesa/drivers/dri/i965/intel_fbo.h
@@ -234,10 +234,7 @@ void
 intel_renderbuffer_upsample(struct brw_context *brw,
                             struct intel_renderbuffer *irb);
 
-void brw_render_cache_set_clear(struct brw_context *brw);
-void brw_render_cache_set_add_bo(struct brw_context *brw, struct brw_bo *bo);
-void brw_render_cache_set_check_flush(struct brw_context *brw, struct brw_bo *bo);
-
+void brw_cache_sets_clear(struct brw_context *brw);
 void brw_cache_flush_for_read(struct brw_context *brw, struct brw_bo *bo);
 void brw_cache_flush_for_render(struct brw_context *brw, struct brw_bo *bo);
 void brw_cache_flush_for_depth(struct brw_context *brw, struct brw_bo *bo);
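
As a usage illustration only (a hypothetical call sequence, not code from this
commit), the new API pairs a flush-for-use before each access with an
add-after-write, so a BO moving from the depth cache to the render cache
flushes exactly once:

   /* bo was last written as a depth buffer (it sits in brw->depth_cache). */
   brw_cache_flush_for_render(brw, bo);   /* depth-cache hit => flush      */
   /* ... emit draws targeting bo ... */
   brw_render_cache_add_bo(brw, bo);      /* bo is now render-cache dirty  */

   brw_cache_flush_for_read(brw, bo);     /* flush again before sampling   */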