diff options
author | Dave Airlie <[email protected]> | 2018-08-07 01:41:20 +0100 |
---|---|---|
committer | Dave Airlie <[email protected]> | 2018-08-21 20:45:38 +0100 |
commit | 32529e60849dd20d167f14cb8542c5798343f0e0 (patch) | |
tree | 91003a038f71084695c86be8b3a889648a282c90 /src/gallium/drivers/r600/evergreen_state.c | |
parent | 41d58e20983576212636c11afd6ca25ebd60b68f (diff) |
r600/eg: rework atomic counter emission with flushes
With the current code, we didn't do the space checks prior
to atomic counter setup emission, and we also didn't add
atomic counters to the space check, so we could still get a
flush later as well.
These flushes would be bad and lead to problems with
parallel tests. We have to ensure the atomic counter copy-in,
draw emits, and counter copy-out are kept in the same command
submission unit.
This reworks the code to drop some useless masks, make the
counting separate from the emits, and make the space checker
handle atomic counter space.
[airlied: want this in 18.2]
Fixes: 06993e4ee (r600: add support for hw atomic counters. (v3))
Diffstat (limited to 'src/gallium/drivers/r600/evergreen_state.c')
-rw-r--r-- | src/gallium/drivers/r600/evergreen_state.c | 38 |
1 files changed, 23 insertions, 15 deletions
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 57e81e30c27..cc41e114369 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -4030,7 +4030,6 @@ static void evergreen_set_hw_atomic_buffers(struct pipe_context *ctx, if (!buffers || !buffers[idx].buffer) { pipe_resource_reference(&abuf->buffer, NULL); - astate->enabled_mask &= ~(1 << i); continue; } buf = &buffers[idx]; @@ -4038,7 +4037,6 @@ static void evergreen_set_hw_atomic_buffers(struct pipe_context *ctx, pipe_resource_reference(&abuf->buffer, buf->buffer); abuf->buffer_offset = buf->buffer_offset; abuf->buffer_size = buf->buffer_size; - astate->enabled_mask |= (1 << i); } } @@ -4868,20 +4866,15 @@ static void cayman_write_count_to_gds(struct r600_context *rctx, radeon_emit(cs, reloc); } -bool evergreen_emit_atomic_buffer_setup(struct r600_context *rctx, - struct r600_pipe_shader *cs_shader, - struct r600_shader_atomic *combined_atomics, - uint8_t *atomic_used_mask_p) +void evergreen_emit_atomic_buffer_setup_count(struct r600_context *rctx, + struct r600_pipe_shader *cs_shader, + struct r600_shader_atomic *combined_atomics, + uint8_t *atomic_used_mask_p) { - struct r600_atomic_buffer_state *astate = &rctx->atomic_buffer_state; - unsigned pkt_flags = 0; uint8_t atomic_used_mask = 0; int i, j, k; bool is_compute = cs_shader ? true : false; - if (is_compute) - pkt_flags = RADEON_CP_PACKET3_COMPUTE_MODE; - for (i = 0; i < (is_compute ? 
1 : EG_NUM_HW_STAGES); i++) { uint8_t num_atomic_stage; struct r600_pipe_shader *pshader; @@ -4914,8 +4907,25 @@ bool evergreen_emit_atomic_buffer_setup(struct r600_context *rctx, } } } + *atomic_used_mask_p = atomic_used_mask; +} + +void evergreen_emit_atomic_buffer_setup(struct r600_context *rctx, + bool is_compute, + struct r600_shader_atomic *combined_atomics, + uint8_t atomic_used_mask) +{ + struct r600_atomic_buffer_state *astate = &rctx->atomic_buffer_state; + unsigned pkt_flags = 0; + uint32_t mask; + + if (is_compute) + pkt_flags = RADEON_CP_PACKET3_COMPUTE_MODE; + + mask = atomic_used_mask; + if (!mask) + return; - uint32_t mask = atomic_used_mask; while (mask) { unsigned atomic_index = u_bit_scan(&mask); struct r600_shader_atomic *atomic = &combined_atomics[atomic_index]; @@ -4927,8 +4937,6 @@ bool evergreen_emit_atomic_buffer_setup(struct r600_context *rctx, else evergreen_emit_set_append_cnt(rctx, atomic, resource, pkt_flags); } - *atomic_used_mask_p = atomic_used_mask; - return true; } void evergreen_emit_atomic_buffer_save(struct r600_context *rctx, @@ -4940,7 +4948,7 @@ void evergreen_emit_atomic_buffer_save(struct r600_context *rctx, struct r600_atomic_buffer_state *astate = &rctx->atomic_buffer_state; uint32_t pkt_flags = 0; uint32_t event = EVENT_TYPE_PS_DONE; - uint32_t mask = astate->enabled_mask; + uint32_t mask; uint64_t dst_offset; unsigned reloc; |