diff options
-rw-r--r-- | src/gallium/drivers/r600/evergreen_compute.c | 11 | ||||
-rw-r--r-- | src/gallium/drivers/r600/evergreen_hw_context.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/r600/evergreen_state.c | 38 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_hw_context.c | 7 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_pipe.h | 14 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_state_common.c | 13 |
6 files changed, 54 insertions, 31 deletions
diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index 90eae1e2829..a77f58242e3 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -715,7 +715,6 @@ static void compute_emit_cs(struct r600_context *rctx, rctx->cmd_buf_is_compute = true; } - r600_need_cs_space(rctx, 0, true); if (rctx->cs_shader_state.shader->ir_type == PIPE_SHADER_IR_TGSI) { r600_shader_select(&rctx->b.b, rctx->cs_shader_state.shader->sel, &compute_dirty); current = rctx->cs_shader_state.shader->sel->current; @@ -742,16 +741,22 @@ static void compute_emit_cs(struct r600_context *rctx, } rctx->cs_block_grid_sizes[3] = rctx->cs_block_grid_sizes[7] = 0; rctx->driver_consts[PIPE_SHADER_COMPUTE].cs_block_grid_size_dirty = true; + + evergreen_emit_atomic_buffer_setup_count(rctx, current, combined_atomics, &atomic_used_mask); + r600_need_cs_space(rctx, 0, true, util_bitcount(atomic_used_mask)); + if (need_buf_const) { eg_setup_buffer_constants(rctx, PIPE_SHADER_COMPUTE); } r600_update_driver_const_buffers(rctx, true); - if (evergreen_emit_atomic_buffer_setup(rctx, current, combined_atomics, &atomic_used_mask)) { + evergreen_emit_atomic_buffer_setup(rctx, true, combined_atomics, atomic_used_mask); + if (atomic_used_mask) { radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_CS_PARTIAL_FLUSH) | EVENT_INDEX(4)); } - } + } else + r600_need_cs_space(rctx, 0, true, 0); /* Initialize all the compute-related registers. * diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c b/src/gallium/drivers/r600/evergreen_hw_context.c index d3f3e227c1f..5e0e27b0f16 100644 --- a/src/gallium/drivers/r600/evergreen_hw_context.c +++ b/src/gallium/drivers/r600/evergreen_hw_context.c @@ -109,7 +109,7 @@ void evergreen_cp_dma_clear_buffer(struct r600_context *rctx, r600_need_cs_space(rctx, 10 + (rctx->b.flags ? R600_MAX_FLUSH_CS_DWORDS : 0) + - R600_MAX_PFP_SYNC_ME_DWORDS, FALSE); + R600_MAX_PFP_SYNC_ME_DWORDS, FALSE, 0); /* Flush the caches for the first copy only. */ if (rctx->b.flags) { diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 57e81e30c27..cc41e114369 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -4030,7 +4030,6 @@ static void evergreen_set_hw_atomic_buffers(struct pipe_context *ctx, if (!buffers || !buffers[idx].buffer) { pipe_resource_reference(&abuf->buffer, NULL); - astate->enabled_mask &= ~(1 << i); continue; } buf = &buffers[idx]; @@ -4038,7 +4037,6 @@ static void evergreen_set_hw_atomic_buffers(struct pipe_context *ctx, pipe_resource_reference(&abuf->buffer, buf->buffer); abuf->buffer_offset = buf->buffer_offset; abuf->buffer_size = buf->buffer_size; - astate->enabled_mask |= (1 << i); } } @@ -4868,20 +4866,15 @@ static void cayman_write_count_to_gds(struct r600_context *rctx, radeon_emit(cs, reloc); } -bool evergreen_emit_atomic_buffer_setup(struct r600_context *rctx, - struct r600_pipe_shader *cs_shader, - struct r600_shader_atomic *combined_atomics, - uint8_t *atomic_used_mask_p) +void evergreen_emit_atomic_buffer_setup_count(struct r600_context *rctx, + struct r600_pipe_shader *cs_shader, + struct r600_shader_atomic *combined_atomics, + uint8_t *atomic_used_mask_p) { - struct r600_atomic_buffer_state *astate = &rctx->atomic_buffer_state; - unsigned pkt_flags = 0; uint8_t atomic_used_mask = 0; int i, j, k; bool is_compute = cs_shader ? true : false; - if (is_compute) - pkt_flags = RADEON_CP_PACKET3_COMPUTE_MODE; - for (i = 0; i < (is_compute ? 1 : EG_NUM_HW_STAGES); i++) { uint8_t num_atomic_stage; struct r600_pipe_shader *pshader; @@ -4914,8 +4907,25 @@ bool evergreen_emit_atomic_buffer_setup(struct r600_context *rctx, } } } + *atomic_used_mask_p = atomic_used_mask; +} + +void evergreen_emit_atomic_buffer_setup(struct r600_context *rctx, + bool is_compute, + struct r600_shader_atomic *combined_atomics, + uint8_t atomic_used_mask) +{ + struct r600_atomic_buffer_state *astate = &rctx->atomic_buffer_state; + unsigned pkt_flags = 0; + uint32_t mask; + + if (is_compute) + pkt_flags = RADEON_CP_PACKET3_COMPUTE_MODE; + + mask = atomic_used_mask; + if (!mask) + return; - uint32_t mask = atomic_used_mask; while (mask) { unsigned atomic_index = u_bit_scan(&mask); struct r600_shader_atomic *atomic = &combined_atomics[atomic_index]; @@ -4927,8 +4937,6 @@ bool evergreen_emit_atomic_buffer_setup(struct r600_context *rctx, else evergreen_emit_set_append_cnt(rctx, atomic, resource, pkt_flags); } - *atomic_used_mask_p = atomic_used_mask; - return true; } void evergreen_emit_atomic_buffer_save(struct r600_context *rctx, @@ -4940,7 +4948,7 @@ void evergreen_emit_atomic_buffer_save(struct r600_context *rctx, struct r600_atomic_buffer_state *astate = &rctx->atomic_buffer_state; uint32_t pkt_flags = 0; uint32_t event = EVENT_TYPE_PS_DONE; - uint32_t mask = astate->enabled_mask; + uint32_t mask; uint64_t dst_offset; unsigned reloc; diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c index 1cfc180ad6c..a2f5f637b20 100644 --- a/src/gallium/drivers/r600/r600_hw_context.c +++ b/src/gallium/drivers/r600/r600_hw_context.c @@ -31,7 +31,7 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, - boolean count_draw_in) + boolean count_draw_in, unsigned num_atomics) { /* Flush the DMA IB if it's not empty. */ if (radeon_emitted(ctx->b.dma.cs, 0)) @@ -61,6 +61,9 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, num_dw += R600_MAX_FLUSH_CS_DWORDS + R600_MAX_DRAW_CS_DWORDS; } + /* add atomic counters, 8 pre + 8 post per counter + 16 post if any counters */ + num_dw += (num_atomics * 16) + (num_atomics ? 16 : 0); + /* Count in r600_suspend_queries. */ num_dw += ctx->b.num_cs_dw_queries_suspend; @@ -526,7 +529,7 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx, r600_need_cs_space(rctx, 10 + (rctx->b.flags ? R600_MAX_FLUSH_CS_DWORDS : 0) + - 3 + R600_MAX_PFP_SYNC_ME_DWORDS, FALSE); + 3 + R600_MAX_PFP_SYNC_ME_DWORDS, FALSE, 0); /* Flush the caches for the first copy only. */ if (rctx->b.flags) { diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 6204e3c557b..239005cab7f 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -446,8 +446,6 @@ struct r600_shader_state { }; struct r600_atomic_buffer_state { - uint32_t enabled_mask; - uint32_t dirty_mask; struct pipe_shader_buffer buffer[EG_MAX_ATOMIC_BUFFERS]; }; @@ -773,7 +771,7 @@ void r600_context_gfx_flush(void *context, unsigned flags, struct pipe_fence_handle **fence); void r600_begin_new_cs(struct r600_context *ctx); void r600_flush_emit(struct r600_context *ctx); -void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, boolean count_draw_in); +void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, boolean count_draw_in, unsigned num_atomics); void r600_emit_pfp_sync_me(struct r600_context *rctx); void r600_cp_dma_copy_buffer(struct r600_context *rctx, struct pipe_resource *dst, uint64_t dst_offset, @@ -1067,10 +1065,14 @@ void r600_delete_shader_selector(struct pipe_context *ctx, struct r600_pipe_shader_selector *sel); struct r600_shader_atomic; -bool evergreen_emit_atomic_buffer_setup(struct r600_context *rctx, - struct r600_pipe_shader *cs_shader, +void evergreen_emit_atomic_buffer_setup_count(struct r600_context *rctx, + struct r600_pipe_shader *cs_shader, + struct r600_shader_atomic *combined_atomics, + uint8_t *atomic_used_mask_p); +void evergreen_emit_atomic_buffer_setup(struct r600_context *rctx, + bool is_compute, struct r600_shader_atomic *combined_atomics, - uint8_t *atomic_used_mask_p); + uint8_t atomic_used_mask); void evergreen_emit_atomic_buffer_save(struct r600_context *rctx, bool is_compute, struct r600_shader_atomic *combined_atomics, diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 402d95838f0..e6c1b0be97c 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -2085,8 +2085,9 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info : (rctx->tes_shader)? rctx->tes_shader->info.properties[TGSI_PROPERTY_TES_PRIM_MODE] : info->mode; - if (rctx->b.chip_class >= EVERGREEN) - evergreen_emit_atomic_buffer_setup(rctx, NULL, combined_atomics, &atomic_used_mask); + if (rctx->b.chip_class >= EVERGREEN) { + evergreen_emit_atomic_buffer_setup_count(rctx, NULL, combined_atomics, &atomic_used_mask); + } if (index_size) { index_offset += info->start * index_size; @@ -2172,7 +2173,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info evergreen_setup_tess_constants(rctx, info, &num_patches); /* Emit states. */ - r600_need_cs_space(rctx, has_user_indices ? 5 : 0, TRUE); + r600_need_cs_space(rctx, has_user_indices ? 5 : 0, TRUE, util_bitcount(atomic_used_mask)); r600_flush_emit(rctx); mask = rctx->dirty_atoms; @@ -2180,6 +2181,10 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info r600_emit_atom(rctx, rctx->atoms[u_bit_scan64(&mask)]); } + if (rctx->b.chip_class >= EVERGREEN) { + evergreen_emit_atomic_buffer_setup(rctx, false, combined_atomics, atomic_used_mask); + } + if (rctx->b.chip_class == CAYMAN) { /* Copied from radeonsi. */ unsigned primgroup_size = 128; /* recommended without a GS */ @@ -3284,7 +3289,7 @@ static void r600_set_active_query_state(struct pipe_context *ctx, boolean enable static void r600_need_gfx_cs_space(struct pipe_context *ctx, unsigned num_dw, bool include_draw_vbo) { - r600_need_cs_space((struct r600_context*)ctx, num_dw, include_draw_vbo); + r600_need_cs_space((struct r600_context*)ctx, num_dw, include_draw_vbo, 0); } /* keep this at the end of this file, please */ |