summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/gallium/drivers/r600/evergreen_compute.c11
-rw-r--r--src/gallium/drivers/r600/evergreen_hw_context.c2
-rw-r--r--src/gallium/drivers/r600/evergreen_state.c38
-rw-r--r--src/gallium/drivers/r600/r600_hw_context.c7
-rw-r--r--src/gallium/drivers/r600/r600_pipe.h14
-rw-r--r--src/gallium/drivers/r600/r600_state_common.c13
6 files changed, 54 insertions, 31 deletions
diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c
index 90eae1e2829..a77f58242e3 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -715,7 +715,6 @@ static void compute_emit_cs(struct r600_context *rctx,
rctx->cmd_buf_is_compute = true;
}
- r600_need_cs_space(rctx, 0, true);
if (rctx->cs_shader_state.shader->ir_type == PIPE_SHADER_IR_TGSI) {
r600_shader_select(&rctx->b.b, rctx->cs_shader_state.shader->sel, &compute_dirty);
current = rctx->cs_shader_state.shader->sel->current;
@@ -742,16 +741,22 @@ static void compute_emit_cs(struct r600_context *rctx,
}
rctx->cs_block_grid_sizes[3] = rctx->cs_block_grid_sizes[7] = 0;
rctx->driver_consts[PIPE_SHADER_COMPUTE].cs_block_grid_size_dirty = true;
+
+ evergreen_emit_atomic_buffer_setup_count(rctx, current, combined_atomics, &atomic_used_mask);
+ r600_need_cs_space(rctx, 0, true, util_bitcount(atomic_used_mask));
+
if (need_buf_const) {
eg_setup_buffer_constants(rctx, PIPE_SHADER_COMPUTE);
}
r600_update_driver_const_buffers(rctx, true);
- if (evergreen_emit_atomic_buffer_setup(rctx, current, combined_atomics, &atomic_used_mask)) {
+ evergreen_emit_atomic_buffer_setup(rctx, true, combined_atomics, atomic_used_mask);
+ if (atomic_used_mask) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_CS_PARTIAL_FLUSH) | EVENT_INDEX(4));
}
- }
+ } else
+ r600_need_cs_space(rctx, 0, true, 0);
/* Initialize all the compute-related registers.
*
diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c b/src/gallium/drivers/r600/evergreen_hw_context.c
index d3f3e227c1f..5e0e27b0f16 100644
--- a/src/gallium/drivers/r600/evergreen_hw_context.c
+++ b/src/gallium/drivers/r600/evergreen_hw_context.c
@@ -109,7 +109,7 @@ void evergreen_cp_dma_clear_buffer(struct r600_context *rctx,
r600_need_cs_space(rctx,
10 + (rctx->b.flags ? R600_MAX_FLUSH_CS_DWORDS : 0) +
- R600_MAX_PFP_SYNC_ME_DWORDS, FALSE);
+ R600_MAX_PFP_SYNC_ME_DWORDS, FALSE, 0);
/* Flush the caches for the first copy only. */
if (rctx->b.flags) {
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 57e81e30c27..cc41e114369 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -4030,7 +4030,6 @@ static void evergreen_set_hw_atomic_buffers(struct pipe_context *ctx,
if (!buffers || !buffers[idx].buffer) {
pipe_resource_reference(&abuf->buffer, NULL);
- astate->enabled_mask &= ~(1 << i);
continue;
}
buf = &buffers[idx];
@@ -4038,7 +4037,6 @@ static void evergreen_set_hw_atomic_buffers(struct pipe_context *ctx,
pipe_resource_reference(&abuf->buffer, buf->buffer);
abuf->buffer_offset = buf->buffer_offset;
abuf->buffer_size = buf->buffer_size;
- astate->enabled_mask |= (1 << i);
}
}
@@ -4868,20 +4866,15 @@ static void cayman_write_count_to_gds(struct r600_context *rctx,
radeon_emit(cs, reloc);
}
-bool evergreen_emit_atomic_buffer_setup(struct r600_context *rctx,
- struct r600_pipe_shader *cs_shader,
- struct r600_shader_atomic *combined_atomics,
- uint8_t *atomic_used_mask_p)
+void evergreen_emit_atomic_buffer_setup_count(struct r600_context *rctx,
+ struct r600_pipe_shader *cs_shader,
+ struct r600_shader_atomic *combined_atomics,
+ uint8_t *atomic_used_mask_p)
{
- struct r600_atomic_buffer_state *astate = &rctx->atomic_buffer_state;
- unsigned pkt_flags = 0;
uint8_t atomic_used_mask = 0;
int i, j, k;
bool is_compute = cs_shader ? true : false;
- if (is_compute)
- pkt_flags = RADEON_CP_PACKET3_COMPUTE_MODE;
-
for (i = 0; i < (is_compute ? 1 : EG_NUM_HW_STAGES); i++) {
uint8_t num_atomic_stage;
struct r600_pipe_shader *pshader;
@@ -4914,8 +4907,25 @@ bool evergreen_emit_atomic_buffer_setup(struct r600_context *rctx,
}
}
}
+ *atomic_used_mask_p = atomic_used_mask;
+}
+
+void evergreen_emit_atomic_buffer_setup(struct r600_context *rctx,
+ bool is_compute,
+ struct r600_shader_atomic *combined_atomics,
+ uint8_t atomic_used_mask)
+{
+ struct r600_atomic_buffer_state *astate = &rctx->atomic_buffer_state;
+ unsigned pkt_flags = 0;
+ uint32_t mask;
+
+ if (is_compute)
+ pkt_flags = RADEON_CP_PACKET3_COMPUTE_MODE;
+
+ mask = atomic_used_mask;
+ if (!mask)
+ return;
- uint32_t mask = atomic_used_mask;
while (mask) {
unsigned atomic_index = u_bit_scan(&mask);
struct r600_shader_atomic *atomic = &combined_atomics[atomic_index];
@@ -4927,8 +4937,6 @@ bool evergreen_emit_atomic_buffer_setup(struct r600_context *rctx,
else
evergreen_emit_set_append_cnt(rctx, atomic, resource, pkt_flags);
}
- *atomic_used_mask_p = atomic_used_mask;
- return true;
}
void evergreen_emit_atomic_buffer_save(struct r600_context *rctx,
@@ -4940,7 +4948,7 @@ void evergreen_emit_atomic_buffer_save(struct r600_context *rctx,
struct r600_atomic_buffer_state *astate = &rctx->atomic_buffer_state;
uint32_t pkt_flags = 0;
uint32_t event = EVENT_TYPE_PS_DONE;
- uint32_t mask = astate->enabled_mask;
+ uint32_t mask;
uint64_t dst_offset;
unsigned reloc;
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index 1cfc180ad6c..a2f5f637b20 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -31,7 +31,7 @@
void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
- boolean count_draw_in)
+ boolean count_draw_in, unsigned num_atomics)
{
/* Flush the DMA IB if it's not empty. */
if (radeon_emitted(ctx->b.dma.cs, 0))
@@ -61,6 +61,9 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
num_dw += R600_MAX_FLUSH_CS_DWORDS + R600_MAX_DRAW_CS_DWORDS;
}
+ /* add atomic counters, 8 pre + 8 post per counter + 16 post if any counters */
+ num_dw += (num_atomics * 16) + (num_atomics ? 16 : 0);
+
/* Count in r600_suspend_queries. */
num_dw += ctx->b.num_cs_dw_queries_suspend;
@@ -526,7 +529,7 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx,
r600_need_cs_space(rctx,
10 + (rctx->b.flags ? R600_MAX_FLUSH_CS_DWORDS : 0) +
- 3 + R600_MAX_PFP_SYNC_ME_DWORDS, FALSE);
+ 3 + R600_MAX_PFP_SYNC_ME_DWORDS, FALSE, 0);
/* Flush the caches for the first copy only. */
if (rctx->b.flags) {
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 6204e3c557b..239005cab7f 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -446,8 +446,6 @@ struct r600_shader_state {
};
struct r600_atomic_buffer_state {
- uint32_t enabled_mask;
- uint32_t dirty_mask;
struct pipe_shader_buffer buffer[EG_MAX_ATOMIC_BUFFERS];
};
@@ -773,7 +771,7 @@ void r600_context_gfx_flush(void *context, unsigned flags,
struct pipe_fence_handle **fence);
void r600_begin_new_cs(struct r600_context *ctx);
void r600_flush_emit(struct r600_context *ctx);
-void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, boolean count_draw_in);
+void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, boolean count_draw_in, unsigned num_atomics);
void r600_emit_pfp_sync_me(struct r600_context *rctx);
void r600_cp_dma_copy_buffer(struct r600_context *rctx,
struct pipe_resource *dst, uint64_t dst_offset,
@@ -1067,10 +1065,14 @@ void r600_delete_shader_selector(struct pipe_context *ctx,
struct r600_pipe_shader_selector *sel);
struct r600_shader_atomic;
-bool evergreen_emit_atomic_buffer_setup(struct r600_context *rctx,
- struct r600_pipe_shader *cs_shader,
+void evergreen_emit_atomic_buffer_setup_count(struct r600_context *rctx,
+ struct r600_pipe_shader *cs_shader,
+ struct r600_shader_atomic *combined_atomics,
+ uint8_t *atomic_used_mask_p);
+void evergreen_emit_atomic_buffer_setup(struct r600_context *rctx,
+ bool is_compute,
struct r600_shader_atomic *combined_atomics,
- uint8_t *atomic_used_mask_p);
+ uint8_t atomic_used_mask);
void evergreen_emit_atomic_buffer_save(struct r600_context *rctx,
bool is_compute,
struct r600_shader_atomic *combined_atomics,
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 402d95838f0..e6c1b0be97c 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -2085,8 +2085,9 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
: (rctx->tes_shader)? rctx->tes_shader->info.properties[TGSI_PROPERTY_TES_PRIM_MODE]
: info->mode;
- if (rctx->b.chip_class >= EVERGREEN)
- evergreen_emit_atomic_buffer_setup(rctx, NULL, combined_atomics, &atomic_used_mask);
+ if (rctx->b.chip_class >= EVERGREEN) {
+ evergreen_emit_atomic_buffer_setup_count(rctx, NULL, combined_atomics, &atomic_used_mask);
+ }
if (index_size) {
index_offset += info->start * index_size;
@@ -2172,7 +2173,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
evergreen_setup_tess_constants(rctx, info, &num_patches);
/* Emit states. */
- r600_need_cs_space(rctx, has_user_indices ? 5 : 0, TRUE);
+ r600_need_cs_space(rctx, has_user_indices ? 5 : 0, TRUE, util_bitcount(atomic_used_mask));
r600_flush_emit(rctx);
mask = rctx->dirty_atoms;
@@ -2180,6 +2181,10 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
r600_emit_atom(rctx, rctx->atoms[u_bit_scan64(&mask)]);
}
+ if (rctx->b.chip_class >= EVERGREEN) {
+ evergreen_emit_atomic_buffer_setup(rctx, false, combined_atomics, atomic_used_mask);
+ }
+
if (rctx->b.chip_class == CAYMAN) {
/* Copied from radeonsi. */
unsigned primgroup_size = 128; /* recommended without a GS */
@@ -3284,7 +3289,7 @@ static void r600_set_active_query_state(struct pipe_context *ctx, boolean enable
static void r600_need_gfx_cs_space(struct pipe_context *ctx, unsigned num_dw,
bool include_draw_vbo)
{
- r600_need_cs_space((struct r600_context*)ctx, num_dw, include_draw_vbo);
+ r600_need_cs_space((struct r600_context*)ctx, num_dw, include_draw_vbo, 0);
}
/* keep this at the end of this file, please */