diff options
-rw-r--r-- | src/gallium/drivers/r600/evergreen_compute.c | 4 | ||||
-rw-r--r-- | src/gallium/drivers/r600/evergreen_state.c | 1 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600.h | 2 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_blit.c | 4 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_buffer.c | 12 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_hw_context.c | 69 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_pipe.h | 29 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_state.c | 1 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_state_common.c | 61 |
9 files changed, 96 insertions, 87 deletions
diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index 128464e101a..8b3104e40a1 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -438,10 +438,6 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout, ctx->ws->buffer_wait(onebo->buf, 0); COMPUTE_DBG("...\n"); - - ctx->streamout_start = TRUE; - ctx->streamout_append_bitmask = ~0; - } diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 2e301bc86ec..b601f1430ce 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -2643,6 +2643,7 @@ void evergreen_init_state_functions(struct r600_context *rctx) r600_init_atom(rctx, &rctx->stencil_ref.atom, id++, r600_emit_stencil_ref, 4); r600_init_atom(rctx, &rctx->viewport.atom, id++, r600_emit_viewport_state, 8); r600_init_atom(rctx, &rctx->vertex_fetch_shader.atom, id++, evergreen_emit_vertex_fetch_shader, 5); + r600_init_atom(rctx, &rctx->streamout.begin_atom, id++, r600_emit_streamout_begin, 0); rctx->context.create_blend_state = evergreen_create_blend_state; rctx->context.create_depth_stencil_alpha_state = evergreen_create_dsa_state; diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index 11dbb3bad37..dfd18b81a92 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -168,8 +168,6 @@ void r600_context_emit_fence(struct r600_context *ctx, struct r600_resource *fen unsigned offset, unsigned value); void r600_flush_emit(struct r600_context *ctx); -void r600_context_streamout_begin(struct r600_context *ctx); -void r600_context_streamout_end(struct r600_context *ctx); void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, boolean count_draw_in); void r600_need_dma_space(struct r600_context *ctx, unsigned num_dw); void r600_dma_copy(struct r600_context *rctx, diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index 3e0d89c06f0..c737aa25c43 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -58,8 +58,8 @@ static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op util_blitter_save_vertex_buffer_slot(rctx->blitter, rctx->vertex_buffer_state.vb); util_blitter_save_vertex_elements(rctx->blitter, rctx->vertex_fetch_shader.cso); util_blitter_save_vertex_shader(rctx->blitter, rctx->vs_shader); - util_blitter_save_so_targets(rctx->blitter, rctx->num_so_targets, - (struct pipe_stream_output_target**)rctx->so_targets); + util_blitter_save_so_targets(rctx->blitter, rctx->streamout.num_targets, + (struct pipe_stream_output_target**)rctx->streamout.targets); util_blitter_save_rasterizer(rctx->blitter, rctx->rasterizer_state.cso); if (op & R600_SAVE_FRAGMENT_STATE) { diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c index 89740359be0..3267c37cc6d 100644 --- a/src/gallium/drivers/r600/r600_buffer.c +++ b/src/gallium/drivers/r600/r600_buffer.c @@ -126,11 +126,13 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx, } } /* Streamout buffers. */ - for (i = 0; i < rctx->num_so_targets; i++) { - if (rctx->so_targets[i]->b.buffer == &rbuffer->b.b) { - r600_context_streamout_end(rctx); - rctx->streamout_start = TRUE; - rctx->streamout_append_bitmask = ~0; + for (i = 0; i < rctx->streamout.num_targets; i++) { + if (rctx->streamout.targets[i]->b.buffer == &rbuffer->b.b) { + if (rctx->streamout.begin_emitted) { + r600_emit_streamout_end(rctx); + } + rctx->streamout.append_bitmask = rctx->streamout.enabled_mask; + r600_streamout_buffers_dirty(rctx); } } /* Constant buffers. */ diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c index ec098c1f7cf..78b5b293060 100644 --- a/src/gallium/drivers/r600/r600_hw_context.c +++ b/src/gallium/drivers/r600/r600_hw_context.c @@ -404,7 +404,9 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, num_dw += ctx->num_cs_dw_nontimer_queries_suspend; /* Count in streamout_end at the end of CS. */ - num_dw += ctx->num_cs_dw_streamout_end; + if (ctx->streamout.begin_emitted) { + num_dw += ctx->streamout.num_dw_for_end; + } /* Count in render_condition(NULL) at the end of CS. */ if (ctx->predicate_drawing) { @@ -727,7 +729,7 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags) return; ctx->nontimer_queries_suspended = false; - ctx->streamout_suspended = false; + ctx->streamout.suspended = false; /* suspend queries */ if (ctx->num_cs_dw_nontimer_queries_suspend) { @@ -735,9 +737,9 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags) ctx->nontimer_queries_suspended = true; } - if (ctx->num_cs_dw_streamout_end) { - r600_context_streamout_end(ctx); - ctx->streamout_suspended = true; + if (ctx->streamout.begin_emitted) { + r600_emit_streamout_end(ctx); + ctx->streamout.suspended = true; } /* flush is needed to avoid lockups on some chips with user fences @@ -854,9 +856,9 @@ void r600_begin_new_cs(struct r600_context *ctx) r600_sampler_states_dirty(ctx, &samplers->states); } - if (ctx->streamout_suspended) { - ctx->streamout_start = TRUE; - ctx->streamout_append_bitmask = ~0; + if (ctx->streamout.suspended) { + ctx->streamout.append_bitmask = ctx->streamout.enabled_mask; + r600_streamout_buffers_dirty(ctx); } /* resume queries */ @@ -940,48 +942,23 @@ static void r600_set_streamout_enable(struct r600_context *ctx, unsigned buffer_ } } -void r600_context_streamout_begin(struct r600_context *ctx) +void r600_emit_streamout_begin(struct r600_context *ctx, struct r600_atom *atom) { struct radeon_winsys_cs *cs = ctx->rings.gfx.cs; - struct r600_so_target **t = ctx->so_targets; + struct r600_so_target **t = ctx->streamout.targets; unsigned *stride_in_dw = ctx->vs_shader->so.stride; - unsigned buffer_en, i, update_flags = 0; + unsigned i, update_flags = 0; uint64_t va; - unsigned num_cs_dw_streamout_end; - - buffer_en = (ctx->num_so_targets >= 1 && t[0] ? 1 : 0) | - (ctx->num_so_targets >= 2 && t[1] ? 2 : 0) | - (ctx->num_so_targets >= 3 && t[2] ? 4 : 0) | - (ctx->num_so_targets >= 4 && t[3] ? 8 : 0); - - num_cs_dw_streamout_end = - 12 + /* flush_vgt_streamout */ - util_bitcount(buffer_en) * 8 + /* STRMOUT_BUFFER_UPDATE */ - 3 /* set_streamout_enable(0) */; - - r600_need_cs_space(ctx, - 12 + /* flush_vgt_streamout */ - 6 + /* set_streamout_enable */ - util_bitcount(buffer_en) * 7 + /* SET_CONTEXT_REG */ - (ctx->family >= CHIP_RS780 && - ctx->family <= CHIP_RV740 ? util_bitcount(buffer_en) * 5 : 0) + /* STRMOUT_BASE_UPDATE */ - util_bitcount(buffer_en & ctx->streamout_append_bitmask) * 8 + /* STRMOUT_BUFFER_UPDATE */ - util_bitcount(buffer_en & ~ctx->streamout_append_bitmask) * 6 + /* STRMOUT_BUFFER_UPDATE */ - (ctx->family > CHIP_R600 && ctx->family < CHIP_RS780 ? 2 : 0) + /* SURFACE_BASE_UPDATE */ - num_cs_dw_streamout_end, TRUE); - - /* This must be set after r600_need_cs_space. */ - ctx->num_cs_dw_streamout_end = num_cs_dw_streamout_end; if (ctx->chip_class >= EVERGREEN) { evergreen_flush_vgt_streamout(ctx); - evergreen_set_streamout_enable(ctx, buffer_en); + evergreen_set_streamout_enable(ctx, ctx->streamout.enabled_mask); } else { r600_flush_vgt_streamout(ctx); - r600_set_streamout_enable(ctx, buffer_en); + r600_set_streamout_enable(ctx, ctx->streamout.enabled_mask); } - for (i = 0; i < ctx->num_so_targets; i++) { + for (i = 0; i < ctx->streamout.num_targets; i++) { if (t[i]) { t[i]->stride_in_dw = stride_in_dw[i]; t[i]->so_index = i; @@ -1014,7 +991,7 @@ void r600_context_streamout_begin(struct r600_context *ctx) RADEON_USAGE_WRITE); } - if (ctx->streamout_append_bitmask & (1 << i)) { + if (ctx->streamout.append_bitmask & (1 << i)) { va = r600_resource_va(&ctx->screen->screen, (void*)t[i]->buf_filled_size) + t[i]->buf_filled_size_offset; /* Append. */ @@ -1043,16 +1020,17 @@ void r600_context_streamout_begin(struct r600_context *ctx) } } - if (ctx->family > CHIP_R600 && ctx->family < CHIP_RS780) { + if (ctx->family > CHIP_R600 && ctx->family < CHIP_RV770) { cs->buf[cs->cdw++] = PKT3(PKT3_SURFACE_BASE_UPDATE, 0, 0); cs->buf[cs->cdw++] = update_flags; } + ctx->streamout.begin_emitted = true; } -void r600_context_streamout_end(struct r600_context *ctx) +void r600_emit_streamout_end(struct r600_context *ctx) { struct radeon_winsys_cs *cs = ctx->rings.gfx.cs; - struct r600_so_target **t = ctx->so_targets; + struct r600_so_target **t = ctx->streamout.targets; unsigned i; uint64_t va; @@ -1062,7 +1040,7 @@ void r600_context_streamout_end(struct r600_context *ctx) r600_flush_vgt_streamout(ctx); } - for (i = 0; i < ctx->num_so_targets; i++) { + for (i = 0; i < ctx->streamout.num_targets; i++) { if (t[i]) { va = r600_resource_va(&ctx->screen->screen, (void*)t[i]->buf_filled_size) + t[i]->buf_filled_size_offset; @@ -1079,7 +1057,6 @@ void r600_context_streamout_end(struct r600_context *ctx) cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, &ctx->rings.gfx, t[i]->buf_filled_size, RADEON_USAGE_WRITE); - } } @@ -1093,7 +1070,7 @@ void r600_context_streamout_end(struct r600_context *ctx) r600_set_streamout_enable(ctx, 0); } ctx->flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV; - ctx->num_cs_dw_streamout_end = 0; + ctx->streamout.begin_emitted = false; } /* The max number of bytes to copy per packet. */ diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 1d0ad793943..9a84c1f92a3 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -35,7 +35,7 @@ #include "r600_resource.h" #include "evergreen_compute.h" -#define R600_NUM_ATOMS 37 +#define R600_NUM_ATOMS 38 #define R600_TRACE_CS 0 @@ -416,6 +416,19 @@ struct r600_fetch_shader { unsigned offset; }; +struct r600_streamout { + struct r600_atom begin_atom; + bool begin_emitted; + unsigned num_dw_for_end; + + unsigned enabled_mask; + unsigned num_targets; + struct r600_so_target *targets[PIPE_MAX_SO_BUFFERS]; + + unsigned append_bitmask; + bool suspended; +}; + struct r600_ring { struct radeon_winsys_cs *cs; bool flushing; @@ -504,6 +517,7 @@ struct r600_context { struct r600_vertexbuf_state vertex_buffer_state; /** Vertex buffers for compute shaders */ struct r600_vertexbuf_state cs_vertex_buffer_state; + struct r600_streamout streamout; /* Additional context states. */ unsigned flags; @@ -539,14 +553,6 @@ struct r600_context { unsigned current_render_cond_mode; boolean predicate_drawing; - /* Streamout state. */ - unsigned num_cs_dw_streamout_end; - unsigned num_so_targets; - struct r600_so_target *so_targets[PIPE_MAX_SO_BUFFERS]; - boolean streamout_start; - unsigned streamout_append_bitmask; - bool streamout_suspended; - /* Deprecated state management. */ struct r600_range *range; unsigned nblocks; @@ -718,6 +724,10 @@ unsigned r600_get_swizzle_combined(const unsigned char *swizzle_format, const unsigned char *swizzle_view, boolean vtx); +/* r600_hw_context.c */ +void r600_emit_streamout_begin(struct r600_context *ctx, struct r600_atom *atom); +void r600_emit_streamout_end(struct r600_context *ctx); + /* r600_state_common.c */ void r600_init_common_state_functions(struct r600_context *rctx); void r600_emit_cso_state(struct r600_context *rctx, struct r600_atom *atom); @@ -737,6 +747,7 @@ void r600_sampler_views_dirty(struct r600_context *rctx, void r600_sampler_states_dirty(struct r600_context *rctx, struct r600_sampler_states *state); void r600_constant_buffers_dirty(struct r600_context *rctx, struct r600_constbuf_state *state); +void r600_streamout_buffers_dirty(struct r600_context *rctx); void r600_draw_rectangle(struct blitter_context *blitter, int x1, int y1, int x2, int y2, float depth, enum blitter_attrib_type type, const union pipe_color_union *attrib); diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 2d3ec93b282..6f49eed4376 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -2331,6 +2331,7 @@ void r600_init_state_functions(struct r600_context *rctx) r600_init_atom(rctx, &rctx->stencil_ref.atom, id++, r600_emit_stencil_ref, 4); r600_init_atom(rctx, &rctx->viewport.atom, id++, r600_emit_viewport_state, 8); r600_init_atom(rctx, &rctx->vertex_fetch_shader.atom, id++, r600_emit_vertex_fetch_shader, 5); + r600_init_atom(rctx, &rctx->streamout.begin_atom, id++, r600_emit_streamout_begin, 0); rctx->context.create_blend_state = r600_create_blend_state; rctx->context.create_depth_stencil_alpha_state = r600_create_dsa_state; diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index fae28bc9511..bc119787003 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -1014,31 +1014,60 @@ static void r600_so_target_destroy(struct pipe_context *ctx, FREE(t); } -static void r600_set_so_targets(struct pipe_context *ctx, - unsigned num_targets, - struct pipe_stream_output_target **targets, - unsigned append_bitmask) +void r600_streamout_buffers_dirty(struct r600_context *rctx) +{ + rctx->streamout.num_dw_for_end = + 12 + /* flush_vgt_streamout */ + util_bitcount(rctx->streamout.enabled_mask) * 8 + /* STRMOUT_BUFFER_UPDATE */ + 3 /* set_streamout_enable(0) */; + + rctx->streamout.begin_atom.num_dw = + 12 + /* flush_vgt_streamout */ + 6 + /* set_streamout_enable */ + util_bitcount(rctx->streamout.enabled_mask) * 7 + /* SET_CONTEXT_REG */ + (rctx->family >= CHIP_RS780 && + rctx->family <= CHIP_RV740 ? util_bitcount(rctx->streamout.enabled_mask) * 5 : 0) + /* STRMOUT_BASE_UPDATE */ + util_bitcount(rctx->streamout.enabled_mask & rctx->streamout.append_bitmask) * 8 + /* STRMOUT_BUFFER_UPDATE */ + util_bitcount(rctx->streamout.enabled_mask & ~rctx->streamout.append_bitmask) * 6 + /* STRMOUT_BUFFER_UPDATE */ + (rctx->family > CHIP_R600 && rctx->family < CHIP_RS780 ? 2 : 0) + /* SURFACE_BASE_UPDATE */ + rctx->streamout.num_dw_for_end; + + rctx->streamout.begin_atom.dirty = true; +} + +static void r600_set_streamout_targets(struct pipe_context *ctx, + unsigned num_targets, + struct pipe_stream_output_target **targets, + unsigned append_bitmask) { struct r600_context *rctx = (struct r600_context *)ctx; unsigned i; /* Stop streamout. */ - if (rctx->num_so_targets && !rctx->streamout_start) { - r600_context_streamout_end(rctx); + if (rctx->streamout.num_targets && rctx->streamout.begin_emitted) { + r600_emit_streamout_end(rctx); } /* Set the new targets. */ for (i = 0; i < num_targets; i++) { - pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->so_targets[i], targets[i]); + pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->streamout.targets[i], targets[i]); r600_context_add_resource_size(ctx, targets[i]->buffer); } - for (; i < rctx->num_so_targets; i++) { - pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->so_targets[i], NULL); + for (; i < rctx->streamout.num_targets; i++) { + pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->streamout.targets[i], NULL); } - rctx->num_so_targets = num_targets; - rctx->streamout_start = num_targets != 0; - rctx->streamout_append_bitmask = append_bitmask; + rctx->streamout.enabled_mask = (num_targets >= 1 && targets[0] ? 1 : 0) | + (num_targets >= 2 && targets[1] ? 2 : 0) | + (num_targets >= 3 && targets[2] ? 4 : 0) | + (num_targets >= 4 && targets[3] ? 8 : 0); + + rctx->streamout.num_targets = num_targets; + rctx->streamout.append_bitmask = append_bitmask; + + if (num_targets) { + r600_streamout_buffers_dirty(rctx); + } } static void r600_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask) @@ -1352,12 +1381,6 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info info.index_bias = info.start; } - /* Enable stream out if needed. */ - if (rctx->streamout_start) { - r600_context_streamout_begin(rctx); - rctx->streamout_start = FALSE; - } - /* Set the index offset and multi primitive */ if (rctx->vgt2_state.vgt_indx_offset != info.index_bias) { rctx->vgt2_state.vgt_indx_offset = info.index_bias; @@ -1764,7 +1787,7 @@ void r600_init_common_state_functions(struct r600_context *rctx) rctx->context.texture_barrier = r600_texture_barrier; rctx->context.create_stream_output_target = r600_create_so_target; rctx->context.stream_output_target_destroy = r600_so_target_destroy; - rctx->context.set_stream_output_targets = r600_set_so_targets; + rctx->context.set_stream_output_targets = r600_set_streamout_targets; rctx->context.draw_vbo = r600_draw_vbo; } |