diff options
-rw-r--r-- | src/gallium/drivers/r600/evergreen_compute.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/r600/evergreen_state.c | 63 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_hw_context.c | 10 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_pipe.c | 1 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_pipe.h | 33 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_state.c | 53 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_state_common.c | 36 |
7 files changed, 116 insertions, 82 deletions
diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index acf91ba43f3..35333120654 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -583,7 +583,7 @@ void evergreen_init_atom_start_compute_cs(struct r600_context *ctx) /* since all required registers are initialised in the * start_compute_cs_cmd atom, we can EMIT_EARLY here. */ - r600_init_command_buffer(cb, 256, EMIT_EARLY); + r600_init_command_buffer(ctx, cb, 1, 256); cb->pkt_flags = RADEON_CP_PACKET3_COMPUTE_MODE; switch (ctx->family) { diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index bda8ed5dc2d..9a5183ec6be 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -2161,27 +2161,50 @@ static void cayman_emit_sample_mask(struct r600_context *rctx, struct r600_atom void evergreen_init_state_functions(struct r600_context *rctx) { - r600_init_atom(&rctx->cb_misc_state.atom, evergreen_emit_cb_misc_state, 0, 0); - r600_atom_dirty(rctx, &rctx->cb_misc_state.atom); - r600_init_atom(&rctx->db_misc_state.atom, evergreen_emit_db_misc_state, 7, 0); - r600_atom_dirty(rctx, &rctx->db_misc_state.atom); - r600_init_atom(&rctx->vertex_buffer_state.atom, evergreen_fs_emit_vertex_buffers, 0, 0); - r600_init_atom(&rctx->cs_vertex_buffer_state.atom, evergreen_cs_emit_vertex_buffers, 0, 0); - r600_init_atom(&rctx->vs_constbuf_state.atom, evergreen_emit_vs_constant_buffers, 0, 0); - r600_init_atom(&rctx->ps_constbuf_state.atom, evergreen_emit_ps_constant_buffers, 0, 0); - r600_init_atom(&rctx->vs_samplers.views.atom, evergreen_emit_vs_sampler_views, 0, 0); - r600_init_atom(&rctx->ps_samplers.views.atom, evergreen_emit_ps_sampler_views, 0, 0); - r600_init_atom(&rctx->cs_shader_state.atom, evergreen_emit_cs_shader, 0, 0); - r600_init_atom(&rctx->vs_samplers.atom_sampler, evergreen_emit_vs_sampler, 0, 0); - 
r600_init_atom(&rctx->ps_samplers.atom_sampler, evergreen_emit_ps_sampler, 0, 0); - - if (rctx->chip_class == EVERGREEN) - r600_init_atom(&rctx->sample_mask.atom, evergreen_emit_sample_mask, 3, 0); - else - r600_init_atom(&rctx->sample_mask.atom, cayman_emit_sample_mask, 4, 0); + unsigned id = 4; + + /* !!! + * To avoid GPU lockup registers must be emitted in a specific order + * (no kidding ...). The order below is important and has been + * partially inferred from analyzing fglrx command stream. + * + * Don't reorder atoms without carefully checking the effect (GPU lockup + * or piglit regression). + * !!! + */ + + /* shader const */ + r600_init_atom(rctx, &rctx->vs_constbuf_state.atom, id++, evergreen_emit_vs_constant_buffers, 0); + r600_init_atom(rctx, &rctx->ps_constbuf_state.atom, id++, evergreen_emit_ps_constant_buffers, 0); + /* shader program */ + r600_init_atom(rctx, &rctx->cs_shader_state.atom, id++, evergreen_emit_cs_shader, 0); + /* sampler */ + r600_init_atom(rctx, &rctx->vs_samplers.atom_sampler, id++, evergreen_emit_vs_sampler, 0); + r600_init_atom(rctx, &rctx->ps_samplers.atom_sampler, id++, evergreen_emit_ps_sampler, 0); + /* resources */ + r600_init_atom(rctx, &rctx->vertex_buffer_state.atom, id++, evergreen_fs_emit_vertex_buffers, 0); + r600_init_atom(rctx, &rctx->cs_vertex_buffer_state.atom, id++, evergreen_cs_emit_vertex_buffers, 0); + r600_init_atom(rctx, &rctx->vs_samplers.views.atom, id++, evergreen_emit_vs_sampler_views, 0); + r600_init_atom(rctx, &rctx->ps_samplers.views.atom, id++, evergreen_emit_ps_sampler_views, 0); + + if (rctx->chip_class == EVERGREEN) { + r600_init_atom(rctx, &rctx->sample_mask.atom, id++, evergreen_emit_sample_mask, 3); + } else { + r600_init_atom(rctx, &rctx->sample_mask.atom, id++, cayman_emit_sample_mask, 4); + } rctx->sample_mask.sample_mask = ~0; r600_atom_dirty(rctx, &rctx->sample_mask.atom); + r600_init_atom(rctx, &rctx->cb_misc_state.atom, id++, evergreen_emit_cb_misc_state, 0); + r600_atom_dirty(rctx, 
&rctx->cb_misc_state.atom); + + r600_init_atom(rctx, &rctx->alphatest_state.atom, id++, r600_emit_alphatest_state, 6); + r600_atom_dirty(rctx, &rctx->alphatest_state.atom); + + r600_init_atom(rctx, &rctx->db_misc_state.atom, id++, evergreen_emit_db_misc_state, 7); + r600_atom_dirty(rctx, &rctx->db_misc_state.atom); + + rctx->context.create_blend_state = evergreen_create_blend_state; rctx->context.create_depth_stencil_alpha_state = evergreen_create_dsa_state; rctx->context.create_fs_state = r600_create_shader_state_ps; @@ -2230,7 +2253,7 @@ static void cayman_init_atom_start_cs(struct r600_context *rctx) { struct r600_command_buffer *cb = &rctx->start_cs_cmd; - r600_init_command_buffer(cb, 256, EMIT_EARLY); + r600_init_command_buffer(rctx, cb, 0, 256); /* This must be first. */ r600_store_value(cb, PKT3(PKT3_CONTEXT_CONTROL, 1, 0)); @@ -2608,7 +2631,7 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx) return; } - r600_init_command_buffer(cb, 256, EMIT_EARLY); + r600_init_command_buffer(rctx, cb, 0, 256); /* This must be first. */ r600_store_value(cb, PKT3(PKT3_CONTEXT_CONTROL, 1, 0)); diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c index 7b7b6b1ffac..0ec13e5bade 100644 --- a/src/gallium/drivers/r600/r600_hw_context.c +++ b/src/gallium/drivers/r600/r600_hw_context.c @@ -633,15 +633,17 @@ out_err: void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, boolean count_draw_in) { - struct r600_atom *state; - /* The number of dwords we already used in the CS so far. */ num_dw += ctx->cs->cdw; if (count_draw_in) { + unsigned i; + /* The number of dwords all the dirty states would take. 
*/ - LIST_FOR_EACH_ENTRY(state, &ctx->dirty_states, head) { - num_dw += state->num_dw; + for (i = 0; i < R600_MAX_ATOM; i++) { + if (ctx->atoms[i] && ctx->atoms[i]->dirty) { + num_dw += ctx->atoms[i]->num_dw; + } } num_dw += ctx->pm4_dirty_cdwords; diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 286c676f85b..9e6c28d523b 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -224,7 +224,6 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void rctx->family = rscreen->family; rctx->chip_class = rscreen->chip_class; - LIST_INITHEAD(&rctx->dirty_states); LIST_INITHEAD(&rctx->active_timer_queries); LIST_INITHEAD(&rctx->active_nontimer_queries); LIST_INITHEAD(&rctx->dirty); diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 721334d1e84..ff720e95cfe 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -35,6 +35,8 @@ #include "r600_resource.h" #include "evergreen_compute.h" +#define R600_MAX_ATOM 17 + #define R600_MAX_CONST_BUFFERS 2 #define R600_MAX_CONST_BUFFER_SIZE 4096 @@ -44,23 +46,14 @@ #define R600_BIG_ENDIAN 0 #endif -enum r600_atom_flags { - /* When set, atoms are added at the beginning of the dirty list - * instead of the end. */ - EMIT_EARLY = (1 << 0) -}; - /* This encapsulates a state or an operation which can emitted into the GPU * command stream. It's not limited to states only, it can be used for anything * that wants to write commands into the CS (e.g. cache flushes). */ struct r600_atom { void (*emit)(struct r600_context *ctx, struct r600_atom *state); - + unsigned id; unsigned num_dw; - enum r600_atom_flags flags; bool dirty; - - struct list_head head; }; /* This is an atom containing GPU commands that never change. @@ -372,8 +365,8 @@ struct r600_context { unsigned default_ps_gprs, default_vs_gprs; /* States based on r600_atom. 
*/ - struct list_head dirty_states; struct r600_command_buffer start_cs_cmd; /* invariant state mostly */ + struct r600_atom *atoms[R600_MAX_ATOM]; /** Compute specific registers initializations. The start_cs_cmd atom * must be emitted before start_compute_cs_cmd. */ struct r600_command_buffer start_compute_cs_cmd; @@ -464,20 +457,11 @@ static INLINE void r600_emit_atom(struct r600_context *rctx, struct r600_atom *a { atom->emit(rctx, atom); atom->dirty = false; - if (atom->head.next && atom->head.prev) - LIST_DELINIT(&atom->head); } static INLINE void r600_atom_dirty(struct r600_context *rctx, struct r600_atom *state) { - if (!state->dirty) { - if (state->flags & EMIT_EARLY) { - LIST_ADD(&state->head, &rctx->dirty_states); - } else { - LIST_ADDTAIL(&state->head, &rctx->dirty_states); - } - state->dirty = true; - } + state->dirty = true; } /* evergreen_state.c */ @@ -587,9 +571,10 @@ void r600_translate_index_buffer(struct r600_context *r600, unsigned count); /* r600_state_common.c */ -void r600_init_atom(struct r600_atom *atom, +void r600_emit_alphatest_state(struct r600_context *rctx, struct r600_atom *atom); +void r600_init_atom(struct r600_context *rctx, struct r600_atom *atom, unsigned id, void (*emit)(struct r600_context *ctx, struct r600_atom *state), - unsigned num_dw, enum r600_atom_flags flags); + unsigned num_dw); void r600_init_common_atoms(struct r600_context *rctx); unsigned r600_get_cb_flush_flags(struct r600_context *rctx); void r600_texture_barrier(struct pipe_context *ctx); @@ -772,7 +757,7 @@ static INLINE void eg_store_loop_const(struct r600_command_buffer *cb, unsigned r600_store_value(cb, value); } -void r600_init_command_buffer(struct r600_command_buffer *cb, unsigned num_dw, enum r600_atom_flags flags); +void r600_init_command_buffer(struct r600_context *rctx, struct r600_command_buffer *cb, unsigned id, unsigned num_dw); void r600_release_command_buffer(struct r600_command_buffer *cb); /* diff --git a/src/gallium/drivers/r600/r600_state.c 
b/src/gallium/drivers/r600/r600_state.c index 53bbdd9da7d..ccafdc6733b 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -2031,27 +2031,48 @@ static void r600_emit_sample_mask(struct r600_context *rctx, struct r600_atom *a void r600_init_state_functions(struct r600_context *rctx) { - r600_init_atom(&rctx->seamless_cube_map.atom, r600_emit_seamless_cube_map, 3, 0); - r600_atom_dirty(rctx, &rctx->seamless_cube_map.atom); - r600_init_atom(&rctx->cb_misc_state.atom, r600_emit_cb_misc_state, 0, 0); - r600_atom_dirty(rctx, &rctx->cb_misc_state.atom); - r600_init_atom(&rctx->db_misc_state.atom, r600_emit_db_misc_state, 4, 0); - r600_atom_dirty(rctx, &rctx->db_misc_state.atom); - r600_init_atom(&rctx->vertex_buffer_state.atom, r600_emit_vertex_buffers, 0, 0); - r600_init_atom(&rctx->vs_constbuf_state.atom, r600_emit_vs_constant_buffers, 0, 0); - r600_init_atom(&rctx->ps_constbuf_state.atom, r600_emit_ps_constant_buffers, 0, 0); - r600_init_atom(&rctx->vs_samplers.views.atom, r600_emit_vs_sampler_views, 0, 0); - r600_init_atom(&rctx->ps_samplers.views.atom, r600_emit_ps_sampler_views, 0, 0); + unsigned id = 4; + + /* !!! + * To avoid GPU lockup registers must be emitted in a specific order + * (no kidding ...). The order below is important and has been + * partially inferred from analyzing fglrx command stream. + * + * Don't reorder atoms without carefully checking the effect (GPU lockup + * or piglit regression). + * !!! 
+ */ + + /* shader const */ + r600_init_atom(rctx, &rctx->vs_constbuf_state.atom, id++, r600_emit_vs_constant_buffers, 0); + r600_init_atom(rctx, &rctx->ps_constbuf_state.atom, id++, r600_emit_ps_constant_buffers, 0); + /* sampler must be emited before TA_CNTL_AUX otherwise DISABLE_CUBE_WRAP change - * does not take effect + * does not take effect (TA_CNTL_AUX emited by r600_emit_seamless_cube_map) */ - r600_init_atom(&rctx->vs_samplers.atom_sampler, r600_emit_vs_sampler, 0, EMIT_EARLY); - r600_init_atom(&rctx->ps_samplers.atom_sampler, r600_emit_ps_sampler, 0, EMIT_EARLY); + r600_init_atom(rctx, &rctx->vs_samplers.atom_sampler, id++, r600_emit_vs_sampler, 0); + r600_init_atom(rctx, &rctx->ps_samplers.atom_sampler, id++, r600_emit_ps_sampler, 0); + /* resource */ + r600_init_atom(rctx, &rctx->vs_samplers.views.atom, id++, r600_emit_vs_sampler_views, 0); + r600_init_atom(rctx, &rctx->ps_samplers.views.atom, id++, r600_emit_ps_sampler_views, 0); + r600_init_atom(rctx, &rctx->vertex_buffer_state.atom, id++, r600_emit_vertex_buffers, 0); + + r600_init_atom(rctx, &rctx->seamless_cube_map.atom, id++, r600_emit_seamless_cube_map, 3); + r600_atom_dirty(rctx, &rctx->seamless_cube_map.atom); - r600_init_atom(&rctx->sample_mask.atom, r600_emit_sample_mask, 3, 0); + r600_init_atom(rctx, &rctx->sample_mask.atom, id++, r600_emit_sample_mask, 3); rctx->sample_mask.sample_mask = ~0; r600_atom_dirty(rctx, &rctx->sample_mask.atom); + r600_init_atom(rctx, &rctx->cb_misc_state.atom, id++, r600_emit_cb_misc_state, 0); + r600_atom_dirty(rctx, &rctx->cb_misc_state.atom); + + r600_init_atom(rctx, &rctx->alphatest_state.atom, id++, r600_emit_alphatest_state, 6); + r600_atom_dirty(rctx, &rctx->alphatest_state.atom); + + r600_init_atom(rctx, &rctx->db_misc_state.atom, id++, r600_emit_db_misc_state, 4); + r600_atom_dirty(rctx, &rctx->db_misc_state.atom); + rctx->context.create_blend_state = r600_create_blend_state; rctx->context.create_depth_stencil_alpha_state = r600_create_dsa_state; 
rctx->context.create_fs_state = r600_create_shader_state_ps; @@ -2157,7 +2178,7 @@ void r600_init_atom_start_cs(struct r600_context *rctx) struct r600_command_buffer *cb = &rctx->start_cs_cmd; uint32_t tmp; - r600_init_command_buffer(cb, 256, EMIT_EARLY); + r600_init_command_buffer(rctx, cb, 0, 256); /* R6xx requires this packet at the start of each command buffer */ if (rctx->chip_class == R600) { diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 26af6f609a5..e67eba8921c 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -44,11 +44,9 @@ static void r600_emit_command_buffer(struct r600_context *rctx, struct r600_atom cs->cdw += cb->atom.num_dw; } -void r600_init_command_buffer(struct r600_command_buffer *cb, unsigned num_dw, enum r600_atom_flags flags) +void r600_init_command_buffer(struct r600_context *rctx, struct r600_command_buffer *cb, unsigned id, unsigned num_dw) { - cb->atom.emit = r600_emit_command_buffer; - cb->atom.num_dw = 0; - cb->atom.flags = flags; + r600_init_atom(rctx, &cb->atom, id, r600_emit_command_buffer, 0); cb->buf = CALLOC(1, 4 * num_dw); cb->max_num_dw = num_dw; } @@ -79,16 +77,22 @@ static void r600_emit_r6xx_flush_and_inv(struct r600_context *rctx, struct r600_ cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT) | EVENT_INDEX(0); } -void r600_init_atom(struct r600_atom *atom, +void r600_init_atom(struct r600_context *rctx, + struct r600_atom *atom, + unsigned id, void (*emit)(struct r600_context *ctx, struct r600_atom *state), - unsigned num_dw, enum r600_atom_flags flags) + unsigned num_dw) { + assert(id < R600_MAX_ATOM); + assert(rctx->atoms[id] == NULL); + rctx->atoms[id] = atom; + atom->id = id; atom->emit = emit; atom->num_dw = num_dw; - atom->flags = flags; + atom->dirty = false; } -static void r600_emit_alphatest_state(struct r600_context *rctx, struct r600_atom *atom) +void 
r600_emit_alphatest_state(struct r600_context *rctx, struct r600_atom *atom) { struct radeon_winsys_cs *cs = rctx->cs; struct r600_alphatest_state *a = (struct r600_alphatest_state*)atom; @@ -106,10 +110,8 @@ static void r600_emit_alphatest_state(struct r600_context *rctx, struct r600_ato void r600_init_common_atoms(struct r600_context *rctx) { - r600_init_atom(&rctx->surface_sync_cmd.atom, r600_emit_surface_sync, 5, EMIT_EARLY); - r600_init_atom(&rctx->r6xx_flush_and_inv_cmd, r600_emit_r6xx_flush_and_inv, 2, EMIT_EARLY); - r600_init_atom(&rctx->alphatest_state.atom, r600_emit_alphatest_state, 6, 0); - r600_atom_dirty(rctx, &rctx->alphatest_state.atom); + r600_init_atom(rctx, &rctx->r6xx_flush_and_inv_cmd, 2, r600_emit_r6xx_flush_and_inv, 2); + r600_init_atom(rctx, &rctx->surface_sync_cmd.atom, 3, r600_emit_surface_sync, 5); } unsigned r600_get_cb_flush_flags(struct r600_context *rctx) @@ -1127,9 +1129,8 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo) struct r600_context *rctx = (struct r600_context *)ctx; struct pipe_draw_info info = *dinfo; struct pipe_index_buffer ib = {}; - unsigned prim, ls_mask = 0; + unsigned prim, ls_mask = 0, i; struct r600_block *dirty_block = NULL, *next_block = NULL; - struct r600_atom *state = NULL, *next_state = NULL; struct radeon_winsys_cs *cs = rctx->cs; uint64_t va; uint8_t *ptr; @@ -1221,8 +1222,11 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo) /* Emit states (the function expects that we emit at most 17 dwords here). */ r600_need_cs_space(rctx, 0, TRUE); - LIST_FOR_EACH_ENTRY_SAFE(state, next_state, &rctx->dirty_states, head) { - r600_emit_atom(rctx, state); + for (i = 0; i < R600_MAX_ATOM; i++) { + if (rctx->atoms[i] == NULL || !rctx->atoms[i]->dirty) { + continue; + } + r600_emit_atom(rctx, rctx->atoms[i]); } LIST_FOR_EACH_ENTRY_SAFE(dirty_block, next_block, &rctx->dirty,list) { r600_context_block_emit_dirty(rctx, dirty_block, 0 /* pkt_flags */); |