summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJerome Glisse <[email protected]>2012-10-26 18:59:05 -0400
committerJerome Glisse <[email protected]>2012-10-31 10:49:15 -0400
commit470952f751d1327831c638ee369b7f0f2e20e6fb (patch)
treeecc4744c1ceb0186c12ca48297505564fb24b907
parent183e122bdfe27f875c3c121964484dae9587c051 (diff)
r600g: avoid shader needing too many gpr to lockup the gpu v2
On r6xx/r7xx shader resource management need to make sure that the shader does not goes over the gpr register limit. Each specific asic has a maxmimum register that can be split btw shader stage. For each stage the shader must not use more register than the limit programmed. v2: Print an error message when discarding draw. Don't add another boolean to context structure, but rather propagate the discard boolean through the call chain. Signed-off-by: Jerome Glisse <[email protected]>
-rw-r--r--src/gallium/drivers/r600/r600_pipe.h2
-rw-r--r--src/gallium/drivers/r600/r600_state.c67
-rw-r--r--src/gallium/drivers/r600/r600_state_common.c27
3 files changed, 62 insertions, 34 deletions
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 238ab1676f4..342ab87052d 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -620,7 +620,7 @@ void *r600_create_db_flush_dsa(struct r600_context *rctx);
void *r600_create_resolve_blend(struct r600_context *rctx);
void *r700_create_resolve_blend(struct r600_context *rctx);
void *r600_create_decompress_blend(struct r600_context *rctx);
-void r600_adjust_gprs(struct r600_context *rctx);
+bool r600_adjust_gprs(struct r600_context *rctx);
boolean r600_is_format_supported(struct pipe_screen *screen,
enum pipe_format format,
enum pipe_texture_target target,
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index f3af5687258..5c52f3d518e 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -2186,36 +2186,61 @@ void r600_init_state_functions(struct r600_context *rctx)
}
/* Adjust GPR allocation on R6xx/R7xx */
-void r600_adjust_gprs(struct r600_context *rctx)
+bool r600_adjust_gprs(struct r600_context *rctx)
{
- unsigned num_ps_gprs = rctx->default_ps_gprs;
- unsigned num_vs_gprs = rctx->default_vs_gprs;
+ unsigned num_ps_gprs = rctx->ps_shader->current->shader.bc.ngpr;
+ unsigned num_vs_gprs = rctx->vs_shader->current->shader.bc.ngpr;
+ unsigned new_num_ps_gprs = num_ps_gprs;
+ unsigned new_num_vs_gprs = num_vs_gprs;
+ unsigned cur_num_ps_gprs = G_008C04_NUM_PS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1);
+ unsigned cur_num_vs_gprs = G_008C04_NUM_VS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1);
+ unsigned def_num_ps_gprs = rctx->default_ps_gprs;
+ unsigned def_num_vs_gprs = rctx->default_vs_gprs;
+ unsigned def_num_clause_temp_gprs = rctx->r6xx_num_clause_temp_gprs;
+ /* hardware will reserve twice num_clause_temp_gprs */
+ unsigned max_gprs = def_num_ps_gprs + def_num_vs_gprs + def_num_clause_temp_gprs * 2;
unsigned tmp;
- int diff;
- if (rctx->ps_shader->current->shader.bc.ngpr > rctx->default_ps_gprs) {
- diff = rctx->ps_shader->current->shader.bc.ngpr - rctx->default_ps_gprs;
- num_vs_gprs -= diff;
- num_ps_gprs += diff;
- }
-
- if (rctx->vs_shader->current->shader.bc.ngpr > rctx->default_vs_gprs)
- {
- diff = rctx->vs_shader->current->shader.bc.ngpr - rctx->default_vs_gprs;
- num_ps_gprs -= diff;
- num_vs_gprs += diff;
+ /* the sum of all SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS must <= to max_gprs */
+ if (new_num_ps_gprs > cur_num_ps_gprs || new_num_vs_gprs > cur_num_vs_gprs) {
+ /* try to use switch back to default */
+ if (new_num_ps_gprs > def_num_ps_gprs || new_num_vs_gprs > def_num_vs_gprs) {
+ /* always privilege vs stage so that at worst we have the
+ * pixel stage producing wrong output (not the vertex
+ * stage) */
+ new_num_ps_gprs = max_gprs - (new_num_vs_gprs + def_num_clause_temp_gprs * 2);
+ new_num_vs_gprs = num_vs_gprs;
+ } else {
+ new_num_ps_gprs = def_num_ps_gprs;
+ new_num_vs_gprs = def_num_vs_gprs;
+ }
+ } else {
+ return true;
}
- tmp = 0;
- tmp |= S_008C04_NUM_PS_GPRS(num_ps_gprs);
- tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs);
- tmp |= S_008C04_NUM_CLAUSE_TEMP_GPRS(rctx->r6xx_num_clause_temp_gprs);
-
- if (tmp != rctx->config_state.sq_gpr_resource_mgmt_1) {
+ /* SQ_PGM_RESOURCES_*.NUM_GPRS must always be program to a value <=
+ * SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS otherwise the GPU will lockup
+ * Also if a shader use more gpr than SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS
+ * it will lockup. So in this case just discard the draw command
+ * and don't change the current gprs repartitions.
+ */
+ if (num_ps_gprs > new_num_ps_gprs || num_vs_gprs > new_num_vs_gprs) {
+ R600_ERR("ps & vs shader require too many register (%d + %d) "
+ "for a combined maximum of %d\n",
+ num_ps_gprs, num_vs_gprs, max_gprs);
+ return false;
+ }
+
+ /* in some case we endup recomputing the current value */
+ tmp = S_008C04_NUM_PS_GPRS(new_num_ps_gprs) |
+ S_008C04_NUM_VS_GPRS(new_num_vs_gprs) |
+ S_008C04_NUM_CLAUSE_TEMP_GPRS(def_num_clause_temp_gprs);
+ if (rctx->config_state.sq_gpr_resource_mgmt_1 != tmp) {
rctx->config_state.sq_gpr_resource_mgmt_1 = tmp;
rctx->config_state.atom.dirty = true;
rctx->flags |= R600_CONTEXT_PS_PARTIAL_FLUSH;
}
+ return true;
}
void r600_init_atom_start_cs(struct r600_context *rctx)
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 3503c3964a6..7cd84bcac81 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -756,10 +756,6 @@ static int r600_shader_select(struct pipe_context *ctx,
shader->next_variant = sel->current;
sel->current = shader;
- if (rctx->chip_class < EVERGREEN && rctx->ps_shader && rctx->vs_shader) {
- r600_adjust_gprs(rctx);
- }
-
if (rctx->ps_shader &&
rctx->cb_misc_state.nr_ps_color_outputs != rctx->ps_shader->current->nr_ps_color_outputs) {
rctx->cb_misc_state.nr_ps_color_outputs = rctx->ps_shader->current->nr_ps_color_outputs;
@@ -815,9 +811,6 @@ static void r600_bind_ps_state(struct pipe_context *ctx, void *state)
rctx->cb_misc_state.multiwrite = multiwrite;
rctx->cb_misc_state.atom.dirty = true;
}
-
- if (rctx->vs_shader)
- r600_adjust_gprs(rctx);
}
if (rctx->cb_misc_state.nr_ps_color_outputs != rctx->ps_shader->current->nr_ps_color_outputs) {
@@ -840,9 +833,6 @@ static void r600_bind_vs_state(struct pipe_context *ctx, void *state)
if (state) {
r600_context_pipe_state_set(rctx, &rctx->vs_shader->current->rstate);
- if (rctx->chip_class < EVERGREEN && rctx->ps_shader)
- r600_adjust_gprs(rctx);
-
/* Update clip misc state. */
if (rctx->vs_shader->current->pa_cl_vs_out_cntl != rctx->clip_misc_state.pa_cl_vs_out_cntl ||
rctx->vs_shader->current->shader.clip_dist_write != rctx->clip_misc_state.clip_dist_write) {
@@ -1033,7 +1023,7 @@ static void r600_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask
rctx->sample_mask.atom.dirty = true;
}
-static void r600_update_derived_state(struct r600_context *rctx)
+static bool r600_update_derived_state(struct r600_context *rctx)
{
struct pipe_context * ctx = (struct pipe_context*)rctx;
unsigned ps_dirty = 0;
@@ -1071,6 +1061,13 @@ static void r600_update_derived_state(struct r600_context *rctx)
if (ps_dirty)
r600_context_pipe_state_set(rctx, &rctx->ps_shader->current->rstate);
+ if (rctx->chip_class < EVERGREEN && rctx->ps_shader && rctx->vs_shader) {
+ if (!r600_adjust_gprs(rctx)) {
+ /* discard rendering */
+ return false;
+ }
+ }
+
blend_disable = (rctx->dual_src_blend &&
rctx->ps_shader->current->nr_ps_color_outputs < 2);
@@ -1080,6 +1077,7 @@ static void r600_update_derived_state(struct r600_context *rctx)
rctx->blend_state.cso,
blend_disable);
}
+ return true;
}
static unsigned r600_conv_prim_to_gs_out(unsigned mode)
@@ -1138,7 +1136,12 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
return;
}
- r600_update_derived_state(rctx);
+ if (!r600_update_derived_state(rctx)) {
+ /* useless to render because current rendering command
+ * can't be achieved
+ */
+ return;
+ }
if (info.indexed) {
/* Initialize the index buffer struct. */