summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDave Airlie <[email protected]>2015-11-30 13:15:57 +1000
committerDave Airlie <[email protected]>2015-12-07 09:58:59 +1000
commitbb2b8778cbf7ca6263bf1540708900620a56ace4 (patch)
tree4fc63f99bdb503a81cd85ec57b93f5da21dc7268
parentd1b90839c07c34d3390d393df3b78c13d0b06684 (diff)
r600: make adjust_gprs use hw stages.
This changes the r600 specific GPR adjustment code to use the stage defines, and arrays. This is prep work for the tess changes later. Reviewed-by: Oded Gabbay <[email protected]> Signed-off-by: Dave Airlie <[email protected]>
-rw-r--r--src/gallium/drivers/r600/r600_pipe.h2
-rw-r--r--src/gallium/drivers/r600/r600_state.c117
2 files changed, 64 insertions, 55 deletions
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 5b5eaacf683..e248b372f03 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -432,7 +432,7 @@ struct r600_context {
/* Hardware info. */
boolean has_vertex_cache;
boolean keep_tiling_flags;
- unsigned default_ps_gprs, default_vs_gprs;
+ unsigned default_gprs[EG_NUM_HW_STAGES];
unsigned r6xx_num_clause_temp_gprs;
/* Miscellaneous state objects. */
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index b11cfeab17e..c7e0007e600 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -2044,57 +2044,62 @@ static void r600_emit_gs_rings(struct r600_context *rctx, struct r600_atom *a)
/* Adjust GPR allocation on R6xx/R7xx */
bool r600_adjust_gprs(struct r600_context *rctx)
{
- unsigned num_ps_gprs = rctx->ps_shader->current->shader.bc.ngpr;
- unsigned num_vs_gprs, num_es_gprs, num_gs_gprs;
- unsigned new_num_ps_gprs = num_ps_gprs;
- unsigned new_num_vs_gprs, new_num_es_gprs, new_num_gs_gprs;
- unsigned cur_num_ps_gprs = G_008C04_NUM_PS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1);
- unsigned cur_num_vs_gprs = G_008C04_NUM_VS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1);
- unsigned cur_num_gs_gprs = G_008C08_NUM_GS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_2);
- unsigned cur_num_es_gprs = G_008C08_NUM_ES_GPRS(rctx->config_state.sq_gpr_resource_mgmt_2);
- unsigned def_num_ps_gprs = rctx->default_ps_gprs;
- unsigned def_num_vs_gprs = rctx->default_vs_gprs;
- unsigned def_num_gs_gprs = 0;
- unsigned def_num_es_gprs = 0;
+ unsigned num_gprs[R600_NUM_HW_STAGES];
+ unsigned new_gprs[R600_NUM_HW_STAGES];
+ unsigned cur_gprs[R600_NUM_HW_STAGES];
+ unsigned def_gprs[R600_NUM_HW_STAGES];
unsigned def_num_clause_temp_gprs = rctx->r6xx_num_clause_temp_gprs;
- /* hardware will reserve twice num_clause_temp_gprs */
- unsigned max_gprs = def_num_gs_gprs + def_num_es_gprs + def_num_ps_gprs + def_num_vs_gprs + def_num_clause_temp_gprs * 2;
+ unsigned max_gprs;
unsigned tmp, tmp2;
+ unsigned i;
+ bool need_recalc = false, use_default = true;
+
+ /* hardware will reserve twice num_clause_temp_gprs */
+ max_gprs = def_num_clause_temp_gprs * 2;
+ for (i = 0; i < R600_NUM_HW_STAGES; i++) {
+ def_gprs[i] = rctx->default_gprs[i];
+ max_gprs += def_gprs[i];
+ }
+ cur_gprs[R600_HW_STAGE_PS] = G_008C04_NUM_PS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1);
+ cur_gprs[R600_HW_STAGE_VS] = G_008C04_NUM_VS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1);
+ cur_gprs[R600_HW_STAGE_GS] = G_008C08_NUM_GS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_2);
+ cur_gprs[R600_HW_STAGE_ES] = G_008C08_NUM_ES_GPRS(rctx->config_state.sq_gpr_resource_mgmt_2);
+
+ num_gprs[R600_HW_STAGE_PS] = rctx->ps_shader->current->shader.bc.ngpr;
if (rctx->gs_shader) {
- num_es_gprs = rctx->vs_shader->current->shader.bc.ngpr;
- num_gs_gprs = rctx->gs_shader->current->shader.bc.ngpr;
- num_vs_gprs = rctx->gs_shader->current->gs_copy_shader->shader.bc.ngpr;
+ num_gprs[R600_HW_STAGE_ES] = rctx->vs_shader->current->shader.bc.ngpr;
+ num_gprs[R600_HW_STAGE_GS] = rctx->gs_shader->current->shader.bc.ngpr;
+ num_gprs[R600_HW_STAGE_VS] = rctx->gs_shader->current->gs_copy_shader->shader.bc.ngpr;
} else {
- num_es_gprs = 0;
- num_gs_gprs = 0;
- num_vs_gprs = rctx->vs_shader->current->shader.bc.ngpr;
+ num_gprs[R600_HW_STAGE_ES] = 0;
+ num_gprs[R600_HW_STAGE_GS] = 0;
+ num_gprs[R600_HW_STAGE_VS] = rctx->vs_shader->current->shader.bc.ngpr;
+ }
+
+ for (i = 0; i < R600_NUM_HW_STAGES; i++) {
+ new_gprs[i] = num_gprs[i];
+ if (new_gprs[i] > cur_gprs[i])
+ need_recalc = true;
+ if (new_gprs[i] > def_gprs[i])
+ use_default = false;
}
- new_num_vs_gprs = num_vs_gprs;
- new_num_es_gprs = num_es_gprs;
- new_num_gs_gprs = num_gs_gprs;
/* the sum of all SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS must <= to max_gprs */
- if (new_num_ps_gprs > cur_num_ps_gprs || new_num_vs_gprs > cur_num_vs_gprs ||
- new_num_es_gprs > cur_num_es_gprs || new_num_gs_gprs > cur_num_gs_gprs) {
- /* try to use switch back to default */
- if (new_num_ps_gprs > def_num_ps_gprs || new_num_vs_gprs > def_num_vs_gprs ||
- new_num_gs_gprs > def_num_gs_gprs || new_num_es_gprs > def_num_es_gprs) {
- /* always privilege vs stage so that at worst we have the
- * pixel stage producing wrong output (not the vertex
- * stage) */
- new_num_ps_gprs = max_gprs - ((new_num_vs_gprs + new_num_es_gprs + new_num_gs_gprs) + def_num_clause_temp_gprs * 2);
- new_num_vs_gprs = num_vs_gprs;
- new_num_gs_gprs = num_gs_gprs;
- new_num_es_gprs = num_es_gprs;
- } else {
- new_num_ps_gprs = def_num_ps_gprs;
- new_num_vs_gprs = def_num_vs_gprs;
- new_num_es_gprs = def_num_es_gprs;
- new_num_gs_gprs = def_num_gs_gprs;
- }
- } else {
+ if (!need_recalc)
return true;
+
+ /* try to use switch back to default */
+ if (!use_default) {
+ /* always privilege vs stage so that at worst we have the
+ * pixel stage producing wrong output (not the vertex
+ * stage) */
+ new_gprs[R600_HW_STAGE_PS] = max_gprs - def_num_clause_temp_gprs * 2;
+ for (i = R600_HW_STAGE_VS; i < R600_NUM_HW_STAGES; i++)
+ new_gprs[R600_HW_STAGE_PS] -= new_gprs[i];
+ } else {
+ for (i = 0; i < R600_NUM_HW_STAGES; i++)
+ new_gprs[i] = def_gprs[i];
}
/* SQ_PGM_RESOURCES_*.NUM_GPRS must always be program to a value <=
@@ -2103,21 +2108,22 @@ bool r600_adjust_gprs(struct r600_context *rctx)
* it will lockup. So in this case just discard the draw command
* and don't change the current gprs repartitions.
*/
- if (num_ps_gprs > new_num_ps_gprs || num_vs_gprs > new_num_vs_gprs ||
- num_gs_gprs > new_num_gs_gprs || num_es_gprs > new_num_es_gprs) {
- R600_ERR("shaders require too many register (%d + %d + %d + %d) "
- "for a combined maximum of %d\n",
- num_ps_gprs, num_vs_gprs, num_es_gprs, num_gs_gprs, max_gprs);
- return false;
+ for (i = 0; i < R600_NUM_HW_STAGES; i++) {
+ if (num_gprs[i] > new_gprs[i]) {
+ R600_ERR("shaders require too many register (%d + %d + %d + %d) "
+ "for a combined maximum of %d\n",
+ num_gprs[R600_HW_STAGE_PS], num_gprs[R600_HW_STAGE_VS], num_gprs[R600_HW_STAGE_ES], num_gprs[R600_HW_STAGE_GS], max_gprs);
+ return false;
+ }
}
/* in some case we endup recomputing the current value */
- tmp = S_008C04_NUM_PS_GPRS(new_num_ps_gprs) |
- S_008C04_NUM_VS_GPRS(new_num_vs_gprs) |
+ tmp = S_008C04_NUM_PS_GPRS(new_gprs[R600_HW_STAGE_PS]) |
+ S_008C04_NUM_VS_GPRS(new_gprs[R600_HW_STAGE_VS]) |
S_008C04_NUM_CLAUSE_TEMP_GPRS(def_num_clause_temp_gprs);
- tmp2 = S_008C08_NUM_ES_GPRS(new_num_es_gprs) |
- S_008C08_NUM_GS_GPRS(new_num_gs_gprs);
+ tmp2 = S_008C08_NUM_ES_GPRS(new_gprs[R600_HW_STAGE_ES]) |
+ S_008C08_NUM_GS_GPRS(new_gprs[R600_HW_STAGE_GS]);
if (rctx->config_state.sq_gpr_resource_mgmt_1 != tmp || rctx->config_state.sq_gpr_resource_mgmt_2 != tmp2) {
rctx->config_state.sq_gpr_resource_mgmt_1 = tmp;
rctx->config_state.sq_gpr_resource_mgmt_2 = tmp2;
@@ -2286,8 +2292,11 @@ void r600_init_atom_start_cs(struct r600_context *rctx)
break;
}
- rctx->default_ps_gprs = num_ps_gprs;
- rctx->default_vs_gprs = num_vs_gprs;
+ rctx->default_gprs[R600_HW_STAGE_PS] = num_ps_gprs;
+ rctx->default_gprs[R600_HW_STAGE_VS] = num_vs_gprs;
+ rctx->default_gprs[R600_HW_STAGE_GS] = 0;
+ rctx->default_gprs[R600_HW_STAGE_ES] = 0;
+
rctx->r6xx_num_clause_temp_gprs = num_temp_gprs;
/* SQ_CONFIG */