From 476e9cee1d0cbe321c401277214e6c36ce5b18c9 Mon Sep 17 00:00:00 2001
From: Marek Olšák
Date: Thu, 30 Jun 2016 10:57:34 +0200
Subject: radeonsi: compute only one set of interpolation (i,j) when MSAA is disabled
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This should increase the PS launch rate for shaders using at least 2 pairs
of perspective (i,j) and same for linear.

Reviewed-by: Nicolai Hähnle
---
Review notes (not part of the commit message):
 - select_interp_param(): the force_linear_center_interp branch now returns
   SI_PARAM_LINEAR_CENTER. It previously returned SI_PARAM_PERSP_CENTER, which
   disagrees with the PS prolog and the spi_ps_input_ena changes below, both
   of which redirect LINEAR_SAMPLE/LINEAR_CENTROID to LINEAR_CENTER.
 - Whitespace in this copy was reconstructed (tabs for leading indentation);
   verify it against the tree before applying.

 src/gallium/drivers/radeonsi/si_shader.c        | 74 ++++++++++++++++++++++++-
 src/gallium/drivers/radeonsi/si_shader.h        |  4 +-
 src/gallium/drivers/radeonsi/si_state_shaders.c | 13 +++++
 3 files changed, 88 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index da4a6cb19e5..a59c28e75bf 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1300,6 +1300,20 @@ static unsigned select_interp_param(struct si_shader_context *ctx,
 			return SI_PARAM_LINEAR_SAMPLE;
 		}
 	}
+	if (ctx->shader->key.ps.prolog.force_persp_center_interp) {
+		switch (param) {
+		case SI_PARAM_PERSP_CENTROID:
+		case SI_PARAM_PERSP_SAMPLE:
+			return SI_PARAM_PERSP_CENTER;
+		}
+	}
+	if (ctx->shader->key.ps.prolog.force_linear_center_interp) {
+		switch (param) {
+		case SI_PARAM_LINEAR_CENTROID:
+		case SI_PARAM_LINEAR_SAMPLE:
+			return SI_PARAM_LINEAR_CENTER;
+		}
+	}
 	return param;
 }
 
@@ -6382,6 +6396,8 @@ void si_dump_shader_key(unsigned shader, union si_shader_key *key, FILE *f)
 		fprintf(f, " prolog.poly_stipple = %u\n", key->ps.prolog.poly_stipple);
 		fprintf(f, " prolog.force_persp_sample_interp = %u\n", key->ps.prolog.force_persp_sample_interp);
 		fprintf(f, " prolog.force_linear_sample_interp = %u\n", key->ps.prolog.force_linear_sample_interp);
+		fprintf(f, " prolog.force_persp_center_interp = %u\n", key->ps.prolog.force_persp_center_interp);
+		fprintf(f, " prolog.force_linear_center_interp = %u\n", key->ps.prolog.force_linear_center_interp);
 		fprintf(f, " epilog.spi_shader_col_format = 0x%x\n", key->ps.epilog.spi_shader_col_format);
 		fprintf(f, " epilog.color_is_int8 = 0x%X\n", key->ps.epilog.color_is_int8);
 		fprintf(f, " epilog.last_cbuf = %u\n", key->ps.epilog.last_cbuf);
@@ -7255,6 +7271,40 @@ static bool si_compile_ps_prolog(struct si_screen *sscreen,
 						   linear_sample[i], base + 10 + i, "");
 	}
 
+	/* Force center interpolation. */
+	if (key->ps_prolog.states.force_persp_center_interp) {
+		unsigned i, base = key->ps_prolog.num_input_sgprs;
+		LLVMValueRef persp_center[2];
+
+		/* Read PERSP_CENTER. */
+		for (i = 0; i < 2; i++)
+			persp_center[i] = LLVMGetParam(func, base + 2 + i);
+		/* Overwrite PERSP_SAMPLE. */
+		for (i = 0; i < 2; i++)
+			ret = LLVMBuildInsertValue(gallivm->builder, ret,
+						   persp_center[i], base + i, "");
+		/* Overwrite PERSP_CENTROID. */
+		for (i = 0; i < 2; i++)
+			ret = LLVMBuildInsertValue(gallivm->builder, ret,
+						   persp_center[i], base + 4 + i, "");
+	}
+	if (key->ps_prolog.states.force_linear_center_interp) {
+		unsigned i, base = key->ps_prolog.num_input_sgprs;
+		LLVMValueRef linear_center[2];
+
+		/* Read LINEAR_CENTER. */
+		for (i = 0; i < 2; i++)
+			linear_center[i] = LLVMGetParam(func, base + 8 + i);
+		/* Overwrite LINEAR_SAMPLE. */
+		for (i = 0; i < 2; i++)
+			ret = LLVMBuildInsertValue(gallivm->builder, ret,
+						   linear_center[i], base + 6 + i, "");
+		/* Overwrite LINEAR_CENTROID. */
+		for (i = 0; i < 2; i++)
+			ret = LLVMBuildInsertValue(gallivm->builder, ret,
+						   linear_center[i], base + 10 + i, "");
+	}
+
 	/* Tell LLVM to insert WQM instruction sequence when needed. */
 	if (key->ps_prolog.wqm) {
 		LLVMAddTargetDependentFunctionAttr(func,
@@ -7414,7 +7464,9 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen,
 	prolog_key.ps_prolog.wqm = info->uses_derivatives &&
 		(prolog_key.ps_prolog.colors_read ||
 		 prolog_key.ps_prolog.states.force_persp_sample_interp ||
-		 prolog_key.ps_prolog.states.force_linear_sample_interp);
+		 prolog_key.ps_prolog.states.force_linear_sample_interp ||
+		 prolog_key.ps_prolog.states.force_persp_center_interp ||
+		 prolog_key.ps_prolog.states.force_linear_center_interp);
 
 	if (info->colors_read) {
 		unsigned *color = shader->selector->color_attr_index;
@@ -7443,6 +7495,8 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen,
 				/* Force the interpolation location for colors here. */
 				if (shader->key.ps.prolog.force_persp_sample_interp)
 					location = TGSI_INTERPOLATE_LOC_SAMPLE;
+				if (shader->key.ps.prolog.force_persp_center_interp)
+					location = TGSI_INTERPOLATE_LOC_CENTER;
 
 				switch (location) {
 				case TGSI_INTERPOLATE_LOC_SAMPLE:
@@ -7468,6 +7522,8 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen,
 				/* Force the interpolation location for colors here. */
 				if (shader->key.ps.prolog.force_linear_sample_interp)
 					location = TGSI_INTERPOLATE_LOC_SAMPLE;
+				if (shader->key.ps.prolog.force_linear_center_interp)
+					location = TGSI_INTERPOLATE_LOC_CENTER;
 
 				switch (location) {
 				case TGSI_INTERPOLATE_LOC_SAMPLE:
@@ -7499,6 +7555,8 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen,
 	if (prolog_key.ps_prolog.colors_read ||
 	    prolog_key.ps_prolog.states.force_persp_sample_interp ||
 	    prolog_key.ps_prolog.states.force_linear_sample_interp ||
+	    prolog_key.ps_prolog.states.force_persp_center_interp ||
+	    prolog_key.ps_prolog.states.force_linear_center_interp ||
 	    prolog_key.ps_prolog.states.poly_stipple) {
 		shader->prolog =
 			si_get_shader_part(sscreen, &sscreen->ps_prologs,
@@ -7544,6 +7602,20 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen,
 		shader->config.spi_ps_input_ena &= C_0286CC_LINEAR_CENTROID_ENA;
 		shader->config.spi_ps_input_ena |= S_0286CC_LINEAR_SAMPLE_ENA(1);
 	}
+	if (shader->key.ps.prolog.force_persp_center_interp &&
+	    (G_0286CC_PERSP_SAMPLE_ENA(shader->config.spi_ps_input_ena) ||
+	     G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_ena))) {
+		shader->config.spi_ps_input_ena &= C_0286CC_PERSP_SAMPLE_ENA;
+		shader->config.spi_ps_input_ena &= C_0286CC_PERSP_CENTROID_ENA;
+		shader->config.spi_ps_input_ena |= S_0286CC_PERSP_CENTER_ENA(1);
+	}
+	if (shader->key.ps.prolog.force_linear_center_interp &&
+	    (G_0286CC_LINEAR_SAMPLE_ENA(shader->config.spi_ps_input_ena) ||
+	     G_0286CC_LINEAR_CENTROID_ENA(shader->config.spi_ps_input_ena))) {
+		shader->config.spi_ps_input_ena &= C_0286CC_LINEAR_SAMPLE_ENA;
+		shader->config.spi_ps_input_ena &= C_0286CC_LINEAR_CENTROID_ENA;
+		shader->config.spi_ps_input_ena |= S_0286CC_LINEAR_CENTER_ENA(1);
+	}
 
 	/* POW_W_FLOAT requires that one of the perspective weights is enabled. */
 	if (G_0286CC_POS_W_FLOAT_ENA(shader->config.spi_ps_input_ena) &&
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 6c2e83267ef..064773605fb 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -315,9 +315,9 @@ struct si_ps_prolog_bits {
 	unsigned poly_stipple:1;
 	unsigned force_persp_sample_interp:1;
 	unsigned force_linear_sample_interp:1;
+	unsigned force_persp_center_interp:1;
+	unsigned force_linear_center_interp:1;
 	/* TODO:
-	 * - add force_center_interp if MSAA is disabled and centroid or
-	 *   sample are present
 	 * - add force_center_interp_bc_optimize to force center interpolation
 	 *   based on the bc_optimize SGPR bit if MSAA is enabled, centroid is
 	 *   present and sample isn't present.
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index cf5c1f94a5a..d679825914d 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -946,6 +946,19 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
 				key->ps.prolog.force_linear_sample_interp =
 					sel->info.uses_linear_center ||
 					sel->info.uses_linear_centroid;
+			} else if (!rs->multisample_enable ||
+				   sctx->framebuffer.nr_samples <= 1) {
+				/* Make sure SPI doesn't compute more than 1 pair
+				 * of (i,j), which is the optimization here. */
+				key->ps.prolog.force_persp_center_interp =
+					sel->info.uses_persp_center +
+					sel->info.uses_persp_centroid +
+					sel->info.uses_persp_sample > 1;
+
+				key->ps.prolog.force_linear_center_interp =
+					sel->info.uses_linear_center +
+					sel->info.uses_linear_centroid +
+					sel->info.uses_linear_sample > 1;
 			}
 		}
 
-- 
cgit v1.2.3