summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- src/gallium/drivers/radeonsi/si_shader.c 26
-rw-r--r-- src/gallium/drivers/radeonsi/si_shader.h 7
-rw-r--r-- src/gallium/drivers/radeonsi/si_state_shaders.c 37
3 files changed, 61 insertions, 9 deletions
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 2b432244331..abe30e54a58 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2281,6 +2281,26 @@ static void si_llvm_export_vs(struct lp_build_tgsi_context *bld_base,
for (i = 0; i < noutput; i++) {
semantic_name = outputs[i].name;
semantic_index = outputs[i].sid;
+ bool export_param = true;
+
+ switch (semantic_name) {
+ case TGSI_SEMANTIC_POSITION: /* ignore these */
+ case TGSI_SEMANTIC_PSIZE:
+ case TGSI_SEMANTIC_CLIPVERTEX:
+ case TGSI_SEMANTIC_EDGEFLAG:
+ break;
+ case TGSI_SEMANTIC_GENERIC:
+ case TGSI_SEMANTIC_CLIPDIST:
+ if (shader->key.opt.hw_vs.kill_outputs &
+ (1ull << si_shader_io_get_unique_index(semantic_name, semantic_index)))
+ export_param = false;
+ break;
+ default:
+ if (shader->key.opt.hw_vs.kill_outputs2 &
+ (1u << si_shader_io_get_unique_index2(semantic_name, semantic_index)))
+ export_param = false;
+ break;
+ }
handle_semantic:
/* Select the correct target */
@@ -2304,6 +2324,8 @@ handle_semantic:
break;
case TGSI_SEMANTIC_COLOR:
case TGSI_SEMANTIC_BCOLOR:
+ if (!export_param)
+ continue;
target = V_008DFC_SQ_EXP_PARAM + param_count;
assert(i < ARRAY_SIZE(shader->info.vs_output_param_offset));
shader->info.vs_output_param_offset[i] = param_count;
@@ -2325,6 +2347,8 @@ handle_semantic:
case TGSI_SEMANTIC_FOG:
case TGSI_SEMANTIC_TEXCOORD:
case TGSI_SEMANTIC_GENERIC:
+ if (!export_param)
+ continue;
target = V_008DFC_SQ_EXP_PARAM + param_count;
assert(i < ARRAY_SIZE(shader->info.vs_output_param_offset));
shader->info.vs_output_param_offset[i] = param_count;
@@ -7083,7 +7107,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
si_init_shader_ctx(&ctx, sscreen, shader, tm);
ctx.separate_prolog = !is_monolithic;
- memset(shader->info.vs_output_param_offset, 0xff,
+ memset(shader->info.vs_output_param_offset, EXP_PARAM_UNDEFINED,
sizeof(shader->info.vs_output_param_offset));
shader->info.uses_instanceid = sel->info.uses_instanceid;
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index fc9c9131be8..aa37676f887 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -322,10 +322,6 @@ struct si_vs_prolog_bits {
/* Common VS bits between the shader key and the epilog key. */
struct si_vs_epilog_bits {
unsigned export_prim_id:1; /* when PS needs it and GS is disabled */
- /* TODO:
- * - skip layer, viewport, clipdist, and culldist parameter exports
- * if PS doesn't read them
- */
};
/* Common TCS bits between the shader key and the epilog key. */
@@ -440,6 +436,8 @@ struct si_shader_key {
/* Optimization flags for asynchronous compilation only. */
union {
struct {
+ uint64_t kill_outputs; /* "get_unique_index" bits */
+ uint32_t kill_outputs2; /* "get_unique_index2" bits */
unsigned clip_disable:1;
} hw_vs; /* HW VS (it can be VS, TES, GS) */
} opt;
@@ -468,6 +466,7 @@ enum {
EXP_PARAM_DEFAULT_VAL_0001,
EXP_PARAM_DEFAULT_VAL_1110,
EXP_PARAM_DEFAULT_VAL_1111,
+ EXP_PARAM_UNDEFINED = 255,
};
/* GCN-specific shader info. */
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index e4d8747c6fe..7834f8711b6 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -858,11 +858,35 @@ static void si_shader_selector_key_hw_vs(struct si_context *sctx,
struct si_shader_selector *vs,
struct si_shader_key *key)
{
+ struct si_shader_selector *ps = sctx->ps_shader.cso;
+
key->opt.hw_vs.clip_disable =
sctx->queued.named.rasterizer->clip_plane_enable == 0 &&
(vs->info.clipdist_writemask ||
vs->info.writes_clipvertex) &&
!vs->info.culldist_writemask;
+
+ /* Find out if PS is disabled. */
+ bool ps_disabled = ps == NULL;
+
+ /* Find out which VS outputs aren't used by the PS. */
+ uint64_t outputs_written = vs->outputs_written;
+ uint32_t outputs_written2 = vs->outputs_written2;
+ uint64_t inputs_read = 0;
+ uint32_t inputs_read2 = 0;
+
+ outputs_written &= ~0x3; /* ignore POSITION, PSIZE */
+
+ if (!ps_disabled) {
+ inputs_read = ps->inputs_read;
+ inputs_read2 = ps->inputs_read2;
+ }
+
+ uint64_t linked = outputs_written & inputs_read;
+ uint32_t linked2 = outputs_written2 & inputs_read2;
+
+ key->opt.hw_vs.kill_outputs = ~linked & outputs_written;
+ key->opt.hw_vs.kill_outputs2 = ~linked2 & outputs_written2;
}
/* Compute the key for the hw shader variant */
@@ -1785,11 +1809,16 @@ static unsigned si_get_ps_input_cntl(struct si_context *sctx,
/* The input is loaded from parameter memory. */
ps_input_cntl |= S_028644_OFFSET(offset);
} else if (!G_028644_PT_SPRITE_TEX(ps_input_cntl)) {
- /* The input is a DEFAULT_VAL constant. */
- assert(offset >= EXP_PARAM_DEFAULT_VAL_0000 &&
- offset <= EXP_PARAM_DEFAULT_VAL_1111);
+ if (offset == EXP_PARAM_UNDEFINED) {
+ /* This can happen with depth-only rendering. */
+ offset = 0;
+ } else {
+ /* The input is a DEFAULT_VAL constant. */
+ assert(offset >= EXP_PARAM_DEFAULT_VAL_0000 &&
+ offset <= EXP_PARAM_DEFAULT_VAL_1111);
+ offset -= EXP_PARAM_DEFAULT_VAL_0000;
+ }
- offset -= EXP_PARAM_DEFAULT_VAL_0000;
ps_input_cntl = S_028644_OFFSET(0x20) |
S_028644_DEFAULT_VAL(offset);
}