diff options
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.c | 26 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.h | 7 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state_shaders.c | 37 |
3 files changed, 61 insertions, 9 deletions
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 2b432244331..abe30e54a58 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2281,6 +2281,26 @@ static void si_llvm_export_vs(struct lp_build_tgsi_context *bld_base,
 	for (i = 0; i < noutput; i++) {
 		semantic_name = outputs[i].name;
 		semantic_index = outputs[i].sid;
+		bool export_param = true;
+
+		switch (semantic_name) {
+		case TGSI_SEMANTIC_POSITION: /* ignore these */
+		case TGSI_SEMANTIC_PSIZE:
+		case TGSI_SEMANTIC_CLIPVERTEX:
+		case TGSI_SEMANTIC_EDGEFLAG:
+			break;
+		case TGSI_SEMANTIC_GENERIC:
+		case TGSI_SEMANTIC_CLIPDIST:
+			if (shader->key.opt.hw_vs.kill_outputs &
+			    (1ull << si_shader_io_get_unique_index(semantic_name, semantic_index)))
+				export_param = false;
+			break;
+		default:
+			if (shader->key.opt.hw_vs.kill_outputs2 &
+			    (1u << si_shader_io_get_unique_index2(semantic_name, semantic_index)))
+				export_param = false;
+			break;
+		}
 
 handle_semantic:
 		/* Select the correct target */
@@ -2304,6 +2324,8 @@ handle_semantic:
 			break;
 		case TGSI_SEMANTIC_COLOR:
 		case TGSI_SEMANTIC_BCOLOR:
+			if (!export_param)
+				continue;
 			target = V_008DFC_SQ_EXP_PARAM + param_count;
 			assert(i < ARRAY_SIZE(shader->info.vs_output_param_offset));
 			shader->info.vs_output_param_offset[i] = param_count;
@@ -2325,6 +2347,8 @@ handle_semantic:
 		case TGSI_SEMANTIC_FOG:
 		case TGSI_SEMANTIC_TEXCOORD:
 		case TGSI_SEMANTIC_GENERIC:
+			if (!export_param)
+				continue;
 			target = V_008DFC_SQ_EXP_PARAM + param_count;
 			assert(i < ARRAY_SIZE(shader->info.vs_output_param_offset));
 			shader->info.vs_output_param_offset[i] = param_count;
@@ -7083,7 +7107,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
 	si_init_shader_ctx(&ctx, sscreen, shader, tm);
 	ctx.separate_prolog = !is_monolithic;
 
-	memset(shader->info.vs_output_param_offset, 0xff,
+	memset(shader->info.vs_output_param_offset, EXP_PARAM_UNDEFINED,
 	       sizeof(shader->info.vs_output_param_offset));
 
 	shader->info.uses_instanceid = sel->info.uses_instanceid;
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index fc9c9131be8..aa37676f887 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -322,10 +322,6 @@ struct si_vs_prolog_bits {
 /* Common VS bits between the shader key and the epilog key. */
 struct si_vs_epilog_bits {
 	unsigned export_prim_id:1; /* when PS needs it and GS is disabled */
-	/* TODO:
-	 * - skip layer, viewport, clipdist, and culldist parameter exports
-	 *   if PS doesn't read them
-	 */
 };
 
 /* Common TCS bits between the shader key and the epilog key. */
@@ -440,6 +436,8 @@ struct si_shader_key {
 	/* Optimization flags for asynchronous compilation only. */
 	union {
 		struct {
+			uint64_t kill_outputs; /* "get_unique_index" bits */
+			uint32_t kill_outputs2; /* "get_unique_index2" bits */
 			unsigned clip_disable:1;
 		} hw_vs; /* HW VS (it can be VS, TES, GS) */
 	} opt;
@@ -468,6 +466,7 @@ enum {
 	EXP_PARAM_DEFAULT_VAL_0001,
 	EXP_PARAM_DEFAULT_VAL_1110,
 	EXP_PARAM_DEFAULT_VAL_1111,
+	EXP_PARAM_UNDEFINED = 255,
 };
 
 /* GCN-specific shader info. */
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index e4d8747c6fe..7834f8711b6 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -858,11 +858,35 @@ static void si_shader_selector_key_hw_vs(struct si_context *sctx,
 					 struct si_shader_selector *vs,
 					 struct si_shader_key *key)
 {
+	struct si_shader_selector *ps = sctx->ps_shader.cso;
+
 	key->opt.hw_vs.clip_disable =
 		sctx->queued.named.rasterizer->clip_plane_enable == 0 &&
 		(vs->info.clipdist_writemask ||
 		 vs->info.writes_clipvertex) &&
 		!vs->info.culldist_writemask;
+
+	/* Find out if PS is disabled. */
+	bool ps_disabled = ps == NULL;
+
+	/* Find out which VS outputs aren't used by the PS. */
+	uint64_t outputs_written = vs->outputs_written;
+	uint32_t outputs_written2 = vs->outputs_written2;
+	uint64_t inputs_read = 0;
+	uint32_t inputs_read2 = 0;
+
+	outputs_written &= ~0x3; /* ignore POSITION, PSIZE */
+
+	if (!ps_disabled) {
+		inputs_read = ps->inputs_read;
+		inputs_read2 = ps->inputs_read2;
+	}
+
+	uint64_t linked = outputs_written & inputs_read;
+	uint32_t linked2 = outputs_written2 & inputs_read2;
+
+	key->opt.hw_vs.kill_outputs = ~linked & outputs_written;
+	key->opt.hw_vs.kill_outputs2 = ~linked2 & outputs_written2;
 }
 
 /* Compute the key for the hw shader variant */
@@ -1785,11 +1809,16 @@ static unsigned si_get_ps_input_cntl(struct si_context *sctx,
 			/* The input is loaded from parameter memory. */
 			ps_input_cntl |= S_028644_OFFSET(offset);
 		} else if (!G_028644_PT_SPRITE_TEX(ps_input_cntl)) {
-			/* The input is a DEFAULT_VAL constant. */
-			assert(offset >= EXP_PARAM_DEFAULT_VAL_0000 &&
-			       offset <= EXP_PARAM_DEFAULT_VAL_1111);
+			if (offset == EXP_PARAM_UNDEFINED) {
+				/* This can happen with depth-only rendering. */
+				offset = 0;
+			} else {
+				/* The input is a DEFAULT_VAL constant. */
+				assert(offset >= EXP_PARAM_DEFAULT_VAL_0000 &&
+				       offset <= EXP_PARAM_DEFAULT_VAL_1111);
+				offset -= EXP_PARAM_DEFAULT_VAL_0000;
+			}
 
-			offset -= EXP_PARAM_DEFAULT_VAL_0000;
 			ps_input_cntl = S_028644_OFFSET(0x20) |
 					S_028644_DEFAULT_VAL(offset);
 		}