diff options
author | Marek Olšák <[email protected]> | 2017-06-06 15:23:42 +0200 |
---|---|---|
committer | Marek Olšák <[email protected]> | 2017-06-08 23:29:07 +0200 |
commit | 391673af7ad1565a5f6ac8fc2f8c9fcdd1fe9908 (patch) | |
tree | 968f54761e23772b15aeebb14094a5a626a57d46 /src/gallium/drivers | |
parent | 4b8d0c2b1d25c2cf1b3ce57c9c1d997bff1b9408 (diff) |
radeonsi: disable the patch ID workaround on SI when the patch ID isn't used (v2)
The workaround causes a massive performance decrease on 1-SE parts.
(Cape Verde, Hainan, Oland)
The performance regression is already part of 17.0 and 17.1.
v2: check tess_uses_prim_id
Cc: 17.0 17.1 <[email protected]>
Reviewed-by: Nicolai Hähnle <[email protected]>
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r-- | src/gallium/drivers/radeonsi/si_pipe.h | 1 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state_draw.c | 35 |
2 files changed, 21 insertions, 15 deletions
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 108929c10c6..55599464843 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -371,6 +371,7 @@ struct si_context { struct si_shader_selector *last_tcs; int last_num_tcs_input_cp; int last_tes_sh_base; + bool last_tess_uses_primid; unsigned last_num_patches; /* Debug state. */ diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index cd069e31a64..8508259ad87 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -102,6 +102,9 @@ static void si_emit_derived_tess_state(struct si_context *sctx, * It would be wrong to think that TCS = TES. */ struct si_shader_selector *tcs = sctx->tcs_shader.cso ? sctx->tcs_shader.cso : sctx->tes_shader.cso; + unsigned tess_uses_primid = sctx->ia_multi_vgt_param_key.u.tess_uses_prim_id; + bool has_primid_instancing_bug = sctx->b.chip_class == SI && + sctx->b.screen->info.max_se == 1; unsigned tes_sh_base = sctx->shader_userdata.sh_base[PIPE_SHADER_TESS_EVAL]; unsigned num_tcs_input_cp = info->vertices_per_patch; unsigned num_tcs_output_cp, num_tcs_inputs, num_tcs_outputs; @@ -128,7 +131,9 @@ static void si_emit_derived_tess_state(struct si_context *sctx, if (sctx->last_ls == ls_current && sctx->last_tcs == tcs && sctx->last_tes_sh_base == tes_sh_base && - sctx->last_num_tcs_input_cp == num_tcs_input_cp) { + sctx->last_num_tcs_input_cp == num_tcs_input_cp && + (!has_primid_instancing_bug || + (sctx->last_tess_uses_primid == tess_uses_primid))) { *num_patches = sctx->last_num_patches; return; } @@ -137,6 +142,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx, sctx->last_tcs = tcs; sctx->last_tes_sh_base = tes_sh_base; sctx->last_num_tcs_input_cp = num_tcs_input_cp; + sctx->last_tess_uses_primid = tess_uses_primid; /* This calculates how shader inputs and outputs among VS, TCS, and TES * are laid out in LDS. */ @@ -194,22 +200,21 @@ static void si_emit_derived_tess_state(struct si_context *sctx, */ unsigned one_wave = 64 / MAX2(num_tcs_input_cp, num_tcs_output_cp); *num_patches = MIN2(*num_patches, one_wave); - - if (sctx->screen->b.info.max_se == 1) { - /* The VGT HS block increments the patch ID unconditionally - * within a single threadgroup. This results in incorrect - * patch IDs when instanced draws are used. - * - * The intended solution is to restrict threadgroups to - * a single instance by setting SWITCH_ON_EOI, which - * should cause IA to split instances up. However, this - * doesn't work correctly on SI when there is no other - * SE to switch to. - */ - *num_patches = 1; - } } + /* The VGT HS block increments the patch ID unconditionally + * within a single threadgroup. This results in incorrect + * patch IDs when instanced draws are used. + * + * The intended solution is to restrict threadgroups to + * a single instance by setting SWITCH_ON_EOI, which + * should cause IA to split instances up. However, this + * doesn't work correctly on SI when there is no other + * SE to switch to. + */ + if (has_primid_instancing_bug) + *num_patches = 1; + sctx->last_num_patches = *num_patches; output_patch0_offset = input_patch_size * *num_patches; |