diff options
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.h | 10 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state_shaders.c | 19 |
2 files changed, 28 insertions, 1 deletions
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 09ddf43f87d..a508ece85b1 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -445,12 +445,20 @@ struct si_shader_key { } mono; /* Optimization flags for asynchronous compilation only. */ - union { + struct { struct { uint64_t kill_outputs; /* "get_unique_index" bits */ uint32_t kill_outputs2; /* "get_unique_index2" bits */ unsigned clip_disable:1; } hw_vs; /* HW VS (it can be VS, TES, GS) */ + + /* For shaders where monolithic variants have better code. + * + * This is a flag that has no effect on code generation, + * but forces monolithic shaders to be used as soon as + * possible, because it's in the "opt" group. + */ + unsigned prefer_mono:1; } opt; }; diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index b7f848fd077..b2cdcb71944 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -1279,6 +1279,25 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, key, &key->part.gs.vs_prolog); key->part.gs.es = sctx->vs_shader.cso; } + + /* Merged ES-GS can have unbalanced wave usage. + * + * ES threads are per-vertex, while GS threads are + * per-primitive. So without any amplification, there + * are fewer GS threads than ES threads, which can result + * in empty (no-op) GS waves. With too much amplification, + * there are more GS threads than ES threads, which + * can result in empty (no-op) ES waves. + * + * Non-monolithic shaders are implemented by setting EXEC + * at the beginning of shader parts, and don't jump to + * the end if EXEC is 0. + * + * Monolithic shaders use conditional blocks, so they can + * jump and skip empty waves of ES or GS. So set this to + * always use optimized variants, which are monolithic. + */ + key->opt.prefer_mono = 1; } key->part.gs.prolog.tri_strip_adj_fix = sctx->gs_tri_strip_adj_fix; break; |