aboutsummaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/radeonsi/si_state_draw.c
diff options
context:
space:
mode:
authorNicolai Hähnle <[email protected]>2017-08-29 23:11:38 +0200
committerNicolai Hähnle <[email protected]>2017-09-06 10:02:49 +0200
commit45c5c444518b7e83d9accd9f44702fa49282a3b8 (patch)
tree62a48cc1753b281930210d160e3f9b2dabedcb29 /src/gallium/drivers/radeonsi/si_state_draw.c
parent552aaa11ed56d020c3ecd6b5d17a159ad545b901 (diff)
radeonsi/gfx9: proper workaround for LS/HS VGPR initialization bug
When the HS wave is empty, the hardware writes the LS VGPRs starting at v0 instead of v2. Workaround by shifting them back into place when necessary. For simplicity, this is always done in the LS prolog. According to the hardware team, this will be fixed in future chips, so take that into account already. Note that this is not a bug fix, as the bug was already worked around by commit 166823bfd26 ("radeonsi/gfx9: add a temporary workaround for a tessellation driver bug"). This change merely replaces the workaround by one that should be better. v2: add workaround code to shader only when necessary v3: clarify the prefer_mono comment Reviewed-by: Marek Olšák <[email protected]>
Diffstat (limited to 'src/gallium/drivers/radeonsi/si_state_draw.c')
-rw-r--r--src/gallium/drivers/radeonsi/si_state_draw.c27
1 files changed, 22 insertions, 5 deletions
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 7ee6cf88e88..051dfea8f7c 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -195,11 +195,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
*/
*num_patches = MIN2(*num_patches, 40);
- if (sctx->b.chip_class == SI ||
- /* TODO: fix GFX9 where a threadgroup contains more than 1 wave and
- * LS vertices per patch > HS vertices per patch. Piglit: 16in-1out */
- (sctx->b.chip_class == GFX9 &&
- num_tcs_input_cp > num_tcs_output_cp)) {
+ if (sctx->b.chip_class == SI) {
/* SI bug workaround, related to power management. Limit LS-HS
* threadgroups to only one wave.
*/
@@ -1264,6 +1260,27 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
sctx->do_update_shaders = true;
}
+ if (sctx->tes_shader.cso &&
+ (sctx->b.family == CHIP_VEGA10 || sctx->b.family == CHIP_RAVEN)) {
+ /* Determine whether the LS VGPR fix should be applied.
+ *
+ * It is only required when num input CPs > num output CPs,
+ * which cannot happen with the fixed function TCS. We should
+ * also update this bit when switching from TCS to fixed
+ * function TCS.
+ */
+ struct si_shader_selector *tcs = sctx->tcs_shader.cso;
+ bool ls_vgpr_fix =
+ tcs &&
+ info->vertices_per_patch >
+ tcs->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT];
+
+ if (ls_vgpr_fix != sctx->ls_vgpr_fix) {
+ sctx->ls_vgpr_fix = ls_vgpr_fix;
+ sctx->do_update_shaders = true;
+ }
+ }
+
if (sctx->gs_shader.cso) {
/* Determine whether the GS triangle strip adjacency fix should
* be applied. Rotate every other triangle if