diff options
-rw-r--r-- | src/gallium/drivers/radeonsi/si_hw_context.c | 20 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_pipe.c | 4 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_pipe.h | 1 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state.c | 48 |
4 files changed, 63 insertions, 10 deletions
diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c index 3582cd711b5..46e60739382 100644 --- a/src/gallium/drivers/radeonsi/si_hw_context.c +++ b/src/gallium/drivers/radeonsi/si_hw_context.c @@ -232,27 +232,33 @@ void si_begin_new_cs(struct si_context *ctx) ctx->prefetch_L2_mask |= SI_PREFETCH_VBO_DESCRIPTORS; /* CLEAR_STATE disables all colorbuffers, so only enable bound ones. */ - ctx->framebuffer.dirty_cbufs = - u_bit_consecutive(0, ctx->framebuffer.state.nr_cbufs); - /* CLEAR_STATE disables the zbuffer, so only enable it if it's bound. */ - ctx->framebuffer.dirty_zsbuf = ctx->framebuffer.state.zsbuf != NULL; + bool has_clear_state = ctx->screen->has_clear_state; + if (has_clear_state) { + ctx->framebuffer.dirty_cbufs = + u_bit_consecutive(0, ctx->framebuffer.state.nr_cbufs); + /* CLEAR_STATE disables the zbuffer, so only enable it if it's bound. */ + ctx->framebuffer.dirty_zsbuf = ctx->framebuffer.state.zsbuf != NULL; + } else { + ctx->framebuffer.dirty_cbufs = u_bit_consecutive(0, 8); + ctx->framebuffer.dirty_zsbuf = true; + } /* This should always be marked as dirty to set the framebuffer scissor * at least. */ si_mark_atom_dirty(ctx, &ctx->framebuffer.atom); si_mark_atom_dirty(ctx, &ctx->clip_regs); /* CLEAR_STATE sets zeros. */ - if (ctx->clip_state.any_nonzeros) + if (!has_clear_state || ctx->clip_state.any_nonzeros) si_mark_atom_dirty(ctx, &ctx->clip_state.atom); ctx->msaa_sample_locs.nr_samples = 0; si_mark_atom_dirty(ctx, &ctx->msaa_sample_locs.atom); si_mark_atom_dirty(ctx, &ctx->msaa_config); /* CLEAR_STATE sets 0xffff. */ - if (ctx->sample_mask.sample_mask != 0xffff) + if (!has_clear_state || ctx->sample_mask.sample_mask != 0xffff) si_mark_atom_dirty(ctx, &ctx->sample_mask.atom); si_mark_atom_dirty(ctx, &ctx->cb_render_state); /* CLEAR_STATE sets zeros. */ - if (ctx->blend_color.any_nonzeros) + if (!has_clear_state || ctx->blend_color.any_nonzeros) si_mark_atom_dirty(ctx, &ctx->blend_color.atom); si_mark_atom_dirty(ctx, &ctx->db_render_state); si_mark_atom_dirty(ctx, &ctx->stencil_ref.atom); diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index cac1d015593..80a77a8f1f2 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -1067,6 +1067,10 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws, sscreen->tess_offchip_block_dw_size = sscreen->b.family == CHIP_HAWAII ? 4096 : 8192; + /* The mere presense of CLEAR_STATE in the IB causes random GPU hangs + * on SI. */ + sscreen->has_clear_state = sscreen->b.chip_class >= CIK; + sscreen->has_distributed_tess = sscreen->b.chip_class >= VI && sscreen->b.info.max_se >= 2; diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index ee0ab1b37b9..8d822874d55 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -87,6 +87,7 @@ struct si_screen { struct r600_common_screen b; unsigned gs_table_depth; unsigned tess_offchip_block_dw_size; + bool has_clear_state; bool has_distributed_tess; bool has_draw_indirect_multi; bool has_ds_bpermute; diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 2c413a4c365..8010df6584d 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -4424,8 +4424,12 @@ static void si_init_config(struct si_context *sctx) unsigned rb_mask = sctx->screen->b.info.enabled_rb_mask; unsigned raster_config, raster_config_1; uint64_t border_color_va = sctx->border_color_buffer->gpu_address; + bool has_clear_state = sscreen->has_clear_state; struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state); + /* Only SI can disable CLEAR_STATE for now. */ + assert(has_clear_state || sscreen->b.chip_class == SI); + if (!pm4) return; @@ -4434,11 +4438,15 @@ static void si_init_config(struct si_context *sctx) si_pm4_cmd_add(pm4, CONTEXT_CONTROL_SHADOW_ENABLE(1)); si_pm4_cmd_end(pm4, false); - si_pm4_cmd_begin(pm4, PKT3_CLEAR_STATE); - si_pm4_cmd_add(pm4, 0); - si_pm4_cmd_end(pm4, false); + if (has_clear_state) { + si_pm4_cmd_begin(pm4, PKT3_CLEAR_STATE); + si_pm4_cmd_add(pm4, 0); + si_pm4_cmd_end(pm4, false); + } si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64)); + if (!has_clear_state) + si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0)); /* FIXME calculate these values somehow ??? */ if (sctx->b.chip_class <= VI) { @@ -4446,7 +4454,15 @@ static void si_init_config(struct si_context *sctx) si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40); } + if (!has_clear_state) { + si_pm4_set_reg(pm4, R_028A5C_VGT_GS_PER_VS, 0x2); + si_pm4_set_reg(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0); + si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0); + } + si_pm4_set_reg(pm4, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 1); + if (!has_clear_state) + si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0); if (sctx->b.chip_class < CIK) si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) | S_008A14_CLIP_VTX_REORDER_ENA(1)); @@ -4454,6 +4470,9 @@ static void si_init_config(struct si_context *sctx) si_pm4_set_reg(pm4, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210); si_pm4_set_reg(pm4, R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98); + if (!has_clear_state) + si_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0); + switch (sctx->screen->b.family) { case CHIP_TAHITI: case CHIP_PITCAIRN: @@ -4564,6 +4583,26 @@ static void si_init_config(struct si_context *sctx) S_028034_BR_X(16384) | S_028034_BR_Y(16384)); } + if (!has_clear_state) { + si_pm4_set_reg(pm4, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF); + si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE, + S_028230_ER_TRI(0xA) | + S_028230_ER_POINT(0xA) | + S_028230_ER_RECT(0xA) | + /* Required by DX10_DIAMOND_TEST_ENA: */ + S_028230_ER_LINE_LR(0x1A) | + S_028230_ER_LINE_RL(0x26) | + S_028230_ER_LINE_TB(0xA) | + S_028230_ER_LINE_BT(0xA)); + /* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on SI */ + si_pm4_set_reg(pm4, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0); + si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0); + si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0); + si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0); + si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0); + si_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE, 0); + } + if (sctx->b.chip_class >= GFX9) { si_pm4_set_reg(pm4, R_030920_VGT_MAX_VTX_INDX, ~0); si_pm4_set_reg(pm4, R_030924_VGT_MIN_VTX_INDX, 0); @@ -4640,6 +4679,9 @@ static void si_init_config(struct si_context *sctx) vgt_tess_distribution |= S_028B50_TRAP_SPLIT(3); si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION, vgt_tess_distribution); + } else if (!has_clear_state) { + si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14); + si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 16); } si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8); |