diff options
author | Sonny Jiang <sonny.jiang@amd.com> | 2018-06-07 12:13:48 -0400 |
---|---|---|
committer | Marek Olšák <marek.olsak@amd.com> | 2018-06-07 23:26:25 -0400 |
commit | 43b0269ce3e349590c49f082008ecfe9e7f85014 (patch) | |
tree | 0afb0247665f66cc2077c6efd078b69a18909e49 /src/gallium | |
parent | d797f1f47efd389ce656fa60fa51687c7c58daa4 (diff) |
radeonsi: emit_db_render_state packets optimization
Remembering latest states of registers to eliminate redunant SET_CONTEXT_REG packets
Signed-off-by: Sonny Jiang <sonny.jiang@amd.com>
Signed-off-by: Marek Olšák <marek.olsak@amd.com>
Diffstat (limited to 'src/gallium')
-rw-r--r-- | src/gallium/drivers/radeonsi/si_build_pm4.h | 43 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_gfx_cs.c | 3 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_pipe.h | 2 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state.c | 60 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state.h | 16 |
5 files changed, 95 insertions, 29 deletions
diff --git a/src/gallium/drivers/radeonsi/si_build_pm4.h b/src/gallium/drivers/radeonsi/si_build_pm4.h index 22f5558b7f1..45d943fac90 100644 --- a/src/gallium/drivers/radeonsi/si_build_pm4.h +++ b/src/gallium/drivers/radeonsi/si_build_pm4.h @@ -110,4 +110,47 @@ static inline void radeon_set_uconfig_reg_idx(struct radeon_winsys_cs *cs, radeon_emit(cs, value); } +/* Emit PKT3_SET_CONTEXT_REG if the register value is different. */ +static inline void radeon_opt_set_context_reg(struct si_context *sctx, unsigned offset, + enum si_tracked_reg reg, unsigned value) +{ + struct radeon_winsys_cs *cs = sctx->gfx_cs; + + if (!(sctx->tracked_regs.reg_saved & (1 << reg)) || + sctx->tracked_regs.reg_value[reg] != value ) { + + radeon_set_context_reg(cs, offset, value); + + sctx->tracked_regs.reg_saved |= 1 << reg; + sctx->tracked_regs.reg_value[reg] = value; + } +} + +/** + * Set 2 consecutive registers if any registers value is different. + * @param offset starting register offset + * @param value1 is written to first register + * @param value2 is written to second register + */ +static inline void radeon_opt_set_context_reg2(struct si_context *sctx, unsigned offset, + enum si_tracked_reg reg, unsigned value1, + unsigned value2) +{ + struct radeon_winsys_cs *cs = sctx->gfx_cs; + + if (!(sctx->tracked_regs.reg_saved & (1 << reg)) || + !(sctx->tracked_regs.reg_saved & (1 << (reg + 1))) || + sctx->tracked_regs.reg_value[reg] != value1 || + sctx->tracked_regs.reg_value[reg+1] != value2 ) { + + radeon_set_context_reg_seq(cs, offset, 2); + radeon_emit(cs, value1); + radeon_emit(cs, value2); + + sctx->tracked_regs.reg_value[reg] = value1; + sctx->tracked_regs.reg_value[reg+1] = value2; + sctx->tracked_regs.reg_saved |= 3 << reg; + } +} + #endif diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c index ec74c1bc703..0b9a0205874 100644 --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c @@ -321,4 +321,7 @@ void si_begin_new_gfx_cs(struct si_context *ctx) ctx->last_num_tcs_input_cp = -1; ctx->cs_shader_state.initialized = false; + + /* Set all saved registers state to unknown */ + ctx->tracked_regs.reg_saved = 0; } diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index cf9b124fe5a..4edefbdbd25 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -1033,6 +1033,8 @@ struct si_context { void (*dma_clear_buffer)(struct si_context *sctx, struct pipe_resource *dst, uint64_t offset, uint64_t size, unsigned value); + + struct si_tracked_regs tracked_regs; }; /* cik_sdma.c */ diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 3a7e928df53..c95b92940aa 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -1343,28 +1343,25 @@ void si_save_qbo_state(struct si_context *sctx, struct si_qbo_state *st) static void si_emit_db_render_state(struct si_context *sctx) { - struct radeon_winsys_cs *cs = sctx->gfx_cs; struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; - unsigned db_shader_control; - - radeon_set_context_reg_seq(cs, R_028000_DB_RENDER_CONTROL, 2); + unsigned db_shader_control, db_render_control, db_count_control; /* DB_RENDER_CONTROL */ if (sctx->dbcb_depth_copy_enabled || sctx->dbcb_stencil_copy_enabled) { - radeon_emit(cs, - S_028000_DEPTH_COPY(sctx->dbcb_depth_copy_enabled) | - S_028000_STENCIL_COPY(sctx->dbcb_stencil_copy_enabled) | - S_028000_COPY_CENTROID(1) | - S_028000_COPY_SAMPLE(sctx->dbcb_copy_sample)); + db_render_control = + S_028000_DEPTH_COPY(sctx->dbcb_depth_copy_enabled) | + S_028000_STENCIL_COPY(sctx->dbcb_stencil_copy_enabled) | + S_028000_COPY_CENTROID(1) | + S_028000_COPY_SAMPLE(sctx->dbcb_copy_sample); } else if (sctx->db_flush_depth_inplace || sctx->db_flush_stencil_inplace) { - radeon_emit(cs, - S_028000_DEPTH_COMPRESS_DISABLE(sctx->db_flush_depth_inplace) | - S_028000_STENCIL_COMPRESS_DISABLE(sctx->db_flush_stencil_inplace)); + db_render_control = + S_028000_DEPTH_COMPRESS_DISABLE(sctx->db_flush_depth_inplace) | + S_028000_STENCIL_COMPRESS_DISABLE(sctx->db_flush_stencil_inplace); } else { - radeon_emit(cs, - S_028000_DEPTH_CLEAR_ENABLE(sctx->db_depth_clear) | - S_028000_STENCIL_CLEAR_ENABLE(sctx->db_stencil_clear)); + db_render_control = + S_028000_DEPTH_CLEAR_ENABLE(sctx->db_depth_clear) | + S_028000_STENCIL_CLEAR_ENABLE(sctx->db_stencil_clear); } /* DB_COUNT_CONTROL (occlusion queries) */ @@ -1373,28 +1370,33 @@ static void si_emit_db_render_state(struct si_context *sctx) bool perfect = sctx->num_perfect_occlusion_queries > 0; if (sctx->chip_class >= CIK) { - radeon_emit(cs, - S_028004_PERFECT_ZPASS_COUNTS(perfect) | - S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples) | - S_028004_ZPASS_ENABLE(1) | - S_028004_SLICE_EVEN_ENABLE(1) | - S_028004_SLICE_ODD_ENABLE(1)); + db_count_control = + S_028004_PERFECT_ZPASS_COUNTS(perfect) | + S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples) | + S_028004_ZPASS_ENABLE(1) | + S_028004_SLICE_EVEN_ENABLE(1) | + S_028004_SLICE_ODD_ENABLE(1); } else { - radeon_emit(cs, - S_028004_PERFECT_ZPASS_COUNTS(perfect) | - S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples)); + db_count_control = + S_028004_PERFECT_ZPASS_COUNTS(perfect) | + S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples); } } else { /* Disable occlusion queries. */ if (sctx->chip_class >= CIK) { - radeon_emit(cs, 0); + db_count_control = 0; } else { - radeon_emit(cs, S_028004_ZPASS_INCREMENT_DISABLE(1)); + db_count_control = S_028004_ZPASS_INCREMENT_DISABLE(1); } } + radeon_opt_set_context_reg2(sctx, R_028000_DB_RENDER_CONTROL, + SI_TRACKED_DB_RENDER_CONTROL, db_render_control, + db_count_control); + /* DB_RENDER_OVERRIDE2 */ - radeon_set_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2, + radeon_opt_set_context_reg(sctx, R_028010_DB_RENDER_OVERRIDE2, + SI_TRACKED_DB_RENDER_OVERRIDE2, S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(sctx->db_depth_disable_expclear) | S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(sctx->db_stencil_disable_expclear) | S_028010_DECOMPRESS_Z_ON_FLUSH(sctx->framebuffer.nr_samples >= 4)); @@ -1415,8 +1417,8 @@ static void si_emit_db_render_state(struct si_context *sctx) !sctx->screen->rbplus_allowed) db_shader_control |= S_02880C_DUAL_QUAD_DISABLE(1); - radeon_set_context_reg(cs, R_02880C_DB_SHADER_CONTROL, - db_shader_control); + radeon_opt_set_context_reg(sctx, R_02880C_DB_SHADER_CONTROL, + SI_TRACKED_DB_SHADER_CONTROL, db_shader_control); } /* diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index d235f31c792..fb5f7211895 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -206,6 +206,22 @@ struct si_shader_data { uint32_t sh_base[SI_NUM_SHADERS]; }; +/* The list of registers whose emitted values are remembered by si_context. */ +enum si_tracked_reg { + SI_TRACKED_DB_RENDER_CONTROL, /* 2 consecutive registers */ + SI_TRACKED_DB_COUNT_CONTROL, + + SI_TRACKED_DB_RENDER_OVERRIDE2, + SI_TRACKED_DB_SHADER_CONTROL, + + SI_NUM_TRACKED_REGS, +}; + +struct si_tracked_regs { + uint32_t reg_saved; + uint32_t reg_value[SI_NUM_TRACKED_REGS]; +}; + /* Private read-write buffer slots. */ enum { SI_ES_RING_ESGS, |