diff options
author | Marek Olšák <[email protected]> | 2016-02-22 00:40:04 +0100 |
---|---|---|
committer | Marek Olšák <[email protected]> | 2016-03-01 00:18:19 +0100 |
commit | d0f3b524cdb8489a6872ba3639a13813de221fc2 (patch) | |
tree | c918a3180af2a75e4f0a8c58b156a7cb9d21a084 | |
parent | 09bfbd43a0818c67cb0a6dd4437cc4946e0af5dc (diff) |
radeonsi: use re-Z
This can increase perf for shaders that kill pixels (kill, alpha-test,
alpha-to-coverage).
v2: add comments
Reviewed-by: Michel Dänzer <[email protected]>
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.h | 1 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state.c | 6 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state_shaders.c | 20 |
3 files changed, 21 insertions, 6 deletions
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index ff5c24d8918..637d26488df 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -365,6 +365,7 @@ struct si_shader { struct r600_resource *scratch_bo; union si_shader_key key; bool is_binary_shared; + unsigned z_order; /* The following data is all that's needed for binary shaders. */ struct radeon_shader_binary binary; diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 2dfdbeb8d8f..b23b17ad77b 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -1339,10 +1339,10 @@ static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *s sctx->ps_db_shader_control; /* Bug workaround for smoothing (overrasterization) on SI. */ - if (sctx->b.chip_class == SI && sctx->smoothing_enabled) + if (sctx->b.chip_class == SI && sctx->smoothing_enabled) { + db_shader_control &= C_02880C_Z_ORDER; db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z); - else - db_shader_control |= S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z); + } /* Disable the gl_SampleMask fragment shader output if MSAA is disabled. */ if (sctx->framebuffer.nr_samples <= 1 || (rs && !rs->multisample_enable)) diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index a6753a7a528..321b87d80a6 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -789,6 +789,17 @@ static void si_shader_ps(struct si_shader *shader) S_00B02C_EXTRA_LDS_SIZE(shader->config.lds_size) | S_00B02C_USER_SGPR(num_user_sgprs) | S_00B32C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0)); + + /* Prefer RE_Z if the shader is complex enough. The requirement is either: + * - the shader uses at least 2 VMEM instructions, or + * - the code size is at least 50 2-dword instructions or 100 1-dword + * instructions. + */ + if (info->num_memory_instructions >= 2 || + shader->binary.code_size > 100*4) + shader->z_order = V_02880C_EARLY_Z_THEN_RE_Z; + else + shader->z_order = V_02880C_EARLY_Z_THEN_LATE_Z; } static void si_shader_init_pm4_state(struct si_shader *shader) @@ -1985,15 +1996,18 @@ bool si_update_shaders(struct si_context *sctx) si_update_vgt_shader_config(sctx); if (sctx->ps_shader.cso) { - unsigned db_shader_control = - sctx->ps_shader.cso->db_shader_control | - S_02880C_KILL_ENABLE(si_get_alpha_test_func(sctx) != PIPE_FUNC_ALWAYS); + unsigned db_shader_control; r = si_shader_select(ctx, &sctx->ps_shader); if (r) return false; si_pm4_bind_state(sctx, ps, sctx->ps_shader.current->pm4); + db_shader_control = + sctx->ps_shader.cso->db_shader_control | + S_02880C_KILL_ENABLE(si_get_alpha_test_func(sctx) != PIPE_FUNC_ALWAYS) | + S_02880C_Z_ORDER(sctx->ps_shader.current->z_order); + if (si_pm4_state_changed(sctx, ps) || si_pm4_state_changed(sctx, vs) || sctx->sprite_coord_enable != rs->sprite_coord_enable || sctx->flatshade != rs->flatshade) { |