summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorMarek Olšák <[email protected]>2015-12-01 14:56:54 +0100
committerMarek Olšák <[email protected]>2015-12-11 15:25:12 +0100
commitde887ba90ce077a0243269aa0c72a1ab0d2d3ff4 (patch)
treef0b42c7e99a84725261384466569f074f0c3a30d /src
parent0f9519b938d78ac55e8e5fdad5727a79baf18d42 (diff)
radeonsi: implement RB+ for Stoney (v2)
v2: fix dual source blending Reviewed-by: Alex Deucher <[email protected]>
Diffstat (limited to 'src')
-rw-r--r--src/gallium/drivers/radeon/r600_pipe_common.c1
-rw-r--r--src/gallium/drivers/radeon/r600_pipe_common.h3
-rw-r--r--src/gallium/drivers/radeon/r600_texture.c6
-rw-r--r--src/gallium/drivers/radeonsi/si_state.c159
-rw-r--r--src/gallium/drivers/radeonsi/sid.h3
5 files changed, 170 insertions, 2 deletions
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index 8899ba4d55b..ba541acfd75 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -375,6 +375,7 @@ static const struct debug_named_value common_debug_options[] = {
{ "check_vm", DBG_CHECK_VM, "Check VM faults and dump debug info." },
{ "nodcc", DBG_NO_DCC, "Disable DCC." },
{ "nodccclear", DBG_NO_DCC_CLEAR, "Disable DCC fast clear." },
+ { "norbplus", DBG_NO_RB_PLUS, "Disable RB+ on Stoney." },
DEBUG_NAMED_VALUE_END /* must be last */
};
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index 8c6c0c37e50..dd23ed5be89 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -86,6 +86,7 @@
#define DBG_CHECK_VM (1llu << 42)
#define DBG_NO_DCC (1llu << 43)
#define DBG_NO_DCC_CLEAR (1llu << 44)
+#define DBG_NO_RB_PLUS (1llu << 45)
#define R600_MAP_BUFFER_ALIGNMENT 64
@@ -250,6 +251,8 @@ struct r600_surface {
unsigned cb_color_fmask_slice; /* EG and later */
unsigned cb_color_cmask; /* CB_COLORn_TILE (r600 only) */
unsigned cb_color_mask; /* R600 only */
+ unsigned sx_ps_downconvert; /* Stoney only */
+ unsigned sx_blend_opt_epsilon; /* Stoney only */
struct r600_resource *cb_buffer_fmask; /* Used for FMASK relocations. R600 only */
struct r600_resource *cb_buffer_cmask; /* Used for CMASK relocations. R600 only */
diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
index 6515a829b5a..de2d1cb53b3 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -1389,6 +1389,7 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
return;
for (i = 0; i < fb->nr_cbufs; i++) {
+ struct r600_surface *surf;
struct r600_texture *tex;
unsigned clear_bit = PIPE_CLEAR_COLOR0 << i;
@@ -1399,6 +1400,7 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
if (!(*buffers & clear_bit))
continue;
+ surf = (struct r600_surface *)fb->cbufs[i];
tex = (struct r600_texture *)fb->cbufs[i]->texture;
/* 128-bit formats are unsupported */
@@ -1445,6 +1447,10 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
if (clear_words_needed)
tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
} else {
+ /* RB+ doesn't work with CMASK fast clear. */
+ if (surf->sx_ps_downconvert)
+ continue;
+
/* ensure CMASK is enabled */
r600_texture_alloc_cmask_separate(rctx->screen, tex);
if (tex->cmask.size == 0) {
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index bbe15497b67..1cc03f75045 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -347,10 +347,54 @@ static uint32_t si_translate_blend_factor(int blend_fact)
return 0;
}
+static uint32_t si_translate_blend_opt_function(int blend_func) /*
+ * Translate a PIPE_BLEND_* blend function into the matching
+ * V_028760_OPT_COMB_* value.
+ *
+ * The callers added by this patch place the result in the COLOR_COMB_FCN
+ * and ALPHA_COMB_FCN fields of the per-render-target SX_MRT0_BLEND_OPT
+ * register.
+ *
+ * blend_func: one of the PIPE_BLEND_* values handled in the switch below.
+ * Returns:    the V_028760_OPT_COMB_* value for that function, or
+ *             V_028760_OPT_COMB_BLEND_DISABLED for any other input.
+ */
+{
+ switch (blend_func) {
+ case PIPE_BLEND_ADD:
+ return V_028760_OPT_COMB_ADD;
+ case PIPE_BLEND_SUBTRACT:
+ return V_028760_OPT_COMB_SUBTRACT;
+ case PIPE_BLEND_REVERSE_SUBTRACT:
+ return V_028760_OPT_COMB_REVSUBTRACT;
+ case PIPE_BLEND_MIN:
+ return V_028760_OPT_COMB_MIN;
+ case PIPE_BLEND_MAX:
+ return V_028760_OPT_COMB_MAX;
+ default: /* unhandled value: same setting the blend-state code uses when blending is off */
+ return V_028760_OPT_COMB_BLEND_DISABLED;
+ }
+}
+
+static uint32_t si_translate_blend_opt_factor(int blend_fact, bool is_alpha) /*
+ * Translate a PIPE_BLENDFACTOR_* value into a V_028760_BLEND_OPT_* value.
+ *
+ * The callers added by this patch place the result in the COLOR_SRC_OPT /
+ * COLOR_DST_OPT fields (is_alpha == false) and the ALPHA_SRC_OPT /
+ * ALPHA_DST_OPT fields (is_alpha == true) of SX_MRT0_BLEND_OPT.
+ *
+ * blend_fact: the PIPE_BLENDFACTOR_* value to translate.
+ * is_alpha:   selects the alpha-channel result for the factors whose result
+ *             differs between channels (SRC_COLOR, INV_SRC_COLOR and
+ *             SRC_ALPHA_SATURATE); it is ignored for all other factors.
+ * Returns:    the V_028760_BLEND_OPT_* value, or
+ *             V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE for any factor
+ *             not listed in the switch below.
+ */
+{
+ switch (blend_fact) {
+ case PIPE_BLENDFACTOR_ZERO:
+ return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_ALL;
+ case PIPE_BLENDFACTOR_ONE:
+ return V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE;
+ case PIPE_BLENDFACTOR_SRC_COLOR: /* alpha variant is the same value as the SRC_ALPHA case */
+ return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0
+ : V_028760_BLEND_OPT_PRESERVE_C1_IGNORE_C0;
+ case PIPE_BLENDFACTOR_INV_SRC_COLOR: /* alpha variant is the same value as the INV_SRC_ALPHA case */
+ return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1
+ : V_028760_BLEND_OPT_PRESERVE_C0_IGNORE_C1;
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
+ return V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0;
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+ return V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1;
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: /* alpha variant is the same value as the ONE case */
+ return is_alpha ? V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE
+ : V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0;
+ default: /* every factor not handled above */
+ return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
+ }
+}
+
static void *si_create_blend_state_mode(struct pipe_context *ctx,
const struct pipe_blend_state *state,
unsigned mode)
{
+ struct si_context *sctx = (struct si_context*)ctx;
struct si_state_blend *blend = CALLOC_STRUCT(si_state_blend);
struct si_pm4_state *pm4 = &blend->pm4;
@@ -416,8 +460,47 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
} else {
color_control |= S_028808_MODE(V_028808_CB_DISABLE);
}
- si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control);
+ if (sctx->b.family == CHIP_STONEY) {
+ uint32_t sx_blend_opt_control = 0;
+
+ for (int i = 0; i < 8; i++) {
+ const int j = state->independent_blend_enable ? i : 0;
+
+ /* TODO: We can also set this if the surface doesn't contain RGB. */
+ if (!state->rt[j].blend_enable ||
+ !(state->rt[j].colormask & (PIPE_MASK_R | PIPE_MASK_G | PIPE_MASK_B)))
+ sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (4 * i);
+
+ /* TODO: We can also set this if the surface doesn't contain alpha. */
+ if (!state->rt[j].blend_enable ||
+ !(state->rt[j].colormask & PIPE_MASK_A))
+ sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (4 * i);
+
+ if (!state->rt[j].blend_enable) {
+ si_pm4_set_reg(pm4, R_028760_SX_MRT0_BLEND_OPT + i * 4,
+ S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) |
+ S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED));
+ continue;
+ }
+
+ si_pm4_set_reg(pm4, R_028760_SX_MRT0_BLEND_OPT + i * 4,
+ S_028760_COLOR_SRC_OPT(si_translate_blend_opt_factor(state->rt[j].rgb_src_factor, false)) |
+ S_028760_COLOR_DST_OPT(si_translate_blend_opt_factor(state->rt[j].rgb_dst_factor, false)) |
+ S_028760_COLOR_COMB_FCN(si_translate_blend_opt_function(state->rt[j].rgb_func)) |
+ S_028760_ALPHA_SRC_OPT(si_translate_blend_opt_factor(state->rt[j].alpha_src_factor, true)) |
+ S_028760_ALPHA_DST_OPT(si_translate_blend_opt_factor(state->rt[j].alpha_dst_factor, true)) |
+ S_028760_ALPHA_COMB_FCN(si_translate_blend_opt_function(state->rt[j].alpha_func)));
+ }
+
+ si_pm4_set_reg(pm4, R_02875C_SX_BLEND_OPT_CONTROL, sx_blend_opt_control);
+
+ /* RB+ doesn't work with dual source blending */
+ if (blend->dual_src_blend)
+ color_control |= S_028808_DISABLE_DUAL_QUAD(1);
+ }
+
+ si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control);
return blend;
}
@@ -1057,6 +1140,10 @@ static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *s
if (sctx->framebuffer.nr_samples <= 1 || (rs && !rs->multisample_enable))
db_shader_control &= C_02880C_MASK_EXPORT_ENABLE;
+ if (sctx->b.family == CHIP_STONEY &&
+ sctx->screen->b.debug_flags & DBG_NO_RB_PLUS)
+ db_shader_control |= S_02880C_DUAL_QUAD_DISABLE(1);
+
radeon_set_context_reg(cs, R_02880C_DB_SHADER_CONTROL,
db_shader_control);
}
@@ -1970,6 +2057,61 @@ static void si_initialize_color_surface(struct si_context *sctx,
surf->export_16bpc = true;
}
+ if (sctx->b.family == CHIP_STONEY &&
+ !(sctx->screen->b.debug_flags & DBG_NO_RB_PLUS)) {
+ switch (desc->channel[0].size) {
+ case 32:
+ if (desc->nr_channels == 1) {
+ if (swap == V_0280A0_SWAP_STD)
+ surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_32_R;
+ else if (swap == V_0280A0_SWAP_ALT_REV)
+ surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_32_A;
+ }
+ break;
+ case 16:
+ /* For 1-channel formats, use the superset thereof. */
+ if (desc->nr_channels <= 2) {
+ if (swap == V_0280A0_SWAP_STD ||
+ swap == V_0280A0_SWAP_STD_REV)
+ surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_16_16_GR;
+ else
+ surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_16_16_AR;
+ }
+ break;
+ case 11:
+ if (desc->nr_channels == 3) {
+ surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_10_11_11;
+ surf->sx_blend_opt_epsilon = V_028758_11BIT_FORMAT;
+ }
+ break;
+ case 10:
+ if (desc->nr_channels == 4) {
+ surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_2_10_10_10;
+ surf->sx_blend_opt_epsilon = V_028758_10BIT_FORMAT;
+ }
+ break;
+ case 8:
+ /* For 1 and 2-channel formats, use the superset thereof. */
+ surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_8_8_8_8;
+ surf->sx_blend_opt_epsilon = V_028758_8BIT_FORMAT;
+ break;
+ case 5:
+ if (desc->nr_channels == 3) {
+ surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_5_6_5;
+ surf->sx_blend_opt_epsilon = V_028758_6BIT_FORMAT;
+ } else if (desc->nr_channels == 4) {
+ surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_1_5_5_5;
+ surf->sx_blend_opt_epsilon = V_028758_5BIT_FORMAT;
+ }
+ break;
+ case 4:
+ /* For 1 and 2-channel formats, use the superset thereof. */
+ surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_4_4_4_4;
+ surf->sx_blend_opt_epsilon = V_028758_4BIT_FORMAT;
+ break;
+ }
+ }
+
surf->color_initialized = true;
}
@@ -2238,6 +2380,8 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
unsigned i, nr_cbufs = state->nr_cbufs;
struct r600_texture *tex = NULL;
struct r600_surface *cb = NULL;
+ uint32_t sx_ps_downconvert = 0;
+ uint32_t sx_blend_opt_epsilon = 0;
/* Colorbuffers. */
for (i = 0; i < nr_cbufs; i++) {
@@ -2288,18 +2432,29 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
if (sctx->b.chip_class >= VI)
radeon_emit(cs, cb->cb_dcc_base); /* R_028C94_CB_COLOR0_DCC_BASE */
+
+ sx_ps_downconvert |= cb->sx_ps_downconvert << (4 * i);
+ sx_blend_opt_epsilon |= cb->sx_blend_opt_epsilon << (4 * i);
}
/* set CB_COLOR1_INFO for possible dual-src blending */
if (i == 1 && state->cbufs[0] &&
sctx->framebuffer.dirty_cbufs & (1 << 0)) {
radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + 1 * 0x3C,
cb->cb_color_info | tex->cb_color_info);
+ sx_ps_downconvert |= cb->sx_ps_downconvert << (4 * i);
+ sx_blend_opt_epsilon |= cb->sx_blend_opt_epsilon << (4 * i);
i++;
}
for (; i < 8 ; i++)
if (sctx->framebuffer.dirty_cbufs & (1 << i))
radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0);
+ if (sctx->b.family == CHIP_STONEY) {
+ radeon_set_context_reg_seq(cs, R_028754_SX_PS_DOWNCONVERT, 2);
+ radeon_emit(cs, sx_ps_downconvert); /* R_028754_SX_PS_DOWNCONVERT */
+ radeon_emit(cs, sx_blend_opt_epsilon); /* R_028758_SX_BLEND_OPT_EPSILON */
+ }
+
/* ZS buffer. */
if (state->zsbuf && sctx->framebuffer.dirty_zsbuf) {
struct r600_surface *zb = (struct r600_surface*)state->zsbuf;
@@ -3460,7 +3615,7 @@ static void si_init_config(struct si_context *sctx)
}
if (sctx->b.family == CHIP_STONEY)
- si_pm4_set_reg(pm4, R_028754_SX_PS_DOWNCONVERT, 0);
+ si_pm4_set_reg(pm4, R_028C40_PA_SC_SHADER_CONTROL, 0);
si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8);
if (sctx->b.chip_class >= CIK)
diff --git a/src/gallium/drivers/radeonsi/sid.h b/src/gallium/drivers/radeonsi/sid.h
index d2648e93c14..573ab78b482 100644
--- a/src/gallium/drivers/radeonsi/sid.h
+++ b/src/gallium/drivers/radeonsi/sid.h
@@ -6771,6 +6771,9 @@
#define G_028804_ENABLE_POSTZ_OVERRASTERIZATION(x) (((x) >> 27) & 0x1)
#define C_028804_ENABLE_POSTZ_OVERRASTERIZATION 0xF7FFFFFF
#define R_028808_CB_COLOR_CONTROL 0x028808
+#define S_028808_DISABLE_DUAL_QUAD(x) (((x) & 0x1) << 0)
+#define G_028808_DISABLE_DUAL_QUAD(x) (((x) >> 0) & 0x1)
+#define C_028808_DISABLE_DUAL_QUAD 0xFFFFFFFE
#define S_028808_DEGAMMA_ENABLE(x) (((x) & 0x1) << 3)
#define G_028808_DEGAMMA_ENABLE(x) (((x) >> 3) & 0x1)
#define C_028808_DEGAMMA_ENABLE 0xFFFFFFF7