summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Marek Olšák <[email protected]> 2016-01-03 19:00:29 +0100
committer: Marek Olšák <[email protected]> 2016-02-09 21:19:51 +0100
commit: b9126dcda834ba9cf58af32e97f4b5d93c9817a3 (patch)
tree: 7b47e4168772d247d51371f740e695a6164348fe
parent: 4596f3c1b8bbcc83b841b6c7ea4a287a6f3210f7 (diff)
radeonsi: implement forcing per-sample interpolation using the shader key only
Forcing per-sample interpolation was implemented partly as a state and partly emulated by shader code, but since we want to do this in a fragment shader prolog, we need to put it into the shader key, which will be used to generate the prolog. This also removes the spi_ps_input state and moves its registers to the PS state.

Reviewed-by: Nicolai Hähnle <[email protected]>
-rw-r--r--src/gallium/drivers/radeonsi/si_hw_context.c1
-rw-r--r--src/gallium/drivers/radeonsi/si_pipe.h2
-rw-r--r--src/gallium/drivers/radeonsi/si_shader.c46
-rw-r--r--src/gallium/drivers/radeonsi/si_shader.h50
-rw-r--r--src/gallium/drivers/radeonsi/si_state.h1
-rw-r--r--src/gallium/drivers/radeonsi/si_state_shaders.c107
6 files changed, 55 insertions, 152 deletions
diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c
index d60c4515625..b5a4034cc12 100644
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c
@@ -182,7 +182,6 @@ void si_begin_new_cs(struct si_context *ctx)
si_mark_atom_dirty(ctx, &ctx->db_render_state);
si_mark_atom_dirty(ctx, &ctx->stencil_ref.atom);
si_mark_atom_dirty(ctx, &ctx->spi_map);
- si_mark_atom_dirty(ctx, &ctx->spi_ps_input);
si_mark_atom_dirty(ctx, &ctx->b.streamout.enable_atom);
si_mark_atom_dirty(ctx, &ctx->b.render_cond_atom);
si_all_descriptors_begin_new_cs(ctx);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 48947442757..3c963db5078 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -202,7 +202,6 @@ struct si_context {
struct si_viewports viewports;
struct si_stencil_ref stencil_ref;
struct r600_atom spi_map;
- struct r600_atom spi_ps_input;
/* Precomputed states. */
struct si_pm4_state *init_config;
@@ -222,7 +221,6 @@ struct si_context {
struct si_vertex_element *vertex_elements;
unsigned sprite_coord_enable;
bool flatshade;
- bool force_persample_interp;
/* shader descriptors */
struct si_descriptors vertex_buffers;
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index c595f208274..0a92a7b54e6 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -833,14 +833,11 @@ static int lookup_interp_param_index(unsigned interpolate, unsigned location)
}
/* This shouldn't be used by explicit INTERP opcodes. */
-static LLVMValueRef get_interp_param(struct si_shader_context *si_shader_ctx,
- unsigned param)
+static unsigned select_interp_param(struct si_shader_context *si_shader_ctx,
+ unsigned param)
{
- struct gallivm_state *gallivm = &si_shader_ctx->radeon_bld.gallivm;
- unsigned sample_param = 0;
- LLVMValueRef default_ij, sample_ij, force_sample;
-
- default_ij = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, param);
+ if (!si_shader_ctx->shader->key.ps.force_persample_interp)
+ return param;
/* If the shader doesn't use center/centroid, just return the parameter.
*
@@ -850,36 +847,15 @@ static LLVMValueRef get_interp_param(struct si_shader_context *si_shader_ctx,
switch (param) {
case SI_PARAM_PERSP_CENTROID:
case SI_PARAM_PERSP_CENTER:
- if (!si_shader_ctx->shader->selector->forces_persample_interp_for_persp)
- return default_ij;
-
- sample_param = SI_PARAM_PERSP_SAMPLE;
- break;
+ return SI_PARAM_PERSP_SAMPLE;
case SI_PARAM_LINEAR_CENTROID:
case SI_PARAM_LINEAR_CENTER:
- if (!si_shader_ctx->shader->selector->forces_persample_interp_for_linear)
- return default_ij;
-
- sample_param = SI_PARAM_LINEAR_SAMPLE;
- break;
+ return SI_PARAM_LINEAR_SAMPLE;
default:
- return default_ij;
+ return param;
}
-
- /* Otherwise, we have to select (i,j) based on a user data SGPR. */
- sample_ij = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, sample_param);
-
- /* TODO: this can be done more efficiently by switching between
- * 2 prologs.
- */
- force_sample = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
- SI_PARAM_PS_STATE_BITS);
- force_sample = LLVMBuildTrunc(gallivm->builder, force_sample,
- LLVMInt1TypeInContext(gallivm->context), "");
- return LLVMBuildSelect(gallivm->builder, force_sample,
- sample_ij, default_ij, "");
}
static void declare_input_fs(
@@ -918,8 +894,11 @@ static void declare_input_fs(
decl->Interp.Location);
if (interp_param_idx == -1)
return;
- else if (interp_param_idx)
- interp_param = get_interp_param(si_shader_ctx, interp_param_idx);
+ else if (interp_param_idx) {
+ interp_param_idx = select_interp_param(si_shader_ctx,
+ interp_param_idx);
+ interp_param = LLVMGetParam(main_fn, interp_param_idx);
+ }
/* fs.constant returns the param from the middle vertex, so it's not
* really useful for flat shading. It's meant to be used for custom
@@ -3633,7 +3612,6 @@ static void create_function(struct si_shader_context *si_shader_ctx)
case TGSI_PROCESSOR_FRAGMENT:
params[SI_PARAM_ALPHA_REF] = f32;
- params[SI_PARAM_PS_STATE_BITS] = i32;
params[SI_PARAM_PRIM_MASK] = i32;
last_sgpr = SI_PARAM_PRIM_MASK;
params[SI_PARAM_PERSP_SAMPLE] = v2i32;
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index d3609d46334..0d6a45a319a 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -88,7 +88,6 @@ struct radeon_shader_reloc;
#define SI_SGPR_TCS_OUT_LAYOUT 9 /* TCS & TES only */
#define SI_SGPR_TCS_IN_LAYOUT 10 /* TCS only */
#define SI_SGPR_ALPHA_REF 8 /* PS only */
-#define SI_SGPR_PS_STATE_BITS 9 /* PS only */
#define SI_VS_NUM_USER_SGPR 13 /* API VS */
#define SI_ES_NUM_USER_SGPR 12 /* API VS */
@@ -97,7 +96,7 @@ struct radeon_shader_reloc;
#define SI_TES_NUM_USER_SGPR 10
#define SI_GS_NUM_USER_SGPR 8
#define SI_GSCOPY_NUM_USER_SGPR 4
-#define SI_PS_NUM_USER_SGPR 10
+#define SI_PS_NUM_USER_SGPR 9
/* LLVM function parameter indices */
#define SI_PARAM_RW_BUFFERS 0
@@ -152,27 +151,23 @@ struct radeon_shader_reloc;
/* PS only parameters */
#define SI_PARAM_ALPHA_REF 4
-/* Bits:
- * 0: force_persample_interp
- */
-#define SI_PARAM_PS_STATE_BITS 5
-#define SI_PARAM_PRIM_MASK 6
-#define SI_PARAM_PERSP_SAMPLE 7
-#define SI_PARAM_PERSP_CENTER 8
-#define SI_PARAM_PERSP_CENTROID 9
-#define SI_PARAM_PERSP_PULL_MODEL 10
-#define SI_PARAM_LINEAR_SAMPLE 11
-#define SI_PARAM_LINEAR_CENTER 12
-#define SI_PARAM_LINEAR_CENTROID 13
-#define SI_PARAM_LINE_STIPPLE_TEX 14
-#define SI_PARAM_POS_X_FLOAT 15
-#define SI_PARAM_POS_Y_FLOAT 16
-#define SI_PARAM_POS_Z_FLOAT 17
-#define SI_PARAM_POS_W_FLOAT 18
-#define SI_PARAM_FRONT_FACE 19
-#define SI_PARAM_ANCILLARY 20
-#define SI_PARAM_SAMPLE_COVERAGE 21
-#define SI_PARAM_POS_FIXED_PT 22
+#define SI_PARAM_PRIM_MASK 5
+#define SI_PARAM_PERSP_SAMPLE 6
+#define SI_PARAM_PERSP_CENTER 7
+#define SI_PARAM_PERSP_CENTROID 8
+#define SI_PARAM_PERSP_PULL_MODEL 9
+#define SI_PARAM_LINEAR_SAMPLE 10
+#define SI_PARAM_LINEAR_CENTER 11
+#define SI_PARAM_LINEAR_CENTROID 12
+#define SI_PARAM_LINE_STIPPLE_TEX 13
+#define SI_PARAM_POS_X_FLOAT 14
+#define SI_PARAM_POS_Y_FLOAT 15
+#define SI_PARAM_POS_Z_FLOAT 16
+#define SI_PARAM_POS_W_FLOAT 17
+#define SI_PARAM_FRONT_FACE 18
+#define SI_PARAM_ANCILLARY 19
+#define SI_PARAM_SAMPLE_COVERAGE 20
+#define SI_PARAM_POS_FIXED_PT 21
#define SI_NUM_PARAMS (SI_PARAM_POS_FIXED_PT + 1)
@@ -193,14 +188,6 @@ struct si_shader_selector {
/* PIPE_SHADER_[VERTEX|FRAGMENT|...] */
unsigned type;
- /* Whether the shader has to use a conditional assignment to
- * choose between weights when emulating
- * pipe_rasterizer_state::force_persample_interp.
- * If false, "si_emit_spi_ps_input" will take care of it instead.
- */
- bool forces_persample_interp_for_persp;
- bool forces_persample_interp_for_linear;
-
/* GS parameters. */
unsigned esgs_itemsize;
unsigned gs_input_verts_per_prim;
@@ -245,6 +232,7 @@ union si_shader_key {
unsigned poly_stipple:1;
unsigned poly_line_smoothing:1;
unsigned clamp_color:1;
+ unsigned force_persample_interp:1;
} ps;
struct {
unsigned instance_divisors[SI_NUM_VERTEX_BUFFERS];
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 507f45938ce..e9a017534d1 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -133,7 +133,6 @@ union si_state_atoms {
struct r600_atom *viewports;
struct r600_atom *stencil_ref;
struct r600_atom *spi_map;
- struct r600_atom *spi_ps_input;
} s;
struct r600_atom *array[0];
};
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 6e7311807dd..59511c67ed0 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -472,6 +472,17 @@ static void si_shader_ps(struct si_shader *shader)
unsigned spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1);
uint64_t va;
bool has_centroid;
+ unsigned input_ena = shader->config.spi_ps_input_ena;
+
+ /* we need to enable at least one of them, otherwise we hang the GPU */
+ assert(G_0286CC_PERSP_SAMPLE_ENA(input_ena) ||
+ G_0286CC_PERSP_CENTER_ENA(input_ena) ||
+ G_0286CC_PERSP_CENTROID_ENA(input_ena) ||
+ G_0286CC_PERSP_PULL_MODEL_ENA(input_ena) ||
+ G_0286CC_LINEAR_SAMPLE_ENA(input_ena) ||
+ G_0286CC_LINEAR_CENTER_ENA(input_ena) ||
+ G_0286CC_LINEAR_CENTROID_ENA(input_ena) ||
+ G_0286CC_LINE_STIPPLE_TEX_ENA(input_ena));
pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
@@ -515,6 +526,9 @@ static void si_shader_ps(struct si_shader *shader)
shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS))
spi_shader_col_format = V_028714_SPI_SHADER_32_R;
+ si_pm4_set_reg(pm4, R_0286CC_SPI_PS_INPUT_ENA, input_ena);
+ si_pm4_set_reg(pm4, R_0286D0_SPI_PS_INPUT_ADDR, input_ena);
+
/* Set interpolation controls. */
has_centroid = G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_ena) ||
G_0286CC_LINEAR_CENTROID_ENA(shader->config.spi_ps_input_ena);
@@ -706,6 +720,15 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
(is_line && rs->line_smooth)) &&
sctx->framebuffer.nr_samples <= 1;
key->ps.clamp_color = rs->clamp_fragment_color;
+
+ key->ps.force_persample_interp = rs->force_persample_interp &&
+ rs->multisample_enable &&
+ sctx->framebuffer.nr_samples > 1 &&
+ sctx->ps_iter_samples > 1 &&
+ (sel->info.uses_persp_center ||
+ sel->info.uses_persp_centroid ||
+ sel->info.uses_linear_center ||
+ sel->info.uses_linear_centroid);
}
key->ps.alpha_func = si_get_alpha_test_func(sctx);
@@ -808,7 +831,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
sel->type = util_pipe_shader_from_tgsi_processor(sel->info.processor);
p_atomic_inc(&sscreen->b.num_shaders_created);
- /* First set which opcode uses which (i,j) pair. */
+ /* Set which opcode uses which (i,j) pair. */
if (sel->info.uses_persp_opcode_interp_centroid)
sel->info.uses_persp_centroid = true;
@@ -823,19 +846,6 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
sel->info.uses_linear_opcode_interp_sample)
sel->info.uses_linear_center = true;
- /* Determine if the shader has to use a conditional assignment when
- * emulating force_persample_interp.
- */
- sel->forces_persample_interp_for_persp =
- sel->info.uses_persp_center +
- sel->info.uses_persp_centroid +
- sel->info.uses_persp_sample >= 2;
-
- sel->forces_persample_interp_for_linear =
- sel->info.uses_linear_center +
- sel->info.uses_linear_centroid +
- sel->info.uses_linear_sample >= 2;
-
switch (sel->type) {
case PIPE_SHADER_GEOMETRY:
sel->gs_output_prim =
@@ -1181,68 +1191,6 @@ static void si_emit_spi_map(struct si_context *sctx, struct r600_atom *atom)
assert(num_interp == num_written);
}
-static void si_emit_spi_ps_input(struct si_context *sctx, struct r600_atom *atom)
-{
- struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
- struct si_shader *ps = sctx->ps_shader.current;
- unsigned input_ena;
-
- if (!ps)
- return;
-
- input_ena = ps->config.spi_ps_input_ena;
-
- /* we need to enable at least one of them, otherwise we hang the GPU */
- assert(G_0286CC_PERSP_SAMPLE_ENA(input_ena) ||
- G_0286CC_PERSP_CENTER_ENA(input_ena) ||
- G_0286CC_PERSP_CENTROID_ENA(input_ena) ||
- G_0286CC_PERSP_PULL_MODEL_ENA(input_ena) ||
- G_0286CC_LINEAR_SAMPLE_ENA(input_ena) ||
- G_0286CC_LINEAR_CENTER_ENA(input_ena) ||
- G_0286CC_LINEAR_CENTROID_ENA(input_ena) ||
- G_0286CC_LINE_STIPPLE_TEX_ENA(input_ena));
-
- if (sctx->force_persample_interp) {
- unsigned num_persp = G_0286CC_PERSP_SAMPLE_ENA(input_ena) +
- G_0286CC_PERSP_CENTER_ENA(input_ena) +
- G_0286CC_PERSP_CENTROID_ENA(input_ena);
- unsigned num_linear = G_0286CC_LINEAR_SAMPLE_ENA(input_ena) +
- G_0286CC_LINEAR_CENTER_ENA(input_ena) +
- G_0286CC_LINEAR_CENTROID_ENA(input_ena);
-
- /* If only one set of (i,j) coordinates is used, we can disable
- * CENTER/CENTROID, enable SAMPLE and it will load SAMPLE coordinates
- * where CENTER/CENTROID are expected, effectively forcing per-sample
- * interpolation.
- */
- if (num_persp == 1) {
- input_ena &= C_0286CC_PERSP_CENTER_ENA;
- input_ena &= C_0286CC_PERSP_CENTROID_ENA;
- input_ena |= G_0286CC_PERSP_SAMPLE_ENA(1);
- }
- if (num_linear == 1) {
- input_ena &= C_0286CC_LINEAR_CENTER_ENA;
- input_ena &= C_0286CC_LINEAR_CENTROID_ENA;
- input_ena |= G_0286CC_LINEAR_SAMPLE_ENA(1);
- }
-
- /* If at least 2 sets of coordinates are used, we can't use this
- * trick and have to select SAMPLE using a conditional assignment
- * in the shader with "force_persample_interp" being a shader constant.
- */
- }
-
- radeon_set_context_reg_seq(cs, R_0286CC_SPI_PS_INPUT_ENA, 2);
- radeon_emit(cs, input_ena);
- radeon_emit(cs, input_ena);
-
- if (ps->selector->forces_persample_interp_for_persp ||
- ps->selector->forces_persample_interp_for_linear)
- radeon_set_sh_reg(cs, R_00B030_SPI_SHADER_USER_DATA_PS_0 +
- SI_SGPR_PS_STATE_BITS * 4,
- sctx->force_persample_interp);
-}
-
/**
* Writing CONFIG or UCONFIG VGT registers requires VGT_FLUSH before that.
*/
@@ -1774,12 +1722,6 @@ bool si_update_shaders(struct si_context *sctx)
si_mark_atom_dirty(sctx, &sctx->spi_map);
}
- if (si_pm4_state_changed(sctx, ps) ||
- sctx->force_persample_interp != rs->force_persample_interp) {
- sctx->force_persample_interp = rs->force_persample_interp;
- si_mark_atom_dirty(sctx, &sctx->spi_ps_input);
- }
-
if (sctx->b.family == CHIP_STONEY && si_pm4_state_changed(sctx, ps))
si_mark_atom_dirty(sctx, &sctx->cb_render_state);
@@ -1812,7 +1754,6 @@ bool si_update_shaders(struct si_context *sctx)
void si_init_shader_functions(struct si_context *sctx)
{
si_init_atom(sctx, &sctx->spi_map, &sctx->atoms.s.spi_map, si_emit_spi_map);
- si_init_atom(sctx, &sctx->spi_ps_input, &sctx->atoms.s.spi_ps_input, si_emit_spi_ps_input);
sctx->b.b.create_vs_state = si_create_shader_selector;
sctx->b.b.create_tcs_state = si_create_shader_selector;