summary refs log tree commit diff stats
path: root/src/gallium/drivers
diff options
context:
space:
mode:
author Marek Olšák <[email protected]> 2016-02-15 23:57:54 +0100
committer Marek Olšák <[email protected]> 2016-02-21 21:08:58 +0100
commit 4636d9be4a40138d0a10cadcb1b63eea89d95e34 (patch)
tree 740236bab766818d5bf3ea6e30bcbd03448f7b6c /src/gallium/drivers
parent e79bb746ab8525d56f71948ebf3d6dbf8a3bdd4d (diff)
radeonsi: add PS prolog
Reviewed-by: Nicolai Hähnle <[email protected]>
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r-- src/gallium/drivers/radeonsi/si_pipe.c 1
-rw-r--r-- src/gallium/drivers/radeonsi/si_pipe.h 1
-rw-r--r-- src/gallium/drivers/radeonsi/si_shader.c 324
-rw-r--r-- src/gallium/drivers/radeonsi/si_shader.h 14
-rw-r--r-- src/gallium/drivers/radeonsi/si_state_shaders.c 7
5 files changed, 345 insertions, 2 deletions
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 3c87dd690f7..8bfaf85df5b 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -542,6 +542,7 @@ static void si_destroy_screen(struct pipe_screen* pscreen)
sscreen->vs_prologs,
sscreen->vs_epilogs,
sscreen->tcs_epilogs,
+ sscreen->ps_prologs,
sscreen->ps_epilogs
};
unsigned i;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 5d204ec6462..1ac7bc4bd85 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -92,6 +92,7 @@ struct si_screen {
struct si_shader_part *vs_prologs;
struct si_shader_part *vs_epilogs;
struct si_shader_part *tcs_epilogs;
+ struct si_shader_part *ps_prologs;
struct si_shader_part *ps_epilogs;
};
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 8e50d06b30f..bb30c0f5019 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -879,7 +879,8 @@ static int lookup_interp_param_index(unsigned interpolate, unsigned location)
static unsigned select_interp_param(struct si_shader_context *ctx,
unsigned param)
{
- if (!ctx->shader->key.ps.prolog.force_persample_interp)
+ if (!ctx->shader->key.ps.prolog.force_persample_interp ||
+ !ctx->is_monolithic)
return param;
/* If the shader doesn't use center/centroid, just return the parameter.
@@ -1023,6 +1024,7 @@ static void declare_input_fs(
unsigned input_index,
const struct tgsi_full_declaration *decl)
{
+ struct lp_build_context *base = &radeon_bld->soa.bld_base.base;
struct si_shader_context *ctx =
si_shader_context(&radeon_bld->soa.bld_base);
struct si_shader *shader = ctx->shader;
@@ -1030,6 +1032,26 @@ static void declare_input_fs(
LLVMValueRef interp_param = NULL;
int interp_param_idx;
+ /* Get colors from input VGPRs (set by the prolog). */
+ if (!ctx->is_monolithic &&
+ decl->Semantic.Name == TGSI_SEMANTIC_COLOR) {
+ unsigned i = decl->Semantic.Index;
+ unsigned colors_read = shader->selector->info.colors_read;
+ unsigned mask = colors_read >> (i * 4);
+ unsigned offset = SI_PARAM_POS_FIXED_PT + 1 +
+ (i ? util_bitcount(colors_read & 0xf) : 0);
+
+ radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 0)] =
+ mask & 0x1 ? LLVMGetParam(main_fn, offset++) : base->undef;
+ radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 1)] =
+ mask & 0x2 ? LLVMGetParam(main_fn, offset++) : base->undef;
+ radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 2)] =
+ mask & 0x4 ? LLVMGetParam(main_fn, offset++) : base->undef;
+ radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 3)] =
+ mask & 0x8 ? LLVMGetParam(main_fn, offset++) : base->undef;
+ return;
+ }
+
interp_param_idx = lookup_interp_param_index(decl->Interp.Interpolate,
decl->Interp.Location);
if (interp_param_idx == -1)
@@ -3970,6 +3992,16 @@ static void create_function(struct si_shader_context *ctx)
num_params = SI_PARAM_POS_FIXED_PT+1;
if (!ctx->is_monolithic) {
+ /* Color inputs from the prolog. */
+ if (shader->selector->info.colors_read) {
+ unsigned num_color_elements =
+ util_bitcount(shader->selector->info.colors_read);
+
+ assert(num_params + num_color_elements <= ARRAY_SIZE(params));
+ for (i = 0; i < num_color_elements; i++)
+ params[num_params++] = ctx->f32;
+ }
+
/* Outputs for the epilog. */
num_return_sgprs = SI_SGPR_ALPHA_REF + 1;
num_returns =
@@ -4001,6 +4033,20 @@ static void create_function(struct si_shader_context *ctx)
si_create_function(ctx, returns, num_returns, params,
num_params, last_array_pointer, last_sgpr);
+ /* Reserve register locations for VGPR inputs the PS prolog may need. */
+ if (ctx->type == TGSI_PROCESSOR_FRAGMENT &&
+ !ctx->is_monolithic) {
+ radeon_llvm_add_attribute(ctx->radeon_bld.main_fn,
+ "InitialPSInputAddr",
+ S_0286D0_PERSP_SAMPLE_ENA(1) |
+ S_0286D0_PERSP_CENTER_ENA(1) |
+ S_0286D0_PERSP_CENTROID_ENA(1) |
+ S_0286D0_LINEAR_SAMPLE_ENA(1) |
+ S_0286D0_LINEAR_CENTER_ENA(1) |
+ S_0286D0_LINEAR_CENTROID_ENA(1) |
+ S_0286D0_FRONT_FACE_ENA(1));
+ }
+
shader->num_input_sgprs = 0;
shader->num_input_vgprs = 0;
@@ -5305,6 +5351,157 @@ static bool si_shader_select_tcs_parts(struct si_screen *sscreen,
}
/**
+ * Compile the pixel shader prolog. This handles:
+ * - two-side color selection and interpolation
+ * - overriding interpolation parameters for the API PS
+ * - polygon stippling
+ *
+ * All preloaded SGPRs and VGPRs are passed through unmodified unless they are
+ * overridden by other states. (e.g. per-sample interpolation)
+ * Interpolated colors are stored after the preloaded VGPRs.
+ */
+static bool si_compile_ps_prolog(struct si_screen *sscreen,
+ LLVMTargetMachineRef tm,
+ struct pipe_debug_callback *debug,
+ struct si_shader_part *out)
+{
+ union si_shader_part_key *key = &out->key;
+ struct si_shader shader = {};
+ struct si_shader_context ctx;
+ struct gallivm_state *gallivm = &ctx.radeon_bld.gallivm;
+ LLVMTypeRef *params;
+ LLVMValueRef ret, func;
+ int last_sgpr, num_params, num_returns, i, num_color_channels;
+ bool status = true;
+
+ si_init_shader_ctx(&ctx, sscreen, &shader, tm, NULL);
+ ctx.type = TGSI_PROCESSOR_FRAGMENT;
+ shader.key.ps.prolog = key->ps_prolog.states;
+
+ /* Number of inputs + 8 color elements. */
+ params = alloca((key->ps_prolog.num_input_sgprs +
+ key->ps_prolog.num_input_vgprs + 8) *
+ sizeof(LLVMTypeRef));
+
+ /* Declare inputs. */
+ num_params = 0;
+ for (i = 0; i < key->ps_prolog.num_input_sgprs; i++)
+ params[num_params++] = ctx.i32;
+ last_sgpr = num_params - 1;
+
+ for (i = 0; i < key->ps_prolog.num_input_vgprs; i++)
+ params[num_params++] = ctx.f32;
+
+ /* Declare outputs (same as inputs + add colors if needed) */
+ num_returns = num_params;
+ num_color_channels = util_bitcount(key->ps_prolog.colors_read);
+ for (i = 0; i < num_color_channels; i++)
+ params[num_returns++] = ctx.f32;
+
+ /* Create the function. */
+ si_create_function(&ctx, params, num_returns, params,
+ num_params, -1, last_sgpr);
+ func = ctx.radeon_bld.main_fn;
+
+ /* Copy inputs to outputs. This should be no-op, as the registers match,
+ * but it will prevent the compiler from overwriting them unintentionally.
+ */
+ ret = ctx.return_value;
+ for (i = 0; i < num_params; i++) {
+ LLVMValueRef p = LLVMGetParam(func, i);
+ ret = LLVMBuildInsertValue(gallivm->builder, ret, p, i, "");
+ }
+
+ /* Interpolate colors. */
+ for (i = 0; i < 2; i++) {
+ unsigned writemask = (key->ps_prolog.colors_read >> (i * 4)) & 0xf;
+ unsigned face_vgpr = key->ps_prolog.num_input_sgprs +
+ key->ps_prolog.face_vgpr_index;
+ LLVMValueRef interp[2], color[4];
+ LLVMValueRef interp_ij = NULL, prim_mask = NULL, face = NULL;
+
+ if (!writemask)
+ continue;
+
+ /* If the interpolation qualifier is not CONSTANT (-1). */
+ if (key->ps_prolog.color_interp_vgpr_index[i] != -1) {
+ unsigned interp_vgpr = key->ps_prolog.num_input_sgprs +
+ key->ps_prolog.color_interp_vgpr_index[i];
+
+ interp[0] = LLVMGetParam(func, interp_vgpr);
+ interp[1] = LLVMGetParam(func, interp_vgpr + 1);
+ interp_ij = lp_build_gather_values(gallivm, interp, 2);
+ interp_ij = LLVMBuildBitCast(gallivm->builder, interp_ij,
+ ctx.v2i32, "");
+ }
+
+ /* Use the absolute location of the input. */
+ prim_mask = LLVMGetParam(func, SI_PS_NUM_USER_SGPR);
+
+ if (key->ps_prolog.states.color_two_side) {
+ face = LLVMGetParam(func, face_vgpr);
+ face = LLVMBuildBitCast(gallivm->builder, face, ctx.i32, "");
+ }
+
+ interp_fs_input(&ctx,
+ key->ps_prolog.color_attr_index[i],
+ TGSI_SEMANTIC_COLOR, i,
+ key->ps_prolog.num_interp_inputs,
+ key->ps_prolog.colors_read, interp_ij,
+ prim_mask, face, color);
+
+ while (writemask) {
+ unsigned chan = u_bit_scan(&writemask);
+ ret = LLVMBuildInsertValue(gallivm->builder, ret, color[chan],
+ num_params++, "");
+ }
+ }
+
+ /* Force per-sample interpolation. */
+ if (key->ps_prolog.states.force_persample_interp) {
+ unsigned i, base = key->ps_prolog.num_input_sgprs;
+ LLVMValueRef persp_sample[2], linear_sample[2];
+
+ /* Read PERSP_SAMPLE. */
+ for (i = 0; i < 2; i++)
+ persp_sample[i] = LLVMGetParam(func, base + i);
+ /* Overwrite PERSP_CENTER. */
+ for (i = 0; i < 2; i++)
+ ret = LLVMBuildInsertValue(gallivm->builder, ret,
+ persp_sample[i], base + 2 + i, "");
+ /* Overwrite PERSP_CENTROID. */
+ for (i = 0; i < 2; i++)
+ ret = LLVMBuildInsertValue(gallivm->builder, ret,
+ persp_sample[i], base + 4 + i, "");
+ /* Read LINEAR_SAMPLE. */
+ for (i = 0; i < 2; i++)
+ linear_sample[i] = LLVMGetParam(func, base + 6 + i);
+ /* Overwrite LINEAR_CENTER. */
+ for (i = 0; i < 2; i++)
+ ret = LLVMBuildInsertValue(gallivm->builder, ret,
+ linear_sample[i], base + 8 + i, "");
+ /* Overwrite LINEAR_CENTROID. */
+ for (i = 0; i < 2; i++)
+ ret = LLVMBuildInsertValue(gallivm->builder, ret,
+ linear_sample[i], base + 10 + i, "");
+ }
+
+ /* TODO: polygon stippling */
+
+ /* Compile. */
+ LLVMBuildRet(gallivm->builder, ret);
+ radeon_llvm_finalize_module(&ctx.radeon_bld);
+
+ if (si_compile_llvm(sscreen, &out->binary, &out->config, tm,
+ gallivm->module, debug, ctx.type,
+ "Fragment Shader Prolog"))
+ status = false;
+
+ radeon_llvm_dispose(&ctx.radeon_bld);
+ return status;
+}
+
+/**
* Compile the pixel shader epilog. This handles everything that must be
* emulated for pixel shader exports. (alpha-test, format conversions, etc)
*/
@@ -5430,7 +5627,103 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen,
struct pipe_debug_callback *debug)
{
struct tgsi_shader_info *info = &shader->selector->info;
+ union si_shader_part_key prolog_key;
union si_shader_part_key epilog_key;
+ unsigned i;
+
+ /* Get the prolog. */
+ memset(&prolog_key, 0, sizeof(prolog_key));
+ prolog_key.ps_prolog.states = shader->key.ps.prolog;
+ prolog_key.ps_prolog.colors_read = info->colors_read;
+ prolog_key.ps_prolog.num_input_sgprs = shader->num_input_sgprs;
+ prolog_key.ps_prolog.num_input_vgprs = shader->num_input_vgprs;
+
+ if (info->colors_read) {
+ unsigned *color = shader->selector->color_attr_index;
+
+ if (shader->key.ps.prolog.color_two_side) {
+ /* BCOLORs are stored after the last input. */
+ prolog_key.ps_prolog.num_interp_inputs = info->num_inputs;
+ prolog_key.ps_prolog.face_vgpr_index = shader->face_vgpr_index;
+ shader->config.spi_ps_input_ena |= S_0286CC_FRONT_FACE_ENA(1);
+ }
+
+ for (i = 0; i < 2; i++) {
+ unsigned location = info->input_interpolate_loc[color[i]];
+
+ if (!(info->colors_read & (0xf << i*4)))
+ continue;
+
+ prolog_key.ps_prolog.color_attr_index[i] = color[i];
+
+ /* Force per-sample interpolation for the colors here. */
+ if (shader->key.ps.prolog.force_persample_interp)
+ location = TGSI_INTERPOLATE_LOC_SAMPLE;
+
+ switch (info->input_interpolate[color[i]]) {
+ case TGSI_INTERPOLATE_CONSTANT:
+ prolog_key.ps_prolog.color_interp_vgpr_index[i] = -1;
+ break;
+ case TGSI_INTERPOLATE_PERSPECTIVE:
+ case TGSI_INTERPOLATE_COLOR:
+ switch (location) {
+ case TGSI_INTERPOLATE_LOC_SAMPLE:
+ prolog_key.ps_prolog.color_interp_vgpr_index[i] = 0;
+ shader->config.spi_ps_input_ena |=
+ S_0286CC_PERSP_SAMPLE_ENA(1);
+ break;
+ case TGSI_INTERPOLATE_LOC_CENTER:
+ prolog_key.ps_prolog.color_interp_vgpr_index[i] = 2;
+ shader->config.spi_ps_input_ena |=
+ S_0286CC_PERSP_CENTER_ENA(1);
+ break;
+ case TGSI_INTERPOLATE_LOC_CENTROID:
+ prolog_key.ps_prolog.color_interp_vgpr_index[i] = 4;
+ shader->config.spi_ps_input_ena |=
+ S_0286CC_PERSP_CENTROID_ENA(1);
+ break;
+ default:
+ assert(0);
+ }
+ break;
+ case TGSI_INTERPOLATE_LINEAR:
+ switch (location) {
+ case TGSI_INTERPOLATE_LOC_SAMPLE:
+ prolog_key.ps_prolog.color_interp_vgpr_index[i] = 6;
+ shader->config.spi_ps_input_ena |=
+ S_0286CC_LINEAR_SAMPLE_ENA(1);
+ break;
+ case TGSI_INTERPOLATE_LOC_CENTER:
+ prolog_key.ps_prolog.color_interp_vgpr_index[i] = 8;
+ shader->config.spi_ps_input_ena |=
+ S_0286CC_LINEAR_CENTER_ENA(1);
+ break;
+ case TGSI_INTERPOLATE_LOC_CENTROID:
+ prolog_key.ps_prolog.color_interp_vgpr_index[i] = 10;
+ shader->config.spi_ps_input_ena |=
+ S_0286CC_LINEAR_CENTROID_ENA(1);
+ break;
+ default:
+ assert(0);
+ }
+ break;
+ default:
+ assert(0);
+ }
+ }
+ }
+
+ /* The prolog is a no-op if these aren't set. */
+ if (prolog_key.ps_prolog.colors_read ||
+ prolog_key.ps_prolog.states.force_persample_interp ||
+ prolog_key.ps_prolog.states.poly_stipple) {
+ shader->prolog =
+ si_get_shader_part(sscreen, &sscreen->ps_prologs,
+ &prolog_key, tm, debug,
+ si_compile_ps_prolog);
+ if (!shader->prolog)
+ return false;
+ }
/* Get the epilog. */
memset(&epilog_key, 0, sizeof(epilog_key));
@@ -5447,6 +5740,35 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen,
if (!shader->epilog)
return false;
+ /* Set up the enable bits for per-sample shading if needed. */
+ if (shader->key.ps.prolog.force_persample_interp) {
+ if (G_0286CC_PERSP_CENTER_ENA(shader->config.spi_ps_input_ena) ||
+ G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_ena)) {
+ shader->config.spi_ps_input_ena &= C_0286CC_PERSP_CENTER_ENA;
+ shader->config.spi_ps_input_ena &= C_0286CC_PERSP_CENTROID_ENA;
+ shader->config.spi_ps_input_ena |= S_0286CC_PERSP_SAMPLE_ENA(1);
+ }
+ if (G_0286CC_LINEAR_CENTER_ENA(shader->config.spi_ps_input_ena) ||
+ G_0286CC_LINEAR_CENTROID_ENA(shader->config.spi_ps_input_ena)) {
+ shader->config.spi_ps_input_ena &= C_0286CC_LINEAR_CENTER_ENA;
+ shader->config.spi_ps_input_ena &= C_0286CC_LINEAR_CENTROID_ENA;
+ shader->config.spi_ps_input_ena |= S_0286CC_LINEAR_SAMPLE_ENA(1);
+ }
+ }
+
+ /* POS_W_FLOAT requires that one of the perspective weights is enabled. */
+ if (G_0286CC_POS_W_FLOAT_ENA(shader->config.spi_ps_input_ena) &&
+ !(shader->config.spi_ps_input_ena & 0xf)) {
+ shader->config.spi_ps_input_ena |= S_0286CC_PERSP_CENTER_ENA(1);
+ assert(G_0286CC_PERSP_CENTER_ENA(shader->config.spi_ps_input_addr));
+ }
+
+ /* At least one pair of interpolation weights must be enabled. */
+ if (!(shader->config.spi_ps_input_ena & 0x7f)) {
+ shader->config.spi_ps_input_ena |= S_0286CC_LINEAR_CENTER_ENA(1);
+ assert(G_0286CC_LINEAR_CENTER_ENA(shader->config.spi_ps_input_addr));
+ }
+
/* The sample mask input is always enabled, because the API shader always
* passes it through to the epilog. Disable it here if it's unused.
*/
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 928cb2e18e9..196fa3e9086 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -169,7 +169,7 @@ struct radeon_shader_reloc;
#define SI_PARAM_SAMPLE_COVERAGE 20
#define SI_PARAM_POS_FIXED_PT 21
-#define SI_NUM_PARAMS (SI_PARAM_POS_FIXED_PT + 1)
+#define SI_NUM_PARAMS (SI_PARAM_POS_FIXED_PT + 9) /* +8 for COLOR[0..1] */
struct si_shader;
@@ -199,6 +199,7 @@ struct si_shader_selector {
unsigned max_gsvs_emit_size;
/* PS parameters. */
+ unsigned color_attr_index[2];
unsigned db_shader_control;
/* Set 0xf or 0x0 (4 bits) per each written output.
* ANDed with spi_shader_col_format.
@@ -282,6 +283,17 @@ union si_shader_part_key {
struct si_tcs_epilog_bits states;
} tcs_epilog;
struct {
+ struct si_ps_prolog_bits states;
+ unsigned num_input_sgprs:5;
+ unsigned num_input_vgprs:5;
+ /* Color interpolation and two-side color selection. */
+ unsigned colors_read:8; /* color input components read */
+ unsigned num_interp_inputs:5; /* BCOLOR is at this location */
+ unsigned face_vgpr_index:5;
+ char color_attr_index[2];
+ char color_interp_vgpr_index[2]; /* -1 == constant */
+ } ps_prolog;
+ struct {
struct si_ps_epilog_bits states;
unsigned colors_written:8;
unsigned writes_z:1;
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 08f5d88e8dc..fbc377a6a4e 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -903,6 +903,13 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
for (i = 0; i < 8; i++)
if (sel->info.colors_written & (1 << i))
sel->colors_written_4bit |= 0xf << (4 * i);
+
+ for (i = 0; i < sel->info.num_inputs; i++) {
+ if (sel->info.input_semantic_name[i] == TGSI_SEMANTIC_COLOR) {
+ int index = sel->info.input_semantic_index[i];
+ sel->color_attr_index[index] = i;
+ }
+ }
break;
}