summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/gallium/drivers/radeonsi/si_pipe.c 1
-rw-r--r-- src/gallium/drivers/radeonsi/si_pipe.h 2
-rw-r--r-- src/gallium/drivers/radeonsi/si_shader.c 115
-rw-r--r-- src/gallium/drivers/radeonsi/si_shader.h 10
-rw-r--r-- src/gallium/drivers/radeonsi/si_state_draw.c 18
-rw-r--r-- src/gallium/drivers/radeonsi/si_state_shaders.c 7
6 files changed, 149 insertions, 4 deletions
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index bf3b442dbcc..bc633bb927f 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -672,6 +672,7 @@ static void si_destroy_screen(struct pipe_screen* pscreen)
sscreen->vs_prologs,
sscreen->vs_epilogs,
sscreen->tcs_epilogs,
+ sscreen->gs_prologs,
sscreen->ps_prologs,
sscreen->ps_epilogs
};
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index e7617bc4975..8e6a94deade 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -96,6 +96,7 @@ struct si_screen {
struct si_shader_part *vs_prologs;
struct si_shader_part *vs_epilogs;
struct si_shader_part *tcs_epilogs;
+ struct si_shader_part *gs_prologs;
struct si_shader_part *ps_prologs;
struct si_shader_part *ps_epilogs;
@@ -319,6 +320,7 @@ struct si_context {
unsigned last_sc_line_stipple;
int last_vtx_reuse_depth;
int current_rast_prim; /* primitive type after TES, GS */
+ bool gs_tri_strip_adj_fix;
unsigned last_gsvs_itemsize;
/* Scratch buffer */
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index fe1542088fb..28a8b1fe9e9 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -6747,6 +6747,78 @@ static void si_get_ps_epilog_key(struct si_shader *shader,
}
/**
+ * Build the GS prolog function. Rotate the input vertices for triangle strips
+ * with adjacency.
+ *
+ * The function is created with num_sgprs + num_vgprs i32 parameters and a
+ * return value with the same number of slots (i32 for the first num_sgprs
+ * slots, f32 for the remaining num_vgprs slots). Every parameter is copied
+ * to the return slot with the same index. When
+ * key->gs_prolog.states.tri_strip_adj_fix is set, the six slots listed in
+ * vtx_params are additionally remapped: for primitives whose ID has its
+ * lowest bit set, slot vtx_params[i] receives the input from
+ * vtx_params[(i + 4) % 6] instead of its own input.
+ *
+ * ctx: shader context; the function is read back from ctx->main_fn after
+ *      si_create_function() and the return value is built up starting
+ *      from ctx->return_value.
+ * key: shader part key; only gs_prolog.states.tri_strip_adj_fix is read.
+ */
+static void si_build_gs_prolog_function(struct si_shader_context *ctx,
+ union si_shader_part_key *key)
+{
+ const unsigned num_sgprs = SI_GS_NUM_USER_SGPR + 2;
+ const unsigned num_vgprs = 8;
+ struct gallivm_state *gallivm = &ctx->gallivm;
+ LLVMBuilderRef builder = gallivm->builder;
+ LLVMTypeRef params[32]; /* NOTE(review): must hold num_sgprs + num_vgprs entries; assumes SI_GS_NUM_USER_SGPR + 10 <= 32 - confirm */
+ LLVMTypeRef returns[32]; /* same size requirement as params[] */
+ LLVMValueRef func, ret;
+
+ for (unsigned i = 0; i < num_sgprs; ++i) { /* first num_sgprs slots: i32 in, i32 out */
+ params[i] = ctx->i32;
+ returns[i] = ctx->i32;
+ }
+
+ for (unsigned i = 0; i < num_vgprs; ++i) { /* following num_vgprs slots: i32 in, f32 out */
+ params[num_sgprs + i] = ctx->i32;
+ returns[num_sgprs + i] = ctx->f32;
+ }
+
+ /* Create the function.
+ * NOTE(review): the last argument (num_sgprs - 1) is presumably the index
+ * of the last SGPR parameter - confirm against si_create_function(). */
+ si_create_function(ctx, "gs_prolog", returns, num_sgprs + num_vgprs,
+ params, num_sgprs + num_vgprs, num_sgprs - 1);
+ func = ctx->main_fn;
+
+ /* Copy inputs to outputs. This should be no-op, as the registers match,
+ * but it will prevent the compiler from overwriting them unintentionally.
+ */
+ ret = ctx->return_value;
+ for (unsigned i = 0; i < num_sgprs; i++) {
+ LLVMValueRef p = LLVMGetParam(func, i);
+ ret = LLVMBuildInsertValue(builder, ret, p, i, "");
+ }
+ for (unsigned i = 0; i < num_vgprs; i++) {
+ LLVMValueRef p = LLVMGetParam(func, num_sgprs + i);
+ p = LLVMBuildBitCast(builder, p, ctx->f32, ""); /* parameters are i32, these return slots are f32 */
+ ret = LLVMBuildInsertValue(builder, ret, p, num_sgprs + i, "");
+ }
+
+ if (key->gs_prolog.states.tri_strip_adj_fix) {
+ /* Remap the input vertices for every other primitive.
+ * vtx_params lists the six parameter indices that are rotated; index
+ * num_sgprs + 2 is skipped because it is read as the primitive ID below.
+ */
+ const unsigned vtx_params[6] = {
+ num_sgprs,
+ num_sgprs + 1,
+ num_sgprs + 3,
+ num_sgprs + 4,
+ num_sgprs + 5,
+ num_sgprs + 6
+ };
+ LLVMValueRef prim_id, rotate;
+
+ prim_id = LLVMGetParam(func, num_sgprs + 2);
+ rotate = LLVMBuildTrunc(builder, prim_id, ctx->i1, ""); /* lowest bit of the primitive ID: set for every other primitive */
+
+ for (unsigned i = 0; i < 6; ++i) {
+ LLVMValueRef base, rotated, actual;
+ base = LLVMGetParam(func, vtx_params[i]);
+ rotated = LLVMGetParam(func, vtx_params[(i + 4) % 6]); /* entry four positions further on in vtx_params, wrapping around */
+ actual = LLVMBuildSelect(builder, rotate, rotated, base, ""); /* rotated input when the bit is set, own input otherwise */
+ actual = LLVMBuildBitCast(builder, actual, ctx->f32, "");
+ ret = LLVMBuildInsertValue(builder, ret, actual, vtx_params[i], ""); /* replaces the plain copy inserted above */
+ }
+ }
+
+ LLVMBuildRet(builder, ret);
+}
+
+/**
* Given a list of shader part functions, build a wrapper function that
* runs them in sequence to form a monolithic shader.
*/
@@ -7019,6 +7091,18 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
parts[1] = ctx.main_fn;
si_build_wrapper_function(&ctx, parts, 2, 0);
+ } else if (is_monolithic && ctx.type == PIPE_SHADER_GEOMETRY) {
+ LLVMValueRef parts[2];
+ union si_shader_part_key prolog_key;
+
+ parts[1] = ctx.main_fn;
+
+ memset(&prolog_key, 0, sizeof(prolog_key));
+ prolog_key.gs_prolog.states = shader->key.gs.prolog;
+ si_build_gs_prolog_function(&ctx, &prolog_key);
+ parts[0] = ctx.main_fn;
+
+ si_build_wrapper_function(&ctx, parts, 2, 1);
} else if (is_monolithic && ctx.type == PIPE_SHADER_FRAGMENT) {
LLVMValueRef parts[3];
union si_shader_part_key prolog_key;
@@ -7207,6 +7291,9 @@ si_get_shader_part(struct si_screen *sscreen,
assert(!prolog);
shader.key.tcs.epilog = key->tcs_epilog.states;
break;
+ case PIPE_SHADER_GEOMETRY:
+ assert(prolog);
+ break;
case PIPE_SHADER_FRAGMENT:
if (prolog)
shader.key.ps.prolog = key->ps_prolog.states;
@@ -7531,6 +7618,30 @@ static bool si_shader_select_tcs_parts(struct si_screen *sscreen,
}
/**
+ * Select and compile (or reuse) GS parts (prolog).
+ *
+ * No prolog is selected when shader->key.gs.prolog.tri_strip_adj_fix is
+ * clear. Otherwise a prolog is requested from si_get_shader_part() using
+ * the sscreen->gs_prologs list, with si_build_gs_prolog_function as the
+ * build callback, and the result is stored in shader->prolog.
+ *
+ * Returns true when no prolog is needed or one was obtained, and false
+ * when si_get_shader_part() returned NULL.
+ */
+static bool si_shader_select_gs_parts(struct si_screen *sscreen,
+ LLVMTargetMachineRef tm,
+ struct si_shader *shader,
+ struct pipe_debug_callback *debug)
+{
+ union si_shader_part_key prolog_key;
+
+ if (!shader->key.gs.prolog.tri_strip_adj_fix)
+ return true; /* shader->prolog is left untouched on this path */
+
+ memset(&prolog_key, 0, sizeof(prolog_key)); /* zero the whole union; presumably so unused bits compare equal during part lookup - confirm against si_get_shader_part() */
+ prolog_key.gs_prolog.states = shader->key.gs.prolog;
+
+ shader->prolog = si_get_shader_part(sscreen, &sscreen->gs_prologs,
+ PIPE_SHADER_GEOMETRY, true,
+ &prolog_key, tm, debug,
+ si_build_gs_prolog_function,
+ "Geometry Shader Prolog");
+ return shader->prolog != NULL;
+}
+
+/**
* Build the pixel shader prolog function. This handles:
* - two-side color selection and interpolation
* - overriding interpolation parameters for the API PS
@@ -8047,6 +8158,10 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
if (!si_shader_select_tes_parts(sscreen, tm, shader, debug))
return -1;
break;
+ case PIPE_SHADER_GEOMETRY:
+ if (!si_shader_select_gs_parts(sscreen, tm, shader, debug))
+ return -1;
+ break;
case PIPE_SHADER_FRAGMENT:
if (!si_shader_select_ps_parts(sscreen, tm, shader, debug))
return -1;
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 91f9cbffd8e..d8ab2a41c9c 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -325,6 +325,10 @@ struct si_tcs_epilog_bits {
uint64_t inputs_to_copy;
};
+struct si_gs_prolog_bits { /* GS prolog state bits; embedded in both si_shader_part_key.gs_prolog.states and si_shader_key.gs.prolog */
+ unsigned tri_strip_adj_fix:1; /* when set, si_build_gs_prolog_function remaps the input vertices of every other primitive */
+};
+
/* Common PS bits between the shader key and the prolog key. */
struct si_ps_prolog_bits {
unsigned color_two_side:1;
@@ -363,6 +367,9 @@ union si_shader_part_key {
struct si_tcs_epilog_bits states;
} tcs_epilog;
struct {
+ struct si_gs_prolog_bits states;
+ } gs_prolog;
+ struct {
struct si_ps_prolog_bits states;
unsigned num_input_sgprs:5;
unsigned num_input_vgprs:5;
@@ -401,6 +408,9 @@ union si_shader_key {
struct si_vs_epilog_bits epilog; /* same as VS */
unsigned as_es:1; /* export shader */
} tes; /* tessellation evaluation shader */
+ struct {
+ struct si_gs_prolog_bits prolog;
+ } gs;
};
struct si_shader_config {
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index c0e2642ba3d..affc156a3d1 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -982,6 +982,24 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
sctx->do_update_shaders = true;
}
+ if (sctx->gs_shader.cso) {
+ /* Determine whether the GS triangle strip adjacency fix should
+ * be applied. Rotate every other triangle if
+ * - triangle strips with adjacency are fed to the GS and
+ * - primitive restart is disabled (the rotation doesn't help
+ * when the restart occurs after an odd number of triangles).
+ */
+ bool gs_tri_strip_adj_fix =
+ !sctx->tes_shader.cso &&
+ info->mode == PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY &&
+ !info->primitive_restart;
+
+ if (gs_tri_strip_adj_fix != sctx->gs_tri_strip_adj_fix) {
+ sctx->gs_tri_strip_adj_fix = gs_tri_strip_adj_fix;
+ sctx->do_update_shaders = true;
+ }
+ }
+
if (sctx->do_update_shaders && !si_update_shaders(sctx))
return;
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 4c647cbbf02..2a41bf1c20a 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -896,6 +896,7 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
key->tes.epilog.export_prim_id = 1;
break;
case PIPE_SHADER_GEOMETRY:
+ key->gs.prolog.tri_strip_adj_fix = sctx->gs_tri_strip_adj_fix;
break;
case PIPE_SHADER_FRAGMENT: {
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
@@ -1155,8 +1156,7 @@ void si_init_shader_selector_async(void *job, int thread_index)
* If this fails, the driver will try to compile a monolithic shader
* on demand.
*/
- if (sel->type != PIPE_SHADER_GEOMETRY &&
- !sscreen->use_monolithic_shaders) {
+ if (!sscreen->use_monolithic_shaders) {
struct si_shader *shader = CALLOC_STRUCT(si_shader);
void *tgsi_binary;
@@ -1201,8 +1201,7 @@ void si_init_shader_selector_async(void *job, int thread_index)
}
/* Pre-compilation. */
- if (sel->type == PIPE_SHADER_GEOMETRY ||
- sscreen->b.debug_flags & DBG_PRECOMPILE) {
+ if (sscreen->b.debug_flags & DBG_PRECOMPILE) {
struct si_shader_ctx_state state = {sel};
union si_shader_key key;