summary | refs | log | tree | commit | diff | stats
path: root/src/gallium/drivers/radeonsi/si_shader.c
diff options
context:
space:
mode:
author: Marek Olšák <[email protected]> 2017-03-14 19:35:28 +0100
committer: Marek Olšák <[email protected]> 2017-04-28 21:47:35 +0200
commit: 0d6d25475d7c3ddc04349afee8e11b67d5ecf726 (patch)
tree: 5265e6b3425a546f2a2d11a4c3aa969e2e55be54 /src/gallium/drivers/radeonsi/si_shader.c
parent: a84a6feac926beca0a5a6fdabf83350a509cdcf8 (diff)
radeonsi/gfx9: set EXEC for non-mono merged shaders, add a barrier between them
Reviewed-by: Nicolai Hähnle <[email protected]>
Diffstat (limited to 'src/gallium/drivers/radeonsi/si_shader.c')
-rw-r--r-- src/gallium/drivers/radeonsi/si_shader.c 43
1 files changed, 41 insertions, 2 deletions
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index d96d55917bd..126f7dc7e59 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -7079,9 +7079,22 @@ static void si_count_scratch_private_memory(struct si_shader_context *ctx)
}
}
+static void si_init_exec_from_input(struct si_shader_context *ctx,
+ unsigned param, unsigned bitoffset)
+{ /* Emits a call to the "llvm.amdgcn.init.exec.from.input" intrinsic via ctx->gallivm.builder; callers in this patch use it to set EXEC for GFX9 merged shaders. */
+ LLVMValueRef args[] = { /* the two operands passed to the intrinsic */
+ LLVMGetParam(ctx->main_fn, param), /* parameter number `param` of ctx->main_fn */
+ LLVMConstInt(ctx->i32, bitoffset, 0), /* `bitoffset` as an i32 constant (sign-extend flag = 0) */
+ };
+ lp_build_intrinsic(ctx->gallivm.builder,
+ "llvm.amdgcn.init.exec.from.input",
+ ctx->voidt, args, 2, LP_FUNC_ATTR_CONVERGENT); /* void result, 2 args, convergent attribute; NOTE(review): presumably the intrinsic reads a thread count from the input at `bitoffset` - confirm against LLVM AMDGPU docs */
+}
+
static bool si_compile_tgsi_main(struct si_shader_context *ctx,
- struct si_shader *shader)
+ bool is_monolithic)
{
+ struct si_shader *shader = ctx->shader;
struct si_shader_selector *sel = shader->selector;
struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
@@ -7127,6 +7140,29 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx,
create_function(ctx);
preload_ring_buffers(ctx);
+ /* For GFX9 merged shaders:
+ * - Set EXEC. If the prolog is present, set EXEC there instead.
+ * - Add a barrier before the second shader.
+ *
+ * The same thing for monolithic shaders is done in
+ * si_build_wrapper_function.
+ */
+ if (ctx->screen->b.chip_class >= GFX9 && !is_monolithic) {
+ if (sel->info.num_instructions > 1 && /* not empty shader */
+ (shader->key.as_es || shader->key.as_ls) &&
+ (ctx->type == PIPE_SHADER_TESS_EVAL ||
+ (ctx->type == PIPE_SHADER_VERTEX &&
+ !sel->vs_needs_prolog))) {
+ si_init_exec_from_input(ctx,
+ ctx->param_merged_wave_info, 0);
+ } else if (ctx->type == PIPE_SHADER_TESS_CTRL ||
+ ctx->type == PIPE_SHADER_GEOMETRY) {
+ si_init_exec_from_input(ctx,
+ ctx->param_merged_wave_info, 8);
+ si_llvm_emit_barrier(NULL, bld_base, NULL);
+ }
+ }
+
if (ctx->type == PIPE_SHADER_GEOMETRY) {
int i;
for (i = 0; i < 4; i++) {
@@ -7642,7 +7678,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
ctx.load_system_value = declare_system_value;
- if (!si_compile_tgsi_main(&ctx, shader)) {
+ if (!si_compile_tgsi_main(&ctx, is_monolithic)) {
si_llvm_dispose(&ctx);
return -1;
}
@@ -7992,6 +8028,9 @@ static void si_build_vs_prolog_function(struct si_shader_context *ctx,
num_params, last_sgpr);
func = ctx->main_fn;
+ if (key->vs_prolog.num_merged_next_stage_vgprs)
+ si_init_exec_from_input(ctx, 3, 0);
+
/* Copy inputs to outputs. This should be no-op, as the registers match,
* but it will prevent the compiler from overwriting them unintentionally.
*/