diff options
-rw-r--r-- | src/gallium/drivers/radeonsi/si_pipe.c | 1 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_pipe.h | 2 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.c | 115 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.h | 10 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state_draw.c | 18 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state_shaders.c | 7 |
6 files changed, 149 insertions, 4 deletions
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index bf3b442dbcc..bc633bb927f 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -672,6 +672,7 @@ static void si_destroy_screen(struct pipe_screen* pscreen) sscreen->vs_prologs, sscreen->vs_epilogs, sscreen->tcs_epilogs, + sscreen->gs_prologs, sscreen->ps_prologs, sscreen->ps_epilogs }; diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index e7617bc4975..8e6a94deade 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -96,6 +96,7 @@ struct si_screen { struct si_shader_part *vs_prologs; struct si_shader_part *vs_epilogs; struct si_shader_part *tcs_epilogs; + struct si_shader_part *gs_prologs; struct si_shader_part *ps_prologs; struct si_shader_part *ps_epilogs; @@ -319,6 +320,7 @@ struct si_context { unsigned last_sc_line_stipple; int last_vtx_reuse_depth; int current_rast_prim; /* primitive type after TES, GS */ + bool gs_tri_strip_adj_fix; unsigned last_gsvs_itemsize; /* Scratch buffer */ diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index fe1542088fb..28a8b1fe9e9 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -6747,6 +6747,78 @@ static void si_get_ps_epilog_key(struct si_shader *shader, } /** + * Build the GS prolog function. Rotate the input vertices for triangle strips + * with adjacency. + */ +static void si_build_gs_prolog_function(struct si_shader_context *ctx, + union si_shader_part_key *key) +{ + const unsigned num_sgprs = SI_GS_NUM_USER_SGPR + 2; + const unsigned num_vgprs = 8; + struct gallivm_state *gallivm = &ctx->gallivm; + LLVMBuilderRef builder = gallivm->builder; + LLVMTypeRef params[32]; + LLVMTypeRef returns[32]; + LLVMValueRef func, ret; + + for (unsigned i = 0; i < num_sgprs; ++i) { + params[i] = ctx->i32; + returns[i] = ctx->i32; + } + + for (unsigned i = 0; i < num_vgprs; ++i) { + params[num_sgprs + i] = ctx->i32; + returns[num_sgprs + i] = ctx->f32; + } + + /* Create the function. */ + si_create_function(ctx, "gs_prolog", returns, num_sgprs + num_vgprs, + params, num_sgprs + num_vgprs, num_sgprs - 1); + func = ctx->main_fn; + + /* Copy inputs to outputs. This should be no-op, as the registers match, + * but it will prevent the compiler from overwriting them unintentionally. + */ + ret = ctx->return_value; + for (unsigned i = 0; i < num_sgprs; i++) { + LLVMValueRef p = LLVMGetParam(func, i); + ret = LLVMBuildInsertValue(builder, ret, p, i, ""); + } + for (unsigned i = 0; i < num_vgprs; i++) { + LLVMValueRef p = LLVMGetParam(func, num_sgprs + i); + p = LLVMBuildBitCast(builder, p, ctx->f32, ""); + ret = LLVMBuildInsertValue(builder, ret, p, num_sgprs + i, ""); + } + + if (key->gs_prolog.states.tri_strip_adj_fix) { + /* Remap the input vertices for every other primitive. */ + const unsigned vtx_params[6] = { + num_sgprs, + num_sgprs + 1, + num_sgprs + 3, + num_sgprs + 4, + num_sgprs + 5, + num_sgprs + 6 + }; + LLVMValueRef prim_id, rotate; + + prim_id = LLVMGetParam(func, num_sgprs + 2); + rotate = LLVMBuildTrunc(builder, prim_id, ctx->i1, ""); + + for (unsigned i = 0; i < 6; ++i) { + LLVMValueRef base, rotated, actual; + base = LLVMGetParam(func, vtx_params[i]); + rotated = LLVMGetParam(func, vtx_params[(i + 4) % 6]); + actual = LLVMBuildSelect(builder, rotate, rotated, base, ""); + actual = LLVMBuildBitCast(builder, actual, ctx->f32, ""); + ret = LLVMBuildInsertValue(builder, ret, actual, vtx_params[i], ""); + } + } + + LLVMBuildRet(builder, ret); +} + +/** * Given a list of shader part functions, build a wrapper function that * runs them in sequence to form a monolithic shader. */ @@ -7019,6 +7091,18 @@ int si_compile_tgsi_shader(struct si_screen *sscreen, parts[1] = ctx.main_fn; si_build_wrapper_function(&ctx, parts, 2, 0); + } else if (is_monolithic && ctx.type == PIPE_SHADER_GEOMETRY) { + LLVMValueRef parts[2]; + union si_shader_part_key prolog_key; + + parts[1] = ctx.main_fn; + + memset(&prolog_key, 0, sizeof(prolog_key)); + prolog_key.gs_prolog.states = shader->key.gs.prolog; + si_build_gs_prolog_function(&ctx, &prolog_key); + parts[0] = ctx.main_fn; + + si_build_wrapper_function(&ctx, parts, 2, 1); } else if (is_monolithic && ctx.type == PIPE_SHADER_FRAGMENT) { LLVMValueRef parts[3]; union si_shader_part_key prolog_key; @@ -7207,6 +7291,9 @@ si_get_shader_part(struct si_screen *sscreen, assert(!prolog); shader.key.tcs.epilog = key->tcs_epilog.states; break; + case PIPE_SHADER_GEOMETRY: + assert(prolog); + break; case PIPE_SHADER_FRAGMENT: if (prolog) shader.key.ps.prolog = key->ps_prolog.states; @@ -7531,6 +7618,30 @@ static bool si_shader_select_tcs_parts(struct si_screen *sscreen, } /** + * Select and compile (or reuse) GS parts (prolog). + */ +static bool si_shader_select_gs_parts(struct si_screen *sscreen, + LLVMTargetMachineRef tm, + struct si_shader *shader, + struct pipe_debug_callback *debug) +{ + union si_shader_part_key prolog_key; + + if (!shader->key.gs.prolog.tri_strip_adj_fix) + return true; + + memset(&prolog_key, 0, sizeof(prolog_key)); + prolog_key.gs_prolog.states = shader->key.gs.prolog; + + shader->prolog = si_get_shader_part(sscreen, &sscreen->gs_prologs, + PIPE_SHADER_GEOMETRY, true, + &prolog_key, tm, debug, + si_build_gs_prolog_function, + "Geometry Shader Prolog"); + return shader->prolog != NULL; +} + +/** * Build the pixel shader prolog function. This handles: * - two-side color selection and interpolation * - overriding interpolation parameters for the API PS @@ -8047,6 +8158,10 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, if (!si_shader_select_tes_parts(sscreen, tm, shader, debug)) return -1; break; + case PIPE_SHADER_GEOMETRY: + if (!si_shader_select_gs_parts(sscreen, tm, shader, debug)) + return -1; + break; case PIPE_SHADER_FRAGMENT: if (!si_shader_select_ps_parts(sscreen, tm, shader, debug)) return -1; diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 91f9cbffd8e..d8ab2a41c9c 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -325,6 +325,10 @@ struct si_tcs_epilog_bits { uint64_t inputs_to_copy; }; +struct si_gs_prolog_bits { + unsigned tri_strip_adj_fix:1; +}; + /* Common PS bits between the shader key and the prolog key. */ struct si_ps_prolog_bits { unsigned color_two_side:1; @@ -363,6 +367,9 @@ union si_shader_part_key { struct si_tcs_epilog_bits states; } tcs_epilog; struct { + struct si_gs_prolog_bits states; + } gs_prolog; + struct { struct si_ps_prolog_bits states; unsigned num_input_sgprs:5; unsigned num_input_vgprs:5; @@ -401,6 +408,9 @@ union si_shader_key { struct si_vs_epilog_bits epilog; /* same as VS */ unsigned as_es:1; /* export shader */ } tes; /* tessellation evaluation shader */ + struct { + struct si_gs_prolog_bits prolog; + } gs; }; struct si_shader_config { diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index c0e2642ba3d..affc156a3d1 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -982,6 +982,24 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) sctx->do_update_shaders = true; } + if (sctx->gs_shader.cso) { + /* Determine whether the GS triangle strip adjacency fix should + * be applied. Rotate every other triangle if + * - triangle strips with adjacency are fed to the GS and + * - primitive restart is disabled (the rotation doesn't help + * when the restart occurs after an odd number of triangles). + */ + bool gs_tri_strip_adj_fix = + !sctx->tes_shader.cso && + info->mode == PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY && + !info->primitive_restart; + + if (gs_tri_strip_adj_fix != sctx->gs_tri_strip_adj_fix) { + sctx->gs_tri_strip_adj_fix = gs_tri_strip_adj_fix; + sctx->do_update_shaders = true; + } + } + if (sctx->do_update_shaders && !si_update_shaders(sctx)) return; diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 4c647cbbf02..2a41bf1c20a 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -896,6 +896,7 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, key->tes.epilog.export_prim_id = 1; break; case PIPE_SHADER_GEOMETRY: + key->gs.prolog.tri_strip_adj_fix = sctx->gs_tri_strip_adj_fix; break; case PIPE_SHADER_FRAGMENT: { struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; @@ -1155,8 +1156,7 @@ void si_init_shader_selector_async(void *job, int thread_index) * If this fails, the driver will try to compile a monolithic shader * on demand. */ - if (sel->type != PIPE_SHADER_GEOMETRY && - !sscreen->use_monolithic_shaders) { + if (!sscreen->use_monolithic_shaders) { struct si_shader *shader = CALLOC_STRUCT(si_shader); void *tgsi_binary; @@ -1201,8 +1201,7 @@ void si_init_shader_selector_async(void *job, int thread_index) } /* Pre-compilation. */ - if (sel->type == PIPE_SHADER_GEOMETRY || - sscreen->b.debug_flags & DBG_PRECOMPILE) { + if (sscreen->b.debug_flags & DBG_PRECOMPILE) { struct si_shader_ctx_state state = {sel}; union si_shader_key key; |