aboutsummaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/radeonsi/si_shader_internal.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/radeonsi/si_shader_internal.h')
-rw-r--r--src/gallium/drivers/radeonsi/si_shader_internal.h431
1 files changed, 198 insertions, 233 deletions
diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h
index 47173142d44..2191604b706 100644
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -25,8 +25,8 @@
#ifndef SI_SHADER_PRIVATE_H
#define SI_SHADER_PRIVATE_H
-#include "si_shader.h"
#include "ac_shader_abi.h"
+#include "si_shader.h"
struct pipe_debug_callback;
@@ -38,275 +38,245 @@ struct pipe_debug_callback;
#define PS_EPILOG_SAMPLEMASK_MIN_LOC 14
struct si_shader_output_values {
- LLVMValueRef values[4];
- unsigned semantic_name;
- unsigned semantic_index;
- ubyte vertex_stream[4];
+ LLVMValueRef values[4];
+ unsigned semantic_name;
+ unsigned semantic_index;
+ ubyte vertex_stream[4];
};
struct si_shader_context {
- struct ac_llvm_context ac;
- struct si_shader *shader;
- struct si_screen *screen;
-
- unsigned type; /* PIPE_SHADER_* specifies the type of shader. */
-
- /* For clamping the non-constant index in resource indexing: */
- unsigned num_const_buffers;
- unsigned num_shader_buffers;
- unsigned num_images;
- unsigned num_samplers;
-
- struct ac_shader_args args;
- struct ac_shader_abi abi;
-
- LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS];
-
- LLVMBasicBlockRef merged_wrap_if_entry_block;
- int merged_wrap_if_label;
-
- LLVMValueRef main_fn;
- LLVMTypeRef return_type;
-
- struct ac_arg const_and_shader_buffers;
- struct ac_arg samplers_and_images;
-
- /* For merged shaders, the per-stage descriptors for the stage other
- * than the one we're processing, used to pass them through from the
- * first stage to the second.
- */
- struct ac_arg other_const_and_shader_buffers;
- struct ac_arg other_samplers_and_images;
-
- struct ac_arg rw_buffers;
- struct ac_arg bindless_samplers_and_images;
- /* Common inputs for merged shaders. */
- struct ac_arg merged_wave_info;
- struct ac_arg merged_scratch_offset;
- struct ac_arg small_prim_cull_info;
- /* API VS */
- struct ac_arg vertex_buffers;
- struct ac_arg vb_descriptors[5];
- struct ac_arg rel_auto_id;
- struct ac_arg vs_prim_id;
- struct ac_arg vertex_index0;
- /* VS states and layout of LS outputs / TCS inputs at the end
- * [0] = clamp vertex color
- * [1] = indexed
- * [2:3] = NGG: output primitive type
- * [4:5] = NGG: provoking vertex index
- * [6] = NGG: streamout queries enabled
- * [7:10] = NGG: small prim filter precision = num_samples / quant_mode,
- * but in reality it's: 1/2^n, from 1/16 to 1/4096 = 1/2^4 to 1/2^12
- * Only the first 4 bits of the exponent are stored.
- * Set it like this: (fui(num_samples / quant_mode) >> 23)
- * Expand to FP32 like this: ((0x70 | value) << 23);
- * With 0x70 = 112, we get 2^(112 + value - 127) = 2^(value - 15)
- * = 1/2^(15 - value) in FP32
- * [11:23] = stride between patches in DW = num_inputs * num_vertices * 4
- * max = 32*32*4 + 32*4
- * [24:31] = stride between vertices in DW = num_inputs * 4
- * max = 32*4
- */
- struct ac_arg vs_state_bits;
- struct ac_arg vs_blit_inputs;
- struct ac_arg ngg_old_thread_id; /* generated by the NGG cull shader */
- /* HW VS */
- struct ac_arg streamout_config;
- struct ac_arg streamout_write_index;
- struct ac_arg streamout_offset[4];
-
- /* API TCS & TES */
- /* Layout of TCS outputs in the offchip buffer
- * # 6 bits
- * [0:5] = the number of patches per threadgroup, max = NUM_PATCHES (40)
- * # 6 bits
- * [6:11] = the number of output vertices per patch, max = 32
- * # 20 bits
- * [12:31] = the offset of per patch attributes in the buffer in bytes.
- * max = NUM_PATCHES*32*32*16
- */
- struct ac_arg tcs_offchip_layout;
-
- /* API TCS */
- /* Offsets where TCS outputs and TCS patch outputs live in LDS:
- * [0:15] = TCS output patch0 offset / 16, max = NUM_PATCHES * 32 * 32
- * [16:31] = TCS output patch0 offset for per-patch / 16
- * max = (NUM_PATCHES + 1) * 32*32
- */
- struct ac_arg tcs_out_lds_offsets;
- /* Layout of TCS outputs / TES inputs:
- * [0:12] = stride between output patches in DW, num_outputs * num_vertices * 4
- * max = 32*32*4 + 32*4
- * [13:18] = gl_PatchVerticesIn, max = 32
- * [19:31] = high 13 bits of the 32-bit address of tessellation ring buffers
- */
- struct ac_arg tcs_out_lds_layout;
- struct ac_arg tcs_offchip_offset;
- struct ac_arg tcs_factor_offset;
-
- /* API TES */
- struct ac_arg tes_offchip_addr;
- struct ac_arg tes_u;
- struct ac_arg tes_v;
- struct ac_arg tes_rel_patch_id;
- /* HW ES */
- struct ac_arg es2gs_offset;
- /* HW GS */
- /* On gfx10:
- * - bits 0..11: ordered_wave_id
- * - bits 12..20: number of vertices in group
- * - bits 22..30: number of primitives in group
- */
- struct ac_arg gs_tg_info;
- /* API GS */
- struct ac_arg gs2vs_offset;
- struct ac_arg gs_wave_id; /* GFX6 */
- struct ac_arg gs_vtx_offset[6]; /* in dwords (GFX6) */
- struct ac_arg gs_vtx01_offset; /* in dwords (GFX9) */
- struct ac_arg gs_vtx23_offset; /* in dwords (GFX9) */
- struct ac_arg gs_vtx45_offset; /* in dwords (GFX9) */
- /* PS */
- struct ac_arg pos_fixed_pt;
- /* CS */
- struct ac_arg block_size;
- struct ac_arg cs_user_data;
-
- struct ac_llvm_compiler *compiler;
-
- /* Preloaded descriptors. */
- LLVMValueRef esgs_ring;
- LLVMValueRef gsvs_ring[4];
- LLVMValueRef tess_offchip_ring;
-
- LLVMValueRef invoc0_tess_factors[6]; /* outer[4], inner[2] */
- LLVMValueRef gs_next_vertex[4];
- LLVMValueRef gs_curprim_verts[4];
- LLVMValueRef gs_generated_prims[4];
- LLVMValueRef gs_ngg_emit;
- LLVMValueRef gs_ngg_scratch;
- LLVMValueRef postponed_kill;
- LLVMValueRef return_value;
+ struct ac_llvm_context ac;
+ struct si_shader *shader;
+ struct si_screen *screen;
+
+ unsigned type; /* PIPE_SHADER_* specifies the type of shader. */
+
+ /* For clamping the non-constant index in resource indexing: */
+ unsigned num_const_buffers;
+ unsigned num_shader_buffers;
+ unsigned num_images;
+ unsigned num_samplers;
+
+ struct ac_shader_args args;
+ struct ac_shader_abi abi;
+
+ LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS];
+
+ LLVMBasicBlockRef merged_wrap_if_entry_block;
+ int merged_wrap_if_label;
+
+ LLVMValueRef main_fn;
+ LLVMTypeRef return_type;
+
+ struct ac_arg const_and_shader_buffers;
+ struct ac_arg samplers_and_images;
+
+ /* For merged shaders, the per-stage descriptors for the stage other
+ * than the one we're processing, used to pass them through from the
+ * first stage to the second.
+ */
+ struct ac_arg other_const_and_shader_buffers;
+ struct ac_arg other_samplers_and_images;
+
+ struct ac_arg rw_buffers;
+ struct ac_arg bindless_samplers_and_images;
+ /* Common inputs for merged shaders. */
+ struct ac_arg merged_wave_info;
+ struct ac_arg merged_scratch_offset;
+ struct ac_arg small_prim_cull_info;
+ /* API VS */
+ struct ac_arg vertex_buffers;
+ struct ac_arg vb_descriptors[5];
+ struct ac_arg rel_auto_id;
+ struct ac_arg vs_prim_id;
+ struct ac_arg vertex_index0;
+ /* VS states and layout of LS outputs / TCS inputs at the end
+ * [0] = clamp vertex color
+ * [1] = indexed
+ * [2:3] = NGG: output primitive type
+ * [4:5] = NGG: provoking vertex index
+ * [6] = NGG: streamout queries enabled
+ * [7:10] = NGG: small prim filter precision = num_samples / quant_mode,
+ * but in reality it's: 1/2^n, from 1/16 to 1/4096 = 1/2^4 to 1/2^12
+ * Only the first 4 bits of the exponent are stored.
+ * Set it like this: (fui(num_samples / quant_mode) >> 23)
+ * Expand to FP32 like this: ((0x70 | value) << 23);
+ * With 0x70 = 112, we get 2^(112 + value - 127) = 2^(value - 15)
+ * = 1/2^(15 - value) in FP32
+ * [11:23] = stride between patches in DW = num_inputs * num_vertices * 4
+ * max = 32*32*4 + 32*4
+ * [24:31] = stride between vertices in DW = num_inputs * 4
+ * max = 32*4
+ */
+ struct ac_arg vs_state_bits;
+ struct ac_arg vs_blit_inputs;
+ struct ac_arg ngg_old_thread_id; /* generated by the NGG cull shader */
+ /* HW VS */
+ struct ac_arg streamout_config;
+ struct ac_arg streamout_write_index;
+ struct ac_arg streamout_offset[4];
+
+ /* API TCS & TES */
+ /* Layout of TCS outputs in the offchip buffer
+ * # 6 bits
+ * [0:5] = the number of patches per threadgroup, max = NUM_PATCHES (40)
+ * # 6 bits
+ * [6:11] = the number of output vertices per patch, max = 32
+ * # 20 bits
+ * [12:31] = the offset of per patch attributes in the buffer in bytes.
+ * max = NUM_PATCHES*32*32*16
+ */
+ struct ac_arg tcs_offchip_layout;
+
+ /* API TCS */
+ /* Offsets where TCS outputs and TCS patch outputs live in LDS:
+ * [0:15] = TCS output patch0 offset / 16, max = NUM_PATCHES * 32 * 32
+ * [16:31] = TCS output patch0 offset for per-patch / 16
+ * max = (NUM_PATCHES + 1) * 32*32
+ */
+ struct ac_arg tcs_out_lds_offsets;
+ /* Layout of TCS outputs / TES inputs:
+ * [0:12] = stride between output patches in DW, num_outputs * num_vertices * 4
+ * max = 32*32*4 + 32*4
+ * [13:18] = gl_PatchVerticesIn, max = 32
+ * [19:31] = high 13 bits of the 32-bit address of tessellation ring buffers
+ */
+ struct ac_arg tcs_out_lds_layout;
+ struct ac_arg tcs_offchip_offset;
+ struct ac_arg tcs_factor_offset;
+
+ /* API TES */
+ struct ac_arg tes_offchip_addr;
+ struct ac_arg tes_u;
+ struct ac_arg tes_v;
+ struct ac_arg tes_rel_patch_id;
+ /* HW ES */
+ struct ac_arg es2gs_offset;
+ /* HW GS */
+ /* On gfx10:
+ * - bits 0..11: ordered_wave_id
+ * - bits 12..20: number of vertices in group
+ * - bits 22..30: number of primitives in group
+ */
+ struct ac_arg gs_tg_info;
+ /* API GS */
+ struct ac_arg gs2vs_offset;
+ struct ac_arg gs_wave_id; /* GFX6 */
+ struct ac_arg gs_vtx_offset[6]; /* in dwords (GFX6) */
+ struct ac_arg gs_vtx01_offset; /* in dwords (GFX9) */
+ struct ac_arg gs_vtx23_offset; /* in dwords (GFX9) */
+ struct ac_arg gs_vtx45_offset; /* in dwords (GFX9) */
+ /* PS */
+ struct ac_arg pos_fixed_pt;
+ /* CS */
+ struct ac_arg block_size;
+ struct ac_arg cs_user_data;
+
+ struct ac_llvm_compiler *compiler;
+
+ /* Preloaded descriptors. */
+ LLVMValueRef esgs_ring;
+ LLVMValueRef gsvs_ring[4];
+ LLVMValueRef tess_offchip_ring;
+
+ LLVMValueRef invoc0_tess_factors[6]; /* outer[4], inner[2] */
+ LLVMValueRef gs_next_vertex[4];
+ LLVMValueRef gs_curprim_verts[4];
+ LLVMValueRef gs_generated_prims[4];
+ LLVMValueRef gs_ngg_emit;
+ LLVMValueRef gs_ngg_scratch;
+ LLVMValueRef postponed_kill;
+ LLVMValueRef return_value;
};
-static inline struct si_shader_context *
-si_shader_context_from_abi(struct ac_shader_abi *abi)
+static inline struct si_shader_context *si_shader_context_from_abi(struct ac_shader_abi *abi)
{
- struct si_shader_context *ctx = NULL;
- return container_of(abi, ctx, abi);
+ struct si_shader_context *ctx = NULL;
+ return container_of(abi, ctx, abi);
}
bool si_is_multi_part_shader(struct si_shader *shader);
bool si_is_merged_shader(struct si_shader *shader);
-void si_add_arg_checked(struct ac_shader_args *args,
- enum ac_arg_regfile file,
- unsigned registers, enum ac_arg_type type,
- struct ac_arg *arg,
- unsigned idx);
+void si_add_arg_checked(struct ac_shader_args *args, enum ac_arg_regfile file, unsigned registers,
+ enum ac_arg_type type, struct ac_arg *arg, unsigned idx);
unsigned si_get_max_workgroup_size(const struct si_shader *shader);
bool si_need_ps_prolog(const union si_shader_part_key *key);
-void si_get_ps_prolog_key(struct si_shader *shader,
- union si_shader_part_key *key,
- bool separate_prolog);
-void si_get_ps_epilog_key(struct si_shader *shader,
- union si_shader_part_key *key);
+void si_get_ps_prolog_key(struct si_shader *shader, union si_shader_part_key *key,
+ bool separate_prolog);
+void si_get_ps_epilog_key(struct si_shader *shader, union si_shader_part_key *key);
void si_fix_resource_usage(struct si_screen *sscreen, struct si_shader *shader);
void si_create_function(struct si_shader_context *ctx, bool ngg_cull_shader);
bool gfx10_ngg_export_prim_early(struct si_shader *shader);
void gfx10_ngg_build_sendmsg_gs_alloc_req(struct si_shader_context *ctx);
-void gfx10_ngg_build_export_prim(struct si_shader_context *ctx,
- LLVMValueRef user_edgeflags[3],
- LLVMValueRef prim_passthrough);
-void gfx10_emit_ngg_culling_epilogue_4x_wave32(struct ac_shader_abi *abi,
- unsigned max_outputs,
- LLVMValueRef *addrs);
-void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi,
- unsigned max_outputs,
- LLVMValueRef *addrs);
-void gfx10_ngg_gs_emit_vertex(struct si_shader_context *ctx,
- unsigned stream,
- LLVMValueRef *addrs);
+void gfx10_ngg_build_export_prim(struct si_shader_context *ctx, LLVMValueRef user_edgeflags[3],
+ LLVMValueRef prim_passthrough);
+void gfx10_emit_ngg_culling_epilogue_4x_wave32(struct ac_shader_abi *abi, unsigned max_outputs,
+ LLVMValueRef *addrs);
+void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi, unsigned max_outputs, LLVMValueRef *addrs);
+void gfx10_ngg_gs_emit_vertex(struct si_shader_context *ctx, unsigned stream, LLVMValueRef *addrs);
void gfx10_ngg_gs_emit_prologue(struct si_shader_context *ctx);
void gfx10_ngg_gs_emit_epilogue(struct si_shader_context *ctx);
void gfx10_ngg_calculate_subgroup_info(struct si_shader *shader);
/* si_shader_llvm.c */
-bool si_compile_llvm(struct si_screen *sscreen,
- struct si_shader_binary *binary,
- struct ac_shader_config *conf,
- struct ac_llvm_compiler *compiler,
- struct ac_llvm_context *ac,
- struct pipe_debug_callback *debug,
- enum pipe_shader_type shader_type,
- const char *name,
- bool less_optimized);
-void si_llvm_context_init(struct si_shader_context *ctx,
- struct si_screen *sscreen,
- struct ac_llvm_compiler *compiler,
- unsigned wave_size);
-void si_llvm_create_func(struct si_shader_context *ctx, const char *name,
- LLVMTypeRef *return_types, unsigned num_return_elems,
- unsigned max_workgroup_size);
+bool si_compile_llvm(struct si_screen *sscreen, struct si_shader_binary *binary,
+ struct ac_shader_config *conf, struct ac_llvm_compiler *compiler,
+ struct ac_llvm_context *ac, struct pipe_debug_callback *debug,
+ enum pipe_shader_type shader_type, const char *name, bool less_optimized);
+void si_llvm_context_init(struct si_shader_context *ctx, struct si_screen *sscreen,
+ struct ac_llvm_compiler *compiler, unsigned wave_size);
+void si_llvm_create_func(struct si_shader_context *ctx, const char *name, LLVMTypeRef *return_types,
+ unsigned num_return_elems, unsigned max_workgroup_size);
void si_llvm_optimize_module(struct si_shader_context *ctx);
void si_llvm_dispose(struct si_shader_context *ctx);
-LLVMValueRef si_buffer_load_const(struct si_shader_context *ctx,
- LLVMValueRef resource, LLVMValueRef offset);
+LLVMValueRef si_buffer_load_const(struct si_shader_context *ctx, LLVMValueRef resource,
+ LLVMValueRef offset);
void si_llvm_build_ret(struct si_shader_context *ctx, LLVMValueRef ret);
LLVMValueRef si_insert_input_ret(struct si_shader_context *ctx, LLVMValueRef ret,
- struct ac_arg param, unsigned return_index);
+ struct ac_arg param, unsigned return_index);
LLVMValueRef si_insert_input_ret_float(struct si_shader_context *ctx, LLVMValueRef ret,
- struct ac_arg param, unsigned return_index);
+ struct ac_arg param, unsigned return_index);
LLVMValueRef si_insert_input_ptr(struct si_shader_context *ctx, LLVMValueRef ret,
- struct ac_arg param, unsigned return_index);
+ struct ac_arg param, unsigned return_index);
LLVMValueRef si_prolog_get_rw_buffers(struct si_shader_context *ctx);
-LLVMValueRef si_build_gather_64bit(struct si_shader_context *ctx,
- LLVMTypeRef type, LLVMValueRef val1,
- LLVMValueRef val2);
+LLVMValueRef si_build_gather_64bit(struct si_shader_context *ctx, LLVMTypeRef type,
+ LLVMValueRef val1, LLVMValueRef val2);
void si_llvm_emit_barrier(struct si_shader_context *ctx);
void si_llvm_declare_esgs_ring(struct si_shader_context *ctx);
void si_init_exec_from_input(struct si_shader_context *ctx, struct ac_arg param,
- unsigned bitoffset);
-LLVMValueRef si_unpack_param(struct si_shader_context *ctx,
- struct ac_arg param, unsigned rshift,
- unsigned bitwidth);
-LLVMValueRef si_get_primitive_id(struct si_shader_context *ctx,
- unsigned swizzle);
+ unsigned bitoffset);
+LLVMValueRef si_unpack_param(struct si_shader_context *ctx, struct ac_arg param, unsigned rshift,
+ unsigned bitwidth);
+LLVMValueRef si_get_primitive_id(struct si_shader_context *ctx, unsigned swizzle);
LLVMValueRef si_llvm_get_block_size(struct ac_shader_abi *abi);
void si_llvm_declare_compute_memory(struct si_shader_context *ctx);
bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *nir);
void si_build_wrapper_function(struct si_shader_context *ctx, LLVMValueRef *parts,
- unsigned num_parts, unsigned main_part,
- unsigned next_shader_first_part);
+ unsigned num_parts, unsigned main_part,
+ unsigned next_shader_first_part);
/* si_shader_llvm_gs.c */
LLVMValueRef si_is_es_thread(struct si_shader_context *ctx);
LLVMValueRef si_is_gs_thread(struct si_shader_context *ctx);
-void si_llvm_emit_es_epilogue(struct ac_shader_abi *abi, unsigned max_outputs,
- LLVMValueRef *addrs);
+void si_llvm_emit_es_epilogue(struct ac_shader_abi *abi, unsigned max_outputs, LLVMValueRef *addrs);
void si_preload_esgs_ring(struct si_shader_context *ctx);
void si_preload_gs_rings(struct si_shader_context *ctx);
-void si_llvm_build_gs_prolog(struct si_shader_context *ctx,
- union si_shader_part_key *key);
+void si_llvm_build_gs_prolog(struct si_shader_context *ctx, union si_shader_part_key *key);
void si_llvm_init_gs_callbacks(struct si_shader_context *ctx);
/* si_shader_llvm_tess.c */
void si_llvm_preload_tes_rings(struct si_shader_context *ctx);
-void si_llvm_emit_ls_epilogue(struct ac_shader_abi *abi, unsigned max_outputs,
- LLVMValueRef *addrs);
-void si_llvm_build_tcs_epilog(struct si_shader_context *ctx,
- union si_shader_part_key *key);
+void si_llvm_emit_ls_epilogue(struct ac_shader_abi *abi, unsigned max_outputs, LLVMValueRef *addrs);
+void si_llvm_build_tcs_epilog(struct si_shader_context *ctx, union si_shader_part_key *key);
void si_llvm_init_tcs_callbacks(struct si_shader_context *ctx);
void si_llvm_init_tes_callbacks(struct si_shader_context *ctx, bool ngg_cull_shader);
/* si_shader_llvm_ps.c */
LLVMValueRef si_get_sample_id(struct si_shader_context *ctx);
-void si_llvm_build_ps_prolog(struct si_shader_context *ctx,
- union si_shader_part_key *key);
-void si_llvm_build_ps_epilog(struct si_shader_context *ctx,
- union si_shader_part_key *key);
-void si_llvm_build_monolithic_ps(struct si_shader_context *ctx,
- struct si_shader *shader);
+void si_llvm_build_ps_prolog(struct si_shader_context *ctx, union si_shader_part_key *key);
+void si_llvm_build_ps_epilog(struct si_shader_context *ctx, union si_shader_part_key *key);
+void si_llvm_build_monolithic_ps(struct si_shader_context *ctx, struct si_shader *shader);
void si_llvm_init_ps_callbacks(struct si_shader_context *ctx);
/* si_shader_llvm_resources.c */
@@ -314,21 +284,16 @@ void si_llvm_init_resource_callbacks(struct si_shader_context *ctx);
/* si_shader_llvm_vs.c */
void si_llvm_load_vs_inputs(struct si_shader_context *ctx, struct nir_shader *nir);
-void si_llvm_streamout_store_output(struct si_shader_context *ctx,
- LLVMValueRef const *so_buffers,
- LLVMValueRef const *so_write_offsets,
- struct pipe_stream_output *stream_out,
- struct si_shader_output_values *shader_out);
-void si_llvm_emit_streamout(struct si_shader_context *ctx,
- struct si_shader_output_values *outputs,
- unsigned noutput, unsigned stream);
+void si_llvm_streamout_store_output(struct si_shader_context *ctx, LLVMValueRef const *so_buffers,
+ LLVMValueRef const *so_write_offsets,
+ struct pipe_stream_output *stream_out,
+ struct si_shader_output_values *shader_out);
+void si_llvm_emit_streamout(struct si_shader_context *ctx, struct si_shader_output_values *outputs,
+ unsigned noutput, unsigned stream);
void si_llvm_build_vs_exports(struct si_shader_context *ctx,
- struct si_shader_output_values *outputs,
- unsigned noutput);
-void si_llvm_emit_vs_epilogue(struct ac_shader_abi *abi, unsigned max_outputs,
- LLVMValueRef *addrs);
-void si_llvm_build_vs_prolog(struct si_shader_context *ctx,
- union si_shader_part_key *key);
+ struct si_shader_output_values *outputs, unsigned noutput);
+void si_llvm_emit_vs_epilogue(struct ac_shader_abi *abi, unsigned max_outputs, LLVMValueRef *addrs);
+void si_llvm_build_vs_prolog(struct si_shader_context *ctx, union si_shader_part_key *key);
void si_llvm_init_vs_callbacks(struct si_shader_context *ctx, bool ngg_cull_shader);
#endif