author | Samuel Pitoiset <[email protected]> | 2018-03-09 16:58:10 +0100
---|---|---
committer | Samuel Pitoiset <[email protected]> | 2018-03-13 14:05:06 +0100
commit | b2653007b980e6fc9e226687003784c5b3fe5bcb (patch) |
tree | 08541b86cc22d084853f69c8749aadae75483866 /src/amd/common |
parent | 8e15824b9d73660db2ed41233d38c57cc43c9842 (diff) |
ac/nir: move all RADV related code to radv_nir_to_llvm.c
Now the "ac/nir" prefix will really be the shared code between
RadeonSI and RADV, that might avoid confusions in the future.
Signed-off-by: Samuel Pitoiset <[email protected]>
Acked-by: Bas Nieuwenhuizen <[email protected]>
Diffstat (limited to 'src/amd/common')
-rw-r--r-- | src/amd/common/ac_nir_to_llvm.c | 3403
-rw-r--r-- | src/amd/common/ac_nir_to_llvm.h | 17
2 files changed, 0 insertions, 3420 deletions
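
For orientation before the diff: the removed code now lives in radv_nir_to_llvm.c, and the shared translator in ac_nir_to_llvm.c keeps reaching it only through the `struct ac_shader_abi` callback table. The RADV side embeds that ABI in its own `struct radv_shader_context` and recovers the enclosing context with a container_of-style cast (see the removed `radv_shader_context_from_abi` helper below). A minimal sketch of the pattern, with simplified types and an abridged field list rather than the real Mesa definitions:

```c
#include <stddef.h>

/* Driver-agnostic ABI the shared ac/nir translator calls into.
 * (Abridged: the real struct carries many more callbacks and values.) */
struct ac_shader_abi {
    void *(*load_ubo)(struct ac_shader_abi *abi, void *buffer_ptr);
};

/* RADV-only compilation state, embedding the shared ABI. */
struct radv_shader_context {
    /* ... descriptor sets, ring offsets, other RADV state ... */
    struct ac_shader_abi abi;
};

/* Recover the enclosing RADV context from the ABI pointer that the
 * shared code passes back into a callback (container_of pattern). */
static inline struct radv_shader_context *
radv_shader_context_from_abi(struct ac_shader_abi *abi)
{
    return (struct radv_shader_context *)
        ((char *)abi - offsetof(struct radv_shader_context, abi));
}

/* Illustrative callback: RADV provides the implementation and sets
 * abi.load_ubo once, so ac_nir_to_llvm.c never sees RADV types. */
static void *radv_load_ubo(struct ac_shader_abi *abi, void *buffer_ptr)
{
    struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
    (void)ctx; /* the real code builds an LLVM load using ctx state */
    return buffer_ptr;
}
```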
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index ceda8c540d6..3a9cb7b9209 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -35,16 +35,6 @@ #include "ac_shader_util.h" #include "ac_exp_param.h" -enum radeon_llvm_calling_convention { - RADEON_LLVM_AMDGPU_VS = 87, - RADEON_LLVM_AMDGPU_GS = 88, - RADEON_LLVM_AMDGPU_PS = 89, - RADEON_LLVM_AMDGPU_CS = 90, - RADEON_LLVM_AMDGPU_HS = 93, -}; - -#define RADEON_LLVM_MAX_INPUTS (VARYING_SLOT_VAR31 + 1) - struct ac_nir_context { struct ac_llvm_context ac; struct ac_shader_abi *abi; @@ -63,1048 +53,12 @@ struct ac_nir_context { LLVMValueRef *locals; }; -struct radv_shader_context { - struct ac_llvm_context ac; - const struct ac_nir_compiler_options *options; - struct ac_shader_variant_info *shader_info; - struct ac_shader_abi abi; - - unsigned max_workgroup_size; - LLVMContextRef context; - LLVMValueRef main_function; - - LLVMValueRef descriptor_sets[AC_UD_MAX_SETS]; - LLVMValueRef ring_offsets; - - LLVMValueRef vertex_buffers; - LLVMValueRef rel_auto_id; - LLVMValueRef vs_prim_id; - LLVMValueRef ls_out_layout; - LLVMValueRef es2gs_offset; - - LLVMValueRef tcs_offchip_layout; - LLVMValueRef tcs_out_offsets; - LLVMValueRef tcs_out_layout; - LLVMValueRef tcs_in_layout; - LLVMValueRef oc_lds; - LLVMValueRef merged_wave_info; - LLVMValueRef tess_factor_offset; - LLVMValueRef tes_rel_patch_id; - LLVMValueRef tes_u; - LLVMValueRef tes_v; - - LLVMValueRef gsvs_ring_stride; - LLVMValueRef gsvs_num_entries; - LLVMValueRef gs2vs_offset; - LLVMValueRef gs_wave_id; - LLVMValueRef gs_vtx_offset[6]; - - LLVMValueRef esgs_ring; - LLVMValueRef gsvs_ring; - LLVMValueRef hs_ring_tess_offchip; - LLVMValueRef hs_ring_tess_factor; - - LLVMValueRef sample_pos_offset; - LLVMValueRef persp_sample, persp_center, persp_centroid; - LLVMValueRef linear_sample, linear_center, linear_centroid; - - gl_shader_stage stage; - - LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4]; - - uint64_t input_mask; - uint64_t output_mask; - uint8_t num_output_clips; - uint8_t num_output_culls; - - bool is_gs_copy_shader; - LLVMValueRef gs_next_vertex; - unsigned gs_max_out_vertices; - - unsigned tes_primitive_mode; - uint64_t tess_outputs_written; - uint64_t tess_patch_outputs_written; - - uint32_t tcs_patch_outputs_read; - uint64_t tcs_outputs_read; - uint32_t tcs_vertices_per_patch; -}; - -static inline struct radv_shader_context * -radv_shader_context_from_abi(struct ac_shader_abi *abi) -{ - struct radv_shader_context *ctx = NULL; - return container_of(abi, ctx, abi); -} - static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx, const nir_deref_var *deref, enum ac_descriptor_type desc_type, const nir_tex_instr *instr, bool image, bool write); -static unsigned shader_io_get_unique_index(gl_varying_slot slot) -{ - /* handle patch indices separate */ - if (slot == VARYING_SLOT_TESS_LEVEL_OUTER) - return 0; - if (slot == VARYING_SLOT_TESS_LEVEL_INNER) - return 1; - if (slot >= VARYING_SLOT_PATCH0 && slot <= VARYING_SLOT_TESS_MAX) - return 2 + (slot - VARYING_SLOT_PATCH0); - - if (slot == VARYING_SLOT_POS) - return 0; - if (slot == VARYING_SLOT_PSIZ) - return 1; - if (slot == VARYING_SLOT_CLIP_DIST0) - return 2; - /* 3 is reserved for clip dist as well */ - if (slot >= VARYING_SLOT_VAR0 && slot <= VARYING_SLOT_VAR31) - return 4 + (slot - VARYING_SLOT_VAR0); - unreachable("illegal slot in get unique index\n"); -} - -static void set_llvm_calling_convention(LLVMValueRef func, - gl_shader_stage stage) -{ - enum 
radeon_llvm_calling_convention calling_conv; - - switch (stage) { - case MESA_SHADER_VERTEX: - case MESA_SHADER_TESS_EVAL: - calling_conv = RADEON_LLVM_AMDGPU_VS; - break; - case MESA_SHADER_GEOMETRY: - calling_conv = RADEON_LLVM_AMDGPU_GS; - break; - case MESA_SHADER_TESS_CTRL: - calling_conv = HAVE_LLVM >= 0x0500 ? RADEON_LLVM_AMDGPU_HS : RADEON_LLVM_AMDGPU_VS; - break; - case MESA_SHADER_FRAGMENT: - calling_conv = RADEON_LLVM_AMDGPU_PS; - break; - case MESA_SHADER_COMPUTE: - calling_conv = RADEON_LLVM_AMDGPU_CS; - break; - default: - unreachable("Unhandle shader type"); - } - - LLVMSetFunctionCallConv(func, calling_conv); -} - -#define MAX_ARGS 23 -struct arg_info { - LLVMTypeRef types[MAX_ARGS]; - LLVMValueRef *assign[MAX_ARGS]; - unsigned array_params_mask; - uint8_t count; - uint8_t sgpr_count; - uint8_t num_sgprs_used; - uint8_t num_vgprs_used; -}; - -enum ac_arg_regfile { - ARG_SGPR, - ARG_VGPR, -}; - -static void -add_arg(struct arg_info *info, enum ac_arg_regfile regfile, LLVMTypeRef type, - LLVMValueRef *param_ptr) -{ - assert(info->count < MAX_ARGS); - - info->assign[info->count] = param_ptr; - info->types[info->count] = type; - info->count++; - - if (regfile == ARG_SGPR) { - info->num_sgprs_used += ac_get_type_size(type) / 4; - info->sgpr_count++; - } else { - assert(regfile == ARG_VGPR); - info->num_vgprs_used += ac_get_type_size(type) / 4; - } -} - -static inline void -add_array_arg(struct arg_info *info, LLVMTypeRef type, LLVMValueRef *param_ptr) -{ - info->array_params_mask |= (1 << info->count); - add_arg(info, ARG_SGPR, type, param_ptr); -} - -static void assign_arguments(LLVMValueRef main_function, - struct arg_info *info) -{ - unsigned i; - for (i = 0; i < info->count; i++) { - if (info->assign[i]) - *info->assign[i] = LLVMGetParam(main_function, i); - } -} - -static LLVMValueRef -create_llvm_function(LLVMContextRef ctx, LLVMModuleRef module, - LLVMBuilderRef builder, LLVMTypeRef *return_types, - unsigned num_return_elems, - struct arg_info *args, - unsigned max_workgroup_size, - bool unsafe_math) -{ - LLVMTypeRef main_function_type, ret_type; - LLVMBasicBlockRef main_function_body; - - if (num_return_elems) - ret_type = LLVMStructTypeInContext(ctx, return_types, - num_return_elems, true); - else - ret_type = LLVMVoidTypeInContext(ctx); - - /* Setup the function */ - main_function_type = - LLVMFunctionType(ret_type, args->types, args->count, 0); - LLVMValueRef main_function = - LLVMAddFunction(module, "main", main_function_type); - main_function_body = - LLVMAppendBasicBlockInContext(ctx, main_function, "main_body"); - LLVMPositionBuilderAtEnd(builder, main_function_body); - - LLVMSetFunctionCallConv(main_function, RADEON_LLVM_AMDGPU_CS); - for (unsigned i = 0; i < args->sgpr_count; ++i) { - ac_add_function_attr(ctx, main_function, i + 1, AC_FUNC_ATTR_INREG); - - if (args->array_params_mask & (1 << i)) { - LLVMValueRef P = LLVMGetParam(main_function, i); - ac_add_function_attr(ctx, main_function, i + 1, AC_FUNC_ATTR_NOALIAS); - ac_add_attr_dereferenceable(P, UINT64_MAX); - } - } - - if (max_workgroup_size) { - ac_llvm_add_target_dep_function_attr(main_function, - "amdgpu-max-work-group-size", - max_workgroup_size); - } - if (unsafe_math) { - /* These were copied from some LLVM test. 
*/ - LLVMAddTargetDependentFunctionAttr(main_function, - "less-precise-fpmad", - "true"); - LLVMAddTargetDependentFunctionAttr(main_function, - "no-infs-fp-math", - "true"); - LLVMAddTargetDependentFunctionAttr(main_function, - "no-nans-fp-math", - "true"); - LLVMAddTargetDependentFunctionAttr(main_function, - "unsafe-fp-math", - "true"); - LLVMAddTargetDependentFunctionAttr(main_function, - "no-signed-zeros-fp-math", - "true"); - } - return main_function; -} - -static LLVMValueRef get_rel_patch_id(struct radv_shader_context *ctx) -{ - switch (ctx->stage) { - case MESA_SHADER_TESS_CTRL: - return ac_unpack_param(&ctx->ac, ctx->abi.tcs_rel_ids, 0, 8); - case MESA_SHADER_TESS_EVAL: - return ctx->tes_rel_patch_id; - break; - default: - unreachable("Illegal stage"); - } -} - -/* Tessellation shaders pass outputs to the next shader using LDS. - * - * LS outputs = TCS inputs - * TCS outputs = TES inputs - * - * The LDS layout is: - * - TCS inputs for patch 0 - * - TCS inputs for patch 1 - * - TCS inputs for patch 2 = get_tcs_in_current_patch_offset (if RelPatchID==2) - * - ... - * - TCS outputs for patch 0 = get_tcs_out_patch0_offset - * - Per-patch TCS outputs for patch 0 = get_tcs_out_patch0_patch_data_offset - * - TCS outputs for patch 1 - * - Per-patch TCS outputs for patch 1 - * - TCS outputs for patch 2 = get_tcs_out_current_patch_offset (if RelPatchID==2) - * - Per-patch TCS outputs for patch 2 = get_tcs_out_current_patch_data_offset (if RelPatchID==2) - * - ... - * - * All three shaders VS(LS), TCS, TES share the same LDS space. - */ -static LLVMValueRef -get_tcs_in_patch_stride(struct radv_shader_context *ctx) -{ - if (ctx->stage == MESA_SHADER_VERTEX) - return ac_unpack_param(&ctx->ac, ctx->ls_out_layout, 0, 13); - else if (ctx->stage == MESA_SHADER_TESS_CTRL) - return ac_unpack_param(&ctx->ac, ctx->tcs_in_layout, 0, 13); - else { - assert(0); - return NULL; - } -} - -static LLVMValueRef -get_tcs_out_patch_stride(struct radv_shader_context *ctx) -{ - return ac_unpack_param(&ctx->ac, ctx->tcs_out_layout, 0, 13); -} - -static LLVMValueRef -get_tcs_out_vertex_stride(struct radv_shader_context *ctx) -{ - return ac_unpack_param(&ctx->ac, ctx->tcs_out_layout, 13, 8); -} - -static LLVMValueRef -get_tcs_out_patch0_offset(struct radv_shader_context *ctx) -{ - return LLVMBuildMul(ctx->ac.builder, - ac_unpack_param(&ctx->ac, ctx->tcs_out_offsets, 0, 16), - LLVMConstInt(ctx->ac.i32, 4, false), ""); -} - -static LLVMValueRef -get_tcs_out_patch0_patch_data_offset(struct radv_shader_context *ctx) -{ - return LLVMBuildMul(ctx->ac.builder, - ac_unpack_param(&ctx->ac, ctx->tcs_out_offsets, 16, 16), - LLVMConstInt(ctx->ac.i32, 4, false), ""); -} - -static LLVMValueRef -get_tcs_in_current_patch_offset(struct radv_shader_context *ctx) -{ - LLVMValueRef patch_stride = get_tcs_in_patch_stride(ctx); - LLVMValueRef rel_patch_id = get_rel_patch_id(ctx); - - return LLVMBuildMul(ctx->ac.builder, patch_stride, rel_patch_id, ""); -} - -static LLVMValueRef -get_tcs_out_current_patch_offset(struct radv_shader_context *ctx) -{ - LLVMValueRef patch0_offset = get_tcs_out_patch0_offset(ctx); - LLVMValueRef patch_stride = get_tcs_out_patch_stride(ctx); - LLVMValueRef rel_patch_id = get_rel_patch_id(ctx); - - return LLVMBuildAdd(ctx->ac.builder, patch0_offset, - LLVMBuildMul(ctx->ac.builder, patch_stride, - rel_patch_id, ""), - ""); -} - -static LLVMValueRef -get_tcs_out_current_patch_data_offset(struct radv_shader_context *ctx) -{ - LLVMValueRef patch0_patch_data_offset = - get_tcs_out_patch0_patch_data_offset(ctx); - 
LLVMValueRef patch_stride = get_tcs_out_patch_stride(ctx); - LLVMValueRef rel_patch_id = get_rel_patch_id(ctx); - - return LLVMBuildAdd(ctx->ac.builder, patch0_patch_data_offset, - LLVMBuildMul(ctx->ac.builder, patch_stride, - rel_patch_id, ""), - ""); -} - -static void -set_loc(struct ac_userdata_info *ud_info, uint8_t *sgpr_idx, uint8_t num_sgprs, - uint32_t indirect_offset) -{ - ud_info->sgpr_idx = *sgpr_idx; - ud_info->num_sgprs = num_sgprs; - ud_info->indirect = indirect_offset > 0; - ud_info->indirect_offset = indirect_offset; - *sgpr_idx += num_sgprs; -} - -static void -set_loc_shader(struct radv_shader_context *ctx, int idx, uint8_t *sgpr_idx, - uint8_t num_sgprs) -{ - struct ac_userdata_info *ud_info = - &ctx->shader_info->user_sgprs_locs.shader_data[idx]; - assert(ud_info); - - set_loc(ud_info, sgpr_idx, num_sgprs, 0); -} - -static void -set_loc_desc(struct radv_shader_context *ctx, int idx, uint8_t *sgpr_idx, - uint32_t indirect_offset) -{ - struct ac_userdata_info *ud_info = - &ctx->shader_info->user_sgprs_locs.descriptor_sets[idx]; - assert(ud_info); - - set_loc(ud_info, sgpr_idx, 2, indirect_offset); -} - -struct user_sgpr_info { - bool need_ring_offsets; - uint8_t sgpr_count; - bool indirect_all_descriptor_sets; -}; - -static bool needs_view_index_sgpr(struct radv_shader_context *ctx, - gl_shader_stage stage) -{ - switch (stage) { - case MESA_SHADER_VERTEX: - if (ctx->shader_info->info.needs_multiview_view_index || - (!ctx->options->key.vs.as_es && !ctx->options->key.vs.as_ls && ctx->options->key.has_multiview_view_index)) - return true; - break; - case MESA_SHADER_TESS_EVAL: - if (ctx->shader_info->info.needs_multiview_view_index || (!ctx->options->key.tes.as_es && ctx->options->key.has_multiview_view_index)) - return true; - break; - case MESA_SHADER_GEOMETRY: - case MESA_SHADER_TESS_CTRL: - if (ctx->shader_info->info.needs_multiview_view_index) - return true; - break; - default: - break; - } - return false; -} - -static uint8_t -count_vs_user_sgprs(struct radv_shader_context *ctx) -{ - uint8_t count = 0; - - count += ctx->shader_info->info.vs.has_vertex_buffers ? 2 : 0; - count += ctx->shader_info->info.vs.needs_draw_id ? 
3 : 2; - - return count; -} - -static void allocate_user_sgprs(struct radv_shader_context *ctx, - gl_shader_stage stage, - bool has_previous_stage, - gl_shader_stage previous_stage, - bool needs_view_index, - struct user_sgpr_info *user_sgpr_info) -{ - memset(user_sgpr_info, 0, sizeof(struct user_sgpr_info)); - - /* until we sort out scratch/global buffers always assign ring offsets for gs/vs/es */ - if (stage == MESA_SHADER_GEOMETRY || - stage == MESA_SHADER_VERTEX || - stage == MESA_SHADER_TESS_CTRL || - stage == MESA_SHADER_TESS_EVAL || - ctx->is_gs_copy_shader) - user_sgpr_info->need_ring_offsets = true; - - if (stage == MESA_SHADER_FRAGMENT && - ctx->shader_info->info.ps.needs_sample_positions) - user_sgpr_info->need_ring_offsets = true; - - /* 2 user sgprs will nearly always be allocated for scratch/rings */ - if (ctx->options->supports_spill || user_sgpr_info->need_ring_offsets) { - user_sgpr_info->sgpr_count += 2; - } - - switch (stage) { - case MESA_SHADER_COMPUTE: - if (ctx->shader_info->info.cs.uses_grid_size) - user_sgpr_info->sgpr_count += 3; - break; - case MESA_SHADER_FRAGMENT: - user_sgpr_info->sgpr_count += ctx->shader_info->info.ps.needs_sample_positions; - break; - case MESA_SHADER_VERTEX: - if (!ctx->is_gs_copy_shader) - user_sgpr_info->sgpr_count += count_vs_user_sgprs(ctx); - if (ctx->options->key.vs.as_ls) - user_sgpr_info->sgpr_count++; - break; - case MESA_SHADER_TESS_CTRL: - if (has_previous_stage) { - if (previous_stage == MESA_SHADER_VERTEX) - user_sgpr_info->sgpr_count += count_vs_user_sgprs(ctx); - user_sgpr_info->sgpr_count++; - } - user_sgpr_info->sgpr_count += 4; - break; - case MESA_SHADER_TESS_EVAL: - user_sgpr_info->sgpr_count += 1; - break; - case MESA_SHADER_GEOMETRY: - if (has_previous_stage) { - if (previous_stage == MESA_SHADER_VERTEX) { - user_sgpr_info->sgpr_count += count_vs_user_sgprs(ctx); - } else { - user_sgpr_info->sgpr_count++; - } - } - user_sgpr_info->sgpr_count += 2; - break; - default: - break; - } - - if (needs_view_index) - user_sgpr_info->sgpr_count++; - - if (ctx->shader_info->info.loads_push_constants) - user_sgpr_info->sgpr_count += 2; - - uint32_t available_sgprs = ctx->options->chip_class >= GFX9 ? 32 : 16; - uint32_t remaining_sgprs = available_sgprs - user_sgpr_info->sgpr_count; - - if (remaining_sgprs / 2 < util_bitcount(ctx->shader_info->info.desc_set_used_mask)) { - user_sgpr_info->sgpr_count += 2; - user_sgpr_info->indirect_all_descriptor_sets = true; - } else { - user_sgpr_info->sgpr_count += util_bitcount(ctx->shader_info->info.desc_set_used_mask) * 2; - } -} - -static void -declare_global_input_sgprs(struct radv_shader_context *ctx, - gl_shader_stage stage, - bool has_previous_stage, - gl_shader_stage previous_stage, - const struct user_sgpr_info *user_sgpr_info, - struct arg_info *args, - LLVMValueRef *desc_sets) -{ - LLVMTypeRef type = ac_array_in_const_addr_space(ctx->ac.i8); - unsigned num_sets = ctx->options->layout ? 
- ctx->options->layout->num_sets : 0; - unsigned stage_mask = 1 << stage; - - if (has_previous_stage) - stage_mask |= 1 << previous_stage; - - /* 1 for each descriptor set */ - if (!user_sgpr_info->indirect_all_descriptor_sets) { - for (unsigned i = 0; i < num_sets; ++i) { - if ((ctx->shader_info->info.desc_set_used_mask & (1 << i)) && - ctx->options->layout->set[i].layout->shader_stages & stage_mask) { - add_array_arg(args, type, - &ctx->descriptor_sets[i]); - } - } - } else { - add_array_arg(args, ac_array_in_const_addr_space(type), desc_sets); - } - - if (ctx->shader_info->info.loads_push_constants) { - /* 1 for push constants and dynamic descriptors */ - add_array_arg(args, type, &ctx->abi.push_constants); - } -} - -static void -declare_vs_specific_input_sgprs(struct radv_shader_context *ctx, - gl_shader_stage stage, - bool has_previous_stage, - gl_shader_stage previous_stage, - struct arg_info *args) -{ - if (!ctx->is_gs_copy_shader && - (stage == MESA_SHADER_VERTEX || - (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) { - if (ctx->shader_info->info.vs.has_vertex_buffers) { - add_arg(args, ARG_SGPR, ac_array_in_const_addr_space(ctx->ac.v4i32), - &ctx->vertex_buffers); - } - add_arg(args, ARG_SGPR, ctx->ac.i32, &ctx->abi.base_vertex); - add_arg(args, ARG_SGPR, ctx->ac.i32, &ctx->abi.start_instance); - if (ctx->shader_info->info.vs.needs_draw_id) { - add_arg(args, ARG_SGPR, ctx->ac.i32, &ctx->abi.draw_id); - } - } -} - -static void -declare_vs_input_vgprs(struct radv_shader_context *ctx, struct arg_info *args) -{ - add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->abi.vertex_id); - if (!ctx->is_gs_copy_shader) { - if (ctx->options->key.vs.as_ls) { - add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->rel_auto_id); - add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->abi.instance_id); - } else { - add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->abi.instance_id); - add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->vs_prim_id); - } - add_arg(args, ARG_VGPR, ctx->ac.i32, NULL); /* unused */ - } -} - -static void -declare_tes_input_vgprs(struct radv_shader_context *ctx, struct arg_info *args) -{ - add_arg(args, ARG_VGPR, ctx->ac.f32, &ctx->tes_u); - add_arg(args, ARG_VGPR, ctx->ac.f32, &ctx->tes_v); - add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->tes_rel_patch_id); - add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->abi.tes_patch_id); -} - -static void -set_global_input_locs(struct radv_shader_context *ctx, gl_shader_stage stage, - bool has_previous_stage, gl_shader_stage previous_stage, - const struct user_sgpr_info *user_sgpr_info, - LLVMValueRef desc_sets, uint8_t *user_sgpr_idx) -{ - unsigned num_sets = ctx->options->layout ? 
- ctx->options->layout->num_sets : 0; - unsigned stage_mask = 1 << stage; - - if (has_previous_stage) - stage_mask |= 1 << previous_stage; - - if (!user_sgpr_info->indirect_all_descriptor_sets) { - for (unsigned i = 0; i < num_sets; ++i) { - if ((ctx->shader_info->info.desc_set_used_mask & (1 << i)) && - ctx->options->layout->set[i].layout->shader_stages & stage_mask) { - set_loc_desc(ctx, i, user_sgpr_idx, 0); - } else - ctx->descriptor_sets[i] = NULL; - } - } else { - set_loc_shader(ctx, AC_UD_INDIRECT_DESCRIPTOR_SETS, - user_sgpr_idx, 2); - - for (unsigned i = 0; i < num_sets; ++i) { - if ((ctx->shader_info->info.desc_set_used_mask & (1 << i)) && - ctx->options->layout->set[i].layout->shader_stages & stage_mask) { - set_loc_desc(ctx, i, user_sgpr_idx, i * 8); - ctx->descriptor_sets[i] = - ac_build_load_to_sgpr(&ctx->ac, - desc_sets, - LLVMConstInt(ctx->ac.i32, i, false)); - - } else - ctx->descriptor_sets[i] = NULL; - } - ctx->shader_info->need_indirect_descriptor_sets = true; - } - - if (ctx->shader_info->info.loads_push_constants) { - set_loc_shader(ctx, AC_UD_PUSH_CONSTANTS, user_sgpr_idx, 2); - } -} - -static void -set_vs_specific_input_locs(struct radv_shader_context *ctx, - gl_shader_stage stage, bool has_previous_stage, - gl_shader_stage previous_stage, - uint8_t *user_sgpr_idx) -{ - if (!ctx->is_gs_copy_shader && - (stage == MESA_SHADER_VERTEX || - (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) { - if (ctx->shader_info->info.vs.has_vertex_buffers) { - set_loc_shader(ctx, AC_UD_VS_VERTEX_BUFFERS, - user_sgpr_idx, 2); - } - - unsigned vs_num = 2; - if (ctx->shader_info->info.vs.needs_draw_id) - vs_num++; - - set_loc_shader(ctx, AC_UD_VS_BASE_VERTEX_START_INSTANCE, - user_sgpr_idx, vs_num); - } -} - -static void create_function(struct radv_shader_context *ctx, - gl_shader_stage stage, - bool has_previous_stage, - gl_shader_stage previous_stage) -{ - uint8_t user_sgpr_idx; - struct user_sgpr_info user_sgpr_info; - struct arg_info args = {}; - LLVMValueRef desc_sets; - bool needs_view_index = needs_view_index_sgpr(ctx, stage); - allocate_user_sgprs(ctx, stage, has_previous_stage, - previous_stage, needs_view_index, &user_sgpr_info); - - if (user_sgpr_info.need_ring_offsets && !ctx->options->supports_spill) { - add_arg(&args, ARG_SGPR, ac_array_in_const_addr_space(ctx->ac.v4i32), - &ctx->ring_offsets); - } - - switch (stage) { - case MESA_SHADER_COMPUTE: - declare_global_input_sgprs(ctx, stage, has_previous_stage, - previous_stage, &user_sgpr_info, - &args, &desc_sets); - - if (ctx->shader_info->info.cs.uses_grid_size) { - add_arg(&args, ARG_SGPR, ctx->ac.v3i32, - &ctx->abi.num_work_groups); - } - - for (int i = 0; i < 3; i++) { - ctx->abi.workgroup_ids[i] = NULL; - if (ctx->shader_info->info.cs.uses_block_id[i]) { - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->abi.workgroup_ids[i]); - } - } - - if (ctx->shader_info->info.cs.uses_local_invocation_idx) - add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->abi.tg_size); - add_arg(&args, ARG_VGPR, ctx->ac.v3i32, - &ctx->abi.local_invocation_ids); - break; - case MESA_SHADER_VERTEX: - declare_global_input_sgprs(ctx, stage, has_previous_stage, - previous_stage, &user_sgpr_info, - &args, &desc_sets); - declare_vs_specific_input_sgprs(ctx, stage, has_previous_stage, - previous_stage, &args); - - if (needs_view_index) - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->abi.view_index); - if (ctx->options->key.vs.as_es) - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->es2gs_offset); - else if (ctx->options->key.vs.as_ls) - add_arg(&args, 
ARG_SGPR, ctx->ac.i32, - &ctx->ls_out_layout); - - declare_vs_input_vgprs(ctx, &args); - break; - case MESA_SHADER_TESS_CTRL: - if (has_previous_stage) { - // First 6 system regs - add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->oc_lds); - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->merged_wave_info); - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->tess_factor_offset); - - add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // scratch offset - add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // unknown - add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // unknown - - declare_global_input_sgprs(ctx, stage, - has_previous_stage, - previous_stage, - &user_sgpr_info, &args, - &desc_sets); - declare_vs_specific_input_sgprs(ctx, stage, - has_previous_stage, - previous_stage, &args); - - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->ls_out_layout); - - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->tcs_offchip_layout); - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->tcs_out_offsets); - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->tcs_out_layout); - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->tcs_in_layout); - if (needs_view_index) - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->abi.view_index); - - add_arg(&args, ARG_VGPR, ctx->ac.i32, - &ctx->abi.tcs_patch_id); - add_arg(&args, ARG_VGPR, ctx->ac.i32, - &ctx->abi.tcs_rel_ids); - - declare_vs_input_vgprs(ctx, &args); - } else { - declare_global_input_sgprs(ctx, stage, - has_previous_stage, - previous_stage, - &user_sgpr_info, &args, - &desc_sets); - - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->tcs_offchip_layout); - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->tcs_out_offsets); - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->tcs_out_layout); - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->tcs_in_layout); - if (needs_view_index) - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->abi.view_index); - - add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->oc_lds); - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->tess_factor_offset); - add_arg(&args, ARG_VGPR, ctx->ac.i32, - &ctx->abi.tcs_patch_id); - add_arg(&args, ARG_VGPR, ctx->ac.i32, - &ctx->abi.tcs_rel_ids); - } - break; - case MESA_SHADER_TESS_EVAL: - declare_global_input_sgprs(ctx, stage, has_previous_stage, - previous_stage, &user_sgpr_info, - &args, &desc_sets); - - add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->tcs_offchip_layout); - if (needs_view_index) - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->abi.view_index); - - if (ctx->options->key.tes.as_es) { - add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->oc_lds); - add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->es2gs_offset); - } else { - add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); - add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->oc_lds); - } - declare_tes_input_vgprs(ctx, &args); - break; - case MESA_SHADER_GEOMETRY: - if (has_previous_stage) { - // First 6 system regs - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->gs2vs_offset); - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->merged_wave_info); - add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->oc_lds); - - add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // scratch offset - add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // unknown - add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // unknown - - declare_global_input_sgprs(ctx, stage, - has_previous_stage, - previous_stage, - &user_sgpr_info, &args, - &desc_sets); - - if (previous_stage == MESA_SHADER_TESS_EVAL) { - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->tcs_offchip_layout); - } else { - declare_vs_specific_input_sgprs(ctx, stage, - 
has_previous_stage, - previous_stage, - &args); - } - - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->gsvs_ring_stride); - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->gsvs_num_entries); - if (needs_view_index) - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->abi.view_index); - - add_arg(&args, ARG_VGPR, ctx->ac.i32, - &ctx->gs_vtx_offset[0]); - add_arg(&args, ARG_VGPR, ctx->ac.i32, - &ctx->gs_vtx_offset[2]); - add_arg(&args, ARG_VGPR, ctx->ac.i32, - &ctx->abi.gs_prim_id); - add_arg(&args, ARG_VGPR, ctx->ac.i32, - &ctx->abi.gs_invocation_id); - add_arg(&args, ARG_VGPR, ctx->ac.i32, - &ctx->gs_vtx_offset[4]); - - if (previous_stage == MESA_SHADER_VERTEX) { - declare_vs_input_vgprs(ctx, &args); - } else { - declare_tes_input_vgprs(ctx, &args); - } - } else { - declare_global_input_sgprs(ctx, stage, - has_previous_stage, - previous_stage, - &user_sgpr_info, &args, - &desc_sets); - - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->gsvs_ring_stride); - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->gsvs_num_entries); - if (needs_view_index) - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->abi.view_index); - - add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->gs2vs_offset); - add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->gs_wave_id); - add_arg(&args, ARG_VGPR, ctx->ac.i32, - &ctx->gs_vtx_offset[0]); - add_arg(&args, ARG_VGPR, ctx->ac.i32, - &ctx->gs_vtx_offset[1]); - add_arg(&args, ARG_VGPR, ctx->ac.i32, - &ctx->abi.gs_prim_id); - add_arg(&args, ARG_VGPR, ctx->ac.i32, - &ctx->gs_vtx_offset[2]); - add_arg(&args, ARG_VGPR, ctx->ac.i32, - &ctx->gs_vtx_offset[3]); - add_arg(&args, ARG_VGPR, ctx->ac.i32, - &ctx->gs_vtx_offset[4]); - add_arg(&args, ARG_VGPR, ctx->ac.i32, - &ctx->gs_vtx_offset[5]); - add_arg(&args, ARG_VGPR, ctx->ac.i32, - &ctx->abi.gs_invocation_id); - } - break; - case MESA_SHADER_FRAGMENT: - declare_global_input_sgprs(ctx, stage, has_previous_stage, - previous_stage, &user_sgpr_info, - &args, &desc_sets); - - if (ctx->shader_info->info.ps.needs_sample_positions) - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->sample_pos_offset); - - add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->abi.prim_mask); - add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->persp_sample); - add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->persp_center); - add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->persp_centroid); - add_arg(&args, ARG_VGPR, ctx->ac.v3i32, NULL); /* persp pull model */ - add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->linear_sample); - add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->linear_center); - add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->linear_centroid); - add_arg(&args, ARG_VGPR, ctx->ac.f32, NULL); /* line stipple tex */ - add_arg(&args, ARG_VGPR, ctx->ac.f32, &ctx->abi.frag_pos[0]); - add_arg(&args, ARG_VGPR, ctx->ac.f32, &ctx->abi.frag_pos[1]); - add_arg(&args, ARG_VGPR, ctx->ac.f32, &ctx->abi.frag_pos[2]); - add_arg(&args, ARG_VGPR, ctx->ac.f32, &ctx->abi.frag_pos[3]); - add_arg(&args, ARG_VGPR, ctx->ac.i32, &ctx->abi.front_face); - add_arg(&args, ARG_VGPR, ctx->ac.i32, &ctx->abi.ancillary); - add_arg(&args, ARG_VGPR, ctx->ac.i32, &ctx->abi.sample_coverage); - add_arg(&args, ARG_VGPR, ctx->ac.i32, NULL); /* fixed pt */ - break; - default: - unreachable("Shader stage not implemented"); - } - - ctx->main_function = create_llvm_function( - ctx->context, ctx->ac.module, ctx->ac.builder, NULL, 0, &args, - ctx->max_workgroup_size, - ctx->options->unsafe_math); - set_llvm_calling_convention(ctx->main_function, stage); - - - ctx->shader_info->num_input_vgprs = 0; - ctx->shader_info->num_input_sgprs = 
ctx->options->supports_spill ? 2 : 0; - - ctx->shader_info->num_input_sgprs += args.num_sgprs_used; - - if (ctx->stage != MESA_SHADER_FRAGMENT) - ctx->shader_info->num_input_vgprs = args.num_vgprs_used; - - assign_arguments(ctx->main_function, &args); - - user_sgpr_idx = 0; - - if (ctx->options->supports_spill || user_sgpr_info.need_ring_offsets) { - set_loc_shader(ctx, AC_UD_SCRATCH_RING_OFFSETS, - &user_sgpr_idx, 2); - if (ctx->options->supports_spill) { - ctx->ring_offsets = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.implicit.buffer.ptr", - LLVMPointerType(ctx->ac.i8, AC_CONST_ADDR_SPACE), - NULL, 0, AC_FUNC_ATTR_READNONE); - ctx->ring_offsets = LLVMBuildBitCast(ctx->ac.builder, ctx->ring_offsets, - ac_array_in_const_addr_space(ctx->ac.v4i32), ""); - } - } - - /* For merged shaders the user SGPRs start at 8, with 8 system SGPRs in front (including - * the rw_buffers at s0/s1. With user SGPR0 = s8, lets restart the count from 0 */ - if (has_previous_stage) - user_sgpr_idx = 0; - - set_global_input_locs(ctx, stage, has_previous_stage, previous_stage, - &user_sgpr_info, desc_sets, &user_sgpr_idx); - - switch (stage) { - case MESA_SHADER_COMPUTE: - if (ctx->shader_info->info.cs.uses_grid_size) { - set_loc_shader(ctx, AC_UD_CS_GRID_SIZE, - &user_sgpr_idx, 3); - } - break; - case MESA_SHADER_VERTEX: - set_vs_specific_input_locs(ctx, stage, has_previous_stage, - previous_stage, &user_sgpr_idx); - if (ctx->abi.view_index) - set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1); - if (ctx->options->key.vs.as_ls) { - set_loc_shader(ctx, AC_UD_VS_LS_TCS_IN_LAYOUT, - &user_sgpr_idx, 1); - } - break; - case MESA_SHADER_TESS_CTRL: - set_vs_specific_input_locs(ctx, stage, has_previous_stage, - previous_stage, &user_sgpr_idx); - if (has_previous_stage) - set_loc_shader(ctx, AC_UD_VS_LS_TCS_IN_LAYOUT, - &user_sgpr_idx, 1); - set_loc_shader(ctx, AC_UD_TCS_OFFCHIP_LAYOUT, &user_sgpr_idx, 4); - if (ctx->abi.view_index) - set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1); - break; - case MESA_SHADER_TESS_EVAL: - set_loc_shader(ctx, AC_UD_TES_OFFCHIP_LAYOUT, &user_sgpr_idx, 1); - if (ctx->abi.view_index) - set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1); - break; - case MESA_SHADER_GEOMETRY: - if (has_previous_stage) { - if (previous_stage == MESA_SHADER_VERTEX) - set_vs_specific_input_locs(ctx, stage, - has_previous_stage, - previous_stage, - &user_sgpr_idx); - else - set_loc_shader(ctx, AC_UD_TES_OFFCHIP_LAYOUT, - &user_sgpr_idx, 1); - } - set_loc_shader(ctx, AC_UD_GS_VS_RING_STRIDE_ENTRIES, - &user_sgpr_idx, 2); - if (ctx->abi.view_index) - set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1); - break; - case MESA_SHADER_FRAGMENT: - if (ctx->shader_info->info.ps.needs_sample_positions) { - set_loc_shader(ctx, AC_UD_PS_SAMPLE_POS_OFFSET, - &user_sgpr_idx, 1); - } - break; - default: - unreachable("Shader stage not implemented"); - } - - if (stage == MESA_SHADER_TESS_CTRL || - (stage == MESA_SHADER_VERTEX && ctx->options->key.vs.as_ls) || - /* GFX9 has the ESGS ring buffer in LDS. 
*/ - (stage == MESA_SHADER_GEOMETRY && has_previous_stage)) { - ac_declare_lds_as_pointer(&ctx->ac); - } - - ctx->shader_info->num_user_sgprs = user_sgpr_idx; -} - static void build_store_values_extended(struct ac_llvm_context *ac, LLVMValueRef *values, @@ -2277,38 +1231,6 @@ static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx, return ac_build_image_opcode(&ctx->ac, args); } -static LLVMValueRef -radv_load_resource(struct ac_shader_abi *abi, LLVMValueRef index, - unsigned desc_set, unsigned binding) -{ - struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); - LLVMValueRef desc_ptr = ctx->descriptor_sets[desc_set]; - struct radv_pipeline_layout *pipeline_layout = ctx->options->layout; - struct radv_descriptor_set_layout *layout = pipeline_layout->set[desc_set].layout; - unsigned base_offset = layout->binding[binding].offset; - LLVMValueRef offset, stride; - - if (layout->binding[binding].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC || - layout->binding[binding].type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) { - unsigned idx = pipeline_layout->set[desc_set].dynamic_offset_start + - layout->binding[binding].dynamic_offset_offset; - desc_ptr = ctx->abi.push_constants; - base_offset = pipeline_layout->push_constant_size + 16 * idx; - stride = LLVMConstInt(ctx->ac.i32, 16, false); - } else - stride = LLVMConstInt(ctx->ac.i32, layout->binding[binding].size, false); - - offset = LLVMConstInt(ctx->ac.i32, base_offset, false); - index = LLVMBuildMul(ctx->ac.builder, index, stride, ""); - offset = LLVMBuildAdd(ctx->ac.builder, offset, index, ""); - - desc_ptr = ac_build_gep0(&ctx->ac, desc_ptr, offset); - desc_ptr = ac_cast_ptr(&ctx->ac, desc_ptr, ctx->ac.v4i32); - LLVMSetMetadata(desc_ptr, ctx->ac.uniform_md_kind, ctx->ac.empty_md); - - return desc_ptr; -} - static LLVMValueRef visit_vulkan_resource_reindex(struct ac_nir_context *ctx, nir_intrinsic_instr *instr) { @@ -2669,343 +1591,6 @@ out: *indir_out = offset; } - -/* The offchip buffer layout for TCS->TES is - * - * - attribute 0 of patch 0 vertex 0 - * - attribute 0 of patch 0 vertex 1 - * - attribute 0 of patch 0 vertex 2 - * ... - * - attribute 0 of patch 1 vertex 0 - * - attribute 0 of patch 1 vertex 1 - * ... - * - attribute 1 of patch 0 vertex 0 - * - attribute 1 of patch 0 vertex 1 - * ... - * - per patch attribute 0 of patch 0 - * - per patch attribute 0 of patch 1 - * ... - * - * Note that every attribute has 4 components. 
- */ -static LLVMValueRef get_tcs_tes_buffer_address(struct radv_shader_context *ctx, - LLVMValueRef vertex_index, - LLVMValueRef param_index) -{ - LLVMValueRef base_addr, vertices_per_patch, num_patches; - LLVMValueRef param_stride, constant16; - LLVMValueRef rel_patch_id = get_rel_patch_id(ctx); - - vertices_per_patch = LLVMConstInt(ctx->ac.i32, ctx->tcs_vertices_per_patch, false); - num_patches = ac_unpack_param(&ctx->ac, ctx->tcs_offchip_layout, 0, 9); - - constant16 = LLVMConstInt(ctx->ac.i32, 16, false); - if (vertex_index) { - base_addr = LLVMBuildMul(ctx->ac.builder, rel_patch_id, - vertices_per_patch, ""); - - base_addr = LLVMBuildAdd(ctx->ac.builder, base_addr, - vertex_index, ""); - - param_stride = LLVMBuildMul(ctx->ac.builder, vertices_per_patch, - num_patches, ""); - } else { - base_addr = rel_patch_id; - param_stride = num_patches; - } - - base_addr = LLVMBuildAdd(ctx->ac.builder, base_addr, - LLVMBuildMul(ctx->ac.builder, param_index, - param_stride, ""), ""); - - base_addr = LLVMBuildMul(ctx->ac.builder, base_addr, constant16, ""); - - if (!vertex_index) { - LLVMValueRef patch_data_offset = - ac_unpack_param(&ctx->ac, ctx->tcs_offchip_layout, 16, 16); - - base_addr = LLVMBuildAdd(ctx->ac.builder, base_addr, - patch_data_offset, ""); - } - return base_addr; -} - -static LLVMValueRef get_tcs_tes_buffer_address_params(struct radv_shader_context *ctx, - unsigned param, - unsigned const_index, - bool is_compact, - LLVMValueRef vertex_index, - LLVMValueRef indir_index) -{ - LLVMValueRef param_index; - - if (indir_index) - param_index = LLVMBuildAdd(ctx->ac.builder, LLVMConstInt(ctx->ac.i32, param, false), - indir_index, ""); - else { - if (const_index && !is_compact) - param += const_index; - param_index = LLVMConstInt(ctx->ac.i32, param, false); - } - return get_tcs_tes_buffer_address(ctx, vertex_index, param_index); -} - -static void -mark_tess_output(struct radv_shader_context *ctx, - bool is_patch, uint32_t param) - -{ - if (is_patch) { - ctx->tess_patch_outputs_written |= (1ull << param); - } else - ctx->tess_outputs_written |= (1ull << param); -} - -static LLVMValueRef -get_dw_address(struct radv_shader_context *ctx, - LLVMValueRef dw_addr, - unsigned param, - unsigned const_index, - bool compact_const_index, - LLVMValueRef vertex_index, - LLVMValueRef stride, - LLVMValueRef indir_index) - -{ - - if (vertex_index) { - dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr, - LLVMBuildMul(ctx->ac.builder, - vertex_index, - stride, ""), ""); - } - - if (indir_index) - dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr, - LLVMBuildMul(ctx->ac.builder, indir_index, - LLVMConstInt(ctx->ac.i32, 4, false), ""), ""); - else if (const_index && !compact_const_index) - dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr, - LLVMConstInt(ctx->ac.i32, const_index, false), ""); - - dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr, - LLVMConstInt(ctx->ac.i32, param * 4, false), ""); - - if (const_index && compact_const_index) - dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr, - LLVMConstInt(ctx->ac.i32, const_index, false), ""); - return dw_addr; -} - -static LLVMValueRef -load_tcs_varyings(struct ac_shader_abi *abi, - LLVMTypeRef type, - LLVMValueRef vertex_index, - LLVMValueRef indir_index, - unsigned const_index, - unsigned location, - unsigned driver_location, - unsigned component, - unsigned num_components, - bool is_patch, - bool is_compact, - bool load_input) -{ - struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); - LLVMValueRef dw_addr, stride; - LLVMValueRef value[4], result; 
- unsigned param = shader_io_get_unique_index(location); - - if (load_input) { - stride = ac_unpack_param(&ctx->ac, ctx->tcs_in_layout, 13, 8); - dw_addr = get_tcs_in_current_patch_offset(ctx); - } else { - if (!is_patch) { - stride = get_tcs_out_vertex_stride(ctx); - dw_addr = get_tcs_out_current_patch_offset(ctx); - } else { - dw_addr = get_tcs_out_current_patch_data_offset(ctx); - stride = NULL; - } - } - - dw_addr = get_dw_address(ctx, dw_addr, param, const_index, is_compact, vertex_index, stride, - indir_index); - - for (unsigned i = 0; i < num_components + component; i++) { - value[i] = ac_lds_load(&ctx->ac, dw_addr); - dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr, - ctx->ac.i32_1, ""); - } - result = ac_build_varying_gather_values(&ctx->ac, value, num_components, component); - return result; -} - -static void -store_tcs_output(struct ac_shader_abi *abi, - LLVMValueRef vertex_index, - LLVMValueRef param_index, - unsigned const_index, - unsigned location, - unsigned driver_location, - LLVMValueRef src, - unsigned component, - bool is_patch, - bool is_compact, - unsigned writemask) -{ - struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); - LLVMValueRef dw_addr; - LLVMValueRef stride = NULL; - LLVMValueRef buf_addr = NULL; - unsigned param; - bool store_lds = true; - - if (is_patch) { - if (!(ctx->tcs_patch_outputs_read & (1U << (location - VARYING_SLOT_PATCH0)))) - store_lds = false; - } else { - if (!(ctx->tcs_outputs_read & (1ULL << location))) - store_lds = false; - } - - param = shader_io_get_unique_index(location); - if (location == VARYING_SLOT_CLIP_DIST0 && - is_compact && const_index > 3) { - const_index -= 3; - param++; - } - - if (!is_patch) { - stride = get_tcs_out_vertex_stride(ctx); - dw_addr = get_tcs_out_current_patch_offset(ctx); - } else { - dw_addr = get_tcs_out_current_patch_data_offset(ctx); - } - - mark_tess_output(ctx, is_patch, param); - - dw_addr = get_dw_address(ctx, dw_addr, param, const_index, is_compact, vertex_index, stride, - param_index); - buf_addr = get_tcs_tes_buffer_address_params(ctx, param, const_index, is_compact, - vertex_index, param_index); - - bool is_tess_factor = false; - if (location == VARYING_SLOT_TESS_LEVEL_INNER || - location == VARYING_SLOT_TESS_LEVEL_OUTER) - is_tess_factor = true; - - unsigned base = is_compact ? 
const_index : 0; - for (unsigned chan = 0; chan < 8; chan++) { - if (!(writemask & (1 << chan))) - continue; - LLVMValueRef value = ac_llvm_extract_elem(&ctx->ac, src, chan - component); - - if (store_lds || is_tess_factor) { - LLVMValueRef dw_addr_chan = - LLVMBuildAdd(ctx->ac.builder, dw_addr, - LLVMConstInt(ctx->ac.i32, chan, false), ""); - ac_lds_store(&ctx->ac, dw_addr_chan, value); - } - - if (!is_tess_factor && writemask != 0xF) - ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, value, 1, - buf_addr, ctx->oc_lds, - 4 * (base + chan), 1, 0, true, false); - } - - if (writemask == 0xF) { - ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, src, 4, - buf_addr, ctx->oc_lds, - (base * 4), 1, 0, true, false); - } -} - -static LLVMValueRef -load_tes_input(struct ac_shader_abi *abi, - LLVMTypeRef type, - LLVMValueRef vertex_index, - LLVMValueRef param_index, - unsigned const_index, - unsigned location, - unsigned driver_location, - unsigned component, - unsigned num_components, - bool is_patch, - bool is_compact, - bool load_input) -{ - struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); - LLVMValueRef buf_addr; - LLVMValueRef result; - unsigned param = shader_io_get_unique_index(location); - - if (location == VARYING_SLOT_CLIP_DIST0 && is_compact && const_index > 3) { - const_index -= 3; - param++; - } - - buf_addr = get_tcs_tes_buffer_address_params(ctx, param, const_index, - is_compact, vertex_index, param_index); - - LLVMValueRef comp_offset = LLVMConstInt(ctx->ac.i32, component * 4, false); - buf_addr = LLVMBuildAdd(ctx->ac.builder, buf_addr, comp_offset, ""); - - result = ac_build_buffer_load(&ctx->ac, ctx->hs_ring_tess_offchip, num_components, NULL, - buf_addr, ctx->oc_lds, is_compact ? (4 * const_index) : 0, 1, 0, true, false); - result = ac_trim_vector(&ctx->ac, result, num_components); - return result; -} - -static LLVMValueRef -load_gs_input(struct ac_shader_abi *abi, - unsigned location, - unsigned driver_location, - unsigned component, - unsigned num_components, - unsigned vertex_index, - unsigned const_index, - LLVMTypeRef type) -{ - struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); - LLVMValueRef vtx_offset; - unsigned param, vtx_offset_param; - LLVMValueRef value[4], result; - - vtx_offset_param = vertex_index; - assert(vtx_offset_param < 6); - vtx_offset = LLVMBuildMul(ctx->ac.builder, ctx->gs_vtx_offset[vtx_offset_param], - LLVMConstInt(ctx->ac.i32, 4, false), ""); - - param = shader_io_get_unique_index(location); - - for (unsigned i = component; i < num_components + component; i++) { - if (ctx->ac.chip_class >= GFX9) { - LLVMValueRef dw_addr = ctx->gs_vtx_offset[vtx_offset_param]; - dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr, - LLVMConstInt(ctx->ac.i32, param * 4 + i + const_index, 0), ""); - value[i] = ac_lds_load(&ctx->ac, dw_addr); - } else { - LLVMValueRef soffset = - LLVMConstInt(ctx->ac.i32, - (param * 4 + i + const_index) * 256, - false); - - value[i] = ac_build_buffer_load(&ctx->ac, - ctx->esgs_ring, 1, - ctx->ac.i32_0, - vtx_offset, soffset, - 0, 1, 0, true, false); - - value[i] = LLVMBuildBitCast(ctx->ac.builder, value[i], - type, ""); - } - } - result = ac_build_varying_gather_values(&ctx->ac, value, num_components, component); - result = ac_to_integer(&ctx->ac, result); - return result; -} - static LLVMValueRef build_gep_for_deref(struct ac_nir_context *ctx, nir_deref_var *deref) @@ -3823,12 +2408,6 @@ void ac_emit_barrier(struct ac_llvm_context *ac, gl_shader_stage stage) ac->voidt, NULL, 
0, AC_FUNC_ATTR_CONVERGENT); } -static void radv_emit_kill(struct ac_shader_abi *abi, LLVMValueRef visible) -{ - struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); - ac_build_kill_if_false(&ctx->ac, visible); -} - static void emit_discard(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr) { @@ -4013,53 +2592,6 @@ static LLVMValueRef visit_var_atomic(struct ac_nir_context *ctx, return result; } -static LLVMValueRef lookup_interp_param(struct ac_shader_abi *abi, - enum glsl_interp_mode interp, unsigned location) -{ - struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); - - switch (interp) { - case INTERP_MODE_FLAT: - default: - return NULL; - case INTERP_MODE_SMOOTH: - case INTERP_MODE_NONE: - if (location == INTERP_CENTER) - return ctx->persp_center; - else if (location == INTERP_CENTROID) - return ctx->persp_centroid; - else if (location == INTERP_SAMPLE) - return ctx->persp_sample; - break; - case INTERP_MODE_NOPERSPECTIVE: - if (location == INTERP_CENTER) - return ctx->linear_center; - else if (location == INTERP_CENTROID) - return ctx->linear_centroid; - else if (location == INTERP_SAMPLE) - return ctx->linear_sample; - break; - } - return NULL; -} - -static LLVMValueRef load_sample_position(struct ac_shader_abi *abi, - LLVMValueRef sample_id) -{ - struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); - - LLVMValueRef result; - LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_PS_SAMPLE_POSITIONS, false)); - - ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, - ac_array_in_const_addr_space(ctx->ac.v2f32), ""); - - sample_id = LLVMBuildAdd(ctx->ac.builder, sample_id, ctx->sample_pos_offset, ""); - result = ac_build_load_invariant(&ctx->ac, ptr, sample_id); - - return result; -} - static LLVMValueRef load_sample_pos(struct ac_nir_context *ctx) { LLVMValueRef values[2]; @@ -4073,33 +2605,6 @@ static LLVMValueRef load_sample_pos(struct ac_nir_context *ctx) return ac_build_gather_values(&ctx->ac, values, 2); } -static LLVMValueRef load_sample_mask_in(struct ac_shader_abi *abi) -{ - struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); - uint8_t log2_ps_iter_samples = ctx->shader_info->info.ps.force_persample ? - ctx->options->key.fs.log2_num_samples : - ctx->options->key.fs.log2_ps_iter_samples; - - /* The bit pattern matches that used by fixed function fragment - * processing. 
*/ - static const uint16_t ps_iter_masks[] = { - 0xffff, /* not used */ - 0x5555, - 0x1111, - 0x0101, - 0x0001, - }; - assert(log2_ps_iter_samples < ARRAY_SIZE(ps_iter_masks)); - - uint32_t ps_iter_mask = ps_iter_masks[log2_ps_iter_samples]; - - LLVMValueRef result, sample_id; - sample_id = ac_unpack_param(&ctx->ac, abi->ancillary, 8, 4); - sample_id = LLVMBuildShl(ctx->ac.builder, LLVMConstInt(ctx->ac.i32, ps_iter_mask, false), sample_id, ""); - result = LLVMBuildAnd(ctx->ac.builder, sample_id, abi->sample_coverage, ""); - return result; -} - static LLVMValueRef visit_interp(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr) { @@ -4206,104 +2711,6 @@ static LLVMValueRef visit_interp(struct ac_nir_context *ctx, instr->variables[0]->var->data.location_frac); } -static void -visit_emit_vertex(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef *addrs) -{ - LLVMValueRef gs_next_vertex; - LLVMValueRef can_emit; - int idx; - struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); - - assert(stream == 0); - - /* Write vertex attribute values to GSVS ring */ - gs_next_vertex = LLVMBuildLoad(ctx->ac.builder, - ctx->gs_next_vertex, - ""); - - /* If this thread has already emitted the declared maximum number of - * vertices, kill it: excessive vertex emissions are not supposed to - * have any effect, and GS threads have no externally observable - * effects other than emitting vertices. - */ - can_emit = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, gs_next_vertex, - LLVMConstInt(ctx->ac.i32, ctx->gs_max_out_vertices, false), ""); - ac_build_kill_if_false(&ctx->ac, can_emit); - - /* loop num outputs */ - idx = 0; - for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) { - LLVMValueRef *out_ptr = &addrs[i * 4]; - int length = 4; - int slot = idx; - int slot_inc = 1; - - if (!(ctx->output_mask & (1ull << i))) - continue; - - if (i == VARYING_SLOT_CLIP_DIST0) { - /* pack clip and cull into a single set of slots */ - length = ctx->num_output_clips + ctx->num_output_culls; - if (length > 4) - slot_inc = 2; - } - for (unsigned j = 0; j < length; j++) { - LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder, - out_ptr[j], ""); - LLVMValueRef voffset = LLVMConstInt(ctx->ac.i32, (slot * 4 + j) * ctx->gs_max_out_vertices, false); - voffset = LLVMBuildAdd(ctx->ac.builder, voffset, gs_next_vertex, ""); - voffset = LLVMBuildMul(ctx->ac.builder, voffset, LLVMConstInt(ctx->ac.i32, 4, false), ""); - - out_val = LLVMBuildBitCast(ctx->ac.builder, out_val, ctx->ac.i32, ""); - - ac_build_buffer_store_dword(&ctx->ac, ctx->gsvs_ring, - out_val, 1, - voffset, ctx->gs2vs_offset, 0, - 1, 1, true, true); - } - idx += slot_inc; - } - - gs_next_vertex = LLVMBuildAdd(ctx->ac.builder, gs_next_vertex, - ctx->ac.i32_1, ""); - LLVMBuildStore(ctx->ac.builder, gs_next_vertex, ctx->gs_next_vertex); - - ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (0 << 8), ctx->gs_wave_id); -} - -static void -visit_end_primitive(struct ac_shader_abi *abi, unsigned stream) -{ - struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); - ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_CUT | AC_SENDMSG_GS | (stream << 8), ctx->gs_wave_id); -} - -static LLVMValueRef -load_tess_coord(struct ac_shader_abi *abi) -{ - struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); - - LLVMValueRef coord[4] = { - ctx->tes_u, - ctx->tes_v, - ctx->ac.f32_0, - ctx->ac.f32_0, - }; - - if (ctx->tes_primitive_mode == GL_TRIANGLES) - coord[2] = LLVMBuildFSub(ctx->ac.builder, ctx->ac.f32_1, - 
LLVMBuildFAdd(ctx->ac.builder, coord[0], coord[1], ""), ""); - - return ac_build_gather_values(&ctx->ac, coord, 3); -} - -static LLVMValueRef -load_patch_vertices_in(struct ac_shader_abi *abi) -{ - struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); - return LLVMConstInt(ctx->ac.i32, ctx->options->key.tcs.input_vertices, false); -} - static void visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *instr) { @@ -4608,114 +3015,6 @@ static void visit_intrinsic(struct ac_nir_context *ctx, } } -static LLVMValueRef radv_load_base_vertex(struct ac_shader_abi *abi) -{ - return abi->base_vertex; -} - -static LLVMValueRef radv_load_ssbo(struct ac_shader_abi *abi, - LLVMValueRef buffer_ptr, bool write) -{ - struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); - LLVMValueRef result; - - LLVMSetMetadata(buffer_ptr, ctx->ac.uniform_md_kind, ctx->ac.empty_md); - - result = LLVMBuildLoad(ctx->ac.builder, buffer_ptr, ""); - LLVMSetMetadata(result, ctx->ac.invariant_load_md_kind, ctx->ac.empty_md); - - return result; -} - -static LLVMValueRef radv_load_ubo(struct ac_shader_abi *abi, LLVMValueRef buffer_ptr) -{ - struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); - LLVMValueRef result; - - LLVMSetMetadata(buffer_ptr, ctx->ac.uniform_md_kind, ctx->ac.empty_md); - - result = LLVMBuildLoad(ctx->ac.builder, buffer_ptr, ""); - LLVMSetMetadata(result, ctx->ac.invariant_load_md_kind, ctx->ac.empty_md); - - return result; -} - -static LLVMValueRef radv_get_sampler_desc(struct ac_shader_abi *abi, - unsigned descriptor_set, - unsigned base_index, - unsigned constant_index, - LLVMValueRef index, - enum ac_descriptor_type desc_type, - bool image, bool write) -{ - struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); - LLVMValueRef list = ctx->descriptor_sets[descriptor_set]; - struct radv_descriptor_set_layout *layout = ctx->options->layout->set[descriptor_set].layout; - struct radv_descriptor_set_binding_layout *binding = layout->binding + base_index; - unsigned offset = binding->offset; - unsigned stride = binding->size; - unsigned type_size; - LLVMBuilderRef builder = ctx->ac.builder; - LLVMTypeRef type; - - assert(base_index < layout->binding_count); - - switch (desc_type) { - case AC_DESC_IMAGE: - type = ctx->ac.v8i32; - type_size = 32; - break; - case AC_DESC_FMASK: - type = ctx->ac.v8i32; - offset += 32; - type_size = 32; - break; - case AC_DESC_SAMPLER: - type = ctx->ac.v4i32; - if (binding->type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) - offset += 64; - - type_size = 16; - break; - case AC_DESC_BUFFER: - type = ctx->ac.v4i32; - type_size = 16; - break; - default: - unreachable("invalid desc_type\n"); - } - - offset += constant_index * stride; - - if (desc_type == AC_DESC_SAMPLER && binding->immutable_samplers_offset && - (!index || binding->immutable_samplers_equal)) { - if (binding->immutable_samplers_equal) - constant_index = 0; - - const uint32_t *samplers = radv_immutable_samplers(layout, binding); - - LLVMValueRef constants[] = { - LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 0], 0), - LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 1], 0), - LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 2], 0), - LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 3], 0), - }; - return ac_build_gather_values(&ctx->ac, constants, 4); - } - - assert(stride % type_size == 0); - - if (!index) - index = ctx->ac.i32_0; - - index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->ac.i32, stride / type_size, 0), ""); 
- - list = ac_build_gep0(&ctx->ac, list, LLVMConstInt(ctx->ac.i32, offset, 0)); - list = LLVMBuildPointerCast(builder, list, ac_array_in_const_addr_space(type), ""); - - return ac_build_load_to_sgpr(&ctx->ac, list, index); -} - static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx, const nir_deref_var *deref, enum ac_descriptor_type desc_type, @@ -5374,261 +3673,6 @@ static void visit_cf_list(struct ac_nir_context *ctx, } } -static void -handle_vs_input_decl(struct radv_shader_context *ctx, - struct nir_variable *variable) -{ - LLVMValueRef t_list_ptr = ctx->vertex_buffers; - LLVMValueRef t_offset; - LLVMValueRef t_list; - LLVMValueRef input; - LLVMValueRef buffer_index; - int index = variable->data.location - VERT_ATTRIB_GENERIC0; - int idx = variable->data.location; - unsigned attrib_count = glsl_count_attribute_slots(variable->type, true); - uint8_t input_usage_mask = - ctx->shader_info->info.vs.input_usage_mask[variable->data.location]; - unsigned num_channels = util_last_bit(input_usage_mask); - - variable->data.driver_location = idx * 4; - - for (unsigned i = 0; i < attrib_count; ++i, ++idx) { - if (ctx->options->key.vs.instance_rate_inputs & (1u << (index + i))) { - buffer_index = LLVMBuildAdd(ctx->ac.builder, ctx->abi.instance_id, - ctx->abi.start_instance, ""); - if (ctx->options->key.vs.as_ls) { - ctx->shader_info->vs.vgpr_comp_cnt = - MAX2(2, ctx->shader_info->vs.vgpr_comp_cnt); - } else { - ctx->shader_info->vs.vgpr_comp_cnt = - MAX2(1, ctx->shader_info->vs.vgpr_comp_cnt); - } - } else - buffer_index = LLVMBuildAdd(ctx->ac.builder, ctx->abi.vertex_id, - ctx->abi.base_vertex, ""); - t_offset = LLVMConstInt(ctx->ac.i32, index + i, false); - - t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset); - - input = ac_build_buffer_load_format(&ctx->ac, t_list, - buffer_index, - ctx->ac.i32_0, - num_channels, false, true); - - input = ac_build_expand_to_vec4(&ctx->ac, input, num_channels); - - for (unsigned chan = 0; chan < 4; chan++) { - LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false); - ctx->inputs[radeon_llvm_reg_index_soa(idx, chan)] = - ac_to_integer(&ctx->ac, LLVMBuildExtractElement(ctx->ac.builder, - input, llvm_chan, "")); - } - } -} - -static void interp_fs_input(struct radv_shader_context *ctx, - unsigned attr, - LLVMValueRef interp_param, - LLVMValueRef prim_mask, - LLVMValueRef result[4]) -{ - LLVMValueRef attr_number; - unsigned chan; - LLVMValueRef i, j; - bool interp = interp_param != NULL; - - attr_number = LLVMConstInt(ctx->ac.i32, attr, false); - - /* fs.constant returns the param from the middle vertex, so it's not - * really useful for flat shading. It's meant to be used for custom - * interpolation (but the intrinsic can't fetch from the other two - * vertices). - * - * Luckily, it doesn't matter, because we rely on the FLAT_SHADE state - * to do the right thing. The only reason we use fs.constant is that - * fs.interp cannot be used on integers, because they can be equal - * to NaN. 
- */ - if (interp) { - interp_param = LLVMBuildBitCast(ctx->ac.builder, interp_param, - ctx->ac.v2f32, ""); - - i = LLVMBuildExtractElement(ctx->ac.builder, interp_param, - ctx->ac.i32_0, ""); - j = LLVMBuildExtractElement(ctx->ac.builder, interp_param, - ctx->ac.i32_1, ""); - } - - for (chan = 0; chan < 4; chan++) { - LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false); - - if (interp) { - result[chan] = ac_build_fs_interp(&ctx->ac, - llvm_chan, - attr_number, - prim_mask, i, j); - } else { - result[chan] = ac_build_fs_interp_mov(&ctx->ac, - LLVMConstInt(ctx->ac.i32, 2, false), - llvm_chan, - attr_number, - prim_mask); - } - } -} - -static void -handle_fs_input_decl(struct radv_shader_context *ctx, - struct nir_variable *variable) -{ - int idx = variable->data.location; - unsigned attrib_count = glsl_count_attribute_slots(variable->type, false); - LLVMValueRef interp; - - variable->data.driver_location = idx * 4; - ctx->input_mask |= ((1ull << attrib_count) - 1) << variable->data.location; - - if (glsl_get_base_type(glsl_without_array(variable->type)) == GLSL_TYPE_FLOAT) { - unsigned interp_type; - if (variable->data.sample) - interp_type = INTERP_SAMPLE; - else if (variable->data.centroid) - interp_type = INTERP_CENTROID; - else - interp_type = INTERP_CENTER; - - interp = lookup_interp_param(&ctx->abi, variable->data.interpolation, interp_type); - } else - interp = NULL; - - for (unsigned i = 0; i < attrib_count; ++i) - ctx->inputs[radeon_llvm_reg_index_soa(idx + i, 0)] = interp; - -} - -static void -handle_vs_inputs(struct radv_shader_context *ctx, - struct nir_shader *nir) { - nir_foreach_variable(variable, &nir->inputs) - handle_vs_input_decl(ctx, variable); -} - -static void -prepare_interp_optimize(struct radv_shader_context *ctx, - struct nir_shader *nir) -{ - if (!ctx->options->key.fs.multisample) - return; - - bool uses_center = false; - bool uses_centroid = false; - nir_foreach_variable(variable, &nir->inputs) { - if (glsl_get_base_type(glsl_without_array(variable->type)) != GLSL_TYPE_FLOAT || - variable->data.sample) - continue; - - if (variable->data.centroid) - uses_centroid = true; - else - uses_center = true; - } - - if (uses_center && uses_centroid) { - LLVMValueRef sel = LLVMBuildICmp(ctx->ac.builder, LLVMIntSLT, ctx->abi.prim_mask, ctx->ac.i32_0, ""); - ctx->persp_centroid = LLVMBuildSelect(ctx->ac.builder, sel, ctx->persp_center, ctx->persp_centroid, ""); - ctx->linear_centroid = LLVMBuildSelect(ctx->ac.builder, sel, ctx->linear_center, ctx->linear_centroid, ""); - } -} - -static void -handle_fs_inputs(struct radv_shader_context *ctx, - struct nir_shader *nir) -{ - prepare_interp_optimize(ctx, nir); - - nir_foreach_variable(variable, &nir->inputs) - handle_fs_input_decl(ctx, variable); - - unsigned index = 0; - - if (ctx->shader_info->info.ps.uses_input_attachments || - ctx->shader_info->info.needs_multiview_view_index) - ctx->input_mask |= 1ull << VARYING_SLOT_LAYER; - - for (unsigned i = 0; i < RADEON_LLVM_MAX_INPUTS; ++i) { - LLVMValueRef interp_param; - LLVMValueRef *inputs = ctx->inputs +radeon_llvm_reg_index_soa(i, 0); - - if (!(ctx->input_mask & (1ull << i))) - continue; - - if (i >= VARYING_SLOT_VAR0 || i == VARYING_SLOT_PNTC || - i == VARYING_SLOT_PRIMITIVE_ID || i == VARYING_SLOT_LAYER) { - interp_param = *inputs; - interp_fs_input(ctx, index, interp_param, ctx->abi.prim_mask, - inputs); - - if (!interp_param) - ctx->shader_info->fs.flat_shaded_mask |= 1u << index; - ++index; - } else if (i == VARYING_SLOT_POS) { - for(int i = 0; i < 3; ++i) - 
inputs[i] = ctx->abi.frag_pos[i]; - - inputs[3] = ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, - ctx->abi.frag_pos[3]); - } - } - ctx->shader_info->fs.num_interp = index; - ctx->shader_info->fs.input_mask = ctx->input_mask >> VARYING_SLOT_VAR0; - - if (ctx->shader_info->info.needs_multiview_view_index) - ctx->abi.view_index = ctx->inputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)]; -} - -static void -scan_shader_output_decl(struct radv_shader_context *ctx, - struct nir_variable *variable, - struct nir_shader *shader, - gl_shader_stage stage) -{ - int idx = variable->data.location + variable->data.index; - unsigned attrib_count = glsl_count_attribute_slots(variable->type, false); - uint64_t mask_attribs; - - variable->data.driver_location = idx * 4; - - /* tess ctrl has it's own load/store paths for outputs */ - if (stage == MESA_SHADER_TESS_CTRL) - return; - - mask_attribs = ((1ull << attrib_count) - 1) << idx; - if (stage == MESA_SHADER_VERTEX || - stage == MESA_SHADER_TESS_EVAL || - stage == MESA_SHADER_GEOMETRY) { - if (idx == VARYING_SLOT_CLIP_DIST0) { - int length = shader->info.clip_distance_array_size + - shader->info.cull_distance_array_size; - if (stage == MESA_SHADER_VERTEX) { - ctx->shader_info->vs.outinfo.clip_dist_mask = (1 << shader->info.clip_distance_array_size) - 1; - ctx->shader_info->vs.outinfo.cull_dist_mask = (1 << shader->info.cull_distance_array_size) - 1; - } - if (stage == MESA_SHADER_TESS_EVAL) { - ctx->shader_info->tes.outinfo.clip_dist_mask = (1 << shader->info.clip_distance_array_size) - 1; - ctx->shader_info->tes.outinfo.cull_dist_mask = (1 << shader->info.cull_distance_array_size) - 1; - } - - if (length > 4) - attrib_count = 2; - else - attrib_count = 1; - mask_attribs = 1ull << idx; - } - } - - ctx->output_mask |= mask_attribs; -} - void ac_handle_shader_output_decl(struct ac_llvm_context *ctx, struct ac_shader_abi *abi, @@ -5766,972 +3810,6 @@ setup_shared(struct ac_nir_context *ctx, } } -/* Initialize arguments for the shader export intrinsic */ -static void -si_llvm_init_export_args(struct radv_shader_context *ctx, - LLVMValueRef *values, - unsigned enabled_channels, - unsigned target, - struct ac_export_args *args) -{ - /* Specify the channels that are enabled. 
*/ - args->enabled_channels = enabled_channels; - - /* Specify whether the EXEC mask represents the valid mask */ - args->valid_mask = 0; - - /* Specify whether this is the last export */ - args->done = 0; - - /* Specify the target we are exporting */ - args->target = target; - - args->compr = false; - args->out[0] = LLVMGetUndef(ctx->ac.f32); - args->out[1] = LLVMGetUndef(ctx->ac.f32); - args->out[2] = LLVMGetUndef(ctx->ac.f32); - args->out[3] = LLVMGetUndef(ctx->ac.f32); - - if (ctx->stage == MESA_SHADER_FRAGMENT && target >= V_008DFC_SQ_EXP_MRT) { - unsigned index = target - V_008DFC_SQ_EXP_MRT; - unsigned col_format = (ctx->options->key.fs.col_format >> (4 * index)) & 0xf; - bool is_int8 = (ctx->options->key.fs.is_int8 >> index) & 1; - bool is_int10 = (ctx->options->key.fs.is_int10 >> index) & 1; - unsigned chan; - - LLVMValueRef (*packf)(struct ac_llvm_context *ctx, LLVMValueRef args[2]) = NULL; - LLVMValueRef (*packi)(struct ac_llvm_context *ctx, LLVMValueRef args[2], - unsigned bits, bool hi) = NULL; - - switch(col_format) { - case V_028714_SPI_SHADER_ZERO: - args->enabled_channels = 0; /* writemask */ - args->target = V_008DFC_SQ_EXP_NULL; - break; - - case V_028714_SPI_SHADER_32_R: - args->enabled_channels = 1; - args->out[0] = values[0]; - break; - - case V_028714_SPI_SHADER_32_GR: - args->enabled_channels = 0x3; - args->out[0] = values[0]; - args->out[1] = values[1]; - break; - - case V_028714_SPI_SHADER_32_AR: - args->enabled_channels = 0x9; - args->out[0] = values[0]; - args->out[3] = values[3]; - break; - - case V_028714_SPI_SHADER_FP16_ABGR: - args->enabled_channels = 0x5; - packf = ac_build_cvt_pkrtz_f16; - break; - - case V_028714_SPI_SHADER_UNORM16_ABGR: - args->enabled_channels = 0x5; - packf = ac_build_cvt_pknorm_u16; - break; - - case V_028714_SPI_SHADER_SNORM16_ABGR: - args->enabled_channels = 0x5; - packf = ac_build_cvt_pknorm_i16; - break; - - case V_028714_SPI_SHADER_UINT16_ABGR: - args->enabled_channels = 0x5; - packi = ac_build_cvt_pk_u16; - break; - - case V_028714_SPI_SHADER_SINT16_ABGR: - args->enabled_channels = 0x5; - packi = ac_build_cvt_pk_i16; - break; - - default: - case V_028714_SPI_SHADER_32_ABGR: - memcpy(&args->out[0], values, sizeof(values[0]) * 4); - break; - } - - /* Pack f16 or norm_i16/u16. */ - if (packf) { - for (chan = 0; chan < 2; chan++) { - LLVMValueRef pack_args[2] = { - values[2 * chan], - values[2 * chan + 1] - }; - LLVMValueRef packed; - - packed = packf(&ctx->ac, pack_args); - args->out[chan] = ac_to_float(&ctx->ac, packed); - } - args->compr = 1; /* COMPR flag */ - } - - /* Pack i16/u16. */ - if (packi) { - for (chan = 0; chan < 2; chan++) { - LLVMValueRef pack_args[2] = { - ac_to_integer(&ctx->ac, values[2 * chan]), - ac_to_integer(&ctx->ac, values[2 * chan + 1]) - }; - LLVMValueRef packed; - - packed = packi(&ctx->ac, pack_args, - is_int8 ? 8 : is_int10 ? 
10 : 16, - chan == 1); - args->out[chan] = ac_to_float(&ctx->ac, packed); - } - args->compr = 1; /* COMPR flag */ - } - return; - } - - memcpy(&args->out[0], values, sizeof(values[0]) * 4); - - for (unsigned i = 0; i < 4; ++i) { - if (!(args->enabled_channels & (1 << i))) - continue; - - args->out[i] = ac_to_float(&ctx->ac, args->out[i]); - } -} - -static void -radv_export_param(struct radv_shader_context *ctx, unsigned index, - LLVMValueRef *values, unsigned enabled_channels) -{ - struct ac_export_args args; - - si_llvm_init_export_args(ctx, values, enabled_channels, - V_008DFC_SQ_EXP_PARAM + index, &args); - ac_build_export(&ctx->ac, &args); -} - -static LLVMValueRef -radv_load_output(struct radv_shader_context *ctx, unsigned index, unsigned chan) -{ - LLVMValueRef output = - ctx->abi.outputs[radeon_llvm_reg_index_soa(index, chan)]; - - return LLVMBuildLoad(ctx->ac.builder, output, ""); -} - -static void -handle_vs_outputs_post(struct radv_shader_context *ctx, - bool export_prim_id, - struct ac_vs_output_info *outinfo) -{ - uint32_t param_count = 0; - unsigned target; - unsigned pos_idx, num_pos_exports = 0; - struct ac_export_args args, pos_args[4] = {}; - LLVMValueRef psize_value = NULL, layer_value = NULL, viewport_index_value = NULL; - int i; - - if (ctx->options->key.has_multiview_view_index) { - LLVMValueRef* tmp_out = &ctx->abi.outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)]; - if(!*tmp_out) { - for(unsigned i = 0; i < 4; ++i) - ctx->abi.outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, i)] = - ac_build_alloca_undef(&ctx->ac, ctx->ac.f32, ""); - } - - LLVMBuildStore(ctx->ac.builder, ac_to_float(&ctx->ac, ctx->abi.view_index), *tmp_out); - ctx->output_mask |= 1ull << VARYING_SLOT_LAYER; - } - - memset(outinfo->vs_output_param_offset, AC_EXP_PARAM_UNDEFINED, - sizeof(outinfo->vs_output_param_offset)); - - if (ctx->output_mask & (1ull << VARYING_SLOT_CLIP_DIST0)) { - LLVMValueRef slots[8]; - unsigned j; - - if (outinfo->cull_dist_mask) - outinfo->cull_dist_mask <<= ctx->num_output_clips; - - i = VARYING_SLOT_CLIP_DIST0; - for (j = 0; j < ctx->num_output_clips + ctx->num_output_culls; j++) - slots[j] = ac_to_float(&ctx->ac, radv_load_output(ctx, i, j)); - - for (i = ctx->num_output_clips + ctx->num_output_culls; i < 8; i++) - slots[i] = LLVMGetUndef(ctx->ac.f32); - - if (ctx->num_output_clips + ctx->num_output_culls > 4) { - target = V_008DFC_SQ_EXP_POS + 3; - si_llvm_init_export_args(ctx, &slots[4], 0xf, target, &args); - memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS], - &args, sizeof(args)); - } - - target = V_008DFC_SQ_EXP_POS + 2; - si_llvm_init_export_args(ctx, &slots[0], 0xf, target, &args); - memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS], - &args, sizeof(args)); - - } - - LLVMValueRef pos_values[4] = {ctx->ac.f32_0, ctx->ac.f32_0, ctx->ac.f32_0, ctx->ac.f32_1}; - if (ctx->output_mask & (1ull << VARYING_SLOT_POS)) { - for (unsigned j = 0; j < 4; j++) - pos_values[j] = radv_load_output(ctx, VARYING_SLOT_POS, j); - } - si_llvm_init_export_args(ctx, pos_values, 0xf, V_008DFC_SQ_EXP_POS, &pos_args[0]); - - if (ctx->output_mask & (1ull << VARYING_SLOT_PSIZ)) { - outinfo->writes_pointsize = true; - psize_value = radv_load_output(ctx, VARYING_SLOT_PSIZ, 0); - } - - if (ctx->output_mask & (1ull << VARYING_SLOT_LAYER)) { - outinfo->writes_layer = true; - layer_value = radv_load_output(ctx, VARYING_SLOT_LAYER, 0); - } - - if (ctx->output_mask & (1ull << VARYING_SLOT_VIEWPORT)) { - outinfo->writes_viewport_index = true; - viewport_index_value = radv_load_output(ctx, 
VARYING_SLOT_VIEWPORT, 0); - } - - if (outinfo->writes_pointsize || - outinfo->writes_layer || - outinfo->writes_viewport_index) { - pos_args[1].enabled_channels = ((outinfo->writes_pointsize == true ? 1 : 0) | - (outinfo->writes_layer == true ? 4 : 0)); - pos_args[1].valid_mask = 0; - pos_args[1].done = 0; - pos_args[1].target = V_008DFC_SQ_EXP_POS + 1; - pos_args[1].compr = 0; - pos_args[1].out[0] = ctx->ac.f32_0; /* X */ - pos_args[1].out[1] = ctx->ac.f32_0; /* Y */ - pos_args[1].out[2] = ctx->ac.f32_0; /* Z */ - pos_args[1].out[3] = ctx->ac.f32_0; /* W */ - - if (outinfo->writes_pointsize == true) - pos_args[1].out[0] = psize_value; - if (outinfo->writes_layer == true) - pos_args[1].out[2] = layer_value; - if (outinfo->writes_viewport_index == true) { - if (ctx->options->chip_class >= GFX9) { - /* GFX9 has the layer in out.z[10:0] and the viewport - * index in out.z[19:16]. - */ - LLVMValueRef v = viewport_index_value; - v = ac_to_integer(&ctx->ac, v); - v = LLVMBuildShl(ctx->ac.builder, v, - LLVMConstInt(ctx->ac.i32, 16, false), - ""); - v = LLVMBuildOr(ctx->ac.builder, v, - ac_to_integer(&ctx->ac, pos_args[1].out[2]), ""); - - pos_args[1].out[2] = ac_to_float(&ctx->ac, v); - pos_args[1].enabled_channels |= 1 << 2; - } else { - pos_args[1].out[3] = viewport_index_value; - pos_args[1].enabled_channels |= 1 << 3; - } - } - } - for (i = 0; i < 4; i++) { - if (pos_args[i].out[0]) - num_pos_exports++; - } - - pos_idx = 0; - for (i = 0; i < 4; i++) { - if (!pos_args[i].out[0]) - continue; - - /* Specify the target we are exporting */ - pos_args[i].target = V_008DFC_SQ_EXP_POS + pos_idx++; - if (pos_idx == num_pos_exports) - pos_args[i].done = 1; - ac_build_export(&ctx->ac, &pos_args[i]); - } - - for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) { - LLVMValueRef values[4]; - if (!(ctx->output_mask & (1ull << i))) - continue; - - if (i != VARYING_SLOT_LAYER && - i != VARYING_SLOT_PRIMITIVE_ID && - i < VARYING_SLOT_VAR0) - continue; - - for (unsigned j = 0; j < 4; j++) - values[j] = ac_to_float(&ctx->ac, radv_load_output(ctx, i, j)); - - unsigned output_usage_mask; - - if (ctx->stage == MESA_SHADER_VERTEX && - !ctx->is_gs_copy_shader) { - output_usage_mask = - ctx->shader_info->info.vs.output_usage_mask[i]; - } else if (ctx->stage == MESA_SHADER_TESS_EVAL) { - output_usage_mask = - ctx->shader_info->info.tes.output_usage_mask[i]; - } else { - /* Enable all channels for the GS copy shader because - * we don't know the output usage mask currently. 
- */ - output_usage_mask = 0xf; - } - - radv_export_param(ctx, param_count, values, output_usage_mask); - - outinfo->vs_output_param_offset[i] = param_count++; - } - - if (export_prim_id) { - LLVMValueRef values[4]; - - values[0] = ctx->vs_prim_id; - ctx->shader_info->vs.vgpr_comp_cnt = MAX2(2, - ctx->shader_info->vs.vgpr_comp_cnt); - for (unsigned j = 1; j < 4; j++) - values[j] = ctx->ac.f32_0; - - radv_export_param(ctx, param_count, values, 0xf); - - outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] = param_count++; - outinfo->export_prim_id = true; - } - - outinfo->pos_exports = num_pos_exports; - outinfo->param_exports = param_count; -} - -static void -handle_es_outputs_post(struct radv_shader_context *ctx, - struct ac_es_output_info *outinfo) -{ - int j; - uint64_t max_output_written = 0; - LLVMValueRef lds_base = NULL; - - for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) { - int param_index; - int length = 4; - - if (!(ctx->output_mask & (1ull << i))) - continue; - - if (i == VARYING_SLOT_CLIP_DIST0) - length = ctx->num_output_clips + ctx->num_output_culls; - - param_index = shader_io_get_unique_index(i); - - max_output_written = MAX2(param_index + (length > 4), max_output_written); - } - - outinfo->esgs_itemsize = (max_output_written + 1) * 16; - - if (ctx->ac.chip_class >= GFX9) { - unsigned itemsize_dw = outinfo->esgs_itemsize / 4; - LLVMValueRef vertex_idx = ac_get_thread_id(&ctx->ac); - LLVMValueRef wave_idx = ac_build_bfe(&ctx->ac, ctx->merged_wave_info, - LLVMConstInt(ctx->ac.i32, 24, false), - LLVMConstInt(ctx->ac.i32, 4, false), false); - vertex_idx = LLVMBuildOr(ctx->ac.builder, vertex_idx, - LLVMBuildMul(ctx->ac.builder, wave_idx, - LLVMConstInt(ctx->ac.i32, 64, false), ""), ""); - lds_base = LLVMBuildMul(ctx->ac.builder, vertex_idx, - LLVMConstInt(ctx->ac.i32, itemsize_dw, 0), ""); - } - - for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) { - LLVMValueRef dw_addr = NULL; - LLVMValueRef *out_ptr = &ctx->abi.outputs[i * 4]; - int param_index; - int length = 4; - - if (!(ctx->output_mask & (1ull << i))) - continue; - - if (i == VARYING_SLOT_CLIP_DIST0) - length = ctx->num_output_clips + ctx->num_output_culls; - - param_index = shader_io_get_unique_index(i); - - if (lds_base) { - dw_addr = LLVMBuildAdd(ctx->ac.builder, lds_base, - LLVMConstInt(ctx->ac.i32, param_index * 4, false), - ""); - } - for (j = 0; j < length; j++) { - LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder, out_ptr[j], ""); - out_val = LLVMBuildBitCast(ctx->ac.builder, out_val, ctx->ac.i32, ""); - - if (ctx->ac.chip_class >= GFX9) { - ac_lds_store(&ctx->ac, dw_addr, - LLVMBuildLoad(ctx->ac.builder, out_ptr[j], "")); - dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr, ctx->ac.i32_1, ""); - } else { - ac_build_buffer_store_dword(&ctx->ac, - ctx->esgs_ring, - out_val, 1, - NULL, ctx->es2gs_offset, - (4 * param_index + j) * 4, - 1, 1, true, true); - } - } - } -} - -static void -handle_ls_outputs_post(struct radv_shader_context *ctx) -{ - LLVMValueRef vertex_id = ctx->rel_auto_id; - LLVMValueRef vertex_dw_stride = ac_unpack_param(&ctx->ac, ctx->ls_out_layout, 13, 8); - LLVMValueRef base_dw_addr = LLVMBuildMul(ctx->ac.builder, vertex_id, - vertex_dw_stride, ""); - - for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) { - LLVMValueRef *out_ptr = &ctx->abi.outputs[i * 4]; - int length = 4; - - if (!(ctx->output_mask & (1ull << i))) - continue; - - if (i == VARYING_SLOT_CLIP_DIST0) - length = ctx->num_output_clips + ctx->num_output_culls; - int param = shader_io_get_unique_index(i); - 
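- /* LS outputs are written to LDS one dword per component, at
- * base_dw_addr (rel_auto_id * vertex_dw_stride) plus param * 4,
- * where the TCS invocations of the same threadgroup read them back.
- */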
mark_tess_output(ctx, false, param); - if (length > 4) - mark_tess_output(ctx, false, param + 1); - LLVMValueRef dw_addr = LLVMBuildAdd(ctx->ac.builder, base_dw_addr, - LLVMConstInt(ctx->ac.i32, param * 4, false), - ""); - for (unsigned j = 0; j < length; j++) { - ac_lds_store(&ctx->ac, dw_addr, - LLVMBuildLoad(ctx->ac.builder, out_ptr[j], "")); - dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr, ctx->ac.i32_1, ""); - } - } -} - -struct ac_build_if_state -{ - struct radv_shader_context *ctx; - LLVMValueRef condition; - LLVMBasicBlockRef entry_block; - LLVMBasicBlockRef true_block; - LLVMBasicBlockRef false_block; - LLVMBasicBlockRef merge_block; -}; - -static LLVMBasicBlockRef -ac_build_insert_new_block(struct radv_shader_context *ctx, const char *name) -{ - LLVMBasicBlockRef current_block; - LLVMBasicBlockRef next_block; - LLVMBasicBlockRef new_block; - - /* get current basic block */ - current_block = LLVMGetInsertBlock(ctx->ac.builder); - - /* chqeck if there's another block after this one */ - next_block = LLVMGetNextBasicBlock(current_block); - if (next_block) { - /* insert the new block before the next block */ - new_block = LLVMInsertBasicBlockInContext(ctx->context, next_block, name); - } - else { - /* append new block after current block */ - LLVMValueRef function = LLVMGetBasicBlockParent(current_block); - new_block = LLVMAppendBasicBlockInContext(ctx->context, function, name); - } - return new_block; -} - -static void -ac_nir_build_if(struct ac_build_if_state *ifthen, - struct radv_shader_context *ctx, - LLVMValueRef condition) -{ - LLVMBasicBlockRef block = LLVMGetInsertBlock(ctx->ac.builder); - - memset(ifthen, 0, sizeof *ifthen); - ifthen->ctx = ctx; - ifthen->condition = condition; - ifthen->entry_block = block; - - /* create endif/merge basic block for the phi functions */ - ifthen->merge_block = ac_build_insert_new_block(ctx, "endif-block"); - - /* create/insert true_block before merge_block */ - ifthen->true_block = - LLVMInsertBasicBlockInContext(ctx->context, - ifthen->merge_block, - "if-true-block"); - - /* successive code goes into the true block */ - LLVMPositionBuilderAtEnd(ctx->ac.builder, ifthen->true_block); -} - -/** - * End a conditional. - */ -static void -ac_nir_build_endif(struct ac_build_if_state *ifthen) -{ - LLVMBuilderRef builder = ifthen->ctx->ac.builder; - - /* Insert branch to the merge block from current block */ - LLVMBuildBr(builder, ifthen->merge_block); - - /* - * Now patch in the various branch instructions. 
- */ - - /* Insert the conditional branch instruction at the end of entry_block */ - LLVMPositionBuilderAtEnd(builder, ifthen->entry_block); - if (ifthen->false_block) { - /* we have an else clause */ - LLVMBuildCondBr(builder, ifthen->condition, - ifthen->true_block, ifthen->false_block); - } - else { - /* no else clause */ - LLVMBuildCondBr(builder, ifthen->condition, - ifthen->true_block, ifthen->merge_block); - } - - /* Resume building code at end of the ifthen->merge_block */ - LLVMPositionBuilderAtEnd(builder, ifthen->merge_block); -} - -static void -write_tess_factors(struct radv_shader_context *ctx) -{ - unsigned stride, outer_comps, inner_comps; - struct ac_build_if_state if_ctx, inner_if_ctx; - LLVMValueRef invocation_id = ac_unpack_param(&ctx->ac, ctx->abi.tcs_rel_ids, 8, 5); - LLVMValueRef rel_patch_id = ac_unpack_param(&ctx->ac, ctx->abi.tcs_rel_ids, 0, 8); - unsigned tess_inner_index = 0, tess_outer_index; - LLVMValueRef lds_base, lds_inner = NULL, lds_outer, byteoffset, buffer; - LLVMValueRef out[6], vec0, vec1, tf_base, inner[4], outer[4]; - int i; - ac_emit_barrier(&ctx->ac, ctx->stage); - - switch (ctx->options->key.tcs.primitive_mode) { - case GL_ISOLINES: - stride = 2; - outer_comps = 2; - inner_comps = 0; - break; - case GL_TRIANGLES: - stride = 4; - outer_comps = 3; - inner_comps = 1; - break; - case GL_QUADS: - stride = 6; - outer_comps = 4; - inner_comps = 2; - break; - default: - return; - } - - ac_nir_build_if(&if_ctx, ctx, - LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, - invocation_id, ctx->ac.i32_0, "")); - - lds_base = get_tcs_out_current_patch_data_offset(ctx); - - if (inner_comps) { - tess_inner_index = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_INNER); - mark_tess_output(ctx, true, tess_inner_index); - lds_inner = LLVMBuildAdd(ctx->ac.builder, lds_base, - LLVMConstInt(ctx->ac.i32, tess_inner_index * 4, false), ""); - } - - tess_outer_index = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_OUTER); - mark_tess_output(ctx, true, tess_outer_index); - lds_outer = LLVMBuildAdd(ctx->ac.builder, lds_base, - LLVMConstInt(ctx->ac.i32, tess_outer_index * 4, false), ""); - - for (i = 0; i < 4; i++) { - inner[i] = LLVMGetUndef(ctx->ac.i32); - outer[i] = LLVMGetUndef(ctx->ac.i32); - } - - // LINES reverseal - if (ctx->options->key.tcs.primitive_mode == GL_ISOLINES) { - outer[0] = out[1] = ac_lds_load(&ctx->ac, lds_outer); - lds_outer = LLVMBuildAdd(ctx->ac.builder, lds_outer, - ctx->ac.i32_1, ""); - outer[1] = out[0] = ac_lds_load(&ctx->ac, lds_outer); - } else { - for (i = 0; i < outer_comps; i++) { - outer[i] = out[i] = - ac_lds_load(&ctx->ac, lds_outer); - lds_outer = LLVMBuildAdd(ctx->ac.builder, lds_outer, - ctx->ac.i32_1, ""); - } - for (i = 0; i < inner_comps; i++) { - inner[i] = out[outer_comps+i] = - ac_lds_load(&ctx->ac, lds_inner); - lds_inner = LLVMBuildAdd(ctx->ac.builder, lds_inner, - ctx->ac.i32_1, ""); - } - } - - /* Convert the outputs to vectors for stores. */ - vec0 = ac_build_gather_values(&ctx->ac, out, MIN2(stride, 4)); - vec1 = NULL; - - if (stride > 4) - vec1 = ac_build_gather_values(&ctx->ac, out + 4, stride - 4); - - - buffer = ctx->hs_ring_tess_factor; - tf_base = ctx->tess_factor_offset; - byteoffset = LLVMBuildMul(ctx->ac.builder, rel_patch_id, - LLVMConstInt(ctx->ac.i32, 4 * stride, false), ""); - unsigned tf_offset = 0; - - if (ctx->options->chip_class <= VI) { - ac_nir_build_if(&inner_if_ctx, ctx, - LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, - rel_patch_id, ctx->ac.i32_0, "")); - - /* Store the dynamic HS control word. 
*/ - ac_build_buffer_store_dword(&ctx->ac, buffer, - LLVMConstInt(ctx->ac.i32, 0x80000000, false), - 1, ctx->ac.i32_0, tf_base, - 0, 1, 0, true, false); - tf_offset += 4; - - ac_nir_build_endif(&inner_if_ctx); - } - - /* Store the tessellation factors. */ - ac_build_buffer_store_dword(&ctx->ac, buffer, vec0, - MIN2(stride, 4), byteoffset, tf_base, - tf_offset, 1, 0, true, false); - if (vec1) - ac_build_buffer_store_dword(&ctx->ac, buffer, vec1, - stride - 4, byteoffset, tf_base, - 16 + tf_offset, 1, 0, true, false); - - //store to offchip for TES to read - only if TES reads them - if (ctx->options->key.tcs.tes_reads_tess_factors) { - LLVMValueRef inner_vec, outer_vec, tf_outer_offset; - LLVMValueRef tf_inner_offset; - unsigned param_outer, param_inner; - - param_outer = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_OUTER); - tf_outer_offset = get_tcs_tes_buffer_address(ctx, NULL, - LLVMConstInt(ctx->ac.i32, param_outer, 0)); - - outer_vec = ac_build_gather_values(&ctx->ac, outer, - util_next_power_of_two(outer_comps)); - - ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, outer_vec, - outer_comps, tf_outer_offset, - ctx->oc_lds, 0, 1, 0, true, false); - if (inner_comps) { - param_inner = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_INNER); - tf_inner_offset = get_tcs_tes_buffer_address(ctx, NULL, - LLVMConstInt(ctx->ac.i32, param_inner, 0)); - - inner_vec = inner_comps == 1 ? inner[0] : - ac_build_gather_values(&ctx->ac, inner, inner_comps); - ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, inner_vec, - inner_comps, tf_inner_offset, - ctx->oc_lds, 0, 1, 0, true, false); - } - } - ac_nir_build_endif(&if_ctx); -} - -static void -handle_tcs_outputs_post(struct radv_shader_context *ctx) -{ - write_tess_factors(ctx); -} - -static bool -si_export_mrt_color(struct radv_shader_context *ctx, - LLVMValueRef *color, unsigned index, - struct ac_export_args *args) -{ - /* Export */ - si_llvm_init_export_args(ctx, color, 0xf, - V_008DFC_SQ_EXP_MRT + index, args); - if (!args->enabled_channels) - return false; /* unnecessary NULL export */ - - return true; -} - -static void -radv_export_mrt_z(struct radv_shader_context *ctx, - LLVMValueRef depth, LLVMValueRef stencil, - LLVMValueRef samplemask) -{ - struct ac_export_args args; - - ac_export_mrt_z(&ctx->ac, depth, stencil, samplemask, &args); - - ac_build_export(&ctx->ac, &args); -} - -static void -handle_fs_outputs_post(struct radv_shader_context *ctx) -{ - unsigned index = 0; - LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL; - struct ac_export_args color_args[8]; - - for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) { - LLVMValueRef values[4]; - - if (!(ctx->output_mask & (1ull << i))) - continue; - - if (i < FRAG_RESULT_DATA0) - continue; - - for (unsigned j = 0; j < 4; j++) - values[j] = ac_to_float(&ctx->ac, - radv_load_output(ctx, i, j)); - - bool ret = si_export_mrt_color(ctx, values, - i - FRAG_RESULT_DATA0, - &color_args[index]); - if (ret) - index++; - } - - /* Process depth, stencil, samplemask. */ - if (ctx->shader_info->info.ps.writes_z) { - depth = ac_to_float(&ctx->ac, - radv_load_output(ctx, FRAG_RESULT_DEPTH, 0)); - } - if (ctx->shader_info->info.ps.writes_stencil) { - stencil = ac_to_float(&ctx->ac, - radv_load_output(ctx, FRAG_RESULT_STENCIL, 0)); - } - if (ctx->shader_info->info.ps.writes_sample_mask) { - samplemask = ac_to_float(&ctx->ac, - radv_load_output(ctx, FRAG_RESULT_SAMPLE_MASK, 0)); - } - - /* Set the DONE bit on last non-null color export only if Z isn't - * exported. 
- */ - if (index > 0 && - !ctx->shader_info->info.ps.writes_z && - !ctx->shader_info->info.ps.writes_stencil && - !ctx->shader_info->info.ps.writes_sample_mask) { - unsigned last = index - 1; - - color_args[last].valid_mask = 1; /* whether the EXEC mask is valid */ - color_args[last].done = 1; /* DONE bit */ - } - - /* Export PS outputs. */ - for (unsigned i = 0; i < index; i++) - ac_build_export(&ctx->ac, &color_args[i]); - - if (depth || stencil || samplemask) - radv_export_mrt_z(ctx, depth, stencil, samplemask); - else if (!index) - ac_build_export_null(&ctx->ac); -} - -static void -emit_gs_epilogue(struct radv_shader_context *ctx) -{ - ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_NOP | AC_SENDMSG_GS_DONE, ctx->gs_wave_id); -} - -static void -handle_shader_outputs_post(struct ac_shader_abi *abi, unsigned max_outputs, - LLVMValueRef *addrs) -{ - struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); - - switch (ctx->stage) { - case MESA_SHADER_VERTEX: - if (ctx->options->key.vs.as_ls) - handle_ls_outputs_post(ctx); - else if (ctx->options->key.vs.as_es) - handle_es_outputs_post(ctx, &ctx->shader_info->vs.es_info); - else - handle_vs_outputs_post(ctx, ctx->options->key.vs.export_prim_id, - &ctx->shader_info->vs.outinfo); - break; - case MESA_SHADER_FRAGMENT: - handle_fs_outputs_post(ctx); - break; - case MESA_SHADER_GEOMETRY: - emit_gs_epilogue(ctx); - break; - case MESA_SHADER_TESS_CTRL: - handle_tcs_outputs_post(ctx); - break; - case MESA_SHADER_TESS_EVAL: - if (ctx->options->key.tes.as_es) - handle_es_outputs_post(ctx, &ctx->shader_info->tes.es_info); - else - handle_vs_outputs_post(ctx, ctx->options->key.tes.export_prim_id, - &ctx->shader_info->tes.outinfo); - break; - default: - break; - } -} - -static void ac_llvm_finalize_module(struct radv_shader_context *ctx) -{ - LLVMPassManagerRef passmgr; - /* Create the pass manager */ - passmgr = LLVMCreateFunctionPassManagerForModule( - ctx->ac.module); - - /* This pass should eliminate all the load and store instructions */ - LLVMAddPromoteMemoryToRegisterPass(passmgr); - - /* Add some optimization passes */ - LLVMAddScalarReplAggregatesPass(passmgr); - LLVMAddLICMPass(passmgr); - LLVMAddAggressiveDCEPass(passmgr); - LLVMAddCFGSimplificationPass(passmgr); - LLVMAddInstructionCombiningPass(passmgr); - - /* Run the pass */ - LLVMInitializeFunctionPassManager(passmgr); - LLVMRunFunctionPassManager(passmgr, ctx->main_function); - LLVMFinalizeFunctionPassManager(passmgr); - - LLVMDisposeBuilder(ctx->ac.builder); - LLVMDisposePassManager(passmgr); - - ac_llvm_context_dispose(&ctx->ac); -} - -static void -ac_nir_eliminate_const_vs_outputs(struct radv_shader_context *ctx) -{ - struct ac_vs_output_info *outinfo; - - switch (ctx->stage) { - case MESA_SHADER_FRAGMENT: - case MESA_SHADER_COMPUTE: - case MESA_SHADER_TESS_CTRL: - case MESA_SHADER_GEOMETRY: - return; - case MESA_SHADER_VERTEX: - if (ctx->options->key.vs.as_ls || - ctx->options->key.vs.as_es) - return; - outinfo = &ctx->shader_info->vs.outinfo; - break; - case MESA_SHADER_TESS_EVAL: - if (ctx->options->key.vs.as_es) - return; - outinfo = &ctx->shader_info->tes.outinfo; - break; - default: - unreachable("Unhandled shader type"); - } - - ac_optimize_vs_outputs(&ctx->ac, - ctx->main_function, - outinfo->vs_output_param_offset, - VARYING_SLOT_MAX, - &outinfo->param_exports); -} - -static void -ac_setup_rings(struct radv_shader_context *ctx) -{ - if ((ctx->stage == MESA_SHADER_VERTEX && ctx->options->key.vs.as_es) || - (ctx->stage == MESA_SHADER_TESS_EVAL && 
ctx->options->key.tes.as_es)) { - ctx->esgs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_ESGS_VS, false)); - } - - if (ctx->is_gs_copy_shader) { - ctx->gsvs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_GSVS_VS, false)); - } - if (ctx->stage == MESA_SHADER_GEOMETRY) { - LLVMValueRef tmp; - ctx->esgs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_ESGS_GS, false)); - ctx->gsvs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_GSVS_GS, false)); - - ctx->gsvs_ring = LLVMBuildBitCast(ctx->ac.builder, ctx->gsvs_ring, ctx->ac.v4i32, ""); - - ctx->gsvs_ring = LLVMBuildInsertElement(ctx->ac.builder, ctx->gsvs_ring, ctx->gsvs_num_entries, LLVMConstInt(ctx->ac.i32, 2, false), ""); - tmp = LLVMBuildExtractElement(ctx->ac.builder, ctx->gsvs_ring, ctx->ac.i32_1, ""); - tmp = LLVMBuildOr(ctx->ac.builder, tmp, ctx->gsvs_ring_stride, ""); - ctx->gsvs_ring = LLVMBuildInsertElement(ctx->ac.builder, ctx->gsvs_ring, tmp, ctx->ac.i32_1, ""); - } - - if (ctx->stage == MESA_SHADER_TESS_CTRL || - ctx->stage == MESA_SHADER_TESS_EVAL) { - ctx->hs_ring_tess_offchip = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_HS_TESS_OFFCHIP, false)); - ctx->hs_ring_tess_factor = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_HS_TESS_FACTOR, false)); - } -} - -static unsigned -ac_nir_get_max_workgroup_size(enum chip_class chip_class, - const struct nir_shader *nir) -{ - switch (nir->info.stage) { - case MESA_SHADER_TESS_CTRL: - return chip_class >= CIK ? 128 : 64; - case MESA_SHADER_GEOMETRY: - return chip_class >= GFX9 ? 128 : 64; - case MESA_SHADER_COMPUTE: - break; - default: - return 0; - } - - unsigned max_workgroup_size = nir->info.cs.local_size[0] * - nir->info.cs.local_size[1] * - nir->info.cs.local_size[2]; - return max_workgroup_size; -} - -/* Fixup the HW not emitting the TCS regs if there are no HS threads. 
*/ -static void ac_nir_fixup_ls_hs_input_vgprs(struct radv_shader_context *ctx) -{ - LLVMValueRef count = ac_build_bfe(&ctx->ac, ctx->merged_wave_info, - LLVMConstInt(ctx->ac.i32, 8, false), - LLVMConstInt(ctx->ac.i32, 8, false), false); - LLVMValueRef hs_empty = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, count, - ctx->ac.i32_0, ""); - ctx->abi.instance_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->rel_auto_id, ctx->abi.instance_id, ""); - ctx->vs_prim_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->abi.vertex_id, ctx->vs_prim_id, ""); - ctx->rel_auto_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->abi.tcs_rel_ids, ctx->rel_auto_id, ""); - ctx->abi.vertex_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->abi.tcs_patch_id, ctx->abi.vertex_id, ""); -} - -static void prepare_gs_input_vgprs(struct radv_shader_context *ctx) -{ - for(int i = 5; i >= 0; --i) { - ctx->gs_vtx_offset[i] = ac_build_bfe(&ctx->ac, ctx->gs_vtx_offset[i & ~1], - LLVMConstInt(ctx->ac.i32, (i & 1) * 16, false), - LLVMConstInt(ctx->ac.i32, 16, false), false); - } - - ctx->gs_wave_id = ac_build_bfe(&ctx->ac, ctx->merged_wave_info, - LLVMConstInt(ctx->ac.i32, 16, false), - LLVMConstInt(ctx->ac.i32, 8, false), false); -} - void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi, struct nir_shader *nir) { @@ -6779,487 +3857,6 @@ void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi, ralloc_free(ctx.vars); } -static -LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm, - struct nir_shader *const *shaders, - int shader_count, - struct ac_shader_variant_info *shader_info, - const struct ac_nir_compiler_options *options, - bool dump_shader) -{ - struct radv_shader_context ctx = {0}; - unsigned i; - ctx.options = options; - ctx.shader_info = shader_info; - ctx.context = LLVMContextCreate(); - - ac_llvm_context_init(&ctx.ac, ctx.context, options->chip_class, - options->family); - ctx.ac.module = LLVMModuleCreateWithNameInContext("shader", ctx.context); - LLVMSetTarget(ctx.ac.module, options->supports_spill ? "amdgcn-mesa-mesa3d" : "amdgcn--"); - - LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(tm); - char *data_layout_str = LLVMCopyStringRepOfTargetData(data_layout); - LLVMSetDataLayout(ctx.ac.module, data_layout_str); - LLVMDisposeTargetData(data_layout); - LLVMDisposeMessage(data_layout_str); - - enum ac_float_mode float_mode = - options->unsafe_math ? AC_FLOAT_MODE_UNSAFE_FP_MATH : - AC_FLOAT_MODE_DEFAULT; - - ctx.ac.builder = ac_create_builder(ctx.context, float_mode); - - memset(shader_info, 0, sizeof(*shader_info)); - - for(int i = 0; i < shader_count; ++i) - ac_nir_shader_info_pass(shaders[i], options, &shader_info->info); - - for (i = 0; i < AC_UD_MAX_SETS; i++) - shader_info->user_sgprs_locs.descriptor_sets[i].sgpr_idx = -1; - for (i = 0; i < AC_UD_MAX_UD; i++) - shader_info->user_sgprs_locs.shader_data[i].sgpr_idx = -1; - - ctx.max_workgroup_size = 0; - for (int i = 0; i < shader_count; ++i) { - ctx.max_workgroup_size = MAX2(ctx.max_workgroup_size, - ac_nir_get_max_workgroup_size(ctx.options->chip_class, - shaders[i])); - } - - create_function(&ctx, shaders[shader_count - 1]->info.stage, shader_count >= 2, - shader_count >= 2 ? 
shaders[shader_count - 2]->info.stage : MESA_SHADER_VERTEX); - - ctx.abi.inputs = &ctx.inputs[0]; - ctx.abi.emit_outputs = handle_shader_outputs_post; - ctx.abi.emit_vertex = visit_emit_vertex; - ctx.abi.load_ubo = radv_load_ubo; - ctx.abi.load_ssbo = radv_load_ssbo; - ctx.abi.load_sampler_desc = radv_get_sampler_desc; - ctx.abi.load_resource = radv_load_resource; - ctx.abi.clamp_shadow_reference = false; - - if (shader_count >= 2) - ac_init_exec_full_mask(&ctx.ac); - - if (ctx.ac.chip_class == GFX9 && - shaders[shader_count - 1]->info.stage == MESA_SHADER_TESS_CTRL) - ac_nir_fixup_ls_hs_input_vgprs(&ctx); - - for(int i = 0; i < shader_count; ++i) { - ctx.stage = shaders[i]->info.stage; - ctx.output_mask = 0; - ctx.tess_outputs_written = 0; - ctx.num_output_clips = shaders[i]->info.clip_distance_array_size; - ctx.num_output_culls = shaders[i]->info.cull_distance_array_size; - - if (shaders[i]->info.stage == MESA_SHADER_GEOMETRY) { - ctx.gs_next_vertex = ac_build_alloca(&ctx.ac, ctx.ac.i32, "gs_next_vertex"); - ctx.gs_max_out_vertices = shaders[i]->info.gs.vertices_out; - ctx.abi.load_inputs = load_gs_input; - ctx.abi.emit_primitive = visit_end_primitive; - } else if (shaders[i]->info.stage == MESA_SHADER_TESS_CTRL) { - ctx.tcs_outputs_read = shaders[i]->info.outputs_read; - ctx.tcs_patch_outputs_read = shaders[i]->info.patch_outputs_read; - ctx.abi.load_tess_varyings = load_tcs_varyings; - ctx.abi.load_patch_vertices_in = load_patch_vertices_in; - ctx.abi.store_tcs_outputs = store_tcs_output; - ctx.tcs_vertices_per_patch = shaders[i]->info.tess.tcs_vertices_out; - } else if (shaders[i]->info.stage == MESA_SHADER_TESS_EVAL) { - ctx.tes_primitive_mode = shaders[i]->info.tess.primitive_mode; - ctx.abi.load_tess_varyings = load_tes_input; - ctx.abi.load_tess_coord = load_tess_coord; - ctx.abi.load_patch_vertices_in = load_patch_vertices_in; - ctx.tcs_vertices_per_patch = shaders[i]->info.tess.tcs_vertices_out; - } else if (shaders[i]->info.stage == MESA_SHADER_VERTEX) { - if (shader_info->info.vs.needs_instance_id) { - if (ctx.options->key.vs.as_ls) { - ctx.shader_info->vs.vgpr_comp_cnt = - MAX2(2, ctx.shader_info->vs.vgpr_comp_cnt); - } else { - ctx.shader_info->vs.vgpr_comp_cnt = - MAX2(1, ctx.shader_info->vs.vgpr_comp_cnt); - } - } - ctx.abi.load_base_vertex = radv_load_base_vertex; - } else if (shaders[i]->info.stage == MESA_SHADER_FRAGMENT) { - shader_info->fs.can_discard = shaders[i]->info.fs.uses_discard; - ctx.abi.lookup_interp_param = lookup_interp_param; - ctx.abi.load_sample_position = load_sample_position; - ctx.abi.load_sample_mask_in = load_sample_mask_in; - ctx.abi.emit_kill = radv_emit_kill; - } - - if (i) - ac_emit_barrier(&ctx.ac, ctx.stage); - - ac_setup_rings(&ctx); - - LLVMBasicBlockRef merge_block; - if (shader_count >= 2) { - LLVMValueRef fn = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx.ac.builder)); - LLVMBasicBlockRef then_block = LLVMAppendBasicBlockInContext(ctx.ac.context, fn, ""); - merge_block = LLVMAppendBasicBlockInContext(ctx.ac.context, fn, ""); - - LLVMValueRef count = ac_build_bfe(&ctx.ac, ctx.merged_wave_info, - LLVMConstInt(ctx.ac.i32, 8 * i, false), - LLVMConstInt(ctx.ac.i32, 8, false), false); - LLVMValueRef thread_id = ac_get_thread_id(&ctx.ac); - LLVMValueRef cond = LLVMBuildICmp(ctx.ac.builder, LLVMIntULT, - thread_id, count, ""); - LLVMBuildCondBr(ctx.ac.builder, cond, then_block, merge_block); - - LLVMPositionBuilderAtEnd(ctx.ac.builder, then_block); - } - - if (shaders[i]->info.stage == MESA_SHADER_FRAGMENT) - handle_fs_inputs(&ctx, shaders[i]); 
- else if(shaders[i]->info.stage == MESA_SHADER_VERTEX) - handle_vs_inputs(&ctx, shaders[i]); - else if(shader_count >= 2 && shaders[i]->info.stage == MESA_SHADER_GEOMETRY) - prepare_gs_input_vgprs(&ctx); - - nir_foreach_variable(variable, &shaders[i]->outputs) - scan_shader_output_decl(&ctx, variable, shaders[i], shaders[i]->info.stage); - - ac_nir_translate(&ctx.ac, &ctx.abi, shaders[i]); - - if (shader_count >= 2) { - LLVMBuildBr(ctx.ac.builder, merge_block); - LLVMPositionBuilderAtEnd(ctx.ac.builder, merge_block); - } - - if (shaders[i]->info.stage == MESA_SHADER_GEOMETRY) { - unsigned addclip = shaders[i]->info.clip_distance_array_size + - shaders[i]->info.cull_distance_array_size > 4; - shader_info->gs.gsvs_vertex_size = (util_bitcount64(ctx.output_mask) + addclip) * 16; - shader_info->gs.max_gsvs_emit_size = shader_info->gs.gsvs_vertex_size * - shaders[i]->info.gs.vertices_out; - } else if (shaders[i]->info.stage == MESA_SHADER_TESS_CTRL) { - shader_info->tcs.outputs_written = ctx.tess_outputs_written; - shader_info->tcs.patch_outputs_written = ctx.tess_patch_outputs_written; - } else if (shaders[i]->info.stage == MESA_SHADER_VERTEX && ctx.options->key.vs.as_ls) { - shader_info->vs.outputs_written = ctx.tess_outputs_written; - } - } - - LLVMBuildRetVoid(ctx.ac.builder); - - if (options->dump_preoptir) - ac_dump_module(ctx.ac.module); - - ac_llvm_finalize_module(&ctx); - - if (shader_count == 1) - ac_nir_eliminate_const_vs_outputs(&ctx); - - if (dump_shader) { - ctx.shader_info->private_mem_vgprs = - ac_count_scratch_private_memory(ctx.main_function); - } - - return ctx.ac.module; -} - -static void ac_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context) -{ - unsigned *retval = (unsigned *)context; - LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di); - char *description = LLVMGetDiagInfoDescription(di); - - if (severity == LLVMDSError) { - *retval = 1; - fprintf(stderr, "LLVM triggered Diagnostic Handler: %s\n", - description); - } - - LLVMDisposeMessage(description); -} - -static unsigned ac_llvm_compile(LLVMModuleRef M, - struct ac_shader_binary *binary, - LLVMTargetMachineRef tm) -{ - unsigned retval = 0; - char *err; - LLVMContextRef llvm_ctx; - LLVMMemoryBufferRef out_buffer; - unsigned buffer_size; - const char *buffer_data; - LLVMBool mem_err; - - /* Setup Diagnostic Handler*/ - llvm_ctx = LLVMGetModuleContext(M); - - LLVMContextSetDiagnosticHandler(llvm_ctx, ac_diagnostic_handler, - &retval); - - /* Compile IR*/ - mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile, - &err, &out_buffer); - - /* Process Errors/Warnings */ - if (mem_err) { - fprintf(stderr, "%s: %s", __FUNCTION__, err); - free(err); - retval = 1; - goto out; - } - - /* Extract Shader Code*/ - buffer_size = LLVMGetBufferSize(out_buffer); - buffer_data = LLVMGetBufferStart(out_buffer); - - ac_elf_read(buffer_data, buffer_size, binary); - - /* Clean up */ - LLVMDisposeMemoryBuffer(out_buffer); - -out: - return retval; -} - -static void ac_compile_llvm_module(LLVMTargetMachineRef tm, - LLVMModuleRef llvm_module, - struct ac_shader_binary *binary, - struct ac_shader_config *config, - struct ac_shader_variant_info *shader_info, - gl_shader_stage stage, - bool dump_shader, bool supports_spill) -{ - if (dump_shader) - ac_dump_module(llvm_module); - - memset(binary, 0, sizeof(*binary)); - int v = ac_llvm_compile(llvm_module, binary, tm); - if (v) { - fprintf(stderr, "compile failed\n"); - } - - if (dump_shader) - fprintf(stderr, "disasm:\n%s\n", binary->disasm_string); - - 
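- /* Parse the register configuration (SGPR/VGPR counts, SPI state)
- * out of the compiled ELF; for fragment shaders the input VGPR
- * count is recomputed below from the SPI_PS_INPUT_ADDR enable bits,
- * since each enabled interpolant/position channel costs a fixed
- * number of VGPRs.
- */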
ac_shader_binary_read_config(binary, config, 0, supports_spill); - - LLVMContextRef ctx = LLVMGetModuleContext(llvm_module); - LLVMDisposeModule(llvm_module); - LLVMContextDispose(ctx); - - if (stage == MESA_SHADER_FRAGMENT) { - shader_info->num_input_vgprs = 0; - if (G_0286CC_PERSP_SAMPLE_ENA(config->spi_ps_input_addr)) - shader_info->num_input_vgprs += 2; - if (G_0286CC_PERSP_CENTER_ENA(config->spi_ps_input_addr)) - shader_info->num_input_vgprs += 2; - if (G_0286CC_PERSP_CENTROID_ENA(config->spi_ps_input_addr)) - shader_info->num_input_vgprs += 2; - if (G_0286CC_PERSP_PULL_MODEL_ENA(config->spi_ps_input_addr)) - shader_info->num_input_vgprs += 3; - if (G_0286CC_LINEAR_SAMPLE_ENA(config->spi_ps_input_addr)) - shader_info->num_input_vgprs += 2; - if (G_0286CC_LINEAR_CENTER_ENA(config->spi_ps_input_addr)) - shader_info->num_input_vgprs += 2; - if (G_0286CC_LINEAR_CENTROID_ENA(config->spi_ps_input_addr)) - shader_info->num_input_vgprs += 2; - if (G_0286CC_LINE_STIPPLE_TEX_ENA(config->spi_ps_input_addr)) - shader_info->num_input_vgprs += 1; - if (G_0286CC_POS_X_FLOAT_ENA(config->spi_ps_input_addr)) - shader_info->num_input_vgprs += 1; - if (G_0286CC_POS_Y_FLOAT_ENA(config->spi_ps_input_addr)) - shader_info->num_input_vgprs += 1; - if (G_0286CC_POS_Z_FLOAT_ENA(config->spi_ps_input_addr)) - shader_info->num_input_vgprs += 1; - if (G_0286CC_POS_W_FLOAT_ENA(config->spi_ps_input_addr)) - shader_info->num_input_vgprs += 1; - if (G_0286CC_FRONT_FACE_ENA(config->spi_ps_input_addr)) - shader_info->num_input_vgprs += 1; - if (G_0286CC_ANCILLARY_ENA(config->spi_ps_input_addr)) - shader_info->num_input_vgprs += 1; - if (G_0286CC_SAMPLE_COVERAGE_ENA(config->spi_ps_input_addr)) - shader_info->num_input_vgprs += 1; - if (G_0286CC_POS_FIXED_PT_ENA(config->spi_ps_input_addr)) - shader_info->num_input_vgprs += 1; - } - config->num_vgprs = MAX2(config->num_vgprs, shader_info->num_input_vgprs); - - /* +3 for scratch wave offset and VCC */ - config->num_sgprs = MAX2(config->num_sgprs, - shader_info->num_input_sgprs + 3); - - /* Enable 64-bit and 16-bit denormals, because there is no performance - * cost. - * - * If denormals are enabled, all floating-point output modifiers are - * ignored. - * - * Don't enable denormals for 32-bit floats, because: - * - Floating-point output modifiers would be ignored by the hw. - * - Some opcodes don't support denormals, such as v_mad_f32. We would - * have to stop using those. - * - SI & CI would be very slow. 
- */ - config->float_mode |= V_00B028_FP_64_DENORMS; -} - -static void -ac_fill_shader_info(struct ac_shader_variant_info *shader_info, struct nir_shader *nir, const struct ac_nir_compiler_options *options) -{ - switch (nir->info.stage) { - case MESA_SHADER_COMPUTE: - for (int i = 0; i < 3; ++i) - shader_info->cs.block_size[i] = nir->info.cs.local_size[i]; - break; - case MESA_SHADER_FRAGMENT: - shader_info->fs.early_fragment_test = nir->info.fs.early_fragment_tests; - break; - case MESA_SHADER_GEOMETRY: - shader_info->gs.vertices_in = nir->info.gs.vertices_in; - shader_info->gs.vertices_out = nir->info.gs.vertices_out; - shader_info->gs.output_prim = nir->info.gs.output_primitive; - shader_info->gs.invocations = nir->info.gs.invocations; - break; - case MESA_SHADER_TESS_EVAL: - shader_info->tes.primitive_mode = nir->info.tess.primitive_mode; - shader_info->tes.spacing = nir->info.tess.spacing; - shader_info->tes.ccw = nir->info.tess.ccw; - shader_info->tes.point_mode = nir->info.tess.point_mode; - shader_info->tes.as_es = options->key.tes.as_es; - break; - case MESA_SHADER_TESS_CTRL: - shader_info->tcs.tcs_vertices_out = nir->info.tess.tcs_vertices_out; - break; - case MESA_SHADER_VERTEX: - shader_info->vs.as_es = options->key.vs.as_es; - shader_info->vs.as_ls = options->key.vs.as_ls; - /* in LS mode we need at least 1, invocation id needs 2, handled elsewhere */ - if (options->key.vs.as_ls) - shader_info->vs.vgpr_comp_cnt = MAX2(1, shader_info->vs.vgpr_comp_cnt); - break; - default: - break; - } -} - -void ac_compile_nir_shader(LLVMTargetMachineRef tm, - struct ac_shader_binary *binary, - struct ac_shader_config *config, - struct ac_shader_variant_info *shader_info, - struct nir_shader *const *nir, - int nir_count, - const struct ac_nir_compiler_options *options, - bool dump_shader) -{ - - LLVMModuleRef llvm_module = ac_translate_nir_to_llvm(tm, nir, nir_count, shader_info, - options, dump_shader); - - ac_compile_llvm_module(tm, llvm_module, binary, config, shader_info, nir[0]->info.stage, dump_shader, options->supports_spill); - for (int i = 0; i < nir_count; ++i) - ac_fill_shader_info(shader_info, nir[i], options); - - /* Determine the ES type (VS or TES) for the GS on GFX9. 
*/ - if (options->chip_class == GFX9) { - if (nir_count == 2 && - nir[1]->info.stage == MESA_SHADER_GEOMETRY) { - shader_info->gs.es_type = nir[0]->info.stage; - } - } -} - -static void -ac_gs_copy_shader_emit(struct radv_shader_context *ctx) -{ - LLVMValueRef vtx_offset = - LLVMBuildMul(ctx->ac.builder, ctx->abi.vertex_id, - LLVMConstInt(ctx->ac.i32, 4, false), ""); - int idx = 0; - - for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) { - int length = 4; - int slot = idx; - int slot_inc = 1; - if (!(ctx->output_mask & (1ull << i))) - continue; - - if (i == VARYING_SLOT_CLIP_DIST0) { - /* unpack clip and cull from a single set of slots */ - length = ctx->num_output_clips + ctx->num_output_culls; - if (length > 4) - slot_inc = 2; - } - - for (unsigned j = 0; j < length; j++) { - LLVMValueRef value, soffset; - - soffset = LLVMConstInt(ctx->ac.i32, - (slot * 4 + j) * - ctx->gs_max_out_vertices * 16 * 4, false); - - value = ac_build_buffer_load(&ctx->ac, ctx->gsvs_ring, - 1, ctx->ac.i32_0, - vtx_offset, soffset, - 0, 1, 1, true, false); - - LLVMBuildStore(ctx->ac.builder, - ac_to_float(&ctx->ac, value), ctx->abi.outputs[radeon_llvm_reg_index_soa(i, j)]); - } - idx += slot_inc; - } - handle_vs_outputs_post(ctx, false, &ctx->shader_info->vs.outinfo); -} - -void ac_create_gs_copy_shader(LLVMTargetMachineRef tm, - struct nir_shader *geom_shader, - struct ac_shader_binary *binary, - struct ac_shader_config *config, - struct ac_shader_variant_info *shader_info, - const struct ac_nir_compiler_options *options, - bool dump_shader) -{ - struct radv_shader_context ctx = {0}; - ctx.context = LLVMContextCreate(); - ctx.options = options; - ctx.shader_info = shader_info; - - ac_llvm_context_init(&ctx.ac, ctx.context, options->chip_class, - options->family); - ctx.ac.module = LLVMModuleCreateWithNameInContext("shader", ctx.context); - - ctx.is_gs_copy_shader = true; - LLVMSetTarget(ctx.ac.module, "amdgcn--"); - - enum ac_float_mode float_mode = - options->unsafe_math ? 
AC_FLOAT_MODE_UNSAFE_FP_MATH : - AC_FLOAT_MODE_DEFAULT; - - ctx.ac.builder = ac_create_builder(ctx.context, float_mode); - ctx.stage = MESA_SHADER_VERTEX; - - create_function(&ctx, MESA_SHADER_VERTEX, false, MESA_SHADER_VERTEX); - - ctx.gs_max_out_vertices = geom_shader->info.gs.vertices_out; - ac_setup_rings(&ctx); - - ctx.num_output_clips = geom_shader->info.clip_distance_array_size; - ctx.num_output_culls = geom_shader->info.cull_distance_array_size; - - nir_foreach_variable(variable, &geom_shader->outputs) { - scan_shader_output_decl(&ctx, variable, geom_shader, MESA_SHADER_VERTEX); - ac_handle_shader_output_decl(&ctx.ac, &ctx.abi, geom_shader, - variable, MESA_SHADER_VERTEX); - } - - ac_gs_copy_shader_emit(&ctx); - - LLVMBuildRetVoid(ctx.ac.builder); - - ac_llvm_finalize_module(&ctx); - - ac_compile_llvm_module(tm, ctx.ac.module, binary, config, shader_info, - MESA_SHADER_VERTEX, - dump_shader, options->supports_spill); -} - void ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class chip_class) { diff --git a/src/amd/common/ac_nir_to_llvm.h b/src/amd/common/ac_nir_to_llvm.h index ebe18febbf2..4d48f7ebdd9 100644 --- a/src/amd/common/ac_nir_to_llvm.h +++ b/src/amd/common/ac_nir_to_llvm.h @@ -219,23 +219,6 @@ static inline unsigned radeon_llvm_reg_index_soa(unsigned index, unsigned chan) return (index * 4) + chan; } -void ac_compile_nir_shader(LLVMTargetMachineRef tm, - struct ac_shader_binary *binary, - struct ac_shader_config *config, - struct ac_shader_variant_info *shader_info, - struct nir_shader *const *nir, - int nir_count, - const struct ac_nir_compiler_options *options, - bool dump_shader); - -void ac_create_gs_copy_shader(LLVMTargetMachineRef tm, - struct nir_shader *geom_shader, - struct ac_shader_binary *binary, - struct ac_shader_config *config, - struct ac_shader_variant_info *shader_info, - const struct ac_nir_compiler_options *options, - bool dump_shader); - void ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class); void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi, |
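As a side note on the indexing convention used throughout the removed code above: ctx->inputs and abi.outputs are flat arrays addressed through radeon_llvm_reg_index_soa(), which packs four channel entries per varying slot. A minimal, self-contained sketch of that arithmetic (the reg_index_soa name and the printf harness below are only illustrative, not part of the driver):

#include <stdio.h>

/* Mirrors the inline helper kept in ac_nir_to_llvm.h: every varying
 * slot owns four consecutive channel entries in the flat arrays. */
static unsigned reg_index_soa(unsigned index, unsigned chan)
{
	return (index * 4) + chan;
}

int main(void)
{
	/* e.g. channel .z of varying slot 5 lands at entry 22 */
	printf("%u\n", reg_index_soa(5, 2));
	return 0;
}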