diff options
Diffstat (limited to 'src/amd/vulkan/radv_shader_args.c')
-rw-r--r-- | src/amd/vulkan/radv_shader_args.c | 747 |
1 files changed, 747 insertions, 0 deletions
diff --git a/src/amd/vulkan/radv_shader_args.c b/src/amd/vulkan/radv_shader_args.c new file mode 100644 index 00000000000..949b91dcf94 --- /dev/null +++ b/src/amd/vulkan/radv_shader_args.c @@ -0,0 +1,747 @@ +/* + * Copyright © 2019 Valve Corporation. + * Copyright © 2016 Red Hat. + * Copyright © 2016 Bas Nieuwenhuizen + * + * based in part on anv driver which is: + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "radv_private.h" +#include "radv_shader.h" +#include "radv_shader_args.h" + +static void +set_loc(struct radv_userdata_info *ud_info, uint8_t *sgpr_idx, + uint8_t num_sgprs) +{ + ud_info->sgpr_idx = *sgpr_idx; + ud_info->num_sgprs = num_sgprs; + *sgpr_idx += num_sgprs; +} + +static void +set_loc_shader(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx, + uint8_t num_sgprs) +{ + struct radv_userdata_info *ud_info = + &args->shader_info->user_sgprs_locs.shader_data[idx]; + assert(ud_info); + + set_loc(ud_info, sgpr_idx, num_sgprs); +} + +static void +set_loc_shader_ptr(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx) +{ + bool use_32bit_pointers = idx != AC_UD_SCRATCH_RING_OFFSETS; + + set_loc_shader(args, idx, sgpr_idx, use_32bit_pointers ? 1 : 2); +} + +static void +set_loc_desc(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx) +{ + struct radv_userdata_locations *locs = + &args->shader_info->user_sgprs_locs; + struct radv_userdata_info *ud_info = &locs->descriptor_sets[idx]; + assert(ud_info); + + set_loc(ud_info, sgpr_idx, 1); + + locs->descriptor_sets_enabled |= 1 << idx; +} + +struct user_sgpr_info { + bool need_ring_offsets; + bool indirect_all_descriptor_sets; + uint8_t remaining_sgprs; +}; + +static bool needs_view_index_sgpr(struct radv_shader_args *args, + gl_shader_stage stage) +{ + switch (stage) { + case MESA_SHADER_VERTEX: + if (args->shader_info->needs_multiview_view_index || + (!args->options->key.vs_common_out.as_es && !args->options->key.vs_common_out.as_ls && args->options->key.has_multiview_view_index)) + return true; + break; + case MESA_SHADER_TESS_EVAL: + if (args->shader_info->needs_multiview_view_index || (!args->options->key.vs_common_out.as_es && args->options->key.has_multiview_view_index)) + return true; + break; + case MESA_SHADER_GEOMETRY: + case MESA_SHADER_TESS_CTRL: + if (args->shader_info->needs_multiview_view_index) + return true; + break; + default: + break; + } + return false; +} + +static uint8_t +count_vs_user_sgprs(struct radv_shader_args *args) +{ + uint8_t count = 0; + + if (args->shader_info->vs.has_vertex_buffers) + count++; + count += args->shader_info->vs.needs_draw_id ? 3 : 2; + + return count; +} + +static void allocate_inline_push_consts(struct radv_shader_args *args, + struct user_sgpr_info *user_sgpr_info) +{ + uint8_t remaining_sgprs = user_sgpr_info->remaining_sgprs; + + /* Only supported if shaders use push constants. */ + if (args->shader_info->min_push_constant_used == UINT8_MAX) + return; + + /* Only supported if shaders don't have indirect push constants. */ + if (args->shader_info->has_indirect_push_constants) + return; + + /* Only supported for 32-bit push constants. */ + if (!args->shader_info->has_only_32bit_push_constants) + return; + + uint8_t num_push_consts = + (args->shader_info->max_push_constant_used - + args->shader_info->min_push_constant_used) / 4; + + /* Check if the number of user SGPRs is large enough. */ + if (num_push_consts < remaining_sgprs) { + args->shader_info->num_inline_push_consts = num_push_consts; + } else { + args->shader_info->num_inline_push_consts = remaining_sgprs; + } + + /* Clamp to the maximum number of allowed inlined push constants. */ + if (args->shader_info->num_inline_push_consts > AC_MAX_INLINE_PUSH_CONSTS) + args->shader_info->num_inline_push_consts = AC_MAX_INLINE_PUSH_CONSTS; + + if (args->shader_info->num_inline_push_consts == num_push_consts && + !args->shader_info->loads_dynamic_offsets) { + /* Disable the default push constants path if all constants are + * inlined and if shaders don't use dynamic descriptors. + */ + args->shader_info->loads_push_constants = false; + } + + args->shader_info->base_inline_push_consts = + args->shader_info->min_push_constant_used / 4; +} + +static void allocate_user_sgprs(struct radv_shader_args *args, + gl_shader_stage stage, + bool has_previous_stage, + gl_shader_stage previous_stage, + bool needs_view_index, + struct user_sgpr_info *user_sgpr_info) +{ + uint8_t user_sgpr_count = 0; + + memset(user_sgpr_info, 0, sizeof(struct user_sgpr_info)); + + /* until we sort out scratch/global buffers always assign ring offsets for gs/vs/es */ + if (stage == MESA_SHADER_GEOMETRY || + stage == MESA_SHADER_VERTEX || + stage == MESA_SHADER_TESS_CTRL || + stage == MESA_SHADER_TESS_EVAL || + args->is_gs_copy_shader) + user_sgpr_info->need_ring_offsets = true; + + if (stage == MESA_SHADER_FRAGMENT && + args->shader_info->ps.needs_sample_positions) + user_sgpr_info->need_ring_offsets = true; + + /* 2 user sgprs will nearly always be allocated for scratch/rings */ + if (args->options->supports_spill || user_sgpr_info->need_ring_offsets) { + user_sgpr_count += 2; + } + + switch (stage) { + case MESA_SHADER_COMPUTE: + if (args->shader_info->cs.uses_grid_size) + user_sgpr_count += 3; + break; + case MESA_SHADER_FRAGMENT: + user_sgpr_count += args->shader_info->ps.needs_sample_positions; + break; + case MESA_SHADER_VERTEX: + if (!args->is_gs_copy_shader) + user_sgpr_count += count_vs_user_sgprs(args); + break; + case MESA_SHADER_TESS_CTRL: + if (has_previous_stage) { + if (previous_stage == MESA_SHADER_VERTEX) + user_sgpr_count += count_vs_user_sgprs(args); + } + break; + case MESA_SHADER_TESS_EVAL: + break; + case MESA_SHADER_GEOMETRY: + if (has_previous_stage) { + if (previous_stage == MESA_SHADER_VERTEX) { + user_sgpr_count += count_vs_user_sgprs(args); + } + } + break; + default: + break; + } + + if (needs_view_index) + user_sgpr_count++; + + if (args->shader_info->loads_push_constants) + user_sgpr_count++; + + if (args->shader_info->so.num_outputs) + user_sgpr_count++; + + uint32_t available_sgprs = args->options->chip_class >= GFX9 && stage != MESA_SHADER_COMPUTE ? 32 : 16; + uint32_t remaining_sgprs = available_sgprs - user_sgpr_count; + uint32_t num_desc_set = + util_bitcount(args->shader_info->desc_set_used_mask); + + if (remaining_sgprs < num_desc_set) { + user_sgpr_info->indirect_all_descriptor_sets = true; + user_sgpr_info->remaining_sgprs = remaining_sgprs - 1; + } else { + user_sgpr_info->remaining_sgprs = remaining_sgprs - num_desc_set; + } + + allocate_inline_push_consts(args, user_sgpr_info); +} + +static void +declare_global_input_sgprs(struct radv_shader_args *args, + const struct user_sgpr_info *user_sgpr_info) +{ + /* 1 for each descriptor set */ + if (!user_sgpr_info->indirect_all_descriptor_sets) { + uint32_t mask = args->shader_info->desc_set_used_mask; + + while (mask) { + int i = u_bit_scan(&mask); + + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR, + &args->descriptor_sets[i]); + } + } else { + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR_PTR, + &args->descriptor_sets[0]); + } + + if (args->shader_info->loads_push_constants) { + /* 1 for push constants and dynamic descriptors */ + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR, + &args->ac.push_constants); + } + + for (unsigned i = 0; i < args->shader_info->num_inline_push_consts; i++) { + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, + &args->ac.inline_push_consts[i]); + } + args->ac.num_inline_push_consts = args->shader_info->num_inline_push_consts; + args->ac.base_inline_push_consts = args->shader_info->base_inline_push_consts; + + if (args->shader_info->so.num_outputs) { + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR, + &args->streamout_buffers); + } +} + +static void +declare_vs_specific_input_sgprs(struct radv_shader_args *args, + gl_shader_stage stage, + bool has_previous_stage, + gl_shader_stage previous_stage) +{ + if (!args->is_gs_copy_shader && + (stage == MESA_SHADER_VERTEX || + (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) { + if (args->shader_info->vs.has_vertex_buffers) { + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR, + &args->vertex_buffers); + } + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.base_vertex); + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.start_instance); + if (args->shader_info->vs.needs_draw_id) { + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.draw_id); + } + } +} + +static void +declare_vs_input_vgprs(struct radv_shader_args *args) +{ + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vertex_id); + if (!args->is_gs_copy_shader) { + if (args->options->key.vs_common_out.as_ls) { + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->rel_auto_id); + if (args->options->chip_class >= GFX10) { + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */ + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id); + } else { + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */ + } + } else { + if (args->options->chip_class >= GFX10) { + if (args->options->key.vs_common_out.as_ngg) { + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */ + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */ + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id); + } else { + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */ + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->vs_prim_id); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id); + } + } else { + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->vs_prim_id); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */ + } + } + } +} + +static void +declare_streamout_sgprs(struct radv_shader_args *args, gl_shader_stage stage) +{ + int i; + + if (args->options->use_ngg_streamout) { + if (stage == MESA_SHADER_TESS_EVAL) + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); + return; + } + + /* Streamout SGPRs. */ + if (args->shader_info->so.num_outputs) { + assert(stage == MESA_SHADER_VERTEX || + stage == MESA_SHADER_TESS_EVAL); + + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->streamout_config); + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->streamout_write_idx); + } else if (stage == MESA_SHADER_TESS_EVAL) { + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); + } + + /* A streamout buffer offset is loaded if the stride is non-zero. */ + for (i = 0; i < 4; i++) { + if (!args->shader_info->so.strides[i]) + continue; + + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->streamout_offset[i]); + } +} + +static void +declare_tes_input_vgprs(struct radv_shader_args *args) +{ + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->tes_u); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->tes_v); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->tes_rel_patch_id); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tes_patch_id); +} + +static void +set_global_input_locs(struct radv_shader_args *args, + const struct user_sgpr_info *user_sgpr_info, + uint8_t *user_sgpr_idx) +{ + uint32_t mask = args->shader_info->desc_set_used_mask; + + if (!user_sgpr_info->indirect_all_descriptor_sets) { + while (mask) { + int i = u_bit_scan(&mask); + + set_loc_desc(args, i, user_sgpr_idx); + } + } else { + set_loc_shader_ptr(args, AC_UD_INDIRECT_DESCRIPTOR_SETS, + user_sgpr_idx); + + args->shader_info->need_indirect_descriptor_sets = true; + } + + if (args->shader_info->loads_push_constants) { + set_loc_shader_ptr(args, AC_UD_PUSH_CONSTANTS, user_sgpr_idx); + } + + if (args->shader_info->num_inline_push_consts) { + set_loc_shader(args, AC_UD_INLINE_PUSH_CONSTANTS, user_sgpr_idx, + args->shader_info->num_inline_push_consts); + } + + if (args->streamout_buffers.used) { + set_loc_shader_ptr(args, AC_UD_STREAMOUT_BUFFERS, + user_sgpr_idx); + } +} + +static void +set_vs_specific_input_locs(struct radv_shader_args *args, + gl_shader_stage stage, bool has_previous_stage, + gl_shader_stage previous_stage, + uint8_t *user_sgpr_idx) +{ + if (!args->is_gs_copy_shader && + (stage == MESA_SHADER_VERTEX || + (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) { + if (args->shader_info->vs.has_vertex_buffers) { + set_loc_shader_ptr(args, AC_UD_VS_VERTEX_BUFFERS, + user_sgpr_idx); + } + + unsigned vs_num = 2; + if (args->shader_info->vs.needs_draw_id) + vs_num++; + + set_loc_shader(args, AC_UD_VS_BASE_VERTEX_START_INSTANCE, + user_sgpr_idx, vs_num); + } +} + +/* Returns whether the stage is a stage that can be directly before the GS */ +static bool is_pre_gs_stage(gl_shader_stage stage) +{ + return stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL; +} + +void +radv_declare_shader_args(struct radv_shader_args *args, + gl_shader_stage stage, + bool has_previous_stage, + gl_shader_stage previous_stage) +{ + struct user_sgpr_info user_sgpr_info; + bool needs_view_index = needs_view_index_sgpr(args, stage); + + if (args->options->chip_class >= GFX10) { + if (is_pre_gs_stage(stage) && args->options->key.vs_common_out.as_ngg) { + /* On GFX10, VS is merged into GS for NGG. */ + previous_stage = stage; + stage = MESA_SHADER_GEOMETRY; + has_previous_stage = true; + } + } + + for (int i = 0; i < MAX_SETS; i++) + args->shader_info->user_sgprs_locs.descriptor_sets[i].sgpr_idx = -1; + for (int i = 0; i < AC_UD_MAX_UD; i++) + args->shader_info->user_sgprs_locs.shader_data[i].sgpr_idx = -1; + + + allocate_user_sgprs(args, stage, has_previous_stage, + previous_stage, needs_view_index, &user_sgpr_info); + + if (user_sgpr_info.need_ring_offsets && !args->options->supports_spill) { + ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR, + &args->ring_offsets); + } + + switch (stage) { + case MESA_SHADER_COMPUTE: + declare_global_input_sgprs(args, &user_sgpr_info); + + if (args->shader_info->cs.uses_grid_size) { + ac_add_arg(&args->ac, AC_ARG_SGPR, 3, AC_ARG_INT, + &args->ac.num_work_groups); + } + + for (int i = 0; i < 3; i++) { + if (args->shader_info->cs.uses_block_id[i]) { + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, + &args->ac.workgroup_ids[i]); + } + } + + if (args->shader_info->cs.uses_local_invocation_idx) { + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, + &args->ac.tg_size); + } + + ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT, + &args->ac.local_invocation_ids); + break; + case MESA_SHADER_VERTEX: + declare_global_input_sgprs(args, &user_sgpr_info); + + declare_vs_specific_input_sgprs(args, stage, has_previous_stage, + previous_stage); + + if (needs_view_index) { + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, + &args->ac.view_index); + } + + if (args->options->key.vs_common_out.as_es) { + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, + &args->es2gs_offset); + } else if (args->options->key.vs_common_out.as_ls) { + /* no extra parameters */ + } else { + declare_streamout_sgprs(args, stage); + } + + declare_vs_input_vgprs(args); + break; + case MESA_SHADER_TESS_CTRL: + if (has_previous_stage) { + // First 6 system regs + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds); + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, + &args->merged_wave_info); + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, + &args->tess_factor_offset); + + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // scratch offset + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown + + declare_global_input_sgprs(args, &user_sgpr_info); + + declare_vs_specific_input_sgprs(args, stage, + has_previous_stage, + previous_stage); + + if (needs_view_index) { + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, + &args->ac.view_index); + } + + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, + &args->ac.tcs_patch_id); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, + &args->ac.tcs_rel_ids); + + declare_vs_input_vgprs(args); + } else { + declare_global_input_sgprs(args, &user_sgpr_info); + + if (needs_view_index) { + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, + &args->ac.view_index); + } + + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds); + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, + &args->tess_factor_offset); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, + &args->ac.tcs_patch_id); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, + &args->ac.tcs_rel_ids); + } + break; + case MESA_SHADER_TESS_EVAL: + declare_global_input_sgprs(args, &user_sgpr_info); + + if (needs_view_index) + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, + &args->ac.view_index); + + if (args->options->key.vs_common_out.as_es) { + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds); + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, + &args->es2gs_offset); + } else { + declare_streamout_sgprs(args, stage); + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds); + } + declare_tes_input_vgprs(args); + break; + case MESA_SHADER_GEOMETRY: + if (has_previous_stage) { + // First 6 system regs + if (args->options->key.vs_common_out.as_ngg) { + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, + &args->gs_tg_info); + } else { + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, + &args->gs2vs_offset); + } + + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, + &args->merged_wave_info); + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds); + + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // scratch offset + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown + + declare_global_input_sgprs(args, &user_sgpr_info); + + if (previous_stage != MESA_SHADER_TESS_EVAL) { + declare_vs_specific_input_sgprs(args, stage, + has_previous_stage, + previous_stage); + } + + if (needs_view_index) { + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, + &args->ac.view_index); + } + + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, + &args->gs_vtx_offset[0]); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, + &args->gs_vtx_offset[2]); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, + &args->ac.gs_prim_id); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, + &args->ac.gs_invocation_id); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, + &args->gs_vtx_offset[4]); + + if (previous_stage == MESA_SHADER_VERTEX) { + declare_vs_input_vgprs(args); + } else { + declare_tes_input_vgprs(args); + } + } else { + declare_global_input_sgprs(args, &user_sgpr_info); + + if (needs_view_index) { + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, + &args->ac.view_index); + } + + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->gs2vs_offset); + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->gs_wave_id); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, + &args->gs_vtx_offset[0]); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, + &args->gs_vtx_offset[1]); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, + &args->ac.gs_prim_id); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, + &args->gs_vtx_offset[2]); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, + &args->gs_vtx_offset[3]); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, + &args->gs_vtx_offset[4]); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, + &args->gs_vtx_offset[5]); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, + &args->ac.gs_invocation_id); + } + break; + case MESA_SHADER_FRAGMENT: + declare_global_input_sgprs(args, &user_sgpr_info); + + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.prim_mask); + ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_sample); + ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_center); + ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_centroid); + ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT, NULL); /* persp pull model */ + ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_sample); + ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_center); + ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_centroid); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, NULL); /* line stipple tex */ + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[0]); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[1]); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[2]); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[3]); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.front_face); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.ancillary); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.sample_coverage); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* fixed pt */ + break; + default: + unreachable("Shader stage not implemented"); + } + + args->shader_info->num_input_vgprs = 0; + args->shader_info->num_input_sgprs = args->options->supports_spill ? 2 : 0; + args->shader_info->num_input_sgprs += args->ac.num_sgprs_used; + + if (stage != MESA_SHADER_FRAGMENT) + args->shader_info->num_input_vgprs = args->ac.num_vgprs_used; + + uint8_t user_sgpr_idx = 0; + + if (args->options->supports_spill || user_sgpr_info.need_ring_offsets) { + set_loc_shader_ptr(args, AC_UD_SCRATCH_RING_OFFSETS, + &user_sgpr_idx); + } + + /* For merged shaders the user SGPRs start at 8, with 8 system SGPRs in front (including + * the rw_buffers at s0/s1. With user SGPR0 = s8, lets restart the count from 0 */ + if (has_previous_stage) + user_sgpr_idx = 0; + + set_global_input_locs(args, &user_sgpr_info, &user_sgpr_idx); + + switch (stage) { + case MESA_SHADER_COMPUTE: + if (args->shader_info->cs.uses_grid_size) { + set_loc_shader(args, AC_UD_CS_GRID_SIZE, + &user_sgpr_idx, 3); + } + break; + case MESA_SHADER_VERTEX: + set_vs_specific_input_locs(args, stage, has_previous_stage, + previous_stage, &user_sgpr_idx); + if (args->ac.view_index.used) + set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1); + break; + case MESA_SHADER_TESS_CTRL: + set_vs_specific_input_locs(args, stage, has_previous_stage, + previous_stage, &user_sgpr_idx); + if (args->ac.view_index.used) + set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1); + break; + case MESA_SHADER_TESS_EVAL: + if (args->ac.view_index.used) + set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1); + break; + case MESA_SHADER_GEOMETRY: + if (has_previous_stage) { + if (previous_stage == MESA_SHADER_VERTEX) + set_vs_specific_input_locs(args, stage, + has_previous_stage, + previous_stage, + &user_sgpr_idx); + } + if (args->ac.view_index.used) + set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1); + break; + case MESA_SHADER_FRAGMENT: + break; + default: + unreachable("Shader stage not implemented"); + } + + args->shader_info->num_user_sgprs = user_sgpr_idx; +} + |