diff options
-rw-r--r-- | src/amd/compiler/aco_instruction_selection_setup.cpp | 22 | ||||
-rw-r--r-- | src/amd/vulkan/radv_nir_to_llvm.c | 14 | ||||
-rw-r--r-- | src/amd/vulkan/radv_shader.c | 5 | ||||
-rw-r--r-- | src/amd/vulkan/radv_shader.h | 2 | ||||
-rw-r--r-- | src/amd/vulkan/radv_shader_args.c | 59 | ||||
-rw-r--r-- | src/amd/vulkan/radv_shader_args.h | 1 |
6 files changed, 49 insertions, 54 deletions
diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp index c3f2832fa8f..fbab89417cd 100644 --- a/src/amd/compiler/aco_instruction_selection_setup.cpp +++ b/src/amd/compiler/aco_instruction_selection_setup.cpp @@ -95,9 +95,6 @@ struct isel_context { bool exec_potentially_empty = false; } cf_info; - /* scratch */ - bool scratch_enabled = false; - /* inputs common for merged stages */ Temp merged_wave_info = Temp(0, s1); @@ -639,8 +636,7 @@ static void allocate_user_sgprs(isel_context *ctx, user_sgpr_info.need_ring_offsets = true; /* 2 user sgprs will nearly always be allocated for scratch/rings */ - if (ctx->options->supports_spill || user_sgpr_info.need_ring_offsets || ctx->scratch_enabled) - user_sgpr_count += 2; + user_sgpr_count += 2; switch (ctx->stage) { case vertex_vs: @@ -895,10 +891,8 @@ Pseudo_instruction *add_startpgm(struct isel_context *ctx) arg_info args = {}; /* this needs to be in sgprs 0 and 1 */ - if (ctx->options->supports_spill || user_sgpr_info.need_ring_offsets || ctx->scratch_enabled) { - add_arg(&args, s2, &ctx->program->private_segment_buffer, 0); - set_loc_shader_ptr(ctx, AC_UD_SCRATCH_RING_OFFSETS, &user_sgpr_info.user_sgpr_idx); - } + add_arg(&args, s2, &ctx->program->private_segment_buffer, 0); + set_loc_shader_ptr(ctx, AC_UD_SCRATCH_RING_OFFSETS, &user_sgpr_info.user_sgpr_idx); unsigned vgpr_idx = 0; switch (ctx->stage) { @@ -928,8 +922,7 @@ Pseudo_instruction *add_startpgm(struct isel_context *ctx) else declare_streamout_sgprs(ctx, &args, &idx); - if (ctx->options->supports_spill || ctx->scratch_enabled) - add_arg(&args, s1, &ctx->program->scratch_offset, idx++); + add_arg(&args, s1, &ctx->program->scratch_offset, idx++); declare_vs_input_vgprs(ctx, &args); break; @@ -940,8 +933,7 @@ Pseudo_instruction *add_startpgm(struct isel_context *ctx) assert(user_sgpr_info.user_sgpr_idx == user_sgpr_info.num_sgpr); add_arg(&args, s1, &ctx->prim_mask, user_sgpr_info.user_sgpr_idx); - if (ctx->options->supports_spill || ctx->scratch_enabled) - add_arg(&args, s1, &ctx->program->scratch_offset, user_sgpr_info.user_sgpr_idx + 1); + add_arg(&args, s1, &ctx->program->scratch_offset, user_sgpr_info.user_sgpr_idx + 1); ctx->program->config->spi_ps_input_addr = 0; ctx->program->config->spi_ps_input_ena = 0; @@ -1004,8 +996,7 @@ Pseudo_instruction *add_startpgm(struct isel_context *ctx) if (ctx->program->info->cs.uses_local_invocation_idx) add_arg(&args, s1, &ctx->tg_size, idx++); - if (ctx->options->supports_spill || ctx->scratch_enabled) - add_arg(&args, s1, &ctx->program->scratch_offset, idx++); + add_arg(&args, s1, &ctx->program->scratch_offset, idx++); add_arg(&args, v3, &ctx->local_invocation_ids, vgpr_idx++); break; @@ -1357,7 +1348,6 @@ setup_isel_context(Program* program, unsigned scratch_size = 0; for (unsigned i = 0; i < shader_count; i++) scratch_size = std::max(scratch_size, shaders[i]->scratch_size); - ctx.scratch_enabled = scratch_size > 0; ctx.program->config->scratch_bytes_per_wave = align(scratch_size * ctx.program->wave_size, 1024); ctx.block = ctx.program->create_and_insert_block(); diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index 11f983974d6..5d87b9a675a 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -411,15 +411,11 @@ static void create_function(struct radv_shader_context *ctx, ctx->max_workgroup_size, ctx->args->options); - if (ctx->args->options->supports_spill) { - ctx->ring_offsets = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.implicit.buffer.ptr", - LLVMPointerType(ctx->ac.i8, AC_ADDR_SPACE_CONST), - NULL, 0, AC_FUNC_ATTR_READNONE); - ctx->ring_offsets = LLVMBuildBitCast(ctx->ac.builder, ctx->ring_offsets, - ac_array_in_const_addr_space(ctx->ac.v4i32), ""); - } else if (ctx->args->ring_offsets.used) { - ctx->ring_offsets = ac_get_arg(&ctx->ac, ctx->args->ring_offsets); - } + ctx->ring_offsets = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.implicit.buffer.ptr", + LLVMPointerType(ctx->ac.i8, AC_ADDR_SPACE_CONST), + NULL, 0, AC_FUNC_ATTR_READNONE); + ctx->ring_offsets = LLVMBuildBitCast(ctx->ac.builder, ctx->ring_offsets, + ac_array_in_const_addr_space(ctx->ac.v4i32), ""); load_descriptor_sets(ctx); diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 81526c7eca0..a7253976f67 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -1118,8 +1118,7 @@ shader_variant_compile(struct radv_device *device, struct ac_llvm_compiler ac_llvm; bool thread_compiler; - if (options->supports_spill) - tm_options |= AC_TM_SUPPORTS_SPILL; + tm_options |= AC_TM_SUPPORTS_SPILL; if (device->instance->perftest_flags & RADV_PERFTEST_SISCHED) tm_options |= AC_TM_SISCHED; if (options->check_ir) @@ -1200,7 +1199,7 @@ radv_shader_variant_compile(struct radv_device *device, if (key) options.key = *key; - options.supports_spill = true; + options.explicit_scratch_args = use_aco; options.robust_buffer_access = device->robust_buffer_access; return shader_variant_compile(device, module, shaders, shader_count, shaders[shader_count - 1]->info.stage, info, diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h index 0dde52e1303..7ffce47bdd8 100644 --- a/src/amd/vulkan/radv_shader.h +++ b/src/amd/vulkan/radv_shader.h @@ -125,7 +125,7 @@ struct radv_shader_variant_key { struct radv_nir_compiler_options { struct radv_pipeline_layout *layout; struct radv_shader_variant_key key; - bool supports_spill; + bool explicit_scratch_args; bool clamp_shadow_reference; bool robust_buffer_access; bool dump_shader; diff --git a/src/amd/vulkan/radv_shader_args.c b/src/amd/vulkan/radv_shader_args.c index 949b91dcf94..bcec3e9d2e7 100644 --- a/src/amd/vulkan/radv_shader_args.c +++ b/src/amd/vulkan/radv_shader_args.c @@ -72,7 +72,6 @@ set_loc_desc(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx) } struct user_sgpr_info { - bool need_ring_offsets; bool indirect_all_descriptor_sets; uint8_t remaining_sgprs; }; @@ -168,22 +167,8 @@ static void allocate_user_sgprs(struct radv_shader_args *args, memset(user_sgpr_info, 0, sizeof(struct user_sgpr_info)); - /* until we sort out scratch/global buffers always assign ring offsets for gs/vs/es */ - if (stage == MESA_SHADER_GEOMETRY || - stage == MESA_SHADER_VERTEX || - stage == MESA_SHADER_TESS_CTRL || - stage == MESA_SHADER_TESS_EVAL || - args->is_gs_copy_shader) - user_sgpr_info->need_ring_offsets = true; - - if (stage == MESA_SHADER_FRAGMENT && - args->shader_info->ps.needs_sample_positions) - user_sgpr_info->need_ring_offsets = true; - - /* 2 user sgprs will nearly always be allocated for scratch/rings */ - if (args->options->supports_spill || user_sgpr_info->need_ring_offsets) { - user_sgpr_count += 2; - } + /* 2 user sgprs will always be allocated for scratch/rings */ + user_sgpr_count += 2; switch (stage) { case MESA_SHADER_COMPUTE: @@ -464,7 +449,7 @@ radv_declare_shader_args(struct radv_shader_args *args, allocate_user_sgprs(args, stage, has_previous_stage, previous_stage, needs_view_index, &user_sgpr_info); - if (user_sgpr_info.need_ring_offsets && !args->options->supports_spill) { + if (args->options->explicit_scratch_args) { ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR, &args->ring_offsets); } @@ -490,6 +475,11 @@ radv_declare_shader_args(struct radv_shader_args *args, &args->ac.tg_size); } + if (args->options->explicit_scratch_args) { + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, + &args->scratch_offset); + } + ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT, &args->ac.local_invocation_ids); break; @@ -513,6 +503,11 @@ radv_declare_shader_args(struct radv_shader_args *args, declare_streamout_sgprs(args, stage); } + if (args->options->explicit_scratch_args) { + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, + &args->scratch_offset); + } + declare_vs_input_vgprs(args); break; case MESA_SHADER_TESS_CTRL: @@ -524,7 +519,7 @@ radv_declare_shader_args(struct radv_shader_args *args, ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->tess_factor_offset); - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // scratch offset + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->scratch_offset); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown @@ -556,6 +551,10 @@ radv_declare_shader_args(struct radv_shader_args *args, ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->tess_factor_offset); + if (args->options->explicit_scratch_args) { + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, + &args->scratch_offset); + } ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_patch_id); ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, @@ -578,6 +577,10 @@ radv_declare_shader_args(struct radv_shader_args *args, declare_streamout_sgprs(args, stage); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds); } + if (args->options->explicit_scratch_args) { + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, + &args->scratch_offset); + } declare_tes_input_vgprs(args); break; case MESA_SHADER_GEOMETRY: @@ -595,7 +598,7 @@ radv_declare_shader_args(struct radv_shader_args *args, &args->merged_wave_info); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds); - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // scratch offset + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->scratch_offset); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown @@ -638,6 +641,10 @@ radv_declare_shader_args(struct radv_shader_args *args, ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->gs2vs_offset); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->gs_wave_id); + if (args->options->explicit_scratch_args) { + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, + &args->scratch_offset); + } ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->gs_vtx_offset[0]); ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, @@ -660,6 +667,10 @@ radv_declare_shader_args(struct radv_shader_args *args, declare_global_input_sgprs(args, &user_sgpr_info); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.prim_mask); + if (args->options->explicit_scratch_args) { + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, + &args->scratch_offset); + } ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_sample); ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_center); ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_centroid); @@ -682,7 +693,7 @@ radv_declare_shader_args(struct radv_shader_args *args, } args->shader_info->num_input_vgprs = 0; - args->shader_info->num_input_sgprs = args->options->supports_spill ? 2 : 0; + args->shader_info->num_input_sgprs = 2; args->shader_info->num_input_sgprs += args->ac.num_sgprs_used; if (stage != MESA_SHADER_FRAGMENT) @@ -690,10 +701,8 @@ radv_declare_shader_args(struct radv_shader_args *args, uint8_t user_sgpr_idx = 0; - if (args->options->supports_spill || user_sgpr_info.need_ring_offsets) { - set_loc_shader_ptr(args, AC_UD_SCRATCH_RING_OFFSETS, - &user_sgpr_idx); - } + set_loc_shader_ptr(args, AC_UD_SCRATCH_RING_OFFSETS, + &user_sgpr_idx); /* For merged shaders the user SGPRs start at 8, with 8 system SGPRs in front (including * the rw_buffers at s0/s1. With user SGPR0 = s8, lets restart the count from 0 */ diff --git a/src/amd/vulkan/radv_shader_args.h b/src/amd/vulkan/radv_shader_args.h index a7442c617de..0f57058bd6e 100644 --- a/src/amd/vulkan/radv_shader_args.h +++ b/src/amd/vulkan/radv_shader_args.h @@ -34,6 +34,7 @@ struct radv_shader_args { struct ac_arg descriptor_sets[MAX_SETS]; struct ac_arg ring_offsets; + struct ac_arg scratch_offset; struct ac_arg vertex_buffers; struct ac_arg rel_auto_id; |