summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/amd/compiler/aco_instruction_selection_setup.cpp22
-rw-r--r--src/amd/vulkan/radv_nir_to_llvm.c14
-rw-r--r--src/amd/vulkan/radv_shader.c5
-rw-r--r--src/amd/vulkan/radv_shader.h2
-rw-r--r--src/amd/vulkan/radv_shader_args.c59
-rw-r--r--src/amd/vulkan/radv_shader_args.h1
6 files changed, 49 insertions, 54 deletions
diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp
index c3f2832fa8f..fbab89417cd 100644
--- a/src/amd/compiler/aco_instruction_selection_setup.cpp
+++ b/src/amd/compiler/aco_instruction_selection_setup.cpp
@@ -95,9 +95,6 @@ struct isel_context {
bool exec_potentially_empty = false;
} cf_info;
- /* scratch */
- bool scratch_enabled = false;
-
/* inputs common for merged stages */
Temp merged_wave_info = Temp(0, s1);
@@ -639,8 +636,7 @@ static void allocate_user_sgprs(isel_context *ctx,
user_sgpr_info.need_ring_offsets = true;
/* 2 user sgprs will nearly always be allocated for scratch/rings */
- if (ctx->options->supports_spill || user_sgpr_info.need_ring_offsets || ctx->scratch_enabled)
- user_sgpr_count += 2;
+ user_sgpr_count += 2;
switch (ctx->stage) {
case vertex_vs:
@@ -895,10 +891,8 @@ Pseudo_instruction *add_startpgm(struct isel_context *ctx)
arg_info args = {};
/* this needs to be in sgprs 0 and 1 */
- if (ctx->options->supports_spill || user_sgpr_info.need_ring_offsets || ctx->scratch_enabled) {
- add_arg(&args, s2, &ctx->program->private_segment_buffer, 0);
- set_loc_shader_ptr(ctx, AC_UD_SCRATCH_RING_OFFSETS, &user_sgpr_info.user_sgpr_idx);
- }
+ add_arg(&args, s2, &ctx->program->private_segment_buffer, 0);
+ set_loc_shader_ptr(ctx, AC_UD_SCRATCH_RING_OFFSETS, &user_sgpr_info.user_sgpr_idx);
unsigned vgpr_idx = 0;
switch (ctx->stage) {
@@ -928,8 +922,7 @@ Pseudo_instruction *add_startpgm(struct isel_context *ctx)
else
declare_streamout_sgprs(ctx, &args, &idx);
- if (ctx->options->supports_spill || ctx->scratch_enabled)
- add_arg(&args, s1, &ctx->program->scratch_offset, idx++);
+ add_arg(&args, s1, &ctx->program->scratch_offset, idx++);
declare_vs_input_vgprs(ctx, &args);
break;
@@ -940,8 +933,7 @@ Pseudo_instruction *add_startpgm(struct isel_context *ctx)
assert(user_sgpr_info.user_sgpr_idx == user_sgpr_info.num_sgpr);
add_arg(&args, s1, &ctx->prim_mask, user_sgpr_info.user_sgpr_idx);
- if (ctx->options->supports_spill || ctx->scratch_enabled)
- add_arg(&args, s1, &ctx->program->scratch_offset, user_sgpr_info.user_sgpr_idx + 1);
+ add_arg(&args, s1, &ctx->program->scratch_offset, user_sgpr_info.user_sgpr_idx + 1);
ctx->program->config->spi_ps_input_addr = 0;
ctx->program->config->spi_ps_input_ena = 0;
@@ -1004,8 +996,7 @@ Pseudo_instruction *add_startpgm(struct isel_context *ctx)
if (ctx->program->info->cs.uses_local_invocation_idx)
add_arg(&args, s1, &ctx->tg_size, idx++);
- if (ctx->options->supports_spill || ctx->scratch_enabled)
- add_arg(&args, s1, &ctx->program->scratch_offset, idx++);
+ add_arg(&args, s1, &ctx->program->scratch_offset, idx++);
add_arg(&args, v3, &ctx->local_invocation_ids, vgpr_idx++);
break;
@@ -1357,7 +1348,6 @@ setup_isel_context(Program* program,
unsigned scratch_size = 0;
for (unsigned i = 0; i < shader_count; i++)
scratch_size = std::max(scratch_size, shaders[i]->scratch_size);
- ctx.scratch_enabled = scratch_size > 0;
ctx.program->config->scratch_bytes_per_wave = align(scratch_size * ctx.program->wave_size, 1024);
ctx.block = ctx.program->create_and_insert_block();
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
index 11f983974d6..5d87b9a675a 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -411,15 +411,11 @@ static void create_function(struct radv_shader_context *ctx,
ctx->max_workgroup_size,
ctx->args->options);
- if (ctx->args->options->supports_spill) {
- ctx->ring_offsets = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.implicit.buffer.ptr",
- LLVMPointerType(ctx->ac.i8, AC_ADDR_SPACE_CONST),
- NULL, 0, AC_FUNC_ATTR_READNONE);
- ctx->ring_offsets = LLVMBuildBitCast(ctx->ac.builder, ctx->ring_offsets,
- ac_array_in_const_addr_space(ctx->ac.v4i32), "");
- } else if (ctx->args->ring_offsets.used) {
- ctx->ring_offsets = ac_get_arg(&ctx->ac, ctx->args->ring_offsets);
- }
+ ctx->ring_offsets = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.implicit.buffer.ptr",
+ LLVMPointerType(ctx->ac.i8, AC_ADDR_SPACE_CONST),
+ NULL, 0, AC_FUNC_ATTR_READNONE);
+ ctx->ring_offsets = LLVMBuildBitCast(ctx->ac.builder, ctx->ring_offsets,
+ ac_array_in_const_addr_space(ctx->ac.v4i32), "");
load_descriptor_sets(ctx);
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 81526c7eca0..a7253976f67 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -1118,8 +1118,7 @@ shader_variant_compile(struct radv_device *device,
struct ac_llvm_compiler ac_llvm;
bool thread_compiler;
- if (options->supports_spill)
- tm_options |= AC_TM_SUPPORTS_SPILL;
+ tm_options |= AC_TM_SUPPORTS_SPILL;
if (device->instance->perftest_flags & RADV_PERFTEST_SISCHED)
tm_options |= AC_TM_SISCHED;
if (options->check_ir)
@@ -1200,7 +1199,7 @@ radv_shader_variant_compile(struct radv_device *device,
if (key)
options.key = *key;
- options.supports_spill = true;
+ options.explicit_scratch_args = use_aco;
options.robust_buffer_access = device->robust_buffer_access;
return shader_variant_compile(device, module, shaders, shader_count, shaders[shader_count - 1]->info.stage, info,
diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
index 0dde52e1303..7ffce47bdd8 100644
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -125,7 +125,7 @@ struct radv_shader_variant_key {
struct radv_nir_compiler_options {
struct radv_pipeline_layout *layout;
struct radv_shader_variant_key key;
- bool supports_spill;
+ bool explicit_scratch_args;
bool clamp_shadow_reference;
bool robust_buffer_access;
bool dump_shader;
diff --git a/src/amd/vulkan/radv_shader_args.c b/src/amd/vulkan/radv_shader_args.c
index 949b91dcf94..bcec3e9d2e7 100644
--- a/src/amd/vulkan/radv_shader_args.c
+++ b/src/amd/vulkan/radv_shader_args.c
@@ -72,7 +72,6 @@ set_loc_desc(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx)
}
struct user_sgpr_info {
- bool need_ring_offsets;
bool indirect_all_descriptor_sets;
uint8_t remaining_sgprs;
};
@@ -168,22 +167,8 @@ static void allocate_user_sgprs(struct radv_shader_args *args,
memset(user_sgpr_info, 0, sizeof(struct user_sgpr_info));
- /* until we sort out scratch/global buffers always assign ring offsets for gs/vs/es */
- if (stage == MESA_SHADER_GEOMETRY ||
- stage == MESA_SHADER_VERTEX ||
- stage == MESA_SHADER_TESS_CTRL ||
- stage == MESA_SHADER_TESS_EVAL ||
- args->is_gs_copy_shader)
- user_sgpr_info->need_ring_offsets = true;
-
- if (stage == MESA_SHADER_FRAGMENT &&
- args->shader_info->ps.needs_sample_positions)
- user_sgpr_info->need_ring_offsets = true;
-
- /* 2 user sgprs will nearly always be allocated for scratch/rings */
- if (args->options->supports_spill || user_sgpr_info->need_ring_offsets) {
- user_sgpr_count += 2;
- }
+ /* 2 user sgprs will always be allocated for scratch/rings */
+ user_sgpr_count += 2;
switch (stage) {
case MESA_SHADER_COMPUTE:
@@ -464,7 +449,7 @@ radv_declare_shader_args(struct radv_shader_args *args,
allocate_user_sgprs(args, stage, has_previous_stage,
previous_stage, needs_view_index, &user_sgpr_info);
- if (user_sgpr_info.need_ring_offsets && !args->options->supports_spill) {
+ if (args->options->explicit_scratch_args) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR,
&args->ring_offsets);
}
@@ -490,6 +475,11 @@ radv_declare_shader_args(struct radv_shader_args *args,
&args->ac.tg_size);
}
+ if (args->options->explicit_scratch_args) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
+ &args->scratch_offset);
+ }
+
ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT,
&args->ac.local_invocation_ids);
break;
@@ -513,6 +503,11 @@ radv_declare_shader_args(struct radv_shader_args *args,
declare_streamout_sgprs(args, stage);
}
+ if (args->options->explicit_scratch_args) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
+ &args->scratch_offset);
+ }
+
declare_vs_input_vgprs(args);
break;
case MESA_SHADER_TESS_CTRL:
@@ -524,7 +519,7 @@ radv_declare_shader_args(struct radv_shader_args *args,
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
&args->tess_factor_offset);
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // scratch offset
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->scratch_offset);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
@@ -556,6 +551,10 @@ radv_declare_shader_args(struct radv_shader_args *args,
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
&args->tess_factor_offset);
+ if (args->options->explicit_scratch_args) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
+ &args->scratch_offset);
+ }
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
&args->ac.tcs_patch_id);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
@@ -578,6 +577,10 @@ radv_declare_shader_args(struct radv_shader_args *args,
declare_streamout_sgprs(args, stage);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds);
}
+ if (args->options->explicit_scratch_args) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
+ &args->scratch_offset);
+ }
declare_tes_input_vgprs(args);
break;
case MESA_SHADER_GEOMETRY:
@@ -595,7 +598,7 @@ radv_declare_shader_args(struct radv_shader_args *args,
&args->merged_wave_info);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds);
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // scratch offset
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->scratch_offset);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
@@ -638,6 +641,10 @@ radv_declare_shader_args(struct radv_shader_args *args,
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->gs2vs_offset);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->gs_wave_id);
+ if (args->options->explicit_scratch_args) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
+ &args->scratch_offset);
+ }
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
&args->gs_vtx_offset[0]);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
@@ -660,6 +667,10 @@ radv_declare_shader_args(struct radv_shader_args *args,
declare_global_input_sgprs(args, &user_sgpr_info);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.prim_mask);
+ if (args->options->explicit_scratch_args) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
+ &args->scratch_offset);
+ }
ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_sample);
ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_center);
ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_centroid);
@@ -682,7 +693,7 @@ radv_declare_shader_args(struct radv_shader_args *args,
}
args->shader_info->num_input_vgprs = 0;
- args->shader_info->num_input_sgprs = args->options->supports_spill ? 2 : 0;
+ args->shader_info->num_input_sgprs = 2;
args->shader_info->num_input_sgprs += args->ac.num_sgprs_used;
if (stage != MESA_SHADER_FRAGMENT)
@@ -690,10 +701,8 @@ radv_declare_shader_args(struct radv_shader_args *args,
uint8_t user_sgpr_idx = 0;
- if (args->options->supports_spill || user_sgpr_info.need_ring_offsets) {
- set_loc_shader_ptr(args, AC_UD_SCRATCH_RING_OFFSETS,
- &user_sgpr_idx);
- }
+ set_loc_shader_ptr(args, AC_UD_SCRATCH_RING_OFFSETS,
+ &user_sgpr_idx);
/* For merged shaders the user SGPRs start at 8, with 8 system SGPRs in front (including
* the rw_buffers at s0/s1. With user SGPR0 = s8, lets restart the count from 0 */
diff --git a/src/amd/vulkan/radv_shader_args.h b/src/amd/vulkan/radv_shader_args.h
index a7442c617de..0f57058bd6e 100644
--- a/src/amd/vulkan/radv_shader_args.h
+++ b/src/amd/vulkan/radv_shader_args.h
@@ -34,6 +34,7 @@ struct radv_shader_args {
struct ac_arg descriptor_sets[MAX_SETS];
struct ac_arg ring_offsets;
+ struct ac_arg scratch_offset;
struct ac_arg vertex_buffers;
struct ac_arg rel_auto_id;