-rw-r--r--  src/amd/common/ac_nir_to_llvm.c    56
-rw-r--r--  src/amd/common/ac_nir_to_llvm.h    44
-rw-r--r--  src/amd/vulkan/radv_cmd_buffer.c  181
3 files changed, 208 insertions, 73 deletions
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index ec1ba9e55b3..41b67f1fd36 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -62,7 +62,7 @@ struct nir_to_llvm_context {
struct hash_table *defs;
struct hash_table *phis;
- LLVMValueRef descriptor_sets[4];
+ LLVMValueRef descriptor_sets[AC_UD_MAX_SETS];
LLVMValueRef push_constants;
LLVMValueRef num_work_groups;
LLVMValueRef workgroup_ids;
@@ -426,6 +426,45 @@ static LLVMValueRef build_indexed_load_const(struct nir_to_llvm_context *ctx,
return result;
}
+static void set_userdata_location(struct ac_userdata_info *ud_info, uint8_t sgpr_idx, uint8_t num_sgprs)
+{
+ ud_info->sgpr_idx = sgpr_idx;
+ ud_info->num_sgprs = num_sgprs;
+ ud_info->indirect = false;
+ ud_info->indirect_offset = 0;
+}
+
+static void set_userdata_location_shader(struct nir_to_llvm_context *ctx,
+ int idx, uint8_t sgpr_idx, uint8_t num_sgprs)
+{
+ set_userdata_location(&ctx->shader_info->user_sgprs_locs.shader_data[idx], sgpr_idx, num_sgprs);
+}
+
+#if 0
+static void set_userdata_location_indirect(struct ac_userdata_info *ud_info, uint8_t sgpr_idx, uint8_t num_sgprs,
+ uint32_t indirect_offset)
+{
+ ud_info->sgpr_idx = sgpr_idx;
+ ud_info->num_sgprs = num_sgprs;
+ ud_info->indirect = true;
+ ud_info->indirect_offset = indirect_offset;
+}
+#endif
+
+#define AC_USERDATA_DESCRIPTOR_SET_0 0
+#define AC_USERDATA_DESCRIPTOR_SET_1 2
+#define AC_USERDATA_DESCRIPTOR_SET_2 4
+#define AC_USERDATA_DESCRIPTOR_SET_3 6
+#define AC_USERDATA_PUSH_CONST_DYN 8
+
+#define AC_USERDATA_VS_VERTEX_BUFFERS 10
+#define AC_USERDATA_VS_BASE_VERTEX 12
+#define AC_USERDATA_VS_START_INSTANCE 13
+
+#define AC_USERDATA_PS_SAMPLE_POS 10
+
+#define AC_USERDATA_CS_GRID_SIZE 10
+
static void create_function(struct nir_to_llvm_context *ctx,
struct nir_shader *nir)
{
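For orientation, here is a hedged sketch (not code from the patch) of the table create_function fills in below for a vertex shader, using the dword offsets defined above. sgpr_idx counts 32-bit user-data slots from the stage's first USER_DATA register, and each pointer is a 64-bit address, so it spans two SGPRs:

struct ac_userdata_locations locs = {0};

/* descriptor sets 0-3 at dword pairs 0/2, 2/3... i.e. offsets 0, 2, 4, 6 */
for (unsigned i = 0; i < AC_UD_MAX_SETS; i++)
	set_userdata_location(&locs.descriptor_sets[i], i * 2, 2);
/* push-constant pointer at dwords 8-9 */
set_userdata_location(&locs.shader_data[AC_UD_PUSH_CONSTANTS],
		      AC_USERDATA_PUSH_CONST_DYN, 2);
/* vertex-buffer descriptor pointer at dwords 10-11 */
set_userdata_location(&locs.shader_data[AC_UD_VS_VERTEX_BUFFERS],
		      AC_USERDATA_VS_VERTEX_BUFFERS, 2);
/* base vertex at dword 12, start instance at dword 13 */
set_userdata_location(&locs.shader_data[AC_UD_VS_BASE_VERTEX_START_INSTANCE],
		      AC_USERDATA_VS_BASE_VERTEX, 2);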
@@ -510,14 +549,18 @@ static void create_function(struct nir_to_llvm_context *ctx,
ctx->shader_info->num_input_vgprs += llvm_get_type_size(arg_types[i]) / 4;
arg_idx = 0;
- for (unsigned i = 0; i < 4; ++i)
+ for (unsigned i = 0; i < 4; ++i) {
+ set_userdata_location(&ctx->shader_info->user_sgprs_locs.descriptor_sets[i], i * 2, 2);
ctx->descriptor_sets[i] =
LLVMGetParam(ctx->main_function, arg_idx++);
+ }
ctx->push_constants = LLVMGetParam(ctx->main_function, arg_idx++);
+ set_userdata_location_shader(ctx, AC_UD_PUSH_CONSTANTS, AC_USERDATA_PUSH_CONST_DYN, 2);
switch (nir->stage) {
case MESA_SHADER_COMPUTE:
+ set_userdata_location_shader(ctx, AC_UD_CS_GRID_SIZE, AC_USERDATA_CS_GRID_SIZE, 3);
ctx->num_work_groups =
LLVMGetParam(ctx->main_function, arg_idx++);
ctx->workgroup_ids =
@@ -528,7 +571,9 @@ static void create_function(struct nir_to_llvm_context *ctx,
LLVMGetParam(ctx->main_function, arg_idx++);
break;
case MESA_SHADER_VERTEX:
+ set_userdata_location_shader(ctx, AC_UD_VS_VERTEX_BUFFERS, AC_USERDATA_VS_VERTEX_BUFFERS, 2);
ctx->vertex_buffers = LLVMGetParam(ctx->main_function, arg_idx++);
+ set_userdata_location_shader(ctx, AC_UD_VS_BASE_VERTEX_START_INSTANCE, AC_USERDATA_VS_BASE_VERTEX, 2);
ctx->base_vertex = LLVMGetParam(ctx->main_function, arg_idx++);
ctx->start_instance = LLVMGetParam(ctx->main_function, arg_idx++);
ctx->vertex_id = LLVMGetParam(ctx->main_function, arg_idx++);
@@ -537,6 +582,7 @@ static void create_function(struct nir_to_llvm_context *ctx,
ctx->instance_id = LLVMGetParam(ctx->main_function, arg_idx++);
break;
case MESA_SHADER_FRAGMENT:
+ set_userdata_location_shader(ctx, AC_UD_PS_SAMPLE_POS, AC_USERDATA_PS_SAMPLE_POS, 2);
ctx->sample_positions = LLVMGetParam(ctx->main_function, arg_idx++);
ctx->prim_mask = LLVMGetParam(ctx->main_function, arg_idx++);
ctx->persp_sample = LLVMGetParam(ctx->main_function, arg_idx++);
@@ -4604,6 +4650,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
{
struct nir_to_llvm_context ctx = {0};
struct nir_function *func;
+ unsigned i;
ctx.options = options;
ctx.shader_info = shader_info;
ctx.context = LLVMContextCreate();
@@ -4619,6 +4666,11 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
ctx.builder = LLVMCreateBuilderInContext(ctx.context);
ctx.stage = nir->stage;
+ for (i = 0; i < AC_UD_MAX_SETS; i++)
+ shader_info->user_sgprs_locs.descriptor_sets[i].sgpr_idx = -1;
+ for (i = 0; i < AC_UD_MAX_UD; i++)
+ shader_info->user_sgprs_locs.shader_data[i].sgpr_idx = -1;
+
create_function(&ctx, nir);
if (nir->stage == MESA_SHADER_COMPUTE) {
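Initialising every sgpr_idx to -1 up front gives consumers a uniform "not allocated" sentinel; a location only becomes valid once create_function overwrites it. A minimal sketch of the guard this enables (the same pattern the radv changes below use):

/* Sketch, not part of the patch: skip user-data inputs the compiler
 * never allocated. sgpr_idx is int8_t, so -1 is representable. */
const struct ac_userdata_info *loc =
	&shader_info->user_sgprs_locs.shader_data[AC_UD_PUSH_CONSTANTS];
if (loc->sgpr_idx == -1)
	return; /* shader does not use push constants */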
diff --git a/src/amd/common/ac_nir_to_llvm.h b/src/amd/common/ac_nir_to_llvm.h
index f33519ccca3..f488c09bb92 100644
--- a/src/amd/common/ac_nir_to_llvm.h
+++ b/src/amd/common/ac_nir_to_llvm.h
@@ -56,7 +56,35 @@ struct ac_nir_compiler_options {
enum chip_class chip_class;
};
+struct ac_userdata_info {
+ int8_t sgpr_idx;
+ uint8_t num_sgprs;
+ bool indirect;
+ uint32_t indirect_offset;
+};
+
+enum ac_ud_index {
+ AC_UD_PUSH_CONSTANTS = 0,
+ AC_UD_SHADER_START = 1,
+ AC_UD_VS_VERTEX_BUFFERS = AC_UD_SHADER_START,
+ AC_UD_VS_BASE_VERTEX_START_INSTANCE,
+ AC_UD_VS_MAX_UD,
+ AC_UD_PS_SAMPLE_POS = AC_UD_SHADER_START,
+ AC_UD_PS_MAX_UD,
+ AC_UD_CS_GRID_SIZE = AC_UD_SHADER_START,
+ AC_UD_CS_MAX_UD,
+ AC_UD_MAX_UD = AC_UD_VS_MAX_UD,
+};
+
+#define AC_UD_MAX_SETS 4
+
+struct ac_userdata_locations {
+ struct ac_userdata_info descriptor_sets[AC_UD_MAX_SETS];
+ struct ac_userdata_info shader_data[AC_UD_MAX_UD];
+};
+
struct ac_shader_variant_info {
+ struct ac_userdata_locations user_sgprs_locs;
unsigned num_user_sgprs;
unsigned num_input_sgprs;
unsigned num_input_vgprs;
@@ -97,20 +125,4 @@ void ac_compile_nir_shader(LLVMTargetMachineRef tm,
const struct ac_nir_compiler_options *options,
bool dump_shader);
-/* SHADER ABI defines */
-
-/* offset in dwords */
-#define AC_USERDATA_DESCRIPTOR_SET_0 0
-#define AC_USERDATA_DESCRIPTOR_SET_1 2
-#define AC_USERDATA_DESCRIPTOR_SET_2 4
-#define AC_USERDATA_DESCRIPTOR_SET_3 6
-#define AC_USERDATA_PUSH_CONST_DYN 8
-
-#define AC_USERDATA_VS_VERTEX_BUFFERS 10
-#define AC_USERDATA_VS_BASE_VERTEX 12
-#define AC_USERDATA_VS_START_INSTANCE 13
-
-#define AC_USERDATA_PS_SAMPLE_POS 10
-
-#define AC_USERDATA_CS_GRID_SIZE 10
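Note how the per-stage enumerants alias past AC_UD_SHADER_START, so a single shader_data[AC_UD_MAX_UD] array covers every stage; the vertex stage has the most entries, which is why AC_UD_MAX_UD equals AC_UD_VS_MAX_UD. Spelled out (values implied by the enum, not stated in the patch):

/* AC_UD_PUSH_CONSTANTS                = 0  (all stages)
 * AC_UD_VS_VERTEX_BUFFERS             = 1  \
 * AC_UD_PS_SAMPLE_POS                 = 1   } same slot, one per stage
 * AC_UD_CS_GRID_SIZE                  = 1  /
 * AC_UD_VS_BASE_VERTEX_START_INSTANCE = 2  (VS only)
 * AC_UD_VS_MAX_UD = 3, AC_UD_PS_MAX_UD = AC_UD_CS_MAX_UD = 2,
 * AC_UD_MAX_UD = AC_UD_VS_MAX_UD = 3 */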
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 8a22ec40ad0..4ae4e54f87e 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -276,6 +276,46 @@ static unsigned radv_pack_float_12p4(float x)
x >= 4096 ? 0xffff : x * 16;
}
+static uint32_t
+shader_stage_to_user_data_0(gl_shader_stage stage)
+{
+ switch (stage) {
+ case MESA_SHADER_FRAGMENT:
+ return R_00B030_SPI_SHADER_USER_DATA_PS_0;
+ case MESA_SHADER_VERTEX:
+ return R_00B130_SPI_SHADER_USER_DATA_VS_0;
+ case MESA_SHADER_COMPUTE:
+ return R_00B900_COMPUTE_USER_DATA_0;
+ default:
+ unreachable("unknown shader");
+ }
+}
+
+static struct ac_userdata_info *
+radv_lookup_user_sgpr(struct radv_pipeline *pipeline,
+ gl_shader_stage stage,
+ int idx)
+{
+ return &pipeline->shaders[stage]->info.user_sgprs_locs.shader_data[idx];
+}
+
+static void
+radv_emit_userdata_address(struct radv_cmd_buffer *cmd_buffer,
+ struct radv_pipeline *pipeline,
+ gl_shader_stage stage,
+ int idx, uint64_t va)
+{
+ struct ac_userdata_info *loc = radv_lookup_user_sgpr(pipeline, stage, idx);
+ uint32_t base_reg = shader_stage_to_user_data_0(stage);
+ if (loc->sgpr_idx == -1)
+ return;
+ assert(loc->num_sgprs == 2);
+ assert(!loc->indirect);
+ radeon_set_sh_reg_seq(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, 2);
+ radeon_emit(cmd_buffer->cs, va);
+ radeon_emit(cmd_buffer->cs, va >> 32);
+}
+
static void
radv_update_multisample_state(struct radv_cmd_buffer *cmd_buffer,
struct radv_pipeline *pipeline)
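A worked example of the addressing in radv_emit_userdata_address: user-data SGPRs map to consecutive 32-bit SH registers, so dword index sgpr_idx becomes byte offset sgpr_idx * 4 from the stage's USER_DATA_0 register, and the 64-bit VA is written as two dwords. Assuming the default vertex-shader layout above (push constants at sgpr_idx 8), the call boils down to:

/* Sketch only: MESA_SHADER_VERTEX / AC_UD_PUSH_CONSTANTS, sgpr_idx == 8 */
uint32_t base_reg = R_00B130_SPI_SHADER_USER_DATA_VS_0;
radeon_set_sh_reg_seq(cmd_buffer->cs, base_reg + 8 * 4, 2);
radeon_emit(cmd_buffer->cs, va);        /* low 32 bits of the GPU VA  */
radeon_emit(cmd_buffer->cs, va >> 32);  /* high 32 bits of the GPU VA */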
@@ -327,9 +367,8 @@ radv_update_multisample_state(struct radv_cmd_buffer *cmd_buffer,
uint64_t va = cmd_buffer->device->ws->buffer_get_va(cmd_buffer->upload.upload_bo);
va += samples_offset;
- radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B030_SPI_SHADER_USER_DATA_PS_0 + AC_USERDATA_PS_SAMPLE_POS * 4, 2);
- radeon_emit(cmd_buffer->cs, va);
- radeon_emit(cmd_buffer->cs, va >> 32);
+ radv_emit_userdata_address(cmd_buffer, pipeline, MESA_SHADER_FRAGMENT,
+ AC_UD_PS_SAMPLE_POS, va);
}
static void
@@ -896,34 +935,51 @@ radv_cmd_buffer_flush_dynamic_state(struct radv_cmd_buffer *cmd_buffer)
static void
emit_stage_descriptor_set_userdata(struct radv_cmd_buffer *cmd_buffer,
+ struct radv_pipeline *pipeline,
int idx,
uint64_t va,
- uint32_t base_reg)
+ gl_shader_stage stage)
{
+ struct ac_userdata_info *desc_set_loc = &pipeline->shaders[stage]->info.user_sgprs_locs.descriptor_sets[idx];
+ uint32_t base_reg = shader_stage_to_user_data_0(stage);
+
+ if (desc_set_loc->sgpr_idx == -1)
+ return;
+
+ assert(!desc_set_loc->indirect);
+ assert(desc_set_loc->num_sgprs == 2);
radeon_set_sh_reg_seq(cmd_buffer->cs,
- base_reg + 8 * idx, 2);
+ base_reg + desc_set_loc->sgpr_idx * 4, 2);
radeon_emit(cmd_buffer->cs, va);
radeon_emit(cmd_buffer->cs, va >> 32);
}
static void
radv_emit_descriptor_set_userdata(struct radv_cmd_buffer *cmd_buffer,
+ struct radv_pipeline *pipeline,
VkShaderStageFlags stages,
struct radv_descriptor_set *set,
unsigned idx)
{
if (stages & VK_SHADER_STAGE_FRAGMENT_BIT)
- emit_stage_descriptor_set_userdata(cmd_buffer, idx, set->va, R_00B030_SPI_SHADER_USER_DATA_PS_0);
+ emit_stage_descriptor_set_userdata(cmd_buffer, pipeline,
+ idx, set->va,
+ MESA_SHADER_FRAGMENT);
if (stages & VK_SHADER_STAGE_VERTEX_BIT)
- emit_stage_descriptor_set_userdata(cmd_buffer, idx, set->va, R_00B130_SPI_SHADER_USER_DATA_VS_0);
+ emit_stage_descriptor_set_userdata(cmd_buffer, pipeline,
+ idx, set->va,
+ MESA_SHADER_VERTEX);
if (stages & VK_SHADER_STAGE_COMPUTE_BIT)
- emit_stage_descriptor_set_userdata(cmd_buffer, idx, set->va, R_00B900_COMPUTE_USER_DATA_0);
+ emit_stage_descriptor_set_userdata(cmd_buffer, pipeline,
+ idx, set->va,
+ MESA_SHADER_COMPUTE);
}
static void
radv_flush_descriptors(struct radv_cmd_buffer *cmd_buffer,
+ struct radv_pipeline *pipeline,
VkShaderStageFlags stages)
{
unsigned i;
@@ -937,23 +993,12 @@ radv_flush_descriptors(struct radv_cmd_buffer *cmd_buffer,
if (!set)
continue;
- radv_emit_descriptor_set_userdata(cmd_buffer, stages, set, i);
+ radv_emit_descriptor_set_userdata(cmd_buffer, pipeline, stages, set, i);
}
cmd_buffer->state.descriptors_dirty = 0;
}
static void
-emit_constants_set_userdata(struct radv_cmd_buffer *cmd_buffer,
- uint64_t va,
- uint32_t base_reg)
-{
- radeon_set_sh_reg_seq(cmd_buffer->cs,
- base_reg + 4 * AC_USERDATA_PUSH_CONST_DYN, 2);
- radeon_emit(cmd_buffer->cs, va);
- radeon_emit(cmd_buffer->cs, va >> 32);
-}
-
-static void
radv_flush_constants(struct radv_cmd_buffer *cmd_buffer,
struct radv_pipeline *pipeline,
VkShaderStageFlags stages)
@@ -979,13 +1024,16 @@ radv_flush_constants(struct radv_cmd_buffer *cmd_buffer,
va += offset;
if (stages & VK_SHADER_STAGE_VERTEX_BIT)
- emit_constants_set_userdata(cmd_buffer, va, R_00B130_SPI_SHADER_USER_DATA_VS_0);
+ radv_emit_userdata_address(cmd_buffer, pipeline, MESA_SHADER_VERTEX,
+ AC_UD_PUSH_CONSTANTS, va);
if (stages & VK_SHADER_STAGE_FRAGMENT_BIT)
- emit_constants_set_userdata(cmd_buffer, va, R_00B030_SPI_SHADER_USER_DATA_PS_0);
+ radv_emit_userdata_address(cmd_buffer, pipeline, MESA_SHADER_FRAGMENT,
+ AC_UD_PUSH_CONSTANTS, va);
if (stages & VK_SHADER_STAGE_COMPUTE_BIT)
- emit_constants_set_userdata(cmd_buffer, va, R_00B900_COMPUTE_USER_DATA_0);
+ radv_emit_userdata_address(cmd_buffer, pipeline, MESA_SHADER_COMPUTE,
+ AC_UD_PUSH_CONSTANTS, va);
cmd_buffer->push_constant_stages &= ~stages;
}
@@ -1036,11 +1084,9 @@ radv_cmd_buffer_flush_state(struct radv_cmd_buffer *cmd_buffer)
va = device->ws->buffer_get_va(cmd_buffer->upload.upload_bo);
va += vb_offset;
- radeon_set_sh_reg_seq(cmd_buffer->cs,
- R_00B130_SPI_SHADER_USER_DATA_VS_0 + AC_USERDATA_VS_VERTEX_BUFFERS * 4, 2);
- radeon_emit(cmd_buffer->cs, va);
- radeon_emit(cmd_buffer->cs, va >> 32);
+ radv_emit_userdata_address(cmd_buffer, pipeline, MESA_SHADER_VERTEX,
+ AC_UD_VS_VERTEX_BUFFERS, va);
}
cmd_buffer->state.vertex_descriptors_dirty = false;
@@ -1075,7 +1121,8 @@ radv_cmd_buffer_flush_state(struct radv_cmd_buffer *cmd_buffer)
radv_cmd_buffer_flush_dynamic_state(cmd_buffer);
- radv_flush_descriptors(cmd_buffer, VK_SHADER_STAGE_ALL_GRAPHICS);
+ radv_flush_descriptors(cmd_buffer, cmd_buffer->state.pipeline,
+ VK_SHADER_STAGE_ALL_GRAPHICS);
radv_flush_constants(cmd_buffer, cmd_buffer->state.pipeline,
VK_SHADER_STAGE_ALL_GRAPHICS);
@@ -1820,9 +1867,13 @@ void radv_CmdDraw(
unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 9);
- radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B130_SPI_SHADER_USER_DATA_VS_0 + AC_USERDATA_VS_BASE_VERTEX * 4, 2);
- radeon_emit(cmd_buffer->cs, firstVertex);
- radeon_emit(cmd_buffer->cs, firstInstance);
+ struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.pipeline, MESA_SHADER_VERTEX,
+ AC_UD_VS_BASE_VERTEX_START_INSTANCE);
+ if (loc->sgpr_idx != -1) {
+ radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B130_SPI_SHADER_USER_DATA_VS_0 + loc->sgpr_idx * 4, 2);
+ radeon_emit(cmd_buffer->cs, firstVertex);
+ radeon_emit(cmd_buffer->cs, firstInstance);
+ }
radeon_emit(cmd_buffer->cs, PKT3(PKT3_NUM_INSTANCES, 0, 0));
radeon_emit(cmd_buffer->cs, instanceCount);
@@ -1867,9 +1918,13 @@ void radv_CmdDrawIndexed(
radeon_emit(cmd_buffer->cs, PKT3(PKT3_INDEX_TYPE, 0, 0));
radeon_emit(cmd_buffer->cs, cmd_buffer->state.index_type);
- radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B130_SPI_SHADER_USER_DATA_VS_0 + AC_USERDATA_VS_BASE_VERTEX * 4, 2);
- radeon_emit(cmd_buffer->cs, vertexOffset);
- radeon_emit(cmd_buffer->cs, firstInstance);
+ struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.pipeline, MESA_SHADER_VERTEX,
+ AC_UD_VS_BASE_VERTEX_START_INSTANCE);
+ if (loc->sgpr_idx != -1) {
+ radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B130_SPI_SHADER_USER_DATA_VS_0 + loc->sgpr_idx * 4, 2);
+ radeon_emit(cmd_buffer->cs, vertexOffset);
+ radeon_emit(cmd_buffer->cs, firstInstance);
+ }
radeon_emit(cmd_buffer->cs, PKT3(PKT3_NUM_INSTANCES, 0, 0));
radeon_emit(cmd_buffer->cs, instanceCount);
@@ -1914,6 +1969,9 @@ radv_emit_indirect_draw(struct radv_cmd_buffer *cmd_buffer,
cmd_buffer->device->ws->cs_add_buffer(cs, buffer->bo, 8);
+ struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.pipeline, MESA_SHADER_VERTEX,
+ AC_UD_VS_BASE_VERTEX_START_INSTANCE);
+ assert(loc->sgpr_idx != -1);
radeon_emit(cs, PKT3(PKT3_SET_BASE, 2, 0));
radeon_emit(cs, 1);
radeon_emit(cs, indirect_va);
@@ -1923,8 +1981,8 @@ radv_emit_indirect_draw(struct radv_cmd_buffer *cmd_buffer,
PKT3_DRAW_INDIRECT_MULTI,
8, false));
radeon_emit(cs, 0);
- radeon_emit(cs, ((R_00B130_SPI_SHADER_USER_DATA_VS_0 + AC_USERDATA_VS_BASE_VERTEX * 4) - SI_SH_REG_OFFSET) >> 2);
- radeon_emit(cs, ((R_00B130_SPI_SHADER_USER_DATA_VS_0 + AC_USERDATA_VS_START_INSTANCE * 4) - SI_SH_REG_OFFSET) >> 2);
+ radeon_emit(cs, ((R_00B130_SPI_SHADER_USER_DATA_VS_0 + loc->sgpr_idx * 4) - SI_SH_REG_OFFSET) >> 2);
+ radeon_emit(cs, ((R_00B130_SPI_SHADER_USER_DATA_VS_0 + (loc->sgpr_idx + 1) * 4) - SI_SH_REG_OFFSET) >> 2);
radeon_emit(cs, S_2C3_COUNT_INDIRECT_ENABLE(!!count_va)); /* draw_index and count_indirect enable */
radeon_emit(cs, draw_count); /* count */
radeon_emit(cs, count_va); /* count_addr */
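The register operands of DRAW_INDIRECT_MULTI are dword indices relative to SI_SH_REG_OFFSET, hence the (reg - SI_SH_REG_OFFSET) >> 2 encoding; start instance sits in the SGPR immediately after base vertex, so the second operand is simply sgpr_idx + 1. Worked through with the default layout (sgpr_idx == 12, taking SI_SH_REG_OFFSET as 0xB000):

/* base vertex:    (0xB130 + 12 * 4 - 0xB000) >> 2 = 0x160 >> 2 = 0x58
 * start instance: (0xB130 + 13 * 4 - 0xB000) >> 2 = 0x164 >> 2 = 0x59 */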
@@ -2045,7 +2103,8 @@ static void
radv_flush_compute_state(struct radv_cmd_buffer *cmd_buffer)
{
radv_emit_compute_pipeline(cmd_buffer);
- radv_flush_descriptors(cmd_buffer, VK_SHADER_STAGE_COMPUTE_BIT);
+ radv_flush_descriptors(cmd_buffer, cmd_buffer->state.compute_pipeline,
+ VK_SHADER_STAGE_COMPUTE_BIT);
radv_flush_constants(cmd_buffer, cmd_buffer->state.compute_pipeline,
VK_SHADER_STAGE_COMPUTE_BIT);
si_emit_cache_flush(cmd_buffer);
@@ -2063,10 +2122,16 @@ void radv_CmdDispatch(
unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 10);
- radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B900_COMPUTE_USER_DATA_0 + AC_USERDATA_CS_GRID_SIZE * 4, 3);
- radeon_emit(cmd_buffer->cs, x);
- radeon_emit(cmd_buffer->cs, y);
- radeon_emit(cmd_buffer->cs, z);
+ struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.compute_pipeline,
+ MESA_SHADER_COMPUTE, AC_UD_CS_GRID_SIZE);
+ if (loc->sgpr_idx != -1) {
+ assert(!loc->indirect);
+ assert(loc->num_sgprs == 3);
+ radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4, 3);
+ radeon_emit(cmd_buffer->cs, x);
+ radeon_emit(cmd_buffer->cs, y);
+ radeon_emit(cmd_buffer->cs, z);
+ }
radeon_emit(cmd_buffer->cs, PKT3(PKT3_DISPATCH_DIRECT, 3, 0) |
PKT3_SHADER_TYPE_S(1));
@@ -2093,15 +2158,18 @@ void radv_CmdDispatchIndirect(
radv_flush_compute_state(cmd_buffer);
unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 25);
-
- for (unsigned i = 0; i < 3; ++i) {
- radeon_emit(cmd_buffer->cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(cmd_buffer->cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) |
- COPY_DATA_DST_SEL(COPY_DATA_REG));
- radeon_emit(cmd_buffer->cs, (va + 4 * i));
- radeon_emit(cmd_buffer->cs, (va + 4 * i) >> 32);
- radeon_emit(cmd_buffer->cs, ((R_00B900_COMPUTE_USER_DATA_0 + AC_USERDATA_CS_GRID_SIZE * 4) >> 2) + i);
- radeon_emit(cmd_buffer->cs, 0);
+ struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.compute_pipeline,
+ MESA_SHADER_COMPUTE, AC_UD_CS_GRID_SIZE);
+ if (loc->sgpr_idx != -1) {
+ for (unsigned i = 0; i < 3; ++i) {
+ radeon_emit(cmd_buffer->cs, PKT3(PKT3_COPY_DATA, 4, 0));
+ radeon_emit(cmd_buffer->cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) |
+ COPY_DATA_DST_SEL(COPY_DATA_REG));
+ radeon_emit(cmd_buffer->cs, (va + 4 * i));
+ radeon_emit(cmd_buffer->cs, (va + 4 * i) >> 32);
+ radeon_emit(cmd_buffer->cs, ((R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4) >> 2) + i);
+ radeon_emit(cmd_buffer->cs, 0);
+ }
}
radeon_emit(cmd_buffer->cs, PKT3(PKT3_SET_BASE, 2, 0) |
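COPY_DATA's register destination is likewise a dword index, so the loop above targets ((R_00B900_COMPUTE_USER_DATA_0 + sgpr_idx * 4) >> 2) + i for i = 0..2, streaming the x, y, z dispatch sizes from the indirect buffer straight into the three grid-size user SGPRs. With the default layout (sgpr_idx == 10):

/* dst dword index for x: (0xB900 + 10 * 4) >> 2 = 0xB928 >> 2 = 0x2E4A,
 * then 0x2E4B for y and 0x2E4C for z */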
@@ -2152,11 +2220,14 @@ void radv_unaligned_dispatch(
S_00B81C_NUM_THREAD_FULL(compute_shader->info.cs.block_size[2]) |
S_00B81C_NUM_THREAD_PARTIAL(remainder[2]));
- radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B900_COMPUTE_USER_DATA_0 + AC_USERDATA_CS_GRID_SIZE * 4, 3);
- radeon_emit(cmd_buffer->cs, blocks[0]);
- radeon_emit(cmd_buffer->cs, blocks[1]);
- radeon_emit(cmd_buffer->cs, blocks[2]);
-
+ struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.compute_pipeline,
+ MESA_SHADER_COMPUTE, AC_UD_CS_GRID_SIZE);
+ if (loc->sgpr_idx != -1) {
+ radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4, 3);
+ radeon_emit(cmd_buffer->cs, blocks[0]);
+ radeon_emit(cmd_buffer->cs, blocks[1]);
+ radeon_emit(cmd_buffer->cs, blocks[2]);
+ }
radeon_emit(cmd_buffer->cs, PKT3(PKT3_DISPATCH_DIRECT, 3, 0) |
PKT3_SHADER_TYPE_S(1));
radeon_emit(cmd_buffer->cs, blocks[0]);