diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/amd/common/ac_nir_to_llvm.c | 56 | ||||
-rw-r--r-- | src/amd/common/ac_nir_to_llvm.h | 44 | ||||
-rw-r--r-- | src/amd/vulkan/radv_cmd_buffer.c | 181 |
3 files changed, 208 insertions, 73 deletions
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index ec1ba9e55b3..41b67f1fd36 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -62,7 +62,7 @@ struct nir_to_llvm_context { struct hash_table *defs; struct hash_table *phis; - LLVMValueRef descriptor_sets[4]; + LLVMValueRef descriptor_sets[AC_UD_MAX_SETS]; LLVMValueRef push_constants; LLVMValueRef num_work_groups; LLVMValueRef workgroup_ids; @@ -426,6 +426,45 @@ static LLVMValueRef build_indexed_load_const(struct nir_to_llvm_context *ctx, return result; } +static void set_userdata_location(struct ac_userdata_info *ud_info, uint8_t sgpr_idx, uint8_t num_sgprs) +{ + ud_info->sgpr_idx = sgpr_idx; + ud_info->num_sgprs = num_sgprs; + ud_info->indirect = false; + ud_info->indirect_offset = 0; +} + +static void set_userdata_location_shader(struct nir_to_llvm_context *ctx, + int idx, uint8_t sgpr_idx, uint8_t num_sgprs) +{ + set_userdata_location(&ctx->shader_info->user_sgprs_locs.shader_data[idx], sgpr_idx, num_sgprs); +} + +#if 0 +static void set_userdata_location_indirect(struct ac_userdata_info *ud_info, uint8_t sgpr_idx, uint8_t num_sgprs, + uint32_t indirect_offset) +{ + ud_info->sgpr_idx = sgpr_idx; + ud_info->num_sgprs = num_sgprs; + ud_info->indirect = true; + ud_info->indirect_offset = indirect_offset; +} +#endif + +#define AC_USERDATA_DESCRIPTOR_SET_0 0 +#define AC_USERDATA_DESCRIPTOR_SET_1 2 +#define AC_USERDATA_DESCRIPTOR_SET_2 4 +#define AC_USERDATA_DESCRIPTOR_SET_3 6 +#define AC_USERDATA_PUSH_CONST_DYN 8 + +#define AC_USERDATA_VS_VERTEX_BUFFERS 10 +#define AC_USERDATA_VS_BASE_VERTEX 12 +#define AC_USERDATA_VS_START_INSTANCE 13 + +#define AC_USERDATA_PS_SAMPLE_POS 10 + +#define AC_USERDATA_CS_GRID_SIZE 10 + static void create_function(struct nir_to_llvm_context *ctx, struct nir_shader *nir) { @@ -510,14 +549,18 @@ static void create_function(struct nir_to_llvm_context *ctx, ctx->shader_info->num_input_vgprs += llvm_get_type_size(arg_types[i]) / 4; arg_idx = 0; - for (unsigned i = 0; i < 4; ++i) + for (unsigned i = 0; i < 4; ++i) { + set_userdata_location(&ctx->shader_info->user_sgprs_locs.descriptor_sets[i], i * 2, 2); ctx->descriptor_sets[i] = LLVMGetParam(ctx->main_function, arg_idx++); + } ctx->push_constants = LLVMGetParam(ctx->main_function, arg_idx++); + set_userdata_location_shader(ctx, AC_UD_PUSH_CONSTANTS, AC_USERDATA_PUSH_CONST_DYN, 2); switch (nir->stage) { case MESA_SHADER_COMPUTE: + set_userdata_location_shader(ctx, AC_UD_CS_GRID_SIZE, AC_USERDATA_CS_GRID_SIZE, 3); ctx->num_work_groups = LLVMGetParam(ctx->main_function, arg_idx++); ctx->workgroup_ids = @@ -528,7 +571,9 @@ static void create_function(struct nir_to_llvm_context *ctx, LLVMGetParam(ctx->main_function, arg_idx++); break; case MESA_SHADER_VERTEX: + set_userdata_location_shader(ctx, AC_UD_VS_VERTEX_BUFFERS, AC_USERDATA_VS_VERTEX_BUFFERS, 2); ctx->vertex_buffers = LLVMGetParam(ctx->main_function, arg_idx++); + set_userdata_location_shader(ctx, AC_UD_VS_BASE_VERTEX_START_INSTANCE, AC_USERDATA_VS_BASE_VERTEX, 2); ctx->base_vertex = LLVMGetParam(ctx->main_function, arg_idx++); ctx->start_instance = LLVMGetParam(ctx->main_function, arg_idx++); ctx->vertex_id = LLVMGetParam(ctx->main_function, arg_idx++); @@ -537,6 +582,7 @@ static void create_function(struct nir_to_llvm_context *ctx, ctx->instance_id = LLVMGetParam(ctx->main_function, arg_idx++); break; case MESA_SHADER_FRAGMENT: + set_userdata_location_shader(ctx, AC_UD_PS_SAMPLE_POS, AC_USERDATA_PS_SAMPLE_POS, 2); ctx->sample_positions = LLVMGetParam(ctx->main_function, arg_idx++); ctx->prim_mask = LLVMGetParam(ctx->main_function, arg_idx++); ctx->persp_sample = LLVMGetParam(ctx->main_function, arg_idx++); @@ -4604,6 +4650,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm, { struct nir_to_llvm_context ctx = {0}; struct nir_function *func; + unsigned i; ctx.options = options; ctx.shader_info = shader_info; ctx.context = LLVMContextCreate(); @@ -4619,6 +4666,11 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm, ctx.builder = LLVMCreateBuilderInContext(ctx.context); ctx.stage = nir->stage; + for (i = 0; i < AC_UD_MAX_SETS; i++) + shader_info->user_sgprs_locs.descriptor_sets[i].sgpr_idx = -1; + for (i = 0; i < AC_UD_MAX_UD; i++) + shader_info->user_sgprs_locs.shader_data[i].sgpr_idx = -1; + create_function(&ctx, nir); if (nir->stage == MESA_SHADER_COMPUTE) { diff --git a/src/amd/common/ac_nir_to_llvm.h b/src/amd/common/ac_nir_to_llvm.h index f33519ccca3..f488c09bb92 100644 --- a/src/amd/common/ac_nir_to_llvm.h +++ b/src/amd/common/ac_nir_to_llvm.h @@ -56,7 +56,35 @@ struct ac_nir_compiler_options { enum chip_class chip_class; }; +struct ac_userdata_info { + int8_t sgpr_idx; + uint8_t num_sgprs; + bool indirect; + uint32_t indirect_offset; +}; + +enum ac_ud_index { + AC_UD_PUSH_CONSTANTS = 0, + AC_UD_SHADER_START = 1, + AC_UD_VS_VERTEX_BUFFERS = AC_UD_SHADER_START, + AC_UD_VS_BASE_VERTEX_START_INSTANCE, + AC_UD_VS_MAX_UD, + AC_UD_PS_SAMPLE_POS = AC_UD_SHADER_START, + AC_UD_PS_MAX_UD, + AC_UD_CS_GRID_SIZE = AC_UD_SHADER_START, + AC_UD_CS_MAX_UD, + AC_UD_MAX_UD = AC_UD_VS_MAX_UD, +}; + +#define AC_UD_MAX_SETS 4 + +struct ac_userdata_locations { + struct ac_userdata_info descriptor_sets[AC_UD_MAX_SETS]; + struct ac_userdata_info shader_data[AC_UD_MAX_UD]; +}; + struct ac_shader_variant_info { + struct ac_userdata_locations user_sgprs_locs; unsigned num_user_sgprs; unsigned num_input_sgprs; unsigned num_input_vgprs; @@ -97,20 +125,4 @@ void ac_compile_nir_shader(LLVMTargetMachineRef tm, const struct ac_nir_compiler_options *options, bool dump_shader); -/* SHADER ABI defines */ - -/* offset in dwords */ -#define AC_USERDATA_DESCRIPTOR_SET_0 0 -#define AC_USERDATA_DESCRIPTOR_SET_1 2 -#define AC_USERDATA_DESCRIPTOR_SET_2 4 -#define AC_USERDATA_DESCRIPTOR_SET_3 6 -#define AC_USERDATA_PUSH_CONST_DYN 8 - -#define AC_USERDATA_VS_VERTEX_BUFFERS 10 -#define AC_USERDATA_VS_BASE_VERTEX 12 -#define AC_USERDATA_VS_START_INSTANCE 13 - -#define AC_USERDATA_PS_SAMPLE_POS 10 - -#define AC_USERDATA_CS_GRID_SIZE 10 diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 8a22ec40ad0..4ae4e54f87e 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -276,6 +276,46 @@ static unsigned radv_pack_float_12p4(float x) x >= 4096 ? 0xffff : x * 16; } +static uint32_t +shader_stage_to_user_data_0(gl_shader_stage stage) +{ + switch (stage) { + case MESA_SHADER_FRAGMENT: + return R_00B030_SPI_SHADER_USER_DATA_PS_0; + case MESA_SHADER_VERTEX: + return R_00B130_SPI_SHADER_USER_DATA_VS_0; + case MESA_SHADER_COMPUTE: + return R_00B900_COMPUTE_USER_DATA_0; + default: + unreachable("unknown shader"); + } +} + +static struct ac_userdata_info * +radv_lookup_user_sgpr(struct radv_pipeline *pipeline, + gl_shader_stage stage, + int idx) +{ + return &pipeline->shaders[stage]->info.user_sgprs_locs.shader_data[idx]; +} + +static void +radv_emit_userdata_address(struct radv_cmd_buffer *cmd_buffer, + struct radv_pipeline *pipeline, + gl_shader_stage stage, + int idx, uint64_t va) +{ + struct ac_userdata_info *loc = radv_lookup_user_sgpr(pipeline, stage, idx); + uint32_t base_reg = shader_stage_to_user_data_0(stage); + if (loc->sgpr_idx == -1) + return; + assert(loc->num_sgprs == 2); + assert(!loc->indirect); + radeon_set_sh_reg_seq(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, 2); + radeon_emit(cmd_buffer->cs, va); + radeon_emit(cmd_buffer->cs, va >> 32); +} + static void radv_update_multisample_state(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeline *pipeline) @@ -327,9 +367,8 @@ radv_update_multisample_state(struct radv_cmd_buffer *cmd_buffer, uint64_t va = cmd_buffer->device->ws->buffer_get_va(cmd_buffer->upload.upload_bo); va += samples_offset; - radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B030_SPI_SHADER_USER_DATA_PS_0 + AC_USERDATA_PS_SAMPLE_POS * 4, 2); - radeon_emit(cmd_buffer->cs, va); - radeon_emit(cmd_buffer->cs, va >> 32); + radv_emit_userdata_address(cmd_buffer, pipeline, MESA_SHADER_FRAGMENT, + AC_UD_PS_SAMPLE_POS, va); } static void @@ -896,34 +935,51 @@ radv_cmd_buffer_flush_dynamic_state(struct radv_cmd_buffer *cmd_buffer) static void emit_stage_descriptor_set_userdata(struct radv_cmd_buffer *cmd_buffer, + struct radv_pipeline *pipeline, int idx, uint64_t va, - uint32_t base_reg) + gl_shader_stage stage) { + struct ac_userdata_info *desc_set_loc = &pipeline->shaders[stage]->info.user_sgprs_locs.descriptor_sets[idx]; + uint32_t base_reg = shader_stage_to_user_data_0(stage); + + if (desc_set_loc->sgpr_idx == -1) + return; + + assert(!desc_set_loc->indirect); + assert(desc_set_loc->num_sgprs == 2); radeon_set_sh_reg_seq(cmd_buffer->cs, - base_reg + 8 * idx, 2); + base_reg + desc_set_loc->sgpr_idx * 4, 2); radeon_emit(cmd_buffer->cs, va); radeon_emit(cmd_buffer->cs, va >> 32); } static void radv_emit_descriptor_set_userdata(struct radv_cmd_buffer *cmd_buffer, + struct radv_pipeline *pipeline, VkShaderStageFlags stages, struct radv_descriptor_set *set, unsigned idx) { if (stages & VK_SHADER_STAGE_FRAGMENT_BIT) - emit_stage_descriptor_set_userdata(cmd_buffer, idx, set->va, R_00B030_SPI_SHADER_USER_DATA_PS_0); + emit_stage_descriptor_set_userdata(cmd_buffer, pipeline, + idx, set->va, + MESA_SHADER_FRAGMENT); if (stages & VK_SHADER_STAGE_VERTEX_BIT) - emit_stage_descriptor_set_userdata(cmd_buffer, idx, set->va, R_00B130_SPI_SHADER_USER_DATA_VS_0); + emit_stage_descriptor_set_userdata(cmd_buffer, pipeline, + idx, set->va, + MESA_SHADER_VERTEX); if (stages & VK_SHADER_STAGE_COMPUTE_BIT) - emit_stage_descriptor_set_userdata(cmd_buffer, idx, set->va, R_00B900_COMPUTE_USER_DATA_0); + emit_stage_descriptor_set_userdata(cmd_buffer, pipeline, + idx, set->va, + MESA_SHADER_COMPUTE); } static void radv_flush_descriptors(struct radv_cmd_buffer *cmd_buffer, + struct radv_pipeline *pipeline, VkShaderStageFlags stages) { unsigned i; @@ -937,23 +993,12 @@ radv_flush_descriptors(struct radv_cmd_buffer *cmd_buffer, if (!set) continue; - radv_emit_descriptor_set_userdata(cmd_buffer, stages, set, i); + radv_emit_descriptor_set_userdata(cmd_buffer, pipeline, stages, set, i); } cmd_buffer->state.descriptors_dirty = 0; } static void -emit_constants_set_userdata(struct radv_cmd_buffer *cmd_buffer, - uint64_t va, - uint32_t base_reg) -{ - radeon_set_sh_reg_seq(cmd_buffer->cs, - base_reg + 4 * AC_USERDATA_PUSH_CONST_DYN, 2); - radeon_emit(cmd_buffer->cs, va); - radeon_emit(cmd_buffer->cs, va >> 32); -} - -static void radv_flush_constants(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeline *pipeline, VkShaderStageFlags stages) @@ -979,13 +1024,16 @@ radv_flush_constants(struct radv_cmd_buffer *cmd_buffer, va += offset; if (stages & VK_SHADER_STAGE_VERTEX_BIT) - emit_constants_set_userdata(cmd_buffer, va, R_00B130_SPI_SHADER_USER_DATA_VS_0); + radv_emit_userdata_address(cmd_buffer, pipeline, MESA_SHADER_VERTEX, + AC_UD_PUSH_CONSTANTS, va); if (stages & VK_SHADER_STAGE_FRAGMENT_BIT) - emit_constants_set_userdata(cmd_buffer, va, R_00B030_SPI_SHADER_USER_DATA_PS_0); + radv_emit_userdata_address(cmd_buffer, pipeline, MESA_SHADER_FRAGMENT, + AC_UD_PUSH_CONSTANTS, va); if (stages & VK_SHADER_STAGE_COMPUTE_BIT) - emit_constants_set_userdata(cmd_buffer, va, R_00B900_COMPUTE_USER_DATA_0); + radv_emit_userdata_address(cmd_buffer, pipeline, MESA_SHADER_COMPUTE, + AC_UD_PUSH_CONSTANTS, va); cmd_buffer->push_constant_stages &= ~stages; } @@ -1036,11 +1084,9 @@ radv_cmd_buffer_flush_state(struct radv_cmd_buffer *cmd_buffer) va = device->ws->buffer_get_va(cmd_buffer->upload.upload_bo); va += vb_offset; - radeon_set_sh_reg_seq(cmd_buffer->cs, - R_00B130_SPI_SHADER_USER_DATA_VS_0 + AC_USERDATA_VS_VERTEX_BUFFERS * 4, 2); - radeon_emit(cmd_buffer->cs, va); - radeon_emit(cmd_buffer->cs, va >> 32); + radv_emit_userdata_address(cmd_buffer, pipeline, MESA_SHADER_VERTEX, + AC_UD_VS_VERTEX_BUFFERS, va); } cmd_buffer->state.vertex_descriptors_dirty = false; @@ -1075,7 +1121,8 @@ radv_cmd_buffer_flush_state(struct radv_cmd_buffer *cmd_buffer) radv_cmd_buffer_flush_dynamic_state(cmd_buffer); - radv_flush_descriptors(cmd_buffer, VK_SHADER_STAGE_ALL_GRAPHICS); + radv_flush_descriptors(cmd_buffer, cmd_buffer->state.pipeline, + VK_SHADER_STAGE_ALL_GRAPHICS); radv_flush_constants(cmd_buffer, cmd_buffer->state.pipeline, VK_SHADER_STAGE_ALL_GRAPHICS); @@ -1820,9 +1867,13 @@ void radv_CmdDraw( unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 9); - radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B130_SPI_SHADER_USER_DATA_VS_0 + AC_USERDATA_VS_BASE_VERTEX * 4, 2); - radeon_emit(cmd_buffer->cs, firstVertex); - radeon_emit(cmd_buffer->cs, firstInstance); + struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.pipeline, MESA_SHADER_VERTEX, + AC_UD_VS_BASE_VERTEX_START_INSTANCE); + if (loc->sgpr_idx != -1) { + radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B130_SPI_SHADER_USER_DATA_VS_0 + loc->sgpr_idx * 4, 2); + radeon_emit(cmd_buffer->cs, firstVertex); + radeon_emit(cmd_buffer->cs, firstInstance); + } radeon_emit(cmd_buffer->cs, PKT3(PKT3_NUM_INSTANCES, 0, 0)); radeon_emit(cmd_buffer->cs, instanceCount); @@ -1867,9 +1918,13 @@ void radv_CmdDrawIndexed( radeon_emit(cmd_buffer->cs, PKT3(PKT3_INDEX_TYPE, 0, 0)); radeon_emit(cmd_buffer->cs, cmd_buffer->state.index_type); - radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B130_SPI_SHADER_USER_DATA_VS_0 + AC_USERDATA_VS_BASE_VERTEX * 4, 2); - radeon_emit(cmd_buffer->cs, vertexOffset); - radeon_emit(cmd_buffer->cs, firstInstance); + struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.pipeline, MESA_SHADER_VERTEX, + AC_UD_VS_BASE_VERTEX_START_INSTANCE); + if (loc->sgpr_idx != -1) { + radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B130_SPI_SHADER_USER_DATA_VS_0 + loc->sgpr_idx * 4, 2); + radeon_emit(cmd_buffer->cs, vertexOffset); + radeon_emit(cmd_buffer->cs, firstInstance); + } radeon_emit(cmd_buffer->cs, PKT3(PKT3_NUM_INSTANCES, 0, 0)); radeon_emit(cmd_buffer->cs, instanceCount); @@ -1914,6 +1969,9 @@ radv_emit_indirect_draw(struct radv_cmd_buffer *cmd_buffer, cmd_buffer->device->ws->cs_add_buffer(cs, buffer->bo, 8); + struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.pipeline, MESA_SHADER_VERTEX, + AC_UD_VS_BASE_VERTEX_START_INSTANCE); + assert(loc->sgpr_idx != -1); radeon_emit(cs, PKT3(PKT3_SET_BASE, 2, 0)); radeon_emit(cs, 1); radeon_emit(cs, indirect_va); @@ -1923,8 +1981,8 @@ radv_emit_indirect_draw(struct radv_cmd_buffer *cmd_buffer, PKT3_DRAW_INDIRECT_MULTI, 8, false)); radeon_emit(cs, 0); - radeon_emit(cs, ((R_00B130_SPI_SHADER_USER_DATA_VS_0 + AC_USERDATA_VS_BASE_VERTEX * 4) - SI_SH_REG_OFFSET) >> 2); - radeon_emit(cs, ((R_00B130_SPI_SHADER_USER_DATA_VS_0 + AC_USERDATA_VS_START_INSTANCE * 4) - SI_SH_REG_OFFSET) >> 2); + radeon_emit(cs, ((R_00B130_SPI_SHADER_USER_DATA_VS_0 + loc->sgpr_idx * 4) - SI_SH_REG_OFFSET) >> 2); + radeon_emit(cs, ((R_00B130_SPI_SHADER_USER_DATA_VS_0 + (loc->sgpr_idx + 1) * 4) - SI_SH_REG_OFFSET) >> 2); radeon_emit(cs, S_2C3_COUNT_INDIRECT_ENABLE(!!count_va)); /* draw_index and count_indirect enable */ radeon_emit(cs, draw_count); /* count */ radeon_emit(cs, count_va); /* count_addr */ @@ -2045,7 +2103,8 @@ static void radv_flush_compute_state(struct radv_cmd_buffer *cmd_buffer) { radv_emit_compute_pipeline(cmd_buffer); - radv_flush_descriptors(cmd_buffer, VK_SHADER_STAGE_COMPUTE_BIT); + radv_flush_descriptors(cmd_buffer, cmd_buffer->state.compute_pipeline, + VK_SHADER_STAGE_COMPUTE_BIT); radv_flush_constants(cmd_buffer, cmd_buffer->state.compute_pipeline, VK_SHADER_STAGE_COMPUTE_BIT); si_emit_cache_flush(cmd_buffer); @@ -2063,10 +2122,16 @@ void radv_CmdDispatch( unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 10); - radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B900_COMPUTE_USER_DATA_0 + AC_USERDATA_CS_GRID_SIZE * 4, 3); - radeon_emit(cmd_buffer->cs, x); - radeon_emit(cmd_buffer->cs, y); - radeon_emit(cmd_buffer->cs, z); + struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.compute_pipeline, + MESA_SHADER_COMPUTE, AC_UD_CS_GRID_SIZE); + if (loc->sgpr_idx != -1) { + assert(!loc->indirect); + assert(loc->num_sgprs == 3); + radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4, 3); + radeon_emit(cmd_buffer->cs, x); + radeon_emit(cmd_buffer->cs, y); + radeon_emit(cmd_buffer->cs, z); + } radeon_emit(cmd_buffer->cs, PKT3(PKT3_DISPATCH_DIRECT, 3, 0) | PKT3_SHADER_TYPE_S(1)); @@ -2093,15 +2158,18 @@ void radv_CmdDispatchIndirect( radv_flush_compute_state(cmd_buffer); unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 25); - - for (unsigned i = 0; i < 3; ++i) { - radeon_emit(cmd_buffer->cs, PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(cmd_buffer->cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) | - COPY_DATA_DST_SEL(COPY_DATA_REG)); - radeon_emit(cmd_buffer->cs, (va + 4 * i)); - radeon_emit(cmd_buffer->cs, (va + 4 * i) >> 32); - radeon_emit(cmd_buffer->cs, ((R_00B900_COMPUTE_USER_DATA_0 + AC_USERDATA_CS_GRID_SIZE * 4) >> 2) + i); - radeon_emit(cmd_buffer->cs, 0); + struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.compute_pipeline, + MESA_SHADER_COMPUTE, AC_UD_CS_GRID_SIZE); + if (loc->sgpr_idx != -1) { + for (unsigned i = 0; i < 3; ++i) { + radeon_emit(cmd_buffer->cs, PKT3(PKT3_COPY_DATA, 4, 0)); + radeon_emit(cmd_buffer->cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) | + COPY_DATA_DST_SEL(COPY_DATA_REG)); + radeon_emit(cmd_buffer->cs, (va + 4 * i)); + radeon_emit(cmd_buffer->cs, (va + 4 * i) >> 32); + radeon_emit(cmd_buffer->cs, ((R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4) >> 2) + i); + radeon_emit(cmd_buffer->cs, 0); + } } radeon_emit(cmd_buffer->cs, PKT3(PKT3_SET_BASE, 2, 0) | @@ -2152,11 +2220,14 @@ void radv_unaligned_dispatch( S_00B81C_NUM_THREAD_FULL(compute_shader->info.cs.block_size[2]) | S_00B81C_NUM_THREAD_PARTIAL(remainder[2])); - radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B900_COMPUTE_USER_DATA_0 + AC_USERDATA_CS_GRID_SIZE * 4, 3); - radeon_emit(cmd_buffer->cs, blocks[0]); - radeon_emit(cmd_buffer->cs, blocks[1]); - radeon_emit(cmd_buffer->cs, blocks[2]); - + struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.compute_pipeline, + MESA_SHADER_COMPUTE, AC_UD_CS_GRID_SIZE); + if (loc->sgpr_idx != -1) { + radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4, 3); + radeon_emit(cmd_buffer->cs, blocks[0]); + radeon_emit(cmd_buffer->cs, blocks[1]); + radeon_emit(cmd_buffer->cs, blocks[2]); + } radeon_emit(cmd_buffer->cs, PKT3(PKT3_DISPATCH_DIRECT, 3, 0) | PKT3_SHADER_TYPE_S(1)); radeon_emit(cmd_buffer->cs, blocks[0]); |