diff options
-rw-r--r-- | src/amd/common/ac_llvm_build.c | 23 | ||||
-rw-r--r-- | src/amd/common/ac_llvm_build.h | 12 | ||||
-rw-r--r-- | src/amd/common/ac_nir_to_llvm.c | 56 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.c | 20 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader_internal.h | 1 |
5 files changed, 55 insertions, 57 deletions
diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index 80b027e8b08..7e370845f36 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -1748,3 +1748,26 @@ void ac_init_exec_full_mask(struct ac_llvm_context *ctx) "llvm.amdgcn.init.exec", ctx->voidt, &full_mask, 1, AC_FUNC_ATTR_CONVERGENT); } + +void ac_declare_lds_as_pointer(struct ac_llvm_context *ctx) +{ + unsigned lds_size = ctx->chip_class >= CIK ? 65536 : 32768; + ctx->lds = LLVMBuildIntToPtr(ctx->builder, ctx->i32_0, + LLVMPointerType(LLVMArrayType(ctx->i32, lds_size / 4), AC_LOCAL_ADDR_SPACE), + "lds"); +} + +LLVMValueRef ac_lds_load(struct ac_llvm_context *ctx, + LLVMValueRef dw_addr) +{ + return ac_build_load(ctx, ctx->lds, dw_addr); +} + +void ac_lds_store(struct ac_llvm_context *ctx, + LLVMValueRef dw_addr, + LLVMValueRef value) +{ + value = ac_to_integer(ctx, value); + ac_build_indexed_store(ctx, ctx->lds, + dw_addr, value); +} diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h index 996f55862ba..7d57b8bd767 100644 --- a/src/amd/common/ac_llvm_build.h +++ b/src/amd/common/ac_llvm_build.h @@ -34,6 +34,10 @@ extern "C" { #endif +enum { + AC_LOCAL_ADDR_SPACE = 3, +}; + struct ac_llvm_context { LLVMContextRef context; LLVMModuleRef module; @@ -65,6 +69,8 @@ struct ac_llvm_context { LLVMValueRef empty_md; enum chip_class chip_class; + + LLVMValueRef lds; }; void @@ -283,6 +289,12 @@ void ac_optimize_vs_outputs(struct ac_llvm_context *ac, uint32_t num_outputs, uint8_t *num_param_exports); void ac_init_exec_full_mask(struct ac_llvm_context *ctx); + +void ac_declare_lds_as_pointer(struct ac_llvm_context *ac); +LLVMValueRef ac_lds_load(struct ac_llvm_context *ctx, + LLVMValueRef dw_addr); +void ac_lds_store(struct ac_llvm_context *ctx, + LLVMValueRef dw_addr, LLVMValueRef value); #ifdef __cplusplus } #endif diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 06937d684be..cbd646e10fa 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -162,7 +162,6 @@ struct nir_to_llvm_context { LLVMValueRef empty_md; gl_shader_stage stage; - LLVMValueRef lds; LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4]; uint64_t input_mask; @@ -548,14 +547,6 @@ static void set_userdata_location_indirect(struct ac_userdata_info *ud_info, uin ud_info->indirect_offset = indirect_offset; } -static void declare_tess_lds(struct nir_to_llvm_context *ctx) -{ - unsigned lds_size = ctx->options->chip_class >= CIK ? 65536 : 32768; - ctx->lds = LLVMBuildIntToPtr(ctx->builder, ctx->i32zero, - LLVMPointerType(LLVMArrayType(ctx->i32, lds_size / 4), LOCAL_ADDR_SPACE), - "tess_lds"); -} - struct user_sgpr_info { bool need_ring_offsets; uint8_t sgpr_count; @@ -971,7 +962,7 @@ static void create_function(struct nir_to_llvm_context *ctx, set_userdata_location_shader(ctx, AC_UD_VS_LS_TCS_IN_LAYOUT, &user_sgpr_idx, 1); } if (ctx->options->key.vs.as_ls) - declare_tess_lds(ctx); + ac_declare_lds_as_pointer(&ctx->ac); break; case MESA_SHADER_TESS_CTRL: radv_define_vs_user_sgprs_phase2(ctx, stage, has_previous_stage, previous_stage, &user_sgpr_idx); @@ -980,7 +971,7 @@ static void create_function(struct nir_to_llvm_context *ctx, set_userdata_location_shader(ctx, AC_UD_TCS_OFFCHIP_LAYOUT, &user_sgpr_idx, 4); if (ctx->view_index) set_userdata_location_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1); - declare_tess_lds(ctx); + ac_declare_lds_as_pointer(&ctx->ac); break; case MESA_SHADER_TESS_EVAL: set_userdata_location_shader(ctx, AC_UD_TES_OFFCHIP_LAYOUT, &user_sgpr_idx, 1); @@ -998,7 +989,7 @@ static void create_function(struct nir_to_llvm_context *ctx, if (ctx->view_index) set_userdata_location_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1); if (has_previous_stage) - declare_tess_lds(ctx); + ac_declare_lds_as_pointer(&ctx->ac); break; case MESA_SHADER_FRAGMENT: if (ctx->shader_info->info.ps.needs_sample_positions) { @@ -2670,23 +2661,6 @@ out: *indir_out = offset; } -static LLVMValueRef -lds_load(struct nir_to_llvm_context *ctx, - LLVMValueRef dw_addr) -{ - LLVMValueRef value; - value = ac_build_load(&ctx->ac, ctx->lds, dw_addr); - return value; -} - -static void -lds_store(struct nir_to_llvm_context *ctx, - LLVMValueRef dw_addr, LLVMValueRef value) -{ - value = LLVMBuildBitCast(ctx->builder, value, ctx->i32, ""); - ac_build_indexed_store(&ctx->ac, ctx->lds, - dw_addr, value); -} /* The offchip buffer layout for TCS->TES is * @@ -2862,7 +2836,7 @@ load_tcs_input(struct nir_to_llvm_context *ctx, unsigned comp = instr->variables[0]->var->data.location_frac; for (unsigned i = 0; i < instr->num_components + comp; i++) { - value[i] = lds_load(ctx, dw_addr); + value[i] = ac_lds_load(&ctx->ac, dw_addr); dw_addr = LLVMBuildAdd(ctx->builder, dw_addr, ctx->i32one, ""); } @@ -2901,7 +2875,7 @@ load_tcs_output(struct nir_to_llvm_context *ctx, unsigned comp = instr->variables[0]->var->data.location_frac; for (unsigned i = comp; i < instr->num_components + comp; i++) { - value[i] = lds_load(ctx, dw_addr); + value[i] = ac_lds_load(&ctx->ac, dw_addr); dw_addr = LLVMBuildAdd(ctx->builder, dw_addr, ctx->i32one, ""); } @@ -2963,7 +2937,7 @@ store_tcs_output(struct nir_to_llvm_context *ctx, continue; LLVMValueRef value = llvm_extract_elem(&ctx->ac, src, chan - comp); - lds_store(ctx, dw_addr, value); + ac_lds_store(&ctx->ac, dw_addr, value); if (!is_tess_factor && writemask != 0xF) ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, value, 1, @@ -3044,7 +3018,7 @@ load_gs_input(struct nir_to_llvm_context *ctx, LLVMValueRef dw_addr = ctx->gs_vtx_offset[vtx_offset_param]; dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr, LLVMConstInt(ctx->ac.i32, param * 4 + i + const_index, 0), ""); - value[i] = lds_load(ctx, dw_addr); + value[i] = ac_lds_load(&ctx->ac, dw_addr); } else { args[0] = ctx->esgs_ring; args[1] = vtx_offset; @@ -5949,8 +5923,8 @@ handle_es_outputs_post(struct nir_to_llvm_context *ctx, out_val = LLVMBuildBitCast(ctx->builder, out_val, ctx->i32, ""); if (ctx->ac.chip_class >= GFX9) { - lds_store(ctx, dw_addr, - LLVMBuildLoad(ctx->builder, out_ptr[j], "")); + ac_lds_store(&ctx->ac, dw_addr, + LLVMBuildLoad(ctx->builder, out_ptr[j], "")); dw_addr = LLVMBuildAdd(ctx->builder, dw_addr, ctx->i32one, ""); } else { ac_build_buffer_store_dword(&ctx->ac, @@ -5989,8 +5963,8 @@ handle_ls_outputs_post(struct nir_to_llvm_context *ctx) LLVMConstInt(ctx->i32, param * 4, false), ""); for (unsigned j = 0; j < length; j++) { - lds_store(ctx, dw_addr, - LLVMBuildLoad(ctx->builder, out_ptr[j], "")); + ac_lds_store(&ctx->ac, dw_addr, + LLVMBuildLoad(ctx->builder, out_ptr[j], "")); dw_addr = LLVMBuildAdd(ctx->builder, dw_addr, ctx->i32one, ""); } } @@ -6142,20 +6116,20 @@ write_tess_factors(struct nir_to_llvm_context *ctx) // LINES reverseal if (ctx->options->key.tcs.primitive_mode == GL_ISOLINES) { - outer[0] = out[1] = lds_load(ctx, lds_outer); + outer[0] = out[1] = ac_lds_load(&ctx->ac, lds_outer); lds_outer = LLVMBuildAdd(ctx->builder, lds_outer, LLVMConstInt(ctx->i32, 1, false), ""); - outer[1] = out[0] = lds_load(ctx, lds_outer); + outer[1] = out[0] = ac_lds_load(&ctx->ac, lds_outer); } else { for (i = 0; i < outer_comps; i++) { outer[i] = out[i] = - lds_load(ctx, lds_outer); + ac_lds_load(&ctx->ac, lds_outer); lds_outer = LLVMBuildAdd(ctx->builder, lds_outer, LLVMConstInt(ctx->i32, 1, false), ""); } for (i = 0; i < inner_comps; i++) { inner[i] = out[outer_comps+i] = - lds_load(ctx, lds_inner); + ac_lds_load(&ctx->ac, lds_inner); lds_inner = LLVMBuildAdd(ctx->builder, lds_inner, LLVMConstInt(ctx->i32, 1, false), ""); } diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index c3430487307..4bf2a45eebd 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1099,12 +1099,12 @@ static LLVMValueRef lds_load(struct lp_build_tgsi_context *bld_base, dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr, LLVMConstInt(ctx->i32, swizzle, 0)); - value = ac_build_load(&ctx->ac, ctx->lds, dw_addr); + value = ac_lds_load(&ctx->ac, dw_addr); if (tgsi_type_is_64bit(type)) { LLVMValueRef value2; dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr, ctx->i32_1); - value2 = ac_build_load(&ctx->ac, ctx->lds, dw_addr); + value2 = ac_lds_load(&ctx->ac, dw_addr); return si_llvm_emit_fetch_64bit(bld_base, type, value, value2); } @@ -1127,9 +1127,7 @@ static void lds_store(struct lp_build_tgsi_context *bld_base, dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr, LLVMConstInt(ctx->i32, dw_offset_imm, 0)); - value = ac_to_integer(&ctx->ac, value); - ac_build_indexed_store(&ctx->ac, ctx->lds, - dw_addr, value); + ac_lds_store(&ctx->ac, dw_addr, value); } static LLVMValueRef desc_from_addr_base64k(struct si_shader_context *ctx, @@ -4254,14 +4252,6 @@ static void declare_streamout_params(struct si_shader_context *ctx, } } -static void declare_lds_as_pointer(struct si_shader_context *ctx) -{ - unsigned lds_size = ctx->screen->b.chip_class >= CIK ? 65536 : 32768; - ctx->lds = LLVMBuildIntToPtr(ctx->ac.builder, ctx->i32_0, - LLVMPointerType(LLVMArrayType(ctx->i32, lds_size / 4), LOCAL_ADDR_SPACE), - "lds"); -} - static unsigned si_get_max_workgroup_size(const struct si_shader *shader) { switch (shader->selector->type) { @@ -4752,7 +4742,7 @@ static void create_function(struct si_shader_context *ctx) (ctx->screen->b.chip_class >= GFX9 && (shader->key.as_es || ctx->type == PIPE_SHADER_GEOMETRY))) - declare_lds_as_pointer(ctx); + ac_declare_lds_as_pointer(&ctx->ac); } /** @@ -7076,7 +7066,7 @@ static void si_build_tcs_epilog_function(struct si_shader_context *ctx, /* Create the function. */ si_create_function(ctx, "tcs_epilog", NULL, 0, &fninfo, ctx->screen->b.chip_class >= CIK ? 128 : 64); - declare_lds_as_pointer(ctx); + ac_declare_lds_as_pointer(&ctx->ac); func = ctx->main_fn; LLVMValueRef invoc0_tess_factors[6]; diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index 5c736f61251..b249bf961ab 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -209,7 +209,6 @@ struct si_shader_context { LLVMValueRef esgs_ring; LLVMValueRef gsvs_ring[4]; - LLVMValueRef lds; LLVMValueRef invoc0_tess_factors[6]; /* outer[4], inner[2] */ LLVMValueRef gs_next_vertex[4]; LLVMValueRef postponed_kill; |