diff options
-rw-r--r-- | src/amd/common/ac_llvm_build.c | 100 | ||||
-rw-r--r-- | src/amd/common/ac_llvm_build.h | 11 | ||||
-rw-r--r-- | src/amd/common/ac_nir_to_llvm.c | 29 |
3 files changed, 119 insertions, 21 deletions
diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index d87bf098494..2dacb67eede 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1082,6 +1082,106 @@ LLVMValueRef ac_build_load_to_sgpr_uint_wraparound(struct ac_llvm_context *ctx,
 	return ac_build_load_custom(ctx, base_ptr, index, true, true, false);
 }
 
+static void
+ac_build_buffer_store_common(struct ac_llvm_context *ctx,
+			     LLVMValueRef rsrc,
+			     LLVMValueRef data,
+			     LLVMValueRef vindex,
+			     LLVMValueRef voffset,
+			     unsigned num_channels,
+			     bool glc,
+			     bool slc,
+			     bool writeonly_memory,
+			     bool use_format)
+{
+	LLVMValueRef args[] = {
+		data,
+		LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
+		vindex ? vindex : ctx->i32_0,
+		voffset,
+		LLVMConstInt(ctx->i1, glc, 0),
+		LLVMConstInt(ctx->i1, slc, 0)
+	};
+	unsigned func = CLAMP(num_channels, 1, 3) - 1;
+
+	const char *type_names[] = {"f32", "v2f32", "v4f32"};
+	char name[256];
+
+	if (use_format) {
+		snprintf(name, sizeof(name), "llvm.amdgcn.buffer.store.format.%s",
+			 type_names[func]);
+	} else {
+		snprintf(name, sizeof(name), "llvm.amdgcn.buffer.store.%s",
+			 type_names[func]);
+	}
+
+	ac_build_intrinsic(ctx, name, ctx->voidt, args, ARRAY_SIZE(args),
+			   ac_get_store_intr_attribs(writeonly_memory));
+}
+
+static void
+ac_build_llvm8_buffer_store_common(struct ac_llvm_context *ctx,
+				   LLVMValueRef rsrc,
+				   LLVMValueRef data,
+				   LLVMValueRef vindex,
+				   LLVMValueRef voffset,
+				   LLVMValueRef soffset,
+				   unsigned num_channels,
+				   bool glc,
+				   bool slc,
+				   bool writeonly_memory,
+				   bool use_format,
+				   bool structurized)
+{
+	LLVMValueRef args[6];
+	int idx = 0;
+	args[idx++] = data;
+	args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, "");
+	if (structurized)
+		args[idx++] = vindex ? vindex : ctx->i32_0;
+	args[idx++] = voffset ? voffset : ctx->i32_0;
+	args[idx++] = soffset ? soffset : ctx->i32_0;
+	args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0);
+	unsigned func = CLAMP(num_channels, 1, 3) - 1;
+
+	const char *type_names[] = {"f32", "v2f32", "v4f32"};
+	const char *indexing_kind = structurized ? "struct" : "raw";
+	char name[256];
+
+	if (use_format) {
+		snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.store.format.%s",
+			 indexing_kind, type_names[func]);
+	} else {
+		snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.store.%s",
+			 indexing_kind, type_names[func]);
+	}
+
+	ac_build_intrinsic(ctx, name, ctx->voidt, args, idx,
+			   ac_get_store_intr_attribs(writeonly_memory));
+}
+
+void
+ac_build_buffer_store_format(struct ac_llvm_context *ctx,
+			     LLVMValueRef rsrc,
+			     LLVMValueRef data,
+			     LLVMValueRef vindex,
+			     LLVMValueRef voffset,
+			     unsigned num_channels,
+			     bool glc,
+			     bool writeonly_memory)
+{
+	if (HAVE_LLVM >= 0x800) {
+		ac_build_llvm8_buffer_store_common(ctx, rsrc, data, vindex,
+						   voffset, NULL, num_channels,
+						   glc, false, writeonly_memory,
+						   true, true);
+	} else {
+		ac_build_buffer_store_common(ctx, rsrc, data, vindex, voffset,
+					     num_channels, glc, false,
+					     writeonly_memory, true);
+	}
+}
+
 /* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by num_channels=1..4.
  * The type of vdata must be one of i32 (num_channels=1), v2i32 (num_channels=2),
  * or v4i32 (num_channels=3,4).
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index 55068169a34..cc048466978 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -266,6 +266,17 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
 			    bool slc,
 			    bool writeonly_memory,
 			    bool swizzle_enable_hint);
+
+void
+ac_build_buffer_store_format(struct ac_llvm_context *ctx,
+			     LLVMValueRef rsrc,
+			     LLVMValueRef data,
+			     LLVMValueRef vindex,
+			     LLVMValueRef voffset,
+			     unsigned num_channels,
+			     bool glc,
+			     bool writeonly_memory);
+
 LLVMValueRef
 ac_build_buffer_load(struct ac_llvm_context *ctx,
 		     LLVMValueRef rsrc,
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index b12cb418e11..e3728a9c381 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -2512,7 +2512,6 @@ static LLVMValueRef visit_image_load(struct ac_nir_context *ctx,
 static void visit_image_store(struct ac_nir_context *ctx,
 			      nir_intrinsic_instr *instr)
 {
-	LLVMValueRef params[8];
 	const nir_deref_instr *image_deref = get_image_deref(instr);
 	const struct glsl_type *type = image_deref->type;
 	const nir_variable *var = nir_deref_instr_get_variable(image_deref);
@@ -2524,34 +2523,22 @@ static void visit_image_store(struct ac_nir_context *ctx,
 					    writeonly_memory);
 
 	if (dim == GLSL_SAMPLER_DIM_BUF) {
-		char name[48];
-		const char *types[] = { "f32", "v2f32", "v4f32" };
 		LLVMValueRef rsrc = get_image_buffer_descriptor(ctx, instr, true);
 		LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[3]));
 		unsigned src_channels = ac_get_llvm_num_components(src);
+		LLVMValueRef vindex;
 
 		if (src_channels == 3)
 			src = ac_build_expand_to_vec4(&ctx->ac, src, 3);
 
-		params[0] = src; /* data */
-		params[1] = rsrc;
-		params[2] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]),
-						    ctx->ac.i32_0, ""); /* vindex */
-		params[3] = ctx->ac.i32_0; /* voffset */
-		snprintf(name, sizeof(name), "%s.%s",
-			 HAVE_LLVM >= 0x800 ? "llvm.amdgcn.struct.buffer.store.format"
-					    : "llvm.amdgcn.buffer.store.format",
-			 types[CLAMP(src_channels, 1, 3) - 1]);
+		vindex = LLVMBuildExtractElement(ctx->ac.builder,
+						 get_src(ctx, instr->src[1]),
+						 ctx->ac.i32_0, "");
 
-		if (HAVE_LLVM >= 0x800) {
-			params[4] = ctx->ac.i32_0; /* soffset */
-			params[5] = (args.cache_policy & ac_glc) ? ctx->ac.i32_1 : ctx->ac.i32_0;
-		} else {
-			params[4] = LLVMConstInt(ctx->ac.i1, !!(args.cache_policy & ac_glc), 0);
-			params[5] = ctx->ac.i1false; /* slc */
-		}
-		ac_build_intrinsic(&ctx->ac, name, ctx->ac.voidt, params, 6,
-				   ac_get_store_intr_attribs(writeonly_memory));
+		ac_build_buffer_store_format(&ctx->ac, rsrc, src, vindex,
+					     ctx->ac.i32_0, src_channels,
+					     args.cache_policy & ac_glc,
+					     writeonly_memory);
 	} else {
 		args.opcode = ac_image_store;
 		args.data[0] = ac_to_float(&ctx->ac, get_src(ctx, instr->src[3]));