summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Bas Nieuwenhuizen <[email protected]> 2018-01-17 14:33:39 +0100
committer Bas Nieuwenhuizen <[email protected]> 2018-01-19 02:00:28 +0100
commit 32170d87e3b7bee37234b44ff787ff60fcd3a9aa (patch)
tree 355f640022639f2aaf6ac99655302b8188e42643
parent f4211e6f9314b225cdcdc799e0c123b3dceee9eb (diff)
ac/nir: Fix vector extraction if source vector has >4 elements.
v2: Add forgotten argument and start offset.
Fixes: 91074bb11bda "radv/ac: Implement Float64 SSBO stores."
Tested-by: Timothy Arceri <[email protected]>
Acked-by: Timothy Arceri <[email protected]>
-rw-r--r-- src/amd/common/ac_nir_to_llvm.c 48
1 files changed, 32 insertions, 16 deletions
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 90cb4a6eea8..2aef51be855 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -2444,6 +2444,36 @@ static uint32_t widen_mask(uint32_t mask, unsigned multiplier)
return new_mask;
}
+/* Return `count` consecutive elements of `src`, starting at element `start`.
+ *
+ * - Non-vector `src`: returned unchanged; the asserts require start == 0 and
+ *   count == 1.
+ * - Range covering the whole vector: `src` itself is returned and no
+ *   instruction is built.
+ * - count == 1: the result is a single LLVMBuildExtractElement of `src`.
+ * - Otherwise: the result is an LLVMBuildShuffleVector of `src` with itself,
+ *   using the constant i32 indices start .. start + count - 1.
+ *
+ * All range checks are asserts only, so they vanish when asserts are
+ * compiled out.
+ */ static LLVMValueRef extract_vector_range(struct ac_llvm_context *ctx, LLVMValueRef src,
+ unsigned start, unsigned count)
+{
+ LLVMTypeRef type = LLVMTypeOf(src);
+ /* A non-vector source can only be "extracted" as itself. */
+ if (LLVMGetTypeKind(type) != LLVMVectorTypeKind) {
+ assert(start == 0);
+ assert(count == 1);
+ return src;
+ }
+ /* The requested range must lie entirely inside the source vector. */
+ unsigned src_elements = LLVMGetVectorSize(type);
+ assert(start < src_elements);
+ assert(start + count <= src_elements);
+ /* Whole-vector request: nothing to extract. */
+ if (start == 0 && count == src_elements)
+ return src;
+ /* One element: extract it directly instead of building a shuffle. */
+ if (count == 1)
+ return LLVMBuildExtractElement(ctx->builder, src, LLVMConstInt(ctx->i32, start, false), "");
+ /* indices[] has room for 8 entries; this assert is the only guard on count. */
+ assert(count <= 8);
+ LLVMValueRef indices[8];
+ for (unsigned i = 0; i < count; ++i)
+ indices[i] = LLVMConstInt(ctx->i32, start + i, false);
+ /* Shuffle mask start .. start + count - 1; src is passed as both shuffle operands. */
+ LLVMValueRef swizzle = LLVMConstVector(indices, count);
+ return LLVMBuildShuffleVector(ctx->builder, src, src, swizzle, "");
+}
+
static void visit_store_ssbo(struct ac_nir_context *ctx,
nir_intrinsic_instr *instr)
{
@@ -2476,7 +2506,7 @@ static void visit_store_ssbo(struct ac_nir_context *ctx,
int start, count;
LLVMValueRef data;
LLVMValueRef offset;
- LLVMValueRef tmp;
+
u_bit_scan_consecutive_range(&writemask, &start, &count);
/* Due to an LLVM limitation, split 3-element writes
@@ -2493,28 +2523,14 @@ static void visit_store_ssbo(struct ac_nir_context *ctx,
if (count == 4) {
store_name = "llvm.amdgcn.buffer.store.v4f32";
- data = base_data;
} else if (count == 2) {
- tmp = LLVMBuildExtractElement(ctx->ac.builder,
- base_data, LLVMConstInt(ctx->ac.i32, start, false), "");
- data = LLVMBuildInsertElement(ctx->ac.builder, LLVMGetUndef(ctx->ac.v2f32), tmp,
- ctx->ac.i32_0, "");
-
- tmp = LLVMBuildExtractElement(ctx->ac.builder,
- base_data, LLVMConstInt(ctx->ac.i32, start + 1, false), "");
- data = LLVMBuildInsertElement(ctx->ac.builder, data, tmp,
- ctx->ac.i32_1, "");
store_name = "llvm.amdgcn.buffer.store.v2f32";
} else {
assert(count == 1);
- if (ac_get_llvm_num_components(base_data) > 1)
- data = LLVMBuildExtractElement(ctx->ac.builder, base_data,
- LLVMConstInt(ctx->ac.i32, start, false), "");
- else
- data = base_data;
store_name = "llvm.amdgcn.buffer.store.f32";
}
+ data = extract_vector_range(&ctx->ac, base_data, start, count);
offset = base_offset;
if (start != 0) {