summaryrefslogtreecommitdiffstats
path: root/src/amd
diff options
context:
space:
mode:
authorMarek Olšák <[email protected]>2018-01-30 18:34:25 +0100
committerMarek Olšák <[email protected]>2018-02-01 16:20:19 +0100
commitbe973ed21f6e456ebd753f26a99151d9ea6e765c (patch)
tree1f08168ae096270918076f69b4a21d766b55efb7 /src/amd
parent472361dd7e2f82e3697047776967544348b4012a (diff)
radeonsi: load the right number of components for VS inputs and TBOs
The supported counts are 1, 2, and 4; a count of 3 is loaded as 4 (3=4).

The following snippet loads float, vec2, vec3, and vec4:

Before:
    buffer_load_format_x v9, v4, s[0:3], 0 idxen ; E0002000 80000904
    buffer_load_format_xyzw v[0:3], v5, s[8:11], 0 idxen ; E00C2000 80020005
    s_waitcnt vmcnt(0) ; BF8C0F70
    buffer_load_format_xyzw v[2:5], v6, s[12:15], 0 idxen ; E00C2000 80030206
    s_waitcnt vmcnt(0) ; BF8C0F70
    buffer_load_format_xyzw v[5:8], v7, s[4:7], 0 idxen ; E00C2000 80010507

After:
    buffer_load_format_x v10, v4, s[0:3], 0 idxen ; E0002000 80000A04
    buffer_load_format_xy v[8:9], v5, s[8:11], 0 idxen ; E0042000 80020805
    buffer_load_format_xyzw v[0:3], v6, s[12:15], 0 idxen ; E00C2000 80030006
    s_waitcnt vmcnt(0) ; BF8C0F70
    buffer_load_format_xyzw v[3:6], v7, s[4:7], 0 idxen ; E00C2000 80010307

Reviewed-by: Samuel Pitoiset <[email protected]>
Diffstat (limited to 'src/amd')
-rw-r--r--src/amd/common/ac_llvm_build.c35
-rw-r--r--src/amd/common/ac_llvm_build.h3
2 files changed, 38 insertions, 0 deletions
diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 6afe7f97236..a5cb72daed5 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -461,6 +461,41 @@ ac_build_gather_values(struct ac_llvm_context *ctx,
return ac_build_gather_values_extended(ctx, values, value_count, 1, false, false);
}
+/* Expand a scalar or vector "value" to <4 x type>: keep its first min(num_channels,
+ * vector size) channels and fill the rest with undef. A vector whose clamped count
+ * is >= 4 is returned as-is; a scalar is expected with num_channels of 0 or 1. */
+LLVMValueRef ac_build_expand_to_vec4(struct ac_llvm_context *ctx,
+ LLVMValueRef value,
+ unsigned num_channels)
+{
+ LLVMTypeRef elemtype; /* element type used for the undef padding */
+ LLVMValueRef chan[4]; /* channels of the result, gathered at the end */
+
+ if (LLVMGetTypeKind(LLVMTypeOf(value)) == LLVMVectorTypeKind) {
+ unsigned vec_size = LLVMGetVectorSize(LLVMTypeOf(value));
+ num_channels = MIN2(num_channels, vec_size); /* never extract past the input vector */
+
+ if (num_channels >= 4)
+ return value; /* NOTE(review): also taken for inputs wider than 4, which are not narrowed */
+
+ for (unsigned i = 0; i < num_channels; i++)
+ chan[i] = ac_llvm_extract_elem(ctx, value, i);
+
+ elemtype = LLVMGetElementType(LLVMTypeOf(value));
+ } else {
+ if (num_channels) {
+ assert(num_channels == 1); /* NOTE(review): if this assert is compiled out, a larger count leaves chan[1..] unset */
+ chan[0] = value;
+ }
+ elemtype = LLVMTypeOf(value); /* a scalar is its own element type */
+ }
+
+ while (num_channels < 4) /* pad every channel that was not copied */
+ chan[num_channels++] = LLVMGetUndef(elemtype);
+
+ return ac_build_gather_values(ctx, chan, 4);
+}
+
LLVMValueRef
ac_build_fdiv(struct ac_llvm_context *ctx,
LLVMValueRef num,
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index 78437d603e3..3ae96781b6f 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -141,6 +141,9 @@ LLVMValueRef
ac_build_gather_values(struct ac_llvm_context *ctx,
LLVMValueRef *values,
unsigned value_count);
+LLVMValueRef ac_build_expand_to_vec4(struct ac_llvm_context *ctx, /* expands value to <4 x type> */
+ LLVMValueRef value, /* scalar or vector to expand */
+ unsigned num_channels); /* max channels taken from value; the rest are undef */
LLVMValueRef
ac_build_fdiv(struct ac_llvm_context *ctx,