summary | refs | log | tree | commit | diff | stats
path: root/src/amd
diff options
context:
space:
mode:
author: Bas Nieuwenhuizen <[email protected]> 2018-05-23 11:34:15 +0200
committer: Bas Nieuwenhuizen <[email protected]> 2018-05-23 21:02:45 +0200
commit: 699e1f5aacd1d9eed8cc1a37ec0dbd11313fbbdc (patch)
tree: c874383b335ca57966f2baed12053d155393d167 /src/amd
parent: b73b340c37c6b3f1ac0636b385a5403c62a9777e (diff)
ac: Use DPP for build_ddxy where possible.
WQM is pretty reliable now on LLVM 7, so let us just use DPP + WQM.

This gives approximately a 1.5% performance increase on the vrcompositor built-in benchmark.

v2: Use ac_build_quad_swizzle.

Reviewed-by: Nicolai Hähnle <[email protected]>
Diffstat (limited to 'src/amd')
-rw-r--r-- src/amd/common/ac_llvm_build.c 16
1 files changed, 15 insertions, 1 deletions
diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 36c1d62637b..4eebbbd4d9d 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1170,7 +1170,21 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
LLVMValueRef tl, trbl, args[2];
LLVMValueRef result;
- if (ctx->chip_class >= VI) {
+ if (HAVE_LLVM >= 0x0700) {
+ unsigned tl_lanes[4], trbl_lanes[4];
+
+ for (unsigned i = 0; i < 4; ++i) {
+ tl_lanes[i] = i & mask;
+ trbl_lanes[i] = (i & mask) + idx;
+ }
+
+ tl = ac_build_quad_swizzle(ctx, val,
+ tl_lanes[0], tl_lanes[1],
+ tl_lanes[2], tl_lanes[3]);
+ trbl = ac_build_quad_swizzle(ctx, val,
+ trbl_lanes[0], trbl_lanes[1],
+ trbl_lanes[2], trbl_lanes[3]);
+ } else if (ctx->chip_class >= VI) {
LLVMValueRef thread_id, tl_tid, trbl_tid;
thread_id = ac_get_thread_id(ctx);