diff options
author | Bas Nieuwenhuizen <[email protected]> | 2018-05-23 11:34:15 +0200 |
---|---|---|
committer | Bas Nieuwenhuizen <[email protected]> | 2018-05-23 21:02:45 +0200 |
commit | 699e1f5aacd1d9eed8cc1a37ec0dbd11313fbbdc (patch) | |
tree | c874383b335ca57966f2baed12053d155393d167 /src/amd | |
parent | b73b340c37c6b3f1ac0636b385a5403c62a9777e (diff) |
ac: Use DPP for build_ddxy where possible.
WQM is pretty reliable now on LLVM 7, so let us just use
DPP + WQM.
This gives approximately a 1.5% performance increase on the
vrcompositor built-in benchmark.
v2: Use ac_build_quad_swizzle.
Reviewed-by: Nicolai Hähnle <[email protected]>
Diffstat (limited to 'src/amd')
-rw-r--r-- | src/amd/common/ac_llvm_build.c | 16 |
1 file changed, 15 insertions, 1 deletion
diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 36c1d62637b..4eebbbd4d9d 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1170,7 +1170,21 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
 	LLVMValueRef tl, trbl, args[2];
 	LLVMValueRef result;
 
-	if (ctx->chip_class >= VI) {
+	if (HAVE_LLVM >= 0x0700) {
+		unsigned tl_lanes[4], trbl_lanes[4];
+
+		for (unsigned i = 0; i < 4; ++i) {
+			tl_lanes[i] = i & mask;
+			trbl_lanes[i] = (i & mask) + idx;
+		}
+
+		tl = ac_build_quad_swizzle(ctx, val,
+					   tl_lanes[0], tl_lanes[1],
+					   tl_lanes[2], tl_lanes[3]);
+		trbl = ac_build_quad_swizzle(ctx, val,
+					     trbl_lanes[0], trbl_lanes[1],
+					     trbl_lanes[2], trbl_lanes[3]);
+	} else if (ctx->chip_class >= VI) {
 		LLVMValueRef thread_id, tl_tid, trbl_tid;
 		thread_id = ac_get_thread_id(ctx);
 