summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSamuel Pitoiset <[email protected]>2017-12-05 18:02:08 +0100
committerSamuel Pitoiset <[email protected]>2017-12-07 17:21:50 +0100
commit5f81a43535e8512cef26ea3dcd1e3a489bd5a1bb (patch)
tree9b411d003145c3d158c2f73bb9b8a9a43c21b27b
parent25e56b2ebafe2bcebb23819cc355e1b079a839d6 (diff)
radv: use a faster version for nir_op_pack_half_2x16
This patch is ported from RadeonSI and has two effects. First, it fixes a rendering issue that affects F1 2017 and Dawn of War 3 (Vega only): LLVM ended up generating the new v_mad_mix_{hi,lo} instructions, which appear to be buggy in some way. It is not clear whether Mesa is generating something wrong or whether the issue is in LLVM only. Either way, that explains why the DOW3 issue can't be reproduced with GL on Vega. Second, it improves performance because v_cvt_pkrtz_f16 is faster, and because I guess the rounding mode behaviour is similar between GL and VK, we can use it. In terms of performance, it improves Talos by +3-4%, but I don't see any other impact. No CTS regressions on Polaris. Signed-off-by: Samuel Pitoiset <[email protected]> Reviewed-by: Dave Airlie <[email protected]>
-rw-r--r--src/amd/common/ac_nir_to_llvm.c12
1 files changed, 1 insertions, 11 deletions
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 96ba289a813..663b27d265a 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1426,23 +1426,13 @@ static LLVMValueRef emit_bitfield_insert(struct ac_llvm_context *ctx,
static LLVMValueRef emit_pack_half_2x16(struct ac_llvm_context *ctx,
LLVMValueRef src0)
{
- LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
- int i;
LLVMValueRef comp[2];
src0 = ac_to_float(ctx, src0);
comp[0] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_0, "");
comp[1] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_1, "");
- for (i = 0; i < 2; i++) {
- comp[i] = LLVMBuildFPTrunc(ctx->builder, comp[i], ctx->f16, "");
- comp[i] = LLVMBuildBitCast(ctx->builder, comp[i], ctx->i16, "");
- comp[i] = LLVMBuildZExt(ctx->builder, comp[i], ctx->i32, "");
- }
-
- comp[1] = LLVMBuildShl(ctx->builder, comp[1], const16, "");
- comp[0] = LLVMBuildOr(ctx->builder, comp[0], comp[1], "");
- return comp[0];
+ return ac_build_cvt_pkrtz_f16(ctx, comp);
}
static LLVMValueRef emit_unpack_half_2x16(struct ac_llvm_context *ctx,