diff options
author | Dave Airlie <[email protected]> | 2017-03-17 12:11:30 +1000 |
---|---|---|
committer | Emil Velikov <[email protected]> | 2017-05-10 11:28:07 +0100 |
commit | bd79ce435610be9a7ad2eb25bca22dbcb1a384d9 (patch) | |
tree | d462f66d2beae52f413abe4aa32f47f09bc4107b /src/amd | |
parent | 0640bae86ce4bde2253667916d45c103ed1b531e (diff) |
radv: flush f32->f16 conversion denormals to zero. (v2)
SPIR-V defines the f32->f16 operation as flushing denormals to 0.
This checks the class of the converted value using the AMD class opcode and selects zero when it is a denormal.
Thanks to Matt Arsenault for figuring it out.
This fix is VI+ only; a TODO is added for SI/CIK.
This fixes:
dEQP-VK.spirv_assembly.instruction.compute.opquantize.flush_to_zero
Acked-by: Bas Nieuwenhuizen <[email protected]>
Signed-off-by: Dave Airlie <[email protected]>
(cherry picked from commit 83e58b036e1c34f26c99d04615df2b530f3045d9)
Diffstat (limited to 'src/amd')
-rw-r--r-- | src/amd/common/ac_nir_to_llvm.c | 32 | ||||
-rw-r--r-- | src/amd/common/sid.h | 13 |
2 files changed, 41 insertions, 4 deletions
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 9bcd5f6db46..43d0520541f 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -1230,6 +1230,33 @@ static LLVMValueRef emit_b2f(struct nir_to_llvm_context *ctx, return LLVMBuildAnd(ctx->builder, src0, LLVMBuildBitCast(ctx->builder, LLVMConstReal(ctx->f32, 1.0), ctx->i32, ""), ""); } +static LLVMValueRef emit_f2f16(struct nir_to_llvm_context *ctx, + LLVMValueRef src0) +{ + LLVMValueRef result; + LLVMValueRef cond; + + src0 = to_float(ctx, src0); + result = LLVMBuildFPTrunc(ctx->builder, src0, ctx->f16, ""); + + /* TODO SI/CIK options here */ + if (ctx->options->chip_class >= VI) { + LLVMValueRef args[2]; + /* Check if the result is a denormal - and flush to 0 if so. */ + args[0] = result; + args[1] = LLVMConstInt(ctx->i32, N_SUBNORMAL | P_SUBNORMAL, false); + cond = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.class.f16", ctx->i1, args, 2, AC_FUNC_ATTR_READNONE); + } + + /* need to convert back up to f32 */ + result = LLVMBuildFPExt(ctx->builder, result, ctx->f32, ""); + + if (ctx->options->chip_class >= VI) + result = LLVMBuildSelect(ctx->builder, cond, ctx->f32zero, result, ""); + + return result; +} + static LLVMValueRef emit_umul_high(struct nir_to_llvm_context *ctx, LLVMValueRef src0, LLVMValueRef src1) { @@ -1717,10 +1744,7 @@ static void visit_alu(struct nir_to_llvm_context *ctx, nir_alu_instr *instr) result = emit_b2f(ctx, src[0]); break; case nir_op_fquantize2f16: - src[0] = to_float(ctx, src[0]); - result = LLVMBuildFPTrunc(ctx->builder, src[0], ctx->f16, ""); - /* need to convert back up to f32 */ - result = LLVMBuildFPExt(ctx->builder, result, ctx->f32, ""); + result = emit_f2f16(ctx, src[0]); break; case nir_op_umul_high: result = emit_umul_high(ctx, src[0], src[1]); diff --git a/src/amd/common/sid.h b/src/amd/common/sid.h index 75ba9650ba4..b9ddadc2419 100644 --- a/src/amd/common/sid.h +++ b/src/amd/common/sid.h @@ -9094,5 +9094,18 @@ 
#define CIK_SDMA_PACKET_SRBM_WRITE 0xe #define CIK_SDMA_COPY_MAX_SIZE 0x3fffe0 +enum amd_cmp_class_flags { + S_NAN = 1 << 0, // Signaling NaN + Q_NAN = 1 << 1, // Quiet NaN + N_INFINITY = 1 << 2, // Negative infinity + N_NORMAL = 1 << 3, // Negative normal + N_SUBNORMAL = 1 << 4, // Negative subnormal + N_ZERO = 1 << 5, // Negative zero + P_ZERO = 1 << 6, // Positive zero + P_SUBNORMAL = 1 << 7, // Positive subnormal + P_NORMAL = 1 << 8, // Positive normal + P_INFINITY = 1 << 9 // Positive infinity +}; + #endif /* _SID_H */ |