summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author: Dave Airlie <[email protected]> 2017-03-17 12:11:30 +1000
committer: Dave Airlie <[email protected]> 2017-05-03 12:55:34 +1000
commit: 83e58b036e1c34f26c99d04615df2b530f3045d9 (patch)
tree: 1c89241329944a5b75c2f4b3a1ea925f649309ad /src
parent: eeff7e11544f333d211c8f8ad3679db814050cfa (diff)
radv: flush f32->f16 conversion denormals to zero. (v2)
SPIR-V defines the f32->f16 operation as flushing denormals to 0; this compares the class using the AMD class opcode. Thanks to Matt Arsenault for figuring it out. This fix is VI+ only; add a TODO for SI/CIK. This fixes: dEQP-VK.spirv_assembly.instruction.compute.opquantize.flush_to_zero Acked-by: Bas Nieuwenhuizen <[email protected]> Signed-off-by: Dave Airlie <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- src/amd/common/ac_nir_to_llvm.c 32
-rw-r--r-- src/amd/common/sid.h 13
2 files changed, 41 insertions, 4 deletions
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index d9962c783f0..dbb8ebedd93 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1325,6 +1325,33 @@ static LLVMValueRef emit_b2f(struct nir_to_llvm_context *ctx,
return LLVMBuildAnd(ctx->builder, src0, LLVMBuildBitCast(ctx->builder, LLVMConstReal(ctx->f32, 1.0), ctx->i32, ""), "");
}
+/**
+ * Quantize an f32 value to f16 precision and return it widened back to f32.
+ *
+ * Used to lower nir_op_fquantize2f16. The source is passed through
+ * to_float(), truncated to ctx->f16 and then extended back to ctx->f32.
+ *
+ * On VI and newer the truncated f16 value is additionally classified with
+ * llvm.amdgcn.class.f16; if it is a negative or positive subnormal the
+ * returned value is replaced with ctx->f32zero, i.e. f16 denormals are
+ * flushed to zero. Older chips currently get the plain trunc/ext result.
+ *
+ * \param ctx   translation context (builder, cached LLVM types, options)
+ * \param src0  value to quantize
+ * \return      the quantized value as an f32
+ */
+static LLVMValueRef emit_f2f16(struct nir_to_llvm_context *ctx,
+			       LLVMValueRef src0)
+{
+	/* TODO SI/CIK options here */
+	const bool flush_denorms = ctx->options->chip_class >= VI;
+	LLVMValueRef result;
+	/* Only meaningful when flush_denorms is set; initialized so the later
+	 * use can never observe an indeterminate value. */
+	LLVMValueRef cond = NULL;
+
+	src0 = to_float(ctx, src0);
+	result = LLVMBuildFPTrunc(ctx->builder, src0, ctx->f16, "");
+
+	if (flush_denorms) {
+		LLVMValueRef args[2];
+		/* Check if the result is a denormal - and flush to 0 if so.
+		 * The class test must see the f16 value, so it is emitted
+		 * before the conversion back to f32 below. */
+		args[0] = result;
+		args[1] = LLVMConstInt(ctx->i32, N_SUBNORMAL | P_SUBNORMAL, false);
+		cond = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.class.f16", ctx->i1, args, 2, AC_FUNC_ATTR_READNONE);
+	}
+
+	/* need to convert back up to f32 */
+	result = LLVMBuildFPExt(ctx->builder, result, ctx->f32, "");
+
+	if (flush_denorms)
+		result = LLVMBuildSelect(ctx->builder, cond, ctx->f32zero, result, "");
+
+	return result;
+}
+
static LLVMValueRef emit_umul_high(struct nir_to_llvm_context *ctx,
LLVMValueRef src0, LLVMValueRef src1)
{
@@ -1812,10 +1839,7 @@ static void visit_alu(struct nir_to_llvm_context *ctx, nir_alu_instr *instr)
result = emit_b2f(ctx, src[0]);
break;
case nir_op_fquantize2f16:
- src[0] = to_float(ctx, src[0]);
- result = LLVMBuildFPTrunc(ctx->builder, src[0], ctx->f16, "");
- /* need to convert back up to f32 */
- result = LLVMBuildFPExt(ctx->builder, result, ctx->f32, "");
+ result = emit_f2f16(ctx, src[0]);
break;
case nir_op_umul_high:
result = emit_umul_high(ctx, src[0], src[1]);
diff --git a/src/amd/common/sid.h b/src/amd/common/sid.h
index 08cdfd77f07..d329ad9493f 100644
--- a/src/amd/common/sid.h
+++ b/src/amd/common/sid.h
@@ -9095,5 +9095,18 @@
#define CIK_SDMA_PACKET_SRBM_WRITE 0xe
#define CIK_SDMA_COPY_MAX_SIZE 0x3fffe0
+/*
+ * Floating-point class bits, one per class. Several bits may be OR'ed
+ * together to form a mask matching any of the selected classes.
+ */
+enum amd_cmp_class_flags {
+	S_NAN       = 0x001, /* signaling NaN */
+	Q_NAN       = 0x002, /* quiet NaN */
+	N_INFINITY  = 0x004, /* negative infinity */
+	N_NORMAL    = 0x008, /* negative normal */
+	N_SUBNORMAL = 0x010, /* negative subnormal */
+	N_ZERO      = 0x020, /* negative zero */
+	P_ZERO      = 0x040, /* positive zero */
+	P_SUBNORMAL = 0x080, /* positive subnormal */
+	P_NORMAL    = 0x100, /* positive normal */
+	P_INFINITY  = 0x200  /* positive infinity */
+};
+
#endif /* _SID_H */