radv/ac: canonicalize the output for 32-bit float min/max.

This fixes: dEQP-VK.glsl.builtin.precision.min.* dEQP-VK.glsl.builtin.precision.max.* dEQP-VK.glsl.builtin.precision.clamp.* The problem is that the hw doesn't compare denorms properly, so we have to flush them. The spec says flushing is optional, but if you don't flush, the results should still be correct. The -pro driver changes the shader float mode instead; it would be nice if LLVM could grow support for that, perhaps. Acked-by: Bas Nieuwenhuizen <[email protected]> Signed-off-by: Dave Airlie <[email protected]>
author: Dave Airlie <[email protected]> 2017-03-17 13:38:41 +1000
committer: Dave Airlie <[email protected]> 2017-05-03 12:55:34 +1000
commit: 3bf3f9866c1387872521242921bb00c7fb7c2834 (patch)
tree: e52860b3851d9d5d5a284abbb3aadefdbca8b026
parent: 83e58b036e1c34f26c99d04615df2b530f3045d9 (diff)
1 files changed, 8 insertions, 0 deletions
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index dbb8ebedd93..fccac1a3219 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1748,10 +1748,18 @@ static void visit_alu(struct nir_to_llvm_context *ctx, nir_alu_instr *instr)
 	case nir_op_fmax:
 		result = emit_intrin_2f_param(ctx, "llvm.maxnum",
 		                              to_float_type(ctx, def_type), src[0], src[1]);
+		if (instr->dest.dest.ssa.bit_size == 32)
+			result = emit_intrin_1f_param(ctx, "llvm.canonicalize",
+						      to_float_type(ctx, def_type),
+						      result);
 		break;
 	case nir_op_fmin:
 		result = emit_intrin_2f_param(ctx, "llvm.minnum",
 		                              to_float_type(ctx, def_type), src[0], src[1]);
+		if (instr->dest.dest.ssa.bit_size == 32)
+			result = emit_intrin_1f_param(ctx, "llvm.canonicalize",
+						      to_float_type(ctx, def_type),
+						      result);
 		break;
 	case nir_op_ffma:
 		result = emit_intrin_3f_param(ctx, "llvm.fma",
author	Dave Airlie <[email protected]>	2017-03-17 13:38:41 +1000
committer	Dave Airlie <[email protected]>	2017-05-03 12:55:34 +1000
commit	3bf3f9866c1387872521242921bb00c7fb7c2834 (patch)
tree	e52860b3851d9d5d5a284abbb3aadefdbca8b026
parent	83e58b036e1c34f26c99d04615df2b530f3045d9 (diff)