r600: use min_dx10/max_dx10 instead of min/max

I believe this is the safe thing to do, especially ever since the driver actually generates NaNs for muls too. The ISA docs are not very helpful here, however the dx10 versions will pick a non-nan result over a NaN one (this is also the ieee754 behavior), whereas the non-dx10 ones will pick the NaN (verified by newly changed piglit isinf-and-isnan test). Other "modern" drivers will most likely do the same. This was shown to make some difference for bug 103544, albeit it is not required to fix it. Reviewed-by: Dave Airlie <[email protected]>
author: Roland Scheidegger <[email protected]> 2017-11-09 19:37:54 +0100
committer: Roland Scheidegger <[email protected]> 2017-11-15 03:13:46 +0100
commit: aab0bfc648bf1be50b81a25224970015f1dc78b8 (patch)
tree: 4807515fd5818b2a0fdf3e1e2e8761e75cc86b7b
parent: 3ceee04a4f963694012ca7f9acd3c747a036472e (diff)
2 files changed, 9 insertions, 6 deletions
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 0fa2a1f0d1a..805b3b6b3dc 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -9175,8 +9175,9 @@ static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[]
 	[TGSI_OPCODE_DP3]	= { ALU_OP2_DOT4_IEEE, tgsi_dp},
 	[TGSI_OPCODE_DP4]	= { ALU_OP2_DOT4_IEEE, tgsi_dp},
 	[TGSI_OPCODE_DST]	= { ALU_OP0_NOP, tgsi_opdst},
-	[TGSI_OPCODE_MIN]	= { ALU_OP2_MIN, tgsi_op2},
-	[TGSI_OPCODE_MAX]	= { ALU_OP2_MAX, tgsi_op2},
+	/* MIN_DX10 returns non-nan result if one src is NaN, MIN returns NaN */
+	[TGSI_OPCODE_MIN]	= { ALU_OP2_MIN_DX10, tgsi_op2},
+	[TGSI_OPCODE_MAX]	= { ALU_OP2_MAX_DX10, tgsi_op2},
 	[TGSI_OPCODE_SLT]	= { ALU_OP2_SETGT, tgsi_op2_swap},
 	[TGSI_OPCODE_SGE]	= { ALU_OP2_SETGE, tgsi_op2},
 	[TGSI_OPCODE_MAD]	= { ALU_OP3_MULADD_IEEE, tgsi_op3},
@@ -9373,8 +9374,8 @@ static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] =
 	[TGSI_OPCODE_DP3]	= { ALU_OP2_DOT4_IEEE, tgsi_dp},
 	[TGSI_OPCODE_DP4]	= { ALU_OP2_DOT4_IEEE, tgsi_dp},
 	[TGSI_OPCODE_DST]	= { ALU_OP0_NOP, tgsi_opdst},
-	[TGSI_OPCODE_MIN]	= { ALU_OP2_MIN, tgsi_op2},
-	[TGSI_OPCODE_MAX]	= { ALU_OP2_MAX, tgsi_op2},
+	[TGSI_OPCODE_MIN]	= { ALU_OP2_MIN_DX10, tgsi_op2},
+	[TGSI_OPCODE_MAX]	= { ALU_OP2_MAX_DX10, tgsi_op2},
 	[TGSI_OPCODE_SLT]	= { ALU_OP2_SETGT, tgsi_op2_swap},
 	[TGSI_OPCODE_SGE]	= { ALU_OP2_SETGE, tgsi_op2},
 	[TGSI_OPCODE_MAD]	= { ALU_OP3_MULADD_IEEE, tgsi_op3},
@@ -9596,8 +9597,8 @@ static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] =
 	[TGSI_OPCODE_DP3]	= { ALU_OP2_DOT4_IEEE, tgsi_dp},
 	[TGSI_OPCODE_DP4]	= { ALU_OP2_DOT4_IEEE, tgsi_dp},
 	[TGSI_OPCODE_DST]	= { ALU_OP0_NOP, tgsi_opdst},
-	[TGSI_OPCODE_MIN]	= { ALU_OP2_MIN, tgsi_op2},
-	[TGSI_OPCODE_MAX]	= { ALU_OP2_MAX, tgsi_op2},
+	[TGSI_OPCODE_MIN]	= { ALU_OP2_MIN_DX10, tgsi_op2},
+	[TGSI_OPCODE_MAX]	= { ALU_OP2_MAX_DX10, tgsi_op2},
 	[TGSI_OPCODE_SLT]	= { ALU_OP2_SETGT, tgsi_op2_swap},
 	[TGSI_OPCODE_SGE]	= { ALU_OP2_SETGE, tgsi_op2},
 	[TGSI_OPCODE_MAD]	= { ALU_OP3_MULADD_IEEE, tgsi_op3},
diff --git a/src/gallium/drivers/r600/sb/sb_expr.cpp b/src/gallium/drivers/r600/sb/sb_expr.cpp
index 3dd3a4815ba..7a5d62c8e87 100644
--- a/src/gallium/drivers/r600/sb/sb_expr.cpp
+++ b/src/gallium/drivers/r600/sb/sb_expr.cpp
@@ -753,7 +753,9 @@ bool expr_handler::fold_alu_op2(alu_node& n) {
 				n.bc.src[0].abs == n.bc.src[1].abs) {
 			switch (n.bc.op) {
 			case ALU_OP2_MIN: // (MIN x, x) => (MOV x)
+			case ALU_OP2_MIN_DX10:
 			case ALU_OP2_MAX:
+			case ALU_OP2_MAX_DX10:
 				convert_to_mov(n, v0, n.bc.src[0].neg, n.bc.src[0].abs);
 				return fold_alu_op1(n);
 			case ALU_OP2_ADD:  // (ADD x, x) => (MUL x, 2)
author	Roland Scheidegger <[email protected]>	2017-11-09 19:37:54 +0100
committer	Roland Scheidegger <[email protected]>	2017-11-15 03:13:46 +0100
commit	aab0bfc648bf1be50b81a25224970015f1dc78b8 (patch)
tree	4807515fd5818b2a0fdf3e1e2e8761e75cc86b7b
parent	3ceee04a4f963694012ca7f9acd3c747a036472e (diff)