diff options
author | Roland Scheidegger <[email protected]> | 2017-11-09 19:37:54 +0100 |
---|---|---|
committer | Roland Scheidegger <[email protected]> | 2017-11-15 03:13:46 +0100 |
commit | aab0bfc648bf1be50b81a25224970015f1dc78b8 (patch) | |
tree | 4807515fd5818b2a0fdf3e1e2e8761e75cc86b7b /src/gallium/drivers/r600 | |
parent | 3ceee04a4f963694012ca7f9acd3c747a036472e (diff) |
r600: use min_dx10/max_dx10 instead of min/max
I believe this is the safe thing to do, especially ever since the driver
actually generates NaNs for muls too.
The ISA docs are not very helpful here, however the dx10 versions will pick
a non-nan result over a NaN one (this is also the ieee754 behavior), whereas
the non-dx10 ones will pick the NaN (verified by newly changed piglit
isinf-and-isnan test).
Other "modern" drivers will most likely do the same.
This was shown to make some difference for bug 103544, albeit it is not
required to fix it.
Reviewed-by: Dave Airlie <[email protected]>
Diffstat (limited to 'src/gallium/drivers/r600')
-rw-r--r-- | src/gallium/drivers/r600/r600_shader.c | 13 | ||||
-rw-r--r-- | src/gallium/drivers/r600/sb/sb_expr.cpp | 2 |
2 files changed, 9 insertions, 6 deletions
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 0fa2a1f0d1a..805b3b6b3dc 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -9175,8 +9175,9 @@ static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4_IEEE, tgsi_dp}, [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4_IEEE, tgsi_dp}, [TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst}, - [TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2}, - [TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2}, + /* MIN_DX10 returns non-nan result if one src is NaN, MIN returns NaN */ + [TGSI_OPCODE_MIN] = { ALU_OP2_MIN_DX10, tgsi_op2}, + [TGSI_OPCODE_MAX] = { ALU_OP2_MAX_DX10, tgsi_op2}, [TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap}, [TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2}, [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD_IEEE, tgsi_op3}, @@ -9373,8 +9374,8 @@ static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4_IEEE, tgsi_dp}, [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4_IEEE, tgsi_dp}, [TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst}, - [TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2}, - [TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2}, + [TGSI_OPCODE_MIN] = { ALU_OP2_MIN_DX10, tgsi_op2}, + [TGSI_OPCODE_MAX] = { ALU_OP2_MAX_DX10, tgsi_op2}, [TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap}, [TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2}, [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD_IEEE, tgsi_op3}, @@ -9596,8 +9597,8 @@ static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4_IEEE, tgsi_dp}, [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4_IEEE, tgsi_dp}, [TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst}, - [TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2}, - [TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2}, + [TGSI_OPCODE_MIN] = { ALU_OP2_MIN_DX10, tgsi_op2}, + [TGSI_OPCODE_MAX] = { ALU_OP2_MAX_DX10, tgsi_op2}, [TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap}, [TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2}, [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD_IEEE, tgsi_op3}, diff --git a/src/gallium/drivers/r600/sb/sb_expr.cpp b/src/gallium/drivers/r600/sb/sb_expr.cpp index 3dd3a4815ba..7a5d62c8e87 100644 --- a/src/gallium/drivers/r600/sb/sb_expr.cpp +++ b/src/gallium/drivers/r600/sb/sb_expr.cpp @@ -753,7 +753,9 @@ bool expr_handler::fold_alu_op2(alu_node& n) { n.bc.src[0].abs == n.bc.src[1].abs) { switch (n.bc.op) { case ALU_OP2_MIN: // (MIN x, x) => (MOV x) + case ALU_OP2_MIN_DX10: case ALU_OP2_MAX: + case ALU_OP2_MAX_DX10: convert_to_mov(n, v0, n.bc.src[0].neg, n.bc.src[0].abs); return fold_alu_op1(n); case ALU_OP2_ADD: // (ADD x, x) => (MUL x, 2) |