summaryrefslogtreecommitdiffstats
path: root/src/gallium/state_trackers/nine
diff options
context:
space:
mode:
authorAxel Davy <[email protected]>2017-01-27 23:13:29 +0100
committerAxel Davy <[email protected]>2018-09-25 22:05:23 +0200
commit7ee5e5e239a5528c6eed2d1bb47b48434de74a6e (patch)
tree56db9a65c6ffe93a5fa70ca1c0322b15bf495ea1 /src/gallium/state_trackers/nine
parent1f3fe4aaebea5e2f0a1c35c9731372502e4b34cd (diff)
st/nine: Clamp RCP when 0*inf!=0
Tests done on several devices of all 3 vendors and of different generations showed that there are several ways of handling infs and NaN for d3d9. Tests showed Intel on windows does always clamp RCP, RSQ and LOG (thus preventing inf/nan generation), for all shader versions (some vendor behaviours vary with shader versions). Doing this in nine avoids 0*inf issues for drivers that can't generate 0*inf=0 (which is controled by TGSI's MUL_ZERO_WINS). For now clamp for all drivers. An ulterior optimization would be to avoid clamping for drivers with MUL_ZERO_WINS for the specific shader versions where NV or AMD don't clamp. LOG and RSQ being already clamped, this patch only clamps RCP. Fixes: https://github.com/iXit/Mesa-3D/issues/316 Signed-off-by: Axel Davy <[email protected]> CC: <[email protected]>
Diffstat (limited to 'src/gallium/state_trackers/nine')
-rw-r--r--src/gallium/state_trackers/nine/nine_shader.c14
1 files changed, 13 insertions, 1 deletions
diff --git a/src/gallium/state_trackers/nine/nine_shader.c b/src/gallium/state_trackers/nine/nine_shader.c
index 7db07d8f693..5b8ad3f161e 100644
--- a/src/gallium/state_trackers/nine/nine_shader.c
+++ b/src/gallium/state_trackers/nine/nine_shader.c
@@ -2273,6 +2273,18 @@ DECL_SPECIAL(POW)
return D3D_OK;
}
+DECL_SPECIAL(RCP)
+{
+ struct ureg_program *ureg = tx->ureg;
+ struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
+ struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
+ struct ureg_dst tmp = tx_scratch(tx);
+ ureg_RCP(ureg, tmp, src);
+ ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX), ureg_src(tmp));
+ ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX), ureg_src(tmp));
+ return D3D_OK;
+}
+
DECL_SPECIAL(RSQ)
{
struct ureg_program *ureg = tx->ureg;
@@ -2909,7 +2921,7 @@ static const struct sm1_op_info inst_table[] =
_OPI(SUB, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(SUB)), /* 3 */
_OPI(MAD, MAD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 4 */
_OPI(MUL, MUL, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 5 */
- _OPI(RCP, RCP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 6 */
+ _OPI(RCP, RCP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RCP)), /* 6 */
_OPI(RSQ, RSQ, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RSQ)), /* 7 */
_OPI(DP3, DP3, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 8 */
_OPI(DP4, DP4, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 9 */