diff options
author | Axel Davy <[email protected]> | 2017-01-27 23:13:29 +0100 |
---|---|---|
committer | Axel Davy <[email protected]> | 2018-09-25 22:05:23 +0200 |
commit | 7ee5e5e239a5528c6eed2d1bb47b48434de74a6e (patch) | |
tree | 56db9a65c6ffe93a5fa70ca1c0322b15bf495ea1 /src | |
parent | 1f3fe4aaebea5e2f0a1c35c9731372502e4b34cd (diff) |
st/nine: Clamp RCP when 0*inf!=0
Tests done on several devices of all 3 vendors and
of different generations showed that there are several
ways of handling infs and NaN for d3d9.
Tests showed Intel on windows does always clamp
RCP, RSQ and LOG (thus preventing inf/nan generation),
for all shader versions (some vendor behaviours vary
with shader versions).
Doing this in nine avoids 0*inf issues for drivers
that can't generate 0*inf=0 (which is controled by
TGSI's MUL_ZERO_WINS).
For now clamp for all drivers. An ulterior optimization
would be to avoid clamping for drivers with MUL_ZERO_WINS
for the specific shader versions where NV or AMD don't
clamp.
LOG and RSQ being already clamped, this patch only
clamps RCP.
Fixes: https://github.com/iXit/Mesa-3D/issues/316
Signed-off-by: Axel Davy <[email protected]>
CC: <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- | src/gallium/state_trackers/nine/nine_shader.c | 14 |
1 files changed, 13 insertions, 1 deletions
diff --git a/src/gallium/state_trackers/nine/nine_shader.c b/src/gallium/state_trackers/nine/nine_shader.c index 7db07d8f693..5b8ad3f161e 100644 --- a/src/gallium/state_trackers/nine/nine_shader.c +++ b/src/gallium/state_trackers/nine/nine_shader.c @@ -2273,6 +2273,18 @@ DECL_SPECIAL(POW) return D3D_OK; } +DECL_SPECIAL(RCP) +{ + struct ureg_program *ureg = tx->ureg; + struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); + struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); + struct ureg_dst tmp = tx_scratch(tx); + ureg_RCP(ureg, tmp, src); + ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX), ureg_src(tmp)); + ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX), ureg_src(tmp)); + return D3D_OK; +} + DECL_SPECIAL(RSQ) { struct ureg_program *ureg = tx->ureg; @@ -2909,7 +2921,7 @@ static const struct sm1_op_info inst_table[] = _OPI(SUB, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(SUB)), /* 3 */ _OPI(MAD, MAD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 4 */ _OPI(MUL, MUL, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 5 */ - _OPI(RCP, RCP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 6 */ + _OPI(RCP, RCP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RCP)), /* 6 */ _OPI(RSQ, RSQ, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RSQ)), /* 7 */ _OPI(DP3, DP3, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 8 */ _OPI(DP4, DP4, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 9 */ |