diff options
-rw-r--r-- | src/compiler/spirv/vtn_glsl450.c | 77 |
1 files changed, 55 insertions, 22 deletions
diff --git a/src/compiler/spirv/vtn_glsl450.c b/src/compiler/spirv/vtn_glsl450.c index 0d32fddbef4..8509f641317 100644 --- a/src/compiler/spirv/vtn_glsl450.c +++ b/src/compiler/spirv/vtn_glsl450.c @@ -302,28 +302,61 @@ build_atan(nir_builder *b, nir_ssa_def *y_over_x) static nir_ssa_def * build_atan2(nir_builder *b, nir_ssa_def *y, nir_ssa_def *x) { - nir_ssa_def *zero = nir_imm_float(b, 0.0f); - - /* If |x| >= 1.0e-8 * |y|: */ - nir_ssa_def *condition = - nir_fge(b, nir_fabs(b, x), - nir_fmul(b, nir_imm_float(b, 1.0e-8f), nir_fabs(b, y))); - - /* Then...call atan(y/x) and fix it up: */ - nir_ssa_def *atan1 = build_atan(b, nir_fdiv(b, y, x)); - nir_ssa_def *r_then = - nir_bcsel(b, nir_flt(b, x, zero), - nir_fadd(b, atan1, - nir_bcsel(b, nir_fge(b, y, zero), - nir_imm_float(b, M_PIf), - nir_imm_float(b, -M_PIf))), - atan1); - - /* Else... */ - nir_ssa_def *r_else = - nir_fmul(b, nir_fsign(b, y), nir_imm_float(b, M_PI_2f)); - - return nir_bcsel(b, condition, r_then, r_else); + nir_ssa_def *zero = nir_imm_float(b, 0); + nir_ssa_def *one = nir_imm_float(b, 1); + + /* If we're on the left half-plane rotate the coordinates π/2 clock-wise + * for the y=0 discontinuity to end up aligned with the vertical + * discontinuity of atan(s/t) along t=0. This also makes sure that we + * don't attempt to divide by zero along the vertical line, which may give + * unspecified results on non-GLSL 4.1-capable hardware. + */ + nir_ssa_def *flip = nir_fge(b, zero, x); + nir_ssa_def *s = nir_bcsel(b, flip, nir_fabs(b, x), y); + nir_ssa_def *t = nir_bcsel(b, flip, y, nir_fabs(b, x)); + + /* If the magnitude of the denominator exceeds some huge value, scale down + * the arguments in order to prevent the reciprocal operation from flushing + * its result to zero, which would cause precision problems, and for s + * infinite would cause us to return a NaN instead of the correct finite + * value. + * + * If fmin and fmax are respectively the smallest and largest positive + * normalized floating point values representable by the implementation, + * the constants below should be in agreement with: + * + * huge <= 1 / fmin + * scale <= 1 / fmin / fmax (for |t| >= huge) + * + * In addition scale should be a negative power of two in order to avoid + * loss of precision. The values chosen below should work for most usual + * floating point representations with at least the dynamic range of ATI's + * 24-bit representation. + */ + nir_ssa_def *huge = nir_imm_float(b, 1e18f); + nir_ssa_def *scale = nir_bcsel(b, nir_fge(b, nir_fabs(b, t), huge), + nir_imm_float(b, 0.25), one); + nir_ssa_def *rcp_scaled_t = nir_frcp(b, nir_fmul(b, t, scale)); + nir_ssa_def *s_over_t = nir_fmul(b, nir_fmul(b, s, scale), rcp_scaled_t); + + /* Calculate the arctangent and fix up the result if we had flipped the + * coordinate system. + */ + nir_ssa_def *arc = nir_fadd(b, nir_fmul(b, nir_b2f(b, flip), + nir_imm_float(b, M_PI_2f)), + build_atan(b, nir_fabs(b, s_over_t))); + + /* Rather convoluted calculation of the sign of the result. When x < 0 we + * cannot use fsign because we need to be able to distinguish between + * negative and positive zero. We don't use bitwise arithmetic tricks for + * consistency with the GLSL front-end. When x >= 0 rcp_scaled_t will + * always be non-negative so this won't be able to distinguish between + * negative and positive zero, but we don't care because atan2 is + * continuous along the whole positive y = 0 half-line, so it won't affect + * the result significantly. + */ + return nir_bcsel(b, nir_flt(b, nir_fmin(b, y, rcp_scaled_t), zero), + nir_fneg(b, arc), arc); } static nir_ssa_def * |