diff options
-rw-r--r-- | src/compiler/glsl/builtin_functions.cpp | 96 |
1 files changed, 60 insertions, 36 deletions
diff --git a/src/compiler/glsl/builtin_functions.cpp b/src/compiler/glsl/builtin_functions.cpp index 4a6c5afd65c..432df656e2f 100644 --- a/src/compiler/glsl/builtin_functions.cpp +++ b/src/compiler/glsl/builtin_functions.cpp @@ -3560,44 +3560,68 @@ builtin_builder::_acos(const glsl_type *type) ir_function_signature * builtin_builder::_atan2(const glsl_type *type) { - ir_variable *vec_y = in_var(type, "vec_y"); - ir_variable *vec_x = in_var(type, "vec_x"); - MAKE_SIG(type, always_available, 2, vec_y, vec_x); - - ir_variable *vec_result = body.make_temp(type, "vec_result"); - ir_variable *r = body.make_temp(glsl_type::float_type, "r"); - for (int i = 0; i < type->vector_elements; i++) { - ir_variable *y = body.make_temp(glsl_type::float_type, "y"); - ir_variable *x = body.make_temp(glsl_type::float_type, "x"); - body.emit(assign(y, swizzle(vec_y, i, 1))); - body.emit(assign(x, swizzle(vec_x, i, 1))); - - /* If |x| >= 1.0e-8 * |y|: */ - ir_if *outer_if = - new(mem_ctx) ir_if(greater(abs(x), mul(imm(1.0e-8f), abs(y)))); - - ir_factory outer_then(&outer_if->then_instructions, mem_ctx); - - /* Then...call atan(y/x) */ - do_atan(outer_then, glsl_type::float_type, r, div(y, x)); - - /* ...and fix it up: */ - ir_if *inner_if = new(mem_ctx) ir_if(less(x, imm(0.0f))); - inner_if->then_instructions.push_tail( - if_tree(gequal(y, imm(0.0f)), - assign(r, add(r, imm(M_PIf))), - assign(r, sub(r, imm(M_PIf))))); - outer_then.emit(inner_if); - - /* Else... */ - outer_if->else_instructions.push_tail( - assign(r, mul(sign(y), imm(M_PI_2f)))); + const unsigned n = type->vector_elements; + ir_variable *y = in_var(type, "y"); + ir_variable *x = in_var(type, "x"); + MAKE_SIG(type, always_available, 2, y, x); - body.emit(outer_if); + /* If we're on the left half-plane rotate the coordinates π/2 clock-wise + * for the y=0 discontinuity to end up aligned with the vertical + * discontinuity of atan(s/t) along t=0. This also makes sure that we + * don't attempt to divide by zero along the vertical line, which may give + * unspecified results on non-GLSL 4.1-capable hardware. + */ + ir_variable *flip = body.make_temp(glsl_type::bvec(n), "flip"); + body.emit(assign(flip, gequal(imm(0.0f, n), x))); + ir_variable *s = body.make_temp(type, "s"); + body.emit(assign(s, csel(flip, abs(x), y))); + ir_variable *t = body.make_temp(type, "t"); + body.emit(assign(t, csel(flip, y, abs(x)))); - body.emit(assign(vec_result, r, 1 << i)); - } - body.emit(ret(vec_result)); + /* If the magnitude of the denominator exceeds some huge value, scale down + * the arguments in order to prevent the reciprocal operation from flushing + * its result to zero, which would cause precision problems, and for s + * infinite would cause us to return a NaN instead of the correct finite + * value. + * + * If fmin and fmax are respectively the smallest and largest positive + * normalized floating point values representable by the implementation, + * the constants below should be in agreement with: + * + * huge <= 1 / fmin + * scale <= 1 / fmin / fmax (for |t| >= huge) + * + * In addition scale should be a negative power of two in order to avoid + * loss of precision. The values chosen below should work for most usual + * floating point representations with at least the dynamic range of ATI's + * 24-bit representation. + */ + ir_constant *huge = imm(1e18f, n); + ir_variable *scale = body.make_temp(type, "scale"); + body.emit(assign(scale, csel(gequal(abs(t), huge), + imm(0.25f, n), imm(1.0f, n)))); + ir_variable *rcp_scaled_t = body.make_temp(type, "rcp_scaled_t"); + body.emit(assign(rcp_scaled_t, rcp(mul(t, scale)))); + ir_expression *s_over_t = mul(mul(s, scale), rcp_scaled_t); + + /* Calculate the arctangent and fix up the result if we had flipped the + * coordinate system. + */ + ir_variable *arc = body.make_temp(type, "arc"); + do_atan(body, type, arc, abs(s_over_t)); + body.emit(assign(arc, add(arc, mul(b2f(flip), imm(M_PI_2f))))); + + /* Rather convoluted calculation of the sign of the result. When x < 0 we + * cannot use fsign because we need to be able to distinguish between + * negative and positive zero. Unfortunately we cannot use bitwise + * arithmetic tricks either because of back-ends without integer support. + * When x >= 0 rcp_scaled_t will always be non-negative so this won't be + * able to distinguish between negative and positive zero, but we don't + * care because atan2 is continuous along the whole positive y = 0 + * half-line, so it won't affect the result significantly. + */ + body.emit(ret(csel(less(min2(y, rcp_scaled_t), imm(0.0f, n)), + neg(arc), arc))); return sig; } |