diff options
author | Matt Turner <[email protected]> | 2013-09-09 11:13:20 -0700 |
---|---|---|
committer | Matt Turner <[email protected]> | 2013-09-17 17:01:58 -0700 |
commit | d56bbd04415d439b8e04c8f27e911485813f01e4 (patch) | |
tree | 9e9577192c871f6fe4cd944ec868da9ffa4156eb /src | |
parent | c43d6060b196c3e25b16a0985caa561ae1449ce8 (diff) |
glsl: Add frexp signatures and implementation.
I initially implemented frexp() as an IR opcode with a lowering pass,
but since it returns a value and has an out-parameter, it would break
assumptions our optimization passes make about ir_expressions being pure
(i.e., having no side effects).
For example, if opt_tree_grafting encounters this code:
uniform float u;
void main()
{
int exp;
float f = frexp(u, out exp);
float g = float(exp)/256.0;
float h = float(exp) + 1.0;
gl_FragColor = vec4(f, g, h, g + h);
}
it may try to optimize it to this:
uniform float u;
void main()
{
int exp;
float g = float(exp)/256.0;
float h = float(exp) + 1.0;
gl_FragColor = vec4(frexp(u, out exp), g, h, g + h);
}
Some hardware has an instruction which performs frexp(), but we would
need some other compiler infrastructure to be able to generate it, such
as an intrinsics system that would allow backends to emit specific code
for particular bits of IR.
Reviewed-by: Paul Berry <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- | src/glsl/builtin_functions.cpp | 56 |
1 files changed, 56 insertions, 0 deletions
diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp index b5be7043fcf..31f7489d18f 100644 --- a/src/glsl/builtin_functions.cpp +++ b/src/glsl/builtin_functions.cpp @@ -511,6 +511,7 @@ private: B1(findMSB) B1(fma) B2(ldexp) + B2(frexp) #undef B0 #undef B1 #undef B2 @@ -1830,6 +1831,13 @@ builtin_builder::create_builtins() _ldexp(glsl_type::vec3_type, glsl_type::ivec3_type), _ldexp(glsl_type::vec4_type, glsl_type::ivec4_type), NULL); + + add_function("frexp", + _frexp(glsl_type::float_type, glsl_type::int_type), + _frexp(glsl_type::vec2_type, glsl_type::ivec2_type), + _frexp(glsl_type::vec3_type, glsl_type::ivec3_type), + _frexp(glsl_type::vec4_type, glsl_type::ivec4_type), + NULL); #undef F #undef FI #undef FIU @@ -3528,6 +3536,54 @@ builtin_builder::_ldexp(const glsl_type *x_type, const glsl_type *exp_type) { return binop(ir_binop_ldexp, gpu_shader5, x_type, x_type, exp_type); } + +ir_function_signature * +builtin_builder::_frexp(const glsl_type *x_type, const glsl_type *exp_type) +{ + ir_variable *x = in_var(x_type, "x"); + ir_variable *exponent = out_var(exp_type, "exp"); + MAKE_SIG(x_type, gpu_shader5, 2, x, exponent); + + const unsigned vec_elem = x_type->vector_elements; + const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, vec_elem, 1); + const glsl_type *uvec = glsl_type::get_instance(GLSL_TYPE_UINT, vec_elem, 1); + + /* Single-precision floating-point values are stored as + * 1 sign bit; + * 8 exponent bits; + * 23 mantissa bits. + * + * An exponent shift of 23 will shift the mantissa out, leaving only the + * exponent and sign bit (which itself may be zero, if the absolute value + * was taken before the bitcast and shift. + */ + ir_constant *exponent_shift = imm(23); + ir_constant *exponent_bias = imm(-126, vec_elem); + + ir_constant *sign_mantissa_mask = imm(0x807fffffu, vec_elem); + + /* Exponent of floating-point values in the range [0.5, 1.0). */ + ir_constant *exponent_value = imm(0x3f000000u, vec_elem); + + ir_variable *is_not_zero = body.make_temp(bvec, "is_not_zero"); + body.emit(assign(is_not_zero, nequal(abs(x), imm(0.0f, vec_elem)))); + + /* Since abs(x) ensures that the sign bit is zero, we don't need to bitcast + * to unsigned integers to ensure that 1 bits aren't shifted in. + */ + body.emit(assign(exponent, rshift(bitcast_f2i(abs(x)), exponent_shift))); + body.emit(assign(exponent, add(exponent, csel(is_not_zero, exponent_bias, + imm(0, vec_elem))))); + + ir_variable *bits = body.make_temp(uvec, "bits"); + body.emit(assign(bits, bitcast_f2u(x))); + body.emit(assign(bits, bit_and(bits, sign_mantissa_mask))); + body.emit(assign(bits, bit_or(bits, csel(is_not_zero, exponent_value, + imm(0u, vec_elem))))); + body.emit(ret(bitcast_u2f(bits))); + + return sig; +} /** @} */ /******************************************************************************/ |