diff options
Diffstat (limited to 'src/gallium')
-rw-r--r-- | src/gallium/auxiliary/Makefile.sources | 1 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_arit.c | 2 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_arit.h | 6 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_format.h | 11 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_format_soa.c | 25 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c | 294 |
6 files changed, 332 insertions, 7 deletions
diff --git a/src/gallium/auxiliary/Makefile.sources b/src/gallium/auxiliary/Makefile.sources index c989584cf41..acbcef7e2ed 100644 --- a/src/gallium/auxiliary/Makefile.sources +++ b/src/gallium/auxiliary/Makefile.sources @@ -173,6 +173,7 @@ GALLIVM_SOURCES := \ gallivm/lp_bld_format_aos.c \ gallivm/lp_bld_format_aos_array.c \ gallivm/lp_bld_format_float.c \ + gallivm/lp_bld_format_srgb.c \ gallivm/lp_bld_format_soa.c \ gallivm/lp_bld_format_yuv.c \ gallivm/lp_bld_gather.c \ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index 7d6fe04f50d..e7955aa6bf4 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -2896,7 +2896,7 @@ lp_build_log(struct lp_build_context *bld, * Generate polynomial. * Ex: coeffs[0] + x * coeffs[1] + x^2 * coeffs[2]. */ -static LLVMValueRef +LLVMValueRef lp_build_polynomial(struct lp_build_context *bld, LLVMValueRef x, const double *coeffs, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.h b/src/gallium/auxiliary/gallivm/lp_bld_arit.h index 920e339cda5..04e180c94fd 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h @@ -239,6 +239,12 @@ lp_build_fast_rsqrt(struct lp_build_context *bld, LLVMValueRef a); LLVMValueRef +lp_build_polynomial(struct lp_build_context *bld, + LLVMValueRef x, + const double *coeffs, + unsigned num_coeffs); + +LLVMValueRef lp_build_cos(struct lp_build_context *bld, LLVMValueRef a); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format.h b/src/gallium/auxiliary/gallivm/lp_bld_format.h index 12a03180180..744d0028941 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_format.h @@ -158,4 +158,15 @@ lp_build_rgb9e5_to_float(struct gallivm_state *gallivm, LLVMValueRef src, LLVMValueRef *dst); +LLVMValueRef +lp_build_linear_to_srgb(struct gallivm_state *gallivm, + struct lp_type src_type, + LLVMValueRef src); + +LLVMValueRef +lp_build_srgb_to_linear(struct gallivm_state *gallivm, + struct lp_type src_type, + LLVMValueRef src); + + #endif /* !LP_BLD_FORMAT_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c index 4c6bd81047a..114ce03bbdc 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c @@ -163,11 +163,23 @@ lp_build_unpack_rgba_soa(struct gallivm_state *gallivm, */ if (type.floating) { - if(format_desc->channel[chan].normalized) - input = lp_build_unsigned_norm_to_float(gallivm, width, type, input); - else - input = LLVMBuildSIToFP(builder, input, - lp_build_vec_type(gallivm, type), ""); + if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) { + assert(width == 8); + if (format_desc->swizzle[3] == chan) { + input = lp_build_unsigned_norm_to_float(gallivm, width, type, input); + } + else { + struct lp_type conv_type = lp_uint_type(type); + input = lp_build_srgb_to_linear(gallivm, conv_type, input); + } + } + else { + if(format_desc->channel[chan].normalized) + input = lp_build_unsigned_norm_to_float(gallivm, width, type, input); + else + input = LLVMBuildSIToFP(builder, input, + lp_build_vec_type(gallivm, type), ""); + } } else if (format_desc->channel[chan].pure_integer) { /* Nothing to do */ @@ -344,6 +356,7 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm, if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN && (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB || + format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB || format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) && format_desc->block.width == 1 && format_desc->block.height == 1 && @@ -394,7 +407,7 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm, packed = lp_build_gather(gallivm, type.length, format_desc->block.bits, type.width, base_ptr, offset, - FALSE); + FALSE); if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT) { lp_build_r11g11b10_to_float(gallivm, packed, rgba_out); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c b/src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c new file mode 100644 index 00000000000..217aaa99838 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c @@ -0,0 +1,294 @@ +/************************************************************************** + * + * Copyright 2013 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * @file + * Format conversion code for srgb formats. + * + * Functions for converting from srgb to linear and vice versa. + * From http://www.opengl.org/registry/specs/EXT/texture_sRGB.txt: + * + * srgb->linear: + * cl = cs / 12.92, cs <= 0.04045 + * cl = ((cs + 0.055)/1.055)^2.4, cs > 0.04045 + * + * linear->srgb: + * if (isnan(cl)) { + * Map IEEE-754 Not-a-number to zero. + * cs = 0.0; + * } else if (cl > 1.0) { + * cs = 1.0; + * } else if (cl < 0.0) { + * cs = 0.0; + * } else if (cl < 0.0031308) { + * cs = 12.92 * cl; + * } else { + * cs = 1.055 * pow(cl, 0.41666) - 0.055; + * } + * + * This does not need to be accurate, however at least for d3d10 + * (http://msdn.microsoft.com/en-us/library/windows/desktop/dd607323%28v=vs.85%29.aspx): + * 1) For srgb->linear, it is required that the error on the srgb side is + * not larger than 0.5f, which I interpret that if you map the value back + * to srgb from linear using the ideal conversion, it would not be off by + * more than 0.5f (that is, it would map to the same 8-bit integer value + * as it was before conversion to linear). + * 2) linear->srgb is permitted 0.6f which luckily looks like quite a large + * error is allowed. + * 3) Additionally, all srgb values converted to linear and back must result + * in the same value as they were originally. + * + * @author Roland Scheidegger <[email protected]> + */ + + +#include "util/u_debug.h" + +#include "lp_bld_type.h" +#include "lp_bld_const.h" +#include "lp_bld_arit.h" +#include "lp_bld_bitarit.h" +#include "lp_bld_logic.h" +#include "lp_bld_format.h" + + + +/** + * Convert srgb int values to linear float values. + * Several possibilities how to do this, e.g. + * - table + * - doing the pow() with int-to-float and float-to-int tricks + * (http://stackoverflow.com/questions/6475373/optimizations-for-pow-with-const-non-integer-exponent) + * - just using standard polynomial approximation + * (3rd order polynomial is required for crappy but just sufficient accuracy) + * + * @param src integer (vector) value(s) to convert + * (8 bit values unpacked to 32 bit already). + */ +LLVMValueRef +lp_build_srgb_to_linear(struct gallivm_state *gallivm, + struct lp_type src_type, + LLVMValueRef src) +{ + struct lp_type f32_type = lp_type_float_vec(32, src_type.length * 32); + struct lp_build_context f32_bld; + LLVMValueRef srcf, part_lin, part_pow, is_linear, lin_const, lin_thresh; + double coeffs[4] = {0.0023f, + 0.0030f / 255.0f, + 0.6935f / (255.0f * 255.0f), + 0.3012f / (255.0f * 255.0f * 255.0f) + }; + + assert(src_type.width == 32); + + lp_build_context_init(&f32_bld, gallivm, f32_type); + + /* + * using polynomial: (src * (src * (src * 0.3012 + 0.6935) + 0.0030) + 0.0023) + * ( poly = 0.3012*x^3 + 0.6935*x^2 + 0.0030*x + 0.0023) + * (found with octave polyfit and some magic as I couldn't get the error + * function right). Using the above mentioned error function, the values stay + * within +-0.35, except for the lowest values - hence tweaking linear segment + * to cover the first 16 instead of the first 11 values (the error stays + * just about acceptable there too). + * Hence: lin = src > 15 ? poly : src / 12.6 + * This function really only makes sense for vectors, should use LUT otherwise. + * All in all (including float conversion) 11 instructions (with sse4.1), + * 6 constants (polynomial could be done with 1 instruction less at the cost + * of slightly worse dependency chain, fma should also help). + */ + /* doing the 1/255 mul as part of the approximation */ + srcf = lp_build_int_to_float(&f32_bld, src); + lin_const = lp_build_const_vec(gallivm, f32_type, 1.0f / (12.6f * 255.0f)); + part_lin = lp_build_mul(&f32_bld, srcf, lin_const); + + part_pow = lp_build_polynomial(&f32_bld, srcf, coeffs, 4); + + lin_thresh = lp_build_const_vec(gallivm, f32_type, 15.0f); + is_linear = lp_build_compare(gallivm, f32_type, PIPE_FUNC_LEQUAL, srcf, lin_thresh); + return lp_build_select(&f32_bld, is_linear, part_lin, part_pow); +} + + +/** + * Convert linear float values to srgb int values. + * Several possibilities how to do this, e.g. + * - use table (based on exponent/highest order mantissa bits) and do + * linear interpolation (https://gist.github.com/rygorous/2203834) + * - Chebyshev polynomial + * - Approximation using reciprocals + * - using int-to-float and float-to-int tricks for pow() + * (http://stackoverflow.com/questions/6475373/optimizations-for-pow-with-const-non-integer-exponent) + * + * @param src float (vector) value(s) to convert. + */ +LLVMValueRef +lp_build_linear_to_srgb(struct gallivm_state *gallivm, + struct lp_type src_type, + LLVMValueRef src) +{ + LLVMBuilderRef builder = gallivm->builder; + struct lp_build_context f32_bld; + LLVMValueRef lin_thresh, lin, lin_const, is_linear, tmp, pow_final; + + lp_build_context_init(&f32_bld, gallivm, src_type); + + src = lp_build_clamp(&f32_bld, src, f32_bld.zero, f32_bld.one); + + if (0) { + /* + * using int-to-float and float-to-int trick for pow(). + * This is much more accurate than necessary thanks to the correction, + * but it most certainly makes no sense without rsqrt available. + * Bonus points if you understand how this works... + * All in all (including min/max clamp, conversion) 19 instructions. + */ + + float exp_f = 2.0f / 3.0f; + float coeff_f = 0.62996f; + LLVMValueRef pow_approx, coeff, x2, exponent, pow_1, pow_2; + struct lp_type int_type = lp_int_type(src_type); + + /* + * First calculate approx x^8/12 + */ + exponent = lp_build_const_vec(gallivm, src_type, exp_f); + coeff = lp_build_const_vec(gallivm, src_type, + exp2f(127.0f / exp_f - 127.0f) * + powf(coeff_f, 1.0f / exp_f)); + + /* premultiply src */ + tmp = lp_build_mul(&f32_bld, coeff, src); + /* "log2" */ + tmp = LLVMBuildBitCast(builder, tmp, lp_build_vec_type(gallivm, int_type), ""); + tmp = lp_build_int_to_float(&f32_bld, tmp); + /* multiply for pow */ + tmp = lp_build_mul(&f32_bld, tmp, exponent); + /* "exp2" */ + pow_approx = lp_build_itrunc(&f32_bld, tmp); + pow_approx = LLVMBuildBitCast(builder, pow_approx, + lp_build_vec_type(gallivm, src_type), ""); + + /* + * Since that pow was inaccurate (like 3 bits, though each sqrt step would + * give another bit), compensate the error (which is why we chose another + * exponent in the first place). + */ + /* x * x^(8/12) = x^(20/12) */ + pow_1 = lp_build_mul(&f32_bld, pow_approx, src); + + /* x * x * x^(-4/12) = x^(20/12) */ + /* Should avoid using rsqrt if it's not available, but + * using x * x^(4/12) * x^(4/12) instead will change error weight */ + tmp = lp_build_fast_rsqrt(&f32_bld, pow_approx); + x2 = lp_build_mul(&f32_bld, src, src); + pow_2 = lp_build_mul(&f32_bld, x2, tmp); + + /* average the values so the errors cancel out, compensate bias, + * we also squeeze the 1.055 mul of the srgb conversion plus the 255.0 mul + * for conversion to int in here */ + tmp = lp_build_add(&f32_bld, pow_1, pow_2); + coeff = lp_build_const_vec(gallivm, src_type, + 1.0f / (3.0f * coeff_f) * 0.999852f * + powf(1.055f * 255.0f, 4.0f)); + pow_final = lp_build_mul(&f32_bld, tmp, coeff); + + /* x^(5/12) = rsqrt(rsqrt(x^20/12)) */ + if (lp_build_fast_rsqrt_available(src_type)) { + pow_final = lp_build_fast_rsqrt(&f32_bld, + lp_build_fast_rsqrt(&f32_bld, pow_final)); + } + else { + pow_final = lp_build_sqrt(&f32_bld, lp_build_sqrt(&f32_bld, pow_final)); + } + pow_final = lp_build_add(&f32_bld, pow_final, + lp_build_const_vec(gallivm, src_type, -0.055f * 255.0f)); + } + + else { + /* + * using "rational polynomial" approximation here. + * Essentially y = a*x^0.375 + b*x^0.5 + c, with also + * factoring in the 255.0 mul and the scaling mul. + * (a is closer to actual value so has higher weight than b.) + * Note: the constants are magic values. They were found empirically, + * possibly could be improved but good enough (be VERY careful with + * error metric if you'd want to tweak them, they also MUST fit with + * the crappy polynomial above for srgb->linear since it is required + * that each srgb value maps back to the same value). + * This function has an error of max +-0.17 (and we'd only require +-0.6), + * for the approximated srgb->linear values the error is naturally larger + * (+-0.42) but still accurate enough (required +-0.5 essentially). + * All in all (including min/max clamp, conversion) 15 instructions. + * FMA would help (minus 2 instructions). + */ + + LLVMValueRef x05, x0375, a_const, b_const, c_const, tmp2; + + if (lp_build_fast_rsqrt_available(src_type)) { + tmp = lp_build_fast_rsqrt(&f32_bld, src); + x05 = lp_build_mul(&f32_bld, src, tmp); + } + else { + /* + * I don't really expect this to be practical without rsqrt + * but there's no reason for triple punishment so at least + * save the otherwise resulting division and unnecessary mul... + */ + x05 = lp_build_sqrt(&f32_bld, src); + } + + tmp = lp_build_mul(&f32_bld, x05, src); + if (lp_build_fast_rsqrt_available(src_type)) { + x0375 = lp_build_fast_rsqrt(&f32_bld, lp_build_fast_rsqrt(&f32_bld, tmp)); + } + else { + x0375 = lp_build_sqrt(&f32_bld, lp_build_sqrt(&f32_bld, tmp)); + } + + a_const = lp_build_const_vec(gallivm, src_type, 0.675f * 1.0622 * 255.0f); + b_const = lp_build_const_vec(gallivm, src_type, 0.325f * 1.0622 * 255.0f); + c_const = lp_build_const_vec(gallivm, src_type, -0.0620f * 255.0f); + + tmp = lp_build_mul(&f32_bld, a_const, x0375); + tmp2 = lp_build_mul(&f32_bld, b_const, x05); + tmp2 = lp_build_add(&f32_bld, tmp2, c_const); + pow_final = lp_build_add(&f32_bld, tmp, tmp2); + } + + /* linear part is easy */ + lin_const = lp_build_const_vec(gallivm, src_type, 12.92f * 255.0f); + lin = lp_build_mul(&f32_bld, src, lin_const); + + lin_thresh = lp_build_const_vec(gallivm, src_type, 0.0031308f); + is_linear = lp_build_compare(gallivm, src_type, PIPE_FUNC_LEQUAL, src, lin_thresh); + tmp = lp_build_select(&f32_bld, is_linear, lin, pow_final); + + f32_bld.type.sign = 0; + return lp_build_iround(&f32_bld, tmp); +} |