/************************************************************************** * * Copyright 2009 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * **************************************************************************/ /** * @file * AoS pixel format manipulation. * * @author Jose Fonseca */ #include "util/u_format.h" #include "util/u_memory.h" #include "util/u_math.h" #include "util/u_pointer.h" #include "util/u_string.h" #include "util/u_cpu_detect.h" #include "lp_bld_arit.h" #include "lp_bld_init.h" #include "lp_bld_type.h" #include "lp_bld_flow.h" #include "lp_bld_const.h" #include "lp_bld_conv.h" #include "lp_bld_swizzle.h" #include "lp_bld_gather.h" #include "lp_bld_debug.h" #include "lp_bld_format.h" #include "lp_bld_pack.h" #include "lp_bld_intr.h" #include "lp_bld_logic.h" #include "lp_bld_bitarit.h" /** * Basic swizzling. Rearrange the order of the unswizzled array elements * according to the format description. PIPE_SWIZZLE_0/ONE are supported * too. * Ex: if unswizzled[4] = {B, G, R, x}, then swizzled_out[4] = {R, G, B, 1}. */ LLVMValueRef lp_build_format_swizzle_aos(const struct util_format_description *desc, struct lp_build_context *bld, LLVMValueRef unswizzled) { unsigned char swizzles[4]; unsigned chan; assert(bld->type.length % 4 == 0); for (chan = 0; chan < 4; ++chan) { enum pipe_swizzle swizzle; if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { /* * For ZS formats do RGBA = ZZZ1 */ if (chan == 3) { swizzle = PIPE_SWIZZLE_1; } else if (desc->swizzle[0] == PIPE_SWIZZLE_NONE) { swizzle = PIPE_SWIZZLE_0; } else { swizzle = desc->swizzle[0]; } } else { swizzle = desc->swizzle[chan]; } swizzles[chan] = swizzle; } return lp_build_swizzle_aos(bld, unswizzled, swizzles); } /** * Whether the format matches the vector type, apart of swizzles. */ static inline boolean format_matches_type(const struct util_format_description *desc, struct lp_type type) { enum util_format_type chan_type; unsigned chan; assert(type.length % 4 == 0); if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN || desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB || desc->block.width != 1 || desc->block.height != 1) { return FALSE; } if (type.floating) { chan_type = UTIL_FORMAT_TYPE_FLOAT; } else if (type.fixed) { chan_type = UTIL_FORMAT_TYPE_FIXED; } else if (type.sign) { chan_type = UTIL_FORMAT_TYPE_SIGNED; } else { chan_type = UTIL_FORMAT_TYPE_UNSIGNED; } for (chan = 0; chan < desc->nr_channels; ++chan) { if (desc->channel[chan].size != type.width) { return FALSE; } if (desc->channel[chan].type != UTIL_FORMAT_TYPE_VOID) { if (desc->channel[chan].type != chan_type || desc->channel[chan].normalized != type.norm) { return FALSE; } } } return TRUE; } /* * Do rounding when converting small unorm values to larger ones. * Not quite 100% accurate, as it's done by appending MSBs, but * should be good enough. */ static inline LLVMValueRef scale_bits_up(struct gallivm_state *gallivm, int src_bits, int dst_bits, LLVMValueRef src, struct lp_type src_type) { LLVMBuilderRef builder = gallivm->builder; LLVMValueRef result = src; if (src_bits == 1 && dst_bits > 1) { /* * Useful for a1 - we'd need quite some repeated copies otherwise. */ struct lp_build_context bld; LLVMValueRef dst_mask; lp_build_context_init(&bld, gallivm, src_type); dst_mask = lp_build_const_int_vec(gallivm, src_type, (1 << dst_bits) - 1), result = lp_build_cmp(&bld, PIPE_FUNC_EQUAL, src, lp_build_const_int_vec(gallivm, src_type, 0)); result = lp_build_andnot(&bld, dst_mask, result); } else if (dst_bits > src_bits) { /* Scale up bits */ int db = dst_bits - src_bits; /* Shift left by difference in bits */ result = LLVMBuildShl(builder, src, lp_build_const_int_vec(gallivm, src_type, db), ""); if (db <= src_bits) { /* Enough bits in src to fill the remainder */ LLVMValueRef lower = LLVMBuildLShr(builder, src, lp_build_const_int_vec(gallivm, src_type, src_bits - db), ""); result = LLVMBuildOr(builder, result, lower, ""); } else if (db > src_bits) { /* Need to repeatedly copy src bits to fill remainder in dst */ unsigned n; for (n = src_bits; n < dst_bits; n *= 2) { LLVMValueRef shuv = lp_build_const_int_vec(gallivm, src_type, n); result = LLVMBuildOr(builder, result, LLVMBuildLShr(builder, result, shuv, ""), ""); } } } else { assert (dst_bits == src_bits); } return result; } /** * Unpack a single pixel into its XYZW components. * * @param desc the pixel format for the packed pixel value * @param packed integer pixel in a format such as PIPE_FORMAT_B8G8R8A8_UNORM * * @return XYZW in a float[4] or ubyte[4] or ushort[4] vector. */ static inline LLVMValueRef lp_build_unpack_arith_rgba_aos(struct gallivm_state *gallivm, const struct util_format_description *desc, LLVMValueRef packed) { LLVMBuilderRef builder = gallivm->builder; LLVMValueRef shifted, casted, scaled, masked; LLVMValueRef shifts[4]; LLVMValueRef masks[4]; LLVMValueRef scales[4]; LLVMTypeRef vec32_type; boolean normalized; boolean needs_uitofp; unsigned i; /* TODO: Support more formats */ assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN); assert(desc->block.width == 1); assert(desc->block.height == 1); assert(desc->block.bits <= 32); /* Do the intermediate integer computations with 32bit integers since it * matches floating point size */ assert (LLVMTypeOf(packed) == LLVMInt32TypeInContext(gallivm->context)); vec32_type = LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4); /* Broadcast the packed value to all four channels * before: packed = BGRA * after: packed = {BGRA, BGRA, BGRA, BGRA} */ packed = LLVMBuildInsertElement(builder, LLVMGetUndef(vec32_type), packed, LLVMConstNull(LLVMInt32TypeInContext(gallivm->context)), ""); packed = LLVMBuildShuffleVector(builder, packed, LLVMGetUndef(vec32_type), LLVMConstNull(vec32_type), ""); /* Initialize vector constants */ normalized = FALSE; needs_uitofp = FALSE; /* Loop over 4 color components */ for (i = 0; i < 4; ++i) { unsigned bits = desc->channel[i].size; unsigned shift = desc->channel[i].shift; if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) { shifts[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); masks[i] = LLVMConstNull(LLVMInt32TypeInContext(gallivm->context)); scales[i] = LLVMConstNull(LLVMFloatTypeInContext(gallivm->context)); } else { unsigned long long mask = (1ULL << bits) - 1; assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED); if (bits == 32) { needs_uitofp = TRUE; } shifts[i] = lp_build_const_int32(gallivm, shift); masks[i] = lp_build_const_int32(gallivm, mask); if (desc->channel[i].normalized) { scales[i] = lp_build_const_float(gallivm, 1.0 / mask); normalized = TRUE; } else scales[i] = lp_build_const_float(gallivm, 1.0); } } /* Ex: convert packed = {XYZW, XYZW, XYZW, XYZW} * into masked = {X, Y, Z, W} */ if (desc->block.bits < 32 && normalized) { /* * Note: we cannot do the shift below on x86 natively until AVX2. * * Old llvm versions will resort to scalar extract/shift insert, * which is definitely terrible, new versions will just do * several vector shifts and shuffle/blend results together. * We could turn this into a variable left shift plus a constant * right shift, and llvm would then turn the variable left shift * into a mul for us (albeit without sse41 the mul needs emulation * too...). However, since we're going to do a float mul * anyway, we just adjust that mul instead (plus the mask), skipping * the shift completely. * We could also use a extra mul when the format isn't normalized and * we don't have AVX2 support, but don't bother for now. Unfortunately, * this strategy doesn't work for 32bit formats (such as rgb10a2 or even * rgba8 if it ends up here), as that would require UIToFP, albeit that * would be fixable with easy 16bit shuffle (unless there's channels * crossing 16bit boundaries). */ for (i = 0; i < 4; ++i) { if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { unsigned bits = desc->channel[i].size; unsigned shift = desc->channel[i].shift; unsigned long long mask = ((1ULL << bits) - 1) << shift; scales[i] = lp_build_const_float(gallivm, 1.0 / mask); masks[i] = lp_build_const_int32(gallivm, mask); } } masked = LLVMBuildAnd(builder, packed, LLVMConstVector(masks, 4), ""); } else { shifted = LLVMBuildLShr(builder, packed, LLVMConstVector(shifts, 4), ""); masked = LLVMBuildAnd(builder, shifted, LLVMConstVector(masks, 4), ""); } if (!needs_uitofp) { /* UIToFP can't be expressed in SSE2 */ casted = LLVMBuildSIToFP(builder, masked, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4), ""); } else { casted = LLVMBuildUIToFP(builder, masked, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4), ""); } /* * At this point 'casted' may be a vector of floats such as * {255.0, 255.0, 255.0, 255.0}. (Normalized values may be multiplied * by powers of two). Next, if the pixel values are normalized * we'll scale this to {1.0, 1.0, 1.0, 1.0}. */ if (normalized) scaled = LLVMBuildFMul(builder, casted, LLVMConstVector(scales, 4), ""); else scaled = casted; return scaled; } /** * Pack a single pixel. * * @param rgba 4 float vector with the unpacked components. * * XXX: This is mostly for reference and testing -- operating a single pixel at * a time is rarely if ever needed. */ LLVMValueRef lp_build_pack_rgba_aos(struct gallivm_state *gallivm, const struct util_format_description *desc, LLVMValueRef rgba) { LLVMBuilderRef builder = gallivm->builder; LLVMTypeRef type; LLVMValueRef packed = NULL; LLVMValueRef swizzles[4]; LLVMValueRef shifted, casted, scaled, unswizzled; LLVMValueRef shifts[4]; LLVMValueRef scales[4]; boolean normalized; unsigned i, j; assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN); assert(desc->block.width == 1); assert(desc->block.height == 1); type = LLVMIntTypeInContext(gallivm->context, desc->block.bits); /* Unswizzle the color components into the source vector. */ for (i = 0; i < 4; ++i) { for (j = 0; j < 4; ++j) { if (desc->swizzle[j] == i) break; } if (j < 4) swizzles[i] = lp_build_const_int32(gallivm, j); else swizzles[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); } unswizzled = LLVMBuildShuffleVector(builder, rgba, LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4)), LLVMConstVector(swizzles, 4), ""); normalized = FALSE; for (i = 0; i < 4; ++i) { unsigned bits = desc->channel[i].size; unsigned shift = desc->channel[i].shift; if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) { shifts[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); scales[i] = LLVMGetUndef(LLVMFloatTypeInContext(gallivm->context)); } else { unsigned mask = (1 << bits) - 1; assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED); assert(bits < 32); shifts[i] = lp_build_const_int32(gallivm, shift); if (desc->channel[i].normalized) { scales[i] = lp_build_const_float(gallivm, mask); normalized = TRUE; } else scales[i] = lp_build_const_float(gallivm, 1.0); } } if (normalized) scaled = LLVMBuildFMul(builder, unswizzled, LLVMConstVector(scales, 4), ""); else scaled = unswizzled; casted = LLVMBuildFPToSI(builder, scaled, LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4), ""); shifted = LLVMBuildShl(builder, casted, LLVMConstVector(shifts, 4), ""); /* Bitwise or all components */ for (i = 0; i < 4; ++i) { if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) { LLVMValueRef component = LLVMBuildExtractElement(builder, shifted, lp_build_const_int32(gallivm, i), ""); if (packed) packed = LLVMBuildOr(builder, packed, component, ""); else packed = component; } } if (!packed) packed = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); if (desc->block.bits < 32) packed = LLVMBuildTrunc(builder, packed, type, ""); return packed; } /** * Fetch a pixel into a 4 float AoS. * * \param format_desc describes format of the image we're fetching from * \param aligned whether the data is guaranteed to be aligned * \param ptr address of the pixel block (or the texel if uncompressed) * \param i, j the sub-block pixel coordinates. For non-compressed formats * these will always be (0, 0). * \return a 4 element vector with the pixel's RGBA values. */ LLVMValueRef lp_build_fetch_rgba_aos(struct gallivm_state *gallivm, const struct util_format_description *format_desc, struct lp_type type, boolean aligned, LLVMValueRef base_ptr, LLVMValueRef offset, LLVMValueRef i, LLVMValueRef j, LLVMValueRef cache) { LLVMBuilderRef builder = gallivm->builder; unsigned num_pixels = type.length / 4; struct lp_build_context bld; assert(type.length <= LP_MAX_VECTOR_LENGTH); assert(type.length % 4 == 0); lp_build_context_init(&bld, gallivm, type); /* * Trivial case * * The format matches the type (apart of a swizzle) so no need for * scaling or converting. */ if (format_matches_type(format_desc, type) && format_desc->block.bits <= type.width * 4 && /* XXX this shouldn't be needed */ util_is_power_of_two(format_desc->block.bits)) { LLVMValueRef packed; LLVMTypeRef dst_vec_type = lp_build_vec_type(gallivm, type); struct lp_type fetch_type; unsigned vec_len = type.width * type.length; /* * The format matches the type (apart of a swizzle) so no need for * scaling or converting. */ fetch_type = lp_type_uint(type.width*4); packed = lp_build_gather(gallivm, type.length/4, format_desc->block.bits, fetch_type, aligned, base_ptr, offset, TRUE); assert(format_desc->block.bits <= vec_len); (void) vec_len; /* silence unused var warning for non-debug build */ packed = LLVMBuildBitCast(gallivm->builder, packed, dst_vec_type, ""); return lp_build_format_swizzle_aos(format_desc, &bld, packed); } /* * Bit arithmetic for converting small_unorm to unorm8. * * This misses some opportunities for optimizations (like skipping mask * for the highest channel for instance, or doing bit scaling in parallel * for channels with the same bit width) but it should be passable for * all arithmetic formats. */ if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN && format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB && util_format_fits_8unorm(format_desc) && type.width == 8 && type.norm == 1 && type.sign == 0 && type.fixed == 0 && type.floating == 0) { LLVMValueRef packed, res = NULL, chans[4], rgba[4]; LLVMTypeRef dst_vec_type, conv_vec_type; struct lp_type fetch_type, conv_type; struct lp_build_context bld_conv; unsigned j; fetch_type = lp_type_uint(type.width*4); conv_type = lp_type_int_vec(type.width*4, type.width * type.length); dst_vec_type = lp_build_vec_type(gallivm, type); conv_vec_type = lp_build_vec_type(gallivm, conv_type); lp_build_context_init(&bld_conv, gallivm, conv_type); packed = lp_build_gather(gallivm, type.length/4, format_desc->block.bits, fetch_type, aligned, base_ptr, offset, TRUE); assert(format_desc->block.bits * type.length / 4 <= type.width * type.length); packed = LLVMBuildBitCast(gallivm->builder, packed, conv_vec_type, ""); for (j = 0; j < format_desc->nr_channels; ++j) { unsigned mask = 0; unsigned sa = format_desc->channel[j].shift; mask = (1 << format_desc->channel[j].size) - 1; /* Extract bits from source */ chans[j] = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, conv_type, sa), ""); chans[j] = LLVMBuildAnd(builder, chans[j], lp_build_const_int_vec(gallivm, conv_type, mask), ""); /* Scale bits */ if (type.norm) { chans[j] = scale_bits_up(gallivm, format_desc->channel[j].size, type.width, chans[j], conv_type); } } /* * This is a hacked lp_build_format_swizzle_soa() since we need a * normalized 1 but only 8 bits in a 32bit vector... */ for (j = 0; j < 4; ++j) { enum pipe_swizzle swizzle = format_desc->swizzle[j]; if (swizzle == PIPE_SWIZZLE_1) { rgba[j] = lp_build_const_int_vec(gallivm, conv_type, (1 << type.width) - 1); } else { rgba[j] = lp_build_swizzle_soa_channel(&bld_conv, chans, swizzle); } if (j == 0) { res = rgba[j]; } else { rgba[j] = LLVMBuildShl(builder, rgba[j], lp_build_const_int_vec(gallivm, conv_type, j * type.width), ""); res = LLVMBuildOr(builder, res, rgba[j], ""); } } res = LLVMBuildBitCast(gallivm->builder, res, dst_vec_type, ""); return res; } /* * Bit arithmetic */ if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN && (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB || format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) && format_desc->block.width == 1 && format_desc->block.height == 1 && /* XXX this shouldn't be needed */ util_is_power_of_two(format_desc->block.bits) && format_desc->block.bits <= 32 && format_desc->is_bitmask && !format_desc->is_mixed && (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED || format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED) && !format_desc->channel[0].pure_integer) { LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4]; LLVMValueRef res[LP_MAX_VECTOR_WIDTH / 128]; struct lp_type conv_type; unsigned k, num_conv_src, num_conv_dst; /* * Note this path is generally terrible for fetching multiple pixels. * We should make sure we cannot hit this code path for anything but * single pixels. */ /* * Unpack a pixel at a time into a <4 x float> RGBA vector */ for (k = 0; k < num_pixels; ++k) { LLVMValueRef packed; packed = lp_build_gather_elem(gallivm, num_pixels, format_desc->block.bits, 32, aligned, base_ptr, offset, k, FALSE); tmps[k] = lp_build_unpack_arith_rgba_aos(gallivm, format_desc, packed); } /* * Type conversion. * * TODO: We could avoid floating conversion for integer to * integer conversions. */ if (gallivm_debug & GALLIVM_DEBUG_PERF && !type.floating) { debug_printf("%s: unpacking %s with floating point\n", __FUNCTION__, format_desc->short_name); } conv_type = lp_float32_vec4_type(); num_conv_src = num_pixels; num_conv_dst = 1; if (num_pixels % 8 == 0) { lp_build_concat_n(gallivm, lp_float32_vec4_type(), tmps, num_pixels, tmps, num_pixels / 2); conv_type.length *= num_pixels / 4; num_conv_src = 4 * num_pixels / 8; if (type.width == 8 && type.floating == 0 && type.fixed == 0) { /* * FIXME: The fast float->unorm path (which is basically * skipping the MIN/MAX which are extremely pointless in any * case) requires that there's 2 destinations... * In any case, we really should make sure we don't hit this * code with multiple pixels for unorm8 dst types, it's * completely hopeless even if we do hit the right conversion. */ type.length /= num_pixels / 4; num_conv_dst = num_pixels / 4; } } lp_build_conv(gallivm, conv_type, type, tmps, num_conv_src, res, num_conv_dst); if (num_pixels % 8 == 0 && (type.width == 8 && type.floating == 0 && type.fixed == 0)) { lp_build_concat_n(gallivm, type, res, num_conv_dst, res, 1); } return lp_build_format_swizzle_aos(format_desc, &bld, res[0]); } /* If all channels are of same type and we are not using half-floats */ if (format_desc->is_array && format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB) { assert(!format_desc->is_mixed); return lp_build_fetch_rgba_aos_array(gallivm, format_desc, type, base_ptr, offset); } /* * YUV / subsampled formats */ if (format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) { struct lp_type tmp_type; LLVMValueRef tmp; memset(&tmp_type, 0, sizeof tmp_type); tmp_type.width = 8; tmp_type.length = num_pixels * 4; tmp_type.norm = TRUE; tmp = lp_build_fetch_subsampled_rgba_aos(gallivm, format_desc, num_pixels, base_ptr, offset, i, j); lp_build_conv(gallivm, tmp_type, type, &tmp, 1, &tmp, 1); return tmp; } /* * s3tc rgb formats */ if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC && cache) { struct lp_type tmp_type; LLVMValueRef tmp; memset(&tmp_type, 0, sizeof tmp_type); tmp_type.width = 8; tmp_type.length = num_pixels * 4; tmp_type.norm = TRUE; tmp = lp_build_fetch_cached_texels(gallivm, format_desc, num_pixels, base_ptr, offset, i, j, cache); lp_build_conv(gallivm, tmp_type, type, &tmp, 1, &tmp, 1); return tmp; } /* * Fallback to util_format_description::fetch_rgba_8unorm(). */ if (format_desc->fetch_rgba_8unorm && !type.floating && type.width == 8 && !type.sign && type.norm) { /* * Fallback to calling util_format_description::fetch_rgba_8unorm. * * This is definitely not the most efficient way of fetching pixels, as * we miss the opportunity to do vectorization, but this it is a * convenient for formats or scenarios for which there was no opportunity * or incentive to optimize. */ LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context); LLVMTypeRef pi8t = LLVMPointerType(i8t, 0); LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); LLVMValueRef function; LLVMValueRef tmp_ptr; LLVMValueRef tmp; LLVMValueRef res; unsigned k; if (gallivm_debug & GALLIVM_DEBUG_PERF) { debug_printf("%s: falling back to util_format_%s_fetch_rgba_8unorm\n", __FUNCTION__, format_desc->short_name); } /* * Declare and bind format_desc->fetch_rgba_8unorm(). */ { /* * Function to call looks like: * fetch(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j) */ LLVMTypeRef ret_type; LLVMTypeRef arg_types[4]; LLVMTypeRef function_type; ret_type = LLVMVoidTypeInContext(gallivm->context); arg_types[0] = pi8t; arg_types[1] = pi8t; arg_types[2] = i32t; arg_types[3] = i32t; function_type = LLVMFunctionType(ret_type, arg_types, ARRAY_SIZE(arg_types), 0); /* make const pointer for the C fetch_rgba_8unorm function */ function = lp_build_const_int_pointer(gallivm, func_to_pointer((func_pointer) format_desc->fetch_rgba_8unorm)); /* cast the callee pointer to the function's type */ function = LLVMBuildBitCast(builder, function, LLVMPointerType(function_type, 0), "cast callee"); } tmp_ptr = lp_build_alloca(gallivm, i32t, ""); res = LLVMGetUndef(LLVMVectorType(i32t, num_pixels)); /* * Invoke format_desc->fetch_rgba_8unorm() for each pixel and insert the result * in the SoA vectors. */ for (k = 0; k < num_pixels; ++k) { LLVMValueRef index = lp_build_const_int32(gallivm, k); LLVMValueRef args[4]; args[0] = LLVMBuildBitCast(builder, tmp_ptr, pi8t, ""); args[1] = lp_build_gather_elem_ptr(gallivm, num_pixels, base_ptr, offset, k); if (num_pixels == 1) { args[2] = i; args[3] = j; } else { args[2] = LLVMBuildExtractElement(builder, i, index, ""); args[3] = LLVMBuildExtractElement(builder, j, index, ""); } LLVMBuildCall(builder, function, args, ARRAY_SIZE(args), ""); tmp = LLVMBuildLoad(builder, tmp_ptr, ""); if (num_pixels == 1) { res = tmp; } else { res = LLVMBuildInsertElement(builder, res, tmp, index, ""); } } /* Bitcast from to <4n x i8> */ res = LLVMBuildBitCast(builder, res, bld.vec_type, ""); return res; } /* * Fallback to util_format_description::fetch_rgba_float(). */ if (format_desc->fetch_rgba_float) { /* * Fallback to calling util_format_description::fetch_rgba_float. * * This is definitely not the most efficient way of fetching pixels, as * we miss the opportunity to do vectorization, but this it is a * convenient for formats or scenarios for which there was no opportunity * or incentive to optimize. */ LLVMTypeRef f32t = LLVMFloatTypeInContext(gallivm->context); LLVMTypeRef f32x4t = LLVMVectorType(f32t, 4); LLVMTypeRef pf32t = LLVMPointerType(f32t, 0); LLVMTypeRef pi8t = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0); LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); LLVMValueRef function; LLVMValueRef tmp_ptr; LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4]; LLVMValueRef res; unsigned k; if (gallivm_debug & GALLIVM_DEBUG_PERF) { debug_printf("%s: falling back to util_format_%s_fetch_rgba_float\n", __FUNCTION__, format_desc->short_name); } /* * Declare and bind format_desc->fetch_rgba_float(). */ { /* * Function to call looks like: * fetch(float *dst, const uint8_t *src, unsigned i, unsigned j) */ LLVMTypeRef ret_type; LLVMTypeRef arg_types[4]; ret_type = LLVMVoidTypeInContext(gallivm->context); arg_types[0] = pf32t; arg_types[1] = pi8t; arg_types[2] = i32t; arg_types[3] = i32t; function = lp_build_const_func_pointer(gallivm, func_to_pointer((func_pointer) format_desc->fetch_rgba_float), ret_type, arg_types, ARRAY_SIZE(arg_types), format_desc->short_name); } tmp_ptr = lp_build_alloca(gallivm, f32x4t, ""); /* * Invoke format_desc->fetch_rgba_float() for each pixel and insert the result * in the SoA vectors. */ for (k = 0; k < num_pixels; ++k) { LLVMValueRef args[4]; args[0] = LLVMBuildBitCast(builder, tmp_ptr, pf32t, ""); args[1] = lp_build_gather_elem_ptr(gallivm, num_pixels, base_ptr, offset, k); if (num_pixels == 1) { args[2] = i; args[3] = j; } else { LLVMValueRef index = lp_build_const_int32(gallivm, k); args[2] = LLVMBuildExtractElement(builder, i, index, ""); args[3] = LLVMBuildExtractElement(builder, j, index, ""); } LLVMBuildCall(builder, function, args, ARRAY_SIZE(args), ""); tmps[k] = LLVMBuildLoad(builder, tmp_ptr, ""); } lp_build_conv(gallivm, lp_float32_vec4_type(), type, tmps, num_pixels, &res, 1); return res; } assert(!util_format_is_pure_integer(format_desc->format)); assert(0); return lp_build_undef(gallivm, type); }