/**************************************************************************
 *
 * Copyright 2009 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/**
 * @file
 * Helpers for emitting intrinsic calls.
 *
 * LLVM vanilla IR doesn't represent all basic arithmetic operations we care
 * about, and it is often necessary to resort to target-specific intrinsics
 * for performance, convenience.
 *
 * Ideally we would like to stay away from target specific intrinsics and
 * move all the instruction selection logic into upstream LLVM where it belongs.
 *
 * These functions are also used for calling C functions provided by us from
 * generated LLVM code.
 *
 * @author Jose Fonseca
 */


#include "util/u_debug.h"
#include "util/u_string.h"

#include "lp_bld_const.h"
#include "lp_bld_intr.h"
#include "lp_bld_type.h"
#include "lp_bld_pack.h"


/**
 * Format the name of a type-overloaded intrinsic.
 *
 * Writes "<name_root>.<c><width>" for scalar types, or
 * "<name_root>.v<length><c><width>" for vector types, where <c> is 'i' for
 * integers and 'f' for float/double, and <width> is the element bit width.
 *
 * @param name       destination buffer
 * @param size       size of the destination buffer in bytes
 * @param name_root  intrinsic name without the type suffix
 * @param type       scalar or vector LLVM type selecting the suffix
 *
 * Only integer, float and double element types are supported.  Any other
 * kind asserts in debug builds; in release builds the bare root name is
 * written so that no uninitialized suffix data is ever formatted.
 */
void
lp_format_intrinsic(char *name,
                    size_t size,
                    const char *name_root,
                    LLVMTypeRef type)
{
   unsigned length = 0;
   unsigned width;
   char c;

   LLVMTypeKind kind = LLVMGetTypeKind(type);
   if (kind == LLVMVectorTypeKind) {
      /* Describe vectors by their length plus their element type. */
      length = LLVMGetVectorSize(type);
      type = LLVMGetElementType(type);
      kind = LLVMGetTypeKind(type);
   }

   switch (kind) {
   case LLVMIntegerTypeKind:
      c = 'i';
      width = LLVMGetIntTypeWidth(type);
      break;
   case LLVMFloatTypeKind:
      c = 'f';
      width = 32;
      break;
   case LLVMDoubleTypeKind:
      c = 'f';
      width = 64;
      break;
   default:
      /* Unsupported element type.  Do not fall through to the formatting
       * below: c and width would be read uninitialized when asserts are
       * compiled out.
       */
      assert(0);
      util_snprintf(name, size, "%s", name_root);
      return;
   }

   if (length) {
      util_snprintf(name, size, "%s.v%u%c%u", name_root, length, c, width);
   } else {
      util_snprintf(name, size, "%s.%c%u", name_root, c, width);
   }
}


/**
 * Declare an external, non-variadic function with the C calling convention.
 *
 * @param module     module to add the declaration to
 * @param name       function name; must not already exist in the module
 * @param ret_type   return type
 * @param arg_types  array of num_args argument types
 * @param num_args   number of arguments
 * @return the newly added function declaration
 */
LLVMValueRef
lp_declare_intrinsic(LLVMModuleRef module,
                     const char *name,
                     LLVMTypeRef ret_type,
                     LLVMTypeRef *arg_types,
                     unsigned num_args)
{
   LLVMTypeRef function_type;
   LLVMValueRef function;

   assert(!LLVMGetNamedFunction(module, name));

   function_type = LLVMFunctionType(ret_type, arg_types, num_args, 0);
   function = LLVMAddFunction(module, name, function_type);

   LLVMSetFunctionCallConv(function, LLVMCCallConv);
   LLVMSetLinkage(function, LLVMExternalLinkage);

   assert(LLVMIsDeclaration(function));

   return function;
}


/**
 * Emit a call to the named function, declaring it on first use.
 *
 * The function is looked up by name in the module that owns the builder's
 * current insertion block.  If it is not found, it is declared with the
 * argument types taken from the supplied values.
 *
 * @param builder   builder positioned where the call should be emitted
 * @param name      function / intrinsic name
 * @param ret_type  return type used when the function has to be declared
 * @param args      array of num_args argument values (none may be NULL)
 * @param num_args  number of arguments, at most LP_MAX_FUNC_ARGS
 * @param attr      function attribute added when non-zero; only applied
 *                  when this call creates the declaration
 * @return the value of the call instruction
 */
LLVMValueRef
lp_build_intrinsic(LLVMBuilderRef builder,
                   const char *name,
                   LLVMTypeRef ret_type,
                   LLVMValueRef *args,
                   unsigned num_args,
                   LLVMAttribute attr)
{
   LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
   LLVMValueRef function;

   function = LLVMGetNamedFunction(module, name);
   if (!function) {
      LLVMTypeRef arg_types[LP_MAX_FUNC_ARGS];
      unsigned i;

      assert(num_args <= LP_MAX_FUNC_ARGS);

      for (i = 0; i < num_args; ++i) {
         assert(args[i]);
         arg_types[i] = LLVMTypeOf(args[i]);
      }

      function = lp_declare_intrinsic(module, name, ret_type, arg_types, num_args);

      if (attr)
         LLVMAddFunctionAttr(function, attr);
   }

   return LLVMBuildCall(builder, function, args, num_args, "");
}


/**
 * Emit a call to a one-argument intrinsic, with no extra function attribute.
 */
LLVMValueRef
lp_build_intrinsic_unary(LLVMBuilderRef builder,
                         const char *name,
                         LLVMTypeRef ret_type,
                         LLVMValueRef a)
{
   return lp_build_intrinsic(builder, name, ret_type, &a, 1, 0);
}


/**
 * Emit a call to a two-argument intrinsic, with no extra function attribute.
 */
LLVMValueRef
lp_build_intrinsic_binary(LLVMBuilderRef builder,
                          const char *name,
                          LLVMTypeRef ret_type,
                          LLVMValueRef a,
                          LLVMValueRef b)
{
   LLVMValueRef args[2];

   args[0] = a;
   args[1] = b;

   return lp_build_intrinsic(builder, name, ret_type, args, 2, 0);
}


/**
 * Call intrinsic with arguments adapted to intrinsic vector length.
 *
 * Split vectors which are too large for the hw, or expand them if they
 * are too small, so a caller calling a function which might use intrinsics
 * doesn't need to do splitting/expansion on its own.
 * This only supports intrinsics where src and dst types match.
 *
 * @param gallivm    gallivm state providing the builder and context
 * @param name       intrinsic name
 * @param src_type   type of a and b (and of the result)
 * @param intr_size  size of the intrinsic's vector, in the same unit as
 *                   src_type.width; the intrinsic operates on
 *                   intr_size / src_type.width elements
 * @param a          first operand
 * @param b          second operand
 * @return the result in src_type, or NULL for unsupported size combinations
 *         (source length not a multiple of the intrinsic length, or an
 *         intrinsic size smaller than one element)
 */
LLVMValueRef
lp_build_intrinsic_binary_anylength(struct gallivm_state *gallivm,
                                    const char *name,
                                    struct lp_type src_type,
                                    unsigned intr_size,
                                    LLVMValueRef a,
                                    LLVMValueRef b)
{
   unsigned i;
   struct lp_type intrin_type = src_type;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef i32undef = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
   LLVMValueRef anative, bnative;
   unsigned intrin_length = intr_size / src_type.width;

   intrin_type.length = intrin_length;

   if (intrin_length > src_type.length) {
      /* Source is narrower than the intrinsic: widen with undef lanes,
       * call the intrinsic, then keep only the original lanes.
       */
      LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
      LLVMValueRef constvec, tmp;

      /* elems[] is indexed up to intrin_length below. */
      assert(intrin_length <= LP_MAX_VECTOR_LENGTH);

      for (i = 0; i < src_type.length; i++) {
         elems[i] = lp_build_const_int32(gallivm, i);
      }
      for (; i < intrin_length; i++) {
         elems[i] = i32undef;
      }
      if (src_type.length == 1) {
         /* Scalars are turned into one-element vectors so they can be
          * used as shuffle operands.
          */
         LLVMTypeRef elem_type = lp_build_elem_type(gallivm, intrin_type);
         a = LLVMBuildBitCast(builder, a, LLVMVectorType(elem_type, 1), "");
         b = LLVMBuildBitCast(builder, b, LLVMVectorType(elem_type, 1), "");
      }
      constvec = LLVMConstVector(elems, intrin_length);
      anative = LLVMBuildShuffleVector(builder, a, a, constvec, "");
      bnative = LLVMBuildShuffleVector(builder, b, b, constvec, "");
      tmp = lp_build_intrinsic_binary(builder, name,
                                      lp_build_vec_type(gallivm, intrin_type),
                                      anative, bnative);
      if (src_type.length > 1) {
         /* Shrink back to the source length using the leading indices. */
         constvec = LLVMConstVector(elems, src_type.length);
         return LLVMBuildShuffleVector(builder, tmp, tmp, constvec, "");
      }
      else {
         return LLVMBuildExtractElement(builder, tmp, elems[0], "");
      }
   }
   else if (intrin_length < src_type.length) {
      /* Source is wider than the intrinsic: process it in intrinsic-sized
       * pieces and concatenate the partial results.
       */
      unsigned num_vec;
      LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];

      if (intrin_length == 0) {
         /* intr_size is smaller than a single element; the division and
          * modulo below would be by zero.
          */
         debug_printf("%s: intrinsic size smaller than element width\n",
                      __FUNCTION__);
         assert(0);
         return NULL;
      }

      /* don't support arbitrary size here as this is so yuck */
      if (src_type.length % intrin_length) {
         /* FIXME: This is something which should be supported
          * but there doesn't seem to be any need for it currently
          * so crash and burn.
          */
         debug_printf("%s: should handle arbitrary vector size\n",
                      __FUNCTION__);
         assert(0);
         return NULL;
      }

      num_vec = src_type.length / intrin_length;

      for (i = 0; i < num_vec; i++) {
         anative = lp_build_extract_range(gallivm, a, i*intrin_length,
                                          intrin_length);
         bnative = lp_build_extract_range(gallivm, b, i*intrin_length,
                                          intrin_length);
         tmp[i] = lp_build_intrinsic_binary(builder, name,
                                            lp_build_vec_type(gallivm, intrin_type),
                                            anative, bnative);
      }
      return lp_build_concat(gallivm, tmp, intrin_type, num_vec);
   }
   else {
      /* Lengths already match: call the intrinsic directly. */
      return lp_build_intrinsic_binary(builder, name,
                                       lp_build_vec_type(gallivm, src_type),
                                       a, b);
   }
}


/**
 * Apply a scalar intrinsic to every element of vector arguments.
 *
 * For each element index of ret_type, the corresponding element of every
 * argument is extracted, the intrinsic is called on those scalars, and the
 * result is inserted into the returned vector.
 *
 * @param gallivm   gallivm state providing the builder
 * @param name      name of the scalar intrinsic
 * @param ret_type  vector type of the result; its element type is used as
 *                  the intrinsic's return type
 * @param args      array of num_args vector arguments
 * @param num_args  number of arguments, at most LP_MAX_FUNC_ARGS
 * @return vector of per-element results
 */
LLVMValueRef
lp_build_intrinsic_map(struct gallivm_state *gallivm,
                       const char *name,
                       LLVMTypeRef ret_type,
                       LLVMValueRef *args,
                       unsigned num_args)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef ret_elem_type = LLVMGetElementType(ret_type);
   unsigned n = LLVMGetVectorSize(ret_type);
   unsigned i, j;
   LLVMValueRef res;

   assert(num_args <= LP_MAX_FUNC_ARGS);

   res = LLVMGetUndef(ret_type);
   for (i = 0; i < n; ++i) {
      LLVMValueRef index = lp_build_const_int32(gallivm, i);
      LLVMValueRef arg_elems[LP_MAX_FUNC_ARGS];
      LLVMValueRef res_elem;
      for (j = 0; j < num_args; ++j)
         arg_elems[j] = LLVMBuildExtractElement(builder, args[j], index, "");
      res_elem = lp_build_intrinsic(builder, name, ret_elem_type, arg_elems, num_args, 0);
      res = LLVMBuildInsertElement(builder, res, res_elem, index, "");
   }

   return res;
}


/**
 * Element-wise application of a one-argument scalar intrinsic.
 */
LLVMValueRef
lp_build_intrinsic_map_unary(struct gallivm_state *gallivm,
                             const char *name,
                             LLVMTypeRef ret_type,
                             LLVMValueRef a)
{
   return lp_build_intrinsic_map(gallivm, name, ret_type, &a, 1);
}


/**
 * Element-wise application of a two-argument scalar intrinsic.
 */
LLVMValueRef
lp_build_intrinsic_map_binary(struct gallivm_state *gallivm,
                              const char *name,
                              LLVMTypeRef ret_type,
                              LLVMValueRef a,
                              LLVMValueRef b)
{
   LLVMValueRef args[2];

   args[0] = a;
   args[1] = b;

   return lp_build_intrinsic_map(gallivm, name, ret_type, args, 2);
}