summaryrefslogtreecommitdiffstats
path: root/src/gallium/auxiliary/gallivm
diff options
context:
space:
mode:
authorBrian Paul <[email protected]>2009-02-10 16:44:02 -0700
committerBrian Paul <[email protected]>2009-02-10 16:44:02 -0700
commit5340b6dff73a0a23531ce2a5f28fba8303adab6e (patch)
treeb141fc3648568dd8b941c966059e6ed32a8bd0ad /src/gallium/auxiliary/gallivm
parent9fd26daec24f21dbe17afcb2e2ab272667ee9a69 (diff)
parentee4c921b65fb76998711f3c40330505cbc49a0e0 (diff)
Merge commit 'origin/gallium-master-merge'
This is the big merge of the gallium-0.2 branch into master. gallium-master-merge was just the staging area for it. Both gallium-0.2 and gallium-master-merge are considered closed now. Conflicts: progs/demos/Makefile src/mesa/main/state.c src/mesa/main/texenvprogram.c
Diffstat (limited to 'src/gallium/auxiliary/gallivm')
-rw-r--r--src/gallium/auxiliary/gallivm/Makefile92
-rw-r--r--src/gallium/auxiliary/gallivm/SConscript16
-rw-r--r--src/gallium/auxiliary/gallivm/gallivm.cpp332
-rw-r--r--src/gallium/auxiliary/gallivm/gallivm.h118
-rw-r--r--src/gallium/auxiliary/gallivm/gallivm_builtins.cpp140
-rw-r--r--src/gallium/auxiliary/gallivm/gallivm_cpu.cpp243
-rw-r--r--src/gallium/auxiliary/gallivm/gallivm_p.h110
-rw-r--r--src/gallium/auxiliary/gallivm/instructions.cpp1193
-rw-r--r--src/gallium/auxiliary/gallivm/instructions.h175
-rw-r--r--src/gallium/auxiliary/gallivm/instructionssoa.cpp523
-rw-r--r--src/gallium/auxiliary/gallivm/instructionssoa.h116
-rw-r--r--src/gallium/auxiliary/gallivm/llvm_builtins.c114
-rw-r--r--src/gallium/auxiliary/gallivm/loweringpass.cpp17
-rw-r--r--src/gallium/auxiliary/gallivm/loweringpass.h15
-rw-r--r--src/gallium/auxiliary/gallivm/soabuiltins.c210
-rw-r--r--src/gallium/auxiliary/gallivm/storage.cpp364
-rw-r--r--src/gallium/auxiliary/gallivm/storage.h133
-rw-r--r--src/gallium/auxiliary/gallivm/storagesoa.cpp438
-rw-r--r--src/gallium/auxiliary/gallivm/storagesoa.h107
-rw-r--r--src/gallium/auxiliary/gallivm/tgsitollvm.cpp1164
-rw-r--r--src/gallium/auxiliary/gallivm/tgsitollvm.h20
21 files changed, 5640 insertions, 0 deletions
diff --git a/src/gallium/auxiliary/gallivm/Makefile b/src/gallium/auxiliary/gallivm/Makefile
new file mode 100644
index 00000000000..5a96d94ec37
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/Makefile
@@ -0,0 +1,92 @@
+# -*-makefile-*-
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = gallivm
+
+
+GALLIVM_SOURCES = \
+ gallivm.cpp \
+ gallivm_cpu.cpp \
+ instructions.cpp \
+ loweringpass.cpp \
+ tgsitollvm.cpp \
+ storage.cpp \
+ storagesoa.cpp \
+ instructionssoa.cpp
+
+INC_SOURCES = gallivm_builtins.cpp gallivmsoabuiltins.cpp
+
+CPP_SOURCES = \
+ $(GALLIVM_SOURCES)
+
+C_SOURCES =
+ASM_SOURCES =
+
+OBJECTS = $(C_SOURCES:.c=.o) \
+ $(CPP_SOURCES:.cpp=.o) \
+ $(ASM_SOURCES:.S=.o)
+
+### Include directories
+INCLUDES = \
+ -I. \
+ -I$(TOP)/src/gallium/drivers \
+ -I$(TOP)/src/gallium/auxiliary \
+ -I$(TOP)/src/gallium/include \
+ -I$(TOP)/src/mesa \
+ -I$(TOP)/include
+
+
+##### RULES #####
+
+.c.o:
+ $(CC) -c $(INCLUDES) $(LLVM_CFLAGS) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@
+
+.cpp.o:
+ $(CXX) -c $(INCLUDES) $(LLVM_CXXFLAGS) $(CXXFLAGS) $(DRIVER_DEFINES) $< -o $@
+
+.S.o:
+ $(CC) -c $(INCLUDES) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@
+
+##### TARGETS #####
+
+default:: depend symlinks $(LIBNAME)
+
+
+$(LIBNAME): $(OBJECTS) Makefile
+ $(TOP)/bin/mklib -o $@ -static $(OBJECTS)
+
+
+depend: $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(INC_SOURCES)
+ rm -f depend
+ touch depend
+ $(MKDEP) $(MKDEP_OPTIONS) $(DRIVER_DEFINES) $(INCLUDES) $(C_SOURCES) $(CPP_SOURCES) \
+ $(ASM_SOURCES) $(INC_SOURCES) 2> /dev/null
+
+
+gallivm_builtins.cpp: llvm_builtins.c
+ clang --emit-llvm < $< |llvm-as|opt -std-compile-opts > temp1.bin
+ (echo "static const unsigned char llvm_builtins_data[] = {"; od -txC temp1.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/,0x00};/") >$@
+ rm temp1.bin
+
+gallivmsoabuiltins.cpp: soabuiltins.c
+ clang --emit-llvm < $< |llvm-as|opt -std-compile-opts > temp2.bin
+ (echo "static const unsigned char soabuiltins_data[] = {"; od -txC temp2.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/,0x00};/") >$@
+ rm temp2.bin
+
+# Emacs tags
+tags:
+ etags `find . -name \*.[ch]` `find ../include`
+
+
+# Remove .o and backup files
+clean:
+ -rm -f *.o */*.o *~ *.so *~ server/*.o
+ -rm -f depend depend.bak
+ -rm -f gallivm_builtins.cpp
+ -rm -f gallivmsoabuiltins.cpp
+
+symlinks:
+
+
+include depend
diff --git a/src/gallium/auxiliary/gallivm/SConscript b/src/gallium/auxiliary/gallivm/SConscript
new file mode 100644
index 00000000000..c0aa51b90a9
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/SConscript
@@ -0,0 +1,16 @@
+Import('*')
+
+gallivm = env.ConvenienceLibrary(
+ target = 'gallivm',
+ source = [
+ 'gallivm.cpp',
+ 'gallivm_cpu.cpp',
+ 'instructions.cpp',
+ 'loweringpass.cpp',
+ 'tgsitollvm.cpp',
+ 'storage.cpp',
+ 'storagesoa.cpp',
+ 'instructionssoa.cpp',
+ ])
+
+auxiliaries.insert(0, gallivm)
diff --git a/src/gallium/auxiliary/gallivm/gallivm.cpp b/src/gallium/auxiliary/gallivm/gallivm.cpp
new file mode 100644
index 00000000000..f4af5cc8ad5
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/gallivm.cpp
@@ -0,0 +1,332 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Zack Rusin [email protected]
+ */
+#ifdef MESA_LLVM
+
+#include "gallivm.h"
+#include "gallivm_p.h"
+
+#include "instructions.h"
+#include "loweringpass.h"
+#include "storage.h"
+#include "tgsitollvm.h"
+
+#include "pipe/p_context.h"
+#include "pipe/p_shader_tokens.h"
+
+#include "tgsi/tgsi_exec.h"
+#include "tgsi/tgsi_dump.h"
+
+#include <llvm/Module.h>
+#include <llvm/CallingConv.h>
+#include <llvm/Constants.h>
+#include <llvm/DerivedTypes.h>
+#include <llvm/Instructions.h>
+#include <llvm/ModuleProvider.h>
+#include <llvm/Pass.h>
+#include <llvm/PassManager.h>
+#include <llvm/Attributes.h>
+#include <llvm/Support/PatternMatch.h>
+#include <llvm/ExecutionEngine/JIT.h>
+#include <llvm/ExecutionEngine/Interpreter.h>
+#include <llvm/ExecutionEngine/GenericValue.h>
+#include <llvm/Support/MemoryBuffer.h>
+#include <llvm/LinkAllPasses.h>
+#include <llvm/Analysis/Verifier.h>
+#include <llvm/Analysis/LoopPass.h>
+#include <llvm/Target/TargetData.h>
+#include <llvm/Bitcode/ReaderWriter.h>
+#include <llvm/Transforms/Utils/Cloning.h>
+
+#include <sstream>
+#include <fstream>
+#include <iostream>
+
+static int GLOBAL_ID = 0;
+
+using namespace llvm;
+
+static inline
+void AddStandardCompilePasses(PassManager &PM)
+{
+ PM.add(new LoweringPass());
+ PM.add(createVerifierPass()); // Verify that input is correct
+
+ PM.add(createLowerSetJmpPass()); // Lower llvm.setjmp/.longjmp
+
+ //PM.add(createStripSymbolsPass(true));
+
+ PM.add(createRaiseAllocationsPass()); // call %malloc -> malloc inst
+ PM.add(createCFGSimplificationPass()); // Clean up disgusting code
+ PM.add(createPromoteMemoryToRegisterPass());// Kill useless allocas
+ PM.add(createGlobalOptimizerPass()); // Optimize out global vars
+ PM.add(createGlobalDCEPass()); // Remove unused fns and globs
+ PM.add(createIPConstantPropagationPass());// IP Constant Propagation
+ PM.add(createDeadArgEliminationPass()); // Dead argument elimination
+ PM.add(createInstructionCombiningPass()); // Clean up after IPCP & DAE
+ PM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE
+
+ PM.add(createPruneEHPass()); // Remove dead EH info
+
+ PM.add(createFunctionInliningPass()); // Inline small functions
+ PM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args
+
+ PM.add(createTailDuplicationPass()); // Simplify cfg by copying code
+ PM.add(createInstructionCombiningPass()); // Cleanup for scalarrepl.
+ PM.add(createCFGSimplificationPass()); // Merge & remove BBs
+ PM.add(createScalarReplAggregatesPass()); // Break up aggregate allocas
+ PM.add(createInstructionCombiningPass()); // Combine silly seq's
+ PM.add(createCondPropagationPass()); // Propagate conditionals
+
+ PM.add(createTailCallEliminationPass()); // Eliminate tail calls
+ PM.add(createCFGSimplificationPass()); // Merge & remove BBs
+ PM.add(createReassociatePass()); // Reassociate expressions
+ PM.add(createLoopRotatePass());
+ PM.add(createLICMPass()); // Hoist loop invariants
+ PM.add(createLoopUnswitchPass()); // Unswitch loops.
+ PM.add(createLoopIndexSplitPass()); // Index split loops.
+ PM.add(createInstructionCombiningPass()); // Clean up after LICM/reassoc
+ PM.add(createIndVarSimplifyPass()); // Canonicalize indvars
+ PM.add(createLoopUnrollPass()); // Unroll small loops
+ PM.add(createInstructionCombiningPass()); // Clean up after the unroller
+ PM.add(createGVNPass()); // Remove redundancies
+ PM.add(createSCCPPass()); // Constant prop with SCCP
+
+ // Run instcombine after redundancy elimination to exploit opportunities
+ // opened up by them.
+ PM.add(createInstructionCombiningPass());
+ PM.add(createCondPropagationPass()); // Propagate conditionals
+
+ PM.add(createDeadStoreEliminationPass()); // Delete dead stores
+ PM.add(createAggressiveDCEPass()); // SSA based 'Aggressive DCE'
+ PM.add(createCFGSimplificationPass()); // Merge & remove BBs
+ PM.add(createSimplifyLibCallsPass()); // Library Call Optimizations
+ PM.add(createDeadTypeEliminationPass()); // Eliminate dead types
+ PM.add(createConstantMergePass()); // Merge dup global constants
+}
+
+void gallivm_prog_delete(struct gallivm_prog *prog)
+{
+ delete prog->module;
+ prog->module = 0;
+ prog->function = 0;
+ free(prog);
+}
+
+static inline void
+constant_interpolation(float (*inputs)[16][4],
+ const struct tgsi_interp_coef *coefs,
+ unsigned attrib,
+ unsigned chan)
+{
+ unsigned i;
+
+ for (i = 0; i < QUAD_SIZE; ++i) {
+ inputs[i][attrib][chan] = coefs[attrib].a0[chan];
+ }
+}
+
+static inline void
+linear_interpolation(float (*inputs)[16][4],
+ const struct tgsi_interp_coef *coefs,
+ unsigned attrib,
+ unsigned chan)
+{
+ unsigned i;
+
+ for( i = 0; i < QUAD_SIZE; i++ ) {
+ const float x = inputs[i][0][0];
+ const float y = inputs[i][0][1];
+
+ inputs[i][attrib][chan] =
+ coefs[attrib].a0[chan] +
+ coefs[attrib].dadx[chan] * x +
+ coefs[attrib].dady[chan] * y;
+ }
+}
+
+static inline void
+perspective_interpolation(float (*inputs)[16][4],
+ const struct tgsi_interp_coef *coefs,
+ unsigned attrib,
+ unsigned chan )
+{
+ unsigned i;
+
+ for( i = 0; i < QUAD_SIZE; i++ ) {
+ const float x = inputs[i][0][0];
+ const float y = inputs[i][0][1];
+ /* WPOS.w here is really 1/w */
+ const float w = 1.0f / inputs[i][0][3];
+ assert(inputs[i][0][3] != 0.0);
+
+ inputs[i][attrib][chan] =
+ (coefs[attrib].a0[chan] +
+ coefs[attrib].dadx[chan] * x +
+ coefs[attrib].dady[chan] * y) * w;
+ }
+}
+
+void gallivm_ir_dump(struct gallivm_ir *ir, const char *file_prefix)
+{
+ if (!ir || !ir->module)
+ return;
+
+ if (file_prefix) {
+ std::ostringstream stream;
+ stream << file_prefix;
+ stream << ir->id;
+ stream << ".ll";
+ std::string name = stream.str();
+ std::ofstream out(name.c_str());
+ if (!out) {
+ std::cerr<<"Can't open file : "<<stream.str()<<std::endl;;
+ return;
+ }
+ out << (*ir->module);
+ out.close();
+ } else {
+ const llvm::Module::FunctionListType &funcs = ir->module->getFunctionList();
+ llvm::Module::FunctionListType::const_iterator itr;
+ std::cout<<"; ---------- Start shader "<<ir->id<<std::endl;
+ for (itr = funcs.begin(); itr != funcs.end(); ++itr) {
+ const llvm::Function &func = (*itr);
+ std::string name = func.getName();
+ const llvm::Function *found = 0;
+ if (name.find("vs_shader") != std::string::npos ||
+ name.find("fs_shader") != std::string::npos ||
+ name.find("function") != std::string::npos)
+ found = &func;
+ if (found) {
+ std::cout<<*found<<std::endl;
+ }
+ }
+ std::cout<<"; ---------- End shader "<<ir->id<<std::endl;
+ }
+}
+
+
+void gallivm_prog_inputs_interpolate(struct gallivm_prog *prog,
+ float (*inputs)[16][4],
+ const struct tgsi_interp_coef *coef)
+{
+ for (int i = 0; i < prog->num_interp; ++i) {
+ const gallivm_interpolate &interp = prog->interpolators[i];
+ switch (interp.type) {
+ case TGSI_INTERPOLATE_CONSTANT:
+ constant_interpolation(inputs, coef, interp.attrib, interp.chan);
+ break;
+
+ case TGSI_INTERPOLATE_LINEAR:
+ linear_interpolation(inputs, coef, interp.attrib, interp.chan);
+ break;
+
+ case TGSI_INTERPOLATE_PERSPECTIVE:
+ perspective_interpolation(inputs, coef, interp.attrib, interp.chan);
+ break;
+
+ default:
+ assert( 0 );
+ }
+ }
+}
+
+
+struct gallivm_ir * gallivm_ir_new(enum gallivm_shader_type type)
+{
+ struct gallivm_ir *ir =
+ (struct gallivm_ir *)calloc(1, sizeof(struct gallivm_ir));
+ ++GLOBAL_ID;
+ ir->id = GLOBAL_ID;
+ ir->type = type;
+
+ return ir;
+}
+
+void gallivm_ir_set_layout(struct gallivm_ir *ir,
+ enum gallivm_vector_layout layout)
+{
+ ir->layout = layout;
+}
+
+void gallivm_ir_set_components(struct gallivm_ir *ir, int num)
+{
+ ir->num_components = num;
+}
+
+void gallivm_ir_fill_from_tgsi(struct gallivm_ir *ir,
+ const struct tgsi_token *tokens)
+{
+ std::cout << "Creating llvm from: " <<std::endl;
+ tgsi_dump(tokens, 0);
+
+ llvm::Module *mod = tgsi_to_llvmir(ir, tokens);
+ ir->module = mod;
+ gallivm_ir_dump(ir, 0);
+}
+
+void gallivm_ir_delete(struct gallivm_ir *ir)
+{
+ delete ir->module;
+ free(ir);
+}
+
+struct gallivm_prog * gallivm_ir_compile(struct gallivm_ir *ir)
+{
+ struct gallivm_prog *prog =
+ (struct gallivm_prog *)calloc(1, sizeof(struct gallivm_prog));
+
+ std::cout << "Before optimizations:"<<std::endl;
+ ir->module->dump();
+ std::cout<<"-------------------------------"<<std::endl;
+
+ PassManager veri;
+ veri.add(createVerifierPass());
+ veri.run(*ir->module);
+ llvm::Module *mod = llvm::CloneModule(ir->module);
+ prog->num_consts = ir->num_consts;
+ memcpy(prog->interpolators, ir->interpolators, sizeof(prog->interpolators));
+ prog->num_interp = ir->num_interp;
+
+ /* Run optimization passes over it */
+ PassManager passes;
+ passes.add(new TargetData(mod));
+ AddStandardCompilePasses(passes);
+ passes.run(*mod);
+ prog->module = mod;
+
+ std::cout << "After optimizations:"<<std::endl;
+ mod->dump();
+
+ return prog;
+}
+
+#endif /* MESA_LLVM */
diff --git a/src/gallium/auxiliary/gallivm/gallivm.h b/src/gallium/auxiliary/gallivm/gallivm.h
new file mode 100644
index 00000000000..36a64a77471
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/gallivm.h
@@ -0,0 +1,118 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Zack Rusin [email protected]
+ */
+
+#ifndef GALLIVM_H
+#define GALLIVM_H
+
+/*
+ LLVM representation consists of two stages - layout independent
+ intermediate representation gallivm_ir and driver specific
+ gallivm_prog. TGSI is first being translated into gallivm_ir
+ after that driver can set number of options on gallivm_ir and
+ have it compiled into gallivm_prog. gallivm_prog can be either
+ executed (assuming there's LLVM JIT backend for the current
+ target) or machine code generation can be done (assuming there's
+ a LLVM code generator for thecurrent target)
+ */
+#if defined __cplusplus
+extern "C" {
+#endif
+
+#include "pipe/p_state.h"
+
+#ifdef MESA_LLVM
+
+struct tgsi_token;
+
+struct gallivm_ir;
+struct gallivm_prog;
+struct gallivm_cpu_engine;
+struct tgsi_interp_coef;
+struct tgsi_sampler;
+struct tgsi_exec_vector;
+
+enum gallivm_shader_type {
+ GALLIVM_VS,
+ GALLIVM_FS
+};
+
+enum gallivm_vector_layout {
+ GALLIVM_AOS,
+ GALLIVM_SOA
+};
+
+struct gallivm_ir *gallivm_ir_new(enum gallivm_shader_type type);
+void gallivm_ir_set_layout(struct gallivm_ir *ir,
+ enum gallivm_vector_layout layout);
+void gallivm_ir_set_components(struct gallivm_ir *ir, int num);
+void gallivm_ir_fill_from_tgsi(struct gallivm_ir *ir,
+ const struct tgsi_token *tokens);
+void gallivm_ir_delete(struct gallivm_ir *ir);
+
+
+struct gallivm_prog *gallivm_ir_compile(struct gallivm_ir *ir);
+
+void gallivm_prog_inputs_interpolate(struct gallivm_prog *prog,
+ float (*inputs)[PIPE_MAX_SHADER_INPUTS][4],
+ const struct tgsi_interp_coef *coefs);
+void gallivm_prog_dump(struct gallivm_prog *prog, const char *file_prefix);
+
+
+struct gallivm_cpu_engine *gallivm_cpu_engine_create(struct gallivm_prog *prog);
+struct gallivm_cpu_engine *gallivm_global_cpu_engine();
+int gallivm_cpu_vs_exec(struct gallivm_prog *prog,
+ struct tgsi_exec_machine *machine,
+ const float (*input)[4],
+ unsigned num_inputs,
+ float (*output)[4],
+ unsigned num_outputs,
+ const float (*constants)[4],
+ unsigned count,
+ unsigned input_stride,
+ unsigned output_stride);
+int gallivm_cpu_fs_exec(struct gallivm_prog *prog,
+ float x, float y,
+ float (*dests)[PIPE_MAX_SHADER_INPUTS][4],
+ float (*inputs)[PIPE_MAX_SHADER_INPUTS][4],
+ float (*consts)[4],
+ struct tgsi_sampler *samplers);
+void gallivm_cpu_jit_compile(struct gallivm_cpu_engine *ee, struct gallivm_prog *prog);
+void gallivm_cpu_engine_delete(struct gallivm_cpu_engine *ee);
+
+
+#endif /* MESA_LLVM */
+
+#if defined __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp b/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp
new file mode 100644
index 00000000000..634bac01507
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp
@@ -0,0 +1,140 @@
+static const unsigned char llvm_builtins_data[] = {
+0x42,0x43,0xc0,0xde,0x21,0x0c,0x00,0x00,0x27,0x02,0x00,0x00,0x01,0x10,0x00,0x00,
+0x10,0x00,0x00,0x00,0x07,0x81,0x23,0x91,0x41,0xc8,0x04,0x49,0x06,0x10,0x32,0x39,
+0x92,0x01,0x84,0x0c,0x25,0x05,0x08,0x19,0x1e,0x04,0x8b,0x62,0x80,0x14,0x45,0x02,
+0x42,0x92,0x0b,0x42,0xa4,0x10,0x32,0x14,0x38,0x08,0x18,0x49,0x0a,0x32,0x44,0x24,
+0x48,0x0a,0x90,0x21,0x23,0x44,0x72,0x80,0x8c,0x14,0x21,0x86,0x0a,0x8a,0x0a,0x64,
+0x0c,0x1f,0x00,0x00,0x49,0x18,0x00,0x00,0x03,0x00,0x00,0x00,0x0b,0x84,0xff,0xff,
+0xff,0xff,0x1f,0xc0,0x00,0x00,0x00,0x00,0x51,0x20,0x00,0x00,0x12,0x00,0x00,0x00,
+0x32,0x22,0x48,0x09,0x20,0x65,0x82,0x84,0x00,0x26,0x45,0x48,0x05,0x09,0x26,0x45,
+0xc6,0x05,0x42,0x52,0x26,0x08,0xae,0x19,0x80,0x61,0x04,0x02,0x98,0x23,0x00,0x83,
+0x29,0x80,0x21,0x00,0xb2,0x73,0x04,0x01,0x51,0x8a,0xf4,0x08,0x92,0xa4,0x39,0x47,
+0x80,0x50,0x2b,0x03,0x00,0xa0,0x08,0x21,0x5c,0x46,0x2b,0x44,0x08,0x21,0xd4,0x40,
+0x14,0x01,0x80,0x11,0x80,0x22,0x88,0x00,0x13,0x30,0x7c,0xc0,0x03,0x3b,0xf8,0x05,
+0x3b,0xa0,0x83,0x36,0xa8,0x07,0x77,0x58,0x07,0x77,0x78,0x87,0x7b,0x70,0x87,0x36,
+0x60,0x87,0x74,0x70,0x87,0x7a,0xc0,0x87,0x36,0x38,0x07,0x77,0xa8,0x87,0x0d,0xf7,
+0x50,0x0e,0x6d,0x00,0x0f,0x7a,0x60,0x07,0x74,0xa0,0x07,0x76,0x40,0x07,0x7a,0x60,
+0x07,0x74,0xd0,0x06,0xe9,0x10,0x07,0x7a,0x80,0x07,0x7a,0x80,0x07,0x6d,0x90,0x0e,
+0x78,0xa0,0x07,0x78,0xa0,0x07,0x78,0xd0,0x06,0xe9,0x10,0x07,0x76,0xa0,0x07,0x71,
+0x60,0x07,0x7a,0x10,0x07,0x76,0xd0,0x06,0xe9,0x30,0x07,0x72,0xa0,0x07,0x73,0x20,
+0x07,0x7a,0x30,0x07,0x72,0xd0,0x06,0xe9,0x60,0x07,0x74,0xa0,0x07,0x76,0x40,0x07,
+0x7a,0x60,0x07,0x74,0xd0,0x06,0xe6,0x30,0x07,0x72,0xa0,0x07,0x73,0x20,0x07,0x7a,
+0x30,0x07,0x72,0xd0,0x06,0xe6,0x60,0x07,0x74,0xa0,0x07,0x76,0x40,0x07,0x7a,0x60,
+0x07,0x74,0xd0,0x06,0xf6,0x60,0x07,0x74,0xa0,0x07,0x76,0x40,0x07,0x7a,0x60,0x07,
+0x74,0xd0,0x06,0xf6,0x10,0x07,0x72,0x80,0x07,0x7a,0x10,0x07,0x72,0x80,0x07,0x7a,
+0x10,0x07,0x72,0x80,0x07,0x6d,0x10,0x0e,0x70,0xa0,0x07,0x70,0xa0,0x07,0x76,0x40,
+0x07,0x6d,0x60,0x0e,0x78,0x00,0x07,0x7a,0x10,0x07,0x72,0x80,0x07,0x7a,0x10,0x07,
+0x72,0x80,0x07,0x3a,0x0f,0x84,0x48,0x20,0x23,0x24,0x40,0x00,0x62,0x67,0x88,0x9f,
+0x19,0x92,0x24,0x00,0x10,0x04,0x00,0x00,0x00,0x43,0x92,0x04,0x08,0x00,0x00,0x00,
+0x00,0x60,0x48,0xa2,0x00,0x40,0x10,0x00,0x00,0x00,0x0c,0x49,0x16,0x00,0x08,0x02,
+0x00,0x00,0x80,0x21,0x89,0x02,0x00,0x41,0x00,0x00,0x00,0x30,0x24,0x61,0x80,0x00,
+0x00,0x00,0x00,0x00,0x86,0x24,0x07,0x10,0x00,0x00,0x00,0x00,0xc0,0x90,0x44,0x01,
+0x80,0x20,0x00,0x00,0x00,0x18,0x92,0x1c,0x40,0x00,0x00,0x00,0x00,0x00,0x43,0x12,
+0x05,0x00,0x82,0x00,0x00,0x00,0x60,0x48,0x52,0x00,0x40,0x10,0x00,0x00,0x00,0x64,
+0x81,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x32,0x1e,0x98,0x10,0x19,0x11,0x4c,0x90,
+0x8c,0x09,0x26,0x47,0xc6,0x04,0x43,0x8a,0x8a,0x59,0x8b,0x43,0x50,0xd2,0x09,0x02,
+0x81,0xd2,0x73,0x50,0xc9,0x0c,0x2a,0x99,0x41,0x25,0x33,0xa8,0x64,0x56,0x28,0x66,
+0x2d,0x0e,0x41,0xcf,0x2a,0x15,0x04,0x4a,0xcf,0x41,0x25,0x33,0xa8,0x64,0x06,0x95,
+0xcc,0xa0,0x92,0x59,0x01,0x00,0x00,0x00,0x53,0x82,0x26,0x0c,0x04,0x00,0x00,0x00,
+0x22,0x00,0x00,0x00,0x13,0x04,0x41,0x2c,0x10,0x00,0x00,0x00,0x05,0x00,0x00,0x00,
+0x04,0xc6,0x08,0x40,0x10,0x04,0xe1,0x70,0x18,0x23,0x00,0x41,0x10,0x84,0xc3,0x60,
+0x04,0x00,0x00,0x00,0xc3,0x0d,0xce,0x43,0x4c,0x37,0x3c,0x8e,0x34,0xdc,0x30,0x41,
+0xc2,0x74,0x03,0x34,0x51,0xc3,0x0d,0x4d,0x44,0x4c,0x37,0x44,0x8d,0x35,0x56,0x01,
+0x04,0xc3,0x55,0x21,0x16,0x0e,0x04,0x00,0x0f,0x00,0x00,0x00,0xd6,0x10,0x00,0xe6,
+0x10,0x04,0x76,0x81,0x00,0x3e,0x30,0x0c,0x91,0x4f,0x1b,0x05,0x21,0x30,0x8f,0x6d,
+0x13,0x48,0xe0,0x03,0xc3,0x10,0xf9,0xb4,0x55,0x20,0x81,0x0f,0x0c,0x43,0xe4,0xd7,
+0x66,0x41,0x08,0xcc,0xa3,0x1f,0x40,0x41,0x34,0x53,0x84,0x99,0xc4,0x20,0x30,0x8f,
+0x61,0x10,0x02,0xb0,0x2c,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00,
+0x27,0x00,0x00,0x00,0x13,0x04,0x43,0x2c,0x10,0x00,0x00,0x00,0x08,0x00,0x00,0x00,
+0x24,0x8a,0xa0,0x0c,0x46,0x00,0x4a,0x80,0xc2,0x1c,0x84,0x55,0x55,0xd6,0x1c,0x84,
+0x45,0x51,0x16,0x81,0x19,0x80,0x11,0x80,0x31,0x02,0x10,0x04,0x41,0xfc,0x03,0x00,
+0x63,0x08,0x0d,0x34,0xdc,0x70,0x55,0xc2,0x2c,0x43,0x20,0x60,0x73,0x0c,0xd3,0x15,
+0x8d,0x21,0x34,0xd1,0x18,0x42,0xf3,0x8c,0x55,0x00,0x81,0xa0,0x6d,0x73,0x0c,0x19,
+0xe7,0x60,0x87,0x52,0x38,0x10,0x00,0x00,0x10,0x00,0x00,0x00,0x27,0x50,0x20,0x05,
+0xd1,0x0c,0x17,0x60,0x20,0xc5,0x74,0x10,0x8d,0x65,0x14,0x13,0xf3,0xd4,0xb4,0x6d,
+0x14,0x13,0xf3,0xd4,0xb8,0x69,0x14,0x13,0xf3,0xd4,0xb6,0x75,0x14,0x13,0xf3,0xd4,
+0xba,0x35,0x0c,0x13,0xf3,0xd8,0x05,0x31,0x31,0x8f,0x6e,0x1c,0x84,0x00,0x2c,0xcb,
+0x01,0x14,0x44,0x33,0x45,0x98,0x61,0x0c,0x02,0xf3,0x00,0x00,0x00,0x00,0x00,0x00,
+0x61,0x20,0x00,0x00,0x81,0x00,0x00,0x00,0x13,0x04,0x4d,0x2c,0x10,0x00,0x00,0x00,
+0x04,0x00,0x00,0x00,0x24,0xca,0x60,0x04,0xa0,0x04,0x8a,0x80,0xc2,0x0c,0x00,0x91,
+0x11,0x00,0x00,0x00,0x63,0x08,0x4d,0x64,0x16,0xc1,0xe1,0x86,0xab,0x22,0x66,0x19,
+0x02,0x01,0x1b,0x43,0x70,0xa2,0x59,0x82,0x61,0x0c,0xe1,0x89,0x66,0x09,0x86,0x81,
+0x0a,0x20,0x0b,0x34,0x61,0x8e,0x81,0xda,0xa2,0x31,0x84,0x46,0xb2,0x8e,0xe0,0x70,
+0x83,0x57,0x11,0xb3,0x0c,0x44,0xf1,0x8d,0x21,0x38,0xd2,0x2c,0x81,0x31,0x86,0xf0,
+0x48,0xb3,0x04,0xc6,0x40,0x05,0x00,0x06,0x44,0x18,0x14,0x73,0x0c,0x9c,0x18,0x48,
+0x63,0x08,0xcd,0x64,0x64,0x40,0x70,0xb8,0xa1,0x0c,0x2a,0x62,0x96,0xe1,0x40,0xcc,
+0x60,0x0c,0xc1,0x99,0x66,0x09,0x92,0x31,0x84,0x67,0x9a,0x25,0x48,0x06,0x2a,0x80,
+0x33,0x38,0xd0,0x00,0x99,0x63,0x18,0x83,0x34,0x98,0xc6,0x10,0x1a,0xc8,0xd6,0x80,
+0xe0,0x70,0x03,0x1b,0x54,0xc4,0x2c,0x83,0xb2,0xb4,0xc1,0x18,0x82,0x03,0xcd,0x12,
+0x30,0x63,0x08,0x0f,0x34,0x4b,0xc0,0x0c,0x54,0x00,0x6e,0xa0,0xbc,0xc1,0x32,0xc7,
+0xa0,0x06,0x70,0x00,0x61,0x1c,0x84,0x03,0x01,0x00,0x00,0x00,0x4e,0x00,0x00,0x00,
+0x76,0x52,0x4c,0xcc,0x73,0xd3,0x24,0x05,0x64,0xec,0xcd,0x8d,0xcc,0xe5,0x87,0x46,
+0xc6,0x50,0x8a,0x89,0x79,0xee,0xdb,0x54,0x8a,0x89,0x79,0xee,0xdd,0x1a,0x88,0x89,
+0x79,0x68,0x73,0x20,0x26,0xe6,0xa9,0xed,0x81,0x98,0x98,0xc7,0x36,0x0b,0x62,0x62,
+0x9e,0xdb,0x32,0x88,0x89,0x79,0x72,0xd3,0x20,0x26,0xe6,0xd9,0x8d,0x83,0x98,0x98,
+0xa7,0xb7,0x95,0x62,0x62,0x9e,0xbb,0x27,0x2d,0x20,0x63,0x6f,0x6e,0x64,0x2e,0x3a,
+0x34,0x35,0x56,0x62,0x08,0x4e,0x53,0xd9,0xba,0xb5,0x14,0x02,0xf3,0xe0,0xf5,0x25,
+0x2c,0x82,0xd3,0x0c,0xbe,0xe0,0x34,0xd3,0x8d,0x9b,0x88,0x21,0x38,0xcd,0x60,0xd7,
+0x24,0x01,0x63,0xec,0xcd,0x8d,0xcc,0x45,0x87,0x44,0x80,0x8c,0xbd,0xb9,0x91,0xb9,
+0xfc,0xc4,0xd0,0x90,0x02,0x8c,0xb1,0x37,0x37,0x32,0x97,0x1f,0x73,0x29,0x26,0xe6,
+0xc1,0x71,0x7b,0x29,0x26,0xe6,0xc1,0x77,0xfb,0x28,0x04,0xe6,0xa9,0x6f,0x52,0x01,
+0x32,0xf6,0xe6,0x46,0xe6,0xa2,0x13,0x73,0x63,0x18,0x83,0xc0,0x3c,0xb6,0x41,0x08,
+0x4e,0x33,0x58,0x47,0x31,0x31,0x4f,0x5d,0x1f,0xc3,0x22,0x38,0xcd,0xe0,0x0b,0x4e,
+0x33,0xe1,0xbc,0xa5,0x18,0x82,0xd3,0x0c,0x77,0x6e,0x20,0xc5,0xc4,0x3c,0xb5,0x4e,
+0x3a,0x40,0xc6,0xde,0xdc,0xc8,0x5c,0x7e,0x64,0x70,0x2c,0xa4,0x98,0x98,0xa7,0xee,
+0x6f,0x20,0x11,0x9c,0x66,0xf0,0x05,0xa7,0x99,0xec,0x82,0x10,0x9c,0xa6,0x32,0x93,
+0x42,0x60,0x1e,0x7b,0xb7,0x98,0x62,0x62,0x9e,0xbc,0x36,0x16,0x43,0x70,0x9a,0x0a,
+0xa7,0x6d,0xa4,0x98,0x98,0xc7,0xbe,0x8d,0xa4,0x98,0x98,0xc7,0xce,0x0d,0xc6,0x10,
+0x9c,0x66,0xc0,0x7b,0x12,0x02,0x32,0xf6,0xe6,0x46,0xe6,0xa2,0x33,0x13,0x73,0x06,
+0x8b,0xe0,0x34,0x83,0x2f,0x38,0xcd,0x64,0xd3,0x07,0x50,0x10,0xcd,0x14,0x61,0xe6,
+0x61,0x08,0x4e,0x53,0xd5,0x36,0x00,0x00,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00,
+0x4a,0x00,0x00,0x00,0x13,0x04,0x41,0x2c,0x10,0x00,0x00,0x00,0x07,0x00,0x00,0x00,
+0x24,0xca,0x60,0x04,0xa0,0x04,0x8a,0x80,0xc2,0x0c,0x00,0xb9,0x61,0x0c,0x04,0x10,
+0x1e,0xe1,0x19,0xc6,0x40,0x02,0xe1,0x11,0x1e,0x00,0x00,0x00,0x63,0x08,0xcd,0x63,
+0x15,0xc1,0x31,0x84,0x06,0xb2,0x8b,0xe0,0x18,0x42,0x13,0x59,0x46,0x70,0x0c,0xa1,
+0x71,0x6c,0x23,0x38,0x16,0x02,0x04,0xc7,0x64,0x61,0x1a,0x37,0x16,0x01,0x04,0x48,
+0x35,0xc7,0x20,0x79,0xcf,0x58,0x04,0x10,0x20,0xd5,0x1c,0xc3,0x07,0x06,0xd0,0x58,
+0x04,0x10,0x20,0xd5,0x1c,0x43,0x18,0x88,0x41,0x34,0x16,0x01,0x04,0x48,0x35,0xc7,
+0x30,0x06,0x64,0xe0,0x98,0x47,0xd0,0xc0,0x80,0xa0,0x89,0x01,0x41,0x23,0x03,0x82,
+0x63,0x21,0x40,0x70,0x50,0x66,0x70,0x06,0x68,0x90,0x06,0x58,0x06,0xe1,0x40,0x00,
+0x25,0x00,0x00,0x00,0x56,0x52,0x4c,0xcc,0x73,0xd3,0x56,0x41,0x4c,0xcc,0x53,0xdb,
+0x05,0x31,0x31,0xcf,0x6d,0x19,0xc4,0xc4,0x3c,0xba,0x6d,0x10,0x13,0xf3,0xf4,0xd6,
+0x41,0x08,0xc0,0xb2,0x18,0x46,0x21,0x38,0x4d,0x85,0x9b,0x46,0x21,0x38,0x4d,0xb5,
+0x9b,0x8a,0x21,0x00,0xcb,0x82,0xdf,0x66,0x62,0x08,0x4e,0x53,0xdd,0xb7,0x9d,0x18,
+0x82,0xd3,0x54,0xb7,0x6e,0x28,0x86,0xe0,0x34,0xd5,0xdd,0xdb,0x47,0x31,0x31,0x4f,
+0x9d,0x9b,0x87,0x21,0x00,0xcb,0x52,0xdf,0x06,0x62,0x08,0xc0,0xb2,0xd4,0xbc,0x59,
+0x10,0x82,0xd3,0x54,0x96,0x62,0x08,0x4e,0x53,0xe1,0xb6,0x85,0x14,0x13,0xf3,0xd8,
+0xb4,0x8d,0x14,0x13,0xf3,0xd8,0xb9,0x89,0x18,0x02,0xb0,0x2c,0xf6,0x6d,0x24,0x86,
+0x00,0x2c,0x8b,0xcd,0x1b,0x87,0x21,0x38,0x4d,0x55,0xd3,0xd6,0x30,0x54,0xc0,0x72,
+0x00,0x05,0xd1,0x4c,0x11,0x06,0x00,0x00,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00,
+0x19,0x00,0x00,0x00,0x13,0x04,0x41,0x2c,0x10,0x00,0x00,0x00,0x03,0x00,0x00,0x00,
+0x24,0x4a,0x60,0x04,0x80,0xc2,0x0c,0x00,0x00,0x00,0x00,0x00,0x63,0x08,0xcd,0x33,
+0x16,0x01,0x04,0x48,0x34,0xc7,0x00,0x49,0xcf,0x58,0x04,0x10,0x28,0xd1,0x1c,0xc3,
+0x44,0x39,0x58,0x85,0x03,0x01,0x00,0x00,0x0a,0x00,0x00,0x00,0x26,0x41,0x08,0xc0,
+0xb2,0x18,0x45,0x21,0x00,0xcb,0xb2,0x5b,0x04,0x31,0x31,0x8f,0x6d,0x13,0xc4,0xc4,
+0x3c,0xb9,0x35,0x0c,0x15,0xb0,0x58,0x05,0x31,0x31,0x4f,0x7f,0x00,0x05,0xd1,0x4c,
+0x11,0x06,0x00,0x00,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00,0x1b,0x00,0x00,0x00,
+0x13,0x04,0x41,0x2c,0x10,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x24,0xca,0x60,0x04,
+0xa0,0x04,0x8a,0x80,0xc2,0x0c,0x00,0x00,0x63,0x08,0xcd,0x33,0x16,0x01,0x04,0xca,
+0x34,0xc7,0x20,0x51,0xcf,0x1c,0x43,0x45,0x41,0x73,0x0c,0x16,0x15,0xcd,0x31,0x5c,
+0x94,0x83,0x58,0x38,0x10,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x76,0x51,0x4c,0xcc,
+0x53,0xdb,0x86,0x51,0x4c,0xcc,0x53,0xe7,0x36,0x41,0x4c,0xcc,0x63,0x5b,0x05,0x31,
+0x31,0x8f,0x6e,0x16,0xc4,0xc4,0x3c,0xbd,0x51,0x10,0x02,0xb0,0x2c,0xd6,0x30,0x54,
+0xc0,0x72,0x00,0x05,0xd1,0x4c,0x11,0x06,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00,
+0x2c,0x00,0x00,0x00,0x13,0x04,0x45,0x2c,0x10,0x00,0x00,0x00,0x03,0x00,0x00,0x00,
+0x24,0xca,0xa0,0x04,0x46,0x00,0x8a,0x80,0xc0,0x08,0x00,0x00,0x63,0x08,0x0d,0x34,
+0xdc,0x30,0x49,0xc4,0x2c,0x03,0x11,0x50,0x63,0x08,0xcd,0x33,0xdc,0x50,0x49,0xc4,
+0x2c,0x03,0x21,0x58,0x63,0x08,0x4d,0x34,0xdc,0x70,0x49,0xc4,0x2c,0x03,0x31,0x60,
+0x63,0x08,0x8d,0x33,0xdc,0x90,0x49,0x84,0x69,0x22,0x70,0xc3,0x27,0x1c,0x08,0x00,
+0x17,0x00,0x00,0x00,0x96,0x51,0x4c,0xcc,0x53,0xdf,0x66,0x41,0x08,0xcc,0x83,0xdb,
+0x04,0x31,0x31,0x4f,0x6d,0x15,0xc4,0xc4,0x3c,0xb7,0x61,0x10,0x02,0xf3,0xf0,0x76,
+0x41,0x4c,0xcc,0xb3,0x1f,0x81,0x11,0x11,0x13,0x15,0x35,0x37,0x90,0x2c,0x4e,0xf4,
+0x47,0x87,0x54,0xd7,0x17,0x70,0x2c,0x4e,0xf4,0x47,0x87,0x74,0x02,0xc8,0xe2,0x44,
+0x7f,0x74,0x48,0xb9,0x69,0x14,0x02,0xf3,0xd4,0xb8,0x6d,0x18,0x11,0x31,0x55,0xc0,
+0x62,0x0d,0x43,0x05,0x2c,0x07,0x50,0x10,0xcd,0x14,0x61,0x46,0x31,0x08,0xcc,0x03,
+0x00,0x00,0x00,0x00,0x71,0x20,0x00,0x00,0x12,0x00,0x00,0x00,0x66,0x40,0x54,0x82,
+0x23,0x19,0xc3,0xa0,0x20,0x8b,0x1d,0x18,0x4f,0x84,0x34,0x53,0x61,0x03,0xc4,0xe3,
+0x58,0x85,0x05,0x14,0xbe,0x34,0x45,0xb5,0x21,0x10,0x82,0x23,0x15,0x46,0x30,0x2c,
+0xc8,0x64,0x02,0x06,0xf0,0x3c,0x91,0x73,0x19,0x00,0xe1,0x4b,0x53,0x64,0x0a,0x84,
+0x84,0x34,0x85,0x25,0x0c,0x92,0x20,0x59,0xc1,0x20,0x30,0x8f,0x2d,0x10,0x95,0x84,
+0x34,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};
diff --git a/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp b/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp
new file mode 100644
index 00000000000..1bd00a0c2a6
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp
@@ -0,0 +1,243 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Zack Rusin [email protected]
+ */
+#ifdef MESA_LLVM
+
+#include "gallivm.h"
+#include "gallivm_p.h"
+
+#include "instructions.h"
+#include "loweringpass.h"
+#include "storage.h"
+#include "tgsitollvm.h"
+
+#include "pipe/p_context.h"
+#include "pipe/p_shader_tokens.h"
+
+#include "tgsi/tgsi_exec.h"
+#include "tgsi/tgsi_dump.h"
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+
+#include <llvm/Module.h>
+#include <llvm/CallingConv.h>
+#include <llvm/Constants.h>
+#include <llvm/DerivedTypes.h>
+#include <llvm/Instructions.h>
+#include <llvm/ModuleProvider.h>
+#include <llvm/Pass.h>
+#include <llvm/PassManager.h>
+#include <llvm/Attributes.h>
+#include <llvm/Support/PatternMatch.h>
+#include <llvm/ExecutionEngine/JIT.h>
+#include <llvm/ExecutionEngine/Interpreter.h>
+#include <llvm/ExecutionEngine/GenericValue.h>
+#include <llvm/Support/MemoryBuffer.h>
+#include <llvm/LinkAllPasses.h>
+#include <llvm/Analysis/Verifier.h>
+#include <llvm/Analysis/LoopPass.h>
+#include <llvm/Target/TargetData.h>
+#include <llvm/Bitcode/ReaderWriter.h>
+#include <llvm/Transforms/Utils/Cloning.h>
+
+#include <sstream>
+#include <fstream>
+#include <iostream>
+
+struct gallivm_cpu_engine {
+ llvm::ExecutionEngine *engine;
+};
+
+static struct gallivm_cpu_engine *CPU = 0;
+
+typedef int (*fragment_shader_runner)(float x, float y,
+ float (*dests)[16][4],
+ float (*inputs)[16][4],
+ int num_attribs,
+ float (*consts)[4], int num_consts,
+ struct tgsi_sampler *samplers);
+
+int gallivm_cpu_fs_exec(struct gallivm_prog *prog,
+ float fx, float fy,
+ float (*dests)[16][4],
+ float (*inputs)[16][4],
+ float (*consts)[4],
+ struct tgsi_sampler *samplers)
+{
+ fragment_shader_runner runner = reinterpret_cast<fragment_shader_runner>(prog->function);
+ assert(runner);
+
+ return runner(fx, fy, dests, inputs, prog->num_interp,
+ consts, prog->num_consts,
+ samplers);
+}
+
+static inline llvm::Function *func_for_shader(struct gallivm_prog *prog)
+{
+ llvm::Module *mod = prog->module;
+ llvm::Function *func = 0;
+
+ switch (prog->type) {
+ case GALLIVM_VS:
+ func = mod->getFunction("vs_shader");
+ break;
+ case GALLIVM_FS:
+ func = mod->getFunction("fs_shader");
+ break;
+ default:
+ assert(!"Unknown shader type!");
+ break;
+ }
+ return func;
+}
+
+/*!
+ This function creates a CPU based execution engine for the given gallivm_prog.
+ gallivm_cpu_engine should be used as a singleton throughout the library. Before
+ executing gallivm_prog_exec one needs to call gallivm_cpu_jit_compile.
+ The gallivm_prog instance which is being passed to the constructor is being
+ automatically JIT compiled so one shouldn't call gallivm_cpu_jit_compile
+ with it again.
+ */
+struct gallivm_cpu_engine * gallivm_cpu_engine_create(struct gallivm_prog *prog)
+{
+ struct gallivm_cpu_engine *cpu = (struct gallivm_cpu_engine *)
+ calloc(1, sizeof(struct gallivm_cpu_engine));
+ llvm::Module *mod = static_cast<llvm::Module*>(prog->module);
+ llvm::ExistingModuleProvider *mp = new llvm::ExistingModuleProvider(mod);
+ llvm::ExecutionEngine *ee = llvm::ExecutionEngine::create(mp, false);
+ ee->DisableLazyCompilation();
+ cpu->engine = ee;
+
+ llvm::Function *func = func_for_shader(prog);
+
+ prog->function = ee->getPointerToFunction(func);
+ CPU = cpu;
+ return cpu;
+}
+
+
+/*!
+ This function JIT compiles the given gallivm_prog with the given cpu based execution engine.
+ The reference to the generated machine code entry point will be stored
+ in the gallivm_prog program. After executing this function one can call gallivm_prog_exec
+ in order to execute the gallivm_prog on the CPU.
+ */
+void gallivm_cpu_jit_compile(struct gallivm_cpu_engine *cpu, struct gallivm_prog *prog)
+{
+ llvm::Module *mod = static_cast<llvm::Module*>(prog->module);
+ llvm::ExistingModuleProvider *mp = new llvm::ExistingModuleProvider(mod);
+ llvm::ExecutionEngine *ee = cpu->engine;
+ assert(ee);
+ /*FIXME : why was this disabled ? we need it for pow/sqrt/... */
+ ee->DisableLazyCompilation(false);
+ ee->addModuleProvider(mp);
+
+ llvm::Function *func = func_for_shader(prog);
+ prog->function = ee->getPointerToFunction(func);
+}
+
+void gallivm_cpu_engine_delete(struct gallivm_cpu_engine *cpu)
+{
+ free(cpu);
+}
+
+struct gallivm_cpu_engine * gallivm_global_cpu_engine()
+{
+ return CPU;
+}
+
+
+typedef void (*vertex_shader_runner)(void *ainputs,
+ void *dests,
+ float (*aconsts)[4]);
+
+#define MAX_TGSI_VERTICES 4
+/*!
+ This function is used to execute the gallivm_prog in software. Before calling
+ this function the gallivm_prog has to be JIT compiled with the gallivm_cpu_jit_compile
+ function.
+ */
+int gallivm_cpu_vs_exec(struct gallivm_prog *prog,
+ struct tgsi_exec_machine *machine,
+ const float (*input)[4],
+ unsigned num_inputs,
+ float (*output)[4],
+ unsigned num_outputs,
+ const float (*constants)[4],
+ unsigned count,
+ unsigned input_stride,
+ unsigned output_stride )
+{
+ unsigned int i, j;
+ unsigned slot;
+ vertex_shader_runner runner = reinterpret_cast<vertex_shader_runner>(prog->function);
+ assert(runner);
+
+ for (i = 0; i < count; i += MAX_TGSI_VERTICES) {
+ unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i);
+
+ /* Swizzle inputs.
+ */
+ for (j = 0; j < max_vertices; j++) {
+ for (slot = 0; slot < num_inputs; slot++) {
+ machine->Inputs[slot].xyzw[0].f[j] = input[slot][0];
+ machine->Inputs[slot].xyzw[1].f[j] = input[slot][1];
+ machine->Inputs[slot].xyzw[2].f[j] = input[slot][2];
+ machine->Inputs[slot].xyzw[3].f[j] = input[slot][3];
+ }
+
+ input = (const float (*)[4])((const char *)input + input_stride);
+ }
+
+ /* run shader */
+ runner(machine->Inputs,
+ machine->Outputs,
+ (float (*)[4]) constants);
+
+ /* Unswizzle all output results
+ */
+ for (j = 0; j < max_vertices; j++) {
+ for (slot = 0; slot < num_outputs; slot++) {
+ output[slot][0] = machine->Outputs[slot].xyzw[0].f[j];
+ output[slot][1] = machine->Outputs[slot].xyzw[1].f[j];
+ output[slot][2] = machine->Outputs[slot].xyzw[2].f[j];
+ output[slot][3] = machine->Outputs[slot].xyzw[3].f[j];
+ }
+ output = (float (*)[4])((char *)output + output_stride);
+ }
+ }
+
+ return 0;
+}
+
+#endif
diff --git a/src/gallium/auxiliary/gallivm/gallivm_p.h b/src/gallium/auxiliary/gallivm/gallivm_p.h
new file mode 100644
index 00000000000..d2c5852bdf7
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/gallivm_p.h
@@ -0,0 +1,110 @@
+#ifndef GALLIVM_P_H
+#define GALLIVM_P_H
+
+#ifdef MESA_LLVM
+
+#include "gallivm.h"
+#include "pipe/p_shader_tokens.h"
+#include "pipe/p_compiler.h"
+
+namespace llvm {
+ class Module;
+}
+
+#if defined __cplusplus
+extern "C" {
+#endif
+
+enum gallivm_shader_type;
+enum gallivm_vector_layout;
+
+struct gallivm_interpolate {
+ int attrib;
+ int chan;
+ int type;
+};
+
+struct gallivm_ir {
+ llvm::Module *module;
+ int id;
+ enum gallivm_shader_type type;
+ enum gallivm_vector_layout layout;
+ int num_components;
+ int num_consts;
+
+ /* FIXME: this might not be enough for some shaders */
+ struct gallivm_interpolate interpolators[32*4];
+ int num_interp;
+};
+
+struct gallivm_prog {
+ llvm::Module *module;
+ void *function;
+
+ int id;
+ enum gallivm_shader_type type;
+
+ int num_consts;
+
+ /* FIXME: this might not be enough for some shaders */
+ struct gallivm_interpolate interpolators[32*4];
+ int num_interp;
+};
+
+static INLINE void gallivm_swizzle_components(int swizzle,
+ int *xc, int *yc,
+ int *zc, int *wc)
+{
+ int x = swizzle / 1000; swizzle -= x * 1000;
+ int y = swizzle / 100; swizzle -= y * 100;
+ int z = swizzle / 10; swizzle -= z * 10;
+ int w = swizzle;
+
+ if (xc) *xc = x;
+ if (yc) *yc = y;
+ if (zc) *zc = z;
+ if (wc) *wc = w;
+}
+
+static INLINE boolean gallivm_is_swizzle(int swizzle)
+{
+ const int NO_SWIZZLE = TGSI_SWIZZLE_X * 1000 + TGSI_SWIZZLE_Y * 100 +
+ TGSI_SWIZZLE_Z * 10 + TGSI_SWIZZLE_W;
+ return swizzle != NO_SWIZZLE;
+}
+
+static INLINE int gallivm_x_swizzle(int swizzle)
+{
+ int x;
+ gallivm_swizzle_components(swizzle, &x, 0, 0, 0);
+ return x;
+}
+
+static INLINE int gallivm_y_swizzle(int swizzle)
+{
+ int y;
+ gallivm_swizzle_components(swizzle, 0, &y, 0, 0);
+ return y;
+}
+
+static INLINE int gallivm_z_swizzle(int swizzle)
+{
+ int z;
+ gallivm_swizzle_components(swizzle, 0, 0, &z, 0);
+ return z;
+}
+
+static INLINE int gallivm_w_swizzle(int swizzle)
+{
+ int w;
+ gallivm_swizzle_components(swizzle, 0, 0, 0, &w);
+ return w;
+}
+
+#if defined __cplusplus
+}
+#endif
+
+#endif /* MESA_LLVM */
+
+#endif
diff --git a/src/gallium/auxiliary/gallivm/instructions.cpp b/src/gallium/auxiliary/gallivm/instructions.cpp
new file mode 100644
index 00000000000..ee8162efce5
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/instructions.cpp
@@ -0,0 +1,1193 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Zack Rusin [email protected]
+ */
+#ifdef MESA_LLVM
+
+#include "instructions.h"
+
+#include "storage.h"
+
+#include "util/u_memory.h"
+
+#include <llvm/CallingConv.h>
+#include <llvm/Constants.h>
+#include <llvm/DerivedTypes.h>
+#include <llvm/Function.h>
+#include <llvm/InstrTypes.h>
+#include <llvm/Instructions.h>
+#include <llvm/Attributes.h>
+#include <llvm/Support/MemoryBuffer.h>
+#include <llvm/Bitcode/ReaderWriter.h>
+
+#include <sstream>
+#include <fstream>
+#include <iostream>
+
+using namespace llvm;
+
+#include "gallivm_builtins.cpp"
+
+#if 0
+llvm::Value *arrayFromChannels(std::vector<llvm::Value*> &vals)
+{
+ VectorType *vectorType = VectorType::get(Type::FloatTy, 4);
+ ArrayType *vectorArray = ArrayType::get(vectorType, 4);
+}
+#endif
+
+static inline std::string createFuncName(int label)
+{
+ std::ostringstream stream;
+ stream << "function";
+ stream << label;
+ return stream.str();
+}
+
+Instructions::Instructions(llvm::Module *mod, llvm::Function *func, llvm::BasicBlock *block,
+ Storage *storage)
+ : m_mod(mod), m_func(func), m_builder(block), m_idx(0),
+ m_storage(storage)
+{
+ m_floatVecType = VectorType::get(Type::FloatTy, 4);
+
+ m_llvmFSqrt = 0;
+ m_llvmFAbs = 0;
+ m_llvmPow = 0;
+ m_llvmFloor = 0;
+ m_llvmFlog = 0;
+ m_llvmFexp = 0;
+ m_llvmLit = 0;
+ m_fmtPtr = 0;
+
+ MemoryBuffer *buffer = MemoryBuffer::getMemBuffer(
+ (const char*)&llvm_builtins_data[0],
+ (const char*)&llvm_builtins_data[Elements(llvm_builtins_data)-1]);
+ m_mod = ParseBitcodeFile(buffer);
+}
+
+llvm::BasicBlock * Instructions::currentBlock() const
+{
+ return m_builder.GetInsertBlock();
+}
+
+llvm::Value * Instructions::abs(llvm::Value *in)
+{
+ std::vector<llvm::Value*> vec = extractVector(in);
+ Value *xabs = callFAbs(vec[0]);
+ Value *yabs = callFAbs(vec[1]);
+ Value *zabs = callFAbs(vec[2]);
+ Value *wabs = callFAbs(vec[3]);
+ return vectorFromVals(xabs, yabs, zabs, wabs);
+}
+
+llvm::Value * Instructions::add(llvm::Value *in1, llvm::Value *in2)
+{
+ return m_builder.CreateAdd(in1, in2, name("add"));
+}
+
+llvm::Value * Instructions::arl(llvm::Value *in)
+{
+ return floor(in);
+}
+
+void Instructions::beginLoop()
+{
+ BasicBlock *begin = BasicBlock::Create(name("loop"), m_func,0);
+ BasicBlock *end = BasicBlock::Create(name("endloop"), m_func,0);
+
+ m_builder.CreateBr(begin);
+ Loop loop;
+ loop.begin = begin;
+ loop.end = end;
+ m_builder.SetInsertPoint(begin);
+ m_loopStack.push(loop);
+}
+
+void Instructions::bgnSub(unsigned label)
+{
+ llvm::Function *func = findFunction(label);
+
+ Function::arg_iterator args = func->arg_begin();
+ Value *ptr_INPUT = args++;
+ ptr_INPUT->setName("INPUT");
+ m_storage->pushArguments(ptr_INPUT);
+
+ llvm::BasicBlock *entry = BasicBlock::Create("entry", func, 0);
+
+ m_func = func;
+ m_builder.SetInsertPoint(entry);
+}
+
+void Instructions::brk()
+{
+ assert(!m_loopStack.empty());
+ BasicBlock *unr = BasicBlock::Create(name("unreachable"), m_func,0);
+ m_builder.CreateBr(m_loopStack.top().end);
+ m_builder.SetInsertPoint(unr);
+}
+
+void Instructions::cal(int label, llvm::Value *input)
+{
+ std::vector<Value*> params;
+ params.push_back(input);
+ llvm::Function *func = findFunction(label);
+
+ m_builder.CreateCall(func, params.begin(), params.end());
+}
+
+llvm::Value * Instructions::ceil(llvm::Value *in)
+{
+ std::vector<llvm::Value*> vec = extractVector(in);
+ return vectorFromVals(callCeil(vec[0]), callCeil(vec[1]),
+ callCeil(vec[2]), callCeil(vec[3]));
+}
+
+llvm::Value * Instructions::clamp(llvm::Value *in1)
+{
+ llvm::Value *zero = constVector(0.0f, 0.0f, 0.0f, 0.0f);
+ llvm::Value *one = constVector(1.0f, 1.0f, 1.0f, 1.0f);
+ return min( max(zero, in1), one);
+}
+
+llvm::Value * Instructions::cmp(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3)
+{
+ llvm::Function *func = m_mod->getFunction("cmp");
+ assert(func);
+
+ std::vector<Value*> params;
+ params.push_back(in1);
+ params.push_back(in2);
+ params.push_back(in3);
+ CallInst *call = m_builder.CreateCall(func, params.begin(), params.end(), name("cmpres"));
+ call->setTailCall(false);
+ return call;
+}
+
+llvm::Value * Instructions::cnd(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3)
+{
+ std::vector<llvm::Value*> vec1 = extractVector(in1);
+ std::vector<llvm::Value*> vec2 = extractVector(in2);
+ std::vector<llvm::Value*> vec3 = extractVector(in3);
+ Constant *half = ConstantFP::get(APFloat(0.5f));
+
+ Value *xcmp = m_builder.CreateFCmpOGT(vec1[0], half, name("xcmp"));
+ Value *selx = m_builder.CreateSelect(xcmp, vec2[0], vec3[0],
+ name("selx"));
+
+ Value *ycmp = m_builder.CreateFCmpOGT(vec1[1], half, name("ycmp"));
+ Value *sely = m_builder.CreateSelect(ycmp, vec2[1], vec3[1],
+ name("sely"));
+
+ Value *zcmp = m_builder.CreateFCmpOGT(vec1[2], half, name("zcmp"));
+ Value *selz = m_builder.CreateSelect(zcmp, vec2[2], vec3[2],
+ name("selz"));
+
+ Value *wcmp = m_builder.CreateFCmpOGT(vec1[3], half, name("wcmp"));
+ Value *selw = m_builder.CreateSelect(wcmp, vec2[3], vec3[3],
+ name("selw"));
+
+ return vectorFromVals(selx, sely, selz, selw);
+}
+
+llvm::Value * Instructions::cnd0(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3)
+{
+ std::vector<llvm::Value*> vec1 = extractVector(in1);
+ std::vector<llvm::Value*> vec2 = extractVector(in2);
+ std::vector<llvm::Value*> vec3 = extractVector(in3);
+ Constant *zero = Constant::getNullValue(Type::FloatTy);
+
+ Value *xcmp = m_builder.CreateFCmpOGE(vec1[0], zero, name("xcmp"));
+ Value *selx = m_builder.CreateSelect(xcmp, vec2[0], vec3[0],
+ name("selx"));
+
+ Value *ycmp = m_builder.CreateFCmpOGE(vec1[1], zero, name("ycmp"));
+ Value *sely = m_builder.CreateSelect(ycmp, vec2[1], vec3[1],
+ name("sely"));
+
+ Value *zcmp = m_builder.CreateFCmpOGE(vec1[2], zero, name("zcmp"));
+ Value *selz = m_builder.CreateSelect(zcmp, vec2[2], vec3[2],
+ name("selz"));
+
+ Value *wcmp = m_builder.CreateFCmpOGE(vec1[3], zero, name("wcmp"));
+ Value *selw = m_builder.CreateSelect(wcmp, vec2[3], vec3[3],
+ name("selw"));
+
+ return vectorFromVals(selx, sely, selz, selw);
+}
+
+llvm::Value * Instructions::cos(llvm::Value *in)
+{
+#if 0
+ llvm::Function *func = m_mod->getFunction("vcos");
+ assert(func);
+
+ CallInst *call = m_builder.CreateCall(func, in, name("cosres"));
+ call->setTailCall(false);
+ return call;
+#else
+ std::vector<llvm::Value*> elems = extractVector(in);
+ Function *func = m_mod->getFunction("cosf");
+ assert(func);
+ CallInst *cos = m_builder.CreateCall(func, elems[0], name("cosres"));
+ cos->setCallingConv(CallingConv::C);
+ cos->setTailCall(true);
+ return vectorFromVals(cos, cos, cos, cos);
+#endif
+}
+
+llvm::Value * Instructions::cross(llvm::Value *in1, llvm::Value *in2)
+{
+ Value *x1 = m_builder.CreateExtractElement(in1,
+ m_storage->constantInt(0),
+ name("x1"));
+ Value *y1 = m_builder.CreateExtractElement(in1,
+ m_storage->constantInt(1),
+ name("y1"));
+ Value *z1 = m_builder.CreateExtractElement(in1,
+ m_storage->constantInt(2),
+ name("z1"));
+
+ Value *x2 = m_builder.CreateExtractElement(in2,
+ m_storage->constantInt(0),
+ name("x2"));
+ Value *y2 = m_builder.CreateExtractElement(in2,
+ m_storage->constantInt(1),
+ name("y2"));
+ Value *z2 = m_builder.CreateExtractElement(in2,
+ m_storage->constantInt(2),
+ name("z2"));
+ Value *y1z2 = mul(y1, z2);
+ Value *z1y2 = mul(z1, y2);
+
+ Value *z1x2 = mul(z1, x2);
+ Value *x1z2 = mul(x1, z2);
+
+ Value *x1y2 = mul(x1, y2);
+ Value *y1x2 = mul(y1, x2);
+
+ return vectorFromVals(sub(y1z2, z1y2), sub(z1x2, x1z2), sub(x1y2, y1x2));
+}
+
+llvm::Value * Instructions::ddx(llvm::Value *in)
+{
+ // FIXME
+ assert(0);
+}
+
+llvm::Value * Instructions::ddy(llvm::Value *in)
+{
+ // FIXME
+ assert(0);
+}
+
+llvm::Value * Instructions::div(llvm::Value *in1, llvm::Value *in2)
+{
+ return m_builder.CreateFDiv(in1, in2, name("div"));
+}
+
+llvm::Value * Instructions::dot2add(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3)
+{
+ Value *mulRes = mul(in1, in2);
+ Value *x = m_builder.CreateExtractElement(mulRes,
+ m_storage->constantInt(0),
+ name("extractx"));
+ Value *y = m_builder.CreateExtractElement(mulRes,
+ m_storage->constantInt(1),
+ name("extracty"));
+ Value *z = m_builder.CreateExtractElement(in3,
+ m_storage->constantInt(2),
+ name("extractz"));
+ Value *xy = m_builder.CreateAdd(x, y,name("xy"));
+ Value *dot2add = m_builder.CreateAdd(xy, z, name("dot2add"));
+ return vectorFromVals(dot2add, dot2add, dot2add, dot2add);
+}
+
+llvm::Value * Instructions::dp2(llvm::Value *in1, llvm::Value *in2)
+{
+ Value *mulRes = mul(in1, in2);
+ Value *x = m_builder.CreateExtractElement(mulRes,
+ m_storage->constantInt(0),
+ name("extractx"));
+ Value *y = m_builder.CreateExtractElement(mulRes,
+ m_storage->constantInt(1),
+ name("extracty"));
+ Value *xy = m_builder.CreateAdd(x, y,name("xy"));
+ return vectorFromVals(xy, xy, xy, xy);
+}
+
+llvm::Value * Instructions::dp3(llvm::Value *in1, llvm::Value *in2)
+{
+ Value *mulRes = mul(in1, in2);
+ Value *x = m_builder.CreateExtractElement(mulRes,
+ m_storage->constantInt(0),
+ name("extractx"));
+ Value *y = m_builder.CreateExtractElement(mulRes,
+ m_storage->constantInt(1),
+ name("extracty"));
+ Value *z = m_builder.CreateExtractElement(mulRes,
+ m_storage->constantInt(2),
+ name("extractz"));
+ Value *xy = m_builder.CreateAdd(x, y,name("xy"));
+ Value *dot3 = m_builder.CreateAdd(xy, z, name("dot3"));
+ return vectorFromVals(dot3, dot3, dot3, dot3);
+}
+
+llvm::Value * Instructions::dp4(llvm::Value *in1, llvm::Value *in2)
+{
+ Value *mulRes = mul(in1, in2);
+ std::vector<llvm::Value*> vec = extractVector(mulRes);
+ Value *xy = m_builder.CreateAdd(vec[0], vec[1], name("xy"));
+ Value *xyz = m_builder.CreateAdd(xy, vec[2], name("xyz"));
+ Value *dot4 = m_builder.CreateAdd(xyz, vec[3], name("dot4"));
+ return vectorFromVals(dot4, dot4, dot4, dot4);
+}
+
+llvm::Value * Instructions::dph(llvm::Value *in1, llvm::Value *in2)
+{
+ Value *mulRes = mul(in1, in2);
+ std::vector<llvm::Value*> vec1 = extractVector(mulRes);
+ Value *xy = m_builder.CreateAdd(vec1[0], vec1[1], name("xy"));
+ Value *xyz = m_builder.CreateAdd(xy, vec1[2], name("xyz"));
+ Value *dph = m_builder.CreateAdd(xyz, vec1[3], name("dph"));
+ return vectorFromVals(dph, dph, dph, dph);
+}
+
+llvm::Value * Instructions::dst(llvm::Value *in1, llvm::Value *in2)
+{
+ Value *y1 = m_builder.CreateExtractElement(in1,
+ m_storage->constantInt(1),
+ name("y1"));
+ Value *z = m_builder.CreateExtractElement(in1,
+ m_storage->constantInt(2),
+ name("z"));
+ Value *y2 = m_builder.CreateExtractElement(in2,
+ m_storage->constantInt(1),
+ name("y2"));
+ Value *w = m_builder.CreateExtractElement(in2,
+ m_storage->constantInt(3),
+ name("w"));
+ Value *ry = m_builder.CreateMul(y1, y2, name("tyuy"));
+ return vectorFromVals(ConstantFP::get(APFloat(1.f)),
+ ry, z, w);
+}
+
+void Instructions::elseop()
+{
+ assert(!m_ifStack.empty());
+ BasicBlock *ifend = BasicBlock::Create(name("ifend"), m_func,0);
+ m_builder.CreateBr(ifend);
+ m_builder.SetInsertPoint(m_ifStack.top());
+ currentBlock()->setName(name("ifelse"));
+ m_ifStack.pop();
+ m_ifStack.push(ifend);
+}
+
+void Instructions::endif()
+{
+ assert(!m_ifStack.empty());
+ m_builder.CreateBr(m_ifStack.top());
+ m_builder.SetInsertPoint(m_ifStack.top());
+ m_ifStack.pop();
+}
+
+void Instructions::endLoop()
+{
+ assert(!m_loopStack.empty());
+ Loop loop = m_loopStack.top();
+ m_builder.CreateBr(loop.begin);
+ loop.end->moveAfter(currentBlock());
+ m_builder.SetInsertPoint(loop.end);
+ m_loopStack.pop();
+}
+
+void Instructions::end()
+{
+ m_builder.CreateRetVoid();
+}
+
+void Instructions::endSub()
+{
+ m_func = 0;
+ m_builder.SetInsertPoint(0);
+}
+
+llvm::Value * Instructions::exp(llvm::Value *in)
+{
+ std::vector<llvm::Value*> vec = extractVector(in);
+ return vectorFromVals(callFExp(vec[0]), callFExp(vec[1]),
+ callFExp(vec[2]), callFExp(vec[3]));
+}
+
+llvm::Value * Instructions::ex2(llvm::Value *in)
+{
+ llvm::Value *val = callPow(ConstantFP::get(APFloat(2.f)),
+ m_builder.CreateExtractElement(
+ in, m_storage->constantInt(0),
+ name("x1")));
+ return vectorFromVals(val, val, val, val);
+}
+
+llvm::Value * Instructions::floor(llvm::Value *in)
+{
+ std::vector<llvm::Value*> vec = extractVector(in);
+ return vectorFromVals(callFloor(vec[0]), callFloor(vec[1]),
+ callFloor(vec[2]), callFloor(vec[3]));
+}
+
+llvm::Value * Instructions::frc(llvm::Value *in)
+{
+ llvm::Value *flr = floor(in);
+ return sub(in, flr);
+}
+
+void Instructions::ifop(llvm::Value *in)
+{
+ BasicBlock *ifthen = BasicBlock::Create(name("ifthen"), m_func,0);
+ BasicBlock *ifend = BasicBlock::Create(name("ifthenend"), m_func,0);
+
+ //BasicBlock *yblock = new BasicBlock(name("yblock"), m_func,0);
+ //BasicBlock *zblock = new BasicBlock(name("zblock"), m_func,0);
+ //BasicBlock *wblock = new BasicBlock(name("wblock"), m_func,0);
+
+ Constant *float0 = Constant::getNullValue(Type::FloatTy);
+
+ Value *x = m_builder.CreateExtractElement(in, m_storage->constantInt(0),
+ name("extractx"));
+ Value *xcmp = m_builder.CreateFCmpUNE(x, float0, name("xcmp"));
+ m_builder.CreateCondBr(xcmp, ifthen, ifend);
+ //m_builder.SetInsertPoint(yblock);
+
+ m_builder.SetInsertPoint(ifthen);
+ m_ifStack.push(ifend);
+}
+
+llvm::Value * Instructions::kil(llvm::Value *in)
+{
+ llvm::Function *func = m_mod->getFunction("kil");
+ assert(func);
+
+ CallInst *call = m_builder.CreateCall(func, in, name("kilpres"));
+ call->setTailCall(false);
+ return call;
+}
+
+llvm::Value * Instructions::lerp(llvm::Value *in1, llvm::Value *in2,
+ llvm::Value *in3)
+{
+ llvm::Value *m = mul(in1, in2);
+ llvm::Value *vec1 = constVector(1.f, 1.f, 1.f, 1.f);
+ llvm::Value *s = sub(vec1, in1);
+ return add(m, mul(s, in3));
+}
+
+llvm::Value * Instructions::lg2(llvm::Value *in)
+{
+ std::vector<llvm::Value*> vec = extractVector(in);
+ llvm::Value *const_vec = constVector(1.442695f, 1.442695f,
+ 1.442695f, 1.442695f);
+ return mul(vectorFromVals(callFLog(vec[0]), callFLog(vec[1]),
+ callFLog(vec[2]), callFLog(vec[3])), const_vec);
+}
+
+llvm::Value * Instructions::lit(llvm::Value *in)
+{
+ if (!m_llvmLit) {
+ m_llvmLit = m_mod->getFunction("lit");
+ }
+ CallInst *call = m_builder.CreateCall(m_llvmLit, in, name("litres"));
+ call->setCallingConv(CallingConv::C);
+ call->setTailCall(false);
+ return call;
+}
+
+llvm::Value * Instructions::log(llvm::Value *in)
+{
+ std::vector<llvm::Value*> vec = extractVector(in);
+ return vectorFromVals(callFLog(vec[0]), callFLog(vec[1]),
+ callFLog(vec[2]), callFLog(vec[3]));
+}
+
+llvm::Value * Instructions::madd(llvm::Value *in1, llvm::Value *in2,
+ llvm::Value *in3)
+{
+ Value *mulRes = mul(in1, in2);
+ return add(mulRes, in3);
+}
+
+llvm::Value * Instructions::max(llvm::Value *in1, llvm::Value *in2)
+{
+ std::vector<llvm::Value*> vec1 = extractVector(in1);
+ std::vector<llvm::Value*> vec2 = extractVector(in2);
+
+ Value *xcmp = m_builder.CreateFCmpOGT(vec1[0], vec2[0],
+ name("xcmp"));
+ Value *selx = m_builder.CreateSelect(xcmp, vec1[0], vec2[0],
+ name("selx"));
+
+ Value *ycmp = m_builder.CreateFCmpOGT(vec1[1], vec2[1],
+ name("ycmp"));
+ Value *sely = m_builder.CreateSelect(ycmp, vec1[1], vec2[1],
+ name("sely"));
+
+ Value *zcmp = m_builder.CreateFCmpOGT(vec1[2], vec2[2],
+ name("zcmp"));
+ Value *selz = m_builder.CreateSelect(zcmp, vec1[2], vec2[2],
+ name("selz"));
+
+ Value *wcmp = m_builder.CreateFCmpOGT(vec1[3], vec2[3],
+ name("wcmp"));
+ Value *selw = m_builder.CreateSelect(wcmp, vec1[3], vec2[3],
+ name("selw"));
+
+ return vectorFromVals(selx, sely, selz, selw);
+}
+
+llvm::Value * Instructions::min(llvm::Value *in1, llvm::Value *in2)
+{
+ std::vector<llvm::Value*> vec1 = extractVector(in1);
+ std::vector<llvm::Value*> vec2 = extractVector(in2);
+
+ Value *xcmp = m_builder.CreateFCmpOLT(vec1[0], vec2[0], name("xcmp"));
+ Value *selx = m_builder.CreateSelect(xcmp, vec1[0], vec2[0],
+ name("selx"));
+
+ Value *ycmp = m_builder.CreateFCmpOLT(vec1[1], vec2[1], name("ycmp"));
+ Value *sely = m_builder.CreateSelect(ycmp, vec1[1], vec2[1],
+ name("sely"));
+
+ Value *zcmp = m_builder.CreateFCmpOLT(vec1[2], vec2[2], name("zcmp"));
+ Value *selz = m_builder.CreateSelect(zcmp, vec1[2], vec2[2],
+ name("selz"));
+
+ Value *wcmp = m_builder.CreateFCmpOLT(vec1[3], vec2[3], name("wcmp"));
+ Value *selw = m_builder.CreateSelect(wcmp, vec1[3], vec2[3],
+ name("selw"));
+
+ return vectorFromVals(selx, sely, selz, selw);
+}
+
+llvm::Value * Instructions::mul(llvm::Value *in1, llvm::Value *in2)
+{
+ return m_builder.CreateMul(in1, in2, name("mul"));
+}
+
+llvm::Value * Instructions::neg(llvm::Value *in)
+{
+ Value *neg = m_builder.CreateNeg(in, name("neg"));
+ return neg;
+}
+
+llvm::Value * Instructions::nrm(llvm::Value *in)
+{
+ llvm::Value *v = rsq(in);
+ return mul(v, in);
+}
+
+llvm::Value * Instructions::pow(llvm::Value *in1, llvm::Value *in2)
+{
+ Value *x1 = m_builder.CreateExtractElement(in1,
+ m_storage->constantInt(0),
+ name("x1"));
+ Value *x2 = m_builder.CreateExtractElement(in2,
+ m_storage->constantInt(0),
+ name("x2"));
+ llvm::Value *val = callPow(x1, x2);
+ return vectorFromVals(val, val, val, val);
+}
+
+llvm::Value * Instructions::rcp(llvm::Value *in1)
+{
+ Value *x1 = m_builder.CreateExtractElement(in1,
+ m_storage->constantInt(0),
+ name("x1"));
+ Value *res = m_builder.CreateFDiv(ConstantFP::get(APFloat(1.f)),
+ x1, name("rcp"));
+ return vectorFromVals(res, res, res, res);
+}
+
+llvm::Value * Instructions::rsq(llvm::Value *in1)
+{
+ Value *x = m_builder.CreateExtractElement(in1,
+ m_storage->constantInt(0),
+ name("extractx"));
+ Value *abs = callFAbs(x);
+ Value *sqrt = callFSqrt(abs);
+
+ Value *rsqrt = m_builder.CreateFDiv(ConstantFP::get(APFloat(1.f)),
+ sqrt,
+ name("rsqrt"));
+ return vectorFromVals(rsqrt, rsqrt, rsqrt, rsqrt);
+}
+
+llvm::Value * Instructions::scs(llvm::Value *in)
+{
+ llvm::Function *func = m_mod->getFunction("scs");
+ assert(func);
+
+ CallInst *call = m_builder.CreateCall(func, in, name("scsres"));
+ call->setTailCall(false);
+ return call;
+}
+
+llvm::Value * Instructions::seq(llvm::Value *in1, llvm::Value *in2)
+{
+ Constant *const1f = ConstantFP::get(APFloat(1.000000e+00f));
+ Constant *const0f = Constant::getNullValue(Type::FloatTy);
+
+ std::vector<llvm::Value*> vec1 = extractVector(in1);
+ std::vector<llvm::Value*> vec2 = extractVector(in2);
+
+ Value *xcmp = m_builder.CreateFCmpOEQ(vec1[0], vec2[0], name("xcmp"));
+ Value *x = m_builder.CreateSelect(xcmp, const1f, const0f, name("xsel"));
+
+ Value *ycmp = m_builder.CreateFCmpOEQ(vec1[1], vec2[1], name("ycmp"));
+ Value *y = m_builder.CreateSelect(ycmp, const1f, const0f, name("ysel"));
+
+ Value *zcmp = m_builder.CreateFCmpOEQ(vec1[2], vec2[2], name("zcmp"));
+ Value *z = m_builder.CreateSelect(zcmp, const1f, const0f, name("zsel"));
+
+ Value *wcmp = m_builder.CreateFCmpOEQ(vec1[3], vec2[3], name("wcmp"));
+ Value *w = m_builder.CreateSelect(wcmp, const1f, const0f, name("wsel"));
+
+ return vectorFromVals(x, y, z, w);
+}
+
+llvm::Value * Instructions::sfl(llvm::Value *in1, llvm::Value *in2)
+{
+ Constant *const0f = Constant::getNullValue(Type::FloatTy);
+
+ return vectorFromVals(const0f, const0f, const0f, const0f);
+}
+
+llvm::Value * Instructions::sge(llvm::Value *in1, llvm::Value *in2)
+{
+ Constant *const1f = ConstantFP::get(APFloat(1.000000e+00f));
+ Constant *const0f = Constant::getNullValue(Type::FloatTy);
+
+ std::vector<llvm::Value*> vec1 = extractVector(in1);
+ std::vector<llvm::Value*> vec2 = extractVector(in2);
+
+ Value *xcmp = m_builder.CreateFCmpOGE(vec1[0], vec2[0], name("xcmp"));
+ Value *x = m_builder.CreateSelect(xcmp, const1f, const0f, name("xsel"));
+
+ Value *ycmp = m_builder.CreateFCmpOGE(vec1[1], vec2[1], name("ycmp"));
+ Value *y = m_builder.CreateSelect(ycmp, const1f, const0f, name("ysel"));
+
+ Value *zcmp = m_builder.CreateFCmpOGE(vec1[2], vec2[2], name("zcmp"));
+ Value *z = m_builder.CreateSelect(zcmp, const1f, const0f, name("zsel"));
+
+ Value *wcmp = m_builder.CreateFCmpOGE(vec1[3], vec2[3], name("wcmp"));
+ Value *w = m_builder.CreateSelect(wcmp, const1f, const0f, name("wsel"));
+
+ return vectorFromVals(x, y, z, w);
+}
+
+llvm::Value * Instructions::sgt(llvm::Value *in1, llvm::Value *in2)
+{
+ Constant *const1f = ConstantFP::get(APFloat(1.000000e+00f));
+ Constant *const0f = Constant::getNullValue(Type::FloatTy);
+
+ std::vector<llvm::Value*> vec1 = extractVector(in1);
+ std::vector<llvm::Value*> vec2 = extractVector(in2);
+ Value *xcmp = m_builder.CreateFCmpOGT(vec1[0], vec2[0], name("xcmp"));
+ Value *x = m_builder.CreateSelect(xcmp, const1f, const0f, name("xsel"));
+
+ Value *ycmp = m_builder.CreateFCmpOGT(vec1[1], vec2[1], name("ycmp"));
+ Value *y = m_builder.CreateSelect(ycmp, const1f, const0f, name("ysel"));
+
+ Value *zcmp = m_builder.CreateFCmpOGT(vec1[2], vec2[2], name("zcmp"));
+ Value *z = m_builder.CreateSelect(zcmp, const1f, const0f, name("zsel"));
+
+ Value *wcmp = m_builder.CreateFCmpOGT(vec1[3], vec2[3], name("wcmp"));
+ Value *w = m_builder.CreateSelect(wcmp, const1f, const0f, name("wsel"));
+
+ return vectorFromVals(x, y, z, w);
+}
+
+llvm::Value * Instructions::sin(llvm::Value *in)
+{
+ llvm::Function *func = m_mod->getFunction("vsin");
+ assert(func);
+
+ CallInst *call = m_builder.CreateCall(func, in, name("sinres"));
+ call->setTailCall(false);
+ return call;
+}
+
+llvm::Value * Instructions::sle(llvm::Value *in1, llvm::Value *in2)
+{
+ Constant *const1f = ConstantFP::get(APFloat(1.000000e+00f));
+ Constant *const0f = Constant::getNullValue(Type::FloatTy);
+
+ std::vector<llvm::Value*> vec1 = extractVector(in1);
+ std::vector<llvm::Value*> vec2 = extractVector(in2);
+
+ Value *xcmp = m_builder.CreateFCmpOLE(vec1[0], vec2[0], name("xcmp"));
+ Value *x = m_builder.CreateSelect(xcmp, const1f, const0f, name("xsel"));
+
+ Value *ycmp = m_builder.CreateFCmpOLE(vec1[1], vec2[1], name("ycmp"));
+ Value *y = m_builder.CreateSelect(ycmp, const1f, const0f, name("ysel"));
+
+ Value *zcmp = m_builder.CreateFCmpOLE(vec1[2], vec2[2], name("zcmp"));
+ Value *z = m_builder.CreateSelect(zcmp, const1f, const0f, name("zsel"));
+
+ Value *wcmp = m_builder.CreateFCmpOLE(vec1[3], vec2[3], name("wcmp"));
+ Value *w = m_builder.CreateSelect(wcmp, const1f, const0f, name("wsel"));
+
+ return vectorFromVals(x, y, z, w);
+}
+
+llvm::Value * Instructions::slt(llvm::Value *in1, llvm::Value *in2)
+{
+ Constant *const1f = ConstantFP::get(APFloat(1.000000e+00f));
+ Constant *const0f = Constant::getNullValue(Type::FloatTy);
+
+ std::vector<llvm::Value*> vec1 = extractVector(in1);
+ std::vector<llvm::Value*> vec2 = extractVector(in2);
+
+ Value *xcmp = m_builder.CreateFCmpOLT(vec1[0], vec2[0], name("xcmp"));
+ Value *x = m_builder.CreateSelect(xcmp, const1f, const0f, name("xsel"));
+
+ Value *ycmp = m_builder.CreateFCmpOLT(vec1[1], vec2[1], name("ycmp"));
+ Value *y = m_builder.CreateSelect(ycmp, const1f, const0f, name("ysel"));
+
+ Value *zcmp = m_builder.CreateFCmpOLT(vec1[2], vec2[2], name("zcmp"));
+ Value *z = m_builder.CreateSelect(zcmp, const1f, const0f, name("zsel"));
+
+ Value *wcmp = m_builder.CreateFCmpOLT(vec1[3], vec2[3], name("wcmp"));
+ Value *w = m_builder.CreateSelect(wcmp, const1f, const0f, name("wsel"));
+
+ return vectorFromVals(x, y, z, w);
+}
+
+llvm::Value * Instructions::sne(llvm::Value *in1, llvm::Value *in2)
+{
+ Constant *const1f = ConstantFP::get(APFloat(1.000000e+00f));
+ Constant *const0f = Constant::getNullValue(Type::FloatTy);
+
+ std::vector<llvm::Value*> vec1 = extractVector(in1);
+ std::vector<llvm::Value*> vec2 = extractVector(in2);
+
+ Value *xcmp = m_builder.CreateFCmpONE(vec1[0], vec2[0], name("xcmp"));
+ Value *x = m_builder.CreateSelect(xcmp, const1f, const0f, name("xsel"));
+
+ Value *ycmp = m_builder.CreateFCmpONE(vec1[1], vec2[1], name("ycmp"));
+ Value *y = m_builder.CreateSelect(ycmp, const1f, const0f, name("ysel"));
+
+ Value *zcmp = m_builder.CreateFCmpONE(vec1[2], vec2[2], name("zcmp"));
+ Value *z = m_builder.CreateSelect(zcmp, const1f, const0f, name("zsel"));
+
+ Value *wcmp = m_builder.CreateFCmpONE(vec1[3], vec2[3], name("wcmp"));
+ Value *w = m_builder.CreateSelect(wcmp, const1f, const0f, name("wsel"));
+
+ return vectorFromVals(x, y, z, w);
+}
+
+llvm::Value * Instructions::str(llvm::Value *in1, llvm::Value *in2)
+{
+ Constant *const1f = ConstantFP::get(APFloat(1.000000e+00f));
+
+ return vectorFromVals(const1f, const1f, const1f, const1f);
+}
+
+llvm::Value * Instructions::sub(llvm::Value *in1, llvm::Value *in2)
+{
+ Value *res = m_builder.CreateSub(in1, in2, name("sub"));
+ return res;
+}
+
+llvm::Value * Instructions::trunc(llvm::Value *in)
+{
+ std::vector<llvm::Value*> vec = extractVector(in);
+ Value *icastx = m_builder.CreateFPToSI(vec[0], IntegerType::get(32),
+ name("ftoix"));
+ Value *icasty = m_builder.CreateFPToSI(vec[1], IntegerType::get(32),
+ name("ftoiy"));
+ Value *icastz = m_builder.CreateFPToSI(vec[2], IntegerType::get(32),
+ name("ftoiz"));
+ Value *icastw = m_builder.CreateFPToSI(vec[3], IntegerType::get(32),
+ name("ftoiw"));
+ Value *fx = m_builder.CreateSIToFP(icastx, Type::FloatTy,
+ name("fx"));
+ Value *fy = m_builder.CreateSIToFP(icasty, Type::FloatTy,
+ name("fy"));
+ Value *fz = m_builder.CreateSIToFP(icastz, Type::FloatTy,
+ name("fz"));
+ Value *fw = m_builder.CreateSIToFP(icastw, Type::FloatTy,
+ name("fw"));
+ return vectorFromVals(fx, fy, fz, fw);
+}
+
+llvm::Value * Instructions::x2d(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3)
+{
+ std::vector<llvm::Value*> vec1 = extractVector(in1);
+ std::vector<llvm::Value*> vec2 = extractVector(in2);
+ std::vector<llvm::Value*> vec3 = extractVector(in3);
+
+ Value *x2x3 = m_builder.CreateMul( vec2[0], vec3[0], name("x2x3"));
+ Value *y2y3 = m_builder.CreateMul( vec2[1], vec3[1], name("y2y3"));
+ Value *x1px2x3 = m_builder.CreateAdd (vec1[0], x2x3, name("x1 + x2x3"));
+ Value *x1px2x3py2y3 = m_builder.CreateAdd (x1px2x3, y2y3, name("x1 + x2x3 + y2y3"));
+
+ Value *x2z3 = m_builder.CreateMul( vec2[0], vec3[2], name("x2z3"));
+ Value *y2w3 = m_builder.CreateMul( vec2[1], vec3[3], name("y2w3"));
+ Value *y1px2z3 = m_builder.CreateAdd (vec1[1], x2z3, name("y1 + x2z3"));
+ Value *y1px2z3py2w3 = m_builder.CreateAdd (y1px2z3, y2w3, name("y1 + x2z3 + y2w3"));
+
+ return vectorFromVals(x1px2x3py2y3, y1px2z3py2w3, x1px2x3py2y3, y1px2z3py2w3);
+}
+
+void Instructions::printVector(llvm::Value *val)
+{
+ static const char *frmt = "Vector is [%f, %f, %f, %f]\x0A";
+
+ if (!m_fmtPtr) {
+ Constant *format = ConstantArray::get(frmt, true);
+ ArrayType *arrayTy = ArrayType::get(IntegerType::get(8), strlen(frmt) + 1);
+ GlobalVariable* globalFormat = new GlobalVariable(
+ /*Type=*/arrayTy,
+ /*isConstant=*/true,
+ /*Linkage=*/GlobalValue::InternalLinkage,
+ /*Initializer=*/0, // has initializer, specified below
+ /*Name=*/name(".str"),
+ m_mod);
+ globalFormat->setInitializer(format);
+
+ Constant* const_int0 = Constant::getNullValue(IntegerType::get(32));
+ std::vector<Constant*> const_ptr_21_indices;
+ const_ptr_21_indices.push_back(const_int0);
+ const_ptr_21_indices.push_back(const_int0);
+ m_fmtPtr = ConstantExpr::getGetElementPtr(globalFormat,
+ &const_ptr_21_indices[0], const_ptr_21_indices.size());
+ }
+
+ Function *func_printf = m_mod->getFunction("printf");
+ if (!func_printf)
+ func_printf = declarePrintf();
+ assert(func_printf);
+ std::vector<llvm::Value*> vec = extractVector(val);
+ Value *dx = m_builder.CreateFPExt(vec[0], Type::DoubleTy, name("dx"));
+ Value *dy = m_builder.CreateFPExt(vec[1], Type::DoubleTy, name("dy"));
+ Value *dz = m_builder.CreateFPExt(vec[2], Type::DoubleTy, name("dz"));
+ Value *dw = m_builder.CreateFPExt(vec[3], Type::DoubleTy, name("dw"));
+ std::vector<Value*> params;
+ params.push_back(m_fmtPtr);
+ params.push_back(dx);
+ params.push_back(dy);
+ params.push_back(dz);
+ params.push_back(dw);
+ CallInst *call = m_builder.CreateCall(func_printf, params.begin(), params.end(),
+ name("printf"));
+ call->setCallingConv(CallingConv::C);
+ call->setTailCall(true);
+}
+
+const char * Instructions::name(const char *prefix)
+{
+ ++m_idx;
+ snprintf(m_name, 32, "%s%d", prefix, m_idx);
+ return m_name;
+}
+
+llvm::Value * Instructions::callCeil(llvm::Value *val)
+{
+ if (!m_llvmCeil) {
+ // predeclare the intrinsic
+ std::vector<const Type*> ceilArgs;
+ ceilArgs.push_back(Type::FloatTy);
+ AttrListPtr ceilPal;
+ FunctionType* ceilType = FunctionType::get(
+ /*Result=*/Type::FloatTy,
+ /*Params=*/ceilArgs,
+ /*isVarArg=*/false);
+ m_llvmCeil = Function::Create(
+ /*Type=*/ceilType,
+ /*Linkage=*/GlobalValue::ExternalLinkage,
+ /*Name=*/"ceilf", m_mod);
+ m_llvmCeil->setCallingConv(CallingConv::C);
+ m_llvmCeil->setAttributes(ceilPal);
+ }
+ CallInst *call = m_builder.CreateCall(m_llvmCeil, val,
+ name("ceilf"));
+ call->setCallingConv(CallingConv::C);
+ call->setTailCall(false);
+ return call;
+}
+
+llvm::Value *Instructions::callFAbs(llvm::Value *val)
+{
+ if (!m_llvmFAbs) {
+ // predeclare the intrinsic
+ std::vector<const Type*> fabsArgs;
+ fabsArgs.push_back(Type::FloatTy);
+ AttrListPtr fabsPal;
+ FunctionType* fabsType = FunctionType::get(
+ /*Result=*/Type::FloatTy,
+ /*Params=*/fabsArgs,
+ /*isVarArg=*/false);
+ m_llvmFAbs = Function::Create(
+ /*Type=*/fabsType,
+ /*Linkage=*/GlobalValue::ExternalLinkage,
+ /*Name=*/"fabs", m_mod);
+ m_llvmFAbs->setCallingConv(CallingConv::C);
+ m_llvmFAbs->setAttributes(fabsPal);
+ }
+ CallInst *call = m_builder.CreateCall(m_llvmFAbs, val,
+ name("fabs"));
+ call->setCallingConv(CallingConv::C);
+ call->setTailCall(false);
+ return call;
+}
+
+llvm::Value * Instructions::callFExp(llvm::Value *val)
+{
+ if (!m_llvmFexp) {
+ // predeclare the intrinsic
+ std::vector<const Type*> fexpArgs;
+ fexpArgs.push_back(Type::FloatTy);
+ AttrListPtr fexpPal;
+ FunctionType* fexpType = FunctionType::get(
+ /*Result=*/Type::FloatTy,
+ /*Params=*/fexpArgs,
+ /*isVarArg=*/false);
+ m_llvmFexp = Function::Create(
+ /*Type=*/fexpType,
+ /*Linkage=*/GlobalValue::ExternalLinkage,
+ /*Name=*/"expf", m_mod);
+ m_llvmFexp->setCallingConv(CallingConv::C);
+ m_llvmFexp->setAttributes(fexpPal);
+ }
+ CallInst *call = m_builder.CreateCall(m_llvmFexp, val,
+ name("expf"));
+ call->setCallingConv(CallingConv::C);
+ call->setTailCall(false);
+ return call;
+}
+
+llvm::Value * Instructions::callFLog(llvm::Value *val)
+{
+ if (!m_llvmFlog) {
+ // predeclare the intrinsic
+ std::vector<const Type*> flogArgs;
+ flogArgs.push_back(Type::FloatTy);
+ AttrListPtr flogPal;
+ FunctionType* flogType = FunctionType::get(
+ /*Result=*/Type::FloatTy,
+ /*Params=*/flogArgs,
+ /*isVarArg=*/false);
+ m_llvmFlog = Function::Create(
+ /*Type=*/flogType,
+ /*Linkage=*/GlobalValue::ExternalLinkage,
+ /*Name=*/"logf", m_mod);
+ m_llvmFlog->setCallingConv(CallingConv::C);
+ m_llvmFlog->setAttributes(flogPal);
+ }
+ CallInst *call = m_builder.CreateCall(m_llvmFlog, val,
+ name("logf"));
+ call->setCallingConv(CallingConv::C);
+ call->setTailCall(false);
+ return call;
+}
+
+llvm::Value * Instructions::callFloor(llvm::Value *val)
+{
+ if (!m_llvmFloor) {
+ // predeclare the intrinsic
+ std::vector<const Type*> floorArgs;
+ floorArgs.push_back(Type::FloatTy);
+ AttrListPtr floorPal;
+ FunctionType* floorType = FunctionType::get(
+ /*Result=*/Type::FloatTy,
+ /*Params=*/floorArgs,
+ /*isVarArg=*/false);
+ m_llvmFloor = Function::Create(
+ /*Type=*/floorType,
+ /*Linkage=*/GlobalValue::ExternalLinkage,
+ /*Name=*/"floorf", m_mod);
+ m_llvmFloor->setCallingConv(CallingConv::C);
+ m_llvmFloor->setAttributes(floorPal);
+ }
+ CallInst *call = m_builder.CreateCall(m_llvmFloor, val,
+ name("floorf"));
+ call->setCallingConv(CallingConv::C);
+ call->setTailCall(false);
+ return call;
+}
+
+llvm::Value *Instructions::callFSqrt(llvm::Value *val)
+{
+ if (!m_llvmFSqrt) {
+ // predeclare the intrinsic
+ std::vector<const Type*> fsqrtArgs;
+ fsqrtArgs.push_back(Type::FloatTy);
+ AttrListPtr fsqrtPal;
+ FunctionType* fsqrtType = FunctionType::get(
+ /*Result=*/Type::FloatTy,
+ /*Params=*/fsqrtArgs,
+ /*isVarArg=*/false);
+ m_llvmFSqrt = Function::Create(
+ /*Type=*/fsqrtType,
+ /*Linkage=*/GlobalValue::ExternalLinkage,
+ /*Name=*/"llvm.sqrt.f32", m_mod);
+ m_llvmFSqrt->setCallingConv(CallingConv::C);
+ m_llvmFSqrt->setAttributes(fsqrtPal);
+ }
+ CallInst *call = m_builder.CreateCall(m_llvmFSqrt, val,
+ name("sqrt"));
+ call->setCallingConv(CallingConv::C);
+ call->setTailCall(false);
+ return call;
+}
+
+llvm::Value * Instructions::callPow(llvm::Value *val1, llvm::Value *val2)
+{
+ if (!m_llvmPow) {
+ // predeclare the intrinsic
+ std::vector<const Type*> powArgs;
+ powArgs.push_back(Type::FloatTy);
+ powArgs.push_back(Type::FloatTy);
+ AttrListPtr powPal;
+ FunctionType* powType = FunctionType::get(
+ /*Result=*/Type::FloatTy,
+ /*Params=*/powArgs,
+ /*isVarArg=*/false);
+ m_llvmPow = Function::Create(
+ /*Type=*/powType,
+ /*Linkage=*/GlobalValue::ExternalLinkage,
+ /*Name=*/"llvm.pow.f32", m_mod);
+ m_llvmPow->setCallingConv(CallingConv::C);
+ m_llvmPow->setAttributes(powPal);
+ }
+ std::vector<Value*> params;
+ params.push_back(val1);
+ params.push_back(val2);
+ CallInst *call = m_builder.CreateCall(m_llvmPow, params.begin(), params.end(),
+ name("pow"));
+ call->setCallingConv(CallingConv::C);
+ call->setTailCall(false);
+ return call;
+}
+
+llvm::Value * Instructions::vectorFromVals(llvm::Value *x, llvm::Value *y,
+ llvm::Value *z, llvm::Value *w)
+{
+ Constant *const_vec = Constant::getNullValue(m_floatVecType);
+ Value *res = m_builder.CreateInsertElement(const_vec, x,
+ m_storage->constantInt(0),
+ name("vecx"));
+ res = m_builder.CreateInsertElement(res, y, m_storage->constantInt(1),
+ name("vecxy"));
+ res = m_builder.CreateInsertElement(res, z, m_storage->constantInt(2),
+ name("vecxyz"));
+ if (w)
+ res = m_builder.CreateInsertElement(res, w, m_storage->constantInt(3),
+ name("vecxyzw"));
+ return res;
+}
+
+llvm::Value * Instructions::constVector(float x, float y, float z, float w)
+{
+ std::vector<Constant*> vec(4);
+ vec[0] = ConstantFP::get(APFloat(x));
+ vec[1] = ConstantFP::get(APFloat(y));
+ vec[2] = ConstantFP::get(APFloat(z));
+ vec[3] = ConstantFP::get(APFloat(w));
+ return ConstantVector::get(m_floatVecType, vec);
+}
+
+llvm::Function * Instructions::declarePrintf()
+{
+ std::vector<const Type*> args;
+ AttrListPtr params;
+ FunctionType* funcTy = FunctionType::get(
+ /*Result=*/IntegerType::get(32),
+ /*Params=*/args,
+ /*isVarArg=*/true);
+ Function* func_printf = Function::Create(
+ /*Type=*/funcTy,
+ /*Linkage=*/GlobalValue::ExternalLinkage,
+ /*Name=*/"printf", m_mod);
+ func_printf->setCallingConv(CallingConv::C);
+ func_printf->setAttributes(params);
+ return func_printf;
+}
+
+llvm::Function * Instructions::declareFunc(int label)
+{
+ PointerType *vecPtr = PointerType::getUnqual(m_floatVecType);
+ std::vector<const Type*> args;
+ args.push_back(vecPtr);
+ args.push_back(vecPtr);
+ args.push_back(vecPtr);
+ args.push_back(vecPtr);
+ AttrListPtr params;
+ FunctionType *funcType = FunctionType::get(
+ /*Result=*/Type::VoidTy,
+ /*Params=*/args,
+ /*isVarArg=*/false);
+ std::string name = createFuncName(label);
+ Function *func = Function::Create(
+ /*Type=*/funcType,
+ /*Linkage=*/GlobalValue::ExternalLinkage,
+ /*Name=*/name.c_str(), m_mod);
+ func->setCallingConv(CallingConv::C);
+ func->setAttributes(params);
+ return func;
+}
+
+llvm::Function * Instructions::findFunction(int label)
+{
+ llvm::Function *func = m_functions[label];
+ if (!func) {
+ func = declareFunc(label);
+ m_functions[label] = func;
+ }
+ return func;
+}
+
+std::vector<llvm::Value*> Instructions::extractVector(llvm::Value *vec)
+{
+ std::vector<llvm::Value*> elems(4);
+ elems[0] = m_builder.CreateExtractElement(vec, m_storage->constantInt(0),
+ name("x"));
+ elems[1] = m_builder.CreateExtractElement(vec, m_storage->constantInt(1),
+ name("y"));
+ elems[2] = m_builder.CreateExtractElement(vec, m_storage->constantInt(2),
+ name("z"));
+ elems[3] = m_builder.CreateExtractElement(vec, m_storage->constantInt(3),
+ name("w"));
+ return elems;
+}
+
+
+#endif //MESA_LLVM
+
+
diff --git a/src/gallium/auxiliary/gallivm/instructions.h b/src/gallium/auxiliary/gallivm/instructions.h
new file mode 100644
index 00000000000..e18571251ee
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/instructions.h
@@ -0,0 +1,175 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Zack Rusin [email protected]
+ */
+
+#ifndef INSTRUCTIONS_H
+#define INSTRUCTIONS_H
+
+#include <llvm/BasicBlock.h>
+#include <llvm/Module.h>
+#include <llvm/Value.h>
+#include <llvm/Support/IRBuilder.h>
+
+#include <map>
+#include <stack>
+
+namespace llvm {
+ class VectorType;
+ class Function;
+}
+
+class Storage;
+
+class Instructions
+{
+public:
+ Instructions(llvm::Module *mod, llvm::Function *func, llvm::BasicBlock *block,
+ Storage *storage);
+
+ llvm::BasicBlock *currentBlock() const;
+
+ llvm::Value *abs(llvm::Value *in1);
+ llvm::Value *add(llvm::Value *in1, llvm::Value *in2);
+ llvm::Value *arl(llvm::Value *in1);
+ void beginLoop();
+ void bgnSub(unsigned);
+ void brk();
+ void cal(int label, llvm::Value *input);
+ llvm::Value *ceil(llvm::Value *in);
+ llvm::Value *clamp(llvm::Value *in);
+ llvm::Value *cmp(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3);
+ llvm::Value *cnd(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3);
+ llvm::Value *cnd0(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3);
+ llvm::Value *cos(llvm::Value *in);
+ llvm::Value *cross(llvm::Value *in1, llvm::Value *in2);
+ llvm::Value *ddx(llvm::Value *in);
+ llvm::Value *ddy(llvm::Value *in);
+ llvm::Value *div(llvm::Value *in1, llvm::Value *in2);
+ llvm::Value *dot2add(llvm::Value *in, llvm::Value *in2, llvm::Value *in3);
+ llvm::Value *dp2(llvm::Value *in1, llvm::Value *in2);
+ llvm::Value *dp3(llvm::Value *in1, llvm::Value *in2);
+ llvm::Value *dp4(llvm::Value *in1, llvm::Value *in2);
+ llvm::Value *dph(llvm::Value *in1, llvm::Value *in2);
+ llvm::Value *dst(llvm::Value *in1, llvm::Value *in2);
+ void elseop();
+ void endif();
+ void endLoop();
+ void end();
+ void endSub();
+ llvm::Value *exp(llvm::Value *in);
+ llvm::Value *ex2(llvm::Value *in);
+ llvm::Value *floor(llvm::Value *in);
+ llvm::Value *frc(llvm::Value *in);
+ void ifop(llvm::Value *in);
+ llvm::Value *kil(llvm::Value *in);
+ llvm::Value *lerp(llvm::Value *in1, llvm::Value *in2,
+ llvm::Value *in3);
+ llvm::Value *lg2(llvm::Value *in);
+ llvm::Value *lit(llvm::Value *in);
+ llvm::Value *log(llvm::Value *in);
+ llvm::Value *madd(llvm::Value *in1, llvm::Value *in2,
+ llvm::Value *in3);
+ llvm::Value *max(llvm::Value *in1, llvm::Value *in2);
+ llvm::Value *min(llvm::Value *in1, llvm::Value *in2);
+ llvm::Value *mul(llvm::Value *in1, llvm::Value *in2);
+ llvm::Value *neg(llvm::Value *in);
+ llvm::Value *nrm(llvm::Value *in);
+ llvm::Value *pow(llvm::Value *in1, llvm::Value *in2);
+ llvm::Value *rcp(llvm::Value *in);
+ llvm::Value *rsq(llvm::Value *in);
+ llvm::Value *scs(llvm::Value *in);
+ llvm::Value *seq(llvm::Value *in1, llvm::Value *in2);
+ llvm::Value *sfl(llvm::Value *in1, llvm::Value *in2);
+ llvm::Value *sge(llvm::Value *in1, llvm::Value *in2);
+ llvm::Value *sgt(llvm::Value *in1, llvm::Value *in2);
+ llvm::Value *sin(llvm::Value *in);
+ llvm::Value *sle(llvm::Value *in1, llvm::Value *in2);
+ llvm::Value *slt(llvm::Value *in1, llvm::Value *in2);
+ llvm::Value *sne(llvm::Value *in1, llvm::Value *in2);
+ llvm::Value *str(llvm::Value *in1, llvm::Value *in2);
+ llvm::Value *sub(llvm::Value *in1, llvm::Value *in2);
+ llvm::Value *trunc(llvm::Value *in);
+ llvm::Value *x2d(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3);
+
+ void printVector(llvm::Value *val);
+private:
+ const char *name(const char *prefix);
+
+ llvm::Value *callCeil(llvm::Value *val);
+ llvm::Value *callFAbs(llvm::Value *val);
+ llvm::Value *callFExp(llvm::Value *val);
+ llvm::Value *callFLog(llvm::Value *val);
+ llvm::Value *callFloor(llvm::Value *val);
+ llvm::Value *callFSqrt(llvm::Value *val);
+ llvm::Value *callPow(llvm::Value *val1, llvm::Value *val2);
+
+ llvm::Value *vectorFromVals(llvm::Value *x, llvm::Value *y,
+ llvm::Value *z, llvm::Value *w=0);
+
+ llvm::Value *constVector(float x, float y, float z, float w);
+
+ llvm::Function *declarePrintf();
+ llvm::Function *declareFunc(int label);
+
+ llvm::Function *findFunction(int label);
+
+ std::vector<llvm::Value*> extractVector(llvm::Value *vec);
+private:
+ llvm::Module *m_mod;
+ llvm::Function *m_func;
+ char m_name[32];
+ llvm::IRBuilder<> m_builder;
+ int m_idx;
+
+ llvm::VectorType *m_floatVecType;
+
+ llvm::Function *m_llvmCeil;
+ llvm::Function *m_llvmFSqrt;
+ llvm::Function *m_llvmFAbs;
+ llvm::Function *m_llvmPow;
+ llvm::Function *m_llvmFloor;
+ llvm::Function *m_llvmFlog;
+ llvm::Function *m_llvmFexp;
+ llvm::Function *m_llvmLit;
+
+ llvm::Constant *m_fmtPtr;
+
+ std::stack<llvm::BasicBlock*> m_ifStack;
+ struct Loop {
+ llvm::BasicBlock *begin;
+ llvm::BasicBlock *end;
+ };
+ std::stack<Loop> m_loopStack;
+ std::map<int, llvm::Function*> m_functions;
+ Storage *m_storage;
+};
+
+#endif
diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.cpp b/src/gallium/auxiliary/gallivm/instructionssoa.cpp
new file mode 100644
index 00000000000..925e948763e
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/instructionssoa.cpp
@@ -0,0 +1,523 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+#include "instructionssoa.h"
+
+#include "storagesoa.h"
+
+#include "pipe/p_shader_tokens.h"
+#include "util/u_memory.h"
+
+#include <llvm/CallingConv.h>
+#include <llvm/Constants.h>
+#include <llvm/Module.h>
+#include <llvm/Function.h>
+#include <llvm/Instructions.h>
+#include <llvm/Transforms/Utils/Cloning.h>
+#include <llvm/Attributes.h>
+#include <llvm/Support/MemoryBuffer.h>
+#include <llvm/Bitcode/ReaderWriter.h>
+
+
+#include <iostream>
+
+
+/* disable some warnings. this file is autogenerated */
+#if defined(__GNUC__)
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#endif
+using namespace llvm;
+#include "gallivmsoabuiltins.cpp"
+#if defined(__GNUC__)
+#pragma GCC diagnostic warning "-Wunused-variable"
+#endif
+
+InstructionsSoa::InstructionsSoa(llvm::Module *mod, llvm::Function *func,
+ llvm::BasicBlock *block, StorageSoa *storage)
+ : m_builder(block),
+ m_storage(storage),
+ m_idx(0)
+{
+ createFunctionMap();
+ createBuiltins();
+}
+
+const char * InstructionsSoa::name(const char *prefix) const
+{
+ ++m_idx;
+ snprintf(m_name, 32, "%s%d", prefix, m_idx);
+ return m_name;
+}
+
+llvm::Value * InstructionsSoa::vectorFromVals(llvm::Value *x, llvm::Value *y,
+ llvm::Value *z, llvm::Value *w)
+{
+ VectorType *vectorType = VectorType::get(Type::FloatTy, 4);
+ Constant *constVector = Constant::getNullValue(vectorType);
+ Value *res = m_builder.CreateInsertElement(constVector, x,
+ m_storage->constantInt(0),
+ name("vecx"));
+ res = m_builder.CreateInsertElement(res, y, m_storage->constantInt(1),
+ name("vecxy"));
+ res = m_builder.CreateInsertElement(res, z, m_storage->constantInt(2),
+ name("vecxyz"));
+ if (w)
+ res = m_builder.CreateInsertElement(res, w, m_storage->constantInt(3),
+ name("vecxyzw"));
+ return res;
+}
+
+void InstructionsSoa::end()
+{
+ m_builder.CreateRetVoid();
+}
+
+std::vector<llvm::Value*> InstructionsSoa::extractVector(llvm::Value *vector)
+{
+ std::vector<llvm::Value*> res(4);
+ res[0] = m_builder.CreateExtractElement(vector,
+ m_storage->constantInt(0),
+ name("extract1X"));
+ res[1] = m_builder.CreateExtractElement(vector,
+ m_storage->constantInt(1),
+ name("extract2X"));
+ res[2] = m_builder.CreateExtractElement(vector,
+ m_storage->constantInt(2),
+ name("extract3X"));
+ res[3] = m_builder.CreateExtractElement(vector,
+ m_storage->constantInt(3),
+ name("extract4X"));
+
+ return res;
+}
+
+llvm::IRBuilder<>* InstructionsSoa::getIRBuilder()
+{
+ return &m_builder;
+}
+
+void InstructionsSoa::createFunctionMap()
+{
+ m_functionsMap[TGSI_OPCODE_ABS] = "abs";
+ m_functionsMap[TGSI_OPCODE_DP3] = "dp3";
+ m_functionsMap[TGSI_OPCODE_DP4] = "dp4";
+ m_functionsMap[TGSI_OPCODE_MIN] = "min";
+ m_functionsMap[TGSI_OPCODE_MAX] = "max";
+ m_functionsMap[TGSI_OPCODE_POWER] = "pow";
+ m_functionsMap[TGSI_OPCODE_LIT] = "lit";
+ m_functionsMap[TGSI_OPCODE_RSQ] = "rsq";
+ m_functionsMap[TGSI_OPCODE_SLT] = "slt";
+}
+
+void InstructionsSoa::createDependencies()
+{
+ {
+ std::vector<std::string> powDeps(2);
+ powDeps[0] = "powf";
+ powDeps[1] = "powvec";
+ m_builtinDependencies["pow"] = powDeps;
+ }
+ {
+ std::vector<std::string> absDeps(2);
+ absDeps[0] = "fabsf";
+ absDeps[1] = "absvec";
+ m_builtinDependencies["abs"] = absDeps;
+ }
+ {
+ std::vector<std::string> maxDeps(1);
+ maxDeps[0] = "maxvec";
+ m_builtinDependencies["max"] = maxDeps;
+ }
+ {
+ std::vector<std::string> minDeps(1);
+ minDeps[0] = "minvec";
+ m_builtinDependencies["min"] = minDeps;
+ }
+ {
+ std::vector<std::string> litDeps(4);
+ litDeps[0] = "minvec";
+ litDeps[1] = "maxvec";
+ litDeps[2] = "powf";
+ litDeps[3] = "powvec";
+ m_builtinDependencies["lit"] = litDeps;
+ }
+ {
+ std::vector<std::string> rsqDeps(4);
+ rsqDeps[0] = "sqrtf";
+ rsqDeps[1] = "sqrtvec";
+ rsqDeps[2] = "fabsf";
+ rsqDeps[3] = "absvec";
+ m_builtinDependencies["rsq"] = rsqDeps;
+ }
+}
+
+llvm::Function * InstructionsSoa::function(int op)
+{
+ if (m_functions.find(op) != m_functions.end())
+ return m_functions[op];
+
+ std::string name = m_functionsMap[op];
+
+ std::cout <<"For op = "<<op<<", func is '"<<name<<"'"<<std::endl;
+
+ std::vector<std::string> deps = m_builtinDependencies[name];
+ for (unsigned int i = 0; i < deps.size(); ++i) {
+ llvm::Function *func = m_builtins->getFunction(deps[i]);
+ std::cout <<"\tinjecting dep = '"<<func->getName()<<"'"<<std::endl;
+ injectFunction(func);
+ }
+
+ llvm::Function *originalFunc = m_builtins->getFunction(name);
+ injectFunction(originalFunc, op);
+ return m_functions[op];
+}
+
+llvm::Module * InstructionsSoa::currentModule() const
+{
+ BasicBlock *block = m_builder.GetInsertBlock();
+ if (!block || !block->getParent())
+ return 0;
+
+ return block->getParent()->getParent();
+}
+
+void InstructionsSoa::createBuiltins()
+{
+ std::string ErrMsg;
+ MemoryBuffer *buffer = MemoryBuffer::getMemBuffer(
+ (const char*)&soabuiltins_data[0],
+ (const char*)&soabuiltins_data[Elements(soabuiltins_data) - 1]);
+ m_builtins = ParseBitcodeFile(buffer, &ErrMsg);
+ std::cout<<"Builtins created at "<<m_builtins<<" ("<<ErrMsg<<")"<<std::endl;
+ assert(m_builtins);
+ createDependencies();
+}
+
+
+std::vector<llvm::Value*> InstructionsSoa::abs(const std::vector<llvm::Value*> in1)
+{
+ llvm::Function *func = function(TGSI_OPCODE_ABS);
+ return callBuiltin(func, in1);
+}
+
+std::vector<llvm::Value*> InstructionsSoa::add(const std::vector<llvm::Value*> in1,
+ const std::vector<llvm::Value*> in2)
+{
+ std::vector<llvm::Value*> res(4);
+
+ res[0] = m_builder.CreateAdd(in1[0], in2[0], name("addx"));
+ res[1] = m_builder.CreateAdd(in1[1], in2[1], name("addy"));
+ res[2] = m_builder.CreateAdd(in1[2], in2[2], name("addz"));
+ res[3] = m_builder.CreateAdd(in1[3], in2[3], name("addw"));
+
+ return res;
+}
+
+std::vector<llvm::Value*> InstructionsSoa::arl(const std::vector<llvm::Value*> in)
+{
+ std::vector<llvm::Value*> res(4);
+
+ //Extract x's
+ llvm::Value *x1 = m_builder.CreateExtractElement(in[0],
+ m_storage->constantInt(0),
+ name("extractX"));
+ //cast it to an unsigned int
+ x1 = m_builder.CreateFPToUI(x1, IntegerType::get(32), name("x1IntCast"));
+
+ res[0] = x1;//vectorFromVals(x1, x2, x3, x4);
+ //only x is valid. the others shouldn't be necessary
+ /*
+ res[1] = Constant::getNullValue(m_floatVecType);
+ res[2] = Constant::getNullValue(m_floatVecType);
+ res[3] = Constant::getNullValue(m_floatVecType);
+ */
+
+ return res;
+}
+
+std::vector<llvm::Value*> InstructionsSoa::dp3(const std::vector<llvm::Value*> in1,
+ const std::vector<llvm::Value*> in2)
+{
+ llvm::Function *func = function(TGSI_OPCODE_DP3);
+ return callBuiltin(func, in1, in2);
+}
+
+std::vector<llvm::Value*> InstructionsSoa::lit(const std::vector<llvm::Value*> in)
+{
+ llvm::Function *func = function(TGSI_OPCODE_LIT);
+ return callBuiltin(func, in);
+}
+
+std::vector<llvm::Value*> InstructionsSoa::madd(const std::vector<llvm::Value*> in1,
+ const std::vector<llvm::Value*> in2,
+ const std::vector<llvm::Value*> in3)
+{
+ std::vector<llvm::Value*> res = mul(in1, in2);
+ return add(res, in3);
+}
+
+std::vector<llvm::Value*> InstructionsSoa::max(const std::vector<llvm::Value*> in1,
+ const std::vector<llvm::Value*> in2)
+{
+ llvm::Function *func = function(TGSI_OPCODE_MAX);
+ return callBuiltin(func, in1, in2);
+}
+
+std::vector<llvm::Value*> InstructionsSoa::min(const std::vector<llvm::Value*> in1,
+ const std::vector<llvm::Value*> in2)
+{
+ llvm::Function *func = function(TGSI_OPCODE_MIN);
+ return callBuiltin(func, in1, in2);
+}
+
+std::vector<llvm::Value*> InstructionsSoa::mul(const std::vector<llvm::Value*> in1,
+ const std::vector<llvm::Value*> in2)
+{
+ std::vector<llvm::Value*> res(4);
+
+ res[0] = m_builder.CreateMul(in1[0], in2[0], name("mulx"));
+ res[1] = m_builder.CreateMul(in1[1], in2[1], name("muly"));
+ res[2] = m_builder.CreateMul(in1[2], in2[2], name("mulz"));
+ res[3] = m_builder.CreateMul(in1[3], in2[3], name("mulw"));
+
+ return res;
+}
+
+std::vector<llvm::Value*> InstructionsSoa::pow(const std::vector<llvm::Value*> in1,
+ const std::vector<llvm::Value*> in2)
+{
+ llvm::Function *func = function(TGSI_OPCODE_POWER);
+ return callBuiltin(func, in1, in2);
+}
+
+std::vector<llvm::Value*> InstructionsSoa::rsq(const std::vector<llvm::Value*> in)
+{
+ llvm::Function *func = function(TGSI_OPCODE_RSQ);
+ return callBuiltin(func, in);
+}
+
+std::vector<llvm::Value*> InstructionsSoa::slt(const std::vector<llvm::Value*> in1,
+ const std::vector<llvm::Value*> in2)
+{
+ llvm::Function *func = function(TGSI_OPCODE_SLT);
+ return callBuiltin(func, in1, in2);
+}
+
+std::vector<llvm::Value*> InstructionsSoa::sub(const std::vector<llvm::Value*> in1,
+ const std::vector<llvm::Value*> in2)
+{
+ std::vector<llvm::Value*> res(4);
+
+ res[0] = m_builder.CreateSub(in1[0], in2[0], name("subx"));
+ res[1] = m_builder.CreateSub(in1[1], in2[1], name("suby"));
+ res[2] = m_builder.CreateSub(in1[2], in2[2], name("subz"));
+ res[3] = m_builder.CreateSub(in1[3], in2[3], name("subw"));
+
+ return res;
+}
+
+void checkFunction(Function *func)
+{
+ for (Function::const_iterator BI = func->begin(), BE = func->end();
+ BI != BE; ++BI) {
+ const BasicBlock &BB = *BI;
+ for (BasicBlock::const_iterator II = BB.begin(), IE = BB.end();
+ II != IE; ++II) {
+ const Instruction &I = *II;
+ std::cout<< "Instr = "<<I;
+ for (unsigned op = 0, E = I.getNumOperands(); op != E; ++op) {
+ const Value *Op = I.getOperand(op);
+ std::cout<< "\top = "<<Op<<"("<<op<<")"<<std::endl;
+ //I->setOperand(op, V);
+ }
+ }
+ }
+}
+
+llvm::Value * InstructionsSoa::allocaTemp()
+{
+ VectorType *vector = VectorType::get(Type::FloatTy, 4);
+ ArrayType *vecArray = ArrayType::get(vector, 4);
+ AllocaInst *alloca = new AllocaInst(vecArray, name("tmpRes"),
+ m_builder.GetInsertBlock());
+
+ std::vector<Value*> indices;
+ indices.push_back(m_storage->constantInt(0));
+ indices.push_back(m_storage->constantInt(0));
+ GetElementPtrInst *getElem = GetElementPtrInst::Create(alloca,
+ indices.begin(),
+ indices.end(),
+ name("allocaPtr"),
+ m_builder.GetInsertBlock());
+ return getElem;
+}
+
+std::vector<llvm::Value*> InstructionsSoa::allocaToResult(llvm::Value *allocaPtr)
+{
+ GetElementPtrInst *xElemPtr = GetElementPtrInst::Create(allocaPtr,
+ m_storage->constantInt(0),
+ name("xPtr"),
+ m_builder.GetInsertBlock());
+ GetElementPtrInst *yElemPtr = GetElementPtrInst::Create(allocaPtr,
+ m_storage->constantInt(1),
+ name("yPtr"),
+ m_builder.GetInsertBlock());
+ GetElementPtrInst *zElemPtr = GetElementPtrInst::Create(allocaPtr,
+ m_storage->constantInt(2),
+ name("zPtr"),
+ m_builder.GetInsertBlock());
+ GetElementPtrInst *wElemPtr = GetElementPtrInst::Create(allocaPtr,
+ m_storage->constantInt(3),
+ name("wPtr"),
+ m_builder.GetInsertBlock());
+
+ std::vector<llvm::Value*> res(4);
+ res[0] = new LoadInst(xElemPtr, name("xRes"), false, m_builder.GetInsertBlock());
+ res[1] = new LoadInst(yElemPtr, name("yRes"), false, m_builder.GetInsertBlock());
+ res[2] = new LoadInst(zElemPtr, name("zRes"), false, m_builder.GetInsertBlock());
+ res[3] = new LoadInst(wElemPtr, name("wRes"), false, m_builder.GetInsertBlock());
+
+ return res;
+}
+
+std::vector<llvm::Value*> InstructionsSoa::dp4(const std::vector<llvm::Value*> in1,
+ const std::vector<llvm::Value*> in2)
+{
+ llvm::Function *func = function(TGSI_OPCODE_DP4);
+ return callBuiltin(func, in1, in2);
+}
+
+std::vector<Value*> InstructionsSoa::callBuiltin(llvm::Function *func, const std::vector<llvm::Value*> in1)
+{
+ std::vector<Value*> params;
+
+ llvm::Value *allocaPtr = allocaTemp();
+ params.push_back(allocaPtr);
+ params.push_back(in1[0]);
+ params.push_back(in1[1]);
+ params.push_back(in1[2]);
+ params.push_back(in1[3]);
+ CallInst *call = m_builder.CreateCall(func, params.begin(), params.end());
+ call->setCallingConv(CallingConv::C);
+ call->setTailCall(false);
+
+ return allocaToResult(allocaPtr);
+}
+
+std::vector<Value*> InstructionsSoa::callBuiltin(llvm::Function *func, const std::vector<llvm::Value*> in1,
+ const std::vector<llvm::Value*> in2)
+{
+ std::vector<Value*> params;
+
+ llvm::Value *allocaPtr = allocaTemp();
+ params.push_back(allocaPtr);
+ params.push_back(in1[0]);
+ params.push_back(in1[1]);
+ params.push_back(in1[2]);
+ params.push_back(in1[3]);
+ params.push_back(in2[0]);
+ params.push_back(in2[1]);
+ params.push_back(in2[2]);
+ params.push_back(in2[3]);
+ CallInst *call = m_builder.CreateCall(func, params.begin(), params.end());
+ call->setCallingConv(CallingConv::C);
+ call->setTailCall(false);
+
+ return allocaToResult(allocaPtr);
+}
+
+std::vector<Value*> InstructionsSoa::callBuiltin(llvm::Function *func, const std::vector<llvm::Value*> in1,
+ const std::vector<llvm::Value*> in2,
+ const std::vector<llvm::Value*> in3)
+{
+ std::vector<Value*> params;
+
+ llvm::Value *allocaPtr = allocaTemp();
+ params.push_back(allocaPtr);
+ params.push_back(in1[0]);
+ params.push_back(in1[1]);
+ params.push_back(in1[2]);
+ params.push_back(in1[3]);
+ params.push_back(in2[0]);
+ params.push_back(in2[1]);
+ params.push_back(in2[2]);
+ params.push_back(in2[3]);
+ params.push_back(in3[0]);
+ params.push_back(in3[1]);
+ params.push_back(in3[2]);
+ params.push_back(in3[3]);
+ CallInst *call = m_builder.CreateCall(func, params.begin(), params.end());
+ call->setCallingConv(CallingConv::C);
+ call->setTailCall(false);
+
+ return allocaToResult(allocaPtr);
+}
+
+void InstructionsSoa::injectFunction(llvm::Function *originalFunc, int op)
+{
+ assert(originalFunc);
+ std::cout << "injecting function originalFunc " <<originalFunc->getName() <<std::endl;
+ if (op != TGSI_OPCODE_LAST) {
+ /* in this case it's possible the function has been already
+ * injected as part of the dependency chain, which gets
+ * injected below */
+ llvm::Function *func = currentModule()->getFunction(originalFunc->getName());
+ if (func) {
+ m_functions[op] = func;
+ return;
+ }
+ }
+ llvm::Function *func = 0;
+ if (originalFunc->isDeclaration()) {
+ func = Function::Create(originalFunc->getFunctionType(), GlobalValue::ExternalLinkage,
+ originalFunc->getName(), currentModule());
+ func->setCallingConv(CallingConv::C);
+ const AttrListPtr pal;
+ func->setAttributes(pal);
+ currentModule()->dump();
+ } else {
+ DenseMap<const Value*, Value *> val;
+ val[m_builtins->getFunction("fabsf")] = currentModule()->getFunction("fabsf");
+ val[m_builtins->getFunction("powf")] = currentModule()->getFunction("powf");
+ val[m_builtins->getFunction("sqrtf")] = currentModule()->getFunction("sqrtf");
+ func = CloneFunction(originalFunc, val);
+#if 0
+ std::cout <<" replacing "<<m_builtins->getFunction("powf")
+ <<", with " <<currentModule()->getFunction("powf")<<std::endl;
+ std::cout<<"1111-------------------------------"<<std::endl;
+ checkFunction(originalFunc);
+ std::cout<<"2222-------------------------------"<<std::endl;
+ checkFunction(func);
+ std::cout <<"XXXX = " <<val[m_builtins->getFunction("powf")]<<std::endl;
+#endif
+ currentModule()->getFunctionList().push_back(func);
+ }
+ if (op != TGSI_OPCODE_LAST) {
+ m_functions[op] = func;
+ }
+}
+
+
diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.h b/src/gallium/auxiliary/gallivm/instructionssoa.h
new file mode 100644
index 00000000000..d6831e0a6b9
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/instructionssoa.h
@@ -0,0 +1,116 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef INSTRUCTIONSSOA_H
+#define INSTRUCTIONSSOA_H
+
+#include <pipe/p_shader_tokens.h>
+#include <llvm/Support/IRBuilder.h>
+
+#include <map>
+#include <vector>
+
+namespace llvm {
+ class Module;
+ class Function;
+ class BasicBlock;
+ class Value;
+}
+class StorageSoa;
+
+class InstructionsSoa
+{
+public:
+ InstructionsSoa(llvm::Module *mod, llvm::Function *func,
+ llvm::BasicBlock *block, StorageSoa *storage);
+
+ std::vector<llvm::Value*> abs(const std::vector<llvm::Value*> in1);
+ std::vector<llvm::Value*> arl(const std::vector<llvm::Value*> in);
+ std::vector<llvm::Value*> add(const std::vector<llvm::Value*> in1,
+ const std::vector<llvm::Value*> in2);
+ std::vector<llvm::Value*> dp3(const std::vector<llvm::Value*> in1,
+ const std::vector<llvm::Value*> in2);
+ std::vector<llvm::Value*> dp4(const std::vector<llvm::Value*> in1,
+ const std::vector<llvm::Value*> in2);
+ std::vector<llvm::Value*> lit(const std::vector<llvm::Value*> in);
+ std::vector<llvm::Value*> madd(const std::vector<llvm::Value*> in1,
+ const std::vector<llvm::Value*> in2,
+ const std::vector<llvm::Value*> in3);
+ std::vector<llvm::Value*> max(const std::vector<llvm::Value*> in1,
+ const std::vector<llvm::Value*> in2);
+ std::vector<llvm::Value*> min(const std::vector<llvm::Value*> in1,
+ const std::vector<llvm::Value*> in2);
+ std::vector<llvm::Value*> mul(const std::vector<llvm::Value*> in1,
+ const std::vector<llvm::Value*> in2);
+ std::vector<llvm::Value*> pow(const std::vector<llvm::Value*> in1,
+ const std::vector<llvm::Value*> in2);
+ std::vector<llvm::Value*> rsq(const std::vector<llvm::Value*> in1);
+ std::vector<llvm::Value*> slt(const std::vector<llvm::Value*> in1,
+ const std::vector<llvm::Value*> in2);
+ std::vector<llvm::Value*> sub(const std::vector<llvm::Value*> in1,
+ const std::vector<llvm::Value*> in2);
+ void end();
+
+ std::vector<llvm::Value*> extractVector(llvm::Value *vector);
+ llvm::IRBuilder<>* getIRBuilder();
+private:
+ const char * name(const char *prefix) const;
+ llvm::Value *vectorFromVals(llvm::Value *x, llvm::Value *y,
+ llvm::Value *z, llvm::Value *w);
+ void createFunctionMap();
+ void createBuiltins();
+ void createDependencies();
+ llvm::Function *function(int);
+ llvm::Module *currentModule() const;
+ llvm::Value *allocaTemp();
+ std::vector<llvm::Value*> allocaToResult(llvm::Value *allocaPtr);
+ std::vector<llvm::Value*> callBuiltin(llvm::Function *func,
+ const std::vector<llvm::Value*> in1);
+ std::vector<llvm::Value*> callBuiltin(llvm::Function *func,
+ const std::vector<llvm::Value*> in1,
+ const std::vector<llvm::Value*> in2);
+ std::vector<llvm::Value*> callBuiltin(llvm::Function *func,
+ const std::vector<llvm::Value*> in1,
+ const std::vector<llvm::Value*> in2,
+ const std::vector<llvm::Value*> in3);
+ void injectFunction(llvm::Function *originalFunc, int op = TGSI_OPCODE_LAST);
+private:
+ llvm::IRBuilder<> m_builder;
+ StorageSoa *m_storage;
+
+ std::map<int, std::string> m_functionsMap;
+ std::map<int, llvm::Function*> m_functions;
+ llvm::Module *m_builtins;
+ std::map<std::string, std::vector<std::string> > m_builtinDependencies;
+
+private:
+ mutable int m_idx;
+ mutable char m_name[32];
+};
+
+
+#endif
diff --git a/src/gallium/auxiliary/gallivm/llvm_builtins.c b/src/gallium/auxiliary/gallivm/llvm_builtins.c
new file mode 100644
index 00000000000..d5a003a48b2
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/llvm_builtins.c
@@ -0,0 +1,114 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Zack Rusin [email protected]
+ */
+typedef __attribute__(( ext_vector_type(4) )) float float4;
+
+extern float powf(float a, float b);
+
+inline float approx(float a, float b)
+{
+ if (b < -128.0f) b = -128.0f;
+ if (b > 128.0f) b = 128.0f;
+ if (a < 0) a = 0;
+ return powf(a, b);
+}
+
+inline float4 lit(float4 tmp)
+{
+ float4 result;
+ result.x = 1.0;
+ result.w = 1.0;
+ if (tmp.x > 0) {
+ result.y = tmp.x;
+ result.z = approx(tmp.y, tmp.w);
+ } else {
+ result.y = 0;
+ result.z = 0;
+ }
+ return result;
+}
+
+inline float4 cmp(float4 tmp0, float4 tmp1, float4 tmp2)
+{
+ float4 result;
+
+ result.x = (tmp0.x < 0.0) ? tmp1.x : tmp2.x;
+ result.y = (tmp0.y < 0.0) ? tmp1.y : tmp2.y;
+ result.z = (tmp0.z < 0.0) ? tmp1.z : tmp2.z;
+ result.w = (tmp0.w < 0.0) ? tmp1.w : tmp2.w;
+
+ return result;
+}
+
+extern float cosf(float val);
+extern float sinf(float val);
+
+inline float4 vcos(float4 val)
+{
+ float4 result;
+ printf("VEC IN is %f %f %f %f\n", val.x, val.y, val.z, val.w);
+ result.x = cosf(val.x);
+ result.y = cosf(val.x);
+ result.z = cosf(val.x);
+ result.w = cosf(val.x);
+ printf("VEC OUT is %f %f %f %f\n", result.x, result.y, result.z, result.w);
+ return result;
+}
+
+inline float4 scs(float4 val)
+{
+ float4 result;
+ float tmp = val.x;
+ result.x = cosf(tmp);
+ result.y = sinf(tmp);
+ return result;
+}
+
+
+inline float4 vsin(float4 val)
+{
+ float4 result;
+ float tmp = val.x;
+ float res = sinf(tmp);
+ result.x = res;
+ result.y = res;
+ result.z = res;
+ result.w = res;
+ return result;
+}
+
+inline int kil(float4 val)
+{
+ if (val.x < 0 || val.y < 0 || val.z < 0 || val.w < 0)
+ return 1;
+ else
+ return 0;
+}
diff --git a/src/gallium/auxiliary/gallivm/loweringpass.cpp b/src/gallium/auxiliary/gallivm/loweringpass.cpp
new file mode 100644
index 00000000000..556dbec3661
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/loweringpass.cpp
@@ -0,0 +1,17 @@
+#include "loweringpass.h"
+
+using namespace llvm;
+
+char LoweringPass::ID = 0;
+RegisterPass<LoweringPass> X("lowering", "Lowering Pass");
+
+LoweringPass::LoweringPass()
+ : ModulePass((intptr_t)&ID)
+{
+}
+
+bool LoweringPass::runOnModule(Module &m)
+{
+ llvm::cerr << "Hello: " << m.getModuleIdentifier() << "\n";
+ return false;
+}
diff --git a/src/gallium/auxiliary/gallivm/loweringpass.h b/src/gallium/auxiliary/gallivm/loweringpass.h
new file mode 100644
index 00000000000..f62dcf6ba73
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/loweringpass.h
@@ -0,0 +1,15 @@
+#ifndef LOWERINGPASS_H
+#define LOWERINGPASS_H
+
+#include "llvm/Pass.h"
+#include "llvm/Module.h"
+
+struct LoweringPass : public llvm::ModulePass
+{
+ static char ID;
+ LoweringPass();
+
+ virtual bool runOnModule(llvm::Module &m);
+};
+
+#endif
diff --git a/src/gallium/auxiliary/gallivm/soabuiltins.c b/src/gallium/auxiliary/gallivm/soabuiltins.c
new file mode 100644
index 00000000000..cb85e1734ec
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/soabuiltins.c
@@ -0,0 +1,210 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * This file is compiled with clang into the LLVM bitcode
+ *
+ * Authors:
+ * Zack Rusin [email protected]
+ */
+typedef __attribute__(( ext_vector_type(4) )) float float4;
+
+
+extern float fabsf(float val);
+
+/* helpers */
+
+float4 absvec(float4 vec)
+{
+ float4 res;
+ res.x = fabsf(vec.x);
+ res.y = fabsf(vec.y);
+ res.z = fabsf(vec.z);
+ res.w = fabsf(vec.w);
+
+ return res;
+}
+
+float4 maxvec(float4 a, float4 b)
+{
+ return (float4){(a.x > b.x) ? a.x : b.x,
+ (a.y > b.y) ? a.y : b.y,
+ (a.z > b.z) ? a.z : b.z,
+ (a.w > b.w) ? a.w : b.w};
+}
+
+float4 minvec(float4 a, float4 b)
+{
+ return (float4){(a.x < b.x) ? a.x : b.x,
+ (a.y < b.y) ? a.y : b.y,
+ (a.z < b.z) ? a.z : b.z,
+ (a.w < b.w) ? a.w : b.w};
+}
+
+extern float powf(float num, float p);
+extern float sqrtf(float x);
+
+float4 powvec(float4 vec, float4 q)
+{
+ float4 p;
+ p.x = powf(vec.x, q.x);
+ p.y = powf(vec.y, q.y);
+ p.z = powf(vec.z, q.z);
+ p.w = powf(vec.w, q.w);
+ return p;
+}
+
+float4 sqrtvec(float4 vec)
+{
+ float4 p;
+ p.x = sqrtf(vec.x);
+ p.y = sqrtf(vec.y);
+ p.z = sqrtf(vec.z);
+ p.w = sqrtf(vec.w);
+ return p;
+}
+
+float4 sltvec(float4 v1, float4 v2)
+{
+ float4 p;
+ p.x = (v1.x < v2.x) ? 1.0 : 0.0;
+ p.y = (v1.y < v2.y) ? 1.0 : 0.0;
+ p.z = (v1.z < v2.z) ? 1.0 : 0.0;
+ p.w = (v1.w < v2.w) ? 1.0 : 0.0;
+ return p;
+}
+
+
+/* instructions */
+
+void abs(float4 *res,
+ float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w)
+{
+ res[0] = absvec(tmp0x);
+ res[1] = absvec(tmp0y);
+ res[2] = absvec(tmp0z);
+ res[3] = absvec(tmp0w);
+}
+
+void dp3(float4 *res,
+ float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w,
+ float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w)
+{
+ float4 dot = (tmp0x * tmp1x) + (tmp0y * tmp1y) +
+ (tmp0z * tmp1z);
+
+ res[0] = dot;
+ res[1] = dot;
+ res[2] = dot;
+ res[3] = dot;
+}
+
+void dp4(float4 *res,
+ float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w,
+ float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w)
+{
+ float4 dot = (tmp0x * tmp1x) + (tmp0y * tmp1y) +
+ (tmp0z * tmp1z) + (tmp0w * tmp1w);
+
+ res[0] = dot;
+ res[1] = dot;
+ res[2] = dot;
+ res[3] = dot;
+}
+
+void lit(float4 *res,
+ float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w)
+{
+ const float4 zerovec = (float4) {0.0, 0.0, 0.0, 0.0};
+ const float4 min128 = (float4) {-128.f, -128.f, -128.f, -128.f};
+ const float4 plus128 = (float4) {128.f, 128.f, 128.f, 128.f};
+
+ res[0] = (float4){1.0, 1.0, 1.0, 1.0};
+ if (tmp0x.x > 0) {
+ float4 tmpy = maxvec(tmp0y, zerovec);
+ float4 tmpw = minvec(tmp0w, plus128);
+ tmpw = maxvec(tmpw, min128);
+ res[1] = tmp0x;
+ res[2] = powvec(tmpy, tmpw);
+ } else {
+ res[1] = zerovec;
+ res[2] = zerovec;
+ }
+ res[3] = (float4){1.0, 1.0, 1.0, 1.0};
+}
+
+void min(float4 *res,
+ float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w,
+ float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w)
+{
+ res[0] = minvec(tmp0x, tmp1x);
+ res[1] = minvec(tmp0y, tmp1y);
+ res[2] = minvec(tmp0z, tmp1z);
+ res[3] = minvec(tmp0w, tmp1w);
+}
+
+
+void max(float4 *res,
+ float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w,
+ float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w)
+{
+ res[0] = maxvec(tmp0x, tmp1x);
+ res[1] = maxvec(tmp0y, tmp1y);
+ res[2] = maxvec(tmp0z, tmp1z);
+ res[3] = maxvec(tmp0w, tmp1w);
+}
+
+void pow(float4 *res,
+ float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w,
+ float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w)
+{
+ res[0] = powvec(tmp0x, tmp1x);
+ res[1] = res[0];
+ res[2] = res[0];
+ res[3] = res[0];
+}
+
+void rsq(float4 *res,
+ float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w)
+{
+ const float4 onevec = (float4) {1., 1., 1., 1.};
+ res[0] = onevec/sqrtvec(absvec(tmp0x));
+ res[1] = onevec/sqrtvec(absvec(tmp0y));
+ res[2] = onevec/sqrtvec(absvec(tmp0z));
+ res[3] = onevec/sqrtvec(absvec(tmp0w));
+}
+
+void slt(float4 *res,
+ float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w,
+ float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w)
+{
+ res[0] = sltvec(tmp0x, tmp1x);
+ res[1] = sltvec(tmp0y, tmp1y);
+ res[2] = sltvec(tmp0z, tmp1z);
+ res[3] = sltvec(tmp0w, tmp1w);
+}
+
diff --git a/src/gallium/auxiliary/gallivm/storage.cpp b/src/gallium/auxiliary/gallivm/storage.cpp
new file mode 100644
index 00000000000..73df24c9769
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/storage.cpp
@@ -0,0 +1,364 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Zack Rusin [email protected]
+ */
+#ifdef MESA_LLVM
+
+#include "storage.h"
+
+#include "gallivm_p.h"
+
+#include "pipe/p_shader_tokens.h"
+#include <llvm/BasicBlock.h>
+#include <llvm/Module.h>
+#include <llvm/Value.h>
+
+#include <llvm/CallingConv.h>
+#include <llvm/Constants.h>
+#include <llvm/DerivedTypes.h>
+#include <llvm/InstrTypes.h>
+#include <llvm/Instructions.h>
+
+using namespace llvm;
+
+Storage::Storage(llvm::BasicBlock *block, llvm::Value *input)
+ : m_block(block),
+ m_INPUT(input),
+ m_addrs(32),
+ m_idx(0)
+{
+ m_floatVecType = VectorType::get(Type::FloatTy, 4);
+ m_intVecType = VectorType::get(IntegerType::get(32), 4);
+
+ m_undefFloatVec = UndefValue::get(m_floatVecType);
+ m_undefIntVec = UndefValue::get(m_intVecType);
+ m_extSwizzleVec = 0;
+
+ m_numConsts = 0;
+}
+
+//can only build vectors with all members in the [0, 9] range
+llvm::Constant *Storage::shuffleMask(int vec)
+{
+ if (!m_extSwizzleVec) {
+ std::vector<Constant*> elems;
+ elems.push_back(ConstantFP::get(APFloat(0.f)));
+ elems.push_back(ConstantFP::get(APFloat(1.f)));
+ elems.push_back(ConstantFP::get(APFloat(0.f)));
+ elems.push_back(ConstantFP::get(APFloat(1.f)));
+ m_extSwizzleVec = ConstantVector::get(m_floatVecType, elems);
+ }
+
+ if (m_intVecs.find(vec) != m_intVecs.end()) {
+ return m_intVecs[vec];
+ }
+ int origVec = vec;
+ Constant* const_vec = 0;
+ if (origVec == 0) {
+ const_vec = Constant::getNullValue(m_intVecType);
+ } else {
+ int x = gallivm_x_swizzle(vec);
+ int y = gallivm_y_swizzle(vec);
+ int z = gallivm_z_swizzle(vec);
+ int w = gallivm_w_swizzle(vec);
+ std::vector<Constant*> elems;
+ elems.push_back(constantInt(x));
+ elems.push_back(constantInt(y));
+ elems.push_back(constantInt(z));
+ elems.push_back(constantInt(w));
+ const_vec = ConstantVector::get(m_intVecType, elems);
+ }
+
+ m_intVecs[origVec] = const_vec;
+ return const_vec;
+}
+
+llvm::ConstantInt *Storage::constantInt(int idx)
+{
+ if (m_constInts.find(idx) != m_constInts.end()) {
+ return m_constInts[idx];
+ }
+ ConstantInt *const_int = ConstantInt::get(APInt(32, idx));
+ m_constInts[idx] = const_int;
+ return const_int;
+}
+
+llvm::Value *Storage::inputElement(int idx, llvm::Value *indIdx)
+{
+ Value *val = element(InputsArg, idx, indIdx);
+ LoadInst *load = new LoadInst(val, name("input"), false, m_block);
+ load->setAlignment(8);
+
+ return load;
+}
+
+llvm::Value *Storage::constElement(int idx, llvm::Value *indIdx)
+{
+ m_numConsts = ((idx + 1) > m_numConsts) ? (idx + 1) : m_numConsts;
+
+ Value *elem = element(ConstsArg, idx, indIdx);
+ LoadInst *load = new LoadInst(elem, name("const"), false, m_block);
+ load->setAlignment(8);
+ return load;
+}
+
+llvm::Value *Storage::shuffleVector(llvm::Value *vec, int shuffle)
+{
+ Constant *mask = shuffleMask(shuffle);
+ ShuffleVectorInst *res =
+ new ShuffleVectorInst(vec, m_extSwizzleVec, mask,
+ name("shuffle"), m_block);
+ return res;
+}
+
+
+llvm::Value *Storage::tempElement(int idx, llvm::Value *indIdx)
+{
+ Value *elem = element(TempsArg, idx, indIdx);
+
+ LoadInst *load = new LoadInst(elem, name("temp"), false, m_block);
+ load->setAlignment(8);
+
+ return load;
+}
+
+void Storage::setTempElement(int idx, llvm::Value *val, int mask)
+{
+ if (mask != TGSI_WRITEMASK_XYZW) {
+ llvm::Value *templ = 0;
+ if (m_tempWriteMap[idx])
+ templ = tempElement(idx);
+ val = maskWrite(val, mask, templ);
+ }
+ Value *elem = element(TempsArg, idx);
+ StoreInst *st = new StoreInst(val, elem, false, m_block);
+ st->setAlignment(8);
+ m_tempWriteMap[idx] = true;
+}
+
+void Storage::setOutputElement(int dstIdx, llvm::Value *val, int mask)
+{
+ if (mask != TGSI_WRITEMASK_XYZW) {
+ llvm::Value *templ = 0;
+ if (m_destWriteMap[dstIdx])
+ templ = outputElement(dstIdx);
+ val = maskWrite(val, mask, templ);
+ }
+
+ Value *elem = element(DestsArg, dstIdx);
+ StoreInst *st = new StoreInst(val, elem, false, m_block);
+ st->setAlignment(8);
+ m_destWriteMap[dstIdx] = true;
+}
+
+llvm::Value *Storage::maskWrite(llvm::Value *src, int mask, llvm::Value *templ)
+{
+ llvm::Value *dst = templ;
+ if (!dst)
+ dst = Constant::getNullValue(m_floatVecType);
+ if ((mask & TGSI_WRITEMASK_X)) {
+ llvm::Value *x = new ExtractElementInst(src, unsigned(0),
+ name("x"), m_block);
+ dst = InsertElementInst::Create(dst, x, unsigned(0),
+ name("dstx"), m_block);
+ }
+ if ((mask & TGSI_WRITEMASK_Y)) {
+ llvm::Value *y = new ExtractElementInst(src, unsigned(1),
+ name("y"), m_block);
+ dst = InsertElementInst::Create(dst, y, unsigned(1),
+ name("dsty"), m_block);
+ }
+ if ((mask & TGSI_WRITEMASK_Z)) {
+ llvm::Value *z = new ExtractElementInst(src, unsigned(2),
+ name("z"), m_block);
+ dst = InsertElementInst::Create(dst, z, unsigned(2),
+ name("dstz"), m_block);
+ }
+ if ((mask & TGSI_WRITEMASK_W)) {
+ llvm::Value *w = new ExtractElementInst(src, unsigned(3),
+ name("w"), m_block);
+ dst = InsertElementInst::Create(dst, w, unsigned(3),
+ name("dstw"), m_block);
+ }
+ return dst;
+}
+
+const char * Storage::name(const char *prefix)
+{
+ ++m_idx;
+ snprintf(m_name, 32, "%s%d", prefix, m_idx);
+ return m_name;
+}
+
+int Storage::numConsts() const
+{
+ return m_numConsts;
+}
+
+llvm::Value * Storage::addrElement(int idx) const
+{
+ Value *ret = m_addrs[idx];
+ if (!ret)
+ return m_undefFloatVec;
+ return ret;
+}
+
+void Storage::setAddrElement(int idx, llvm::Value *val, int mask)
+{
+ if (mask != TGSI_WRITEMASK_XYZW) {
+ llvm::Value *templ = m_addrs[idx];
+ val = maskWrite(val, mask, templ);
+ }
+ m_addrs[idx] = val;
+}
+
+llvm::Value * Storage::extractIndex(llvm::Value *vec)
+{
+ llvm::Value *x = new ExtractElementInst(vec, unsigned(0),
+ name("x"), m_block);
+ return new FPToSIInst(x, IntegerType::get(32), name("intidx"), m_block);
+}
+
+void Storage::setCurrentBlock(llvm::BasicBlock *block)
+{
+ m_block = block;
+}
+
+llvm::Value * Storage::outputElement(int idx, llvm::Value *indIdx)
+{
+ Value *elem = element(DestsArg, idx, indIdx);
+ LoadInst *load = new LoadInst(elem, name("output"), false, m_block);
+ load->setAlignment(8);
+
+ return load;
+}
+
+llvm::Value * Storage::inputPtr() const
+{
+ return m_INPUT;
+}
+
+void Storage::pushArguments(llvm::Value *input)
+{
+ m_argStack.push(m_INPUT);
+
+ m_INPUT = input;
+}
+
+void Storage::popArguments()
+{
+ m_INPUT = m_argStack.top();
+ m_argStack.pop();
+}
+
+void Storage::pushTemps()
+{
+ m_extSwizzleVec = 0;
+}
+
+void Storage::popTemps()
+{
+}
+
+llvm::Value * Storage::immediateElement(int idx)
+{
+ return m_immediates[idx];
+}
+
+void Storage::addImmediate(float *val)
+{
+ std::vector<Constant*> vec(4);
+ vec[0] = ConstantFP::get(APFloat(val[0]));
+ vec[1] = ConstantFP::get(APFloat(val[1]));
+ vec[2] = ConstantFP::get(APFloat(val[2]));
+ vec[3] = ConstantFP::get(APFloat(val[3]));
+ m_immediates.push_back(ConstantVector::get(m_floatVecType, vec));
+}
+
+
+llvm::Value * Storage::elemPtr(Args arg)
+{
+ std::vector<Value*> indices;
+ indices.push_back(constantInt(0));
+ indices.push_back(constantInt(static_cast<int>(arg)));
+ GetElementPtrInst *getElem = GetElementPtrInst::Create(m_INPUT,
+ indices.begin(),
+ indices.end(),
+ name("input_ptr"),
+ m_block);
+ return new LoadInst(getElem, name("input_field"), false, m_block);
+}
+
+llvm::Value * Storage::elemIdx(llvm::Value *ptr, int idx,
+ llvm::Value *indIdx )
+{
+ GetElementPtrInst *getElem = 0;
+
+ if (indIdx) {
+ getElem = GetElementPtrInst::Create(ptr,
+ BinaryOperator::Create(Instruction::Add,
+ indIdx,
+ constantInt(idx),
+ name("add"),
+ m_block),
+ name("field"),
+ m_block);
+ } else {
+ getElem = GetElementPtrInst::Create(ptr,
+ constantInt(idx),
+ name("field"),
+ m_block);
+ }
+ return getElem;
+}
+
+llvm::Value * Storage::element(Args arg, int idx, llvm::Value *indIdx )
+{
+ Value *val = elemPtr(arg);
+ return elemIdx(val, idx, indIdx);
+}
+
+void Storage::setKilElement(llvm::Value *val)
+{
+ std::vector<Value*> indices;
+ indices.push_back(constantInt(0));
+ indices.push_back(constantInt(static_cast<int>(KilArg)));
+ GetElementPtrInst *elem = GetElementPtrInst::Create(m_INPUT,
+ indices.begin(),
+ indices.end(),
+ name("kil_ptr"),
+ m_block);
+ StoreInst *st = new StoreInst(val, elem, false, m_block);
+ st->setAlignment(8);
+}
+
+#endif //MESA_LLVM
+
+
diff --git a/src/gallium/auxiliary/gallivm/storage.h b/src/gallium/auxiliary/gallivm/storage.h
new file mode 100644
index 00000000000..8574f7554e3
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/storage.h
@@ -0,0 +1,133 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Zack Rusin [email protected]
+ */
+
+#ifndef STORAGE_H
+#define STORAGE_H
+
+#include <map>
+#include <set>
+#include <stack>
+#include <vector>
+
+namespace llvm {
+ class BasicBlock;
+ class Constant;
+ class ConstantInt;
+ class LoadInst;
+ class Value;
+ class VectorType;
+}
+
+class Storage
+{
+public:
+ Storage(llvm::BasicBlock *block,
+ llvm::Value *input);
+
+ llvm::Value *inputPtr() const;
+
+ void setCurrentBlock(llvm::BasicBlock *block);
+
+ llvm::ConstantInt *constantInt(int);
+ llvm::Constant *shuffleMask(int vec);
+ llvm::Value *inputElement(int idx, llvm::Value *indIdx =0);
+ llvm::Value *constElement(int idx, llvm::Value *indIdx =0);
+ llvm::Value *outputElement(int idx, llvm::Value *indIdx =0);
+ llvm::Value *tempElement(int idx, llvm::Value *indIdx =0);
+ llvm::Value *immediateElement(int idx);
+
+ void setOutputElement(int dstIdx, llvm::Value *val, int mask);
+ void setTempElement(int idx, llvm::Value *val, int mask);
+
+ llvm::Value *addrElement(int idx) const;
+ void setAddrElement(int idx, llvm::Value *val, int mask);
+
+ void setKilElement(llvm::Value *val);
+
+ llvm::Value *shuffleVector(llvm::Value *vec, int shuffle);
+
+ llvm::Value *extractIndex(llvm::Value *vec);
+
+ int numConsts() const;
+
+ void pushArguments(llvm::Value *input);
+ void popArguments();
+ void pushTemps();
+ void popTemps();
+
+ void addImmediate(float *val);
+
+private:
+ llvm::Value *maskWrite(llvm::Value *src, int mask, llvm::Value *templ);
+ const char *name(const char *prefix);
+
+ enum Args {
+ DestsArg = 0,
+ InputsArg = 1,
+ TempsArg = 2,
+ ConstsArg = 3,
+ KilArg = 4
+ };
+ llvm::Value *elemPtr(Args arg);
+ llvm::Value *elemIdx(llvm::Value *ptr, int idx,
+ llvm::Value *indIdx = 0);
+ llvm::Value *element(Args arg, int idx, llvm::Value *indIdx = 0);
+
+private:
+ llvm::BasicBlock *m_block;
+ llvm::Value *m_INPUT;
+
+ std::map<int, llvm::ConstantInt*> m_constInts;
+ std::map<int, llvm::Constant*> m_intVecs;
+ std::vector<llvm::Value*> m_addrs;
+ std::vector<llvm::Constant*> m_immediates;
+
+ llvm::VectorType *m_floatVecType;
+ llvm::VectorType *m_intVecType;
+
+ char m_name[32];
+ int m_idx;
+
+ int m_numConsts;
+
+ std::map<int, bool > m_destWriteMap;
+ std::map<int, bool > m_tempWriteMap;
+
+ llvm::Value *m_undefFloatVec;
+ llvm::Value *m_undefIntVec;
+ llvm::Value *m_extSwizzleVec;
+
+ std::stack<llvm::Value*> m_argStack;
+ std::stack<std::vector<llvm::Value*> > m_tempStack;
+};
+
+#endif
diff --git a/src/gallium/auxiliary/gallivm/storagesoa.cpp b/src/gallium/auxiliary/gallivm/storagesoa.cpp
new file mode 100644
index 00000000000..e1e5cabcf55
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/storagesoa.cpp
@@ -0,0 +1,438 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "storagesoa.h"
+
+#include "gallivm_p.h"
+
+#include "pipe/p_shader_tokens.h"
+#include "pipe/p_debug.h"
+
+#include <llvm/BasicBlock.h>
+#include <llvm/Module.h>
+#include <llvm/Value.h>
+
+#include <llvm/CallingConv.h>
+#include <llvm/Constants.h>
+#include <llvm/DerivedTypes.h>
+#include <llvm/InstrTypes.h>
+#include <llvm/Instructions.h>
+
+using namespace llvm;
+
+
+StorageSoa::StorageSoa(llvm::BasicBlock *block,
+ llvm::Value *input,
+ llvm::Value *output,
+ llvm::Value *consts)
+ : m_block(block),
+ m_input(input),
+ m_output(output),
+ m_consts(consts),
+ m_immediates(0),
+ m_idx(0)
+{
+}
+
+void StorageSoa::addImmediate(float *vec)
+{
+ std::vector<float> vals(4);
+ vals[0] = vec[0];
+ vals[1] = vec[1];
+ vals[2] = vec[2];
+ vals[3] = vec[3];
+ m_immediatesToFlush.push_back(vals);
+}
+
+void StorageSoa::declareImmediates()
+{
+ if (m_immediatesToFlush.empty())
+ return;
+
+ VectorType *vectorType = VectorType::get(Type::FloatTy, 4);
+ ArrayType *vectorChannels = ArrayType::get(vectorType, 4);
+ ArrayType *arrayType = ArrayType::get(vectorChannels, m_immediatesToFlush.size());
+
+ m_immediates = new GlobalVariable(
+ /*Type=*/arrayType,
+ /*isConstant=*/false,
+ /*Linkage=*/GlobalValue::ExternalLinkage,
+ /*Initializer=*/0, // has initializer, specified below
+ /*Name=*/name("immediates"),
+ currentModule());
+
+ std::vector<Constant*> arrayVals;
+ for (unsigned int i = 0; i < m_immediatesToFlush.size(); ++i) {
+ std::vector<float> vec = m_immediatesToFlush[i];
+ std::vector<float> vals(4);
+ std::vector<Constant*> channelArray;
+
+ vals[0] = vec[0]; vals[1] = vec[1]; vals[2] = vec[2]; vals[3] = vec[3];
+ llvm::Constant *xChannel = createConstGlobalVector(vals);
+
+ vals[0] = vec[1]; vals[1] = vec[1]; vals[2] = vec[1]; vals[3] = vec[1];
+ llvm::Constant *yChannel = createConstGlobalVector(vals);
+
+ vals[0] = vec[2]; vals[1] = vec[2]; vals[2] = vec[2]; vals[3] = vec[2];
+ llvm::Constant *zChannel = createConstGlobalVector(vals);
+
+ vals[0] = vec[3]; vals[1] = vec[3]; vals[2] = vec[3]; vals[3] = vec[3];
+ llvm::Constant *wChannel = createConstGlobalVector(vals);
+ channelArray.push_back(xChannel);
+ channelArray.push_back(yChannel);
+ channelArray.push_back(zChannel);
+ channelArray.push_back(wChannel);
+ Constant *constChannels = ConstantArray::get(vectorChannels,
+ channelArray);
+ arrayVals.push_back(constChannels);
+ }
+ Constant *constArray = ConstantArray::get(arrayType, arrayVals);
+ m_immediates->setInitializer(constArray);
+
+ m_immediatesToFlush.clear();
+}
+
+llvm::Value *StorageSoa::addrElement(int idx) const
+{
+ std::map<int, llvm::Value*>::const_iterator itr = m_addresses.find(idx);
+ if (itr == m_addresses.end()) {
+ debug_printf("Trying to access invalid shader 'address'\n");
+ return 0;
+ }
+ llvm::Value * res = (*itr).second;
+
+ res = new LoadInst(res, name("addr"), false, m_block);
+
+ return res;
+}
+
+std::vector<llvm::Value*> StorageSoa::inputElement(llvm::Value *idx)
+{
+ std::vector<llvm::Value*> res(4);
+
+ res[0] = element(m_input, idx, 0);
+ res[1] = element(m_input, idx, 1);
+ res[2] = element(m_input, idx, 2);
+ res[3] = element(m_input, idx, 3);
+
+ return res;
+}
+
+llvm::Value* StorageSoa::unpackConstElement(llvm::IRBuilder<>* m_builder, llvm::Value* vector, int cc)
+{
+ std::vector<llvm::Value*> x(4);
+ x[0] = m_builder->CreateExtractElement(vector,
+ constantInt(cc),
+ name("x"));
+
+ VectorType *vectorType = VectorType::get(Type::FloatTy, 4);
+ Constant *constVector = Constant::getNullValue(vectorType);
+ Value *res = m_builder->CreateInsertElement(constVector, x[0],
+ constantInt(0),
+ name("vecx"));
+ res = m_builder->CreateInsertElement(res, x[0], constantInt(1),
+ name("vecxx"));
+ res = m_builder->CreateInsertElement(res, x[0], constantInt(2),
+ name("vecxxx"));
+ res = m_builder->CreateInsertElement(res, x[0], constantInt(3),
+ name("vecxxxx"));
+ return res;
+}
+
+std::vector<llvm::Value*> StorageSoa::constElement(llvm::IRBuilder<>* m_builder, llvm::Value *idx)
+{
+ llvm::Value* res;
+ std::vector<llvm::Value*> res2(4);
+ llvm::Value *xChannel;
+
+ xChannel = elementPointer(m_consts, idx, 0);
+
+ res = alignedArrayLoad(xChannel);
+
+ res2[0]=unpackConstElement(m_builder, res,0);
+ res2[1]=unpackConstElement(m_builder, res,1);
+ res2[2]=unpackConstElement(m_builder, res,2);
+ res2[3]=unpackConstElement(m_builder, res,3);
+
+ return res2;
+}
+
+std::vector<llvm::Value*> StorageSoa::outputElement(llvm::Value *idx)
+{
+ std::vector<llvm::Value*> res(4);
+
+ res[0] = element(m_output, idx, 0);
+ res[1] = element(m_output, idx, 1);
+ res[2] = element(m_output, idx, 2);
+ res[3] = element(m_output, idx, 3);
+
+ return res;
+}
+
+std::vector<llvm::Value*> StorageSoa::tempElement(llvm::IRBuilder<>* m_builder, int idx)
+{
+ std::vector<llvm::Value*> res(4);
+ llvm::Value *temp = m_temps[idx];
+
+ res[0] = element(temp, constantInt(0), 0);
+ res[1] = element(temp, constantInt(0), 1);
+ res[2] = element(temp, constantInt(0), 2);
+ res[3] = element(temp, constantInt(0), 3);
+
+ return res;
+}
+
+std::vector<llvm::Value*> StorageSoa::immediateElement(llvm::Value *idx)
+{
+ std::vector<llvm::Value*> res(4);
+
+ res[0] = element(m_immediates, idx, 0);
+ res[1] = element(m_immediates, idx, 1);
+ res[2] = element(m_immediates, idx, 2);
+ res[3] = element(m_immediates, idx, 3);
+
+ return res;
+}
+
+llvm::Value * StorageSoa::elementPointer(llvm::Value *ptr, llvm::Value *index,
+ int channel) const
+{
+ std::vector<Value*> indices;
+ if (m_immediates == ptr)
+ indices.push_back(constantInt(0));
+ indices.push_back(index);
+ indices.push_back(constantInt(channel));
+
+ GetElementPtrInst *getElem = GetElementPtrInst::Create(ptr,
+ indices.begin(),
+ indices.end(),
+ name("ptr"),
+ m_block);
+ return getElem;
+}
+
+llvm::Value * StorageSoa::element(llvm::Value *ptr, llvm::Value *index,
+ int channel) const
+{
+ llvm::Value *res = elementPointer(ptr, index, channel);
+ LoadInst *load = new LoadInst(res, name("element"), false, m_block);
+ //load->setAlignment(8);
+ return load;
+}
+
+const char * StorageSoa::name(const char *prefix) const
+{
+ ++m_idx;
+ snprintf(m_name, 32, "%s%d", prefix, m_idx);
+ return m_name;
+}
+
+llvm::ConstantInt * StorageSoa::constantInt(int idx) const
+{
+ if (m_constInts.find(idx) != m_constInts.end()) {
+ return m_constInts[idx];
+ }
+ ConstantInt *constInt = ConstantInt::get(APInt(32, idx));
+ m_constInts[idx] = constInt;
+ return constInt;
+}
+
+llvm::Value *StorageSoa::alignedArrayLoad(llvm::Value *val)
+{
+ VectorType *vectorType = VectorType::get(Type::FloatTy, 4);
+ PointerType *vectorPtr = PointerType::get(vectorType, 0);
+
+ CastInst *cast = new BitCastInst(val, vectorPtr, name("toVector"), m_block);
+ LoadInst *load = new LoadInst(cast, name("alignLoad"), false, m_block);
+ load->setAlignment(8);
+ return load;
+}
+
+llvm::Module * StorageSoa::currentModule() const
+{
+ if (!m_block || !m_block->getParent())
+ return 0;
+
+ return m_block->getParent()->getParent();
+}
+
+llvm::Constant * StorageSoa::createConstGlobalFloat(const float val)
+{
+ Constant*c = ConstantFP::get(APFloat(val));
+ return c;
+}
+
+llvm::Constant * StorageSoa::createConstGlobalVector(const std::vector<float> &vec)
+{
+ VectorType *vectorType = VectorType::get(Type::FloatTy, 4);
+ std::vector<Constant*> immValues;
+ ConstantFP *constx = ConstantFP::get(APFloat(vec[0]));
+ ConstantFP *consty = ConstantFP::get(APFloat(vec[1]));
+ ConstantFP *constz = ConstantFP::get(APFloat(vec[2]));
+ ConstantFP *constw = ConstantFP::get(APFloat(vec[3]));
+ immValues.push_back(constx);
+ immValues.push_back(consty);
+ immValues.push_back(constz);
+ immValues.push_back(constw);
+ Constant *constVector = ConstantVector::get(vectorType, immValues);
+
+ return constVector;
+}
+
+std::vector<llvm::Value*> StorageSoa::load(enum tgsi_file_type type, int idx, int swizzle,
+ llvm::IRBuilder<>* m_builder,llvm::Value *indIdx)
+{
+ std::vector<llvm::Value*> val(4);
+
+ //if we have an indirect index, always use that
+ // if not use the integer offset to create one
+ llvm::Value *realIndex = 0;
+ if (indIdx)
+ realIndex = indIdx;
+ else
+ realIndex = constantInt(idx);
+ debug_printf("XXXXXXXXX realIdx = %p, indIdx = %p\n", realIndex, indIdx);
+
+ switch(type) {
+ case TGSI_FILE_INPUT:
+ val = inputElement(realIndex);
+ break;
+ case TGSI_FILE_OUTPUT:
+ val = outputElement(realIndex);
+ break;
+ case TGSI_FILE_TEMPORARY:
+ val = tempElement(m_builder, idx);
+ break;
+ case TGSI_FILE_CONSTANT:
+ val = constElement(m_builder, realIndex);
+ break;
+ case TGSI_FILE_IMMEDIATE:
+ val = immediateElement(realIndex);
+ break;
+ case TGSI_FILE_ADDRESS:
+ debug_printf("Address not handled in the load phase!\n");
+ assert(0);
+ break;
+ default:
+ debug_printf("Unknown load!\n");
+ assert(0);
+ break;
+ }
+ if (!gallivm_is_swizzle(swizzle))
+ return val;
+
+ std::vector<llvm::Value*> res(4);
+
+ res[0] = val[gallivm_x_swizzle(swizzle)];
+ res[1] = val[gallivm_y_swizzle(swizzle)];
+ res[2] = val[gallivm_z_swizzle(swizzle)];
+ res[3] = val[gallivm_w_swizzle(swizzle)];
+ return res;
+}
+
+llvm::Value * StorageSoa::allocaTemp(llvm::IRBuilder<>* m_builder)
+{
+ VectorType *vector = VectorType::get(Type::FloatTy, 4);
+ ArrayType *vecArray = ArrayType::get(vector, 4);
+ AllocaInst *alloca = new AllocaInst(vecArray, "temp",
+ m_builder->GetInsertBlock());
+
+ return alloca;
+}
+
+
+void StorageSoa::store(enum tgsi_file_type type, int idx, const std::vector<llvm::Value*> &val,
+ int mask, llvm::IRBuilder<>* m_builder)
+{
+ llvm::Value *out = 0;
+ llvm::Value *realIndex = 0;
+ switch(type) {
+ case TGSI_FILE_OUTPUT:
+ out = m_output;
+ realIndex = constantInt(idx);
+ break;
+ case TGSI_FILE_TEMPORARY:
+ // if that temp doesn't already exist, alloca it
+ if (m_temps.find(idx) == m_temps.end())
+ m_temps[idx] = allocaTemp(m_builder);
+
+ out = m_temps[idx];
+
+ realIndex = constantInt(0);
+ break;
+ case TGSI_FILE_INPUT:
+ out = m_input;
+ realIndex = constantInt(idx);
+ break;
+ case TGSI_FILE_ADDRESS: {
+ llvm::Value *addr = m_addresses[idx];
+ if (!addr) {
+ addAddress(idx);
+ addr = m_addresses[idx];
+ assert(addr);
+ }
+ new StoreInst(val[0], addr, false, m_block);
+ return;
+ break;
+ }
+ default:
+ debug_printf("Can't save output of this type: %d !\n", type);
+ assert(0);
+ break;
+ }
+ if ((mask & TGSI_WRITEMASK_X)) {
+ llvm::Value *xChannel = elementPointer(out, realIndex, 0);
+ new StoreInst(val[0], xChannel, false, m_block);
+ }
+ if ((mask & TGSI_WRITEMASK_Y)) {
+ llvm::Value *yChannel = elementPointer(out, realIndex, 1);
+ new StoreInst(val[1], yChannel, false, m_block);
+ }
+ if ((mask & TGSI_WRITEMASK_Z)) {
+ llvm::Value *zChannel = elementPointer(out, realIndex, 2);
+ new StoreInst(val[2], zChannel, false, m_block);
+ }
+ if ((mask & TGSI_WRITEMASK_W)) {
+ llvm::Value *wChannel = elementPointer(out, realIndex, 3);
+ new StoreInst(val[3], wChannel, false, m_block);
+ }
+}
+
+void StorageSoa::addAddress(int idx)
+{
+ GlobalVariable *val = new GlobalVariable(
+ /*Type=*/IntegerType::get(32),
+ /*isConstant=*/false,
+ /*Linkage=*/GlobalValue::ExternalLinkage,
+ /*Initializer=*/0, // has initializer, specified below
+ /*Name=*/name("address"),
+ currentModule());
+ val->setInitializer(Constant::getNullValue(IntegerType::get(32)));
+
+ debug_printf("adding to %d\n", idx);
+ m_addresses[idx] = val;
+}
diff --git a/src/gallium/auxiliary/gallivm/storagesoa.h b/src/gallium/auxiliary/gallivm/storagesoa.h
new file mode 100644
index 00000000000..56886f85e7a
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/storagesoa.h
@@ -0,0 +1,107 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef STORAGESOA_H
+#define STORAGESOA_H
+
+#include <pipe/p_shader_tokens.h>
+#include <llvm/Support/IRBuilder.h>
+
+#include <vector>
+#include <list>
+#include <map>
+
+namespace llvm {
+ class BasicBlock;
+ class Constant;
+ class ConstantInt;
+ class GlobalVariable;
+ class LoadInst;
+ class Value;
+ class VectorType;
+ class Module;
+}
+
+class StorageSoa
+{
+public:
+ StorageSoa(llvm::BasicBlock *block,
+ llvm::Value *input,
+ llvm::Value *output,
+ llvm::Value *consts);
+
+
+ std::vector<llvm::Value*> load(enum tgsi_file_type type, int idx, int swizzle,
+ llvm::IRBuilder<>* m_builder, llvm::Value *indIdx =0);
+ void store(enum tgsi_file_type type, int idx, const std::vector<llvm::Value*> &val,
+ int mask, llvm::IRBuilder<>* m_builder);
+
+ void addImmediate(float *vec);
+ void declareImmediates();
+
+ void addAddress(int idx);
+
+ llvm::Value * addrElement(int idx) const;
+
+ llvm::ConstantInt *constantInt(int) const;
+private:
+ llvm::Value *elementPointer(llvm::Value *ptr, llvm::Value *indIdx,
+ int channel) const;
+ llvm::Value *element(llvm::Value *ptr, llvm::Value *idx,
+ int channel) const;
+ const char *name(const char *prefix) const;
+ llvm::Value *alignedArrayLoad(llvm::Value *val);
+ llvm::Module *currentModule() const;
+ llvm::Constant *createConstGlobalFloat(const float val);
+ llvm::Constant *createConstGlobalVector(const std::vector<float> &vec);
+
+ std::vector<llvm::Value*> inputElement(llvm::Value *indIdx);
+ llvm::Value* unpackConstElement(llvm::IRBuilder<>* m_builder, llvm::Value *indIdx, int cc);
+ std::vector<llvm::Value*> constElement(llvm::IRBuilder<>* m_builder, llvm::Value *indIdx);
+ std::vector<llvm::Value*> outputElement(llvm::Value *indIdx);
+ std::vector<llvm::Value*> tempElement(llvm::IRBuilder<>* m_builder, int idx);
+ std::vector<llvm::Value*> immediateElement(llvm::Value *indIdx);
+private:
+ llvm::BasicBlock *m_block;
+
+ llvm::Value *m_input;
+ llvm::Value *m_output;
+ llvm::Value *m_consts;
+ std::map<int, llvm::Value*> m_temps;
+ llvm::GlobalVariable *m_immediates;
+
+ std::map<int, llvm::Value*> m_addresses;
+
+ std::vector<std::vector<float> > m_immediatesToFlush;
+ llvm::Value * allocaTemp(llvm::IRBuilder<>* m_builder);
+
+ mutable std::map<int, llvm::ConstantInt*> m_constInts;
+ mutable char m_name[32];
+ mutable int m_idx;
+};
+
+#endif
diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
new file mode 100644
index 00000000000..5b08200d142
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
@@ -0,0 +1,1164 @@
+#include "tgsitollvm.h"
+
+#include "gallivm.h"
+#include "gallivm_p.h"
+
+#include "storage.h"
+#include "instructions.h"
+#include "storagesoa.h"
+#include "instructionssoa.h"
+
+#include "pipe/p_shader_tokens.h"
+
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_exec.h"
+#include "tgsi/tgsi_util.h"
+#include "tgsi/tgsi_build.h"
+#include "tgsi/tgsi_dump.h"
+
+
+#include <llvm/Module.h>
+#include <llvm/CallingConv.h>
+#include <llvm/Constants.h>
+#include <llvm/DerivedTypes.h>
+#include <llvm/Instructions.h>
+#include <llvm/ModuleProvider.h>
+#include <llvm/Pass.h>
+#include <llvm/PassManager.h>
+#include <llvm/Attributes.h>
+#include <llvm/Support/PatternMatch.h>
+#include <llvm/ExecutionEngine/JIT.h>
+#include <llvm/ExecutionEngine/Interpreter.h>
+#include <llvm/ExecutionEngine/GenericValue.h>
+#include <llvm/Support/MemoryBuffer.h>
+#include <llvm/LinkAllPasses.h>
+#include <llvm/Analysis/Verifier.h>
+#include <llvm/Analysis/LoopPass.h>
+#include <llvm/Target/TargetData.h>
+#include <llvm/Bitcode/ReaderWriter.h>
+#include <llvm/Transforms/Utils/Cloning.h>
+
+
+#include <sstream>
+#include <fstream>
+#include <iostream>
+
+using namespace llvm;
+
+static inline FunctionType *vertexShaderFunctionType()
+{
+ //Function takes three arguments,
+ // the calling code has to make sure the types it will
+ // pass are castable to the following:
+ // [4 x <4 x float>] inputs,
+ // [4 x <4 x float>] output,
+ // [4 x [1 x float]] consts,
+
+ std::vector<const Type*> funcArgs;
+ VectorType *vectorType = VectorType::get(Type::FloatTy, 4);
+ ArrayType *vectorArray = ArrayType::get(vectorType, 4);
+ PointerType *vectorArrayPtr = PointerType::get(vectorArray, 0);
+
+ ArrayType *floatArray = ArrayType::get(Type::FloatTy, 4);
+ ArrayType *constsArray = ArrayType::get(floatArray, 1);
+ PointerType *constsArrayPtr = PointerType::get(constsArray, 0);
+
+ funcArgs.push_back(vectorArrayPtr);//inputs
+ funcArgs.push_back(vectorArrayPtr);//output
+ funcArgs.push_back(constsArrayPtr);//consts
+
+ FunctionType *functionType = FunctionType::get(
+ /*Result=*/Type::VoidTy,
+ /*Params=*/funcArgs,
+ /*isVarArg=*/false);
+
+ return functionType;
+}
+
+static inline void
+add_interpolator(struct gallivm_ir *ir,
+ struct gallivm_interpolate *interp)
+{
+ ir->interpolators[ir->num_interp] = *interp;
+ ++ir->num_interp;
+}
+
+static void
+translate_declaration(struct gallivm_ir *prog,
+ llvm::Module *module,
+ Storage *storage,
+ struct tgsi_full_declaration *decl,
+ struct tgsi_full_declaration *fd)
+{
+ if (decl->Declaration.File == TGSI_FILE_INPUT) {
+ unsigned first, last, mask;
+ uint interp_method;
+
+ first = decl->DeclarationRange.First;
+ last = decl->DeclarationRange.Last;
+ mask = decl->Declaration.UsageMask;
+
+ /* Do not touch WPOS.xy */
+ if (first == 0) {
+ mask &= ~TGSI_WRITEMASK_XY;
+ if (mask == TGSI_WRITEMASK_NONE) {
+ first++;
+ if (first > last) {
+ return;
+ }
+ }
+ }
+
+ interp_method = decl->Declaration.Interpolate;
+
+ if (mask == TGSI_WRITEMASK_XYZW) {
+ unsigned i, j;
+
+ for (i = first; i <= last; i++) {
+ for (j = 0; j < NUM_CHANNELS; j++) {
+ //interp( mach, i, j );
+ struct gallivm_interpolate interp;
+ interp.type = interp_method;
+ interp.attrib = i;
+ interp.chan = j;
+ add_interpolator(prog, &interp);
+ }
+ }
+ } else {
+ unsigned i, j;
+ for( j = 0; j < NUM_CHANNELS; j++ ) {
+ if( mask & (1 << j) ) {
+ for( i = first; i <= last; i++ ) {
+ struct gallivm_interpolate interp;
+ interp.type = interp_method;
+ interp.attrib = i;
+ interp.chan = j;
+ add_interpolator(prog, &interp);
+ }
+ }
+ }
+ }
+ }
+}
+
+static void
+translate_declarationir(struct gallivm_ir *,
+ llvm::Module *,
+ StorageSoa *storage,
+ struct tgsi_full_declaration *decl,
+ struct tgsi_full_declaration *)
+{
+ if (decl->Declaration.File == TGSI_FILE_ADDRESS) {
+ int idx = decl->DeclarationRange.First;
+ storage->addAddress(idx);
+ }
+}
+
+static void
+translate_immediate(Storage *storage,
+ struct tgsi_full_immediate *imm)
+{
+ float vec[4];
+ int i;
+ for (i = 0; i < imm->Immediate.NrTokens - 1; ++i) {
+ switch (imm->Immediate.DataType) {
+ case TGSI_IMM_FLOAT32:
+ vec[i] = imm->u.ImmediateFloat32[i].Float;
+ break;
+ default:
+ assert(0);
+ }
+ }
+ storage->addImmediate(vec);
+}
+
+
+static void
+translate_immediateir(StorageSoa *storage,
+ struct tgsi_full_immediate *imm)
+{
+ float vec[4];
+ int i;
+ for (i = 0; i < imm->Immediate.NrTokens - 1; ++i) {
+ switch (imm->Immediate.DataType) {
+ case TGSI_IMM_FLOAT32:
+ vec[i] = imm->u.ImmediateFloat32[i].Float;
+ break;
+ default:
+ assert(0);
+ }
+ }
+ storage->addImmediate(vec);
+}
+
+static inline int
+swizzleInt(struct tgsi_full_src_register *src)
+{
+ int swizzle = 0;
+ int start = 1000;
+
+ for (int k = 0; k < 4; ++k) {
+ swizzle += tgsi_util_get_full_src_register_extswizzle(src, k) * start;
+ start /= 10;
+ }
+ return swizzle;
+}
+
+static inline llvm::Value *
+swizzleVector(llvm::Value *val, struct tgsi_full_src_register *src,
+ Storage *storage)
+{
+ int swizzle = swizzleInt(src);
+
+ if (gallivm_is_swizzle(swizzle)) {
+ /*fprintf(stderr, "XXXXXXXX swizzle = %d\n", swizzle);*/
+ val = storage->shuffleVector(val, swizzle);
+ }
+ return val;
+}
+
+static void
+translate_instruction(llvm::Module *module,
+ Storage *storage,
+ Instructions *instr,
+ struct tgsi_full_instruction *inst,
+ struct tgsi_full_instruction *fi,
+ unsigned instno)
+{
+ llvm::Value *inputs[4];
+ inputs[0] = 0;
+ inputs[1] = 0;
+ inputs[2] = 0;
+ inputs[3] = 0;
+
+ for (int i = 0; i < inst->Instruction.NumSrcRegs; ++i) {
+ struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i];
+ llvm::Value *val = 0;
+ llvm::Value *indIdx = 0;
+
+ if (src->SrcRegister.Indirect) {
+ indIdx = storage->addrElement(src->SrcRegisterInd.Index);
+ indIdx = storage->extractIndex(indIdx);
+ }
+ if (src->SrcRegister.File == TGSI_FILE_CONSTANT) {
+ val = storage->constElement(src->SrcRegister.Index, indIdx);
+ } else if (src->SrcRegister.File == TGSI_FILE_INPUT) {
+ val = storage->inputElement(src->SrcRegister.Index, indIdx);
+ } else if (src->SrcRegister.File == TGSI_FILE_TEMPORARY) {
+ val = storage->tempElement(src->SrcRegister.Index);
+ } else if (src->SrcRegister.File == TGSI_FILE_OUTPUT) {
+ val = storage->outputElement(src->SrcRegister.Index, indIdx);
+ } else if (src->SrcRegister.File == TGSI_FILE_IMMEDIATE) {
+ val = storage->immediateElement(src->SrcRegister.Index);
+ } else {
+ fprintf(stderr, "ERROR: not supported llvm source %d\n", src->SrcRegister.File);
+ return;
+ }
+
+ inputs[i] = swizzleVector(val, src, storage);
+ }
+
+ /*if (inputs[0])
+ instr->printVector(inputs[0]);
+ if (inputs[1])
+ instr->printVector(inputs[1]);*/
+ llvm::Value *out = 0;
+ switch (inst->Instruction.Opcode) {
+ case TGSI_OPCODE_ARL: {
+ out = instr->arl(inputs[0]);
+ }
+ break;
+ case TGSI_OPCODE_MOV: {
+ out = inputs[0];
+ }
+ break;
+ case TGSI_OPCODE_LIT: {
+ out = instr->lit(inputs[0]);
+ }
+ break;
+ case TGSI_OPCODE_RCP: {
+ out = instr->rcp(inputs[0]);
+ }
+ break;
+ case TGSI_OPCODE_RSQ: {
+ out = instr->rsq(inputs[0]);
+ }
+ break;
+ case TGSI_OPCODE_EXP: {
+ out = instr->exp(inputs[0]);
+ }
+ break;
+ case TGSI_OPCODE_LOG: {
+ out = instr->log(inputs[0]);
+ }
+ break;
+ case TGSI_OPCODE_MUL: {
+ out = instr->mul(inputs[0], inputs[1]);
+ }
+ break;
+ case TGSI_OPCODE_ADD: {
+ out = instr->add(inputs[0], inputs[1]);
+ }
+ break;
+ case TGSI_OPCODE_DP3: {
+ out = instr->dp3(inputs[0], inputs[1]);
+ }
+ break;
+ case TGSI_OPCODE_DP4: {
+ out = instr->dp4(inputs[0], inputs[1]);
+ }
+ break;
+ case TGSI_OPCODE_DST: {
+ out = instr->dst(inputs[0], inputs[1]);
+ }
+ break;
+ case TGSI_OPCODE_MIN: {
+ out = instr->min(inputs[0], inputs[1]);
+ }
+ break;
+ case TGSI_OPCODE_MAX: {
+ out = instr->max(inputs[0], inputs[1]);
+ }
+ break;
+ case TGSI_OPCODE_SLT: {
+ out = instr->slt(inputs[0], inputs[1]);
+ }
+ break;
+ case TGSI_OPCODE_SGE: {
+ out = instr->sge(inputs[0], inputs[1]);
+ }
+ break;
+ case TGSI_OPCODE_MAD: {
+ out = instr->madd(inputs[0], inputs[1], inputs[2]);
+ }
+ break;
+ case TGSI_OPCODE_SUB: {
+ out = instr->sub(inputs[0], inputs[1]);
+ }
+ break;
+ case TGSI_OPCODE_LERP: {
+ out = instr->lerp(inputs[0], inputs[1], inputs[2]);
+ }
+ break;
+ case TGSI_OPCODE_CND: {
+ out = instr->cnd(inputs[0], inputs[1], inputs[2]);
+ }
+ break;
+ case TGSI_OPCODE_CND0: {
+ out = instr->cnd0(inputs[0], inputs[1], inputs[2]);
+ }
+ break;
+ case TGSI_OPCODE_DOT2ADD: {
+ out = instr->dot2add(inputs[0], inputs[1], inputs[2]);
+ }
+ break;
+ case TGSI_OPCODE_INDEX:
+ break;
+ case TGSI_OPCODE_NEGATE: {
+ out = instr->neg(inputs[0]);
+ }
+ break;
+ case TGSI_OPCODE_FRAC: {
+ out = instr->frc(inputs[0]);
+ }
+ break;
+ case TGSI_OPCODE_CLAMP: {
+ out = instr->clamp(inputs[0]);
+ }
+ break;
+ case TGSI_OPCODE_FLOOR: {
+ out = instr->floor(inputs[0]);
+ }
+ break;
+ case TGSI_OPCODE_ROUND:
+ break;
+ case TGSI_OPCODE_EXPBASE2: {
+ out = instr->ex2(inputs[0]);
+ }
+ break;
+ case TGSI_OPCODE_LOGBASE2: {
+ out = instr->lg2(inputs[0]);
+ }
+ break;
+ case TGSI_OPCODE_POWER: {
+ out = instr->pow(inputs[0], inputs[1]);
+ }
+ break;
+ case TGSI_OPCODE_CROSSPRODUCT: {
+ out = instr->cross(inputs[0], inputs[1]);
+ }
+ break;
+ case TGSI_OPCODE_MULTIPLYMATRIX:
+ break;
+ case TGSI_OPCODE_ABS: {
+ out = instr->abs(inputs[0]);
+ }
+ break;
+ case TGSI_OPCODE_RCC:
+ break;
+ case TGSI_OPCODE_DPH: {
+ out = instr->dph(inputs[0], inputs[1]);
+ }
+ break;
+ case TGSI_OPCODE_COS: {
+ out = instr->cos(inputs[0]);
+ }
+ break;
+ case TGSI_OPCODE_DDX: {
+ out = instr->ddx(inputs[0]);
+ }
+ break;
+ case TGSI_OPCODE_DDY: {
+ out = instr->ddy(inputs[0]);
+ }
+ break;
+ case TGSI_OPCODE_KILP:
+ break;
+ case TGSI_OPCODE_PK2H:
+ break;
+ case TGSI_OPCODE_PK2US:
+ break;
+ case TGSI_OPCODE_PK4B:
+ break;
+ case TGSI_OPCODE_PK4UB:
+ break;
+ case TGSI_OPCODE_RFL:
+ break;
+ case TGSI_OPCODE_SEQ: {
+ out = instr->seq(inputs[0], inputs[1]);
+ }
+ break;
+ case TGSI_OPCODE_SFL: {
+ out = instr->sfl(inputs[0], inputs[1]);
+ }
+ break;
+ case TGSI_OPCODE_SGT: {
+ out = instr->sgt(inputs[0], inputs[1]);
+ }
+ break;
+ case TGSI_OPCODE_SIN: {
+ out = instr->sin(inputs[0]);
+ }
+ break;
+ case TGSI_OPCODE_SLE: {
+ out = instr->sle(inputs[0], inputs[1]);
+ }
+ break;
+ case TGSI_OPCODE_SNE: {
+ out = instr->sne(inputs[0], inputs[1]);
+ }
+ break;
+ case TGSI_OPCODE_STR: {
+ out = instr->str(inputs[0], inputs[1]);
+ }
+ break;
+ case TGSI_OPCODE_TEX:
+ break;
+ case TGSI_OPCODE_TXD:
+ break;
+ case TGSI_OPCODE_UP2H:
+ break;
+ case TGSI_OPCODE_UP2US:
+ break;
+ case TGSI_OPCODE_UP4B:
+ break;
+ case TGSI_OPCODE_UP4UB:
+ break;
+ case TGSI_OPCODE_X2D: {
+ out = instr->x2d(inputs[0], inputs[1], inputs[2]);
+ }
+ break;
+ case TGSI_OPCODE_ARA:
+ break;
+ case TGSI_OPCODE_ARR:
+ break;
+ case TGSI_OPCODE_BRA:
+ break;
+ case TGSI_OPCODE_CAL: {
+ instr->cal(inst->InstructionExtLabel.Label, storage->inputPtr());
+ return;
+ }
+ break;
+ case TGSI_OPCODE_RET: {
+ instr->end();
+ return;
+ }
+ break;
+ case TGSI_OPCODE_SSG:
+ break;
+ case TGSI_OPCODE_CMP: {
+ out = instr->cmp(inputs[0], inputs[1], inputs[2]);
+ }
+ break;
+ case TGSI_OPCODE_SCS: {
+ out = instr->scs(inputs[0]);
+ }
+ break;
+ case TGSI_OPCODE_TXB:
+ break;
+ case TGSI_OPCODE_NRM4:
+ case TGSI_OPCODE_NRM: {
+ out = instr->nrm(inputs[0]);
+ }
+ break;
+ case TGSI_OPCODE_DIV: {
+ out = instr->div(inputs[0], inputs[1]);
+ }
+ break;
+ case TGSI_OPCODE_DP2: {
+ out = instr->dp2(inputs[0], inputs[1]);
+ }
+ break;
+ case TGSI_OPCODE_TXL:
+ break;
+ case TGSI_OPCODE_BRK: {
+ instr->brk();
+ return;
+ }
+ break;
+ case TGSI_OPCODE_IF: {
+ instr->ifop(inputs[0]);
+ storage->setCurrentBlock(instr->currentBlock());
+ return; //just update the state
+ }
+ break;
+ case TGSI_OPCODE_LOOP:
+ break;
+ case TGSI_OPCODE_REP:
+ break;
+ case TGSI_OPCODE_ELSE: {
+ instr->elseop();
+ storage->setCurrentBlock(instr->currentBlock());
+ return; //only state update
+ }
+ break;
+ case TGSI_OPCODE_ENDIF: {
+ instr->endif();
+ storage->setCurrentBlock(instr->currentBlock());
+ return; //just update the state
+ }
+ break;
+ case TGSI_OPCODE_ENDLOOP:
+ break;
+ case TGSI_OPCODE_ENDREP:
+ break;
+ case TGSI_OPCODE_PUSHA:
+ break;
+ case TGSI_OPCODE_POPA:
+ break;
+ case TGSI_OPCODE_CEIL:
+ break;
+ case TGSI_OPCODE_I2F:
+ break;
+ case TGSI_OPCODE_NOT:
+ break;
+ case TGSI_OPCODE_TRUNC: {
+ out = instr->trunc(inputs[0]);
+ }
+ break;
+ case TGSI_OPCODE_SHL:
+ break;
+ case TGSI_OPCODE_SHR:
+ break;
+ case TGSI_OPCODE_AND:
+ break;
+ case TGSI_OPCODE_OR:
+ break;
+ case TGSI_OPCODE_MOD:
+ break;
+ case TGSI_OPCODE_XOR:
+ break;
+ case TGSI_OPCODE_SAD:
+ break;
+ case TGSI_OPCODE_TXF:
+ break;
+ case TGSI_OPCODE_TXQ:
+ break;
+ case TGSI_OPCODE_CONT:
+ break;
+ case TGSI_OPCODE_EMIT:
+ break;
+ case TGSI_OPCODE_ENDPRIM:
+ break;
+ case TGSI_OPCODE_BGNLOOP2: {
+ instr->beginLoop();
+ storage->setCurrentBlock(instr->currentBlock());
+ return;
+ }
+ break;
+ case TGSI_OPCODE_BGNSUB: {
+ instr->bgnSub(instno);
+ storage->setCurrentBlock(instr->currentBlock());
+ storage->pushTemps();
+ return;
+ }
+ break;
+ case TGSI_OPCODE_ENDLOOP2: {
+ instr->endLoop();
+ storage->setCurrentBlock(instr->currentBlock());
+ return;
+ }
+ break;
+ case TGSI_OPCODE_ENDSUB: {
+ instr->endSub();
+ storage->setCurrentBlock(instr->currentBlock());
+ storage->popArguments();
+ storage->popTemps();
+ return;
+ }
+ break;
+ case TGSI_OPCODE_NOISE1:
+ break;
+ case TGSI_OPCODE_NOISE2:
+ break;
+ case TGSI_OPCODE_NOISE3:
+ break;
+ case TGSI_OPCODE_NOISE4:
+ break;
+ case TGSI_OPCODE_NOP:
+ break;
+ case TGSI_OPCODE_M4X3:
+ break;
+ case TGSI_OPCODE_M3X4:
+ break;
+ case TGSI_OPCODE_M3X3:
+ break;
+ case TGSI_OPCODE_M3X2:
+ break;
+ case TGSI_OPCODE_CALLNZ:
+ break;
+ case TGSI_OPCODE_IFC:
+ break;
+ case TGSI_OPCODE_BREAKC:
+ break;
+ case TGSI_OPCODE_KIL: {
+ out = instr->kil(inputs[0]);
+ storage->setKilElement(out);
+ return;
+ }
+ break;
+ case TGSI_OPCODE_END:
+ instr->end();
+ return;
+ break;
+ default:
+ fprintf(stderr, "ERROR: Unknown opcode %d\n",
+ inst->Instruction.Opcode);
+ assert(0);
+ break;
+ }
+
+ if (!out) {
+ fprintf(stderr, "ERROR: unsupported opcode %d\n",
+ inst->Instruction.Opcode);
+ assert(!"Unsupported opcode");
+ }
+
+ /* # not sure if we need this */
+ switch( inst->Instruction.Saturate ) {
+ case TGSI_SAT_NONE:
+ break;
+ case TGSI_SAT_ZERO_ONE:
+ /*TXT( "_SAT" );*/
+ break;
+ case TGSI_SAT_MINUS_PLUS_ONE:
+ /*TXT( "_SAT[-1,1]" );*/
+ break;
+ default:
+ assert( 0 );
+ }
+
+ /* store results */
+ for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) {
+ struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i];
+
+ if (dst->DstRegister.File == TGSI_FILE_OUTPUT) {
+ storage->setOutputElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask);
+ } else if (dst->DstRegister.File == TGSI_FILE_TEMPORARY) {
+ storage->setTempElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask);
+ } else if (dst->DstRegister.File == TGSI_FILE_ADDRESS) {
+ storage->setAddrElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask);
+ } else {
+ fprintf(stderr, "ERROR: unsupported LLVM destination!");
+ assert(!"wrong destination");
+ }
+ }
+}
+
+
+static void
+translate_instructionir(llvm::Module *module,
+ StorageSoa *storage,
+ InstructionsSoa *instr,
+ struct tgsi_full_instruction *inst,
+ struct tgsi_full_instruction *fi,
+ unsigned instno)
+{
+ std::vector< std::vector<llvm::Value*> > inputs(inst->Instruction.NumSrcRegs);
+
+ for (int i = 0; i < inst->Instruction.NumSrcRegs; ++i) {
+ struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i];
+ std::vector<llvm::Value*> val;
+ llvm::Value *indIdx = 0;
+ int swizzle = swizzleInt(src);
+
+ if (src->SrcRegister.Indirect) {
+ indIdx = storage->addrElement(src->SrcRegisterInd.Index);
+ }
+ val = storage->load((enum tgsi_file_type)src->SrcRegister.File,
+ src->SrcRegister.Index, swizzle, instr->getIRBuilder(), indIdx);
+
+ inputs[i] = val;
+ }
+
+ std::vector<llvm::Value*> out(4);
+ switch (inst->Instruction.Opcode) {
+ case TGSI_OPCODE_ARL: {
+ out = instr->arl(inputs[0]);
+ }
+ break;
+ case TGSI_OPCODE_MOV: {
+ out = inputs[0];
+ }
+ break;
+ case TGSI_OPCODE_LIT: {
+ out = instr->lit(inputs[0]);
+ }
+ break;
+ case TGSI_OPCODE_RCP: {
+ }
+ break;
+ case TGSI_OPCODE_RSQ: {
+ out = instr->rsq(inputs[0]);
+ }
+ break;
+ case TGSI_OPCODE_EXP:
+ break;
+ case TGSI_OPCODE_LOG:
+ break;
+ case TGSI_OPCODE_MUL: {
+ out = instr->mul(inputs[0], inputs[1]);
+ }
+ break;
+ case TGSI_OPCODE_ADD: {
+ out = instr->add(inputs[0], inputs[1]);
+ }
+ break;
+ case TGSI_OPCODE_DP3: {
+ out = instr->dp3(inputs[0], inputs[1]);
+ }
+ break;
+ case TGSI_OPCODE_DP4: {
+ out = instr->dp4(inputs[0], inputs[1]);
+ }
+ break;
+ case TGSI_OPCODE_DST: {
+ }
+ break;
+ case TGSI_OPCODE_MIN: {
+ out = instr->min(inputs[0], inputs[1]);
+ }
+ break;
+ case TGSI_OPCODE_MAX: {
+ out = instr->max(inputs[0], inputs[1]);
+ }
+ break;
+ case TGSI_OPCODE_SLT: {
+ out = instr->slt(inputs[0], inputs[1]);
+ }
+ break;
+ case TGSI_OPCODE_SGE: {
+ }
+ break;
+ case TGSI_OPCODE_MAD: {
+ out = instr->madd(inputs[0], inputs[1], inputs[2]);
+ }
+ break;
+ case TGSI_OPCODE_SUB: {
+ out = instr->sub(inputs[0], inputs[1]);
+ }
+ break;
+ case TGSI_OPCODE_LERP: {
+ }
+ break;
+ case TGSI_OPCODE_CND:
+ break;
+ case TGSI_OPCODE_CND0:
+ break;
+ case TGSI_OPCODE_DOT2ADD:
+ break;
+ case TGSI_OPCODE_INDEX:
+ break;
+ case TGSI_OPCODE_NEGATE:
+ break;
+ case TGSI_OPCODE_FRAC: {
+ }
+ break;
+ case TGSI_OPCODE_CLAMP:
+ break;
+ case TGSI_OPCODE_FLOOR: {
+ }
+ break;
+ case TGSI_OPCODE_ROUND:
+ break;
+ case TGSI_OPCODE_EXPBASE2: {
+ }
+ break;
+ case TGSI_OPCODE_LOGBASE2: {
+ }
+ break;
+ case TGSI_OPCODE_POWER: {
+ out = instr->pow(inputs[0], inputs[1]);
+ }
+ break;
+ case TGSI_OPCODE_CROSSPRODUCT: {
+ }
+ break;
+ case TGSI_OPCODE_MULTIPLYMATRIX:
+ break;
+ case TGSI_OPCODE_ABS: {
+ out = instr->abs(inputs[0]);
+ }
+ break;
+ case TGSI_OPCODE_RCC:
+ break;
+ case TGSI_OPCODE_DPH: {
+ }
+ break;
+ case TGSI_OPCODE_COS: {
+ }
+ break;
+ case TGSI_OPCODE_DDX:
+ break;
+ case TGSI_OPCODE_DDY:
+ break;
+ case TGSI_OPCODE_KILP:
+ break;
+ case TGSI_OPCODE_PK2H:
+ break;
+ case TGSI_OPCODE_PK2US:
+ break;
+ case TGSI_OPCODE_PK4B:
+ break;
+ case TGSI_OPCODE_PK4UB:
+ break;
+ case TGSI_OPCODE_RFL:
+ break;
+ case TGSI_OPCODE_SEQ:
+ break;
+ case TGSI_OPCODE_SFL:
+ break;
+ case TGSI_OPCODE_SGT: {
+ }
+ break;
+ case TGSI_OPCODE_SIN: {
+ }
+ break;
+ case TGSI_OPCODE_SLE:
+ break;
+ case TGSI_OPCODE_SNE:
+ break;
+ case TGSI_OPCODE_STR:
+ break;
+ case TGSI_OPCODE_TEX:
+ break;
+ case TGSI_OPCODE_TXD:
+ break;
+ case TGSI_OPCODE_UP2H:
+ break;
+ case TGSI_OPCODE_UP2US:
+ break;
+ case TGSI_OPCODE_UP4B:
+ break;
+ case TGSI_OPCODE_UP4UB:
+ break;
+ case TGSI_OPCODE_X2D:
+ break;
+ case TGSI_OPCODE_ARA:
+ break;
+ case TGSI_OPCODE_ARR:
+ break;
+ case TGSI_OPCODE_BRA:
+ break;
+ case TGSI_OPCODE_CAL: {
+ }
+ break;
+ case TGSI_OPCODE_RET: {
+ }
+ break;
+ case TGSI_OPCODE_SSG:
+ break;
+ case TGSI_OPCODE_CMP: {
+ }
+ break;
+ case TGSI_OPCODE_SCS: {
+ }
+ break;
+ case TGSI_OPCODE_TXB:
+ break;
+ case TGSI_OPCODE_NRM:
+ break;
+ case TGSI_OPCODE_DIV:
+ break;
+ case TGSI_OPCODE_DP2:
+ break;
+ case TGSI_OPCODE_TXL:
+ break;
+ case TGSI_OPCODE_BRK: {
+ }
+ break;
+ case TGSI_OPCODE_IF: {
+ }
+ break;
+ case TGSI_OPCODE_LOOP:
+ break;
+ case TGSI_OPCODE_REP:
+ break;
+ case TGSI_OPCODE_ELSE: {
+ }
+ break;
+ case TGSI_OPCODE_ENDIF: {
+ }
+ break;
+ case TGSI_OPCODE_ENDLOOP:
+ break;
+ case TGSI_OPCODE_ENDREP:
+ break;
+ case TGSI_OPCODE_PUSHA:
+ break;
+ case TGSI_OPCODE_POPA:
+ break;
+ case TGSI_OPCODE_CEIL:
+ break;
+ case TGSI_OPCODE_I2F:
+ break;
+ case TGSI_OPCODE_NOT:
+ break;
+ case TGSI_OPCODE_TRUNC: {
+ }
+ break;
+ case TGSI_OPCODE_SHL:
+ break;
+ case TGSI_OPCODE_SHR:
+ break;
+ case TGSI_OPCODE_AND:
+ break;
+ case TGSI_OPCODE_OR:
+ break;
+ case TGSI_OPCODE_MOD:
+ break;
+ case TGSI_OPCODE_XOR:
+ break;
+ case TGSI_OPCODE_SAD:
+ break;
+ case TGSI_OPCODE_TXF:
+ break;
+ case TGSI_OPCODE_TXQ:
+ break;
+ case TGSI_OPCODE_CONT:
+ break;
+ case TGSI_OPCODE_EMIT:
+ break;
+ case TGSI_OPCODE_ENDPRIM:
+ break;
+ case TGSI_OPCODE_BGNLOOP2: {
+ }
+ break;
+ case TGSI_OPCODE_BGNSUB: {
+ }
+ break;
+ case TGSI_OPCODE_ENDLOOP2: {
+ }
+ break;
+ case TGSI_OPCODE_ENDSUB: {
+ }
+ break;
+ case TGSI_OPCODE_NOISE1:
+ break;
+ case TGSI_OPCODE_NOISE2:
+ break;
+ case TGSI_OPCODE_NOISE3:
+ break;
+ case TGSI_OPCODE_NOISE4:
+ break;
+ case TGSI_OPCODE_NOP:
+ break;
+ case TGSI_OPCODE_M4X3:
+ break;
+ case TGSI_OPCODE_M3X4:
+ break;
+ case TGSI_OPCODE_M3X3:
+ break;
+ case TGSI_OPCODE_M3X2:
+ break;
+ case TGSI_OPCODE_NRM4:
+ break;
+ case TGSI_OPCODE_CALLNZ:
+ break;
+ case TGSI_OPCODE_IFC:
+ break;
+ case TGSI_OPCODE_BREAKC:
+ break;
+ case TGSI_OPCODE_KIL: {
+ }
+ break;
+ case TGSI_OPCODE_END:
+ instr->end();
+ return;
+ break;
+ default:
+ fprintf(stderr, "ERROR: Unknown opcode %d\n",
+ inst->Instruction.Opcode);
+ assert(0);
+ break;
+ }
+
+ if (!out[0]) {
+ fprintf(stderr, "ERROR: unsupported opcode %d\n",
+ inst->Instruction.Opcode);
+ assert(!"Unsupported opcode");
+ }
+
+ /* store results */
+ for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) {
+ struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i];
+ storage->store((enum tgsi_file_type)dst->DstRegister.File,
+ dst->DstRegister.Index, out, dst->DstRegister.WriteMask,
+ instr->getIRBuilder() );
+ }
+}
+
+llvm::Module *
+tgsi_to_llvm(struct gallivm_ir *ir, const struct tgsi_token *tokens)
+{
+ llvm::Module *mod = new Module("shader");
+ struct tgsi_parse_context parse;
+ struct tgsi_full_instruction fi;
+ struct tgsi_full_declaration fd;
+ unsigned instno = 0;
+ Function* shader = mod->getFunction("execute_shader");
+ std::ostringstream stream;
+ if (ir->type == GALLIVM_VS) {
+ stream << "vs_shader";
+ } else {
+ stream << "fs_shader";
+ }
+ stream << ir->id;
+ std::string func_name = stream.str();
+ shader->setName(func_name.c_str());
+
+ Function::arg_iterator args = shader->arg_begin();
+ Value *ptr_INPUT = args++;
+ ptr_INPUT->setName("input");
+
+ BasicBlock *label_entry = BasicBlock::Create("entry", shader, 0);
+
+ tgsi_parse_init(&parse, tokens);
+
+ fi = tgsi_default_full_instruction();
+ fd = tgsi_default_full_declaration();
+ Storage storage(label_entry, ptr_INPUT);
+ Instructions instr(mod, shader, label_entry, &storage);
+ while(!tgsi_parse_end_of_tokens(&parse)) {
+ tgsi_parse_token(&parse);
+
+ switch (parse.FullToken.Token.Type) {
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ translate_declaration(ir, mod, &storage,
+ &parse.FullToken.FullDeclaration,
+ &fd);
+ break;
+
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ translate_immediate(&storage,
+ &parse.FullToken.FullImmediate);
+ break;
+
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ translate_instruction(mod, &storage, &instr,
+ &parse.FullToken.FullInstruction,
+ &fi, instno);
+ ++instno;
+ break;
+
+ default:
+ assert(0);
+ }
+ }
+
+ tgsi_parse_free(&parse);
+
+ ir->num_consts = storage.numConsts();
+ return mod;
+}
+
+llvm::Module * tgsi_to_llvmir(struct gallivm_ir *ir,
+ const struct tgsi_token *tokens)
+{
+ llvm::Module *mod = new Module("shader");
+ struct tgsi_parse_context parse;
+ struct tgsi_full_instruction fi;
+ struct tgsi_full_declaration fd;
+ unsigned instno = 0;
+ std::ostringstream stream;
+ if (ir->type == GALLIVM_VS) {
+ stream << "vs_shader";
+ } else {
+ stream << "fs_shader";
+ }
+ //stream << ir->id;
+ std::string func_name = stream.str();
+ Function *shader = llvm::cast<Function>(mod->getOrInsertFunction(
+ func_name.c_str(),
+ vertexShaderFunctionType()));
+
+ Function::arg_iterator args = shader->arg_begin();
+ Value *input = args++;
+ input->setName("inputs");
+ Value *output = args++;
+ output->setName("outputs");
+ Value *consts = args++;
+ consts->setName("consts");
+
+ BasicBlock *label_entry = BasicBlock::Create("entry", shader, 0);
+
+ tgsi_parse_init(&parse, tokens);
+
+ fi = tgsi_default_full_instruction();
+ fd = tgsi_default_full_declaration();
+
+ StorageSoa storage(label_entry, input, output, consts);
+ InstructionsSoa instr(mod, shader, label_entry, &storage);
+
+ while(!tgsi_parse_end_of_tokens(&parse)) {
+ tgsi_parse_token(&parse);
+
+ switch (parse.FullToken.Token.Type) {
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ translate_declarationir(ir, mod, &storage,
+ &parse.FullToken.FullDeclaration,
+ &fd);
+ break;
+
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ translate_immediateir(&storage,
+ &parse.FullToken.FullImmediate);
+ break;
+
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ storage.declareImmediates();
+ translate_instructionir(mod, &storage, &instr,
+ &parse.FullToken.FullInstruction,
+ &fi, instno);
+ ++instno;
+ break;
+
+ default:
+ assert(0);
+ }
+ }
+
+ tgsi_parse_free(&parse);
+
+ return mod;
+}
diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.h b/src/gallium/auxiliary/gallivm/tgsitollvm.h
new file mode 100644
index 00000000000..7ada04d6299
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/tgsitollvm.h
@@ -0,0 +1,20 @@
+#ifndef TGSITOLLVM_H
+#define TGSITOLLVM_H
+
+
+namespace llvm {
+ class Module;
+}
+
+struct gallivm_ir;
+struct tgsi_token;
+
+
+llvm::Module * tgsi_to_llvm(struct gallivm_ir *ir,
+ const struct tgsi_token *tokens);
+
+
+llvm::Module * tgsi_to_llvmir(struct gallivm_ir *ir,
+ const struct tgsi_token *tokens);
+
+#endif