diff options
author | Brian Paul <[email protected]> | 2009-02-10 16:44:02 -0700 |
---|---|---|
committer | Brian Paul <[email protected]> | 2009-02-10 16:44:02 -0700 |
commit | 5340b6dff73a0a23531ce2a5f28fba8303adab6e (patch) | |
tree | b141fc3648568dd8b941c966059e6ed32a8bd0ad /src/gallium/auxiliary/gallivm | |
parent | 9fd26daec24f21dbe17afcb2e2ab272667ee9a69 (diff) | |
parent | ee4c921b65fb76998711f3c40330505cbc49a0e0 (diff) |
Merge commit 'origin/gallium-master-merge'
This is the big merge of the gallium-0.2 branch into master.
gallium-master-merge was just the staging area for it.
Both gallium-0.2 and gallium-master-merge are considered closed now.
Conflicts:
progs/demos/Makefile
src/mesa/main/state.c
src/mesa/main/texenvprogram.c
Diffstat (limited to 'src/gallium/auxiliary/gallivm')
21 files changed, 5640 insertions, 0 deletions
diff --git a/src/gallium/auxiliary/gallivm/Makefile b/src/gallium/auxiliary/gallivm/Makefile new file mode 100644 index 00000000000..5a96d94ec37 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/Makefile @@ -0,0 +1,92 @@ +# -*-makefile-*- +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = gallivm + + +GALLIVM_SOURCES = \ + gallivm.cpp \ + gallivm_cpu.cpp \ + instructions.cpp \ + loweringpass.cpp \ + tgsitollvm.cpp \ + storage.cpp \ + storagesoa.cpp \ + instructionssoa.cpp + +INC_SOURCES = gallivm_builtins.cpp gallivmsoabuiltins.cpp + +CPP_SOURCES = \ + $(GALLIVM_SOURCES) + +C_SOURCES = +ASM_SOURCES = + +OBJECTS = $(C_SOURCES:.c=.o) \ + $(CPP_SOURCES:.cpp=.o) \ + $(ASM_SOURCES:.S=.o) + +### Include directories +INCLUDES = \ + -I. \ + -I$(TOP)/src/gallium/drivers \ + -I$(TOP)/src/gallium/auxiliary \ + -I$(TOP)/src/gallium/include \ + -I$(TOP)/src/mesa \ + -I$(TOP)/include + + +##### RULES ##### + +.c.o: + $(CC) -c $(INCLUDES) $(LLVM_CFLAGS) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@ + +.cpp.o: + $(CXX) -c $(INCLUDES) $(LLVM_CXXFLAGS) $(CXXFLAGS) $(DRIVER_DEFINES) $< -o $@ + +.S.o: + $(CC) -c $(INCLUDES) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@ + +##### TARGETS ##### + +default:: depend symlinks $(LIBNAME) + + +$(LIBNAME): $(OBJECTS) Makefile + $(TOP)/bin/mklib -o $@ -static $(OBJECTS) + + +depend: $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(INC_SOURCES) + rm -f depend + touch depend + $(MKDEP) $(MKDEP_OPTIONS) $(DRIVER_DEFINES) $(INCLUDES) $(C_SOURCES) $(CPP_SOURCES) \ + $(ASM_SOURCES) $(INC_SOURCES) 2> /dev/null + + +gallivm_builtins.cpp: llvm_builtins.c + clang --emit-llvm < $< |llvm-as|opt -std-compile-opts > temp1.bin + (echo "static const unsigned char llvm_builtins_data[] = {"; od -txC temp1.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/,0x00};/") >$@ + rm temp1.bin + +gallivmsoabuiltins.cpp: soabuiltins.c + clang --emit-llvm < $< |llvm-as|opt -std-compile-opts > temp2.bin + (echo "static const unsigned char 
soabuiltins_data[] = {"; od -txC temp2.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/,0x00};/") >$@ + rm temp2.bin + +# Emacs tags +tags: + etags `find . -name \*.[ch]` `find ../include` + + +# Remove .o and backup files +clean: + -rm -f *.o */*.o *~ *.so *~ server/*.o + -rm -f depend depend.bak + -rm -f gallivm_builtins.cpp + -rm -f gallivmsoabuiltins.cpp + +symlinks: + + +include depend diff --git a/src/gallium/auxiliary/gallivm/SConscript b/src/gallium/auxiliary/gallivm/SConscript new file mode 100644 index 00000000000..c0aa51b90a9 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/SConscript @@ -0,0 +1,16 @@ +Import('*') + +gallivm = env.ConvenienceLibrary( + target = 'gallivm', + source = [ + 'gallivm.cpp', + 'gallivm_cpu.cpp', + 'instructions.cpp', + 'loweringpass.cpp', + 'tgsitollvm.cpp', + 'storage.cpp', + 'storagesoa.cpp', + 'instructionssoa.cpp', + ]) + +auxiliaries.insert(0, gallivm) diff --git a/src/gallium/auxiliary/gallivm/gallivm.cpp b/src/gallium/auxiliary/gallivm/gallivm.cpp new file mode 100644 index 00000000000..f4af5cc8ad5 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/gallivm.cpp @@ -0,0 +1,332 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Zack Rusin [email protected] + */ +#ifdef MESA_LLVM + +#include "gallivm.h" +#include "gallivm_p.h" + +#include "instructions.h" +#include "loweringpass.h" +#include "storage.h" +#include "tgsitollvm.h" + +#include "pipe/p_context.h" +#include "pipe/p_shader_tokens.h" + +#include "tgsi/tgsi_exec.h" +#include "tgsi/tgsi_dump.h" + +#include <llvm/Module.h> +#include <llvm/CallingConv.h> +#include <llvm/Constants.h> +#include <llvm/DerivedTypes.h> +#include <llvm/Instructions.h> +#include <llvm/ModuleProvider.h> +#include <llvm/Pass.h> +#include <llvm/PassManager.h> +#include <llvm/Attributes.h> +#include <llvm/Support/PatternMatch.h> +#include <llvm/ExecutionEngine/JIT.h> +#include <llvm/ExecutionEngine/Interpreter.h> +#include <llvm/ExecutionEngine/GenericValue.h> +#include <llvm/Support/MemoryBuffer.h> +#include <llvm/LinkAllPasses.h> +#include <llvm/Analysis/Verifier.h> +#include <llvm/Analysis/LoopPass.h> +#include <llvm/Target/TargetData.h> +#include <llvm/Bitcode/ReaderWriter.h> +#include <llvm/Transforms/Utils/Cloning.h> + +#include <sstream> +#include <fstream> +#include <iostream> + +static int GLOBAL_ID = 0; + +using namespace llvm; + +static inline +void AddStandardCompilePasses(PassManager &PM) +{ + PM.add(new LoweringPass()); + PM.add(createVerifierPass()); // Verify that input is correct + + PM.add(createLowerSetJmpPass()); // Lower llvm.setjmp/.longjmp + + 
//PM.add(createStripSymbolsPass(true)); + + PM.add(createRaiseAllocationsPass()); // call %malloc -> malloc inst + PM.add(createCFGSimplificationPass()); // Clean up disgusting code + PM.add(createPromoteMemoryToRegisterPass());// Kill useless allocas + PM.add(createGlobalOptimizerPass()); // Optimize out global vars + PM.add(createGlobalDCEPass()); // Remove unused fns and globs + PM.add(createIPConstantPropagationPass());// IP Constant Propagation + PM.add(createDeadArgEliminationPass()); // Dead argument elimination + PM.add(createInstructionCombiningPass()); // Clean up after IPCP & DAE + PM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE + + PM.add(createPruneEHPass()); // Remove dead EH info + + PM.add(createFunctionInliningPass()); // Inline small functions + PM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args + + PM.add(createTailDuplicationPass()); // Simplify cfg by copying code + PM.add(createInstructionCombiningPass()); // Cleanup for scalarrepl. + PM.add(createCFGSimplificationPass()); // Merge & remove BBs + PM.add(createScalarReplAggregatesPass()); // Break up aggregate allocas + PM.add(createInstructionCombiningPass()); // Combine silly seq's + PM.add(createCondPropagationPass()); // Propagate conditionals + + PM.add(createTailCallEliminationPass()); // Eliminate tail calls + PM.add(createCFGSimplificationPass()); // Merge & remove BBs + PM.add(createReassociatePass()); // Reassociate expressions + PM.add(createLoopRotatePass()); + PM.add(createLICMPass()); // Hoist loop invariants + PM.add(createLoopUnswitchPass()); // Unswitch loops. + PM.add(createLoopIndexSplitPass()); // Index split loops. 
+ PM.add(createInstructionCombiningPass()); // Clean up after LICM/reassoc + PM.add(createIndVarSimplifyPass()); // Canonicalize indvars + PM.add(createLoopUnrollPass()); // Unroll small loops + PM.add(createInstructionCombiningPass()); // Clean up after the unroller + PM.add(createGVNPass()); // Remove redundancies + PM.add(createSCCPPass()); // Constant prop with SCCP + + // Run instcombine after redundancy elimination to exploit opportunities + // opened up by them. + PM.add(createInstructionCombiningPass()); + PM.add(createCondPropagationPass()); // Propagate conditionals + + PM.add(createDeadStoreEliminationPass()); // Delete dead stores + PM.add(createAggressiveDCEPass()); // SSA based 'Aggressive DCE' + PM.add(createCFGSimplificationPass()); // Merge & remove BBs + PM.add(createSimplifyLibCallsPass()); // Library Call Optimizations + PM.add(createDeadTypeEliminationPass()); // Eliminate dead types + PM.add(createConstantMergePass()); // Merge dup global constants +} + +void gallivm_prog_delete(struct gallivm_prog *prog) +{ + delete prog->module; + prog->module = 0; + prog->function = 0; + free(prog); +} + +static inline void +constant_interpolation(float (*inputs)[16][4], + const struct tgsi_interp_coef *coefs, + unsigned attrib, + unsigned chan) +{ + unsigned i; + + for (i = 0; i < QUAD_SIZE; ++i) { + inputs[i][attrib][chan] = coefs[attrib].a0[chan]; + } +} + +static inline void +linear_interpolation(float (*inputs)[16][4], + const struct tgsi_interp_coef *coefs, + unsigned attrib, + unsigned chan) +{ + unsigned i; + + for( i = 0; i < QUAD_SIZE; i++ ) { + const float x = inputs[i][0][0]; + const float y = inputs[i][0][1]; + + inputs[i][attrib][chan] = + coefs[attrib].a0[chan] + + coefs[attrib].dadx[chan] * x + + coefs[attrib].dady[chan] * y; + } +} + +static inline void +perspective_interpolation(float (*inputs)[16][4], + const struct tgsi_interp_coef *coefs, + unsigned attrib, + unsigned chan ) +{ + unsigned i; + + for( i = 0; i < QUAD_SIZE; i++ ) { + 
const float x = inputs[i][0][0]; + const float y = inputs[i][0][1]; + /* WPOS.w here is really 1/w */ + const float w = 1.0f / inputs[i][0][3]; + assert(inputs[i][0][3] != 0.0); + + inputs[i][attrib][chan] = + (coefs[attrib].a0[chan] + + coefs[attrib].dadx[chan] * x + + coefs[attrib].dady[chan] * y) * w; + } +} + +void gallivm_ir_dump(struct gallivm_ir *ir, const char *file_prefix) +{ + if (!ir || !ir->module) + return; + + if (file_prefix) { + std::ostringstream stream; + stream << file_prefix; + stream << ir->id; + stream << ".ll"; + std::string name = stream.str(); + std::ofstream out(name.c_str()); + if (!out) { + std::cerr<<"Can't open file : "<<stream.str()<<std::endl;; + return; + } + out << (*ir->module); + out.close(); + } else { + const llvm::Module::FunctionListType &funcs = ir->module->getFunctionList(); + llvm::Module::FunctionListType::const_iterator itr; + std::cout<<"; ---------- Start shader "<<ir->id<<std::endl; + for (itr = funcs.begin(); itr != funcs.end(); ++itr) { + const llvm::Function &func = (*itr); + std::string name = func.getName(); + const llvm::Function *found = 0; + if (name.find("vs_shader") != std::string::npos || + name.find("fs_shader") != std::string::npos || + name.find("function") != std::string::npos) + found = &func; + if (found) { + std::cout<<*found<<std::endl; + } + } + std::cout<<"; ---------- End shader "<<ir->id<<std::endl; + } +} + + +void gallivm_prog_inputs_interpolate(struct gallivm_prog *prog, + float (*inputs)[16][4], + const struct tgsi_interp_coef *coef) +{ + for (int i = 0; i < prog->num_interp; ++i) { + const gallivm_interpolate &interp = prog->interpolators[i]; + switch (interp.type) { + case TGSI_INTERPOLATE_CONSTANT: + constant_interpolation(inputs, coef, interp.attrib, interp.chan); + break; + + case TGSI_INTERPOLATE_LINEAR: + linear_interpolation(inputs, coef, interp.attrib, interp.chan); + break; + + case TGSI_INTERPOLATE_PERSPECTIVE: + perspective_interpolation(inputs, coef, interp.attrib, interp.chan); 
+ break; + + default: + assert( 0 ); + } + } +} + + +struct gallivm_ir * gallivm_ir_new(enum gallivm_shader_type type) +{ + struct gallivm_ir *ir = + (struct gallivm_ir *)calloc(1, sizeof(struct gallivm_ir)); + ++GLOBAL_ID; + ir->id = GLOBAL_ID; + ir->type = type; + + return ir; +} + +void gallivm_ir_set_layout(struct gallivm_ir *ir, + enum gallivm_vector_layout layout) +{ + ir->layout = layout; +} + +void gallivm_ir_set_components(struct gallivm_ir *ir, int num) +{ + ir->num_components = num; +} + +void gallivm_ir_fill_from_tgsi(struct gallivm_ir *ir, + const struct tgsi_token *tokens) +{ + std::cout << "Creating llvm from: " <<std::endl; + tgsi_dump(tokens, 0); + + llvm::Module *mod = tgsi_to_llvmir(ir, tokens); + ir->module = mod; + gallivm_ir_dump(ir, 0); +} + +void gallivm_ir_delete(struct gallivm_ir *ir) +{ + delete ir->module; + free(ir); +} + +struct gallivm_prog * gallivm_ir_compile(struct gallivm_ir *ir) +{ + struct gallivm_prog *prog = + (struct gallivm_prog *)calloc(1, sizeof(struct gallivm_prog)); + + std::cout << "Before optimizations:"<<std::endl; + ir->module->dump(); + std::cout<<"-------------------------------"<<std::endl; + + PassManager veri; + veri.add(createVerifierPass()); + veri.run(*ir->module); + llvm::Module *mod = llvm::CloneModule(ir->module); + prog->num_consts = ir->num_consts; + memcpy(prog->interpolators, ir->interpolators, sizeof(prog->interpolators)); + prog->num_interp = ir->num_interp; + + /* Run optimization passes over it */ + PassManager passes; + passes.add(new TargetData(mod)); + AddStandardCompilePasses(passes); + passes.run(*mod); + prog->module = mod; + + std::cout << "After optimizations:"<<std::endl; + mod->dump(); + + return prog; +} + +#endif /* MESA_LLVM */ diff --git a/src/gallium/auxiliary/gallivm/gallivm.h b/src/gallium/auxiliary/gallivm/gallivm.h new file mode 100644 index 00000000000..36a64a77471 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/gallivm.h @@ -0,0 +1,118 @@ 
+/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Zack Rusin [email protected] + */ + +#ifndef GALLIVM_H +#define GALLIVM_H + +/* + LLVM representation consists of two stages - layout independent + intermediate representation gallivm_ir and driver specific + gallivm_prog. TGSI is first being translated into gallivm_ir + after that driver can set number of options on gallivm_ir and + have it compiled into gallivm_prog. 
gallivm_prog can be either + executed (assuming there's LLVM JIT backend for the current + target) or machine code generation can be done (assuming there's + a LLVM code generator for the current target) + */ +#if defined __cplusplus +extern "C" { +#endif + +#include "pipe/p_state.h" + +#ifdef MESA_LLVM + +struct tgsi_token; + +struct gallivm_ir; +struct gallivm_prog; +struct gallivm_cpu_engine; +struct tgsi_interp_coef; +struct tgsi_sampler; +struct tgsi_exec_vector; + +enum gallivm_shader_type { + GALLIVM_VS, + GALLIVM_FS +}; + +enum gallivm_vector_layout { + GALLIVM_AOS, + GALLIVM_SOA +}; + +struct gallivm_ir *gallivm_ir_new(enum gallivm_shader_type type); +void gallivm_ir_set_layout(struct gallivm_ir *ir, + enum gallivm_vector_layout layout); +void gallivm_ir_set_components(struct gallivm_ir *ir, int num); +void gallivm_ir_fill_from_tgsi(struct gallivm_ir *ir, + const struct tgsi_token *tokens); +void gallivm_ir_delete(struct gallivm_ir *ir); + + +struct gallivm_prog *gallivm_ir_compile(struct gallivm_ir *ir); + +void gallivm_prog_inputs_interpolate(struct gallivm_prog *prog, + float (*inputs)[PIPE_MAX_SHADER_INPUTS][4], + const struct tgsi_interp_coef *coefs); +void gallivm_prog_dump(struct gallivm_prog *prog, const char *file_prefix); + + +struct gallivm_cpu_engine *gallivm_cpu_engine_create(struct gallivm_prog *prog); +struct gallivm_cpu_engine *gallivm_global_cpu_engine(); +int gallivm_cpu_vs_exec(struct gallivm_prog *prog, + struct tgsi_exec_machine *machine, + const float (*input)[4], + unsigned num_inputs, + float (*output)[4], + unsigned num_outputs, + const float (*constants)[4], + unsigned count, + unsigned input_stride, + unsigned output_stride); +int gallivm_cpu_fs_exec(struct gallivm_prog *prog, + float x, float y, + float (*dests)[PIPE_MAX_SHADER_INPUTS][4], + float (*inputs)[PIPE_MAX_SHADER_INPUTS][4], + float (*consts)[4], + struct tgsi_sampler *samplers); +void gallivm_cpu_jit_compile(struct gallivm_cpu_engine *ee, struct gallivm_prog *prog); 
+void gallivm_cpu_engine_delete(struct gallivm_cpu_engine *ee); + + +#endif /* MESA_LLVM */ + +#if defined __cplusplus +} +#endif + +#endif diff --git a/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp b/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp new file mode 100644 index 00000000000..634bac01507 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp @@ -0,0 +1,140 @@ +static const unsigned char llvm_builtins_data[] = { +0x42,0x43,0xc0,0xde,0x21,0x0c,0x00,0x00,0x27,0x02,0x00,0x00,0x01,0x10,0x00,0x00, +0x10,0x00,0x00,0x00,0x07,0x81,0x23,0x91,0x41,0xc8,0x04,0x49,0x06,0x10,0x32,0x39, +0x92,0x01,0x84,0x0c,0x25,0x05,0x08,0x19,0x1e,0x04,0x8b,0x62,0x80,0x14,0x45,0x02, +0x42,0x92,0x0b,0x42,0xa4,0x10,0x32,0x14,0x38,0x08,0x18,0x49,0x0a,0x32,0x44,0x24, +0x48,0x0a,0x90,0x21,0x23,0x44,0x72,0x80,0x8c,0x14,0x21,0x86,0x0a,0x8a,0x0a,0x64, +0x0c,0x1f,0x00,0x00,0x49,0x18,0x00,0x00,0x03,0x00,0x00,0x00,0x0b,0x84,0xff,0xff, +0xff,0xff,0x1f,0xc0,0x00,0x00,0x00,0x00,0x51,0x20,0x00,0x00,0x12,0x00,0x00,0x00, +0x32,0x22,0x48,0x09,0x20,0x65,0x82,0x84,0x00,0x26,0x45,0x48,0x05,0x09,0x26,0x45, +0xc6,0x05,0x42,0x52,0x26,0x08,0xae,0x19,0x80,0x61,0x04,0x02,0x98,0x23,0x00,0x83, +0x29,0x80,0x21,0x00,0xb2,0x73,0x04,0x01,0x51,0x8a,0xf4,0x08,0x92,0xa4,0x39,0x47, +0x80,0x50,0x2b,0x03,0x00,0xa0,0x08,0x21,0x5c,0x46,0x2b,0x44,0x08,0x21,0xd4,0x40, +0x14,0x01,0x80,0x11,0x80,0x22,0x88,0x00,0x13,0x30,0x7c,0xc0,0x03,0x3b,0xf8,0x05, +0x3b,0xa0,0x83,0x36,0xa8,0x07,0x77,0x58,0x07,0x77,0x78,0x87,0x7b,0x70,0x87,0x36, +0x60,0x87,0x74,0x70,0x87,0x7a,0xc0,0x87,0x36,0x38,0x07,0x77,0xa8,0x87,0x0d,0xf7, +0x50,0x0e,0x6d,0x00,0x0f,0x7a,0x60,0x07,0x74,0xa0,0x07,0x76,0x40,0x07,0x7a,0x60, +0x07,0x74,0xd0,0x06,0xe9,0x10,0x07,0x7a,0x80,0x07,0x7a,0x80,0x07,0x6d,0x90,0x0e, +0x78,0xa0,0x07,0x78,0xa0,0x07,0x78,0xd0,0x06,0xe9,0x10,0x07,0x76,0xa0,0x07,0x71, +0x60,0x07,0x7a,0x10,0x07,0x76,0xd0,0x06,0xe9,0x30,0x07,0x72,0xa0,0x07,0x73,0x20, 
+0x07,0x7a,0x30,0x07,0x72,0xd0,0x06,0xe9,0x60,0x07,0x74,0xa0,0x07,0x76,0x40,0x07, +0x7a,0x60,0x07,0x74,0xd0,0x06,0xe6,0x30,0x07,0x72,0xa0,0x07,0x73,0x20,0x07,0x7a, +0x30,0x07,0x72,0xd0,0x06,0xe6,0x60,0x07,0x74,0xa0,0x07,0x76,0x40,0x07,0x7a,0x60, +0x07,0x74,0xd0,0x06,0xf6,0x60,0x07,0x74,0xa0,0x07,0x76,0x40,0x07,0x7a,0x60,0x07, +0x74,0xd0,0x06,0xf6,0x10,0x07,0x72,0x80,0x07,0x7a,0x10,0x07,0x72,0x80,0x07,0x7a, +0x10,0x07,0x72,0x80,0x07,0x6d,0x10,0x0e,0x70,0xa0,0x07,0x70,0xa0,0x07,0x76,0x40, +0x07,0x6d,0x60,0x0e,0x78,0x00,0x07,0x7a,0x10,0x07,0x72,0x80,0x07,0x7a,0x10,0x07, +0x72,0x80,0x07,0x3a,0x0f,0x84,0x48,0x20,0x23,0x24,0x40,0x00,0x62,0x67,0x88,0x9f, +0x19,0x92,0x24,0x00,0x10,0x04,0x00,0x00,0x00,0x43,0x92,0x04,0x08,0x00,0x00,0x00, +0x00,0x60,0x48,0xa2,0x00,0x40,0x10,0x00,0x00,0x00,0x0c,0x49,0x16,0x00,0x08,0x02, +0x00,0x00,0x80,0x21,0x89,0x02,0x00,0x41,0x00,0x00,0x00,0x30,0x24,0x61,0x80,0x00, +0x00,0x00,0x00,0x00,0x86,0x24,0x07,0x10,0x00,0x00,0x00,0x00,0xc0,0x90,0x44,0x01, +0x80,0x20,0x00,0x00,0x00,0x18,0x92,0x1c,0x40,0x00,0x00,0x00,0x00,0x00,0x43,0x12, +0x05,0x00,0x82,0x00,0x00,0x00,0x60,0x48,0x52,0x00,0x40,0x10,0x00,0x00,0x00,0x64, +0x81,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x32,0x1e,0x98,0x10,0x19,0x11,0x4c,0x90, +0x8c,0x09,0x26,0x47,0xc6,0x04,0x43,0x8a,0x8a,0x59,0x8b,0x43,0x50,0xd2,0x09,0x02, +0x81,0xd2,0x73,0x50,0xc9,0x0c,0x2a,0x99,0x41,0x25,0x33,0xa8,0x64,0x56,0x28,0x66, +0x2d,0x0e,0x41,0xcf,0x2a,0x15,0x04,0x4a,0xcf,0x41,0x25,0x33,0xa8,0x64,0x06,0x95, +0xcc,0xa0,0x92,0x59,0x01,0x00,0x00,0x00,0x53,0x82,0x26,0x0c,0x04,0x00,0x00,0x00, +0x22,0x00,0x00,0x00,0x13,0x04,0x41,0x2c,0x10,0x00,0x00,0x00,0x05,0x00,0x00,0x00, +0x04,0xc6,0x08,0x40,0x10,0x04,0xe1,0x70,0x18,0x23,0x00,0x41,0x10,0x84,0xc3,0x60, +0x04,0x00,0x00,0x00,0xc3,0x0d,0xce,0x43,0x4c,0x37,0x3c,0x8e,0x34,0xdc,0x30,0x41, +0xc2,0x74,0x03,0x34,0x51,0xc3,0x0d,0x4d,0x44,0x4c,0x37,0x44,0x8d,0x35,0x56,0x01, +0x04,0xc3,0x55,0x21,0x16,0x0e,0x04,0x00,0x0f,0x00,0x00,0x00,0xd6,0x10,0x00,0xe6, 
+0x10,0x04,0x76,0x81,0x00,0x3e,0x30,0x0c,0x91,0x4f,0x1b,0x05,0x21,0x30,0x8f,0x6d, +0x13,0x48,0xe0,0x03,0xc3,0x10,0xf9,0xb4,0x55,0x20,0x81,0x0f,0x0c,0x43,0xe4,0xd7, +0x66,0x41,0x08,0xcc,0xa3,0x1f,0x40,0x41,0x34,0x53,0x84,0x99,0xc4,0x20,0x30,0x8f, +0x61,0x10,0x02,0xb0,0x2c,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00, +0x27,0x00,0x00,0x00,0x13,0x04,0x43,0x2c,0x10,0x00,0x00,0x00,0x08,0x00,0x00,0x00, +0x24,0x8a,0xa0,0x0c,0x46,0x00,0x4a,0x80,0xc2,0x1c,0x84,0x55,0x55,0xd6,0x1c,0x84, +0x45,0x51,0x16,0x81,0x19,0x80,0x11,0x80,0x31,0x02,0x10,0x04,0x41,0xfc,0x03,0x00, +0x63,0x08,0x0d,0x34,0xdc,0x70,0x55,0xc2,0x2c,0x43,0x20,0x60,0x73,0x0c,0xd3,0x15, +0x8d,0x21,0x34,0xd1,0x18,0x42,0xf3,0x8c,0x55,0x00,0x81,0xa0,0x6d,0x73,0x0c,0x19, +0xe7,0x60,0x87,0x52,0x38,0x10,0x00,0x00,0x10,0x00,0x00,0x00,0x27,0x50,0x20,0x05, +0xd1,0x0c,0x17,0x60,0x20,0xc5,0x74,0x10,0x8d,0x65,0x14,0x13,0xf3,0xd4,0xb4,0x6d, +0x14,0x13,0xf3,0xd4,0xb8,0x69,0x14,0x13,0xf3,0xd4,0xb6,0x75,0x14,0x13,0xf3,0xd4, +0xba,0x35,0x0c,0x13,0xf3,0xd8,0x05,0x31,0x31,0x8f,0x6e,0x1c,0x84,0x00,0x2c,0xcb, +0x01,0x14,0x44,0x33,0x45,0x98,0x61,0x0c,0x02,0xf3,0x00,0x00,0x00,0x00,0x00,0x00, +0x61,0x20,0x00,0x00,0x81,0x00,0x00,0x00,0x13,0x04,0x4d,0x2c,0x10,0x00,0x00,0x00, +0x04,0x00,0x00,0x00,0x24,0xca,0x60,0x04,0xa0,0x04,0x8a,0x80,0xc2,0x0c,0x00,0x91, +0x11,0x00,0x00,0x00,0x63,0x08,0x4d,0x64,0x16,0xc1,0xe1,0x86,0xab,0x22,0x66,0x19, +0x02,0x01,0x1b,0x43,0x70,0xa2,0x59,0x82,0x61,0x0c,0xe1,0x89,0x66,0x09,0x86,0x81, +0x0a,0x20,0x0b,0x34,0x61,0x8e,0x81,0xda,0xa2,0x31,0x84,0x46,0xb2,0x8e,0xe0,0x70, +0x83,0x57,0x11,0xb3,0x0c,0x44,0xf1,0x8d,0x21,0x38,0xd2,0x2c,0x81,0x31,0x86,0xf0, +0x48,0xb3,0x04,0xc6,0x40,0x05,0x00,0x06,0x44,0x18,0x14,0x73,0x0c,0x9c,0x18,0x48, +0x63,0x08,0xcd,0x64,0x64,0x40,0x70,0xb8,0xa1,0x0c,0x2a,0x62,0x96,0xe1,0x40,0xcc, +0x60,0x0c,0xc1,0x99,0x66,0x09,0x92,0x31,0x84,0x67,0x9a,0x25,0x48,0x06,0x2a,0x80, +0x33,0x38,0xd0,0x00,0x99,0x63,0x18,0x83,0x34,0x98,0xc6,0x10,0x1a,0xc8,0xd6,0x80, 
+0xe0,0x70,0x03,0x1b,0x54,0xc4,0x2c,0x83,0xb2,0xb4,0xc1,0x18,0x82,0x03,0xcd,0x12, +0x30,0x63,0x08,0x0f,0x34,0x4b,0xc0,0x0c,0x54,0x00,0x6e,0xa0,0xbc,0xc1,0x32,0xc7, +0xa0,0x06,0x70,0x00,0x61,0x1c,0x84,0x03,0x01,0x00,0x00,0x00,0x4e,0x00,0x00,0x00, +0x76,0x52,0x4c,0xcc,0x73,0xd3,0x24,0x05,0x64,0xec,0xcd,0x8d,0xcc,0xe5,0x87,0x46, +0xc6,0x50,0x8a,0x89,0x79,0xee,0xdb,0x54,0x8a,0x89,0x79,0xee,0xdd,0x1a,0x88,0x89, +0x79,0x68,0x73,0x20,0x26,0xe6,0xa9,0xed,0x81,0x98,0x98,0xc7,0x36,0x0b,0x62,0x62, +0x9e,0xdb,0x32,0x88,0x89,0x79,0x72,0xd3,0x20,0x26,0xe6,0xd9,0x8d,0x83,0x98,0x98, +0xa7,0xb7,0x95,0x62,0x62,0x9e,0xbb,0x27,0x2d,0x20,0x63,0x6f,0x6e,0x64,0x2e,0x3a, +0x34,0x35,0x56,0x62,0x08,0x4e,0x53,0xd9,0xba,0xb5,0x14,0x02,0xf3,0xe0,0xf5,0x25, +0x2c,0x82,0xd3,0x0c,0xbe,0xe0,0x34,0xd3,0x8d,0x9b,0x88,0x21,0x38,0xcd,0x60,0xd7, +0x24,0x01,0x63,0xec,0xcd,0x8d,0xcc,0x45,0x87,0x44,0x80,0x8c,0xbd,0xb9,0x91,0xb9, +0xfc,0xc4,0xd0,0x90,0x02,0x8c,0xb1,0x37,0x37,0x32,0x97,0x1f,0x73,0x29,0x26,0xe6, +0xc1,0x71,0x7b,0x29,0x26,0xe6,0xc1,0x77,0xfb,0x28,0x04,0xe6,0xa9,0x6f,0x52,0x01, +0x32,0xf6,0xe6,0x46,0xe6,0xa2,0x13,0x73,0x63,0x18,0x83,0xc0,0x3c,0xb6,0x41,0x08, +0x4e,0x33,0x58,0x47,0x31,0x31,0x4f,0x5d,0x1f,0xc3,0x22,0x38,0xcd,0xe0,0x0b,0x4e, +0x33,0xe1,0xbc,0xa5,0x18,0x82,0xd3,0x0c,0x77,0x6e,0x20,0xc5,0xc4,0x3c,0xb5,0x4e, +0x3a,0x40,0xc6,0xde,0xdc,0xc8,0x5c,0x7e,0x64,0x70,0x2c,0xa4,0x98,0x98,0xa7,0xee, +0x6f,0x20,0x11,0x9c,0x66,0xf0,0x05,0xa7,0x99,0xec,0x82,0x10,0x9c,0xa6,0x32,0x93, +0x42,0x60,0x1e,0x7b,0xb7,0x98,0x62,0x62,0x9e,0xbc,0x36,0x16,0x43,0x70,0x9a,0x0a, +0xa7,0x6d,0xa4,0x98,0x98,0xc7,0xbe,0x8d,0xa4,0x98,0x98,0xc7,0xce,0x0d,0xc6,0x10, +0x9c,0x66,0xc0,0x7b,0x12,0x02,0x32,0xf6,0xe6,0x46,0xe6,0xa2,0x33,0x13,0x73,0x06, +0x8b,0xe0,0x34,0x83,0x2f,0x38,0xcd,0x64,0xd3,0x07,0x50,0x10,0xcd,0x14,0x61,0xe6, +0x61,0x08,0x4e,0x53,0xd5,0x36,0x00,0x00,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00, +0x4a,0x00,0x00,0x00,0x13,0x04,0x41,0x2c,0x10,0x00,0x00,0x00,0x07,0x00,0x00,0x00, 
+0x24,0xca,0x60,0x04,0xa0,0x04,0x8a,0x80,0xc2,0x0c,0x00,0xb9,0x61,0x0c,0x04,0x10, +0x1e,0xe1,0x19,0xc6,0x40,0x02,0xe1,0x11,0x1e,0x00,0x00,0x00,0x63,0x08,0xcd,0x63, +0x15,0xc1,0x31,0x84,0x06,0xb2,0x8b,0xe0,0x18,0x42,0x13,0x59,0x46,0x70,0x0c,0xa1, +0x71,0x6c,0x23,0x38,0x16,0x02,0x04,0xc7,0x64,0x61,0x1a,0x37,0x16,0x01,0x04,0x48, +0x35,0xc7,0x20,0x79,0xcf,0x58,0x04,0x10,0x20,0xd5,0x1c,0xc3,0x07,0x06,0xd0,0x58, +0x04,0x10,0x20,0xd5,0x1c,0x43,0x18,0x88,0x41,0x34,0x16,0x01,0x04,0x48,0x35,0xc7, +0x30,0x06,0x64,0xe0,0x98,0x47,0xd0,0xc0,0x80,0xa0,0x89,0x01,0x41,0x23,0x03,0x82, +0x63,0x21,0x40,0x70,0x50,0x66,0x70,0x06,0x68,0x90,0x06,0x58,0x06,0xe1,0x40,0x00, +0x25,0x00,0x00,0x00,0x56,0x52,0x4c,0xcc,0x73,0xd3,0x56,0x41,0x4c,0xcc,0x53,0xdb, +0x05,0x31,0x31,0xcf,0x6d,0x19,0xc4,0xc4,0x3c,0xba,0x6d,0x10,0x13,0xf3,0xf4,0xd6, +0x41,0x08,0xc0,0xb2,0x18,0x46,0x21,0x38,0x4d,0x85,0x9b,0x46,0x21,0x38,0x4d,0xb5, +0x9b,0x8a,0x21,0x00,0xcb,0x82,0xdf,0x66,0x62,0x08,0x4e,0x53,0xdd,0xb7,0x9d,0x18, +0x82,0xd3,0x54,0xb7,0x6e,0x28,0x86,0xe0,0x34,0xd5,0xdd,0xdb,0x47,0x31,0x31,0x4f, +0x9d,0x9b,0x87,0x21,0x00,0xcb,0x52,0xdf,0x06,0x62,0x08,0xc0,0xb2,0xd4,0xbc,0x59, +0x10,0x82,0xd3,0x54,0x96,0x62,0x08,0x4e,0x53,0xe1,0xb6,0x85,0x14,0x13,0xf3,0xd8, +0xb4,0x8d,0x14,0x13,0xf3,0xd8,0xb9,0x89,0x18,0x02,0xb0,0x2c,0xf6,0x6d,0x24,0x86, +0x00,0x2c,0x8b,0xcd,0x1b,0x87,0x21,0x38,0x4d,0x55,0xd3,0xd6,0x30,0x54,0xc0,0x72, +0x00,0x05,0xd1,0x4c,0x11,0x06,0x00,0x00,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00, +0x19,0x00,0x00,0x00,0x13,0x04,0x41,0x2c,0x10,0x00,0x00,0x00,0x03,0x00,0x00,0x00, +0x24,0x4a,0x60,0x04,0x80,0xc2,0x0c,0x00,0x00,0x00,0x00,0x00,0x63,0x08,0xcd,0x33, +0x16,0x01,0x04,0x48,0x34,0xc7,0x00,0x49,0xcf,0x58,0x04,0x10,0x28,0xd1,0x1c,0xc3, +0x44,0x39,0x58,0x85,0x03,0x01,0x00,0x00,0x0a,0x00,0x00,0x00,0x26,0x41,0x08,0xc0, +0xb2,0x18,0x45,0x21,0x00,0xcb,0xb2,0x5b,0x04,0x31,0x31,0x8f,0x6d,0x13,0xc4,0xc4, +0x3c,0xb9,0x35,0x0c,0x15,0xb0,0x58,0x05,0x31,0x31,0x4f,0x7f,0x00,0x05,0xd1,0x4c, 
+0x11,0x06,0x00,0x00,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00,0x1b,0x00,0x00,0x00, +0x13,0x04,0x41,0x2c,0x10,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x24,0xca,0x60,0x04, +0xa0,0x04,0x8a,0x80,0xc2,0x0c,0x00,0x00,0x63,0x08,0xcd,0x33,0x16,0x01,0x04,0xca, +0x34,0xc7,0x20,0x51,0xcf,0x1c,0x43,0x45,0x41,0x73,0x0c,0x16,0x15,0xcd,0x31,0x5c, +0x94,0x83,0x58,0x38,0x10,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x76,0x51,0x4c,0xcc, +0x53,0xdb,0x86,0x51,0x4c,0xcc,0x53,0xe7,0x36,0x41,0x4c,0xcc,0x63,0x5b,0x05,0x31, +0x31,0x8f,0x6e,0x16,0xc4,0xc4,0x3c,0xbd,0x51,0x10,0x02,0xb0,0x2c,0xd6,0x30,0x54, +0xc0,0x72,0x00,0x05,0xd1,0x4c,0x11,0x06,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00, +0x2c,0x00,0x00,0x00,0x13,0x04,0x45,0x2c,0x10,0x00,0x00,0x00,0x03,0x00,0x00,0x00, +0x24,0xca,0xa0,0x04,0x46,0x00,0x8a,0x80,0xc0,0x08,0x00,0x00,0x63,0x08,0x0d,0x34, +0xdc,0x30,0x49,0xc4,0x2c,0x03,0x11,0x50,0x63,0x08,0xcd,0x33,0xdc,0x50,0x49,0xc4, +0x2c,0x03,0x21,0x58,0x63,0x08,0x4d,0x34,0xdc,0x70,0x49,0xc4,0x2c,0x03,0x31,0x60, +0x63,0x08,0x8d,0x33,0xdc,0x90,0x49,0x84,0x69,0x22,0x70,0xc3,0x27,0x1c,0x08,0x00, +0x17,0x00,0x00,0x00,0x96,0x51,0x4c,0xcc,0x53,0xdf,0x66,0x41,0x08,0xcc,0x83,0xdb, +0x04,0x31,0x31,0x4f,0x6d,0x15,0xc4,0xc4,0x3c,0xb7,0x61,0x10,0x02,0xf3,0xf0,0x76, +0x41,0x4c,0xcc,0xb3,0x1f,0x81,0x11,0x11,0x13,0x15,0x35,0x37,0x90,0x2c,0x4e,0xf4, +0x47,0x87,0x54,0xd7,0x17,0x70,0x2c,0x4e,0xf4,0x47,0x87,0x74,0x02,0xc8,0xe2,0x44, +0x7f,0x74,0x48,0xb9,0x69,0x14,0x02,0xf3,0xd4,0xb8,0x6d,0x18,0x11,0x31,0x55,0xc0, +0x62,0x0d,0x43,0x05,0x2c,0x07,0x50,0x10,0xcd,0x14,0x61,0x46,0x31,0x08,0xcc,0x03, +0x00,0x00,0x00,0x00,0x71,0x20,0x00,0x00,0x12,0x00,0x00,0x00,0x66,0x40,0x54,0x82, +0x23,0x19,0xc3,0xa0,0x20,0x8b,0x1d,0x18,0x4f,0x84,0x34,0x53,0x61,0x03,0xc4,0xe3, +0x58,0x85,0x05,0x14,0xbe,0x34,0x45,0xb5,0x21,0x10,0x82,0x23,0x15,0x46,0x30,0x2c, +0xc8,0x64,0x02,0x06,0xf0,0x3c,0x91,0x73,0x19,0x00,0xe1,0x4b,0x53,0x64,0x0a,0x84, +0x84,0x34,0x85,0x25,0x0c,0x92,0x20,0x59,0xc1,0x20,0x30,0x8f,0x2d,0x10,0x95,0x84, 
+0x34,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00}; diff --git a/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp b/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp new file mode 100644 index 00000000000..1bd00a0c2a6 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp @@ -0,0 +1,243 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + /* + * Authors: + * Zack Rusin [email protected] + */ +#ifdef MESA_LLVM + +#include "gallivm.h" +#include "gallivm_p.h" + +#include "instructions.h" +#include "loweringpass.h" +#include "storage.h" +#include "tgsitollvm.h" + +#include "pipe/p_context.h" +#include "pipe/p_shader_tokens.h" + +#include "tgsi/tgsi_exec.h" +#include "tgsi/tgsi_dump.h" + +#include "util/u_memory.h" +#include "util/u_math.h" + +#include <llvm/Module.h> +#include <llvm/CallingConv.h> +#include <llvm/Constants.h> +#include <llvm/DerivedTypes.h> +#include <llvm/Instructions.h> +#include <llvm/ModuleProvider.h> +#include <llvm/Pass.h> +#include <llvm/PassManager.h> +#include <llvm/Attributes.h> +#include <llvm/Support/PatternMatch.h> +#include <llvm/ExecutionEngine/JIT.h> +#include <llvm/ExecutionEngine/Interpreter.h> +#include <llvm/ExecutionEngine/GenericValue.h> +#include <llvm/Support/MemoryBuffer.h> +#include <llvm/LinkAllPasses.h> +#include <llvm/Analysis/Verifier.h> +#include <llvm/Analysis/LoopPass.h> +#include <llvm/Target/TargetData.h> +#include <llvm/Bitcode/ReaderWriter.h> +#include <llvm/Transforms/Utils/Cloning.h> + +#include <sstream> +#include <fstream> +#include <iostream> + +struct gallivm_cpu_engine { + llvm::ExecutionEngine *engine; +}; + +static struct gallivm_cpu_engine *CPU = 0; + +typedef int (*fragment_shader_runner)(float x, float y, + float (*dests)[16][4], + float (*inputs)[16][4], + int num_attribs, + float (*consts)[4], int num_consts, + struct tgsi_sampler *samplers); + +int gallivm_cpu_fs_exec(struct gallivm_prog *prog, + float fx, float fy, + float (*dests)[16][4], + float (*inputs)[16][4], + float (*consts)[4], + struct tgsi_sampler *samplers) +{ + fragment_shader_runner runner = reinterpret_cast<fragment_shader_runner>(prog->function); + assert(runner); + + return runner(fx, fy, dests, inputs, prog->num_interp, + consts, prog->num_consts, + samplers); +} + +static inline 
llvm::Function *func_for_shader(struct gallivm_prog *prog)
+{
+   /* Look up the shader entry point by the fixed name used for each
+    * shader type. Yields 0 for an unknown type (after asserting in debug
+    * builds) or when the module holds no function with that name. */
+   llvm::Module *mod = prog->module;
+   llvm::Function *func = 0;
+
+   switch (prog->type) {
+   case GALLIVM_VS:
+      func = mod->getFunction("vs_shader");
+      break;
+   case GALLIVM_FS:
+      func = mod->getFunction("fs_shader");
+      break;
+   default:
+      assert(!"Unknown shader type!");
+      break;
+   }
+   return func;
+}
+
+/*!
+  This function creates a CPU based execution engine for the given gallivm_prog.
+  gallivm_cpu_engine should be used as a singleton throughout the library. Before
+  executing gallivm_prog_exec one needs to call gallivm_cpu_jit_compile.
+  The gallivm_prog instance which is being passed to the constructor is being
+  automatically JIT compiled so one shouldn't call gallivm_cpu_jit_compile
+  with it again.
+
+  Returns 0 (and leaves the global engine untouched) when the wrapper cannot
+  be allocated or LLVM cannot create an execution engine. prog->function is
+  set to 0 when the module has no entry point for the shader type.
+ */
+struct gallivm_cpu_engine * gallivm_cpu_engine_create(struct gallivm_prog *prog)
+{
+   struct gallivm_cpu_engine *cpu = (struct gallivm_cpu_engine *)
+      calloc(1, sizeof(struct gallivm_cpu_engine));
+   if (!cpu)
+      return 0;
+
+   llvm::Module *mod = static_cast<llvm::Module*>(prog->module);
+   llvm::ExistingModuleProvider *mp = new llvm::ExistingModuleProvider(mod);
+   llvm::ExecutionEngine *ee = llvm::ExecutionEngine::create(mp, false);
+   if (!ee) {
+      /* Engine creation failed: do not publish a half-initialized
+       * singleton.
+       * NOTE(review): mp is not released on this path; confirm the
+       * module ownership rules before deleting it here. */
+      free(cpu);
+      return 0;
+   }
+   ee->DisableLazyCompilation();
+   cpu->engine = ee;
+
+   llvm::Function *func = func_for_shader(prog);
+
+   /* Never hand a null function to the JIT; the exec entry points assert
+    * on a null prog->function instead. */
+   prog->function = func ? ee->getPointerToFunction(func) : 0;
+   CPU = cpu;
+   return cpu;
+}
+
+
+/*!
+  This function JIT compiles the given gallivm_prog with the given cpu based execution engine.
+  The reference to the generated machine code entry point will be stored
+  in the gallivm_prog program. After executing this function one can call gallivm_prog_exec
+  in order to execute the gallivm_prog on the CPU.
+  prog->function is set to 0 when the module has no entry point for the
+  shader type.
+ */
+void gallivm_cpu_jit_compile(struct gallivm_cpu_engine *cpu, struct gallivm_prog *prog)
+{
+   assert(cpu);
+   llvm::Module *mod = static_cast<llvm::Module*>(prog->module);
+   llvm::ExistingModuleProvider *mp = new llvm::ExistingModuleProvider(mod);
+   llvm::ExecutionEngine *ee = cpu->engine;
+   assert(ee);
+   /*FIXME : why was this disabled ? we need it for pow/sqrt/... */
+   ee->DisableLazyCompilation(false);
+   ee->addModuleProvider(mp);
+
+   llvm::Function *func = func_for_shader(prog);
+   prog->function = func ? ee->getPointerToFunction(func) : 0;
+}
+
+/*!
+  Frees the engine wrapper. If it is the global singleton, the global
+  reference is cleared so gallivm_global_cpu_engine() cannot return a
+  pointer to freed memory.
+ */
+void gallivm_cpu_engine_delete(struct gallivm_cpu_engine *cpu)
+{
+   if (CPU == cpu)
+      CPU = 0;
+   /* NOTE(review): cpu->engine itself is not destroyed here; confirm who
+    * owns the modules it references before adding a delete. */
+   free(cpu);
+}
+
+/*! Returns the engine most recently created, or 0 if there is none. */
+struct gallivm_cpu_engine * gallivm_global_cpu_engine()
+{
+   return CPU;
+}
+
+
+typedef void (*vertex_shader_runner)(void *ainputs,
+                                     void *dests,
+                                     float (*aconsts)[4]);
+
+#define MAX_TGSI_VERTICES 4
+/*!
+  This function is used to execute the gallivm_prog in software. Before calling
+  this function the gallivm_prog has to be JIT compiled with the gallivm_cpu_jit_compile
+  function.
+ */
+int gallivm_cpu_vs_exec(struct gallivm_prog *prog,
+                        struct tgsi_exec_machine *machine,
+                        const float (*input)[4],
+                        unsigned num_inputs,
+                        float (*output)[4],
+                        unsigned num_outputs,
+                        const float (*constants)[4],
+                        unsigned count,
+                        unsigned input_stride,
+                        unsigned output_stride )
+{
+   unsigned int i, j;
+   unsigned slot;
+   vertex_shader_runner runner = reinterpret_cast<vertex_shader_runner>(prog->function);
+   assert(runner);
+
+   for (i = 0; i < count; i += MAX_TGSI_VERTICES) {
+      unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i);
+
+      /* Swizzle inputs.
+ */ + for (j = 0; j < max_vertices; j++) { + for (slot = 0; slot < num_inputs; slot++) { + machine->Inputs[slot].xyzw[0].f[j] = input[slot][0]; + machine->Inputs[slot].xyzw[1].f[j] = input[slot][1]; + machine->Inputs[slot].xyzw[2].f[j] = input[slot][2]; + machine->Inputs[slot].xyzw[3].f[j] = input[slot][3]; + } + + input = (const float (*)[4])((const char *)input + input_stride); + } + + /* run shader */ + runner(machine->Inputs, + machine->Outputs, + (float (*)[4]) constants); + + /* Unswizzle all output results + */ + for (j = 0; j < max_vertices; j++) { + for (slot = 0; slot < num_outputs; slot++) { + output[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; + output[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; + output[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; + output[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; + } + output = (float (*)[4])((char *)output + output_stride); + } + } + + return 0; +} + +#endif diff --git a/src/gallium/auxiliary/gallivm/gallivm_p.h b/src/gallium/auxiliary/gallivm/gallivm_p.h new file mode 100644 index 00000000000..d2c5852bdf7 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/gallivm_p.h @@ -0,0 +1,110 @@ +#ifndef GALLIVM_P_H +#define GALLIVM_P_H + +#ifdef MESA_LLVM + +#include "gallivm.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/p_compiler.h" + +namespace llvm { + class Module; +} + +#if defined __cplusplus +extern "C" { +#endif + +enum gallivm_shader_type; +enum gallivm_vector_layout; + +struct gallivm_interpolate { + int attrib; + int chan; + int type; +}; + +struct gallivm_ir { + llvm::Module *module; + int id; + enum gallivm_shader_type type; + enum gallivm_vector_layout layout; + int num_components; + int num_consts; + + /* FIXME: this might not be enough for some shaders */ + struct gallivm_interpolate interpolators[32*4]; + int num_interp; +}; + +struct gallivm_prog { + llvm::Module *module; + void *function; + + int id; + enum gallivm_shader_type type; + + int num_consts; + + /* FIXME: this might not 
be enough for some shaders */ + struct gallivm_interpolate interpolators[32*4]; + int num_interp; +}; + +static INLINE void gallivm_swizzle_components(int swizzle, + int *xc, int *yc, + int *zc, int *wc) +{ + int x = swizzle / 1000; swizzle -= x * 1000; + int y = swizzle / 100; swizzle -= y * 100; + int z = swizzle / 10; swizzle -= z * 10; + int w = swizzle; + + if (xc) *xc = x; + if (yc) *yc = y; + if (zc) *zc = z; + if (wc) *wc = w; +} + +static INLINE boolean gallivm_is_swizzle(int swizzle) +{ + const int NO_SWIZZLE = TGSI_SWIZZLE_X * 1000 + TGSI_SWIZZLE_Y * 100 + + TGSI_SWIZZLE_Z * 10 + TGSI_SWIZZLE_W; + return swizzle != NO_SWIZZLE; +} + +static INLINE int gallivm_x_swizzle(int swizzle) +{ + int x; + gallivm_swizzle_components(swizzle, &x, 0, 0, 0); + return x; +} + +static INLINE int gallivm_y_swizzle(int swizzle) +{ + int y; + gallivm_swizzle_components(swizzle, 0, &y, 0, 0); + return y; +} + +static INLINE int gallivm_z_swizzle(int swizzle) +{ + int z; + gallivm_swizzle_components(swizzle, 0, 0, &z, 0); + return z; +} + +static INLINE int gallivm_w_swizzle(int swizzle) +{ + int w; + gallivm_swizzle_components(swizzle, 0, 0, 0, &w); + return w; +} + +#if defined __cplusplus +} +#endif + +#endif /* MESA_LLVM */ + +#endif diff --git a/src/gallium/auxiliary/gallivm/instructions.cpp b/src/gallium/auxiliary/gallivm/instructions.cpp new file mode 100644 index 00000000000..ee8162efce5 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/instructions.cpp @@ -0,0 +1,1193 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + /* + * Authors: + * Zack Rusin [email protected] + */ +#ifdef MESA_LLVM + +#include "instructions.h" + +#include "storage.h" + +#include "util/u_memory.h" + +#include <llvm/CallingConv.h> +#include <llvm/Constants.h> +#include <llvm/DerivedTypes.h> +#include <llvm/Function.h> +#include <llvm/InstrTypes.h> +#include <llvm/Instructions.h> +#include <llvm/Attributes.h> +#include <llvm/Support/MemoryBuffer.h> +#include <llvm/Bitcode/ReaderWriter.h> + +#include <sstream> +#include <fstream> +#include <iostream> + +using namespace llvm; + +#include "gallivm_builtins.cpp" + +#if 0 +llvm::Value *arrayFromChannels(std::vector<llvm::Value*> &vals) +{ + VectorType *vectorType = VectorType::get(Type::FloatTy, 4); + ArrayType *vectorArray = ArrayType::get(vectorType, 4); +} +#endif + +static inline std::string createFuncName(int label) +{ + std::ostringstream stream; + stream << "function"; + stream << label; + return stream.str(); +} + +Instructions::Instructions(llvm::Module *mod, llvm::Function *func, llvm::BasicBlock *block, + Storage *storage) + : m_mod(mod), m_func(func), m_builder(block), m_idx(0), + m_storage(storage) +{ + m_floatVecType = VectorType::get(Type::FloatTy, 4); + + m_llvmFSqrt = 0; + m_llvmFAbs = 0; + m_llvmPow = 0; + m_llvmFloor = 0; + m_llvmFlog = 0; + m_llvmFexp = 0; + m_llvmLit = 0; + m_fmtPtr = 0; + + MemoryBuffer *buffer = MemoryBuffer::getMemBuffer( + (const char*)&llvm_builtins_data[0], + (const char*)&llvm_builtins_data[Elements(llvm_builtins_data)-1]); + m_mod = ParseBitcodeFile(buffer); +} + +llvm::BasicBlock * Instructions::currentBlock() const +{ + return m_builder.GetInsertBlock(); +} + +llvm::Value * Instructions::abs(llvm::Value *in) +{ + std::vector<llvm::Value*> vec = extractVector(in); + Value *xabs = callFAbs(vec[0]); + Value *yabs = callFAbs(vec[1]); + Value *zabs = callFAbs(vec[2]); + Value *wabs = callFAbs(vec[3]); + return vectorFromVals(xabs, yabs, 
zabs, wabs); +} + +llvm::Value * Instructions::add(llvm::Value *in1, llvm::Value *in2) +{ + return m_builder.CreateAdd(in1, in2, name("add")); +} + +llvm::Value * Instructions::arl(llvm::Value *in) +{ + return floor(in); +} + +void Instructions::beginLoop() +{ + BasicBlock *begin = BasicBlock::Create(name("loop"), m_func,0); + BasicBlock *end = BasicBlock::Create(name("endloop"), m_func,0); + + m_builder.CreateBr(begin); + Loop loop; + loop.begin = begin; + loop.end = end; + m_builder.SetInsertPoint(begin); + m_loopStack.push(loop); +} + +void Instructions::bgnSub(unsigned label) +{ + llvm::Function *func = findFunction(label); + + Function::arg_iterator args = func->arg_begin(); + Value *ptr_INPUT = args++; + ptr_INPUT->setName("INPUT"); + m_storage->pushArguments(ptr_INPUT); + + llvm::BasicBlock *entry = BasicBlock::Create("entry", func, 0); + + m_func = func; + m_builder.SetInsertPoint(entry); +} + +void Instructions::brk() +{ + assert(!m_loopStack.empty()); + BasicBlock *unr = BasicBlock::Create(name("unreachable"), m_func,0); + m_builder.CreateBr(m_loopStack.top().end); + m_builder.SetInsertPoint(unr); +} + +void Instructions::cal(int label, llvm::Value *input) +{ + std::vector<Value*> params; + params.push_back(input); + llvm::Function *func = findFunction(label); + + m_builder.CreateCall(func, params.begin(), params.end()); +} + +llvm::Value * Instructions::ceil(llvm::Value *in) +{ + std::vector<llvm::Value*> vec = extractVector(in); + return vectorFromVals(callCeil(vec[0]), callCeil(vec[1]), + callCeil(vec[2]), callCeil(vec[3])); +} + +llvm::Value * Instructions::clamp(llvm::Value *in1) +{ + llvm::Value *zero = constVector(0.0f, 0.0f, 0.0f, 0.0f); + llvm::Value *one = constVector(1.0f, 1.0f, 1.0f, 1.0f); + return min( max(zero, in1), one); +} + +llvm::Value * Instructions::cmp(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3) +{ + llvm::Function *func = m_mod->getFunction("cmp"); + assert(func); + + std::vector<Value*> params; + params.push_back(in1); + 
params.push_back(in2); + params.push_back(in3); + CallInst *call = m_builder.CreateCall(func, params.begin(), params.end(), name("cmpres")); + call->setTailCall(false); + return call; +} + +llvm::Value * Instructions::cnd(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3) +{ + std::vector<llvm::Value*> vec1 = extractVector(in1); + std::vector<llvm::Value*> vec2 = extractVector(in2); + std::vector<llvm::Value*> vec3 = extractVector(in3); + Constant *half = ConstantFP::get(APFloat(0.5f)); + + Value *xcmp = m_builder.CreateFCmpOGT(vec1[0], half, name("xcmp")); + Value *selx = m_builder.CreateSelect(xcmp, vec2[0], vec3[0], + name("selx")); + + Value *ycmp = m_builder.CreateFCmpOGT(vec1[1], half, name("ycmp")); + Value *sely = m_builder.CreateSelect(ycmp, vec2[1], vec3[1], + name("sely")); + + Value *zcmp = m_builder.CreateFCmpOGT(vec1[2], half, name("zcmp")); + Value *selz = m_builder.CreateSelect(zcmp, vec2[2], vec3[2], + name("selz")); + + Value *wcmp = m_builder.CreateFCmpOGT(vec1[3], half, name("wcmp")); + Value *selw = m_builder.CreateSelect(wcmp, vec2[3], vec3[3], + name("selw")); + + return vectorFromVals(selx, sely, selz, selw); +} + +llvm::Value * Instructions::cnd0(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3) +{ + std::vector<llvm::Value*> vec1 = extractVector(in1); + std::vector<llvm::Value*> vec2 = extractVector(in2); + std::vector<llvm::Value*> vec3 = extractVector(in3); + Constant *zero = Constant::getNullValue(Type::FloatTy); + + Value *xcmp = m_builder.CreateFCmpOGE(vec1[0], zero, name("xcmp")); + Value *selx = m_builder.CreateSelect(xcmp, vec2[0], vec3[0], + name("selx")); + + Value *ycmp = m_builder.CreateFCmpOGE(vec1[1], zero, name("ycmp")); + Value *sely = m_builder.CreateSelect(ycmp, vec2[1], vec3[1], + name("sely")); + + Value *zcmp = m_builder.CreateFCmpOGE(vec1[2], zero, name("zcmp")); + Value *selz = m_builder.CreateSelect(zcmp, vec2[2], vec3[2], + name("selz")); + + Value *wcmp = m_builder.CreateFCmpOGE(vec1[3], zero, 
name("wcmp"));
+   Value *selw = m_builder.CreateSelect(wcmp, vec2[3], vec3[3],
+                                        name("selw"));
+
+   return vectorFromVals(selx, sely, selz, selw);
+}
+
+/* Calls the builtin "cosf" on the x component and replicates the result
+ * into all four channels. */
+llvm::Value * Instructions::cos(llvm::Value *in)
+{
+#if 0
+   llvm::Function *func = m_mod->getFunction("vcos");
+   assert(func);
+
+   CallInst *call = m_builder.CreateCall(func, in, name("cosres"));
+   call->setTailCall(false);
+   return call;
+#else
+   std::vector<llvm::Value*> elems = extractVector(in);
+   Function *func = m_mod->getFunction("cosf");
+   assert(func);
+   CallInst *cos = m_builder.CreateCall(func, elems[0], name("cosres"));
+   cos->setCallingConv(CallingConv::C);
+   cos->setTailCall(true);
+   return vectorFromVals(cos, cos, cos, cos);
+#endif
+}
+
+/* Three-component cross product of in1 and in2, built from the extracted
+ * x/y/z scalars. */
+llvm::Value * Instructions::cross(llvm::Value *in1, llvm::Value *in2)
+{
+   Value *x1 = m_builder.CreateExtractElement(in1,
+                                              m_storage->constantInt(0),
+                                              name("x1"));
+   Value *y1 = m_builder.CreateExtractElement(in1,
+                                              m_storage->constantInt(1),
+                                              name("y1"));
+   Value *z1 = m_builder.CreateExtractElement(in1,
+                                              m_storage->constantInt(2),
+                                              name("z1"));
+
+   Value *x2 = m_builder.CreateExtractElement(in2,
+                                              m_storage->constantInt(0),
+                                              name("x2"));
+   Value *y2 = m_builder.CreateExtractElement(in2,
+                                              m_storage->constantInt(1),
+                                              name("y2"));
+   Value *z2 = m_builder.CreateExtractElement(in2,
+                                              m_storage->constantInt(2),
+                                              name("z2"));
+   Value *y1z2 = mul(y1, z2);
+   Value *z1y2 = mul(z1, y2);
+
+   Value *z1x2 = mul(z1, x2);
+   Value *x1z2 = mul(x1, z2);
+
+   Value *x1y2 = mul(x1, y2);
+   Value *y1x2 = mul(y1, x2);
+
+   return vectorFromVals(sub(y1z2, z1y2), sub(z1x2, x1z2), sub(x1y2, y1x2));
+}
+
+/* Not implemented. Asserts in debug builds; returns a null value otherwise
+ * so that control never falls off the end of a non-void function. */
+llvm::Value * Instructions::ddx(llvm::Value *in)
+{
+   // FIXME
+   assert(0);
+   return 0;
+}
+
+/* Not implemented. Asserts in debug builds; returns a null value otherwise
+ * so that control never falls off the end of a non-void function. */
+llvm::Value * Instructions::ddy(llvm::Value *in)
+{
+   // FIXME
+   assert(0);
+   return 0;
+}
+
+/* Floating point division of in1 by in2. */
+llvm::Value * Instructions::div(llvm::Value *in1, llvm::Value *in2)
+{
+   return m_builder.CreateFDiv(in1, in2, name("div"));
+}
+
+/* Two-component dot product of in1 and in2 plus a scalar addend:
+ * in1.x*in2.x + in1.y*in2.y + in3.x, replicated into all four channels. */
+llvm::Value * Instructions::dot2add(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3)
+{
+   Value *mulRes = mul(in1, in2);
+   Value *x = m_builder.CreateExtractElement(mulRes,
+                                             m_storage->constantInt(0),
+                                             name("extractx"));
+   Value *y = m_builder.CreateExtractElement(mulRes,
+                                             m_storage->constantInt(1),
+                                             name("extracty"));
+   /* The addend is the x channel of the third operand (previously the z
+    * channel was read, apparently copied from dp3). */
+   Value *addend = m_builder.CreateExtractElement(in3,
+                                                  m_storage->constantInt(0),
+                                                  name("extractaddend"));
+   Value *xy = m_builder.CreateAdd(x, y,name("xy"));
+   Value *dot2add = m_builder.CreateAdd(xy, addend, name("dot2add"));
+   return vectorFromVals(dot2add, dot2add, dot2add, dot2add);
+}
+
+/* Two-component dot product, replicated into all four channels. */
+llvm::Value * Instructions::dp2(llvm::Value *in1, llvm::Value *in2)
+{
+   Value *mulRes = mul(in1, in2);
+   Value *x = m_builder.CreateExtractElement(mulRes,
+                                             m_storage->constantInt(0),
+                                             name("extractx"));
+   Value *y = m_builder.CreateExtractElement(mulRes,
+                                             m_storage->constantInt(1),
+                                             name("extracty"));
+   Value *xy = m_builder.CreateAdd(x, y,name("xy"));
+   return vectorFromVals(xy, xy, xy, xy);
+}
+
+/* Three-component dot product, replicated into all four channels. */
+llvm::Value * Instructions::dp3(llvm::Value *in1, llvm::Value *in2)
+{
+   Value *mulRes = mul(in1, in2);
+   Value *x = m_builder.CreateExtractElement(mulRes,
+                                             m_storage->constantInt(0),
+                                             name("extractx"));
+   Value *y = m_builder.CreateExtractElement(mulRes,
+                                             m_storage->constantInt(1),
+                                             name("extracty"));
+   Value *z = m_builder.CreateExtractElement(mulRes,
+                                             m_storage->constantInt(2),
+                                             name("extractz"));
+   Value *xy = m_builder.CreateAdd(x, y,name("xy"));
+   Value *dot3 = m_builder.CreateAdd(xy, z, name("dot3"));
+   return vectorFromVals(dot3, dot3, dot3, dot3);
+}
+
+/* Four-component dot product, replicated into all four channels. */
+llvm::Value * Instructions::dp4(llvm::Value *in1, llvm::Value *in2)
+{
+   Value *mulRes = mul(in1, in2);
+   std::vector<llvm::Value*> vec = extractVector(mulRes);
+   Value *xy = m_builder.CreateAdd(vec[0], vec[1], name("xy"));
+   Value *xyz = m_builder.CreateAdd(xy, vec[2], name("xyz"));
+   Value *dot4 = m_builder.CreateAdd(xyz, vec[3], name("dot4"));
+   return vectorFromVals(dot4, dot4, dot4, dot4);
+}
+
+/* Homogeneous dot product:
+ * in1.x*in2.x + in1.y*in2.y + in1.z*in2.z + in2.w, replicated into all
+ * four channels. The w channel of in1 is not used. */
+llvm::Value * Instructions::dph(llvm::Value *in1, llvm::Value *in2)
+{
+   Value *mulRes = mul(in1, in2);
+   std::vector<llvm::Value*> vec1 = extractVector(mulRes);
+   /* The last term is in2.w itself, not in1.w*in2.w (which would make
+    * this identical to dp4). */
+   Value *w2 = m_builder.CreateExtractElement(in2,
+                                              m_storage->constantInt(3),
+                                              name("w2"));
+   Value *xy = m_builder.CreateAdd(vec1[0], vec1[1], name("xy"));
+   Value *xyz = m_builder.CreateAdd(xy, vec1[2], name("xyz"));
+   Value *dph = m_builder.CreateAdd(xyz, w2, name("dph"));
+   return vectorFromVals(dph, dph, dph, dph);
+}
+
+/* Builds (1, in1.y*in2.y, in1.z, in2.w). */
+llvm::Value * Instructions::dst(llvm::Value *in1, llvm::Value *in2)
+{
+   Value *y1 = m_builder.CreateExtractElement(in1,
+                                              m_storage->constantInt(1),
+                                              name("y1"));
+   Value *z = m_builder.CreateExtractElement(in1,
+                                             m_storage->constantInt(2),
+                                             name("z"));
+   Value *y2 = m_builder.CreateExtractElement(in2,
+                                              m_storage->constantInt(1),
+                                              name("y2"));
+   Value *w = m_builder.CreateExtractElement(in2,
+                                             m_storage->constantInt(3),
+                                             name("w"));
+   Value *ry = m_builder.CreateMul(y1, y2, name("tyuy"));
+   return vectorFromVals(ConstantFP::get(APFloat(1.f)),
+                         ry, z, w);
+}
+
+/* Ends the "then" part of the innermost if: branches to a fresh join
+ * block, continues emitting into the block that was on top of the if
+ * stack (renamed "ifelse"), and pushes the join block in its place. */
+void Instructions::elseop()
+{
+   assert(!m_ifStack.empty());
+   BasicBlock *ifend = BasicBlock::Create(name("ifend"), m_func,0);
+   m_builder.CreateBr(ifend);
+   m_builder.SetInsertPoint(m_ifStack.top());
+   currentBlock()->setName(name("ifelse"));
+   m_ifStack.pop();
+   m_ifStack.push(ifend);
+}
+
+/* Closes the innermost if: branches to the block on top of the if stack,
+ * continues emitting there, and pops it. */
+void Instructions::endif()
+{
+   assert(!m_ifStack.empty());
+   m_builder.CreateBr(m_ifStack.top());
+   m_builder.SetInsertPoint(m_ifStack.top());
+   m_ifStack.pop();
+}
+
+/* Closes the innermost loop: branches back to its begin block, moves its
+ * end block after the current block, and continues emitting there. */
+void Instructions::endLoop()
+{
+   assert(!m_loopStack.empty());
+   Loop loop = m_loopStack.top();
+   m_builder.CreateBr(loop.begin);
+   loop.end->moveAfter(currentBlock());
+   m_builder.SetInsertPoint(loop.end);
+   m_loopStack.pop();
+}
+
+/* Emits a void return at the current insert point. */
+void Instructions::end()
+{
+   m_builder.CreateRetVoid();
+}
+
+/* Leaves the current subroutine: clears the current function and the
+ * builder's insert point. */
+void Instructions::endSub()
+{
+   m_func = 0;
+   m_builder.SetInsertPoint(0);
+}
+
+/* Applies callFExp to each of the four channels. */
+llvm::Value * Instructions::exp(llvm::Value *in)
+{
+   std::vector<llvm::Value*> vec = extractVector(in);
+   return vectorFromVals(callFExp(vec[0]), callFExp(vec[1]),
+                         callFExp(vec[2]), callFExp(vec[3]));
+}
+
+llvm::Value * Instructions::ex2(llvm::Value *in)
+{
+
llvm::Value *val = callPow(ConstantFP::get(APFloat(2.f)), + m_builder.CreateExtractElement( + in, m_storage->constantInt(0), + name("x1"))); + return vectorFromVals(val, val, val, val); +} + +llvm::Value * Instructions::floor(llvm::Value *in) +{ + std::vector<llvm::Value*> vec = extractVector(in); + return vectorFromVals(callFloor(vec[0]), callFloor(vec[1]), + callFloor(vec[2]), callFloor(vec[3])); +} + +llvm::Value * Instructions::frc(llvm::Value *in) +{ + llvm::Value *flr = floor(in); + return sub(in, flr); +} + +void Instructions::ifop(llvm::Value *in) +{ + BasicBlock *ifthen = BasicBlock::Create(name("ifthen"), m_func,0); + BasicBlock *ifend = BasicBlock::Create(name("ifthenend"), m_func,0); + + //BasicBlock *yblock = new BasicBlock(name("yblock"), m_func,0); + //BasicBlock *zblock = new BasicBlock(name("zblock"), m_func,0); + //BasicBlock *wblock = new BasicBlock(name("wblock"), m_func,0); + + Constant *float0 = Constant::getNullValue(Type::FloatTy); + + Value *x = m_builder.CreateExtractElement(in, m_storage->constantInt(0), + name("extractx")); + Value *xcmp = m_builder.CreateFCmpUNE(x, float0, name("xcmp")); + m_builder.CreateCondBr(xcmp, ifthen, ifend); + //m_builder.SetInsertPoint(yblock); + + m_builder.SetInsertPoint(ifthen); + m_ifStack.push(ifend); +} + +llvm::Value * Instructions::kil(llvm::Value *in) +{ + llvm::Function *func = m_mod->getFunction("kil"); + assert(func); + + CallInst *call = m_builder.CreateCall(func, in, name("kilpres")); + call->setTailCall(false); + return call; +} + +llvm::Value * Instructions::lerp(llvm::Value *in1, llvm::Value *in2, + llvm::Value *in3) +{ + llvm::Value *m = mul(in1, in2); + llvm::Value *vec1 = constVector(1.f, 1.f, 1.f, 1.f); + llvm::Value *s = sub(vec1, in1); + return add(m, mul(s, in3)); +} + +llvm::Value * Instructions::lg2(llvm::Value *in) +{ + std::vector<llvm::Value*> vec = extractVector(in); + llvm::Value *const_vec = constVector(1.442695f, 1.442695f, + 1.442695f, 1.442695f); + return 
mul(vectorFromVals(callFLog(vec[0]), callFLog(vec[1]), + callFLog(vec[2]), callFLog(vec[3])), const_vec); +} + +llvm::Value * Instructions::lit(llvm::Value *in) +{ + if (!m_llvmLit) { + m_llvmLit = m_mod->getFunction("lit"); + } + CallInst *call = m_builder.CreateCall(m_llvmLit, in, name("litres")); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + return call; +} + +llvm::Value * Instructions::log(llvm::Value *in) +{ + std::vector<llvm::Value*> vec = extractVector(in); + return vectorFromVals(callFLog(vec[0]), callFLog(vec[1]), + callFLog(vec[2]), callFLog(vec[3])); +} + +llvm::Value * Instructions::madd(llvm::Value *in1, llvm::Value *in2, + llvm::Value *in3) +{ + Value *mulRes = mul(in1, in2); + return add(mulRes, in3); +} + +llvm::Value * Instructions::max(llvm::Value *in1, llvm::Value *in2) +{ + std::vector<llvm::Value*> vec1 = extractVector(in1); + std::vector<llvm::Value*> vec2 = extractVector(in2); + + Value *xcmp = m_builder.CreateFCmpOGT(vec1[0], vec2[0], + name("xcmp")); + Value *selx = m_builder.CreateSelect(xcmp, vec1[0], vec2[0], + name("selx")); + + Value *ycmp = m_builder.CreateFCmpOGT(vec1[1], vec2[1], + name("ycmp")); + Value *sely = m_builder.CreateSelect(ycmp, vec1[1], vec2[1], + name("sely")); + + Value *zcmp = m_builder.CreateFCmpOGT(vec1[2], vec2[2], + name("zcmp")); + Value *selz = m_builder.CreateSelect(zcmp, vec1[2], vec2[2], + name("selz")); + + Value *wcmp = m_builder.CreateFCmpOGT(vec1[3], vec2[3], + name("wcmp")); + Value *selw = m_builder.CreateSelect(wcmp, vec1[3], vec2[3], + name("selw")); + + return vectorFromVals(selx, sely, selz, selw); +} + +llvm::Value * Instructions::min(llvm::Value *in1, llvm::Value *in2) +{ + std::vector<llvm::Value*> vec1 = extractVector(in1); + std::vector<llvm::Value*> vec2 = extractVector(in2); + + Value *xcmp = m_builder.CreateFCmpOLT(vec1[0], vec2[0], name("xcmp")); + Value *selx = m_builder.CreateSelect(xcmp, vec1[0], vec2[0], + name("selx")); + + Value *ycmp = 
m_builder.CreateFCmpOLT(vec1[1], vec2[1], name("ycmp")); + Value *sely = m_builder.CreateSelect(ycmp, vec1[1], vec2[1], + name("sely")); + + Value *zcmp = m_builder.CreateFCmpOLT(vec1[2], vec2[2], name("zcmp")); + Value *selz = m_builder.CreateSelect(zcmp, vec1[2], vec2[2], + name("selz")); + + Value *wcmp = m_builder.CreateFCmpOLT(vec1[3], vec2[3], name("wcmp")); + Value *selw = m_builder.CreateSelect(wcmp, vec1[3], vec2[3], + name("selw")); + + return vectorFromVals(selx, sely, selz, selw); +} + +llvm::Value * Instructions::mul(llvm::Value *in1, llvm::Value *in2) +{ + return m_builder.CreateMul(in1, in2, name("mul")); +} + +llvm::Value * Instructions::neg(llvm::Value *in) +{ + Value *neg = m_builder.CreateNeg(in, name("neg")); + return neg; +} + +llvm::Value * Instructions::nrm(llvm::Value *in) +{ + llvm::Value *v = rsq(in); + return mul(v, in); +} + +llvm::Value * Instructions::pow(llvm::Value *in1, llvm::Value *in2) +{ + Value *x1 = m_builder.CreateExtractElement(in1, + m_storage->constantInt(0), + name("x1")); + Value *x2 = m_builder.CreateExtractElement(in2, + m_storage->constantInt(0), + name("x2")); + llvm::Value *val = callPow(x1, x2); + return vectorFromVals(val, val, val, val); +} + +llvm::Value * Instructions::rcp(llvm::Value *in1) +{ + Value *x1 = m_builder.CreateExtractElement(in1, + m_storage->constantInt(0), + name("x1")); + Value *res = m_builder.CreateFDiv(ConstantFP::get(APFloat(1.f)), + x1, name("rcp")); + return vectorFromVals(res, res, res, res); +} + +llvm::Value * Instructions::rsq(llvm::Value *in1) +{ + Value *x = m_builder.CreateExtractElement(in1, + m_storage->constantInt(0), + name("extractx")); + Value *abs = callFAbs(x); + Value *sqrt = callFSqrt(abs); + + Value *rsqrt = m_builder.CreateFDiv(ConstantFP::get(APFloat(1.f)), + sqrt, + name("rsqrt")); + return vectorFromVals(rsqrt, rsqrt, rsqrt, rsqrt); +} + +llvm::Value * Instructions::scs(llvm::Value *in) +{ + llvm::Function *func = m_mod->getFunction("scs"); + assert(func); + + CallInst 
*call = m_builder.CreateCall(func, in, name("scsres")); + call->setTailCall(false); + return call; +} + +llvm::Value * Instructions::seq(llvm::Value *in1, llvm::Value *in2) +{ + Constant *const1f = ConstantFP::get(APFloat(1.000000e+00f)); + Constant *const0f = Constant::getNullValue(Type::FloatTy); + + std::vector<llvm::Value*> vec1 = extractVector(in1); + std::vector<llvm::Value*> vec2 = extractVector(in2); + + Value *xcmp = m_builder.CreateFCmpOEQ(vec1[0], vec2[0], name("xcmp")); + Value *x = m_builder.CreateSelect(xcmp, const1f, const0f, name("xsel")); + + Value *ycmp = m_builder.CreateFCmpOEQ(vec1[1], vec2[1], name("ycmp")); + Value *y = m_builder.CreateSelect(ycmp, const1f, const0f, name("ysel")); + + Value *zcmp = m_builder.CreateFCmpOEQ(vec1[2], vec2[2], name("zcmp")); + Value *z = m_builder.CreateSelect(zcmp, const1f, const0f, name("zsel")); + + Value *wcmp = m_builder.CreateFCmpOEQ(vec1[3], vec2[3], name("wcmp")); + Value *w = m_builder.CreateSelect(wcmp, const1f, const0f, name("wsel")); + + return vectorFromVals(x, y, z, w); +} + +llvm::Value * Instructions::sfl(llvm::Value *in1, llvm::Value *in2) +{ + Constant *const0f = Constant::getNullValue(Type::FloatTy); + + return vectorFromVals(const0f, const0f, const0f, const0f); +} + +llvm::Value * Instructions::sge(llvm::Value *in1, llvm::Value *in2) +{ + Constant *const1f = ConstantFP::get(APFloat(1.000000e+00f)); + Constant *const0f = Constant::getNullValue(Type::FloatTy); + + std::vector<llvm::Value*> vec1 = extractVector(in1); + std::vector<llvm::Value*> vec2 = extractVector(in2); + + Value *xcmp = m_builder.CreateFCmpOGE(vec1[0], vec2[0], name("xcmp")); + Value *x = m_builder.CreateSelect(xcmp, const1f, const0f, name("xsel")); + + Value *ycmp = m_builder.CreateFCmpOGE(vec1[1], vec2[1], name("ycmp")); + Value *y = m_builder.CreateSelect(ycmp, const1f, const0f, name("ysel")); + + Value *zcmp = m_builder.CreateFCmpOGE(vec1[2], vec2[2], name("zcmp")); + Value *z = m_builder.CreateSelect(zcmp, const1f, const0f, 
name("zsel")); + + Value *wcmp = m_builder.CreateFCmpOGE(vec1[3], vec2[3], name("wcmp")); + Value *w = m_builder.CreateSelect(wcmp, const1f, const0f, name("wsel")); + + return vectorFromVals(x, y, z, w); +} + +llvm::Value * Instructions::sgt(llvm::Value *in1, llvm::Value *in2) +{ + Constant *const1f = ConstantFP::get(APFloat(1.000000e+00f)); + Constant *const0f = Constant::getNullValue(Type::FloatTy); + + std::vector<llvm::Value*> vec1 = extractVector(in1); + std::vector<llvm::Value*> vec2 = extractVector(in2); + Value *xcmp = m_builder.CreateFCmpOGT(vec1[0], vec2[0], name("xcmp")); + Value *x = m_builder.CreateSelect(xcmp, const1f, const0f, name("xsel")); + + Value *ycmp = m_builder.CreateFCmpOGT(vec1[1], vec2[1], name("ycmp")); + Value *y = m_builder.CreateSelect(ycmp, const1f, const0f, name("ysel")); + + Value *zcmp = m_builder.CreateFCmpOGT(vec1[2], vec2[2], name("zcmp")); + Value *z = m_builder.CreateSelect(zcmp, const1f, const0f, name("zsel")); + + Value *wcmp = m_builder.CreateFCmpOGT(vec1[3], vec2[3], name("wcmp")); + Value *w = m_builder.CreateSelect(wcmp, const1f, const0f, name("wsel")); + + return vectorFromVals(x, y, z, w); +} + +llvm::Value * Instructions::sin(llvm::Value *in) +{ + llvm::Function *func = m_mod->getFunction("vsin"); + assert(func); + + CallInst *call = m_builder.CreateCall(func, in, name("sinres")); + call->setTailCall(false); + return call; +} + +llvm::Value * Instructions::sle(llvm::Value *in1, llvm::Value *in2) +{ + Constant *const1f = ConstantFP::get(APFloat(1.000000e+00f)); + Constant *const0f = Constant::getNullValue(Type::FloatTy); + + std::vector<llvm::Value*> vec1 = extractVector(in1); + std::vector<llvm::Value*> vec2 = extractVector(in2); + + Value *xcmp = m_builder.CreateFCmpOLE(vec1[0], vec2[0], name("xcmp")); + Value *x = m_builder.CreateSelect(xcmp, const1f, const0f, name("xsel")); + + Value *ycmp = m_builder.CreateFCmpOLE(vec1[1], vec2[1], name("ycmp")); + Value *y = m_builder.CreateSelect(ycmp, const1f, const0f, 
name("ysel")); + + Value *zcmp = m_builder.CreateFCmpOLE(vec1[2], vec2[2], name("zcmp")); + Value *z = m_builder.CreateSelect(zcmp, const1f, const0f, name("zsel")); + + Value *wcmp = m_builder.CreateFCmpOLE(vec1[3], vec2[3], name("wcmp")); + Value *w = m_builder.CreateSelect(wcmp, const1f, const0f, name("wsel")); + + return vectorFromVals(x, y, z, w); +} + +llvm::Value * Instructions::slt(llvm::Value *in1, llvm::Value *in2) +{ + Constant *const1f = ConstantFP::get(APFloat(1.000000e+00f)); + Constant *const0f = Constant::getNullValue(Type::FloatTy); + + std::vector<llvm::Value*> vec1 = extractVector(in1); + std::vector<llvm::Value*> vec2 = extractVector(in2); + + Value *xcmp = m_builder.CreateFCmpOLT(vec1[0], vec2[0], name("xcmp")); + Value *x = m_builder.CreateSelect(xcmp, const1f, const0f, name("xsel")); + + Value *ycmp = m_builder.CreateFCmpOLT(vec1[1], vec2[1], name("ycmp")); + Value *y = m_builder.CreateSelect(ycmp, const1f, const0f, name("ysel")); + + Value *zcmp = m_builder.CreateFCmpOLT(vec1[2], vec2[2], name("zcmp")); + Value *z = m_builder.CreateSelect(zcmp, const1f, const0f, name("zsel")); + + Value *wcmp = m_builder.CreateFCmpOLT(vec1[3], vec2[3], name("wcmp")); + Value *w = m_builder.CreateSelect(wcmp, const1f, const0f, name("wsel")); + + return vectorFromVals(x, y, z, w); +} + +llvm::Value * Instructions::sne(llvm::Value *in1, llvm::Value *in2) +{ + Constant *const1f = ConstantFP::get(APFloat(1.000000e+00f)); + Constant *const0f = Constant::getNullValue(Type::FloatTy); + + std::vector<llvm::Value*> vec1 = extractVector(in1); + std::vector<llvm::Value*> vec2 = extractVector(in2); + + Value *xcmp = m_builder.CreateFCmpONE(vec1[0], vec2[0], name("xcmp")); + Value *x = m_builder.CreateSelect(xcmp, const1f, const0f, name("xsel")); + + Value *ycmp = m_builder.CreateFCmpONE(vec1[1], vec2[1], name("ycmp")); + Value *y = m_builder.CreateSelect(ycmp, const1f, const0f, name("ysel")); + + Value *zcmp = m_builder.CreateFCmpONE(vec1[2], vec2[2], name("zcmp")); + 
Value *z = m_builder.CreateSelect(zcmp, const1f, const0f, name("zsel")); + + Value *wcmp = m_builder.CreateFCmpONE(vec1[3], vec2[3], name("wcmp")); + Value *w = m_builder.CreateSelect(wcmp, const1f, const0f, name("wsel")); + + return vectorFromVals(x, y, z, w); +} + +llvm::Value * Instructions::str(llvm::Value *in1, llvm::Value *in2) +{ + Constant *const1f = ConstantFP::get(APFloat(1.000000e+00f)); + + return vectorFromVals(const1f, const1f, const1f, const1f); +} + +llvm::Value * Instructions::sub(llvm::Value *in1, llvm::Value *in2) +{ + Value *res = m_builder.CreateSub(in1, in2, name("sub")); + return res; +} + +llvm::Value * Instructions::trunc(llvm::Value *in) +{ + std::vector<llvm::Value*> vec = extractVector(in); + Value *icastx = m_builder.CreateFPToSI(vec[0], IntegerType::get(32), + name("ftoix")); + Value *icasty = m_builder.CreateFPToSI(vec[1], IntegerType::get(32), + name("ftoiy")); + Value *icastz = m_builder.CreateFPToSI(vec[2], IntegerType::get(32), + name("ftoiz")); + Value *icastw = m_builder.CreateFPToSI(vec[3], IntegerType::get(32), + name("ftoiw")); + Value *fx = m_builder.CreateSIToFP(icastx, Type::FloatTy, + name("fx")); + Value *fy = m_builder.CreateSIToFP(icasty, Type::FloatTy, + name("fy")); + Value *fz = m_builder.CreateSIToFP(icastz, Type::FloatTy, + name("fz")); + Value *fw = m_builder.CreateSIToFP(icastw, Type::FloatTy, + name("fw")); + return vectorFromVals(fx, fy, fz, fw); +} + +llvm::Value * Instructions::x2d(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3) +{ + std::vector<llvm::Value*> vec1 = extractVector(in1); + std::vector<llvm::Value*> vec2 = extractVector(in2); + std::vector<llvm::Value*> vec3 = extractVector(in3); + + Value *x2x3 = m_builder.CreateMul( vec2[0], vec3[0], name("x2x3")); + Value *y2y3 = m_builder.CreateMul( vec2[1], vec3[1], name("y2y3")); + Value *x1px2x3 = m_builder.CreateAdd (vec1[0], x2x3, name("x1 + x2x3")); + Value *x1px2x3py2y3 = m_builder.CreateAdd (x1px2x3, y2y3, name("x1 + x2x3 + y2y3")); + + Value 
*x2z3 = m_builder.CreateMul( vec2[0], vec3[2], name("x2z3")); + Value *y2w3 = m_builder.CreateMul( vec2[1], vec3[3], name("y2w3")); + Value *y1px2z3 = m_builder.CreateAdd (vec1[1], x2z3, name("y1 + x2z3")); + Value *y1px2z3py2w3 = m_builder.CreateAdd (y1px2z3, y2w3, name("y1 + x2z3 + y2w3")); + + return vectorFromVals(x1px2x3py2y3, y1px2z3py2w3, x1px2x3py2y3, y1px2z3py2w3); +} + +void Instructions::printVector(llvm::Value *val) +{ + static const char *frmt = "Vector is [%f, %f, %f, %f]\x0A"; + + if (!m_fmtPtr) { + Constant *format = ConstantArray::get(frmt, true); + ArrayType *arrayTy = ArrayType::get(IntegerType::get(8), strlen(frmt) + 1); + GlobalVariable* globalFormat = new GlobalVariable( + /*Type=*/arrayTy, + /*isConstant=*/true, + /*Linkage=*/GlobalValue::InternalLinkage, + /*Initializer=*/0, // has initializer, specified below + /*Name=*/name(".str"), + m_mod); + globalFormat->setInitializer(format); + + Constant* const_int0 = Constant::getNullValue(IntegerType::get(32)); + std::vector<Constant*> const_ptr_21_indices; + const_ptr_21_indices.push_back(const_int0); + const_ptr_21_indices.push_back(const_int0); + m_fmtPtr = ConstantExpr::getGetElementPtr(globalFormat, + &const_ptr_21_indices[0], const_ptr_21_indices.size()); + } + + Function *func_printf = m_mod->getFunction("printf"); + if (!func_printf) + func_printf = declarePrintf(); + assert(func_printf); + std::vector<llvm::Value*> vec = extractVector(val); + Value *dx = m_builder.CreateFPExt(vec[0], Type::DoubleTy, name("dx")); + Value *dy = m_builder.CreateFPExt(vec[1], Type::DoubleTy, name("dy")); + Value *dz = m_builder.CreateFPExt(vec[2], Type::DoubleTy, name("dz")); + Value *dw = m_builder.CreateFPExt(vec[3], Type::DoubleTy, name("dw")); + std::vector<Value*> params; + params.push_back(m_fmtPtr); + params.push_back(dx); + params.push_back(dy); + params.push_back(dz); + params.push_back(dw); + CallInst *call = m_builder.CreateCall(func_printf, params.begin(), params.end(), + name("printf")); + 
call->setCallingConv(CallingConv::C); + call->setTailCall(true); +} + +const char * Instructions::name(const char *prefix) +{ + ++m_idx; + snprintf(m_name, 32, "%s%d", prefix, m_idx); + return m_name; +} + +llvm::Value * Instructions::callCeil(llvm::Value *val) +{ + if (!m_llvmCeil) { + // predeclare the intrinsic + std::vector<const Type*> ceilArgs; + ceilArgs.push_back(Type::FloatTy); + AttrListPtr ceilPal; + FunctionType* ceilType = FunctionType::get( + /*Result=*/Type::FloatTy, + /*Params=*/ceilArgs, + /*isVarArg=*/false); + m_llvmCeil = Function::Create( + /*Type=*/ceilType, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Name=*/"ceilf", m_mod); + m_llvmCeil->setCallingConv(CallingConv::C); + m_llvmCeil->setAttributes(ceilPal); + } + CallInst *call = m_builder.CreateCall(m_llvmCeil, val, + name("ceilf")); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + return call; +} + +llvm::Value *Instructions::callFAbs(llvm::Value *val) +{ + if (!m_llvmFAbs) { + // predeclare the intrinsic + std::vector<const Type*> fabsArgs; + fabsArgs.push_back(Type::FloatTy); + AttrListPtr fabsPal; + FunctionType* fabsType = FunctionType::get( + /*Result=*/Type::FloatTy, + /*Params=*/fabsArgs, + /*isVarArg=*/false); + m_llvmFAbs = Function::Create( + /*Type=*/fabsType, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Name=*/"fabs", m_mod); + m_llvmFAbs->setCallingConv(CallingConv::C); + m_llvmFAbs->setAttributes(fabsPal); + } + CallInst *call = m_builder.CreateCall(m_llvmFAbs, val, + name("fabs")); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + return call; +} + +llvm::Value * Instructions::callFExp(llvm::Value *val) +{ + if (!m_llvmFexp) { + // predeclare the intrinsic + std::vector<const Type*> fexpArgs; + fexpArgs.push_back(Type::FloatTy); + AttrListPtr fexpPal; + FunctionType* fexpType = FunctionType::get( + /*Result=*/Type::FloatTy, + /*Params=*/fexpArgs, + /*isVarArg=*/false); + m_llvmFexp = Function::Create( + /*Type=*/fexpType, + 
/*Linkage=*/GlobalValue::ExternalLinkage, + /*Name=*/"expf", m_mod); + m_llvmFexp->setCallingConv(CallingConv::C); + m_llvmFexp->setAttributes(fexpPal); + } + CallInst *call = m_builder.CreateCall(m_llvmFexp, val, + name("expf")); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + return call; +} + +llvm::Value * Instructions::callFLog(llvm::Value *val) +{ + if (!m_llvmFlog) { + // predeclare the intrinsic + std::vector<const Type*> flogArgs; + flogArgs.push_back(Type::FloatTy); + AttrListPtr flogPal; + FunctionType* flogType = FunctionType::get( + /*Result=*/Type::FloatTy, + /*Params=*/flogArgs, + /*isVarArg=*/false); + m_llvmFlog = Function::Create( + /*Type=*/flogType, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Name=*/"logf", m_mod); + m_llvmFlog->setCallingConv(CallingConv::C); + m_llvmFlog->setAttributes(flogPal); + } + CallInst *call = m_builder.CreateCall(m_llvmFlog, val, + name("logf")); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + return call; +} + +llvm::Value * Instructions::callFloor(llvm::Value *val) +{ + if (!m_llvmFloor) { + // predeclare the intrinsic + std::vector<const Type*> floorArgs; + floorArgs.push_back(Type::FloatTy); + AttrListPtr floorPal; + FunctionType* floorType = FunctionType::get( + /*Result=*/Type::FloatTy, + /*Params=*/floorArgs, + /*isVarArg=*/false); + m_llvmFloor = Function::Create( + /*Type=*/floorType, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Name=*/"floorf", m_mod); + m_llvmFloor->setCallingConv(CallingConv::C); + m_llvmFloor->setAttributes(floorPal); + } + CallInst *call = m_builder.CreateCall(m_llvmFloor, val, + name("floorf")); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + return call; +} + +llvm::Value *Instructions::callFSqrt(llvm::Value *val) +{ + if (!m_llvmFSqrt) { + // predeclare the intrinsic + std::vector<const Type*> fsqrtArgs; + fsqrtArgs.push_back(Type::FloatTy); + AttrListPtr fsqrtPal; + FunctionType* fsqrtType = FunctionType::get( + 
/*Result=*/Type::FloatTy, + /*Params=*/fsqrtArgs, + /*isVarArg=*/false); + m_llvmFSqrt = Function::Create( + /*Type=*/fsqrtType, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Name=*/"llvm.sqrt.f32", m_mod); + m_llvmFSqrt->setCallingConv(CallingConv::C); + m_llvmFSqrt->setAttributes(fsqrtPal); + } + CallInst *call = m_builder.CreateCall(m_llvmFSqrt, val, + name("sqrt")); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + return call; +} + +llvm::Value * Instructions::callPow(llvm::Value *val1, llvm::Value *val2) +{ + if (!m_llvmPow) { + // predeclare the intrinsic + std::vector<const Type*> powArgs; + powArgs.push_back(Type::FloatTy); + powArgs.push_back(Type::FloatTy); + AttrListPtr powPal; + FunctionType* powType = FunctionType::get( + /*Result=*/Type::FloatTy, + /*Params=*/powArgs, + /*isVarArg=*/false); + m_llvmPow = Function::Create( + /*Type=*/powType, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Name=*/"llvm.pow.f32", m_mod); + m_llvmPow->setCallingConv(CallingConv::C); + m_llvmPow->setAttributes(powPal); + } + std::vector<Value*> params; + params.push_back(val1); + params.push_back(val2); + CallInst *call = m_builder.CreateCall(m_llvmPow, params.begin(), params.end(), + name("pow")); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + return call; +} + +llvm::Value * Instructions::vectorFromVals(llvm::Value *x, llvm::Value *y, + llvm::Value *z, llvm::Value *w) +{ + Constant *const_vec = Constant::getNullValue(m_floatVecType); + Value *res = m_builder.CreateInsertElement(const_vec, x, + m_storage->constantInt(0), + name("vecx")); + res = m_builder.CreateInsertElement(res, y, m_storage->constantInt(1), + name("vecxy")); + res = m_builder.CreateInsertElement(res, z, m_storage->constantInt(2), + name("vecxyz")); + if (w) + res = m_builder.CreateInsertElement(res, w, m_storage->constantInt(3), + name("vecxyzw")); + return res; +} + +llvm::Value * Instructions::constVector(float x, float y, float z, float w) +{ + 
std::vector<Constant*> vec(4); + vec[0] = ConstantFP::get(APFloat(x)); + vec[1] = ConstantFP::get(APFloat(y)); + vec[2] = ConstantFP::get(APFloat(z)); + vec[3] = ConstantFP::get(APFloat(w)); + return ConstantVector::get(m_floatVecType, vec); +} + +llvm::Function * Instructions::declarePrintf() +{ + std::vector<const Type*> args; + AttrListPtr params; + FunctionType* funcTy = FunctionType::get( + /*Result=*/IntegerType::get(32), + /*Params=*/args, + /*isVarArg=*/true); + Function* func_printf = Function::Create( + /*Type=*/funcTy, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Name=*/"printf", m_mod); + func_printf->setCallingConv(CallingConv::C); + func_printf->setAttributes(params); + return func_printf; +} + +llvm::Function * Instructions::declareFunc(int label) +{ + PointerType *vecPtr = PointerType::getUnqual(m_floatVecType); + std::vector<const Type*> args; + args.push_back(vecPtr); + args.push_back(vecPtr); + args.push_back(vecPtr); + args.push_back(vecPtr); + AttrListPtr params; + FunctionType *funcType = FunctionType::get( + /*Result=*/Type::VoidTy, + /*Params=*/args, + /*isVarArg=*/false); + std::string name = createFuncName(label); + Function *func = Function::Create( + /*Type=*/funcType, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Name=*/name.c_str(), m_mod); + func->setCallingConv(CallingConv::C); + func->setAttributes(params); + return func; +} + +llvm::Function * Instructions::findFunction(int label) +{ + llvm::Function *func = m_functions[label]; + if (!func) { + func = declareFunc(label); + m_functions[label] = func; + } + return func; +} + +std::vector<llvm::Value*> Instructions::extractVector(llvm::Value *vec) +{ + std::vector<llvm::Value*> elems(4); + elems[0] = m_builder.CreateExtractElement(vec, m_storage->constantInt(0), + name("x")); + elems[1] = m_builder.CreateExtractElement(vec, m_storage->constantInt(1), + name("y")); + elems[2] = m_builder.CreateExtractElement(vec, m_storage->constantInt(2), + name("z")); + elems[3] = 
m_builder.CreateExtractElement(vec, m_storage->constantInt(3), + name("w")); + return elems; +} + + +#endif //MESA_LLVM + + diff --git a/src/gallium/auxiliary/gallivm/instructions.h b/src/gallium/auxiliary/gallivm/instructions.h new file mode 100644 index 00000000000..e18571251ee --- /dev/null +++ b/src/gallium/auxiliary/gallivm/instructions.h @@ -0,0 +1,175 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + /* + * Authors: + * Zack Rusin [email protected] + */ + +#ifndef INSTRUCTIONS_H +#define INSTRUCTIONS_H + +#include <llvm/BasicBlock.h> +#include <llvm/Module.h> +#include <llvm/Value.h> +#include <llvm/Support/IRBuilder.h> + +#include <map> +#include <stack> + +namespace llvm { + class VectorType; + class Function; +} + +class Storage; + +class Instructions +{ +public: + Instructions(llvm::Module *mod, llvm::Function *func, llvm::BasicBlock *block, + Storage *storage); + + llvm::BasicBlock *currentBlock() const; + + llvm::Value *abs(llvm::Value *in1); + llvm::Value *add(llvm::Value *in1, llvm::Value *in2); + llvm::Value *arl(llvm::Value *in1); + void beginLoop(); + void bgnSub(unsigned); + void brk(); + void cal(int label, llvm::Value *input); + llvm::Value *ceil(llvm::Value *in); + llvm::Value *clamp(llvm::Value *in); + llvm::Value *cmp(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3); + llvm::Value *cnd(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3); + llvm::Value *cnd0(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3); + llvm::Value *cos(llvm::Value *in); + llvm::Value *cross(llvm::Value *in1, llvm::Value *in2); + llvm::Value *ddx(llvm::Value *in); + llvm::Value *ddy(llvm::Value *in); + llvm::Value *div(llvm::Value *in1, llvm::Value *in2); + llvm::Value *dot2add(llvm::Value *in, llvm::Value *in2, llvm::Value *in3); + llvm::Value *dp2(llvm::Value *in1, llvm::Value *in2); + llvm::Value *dp3(llvm::Value *in1, llvm::Value *in2); + llvm::Value *dp4(llvm::Value *in1, llvm::Value *in2); + llvm::Value *dph(llvm::Value *in1, llvm::Value *in2); + llvm::Value *dst(llvm::Value *in1, llvm::Value *in2); + void elseop(); + void endif(); + void endLoop(); + void end(); + void endSub(); + llvm::Value *exp(llvm::Value *in); + llvm::Value *ex2(llvm::Value *in); + llvm::Value *floor(llvm::Value *in); + llvm::Value *frc(llvm::Value *in); + void ifop(llvm::Value *in); + llvm::Value 
*kil(llvm::Value *in); + llvm::Value *lerp(llvm::Value *in1, llvm::Value *in2, + llvm::Value *in3); + llvm::Value *lg2(llvm::Value *in); + llvm::Value *lit(llvm::Value *in); + llvm::Value *log(llvm::Value *in); + llvm::Value *madd(llvm::Value *in1, llvm::Value *in2, + llvm::Value *in3); + llvm::Value *max(llvm::Value *in1, llvm::Value *in2); + llvm::Value *min(llvm::Value *in1, llvm::Value *in2); + llvm::Value *mul(llvm::Value *in1, llvm::Value *in2); + llvm::Value *neg(llvm::Value *in); + llvm::Value *nrm(llvm::Value *in); + llvm::Value *pow(llvm::Value *in1, llvm::Value *in2); + llvm::Value *rcp(llvm::Value *in); + llvm::Value *rsq(llvm::Value *in); + llvm::Value *scs(llvm::Value *in); + llvm::Value *seq(llvm::Value *in1, llvm::Value *in2); + llvm::Value *sfl(llvm::Value *in1, llvm::Value *in2); + llvm::Value *sge(llvm::Value *in1, llvm::Value *in2); + llvm::Value *sgt(llvm::Value *in1, llvm::Value *in2); + llvm::Value *sin(llvm::Value *in); + llvm::Value *sle(llvm::Value *in1, llvm::Value *in2); + llvm::Value *slt(llvm::Value *in1, llvm::Value *in2); + llvm::Value *sne(llvm::Value *in1, llvm::Value *in2); + llvm::Value *str(llvm::Value *in1, llvm::Value *in2); + llvm::Value *sub(llvm::Value *in1, llvm::Value *in2); + llvm::Value *trunc(llvm::Value *in); + llvm::Value *x2d(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3); + + void printVector(llvm::Value *val); +private: + const char *name(const char *prefix); + + llvm::Value *callCeil(llvm::Value *val); + llvm::Value *callFAbs(llvm::Value *val); + llvm::Value *callFExp(llvm::Value *val); + llvm::Value *callFLog(llvm::Value *val); + llvm::Value *callFloor(llvm::Value *val); + llvm::Value *callFSqrt(llvm::Value *val); + llvm::Value *callPow(llvm::Value *val1, llvm::Value *val2); + + llvm::Value *vectorFromVals(llvm::Value *x, llvm::Value *y, + llvm::Value *z, llvm::Value *w=0); + + llvm::Value *constVector(float x, float y, float z, float w); + + llvm::Function *declarePrintf(); + llvm::Function 
*declareFunc(int label); + + llvm::Function *findFunction(int label); + + std::vector<llvm::Value*> extractVector(llvm::Value *vec); +private: + llvm::Module *m_mod; + llvm::Function *m_func; + char m_name[32]; + llvm::IRBuilder<> m_builder; + int m_idx; + + llvm::VectorType *m_floatVecType; + + llvm::Function *m_llvmCeil; + llvm::Function *m_llvmFSqrt; + llvm::Function *m_llvmFAbs; + llvm::Function *m_llvmPow; + llvm::Function *m_llvmFloor; + llvm::Function *m_llvmFlog; + llvm::Function *m_llvmFexp; + llvm::Function *m_llvmLit; + + llvm::Constant *m_fmtPtr; + + std::stack<llvm::BasicBlock*> m_ifStack; + struct Loop { + llvm::BasicBlock *begin; + llvm::BasicBlock *end; + }; + std::stack<Loop> m_loopStack; + std::map<int, llvm::Function*> m_functions; + Storage *m_storage; +}; + +#endif diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.cpp b/src/gallium/auxiliary/gallivm/instructionssoa.cpp new file mode 100644 index 00000000000..925e948763e --- /dev/null +++ b/src/gallium/auxiliary/gallivm/instructionssoa.cpp @@ -0,0 +1,523 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ +#include "instructionssoa.h" + +#include "storagesoa.h" + +#include "pipe/p_shader_tokens.h" +#include "util/u_memory.h" + +#include <llvm/CallingConv.h> +#include <llvm/Constants.h> +#include <llvm/Module.h> +#include <llvm/Function.h> +#include <llvm/Instructions.h> +#include <llvm/Transforms/Utils/Cloning.h> +#include <llvm/Attributes.h> +#include <llvm/Support/MemoryBuffer.h> +#include <llvm/Bitcode/ReaderWriter.h> + + +#include <iostream> + + +/* disable some warnings. 
this file is autogenerated */ +#if defined(__GNUC__) +#pragma GCC diagnostic ignored "-Wunused-variable" +#endif +using namespace llvm; +#include "gallivmsoabuiltins.cpp" +#if defined(__GNUC__) +#pragma GCC diagnostic warning "-Wunused-variable" +#endif + +InstructionsSoa::InstructionsSoa(llvm::Module *mod, llvm::Function *func, + llvm::BasicBlock *block, StorageSoa *storage) + : m_builder(block), + m_storage(storage), + m_idx(0) +{ + createFunctionMap(); + createBuiltins(); +} + +const char * InstructionsSoa::name(const char *prefix) const +{ + ++m_idx; + snprintf(m_name, 32, "%s%d", prefix, m_idx); + return m_name; +} + +llvm::Value * InstructionsSoa::vectorFromVals(llvm::Value *x, llvm::Value *y, + llvm::Value *z, llvm::Value *w) +{ + VectorType *vectorType = VectorType::get(Type::FloatTy, 4); + Constant *constVector = Constant::getNullValue(vectorType); + Value *res = m_builder.CreateInsertElement(constVector, x, + m_storage->constantInt(0), + name("vecx")); + res = m_builder.CreateInsertElement(res, y, m_storage->constantInt(1), + name("vecxy")); + res = m_builder.CreateInsertElement(res, z, m_storage->constantInt(2), + name("vecxyz")); + if (w) + res = m_builder.CreateInsertElement(res, w, m_storage->constantInt(3), + name("vecxyzw")); + return res; +} + +void InstructionsSoa::end() +{ + m_builder.CreateRetVoid(); +} + +std::vector<llvm::Value*> InstructionsSoa::extractVector(llvm::Value *vector) +{ + std::vector<llvm::Value*> res(4); + res[0] = m_builder.CreateExtractElement(vector, + m_storage->constantInt(0), + name("extract1X")); + res[1] = m_builder.CreateExtractElement(vector, + m_storage->constantInt(1), + name("extract2X")); + res[2] = m_builder.CreateExtractElement(vector, + m_storage->constantInt(2), + name("extract3X")); + res[3] = m_builder.CreateExtractElement(vector, + m_storage->constantInt(3), + name("extract4X")); + + return res; +} + +llvm::IRBuilder<>* InstructionsSoa::getIRBuilder() +{ + return &m_builder; +} + +void 
InstructionsSoa::createFunctionMap() +{ + m_functionsMap[TGSI_OPCODE_ABS] = "abs"; + m_functionsMap[TGSI_OPCODE_DP3] = "dp3"; + m_functionsMap[TGSI_OPCODE_DP4] = "dp4"; + m_functionsMap[TGSI_OPCODE_MIN] = "min"; + m_functionsMap[TGSI_OPCODE_MAX] = "max"; + m_functionsMap[TGSI_OPCODE_POWER] = "pow"; + m_functionsMap[TGSI_OPCODE_LIT] = "lit"; + m_functionsMap[TGSI_OPCODE_RSQ] = "rsq"; + m_functionsMap[TGSI_OPCODE_SLT] = "slt"; +} + +void InstructionsSoa::createDependencies() +{ + { + std::vector<std::string> powDeps(2); + powDeps[0] = "powf"; + powDeps[1] = "powvec"; + m_builtinDependencies["pow"] = powDeps; + } + { + std::vector<std::string> absDeps(2); + absDeps[0] = "fabsf"; + absDeps[1] = "absvec"; + m_builtinDependencies["abs"] = absDeps; + } + { + std::vector<std::string> maxDeps(1); + maxDeps[0] = "maxvec"; + m_builtinDependencies["max"] = maxDeps; + } + { + std::vector<std::string> minDeps(1); + minDeps[0] = "minvec"; + m_builtinDependencies["min"] = minDeps; + } + { + std::vector<std::string> litDeps(4); + litDeps[0] = "minvec"; + litDeps[1] = "maxvec"; + litDeps[2] = "powf"; + litDeps[3] = "powvec"; + m_builtinDependencies["lit"] = litDeps; + } + { + std::vector<std::string> rsqDeps(4); + rsqDeps[0] = "sqrtf"; + rsqDeps[1] = "sqrtvec"; + rsqDeps[2] = "fabsf"; + rsqDeps[3] = "absvec"; + m_builtinDependencies["rsq"] = rsqDeps; + } +} + +llvm::Function * InstructionsSoa::function(int op) +{ + if (m_functions.find(op) != m_functions.end()) + return m_functions[op]; + + std::string name = m_functionsMap[op]; + + std::cout <<"For op = "<<op<<", func is '"<<name<<"'"<<std::endl; + + std::vector<std::string> deps = m_builtinDependencies[name]; + for (unsigned int i = 0; i < deps.size(); ++i) { + llvm::Function *func = m_builtins->getFunction(deps[i]); + std::cout <<"\tinjecting dep = '"<<func->getName()<<"'"<<std::endl; + injectFunction(func); + } + + llvm::Function *originalFunc = m_builtins->getFunction(name); + injectFunction(originalFunc, op); + return 
m_functions[op]; +} + +llvm::Module * InstructionsSoa::currentModule() const +{ + BasicBlock *block = m_builder.GetInsertBlock(); + if (!block || !block->getParent()) + return 0; + + return block->getParent()->getParent(); +} + +void InstructionsSoa::createBuiltins() +{ + std::string ErrMsg; + MemoryBuffer *buffer = MemoryBuffer::getMemBuffer( + (const char*)&soabuiltins_data[0], + (const char*)&soabuiltins_data[Elements(soabuiltins_data) - 1]); + m_builtins = ParseBitcodeFile(buffer, &ErrMsg); + std::cout<<"Builtins created at "<<m_builtins<<" ("<<ErrMsg<<")"<<std::endl; + assert(m_builtins); + createDependencies(); +} + + +std::vector<llvm::Value*> InstructionsSoa::abs(const std::vector<llvm::Value*> in1) +{ + llvm::Function *func = function(TGSI_OPCODE_ABS); + return callBuiltin(func, in1); +} + +std::vector<llvm::Value*> InstructionsSoa::add(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2) +{ + std::vector<llvm::Value*> res(4); + + res[0] = m_builder.CreateAdd(in1[0], in2[0], name("addx")); + res[1] = m_builder.CreateAdd(in1[1], in2[1], name("addy")); + res[2] = m_builder.CreateAdd(in1[2], in2[2], name("addz")); + res[3] = m_builder.CreateAdd(in1[3], in2[3], name("addw")); + + return res; +} + +std::vector<llvm::Value*> InstructionsSoa::arl(const std::vector<llvm::Value*> in) +{ + std::vector<llvm::Value*> res(4); + + //Extract x's + llvm::Value *x1 = m_builder.CreateExtractElement(in[0], + m_storage->constantInt(0), + name("extractX")); + //cast it to an unsigned int + x1 = m_builder.CreateFPToUI(x1, IntegerType::get(32), name("x1IntCast")); + + res[0] = x1;//vectorFromVals(x1, x2, x3, x4); + //only x is valid. 
the others shouldn't be necessary + /* + res[1] = Constant::getNullValue(m_floatVecType); + res[2] = Constant::getNullValue(m_floatVecType); + res[3] = Constant::getNullValue(m_floatVecType); + */ + + return res; +} + +std::vector<llvm::Value*> InstructionsSoa::dp3(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2) +{ + llvm::Function *func = function(TGSI_OPCODE_DP3); + return callBuiltin(func, in1, in2); +} + +std::vector<llvm::Value*> InstructionsSoa::lit(const std::vector<llvm::Value*> in) +{ + llvm::Function *func = function(TGSI_OPCODE_LIT); + return callBuiltin(func, in); +} + +std::vector<llvm::Value*> InstructionsSoa::madd(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2, + const std::vector<llvm::Value*> in3) +{ + std::vector<llvm::Value*> res = mul(in1, in2); + return add(res, in3); +} + +std::vector<llvm::Value*> InstructionsSoa::max(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2) +{ + llvm::Function *func = function(TGSI_OPCODE_MAX); + return callBuiltin(func, in1, in2); +} + +std::vector<llvm::Value*> InstructionsSoa::min(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2) +{ + llvm::Function *func = function(TGSI_OPCODE_MIN); + return callBuiltin(func, in1, in2); +} + +std::vector<llvm::Value*> InstructionsSoa::mul(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2) +{ + std::vector<llvm::Value*> res(4); + + res[0] = m_builder.CreateMul(in1[0], in2[0], name("mulx")); + res[1] = m_builder.CreateMul(in1[1], in2[1], name("muly")); + res[2] = m_builder.CreateMul(in1[2], in2[2], name("mulz")); + res[3] = m_builder.CreateMul(in1[3], in2[3], name("mulw")); + + return res; +} + +std::vector<llvm::Value*> InstructionsSoa::pow(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2) +{ + llvm::Function *func = function(TGSI_OPCODE_POWER); + return callBuiltin(func, in1, in2); +} + +std::vector<llvm::Value*> 
InstructionsSoa::rsq(const std::vector<llvm::Value*> in) +{ + llvm::Function *func = function(TGSI_OPCODE_RSQ); + return callBuiltin(func, in); +} + +std::vector<llvm::Value*> InstructionsSoa::slt(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2) +{ + llvm::Function *func = function(TGSI_OPCODE_SLT); + return callBuiltin(func, in1, in2); +} + +std::vector<llvm::Value*> InstructionsSoa::sub(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2) +{ + std::vector<llvm::Value*> res(4); + + res[0] = m_builder.CreateSub(in1[0], in2[0], name("subx")); + res[1] = m_builder.CreateSub(in1[1], in2[1], name("suby")); + res[2] = m_builder.CreateSub(in1[2], in2[2], name("subz")); + res[3] = m_builder.CreateSub(in1[3], in2[3], name("subw")); + + return res; +} + +void checkFunction(Function *func) +{ + for (Function::const_iterator BI = func->begin(), BE = func->end(); + BI != BE; ++BI) { + const BasicBlock &BB = *BI; + for (BasicBlock::const_iterator II = BB.begin(), IE = BB.end(); + II != IE; ++II) { + const Instruction &I = *II; + std::cout<< "Instr = "<<I; + for (unsigned op = 0, E = I.getNumOperands(); op != E; ++op) { + const Value *Op = I.getOperand(op); + std::cout<< "\top = "<<Op<<"("<<op<<")"<<std::endl; + //I->setOperand(op, V); + } + } + } +} + +llvm::Value * InstructionsSoa::allocaTemp() +{ + VectorType *vector = VectorType::get(Type::FloatTy, 4); + ArrayType *vecArray = ArrayType::get(vector, 4); + AllocaInst *alloca = new AllocaInst(vecArray, name("tmpRes"), + m_builder.GetInsertBlock()); + + std::vector<Value*> indices; + indices.push_back(m_storage->constantInt(0)); + indices.push_back(m_storage->constantInt(0)); + GetElementPtrInst *getElem = GetElementPtrInst::Create(alloca, + indices.begin(), + indices.end(), + name("allocaPtr"), + m_builder.GetInsertBlock()); + return getElem; +} + +std::vector<llvm::Value*> InstructionsSoa::allocaToResult(llvm::Value *allocaPtr) +{ + GetElementPtrInst *xElemPtr = 
GetElementPtrInst::Create(allocaPtr, + m_storage->constantInt(0), + name("xPtr"), + m_builder.GetInsertBlock()); + GetElementPtrInst *yElemPtr = GetElementPtrInst::Create(allocaPtr, + m_storage->constantInt(1), + name("yPtr"), + m_builder.GetInsertBlock()); + GetElementPtrInst *zElemPtr = GetElementPtrInst::Create(allocaPtr, + m_storage->constantInt(2), + name("zPtr"), + m_builder.GetInsertBlock()); + GetElementPtrInst *wElemPtr = GetElementPtrInst::Create(allocaPtr, + m_storage->constantInt(3), + name("wPtr"), + m_builder.GetInsertBlock()); + + std::vector<llvm::Value*> res(4); + res[0] = new LoadInst(xElemPtr, name("xRes"), false, m_builder.GetInsertBlock()); + res[1] = new LoadInst(yElemPtr, name("yRes"), false, m_builder.GetInsertBlock()); + res[2] = new LoadInst(zElemPtr, name("zRes"), false, m_builder.GetInsertBlock()); + res[3] = new LoadInst(wElemPtr, name("wRes"), false, m_builder.GetInsertBlock()); + + return res; +} + +std::vector<llvm::Value*> InstructionsSoa::dp4(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2) +{ + llvm::Function *func = function(TGSI_OPCODE_DP4); + return callBuiltin(func, in1, in2); +} + +std::vector<Value*> InstructionsSoa::callBuiltin(llvm::Function *func, const std::vector<llvm::Value*> in1) +{ + std::vector<Value*> params; + + llvm::Value *allocaPtr = allocaTemp(); + params.push_back(allocaPtr); + params.push_back(in1[0]); + params.push_back(in1[1]); + params.push_back(in1[2]); + params.push_back(in1[3]); + CallInst *call = m_builder.CreateCall(func, params.begin(), params.end()); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + + return allocaToResult(allocaPtr); +} + +std::vector<Value*> InstructionsSoa::callBuiltin(llvm::Function *func, const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2) +{ + std::vector<Value*> params; + + llvm::Value *allocaPtr = allocaTemp(); + params.push_back(allocaPtr); + params.push_back(in1[0]); + params.push_back(in1[1]); + 
params.push_back(in1[2]); + params.push_back(in1[3]); + params.push_back(in2[0]); + params.push_back(in2[1]); + params.push_back(in2[2]); + params.push_back(in2[3]); + CallInst *call = m_builder.CreateCall(func, params.begin(), params.end()); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + + return allocaToResult(allocaPtr); +} + +std::vector<Value*> InstructionsSoa::callBuiltin(llvm::Function *func, const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2, + const std::vector<llvm::Value*> in3) +{ + std::vector<Value*> params; + + llvm::Value *allocaPtr = allocaTemp(); + params.push_back(allocaPtr); + params.push_back(in1[0]); + params.push_back(in1[1]); + params.push_back(in1[2]); + params.push_back(in1[3]); + params.push_back(in2[0]); + params.push_back(in2[1]); + params.push_back(in2[2]); + params.push_back(in2[3]); + params.push_back(in3[0]); + params.push_back(in3[1]); + params.push_back(in3[2]); + params.push_back(in3[3]); + CallInst *call = m_builder.CreateCall(func, params.begin(), params.end()); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + + return allocaToResult(allocaPtr); +} + +void InstructionsSoa::injectFunction(llvm::Function *originalFunc, int op) +{ + assert(originalFunc); + std::cout << "injecting function originalFunc " <<originalFunc->getName() <<std::endl; + if (op != TGSI_OPCODE_LAST) { + /* in this case it's possible the function has been already + * injected as part of the dependency chain, which gets + * injected below */ + llvm::Function *func = currentModule()->getFunction(originalFunc->getName()); + if (func) { + m_functions[op] = func; + return; + } + } + llvm::Function *func = 0; + if (originalFunc->isDeclaration()) { + func = Function::Create(originalFunc->getFunctionType(), GlobalValue::ExternalLinkage, + originalFunc->getName(), currentModule()); + func->setCallingConv(CallingConv::C); + const AttrListPtr pal; + func->setAttributes(pal); + currentModule()->dump(); + } 
else { + DenseMap<const Value*, Value *> val; + val[m_builtins->getFunction("fabsf")] = currentModule()->getFunction("fabsf"); + val[m_builtins->getFunction("powf")] = currentModule()->getFunction("powf"); + val[m_builtins->getFunction("sqrtf")] = currentModule()->getFunction("sqrtf"); + func = CloneFunction(originalFunc, val); +#if 0 + std::cout <<" replacing "<<m_builtins->getFunction("powf") + <<", with " <<currentModule()->getFunction("powf")<<std::endl; + std::cout<<"1111-------------------------------"<<std::endl; + checkFunction(originalFunc); + std::cout<<"2222-------------------------------"<<std::endl; + checkFunction(func); + std::cout <<"XXXX = " <<val[m_builtins->getFunction("powf")]<<std::endl; +#endif + currentModule()->getFunctionList().push_back(func); + } + if (op != TGSI_OPCODE_LAST) { + m_functions[op] = func; + } +} + + diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.h b/src/gallium/auxiliary/gallivm/instructionssoa.h new file mode 100644 index 00000000000..d6831e0a6b9 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/instructionssoa.h @@ -0,0 +1,116 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef INSTRUCTIONSSOA_H +#define INSTRUCTIONSSOA_H + +#include <pipe/p_shader_tokens.h> +#include <llvm/Support/IRBuilder.h> + +#include <map> +#include <vector> + +namespace llvm { + class Module; + class Function; + class BasicBlock; + class Value; +} +class StorageSoa; + +class InstructionsSoa +{ +public: + InstructionsSoa(llvm::Module *mod, llvm::Function *func, + llvm::BasicBlock *block, StorageSoa *storage); + + std::vector<llvm::Value*> abs(const std::vector<llvm::Value*> in1); + std::vector<llvm::Value*> arl(const std::vector<llvm::Value*> in); + std::vector<llvm::Value*> add(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2); + std::vector<llvm::Value*> dp3(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2); + std::vector<llvm::Value*> dp4(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2); + std::vector<llvm::Value*> lit(const std::vector<llvm::Value*> in); + std::vector<llvm::Value*> madd(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2, + const std::vector<llvm::Value*> in3); + std::vector<llvm::Value*> max(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2); + std::vector<llvm::Value*> min(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2); + std::vector<llvm::Value*> mul(const std::vector<llvm::Value*> in1, + const 
std::vector<llvm::Value*> in2); + std::vector<llvm::Value*> pow(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2); + std::vector<llvm::Value*> rsq(const std::vector<llvm::Value*> in1); + std::vector<llvm::Value*> slt(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2); + std::vector<llvm::Value*> sub(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2); + void end(); + + std::vector<llvm::Value*> extractVector(llvm::Value *vector); + llvm::IRBuilder<>* getIRBuilder(); +private: + const char * name(const char *prefix) const; + llvm::Value *vectorFromVals(llvm::Value *x, llvm::Value *y, + llvm::Value *z, llvm::Value *w); + void createFunctionMap(); + void createBuiltins(); + void createDependencies(); + llvm::Function *function(int); + llvm::Module *currentModule() const; + llvm::Value *allocaTemp(); + std::vector<llvm::Value*> allocaToResult(llvm::Value *allocaPtr); + std::vector<llvm::Value*> callBuiltin(llvm::Function *func, + const std::vector<llvm::Value*> in1); + std::vector<llvm::Value*> callBuiltin(llvm::Function *func, + const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2); + std::vector<llvm::Value*> callBuiltin(llvm::Function *func, + const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2, + const std::vector<llvm::Value*> in3); + void injectFunction(llvm::Function *originalFunc, int op = TGSI_OPCODE_LAST); +private: + llvm::IRBuilder<> m_builder; + StorageSoa *m_storage; + + std::map<int, std::string> m_functionsMap; + std::map<int, llvm::Function*> m_functions; + llvm::Module *m_builtins; + std::map<std::string, std::vector<std::string> > m_builtinDependencies; + +private: + mutable int m_idx; + mutable char m_name[32]; +}; + + +#endif diff --git a/src/gallium/auxiliary/gallivm/llvm_builtins.c b/src/gallium/auxiliary/gallivm/llvm_builtins.c new file mode 100644 index 00000000000..d5a003a48b2 --- /dev/null +++ 
b/src/gallium/auxiliary/gallivm/llvm_builtins.c @@ -0,0 +1,114 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + /* + * Authors: + * Zack Rusin [email protected] + */ +typedef __attribute__(( ext_vector_type(4) )) float float4; + +extern float powf(float a, float b); + +inline float approx(float a, float b) +{ + if (b < -128.0f) b = -128.0f; + if (b > 128.0f) b = 128.0f; + if (a < 0) a = 0; + return powf(a, b); +} + +inline float4 lit(float4 tmp) +{ + float4 result; + result.x = 1.0; + result.w = 1.0; + if (tmp.x > 0) { + result.y = tmp.x; + result.z = approx(tmp.y, tmp.w); + } else { + result.y = 0; + result.z = 0; + } + return result; +} + +inline float4 cmp(float4 tmp0, float4 tmp1, float4 tmp2) +{ + float4 result; + + result.x = (tmp0.x < 0.0) ? tmp1.x : tmp2.x; + result.y = (tmp0.y < 0.0) ? tmp1.y : tmp2.y; + result.z = (tmp0.z < 0.0) ? tmp1.z : tmp2.z; + result.w = (tmp0.w < 0.0) ? tmp1.w : tmp2.w; + + return result; +} + +extern float cosf(float val); +extern float sinf(float val); + +inline float4 vcos(float4 val) +{ + float4 result; + printf("VEC IN is %f %f %f %f\n", val.x, val.y, val.z, val.w); + result.x = cosf(val.x); + result.y = cosf(val.x); + result.z = cosf(val.x); + result.w = cosf(val.x); + printf("VEC OUT is %f %f %f %f\n", result.x, result.y, result.z, result.w); + return result; +} + +inline float4 scs(float4 val) +{ + float4 result; + float tmp = val.x; + result.x = cosf(tmp); + result.y = sinf(tmp); + return result; +} + + +inline float4 vsin(float4 val) +{ + float4 result; + float tmp = val.x; + float res = sinf(tmp); + result.x = res; + result.y = res; + result.z = res; + result.w = res; + return result; +} + +inline int kil(float4 val) +{ + if (val.x < 0 || val.y < 0 || val.z < 0 || val.w < 0) + return 1; + else + return 0; +} diff --git a/src/gallium/auxiliary/gallivm/loweringpass.cpp b/src/gallium/auxiliary/gallivm/loweringpass.cpp new file mode 100644 index 00000000000..556dbec3661 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/loweringpass.cpp @@ -0,0 +1,17 
@@ +#include "loweringpass.h" + +using namespace llvm; + +char LoweringPass::ID = 0; +RegisterPass<LoweringPass> X("lowering", "Lowering Pass"); + +LoweringPass::LoweringPass() + : ModulePass((intptr_t)&ID) +{ +} + +bool LoweringPass::runOnModule(Module &m) +{ + llvm::cerr << "Hello: " << m.getModuleIdentifier() << "\n"; + return false; +} diff --git a/src/gallium/auxiliary/gallivm/loweringpass.h b/src/gallium/auxiliary/gallivm/loweringpass.h new file mode 100644 index 00000000000..f62dcf6ba73 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/loweringpass.h @@ -0,0 +1,15 @@ +#ifndef LOWERINGPASS_H +#define LOWERINGPASS_H + +#include "llvm/Pass.h" +#include "llvm/Module.h" + +struct LoweringPass : public llvm::ModulePass +{ + static char ID; + LoweringPass(); + + virtual bool runOnModule(llvm::Module &m); +}; + +#endif diff --git a/src/gallium/auxiliary/gallivm/soabuiltins.c b/src/gallium/auxiliary/gallivm/soabuiltins.c new file mode 100644 index 00000000000..cb85e1734ec --- /dev/null +++ b/src/gallium/auxiliary/gallivm/soabuiltins.c @@ -0,0 +1,210 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * This file is compiled with clang into the LLVM bitcode + * + * Authors: + * Zack Rusin [email protected] + */ +typedef __attribute__(( ext_vector_type(4) )) float float4; + + +extern float fabsf(float val); + +/* helpers */ + +float4 absvec(float4 vec) +{ + float4 res; + res.x = fabsf(vec.x); + res.y = fabsf(vec.y); + res.z = fabsf(vec.z); + res.w = fabsf(vec.w); + + return res; +} + +float4 maxvec(float4 a, float4 b) +{ + return (float4){(a.x > b.x) ? a.x : b.x, + (a.y > b.y) ? a.y : b.y, + (a.z > b.z) ? a.z : b.z, + (a.w > b.w) ? a.w : b.w}; +} + +float4 minvec(float4 a, float4 b) +{ + return (float4){(a.x < b.x) ? a.x : b.x, + (a.y < b.y) ? a.y : b.y, + (a.z < b.z) ? a.z : b.z, + (a.w < b.w) ? a.w : b.w}; +} + +extern float powf(float num, float p); +extern float sqrtf(float x); + +float4 powvec(float4 vec, float4 q) +{ + float4 p; + p.x = powf(vec.x, q.x); + p.y = powf(vec.y, q.y); + p.z = powf(vec.z, q.z); + p.w = powf(vec.w, q.w); + return p; +} + +float4 sqrtvec(float4 vec) +{ + float4 p; + p.x = sqrtf(vec.x); + p.y = sqrtf(vec.y); + p.z = sqrtf(vec.z); + p.w = sqrtf(vec.w); + return p; +} + +float4 sltvec(float4 v1, float4 v2) +{ + float4 p; + p.x = (v1.x < v2.x) ? 1.0 : 0.0; + p.y = (v1.y < v2.y) ? 1.0 : 0.0; + p.z = (v1.z < v2.z) ? 1.0 : 0.0; + p.w = (v1.w < v2.w) ? 
1.0 : 0.0; + return p; +} + + +/* instructions */ + +void abs(float4 *res, + float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w) +{ + res[0] = absvec(tmp0x); + res[1] = absvec(tmp0y); + res[2] = absvec(tmp0z); + res[3] = absvec(tmp0w); +} + +void dp3(float4 *res, + float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, + float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) +{ + float4 dot = (tmp0x * tmp1x) + (tmp0y * tmp1y) + + (tmp0z * tmp1z); + + res[0] = dot; + res[1] = dot; + res[2] = dot; + res[3] = dot; +} + +void dp4(float4 *res, + float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, + float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) +{ + float4 dot = (tmp0x * tmp1x) + (tmp0y * tmp1y) + + (tmp0z * tmp1z) + (tmp0w * tmp1w); + + res[0] = dot; + res[1] = dot; + res[2] = dot; + res[3] = dot; +} + +void lit(float4 *res, + float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w) +{ + const float4 zerovec = (float4) {0.0, 0.0, 0.0, 0.0}; + const float4 min128 = (float4) {-128.f, -128.f, -128.f, -128.f}; + const float4 plus128 = (float4) {128.f, 128.f, 128.f, 128.f}; + + res[0] = (float4){1.0, 1.0, 1.0, 1.0}; + if (tmp0x.x > 0) { + float4 tmpy = maxvec(tmp0y, zerovec); + float4 tmpw = minvec(tmp0w, plus128); + tmpw = maxvec(tmpw, min128); + res[1] = tmp0x; + res[2] = powvec(tmpy, tmpw); + } else { + res[1] = zerovec; + res[2] = zerovec; + } + res[3] = (float4){1.0, 1.0, 1.0, 1.0}; +} + +void min(float4 *res, + float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, + float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) +{ + res[0] = minvec(tmp0x, tmp1x); + res[1] = minvec(tmp0y, tmp1y); + res[2] = minvec(tmp0z, tmp1z); + res[3] = minvec(tmp0w, tmp1w); +} + + +void max(float4 *res, + float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, + float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) +{ + res[0] = maxvec(tmp0x, tmp1x); + res[1] = maxvec(tmp0y, tmp1y); + res[2] = maxvec(tmp0z, tmp1z); + res[3] = maxvec(tmp0w, tmp1w); +} + +void 
pow(float4 *res, + float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, + float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) +{ + res[0] = powvec(tmp0x, tmp1x); + res[1] = res[0]; + res[2] = res[0]; + res[3] = res[0]; +} + +void rsq(float4 *res, + float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w) +{ + const float4 onevec = (float4) {1., 1., 1., 1.}; + res[0] = onevec/sqrtvec(absvec(tmp0x)); + res[1] = onevec/sqrtvec(absvec(tmp0y)); + res[2] = onevec/sqrtvec(absvec(tmp0z)); + res[3] = onevec/sqrtvec(absvec(tmp0w)); +} + +void slt(float4 *res, + float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, + float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) +{ + res[0] = sltvec(tmp0x, tmp1x); + res[1] = sltvec(tmp0y, tmp1y); + res[2] = sltvec(tmp0z, tmp1z); + res[3] = sltvec(tmp0w, tmp1w); +} + diff --git a/src/gallium/auxiliary/gallivm/storage.cpp b/src/gallium/auxiliary/gallivm/storage.cpp new file mode 100644 index 00000000000..73df24c9769 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/storage.cpp @@ -0,0 +1,364 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Zack Rusin [email protected] + */ +#ifdef MESA_LLVM + +#include "storage.h" + +#include "gallivm_p.h" + +#include "pipe/p_shader_tokens.h" +#include <llvm/BasicBlock.h> +#include <llvm/Module.h> +#include <llvm/Value.h> + +#include <llvm/CallingConv.h> +#include <llvm/Constants.h> +#include <llvm/DerivedTypes.h> +#include <llvm/InstrTypes.h> +#include <llvm/Instructions.h> + +using namespace llvm; + +Storage::Storage(llvm::BasicBlock *block, llvm::Value *input) + : m_block(block), + m_INPUT(input), + m_addrs(32), + m_idx(0) +{ + m_floatVecType = VectorType::get(Type::FloatTy, 4); + m_intVecType = VectorType::get(IntegerType::get(32), 4); + + m_undefFloatVec = UndefValue::get(m_floatVecType); + m_undefIntVec = UndefValue::get(m_intVecType); + m_extSwizzleVec = 0; + + m_numConsts = 0; +} + +//can only build vectors with all members in the [0, 9] range +llvm::Constant *Storage::shuffleMask(int vec) +{ + if (!m_extSwizzleVec) { + std::vector<Constant*> elems; + elems.push_back(ConstantFP::get(APFloat(0.f))); + elems.push_back(ConstantFP::get(APFloat(1.f))); + elems.push_back(ConstantFP::get(APFloat(0.f))); + elems.push_back(ConstantFP::get(APFloat(1.f))); + m_extSwizzleVec = ConstantVector::get(m_floatVecType, elems); + } + + if (m_intVecs.find(vec) != m_intVecs.end()) { + return m_intVecs[vec]; + } + int origVec = vec; + Constant* const_vec = 0; + if (origVec == 0) { + 
const_vec = Constant::getNullValue(m_intVecType); + } else { + int x = gallivm_x_swizzle(vec); + int y = gallivm_y_swizzle(vec); + int z = gallivm_z_swizzle(vec); + int w = gallivm_w_swizzle(vec); + std::vector<Constant*> elems; + elems.push_back(constantInt(x)); + elems.push_back(constantInt(y)); + elems.push_back(constantInt(z)); + elems.push_back(constantInt(w)); + const_vec = ConstantVector::get(m_intVecType, elems); + } + + m_intVecs[origVec] = const_vec; + return const_vec; +} + +llvm::ConstantInt *Storage::constantInt(int idx) +{ + if (m_constInts.find(idx) != m_constInts.end()) { + return m_constInts[idx]; + } + ConstantInt *const_int = ConstantInt::get(APInt(32, idx)); + m_constInts[idx] = const_int; + return const_int; +} + +llvm::Value *Storage::inputElement(int idx, llvm::Value *indIdx) +{ + Value *val = element(InputsArg, idx, indIdx); + LoadInst *load = new LoadInst(val, name("input"), false, m_block); + load->setAlignment(8); + + return load; +} + +llvm::Value *Storage::constElement(int idx, llvm::Value *indIdx) +{ + m_numConsts = ((idx + 1) > m_numConsts) ? 
(idx + 1) : m_numConsts; + + Value *elem = element(ConstsArg, idx, indIdx); + LoadInst *load = new LoadInst(elem, name("const"), false, m_block); + load->setAlignment(8); + return load; +} + +llvm::Value *Storage::shuffleVector(llvm::Value *vec, int shuffle) +{ + Constant *mask = shuffleMask(shuffle); + ShuffleVectorInst *res = + new ShuffleVectorInst(vec, m_extSwizzleVec, mask, + name("shuffle"), m_block); + return res; +} + + +llvm::Value *Storage::tempElement(int idx, llvm::Value *indIdx) +{ + Value *elem = element(TempsArg, idx, indIdx); + + LoadInst *load = new LoadInst(elem, name("temp"), false, m_block); + load->setAlignment(8); + + return load; +} + +void Storage::setTempElement(int idx, llvm::Value *val, int mask) +{ + if (mask != TGSI_WRITEMASK_XYZW) { + llvm::Value *templ = 0; + if (m_tempWriteMap[idx]) + templ = tempElement(idx); + val = maskWrite(val, mask, templ); + } + Value *elem = element(TempsArg, idx); + StoreInst *st = new StoreInst(val, elem, false, m_block); + st->setAlignment(8); + m_tempWriteMap[idx] = true; +} + +void Storage::setOutputElement(int dstIdx, llvm::Value *val, int mask) +{ + if (mask != TGSI_WRITEMASK_XYZW) { + llvm::Value *templ = 0; + if (m_destWriteMap[dstIdx]) + templ = outputElement(dstIdx); + val = maskWrite(val, mask, templ); + } + + Value *elem = element(DestsArg, dstIdx); + StoreInst *st = new StoreInst(val, elem, false, m_block); + st->setAlignment(8); + m_destWriteMap[dstIdx] = true; +} + +llvm::Value *Storage::maskWrite(llvm::Value *src, int mask, llvm::Value *templ) +{ + llvm::Value *dst = templ; + if (!dst) + dst = Constant::getNullValue(m_floatVecType); + if ((mask & TGSI_WRITEMASK_X)) { + llvm::Value *x = new ExtractElementInst(src, unsigned(0), + name("x"), m_block); + dst = InsertElementInst::Create(dst, x, unsigned(0), + name("dstx"), m_block); + } + if ((mask & TGSI_WRITEMASK_Y)) { + llvm::Value *y = new ExtractElementInst(src, unsigned(1), + name("y"), m_block); + dst = InsertElementInst::Create(dst, y, 
unsigned(1), + name("dsty"), m_block); + } + if ((mask & TGSI_WRITEMASK_Z)) { + llvm::Value *z = new ExtractElementInst(src, unsigned(2), + name("z"), m_block); + dst = InsertElementInst::Create(dst, z, unsigned(2), + name("dstz"), m_block); + } + if ((mask & TGSI_WRITEMASK_W)) { + llvm::Value *w = new ExtractElementInst(src, unsigned(3), + name("w"), m_block); + dst = InsertElementInst::Create(dst, w, unsigned(3), + name("dstw"), m_block); + } + return dst; +} + +const char * Storage::name(const char *prefix) +{ + ++m_idx; + snprintf(m_name, 32, "%s%d", prefix, m_idx); + return m_name; +} + +int Storage::numConsts() const +{ + return m_numConsts; +} + +llvm::Value * Storage::addrElement(int idx) const +{ + Value *ret = m_addrs[idx]; + if (!ret) + return m_undefFloatVec; + return ret; +} + +void Storage::setAddrElement(int idx, llvm::Value *val, int mask) +{ + if (mask != TGSI_WRITEMASK_XYZW) { + llvm::Value *templ = m_addrs[idx]; + val = maskWrite(val, mask, templ); + } + m_addrs[idx] = val; +} + +llvm::Value * Storage::extractIndex(llvm::Value *vec) +{ + llvm::Value *x = new ExtractElementInst(vec, unsigned(0), + name("x"), m_block); + return new FPToSIInst(x, IntegerType::get(32), name("intidx"), m_block); +} + +void Storage::setCurrentBlock(llvm::BasicBlock *block) +{ + m_block = block; +} + +llvm::Value * Storage::outputElement(int idx, llvm::Value *indIdx) +{ + Value *elem = element(DestsArg, idx, indIdx); + LoadInst *load = new LoadInst(elem, name("output"), false, m_block); + load->setAlignment(8); + + return load; +} + +llvm::Value * Storage::inputPtr() const +{ + return m_INPUT; +} + +void Storage::pushArguments(llvm::Value *input) +{ + m_argStack.push(m_INPUT); + + m_INPUT = input; +} + +void Storage::popArguments() +{ + m_INPUT = m_argStack.top(); + m_argStack.pop(); +} + +void Storage::pushTemps() +{ + m_extSwizzleVec = 0; +} + +void Storage::popTemps() +{ +} + +llvm::Value * Storage::immediateElement(int idx) +{ + return m_immediates[idx]; +} + +void 
Storage::addImmediate(float *val) +{ + std::vector<Constant*> vec(4); + vec[0] = ConstantFP::get(APFloat(val[0])); + vec[1] = ConstantFP::get(APFloat(val[1])); + vec[2] = ConstantFP::get(APFloat(val[2])); + vec[3] = ConstantFP::get(APFloat(val[3])); + m_immediates.push_back(ConstantVector::get(m_floatVecType, vec)); +} + + +llvm::Value * Storage::elemPtr(Args arg) +{ + std::vector<Value*> indices; + indices.push_back(constantInt(0)); + indices.push_back(constantInt(static_cast<int>(arg))); + GetElementPtrInst *getElem = GetElementPtrInst::Create(m_INPUT, + indices.begin(), + indices.end(), + name("input_ptr"), + m_block); + return new LoadInst(getElem, name("input_field"), false, m_block); +} + +llvm::Value * Storage::elemIdx(llvm::Value *ptr, int idx, + llvm::Value *indIdx ) +{ + GetElementPtrInst *getElem = 0; + + if (indIdx) { + getElem = GetElementPtrInst::Create(ptr, + BinaryOperator::Create(Instruction::Add, + indIdx, + constantInt(idx), + name("add"), + m_block), + name("field"), + m_block); + } else { + getElem = GetElementPtrInst::Create(ptr, + constantInt(idx), + name("field"), + m_block); + } + return getElem; +} + +llvm::Value * Storage::element(Args arg, int idx, llvm::Value *indIdx ) +{ + Value *val = elemPtr(arg); + return elemIdx(val, idx, indIdx); +} + +void Storage::setKilElement(llvm::Value *val) +{ + std::vector<Value*> indices; + indices.push_back(constantInt(0)); + indices.push_back(constantInt(static_cast<int>(KilArg))); + GetElementPtrInst *elem = GetElementPtrInst::Create(m_INPUT, + indices.begin(), + indices.end(), + name("kil_ptr"), + m_block); + StoreInst *st = new StoreInst(val, elem, false, m_block); + st->setAlignment(8); +} + +#endif //MESA_LLVM + + diff --git a/src/gallium/auxiliary/gallivm/storage.h b/src/gallium/auxiliary/gallivm/storage.h new file mode 100644 index 00000000000..8574f7554e3 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/storage.h @@ -0,0 +1,133 @@ 
+/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + /* + * Authors: + * Zack Rusin [email protected] + */ + +#ifndef STORAGE_H +#define STORAGE_H + +#include <map> +#include <set> +#include <stack> +#include <vector> + +namespace llvm { + class BasicBlock; + class Constant; + class ConstantInt; + class LoadInst; + class Value; + class VectorType; +} + +class Storage +{ +public: + Storage(llvm::BasicBlock *block, + llvm::Value *input); + + llvm::Value *inputPtr() const; + + void setCurrentBlock(llvm::BasicBlock *block); + + llvm::ConstantInt *constantInt(int); + llvm::Constant *shuffleMask(int vec); + llvm::Value *inputElement(int idx, llvm::Value *indIdx =0); + llvm::Value *constElement(int idx, llvm::Value *indIdx =0); + llvm::Value *outputElement(int idx, llvm::Value *indIdx =0); + llvm::Value *tempElement(int idx, llvm::Value *indIdx =0); + llvm::Value *immediateElement(int idx); + + void setOutputElement(int dstIdx, llvm::Value *val, int mask); + void setTempElement(int idx, llvm::Value *val, int mask); + + llvm::Value *addrElement(int idx) const; + void setAddrElement(int idx, llvm::Value *val, int mask); + + void setKilElement(llvm::Value *val); + + llvm::Value *shuffleVector(llvm::Value *vec, int shuffle); + + llvm::Value *extractIndex(llvm::Value *vec); + + int numConsts() const; + + void pushArguments(llvm::Value *input); + void popArguments(); + void pushTemps(); + void popTemps(); + + void addImmediate(float *val); + +private: + llvm::Value *maskWrite(llvm::Value *src, int mask, llvm::Value *templ); + const char *name(const char *prefix); + + enum Args { + DestsArg = 0, + InputsArg = 1, + TempsArg = 2, + ConstsArg = 3, + KilArg = 4 + }; + llvm::Value *elemPtr(Args arg); + llvm::Value *elemIdx(llvm::Value *ptr, int idx, + llvm::Value *indIdx = 0); + llvm::Value *element(Args arg, int idx, llvm::Value *indIdx = 0); + +private: + llvm::BasicBlock *m_block; + llvm::Value *m_INPUT; + + std::map<int, llvm::ConstantInt*> 
m_constInts; + std::map<int, llvm::Constant*> m_intVecs; + std::vector<llvm::Value*> m_addrs; + std::vector<llvm::Constant*> m_immediates; + + llvm::VectorType *m_floatVecType; + llvm::VectorType *m_intVecType; + + char m_name[32]; + int m_idx; + + int m_numConsts; + + std::map<int, bool > m_destWriteMap; + std::map<int, bool > m_tempWriteMap; + + llvm::Value *m_undefFloatVec; + llvm::Value *m_undefIntVec; + llvm::Value *m_extSwizzleVec; + + std::stack<llvm::Value*> m_argStack; + std::stack<std::vector<llvm::Value*> > m_tempStack; +}; + +#endif diff --git a/src/gallium/auxiliary/gallivm/storagesoa.cpp b/src/gallium/auxiliary/gallivm/storagesoa.cpp new file mode 100644 index 00000000000..e1e5cabcf55 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/storagesoa.cpp @@ -0,0 +1,438 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "storagesoa.h"
+
+#include "gallivm_p.h"
+
+#include "pipe/p_shader_tokens.h"
+#include "pipe/p_debug.h"
+
+#include <llvm/BasicBlock.h>
+#include <llvm/Module.h>
+#include <llvm/Value.h>
+
+#include <llvm/CallingConv.h>
+#include <llvm/Constants.h>
+#include <llvm/DerivedTypes.h>
+#include <llvm/InstrTypes.h>
+#include <llvm/Instructions.h>
+
+using namespace llvm;
+
+
+// Remembers the block that generated GEPs/loads/stores are appended to and
+// the base values used for TGSI_FILE_INPUT / OUTPUT / CONSTANT accesses.
+// No immediates exist until declareImmediates() has run.
+StorageSoa::StorageSoa(llvm::BasicBlock *block,
+                       llvm::Value *input,
+                       llvm::Value *output,
+                       llvm::Value *consts)
+   : m_block(block),
+     m_input(input),
+     m_output(output),
+     m_consts(consts),
+     m_immediates(0),
+     m_idx(0)
+{
+}
+
+// Queues one immediate. Exactly four floats (vec[0]..vec[3]) are read;
+// nothing is emitted until declareImmediates() is called.
+void StorageSoa::addImmediate(float *vec)
+{
+   m_immediatesToFlush.push_back(std::vector<float>(vec, vec + 4));
+}
+
+// Emits every queued immediate into one global of type
+// [N x [4 x <4 x float>]] and empties the queue. Does nothing when the
+// queue is empty.
+//
+// For each immediate the four array entries are the x, y, z and w channels,
+// each a <4 x float> holding that component replicated in all four lanes.
+void StorageSoa::declareImmediates()
+{
+   if (m_immediatesToFlush.empty())
+      return;
+
+   VectorType *vectorType = VectorType::get(Type::FloatTy, 4);
+   ArrayType  *vectorChannels = ArrayType::get(vectorType, 4);
+   ArrayType  *arrayType = ArrayType::get(vectorChannels, m_immediatesToFlush.size());
+
+   m_immediates = new GlobalVariable(
+      /*Type=*/arrayType,
+      /*isConstant=*/false,
+      /*Linkage=*/GlobalValue::ExternalLinkage,
+      /*Initializer=*/0, // has initializer, specified below
+      /*Name=*/name("immediates"),
+      currentModule());
+
+   std::vector<Constant*> arrayVals;
+   for (unsigned int i = 0; i < m_immediatesToFlush.size(); ++i) {
+      const std::vector<float> &vec = m_immediatesToFlush[i];
+      std::vector<Constant*> channelArray;
+
+      // Every channel, x included, is a splat of one component. The x
+      // channel must not be the raw (x, y, z, w) tuple, otherwise lanes
+      // 1..3 would read y, z and w where x is expected.
+      for (int chan = 0; chan < 4; ++chan) {
+         const std::vector<float> splat(4, vec[chan]);
+         channelArray.push_back(createConstGlobalVector(splat));
+      }
+      Constant *constChannels = ConstantArray::get(vectorChannels,
+                                                   channelArray);
+      arrayVals.push_back(constChannels);
+   }
+   Constant *constArray = ConstantArray::get(arrayType, arrayVals);
+   m_immediates->setInitializer(constArray);
+
+   m_immediatesToFlush.clear();
+}
+
+// Emits a load of address register idx and returns the loaded value.
+// Returns 0 (after a debug message) when addAddress(idx) was never called.
+llvm::Value *StorageSoa::addrElement(int idx) const
+{
+   std::map<int, llvm::Value*>::const_iterator itr = m_addresses.find(idx);
+   if (itr == m_addresses.end()) {
+      debug_printf("Trying to access invalid shader 'address'\n");
+      return 0;
+   }
+   llvm::Value * res = (*itr).second;
+
+   res = new LoadInst(res, name("addr"), false, m_block);
+
+   return res;
+}
+
+// Loads the four channels of input register idx.
+std::vector<llvm::Value*> StorageSoa::inputElement(llvm::Value *idx)
+{
+   std::vector<llvm::Value*> res(4);
+
+   for (int chan = 0; chan < 4; ++chan)
+      res[chan] = element(m_input, idx, chan);
+
+   return res;
+}
+
+// Extracts lane cc of `vector` and returns a <4 x float> in which that
+// scalar has been inserted into all four lanes.
+llvm::Value* StorageSoa::unpackConstElement(llvm::IRBuilder<>* m_builder, llvm::Value* vector, int cc)
+{
+   llvm::Value *scalar = m_builder->CreateExtractElement(vector,
+                                                         constantInt(cc),
+                                                         name("x"));
+
+   VectorType  *vectorType = VectorType::get(Type::FloatTy, 4);
+   Constant *constVector = Constant::getNullValue(vectorType);
+   Value *res = m_builder->CreateInsertElement(constVector, scalar,
+                                               constantInt(0),
+                                               name("vecx"));
+   res = m_builder->CreateInsertElement(res, scalar, constantInt(1),
+                                        name("vecxx"));
+   res = m_builder->CreateInsertElement(res, scalar, constantInt(2),
+                                        name("vecxxx"));
+   res = m_builder->CreateInsertElement(res, scalar, constantInt(3),
+                                        name("vecxxxx"));
+   return res;
+}
+
+// Loads constant idx as a single <4 x float> (see alignedArrayLoad()) and
+// returns its x, y, z and w components, each splatted across four lanes.
+std::vector<llvm::Value*> StorageSoa::constElement(llvm::IRBuilder<>* m_builder, llvm::Value *idx)
+{
+   std::vector<llvm::Value*> res(4);
+
+   llvm::Value *xChannel = elementPointer(m_consts, idx, 0);
+   llvm::Value *packed = alignedArrayLoad(xChannel);
+
+   for (int chan = 0; chan < 4; ++chan)
+      res[chan] = unpackConstElement(m_builder, packed, chan);
+
+   return res;
+}
+
+// Loads the four channels of output register idx.
+std::vector<llvm::Value*> StorageSoa::outputElement(llvm::Value *idx)
+{
+   std::vector<llvm::Value*> res(4);
+
+   for (int chan = 0; chan < 4; ++chan)
+      res[chan] = element(m_output, idx, chan);
+
+   return res;
+}
+
+// Loads the four channels of temporary idx.
+//
+// A temporary that has not been stored to yet gets its alloca created here,
+// the same way store() creates it on first write. Going through
+// m_temps[idx] for an unknown temp would insert a null entry and then build
+// a GEP on a null pointer.
+std::vector<llvm::Value*> StorageSoa::tempElement(llvm::IRBuilder<>* m_builder, int idx)
+{
+   std::vector<llvm::Value*> res(4);
+
+   std::map<int, llvm::Value*>::iterator itr = m_temps.find(idx);
+   if (itr == m_temps.end())
+      itr = m_temps.insert(std::make_pair(idx, allocaTemp(m_builder))).first;
+   llvm::Value *temp = itr->second;
+
+   for (int chan = 0; chan < 4; ++chan)
+      res[chan] = element(temp, constantInt(0), chan);
+
+   return res;
+}
+
+// Loads the four channels of immediate idx from the global created by
+// declareImmediates().
+std::vector<llvm::Value*> StorageSoa::immediateElement(llvm::Value *idx)
+{
+   std::vector<llvm::Value*> res(4);
+
+   for (int chan = 0; chan < 4; ++chan)
+      res[chan] = element(m_immediates, idx, chan);
+
+   return res;
+}
+
+// Emits a GEP selecting [index][channel] below ptr and returns it.
+llvm::Value * StorageSoa::elementPointer(llvm::Value *ptr, llvm::Value *index,
+                                         int channel) const
+{
+   std::vector<Value*> indices;
+   // The immediates global gets one extra leading 0 index: the global is a
+   // pointer to the whole array and has to be stepped through first.
+   if (m_immediates == ptr)
+      indices.push_back(constantInt(0));
+   indices.push_back(index);
+   indices.push_back(constantInt(channel));
+
+   GetElementPtrInst *getElem = GetElementPtrInst::Create(ptr,
+                                                          indices.begin(),
+                                                          indices.end(),
+                                                          name("ptr"),
+                                                          m_block);
+   return getElem;
+}
+
+// elementPointer() followed by a load of the addressed value.
+llvm::Value * StorageSoa::element(llvm::Value *ptr, llvm::Value *index,
+                                  int channel) const
+{
+   llvm::Value *res = elementPointer(ptr, index, channel);
+   LoadInst *load = new LoadInst(res, name("element"), false, m_block);
+   //load->setAlignment(8);
+   return load;
+}
+
+// Returns prefix followed by a running counter. The text is formatted into
+// the shared m_name buffer, so the pointer is only good until the next
+// call.
+const char * StorageSoa::name(const char *prefix) const
+{
+   ++m_idx;
+   snprintf(m_name, sizeof(m_name), "%s%d", prefix, m_idx);
+   return m_name;
+}
+
+// Returns the 32-bit constant for idx, creating and caching it on first use.
+llvm::ConstantInt * StorageSoa::constantInt(int idx) const
+{
+   std::map<int, llvm::ConstantInt*>::const_iterator itr = m_constInts.find(idx);
+   if (itr != m_constInts.end())
+      return itr->second;
+
+   ConstantInt *constInt = ConstantInt::get(APInt(32,  idx));
+   m_constInts[idx] = constInt;
+   return constInt;
+}
+
+// Reinterprets val as a pointer to <4 x float> and loads through it with
+// an alignment of 8.
+llvm::Value *StorageSoa::alignedArrayLoad(llvm::Value *val)
+{
+   VectorType  *vectorType = VectorType::get(Type::FloatTy, 4);
+   PointerType *vectorPtr  = PointerType::get(vectorType, 0);
+
+   CastInst *cast = new BitCastInst(val, vectorPtr, name("toVector"), m_block);
+   LoadInst *load = new LoadInst(cast, name("alignLoad"), false, m_block);
+   load->setAlignment(8);
+   return load;
+}
+
+// Module that owns the function m_block belongs to, or 0 when the block is
+// missing or not attached to a function.
+llvm::Module * StorageSoa::currentModule() const
+{
+   if (!m_block || !m_block->getParent())
+      return 0;
+
+   return m_block->getParent()->getParent();
+}
+
+// Wraps a float in a floating-point constant.
+llvm::Constant * StorageSoa::createConstGlobalFloat(const float val)
+{
+   Constant*c = ConstantFP::get(APFloat(val));
+   return c;
+}
+
+// Builds a constant <4 x float> from vec[0]..vec[3].
+llvm::Constant * StorageSoa::createConstGlobalVector(const std::vector<float> &vec)
+{
+   VectorType *vectorType = VectorType::get(Type::FloatTy, 4);
+   std::vector<Constant*> immValues;
+   for (int lane = 0; lane < 4; ++lane) {
+      ConstantFP *constLane = ConstantFP::get(APFloat(vec[lane]));
+      immValues.push_back(constLane);
+   }
+   Constant *constVector = ConstantVector::get(vectorType, immValues);
+
+   return constVector;
+}
+
+// Fetches the four channels of register idx of file `type` and applies
+// `swizzle` (the gallivm decimal swizzle encoding) to the channel order.
+//
+// indIdx, when non-null, replaces idx as the register index for every file
+// except temporaries, which are always looked up by idx.
+// TGSI_FILE_ADDRESS and unknown files are rejected with a debug message and
+// an assert.
+std::vector<llvm::Value*> StorageSoa::load(enum tgsi_file_type type, int idx, int swizzle,
+                                           llvm::IRBuilder<>* m_builder,llvm::Value *indIdx)
+{
+   std::vector<llvm::Value*> val(4);
+
+   //if we have an indirect index, always use that
+   //   if not use the integer offset to create one
+   llvm::Value *realIndex = 0;
+   if (indIdx)
+      realIndex = indIdx;
+   else
+      realIndex = constantInt(idx);
+   debug_printf("XXXXXXXXX realIdx = %p, indIdx = %p\n", realIndex, indIdx);
+
+   switch(type) {
+   case TGSI_FILE_INPUT:
+      val = inputElement(realIndex);
+      break;
+   case TGSI_FILE_OUTPUT:
+      val = outputElement(realIndex);
+      break;
+   case TGSI_FILE_TEMPORARY:
+      val = tempElement(m_builder, idx);
+      break;
+   case TGSI_FILE_CONSTANT:
+      val = constElement(m_builder, realIndex);
+      break;
+   case TGSI_FILE_IMMEDIATE:
+      val = immediateElement(realIndex);
+      break;
+   case TGSI_FILE_ADDRESS:
+      debug_printf("Address not handled in the load phase!\n");
+      assert(0);
+      break;
+   default:
+      debug_printf("Unknown load!\n");
+      assert(0);
+      break;
+   }
+   if (!gallivm_is_swizzle(swizzle))
+      return val;
+
+   std::vector<llvm::Value*> res(4);
+
+   res[0] = val[gallivm_x_swizzle(swizzle)];
+   res[1] = val[gallivm_y_swizzle(swizzle)];
+   res[2] = val[gallivm_z_swizzle(swizzle)];
+   res[3] = val[gallivm_w_swizzle(swizzle)];
+   return res;
+}
+
+// Emits an alloca of [4 x <4 x float>] named "temp" into the builder's
+// current insert block and returns it.
+llvm::Value * StorageSoa::allocaTemp(llvm::IRBuilder<>* m_builder)
+{
+   VectorType *vector   = VectorType::get(Type::FloatTy, 4);
+   ArrayType  *vecArray = ArrayType::get(vector, 4);
+   AllocaInst *alloca = new AllocaInst(vecArray, "temp",
+                                       m_builder->GetInsertBlock());
+
+   return alloca;
+}
+
+
+// Stores val[0..3] into register idx of file `type`, one store per channel
+// whose TGSI_WRITEMASK_* bit is set in `mask`.
+//
+// Address registers are special: only val[0] is stored, the mask is
+// ignored, and the backing global is created on demand.
+// Any other file is rejected with a debug message and an assert.
+void StorageSoa::store(enum tgsi_file_type type, int idx, const std::vector<llvm::Value*> &val,
+                       int mask, llvm::IRBuilder<>* m_builder)
+{
+   llvm::Value *out = 0;
+   llvm::Value *realIndex = 0;
+   switch(type) {
+   case TGSI_FILE_OUTPUT:
+      out = m_output;
+      realIndex = constantInt(idx);
+      break;
+   case TGSI_FILE_TEMPORARY:
+      // if that temp doesn't already exist, alloca it
+      if (m_temps.find(idx) == m_temps.end())
+         m_temps[idx] = allocaTemp(m_builder);
+
+      out = m_temps[idx];
+
+      realIndex = constantInt(0);
+      break;
+   case TGSI_FILE_INPUT:
+      out = m_input;
+      realIndex = constantInt(idx);
+      break;
+   case TGSI_FILE_ADDRESS: {
+      llvm::Value *addr = m_addresses[idx];
+      if (!addr) {
+         addAddress(idx);
+         addr = m_addresses[idx];
+         assert(addr);
+      }
+      new StoreInst(val[0], addr, false, m_block);
+      return;
+   }
+   default:
+      debug_printf("Can't save output of this type: %d !\n", type);
+      assert(0);
+      // With asserts compiled out there is no destination to write to;
+      // bail out instead of building GEPs on a null pointer.
+      return;
+   }
+
+   static const int channelMask[4] = {
+      TGSI_WRITEMASK_X, TGSI_WRITEMASK_Y, TGSI_WRITEMASK_Z, TGSI_WRITEMASK_W
+   };
+   for (int chan = 0; chan < 4; ++chan) {
+      if ((mask & channelMask[chan])) {
+         llvm::Value *channelPtr = elementPointer(out, realIndex, chan);
+         new StoreInst(val[chan], channelPtr, false, m_block);
+      }
+   }
+}
+
+// Creates the zero-initialised 32-bit integer global backing address
+// register idx and records it in m_addresses.
+void StorageSoa::addAddress(int idx)
+{
+   GlobalVariable *val = new GlobalVariable(
+      /*Type=*/IntegerType::get(32),
+      /*isConstant=*/false,
+      /*Linkage=*/GlobalValue::ExternalLinkage,
+      /*Initializer=*/0, // has initializer, specified below
+      /*Name=*/name("address"),
+      currentModule());
+   val->setInitializer(Constant::getNullValue(IntegerType::get(32)));
+
+   debug_printf("adding to %d\n", idx);
+   m_addresses[idx] = val;
+}
diff --git a/src/gallium/auxiliary/gallivm/storagesoa.h b/src/gallium/auxiliary/gallivm/storagesoa.h
new file mode 100644
index 00000000000..56886f85e7a
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/storagesoa.h
@@ -0,0 +1,107 @@
+/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *
+ **************************************************************************/
+
+#ifndef STORAGESOA_H
+#define STORAGESOA_H
+
+#include <pipe/p_shader_tokens.h>
+#include <llvm/Support/IRBuilder.h>
+
+#include <vector>
+#include <list>
+#include <map>
+
+namespace llvm {
+   class BasicBlock;
+   class Constant;
+   class ConstantInt;
+   class GlobalVariable;
+   class LoadInst;
+   class Value;
+   class VectorType;
+   class Module;
+}
+
+// Register access object used by the SoA translation path: every register
+// is handled as four per-channel values (x, y, z, w).
+//
+// translate_instructionir() in tgsitollvm.cpp reads sources through load();
+// immediates are queued with addImmediate() and address registers are
+// declared with addAddress().
+class StorageSoa
+{
+public:
+   StorageSoa(llvm::BasicBlock *block,
+              llvm::Value *input,
+              llvm::Value *output,
+              llvm::Value *consts);
+
+
+   // Both return/take a vector of four values, one per channel.
+   std::vector<llvm::Value*> load(enum tgsi_file_type type, int idx, int swizzle,
+                                  llvm::IRBuilder<>* m_builder, llvm::Value *indIdx =0);
+   void store(enum tgsi_file_type type, int idx, const std::vector<llvm::Value*> &val,
+              int mask, llvm::IRBuilder<>* m_builder);
+
+   // vec must point at four floats.
+   void addImmediate(float *vec);
+   void declareImmediates();
+
+   void addAddress(int idx);
+
+   llvm::Value * addrElement(int idx) const;
+
+   llvm::ConstantInt *constantInt(int) const;
+private:
+   llvm::Value *elementPointer(llvm::Value *ptr, llvm::Value *indIdx,
+                               int channel) const;
+   llvm::Value *element(llvm::Value *ptr, llvm::Value *idx,
+                        int channel) const;
+   const char *name(const char *prefix) const;
+   llvm::Value  *alignedArrayLoad(llvm::Value *val);
+   llvm::Module *currentModule() const;
+   llvm::Constant  *createConstGlobalFloat(const float val);
+   llvm::Constant  *createConstGlobalVector(const std::vector<float> &vec);
+
+   std::vector<llvm::Value*> inputElement(llvm::Value *indIdx);
+   llvm::Value* unpackConstElement(llvm::IRBuilder<>* m_builder, llvm::Value *indIdx, int cc);
+   std::vector<llvm::Value*> constElement(llvm::IRBuilder<>* m_builder, llvm::Value *indIdx);
+   std::vector<llvm::Value*> outputElement(llvm::Value *indIdx);
+   std::vector<llvm::Value*> tempElement(llvm::IRBuilder<>* m_builder, int idx);
+   std::vector<llvm::Value*> immediateElement(llvm::Value *indIdx);
+private:
+   // Declaration order matters: the constructor's initialiser list follows it.
+   llvm::BasicBlock  *m_block;
+
+   llvm::Value *m_input;
+   llvm::Value *m_output;
+   llvm::Value *m_consts;
+   std::map<int, llvm::Value*> m_temps;
+   llvm::GlobalVariable *m_immediates;
+
+   std::map<int, llvm::Value*> m_addresses;
+
+   std::vector<std::vector<float> > m_immediatesToFlush;
+   llvm::Value * allocaTemp(llvm::IRBuilder<>* m_builder);
+
+   // Mutable because the const helpers constantInt() and name() update them.
+   mutable std::map<int, llvm::ConstantInt*> m_constInts;
+   mutable char        m_name[32];
+   mutable int         m_idx;
+};
+
+#endif
diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
new file mode 100644
index 00000000000..5b08200d142
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
@@ -0,0 +1,1164 @@
+#include "tgsitollvm.h"
+
+#include "gallivm.h"
+#include "gallivm_p.h"
+
+#include "storage.h"
+#include "instructions.h"
+#include "storagesoa.h"
+#include "instructionssoa.h"
+
+#include "pipe/p_shader_tokens.h"
+
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_exec.h"
+#include "tgsi/tgsi_util.h"
+#include "tgsi/tgsi_build.h"
+#include "tgsi/tgsi_dump.h"
+
+
+#include <llvm/Module.h>
+#include <llvm/CallingConv.h>
+#include <llvm/Constants.h>
+#include <llvm/DerivedTypes.h>
+#include <llvm/Instructions.h>
+#include <llvm/ModuleProvider.h>
+#include <llvm/Pass.h>
+#include <llvm/PassManager.h>
+#include <llvm/Attributes.h>
+#include <llvm/Support/PatternMatch.h>
+#include <llvm/ExecutionEngine/JIT.h>
+#include <llvm/ExecutionEngine/Interpreter.h>
+#include <llvm/ExecutionEngine/GenericValue.h>
+#include <llvm/Support/MemoryBuffer.h>
+#include <llvm/LinkAllPasses.h>
+#include <llvm/Analysis/Verifier.h>
+#include <llvm/Analysis/LoopPass.h>
+#include <llvm/Target/TargetData.h>
+#include <llvm/Bitcode/ReaderWriter.h>
+#include <llvm/Transforms/Utils/Cloning.h>
+
+
+#include <sstream>
+#include <fstream>
+#include <iostream>
+
+using namespace llvm;
+
+// Builds the type `void (inputs, output, consts)` of a generated vertex
+// shader function.
+static inline FunctionType *vertexShaderFunctionType()
+{
+   //Function takes three arguments,
+   // the calling code has to make sure the types it will
+   // pass are castable to the following:
+   // pointer to [4 x <4 x float>] inputs,
+   // pointer to [4 x <4 x float>] output,
+   // pointer to [1 x [4 x float]] consts,
+
+   std::vector<const Type*> funcArgs;
+   VectorType *vectorType = VectorType::get(Type::FloatTy, 4);
+   ArrayType *vectorArray = ArrayType::get(vectorType, 4);
+   PointerType *vectorArrayPtr = PointerType::get(vectorArray, 0);
+
+   ArrayType   *floatArray     = ArrayType::get(Type::FloatTy, 4);
+   ArrayType   *constsArray    = ArrayType::get(floatArray, 1);
+   PointerType *constsArrayPtr = PointerType::get(constsArray, 0);
+
+   funcArgs.push_back(vectorArrayPtr);//inputs
+   funcArgs.push_back(vectorArrayPtr);//output
+   funcArgs.push_back(constsArrayPtr);//consts
+
+   FunctionType *functionType = FunctionType::get(
+      /*Result=*/Type::VoidTy,
+      /*Params=*/funcArgs,
+      /*isVarArg=*/false);
+
+   return functionType;
+}
+
+// Appends a copy of *interp to ir->interpolators.
+// NOTE(review): there is no capacity check here; the size of
+// ir->interpolators is declared elsewhere -- confirm callers cannot
+// overflow it.
+static inline void
+add_interpolator(struct gallivm_ir *ir,
+                 struct gallivm_interpolate *interp)
+{
+   ir->interpolators[ir->num_interp] = *interp;
+   ++ir->num_interp;
+}
+
+// Records one interpolator per (attribute, channel) for an input
+// declaration; declarations of other register files are ignored.
+// The x and y channels of attribute 0 are never interpolated.
+// module, storage and fd are unused.
+static void
+translate_declaration(struct gallivm_ir *prog,
+                      llvm::Module *module,
+                      Storage *storage,
+                      struct tgsi_full_declaration *decl,
+                      struct tgsi_full_declaration *fd)
+{
+   if (decl->Declaration.File == TGSI_FILE_INPUT) {
+      unsigned first, last, mask;
+      uint interp_method;
+
+      first = decl->DeclarationRange.First;
+      last = decl->DeclarationRange.Last;
+      mask = decl->Declaration.UsageMask;
+
+      /* Do not touch WPOS.xy */
+      if (first == 0) {
+         mask &= ~TGSI_WRITEMASK_XY;
+         if (mask == TGSI_WRITEMASK_NONE) {
+            first++;
+            if (first > last) {
+               return;
+            }
+         }
+      }
+
+      interp_method = decl->Declaration.Interpolate;
+
+      if (mask == TGSI_WRITEMASK_XYZW) {
+         unsigned i, j;
+
+         for (i = first; i <= last; i++) {
+            for (j = 0; j < NUM_CHANNELS; j++) {
+               //interp( mach, i, j );
+               struct gallivm_interpolate interp;
+               interp.type = interp_method;
+               interp.attrib = i;
+               interp.chan = j;
+               add_interpolator(prog, &interp);
+            }
+         }
+      } else {
+         unsigned i, j;
+         for( j = 0; j < NUM_CHANNELS; j++ ) {
+            if( mask & (1 << j) ) {
+               for( i = first; i <= last; i++ ) {
+                  struct gallivm_interpolate interp;
+                  interp.type = interp_method;
+                  interp.attrib = i;
+                  interp.chan = j;
+                  add_interpolator(prog, &interp);
+               }
+            }
+         }
+      }
+   }
+}
+
+// SoA path: the only declarations acted on are address registers, for
+// which the first index of the declared range is registered with the
+// storage.
+static void
+translate_declarationir(struct gallivm_ir *,
+                        llvm::Module *,
+                        StorageSoa *storage,
+                        struct tgsi_full_declaration *decl,
+                        struct tgsi_full_declaration *)
+{
+   if (decl->Declaration.File == TGSI_FILE_ADDRESS) {
+      int idx = decl->DeclarationRange.First;
+      storage->addAddress(idx);
+   }
+}
+
+// Copies the float components of a TGSI immediate into a four-float
+// vector and hands it to the storage. Only TGSI_IMM_FLOAT32 is supported.
+static void
+translate_immediate(Storage *storage,
+                    struct tgsi_full_immediate *imm)
+{
+   // Zero-fill: an immediate may supply fewer than four components and the
+   // whole array is passed on regardless.
+   float vec[4] = { 0.0f, 0.0f, 0.0f, 0.0f };
+
+   // NrTokens - 1 is the component count this loop has always used.
+   // Clamp it so a malformed token cannot write past vec.
+   int count = imm->Immediate.NrTokens - 1;
+   if (count > 4)
+      count = 4;
+
+   for (int i = 0; i < count; ++i) {
+      switch (imm->Immediate.DataType) {
+      case TGSI_IMM_FLOAT32:
+         vec[i] = imm->u.ImmediateFloat32[i].Float;
+         break;
+      default:
+         assert(0);
+      }
+   }
+   storage->addImmediate(vec);
+}
+
+
+// SoA counterpart of translate_immediate().
+// StorageSoa::addImmediate() reads vec[0]..vec[3] unconditionally, so the
+// array must be fully initialised.
+static void
+translate_immediateir(StorageSoa *storage,
+                      struct tgsi_full_immediate *imm)
+{
+   float vec[4] = { 0.0f, 0.0f, 0.0f, 0.0f };
+
+   // Same clamping as in translate_immediate().
+   int count = imm->Immediate.NrTokens - 1;
+   if (count > 4)
+      count = 4;
+
+   for (int i = 0; i < count; ++i) {
+      switch (imm->Immediate.DataType) {
+      case TGSI_IMM_FLOAT32:
+         vec[i] = imm->u.ImmediateFloat32[i].Float;
+         break;
+      default:
+         assert(0);
+      }
+   }
+   storage->addImmediate(vec);
+}
+
+// Packs the four extended-swizzle selectors of src into one decimal
+// number: thousands digit for component 0 down to units for component 3.
+static inline int
+swizzleInt(struct tgsi_full_src_register *src)
+{
+   int swizzle = 0;
+   int start = 1000;
+
+   for (int k = 0; k < 4; ++k) {
+      swizzle += tgsi_util_get_full_src_register_extswizzle(src, k) * start;
+      start /= 10;
+   }
+   return swizzle;
+}
+
+// Returns val shuffled according to src's swizzle, or val itself when
+// gallivm_is_swizzle() reports that no shuffle is needed.
+static inline llvm::Value *
+swizzleVector(llvm::Value *val, struct tgsi_full_src_register *src,
+              Storage *storage)
+{
+   int swizzle = swizzleInt(src);
+
+   if (gallivm_is_swizzle(swizzle)) {
+      /*fprintf(stderr, "XXXXXXXX swizzle = %d\n", swizzle);*/
+      val = storage->shuffleVector(val, swizzle);
+   }
+   return val;
+}
+
+// Translates one TGSI instruction on the non-SoA path:
+//  1. fetch and swizzle every source operand,
+//  2. emit the operation through `instr`,
+//  3. write the result to every destination register.
+// Control-flow opcodes only update builder state and return early.
+// Opcodes listed with a bare `break` are recognised but not implemented;
+// they are reported after the switch. module and fi are unused.
+static void
+translate_instruction(llvm::Module *module,
+                      Storage *storage,
+                      Instructions *instr,
+                      struct tgsi_full_instruction *inst,
+                      struct tgsi_full_instruction *fi,
+                      unsigned instno)
+{
+   llvm::Value *inputs[4] = { 0, 0, 0, 0 };
+
+   for (int i = 0; i < inst->Instruction.NumSrcRegs; ++i) {
+      struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i];
+      llvm::Value *val = 0;
+      llvm::Value *indIdx = 0;
+
+      if (src->SrcRegister.Indirect) {
+         indIdx = storage->addrElement(src->SrcRegisterInd.Index);
+         indIdx = storage->extractIndex(indIdx);
+      }
+      if (src->SrcRegister.File == TGSI_FILE_CONSTANT) {
+         val = storage->constElement(src->SrcRegister.Index, indIdx);
+      } else if (src->SrcRegister.File == TGSI_FILE_INPUT) {
+         val = storage->inputElement(src->SrcRegister.Index, indIdx);
+      } else if (src->SrcRegister.File == TGSI_FILE_TEMPORARY) {
+         val = storage->tempElement(src->SrcRegister.Index);
+      } else if (src->SrcRegister.File == TGSI_FILE_OUTPUT) {
+         val = storage->outputElement(src->SrcRegister.Index, indIdx);
+      } else if (src->SrcRegister.File == TGSI_FILE_IMMEDIATE) {
+         val = storage->immediateElement(src->SrcRegister.Index);
+      } else {
+         fprintf(stderr, "ERROR: not supported llvm source %d\n", src->SrcRegister.File);
+         return;
+      }
+
+      inputs[i] = swizzleVector(val, src, storage);
+   }
+
+   /*if (inputs[0])
+     instr->printVector(inputs[0]);
+     if (inputs[1])
+     instr->printVector(inputs[1]);*/
+   llvm::Value *out = 0;
+   switch (inst->Instruction.Opcode) {
+   case TGSI_OPCODE_ARL:
+      out = instr->arl(inputs[0]);
+      break;
+   case TGSI_OPCODE_MOV:
+      out = inputs[0];
+      break;
+   case TGSI_OPCODE_LIT:
+      out = instr->lit(inputs[0]);
+      break;
+   case TGSI_OPCODE_RCP:
+      out = instr->rcp(inputs[0]);
+      break;
+   case TGSI_OPCODE_RSQ:
+      out = instr->rsq(inputs[0]);
+      break;
+   case TGSI_OPCODE_EXP:
+      out = instr->exp(inputs[0]);
+      break;
+   case TGSI_OPCODE_LOG:
+      out = instr->log(inputs[0]);
+      break;
+   case TGSI_OPCODE_MUL:
+      out = instr->mul(inputs[0], inputs[1]);
+      break;
+   case TGSI_OPCODE_ADD:
+      out = instr->add(inputs[0], inputs[1]);
+      break;
+   case TGSI_OPCODE_DP3:
+      out = instr->dp3(inputs[0], inputs[1]);
+      break;
+   case TGSI_OPCODE_DP4:
+      out = instr->dp4(inputs[0], inputs[1]);
+      break;
+   case TGSI_OPCODE_DST:
+      out = instr->dst(inputs[0], inputs[1]);
+      break;
+   case TGSI_OPCODE_MIN:
+      out = instr->min(inputs[0], inputs[1]);
+      break;
+   case TGSI_OPCODE_MAX:
+      out = instr->max(inputs[0], inputs[1]);
+      break;
+   case TGSI_OPCODE_SLT:
+      out = instr->slt(inputs[0], inputs[1]);
+      break;
+   case TGSI_OPCODE_SGE:
+      out = instr->sge(inputs[0], inputs[1]);
+      break;
+   case TGSI_OPCODE_MAD:
+      out = instr->madd(inputs[0], inputs[1], inputs[2]);
+      break;
+   case TGSI_OPCODE_SUB:
+      out = instr->sub(inputs[0], inputs[1]);
+      break;
+   case TGSI_OPCODE_LERP:
+      out = instr->lerp(inputs[0], inputs[1], inputs[2]);
+      break;
+   case TGSI_OPCODE_CND:
+      out = instr->cnd(inputs[0], inputs[1], inputs[2]);
+      break;
+   case TGSI_OPCODE_CND0:
+      out = instr->cnd0(inputs[0], inputs[1], inputs[2]);
+      break;
+   case TGSI_OPCODE_DOT2ADD:
+      out = instr->dot2add(inputs[0], inputs[1], inputs[2]);
+      break;
+   case TGSI_OPCODE_INDEX:
+      break;
+   case TGSI_OPCODE_NEGATE:
+      out = instr->neg(inputs[0]);
+      break;
+   case TGSI_OPCODE_FRAC:
+      out = instr->frc(inputs[0]);
+      break;
+   case TGSI_OPCODE_CLAMP:
+      out = instr->clamp(inputs[0]);
+      break;
+   case TGSI_OPCODE_FLOOR:
+      out = instr->floor(inputs[0]);
+      break;
+   case TGSI_OPCODE_ROUND:
+      break;
+   case TGSI_OPCODE_EXPBASE2:
+      out = instr->ex2(inputs[0]);
+      break;
+   case TGSI_OPCODE_LOGBASE2:
+      out = instr->lg2(inputs[0]);
+      break;
+   case TGSI_OPCODE_POWER:
+      out = instr->pow(inputs[0], inputs[1]);
+      break;
+   case TGSI_OPCODE_CROSSPRODUCT:
+      out = instr->cross(inputs[0], inputs[1]);
+      break;
+   case TGSI_OPCODE_MULTIPLYMATRIX:
+      break;
+   case TGSI_OPCODE_ABS:
+      out = instr->abs(inputs[0]);
+      break;
+   case TGSI_OPCODE_RCC:
+      break;
+   case TGSI_OPCODE_DPH:
+      out = instr->dph(inputs[0], inputs[1]);
+      break;
+   case TGSI_OPCODE_COS:
+      out = instr->cos(inputs[0]);
+      break;
+   case TGSI_OPCODE_DDX:
+      out = instr->ddx(inputs[0]);
+      break;
+   case TGSI_OPCODE_DDY:
+      out = instr->ddy(inputs[0]);
+      break;
+   case TGSI_OPCODE_KILP:
+      break;
+   case TGSI_OPCODE_PK2H:
+      break;
+   case TGSI_OPCODE_PK2US:
+      break;
+   case TGSI_OPCODE_PK4B:
+      break;
+   case TGSI_OPCODE_PK4UB:
+      break;
+   case TGSI_OPCODE_RFL:
+      break;
+   case TGSI_OPCODE_SEQ:
+      out = instr->seq(inputs[0], inputs[1]);
+      break;
+   case TGSI_OPCODE_SFL:
+      out = instr->sfl(inputs[0], inputs[1]);
+      break;
+   case TGSI_OPCODE_SGT:
+      out = instr->sgt(inputs[0], inputs[1]);
+      break;
+   case TGSI_OPCODE_SIN:
+      out = instr->sin(inputs[0]);
+      break;
+   case TGSI_OPCODE_SLE:
+      out = instr->sle(inputs[0], inputs[1]);
+      break;
+   case TGSI_OPCODE_SNE:
+      out = instr->sne(inputs[0], inputs[1]);
+      break;
+   case TGSI_OPCODE_STR:
+      out = instr->str(inputs[0], inputs[1]);
+      break;
+   case TGSI_OPCODE_TEX:
+      break;
+   case TGSI_OPCODE_TXD:
+      break;
+   case TGSI_OPCODE_UP2H:
+      break;
+   case TGSI_OPCODE_UP2US:
+      break;
+   case TGSI_OPCODE_UP4B:
+      break;
+   case TGSI_OPCODE_UP4UB:
+      break;
+   case TGSI_OPCODE_X2D:
+      out = instr->x2d(inputs[0], inputs[1], inputs[2]);
+      break;
+   case TGSI_OPCODE_ARA:
+      break;
+   case TGSI_OPCODE_ARR:
+      break;
+   case TGSI_OPCODE_BRA:
+      break;
+   case TGSI_OPCODE_CAL:
+      instr->cal(inst->InstructionExtLabel.Label, storage->inputPtr());
+      return;
+   case TGSI_OPCODE_RET:
+      instr->end();
+      return;
+   case TGSI_OPCODE_SSG:
+      break;
+   case TGSI_OPCODE_CMP:
+      out = instr->cmp(inputs[0], inputs[1], inputs[2]);
+      break;
+   case TGSI_OPCODE_SCS:
+      out = instr->scs(inputs[0]);
+      break;
+   case TGSI_OPCODE_TXB:
+      break;
+   case TGSI_OPCODE_NRM4:
+   case TGSI_OPCODE_NRM:
+      out = instr->nrm(inputs[0]);
+      break;
+   case TGSI_OPCODE_DIV:
+      out = instr->div(inputs[0], inputs[1]);
+      break;
+   case TGSI_OPCODE_DP2:
+      out = instr->dp2(inputs[0], inputs[1]);
+      break;
+   case TGSI_OPCODE_TXL:
+      break;
+   case TGSI_OPCODE_BRK:
+      instr->brk();
+      return;
+   case TGSI_OPCODE_IF:
+      instr->ifop(inputs[0]);
+      storage->setCurrentBlock(instr->currentBlock());
+      return;  //just update the state
+   case TGSI_OPCODE_LOOP:
+      break;
+   case TGSI_OPCODE_REP:
+      break;
+   case TGSI_OPCODE_ELSE:
+      instr->elseop();
+      storage->setCurrentBlock(instr->currentBlock());
+      return; //only state update
+   case TGSI_OPCODE_ENDIF:
+      instr->endif();
+      storage->setCurrentBlock(instr->currentBlock());
+      return; //just update the state
+   case TGSI_OPCODE_ENDLOOP:
+      break;
+   case TGSI_OPCODE_ENDREP:
+      break;
+   case TGSI_OPCODE_PUSHA:
+      break;
+   case TGSI_OPCODE_POPA:
+      break;
+   case TGSI_OPCODE_CEIL:
+      break;
+   case TGSI_OPCODE_I2F:
+      break;
+   case TGSI_OPCODE_NOT:
+      break;
+   case TGSI_OPCODE_TRUNC:
+      out = instr->trunc(inputs[0]);
+      break;
+   case TGSI_OPCODE_SHL:
+      break;
+   case TGSI_OPCODE_SHR:
+      break;
+   case TGSI_OPCODE_AND:
+      break;
+   case TGSI_OPCODE_OR:
+      break;
+   case TGSI_OPCODE_MOD:
+      break;
+   case TGSI_OPCODE_XOR:
+      break;
+   case TGSI_OPCODE_SAD:
+      break;
+   case TGSI_OPCODE_TXF:
+      break;
+   case TGSI_OPCODE_TXQ:
+      break;
+   case TGSI_OPCODE_CONT:
+      break;
+   case TGSI_OPCODE_EMIT:
+      break;
+   case TGSI_OPCODE_ENDPRIM:
+      break;
+   case TGSI_OPCODE_BGNLOOP2:
+      instr->beginLoop();
+      storage->setCurrentBlock(instr->currentBlock());
+      return;
+   case TGSI_OPCODE_BGNSUB:
+      instr->bgnSub(instno);
+      storage->setCurrentBlock(instr->currentBlock());
+      storage->pushTemps();
+      return;
+   case TGSI_OPCODE_ENDLOOP2:
+      instr->endLoop();
+      storage->setCurrentBlock(instr->currentBlock());
+      return;
+   case TGSI_OPCODE_ENDSUB:
+      instr->endSub();
+      storage->setCurrentBlock(instr->currentBlock());
+      storage->popArguments();
+      storage->popTemps();
+      return;
+   case TGSI_OPCODE_NOISE1:
+      break;
+   case TGSI_OPCODE_NOISE2:
+      break;
+   case TGSI_OPCODE_NOISE3:
+      break;
+   case TGSI_OPCODE_NOISE4:
+      break;
+   case TGSI_OPCODE_NOP:
+      // A no-op produces nothing to store; it must not be reported as an
+      // unsupported opcode below.
+      return;
+   case TGSI_OPCODE_M4X3:
+      break;
+   case TGSI_OPCODE_M3X4:
+      break;
+   case TGSI_OPCODE_M3X3:
+      break;
+   case TGSI_OPCODE_M3X2:
+      break;
+   case TGSI_OPCODE_CALLNZ:
+      break;
+   case TGSI_OPCODE_IFC:
+      break;
+   case TGSI_OPCODE_BREAKC:
+      break;
+   case TGSI_OPCODE_KIL:
+      out = instr->kil(inputs[0]);
+      storage->setKilElement(out);
+      return;
+   case TGSI_OPCODE_END:
+      instr->end();
+      return;
+   default:
+      fprintf(stderr, "ERROR: Unknown opcode %d\n",
+              inst->Instruction.Opcode);
+      assert(0);
+      break;
+   }
+
+   if (!out) {
+      fprintf(stderr, "ERROR: unsupported opcode %d\n",
+              inst->Instruction.Opcode);
+      assert(!"Unsupported opcode");
+      // With asserts compiled out, do not hand a null result to the
+      // destination writes below.
+      return;
+   }
+
+   /* # not sure if we need this */
+   switch( inst->Instruction.Saturate ) {
+   case TGSI_SAT_NONE:
+      break;
+   case TGSI_SAT_ZERO_ONE:
+      /*TXT( "_SAT" );*/
+      break;
+   case TGSI_SAT_MINUS_PLUS_ONE:
+      /*TXT( "_SAT[-1,1]" );*/
+      break;
+   default:
+      assert( 0 );
+   }
+
+   /* store results  */
+   for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) {
+      struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i];
+
+      if (dst->DstRegister.File == TGSI_FILE_OUTPUT) {
+         storage->setOutputElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask);
+      } else if (dst->DstRegister.File == TGSI_FILE_TEMPORARY) {
+         storage->setTempElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask);
+      } else if (dst->DstRegister.File == TGSI_FILE_ADDRESS) {
+         storage->setAddrElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask);
+      } else {
+         fprintf(stderr, "ERROR: unsupported LLVM destination!");
+         assert(!"wrong destination");
+      }
+   }
+}
+
+
+static void
+translate_instructionir(llvm::Module *module,
+                        StorageSoa *storage,
+                        InstructionsSoa *instr,
+                        struct tgsi_full_instruction *inst,
+                        struct tgsi_full_instruction *fi,
+                        
unsigned instno) +{ + std::vector< std::vector<llvm::Value*> > inputs(inst->Instruction.NumSrcRegs); + + for (int i = 0; i < inst->Instruction.NumSrcRegs; ++i) { + struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i]; + std::vector<llvm::Value*> val; + llvm::Value *indIdx = 0; + int swizzle = swizzleInt(src); + + if (src->SrcRegister.Indirect) { + indIdx = storage->addrElement(src->SrcRegisterInd.Index); + } + val = storage->load((enum tgsi_file_type)src->SrcRegister.File, + src->SrcRegister.Index, swizzle, instr->getIRBuilder(), indIdx); + + inputs[i] = val; + } + + std::vector<llvm::Value*> out(4); + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ARL: { + out = instr->arl(inputs[0]); + } + break; + case TGSI_OPCODE_MOV: { + out = inputs[0]; + } + break; + case TGSI_OPCODE_LIT: { + out = instr->lit(inputs[0]); + } + break; + case TGSI_OPCODE_RCP: { + } + break; + case TGSI_OPCODE_RSQ: { + out = instr->rsq(inputs[0]); + } + break; + case TGSI_OPCODE_EXP: + break; + case TGSI_OPCODE_LOG: + break; + case TGSI_OPCODE_MUL: { + out = instr->mul(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_ADD: { + out = instr->add(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_DP3: { + out = instr->dp3(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_DP4: { + out = instr->dp4(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_DST: { + } + break; + case TGSI_OPCODE_MIN: { + out = instr->min(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_MAX: { + out = instr->max(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_SLT: { + out = instr->slt(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_SGE: { + } + break; + case TGSI_OPCODE_MAD: { + out = instr->madd(inputs[0], inputs[1], inputs[2]); + } + break; + case TGSI_OPCODE_SUB: { + out = instr->sub(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_LERP: { + } + break; + case TGSI_OPCODE_CND: + break; + case TGSI_OPCODE_CND0: + break; + case TGSI_OPCODE_DOT2ADD: + break; + case 
TGSI_OPCODE_INDEX: + break; + case TGSI_OPCODE_NEGATE: + break; + case TGSI_OPCODE_FRAC: { + } + break; + case TGSI_OPCODE_CLAMP: + break; + case TGSI_OPCODE_FLOOR: { + } + break; + case TGSI_OPCODE_ROUND: + break; + case TGSI_OPCODE_EXPBASE2: { + } + break; + case TGSI_OPCODE_LOGBASE2: { + } + break; + case TGSI_OPCODE_POWER: { + out = instr->pow(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_CROSSPRODUCT: { + } + break; + case TGSI_OPCODE_MULTIPLYMATRIX: + break; + case TGSI_OPCODE_ABS: { + out = instr->abs(inputs[0]); + } + break; + case TGSI_OPCODE_RCC: + break; + case TGSI_OPCODE_DPH: { + } + break; + case TGSI_OPCODE_COS: { + } + break; + case TGSI_OPCODE_DDX: + break; + case TGSI_OPCODE_DDY: + break; + case TGSI_OPCODE_KILP: + break; + case TGSI_OPCODE_PK2H: + break; + case TGSI_OPCODE_PK2US: + break; + case TGSI_OPCODE_PK4B: + break; + case TGSI_OPCODE_PK4UB: + break; + case TGSI_OPCODE_RFL: + break; + case TGSI_OPCODE_SEQ: + break; + case TGSI_OPCODE_SFL: + break; + case TGSI_OPCODE_SGT: { + } + break; + case TGSI_OPCODE_SIN: { + } + break; + case TGSI_OPCODE_SLE: + break; + case TGSI_OPCODE_SNE: + break; + case TGSI_OPCODE_STR: + break; + case TGSI_OPCODE_TEX: + break; + case TGSI_OPCODE_TXD: + break; + case TGSI_OPCODE_UP2H: + break; + case TGSI_OPCODE_UP2US: + break; + case TGSI_OPCODE_UP4B: + break; + case TGSI_OPCODE_UP4UB: + break; + case TGSI_OPCODE_X2D: + break; + case TGSI_OPCODE_ARA: + break; + case TGSI_OPCODE_ARR: + break; + case TGSI_OPCODE_BRA: + break; + case TGSI_OPCODE_CAL: { + } + break; + case TGSI_OPCODE_RET: { + } + break; + case TGSI_OPCODE_SSG: + break; + case TGSI_OPCODE_CMP: { + } + break; + case TGSI_OPCODE_SCS: { + } + break; + case TGSI_OPCODE_TXB: + break; + case TGSI_OPCODE_NRM: + break; + case TGSI_OPCODE_DIV: + break; + case TGSI_OPCODE_DP2: + break; + case TGSI_OPCODE_TXL: + break; + case TGSI_OPCODE_BRK: { + } + break; + case TGSI_OPCODE_IF: { + } + break; + case TGSI_OPCODE_LOOP: + break; + case TGSI_OPCODE_REP: + 
break; + case TGSI_OPCODE_ELSE: { + } + break; + case TGSI_OPCODE_ENDIF: { + } + break; + case TGSI_OPCODE_ENDLOOP: + break; + case TGSI_OPCODE_ENDREP: + break; + case TGSI_OPCODE_PUSHA: + break; + case TGSI_OPCODE_POPA: + break; + case TGSI_OPCODE_CEIL: + break; + case TGSI_OPCODE_I2F: + break; + case TGSI_OPCODE_NOT: + break; + case TGSI_OPCODE_TRUNC: { + } + break; + case TGSI_OPCODE_SHL: + break; + case TGSI_OPCODE_SHR: + break; + case TGSI_OPCODE_AND: + break; + case TGSI_OPCODE_OR: + break; + case TGSI_OPCODE_MOD: + break; + case TGSI_OPCODE_XOR: + break; + case TGSI_OPCODE_SAD: + break; + case TGSI_OPCODE_TXF: + break; + case TGSI_OPCODE_TXQ: + break; + case TGSI_OPCODE_CONT: + break; + case TGSI_OPCODE_EMIT: + break; + case TGSI_OPCODE_ENDPRIM: + break; + case TGSI_OPCODE_BGNLOOP2: { + } + break; + case TGSI_OPCODE_BGNSUB: { + } + break; + case TGSI_OPCODE_ENDLOOP2: { + } + break; + case TGSI_OPCODE_ENDSUB: { + } + break; + case TGSI_OPCODE_NOISE1: + break; + case TGSI_OPCODE_NOISE2: + break; + case TGSI_OPCODE_NOISE3: + break; + case TGSI_OPCODE_NOISE4: + break; + case TGSI_OPCODE_NOP: + break; + case TGSI_OPCODE_M4X3: + break; + case TGSI_OPCODE_M3X4: + break; + case TGSI_OPCODE_M3X3: + break; + case TGSI_OPCODE_M3X2: + break; + case TGSI_OPCODE_NRM4: + break; + case TGSI_OPCODE_CALLNZ: + break; + case TGSI_OPCODE_IFC: + break; + case TGSI_OPCODE_BREAKC: + break; + case TGSI_OPCODE_KIL: { + } + break; + case TGSI_OPCODE_END: + instr->end(); + return; + break; + default: + fprintf(stderr, "ERROR: Unknown opcode %d\n", + inst->Instruction.Opcode); + assert(0); + break; + } + + if (!out[0]) { + fprintf(stderr, "ERROR: unsupported opcode %d\n", + inst->Instruction.Opcode); + assert(!"Unsupported opcode"); + } + + /* store results */ + for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) { + struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; + storage->store((enum tgsi_file_type)dst->DstRegister.File, + dst->DstRegister.Index, out, 
dst->DstRegister.WriteMask, + instr->getIRBuilder() ); + } +} + +llvm::Module * +tgsi_to_llvm(struct gallivm_ir *ir, const struct tgsi_token *tokens) +{ + llvm::Module *mod = new Module("shader"); + struct tgsi_parse_context parse; + struct tgsi_full_instruction fi; + struct tgsi_full_declaration fd; + unsigned instno = 0; + Function* shader = mod->getFunction("execute_shader"); + std::ostringstream stream; + if (ir->type == GALLIVM_VS) { + stream << "vs_shader"; + } else { + stream << "fs_shader"; + } + stream << ir->id; + std::string func_name = stream.str(); + shader->setName(func_name.c_str()); + + Function::arg_iterator args = shader->arg_begin(); + Value *ptr_INPUT = args++; + ptr_INPUT->setName("input"); + + BasicBlock *label_entry = BasicBlock::Create("entry", shader, 0); + + tgsi_parse_init(&parse, tokens); + + fi = tgsi_default_full_instruction(); + fd = tgsi_default_full_declaration(); + Storage storage(label_entry, ptr_INPUT); + Instructions instr(mod, shader, label_entry, &storage); + while(!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_DECLARATION: + translate_declaration(ir, mod, &storage, + &parse.FullToken.FullDeclaration, + &fd); + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + translate_immediate(&storage, + &parse.FullToken.FullImmediate); + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + translate_instruction(mod, &storage, &instr, + &parse.FullToken.FullInstruction, + &fi, instno); + ++instno; + break; + + default: + assert(0); + } + } + + tgsi_parse_free(&parse); + + ir->num_consts = storage.numConsts(); + return mod; +} + +llvm::Module * tgsi_to_llvmir(struct gallivm_ir *ir, + const struct tgsi_token *tokens) +{ + llvm::Module *mod = new Module("shader"); + struct tgsi_parse_context parse; + struct tgsi_full_instruction fi; + struct tgsi_full_declaration fd; + unsigned instno = 0; + std::ostringstream stream; + if (ir->type == GALLIVM_VS) { + stream << 
"vs_shader"; + } else { + stream << "fs_shader"; + } + //stream << ir->id; + std::string func_name = stream.str(); + Function *shader = llvm::cast<Function>(mod->getOrInsertFunction( + func_name.c_str(), + vertexShaderFunctionType())); + + Function::arg_iterator args = shader->arg_begin(); + Value *input = args++; + input->setName("inputs"); + Value *output = args++; + output->setName("outputs"); + Value *consts = args++; + consts->setName("consts"); + + BasicBlock *label_entry = BasicBlock::Create("entry", shader, 0); + + tgsi_parse_init(&parse, tokens); + + fi = tgsi_default_full_instruction(); + fd = tgsi_default_full_declaration(); + + StorageSoa storage(label_entry, input, output, consts); + InstructionsSoa instr(mod, shader, label_entry, &storage); + + while(!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_DECLARATION: + translate_declarationir(ir, mod, &storage, + &parse.FullToken.FullDeclaration, + &fd); + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + translate_immediateir(&storage, + &parse.FullToken.FullImmediate); + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + storage.declareImmediates(); + translate_instructionir(mod, &storage, &instr, + &parse.FullToken.FullInstruction, + &fi, instno); + ++instno; + break; + + default: + assert(0); + } + } + + tgsi_parse_free(&parse); + + return mod; +} diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.h b/src/gallium/auxiliary/gallivm/tgsitollvm.h new file mode 100644 index 00000000000..7ada04d6299 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/tgsitollvm.h @@ -0,0 +1,20 @@ +#ifndef TGSITOLLVM_H +#define TGSITOLLVM_H + + +namespace llvm { + class Module; +} + +struct gallivm_ir; +struct tgsi_token; + + +llvm::Module * tgsi_to_llvm(struct gallivm_ir *ir, + const struct tgsi_token *tokens); + + +llvm::Module * tgsi_to_llvmir(struct gallivm_ir *ir, + const struct tgsi_token *tokens); + +#endif |