From 9e6d58fac26a12246e9e560f3802ebcbce2423bc Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Mon, 15 Oct 2007 08:12:22 -0400 Subject: Generate the base shader. --- src/mesa/pipe/draw/draw_vertex_shader_llvm.c | 5 +- src/mesa/pipe/llvm/llvm_builtins.c | 115 ++++++++++++++++++++------ src/mesa/pipe/llvm/llvmtgsi.cpp | 117 ++++++++++++++++++++++----- src/mesa/pipe/llvm/llvmtgsi.h | 4 +- 4 files changed, 194 insertions(+), 47 deletions(-) diff --git a/src/mesa/pipe/draw/draw_vertex_shader_llvm.c b/src/mesa/pipe/draw/draw_vertex_shader_llvm.c index b38498efc57..fd499010516 100644 --- a/src/mesa/pipe/draw/draw_vertex_shader_llvm.c +++ b/src/mesa/pipe/draw/draw_vertex_shader_llvm.c @@ -112,7 +112,7 @@ void draw_vertex_shader_queue_flush_llvm(struct draw_context *draw) float (*consts)[4] = (float (*)[4]) draw->mapped_constants; struct ga_llvm_prog *prog = draw->vertex_shader->state->llvm_prog; - fprintf(stderr, "XX q(%d) ", draw->vs.queue_nr); + fprintf(stderr, "--- XX q(%d) ", draw->vs.queue_nr); /* fetch the inputs */ for (i = 0; i < draw->vs.queue_nr; ++i) { @@ -123,7 +123,8 @@ void draw_vertex_shader_queue_flush_llvm(struct draw_context *draw) /* batch execute the shaders on all the vertices */ ga_llvm_prog_exec(prog, inputs, dests, consts, - draw->vs.queue_nr); + draw->vs.queue_nr, + draw->vertex_info.num_attribs); draw->vs.queue_nr = 0; } diff --git a/src/mesa/pipe/llvm/llvm_builtins.c b/src/mesa/pipe/llvm/llvm_builtins.c index 0f0efeb3039..c7a9ea0d5a1 100644 --- a/src/mesa/pipe/llvm/llvm_builtins.c +++ b/src/mesa/pipe/llvm/llvm_builtins.c @@ -1,20 +1,29 @@ +/* clang --emit-llvm llvm_builtins.c |llvm-as |opt -std-compile-opts |llvm-dis */ +/* clang --emit-llvm llvm_builtins.c |llvm-as |opt -std-compile-opts |llvm2cpp -for=Shader -gen-module -funcname=createBaseShader */ +typedef __attribute__(( ocu_vector_type(4) )) float float4; +#if 0 +//clang doesn't suppoer "struct->member" notation yet +struct vertex_header { + unsigned clipmask:12; + unsigned edgeflag:1; + unsigned pad:3; + unsigned vertex_id:16; -inline float4 compute_clip(float4 vec, float4 scale, float4 trans) -{ - return vec*scale + trans; -} + float clip[4]; + float data[][4]; +}; inline float -dot4(const float4 a, const float4 b) +dot4(float4 a, float4 b) { float4 c = a*b; return c.x + c.y + c.z + c.w; } inline unsigned -compute_clipmask(float4 clip, const float4 (*plane), unsigned nr) +compute_clipmask(float4 clip, float4 (*plane), unsigned nr) { unsigned mask = 0; unsigned i; @@ -29,7 +38,8 @@ compute_clipmask(float4 clip, const float4 (*plane), unsigned nr) inline void collect_results(float4 *results, struct vertex_header *vOut, float4 *planes, int nr_planes, - float4 scale, float4 trans) + float4 scale, float4 trans, + int num_attribs) { /* store results */ unsigned slot; @@ -38,13 +48,14 @@ inline void collect_results(float4 *results, struct vertex_header *vOut, /* Handle attr[0] (position) specially: */ float4 res0 = results[0]; - x = vOut->clip[0] = clip.x; - y = vOut->clip[1] = clip.y; - z = vOut->clip[2] = clip.z; - w = vOut->clip[3] = clip.w; + float *clip = vOut->clip; + x = clip[0] = res0.x; + y = clip[1] = res0.y; + z = clip[2] = res0.z; + w = clip[3] = res0.w; - vOut[i]->clipmask = compute_clipmask(res0, planes, nr_planes); - vOut[i]->edgeflag = 1; + vOut->clipmask = compute_clipmask(res0, planes, nr_planes); + vOut->edgeflag = 1; /* divide by w */ w = 1.0f / w; @@ -54,10 +65,10 @@ inline void collect_results(float4 *results, struct vertex_header *vOut, res0.x = x; res0.y = y; res0.z = z; res0.w = 1; /* Viewport mapping */ - res = res * scale + trans; - vOut->data[0][0] = res.x; - vOut->data[0][1] = res.y; - vOut->data[0][2] = res.z; + res0 = res0 * scale + trans; + vOut->data[0][0] = res0.x; + vOut->data[0][1] = res0.y; + vOut->data[0][2] = res0.z; vOut->data[0][3] = w; /* Remaining attributes are packed into sequential post-transform @@ -65,7 +76,7 @@ inline void collect_results(float4 *results, struct vertex_header *vOut, * Skip 0 since we just did it above. * Subtract two because of the VERTEX_HEADER, CLIP_POS attribs. */ - for (slot = 1; slot < draw->vertex_info.num_attribs - 2; slot++) { + for (slot = 1; slot < num_attribs - 2; slot++) { float4 vec = results[slot]; vOut->data[slot][0] = vec.x; vOut->data[slot][1] = vec.y; @@ -79,12 +90,68 @@ inline void collect_results(float4 *results, struct vertex_header *vOut, vOut->data[slot][3]); } } +#endif -void run_vertex_shader(float ainputs[VS_QUEUE_LENGTH][PIPE_MAX_SHADER_INPUTS][4], - struct vertex_header *dests[VS_QUEUE_LENGTH], - float *aconsts[4] - int count) +void from_array(float4 (*res)[32], float (*ainputs)[32][4], + int count, int num_attribs) { - float4 inputs[VS_QUEUE_LENGTH][PIPE_MAX_SHADER_INPUTS]; - float4 *consts; + for (int i = 0; i < count; ++i) { + for (int j = 0; j < num_attribs; ++j) { + float4 vec; + vec.x = ainputs[i][j][0]; + vec.y = ainputs[i][j][1]; + vec.z = ainputs[i][j][2]; + vec.w = ainputs[i][j][3]; + res[i][j] = vec; + } + } +} + +void from_consts(float4 *res, float (*ainputs)[4], + int count) +{ + for (int i = 0; i < count; ++i) { + float4 vec; + vec.x = ainputs[i][0]; + vec.y = ainputs[i][1]; + vec.z = ainputs[i][2]; + vec.w = ainputs[i][3]; + res[i] = vec; + } +} + +void to_array(float (*dests)[4], float4 *in, int num_attribs) +{ + for (int i = 0; i < num_attribs; ++i) { + float *rd = dests[i]; + float4 ri = in[i]; + rd[0] = ri.x; + rd[1] = ri.y; + rd[2] = ri.z; + rd[3] = ri.w; + } +} + +extern void execute_shader(float4 *dests, float4 *inputs, + float4 *consts); + +void run_vertex_shader(float (*ainputs)[32][4], + float (*dests)[32][4], + float (*aconsts)[4], + int count, + int num_attribs) +{ + float4 inputs[16*32*4][32]; + float4 consts[32]; + float4 results[16*32*4][32]; + + printf("XXXXXXXXXXX run_vertex_shader\n"); + from_array(inputs, ainputs, count, num_attribs); + from_consts(consts, aconsts, 32); + for (int i = 0; i < count; ++i) { + float4 *in = inputs[i]; + float4 *res = results[i]; + to_array(dests[i], results[i], num_attribs); + execute_shader(res, in, consts); + } } diff --git a/src/mesa/pipe/llvm/llvmtgsi.cpp b/src/mesa/pipe/llvm/llvmtgsi.cpp index 46b7561b5e8..1abc148521f 100644 --- a/src/mesa/pipe/llvm/llvmtgsi.cpp +++ b/src/mesa/pipe/llvm/llvmtgsi.cpp @@ -14,15 +14,88 @@ #include #include #include +#include +#include #include #include #include #include #include #include +#include +#include +#include +#include #include #include +using namespace llvm; +#include "llvm_base_shader.cpp" + + +static inline void addPass(PassManager &PM, Pass *P) { + // Add the pass to the pass manager... + PM.add(P); +} + +static inline void AddStandardCompilePasses(PassManager &PM) { + PM.add(createVerifierPass()); // Verify that input is correct + + addPass(PM, createLowerSetJmpPass()); // Lower llvm.setjmp/.longjmp + + // If the -strip-debug command line option was specified, do it. + //if (StripDebug) + // addPass(PM, createStripSymbolsPass(true)); + + addPass(PM, createRaiseAllocationsPass()); // call %malloc -> malloc inst + addPass(PM, createCFGSimplificationPass()); // Clean up disgusting code + addPass(PM, createPromoteMemoryToRegisterPass());// Kill useless allocas + addPass(PM, createGlobalOptimizerPass()); // Optimize out global vars + addPass(PM, createGlobalDCEPass()); // Remove unused fns and globs + addPass(PM, createIPConstantPropagationPass());// IP Constant Propagation + addPass(PM, createDeadArgEliminationPass()); // Dead argument elimination + addPass(PM, createInstructionCombiningPass()); // Clean up after IPCP & DAE + addPass(PM, createCFGSimplificationPass()); // Clean up after IPCP & DAE + + addPass(PM, createPruneEHPass()); // Remove dead EH info + + //if (!DisableInline) + addPass(PM, createFunctionInliningPass()); // Inline small functions + addPass(PM, createArgumentPromotionPass()); // Scalarize uninlined fn args + + addPass(PM, createTailDuplicationPass()); // Simplify cfg by copying code + addPass(PM, createInstructionCombiningPass()); // Cleanup for scalarrepl. + addPass(PM, createCFGSimplificationPass()); // Merge & remove BBs + addPass(PM, createScalarReplAggregatesPass()); // Break up aggregate allocas + addPass(PM, createInstructionCombiningPass()); // Combine silly seq's + addPass(PM, createCondPropagationPass()); // Propagate conditionals + + addPass(PM, createTailCallEliminationPass()); // Eliminate tail calls + addPass(PM, createCFGSimplificationPass()); // Merge & remove BBs + addPass(PM, createReassociatePass()); // Reassociate expressions + addPass(PM, createLoopRotatePass()); + addPass(PM, createLICMPass()); // Hoist loop invariants + addPass(PM, createLoopUnswitchPass()); // Unswitch loops. + addPass(PM, createLoopIndexSplitPass()); // Index split loops. + addPass(PM, createInstructionCombiningPass()); // Clean up after LICM/reassoc + addPass(PM, createIndVarSimplifyPass()); // Canonicalize indvars + addPass(PM, createLoopUnrollPass()); // Unroll small loops + addPass(PM, createInstructionCombiningPass()); // Clean up after the unroller + addPass(PM, createGVNPass()); // Remove redundancies + addPass(PM, createSCCPPass()); // Constant prop with SCCP + + // Run instcombine after redundancy elimination to exploit opportunities + // opened up by them. + addPass(PM, createInstructionCombiningPass()); + addPass(PM, createCondPropagationPass()); // Propagate conditionals + + addPass(PM, createDeadStoreEliminationPass()); // Delete dead stores + addPass(PM, createAggressiveDCEPass()); // SSA based 'Aggressive DCE' + addPass(PM, createCFGSimplificationPass()); // Merge & remove BBs + addPass(PM, createSimplifyLibCallsPass()); // Library Call Optimizations + addPass(PM, createDeadTypeEliminationPass()); // Eliminate dead types + addPass(PM, createConstantMergePass()); // Merge dup global constants +} static void translate_declaration(llvm::Module *module, @@ -341,7 +414,7 @@ translate_instruction(llvm::Module *module, static llvm::Module * tgsi_to_llvm(const struct tgsi_token *tokens) { - llvm::Module *mod = new llvm::Module("tgsi"); + llvm::Module *mod = createBaseShader(); struct tgsi_parse_context parse; struct tgsi_full_instruction fi; struct tgsi_full_declaration fd; @@ -402,18 +475,33 @@ ga_llvm_from_tgsi(const struct tgsi_token *tokens) struct ga_llvm_prog *ga_llvm = (struct ga_llvm_prog *)malloc(sizeof(struct ga_llvm_prog)); llvm::Module *mod = tgsi_to_llvm(tokens); + + /* Run optimization passes over it */ + PassManager passes; + // Add an appropriate TargetData instance for this module... + passes.add(new TargetData(mod)); + AddStandardCompilePasses(passes); + std::cout<<"Running optimization passes..."<getFunction("run_vertex_shader"); + std::cout << "run_vertex_shader = "<function = ee->getPointerToFunctionOrStub(func); + std::cout << " -- FUNC is " <function; + return ga_llvm; } @@ -423,6 +511,7 @@ void ga_llvm_prog_delete(struct ga_llvm_prog *prog) delete mod; prog->module = 0; prog->engine = 0; + prog->function = 0; free(prog); } @@ -430,24 +519,12 @@ int ga_llvm_prog_exec(struct ga_llvm_prog *prog, float (*inputs)[32][4], void *dests[16*32*4], float (*consts)[4], - int count) + int count, + int num_attribs) { - //std::cout << "START "<(prog->module); - llvm::Function *func = mod->getFunction("main"); - llvm::ExecutionEngine *ee = static_cast(prog->engine); - - std::vector args(0); - //args[0] = GenericValue(&st); - //std::cout << "Mod is "<<*mod; - //std::cout << "\n\nRunning llvm: " << std::endl; - if (func) { - std::cout << "Func is "<runFunction(func, args); - } + std::cout << "---- START LLVM Execution "<