diff options
author | José Fonseca <[email protected]> | 2010-09-02 12:45:50 +0100 |
---|---|---|
committer | José Fonseca <[email protected]> | 2010-09-11 13:25:49 +0100 |
commit | 93158622e26df1227f6eca8d619b5521f4cb1368 (patch) | |
tree | 289177e80a42c16ec34e44b0cb52be4e90e301e8 /src/gallium | |
parent | 58daea741fa21fe3f89fd7bf106df1545c5b21af (diff) |
gallivm: Basic AoS TGSI -> LLVM IR.
Essentially a variation of the SoA version.
Diffstat (limited to 'src/gallium')
-rw-r--r-- | src/gallium/auxiliary/Makefile | 1 | ||||
-rw-r--r-- | src/gallium/auxiliary/SConscript | 1 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_tgsi.h | 29 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c | 1125 |
4 files changed, 1156 insertions, 0 deletions
diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index eb86d83d2a2..5388f4ecd5a 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -171,6 +171,7 @@ GALLIVM_SOURCES = \ gallivm/lp_bld_sample_soa.c \ gallivm/lp_bld_struct.c \ gallivm/lp_bld_swizzle.c \ + gallivm/lp_bld_tgsi_aos.c \ gallivm/lp_bld_tgsi_soa.c \ gallivm/lp_bld_type.c \ draw/draw_llvm.c \ diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index 6210ada990e..ba8be2efd1f 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -222,6 +222,7 @@ if env['llvm']: 'gallivm/lp_bld_sample_soa.c', 'gallivm/lp_bld_struct.c', 'gallivm/lp_bld_swizzle.c', + 'gallivm/lp_bld_tgsi_aos.c', 'gallivm/lp_bld_tgsi_soa.c', 'gallivm/lp_bld_type.c', 'draw/draw_llvm.c', diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h index e60ad1d9044..bc9140852ff 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h @@ -82,6 +82,24 @@ struct lp_build_sampler_soa }; +struct lp_build_sampler_aos +{ + void + (*destroy)( struct lp_build_sampler_aos *sampler ); + + LLVMValueRef + (*emit_fetch_texel)( const struct lp_build_sampler_aos *sampler, + LLVMBuilderRef builder, + struct lp_type type, + unsigned target, /* TGSI_TEXTURE_* */ + unsigned unit, + LLVMValueRef coords, + LLVMValueRef ddx, + LLVMValueRef ddy, + enum lp_build_tex_modifier modifier); +}; + + void lp_build_tgsi_soa(LLVMBuilderRef builder, const struct tgsi_token *tokens, @@ -95,4 +113,15 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, const struct tgsi_shader_info *info); +void +lp_build_tgsi_aos(LLVMBuilderRef builder, + const struct tgsi_token *tokens, + struct lp_type type, + LLVMValueRef consts_ptr, + const LLVMValueRef *inputs, + LLVMValueRef *outputs, + struct lp_build_sampler_aos *sampler, + const struct tgsi_shader_info *info); + + #endif /* LP_BLD_TGSI_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c new file mode 100644 index 00000000000..2793a1a3976 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c @@ -0,0 +1,1125 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * TGSI to LLVM IR translation -- AoS. + * + * FIXME: + * - No control flow support: the existing control flow code should be factored + * out into from the SoA code into a common module and shared. + * - No derivatives. Derivate logic should be pluggable, just like the samplers. + * + * @author Jose Fonseca <[email protected]> + */ + +#include "pipe/p_config.h" +#include "pipe/p_shader_tokens.h" +#include "util/u_debug.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_info.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" +#include "tgsi/tgsi_scan.h" +#include "lp_bld_type.h" +#include "lp_bld_const.h" +#include "lp_bld_arit.h" +#include "lp_bld_gather.h" +#include "lp_bld_logic.h" +#include "lp_bld_swizzle.h" +#include "lp_bld_flow.h" +#include "lp_bld_quad.h" +#include "lp_bld_tgsi.h" +#include "lp_bld_limits.h" +#include "lp_bld_debug.h" + + +#define LP_MAX_INSTRUCTIONS 256 + + +struct lp_build_tgsi_aos_context +{ + struct lp_build_context base; + + /* Builder for integer masks and indices */ + struct lp_build_context int_bld; + + LLVMValueRef consts_ptr; + const LLVMValueRef *inputs; + LLVMValueRef *outputs; + + const struct lp_build_sampler_aos *sampler; + + LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES]; + LLVMValueRef temps[LP_MAX_TGSI_TEMPS]; + LLVMValueRef addr[LP_MAX_TGSI_ADDRS]; + LLVMValueRef preds[LP_MAX_TGSI_PREDS]; + + /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is + * set in the indirect_files field. + * The temps[] array above is unused then. + */ + LLVMValueRef temps_array; + + /** bitmask indicating which register files are accessed indirectly */ + unsigned indirect_files; + + struct tgsi_full_instruction *instructions; + uint max_instructions; +}; + + +/** + * Register fetch. + */ +static LLVMValueRef +emit_fetch( + struct lp_build_tgsi_aos_context *bld, + const struct tgsi_full_instruction *inst, + unsigned src_op) +{ + struct lp_type type = bld->base.type; + const struct tgsi_full_src_register *reg = &inst->Src[src_op]; + unsigned char swizzles[4]; + LLVMValueRef res; + unsigned chan; + + assert(!reg->Register.Indirect); + + /* + * Fetch the from the register file. + */ + + switch (reg->Register.File) { + case TGSI_FILE_CONSTANT: + /* + * Get the constants components + */ + + res = bld->base.undef; + for (chan = 0; chan < 4; ++chan) { + LLVMValueRef index; + LLVMValueRef scalar_ptr; + LLVMValueRef scalar; + + index = LLVMConstInt(LLVMInt32Type(), + reg->Register.Index*4 + chan, 0); + + scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, + &index, 1, ""); + + scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, ""); + + lp_build_name(scalar, "const[%u].%c", reg->Register.Index, "xyzw"[chan]); + + index = LLVMConstInt(LLVMInt32Type(), chan, 0); + + res = LLVMBuildInsertElement(bld->base.builder, res, scalar, index, ""); + } + + /* + * Broadcast the first quaternion to all others. + * + * XXX: could be factored into a reusable function. + */ + + if (type.length > 4) { + LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; + unsigned i; + + for (chan = 0; chan < 4; ++chan) { + shuffles[chan] = LLVMConstInt(LLVMInt32Type(), chan, 0); + } + + for (i = 4; i < type.length; ++i) { + shuffles[i] = shuffles[i % 4]; + } + + res = LLVMBuildShuffleVector(bld->base.builder, + res, bld->base.undef, + LLVMConstVector(shuffles, type.length), + ""); + } + break; + + case TGSI_FILE_IMMEDIATE: + res = bld->immediates[reg->Register.Index]; + assert(res); + break; + + case TGSI_FILE_INPUT: + res = bld->inputs[reg->Register.Index]; + assert(res); + break; + + case TGSI_FILE_TEMPORARY: + { + LLVMValueRef temp_ptr; + temp_ptr = bld->temps[reg->Register.Index]; + res = LLVMBuildLoad(bld->base.builder, temp_ptr, ""); + if (!res) + return bld->base.undef; + } + break; + + default: + assert(0 && "invalid src register in emit_fetch()"); + return bld->base.undef; + } + + /* + * Apply sign modifier. + */ + + if (reg->Register.Absolute) { + res = lp_build_abs(&bld->base, res); + } + + if(reg->Register.Negate) { + res = lp_build_negate(&bld->base, res); + } + + /* + * Swizzle the argument + */ + + for (chan = 0; chan < 4; ++chan) { + const unsigned swizzle = + tgsi_util_get_full_src_register_swizzle(reg, chan); + if (swizzle > 3) { + assert(0 && "invalid swizzle in emit_fetch()"); + return bld->base.undef; + } + swizzles[chan] = swizzle; + } + + res = lp_build_swizzle_aos(&bld->base, res, swizzles); + + return res; +} + + +/** + * Register store. + */ +static void +emit_store( + struct lp_build_tgsi_aos_context *bld, + const struct tgsi_full_instruction *inst, + unsigned index, + LLVMValueRef value) +{ + const struct tgsi_full_dst_register *reg = &inst->Dst[index]; + LLVMValueRef mask = NULL; + LLVMValueRef ptr; + + /* + * Saturate the value + */ + + switch (inst->Instruction.Saturate) { + case TGSI_SAT_NONE: + break; + + case TGSI_SAT_ZERO_ONE: + value = lp_build_max(&bld->base, value, bld->base.zero); + value = lp_build_min(&bld->base, value, bld->base.one); + break; + + case TGSI_SAT_MINUS_PLUS_ONE: + value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.type, -1.0)); + value = lp_build_min(&bld->base, value, bld->base.one); + break; + + default: + assert(0); + } + + /* + * Translate the register file + */ + + assert(!reg->Register.Indirect); + + switch (reg->Register.File) { + case TGSI_FILE_OUTPUT: + ptr = bld->outputs[reg->Register.Index]; + break; + + case TGSI_FILE_TEMPORARY: + ptr = bld->temps[reg->Register.Index]; + break; + + case TGSI_FILE_ADDRESS: + ptr = bld->addr[reg->Indirect.Index]; + break; + + case TGSI_FILE_PREDICATE: + ptr = bld->preds[reg->Register.Index]; + break; + + default: + assert(0); + return; + } + + /* + * Predicate + */ + + if (inst->Instruction.Predicate) { + unsigned char swizzles[4]; + LLVMValueRef pred; + + assert(inst->Predicate.Index < LP_MAX_TGSI_PREDS); + + pred = LLVMBuildLoad(bld->base.builder, + bld->preds[inst->Predicate.Index], ""); + + /* + * Convert the value to an integer mask. + */ + pred = lp_build_compare(bld->base.builder, + bld->base.type, + PIPE_FUNC_NOTEQUAL, + pred, + bld->base.zero); + + if (inst->Predicate.Negate) { + pred = LLVMBuildNot(bld->base.builder, pred, ""); + } + + swizzles[0] = inst->Predicate.SwizzleX; + swizzles[1] = inst->Predicate.SwizzleY; + swizzles[2] = inst->Predicate.SwizzleZ; + swizzles[3] = inst->Predicate.SwizzleW; + + pred = lp_build_swizzle_aos(&bld->base, pred, swizzles); + + if (mask) { + mask = LLVMBuildAnd(bld->base.builder, mask, pred, ""); + } else { + mask = pred; + } + } + + /* + * Writemask + */ + + if (reg->Register.WriteMask != TGSI_WRITEMASK_XYZW) { + LLVMValueRef writemask; + + writemask = lp_build_const_mask_aos(bld->base.type, reg->Register.WriteMask); + + if (mask) { + mask = LLVMBuildAnd(bld->base.builder, mask, writemask, ""); + } else { + mask = writemask; + } + } + + if (mask) { + LLVMValueRef orig_value; + + orig_value = LLVMBuildLoad(bld->base.builder, ptr, ""); + value = lp_build_select(&bld->base, + mask, value, orig_value); + } + + LLVMBuildStore(bld->base.builder, value, ptr); +} + + +/** + * High-level instruction translators. + */ + +static LLVMValueRef +emit_tex(struct lp_build_tgsi_aos_context *bld, + const struct tgsi_full_instruction *inst, + enum lp_build_tex_modifier modifier) +{ + unsigned target; + unsigned unit; + LLVMValueRef coords; + LLVMValueRef ddx; + LLVMValueRef ddy; + + if (!bld->sampler) { + _debug_printf("warning: found texture instruction but no sampler generator supplied\n"); + return bld->base.undef; + } + + target = inst->Texture.Texture; + + coords = emit_fetch( bld, inst, 0 ); + + if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) { + ddx = emit_fetch( bld, inst, 1 ); + ddy = emit_fetch( bld, inst, 2 ); + unit = inst->Src[3].Register.Index; + } else { +#if 0 + ddx = lp_build_ddx( &bld->base, coords ); + ddy = lp_build_ddy( &bld->base, coords ); +#else + /* TODO */ + ddx = bld->base.one; + ddy = bld->base.one; +#endif + unit = inst->Src[1].Register.Index; + } + + return bld->sampler->emit_fetch_texel(bld->sampler, + bld->base.builder, + bld->base.type, + target, unit, + coords, ddx, ddy, + modifier); +} + + +static void +emit_declaration( + struct lp_build_tgsi_aos_context *bld, + const struct tgsi_full_declaration *decl) +{ + LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type); + + unsigned first = decl->Range.First; + unsigned last = decl->Range.Last; + unsigned idx; + + for (idx = first; idx <= last; ++idx) { + switch (decl->Declaration.File) { + case TGSI_FILE_TEMPORARY: + assert(idx < LP_MAX_TGSI_TEMPS); + if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) { + LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(), + last + 1, 0); + bld->temps_array = lp_build_array_alloca(bld->base.builder, + vec_type, array_size, ""); + } else { + bld->temps[idx] = lp_build_alloca(bld->base.builder, + vec_type, ""); + } + break; + + case TGSI_FILE_OUTPUT: + bld->outputs[idx] = lp_build_alloca(bld->base.builder, + vec_type, ""); + break; + + case TGSI_FILE_ADDRESS: + assert(idx < LP_MAX_TGSI_ADDRS); + bld->addr[idx] = lp_build_alloca(bld->base.builder, + vec_type, ""); + break; + + case TGSI_FILE_PREDICATE: + assert(idx < LP_MAX_TGSI_PREDS); + bld->preds[idx] = lp_build_alloca(bld->base.builder, + vec_type, ""); + break; + + default: + /* don't need to declare other vars */ + break; + } + } +} + + +/** + * Emit LLVM for one TGSI instruction. + * \param return TRUE for success, FALSE otherwise + */ +static boolean +emit_instruction( + struct lp_build_tgsi_aos_context *bld, + const struct tgsi_full_instruction *inst, + const struct tgsi_opcode_info *info, + int *pc) +{ + LLVMValueRef src0, src1, src2; + LLVMValueRef tmp0, tmp1; + LLVMValueRef dst0; + + /* + * Stores and write masks are handled in a general fashion after the long + * instruction opcode switch statement. + * + * Although not stricitly necessary, we avoid generating instructions for + * channels which won't be stored, in cases where's that easy. For some + * complex instructions, like texture sampling, it is more convenient to + * assume a full writemask and then let LLVM optimization passes eliminate + * redundant code. + */ + + (*pc)++; + + assert(info->num_dst <= 1); + if (info->num_dst) { + dst0 = bld->base.undef; + } + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ARL: + src0 = emit_fetch(bld, inst, 0); + dst0 = lp_build_floor(&bld->base, src0); + break; + + case TGSI_OPCODE_MOV: + dst0 = emit_fetch(bld, inst, 0); + break; + + case TGSI_OPCODE_LIT: + return FALSE; + + case TGSI_OPCODE_RCP: + /* TGSI_OPCODE_RECIP */ + src0 = emit_fetch(bld, inst, 0); + dst0 = lp_build_rcp(&bld->base, src0); + break; + + case TGSI_OPCODE_RSQ: + /* TGSI_OPCODE_RECIPSQRT */ + src0 = emit_fetch(bld, inst, 0); + tmp0 = lp_build_abs(&bld->base, src0); + dst0 = lp_build_rsqrt(&bld->base, tmp0); + break; + + case TGSI_OPCODE_EXP: + return FALSE; + + case TGSI_OPCODE_LOG: + return FALSE; + + case TGSI_OPCODE_MUL: + src0 = emit_fetch(bld, inst, 0); + src1 = emit_fetch(bld, inst, 1); + dst0 = lp_build_mul(&bld->base, src0, src1); + break; + + case TGSI_OPCODE_ADD: + src0 = emit_fetch(bld, inst, 0); + src1 = emit_fetch(bld, inst, 1); + dst0 = lp_build_add(&bld->base, src0, src1); + break; + + case TGSI_OPCODE_DP3: + /* TGSI_OPCODE_DOT3 */ + return FALSE; + + case TGSI_OPCODE_DP4: + /* TGSI_OPCODE_DOT4 */ + return FALSE; + + case TGSI_OPCODE_DST: + return FALSE; + + case TGSI_OPCODE_MIN: + src0 = emit_fetch(bld, inst, 0); + src1 = emit_fetch(bld, inst, 1); + dst0 = lp_build_max(&bld->base, src0, src1); + break; + + case TGSI_OPCODE_MAX: + src0 = emit_fetch(bld, inst, 0); + src1 = emit_fetch(bld, inst, 1); + dst0 = lp_build_max(&bld->base, src0, src1); + break; + + case TGSI_OPCODE_SLT: + /* TGSI_OPCODE_SETLT */ + src0 = emit_fetch(bld, inst, 0); + src1 = emit_fetch(bld, inst, 1); + tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_LESS, src0, src1); + dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero); + break; + + case TGSI_OPCODE_SGE: + /* TGSI_OPCODE_SETGE */ + src0 = emit_fetch(bld, inst, 0); + src1 = emit_fetch(bld, inst, 1); + tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, src0, src1); + dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero); + break; + + case TGSI_OPCODE_MAD: + /* TGSI_OPCODE_MADD */ + src0 = emit_fetch(bld, inst, 0); + src1 = emit_fetch(bld, inst, 1); + src2 = emit_fetch(bld, inst, 2); + tmp0 = lp_build_mul(&bld->base, src0, src1); + dst0 = lp_build_add(&bld->base, tmp0, src2); + break; + + case TGSI_OPCODE_SUB: + src0 = emit_fetch(bld, inst, 0); + src1 = emit_fetch(bld, inst, 1); + dst0 = lp_build_sub(&bld->base, src0, src1); + break; + + case TGSI_OPCODE_LRP: + src0 = emit_fetch(bld, inst, 0); + src1 = emit_fetch(bld, inst, 1); + src2 = emit_fetch(bld, inst, 2); + tmp0 = lp_build_sub(&bld->base, src1, src2); + tmp0 = lp_build_mul(&bld->base, src0, tmp0); + dst0 = lp_build_add(&bld->base, tmp0, src2); + break; + + case TGSI_OPCODE_CND: + src0 = emit_fetch(bld, inst, 0); + src1 = emit_fetch(bld, inst, 1); + src2 = emit_fetch(bld, inst, 2); + tmp1 = lp_build_const_vec(bld->base.type, 0.5); + tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, src2, tmp1); + dst0 = lp_build_select(&bld->base, tmp0, src0, src1); + break; + + case TGSI_OPCODE_DP2A: + return FALSE; + + case TGSI_OPCODE_FRC: + src0 = emit_fetch(bld, inst, 0); + tmp0 = lp_build_floor(&bld->base, src0); + dst0 = lp_build_sub(&bld->base, src0, tmp0); + break; + + case TGSI_OPCODE_CLAMP: + src0 = emit_fetch(bld, inst, 0); + src1 = emit_fetch(bld, inst, 1); + src2 = emit_fetch(bld, inst, 2); + tmp0 = lp_build_max(&bld->base, src0, src1); + dst0 = lp_build_min(&bld->base, tmp0, src2); + break; + + case TGSI_OPCODE_FLR: + src0 = emit_fetch(bld, inst, 0); + dst0 = lp_build_floor(&bld->base, src0); + break; + + case TGSI_OPCODE_ROUND: + src0 = emit_fetch(bld, inst, 0); + dst0 = lp_build_round(&bld->base, src0); + break; + + case TGSI_OPCODE_EX2: + src0 = emit_fetch(bld, inst, 0); + tmp0 = lp_build_swizzle_scalar_aos(&bld->base, src0, TGSI_SWIZZLE_X); + dst0 = lp_build_exp2(&bld->base, tmp0); + break; + + case TGSI_OPCODE_LG2: + src0 = emit_fetch(bld, inst, 0); + tmp0 = lp_build_swizzle_scalar_aos(&bld->base, src0, TGSI_SWIZZLE_X); + dst0 = lp_build_log2(&bld->base, tmp0); + break; + + case TGSI_OPCODE_POW: + src0 = emit_fetch(bld, inst, 0); + src0 = lp_build_swizzle_scalar_aos(&bld->base, src0, TGSI_SWIZZLE_X); + src1 = emit_fetch(bld, inst, 1); + src1 = lp_build_swizzle_scalar_aos(&bld->base, src1, TGSI_SWIZZLE_X); + dst0 = lp_build_pow(&bld->base, src0, src1); + break; + + case TGSI_OPCODE_XPD: + return FALSE; + + case TGSI_OPCODE_ABS: + src0 = emit_fetch(bld, inst, 0); + dst0 = lp_build_abs(&bld->base, src0); + break; + + case TGSI_OPCODE_RCC: + /* deprecated? */ + assert(0); + return FALSE; + + case TGSI_OPCODE_DPH: + return FALSE; + + case TGSI_OPCODE_COS: + src0 = emit_fetch(bld, inst, 0); + tmp0 = lp_build_swizzle_scalar_aos(&bld->base, src0, TGSI_SWIZZLE_X); + dst0 = lp_build_cos(&bld->base, tmp0); + break; + + case TGSI_OPCODE_DDX: + return FALSE; + + case TGSI_OPCODE_DDY: + return FALSE; + + case TGSI_OPCODE_KILP: + /* predicated kill */ + return FALSE; + + case TGSI_OPCODE_KIL: + /* conditional kill */ + return FALSE; + + case TGSI_OPCODE_PK2H: + return FALSE; + break; + + case TGSI_OPCODE_PK2US: + return FALSE; + break; + + case TGSI_OPCODE_PK4B: + return FALSE; + break; + + case TGSI_OPCODE_PK4UB: + return FALSE; + + case TGSI_OPCODE_RFL: + return FALSE; + + case TGSI_OPCODE_SEQ: + src0 = emit_fetch(bld, inst, 0); + src1 = emit_fetch(bld, inst, 1); + tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_EQUAL, src0, src1); + dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero); + break; + + case TGSI_OPCODE_SFL: + dst0 = bld->base.zero; + break; + + case TGSI_OPCODE_SGT: + src0 = emit_fetch(bld, inst, 0); + src1 = emit_fetch(bld, inst, 1); + tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, src0, src1); + dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero); + break; + + case TGSI_OPCODE_SIN: + src0 = emit_fetch(bld, inst, 0); + tmp0 = lp_build_swizzle_scalar_aos(&bld->base, src0, TGSI_SWIZZLE_X); + dst0 = lp_build_sin(&bld->base, tmp0); + break; + + case TGSI_OPCODE_SLE: + src0 = emit_fetch(bld, inst, 0); + src1 = emit_fetch(bld, inst, 1); + tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_LEQUAL, src0, src1); + dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero); + break; + + case TGSI_OPCODE_SNE: + src0 = emit_fetch(bld, inst, 0); + src1 = emit_fetch(bld, inst, 1); + tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL, src0, src1); + dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero); + break; + + case TGSI_OPCODE_STR: + dst0 = bld->base.one; + break; + + case TGSI_OPCODE_TEX: + dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_NONE); + break; + + case TGSI_OPCODE_TXD: + dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV); + break; + + case TGSI_OPCODE_UP2H: + /* deprecated */ + assert (0); + return FALSE; + break; + + case TGSI_OPCODE_UP2US: + /* deprecated */ + assert(0); + return FALSE; + break; + + case TGSI_OPCODE_UP4B: + /* deprecated */ + assert(0); + return FALSE; + break; + + case TGSI_OPCODE_UP4UB: + /* deprecated */ + assert(0); + return FALSE; + break; + + case TGSI_OPCODE_X2D: + /* deprecated? */ + assert(0); + return FALSE; + break; + + case TGSI_OPCODE_ARA: + /* deprecated */ + assert(0); + return FALSE; + break; + + case TGSI_OPCODE_ARR: + src0 = emit_fetch(bld, inst, 0); + dst0 = lp_build_round(&bld->base, src0); + break; + + case TGSI_OPCODE_BRA: + /* deprecated */ + assert(0); + return FALSE; + break; + + case TGSI_OPCODE_CAL: + return FALSE; + + case TGSI_OPCODE_RET: + return FALSE; + + case TGSI_OPCODE_END: + *pc = -1; + break; + + case TGSI_OPCODE_SSG: + /* TGSI_OPCODE_SGN */ + tmp0 = emit_fetch(bld, inst, 0); + dst0 = lp_build_sgn(&bld->base, tmp0); + break; + + case TGSI_OPCODE_CMP: + src0 = emit_fetch(bld, inst, 0); + src1 = emit_fetch(bld, inst, 1); + src2 = emit_fetch(bld, inst, 2); + tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_LESS, src0, bld->base.zero); + dst0 = lp_build_select(&bld->base, tmp0, src1, src2); + break; + + case TGSI_OPCODE_SCS: + return FALSE; + + case TGSI_OPCODE_TXB: + dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS); + break; + + case TGSI_OPCODE_NRM: + /* fall-through */ + case TGSI_OPCODE_NRM4: + return FALSE; + + case TGSI_OPCODE_DIV: + /* deprecated */ + assert(0); + return FALSE; + break; + + case TGSI_OPCODE_DP2: + return FALSE; + + case TGSI_OPCODE_TXL: + dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD); + break; + + case TGSI_OPCODE_TXP: + dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED); + break; + + case TGSI_OPCODE_BRK: + return FALSE; + + case TGSI_OPCODE_IF: + return FALSE; + + case TGSI_OPCODE_BGNLOOP: + return FALSE; + + case TGSI_OPCODE_BGNSUB: + return FALSE; + + case TGSI_OPCODE_ELSE: + return FALSE; + + case TGSI_OPCODE_ENDIF: + return FALSE; + + case TGSI_OPCODE_ENDLOOP: + return FALSE; + + case TGSI_OPCODE_ENDSUB: + return FALSE; + + case TGSI_OPCODE_PUSHA: + /* deprecated? */ + assert(0); + return FALSE; + break; + + case TGSI_OPCODE_POPA: + /* deprecated? */ + assert(0); + return FALSE; + break; + + case TGSI_OPCODE_CEIL: + src0 = emit_fetch(bld, inst, 0); + dst0 = lp_build_ceil(&bld->base, src0); + break; + + case TGSI_OPCODE_I2F: + /* deprecated? */ + assert(0); + return FALSE; + break; + + case TGSI_OPCODE_NOT: + /* deprecated? */ + assert(0); + return FALSE; + break; + + case TGSI_OPCODE_TRUNC: + src0 = emit_fetch(bld, inst, 0); + dst0 = lp_build_trunc(&bld->base, src0); + break; + + case TGSI_OPCODE_SHL: + /* deprecated? */ + assert(0); + return FALSE; + break; + + case TGSI_OPCODE_ISHR: + /* deprecated? */ + assert(0); + return FALSE; + break; + + case TGSI_OPCODE_AND: + /* deprecated? */ + assert(0); + return FALSE; + break; + + case TGSI_OPCODE_OR: + /* deprecated? */ + assert(0); + return FALSE; + break; + + case TGSI_OPCODE_MOD: + /* deprecated? */ + assert(0); + return FALSE; + break; + + case TGSI_OPCODE_XOR: + /* deprecated? */ + assert(0); + return FALSE; + break; + + case TGSI_OPCODE_SAD: + /* deprecated? */ + assert(0); + return FALSE; + break; + + case TGSI_OPCODE_TXF: + /* deprecated? */ + assert(0); + return FALSE; + break; + + case TGSI_OPCODE_TXQ: + /* deprecated? */ + assert(0); + return FALSE; + break; + + case TGSI_OPCODE_CONT: + return FALSE; + + case TGSI_OPCODE_EMIT: + return FALSE; + break; + + case TGSI_OPCODE_ENDPRIM: + return FALSE; + break; + + case TGSI_OPCODE_NOP: + break; + + default: + return FALSE; + } + + if (info->num_dst) { + emit_store(bld, inst, 0, dst0); + } + + return TRUE; +} + + +void +lp_build_tgsi_aos(LLVMBuilderRef builder, + const struct tgsi_token *tokens, + struct lp_type type, + LLVMValueRef consts_ptr, + const LLVMValueRef *inputs, + LLVMValueRef *outputs, + struct lp_build_sampler_aos *sampler, + const struct tgsi_shader_info *info) +{ + struct lp_build_tgsi_aos_context bld; + struct tgsi_parse_context parse; + uint num_immediates = 0; + uint num_instructions = 0; + unsigned chan; + int pc = 0; + + /* Setup build context */ + memset(&bld, 0, sizeof bld); + lp_build_context_init(&bld.base, builder, type); + lp_build_context_init(&bld.int_bld, builder, lp_int_type(type)); + bld.inputs = inputs; + bld.outputs = outputs; + bld.consts_ptr = consts_ptr; + bld.sampler = sampler; + bld.indirect_files = info->indirect_files; + bld.instructions = (struct tgsi_full_instruction *) + MALLOC(LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction)); + bld.max_instructions = LP_MAX_INSTRUCTIONS; + + if (!bld.instructions) { + return; + } + + tgsi_parse_init(&parse, tokens); + + while (!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + + switch(parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_DECLARATION: + /* Inputs already interpolated */ + emit_declaration(&bld, &parse.FullToken.FullDeclaration); + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + { + /* save expanded instruction */ + if (num_instructions == bld.max_instructions) { + struct tgsi_full_instruction *instructions; + instructions = REALLOC(bld.instructions, + bld.max_instructions + * sizeof(struct tgsi_full_instruction), + (bld.max_instructions + LP_MAX_INSTRUCTIONS) + * sizeof(struct tgsi_full_instruction)); + if (!instructions) { + break; + } + bld.instructions = instructions; + bld.max_instructions += LP_MAX_INSTRUCTIONS; + } + + memcpy(bld.instructions + num_instructions, + &parse.FullToken.FullInstruction, + sizeof(bld.instructions[0])); + + num_instructions++; + } + + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + /* simply copy the immediate values into the next immediates[] slot */ + { + const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; + float rgba[4]; + assert(size <= 4); + assert(num_immediates < LP_MAX_TGSI_IMMEDIATES); + for (chan = 0; chan < size; ++chan) { + rgba[chan] = parse.FullToken.FullImmediate.u[chan].Float; + } + for (chan = size; chan < 4; ++chan) { + rgba[chan] = 0.0f; + } + bld.immediates[num_immediates] = + lp_build_const_aos(type, + rgba[0], rgba[1], rgba[2], rgba[3], + NULL); + num_immediates++; + } + break; + + case TGSI_TOKEN_TYPE_PROPERTY: + break; + + default: + assert(0); + } + } + + while (pc != -1) { + struct tgsi_full_instruction *instr = bld.instructions + pc; + const struct tgsi_opcode_info *opcode_info = + tgsi_get_opcode_info(instr->Instruction.Opcode); + if (!emit_instruction(&bld, instr, opcode_info, &pc)) + _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n", + opcode_info->mnemonic); + } + + if (0) { + LLVMBasicBlockRef block = LLVMGetInsertBlock(builder); + LLVMValueRef function = LLVMGetBasicBlockParent(block); + debug_printf("11111111111111111111111111111 \n"); + tgsi_dump(tokens, 0); + lp_debug_dump_value(function); + debug_printf("2222222222222222222222222222 \n"); + } + tgsi_parse_free(&parse); + + if (0) { + LLVMModuleRef module = LLVMGetGlobalParent( + LLVMGetBasicBlockParent(LLVMGetInsertBlock(bld.base.builder))); + LLVMDumpModule(module); + } + + FREE(bld.instructions); +} + |