diff options
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 131 |
1 files changed, 71 insertions, 60 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index 0d8d49be2c7..441aebae298 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -49,6 +49,7 @@ #include "lp_bld_type.h" #include "lp_bld_const.h" #include "lp_bld_arit.h" +#include "lp_bld_bitarit.h" #include "lp_bld_gather.h" #include "lp_bld_logic.h" #include "lp_bld_swizzle.h" @@ -119,7 +120,7 @@ struct lp_build_tgsi_soa_context struct lp_build_context base; /* Builder for integer masks and indices */ - struct lp_build_context int_bld; + struct lp_build_context uint_bld; LLVMValueRef consts_ptr; const LLVMValueRef *pos; @@ -139,6 +140,7 @@ struct lp_build_tgsi_soa_context */ LLVMValueRef temps_array; + const struct tgsi_shader_info *info; /** bitmask indicating which register files are accessed indirectly */ unsigned indirect_files; @@ -472,27 +474,41 @@ build_gather(struct lp_build_tgsi_soa_context *bld, * temporary register file. 
*/ static LLVMValueRef -get_indirect_offsets(struct lp_build_tgsi_soa_context *bld, - const struct tgsi_src_register *indirect_reg) +get_indirect_index(struct lp_build_tgsi_soa_context *bld, + unsigned reg_file, unsigned reg_index, + const struct tgsi_src_register *indirect_reg) { + struct lp_build_context *uint_bld = &bld->uint_bld; /* always use X component of address register */ unsigned swizzle = indirect_reg->SwizzleX; - LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type); - LLVMValueRef vec4 = lp_build_const_int_vec(bld->int_bld.type, 4); - LLVMValueRef addr_vec; + LLVMValueRef base; + LLVMValueRef rel; + LLVMValueRef max_index; + LLVMValueRef index; - addr_vec = LLVMBuildLoad(bld->base.builder, - bld->addr[indirect_reg->Index][swizzle], - "load addr reg"); + assert(bld->indirect_files & (1 << reg_file)); + + base = lp_build_const_int_vec(uint_bld->type, reg_index); + + assert(swizzle < 4); + rel = LLVMBuildLoad(bld->base.builder, + bld->addr[indirect_reg->Index][swizzle], + "load addr reg"); /* for indexing we want integers */ - addr_vec = LLVMBuildFPToSI(bld->base.builder, addr_vec, - int_vec_type, ""); + rel = LLVMBuildFPToSI(bld->base.builder, + rel, + uint_bld->vec_type, ""); + + index = lp_build_add(uint_bld, base, rel); - /* addr_vec = addr_vec * 4 */ - addr_vec = lp_build_mul(&bld->int_bld, addr_vec, vec4); + max_index = lp_build_const_int_vec(uint_bld->type, + bld->info->file_max[reg_file]); - return addr_vec; + assert(!uint_bld->type.sign); + index = lp_build_min(uint_bld, index, max_index); + + return index; } @@ -506,11 +522,12 @@ emit_fetch( unsigned src_op, const unsigned chan_index ) { + struct lp_build_context *uint_bld = &bld->uint_bld; const struct tgsi_full_src_register *reg = &inst->Src[src_op]; const unsigned swizzle = tgsi_util_get_full_src_register_swizzle(reg, chan_index); LLVMValueRef res; - LLVMValueRef addr_vec = NULL; + LLVMValueRef indirect_index = NULL; if (swizzle > 3) { assert(0 && "invalid swizzle in 
emit_fetch()"); @@ -518,23 +535,24 @@ emit_fetch( } if (reg->Register.Indirect) { - assert(bld->indirect_files); - addr_vec = get_indirect_offsets(bld, &reg->Indirect); + indirect_index = get_indirect_index(bld, + reg->Register.File, + reg->Register.Index, + &reg->Indirect); + } else { + assert(reg->Register.Index <= bld->info->file_max[reg->Register.File]); } switch (reg->Register.File) { case TGSI_FILE_CONSTANT: if (reg->Register.Indirect) { + LLVMValueRef swizzle_vec = + lp_build_const_int_vec(uint_bld->type, swizzle); LLVMValueRef index_vec; /* index into the const buffer */ - assert(bld->indirect_files & (1 << TGSI_FILE_CONSTANT)); - - /* index_vec = broadcast(reg->Register.Index * 4 + swizzle) */ - index_vec = lp_build_const_int_vec(bld->int_bld.type, - reg->Register.Index * 4 + swizzle); - - /* index_vec = index_vec + addr_vec */ - index_vec = lp_build_add(&bld->int_bld, index_vec, addr_vec); + /* index_vec = indirect_index * 4 + swizzle */ + index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); + index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec); /* Gather values from the constant buffer */ res = build_gather(bld, bld->consts_ptr, index_vec); @@ -565,27 +583,22 @@ emit_fetch( case TGSI_FILE_TEMPORARY: if (reg->Register.Indirect) { - LLVMValueRef vec_len = - lp_build_const_int_vec(bld->int_bld.type, bld->base.type.length); + LLVMValueRef swizzle_vec = + lp_build_const_int_vec(uint_bld->type, swizzle); + LLVMValueRef length_vec = + lp_build_const_int_vec(uint_bld->type, bld->base.type.length); LLVMValueRef index_vec; /* index into the const buffer */ LLVMValueRef temps_array; LLVMTypeRef float4_ptr_type; - assert(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)); - - /* index_vec = broadcast(reg->Register.Index * 4 + swizzle) */ - index_vec = lp_build_const_int_vec(bld->int_bld.type, - reg->Register.Index * 4 + swizzle); - - /* index_vec += addr_vec */ - index_vec = lp_build_add(&bld->int_bld, index_vec, addr_vec); - - /* index_vec *= vector_length */ 
- index_vec = lp_build_mul(&bld->int_bld, index_vec, vec_len); + /* index_vec = (indirect_index * 4 + swizzle) * length */ + index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); + index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec); + index_vec = lp_build_mul(uint_bld, index_vec, length_vec); /* cast temps_array pointer to float* */ float4_ptr_type = LLVMPointerType(LLVMFloatType(), 0); - temps_array = LLVMBuildBitCast(bld->int_bld.builder, bld->temps_array, + temps_array = LLVMBuildBitCast(uint_bld->builder, bld->temps_array, float4_ptr_type, ""); /* Gather values from the temporary register array */ @@ -735,7 +748,7 @@ emit_store( LLVMValueRef value) { const struct tgsi_full_dst_register *reg = &inst->Dst[index]; - LLVMValueRef addr = NULL; + LLVMValueRef indirect_index = NULL; switch( inst->Instruction.Saturate ) { case TGSI_SAT_NONE: @@ -756,24 +769,12 @@ emit_store( } if (reg->Register.Indirect) { - /* XXX use get_indirect_offsets() here eventually */ - LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type); - unsigned swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, chan_index ); - - assert(bld->indirect_files); - - addr = LLVMBuildLoad(bld->base.builder, - bld->addr[reg->Indirect.Index][swizzle], - ""); - /* for indexing we want integers */ - addr = LLVMBuildFPToSI(bld->base.builder, addr, - int_vec_type, ""); - addr = LLVMBuildExtractElement(bld->base.builder, - addr, LLVMConstInt(LLVMInt32Type(), 0, 0), - ""); - addr = LLVMBuildMul(bld->base.builder, - addr, LLVMConstInt(LLVMInt32Type(), 4, 0), - ""); + indirect_index = get_indirect_index(bld, + reg->Register.File, + reg->Register.Index, + &reg->Indirect); + } else { + assert(reg->Register.Index <= bld->info->file_max[reg->Register.File]); } switch( reg->Register.File ) { @@ -993,13 +994,14 @@ emit_declaration( struct lp_build_tgsi_soa_context *bld, const struct tgsi_full_declaration *decl) { - LLVMTypeRef vec_type = 
bld->base.vec_type; unsigned first = decl->Range.First; unsigned last = decl->Range.Last; unsigned idx, i; for (idx = first; idx <= last; ++idx) { + assert(last <= bld->info->file_max[decl->Declaration.File]); switch (decl->Declaration.File) { case TGSI_FILE_TEMPORARY: assert(idx < LP_MAX_TGSI_TEMPS); @@ -2017,16 +2019,25 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, unsigned i; int pc = 0; + struct lp_type res_type; + + assert(type.length <= LP_MAX_VECTOR_LENGTH); + memset(&res_type, 0, sizeof res_type); + res_type.width = type.width; + res_type.length = type.length; + res_type.sign = 1; + /* Setup build context */ memset(&bld, 0, sizeof bld); lp_build_context_init(&bld.base, builder, type); - lp_build_context_init(&bld.int_bld, builder, lp_int_type(type)); + lp_build_context_init(&bld.uint_bld, builder, lp_uint_type(type)); bld.mask = mask; bld.pos = pos; bld.inputs = inputs; bld.outputs = outputs; bld.consts_ptr = consts_ptr; bld.sampler = sampler; + bld.info = info; bld.indirect_files = info->indirect_files; bld.instructions = (struct tgsi_full_instruction *) MALLOC( LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction) ); |