diff options
Diffstat (limited to 'src/gallium/auxiliary')
-rw-r--r-- | src/gallium/auxiliary/SConscript | 71 | ||||
-rw-r--r-- | src/gallium/auxiliary/draw/draw_llvm.c | 10 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_init.c | 8 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 335 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_type.c | 16 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_type.h | 4 | ||||
-rw-r--r-- | src/gallium/auxiliary/os/os_stream_stdc.c | 2 | ||||
-rw-r--r-- | src/gallium/auxiliary/pipebuffer/SConscript | 18 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_build.c | 736 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_build.h | 161 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_exec.c | 801 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_exec.h | 27 |
12 files changed, 1143 insertions, 1046 deletions
diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index f22c8b96123..4b9059d9aa3 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -7,8 +7,6 @@ env.Append(CPPPATH = [ 'util', ]) -env.Tool('udis86') - env.CodeGenerate( target = 'indices/u_indices_gen.c', script = 'indices/u_indices_gen.py', @@ -200,39 +198,42 @@ source = [ ] if env['llvm']: + if env['UDIS86']: + env.Append(CPPDEFINES = [('HAVE_UDIS86', '1')]) + source += [ - 'gallivm/lp_bld_arit.c', - 'gallivm/lp_bld_assert.c', - 'gallivm/lp_bld_bitarit.c', - 'gallivm/lp_bld_const.c', - 'gallivm/lp_bld_conv.c', - 'gallivm/lp_bld_debug.c', - 'gallivm/lp_bld_flow.c', - 'gallivm/lp_bld_format_aos.c', - 'gallivm/lp_bld_format_soa.c', - 'gallivm/lp_bld_format_yuv.c', - 'gallivm/lp_bld_gather.c', - 'gallivm/lp_bld_init.c', - 'gallivm/lp_bld_intr.c', - 'gallivm/lp_bld_logic.c', - 'gallivm/lp_bld_misc.cpp', - 'gallivm/lp_bld_pack.c', - 'gallivm/lp_bld_printf.c', - 'gallivm/lp_bld_quad.c', - 'gallivm/lp_bld_sample.c', - 'gallivm/lp_bld_sample_aos.c', - 'gallivm/lp_bld_sample_soa.c', - 'gallivm/lp_bld_struct.c', - 'gallivm/lp_bld_swizzle.c', - 'gallivm/lp_bld_tgsi_aos.c', - 'gallivm/lp_bld_tgsi_info.c', - 'gallivm/lp_bld_tgsi_soa.c', - 'gallivm/lp_bld_type.c', - 'draw/draw_llvm.c', - 'draw/draw_llvm_sample.c', - 'draw/draw_llvm_translate.c', - 'draw/draw_pt_fetch_shade_pipeline_llvm.c', - 'draw/draw_vs_llvm.c' + 'gallivm/lp_bld_arit.c', + 'gallivm/lp_bld_assert.c', + 'gallivm/lp_bld_bitarit.c', + 'gallivm/lp_bld_const.c', + 'gallivm/lp_bld_conv.c', + 'gallivm/lp_bld_debug.c', + 'gallivm/lp_bld_flow.c', + 'gallivm/lp_bld_format_aos.c', + 'gallivm/lp_bld_format_soa.c', + 'gallivm/lp_bld_format_yuv.c', + 'gallivm/lp_bld_gather.c', + 'gallivm/lp_bld_init.c', + 'gallivm/lp_bld_intr.c', + 'gallivm/lp_bld_logic.c', + 'gallivm/lp_bld_misc.cpp', + 'gallivm/lp_bld_pack.c', + 'gallivm/lp_bld_printf.c', + 'gallivm/lp_bld_quad.c', + 'gallivm/lp_bld_sample.c', + 'gallivm/lp_bld_sample_aos.c', + 'gallivm/lp_bld_sample_soa.c', + 'gallivm/lp_bld_struct.c', + 'gallivm/lp_bld_swizzle.c', + 'gallivm/lp_bld_tgsi_aos.c', + 'gallivm/lp_bld_tgsi_info.c', + 'gallivm/lp_bld_tgsi_soa.c', + 'gallivm/lp_bld_type.c', + 'draw/draw_llvm.c', + 'draw/draw_llvm_sample.c', + 'draw/draw_llvm_translate.c', + 'draw/draw_pt_fetch_shade_pipeline_llvm.c', + 'draw/draw_vs_llvm.c' ] gallium = env.ConvenienceLibrary( @@ -240,4 +241,6 @@ gallium = env.ConvenienceLibrary( source = source, ) +env.Alias('gallium', gallium) + Export('gallium') diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 140e596f994..2b5f01cda74 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -1164,11 +1164,6 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) sampler->destroy(sampler); -#ifdef PIPE_ARCH_X86 - /* Avoid corrupting the FPU stack on 32bit OSes. */ - lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0); -#endif - ret = LLVMBuildLoad(builder, ret_ptr,""); LLVMBuildRet(builder, ret); @@ -1378,11 +1373,6 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian sampler->destroy(sampler); -#ifdef PIPE_ARCH_X86 - /* Avoid corrupting the FPU stack on 32bit OSes. */ - lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0); -#endif - ret = LLVMBuildLoad(builder, ret_ptr,""); LLVMBuildRet(builder, ret); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c index 5598ca5c489..0b9a6f745fb 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_init.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c @@ -145,13 +145,7 @@ lp_build_init(void) LLVMAddCFGSimplificationPass(lp_build_pass); LLVMAddPromoteMemoryToRegisterPass(lp_build_pass); LLVMAddConstantPropagationPass(lp_build_pass); - if(util_cpu_caps.has_sse4_1) { - /* FIXME: There is a bug in this pass, whereby the combination of fptosi - * and sitofp (necessary for trunc/floor/ceil/round implementation) - * somehow becomes invalid code. - */ - LLVMAddInstructionCombiningPass(lp_build_pass); - } + LLVMAddInstructionCombiningPass(lp_build_pass); LLVMAddGVNPass(lp_build_pass); } else { /* We need at least this pass to prevent the backends to fail in diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index 3c318cc8c80..7f0f058c222 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -58,6 +58,7 @@ #include "lp_bld_tgsi.h" #include "lp_bld_limits.h" #include "lp_bld_debug.h" +#include "lp_bld_printf.h" #define FOR_EACH_CHANNEL( CHAN )\ @@ -119,9 +120,12 @@ struct lp_build_tgsi_soa_context { struct lp_build_context base; - /* Builder for integer masks and indices */ + /* Builder for vector integer masks and indices */ struct lp_build_context uint_bld; + /* Builder for scalar elements of shader's data type (float) */ + struct lp_build_context elem_bld; + LLVMValueRef consts_ptr; const LLVMValueRef *pos; const LLVMValueRef (*inputs)[NUM_CHANNELS]; @@ -140,6 +144,18 @@ struct lp_build_tgsi_soa_context */ LLVMValueRef temps_array; + /* We allocate/use this array of output if (1 << TGSI_FILE_OUTPUT) is + * set in the indirect_files field. + * The outputs[] array above is unused then. + */ + LLVMValueRef outputs_array; + + /* We allocate/use this array of inputs if (1 << TGSI_FILE_INPUT) is + * set in the indirect_files field. + * The inputs[] array above is unused then. + */ + LLVMValueRef inputs_array; + const struct tgsi_shader_info *info; /** bitmask indicating which register files are accessed indirectly */ unsigned indirect_files; @@ -435,6 +451,26 @@ get_temp_ptr(struct lp_build_tgsi_soa_context *bld, } } +/** + * Return pointer to a output register channel (src or dest). + * Note that indirect addressing cannot be handled here. + * \param index which output register + * \param chan which channel of the output register. + */ +static LLVMValueRef +get_output_ptr(struct lp_build_tgsi_soa_context *bld, + unsigned index, + unsigned chan) +{ + assert(chan < 4); + if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) { + LLVMValueRef lindex = lp_build_const_int32(index * 4 + chan); + return LLVMBuildGEP(bld->base.builder, bld->outputs_array, &lindex, 1, ""); + } + else { + return bld->outputs[index][chan]; + } +} /** * Gather vector. @@ -457,7 +493,7 @@ build_gather(struct lp_build_tgsi_soa_context *bld, LLVMValueRef index = LLVMBuildExtractElement(bld->base.builder, indexes, ii, ""); LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, base_ptr, - &index, 1, ""); + &index, 1, "gather_ptr"); LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, ""); res = LLVMBuildInsertElement(bld->base.builder, res, scalar, ii, ""); @@ -468,8 +504,60 @@ build_gather(struct lp_build_tgsi_soa_context *bld, /** + * Scatter/store vector. + */ +static void +emit_mask_scatter(struct lp_build_tgsi_soa_context *bld, + LLVMValueRef base_ptr, + LLVMValueRef indexes, + LLVMValueRef values, + struct lp_exec_mask *mask, + LLVMValueRef pred) +{ + LLVMBuilderRef builder = bld->base.builder; + unsigned i; + + /* Mix the predicate and execution mask */ + if (mask->has_mask) { + if (pred) { + pred = LLVMBuildAnd(mask->bld->builder, pred, mask->exec_mask, ""); + } + else { + pred = mask->exec_mask; + } + } + + /* + * Loop over elements of index_vec, store scalar value. + */ + for (i = 0; i < bld->base.type.length; i++) { + LLVMValueRef ii = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, ""); + LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr"); + LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val"); + LLVMValueRef scalar_pred = pred ? + LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL; + + if (0) + lp_build_printf(builder, "scatter %d: val %f at %d %p\n", + ii, val, index, scalar_ptr); + + if (scalar_pred) { + LLVMValueRef real_val, dst_val; + dst_val = LLVMBuildLoad(builder, scalar_ptr, ""); + real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val); + LLVMBuildStore(builder, real_val, scalar_ptr); + } + else { + LLVMBuildStore(builder, val, scalar_ptr); + } + } +} + + +/** * Read the current value of the ADDR register, convert the floats to - * ints, multiply by four and return the vector of offsets. + * ints, add the base index and return the vector of offsets. * The offsets will be used to index into the constant buffer or * temporary register file. */ @@ -577,7 +665,38 @@ emit_fetch( break; case TGSI_FILE_INPUT: - res = bld->inputs[reg->Register.Index][swizzle]; + if (reg->Register.Indirect) { + LLVMValueRef swizzle_vec = + lp_build_const_int_vec(uint_bld->type, swizzle); + LLVMValueRef length_vec = + lp_build_const_int_vec(uint_bld->type, bld->base.type.length); + LLVMValueRef index_vec; /* index into the const buffer */ + LLVMValueRef inputs_array; + LLVMTypeRef float4_ptr_type; + + /* index_vec = (indirect_index * 4 + swizzle) * length */ + index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); + index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec); + index_vec = lp_build_mul(uint_bld, index_vec, length_vec); + + /* cast inputs_array pointer to float* */ + float4_ptr_type = LLVMPointerType(LLVMFloatType(), 0); + inputs_array = LLVMBuildBitCast(uint_bld->builder, bld->inputs_array, + float4_ptr_type, ""); + + /* Gather values from the temporary register array */ + res = build_gather(bld, inputs_array, index_vec); + } else { + if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) { + LLVMValueRef lindex = lp_build_const_int32(reg->Register.Index * 4 + swizzle); + LLVMValueRef input_ptr = LLVMBuildGEP(bld->base.builder, + bld->inputs_array, &lindex, 1, ""); + res = LLVMBuildLoad(bld->base.builder, input_ptr, ""); + } + else { + res = bld->inputs[reg->Register.Index][swizzle]; + } + } assert(res); break; @@ -748,6 +867,7 @@ emit_store( LLVMValueRef value) { const struct tgsi_full_dst_register *reg = &inst->Dst[index]; + struct lp_build_context *uint_bld = &bld->uint_bld; LLVMValueRef indirect_index = NULL; switch( inst->Instruction.Saturate ) { @@ -779,15 +899,81 @@ emit_store( switch( reg->Register.File ) { case TGSI_FILE_OUTPUT: - lp_exec_mask_store(&bld->exec_mask, pred, value, - bld->outputs[reg->Register.Index][chan_index]); + if (reg->Register.Indirect) { + LLVMBuilderRef builder = bld->base.builder; + LLVMValueRef chan_vec = + lp_build_const_int_vec(uint_bld->type, chan_index); + LLVMValueRef length_vec = + lp_build_const_int_vec(uint_bld->type, bld->base.type.length); + LLVMValueRef index_vec; /* indexes into the temp registers */ + LLVMValueRef outputs_array; + LLVMValueRef pixel_offsets; + LLVMTypeRef float_ptr_type; + int i; + + /* build pixel offset vector: {0, 1, 2, 3, ...} */ + pixel_offsets = uint_bld->undef; + for (i = 0; i < bld->base.type.length; i++) { + LLVMValueRef ii = lp_build_const_int32(i); + pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets, + ii, ii, ""); + } + + /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */ + index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); + index_vec = lp_build_add(uint_bld, index_vec, chan_vec); + index_vec = lp_build_mul(uint_bld, index_vec, length_vec); + index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets); + + float_ptr_type = LLVMPointerType(LLVMFloatType(), 0); + outputs_array = LLVMBuildBitCast(builder, bld->outputs_array, + float_ptr_type, ""); + + /* Scatter store values into temp registers */ + emit_mask_scatter(bld, outputs_array, index_vec, value, + &bld->exec_mask, pred); + } + else { + LLVMValueRef out_ptr = get_output_ptr(bld, reg->Register.Index, + chan_index); + lp_exec_mask_store(&bld->exec_mask, pred, value, out_ptr); + } break; case TGSI_FILE_TEMPORARY: if (reg->Register.Indirect) { - /* XXX not done yet */ - debug_printf("WARNING: LLVM scatter store of temp regs" - " not implemented\n"); + LLVMBuilderRef builder = bld->base.builder; + LLVMValueRef chan_vec = + lp_build_const_int_vec(uint_bld->type, chan_index); + LLVMValueRef length_vec = + lp_build_const_int_vec(uint_bld->type, bld->base.type.length); + LLVMValueRef index_vec; /* indexes into the temp registers */ + LLVMValueRef temps_array; + LLVMValueRef pixel_offsets; + LLVMTypeRef float_ptr_type; + int i; + + /* build pixel offset vector: {0, 1, 2, 3, ...} */ + pixel_offsets = uint_bld->undef; + for (i = 0; i < bld->base.type.length; i++) { + LLVMValueRef ii = lp_build_const_int32(i); + pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets, + ii, ii, ""); + } + + /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */ + index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); + index_vec = lp_build_add(uint_bld, index_vec, chan_vec); + index_vec = lp_build_mul(uint_bld, index_vec, length_vec); + index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets); + + float_ptr_type = LLVMPointerType(LLVMFloatType(), 0); + temps_array = LLVMBuildBitCast(builder, bld->temps_array, + float_ptr_type, ""); + + /* Scatter store values into temp registers */ + emit_mask_scatter(bld, temps_array, index_vec, value, + &bld->exec_mask, pred); } else { LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index, @@ -1040,15 +1226,60 @@ emit_kilp(struct lp_build_tgsi_soa_context *bld, lp_build_mask_check(bld->mask); } + +/** + * Emit code which will dump the value of all the temporary registers + * to stdout. + */ +static void +emit_dump_temps(struct lp_build_tgsi_soa_context *bld) +{ + LLVMBuilderRef builder = bld->base.builder; + LLVMValueRef temp_ptr; + LLVMValueRef i0 = lp_build_const_int32(0); + LLVMValueRef i1 = lp_build_const_int32(1); + LLVMValueRef i2 = lp_build_const_int32(2); + LLVMValueRef i3 = lp_build_const_int32(3); + int index; + int n = bld->info->file_max[TGSI_FILE_TEMPORARY]; + + for (index = 0; index < n; index++) { + LLVMValueRef idx = lp_build_const_int32(index); + LLVMValueRef v[4][4], res; + int chan; + + lp_build_printf(builder, "TEMP[%d]:\n", idx); + + for (chan = 0; chan < 4; chan++) { + temp_ptr = get_temp_ptr(bld, index, chan); + res = LLVMBuildLoad(bld->base.builder, temp_ptr, ""); + v[chan][0] = LLVMBuildExtractElement(builder, res, i0, ""); + v[chan][1] = LLVMBuildExtractElement(builder, res, i1, ""); + v[chan][2] = LLVMBuildExtractElement(builder, res, i2, ""); + v[chan][3] = LLVMBuildExtractElement(builder, res, i3, ""); + } + + lp_build_printf(builder, " X: %f %f %f %f\n", + v[0][0], v[0][1], v[0][2], v[0][3]); + lp_build_printf(builder, " Y: %f %f %f %f\n", + v[1][0], v[1][1], v[1][2], v[1][3]); + lp_build_printf(builder, " Z: %f %f %f %f\n", + v[2][0], v[2][1], v[2][2], v[2][3]); + lp_build_printf(builder, " W: %f %f %f %f\n", + v[3][0], v[3][1], v[3][2], v[3][3]); + } +} + + + static void emit_declaration( struct lp_build_tgsi_soa_context *bld, const struct tgsi_full_declaration *decl) { LLVMTypeRef vec_type = bld->base.vec_type; - - unsigned first = decl->Range.First; - unsigned last = decl->Range.Last; + const unsigned first = decl->Range.First; + const unsigned last = decl->Range.Last; unsigned idx, i; for (idx = first; idx <= last; ++idx) { @@ -1056,36 +1287,33 @@ emit_declaration( switch (decl->Declaration.File) { case TGSI_FILE_TEMPORARY: assert(idx < LP_MAX_TGSI_TEMPS); - if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) { - LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(), - last*4 + 4, 0); - bld->temps_array = lp_build_array_alloca(bld->base.builder, - vec_type, array_size, ""); - } else { + if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) { for (i = 0; i < NUM_CHANNELS; i++) bld->temps[idx][i] = lp_build_alloca(bld->base.builder, - vec_type, ""); + vec_type, "temp"); } break; case TGSI_FILE_OUTPUT: - for (i = 0; i < NUM_CHANNELS; i++) - bld->outputs[idx][i] = lp_build_alloca(bld->base.builder, - vec_type, ""); + if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) { + for (i = 0; i < NUM_CHANNELS; i++) + bld->outputs[idx][i] = lp_build_alloca(bld->base.builder, + vec_type, "output"); + } break; case TGSI_FILE_ADDRESS: assert(idx < LP_MAX_TGSI_ADDRS); for (i = 0; i < NUM_CHANNELS; i++) bld->addr[idx][i] = lp_build_alloca(bld->base.builder, - vec_type, ""); + vec_type, "addr"); break; case TGSI_FILE_PREDICATE: assert(idx < LP_MAX_TGSI_PREDS); for (i = 0; i < NUM_CHANNELS; i++) bld->preds[idx][i] = lp_build_alloca(bld->base.builder, - vec_type, ""); + vec_type, "predicate"); break; default: @@ -1740,6 +1968,10 @@ emit_instruction( break; case TGSI_OPCODE_END: + if (0) { + /* for debugging */ + emit_dump_temps(bld); + } *pc = -1; break; @@ -2082,6 +2314,7 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, memset(&bld, 0, sizeof bld); lp_build_context_init(&bld.base, builder, type); lp_build_context_init(&bld.uint_bld, builder, lp_uint_type(type)); + lp_build_context_init(&bld.elem_bld, builder, lp_elem_type(type)); bld.mask = mask; bld.pos = pos; bld.inputs = inputs; @@ -2100,6 +2333,48 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, lp_exec_mask_init(&bld.exec_mask, &bld.base); + if (bld.indirect_files & (1 << TGSI_FILE_TEMPORARY)) { + LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(), + info->file_max[TGSI_FILE_TEMPORARY]*4 + 4, 0); + bld.temps_array = lp_build_array_alloca(bld.base.builder, + bld.base.vec_type, array_size, + "temp_array"); + } + + if (bld.indirect_files & (1 << TGSI_FILE_OUTPUT)) { + LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(), + info->file_max[TGSI_FILE_OUTPUT]*4 + 4, 0); + bld.outputs_array = lp_build_array_alloca(bld.base.builder, + bld.base.vec_type, array_size, + "output_array"); + } + + /* If we have indirect addressing in inputs we need to copy them into + * our alloca array to be able to iterate over them */ + if (bld.indirect_files & (1 << TGSI_FILE_INPUT)) { + unsigned index, chan; + LLVMTypeRef vec_type = bld.base.vec_type; + LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(), + info->file_max[TGSI_FILE_INPUT]*4 + 4, 0); + bld.inputs_array = lp_build_array_alloca(bld.base.builder, + vec_type, array_size, + "input_array"); + + assert(info->num_inputs <= info->file_max[TGSI_FILE_INPUT] + 1); + + for (index = 0; index < info->num_inputs; ++index) { + for (chan = 0; chan < NUM_CHANNELS; ++chan) { + LLVMValueRef lindex = lp_build_const_int32(index * 4 + chan); + LLVMValueRef input_ptr = + LLVMBuildGEP(bld.base.builder, bld.inputs_array, + &lindex, 1, ""); + LLVMValueRef value = bld.inputs[index][chan]; + if (value) + LLVMBuildStore(bld.base.builder, value, input_ptr); + } + } + } + tgsi_parse_init( &parse, tokens ); while( !tgsi_parse_end_of_tokens( &parse ) ) { @@ -2169,6 +2444,18 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, opcode_info->mnemonic); } + /* If we have indirect addressing in outputs we need to copy our alloca array + * to the outputs slots specified by the called */ + if (bld.indirect_files & (1 << TGSI_FILE_OUTPUT)) { + unsigned index, chan; + assert(info->num_outputs <= info->file_max[TGSI_FILE_OUTPUT] + 1); + for (index = 0; index < info->num_outputs; ++index) { + for (chan = 0; chan < NUM_CHANNELS; ++chan) { + bld.outputs[index][chan] = get_output_ptr(&bld, index, chan); + } + } + } + if (0) { LLVMBasicBlockRef block = LLVMGetInsertBlock(builder); LLVMValueRef function = LLVMGetBasicBlockParent(block); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_type.c b/src/gallium/auxiliary/gallivm/lp_bld_type.c index 06f1aae6dcc..5205c7ada91 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_type.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_type.c @@ -188,6 +188,22 @@ lp_build_int32_vec4_type(void) /** + * Create element of vector type + */ +struct lp_type +lp_elem_type(struct lp_type type) +{ + struct lp_type res_type; + + assert(type.length > 1); + res_type = type; + res_type.length = 1; + + return res_type; +} + + +/** * Create unsigned integer type variation of given type. */ struct lp_type diff --git a/src/gallium/auxiliary/gallivm/lp_bld_type.h b/src/gallium/auxiliary/gallivm/lp_bld_type.h index fec1d3dfbc6..a135d0df847 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_type.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_type.h @@ -365,6 +365,10 @@ lp_unorm8_vec4_type(void) struct lp_type +lp_elem_type(struct lp_type type); + + +struct lp_type lp_uint_type(struct lp_type type); diff --git a/src/gallium/auxiliary/os/os_stream_stdc.c b/src/gallium/auxiliary/os/os_stream_stdc.c index 37e7d063e2b..afd3ff6dcea 100644 --- a/src/gallium/auxiliary/os/os_stream_stdc.c +++ b/src/gallium/auxiliary/os/os_stream_stdc.c @@ -106,7 +106,7 @@ os_file_stream_create(const char *filename) stream->base.flush = &os_stdc_stream_flush; stream->base.vprintf = &os_stdc_stream_vprintf; - stream->file = fopen(filename, "w"); + stream->file = fopen(filename, "wb"); if(!stream->file) goto no_file; diff --git a/src/gallium/auxiliary/pipebuffer/SConscript b/src/gallium/auxiliary/pipebuffer/SConscript deleted file mode 100644 index a074a554717..00000000000 --- a/src/gallium/auxiliary/pipebuffer/SConscript +++ /dev/null @@ -1,18 +0,0 @@ -Import('*') - -pipebuffer = env.ConvenienceLibrary( - target = 'pipebuffer', - source = [ - 'pb_buffer_fenced.c', - 'pb_buffer_malloc.c', - 'pb_bufmgr_alt.c', - 'pb_bufmgr_cache.c', - 'pb_bufmgr_debug.c', - 'pb_bufmgr_mm.c', - 'pb_bufmgr_ondemand.c', - 'pb_bufmgr_pool.c', - 'pb_bufmgr_slab.c', - 'pb_validate.c', - ]) - -auxiliaries.insert(0, pipebuffer) diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c index 6dbedf15ca8..16a205f2068 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.c +++ b/src/gallium/auxiliary/tgsi/tgsi_build.c @@ -64,25 +64,14 @@ header_bodysize_grow( struct tgsi_header *header ) } struct tgsi_processor -tgsi_default_processor( void ) -{ - struct tgsi_processor processor; - - processor.Processor = TGSI_PROCESSOR_FRAGMENT; - processor.Padding = 0; - - return processor; -} - -struct tgsi_processor tgsi_build_processor( unsigned type, struct tgsi_header *header ) { struct tgsi_processor processor; - processor = tgsi_default_processor(); processor.Processor = type; + processor.Padding = 0; header_headersize_grow( header ); @@ -93,7 +82,19 @@ tgsi_build_processor( * declaration */ -struct tgsi_declaration +static void +declaration_grow( + struct tgsi_declaration *declaration, + struct tgsi_header *header ) +{ + assert( declaration->NrTokens < 0xFF ); + + declaration->NrTokens++; + + header_bodysize_grow( header ); +} + +static struct tgsi_declaration tgsi_default_declaration( void ) { struct tgsi_declaration declaration; @@ -112,7 +113,7 @@ tgsi_default_declaration( void ) return declaration; } -struct tgsi_declaration +static struct tgsi_declaration tgsi_build_declaration( unsigned file, unsigned usage_mask, @@ -144,16 +145,85 @@ tgsi_build_declaration( return declaration; } -static void -declaration_grow( +static struct tgsi_declaration_range +tgsi_default_declaration_range( void ) +{ + struct tgsi_declaration_range dr; + + dr.First = 0; + dr.Last = 0; + + return dr; +} + +static struct tgsi_declaration_range +tgsi_build_declaration_range( + unsigned first, + unsigned last, struct tgsi_declaration *declaration, struct tgsi_header *header ) { - assert( declaration->NrTokens < 0xFF ); + struct tgsi_declaration_range declaration_range; - declaration->NrTokens++; + assert( last >= first ); + assert( last <= 0xFFFF ); - header_bodysize_grow( header ); + declaration_range.First = first; + declaration_range.Last = last; + + declaration_grow( declaration, header ); + + return declaration_range; +} + +static struct tgsi_declaration_dimension +tgsi_build_declaration_dimension(unsigned index_2d, + struct tgsi_declaration *declaration, + struct tgsi_header *header) +{ + struct tgsi_declaration_dimension dd; + + assert(index_2d <= 0xFFFF); + + dd.Index2D = index_2d; + dd.Padding = 0; + + declaration_grow(declaration, header); + + return dd; +} + +static struct tgsi_declaration_semantic +tgsi_default_declaration_semantic( void ) +{ + struct tgsi_declaration_semantic ds; + + ds.Name = TGSI_SEMANTIC_POSITION; + ds.Index = 0; + ds.Padding = 0; + + return ds; +} + +static struct tgsi_declaration_semantic +tgsi_build_declaration_semantic( + unsigned semantic_name, + unsigned semantic_index, + struct tgsi_declaration *declaration, + struct tgsi_header *header ) +{ + struct tgsi_declaration_semantic ds; + + assert( semantic_name <= TGSI_SEMANTIC_COUNT ); + assert( semantic_index <= 0xFFFF ); + + ds.Name = semantic_name; + ds.Index = semantic_index; + ds.Padding = 0; + + declaration_grow( declaration, header ); + + return ds; } struct tgsi_full_declaration @@ -257,104 +327,11 @@ tgsi_build_full_declaration( return size; } -struct tgsi_declaration_range -tgsi_default_declaration_range( void ) -{ - struct tgsi_declaration_range dr; - - dr.First = 0; - dr.Last = 0; - - return dr; -} - -struct tgsi_declaration_range -tgsi_build_declaration_range( - unsigned first, - unsigned last, - struct tgsi_declaration *declaration, - struct tgsi_header *header ) -{ - struct tgsi_declaration_range declaration_range; - - assert( last >= first ); - assert( last <= 0xFFFF ); - - declaration_range = tgsi_default_declaration_range(); - declaration_range.First = first; - declaration_range.Last = last; - - declaration_grow( declaration, header ); - - return declaration_range; -} - -struct tgsi_declaration_dimension -tgsi_default_declaration_dimension(void) -{ - struct tgsi_declaration_dimension dd; - - dd.Index2D = 0; - dd.Padding = 0; - - return dd; -} - -struct tgsi_declaration_dimension -tgsi_build_declaration_dimension(unsigned index_2d, - struct tgsi_declaration *declaration, - struct tgsi_header *header) -{ - struct tgsi_declaration_dimension dd; - - assert(index_2d <= 0xFFFF); - - dd = tgsi_default_declaration_dimension(); - dd.Index2D = index_2d; - - declaration_grow(declaration, header); - - return dd; -} - -struct tgsi_declaration_semantic -tgsi_default_declaration_semantic( void ) -{ - struct tgsi_declaration_semantic ds; - - ds.Name = TGSI_SEMANTIC_POSITION; - ds.Index = 0; - ds.Padding = 0; - - return ds; -} - -struct tgsi_declaration_semantic -tgsi_build_declaration_semantic( - unsigned semantic_name, - unsigned semantic_index, - struct tgsi_declaration *declaration, - struct tgsi_header *header ) -{ - struct tgsi_declaration_semantic ds; - - assert( semantic_name <= TGSI_SEMANTIC_COUNT ); - assert( semantic_index <= 0xFFFF ); - - ds = tgsi_default_declaration_semantic(); - ds.Name = semantic_name; - ds.Index = semantic_index; - - declaration_grow( declaration, header ); - - return ds; -} - /* * immediate */ -struct tgsi_immediate +static struct tgsi_immediate tgsi_default_immediate( void ) { struct tgsi_immediate immediate; @@ -367,7 +344,7 @@ tgsi_default_immediate( void ) return immediate; } -struct tgsi_immediate +static struct tgsi_immediate tgsi_build_immediate( struct tgsi_header *header ) { @@ -406,7 +383,7 @@ immediate_grow( header_bodysize_grow( header ); } -union tgsi_immediate_data +static union tgsi_immediate_data tgsi_build_immediate_float32( float value, struct tgsi_immediate *immediate, @@ -480,7 +457,7 @@ tgsi_default_instruction( void ) return instruction; } -struct tgsi_instruction +static struct tgsi_instruction tgsi_build_instruction(unsigned opcode, unsigned saturate, unsigned predicate, @@ -519,6 +496,266 @@ instruction_grow( header_bodysize_grow( header ); } +struct tgsi_instruction_predicate +tgsi_default_instruction_predicate(void) +{ + struct tgsi_instruction_predicate instruction_predicate; + + instruction_predicate.SwizzleX = TGSI_SWIZZLE_X; + instruction_predicate.SwizzleY = TGSI_SWIZZLE_Y; + instruction_predicate.SwizzleZ = TGSI_SWIZZLE_Z; + instruction_predicate.SwizzleW = TGSI_SWIZZLE_W; + instruction_predicate.Negate = 0; + instruction_predicate.Index = 0; + instruction_predicate.Padding = 0; + + return instruction_predicate; +} + +static struct tgsi_instruction_predicate +tgsi_build_instruction_predicate(int index, + unsigned negate, + unsigned swizzleX, + unsigned swizzleY, + unsigned swizzleZ, + unsigned swizzleW, + struct tgsi_instruction *instruction, + struct tgsi_header *header) +{ + struct tgsi_instruction_predicate instruction_predicate; + + instruction_predicate = tgsi_default_instruction_predicate(); + instruction_predicate.SwizzleX = swizzleX; + instruction_predicate.SwizzleY = swizzleY; + instruction_predicate.SwizzleZ = swizzleZ; + instruction_predicate.SwizzleW = swizzleW; + instruction_predicate.Negate = negate; + instruction_predicate.Index = index; + + instruction_grow(instruction, header); + + return instruction_predicate; +} + +static struct tgsi_instruction_label +tgsi_default_instruction_label( void ) +{ + struct tgsi_instruction_label instruction_label; + + instruction_label.Label = 0; + instruction_label.Padding = 0; + + return instruction_label; +} + +static struct tgsi_instruction_label +tgsi_build_instruction_label( + unsigned label, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ) +{ + struct tgsi_instruction_label instruction_label; + + instruction_label.Label = label; + instruction_label.Padding = 0; + instruction->Label = 1; + + instruction_grow( instruction, header ); + + return instruction_label; +} + +static struct tgsi_instruction_texture +tgsi_default_instruction_texture( void ) +{ + struct tgsi_instruction_texture instruction_texture; + + instruction_texture.Texture = TGSI_TEXTURE_UNKNOWN; + instruction_texture.Padding = 0; + + return instruction_texture; +} + +static struct tgsi_instruction_texture +tgsi_build_instruction_texture( + unsigned texture, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ) +{ + struct tgsi_instruction_texture instruction_texture; + + instruction_texture.Texture = texture; + instruction_texture.Padding = 0; + instruction->Texture = 1; + + instruction_grow( instruction, header ); + + return instruction_texture; +} + +static struct tgsi_src_register +tgsi_default_src_register( void ) +{ + struct tgsi_src_register src_register; + + src_register.File = TGSI_FILE_NULL; + src_register.SwizzleX = TGSI_SWIZZLE_X; + src_register.SwizzleY = TGSI_SWIZZLE_Y; + src_register.SwizzleZ = TGSI_SWIZZLE_Z; + src_register.SwizzleW = TGSI_SWIZZLE_W; + src_register.Negate = 0; + src_register.Absolute = 0; + src_register.Indirect = 0; + src_register.Dimension = 0; + src_register.Index = 0; + + return src_register; +} + +static struct tgsi_src_register +tgsi_build_src_register( + unsigned file, + unsigned swizzle_x, + unsigned swizzle_y, + unsigned swizzle_z, + unsigned swizzle_w, + unsigned negate, + unsigned absolute, + unsigned indirect, + unsigned dimension, + int index, + struct tgsi_instruction *instruction, + struct tgsi_header *header ) +{ + struct tgsi_src_register src_register; + + assert( file < TGSI_FILE_COUNT ); + assert( swizzle_x <= TGSI_SWIZZLE_W ); + assert( swizzle_y <= TGSI_SWIZZLE_W ); + assert( swizzle_z <= TGSI_SWIZZLE_W ); + assert( swizzle_w <= TGSI_SWIZZLE_W ); + assert( negate <= 1 ); + assert( index >= -0x8000 && index <= 0x7FFF ); + + src_register.File = file; + src_register.SwizzleX = swizzle_x; + src_register.SwizzleY = swizzle_y; + src_register.SwizzleZ = swizzle_z; + src_register.SwizzleW = swizzle_w; + src_register.Negate = negate; + src_register.Absolute = absolute; + src_register.Indirect = indirect; + src_register.Dimension = dimension; + src_register.Index = index; + + instruction_grow( instruction, header ); + + return src_register; +} + +static struct tgsi_dimension +tgsi_default_dimension( void ) +{ + struct tgsi_dimension dimension; + + dimension.Indirect = 0; + dimension.Dimension = 0; + dimension.Padding = 0; + dimension.Index = 0; + + return dimension; +} + +static struct tgsi_full_src_register +tgsi_default_full_src_register( void ) +{ + struct tgsi_full_src_register full_src_register; + + full_src_register.Register = tgsi_default_src_register(); + full_src_register.Indirect = tgsi_default_src_register(); + full_src_register.Dimension = tgsi_default_dimension(); + full_src_register.DimIndirect = tgsi_default_src_register(); + + return full_src_register; +} + +static struct tgsi_dimension +tgsi_build_dimension( + unsigned indirect, + unsigned index, + struct tgsi_instruction *instruction, + struct tgsi_header *header ) +{ + struct tgsi_dimension dimension; + + dimension.Indirect = indirect; + dimension.Dimension = 0; + dimension.Padding = 0; + dimension.Index = index; + + instruction_grow( instruction, header ); + + return dimension; +} + +static struct tgsi_dst_register +tgsi_default_dst_register( void ) +{ + struct tgsi_dst_register dst_register; + + dst_register.File = TGSI_FILE_NULL; + dst_register.WriteMask = TGSI_WRITEMASK_XYZW; + dst_register.Indirect = 0; + dst_register.Dimension = 0; + dst_register.Index = 0; + dst_register.Padding = 0; + + return dst_register; +} + +static struct tgsi_dst_register +tgsi_build_dst_register( + unsigned file, + unsigned mask, + unsigned indirect, + unsigned dimension, + int index, + struct tgsi_instruction *instruction, + struct tgsi_header *header ) +{ + struct tgsi_dst_register dst_register; + + assert( file < TGSI_FILE_COUNT ); + assert( mask <= TGSI_WRITEMASK_XYZW ); + assert( index >= -32768 && index <= 32767 ); + + dst_register.File = file; + dst_register.WriteMask = mask; + dst_register.Indirect = indirect; + dst_register.Dimension = dimension; + dst_register.Index = index; + dst_register.Padding = 0; + + instruction_grow( instruction, header ); + + return dst_register; +} + +static struct tgsi_full_dst_register +tgsi_default_full_dst_register( void ) +{ + struct tgsi_full_dst_register full_dst_register; + + full_dst_register.Register = tgsi_default_dst_register(); + full_dst_register.Indirect = tgsi_default_src_register(); + full_dst_register.Dimension = tgsi_default_dimension(); + full_dst_register.DimIndirect = tgsi_default_src_register(); + + return full_dst_register; +} + struct tgsi_full_instruction tgsi_default_full_instruction( void ) { @@ -794,268 +1031,7 @@ tgsi_build_full_instruction( return size; } -struct tgsi_instruction_predicate -tgsi_default_instruction_predicate(void) -{ - struct tgsi_instruction_predicate instruction_predicate; - - instruction_predicate.SwizzleX = TGSI_SWIZZLE_X; - instruction_predicate.SwizzleY = TGSI_SWIZZLE_Y; - instruction_predicate.SwizzleZ = TGSI_SWIZZLE_Z; - instruction_predicate.SwizzleW = TGSI_SWIZZLE_W; - instruction_predicate.Negate = 0; - instruction_predicate.Index = 0; - instruction_predicate.Padding = 0; - - return instruction_predicate; -} - -struct tgsi_instruction_predicate -tgsi_build_instruction_predicate(int index, - unsigned negate, - unsigned swizzleX, - unsigned swizzleY, - unsigned swizzleZ, - unsigned swizzleW, - struct tgsi_instruction *instruction, - struct tgsi_header *header) -{ - struct tgsi_instruction_predicate instruction_predicate; - - instruction_predicate = tgsi_default_instruction_predicate(); - instruction_predicate.SwizzleX = swizzleX; - instruction_predicate.SwizzleY = swizzleY; - instruction_predicate.SwizzleZ = swizzleZ; - instruction_predicate.SwizzleW = swizzleW; - instruction_predicate.Negate = negate; - instruction_predicate.Index = index; - - instruction_grow(instruction, header); - - return instruction_predicate; -} - -struct tgsi_instruction_label -tgsi_default_instruction_label( void ) -{ - struct tgsi_instruction_label instruction_label; - - instruction_label.Label = 0; - instruction_label.Padding = 0; - - return instruction_label; -} - -struct tgsi_instruction_label -tgsi_build_instruction_label( - unsigned label, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - struct tgsi_header *header ) -{ - struct tgsi_instruction_label instruction_label; - - instruction_label = tgsi_default_instruction_label(); - instruction_label.Label = label; - instruction->Label = 1; - - instruction_grow( instruction, header ); - - return instruction_label; -} - -struct tgsi_instruction_texture -tgsi_default_instruction_texture( void ) -{ - struct tgsi_instruction_texture instruction_texture; - - instruction_texture.Texture = TGSI_TEXTURE_UNKNOWN; - instruction_texture.Padding = 0; - - return instruction_texture; -} - -struct tgsi_instruction_texture -tgsi_build_instruction_texture( - unsigned texture, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - struct tgsi_header *header ) -{ - struct tgsi_instruction_texture instruction_texture; - - instruction_texture = tgsi_default_instruction_texture(); - instruction_texture.Texture = texture; - instruction->Texture = 1; - - instruction_grow( instruction, header ); - - return instruction_texture; -} - -struct tgsi_src_register -tgsi_default_src_register( void ) -{ - struct tgsi_src_register src_register; - - src_register.File = TGSI_FILE_NULL; - src_register.SwizzleX = TGSI_SWIZZLE_X; - src_register.SwizzleY = TGSI_SWIZZLE_Y; - src_register.SwizzleZ = TGSI_SWIZZLE_Z; - src_register.SwizzleW = TGSI_SWIZZLE_W; - src_register.Negate = 0; - src_register.Absolute = 0; - src_register.Indirect = 0; - src_register.Dimension = 0; - src_register.Index = 0; - - return src_register; -} - -struct tgsi_src_register -tgsi_build_src_register( - unsigned file, - unsigned swizzle_x, - unsigned swizzle_y, - unsigned swizzle_z, - unsigned swizzle_w, - unsigned negate, - unsigned absolute, - unsigned indirect, - unsigned dimension, - int index, - struct tgsi_instruction *instruction, - struct tgsi_header *header ) -{ - struct tgsi_src_register src_register; - - assert( file < TGSI_FILE_COUNT ); - assert( swizzle_x <= TGSI_SWIZZLE_W ); - assert( swizzle_y <= TGSI_SWIZZLE_W ); - assert( swizzle_z <= TGSI_SWIZZLE_W ); - assert( swizzle_w <= TGSI_SWIZZLE_W ); - assert( negate <= 1 ); - assert( index >= -0x8000 && index <= 0x7FFF ); - - src_register = tgsi_default_src_register(); - src_register.File = file; - src_register.SwizzleX = swizzle_x; - src_register.SwizzleY = swizzle_y; - src_register.SwizzleZ = swizzle_z; - src_register.SwizzleW = swizzle_w; - src_register.Negate = negate; - src_register.Absolute = absolute; - src_register.Indirect = indirect; - src_register.Dimension = dimension; - src_register.Index = index; - - instruction_grow( instruction, header ); - - return src_register; -} - -struct tgsi_full_src_register -tgsi_default_full_src_register( void ) -{ - struct tgsi_full_src_register full_src_register; - - full_src_register.Register = tgsi_default_src_register(); - full_src_register.Indirect = tgsi_default_src_register(); - full_src_register.Dimension = tgsi_default_dimension(); - full_src_register.DimIndirect = tgsi_default_src_register(); - - return full_src_register; -} - - -struct tgsi_dimension -tgsi_default_dimension( void ) -{ - struct tgsi_dimension dimension; - - dimension.Indirect = 0; - dimension.Dimension = 0; - dimension.Padding = 0; - dimension.Index = 0; - - return dimension; -} - -struct tgsi_dimension -tgsi_build_dimension( - unsigned indirect, - unsigned index, - struct tgsi_instruction *instruction, - struct tgsi_header *header ) -{ - struct tgsi_dimension dimension; - - dimension = tgsi_default_dimension(); - dimension.Indirect = indirect; - dimension.Index = index; - - instruction_grow( instruction, header ); - - return dimension; -} - -struct tgsi_dst_register -tgsi_default_dst_register( void ) -{ - struct tgsi_dst_register dst_register; - - dst_register.File = TGSI_FILE_NULL; - dst_register.WriteMask = TGSI_WRITEMASK_XYZW; - dst_register.Indirect = 0; - dst_register.Dimension = 0; - dst_register.Index = 0; - dst_register.Padding = 0; - - return dst_register; -} - -struct tgsi_dst_register -tgsi_build_dst_register( - unsigned file, - unsigned mask, - unsigned indirect, - unsigned dimension, - int index, - struct tgsi_instruction *instruction, - struct tgsi_header *header ) -{ - struct tgsi_dst_register dst_register; - - assert( file < TGSI_FILE_COUNT ); - assert( mask <= TGSI_WRITEMASK_XYZW ); - assert( index >= -32768 && index <= 32767 ); - - dst_register = tgsi_default_dst_register(); - dst_register.File = file; - dst_register.WriteMask = mask; - dst_register.Index = index; - dst_register.Indirect = indirect; - dst_register.Dimension = dimension; - - instruction_grow( instruction, header ); - - return dst_register; -} - -struct tgsi_full_dst_register -tgsi_default_full_dst_register( void ) -{ - struct tgsi_full_dst_register full_dst_register; - - full_dst_register.Register = tgsi_default_dst_register(); - full_dst_register.Indirect = tgsi_default_src_register(); - full_dst_register.Dimension = tgsi_default_dimension(); - full_dst_register.DimIndirect = tgsi_default_src_register(); - - return full_dst_register; -} - -struct tgsi_property +static struct tgsi_property tgsi_default_property( void ) { struct tgsi_property property; @@ -1068,7 +1044,7 @@ tgsi_default_property( void ) return property; } -struct tgsi_property +static struct tgsi_property tgsi_build_property(unsigned property_name, struct tgsi_header *header) { @@ -1107,7 +1083,7 @@ property_grow( header_bodysize_grow( header ); } -struct tgsi_property_data +static struct tgsi_property_data tgsi_build_property_data( unsigned value, struct tgsi_property *property, diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.h b/src/gallium/auxiliary/tgsi/tgsi_build.h index 112107a0881..3f236a9c241 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.h +++ b/src/gallium/auxiliary/tgsi/tgsi_build.h @@ -45,9 +45,6 @@ struct tgsi_header tgsi_build_header( void ); struct tgsi_processor -tgsi_default_processor( void ); - -struct tgsi_processor tgsi_build_processor( unsigned processor, struct tgsi_header *header ); @@ -56,21 +53,6 @@ tgsi_build_processor( * declaration */ -struct tgsi_declaration -tgsi_default_declaration( void ); - -struct tgsi_declaration -tgsi_build_declaration( - unsigned file, - unsigned usage_mask, - unsigned interpolate, - unsigned dimension, - unsigned semantic, - unsigned centroid, - unsigned invariant, - unsigned cylindrical_wrap, - struct tgsi_header *header ); - struct tgsi_full_declaration tgsi_default_full_declaration( void ); @@ -81,54 +63,13 @@ tgsi_build_full_declaration( struct tgsi_header *header, unsigned maxsize ); -struct tgsi_declaration_range -tgsi_default_declaration_range( void ); - -struct tgsi_declaration_range -tgsi_build_declaration_range( - unsigned first, - unsigned last, - struct tgsi_declaration *declaration, - struct tgsi_header *header ); - -struct tgsi_declaration_dimension -tgsi_default_declaration_dimension(void); - -struct tgsi_declaration_dimension -tgsi_build_declaration_dimension(unsigned index_2d, - struct tgsi_declaration *declaration, - struct tgsi_header *header); - -struct tgsi_declaration_semantic -tgsi_default_declaration_semantic( void ); - -struct tgsi_declaration_semantic -tgsi_build_declaration_semantic( - unsigned semantic_name, - unsigned semantic_index, - struct tgsi_declaration *declaration, - struct tgsi_header *header ); - /* * immediate */ -struct tgsi_immediate -tgsi_default_immediate( void ); - -struct tgsi_immediate -tgsi_build_immediate( - struct tgsi_header *header ); - struct tgsi_full_immediate tgsi_default_full_immediate( void ); -union tgsi_immediate_data -tgsi_build_immediate_float32( - float value, - struct tgsi_immediate *immediate, - struct tgsi_header *header ); - unsigned tgsi_build_full_immediate( const struct tgsi_full_immediate *full_imm, @@ -140,23 +81,9 @@ tgsi_build_full_immediate( * properties */ -struct tgsi_property -tgsi_default_property( void ); - -struct tgsi_property -tgsi_build_property( - unsigned property_name, - struct tgsi_header *header ); - struct tgsi_full_property tgsi_default_full_property( void ); -struct tgsi_property_data -tgsi_build_property_data( - unsigned value, - struct tgsi_property *property, - struct tgsi_header *header ); - unsigned tgsi_build_full_property( const struct tgsi_full_property *full_prop, @@ -171,15 +98,6 @@ tgsi_build_full_property( struct tgsi_instruction tgsi_default_instruction( void ); -struct tgsi_instruction -tgsi_build_instruction( - unsigned opcode, - unsigned saturate, - unsigned predicate, - unsigned num_dst_regs, - unsigned num_src_regs, - struct tgsi_header *header ); - struct tgsi_full_instruction tgsi_default_full_instruction( void ); @@ -193,85 +111,6 @@ tgsi_build_full_instruction( struct tgsi_instruction_predicate tgsi_default_instruction_predicate(void); -struct tgsi_instruction_predicate -tgsi_build_instruction_predicate(int index, - unsigned negate, - unsigned swizzleX, - unsigned swizzleY, - unsigned swizzleZ, - unsigned swizzleW, - struct tgsi_instruction *instruction, - struct tgsi_header *header); - -struct tgsi_instruction_label -tgsi_default_instruction_label( void ); - -struct tgsi_instruction_label -tgsi_build_instruction_label( - unsigned label, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - struct tgsi_header *header ); - -struct tgsi_instruction_texture -tgsi_default_instruction_texture( void ); - -struct tgsi_instruction_texture -tgsi_build_instruction_texture( - unsigned texture, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - struct tgsi_header *header ); - -struct tgsi_src_register -tgsi_default_src_register( void ); - -struct tgsi_src_register -tgsi_build_src_register( - unsigned file, - unsigned swizzle_x, - unsigned swizzle_y, - unsigned swizzle_z, - unsigned swizzle_w, - unsigned negate, - unsigned absolute, - unsigned indirect, - unsigned dimension, - int index, - struct tgsi_instruction *instruction, - struct tgsi_header *header ); - -struct tgsi_full_src_register -tgsi_default_full_src_register( void ); - - -struct tgsi_dimension -tgsi_default_dimension( void ); - -struct tgsi_dimension -tgsi_build_dimension( - unsigned indirect, - unsigned index, - struct tgsi_instruction *instruction, - struct tgsi_header *header ); - -struct tgsi_dst_register -tgsi_default_dst_register( void ); - -struct tgsi_dst_register -tgsi_build_dst_register( - unsigned file, - unsigned mask, - unsigned indirect, - unsigned dimension, - int index, - struct tgsi_instruction *instruction, - struct tgsi_header *header ); - -struct tgsi_full_dst_register -tgsi_default_full_dst_register( void ); - - #if defined __cplusplus } #endif diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 3a71540506d..7892a67f04c 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -429,6 +429,24 @@ micro_sne(union tgsi_exec_channel *dst, } static void +micro_sfl(union tgsi_exec_channel *dst) +{ + dst->f[0] = 0.0f; + dst->f[1] = 0.0f; + dst->f[2] = 0.0f; + dst->f[3] = 0.0f; +} + +static void +micro_str(union tgsi_exec_channel *dst) +{ + dst->f[0] = 1.0f; + dst->f[1] = 1.0f; + dst->f[2] = 1.0f; + dst->f[3] = 1.0f; +} + +static void micro_trunc(union tgsi_exec_channel *dst, const union tgsi_exec_channel *src) { @@ -453,50 +471,12 @@ enum tgsi_exec_datatype { /* * Shorthand locations of various utility registers (_I = Index, _C = Channel) */ -#define TEMP_0_I TGSI_EXEC_TEMP_00000000_I -#define TEMP_0_C TGSI_EXEC_TEMP_00000000_C -#define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I -#define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C -#define TEMP_80_I TGSI_EXEC_TEMP_80000000_I -#define TEMP_80_C TGSI_EXEC_TEMP_80000000_C -#define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I -#define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C -#define TEMP_1_I TGSI_EXEC_TEMP_ONE_I -#define TEMP_1_C TGSI_EXEC_TEMP_ONE_C -#define TEMP_2_I TGSI_EXEC_TEMP_TWO_I -#define TEMP_2_C TGSI_EXEC_TEMP_TWO_C -#define TEMP_128_I TGSI_EXEC_TEMP_128_I -#define TEMP_128_C TGSI_EXEC_TEMP_128_C -#define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I -#define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C #define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I #define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C #define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I #define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C #define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I #define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C -#define TEMP_CC_I TGSI_EXEC_TEMP_CC_I -#define TEMP_CC_C TGSI_EXEC_TEMP_CC_C -#define TEMP_3_I TGSI_EXEC_TEMP_THREE_I -#define TEMP_3_C TGSI_EXEC_TEMP_THREE_C -#define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I -#define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C -#define TEMP_R0 TGSI_EXEC_TEMP_R0 -#define TEMP_P0 TGSI_EXEC_TEMP_P0 - -#define IS_CHANNEL_ENABLED(INST, CHAN)\ - ((INST).Dst[0].Register.WriteMask & (1 << (CHAN))) - -#define IS_CHANNEL_ENABLED2(INST, CHAN)\ - ((INST).Dst[1].Register.WriteMask & (1 << (CHAN))) - -#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\ - for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\ - if (IS_CHANNEL_ENABLED( INST, CHAN )) - -#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\ - for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\ - if (IS_CHANNEL_ENABLED2( INST, CHAN )) /** The execution mask depends on the conditional mask and the loop mask */ @@ -511,6 +491,14 @@ static const union tgsi_exec_channel OneVec = { {1.0f, 1.0f, 1.0f, 1.0f} }; +static const union tgsi_exec_channel P128Vec = { + {128.0f, 128.0f, 128.0f, 128.0f} +}; + +static const union tgsi_exec_channel M128Vec = { + {-128.0f, -128.0f, -128.0f, -128.0f} +}; + /** * Assert that none of the float values in 'chan' are infinite or NaN. @@ -572,8 +560,6 @@ tgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach, } - - /** * Check if there's a potential src/dst register data dependency when * using SOA execution. @@ -607,18 +593,20 @@ tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst) inst->Dst[0].Register.File) && ((inst->Src[i].Register.Index == inst->Dst[0].Register.Index) || - inst->Src[i].Register.Indirect || - inst->Dst[0].Register.Indirect)) { + inst->Src[i].Register.Indirect || + inst->Dst[0].Register.Indirect)) { /* loop over dest channels */ uint channelsWritten = 0x0; - FOR_EACH_ENABLED_CHANNEL(*inst, chan) { - /* check if we're reading a channel that's been written */ - uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->Src[i], chan); - if (channelsWritten & (1 << swizzle)) { - return TRUE; - } + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + /* check if we're reading a channel that's been written */ + uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->Src[i], chan); + if (channelsWritten & (1 << swizzle)) { + return TRUE; + } - channelsWritten |= (1 << chan); + channelsWritten |= (1 << chan); + } } } } @@ -813,18 +801,18 @@ tgsi_exec_machine_create( void ) mach->MaxGeometryShaderOutputs = TGSI_MAX_TOTAL_VERTICES; mach->Predicates = &mach->Temps[TGSI_EXEC_TEMP_P0]; - /* Setup constants. */ + /* Setup constants needed by the SSE2 executor. */ for( i = 0; i < 4; i++ ) { - mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000; - mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF; - mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000; - mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF; - mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f; - mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f; - mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f; - mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f; - mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f; - mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f; + mach->Temps[TGSI_EXEC_TEMP_00000000_I].xyzw[TGSI_EXEC_TEMP_00000000_C].u[i] = 0x00000000; + mach->Temps[TGSI_EXEC_TEMP_7FFFFFFF_I].xyzw[TGSI_EXEC_TEMP_7FFFFFFF_C].u[i] = 0x7FFFFFFF; + mach->Temps[TGSI_EXEC_TEMP_80000000_I].xyzw[TGSI_EXEC_TEMP_80000000_C].u[i] = 0x80000000; + mach->Temps[TGSI_EXEC_TEMP_FFFFFFFF_I].xyzw[TGSI_EXEC_TEMP_FFFFFFFF_C].u[i] = 0xFFFFFFFF; /* not used */ + mach->Temps[TGSI_EXEC_TEMP_ONE_I].xyzw[TGSI_EXEC_TEMP_ONE_C].f[i] = 1.0f; + mach->Temps[TGSI_EXEC_TEMP_TWO_I].xyzw[TGSI_EXEC_TEMP_TWO_C].f[i] = 2.0f; /* not used */ + mach->Temps[TGSI_EXEC_TEMP_128_I].xyzw[TGSI_EXEC_TEMP_128_C].f[i] = 128.0f; + mach->Temps[TGSI_EXEC_TEMP_MINUS_128_I].xyzw[TGSI_EXEC_TEMP_MINUS_128_C].f[i] = -128.0f; + mach->Temps[TGSI_EXEC_TEMP_THREE_I].xyzw[TGSI_EXEC_TEMP_THREE_C].f[i] = 3.0f; + mach->Temps[TGSI_EXEC_TEMP_HALF_I].xyzw[TGSI_EXEC_TEMP_HALF_C].f[i] = 0.5f; } #ifdef DEBUG @@ -886,27 +874,35 @@ micro_div( } static void -micro_float_clamp(union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src) +micro_rcc(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) { uint i; for (i = 0; i < 4; i++) { - if (src->f[i] > 0.0f) { - if (src->f[i] > 1.884467e+019f) + float recip = 1.0f / src->f[i]; + + if (recip > 0.0f) { + if (recip > 1.884467e+019f) { dst->f[i] = 1.884467e+019f; - else if (src->f[i] < 5.42101e-020f) + } + else if (recip < 5.42101e-020f) { dst->f[i] = 5.42101e-020f; - else - dst->f[i] = src->f[i]; + } + else { + dst->f[i] = recip; + } } else { - if (src->f[i] < -1.884467e+019f) + if (recip < -1.884467e+019f) { dst->f[i] = -1.884467e+019f; - else if (src->f[i] > -5.42101e-020f) + } + else if (recip > -5.42101e-020f) { dst->f[i] = -5.42101e-020f; - else - dst->f[i] = src->f[i]; + } + else { + dst->f[i] = recip; + } } } } @@ -958,60 +954,6 @@ micro_mul(union tgsi_exec_channel *dst, dst->f[3] = src0->f[3] * src1->f[3]; } -#if 0 -static void -micro_imul64( - union tgsi_exec_channel *dst0, - union tgsi_exec_channel *dst1, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst1->i[0] = src0->i[0] * src1->i[0]; - dst1->i[1] = src0->i[1] * src1->i[1]; - dst1->i[2] = src0->i[2] * src1->i[2]; - dst1->i[3] = src0->i[3] * src1->i[3]; - dst0->i[0] = 0; - dst0->i[1] = 0; - dst0->i[2] = 0; - dst0->i[3] = 0; -} -#endif - -#if 0 -static void -micro_umul64( - union tgsi_exec_channel *dst0, - union tgsi_exec_channel *dst1, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst1->u[0] = src0->u[0] * src1->u[0]; - dst1->u[1] = src0->u[1] * src1->u[1]; - dst1->u[2] = src0->u[2] * src1->u[2]; - dst1->u[3] = src0->u[3] * src1->u[3]; - dst0->u[0] = 0; - dst0->u[1] = 0; - dst0->u[2] = 0; - dst0->u[3] = 0; -} -#endif - - -#if 0 -static void -micro_movc( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2 ) -{ - dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0]; - dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1]; - dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2]; - dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3]; -} -#endif - static void micro_neg( union tgsi_exec_channel *dst, @@ -1607,9 +1549,6 @@ store_dest(struct tgsi_exec_machine *mach, #define FETCH(VAL,INDEX,CHAN)\ fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_FLOAT) -#define STORE(VAL,INDEX,CHAN)\ - store_dest(mach, VAL, &inst->Dst[INDEX], inst, CHAN, TGSI_EXEC_DATA_FLOAT) - /** * Execute ARB-style KIL which is predicated by a src register. @@ -1753,7 +1692,7 @@ exec_tex(struct tgsi_exec_machine *mach, union tgsi_exec_channel r[4]; const union tgsi_exec_channel *lod = &ZeroVec; enum tgsi_sampler_control control; - uint chan_index; + uint chan; if (modifier != TEX_MODIFIER_NONE) { FETCH(&r[3], 0, CHAN_W); @@ -1825,8 +1764,10 @@ exec_tex(struct tgsi_exec_machine *mach, assert(0); } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&r[chan_index], 0, chan_index); + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); + } } } @@ -1836,7 +1777,7 @@ exec_txd(struct tgsi_exec_machine *mach, { const uint unit = inst->Src[3].Register.Index; union tgsi_exec_channel r[4]; - uint chan_index; + uint chan; /* * XXX: This is fake TXD -- the derivatives are not taken into account, yet. @@ -1886,8 +1827,10 @@ exec_txd(struct tgsi_exec_machine *mach, assert(0); } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&r[chan_index], 0, chan_index); + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); + } } } @@ -2021,6 +1964,26 @@ exec_declaration(struct tgsi_exec_machine *mach, } } +typedef void (* micro_op)(union tgsi_exec_channel *dst); + +static void +exec_vector(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst, + micro_op op, + enum tgsi_exec_datatype dst_datatype) +{ + unsigned int chan; + + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + union tgsi_exec_channel dst; + + op(&dst); + store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype); + } + } +} + typedef void (* micro_unary_op)(union tgsi_exec_channel *dst, const union tgsi_exec_channel *src); @@ -2074,6 +2037,27 @@ typedef void (* micro_binary_op)(union tgsi_exec_channel *dst, const union tgsi_exec_channel *src1); static void +exec_scalar_binary(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst, + micro_binary_op op, + enum tgsi_exec_datatype dst_datatype, + enum tgsi_exec_datatype src_datatype) +{ + unsigned int chan; + union tgsi_exec_channel src[2]; + union tgsi_exec_channel dst; + + fetch_source(mach, &src[0], &inst->Src[0], CHAN_X, src_datatype); + fetch_source(mach, &src[1], &inst->Src[1], CHAN_Y, src_datatype); + op(&dst, &src[0], &src[1]); + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype); + } + } +} + +static void exec_vector_binary(struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst, micro_binary_op op, @@ -2320,6 +2304,289 @@ exec_nrm3(struct tgsi_exec_machine *mach, } static void +exec_scs(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) { + union tgsi_exec_channel arg; + union tgsi_exec_channel result; + + fetch_source(mach, &arg, &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); + + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { + micro_cos(&result, &arg); + store_dest(mach, &result, &inst->Dst[0], inst, CHAN_X, TGSI_EXEC_DATA_FLOAT); + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { + micro_sin(&result, &arg); + store_dest(mach, &result, &inst->Dst[0], inst, CHAN_Y, TGSI_EXEC_DATA_FLOAT); + } + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { + store_dest(mach, &ZeroVec, &inst->Dst[0], inst, CHAN_Z, TGSI_EXEC_DATA_FLOAT); + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { + store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT); + } +} + +static void +exec_x2d(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + union tgsi_exec_channel r[4]; + union tgsi_exec_channel d[2]; + + fetch_source(mach, &r[0], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &r[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT); + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XZ) { + fetch_source(mach, &r[2], &inst->Src[2], CHAN_X, TGSI_EXEC_DATA_FLOAT); + micro_mul(&r[2], &r[2], &r[0]); + fetch_source(mach, &r[3], &inst->Src[2], CHAN_Y, TGSI_EXEC_DATA_FLOAT); + micro_mul(&r[3], &r[3], &r[1]); + micro_add(&r[2], &r[2], &r[3]); + fetch_source(mach, &r[3], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); + micro_add(&d[0], &r[2], &r[3]); + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_YW) { + fetch_source(mach, &r[2], &inst->Src[2], CHAN_Z, TGSI_EXEC_DATA_FLOAT); + micro_mul(&r[2], &r[2], &r[0]); + fetch_source(mach, &r[3], &inst->Src[2], CHAN_W, TGSI_EXEC_DATA_FLOAT); + micro_mul(&r[3], &r[3], &r[1]); + micro_add(&r[2], &r[2], &r[3]); + fetch_source(mach, &r[3], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT); + micro_add(&d[1], &r[2], &r[3]); + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { + store_dest(mach, &d[0], &inst->Dst[0], inst, CHAN_X, TGSI_EXEC_DATA_FLOAT); + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { + store_dest(mach, &d[1], &inst->Dst[0], inst, CHAN_Y, TGSI_EXEC_DATA_FLOAT); + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { + store_dest(mach, &d[0], &inst->Dst[0], inst, CHAN_Z, TGSI_EXEC_DATA_FLOAT); + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { + store_dest(mach, &d[1], &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT); + } +} + +static void +exec_rfl(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + union tgsi_exec_channel r[9]; + + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) { + /* r0 = dp3(src0, src0) */ + fetch_source(mach, &r[2], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); + micro_mul(&r[0], &r[2], &r[2]); + fetch_source(mach, &r[4], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT); + micro_mul(&r[8], &r[4], &r[4]); + micro_add(&r[0], &r[0], &r[8]); + fetch_source(mach, &r[6], &inst->Src[0], CHAN_Z, TGSI_EXEC_DATA_FLOAT); + micro_mul(&r[8], &r[6], &r[6]); + micro_add(&r[0], &r[0], &r[8]); + + /* r1 = dp3(src0, src1) */ + fetch_source(mach, &r[3], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT); + micro_mul(&r[1], &r[2], &r[3]); + fetch_source(mach, &r[5], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT); + micro_mul(&r[8], &r[4], &r[5]); + micro_add(&r[1], &r[1], &r[8]); + fetch_source(mach, &r[7], &inst->Src[1], CHAN_Z, TGSI_EXEC_DATA_FLOAT); + micro_mul(&r[8], &r[6], &r[7]); + micro_add(&r[1], &r[1], &r[8]); + + /* r1 = 2 * r1 / r0 */ + micro_add(&r[1], &r[1], &r[1]); + micro_div(&r[1], &r[1], &r[0]); + + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { + micro_mul(&r[2], &r[2], &r[1]); + micro_sub(&r[2], &r[2], &r[3]); + store_dest(mach, &r[2], &inst->Dst[0], inst, CHAN_X, TGSI_EXEC_DATA_FLOAT); + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { + micro_mul(&r[4], &r[4], &r[1]); + micro_sub(&r[4], &r[4], &r[5]); + store_dest(mach, &r[4], &inst->Dst[0], inst, CHAN_Y, TGSI_EXEC_DATA_FLOAT); + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { + micro_mul(&r[6], &r[6], &r[1]); + micro_sub(&r[6], &r[6], &r[7]); + store_dest(mach, &r[6], &inst->Dst[0], inst, CHAN_Z, TGSI_EXEC_DATA_FLOAT); + } + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { + store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT); + } +} + +static void +exec_xpd(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + union tgsi_exec_channel r[6]; + union tgsi_exec_channel d[3]; + + fetch_source(mach, &r[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &r[1], &inst->Src[1], CHAN_Z, TGSI_EXEC_DATA_FLOAT); + + micro_mul(&r[2], &r[0], &r[1]); + + fetch_source(mach, &r[3], &inst->Src[0], CHAN_Z, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &r[4], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT); + + micro_mul(&r[5], &r[3], &r[4] ); + micro_sub(&d[CHAN_X], &r[2], &r[5]); + + fetch_source(mach, &r[2], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT); + + micro_mul(&r[3], &r[3], &r[2]); + + fetch_source(mach, &r[5], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); + + micro_mul(&r[1], &r[1], &r[5]); + micro_sub(&d[CHAN_Y], &r[3], &r[1]); + + micro_mul(&r[5], &r[5], &r[4]); + micro_mul(&r[0], &r[0], &r[2]); + micro_sub(&d[CHAN_Z], &r[5], &r[0]); + + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { + store_dest(mach, &d[CHAN_X], &inst->Dst[0], inst, CHAN_X, TGSI_EXEC_DATA_FLOAT); + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { + store_dest(mach, &d[CHAN_Y], &inst->Dst[0], inst, CHAN_Y, TGSI_EXEC_DATA_FLOAT); + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { + store_dest(mach, &d[CHAN_Z], &inst->Dst[0], inst, CHAN_Z, TGSI_EXEC_DATA_FLOAT); + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { + store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT); + } +} + +static void +exec_dst(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + union tgsi_exec_channel r[2]; + union tgsi_exec_channel d[4]; + + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { + fetch_source(mach, &r[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &r[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT); + micro_mul(&d[CHAN_Y], &r[0], &r[1]); + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { + fetch_source(mach, &d[CHAN_Z], &inst->Src[0], CHAN_Z, TGSI_EXEC_DATA_FLOAT); + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { + fetch_source(mach, &d[CHAN_W], &inst->Src[1], CHAN_W, TGSI_EXEC_DATA_FLOAT); + } + + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { + store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_X, TGSI_EXEC_DATA_FLOAT); + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { + store_dest(mach, &d[CHAN_Y], &inst->Dst[0], inst, CHAN_Y, TGSI_EXEC_DATA_FLOAT); + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { + store_dest(mach, &d[CHAN_Z], &inst->Dst[0], inst, CHAN_Z, TGSI_EXEC_DATA_FLOAT); + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { + store_dest(mach, &d[CHAN_W], &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT); + } +} + +static void +exec_log(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + union tgsi_exec_channel r[3]; + + fetch_source(mach, &r[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); + micro_abs(&r[2], &r[0]); /* r2 = abs(r0) */ + micro_lg2(&r[1], &r[2]); /* r1 = lg2(r2) */ + micro_flr(&r[0], &r[1]); /* r0 = floor(r1) */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { + store_dest(mach, &r[0], &inst->Dst[0], inst, CHAN_X, TGSI_EXEC_DATA_FLOAT); + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { + micro_exp2(&r[0], &r[0]); /* r0 = 2 ^ r0 */ + micro_div(&r[0], &r[2], &r[0]); /* r0 = r2 / r0 */ + store_dest(mach, &r[0], &inst->Dst[0], inst, CHAN_Y, TGSI_EXEC_DATA_FLOAT); + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { + store_dest(mach, &r[1], &inst->Dst[0], inst, CHAN_Z, TGSI_EXEC_DATA_FLOAT); + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { + store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT); + } +} + +static void +exec_exp(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + union tgsi_exec_channel r[3]; + + fetch_source(mach, &r[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); + micro_flr(&r[1], &r[0]); /* r1 = floor(r0) */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { + micro_exp2(&r[2], &r[1]); /* r2 = 2 ^ r1 */ + store_dest(mach, &r[2], &inst->Dst[0], inst, CHAN_X, TGSI_EXEC_DATA_FLOAT); + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { + micro_sub(&r[2], &r[0], &r[1]); /* r2 = r0 - r1 */ + store_dest(mach, &r[2], &inst->Dst[0], inst, CHAN_Y, TGSI_EXEC_DATA_FLOAT); + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { + micro_exp2(&r[2], &r[0]); /* r2 = 2 ^ r0 */ + store_dest(mach, &r[2], &inst->Dst[0], inst, CHAN_Z, TGSI_EXEC_DATA_FLOAT); + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { + store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT); + } +} + +static void +exec_lit(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + union tgsi_exec_channel r[3]; + union tgsi_exec_channel d[3]; + + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { + store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_X, TGSI_EXEC_DATA_FLOAT); + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_YZ) { + fetch_source(mach, &r[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { + micro_max(&d[CHAN_Y], &r[0], &ZeroVec); + store_dest(mach, &d[CHAN_Y], &inst->Dst[0], inst, CHAN_Y, TGSI_EXEC_DATA_FLOAT); + } + + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { + fetch_source(mach, &r[1], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT); + micro_max(&r[1], &r[1], &ZeroVec); + + fetch_source(mach, &r[2], &inst->Src[0], CHAN_W, TGSI_EXEC_DATA_FLOAT); + micro_min(&r[2], &r[2], &P128Vec); + micro_max(&r[2], &r[2], &M128Vec); + micro_pow(&r[1], &r[1], &r[2]); + micro_lt(&d[CHAN_Z], &ZeroVec, &r[0], &r[1], &ZeroVec); + store_dest(mach, &d[CHAN_Z], &inst->Dst[0], inst, CHAN_Z, TGSI_EXEC_DATA_FLOAT); + } + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { + store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT); + } +} + +static void exec_break(struct tgsi_exec_machine *mach) { if (mach->BreakType == TGSI_EXEC_BREAK_INSIDE_LOOP) { @@ -2702,9 +2969,7 @@ exec_instruction( const struct tgsi_full_instruction *inst, int *pc ) { - uint chan_index; union tgsi_exec_channel r[10]; - union tgsi_exec_channel d[8]; (*pc)++; @@ -2718,36 +2983,7 @@ exec_instruction( break; case TGSI_OPCODE_LIT: - if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { - FETCH( &r[0], 0, CHAN_X ); - if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { - micro_max(&d[CHAN_Y], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); - } - - if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { - FETCH( &r[1], 0, CHAN_Y ); - micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); - - FETCH( &r[2], 0, CHAN_W ); - micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] ); - micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] ); - micro_pow( &r[1], &r[1], &r[2] ); - micro_lt(&d[CHAN_Z], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); - } - - if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { - STORE(&d[CHAN_Y], 0, CHAN_Y); - } - if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { - STORE(&d[CHAN_Z], 0, CHAN_Z); - } - } - if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); - } - if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); - } + exec_lit(mach, inst); break; case TGSI_OPCODE_RCP: @@ -2759,44 +2995,11 @@ exec_instruction( break; case TGSI_OPCODE_EXP: - FETCH( &r[0], 0, CHAN_X ); - micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */ - if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { - micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */ - STORE( &r[2], 0, CHAN_X ); /* store r2 */ - } - if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { - micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */ - STORE( &r[2], 0, CHAN_Y ); /* store r2 */ - } - if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { - micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */ - STORE( &r[2], 0, CHAN_Z ); /* store r2 */ - } - if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); - } + exec_exp(mach, inst); break; case TGSI_OPCODE_LOG: - FETCH( &r[0], 0, CHAN_X ); - micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */ - micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */ - micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */ - if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { - STORE( &r[0], 0, CHAN_X ); - } - if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { - micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */ - micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */ - STORE( &r[0], 0, CHAN_Y ); - } - if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { - STORE( &r[1], 0, CHAN_Z ); - } - if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); - } + exec_log(mach, inst); break; case TGSI_OPCODE_MUL: @@ -2816,30 +3019,7 @@ exec_instruction( break; case TGSI_OPCODE_DST: - if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { - FETCH( &r[0], 0, CHAN_Y ); - FETCH( &r[1], 1, CHAN_Y); - micro_mul(&d[CHAN_Y], &r[0], &r[1]); - } - if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { - FETCH(&d[CHAN_Z], 0, CHAN_Z); - } - if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { - FETCH(&d[CHAN_W], 1, CHAN_W); - } - - if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { - STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X); - } - if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { - STORE(&d[CHAN_Y], 0, CHAN_Y); - } - if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { - STORE(&d[CHAN_Z], 0, CHAN_Z); - } - if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { - STORE(&d[CHAN_W], 0, CHAN_W); - } + exec_dst(mach, inst); break; case TGSI_OPCODE_MIN: @@ -2903,53 +3083,11 @@ exec_instruction( break; case TGSI_OPCODE_POW: - FETCH(&r[0], 0, CHAN_X); - FETCH(&r[1], 1, CHAN_X); - - micro_pow( &r[0], &r[0], &r[1] ); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_scalar_binary(mach, inst, micro_pow, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_XPD: - FETCH(&r[0], 0, CHAN_Y); - FETCH(&r[1], 1, CHAN_Z); - - micro_mul( &r[2], &r[0], &r[1] ); - - FETCH(&r[3], 0, CHAN_Z); - FETCH(&r[4], 1, CHAN_Y); - - micro_mul( &r[5], &r[3], &r[4] ); - micro_sub(&d[CHAN_X], &r[2], &r[5]); - - FETCH(&r[2], 1, CHAN_X); - - micro_mul( &r[3], &r[3], &r[2] ); - - FETCH(&r[5], 0, CHAN_X); - - micro_mul( &r[1], &r[1], &r[5] ); - micro_sub(&d[CHAN_Y], &r[3], &r[1]); - - micro_mul( &r[5], &r[5], &r[4] ); - micro_mul( &r[0], &r[0], &r[2] ); - micro_sub(&d[CHAN_Z], &r[5], &r[0]); - - if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { - STORE(&d[CHAN_X], 0, CHAN_X); - } - if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { - STORE(&d[CHAN_Y], 0, CHAN_Y); - } - if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { - STORE(&d[CHAN_Z], 0, CHAN_Z); - } - if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); - } + exec_xpd(mach, inst); break; case TGSI_OPCODE_ABS: @@ -2957,12 +3095,7 @@ exec_instruction( break; case TGSI_OPCODE_RCC: - FETCH(&r[0], 0, CHAN_X); - micro_div(&r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0]); - micro_float_clamp(&r[0], &r[0]); - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&r[0], 0, chan_index); - } + exec_scalar_unary(mach, inst, micro_rcc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_DPH: @@ -3006,52 +3139,7 @@ exec_instruction( break; case TGSI_OPCODE_RFL: - if (IS_CHANNEL_ENABLED(*inst, CHAN_X) || - IS_CHANNEL_ENABLED(*inst, CHAN_Y) || - IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { - /* r0 = dp3(src0, src0) */ - FETCH(&r[2], 0, CHAN_X); - micro_mul(&r[0], &r[2], &r[2]); - FETCH(&r[4], 0, CHAN_Y); - micro_mul(&r[8], &r[4], &r[4]); - micro_add(&r[0], &r[0], &r[8]); - FETCH(&r[6], 0, CHAN_Z); - micro_mul(&r[8], &r[6], &r[6]); - micro_add(&r[0], &r[0], &r[8]); - - /* r1 = dp3(src0, src1) */ - FETCH(&r[3], 1, CHAN_X); - micro_mul(&r[1], &r[2], &r[3]); - FETCH(&r[5], 1, CHAN_Y); - micro_mul(&r[8], &r[4], &r[5]); - micro_add(&r[1], &r[1], &r[8]); - FETCH(&r[7], 1, CHAN_Z); - micro_mul(&r[8], &r[6], &r[7]); - micro_add(&r[1], &r[1], &r[8]); - - /* r1 = 2 * r1 / r0 */ - micro_add(&r[1], &r[1], &r[1]); - micro_div(&r[1], &r[1], &r[0]); - - if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { - micro_mul(&r[2], &r[2], &r[1]); - micro_sub(&r[2], &r[2], &r[3]); - STORE(&r[2], 0, CHAN_X); - } - if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { - micro_mul(&r[4], &r[4], &r[1]); - micro_sub(&r[4], &r[4], &r[5]); - STORE(&r[4], 0, CHAN_Y); - } - if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { - micro_mul(&r[6], &r[6], &r[1]); - micro_sub(&r[6], &r[6], &r[7]); - STORE(&r[6], 0, CHAN_Z); - } - } - if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { - STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W); - } + exec_rfl(mach, inst); break; case TGSI_OPCODE_SEQ: @@ -3059,9 +3147,7 @@ exec_instruction( break; case TGSI_OPCODE_SFL: - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, chan_index); - } + exec_vector(mach, inst, micro_sfl, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_SGT: @@ -3081,9 +3167,7 @@ exec_instruction( break; case TGSI_OPCODE_STR: - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, chan_index); - } + exec_vector(mach, inst, micro_str, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_TEX: @@ -3140,42 +3224,7 @@ exec_instruction( break; case TGSI_OPCODE_X2D: - FETCH(&r[0], 1, CHAN_X); - FETCH(&r[1], 1, CHAN_Y); - if (IS_CHANNEL_ENABLED(*inst, CHAN_X) || - IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { - FETCH(&r[2], 2, CHAN_X); - micro_mul(&r[2], &r[2], &r[0]); - FETCH(&r[3], 2, CHAN_Y); - micro_mul(&r[3], &r[3], &r[1]); - micro_add(&r[2], &r[2], &r[3]); - FETCH(&r[3], 0, CHAN_X); - micro_add(&d[CHAN_X], &r[2], &r[3]); - - } - if (IS_CHANNEL_ENABLED(*inst, CHAN_Y) || - IS_CHANNEL_ENABLED(*inst, CHAN_W)) { - FETCH(&r[2], 2, CHAN_Z); - micro_mul(&r[2], &r[2], &r[0]); - FETCH(&r[3], 2, CHAN_W); - micro_mul(&r[3], &r[3], &r[1]); - micro_add(&r[2], &r[2], &r[3]); - FETCH(&r[3], 0, CHAN_Y); - micro_add(&d[CHAN_Y], &r[2], &r[3]); - - } - if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { - STORE(&d[CHAN_X], 0, CHAN_X); - } - if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { - STORE(&d[CHAN_Y], 0, CHAN_Y); - } - if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { - STORE(&d[CHAN_X], 0, CHAN_Z); - } - if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { - STORE(&d[CHAN_Y], 0, CHAN_W); - } + exec_x2d(mach, inst); break; case TGSI_OPCODE_ARA: @@ -3283,23 +3332,7 @@ exec_instruction( break; case TGSI_OPCODE_SCS: - if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { - FETCH( &r[0], 0, CHAN_X ); - if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { - micro_cos(&r[1], &r[0]); - STORE(&r[1], 0, CHAN_X); - } - if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { - micro_sin(&r[1], &r[0]); - STORE(&r[1], 0, CHAN_Y); - } - } - if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { - STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z ); - } - if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); - } + exec_scs(mach, inst); break; case TGSI_OPCODE_NRM: @@ -3684,14 +3717,6 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) mach->Primitives[0] = 0; } - for (i = 0; i < QUAD_SIZE; i++) { - mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] = - (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) | - (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) | - (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) | - (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT); - } - /* execute declarations (interpolants) */ for (i = 0; i < mach->NumDeclarations; i++) { exec_declaration( mach, mach->Declarations+i ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index 9d62c1d7e7e..bd5492e9497 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -131,34 +131,15 @@ struct tgsi_sampler #define TGSI_EXEC_TEMP_PRIMITIVE_I (TGSI_EXEC_NUM_TEMPS + 2) #define TGSI_EXEC_TEMP_PRIMITIVE_C 2 -/* NVIDIA condition code (CC) vector - */ -#define TGSI_EXEC_CC_GT 0x01 -#define TGSI_EXEC_CC_EQ 0x02 -#define TGSI_EXEC_CC_LT 0x04 -#define TGSI_EXEC_CC_UN 0x08 - -#define TGSI_EXEC_CC_X_MASK 0x000000ff -#define TGSI_EXEC_CC_X_SHIFT 0 -#define TGSI_EXEC_CC_Y_MASK 0x0000ff00 -#define TGSI_EXEC_CC_Y_SHIFT 8 -#define TGSI_EXEC_CC_Z_MASK 0x00ff0000 -#define TGSI_EXEC_CC_Z_SHIFT 16 -#define TGSI_EXEC_CC_W_MASK 0xff000000 -#define TGSI_EXEC_CC_W_SHIFT 24 - -#define TGSI_EXEC_TEMP_CC_I (TGSI_EXEC_NUM_TEMPS + 2) -#define TGSI_EXEC_TEMP_CC_C 3 - -#define TGSI_EXEC_TEMP_THREE_I (TGSI_EXEC_NUM_TEMPS + 3) -#define TGSI_EXEC_TEMP_THREE_C 0 +#define TGSI_EXEC_TEMP_THREE_I (TGSI_EXEC_NUM_TEMPS + 2) +#define TGSI_EXEC_TEMP_THREE_C 3 #define TGSI_EXEC_TEMP_HALF_I (TGSI_EXEC_NUM_TEMPS + 3) -#define TGSI_EXEC_TEMP_HALF_C 1 +#define TGSI_EXEC_TEMP_HALF_C 0 /* execution mask, each value is either 0 or ~0 */ #define TGSI_EXEC_MASK_I (TGSI_EXEC_NUM_TEMPS + 3) -#define TGSI_EXEC_MASK_C 2 +#define TGSI_EXEC_MASK_C 1 /* 4 register buffer for various purposes */ #define TGSI_EXEC_TEMP_R0 (TGSI_EXEC_NUM_TEMPS + 4) |