summaryrefslogtreecommitdiffstats
path: root/src/gallium/auxiliary
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/auxiliary')
-rw-r--r--src/gallium/auxiliary/SConscript71
-rw-r--r--src/gallium/auxiliary/draw/draw_llvm.c10
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_init.c8
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c335
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_type.c16
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_type.h4
-rw-r--r--src/gallium/auxiliary/os/os_stream_stdc.c2
-rw-r--r--src/gallium/auxiliary/pipebuffer/SConscript18
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_build.c736
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_build.h161
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_exec.c801
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_exec.h27
12 files changed, 1143 insertions, 1046 deletions
diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript
index f22c8b96123..4b9059d9aa3 100644
--- a/src/gallium/auxiliary/SConscript
+++ b/src/gallium/auxiliary/SConscript
@@ -7,8 +7,6 @@ env.Append(CPPPATH = [
'util',
])
-env.Tool('udis86')
-
env.CodeGenerate(
target = 'indices/u_indices_gen.c',
script = 'indices/u_indices_gen.py',
@@ -200,39 +198,42 @@ source = [
]
if env['llvm']:
+ if env['UDIS86']:
+ env.Append(CPPDEFINES = [('HAVE_UDIS86', '1')])
+
source += [
- 'gallivm/lp_bld_arit.c',
- 'gallivm/lp_bld_assert.c',
- 'gallivm/lp_bld_bitarit.c',
- 'gallivm/lp_bld_const.c',
- 'gallivm/lp_bld_conv.c',
- 'gallivm/lp_bld_debug.c',
- 'gallivm/lp_bld_flow.c',
- 'gallivm/lp_bld_format_aos.c',
- 'gallivm/lp_bld_format_soa.c',
- 'gallivm/lp_bld_format_yuv.c',
- 'gallivm/lp_bld_gather.c',
- 'gallivm/lp_bld_init.c',
- 'gallivm/lp_bld_intr.c',
- 'gallivm/lp_bld_logic.c',
- 'gallivm/lp_bld_misc.cpp',
- 'gallivm/lp_bld_pack.c',
- 'gallivm/lp_bld_printf.c',
- 'gallivm/lp_bld_quad.c',
- 'gallivm/lp_bld_sample.c',
- 'gallivm/lp_bld_sample_aos.c',
- 'gallivm/lp_bld_sample_soa.c',
- 'gallivm/lp_bld_struct.c',
- 'gallivm/lp_bld_swizzle.c',
- 'gallivm/lp_bld_tgsi_aos.c',
- 'gallivm/lp_bld_tgsi_info.c',
- 'gallivm/lp_bld_tgsi_soa.c',
- 'gallivm/lp_bld_type.c',
- 'draw/draw_llvm.c',
- 'draw/draw_llvm_sample.c',
- 'draw/draw_llvm_translate.c',
- 'draw/draw_pt_fetch_shade_pipeline_llvm.c',
- 'draw/draw_vs_llvm.c'
+ 'gallivm/lp_bld_arit.c',
+ 'gallivm/lp_bld_assert.c',
+ 'gallivm/lp_bld_bitarit.c',
+ 'gallivm/lp_bld_const.c',
+ 'gallivm/lp_bld_conv.c',
+ 'gallivm/lp_bld_debug.c',
+ 'gallivm/lp_bld_flow.c',
+ 'gallivm/lp_bld_format_aos.c',
+ 'gallivm/lp_bld_format_soa.c',
+ 'gallivm/lp_bld_format_yuv.c',
+ 'gallivm/lp_bld_gather.c',
+ 'gallivm/lp_bld_init.c',
+ 'gallivm/lp_bld_intr.c',
+ 'gallivm/lp_bld_logic.c',
+ 'gallivm/lp_bld_misc.cpp',
+ 'gallivm/lp_bld_pack.c',
+ 'gallivm/lp_bld_printf.c',
+ 'gallivm/lp_bld_quad.c',
+ 'gallivm/lp_bld_sample.c',
+ 'gallivm/lp_bld_sample_aos.c',
+ 'gallivm/lp_bld_sample_soa.c',
+ 'gallivm/lp_bld_struct.c',
+ 'gallivm/lp_bld_swizzle.c',
+ 'gallivm/lp_bld_tgsi_aos.c',
+ 'gallivm/lp_bld_tgsi_info.c',
+ 'gallivm/lp_bld_tgsi_soa.c',
+ 'gallivm/lp_bld_type.c',
+ 'draw/draw_llvm.c',
+ 'draw/draw_llvm_sample.c',
+ 'draw/draw_llvm_translate.c',
+ 'draw/draw_pt_fetch_shade_pipeline_llvm.c',
+ 'draw/draw_vs_llvm.c'
]
gallium = env.ConvenienceLibrary(
@@ -240,4 +241,6 @@ gallium = env.ConvenienceLibrary(
source = source,
)
+env.Alias('gallium', gallium)
+
Export('gallium')
diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c
index 140e596f994..2b5f01cda74 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -1164,11 +1164,6 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
sampler->destroy(sampler);
-#ifdef PIPE_ARCH_X86
- /* Avoid corrupting the FPU stack on 32bit OSes. */
- lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0);
-#endif
-
ret = LLVMBuildLoad(builder, ret_ptr,"");
LLVMBuildRet(builder, ret);
@@ -1378,11 +1373,6 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian
sampler->destroy(sampler);
-#ifdef PIPE_ARCH_X86
- /* Avoid corrupting the FPU stack on 32bit OSes. */
- lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0);
-#endif
-
ret = LLVMBuildLoad(builder, ret_ptr,"");
LLVMBuildRet(builder, ret);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c
index 5598ca5c489..0b9a6f745fb 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_init.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c
@@ -145,13 +145,7 @@ lp_build_init(void)
LLVMAddCFGSimplificationPass(lp_build_pass);
LLVMAddPromoteMemoryToRegisterPass(lp_build_pass);
LLVMAddConstantPropagationPass(lp_build_pass);
- if(util_cpu_caps.has_sse4_1) {
- /* FIXME: There is a bug in this pass, whereby the combination of fptosi
- * and sitofp (necessary for trunc/floor/ceil/round implementation)
- * somehow becomes invalid code.
- */
- LLVMAddInstructionCombiningPass(lp_build_pass);
- }
+ LLVMAddInstructionCombiningPass(lp_build_pass);
LLVMAddGVNPass(lp_build_pass);
} else {
/* We need at least this pass to prevent the backends to fail in
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index 3c318cc8c80..7f0f058c222 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -58,6 +58,7 @@
#include "lp_bld_tgsi.h"
#include "lp_bld_limits.h"
#include "lp_bld_debug.h"
+#include "lp_bld_printf.h"
#define FOR_EACH_CHANNEL( CHAN )\
@@ -119,9 +120,12 @@ struct lp_build_tgsi_soa_context
{
struct lp_build_context base;
- /* Builder for integer masks and indices */
+ /* Builder for vector integer masks and indices */
struct lp_build_context uint_bld;
+ /* Builder for scalar elements of shader's data type (float) */
+ struct lp_build_context elem_bld;
+
LLVMValueRef consts_ptr;
const LLVMValueRef *pos;
const LLVMValueRef (*inputs)[NUM_CHANNELS];
@@ -140,6 +144,18 @@ struct lp_build_tgsi_soa_context
*/
LLVMValueRef temps_array;
+ /* We allocate/use this array of outputs if (1 << TGSI_FILE_OUTPUT) is
+ * set in the indirect_files field.
+ * The outputs[] array above is unused then.
+ */
+ LLVMValueRef outputs_array;
+
+ /* We allocate/use this array of inputs if (1 << TGSI_FILE_INPUT) is
+ * set in the indirect_files field.
+ * The inputs[] array above is unused then.
+ */
+ LLVMValueRef inputs_array;
+
const struct tgsi_shader_info *info;
/** bitmask indicating which register files are accessed indirectly */
unsigned indirect_files;
@@ -435,6 +451,26 @@ get_temp_ptr(struct lp_build_tgsi_soa_context *bld,
}
}
+/**
+ * Look up the storage for one channel of an output register.
+ *
+ * If outputs are flagged as indirectly addressed in indirect_files, the
+ * result is a pointer to element (index * 4 + chan) of outputs_array;
+ * otherwise the entry stored in outputs[index][chan] is returned as is.
+ * The register index is a build-time constant here, so indirect
+ * addressing cannot be handled by this helper.
+ *
+ * \param index  which output register
+ * \param chan   which channel of the output register (must be < 4)
+ */
+static LLVMValueRef
+get_output_ptr(struct lp_build_tgsi_soa_context *bld,
+               unsigned index,
+               unsigned chan)
+{
+   LLVMValueRef slot;
+
+   assert(chan < 4);
+
+   /* Direct case: every output channel has its own entry. */
+   if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT)))
+      return bld->outputs[index][chan];
+
+   /* Array case: four consecutive slots per register. */
+   slot = lp_build_const_int32(index * 4 + chan);
+   return LLVMBuildGEP(bld->base.builder, bld->outputs_array, &slot, 1, "");
+}
/**
* Gather vector.
@@ -457,7 +493,7 @@ build_gather(struct lp_build_tgsi_soa_context *bld,
LLVMValueRef index = LLVMBuildExtractElement(bld->base.builder,
indexes, ii, "");
LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, base_ptr,
- &index, 1, "");
+ &index, 1, "gather_ptr");
LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
res = LLVMBuildInsertElement(bld->base.builder, res, scalar, ii, "");
@@ -468,8 +504,60 @@ build_gather(struct lp_build_tgsi_soa_context *bld,
/**
+ * Scatter/store vector.
+ *
+ * For every element i of the vector type, emits a scalar store of
+ * values[i] to the address base_ptr + indexes[i].  If a predicate is
+ * supplied and/or the execution mask is active, each store becomes a
+ * load / lp_build_select() / store sequence that chooses between the new
+ * value and the value already in memory.
+ *
+ * \param bld       shader build context (supplies builder, type, elem_bld)
+ * \param base_ptr  pointer that each per-element index is applied to
+ * \param indexes   vector of per-element indexes
+ * \param values    vector of values to store
+ * \param mask      execution mask; only consulted when has_mask is set
+ * \param pred      optional predicate vector, may be NULL
+ */
+static void
+emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
+ LLVMValueRef base_ptr,
+ LLVMValueRef indexes,
+ LLVMValueRef values,
+ struct lp_exec_mask *mask,
+ LLVMValueRef pred)
+{
+ LLVMBuilderRef builder = bld->base.builder;
+ unsigned i;
+
+ /* Mix the predicate and execution mask:
+ * with both present they are ANDed together, with only the execution
+ * mask present it takes the place of the predicate.
+ */
+ if (mask->has_mask) {
+ if (pred) {
+ pred = LLVMBuildAnd(mask->bld->builder, pred, mask->exec_mask, "");
+ }
+ else {
+ pred = mask->exec_mask;
+ }
+ }
+
+ /*
+ * Loop over the elements of indexes, storing one scalar value per
+ * iteration.
+ */
+ for (i = 0; i < bld->base.type.length; i++) {
+ LLVMValueRef ii = LLVMConstInt(LLVMInt32Type(), i, 0);
+ LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
+ LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
+ LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
+ LLVMValueRef scalar_pred = pred ?
+ LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
+
+ if (0) /* disabled debug trace of each scalar store */
+ lp_build_printf(builder, "scatter %d: val %f at %d %p\n",
+ ii, val, index, scalar_ptr);
+
+ if (scalar_pred) { /* conditional store: read, select, write back */
+ LLVMValueRef real_val, dst_val;
+ dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
+ real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
+ LLVMBuildStore(builder, real_val, scalar_ptr);
+ }
+ else { /* no predicate and no active mask: unconditional store */
+ LLVMBuildStore(builder, val, scalar_ptr);
+ }
+ }
+}
+
+
+/**
* Read the current value of the ADDR register, convert the floats to
- * ints, multiply by four and return the vector of offsets.
+ * ints, add the base index and return the vector of offsets.
* The offsets will be used to index into the constant buffer or
* temporary register file.
*/
@@ -577,7 +665,38 @@ emit_fetch(
break;
case TGSI_FILE_INPUT:
- res = bld->inputs[reg->Register.Index][swizzle];
+ if (reg->Register.Indirect) {
+ LLVMValueRef swizzle_vec =
+ lp_build_const_int_vec(uint_bld->type, swizzle);
+ LLVMValueRef length_vec =
+ lp_build_const_int_vec(uint_bld->type, bld->base.type.length);
+ LLVMValueRef index_vec; /* index into the const buffer */
+ LLVMValueRef inputs_array;
+ LLVMTypeRef float4_ptr_type;
+
+ /* index_vec = (indirect_index * 4 + swizzle) * length */
+ index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
+ index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
+ index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
+
+ /* cast inputs_array pointer to float* */
+ float4_ptr_type = LLVMPointerType(LLVMFloatType(), 0);
+ inputs_array = LLVMBuildBitCast(uint_bld->builder, bld->inputs_array,
+ float4_ptr_type, "");
+
+ /* Gather values from the input register array */
+ res = build_gather(bld, inputs_array, index_vec);
+ } else {
+ if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
+ LLVMValueRef lindex = lp_build_const_int32(reg->Register.Index * 4 + swizzle);
+ LLVMValueRef input_ptr = LLVMBuildGEP(bld->base.builder,
+ bld->inputs_array, &lindex, 1, "");
+ res = LLVMBuildLoad(bld->base.builder, input_ptr, "");
+ }
+ else {
+ res = bld->inputs[reg->Register.Index][swizzle];
+ }
+ }
assert(res);
break;
@@ -748,6 +867,7 @@ emit_store(
LLVMValueRef value)
{
const struct tgsi_full_dst_register *reg = &inst->Dst[index];
+ struct lp_build_context *uint_bld = &bld->uint_bld;
LLVMValueRef indirect_index = NULL;
switch( inst->Instruction.Saturate ) {
@@ -779,15 +899,81 @@ emit_store(
switch( reg->Register.File ) {
case TGSI_FILE_OUTPUT:
- lp_exec_mask_store(&bld->exec_mask, pred, value,
- bld->outputs[reg->Register.Index][chan_index]);
+ if (reg->Register.Indirect) {
+ LLVMBuilderRef builder = bld->base.builder;
+ LLVMValueRef chan_vec =
+ lp_build_const_int_vec(uint_bld->type, chan_index);
+ LLVMValueRef length_vec =
+ lp_build_const_int_vec(uint_bld->type, bld->base.type.length);
+ LLVMValueRef index_vec; /* indexes into the temp registers */
+ LLVMValueRef outputs_array;
+ LLVMValueRef pixel_offsets;
+ LLVMTypeRef float_ptr_type;
+ int i;
+
+ /* build pixel offset vector: {0, 1, 2, 3, ...} */
+ pixel_offsets = uint_bld->undef;
+ for (i = 0; i < bld->base.type.length; i++) {
+ LLVMValueRef ii = lp_build_const_int32(i);
+ pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
+ ii, ii, "");
+ }
+
+ /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
+ index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
+ index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
+ index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
+ index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
+
+ float_ptr_type = LLVMPointerType(LLVMFloatType(), 0);
+ outputs_array = LLVMBuildBitCast(builder, bld->outputs_array,
+ float_ptr_type, "");
+
+ /* Scatter store values into output registers */
+ emit_mask_scatter(bld, outputs_array, index_vec, value,
+ &bld->exec_mask, pred);
+ }
+ else {
+ LLVMValueRef out_ptr = get_output_ptr(bld, reg->Register.Index,
+ chan_index);
+ lp_exec_mask_store(&bld->exec_mask, pred, value, out_ptr);
+ }
break;
case TGSI_FILE_TEMPORARY:
if (reg->Register.Indirect) {
- /* XXX not done yet */
- debug_printf("WARNING: LLVM scatter store of temp regs"
- " not implemented\n");
+ LLVMBuilderRef builder = bld->base.builder;
+ LLVMValueRef chan_vec =
+ lp_build_const_int_vec(uint_bld->type, chan_index);
+ LLVMValueRef length_vec =
+ lp_build_const_int_vec(uint_bld->type, bld->base.type.length);
+ LLVMValueRef index_vec; /* indexes into the temp registers */
+ LLVMValueRef temps_array;
+ LLVMValueRef pixel_offsets;
+ LLVMTypeRef float_ptr_type;
+ int i;
+
+ /* build pixel offset vector: {0, 1, 2, 3, ...} */
+ pixel_offsets = uint_bld->undef;
+ for (i = 0; i < bld->base.type.length; i++) {
+ LLVMValueRef ii = lp_build_const_int32(i);
+ pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
+ ii, ii, "");
+ }
+
+ /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
+ index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
+ index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
+ index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
+ index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
+
+ float_ptr_type = LLVMPointerType(LLVMFloatType(), 0);
+ temps_array = LLVMBuildBitCast(builder, bld->temps_array,
+ float_ptr_type, "");
+
+ /* Scatter store values into temp registers */
+ emit_mask_scatter(bld, temps_array, index_vec, value,
+ &bld->exec_mask, pred);
}
else {
LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
@@ -1040,15 +1226,60 @@ emit_kilp(struct lp_build_tgsi_soa_context *bld,
lp_build_mask_check(bld->mask);
}
+
+/**
+ * Emit code which will dump the value of all the temporary registers
+ * to stdout.
+ *
+ * Debugging aid.  For each temporary register index from 0 up to and
+ * including info->file_max[TGSI_FILE_TEMPORARY], the emitted code prints
+ * a "TEMP[n]:" header, loads each of the four channels and prints
+ * elements 0..3 of every loaded vector through lp_build_printf().
+ *
+ * NOTE(review): only the first four vector elements are printed, so this
+ * presumably assumes a vector length of 4 -- confirm before using it
+ * with wider types.
+ */
+static void
+emit_dump_temps(struct lp_build_tgsi_soa_context *bld)
+{
+   LLVMBuilderRef builder = bld->base.builder;
+   LLVMValueRef temp_ptr;
+   LLVMValueRef i0 = lp_build_const_int32(0);
+   LLVMValueRef i1 = lp_build_const_int32(1);
+   LLVMValueRef i2 = lp_build_const_int32(2);
+   LLVMValueRef i3 = lp_build_const_int32(3);
+   int index;
+   /*
+    * file_max[] is used in this file as the highest register index (the
+    * register arrays are sized as file_max * 4 + 4), so the register at
+    * index 'last' itself must be dumped too.  It is kept signed so that a
+    * negative value simply produces no iterations.
+    */
+   const int last = bld->info->file_max[TGSI_FILE_TEMPORARY];
+
+   for (index = 0; index <= last; index++) {
+      LLVMValueRef idx = lp_build_const_int32(index);
+      LLVMValueRef v[4][4], res;
+      int chan;
+
+      lp_build_printf(builder, "TEMP[%d]:\n", idx);
+
+      /* Pull the four printed elements out of each channel vector. */
+      for (chan = 0; chan < 4; chan++) {
+         temp_ptr = get_temp_ptr(bld, index, chan);
+         res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
+         v[chan][0] = LLVMBuildExtractElement(builder, res, i0, "");
+         v[chan][1] = LLVMBuildExtractElement(builder, res, i1, "");
+         v[chan][2] = LLVMBuildExtractElement(builder, res, i2, "");
+         v[chan][3] = LLVMBuildExtractElement(builder, res, i3, "");
+      }
+
+      lp_build_printf(builder, " X: %f %f %f %f\n",
+                      v[0][0], v[0][1], v[0][2], v[0][3]);
+      lp_build_printf(builder, " Y: %f %f %f %f\n",
+                      v[1][0], v[1][1], v[1][2], v[1][3]);
+      lp_build_printf(builder, " Z: %f %f %f %f\n",
+                      v[2][0], v[2][1], v[2][2], v[2][3]);
+      lp_build_printf(builder, " W: %f %f %f %f\n",
+                      v[3][0], v[3][1], v[3][2], v[3][3]);
+   }
+}
+
+
+
static void
emit_declaration(
struct lp_build_tgsi_soa_context *bld,
const struct tgsi_full_declaration *decl)
{
LLVMTypeRef vec_type = bld->base.vec_type;
-
- unsigned first = decl->Range.First;
- unsigned last = decl->Range.Last;
+ const unsigned first = decl->Range.First;
+ const unsigned last = decl->Range.Last;
unsigned idx, i;
for (idx = first; idx <= last; ++idx) {
@@ -1056,36 +1287,33 @@ emit_declaration(
switch (decl->Declaration.File) {
case TGSI_FILE_TEMPORARY:
assert(idx < LP_MAX_TGSI_TEMPS);
- if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
- LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(),
- last*4 + 4, 0);
- bld->temps_array = lp_build_array_alloca(bld->base.builder,
- vec_type, array_size, "");
- } else {
+ if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
for (i = 0; i < NUM_CHANNELS; i++)
bld->temps[idx][i] = lp_build_alloca(bld->base.builder,
- vec_type, "");
+ vec_type, "temp");
}
break;
case TGSI_FILE_OUTPUT:
- for (i = 0; i < NUM_CHANNELS; i++)
- bld->outputs[idx][i] = lp_build_alloca(bld->base.builder,
- vec_type, "");
+ if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
+ for (i = 0; i < NUM_CHANNELS; i++)
+ bld->outputs[idx][i] = lp_build_alloca(bld->base.builder,
+ vec_type, "output");
+ }
break;
case TGSI_FILE_ADDRESS:
assert(idx < LP_MAX_TGSI_ADDRS);
for (i = 0; i < NUM_CHANNELS; i++)
bld->addr[idx][i] = lp_build_alloca(bld->base.builder,
- vec_type, "");
+ vec_type, "addr");
break;
case TGSI_FILE_PREDICATE:
assert(idx < LP_MAX_TGSI_PREDS);
for (i = 0; i < NUM_CHANNELS; i++)
bld->preds[idx][i] = lp_build_alloca(bld->base.builder,
- vec_type, "");
+ vec_type, "predicate");
break;
default:
@@ -1740,6 +1968,10 @@ emit_instruction(
break;
case TGSI_OPCODE_END:
+ if (0) {
+ /* for debugging */
+ emit_dump_temps(bld);
+ }
*pc = -1;
break;
@@ -2082,6 +2314,7 @@ lp_build_tgsi_soa(LLVMBuilderRef builder,
memset(&bld, 0, sizeof bld);
lp_build_context_init(&bld.base, builder, type);
lp_build_context_init(&bld.uint_bld, builder, lp_uint_type(type));
+ lp_build_context_init(&bld.elem_bld, builder, lp_elem_type(type));
bld.mask = mask;
bld.pos = pos;
bld.inputs = inputs;
@@ -2100,6 +2333,48 @@ lp_build_tgsi_soa(LLVMBuilderRef builder,
lp_exec_mask_init(&bld.exec_mask, &bld.base);
+ if (bld.indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
+ LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(),
+ info->file_max[TGSI_FILE_TEMPORARY]*4 + 4, 0);
+ bld.temps_array = lp_build_array_alloca(bld.base.builder,
+ bld.base.vec_type, array_size,
+ "temp_array");
+ }
+
+ if (bld.indirect_files & (1 << TGSI_FILE_OUTPUT)) {
+ LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(),
+ info->file_max[TGSI_FILE_OUTPUT]*4 + 4, 0);
+ bld.outputs_array = lp_build_array_alloca(bld.base.builder,
+ bld.base.vec_type, array_size,
+ "output_array");
+ }
+
+ /* If we have indirect addressing in inputs we need to copy them into
+ * our alloca array to be able to iterate over them */
+ if (bld.indirect_files & (1 << TGSI_FILE_INPUT)) {
+ unsigned index, chan;
+ LLVMTypeRef vec_type = bld.base.vec_type;
+ LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(),
+ info->file_max[TGSI_FILE_INPUT]*4 + 4, 0);
+ bld.inputs_array = lp_build_array_alloca(bld.base.builder,
+ vec_type, array_size,
+ "input_array");
+
+ assert(info->num_inputs <= info->file_max[TGSI_FILE_INPUT] + 1);
+
+ for (index = 0; index < info->num_inputs; ++index) {
+ for (chan = 0; chan < NUM_CHANNELS; ++chan) {
+ LLVMValueRef lindex = lp_build_const_int32(index * 4 + chan);
+ LLVMValueRef input_ptr =
+ LLVMBuildGEP(bld.base.builder, bld.inputs_array,
+ &lindex, 1, "");
+ LLVMValueRef value = bld.inputs[index][chan];
+ if (value)
+ LLVMBuildStore(bld.base.builder, value, input_ptr);
+ }
+ }
+ }
+
tgsi_parse_init( &parse, tokens );
while( !tgsi_parse_end_of_tokens( &parse ) ) {
@@ -2169,6 +2444,18 @@ lp_build_tgsi_soa(LLVMBuilderRef builder,
opcode_info->mnemonic);
}
+ /* If we have indirect addressing in outputs we need to copy our alloca array
+ * to the output slots specified by the caller */
+ if (bld.indirect_files & (1 << TGSI_FILE_OUTPUT)) {
+ unsigned index, chan;
+ assert(info->num_outputs <= info->file_max[TGSI_FILE_OUTPUT] + 1);
+ for (index = 0; index < info->num_outputs; ++index) {
+ for (chan = 0; chan < NUM_CHANNELS; ++chan) {
+ bld.outputs[index][chan] = get_output_ptr(&bld, index, chan);
+ }
+ }
+ }
+
if (0) {
LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
LLVMValueRef function = LLVMGetBasicBlockParent(block);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_type.c b/src/gallium/auxiliary/gallivm/lp_bld_type.c
index 06f1aae6dcc..5205c7ada91 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_type.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_type.c
@@ -188,6 +188,22 @@ lp_build_int32_vec4_type(void)
/**
+ * Create element of vector type
+ */
+struct lp_type
+lp_elem_type(struct lp_type type)
+{
+   /* Only vector types (more than one element) are accepted. */
+   assert(type.length > 1);
+
+   /* 'type' is our own by-value copy: narrow it to one element and
+    * hand it back, leaving every other field as the caller gave it.
+    */
+   type.length = 1;
+   return type;
+}
+
+
+/**
* Create unsigned integer type variation of given type.
*/
struct lp_type
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_type.h b/src/gallium/auxiliary/gallivm/lp_bld_type.h
index fec1d3dfbc6..a135d0df847 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_type.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_type.h
@@ -365,6 +365,10 @@ lp_unorm8_vec4_type(void)
struct lp_type
+lp_elem_type(struct lp_type type);
+
+
+struct lp_type
lp_uint_type(struct lp_type type);
diff --git a/src/gallium/auxiliary/os/os_stream_stdc.c b/src/gallium/auxiliary/os/os_stream_stdc.c
index 37e7d063e2b..afd3ff6dcea 100644
--- a/src/gallium/auxiliary/os/os_stream_stdc.c
+++ b/src/gallium/auxiliary/os/os_stream_stdc.c
@@ -106,7 +106,7 @@ os_file_stream_create(const char *filename)
stream->base.flush = &os_stdc_stream_flush;
stream->base.vprintf = &os_stdc_stream_vprintf;
- stream->file = fopen(filename, "w");
+ stream->file = fopen(filename, "wb");
if(!stream->file)
goto no_file;
diff --git a/src/gallium/auxiliary/pipebuffer/SConscript b/src/gallium/auxiliary/pipebuffer/SConscript
deleted file mode 100644
index a074a554717..00000000000
--- a/src/gallium/auxiliary/pipebuffer/SConscript
+++ /dev/null
@@ -1,18 +0,0 @@
-Import('*')
-
-pipebuffer = env.ConvenienceLibrary(
- target = 'pipebuffer',
- source = [
- 'pb_buffer_fenced.c',
- 'pb_buffer_malloc.c',
- 'pb_bufmgr_alt.c',
- 'pb_bufmgr_cache.c',
- 'pb_bufmgr_debug.c',
- 'pb_bufmgr_mm.c',
- 'pb_bufmgr_ondemand.c',
- 'pb_bufmgr_pool.c',
- 'pb_bufmgr_slab.c',
- 'pb_validate.c',
- ])
-
-auxiliaries.insert(0, pipebuffer)
diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c
index 6dbedf15ca8..16a205f2068 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_build.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_build.c
@@ -64,25 +64,14 @@ header_bodysize_grow( struct tgsi_header *header )
}
struct tgsi_processor
-tgsi_default_processor( void )
-{
- struct tgsi_processor processor;
-
- processor.Processor = TGSI_PROCESSOR_FRAGMENT;
- processor.Padding = 0;
-
- return processor;
-}
-
-struct tgsi_processor
tgsi_build_processor(
unsigned type,
struct tgsi_header *header )
{
struct tgsi_processor processor;
- processor = tgsi_default_processor();
processor.Processor = type;
+ processor.Padding = 0;
header_headersize_grow( header );
@@ -93,7 +82,19 @@ tgsi_build_processor(
* declaration
*/
-struct tgsi_declaration
+/**
+ * Account for one more token belonging to a declaration: increments the
+ * declaration's NrTokens and then calls header_bodysize_grow() on the
+ * header.
+ */
+static void
+declaration_grow(
+   struct tgsi_declaration *declaration,
+   struct tgsi_header *header )
+{
+   /* NrTokens must still be below 0xFF before it is incremented. */
+   assert( declaration->NrTokens < 0xFF );
+
+   declaration->NrTokens += 1;
+   header_bodysize_grow( header );
+}
+
+static struct tgsi_declaration
tgsi_default_declaration( void )
{
struct tgsi_declaration declaration;
@@ -112,7 +113,7 @@ tgsi_default_declaration( void )
return declaration;
}
-struct tgsi_declaration
+static struct tgsi_declaration
tgsi_build_declaration(
unsigned file,
unsigned usage_mask,
@@ -144,16 +145,85 @@ tgsi_build_declaration(
return declaration;
}
-static void
-declaration_grow(
+/**
+ * Return a declaration range token whose First and Last are both 0.
+ */
+static struct tgsi_declaration_range
+tgsi_default_declaration_range( void )
+{
+   struct tgsi_declaration_range range;
+
+   range.Last = 0;
+   range.First = 0;
+
+   return range;
+}
+
+static struct tgsi_declaration_range
+tgsi_build_declaration_range(
+ unsigned first,
+ unsigned last,
struct tgsi_declaration *declaration,
struct tgsi_header *header )
{
- assert( declaration->NrTokens < 0xFF );
+ struct tgsi_declaration_range declaration_range;
- declaration->NrTokens++;
+ assert( last >= first );
+ assert( last <= 0xFFFF );
- header_bodysize_grow( header );
+ declaration_range.First = first;
+ declaration_range.Last = last;
+
+ declaration_grow( declaration, header );
+
+ return declaration_range;
+}
+
+/**
+ * Build the dimension token of a declaration.
+ *
+ * \param index_2d     value stored in Index2D, at most 0xFFFF
+ * \param declaration  declaration the token belongs to; its token count
+ *                     is bumped through declaration_grow()
+ * \param header       header passed on to declaration_grow()
+ * \return the filled-in token, with Padding cleared
+ */
+static struct tgsi_declaration_dimension
+tgsi_build_declaration_dimension(unsigned index_2d,
+                                 struct tgsi_declaration *declaration,
+                                 struct tgsi_header *header)
+{
+   struct tgsi_declaration_dimension dim;
+
+   assert(index_2d <= 0xFFFF);
+
+   /* Register the extra token first; the local token is independent. */
+   declaration_grow(declaration, header);
+
+   dim.Padding = 0;
+   dim.Index2D = index_2d;
+
+   return dim;
+}
+
+/**
+ * Return a semantic token with default contents:
+ * Name = TGSI_SEMANTIC_POSITION, Index = 0, Padding = 0.
+ */
+static struct tgsi_declaration_semantic
+tgsi_default_declaration_semantic( void )
+{
+   struct tgsi_declaration_semantic semantic;
+
+   semantic.Padding = 0;
+   semantic.Index = 0;
+   semantic.Name = TGSI_SEMANTIC_POSITION;
+
+   return semantic;
+}
+
+/**
+ * Build the semantic token of a declaration.
+ *
+ * \param semantic_name   value stored in Name; must be a valid semantic,
+ *                        i.e. below TGSI_SEMANTIC_COUNT
+ * \param semantic_index  value stored in Index, at most 0xFFFF
+ * \param declaration     declaration the token belongs to; its token
+ *                        count is bumped through declaration_grow()
+ * \param header          header passed on to declaration_grow()
+ * \return the filled-in token, with Padding cleared
+ */
+static struct tgsi_declaration_semantic
+tgsi_build_declaration_semantic(
+   unsigned semantic_name,
+   unsigned semantic_index,
+   struct tgsi_declaration *declaration,
+   struct tgsi_header *header )
+{
+   struct tgsi_declaration_semantic ds;
+
+   /*
+    * Strict bound, matching the "file < TGSI_FILE_COUNT" checks used by
+    * the register builders in this file.
+    * NOTE(review): assumes TGSI_SEMANTIC_COUNT is the number of semantics
+    * and not itself a valid name -- confirm against p_shader_tokens.h.
+    */
+   assert( semantic_name < TGSI_SEMANTIC_COUNT );
+   assert( semantic_index <= 0xFFFF );
+
+   ds.Name = semantic_name;
+   ds.Index = semantic_index;
+   ds.Padding = 0;
+
+   declaration_grow( declaration, header );
+
+   return ds;
}
struct tgsi_full_declaration
@@ -257,104 +327,11 @@ tgsi_build_full_declaration(
return size;
}
-struct tgsi_declaration_range
-tgsi_default_declaration_range( void )
-{
- struct tgsi_declaration_range dr;
-
- dr.First = 0;
- dr.Last = 0;
-
- return dr;
-}
-
-struct tgsi_declaration_range
-tgsi_build_declaration_range(
- unsigned first,
- unsigned last,
- struct tgsi_declaration *declaration,
- struct tgsi_header *header )
-{
- struct tgsi_declaration_range declaration_range;
-
- assert( last >= first );
- assert( last <= 0xFFFF );
-
- declaration_range = tgsi_default_declaration_range();
- declaration_range.First = first;
- declaration_range.Last = last;
-
- declaration_grow( declaration, header );
-
- return declaration_range;
-}
-
-struct tgsi_declaration_dimension
-tgsi_default_declaration_dimension(void)
-{
- struct tgsi_declaration_dimension dd;
-
- dd.Index2D = 0;
- dd.Padding = 0;
-
- return dd;
-}
-
-struct tgsi_declaration_dimension
-tgsi_build_declaration_dimension(unsigned index_2d,
- struct tgsi_declaration *declaration,
- struct tgsi_header *header)
-{
- struct tgsi_declaration_dimension dd;
-
- assert(index_2d <= 0xFFFF);
-
- dd = tgsi_default_declaration_dimension();
- dd.Index2D = index_2d;
-
- declaration_grow(declaration, header);
-
- return dd;
-}
-
-struct tgsi_declaration_semantic
-tgsi_default_declaration_semantic( void )
-{
- struct tgsi_declaration_semantic ds;
-
- ds.Name = TGSI_SEMANTIC_POSITION;
- ds.Index = 0;
- ds.Padding = 0;
-
- return ds;
-}
-
-struct tgsi_declaration_semantic
-tgsi_build_declaration_semantic(
- unsigned semantic_name,
- unsigned semantic_index,
- struct tgsi_declaration *declaration,
- struct tgsi_header *header )
-{
- struct tgsi_declaration_semantic ds;
-
- assert( semantic_name <= TGSI_SEMANTIC_COUNT );
- assert( semantic_index <= 0xFFFF );
-
- ds = tgsi_default_declaration_semantic();
- ds.Name = semantic_name;
- ds.Index = semantic_index;
-
- declaration_grow( declaration, header );
-
- return ds;
-}
-
/*
* immediate
*/
-struct tgsi_immediate
+static struct tgsi_immediate
tgsi_default_immediate( void )
{
struct tgsi_immediate immediate;
@@ -367,7 +344,7 @@ tgsi_default_immediate( void )
return immediate;
}
-struct tgsi_immediate
+static struct tgsi_immediate
tgsi_build_immediate(
struct tgsi_header *header )
{
@@ -406,7 +383,7 @@ immediate_grow(
header_bodysize_grow( header );
}
-union tgsi_immediate_data
+static union tgsi_immediate_data
tgsi_build_immediate_float32(
float value,
struct tgsi_immediate *immediate,
@@ -480,7 +457,7 @@ tgsi_default_instruction( void )
return instruction;
}
-struct tgsi_instruction
+static struct tgsi_instruction
tgsi_build_instruction(unsigned opcode,
unsigned saturate,
unsigned predicate,
@@ -519,6 +496,266 @@ instruction_grow(
header_bodysize_grow( header );
}
+/**
+ * Return a predicate token with default contents: identity swizzle
+ * (X, Y, Z, W), Negate = 0, Index = 0 and Padding = 0.
+ */
+struct tgsi_instruction_predicate
+tgsi_default_instruction_predicate(void)
+{
+   struct tgsi_instruction_predicate pred;
+
+   pred.Index = 0;
+   pred.Negate = 0;
+   pred.Padding = 0;
+
+   /* Identity swizzle: each channel selects itself. */
+   pred.SwizzleX = TGSI_SWIZZLE_X;
+   pred.SwizzleY = TGSI_SWIZZLE_Y;
+   pred.SwizzleZ = TGSI_SWIZZLE_Z;
+   pred.SwizzleW = TGSI_SWIZZLE_W;
+
+   return pred;
+}
+
+/**
+ * Build the predicate token of an instruction.
+ *
+ * \param index        value stored in Index
+ * \param negate       value stored in Negate
+ * \param swizzleX     value stored in SwizzleX (likewise Y, Z, W)
+ * \param instruction  instruction the token belongs to; passed to
+ *                     instruction_grow()
+ * \param header       header passed to instruction_grow()
+ * \return the filled-in token
+ */
+static struct tgsi_instruction_predicate
+tgsi_build_instruction_predicate(int index,
+                                 unsigned negate,
+                                 unsigned swizzleX,
+                                 unsigned swizzleY,
+                                 unsigned swizzleZ,
+                                 unsigned swizzleW,
+                                 struct tgsi_instruction *instruction,
+                                 struct tgsi_header *header)
+{
+   /* Start from the defaults; this is what initialises Padding. */
+   struct tgsi_instruction_predicate pred =
+      tgsi_default_instruction_predicate();
+
+   pred.Index = index;
+   pred.Negate = negate;
+   pred.SwizzleX = swizzleX;
+   pred.SwizzleY = swizzleY;
+   pred.SwizzleZ = swizzleZ;
+   pred.SwizzleW = swizzleW;
+
+   instruction_grow(instruction, header);
+
+   return pred;
+}
+
+/**
+ * Return a label token with Label = 0 and Padding = 0.
+ */
+static struct tgsi_instruction_label
+tgsi_default_instruction_label( void )
+{
+   struct tgsi_instruction_label label_token;
+
+   label_token.Padding = 0;
+   label_token.Label = 0;
+
+   return label_token;
+}
+
+/**
+ * Build the label token of an instruction and flag the instruction as
+ * carrying a label (instruction->Label = 1).
+ *
+ * \param label        value stored in Label
+ * \param prev_token   not used by this function
+ * \param instruction  instruction that owns the token; passed to
+ *                     instruction_grow()
+ * \param header       header passed to instruction_grow()
+ * \return the filled-in token, with Padding cleared
+ */
+static struct tgsi_instruction_label
+tgsi_build_instruction_label(
+   unsigned label,
+   struct tgsi_token *prev_token,
+   struct tgsi_instruction *instruction,
+   struct tgsi_header *header )
+{
+   struct tgsi_instruction_label label_token;
+
+   label_token.Padding = 0;
+   label_token.Label = label;
+
+   /* Mark the owning instruction before accounting for the token. */
+   instruction->Label = 1;
+   instruction_grow( instruction, header );
+
+   return label_token;
+}
+
+/**
+ * Return a texture token with Texture = TGSI_TEXTURE_UNKNOWN and
+ * Padding = 0.
+ */
+static struct tgsi_instruction_texture
+tgsi_default_instruction_texture( void )
+{
+   struct tgsi_instruction_texture tex_token;
+
+   tex_token.Padding = 0;
+   tex_token.Texture = TGSI_TEXTURE_UNKNOWN;
+
+   return tex_token;
+}
+
+/**
+ * Build the texture token of an instruction and flag the instruction as
+ * carrying one (instruction->Texture = 1).
+ *
+ * \param texture      value stored in Texture
+ * \param prev_token   not used by this function
+ * \param instruction  instruction that owns the token; passed to
+ *                     instruction_grow()
+ * \param header       header passed to instruction_grow()
+ * \return the filled-in token, with Padding cleared
+ */
+static struct tgsi_instruction_texture
+tgsi_build_instruction_texture(
+   unsigned texture,
+   struct tgsi_token *prev_token,
+   struct tgsi_instruction *instruction,
+   struct tgsi_header *header )
+{
+   struct tgsi_instruction_texture tex_token;
+
+   tex_token.Padding = 0;
+   tex_token.Texture = texture;
+
+   /* Mark the owning instruction before accounting for the token. */
+   instruction->Texture = 1;
+   instruction_grow( instruction, header );
+
+   return tex_token;
+}
+
+/**
+ * Return a source register token with default contents:
+ * File = TGSI_FILE_NULL, identity swizzle (X, Y, Z, W) and every other
+ * field (Negate, Absolute, Indirect, Dimension, Index) set to 0.
+ */
+static struct tgsi_src_register
+tgsi_default_src_register( void )
+{
+   struct tgsi_src_register src;
+
+   src.File = TGSI_FILE_NULL;
+   src.Index = 0;
+
+   /* Identity swizzle: each channel selects itself. */
+   src.SwizzleX = TGSI_SWIZZLE_X;
+   src.SwizzleY = TGSI_SWIZZLE_Y;
+   src.SwizzleZ = TGSI_SWIZZLE_Z;
+   src.SwizzleW = TGSI_SWIZZLE_W;
+
+   /* No modifiers and no indirect / 2D addressing. */
+   src.Negate = 0;
+   src.Absolute = 0;
+   src.Indirect = 0;
+   src.Dimension = 0;
+
+   return src;
+}
+
+/**
+ * Build a source register token for an instruction.
+ *
+ * Each argument is copied into the field of the same name.  The
+ * following are asserted: file is below TGSI_FILE_COUNT, every swizzle
+ * is at most TGSI_SWIZZLE_W, negate is 0 or 1, and index lies in
+ * [-0x8000, 0x7FFF].  absolute, indirect and dimension are stored
+ * unchecked.
+ *
+ * \param instruction  instruction that owns the token; passed to
+ *                     instruction_grow()
+ * \param header       header passed to instruction_grow()
+ * \return the filled-in token
+ */
+static struct tgsi_src_register
+tgsi_build_src_register(
+   unsigned file,
+   unsigned swizzle_x,
+   unsigned swizzle_y,
+   unsigned swizzle_z,
+   unsigned swizzle_w,
+   unsigned negate,
+   unsigned absolute,
+   unsigned indirect,
+   unsigned dimension,
+   int index,
+   struct tgsi_instruction *instruction,
+   struct tgsi_header *header )
+{
+   struct tgsi_src_register src;
+
+   /* Argument validation. */
+   assert( file < TGSI_FILE_COUNT );
+   assert( swizzle_x <= TGSI_SWIZZLE_W );
+   assert( swizzle_y <= TGSI_SWIZZLE_W );
+   assert( swizzle_z <= TGSI_SWIZZLE_W );
+   assert( swizzle_w <= TGSI_SWIZZLE_W );
+   assert( negate <= 1 );
+   assert( index >= -0x8000 && index <= 0x7FFF );
+
+   /* Register identity. */
+   src.File = file;
+   src.Index = index;
+   src.Indirect = indirect;
+   src.Dimension = dimension;
+
+   /* Swizzle and modifiers. */
+   src.SwizzleX = swizzle_x;
+   src.SwizzleY = swizzle_y;
+   src.SwizzleZ = swizzle_z;
+   src.SwizzleW = swizzle_w;
+   src.Negate = negate;
+   src.Absolute = absolute;
+
+   instruction_grow( instruction, header );
+
+   return src;
+}
+
+/**
+ * Return a dimension token with all of its fields (Indirect, Dimension,
+ * Padding, Index) set to 0.
+ */
+static struct tgsi_dimension
+tgsi_default_dimension( void )
+{
+   struct tgsi_dimension dim;
+
+   dim.Index = 0;
+   dim.Padding = 0;
+   dim.Dimension = 0;
+   dim.Indirect = 0;
+
+   return dim;
+}
+
+/**
+ * Return a full source register whose Register, Indirect and
+ * DimIndirect members hold the default source register token and whose
+ * Dimension member holds the default dimension token.
+ */
+static struct tgsi_full_src_register
+tgsi_default_full_src_register( void )
+{
+   /* The default source token has no side effects, so build it once
+    * and copy it into each of the three members that use it.
+    */
+   const struct tgsi_src_register blank = tgsi_default_src_register();
+   struct tgsi_full_src_register full;
+
+   full.Register = blank;
+   full.Indirect = blank;
+   full.Dimension = tgsi_default_dimension();
+   full.DimIndirect = blank;
+
+   return full;
+}
+
+/**
+ * Build a dimension token for an instruction.
+ *
+ * \param indirect     value stored in Indirect
+ * \param index        value stored in Index
+ * \param instruction  instruction that owns the token; passed to
+ *                     instruction_grow()
+ * \param header       header passed to instruction_grow()
+ * \return the filled-in token, with Dimension and Padding cleared
+ */
+static struct tgsi_dimension
+tgsi_build_dimension(
+   unsigned indirect,
+   unsigned index,
+   struct tgsi_instruction *instruction,
+   struct tgsi_header *header )
+{
+   struct tgsi_dimension dim;
+
+   /* Caller-supplied fields. */
+   dim.Index = index;
+   dim.Indirect = indirect;
+
+   /* Fields that are always zero here. */
+   dim.Dimension = 0;
+   dim.Padding = 0;
+
+   instruction_grow( instruction, header );
+
+   return dim;
+}
+
+/**
+ * Return a destination register token with default contents:
+ * File = TGSI_FILE_NULL, WriteMask = TGSI_WRITEMASK_XYZW and every other
+ * field (Indirect, Dimension, Index, Padding) set to 0.
+ */
+static struct tgsi_dst_register
+tgsi_default_dst_register( void )
+{
+   struct tgsi_dst_register dst;
+
+   dst.File = TGSI_FILE_NULL;
+   dst.Index = 0;
+   dst.WriteMask = TGSI_WRITEMASK_XYZW;
+
+   /* No indirect / 2D addressing by default. */
+   dst.Indirect = 0;
+   dst.Dimension = 0;
+   dst.Padding = 0;
+
+   return dst;
+}
+
+/**
+ * Build a destination register token for an instruction.
+ *
+ * Each argument is copied into the field of the same name (mask goes to
+ * WriteMask).  The following are asserted: file is below
+ * TGSI_FILE_COUNT, mask is at most TGSI_WRITEMASK_XYZW and index lies in
+ * [-32768, 32767].  indirect and dimension are stored unchecked.
+ *
+ * \param instruction  instruction that owns the token; passed to
+ *                     instruction_grow()
+ * \param header       header passed to instruction_grow()
+ * \return the filled-in token, with Padding cleared
+ */
+static struct tgsi_dst_register
+tgsi_build_dst_register(
+   unsigned file,
+   unsigned mask,
+   unsigned indirect,
+   unsigned dimension,
+   int index,
+   struct tgsi_instruction *instruction,
+   struct tgsi_header *header )
+{
+   struct tgsi_dst_register dst;
+
+   /* Argument validation. */
+   assert( file < TGSI_FILE_COUNT );
+   assert( mask <= TGSI_WRITEMASK_XYZW );
+   assert( index >= -32768 && index <= 32767 );
+
+   dst.File = file;
+   dst.Index = index;
+   dst.WriteMask = mask;
+   dst.Indirect = indirect;
+   dst.Dimension = dimension;
+   dst.Padding = 0;
+
+   instruction_grow( instruction, header );
+
+   return dst;
+}
+
+/**
+ * Return a full destination register: Register holds the default
+ * destination token, Indirect and DimIndirect hold the default source
+ * register token, and Dimension holds the default dimension token.
+ */
+static struct tgsi_full_dst_register
+tgsi_default_full_dst_register( void )
+{
+   /* Both indirect members use the same default source token. */
+   const struct tgsi_src_register blank_src = tgsi_default_src_register();
+   struct tgsi_full_dst_register full;
+
+   full.Register = tgsi_default_dst_register();
+   full.Indirect = blank_src;
+   full.Dimension = tgsi_default_dimension();
+   full.DimIndirect = blank_src;
+
+   return full;
+}
+
struct tgsi_full_instruction
tgsi_default_full_instruction( void )
{
@@ -794,268 +1031,7 @@ tgsi_build_full_instruction(
return size;
}
-struct tgsi_instruction_predicate
-tgsi_default_instruction_predicate(void)
-{
- struct tgsi_instruction_predicate instruction_predicate;
-
- instruction_predicate.SwizzleX = TGSI_SWIZZLE_X;
- instruction_predicate.SwizzleY = TGSI_SWIZZLE_Y;
- instruction_predicate.SwizzleZ = TGSI_SWIZZLE_Z;
- instruction_predicate.SwizzleW = TGSI_SWIZZLE_W;
- instruction_predicate.Negate = 0;
- instruction_predicate.Index = 0;
- instruction_predicate.Padding = 0;
-
- return instruction_predicate;
-}
-
-struct tgsi_instruction_predicate
-tgsi_build_instruction_predicate(int index,
- unsigned negate,
- unsigned swizzleX,
- unsigned swizzleY,
- unsigned swizzleZ,
- unsigned swizzleW,
- struct tgsi_instruction *instruction,
- struct tgsi_header *header)
-{
- struct tgsi_instruction_predicate instruction_predicate;
-
- instruction_predicate = tgsi_default_instruction_predicate();
- instruction_predicate.SwizzleX = swizzleX;
- instruction_predicate.SwizzleY = swizzleY;
- instruction_predicate.SwizzleZ = swizzleZ;
- instruction_predicate.SwizzleW = swizzleW;
- instruction_predicate.Negate = negate;
- instruction_predicate.Index = index;
-
- instruction_grow(instruction, header);
-
- return instruction_predicate;
-}
-
-struct tgsi_instruction_label
-tgsi_default_instruction_label( void )
-{
- struct tgsi_instruction_label instruction_label;
-
- instruction_label.Label = 0;
- instruction_label.Padding = 0;
-
- return instruction_label;
-}
-
-struct tgsi_instruction_label
-tgsi_build_instruction_label(
- unsigned label,
- struct tgsi_token *prev_token,
- struct tgsi_instruction *instruction,
- struct tgsi_header *header )
-{
- struct tgsi_instruction_label instruction_label;
-
- instruction_label = tgsi_default_instruction_label();
- instruction_label.Label = label;
- instruction->Label = 1;
-
- instruction_grow( instruction, header );
-
- return instruction_label;
-}
-
-struct tgsi_instruction_texture
-tgsi_default_instruction_texture( void )
-{
- struct tgsi_instruction_texture instruction_texture;
-
- instruction_texture.Texture = TGSI_TEXTURE_UNKNOWN;
- instruction_texture.Padding = 0;
-
- return instruction_texture;
-}
-
-struct tgsi_instruction_texture
-tgsi_build_instruction_texture(
- unsigned texture,
- struct tgsi_token *prev_token,
- struct tgsi_instruction *instruction,
- struct tgsi_header *header )
-{
- struct tgsi_instruction_texture instruction_texture;
-
- instruction_texture = tgsi_default_instruction_texture();
- instruction_texture.Texture = texture;
- instruction->Texture = 1;
-
- instruction_grow( instruction, header );
-
- return instruction_texture;
-}
-
-struct tgsi_src_register
-tgsi_default_src_register( void )
-{
- struct tgsi_src_register src_register;
-
- src_register.File = TGSI_FILE_NULL;
- src_register.SwizzleX = TGSI_SWIZZLE_X;
- src_register.SwizzleY = TGSI_SWIZZLE_Y;
- src_register.SwizzleZ = TGSI_SWIZZLE_Z;
- src_register.SwizzleW = TGSI_SWIZZLE_W;
- src_register.Negate = 0;
- src_register.Absolute = 0;
- src_register.Indirect = 0;
- src_register.Dimension = 0;
- src_register.Index = 0;
-
- return src_register;
-}
-
-struct tgsi_src_register
-tgsi_build_src_register(
- unsigned file,
- unsigned swizzle_x,
- unsigned swizzle_y,
- unsigned swizzle_z,
- unsigned swizzle_w,
- unsigned negate,
- unsigned absolute,
- unsigned indirect,
- unsigned dimension,
- int index,
- struct tgsi_instruction *instruction,
- struct tgsi_header *header )
-{
- struct tgsi_src_register src_register;
-
- assert( file < TGSI_FILE_COUNT );
- assert( swizzle_x <= TGSI_SWIZZLE_W );
- assert( swizzle_y <= TGSI_SWIZZLE_W );
- assert( swizzle_z <= TGSI_SWIZZLE_W );
- assert( swizzle_w <= TGSI_SWIZZLE_W );
- assert( negate <= 1 );
- assert( index >= -0x8000 && index <= 0x7FFF );
-
- src_register = tgsi_default_src_register();
- src_register.File = file;
- src_register.SwizzleX = swizzle_x;
- src_register.SwizzleY = swizzle_y;
- src_register.SwizzleZ = swizzle_z;
- src_register.SwizzleW = swizzle_w;
- src_register.Negate = negate;
- src_register.Absolute = absolute;
- src_register.Indirect = indirect;
- src_register.Dimension = dimension;
- src_register.Index = index;
-
- instruction_grow( instruction, header );
-
- return src_register;
-}
-
-struct tgsi_full_src_register
-tgsi_default_full_src_register( void )
-{
- struct tgsi_full_src_register full_src_register;
-
- full_src_register.Register = tgsi_default_src_register();
- full_src_register.Indirect = tgsi_default_src_register();
- full_src_register.Dimension = tgsi_default_dimension();
- full_src_register.DimIndirect = tgsi_default_src_register();
-
- return full_src_register;
-}
-
-
-struct tgsi_dimension
-tgsi_default_dimension( void )
-{
- struct tgsi_dimension dimension;
-
- dimension.Indirect = 0;
- dimension.Dimension = 0;
- dimension.Padding = 0;
- dimension.Index = 0;
-
- return dimension;
-}
-
-struct tgsi_dimension
-tgsi_build_dimension(
- unsigned indirect,
- unsigned index,
- struct tgsi_instruction *instruction,
- struct tgsi_header *header )
-{
- struct tgsi_dimension dimension;
-
- dimension = tgsi_default_dimension();
- dimension.Indirect = indirect;
- dimension.Index = index;
-
- instruction_grow( instruction, header );
-
- return dimension;
-}
-
-struct tgsi_dst_register
-tgsi_default_dst_register( void )
-{
- struct tgsi_dst_register dst_register;
-
- dst_register.File = TGSI_FILE_NULL;
- dst_register.WriteMask = TGSI_WRITEMASK_XYZW;
- dst_register.Indirect = 0;
- dst_register.Dimension = 0;
- dst_register.Index = 0;
- dst_register.Padding = 0;
-
- return dst_register;
-}
-
-struct tgsi_dst_register
-tgsi_build_dst_register(
- unsigned file,
- unsigned mask,
- unsigned indirect,
- unsigned dimension,
- int index,
- struct tgsi_instruction *instruction,
- struct tgsi_header *header )
-{
- struct tgsi_dst_register dst_register;
-
- assert( file < TGSI_FILE_COUNT );
- assert( mask <= TGSI_WRITEMASK_XYZW );
- assert( index >= -32768 && index <= 32767 );
-
- dst_register = tgsi_default_dst_register();
- dst_register.File = file;
- dst_register.WriteMask = mask;
- dst_register.Index = index;
- dst_register.Indirect = indirect;
- dst_register.Dimension = dimension;
-
- instruction_grow( instruction, header );
-
- return dst_register;
-}
-
-struct tgsi_full_dst_register
-tgsi_default_full_dst_register( void )
-{
- struct tgsi_full_dst_register full_dst_register;
-
- full_dst_register.Register = tgsi_default_dst_register();
- full_dst_register.Indirect = tgsi_default_src_register();
- full_dst_register.Dimension = tgsi_default_dimension();
- full_dst_register.DimIndirect = tgsi_default_src_register();
-
- return full_dst_register;
-}
-
-struct tgsi_property
+static struct tgsi_property
tgsi_default_property( void )
{
struct tgsi_property property;
@@ -1068,7 +1044,7 @@ tgsi_default_property( void )
return property;
}
-struct tgsi_property
+static struct tgsi_property
tgsi_build_property(unsigned property_name,
struct tgsi_header *header)
{
@@ -1107,7 +1083,7 @@ property_grow(
header_bodysize_grow( header );
}
-struct tgsi_property_data
+static struct tgsi_property_data
tgsi_build_property_data(
unsigned value,
struct tgsi_property *property,
diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.h b/src/gallium/auxiliary/tgsi/tgsi_build.h
index 112107a0881..3f236a9c241 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_build.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_build.h
@@ -45,9 +45,6 @@ struct tgsi_header
tgsi_build_header( void );
struct tgsi_processor
-tgsi_default_processor( void );
-
-struct tgsi_processor
tgsi_build_processor(
unsigned processor,
struct tgsi_header *header );
@@ -56,21 +53,6 @@ tgsi_build_processor(
* declaration
*/
-struct tgsi_declaration
-tgsi_default_declaration( void );
-
-struct tgsi_declaration
-tgsi_build_declaration(
- unsigned file,
- unsigned usage_mask,
- unsigned interpolate,
- unsigned dimension,
- unsigned semantic,
- unsigned centroid,
- unsigned invariant,
- unsigned cylindrical_wrap,
- struct tgsi_header *header );
-
struct tgsi_full_declaration
tgsi_default_full_declaration( void );
@@ -81,54 +63,13 @@ tgsi_build_full_declaration(
struct tgsi_header *header,
unsigned maxsize );
-struct tgsi_declaration_range
-tgsi_default_declaration_range( void );
-
-struct tgsi_declaration_range
-tgsi_build_declaration_range(
- unsigned first,
- unsigned last,
- struct tgsi_declaration *declaration,
- struct tgsi_header *header );
-
-struct tgsi_declaration_dimension
-tgsi_default_declaration_dimension(void);
-
-struct tgsi_declaration_dimension
-tgsi_build_declaration_dimension(unsigned index_2d,
- struct tgsi_declaration *declaration,
- struct tgsi_header *header);
-
-struct tgsi_declaration_semantic
-tgsi_default_declaration_semantic( void );
-
-struct tgsi_declaration_semantic
-tgsi_build_declaration_semantic(
- unsigned semantic_name,
- unsigned semantic_index,
- struct tgsi_declaration *declaration,
- struct tgsi_header *header );
-
/*
* immediate
*/
-struct tgsi_immediate
-tgsi_default_immediate( void );
-
-struct tgsi_immediate
-tgsi_build_immediate(
- struct tgsi_header *header );
-
struct tgsi_full_immediate
tgsi_default_full_immediate( void );
-union tgsi_immediate_data
-tgsi_build_immediate_float32(
- float value,
- struct tgsi_immediate *immediate,
- struct tgsi_header *header );
-
unsigned
tgsi_build_full_immediate(
const struct tgsi_full_immediate *full_imm,
@@ -140,23 +81,9 @@ tgsi_build_full_immediate(
* properties
*/
-struct tgsi_property
-tgsi_default_property( void );
-
-struct tgsi_property
-tgsi_build_property(
- unsigned property_name,
- struct tgsi_header *header );
-
struct tgsi_full_property
tgsi_default_full_property( void );
-struct tgsi_property_data
-tgsi_build_property_data(
- unsigned value,
- struct tgsi_property *property,
- struct tgsi_header *header );
-
unsigned
tgsi_build_full_property(
const struct tgsi_full_property *full_prop,
@@ -171,15 +98,6 @@ tgsi_build_full_property(
struct tgsi_instruction
tgsi_default_instruction( void );
-struct tgsi_instruction
-tgsi_build_instruction(
- unsigned opcode,
- unsigned saturate,
- unsigned predicate,
- unsigned num_dst_regs,
- unsigned num_src_regs,
- struct tgsi_header *header );
-
struct tgsi_full_instruction
tgsi_default_full_instruction( void );
@@ -193,85 +111,6 @@ tgsi_build_full_instruction(
struct tgsi_instruction_predicate
tgsi_default_instruction_predicate(void);
-struct tgsi_instruction_predicate
-tgsi_build_instruction_predicate(int index,
- unsigned negate,
- unsigned swizzleX,
- unsigned swizzleY,
- unsigned swizzleZ,
- unsigned swizzleW,
- struct tgsi_instruction *instruction,
- struct tgsi_header *header);
-
-struct tgsi_instruction_label
-tgsi_default_instruction_label( void );
-
-struct tgsi_instruction_label
-tgsi_build_instruction_label(
- unsigned label,
- struct tgsi_token *prev_token,
- struct tgsi_instruction *instruction,
- struct tgsi_header *header );
-
-struct tgsi_instruction_texture
-tgsi_default_instruction_texture( void );
-
-struct tgsi_instruction_texture
-tgsi_build_instruction_texture(
- unsigned texture,
- struct tgsi_token *prev_token,
- struct tgsi_instruction *instruction,
- struct tgsi_header *header );
-
-struct tgsi_src_register
-tgsi_default_src_register( void );
-
-struct tgsi_src_register
-tgsi_build_src_register(
- unsigned file,
- unsigned swizzle_x,
- unsigned swizzle_y,
- unsigned swizzle_z,
- unsigned swizzle_w,
- unsigned negate,
- unsigned absolute,
- unsigned indirect,
- unsigned dimension,
- int index,
- struct tgsi_instruction *instruction,
- struct tgsi_header *header );
-
-struct tgsi_full_src_register
-tgsi_default_full_src_register( void );
-
-
-struct tgsi_dimension
-tgsi_default_dimension( void );
-
-struct tgsi_dimension
-tgsi_build_dimension(
- unsigned indirect,
- unsigned index,
- struct tgsi_instruction *instruction,
- struct tgsi_header *header );
-
-struct tgsi_dst_register
-tgsi_default_dst_register( void );
-
-struct tgsi_dst_register
-tgsi_build_dst_register(
- unsigned file,
- unsigned mask,
- unsigned indirect,
- unsigned dimension,
- int index,
- struct tgsi_instruction *instruction,
- struct tgsi_header *header );
-
-struct tgsi_full_dst_register
-tgsi_default_full_dst_register( void );
-
-
#if defined __cplusplus
}
#endif
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 3a71540506d..7892a67f04c 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -429,6 +429,24 @@ micro_sne(union tgsi_exec_channel *dst,
}
static void
+micro_sfl(union tgsi_exec_channel *dst)
+{
+ dst->f[0] = 0.0f;
+ dst->f[1] = 0.0f;
+ dst->f[2] = 0.0f;
+ dst->f[3] = 0.0f;
+}
+
+static void
+micro_str(union tgsi_exec_channel *dst)
+{
+ dst->f[0] = 1.0f;
+ dst->f[1] = 1.0f;
+ dst->f[2] = 1.0f;
+ dst->f[3] = 1.0f;
+}
+
+static void
micro_trunc(union tgsi_exec_channel *dst,
const union tgsi_exec_channel *src)
{
@@ -453,50 +471,12 @@ enum tgsi_exec_datatype {
/*
* Shorthand locations of various utility registers (_I = Index, _C = Channel)
*/
-#define TEMP_0_I TGSI_EXEC_TEMP_00000000_I
-#define TEMP_0_C TGSI_EXEC_TEMP_00000000_C
-#define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I
-#define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C
-#define TEMP_80_I TGSI_EXEC_TEMP_80000000_I
-#define TEMP_80_C TGSI_EXEC_TEMP_80000000_C
-#define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I
-#define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C
-#define TEMP_1_I TGSI_EXEC_TEMP_ONE_I
-#define TEMP_1_C TGSI_EXEC_TEMP_ONE_C
-#define TEMP_2_I TGSI_EXEC_TEMP_TWO_I
-#define TEMP_2_C TGSI_EXEC_TEMP_TWO_C
-#define TEMP_128_I TGSI_EXEC_TEMP_128_I
-#define TEMP_128_C TGSI_EXEC_TEMP_128_C
-#define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I
-#define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C
#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I
#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C
#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I
#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C
#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I
#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C
-#define TEMP_CC_I TGSI_EXEC_TEMP_CC_I
-#define TEMP_CC_C TGSI_EXEC_TEMP_CC_C
-#define TEMP_3_I TGSI_EXEC_TEMP_THREE_I
-#define TEMP_3_C TGSI_EXEC_TEMP_THREE_C
-#define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I
-#define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C
-#define TEMP_R0 TGSI_EXEC_TEMP_R0
-#define TEMP_P0 TGSI_EXEC_TEMP_P0
-
-#define IS_CHANNEL_ENABLED(INST, CHAN)\
- ((INST).Dst[0].Register.WriteMask & (1 << (CHAN)))
-
-#define IS_CHANNEL_ENABLED2(INST, CHAN)\
- ((INST).Dst[1].Register.WriteMask & (1 << (CHAN)))
-
-#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
- for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\
- if (IS_CHANNEL_ENABLED( INST, CHAN ))
-
-#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
- for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\
- if (IS_CHANNEL_ENABLED2( INST, CHAN ))
/** The execution mask depends on the conditional mask and the loop mask */
@@ -511,6 +491,14 @@ static const union tgsi_exec_channel OneVec = {
{1.0f, 1.0f, 1.0f, 1.0f}
};
+static const union tgsi_exec_channel P128Vec = {
+ {128.0f, 128.0f, 128.0f, 128.0f}
+};
+
+static const union tgsi_exec_channel M128Vec = {
+ {-128.0f, -128.0f, -128.0f, -128.0f}
+};
+
/**
* Assert that none of the float values in 'chan' are infinite or NaN.
@@ -572,8 +560,6 @@ tgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach,
}
-
-
/**
* Check if there's a potential src/dst register data dependency when
* using SOA execution.
@@ -607,18 +593,20 @@ tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst)
inst->Dst[0].Register.File) &&
((inst->Src[i].Register.Index ==
inst->Dst[0].Register.Index) ||
- inst->Src[i].Register.Indirect ||
- inst->Dst[0].Register.Indirect)) {
+ inst->Src[i].Register.Indirect ||
+ inst->Dst[0].Register.Indirect)) {
/* loop over dest channels */
uint channelsWritten = 0x0;
- FOR_EACH_ENABLED_CHANNEL(*inst, chan) {
- /* check if we're reading a channel that's been written */
- uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->Src[i], chan);
- if (channelsWritten & (1 << swizzle)) {
- return TRUE;
- }
+ for (chan = 0; chan < NUM_CHANNELS; chan++) {
+ if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+ /* check if we're reading a channel that's been written */
+ uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->Src[i], chan);
+ if (channelsWritten & (1 << swizzle)) {
+ return TRUE;
+ }
- channelsWritten |= (1 << chan);
+ channelsWritten |= (1 << chan);
+ }
}
}
}
@@ -813,18 +801,18 @@ tgsi_exec_machine_create( void )
mach->MaxGeometryShaderOutputs = TGSI_MAX_TOTAL_VERTICES;
mach->Predicates = &mach->Temps[TGSI_EXEC_TEMP_P0];
- /* Setup constants. */
+ /* Setup constants needed by the SSE2 executor. */
for( i = 0; i < 4; i++ ) {
- mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000;
- mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF;
- mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000;
- mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF;
- mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f;
- mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f;
- mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f;
- mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f;
- mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f;
- mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f;
+ mach->Temps[TGSI_EXEC_TEMP_00000000_I].xyzw[TGSI_EXEC_TEMP_00000000_C].u[i] = 0x00000000;
+ mach->Temps[TGSI_EXEC_TEMP_7FFFFFFF_I].xyzw[TGSI_EXEC_TEMP_7FFFFFFF_C].u[i] = 0x7FFFFFFF;
+ mach->Temps[TGSI_EXEC_TEMP_80000000_I].xyzw[TGSI_EXEC_TEMP_80000000_C].u[i] = 0x80000000;
+ mach->Temps[TGSI_EXEC_TEMP_FFFFFFFF_I].xyzw[TGSI_EXEC_TEMP_FFFFFFFF_C].u[i] = 0xFFFFFFFF; /* not used */
+ mach->Temps[TGSI_EXEC_TEMP_ONE_I].xyzw[TGSI_EXEC_TEMP_ONE_C].f[i] = 1.0f;
+ mach->Temps[TGSI_EXEC_TEMP_TWO_I].xyzw[TGSI_EXEC_TEMP_TWO_C].f[i] = 2.0f; /* not used */
+ mach->Temps[TGSI_EXEC_TEMP_128_I].xyzw[TGSI_EXEC_TEMP_128_C].f[i] = 128.0f;
+ mach->Temps[TGSI_EXEC_TEMP_MINUS_128_I].xyzw[TGSI_EXEC_TEMP_MINUS_128_C].f[i] = -128.0f;
+ mach->Temps[TGSI_EXEC_TEMP_THREE_I].xyzw[TGSI_EXEC_TEMP_THREE_C].f[i] = 3.0f;
+ mach->Temps[TGSI_EXEC_TEMP_HALF_I].xyzw[TGSI_EXEC_TEMP_HALF_C].f[i] = 0.5f;
}
#ifdef DEBUG
@@ -886,27 +874,35 @@ micro_div(
}
static void
-micro_float_clamp(union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src)
+micro_rcc(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
{
uint i;
for (i = 0; i < 4; i++) {
- if (src->f[i] > 0.0f) {
- if (src->f[i] > 1.884467e+019f)
+ float recip = 1.0f / src->f[i];
+
+ if (recip > 0.0f) {
+ if (recip > 1.884467e+019f) {
dst->f[i] = 1.884467e+019f;
- else if (src->f[i] < 5.42101e-020f)
+ }
+ else if (recip < 5.42101e-020f) {
dst->f[i] = 5.42101e-020f;
- else
- dst->f[i] = src->f[i];
+ }
+ else {
+ dst->f[i] = recip;
+ }
}
else {
- if (src->f[i] < -1.884467e+019f)
+ if (recip < -1.884467e+019f) {
dst->f[i] = -1.884467e+019f;
- else if (src->f[i] > -5.42101e-020f)
+ }
+ else if (recip > -5.42101e-020f) {
dst->f[i] = -5.42101e-020f;
- else
- dst->f[i] = src->f[i];
+ }
+ else {
+ dst->f[i] = recip;
+ }
}
}
}
@@ -958,60 +954,6 @@ micro_mul(union tgsi_exec_channel *dst,
dst->f[3] = src0->f[3] * src1->f[3];
}
-#if 0
-static void
-micro_imul64(
- union tgsi_exec_channel *dst0,
- union tgsi_exec_channel *dst1,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst1->i[0] = src0->i[0] * src1->i[0];
- dst1->i[1] = src0->i[1] * src1->i[1];
- dst1->i[2] = src0->i[2] * src1->i[2];
- dst1->i[3] = src0->i[3] * src1->i[3];
- dst0->i[0] = 0;
- dst0->i[1] = 0;
- dst0->i[2] = 0;
- dst0->i[3] = 0;
-}
-#endif
-
-#if 0
-static void
-micro_umul64(
- union tgsi_exec_channel *dst0,
- union tgsi_exec_channel *dst1,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst1->u[0] = src0->u[0] * src1->u[0];
- dst1->u[1] = src0->u[1] * src1->u[1];
- dst1->u[2] = src0->u[2] * src1->u[2];
- dst1->u[3] = src0->u[3] * src1->u[3];
- dst0->u[0] = 0;
- dst0->u[1] = 0;
- dst0->u[2] = 0;
- dst0->u[3] = 0;
-}
-#endif
-
-
-#if 0
-static void
-micro_movc(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1,
- const union tgsi_exec_channel *src2 )
-{
- dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0];
- dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1];
- dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2];
- dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3];
-}
-#endif
-
static void
micro_neg(
union tgsi_exec_channel *dst,
@@ -1607,9 +1549,6 @@ store_dest(struct tgsi_exec_machine *mach,
#define FETCH(VAL,INDEX,CHAN)\
fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_FLOAT)
-#define STORE(VAL,INDEX,CHAN)\
- store_dest(mach, VAL, &inst->Dst[INDEX], inst, CHAN, TGSI_EXEC_DATA_FLOAT)
-
/**
* Execute ARB-style KIL which is predicated by a src register.
@@ -1753,7 +1692,7 @@ exec_tex(struct tgsi_exec_machine *mach,
union tgsi_exec_channel r[4];
const union tgsi_exec_channel *lod = &ZeroVec;
enum tgsi_sampler_control control;
- uint chan_index;
+ uint chan;
if (modifier != TEX_MODIFIER_NONE) {
FETCH(&r[3], 0, CHAN_W);
@@ -1825,8 +1764,10 @@ exec_tex(struct tgsi_exec_machine *mach,
assert(0);
}
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&r[chan_index], 0, chan_index);
+ for (chan = 0; chan < NUM_CHANNELS; chan++) {
+ if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+ store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
+ }
}
}
@@ -1836,7 +1777,7 @@ exec_txd(struct tgsi_exec_machine *mach,
{
const uint unit = inst->Src[3].Register.Index;
union tgsi_exec_channel r[4];
- uint chan_index;
+ uint chan;
/*
* XXX: This is fake TXD -- the derivatives are not taken into account, yet.
@@ -1886,8 +1827,10 @@ exec_txd(struct tgsi_exec_machine *mach,
assert(0);
}
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&r[chan_index], 0, chan_index);
+ for (chan = 0; chan < NUM_CHANNELS; chan++) {
+ if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+ store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
+ }
}
}
@@ -2021,6 +1964,26 @@ exec_declaration(struct tgsi_exec_machine *mach,
}
}
+typedef void (* micro_op)(union tgsi_exec_channel *dst);
+
+static void
+exec_vector(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst,
+ micro_op op,
+ enum tgsi_exec_datatype dst_datatype)
+{
+ unsigned int chan;
+
+ for (chan = 0; chan < NUM_CHANNELS; chan++) {
+ if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+ union tgsi_exec_channel dst;
+
+ op(&dst);
+ store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype);
+ }
+ }
+}
+
typedef void (* micro_unary_op)(union tgsi_exec_channel *dst,
const union tgsi_exec_channel *src);
@@ -2074,6 +2037,27 @@ typedef void (* micro_binary_op)(union tgsi_exec_channel *dst,
const union tgsi_exec_channel *src1);
static void
+exec_scalar_binary(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst,
+ micro_binary_op op,
+ enum tgsi_exec_datatype dst_datatype,
+ enum tgsi_exec_datatype src_datatype)
+{
+ unsigned int chan;
+ union tgsi_exec_channel src[2];
+ union tgsi_exec_channel dst;
+
+ fetch_source(mach, &src[0], &inst->Src[0], CHAN_X, src_datatype);
+ fetch_source(mach, &src[1], &inst->Src[1], CHAN_Y, src_datatype);
+ op(&dst, &src[0], &src[1]);
+ for (chan = 0; chan < NUM_CHANNELS; chan++) {
+ if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+ store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype);
+ }
+ }
+}
+
+static void
exec_vector_binary(struct tgsi_exec_machine *mach,
const struct tgsi_full_instruction *inst,
micro_binary_op op,
@@ -2320,6 +2304,289 @@ exec_nrm3(struct tgsi_exec_machine *mach,
}
static void
+exec_scs(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) {
+ union tgsi_exec_channel arg;
+ union tgsi_exec_channel result;
+
+ fetch_source(mach, &arg, &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
+
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
+ micro_cos(&result, &arg);
+ store_dest(mach, &result, &inst->Dst[0], inst, CHAN_X, TGSI_EXEC_DATA_FLOAT);
+ }
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
+ micro_sin(&result, &arg);
+ store_dest(mach, &result, &inst->Dst[0], inst, CHAN_Y, TGSI_EXEC_DATA_FLOAT);
+ }
+ }
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
+ store_dest(mach, &ZeroVec, &inst->Dst[0], inst, CHAN_Z, TGSI_EXEC_DATA_FLOAT);
+ }
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
+ store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT);
+ }
+}
+
+static void
+exec_x2d(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ union tgsi_exec_channel r[4];
+ union tgsi_exec_channel d[2];
+
+ fetch_source(mach, &r[0], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT);
+ fetch_source(mach, &r[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XZ) {
+ fetch_source(mach, &r[2], &inst->Src[2], CHAN_X, TGSI_EXEC_DATA_FLOAT);
+ micro_mul(&r[2], &r[2], &r[0]);
+ fetch_source(mach, &r[3], &inst->Src[2], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
+ micro_mul(&r[3], &r[3], &r[1]);
+ micro_add(&r[2], &r[2], &r[3]);
+ fetch_source(mach, &r[3], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
+ micro_add(&d[0], &r[2], &r[3]);
+ }
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_YW) {
+ fetch_source(mach, &r[2], &inst->Src[2], CHAN_Z, TGSI_EXEC_DATA_FLOAT);
+ micro_mul(&r[2], &r[2], &r[0]);
+ fetch_source(mach, &r[3], &inst->Src[2], CHAN_W, TGSI_EXEC_DATA_FLOAT);
+ micro_mul(&r[3], &r[3], &r[1]);
+ micro_add(&r[2], &r[2], &r[3]);
+ fetch_source(mach, &r[3], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
+ micro_add(&d[1], &r[2], &r[3]);
+ }
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
+ store_dest(mach, &d[0], &inst->Dst[0], inst, CHAN_X, TGSI_EXEC_DATA_FLOAT);
+ }
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
+ store_dest(mach, &d[1], &inst->Dst[0], inst, CHAN_Y, TGSI_EXEC_DATA_FLOAT);
+ }
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
+ store_dest(mach, &d[0], &inst->Dst[0], inst, CHAN_Z, TGSI_EXEC_DATA_FLOAT);
+ }
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
+ store_dest(mach, &d[1], &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT);
+ }
+}
+
+static void
+exec_rfl(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ union tgsi_exec_channel r[9];
+
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) {
+ /* r0 = dp3(src0, src0) */
+ fetch_source(mach, &r[2], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
+ micro_mul(&r[0], &r[2], &r[2]);
+ fetch_source(mach, &r[4], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
+ micro_mul(&r[8], &r[4], &r[4]);
+ micro_add(&r[0], &r[0], &r[8]);
+ fetch_source(mach, &r[6], &inst->Src[0], CHAN_Z, TGSI_EXEC_DATA_FLOAT);
+ micro_mul(&r[8], &r[6], &r[6]);
+ micro_add(&r[0], &r[0], &r[8]);
+
+ /* r1 = dp3(src0, src1) */
+ fetch_source(mach, &r[3], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT);
+ micro_mul(&r[1], &r[2], &r[3]);
+ fetch_source(mach, &r[5], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
+ micro_mul(&r[8], &r[4], &r[5]);
+ micro_add(&r[1], &r[1], &r[8]);
+ fetch_source(mach, &r[7], &inst->Src[1], CHAN_Z, TGSI_EXEC_DATA_FLOAT);
+ micro_mul(&r[8], &r[6], &r[7]);
+ micro_add(&r[1], &r[1], &r[8]);
+
+ /* r1 = 2 * r1 / r0 */
+ micro_add(&r[1], &r[1], &r[1]);
+ micro_div(&r[1], &r[1], &r[0]);
+
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
+ micro_mul(&r[2], &r[2], &r[1]);
+ micro_sub(&r[2], &r[2], &r[3]);
+ store_dest(mach, &r[2], &inst->Dst[0], inst, CHAN_X, TGSI_EXEC_DATA_FLOAT);
+ }
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
+ micro_mul(&r[4], &r[4], &r[1]);
+ micro_sub(&r[4], &r[4], &r[5]);
+ store_dest(mach, &r[4], &inst->Dst[0], inst, CHAN_Y, TGSI_EXEC_DATA_FLOAT);
+ }
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
+ micro_mul(&r[6], &r[6], &r[1]);
+ micro_sub(&r[6], &r[6], &r[7]);
+ store_dest(mach, &r[6], &inst->Dst[0], inst, CHAN_Z, TGSI_EXEC_DATA_FLOAT);
+ }
+ }
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
+ store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT);
+ }
+}
+
+static void
+exec_xpd(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ union tgsi_exec_channel r[6];
+ union tgsi_exec_channel d[3];
+
+ fetch_source(mach, &r[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
+ fetch_source(mach, &r[1], &inst->Src[1], CHAN_Z, TGSI_EXEC_DATA_FLOAT);
+
+ micro_mul(&r[2], &r[0], &r[1]);
+
+ fetch_source(mach, &r[3], &inst->Src[0], CHAN_Z, TGSI_EXEC_DATA_FLOAT);
+ fetch_source(mach, &r[4], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
+
+ micro_mul(&r[5], &r[3], &r[4] );
+ micro_sub(&d[CHAN_X], &r[2], &r[5]);
+
+ fetch_source(mach, &r[2], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT);
+
+ micro_mul(&r[3], &r[3], &r[2]);
+
+ fetch_source(mach, &r[5], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
+
+ micro_mul(&r[1], &r[1], &r[5]);
+ micro_sub(&d[CHAN_Y], &r[3], &r[1]);
+
+ micro_mul(&r[5], &r[5], &r[4]);
+ micro_mul(&r[0], &r[0], &r[2]);
+ micro_sub(&d[CHAN_Z], &r[5], &r[0]);
+
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
+ store_dest(mach, &d[CHAN_X], &inst->Dst[0], inst, CHAN_X, TGSI_EXEC_DATA_FLOAT);
+ }
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
+ store_dest(mach, &d[CHAN_Y], &inst->Dst[0], inst, CHAN_Y, TGSI_EXEC_DATA_FLOAT);
+ }
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
+ store_dest(mach, &d[CHAN_Z], &inst->Dst[0], inst, CHAN_Z, TGSI_EXEC_DATA_FLOAT);
+ }
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
+ store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT);
+ }
+}
+
+static void
+exec_dst(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ union tgsi_exec_channel r[2];
+ union tgsi_exec_channel d[4];
+
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
+ fetch_source(mach, &r[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
+ fetch_source(mach, &r[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
+ micro_mul(&d[CHAN_Y], &r[0], &r[1]);
+ }
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
+ fetch_source(mach, &d[CHAN_Z], &inst->Src[0], CHAN_Z, TGSI_EXEC_DATA_FLOAT);
+ }
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
+ fetch_source(mach, &d[CHAN_W], &inst->Src[1], CHAN_W, TGSI_EXEC_DATA_FLOAT);
+ }
+
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
+ store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_X, TGSI_EXEC_DATA_FLOAT);
+ }
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
+ store_dest(mach, &d[CHAN_Y], &inst->Dst[0], inst, CHAN_Y, TGSI_EXEC_DATA_FLOAT);
+ }
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
+ store_dest(mach, &d[CHAN_Z], &inst->Dst[0], inst, CHAN_Z, TGSI_EXEC_DATA_FLOAT);
+ }
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
+ store_dest(mach, &d[CHAN_W], &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT);
+ }
+}
+
+static void
+exec_log(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ union tgsi_exec_channel r[3];
+
+ fetch_source(mach, &r[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
+ micro_abs(&r[2], &r[0]); /* r2 = abs(r0) */
+ micro_lg2(&r[1], &r[2]); /* r1 = lg2(r2) */
+ micro_flr(&r[0], &r[1]); /* r0 = floor(r1) */
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
+ store_dest(mach, &r[0], &inst->Dst[0], inst, CHAN_X, TGSI_EXEC_DATA_FLOAT);
+ }
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
+ micro_exp2(&r[0], &r[0]); /* r0 = 2 ^ r0 */
+ micro_div(&r[0], &r[2], &r[0]); /* r0 = r2 / r0 */
+ store_dest(mach, &r[0], &inst->Dst[0], inst, CHAN_Y, TGSI_EXEC_DATA_FLOAT);
+ }
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
+ store_dest(mach, &r[1], &inst->Dst[0], inst, CHAN_Z, TGSI_EXEC_DATA_FLOAT);
+ }
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
+ store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT);
+ }
+}
+
+static void
+exec_exp(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ union tgsi_exec_channel r[3];
+
+ fetch_source(mach, &r[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
+ micro_flr(&r[1], &r[0]); /* r1 = floor(r0) */
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
+ micro_exp2(&r[2], &r[1]); /* r2 = 2 ^ r1 */
+ store_dest(mach, &r[2], &inst->Dst[0], inst, CHAN_X, TGSI_EXEC_DATA_FLOAT);
+ }
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
+ micro_sub(&r[2], &r[0], &r[1]); /* r2 = r0 - r1 */
+ store_dest(mach, &r[2], &inst->Dst[0], inst, CHAN_Y, TGSI_EXEC_DATA_FLOAT);
+ }
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
+ micro_exp2(&r[2], &r[0]); /* r2 = 2 ^ r0 */
+ store_dest(mach, &r[2], &inst->Dst[0], inst, CHAN_Z, TGSI_EXEC_DATA_FLOAT);
+ }
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
+ store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT);
+ }
+}
+
+static void
+exec_lit(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ union tgsi_exec_channel r[3];
+ union tgsi_exec_channel d[3];
+
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
+ store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_X, TGSI_EXEC_DATA_FLOAT);
+ }
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_YZ) {
+ fetch_source(mach, &r[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
+ micro_max(&d[CHAN_Y], &r[0], &ZeroVec);
+ store_dest(mach, &d[CHAN_Y], &inst->Dst[0], inst, CHAN_Y, TGSI_EXEC_DATA_FLOAT);
+ }
+
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
+ fetch_source(mach, &r[1], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
+ micro_max(&r[1], &r[1], &ZeroVec);
+
+ fetch_source(mach, &r[2], &inst->Src[0], CHAN_W, TGSI_EXEC_DATA_FLOAT);
+ micro_min(&r[2], &r[2], &P128Vec);
+ micro_max(&r[2], &r[2], &M128Vec);
+ micro_pow(&r[1], &r[1], &r[2]);
+ micro_lt(&d[CHAN_Z], &ZeroVec, &r[0], &r[1], &ZeroVec);
+ store_dest(mach, &d[CHAN_Z], &inst->Dst[0], inst, CHAN_Z, TGSI_EXEC_DATA_FLOAT);
+ }
+ }
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
+ store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT);
+ }
+}
+
+static void
exec_break(struct tgsi_exec_machine *mach)
{
if (mach->BreakType == TGSI_EXEC_BREAK_INSIDE_LOOP) {
@@ -2702,9 +2969,7 @@ exec_instruction(
const struct tgsi_full_instruction *inst,
int *pc )
{
- uint chan_index;
union tgsi_exec_channel r[10];
- union tgsi_exec_channel d[8];
(*pc)++;
@@ -2718,36 +2983,7 @@ exec_instruction(
break;
case TGSI_OPCODE_LIT:
- if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
- FETCH( &r[0], 0, CHAN_X );
- if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
- micro_max(&d[CHAN_Y], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
- }
-
- if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
- FETCH( &r[1], 0, CHAN_Y );
- micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
-
- FETCH( &r[2], 0, CHAN_W );
- micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] );
- micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] );
- micro_pow( &r[1], &r[1], &r[2] );
- micro_lt(&d[CHAN_Z], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
- }
-
- if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
- STORE(&d[CHAN_Y], 0, CHAN_Y);
- }
- if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
- STORE(&d[CHAN_Z], 0, CHAN_Z);
- }
- }
- if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
- STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
- }
- if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
- STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
- }
+ exec_lit(mach, inst);
break;
case TGSI_OPCODE_RCP:
@@ -2759,44 +2995,11 @@ exec_instruction(
break;
case TGSI_OPCODE_EXP:
- FETCH( &r[0], 0, CHAN_X );
- micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */
- if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
- micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */
- STORE( &r[2], 0, CHAN_X ); /* store r2 */
- }
- if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
- micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */
- STORE( &r[2], 0, CHAN_Y ); /* store r2 */
- }
- if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
- micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */
- STORE( &r[2], 0, CHAN_Z ); /* store r2 */
- }
- if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
- STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
- }
+ exec_exp(mach, inst);
break;
case TGSI_OPCODE_LOG:
- FETCH( &r[0], 0, CHAN_X );
- micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */
- micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */
- micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */
- if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
- STORE( &r[0], 0, CHAN_X );
- }
- if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
- micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */
- micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */
- STORE( &r[0], 0, CHAN_Y );
- }
- if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
- STORE( &r[1], 0, CHAN_Z );
- }
- if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
- STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
- }
+ exec_log(mach, inst);
break;
case TGSI_OPCODE_MUL:
@@ -2816,30 +3019,7 @@ exec_instruction(
break;
case TGSI_OPCODE_DST:
- if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
- FETCH( &r[0], 0, CHAN_Y );
- FETCH( &r[1], 1, CHAN_Y);
- micro_mul(&d[CHAN_Y], &r[0], &r[1]);
- }
- if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
- FETCH(&d[CHAN_Z], 0, CHAN_Z);
- }
- if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
- FETCH(&d[CHAN_W], 1, CHAN_W);
- }
-
- if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
- STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X);
- }
- if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
- STORE(&d[CHAN_Y], 0, CHAN_Y);
- }
- if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
- STORE(&d[CHAN_Z], 0, CHAN_Z);
- }
- if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
- STORE(&d[CHAN_W], 0, CHAN_W);
- }
+ exec_dst(mach, inst);
break;
case TGSI_OPCODE_MIN:
@@ -2903,53 +3083,11 @@ exec_instruction(
break;
case TGSI_OPCODE_POW:
- FETCH(&r[0], 0, CHAN_X);
- FETCH(&r[1], 1, CHAN_X);
-
- micro_pow( &r[0], &r[0], &r[1] );
-
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
- }
+ exec_scalar_binary(mach, inst, micro_pow, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_XPD:
- FETCH(&r[0], 0, CHAN_Y);
- FETCH(&r[1], 1, CHAN_Z);
-
- micro_mul( &r[2], &r[0], &r[1] );
-
- FETCH(&r[3], 0, CHAN_Z);
- FETCH(&r[4], 1, CHAN_Y);
-
- micro_mul( &r[5], &r[3], &r[4] );
- micro_sub(&d[CHAN_X], &r[2], &r[5]);
-
- FETCH(&r[2], 1, CHAN_X);
-
- micro_mul( &r[3], &r[3], &r[2] );
-
- FETCH(&r[5], 0, CHAN_X);
-
- micro_mul( &r[1], &r[1], &r[5] );
- micro_sub(&d[CHAN_Y], &r[3], &r[1]);
-
- micro_mul( &r[5], &r[5], &r[4] );
- micro_mul( &r[0], &r[0], &r[2] );
- micro_sub(&d[CHAN_Z], &r[5], &r[0]);
-
- if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
- STORE(&d[CHAN_X], 0, CHAN_X);
- }
- if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
- STORE(&d[CHAN_Y], 0, CHAN_Y);
- }
- if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
- STORE(&d[CHAN_Z], 0, CHAN_Z);
- }
- if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
- STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
- }
+ exec_xpd(mach, inst);
break;
case TGSI_OPCODE_ABS:
@@ -2957,12 +3095,7 @@ exec_instruction(
break;
case TGSI_OPCODE_RCC:
- FETCH(&r[0], 0, CHAN_X);
- micro_div(&r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0]);
- micro_float_clamp(&r[0], &r[0]);
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&r[0], 0, chan_index);
- }
+ exec_scalar_unary(mach, inst, micro_rcc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_DPH:
@@ -3006,52 +3139,7 @@ exec_instruction(
break;
case TGSI_OPCODE_RFL:
- if (IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
- IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
- IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
- /* r0 = dp3(src0, src0) */
- FETCH(&r[2], 0, CHAN_X);
- micro_mul(&r[0], &r[2], &r[2]);
- FETCH(&r[4], 0, CHAN_Y);
- micro_mul(&r[8], &r[4], &r[4]);
- micro_add(&r[0], &r[0], &r[8]);
- FETCH(&r[6], 0, CHAN_Z);
- micro_mul(&r[8], &r[6], &r[6]);
- micro_add(&r[0], &r[0], &r[8]);
-
- /* r1 = dp3(src0, src1) */
- FETCH(&r[3], 1, CHAN_X);
- micro_mul(&r[1], &r[2], &r[3]);
- FETCH(&r[5], 1, CHAN_Y);
- micro_mul(&r[8], &r[4], &r[5]);
- micro_add(&r[1], &r[1], &r[8]);
- FETCH(&r[7], 1, CHAN_Z);
- micro_mul(&r[8], &r[6], &r[7]);
- micro_add(&r[1], &r[1], &r[8]);
-
- /* r1 = 2 * r1 / r0 */
- micro_add(&r[1], &r[1], &r[1]);
- micro_div(&r[1], &r[1], &r[0]);
-
- if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
- micro_mul(&r[2], &r[2], &r[1]);
- micro_sub(&r[2], &r[2], &r[3]);
- STORE(&r[2], 0, CHAN_X);
- }
- if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
- micro_mul(&r[4], &r[4], &r[1]);
- micro_sub(&r[4], &r[4], &r[5]);
- STORE(&r[4], 0, CHAN_Y);
- }
- if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
- micro_mul(&r[6], &r[6], &r[1]);
- micro_sub(&r[6], &r[6], &r[7]);
- STORE(&r[6], 0, CHAN_Z);
- }
- }
- if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
- STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W);
- }
+ exec_rfl(mach, inst);
break;
case TGSI_OPCODE_SEQ:
@@ -3059,9 +3147,7 @@ exec_instruction(
break;
case TGSI_OPCODE_SFL:
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, chan_index);
- }
+ exec_vector(mach, inst, micro_sfl, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_SGT:
@@ -3081,9 +3167,7 @@ exec_instruction(
break;
case TGSI_OPCODE_STR:
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, chan_index);
- }
+ exec_vector(mach, inst, micro_str, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_TEX:
@@ -3140,42 +3224,7 @@ exec_instruction(
break;
case TGSI_OPCODE_X2D:
- FETCH(&r[0], 1, CHAN_X);
- FETCH(&r[1], 1, CHAN_Y);
- if (IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
- IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
- FETCH(&r[2], 2, CHAN_X);
- micro_mul(&r[2], &r[2], &r[0]);
- FETCH(&r[3], 2, CHAN_Y);
- micro_mul(&r[3], &r[3], &r[1]);
- micro_add(&r[2], &r[2], &r[3]);
- FETCH(&r[3], 0, CHAN_X);
- micro_add(&d[CHAN_X], &r[2], &r[3]);
-
- }
- if (IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
- IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
- FETCH(&r[2], 2, CHAN_Z);
- micro_mul(&r[2], &r[2], &r[0]);
- FETCH(&r[3], 2, CHAN_W);
- micro_mul(&r[3], &r[3], &r[1]);
- micro_add(&r[2], &r[2], &r[3]);
- FETCH(&r[3], 0, CHAN_Y);
- micro_add(&d[CHAN_Y], &r[2], &r[3]);
-
- }
- if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
- STORE(&d[CHAN_X], 0, CHAN_X);
- }
- if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
- STORE(&d[CHAN_Y], 0, CHAN_Y);
- }
- if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
- STORE(&d[CHAN_X], 0, CHAN_Z);
- }
- if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
- STORE(&d[CHAN_Y], 0, CHAN_W);
- }
+ exec_x2d(mach, inst);
break;
case TGSI_OPCODE_ARA:
@@ -3283,23 +3332,7 @@ exec_instruction(
break;
case TGSI_OPCODE_SCS:
- if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
- FETCH( &r[0], 0, CHAN_X );
- if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
- micro_cos(&r[1], &r[0]);
- STORE(&r[1], 0, CHAN_X);
- }
- if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
- micro_sin(&r[1], &r[0]);
- STORE(&r[1], 0, CHAN_Y);
- }
- }
- if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
- STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
- }
- if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
- STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
- }
+ exec_scs(mach, inst);
break;
case TGSI_OPCODE_NRM:
@@ -3684,14 +3717,6 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
mach->Primitives[0] = 0;
}
- for (i = 0; i < QUAD_SIZE; i++) {
- mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] =
- (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) |
- (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) |
- (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) |
- (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT);
- }
-
/* execute declarations (interpolants) */
for (i = 0; i < mach->NumDeclarations; i++) {
exec_declaration( mach, mach->Declarations+i );
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index 9d62c1d7e7e..bd5492e9497 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -131,34 +131,15 @@ struct tgsi_sampler
#define TGSI_EXEC_TEMP_PRIMITIVE_I (TGSI_EXEC_NUM_TEMPS + 2)
#define TGSI_EXEC_TEMP_PRIMITIVE_C 2
-/* NVIDIA condition code (CC) vector
- */
-#define TGSI_EXEC_CC_GT 0x01
-#define TGSI_EXEC_CC_EQ 0x02
-#define TGSI_EXEC_CC_LT 0x04
-#define TGSI_EXEC_CC_UN 0x08
-
-#define TGSI_EXEC_CC_X_MASK 0x000000ff
-#define TGSI_EXEC_CC_X_SHIFT 0
-#define TGSI_EXEC_CC_Y_MASK 0x0000ff00
-#define TGSI_EXEC_CC_Y_SHIFT 8
-#define TGSI_EXEC_CC_Z_MASK 0x00ff0000
-#define TGSI_EXEC_CC_Z_SHIFT 16
-#define TGSI_EXEC_CC_W_MASK 0xff000000
-#define TGSI_EXEC_CC_W_SHIFT 24
-
-#define TGSI_EXEC_TEMP_CC_I (TGSI_EXEC_NUM_TEMPS + 2)
-#define TGSI_EXEC_TEMP_CC_C 3
-
-#define TGSI_EXEC_TEMP_THREE_I (TGSI_EXEC_NUM_TEMPS + 3)
-#define TGSI_EXEC_TEMP_THREE_C 0
+#define TGSI_EXEC_TEMP_THREE_I (TGSI_EXEC_NUM_TEMPS + 2)
+#define TGSI_EXEC_TEMP_THREE_C 3
#define TGSI_EXEC_TEMP_HALF_I (TGSI_EXEC_NUM_TEMPS + 3)
-#define TGSI_EXEC_TEMP_HALF_C 1
+#define TGSI_EXEC_TEMP_HALF_C 0
/* execution mask, each value is either 0 or ~0 */
#define TGSI_EXEC_MASK_I (TGSI_EXEC_NUM_TEMPS + 3)
-#define TGSI_EXEC_MASK_C 2
+#define TGSI_EXEC_MASK_C 1
/* 4 register buffer for various purposes */
#define TGSI_EXEC_TEMP_R0 (TGSI_EXEC_NUM_TEMPS + 4)