summaryrefslogtreecommitdiffstats
path: root/src/gallium/auxiliary/tgsi
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/auxiliary/tgsi')
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_exec.c342
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_exec.h11
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_ppc.c3
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_sanity.c15
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_sse2.c64
5 files changed, 245 insertions, 190 deletions
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index f43233bdb49..2bcb33392a8 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -1509,7 +1509,8 @@ fetch_texel( struct tgsi_sampler *sampler,
const union tgsi_exec_channel *s,
const union tgsi_exec_channel *t,
const union tgsi_exec_channel *p,
- float lodbias, /* XXX should be float[4] */
+ const union tgsi_exec_channel *c0,
+ enum tgsi_sampler_control control,
union tgsi_exec_channel *r,
union tgsi_exec_channel *g,
union tgsi_exec_channel *b,
@@ -1518,7 +1519,7 @@ fetch_texel( struct tgsi_sampler *sampler,
uint j;
float rgba[NUM_CHANNELS][QUAD_SIZE];
- sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba);
+ sampler->get_samples(sampler, s->f, t->f, p->f, c0->f, control, rgba);
for (j = 0; j < 4; j++) {
r->f[j] = rgba[0][j];
@@ -1529,102 +1530,95 @@ fetch_texel( struct tgsi_sampler *sampler,
}
+#define TEX_MODIFIER_NONE 0
+#define TEX_MODIFIER_PROJECTED 1
+#define TEX_MODIFIER_LOD_BIAS 2
+#define TEX_MODIFIER_EXPLICIT_LOD 3
+
+
static void
exec_tex(struct tgsi_exec_machine *mach,
const struct tgsi_full_instruction *inst,
- boolean biasLod,
- boolean projected)
+ uint modifier)
{
const uint unit = inst->Src[1].Register.Index;
union tgsi_exec_channel r[4];
+ const union tgsi_exec_channel *lod = &ZeroVec;
+ enum tgsi_sampler_control control;
uint chan_index;
- float lodBias;
- /* debug_printf("Sampler %u unit %u\n", sampler, unit); */
+ if (modifier != TEX_MODIFIER_NONE) {
+ FETCH(&r[3], 0, CHAN_W);
+ if (modifier != TEX_MODIFIER_PROJECTED) {
+ lod = &r[3];
+ }
+ }
+
+ if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {
+ control = tgsi_sampler_lod_explicit;
+ } else {
+ control = tgsi_sampler_lod_bias;
+ }
switch (inst->Texture.Texture) {
case TGSI_TEXTURE_1D:
case TGSI_TEXTURE_SHADOW1D:
-
FETCH(&r[0], 0, CHAN_X);
- if (projected) {
- FETCH(&r[1], 0, CHAN_W);
- micro_div( &r[0], &r[0], &r[1] );
+ if (modifier == TEX_MODIFIER_PROJECTED) {
+ micro_div(&r[0], &r[0], &r[3]);
}
- if (biasLod) {
- FETCH(&r[1], 0, CHAN_W);
- lodBias = r[2].f[0];
- }
- else
- lodBias = 0.0;
-
fetch_texel(mach->Samplers[unit],
- &r[0], &ZeroVec, &ZeroVec, lodBias, /* S, T, P, BIAS */
- &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
+ &r[0], &ZeroVec, &ZeroVec, lod, /* S, T, P, LOD */
+ control,
+ &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
break;
case TGSI_TEXTURE_2D:
case TGSI_TEXTURE_RECT:
case TGSI_TEXTURE_SHADOW2D:
case TGSI_TEXTURE_SHADOWRECT:
-
FETCH(&r[0], 0, CHAN_X);
FETCH(&r[1], 0, CHAN_Y);
FETCH(&r[2], 0, CHAN_Z);
- if (projected) {
- FETCH(&r[3], 0, CHAN_W);
- micro_div( &r[0], &r[0], &r[3] );
- micro_div( &r[1], &r[1], &r[3] );
- micro_div( &r[2], &r[2], &r[3] );
- }
-
- if (biasLod) {
- FETCH(&r[3], 0, CHAN_W);
- lodBias = r[3].f[0];
+ if (modifier == TEX_MODIFIER_PROJECTED) {
+ micro_div(&r[0], &r[0], &r[3]);
+ micro_div(&r[1], &r[1], &r[3]);
+ micro_div(&r[2], &r[2], &r[3]);
}
- else
- lodBias = 0.0;
fetch_texel(mach->Samplers[unit],
- &r[0], &r[1], &r[2], lodBias, /* inputs */
+ &r[0], &r[1], &r[2], lod, /* S, T, P, LOD */
+ control,
&r[0], &r[1], &r[2], &r[3]); /* outputs */
break;
case TGSI_TEXTURE_3D:
case TGSI_TEXTURE_CUBE:
-
FETCH(&r[0], 0, CHAN_X);
FETCH(&r[1], 0, CHAN_Y);
FETCH(&r[2], 0, CHAN_Z);
- if (projected) {
- FETCH(&r[3], 0, CHAN_W);
- micro_div( &r[0], &r[0], &r[3] );
- micro_div( &r[1], &r[1], &r[3] );
- micro_div( &r[2], &r[2], &r[3] );
- }
-
- if (biasLod) {
- FETCH(&r[3], 0, CHAN_W);
- lodBias = r[3].f[0];
+ if (modifier == TEX_MODIFIER_PROJECTED) {
+ micro_div(&r[0], &r[0], &r[3]);
+ micro_div(&r[1], &r[1], &r[3]);
+ micro_div(&r[2], &r[2], &r[3]);
}
- else
- lodBias = 0.0;
fetch_texel(mach->Samplers[unit],
- &r[0], &r[1], &r[2], lodBias,
+ &r[0], &r[1], &r[2], lod,
+ control,
&r[0], &r[1], &r[2], &r[3]);
break;
default:
- assert (0);
+ assert(0);
}
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[chan_index], 0, chan_index );
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&r[chan_index], 0, chan_index);
}
}
@@ -1647,8 +1641,9 @@ exec_txd(struct tgsi_exec_machine *mach,
FETCH(&r[0], 0, CHAN_X);
fetch_texel(mach->Samplers[unit],
- &r[0], &ZeroVec, &ZeroVec, 0.0f, /* S, T, P, BIAS */
- &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
+ &r[0], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, BIAS */
+ tgsi_sampler_lod_bias,
+ &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
break;
case TGSI_TEXTURE_2D:
@@ -1661,8 +1656,9 @@ exec_txd(struct tgsi_exec_machine *mach,
FETCH(&r[2], 0, CHAN_Z);
fetch_texel(mach->Samplers[unit],
- &r[0], &r[1], &r[2], 0.0f, /* inputs */
- &r[0], &r[1], &r[2], &r[3]); /* outputs */
+ &r[0], &r[1], &r[2], &ZeroVec, /* inputs */
+ tgsi_sampler_lod_bias,
+ &r[0], &r[1], &r[2], &r[3]); /* outputs */
break;
case TGSI_TEXTURE_3D:
@@ -1673,7 +1669,8 @@ exec_txd(struct tgsi_exec_machine *mach,
FETCH(&r[2], 0, CHAN_Z);
fetch_texel(mach->Samplers[unit],
- &r[0], &r[1], &r[2], 0.0f,
+ &r[0], &r[1], &r[2], &ZeroVec,
+ tgsi_sampler_lod_bias,
&r[0], &r[1], &r[2], &r[3]);
break;
@@ -1918,6 +1915,130 @@ exec_vector_trinary(struct tgsi_exec_machine *mach,
}
static void
+exec_dp3(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ unsigned int chan;
+ union tgsi_exec_channel arg[3];
+
+ fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
+ fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT);
+ micro_mul(&arg[2], &arg[0], &arg[1]);
+
+ for (chan = CHAN_Y; chan <= CHAN_Z; chan++) {
+ fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
+ fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT);
+ micro_mad(&arg[2], arg);
+ }
+
+ for (chan = 0; chan < NUM_CHANNELS; chan++) {
+ if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+ store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
+ }
+ }
+}
+
+static void
+exec_dp4(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ unsigned int chan;
+ union tgsi_exec_channel arg[3];
+
+ fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
+ fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT);
+ micro_mul(&arg[2], &arg[0], &arg[1]);
+
+ for (chan = CHAN_Y; chan <= CHAN_W; chan++) {
+ fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
+ fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT);
+ micro_mad(&arg[2], arg);
+ }
+
+ for (chan = 0; chan < NUM_CHANNELS; chan++) {
+ if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+ store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
+ }
+ }
+}
+
+static void
+exec_dp2a(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ unsigned int chan;
+ union tgsi_exec_channel arg[3];
+
+ fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
+ fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT);
+ micro_mul(&arg[2], &arg[0], &arg[1]);
+
+ fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
+ fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
+ micro_mad(&arg[0], arg);
+
+ fetch_source(mach, &arg[1], &inst->Src[2], CHAN_X, TGSI_EXEC_DATA_FLOAT);
+ micro_add(&arg[0], &arg[0], &arg[1]);
+
+ for (chan = 0; chan < NUM_CHANNELS; chan++) {
+ if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+ store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
+ }
+ }
+}
+
+static void
+exec_dph(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ unsigned int chan;
+ union tgsi_exec_channel arg[3];
+
+ fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
+ fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT);
+ micro_mul(&arg[2], &arg[0], &arg[1]);
+
+ fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
+ fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
+ micro_mad(&arg[2], arg);
+
+ fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Z, TGSI_EXEC_DATA_FLOAT);
+ fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Z, TGSI_EXEC_DATA_FLOAT);
+ micro_mad(&arg[0], arg);
+
+ fetch_source(mach, &arg[1], &inst->Src[1], CHAN_W, TGSI_EXEC_DATA_FLOAT);
+ micro_add(&arg[0], &arg[0], &arg[1]);
+
+ for (chan = 0; chan < NUM_CHANNELS; chan++) {
+ if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+ store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
+ }
+ }
+}
+
+static void
+exec_dp2(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ unsigned int chan;
+ union tgsi_exec_channel arg[3];
+
+ fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
+ fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT);
+ micro_mul(&arg[2], &arg[0], &arg[1]);
+
+ fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
+ fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
+ micro_mad(&arg[2], arg);
+
+ for (chan = 0; chan < NUM_CHANNELS; chan++) {
+ if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+ store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
+ }
+ }
+}
+
+static void
exec_break(struct tgsi_exec_machine *mach)
{
if (mach->BreakType == TGSI_EXEC_BREAK_INSIDE_LOOP) {
@@ -2397,54 +2518,11 @@ exec_instruction(
break;
case TGSI_OPCODE_DP3:
- /* TGSI_OPCODE_DOT3 */
- FETCH( &r[0], 0, CHAN_X );
- FETCH( &r[1], 1, CHAN_X );
- micro_mul( &r[0], &r[0], &r[1] );
-
- FETCH( &r[1], 0, CHAN_Y );
- FETCH( &r[2], 1, CHAN_Y );
- micro_mul( &r[1], &r[1], &r[2] );
- micro_add( &r[0], &r[0], &r[1] );
-
- FETCH( &r[1], 0, CHAN_Z );
- FETCH( &r[2], 1, CHAN_Z );
- micro_mul( &r[1], &r[1], &r[2] );
- micro_add( &r[0], &r[0], &r[1] );
-
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
- }
+ exec_dp3(mach, inst);
break;
- case TGSI_OPCODE_DP4:
- /* TGSI_OPCODE_DOT4 */
- FETCH(&r[0], 0, CHAN_X);
- FETCH(&r[1], 1, CHAN_X);
-
- micro_mul( &r[0], &r[0], &r[1] );
-
- FETCH(&r[1], 0, CHAN_Y);
- FETCH(&r[2], 1, CHAN_Y);
-
- micro_mul( &r[1], &r[1], &r[2] );
- micro_add( &r[0], &r[0], &r[1] );
-
- FETCH(&r[1], 0, CHAN_Z);
- FETCH(&r[2], 1, CHAN_Z);
-
- micro_mul( &r[1], &r[1], &r[2] );
- micro_add( &r[0], &r[0], &r[1] );
-
- FETCH(&r[1], 0, CHAN_W);
- FETCH(&r[2], 1, CHAN_W);
-
- micro_mul( &r[1], &r[1], &r[2] );
- micro_add( &r[0], &r[0], &r[1] );
-
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
- }
+ case TGSI_OPCODE_DP4:
+ exec_dp4(mach, inst);
break;
case TGSI_OPCODE_DST:
@@ -2540,21 +2618,7 @@ exec_instruction(
break;
case TGSI_OPCODE_DP2A:
- FETCH( &r[0], 0, CHAN_X );
- FETCH( &r[1], 1, CHAN_X );
- micro_mul( &r[0], &r[0], &r[1] );
-
- FETCH( &r[1], 0, CHAN_Y );
- FETCH( &r[2], 1, CHAN_Y );
- micro_mul( &r[1], &r[1], &r[2] );
- micro_add( &r[0], &r[0], &r[1] );
-
- FETCH( &r[2], 2, CHAN_X );
- micro_add( &r[0], &r[0], &r[2] );
-
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
- }
+ exec_dp2a(mach, inst);
break;
case TGSI_OPCODE_FRC:
@@ -2654,30 +2718,7 @@ exec_instruction(
break;
case TGSI_OPCODE_DPH:
- FETCH(&r[0], 0, CHAN_X);
- FETCH(&r[1], 1, CHAN_X);
-
- micro_mul( &r[0], &r[0], &r[1] );
-
- FETCH(&r[1], 0, CHAN_Y);
- FETCH(&r[2], 1, CHAN_Y);
-
- micro_mul( &r[1], &r[1], &r[2] );
- micro_add( &r[0], &r[0], &r[1] );
-
- FETCH(&r[1], 0, CHAN_Z);
- FETCH(&r[2], 1, CHAN_Z);
-
- micro_mul( &r[1], &r[1], &r[2] );
- micro_add( &r[0], &r[0], &r[1] );
-
- FETCH(&r[1], 1, CHAN_W);
-
- micro_add( &r[0], &r[0], &r[1] );
-
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
- }
+ exec_dph(mach, inst);
break;
case TGSI_OPCODE_COS:
@@ -2801,14 +2842,14 @@ exec_instruction(
/* simple texture lookup */
/* src[0] = texcoord */
/* src[1] = sampler unit */
- exec_tex(mach, inst, FALSE, FALSE);
+ exec_tex(mach, inst, TEX_MODIFIER_NONE);
break;
case TGSI_OPCODE_TXB:
/* Texture lookup with lod bias */
/* src[0] = texcoord (src[0].w = LOD bias) */
/* src[1] = sampler unit */
- exec_tex(mach, inst, TRUE, FALSE);
+ exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS);
break;
case TGSI_OPCODE_TXD:
@@ -2824,14 +2865,14 @@ exec_instruction(
/* Texture lookup with explit LOD */
/* src[0] = texcoord (src[0].w = LOD) */
/* src[1] = sampler unit */
- exec_tex(mach, inst, TRUE, FALSE);
+ exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD);
break;
case TGSI_OPCODE_TXP:
/* Texture lookup with projection */
/* src[0] = texcoord (src[0].w = projection) */
/* src[1] = sampler unit */
- exec_tex(mach, inst, FALSE, TRUE);
+ exec_tex(mach, inst, TEX_MODIFIER_PROJECTED);
break;
case TGSI_OPCODE_UP2H:
@@ -3091,18 +3132,7 @@ exec_instruction(
break;
case TGSI_OPCODE_DP2:
- FETCH( &r[0], 0, CHAN_X );
- FETCH( &r[1], 1, CHAN_X );
- micro_mul( &r[0], &r[0], &r[1] );
-
- FETCH( &r[1], 0, CHAN_Y );
- FETCH( &r[2], 1, CHAN_Y );
- micro_mul( &r[1], &r[1], &r[2] );
- micro_add( &r[0], &r[0], &r[1] );
-
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
- }
+ exec_dp2(mach, inst);
break;
case TGSI_OPCODE_IF:
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index aa3a98d7f18..59e3b445cc3 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -2,6 +2,7 @@
*
* Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
+ * Copyright 2009-2010 VMware, Inc. All rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
@@ -35,11 +36,13 @@
extern "C" {
#endif
+
#define MAX_LABELS (4 * 1024) /**< basically, max instructions */
#define NUM_CHANNELS 4 /* R,G,B,A */
#define QUAD_SIZE 4 /* 4 pixel/quad */
+
/**
* Registers may be treated as float, signed int or unsigned int.
*/
@@ -69,6 +72,11 @@ struct tgsi_interp_coef
float dady[NUM_CHANNELS];
};
+enum tgsi_sampler_control {
+ tgsi_sampler_lod_bias,
+ tgsi_sampler_lod_explicit
+};
+
/**
* Information for sampling textures, which must be implemented
* by code outside the TGSI executor.
@@ -80,7 +88,8 @@ struct tgsi_sampler
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE],
- float lodbias,
+ const float c0[QUAD_SIZE],
+ enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE]);
};
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
index 138d2d095bb..ad553c71a57 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
@@ -51,7 +51,8 @@
* Since it's pretty much impossible to form PPC vector immediates, load
* them from memory here:
*/
-const float ppc_builtin_constants[] ALIGN16_ATTRIB = {
+PIPE_ALIGN_VAR(16) const float
+ppc_builtin_constants[] = {
1.0f, -128.0f, 128.0, 0.0
};
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
index 9b0644465af..7f1c8e5dd68 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
@@ -265,13 +265,15 @@ check_register_usage(
}
else {
if (!is_register_declared( ctx, reg )) {
- if (reg->dimensions == 2)
+ if (reg->dimensions == 2) {
report_error( ctx, "%s[%d][%d]: Undeclared %s register", file_names[reg->file],
reg->indices[0], reg->indices[1], name );
- else
+ }
+ else {
report_error( ctx, "%s[%d]: Undeclared %s register", file_names[reg->file],
reg->indices[0], name );
}
+ }
if (!is_register_used( ctx, reg ))
cso_hash_insert(ctx->regs_used, scan_register_key(reg), reg);
else
@@ -333,15 +335,15 @@ iter_instruction(
fill_scan_register1d(ind_reg,
inst->Src[i].Indirect.File,
inst->Src[i].Indirect.Index);
+ if (!(reg->file == TGSI_FILE_ADDRESS || reg->file == TGSI_FILE_LOOP) ||
+ reg->indices[0] != 0) {
+ report_warning(ctx, "Indirect register neither ADDR[0] nor LOOP[0]");
+ }
check_register_usage(
ctx,
reg,
"indirect",
FALSE );
- if (!(reg->file == TGSI_FILE_ADDRESS || reg->file == TGSI_FILE_LOOP) ||
- reg->indices[0] != 0) {
- report_warning(ctx, "Indirect register neither ADDR[0] nor LOOP[0]");
- }
}
}
@@ -513,6 +515,7 @@ regs_hash_destroy(struct cso_hash *hash)
while (!cso_hash_iter_is_null(iter)) {
scan_register *reg = (scan_register *)cso_hash_iter_data(iter);
iter = cso_hash_erase(hash, iter);
+ assert(reg->file < TGSI_FILE_COUNT);
FREE(reg);
}
cso_hash_delete(hash);
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index 118059ace9c..a85cc4659e0 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -2,6 +2,7 @@
*
* Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
+ * Copyright 2009-2010 VMware, Inc. All rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
@@ -1418,13 +1419,13 @@ fetch_texel( struct tgsi_sampler **sampler,
sampler, *sampler,
store );
- debug_printf("lodbias %f\n", store[12]);
-
for (j = 0; j < 4; j++)
- debug_printf("sample %d texcoord %f %f\n",
+ debug_printf("sample %d texcoord %f %f %f lodbias %f\n",
j,
store[0+j],
- store[4+j]);
+ store[4+j],
+ store[8 + j],
+ store[12 + j]);
#endif
{
@@ -1433,7 +1434,8 @@ fetch_texel( struct tgsi_sampler **sampler,
&store[0], /* s */
&store[4], /* t */
&store[8], /* r */
- store[12], /* lodbias */
+ &store[12], /* lodbias */
+ tgsi_sampler_lod_bias,
rgba); /* results */
memcpy( store, rgba, 16 * sizeof(float));
@@ -2144,40 +2146,50 @@ emit_instruction(
break;
case TGSI_OPCODE_XPD:
+ /* Note: we do all stores after all operands have been fetched
+ * to avoid src/dst register aliasing issues for an instruction
+ * such as: XPD TEMP[2].xyz, TEMP[0], TEMP[2];
+ */
if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
- FETCH( func, *inst, 1, 1, CHAN_Z );
- FETCH( func, *inst, 3, 0, CHAN_Z );
+ FETCH( func, *inst, 1, 1, CHAN_Z ); /* xmm[1] = src[1].z */
+ FETCH( func, *inst, 3, 0, CHAN_Z ); /* xmm[3] = src[0].z */
}
if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
- FETCH( func, *inst, 0, 0, CHAN_Y );
- FETCH( func, *inst, 4, 1, CHAN_Y );
+ FETCH( func, *inst, 0, 0, CHAN_Y ); /* xmm[0] = src[0].y */
+ FETCH( func, *inst, 4, 1, CHAN_Y ); /* xmm[4] = src[1].y */
}
IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {
- emit_MOV( func, 2, 0 );
- emit_mul( func, 2, 1 );
- emit_MOV( func, 5, 3 );
- emit_mul( func, 5, 4 );
- emit_sub( func, 2, 5 );
- STORE( func, *inst, 2, 0, CHAN_X );
+ emit_MOV( func, 7, 0 ); /* xmm[7] = xmm[0] */
+ emit_mul( func, 7, 1 ); /* xmm[7] = xmm[2] * xmm[1] */
+ emit_MOV( func, 5, 3 ); /* xmm[5] = xmm[3] */
+ emit_mul( func, 5, 4 ); /* xmm[5] = xmm[5] * xmm[4] */
+ emit_sub( func, 7, 5 ); /* xmm[7] = xmm[2] - xmm[5] */
+ /* store xmm[7] in dst.x below */
}
if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
- FETCH( func, *inst, 2, 1, CHAN_X );
- FETCH( func, *inst, 5, 0, CHAN_X );
+ FETCH( func, *inst, 2, 1, CHAN_X ); /* xmm[2] = src[1].x */
+ FETCH( func, *inst, 5, 0, CHAN_X ); /* xmm[5] = src[0].x */
}
IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
- emit_mul( func, 3, 2 );
- emit_mul( func, 1, 5 );
- emit_sub( func, 3, 1 );
- STORE( func, *inst, 3, 0, CHAN_Y );
+ emit_mul( func, 3, 2 ); /* xmm[3] = xmm[3] * xmm[2] */
+ emit_mul( func, 1, 5 ); /* xmm[1] = xmm[1] * xmm[5] */
+ emit_sub( func, 3, 1 ); /* xmm[3] = xmm[3] - xmm[1] */
+ /* store xmm[3] in dst.y below */
}
IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {
- emit_mul( func, 5, 4 );
- emit_mul( func, 0, 2 );
- emit_sub( func, 5, 0 );
- STORE( func, *inst, 5, 0, CHAN_Z );
+ emit_mul( func, 5, 4 ); /* xmm[5] = xmm[5] * xmm[4] */
+ emit_mul( func, 0, 2 ); /* xmm[0] = xmm[0] * xmm[2] */
+ emit_sub( func, 5, 0 ); /* xmm[5] = xmm[5] - xmm[0] */
+ STORE( func, *inst, 5, 0, CHAN_Z ); /* dst.z = xmm[5] */
+ }
+ IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {
+ STORE( func, *inst, 7, 0, CHAN_X ); /* dst.x = xmm[7] */
+ }
+ IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
+ STORE( func, *inst, 3, 0, CHAN_Y ); /* dst.y = xmm[3] */
}
IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) {
emit_tempf(
@@ -2506,7 +2518,7 @@ emit_instruction(
break;
case TGSI_OPCODE_TXL:
- emit_tex( func, inst, TRUE, FALSE );
+ return 0;
break;
case TGSI_OPCODE_TXP: