summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author: Michal Krol <[email protected]> 2010-02-18 11:26:18 +0100
committer: Michal Krol <[email protected]> 2010-02-19 19:02:26 +0100
commit: d68f024b7dd1891d4939bf56d3065acc225b9c81 (patch)
tree: 636af796c363fd707c3b49385e2ee666fa8f5239 /src
parent: 298be2b028263b2c343a707662c6fbfa18293cb2 (diff)
tgsi: Rewrite exec implementations of NRM and NRM4.
Diffstat (limited to 'src')
-rw-r--r-- src/gallium/auxiliary/tgsi/tgsi_exec.c 131
1 files changed, 70 insertions, 61 deletions
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 262422364bf..14035d4b2d3 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -461,6 +461,10 @@ enum tgsi_exec_datatype {
static const union tgsi_exec_channel ZeroVec =
{ { 0.0, 0.0, 0.0, 0.0 } };
+static const union tgsi_exec_channel OneVec = {
+ {1.0f, 1.0f, 1.0f, 1.0f}
+};
+
/**
* Assert that none of the float values in 'chan' are infinite or NaN.
@@ -2030,6 +2034,70 @@ exec_dp2(struct tgsi_exec_machine *mach,
}
static void
+exec_nrm4(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ unsigned int chan;
+ union tgsi_exec_channel arg[4];
+ union tgsi_exec_channel scale;
+
+ fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
+ micro_mul(&scale, &arg[0], &arg[0]);
+
+ for (chan = CHAN_Y; chan <= CHAN_W; chan++) {
+ union tgsi_exec_channel product;
+
+ fetch_source(mach, &arg[chan], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
+ micro_mul(&product, &arg[chan], &arg[chan]);
+ micro_add(&scale, &scale, &product);
+ }
+
+ micro_rsq(&scale, &scale);
+
+ for (chan = CHAN_X; chan <= CHAN_W; chan++) {
+ if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+ micro_mul(&arg[chan], &arg[chan], &scale);
+ store_dest(mach, &arg[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
+ }
+ }
+}
+
+static void
+exec_nrm3(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) {
+ unsigned int chan;
+ union tgsi_exec_channel arg[3];
+ union tgsi_exec_channel scale;
+
+ fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
+ micro_mul(&scale, &arg[0], &arg[0]);
+
+ for (chan = CHAN_Y; chan <= CHAN_Z; chan++) {
+ union tgsi_exec_channel product;
+
+ fetch_source(mach, &arg[chan], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
+ micro_mul(&product, &arg[chan], &arg[chan]);
+ micro_add(&scale, &scale, &product);
+ }
+
+ micro_rsq(&scale, &scale);
+
+ for (chan = CHAN_X; chan <= CHAN_Z; chan++) {
+ if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+ micro_mul(&arg[chan], &arg[chan], &scale);
+ store_dest(mach, &arg[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
+ }
+ }
+ }
+
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
+ store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT);
+ }
+}
+
+static void
exec_break(struct tgsi_exec_machine *mach)
{
if (mach->BreakType == TGSI_EXEC_BREAK_INSIDE_LOOP) {
@@ -3052,70 +3120,11 @@ exec_instruction(
break;
case TGSI_OPCODE_NRM:
- /* 3-component vector normalize */
- if(IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
- IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
- IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
- /* r3 = sqrt(dp3(src0, src0)) */
- FETCH(&r[0], 0, CHAN_X);
- micro_mul(&r[3], &r[0], &r[0]);
- FETCH(&r[1], 0, CHAN_Y);
- micro_mul(&r[4], &r[1], &r[1]);
- micro_add(&r[3], &r[3], &r[4]);
- FETCH(&r[2], 0, CHAN_Z);
- micro_mul(&r[4], &r[2], &r[2]);
- micro_add(&r[3], &r[3], &r[4]);
- micro_sqrt(&r[3], &r[3]);
-
- if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
- micro_div(&r[0], &r[0], &r[3]);
- STORE(&r[0], 0, CHAN_X);
- }
- if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
- micro_div(&r[1], &r[1], &r[3]);
- STORE(&r[1], 0, CHAN_Y);
- }
- if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
- micro_div(&r[2], &r[2], &r[3]);
- STORE(&r[2], 0, CHAN_Z);
- }
- }
- if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
- STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W);
- }
+ exec_nrm3(mach, inst);
break;
case TGSI_OPCODE_NRM4:
- /* 4-component vector normalize */
- {
- union tgsi_exec_channel tmp, dot;
-
- /* tmp = dp4(src0, src0): */
- FETCH( &r[0], 0, CHAN_X );
- micro_mul( &tmp, &r[0], &r[0] );
-
- FETCH( &r[1], 0, CHAN_Y );
- micro_mul( &dot, &r[1], &r[1] );
- micro_add( &tmp, &tmp, &dot );
-
- FETCH( &r[2], 0, CHAN_Z );
- micro_mul( &dot, &r[2], &r[2] );
- micro_add( &tmp, &tmp, &dot );
-
- FETCH( &r[3], 0, CHAN_W );
- micro_mul( &dot, &r[3], &r[3] );
- micro_add( &tmp, &tmp, &dot );
-
- /* tmp = 1 / sqrt(tmp) */
- micro_sqrt( &tmp, &tmp );
- micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp );
-
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- /* chan = chan * tmp */
- micro_mul( &r[chan_index], &tmp, &r[chan_index] );
- STORE( &r[chan_index], 0, chan_index );
- }
- }
+ exec_nrm4(mach, inst);
break;
case TGSI_OPCODE_DIV: