diff options
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_exec.c | 242 |
1 files changed, 133 insertions, 109 deletions
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 4d56cac64df..a4c47b38f41 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -2,6 +2,7 @@ * * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. + * Copyright 2009-2010 VMware, Inc. All rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -60,8 +61,39 @@ #include "util/u_memory.h" #include "util/u_math.h" + #define FAST_MATH 1 +static void +micro_iabs(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = src->i[0] >= 0 ? src->i[0] : -src->i[0]; + dst->i[1] = src->i[1] >= 0 ? src->i[1] : -src->i[1]; + dst->i[2] = src->i[2] >= 0 ? src->i[2] : -src->i[2]; + dst->i[3] = src->i[3] >= 0 ? src->i[3] : -src->i[3]; +} + +static void +micro_ineg(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = -src->i[0]; + dst->i[1] = -src->i[1]; + dst->i[2] = -src->i[2]; + dst->i[3] = -src->i[3]; +} + +static void +micro_mov(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src->u[0]; + dst->u[1] = src->u[1]; + dst->u[2] = src->u[2]; + dst->u[3] = src->u[3]; +} + #define TILE_TOP_LEFT 0 #define TILE_TOP_RIGHT 1 #define TILE_BOTTOM_LEFT 2 @@ -72,6 +104,12 @@ #define CHAN_Z 2 #define CHAN_W 3 +enum tgsi_exec_datatype { + TGSI_EXEC_DATA_FLOAT, + TGSI_EXEC_DATA_INT, + TGSI_EXEC_DATA_UINT +}; + /* * Shorthand locations of various utility registers (_I = Index, _C = Channel) */ @@ -130,16 +168,12 @@ static const union tgsi_exec_channel ZeroVec = { { 0.0, 0.0, 0.0, 0.0 } }; -#ifdef DEBUG -static void -check_inf_or_nan(const union tgsi_exec_channel *chan) -{ - assert(!util_is_inf_or_nan(chan->f[0])); - assert(!util_is_inf_or_nan(chan->f[1])); - assert(!util_is_inf_or_nan(chan->f[2])); - assert(!util_is_inf_or_nan(chan->f[3])); -} -#endif +#define CHECK_INF_OR_NAN(chan) do {\ + assert(!util_is_inf_or_nan((chan)->f[0]));\ + assert(!util_is_inf_or_nan((chan)->f[1]));\ + assert(!util_is_inf_or_nan((chan)->f[2]));\ + assert(!util_is_inf_or_nan((chan)->f[3]));\ + } while (0) #ifdef DEBUG @@ -952,11 +986,11 @@ fetch_src_file_channel( } static void -fetch_source( - const struct tgsi_exec_machine *mach, - union tgsi_exec_channel *chan, - const struct tgsi_full_src_register *reg, - const uint chan_index ) +fetch_source(const struct tgsi_exec_machine *mach, + union tgsi_exec_channel *chan, + const struct tgsi_full_src_register *reg, + const uint chan_index, + enum tgsi_exec_datatype src_datatype) { union tgsi_exec_channel index; uint swizzle; @@ -1113,32 +1147,30 @@ fetch_source( &index, chan ); - switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) { - case TGSI_UTIL_SIGN_CLEAR: - micro_abs( chan, chan ); - break; - - case TGSI_UTIL_SIGN_SET: - micro_abs( chan, chan ); - micro_neg( chan, chan ); - break; - - case TGSI_UTIL_SIGN_TOGGLE: - micro_neg( chan, chan ); - break; + if (reg->Register.Absolute) { + if (src_datatype == TGSI_EXEC_DATA_FLOAT) { + micro_abs(chan, chan); + } else { + micro_iabs(chan, chan); + } + } - case TGSI_UTIL_SIGN_KEEP: - break; + if (reg->Register.Negate) { + if (src_datatype == TGSI_EXEC_DATA_FLOAT) { + micro_neg(chan, chan); + } else { + micro_ineg(chan, chan); + } } } static void -store_dest( - struct tgsi_exec_machine *mach, - const union tgsi_exec_channel *chan, - const struct tgsi_full_dst_register *reg, - const struct tgsi_full_instruction *inst, - uint chan_index ) +store_dest(struct tgsi_exec_machine *mach, + const union tgsi_exec_channel *chan, + const struct tgsi_full_dst_register *reg, + const struct tgsi_full_instruction *inst, + uint chan_index, + enum tgsi_exec_datatype dst_datatype) { uint i; union tgsi_exec_channel null; @@ -1147,9 +1179,9 @@ store_dest( int offset = 0; /* indirection offset */ int index; -#ifdef DEBUG - check_inf_or_nan(chan); -#endif + if (dst_datatype == TGSI_EXEC_DATA_FLOAT) { + CHECK_INF_OR_NAN(chan); + } /* There is an extra source register that indirectly subscripts * a register file. The direct index now becomes an offset @@ -1314,10 +1346,10 @@ store_dest( } #define FETCH(VAL,INDEX,CHAN)\ - fetch_source (mach, VAL, &inst->Src[INDEX], CHAN) + fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_FLOAT) #define STORE(VAL,INDEX,CHAN)\ - store_dest (mach, VAL, &inst->Dst[INDEX], inst, CHAN ) + store_dest(mach, VAL, &inst->Dst[INDEX], inst, CHAN, TGSI_EXEC_DATA_FLOAT) /** @@ -1726,7 +1758,9 @@ typedef void (* micro_op)(union tgsi_exec_channel *dst, static void exec_vector_unary(struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst, - micro_op op) + micro_op op, + enum tgsi_exec_datatype dst_datatype, + enum tgsi_exec_datatype src_datatype) { unsigned int chan; struct tgsi_exec_vector dst; @@ -1735,13 +1769,13 @@ exec_vector_unary(struct tgsi_exec_machine *mach, if (inst->Dst[0].Register.WriteMask & (1 << chan)) { union tgsi_exec_channel src; - fetch_source(mach, &src, &inst->Src[0], chan); + fetch_source(mach, &src, &inst->Src[0], chan, src_datatype); op(&dst.xyzw[chan], &src); } } for (chan = 0; chan < NUM_CHANNELS; chan++) { if (inst->Dst[0].Register.WriteMask & (1 << chan)) { - store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan); + store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); } } } @@ -1749,7 +1783,9 @@ exec_vector_unary(struct tgsi_exec_machine *mach, static void exec_vector_binary(struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst, - micro_op op) + micro_op op, + enum tgsi_exec_datatype dst_datatype, + enum tgsi_exec_datatype src_datatype) { unsigned int chan; struct tgsi_exec_vector dst; @@ -1758,14 +1794,14 @@ exec_vector_binary(struct tgsi_exec_machine *mach, if (inst->Dst[0].Register.WriteMask & (1 << chan)) { union tgsi_exec_channel src[2]; - fetch_source(mach, &src[0], &inst->Src[0], chan); - fetch_source(mach, &src[1], &inst->Src[1], chan); + fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype); + fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype); op(&dst.xyzw[chan], src); } } for (chan = 0; chan < NUM_CHANNELS; chan++) { if (inst->Dst[0].Register.WriteMask & (1 << chan)) { - store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan); + store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); } } } @@ -1773,7 +1809,9 @@ exec_vector_binary(struct tgsi_exec_machine *mach, static void exec_vector_trinary(struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst, - micro_op op) + micro_op op, + enum tgsi_exec_datatype dst_datatype, + enum tgsi_exec_datatype src_datatype) { unsigned int chan; struct tgsi_exec_vector dst; @@ -1782,15 +1820,15 @@ exec_vector_trinary(struct tgsi_exec_machine *mach, if (inst->Dst[0].Register.WriteMask & (1 << chan)) { union tgsi_exec_channel src[3]; - fetch_source(mach, &src[0], &inst->Src[0], chan); - fetch_source(mach, &src[1], &inst->Src[1], chan); - fetch_source(mach, &src[2], &inst->Src[2], chan); + fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype); + fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype); + fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype); op(&dst.xyzw[chan], src); } } for (chan = 0; chan < NUM_CHANNELS; chan++) { if (inst->Dst[0].Register.WriteMask & (1 << chan)) { - store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan); + store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); } } } @@ -1820,7 +1858,7 @@ exec_switch(struct tgsi_exec_machine *mach, assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch; - fetch_source(mach, &mach->Switch.selector, &inst->Src[0], CHAN_X); + fetch_source(mach, &mach->Switch.selector, &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_UINT); mach->Switch.mask = 0x0; mach->Switch.defaultMask = 0x0; @@ -1838,7 +1876,7 @@ exec_case(struct tgsi_exec_machine *mach, union tgsi_exec_channel src; uint mask = 0; - fetch_source(mach, &src, &inst->Src[0], CHAN_X); + fetch_source(mach, &src, &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_UINT); if (mach->Switch.selector.u[0] == src.u[0]) { mask |= 0x1; @@ -1980,16 +2018,6 @@ micro_imin(union tgsi_exec_channel *dst, } static void -micro_ineg(union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src) -{ - dst->i[0] = -src->i[0]; - dst->i[1] = -src->i[1]; - dst->i[2] = -src->i[2]; - dst->i[3] = -src->i[3]; -} - -static void micro_isge(union tgsi_exec_channel *dst, const union tgsi_exec_channel *src) { @@ -2184,12 +2212,7 @@ exec_instruction( break; case TGSI_OPCODE_MOV: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&d[chan_index], 0, chan_index); - } - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_mov, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; case TGSI_OPCODE_LIT: @@ -3204,11 +3227,11 @@ exec_instruction( break; case TGSI_OPCODE_I2F: - exec_vector_unary(mach, inst, micro_i2f); + exec_vector_unary(mach, inst, micro_i2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_INT); break; case TGSI_OPCODE_NOT: - exec_vector_unary(mach, inst, micro_not); + exec_vector_unary(mach, inst, micro_not, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; case TGSI_OPCODE_TRUNC: @@ -3222,15 +3245,15 @@ exec_instruction( break; case TGSI_OPCODE_SHL: - exec_vector_binary(mach, inst, micro_shl); + exec_vector_binary(mach, inst, micro_shl, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; case TGSI_OPCODE_AND: - exec_vector_binary(mach, inst, micro_and); + exec_vector_binary(mach, inst, micro_and, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; case TGSI_OPCODE_OR: - exec_vector_binary(mach, inst, micro_or); + exec_vector_binary(mach, inst, micro_or, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; case TGSI_OPCODE_MOD: @@ -3238,7 +3261,7 @@ exec_instruction( break; case TGSI_OPCODE_XOR: - exec_vector_binary(mach, inst, micro_xor); + exec_vector_binary(mach, inst, micro_xor, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; case TGSI_OPCODE_SAD: @@ -3442,91 +3465,91 @@ exec_instruction( break; case TGSI_OPCODE_F2I: - exec_vector_unary(mach, inst, micro_f2i); + exec_vector_unary(mach, inst, micro_f2i, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_IDIV: - exec_vector_binary(mach, inst, micro_idiv); + exec_vector_binary(mach, inst, micro_idiv, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); break; case TGSI_OPCODE_IMAX: - exec_vector_binary(mach, inst, micro_imax); + exec_vector_binary(mach, inst, micro_imax, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); break; case TGSI_OPCODE_IMIN: - exec_vector_binary(mach, inst, micro_imin); + exec_vector_binary(mach, inst, micro_imin, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); break; case TGSI_OPCODE_INEG: - exec_vector_unary(mach, inst, micro_ineg); + exec_vector_unary(mach, inst, micro_ineg, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); break; case TGSI_OPCODE_ISGE: - exec_vector_binary(mach, inst, micro_isge); + exec_vector_binary(mach, inst, micro_isge, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); break; case TGSI_OPCODE_ISHR: - exec_vector_binary(mach, inst, micro_ishr); + exec_vector_binary(mach, inst, micro_ishr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); break; case TGSI_OPCODE_ISLT: - exec_vector_binary(mach, inst, micro_islt); + exec_vector_binary(mach, inst, micro_islt, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); break; case TGSI_OPCODE_F2U: - exec_vector_unary(mach, inst, micro_f2u); + exec_vector_unary(mach, inst, micro_f2u, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_U2F: - exec_vector_unary(mach, inst, micro_u2f); + exec_vector_unary(mach, inst, micro_u2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_UINT); break; case TGSI_OPCODE_UADD: - exec_vector_binary(mach, inst, micro_uadd); + exec_vector_binary(mach, inst, micro_uadd, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; case TGSI_OPCODE_UDIV: - exec_vector_binary(mach, inst, micro_udiv); + exec_vector_binary(mach, inst, micro_udiv, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; case TGSI_OPCODE_UMAD: - exec_vector_trinary(mach, inst, micro_umad); + exec_vector_trinary(mach, inst, micro_umad, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; case TGSI_OPCODE_UMAX: - exec_vector_binary(mach, inst, micro_umax); + exec_vector_binary(mach, inst, micro_umax, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; case TGSI_OPCODE_UMIN: - exec_vector_binary(mach, inst, micro_umin); + exec_vector_binary(mach, inst, micro_umin, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; case TGSI_OPCODE_UMOD: - exec_vector_binary(mach, inst, micro_umod); + exec_vector_binary(mach, inst, micro_umod, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; case TGSI_OPCODE_UMUL: - exec_vector_binary(mach, inst, micro_umul); + exec_vector_binary(mach, inst, micro_umul, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; case TGSI_OPCODE_USEQ: - exec_vector_binary(mach, inst, micro_useq); + exec_vector_binary(mach, inst, micro_useq, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; case TGSI_OPCODE_USGE: - exec_vector_binary(mach, inst, micro_usge); + exec_vector_binary(mach, inst, micro_usge, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; case TGSI_OPCODE_USHR: - exec_vector_binary(mach, inst, micro_ushr); + exec_vector_binary(mach, inst, micro_ushr, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; case TGSI_OPCODE_USLT: - exec_vector_binary(mach, inst, micro_uslt); + exec_vector_binary(mach, inst, micro_uslt, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; case TGSI_OPCODE_USNE: - exec_vector_binary(mach, inst, micro_usne); + exec_vector_binary(mach, inst, micro_usne, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; case TGSI_OPCODE_SWITCH: @@ -3550,6 +3573,7 @@ exec_instruction( } } + #define DEBUG_EXECUTION 0 @@ -3632,11 +3656,11 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) if (j > 0) { debug_printf(" "); } - debug_printf("(%6f, %6f, %6f, %6f)\n", - temps[i].xyzw[0].f[j], - temps[i].xyzw[1].f[j], - temps[i].xyzw[2].f[j], - temps[i].xyzw[3].f[j]); + debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", + temps[i].xyzw[0].f[j], temps[i].xyzw[0].u[j], + temps[i].xyzw[1].f[j], temps[i].xyzw[1].u[j], + temps[i].xyzw[2].f[j], temps[i].xyzw[2].u[j], + temps[i].xyzw[3].f[j], temps[i].xyzw[3].u[j]); } } } @@ -3650,11 +3674,11 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) if (j > 0) { debug_printf(" "); } - debug_printf("{%6f, %6f, %6f, %6f}\n", - outputs[i].xyzw[0].f[j], - outputs[i].xyzw[1].f[j], - outputs[i].xyzw[2].f[j], - outputs[i].xyzw[3].f[j]); + debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", + outputs[i].xyzw[0].f[j], outputs[i].xyzw[0].u[j], + outputs[i].xyzw[1].f[j], outputs[i].xyzw[1].u[j], + outputs[i].xyzw[2].f[j], outputs[i].xyzw[2].u[j], + outputs[i].xyzw[3].f[j], outputs[i].xyzw[3].u[j]); } } } |