diff options
Diffstat (limited to 'src/gallium/auxiliary/tgsi')
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_dump.c | 10 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_exec.c | 1826 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_exec.h | 34 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_info.c | 30 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h | 23 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_parse.c | 22 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_sanity.c | 16 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_sse2.c | 2 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_ureg.c | 144 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_ureg.h | 94 |
10 files changed, 1329 insertions, 872 deletions
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index 2c65ff16d81..e2e5394f86f 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -128,7 +128,9 @@ static const char *semantic_names[] = static const char *immediate_type_names[] = { - "FLT32" + "FLT32", + "UINT32", + "INT32" }; static const char *swizzle_names[] = @@ -412,6 +414,12 @@ iter_immediate( case TGSI_IMM_FLOAT32: FLT( imm->u[i].Float ); break; + case TGSI_IMM_UINT32: + UID(imm->u[i].Uint); + break; + case TGSI_IMM_INT32: + SID(imm->u[i].Int); + break; default: assert( 0 ); } diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index ba89f2fbc3c..f43233bdb49 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -2,6 +2,7 @@ * * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. + * Copyright 2009-2010 VMware, Inc. All rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -60,6 +61,7 @@ #include "util/u_memory.h" #include "util/u_math.h" + #define FAST_MATH 1 #define TILE_TOP_LEFT 0 @@ -67,11 +69,329 @@ #define TILE_BOTTOM_LEFT 2 #define TILE_BOTTOM_RIGHT 3 +static void +micro_abs(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = fabsf(src->f[0]); + dst->f[1] = fabsf(src->f[1]); + dst->f[2] = fabsf(src->f[2]); + dst->f[3] = fabsf(src->f[3]); +} + +static void +micro_arl(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = (int)floorf(src->f[0]); + dst->i[1] = (int)floorf(src->f[1]); + dst->i[2] = (int)floorf(src->f[2]); + dst->i[3] = (int)floorf(src->f[3]); +} + +static void +micro_arr(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = (int)floorf(src->f[0] + 0.5f); + dst->i[1] = (int)floorf(src->f[1] + 0.5f); + dst->i[2] = (int)floorf(src->f[2] + 0.5f); + dst->i[3] = (int)floorf(src->f[3] + 0.5f); +} + +static void +micro_ceil(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = ceilf(src->f[0]); + dst->f[1] = ceilf(src->f[1]); + dst->f[2] = ceilf(src->f[2]); + dst->f[3] = ceilf(src->f[3]); +} + +static void +micro_cos(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = cosf(src->f[0]); + dst->f[1] = cosf(src->f[1]); + dst->f[2] = cosf(src->f[2]); + dst->f[3] = cosf(src->f[3]); +} + +static void +micro_ddx(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = + dst->f[1] = + dst->f[2] = + dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT]; +} + +static void +micro_ddy(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = + dst->f[1] = + dst->f[2] = + dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT]; +} + +static void +micro_exp2(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ +#if FAST_MATH + dst->f[0] = util_fast_exp2(src->f[0]); + dst->f[1] = util_fast_exp2(src->f[1]); + dst->f[2] = util_fast_exp2(src->f[2]); + dst->f[3] = util_fast_exp2(src->f[3]); +#else +#if DEBUG + /* Inf is okay for this instruction, so clamp it to silence assertions. */ + uint i; + union tgsi_exec_channel clamped; + + for (i = 0; i < 4; i++) { + if (src->f[i] > 127.99999f) { + clamped.f[i] = 127.99999f; + } else if (src->f[i] < -126.99999f) { + clamped.f[i] = -126.99999f; + } else { + clamped.f[i] = src->f[i]; + } + } + src = &clamped; +#endif /* DEBUG */ + + dst->f[0] = powf(2.0f, src->f[0]); + dst->f[1] = powf(2.0f, src->f[1]); + dst->f[2] = powf(2.0f, src->f[2]); + dst->f[3] = powf(2.0f, src->f[3]); +#endif /* FAST_MATH */ +} + +static void +micro_flr(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = floorf(src->f[0]); + dst->f[1] = floorf(src->f[1]); + dst->f[2] = floorf(src->f[2]); + dst->f[3] = floorf(src->f[3]); +} + +static void +micro_frc(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src->f[0] - floorf(src->f[0]); + dst->f[1] = src->f[1] - floorf(src->f[1]); + dst->f[2] = src->f[2] - floorf(src->f[2]); + dst->f[3] = src->f[3] - floorf(src->f[3]); +} + +static void +micro_iabs(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = src->i[0] >= 0 ? src->i[0] : -src->i[0]; + dst->i[1] = src->i[1] >= 0 ? src->i[1] : -src->i[1]; + dst->i[2] = src->i[2] >= 0 ? src->i[2] : -src->i[2]; + dst->i[3] = src->i[3] >= 0 ? src->i[3] : -src->i[3]; +} + +static void +micro_ineg(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = -src->i[0]; + dst->i[1] = -src->i[1]; + dst->i[2] = -src->i[2]; + dst->i[3] = -src->i[3]; +} + +static void +micro_lg2(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ +#if FAST_MATH + dst->f[0] = util_fast_log2(src->f[0]); + dst->f[1] = util_fast_log2(src->f[1]); + dst->f[2] = util_fast_log2(src->f[2]); + dst->f[3] = util_fast_log2(src->f[3]); +#else + dst->f[0] = logf(src->f[0]) * 1.442695f; + dst->f[1] = logf(src->f[1]) * 1.442695f; + dst->f[2] = logf(src->f[2]) * 1.442695f; + dst->f[3] = logf(src->f[3]) * 1.442695f; +#endif +} + +static void +micro_lrp(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src[0].f[0] * (src[1].f[0] - src[2].f[0]) + src[2].f[0]; + dst->f[1] = src[0].f[1] * (src[1].f[1] - src[2].f[1]) + src[2].f[1]; + dst->f[2] = src[0].f[2] * (src[1].f[2] - src[2].f[2]) + src[2].f[2]; + dst->f[3] = src[0].f[3] * (src[1].f[3] - src[2].f[3]) + src[2].f[3]; +} + +static void +micro_mad(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src[0].f[0] * src[1].f[0] + src[2].f[0]; + dst->f[1] = src[0].f[1] * src[1].f[1] + src[2].f[1]; + dst->f[2] = src[0].f[2] * src[1].f[2] + src[2].f[2]; + dst->f[3] = src[0].f[3] * src[1].f[3] + src[2].f[3]; +} + +static void +micro_mov(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src->u[0]; + dst->u[1] = src->u[1]; + dst->u[2] = src->u[2]; + dst->u[3] = src->u[3]; +} + +static void +micro_rcp(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = 1.0f / src->f[0]; + dst->f[1] = 1.0f / src->f[1]; + dst->f[2] = 1.0f / src->f[2]; + dst->f[3] = 1.0f / src->f[3]; +} + +static void +micro_rnd(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = floorf(src->f[0] + 0.5f); + dst->f[1] = floorf(src->f[1] + 0.5f); + dst->f[2] = floorf(src->f[2] + 0.5f); + dst->f[3] = floorf(src->f[3] + 0.5f); +} + +static void +micro_rsq(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = 1.0f / sqrtf(fabsf(src->f[0])); + dst->f[1] = 1.0f / sqrtf(fabsf(src->f[1])); + dst->f[2] = 1.0f / sqrtf(fabsf(src->f[2])); + dst->f[3] = 1.0f / sqrtf(fabsf(src->f[3])); +} + +static void +micro_seq(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src[0].f[0] == src[1].f[0] ? 1.0f : 0.0f; + dst->f[1] = src[0].f[1] == src[1].f[1] ? 1.0f : 0.0f; + dst->f[2] = src[0].f[2] == src[1].f[2] ? 1.0f : 0.0f; + dst->f[3] = src[0].f[3] == src[1].f[3] ? 1.0f : 0.0f; +} + +static void +micro_sge(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src[0].f[0] >= src[1].f[0] ? 1.0f : 0.0f; + dst->f[1] = src[0].f[1] >= src[1].f[1] ? 1.0f : 0.0f; + dst->f[2] = src[0].f[2] >= src[1].f[2] ? 1.0f : 0.0f; + dst->f[3] = src[0].f[3] >= src[1].f[3] ? 1.0f : 0.0f; +} + +static void +micro_sgn(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f; + dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f; + dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f; + dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f; +} + +static void +micro_sgt(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src[0].f[0] > src[1].f[0] ? 1.0f : 0.0f; + dst->f[1] = src[0].f[1] > src[1].f[1] ? 1.0f : 0.0f; + dst->f[2] = src[0].f[2] > src[1].f[2] ? 1.0f : 0.0f; + dst->f[3] = src[0].f[3] > src[1].f[3] ? 1.0f : 0.0f; +} + +static void +micro_sin(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = sinf(src->f[0]); + dst->f[1] = sinf(src->f[1]); + dst->f[2] = sinf(src->f[2]); + dst->f[3] = sinf(src->f[3]); +} + +static void +micro_sle(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src[0].f[0] <= src[1].f[0] ? 1.0f : 0.0f; + dst->f[1] = src[0].f[1] <= src[1].f[1] ? 1.0f : 0.0f; + dst->f[2] = src[0].f[2] <= src[1].f[2] ? 1.0f : 0.0f; + dst->f[3] = src[0].f[3] <= src[1].f[3] ? 1.0f : 0.0f; +} + +static void +micro_slt(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src[0].f[0] < src[1].f[0] ? 1.0f : 0.0f; + dst->f[1] = src[0].f[1] < src[1].f[1] ? 1.0f : 0.0f; + dst->f[2] = src[0].f[2] < src[1].f[2] ? 1.0f : 0.0f; + dst->f[3] = src[0].f[3] < src[1].f[3] ? 1.0f : 0.0f; +} + +static void +micro_sne(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src[0].f[0] != src[1].f[0] ? 1.0f : 0.0f; + dst->f[1] = src[0].f[1] != src[1].f[1] ? 1.0f : 0.0f; + dst->f[2] = src[0].f[2] != src[1].f[2] ? 1.0f : 0.0f; + dst->f[3] = src[0].f[3] != src[1].f[3] ? 1.0f : 0.0f; +} + +static void +micro_trunc(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = (float)(int)src->f[0]; + dst->f[1] = (float)(int)src->f[1]; + dst->f[2] = (float)(int)src->f[2]; + dst->f[3] = (float)(int)src->f[3]; +} + + #define CHAN_X 0 #define CHAN_Y 1 #define CHAN_Z 2 #define CHAN_W 3 +enum tgsi_exec_datatype { + TGSI_EXEC_DATA_FLOAT, + TGSI_EXEC_DATA_INT, + TGSI_EXEC_DATA_UINT +}; + /* * Shorthand locations of various utility registers (_I = Index, _C = Channel) */ @@ -123,23 +443,19 @@ /** The execution mask depends on the conditional mask and the loop mask */ #define UPDATE_EXEC_MASK(MACH) \ - MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask + MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->Switch.mask & MACH->FuncMask static const union tgsi_exec_channel ZeroVec = { { 0.0, 0.0, 0.0, 0.0 } }; -#ifdef DEBUG -static void -check_inf_or_nan(const union tgsi_exec_channel *chan) -{ - assert(!util_is_inf_or_nan(chan->f[0])); - assert(!util_is_inf_or_nan(chan->f[1])); - assert(!util_is_inf_or_nan(chan->f[2])); - assert(!util_is_inf_or_nan(chan->f[3])); -} -#endif +#define CHECK_INF_OR_NAN(chan) do {\ + assert(!util_is_inf_or_nan((chan)->f[0]));\ + assert(!util_is_inf_or_nan((chan)->f[1]));\ + assert(!util_is_inf_or_nan((chan)->f[2]));\ + assert(!util_is_inf_or_nan((chan)->f[3]));\ + } while (0) #ifdef DEBUG @@ -422,18 +738,6 @@ tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach) align_free(mach); } - -static void -micro_abs( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = fabsf( src->f[0] ); - dst->f[1] = fabsf( src->f[1] ); - dst->f[2] = fabsf( src->f[2] ); - dst->f[3] = fabsf( src->f[3] ); -} - static void micro_add( union tgsi_exec_channel *dst, @@ -446,76 +750,6 @@ micro_add( dst->f[3] = src0->f[3] + src1->f[3]; } -#if 0 -static void -micro_iadd( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->i[0] = src0->i[0] + src1->i[0]; - dst->i[1] = src0->i[1] + src1->i[1]; - dst->i[2] = src0->i[2] + src1->i[2]; - dst->i[3] = src0->i[3] + src1->i[3]; -} -#endif - -static void -micro_and( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] & src1->u[0]; - dst->u[1] = src0->u[1] & src1->u[1]; - dst->u[2] = src0->u[2] & src1->u[2]; - dst->u[3] = src0->u[3] & src1->u[3]; -} - -static void -micro_ceil( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = ceilf( src->f[0] ); - dst->f[1] = ceilf( src->f[1] ); - dst->f[2] = ceilf( src->f[2] ); - dst->f[3] = ceilf( src->f[3] ); -} - -static void -micro_cos( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = cosf( src->f[0] ); - dst->f[1] = cosf( src->f[1] ); - dst->f[2] = cosf( src->f[2] ); - dst->f[3] = cosf( src->f[3] ); -} - -static void -micro_ddx( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = - dst->f[1] = - dst->f[2] = - dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT]; -} - -static void -micro_ddy( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = - dst->f[1] = - dst->f[2] = - dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT]; -} - static void micro_div( union tgsi_exec_channel *dst, @@ -536,99 +770,6 @@ micro_div( } } -#if 0 -static void -micro_udiv( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] / src1->u[0]; - dst->u[1] = src0->u[1] / src1->u[1]; - dst->u[2] = src0->u[2] / src1->u[2]; - dst->u[3] = src0->u[3] / src1->u[3]; -} -#endif - -static void -micro_eq( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2, - const union tgsi_exec_channel *src3 ) -{ - dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0]; - dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1]; - dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2]; - dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3]; -} - -#if 0 -static void -micro_ieq( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2, - const union tgsi_exec_channel *src3 ) -{ - dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0]; - dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1]; - dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2]; - dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3]; -} -#endif - -static void -micro_exp2( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src) -{ -#if FAST_MATH - dst->f[0] = util_fast_exp2( src->f[0] ); - dst->f[1] = util_fast_exp2( src->f[1] ); - dst->f[2] = util_fast_exp2( src->f[2] ); - dst->f[3] = util_fast_exp2( src->f[3] ); -#else - -#if DEBUG - /* Inf is okay for this instruction, so clamp it to silence assertions. */ - uint i; - union tgsi_exec_channel clamped; - - for (i = 0; i < 4; i++) { - if (src->f[i] > 127.99999f) { - clamped.f[i] = 127.99999f; - } else if (src->f[i] < -126.99999f) { - clamped.f[i] = -126.99999f; - } else { - clamped.f[i] = src->f[i]; - } - } - src = &clamped; -#endif - - dst->f[0] = powf( 2.0f, src->f[0] ); - dst->f[1] = powf( 2.0f, src->f[1] ); - dst->f[2] = powf( 2.0f, src->f[2] ); - dst->f[3] = powf( 2.0f, src->f[3] ); -#endif -} - -#if 0 -static void -micro_f2ut( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->u[0] = (uint) src->f[0]; - dst->u[1] = (uint) src->f[1]; - dst->u[2] = (uint) src->f[2]; - dst->u[3] = (uint) src->f[3]; -} -#endif - static void micro_float_clamp(union tgsi_exec_channel *dst, const union tgsi_exec_channel *src) @@ -656,71 +797,6 @@ micro_float_clamp(union tgsi_exec_channel *dst, } static void -micro_flr( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = floorf( src->f[0] ); - dst->f[1] = floorf( src->f[1] ); - dst->f[2] = floorf( src->f[2] ); - dst->f[3] = floorf( src->f[3] ); -} - -static void -micro_frc( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = src->f[0] - floorf( src->f[0] ); - dst->f[1] = src->f[1] - floorf( src->f[1] ); - dst->f[2] = src->f[2] - floorf( src->f[2] ); - dst->f[3] = src->f[3] - floorf( src->f[3] ); -} - -static void -micro_i2f( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = (float) src->i[0]; - dst->f[1] = (float) src->i[1]; - dst->f[2] = (float) src->i[2]; - dst->f[3] = (float) src->i[3]; -} - -static void -micro_lg2( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ -#if FAST_MATH - dst->f[0] = util_fast_log2( src->f[0] ); - dst->f[1] = util_fast_log2( src->f[1] ); - dst->f[2] = util_fast_log2( src->f[2] ); - dst->f[3] = util_fast_log2( src->f[3] ); -#else - dst->f[0] = logf( src->f[0] ) * 1.442695f; - dst->f[1] = logf( src->f[1] ) * 1.442695f; - dst->f[2] = logf( src->f[2] ) * 1.442695f; - dst->f[3] = logf( src->f[3] ) * 1.442695f; -#endif -} - -static void -micro_le( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2, - const union tgsi_exec_channel *src3 ) -{ - dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0]; - dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1]; - dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2]; - dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3]; -} - -static void micro_lt( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src0, @@ -734,38 +810,6 @@ micro_lt( dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3]; } -#if 0 -static void -micro_ilt( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2, - const union tgsi_exec_channel *src3 ) -{ - dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0]; - dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1]; - dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2]; - dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3]; -} -#endif - -#if 0 -static void -micro_ult( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2, - const union tgsi_exec_channel *src3 ) -{ - dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0]; - dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1]; - dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2]; - dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3]; -} -#endif - static void micro_max( union tgsi_exec_channel *dst, @@ -778,34 +822,6 @@ micro_max( dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3]; } -#if 0 -static void -micro_imax( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0]; - dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1]; - dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2]; - dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3]; -} -#endif - -#if 0 -static void -micro_umax( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0]; - dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1]; - dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2]; - dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3]; -} -#endif - static void micro_min( union tgsi_exec_channel *dst, @@ -818,48 +834,6 @@ micro_min( dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3]; } -#if 0 -static void -micro_imin( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0]; - dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1]; - dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2]; - dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3]; -} -#endif - -#if 0 -static void -micro_umin( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0]; - dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1]; - dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2]; - dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3]; -} -#endif - -#if 0 -static void -micro_umod( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] % src1->u[0]; - dst->u[1] = src0->u[1] % src1->u[1]; - dst->u[2] = src0->u[2] % src1->u[2]; - dst->u[3] = src0->u[3] % src1->u[3]; -} -#endif - static void micro_mul( union tgsi_exec_channel *dst, @@ -874,20 +848,6 @@ micro_mul( #if 0 static void -micro_imul( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->i[0] = src0->i[0] * src1->i[0]; - dst->i[1] = src0->i[1] * src1->i[1]; - dst->i[2] = src0->i[2] * src1->i[2]; - dst->i[3] = src0->i[3] * src1->i[3]; -} -#endif - -#if 0 -static void micro_imul64( union tgsi_exec_channel *dst0, union tgsi_exec_channel *dst1, @@ -951,42 +911,6 @@ micro_neg( dst->f[3] = -src->f[3]; } -#if 0 -static void -micro_ineg( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->i[0] = -src->i[0]; - dst->i[1] = -src->i[1]; - dst->i[2] = -src->i[2]; - dst->i[3] = -src->i[3]; -} -#endif - -static void -micro_not( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->u[0] = ~src->u[0]; - dst->u[1] = ~src->u[1]; - dst->u[2] = ~src->u[2]; - dst->u[3] = ~src->u[3]; -} - -static void -micro_or( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] | src1->u[0]; - dst->u[1] = src0->u[1] | src1->u[1]; - dst->u[2] = src0->u[2] | src1->u[2]; - dst->u[3] = src0->u[3] | src1->u[3]; -} - static void micro_pow( union tgsi_exec_channel *dst, @@ -1007,88 +931,6 @@ micro_pow( } static void -micro_rnd( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = floorf( src->f[0] + 0.5f ); - dst->f[1] = floorf( src->f[1] + 0.5f ); - dst->f[2] = floorf( src->f[2] + 0.5f ); - dst->f[3] = floorf( src->f[3] + 0.5f ); -} - -static void -micro_sgn( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f; - dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f; - dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f; - dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f; -} - -static void -micro_shl( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->i[0] = src0->i[0] << src1->i[0]; - dst->i[1] = src0->i[1] << src1->i[1]; - dst->i[2] = src0->i[2] << src1->i[2]; - dst->i[3] = src0->i[3] << src1->i[3]; -} - -static void -micro_ishr( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->i[0] = src0->i[0] >> src1->i[0]; - dst->i[1] = src0->i[1] >> src1->i[1]; - dst->i[2] = src0->i[2] >> src1->i[2]; - dst->i[3] = src0->i[3] >> src1->i[3]; -} - -static void -micro_trunc( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0 ) -{ - dst->f[0] = (float) (int) src0->f[0]; - dst->f[1] = (float) (int) src0->f[1]; - dst->f[2] = (float) (int) src0->f[2]; - dst->f[3] = (float) (int) src0->f[3]; -} - -#if 0 -static void -micro_ushr( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] >> src1->u[0]; - dst->u[1] = src0->u[1] >> src1->u[1]; - dst->u[2] = src0->u[2] >> src1->u[2]; - dst->u[3] = src0->u[3] >> src1->u[3]; -} -#endif - -static void -micro_sin( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = sinf( src->f[0] ); - dst->f[1] = sinf( src->f[1] ); - dst->f[2] = sinf( src->f[2] ); - dst->f[3] = sinf( src->f[3] ); -} - -static void micro_sqrt( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src ) { @@ -1110,31 +952,6 @@ micro_sub( dst->f[3] = src0->f[3] - src1->f[3]; } -#if 0 -static void -micro_u2f( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = (float) src->u[0]; - dst->f[1] = (float) src->u[1]; - dst->f[2] = (float) src->u[2]; - dst->f[3] = (float) src->u[3]; -} -#endif - -static void -micro_xor( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] ^ src1->u[0]; - dst->u[1] = src0->u[1] ^ src1->u[1]; - dst->u[2] = src0->u[2] ^ src1->u[2]; - dst->u[3] = src0->u[3] ^ src1->u[3]; -} - static void fetch_src_file_channel( const struct tgsi_exec_machine *mach, @@ -1233,11 +1050,11 @@ fetch_src_file_channel( } static void -fetch_source( - const struct tgsi_exec_machine *mach, - union tgsi_exec_channel *chan, - const struct tgsi_full_src_register *reg, - const uint chan_index ) +fetch_source(const struct tgsi_exec_machine *mach, + union tgsi_exec_channel *chan, + const struct tgsi_full_src_register *reg, + const uint chan_index, + enum tgsi_exec_datatype src_datatype) { union tgsi_exec_channel index; uint swizzle; @@ -1286,10 +1103,10 @@ fetch_source( &indir_index ); /* add value of address register to the offset */ - index.i[0] += (int) indir_index.f[0]; - index.i[1] += (int) indir_index.f[1]; - index.i[2] += (int) indir_index.f[2]; - index.i[3] += (int) indir_index.f[3]; + index.i[0] += indir_index.i[0]; + index.i[1] += indir_index.i[1]; + index.i[2] += indir_index.i[2]; + index.i[3] += indir_index.i[3]; /* for disabled execution channels, zero-out the index to * avoid using a potential garbage value. @@ -1366,10 +1183,10 @@ fetch_source( &index2, &indir_index ); - index.i[0] += (int) indir_index.f[0]; - index.i[1] += (int) indir_index.f[1]; - index.i[2] += (int) indir_index.f[2]; - index.i[3] += (int) indir_index.f[3]; + index.i[0] += indir_index.i[0]; + index.i[1] += indir_index.i[1]; + index.i[2] += indir_index.i[2]; + index.i[3] += indir_index.i[3]; /* for disabled execution channels, zero-out the index to * avoid using a potential garbage value. @@ -1394,32 +1211,30 @@ fetch_source( &index, chan ); - switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) { - case TGSI_UTIL_SIGN_CLEAR: - micro_abs( chan, chan ); - break; - - case TGSI_UTIL_SIGN_SET: - micro_abs( chan, chan ); - micro_neg( chan, chan ); - break; - - case TGSI_UTIL_SIGN_TOGGLE: - micro_neg( chan, chan ); - break; + if (reg->Register.Absolute) { + if (src_datatype == TGSI_EXEC_DATA_FLOAT) { + micro_abs(chan, chan); + } else { + micro_iabs(chan, chan); + } + } - case TGSI_UTIL_SIGN_KEEP: - break; + if (reg->Register.Negate) { + if (src_datatype == TGSI_EXEC_DATA_FLOAT) { + micro_neg(chan, chan); + } else { + micro_ineg(chan, chan); + } } } static void -store_dest( - struct tgsi_exec_machine *mach, - const union tgsi_exec_channel *chan, - const struct tgsi_full_dst_register *reg, - const struct tgsi_full_instruction *inst, - uint chan_index ) +store_dest(struct tgsi_exec_machine *mach, + const union tgsi_exec_channel *chan, + const struct tgsi_full_dst_register *reg, + const struct tgsi_full_instruction *inst, + uint chan_index, + enum tgsi_exec_datatype dst_datatype) { uint i; union tgsi_exec_channel null; @@ -1428,9 +1243,9 @@ store_dest( int offset = 0; /* indirection offset */ int index; -#ifdef DEBUG - check_inf_or_nan(chan); -#endif + if (dst_datatype == TGSI_EXEC_DATA_FLOAT) { + CHECK_INF_OR_NAN(chan); + } /* There is an extra source register that indirectly subscripts * a register file. The direct index now becomes an offset @@ -1465,7 +1280,7 @@ store_dest( &indir_index ); /* save indirection offset */ - offset = (int) indir_index.f[0]; + offset = indir_index.i[0]; } switch (reg->Register.File) { @@ -1595,10 +1410,10 @@ store_dest( } #define FETCH(VAL,INDEX,CHAN)\ - fetch_source (mach, VAL, &inst->Src[INDEX], CHAN) + fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_FLOAT) #define STORE(VAL,INDEX,CHAN)\ - store_dest (mach, VAL, &inst->Dst[INDEX], inst, CHAN ) + store_dest(mach, VAL, &inst->Dst[INDEX], inst, CHAN, TGSI_EXEC_DATA_FLOAT) /** @@ -1955,7 +1770,7 @@ exec_declaration(struct tgsi_exec_machine *mach, if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) { assert(decl->Semantic.Index == 0); assert(first == last); - assert(mask = TGSI_WRITEMASK_XYZW); + assert(mask == TGSI_WRITEMASK_XYZW); mach->Inputs[first] = mach->QuadPos; } else if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) { @@ -2001,6 +1816,461 @@ exec_declaration(struct tgsi_exec_machine *mach, } } +typedef void (* micro_op)(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src); + +static void +exec_scalar_unary(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst, + micro_op op, + enum tgsi_exec_datatype dst_datatype, + enum tgsi_exec_datatype src_datatype) +{ + unsigned int chan; + union tgsi_exec_channel src; + union tgsi_exec_channel dst; + + fetch_source(mach, &src, &inst->Src[0], CHAN_X, src_datatype); + op(&dst, &src); + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype); + } + } +} + +static void +exec_vector_unary(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst, + micro_op op, + enum tgsi_exec_datatype dst_datatype, + enum tgsi_exec_datatype src_datatype) +{ + unsigned int chan; + struct tgsi_exec_vector dst; + + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + union tgsi_exec_channel src; + + fetch_source(mach, &src, &inst->Src[0], chan, src_datatype); + op(&dst.xyzw[chan], &src); + } + } + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); + } + } +} + +static void +exec_vector_binary(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst, + micro_op op, + enum tgsi_exec_datatype dst_datatype, + enum tgsi_exec_datatype src_datatype) +{ + unsigned int chan; + struct tgsi_exec_vector dst; + + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + union tgsi_exec_channel src[2]; + + fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype); + fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype); + op(&dst.xyzw[chan], src); + } + } + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); + } + } +} + +static void +exec_vector_trinary(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst, + micro_op op, + enum tgsi_exec_datatype dst_datatype, + enum tgsi_exec_datatype src_datatype) +{ + unsigned int chan; + struct tgsi_exec_vector dst; + + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + union tgsi_exec_channel src[3]; + + fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype); + fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype); + fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype); + op(&dst.xyzw[chan], src); + } + } + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); + } + } +} + +static void +exec_break(struct tgsi_exec_machine *mach) +{ + if (mach->BreakType == TGSI_EXEC_BREAK_INSIDE_LOOP) { + /* turn off loop channels for each enabled exec channel */ + mach->LoopMask &= ~mach->ExecMask; + /* Todo: if mach->LoopMask == 0, jump to end of loop */ + UPDATE_EXEC_MASK(mach); + } else { + assert(mach->BreakType == TGSI_EXEC_BREAK_INSIDE_SWITCH); + + mach->Switch.mask = 0x0; + + UPDATE_EXEC_MASK(mach); + } +} + +static void +exec_switch(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING); + assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); + + mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch; + fetch_source(mach, &mach->Switch.selector, &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_UINT); + mach->Switch.mask = 0x0; + mach->Switch.defaultMask = 0x0; + + mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; + mach->BreakType = TGSI_EXEC_BREAK_INSIDE_SWITCH; + + UPDATE_EXEC_MASK(mach); +} + +static void +exec_case(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask; + union tgsi_exec_channel src; + uint mask = 0; + + fetch_source(mach, &src, &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_UINT); + + if (mach->Switch.selector.u[0] == src.u[0]) { + mask |= 0x1; + } + if (mach->Switch.selector.u[1] == src.u[1]) { + mask |= 0x2; + } + if (mach->Switch.selector.u[2] == src.u[2]) { + mask |= 0x4; + } + if (mach->Switch.selector.u[3] == src.u[3]) { + mask |= 0x8; + } + + mach->Switch.defaultMask |= mask; + + mach->Switch.mask |= mask & prevMask; + + UPDATE_EXEC_MASK(mach); +} + +static void +exec_default(struct tgsi_exec_machine *mach) +{ + uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask; + + mach->Switch.mask |= ~mach->Switch.defaultMask & prevMask; + + UPDATE_EXEC_MASK(mach); +} + +static void +exec_endswitch(struct tgsi_exec_machine *mach) +{ + mach->Switch = mach->SwitchStack[--mach->SwitchStackTop]; + mach->BreakType = mach->BreakStack[--mach->BreakStackTop]; + + UPDATE_EXEC_MASK(mach); +} + +static void +micro_i2f(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = (float)src->i[0]; + dst->f[1] = (float)src->i[1]; + dst->f[2] = (float)src->i[2]; + dst->f[3] = (float)src->i[3]; +} + +static void +micro_not(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = ~src->u[0]; + dst->u[1] = ~src->u[1]; + dst->u[2] = ~src->u[2]; + dst->u[3] = ~src->u[3]; +} + +static void +micro_shl(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] << src[1].u[0]; + dst->u[1] = src[0].u[1] << src[1].u[1]; + dst->u[2] = src[0].u[2] << src[1].u[2]; + dst->u[3] = src[0].u[3] << src[1].u[3]; +} + +static void +micro_and(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] & src[1].u[0]; + dst->u[1] = src[0].u[1] & src[1].u[1]; + dst->u[2] = src[0].u[2] & src[1].u[2]; + dst->u[3] = src[0].u[3] & src[1].u[3]; +} + +static void +micro_or(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] | src[1].u[0]; + dst->u[1] = src[0].u[1] | src[1].u[1]; + dst->u[2] = src[0].u[2] | src[1].u[2]; + dst->u[3] = src[0].u[3] | src[1].u[3]; +} + +static void +micro_xor(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] ^ src[1].u[0]; + dst->u[1] = src[0].u[1] ^ src[1].u[1]; + dst->u[2] = src[0].u[2] ^ src[1].u[2]; + dst->u[3] = src[0].u[3] ^ src[1].u[3]; +} + +static void +micro_f2i(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = (int)src->f[0]; + dst->i[1] = (int)src->f[1]; + dst->i[2] = (int)src->f[2]; + dst->i[3] = (int)src->f[3]; +} + +static void +micro_idiv(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = src[0].i[0] / src[1].i[0]; + dst->i[1] = src[0].i[1] / src[1].i[1]; + dst->i[2] = src[0].i[2] / src[1].i[2]; + dst->i[3] = src[0].i[3] / src[1].i[3]; +} + +static void +micro_imax(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = src[0].i[0] > src[1].i[0] ? src[0].i[0] : src[1].i[0]; + dst->i[1] = src[0].i[1] > src[1].i[1] ? src[0].i[1] : src[1].i[1]; + dst->i[2] = src[0].i[2] > src[1].i[2] ? src[0].i[2] : src[1].i[2]; + dst->i[3] = src[0].i[3] > src[1].i[3] ? src[0].i[3] : src[1].i[3]; +} + +static void +micro_imin(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = src[0].i[0] < src[1].i[0] ? src[0].i[0] : src[1].i[0]; + dst->i[1] = src[0].i[1] < src[1].i[1] ? src[0].i[1] : src[1].i[1]; + dst->i[2] = src[0].i[2] < src[1].i[2] ? src[0].i[2] : src[1].i[2]; + dst->i[3] = src[0].i[3] < src[1].i[3] ? src[0].i[3] : src[1].i[3]; +} + +static void +micro_isge(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = src[0].i[0] >= src[1].i[0] ? -1 : 0; + dst->i[1] = src[0].i[1] >= src[1].i[1] ? -1 : 0; + dst->i[2] = src[0].i[2] >= src[1].i[2] ? -1 : 0; + dst->i[3] = src[0].i[3] >= src[1].i[3] ? -1 : 0; +} + +static void +micro_ishr(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = src[0].i[0] >> src[1].i[0]; + dst->i[1] = src[0].i[1] >> src[1].i[1]; + dst->i[2] = src[0].i[2] >> src[1].i[2]; + dst->i[3] = src[0].i[3] >> src[1].i[3]; +} + +static void +micro_islt(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = src[0].i[0] < src[1].i[0] ? -1 : 0; + dst->i[1] = src[0].i[1] < src[1].i[1] ? -1 : 0; + dst->i[2] = src[0].i[2] < src[1].i[2] ? -1 : 0; + dst->i[3] = src[0].i[3] < src[1].i[3] ? -1 : 0; +} + +static void +micro_f2u(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = (uint)src->f[0]; + dst->u[1] = (uint)src->f[1]; + dst->u[2] = (uint)src->f[2]; + dst->u[3] = (uint)src->f[3]; +} + +static void +micro_u2f(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = (float)src->u[0]; + dst->f[1] = (float)src->u[1]; + dst->f[2] = (float)src->u[2]; + dst->f[3] = (float)src->u[3]; +} + +static void +micro_uadd(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] + src[1].u[0]; + dst->u[1] = src[0].u[1] + src[1].u[1]; + dst->u[2] = src[0].u[2] + src[1].u[2]; + dst->u[3] = src[0].u[3] + src[1].u[3]; +} + +static void +micro_udiv(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] / src[1].u[0]; + dst->u[1] = src[0].u[1] / src[1].u[1]; + dst->u[2] = src[0].u[2] / src[1].u[2]; + dst->u[3] = src[0].u[3] / src[1].u[3]; +} + +static void +micro_umad(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] * src[1].u[0] + src[2].u[0]; + dst->u[1] = src[0].u[1] * src[1].u[1] + src[2].u[1]; + dst->u[2] = src[0].u[2] * src[1].u[2] + src[2].u[2]; + dst->u[3] = src[0].u[3] * src[1].u[3] + src[2].u[3]; +} + +static void +micro_umax(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] > src[1].u[0] ? src[0].u[0] : src[1].u[0]; + dst->u[1] = src[0].u[1] > src[1].u[1] ? src[0].u[1] : src[1].u[1]; + dst->u[2] = src[0].u[2] > src[1].u[2] ? src[0].u[2] : src[1].u[2]; + dst->u[3] = src[0].u[3] > src[1].u[3] ? src[0].u[3] : src[1].u[3]; +} + +static void +micro_umin(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] < src[1].u[0] ? src[0].u[0] : src[1].u[0]; + dst->u[1] = src[0].u[1] < src[1].u[1] ? src[0].u[1] : src[1].u[1]; + dst->u[2] = src[0].u[2] < src[1].u[2] ? src[0].u[2] : src[1].u[2]; + dst->u[3] = src[0].u[3] < src[1].u[3] ? src[0].u[3] : src[1].u[3]; +} + +static void +micro_umod(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] % src[1].u[0]; + dst->u[1] = src[0].u[1] % src[1].u[1]; + dst->u[2] = src[0].u[2] % src[1].u[2]; + dst->u[3] = src[0].u[3] % src[1].u[3]; +} + +static void +micro_umul(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] * src[1].u[0]; + dst->u[1] = src[0].u[1] * src[1].u[1]; + dst->u[2] = src[0].u[2] * src[1].u[2]; + dst->u[3] = src[0].u[3] * src[1].u[3]; +} + +static void +micro_useq(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] == src[1].u[0] ? ~0 : 0; + dst->u[1] = src[0].u[1] == src[1].u[1] ? ~0 : 0; + dst->u[2] = src[0].u[2] == src[1].u[2] ? ~0 : 0; + dst->u[3] = src[0].u[3] == src[1].u[3] ? ~0 : 0; +} + +static void +micro_usge(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] >= src[1].u[0] ? ~0 : 0; + dst->u[1] = src[0].u[1] >= src[1].u[1] ? ~0 : 0; + dst->u[2] = src[0].u[2] >= src[1].u[2] ? ~0 : 0; + dst->u[3] = src[0].u[3] >= src[1].u[3] ? ~0 : 0; +} + +static void +micro_ushr(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] >> src[1].u[0]; + dst->u[1] = src[0].u[1] >> src[1].u[1]; + dst->u[2] = src[0].u[2] >> src[1].u[2]; + dst->u[3] = src[0].u[3] >> src[1].u[3]; +} + +static void +micro_uslt(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] < src[1].u[0] ? ~0 : 0; + dst->u[1] = src[0].u[1] < src[1].u[1] ? ~0 : 0; + dst->u[2] = src[0].u[2] < src[1].u[2] ? ~0 : 0; + dst->u[3] = src[0].u[3] < src[1].u[3] ? ~0 : 0; +} + +static void +micro_usne(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] != src[1].u[0] ? ~0 : 0; + dst->u[1] = src[0].u[1] != src[1].u[1] ? ~0 : 0; + dst->u[2] = src[0].u[2] != src[1].u[2] ? ~0 : 0; + dst->u[3] = src[0].u[3] != src[1].u[3] ? ~0 : 0; +} + static void exec_instruction( struct tgsi_exec_machine *mach, @@ -2015,23 +2285,11 @@ exec_instruction( switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ARL: - case TGSI_OPCODE_FLR: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_flr(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_arl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_MOV: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&d[chan_index], 0, chan_index); - } - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_mov, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_LIT: @@ -2068,23 +2326,11 @@ exec_instruction( break; case TGSI_OPCODE_RCP: - /* TGSI_OPCODE_RECIP */ - FETCH( &r[0], 0, CHAN_X ); - micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_scalar_unary(mach, inst, micro_rcp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_RSQ: - /* TGSI_OPCODE_RECIPSQRT */ - FETCH( &r[0], 0, CHAN_X ); - micro_abs( &r[0], &r[0] ); - micro_sqrt( &r[0], &r[0] ); - micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_scalar_unary(mach, inst, micro_rsq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_EXP: @@ -2255,41 +2501,15 @@ exec_instruction( break; case TGSI_OPCODE_SLT: - /* TGSI_OPCODE_SETLT */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_lt(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_slt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_SGE: - /* TGSI_OPCODE_SETGE */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_le(&d[chan_index], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_sge, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_MAD: - /* TGSI_OPCODE_MADD */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_mul( &r[0], &r[0], &r[1] ); - FETCH( &r[1], 2, chan_index ); - micro_add(&d[chan_index], &r[0], &r[1]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_trinary(mach, inst, micro_mad, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_SUB: @@ -2304,17 +2524,7 @@ exec_instruction( break; case TGSI_OPCODE_LRP: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - FETCH(&r[2], 2, chan_index); - micro_sub( &r[1], &r[1], &r[2] ); - micro_mul( &r[0], &r[0], &r[1] ); - micro_add(&d[chan_index], &r[0], &r[2]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_trinary(mach, inst, micro_lrp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_CND: @@ -2348,13 +2558,7 @@ exec_instruction( break; case TGSI_OPCODE_FRC: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_frc(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_frc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_CLAMP: @@ -2370,33 +2574,20 @@ exec_instruction( } break; + case TGSI_OPCODE_FLR: + exec_vector_unary(mach, inst, micro_flr, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); + break; + case TGSI_OPCODE_ROUND: - case TGSI_OPCODE_ARR: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_rnd(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_rnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_EX2: - FETCH(&r[0], 0, CHAN_X); - - micro_exp2( &r[0], &r[0] ); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_scalar_unary(mach, inst, micro_exp2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_LG2: - FETCH( &r[0], 0, CHAN_X ); - micro_lg2( &r[0], &r[0] ); - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_scalar_unary(mach, inst, micro_lg2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_POW: @@ -2449,15 +2640,9 @@ exec_instruction( } break; - case TGSI_OPCODE_ABS: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&r[0], 0, chan_index); - micro_abs(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } - break; + case TGSI_OPCODE_ABS: + exec_vector_unary(mach, inst, micro_abs, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); + break; case TGSI_OPCODE_RCC: FETCH(&r[0], 0, CHAN_X); @@ -2496,33 +2681,15 @@ exec_instruction( break; case TGSI_OPCODE_COS: - FETCH(&r[0], 0, CHAN_X); - - micro_cos( &r[0], &r[0] ); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_scalar_unary(mach, inst, micro_cos, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_DDX: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_ddx(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_ddx, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_DDY: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_ddy(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_ddy, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_KILP: @@ -2599,14 +2766,7 @@ exec_instruction( break; case TGSI_OPCODE_SEQ: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_eq(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_seq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_SFL: @@ -2616,44 +2776,19 @@ exec_instruction( break; case TGSI_OPCODE_SGT: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_le(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_sgt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_SIN: - FETCH( &r[0], 0, CHAN_X ); - micro_sin( &r[0], &r[0] ); - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_scalar_unary(mach, inst, micro_sin, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_SLE: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_le(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_sle, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_SNE: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_eq(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_sne, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_STR: @@ -2758,6 +2893,10 @@ exec_instruction( assert (0); break; + case TGSI_OPCODE_ARR: + exec_vector_unary(mach, inst, micro_arr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); + break; + case TGSI_OPCODE_BRA: assert (0); break; @@ -2777,6 +2916,8 @@ exec_instruction( mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop; mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop; mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop; + mach->CallStack[mach->CallStackTop].SwitchStackTop = mach->SwitchStackTop; + mach->CallStack[mach->CallStackTop].BreakStackTop = mach->BreakStackTop; /* note that PC was already incremented above */ mach->CallStack[mach->CallStackTop].ReturnAddr = *pc; @@ -2784,12 +2925,17 @@ exec_instruction( /* Second, push the Cond, Loop, Cont, Func stacks */ assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); - mach->CondStack[mach->CondStackTop++] = mach->CondMask; assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); - mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); - mach->ContStack[mach->ContStackTop++] = mach->ContMask; + assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING); + assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); + + mach->CondStack[mach->CondStackTop++] = mach->CondMask; + mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; + mach->ContStack[mach->ContStackTop++] = mach->ContMask; + mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch; + mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; /* Finally, jump to the subroutine */ @@ -2822,6 +2968,12 @@ exec_instruction( mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; mach->ContMask = mach->ContStack[mach->ContStackTop]; + mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop; + mach->Switch = mach->SwitchStack[mach->SwitchStackTop]; + + mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop; + mach->BreakType = mach->BreakStack[mach->BreakStackTop]; + assert(mach->FuncStackTop > 0); mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; @@ -2832,14 +2984,7 @@ exec_instruction( break; case TGSI_OPCODE_SSG: - /* TGSI_OPCODE_SGN */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_sgn(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_sgn, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_CMP: @@ -3023,87 +3168,31 @@ exec_instruction( break; case TGSI_OPCODE_CEIL: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_ceil(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_ceil, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_I2F: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_i2f(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_i2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_INT); break; case TGSI_OPCODE_NOT: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_not(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_not, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; case TGSI_OPCODE_TRUNC: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_trunc(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_trunc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_SHL: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_shl(&d[chan_index], &r[0], &r[1]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } - break; - - case TGSI_OPCODE_SHR: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_ishr(&d[chan_index], &r[0], &r[1]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_shl, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; case TGSI_OPCODE_AND: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_and(&d[chan_index], &r[0], &r[1]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_and, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; case TGSI_OPCODE_OR: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_or(&d[chan_index], &r[0], &r[1]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_or, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; case TGSI_OPCODE_MOD: @@ -3111,14 +3200,7 @@ exec_instruction( break; case TGSI_OPCODE_XOR: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_xor(&d[chan_index], &r[0], &r[1]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_xor, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; case TGSI_OPCODE_SAD: @@ -3167,11 +3249,15 @@ exec_instruction( case TGSI_OPCODE_BGNLOOP: /* push LoopMask and ContMasks */ assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); - mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); - mach->ContStack[mach->ContStackTop++] = mach->ContMask; assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); + + mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; + mach->ContStack[mach->ContStackTop++] = mach->ContMask; mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1; + mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; + mach->BreakType = TGSI_EXEC_BREAK_INSIDE_LOOP; break; case TGSI_OPCODE_ENDFOR: @@ -3218,6 +3304,8 @@ exec_instruction( --mach->LoopLabelStackTop; assert(mach->LoopCounterStackTop > 0); --mach->LoopCounterStackTop; + + mach->BreakType = mach->BreakStack[--mach->BreakStackTop]; } UPDATE_EXEC_MASK(mach); break; @@ -3241,15 +3329,14 @@ exec_instruction( mach->ContMask = mach->ContStack[--mach->ContStackTop]; assert(mach->LoopLabelStackTop > 0); --mach->LoopLabelStackTop; + + mach->BreakType = mach->BreakStack[--mach->BreakStackTop]; } UPDATE_EXEC_MASK(mach); break; case TGSI_OPCODE_BRK: - /* turn off loop channels for each enabled exec channel */ - mach->LoopMask &= ~mach->ExecMask; - /* Todo: if mach->LoopMask == 0, jump to end of loop */ - UPDATE_EXEC_MASK(mach); + exec_break(mach); break; case TGSI_OPCODE_CONT: @@ -3280,6 +3367,12 @@ exec_instruction( mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; mach->ContMask = mach->ContStack[mach->ContStackTop]; + mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop; + mach->Switch = mach->SwitchStack[mach->SwitchStackTop]; + + mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop; + mach->BreakType = mach->BreakStack[mach->BreakStackTop]; + assert(mach->FuncStackTop > 0); mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; @@ -3310,11 +3403,116 @@ exec_instruction( UPDATE_EXEC_MASK(mach); break; + case TGSI_OPCODE_F2I: + exec_vector_unary(mach, inst, micro_f2i, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); + break; + + case TGSI_OPCODE_IDIV: + exec_vector_binary(mach, inst, micro_idiv, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); + break; + + case TGSI_OPCODE_IMAX: + exec_vector_binary(mach, inst, micro_imax, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); + break; + + case TGSI_OPCODE_IMIN: + exec_vector_binary(mach, inst, micro_imin, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); + break; + + case TGSI_OPCODE_INEG: + exec_vector_unary(mach, inst, micro_ineg, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); + break; + + case TGSI_OPCODE_ISGE: + exec_vector_binary(mach, inst, micro_isge, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); + break; + + case TGSI_OPCODE_ISHR: + exec_vector_binary(mach, inst, micro_ishr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); + break; + + case TGSI_OPCODE_ISLT: + exec_vector_binary(mach, inst, micro_islt, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); + break; + + case TGSI_OPCODE_F2U: + exec_vector_unary(mach, inst, micro_f2u, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT); + break; + + case TGSI_OPCODE_U2F: + exec_vector_unary(mach, inst, micro_u2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_UADD: + exec_vector_binary(mach, inst, micro_uadd, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_UDIV: + exec_vector_binary(mach, inst, micro_udiv, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_UMAD: + exec_vector_trinary(mach, inst, micro_umad, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_UMAX: + exec_vector_binary(mach, inst, micro_umax, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_UMIN: + exec_vector_binary(mach, inst, micro_umin, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_UMOD: + exec_vector_binary(mach, inst, micro_umod, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_UMUL: + exec_vector_binary(mach, inst, micro_umul, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_USEQ: + exec_vector_binary(mach, inst, micro_useq, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_USGE: + exec_vector_binary(mach, inst, micro_usge, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_USHR: + exec_vector_binary(mach, inst, micro_ushr, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_USLT: + exec_vector_binary(mach, inst, micro_uslt, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_USNE: + exec_vector_binary(mach, inst, micro_usne, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_SWITCH: + exec_switch(mach, inst); + break; + + case TGSI_OPCODE_CASE: + exec_case(mach, inst); + break; + + case TGSI_OPCODE_DEFAULT: + exec_default(mach); + break; + + case TGSI_OPCODE_ENDSWITCH: + exec_endswitch(mach); + break; + default: assert( 0 ); } } + #define DEBUG_EXECUTION 0 @@ -3334,9 +3532,13 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) mach->FuncMask = 0xf; mach->ExecMask = 0xf; + mach->Switch.mask = 0xf; + assert(mach->CondStackTop == 0); assert(mach->LoopStackTop == 0); assert(mach->ContStackTop == 0); + assert(mach->SwitchStackTop == 0); + assert(mach->BreakStackTop == 0); assert(mach->CallStackTop == 0); mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; @@ -3393,11 +3595,11 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) if (j > 0) { debug_printf(" "); } - debug_printf("(%6f, %6f, %6f, %6f)\n", - temps[i].xyzw[0].f[j], - temps[i].xyzw[1].f[j], - temps[i].xyzw[2].f[j], - temps[i].xyzw[3].f[j]); + debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", + temps[i].xyzw[0].f[j], temps[i].xyzw[0].u[j], + temps[i].xyzw[1].f[j], temps[i].xyzw[1].u[j], + temps[i].xyzw[2].f[j], temps[i].xyzw[2].u[j], + temps[i].xyzw[3].f[j], temps[i].xyzw[3].u[j]); } } } @@ -3411,11 +3613,11 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) if (j > 0) { debug_printf(" "); } - debug_printf("{%6f, %6f, %6f, %6f}\n", - outputs[i].xyzw[0].f[j], - outputs[i].xyzw[1].f[j], - outputs[i].xyzw[2].f[j], - outputs[i].xyzw[3].f[j]); + debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", + outputs[i].xyzw[0].f[j], outputs[i].xyzw[0].u[j], + outputs[i].xyzw[1].f[j], outputs[i].xyzw[1].u[j], + outputs[i].xyzw[2].f[j], outputs[i].xyzw[2].u[j], + outputs[i].xyzw[3].f[j], outputs[i].xyzw[3].u[j]); } } } @@ -3437,6 +3639,8 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) assert(mach->CondStackTop == 0); assert(mach->LoopStackTop == 0); assert(mach->ContStackTop == 0); + assert(mach->SwitchStackTop == 0); + assert(mach->BreakStackTop == 0); assert(mach->CallStackTop == 0); return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index afaf5c39c48..aa3a98d7f18 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -179,6 +179,7 @@ struct tgsi_exec_labels #define TGSI_EXEC_MAX_COND_NESTING 32 #define TGSI_EXEC_MAX_LOOP_NESTING 32 +#define TGSI_EXEC_MAX_SWITCH_NESTING 32 #define TGSI_EXEC_MAX_CALL_NESTING 32 /* The maximum number of input attributes per vertex. For 2D @@ -206,9 +207,29 @@ struct tgsi_call_record uint CondStackTop; uint LoopStackTop; uint ContStackTop; + int SwitchStackTop; + int BreakStackTop; uint ReturnAddr; }; + +/* Switch-case block state. */ +struct tgsi_switch_record { + uint mask; /**< execution mask */ + union tgsi_exec_channel selector; /**< a value case statements are compared to */ + uint defaultMask; /**< non-execute mask for default case */ +}; + + +enum tgsi_break_type { + TGSI_EXEC_BREAK_INSIDE_LOOP, + TGSI_EXEC_BREAK_INSIDE_SWITCH +}; + + +#define TGSI_EXEC_MAX_BREAK_STACK (TGSI_EXEC_MAX_LOOP_NESTING + TGSI_EXEC_MAX_SWITCH_NESTING) + + /** * Run-time virtual machine state for executing TGSI shader. */ @@ -251,6 +272,12 @@ struct tgsi_exec_machine uint FuncMask; /**< For function calls */ uint ExecMask; /**< = CondMask & LoopMask */ + /* Current switch-case state. */ + struct tgsi_switch_record Switch; + + /* Current break type. */ + enum tgsi_break_type BreakType; + /** Condition mask stack (for nested conditionals) */ uint CondStack[TGSI_EXEC_MAX_COND_NESTING]; int CondStackTop; @@ -271,6 +298,13 @@ struct tgsi_exec_machine uint ContStack[TGSI_EXEC_MAX_LOOP_NESTING]; int ContStackTop; + /** Switch case stack */ + struct tgsi_switch_record SwitchStack[TGSI_EXEC_MAX_SWITCH_NESTING]; + int SwitchStackTop; + + enum tgsi_break_type BreakStack[TGSI_EXEC_MAX_BREAK_STACK]; + int BreakStackTop; + /** Function execution mask stack (for executing subroutine code) */ uint FuncStack[TGSI_EXEC_MAX_CALL_NESTING]; int FuncStackTop; diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c index be375cabb8b..de0e09cdbae 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -119,7 +119,7 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { 1, 1, 0, 0, 0, 0, "NOT", TGSI_OPCODE_NOT }, { 1, 1, 0, 0, 0, 0, "TRUNC", TGSI_OPCODE_TRUNC }, { 1, 2, 0, 0, 0, 0, "SHL", TGSI_OPCODE_SHL }, - { 1, 2, 0, 0, 0, 0, "SHR", TGSI_OPCODE_SHR }, + { 0, 0, 0, 0, 0, 0, "", 88 }, /* removed */ { 1, 2, 0, 0, 0, 0, "AND", TGSI_OPCODE_AND }, { 1, 2, 0, 0, 0, 0, "OR", TGSI_OPCODE_OR }, { 1, 2, 0, 0, 0, 0, "MOD", TGSI_OPCODE_MOD }, @@ -149,7 +149,33 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { 0, 1, 0, 0, 0, 0, "BREAKC", TGSI_OPCODE_BREAKC }, { 0, 1, 0, 0, 0, 0, "KIL", TGSI_OPCODE_KIL }, { 0, 0, 0, 0, 0, 0, "END", TGSI_OPCODE_END }, - { 0, 0, 0, 0, 0, 0, "", 118 } /* removed */ + { 0, 0, 0, 0, 0, 0, "", 118 }, /* removed */ + { 1, 1, 0, 0, 0, 0, "F2I", TGSI_OPCODE_F2I }, + { 1, 2, 0, 0, 0, 0, "IDIV", TGSI_OPCODE_IDIV }, + { 1, 2, 0, 0, 0, 0, "IMAX", TGSI_OPCODE_IMAX }, + { 1, 2, 0, 0, 0, 0, "IMIN", TGSI_OPCODE_IMIN }, + { 1, 1, 0, 0, 0, 0, "INEG", TGSI_OPCODE_INEG }, + { 1, 2, 0, 0, 0, 0, "ISGE", TGSI_OPCODE_ISGE }, + { 1, 2, 0, 0, 0, 0, "ISHR", TGSI_OPCODE_ISHR }, + { 1, 2, 0, 0, 0, 0, "ISLT", TGSI_OPCODE_ISLT }, + { 1, 1, 0, 0, 0, 0, "F2U", TGSI_OPCODE_F2U }, + { 1, 1, 0, 0, 0, 0, "U2F", TGSI_OPCODE_U2F }, + { 1, 2, 0, 0, 0, 0, "UADD", TGSI_OPCODE_UADD }, + { 1, 2, 0, 0, 0, 0, "UDIV", TGSI_OPCODE_UDIV }, + { 1, 3, 0, 0, 0, 0, "UMAD", TGSI_OPCODE_UMAD }, + { 1, 2, 0, 0, 0, 0, "UMAX", TGSI_OPCODE_UMAX }, + { 1, 2, 0, 0, 0, 0, "UMIN", TGSI_OPCODE_UMIN }, + { 1, 2, 0, 0, 0, 0, "UMOD", TGSI_OPCODE_UMOD }, + { 1, 2, 0, 0, 0, 0, "UMUL", TGSI_OPCODE_UMUL }, + { 1, 2, 0, 0, 0, 0, "USEQ", TGSI_OPCODE_USEQ }, + { 1, 2, 0, 0, 0, 0, "USGE", TGSI_OPCODE_USGE }, + { 1, 2, 0, 0, 0, 0, "USHR", TGSI_OPCODE_USHR }, + { 1, 2, 0, 0, 0, 0, "USLT", TGSI_OPCODE_USLT }, + { 1, 2, 0, 0, 0, 0, "USNE", TGSI_OPCODE_USNE }, + { 0, 1, 0, 0, 0, 0, "SWITCH", TGSI_OPCODE_SWITCH }, + { 0, 1, 0, 0, 0, 0, "CASE", TGSI_OPCODE_CASE }, + { 0, 0, 0, 0, 0, 0, "DEFAULT", TGSI_OPCODE_DEFAULT }, + { 0, 0, 0, 0, 0, 0, "ENDSWITCH", TGSI_OPCODE_ENDSWITCH } }; const struct tgsi_opcode_info * diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h index b34263da489..e4af15c156f 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h +++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h @@ -124,7 +124,6 @@ OP11(I2F) OP11(NOT) OP11(TRUNC) OP12(SHL) -OP12(SHR) OP12(AND) OP12(OR) OP12(MOD) @@ -146,6 +145,28 @@ OP01(IFC) OP01(BREAKC) OP01(KIL) OP00(END) +OP11(F2I) +OP12(IDIV) +OP12(IMAX) +OP12(IMIN) +OP11(INEG) +OP12(ISGE) +OP12(ISHR) +OP12(ISLT) +OP11(F2U) +OP11(U2F) +OP12(UADD) +OP12(UDIV) +OP13(UMAD) +OP12(UMAX) +OP12(UMIN) +OP12(UMOD) +OP12(UMUL) +OP12(USEQ) +OP12(USGE) +OP12(USHR) +OP12(USLT) +OP12(USNE) #undef OP00 diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c index fa65ecb9975..8c7062d850c 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_parse.c +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c @@ -119,17 +119,29 @@ tgsi_parse_token( case TGSI_TOKEN_TYPE_IMMEDIATE: { struct tgsi_full_immediate *imm = &ctx->FullToken.FullImmediate; + uint imm_count; memset(imm, 0, sizeof *imm); copy_token(&imm->Immediate, &token); + imm_count = imm->Immediate.NrTokens - 1; + switch (imm->Immediate.DataType) { case TGSI_IMM_FLOAT32: - { - uint imm_count = imm->Immediate.NrTokens - 1; - for (i = 0; i < imm_count; i++) { - next_token(ctx, &imm->u[i]); - } + for (i = 0; i < imm_count; i++) { + next_token(ctx, &imm->u[i].Float); + } + break; + + case TGSI_IMM_UINT32: + for (i = 0; i < imm_count; i++) { + next_token(ctx, &imm->u[i].Uint); + } + break; + + case TGSI_IMM_INT32: + for (i = 0; i < imm_count; i++) { + next_token(ctx, &imm->u[i].Int); } break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index 16b8ec60518..9b0644465af 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -195,7 +195,7 @@ is_any_register_declared( struct cso_hash_iter iter = cso_hash_first_node(ctx->regs_decl); - while (cso_hash_iter_is_null(iter)) { + while (!cso_hash_iter_is_null(iter)) { scan_register *reg = (scan_register *)cso_hash_iter_data(iter); if (reg->file == file) return TRUE; @@ -247,7 +247,7 @@ check_register_usage( boolean indirect_access ) { if (!check_file_name( ctx, reg->file )) { - free(reg); + FREE(reg); return FALSE; } @@ -261,7 +261,7 @@ check_register_usage( if (!is_ind_register_used(ctx, reg)) cso_hash_insert(ctx->regs_ind_used, reg->file, reg); else - free(reg); + FREE(reg); } else { if (!is_register_declared( ctx, reg )) { @@ -275,7 +275,7 @@ check_register_usage( if (!is_register_used( ctx, reg )) cso_hash_insert(ctx->regs_used, scan_register_key(reg), reg); else - free(reg); + FREE(reg); } return TRUE; } @@ -445,7 +445,9 @@ iter_immediate( /* Check data type validity. */ - if (imm->Immediate.DataType != TGSI_IMM_FLOAT32) { + if (imm->Immediate.DataType != TGSI_IMM_FLOAT32 && + imm->Immediate.DataType != TGSI_IMM_UINT32 && + imm->Immediate.DataType != TGSI_IMM_INT32) { report_error( ctx, "(%u): Invalid immediate data type", imm->Immediate.DataType ); return TRUE; } @@ -486,7 +488,7 @@ epilog( struct cso_hash_iter iter = cso_hash_first_node(ctx->regs_decl); - while (cso_hash_iter_is_null(iter)) { + while (!cso_hash_iter_is_null(iter)) { scan_register *reg = (scan_register *)cso_hash_iter_data(iter); if (!is_register_used(ctx, reg) && !is_ind_register_used(ctx, reg)) { report_warning( ctx, "%s[%u]: Register never used", @@ -511,7 +513,7 @@ regs_hash_destroy(struct cso_hash *hash) while (!cso_hash_iter_is_null(iter)) { scan_register *reg = (scan_register *)cso_hash_iter_data(iter); iter = cso_hash_erase(hash, iter); - free(reg); + FREE(reg); } cso_hash_delete(hash); } diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index d63c75dafb3..118059ace9c 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -2578,7 +2578,7 @@ emit_instruction( return 0; break; - case TGSI_OPCODE_SHR: + case TGSI_OPCODE_ISHR: return 0; break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c index 6a0af664dd4..e64e2b731df 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -101,8 +101,13 @@ struct ureg_program unsigned nr_outputs; struct { - float v[4]; + union { + float f[4]; + unsigned u[4]; + int i[4]; + } value; unsigned nr; + unsigned type; } immediate[UREG_MAX_IMMEDIATE]; unsigned nr_immediates; @@ -486,22 +491,22 @@ struct ureg_src ureg_DECL_sampler( struct ureg_program *ureg, } - - -static int match_or_expand_immediate( const float *v, - unsigned nr, - float *v2, - unsigned *nr2, - unsigned *swizzle ) +static int +match_or_expand_immediate( const unsigned *v, + unsigned nr, + unsigned *v2, + unsigned *pnr2, + unsigned *swizzle ) { + unsigned nr2 = *pnr2; unsigned i, j; - + *swizzle = 0; for (i = 0; i < nr; i++) { boolean found = FALSE; - for (j = 0; j < *nr2 && !found; j++) { + for (j = 0; j < nr2 && !found; j++) { if (v[i] == v2[j]) { *swizzle |= j << (i * 2); found = TRUE; @@ -509,24 +514,28 @@ static int match_or_expand_immediate( const float *v, } if (!found) { - if (*nr2 >= 4) + if (nr2 >= 4) { return FALSE; + } - v2[*nr2] = v[i]; - *swizzle |= *nr2 << (i * 2); - (*nr2)++; + v2[nr2] = v[i]; + *swizzle |= nr2 << (i * 2); + nr2++; } } + /* Actually expand immediate only when fully succeeded. + */ + *pnr2 = nr2; return TRUE; } - - -struct ureg_src ureg_DECL_immediate( struct ureg_program *ureg, - const float *v, - unsigned nr ) +static struct ureg_src +decl_immediate( struct ureg_program *ureg, + const unsigned *v, + unsigned nr, + unsigned type ) { unsigned i, j; unsigned swizzle; @@ -536,38 +545,82 @@ struct ureg_src ureg_DECL_immediate( struct ureg_program *ureg, */ for (i = 0; i < ureg->nr_immediates; i++) { - if (match_or_expand_immediate( v, - nr, - ureg->immediate[i].v, - &ureg->immediate[i].nr, - &swizzle )) + if (ureg->immediate[i].type != type) { + continue; + } + if (match_or_expand_immediate(v, + nr, + ureg->immediate[i].value.u, + &ureg->immediate[i].nr, + &swizzle)) { goto out; + } } if (ureg->nr_immediates < UREG_MAX_IMMEDIATE) { i = ureg->nr_immediates++; - if (match_or_expand_immediate( v, - nr, - ureg->immediate[i].v, - &ureg->immediate[i].nr, - &swizzle )) + ureg->immediate[i].type = type; + if (match_or_expand_immediate(v, + nr, + ureg->immediate[i].value.u, + &ureg->immediate[i].nr, + &swizzle)) { goto out; + } } - set_bad( ureg ); + set_bad(ureg); out: /* Make sure that all referenced elements are from this immediate. * Has the effect of making size-one immediates into scalars. */ - for (j = nr; j < 4; j++) + for (j = nr; j < 4; j++) { swizzle |= (swizzle & 0x3) << (j * 2); + } + + return ureg_swizzle(ureg_src_register(TGSI_FILE_IMMEDIATE, i), + (swizzle >> 0) & 0x3, + (swizzle >> 2) & 0x3, + (swizzle >> 4) & 0x3, + (swizzle >> 6) & 0x3); +} + + +struct ureg_src +ureg_DECL_immediate( struct ureg_program *ureg, + const float *v, + unsigned nr ) +{ + union { + float f[4]; + unsigned u[4]; + } fu; + unsigned int i; + + for (i = 0; i < nr; i++) { + fu.f[i] = v[i]; + } + + return decl_immediate(ureg, fu.u, nr, TGSI_IMM_FLOAT32); +} + - return ureg_swizzle( ureg_src_register( TGSI_FILE_IMMEDIATE, i ), - (swizzle >> 0) & 0x3, - (swizzle >> 2) & 0x3, - (swizzle >> 4) & 0x3, - (swizzle >> 6) & 0x3); +struct ureg_src +ureg_DECL_immediate_uint( struct ureg_program *ureg, + const unsigned *v, + unsigned nr ) +{ + return decl_immediate(ureg, v, nr, TGSI_IMM_UINT32); +} + + +struct ureg_src +ureg_DECL_immediate_int( struct ureg_program *ureg, + const int *v, + unsigned nr ) +{ + return decl_immediate(ureg, (const unsigned *)v, nr, TGSI_IMM_INT32); } @@ -955,21 +1008,23 @@ static void emit_decl_range( struct ureg_program *ureg, out[1].decl_range.Last = first + count - 1; } -static void emit_immediate( struct ureg_program *ureg, - const float *v ) +static void +emit_immediate( struct ureg_program *ureg, + const unsigned *v, + unsigned type ) { union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 5 ); out[0].value = 0; out[0].imm.Type = TGSI_TOKEN_TYPE_IMMEDIATE; out[0].imm.NrTokens = 5; - out[0].imm.DataType = TGSI_IMM_FLOAT32; + out[0].imm.DataType = type; out[0].imm.Padding = 0; - out[1].imm_data.Float = v[0]; - out[2].imm_data.Float = v[1]; - out[3].imm_data.Float = v[2]; - out[4].imm_data.Float = v[3]; + out[1].imm_data.Uint = v[0]; + out[2].imm_data.Uint = v[1]; + out[3].imm_data.Uint = v[2]; + out[4].imm_data.Uint = v[3]; } @@ -1055,7 +1110,8 @@ static void emit_decls( struct ureg_program *ureg ) for (i = 0; i < ureg->nr_immediates; i++) { emit_immediate( ureg, - ureg->immediate[i].v ); + ureg->immediate[i].value.u, + ureg->immediate[i].type ); } } diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h index 7e3e7bcf1d3..6f11273320a 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h @@ -148,6 +148,16 @@ ureg_DECL_immediate( struct ureg_program *, unsigned nr ); struct ureg_src +ureg_DECL_immediate_uint( struct ureg_program *, + const unsigned *v, + unsigned nr ); + +struct ureg_src +ureg_DECL_immediate_int( struct ureg_program *, + const int *v, + unsigned nr ); + +struct ureg_src ureg_DECL_constant( struct ureg_program *, unsigned index ); @@ -221,6 +231,90 @@ ureg_imm1f( struct ureg_program *ureg, return ureg_DECL_immediate( ureg, v, 1 ); } +static INLINE struct ureg_src +ureg_imm4u( struct ureg_program *ureg, + unsigned a, unsigned b, + unsigned c, unsigned d) +{ + unsigned v[4]; + v[0] = a; + v[1] = b; + v[2] = c; + v[3] = d; + return ureg_DECL_immediate_uint( ureg, v, 4 ); +} + +static INLINE struct ureg_src +ureg_imm3u( struct ureg_program *ureg, + unsigned a, unsigned b, + unsigned c) +{ + unsigned v[3]; + v[0] = a; + v[1] = b; + v[2] = c; + return ureg_DECL_immediate_uint( ureg, v, 3 ); +} + +static INLINE struct ureg_src +ureg_imm2u( struct ureg_program *ureg, + unsigned a, unsigned b) +{ + unsigned v[2]; + v[0] = a; + v[1] = b; + return ureg_DECL_immediate_uint( ureg, v, 2 ); +} + +static INLINE struct ureg_src +ureg_imm1u( struct ureg_program *ureg, + unsigned a) +{ + return ureg_DECL_immediate_uint( ureg, &a, 1 ); +} + +static INLINE struct ureg_src +ureg_imm4i( struct ureg_program *ureg, + int a, int b, + int c, int d) +{ + int v[4]; + v[0] = a; + v[1] = b; + v[2] = c; + v[3] = d; + return ureg_DECL_immediate_int( ureg, v, 4 ); +} + +static INLINE struct ureg_src +ureg_imm3i( struct ureg_program *ureg, + int a, int b, + int c) +{ + int v[3]; + v[0] = a; + v[1] = b; + v[2] = c; + return ureg_DECL_immediate_int( ureg, v, 3 ); +} + +static INLINE struct ureg_src +ureg_imm2i( struct ureg_program *ureg, + int a, int b) +{ + int v[2]; + v[0] = a; + v[1] = b; + return ureg_DECL_immediate_int( ureg, v, 2 ); +} + +static INLINE struct ureg_src +ureg_imm1i( struct ureg_program *ureg, + int a) +{ + return ureg_DECL_immediate_int( ureg, &a, 1 ); +} + /*********************************************************************** * Functions for patching up labels */ |