diff options
author | Michal Krol <[email protected]> | 2010-01-05 11:04:50 +0100 |
---|---|---|
committer | Michal Krol <[email protected]> | 2010-01-05 11:04:50 +0100 |
commit | 9b21b3c52a8a7d58d08151d1a6bf25c472dec213 (patch) | |
tree | d9083b6af4e2e9b70a7fa6cd31bac45a36e0f6b6 /src/gallium/auxiliary/tgsi | |
parent | 543b9566bdaa48fea2df1866fa1310c1cdbcde27 (diff) | |
parent | 1f9aa38f4e2be47229d92be2c1189c2b8d9c7133 (diff) |
Merge branch 'master' into instanced-arrays
Conflicts:
src/gallium/auxiliary/tgsi/tgsi_dump.c
src/gallium/include/pipe/p_shader_tokens.h
Diffstat (limited to 'src/gallium/auxiliary/tgsi')
-rw-r--r-- | src/gallium/auxiliary/tgsi/Makefile | 22 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/SConscript | 23 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_build.c | 2 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_dump.c | 91 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_exec.c | 1879 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_exec.h | 47 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_info.c | 30 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h | 23 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_parse.c | 22 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_sanity.c | 287 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_scan.c | 4 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_sse2.c | 2 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_text.c | 380 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_ureg.c | 152 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_ureg.h | 94 |
15 files changed, 1929 insertions, 1129 deletions
diff --git a/src/gallium/auxiliary/tgsi/Makefile b/src/gallium/auxiliary/tgsi/Makefile deleted file mode 100644 index 5f0a580b096..00000000000 --- a/src/gallium/auxiliary/tgsi/Makefile +++ /dev/null @@ -1,22 +0,0 @@ -TOP = ../../../.. -include $(TOP)/configs/current - -LIBNAME = tgsi - -C_SOURCES = \ - tgsi_sanity.c \ - tgsi_build.c \ - tgsi_dump.c \ - tgsi_exec.c \ - tgsi_info.c \ - tgsi_iterate.c \ - tgsi_parse.c \ - tgsi_ppc.c \ - tgsi_scan.c \ - tgsi_sse2.c \ - tgsi_text.c \ - tgsi_transform.c \ - tgsi_ureg.c \ - tgsi_util.c - -include ../../Makefile.template diff --git a/src/gallium/auxiliary/tgsi/SConscript b/src/gallium/auxiliary/tgsi/SConscript deleted file mode 100644 index b6bc2924f06..00000000000 --- a/src/gallium/auxiliary/tgsi/SConscript +++ /dev/null @@ -1,23 +0,0 @@ -Import('*') - -tgsi = env.ConvenienceLibrary( - target = 'tgsi', - source = [ - 'tgsi_build.c', - 'tgsi_dump.c', - 'tgsi_dump_c.c', - 'tgsi_exec.c', - 'tgsi_info.c', - 'tgsi_iterate.c', - 'tgsi_parse.c', - 'tgsi_sanity.c', - 'tgsi_scan.c', - 'tgsi_ppc.c', - 'tgsi_sse2.c', - 'tgsi_text.c', - 'tgsi_transform.c', - 'tgsi_ureg.c', - 'tgsi_util.c', - ]) - -auxiliaries.insert(0, tgsi) diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c index 92903fe57f3..de9cbc86305 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.c +++ b/src/gallium/auxiliary/tgsi/tgsi_build.c @@ -1025,7 +1025,7 @@ tgsi_build_full_property( size++; *property = tgsi_build_property( - TGSI_PROPERTY_GS_INPUT_PRIM, + full_prop->Property.PropertyName, header ); assert( full_prop->Property.NrTokens <= 8 + 1 ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index 4391ca75d1d..d7ff262f30a 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -123,12 +123,15 @@ static const char *semantic_names[] = "NORMAL", "FACE", "EDGEFLAG", + "PRIM_ID", "INSTANCEID" }; static const char *immediate_type_names[] = { - "FLT32" + "FLT32", + "UINT32", + "INT32" }; static const char *swizzle_names[] = @@ -175,13 +178,19 @@ static const char *primitive_names[] = static void -_dump_register( +_dump_register_decl( struct dump_ctx *ctx, uint file, int first, int last ) { ENM( file, file_names ); + + /* all geometry shader inputs are two dimensional */ + if (file == TGSI_FILE_INPUT && + ctx->iter.processor.Processor == TGSI_PROCESSOR_GEOMETRY) + TXT("[]"); + CHR( '[' ); SID( first ); if (first != last) { @@ -192,6 +201,52 @@ _dump_register( } static void +_dump_register_dst( + struct dump_ctx *ctx, + uint file, + int index) +{ + ENM( file, file_names ); + + CHR( '[' ); + SID( index ); + CHR( ']' ); +} + + +static void +_dump_register_src( + struct dump_ctx *ctx, + const struct tgsi_full_src_register *src ) +{ + if (src->Register.Indirect) { + ENM( src->Register.File, file_names ); + CHR( '[' ); + ENM( src->Indirect.File, file_names ); + CHR( '[' ); + SID( src->Indirect.Index ); + TXT( "]." ); + ENM( src->Indirect.SwizzleX, swizzle_names ); + if (src->Register.Index != 0) { + if (src->Register.Index > 0) + CHR( '+' ); + SID( src->Register.Index ); + } + CHR( ']' ); + } else { + ENM( src->Register.File, file_names ); + CHR( '[' ); + SID( src->Register.Index ); + CHR( ']' ); + } + if (src->Register.Dimension) { + CHR( '[' ); + SID( src->Dimension.Index ); + CHR( ']' ); + } +} + +static void _dump_register_ind( struct dump_ctx *ctx, uint file, @@ -245,7 +300,7 @@ iter_declaration( TXT( "DCL " ); - _dump_register( + _dump_register_decl( ctx, decl->Declaration.File, decl->Range.First, @@ -360,6 +415,12 @@ iter_immediate( case TGSI_IMM_FLOAT32: FLT( imm->u[i].Float ); break; + case TGSI_IMM_UINT32: + UID(imm->u[i].Uint); + break; + case TGSI_IMM_INT32: + SID(imm->u[i].Int); + break; default: assert( 0 ); } @@ -436,10 +497,9 @@ iter_instruction( dst->Indirect.SwizzleX ); } else { - _dump_register( + _dump_register_dst( ctx, dst->Register.File, - dst->Register.Index, dst->Register.Index ); } _dump_writemask( ctx, dst->Register.WriteMask ); @@ -455,26 +515,11 @@ iter_instruction( CHR( ' ' ); if (src->Register.Negate) - TXT( "-(" ); + CHR( '-' ); if (src->Register.Absolute) CHR( '|' ); - if (src->Register.Indirect) { - _dump_register_ind( - ctx, - src->Register.File, - src->Register.Index, - src->Indirect.File, - src->Indirect.Index, - src->Indirect.SwizzleX ); - } - else { - _dump_register( - ctx, - src->Register.File, - src->Register.Index, - src->Register.Index ); - } + _dump_register_src(ctx, src); if (src->Register.SwizzleX != TGSI_SWIZZLE_X || src->Register.SwizzleY != TGSI_SWIZZLE_Y || @@ -489,8 +534,6 @@ iter_instruction( if (src->Register.Absolute) CHR( '|' ); - if (src->Register.Negate) - CHR( ')' ); first_reg = FALSE; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 123117cb0a3..f43233bdb49 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -2,6 +2,7 @@ * * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. + * Copyright 2009-2010 VMware, Inc. All rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -60,6 +61,7 @@ #include "util/u_memory.h" #include "util/u_math.h" + #define FAST_MATH 1 #define TILE_TOP_LEFT 0 @@ -67,11 +69,329 @@ #define TILE_BOTTOM_LEFT 2 #define TILE_BOTTOM_RIGHT 3 +static void +micro_abs(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = fabsf(src->f[0]); + dst->f[1] = fabsf(src->f[1]); + dst->f[2] = fabsf(src->f[2]); + dst->f[3] = fabsf(src->f[3]); +} + +static void +micro_arl(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = (int)floorf(src->f[0]); + dst->i[1] = (int)floorf(src->f[1]); + dst->i[2] = (int)floorf(src->f[2]); + dst->i[3] = (int)floorf(src->f[3]); +} + +static void +micro_arr(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = (int)floorf(src->f[0] + 0.5f); + dst->i[1] = (int)floorf(src->f[1] + 0.5f); + dst->i[2] = (int)floorf(src->f[2] + 0.5f); + dst->i[3] = (int)floorf(src->f[3] + 0.5f); +} + +static void +micro_ceil(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = ceilf(src->f[0]); + dst->f[1] = ceilf(src->f[1]); + dst->f[2] = ceilf(src->f[2]); + dst->f[3] = ceilf(src->f[3]); +} + +static void +micro_cos(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = cosf(src->f[0]); + dst->f[1] = cosf(src->f[1]); + dst->f[2] = cosf(src->f[2]); + dst->f[3] = cosf(src->f[3]); +} + +static void +micro_ddx(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = + dst->f[1] = + dst->f[2] = + dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT]; +} + +static void +micro_ddy(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = + dst->f[1] = + dst->f[2] = + dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT]; +} + +static void +micro_exp2(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ +#if FAST_MATH + dst->f[0] = util_fast_exp2(src->f[0]); + dst->f[1] = util_fast_exp2(src->f[1]); + dst->f[2] = util_fast_exp2(src->f[2]); + dst->f[3] = util_fast_exp2(src->f[3]); +#else +#if DEBUG + /* Inf is okay for this instruction, so clamp it to silence assertions. */ + uint i; + union tgsi_exec_channel clamped; + + for (i = 0; i < 4; i++) { + if (src->f[i] > 127.99999f) { + clamped.f[i] = 127.99999f; + } else if (src->f[i] < -126.99999f) { + clamped.f[i] = -126.99999f; + } else { + clamped.f[i] = src->f[i]; + } + } + src = &clamped; +#endif /* DEBUG */ + + dst->f[0] = powf(2.0f, src->f[0]); + dst->f[1] = powf(2.0f, src->f[1]); + dst->f[2] = powf(2.0f, src->f[2]); + dst->f[3] = powf(2.0f, src->f[3]); +#endif /* FAST_MATH */ +} + +static void +micro_flr(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = floorf(src->f[0]); + dst->f[1] = floorf(src->f[1]); + dst->f[2] = floorf(src->f[2]); + dst->f[3] = floorf(src->f[3]); +} + +static void +micro_frc(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src->f[0] - floorf(src->f[0]); + dst->f[1] = src->f[1] - floorf(src->f[1]); + dst->f[2] = src->f[2] - floorf(src->f[2]); + dst->f[3] = src->f[3] - floorf(src->f[3]); +} + +static void +micro_iabs(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = src->i[0] >= 0 ? src->i[0] : -src->i[0]; + dst->i[1] = src->i[1] >= 0 ? src->i[1] : -src->i[1]; + dst->i[2] = src->i[2] >= 0 ? src->i[2] : -src->i[2]; + dst->i[3] = src->i[3] >= 0 ? src->i[3] : -src->i[3]; +} + +static void +micro_ineg(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = -src->i[0]; + dst->i[1] = -src->i[1]; + dst->i[2] = -src->i[2]; + dst->i[3] = -src->i[3]; +} + +static void +micro_lg2(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ +#if FAST_MATH + dst->f[0] = util_fast_log2(src->f[0]); + dst->f[1] = util_fast_log2(src->f[1]); + dst->f[2] = util_fast_log2(src->f[2]); + dst->f[3] = util_fast_log2(src->f[3]); +#else + dst->f[0] = logf(src->f[0]) * 1.442695f; + dst->f[1] = logf(src->f[1]) * 1.442695f; + dst->f[2] = logf(src->f[2]) * 1.442695f; + dst->f[3] = logf(src->f[3]) * 1.442695f; +#endif +} + +static void +micro_lrp(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src[0].f[0] * (src[1].f[0] - src[2].f[0]) + src[2].f[0]; + dst->f[1] = src[0].f[1] * (src[1].f[1] - src[2].f[1]) + src[2].f[1]; + dst->f[2] = src[0].f[2] * (src[1].f[2] - src[2].f[2]) + src[2].f[2]; + dst->f[3] = src[0].f[3] * (src[1].f[3] - src[2].f[3]) + src[2].f[3]; +} + +static void +micro_mad(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src[0].f[0] * src[1].f[0] + src[2].f[0]; + dst->f[1] = src[0].f[1] * src[1].f[1] + src[2].f[1]; + dst->f[2] = src[0].f[2] * src[1].f[2] + src[2].f[2]; + dst->f[3] = src[0].f[3] * src[1].f[3] + src[2].f[3]; +} + +static void +micro_mov(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src->u[0]; + dst->u[1] = src->u[1]; + dst->u[2] = src->u[2]; + dst->u[3] = src->u[3]; +} + +static void +micro_rcp(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = 1.0f / src->f[0]; + dst->f[1] = 1.0f / src->f[1]; + dst->f[2] = 1.0f / src->f[2]; + dst->f[3] = 1.0f / src->f[3]; +} + +static void +micro_rnd(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = floorf(src->f[0] + 0.5f); + dst->f[1] = floorf(src->f[1] + 0.5f); + dst->f[2] = floorf(src->f[2] + 0.5f); + dst->f[3] = floorf(src->f[3] + 0.5f); +} + +static void +micro_rsq(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = 1.0f / sqrtf(fabsf(src->f[0])); + dst->f[1] = 1.0f / sqrtf(fabsf(src->f[1])); + dst->f[2] = 1.0f / sqrtf(fabsf(src->f[2])); + dst->f[3] = 1.0f / sqrtf(fabsf(src->f[3])); +} + +static void +micro_seq(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src[0].f[0] == src[1].f[0] ? 1.0f : 0.0f; + dst->f[1] = src[0].f[1] == src[1].f[1] ? 1.0f : 0.0f; + dst->f[2] = src[0].f[2] == src[1].f[2] ? 1.0f : 0.0f; + dst->f[3] = src[0].f[3] == src[1].f[3] ? 1.0f : 0.0f; +} + +static void +micro_sge(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src[0].f[0] >= src[1].f[0] ? 1.0f : 0.0f; + dst->f[1] = src[0].f[1] >= src[1].f[1] ? 1.0f : 0.0f; + dst->f[2] = src[0].f[2] >= src[1].f[2] ? 1.0f : 0.0f; + dst->f[3] = src[0].f[3] >= src[1].f[3] ? 1.0f : 0.0f; +} + +static void +micro_sgn(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f; + dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f; + dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f; + dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f; +} + +static void +micro_sgt(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src[0].f[0] > src[1].f[0] ? 1.0f : 0.0f; + dst->f[1] = src[0].f[1] > src[1].f[1] ? 1.0f : 0.0f; + dst->f[2] = src[0].f[2] > src[1].f[2] ? 1.0f : 0.0f; + dst->f[3] = src[0].f[3] > src[1].f[3] ? 1.0f : 0.0f; +} + +static void +micro_sin(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = sinf(src->f[0]); + dst->f[1] = sinf(src->f[1]); + dst->f[2] = sinf(src->f[2]); + dst->f[3] = sinf(src->f[3]); +} + +static void +micro_sle(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src[0].f[0] <= src[1].f[0] ? 1.0f : 0.0f; + dst->f[1] = src[0].f[1] <= src[1].f[1] ? 1.0f : 0.0f; + dst->f[2] = src[0].f[2] <= src[1].f[2] ? 1.0f : 0.0f; + dst->f[3] = src[0].f[3] <= src[1].f[3] ? 1.0f : 0.0f; +} + +static void +micro_slt(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src[0].f[0] < src[1].f[0] ? 1.0f : 0.0f; + dst->f[1] = src[0].f[1] < src[1].f[1] ? 1.0f : 0.0f; + dst->f[2] = src[0].f[2] < src[1].f[2] ? 1.0f : 0.0f; + dst->f[3] = src[0].f[3] < src[1].f[3] ? 1.0f : 0.0f; +} + +static void +micro_sne(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src[0].f[0] != src[1].f[0] ? 1.0f : 0.0f; + dst->f[1] = src[0].f[1] != src[1].f[1] ? 1.0f : 0.0f; + dst->f[2] = src[0].f[2] != src[1].f[2] ? 1.0f : 0.0f; + dst->f[3] = src[0].f[3] != src[1].f[3] ? 1.0f : 0.0f; +} + +static void +micro_trunc(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = (float)(int)src->f[0]; + dst->f[1] = (float)(int)src->f[1]; + dst->f[2] = (float)(int)src->f[2]; + dst->f[3] = (float)(int)src->f[3]; +} + + #define CHAN_X 0 #define CHAN_Y 1 #define CHAN_Z 2 #define CHAN_W 3 +enum tgsi_exec_datatype { + TGSI_EXEC_DATA_FLOAT, + TGSI_EXEC_DATA_INT, + TGSI_EXEC_DATA_UINT +}; + /* * Shorthand locations of various utility registers (_I = Index, _C = Channel) */ @@ -123,23 +443,19 @@ /** The execution mask depends on the conditional mask and the loop mask */ #define UPDATE_EXEC_MASK(MACH) \ - MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask + MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->Switch.mask & MACH->FuncMask static const union tgsi_exec_channel ZeroVec = { { 0.0, 0.0, 0.0, 0.0 } }; -#ifdef DEBUG -static void -check_inf_or_nan(const union tgsi_exec_channel *chan) -{ - assert(!util_is_inf_or_nan(chan->f[0])); - assert(!util_is_inf_or_nan(chan->f[1])); - assert(!util_is_inf_or_nan(chan->f[2])); - assert(!util_is_inf_or_nan(chan->f[3])); -} -#endif +#define CHECK_INF_OR_NAN(chan) do {\ + assert(!util_is_inf_or_nan((chan)->f[0]));\ + assert(!util_is_inf_or_nan((chan)->f[1]));\ + assert(!util_is_inf_or_nan((chan)->f[2]));\ + assert(!util_is_inf_or_nan((chan)->f[3]));\ + } while (0) #ifdef DEBUG @@ -292,6 +608,14 @@ tgsi_exec_machine_bind_shader( * sizeof(struct tgsi_full_declaration)); maxDeclarations += 10; } + if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_OUTPUT) { + unsigned reg; + for (reg = parse.FullToken.FullDeclaration.Range.First; + reg <= parse.FullToken.FullDeclaration.Range.Last; + ++reg) { + ++mach->NumOutputs; + } + } memcpy(declarations + numDeclarations, &parse.FullToken.FullDeclaration, sizeof(declarations[0])); @@ -372,6 +696,7 @@ tgsi_exec_machine_create( void ) memset(mach, 0, sizeof(*mach)); mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR]; + mach->MaxGeometryShaderOutputs = TGSI_MAX_TOTAL_VERTICES; mach->Predicates = &mach->Temps[TGSI_EXEC_TEMP_P0]; /* Setup constants. */ @@ -413,18 +738,6 @@ tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach) align_free(mach); } - -static void -micro_abs( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = fabsf( src->f[0] ); - dst->f[1] = fabsf( src->f[1] ); - dst->f[2] = fabsf( src->f[2] ); - dst->f[3] = fabsf( src->f[3] ); -} - static void micro_add( union tgsi_exec_channel *dst, @@ -437,76 +750,6 @@ micro_add( dst->f[3] = src0->f[3] + src1->f[3]; } -#if 0 -static void -micro_iadd( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->i[0] = src0->i[0] + src1->i[0]; - dst->i[1] = src0->i[1] + src1->i[1]; - dst->i[2] = src0->i[2] + src1->i[2]; - dst->i[3] = src0->i[3] + src1->i[3]; -} -#endif - -static void -micro_and( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] & src1->u[0]; - dst->u[1] = src0->u[1] & src1->u[1]; - dst->u[2] = src0->u[2] & src1->u[2]; - dst->u[3] = src0->u[3] & src1->u[3]; -} - -static void -micro_ceil( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = ceilf( src->f[0] ); - dst->f[1] = ceilf( src->f[1] ); - dst->f[2] = ceilf( src->f[2] ); - dst->f[3] = ceilf( src->f[3] ); -} - -static void -micro_cos( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = cosf( src->f[0] ); - dst->f[1] = cosf( src->f[1] ); - dst->f[2] = cosf( src->f[2] ); - dst->f[3] = cosf( src->f[3] ); -} - -static void -micro_ddx( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = - dst->f[1] = - dst->f[2] = - dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT]; -} - -static void -micro_ddy( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = - dst->f[1] = - dst->f[2] = - dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT]; -} - static void micro_div( union tgsi_exec_channel *dst, @@ -527,99 +770,6 @@ micro_div( } } -#if 0 -static void -micro_udiv( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] / src1->u[0]; - dst->u[1] = src0->u[1] / src1->u[1]; - dst->u[2] = src0->u[2] / src1->u[2]; - dst->u[3] = src0->u[3] / src1->u[3]; -} -#endif - -static void -micro_eq( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2, - const union tgsi_exec_channel *src3 ) -{ - dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0]; - dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1]; - dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2]; - dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3]; -} - -#if 0 -static void -micro_ieq( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2, - const union tgsi_exec_channel *src3 ) -{ - dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0]; - dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1]; - dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2]; - dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3]; -} -#endif - -static void -micro_exp2( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src) -{ -#if FAST_MATH - dst->f[0] = util_fast_exp2( src->f[0] ); - dst->f[1] = util_fast_exp2( src->f[1] ); - dst->f[2] = util_fast_exp2( src->f[2] ); - dst->f[3] = util_fast_exp2( src->f[3] ); -#else - -#if DEBUG - /* Inf is okay for this instruction, so clamp it to silence assertions. */ - uint i; - union tgsi_exec_channel clamped; - - for (i = 0; i < 4; i++) { - if (src->f[i] > 127.99999f) { - clamped.f[i] = 127.99999f; - } else if (src->f[i] < -126.99999f) { - clamped.f[i] = -126.99999f; - } else { - clamped.f[i] = src->f[i]; - } - } - src = &clamped; -#endif - - dst->f[0] = powf( 2.0f, src->f[0] ); - dst->f[1] = powf( 2.0f, src->f[1] ); - dst->f[2] = powf( 2.0f, src->f[2] ); - dst->f[3] = powf( 2.0f, src->f[3] ); -#endif -} - -#if 0 -static void -micro_f2ut( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->u[0] = (uint) src->f[0]; - dst->u[1] = (uint) src->f[1]; - dst->u[2] = (uint) src->f[2]; - dst->u[3] = (uint) src->f[3]; -} -#endif - static void micro_float_clamp(union tgsi_exec_channel *dst, const union tgsi_exec_channel *src) @@ -647,71 +797,6 @@ micro_float_clamp(union tgsi_exec_channel *dst, } static void -micro_flr( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = floorf( src->f[0] ); - dst->f[1] = floorf( src->f[1] ); - dst->f[2] = floorf( src->f[2] ); - dst->f[3] = floorf( src->f[3] ); -} - -static void -micro_frc( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = src->f[0] - floorf( src->f[0] ); - dst->f[1] = src->f[1] - floorf( src->f[1] ); - dst->f[2] = src->f[2] - floorf( src->f[2] ); - dst->f[3] = src->f[3] - floorf( src->f[3] ); -} - -static void -micro_i2f( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = (float) src->i[0]; - dst->f[1] = (float) src->i[1]; - dst->f[2] = (float) src->i[2]; - dst->f[3] = (float) src->i[3]; -} - -static void -micro_lg2( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ -#if FAST_MATH - dst->f[0] = util_fast_log2( src->f[0] ); - dst->f[1] = util_fast_log2( src->f[1] ); - dst->f[2] = util_fast_log2( src->f[2] ); - dst->f[3] = util_fast_log2( src->f[3] ); -#else - dst->f[0] = logf( src->f[0] ) * 1.442695f; - dst->f[1] = logf( src->f[1] ) * 1.442695f; - dst->f[2] = logf( src->f[2] ) * 1.442695f; - dst->f[3] = logf( src->f[3] ) * 1.442695f; -#endif -} - -static void -micro_le( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2, - const union tgsi_exec_channel *src3 ) -{ - dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0]; - dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1]; - dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2]; - dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3]; -} - -static void micro_lt( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src0, @@ -725,38 +810,6 @@ micro_lt( dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3]; } -#if 0 -static void -micro_ilt( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2, - const union tgsi_exec_channel *src3 ) -{ - dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0]; - dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1]; - dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2]; - dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3]; -} -#endif - -#if 0 -static void -micro_ult( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2, - const union tgsi_exec_channel *src3 ) -{ - dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0]; - dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1]; - dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2]; - dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3]; -} -#endif - static void micro_max( union tgsi_exec_channel *dst, @@ -769,34 +822,6 @@ micro_max( dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3]; } -#if 0 -static void -micro_imax( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0]; - dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1]; - dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2]; - dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3]; -} -#endif - -#if 0 -static void -micro_umax( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0]; - dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1]; - dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2]; - dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3]; -} -#endif - static void micro_min( union tgsi_exec_channel *dst, @@ -809,48 +834,6 @@ micro_min( dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3]; } -#if 0 -static void -micro_imin( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0]; - dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1]; - dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2]; - dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3]; -} -#endif - -#if 0 -static void -micro_umin( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0]; - dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1]; - dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2]; - dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3]; -} -#endif - -#if 0 -static void -micro_umod( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] % src1->u[0]; - dst->u[1] = src0->u[1] % src1->u[1]; - dst->u[2] = src0->u[2] % src1->u[2]; - dst->u[3] = src0->u[3] % src1->u[3]; -} -#endif - static void micro_mul( union tgsi_exec_channel *dst, @@ -865,20 +848,6 @@ micro_mul( #if 0 static void -micro_imul( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->i[0] = src0->i[0] * src1->i[0]; - dst->i[1] = src0->i[1] * src1->i[1]; - dst->i[2] = src0->i[2] * src1->i[2]; - dst->i[3] = src0->i[3] * src1->i[3]; -} -#endif - -#if 0 -static void micro_imul64( union tgsi_exec_channel *dst0, union tgsi_exec_channel *dst1, @@ -942,42 +911,6 @@ micro_neg( dst->f[3] = -src->f[3]; } -#if 0 -static void -micro_ineg( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->i[0] = -src->i[0]; - dst->i[1] = -src->i[1]; - dst->i[2] = -src->i[2]; - dst->i[3] = -src->i[3]; -} -#endif - -static void -micro_not( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->u[0] = ~src->u[0]; - dst->u[1] = ~src->u[1]; - dst->u[2] = ~src->u[2]; - dst->u[3] = ~src->u[3]; -} - -static void -micro_or( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] | src1->u[0]; - dst->u[1] = src0->u[1] | src1->u[1]; - dst->u[2] = src0->u[2] | src1->u[2]; - dst->u[3] = src0->u[3] | src1->u[3]; -} - static void micro_pow( union tgsi_exec_channel *dst, @@ -998,88 +931,6 @@ micro_pow( } static void -micro_rnd( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = floorf( src->f[0] + 0.5f ); - dst->f[1] = floorf( src->f[1] + 0.5f ); - dst->f[2] = floorf( src->f[2] + 0.5f ); - dst->f[3] = floorf( src->f[3] + 0.5f ); -} - -static void -micro_sgn( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f; - dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f; - dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f; - dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f; -} - -static void -micro_shl( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->i[0] = src0->i[0] << src1->i[0]; - dst->i[1] = src0->i[1] << src1->i[1]; - dst->i[2] = src0->i[2] << src1->i[2]; - dst->i[3] = src0->i[3] << src1->i[3]; -} - -static void -micro_ishr( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->i[0] = src0->i[0] >> src1->i[0]; - dst->i[1] = src0->i[1] >> src1->i[1]; - dst->i[2] = src0->i[2] >> src1->i[2]; - dst->i[3] = src0->i[3] >> src1->i[3]; -} - -static void -micro_trunc( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0 ) -{ - dst->f[0] = (float) (int) src0->f[0]; - dst->f[1] = (float) (int) src0->f[1]; - dst->f[2] = (float) (int) src0->f[2]; - dst->f[3] = (float) (int) src0->f[3]; -} - -#if 0 -static void -micro_ushr( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] >> src1->u[0]; - dst->u[1] = src0->u[1] >> src1->u[1]; - dst->u[2] = src0->u[2] >> src1->u[2]; - dst->u[3] = src0->u[3] >> src1->u[3]; -} -#endif - -static void -micro_sin( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = sinf( src->f[0] ); - dst->f[1] = sinf( src->f[1] ); - dst->f[2] = sinf( src->f[2] ); - dst->f[3] = sinf( src->f[3] ); -} - -static void micro_sqrt( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src ) { @@ -1101,31 +952,6 @@ micro_sub( dst->f[3] = src0->f[3] - src1->f[3]; } -#if 0 -static void -micro_u2f( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = (float) src->u[0]; - dst->f[1] = (float) src->u[1]; - dst->f[2] = (float) src->u[2]; - dst->f[3] = (float) src->u[3]; -} -#endif - -static void -micro_xor( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] ^ src1->u[0]; - dst->u[1] = src0->u[1] ^ src1->u[1]; - dst->u[2] = src0->u[2] ^ src1->u[2]; - dst->u[3] = src0->u[3] ^ src1->u[3]; -} - static void fetch_src_file_channel( const struct tgsi_exec_machine *mach, @@ -1224,11 +1050,11 @@ fetch_src_file_channel( } static void -fetch_source( - const struct tgsi_exec_machine *mach, - union tgsi_exec_channel *chan, - const struct tgsi_full_src_register *reg, - const uint chan_index ) +fetch_source(const struct tgsi_exec_machine *mach, + union tgsi_exec_channel *chan, + const struct tgsi_full_src_register *reg, + const uint chan_index, + enum tgsi_exec_datatype src_datatype) { union tgsi_exec_channel index; uint swizzle; @@ -1277,10 +1103,10 @@ fetch_source( &indir_index ); /* add value of address register to the offset */ - index.i[0] += (int) indir_index.f[0]; - index.i[1] += (int) indir_index.f[1]; - index.i[2] += (int) indir_index.f[2]; - index.i[3] += (int) indir_index.f[3]; + index.i[0] += indir_index.i[0]; + index.i[1] += indir_index.i[1]; + index.i[2] += indir_index.i[2]; + index.i[3] += indir_index.i[3]; /* for disabled execution channels, zero-out the index to * avoid using a potential garbage value. @@ -1357,10 +1183,10 @@ fetch_source( &index2, &indir_index ); - index.i[0] += (int) indir_index.f[0]; - index.i[1] += (int) indir_index.f[1]; - index.i[2] += (int) indir_index.f[2]; - index.i[3] += (int) indir_index.f[3]; + index.i[0] += indir_index.i[0]; + index.i[1] += indir_index.i[1]; + index.i[2] += indir_index.i[2]; + index.i[3] += indir_index.i[3]; /* for disabled execution channels, zero-out the index to * avoid using a potential garbage value. @@ -1385,32 +1211,30 @@ fetch_source( &index, chan ); - switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) { - case TGSI_UTIL_SIGN_CLEAR: - micro_abs( chan, chan ); - break; - - case TGSI_UTIL_SIGN_SET: - micro_abs( chan, chan ); - micro_neg( chan, chan ); - break; - - case TGSI_UTIL_SIGN_TOGGLE: - micro_neg( chan, chan ); - break; + if (reg->Register.Absolute) { + if (src_datatype == TGSI_EXEC_DATA_FLOAT) { + micro_abs(chan, chan); + } else { + micro_iabs(chan, chan); + } + } - case TGSI_UTIL_SIGN_KEEP: - break; + if (reg->Register.Negate) { + if (src_datatype == TGSI_EXEC_DATA_FLOAT) { + micro_neg(chan, chan); + } else { + micro_ineg(chan, chan); + } } } static void -store_dest( - struct tgsi_exec_machine *mach, - const union tgsi_exec_channel *chan, - const struct tgsi_full_dst_register *reg, - const struct tgsi_full_instruction *inst, - uint chan_index ) +store_dest(struct tgsi_exec_machine *mach, + const union tgsi_exec_channel *chan, + const struct tgsi_full_dst_register *reg, + const struct tgsi_full_instruction *inst, + uint chan_index, + enum tgsi_exec_datatype dst_datatype) { uint i; union tgsi_exec_channel null; @@ -1419,9 +1243,9 @@ store_dest( int offset = 0; /* indirection offset */ int index; -#ifdef DEBUG - check_inf_or_nan(chan); -#endif + if (dst_datatype == TGSI_EXEC_DATA_FLOAT) { + CHECK_INF_OR_NAN(chan); + } /* There is an extra source register that indirectly subscripts * a register file. The direct index now becomes an offset @@ -1456,7 +1280,7 @@ store_dest( &indir_index ); /* save indirection offset */ - offset = (int) indir_index.f[0]; + offset = indir_index.i[0]; } switch (reg->Register.File) { @@ -1468,6 +1292,15 @@ store_dest( index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] + reg->Register.Index; dst = &mach->Outputs[offset + index].xyzw[chan_index]; +#if 0 + if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) { + fprintf(stderr, "STORING OUT[%d] mask(%d), = (", offset + index, execmask); + for (i = 0; i < QUAD_SIZE; i++) + if (execmask & (1 << i)) + fprintf(stderr, "%f, ", chan->f[i]); + fprintf(stderr, ")\n"); + } +#endif break; case TGSI_FILE_TEMPORARY: @@ -1577,10 +1410,10 @@ store_dest( } #define FETCH(VAL,INDEX,CHAN)\ - fetch_source (mach, VAL, &inst->Src[INDEX], CHAN) + fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_FLOAT) #define STORE(VAL,INDEX,CHAN)\ - store_dest (mach, VAL, &inst->Dst[INDEX], inst, CHAN ) + store_dest(mach, VAL, &inst->Dst[INDEX], inst, CHAN, TGSI_EXEC_DATA_FLOAT) /** @@ -1638,6 +1471,35 @@ exec_kilp(struct tgsi_exec_machine *mach, mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; } +static void +emit_vertex(struct tgsi_exec_machine *mach) +{ + /* FIXME: check for exec mask correctly + unsigned i; + for (i = 0; i < QUAD_SIZE; ++i) { + if ((mach->ExecMask & (1 << i))) + */ + if (mach->ExecMask) { + mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += mach->NumOutputs; + mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; + } +} + +static void +emit_primitive(struct tgsi_exec_machine *mach) +{ + unsigned *prim_count = &mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]; + /* FIXME: check for exec mask correctly + unsigned i; + for (i = 0; i < QUAD_SIZE; ++i) { + if ((mach->ExecMask & (1 << i))) + */ + if (mach->ExecMask) { + ++(*prim_count); + debug_assert((*prim_count * mach->NumOutputs) < mach->MaxGeometryShaderOutputs); + mach->Primitives[*prim_count] = 0; + } +} /* * Fetch a four texture samples using STR texture coordinates. @@ -1908,7 +1770,7 @@ exec_declaration(struct tgsi_exec_machine *mach, if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) { assert(decl->Semantic.Index == 0); assert(first == last); - assert(mask = TGSI_WRITEMASK_XYZW); + assert(mask == TGSI_WRITEMASK_XYZW); mach->Inputs[first] = mach->QuadPos; } else if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) { @@ -1954,6 +1816,461 @@ exec_declaration(struct tgsi_exec_machine *mach, } } +typedef void (* micro_op)(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src); + +static void +exec_scalar_unary(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst, + micro_op op, + enum tgsi_exec_datatype dst_datatype, + enum tgsi_exec_datatype src_datatype) +{ + unsigned int chan; + union tgsi_exec_channel src; + union tgsi_exec_channel dst; + + fetch_source(mach, &src, &inst->Src[0], CHAN_X, src_datatype); + op(&dst, &src); + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype); + } + } +} + +static void +exec_vector_unary(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst, + micro_op op, + enum tgsi_exec_datatype dst_datatype, + enum tgsi_exec_datatype src_datatype) +{ + unsigned int chan; + struct tgsi_exec_vector dst; + + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + union tgsi_exec_channel src; + + fetch_source(mach, &src, &inst->Src[0], chan, src_datatype); + op(&dst.xyzw[chan], &src); + } + } + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); + } + } +} + +static void +exec_vector_binary(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst, + micro_op op, + enum tgsi_exec_datatype dst_datatype, + enum tgsi_exec_datatype src_datatype) +{ + unsigned int chan; + struct tgsi_exec_vector dst; + + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + union tgsi_exec_channel src[2]; + + fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype); + fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype); + op(&dst.xyzw[chan], src); + } + } + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); + } + } +} + +static void +exec_vector_trinary(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst, + micro_op op, + enum tgsi_exec_datatype dst_datatype, + enum tgsi_exec_datatype src_datatype) +{ + unsigned int chan; + struct tgsi_exec_vector dst; + + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + union tgsi_exec_channel src[3]; + + fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype); + fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype); + fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype); + op(&dst.xyzw[chan], src); + } + } + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); + } + } +} + +static void +exec_break(struct tgsi_exec_machine *mach) +{ + if (mach->BreakType == TGSI_EXEC_BREAK_INSIDE_LOOP) { + /* turn off loop channels for each enabled exec channel */ + mach->LoopMask &= ~mach->ExecMask; + /* Todo: if mach->LoopMask == 0, jump to end of loop */ + UPDATE_EXEC_MASK(mach); + } else { + assert(mach->BreakType == TGSI_EXEC_BREAK_INSIDE_SWITCH); + + mach->Switch.mask = 0x0; + + UPDATE_EXEC_MASK(mach); + } +} + +static void +exec_switch(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING); + assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); + + mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch; + fetch_source(mach, &mach->Switch.selector, &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_UINT); + mach->Switch.mask = 0x0; + mach->Switch.defaultMask = 0x0; + + mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; + mach->BreakType = TGSI_EXEC_BREAK_INSIDE_SWITCH; + + UPDATE_EXEC_MASK(mach); +} + +static void +exec_case(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask; + union tgsi_exec_channel src; + uint mask = 0; + + fetch_source(mach, &src, &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_UINT); + + if (mach->Switch.selector.u[0] == src.u[0]) { + mask |= 0x1; + } + if (mach->Switch.selector.u[1] == src.u[1]) { + mask |= 0x2; + } + if (mach->Switch.selector.u[2] == src.u[2]) { + mask |= 0x4; + } + if (mach->Switch.selector.u[3] == src.u[3]) { + mask |= 0x8; + } + + mach->Switch.defaultMask |= mask; + + mach->Switch.mask |= mask & prevMask; + + UPDATE_EXEC_MASK(mach); +} + +static void +exec_default(struct tgsi_exec_machine *mach) +{ + uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask; + + mach->Switch.mask |= ~mach->Switch.defaultMask & prevMask; + + UPDATE_EXEC_MASK(mach); +} + +static void +exec_endswitch(struct tgsi_exec_machine *mach) +{ + mach->Switch = mach->SwitchStack[--mach->SwitchStackTop]; + mach->BreakType = mach->BreakStack[--mach->BreakStackTop]; + + UPDATE_EXEC_MASK(mach); +} + +static void +micro_i2f(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = (float)src->i[0]; + dst->f[1] = (float)src->i[1]; + dst->f[2] = (float)src->i[2]; + dst->f[3] = (float)src->i[3]; +} + +static void +micro_not(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = ~src->u[0]; + dst->u[1] = ~src->u[1]; + dst->u[2] = ~src->u[2]; + dst->u[3] = ~src->u[3]; +} + +static void +micro_shl(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] << src[1].u[0]; + dst->u[1] = src[0].u[1] << src[1].u[1]; + dst->u[2] = src[0].u[2] << src[1].u[2]; + dst->u[3] = src[0].u[3] << src[1].u[3]; +} + +static void +micro_and(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] & src[1].u[0]; + dst->u[1] = src[0].u[1] & src[1].u[1]; + dst->u[2] = src[0].u[2] & src[1].u[2]; + dst->u[3] = src[0].u[3] & src[1].u[3]; +} + +static void +micro_or(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] | src[1].u[0]; + dst->u[1] = src[0].u[1] | src[1].u[1]; + dst->u[2] = src[0].u[2] | src[1].u[2]; + dst->u[3] = src[0].u[3] | src[1].u[3]; +} + +static void +micro_xor(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] ^ src[1].u[0]; + dst->u[1] = src[0].u[1] ^ src[1].u[1]; + dst->u[2] = src[0].u[2] ^ src[1].u[2]; + dst->u[3] = src[0].u[3] ^ src[1].u[3]; +} + +static void +micro_f2i(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = (int)src->f[0]; + dst->i[1] = (int)src->f[1]; + dst->i[2] = (int)src->f[2]; + dst->i[3] = (int)src->f[3]; +} + +static void +micro_idiv(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = src[0].i[0] / src[1].i[0]; + dst->i[1] = src[0].i[1] / src[1].i[1]; + dst->i[2] = src[0].i[2] / src[1].i[2]; + dst->i[3] = src[0].i[3] / src[1].i[3]; +} + +static void +micro_imax(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = src[0].i[0] > src[1].i[0] ? src[0].i[0] : src[1].i[0]; + dst->i[1] = src[0].i[1] > src[1].i[1] ? src[0].i[1] : src[1].i[1]; + dst->i[2] = src[0].i[2] > src[1].i[2] ? src[0].i[2] : src[1].i[2]; + dst->i[3] = src[0].i[3] > src[1].i[3] ? src[0].i[3] : src[1].i[3]; +} + +static void +micro_imin(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = src[0].i[0] < src[1].i[0] ? src[0].i[0] : src[1].i[0]; + dst->i[1] = src[0].i[1] < src[1].i[1] ? src[0].i[1] : src[1].i[1]; + dst->i[2] = src[0].i[2] < src[1].i[2] ? src[0].i[2] : src[1].i[2]; + dst->i[3] = src[0].i[3] < src[1].i[3] ? src[0].i[3] : src[1].i[3]; +} + +static void +micro_isge(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = src[0].i[0] >= src[1].i[0] ? -1 : 0; + dst->i[1] = src[0].i[1] >= src[1].i[1] ? -1 : 0; + dst->i[2] = src[0].i[2] >= src[1].i[2] ? -1 : 0; + dst->i[3] = src[0].i[3] >= src[1].i[3] ? -1 : 0; +} + +static void +micro_ishr(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = src[0].i[0] >> src[1].i[0]; + dst->i[1] = src[0].i[1] >> src[1].i[1]; + dst->i[2] = src[0].i[2] >> src[1].i[2]; + dst->i[3] = src[0].i[3] >> src[1].i[3]; +} + +static void +micro_islt(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = src[0].i[0] < src[1].i[0] ? -1 : 0; + dst->i[1] = src[0].i[1] < src[1].i[1] ? -1 : 0; + dst->i[2] = src[0].i[2] < src[1].i[2] ? -1 : 0; + dst->i[3] = src[0].i[3] < src[1].i[3] ? -1 : 0; +} + +static void +micro_f2u(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = (uint)src->f[0]; + dst->u[1] = (uint)src->f[1]; + dst->u[2] = (uint)src->f[2]; + dst->u[3] = (uint)src->f[3]; +} + +static void +micro_u2f(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = (float)src->u[0]; + dst->f[1] = (float)src->u[1]; + dst->f[2] = (float)src->u[2]; + dst->f[3] = (float)src->u[3]; +} + +static void +micro_uadd(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] + src[1].u[0]; + dst->u[1] = src[0].u[1] + src[1].u[1]; + dst->u[2] = src[0].u[2] + src[1].u[2]; + dst->u[3] = src[0].u[3] + src[1].u[3]; +} + +static void +micro_udiv(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] / src[1].u[0]; + dst->u[1] = src[0].u[1] / src[1].u[1]; + dst->u[2] = src[0].u[2] / src[1].u[2]; + dst->u[3] = src[0].u[3] / src[1].u[3]; +} + +static void +micro_umad(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] * src[1].u[0] + src[2].u[0]; + dst->u[1] = src[0].u[1] * src[1].u[1] + src[2].u[1]; + dst->u[2] = src[0].u[2] * src[1].u[2] + src[2].u[2]; + dst->u[3] = src[0].u[3] * src[1].u[3] + src[2].u[3]; +} + +static void +micro_umax(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] > src[1].u[0] ? src[0].u[0] : src[1].u[0]; + dst->u[1] = src[0].u[1] > src[1].u[1] ? src[0].u[1] : src[1].u[1]; + dst->u[2] = src[0].u[2] > src[1].u[2] ? src[0].u[2] : src[1].u[2]; + dst->u[3] = src[0].u[3] > src[1].u[3] ? src[0].u[3] : src[1].u[3]; +} + +static void +micro_umin(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] < src[1].u[0] ? src[0].u[0] : src[1].u[0]; + dst->u[1] = src[0].u[1] < src[1].u[1] ? src[0].u[1] : src[1].u[1]; + dst->u[2] = src[0].u[2] < src[1].u[2] ? src[0].u[2] : src[1].u[2]; + dst->u[3] = src[0].u[3] < src[1].u[3] ? src[0].u[3] : src[1].u[3]; +} + +static void +micro_umod(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] % src[1].u[0]; + dst->u[1] = src[0].u[1] % src[1].u[1]; + dst->u[2] = src[0].u[2] % src[1].u[2]; + dst->u[3] = src[0].u[3] % src[1].u[3]; +} + +static void +micro_umul(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] * src[1].u[0]; + dst->u[1] = src[0].u[1] * src[1].u[1]; + dst->u[2] = src[0].u[2] * src[1].u[2]; + dst->u[3] = src[0].u[3] * src[1].u[3]; +} + +static void +micro_useq(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] == src[1].u[0] ? ~0 : 0; + dst->u[1] = src[0].u[1] == src[1].u[1] ? ~0 : 0; + dst->u[2] = src[0].u[2] == src[1].u[2] ? ~0 : 0; + dst->u[3] = src[0].u[3] == src[1].u[3] ? ~0 : 0; +} + +static void +micro_usge(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] >= src[1].u[0] ? ~0 : 0; + dst->u[1] = src[0].u[1] >= src[1].u[1] ? ~0 : 0; + dst->u[2] = src[0].u[2] >= src[1].u[2] ? ~0 : 0; + dst->u[3] = src[0].u[3] >= src[1].u[3] ? ~0 : 0; +} + +static void +micro_ushr(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] >> src[1].u[0]; + dst->u[1] = src[0].u[1] >> src[1].u[1]; + dst->u[2] = src[0].u[2] >> src[1].u[2]; + dst->u[3] = src[0].u[3] >> src[1].u[3]; +} + +static void +micro_uslt(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] < src[1].u[0] ? ~0 : 0; + dst->u[1] = src[0].u[1] < src[1].u[1] ? ~0 : 0; + dst->u[2] = src[0].u[2] < src[1].u[2] ? ~0 : 0; + dst->u[3] = src[0].u[3] < src[1].u[3] ? ~0 : 0; +} + +static void +micro_usne(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] != src[1].u[0] ? ~0 : 0; + dst->u[1] = src[0].u[1] != src[1].u[1] ? ~0 : 0; + dst->u[2] = src[0].u[2] != src[1].u[2] ? ~0 : 0; + dst->u[3] = src[0].u[3] != src[1].u[3] ? ~0 : 0; +} + static void exec_instruction( struct tgsi_exec_machine *mach, @@ -1968,23 +2285,11 @@ exec_instruction( switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ARL: - case TGSI_OPCODE_FLR: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_flr(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_arl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_MOV: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&d[chan_index], 0, chan_index); - } - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_mov, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_LIT: @@ -2021,23 +2326,11 @@ exec_instruction( break; case TGSI_OPCODE_RCP: - /* TGSI_OPCODE_RECIP */ - FETCH( &r[0], 0, CHAN_X ); - micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_scalar_unary(mach, inst, micro_rcp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_RSQ: - /* TGSI_OPCODE_RECIPSQRT */ - FETCH( &r[0], 0, CHAN_X ); - micro_abs( &r[0], &r[0] ); - micro_sqrt( &r[0], &r[0] ); - micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_scalar_unary(mach, inst, micro_rsq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_EXP: @@ -2208,41 +2501,15 @@ exec_instruction( break; case TGSI_OPCODE_SLT: - /* TGSI_OPCODE_SETLT */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_lt(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_slt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_SGE: - /* TGSI_OPCODE_SETGE */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_le(&d[chan_index], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_sge, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_MAD: - /* TGSI_OPCODE_MADD */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_mul( &r[0], &r[0], &r[1] ); - FETCH( &r[1], 2, chan_index ); - micro_add(&d[chan_index], &r[0], &r[1]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_trinary(mach, inst, micro_mad, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_SUB: @@ -2257,17 +2524,7 @@ exec_instruction( break; case TGSI_OPCODE_LRP: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - FETCH(&r[2], 2, chan_index); - micro_sub( &r[1], &r[1], &r[2] ); - micro_mul( &r[0], &r[0], &r[1] ); - micro_add(&d[chan_index], &r[0], &r[2]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_trinary(mach, inst, micro_lrp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_CND: @@ -2301,13 +2558,7 @@ exec_instruction( break; case TGSI_OPCODE_FRC: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_frc(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_frc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_CLAMP: @@ -2323,33 +2574,20 @@ exec_instruction( } break; + case TGSI_OPCODE_FLR: + exec_vector_unary(mach, inst, micro_flr, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); + break; + case TGSI_OPCODE_ROUND: - case TGSI_OPCODE_ARR: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_rnd(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_rnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_EX2: - FETCH(&r[0], 0, CHAN_X); - - micro_exp2( &r[0], &r[0] ); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_scalar_unary(mach, inst, micro_exp2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_LG2: - FETCH( &r[0], 0, CHAN_X ); - micro_lg2( &r[0], &r[0] ); - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_scalar_unary(mach, inst, micro_lg2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_POW: @@ -2402,15 +2640,9 @@ exec_instruction( } break; - case TGSI_OPCODE_ABS: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&r[0], 0, chan_index); - micro_abs(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } - break; + case TGSI_OPCODE_ABS: + exec_vector_unary(mach, inst, micro_abs, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); + break; case TGSI_OPCODE_RCC: FETCH(&r[0], 0, CHAN_X); @@ -2449,33 +2681,15 @@ exec_instruction( break; case TGSI_OPCODE_COS: - FETCH(&r[0], 0, CHAN_X); - - micro_cos( &r[0], &r[0] ); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_scalar_unary(mach, inst, micro_cos, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_DDX: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_ddx(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_ddx, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_DDY: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_ddy(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_ddy, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_KILP: @@ -2552,14 +2766,7 @@ exec_instruction( break; case TGSI_OPCODE_SEQ: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_eq(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_seq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_SFL: @@ -2569,44 +2776,19 @@ exec_instruction( break; case TGSI_OPCODE_SGT: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_le(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_sgt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_SIN: - FETCH( &r[0], 0, CHAN_X ); - micro_sin( &r[0], &r[0] ); - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_scalar_unary(mach, inst, micro_sin, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_SLE: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_le(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_sle, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_SNE: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_eq(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_sne, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_STR: @@ -2711,6 +2893,10 @@ exec_instruction( assert (0); break; + case TGSI_OPCODE_ARR: + exec_vector_unary(mach, inst, micro_arr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); + break; + case TGSI_OPCODE_BRA: assert (0); break; @@ -2730,6 +2916,8 @@ exec_instruction( mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop; mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop; mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop; + mach->CallStack[mach->CallStackTop].SwitchStackTop = mach->SwitchStackTop; + mach->CallStack[mach->CallStackTop].BreakStackTop = mach->BreakStackTop; /* note that PC was already incremented above */ mach->CallStack[mach->CallStackTop].ReturnAddr = *pc; @@ -2737,12 +2925,17 @@ exec_instruction( /* Second, push the Cond, Loop, Cont, Func stacks */ assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); - mach->CondStack[mach->CondStackTop++] = mach->CondMask; assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); - mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); - mach->ContStack[mach->ContStackTop++] = mach->ContMask; + assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING); + assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); + + mach->CondStack[mach->CondStackTop++] = mach->CondMask; + mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; + mach->ContStack[mach->ContStackTop++] = mach->ContMask; + mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch; + mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; /* Finally, jump to the subroutine */ @@ -2775,6 +2968,12 @@ exec_instruction( mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; mach->ContMask = mach->ContStack[mach->ContStackTop]; + mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop; + mach->Switch = mach->SwitchStack[mach->SwitchStackTop]; + + mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop; + mach->BreakType = mach->BreakStack[mach->BreakStackTop]; + assert(mach->FuncStackTop > 0); mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; @@ -2785,14 +2984,7 @@ exec_instruction( break; case TGSI_OPCODE_SSG: - /* TGSI_OPCODE_SGN */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_sgn(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_sgn, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_CMP: @@ -2976,87 +3168,31 @@ exec_instruction( break; case TGSI_OPCODE_CEIL: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_ceil(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_ceil, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_I2F: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_i2f(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_i2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_INT); break; case TGSI_OPCODE_NOT: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_not(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_not, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; case TGSI_OPCODE_TRUNC: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_trunc(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_trunc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_SHL: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_shl(&d[chan_index], &r[0], &r[1]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } - break; - - case TGSI_OPCODE_SHR: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_ishr(&d[chan_index], &r[0], &r[1]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_shl, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; case TGSI_OPCODE_AND: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_and(&d[chan_index], &r[0], &r[1]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_and, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; case TGSI_OPCODE_OR: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_or(&d[chan_index], &r[0], &r[1]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_or, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; case TGSI_OPCODE_MOD: @@ -3064,14 +3200,7 @@ exec_instruction( break; case TGSI_OPCODE_XOR: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_xor(&d[chan_index], &r[0], &r[1]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_xor, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; case TGSI_OPCODE_SAD: @@ -3087,13 +3216,11 @@ exec_instruction( break; case TGSI_OPCODE_EMIT: - mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16; - mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; + emit_vertex(mach); break; case TGSI_OPCODE_ENDPRIM: - mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++; - mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0; + emit_primitive(mach); break; case TGSI_OPCODE_BGNFOR: @@ -3122,11 +3249,15 @@ exec_instruction( case TGSI_OPCODE_BGNLOOP: /* push LoopMask and ContMasks */ assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); - mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); - mach->ContStack[mach->ContStackTop++] = mach->ContMask; assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); + + mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; + mach->ContStack[mach->ContStackTop++] = mach->ContMask; mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1; + mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; + mach->BreakType = TGSI_EXEC_BREAK_INSIDE_LOOP; break; case TGSI_OPCODE_ENDFOR: @@ -3173,6 +3304,8 @@ exec_instruction( --mach->LoopLabelStackTop; assert(mach->LoopCounterStackTop > 0); --mach->LoopCounterStackTop; + + mach->BreakType = mach->BreakStack[--mach->BreakStackTop]; } UPDATE_EXEC_MASK(mach); break; @@ -3196,15 +3329,14 @@ exec_instruction( mach->ContMask = mach->ContStack[--mach->ContStackTop]; assert(mach->LoopLabelStackTop > 0); --mach->LoopLabelStackTop; + + mach->BreakType = mach->BreakStack[--mach->BreakStackTop]; } UPDATE_EXEC_MASK(mach); break; case TGSI_OPCODE_BRK: - /* turn off loop channels for each enabled exec channel */ - mach->LoopMask &= ~mach->ExecMask; - /* Todo: if mach->LoopMask == 0, jump to end of loop */ - UPDATE_EXEC_MASK(mach); + exec_break(mach); break; case TGSI_OPCODE_CONT: @@ -3235,6 +3367,12 @@ exec_instruction( mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; mach->ContMask = mach->ContStack[mach->ContStackTop]; + mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop; + mach->Switch = mach->SwitchStack[mach->SwitchStackTop]; + + mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop; + mach->BreakType = mach->BreakStack[mach->BreakStackTop]; + assert(mach->FuncStackTop > 0); mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; @@ -3265,11 +3403,116 @@ exec_instruction( UPDATE_EXEC_MASK(mach); break; + case TGSI_OPCODE_F2I: + exec_vector_unary(mach, inst, micro_f2i, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); + break; + + case TGSI_OPCODE_IDIV: + exec_vector_binary(mach, inst, micro_idiv, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); + break; + + case TGSI_OPCODE_IMAX: + exec_vector_binary(mach, inst, micro_imax, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); + break; + + case TGSI_OPCODE_IMIN: + exec_vector_binary(mach, inst, micro_imin, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); + break; + + case TGSI_OPCODE_INEG: + exec_vector_unary(mach, inst, micro_ineg, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); + break; + + case TGSI_OPCODE_ISGE: + exec_vector_binary(mach, inst, micro_isge, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); + break; + + case TGSI_OPCODE_ISHR: + exec_vector_binary(mach, inst, micro_ishr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); + break; + + case TGSI_OPCODE_ISLT: + exec_vector_binary(mach, inst, micro_islt, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); + break; + + case TGSI_OPCODE_F2U: + exec_vector_unary(mach, inst, micro_f2u, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT); + break; + + case TGSI_OPCODE_U2F: + exec_vector_unary(mach, inst, micro_u2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_UADD: + exec_vector_binary(mach, inst, micro_uadd, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_UDIV: + exec_vector_binary(mach, inst, micro_udiv, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_UMAD: + exec_vector_trinary(mach, inst, micro_umad, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_UMAX: + exec_vector_binary(mach, inst, micro_umax, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_UMIN: + exec_vector_binary(mach, inst, micro_umin, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_UMOD: + exec_vector_binary(mach, inst, micro_umod, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_UMUL: + exec_vector_binary(mach, inst, micro_umul, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_USEQ: + exec_vector_binary(mach, inst, micro_useq, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_USGE: + exec_vector_binary(mach, inst, micro_usge, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_USHR: + exec_vector_binary(mach, inst, micro_ushr, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_USLT: + exec_vector_binary(mach, inst, micro_uslt, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_USNE: + exec_vector_binary(mach, inst, micro_usne, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_SWITCH: + exec_switch(mach, inst); + break; + + case TGSI_OPCODE_CASE: + exec_case(mach, inst); + break; + + case TGSI_OPCODE_DEFAULT: + exec_default(mach); + break; + + case TGSI_OPCODE_ENDSWITCH: + exec_endswitch(mach); + break; + default: assert( 0 ); } } + #define DEBUG_EXECUTION 0 @@ -3289,9 +3532,13 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) mach->FuncMask = 0xf; mach->ExecMask = 0xf; + mach->Switch.mask = 0xf; + assert(mach->CondStackTop == 0); assert(mach->LoopStackTop == 0); assert(mach->ContStackTop == 0); + assert(mach->SwitchStackTop == 0); + assert(mach->BreakStackTop == 0); assert(mach->CallStackTop == 0); mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; @@ -3348,11 +3595,11 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) if (j > 0) { debug_printf(" "); } - debug_printf("(%6f, %6f, %6f, %6f)\n", - temps[i].xyzw[0].f[j], - temps[i].xyzw[1].f[j], - temps[i].xyzw[2].f[j], - temps[i].xyzw[3].f[j]); + debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", + temps[i].xyzw[0].f[j], temps[i].xyzw[0].u[j], + temps[i].xyzw[1].f[j], temps[i].xyzw[1].u[j], + temps[i].xyzw[2].f[j], temps[i].xyzw[2].u[j], + temps[i].xyzw[3].f[j], temps[i].xyzw[3].u[j]); } } } @@ -3366,11 +3613,11 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) if (j > 0) { debug_printf(" "); } - debug_printf("{%6f, %6f, %6f, %6f}\n", - outputs[i].xyzw[0].f[j], - outputs[i].xyzw[1].f[j], - outputs[i].xyzw[2].f[j], - outputs[i].xyzw[3].f[j]); + debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", + outputs[i].xyzw[0].f[j], outputs[i].xyzw[0].u[j], + outputs[i].xyzw[1].f[j], outputs[i].xyzw[1].u[j], + outputs[i].xyzw[2].f[j], outputs[i].xyzw[2].u[j], + outputs[i].xyzw[3].f[j], outputs[i].xyzw[3].u[j]); } } } @@ -3392,6 +3639,8 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) assert(mach->CondStackTop == 0); assert(mach->LoopStackTop == 0); assert(mach->ContStackTop == 0); + assert(mach->SwitchStackTop == 0); + assert(mach->BreakStackTop == 0); assert(mach->CallStackTop == 0); return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index fd94c1bc440..aa3a98d7f18 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -179,6 +179,7 @@ struct tgsi_exec_labels #define TGSI_EXEC_MAX_COND_NESTING 32 #define TGSI_EXEC_MAX_LOOP_NESTING 32 +#define TGSI_EXEC_MAX_SWITCH_NESTING 32 #define TGSI_EXEC_MAX_CALL_NESTING 32 /* The maximum number of input attributes per vertex. For 2D @@ -191,6 +192,14 @@ struct tgsi_exec_labels */ #define TGSI_EXEC_MAX_CONST_BUFFER 4096 +/* The maximum number of vertices per primitive */ +#define TGSI_MAX_PRIM_VERTICES 6 + +/* The maximum number of primitives to be generated */ +#define TGSI_MAX_PRIMITIVES 64 + +/* The maximum total number of vertices */ +#define TGSI_MAX_TOTAL_VERTICES (TGSI_MAX_PRIM_VERTICES * TGSI_MAX_PRIMITIVES * PIPE_MAX_ATTRIBS) /** function call/activation record */ struct tgsi_call_record @@ -198,10 +207,29 @@ struct tgsi_call_record uint CondStackTop; uint LoopStackTop; uint ContStackTop; + int SwitchStackTop; + int BreakStackTop; uint ReturnAddr; }; +/* Switch-case block state. */ +struct tgsi_switch_record { + uint mask; /**< execution mask */ + union tgsi_exec_channel selector; /**< a value case statements are compared to */ + uint defaultMask; /**< non-execute mask for default case */ +}; + + +enum tgsi_break_type { + TGSI_EXEC_BREAK_INSIDE_LOOP, + TGSI_EXEC_BREAK_INSIDE_SWITCH +}; + + +#define TGSI_EXEC_MAX_BREAK_STACK (TGSI_EXEC_MAX_LOOP_NESTING + TGSI_EXEC_MAX_SWITCH_NESTING) + + /** * Run-time virtual machine state for executing TGSI shader. */ @@ -214,8 +242,8 @@ struct tgsi_exec_machine float Imms[TGSI_EXEC_NUM_IMMEDIATES][4]; - struct tgsi_exec_vector Inputs[PIPE_MAX_ATTRIBS]; - struct tgsi_exec_vector Outputs[PIPE_MAX_ATTRIBS]; + struct tgsi_exec_vector Inputs[TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS]; + struct tgsi_exec_vector Outputs[TGSI_MAX_TOTAL_VERTICES]; struct tgsi_exec_vector *Addrs; struct tgsi_exec_vector *Predicates; @@ -229,6 +257,8 @@ struct tgsi_exec_machine /* GEOMETRY processor only. */ unsigned *Primitives; + unsigned NumOutputs; + unsigned MaxGeometryShaderOutputs; /* FRAGMENT processor only. */ const struct tgsi_interp_coef *InterpCoefs; @@ -242,6 +272,12 @@ struct tgsi_exec_machine uint FuncMask; /**< For function calls */ uint ExecMask; /**< = CondMask & LoopMask */ + /* Current switch-case state. */ + struct tgsi_switch_record Switch; + + /* Current break type. */ + enum tgsi_break_type BreakType; + /** Condition mask stack (for nested conditionals) */ uint CondStack[TGSI_EXEC_MAX_COND_NESTING]; int CondStackTop; @@ -262,6 +298,13 @@ struct tgsi_exec_machine uint ContStack[TGSI_EXEC_MAX_LOOP_NESTING]; int ContStackTop; + /** Switch case stack */ + struct tgsi_switch_record SwitchStack[TGSI_EXEC_MAX_SWITCH_NESTING]; + int SwitchStackTop; + + enum tgsi_break_type BreakStack[TGSI_EXEC_MAX_BREAK_STACK]; + int BreakStackTop; + /** Function execution mask stack (for executing subroutine code) */ uint FuncStack[TGSI_EXEC_MAX_CALL_NESTING]; int FuncStackTop; diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c index be375cabb8b..de0e09cdbae 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -119,7 +119,7 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { 1, 1, 0, 0, 0, 0, "NOT", TGSI_OPCODE_NOT }, { 1, 1, 0, 0, 0, 0, "TRUNC", TGSI_OPCODE_TRUNC }, { 1, 2, 0, 0, 0, 0, "SHL", TGSI_OPCODE_SHL }, - { 1, 2, 0, 0, 0, 0, "SHR", TGSI_OPCODE_SHR }, + { 0, 0, 0, 0, 0, 0, "", 88 }, /* removed */ { 1, 2, 0, 0, 0, 0, "AND", TGSI_OPCODE_AND }, { 1, 2, 0, 0, 0, 0, "OR", TGSI_OPCODE_OR }, { 1, 2, 0, 0, 0, 0, "MOD", TGSI_OPCODE_MOD }, @@ -149,7 +149,33 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { 0, 1, 0, 0, 0, 0, "BREAKC", TGSI_OPCODE_BREAKC }, { 0, 1, 0, 0, 0, 0, "KIL", TGSI_OPCODE_KIL }, { 0, 0, 0, 0, 0, 0, "END", TGSI_OPCODE_END }, - { 0, 0, 0, 0, 0, 0, "", 118 } /* removed */ + { 0, 0, 0, 0, 0, 0, "", 118 }, /* removed */ + { 1, 1, 0, 0, 0, 0, "F2I", TGSI_OPCODE_F2I }, + { 1, 2, 0, 0, 0, 0, "IDIV", TGSI_OPCODE_IDIV }, + { 1, 2, 0, 0, 0, 0, "IMAX", TGSI_OPCODE_IMAX }, + { 1, 2, 0, 0, 0, 0, "IMIN", TGSI_OPCODE_IMIN }, + { 1, 1, 0, 0, 0, 0, "INEG", TGSI_OPCODE_INEG }, + { 1, 2, 0, 0, 0, 0, "ISGE", TGSI_OPCODE_ISGE }, + { 1, 2, 0, 0, 0, 0, "ISHR", TGSI_OPCODE_ISHR }, + { 1, 2, 0, 0, 0, 0, "ISLT", TGSI_OPCODE_ISLT }, + { 1, 1, 0, 0, 0, 0, "F2U", TGSI_OPCODE_F2U }, + { 1, 1, 0, 0, 0, 0, "U2F", TGSI_OPCODE_U2F }, + { 1, 2, 0, 0, 0, 0, "UADD", TGSI_OPCODE_UADD }, + { 1, 2, 0, 0, 0, 0, "UDIV", TGSI_OPCODE_UDIV }, + { 1, 3, 0, 0, 0, 0, "UMAD", TGSI_OPCODE_UMAD }, + { 1, 2, 0, 0, 0, 0, "UMAX", TGSI_OPCODE_UMAX }, + { 1, 2, 0, 0, 0, 0, "UMIN", TGSI_OPCODE_UMIN }, + { 1, 2, 0, 0, 0, 0, "UMOD", TGSI_OPCODE_UMOD }, + { 1, 2, 0, 0, 0, 0, "UMUL", TGSI_OPCODE_UMUL }, + { 1, 2, 0, 0, 0, 0, "USEQ", TGSI_OPCODE_USEQ }, + { 1, 2, 0, 0, 0, 0, "USGE", TGSI_OPCODE_USGE }, + { 1, 2, 0, 0, 0, 0, "USHR", TGSI_OPCODE_USHR }, + { 1, 2, 0, 0, 0, 0, "USLT", TGSI_OPCODE_USLT }, + { 1, 2, 0, 0, 0, 0, "USNE", TGSI_OPCODE_USNE }, + { 0, 1, 0, 0, 0, 0, "SWITCH", TGSI_OPCODE_SWITCH }, + { 0, 1, 0, 0, 0, 0, "CASE", TGSI_OPCODE_CASE }, + { 0, 0, 0, 0, 0, 0, "DEFAULT", TGSI_OPCODE_DEFAULT }, + { 0, 0, 0, 0, 0, 0, "ENDSWITCH", TGSI_OPCODE_ENDSWITCH } }; const struct tgsi_opcode_info * diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h index b34263da489..e4af15c156f 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h +++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h @@ -124,7 +124,6 @@ OP11(I2F) OP11(NOT) OP11(TRUNC) OP12(SHL) -OP12(SHR) OP12(AND) OP12(OR) OP12(MOD) @@ -146,6 +145,28 @@ OP01(IFC) OP01(BREAKC) OP01(KIL) OP00(END) +OP11(F2I) +OP12(IDIV) +OP12(IMAX) +OP12(IMIN) +OP11(INEG) +OP12(ISGE) +OP12(ISHR) +OP12(ISLT) +OP11(F2U) +OP11(U2F) +OP12(UADD) +OP12(UDIV) +OP13(UMAD) +OP12(UMAX) +OP12(UMIN) +OP12(UMOD) +OP12(UMUL) +OP12(USEQ) +OP12(USGE) +OP12(USHR) +OP12(USLT) +OP12(USNE) #undef OP00 diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c index fa65ecb9975..8c7062d850c 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_parse.c +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c @@ -119,17 +119,29 @@ tgsi_parse_token( case TGSI_TOKEN_TYPE_IMMEDIATE: { struct tgsi_full_immediate *imm = &ctx->FullToken.FullImmediate; + uint imm_count; memset(imm, 0, sizeof *imm); copy_token(&imm->Immediate, &token); + imm_count = imm->Immediate.NrTokens - 1; + switch (imm->Immediate.DataType) { case TGSI_IMM_FLOAT32: - { - uint imm_count = imm->Immediate.NrTokens - 1; - for (i = 0; i < imm_count; i++) { - next_token(ctx, &imm->u[i]); - } + for (i = 0; i < imm_count; i++) { + next_token(ctx, &imm->u[i].Float); + } + break; + + case TGSI_IMM_UINT32: + for (i = 0; i < imm_count; i++) { + next_token(ctx, &imm->u[i].Uint); + } + break; + + case TGSI_IMM_INT32: + for (i = 0; i < imm_count; i++) { + next_token(ctx, &imm->u[i].Int); } break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index c27579e7942..9b0644465af 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -26,32 +26,112 @@ **************************************************************************/ #include "util/u_debug.h" +#include "util/u_memory.h" +#include "util/u_prim.h" +#include "cso_cache/cso_hash.h" #include "tgsi_sanity.h" #include "tgsi_info.h" #include "tgsi_iterate.h" -typedef uint reg_flag; - -#define BITS_IN_REG_FLAG (sizeof( reg_flag ) * 8) - -#define MAX_REGISTERS 1024 -#define MAX_REG_FLAGS ((MAX_REGISTERS + BITS_IN_REG_FLAG - 1) / BITS_IN_REG_FLAG) +typedef struct { + uint file : 28; + /* max 2 dimensions */ + uint dimensions : 4; + uint indices[2]; +} scan_register; struct sanity_check_ctx { struct tgsi_iterate_context iter; + struct cso_hash *regs_decl; + struct cso_hash *regs_used; + struct cso_hash *regs_ind_used; - reg_flag regs_decl[TGSI_FILE_COUNT][MAX_REG_FLAGS]; - reg_flag regs_used[TGSI_FILE_COUNT][MAX_REG_FLAGS]; - boolean regs_ind_used[TGSI_FILE_COUNT]; uint num_imms; uint num_instructions; uint index_of_END; uint errors; uint warnings; + uint implied_array_size; }; +static INLINE unsigned +scan_register_key(const scan_register *reg) +{ + unsigned key = reg->file; + key |= (reg->indices[0] << 4); + key |= (reg->indices[1] << 18); + + return key; +} + +static void +fill_scan_register1d(scan_register *reg, + uint file, uint index) +{ + reg->file = file; + reg->dimensions = 1; + reg->indices[0] = index; + reg->indices[1] = 0; +} + +static void +fill_scan_register2d(scan_register *reg, + uint file, uint index1, uint index2) +{ + reg->file = file; + reg->dimensions = 2; + reg->indices[0] = index1; + reg->indices[1] = index2; +} + +static void +scan_register_dst(scan_register *reg, + struct tgsi_full_dst_register *dst) +{ + fill_scan_register1d(reg, + dst->Register.File, + dst->Register.Index); +} + +static void +scan_register_src(scan_register *reg, + struct tgsi_full_src_register *src) +{ + if (src->Register.Dimension) { + /*FIXME: right now we don't support indirect + * multidimensional addressing */ + debug_assert(!src->Dimension.Indirect); + fill_scan_register2d(reg, + src->Register.File, + src->Register.Index, + src->Dimension.Index); + } else { + fill_scan_register1d(reg, + src->Register.File, + src->Register.Index); + } +} + +static scan_register * +create_scan_register_src(struct tgsi_full_src_register *src) +{ + scan_register *reg = MALLOC(sizeof(scan_register)); + scan_register_src(reg, src); + + return reg; +} + +static scan_register * +create_scan_register_dst(struct tgsi_full_dst_register *dst) +{ + scan_register *reg = MALLOC(sizeof(scan_register)); + scan_register_dst(reg, dst); + + return reg; +} + static void report_error( struct sanity_check_ctx *ctx, @@ -99,12 +179,12 @@ check_file_name( static boolean is_register_declared( struct sanity_check_ctx *ctx, - uint file, - int index ) + const scan_register *reg) { - assert( index >= 0 && index < MAX_REGISTERS ); - - return (ctx->regs_decl[file][index / BITS_IN_REG_FLAG] & (1 << (index % BITS_IN_REG_FLAG))) ? TRUE : FALSE; + void *data = cso_hash_find_data_from_template( + ctx->regs_decl, scan_register_key(reg), + (void*)reg, sizeof(scan_register)); + return data ? TRUE : FALSE; } static boolean @@ -112,23 +192,37 @@ is_any_register_declared( struct sanity_check_ctx *ctx, uint file ) { - uint i; + struct cso_hash_iter iter = + cso_hash_first_node(ctx->regs_decl); - for (i = 0; i < MAX_REG_FLAGS; i++) - if (ctx->regs_decl[file][i]) + while (!cso_hash_iter_is_null(iter)) { + scan_register *reg = (scan_register *)cso_hash_iter_data(iter); + if (reg->file == file) return TRUE; + iter = cso_hash_iter_next(iter); + } + return FALSE; } static boolean is_register_used( struct sanity_check_ctx *ctx, - uint file, - int index ) + scan_register *reg) { - assert( index < MAX_REGISTERS ); + void *data = cso_hash_find_data_from_template( + ctx->regs_used, scan_register_key(reg), + reg, sizeof(scan_register)); + return data ? TRUE : FALSE; +} - return (ctx->regs_used[file][index / BITS_IN_REG_FLAG] & (1 << (index % BITS_IN_REG_FLAG))) ? TRUE : FALSE; + +static boolean +is_ind_register_used( + struct sanity_check_ctx *ctx, + scan_register *reg) +{ + return cso_hash_contains(ctx->regs_ind_used, reg->file); } static const char *file_names[TGSI_FILE_COUNT] = @@ -148,31 +242,40 @@ static const char *file_names[TGSI_FILE_COUNT] = static boolean check_register_usage( struct sanity_check_ctx *ctx, - uint file, - int index, + scan_register *reg, const char *name, boolean indirect_access ) { - if (!check_file_name( ctx, file )) + if (!check_file_name( ctx, reg->file )) { + FREE(reg); return FALSE; + } if (indirect_access) { /* Note that 'index' is an offset relative to the value of the - * address register. No range checking done here. - */ - if (!is_any_register_declared( ctx, file )) - report_error( ctx, "%s: Undeclared %s register", file_names[file], name ); - ctx->regs_ind_used[file] = TRUE; + * address register. No range checking done here.*/ + reg->indices[0] = 0; + reg->indices[1] = 0; + if (!is_any_register_declared( ctx, reg->file )) + report_error( ctx, "%s: Undeclared %s register", file_names[reg->file], name ); + if (!is_ind_register_used(ctx, reg)) + cso_hash_insert(ctx->regs_ind_used, reg->file, reg); + else + FREE(reg); } else { - if (index < 0 || index >= MAX_REGISTERS) { - report_error( ctx, "%s[%d]: Invalid %s index", file_names[file], index, name ); - return FALSE; - } - - if (!is_register_declared( ctx, file, index )) - report_error( ctx, "%s[%d]: Undeclared %s register", file_names[file], index, name ); - ctx->regs_used[file][index / BITS_IN_REG_FLAG] |= (1 << (index % BITS_IN_REG_FLAG)); + if (!is_register_declared( ctx, reg )) { + if (reg->dimensions == 2) + report_error( ctx, "%s[%d][%d]: Undeclared %s register", file_names[reg->file], + reg->indices[0], reg->indices[1], name ); + else + report_error( ctx, "%s[%d]: Undeclared %s register", file_names[reg->file], + reg->indices[0], name ); + } + if (!is_register_used( ctx, reg )) + cso_hash_insert(ctx->regs_used, scan_register_key(reg), reg); + else + FREE(reg); } return TRUE; } @@ -210,33 +313,33 @@ iter_instruction( * Mark the registers as used. */ for (i = 0; i < inst->Instruction.NumDstRegs; i++) { + scan_register *reg = create_scan_register_dst(&inst->Dst[i]); check_register_usage( ctx, - inst->Dst[i].Register.File, - inst->Dst[i].Register.Index, + reg, "destination", FALSE ); } for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { + scan_register *reg = create_scan_register_src(&inst->Src[i]); check_register_usage( ctx, - inst->Src[i].Register.File, - inst->Src[i].Register.Index, + reg, "source", (boolean)inst->Src[i].Register.Indirect ); if (inst->Src[i].Register.Indirect) { - uint file; - int index; + scan_register *ind_reg = MALLOC(sizeof(scan_register)); - file = inst->Src[i].Indirect.File; - index = inst->Src[i].Indirect.Index; + fill_scan_register1d(ind_reg, + inst->Src[i].Indirect.File, + inst->Src[i].Indirect.Index); check_register_usage( ctx, - file, - index, + reg, "indirect", FALSE ); - if (!(file == TGSI_FILE_ADDRESS || file == TGSI_FILE_LOOP) || index != 0) { + if (!(reg->file == TGSI_FILE_ADDRESS || reg->file == TGSI_FILE_LOOP) || + reg->indices[0] != 0) { report_warning(ctx, "Indirect register neither ADDR[0] nor LOOP[0]"); } } @@ -266,6 +369,19 @@ iter_instruction( return TRUE; } +static void +check_and_declare(struct sanity_check_ctx *ctx, + scan_register *reg) +{ + if (is_register_declared( ctx, reg)) + report_error( ctx, "%s[%u]: The same register declared more than once", + file_names[reg->file], reg->indices[0] ); + cso_hash_insert(ctx->regs_decl, + scan_register_key(reg), + reg); +} + + static boolean iter_declaration( struct tgsi_iterate_context *iter, @@ -287,9 +403,21 @@ iter_declaration( if (!check_file_name( ctx, file )) return TRUE; for (i = decl->Range.First; i <= decl->Range.Last; i++) { - if (is_register_declared( ctx, file, i )) - report_error( ctx, "%s[%u]: The same register declared more than once", file_names[file], i ); - ctx->regs_decl[file][i / BITS_IN_REG_FLAG] |= (1 << (i % BITS_IN_REG_FLAG)); + /* declared TGSI_FILE_INPUT's for geometry processor + * have an implied second dimension */ + if (file == TGSI_FILE_INPUT && + ctx->iter.processor.Processor == TGSI_PROCESSOR_GEOMETRY) { + uint vert; + for (vert = 0; vert < ctx->implied_array_size; ++vert) { + scan_register *reg = MALLOC(sizeof(scan_register)); + fill_scan_register2d(reg, file, vert, i); + check_and_declare(ctx, reg); + } + } else { + scan_register *reg = MALLOC(sizeof(scan_register)); + fill_scan_register1d(reg, file, i); + check_and_declare(ctx, reg); + } } return TRUE; @@ -301,8 +429,7 @@ iter_immediate( struct tgsi_full_immediate *imm ) { struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter; - - assert( ctx->num_imms < MAX_REGISTERS ); + scan_register *reg; /* No immediates allowed after the first instruction. */ @@ -311,12 +438,16 @@ iter_immediate( /* Mark the register as declared. */ - ctx->regs_decl[TGSI_FILE_IMMEDIATE][ctx->num_imms / BITS_IN_REG_FLAG] |= (1 << (ctx->num_imms % BITS_IN_REG_FLAG)); + reg = MALLOC(sizeof(scan_register)); + fill_scan_register1d(reg, TGSI_FILE_IMMEDIATE, ctx->num_imms); + cso_hash_insert(ctx->regs_decl, scan_register_key(reg), reg); ctx->num_imms++; /* Check data type validity. */ - if (imm->Immediate.DataType != TGSI_IMM_FLOAT32) { + if (imm->Immediate.DataType != TGSI_IMM_FLOAT32 && + imm->Immediate.DataType != TGSI_IMM_UINT32 && + imm->Immediate.DataType != TGSI_IMM_INT32) { report_error( ctx, "(%u): Invalid immediate data type", imm->Immediate.DataType ); return TRUE; } @@ -330,8 +461,12 @@ iter_property( struct tgsi_iterate_context *iter, struct tgsi_full_property *prop ) { - /*struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter;*/ + struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter; + if (iter->processor.Processor == TGSI_PROCESSOR_GEOMETRY && + prop->Property.PropertyName == TGSI_PROPERTY_GS_INPUT_PRIM) { + ctx->implied_array_size = u_vertices_per_prim(prop->u[0].Data); + } return TRUE; } @@ -340,7 +475,6 @@ epilog( struct tgsi_iterate_context *iter ) { struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter; - uint file; /* There must be an END instruction somewhere. */ @@ -350,13 +484,17 @@ epilog( /* Check if all declared registers were used. */ - for (file = TGSI_FILE_NULL; file < TGSI_FILE_COUNT; file++) { - uint i; - - for (i = 0; i < MAX_REGISTERS; i++) { - if (is_register_declared( ctx, file, i ) && !is_register_used( ctx, file, i ) && !ctx->regs_ind_used[file]) { - report_warning( ctx, "%s[%u]: Register never used", file_names[file], i ); + { + struct cso_hash_iter iter = + cso_hash_first_node(ctx->regs_decl); + + while (!cso_hash_iter_is_null(iter)) { + scan_register *reg = (scan_register *)cso_hash_iter_data(iter); + if (!is_register_used(ctx, reg) && !is_ind_register_used(ctx, reg)) { + report_warning( ctx, "%s[%u]: Register never used", + file_names[reg->file], reg->indices[0] ); } + iter = cso_hash_iter_next(iter); } } @@ -368,6 +506,18 @@ epilog( return TRUE; } +static void +regs_hash_destroy(struct cso_hash *hash) +{ + struct cso_hash_iter iter = cso_hash_first_node(hash); + while (!cso_hash_iter_is_null(iter)) { + scan_register *reg = (scan_register *)cso_hash_iter_data(iter); + iter = cso_hash_erase(hash, iter); + FREE(reg); + } + cso_hash_delete(hash); +} + boolean tgsi_sanity_check( const struct tgsi_token *tokens ) @@ -381,18 +531,23 @@ tgsi_sanity_check( ctx.iter.iterate_property = iter_property; ctx.iter.epilog = epilog; - memset( ctx.regs_decl, 0, sizeof( ctx.regs_decl ) ); - memset( ctx.regs_used, 0, sizeof( ctx.regs_used ) ); - memset( ctx.regs_ind_used, 0, sizeof( ctx.regs_ind_used ) ); + ctx.regs_decl = cso_hash_create(); + ctx.regs_used = cso_hash_create(); + ctx.regs_ind_used = cso_hash_create(); + ctx.num_imms = 0; ctx.num_instructions = 0; ctx.index_of_END = ~0; ctx.errors = 0; ctx.warnings = 0; + ctx.implied_array_size = 0; if (!tgsi_iterate_shader( tokens, &ctx.iter )) return FALSE; + regs_hash_destroy(ctx.regs_decl); + regs_hash_destroy(ctx.regs_used); + regs_hash_destroy(ctx.regs_ind_used); return ctx.errors == 0; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index 0f48b0dc3a1..a6cc773003a 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -229,8 +229,8 @@ tgsi_is_passthrough_shader(const struct tgsi_token *tokens) /* Do a whole bunch of checks for a simple move */ if (fullinst->Instruction.Opcode != TGSI_OPCODE_MOV || - src->Register.File != TGSI_FILE_INPUT || - src->Register.File != TGSI_FILE_SYSTEM_VALUE || + (src->Register.File != TGSI_FILE_INPUT && + src->Register.File != TGSI_FILE_SYSTEM_VALUE) || dst->Register.File != TGSI_FILE_OUTPUT || src->Register.Index != dst->Register.Index || diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index d63c75dafb3..118059ace9c 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -2578,7 +2578,7 @@ emit_instruction( return 0; break; - case TGSI_OPCODE_SHR: + case TGSI_OPCODE_ISHR: return 0; break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c index f000958bfc0..9fcffeda368 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_text.c +++ b/src/gallium/auxiliary/tgsi/tgsi_text.c @@ -27,7 +27,9 @@ #include "util/u_debug.h" #include "util/u_memory.h" +#include "util/u_prim.h" #include "pipe/p_defines.h" +#include "pipe/p_inlines.h" #include "tgsi_text.h" #include "tgsi_build.h" #include "tgsi_info.h" @@ -61,18 +63,20 @@ static boolean uprcase( char c ) } /* - * Ignore case of str1 and assume str2 is already uppercase. + * Ignore case of str1 and assume str1 is already uppercase. * Return TRUE iff str1 and str2 are equal. */ static int streq_nocase_uprcase(const char *str1, const char *str2) { - while (*str1 && uprcase(*str1) == *str2) { + while (*str1 && *str2) { + if (*str1 != uprcase(*str2)) + return FALSE; str1++; str2++; } - return *str1 == *str2; + return TRUE; } static boolean str_match_no_case( const char **pcur, const char *str ) @@ -193,11 +197,26 @@ struct translate_ctx struct tgsi_token *tokens_cur; struct tgsi_token *tokens_end; struct tgsi_header *header; + unsigned processor : 4; + int implied_array_size : 5; }; static void report_error( struct translate_ctx *ctx, const char *msg ) { - debug_printf( "\nError: %s", msg ); + int line = 1; + int column = 1; + const char *itr = ctx->text; + + while (itr != ctx->cur) { + if (*itr == '\n') { + column = 1; + ++line; + } + ++column; + ++itr; + } + + debug_printf( "\nTGSI asm error: %s [%d : %d] \n", msg, line, column ); } /* Parse shader header. @@ -229,6 +248,7 @@ static boolean parse_header( struct translate_ctx *ctx ) if (ctx->tokens_cur >= ctx->tokens_end) return FALSE; *(struct tgsi_processor *) ctx->tokens_cur++ = tgsi_build_processor( processor, ctx->header ); + ctx->processor = processor; return TRUE; } @@ -325,92 +345,36 @@ parse_opt_writemask( return TRUE; } -/* <register_file_bracket> ::= <file> `[' - */ static boolean -parse_register_file_bracket( - struct translate_ctx *ctx, - uint *file ) -{ - if (!parse_file( &ctx->cur, file )) { - report_error( ctx, "Unknown register file" ); - return FALSE; - } - eat_opt_white( &ctx->cur ); - if (*ctx->cur != '[') { - report_error( ctx, "Expected `['" ); - return FALSE; - } - ctx->cur++; - return TRUE; -} +parse_register_dst( struct translate_ctx *ctx, + uint *file, + int *index ); -/* <register_file_bracket_index> ::= <register_file_bracket> <uint> - */ -static boolean -parse_register_file_bracket_index( - struct translate_ctx *ctx, - uint *file, - int *index ) -{ - uint uindex; +struct parsed_src_bracket { + int index; - if (!parse_register_file_bracket( ctx, file )) - return FALSE; - eat_opt_white( &ctx->cur ); - if (!parse_uint( &ctx->cur, &uindex )) { - report_error( ctx, "Expected literal unsigned integer" ); - return FALSE; - } - *index = (int) uindex; - return TRUE; -} + uint ind_file; + int ind_index; + uint ind_comp; +}; -/* Parse destination register operand. - * <register_dst> ::= <register_file_bracket_index> `]' - */ -static boolean -parse_register_dst( - struct translate_ctx *ctx, - uint *file, - int *index ) -{ - if (!parse_register_file_bracket_index( ctx, file, index )) - return FALSE; - eat_opt_white( &ctx->cur ); - if (*ctx->cur != ']') { - report_error( ctx, "Expected `]'" ); - return FALSE; - } - ctx->cur++; - return TRUE; -} -/* Parse source register operand. - * <register_src> ::= <register_file_bracket_index> `]' | - * <register_file_bracket> <register_dst> [`.' (`x' | `y' | `z' | `w')] `]' | - * <register_file_bracket> <register_dst> [`.' (`x' | `y' | `z' | `w')] `+' <uint> `]' | - * <register_file_bracket> <register_dst> [`.' (`x' | `y' | `z' | `w')] `-' <uint> `]' - */ static boolean -parse_register_src( +parse_register_src_bracket( struct translate_ctx *ctx, - uint *file, - int *index, - uint *ind_file, - int *ind_index, - uint *ind_comp) + struct parsed_src_bracket *brackets) { const char *cur; uint uindex; - *ind_comp = TGSI_SWIZZLE_X; - if (!parse_register_file_bracket( ctx, file )) - return FALSE; + memset(brackets, 0, sizeof(struct parsed_src_bracket)); + eat_opt_white( &ctx->cur ); + cur = ctx->cur; - if (parse_file( &cur, ind_file )) { - if (!parse_register_dst( ctx, ind_file, ind_index )) + if (parse_file( &cur, &brackets->ind_file )) { + if (!parse_register_dst( ctx, &brackets->ind_file, + &brackets->ind_index )) return FALSE; eat_opt_white( &ctx->cur ); @@ -420,16 +384,16 @@ parse_register_src( switch (uprcase(*ctx->cur)) { case 'X': - *ind_comp = TGSI_SWIZZLE_X; + brackets->ind_comp = TGSI_SWIZZLE_X; break; case 'Y': - *ind_comp = TGSI_SWIZZLE_Y; + brackets->ind_comp = TGSI_SWIZZLE_Y; break; case 'Z': - *ind_comp = TGSI_SWIZZLE_Z; + brackets->ind_comp = TGSI_SWIZZLE_Z; break; case 'W': - *ind_comp = TGSI_SWIZZLE_W; + brackets->ind_comp = TGSI_SWIZZLE_W; break; default: report_error(ctx, "Expected indirect register swizzle component `x', `y', `z' or `w'"); @@ -450,12 +414,12 @@ parse_register_src( return FALSE; } if (negate) - *index = -(int) uindex; + brackets->index = -(int) uindex; else - *index = (int) uindex; + brackets->index = (int) uindex; } else { - *index = 0; + brackets->index = 0; } } else { @@ -463,9 +427,9 @@ parse_register_src( report_error( ctx, "Expected literal unsigned integer" ); return FALSE; } - *index = (int) uindex; - *ind_file = TGSI_FILE_NULL; - *ind_index = 0; + brackets->index = (int) uindex; + brackets->ind_file = TGSI_FILE_NULL; + brackets->ind_index = 0; } eat_opt_white( &ctx->cur ); if (*ctx->cur != ']') { @@ -476,20 +440,123 @@ parse_register_src( return TRUE; } -/* Parse register declaration. - * <register_dcl> ::= <register_file_bracket_index> `]' | - * <register_file_bracket_index> `..' <index> `]' +static boolean +parse_opt_register_src_bracket( + struct translate_ctx *ctx, + struct parsed_src_bracket *brackets, + int *parsed_brackets) +{ + const char *cur = ctx->cur; + + *parsed_brackets = 0; + + eat_opt_white( &cur ); + if (cur[0] == '[') { + ++cur; + ctx->cur = cur; + + if (!parse_register_src_bracket(ctx, brackets)) + return FALSE; + + *parsed_brackets = 1; + } + + return TRUE; +} + +/* <register_file_bracket> ::= <file> `[' */ static boolean -parse_register_dcl( +parse_register_file_bracket( + struct translate_ctx *ctx, + uint *file ) +{ + if (!parse_file( &ctx->cur, file )) { + report_error( ctx, "Unknown register file" ); + return FALSE; + } + eat_opt_white( &ctx->cur ); + if (*ctx->cur != '[') { + report_error( ctx, "Expected `['" ); + return FALSE; + } + ctx->cur++; + return TRUE; +} + +/* <register_file_bracket_index> ::= <register_file_bracket> <uint> + */ +static boolean +parse_register_file_bracket_index( + struct translate_ctx *ctx, + uint *file, + int *index ) +{ + uint uindex; + + if (!parse_register_file_bracket( ctx, file )) + return FALSE; + eat_opt_white( &ctx->cur ); + if (!parse_uint( &ctx->cur, &uindex )) { + report_error( ctx, "Expected literal unsigned integer" ); + return FALSE; + } + *index = (int) uindex; + return TRUE; +} + +/* Parse source register operand. + * <register_src> ::= <register_file_bracket_index> `]' | + * <register_file_bracket> <register_dst> [`.' (`x' | `y' | `z' | `w')] `]' | + * <register_file_bracket> <register_dst> [`.' (`x' | `y' | `z' | `w')] `+' <uint> `]' | + * <register_file_bracket> <register_dst> [`.' (`x' | `y' | `z' | `w')] `-' <uint> `]' + */ +static boolean +parse_register_src( struct translate_ctx *ctx, uint *file, - int *first, - int *last ) + struct parsed_src_bracket *brackets) +{ + + brackets->ind_comp = TGSI_SWIZZLE_X; + if (!parse_register_file_bracket( ctx, file )) + return FALSE; + if (!parse_register_src_bracket( ctx, brackets )) + return FALSE; + + return TRUE; +} + +struct parsed_dcl_bracket { + uint first; + uint last; +}; + +static boolean +parse_register_dcl_bracket( + struct translate_ctx *ctx, + struct parsed_dcl_bracket *bracket) { - if (!parse_register_file_bracket_index( ctx, file, first )) + uint uindex; + memset(bracket, 0, sizeof(struct parsed_dcl_bracket)); + + eat_opt_white( &ctx->cur ); + + if (!parse_uint( &ctx->cur, &uindex )) { + /* it can be an empty bracket [] which means its range + * is from 0 to some implied size */ + if (ctx->cur[0] == ']' && ctx->implied_array_size != 0) { + bracket->first = 0; + bracket->last = ctx->implied_array_size - 1; + goto cleanup; + } + report_error( ctx, "Expected literal unsigned integer" ); return FALSE; + } + bracket->first = (int) uindex; + eat_opt_white( &ctx->cur ); + if (ctx->cur[0] == '.' && ctx->cur[1] == '.') { uint uindex; @@ -499,12 +566,14 @@ parse_register_dcl( report_error( ctx, "Expected literal integer" ); return FALSE; } - *last = (int) uindex; + bracket->last = (int) uindex; eat_opt_white( &ctx->cur ); } else { - *last = *first; + bracket->last = bracket->first; } + +cleanup: if (*ctx->cur != ']') { report_error( ctx, "Expected `]' or `..'" ); return FALSE; @@ -513,6 +582,70 @@ parse_register_dcl( return TRUE; } +/* Parse register declaration. + * <register_dcl> ::= <register_file_bracket_index> `]' | + * <register_file_bracket_index> `..' <index> `]' + */ +static boolean +parse_register_dcl( + struct translate_ctx *ctx, + uint *file, + struct parsed_dcl_bracket *brackets, + int *num_brackets) +{ + const char *cur; + + *num_brackets = 0; + + if (!parse_register_file_bracket( ctx, file )) + return FALSE; + if (!parse_register_dcl_bracket( ctx, &brackets[0] )) + return FALSE; + + *num_brackets = 1; + + cur = ctx->cur; + eat_opt_white( &cur ); + + if (cur[0] == '[') { + ++cur; + ctx->cur = cur; + if (!parse_register_dcl_bracket( ctx, &brackets[1] )) + return FALSE; + /* for geometry shader we don't really care about + * the first brackets it's always the size of the + * input primitive. so we want to declare just + * the index relevant to the semantics which is in + * the second bracket */ + if (ctx->processor == TGSI_PROCESSOR_GEOMETRY) { + brackets[0] = brackets[1]; + } + *num_brackets = 2; + } + + return TRUE; +} + + +/* Parse destination register operand. + * <register_dst> ::= <register_file_bracket_index> `]' + */ +static boolean +parse_register_dst( + struct translate_ctx *ctx, + uint *file, + int *index ) +{ + if (!parse_register_file_bracket_index( ctx, file, index )) + return FALSE; + eat_opt_white( &ctx->cur ); + if (*ctx->cur != ']') { + report_error( ctx, "Expected `]'" ); + return FALSE; + } + ctx->cur++; + return TRUE; +} static boolean parse_dst_operand( @@ -582,37 +715,44 @@ parse_src_operand( struct tgsi_full_src_register *src ) { uint file; - int index; - uint ind_file; - int ind_index; - uint ind_comp; uint swizzle[4]; boolean parsed_swizzle; + struct parsed_src_bracket bracket[2]; + int parsed_opt_brackets; if (*ctx->cur == '-') { ctx->cur++; eat_opt_white( &ctx->cur ); src->Register.Negate = 1; } - + if (*ctx->cur == '|') { ctx->cur++; eat_opt_white( &ctx->cur ); src->Register.Absolute = 1; } - if (!parse_register_src(ctx, &file, &index, &ind_file, &ind_index, &ind_comp)) + if (!parse_register_src(ctx, &file, &bracket[0])) + return FALSE; + if (!parse_opt_register_src_bracket(ctx, &bracket[1], &parsed_opt_brackets)) return FALSE; + src->Register.File = file; - src->Register.Index = index; - if (ind_file != TGSI_FILE_NULL) { + src->Register.Index = bracket[0].index; + if (bracket[0].ind_file != TGSI_FILE_NULL) { src->Register.Indirect = 1; - src->Indirect.File = ind_file; - src->Indirect.Index = ind_index; - src->Indirect.SwizzleX = ind_comp; - src->Indirect.SwizzleY = ind_comp; - src->Indirect.SwizzleZ = ind_comp; - src->Indirect.SwizzleW = ind_comp; + src->Indirect.File = bracket[0].ind_file; + src->Indirect.Index = bracket[0].ind_index; + src->Indirect.SwizzleX = bracket[0].ind_comp; + src->Indirect.SwizzleY = bracket[0].ind_comp; + src->Indirect.SwizzleZ = bracket[0].ind_comp; + src->Indirect.SwizzleW = bracket[0].ind_comp; + } + if (parsed_opt_brackets) { + src->Register.Dimension = 1; + src->Dimension.Indirect = 0; + src->Dimension.Dimension = 0; + src->Dimension.Index = bracket[1].index; } /* Parse optional swizzle. @@ -791,7 +931,9 @@ static const char *semantic_names[TGSI_SEMANTIC_COUNT] = "PSIZE", "GENERIC", "NORMAL", - "FACE" + "FACE", + "EDGEFLAG", + "PRIM_ID" }; static const char *interpolate_names[TGSI_INTERPOLATE_COUNT] = @@ -805,8 +947,8 @@ static boolean parse_declaration( struct translate_ctx *ctx ) { struct tgsi_full_declaration decl; uint file; - int first; - int last; + struct parsed_dcl_bracket brackets[2]; + int num_brackets; uint writemask; const char *cur; uint advance; @@ -818,7 +960,7 @@ static boolean parse_declaration( struct translate_ctx *ctx ) report_error( ctx, "Syntax error" ); return FALSE; } - if (!parse_register_dcl( ctx, &file, &first, &last )) + if (!parse_register_dcl( ctx, &file, brackets, &num_brackets)) return FALSE; if (!parse_opt_writemask( ctx, &writemask )) return FALSE; @@ -826,8 +968,8 @@ static boolean parse_declaration( struct translate_ctx *ctx ) decl = tgsi_default_full_declaration(); decl.Declaration.File = file; decl.Declaration.UsageMask = writemask; - decl.Range.First = first; - decl.Range.Last = last; + decl.Range.First = brackets[0].first; + decl.Range.Last = brackets[0].last; cur = ctx->cur; eat_opt_white( &cur ); @@ -1027,7 +1169,7 @@ static boolean parse_property( struct translate_ctx *ctx ) } for (property_name = 0; property_name < TGSI_PROPERTY_COUNT; ++property_name) { - if (streq_nocase_uprcase(id, property_names[property_name])) { + if (streq_nocase_uprcase(property_names[property_name], id)) { break; } } @@ -1044,6 +1186,10 @@ static boolean parse_property( struct translate_ctx *ctx ) report_error( ctx, "Unknown primitive name as property!" ); return FALSE; } + if (property_name == TGSI_PROPERTY_GS_INPUT_PRIM && + ctx->processor == TGSI_PROCESSOR_GEOMETRY) { + ctx->implied_array_size = u_vertices_per_prim(values[0]); + } break; default: if (!parse_uint(&ctx->cur, &values[0] )) { diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c index 2713372b059..2b51672b8ea 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -109,8 +109,13 @@ struct ureg_program unsigned nr_outputs; struct { - float v[4]; + union { + float f[4]; + unsigned u[4]; + int i[4]; + } value; unsigned nr; + unsigned type; } immediate[UREG_MAX_IMMEDIATE]; unsigned nr_immediates; @@ -513,22 +518,22 @@ struct ureg_src ureg_DECL_sampler( struct ureg_program *ureg, } - - -static int match_or_expand_immediate( const float *v, - unsigned nr, - float *v2, - unsigned *nr2, - unsigned *swizzle ) +static int +match_or_expand_immediate( const unsigned *v, + unsigned nr, + unsigned *v2, + unsigned *pnr2, + unsigned *swizzle ) { + unsigned nr2 = *pnr2; unsigned i, j; - + *swizzle = 0; for (i = 0; i < nr; i++) { boolean found = FALSE; - for (j = 0; j < *nr2 && !found; j++) { + for (j = 0; j < nr2 && !found; j++) { if (v[i] == v2[j]) { *swizzle |= j << (i * 2); found = TRUE; @@ -536,24 +541,28 @@ static int match_or_expand_immediate( const float *v, } if (!found) { - if (*nr2 >= 4) + if (nr2 >= 4) { return FALSE; + } - v2[*nr2] = v[i]; - *swizzle |= *nr2 << (i * 2); - (*nr2)++; + v2[nr2] = v[i]; + *swizzle |= nr2 << (i * 2); + nr2++; } } + /* Actually expand immediate only when fully succeeded. + */ + *pnr2 = nr2; return TRUE; } - - -struct ureg_src ureg_DECL_immediate( struct ureg_program *ureg, - const float *v, - unsigned nr ) +static struct ureg_src +decl_immediate( struct ureg_program *ureg, + const unsigned *v, + unsigned nr, + unsigned type ) { unsigned i, j; unsigned swizzle; @@ -563,38 +572,82 @@ struct ureg_src ureg_DECL_immediate( struct ureg_program *ureg, */ for (i = 0; i < ureg->nr_immediates; i++) { - if (match_or_expand_immediate( v, - nr, - ureg->immediate[i].v, - &ureg->immediate[i].nr, - &swizzle )) + if (ureg->immediate[i].type != type) { + continue; + } + if (match_or_expand_immediate(v, + nr, + ureg->immediate[i].value.u, + &ureg->immediate[i].nr, + &swizzle)) { goto out; + } } if (ureg->nr_immediates < UREG_MAX_IMMEDIATE) { i = ureg->nr_immediates++; - if (match_or_expand_immediate( v, - nr, - ureg->immediate[i].v, - &ureg->immediate[i].nr, - &swizzle )) + ureg->immediate[i].type = type; + if (match_or_expand_immediate(v, + nr, + ureg->immediate[i].value.u, + &ureg->immediate[i].nr, + &swizzle)) { goto out; + } } - set_bad( ureg ); + set_bad(ureg); out: /* Make sure that all referenced elements are from this immediate. * Has the effect of making size-one immediates into scalars. */ - for (j = nr; j < 4; j++) + for (j = nr; j < 4; j++) { swizzle |= (swizzle & 0x3) << (j * 2); + } + + return ureg_swizzle(ureg_src_register(TGSI_FILE_IMMEDIATE, i), + (swizzle >> 0) & 0x3, + (swizzle >> 2) & 0x3, + (swizzle >> 4) & 0x3, + (swizzle >> 6) & 0x3); +} + + +struct ureg_src +ureg_DECL_immediate( struct ureg_program *ureg, + const float *v, + unsigned nr ) +{ + union { + float f[4]; + unsigned u[4]; + } fu; + unsigned int i; + + for (i = 0; i < nr; i++) { + fu.f[i] = v[i]; + } + + return decl_immediate(ureg, fu.u, nr, TGSI_IMM_FLOAT32); +} + - return ureg_swizzle( ureg_src_register( TGSI_FILE_IMMEDIATE, i ), - (swizzle >> 0) & 0x3, - (swizzle >> 2) & 0x3, - (swizzle >> 4) & 0x3, - (swizzle >> 6) & 0x3); +struct ureg_src +ureg_DECL_immediate_uint( struct ureg_program *ureg, + const unsigned *v, + unsigned nr ) +{ + return decl_immediate(ureg, v, nr, TGSI_IMM_UINT32); +} + + +struct ureg_src +ureg_DECL_immediate_int( struct ureg_program *ureg, + const int *v, + unsigned nr ) +{ + return decl_immediate(ureg, (const unsigned *)v, nr, TGSI_IMM_INT32); } @@ -818,8 +871,8 @@ ureg_insn(struct ureg_program *ureg, unsigned i; boolean saturate; boolean predicate; - boolean negate; - unsigned swizzle[4]; + boolean negate = FALSE; + unsigned swizzle[4] = { 0 }; saturate = nr_dst ? dst[0].Saturate : FALSE; predicate = nr_dst ? dst[0].Predicate : FALSE; @@ -865,8 +918,8 @@ ureg_tex_insn(struct ureg_program *ureg, unsigned i; boolean saturate; boolean predicate; - boolean negate; - unsigned swizzle[4]; + boolean negate = FALSE; + unsigned swizzle[4] = { 0 }; saturate = nr_dst ? dst[0].Saturate : FALSE; predicate = nr_dst ? dst[0].Predicate : FALSE; @@ -982,21 +1035,23 @@ static void emit_decl_range( struct ureg_program *ureg, out[1].decl_range.Last = first + count - 1; } -static void emit_immediate( struct ureg_program *ureg, - const float *v ) +static void +emit_immediate( struct ureg_program *ureg, + const unsigned *v, + unsigned type ) { union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 5 ); out[0].value = 0; out[0].imm.Type = TGSI_TOKEN_TYPE_IMMEDIATE; out[0].imm.NrTokens = 5; - out[0].imm.DataType = TGSI_IMM_FLOAT32; + out[0].imm.DataType = type; out[0].imm.Padding = 0; - out[1].imm_data.Float = v[0]; - out[2].imm_data.Float = v[1]; - out[3].imm_data.Float = v[2]; - out[4].imm_data.Float = v[3]; + out[1].imm_data.Uint = v[0]; + out[2].imm_data.Uint = v[1]; + out[3].imm_data.Uint = v[2]; + out[4].imm_data.Uint = v[3]; } @@ -1091,7 +1146,8 @@ static void emit_decls( struct ureg_program *ureg ) for (i = 0; i < ureg->nr_immediates; i++) { emit_immediate( ureg, - ureg->immediate[i].v ); + ureg->immediate[i].value.u, + ureg->immediate[i].type ); } } diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h index 179862d4e27..38e2fd8d0a9 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h @@ -154,6 +154,16 @@ ureg_DECL_immediate( struct ureg_program *, unsigned nr ); struct ureg_src +ureg_DECL_immediate_uint( struct ureg_program *, + const unsigned *v, + unsigned nr ); + +struct ureg_src +ureg_DECL_immediate_int( struct ureg_program *, + const int *v, + unsigned nr ); + +struct ureg_src ureg_DECL_constant( struct ureg_program *, unsigned index ); @@ -227,6 +237,90 @@ ureg_imm1f( struct ureg_program *ureg, return ureg_DECL_immediate( ureg, v, 1 ); } +static INLINE struct ureg_src +ureg_imm4u( struct ureg_program *ureg, + unsigned a, unsigned b, + unsigned c, unsigned d) +{ + unsigned v[4]; + v[0] = a; + v[1] = b; + v[2] = c; + v[3] = d; + return ureg_DECL_immediate_uint( ureg, v, 4 ); +} + +static INLINE struct ureg_src +ureg_imm3u( struct ureg_program *ureg, + unsigned a, unsigned b, + unsigned c) +{ + unsigned v[3]; + v[0] = a; + v[1] = b; + v[2] = c; + return ureg_DECL_immediate_uint( ureg, v, 3 ); +} + +static INLINE struct ureg_src +ureg_imm2u( struct ureg_program *ureg, + unsigned a, unsigned b) +{ + unsigned v[2]; + v[0] = a; + v[1] = b; + return ureg_DECL_immediate_uint( ureg, v, 2 ); +} + +static INLINE struct ureg_src +ureg_imm1u( struct ureg_program *ureg, + unsigned a) +{ + return ureg_DECL_immediate_uint( ureg, &a, 1 ); +} + +static INLINE struct ureg_src +ureg_imm4i( struct ureg_program *ureg, + int a, int b, + int c, int d) +{ + int v[4]; + v[0] = a; + v[1] = b; + v[2] = c; + v[3] = d; + return ureg_DECL_immediate_int( ureg, v, 4 ); +} + +static INLINE struct ureg_src +ureg_imm3i( struct ureg_program *ureg, + int a, int b, + int c) +{ + int v[3]; + v[0] = a; + v[1] = b; + v[2] = c; + return ureg_DECL_immediate_int( ureg, v, 3 ); +} + +static INLINE struct ureg_src +ureg_imm2i( struct ureg_program *ureg, + int a, int b) +{ + int v[2]; + v[0] = a; + v[1] = b; + return ureg_DECL_immediate_int( ureg, v, 2 ); +} + +static INLINE struct ureg_src +ureg_imm1i( struct ureg_program *ureg, + int a) +{ + return ureg_DECL_immediate_int( ureg, &a, 1 ); +} + /*********************************************************************** * Functions for patching up labels */ |