/************************************************************************** * * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * **************************************************************************/ /** * TGSI interpreter/executor. * * Flow control information: * * Since we operate on 'quads' (4 pixels or 4 vertices in parallel) * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special * care since a condition may be true for some quad components but false * for other components. * * We basically execute all statements (even if they're in the part of * an IF/ELSE clause that's "not taken") and use a special mask to * control writing to destination registers. This is the ExecMask. * See store_dest(). * * The ExecMask is computed from three other masks (CondMask, LoopMask and * ContMask) which are controlled by the flow control instructions (namely: * (IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT). * * * Authors: * Michal Krol * Brian Paul */ #include "pipe/p_compiler.h" #include "pipe/p_state.h" #include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" #include "tgsi_exec.h" #include "util/u_memory.h" #include "util/u_math.h" #define FAST_MATH 1 #define TILE_TOP_LEFT 0 #define TILE_TOP_RIGHT 1 #define TILE_BOTTOM_LEFT 2 #define TILE_BOTTOM_RIGHT 3 #define CHAN_X 0 #define CHAN_Y 1 #define CHAN_Z 2 #define CHAN_W 3 /* * Shorthand locations of various utility registers (_I = Index, _C = Channel) */ #define TEMP_0_I TGSI_EXEC_TEMP_00000000_I #define TEMP_0_C TGSI_EXEC_TEMP_00000000_C #define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I #define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C #define TEMP_80_I TGSI_EXEC_TEMP_80000000_I #define TEMP_80_C TGSI_EXEC_TEMP_80000000_C #define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I #define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C #define TEMP_1_I TGSI_EXEC_TEMP_ONE_I #define TEMP_1_C TGSI_EXEC_TEMP_ONE_C #define TEMP_2_I TGSI_EXEC_TEMP_TWO_I #define TEMP_2_C TGSI_EXEC_TEMP_TWO_C #define TEMP_128_I TGSI_EXEC_TEMP_128_I #define TEMP_128_C TGSI_EXEC_TEMP_128_C #define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I #define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C #define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I #define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C #define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I #define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C #define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I #define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C #define TEMP_CC_I TGSI_EXEC_TEMP_CC_I #define TEMP_CC_C TGSI_EXEC_TEMP_CC_C #define TEMP_3_I TGSI_EXEC_TEMP_THREE_I #define TEMP_3_C TGSI_EXEC_TEMP_THREE_C #define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I #define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C #define TEMP_R0 TGSI_EXEC_TEMP_R0 #define IS_CHANNEL_ENABLED(INST, CHAN)\ ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) #define IS_CHANNEL_ENABLED2(INST, CHAN)\ ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN))) #define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\ for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\ if (IS_CHANNEL_ENABLED( INST, CHAN )) #define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\ for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\ if (IS_CHANNEL_ENABLED2( INST, CHAN )) /** The execution mask depends on the conditional mask and the loop mask */ #define UPDATE_EXEC_MASK(MACH) \ MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask /** * Initialize machine state by expanding tokens to full instructions, * allocating temporary storage, setting up constants, etc. * After this, we can call tgsi_exec_machine_run() many times. */ void tgsi_exec_machine_bind_shader( struct tgsi_exec_machine *mach, const struct tgsi_token *tokens, uint numSamplers, struct tgsi_sampler **samplers) { uint k; struct tgsi_parse_context parse; struct tgsi_exec_labels *labels = &mach->Labels; struct tgsi_full_instruction *instructions; struct tgsi_full_declaration *declarations; uint maxInstructions = 10, numInstructions = 0; uint maxDeclarations = 10, numDeclarations = 0; uint instno = 0; #if 0 tgsi_dump(tokens, 0); #endif util_init_math(); mach->Tokens = tokens; mach->Samplers = samplers; k = tgsi_parse_init (&parse, mach->Tokens); if (k != TGSI_PARSE_OK) { debug_printf( "Problem parsing!\n" ); return; } mach->Processor = parse.FullHeader.Processor.Processor; mach->ImmLimit = 0; labels->count = 0; declarations = (struct tgsi_full_declaration *) MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) ); if (!declarations) { return; } instructions = (struct tgsi_full_instruction *) MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) ); if (!instructions) { FREE( declarations ); return; } while( !tgsi_parse_end_of_tokens( &parse ) ) { uint pointer = parse.Position; uint i; tgsi_parse_token( &parse ); switch( parse.FullToken.Token.Type ) { case TGSI_TOKEN_TYPE_DECLARATION: /* save expanded declaration */ if (numDeclarations == maxDeclarations) { declarations = REALLOC(declarations, maxDeclarations * sizeof(struct tgsi_full_declaration), (maxDeclarations + 10) * sizeof(struct tgsi_full_declaration)); maxDeclarations += 10; } memcpy(declarations + numDeclarations, &parse.FullToken.FullDeclaration, sizeof(declarations[0])); numDeclarations++; break; case TGSI_TOKEN_TYPE_IMMEDIATE: { uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; assert( size % 4 == 0 ); assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES ); for( i = 0; i < size; i++ ) { mach->Imms[mach->ImmLimit + i / 4][i % 4] = parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float; } mach->ImmLimit += size / 4; } break; case TGSI_TOKEN_TYPE_INSTRUCTION: assert( labels->count < MAX_LABELS ); labels->labels[labels->count][0] = instno; labels->labels[labels->count][1] = pointer; labels->count++; /* save expanded instruction */ if (numInstructions == maxInstructions) { instructions = REALLOC(instructions, maxInstructions * sizeof(struct tgsi_full_instruction), (maxInstructions + 10) * sizeof(struct tgsi_full_instruction)); maxInstructions += 10; } memcpy(instructions + numInstructions, &parse.FullToken.FullInstruction, sizeof(instructions[0])); numInstructions++; break; default: assert( 0 ); } } tgsi_parse_free (&parse); if (mach->Declarations) { FREE( mach->Declarations ); } mach->Declarations = declarations; mach->NumDeclarations = numDeclarations; if (mach->Instructions) { FREE( mach->Instructions ); } mach->Instructions = instructions; mach->NumInstructions = numInstructions; } void tgsi_exec_machine_init( struct tgsi_exec_machine *mach ) { uint i; mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps); mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR]; /* Setup constants. */ for( i = 0; i < 4; i++ ) { mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000; mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF; mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000; mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF; mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f; mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f; mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f; mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f; mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f; mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f; } } void tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach) { if (mach->Instructions) { FREE(mach->Instructions); mach->Instructions = NULL; mach->NumInstructions = 0; } if (mach->Declarations) { FREE(mach->Declarations); mach->Declarations = NULL; mach->NumDeclarations = 0; } } static void micro_abs( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src ) { dst->f[0] = fabsf( src->f[0] ); dst->f[1] = fabsf( src->f[1] ); dst->f[2] = fabsf( src->f[2] ); dst->f[3] = fabsf( src->f[3] ); } static void micro_add( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src0, const union tgsi_exec_channel *src1 ) { dst->f[0] = src0->f[0] + src1->f[0]; dst->f[1] = src0->f[1] + src1->f[1]; dst->f[2] = src0->f[2] + src1->f[2]; dst->f[3] = src0->f[3] + src1->f[3]; } #if 0 static void micro_iadd( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src0, const union tgsi_exec_channel *src1 ) { dst->i[0] = src0->i[0] + src1->i[0]; dst->i[1] = src0->i[1] + src1->i[1]; dst->i[2] = src0->i[2] + src1->i[2]; dst->i[3] = src0->i[3] + src1->i[3]; } #endif static void micro_and( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src0, const union tgsi_exec_channel *src1 ) { dst->u[0] = src0->u[0] & src1->u[0]; dst->u[1] = src0->u[1] & src1->u[1]; dst->u[2] = src0->u[2] & src1->u[2]; dst->u[3] = src0->u[3] & src1->u[3]; } static void micro_ceil( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src ) { dst->f[0] = ceilf( src->f[0] ); dst->f[1] = ceilf( src->f[1] ); dst->f[2] = ceilf( src->f[2] ); dst->f[3] = ceilf( src->f[3] ); } static void micro_cos( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src ) { dst->f[0] = cosf( src->f[0] ); dst->f[1] = cosf( src->f[1] ); dst->f[2] = cosf( src->f[2] ); dst->f[3] = cosf( src->f[3] ); } static void micro_ddx( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src ) { dst->f[0] = dst->f[1] = dst->f[2] = dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT]; } static void micro_ddy( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src ) { dst->f[0] = dst->f[1] = dst->f[2] = dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT]; } static void micro_div( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src0, const union tgsi_exec_channel *src1 ) { if (src1->f[0] != 0) { dst->f[0] = src0->f[0] / src1->f[0]; } if (src1->f[1] != 0) { dst->f[1] = src0->f[1] / src1->f[1]; } if (src1->f[2] != 0) { dst->f[2] = src0->f[2] / src1->f[2]; } if (src1->f[3] != 0) { dst->f[3] = src0->f[3] / src1->f[3]; } } #if 0 static void micro_udiv( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src0, const union tgsi_exec_channel *src1 ) { dst->u[0] = src0->u[0] / src1->u[0]; dst->u[1] = src0->u[1] / src1->u[1]; dst->u[2] = src0->u[2] / src1->u[2]; dst->u[3] = src0->u[3] / src1->u[3]; } #endif static void micro_eq( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src0, const union tgsi_exec_channel *src1, const union tgsi_exec_channel *src2, const union tgsi_exec_channel *src3 ) { dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0]; dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1]; dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2]; dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3]; } #if 0 static void micro_ieq( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src0, const union tgsi_exec_channel *src1, const union tgsi_exec_channel *src2, const union tgsi_exec_channel *src3 ) { dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0]; dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1]; dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2]; dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3]; } #endif static void micro_exp2( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src) { #if FAST_MATH dst->f[0] = util_fast_exp2( src->f[0] ); dst->f[1] = util_fast_exp2( src->f[1] ); dst->f[2] = util_fast_exp2( src->f[2] ); dst->f[3] = util_fast_exp2( src->f[3] ); #else dst->f[0] = powf( 2.0f, src->f[0] ); dst->f[1] = powf( 2.0f, src->f[1] ); dst->f[2] = powf( 2.0f, src->f[2] ); dst->f[3] = powf( 2.0f, src->f[3] ); #endif } #if 0 static void micro_f2ut( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src ) { dst->u[0] = (uint) src->f[0]; dst->u[1] = (uint) src->f[1]; dst->u[2] = (uint) src->f[2]; dst->u[3] = (uint) src->f[3]; } #endif static void micro_flr( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src ) { dst->f[0] = floorf( src->f[0] ); dst->f[1] = floorf( src->f[1] ); dst->f[2] = floorf( src->f[2] ); dst->f[3] = floorf( src->f[3] ); } static void micro_frc( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src ) { dst->f[0] = src->f[0] - floorf( src->f[0] ); dst->f[1] = src->f[1] - floorf( src->f[1] ); dst->f[2] = src->f[2] - floorf( src->f[2] ); dst->f[3] = src->f[3] - floorf( src->f[3] ); } static void micro_i2f( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src ) { dst->f[0] = (float) src->i[0]; dst->f[1] = (float) src->i[1]; dst->f[2] = (float) src->i[2]; dst->f[3] = (float) src->i[3]; } static void micro_lg2( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src ) { #if FAST_MATH dst->f[0] = util_fast_log2( src->f[0] ); dst->f[1] = util_fast_log2( src->f[1] ); dst->f[2] = util_fast_log2( src->f[2] ); dst->f[3] = util_fast_log2( src->f[3] ); #else dst->f[0] = logf( src->f[0] ) * 1.442695f; dst->f[1] = logf( src->f[1] ) * 1.442695f; dst->f[2] = logf( src->f[2] ) * 1.442695f; dst->f[3] = logf( src->f[3] ) * 1.442695f; #endif } static void micro_le( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src0, const union tgsi_exec_channel *src1, const union tgsi_exec_channel *src2, const union tgsi_exec_channel *src3 ) { dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0]; dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1]; dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2]; dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3]; } static void micro_lt( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src0, const union tgsi_exec_channel *src1, const union tgsi_exec_channel *src2, const union tgsi_exec_channel *src3 ) { dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0]; dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1]; dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2]; dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3]; } #if 0 static void micro_ilt( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src0, const union tgsi_exec_channel *src1, const union tgsi_exec_channel *src2, const union tgsi_exec_channel *src3 ) { dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0]; dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1]; dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2]; dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3]; } #endif #if 0 static void micro_ult( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src0, const union tgsi_exec_channel *src1, const union tgsi_exec_channel *src2, const union tgsi_exec_channel *src3 ) { dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0]; dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1]; dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2]; dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3]; } #endif static void micro_max( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src0, const union tgsi_exec_channel *src1 ) { dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0]; dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1]; dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2]; dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3]; } #if 0 static void micro_imax( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src0, const union tgsi_exec_channel *src1 ) { dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0]; dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1]; dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2]; dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3]; } #endif #if 0 static void micro_umax( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src0, const union tgsi_exec_channel *src1 ) { dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0]; dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1]; dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2]; dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3]; } #endif static void micro_min( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src0, const union tgsi_exec_channel *src1 ) { dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0]; dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1]; dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2]; dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3]; } #if 0 static void micro_imin( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src0, const union tgsi_exec_channel *src1 ) { dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0]; dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1]; dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2]; dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3]; } #endif #if 0 static void micro_umin( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src0, const union tgsi_exec_channel *src1 ) { dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0]; dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1]; dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2]; dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3]; } #endif #if 0 static void micro_umod( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src0, const union tgsi_exec_channel *src1 ) { dst->u[0] = src0->u[0] % src1->u[0]; dst->u[1] = src0->u[1] % src1->u[1]; dst->u[2] = src0->u[2] % src1->u[2]; dst->u[3] = src0->u[3] % src1->u[3]; } #endif static void micro_mul( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src0, const union tgsi_exec_channel *src1 ) { dst->f[0] = src0->f[0] * src1->f[0]; dst->f[1] = src0->f[1] * src1->f[1]; dst->f[2] = src0->f[2] * src1->f[2]; dst->f[3] = src0->f[3] * src1->f[3]; } #if 0 static void micro_imul( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src0, const union tgsi_exec_channel *src1 ) { dst->i[0] = src0->i[0] * src1->i[0]; dst->i[1] = src0->i[1] * src1->i[1]; dst->i[2] = src0->i[2] * src1->i[2]; dst->i[3] = src0->i[3] * src1->i[3]; } #endif #if 0 static void micro_imul64( union tgsi_exec_channel *dst0, union tgsi_exec_channel *dst1, const union tgsi_exec_channel *src0, const union tgsi_exec_channel *src1 ) { dst1->i[0] = src0->i[0] * src1->i[0]; dst1->i[1] = src0->i[1] * src1->i[1]; dst1->i[2] = src0->i[2] * src1->i[2]; dst1->i[3] = src0->i[3] * src1->i[3]; dst0->i[0] = 0; dst0->i[1] = 0; dst0->i[2] = 0; dst0->i[3] = 0; } #endif #if 0 static void micro_umul64( union tgsi_exec_channel *dst0, union tgsi_exec_channel *dst1, const union tgsi_exec_channel *src0, const union tgsi_exec_channel *src1 ) { dst1->u[0] = src0->u[0] * src1->u[0]; dst1->u[1] = src0->u[1] * src1->u[1]; dst1->u[2] = src0->u[2] * src1->u[2]; dst1->u[3] = src0->u[3] * src1->u[3]; dst0->u[0] = 0; dst0->u[1] = 0; dst0->u[2] = 0; dst0->u[3] = 0; } #endif #if 0 static void micro_movc( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src0, const union tgsi_exec_channel *src1, const union tgsi_exec_channel *src2 ) { dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0]; dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1]; dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2]; dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3]; } #endif static void micro_neg( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src ) { dst->f[0] = -src->f[0]; dst->f[1] = -src->f[1]; dst->f[2] = -src->f[2]; dst->f[3] = -src->f[3]; } #if 0 static void micro_ineg( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src ) { dst->i[0] = -src->i[0]; dst->i[1] = -src->i[1]; dst->i[2] = -src->i[2]; dst->i[3] = -src->i[3]; } #endif static void micro_not( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src ) { dst->u[0] = ~src->u[0]; dst->u[1] = ~src->u[1]; dst->u[2] = ~src->u[2]; dst->u[3] = ~src->u[3]; } static void micro_or( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src0, const union tgsi_exec_channel *src1 ) { dst->u[0] = src0->u[0] | src1->u[0]; dst->u[1] = src0->u[1] | src1->u[1]; dst->u[2] = src0->u[2] | src1->u[2]; dst->u[3] = src0->u[3] | src1->u[3]; } static void micro_pow( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src0, const union tgsi_exec_channel *src1 ) { #if FAST_MATH dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] ); dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] ); dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] ); dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] ); #else dst->f[0] = powf( src0->f[0], src1->f[0] ); dst->f[1] = powf( src0->f[1], src1->f[1] ); dst->f[2] = powf( src0->f[2], src1->f[2] ); dst->f[3] = powf( src0->f[3], src1->f[3] ); #endif } static void micro_rnd( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src ) { dst->f[0] = floorf( src->f[0] + 0.5f ); dst->f[1] = floorf( src->f[1] + 0.5f ); dst->f[2] = floorf( src->f[2] + 0.5f ); dst->f[3] = floorf( src->f[3] + 0.5f ); } static void micro_sgn( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src ) { dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f; dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f; dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f; dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f; } static void micro_shl( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src0, const union tgsi_exec_channel *src1 ) { dst->i[0] = src0->i[0] << src1->i[0]; dst->i[1] = src0->i[1] << src1->i[1]; dst->i[2] = src0->i[2] << src1->i[2]; dst->i[3] = src0->i[3] << src1->i[3]; } static void micro_ishr( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src0, const union tgsi_exec_channel *src1 ) { dst->i[0] = src0->i[0] >> src1->i[0]; dst->i[1] = src0->i[1] >> src1->i[1]; dst->i[2] = src0->i[2] >> src1->i[2]; dst->i[3] = src0->i[3] >> src1->i[3]; } static void micro_trunc( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src0 ) { dst->f[0] = (float) (int) src0->f[0]; dst->f[1] = (float) (int) src0->f[1]; dst->f[2] = (float) (int) src0->f[2]; dst->f[3] = (float) (int) src0->f[3]; } #if 0 static void micro_ushr( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src0, const union tgsi_exec_channel *src1 ) { dst->u[0] = src0->u[0] >> src1->u[0]; dst->u[1] = src0->u[1] >> src1->u[1]; dst->u[2] = src0->u[2] >> src1->u[2]; dst->u[3] = src0->u[3] >> src1->u[3]; } #endif static void micro_sin( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src ) { dst->f[0] = sinf( src->f[0] ); dst->f[1] = sinf( src->f[1] ); dst->f[2] = sinf( src->f[2] ); dst->f[3] = sinf( src->f[3] ); } static void micro_sqrt( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src ) { dst->f[0] = sqrtf( src->f[0] ); dst->f[1] = sqrtf( src->f[1] ); dst->f[2] = sqrtf( src->f[2] ); dst->f[3] = sqrtf( src->f[3] ); } static void micro_sub( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src0, const union tgsi_exec_channel *src1 ) { dst->f[0] = src0->f[0] - src1->f[0]; dst->f[1] = src0->f[1] - src1->f[1]; dst->f[2] = src0->f[2] - src1->f[2]; dst->f[3] = src0->f[3] - src1->f[3]; } #if 0 static void micro_u2f( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src ) { dst->f[0] = (float) src->u[0]; dst->f[1] = (float) src->u[1]; dst->f[2] = (float) src->u[2]; dst->f[3] = (float) src->u[3]; } #endif static void micro_xor( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src0, const union tgsi_exec_channel *src1 ) { dst->u[0] = src0->u[0] ^ src1->u[0]; dst->u[1] = src0->u[1] ^ src1->u[1]; dst->u[2] = src0->u[2] ^ src1->u[2]; dst->u[3] = src0->u[3] ^ src1->u[3]; } static void fetch_src_file_channel( const struct tgsi_exec_machine *mach, const uint file, const uint swizzle, const union tgsi_exec_channel *index, union tgsi_exec_channel *chan ) { switch( swizzle ) { case TGSI_EXTSWIZZLE_X: case TGSI_EXTSWIZZLE_Y: case TGSI_EXTSWIZZLE_Z: case TGSI_EXTSWIZZLE_W: switch( file ) { case TGSI_FILE_CONSTANT: assert(mach->Consts); if (index->i[0] < 0) chan->f[0] = 0.0f; else chan->f[0] = mach->Consts[index->i[0]][swizzle]; if (index->i[1] < 0) chan->f[1] = 0.0f; else chan->f[1] = mach->Consts[index->i[1]][swizzle]; if (index->i[2] < 0) chan->f[2] = 0.0f; else chan->f[2] = mach->Consts[index->i[2]][swizzle]; if (index->i[3] < 0) chan->f[3] = 0.0f; else chan->f[3] = mach->Consts[index->i[3]][swizzle]; break; case TGSI_FILE_INPUT: chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0]; chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1]; chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2]; chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3]; break; case TGSI_FILE_TEMPORARY: assert(index->i[0] < TGSI_EXEC_NUM_TEMPS); chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0]; chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1]; chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2]; chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3]; break; case TGSI_FILE_IMMEDIATE: assert( index->i[0] < (int) mach->ImmLimit ); chan->f[0] = mach->Imms[index->i[0]][swizzle]; assert( index->i[1] < (int) mach->ImmLimit ); chan->f[1] = mach->Imms[index->i[1]][swizzle]; assert( index->i[2] < (int) mach->ImmLimit ); chan->f[2] = mach->Imms[index->i[2]][swizzle]; assert( index->i[3] < (int) mach->ImmLimit ); chan->f[3] = mach->Imms[index->i[3]][swizzle]; break; case TGSI_FILE_ADDRESS: chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0]; chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1]; chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2]; chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3]; break; case TGSI_FILE_OUTPUT: /* vertex/fragment output vars can be read too */ chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0]; chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1]; chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2]; chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3]; break; default: assert( 0 ); } break; case TGSI_EXTSWIZZLE_ZERO: *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]; break; case TGSI_EXTSWIZZLE_ONE: *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]; break; default: assert( 0 ); } } static void fetch_source( const struct tgsi_exec_machine *mach, union tgsi_exec_channel *chan, const struct tgsi_full_src_register *reg, const uint chan_index ) { union tgsi_exec_channel index; uint swizzle; /* We start with a direct index into a register file. * * file[1], * where: * file = SrcRegister.File * [1] = SrcRegister.Index */ index.i[0] = index.i[1] = index.i[2] = index.i[3] = reg->SrcRegister.Index; /* There is an extra source register that indirectly subscripts * a register file. The direct index now becomes an offset * that is being added to the indirect register. * * file[ind[2].x+1], * where: * ind = SrcRegisterInd.File * [2] = SrcRegisterInd.Index * .x = SrcRegisterInd.SwizzleX */ if (reg->SrcRegister.Indirect) { union tgsi_exec_channel index2; union tgsi_exec_channel indir_index; const uint execmask = mach->ExecMask; uint i; /* which address register (always zero now) */ index2.i[0] = index2.i[1] = index2.i[2] = index2.i[3] = reg->SrcRegisterInd.Index; /* get current value of address register[swizzle] */ swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterInd, CHAN_X ); fetch_src_file_channel( mach, reg->SrcRegisterInd.File, swizzle, &index2, &indir_index ); /* add value of address register to the offset */ index.i[0] += (int) indir_index.f[0]; index.i[1] += (int) indir_index.f[1]; index.i[2] += (int) indir_index.f[2]; index.i[3] += (int) indir_index.f[3]; /* for disabled execution channels, zero-out the index to * avoid using a potential garbage value. */ for (i = 0; i < QUAD_SIZE; i++) { if ((execmask & (1 << i)) == 0) index.i[i] = 0; } } /* There is an extra source register that is a second * subscript to a register file. Effectively it means that * the register file is actually a 2D array of registers. * * file[1][3] == file[1*sizeof(file[1])+3], * where: * [3] = SrcRegisterDim.Index */ if (reg->SrcRegister.Dimension) { /* The size of the first-order array depends on the register file type. * We need to multiply the index to the first array to get an effective, * "flat" index that points to the beginning of the second-order array. */ switch (reg->SrcRegister.File) { case TGSI_FILE_INPUT: index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; index.i[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; break; case TGSI_FILE_CONSTANT: index.i[0] *= TGSI_EXEC_MAX_CONST_BUFFER; index.i[1] *= TGSI_EXEC_MAX_CONST_BUFFER; index.i[2] *= TGSI_EXEC_MAX_CONST_BUFFER; index.i[3] *= TGSI_EXEC_MAX_CONST_BUFFER; break; default: assert( 0 ); } index.i[0] += reg->SrcRegisterDim.Index; index.i[1] += reg->SrcRegisterDim.Index; index.i[2] += reg->SrcRegisterDim.Index; index.i[3] += reg->SrcRegisterDim.Index; /* Again, the second subscript index can be addressed indirectly * identically to the first one. * Nothing stops us from indirectly addressing the indirect register, * but there is no need for that, so we won't exercise it. * * file[1][ind[4].y+3], * where: * ind = SrcRegisterDimInd.File * [4] = SrcRegisterDimInd.Index * .y = SrcRegisterDimInd.SwizzleX */ if (reg->SrcRegisterDim.Indirect) { union tgsi_exec_channel index2; union tgsi_exec_channel indir_index; const uint execmask = mach->ExecMask; uint i; index2.i[0] = index2.i[1] = index2.i[2] = index2.i[3] = reg->SrcRegisterDimInd.Index; swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterDimInd, CHAN_X ); fetch_src_file_channel( mach, reg->SrcRegisterDimInd.File, swizzle, &index2, &indir_index ); index.i[0] += (int) indir_index.f[0]; index.i[1] += (int) indir_index.f[1]; index.i[2] += (int) indir_index.f[2]; index.i[3] += (int) indir_index.f[3]; /* for disabled execution channels, zero-out the index to * avoid using a potential garbage value. */ for (i = 0; i < QUAD_SIZE; i++) { if ((execmask & (1 << i)) == 0) index.i[i] = 0; } } /* If by any chance there was a need for a 3D array of register * files, we would have to check whether SrcRegisterDim is followed * by a dimension register and continue the saga. */ } swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index ); fetch_src_file_channel( mach, reg->SrcRegister.File, swizzle, &index, chan ); switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) { case TGSI_UTIL_SIGN_CLEAR: micro_abs( chan, chan ); break; case TGSI_UTIL_SIGN_SET: micro_abs( chan, chan ); micro_neg( chan, chan ); break; case TGSI_UTIL_SIGN_TOGGLE: micro_neg( chan, chan ); break; case TGSI_UTIL_SIGN_KEEP: break; } if (reg->SrcRegisterExtMod.Complement) { micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan ); } } static void store_dest( struct tgsi_exec_machine *mach, const union tgsi_exec_channel *chan, const struct tgsi_full_dst_register *reg, const struct tgsi_full_instruction *inst, uint chan_index ) { uint i; union tgsi_exec_channel null; union tgsi_exec_channel *dst; uint execmask = mach->ExecMask; switch (reg->DstRegister.File) { case TGSI_FILE_NULL: dst = &null; break; case TGSI_FILE_OUTPUT: dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] + reg->DstRegister.Index].xyzw[chan_index]; break; case TGSI_FILE_TEMPORARY: assert( reg->DstRegister.Index < TGSI_EXEC_NUM_TEMPS ); dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index]; break; case TGSI_FILE_ADDRESS: dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index]; break; default: assert( 0 ); return; } if (inst->InstructionExtNv.CondFlowEnable) { union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C]; uint swizzle; uint shift; uint mask; uint test; /* Only CC0 supported. */ assert( inst->InstructionExtNv.CondFlowIndex < 1 ); switch (chan_index) { case CHAN_X: swizzle = inst->InstructionExtNv.CondSwizzleX; break; case CHAN_Y: swizzle = inst->InstructionExtNv.CondSwizzleY; break; case CHAN_Z: swizzle = inst->InstructionExtNv.CondSwizzleZ; break; case CHAN_W: swizzle = inst->InstructionExtNv.CondSwizzleW; break; default: assert( 0 ); return; } switch (swizzle) { case TGSI_SWIZZLE_X: shift = TGSI_EXEC_CC_X_SHIFT; mask = TGSI_EXEC_CC_X_MASK; break; case TGSI_SWIZZLE_Y: shift = TGSI_EXEC_CC_Y_SHIFT; mask = TGSI_EXEC_CC_Y_MASK; break; case TGSI_SWIZZLE_Z: shift = TGSI_EXEC_CC_Z_SHIFT; mask = TGSI_EXEC_CC_Z_MASK; break; case TGSI_SWIZZLE_W: shift = TGSI_EXEC_CC_W_SHIFT; mask = TGSI_EXEC_CC_W_MASK; break; default: assert( 0 ); return; } switch (inst->InstructionExtNv.CondMask) { case TGSI_CC_GT: test = ~(TGSI_EXEC_CC_GT << shift) & mask; for (i = 0; i < QUAD_SIZE; i++) if (cc->u[i] & test) execmask &= ~(1 << i); break; case TGSI_CC_EQ: test = ~(TGSI_EXEC_CC_EQ << shift) & mask; for (i = 0; i < QUAD_SIZE; i++) if (cc->u[i] & test) execmask &= ~(1 << i); break; case TGSI_CC_LT: test = ~(TGSI_EXEC_CC_LT << shift) & mask; for (i = 0; i < QUAD_SIZE; i++) if (cc->u[i] & test) execmask &= ~(1 << i); break; case TGSI_CC_GE: test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_EQ) << shift) & mask; for (i = 0; i < QUAD_SIZE; i++) if (cc->u[i] & test) execmask &= ~(1 << i); break; case TGSI_CC_LE: test = ~((TGSI_EXEC_CC_LT | TGSI_EXEC_CC_EQ) << shift) & mask; for (i = 0; i < QUAD_SIZE; i++) if (cc->u[i] & test) execmask &= ~(1 << i); break; case TGSI_CC_NE: test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_LT | TGSI_EXEC_CC_UN) << shift) & mask; for (i = 0; i < QUAD_SIZE; i++) if (cc->u[i] & test) execmask &= ~(1 << i); break; case TGSI_CC_TR: break; case TGSI_CC_FL: for (i = 0; i < QUAD_SIZE; i++) execmask &= ~(1 << i); break; default: assert( 0 ); return; } } switch (inst->Instruction.Saturate) { case TGSI_SAT_NONE: for (i = 0; i < QUAD_SIZE; i++) if (execmask & (1 << i)) dst->i[i] = chan->i[i]; break; case TGSI_SAT_ZERO_ONE: for (i = 0; i < QUAD_SIZE; i++) if (execmask & (1 << i)) { if (chan->f[i] < 0.0f) dst->f[i] = 0.0f; else if (chan->f[i] > 1.0f) dst->f[i] = 1.0f; else dst->i[i] = chan->i[i]; } break; case TGSI_SAT_MINUS_PLUS_ONE: for (i = 0; i < QUAD_SIZE; i++) if (execmask & (1 << i)) { if (chan->f[i] < -1.0f) dst->f[i] = -1.0f; else if (chan->f[i] > 1.0f) dst->f[i] = 1.0f; else dst->i[i] = chan->i[i]; } break; default: assert( 0 ); } if (inst->InstructionExtNv.CondDstUpdate) { union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C]; uint shift; uint mask; /* Only CC0 supported. */ assert( inst->InstructionExtNv.CondDstIndex < 1 ); switch (chan_index) { case CHAN_X: shift = TGSI_EXEC_CC_X_SHIFT; mask = ~TGSI_EXEC_CC_X_MASK; break; case CHAN_Y: shift = TGSI_EXEC_CC_Y_SHIFT; mask = ~TGSI_EXEC_CC_Y_MASK; break; case CHAN_Z: shift = TGSI_EXEC_CC_Z_SHIFT; mask = ~TGSI_EXEC_CC_Z_MASK; break; case CHAN_W: shift = TGSI_EXEC_CC_W_SHIFT; mask = ~TGSI_EXEC_CC_W_MASK; break; default: assert( 0 ); return; } for (i = 0; i < QUAD_SIZE; i++) if (execmask & (1 << i)) { cc->u[i] &= mask; if (dst->f[i] < 0.0f) cc->u[i] |= TGSI_EXEC_CC_LT << shift; else if (dst->f[i] > 0.0f) cc->u[i] |= TGSI_EXEC_CC_GT << shift; else if (dst->f[i] == 0.0f) cc->u[i] |= TGSI_EXEC_CC_EQ << shift; else cc->u[i] |= TGSI_EXEC_CC_UN << shift; } } } #define FETCH(VAL,INDEX,CHAN)\ fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN) #define STORE(VAL,INDEX,CHAN)\ store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN ) /** * Execute ARB-style KIL which is predicated by a src register. * Kill fragment if any of the four values is less than zero. */ static void exec_kil(struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst) { uint uniquemask; uint chan_index; uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ union tgsi_exec_channel r[1]; /* This mask stores component bits that were already tested. Note that * we test if the value is less than zero, so 1.0 and 0.0 need not to be * tested. */ uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE); for (chan_index = 0; chan_index < 4; chan_index++) { uint swizzle; uint i; /* unswizzle channel */ swizzle = tgsi_util_get_full_src_register_extswizzle ( &inst->FullSrcRegisters[0], chan_index); /* check if the component has not been already tested */ if (uniquemask & (1 << swizzle)) continue; uniquemask |= 1 << swizzle; FETCH(&r[0], 0, chan_index); for (i = 0; i < 4; i++) if (r[0].f[i] < 0.0f) kilmask |= 1 << i; } mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; } /** * Execute NVIDIA-style KIL which is predicated by a condition code. * Kill fragment if the condition code is TRUE. */ static void exec_kilp(struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst) { uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ if (inst->InstructionExtNv.CondFlowEnable) { uint swizzle[4]; uint chan_index; kilmask = 0x0; swizzle[0] = inst->InstructionExtNv.CondSwizzleX; swizzle[1] = inst->InstructionExtNv.CondSwizzleY; swizzle[2] = inst->InstructionExtNv.CondSwizzleZ; swizzle[3] = inst->InstructionExtNv.CondSwizzleW; for (chan_index = 0; chan_index < 4; chan_index++) { uint i; for (i = 0; i < 4; i++) { /* TODO: evaluate the condition code */ if (0) kilmask |= 1 << i; } } } else { /* "unconditional" kil */ kilmask = mach->ExecMask; } mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; } /* * Fetch a four texture samples using STR texture coordinates. */ static void fetch_texel( struct tgsi_sampler *sampler, const union tgsi_exec_channel *s, const union tgsi_exec_channel *t, const union tgsi_exec_channel *p, float lodbias, /* XXX should be float[4] */ union tgsi_exec_channel *r, union tgsi_exec_channel *g, union tgsi_exec_channel *b, union tgsi_exec_channel *a ) { uint j; float rgba[NUM_CHANNELS][QUAD_SIZE]; sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba); for (j = 0; j < 4; j++) { r->f[j] = rgba[0][j]; g->f[j] = rgba[1][j]; b->f[j] = rgba[2][j]; a->f[j] = rgba[3][j]; } } static void exec_tex(struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst, boolean biasLod, boolean projected) { const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; union tgsi_exec_channel r[4]; uint chan_index; float lodBias; /* debug_printf("Sampler %u unit %u\n", sampler, unit); */ switch (inst->InstructionExtTexture.Texture) { case TGSI_TEXTURE_1D: case TGSI_TEXTURE_SHADOW1D: FETCH(&r[0], 0, CHAN_X); if (projected) { FETCH(&r[1], 0, CHAN_W); micro_div( &r[0], &r[0], &r[1] ); } if (biasLod) { FETCH(&r[1], 0, CHAN_W); lodBias = r[2].f[0]; } else lodBias = 0.0; fetch_texel(mach->Samplers[unit], &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */ &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ break; case TGSI_TEXTURE_2D: case TGSI_TEXTURE_RECT: case TGSI_TEXTURE_SHADOW2D: case TGSI_TEXTURE_SHADOWRECT: FETCH(&r[0], 0, CHAN_X); FETCH(&r[1], 0, CHAN_Y); FETCH(&r[2], 0, CHAN_Z); if (projected) { FETCH(&r[3], 0, CHAN_W); micro_div( &r[0], &r[0], &r[3] ); micro_div( &r[1], &r[1], &r[3] ); micro_div( &r[2], &r[2], &r[3] ); } if (biasLod) { FETCH(&r[3], 0, CHAN_W); lodBias = r[3].f[0]; } else lodBias = 0.0; fetch_texel(mach->Samplers[unit], &r[0], &r[1], &r[2], lodBias, /* inputs */ &r[0], &r[1], &r[2], &r[3]); /* outputs */ break; case TGSI_TEXTURE_3D: case TGSI_TEXTURE_CUBE: FETCH(&r[0], 0, CHAN_X); FETCH(&r[1], 0, CHAN_Y); FETCH(&r[2], 0, CHAN_Z); if (projected) { FETCH(&r[3], 0, CHAN_W); micro_div( &r[0], &r[0], &r[3] ); micro_div( &r[1], &r[1], &r[3] ); micro_div( &r[2], &r[2], &r[3] ); } if (biasLod) { FETCH(&r[3], 0, CHAN_W); lodBias = r[3].f[0]; } else lodBias = 0.0; fetch_texel(mach->Samplers[unit], &r[0], &r[1], &r[2], lodBias, &r[0], &r[1], &r[2], &r[3]); break; default: assert (0); } FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { STORE( &r[chan_index], 0, chan_index ); } } /** * Evaluate a constant-valued coefficient at the position of the * current quad. */ static void eval_constant_coef( struct tgsi_exec_machine *mach, unsigned attrib, unsigned chan ) { unsigned i; for( i = 0; i < QUAD_SIZE; i++ ) { mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; } } /** * Evaluate a linear-valued coefficient at the position of the * current quad. */ static void eval_linear_coef( struct tgsi_exec_machine *mach, unsigned attrib, unsigned chan ) { const float x = mach->QuadPos.xyzw[0].f[0]; const float y = mach->QuadPos.xyzw[1].f[0]; const float dadx = mach->InterpCoefs[attrib].dadx[chan]; const float dady = mach->InterpCoefs[attrib].dady[chan]; const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; mach->Inputs[attrib].xyzw[chan].f[0] = a0; mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; } /** * Evaluate a perspective-valued coefficient at the position of the * current quad. */ static void eval_perspective_coef( struct tgsi_exec_machine *mach, unsigned attrib, unsigned chan ) { const float x = mach->QuadPos.xyzw[0].f[0]; const float y = mach->QuadPos.xyzw[1].f[0]; const float dadx = mach->InterpCoefs[attrib].dadx[chan]; const float dady = mach->InterpCoefs[attrib].dady[chan]; const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; const float *w = mach->QuadPos.xyzw[3].f; /* divide by W here */ mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; } typedef void (* eval_coef_func)( struct tgsi_exec_machine *mach, unsigned attrib, unsigned chan ); static void exec_declaration( struct tgsi_exec_machine *mach, const struct tgsi_full_declaration *decl ) { if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { if( decl->Declaration.File == TGSI_FILE_INPUT ) { unsigned first, last, mask; eval_coef_func eval; first = decl->DeclarationRange.First; last = decl->DeclarationRange.Last; mask = decl->Declaration.UsageMask; switch( decl->Declaration.Interpolate ) { case TGSI_INTERPOLATE_CONSTANT: eval = eval_constant_coef; break; case TGSI_INTERPOLATE_LINEAR: eval = eval_linear_coef; break; case TGSI_INTERPOLATE_PERSPECTIVE: eval = eval_perspective_coef; break; default: eval = NULL; assert( 0 ); } if( mask == TGSI_WRITEMASK_XYZW ) { unsigned i, j; for( i = first; i <= last; i++ ) { for( j = 0; j < NUM_CHANNELS; j++ ) { eval( mach, i, j ); } } } else { unsigned i, j; for( j = 0; j < NUM_CHANNELS; j++ ) { if( mask & (1 << j) ) { for( i = first; i <= last; i++ ) { eval( mach, i, j ); } } } } } } } static void exec_instruction( struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst, int *pc ) { uint chan_index; union tgsi_exec_channel r[8]; (*pc)++; switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ARL: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); micro_flr( &r[0], &r[0] ); STORE( &r[0], 0, chan_index ); } break; case TGSI_OPCODE_MOV: case TGSI_OPCODE_SWZ: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); STORE( &r[0], 0, chan_index ); } break; case TGSI_OPCODE_LIT: if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); } if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { FETCH( &r[0], 0, CHAN_X ); if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); STORE( &r[0], 0, CHAN_Y ); } if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { FETCH( &r[1], 0, CHAN_Y ); micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); FETCH( &r[2], 0, CHAN_W ); micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] ); micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] ); micro_pow( &r[1], &r[1], &r[2] ); micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); STORE( &r[0], 0, CHAN_Z ); } } if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); } break; case TGSI_OPCODE_RCP: /* TGSI_OPCODE_RECIP */ FETCH( &r[0], 0, CHAN_X ); micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { STORE( &r[0], 0, chan_index ); } break; case TGSI_OPCODE_RSQ: /* TGSI_OPCODE_RECIPSQRT */ FETCH( &r[0], 0, CHAN_X ); micro_abs( &r[0], &r[0] ); micro_sqrt( &r[0], &r[0] ); micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { STORE( &r[0], 0, chan_index ); } break; case TGSI_OPCODE_EXP: FETCH( &r[0], 0, CHAN_X ); micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */ if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */ STORE( &r[2], 0, CHAN_X ); /* store r2 */ } if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */ STORE( &r[2], 0, CHAN_Y ); /* store r2 */ } if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */ STORE( &r[2], 0, CHAN_Z ); /* store r2 */ } if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); } break; case TGSI_OPCODE_LOG: FETCH( &r[0], 0, CHAN_X ); micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */ micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */ micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */ if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { STORE( &r[0], 0, CHAN_X ); } if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */ micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */ STORE( &r[0], 0, CHAN_Y ); } if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { STORE( &r[1], 0, CHAN_Z ); } if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); } break; case TGSI_OPCODE_MUL: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH(&r[0], 0, chan_index); FETCH(&r[1], 1, chan_index); micro_mul( &r[0], &r[0], &r[1] ); STORE(&r[0], 0, chan_index); } break; case TGSI_OPCODE_ADD: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); micro_add( &r[0], &r[0], &r[1] ); STORE( &r[0], 0, chan_index ); } break; case TGSI_OPCODE_DP3: /* TGSI_OPCODE_DOT3 */ FETCH( &r[0], 0, CHAN_X ); FETCH( &r[1], 1, CHAN_X ); micro_mul( &r[0], &r[0], &r[1] ); FETCH( &r[1], 0, CHAN_Y ); FETCH( &r[2], 1, CHAN_Y ); micro_mul( &r[1], &r[1], &r[2] ); micro_add( &r[0], &r[0], &r[1] ); FETCH( &r[1], 0, CHAN_Z ); FETCH( &r[2], 1, CHAN_Z ); micro_mul( &r[1], &r[1], &r[2] ); micro_add( &r[0], &r[0], &r[1] ); FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { STORE( &r[0], 0, chan_index ); } break; case TGSI_OPCODE_DP4: /* TGSI_OPCODE_DOT4 */ FETCH(&r[0], 0, CHAN_X); FETCH(&r[1], 1, CHAN_X); micro_mul( &r[0], &r[0], &r[1] ); FETCH(&r[1], 0, CHAN_Y); FETCH(&r[2], 1, CHAN_Y); micro_mul( &r[1], &r[1], &r[2] ); micro_add( &r[0], &r[0], &r[1] ); FETCH(&r[1], 0, CHAN_Z); FETCH(&r[2], 1, CHAN_Z); micro_mul( &r[1], &r[1], &r[2] ); micro_add( &r[0], &r[0], &r[1] ); FETCH(&r[1], 0, CHAN_W); FETCH(&r[2], 1, CHAN_W); micro_mul( &r[1], &r[1], &r[2] ); micro_add( &r[0], &r[0], &r[1] ); FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { STORE( &r[0], 0, chan_index ); } break; case TGSI_OPCODE_DST: if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); } if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { FETCH( &r[0], 0, CHAN_Y ); FETCH( &r[1], 1, CHAN_Y); micro_mul( &r[0], &r[0], &r[1] ); STORE( &r[0], 0, CHAN_Y ); } if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { FETCH( &r[0], 0, CHAN_Z ); STORE( &r[0], 0, CHAN_Z ); } if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { FETCH( &r[0], 1, CHAN_W ); STORE( &r[0], 0, CHAN_W ); } break; case TGSI_OPCODE_MIN: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH(&r[0], 0, chan_index); FETCH(&r[1], 1, chan_index); /* XXX use micro_min()?? */ micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] ); STORE(&r[0], 0, chan_index); } break; case TGSI_OPCODE_MAX: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH(&r[0], 0, chan_index); FETCH(&r[1], 1, chan_index); /* XXX use micro_max()?? */ micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] ); STORE(&r[0], 0, chan_index ); } break; case TGSI_OPCODE_SLT: /* TGSI_OPCODE_SETLT */ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); STORE( &r[0], 0, chan_index ); } break; case TGSI_OPCODE_SGE: /* TGSI_OPCODE_SETGE */ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); micro_le( &r[0], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); STORE( &r[0], 0, chan_index ); } break; case TGSI_OPCODE_MAD: /* TGSI_OPCODE_MADD */ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); micro_mul( &r[0], &r[0], &r[1] ); FETCH( &r[1], 2, chan_index ); micro_add( &r[0], &r[0], &r[1] ); STORE( &r[0], 0, chan_index ); } break; case TGSI_OPCODE_SUB: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH(&r[0], 0, chan_index); FETCH(&r[1], 1, chan_index); micro_sub( &r[0], &r[0], &r[1] ); STORE(&r[0], 0, chan_index); } break; case TGSI_OPCODE_LERP: /* TGSI_OPCODE_LRP */ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH(&r[0], 0, chan_index); FETCH(&r[1], 1, chan_index); FETCH(&r[2], 2, chan_index); micro_sub( &r[1], &r[1], &r[2] ); micro_mul( &r[0], &r[0], &r[1] ); micro_add( &r[0], &r[0], &r[2] ); STORE(&r[0], 0, chan_index); } break; case TGSI_OPCODE_CND: FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { FETCH(&r[0], 0, chan_index); FETCH(&r[1], 1, chan_index); FETCH(&r[2], 2, chan_index); micro_lt(&r[0], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]); STORE(&r[0], 0, chan_index); } break; case TGSI_OPCODE_CND0: FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { FETCH(&r[0], 0, chan_index); FETCH(&r[1], 1, chan_index); FETCH(&r[2], 2, chan_index); micro_le(&r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[2], &r[0], &r[1]); STORE(&r[0], 0, chan_index); } break; case TGSI_OPCODE_DOT2ADD: /* TGSI_OPCODE_DP2A */ FETCH( &r[0], 0, CHAN_X ); FETCH( &r[1], 1, CHAN_X ); micro_mul( &r[0], &r[0], &r[1] ); FETCH( &r[1], 0, CHAN_Y ); FETCH( &r[2], 1, CHAN_Y ); micro_mul( &r[1], &r[1], &r[2] ); micro_add( &r[0], &r[0], &r[1] ); FETCH( &r[2], 2, CHAN_X ); micro_add( &r[0], &r[0], &r[2] ); FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { STORE( &r[0], 0, chan_index ); } break; case TGSI_OPCODE_INDEX: assert (0); break; case TGSI_OPCODE_NEGATE: assert (0); break; case TGSI_OPCODE_FRAC: /* TGSI_OPCODE_FRC */ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); micro_frc( &r[0], &r[0] ); STORE( &r[0], 0, chan_index ); } break; case TGSI_OPCODE_CLAMP: assert (0); break; case TGSI_OPCODE_FLOOR: /* TGSI_OPCODE_FLR */ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); micro_flr( &r[0], &r[0] ); STORE( &r[0], 0, chan_index ); } break; case TGSI_OPCODE_ROUND: case TGSI_OPCODE_ARR: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); micro_rnd( &r[0], &r[0] ); STORE( &r[0], 0, chan_index ); } break; case TGSI_OPCODE_EXPBASE2: /* TGSI_OPCODE_EX2 */ FETCH(&r[0], 0, CHAN_X); #if FAST_MATH micro_exp2( &r[0], &r[0] ); #else micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] ); #endif FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { STORE( &r[0], 0, chan_index ); } break; case TGSI_OPCODE_LOGBASE2: /* TGSI_OPCODE_LG2 */ FETCH( &r[0], 0, CHAN_X ); micro_lg2( &r[0], &r[0] ); FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { STORE( &r[0], 0, chan_index ); } break; case TGSI_OPCODE_POWER: /* TGSI_OPCODE_POW */ FETCH(&r[0], 0, CHAN_X); FETCH(&r[1], 1, CHAN_X); micro_pow( &r[0], &r[0], &r[1] ); FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { STORE( &r[0], 0, chan_index ); } break; case TGSI_OPCODE_CROSSPRODUCT: /* TGSI_OPCODE_XPD */ FETCH(&r[0], 0, CHAN_Y); FETCH(&r[1], 1, CHAN_Z); micro_mul( &r[2], &r[0], &r[1] ); FETCH(&r[3], 0, CHAN_Z); FETCH(&r[4], 1, CHAN_Y); micro_mul( &r[5], &r[3], &r[4] ); micro_sub( &r[2], &r[2], &r[5] ); if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { STORE( &r[2], 0, CHAN_X ); } FETCH(&r[2], 1, CHAN_X); micro_mul( &r[3], &r[3], &r[2] ); FETCH(&r[5], 0, CHAN_X); micro_mul( &r[1], &r[1], &r[5] ); micro_sub( &r[3], &r[3], &r[1] ); if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { STORE( &r[3], 0, CHAN_Y ); } micro_mul( &r[5], &r[5], &r[4] ); micro_mul( &r[0], &r[0], &r[2] ); micro_sub( &r[5], &r[5], &r[0] ); if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { STORE( &r[5], 0, CHAN_Z ); } if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); } break; case TGSI_OPCODE_MULTIPLYMATRIX: assert (0); break; case TGSI_OPCODE_ABS: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH(&r[0], 0, chan_index); micro_abs( &r[0], &r[0] ); STORE(&r[0], 0, chan_index); } break; case TGSI_OPCODE_RCC: assert (0); break; case TGSI_OPCODE_DPH: FETCH(&r[0], 0, CHAN_X); FETCH(&r[1], 1, CHAN_X); micro_mul( &r[0], &r[0], &r[1] ); FETCH(&r[1], 0, CHAN_Y); FETCH(&r[2], 1, CHAN_Y); micro_mul( &r[1], &r[1], &r[2] ); micro_add( &r[0], &r[0], &r[1] ); FETCH(&r[1], 0, CHAN_Z); FETCH(&r[2], 1, CHAN_Z); micro_mul( &r[1], &r[1], &r[2] ); micro_add( &r[0], &r[0], &r[1] ); FETCH(&r[1], 1, CHAN_W); micro_add( &r[0], &r[0], &r[1] ); FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { STORE( &r[0], 0, chan_index ); } break; case TGSI_OPCODE_COS: FETCH(&r[0], 0, CHAN_X); micro_cos( &r[0], &r[0] ); FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { STORE( &r[0], 0, chan_index ); } break; case TGSI_OPCODE_DDX: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); micro_ddx( &r[0], &r[0] ); STORE( &r[0], 0, chan_index ); } break; case TGSI_OPCODE_DDY: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); micro_ddy( &r[0], &r[0] ); STORE( &r[0], 0, chan_index ); } break; case TGSI_OPCODE_KILP: exec_kilp (mach, inst); break; case TGSI_OPCODE_KIL: exec_kil (mach, inst); break; case TGSI_OPCODE_PK2H: assert (0); break; case TGSI_OPCODE_PK2US: assert (0); break; case TGSI_OPCODE_PK4B: assert (0); break; case TGSI_OPCODE_PK4UB: assert (0); break; case TGSI_OPCODE_RFL: assert (0); break; case TGSI_OPCODE_SEQ: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); STORE( &r[0], 0, chan_index ); } break; case TGSI_OPCODE_SFL: assert (0); break; case TGSI_OPCODE_SGT: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); STORE( &r[0], 0, chan_index ); } break; case TGSI_OPCODE_SIN: FETCH( &r[0], 0, CHAN_X ); micro_sin( &r[0], &r[0] ); FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { STORE( &r[0], 0, chan_index ); } break; case TGSI_OPCODE_SLE: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); STORE( &r[0], 0, chan_index ); } break; case TGSI_OPCODE_SNE: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); STORE( &r[0], 0, chan_index ); } break; case TGSI_OPCODE_STR: assert (0); break; case TGSI_OPCODE_TEX: /* simple texture lookup */ /* src[0] = texcoord */ /* src[1] = sampler unit */ exec_tex(mach, inst, FALSE, FALSE); break; case TGSI_OPCODE_TXB: /* Texture lookup with lod bias */ /* src[0] = texcoord (src[0].w = LOD bias) */ /* src[1] = sampler unit */ exec_tex(mach, inst, TRUE, FALSE); break; case TGSI_OPCODE_TXD: /* Texture lookup with explict partial derivatives */ /* src[0] = texcoord */ /* src[1] = d[strq]/dx */ /* src[2] = d[strq]/dy */ /* src[3] = sampler unit */ assert (0); break; case TGSI_OPCODE_TXL: /* Texture lookup with explit LOD */ /* src[0] = texcoord (src[0].w = LOD) */ /* src[1] = sampler unit */ exec_tex(mach, inst, TRUE, FALSE); break; case TGSI_OPCODE_TXP: /* Texture lookup with projection */ /* src[0] = texcoord (src[0].w = projection) */ /* src[1] = sampler unit */ exec_tex(mach, inst, FALSE, TRUE); break; case TGSI_OPCODE_UP2H: assert (0); break; case TGSI_OPCODE_UP2US: assert (0); break; case TGSI_OPCODE_UP4B: assert (0); break; case TGSI_OPCODE_UP4UB: assert (0); break; case TGSI_OPCODE_X2D: assert (0); break; case TGSI_OPCODE_ARA: assert (0); break; case TGSI_OPCODE_BRA: assert (0); break; case TGSI_OPCODE_CAL: /* skip the call if no execution channels are enabled */ if (mach->ExecMask) { /* do the call */ /* push the Cond, Loop, Cont stacks */ assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); mach->CondStack[mach->CondStackTop++] = mach->CondMask; assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); mach->ContStack[mach->ContStackTop++] = mach->ContMask; assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; /* note that PC was already incremented above */ mach->CallStack[mach->CallStackTop++] = *pc; *pc = inst->InstructionExtLabel.Label; } break; case TGSI_OPCODE_RET: mach->FuncMask &= ~mach->ExecMask; UPDATE_EXEC_MASK(mach); if (mach->FuncMask == 0x0) { /* really return now (otherwise, keep executing */ if (mach->CallStackTop == 0) { /* returning from main() */ *pc = -1; return; } *pc = mach->CallStack[--mach->CallStackTop]; /* pop the Cond, Loop, Cont stacks */ assert(mach->CondStackTop > 0); mach->CondMask = mach->CondStack[--mach->CondStackTop]; assert(mach->LoopStackTop > 0); mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; assert(mach->ContStackTop > 0); mach->ContMask = mach->ContStack[--mach->ContStackTop]; assert(mach->FuncStackTop > 0); mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; UPDATE_EXEC_MASK(mach); } break; case TGSI_OPCODE_SSG: /* TGSI_OPCODE_SGN */ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); micro_sgn( &r[0], &r[0] ); STORE( &r[0], 0, chan_index ); } break; case TGSI_OPCODE_CMP: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH(&r[0], 0, chan_index); FETCH(&r[1], 1, chan_index); FETCH(&r[2], 2, chan_index); micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] ); STORE(&r[0], 0, chan_index); } break; case TGSI_OPCODE_SCS: if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { FETCH( &r[0], 0, CHAN_X ); } if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) { micro_cos( &r[1], &r[0] ); STORE( &r[1], 0, CHAN_X ); } if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { micro_sin( &r[1], &r[0] ); STORE( &r[1], 0, CHAN_Y ); } if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z ); } if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) { STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); } break; case TGSI_OPCODE_NRM: /* 3-component vector normalize */ { union tgsi_exec_channel tmp, dot; /* tmp = dp3(src0, src0): */ FETCH( &r[0], 0, CHAN_X ); micro_mul( &tmp, &r[0], &r[0] ); FETCH( &r[1], 0, CHAN_Y ); micro_mul( &dot, &r[1], &r[1] ); micro_add( &tmp, &tmp, &dot ); FETCH( &r[2], 0, CHAN_Z ); micro_mul( &dot, &r[2], &r[2] ); micro_add( &tmp, &tmp, &dot ); /* tmp = 1 / sqrt(tmp) */ micro_sqrt( &tmp, &tmp ); micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp ); /* note: w channel is undefined */ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { /* chan = chan * tmp */ micro_mul( &r[chan_index], &tmp, &r[chan_index] ); STORE( &r[chan_index], 0, chan_index ); } } break; case TGSI_OPCODE_NRM4: /* 4-component vector normalize */ { union tgsi_exec_channel tmp, dot; /* tmp = dp4(src0, src0): */ FETCH( &r[0], 0, CHAN_X ); micro_mul( &tmp, &r[0], &r[0] ); FETCH( &r[1], 0, CHAN_Y ); micro_mul( &dot, &r[1], &r[1] ); micro_add( &tmp, &tmp, &dot ); FETCH( &r[2], 0, CHAN_Z ); micro_mul( &dot, &r[2], &r[2] ); micro_add( &tmp, &tmp, &dot ); FETCH( &r[3], 0, CHAN_W ); micro_mul( &dot, &r[3], &r[3] ); micro_add( &tmp, &tmp, &dot ); /* tmp = 1 / sqrt(tmp) */ micro_sqrt( &tmp, &tmp ); micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp ); FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { /* chan = chan * tmp */ micro_mul( &r[chan_index], &tmp, &r[chan_index] ); STORE( &r[chan_index], 0, chan_index ); } } break; case TGSI_OPCODE_DIV: assert( 0 ); break; case TGSI_OPCODE_DP2: FETCH( &r[0], 0, CHAN_X ); FETCH( &r[1], 1, CHAN_X ); micro_mul( &r[0], &r[0], &r[1] ); FETCH( &r[1], 0, CHAN_Y ); FETCH( &r[2], 1, CHAN_Y ); micro_mul( &r[1], &r[1], &r[2] ); micro_add( &r[0], &r[0], &r[1] ); FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { STORE( &r[0], 0, chan_index ); } break; case TGSI_OPCODE_IF: /* push CondMask */ assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); mach->CondStack[mach->CondStackTop++] = mach->CondMask; FETCH( &r[0], 0, CHAN_X ); /* update CondMask */ if( ! r[0].u[0] ) { mach->CondMask &= ~0x1; } if( ! r[0].u[1] ) { mach->CondMask &= ~0x2; } if( ! r[0].u[2] ) { mach->CondMask &= ~0x4; } if( ! r[0].u[3] ) { mach->CondMask &= ~0x8; } UPDATE_EXEC_MASK(mach); /* Todo: If CondMask==0, jump to ELSE */ break; case TGSI_OPCODE_ELSE: /* invert CondMask wrt previous mask */ { uint prevMask; assert(mach->CondStackTop > 0); prevMask = mach->CondStack[mach->CondStackTop - 1]; mach->CondMask = ~mach->CondMask & prevMask; UPDATE_EXEC_MASK(mach); /* Todo: If CondMask==0, jump to ENDIF */ } break; case TGSI_OPCODE_ENDIF: /* pop CondMask */ assert(mach->CondStackTop > 0); mach->CondMask = mach->CondStack[--mach->CondStackTop]; UPDATE_EXEC_MASK(mach); break; case TGSI_OPCODE_END: /* halt execution */ *pc = -1; break; case TGSI_OPCODE_REP: assert (0); break; case TGSI_OPCODE_ENDREP: assert (0); break; case TGSI_OPCODE_PUSHA: assert (0); break; case TGSI_OPCODE_POPA: assert (0); break; case TGSI_OPCODE_CEIL: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); micro_ceil( &r[0], &r[0] ); STORE( &r[0], 0, chan_index ); } break; case TGSI_OPCODE_I2F: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); micro_i2f( &r[0], &r[0] ); STORE( &r[0], 0, chan_index ); } break; case TGSI_OPCODE_NOT: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); micro_not( &r[0], &r[0] ); STORE( &r[0], 0, chan_index ); } break; case TGSI_OPCODE_TRUNC: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); micro_trunc( &r[0], &r[0] ); STORE( &r[0], 0, chan_index ); } break; case TGSI_OPCODE_SHL: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); micro_shl( &r[0], &r[0], &r[1] ); STORE( &r[0], 0, chan_index ); } break; case TGSI_OPCODE_SHR: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); micro_ishr( &r[0], &r[0], &r[1] ); STORE( &r[0], 0, chan_index ); } break; case TGSI_OPCODE_AND: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); micro_and( &r[0], &r[0], &r[1] ); STORE( &r[0], 0, chan_index ); } break; case TGSI_OPCODE_OR: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); micro_or( &r[0], &r[0], &r[1] ); STORE( &r[0], 0, chan_index ); } break; case TGSI_OPCODE_MOD: assert (0); break; case TGSI_OPCODE_XOR: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); micro_xor( &r[0], &r[0], &r[1] ); STORE( &r[0], 0, chan_index ); } break; case TGSI_OPCODE_SAD: assert (0); break; case TGSI_OPCODE_TXF: assert (0); break; case TGSI_OPCODE_TXQ: assert (0); break; case TGSI_OPCODE_EMIT: mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16; mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; break; case TGSI_OPCODE_ENDPRIM: mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++; mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0; break; case TGSI_OPCODE_LOOP: /* fall-through (for now) */ case TGSI_OPCODE_BGNLOOP2: /* push LoopMask and ContMasks */ assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); mach->ContStack[mach->ContStackTop++] = mach->ContMask; break; case TGSI_OPCODE_ENDLOOP: /* fall-through (for now at least) */ case TGSI_OPCODE_ENDLOOP2: /* Restore ContMask, but don't pop */ assert(mach->ContStackTop > 0); mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; UPDATE_EXEC_MASK(mach); if (mach->ExecMask) { /* repeat loop: jump to instruction just past BGNLOOP */ *pc = inst->InstructionExtLabel.Label + 1; } else { /* exit loop: pop LoopMask */ assert(mach->LoopStackTop > 0); mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; /* pop ContMask */ assert(mach->ContStackTop > 0); mach->ContMask = mach->ContStack[--mach->ContStackTop]; } UPDATE_EXEC_MASK(mach); break; case TGSI_OPCODE_BRK: /* turn off loop channels for each enabled exec channel */ mach->LoopMask &= ~mach->ExecMask; /* Todo: if mach->LoopMask == 0, jump to end of loop */ UPDATE_EXEC_MASK(mach); break; case TGSI_OPCODE_CONT: /* turn off cont channels for each enabled exec channel */ mach->ContMask &= ~mach->ExecMask; /* Todo: if mach->LoopMask == 0, jump to end of loop */ UPDATE_EXEC_MASK(mach); break; case TGSI_OPCODE_BGNSUB: /* no-op */ break; case TGSI_OPCODE_ENDSUB: /* no-op */ break; case TGSI_OPCODE_NOISE1: assert( 0 ); break; case TGSI_OPCODE_NOISE2: assert( 0 ); break; case TGSI_OPCODE_NOISE3: assert( 0 ); break; case TGSI_OPCODE_NOISE4: assert( 0 ); break; case TGSI_OPCODE_NOP: break; default: assert( 0 ); } } /** * Run TGSI interpreter. * \return bitmask of "alive" quad components */ uint tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) { uint i; int pc = 0; mach->CondMask = 0xf; mach->LoopMask = 0xf; mach->ContMask = 0xf; mach->FuncMask = 0xf; mach->ExecMask = 0xf; mach->CondStackTop = 0; /* temporarily subvert this assertion */ assert(mach->CondStackTop == 0); assert(mach->LoopStackTop == 0); assert(mach->ContStackTop == 0); assert(mach->CallStackTop == 0); mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0; if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) { mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0; mach->Primitives[0] = 0; } for (i = 0; i < QUAD_SIZE; i++) { mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] = (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) | (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) | (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) | (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT); } /* execute declarations (interpolants) */ for (i = 0; i < mach->NumDeclarations; i++) { exec_declaration( mach, mach->Declarations+i ); } /* execute instructions, until pc is set to -1 */ while (pc != -1) { assert(pc < (int) mach->NumInstructions); exec_instruction( mach, mach->Instructions + pc, &pc ); } #if 0 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */ if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { /* * Scale back depth component. */ for (i = 0; i < 4; i++) mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF; } #endif return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; }