From b3f4b56a3b7f979c631358caefed635c6ec56453 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 1 Sep 2009 17:29:24 +0100 Subject: tgsi: remove redundant CND0 opcode Can be implemented with CMP src2, src1, src0 --- src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt | 6 +----- src/gallium/auxiliary/tgsi/tgsi_exec.c | 10 ---------- src/gallium/auxiliary/tgsi/tgsi_info.c | 2 +- src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h | 1 - src/gallium/auxiliary/tgsi/tgsi_sse2.c | 4 ---- 5 files changed, 2 insertions(+), 21 deletions(-) (limited to 'src/gallium/auxiliary/tgsi') diff --git a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt index 802ec371189..a989514b75f 100644 --- a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt +++ b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt @@ -187,11 +187,7 @@ TGSI Instruction Specification 1.2.6 CND0 - Condition Zero - dst.x = (src2.x >= 0.0) ? src0.x : src1.x - dst.y = (src2.y >= 0.0) ? src0.y : src1.y - dst.z = (src2.z >= 0.0) ? src0.z : src1.z - dst.w = (src2.w >= 0.0) ? src0.w : src1.w - + Removed. Use (CMP src2, src1, src0) instead. 1.2.7 DOT2ADD - 2-component Dot Product And Add diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 711e86d6edf..d3ffd4a85f4 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -2329,16 +2329,6 @@ exec_instruction( } break; - case TGSI_OPCODE_CND0: - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - FETCH(&r[2], 2, chan_index); - micro_le(&r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[2], &r[0], &r[1]); - STORE(&r[0], 0, chan_index); - } - break; - case TGSI_OPCODE_DP2A: FETCH( &r[0], 0, CHAN_X ); FETCH( &r[1], 1, CHAN_X ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c index ccf4b205ffb..e69cd052132 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -51,7 +51,7 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { 1, 2, 0, 0, "SUB", TGSI_OPCODE_SUB }, { 1, 3, 0, 0, "LRP", TGSI_OPCODE_LRP }, { 1, 3, 0, 0, "CND", TGSI_OPCODE_CND }, - { 1, 3, 0, 0, "CND0", TGSI_OPCODE_CND0 }, + { 0, 0, 0, 0, "", 20 }, /* removed */ { 1, 3, 0, 0, "DP2A", TGSI_OPCODE_DP2A }, { 0, 0, 0, 0, "", 22 }, /* removed */ { 0, 0, 0, 0, "", 23 }, /* removed */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h index ed594a3e2c7..e7bcf4bf754 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h +++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h @@ -60,7 +60,6 @@ OP13(MAD) OP12(SUB) OP13(LRP) OP13(CND) -OP13(CND0) OP13(DP2A) OP11(FRC) OP13(CLAMP) diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 46f2387c158..3cdf8b9f359 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -2089,10 +2089,6 @@ emit_instruction( return 0; break; - case TGSI_OPCODE_CND0: - return 0; - break; - case TGSI_OPCODE_DP2A: FETCH( func, *inst, 0, 0, CHAN_X ); /* xmm0 = src[0].x */ FETCH( func, *inst, 1, 1, CHAN_X ); /* xmm1 = src[1].x */ -- cgit v1.2.3 From 848ab8be8c34b00b2afe6120882f8c29f047ced5 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 3 Sep 2009 15:16:25 +0100 Subject: aux/tgsi: pull back ureg work from 0.1 branch Manual merge of ureg changes on the branch. Too much unrelated stuff for a proper merge. --- src/gallium/auxiliary/tgsi/tgsi_dump.c | 31 ++-- src/gallium/auxiliary/tgsi/tgsi_exec.c | 61 ++++++- src/gallium/auxiliary/tgsi/tgsi_exec.h | 8 + src/gallium/auxiliary/tgsi/tgsi_info.c | 238 +++++++++++++------------- src/gallium/auxiliary/tgsi/tgsi_info.h | 10 +- src/gallium/auxiliary/tgsi/tgsi_sanity.c | 4 +- src/gallium/auxiliary/tgsi/tgsi_ureg.c | 122 ++++++++++--- src/gallium/auxiliary/tgsi/tgsi_ureg.h | 119 ++++++++++++- src/gallium/auxiliary/tgsi/tgsi_util.h | 4 + src/gallium/auxiliary/util/u_math.h | 4 + src/gallium/auxiliary/util/u_simple_shaders.c | 2 +- 11 files changed, 429 insertions(+), 174 deletions(-) (limited to 'src/gallium/auxiliary/tgsi') diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index 05b07a3a73e..111d95b6665 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -43,6 +43,7 @@ struct dump_ctx struct tgsi_iterate_context iter; uint instno; + int indent; uint indentation; @@ -335,14 +336,6 @@ tgsi_dump_immediate( iter_immediate( &ctx.iter, (struct tgsi_full_immediate *)imm ); } -static void -indent(struct dump_ctx *ctx) -{ - uint i; - for (i = 0; i < ctx->indentation; i++) - TXT(" "); -} - static boolean iter_instruction( struct tgsi_iterate_context *iter, @@ -350,22 +343,19 @@ iter_instruction( { struct dump_ctx *ctx = (struct dump_ctx *) iter; uint instno = ctx->instno++; - + const struct tgsi_opcode_info *info = tgsi_get_opcode_info( inst->Instruction.Opcode ); uint i; boolean first_reg = TRUE; INSTID( instno ); TXT( ": " ); - - /* update indentation */ - if (inst->Instruction.Opcode == TGSI_OPCODE_ENDIF || - inst->Instruction.Opcode == TGSI_OPCODE_ENDFOR || - inst->Instruction.Opcode == TGSI_OPCODE_ENDLOOP) { - ctx->indentation -= indent_spaces; - } - indent(ctx); - - TXT( tgsi_get_opcode_info( inst->Instruction.Opcode )->mnemonic ); + + ctx->indent -= info->pre_dedent; + for(i = 0; (int)i < ctx->indent; ++i) + TXT( " " ); + ctx->indent += info->post_indent; + + TXT( info->mnemonic ); switch (inst->Instruction.Saturate) { case TGSI_SAT_NONE: @@ -526,6 +516,7 @@ tgsi_dump_instruction( struct dump_ctx ctx; ctx.instno = instno; + ctx.indent = 0; ctx.printf = dump_ctx_printf; ctx.indentation = 0; @@ -559,6 +550,7 @@ tgsi_dump( ctx.iter.epilog = NULL; ctx.instno = 0; + ctx.indent = 0; ctx.printf = dump_ctx_printf; ctx.indentation = 0; @@ -612,6 +604,7 @@ tgsi_dump_str( ctx.base.iter.epilog = NULL; ctx.base.instno = 0; + ctx.base.indent = 0; ctx.base.printf = &str_dump_ctx_printf; ctx.base.indentation = 0; diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index d3ffd4a85f4..60ffa188df6 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -3094,6 +3094,12 @@ exec_instruction( break; case TGSI_OPCODE_BGNFOR: + assert(mach->LoopCounterStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + for (chan_index = 0; chan_index < 3; chan_index++) { + FETCH( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[chan_index], 0, chan_index ); + } + STORE( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_Y], 0, CHAN_X ); + ++mach->LoopCounterStackTop; /* fall-through (for now) */ case TGSI_OPCODE_BGNLOOP: /* push LoopMask and ContMasks */ @@ -3101,10 +3107,58 @@ exec_instruction( mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); mach->ContStack[mach->ContStackTop++] = mach->ContMask; + assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1; break; case TGSI_OPCODE_ENDFOR: - /* fall-through (for now at least) */ + assert(mach->LoopCounterStackTop > 0); + micro_sub( &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], + &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], + &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); + /* update LoopMask */ + if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[0] <= 0) { + mach->LoopMask &= ~0x1; + } + if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[1] <= 0 ) { + mach->LoopMask &= ~0x2; + } + if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[2] <= 0 ) { + mach->LoopMask &= ~0x4; + } + if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[3] <= 0 ) { + mach->LoopMask &= ~0x8; + } + micro_add( &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], + &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], + &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Z]); + assert(mach->LoopLabelStackTop > 0); + inst = mach->Instructions + mach->LoopLabelStack[mach->LoopLabelStackTop - 1]; + STORE( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_Y], 0, CHAN_X ); + /* Restore ContMask, but don't pop */ + assert(mach->ContStackTop > 0); + mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; + UPDATE_EXEC_MASK(mach); + if (mach->ExecMask) { + /* repeat loop: jump to instruction just past BGNLOOP */ + assert(mach->LoopLabelStackTop > 0); + *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1; + } + else { + /* exit loop: pop LoopMask */ + assert(mach->LoopStackTop > 0); + mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; + /* pop ContMask */ + assert(mach->ContStackTop > 0); + mach->ContMask = mach->ContStack[--mach->ContStackTop]; + assert(mach->LoopLabelStackTop > 0); + --mach->LoopLabelStackTop; + assert(mach->LoopCounterStackTop > 0); + --mach->LoopCounterStackTop; + } + UPDATE_EXEC_MASK(mach); + break; + case TGSI_OPCODE_ENDLOOP: /* Restore ContMask, but don't pop */ assert(mach->ContStackTop > 0); @@ -3112,7 +3166,8 @@ exec_instruction( UPDATE_EXEC_MASK(mach); if (mach->ExecMask) { /* repeat loop: jump to instruction just past BGNLOOP */ - *pc = inst->InstructionExtLabel.Label + 1; + assert(mach->LoopLabelStackTop > 0); + *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1; } else { /* exit loop: pop LoopMask */ @@ -3121,6 +3176,8 @@ exec_instruction( /* pop ContMask */ assert(mach->ContStackTop > 0); mach->ContMask = mach->ContStack[--mach->ContStackTop]; + assert(mach->LoopLabelStackTop > 0); + --mach->LoopLabelStackTop; } UPDATE_EXEC_MASK(mach); break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index fd9ef6f35df..3baa94dbdde 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -232,6 +232,14 @@ struct tgsi_exec_machine uint LoopStack[TGSI_EXEC_MAX_LOOP_NESTING]; int LoopStackTop; + /** Loop label stack */ + uint LoopLabelStack[TGSI_EXEC_MAX_LOOP_NESTING]; + int LoopLabelStackTop; + + /** Loop counter stack (x = count, y = current, z = step) */ + struct tgsi_exec_vector LoopCounterStack[TGSI_EXEC_MAX_LOOP_NESTING]; + int LoopCounterStackTop; + /** Loop continue mask stack (see comments in tgsi_exec.c) */ uint ContStack[TGSI_EXEC_MAX_LOOP_NESTING]; int ContStackTop; diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c index e69cd052132..17af4cb7ad2 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -31,125 +31,125 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { - { 1, 1, 0, 0, "ARL", TGSI_OPCODE_ARL }, - { 1, 1, 0, 0, "MOV", TGSI_OPCODE_MOV }, - { 1, 1, 0, 0, "LIT", TGSI_OPCODE_LIT }, - { 1, 1, 0, 0, "RCP", TGSI_OPCODE_RCP }, - { 1, 1, 0, 0, "RSQ", TGSI_OPCODE_RSQ }, - { 1, 1, 0, 0, "EXP", TGSI_OPCODE_EXP }, - { 1, 1, 0, 0, "LOG", TGSI_OPCODE_LOG }, - { 1, 2, 0, 0, "MUL", TGSI_OPCODE_MUL }, - { 1, 2, 0, 0, "ADD", TGSI_OPCODE_ADD }, - { 1, 2, 0, 0, "DP3", TGSI_OPCODE_DP3 }, - { 1, 2, 0, 0, "DP4", TGSI_OPCODE_DP4 }, - { 1, 2, 0, 0, "DST", TGSI_OPCODE_DST }, - { 1, 2, 0, 0, "MIN", TGSI_OPCODE_MIN }, - { 1, 2, 0, 0, "MAX", TGSI_OPCODE_MAX }, - { 1, 2, 0, 0, "SLT", TGSI_OPCODE_SLT }, - { 1, 2, 0, 0, "SGE", TGSI_OPCODE_SGE }, - { 1, 3, 0, 0, "MAD", TGSI_OPCODE_MAD }, - { 1, 2, 0, 0, "SUB", TGSI_OPCODE_SUB }, - { 1, 3, 0, 0, "LRP", TGSI_OPCODE_LRP }, - { 1, 3, 0, 0, "CND", TGSI_OPCODE_CND }, - { 0, 0, 0, 0, "", 20 }, /* removed */ - { 1, 3, 0, 0, "DP2A", TGSI_OPCODE_DP2A }, - { 0, 0, 0, 0, "", 22 }, /* removed */ - { 0, 0, 0, 0, "", 23 }, /* removed */ - { 1, 1, 0, 0, "FRC", TGSI_OPCODE_FRC }, - { 1, 3, 0, 0, "CLAMP", TGSI_OPCODE_CLAMP }, - { 1, 1, 0, 0, "FLR", TGSI_OPCODE_FLR }, - { 1, 1, 0, 0, "ROUND", TGSI_OPCODE_ROUND }, - { 1, 1, 0, 0, "EX2", TGSI_OPCODE_EX2 }, - { 1, 1, 0, 0, "LG2", TGSI_OPCODE_LG2 }, - { 1, 2, 0, 0, "POW", TGSI_OPCODE_POW }, - { 1, 2, 0, 0, "XPD", TGSI_OPCODE_XPD }, - { 0, 0, 0, 0, "", 32 }, /* removed */ - { 1, 1, 0, 0, "ABS", TGSI_OPCODE_ABS }, - { 1, 1, 0, 0, "RCC", TGSI_OPCODE_RCC }, - { 1, 2, 0, 0, "DPH", TGSI_OPCODE_DPH }, - { 1, 1, 0, 0, "COS", TGSI_OPCODE_COS }, - { 1, 1, 0, 0, "DDX", TGSI_OPCODE_DDX }, - { 1, 1, 0, 0, "DDY", TGSI_OPCODE_DDY }, - { 0, 0, 0, 0, "KILP", TGSI_OPCODE_KILP }, - { 1, 1, 0, 0, "PK2H", TGSI_OPCODE_PK2H }, - { 1, 1, 0, 0, "PK2US", TGSI_OPCODE_PK2US }, - { 1, 1, 0, 0, "PK4B", TGSI_OPCODE_PK4B }, - { 1, 1, 0, 0, "PK4UB", TGSI_OPCODE_PK4UB }, - { 1, 2, 0, 0, "RFL", TGSI_OPCODE_RFL }, - { 1, 2, 0, 0, "SEQ", TGSI_OPCODE_SEQ }, - { 1, 2, 0, 0, "SFL", TGSI_OPCODE_SFL }, - { 1, 2, 0, 0, "SGT", TGSI_OPCODE_SGT }, - { 1, 1, 0, 0, "SIN", TGSI_OPCODE_SIN }, - { 1, 2, 0, 0, "SLE", TGSI_OPCODE_SLE }, - { 1, 2, 0, 0, "SNE", TGSI_OPCODE_SNE }, - { 1, 2, 0, 0, "STR", TGSI_OPCODE_STR }, - { 1, 2, 1, 0, "TEX", TGSI_OPCODE_TEX }, - { 1, 4, 1, 0, "TXD", TGSI_OPCODE_TXD }, - { 1, 2, 1, 0, "TXP", TGSI_OPCODE_TXP }, - { 1, 1, 0, 0, "UP2H", TGSI_OPCODE_UP2H }, - { 1, 1, 0, 0, "UP2US", TGSI_OPCODE_UP2US }, - { 1, 1, 0, 0, "UP4B", TGSI_OPCODE_UP4B }, - { 1, 1, 0, 0, "UP4UB", TGSI_OPCODE_UP4UB }, - { 1, 3, 0, 0, "X2D", TGSI_OPCODE_X2D }, - { 1, 1, 0, 0, "ARA", TGSI_OPCODE_ARA }, - { 1, 1, 0, 0, "ARR", TGSI_OPCODE_ARR }, - { 0, 1, 0, 0, "BRA", TGSI_OPCODE_BRA }, - { 0, 0, 0, 1, "CAL", TGSI_OPCODE_CAL }, - { 0, 0, 0, 0, "RET", TGSI_OPCODE_RET }, - { 1, 1, 0, 0, "SSG", TGSI_OPCODE_SSG }, - { 1, 3, 0, 0, "CMP", TGSI_OPCODE_CMP }, - { 1, 1, 0, 0, "SCS", TGSI_OPCODE_SCS }, - { 1, 2, 1, 0, "TXB", TGSI_OPCODE_TXB }, - { 1, 1, 0, 0, "NRM", TGSI_OPCODE_NRM }, - { 1, 2, 0, 0, "DIV", TGSI_OPCODE_DIV }, - { 1, 2, 0, 0, "DP2", TGSI_OPCODE_DP2 }, - { 1, 2, 1, 0, "TXL", TGSI_OPCODE_TXL }, - { 0, 0, 0, 0, "BRK", TGSI_OPCODE_BRK }, - { 0, 1, 0, 1, "IF", TGSI_OPCODE_IF }, - { 1, 1, 0, 0, "BGNFOR", TGSI_OPCODE_BGNFOR }, - { 0, 1, 0, 0, "REP", TGSI_OPCODE_REP }, - { 0, 0, 0, 1, "ELSE", TGSI_OPCODE_ELSE }, - { 0, 0, 0, 0, "ENDIF", TGSI_OPCODE_ENDIF }, - { 1, 0, 0, 0, "ENDFOR", TGSI_OPCODE_ENDFOR }, - { 0, 0, 0, 0, "ENDREP", TGSI_OPCODE_ENDREP }, - { 0, 1, 0, 0, "PUSHA", TGSI_OPCODE_PUSHA }, - { 1, 0, 0, 0, "POPA", TGSI_OPCODE_POPA }, - { 1, 1, 0, 0, "CEIL", TGSI_OPCODE_CEIL }, - { 1, 1, 0, 0, "I2F", TGSI_OPCODE_I2F }, - { 1, 1, 0, 0, "NOT", TGSI_OPCODE_NOT }, - { 1, 1, 0, 0, "TRUNC", TGSI_OPCODE_TRUNC }, - { 1, 2, 0, 0, "SHL", TGSI_OPCODE_SHL }, - { 1, 2, 0, 0, "SHR", TGSI_OPCODE_SHR }, - { 1, 2, 0, 0, "AND", TGSI_OPCODE_AND }, - { 1, 2, 0, 0, "OR", TGSI_OPCODE_OR }, - { 1, 2, 0, 0, "MOD", TGSI_OPCODE_MOD }, - { 1, 2, 0, 0, "XOR", TGSI_OPCODE_XOR }, - { 1, 3, 0, 0, "SAD", TGSI_OPCODE_SAD }, - { 1, 2, 1, 0, "TXF", TGSI_OPCODE_TXF }, - { 1, 2, 1, 0, "TXQ", TGSI_OPCODE_TXQ }, - { 0, 0, 0, 0, "CONT", TGSI_OPCODE_CONT }, - { 0, 0, 0, 0, "EMIT", TGSI_OPCODE_EMIT }, - { 0, 0, 0, 0, "ENDPRIM", TGSI_OPCODE_ENDPRIM }, - { 0, 0, 0, 1, "BGNLOOP", TGSI_OPCODE_BGNLOOP }, - { 0, 0, 0, 0, "BGNSUB", TGSI_OPCODE_BGNSUB }, - { 0, 0, 0, 1, "ENDLOOP", TGSI_OPCODE_ENDLOOP }, - { 0, 0, 0, 0, "ENDSUB", TGSI_OPCODE_ENDSUB }, - { 1, 1, 0, 0, "NOISE1", TGSI_OPCODE_NOISE1 }, - { 1, 1, 0, 0, "NOISE2", TGSI_OPCODE_NOISE2 }, - { 1, 1, 0, 0, "NOISE3", TGSI_OPCODE_NOISE3 }, - { 1, 1, 0, 0, "NOISE4", TGSI_OPCODE_NOISE4 }, - { 0, 0, 0, 0, "NOP", TGSI_OPCODE_NOP }, - { 0, 0, 0, 0, "", 108 }, /* removed */ - { 0, 0, 0, 0, "", 109 }, /* removed */ - { 0, 0, 0, 0, "", 110 }, /* removed */ - { 0, 0, 0, 0, "", 111 }, /* removed */ - { 1, 1, 0, 0, "NRM4", TGSI_OPCODE_NRM4 }, - { 0, 1, 0, 0, "CALLNZ", TGSI_OPCODE_CALLNZ }, - { 0, 1, 0, 0, "IFC", TGSI_OPCODE_IFC }, - { 0, 1, 0, 0, "BREAKC", TGSI_OPCODE_BREAKC }, - { 0, 1, 0, 0, "KIL", TGSI_OPCODE_KIL }, - { 0, 0, 0, 0, "END", TGSI_OPCODE_END }, - { 1, 1, 0, 0, "SWZ", TGSI_OPCODE_SWZ } + { 1, 1, 0, 0, 0, 0, "ARL", TGSI_OPCODE_ARL }, + { 1, 1, 0, 0, 0, 0, "MOV", TGSI_OPCODE_MOV }, + { 1, 1, 0, 0, 0, 0, "LIT", TGSI_OPCODE_LIT }, + { 1, 1, 0, 0, 0, 0, "RCP", TGSI_OPCODE_RCP }, + { 1, 1, 0, 0, 0, 0, "RSQ", TGSI_OPCODE_RSQ }, + { 1, 1, 0, 0, 0, 0, "EXP", TGSI_OPCODE_EXP }, + { 1, 1, 0, 0, 0, 0, "LOG", TGSI_OPCODE_LOG }, + { 1, 2, 0, 0, 0, 0, "MUL", TGSI_OPCODE_MUL }, + { 1, 2, 0, 0, 0, 0, "ADD", TGSI_OPCODE_ADD }, + { 1, 2, 0, 0, 0, 0, "DP3", TGSI_OPCODE_DP3 }, + { 1, 2, 0, 0, 0, 0, "DP4", TGSI_OPCODE_DP4 }, + { 1, 2, 0, 0, 0, 0, "DST", TGSI_OPCODE_DST }, + { 1, 2, 0, 0, 0, 0, "MIN", TGSI_OPCODE_MIN }, + { 1, 2, 0, 0, 0, 0, "MAX", TGSI_OPCODE_MAX }, + { 1, 2, 0, 0, 0, 0, "SLT", TGSI_OPCODE_SLT }, + { 1, 2, 0, 0, 0, 0, "SGE", TGSI_OPCODE_SGE }, + { 1, 3, 0, 0, 0, 0, "MAD", TGSI_OPCODE_MAD }, + { 1, 2, 0, 0, 0, 0, "SUB", TGSI_OPCODE_SUB }, + { 1, 3, 0, 0, 0, 0, "LRP", TGSI_OPCODE_LRP }, + { 1, 3, 0, 0, 0, 0, "CND", TGSI_OPCODE_CND }, + { 0, 0, 0, 0, 0, 0, "", 20 }, /* removed */ + { 1, 3, 0, 0, 0, 0, "DP2A", TGSI_OPCODE_DP2A }, + { 0, 0, 0, 0, 0, 0, "", 22 }, /* removed */ + { 0, 0, 0, 0, 0, 0, "", 23 }, /* removed */ + { 1, 1, 0, 0, 0, 0, "FRC", TGSI_OPCODE_FRC }, + { 1, 3, 0, 0, 0, 0, "CLAMP", TGSI_OPCODE_CLAMP }, + { 1, 1, 0, 0, 0, 0, "FLR", TGSI_OPCODE_FLR }, + { 1, 1, 0, 0, 0, 0, "ROUND", TGSI_OPCODE_ROUND }, + { 1, 1, 0, 0, 0, 0, "EX2", TGSI_OPCODE_EX2 }, + { 1, 1, 0, 0, 0, 0, "LG2", TGSI_OPCODE_LG2 }, + { 1, 2, 0, 0, 0, 0, "POW", TGSI_OPCODE_POW }, + { 1, 2, 0, 0, 0, 0, "XPD", TGSI_OPCODE_XPD }, + { 0, 0, 0, 0, 0, 0, "", 32 }, /* removed */ + { 1, 1, 0, 0, 0, 0, "ABS", TGSI_OPCODE_ABS }, + { 1, 1, 0, 0, 0, 0, "RCC", TGSI_OPCODE_RCC }, + { 1, 2, 0, 0, 0, 0, "DPH", TGSI_OPCODE_DPH }, + { 1, 1, 0, 0, 0, 0, "COS", TGSI_OPCODE_COS }, + { 1, 1, 0, 0, 0, 0, "DDX", TGSI_OPCODE_DDX }, + { 1, 1, 0, 0, 0, 0, "DDY", TGSI_OPCODE_DDY }, + { 0, 0, 0, 0, 0, 0, "KILP", TGSI_OPCODE_KILP }, + { 1, 1, 0, 0, 0, 0, "PK2H", TGSI_OPCODE_PK2H }, + { 1, 1, 0, 0, 0, 0, "PK2US", TGSI_OPCODE_PK2US }, + { 1, 1, 0, 0, 0, 0, "PK4B", TGSI_OPCODE_PK4B }, + { 1, 1, 0, 0, 0, 0, "PK4UB", TGSI_OPCODE_PK4UB }, + { 1, 2, 0, 0, 0, 0, "RFL", TGSI_OPCODE_RFL }, + { 1, 2, 0, 0, 0, 0, "SEQ", TGSI_OPCODE_SEQ }, + { 1, 2, 0, 0, 0, 0, "SFL", TGSI_OPCODE_SFL }, + { 1, 2, 0, 0, 0, 0, "SGT", TGSI_OPCODE_SGT }, + { 1, 1, 0, 0, 0, 0, "SIN", TGSI_OPCODE_SIN }, + { 1, 2, 0, 0, 0, 0, "SLE", TGSI_OPCODE_SLE }, + { 1, 2, 0, 0, 0, 0, "SNE", TGSI_OPCODE_SNE }, + { 1, 2, 0, 0, 0, 0, "STR", TGSI_OPCODE_STR }, + { 1, 2, 1, 0, 0, 0, "TEX", TGSI_OPCODE_TEX }, + { 1, 4, 1, 0, 0, 0, "TXD", TGSI_OPCODE_TXD }, + { 1, 2, 1, 0, 0, 0, "TXP", TGSI_OPCODE_TXP }, + { 1, 1, 0, 0, 0, 0, "UP2H", TGSI_OPCODE_UP2H }, + { 1, 1, 0, 0, 0, 0, "UP2US", TGSI_OPCODE_UP2US }, + { 1, 1, 0, 0, 0, 0, "UP4B", TGSI_OPCODE_UP4B }, + { 1, 1, 0, 0, 0, 0, "UP4UB", TGSI_OPCODE_UP4UB }, + { 1, 3, 0, 0, 0, 0, "X2D", TGSI_OPCODE_X2D }, + { 1, 1, 0, 0, 0, 0, "ARA", TGSI_OPCODE_ARA }, + { 1, 1, 0, 0, 0, 0, "ARR", TGSI_OPCODE_ARR }, + { 0, 1, 0, 0, 0, 0, "BRA", TGSI_OPCODE_BRA }, + { 0, 0, 0, 1, 0, 0, "CAL", TGSI_OPCODE_CAL }, + { 0, 0, 0, 0, 0, 0, "RET", TGSI_OPCODE_RET }, + { 1, 1, 0, 0, 0, 0, "SSG", TGSI_OPCODE_SSG }, + { 1, 3, 0, 0, 0, 0, "CMP", TGSI_OPCODE_CMP }, + { 1, 1, 0, 0, 0, 0, "SCS", TGSI_OPCODE_SCS }, + { 1, 2, 1, 0, 0, 0, "TXB", TGSI_OPCODE_TXB }, + { 1, 1, 0, 0, 0, 0, "NRM", TGSI_OPCODE_NRM }, + { 1, 2, 0, 0, 0, 0, "DIV", TGSI_OPCODE_DIV }, + { 1, 2, 0, 0, 0, 0, "DP2", TGSI_OPCODE_DP2 }, + { 1, 2, 1, 0, 0, 0, "TXL", TGSI_OPCODE_TXL }, + { 0, 0, 0, 0, 0, 0, "BRK", TGSI_OPCODE_BRK }, + { 0, 1, 0, 1, 0, 1, "IF", TGSI_OPCODE_IF }, + { 1, 1, 0, 0, 0, 1, "BGNFOR", TGSI_OPCODE_BGNFOR }, + { 0, 1, 0, 0, 0, 1, "REP", TGSI_OPCODE_REP }, + { 0, 0, 0, 1, 1, 1, "ELSE", TGSI_OPCODE_ELSE }, + { 0, 0, 0, 0, 1, 0, "ENDIF", TGSI_OPCODE_ENDIF }, + { 1, 0, 0, 0, 1, 0, "ENDFOR", TGSI_OPCODE_ENDFOR }, + { 0, 0, 0, 0, 1, 0, "ENDREP", TGSI_OPCODE_ENDREP }, + { 0, 1, 0, 0, 0, 0, "PUSHA", TGSI_OPCODE_PUSHA }, + { 1, 0, 0, 0, 0, 0, "POPA", TGSI_OPCODE_POPA }, + { 1, 1, 0, 0, 0, 0, "CEIL", TGSI_OPCODE_CEIL }, + { 1, 1, 0, 0, 0, 0, "I2F", TGSI_OPCODE_I2F }, + { 1, 1, 0, 0, 0, 0, "NOT", TGSI_OPCODE_NOT }, + { 1, 1, 0, 0, 0, 0, "TRUNC", TGSI_OPCODE_TRUNC }, + { 1, 2, 0, 0, 0, 0, "SHL", TGSI_OPCODE_SHL }, + { 1, 2, 0, 0, 0, 0, "SHR", TGSI_OPCODE_SHR }, + { 1, 2, 0, 0, 0, 0, "AND", TGSI_OPCODE_AND }, + { 1, 2, 0, 0, 0, 0, "OR", TGSI_OPCODE_OR }, + { 1, 2, 0, 0, 0, 0, "MOD", TGSI_OPCODE_MOD }, + { 1, 2, 0, 0, 0, 0, "XOR", TGSI_OPCODE_XOR }, + { 1, 3, 0, 0, 0, 0, "SAD", TGSI_OPCODE_SAD }, + { 1, 2, 1, 0, 0, 0, "TXF", TGSI_OPCODE_TXF }, + { 1, 2, 1, 0, 0, 0, "TXQ", TGSI_OPCODE_TXQ }, + { 0, 0, 0, 0, 0, 0, "CONT", TGSI_OPCODE_CONT }, + { 0, 0, 0, 0, 0, 0, "EMIT", TGSI_OPCODE_EMIT }, + { 0, 0, 0, 0, 0, 0, "ENDPRIM", TGSI_OPCODE_ENDPRIM }, + { 0, 0, 0, 1, 0, 1, "BGNLOOP", TGSI_OPCODE_BGNLOOP }, + { 0, 0, 0, 0, 0, 1, "BGNSUB", TGSI_OPCODE_BGNSUB }, + { 0, 0, 0, 1, 1, 0, "ENDLOOP", TGSI_OPCODE_ENDLOOP }, + { 0, 0, 0, 0, 1, 0, "ENDSUB", TGSI_OPCODE_ENDSUB }, + { 1, 1, 0, 0, 0, 0, "NOISE1", TGSI_OPCODE_NOISE1 }, + { 1, 1, 0, 0, 0, 0, "NOISE2", TGSI_OPCODE_NOISE2 }, + { 1, 1, 0, 0, 0, 0, "NOISE3", TGSI_OPCODE_NOISE3 }, + { 1, 1, 0, 0, 0, 0, "NOISE4", TGSI_OPCODE_NOISE4 }, + { 0, 0, 0, 0, 0, 0, "NOP", TGSI_OPCODE_NOP }, + { 0, 0, 0, 0, 0, 0, "", 108 }, /* removed */ + { 0, 0, 0, 0, 0, 0, "", 109 }, /* removed */ + { 0, 0, 0, 0, 0, 0, "", 110 }, /* removed */ + { 0, 0, 0, 0, 0, 0, "", 111 }, /* removed */ + { 1, 1, 0, 0, 0, 0, "NRM4", TGSI_OPCODE_NRM4 }, + { 0, 1, 0, 0, 0, 0, "CALLNZ", TGSI_OPCODE_CALLNZ }, + { 0, 1, 0, 0, 0, 0, "IFC", TGSI_OPCODE_IFC }, + { 0, 1, 0, 0, 0, 0, "BREAKC", TGSI_OPCODE_BREAKC }, + { 0, 1, 0, 0, 0, 0, "KIL", TGSI_OPCODE_KIL }, + { 0, 0, 0, 0, 0, 0, "END", TGSI_OPCODE_END }, + { 1, 1, 0, 0, 0, 0, "SWZ", TGSI_OPCODE_SWZ } }; const struct tgsi_opcode_info * diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.h b/src/gallium/auxiliary/tgsi/tgsi_info.h index b2375c69710..74713c3b98a 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.h +++ b/src/gallium/auxiliary/tgsi/tgsi_info.h @@ -36,10 +36,12 @@ extern "C" { struct tgsi_opcode_info { - uint num_dst; - uint num_src; - boolean is_tex; - boolean is_branch; + unsigned num_dst:3; + unsigned num_src:3; + unsigned is_tex:1; + unsigned is_branch:1; + int pre_dedent:2; + int post_indent:2; const char *mnemonic; uint opcode; }; diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index 4fe8553c423..8a13885da9b 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -199,10 +199,10 @@ iter_instruction( } if (info->num_dst != inst->Instruction.NumDstRegs) { - report_error( ctx, "Invalid number of destination operands, should be %u", info->num_dst ); + report_error( ctx, "%s: Invalid number of destination operands, should be %u", info->mnemonic, info->num_dst ); } if (info->num_src != inst->Instruction.NumSrcRegs) { - report_error( ctx, "Invalid number of source operands, should be %u", info->num_src ); + report_error( ctx, "%s: Invalid number of source operands, should be %u", info->mnemonic, info->num_src ); } /* Check destination and source registers' validity. diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c index c0a0627e0b2..f7096bd8e2c 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -29,6 +29,7 @@ #include "pipe/p_context.h" #include "pipe/p_state.h" #include "tgsi/tgsi_ureg.h" +#include "tgsi/tgsi_info.h" #include "tgsi/tgsi_dump.h" #include "util/u_memory.h" #include "util/u_math.h" @@ -71,6 +72,7 @@ struct ureg_tokens { #define UREG_MAX_OUTPUT PIPE_MAX_ATTRIBS #define UREG_MAX_IMMEDIATE 32 #define UREG_MAX_TEMP 256 +#define UREG_MAX_ADDR 2 #define DOMAIN_DECL 0 #define DOMAIN_INSN 1 @@ -99,11 +101,15 @@ struct ureg_program } immediate[UREG_MAX_IMMEDIATE]; unsigned nr_immediates; + struct ureg_src sampler[PIPE_MAX_SAMPLERS]; + unsigned nr_samplers; + unsigned temps_active[UREG_MAX_TEMP / 32]; unsigned nr_temps; + unsigned nr_addrs; + unsigned nr_constants; - unsigned nr_samplers; unsigned nr_instructions; struct ureg_tokens domain[2]; @@ -187,6 +193,8 @@ ureg_dst_register( unsigned file, dst.File = file; dst.WriteMask = TGSI_WRITEMASK_XYZW; dst.Indirect = 0; + dst.IndirectIndex = 0; + dst.IndirectSwizzle = 0; dst.Saturate = 0; dst.Index = index; dst.Pad1 = 0; @@ -208,6 +216,8 @@ ureg_src_register( unsigned file, src.SwizzleW = TGSI_SWIZZLE_W; src.Pad = 0; src.Indirect = 0; + src.IndirectIndex = 0; + src.IndirectSwizzle = 0; src.Absolute = 0; src.Index = index; src.Negate = 0; @@ -254,6 +264,7 @@ ureg_DECL_fs_input( struct ureg_program *ureg, unsigned index, unsigned interp ) { + assert(ureg->processor == TGSI_PROCESSOR_FRAGMENT); return ureg_DECL_input( ureg, name, index, interp ); } @@ -263,6 +274,7 @@ ureg_DECL_vs_input( struct ureg_program *ureg, unsigned name, unsigned index ) { + assert(ureg->processor == TGSI_PROCESSOR_VERTEX); return ureg_DECL_input( ureg, name, index, TGSI_INTERPOLATE_CONSTANT ); } @@ -346,11 +358,36 @@ void ureg_release_temporary( struct ureg_program *ureg, } +/* Allocate a new address register. + */ +struct ureg_dst ureg_DECL_address( struct ureg_program *ureg ) +{ + if (ureg->nr_addrs < UREG_MAX_ADDR) + return ureg_dst_register( TGSI_FILE_ADDRESS, ureg->nr_addrs++ ); + + assert( 0 ); + return ureg_dst_register( TGSI_FILE_ADDRESS, 0 ); +} + /* Allocate a new sampler. */ -struct ureg_src ureg_DECL_sampler( struct ureg_program *ureg ) +struct ureg_src ureg_DECL_sampler( struct ureg_program *ureg, + unsigned nr ) { - return ureg_src_register( TGSI_FILE_SAMPLER, ureg->nr_samplers++ ); + unsigned i; + + for (i = 0; i < ureg->nr_samplers; i++) + if (ureg->sampler[i].Index == nr) + return ureg->sampler[i]; + + if (i < PIPE_MAX_SAMPLERS) { + ureg->sampler[i] = ureg_src_register( TGSI_FILE_SAMPLER, nr ); + ureg->nr_samplers++; + return ureg->sampler[i]; + } + + assert( 0 ); + return ureg->sampler[0]; } @@ -363,6 +400,8 @@ static int match_or_expand_immediate( const float *v, unsigned *swizzle ) { unsigned i, j; + + *swizzle = 0; for (i = 0; i < nr; i++) { boolean found = FALSE; @@ -394,8 +433,8 @@ struct ureg_src ureg_DECL_immediate( struct ureg_program *ureg, const float *v, unsigned nr ) { - unsigned i; - unsigned swizzle = 0; + unsigned i, j; + unsigned swizzle; /* Could do a first pass where we examine all existing immediates * without expanding. @@ -423,6 +462,12 @@ struct ureg_src ureg_DECL_immediate( struct ureg_program *ureg, set_bad( ureg ); out: + /* Make sure that all referenced elements are from this immediate. + * Has the effect of making size-one immediates into scalars. + */ + for (j = nr; j < 4; j++) + swizzle |= (swizzle & 0x3) << (j * 2); + return ureg_swizzle( ureg_src_register( TGSI_FILE_IMMEDIATE, i ), (swizzle >> 0) & 0x3, (swizzle >> 2) & 0x3, @@ -442,31 +487,39 @@ ureg_emit_src( struct ureg_program *ureg, union tgsi_any_token *out = get_tokens( ureg, DOMAIN_INSN, size ); unsigned n = 0; + assert(src.File != TGSI_FILE_NULL); + assert(src.File != TGSI_FILE_OUTPUT); + assert(src.File < TGSI_FILE_COUNT); + out[n].value = 0; out[n].src.File = src.File; out[n].src.SwizzleX = src.SwizzleX; out[n].src.SwizzleY = src.SwizzleY; out[n].src.SwizzleZ = src.SwizzleZ; out[n].src.SwizzleW = src.SwizzleW; - out[n].src.Indirect = src.Indirect; out[n].src.Index = src.Index; + out[n].src.Negate = src.Negate; n++; if (src.Absolute) { + out[0].src.Extended = 1; + out[0].src.Negate = 0; out[n].value = 0; + out[n].src_ext_mod.Type = TGSI_SRC_REGISTER_EXT_TYPE_MOD; out[n].src_ext_mod.Absolute = 1; + out[n].src_ext_mod.Negate = src.Negate; n++; } if (src.Indirect) { + out[0].src.Indirect = 1; out[n].value = 0; out[n].src.File = TGSI_FILE_ADDRESS; - out[n].src.SwizzleX = TGSI_SWIZZLE_X; - out[n].src.SwizzleY = TGSI_SWIZZLE_X; - out[n].src.SwizzleZ = TGSI_SWIZZLE_X; - out[n].src.SwizzleW = TGSI_SWIZZLE_X; - out[n].src.Indirect = 0; - out[n].src.Index = 0; + out[n].src.SwizzleX = src.IndirectSwizzle; + out[n].src.SwizzleY = src.IndirectSwizzle; + out[n].src.SwizzleZ = src.IndirectSwizzle; + out[n].src.SwizzleW = src.IndirectSwizzle; + out[n].src.Index = src.IndirectIndex; n++; } @@ -484,6 +537,13 @@ ureg_emit_dst( struct ureg_program *ureg, union tgsi_any_token *out = get_tokens( ureg, DOMAIN_INSN, size ); unsigned n = 0; + assert(dst.File != TGSI_FILE_NULL); + assert(dst.File != TGSI_FILE_CONSTANT); + assert(dst.File != TGSI_FILE_INPUT); + assert(dst.File != TGSI_FILE_SAMPLER); + assert(dst.File != TGSI_FILE_IMMEDIATE); + assert(dst.File < TGSI_FILE_COUNT); + out[n].value = 0; out[n].dst.File = dst.File; out[n].dst.WriteMask = dst.WriteMask; @@ -494,12 +554,11 @@ ureg_emit_dst( struct ureg_program *ureg, if (dst.Indirect) { out[n].value = 0; out[n].src.File = TGSI_FILE_ADDRESS; - out[n].src.SwizzleX = TGSI_SWIZZLE_X; - out[n].src.SwizzleY = TGSI_SWIZZLE_X; - out[n].src.SwizzleZ = TGSI_SWIZZLE_X; - out[n].src.SwizzleW = TGSI_SWIZZLE_X; - out[n].src.Indirect = 0; - out[n].src.Index = 0; + out[n].src.SwizzleX = dst.IndirectSwizzle; + out[n].src.SwizzleY = dst.IndirectSwizzle; + out[n].src.SwizzleZ = dst.IndirectSwizzle; + out[n].src.SwizzleW = dst.IndirectSwizzle; + out[n].src.Index = dst.IndirectIndex; n++; } @@ -523,7 +582,6 @@ ureg_emit_insn(struct ureg_program *ureg, out[0].insn.NrTokens = 0; out[0].insn.Opcode = opcode; out[0].insn.Saturate = saturate; - out[0].insn.NrTokens = 0; out[0].insn.NumDstRegs = num_dst; out[0].insn.NumSrcRegs = num_src; out[0].insn.Padding = 0; @@ -542,6 +600,9 @@ ureg_emit_label(struct ureg_program *ureg, { union tgsi_any_token *out, *insn; + if(!label_token) + return; + out = get_tokens( ureg, DOMAIN_INSN, 1 ); insn = retrieve_token( ureg, DOMAIN_INSN, insn_token ); @@ -617,6 +678,17 @@ ureg_insn(struct ureg_program *ureg, unsigned insn, i; boolean saturate; +#ifdef DEBUG + { + const struct tgsi_opcode_info *info = tgsi_get_opcode_info( opcode ); + assert(info); + if(info) { + assert(nr_dst == info->num_dst); + assert(nr_src == info->num_src); + } + } +#endif + saturate = nr_dst ? dst[0].Saturate : FALSE; insn = ureg_emit_insn( ureg, opcode, saturate, nr_dst, nr_src ); @@ -723,10 +795,10 @@ static void emit_decls( struct ureg_program *ureg ) TGSI_INTERPOLATE_CONSTANT ); } - if (ureg->nr_samplers) { + for (i = 0; i < ureg->nr_samplers; i++) { emit_decl_range( ureg, TGSI_FILE_SAMPLER, - 0, ureg->nr_samplers ); + ureg->sampler[i].Index, 1 ); } if (ureg->nr_constants) { @@ -741,6 +813,12 @@ static void emit_decls( struct ureg_program *ureg ) 0, ureg->nr_temps ); } + if (ureg->nr_addrs) { + emit_decl_range( ureg, + TGSI_FILE_ADDRESS, + 0, ureg->nr_addrs ); + } + for (i = 0; i < ureg->nr_immediates; i++) { emit_immediate( ureg, ureg->immediate[i].v ); @@ -764,7 +842,7 @@ static void copy_instructions( struct ureg_program *ureg ) static void -fixup_header_size(struct ureg_program *ureg ) +fixup_header_size(struct ureg_program *ureg) { union tgsi_any_token *out = retrieve_token( ureg, DOMAIN_DECL, 1 ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h index 8836a1ea0eb..acbca59040c 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h @@ -31,6 +31,10 @@ #include "pipe/p_compiler.h" #include "pipe/p_shader_tokens.h" +#ifdef __cplusplus +extern "C" { +#endif + struct ureg_program; /* Almost a tgsi_src_register, but we need to pull in the Absolute @@ -48,6 +52,8 @@ struct ureg_src unsigned Absolute : 1; /* BOOL */ int Index : 16; /* SINT */ unsigned Negate : 1; /* BOOL */ + int IndirectIndex : 16; /* SINT */ + int IndirectSwizzle : 2; /* TGSI_SWIZZLE_ */ }; /* Very similar to a tgsi_dst_register, removing unsupported fields @@ -64,6 +70,8 @@ struct ureg_dst int Index : 16; /* SINT */ unsigned Pad1 : 5; unsigned Pad2 : 1; /* BOOL */ + int IndirectIndex : 16; /* SINT */ + int IndirectSwizzle : 2; /* TGSI_SWIZZLE_ */ }; struct pipe_context; @@ -131,12 +139,21 @@ void ureg_release_temporary( struct ureg_program *ureg, struct ureg_dst tmp ); +struct ureg_dst +ureg_DECL_address( struct ureg_program * ); + +/* Supply an index to the sampler declaration as this is the hook to + * the external pipe_sampler state. Users of this function probably + * don't want just any sampler, but a specific one which they've set + * up state for in the context. + */ struct ureg_src -ureg_DECL_sampler( struct ureg_program * ); +ureg_DECL_sampler( struct ureg_program *, + unsigned index ); static INLINE struct ureg_src -ureg_DECL_immediate4f( struct ureg_program *ureg, +ureg_imm4f( struct ureg_program *ureg, float a, float b, float c, float d) { @@ -149,7 +166,7 @@ ureg_DECL_immediate4f( struct ureg_program *ureg, } static INLINE struct ureg_src -ureg_DECL_immediate3f( struct ureg_program *ureg, +ureg_imm3f( struct ureg_program *ureg, float a, float b, float c) { @@ -161,7 +178,7 @@ ureg_DECL_immediate3f( struct ureg_program *ureg, } static INLINE struct ureg_src -ureg_DECL_immediate2f( struct ureg_program *ureg, +ureg_imm2f( struct ureg_program *ureg, float a, float b) { float v[2]; @@ -171,7 +188,7 @@ ureg_DECL_immediate2f( struct ureg_program *ureg, } static INLINE struct ureg_src -ureg_DECL_immediate1f( struct ureg_program *ureg, +ureg_imm1f( struct ureg_program *ureg, float a) { float v[1]; @@ -392,6 +409,7 @@ static INLINE void ureg_##op( struct ureg_program *ureg, \ static INLINE struct ureg_src ureg_negate( struct ureg_src reg ) { + assert(reg.File != TGSI_FILE_NULL); reg.Negate ^= 1; return reg; } @@ -399,6 +417,7 @@ ureg_negate( struct ureg_src reg ) static INLINE struct ureg_src ureg_abs( struct ureg_src reg ) { + assert(reg.File != TGSI_FILE_NULL); reg.Absolute = 1; reg.Negate = 0; return reg; @@ -413,6 +432,12 @@ ureg_swizzle( struct ureg_src reg, (reg.SwizzleZ << 4) | (reg.SwizzleW << 6)); + assert(reg.File != TGSI_FILE_NULL); + assert(x < 4); + assert(y < 4); + assert(z < 4); + assert(w < 4); + reg.SwizzleX = (swz >> (x*2)) & 0x3; reg.SwizzleY = (swz >> (y*2)) & 0x3; reg.SwizzleZ = (swz >> (z*2)) & 0x3; @@ -430,6 +455,7 @@ static INLINE struct ureg_dst ureg_writemask( struct ureg_dst reg, unsigned writemask ) { + assert(reg.File != TGSI_FILE_NULL); reg.WriteMask &= writemask; return reg; } @@ -437,10 +463,33 @@ ureg_writemask( struct ureg_dst reg, static INLINE struct ureg_dst ureg_saturate( struct ureg_dst reg ) { + assert(reg.File != TGSI_FILE_NULL); reg.Saturate = 1; return reg; } +static INLINE struct ureg_dst +ureg_dst_indirect( struct ureg_dst reg, struct ureg_src addr ) +{ + assert(reg.File != TGSI_FILE_NULL); + assert(addr.File == TGSI_FILE_ADDRESS); + reg.Indirect = 1; + reg.IndirectIndex = addr.Index; + reg.IndirectSwizzle = addr.SwizzleX; + return reg; +} + +static INLINE struct ureg_src +ureg_src_indirect( struct ureg_src reg, struct ureg_src addr ) +{ + assert(reg.File != TGSI_FILE_NULL); + assert(addr.File == TGSI_FILE_ADDRESS); + reg.Indirect = 1; + reg.IndirectIndex = addr.Index; + reg.IndirectSwizzle = addr.SwizzleX; + return reg; +} + static INLINE struct ureg_dst ureg_dst( struct ureg_src src ) { @@ -449,6 +498,8 @@ ureg_dst( struct ureg_src src ) dst.File = src.File; dst.WriteMask = TGSI_WRITEMASK_XYZW; dst.Indirect = src.Indirect; + dst.IndirectIndex = src.IndirectIndex; + dst.IndirectSwizzle = src.IndirectSwizzle; dst.Saturate = 0; dst.Index = src.Index; dst.Pad1 = 0; @@ -469,6 +520,8 @@ ureg_src( struct ureg_dst dst ) src.SwizzleW = TGSI_SWIZZLE_W; src.Pad = 0; src.Indirect = dst.Indirect; + src.IndirectIndex = dst.IndirectIndex; + src.IndirectSwizzle = dst.IndirectSwizzle; src.Absolute = 0; src.Index = dst.Index; src.Negate = 0; @@ -478,4 +531,60 @@ ureg_src( struct ureg_dst dst ) +static INLINE struct ureg_dst +ureg_dst_undef( void ) +{ + struct ureg_dst dst; + + dst.File = TGSI_FILE_NULL; + dst.WriteMask = 0; + dst.Indirect = 0; + dst.IndirectIndex = 0; + dst.IndirectSwizzle = 0; + dst.Saturate = 0; + dst.Index = 0; + dst.Pad1 = 0; + dst.Pad2 = 0; + + return dst; +} + +static INLINE struct ureg_src +ureg_src_undef( void ) +{ + struct ureg_src src; + + src.File = TGSI_FILE_NULL; + src.SwizzleX = 0; + src.SwizzleY = 0; + src.SwizzleZ = 0; + src.SwizzleW = 0; + src.Pad = 0; + src.Indirect = 0; + src.IndirectIndex = 0; + src.IndirectSwizzle = 0; + src.Absolute = 0; + src.Index = 0; + src.Negate = 0; + + return src; +} + +static INLINE boolean +ureg_src_is_undef( struct ureg_src src ) +{ + return src.File == TGSI_FILE_NULL; +} + +static INLINE boolean +ureg_dst_is_undef( struct ureg_dst dst ) +{ + return dst.File == TGSI_FILE_NULL; +} + + +#ifdef __cplusplus +} +#endif + #endif diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.h b/src/gallium/auxiliary/tgsi/tgsi_util.h index 7877f345587..21eb656327e 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_util.h +++ b/src/gallium/auxiliary/tgsi/tgsi_util.h @@ -32,6 +32,10 @@ extern "C" { #endif +struct tgsi_src_register; +struct tgsi_src_register_ext_swz; +struct tgsi_full_src_register; + void * tgsi_align_128bit( void *unaligned ); diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index b0807c13392..4c6c2bc00e1 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -374,6 +374,10 @@ unsigned ffs( unsigned u ) #define ffs __builtin_ffs #endif +#ifdef __MINGW32__ +#define ffs __builtin_ffs +#endif + /* Could also binary search for the highest bit. */ diff --git a/src/gallium/auxiliary/util/u_simple_shaders.c b/src/gallium/auxiliary/util/u_simple_shaders.c index d54a1d8c746..eb311d29f94 100644 --- a/src/gallium/auxiliary/util/u_simple_shaders.c +++ b/src/gallium/auxiliary/util/u_simple_shaders.c @@ -103,7 +103,7 @@ util_make_fragment_tex_shader(struct pipe_context *pipe) if (ureg == NULL) return NULL; - sampler = ureg_DECL_sampler( ureg ); + sampler = ureg_DECL_sampler( ureg, 0 ); tex = ureg_DECL_fs_input( ureg, TGSI_SEMANTIC_GENERIC, 0, -- cgit v1.2.3 From ce39cd6696efbc72c13187217ca5d74ab00ab37f Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Fri, 4 Sep 2009 09:31:22 +0200 Subject: tgsi: Document differencies between vs_1_1 and vs_2_0 for EXPP and LOGP. --- src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary/tgsi') diff --git a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt index a989514b75f..eb492076b7d 100644 --- a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt +++ b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt @@ -1027,12 +1027,12 @@ TGSI Instruction Specification 1.18.1 EXPP - Approximate Exponential Base 2 - Alias for EXP. + Use EXP. See also 1.19.3. 1.18.2 LOGP - Logarithm Base 2 - Alias for LG2. + Use LOG. See also 1.19.4. 1.19 vs_2_0 @@ -1049,6 +1049,16 @@ TGSI Instruction Specification Alias for ARR. +1.19.3 EXPP - Approximate Exponential Base 2 + + Use EX2. + + +1.19.4 LOGP - Logarithm Base 2 + + Use LG2. + + 2 Explanation of symbols used ============================== -- cgit v1.2.3 From d78a19612173eda51b93818a16a947201a785f3f Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 10 Sep 2009 12:44:28 -0600 Subject: tgsi: use new tgsi_call_record to handle execution mask stacks This fixes some issues when "return"ing from nested loops/conditionals. --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 44 ++++++++++++++++++++++++---------- src/gallium/auxiliary/tgsi/tgsi_exec.h | 13 +++++++++- 2 files changed, 43 insertions(+), 14 deletions(-) (limited to 'src/gallium/auxiliary/tgsi') diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 60ffa188df6..c79c56debd6 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -2756,19 +2756,32 @@ exec_instruction( if (mach->ExecMask) { /* do the call */ - /* push the Cond, Loop, Cont stacks */ + /* First, record the depths of the execution stacks. + * This is important for deeply nested/looped return statements. + * We have to unwind the stacks by the correct amount. For a + * real code generator, we could determine the number of entries + * to pop off each stack with simple static analysis and avoid + * implementing this data structure at run time. + */ + mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop; + mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop; + mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop; + /* note that PC was already incremented above */ + mach->CallStack[mach->CallStackTop].ReturnAddr = *pc; + + mach->CallStackTop++; + + /* Second, push the Cond, Loop, Cont, Func stacks */ assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); mach->CondStack[mach->CondStackTop++] = mach->CondMask; assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); mach->ContStack[mach->ContStackTop++] = mach->ContMask; - assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; - /* note that PC was already incremented above */ - mach->CallStack[mach->CallStackTop++] = *pc; + /* Finally, jump to the subroutine */ *pc = inst->InstructionExtLabel.Label; } break; @@ -2785,18 +2798,24 @@ exec_instruction( *pc = -1; return; } - *pc = mach->CallStack[--mach->CallStackTop]; - /* pop the Cond, Loop, Cont stacks */ - assert(mach->CondStackTop > 0); - mach->CondMask = mach->CondStack[--mach->CondStackTop]; - assert(mach->LoopStackTop > 0); - mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; - assert(mach->ContStackTop > 0); - mach->ContMask = mach->ContStack[--mach->ContStackTop]; + assert(mach->CallStackTop > 0); + mach->CallStackTop--; + + mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop; + mach->CondMask = mach->CondStack[mach->CondStackTop]; + + mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop; + mach->LoopMask = mach->LoopStack[mach->LoopStackTop]; + + mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; + mach->ContMask = mach->ContStack[mach->ContStackTop]; + assert(mach->FuncStackTop > 0); mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; + *pc = mach->CallStack[mach->CallStackTop].ReturnAddr; + UPDATE_EXEC_MASK(mach); } break; @@ -3245,7 +3264,6 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) mach->FuncMask = 0xf; mach->ExecMask = 0xf; - mach->CondStackTop = 0; /* temporarily subvert this assertion */ assert(mach->CondStackTop == 0); assert(mach->LoopStackTop == 0); assert(mach->ContStackTop == 0); diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index 3baa94dbdde..c72f76809d4 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -186,6 +186,17 @@ struct tgsi_exec_labels */ #define TGSI_EXEC_MAX_CONST_BUFFER 4096 + +/** function call/activation record */ +struct tgsi_call_record +{ + uint CondStackTop; + uint LoopStackTop; + uint ContStackTop; + uint ReturnAddr; +}; + + /** * Run-time virtual machine state for executing TGSI shader. */ @@ -249,7 +260,7 @@ struct tgsi_exec_machine int FuncStackTop; /** Function call stack for saving/restoring the program counter */ - uint CallStack[TGSI_EXEC_MAX_CALL_NESTING]; + struct tgsi_call_record CallStack[TGSI_EXEC_MAX_CALL_NESTING]; int CallStackTop; struct tgsi_full_instruction *Instructions; -- cgit v1.2.3