diff options
Diffstat (limited to 'src/gallium/auxiliary')
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt | 20 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_dump.c | 31 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_exec.c | 71 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_exec.h | 8 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_info.c | 238 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_info.h | 10 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h | 1 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_sanity.c | 4 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_sse2.c | 4 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_ureg.c | 122 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_ureg.h | 119 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_util.h | 4 | ||||
-rw-r--r-- | src/gallium/auxiliary/util/u_blit.c | 55 | ||||
-rw-r--r-- | src/gallium/auxiliary/util/u_blit.h | 11 | ||||
-rw-r--r-- | src/gallium/auxiliary/util/u_math.h | 4 | ||||
-rw-r--r-- | src/gallium/auxiliary/util/u_simple_shaders.c | 16 | ||||
-rw-r--r-- | src/gallium/auxiliary/util/u_simple_shaders.h | 4 |
17 files changed, 511 insertions, 211 deletions
diff --git a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt index 802ec371189..eb492076b7d 100644 --- a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt +++ b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt @@ -187,11 +187,7 @@ TGSI Instruction Specification 1.2.6 CND0 - Condition Zero - dst.x = (src2.x >= 0.0) ? src0.x : src1.x - dst.y = (src2.y >= 0.0) ? src0.y : src1.y - dst.z = (src2.z >= 0.0) ? src0.z : src1.z - dst.w = (src2.w >= 0.0) ? src0.w : src1.w - + Removed. Use (CMP src2, src1, src0) instead. 1.2.7 DOT2ADD - 2-component Dot Product And Add @@ -1031,12 +1027,12 @@ TGSI Instruction Specification 1.18.1 EXPP - Approximate Exponential Base 2 - Alias for EXP. + Use EXP. See also 1.19.3. 1.18.2 LOGP - Logarithm Base 2 - Alias for LG2. + Use LOG. See also 1.19.4. 1.19 vs_2_0 @@ -1053,6 +1049,16 @@ TGSI Instruction Specification Alias for ARR. +1.19.3 EXPP - Approximate Exponential Base 2 + + Use EX2. + + +1.19.4 LOGP - Logarithm Base 2 + + Use LG2. + + 2 Explanation of symbols used ============================== diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index 05b07a3a73e..111d95b6665 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -43,6 +43,7 @@ struct dump_ctx struct tgsi_iterate_context iter; uint instno; + int indent; uint indentation; @@ -335,14 +336,6 @@ tgsi_dump_immediate( iter_immediate( &ctx.iter, (struct tgsi_full_immediate *)imm ); } -static void -indent(struct dump_ctx *ctx) -{ - uint i; - for (i = 0; i < ctx->indentation; i++) - TXT(" "); -} - static boolean iter_instruction( struct tgsi_iterate_context *iter, @@ -350,22 +343,19 @@ iter_instruction( { struct dump_ctx *ctx = (struct dump_ctx *) iter; uint instno = ctx->instno++; - + const struct tgsi_opcode_info *info = tgsi_get_opcode_info( inst->Instruction.Opcode ); uint i; boolean first_reg = TRUE; INSTID( instno ); TXT( ": " ); - - /* update indentation */ - if (inst->Instruction.Opcode == TGSI_OPCODE_ENDIF || - inst->Instruction.Opcode == TGSI_OPCODE_ENDFOR || - inst->Instruction.Opcode == TGSI_OPCODE_ENDLOOP) { - ctx->indentation -= indent_spaces; - } - indent(ctx); - - TXT( tgsi_get_opcode_info( inst->Instruction.Opcode )->mnemonic ); + + ctx->indent -= info->pre_dedent; + for(i = 0; (int)i < ctx->indent; ++i) + TXT( " " ); + ctx->indent += info->post_indent; + + TXT( info->mnemonic ); switch (inst->Instruction.Saturate) { case TGSI_SAT_NONE: @@ -526,6 +516,7 @@ tgsi_dump_instruction( struct dump_ctx ctx; ctx.instno = instno; + ctx.indent = 0; ctx.printf = dump_ctx_printf; ctx.indentation = 0; @@ -559,6 +550,7 @@ tgsi_dump( ctx.iter.epilog = NULL; ctx.instno = 0; + ctx.indent = 0; ctx.printf = dump_ctx_printf; ctx.indentation = 0; @@ -612,6 +604,7 @@ tgsi_dump_str( ctx.base.iter.epilog = NULL; ctx.base.instno = 0; + ctx.base.indent = 0; ctx.base.printf = &str_dump_ctx_printf; ctx.base.indentation = 0; diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 711e86d6edf..60ffa188df6 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -2329,16 +2329,6 @@ exec_instruction( } break; - case TGSI_OPCODE_CND0: - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - FETCH(&r[2], 2, chan_index); - micro_le(&r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[2], &r[0], &r[1]); - STORE(&r[0], 0, chan_index); - } - break; - case TGSI_OPCODE_DP2A: FETCH( &r[0], 0, CHAN_X ); FETCH( &r[1], 1, CHAN_X ); @@ -3104,6 +3094,12 @@ exec_instruction( break; case TGSI_OPCODE_BGNFOR: + assert(mach->LoopCounterStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + for (chan_index = 0; chan_index < 3; chan_index++) { + FETCH( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[chan_index], 0, chan_index ); + } + STORE( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_Y], 0, CHAN_X ); + ++mach->LoopCounterStackTop; /* fall-through (for now) */ case TGSI_OPCODE_BGNLOOP: /* push LoopMask and ContMasks */ @@ -3111,10 +3107,58 @@ exec_instruction( mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); mach->ContStack[mach->ContStackTop++] = mach->ContMask; + assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1; break; case TGSI_OPCODE_ENDFOR: - /* fall-through (for now at least) */ + assert(mach->LoopCounterStackTop > 0); + micro_sub( &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], + &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], + &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); + /* update LoopMask */ + if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[0] <= 0) { + mach->LoopMask &= ~0x1; + } + if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[1] <= 0 ) { + mach->LoopMask &= ~0x2; + } + if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[2] <= 0 ) { + mach->LoopMask &= ~0x4; + } + if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[3] <= 0 ) { + mach->LoopMask &= ~0x8; + } + micro_add( &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], + &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], + &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Z]); + assert(mach->LoopLabelStackTop > 0); + inst = mach->Instructions + mach->LoopLabelStack[mach->LoopLabelStackTop - 1]; + STORE( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_Y], 0, CHAN_X ); + /* Restore ContMask, but don't pop */ + assert(mach->ContStackTop > 0); + mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; + UPDATE_EXEC_MASK(mach); + if (mach->ExecMask) { + /* repeat loop: jump to instruction just past BGNLOOP */ + assert(mach->LoopLabelStackTop > 0); + *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1; + } + else { + /* exit loop: pop LoopMask */ + assert(mach->LoopStackTop > 0); + mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; + /* pop ContMask */ + assert(mach->ContStackTop > 0); + mach->ContMask = mach->ContStack[--mach->ContStackTop]; + assert(mach->LoopLabelStackTop > 0); + --mach->LoopLabelStackTop; + assert(mach->LoopCounterStackTop > 0); + --mach->LoopCounterStackTop; + } + UPDATE_EXEC_MASK(mach); + break; + case TGSI_OPCODE_ENDLOOP: /* Restore ContMask, but don't pop */ assert(mach->ContStackTop > 0); @@ -3122,7 +3166,8 @@ exec_instruction( UPDATE_EXEC_MASK(mach); if (mach->ExecMask) { /* repeat loop: jump to instruction just past BGNLOOP */ - *pc = inst->InstructionExtLabel.Label + 1; + assert(mach->LoopLabelStackTop > 0); + *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1; } else { /* exit loop: pop LoopMask */ @@ -3131,6 +3176,8 @@ exec_instruction( /* pop ContMask */ assert(mach->ContStackTop > 0); mach->ContMask = mach->ContStack[--mach->ContStackTop]; + assert(mach->LoopLabelStackTop > 0); + --mach->LoopLabelStackTop; } UPDATE_EXEC_MASK(mach); break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index fd9ef6f35df..3baa94dbdde 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -232,6 +232,14 @@ struct tgsi_exec_machine uint LoopStack[TGSI_EXEC_MAX_LOOP_NESTING]; int LoopStackTop; + /** Loop label stack */ + uint LoopLabelStack[TGSI_EXEC_MAX_LOOP_NESTING]; + int LoopLabelStackTop; + + /** Loop counter stack (x = count, y = current, z = step) */ + struct tgsi_exec_vector LoopCounterStack[TGSI_EXEC_MAX_LOOP_NESTING]; + int LoopCounterStackTop; + /** Loop continue mask stack (see comments in tgsi_exec.c) */ uint ContStack[TGSI_EXEC_MAX_LOOP_NESTING]; int ContStackTop; diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c index ccf4b205ffb..17af4cb7ad2 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -31,125 +31,125 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { - { 1, 1, 0, 0, "ARL", TGSI_OPCODE_ARL }, - { 1, 1, 0, 0, "MOV", TGSI_OPCODE_MOV }, - { 1, 1, 0, 0, "LIT", TGSI_OPCODE_LIT }, - { 1, 1, 0, 0, "RCP", TGSI_OPCODE_RCP }, - { 1, 1, 0, 0, "RSQ", TGSI_OPCODE_RSQ }, - { 1, 1, 0, 0, "EXP", TGSI_OPCODE_EXP }, - { 1, 1, 0, 0, "LOG", TGSI_OPCODE_LOG }, - { 1, 2, 0, 0, "MUL", TGSI_OPCODE_MUL }, - { 1, 2, 0, 0, "ADD", TGSI_OPCODE_ADD }, - { 1, 2, 0, 0, "DP3", TGSI_OPCODE_DP3 }, - { 1, 2, 0, 0, "DP4", TGSI_OPCODE_DP4 }, - { 1, 2, 0, 0, "DST", TGSI_OPCODE_DST }, - { 1, 2, 0, 0, "MIN", TGSI_OPCODE_MIN }, - { 1, 2, 0, 0, "MAX", TGSI_OPCODE_MAX }, - { 1, 2, 0, 0, "SLT", TGSI_OPCODE_SLT }, - { 1, 2, 0, 0, "SGE", TGSI_OPCODE_SGE }, - { 1, 3, 0, 0, "MAD", TGSI_OPCODE_MAD }, - { 1, 2, 0, 0, "SUB", TGSI_OPCODE_SUB }, - { 1, 3, 0, 0, "LRP", TGSI_OPCODE_LRP }, - { 1, 3, 0, 0, "CND", TGSI_OPCODE_CND }, - { 1, 3, 0, 0, "CND0", TGSI_OPCODE_CND0 }, - { 1, 3, 0, 0, "DP2A", TGSI_OPCODE_DP2A }, - { 0, 0, 0, 0, "", 22 }, /* removed */ - { 0, 0, 0, 0, "", 23 }, /* removed */ - { 1, 1, 0, 0, "FRC", TGSI_OPCODE_FRC }, - { 1, 3, 0, 0, "CLAMP", TGSI_OPCODE_CLAMP }, - { 1, 1, 0, 0, "FLR", TGSI_OPCODE_FLR }, - { 1, 1, 0, 0, "ROUND", TGSI_OPCODE_ROUND }, - { 1, 1, 0, 0, "EX2", TGSI_OPCODE_EX2 }, - { 1, 1, 0, 0, "LG2", TGSI_OPCODE_LG2 }, - { 1, 2, 0, 0, "POW", TGSI_OPCODE_POW }, - { 1, 2, 0, 0, "XPD", TGSI_OPCODE_XPD }, - { 0, 0, 0, 0, "", 32 }, /* removed */ - { 1, 1, 0, 0, "ABS", TGSI_OPCODE_ABS }, - { 1, 1, 0, 0, "RCC", TGSI_OPCODE_RCC }, - { 1, 2, 0, 0, "DPH", TGSI_OPCODE_DPH }, - { 1, 1, 0, 0, "COS", TGSI_OPCODE_COS }, - { 1, 1, 0, 0, "DDX", TGSI_OPCODE_DDX }, - { 1, 1, 0, 0, "DDY", TGSI_OPCODE_DDY }, - { 0, 0, 0, 0, "KILP", TGSI_OPCODE_KILP }, - { 1, 1, 0, 0, "PK2H", TGSI_OPCODE_PK2H }, - { 1, 1, 0, 0, "PK2US", TGSI_OPCODE_PK2US }, - { 1, 1, 0, 0, "PK4B", TGSI_OPCODE_PK4B }, - { 1, 1, 0, 0, "PK4UB", TGSI_OPCODE_PK4UB }, - { 1, 2, 0, 0, "RFL", TGSI_OPCODE_RFL }, - { 1, 2, 0, 0, "SEQ", TGSI_OPCODE_SEQ }, - { 1, 2, 0, 0, "SFL", TGSI_OPCODE_SFL }, - { 1, 2, 0, 0, "SGT", TGSI_OPCODE_SGT }, - { 1, 1, 0, 0, "SIN", TGSI_OPCODE_SIN }, - { 1, 2, 0, 0, "SLE", TGSI_OPCODE_SLE }, - { 1, 2, 0, 0, "SNE", TGSI_OPCODE_SNE }, - { 1, 2, 0, 0, "STR", TGSI_OPCODE_STR }, - { 1, 2, 1, 0, "TEX", TGSI_OPCODE_TEX }, - { 1, 4, 1, 0, "TXD", TGSI_OPCODE_TXD }, - { 1, 2, 1, 0, "TXP", TGSI_OPCODE_TXP }, - { 1, 1, 0, 0, "UP2H", TGSI_OPCODE_UP2H }, - { 1, 1, 0, 0, "UP2US", TGSI_OPCODE_UP2US }, - { 1, 1, 0, 0, "UP4B", TGSI_OPCODE_UP4B }, - { 1, 1, 0, 0, "UP4UB", TGSI_OPCODE_UP4UB }, - { 1, 3, 0, 0, "X2D", TGSI_OPCODE_X2D }, - { 1, 1, 0, 0, "ARA", TGSI_OPCODE_ARA }, - { 1, 1, 0, 0, "ARR", TGSI_OPCODE_ARR }, - { 0, 1, 0, 0, "BRA", TGSI_OPCODE_BRA }, - { 0, 0, 0, 1, "CAL", TGSI_OPCODE_CAL }, - { 0, 0, 0, 0, "RET", TGSI_OPCODE_RET }, - { 1, 1, 0, 0, "SSG", TGSI_OPCODE_SSG }, - { 1, 3, 0, 0, "CMP", TGSI_OPCODE_CMP }, - { 1, 1, 0, 0, "SCS", TGSI_OPCODE_SCS }, - { 1, 2, 1, 0, "TXB", TGSI_OPCODE_TXB }, - { 1, 1, 0, 0, "NRM", TGSI_OPCODE_NRM }, - { 1, 2, 0, 0, "DIV", TGSI_OPCODE_DIV }, - { 1, 2, 0, 0, "DP2", TGSI_OPCODE_DP2 }, - { 1, 2, 1, 0, "TXL", TGSI_OPCODE_TXL }, - { 0, 0, 0, 0, "BRK", TGSI_OPCODE_BRK }, - { 0, 1, 0, 1, "IF", TGSI_OPCODE_IF }, - { 1, 1, 0, 0, "BGNFOR", TGSI_OPCODE_BGNFOR }, - { 0, 1, 0, 0, "REP", TGSI_OPCODE_REP }, - { 0, 0, 0, 1, "ELSE", TGSI_OPCODE_ELSE }, - { 0, 0, 0, 0, "ENDIF", TGSI_OPCODE_ENDIF }, - { 1, 0, 0, 0, "ENDFOR", TGSI_OPCODE_ENDFOR }, - { 0, 0, 0, 0, "ENDREP", TGSI_OPCODE_ENDREP }, - { 0, 1, 0, 0, "PUSHA", TGSI_OPCODE_PUSHA }, - { 1, 0, 0, 0, "POPA", TGSI_OPCODE_POPA }, - { 1, 1, 0, 0, "CEIL", TGSI_OPCODE_CEIL }, - { 1, 1, 0, 0, "I2F", TGSI_OPCODE_I2F }, - { 1, 1, 0, 0, "NOT", TGSI_OPCODE_NOT }, - { 1, 1, 0, 0, "TRUNC", TGSI_OPCODE_TRUNC }, - { 1, 2, 0, 0, "SHL", TGSI_OPCODE_SHL }, - { 1, 2, 0, 0, "SHR", TGSI_OPCODE_SHR }, - { 1, 2, 0, 0, "AND", TGSI_OPCODE_AND }, - { 1, 2, 0, 0, "OR", TGSI_OPCODE_OR }, - { 1, 2, 0, 0, "MOD", TGSI_OPCODE_MOD }, - { 1, 2, 0, 0, "XOR", TGSI_OPCODE_XOR }, - { 1, 3, 0, 0, "SAD", TGSI_OPCODE_SAD }, - { 1, 2, 1, 0, "TXF", TGSI_OPCODE_TXF }, - { 1, 2, 1, 0, "TXQ", TGSI_OPCODE_TXQ }, - { 0, 0, 0, 0, "CONT", TGSI_OPCODE_CONT }, - { 0, 0, 0, 0, "EMIT", TGSI_OPCODE_EMIT }, - { 0, 0, 0, 0, "ENDPRIM", TGSI_OPCODE_ENDPRIM }, - { 0, 0, 0, 1, "BGNLOOP", TGSI_OPCODE_BGNLOOP }, - { 0, 0, 0, 0, "BGNSUB", TGSI_OPCODE_BGNSUB }, - { 0, 0, 0, 1, "ENDLOOP", TGSI_OPCODE_ENDLOOP }, - { 0, 0, 0, 0, "ENDSUB", TGSI_OPCODE_ENDSUB }, - { 1, 1, 0, 0, "NOISE1", TGSI_OPCODE_NOISE1 }, - { 1, 1, 0, 0, "NOISE2", TGSI_OPCODE_NOISE2 }, - { 1, 1, 0, 0, "NOISE3", TGSI_OPCODE_NOISE3 }, - { 1, 1, 0, 0, "NOISE4", TGSI_OPCODE_NOISE4 }, - { 0, 0, 0, 0, "NOP", TGSI_OPCODE_NOP }, - { 0, 0, 0, 0, "", 108 }, /* removed */ - { 0, 0, 0, 0, "", 109 }, /* removed */ - { 0, 0, 0, 0, "", 110 }, /* removed */ - { 0, 0, 0, 0, "", 111 }, /* removed */ - { 1, 1, 0, 0, "NRM4", TGSI_OPCODE_NRM4 }, - { 0, 1, 0, 0, "CALLNZ", TGSI_OPCODE_CALLNZ }, - { 0, 1, 0, 0, "IFC", TGSI_OPCODE_IFC }, - { 0, 1, 0, 0, "BREAKC", TGSI_OPCODE_BREAKC }, - { 0, 1, 0, 0, "KIL", TGSI_OPCODE_KIL }, - { 0, 0, 0, 0, "END", TGSI_OPCODE_END }, - { 1, 1, 0, 0, "SWZ", TGSI_OPCODE_SWZ } + { 1, 1, 0, 0, 0, 0, "ARL", TGSI_OPCODE_ARL }, + { 1, 1, 0, 0, 0, 0, "MOV", TGSI_OPCODE_MOV }, + { 1, 1, 0, 0, 0, 0, "LIT", TGSI_OPCODE_LIT }, + { 1, 1, 0, 0, 0, 0, "RCP", TGSI_OPCODE_RCP }, + { 1, 1, 0, 0, 0, 0, "RSQ", TGSI_OPCODE_RSQ }, + { 1, 1, 0, 0, 0, 0, "EXP", TGSI_OPCODE_EXP }, + { 1, 1, 0, 0, 0, 0, "LOG", TGSI_OPCODE_LOG }, + { 1, 2, 0, 0, 0, 0, "MUL", TGSI_OPCODE_MUL }, + { 1, 2, 0, 0, 0, 0, "ADD", TGSI_OPCODE_ADD }, + { 1, 2, 0, 0, 0, 0, "DP3", TGSI_OPCODE_DP3 }, + { 1, 2, 0, 0, 0, 0, "DP4", TGSI_OPCODE_DP4 }, + { 1, 2, 0, 0, 0, 0, "DST", TGSI_OPCODE_DST }, + { 1, 2, 0, 0, 0, 0, "MIN", TGSI_OPCODE_MIN }, + { 1, 2, 0, 0, 0, 0, "MAX", TGSI_OPCODE_MAX }, + { 1, 2, 0, 0, 0, 0, "SLT", TGSI_OPCODE_SLT }, + { 1, 2, 0, 0, 0, 0, "SGE", TGSI_OPCODE_SGE }, + { 1, 3, 0, 0, 0, 0, "MAD", TGSI_OPCODE_MAD }, + { 1, 2, 0, 0, 0, 0, "SUB", TGSI_OPCODE_SUB }, + { 1, 3, 0, 0, 0, 0, "LRP", TGSI_OPCODE_LRP }, + { 1, 3, 0, 0, 0, 0, "CND", TGSI_OPCODE_CND }, + { 0, 0, 0, 0, 0, 0, "", 20 }, /* removed */ + { 1, 3, 0, 0, 0, 0, "DP2A", TGSI_OPCODE_DP2A }, + { 0, 0, 0, 0, 0, 0, "", 22 }, /* removed */ + { 0, 0, 0, 0, 0, 0, "", 23 }, /* removed */ + { 1, 1, 0, 0, 0, 0, "FRC", TGSI_OPCODE_FRC }, + { 1, 3, 0, 0, 0, 0, "CLAMP", TGSI_OPCODE_CLAMP }, + { 1, 1, 0, 0, 0, 0, "FLR", TGSI_OPCODE_FLR }, + { 1, 1, 0, 0, 0, 0, "ROUND", TGSI_OPCODE_ROUND }, + { 1, 1, 0, 0, 0, 0, "EX2", TGSI_OPCODE_EX2 }, + { 1, 1, 0, 0, 0, 0, "LG2", TGSI_OPCODE_LG2 }, + { 1, 2, 0, 0, 0, 0, "POW", TGSI_OPCODE_POW }, + { 1, 2, 0, 0, 0, 0, "XPD", TGSI_OPCODE_XPD }, + { 0, 0, 0, 0, 0, 0, "", 32 }, /* removed */ + { 1, 1, 0, 0, 0, 0, "ABS", TGSI_OPCODE_ABS }, + { 1, 1, 0, 0, 0, 0, "RCC", TGSI_OPCODE_RCC }, + { 1, 2, 0, 0, 0, 0, "DPH", TGSI_OPCODE_DPH }, + { 1, 1, 0, 0, 0, 0, "COS", TGSI_OPCODE_COS }, + { 1, 1, 0, 0, 0, 0, "DDX", TGSI_OPCODE_DDX }, + { 1, 1, 0, 0, 0, 0, "DDY", TGSI_OPCODE_DDY }, + { 0, 0, 0, 0, 0, 0, "KILP", TGSI_OPCODE_KILP }, + { 1, 1, 0, 0, 0, 0, "PK2H", TGSI_OPCODE_PK2H }, + { 1, 1, 0, 0, 0, 0, "PK2US", TGSI_OPCODE_PK2US }, + { 1, 1, 0, 0, 0, 0, "PK4B", TGSI_OPCODE_PK4B }, + { 1, 1, 0, 0, 0, 0, "PK4UB", TGSI_OPCODE_PK4UB }, + { 1, 2, 0, 0, 0, 0, "RFL", TGSI_OPCODE_RFL }, + { 1, 2, 0, 0, 0, 0, "SEQ", TGSI_OPCODE_SEQ }, + { 1, 2, 0, 0, 0, 0, "SFL", TGSI_OPCODE_SFL }, + { 1, 2, 0, 0, 0, 0, "SGT", TGSI_OPCODE_SGT }, + { 1, 1, 0, 0, 0, 0, "SIN", TGSI_OPCODE_SIN }, + { 1, 2, 0, 0, 0, 0, "SLE", TGSI_OPCODE_SLE }, + { 1, 2, 0, 0, 0, 0, "SNE", TGSI_OPCODE_SNE }, + { 1, 2, 0, 0, 0, 0, "STR", TGSI_OPCODE_STR }, + { 1, 2, 1, 0, 0, 0, "TEX", TGSI_OPCODE_TEX }, + { 1, 4, 1, 0, 0, 0, "TXD", TGSI_OPCODE_TXD }, + { 1, 2, 1, 0, 0, 0, "TXP", TGSI_OPCODE_TXP }, + { 1, 1, 0, 0, 0, 0, "UP2H", TGSI_OPCODE_UP2H }, + { 1, 1, 0, 0, 0, 0, "UP2US", TGSI_OPCODE_UP2US }, + { 1, 1, 0, 0, 0, 0, "UP4B", TGSI_OPCODE_UP4B }, + { 1, 1, 0, 0, 0, 0, "UP4UB", TGSI_OPCODE_UP4UB }, + { 1, 3, 0, 0, 0, 0, "X2D", TGSI_OPCODE_X2D }, + { 1, 1, 0, 0, 0, 0, "ARA", TGSI_OPCODE_ARA }, + { 1, 1, 0, 0, 0, 0, "ARR", TGSI_OPCODE_ARR }, + { 0, 1, 0, 0, 0, 0, "BRA", TGSI_OPCODE_BRA }, + { 0, 0, 0, 1, 0, 0, "CAL", TGSI_OPCODE_CAL }, + { 0, 0, 0, 0, 0, 0, "RET", TGSI_OPCODE_RET }, + { 1, 1, 0, 0, 0, 0, "SSG", TGSI_OPCODE_SSG }, + { 1, 3, 0, 0, 0, 0, "CMP", TGSI_OPCODE_CMP }, + { 1, 1, 0, 0, 0, 0, "SCS", TGSI_OPCODE_SCS }, + { 1, 2, 1, 0, 0, 0, "TXB", TGSI_OPCODE_TXB }, + { 1, 1, 0, 0, 0, 0, "NRM", TGSI_OPCODE_NRM }, + { 1, 2, 0, 0, 0, 0, "DIV", TGSI_OPCODE_DIV }, + { 1, 2, 0, 0, 0, 0, "DP2", TGSI_OPCODE_DP2 }, + { 1, 2, 1, 0, 0, 0, "TXL", TGSI_OPCODE_TXL }, + { 0, 0, 0, 0, 0, 0, "BRK", TGSI_OPCODE_BRK }, + { 0, 1, 0, 1, 0, 1, "IF", TGSI_OPCODE_IF }, + { 1, 1, 0, 0, 0, 1, "BGNFOR", TGSI_OPCODE_BGNFOR }, + { 0, 1, 0, 0, 0, 1, "REP", TGSI_OPCODE_REP }, + { 0, 0, 0, 1, 1, 1, "ELSE", TGSI_OPCODE_ELSE }, + { 0, 0, 0, 0, 1, 0, "ENDIF", TGSI_OPCODE_ENDIF }, + { 1, 0, 0, 0, 1, 0, "ENDFOR", TGSI_OPCODE_ENDFOR }, + { 0, 0, 0, 0, 1, 0, "ENDREP", TGSI_OPCODE_ENDREP }, + { 0, 1, 0, 0, 0, 0, "PUSHA", TGSI_OPCODE_PUSHA }, + { 1, 0, 0, 0, 0, 0, "POPA", TGSI_OPCODE_POPA }, + { 1, 1, 0, 0, 0, 0, "CEIL", TGSI_OPCODE_CEIL }, + { 1, 1, 0, 0, 0, 0, "I2F", TGSI_OPCODE_I2F }, + { 1, 1, 0, 0, 0, 0, "NOT", TGSI_OPCODE_NOT }, + { 1, 1, 0, 0, 0, 0, "TRUNC", TGSI_OPCODE_TRUNC }, + { 1, 2, 0, 0, 0, 0, "SHL", TGSI_OPCODE_SHL }, + { 1, 2, 0, 0, 0, 0, "SHR", TGSI_OPCODE_SHR }, + { 1, 2, 0, 0, 0, 0, "AND", TGSI_OPCODE_AND }, + { 1, 2, 0, 0, 0, 0, "OR", TGSI_OPCODE_OR }, + { 1, 2, 0, 0, 0, 0, "MOD", TGSI_OPCODE_MOD }, + { 1, 2, 0, 0, 0, 0, "XOR", TGSI_OPCODE_XOR }, + { 1, 3, 0, 0, 0, 0, "SAD", TGSI_OPCODE_SAD }, + { 1, 2, 1, 0, 0, 0, "TXF", TGSI_OPCODE_TXF }, + { 1, 2, 1, 0, 0, 0, "TXQ", TGSI_OPCODE_TXQ }, + { 0, 0, 0, 0, 0, 0, "CONT", TGSI_OPCODE_CONT }, + { 0, 0, 0, 0, 0, 0, "EMIT", TGSI_OPCODE_EMIT }, + { 0, 0, 0, 0, 0, 0, "ENDPRIM", TGSI_OPCODE_ENDPRIM }, + { 0, 0, 0, 1, 0, 1, "BGNLOOP", TGSI_OPCODE_BGNLOOP }, + { 0, 0, 0, 0, 0, 1, "BGNSUB", TGSI_OPCODE_BGNSUB }, + { 0, 0, 0, 1, 1, 0, "ENDLOOP", TGSI_OPCODE_ENDLOOP }, + { 0, 0, 0, 0, 1, 0, "ENDSUB", TGSI_OPCODE_ENDSUB }, + { 1, 1, 0, 0, 0, 0, "NOISE1", TGSI_OPCODE_NOISE1 }, + { 1, 1, 0, 0, 0, 0, "NOISE2", TGSI_OPCODE_NOISE2 }, + { 1, 1, 0, 0, 0, 0, "NOISE3", TGSI_OPCODE_NOISE3 }, + { 1, 1, 0, 0, 0, 0, "NOISE4", TGSI_OPCODE_NOISE4 }, + { 0, 0, 0, 0, 0, 0, "NOP", TGSI_OPCODE_NOP }, + { 0, 0, 0, 0, 0, 0, "", 108 }, /* removed */ + { 0, 0, 0, 0, 0, 0, "", 109 }, /* removed */ + { 0, 0, 0, 0, 0, 0, "", 110 }, /* removed */ + { 0, 0, 0, 0, 0, 0, "", 111 }, /* removed */ + { 1, 1, 0, 0, 0, 0, "NRM4", TGSI_OPCODE_NRM4 }, + { 0, 1, 0, 0, 0, 0, "CALLNZ", TGSI_OPCODE_CALLNZ }, + { 0, 1, 0, 0, 0, 0, "IFC", TGSI_OPCODE_IFC }, + { 0, 1, 0, 0, 0, 0, "BREAKC", TGSI_OPCODE_BREAKC }, + { 0, 1, 0, 0, 0, 0, "KIL", TGSI_OPCODE_KIL }, + { 0, 0, 0, 0, 0, 0, "END", TGSI_OPCODE_END }, + { 1, 1, 0, 0, 0, 0, "SWZ", TGSI_OPCODE_SWZ } }; const struct tgsi_opcode_info * diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.h b/src/gallium/auxiliary/tgsi/tgsi_info.h index b2375c69710..74713c3b98a 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.h +++ b/src/gallium/auxiliary/tgsi/tgsi_info.h @@ -36,10 +36,12 @@ extern "C" { struct tgsi_opcode_info { - uint num_dst; - uint num_src; - boolean is_tex; - boolean is_branch; + unsigned num_dst:3; + unsigned num_src:3; + unsigned is_tex:1; + unsigned is_branch:1; + int pre_dedent:2; + int post_indent:2; const char *mnemonic; uint opcode; }; diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h index ed594a3e2c7..e7bcf4bf754 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h +++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h @@ -60,7 +60,6 @@ OP13(MAD) OP12(SUB) OP13(LRP) OP13(CND) -OP13(CND0) OP13(DP2A) OP11(FRC) OP13(CLAMP) diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index 4fe8553c423..8a13885da9b 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -199,10 +199,10 @@ iter_instruction( } if (info->num_dst != inst->Instruction.NumDstRegs) { - report_error( ctx, "Invalid number of destination operands, should be %u", info->num_dst ); + report_error( ctx, "%s: Invalid number of destination operands, should be %u", info->mnemonic, info->num_dst ); } if (info->num_src != inst->Instruction.NumSrcRegs) { - report_error( ctx, "Invalid number of source operands, should be %u", info->num_src ); + report_error( ctx, "%s: Invalid number of source operands, should be %u", info->mnemonic, info->num_src ); } /* Check destination and source registers' validity. diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 46f2387c158..3cdf8b9f359 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -2089,10 +2089,6 @@ emit_instruction( return 0; break; - case TGSI_OPCODE_CND0: - return 0; - break; - case TGSI_OPCODE_DP2A: FETCH( func, *inst, 0, 0, CHAN_X ); /* xmm0 = src[0].x */ FETCH( func, *inst, 1, 1, CHAN_X ); /* xmm1 = src[1].x */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c index c0a0627e0b2..f7096bd8e2c 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -29,6 +29,7 @@ #include "pipe/p_context.h" #include "pipe/p_state.h" #include "tgsi/tgsi_ureg.h" +#include "tgsi/tgsi_info.h" #include "tgsi/tgsi_dump.h" #include "util/u_memory.h" #include "util/u_math.h" @@ -71,6 +72,7 @@ struct ureg_tokens { #define UREG_MAX_OUTPUT PIPE_MAX_ATTRIBS #define UREG_MAX_IMMEDIATE 32 #define UREG_MAX_TEMP 256 +#define UREG_MAX_ADDR 2 #define DOMAIN_DECL 0 #define DOMAIN_INSN 1 @@ -99,11 +101,15 @@ struct ureg_program } immediate[UREG_MAX_IMMEDIATE]; unsigned nr_immediates; + struct ureg_src sampler[PIPE_MAX_SAMPLERS]; + unsigned nr_samplers; + unsigned temps_active[UREG_MAX_TEMP / 32]; unsigned nr_temps; + unsigned nr_addrs; + unsigned nr_constants; - unsigned nr_samplers; unsigned nr_instructions; struct ureg_tokens domain[2]; @@ -187,6 +193,8 @@ ureg_dst_register( unsigned file, dst.File = file; dst.WriteMask = TGSI_WRITEMASK_XYZW; dst.Indirect = 0; + dst.IndirectIndex = 0; + dst.IndirectSwizzle = 0; dst.Saturate = 0; dst.Index = index; dst.Pad1 = 0; @@ -208,6 +216,8 @@ ureg_src_register( unsigned file, src.SwizzleW = TGSI_SWIZZLE_W; src.Pad = 0; src.Indirect = 0; + src.IndirectIndex = 0; + src.IndirectSwizzle = 0; src.Absolute = 0; src.Index = index; src.Negate = 0; @@ -254,6 +264,7 @@ ureg_DECL_fs_input( struct ureg_program *ureg, unsigned index, unsigned interp ) { + assert(ureg->processor == TGSI_PROCESSOR_FRAGMENT); return ureg_DECL_input( ureg, name, index, interp ); } @@ -263,6 +274,7 @@ ureg_DECL_vs_input( struct ureg_program *ureg, unsigned name, unsigned index ) { + assert(ureg->processor == TGSI_PROCESSOR_VERTEX); return ureg_DECL_input( ureg, name, index, TGSI_INTERPOLATE_CONSTANT ); } @@ -346,11 +358,36 @@ void ureg_release_temporary( struct ureg_program *ureg, } +/* Allocate a new address register. + */ +struct ureg_dst ureg_DECL_address( struct ureg_program *ureg ) +{ + if (ureg->nr_addrs < UREG_MAX_ADDR) + return ureg_dst_register( TGSI_FILE_ADDRESS, ureg->nr_addrs++ ); + + assert( 0 ); + return ureg_dst_register( TGSI_FILE_ADDRESS, 0 ); +} + /* Allocate a new sampler. */ -struct ureg_src ureg_DECL_sampler( struct ureg_program *ureg ) +struct ureg_src ureg_DECL_sampler( struct ureg_program *ureg, + unsigned nr ) { - return ureg_src_register( TGSI_FILE_SAMPLER, ureg->nr_samplers++ ); + unsigned i; + + for (i = 0; i < ureg->nr_samplers; i++) + if (ureg->sampler[i].Index == nr) + return ureg->sampler[i]; + + if (i < PIPE_MAX_SAMPLERS) { + ureg->sampler[i] = ureg_src_register( TGSI_FILE_SAMPLER, nr ); + ureg->nr_samplers++; + return ureg->sampler[i]; + } + + assert( 0 ); + return ureg->sampler[0]; } @@ -363,6 +400,8 @@ static int match_or_expand_immediate( const float *v, unsigned *swizzle ) { unsigned i, j; + + *swizzle = 0; for (i = 0; i < nr; i++) { boolean found = FALSE; @@ -394,8 +433,8 @@ struct ureg_src ureg_DECL_immediate( struct ureg_program *ureg, const float *v, unsigned nr ) { - unsigned i; - unsigned swizzle = 0; + unsigned i, j; + unsigned swizzle; /* Could do a first pass where we examine all existing immediates * without expanding. @@ -423,6 +462,12 @@ struct ureg_src ureg_DECL_immediate( struct ureg_program *ureg, set_bad( ureg ); out: + /* Make sure that all referenced elements are from this immediate. + * Has the effect of making size-one immediates into scalars. + */ + for (j = nr; j < 4; j++) + swizzle |= (swizzle & 0x3) << (j * 2); + return ureg_swizzle( ureg_src_register( TGSI_FILE_IMMEDIATE, i ), (swizzle >> 0) & 0x3, (swizzle >> 2) & 0x3, @@ -442,31 +487,39 @@ ureg_emit_src( struct ureg_program *ureg, union tgsi_any_token *out = get_tokens( ureg, DOMAIN_INSN, size ); unsigned n = 0; + assert(src.File != TGSI_FILE_NULL); + assert(src.File != TGSI_FILE_OUTPUT); + assert(src.File < TGSI_FILE_COUNT); + out[n].value = 0; out[n].src.File = src.File; out[n].src.SwizzleX = src.SwizzleX; out[n].src.SwizzleY = src.SwizzleY; out[n].src.SwizzleZ = src.SwizzleZ; out[n].src.SwizzleW = src.SwizzleW; - out[n].src.Indirect = src.Indirect; out[n].src.Index = src.Index; + out[n].src.Negate = src.Negate; n++; if (src.Absolute) { + out[0].src.Extended = 1; + out[0].src.Negate = 0; out[n].value = 0; + out[n].src_ext_mod.Type = TGSI_SRC_REGISTER_EXT_TYPE_MOD; out[n].src_ext_mod.Absolute = 1; + out[n].src_ext_mod.Negate = src.Negate; n++; } if (src.Indirect) { + out[0].src.Indirect = 1; out[n].value = 0; out[n].src.File = TGSI_FILE_ADDRESS; - out[n].src.SwizzleX = TGSI_SWIZZLE_X; - out[n].src.SwizzleY = TGSI_SWIZZLE_X; - out[n].src.SwizzleZ = TGSI_SWIZZLE_X; - out[n].src.SwizzleW = TGSI_SWIZZLE_X; - out[n].src.Indirect = 0; - out[n].src.Index = 0; + out[n].src.SwizzleX = src.IndirectSwizzle; + out[n].src.SwizzleY = src.IndirectSwizzle; + out[n].src.SwizzleZ = src.IndirectSwizzle; + out[n].src.SwizzleW = src.IndirectSwizzle; + out[n].src.Index = src.IndirectIndex; n++; } @@ -484,6 +537,13 @@ ureg_emit_dst( struct ureg_program *ureg, union tgsi_any_token *out = get_tokens( ureg, DOMAIN_INSN, size ); unsigned n = 0; + assert(dst.File != TGSI_FILE_NULL); + assert(dst.File != TGSI_FILE_CONSTANT); + assert(dst.File != TGSI_FILE_INPUT); + assert(dst.File != TGSI_FILE_SAMPLER); + assert(dst.File != TGSI_FILE_IMMEDIATE); + assert(dst.File < TGSI_FILE_COUNT); + out[n].value = 0; out[n].dst.File = dst.File; out[n].dst.WriteMask = dst.WriteMask; @@ -494,12 +554,11 @@ ureg_emit_dst( struct ureg_program *ureg, if (dst.Indirect) { out[n].value = 0; out[n].src.File = TGSI_FILE_ADDRESS; - out[n].src.SwizzleX = TGSI_SWIZZLE_X; - out[n].src.SwizzleY = TGSI_SWIZZLE_X; - out[n].src.SwizzleZ = TGSI_SWIZZLE_X; - out[n].src.SwizzleW = TGSI_SWIZZLE_X; - out[n].src.Indirect = 0; - out[n].src.Index = 0; + out[n].src.SwizzleX = dst.IndirectSwizzle; + out[n].src.SwizzleY = dst.IndirectSwizzle; + out[n].src.SwizzleZ = dst.IndirectSwizzle; + out[n].src.SwizzleW = dst.IndirectSwizzle; + out[n].src.Index = dst.IndirectIndex; n++; } @@ -523,7 +582,6 @@ ureg_emit_insn(struct ureg_program *ureg, out[0].insn.NrTokens = 0; out[0].insn.Opcode = opcode; out[0].insn.Saturate = saturate; - out[0].insn.NrTokens = 0; out[0].insn.NumDstRegs = num_dst; out[0].insn.NumSrcRegs = num_src; out[0].insn.Padding = 0; @@ -542,6 +600,9 @@ ureg_emit_label(struct ureg_program *ureg, { union tgsi_any_token *out, *insn; + if(!label_token) + return; + out = get_tokens( ureg, DOMAIN_INSN, 1 ); insn = retrieve_token( ureg, DOMAIN_INSN, insn_token ); @@ -617,6 +678,17 @@ ureg_insn(struct ureg_program *ureg, unsigned insn, i; boolean saturate; +#ifdef DEBUG + { + const struct tgsi_opcode_info *info = tgsi_get_opcode_info( opcode ); + assert(info); + if(info) { + assert(nr_dst == info->num_dst); + assert(nr_src == info->num_src); + } + } +#endif + saturate = nr_dst ? dst[0].Saturate : FALSE; insn = ureg_emit_insn( ureg, opcode, saturate, nr_dst, nr_src ); @@ -723,10 +795,10 @@ static void emit_decls( struct ureg_program *ureg ) TGSI_INTERPOLATE_CONSTANT ); } - if (ureg->nr_samplers) { + for (i = 0; i < ureg->nr_samplers; i++) { emit_decl_range( ureg, TGSI_FILE_SAMPLER, - 0, ureg->nr_samplers ); + ureg->sampler[i].Index, 1 ); } if (ureg->nr_constants) { @@ -741,6 +813,12 @@ static void emit_decls( struct ureg_program *ureg ) 0, ureg->nr_temps ); } + if (ureg->nr_addrs) { + emit_decl_range( ureg, + TGSI_FILE_ADDRESS, + 0, ureg->nr_addrs ); + } + for (i = 0; i < ureg->nr_immediates; i++) { emit_immediate( ureg, ureg->immediate[i].v ); @@ -764,7 +842,7 @@ static void copy_instructions( struct ureg_program *ureg ) static void -fixup_header_size(struct ureg_program *ureg ) +fixup_header_size(struct ureg_program *ureg) { union tgsi_any_token *out = retrieve_token( ureg, DOMAIN_DECL, 1 ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h index 8836a1ea0eb..acbca59040c 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h @@ -31,6 +31,10 @@ #include "pipe/p_compiler.h" #include "pipe/p_shader_tokens.h" +#ifdef __cplusplus +extern "C" { +#endif + struct ureg_program; /* Almost a tgsi_src_register, but we need to pull in the Absolute @@ -48,6 +52,8 @@ struct ureg_src unsigned Absolute : 1; /* BOOL */ int Index : 16; /* SINT */ unsigned Negate : 1; /* BOOL */ + int IndirectIndex : 16; /* SINT */ + int IndirectSwizzle : 2; /* TGSI_SWIZZLE_ */ }; /* Very similar to a tgsi_dst_register, removing unsupported fields @@ -64,6 +70,8 @@ struct ureg_dst int Index : 16; /* SINT */ unsigned Pad1 : 5; unsigned Pad2 : 1; /* BOOL */ + int IndirectIndex : 16; /* SINT */ + int IndirectSwizzle : 2; /* TGSI_SWIZZLE_ */ }; struct pipe_context; @@ -131,12 +139,21 @@ void ureg_release_temporary( struct ureg_program *ureg, struct ureg_dst tmp ); +struct ureg_dst +ureg_DECL_address( struct ureg_program * ); + +/* Supply an index to the sampler declaration as this is the hook to + * the external pipe_sampler state. Users of this function probably + * don't want just any sampler, but a specific one which they've set + * up state for in the context. + */ struct ureg_src -ureg_DECL_sampler( struct ureg_program * ); +ureg_DECL_sampler( struct ureg_program *, + unsigned index ); static INLINE struct ureg_src -ureg_DECL_immediate4f( struct ureg_program *ureg, +ureg_imm4f( struct ureg_program *ureg, float a, float b, float c, float d) { @@ -149,7 +166,7 @@ ureg_DECL_immediate4f( struct ureg_program *ureg, } static INLINE struct ureg_src -ureg_DECL_immediate3f( struct ureg_program *ureg, +ureg_imm3f( struct ureg_program *ureg, float a, float b, float c) { @@ -161,7 +178,7 @@ ureg_DECL_immediate3f( struct ureg_program *ureg, } static INLINE struct ureg_src -ureg_DECL_immediate2f( struct ureg_program *ureg, +ureg_imm2f( struct ureg_program *ureg, float a, float b) { float v[2]; @@ -171,7 +188,7 @@ ureg_DECL_immediate2f( struct ureg_program *ureg, } static INLINE struct ureg_src -ureg_DECL_immediate1f( struct ureg_program *ureg, +ureg_imm1f( struct ureg_program *ureg, float a) { float v[1]; @@ -392,6 +409,7 @@ static INLINE void ureg_##op( struct ureg_program *ureg, \ static INLINE struct ureg_src ureg_negate( struct ureg_src reg ) { + assert(reg.File != TGSI_FILE_NULL); reg.Negate ^= 1; return reg; } @@ -399,6 +417,7 @@ ureg_negate( struct ureg_src reg ) static INLINE struct ureg_src ureg_abs( struct ureg_src reg ) { + assert(reg.File != TGSI_FILE_NULL); reg.Absolute = 1; reg.Negate = 0; return reg; @@ -413,6 +432,12 @@ ureg_swizzle( struct ureg_src reg, (reg.SwizzleZ << 4) | (reg.SwizzleW << 6)); + assert(reg.File != TGSI_FILE_NULL); + assert(x < 4); + assert(y < 4); + assert(z < 4); + assert(w < 4); + reg.SwizzleX = (swz >> (x*2)) & 0x3; reg.SwizzleY = (swz >> (y*2)) & 0x3; reg.SwizzleZ = (swz >> (z*2)) & 0x3; @@ -430,6 +455,7 @@ static INLINE struct ureg_dst ureg_writemask( struct ureg_dst reg, unsigned writemask ) { + assert(reg.File != TGSI_FILE_NULL); reg.WriteMask &= writemask; return reg; } @@ -437,10 +463,33 @@ ureg_writemask( struct ureg_dst reg, static INLINE struct ureg_dst ureg_saturate( struct ureg_dst reg ) { + assert(reg.File != TGSI_FILE_NULL); reg.Saturate = 1; return reg; } +static INLINE struct ureg_dst +ureg_dst_indirect( struct ureg_dst reg, struct ureg_src addr ) +{ + assert(reg.File != TGSI_FILE_NULL); + assert(addr.File == TGSI_FILE_ADDRESS); + reg.Indirect = 1; + reg.IndirectIndex = addr.Index; + reg.IndirectSwizzle = addr.SwizzleX; + return reg; +} + +static INLINE struct ureg_src +ureg_src_indirect( struct ureg_src reg, struct ureg_src addr ) +{ + assert(reg.File != TGSI_FILE_NULL); + assert(addr.File == TGSI_FILE_ADDRESS); + reg.Indirect = 1; + reg.IndirectIndex = addr.Index; + reg.IndirectSwizzle = addr.SwizzleX; + return reg; +} + static INLINE struct ureg_dst ureg_dst( struct ureg_src src ) { @@ -449,6 +498,8 @@ ureg_dst( struct ureg_src src ) dst.File = src.File; dst.WriteMask = TGSI_WRITEMASK_XYZW; dst.Indirect = src.Indirect; + dst.IndirectIndex = src.IndirectIndex; + dst.IndirectSwizzle = src.IndirectSwizzle; dst.Saturate = 0; dst.Index = src.Index; dst.Pad1 = 0; @@ -469,6 +520,8 @@ ureg_src( struct ureg_dst dst ) src.SwizzleW = TGSI_SWIZZLE_W; src.Pad = 0; src.Indirect = dst.Indirect; + src.IndirectIndex = dst.IndirectIndex; + src.IndirectSwizzle = dst.IndirectSwizzle; src.Absolute = 0; src.Index = dst.Index; src.Negate = 0; @@ -478,4 +531,60 @@ ureg_src( struct ureg_dst dst ) +static INLINE struct ureg_dst +ureg_dst_undef( void ) +{ + struct ureg_dst dst; + + dst.File = TGSI_FILE_NULL; + dst.WriteMask = 0; + dst.Indirect = 0; + dst.IndirectIndex = 0; + dst.IndirectSwizzle = 0; + dst.Saturate = 0; + dst.Index = 0; + dst.Pad1 = 0; + dst.Pad2 = 0; + + return dst; +} + +static INLINE struct ureg_src +ureg_src_undef( void ) +{ + struct ureg_src src; + + src.File = TGSI_FILE_NULL; + src.SwizzleX = 0; + src.SwizzleY = 0; + src.SwizzleZ = 0; + src.SwizzleW = 0; + src.Pad = 0; + src.Indirect = 0; + src.IndirectIndex = 0; + src.IndirectSwizzle = 0; + src.Absolute = 0; + src.Index = 0; + src.Negate = 0; + + return src; +} + +static INLINE boolean +ureg_src_is_undef( struct ureg_src src ) +{ + return src.File == TGSI_FILE_NULL; +} + +static INLINE boolean +ureg_dst_is_undef( struct ureg_dst dst ) +{ + return dst.File == TGSI_FILE_NULL; +} + + +#ifdef __cplusplus +} +#endif + #endif diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.h b/src/gallium/auxiliary/tgsi/tgsi_util.h index 7877f345587..21eb656327e 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_util.h +++ b/src/gallium/auxiliary/tgsi/tgsi_util.h @@ -32,6 +32,10 @@ extern "C" { #endif +struct tgsi_src_register; +struct tgsi_src_register_ext_swz; +struct tgsi_full_src_register; + void * tgsi_align_128bit( void *unaligned ); diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index cda6dbd46d7..c516317d701 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -62,7 +62,7 @@ struct blit_state struct pipe_viewport_state viewport; void *vs; - void *fs; + void *fs[TGSI_WRITEMASK_XYZW + 1]; struct pipe_buffer *vbuf; /**< quad vertices */ unsigned vbuf_slot; @@ -125,7 +125,7 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso) } /* fragment shader */ - ctx->fs = util_make_fragment_tex_shader(pipe); + ctx->fs[TGSI_WRITEMASK_XYZW] = util_make_fragment_tex_shader(pipe); ctx->vbuf = NULL; /* init vertex data that doesn't change */ @@ -146,9 +146,13 @@ void util_destroy_blit(struct blit_state *ctx) { struct pipe_context *pipe = ctx->pipe; + unsigned i; pipe->delete_vs_state(pipe, ctx->vs); - pipe->delete_fs_state(pipe, ctx->fs); + + for (i = 0; i < Elements(ctx->fs); i++) + if (ctx->fs[i]) + pipe->delete_fs_state(pipe, ctx->fs[i]); pipe_buffer_reference(&ctx->vbuf, NULL); @@ -299,14 +303,15 @@ regions_overlap(int srcX0, int srcY0, * XXX need some control over blitting Z and/or stencil. */ void -util_blit_pixels(struct blit_state *ctx, - struct pipe_surface *src, - int srcX0, int srcY0, - int srcX1, int srcY1, - struct pipe_surface *dst, - int dstX0, int dstY0, - int dstX1, int dstY1, - float z, uint filter) +util_blit_pixels_writemask(struct blit_state *ctx, + struct pipe_surface *src, + int srcX0, int srcY0, + int srcX1, int srcY1, + struct pipe_surface *dst, + int dstX0, int dstY0, + int dstX1, int dstY1, + float z, uint filter, + uint writemask) { struct pipe_context *pipe = ctx->pipe; struct pipe_screen *screen = pipe->screen; @@ -426,8 +431,11 @@ util_blit_pixels(struct blit_state *ctx, /* texture */ cso_set_sampler_textures(ctx->cso, 1, &tex); + if (ctx->fs[writemask] == NULL) + ctx->fs[writemask] = util_make_fragment_tex_shader_writemask(pipe, writemask); + /* shaders */ - cso_set_fragment_shader_handle(ctx->cso, ctx->fs); + cso_set_fragment_shader_handle(ctx->cso, ctx->fs[writemask]); cso_set_vertex_shader_handle(ctx->cso, ctx->vs); /* drawing dest */ @@ -462,6 +470,27 @@ util_blit_pixels(struct blit_state *ctx, } +void +util_blit_pixels(struct blit_state *ctx, + struct pipe_surface *src, + int srcX0, int srcY0, + int srcX1, int srcY1, + struct pipe_surface *dst, + int dstX0, int dstY0, + int dstX1, int dstY1, + float z, uint filter ) +{ + util_blit_pixels_writemask( ctx, src, + srcX0, srcY0, + srcX1, srcY1, + dst, + dstX0, dstY0, + dstX1, dstY1, + z, filter, + TGSI_WRITEMASK_XYZW ); +} + + /* Release vertex buffer at end of frame to avoid synchronous * rendering. */ @@ -535,7 +564,7 @@ util_blit_pixels_tex(struct blit_state *ctx, cso_set_sampler_textures(ctx->cso, 1, &tex); /* shaders */ - cso_set_fragment_shader_handle(ctx->cso, ctx->fs); + cso_set_fragment_shader_handle(ctx->cso, ctx->fs[TGSI_WRITEMASK_XYZW]); cso_set_vertex_shader_handle(ctx->cso, ctx->vs); /* drawing dest */ diff --git a/src/gallium/auxiliary/util/u_blit.h b/src/gallium/auxiliary/util/u_blit.h index c35beceda8d..a102021529e 100644 --- a/src/gallium/auxiliary/util/u_blit.h +++ b/src/gallium/auxiliary/util/u_blit.h @@ -60,6 +60,17 @@ util_blit_pixels(struct blit_state *ctx, int dstX1, int dstY1, float z, uint filter); +void +util_blit_pixels_writemask(struct blit_state *ctx, + struct pipe_surface *src, + int srcX0, int srcY0, + int srcX1, int srcY1, + struct pipe_surface *dst, + int dstX0, int dstY0, + int dstX1, int dstY1, + float z, uint filter, + uint writemask); + extern void util_blit_pixels_tex(struct blit_state *ctx, struct pipe_texture *tex, diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index b0807c13392..4c6c2bc00e1 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -374,6 +374,10 @@ unsigned ffs( unsigned u ) #define ffs __builtin_ffs #endif +#ifdef __MINGW32__ +#define ffs __builtin_ffs +#endif + /* Could also binary search for the highest bit. */ diff --git a/src/gallium/auxiliary/util/u_simple_shaders.c b/src/gallium/auxiliary/util/u_simple_shaders.c index d54a1d8c746..1b8da9b6853 100644 --- a/src/gallium/auxiliary/util/u_simple_shaders.c +++ b/src/gallium/auxiliary/util/u_simple_shaders.c @@ -88,11 +88,14 @@ util_make_vertex_passthrough_shader(struct pipe_context *pipe, /** * Make simple fragment texture shader: - * TEX OUT[0], IN[0], SAMP[0], 2D; + * IMM {0,0,0,1} // (if writemask != 0xf) + * MOV OUT[0], IMM[0] // (if writemask != 0xf) + * TEX OUT[0].writemask, IN[0], SAMP[0], 2D; * END; */ void * -util_make_fragment_tex_shader(struct pipe_context *pipe) +util_make_fragment_tex_shader_writemask(struct pipe_context *pipe, + unsigned writemask ) { struct ureg_program *ureg; struct ureg_src sampler; @@ -103,7 +106,7 @@ util_make_fragment_tex_shader(struct pipe_context *pipe) if (ureg == NULL) return NULL; - sampler = ureg_DECL_sampler( ureg ); + sampler = ureg_DECL_sampler( ureg, 0 ); tex = ureg_DECL_fs_input( ureg, TGSI_SEMANTIC_GENERIC, 0, @@ -119,6 +122,13 @@ util_make_fragment_tex_shader(struct pipe_context *pipe) return ureg_create_shader_and_destroy( ureg, pipe ); } +void * +util_make_fragment_tex_shader(struct pipe_context *pipe ) +{ + return util_make_fragment_tex_shader_writemask( pipe, + TGSI_WRITEMASK_XYZW ); +} + diff --git a/src/gallium/auxiliary/util/u_simple_shaders.h b/src/gallium/auxiliary/util/u_simple_shaders.h index 6f8d96af9bc..d2e80d6eb4d 100644 --- a/src/gallium/auxiliary/util/u_simple_shaders.h +++ b/src/gallium/auxiliary/util/u_simple_shaders.h @@ -50,6 +50,10 @@ util_make_vertex_passthrough_shader(struct pipe_context *pipe, extern void * +util_make_fragment_tex_shader_writemask(struct pipe_context *pipe, + unsigned writemask ); + +extern void * util_make_fragment_tex_shader(struct pipe_context *pipe); |