summary | refs | log | tree | commit | diff | stats
path: root/src/gallium/auxiliary/tgsi
diff options
context:
space:
mode:
author: Ian Romanick <[email protected]> 2009-09-10 15:33:45 -0700
committer: Ian Romanick <[email protected]> 2009-09-10 15:33:45 -0700
commit: b8e1e8d2d8ae6ffbf8f271b46ee89788a926b3b0 (patch)
tree: 5db502ab80287bfc8ff61082784017c7448464f5 /src/gallium/auxiliary/tgsi
parent: 81722c5d7e8e93d837510b9e6e5d014ec64cf4b3 (diff)
parent: d9dc4cb0e4f578da9e50c9d1ba6fd9c22ea2fca6 (diff)
Merge branch 'master' into asm-shader-rework-2
Conflicts: src/mesa/shader/lex.yy.c src/mesa/shader/program_parse.tab.c src/mesa/shader/program_parse.tab.h
Diffstat (limited to 'src/gallium/auxiliary/tgsi')
-rw-r--r-- src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt 20
-rw-r--r-- src/gallium/auxiliary/tgsi/tgsi_dump.c 31
-rw-r--r-- src/gallium/auxiliary/tgsi/tgsi_exec.c 115
-rw-r--r-- src/gallium/auxiliary/tgsi/tgsi_exec.h 21
-rw-r--r-- src/gallium/auxiliary/tgsi/tgsi_info.c 238
-rw-r--r-- src/gallium/auxiliary/tgsi/tgsi_info.h 10
-rw-r--r-- src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h 1
-rw-r--r-- src/gallium/auxiliary/tgsi/tgsi_sanity.c 4
-rw-r--r-- src/gallium/auxiliary/tgsi/tgsi_sse2.c 4
-rw-r--r-- src/gallium/auxiliary/tgsi/tgsi_ureg.c 122
-rw-r--r-- src/gallium/auxiliary/tgsi/tgsi_ureg.h 119
-rw-r--r-- src/gallium/auxiliary/tgsi/tgsi_util.h 4
12 files changed, 480 insertions, 209 deletions
diff --git a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt
index 802ec371189..eb492076b7d 100644
--- a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt
+++ b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt
@@ -187,11 +187,7 @@ TGSI Instruction Specification
1.2.6 CND0 - Condition Zero
- dst.x = (src2.x >= 0.0) ? src0.x : src1.x
- dst.y = (src2.y >= 0.0) ? src0.y : src1.y
- dst.z = (src2.z >= 0.0) ? src0.z : src1.z
- dst.w = (src2.w >= 0.0) ? src0.w : src1.w
-
+ Removed. Use (CMP src2, src1, src0) instead.
1.2.7 DOT2ADD - 2-component Dot Product And Add
@@ -1031,12 +1027,12 @@ TGSI Instruction Specification
1.18.1 EXPP - Approximate Exponential Base 2
- Alias for EXP.
+ Use EXP. See also 1.19.3.
1.18.2 LOGP - Logarithm Base 2
- Alias for LG2.
+ Use LOG. See also 1.19.4.
1.19 vs_2_0
@@ -1053,6 +1049,16 @@ TGSI Instruction Specification
Alias for ARR.
+1.19.3 EXPP - Approximate Exponential Base 2
+
+ Use EX2.
+
+
+1.19.4 LOGP - Logarithm Base 2
+
+ Use LG2.
+
+
2 Explanation of symbols used
==============================
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index 05b07a3a73e..111d95b6665 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -43,6 +43,7 @@ struct dump_ctx
struct tgsi_iterate_context iter;
uint instno;
+ int indent;
uint indentation;
@@ -335,14 +336,6 @@ tgsi_dump_immediate(
iter_immediate( &ctx.iter, (struct tgsi_full_immediate *)imm );
}
-static void
-indent(struct dump_ctx *ctx)
-{
- uint i;
- for (i = 0; i < ctx->indentation; i++)
- TXT(" ");
-}
-
static boolean
iter_instruction(
struct tgsi_iterate_context *iter,
@@ -350,22 +343,19 @@ iter_instruction(
{
struct dump_ctx *ctx = (struct dump_ctx *) iter;
uint instno = ctx->instno++;
-
+ const struct tgsi_opcode_info *info = tgsi_get_opcode_info( inst->Instruction.Opcode );
uint i;
boolean first_reg = TRUE;
INSTID( instno );
TXT( ": " );
-
- /* update indentation */
- if (inst->Instruction.Opcode == TGSI_OPCODE_ENDIF ||
- inst->Instruction.Opcode == TGSI_OPCODE_ENDFOR ||
- inst->Instruction.Opcode == TGSI_OPCODE_ENDLOOP) {
- ctx->indentation -= indent_spaces;
- }
- indent(ctx);
-
- TXT( tgsi_get_opcode_info( inst->Instruction.Opcode )->mnemonic );
+
+ ctx->indent -= info->pre_dedent;
+ for(i = 0; (int)i < ctx->indent; ++i)
+ TXT( " " );
+ ctx->indent += info->post_indent;
+
+ TXT( info->mnemonic );
switch (inst->Instruction.Saturate) {
case TGSI_SAT_NONE:
@@ -526,6 +516,7 @@ tgsi_dump_instruction(
struct dump_ctx ctx;
ctx.instno = instno;
+ ctx.indent = 0;
ctx.printf = dump_ctx_printf;
ctx.indentation = 0;
@@ -559,6 +550,7 @@ tgsi_dump(
ctx.iter.epilog = NULL;
ctx.instno = 0;
+ ctx.indent = 0;
ctx.printf = dump_ctx_printf;
ctx.indentation = 0;
@@ -612,6 +604,7 @@ tgsi_dump_str(
ctx.base.iter.epilog = NULL;
ctx.base.instno = 0;
+ ctx.base.indent = 0;
ctx.base.printf = &str_dump_ctx_printf;
ctx.base.indentation = 0;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 711e86d6edf..c79c56debd6 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -2329,16 +2329,6 @@ exec_instruction(
}
break;
- case TGSI_OPCODE_CND0:
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- FETCH(&r[0], 0, chan_index);
- FETCH(&r[1], 1, chan_index);
- FETCH(&r[2], 2, chan_index);
- micro_le(&r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[2], &r[0], &r[1]);
- STORE(&r[0], 0, chan_index);
- }
- break;
-
case TGSI_OPCODE_DP2A:
FETCH( &r[0], 0, CHAN_X );
FETCH( &r[1], 1, CHAN_X );
@@ -2766,19 +2756,32 @@ exec_instruction(
if (mach->ExecMask) {
/* do the call */
- /* push the Cond, Loop, Cont stacks */
+ /* First, record the depths of the execution stacks.
+ * This is important for deeply nested/looped return statements.
+ * We have to unwind the stacks by the correct amount. For a
+ * real code generator, we could determine the number of entries
+ * to pop off each stack with simple static analysis and avoid
+ * implementing this data structure at run time.
+ */
+ mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop;
+ mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop;
+ mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop;
+ /* note that PC was already incremented above */
+ mach->CallStack[mach->CallStackTop].ReturnAddr = *pc;
+
+ mach->CallStackTop++;
+
+ /* Second, push the Cond, Loop, Cont, Func stacks */
assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
mach->CondStack[mach->CondStackTop++] = mach->CondMask;
assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
mach->ContStack[mach->ContStackTop++] = mach->ContMask;
-
assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
- /* note that PC was already incremented above */
- mach->CallStack[mach->CallStackTop++] = *pc;
+ /* Finally, jump to the subroutine */
*pc = inst->InstructionExtLabel.Label;
}
break;
@@ -2795,18 +2798,24 @@ exec_instruction(
*pc = -1;
return;
}
- *pc = mach->CallStack[--mach->CallStackTop];
- /* pop the Cond, Loop, Cont stacks */
- assert(mach->CondStackTop > 0);
- mach->CondMask = mach->CondStack[--mach->CondStackTop];
- assert(mach->LoopStackTop > 0);
- mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
- assert(mach->ContStackTop > 0);
- mach->ContMask = mach->ContStack[--mach->ContStackTop];
+ assert(mach->CallStackTop > 0);
+ mach->CallStackTop--;
+
+ mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
+ mach->CondMask = mach->CondStack[mach->CondStackTop];
+
+ mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
+ mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
+
+ mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
+ mach->ContMask = mach->ContStack[mach->ContStackTop];
+
assert(mach->FuncStackTop > 0);
mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
+ *pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
+
UPDATE_EXEC_MASK(mach);
}
break;
@@ -3104,6 +3113,12 @@ exec_instruction(
break;
case TGSI_OPCODE_BGNFOR:
+ assert(mach->LoopCounterStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
+ for (chan_index = 0; chan_index < 3; chan_index++) {
+ FETCH( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[chan_index], 0, chan_index );
+ }
+ STORE( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_Y], 0, CHAN_X );
+ ++mach->LoopCounterStackTop;
/* fall-through (for now) */
case TGSI_OPCODE_BGNLOOP:
/* push LoopMask and ContMasks */
@@ -3111,10 +3126,58 @@ exec_instruction(
mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
mach->ContStack[mach->ContStackTop++] = mach->ContMask;
+ assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
+ mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1;
break;
case TGSI_OPCODE_ENDFOR:
- /* fall-through (for now at least) */
+ /* End of a BGNFOR loop body. The top LoopCounterStack entry holds
+ * (x = remaining count, y = current value, z = step) as fetched by BGNFOR.
+ * BGNFOR incremented LoopCounterStackTop after filling that entry, so the
+ * live entry is always at LoopCounterStackTop - 1 in this case.
+ */
+ assert(mach->LoopCounterStackTop > 0);
+ /* count -= TEMP_1 (presumably the constant 1.0 in every channel -- confirm) */
+ micro_sub( &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X],
+ &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X],
+ &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
+ /* update LoopMask: drop every channel whose remaining count is <= 0 */
+ if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[0] <= 0) {
+ mach->LoopMask &= ~0x1;
+ }
+ if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[1] <= 0 ) {
+ mach->LoopMask &= ~0x2;
+ }
+ if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[2] <= 0 ) {
+ mach->LoopMask &= ~0x4;
+ }
+ if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[3] <= 0 ) {
+ mach->LoopMask &= ~0x8;
+ }
+ /* current += step */
+ micro_add( &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y],
+ &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y],
+ &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Z]);
+ assert(mach->LoopLabelStackTop > 0);
+ /* Point inst at the instruction recorded on the loop label stack (the
+ * matching BGNFOR), presumably so that the STORE below writes to that
+ * instruction's destination register -- confirm against the STORE macro.
+ */
+ inst = mach->Instructions + mach->LoopLabelStack[mach->LoopLabelStackTop - 1];
+ /* Store the value just updated above. This must use Top - 1: indexing
+ * with LoopCounterStackTop reads the slot past the live entry (stale data,
+ * and out of bounds when the stack is full).
+ */
+ STORE( &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], 0, CHAN_X );
+ /* Restore ContMask, but don't pop */
+ assert(mach->ContStackTop > 0);
+ mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
+ UPDATE_EXEC_MASK(mach);
+ if (mach->ExecMask) {
+ /* repeat loop: jump to instruction just past BGNFOR */
+ assert(mach->LoopLabelStackTop > 0);
+ *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1;
+ }
+ else {
+ /* exit loop: pop LoopMask */
+ assert(mach->LoopStackTop > 0);
+ mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
+ /* pop ContMask */
+ assert(mach->ContStackTop > 0);
+ mach->ContMask = mach->ContStack[--mach->ContStackTop];
+ /* pop the loop label and loop counter entries pushed by BGNFOR */
+ assert(mach->LoopLabelStackTop > 0);
+ --mach->LoopLabelStackTop;
+ assert(mach->LoopCounterStackTop > 0);
+ --mach->LoopCounterStackTop;
+ }
+ UPDATE_EXEC_MASK(mach);
+ break;
+
case TGSI_OPCODE_ENDLOOP:
/* Restore ContMask, but don't pop */
assert(mach->ContStackTop > 0);
@@ -3122,7 +3185,8 @@ exec_instruction(
UPDATE_EXEC_MASK(mach);
if (mach->ExecMask) {
/* repeat loop: jump to instruction just past BGNLOOP */
- *pc = inst->InstructionExtLabel.Label + 1;
+ assert(mach->LoopLabelStackTop > 0);
+ *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1;
}
else {
/* exit loop: pop LoopMask */
@@ -3131,6 +3195,8 @@ exec_instruction(
/* pop ContMask */
assert(mach->ContStackTop > 0);
mach->ContMask = mach->ContStack[--mach->ContStackTop];
+ assert(mach->LoopLabelStackTop > 0);
+ --mach->LoopLabelStackTop;
}
UPDATE_EXEC_MASK(mach);
break;
@@ -3198,7 +3264,6 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
mach->FuncMask = 0xf;
mach->ExecMask = 0xf;
- mach->CondStackTop = 0; /* temporarily subvert this assertion */
assert(mach->CondStackTop == 0);
assert(mach->LoopStackTop == 0);
assert(mach->ContStackTop == 0);
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index fd9ef6f35df..c72f76809d4 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -186,6 +186,17 @@ struct tgsi_exec_labels
*/
#define TGSI_EXEC_MAX_CONST_BUFFER 4096
+
+/**
+ * Function call/activation record.
+ *
+ * The CAL handler in tgsi_exec.c fills one record per call with the depths
+ * of the Cond/Loop/Cont stacks as they were before CAL's own pushes, plus
+ * the return address. The RET handler resets the three stack tops to the
+ * recorded depths and resumes execution at ReturnAddr.
+ */
+struct tgsi_call_record
+{
+ uint CondStackTop; /**< CondStackTop at the time of the call */
+ uint LoopStackTop; /**< LoopStackTop at the time of the call */
+ uint ContStackTop; /**< ContStackTop at the time of the call */
+ uint ReturnAddr; /**< program counter to resume at on RET */
+};
+
+
/**
* Run-time virtual machine state for executing TGSI shader.
*/
@@ -232,6 +243,14 @@ struct tgsi_exec_machine
uint LoopStack[TGSI_EXEC_MAX_LOOP_NESTING];
int LoopStackTop;
+ /** Loop label stack */
+ uint LoopLabelStack[TGSI_EXEC_MAX_LOOP_NESTING];
+ int LoopLabelStackTop;
+
+ /** Loop counter stack (x = count, y = current, z = step) */
+ struct tgsi_exec_vector LoopCounterStack[TGSI_EXEC_MAX_LOOP_NESTING];
+ int LoopCounterStackTop;
+
/** Loop continue mask stack (see comments in tgsi_exec.c) */
uint ContStack[TGSI_EXEC_MAX_LOOP_NESTING];
int ContStackTop;
@@ -241,7 +260,7 @@ struct tgsi_exec_machine
int FuncStackTop;
/** Function call stack for saving/restoring the program counter */
- uint CallStack[TGSI_EXEC_MAX_CALL_NESTING];
+ struct tgsi_call_record CallStack[TGSI_EXEC_MAX_CALL_NESTING];
int CallStackTop;
struct tgsi_full_instruction *Instructions;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c
index ccf4b205ffb..17af4cb7ad2 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -31,125 +31,125 @@
static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
{
- { 1, 1, 0, 0, "ARL", TGSI_OPCODE_ARL },
- { 1, 1, 0, 0, "MOV", TGSI_OPCODE_MOV },
- { 1, 1, 0, 0, "LIT", TGSI_OPCODE_LIT },
- { 1, 1, 0, 0, "RCP", TGSI_OPCODE_RCP },
- { 1, 1, 0, 0, "RSQ", TGSI_OPCODE_RSQ },
- { 1, 1, 0, 0, "EXP", TGSI_OPCODE_EXP },
- { 1, 1, 0, 0, "LOG", TGSI_OPCODE_LOG },
- { 1, 2, 0, 0, "MUL", TGSI_OPCODE_MUL },
- { 1, 2, 0, 0, "ADD", TGSI_OPCODE_ADD },
- { 1, 2, 0, 0, "DP3", TGSI_OPCODE_DP3 },
- { 1, 2, 0, 0, "DP4", TGSI_OPCODE_DP4 },
- { 1, 2, 0, 0, "DST", TGSI_OPCODE_DST },
- { 1, 2, 0, 0, "MIN", TGSI_OPCODE_MIN },
- { 1, 2, 0, 0, "MAX", TGSI_OPCODE_MAX },
- { 1, 2, 0, 0, "SLT", TGSI_OPCODE_SLT },
- { 1, 2, 0, 0, "SGE", TGSI_OPCODE_SGE },
- { 1, 3, 0, 0, "MAD", TGSI_OPCODE_MAD },
- { 1, 2, 0, 0, "SUB", TGSI_OPCODE_SUB },
- { 1, 3, 0, 0, "LRP", TGSI_OPCODE_LRP },
- { 1, 3, 0, 0, "CND", TGSI_OPCODE_CND },
- { 1, 3, 0, 0, "CND0", TGSI_OPCODE_CND0 },
- { 1, 3, 0, 0, "DP2A", TGSI_OPCODE_DP2A },
- { 0, 0, 0, 0, "", 22 }, /* removed */
- { 0, 0, 0, 0, "", 23 }, /* removed */
- { 1, 1, 0, 0, "FRC", TGSI_OPCODE_FRC },
- { 1, 3, 0, 0, "CLAMP", TGSI_OPCODE_CLAMP },
- { 1, 1, 0, 0, "FLR", TGSI_OPCODE_FLR },
- { 1, 1, 0, 0, "ROUND", TGSI_OPCODE_ROUND },
- { 1, 1, 0, 0, "EX2", TGSI_OPCODE_EX2 },
- { 1, 1, 0, 0, "LG2", TGSI_OPCODE_LG2 },
- { 1, 2, 0, 0, "POW", TGSI_OPCODE_POW },
- { 1, 2, 0, 0, "XPD", TGSI_OPCODE_XPD },
- { 0, 0, 0, 0, "", 32 }, /* removed */
- { 1, 1, 0, 0, "ABS", TGSI_OPCODE_ABS },
- { 1, 1, 0, 0, "RCC", TGSI_OPCODE_RCC },
- { 1, 2, 0, 0, "DPH", TGSI_OPCODE_DPH },
- { 1, 1, 0, 0, "COS", TGSI_OPCODE_COS },
- { 1, 1, 0, 0, "DDX", TGSI_OPCODE_DDX },
- { 1, 1, 0, 0, "DDY", TGSI_OPCODE_DDY },
- { 0, 0, 0, 0, "KILP", TGSI_OPCODE_KILP },
- { 1, 1, 0, 0, "PK2H", TGSI_OPCODE_PK2H },
- { 1, 1, 0, 0, "PK2US", TGSI_OPCODE_PK2US },
- { 1, 1, 0, 0, "PK4B", TGSI_OPCODE_PK4B },
- { 1, 1, 0, 0, "PK4UB", TGSI_OPCODE_PK4UB },
- { 1, 2, 0, 0, "RFL", TGSI_OPCODE_RFL },
- { 1, 2, 0, 0, "SEQ", TGSI_OPCODE_SEQ },
- { 1, 2, 0, 0, "SFL", TGSI_OPCODE_SFL },
- { 1, 2, 0, 0, "SGT", TGSI_OPCODE_SGT },
- { 1, 1, 0, 0, "SIN", TGSI_OPCODE_SIN },
- { 1, 2, 0, 0, "SLE", TGSI_OPCODE_SLE },
- { 1, 2, 0, 0, "SNE", TGSI_OPCODE_SNE },
- { 1, 2, 0, 0, "STR", TGSI_OPCODE_STR },
- { 1, 2, 1, 0, "TEX", TGSI_OPCODE_TEX },
- { 1, 4, 1, 0, "TXD", TGSI_OPCODE_TXD },
- { 1, 2, 1, 0, "TXP", TGSI_OPCODE_TXP },
- { 1, 1, 0, 0, "UP2H", TGSI_OPCODE_UP2H },
- { 1, 1, 0, 0, "UP2US", TGSI_OPCODE_UP2US },
- { 1, 1, 0, 0, "UP4B", TGSI_OPCODE_UP4B },
- { 1, 1, 0, 0, "UP4UB", TGSI_OPCODE_UP4UB },
- { 1, 3, 0, 0, "X2D", TGSI_OPCODE_X2D },
- { 1, 1, 0, 0, "ARA", TGSI_OPCODE_ARA },
- { 1, 1, 0, 0, "ARR", TGSI_OPCODE_ARR },
- { 0, 1, 0, 0, "BRA", TGSI_OPCODE_BRA },
- { 0, 0, 0, 1, "CAL", TGSI_OPCODE_CAL },
- { 0, 0, 0, 0, "RET", TGSI_OPCODE_RET },
- { 1, 1, 0, 0, "SSG", TGSI_OPCODE_SSG },
- { 1, 3, 0, 0, "CMP", TGSI_OPCODE_CMP },
- { 1, 1, 0, 0, "SCS", TGSI_OPCODE_SCS },
- { 1, 2, 1, 0, "TXB", TGSI_OPCODE_TXB },
- { 1, 1, 0, 0, "NRM", TGSI_OPCODE_NRM },
- { 1, 2, 0, 0, "DIV", TGSI_OPCODE_DIV },
- { 1, 2, 0, 0, "DP2", TGSI_OPCODE_DP2 },
- { 1, 2, 1, 0, "TXL", TGSI_OPCODE_TXL },
- { 0, 0, 0, 0, "BRK", TGSI_OPCODE_BRK },
- { 0, 1, 0, 1, "IF", TGSI_OPCODE_IF },
- { 1, 1, 0, 0, "BGNFOR", TGSI_OPCODE_BGNFOR },
- { 0, 1, 0, 0, "REP", TGSI_OPCODE_REP },
- { 0, 0, 0, 1, "ELSE", TGSI_OPCODE_ELSE },
- { 0, 0, 0, 0, "ENDIF", TGSI_OPCODE_ENDIF },
- { 1, 0, 0, 0, "ENDFOR", TGSI_OPCODE_ENDFOR },
- { 0, 0, 0, 0, "ENDREP", TGSI_OPCODE_ENDREP },
- { 0, 1, 0, 0, "PUSHA", TGSI_OPCODE_PUSHA },
- { 1, 0, 0, 0, "POPA", TGSI_OPCODE_POPA },
- { 1, 1, 0, 0, "CEIL", TGSI_OPCODE_CEIL },
- { 1, 1, 0, 0, "I2F", TGSI_OPCODE_I2F },
- { 1, 1, 0, 0, "NOT", TGSI_OPCODE_NOT },
- { 1, 1, 0, 0, "TRUNC", TGSI_OPCODE_TRUNC },
- { 1, 2, 0, 0, "SHL", TGSI_OPCODE_SHL },
- { 1, 2, 0, 0, "SHR", TGSI_OPCODE_SHR },
- { 1, 2, 0, 0, "AND", TGSI_OPCODE_AND },
- { 1, 2, 0, 0, "OR", TGSI_OPCODE_OR },
- { 1, 2, 0, 0, "MOD", TGSI_OPCODE_MOD },
- { 1, 2, 0, 0, "XOR", TGSI_OPCODE_XOR },
- { 1, 3, 0, 0, "SAD", TGSI_OPCODE_SAD },
- { 1, 2, 1, 0, "TXF", TGSI_OPCODE_TXF },
- { 1, 2, 1, 0, "TXQ", TGSI_OPCODE_TXQ },
- { 0, 0, 0, 0, "CONT", TGSI_OPCODE_CONT },
- { 0, 0, 0, 0, "EMIT", TGSI_OPCODE_EMIT },
- { 0, 0, 0, 0, "ENDPRIM", TGSI_OPCODE_ENDPRIM },
- { 0, 0, 0, 1, "BGNLOOP", TGSI_OPCODE_BGNLOOP },
- { 0, 0, 0, 0, "BGNSUB", TGSI_OPCODE_BGNSUB },
- { 0, 0, 0, 1, "ENDLOOP", TGSI_OPCODE_ENDLOOP },
- { 0, 0, 0, 0, "ENDSUB", TGSI_OPCODE_ENDSUB },
- { 1, 1, 0, 0, "NOISE1", TGSI_OPCODE_NOISE1 },
- { 1, 1, 0, 0, "NOISE2", TGSI_OPCODE_NOISE2 },
- { 1, 1, 0, 0, "NOISE3", TGSI_OPCODE_NOISE3 },
- { 1, 1, 0, 0, "NOISE4", TGSI_OPCODE_NOISE4 },
- { 0, 0, 0, 0, "NOP", TGSI_OPCODE_NOP },
- { 0, 0, 0, 0, "", 108 }, /* removed */
- { 0, 0, 0, 0, "", 109 }, /* removed */
- { 0, 0, 0, 0, "", 110 }, /* removed */
- { 0, 0, 0, 0, "", 111 }, /* removed */
- { 1, 1, 0, 0, "NRM4", TGSI_OPCODE_NRM4 },
- { 0, 1, 0, 0, "CALLNZ", TGSI_OPCODE_CALLNZ },
- { 0, 1, 0, 0, "IFC", TGSI_OPCODE_IFC },
- { 0, 1, 0, 0, "BREAKC", TGSI_OPCODE_BREAKC },
- { 0, 1, 0, 0, "KIL", TGSI_OPCODE_KIL },
- { 0, 0, 0, 0, "END", TGSI_OPCODE_END },
- { 1, 1, 0, 0, "SWZ", TGSI_OPCODE_SWZ }
+ { 1, 1, 0, 0, 0, 0, "ARL", TGSI_OPCODE_ARL },
+ { 1, 1, 0, 0, 0, 0, "MOV", TGSI_OPCODE_MOV },
+ { 1, 1, 0, 0, 0, 0, "LIT", TGSI_OPCODE_LIT },
+ { 1, 1, 0, 0, 0, 0, "RCP", TGSI_OPCODE_RCP },
+ { 1, 1, 0, 0, 0, 0, "RSQ", TGSI_OPCODE_RSQ },
+ { 1, 1, 0, 0, 0, 0, "EXP", TGSI_OPCODE_EXP },
+ { 1, 1, 0, 0, 0, 0, "LOG", TGSI_OPCODE_LOG },
+ { 1, 2, 0, 0, 0, 0, "MUL", TGSI_OPCODE_MUL },
+ { 1, 2, 0, 0, 0, 0, "ADD", TGSI_OPCODE_ADD },
+ { 1, 2, 0, 0, 0, 0, "DP3", TGSI_OPCODE_DP3 },
+ { 1, 2, 0, 0, 0, 0, "DP4", TGSI_OPCODE_DP4 },
+ { 1, 2, 0, 0, 0, 0, "DST", TGSI_OPCODE_DST },
+ { 1, 2, 0, 0, 0, 0, "MIN", TGSI_OPCODE_MIN },
+ { 1, 2, 0, 0, 0, 0, "MAX", TGSI_OPCODE_MAX },
+ { 1, 2, 0, 0, 0, 0, "SLT", TGSI_OPCODE_SLT },
+ { 1, 2, 0, 0, 0, 0, "SGE", TGSI_OPCODE_SGE },
+ { 1, 3, 0, 0, 0, 0, "MAD", TGSI_OPCODE_MAD },
+ { 1, 2, 0, 0, 0, 0, "SUB", TGSI_OPCODE_SUB },
+ { 1, 3, 0, 0, 0, 0, "LRP", TGSI_OPCODE_LRP },
+ { 1, 3, 0, 0, 0, 0, "CND", TGSI_OPCODE_CND },
+ { 0, 0, 0, 0, 0, 0, "", 20 }, /* removed */
+ { 1, 3, 0, 0, 0, 0, "DP2A", TGSI_OPCODE_DP2A },
+ { 0, 0, 0, 0, 0, 0, "", 22 }, /* removed */
+ { 0, 0, 0, 0, 0, 0, "", 23 }, /* removed */
+ { 1, 1, 0, 0, 0, 0, "FRC", TGSI_OPCODE_FRC },
+ { 1, 3, 0, 0, 0, 0, "CLAMP", TGSI_OPCODE_CLAMP },
+ { 1, 1, 0, 0, 0, 0, "FLR", TGSI_OPCODE_FLR },
+ { 1, 1, 0, 0, 0, 0, "ROUND", TGSI_OPCODE_ROUND },
+ { 1, 1, 0, 0, 0, 0, "EX2", TGSI_OPCODE_EX2 },
+ { 1, 1, 0, 0, 0, 0, "LG2", TGSI_OPCODE_LG2 },
+ { 1, 2, 0, 0, 0, 0, "POW", TGSI_OPCODE_POW },
+ { 1, 2, 0, 0, 0, 0, "XPD", TGSI_OPCODE_XPD },
+ { 0, 0, 0, 0, 0, 0, "", 32 }, /* removed */
+ { 1, 1, 0, 0, 0, 0, "ABS", TGSI_OPCODE_ABS },
+ { 1, 1, 0, 0, 0, 0, "RCC", TGSI_OPCODE_RCC },
+ { 1, 2, 0, 0, 0, 0, "DPH", TGSI_OPCODE_DPH },
+ { 1, 1, 0, 0, 0, 0, "COS", TGSI_OPCODE_COS },
+ { 1, 1, 0, 0, 0, 0, "DDX", TGSI_OPCODE_DDX },
+ { 1, 1, 0, 0, 0, 0, "DDY", TGSI_OPCODE_DDY },
+ { 0, 0, 0, 0, 0, 0, "KILP", TGSI_OPCODE_KILP },
+ { 1, 1, 0, 0, 0, 0, "PK2H", TGSI_OPCODE_PK2H },
+ { 1, 1, 0, 0, 0, 0, "PK2US", TGSI_OPCODE_PK2US },
+ { 1, 1, 0, 0, 0, 0, "PK4B", TGSI_OPCODE_PK4B },
+ { 1, 1, 0, 0, 0, 0, "PK4UB", TGSI_OPCODE_PK4UB },
+ { 1, 2, 0, 0, 0, 0, "RFL", TGSI_OPCODE_RFL },
+ { 1, 2, 0, 0, 0, 0, "SEQ", TGSI_OPCODE_SEQ },
+ { 1, 2, 0, 0, 0, 0, "SFL", TGSI_OPCODE_SFL },
+ { 1, 2, 0, 0, 0, 0, "SGT", TGSI_OPCODE_SGT },
+ { 1, 1, 0, 0, 0, 0, "SIN", TGSI_OPCODE_SIN },
+ { 1, 2, 0, 0, 0, 0, "SLE", TGSI_OPCODE_SLE },
+ { 1, 2, 0, 0, 0, 0, "SNE", TGSI_OPCODE_SNE },
+ { 1, 2, 0, 0, 0, 0, "STR", TGSI_OPCODE_STR },
+ { 1, 2, 1, 0, 0, 0, "TEX", TGSI_OPCODE_TEX },
+ { 1, 4, 1, 0, 0, 0, "TXD", TGSI_OPCODE_TXD },
+ { 1, 2, 1, 0, 0, 0, "TXP", TGSI_OPCODE_TXP },
+ { 1, 1, 0, 0, 0, 0, "UP2H", TGSI_OPCODE_UP2H },
+ { 1, 1, 0, 0, 0, 0, "UP2US", TGSI_OPCODE_UP2US },
+ { 1, 1, 0, 0, 0, 0, "UP4B", TGSI_OPCODE_UP4B },
+ { 1, 1, 0, 0, 0, 0, "UP4UB", TGSI_OPCODE_UP4UB },
+ { 1, 3, 0, 0, 0, 0, "X2D", TGSI_OPCODE_X2D },
+ { 1, 1, 0, 0, 0, 0, "ARA", TGSI_OPCODE_ARA },
+ { 1, 1, 0, 0, 0, 0, "ARR", TGSI_OPCODE_ARR },
+ { 0, 1, 0, 0, 0, 0, "BRA", TGSI_OPCODE_BRA },
+ { 0, 0, 0, 1, 0, 0, "CAL", TGSI_OPCODE_CAL },
+ { 0, 0, 0, 0, 0, 0, "RET", TGSI_OPCODE_RET },
+ { 1, 1, 0, 0, 0, 0, "SSG", TGSI_OPCODE_SSG },
+ { 1, 3, 0, 0, 0, 0, "CMP", TGSI_OPCODE_CMP },
+ { 1, 1, 0, 0, 0, 0, "SCS", TGSI_OPCODE_SCS },
+ { 1, 2, 1, 0, 0, 0, "TXB", TGSI_OPCODE_TXB },
+ { 1, 1, 0, 0, 0, 0, "NRM", TGSI_OPCODE_NRM },
+ { 1, 2, 0, 0, 0, 0, "DIV", TGSI_OPCODE_DIV },
+ { 1, 2, 0, 0, 0, 0, "DP2", TGSI_OPCODE_DP2 },
+ { 1, 2, 1, 0, 0, 0, "TXL", TGSI_OPCODE_TXL },
+ { 0, 0, 0, 0, 0, 0, "BRK", TGSI_OPCODE_BRK },
+ { 0, 1, 0, 1, 0, 1, "IF", TGSI_OPCODE_IF },
+ { 1, 1, 0, 0, 0, 1, "BGNFOR", TGSI_OPCODE_BGNFOR },
+ { 0, 1, 0, 0, 0, 1, "REP", TGSI_OPCODE_REP },
+ { 0, 0, 0, 1, 1, 1, "ELSE", TGSI_OPCODE_ELSE },
+ { 0, 0, 0, 0, 1, 0, "ENDIF", TGSI_OPCODE_ENDIF },
+ { 1, 0, 0, 0, 1, 0, "ENDFOR", TGSI_OPCODE_ENDFOR },
+ { 0, 0, 0, 0, 1, 0, "ENDREP", TGSI_OPCODE_ENDREP },
+ { 0, 1, 0, 0, 0, 0, "PUSHA", TGSI_OPCODE_PUSHA },
+ { 1, 0, 0, 0, 0, 0, "POPA", TGSI_OPCODE_POPA },
+ { 1, 1, 0, 0, 0, 0, "CEIL", TGSI_OPCODE_CEIL },
+ { 1, 1, 0, 0, 0, 0, "I2F", TGSI_OPCODE_I2F },
+ { 1, 1, 0, 0, 0, 0, "NOT", TGSI_OPCODE_NOT },
+ { 1, 1, 0, 0, 0, 0, "TRUNC", TGSI_OPCODE_TRUNC },
+ { 1, 2, 0, 0, 0, 0, "SHL", TGSI_OPCODE_SHL },
+ { 1, 2, 0, 0, 0, 0, "SHR", TGSI_OPCODE_SHR },
+ { 1, 2, 0, 0, 0, 0, "AND", TGSI_OPCODE_AND },
+ { 1, 2, 0, 0, 0, 0, "OR", TGSI_OPCODE_OR },
+ { 1, 2, 0, 0, 0, 0, "MOD", TGSI_OPCODE_MOD },
+ { 1, 2, 0, 0, 0, 0, "XOR", TGSI_OPCODE_XOR },
+ { 1, 3, 0, 0, 0, 0, "SAD", TGSI_OPCODE_SAD },
+ { 1, 2, 1, 0, 0, 0, "TXF", TGSI_OPCODE_TXF },
+ { 1, 2, 1, 0, 0, 0, "TXQ", TGSI_OPCODE_TXQ },
+ { 0, 0, 0, 0, 0, 0, "CONT", TGSI_OPCODE_CONT },
+ { 0, 0, 0, 0, 0, 0, "EMIT", TGSI_OPCODE_EMIT },
+ { 0, 0, 0, 0, 0, 0, "ENDPRIM", TGSI_OPCODE_ENDPRIM },
+ { 0, 0, 0, 1, 0, 1, "BGNLOOP", TGSI_OPCODE_BGNLOOP },
+ { 0, 0, 0, 0, 0, 1, "BGNSUB", TGSI_OPCODE_BGNSUB },
+ { 0, 0, 0, 1, 1, 0, "ENDLOOP", TGSI_OPCODE_ENDLOOP },
+ { 0, 0, 0, 0, 1, 0, "ENDSUB", TGSI_OPCODE_ENDSUB },
+ { 1, 1, 0, 0, 0, 0, "NOISE1", TGSI_OPCODE_NOISE1 },
+ { 1, 1, 0, 0, 0, 0, "NOISE2", TGSI_OPCODE_NOISE2 },
+ { 1, 1, 0, 0, 0, 0, "NOISE3", TGSI_OPCODE_NOISE3 },
+ { 1, 1, 0, 0, 0, 0, "NOISE4", TGSI_OPCODE_NOISE4 },
+ { 0, 0, 0, 0, 0, 0, "NOP", TGSI_OPCODE_NOP },
+ { 0, 0, 0, 0, 0, 0, "", 108 }, /* removed */
+ { 0, 0, 0, 0, 0, 0, "", 109 }, /* removed */
+ { 0, 0, 0, 0, 0, 0, "", 110 }, /* removed */
+ { 0, 0, 0, 0, 0, 0, "", 111 }, /* removed */
+ { 1, 1, 0, 0, 0, 0, "NRM4", TGSI_OPCODE_NRM4 },
+ { 0, 1, 0, 0, 0, 0, "CALLNZ", TGSI_OPCODE_CALLNZ },
+ { 0, 1, 0, 0, 0, 0, "IFC", TGSI_OPCODE_IFC },
+ { 0, 1, 0, 0, 0, 0, "BREAKC", TGSI_OPCODE_BREAKC },
+ { 0, 1, 0, 0, 0, 0, "KIL", TGSI_OPCODE_KIL },
+ { 0, 0, 0, 0, 0, 0, "END", TGSI_OPCODE_END },
+ { 1, 1, 0, 0, 0, 0, "SWZ", TGSI_OPCODE_SWZ }
};
const struct tgsi_opcode_info *
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.h b/src/gallium/auxiliary/tgsi/tgsi_info.h
index b2375c69710..74713c3b98a 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.h
@@ -36,10 +36,12 @@ extern "C" {
struct tgsi_opcode_info
{
- uint num_dst;
- uint num_src;
- boolean is_tex;
- boolean is_branch;
+ unsigned num_dst:3; /* expected destination operand count (checked by tgsi_sanity.c) */
+ unsigned num_src:3; /* expected source operand count (checked by tgsi_sanity.c) */
+ unsigned is_tex:1;
+ unsigned is_branch:1;
+ int pre_dedent:2; /* subtracted from the dump indent before the instruction is printed */
+ int post_indent:2; /* added to the dump indent after the indentation is printed */
+ /* NOTE(review): whether a plain 'int' bit-field is signed is
+ * implementation-defined; the opcode table only stores 0 or 1 in
+ * pre_dedent/post_indent, which fits either way.
+ */
const char *mnemonic;
uint opcode;
};
diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
index ed594a3e2c7..e7bcf4bf754 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
@@ -60,7 +60,6 @@ OP13(MAD)
OP12(SUB)
OP13(LRP)
OP13(CND)
-OP13(CND0)
OP13(DP2A)
OP11(FRC)
OP13(CLAMP)
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
index 4fe8553c423..8a13885da9b 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
@@ -199,10 +199,10 @@ iter_instruction(
}
if (info->num_dst != inst->Instruction.NumDstRegs) {
- report_error( ctx, "Invalid number of destination operands, should be %u", info->num_dst );
+ report_error( ctx, "%s: Invalid number of destination operands, should be %u", info->mnemonic, info->num_dst );
}
if (info->num_src != inst->Instruction.NumSrcRegs) {
- report_error( ctx, "Invalid number of source operands, should be %u", info->num_src );
+ report_error( ctx, "%s: Invalid number of source operands, should be %u", info->mnemonic, info->num_src );
}
/* Check destination and source registers' validity.
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index 46f2387c158..3cdf8b9f359 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -2089,10 +2089,6 @@ emit_instruction(
return 0;
break;
- case TGSI_OPCODE_CND0:
- return 0;
- break;
-
case TGSI_OPCODE_DP2A:
FETCH( func, *inst, 0, 0, CHAN_X ); /* xmm0 = src[0].x */
FETCH( func, *inst, 1, 1, CHAN_X ); /* xmm1 = src[1].x */
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
index c0a0627e0b2..f7096bd8e2c 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
@@ -29,6 +29,7 @@
#include "pipe/p_context.h"
#include "pipe/p_state.h"
#include "tgsi/tgsi_ureg.h"
+#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_dump.h"
#include "util/u_memory.h"
#include "util/u_math.h"
@@ -71,6 +72,7 @@ struct ureg_tokens {
#define UREG_MAX_OUTPUT PIPE_MAX_ATTRIBS
#define UREG_MAX_IMMEDIATE 32
#define UREG_MAX_TEMP 256
+#define UREG_MAX_ADDR 2
#define DOMAIN_DECL 0
#define DOMAIN_INSN 1
@@ -99,11 +101,15 @@ struct ureg_program
} immediate[UREG_MAX_IMMEDIATE];
unsigned nr_immediates;
+ struct ureg_src sampler[PIPE_MAX_SAMPLERS];
+ unsigned nr_samplers;
+
unsigned temps_active[UREG_MAX_TEMP / 32];
unsigned nr_temps;
+ unsigned nr_addrs;
+
unsigned nr_constants;
- unsigned nr_samplers;
unsigned nr_instructions;
struct ureg_tokens domain[2];
@@ -187,6 +193,8 @@ ureg_dst_register( unsigned file,
dst.File = file;
dst.WriteMask = TGSI_WRITEMASK_XYZW;
dst.Indirect = 0;
+ dst.IndirectIndex = 0;
+ dst.IndirectSwizzle = 0;
dst.Saturate = 0;
dst.Index = index;
dst.Pad1 = 0;
@@ -208,6 +216,8 @@ ureg_src_register( unsigned file,
src.SwizzleW = TGSI_SWIZZLE_W;
src.Pad = 0;
src.Indirect = 0;
+ src.IndirectIndex = 0;
+ src.IndirectSwizzle = 0;
src.Absolute = 0;
src.Index = index;
src.Negate = 0;
@@ -254,6 +264,7 @@ ureg_DECL_fs_input( struct ureg_program *ureg,
unsigned index,
unsigned interp )
{
+ assert(ureg->processor == TGSI_PROCESSOR_FRAGMENT);
return ureg_DECL_input( ureg, name, index, interp );
}
@@ -263,6 +274,7 @@ ureg_DECL_vs_input( struct ureg_program *ureg,
unsigned name,
unsigned index )
{
+ assert(ureg->processor == TGSI_PROCESSOR_VERTEX);
return ureg_DECL_input( ureg, name, index, TGSI_INTERPOLATE_CONSTANT );
}
@@ -346,11 +358,36 @@ void ureg_release_temporary( struct ureg_program *ureg,
}
+/* Allocate a new address register.
+ *
+ * Returns a TGSI_FILE_ADDRESS destination with the next unused index and
+ * bumps ureg->nr_addrs. Once UREG_MAX_ADDR registers have been handed out
+ * this asserts; if the assert is compiled out, address register 0 is
+ * returned instead.
+ */
+struct ureg_dst ureg_DECL_address( struct ureg_program *ureg )
+{
+ if (ureg->nr_addrs < UREG_MAX_ADDR)
+ return ureg_dst_register( TGSI_FILE_ADDRESS, ureg->nr_addrs++ );
+
+ /* out of address registers */
+ assert( 0 );
+ return ureg_dst_register( TGSI_FILE_ADDRESS, 0 );
+}
+
/* Allocate a new sampler.
+ *
+ * 'nr' is the sampler index to declare. If a sampler with that index has
+ * already been declared, the existing register is returned; otherwise a new
+ * entry is appended to ureg->sampler[]. When all PIPE_MAX_SAMPLERS entries
+ * are in use this asserts; if the assert is compiled out, ureg->sampler[0]
+ * is returned instead.
*/
-struct ureg_src ureg_DECL_sampler( struct ureg_program *ureg )
+struct ureg_src ureg_DECL_sampler( struct ureg_program *ureg,
+ unsigned nr )
{
- return ureg_src_register( TGSI_FILE_SAMPLER, ureg->nr_samplers++ );
+ unsigned i;
+
+ /* reuse an existing declaration of the same sampler index */
+ for (i = 0; i < ureg->nr_samplers; i++)
+ if (ureg->sampler[i].Index == nr)
+ return ureg->sampler[i];
+
+ /* not found: i == ureg->nr_samplers here, i.e. the first free slot */
+ if (i < PIPE_MAX_SAMPLERS) {
+ ureg->sampler[i] = ureg_src_register( TGSI_FILE_SAMPLER, nr );
+ ureg->nr_samplers++;
+ return ureg->sampler[i];
+ }
+
+ /* table full */
+ assert( 0 );
+ return ureg->sampler[0];
}
@@ -363,6 +400,8 @@ static int match_or_expand_immediate( const float *v,
unsigned *swizzle )
{
unsigned i, j;
+
+ *swizzle = 0;
for (i = 0; i < nr; i++) {
boolean found = FALSE;
@@ -394,8 +433,8 @@ struct ureg_src ureg_DECL_immediate( struct ureg_program *ureg,
const float *v,
unsigned nr )
{
- unsigned i;
- unsigned swizzle = 0;
+ unsigned i, j;
+ unsigned swizzle;
/* Could do a first pass where we examine all existing immediates
* without expanding.
@@ -423,6 +462,12 @@ struct ureg_src ureg_DECL_immediate( struct ureg_program *ureg,
set_bad( ureg );
out:
+ /* Make sure that all referenced elements are from this immediate.
+ * Has the effect of making size-one immediates into scalars.
+ */
+ for (j = nr; j < 4; j++)
+ swizzle |= (swizzle & 0x3) << (j * 2);
+
return ureg_swizzle( ureg_src_register( TGSI_FILE_IMMEDIATE, i ),
(swizzle >> 0) & 0x3,
(swizzle >> 2) & 0x3,
@@ -442,31 +487,39 @@ ureg_emit_src( struct ureg_program *ureg,
union tgsi_any_token *out = get_tokens( ureg, DOMAIN_INSN, size );
unsigned n = 0;
+ assert(src.File != TGSI_FILE_NULL);
+ assert(src.File != TGSI_FILE_OUTPUT);
+ assert(src.File < TGSI_FILE_COUNT);
+
out[n].value = 0;
out[n].src.File = src.File;
out[n].src.SwizzleX = src.SwizzleX;
out[n].src.SwizzleY = src.SwizzleY;
out[n].src.SwizzleZ = src.SwizzleZ;
out[n].src.SwizzleW = src.SwizzleW;
- out[n].src.Indirect = src.Indirect;
out[n].src.Index = src.Index;
+ out[n].src.Negate = src.Negate;
n++;
if (src.Absolute) {
+ out[0].src.Extended = 1;
+ out[0].src.Negate = 0;
out[n].value = 0;
+ out[n].src_ext_mod.Type = TGSI_SRC_REGISTER_EXT_TYPE_MOD;
out[n].src_ext_mod.Absolute = 1;
+ out[n].src_ext_mod.Negate = src.Negate;
n++;
}
if (src.Indirect) {
+ out[0].src.Indirect = 1;
out[n].value = 0;
out[n].src.File = TGSI_FILE_ADDRESS;
- out[n].src.SwizzleX = TGSI_SWIZZLE_X;
- out[n].src.SwizzleY = TGSI_SWIZZLE_X;
- out[n].src.SwizzleZ = TGSI_SWIZZLE_X;
- out[n].src.SwizzleW = TGSI_SWIZZLE_X;
- out[n].src.Indirect = 0;
- out[n].src.Index = 0;
+ out[n].src.SwizzleX = src.IndirectSwizzle;
+ out[n].src.SwizzleY = src.IndirectSwizzle;
+ out[n].src.SwizzleZ = src.IndirectSwizzle;
+ out[n].src.SwizzleW = src.IndirectSwizzle;
+ out[n].src.Index = src.IndirectIndex;
n++;
}
@@ -484,6 +537,13 @@ ureg_emit_dst( struct ureg_program *ureg,
union tgsi_any_token *out = get_tokens( ureg, DOMAIN_INSN, size );
unsigned n = 0;
+ assert(dst.File != TGSI_FILE_NULL);
+ assert(dst.File != TGSI_FILE_CONSTANT);
+ assert(dst.File != TGSI_FILE_INPUT);
+ assert(dst.File != TGSI_FILE_SAMPLER);
+ assert(dst.File != TGSI_FILE_IMMEDIATE);
+ assert(dst.File < TGSI_FILE_COUNT);
+
out[n].value = 0;
out[n].dst.File = dst.File;
out[n].dst.WriteMask = dst.WriteMask;
@@ -494,12 +554,11 @@ ureg_emit_dst( struct ureg_program *ureg,
if (dst.Indirect) {
out[n].value = 0;
out[n].src.File = TGSI_FILE_ADDRESS;
- out[n].src.SwizzleX = TGSI_SWIZZLE_X;
- out[n].src.SwizzleY = TGSI_SWIZZLE_X;
- out[n].src.SwizzleZ = TGSI_SWIZZLE_X;
- out[n].src.SwizzleW = TGSI_SWIZZLE_X;
- out[n].src.Indirect = 0;
- out[n].src.Index = 0;
+ out[n].src.SwizzleX = dst.IndirectSwizzle;
+ out[n].src.SwizzleY = dst.IndirectSwizzle;
+ out[n].src.SwizzleZ = dst.IndirectSwizzle;
+ out[n].src.SwizzleW = dst.IndirectSwizzle;
+ out[n].src.Index = dst.IndirectIndex;
n++;
}
@@ -523,7 +582,6 @@ ureg_emit_insn(struct ureg_program *ureg,
out[0].insn.NrTokens = 0;
out[0].insn.Opcode = opcode;
out[0].insn.Saturate = saturate;
- out[0].insn.NrTokens = 0;
out[0].insn.NumDstRegs = num_dst;
out[0].insn.NumSrcRegs = num_src;
out[0].insn.Padding = 0;
@@ -542,6 +600,9 @@ ureg_emit_label(struct ureg_program *ureg,
{
union tgsi_any_token *out, *insn;
+ if(!label_token)
+ return;
+
out = get_tokens( ureg, DOMAIN_INSN, 1 );
insn = retrieve_token( ureg, DOMAIN_INSN, insn_token );
@@ -617,6 +678,17 @@ ureg_insn(struct ureg_program *ureg,
unsigned insn, i;
boolean saturate;
+#ifdef DEBUG
+ {
+ const struct tgsi_opcode_info *info = tgsi_get_opcode_info( opcode );
+ assert(info);
+ if(info) {
+ assert(nr_dst == info->num_dst);
+ assert(nr_src == info->num_src);
+ }
+ }
+#endif
+
saturate = nr_dst ? dst[0].Saturate : FALSE;
insn = ureg_emit_insn( ureg, opcode, saturate, nr_dst, nr_src );
@@ -723,10 +795,10 @@ static void emit_decls( struct ureg_program *ureg )
TGSI_INTERPOLATE_CONSTANT );
}
- if (ureg->nr_samplers) {
+ for (i = 0; i < ureg->nr_samplers; i++) {
emit_decl_range( ureg,
TGSI_FILE_SAMPLER,
- 0, ureg->nr_samplers );
+ ureg->sampler[i].Index, 1 );
}
if (ureg->nr_constants) {
@@ -741,6 +813,12 @@ static void emit_decls( struct ureg_program *ureg )
0, ureg->nr_temps );
}
+ if (ureg->nr_addrs) {
+ emit_decl_range( ureg,
+ TGSI_FILE_ADDRESS,
+ 0, ureg->nr_addrs );
+ }
+
for (i = 0; i < ureg->nr_immediates; i++) {
emit_immediate( ureg,
ureg->immediate[i].v );
@@ -764,7 +842,7 @@ static void copy_instructions( struct ureg_program *ureg )
static void
-fixup_header_size(struct ureg_program *ureg )
+fixup_header_size(struct ureg_program *ureg)
{
union tgsi_any_token *out = retrieve_token( ureg, DOMAIN_DECL, 1 );
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
index 8836a1ea0eb..acbca59040c 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
@@ -31,6 +31,10 @@
#include "pipe/p_compiler.h"
#include "pipe/p_shader_tokens.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
struct ureg_program;
/* Almost a tgsi_src_register, but we need to pull in the Absolute
@@ -48,6 +52,8 @@ struct ureg_src
unsigned Absolute : 1; /* BOOL */
int Index : 16; /* SINT */
unsigned Negate : 1; /* BOOL */
+ int IndirectIndex : 16; /* SINT */
+ int IndirectSwizzle : 2; /* TGSI_SWIZZLE_ */
};
/* Very similar to a tgsi_dst_register, removing unsupported fields
@@ -64,6 +70,8 @@ struct ureg_dst
int Index : 16; /* SINT */
unsigned Pad1 : 5;
unsigned Pad2 : 1; /* BOOL */
+ int IndirectIndex : 16; /* SINT */
+ int IndirectSwizzle : 2; /* TGSI_SWIZZLE_ */
};
struct pipe_context;
@@ -131,12 +139,21 @@ void
ureg_release_temporary( struct ureg_program *ureg,
struct ureg_dst tmp );
+struct ureg_dst
+ureg_DECL_address( struct ureg_program * );
+
+/* Supply an index to the sampler declaration as this is the hook to
+ * the external pipe_sampler state. Users of this function probably
+ * don't want just any sampler, but a specific one which they've set
+ * up state for in the context.
+ */
struct ureg_src
-ureg_DECL_sampler( struct ureg_program * );
+ureg_DECL_sampler( struct ureg_program *,
+ unsigned index );
static INLINE struct ureg_src
-ureg_DECL_immediate4f( struct ureg_program *ureg,
+ureg_imm4f( struct ureg_program *ureg,
float a, float b,
float c, float d)
{
@@ -149,7 +166,7 @@ ureg_DECL_immediate4f( struct ureg_program *ureg,
}
static INLINE struct ureg_src
-ureg_DECL_immediate3f( struct ureg_program *ureg,
+ureg_imm3f( struct ureg_program *ureg,
float a, float b,
float c)
{
@@ -161,7 +178,7 @@ ureg_DECL_immediate3f( struct ureg_program *ureg,
}
static INLINE struct ureg_src
-ureg_DECL_immediate2f( struct ureg_program *ureg,
+ureg_imm2f( struct ureg_program *ureg,
float a, float b)
{
float v[2];
@@ -171,7 +188,7 @@ ureg_DECL_immediate2f( struct ureg_program *ureg,
}
static INLINE struct ureg_src
-ureg_DECL_immediate1f( struct ureg_program *ureg,
+ureg_imm1f( struct ureg_program *ureg,
float a)
{
float v[1];
@@ -392,6 +409,7 @@ static INLINE void ureg_##op( struct ureg_program *ureg, \
static INLINE struct ureg_src
ureg_negate( struct ureg_src reg )
{
+ assert(reg.File != TGSI_FILE_NULL);
reg.Negate ^= 1;
return reg;
}
@@ -399,6 +417,7 @@ ureg_negate( struct ureg_src reg )
static INLINE struct ureg_src
ureg_abs( struct ureg_src reg )
{
+ assert(reg.File != TGSI_FILE_NULL);
reg.Absolute = 1;
reg.Negate = 0;
return reg;
@@ -413,6 +432,12 @@ ureg_swizzle( struct ureg_src reg,
(reg.SwizzleZ << 4) |
(reg.SwizzleW << 6));
+ assert(reg.File != TGSI_FILE_NULL);
+ assert(x < 4);
+ assert(y < 4);
+ assert(z < 4);
+ assert(w < 4);
+
reg.SwizzleX = (swz >> (x*2)) & 0x3;
reg.SwizzleY = (swz >> (y*2)) & 0x3;
reg.SwizzleZ = (swz >> (z*2)) & 0x3;
@@ -430,6 +455,7 @@ static INLINE struct ureg_dst
ureg_writemask( struct ureg_dst reg,
unsigned writemask )
{
+ assert(reg.File != TGSI_FILE_NULL);
reg.WriteMask &= writemask;
return reg;
}
@@ -437,10 +463,33 @@ ureg_writemask( struct ureg_dst reg,
static INLINE struct ureg_dst
ureg_saturate( struct ureg_dst reg )
{
+ assert(reg.File != TGSI_FILE_NULL);
reg.Saturate = 1;
return reg;
}
+static INLINE struct ureg_dst
+ureg_dst_indirect( struct ureg_dst reg, struct ureg_src addr )
+{
+ assert(reg.File != TGSI_FILE_NULL);
+ assert(addr.File == TGSI_FILE_ADDRESS);
+ reg.Indirect = 1;
+ reg.IndirectIndex = addr.Index;
+ reg.IndirectSwizzle = addr.SwizzleX;
+ return reg;
+}
+
+static INLINE struct ureg_src
+ureg_src_indirect( struct ureg_src reg, struct ureg_src addr )
+{
+ assert(reg.File != TGSI_FILE_NULL);
+ assert(addr.File == TGSI_FILE_ADDRESS);
+ reg.Indirect = 1;
+ reg.IndirectIndex = addr.Index;
+ reg.IndirectSwizzle = addr.SwizzleX;
+ return reg;
+}
+
static INLINE struct ureg_dst
ureg_dst( struct ureg_src src )
{
@@ -449,6 +498,8 @@ ureg_dst( struct ureg_src src )
dst.File = src.File;
dst.WriteMask = TGSI_WRITEMASK_XYZW;
dst.Indirect = src.Indirect;
+ dst.IndirectIndex = src.IndirectIndex;
+ dst.IndirectSwizzle = src.IndirectSwizzle;
dst.Saturate = 0;
dst.Index = src.Index;
dst.Pad1 = 0;
@@ -469,6 +520,8 @@ ureg_src( struct ureg_dst dst )
src.SwizzleW = TGSI_SWIZZLE_W;
src.Pad = 0;
src.Indirect = dst.Indirect;
+ src.IndirectIndex = dst.IndirectIndex;
+ src.IndirectSwizzle = dst.IndirectSwizzle;
src.Absolute = 0;
src.Index = dst.Index;
src.Negate = 0;
@@ -478,4 +531,60 @@ ureg_src( struct ureg_dst dst )
+static INLINE struct ureg_dst
+ureg_dst_undef( void )
+{
+ struct ureg_dst dst;
+
+ dst.File = TGSI_FILE_NULL;
+ dst.WriteMask = 0;
+ dst.Indirect = 0;
+ dst.IndirectIndex = 0;
+ dst.IndirectSwizzle = 0;
+ dst.Saturate = 0;
+ dst.Index = 0;
+ dst.Pad1 = 0;
+ dst.Pad2 = 0;
+
+ return dst;
+}
+
+static INLINE struct ureg_src
+ureg_src_undef( void )
+{
+ struct ureg_src src;
+
+ src.File = TGSI_FILE_NULL;
+ src.SwizzleX = 0;
+ src.SwizzleY = 0;
+ src.SwizzleZ = 0;
+ src.SwizzleW = 0;
+ src.Pad = 0;
+ src.Indirect = 0;
+ src.IndirectIndex = 0;
+ src.IndirectSwizzle = 0;
+ src.Absolute = 0;
+ src.Index = 0;
+ src.Negate = 0;
+
+ return src;
+}
+
+static INLINE boolean
+ureg_src_is_undef( struct ureg_src src )
+{
+ return src.File == TGSI_FILE_NULL;
+}
+
+static INLINE boolean
+ureg_dst_is_undef( struct ureg_dst dst )
+{
+ return dst.File == TGSI_FILE_NULL;
+}
+
+
+#ifdef __cplusplus
+}
+#endif
+
#endif
diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.h b/src/gallium/auxiliary/tgsi/tgsi_util.h
index 7877f345587..21eb656327e 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_util.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_util.h
@@ -32,6 +32,10 @@
extern "C" {
#endif
+struct tgsi_src_register;
+struct tgsi_src_register_ext_swz;
+struct tgsi_full_src_register;
+
void *
tgsi_align_128bit(
void *unaligned );