diff options
Diffstat (limited to 'src/gallium')
113 files changed, 4492 insertions, 1000 deletions
diff --git a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt index 802ec371189..eb492076b7d 100644 --- a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt +++ b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt @@ -187,11 +187,7 @@ TGSI Instruction Specification 1.2.6 CND0 - Condition Zero - dst.x = (src2.x >= 0.0) ? src0.x : src1.x - dst.y = (src2.y >= 0.0) ? src0.y : src1.y - dst.z = (src2.z >= 0.0) ? src0.z : src1.z - dst.w = (src2.w >= 0.0) ? src0.w : src1.w - + Removed. Use (CMP src2, src1, src0) instead. 1.2.7 DOT2ADD - 2-component Dot Product And Add @@ -1031,12 +1027,12 @@ TGSI Instruction Specification 1.18.1 EXPP - Approximate Exponential Base 2 - Alias for EXP. + Use EXP. See also 1.19.3. 1.18.2 LOGP - Logarithm Base 2 - Alias for LG2. + Use LOG. See also 1.19.4. 1.19 vs_2_0 @@ -1053,6 +1049,16 @@ TGSI Instruction Specification Alias for ARR. +1.19.3 EXPP - Approximate Exponential Base 2 + + Use EX2. + + +1.19.4 LOGP - Logarithm Base 2 + + Use LG2. 
+ + 2 Explanation of symbols used ============================== diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index 05b07a3a73e..111d95b6665 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -43,6 +43,7 @@ struct dump_ctx struct tgsi_iterate_context iter; uint instno; + int indent; uint indentation; @@ -335,14 +336,6 @@ tgsi_dump_immediate( iter_immediate( &ctx.iter, (struct tgsi_full_immediate *)imm ); } -static void -indent(struct dump_ctx *ctx) -{ - uint i; - for (i = 0; i < ctx->indentation; i++) - TXT(" "); -} - static boolean iter_instruction( struct tgsi_iterate_context *iter, @@ -350,22 +343,19 @@ iter_instruction( { struct dump_ctx *ctx = (struct dump_ctx *) iter; uint instno = ctx->instno++; - + const struct tgsi_opcode_info *info = tgsi_get_opcode_info( inst->Instruction.Opcode ); uint i; boolean first_reg = TRUE; INSTID( instno ); TXT( ": " ); - - /* update indentation */ - if (inst->Instruction.Opcode == TGSI_OPCODE_ENDIF || - inst->Instruction.Opcode == TGSI_OPCODE_ENDFOR || - inst->Instruction.Opcode == TGSI_OPCODE_ENDLOOP) { - ctx->indentation -= indent_spaces; - } - indent(ctx); - - TXT( tgsi_get_opcode_info( inst->Instruction.Opcode )->mnemonic ); + + ctx->indent -= info->pre_dedent; + for(i = 0; (int)i < ctx->indent; ++i) + TXT( " " ); + ctx->indent += info->post_indent; + + TXT( info->mnemonic ); switch (inst->Instruction.Saturate) { case TGSI_SAT_NONE: @@ -526,6 +516,7 @@ tgsi_dump_instruction( struct dump_ctx ctx; ctx.instno = instno; + ctx.indent = 0; ctx.printf = dump_ctx_printf; ctx.indentation = 0; @@ -559,6 +550,7 @@ tgsi_dump( ctx.iter.epilog = NULL; ctx.instno = 0; + ctx.indent = 0; ctx.printf = dump_ctx_printf; ctx.indentation = 0; @@ -612,6 +604,7 @@ tgsi_dump_str( ctx.base.iter.epilog = NULL; ctx.base.instno = 0; + ctx.base.indent = 0; ctx.base.printf = &str_dump_ctx_printf; ctx.base.indentation = 0; diff --git 
a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 711e86d6edf..c79c56debd6 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -2329,16 +2329,6 @@ exec_instruction( } break; - case TGSI_OPCODE_CND0: - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - FETCH(&r[2], 2, chan_index); - micro_le(&r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[2], &r[0], &r[1]); - STORE(&r[0], 0, chan_index); - } - break; - case TGSI_OPCODE_DP2A: FETCH( &r[0], 0, CHAN_X ); FETCH( &r[1], 1, CHAN_X ); @@ -2766,19 +2756,32 @@ exec_instruction( if (mach->ExecMask) { /* do the call */ - /* push the Cond, Loop, Cont stacks */ + /* First, record the depths of the execution stacks. + * This is important for deeply nested/looped return statements. + * We have to unwind the stacks by the correct amount. For a + * real code generator, we could determine the number of entries + * to pop off each stack with simple static analysis and avoid + * implementing this data structure at run time. 
+ */ + mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop; + mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop; + mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop; + /* note that PC was already incremented above */ + mach->CallStack[mach->CallStackTop].ReturnAddr = *pc; + + mach->CallStackTop++; + + /* Second, push the Cond, Loop, Cont, Func stacks */ assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); mach->CondStack[mach->CondStackTop++] = mach->CondMask; assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); mach->ContStack[mach->ContStackTop++] = mach->ContMask; - assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; - /* note that PC was already incremented above */ - mach->CallStack[mach->CallStackTop++] = *pc; + /* Finally, jump to the subroutine */ *pc = inst->InstructionExtLabel.Label; } break; @@ -2795,18 +2798,24 @@ exec_instruction( *pc = -1; return; } - *pc = mach->CallStack[--mach->CallStackTop]; - /* pop the Cond, Loop, Cont stacks */ - assert(mach->CondStackTop > 0); - mach->CondMask = mach->CondStack[--mach->CondStackTop]; - assert(mach->LoopStackTop > 0); - mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; - assert(mach->ContStackTop > 0); - mach->ContMask = mach->ContStack[--mach->ContStackTop]; + assert(mach->CallStackTop > 0); + mach->CallStackTop--; + + mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop; + mach->CondMask = mach->CondStack[mach->CondStackTop]; + + mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop; + mach->LoopMask = mach->LoopStack[mach->LoopStackTop]; + + mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; + mach->ContMask = mach->ContStack[mach->ContStackTop]; + assert(mach->FuncStackTop > 0); mach->FuncMask = 
mach->FuncStack[--mach->FuncStackTop]; + *pc = mach->CallStack[mach->CallStackTop].ReturnAddr; + UPDATE_EXEC_MASK(mach); } break; @@ -3104,6 +3113,12 @@ exec_instruction( break; case TGSI_OPCODE_BGNFOR: + assert(mach->LoopCounterStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + for (chan_index = 0; chan_index < 3; chan_index++) { + FETCH( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[chan_index], 0, chan_index ); + } + STORE( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_Y], 0, CHAN_X ); + ++mach->LoopCounterStackTop; /* fall-through (for now) */ case TGSI_OPCODE_BGNLOOP: /* push LoopMask and ContMasks */ @@ -3111,10 +3126,58 @@ exec_instruction( mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); mach->ContStack[mach->ContStackTop++] = mach->ContMask; + assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1; break; case TGSI_OPCODE_ENDFOR: - /* fall-through (for now at least) */ + assert(mach->LoopCounterStackTop > 0); + micro_sub( &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], + &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], + &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); + /* update LoopMask */ + if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[0] <= 0) { + mach->LoopMask &= ~0x1; + } + if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[1] <= 0 ) { + mach->LoopMask &= ~0x2; + } + if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[2] <= 0 ) { + mach->LoopMask &= ~0x4; + } + if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[3] <= 0 ) { + mach->LoopMask &= ~0x8; + } + micro_add( &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], + &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], + &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Z]); + 
assert(mach->LoopLabelStackTop > 0); + inst = mach->Instructions + mach->LoopLabelStack[mach->LoopLabelStackTop - 1]; + STORE( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_Y], 0, CHAN_X ); + /* Restore ContMask, but don't pop */ + assert(mach->ContStackTop > 0); + mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; + UPDATE_EXEC_MASK(mach); + if (mach->ExecMask) { + /* repeat loop: jump to instruction just past BGNLOOP */ + assert(mach->LoopLabelStackTop > 0); + *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1; + } + else { + /* exit loop: pop LoopMask */ + assert(mach->LoopStackTop > 0); + mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; + /* pop ContMask */ + assert(mach->ContStackTop > 0); + mach->ContMask = mach->ContStack[--mach->ContStackTop]; + assert(mach->LoopLabelStackTop > 0); + --mach->LoopLabelStackTop; + assert(mach->LoopCounterStackTop > 0); + --mach->LoopCounterStackTop; + } + UPDATE_EXEC_MASK(mach); + break; + case TGSI_OPCODE_ENDLOOP: /* Restore ContMask, but don't pop */ assert(mach->ContStackTop > 0); @@ -3122,7 +3185,8 @@ exec_instruction( UPDATE_EXEC_MASK(mach); if (mach->ExecMask) { /* repeat loop: jump to instruction just past BGNLOOP */ - *pc = inst->InstructionExtLabel.Label + 1; + assert(mach->LoopLabelStackTop > 0); + *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1; } else { /* exit loop: pop LoopMask */ @@ -3131,6 +3195,8 @@ exec_instruction( /* pop ContMask */ assert(mach->ContStackTop > 0); mach->ContMask = mach->ContStack[--mach->ContStackTop]; + assert(mach->LoopLabelStackTop > 0); + --mach->LoopLabelStackTop; } UPDATE_EXEC_MASK(mach); break; @@ -3198,7 +3264,6 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) mach->FuncMask = 0xf; mach->ExecMask = 0xf; - mach->CondStackTop = 0; /* temporarily subvert this assertion */ assert(mach->CondStackTop == 0); assert(mach->LoopStackTop == 0); assert(mach->ContStackTop == 0); diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h 
b/src/gallium/auxiliary/tgsi/tgsi_exec.h index fd9ef6f35df..c72f76809d4 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -186,6 +186,17 @@ struct tgsi_exec_labels */ #define TGSI_EXEC_MAX_CONST_BUFFER 4096 + +/** function call/activation record */ +struct tgsi_call_record +{ + uint CondStackTop; + uint LoopStackTop; + uint ContStackTop; + uint ReturnAddr; +}; + + /** * Run-time virtual machine state for executing TGSI shader. */ @@ -232,6 +243,14 @@ struct tgsi_exec_machine uint LoopStack[TGSI_EXEC_MAX_LOOP_NESTING]; int LoopStackTop; + /** Loop label stack */ + uint LoopLabelStack[TGSI_EXEC_MAX_LOOP_NESTING]; + int LoopLabelStackTop; + + /** Loop counter stack (x = count, y = current, z = step) */ + struct tgsi_exec_vector LoopCounterStack[TGSI_EXEC_MAX_LOOP_NESTING]; + int LoopCounterStackTop; + /** Loop continue mask stack (see comments in tgsi_exec.c) */ uint ContStack[TGSI_EXEC_MAX_LOOP_NESTING]; int ContStackTop; @@ -241,7 +260,7 @@ struct tgsi_exec_machine int FuncStackTop; /** Function call stack for saving/restoring the program counter */ - uint CallStack[TGSI_EXEC_MAX_CALL_NESTING]; + struct tgsi_call_record CallStack[TGSI_EXEC_MAX_CALL_NESTING]; int CallStackTop; struct tgsi_full_instruction *Instructions; diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c index ccf4b205ffb..17af4cb7ad2 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -31,125 +31,125 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { - { 1, 1, 0, 0, "ARL", TGSI_OPCODE_ARL }, - { 1, 1, 0, 0, "MOV", TGSI_OPCODE_MOV }, - { 1, 1, 0, 0, "LIT", TGSI_OPCODE_LIT }, - { 1, 1, 0, 0, "RCP", TGSI_OPCODE_RCP }, - { 1, 1, 0, 0, "RSQ", TGSI_OPCODE_RSQ }, - { 1, 1, 0, 0, "EXP", TGSI_OPCODE_EXP }, - { 1, 1, 0, 0, "LOG", TGSI_OPCODE_LOG }, - { 1, 2, 0, 0, "MUL", TGSI_OPCODE_MUL }, - { 1, 2, 0, 0, "ADD", TGSI_OPCODE_ADD }, - { 1, 2, 0, 0, 
"DP3", TGSI_OPCODE_DP3 }, - { 1, 2, 0, 0, "DP4", TGSI_OPCODE_DP4 }, - { 1, 2, 0, 0, "DST", TGSI_OPCODE_DST }, - { 1, 2, 0, 0, "MIN", TGSI_OPCODE_MIN }, - { 1, 2, 0, 0, "MAX", TGSI_OPCODE_MAX }, - { 1, 2, 0, 0, "SLT", TGSI_OPCODE_SLT }, - { 1, 2, 0, 0, "SGE", TGSI_OPCODE_SGE }, - { 1, 3, 0, 0, "MAD", TGSI_OPCODE_MAD }, - { 1, 2, 0, 0, "SUB", TGSI_OPCODE_SUB }, - { 1, 3, 0, 0, "LRP", TGSI_OPCODE_LRP }, - { 1, 3, 0, 0, "CND", TGSI_OPCODE_CND }, - { 1, 3, 0, 0, "CND0", TGSI_OPCODE_CND0 }, - { 1, 3, 0, 0, "DP2A", TGSI_OPCODE_DP2A }, - { 0, 0, 0, 0, "", 22 }, /* removed */ - { 0, 0, 0, 0, "", 23 }, /* removed */ - { 1, 1, 0, 0, "FRC", TGSI_OPCODE_FRC }, - { 1, 3, 0, 0, "CLAMP", TGSI_OPCODE_CLAMP }, - { 1, 1, 0, 0, "FLR", TGSI_OPCODE_FLR }, - { 1, 1, 0, 0, "ROUND", TGSI_OPCODE_ROUND }, - { 1, 1, 0, 0, "EX2", TGSI_OPCODE_EX2 }, - { 1, 1, 0, 0, "LG2", TGSI_OPCODE_LG2 }, - { 1, 2, 0, 0, "POW", TGSI_OPCODE_POW }, - { 1, 2, 0, 0, "XPD", TGSI_OPCODE_XPD }, - { 0, 0, 0, 0, "", 32 }, /* removed */ - { 1, 1, 0, 0, "ABS", TGSI_OPCODE_ABS }, - { 1, 1, 0, 0, "RCC", TGSI_OPCODE_RCC }, - { 1, 2, 0, 0, "DPH", TGSI_OPCODE_DPH }, - { 1, 1, 0, 0, "COS", TGSI_OPCODE_COS }, - { 1, 1, 0, 0, "DDX", TGSI_OPCODE_DDX }, - { 1, 1, 0, 0, "DDY", TGSI_OPCODE_DDY }, - { 0, 0, 0, 0, "KILP", TGSI_OPCODE_KILP }, - { 1, 1, 0, 0, "PK2H", TGSI_OPCODE_PK2H }, - { 1, 1, 0, 0, "PK2US", TGSI_OPCODE_PK2US }, - { 1, 1, 0, 0, "PK4B", TGSI_OPCODE_PK4B }, - { 1, 1, 0, 0, "PK4UB", TGSI_OPCODE_PK4UB }, - { 1, 2, 0, 0, "RFL", TGSI_OPCODE_RFL }, - { 1, 2, 0, 0, "SEQ", TGSI_OPCODE_SEQ }, - { 1, 2, 0, 0, "SFL", TGSI_OPCODE_SFL }, - { 1, 2, 0, 0, "SGT", TGSI_OPCODE_SGT }, - { 1, 1, 0, 0, "SIN", TGSI_OPCODE_SIN }, - { 1, 2, 0, 0, "SLE", TGSI_OPCODE_SLE }, - { 1, 2, 0, 0, "SNE", TGSI_OPCODE_SNE }, - { 1, 2, 0, 0, "STR", TGSI_OPCODE_STR }, - { 1, 2, 1, 0, "TEX", TGSI_OPCODE_TEX }, - { 1, 4, 1, 0, "TXD", TGSI_OPCODE_TXD }, - { 1, 2, 1, 0, "TXP", TGSI_OPCODE_TXP }, - { 1, 1, 0, 0, "UP2H", TGSI_OPCODE_UP2H }, - { 1, 1, 0, 0, 
"UP2US", TGSI_OPCODE_UP2US }, - { 1, 1, 0, 0, "UP4B", TGSI_OPCODE_UP4B }, - { 1, 1, 0, 0, "UP4UB", TGSI_OPCODE_UP4UB }, - { 1, 3, 0, 0, "X2D", TGSI_OPCODE_X2D }, - { 1, 1, 0, 0, "ARA", TGSI_OPCODE_ARA }, - { 1, 1, 0, 0, "ARR", TGSI_OPCODE_ARR }, - { 0, 1, 0, 0, "BRA", TGSI_OPCODE_BRA }, - { 0, 0, 0, 1, "CAL", TGSI_OPCODE_CAL }, - { 0, 0, 0, 0, "RET", TGSI_OPCODE_RET }, - { 1, 1, 0, 0, "SSG", TGSI_OPCODE_SSG }, - { 1, 3, 0, 0, "CMP", TGSI_OPCODE_CMP }, - { 1, 1, 0, 0, "SCS", TGSI_OPCODE_SCS }, - { 1, 2, 1, 0, "TXB", TGSI_OPCODE_TXB }, - { 1, 1, 0, 0, "NRM", TGSI_OPCODE_NRM }, - { 1, 2, 0, 0, "DIV", TGSI_OPCODE_DIV }, - { 1, 2, 0, 0, "DP2", TGSI_OPCODE_DP2 }, - { 1, 2, 1, 0, "TXL", TGSI_OPCODE_TXL }, - { 0, 0, 0, 0, "BRK", TGSI_OPCODE_BRK }, - { 0, 1, 0, 1, "IF", TGSI_OPCODE_IF }, - { 1, 1, 0, 0, "BGNFOR", TGSI_OPCODE_BGNFOR }, - { 0, 1, 0, 0, "REP", TGSI_OPCODE_REP }, - { 0, 0, 0, 1, "ELSE", TGSI_OPCODE_ELSE }, - { 0, 0, 0, 0, "ENDIF", TGSI_OPCODE_ENDIF }, - { 1, 0, 0, 0, "ENDFOR", TGSI_OPCODE_ENDFOR }, - { 0, 0, 0, 0, "ENDREP", TGSI_OPCODE_ENDREP }, - { 0, 1, 0, 0, "PUSHA", TGSI_OPCODE_PUSHA }, - { 1, 0, 0, 0, "POPA", TGSI_OPCODE_POPA }, - { 1, 1, 0, 0, "CEIL", TGSI_OPCODE_CEIL }, - { 1, 1, 0, 0, "I2F", TGSI_OPCODE_I2F }, - { 1, 1, 0, 0, "NOT", TGSI_OPCODE_NOT }, - { 1, 1, 0, 0, "TRUNC", TGSI_OPCODE_TRUNC }, - { 1, 2, 0, 0, "SHL", TGSI_OPCODE_SHL }, - { 1, 2, 0, 0, "SHR", TGSI_OPCODE_SHR }, - { 1, 2, 0, 0, "AND", TGSI_OPCODE_AND }, - { 1, 2, 0, 0, "OR", TGSI_OPCODE_OR }, - { 1, 2, 0, 0, "MOD", TGSI_OPCODE_MOD }, - { 1, 2, 0, 0, "XOR", TGSI_OPCODE_XOR }, - { 1, 3, 0, 0, "SAD", TGSI_OPCODE_SAD }, - { 1, 2, 1, 0, "TXF", TGSI_OPCODE_TXF }, - { 1, 2, 1, 0, "TXQ", TGSI_OPCODE_TXQ }, - { 0, 0, 0, 0, "CONT", TGSI_OPCODE_CONT }, - { 0, 0, 0, 0, "EMIT", TGSI_OPCODE_EMIT }, - { 0, 0, 0, 0, "ENDPRIM", TGSI_OPCODE_ENDPRIM }, - { 0, 0, 0, 1, "BGNLOOP", TGSI_OPCODE_BGNLOOP }, - { 0, 0, 0, 0, "BGNSUB", TGSI_OPCODE_BGNSUB }, - { 0, 0, 0, 1, "ENDLOOP", TGSI_OPCODE_ENDLOOP }, - { 0, 
0, 0, 0, "ENDSUB", TGSI_OPCODE_ENDSUB }, - { 1, 1, 0, 0, "NOISE1", TGSI_OPCODE_NOISE1 }, - { 1, 1, 0, 0, "NOISE2", TGSI_OPCODE_NOISE2 }, - { 1, 1, 0, 0, "NOISE3", TGSI_OPCODE_NOISE3 }, - { 1, 1, 0, 0, "NOISE4", TGSI_OPCODE_NOISE4 }, - { 0, 0, 0, 0, "NOP", TGSI_OPCODE_NOP }, - { 0, 0, 0, 0, "", 108 }, /* removed */ - { 0, 0, 0, 0, "", 109 }, /* removed */ - { 0, 0, 0, 0, "", 110 }, /* removed */ - { 0, 0, 0, 0, "", 111 }, /* removed */ - { 1, 1, 0, 0, "NRM4", TGSI_OPCODE_NRM4 }, - { 0, 1, 0, 0, "CALLNZ", TGSI_OPCODE_CALLNZ }, - { 0, 1, 0, 0, "IFC", TGSI_OPCODE_IFC }, - { 0, 1, 0, 0, "BREAKC", TGSI_OPCODE_BREAKC }, - { 0, 1, 0, 0, "KIL", TGSI_OPCODE_KIL }, - { 0, 0, 0, 0, "END", TGSI_OPCODE_END }, - { 1, 1, 0, 0, "SWZ", TGSI_OPCODE_SWZ } + { 1, 1, 0, 0, 0, 0, "ARL", TGSI_OPCODE_ARL }, + { 1, 1, 0, 0, 0, 0, "MOV", TGSI_OPCODE_MOV }, + { 1, 1, 0, 0, 0, 0, "LIT", TGSI_OPCODE_LIT }, + { 1, 1, 0, 0, 0, 0, "RCP", TGSI_OPCODE_RCP }, + { 1, 1, 0, 0, 0, 0, "RSQ", TGSI_OPCODE_RSQ }, + { 1, 1, 0, 0, 0, 0, "EXP", TGSI_OPCODE_EXP }, + { 1, 1, 0, 0, 0, 0, "LOG", TGSI_OPCODE_LOG }, + { 1, 2, 0, 0, 0, 0, "MUL", TGSI_OPCODE_MUL }, + { 1, 2, 0, 0, 0, 0, "ADD", TGSI_OPCODE_ADD }, + { 1, 2, 0, 0, 0, 0, "DP3", TGSI_OPCODE_DP3 }, + { 1, 2, 0, 0, 0, 0, "DP4", TGSI_OPCODE_DP4 }, + { 1, 2, 0, 0, 0, 0, "DST", TGSI_OPCODE_DST }, + { 1, 2, 0, 0, 0, 0, "MIN", TGSI_OPCODE_MIN }, + { 1, 2, 0, 0, 0, 0, "MAX", TGSI_OPCODE_MAX }, + { 1, 2, 0, 0, 0, 0, "SLT", TGSI_OPCODE_SLT }, + { 1, 2, 0, 0, 0, 0, "SGE", TGSI_OPCODE_SGE }, + { 1, 3, 0, 0, 0, 0, "MAD", TGSI_OPCODE_MAD }, + { 1, 2, 0, 0, 0, 0, "SUB", TGSI_OPCODE_SUB }, + { 1, 3, 0, 0, 0, 0, "LRP", TGSI_OPCODE_LRP }, + { 1, 3, 0, 0, 0, 0, "CND", TGSI_OPCODE_CND }, + { 0, 0, 0, 0, 0, 0, "", 20 }, /* removed */ + { 1, 3, 0, 0, 0, 0, "DP2A", TGSI_OPCODE_DP2A }, + { 0, 0, 0, 0, 0, 0, "", 22 }, /* removed */ + { 0, 0, 0, 0, 0, 0, "", 23 }, /* removed */ + { 1, 1, 0, 0, 0, 0, "FRC", TGSI_OPCODE_FRC }, + { 1, 3, 0, 0, 0, 0, "CLAMP", TGSI_OPCODE_CLAMP }, + { 
1, 1, 0, 0, 0, 0, "FLR", TGSI_OPCODE_FLR }, + { 1, 1, 0, 0, 0, 0, "ROUND", TGSI_OPCODE_ROUND }, + { 1, 1, 0, 0, 0, 0, "EX2", TGSI_OPCODE_EX2 }, + { 1, 1, 0, 0, 0, 0, "LG2", TGSI_OPCODE_LG2 }, + { 1, 2, 0, 0, 0, 0, "POW", TGSI_OPCODE_POW }, + { 1, 2, 0, 0, 0, 0, "XPD", TGSI_OPCODE_XPD }, + { 0, 0, 0, 0, 0, 0, "", 32 }, /* removed */ + { 1, 1, 0, 0, 0, 0, "ABS", TGSI_OPCODE_ABS }, + { 1, 1, 0, 0, 0, 0, "RCC", TGSI_OPCODE_RCC }, + { 1, 2, 0, 0, 0, 0, "DPH", TGSI_OPCODE_DPH }, + { 1, 1, 0, 0, 0, 0, "COS", TGSI_OPCODE_COS }, + { 1, 1, 0, 0, 0, 0, "DDX", TGSI_OPCODE_DDX }, + { 1, 1, 0, 0, 0, 0, "DDY", TGSI_OPCODE_DDY }, + { 0, 0, 0, 0, 0, 0, "KILP", TGSI_OPCODE_KILP }, + { 1, 1, 0, 0, 0, 0, "PK2H", TGSI_OPCODE_PK2H }, + { 1, 1, 0, 0, 0, 0, "PK2US", TGSI_OPCODE_PK2US }, + { 1, 1, 0, 0, 0, 0, "PK4B", TGSI_OPCODE_PK4B }, + { 1, 1, 0, 0, 0, 0, "PK4UB", TGSI_OPCODE_PK4UB }, + { 1, 2, 0, 0, 0, 0, "RFL", TGSI_OPCODE_RFL }, + { 1, 2, 0, 0, 0, 0, "SEQ", TGSI_OPCODE_SEQ }, + { 1, 2, 0, 0, 0, 0, "SFL", TGSI_OPCODE_SFL }, + { 1, 2, 0, 0, 0, 0, "SGT", TGSI_OPCODE_SGT }, + { 1, 1, 0, 0, 0, 0, "SIN", TGSI_OPCODE_SIN }, + { 1, 2, 0, 0, 0, 0, "SLE", TGSI_OPCODE_SLE }, + { 1, 2, 0, 0, 0, 0, "SNE", TGSI_OPCODE_SNE }, + { 1, 2, 0, 0, 0, 0, "STR", TGSI_OPCODE_STR }, + { 1, 2, 1, 0, 0, 0, "TEX", TGSI_OPCODE_TEX }, + { 1, 4, 1, 0, 0, 0, "TXD", TGSI_OPCODE_TXD }, + { 1, 2, 1, 0, 0, 0, "TXP", TGSI_OPCODE_TXP }, + { 1, 1, 0, 0, 0, 0, "UP2H", TGSI_OPCODE_UP2H }, + { 1, 1, 0, 0, 0, 0, "UP2US", TGSI_OPCODE_UP2US }, + { 1, 1, 0, 0, 0, 0, "UP4B", TGSI_OPCODE_UP4B }, + { 1, 1, 0, 0, 0, 0, "UP4UB", TGSI_OPCODE_UP4UB }, + { 1, 3, 0, 0, 0, 0, "X2D", TGSI_OPCODE_X2D }, + { 1, 1, 0, 0, 0, 0, "ARA", TGSI_OPCODE_ARA }, + { 1, 1, 0, 0, 0, 0, "ARR", TGSI_OPCODE_ARR }, + { 0, 1, 0, 0, 0, 0, "BRA", TGSI_OPCODE_BRA }, + { 0, 0, 0, 1, 0, 0, "CAL", TGSI_OPCODE_CAL }, + { 0, 0, 0, 0, 0, 0, "RET", TGSI_OPCODE_RET }, + { 1, 1, 0, 0, 0, 0, "SSG", TGSI_OPCODE_SSG }, + { 1, 3, 0, 0, 0, 0, "CMP", TGSI_OPCODE_CMP }, + { 1, 
1, 0, 0, 0, 0, "SCS", TGSI_OPCODE_SCS }, + { 1, 2, 1, 0, 0, 0, "TXB", TGSI_OPCODE_TXB }, + { 1, 1, 0, 0, 0, 0, "NRM", TGSI_OPCODE_NRM }, + { 1, 2, 0, 0, 0, 0, "DIV", TGSI_OPCODE_DIV }, + { 1, 2, 0, 0, 0, 0, "DP2", TGSI_OPCODE_DP2 }, + { 1, 2, 1, 0, 0, 0, "TXL", TGSI_OPCODE_TXL }, + { 0, 0, 0, 0, 0, 0, "BRK", TGSI_OPCODE_BRK }, + { 0, 1, 0, 1, 0, 1, "IF", TGSI_OPCODE_IF }, + { 1, 1, 0, 0, 0, 1, "BGNFOR", TGSI_OPCODE_BGNFOR }, + { 0, 1, 0, 0, 0, 1, "REP", TGSI_OPCODE_REP }, + { 0, 0, 0, 1, 1, 1, "ELSE", TGSI_OPCODE_ELSE }, + { 0, 0, 0, 0, 1, 0, "ENDIF", TGSI_OPCODE_ENDIF }, + { 1, 0, 0, 0, 1, 0, "ENDFOR", TGSI_OPCODE_ENDFOR }, + { 0, 0, 0, 0, 1, 0, "ENDREP", TGSI_OPCODE_ENDREP }, + { 0, 1, 0, 0, 0, 0, "PUSHA", TGSI_OPCODE_PUSHA }, + { 1, 0, 0, 0, 0, 0, "POPA", TGSI_OPCODE_POPA }, + { 1, 1, 0, 0, 0, 0, "CEIL", TGSI_OPCODE_CEIL }, + { 1, 1, 0, 0, 0, 0, "I2F", TGSI_OPCODE_I2F }, + { 1, 1, 0, 0, 0, 0, "NOT", TGSI_OPCODE_NOT }, + { 1, 1, 0, 0, 0, 0, "TRUNC", TGSI_OPCODE_TRUNC }, + { 1, 2, 0, 0, 0, 0, "SHL", TGSI_OPCODE_SHL }, + { 1, 2, 0, 0, 0, 0, "SHR", TGSI_OPCODE_SHR }, + { 1, 2, 0, 0, 0, 0, "AND", TGSI_OPCODE_AND }, + { 1, 2, 0, 0, 0, 0, "OR", TGSI_OPCODE_OR }, + { 1, 2, 0, 0, 0, 0, "MOD", TGSI_OPCODE_MOD }, + { 1, 2, 0, 0, 0, 0, "XOR", TGSI_OPCODE_XOR }, + { 1, 3, 0, 0, 0, 0, "SAD", TGSI_OPCODE_SAD }, + { 1, 2, 1, 0, 0, 0, "TXF", TGSI_OPCODE_TXF }, + { 1, 2, 1, 0, 0, 0, "TXQ", TGSI_OPCODE_TXQ }, + { 0, 0, 0, 0, 0, 0, "CONT", TGSI_OPCODE_CONT }, + { 0, 0, 0, 0, 0, 0, "EMIT", TGSI_OPCODE_EMIT }, + { 0, 0, 0, 0, 0, 0, "ENDPRIM", TGSI_OPCODE_ENDPRIM }, + { 0, 0, 0, 1, 0, 1, "BGNLOOP", TGSI_OPCODE_BGNLOOP }, + { 0, 0, 0, 0, 0, 1, "BGNSUB", TGSI_OPCODE_BGNSUB }, + { 0, 0, 0, 1, 1, 0, "ENDLOOP", TGSI_OPCODE_ENDLOOP }, + { 0, 0, 0, 0, 1, 0, "ENDSUB", TGSI_OPCODE_ENDSUB }, + { 1, 1, 0, 0, 0, 0, "NOISE1", TGSI_OPCODE_NOISE1 }, + { 1, 1, 0, 0, 0, 0, "NOISE2", TGSI_OPCODE_NOISE2 }, + { 1, 1, 0, 0, 0, 0, "NOISE3", TGSI_OPCODE_NOISE3 }, + { 1, 1, 0, 0, 0, 0, "NOISE4", 
TGSI_OPCODE_NOISE4 }, + { 0, 0, 0, 0, 0, 0, "NOP", TGSI_OPCODE_NOP }, + { 0, 0, 0, 0, 0, 0, "", 108 }, /* removed */ + { 0, 0, 0, 0, 0, 0, "", 109 }, /* removed */ + { 0, 0, 0, 0, 0, 0, "", 110 }, /* removed */ + { 0, 0, 0, 0, 0, 0, "", 111 }, /* removed */ + { 1, 1, 0, 0, 0, 0, "NRM4", TGSI_OPCODE_NRM4 }, + { 0, 1, 0, 0, 0, 0, "CALLNZ", TGSI_OPCODE_CALLNZ }, + { 0, 1, 0, 0, 0, 0, "IFC", TGSI_OPCODE_IFC }, + { 0, 1, 0, 0, 0, 0, "BREAKC", TGSI_OPCODE_BREAKC }, + { 0, 1, 0, 0, 0, 0, "KIL", TGSI_OPCODE_KIL }, + { 0, 0, 0, 0, 0, 0, "END", TGSI_OPCODE_END }, + { 1, 1, 0, 0, 0, 0, "SWZ", TGSI_OPCODE_SWZ } }; const struct tgsi_opcode_info * diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.h b/src/gallium/auxiliary/tgsi/tgsi_info.h index b2375c69710..74713c3b98a 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.h +++ b/src/gallium/auxiliary/tgsi/tgsi_info.h @@ -36,10 +36,12 @@ extern "C" { struct tgsi_opcode_info { - uint num_dst; - uint num_src; - boolean is_tex; - boolean is_branch; + unsigned num_dst:3; + unsigned num_src:3; + unsigned is_tex:1; + unsigned is_branch:1; + int pre_dedent:2; + int post_indent:2; const char *mnemonic; uint opcode; }; diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h index ed594a3e2c7..e7bcf4bf754 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h +++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h @@ -60,7 +60,6 @@ OP13(MAD) OP12(SUB) OP13(LRP) OP13(CND) -OP13(CND0) OP13(DP2A) OP11(FRC) OP13(CLAMP) diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index 4fe8553c423..8a13885da9b 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -199,10 +199,10 @@ iter_instruction( } if (info->num_dst != inst->Instruction.NumDstRegs) { - report_error( ctx, "Invalid number of destination operands, should be %u", info->num_dst ); + report_error( ctx, "%s: Invalid number of destination operands, should 
be %u", info->mnemonic, info->num_dst ); } if (info->num_src != inst->Instruction.NumSrcRegs) { - report_error( ctx, "Invalid number of source operands, should be %u", info->num_src ); + report_error( ctx, "%s: Invalid number of source operands, should be %u", info->mnemonic, info->num_src ); } /* Check destination and source registers' validity. diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 46f2387c158..3cdf8b9f359 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -2089,10 +2089,6 @@ emit_instruction( return 0; break; - case TGSI_OPCODE_CND0: - return 0; - break; - case TGSI_OPCODE_DP2A: FETCH( func, *inst, 0, 0, CHAN_X ); /* xmm0 = src[0].x */ FETCH( func, *inst, 1, 1, CHAN_X ); /* xmm1 = src[1].x */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c index c0a0627e0b2..f7096bd8e2c 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -29,6 +29,7 @@ #include "pipe/p_context.h" #include "pipe/p_state.h" #include "tgsi/tgsi_ureg.h" +#include "tgsi/tgsi_info.h" #include "tgsi/tgsi_dump.h" #include "util/u_memory.h" #include "util/u_math.h" @@ -71,6 +72,7 @@ struct ureg_tokens { #define UREG_MAX_OUTPUT PIPE_MAX_ATTRIBS #define UREG_MAX_IMMEDIATE 32 #define UREG_MAX_TEMP 256 +#define UREG_MAX_ADDR 2 #define DOMAIN_DECL 0 #define DOMAIN_INSN 1 @@ -99,11 +101,15 @@ struct ureg_program } immediate[UREG_MAX_IMMEDIATE]; unsigned nr_immediates; + struct ureg_src sampler[PIPE_MAX_SAMPLERS]; + unsigned nr_samplers; + unsigned temps_active[UREG_MAX_TEMP / 32]; unsigned nr_temps; + unsigned nr_addrs; + unsigned nr_constants; - unsigned nr_samplers; unsigned nr_instructions; struct ureg_tokens domain[2]; @@ -187,6 +193,8 @@ ureg_dst_register( unsigned file, dst.File = file; dst.WriteMask = TGSI_WRITEMASK_XYZW; dst.Indirect = 0; + dst.IndirectIndex = 0; + dst.IndirectSwizzle = 0; dst.Saturate 
= 0; dst.Index = index; dst.Pad1 = 0; @@ -208,6 +216,8 @@ ureg_src_register( unsigned file, src.SwizzleW = TGSI_SWIZZLE_W; src.Pad = 0; src.Indirect = 0; + src.IndirectIndex = 0; + src.IndirectSwizzle = 0; src.Absolute = 0; src.Index = index; src.Negate = 0; @@ -254,6 +264,7 @@ ureg_DECL_fs_input( struct ureg_program *ureg, unsigned index, unsigned interp ) { + assert(ureg->processor == TGSI_PROCESSOR_FRAGMENT); return ureg_DECL_input( ureg, name, index, interp ); } @@ -263,6 +274,7 @@ ureg_DECL_vs_input( struct ureg_program *ureg, unsigned name, unsigned index ) { + assert(ureg->processor == TGSI_PROCESSOR_VERTEX); return ureg_DECL_input( ureg, name, index, TGSI_INTERPOLATE_CONSTANT ); } @@ -346,11 +358,36 @@ void ureg_release_temporary( struct ureg_program *ureg, } +/* Allocate a new address register. + */ +struct ureg_dst ureg_DECL_address( struct ureg_program *ureg ) +{ + if (ureg->nr_addrs < UREG_MAX_ADDR) + return ureg_dst_register( TGSI_FILE_ADDRESS, ureg->nr_addrs++ ); + + assert( 0 ); + return ureg_dst_register( TGSI_FILE_ADDRESS, 0 ); +} + /* Allocate a new sampler. 
*/ -struct ureg_src ureg_DECL_sampler( struct ureg_program *ureg ) +struct ureg_src ureg_DECL_sampler( struct ureg_program *ureg, + unsigned nr ) { - return ureg_src_register( TGSI_FILE_SAMPLER, ureg->nr_samplers++ ); + unsigned i; + + for (i = 0; i < ureg->nr_samplers; i++) + if (ureg->sampler[i].Index == nr) + return ureg->sampler[i]; + + if (i < PIPE_MAX_SAMPLERS) { + ureg->sampler[i] = ureg_src_register( TGSI_FILE_SAMPLER, nr ); + ureg->nr_samplers++; + return ureg->sampler[i]; + } + + assert( 0 ); + return ureg->sampler[0]; } @@ -363,6 +400,8 @@ static int match_or_expand_immediate( const float *v, unsigned *swizzle ) { unsigned i, j; + + *swizzle = 0; for (i = 0; i < nr; i++) { boolean found = FALSE; @@ -394,8 +433,8 @@ struct ureg_src ureg_DECL_immediate( struct ureg_program *ureg, const float *v, unsigned nr ) { - unsigned i; - unsigned swizzle = 0; + unsigned i, j; + unsigned swizzle; /* Could do a first pass where we examine all existing immediates * without expanding. @@ -423,6 +462,12 @@ struct ureg_src ureg_DECL_immediate( struct ureg_program *ureg, set_bad( ureg ); out: + /* Make sure that all referenced elements are from this immediate. + * Has the effect of making size-one immediates into scalars. 
+ */ + for (j = nr; j < 4; j++) + swizzle |= (swizzle & 0x3) << (j * 2); + return ureg_swizzle( ureg_src_register( TGSI_FILE_IMMEDIATE, i ), (swizzle >> 0) & 0x3, (swizzle >> 2) & 0x3, @@ -442,31 +487,39 @@ ureg_emit_src( struct ureg_program *ureg, union tgsi_any_token *out = get_tokens( ureg, DOMAIN_INSN, size ); unsigned n = 0; + assert(src.File != TGSI_FILE_NULL); + assert(src.File != TGSI_FILE_OUTPUT); + assert(src.File < TGSI_FILE_COUNT); + out[n].value = 0; out[n].src.File = src.File; out[n].src.SwizzleX = src.SwizzleX; out[n].src.SwizzleY = src.SwizzleY; out[n].src.SwizzleZ = src.SwizzleZ; out[n].src.SwizzleW = src.SwizzleW; - out[n].src.Indirect = src.Indirect; out[n].src.Index = src.Index; + out[n].src.Negate = src.Negate; n++; if (src.Absolute) { + out[0].src.Extended = 1; + out[0].src.Negate = 0; out[n].value = 0; + out[n].src_ext_mod.Type = TGSI_SRC_REGISTER_EXT_TYPE_MOD; out[n].src_ext_mod.Absolute = 1; + out[n].src_ext_mod.Negate = src.Negate; n++; } if (src.Indirect) { + out[0].src.Indirect = 1; out[n].value = 0; out[n].src.File = TGSI_FILE_ADDRESS; - out[n].src.SwizzleX = TGSI_SWIZZLE_X; - out[n].src.SwizzleY = TGSI_SWIZZLE_X; - out[n].src.SwizzleZ = TGSI_SWIZZLE_X; - out[n].src.SwizzleW = TGSI_SWIZZLE_X; - out[n].src.Indirect = 0; - out[n].src.Index = 0; + out[n].src.SwizzleX = src.IndirectSwizzle; + out[n].src.SwizzleY = src.IndirectSwizzle; + out[n].src.SwizzleZ = src.IndirectSwizzle; + out[n].src.SwizzleW = src.IndirectSwizzle; + out[n].src.Index = src.IndirectIndex; n++; } @@ -484,6 +537,13 @@ ureg_emit_dst( struct ureg_program *ureg, union tgsi_any_token *out = get_tokens( ureg, DOMAIN_INSN, size ); unsigned n = 0; + assert(dst.File != TGSI_FILE_NULL); + assert(dst.File != TGSI_FILE_CONSTANT); + assert(dst.File != TGSI_FILE_INPUT); + assert(dst.File != TGSI_FILE_SAMPLER); + assert(dst.File != TGSI_FILE_IMMEDIATE); + assert(dst.File < TGSI_FILE_COUNT); + out[n].value = 0; out[n].dst.File = dst.File; out[n].dst.WriteMask = dst.WriteMask; @@ 
-494,12 +554,11 @@ ureg_emit_dst( struct ureg_program *ureg, if (dst.Indirect) { out[n].value = 0; out[n].src.File = TGSI_FILE_ADDRESS; - out[n].src.SwizzleX = TGSI_SWIZZLE_X; - out[n].src.SwizzleY = TGSI_SWIZZLE_X; - out[n].src.SwizzleZ = TGSI_SWIZZLE_X; - out[n].src.SwizzleW = TGSI_SWIZZLE_X; - out[n].src.Indirect = 0; - out[n].src.Index = 0; + out[n].src.SwizzleX = dst.IndirectSwizzle; + out[n].src.SwizzleY = dst.IndirectSwizzle; + out[n].src.SwizzleZ = dst.IndirectSwizzle; + out[n].src.SwizzleW = dst.IndirectSwizzle; + out[n].src.Index = dst.IndirectIndex; n++; } @@ -523,7 +582,6 @@ ureg_emit_insn(struct ureg_program *ureg, out[0].insn.NrTokens = 0; out[0].insn.Opcode = opcode; out[0].insn.Saturate = saturate; - out[0].insn.NrTokens = 0; out[0].insn.NumDstRegs = num_dst; out[0].insn.NumSrcRegs = num_src; out[0].insn.Padding = 0; @@ -542,6 +600,9 @@ ureg_emit_label(struct ureg_program *ureg, { union tgsi_any_token *out, *insn; + if(!label_token) + return; + out = get_tokens( ureg, DOMAIN_INSN, 1 ); insn = retrieve_token( ureg, DOMAIN_INSN, insn_token ); @@ -617,6 +678,17 @@ ureg_insn(struct ureg_program *ureg, unsigned insn, i; boolean saturate; +#ifdef DEBUG + { + const struct tgsi_opcode_info *info = tgsi_get_opcode_info( opcode ); + assert(info); + if(info) { + assert(nr_dst == info->num_dst); + assert(nr_src == info->num_src); + } + } +#endif + saturate = nr_dst ? 
dst[0].Saturate : FALSE; insn = ureg_emit_insn( ureg, opcode, saturate, nr_dst, nr_src ); @@ -723,10 +795,10 @@ static void emit_decls( struct ureg_program *ureg ) TGSI_INTERPOLATE_CONSTANT ); } - if (ureg->nr_samplers) { + for (i = 0; i < ureg->nr_samplers; i++) { emit_decl_range( ureg, TGSI_FILE_SAMPLER, - 0, ureg->nr_samplers ); + ureg->sampler[i].Index, 1 ); } if (ureg->nr_constants) { @@ -741,6 +813,12 @@ static void emit_decls( struct ureg_program *ureg ) 0, ureg->nr_temps ); } + if (ureg->nr_addrs) { + emit_decl_range( ureg, + TGSI_FILE_ADDRESS, + 0, ureg->nr_addrs ); + } + for (i = 0; i < ureg->nr_immediates; i++) { emit_immediate( ureg, ureg->immediate[i].v ); @@ -764,7 +842,7 @@ static void copy_instructions( struct ureg_program *ureg ) static void -fixup_header_size(struct ureg_program *ureg ) +fixup_header_size(struct ureg_program *ureg) { union tgsi_any_token *out = retrieve_token( ureg, DOMAIN_DECL, 1 ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h index 8836a1ea0eb..acbca59040c 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h @@ -31,6 +31,10 @@ #include "pipe/p_compiler.h" #include "pipe/p_shader_tokens.h" +#ifdef __cplusplus +extern "C" { +#endif + struct ureg_program; /* Almost a tgsi_src_register, but we need to pull in the Absolute @@ -48,6 +52,8 @@ struct ureg_src unsigned Absolute : 1; /* BOOL */ int Index : 16; /* SINT */ unsigned Negate : 1; /* BOOL */ + int IndirectIndex : 16; /* SINT */ + int IndirectSwizzle : 2; /* TGSI_SWIZZLE_ */ }; /* Very similar to a tgsi_dst_register, removing unsupported fields @@ -64,6 +70,8 @@ struct ureg_dst int Index : 16; /* SINT */ unsigned Pad1 : 5; unsigned Pad2 : 1; /* BOOL */ + int IndirectIndex : 16; /* SINT */ + int IndirectSwizzle : 2; /* TGSI_SWIZZLE_ */ }; struct pipe_context; @@ -131,12 +139,21 @@ void ureg_release_temporary( struct ureg_program *ureg, struct ureg_dst tmp ); +struct ureg_dst 
+ureg_DECL_address( struct ureg_program * ); + +/* Supply an index to the sampler declaration as this is the hook to + * the external pipe_sampler state. Users of this function probably + * don't want just any sampler, but a specific one which they've set + * up state for in the context. + */ struct ureg_src -ureg_DECL_sampler( struct ureg_program * ); +ureg_DECL_sampler( struct ureg_program *, + unsigned index ); static INLINE struct ureg_src -ureg_DECL_immediate4f( struct ureg_program *ureg, +ureg_imm4f( struct ureg_program *ureg, float a, float b, float c, float d) { @@ -149,7 +166,7 @@ ureg_DECL_immediate4f( struct ureg_program *ureg, } static INLINE struct ureg_src -ureg_DECL_immediate3f( struct ureg_program *ureg, +ureg_imm3f( struct ureg_program *ureg, float a, float b, float c) { @@ -161,7 +178,7 @@ ureg_DECL_immediate3f( struct ureg_program *ureg, } static INLINE struct ureg_src -ureg_DECL_immediate2f( struct ureg_program *ureg, +ureg_imm2f( struct ureg_program *ureg, float a, float b) { float v[2]; @@ -171,7 +188,7 @@ ureg_DECL_immediate2f( struct ureg_program *ureg, } static INLINE struct ureg_src -ureg_DECL_immediate1f( struct ureg_program *ureg, +ureg_imm1f( struct ureg_program *ureg, float a) { float v[1]; @@ -392,6 +409,7 @@ static INLINE void ureg_##op( struct ureg_program *ureg, \ static INLINE struct ureg_src ureg_negate( struct ureg_src reg ) { + assert(reg.File != TGSI_FILE_NULL); reg.Negate ^= 1; return reg; } @@ -399,6 +417,7 @@ ureg_negate( struct ureg_src reg ) static INLINE struct ureg_src ureg_abs( struct ureg_src reg ) { + assert(reg.File != TGSI_FILE_NULL); reg.Absolute = 1; reg.Negate = 0; return reg; @@ -413,6 +432,12 @@ ureg_swizzle( struct ureg_src reg, (reg.SwizzleZ << 4) | (reg.SwizzleW << 6)); + assert(reg.File != TGSI_FILE_NULL); + assert(x < 4); + assert(y < 4); + assert(z < 4); + assert(w < 4); + reg.SwizzleX = (swz >> (x*2)) & 0x3; reg.SwizzleY = (swz >> (y*2)) & 0x3; reg.SwizzleZ = (swz >> (z*2)) & 0x3; @@ -430,6 +455,7 @@ 
static INLINE struct ureg_dst ureg_writemask( struct ureg_dst reg, unsigned writemask ) { + assert(reg.File != TGSI_FILE_NULL); reg.WriteMask &= writemask; return reg; } @@ -437,10 +463,33 @@ ureg_writemask( struct ureg_dst reg, static INLINE struct ureg_dst ureg_saturate( struct ureg_dst reg ) { + assert(reg.File != TGSI_FILE_NULL); reg.Saturate = 1; return reg; } +static INLINE struct ureg_dst +ureg_dst_indirect( struct ureg_dst reg, struct ureg_src addr ) +{ + assert(reg.File != TGSI_FILE_NULL); + assert(addr.File == TGSI_FILE_ADDRESS); + reg.Indirect = 1; + reg.IndirectIndex = addr.Index; + reg.IndirectSwizzle = addr.SwizzleX; + return reg; +} + +static INLINE struct ureg_src +ureg_src_indirect( struct ureg_src reg, struct ureg_src addr ) +{ + assert(reg.File != TGSI_FILE_NULL); + assert(addr.File == TGSI_FILE_ADDRESS); + reg.Indirect = 1; + reg.IndirectIndex = addr.Index; + reg.IndirectSwizzle = addr.SwizzleX; + return reg; +} + static INLINE struct ureg_dst ureg_dst( struct ureg_src src ) { @@ -449,6 +498,8 @@ ureg_dst( struct ureg_src src ) dst.File = src.File; dst.WriteMask = TGSI_WRITEMASK_XYZW; dst.Indirect = src.Indirect; + dst.IndirectIndex = src.IndirectIndex; + dst.IndirectSwizzle = src.IndirectSwizzle; dst.Saturate = 0; dst.Index = src.Index; dst.Pad1 = 0; @@ -469,6 +520,8 @@ ureg_src( struct ureg_dst dst ) src.SwizzleW = TGSI_SWIZZLE_W; src.Pad = 0; src.Indirect = dst.Indirect; + src.IndirectIndex = dst.IndirectIndex; + src.IndirectSwizzle = dst.IndirectSwizzle; src.Absolute = 0; src.Index = dst.Index; src.Negate = 0; @@ -478,4 +531,60 @@ ureg_src( struct ureg_dst dst ) +static INLINE struct ureg_dst +ureg_dst_undef( void ) +{ + struct ureg_dst dst; + + dst.File = TGSI_FILE_NULL; + dst.WriteMask = 0; + dst.Indirect = 0; + dst.IndirectIndex = 0; + dst.IndirectSwizzle = 0; + dst.Saturate = 0; + dst.Index = 0; + dst.Pad1 = 0; + dst.Pad2 = 0; + + return dst; +} + +static INLINE struct ureg_src +ureg_src_undef( void ) +{ + struct ureg_src src; + + 
src.File = TGSI_FILE_NULL; + src.SwizzleX = 0; + src.SwizzleY = 0; + src.SwizzleZ = 0; + src.SwizzleW = 0; + src.Pad = 0; + src.Indirect = 0; + src.IndirectIndex = 0; + src.IndirectSwizzle = 0; + src.Absolute = 0; + src.Index = 0; + src.Negate = 0; + + return src; +} + +static INLINE boolean +ureg_src_is_undef( struct ureg_src src ) +{ + return src.File == TGSI_FILE_NULL; +} + +static INLINE boolean +ureg_dst_is_undef( struct ureg_dst dst ) +{ + return dst.File == TGSI_FILE_NULL; +} + + +#ifdef __cplusplus +} +#endif + #endif diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.h b/src/gallium/auxiliary/tgsi/tgsi_util.h index 7877f345587..21eb656327e 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_util.h +++ b/src/gallium/auxiliary/tgsi/tgsi_util.h @@ -32,6 +32,10 @@ extern "C" { #endif +struct tgsi_src_register; +struct tgsi_src_register_ext_swz; +struct tgsi_full_src_register; + void * tgsi_align_128bit( void *unaligned ); diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index cda6dbd46d7..c516317d701 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -62,7 +62,7 @@ struct blit_state struct pipe_viewport_state viewport; void *vs; - void *fs; + void *fs[TGSI_WRITEMASK_XYZW + 1]; struct pipe_buffer *vbuf; /**< quad vertices */ unsigned vbuf_slot; @@ -125,7 +125,7 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso) } /* fragment shader */ - ctx->fs = util_make_fragment_tex_shader(pipe); + ctx->fs[TGSI_WRITEMASK_XYZW] = util_make_fragment_tex_shader(pipe); ctx->vbuf = NULL; /* init vertex data that doesn't change */ @@ -146,9 +146,13 @@ void util_destroy_blit(struct blit_state *ctx) { struct pipe_context *pipe = ctx->pipe; + unsigned i; pipe->delete_vs_state(pipe, ctx->vs); - pipe->delete_fs_state(pipe, ctx->fs); + + for (i = 0; i < Elements(ctx->fs); i++) + if (ctx->fs[i]) + pipe->delete_fs_state(pipe, ctx->fs[i]); pipe_buffer_reference(&ctx->vbuf, NULL); @@ -299,14 
+303,15 @@ regions_overlap(int srcX0, int srcY0, * XXX need some control over blitting Z and/or stencil. */ void -util_blit_pixels(struct blit_state *ctx, - struct pipe_surface *src, - int srcX0, int srcY0, - int srcX1, int srcY1, - struct pipe_surface *dst, - int dstX0, int dstY0, - int dstX1, int dstY1, - float z, uint filter) +util_blit_pixels_writemask(struct blit_state *ctx, + struct pipe_surface *src, + int srcX0, int srcY0, + int srcX1, int srcY1, + struct pipe_surface *dst, + int dstX0, int dstY0, + int dstX1, int dstY1, + float z, uint filter, + uint writemask) { struct pipe_context *pipe = ctx->pipe; struct pipe_screen *screen = pipe->screen; @@ -426,8 +431,11 @@ util_blit_pixels(struct blit_state *ctx, /* texture */ cso_set_sampler_textures(ctx->cso, 1, &tex); + if (ctx->fs[writemask] == NULL) + ctx->fs[writemask] = util_make_fragment_tex_shader_writemask(pipe, writemask); + /* shaders */ - cso_set_fragment_shader_handle(ctx->cso, ctx->fs); + cso_set_fragment_shader_handle(ctx->cso, ctx->fs[writemask]); cso_set_vertex_shader_handle(ctx->cso, ctx->vs); /* drawing dest */ @@ -462,6 +470,27 @@ util_blit_pixels(struct blit_state *ctx, } +void +util_blit_pixels(struct blit_state *ctx, + struct pipe_surface *src, + int srcX0, int srcY0, + int srcX1, int srcY1, + struct pipe_surface *dst, + int dstX0, int dstY0, + int dstX1, int dstY1, + float z, uint filter ) +{ + util_blit_pixels_writemask( ctx, src, + srcX0, srcY0, + srcX1, srcY1, + dst, + dstX0, dstY0, + dstX1, dstY1, + z, filter, + TGSI_WRITEMASK_XYZW ); +} + + /* Release vertex buffer at end of frame to avoid synchronous * rendering. 
*/ @@ -535,7 +564,7 @@ util_blit_pixels_tex(struct blit_state *ctx, cso_set_sampler_textures(ctx->cso, 1, &tex); /* shaders */ - cso_set_fragment_shader_handle(ctx->cso, ctx->fs); + cso_set_fragment_shader_handle(ctx->cso, ctx->fs[TGSI_WRITEMASK_XYZW]); cso_set_vertex_shader_handle(ctx->cso, ctx->vs); /* drawing dest */ diff --git a/src/gallium/auxiliary/util/u_blit.h b/src/gallium/auxiliary/util/u_blit.h index c35beceda8d..a102021529e 100644 --- a/src/gallium/auxiliary/util/u_blit.h +++ b/src/gallium/auxiliary/util/u_blit.h @@ -60,6 +60,17 @@ util_blit_pixels(struct blit_state *ctx, int dstX1, int dstY1, float z, uint filter); +void +util_blit_pixels_writemask(struct blit_state *ctx, + struct pipe_surface *src, + int srcX0, int srcY0, + int srcX1, int srcY1, + struct pipe_surface *dst, + int dstX0, int dstY0, + int dstX1, int dstY1, + float z, uint filter, + uint writemask); + extern void util_blit_pixels_tex(struct blit_state *ctx, struct pipe_texture *tex, diff --git a/src/gallium/auxiliary/util/u_debug.h b/src/gallium/auxiliary/util/u_debug.h index d42b65ce281..1380d98d7ee 100644 --- a/src/gallium/auxiliary/util/u_debug.h +++ b/src/gallium/auxiliary/util/u_debug.h @@ -88,6 +88,7 @@ _debug_printf(const char *format, ...) * - avoid outputing large strings (512 bytes is the current maximum length * that is guaranteed to be printed in all platforms) */ +#if !defined(PIPE_OS_HAIKU) static INLINE void debug_printf(const char *format, ...) { @@ -101,6 +102,7 @@ debug_printf(const char *format, ...) #endif } +#endif /* !PIPE_OS_HAIKU */ /* * ... isn't portable so we need to pass arguments in parentheses. 
diff --git a/src/gallium/auxiliary/util/u_fifo.h b/src/gallium/auxiliary/util/u_fifo.h new file mode 100644 index 00000000000..9e007de1ada --- /dev/null +++ b/src/gallium/auxiliary/util/u_fifo.h @@ -0,0 +1,94 @@ +/************************************************************************** + * + * Copyright © 2009 Jakob Bornecrantz + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef U_FIFO_H +#define U_FIFO_H + +#include "util/u_memory.h" + +struct util_fifo +{ + size_t head; + size_t tail; + size_t num; + size_t size; +}; + +static INLINE struct util_fifo * +u_fifo_create(size_t size) +{ + struct util_fifo *fifo; + fifo = MALLOC(sizeof(*fifo) + size * sizeof(void*)); + + fifo->head = 0; + fifo->tail = 0; + fifo->num = 0; + fifo->size = size; + + return fifo; +} + +static INLINE boolean +u_fifo_add(struct util_fifo *fifo, void *ptr) +{ + void **array = (void**)&fifo[1]; + if (fifo->num >= fifo->size) + return FALSE; + + if (++fifo->head >= fifo->size) + fifo->head = 0; + + array[fifo->head] = ptr; + + ++fifo->num; + + return TRUE; +} + +static INLINE boolean +u_fifo_pop(struct util_fifo *fifo, void **ptr) +{ + void **array = (void**)&fifo[1]; + + if (!fifo->num) + return FALSE; + + if (++fifo->tail >= fifo->size) + fifo->tail = 0; + + *ptr = array[fifo->tail]; + + ++fifo->num; + + return TRUE; +} + +static INLINE void +u_fifo_destroy(struct util_fifo *fifo) +{ + FREE(fifo); +} + +#endif diff --git a/src/gallium/auxiliary/util/u_format.csv b/src/gallium/auxiliary/util/u_format.csv index 00a46d0cc48..6e82983e586 100644 --- a/src/gallium/auxiliary/util/u_format.csv +++ b/src/gallium/auxiliary/util/u_format.csv @@ -14,10 +14,10 @@ PIPE_FORMAT_L16_UNORM , arith , 1, 1, un16, , , , xxx1, PIPE_FORMAT_Z16_UNORM , array , 1, 1, un16, , , , x___, zs PIPE_FORMAT_Z32_UNORM , array , 1, 1, un32, , , , x___, zs PIPE_FORMAT_Z32_FLOAT , array , 1, 1, f32 , , , , x___, zs -PIPE_FORMAT_S8Z24_UNORM , arith , 1, 1, un8 , un24, , , yx__, zs -PIPE_FORMAT_Z24S8_UNORM , arith , 1, 1, un24, un8 , , , xy__, zs -PIPE_FORMAT_X8Z24_UNORM , arith , 1, 1, un8 , un24, , , y___, zs -PIPE_FORMAT_Z24X8_UNORM , arith , 1, 1, un24, un8 , , , x___, zs +PIPE_FORMAT_S8Z24_UNORM , arith , 1, 1, un24, un8 , , , xy__, zs +PIPE_FORMAT_Z24S8_UNORM , arith , 1, 1, un8 , un24, , , yx__, zs 
+PIPE_FORMAT_X8Z24_UNORM , arith , 1, 1, un24, un8 , , , x___, zs +PIPE_FORMAT_Z24X8_UNORM , arith , 1, 1, un8 , un24, , , y___, zs PIPE_FORMAT_S8_UNORM , array , 1, 1, un8 , , , , _x__, zs PIPE_FORMAT_R64_FLOAT , array , 1, 1, f64 , , , , x001, rgb PIPE_FORMAT_R64G64_FLOAT , array , 1, 1, f64 , f64 , , , xy01, rgb diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index b0807c13392..b12c97dfb4d 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -341,6 +341,16 @@ util_is_inf_or_nan(float x) /** + * Test whether x is a power of two. + */ +static INLINE boolean +util_is_pot(unsigned x) +{ + return (x & (x - 1)) == 0; +} + + +/** * Find first bit set in word. Least significant bit is 1. * Return 0 if no bits set. */ @@ -374,6 +384,10 @@ unsigned ffs( unsigned u ) #define ffs __builtin_ffs #endif +#ifdef __MINGW32__ +#define ffs __builtin_ffs +#endif + /* Could also binary search for the highest bit. */ diff --git a/src/gallium/auxiliary/util/u_network.c b/src/gallium/auxiliary/util/u_network.c index 07d804ecdbf..bc4b7584067 100644 --- a/src/gallium/auxiliary/util/u_network.c +++ b/src/gallium/auxiliary/util/u_network.c @@ -6,7 +6,7 @@ #if defined(PIPE_SUBSYSTEM_WINDOWS_USER) # include <winsock2.h> # include <windows.h> -#elif defined(PIPE_OS_LINUX) +#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) # include <sys/socket.h> # include <netinet/in.h> # include <unistd.h> @@ -54,7 +54,7 @@ u_socket_close(int s) if (s < 0) return; -#if defined(PIPE_OS_LINUX) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) shutdown(s, SHUT_RDWR); close(s); #elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) @@ -169,7 +169,7 @@ u_socket_listen_on_port(uint16_t portnum) void u_socket_block(int s, boolean block) { -#if defined(PIPE_OS_LINUX) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) int old = fcntl(s, F_GETFL, 0); if (old == -1) return; diff --git a/src/gallium/auxiliary/util/u_network.h 
b/src/gallium/auxiliary/util/u_network.h index 14d3884427e..8c778f492ca 100644 --- a/src/gallium/auxiliary/util/u_network.h +++ b/src/gallium/auxiliary/util/u_network.h @@ -6,7 +6,7 @@ #if defined(PIPE_SUBSYSTEM_WINDOWS_USER) # define PIPE_HAVE_SOCKETS -#elif defined(PIPE_OS_LINUX) +#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) # define PIPE_HAVE_SOCKETS #endif diff --git a/src/gallium/auxiliary/util/u_simple_shaders.c b/src/gallium/auxiliary/util/u_simple_shaders.c index d54a1d8c746..ab754296fa8 100644 --- a/src/gallium/auxiliary/util/u_simple_shaders.c +++ b/src/gallium/auxiliary/util/u_simple_shaders.c @@ -88,11 +88,14 @@ util_make_vertex_passthrough_shader(struct pipe_context *pipe, /** * Make simple fragment texture shader: - * TEX OUT[0], IN[0], SAMP[0], 2D; + * IMM {0,0,0,1} // (if writemask != 0xf) + * MOV OUT[0], IMM[0] // (if writemask != 0xf) + * TEX OUT[0].writemask, IN[0], SAMP[0], 2D; * END; */ void * -util_make_fragment_tex_shader(struct pipe_context *pipe) +util_make_fragment_tex_shader_writemask(struct pipe_context *pipe, + unsigned writemask ) { struct ureg_program *ureg; struct ureg_src sampler; @@ -103,7 +106,7 @@ util_make_fragment_tex_shader(struct pipe_context *pipe) if (ureg == NULL) return NULL; - sampler = ureg_DECL_sampler( ureg ); + sampler = ureg_DECL_sampler( ureg, 0 ); tex = ureg_DECL_fs_input( ureg, TGSI_SEMANTIC_GENERIC, 0, @@ -119,7 +122,12 @@ util_make_fragment_tex_shader(struct pipe_context *pipe) return ureg_create_shader_and_destroy( ureg, pipe ); } - +void * +util_make_fragment_tex_shader(struct pipe_context *pipe ) +{ + return util_make_fragment_tex_shader_writemask( pipe, + TGSI_WRITEMASK_XYZW ); +} diff --git a/src/gallium/auxiliary/util/u_simple_shaders.h b/src/gallium/auxiliary/util/u_simple_shaders.h index 6f8d96af9bc..d2e80d6eb4d 100644 --- a/src/gallium/auxiliary/util/u_simple_shaders.h +++ b/src/gallium/auxiliary/util/u_simple_shaders.h @@ -50,6 +50,10 @@ util_make_vertex_passthrough_shader(struct 
pipe_context *pipe, extern void * +util_make_fragment_tex_shader_writemask(struct pipe_context *pipe, + unsigned writemask ); + +extern void * util_make_fragment_tex_shader(struct pipe_context *pipe); diff --git a/src/gallium/auxiliary/util/u_stream_stdc.c b/src/gallium/auxiliary/util/u_stream_stdc.c index d8f648e5dd1..5cd05b29047 100644 --- a/src/gallium/auxiliary/util/u_stream_stdc.c +++ b/src/gallium/auxiliary/util/u_stream_stdc.c @@ -32,7 +32,7 @@ #include "pipe/p_config.h" -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_OS_SOLARIS) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_HAIKU) #include <stdio.h> diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c index 1235a67d264..0d6489c26e4 100644 --- a/src/gallium/auxiliary/util/u_tile.c +++ b/src/gallium/auxiliary/util/u_tile.c @@ -170,7 +170,7 @@ x8r8g8b8_get_tile_rgba(const unsigned *src, pRow[0] = ubyte_to_float((pixel >> 16) & 0xff); pRow[1] = ubyte_to_float((pixel >> 8) & 0xff); pRow[2] = ubyte_to_float((pixel >> 0) & 0xff); - pRow[3] = ubyte_to_float(0xff); + pRow[3] = 1.0F; } p += dst_stride; } @@ -394,6 +394,52 @@ r5g6b5_put_tile_rgba(ushort *dst, +/*** PIPE_FORMAT_R8G8B8_UNORM ***/ + +static void +r8g8b8_get_tile_rgba(const ubyte *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + pRow[0] = ubyte_to_float(src[0]); + pRow[1] = ubyte_to_float(src[1]); + pRow[2] = ubyte_to_float(src[2]); + pRow[3] = 1.0f; + src += 3; + } + p += dst_stride; + } +} + + +static void +r8g8b8_put_tile_rgba(ubyte *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + dst[0] = 
float_to_ubyte(pRow[0]); + dst[1] = float_to_ubyte(pRow[1]); + dst[2] = float_to_ubyte(pRow[2]); + dst += 3; + } + p += src_stride; + } +} + + + /*** PIPE_FORMAT_Z16_UNORM ***/ /** @@ -1106,6 +1152,9 @@ pipe_tile_raw_to_rgba(enum pipe_format format, case PIPE_FORMAT_R5G6B5_UNORM: r5g6b5_get_tile_rgba((ushort *) src, w, h, dst, dst_stride); break; + case PIPE_FORMAT_R8G8B8_UNORM: + r8g8b8_get_tile_rgba((ubyte *) src, w, h, dst, dst_stride); + break; case PIPE_FORMAT_L8_UNORM: l8_get_tile_rgba((ubyte *) src, w, h, dst, dst_stride); break; @@ -1222,6 +1271,9 @@ pipe_put_tile_rgba(struct pipe_transfer *pt, case PIPE_FORMAT_R5G6B5_UNORM: r5g6b5_put_tile_rgba((ushort *) packed, w, h, p, src_stride); break; + case PIPE_FORMAT_R8G8B8_UNORM: + r8g8b8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride); + break; case PIPE_FORMAT_R8G8B8A8_UNORM: assert(0); break; diff --git a/src/gallium/auxiliary/util/u_time.c b/src/gallium/auxiliary/util/u_time.c index c16cdd0b226..b958a986353 100644 --- a/src/gallium/auxiliary/util/u_time.c +++ b/src/gallium/auxiliary/util/u_time.c @@ -35,7 +35,7 @@ #include "pipe/p_config.h" -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) #include <sys/time.h> #elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) #include <windows.h> @@ -77,7 +77,7 @@ util_time_get_frequency(void) void util_time_get(struct util_time *t) { -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) gettimeofday(&t->tv, NULL); #elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) LONGLONG temp; @@ -102,7 +102,7 @@ util_time_add(const struct util_time *t1, int64_t usecs, struct util_time *t2) { -#if defined(PIPE_OS_LINUX) || 
defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) t2->tv.tv_sec = t1->tv.tv_sec + usecs / 1000000; t2->tv.tv_usec = t1->tv.tv_usec + usecs % 1000000; #elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) || defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) @@ -124,7 +124,7 @@ int64_t util_time_diff(const struct util_time *t1, const struct util_time *t2) { -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) return (t2->tv.tv_usec - t1->tv.tv_usec) + (t2->tv.tv_sec - t1->tv.tv_sec)*1000000; #elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) || defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) @@ -144,7 +144,7 @@ util_time_micros( void ) util_time_get(&t1); -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) return t1.tv.tv_usec + t1.tv.tv_sec*1000000LL; #elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) || defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) util_time_get_frequency(); @@ -166,7 +166,7 @@ static INLINE int util_time_compare(const struct util_time *t1, const struct util_time *t2) { -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) if (t1->tv.tv_sec < t2->tv.tv_sec) return -1; else if(t1->tv.tv_sec > t2->tv.tv_sec) diff --git a/src/gallium/auxiliary/util/u_time.h 
b/src/gallium/auxiliary/util/u_time.h index 7a5c54d9b23..a6189a247bb 100644 --- a/src/gallium/auxiliary/util/u_time.h +++ b/src/gallium/auxiliary/util/u_time.h @@ -43,6 +43,11 @@ #include <unistd.h> /* usleep */ #endif +#if defined(PIPE_OS_HAIKU) +#include <sys/time.h> /* timeval */ +#include <unistd.h> +#endif + #include "pipe/p_compiler.h" @@ -58,7 +63,7 @@ extern "C" { */ struct util_time { -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) struct timeval tv; #else int64_t counter; @@ -89,7 +94,7 @@ util_time_timeout(const struct util_time *start, const struct util_time *end, const struct util_time *curr); -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) #define util_time_sleep usleep #else void diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c index 6db9501128c..0eaae2e451b 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.c +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -1150,10 +1150,6 @@ exec_instruction( ASSERT (0); break; - case TGSI_OPCODE_CND0: - ASSERT (0); - break; - case TGSI_OPCODE_DP2A: ASSERT (0); break; diff --git a/src/gallium/drivers/i915simple/i915_flush.c b/src/gallium/drivers/i915simple/i915_flush.c index b223dadab64..1582168eba5 100644 --- a/src/gallium/drivers/i915simple/i915_flush.c +++ b/src/gallium/drivers/i915simple/i915_flush.c @@ -45,6 +45,7 @@ static void i915_flush( struct pipe_context *pipe, draw_flush(i915->draw); +#if 0 /* Do we need to emit an MI_FLUSH command to flush the hardware * caches? 
*/ @@ -63,6 +64,13 @@ static void i915_flush( struct pipe_context *pipe, } OUT_BATCH( flush ); } +#endif + +#if 0 + if (i915->batch->map == i915->batch->ptr) { + return; + } +#endif /* If there are no flags, just flush pending commands to hardware: */ diff --git a/src/gallium/drivers/i915simple/i915_prim_vbuf.c b/src/gallium/drivers/i915simple/i915_prim_vbuf.c index 508f4560e48..b3a7774fd6a 100644 --- a/src/gallium/drivers/i915simple/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915simple/i915_prim_vbuf.c @@ -44,6 +44,7 @@ #include "pipe/p_inlines.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_fifo.h" #include "i915_context.h" #include "i915_reg.h" @@ -76,8 +77,13 @@ struct i915_vbuf_render { size_t vbo_size; size_t vbo_offset; void *vbo_ptr; - size_t vbo_alloc_size; size_t vbo_max_used; + + /* stuff for the pool */ + struct util_fifo *pool_fifo; + unsigned pool_used; + unsigned pool_buffer_size; + boolean pool_not_used; }; @@ -106,33 +112,72 @@ i915_vbuf_render_get_vertex_info(struct vbuf_render *render) } static boolean +i915_vbuf_render_reserve(struct i915_vbuf_render *i915_render, size_t size) +{ + struct i915_context *i915 = i915_render->i915; + + if (i915_render->vbo_size < size + i915_render->vbo_offset) + return FALSE; + + if (i915->vbo_flushed) + return FALSE; + + return TRUE; +} + +static void +i915_vbuf_render_new_buf(struct i915_vbuf_render *i915_render, size_t size) +{ + struct i915_context *i915 = i915_render->i915; + struct intel_winsys *iws = i915->iws; + + if (i915_render->vbo) { + if (i915_render->pool_not_used) + iws->buffer_destroy(iws, i915_render->vbo); + else + u_fifo_add(i915_render->pool_fifo, i915_render->vbo); + i915_render->vbo = NULL; + } + + i915->vbo_flushed = 0; + + i915_render->vbo_size = MAX2(size, i915_render->pool_buffer_size); + i915_render->vbo_offset = 0; + + if (i915_render->vbo_size != i915_render->pool_buffer_size) { + i915_render->pool_not_used = TRUE; + i915_render->vbo = iws->buffer_create(iws, 
i915_render->vbo_size, 64, + INTEL_NEW_VERTEX); + } else { + i915_render->pool_not_used = FALSE; + + if (i915_render->pool_used >= 2) { + FLUSH_BATCH(NULL); + i915->vbo_flushed = 0; + i915_render->pool_used = 0; + } + u_fifo_pop(i915_render->pool_fifo, (void**)&i915_render->vbo); + } +} + +static boolean i915_vbuf_render_allocate_vertices(struct vbuf_render *render, ushort vertex_size, ushort nr_vertices) { struct i915_vbuf_render *i915_render = i915_vbuf_render(render); struct i915_context *i915 = i915_render->i915; - struct intel_winsys *iws = i915->iws; size_t size = (size_t)vertex_size * (size_t)nr_vertices; /* FIXME: handle failure */ assert(!i915->vbo); - if (i915_render->vbo_size > size + i915_render->vbo_offset && !i915->vbo_flushed) { - } else { - i915->vbo_flushed = 0; - if (i915_render->vbo) { - iws->buffer_destroy(iws, i915_render->vbo); - i915_render->vbo = NULL; - } - } + if (!i915_vbuf_render_reserve(i915_render, size)) { - if (!i915_render->vbo) { - i915_render->vbo_size = MAX2(size, i915_render->vbo_alloc_size); - i915_render->vbo_offset = 0; - i915_render->vbo = iws->buffer_create(iws, i915_render->vbo_size, 64, - INTEL_NEW_VERTEX); + if (i915->vbo_flushed) + i915_render->pool_used = 0; + i915_vbuf_render_new_buf(i915_render, size); } i915_render->vertex_size = vertex_size; @@ -504,6 +549,7 @@ i915_vbuf_render_create(struct i915_context *i915) { struct i915_vbuf_render *i915_render = CALLOC_STRUCT(i915_vbuf_render); struct intel_winsys *iws = i915->iws; + int i; i915_render->i915 = i915; @@ -524,14 +570,24 @@ i915_vbuf_render_create(struct i915_context *i915) i915_render->base.release_vertices = i915_vbuf_render_release_vertices; i915_render->base.destroy = i915_vbuf_render_destroy; - i915_render->vbo_alloc_size = 128 * 4096; - i915_render->vbo_size = i915_render->vbo_alloc_size; + + i915_render->vbo = NULL; + i915_render->vbo_size = 0; i915_render->vbo_offset = 0; - i915_render->vbo = iws->buffer_create(iws, i915_render->vbo_size, 64, - 
INTEL_NEW_VERTEX); + + i915_render->pool_used = FALSE; + i915_render->pool_buffer_size = 128 * 4096; + i915_render->pool_fifo = u_fifo_create(6); + for (i = 0; i < 6; i++) + u_fifo_add(i915_render->pool_fifo, + iws->buffer_create(iws, i915_render->pool_buffer_size, 64, + INTEL_NEW_VERTEX)); + +#if 0 /* TODO JB: is this realy needed? */ i915_render->vbo_ptr = iws->buffer_map(iws, i915_render->vbo, TRUE); iws->buffer_unmap(iws, i915_render->vbo); +#endif return &i915_render->base; } diff --git a/src/gallium/drivers/i915simple/intel_winsys.h b/src/gallium/drivers/i915simple/intel_winsys.h index e123c32d889..42c5e7470ec 100644 --- a/src/gallium/drivers/i915simple/intel_winsys.h +++ b/src/gallium/drivers/i915simple/intel_winsys.h @@ -69,8 +69,8 @@ struct intel_batchbuffer { * each dword written. */ /*{@*/ - void *map; - void *ptr; + uint8_t *map; + uint8_t *ptr; size_t size; size_t relocs; @@ -150,6 +150,17 @@ struct intel_winsys { void (*buffer_unmap)(struct intel_winsys *iws, struct intel_buffer *buffer); + /** + * Write to a buffer. 
+ * + * Arguments follows pwrite(2) + */ + int (*buffer_write)(struct intel_winsys *iws, + struct intel_buffer *dst, + const void *src, + size_t size, + size_t offset); + void (*buffer_destroy)(struct intel_winsys *iws, struct intel_buffer *buffer); /*@}*/ diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 6e63a0c2b76..06c586e6bb9 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -15,9 +15,11 @@ C_SOURCES = \ lp_bld_depth.c \ lp_bld_flow.c \ lp_bld_format_aos.c \ + lp_bld_format_soa.c \ lp_bld_interp.c \ lp_bld_intr.c \ lp_bld_logic.c \ + lp_bld_sample_soa.c \ lp_bld_swizzle.c \ lp_bld_struct.c \ lp_bld_tgsi_soa.c \ @@ -44,7 +46,8 @@ C_SOURCES = \ lp_state_vs.c \ lp_surface.c \ lp_tex_cache.c \ - lp_tex_sample.c \ + lp_tex_sample_c.c \ + lp_tex_sample_llvm.c \ lp_texture.c \ lp_tile_cache.c \ lp_tile_soa.c diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index 6bceb84da47..dea4b703c45 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -2,8 +2,12 @@ Import('*') env = env.Clone() +env.Tool('llvm') +if env.has_key('LLVM_VERSION') is False: + print 'warning: LLVM not found: not building llvmpipe' + Return() + env.Tool('udis86') -env.ParseConfig('llvm-config --cppflags') llvmpipe = env.ConvenienceLibrary( target = 'llvmpipe', @@ -19,8 +23,10 @@ llvmpipe = env.ConvenienceLibrary( 'lp_bld_depth.c', 'lp_bld_flow.c', 'lp_bld_format_aos.c', + 'lp_bld_format_soa.c', 'lp_bld_interp.c', 'lp_bld_intr.c', + 'lp_bld_sample_soa.c', 'lp_bld_struct.c', 'lp_bld_logic.c', 'lp_bld_swizzle.c', @@ -48,7 +54,8 @@ llvmpipe = env.ConvenienceLibrary( 'lp_state_vs.c', 'lp_surface.c', 'lp_tex_cache.c', - 'lp_tex_sample.c', + 'lp_tex_sample_c.c', + 'lp_tex_sample_llvm.c', 'lp_texture.c', 'lp_tile_cache.c', 'lp_tile_soa.c', @@ -57,8 +64,6 @@ llvmpipe = env.ConvenienceLibrary( env = env.Clone() -env['LINK'] = 
env['CXX'] -env.ParseConfig('llvm-config --libs jit interpreter nativecodegen bitwriter') env.Prepend(LIBS = [llvmpipe] + auxiliaries) env.Program( diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c index 09a57ff33d5..be7442d00ae 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c @@ -502,6 +502,31 @@ lp_build_div(struct lp_build_context *bld, } +LLVMValueRef +lp_build_lerp(struct lp_build_context *bld, + LLVMValueRef x, + LLVMValueRef v0, + LLVMValueRef v1) +{ + return lp_build_add(bld, v0, lp_build_mul(bld, x, lp_build_sub(bld, v1, v0))); +} + + +LLVMValueRef +lp_build_lerp_2d(struct lp_build_context *bld, + LLVMValueRef x, + LLVMValueRef y, + LLVMValueRef v00, + LLVMValueRef v01, + LLVMValueRef v10, + LLVMValueRef v11) +{ + LLVMValueRef v0 = lp_build_lerp(bld, x, v00, v01); + LLVMValueRef v1 = lp_build_lerp(bld, x, v10, v11); + return lp_build_lerp(bld, y, v0, v1); +} + + /** * Generate min(a, b) * Do checks for special cases. @@ -566,20 +591,31 @@ lp_build_abs(struct lp_build_context *bld, LLVMValueRef a) { const union lp_type type = bld->type; + LLVMTypeRef vec_type = lp_build_vec_type(type); if(!type.sign) return a; - /* XXX: is this really necessary? 
*/ + if(type.floating) { + /* Mask out the sign bit */ + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + LLVMValueRef mask = lp_build_int_const_scalar(type, ((unsigned long long)1 << type.width) - 1); + a = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); + a = LLVMBuildAnd(bld->builder, a, mask, ""); + a = LLVMBuildBitCast(bld->builder, a, vec_type, ""); + return a; + } + #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - if(!type.floating && type.width*type.length == 128) { - LLVMTypeRef vec_type = lp_build_vec_type(type); - if(type.width == 8) + if(type.width*type.length == 128) { + switch(type.width) { + case 8: return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.b.128", vec_type, a); - if(type.width == 16) + case 16: return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.w.128", vec_type, a); - if(type.width == 32) + case 32: return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.d.128", vec_type, a); + } } #endif @@ -587,6 +623,89 @@ lp_build_abs(struct lp_build_context *bld, } +enum lp_build_round_sse41_mode +{ + LP_BUILD_ROUND_SSE41_NEAREST = 0, + LP_BUILD_ROUND_SSE41_FLOOR = 1, + LP_BUILD_ROUND_SSE41_CEIL = 2, + LP_BUILD_ROUND_SSE41_TRUNCATE = 3 +}; + + +static INLINE LLVMValueRef +lp_build_round_sse41(struct lp_build_context *bld, + LLVMValueRef a, + enum lp_build_round_sse41_mode mode) +{ + const union lp_type type = bld->type; + LLVMTypeRef vec_type = lp_build_vec_type(type); + const char *intrinsic; + + assert(type.floating); + assert(type.width*type.length == 128); + + switch(type.width) { + case 32: + intrinsic = "llvm.x86.sse41.round.ps"; + break; + case 64: + intrinsic = "llvm.x86.sse41.round.pd"; + break; + default: + assert(0); + return bld->undef; + } + + return lp_build_intrinsic_binary(bld->builder, intrinsic, vec_type, a, + LLVMConstInt(LLVMInt32Type(), mode, 0)); +} + + +LLVMValueRef +lp_build_floor(struct lp_build_context *bld, + LLVMValueRef a) +{ + const union lp_type type = 
bld->type; + + assert(type.floating); + +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) + return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR); +#endif + + /* FIXME */ + assert(0); + return bld->undef; +} + + +/** + * Convert to integer, through whichever rounding method that's fastest, + * typically truncating to zero. + */ +LLVMValueRef +lp_build_int(struct lp_build_context *bld, + LLVMValueRef a) +{ + const union lp_type type = bld->type; + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + + assert(type.floating); + + return LLVMBuildFPToSI(bld->builder, a, int_vec_type, ""); +} + + +LLVMValueRef +lp_build_ifloor(struct lp_build_context *bld, + LLVMValueRef a) +{ + a = lp_build_floor(bld, a); + a = lp_build_int(bld, a); + return a; +} + + LLVMValueRef lp_build_sqrt(struct lp_build_context *bld, LLVMValueRef a) diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.h b/src/gallium/drivers/llvmpipe/lp_bld_arit.h index fc8cb25966e..383c3c3313b 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.h @@ -72,6 +72,26 @@ lp_build_div(struct lp_build_context *bld, LLVMValueRef b); LLVMValueRef +lp_build_lerp(struct lp_build_context *bld, + LLVMValueRef x, + LLVMValueRef v0, + LLVMValueRef v1); + +/** + * Bilinear interpolation. + * + * Values indices are in v_{yx}. 
+ */ +LLVMValueRef +lp_build_lerp_2d(struct lp_build_context *bld, + LLVMValueRef x, + LLVMValueRef y, + LLVMValueRef v00, + LLVMValueRef v01, + LLVMValueRef v10, + LLVMValueRef v11); + +LLVMValueRef lp_build_min(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b); @@ -86,6 +106,18 @@ lp_build_abs(struct lp_build_context *bld, LLVMValueRef a); LLVMValueRef +lp_build_floor(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef +lp_build_int(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef +lp_build_ifloor(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef lp_build_sqrt(struct lp_build_context *bld, LLVMValueRef a); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_debug.c b/src/gallium/drivers/llvmpipe/lp_bld_debug.c index 30925b5f415..59d8f492e60 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_debug.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_debug.c @@ -30,10 +30,27 @@ #include <udis86.h> #endif +#include "util/u_math.h" #include "util/u_debug.h" #include "lp_bld_debug.h" +/** + * Check alignment. + * + * It is important that this check is not implemented as a macro or inlined + * function, as the compiler assumptions in respect to alignment of global + * and stack variables would often make the check a no op, defeating the + * whole purpose of the exercise. + */ +boolean +lp_check_alignment(const void *ptr, unsigned alignment) +{ + assert(util_is_pot(alignment)); + return ((uintptr_t)ptr & (alignment - 1)) == 0; +} + + void lp_disassemble(const void* func) { diff --git a/src/gallium/drivers/llvmpipe/lp_bld_debug.h b/src/gallium/drivers/llvmpipe/lp_bld_debug.h index ecdafef76d0..583e6132b4b 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_debug.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_debug.h @@ -53,6 +53,10 @@ lp_build_name(LLVMValueRef val, const char *format, ...) 
} +boolean +lp_check_alignment(const void *ptr, unsigned alignment); + + void lp_disassemble(const void* func); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c index 2cd6e6b9217..3f88a14b5d3 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c @@ -179,12 +179,13 @@ lp_build_depth_test(LLVMBuilderRef builder, padding_right = 0; for(chan = 0; chan < z_swizzle; ++chan) padding_right += format_desc->channel[chan].size; - padding_left = format_desc->block.bits - format_desc->channel[z_swizzle].size; + padding_left = format_desc->block.bits - + (padding_right + format_desc->channel[z_swizzle].size); if(padding_left || padding_right) { - const long long mask_left = ((long long)1 << (format_desc->block.bits - padding_left)) - 1; - const long long mask_right = ((long long)1 << (padding_right)) - 1; - z_bitmask = lp_build_int_const_scalar(type, mask_left & mask_right); + const unsigned long long mask_left = ((unsigned long long)1 << (format_desc->block.bits - padding_left)) - 1; + const unsigned long long mask_right = ((unsigned long long)1 << (padding_right)) - 1; + z_bitmask = lp_build_int_const_scalar(type, mask_left ^ mask_right); } if(padding_left) diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.c b/src/gallium/drivers/llvmpipe/lp_bld_flow.c index 9d99e1a9d9f..69ed014ff3d 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_flow.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.c @@ -32,59 +32,261 @@ */ #include "util/u_debug.h" +#include "util/u_memory.h" #include "lp_bld_type.h" #include "lp_bld_flow.h" +#define LP_BUILD_FLOW_MAX_VARIABLES 32 +#define LP_BUILD_FLOW_MAX_DEPTH 32 + + +/** + * Enumeration of all possible flow constructs. + */ +enum lp_build_flow_construct_kind { + lP_BUILD_FLOW_SCOPE, + LP_BUILD_FLOW_SKIP, +}; + + +/** + * Variable declaration scope. 
+ */ +struct lp_build_flow_scope +{ + /** Number of variables declared in this scope */ + unsigned num_variables; +}; + + +/** + * Early exit. Useful to skip to the end of a function or block when + * the execution mask becomes zero or when there is an error condition. + */ +struct lp_build_flow_skip +{ + /** Block to skip to */ + LLVMBasicBlockRef block; + + /** Number of variables declared at the beginning */ + unsigned num_variables; + + LLVMValueRef *phi; +}; + + +/** + * Union of all possible flow constructs' data + */ +union lp_build_flow_construct_data +{ + struct lp_build_flow_scope scope; + struct lp_build_flow_skip skip; +}; + + +/** + * Element of the flow construct stack. + */ +struct lp_build_flow_construct +{ + enum lp_build_flow_construct_kind kind; + union lp_build_flow_construct_data data; +}; + + +/** + * All necessary data to generate LLVM control flow constructs. + * + * Besides keeping track of the control flow construct themselves we also + * need to keep track of variables in order to generate SSA Phi values. + */ +struct lp_build_flow_context +{ + LLVMBuilderRef builder; + + /** + * Control flow stack. 
+ */ + struct lp_build_flow_construct constructs[LP_BUILD_FLOW_MAX_DEPTH]; + unsigned num_constructs; + + /** + * Variable stack + */ + LLVMValueRef *variables[LP_BUILD_FLOW_MAX_VARIABLES]; + unsigned num_variables; +}; + + +struct lp_build_flow_context * +lp_build_flow_create(LLVMBuilderRef builder) +{ + struct lp_build_flow_context *flow; + + flow = CALLOC_STRUCT(lp_build_flow_context); + if(!flow) + return NULL; + + flow->builder = builder; + + return flow; +} + + void -lp_build_mask_begin(struct lp_build_mask_context *mask, - LLVMBuilderRef builder, - union lp_type type, - LLVMValueRef value) +lp_build_flow_destroy(struct lp_build_flow_context *flow) { - memset(mask, 0, sizeof *mask); + assert(flow->num_constructs == 0); + assert(flow->num_variables == 0); + FREE(flow); +} - mask->builder = builder; - mask->reg_type = LLVMIntType(type.width * type.length); - mask->value = value; + +static union lp_build_flow_construct_data * +lp_build_flow_push(struct lp_build_flow_context *flow, + enum lp_build_flow_construct_kind kind) +{ + assert(flow->num_constructs < LP_BUILD_FLOW_MAX_DEPTH); + if(flow->num_constructs >= LP_BUILD_FLOW_MAX_DEPTH) + return NULL; + + flow->constructs[flow->num_constructs].kind = kind; + return &flow->constructs[flow->num_constructs++].data; +} + + +static union lp_build_flow_construct_data * +lp_build_flow_peek(struct lp_build_flow_context *flow, + enum lp_build_flow_construct_kind kind) +{ + assert(flow->num_constructs); + if(!flow->num_constructs) + return NULL; + + assert(flow->constructs[flow->num_constructs - 1].kind == kind); + if(flow->constructs[flow->num_constructs - 1].kind != kind) + return NULL; + + return &flow->constructs[flow->num_constructs - 1].data; } +static union lp_build_flow_construct_data * +lp_build_flow_pop(struct lp_build_flow_context *flow, + enum lp_build_flow_construct_kind kind) +{ + assert(flow->num_constructs); + if(!flow->num_constructs) + return NULL; + + assert(flow->constructs[flow->num_constructs - 1].kind 
== kind); + if(flow->constructs[flow->num_constructs - 1].kind != kind) + return NULL; + + return &flow->constructs[--flow->num_constructs].data; +} + + +/** + * Begin a variable scope. + * + * + */ void -lp_build_mask_update(struct lp_build_mask_context *mask, - LLVMValueRef value) +lp_build_flow_scope_begin(struct lp_build_flow_context *flow) { + struct lp_build_flow_scope *scope; - LLVMValueRef cond; - LLVMBasicBlockRef current_block; - LLVMBasicBlockRef next_block; - LLVMBasicBlockRef new_block; + scope = &lp_build_flow_push(flow, lP_BUILD_FLOW_SCOPE)->scope; + if(!scope) + return; - if(mask->value) - mask->value = LLVMBuildAnd(mask->builder, mask->value, value, ""); - else - mask->value = value; + scope->num_variables = 0; +} - /* FIXME: disabled until we have proper control flow helpers */ -#if 0 - cond = LLVMBuildICmp(mask->builder, - LLVMIntEQ, - LLVMBuildBitCast(mask->builder, mask->value, mask->reg_type, ""), - LLVMConstNull(mask->reg_type), - ""); - current_block = LLVMGetInsertBlock(mask->builder); +/** + * Declare a variable. + * + * A variable is a named entity which can have different LLVMValueRef's at + * different points of the program. This is relevant for control flow because + * when there are mutiple branches to a same location we need to replace + * the variable's value with a Phi function as explained in + * http://en.wikipedia.org/wiki/Static_single_assignment_form . + * + * We keep track of variables by keeping around a pointer to where their + * current. + * + * There are a few cautions to observe: + * + * - Variable's value must not be NULL. If there is no initial value then + * LLVMGetUndef() should be used. + * + * - Variable's value must be kept up-to-date. If the variable is going to be + * modified by a function then a pointer should be passed so that its value + * is accurate. Failure to do this will cause some of the variables' + * transient values to be lost, leading to wrong results. 
+ * + * - A program should be written from top to bottom, by always appending + * instructions to the bottom with a single LLVMBuilderRef. Inserting and/or + * modifying existing statements will most likely lead to wrong results. + * + */ +void +lp_build_flow_scope_declare(struct lp_build_flow_context *flow, + LLVMValueRef *variable) +{ + struct lp_build_flow_scope *scope; + + scope = &lp_build_flow_peek(flow, lP_BUILD_FLOW_SCOPE)->scope; + if(!scope) + return; - if(!mask->skip_block) { - LLVMValueRef function = LLVMGetBasicBlockParent(current_block); - mask->skip_block = LLVMAppendBasicBlock(function, "skip"); + assert(*variable); + if(!*variable) + return; + + assert(flow->num_variables < LP_BUILD_FLOW_MAX_VARIABLES); + if(flow->num_variables >= LP_BUILD_FLOW_MAX_VARIABLES) + return; + + flow->variables[flow->num_variables++] = variable; + ++scope->num_variables; +} + + +void +lp_build_flow_scope_end(struct lp_build_flow_context *flow) +{ + struct lp_build_flow_scope *scope; + + scope = &lp_build_flow_pop(flow, lP_BUILD_FLOW_SCOPE)->scope; + if(!scope) + return; - mask->phi = LLVMBuildPhi(mask->builder, LLVMTypeOf(mask->value), ""); + assert(flow->num_variables >= scope->num_variables); + if(flow->num_variables < scope->num_variables) { + flow->num_variables = 0; + return; } + flow->num_variables -= scope->num_variables; +} + + +static LLVMBasicBlockRef +lp_build_flow_insert_block(struct lp_build_flow_context *flow) +{ + LLVMBasicBlockRef current_block; + LLVMBasicBlockRef next_block; + LLVMBasicBlockRef new_block; + + current_block = LLVMGetInsertBlock(flow->builder); + next_block = LLVMGetNextBasicBlock(current_block); - assert(next_block); if(next_block) { new_block = LLVMInsertBasicBlock(next_block, ""); } @@ -93,30 +295,148 @@ lp_build_mask_update(struct lp_build_mask_context *mask, new_block = LLVMAppendBasicBlock(function, ""); } - LLVMAddIncoming(mask->phi, &mask->value, ¤t_block, 1); - LLVMBuildCondBr(mask->builder, cond, mask->skip_block, new_block); + 
return new_block; +} + +void +lp_build_flow_skip_begin(struct lp_build_flow_context *flow) +{ + struct lp_build_flow_skip *skip; + LLVMBuilderRef builder; + unsigned i; + + skip = &lp_build_flow_push(flow, LP_BUILD_FLOW_SKIP)->skip; + if(!skip) + return; + + skip->block = lp_build_flow_insert_block(flow); + skip->num_variables = flow->num_variables; + if(!skip->num_variables) { + skip->phi = NULL; + return; + } - LLVMPositionBuilderAtEnd(mask->builder, new_block); -#endif + skip->phi = MALLOC(skip->num_variables * sizeof *skip->phi); + if(!skip->phi) { + skip->num_variables = 0; + return; + } + + builder = LLVMCreateBuilder(); + LLVMPositionBuilderAtEnd(builder, skip->block); + + for(i = 0; i < skip->num_variables; ++i) + skip->phi[i] = LLVMBuildPhi(builder, LLVMTypeOf(*flow->variables[i]), ""); + + LLVMDisposeBuilder(builder); } -LLVMValueRef -lp_build_mask_end(struct lp_build_mask_context *mask) +void +lp_build_flow_skip_cond_break(struct lp_build_flow_context *flow, + LLVMValueRef cond) { - if(mask->skip_block) { - LLVMBasicBlockRef current_block = LLVMGetInsertBlock(mask->builder); + struct lp_build_flow_skip *skip; + LLVMBasicBlockRef current_block; + LLVMBasicBlockRef new_block; + unsigned i; - LLVMAddIncoming(mask->phi, &mask->value, ¤t_block, 1); - LLVMBuildBr(mask->builder, mask->skip_block); + skip = &lp_build_flow_peek(flow, LP_BUILD_FLOW_SKIP)->skip; + if(!skip) + return; - LLVMPositionBuilderAtEnd(mask->builder, mask->skip_block); + current_block = LLVMGetInsertBlock(flow->builder); - mask->value = mask->phi; - mask->phi = NULL; - mask->skip_block = NULL; + new_block = lp_build_flow_insert_block(flow); + + for(i = 0; i < skip->num_variables; ++i) { + assert(*flow->variables[i]); + LLVMAddIncoming(skip->phi[i], flow->variables[i], ¤t_block, 1); } + LLVMBuildCondBr(flow->builder, cond, skip->block, new_block); + + LLVMPositionBuilderAtEnd(flow->builder, new_block); + } + + +void +lp_build_flow_skip_end(struct lp_build_flow_context *flow) +{ + struct 
lp_build_flow_skip *skip; + LLVMBasicBlockRef current_block; + unsigned i; + + skip = &lp_build_flow_pop(flow, LP_BUILD_FLOW_SKIP)->skip; + if(!skip) + return; + + current_block = LLVMGetInsertBlock(flow->builder); + + for(i = 0; i < skip->num_variables; ++i) { + assert(*flow->variables[i]); + LLVMAddIncoming(skip->phi[i], flow->variables[i], ¤t_block, 1); + *flow->variables[i] = skip->phi[i]; + } + + LLVMBuildBr(flow->builder, skip->block); + LLVMPositionBuilderAtEnd(flow->builder, skip->block); + + FREE(skip->phi); +} + + +static void +lp_build_mask_check(struct lp_build_mask_context *mask) +{ + LLVMBuilderRef builder = mask->flow->builder; + LLVMValueRef cond; + + cond = LLVMBuildICmp(builder, + LLVMIntEQ, + LLVMBuildBitCast(builder, mask->value, mask->reg_type, ""), + LLVMConstNull(mask->reg_type), + ""); + + lp_build_flow_skip_cond_break(mask->flow, cond); +} + + +void +lp_build_mask_begin(struct lp_build_mask_context *mask, + struct lp_build_flow_context *flow, + union lp_type type, + LLVMValueRef value) +{ + memset(mask, 0, sizeof *mask); + + mask->flow = flow; + mask->reg_type = LLVMIntType(type.width * type.length); + mask->value = value; + + lp_build_flow_scope_begin(flow); + lp_build_flow_scope_declare(flow, &mask->value); + lp_build_flow_skip_begin(flow); + + lp_build_mask_check(mask); +} + + +void +lp_build_mask_update(struct lp_build_mask_context *mask, + LLVMValueRef value) +{ + mask->value = LLVMBuildAnd( mask->flow->builder, mask->value, value, ""); + + lp_build_mask_check(mask); +} + + +LLVMValueRef +lp_build_mask_end(struct lp_build_mask_context *mask) +{ + lp_build_flow_skip_end(mask->flow); + lp_build_flow_scope_end(mask->flow); return mask->value; } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.h b/src/gallium/drivers/llvmpipe/lp_bld_flow.h index 1b634ff038d..9d76e3064dd 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_flow.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.h @@ -41,23 +41,49 @@ union lp_type; +struct 
lp_build_flow_context; + + +struct lp_build_flow_context * +lp_build_flow_create(LLVMBuilderRef builder); + +void +lp_build_flow_destroy(struct lp_build_flow_context *flow); + +void +lp_build_flow_scope_begin(struct lp_build_flow_context *flow); + +void +lp_build_flow_scope_declare(struct lp_build_flow_context *flow, + LLVMValueRef *variable); + +void +lp_build_flow_scope_end(struct lp_build_flow_context *flow); + +void +lp_build_flow_skip_begin(struct lp_build_flow_context *flow); + +void +lp_build_flow_skip_cond_break(struct lp_build_flow_context *flow, + LLVMValueRef cond); + +void +lp_build_flow_skip_end(struct lp_build_flow_context *flow); + + struct lp_build_mask_context { - LLVMBuilderRef builder; + struct lp_build_flow_context *flow; LLVMTypeRef reg_type; LLVMValueRef value; - - LLVMValueRef phi; - - LLVMBasicBlockRef skip_block; }; void lp_build_mask_begin(struct lp_build_mask_context *mask, - LLVMBuilderRef builder, + struct lp_build_flow_context *flow, union lp_type type, LLVMValueRef value); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format.h b/src/gallium/drivers/llvmpipe/lp_bld_format.h index 01c8a752d18..5ee06560932 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_format.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_format.h @@ -31,20 +31,14 @@ /** * @file - * LLVM IR building helpers interfaces. - * - * We use LLVM-C bindings for now. They are not documented, but follow the C++ - * interfaces very closely, and appear to be complete enough for code - * genration. See - * http://npcontemplation.blogspot.com/2008/06/secret-of-llvm-c-bindings.html - * for a standalone example. + * Pixel format helpers. */ #include <llvm-c/Core.h> - -#include "pipe/p_format.h" +#include "pipe/p_format.h" +struct util_format_description; union lp_type; @@ -56,9 +50,9 @@ union lp_type; * @return RGBA in a 4 floats vector. 
*/ LLVMValueRef -lp_build_unpack_rgba(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef packed); +lp_build_unpack_rgba_aos(LLVMBuilderRef builder, + enum pipe_format format, + LLVMValueRef packed); /** @@ -67,9 +61,9 @@ lp_build_unpack_rgba(LLVMBuilderRef builder, * @param rgba 4 float vector with the unpacked components. */ LLVMValueRef -lp_build_pack_rgba(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef rgba); +lp_build_pack_rgba_aos(LLVMBuilderRef builder, + enum pipe_format format, + LLVMValueRef rgba); /** @@ -81,9 +75,9 @@ lp_build_pack_rgba(LLVMBuilderRef builder, * @return RGBA in a 4 floats vector. */ LLVMValueRef -lp_build_load_rgba(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef ptr); +lp_build_load_rgba_aos(LLVMBuilderRef builder, + enum pipe_format format, + LLVMValueRef ptr); /** @@ -92,10 +86,34 @@ lp_build_load_rgba(LLVMBuilderRef builder, * @param rgba 4 float vector with the unpacked components. */ void -lp_build_store_rgba(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef ptr, - LLVMValueRef rgba); +lp_build_store_rgba_aos(LLVMBuilderRef builder, + enum pipe_format format, + LLVMValueRef ptr, + LLVMValueRef rgba); +LLVMValueRef +lp_build_gather(LLVMBuilderRef builder, + unsigned length, + unsigned src_width, + unsigned dst_width, + LLVMValueRef base_ptr, + LLVMValueRef offsets); + + +void +lp_build_unpack_rgba_soa(LLVMBuilderRef builder, + const struct util_format_description *format_desc, + union lp_type type, + LLVMValueRef packed, + LLVMValueRef *rgba); + + +void +lp_build_load_rgba_soa(LLVMBuilderRef builder, + const struct util_format_description *format_desc, + union lp_type type, + LLVMValueRef base_ptr, + LLVMValueRef offsets, + LLVMValueRef *rgba); #endif /* !LP_BLD_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c index dcbc0076c7d..b9b5d84bed5 100644 --- 
a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c @@ -32,9 +32,9 @@ LLVMValueRef -lp_build_unpack_rgba(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef packed) +lp_build_unpack_rgba_aos(LLVMBuilderRef builder, + enum pipe_format format, + LLVMValueRef packed) { const struct util_format_description *desc; LLVMTypeRef type; @@ -152,9 +152,9 @@ lp_build_unpack_rgba(LLVMBuilderRef builder, LLVMValueRef -lp_build_pack_rgba(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef rgba) +lp_build_pack_rgba_aos(LLVMBuilderRef builder, + enum pipe_format format, + LLVMValueRef rgba) { const struct util_format_description *desc; LLVMTypeRef type; @@ -250,9 +250,9 @@ lp_build_pack_rgba(LLVMBuilderRef builder, LLVMValueRef -lp_build_load_rgba(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef ptr) +lp_build_load_rgba_aos(LLVMBuilderRef builder, + enum pipe_format format, + LLVMValueRef ptr) { const struct util_format_description *desc; LLVMTypeRef type; @@ -272,15 +272,15 @@ lp_build_load_rgba(LLVMBuilderRef builder, packed = LLVMBuildLoad(builder, ptr, ""); - return lp_build_unpack_rgba(builder, format, packed); + return lp_build_unpack_rgba_aos(builder, format, packed); } void -lp_build_store_rgba(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef ptr, - LLVMValueRef rgba) +lp_build_store_rgba_aos(LLVMBuilderRef builder, + enum pipe_format format, + LLVMValueRef ptr, + LLVMValueRef rgba) { const struct util_format_description *desc; LLVMTypeRef type; @@ -294,7 +294,7 @@ lp_build_store_rgba(LLVMBuilderRef builder, type = LLVMIntType(desc->block.bits); - packed = lp_build_pack_rgba(builder, format, rgba); + packed = lp_build_pack_rgba_aos(builder, format, rgba); ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(type, 0), ""); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c new file mode 100644 index 
00000000000..569e8d10a31 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c @@ -0,0 +1,208 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "util/u_format.h" + +#include "lp_bld_type.h" +#include "lp_bld_const.h" +#include "lp_bld_conv.h" +#include "lp_bld_format.h" + + +/** + * Gather elements from scatter positions in memory into a single vector. + * + * @param src_width src element width + * @param dst_width result element width (source will be expanded to fit) + * @param length length of the offsets, + * @param base_ptr base pointer, should be a i8 pointer type. 
+ * @param offsets vector with offsets + */ +LLVMValueRef +lp_build_gather(LLVMBuilderRef builder, + unsigned length, + unsigned src_width, + unsigned dst_width, + LLVMValueRef base_ptr, + LLVMValueRef offsets) +{ + LLVMTypeRef src_type = LLVMIntType(src_width); + LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0); + LLVMTypeRef dst_elem_type = LLVMIntType(dst_width); + LLVMTypeRef dst_vec_type = LLVMVectorType(dst_elem_type, length); + LLVMValueRef res; + unsigned i; + + res = LLVMGetUndef(dst_vec_type); + for(i = 0; i < length; ++i) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef elem_offset; + LLVMValueRef elem_ptr; + LLVMValueRef elem; + + elem_offset = LLVMBuildExtractElement(builder, offsets, index, ""); + elem_ptr = LLVMBuildGEP(builder, base_ptr, &elem_offset, 1, ""); + elem_ptr = LLVMBuildBitCast(builder, elem_ptr, src_ptr_type, ""); + elem = LLVMBuildLoad(builder, elem_ptr, ""); + + assert(src_width <= dst_width); + if(src_width > dst_width) + elem = LLVMBuildTrunc(builder, elem, dst_elem_type, ""); + if(src_width < dst_width) + elem = LLVMBuildZExt(builder, elem, dst_elem_type, ""); + + res = LLVMBuildInsertElement(builder, res, elem, index, ""); + } + + return res; +} + + +static LLVMValueRef +lp_build_format_swizzle(union lp_type type, + const LLVMValueRef *inputs, + enum util_format_swizzle swizzle) +{ + switch (swizzle) { + case UTIL_FORMAT_SWIZZLE_X: + case UTIL_FORMAT_SWIZZLE_Y: + case UTIL_FORMAT_SWIZZLE_Z: + case UTIL_FORMAT_SWIZZLE_W: + return inputs[swizzle]; + case UTIL_FORMAT_SWIZZLE_0: + return lp_build_zero(type); + case UTIL_FORMAT_SWIZZLE_1: + return lp_build_one(type); + case UTIL_FORMAT_SWIZZLE_NONE: + return lp_build_undef(type); + default: + assert(0); + return lp_build_undef(type); + } +} + + +void +lp_build_unpack_rgba_soa(LLVMBuilderRef builder, + const struct util_format_description *format_desc, + union lp_type type, + LLVMValueRef packed, + LLVMValueRef *rgba) +{ + LLVMValueRef inputs[4]; + 
unsigned start; + unsigned chan; + + /* FIXME: Support more formats */ + assert(format_desc->layout == UTIL_FORMAT_LAYOUT_ARITH); + assert(format_desc->block.width == 1); + assert(format_desc->block.height == 1); + assert(format_desc->block.bits <= 32); + + /* Decode the input vector components */ + start = 0; + for (chan = 0; chan < 4; ++chan) { + unsigned width = format_desc->channel[chan].size; + unsigned stop = start + width; + LLVMValueRef input; + + input = packed; + + switch(format_desc->channel[chan].type) { + case UTIL_FORMAT_TYPE_VOID: + input = NULL; + break; + + case UTIL_FORMAT_TYPE_UNSIGNED: + if(type.floating) { + if(start) + input = LLVMBuildLShr(builder, input, lp_build_int_const_scalar(type, start), ""); + if(stop < format_desc->block.bits) { + unsigned mask = ((unsigned long long)1 << width) - 1; + input = LLVMBuildAnd(builder, input, lp_build_int_const_scalar(type, mask), ""); + } + + if(format_desc->channel[chan].normalized) + input = lp_build_unsigned_norm_to_float(builder, width, type, input); + else + input = LLVMBuildFPToSI(builder, input, lp_build_vec_type(type), ""); + } + else { + /* FIXME */ + assert(0); + input = lp_build_undef(type); + } + break; + + default: + /* fall through */ + input = lp_build_undef(type); + break; + } + + inputs[chan] = input; + + start = stop; + } + + if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { + enum util_format_swizzle swizzle = format_desc->swizzle[0]; + LLVMValueRef depth = lp_build_format_swizzle(type, inputs, swizzle); + rgba[2] = rgba[1] = rgba[0] = depth; + rgba[3] = lp_build_one(type); + } + else { + for (chan = 0; chan < 4; ++chan) { + enum util_format_swizzle swizzle = format_desc->swizzle[chan]; + rgba[chan] = lp_build_format_swizzle(type, inputs, swizzle); + } + } +} + + +void +lp_build_load_rgba_soa(LLVMBuilderRef builder, + const struct util_format_description *format_desc, + union lp_type type, + LLVMValueRef base_ptr, + LLVMValueRef offsets, + LLVMValueRef *rgba) +{ + 
LLVMValueRef packed; + + assert(format_desc->layout == UTIL_FORMAT_LAYOUT_ARITH); + assert(format_desc->block.width == 1); + assert(format_desc->block.height == 1); + assert(format_desc->block.bits <= 32); + + packed = lp_build_gather(builder, + type.length, format_desc->block.bits, type.width, + base_ptr, offsets); + + lp_build_unpack_rgba_soa(builder, format_desc, type, packed, rgba); +} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample.h b/src/gallium/drivers/llvmpipe/lp_bld_sample.h new file mode 100644 index 00000000000..6f565af76d1 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample.h @@ -0,0 +1,135 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/** + * @file + * Texture sampling. + * + * @author Jose Fonseca <[email protected]> + */ + +#ifndef LP_BLD_SAMPLE_H +#define LP_BLD_SAMPLE_H + + +#include <llvm-c/Core.h> + +struct pipe_texture; +struct pipe_sampler_state; +union lp_type; + + +/** + * Sampler static state. + * + * These are the bits of state from pipe_texture and pipe_sampler_state that + * are embedded in the generated code. + */ +struct lp_sampler_static_state +{ + /* pipe_texture's state */ + enum pipe_format format; + unsigned target:2; + unsigned pot_width:1; + unsigned pot_height:1; + unsigned pot_depth:1; + + /* pipe_sampler_state's state */ + unsigned wrap_s:3; + unsigned wrap_t:3; + unsigned wrap_r:3; + unsigned min_img_filter:2; + unsigned min_mip_filter:2; + unsigned mag_img_filter:2; + unsigned compare_mode:1; + unsigned compare_func:3; + unsigned normalized_coords:1; + unsigned prefilter:4; +}; + + +/** + * Sampler dynamic state. + * + * These are the bits of state from pipe_texture and pipe_sampler_state that + * are computed in runtime. + * + * There are obtained through callbacks, as we don't want to tie the texture + * sampling code generation logic to any particular texture layout or pipe + * driver. + */ +struct lp_sampler_dynamic_state +{ + + /** Obtain the base texture width. */ + LLVMValueRef + (*width)( struct lp_sampler_dynamic_state *state, + LLVMBuilderRef builder, + unsigned unit); + + /** Obtain the base texture height. */ + LLVMValueRef + (*height)( struct lp_sampler_dynamic_state *state, + LLVMBuilderRef builder, + unsigned unit); + + LLVMValueRef + (*stride)( struct lp_sampler_dynamic_state *state, + LLVMBuilderRef builder, + unsigned unit); + + LLVMValueRef + (*data_ptr)( struct lp_sampler_dynamic_state *state, + LLVMBuilderRef builder, + unsigned unit); + +}; + + +/** + * Derive the sampler static state. 
+ */ +void +lp_sampler_static_state(struct lp_sampler_static_state *state, + const struct pipe_texture *texture, + const struct pipe_sampler_state *sampler); + + +void +lp_build_sample_soa(LLVMBuilderRef builder, + const struct lp_sampler_static_state *static_state, + struct lp_sampler_dynamic_state *dynamic_state, + union lp_type fp_type, + unsigned unit, + unsigned num_coords, + const LLVMValueRef *coords, + LLVMValueRef lodbias, + LLVMValueRef *texel); + + + +#endif /* LP_BLD_SAMPLE_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c new file mode 100644 index 00000000000..2913b17d3f8 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c @@ -0,0 +1,404 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Texture sampling. + * + * @author Jose Fonseca <[email protected]> + */ + +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "util/u_debug.h" +#include "util/u_memory.h" +#include "util/u_math.h" +#include "util/u_format.h" +#include "lp_bld_debug.h" +#include "lp_bld_type.h" +#include "lp_bld_const.h" +#include "lp_bld_arit.h" +#include "lp_bld_logic.h" +#include "lp_bld_swizzle.h" +#include "lp_bld_format.h" +#include "lp_bld_sample.h" + + +void +lp_sampler_static_state(struct lp_sampler_static_state *state, + const struct pipe_texture *texture, + const struct pipe_sampler_state *sampler) +{ + memset(state, 0, sizeof *state); + + if(!texture) + return; + + if(!sampler) + return; + + state->format = texture->format; + state->target = texture->target; + state->pot_width = util_is_pot(texture->width[0]); + state->pot_height = util_is_pot(texture->height[0]); + state->pot_depth = util_is_pot(texture->depth[0]); + + state->wrap_s = sampler->wrap_s; + state->wrap_t = sampler->wrap_t; + state->wrap_r = sampler->wrap_r; + state->min_img_filter = sampler->min_img_filter; + state->min_mip_filter = sampler->min_mip_filter; + state->mag_img_filter = sampler->mag_img_filter; + if(sampler->compare_mode) { + state->compare_mode = sampler->compare_mode; + state->compare_func = sampler->compare_func; + } + state->normalized_coords = sampler->normalized_coords; + state->prefilter = sampler->prefilter; +} + + + +/** + * Keep all information for sampling code generation in a single place. 
+ */ +struct lp_build_sample_context +{ + LLVMBuilderRef builder; + + const struct lp_sampler_static_state *static_state; + + struct lp_sampler_dynamic_state *dynamic_state; + + const struct util_format_description *format_desc; + + /** Incoming coordinates type and build context */ + union lp_type coord_type; + struct lp_build_context coord_bld; + + /** Integer coordinates */ + union lp_type int_coord_type; + struct lp_build_context int_coord_bld; + + /** Output texels type and build context */ + union lp_type texel_type; + struct lp_build_context texel_bld; +}; + + +static void +lp_build_sample_texel(struct lp_build_sample_context *bld, + LLVMValueRef x, + LLVMValueRef y, + LLVMValueRef y_stride, + LLVMValueRef data_ptr, + LLVMValueRef *texel) +{ + struct lp_build_context *int_coord_bld = &bld->int_coord_bld; + LLVMValueRef x_stride; + LLVMValueRef offset; + + x_stride = lp_build_const_scalar(bld->int_coord_type, bld->format_desc->block.bits/8); + + if(bld->format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { + LLVMValueRef x_lo, x_hi; + LLVMValueRef y_lo, y_hi; + LLVMValueRef x_stride_lo, x_stride_hi; + LLVMValueRef y_stride_lo, y_stride_hi; + LLVMValueRef x_offset_lo, x_offset_hi; + LLVMValueRef y_offset_lo, y_offset_hi; + LLVMValueRef offset_lo, offset_hi; + + x_lo = LLVMBuildAnd(bld->builder, x, int_coord_bld->one, ""); + y_lo = LLVMBuildAnd(bld->builder, y, int_coord_bld->one, ""); + + x_hi = LLVMBuildLShr(bld->builder, x, int_coord_bld->one, ""); + y_hi = LLVMBuildLShr(bld->builder, y, int_coord_bld->one, ""); + + x_stride_lo = x_stride; + y_stride_lo = lp_build_const_scalar(bld->int_coord_type, 2*bld->format_desc->block.bits/8); + + x_stride_hi = lp_build_const_scalar(bld->int_coord_type, 4*bld->format_desc->block.bits/8); + y_stride_hi = LLVMBuildShl(bld->builder, y_stride, int_coord_bld->one, ""); + + x_offset_lo = lp_build_mul(int_coord_bld, x_lo, x_stride_lo); + y_offset_lo = lp_build_mul(int_coord_bld, y_lo, y_stride_lo); + offset_lo = 
lp_build_add(int_coord_bld, x_offset_lo, y_offset_lo); + + x_offset_hi = lp_build_mul(int_coord_bld, x_hi, x_stride_hi); + y_offset_hi = lp_build_mul(int_coord_bld, y_hi, y_stride_hi); + offset_hi = lp_build_add(int_coord_bld, x_offset_hi, y_offset_hi); + + offset = lp_build_add(int_coord_bld, offset_hi, offset_lo); + } + else { + LLVMValueRef x_offset; + LLVMValueRef y_offset; + + x_offset = lp_build_mul(int_coord_bld, x, x_stride); + y_offset = lp_build_mul(int_coord_bld, y, y_stride); + + offset = lp_build_add(int_coord_bld, x_offset, y_offset); + } + + lp_build_load_rgba_soa(bld->builder, + bld->format_desc, + bld->texel_type, + data_ptr, + offset, + texel); +} + + +static LLVMValueRef +lp_build_sample_wrap(struct lp_build_sample_context *bld, + LLVMValueRef coord, + LLVMValueRef length, + boolean is_pot, + unsigned wrap_mode) +{ + struct lp_build_context *int_coord_bld = &bld->int_coord_bld; + LLVMValueRef length_minus_one; + + length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one); + + switch(wrap_mode) { + case PIPE_TEX_WRAP_REPEAT: + if(is_pot) + coord = LLVMBuildAnd(bld->builder, coord, length_minus_one, ""); + else + /* Signed remainder won't give the right results for negative + * dividends but unsigned remainder does.*/ + coord = LLVMBuildURem(bld->builder, coord, length, ""); + break; + + case PIPE_TEX_WRAP_CLAMP: + coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero); + coord = lp_build_min(int_coord_bld, coord, length_minus_one); + break; + + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + case PIPE_TEX_WRAP_MIRROR_REPEAT: + case PIPE_TEX_WRAP_MIRROR_CLAMP: + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + default: + assert(0); + } + + return coord; +} + + +static void +lp_build_sample_2d_nearest_soa(struct lp_build_sample_context *bld, + LLVMValueRef s, + LLVMValueRef t, + LLVMValueRef width, + LLVMValueRef height, + LLVMValueRef stride, + LLVMValueRef 
data_ptr, + LLVMValueRef *texel) +{ + LLVMValueRef x; + LLVMValueRef y; + + x = lp_build_ifloor(&bld->coord_bld, s); + y = lp_build_ifloor(&bld->coord_bld, t); + + x = lp_build_sample_wrap(bld, x, width, bld->static_state->pot_width, bld->static_state->wrap_s); + y = lp_build_sample_wrap(bld, y, height, bld->static_state->pot_height, bld->static_state->wrap_t); + + lp_build_sample_texel(bld, x, y, stride, data_ptr, texel); +} + + +static void +lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld, + LLVMValueRef s, + LLVMValueRef t, + LLVMValueRef width, + LLVMValueRef height, + LLVMValueRef stride, + LLVMValueRef data_ptr, + LLVMValueRef *texel) +{ + LLVMValueRef half; + LLVMValueRef s_ipart; + LLVMValueRef t_ipart; + LLVMValueRef s_fpart; + LLVMValueRef t_fpart; + LLVMValueRef x0, x1; + LLVMValueRef y0, y1; + LLVMValueRef neighbors[2][2][4]; + unsigned chan; + + half = lp_build_const_scalar(bld->coord_type, 0.5); + s = lp_build_sub(&bld->coord_bld, s, half); + t = lp_build_sub(&bld->coord_bld, t, half); + + s_ipart = lp_build_floor(&bld->coord_bld, s); + t_ipart = lp_build_floor(&bld->coord_bld, t); + + s_fpart = lp_build_sub(&bld->coord_bld, s, s_ipart); + t_fpart = lp_build_sub(&bld->coord_bld, t, t_ipart); + + x0 = lp_build_int(&bld->coord_bld, s_ipart); + y0 = lp_build_int(&bld->coord_bld, t_ipart); + + x0 = lp_build_sample_wrap(bld, x0, width, bld->static_state->pot_width, bld->static_state->wrap_s); + y0 = lp_build_sample_wrap(bld, y0, height, bld->static_state->pot_height, bld->static_state->wrap_t); + + x1 = lp_build_add(&bld->int_coord_bld, x0, bld->int_coord_bld.one); + y1 = lp_build_add(&bld->int_coord_bld, y0, bld->int_coord_bld.one); + + x1 = lp_build_sample_wrap(bld, x1, width, bld->static_state->pot_width, bld->static_state->wrap_s); + y1 = lp_build_sample_wrap(bld, y1, height, bld->static_state->pot_height, bld->static_state->wrap_t); + + lp_build_sample_texel(bld, x0, y0, stride, data_ptr, neighbors[0][0]); + 
lp_build_sample_texel(bld, x1, y0, stride, data_ptr, neighbors[0][1]); + lp_build_sample_texel(bld, x0, y1, stride, data_ptr, neighbors[1][0]); + lp_build_sample_texel(bld, x1, y1, stride, data_ptr, neighbors[1][1]); + + /* TODO: Don't interpolate missing channels */ + for(chan = 0; chan < 4; ++chan) { + texel[chan] = lp_build_lerp_2d(&bld->texel_bld, + s_fpart, t_fpart, + neighbors[0][0][chan], + neighbors[0][1][chan], + neighbors[1][0][chan], + neighbors[1][1][chan]); + } +} + + +static void +lp_build_sample_compare(struct lp_build_sample_context *bld, + LLVMValueRef p, + LLVMValueRef *texel) +{ + struct lp_build_context *texel_bld = &bld->texel_bld; + LLVMValueRef res; + unsigned chan; + + if(!bld->static_state->compare_mode) + return; + + /* TODO: Compare before swizzling, to avoid redundant computations */ + res = NULL; + for(chan = 0; chan < 4; ++chan) { + LLVMValueRef cmp; + cmp = lp_build_cmp(texel_bld, bld->static_state->compare_func, p, texel[chan]); + cmp = lp_build_select(texel_bld, cmp, texel_bld->one, texel_bld->zero); + + if(res) + res = lp_build_add(texel_bld, res, cmp); + else + res = cmp; + } + + assert(res); + res = lp_build_mul(texel_bld, res, lp_build_const_scalar(texel_bld->type, 0.25)); + + /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */ + for(chan = 0; chan < 3; ++chan) + texel[chan] = res; + texel[3] = texel_bld->one; +} + + +void +lp_build_sample_soa(LLVMBuilderRef builder, + const struct lp_sampler_static_state *static_state, + struct lp_sampler_dynamic_state *dynamic_state, + union lp_type type, + unsigned unit, + unsigned num_coords, + const LLVMValueRef *coords, + LLVMValueRef lodbias, + LLVMValueRef *texel) +{ + struct lp_build_sample_context bld; + LLVMValueRef width; + LLVMValueRef height; + LLVMValueRef stride; + LLVMValueRef data_ptr; + LLVMValueRef s; + LLVMValueRef t; + LLVMValueRef p; + + /* Setup our build context */ + memset(&bld, 0, sizeof bld); + bld.builder = builder; + bld.static_state = 
static_state; + bld.dynamic_state = dynamic_state; + bld.format_desc = util_format_description(static_state->format); + bld.coord_type = type; + bld.int_coord_type = lp_int_type(type); + bld.texel_type = type; + lp_build_context_init(&bld.coord_bld, builder, bld.coord_type); + lp_build_context_init(&bld.int_coord_bld, builder, bld.int_coord_type); + lp_build_context_init(&bld.texel_bld, builder, bld.texel_type); + + /* Get the dynamic state */ + width = dynamic_state->width(dynamic_state, builder, unit); + height = dynamic_state->height(dynamic_state, builder, unit); + stride = dynamic_state->stride(dynamic_state, builder, unit); + data_ptr = dynamic_state->data_ptr(dynamic_state, builder, unit); + + s = coords[0]; + t = coords[1]; + p = coords[2]; + + width = lp_build_broadcast_scalar(&bld.int_coord_bld, width); + height = lp_build_broadcast_scalar(&bld.int_coord_bld, height); + stride = lp_build_broadcast_scalar(&bld.int_coord_bld, stride); + + if(static_state->target == PIPE_TEXTURE_1D) + t = bld.coord_bld.zero; + + if(static_state->normalized_coords) { + LLVMTypeRef coord_vec_type = lp_build_vec_type(bld.coord_type); + LLVMValueRef fp_width = LLVMBuildSIToFP(builder, width, coord_vec_type, ""); + LLVMValueRef fp_height = LLVMBuildSIToFP(builder, height, coord_vec_type, ""); + s = lp_build_mul(&bld.coord_bld, s, fp_width); + t = lp_build_mul(&bld.coord_bld, t, fp_height); + } + + switch (static_state->min_img_filter) { + case PIPE_TEX_FILTER_NEAREST: + lp_build_sample_2d_nearest_soa(&bld, s, t, width, height, stride, data_ptr, texel); + break; + case PIPE_TEX_FILTER_LINEAR: + case PIPE_TEX_FILTER_ANISO: + lp_build_sample_2d_linear_soa(&bld, s, t, width, height, stride, data_ptr, texel); + break; + } + + lp_build_sample_compare(&bld, p, texel); +} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_struct.c b/src/gallium/drivers/llvmpipe/lp_bld_struct.c index 14d2b10df9c..3998ac374fe 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_struct.c +++ 
b/src/gallium/drivers/llvmpipe/lp_bld_struct.c @@ -42,17 +42,30 @@ LLVMValueRef +lp_build_struct_get_ptr(LLVMBuilderRef builder, + LLVMValueRef ptr, + unsigned member, + const char *name) +{ + LLVMValueRef indices[2]; + LLVMValueRef member_ptr; + indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); + indices[1] = LLVMConstInt(LLVMInt32Type(), member, 0); + member_ptr = LLVMBuildGEP(builder, ptr, indices, Elements(indices), ""); + lp_build_name(member_ptr, "%s.%s_ptr", LLVMGetValueName(ptr), name); + return member_ptr; +} + + +LLVMValueRef lp_build_struct_get(LLVMBuilderRef builder, LLVMValueRef ptr, unsigned member, const char *name) { - LLVMValueRef indices[2]; LLVMValueRef member_ptr; LLVMValueRef res; - indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); - indices[1] = LLVMConstInt(LLVMInt32Type(), member, 0); - member_ptr = LLVMBuildGEP(builder, ptr, indices, Elements(indices), ""); + member_ptr = lp_build_struct_get_ptr(builder, ptr, member, name); res = LLVMBuildLoad(builder, member_ptr, ""); lp_build_name(res, "%s.%s", LLVMGetValueName(ptr), name); return res; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_struct.h b/src/gallium/drivers/llvmpipe/lp_bld_struct.h index cbefdc9f815..740392f5611 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_struct.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_struct.h @@ -53,6 +53,18 @@ offsetof(_ctype, _cmember)) +/** + * Get value pointer to a structure member. + */ +LLVMValueRef +lp_build_struct_get_ptr(LLVMBuilderRef builder, + LLVMValueRef ptr, + unsigned member, + const char *name); + +/** + * Get the value of a structure member. 
+ */ LLVMValueRef lp_build_struct_get(LLVMBuilderRef builder, LLVMValueRef ptr, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h b/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h index 912db24aecb..10c251c4162 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h @@ -44,14 +44,30 @@ struct lp_build_context; struct lp_build_mask_context; -typedef void -(*lp_emit_fetch_texel_soa_callback)( LLVMBuilderRef builder, - void *context, - unsigned unit, - unsigned num_coords, - const LLVMValueRef *coords, - LLVMValueRef lodbias, - LLVMValueRef *texel); +/** + * Sampler code generation interface. + * + * Although texture sampling is a requirement for TGSI translation, it is + * a very different problem with several different approaches to it. This + * structure establishes an interface for texture sampling code generation, so + * that we can easily use different texture sampling strategies. + */ +struct lp_build_sampler_soa +{ + void + (*destroy)( struct lp_build_sampler_soa *sampler ); + + void + (*emit_fetch_texel)( struct lp_build_sampler_soa *sampler, + LLVMBuilderRef builder, + union lp_type type, + unsigned unit, + unsigned num_coords, + const LLVMValueRef *coords, + LLVMValueRef lodbias, + LLVMValueRef *texel); +}; + void lp_build_tgsi_soa(LLVMBuilderRef builder, @@ -62,8 +78,7 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, const LLVMValueRef *pos, const LLVMValueRef (*inputs)[4], LLVMValueRef (*outputs)[4], - lp_emit_fetch_texel_soa_callback emit_fetch_texel, - void *emit_fetch_texel_context); + struct lp_build_sampler_soa *sampler); #endif /* LP_BLD_TGSI_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c index bce26607f90..3ce379de12f 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c @@ -88,8 +88,7 @@ struct lp_build_tgsi_soa_context const LLVMValueRef (*inputs)[NUM_CHANNELS]; LLVMValueRef 
(*outputs)[NUM_CHANNELS]; - lp_emit_fetch_texel_soa_callback emit_fetch_texel; - void *emit_fetch_texel_context; + struct lp_build_sampler_soa *sampler; LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS]; LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS]; @@ -289,8 +288,11 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, coords[i] = lp_build_mul(&bld->base, coords[i], oow); } - bld->emit_fetch_texel(bld->base.builder, bld->emit_fetch_texel_context, - unit, num_coords, coords, lodbias, texel); + bld->sampler->emit_fetch_texel(bld->sampler, + bld->base.builder, + bld->base.type, + unit, num_coords, coords, lodbias, + texel); FOR_EACH_DST0_ENABLED_CHANNEL( inst, i ) { emit_store( bld, inst, 0, i, texel[i] ); @@ -687,10 +689,6 @@ emit_instruction( return 0; break; - case TGSI_OPCODE_CND0: - return 0; - break; - case TGSI_OPCODE_DP2A: tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */ tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */ @@ -1287,8 +1285,7 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, const LLVMValueRef *pos, const LLVMValueRef (*inputs)[NUM_CHANNELS], LLVMValueRef (*outputs)[NUM_CHANNELS], - lp_emit_fetch_texel_soa_callback emit_fetch_texel, - void *emit_fetch_texel_context) + struct lp_build_sampler_soa *sampler) { struct lp_build_tgsi_soa_context bld; struct tgsi_parse_context parse; @@ -1303,8 +1300,7 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, bld.inputs = inputs; bld.outputs = outputs; bld.consts_ptr = consts_ptr; - bld.emit_fetch_texel = emit_fetch_texel; - bld.emit_fetch_texel_context = emit_fetch_texel_context; + bld.sampler = sampler; tgsi_parse_init( &parse, tokens ); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.c b/src/gallium/drivers/llvmpipe/lp_bld_type.c index 8e0026fd973..577644b7ab8 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_type.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_type.c @@ -157,6 +157,17 @@ lp_build_int_vec_type(union lp_type type) } +union lp_type +lp_int_type(union lp_type type) 
+{ + union lp_type int_type; + int_type.value = 0; + int_type.width = type.width; + int_type.length = type.length; + return int_type; +} + + void lp_build_context_init(struct lp_build_context *bld, LLVMBuilderRef builder, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.h b/src/gallium/drivers/llvmpipe/lp_bld_type.h index 3ce566be641..9933e0b45c3 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_type.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_type.h @@ -165,6 +165,10 @@ LLVMTypeRef lp_build_int_vec_type(union lp_type type); +union lp_type +lp_int_type(union lp_type type); + + void lp_build_context_init(struct lp_build_context *bld, LLVMBuilderRef builder, diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index d288460a1b8..9465f763d50 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -44,15 +44,47 @@ static void lp_jit_init_globals(struct llvmpipe_screen *screen) { - /* struct lp_jit_context */ + LLVMTypeRef texture_type; + + /* struct lp_jit_texture */ { LLVMTypeRef elem_types[4]; + + elem_types[LP_JIT_TEXTURE_WIDTH] = LLVMInt32Type(); + elem_types[LP_JIT_TEXTURE_HEIGHT] = LLVMInt32Type(); + elem_types[LP_JIT_TEXTURE_STRIDE] = LLVMInt32Type(); + elem_types[LP_JIT_TEXTURE_DATA] = LLVMPointerType(LLVMInt8Type(), 0); + + texture_type = LLVMStructType(elem_types, Elements(elem_types), 0); + + LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, width, + screen->target, texture_type, + LP_JIT_TEXTURE_WIDTH); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, height, + screen->target, texture_type, + LP_JIT_TEXTURE_HEIGHT); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, stride, + screen->target, texture_type, + LP_JIT_TEXTURE_STRIDE); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, data, + screen->target, texture_type, + LP_JIT_TEXTURE_DATA); + LP_CHECK_STRUCT_SIZE(struct lp_jit_texture, + screen->target, texture_type); + + LLVMAddTypeName(screen->module, "texture", texture_type); + } + 
+ /* struct lp_jit_context */ + { + LLVMTypeRef elem_types[5]; LLVMTypeRef context_type; elem_types[0] = LLVMPointerType(LLVMFloatType(), 0); /* constants */ elem_types[1] = LLVMPointerType(LLVMInt8Type(), 0); /* samplers */ elem_types[2] = LLVMFloatType(); /* alpha_ref_value */ elem_types[3] = LLVMPointerType(LLVMInt8Type(), 0); /* blend_color */ + elem_types[4] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); /* textures */ context_type = LLVMStructType(elem_types, Elements(elem_types), 0); @@ -64,6 +96,9 @@ lp_jit_init_globals(struct llvmpipe_screen *screen) screen->target, context_type, 2); LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, blend_color, screen->target, context_type, 3); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, textures, + screen->target, context_type, + LP_JIT_CONTEXT_TEXTURES_INDEX); LP_CHECK_STRUCT_SIZE(struct lp_jit_context, screen->target, context_type); diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h index a7fb60f9f5c..c3e3e1af672 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.h +++ b/src/gallium/drivers/llvmpipe/lp_jit.h @@ -38,11 +38,31 @@ #include "lp_bld_struct.h" +#include "pipe/p_state.h" + struct tgsi_sampler; struct llvmpipe_screen; +struct lp_jit_texture +{ + uint32_t width; + uint32_t height; + uint32_t stride; + const void *data; +}; + + +enum { + LP_JIT_TEXTURE_WIDTH = 0, + LP_JIT_TEXTURE_HEIGHT, + LP_JIT_TEXTURE_STRIDE, + LP_JIT_TEXTURE_DATA +}; + + + /** * This structure is passed directly to the generated fragment shader. 
* @@ -65,6 +85,8 @@ struct lp_jit_context /* TODO: blend constant color */ uint8_t *blend_color; + + struct lp_jit_texture textures[PIPE_MAX_SAMPLERS]; }; @@ -80,6 +102,11 @@ struct lp_jit_context #define lp_jit_context_blend_color(_builder, _ptr) \ lp_build_struct_get(_builder, _ptr, 3, "blend_color") +#define LP_JIT_CONTEXT_TEXTURES_INDEX 4 + +#define lp_jit_context_textures(_builder, _ptr) \ + lp_build_struct_get_ptr(_builder, _ptr, LP_JIT_CONTEXT_TEXTURES_INDEX, "textures") + typedef void (*lp_jit_frag_func)(struct lp_jit_context *context, diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index d145f6d6bbc..f9e254efcae 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -44,6 +44,7 @@ #include "pipe/p_thread.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "lp_bld_debug.h" #include "lp_tile_cache.h" #include "lp_tile_soa.h" @@ -164,10 +165,12 @@ shade_quads(struct llvmpipe_context *llvmpipe, /* TODO: blend color */ - assert((((uintptr_t)mask) & 0xf) == 0); - assert((((uintptr_t)depth) & 0xf) == 0); - assert((((uintptr_t)color) & 0xf) == 0); - assert((((uintptr_t)llvmpipe->jit_context.blend_color) & 0xf) == 0); + /* XXX: This will most likely fail on 32bit x86 without -mstackrealign */ + assert(lp_check_alignment(mask, 16)); + + assert(lp_check_alignment(depth, 16)); + assert(lp_check_alignment(color, 16)); + assert(lp_check_alignment(llvmpipe->jit_context.blend_color, 16)); /* run shader */ fs->current->jit_function( &llvmpipe->jit_context, diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h index fb10329887d..7b26ce61a38 100644 --- a/src/gallium/drivers/llvmpipe/lp_state.h +++ b/src/gallium/drivers/llvmpipe/lp_state.h @@ -36,6 +36,7 @@ #include "pipe/p_state.h" #include "tgsi/tgsi_scan.h" #include "lp_jit.h" +#include "lp_bld_sample.h" /* for struct lp_sampler_static_state */ #define LP_NEW_VIEWPORT 0x1 @@ 
-57,16 +58,20 @@ struct tgsi_sampler; struct vertex_info; - +struct pipe_context; +struct llvmpipe_context; struct lp_fragment_shader; struct lp_fragment_shader_variant_key { + enum pipe_format zsbuf_format; struct pipe_depth_state depth; struct pipe_alpha_state alpha; struct pipe_blend_state blend; + + struct lp_sampler_static_state sampler[PIPE_MAX_SAMPLERS]; }; diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index 6fbb057937e..e87976b9f36 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -250,7 +250,9 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ) if (llvmpipe->dirty & (LP_NEW_FS | LP_NEW_BLEND | - LP_NEW_DEPTH_STENCIL_ALPHA)) + LP_NEW_DEPTH_STENCIL_ALPHA | + LP_NEW_SAMPLER | + LP_NEW_TEXTURE)) llvmpipe_update_fs( llvmpipe ); diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 94170bd7161..618cf1ffb8d 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -85,6 +85,7 @@ #include "lp_context.h" #include "lp_state.h" #include "lp_quad.h" +#include "lp_tex_sample.h" static const unsigned char quad_offset_x[4] = {0, 1, 0, 1}; @@ -130,9 +131,8 @@ generate_pos0(LLVMBuilderRef builder, * Generate the depth test. 
*/ static void -generate_depth(struct llvmpipe_context *lp, - LLVMBuilderRef builder, - const struct pipe_depth_state *state, +generate_depth(LLVMBuilderRef builder, + const struct lp_fragment_shader_variant_key *key, union lp_type src_type, struct lp_build_mask_context *mask, LLVMValueRef src, @@ -141,10 +141,10 @@ generate_depth(struct llvmpipe_context *lp, const struct util_format_description *format_desc; union lp_type dst_type; - if(!lp->framebuffer.zsbuf) + if(!key->depth.enabled) return; - format_desc = util_format_description(lp->framebuffer.zsbuf->format); + format_desc = util_format_description(key->zsbuf_format); assert(format_desc); /* Pick the depth type. */ @@ -164,7 +164,7 @@ generate_depth(struct llvmpipe_context *lp, #endif lp_build_depth_test(builder, - state, + &key->depth, dst_type, format_desc, mask, @@ -173,107 +173,6 @@ generate_depth(struct llvmpipe_context *lp, } -struct build_fetch_texel_context -{ - LLVMValueRef context_ptr; - - LLVMValueRef samplers_ptr; - - /** Coords/texels store */ - LLVMValueRef store_ptr; -}; - - -void PIPE_CDECL -lp_fetch_texel_soa( struct tgsi_sampler **samplers, - uint32_t unit, - float *store ) -{ - struct tgsi_sampler *sampler = samplers[unit]; - -#if 0 - uint j; - - debug_printf("%s sampler: %p (%p) store: %p\n", - __FUNCTION__, - sampler, *sampler, - store ); - - debug_printf("lodbias %f\n", store[12]); - - for (j = 0; j < 4; j++) - debug_printf("sample %d texcoord %f %f\n", - j, - store[0+j], - store[4+j]); -#endif - - { - float rgba[NUM_CHANNELS][QUAD_SIZE]; - sampler->get_samples(sampler, - &store[0], - &store[4], - &store[8], - 0.0f, /*store[12], lodbias */ - rgba); - memcpy(store, rgba, sizeof rgba); - } - -#if 0 - for (j = 0; j < 4; j++) - debug_printf("sample %d result %f %f %f %f\n", - j, - store[0+j], - store[4+j], - store[8+j], - store[12+j]); -#endif -} - - -static void -emit_fetch_texel( LLVMBuilderRef builder, - void *context, - unsigned unit, - unsigned num_coords, - const LLVMValueRef *coords, 
- LLVMValueRef lodbias, - LLVMValueRef *texel) -{ - struct build_fetch_texel_context *bld = context; - LLVMTypeRef vec_type = LLVMTypeOf(coords[0]); - LLVMValueRef args[3]; - unsigned i; - - if(!bld->samplers_ptr) - bld->samplers_ptr = lp_jit_context_samplers(builder, bld->context_ptr); - - if(!bld->store_ptr) - bld->store_ptr = LLVMBuildArrayAlloca(builder, - vec_type, - LLVMConstInt(LLVMInt32Type(), 4, 0), - "texel_store"); - - for (i = 0; i < num_coords; i++) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - LLVMValueRef coord_ptr = LLVMBuildGEP(builder, bld->store_ptr, &index, 1, ""); - LLVMBuildStore(builder, coords[i], coord_ptr); - } - - args[0] = bld->samplers_ptr; - args[1] = LLVMConstInt(LLVMInt32Type(), unit, 0); - args[2] = bld->store_ptr; - - lp_build_intrinsic(builder, "fetch_texel", LLVMVoidType(), args, 3); - - for (i = 0; i < NUM_CHANNELS; ++i) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - LLVMValueRef texel_ptr = LLVMBuildGEP(builder, bld->store_ptr, &index, 1, ""); - texel[i] = LLVMBuildLoad(builder, texel_ptr, ""); - } -} - - /** * Generate the fragment shader, depth/stencil test, and alpha tests. 
*/ @@ -286,7 +185,7 @@ generate_fs(struct llvmpipe_context *lp, LLVMValueRef context_ptr, unsigned i, const struct lp_build_interp_soa_context *interp, - struct build_fetch_texel_context *sampler, + struct lp_build_sampler_soa *sampler, LLVMValueRef *pmask, LLVMValueRef *color, LLVMValueRef depth_ptr) @@ -298,6 +197,7 @@ generate_fs(struct llvmpipe_context *lp, LLVMValueRef consts_ptr; LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS]; LLVMValueRef z = interp->pos[2]; + struct lp_build_flow_context *flow; struct lp_build_mask_context mask; boolean early_depth_test; unsigned attrib; @@ -309,25 +209,35 @@ generate_fs(struct llvmpipe_context *lp, consts_ptr = lp_jit_context_constants(builder, context_ptr); - lp_build_mask_begin(&mask, builder, type, *pmask); + flow = lp_build_flow_create(builder); + + memset(outputs, 0, sizeof outputs); + + lp_build_flow_scope_begin(flow); + + /* Declare the color and z variables */ + for(chan = 0; chan < NUM_CHANNELS; ++chan) { + color[chan] = LLVMGetUndef(vec_type); + lp_build_flow_scope_declare(flow, &color[chan]); + } + lp_build_flow_scope_declare(flow, &z); + + lp_build_mask_begin(&mask, flow, type, *pmask); early_depth_test = - lp->depth_stencil->depth.enabled && - lp->framebuffer.zsbuf && - !lp->depth_stencil->alpha.enabled && - !lp->fs->info.uses_kill && - !lp->fs->info.writes_z; + key->depth.enabled && + !key->alpha.enabled && + !shader->info.uses_kill && + !shader->info.writes_z; if(early_depth_test) - generate_depth(lp, builder, &key->depth, + generate_depth(builder, key, type, &mask, z, depth_ptr); - memset(outputs, 0, sizeof outputs); - lp_build_tgsi_soa(builder, tokens, type, &mask, consts_ptr, interp->pos, interp->inputs, - outputs, emit_fetch_texel, sampler); + outputs, sampler); for (attrib = 0; attrib < shader->info.num_outputs; ++attrib) { for(chan = 0; chan < NUM_CHANNELS; ++chan) { @@ -368,12 +278,16 @@ generate_fs(struct llvmpipe_context *lp, } if(!early_depth_test) - generate_depth(lp, builder, 
&key->depth, + generate_depth(builder, key, type, &mask, z, depth_ptr); lp_build_mask_end(&mask); + lp_build_flow_scope_end(flow); + + lp_build_flow_destroy(flow); + *pmask = mask.value; } @@ -392,6 +306,8 @@ generate_blend(const struct pipe_blend_state *blend, LLVMValueRef dst_ptr) { struct lp_build_context bld; + struct lp_build_flow_context *flow; + struct lp_build_mask_context mask_ctx; LLVMTypeRef vec_type; LLVMTypeRef int_vec_type; LLVMValueRef const_ptr; @@ -400,11 +316,14 @@ generate_blend(const struct pipe_blend_state *blend, LLVMValueRef res[4]; unsigned chan; + lp_build_context_init(&bld, builder, type); + + flow = lp_build_flow_create(builder); + lp_build_mask_begin(&mask_ctx, flow, type, mask); + vec_type = lp_build_vec_type(type); int_vec_type = lp_build_int_vec_type(type); - lp_build_context_init(&bld, builder, type); - const_ptr = lp_jit_context_blend_color(builder, context_ptr); const_ptr = LLVMBuildBitCast(builder, const_ptr, LLVMPointerType(vec_type, 0), ""); @@ -422,11 +341,16 @@ generate_blend(const struct pipe_blend_state *blend, lp_build_blend_soa(builder, blend, type, src, dst, con, res); for(chan = 0; chan < 4; ++chan) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0); - lp_build_name(res[chan], "res.%c", "rgba"[chan]); - res[chan] = lp_build_select(&bld, mask, res[chan], dst[chan]); - LLVMBuildStore(builder, res[chan], LLVMBuildGEP(builder, dst_ptr, &index, 1, "")); + if(blend->colormask & (1 << chan)) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0); + lp_build_name(res[chan], "res.%c", "rgba"[chan]); + res[chan] = lp_build_select(&bld, mask, res[chan], dst[chan]); + LLVMBuildStore(builder, res[chan], LLVMBuildGEP(builder, dst_ptr, &index, 1, "")); + } } + + lp_build_mask_end(&mask_ctx); + lp_build_flow_destroy(flow); } @@ -462,7 +386,7 @@ generate_fragment(struct llvmpipe_context *lp, LLVMBuilderRef builder; LLVMValueRef x0; LLVMValueRef y0; - struct build_fetch_texel_context sampler; + struct 
lp_build_sampler_soa *sampler; struct lp_build_interp_soa_context interp; LLVMValueRef fs_mask[LP_MAX_VECTOR_LENGTH]; LLVMValueRef fs_out_color[NUM_CHANNELS][LP_MAX_VECTOR_LENGTH]; @@ -586,8 +510,13 @@ generate_fragment(struct llvmpipe_context *lp, a0_ptr, dadx_ptr, dady_ptr, x0, y0, 2, 0); - memset(&sampler, 0, sizeof sampler); - sampler.context_ptr = context_ptr; +#if 0 + /* C texture sampling */ + sampler = lp_c_sampler_soa_create(context_ptr); +#else + /* code generated texture sampling */ + sampler = lp_llvm_sampler_soa_create(key->sampler, context_ptr); +#endif for(i = 0; i < num_fs; ++i) { LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); @@ -606,7 +535,7 @@ generate_fragment(struct llvmpipe_context *lp, context_ptr, i, &interp, - &sampler, + sampler, &fs_mask[i], out_color, depth_ptr_i); @@ -615,6 +544,8 @@ generate_fragment(struct llvmpipe_context *lp, fs_out_color[chan][i] = out_color[chan]; } + sampler->destroy(sampler); + /* * Convert the fs's output color and mask to fit to the blending type. 
*/ @@ -765,18 +696,45 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe, */ static void make_variant_key(struct llvmpipe_context *lp, + struct lp_fragment_shader *shader, struct lp_fragment_shader_variant_key *key) { + unsigned i; + memset(key, 0, sizeof *key); - memcpy(&key->depth, &lp->depth_stencil->depth, sizeof key->depth); + if(lp->framebuffer.zsbuf && + lp->depth_stencil->depth.enabled) { + key->zsbuf_format = lp->framebuffer.zsbuf->format; + memcpy(&key->depth, &lp->depth_stencil->depth, sizeof key->depth); + } key->alpha.enabled = lp->depth_stencil->alpha.enabled; if(key->alpha.enabled) key->alpha.func = lp->depth_stencil->alpha.func; /* alpha.ref_value is passed in jit_context */ - memcpy(&key->blend, lp->blend, sizeof key->blend); + if(lp->framebuffer.cbufs[0]) { + const struct util_format_description *format_desc; + unsigned chan; + + memcpy(&key->blend, lp->blend, sizeof key->blend); + + format_desc = util_format_description(lp->framebuffer.cbufs[0]->format); + assert(format_desc->layout == UTIL_FORMAT_COLORSPACE_RGB || + format_desc->layout == UTIL_FORMAT_COLORSPACE_SRGB); + + /* mask out color channels not present in the color buffer */ + for(chan = 0; chan < 4; ++chan) { + enum util_format_swizzle swizzle = format_desc->swizzle[chan]; + if(swizzle > 4) + key->blend.colormask &= ~(1 << chan); + } + } + + for(i = 0; i < PIPE_MAX_SAMPLERS; ++i) + if(shader->info.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) + lp_sampler_static_state(&key->sampler[i], lp->texture[i], lp->sampler[i]); } @@ -787,7 +745,7 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) struct lp_fragment_shader_variant_key key; struct lp_fragment_shader_variant *variant; - make_variant_key(lp, &key); + make_variant_key(lp, shader, &key); variant = shader->variants; while(variant) { diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/src/gallium/drivers/llvmpipe/lp_state_sampler.c index 4fef541b1e3..c69d90c723a 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c 
+++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c @@ -98,6 +98,16 @@ llvmpipe_set_sampler_textures(struct pipe_context *pipe, pipe_texture_reference(&llvmpipe->texture[i], tex); lp_tex_tile_cache_set_texture(llvmpipe->tex_cache[i], tex); + + if(tex) { + struct llvmpipe_texture *lp_tex = llvmpipe_texture(tex); + struct lp_jit_texture *jit_tex = &llvmpipe->jit_context.textures[i]; + jit_tex->width = tex->width[0]; + jit_tex->height = tex->height[0]; + jit_tex->stride = lp_tex->stride[0]; + if(!lp_tex->dt) + jit_tex->data = lp_tex->data; + } } llvmpipe->num_textures = num; diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c index 1d192355eed..d8455e56490 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_format.c +++ b/src/gallium/drivers/llvmpipe/lp_test_format.c @@ -119,7 +119,7 @@ add_load_rgba_test(LLVMModuleRef module, lp_build_loop_begin(builder, LLVMConstInt(LLVMInt32Type(), 1, 0), &loop); - rgba = lp_build_load_rgba(builder, format, ptr); + rgba = lp_build_load_rgba_aos(builder, format, ptr); LLVMBuildStore(builder, rgba, rgba_ptr); lp_build_loop_end(builder, LLVMConstInt(LLVMInt32Type(), 4, 0), NULL, &loop); @@ -160,7 +160,7 @@ add_store_rgba_test(LLVMModuleRef module, rgba = LLVMBuildLoad(builder, rgba_ptr, ""); - lp_build_store_rgba(builder, format, ptr, rgba); + lp_build_store_rgba_aos(builder, format, ptr, rgba); LLVMBuildRetVoid(builder); diff --git a/src/gallium/drivers/llvmpipe/lp_tex_cache.c b/src/gallium/drivers/llvmpipe/lp_tex_cache.c index 23a94b5b0d5..773e8482425 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_cache.c +++ b/src/gallium/drivers/llvmpipe/lp_tex_cache.c @@ -154,7 +154,7 @@ lp_tex_tile_cache_validate_texture(struct llvmpipe_tex_tile_cache *tc) if (lpt->timestamp != tc->timestamp) { /* texture was modified, invalidate all cached tiles */ uint i; - _debug_printf("INV %d %d\n", tc->timestamp, lpt->timestamp); + debug_printf("INV %d %d\n", tc->timestamp, lpt->timestamp); for (i = 
0; i < NUM_ENTRIES; i++) { tc->entries[i].addr.bits.invalid = 1; } diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.h b/src/gallium/drivers/llvmpipe/lp_tex_sample.h index 628ec3f1efd..9ad1bde9565 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample.h +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.h @@ -29,10 +29,13 @@ #define LP_TEX_SAMPLE_H +#include <llvm-c/Core.h> + #include "tgsi/tgsi_exec.h" struct llvmpipe_tex_tile_cache; +struct lp_sampler_static_state; /** @@ -75,4 +78,24 @@ lp_get_samples(struct tgsi_sampler *tgsi_sampler, float rgba[NUM_CHANNELS][QUAD_SIZE]); +/** + * Texture sampling code generator that just calls lp_get_samples C function + * for the actual sampling computation. + * + * @param context_ptr LLVM value with the pointer to the struct lp_jit_context. + */ +struct lp_build_sampler_soa * +lp_c_sampler_soa_create(LLVMValueRef context_ptr); + + +/** + * Pure-LLVM texture sampling code generator. + * + * @param context_ptr LLVM value with the pointer to the struct lp_jit_context. 
+ */ +struct lp_build_sampler_soa * +lp_llvm_sampler_soa_create(const struct lp_sampler_static_state *key, + LLVMValueRef context_ptr); + + #endif /* LP_TEX_SAMPLE_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c index 94eb6dad5af..9a876f404df 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample.c +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c @@ -1578,3 +1578,136 @@ out: tgsi_sampler->get_samples( tgsi_sampler, s, t, p, lodbias, rgba ); } + +void PIPE_CDECL +lp_fetch_texel_soa( struct tgsi_sampler **samplers, + uint32_t unit, + float *store ) +{ + struct tgsi_sampler *sampler = samplers[unit]; + +#if 0 + uint j; + + debug_printf("%s sampler: %p (%p) store: %p\n", + __FUNCTION__, + sampler, *sampler, + store ); + + debug_printf("lodbias %f\n", store[12]); + + for (j = 0; j < 4; j++) + debug_printf("sample %d texcoord %f %f\n", + j, + store[0+j], + store[4+j]); +#endif + + { + float rgba[NUM_CHANNELS][QUAD_SIZE]; + sampler->get_samples(sampler, + &store[0], + &store[4], + &store[8], + 0.0f, /*store[12], lodbias */ + rgba); + memcpy(store, rgba, sizeof rgba); + } + +#if 0 + for (j = 0; j < 4; j++) + debug_printf("sample %d result %f %f %f %f\n", + j, + store[0+j], + store[4+j], + store[8+j], + store[12+j]); +#endif +} + + +#include "lp_bld_type.h" +#include "lp_bld_intr.h" +#include "lp_bld_tgsi.h" + + +struct lp_c_sampler_soa +{ + struct lp_build_sampler_soa base; + + LLVMValueRef context_ptr; + + LLVMValueRef samplers_ptr; + + /** Coords/texels store */ + LLVMValueRef store_ptr; +}; + + +static void +lp_c_sampler_soa_destroy(struct lp_build_sampler_soa *sampler) +{ + FREE(sampler); +} + + +static void +lp_c_sampler_soa_emit_fetch_texel(struct lp_build_sampler_soa *_sampler, + LLVMBuilderRef builder, + union lp_type type, + unsigned unit, + unsigned num_coords, + const LLVMValueRef *coords, + LLVMValueRef lodbias, + LLVMValueRef *texel) +{ + struct lp_c_sampler_soa *sampler = (struct 
lp_c_sampler_soa *)_sampler; + LLVMTypeRef vec_type = LLVMTypeOf(coords[0]); + LLVMValueRef args[3]; + unsigned i; + + if(!sampler->samplers_ptr) + sampler->samplers_ptr = lp_jit_context_samplers(builder, sampler->context_ptr); + + if(!sampler->store_ptr) + sampler->store_ptr = LLVMBuildArrayAlloca(builder, + vec_type, + LLVMConstInt(LLVMInt32Type(), 4, 0), + "texel_store"); + + for (i = 0; i < num_coords; i++) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef coord_ptr = LLVMBuildGEP(builder, sampler->store_ptr, &index, 1, ""); + LLVMBuildStore(builder, coords[i], coord_ptr); + } + + args[0] = sampler->samplers_ptr; + args[1] = LLVMConstInt(LLVMInt32Type(), unit, 0); + args[2] = sampler->store_ptr; + + lp_build_intrinsic(builder, "fetch_texel", LLVMVoidType(), args, 3); + + for (i = 0; i < NUM_CHANNELS; ++i) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef texel_ptr = LLVMBuildGEP(builder, sampler->store_ptr, &index, 1, ""); + texel[i] = LLVMBuildLoad(builder, texel_ptr, ""); + } +} + + +struct lp_build_sampler_soa * +lp_c_sampler_soa_create(LLVMValueRef context_ptr) +{ + struct lp_c_sampler_soa *sampler; + + sampler = CALLOC_STRUCT(lp_c_sampler_soa); + if(!sampler) + return NULL; + + sampler->base.destroy = lp_c_sampler_soa_destroy; + sampler->base.emit_fetch_texel = lp_c_sampler_soa_emit_fetch_texel; + sampler->context_ptr = context_ptr; + + return &sampler->base; +} + diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c new file mode 100644 index 00000000000..7d31705d014 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c @@ -0,0 +1,196 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Texture sampling code generation + * + * This file is nothing more than ugly glue between three largely independent + * entities: + * - TGSI -> LLVM translation (i.e., lp_build_tgsi_soa) + * - texture sampling code generation (i.e., lp_build_sample_soa) + * - LLVM pipe driver + * + * All interesting code is in the functions mentioned above. There is really + * nothing to see here. 
+ * + * @author Jose Fonseca <[email protected]> + */ + +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" +#include "lp_bld_debug.h" +#include "lp_bld_type.h" +#include "lp_bld_intr.h" +#include "lp_bld_sample.h" +#include "lp_bld_tgsi.h" +#include "lp_state.h" +#include "lp_tex_sample.h" + + +/** + * This provides the bridge between the sampler state store in lp_jit_context + * and lp_jit_texture and the sampler code generator. It provides the + * texture layout information required by the texture sampler code generator + * in terms of the state stored in lp_jit_context and lp_jit_texture in runtime. + */ +struct llvmpipe_sampler_dynamic_state +{ + struct lp_sampler_dynamic_state base; + + const struct lp_sampler_static_state *static_state; + + LLVMValueRef context_ptr; +}; + + +/** + * This is the bridge between our sampler and the TGSI translator. + */ +struct lp_llvm_sampler_soa +{ + struct lp_build_sampler_soa base; + + struct llvmpipe_sampler_dynamic_state dynamic_state; +}; + + +/** + * Fetch the specified member of the lp_jit_texture structure. 
+ * + * @sa http://llvm.org/docs/GetElementPtr.html + */ +static LLVMValueRef +lp_llvm_texture_member(struct lp_sampler_dynamic_state *base, + LLVMBuilderRef builder, + unsigned unit, + unsigned member_index, + const char *member_name) +{ + struct llvmpipe_sampler_dynamic_state *state = (struct llvmpipe_sampler_dynamic_state *)base; + LLVMValueRef indices[4]; + LLVMValueRef ptr; + LLVMValueRef res; + + assert(unit < PIPE_MAX_SAMPLERS); + + /* context[0] */ + indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); + /* context[0].textures */ + indices[1] = LLVMConstInt(LLVMInt32Type(), LP_JIT_CONTEXT_TEXTURES_INDEX, 0); + /* context[0].textures[unit] */ + indices[2] = LLVMConstInt(LLVMInt32Type(), unit, 0); + /* context[0].textures[unit].member */ + indices[3] = LLVMConstInt(LLVMInt32Type(), member_index, 0); + + ptr = LLVMBuildGEP(builder, state->context_ptr, indices, Elements(indices), ""); + + res = LLVMBuildLoad(builder, ptr, ""); + + lp_build_name(res, "context.texture%u.%s", unit, member_name); + + return res; +} + + +/** + * Helper macro to instantiate the functions that generate the code to fetch + * the members of lp_jit_texture to fulfill the sampler code generator requests. + * + * This complexity is the price we have to pay to keep the texture sampler code + * generator a reusable module without dependencies to llvmpipe internals. 
+ */ +#define LP_LLVM_TEXTURE_MEMBER(_name, _index) \ + static LLVMValueRef \ + lp_llvm_texture_##_name( struct lp_sampler_dynamic_state *base, \ + LLVMBuilderRef builder, \ + unsigned unit) \ + { \ + return lp_llvm_texture_member(base, builder, unit, _index, #_name ); \ + } + + +LP_LLVM_TEXTURE_MEMBER(width, LP_JIT_TEXTURE_WIDTH) +LP_LLVM_TEXTURE_MEMBER(height, LP_JIT_TEXTURE_HEIGHT) +LP_LLVM_TEXTURE_MEMBER(stride, LP_JIT_TEXTURE_STRIDE) +LP_LLVM_TEXTURE_MEMBER(data_ptr, LP_JIT_TEXTURE_DATA) + + +static void +lp_llvm_sampler_soa_destroy(struct lp_build_sampler_soa *sampler) +{ + FREE(sampler); +} + + +static void +lp_llvm_sampler_soa_emit_fetch_texel(struct lp_build_sampler_soa *base, + LLVMBuilderRef builder, + union lp_type type, + unsigned unit, + unsigned num_coords, + const LLVMValueRef *coords, + LLVMValueRef lodbias, + LLVMValueRef *texel) +{ + struct lp_llvm_sampler_soa *sampler = (struct lp_llvm_sampler_soa *)base; + + assert(unit < PIPE_MAX_SAMPLERS); + + lp_build_sample_soa(builder, + &sampler->dynamic_state.static_state[unit], + &sampler->dynamic_state.base, + type, + unit, + num_coords, + coords, + lodbias, + texel); +} + + +struct lp_build_sampler_soa * +lp_llvm_sampler_soa_create(const struct lp_sampler_static_state *static_state, + LLVMValueRef context_ptr) +{ + struct lp_llvm_sampler_soa *sampler; + + sampler = CALLOC_STRUCT(lp_llvm_sampler_soa); + if(!sampler) + return NULL; + + sampler->base.destroy = lp_llvm_sampler_soa_destroy; + sampler->base.emit_fetch_texel = lp_llvm_sampler_soa_emit_fetch_texel; + sampler->dynamic_state.base.width = lp_llvm_texture_width; + sampler->dynamic_state.base.height = lp_llvm_texture_height; + sampler->dynamic_state.base.stride = lp_llvm_texture_stride; + sampler->dynamic_state.base.data_ptr = lp_llvm_texture_data_ptr; + sampler->dynamic_state.static_state = static_state; + sampler->dynamic_state.context_ptr = context_ptr; + + return &sampler->base; +} + diff --git a/src/gallium/drivers/nv50/nv50_miptree.c 
b/src/gallium/drivers/nv50/nv50_miptree.c index 03b9243b828..93479a0314a 100644 --- a/src/gallium/drivers/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nv50/nv50_miptree.c @@ -148,6 +148,7 @@ nv50_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, mt->image_nr = 1; mt->level[0].pitch = *stride; mt->level[0].image_offset = CALLOC(1, sizeof(unsigned)); + mt->level[0].tile_mode = bo->tile_mode; nouveau_bo_ref(bo, &mt->base.bo); return &mt->base.base; diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 289c3485e08..4a838529de7 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1106,10 +1106,10 @@ convert_to_long(struct nv50_pc *pc, struct nv50_program_exec *e) m = 0xffff7fff; break; case 0x8: - /* INTERP */ - m = ~0x02000000; - if (e->inst[0] & 0x02000000) - q = 0x00020000; + /* INTERP (move centroid, perspective and flat bits) */ + m = ~0x03000100; + q = (e->inst[0] & (3 << 24)) >> (24 - 16); + q |= (e->inst[0] & (1 << 8)) << (18 - 8); break; case 0x9: /* RCP */ @@ -1495,6 +1495,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) } break; case TGSI_OPCODE_MOV: + case TGSI_OPCODE_SWZ: for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index 344c2cf6dde..d294356f75d 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -66,7 +66,8 @@ nv50_state_validate_fb(struct nv50_context *nv50) so_data(so, NV50TCL_RT_FORMAT_X8R8G8B8_UNORM); break; } - so_data(so, bo->tile_mode << 4); + so_data(so, nv50_miptree(pt)-> + level[fb->cbufs[i]->level].tile_mode << 4); so_data(so, 0x00000000); so_method(so, tesla, 0x1224, 1); @@ -110,7 +111,8 @@ nv50_state_validate_fb(struct nv50_context *nv50) so_data(so, NV50TCL_ZETA_FORMAT_S8Z24_UNORM); break; } - so_data(so, 
bo->tile_mode << 4); + so_data(so, nv50_miptree(pt)-> + level[fb->zsbuf->level].tile_mode << 4); so_data(so, 0x00000000); so_method(so, tesla, 0x1538, 1); diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c index b266324f58d..6bf6f773b0c 100644 --- a/src/gallium/drivers/nv50/nv50_surface.c +++ b/src/gallium/drivers/nv50/nv50_surface.c @@ -60,13 +60,13 @@ nv50_surface_set(struct nv50_screen *screen, struct pipe_surface *ps, int dst) format = nv50_format(ps->format); if (format < 0) return 1; - + if (!bo->tile_flags) { BEGIN_RING(chan, eng2d, mthd, 2); OUT_RING (chan, format); OUT_RING (chan, 1); BEGIN_RING(chan, eng2d, mthd + 0x14, 5); - OUT_RING (chan, mt->level[0].pitch); + OUT_RING (chan, mt->level[ps->level].pitch); OUT_RING (chan, ps->width); OUT_RING (chan, ps->height); OUT_RELOCh(chan, bo, ps->offset, flags); @@ -75,7 +75,7 @@ nv50_surface_set(struct nv50_screen *screen, struct pipe_surface *ps, int dst) BEGIN_RING(chan, eng2d, mthd, 5); OUT_RING (chan, format); OUT_RING (chan, 0); - OUT_RING (chan, bo->tile_mode << 4); + OUT_RING (chan, mt->level[ps->level].tile_mode << 4); OUT_RING (chan, 1); OUT_RING (chan, 0); BEGIN_RING(chan, eng2d, mthd + 0x18, 4); diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index 36f1b24b2f0..eeed148c7b9 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -178,7 +178,7 @@ nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map, BEGIN_RING(chan, tesla, 0x400015f0, nr >> 1); for (i = 0; i < nr; i += 2) - OUT_RING (chan, (map[1] << 16) | map[0]); + OUT_RING (chan, (map[i + 1] << 16) | map[i]); count -= nr; map += nr; @@ -207,7 +207,7 @@ nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map, BEGIN_RING(chan, tesla, 0x400015f0, nr >> 1); for (i = 0; i < nr; i += 2) - OUT_RING (chan, (map[1] << 16) | map[0]); + OUT_RING (chan, (map[i + 1] << 16) | map[i]); count -= nr; map += nr; @@ 
-313,18 +313,18 @@ nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib, so_data (so, fui(v[3])); break; case 3: - so_method(so, tesla, NV50TCL_VTX_ATTR_3F_X(attrib), 4); + so_method(so, tesla, NV50TCL_VTX_ATTR_3F_X(attrib), 3); so_data (so, fui(v[0])); so_data (so, fui(v[1])); so_data (so, fui(v[2])); break; case 2: - so_method(so, tesla, NV50TCL_VTX_ATTR_2F_X(attrib), 4); + so_method(so, tesla, NV50TCL_VTX_ATTR_2F_X(attrib), 2); so_data (so, fui(v[0])); so_data (so, fui(v[1])); break; case 1: - so_method(so, tesla, NV50TCL_VTX_ATTR_1F(attrib), 4); + so_method(so, tesla, NV50TCL_VTX_ATTR_1F(attrib), 1); so_data (so, fui(v[0])); break; default: diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile index d7a2c8c462c..93c2152edce 100644 --- a/src/gallium/drivers/r300/Makefile +++ b/src/gallium/drivers/r300/Makefile @@ -9,6 +9,7 @@ C_SOURCES = \ r300_chipset.c \ r300_clear.c \ r300_context.c \ + r300_debug.c \ r300_emit.c \ r300_flush.c \ r300_fs.c \ diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index da67bc29b89..9cc455135db 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -22,6 +22,9 @@ #include "r300_context.h" +#include "r300_flush.h" +#include "r300_state_invariant.h" + static boolean r300_draw_range_elements(struct pipe_context* pipe, struct pipe_buffer* indexBuffer, unsigned indexSize, @@ -146,6 +149,8 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300->context.winsys = (struct pipe_winsys*)r300_winsys; r300->context.screen = r300_screen(screen); + r300_init_debug(r300); + r300->context.destroy = r300_destroy_context; r300->context.clear = r300_clear; diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index f78492d4aa9..6c5914baa35 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -211,10 +211,7 @@ 
struct r300_vertex_format { int fs_tab[16]; }; -static struct pipe_viewport_state r300_viewport_identity = { - .scale = {1.0, 1.0, 1.0, 1.0}, - .translate = {0.0, 0.0, 0.0, 0.0}, -}; +extern struct pipe_viewport_state r300_viewport_identity; struct r300_context { /* Parent class */ @@ -275,6 +272,9 @@ struct r300_context { uint32_t dirty_state; /* Flag indicating whether or not the HW is dirty. */ uint32_t dirty_hw; + + /** Combination of DBG_xxx flags */ + unsigned debug; }; /* Convenience cast wrapper. */ @@ -288,4 +288,40 @@ struct draw_stage* r300_draw_stage(struct r300_context* r300); void r300_init_state_functions(struct r300_context* r300); void r300_init_surface_functions(struct r300_context* r300); +/* Debug functionality. */ + +/** + * Debug flags to disable/enable certain groups of debugging outputs. + * + * \note These may be rather coarse, and the grouping may be impractical. + * If you find, while debugging the driver, that a different grouping + * of these flags would be beneficial, just feel free to change them + * but make sure to update the documentation in r300_debug.c to reflect + * those changes. + */ +/*@{*/ +#define DBG_HELP 0x0000001 +#define DBG_FP 0x0000002 +#define DBG_VP 0x0000004 +#define DBG_CS 0x0000008 +#define DBG_DRAW 0x0000010 +/*@}*/ + +static INLINE boolean DBG_ON(struct r300_context * ctx, unsigned flags) +{ + return (ctx->debug & flags) ? true : false; +} + +static INLINE void DBG(struct r300_context * ctx, unsigned flags, const char * fmt, ...) 
+{ + if (DBG_ON(ctx, flags)) { + va_list va; + va_start(va, fmt); + debug_vprintf(fmt, va); + va_end(va); + } +} + +void r300_init_debug(struct r300_context * ctx); + #endif /* R300_CONTEXT_H */ diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h index 71b142c0dbf..0a7e4703636 100644 --- a/src/gallium/drivers/r300/r300_cs.h +++ b/src/gallium/drivers/r300/r300_cs.h @@ -49,7 +49,8 @@ (RADEON_CP_PACKET0 | ((count) << 16) | ((register) >> 2)) #define CS_LOCALS(context) \ - struct r300_winsys* cs_winsys = context->winsys; \ + struct r300_context* const cs_context_copy = (context); \ + struct r300_winsys* cs_winsys = cs_context_copy->winsys; \ int cs_count = 0; #define CHECK_CS(size) \ @@ -58,7 +59,7 @@ #define BEGIN_CS(size) do { \ CHECK_CS(size); \ if (VERY_VERBOSE_CS) { \ - debug_printf("r300: BEGIN_CS, count %d, in %s (%s:%d)\n", \ + DBG(cs_context_copy, DBG_CS, "r300: BEGIN_CS, count %d, in %s (%s:%d)\n", \ size, __FUNCTION__, __FILE__, __LINE__); \ } \ cs_winsys->begin_cs(cs_winsys, (size), \ @@ -78,7 +79,7 @@ #define OUT_CS_REG(register, value) do { \ if (VERY_VERBOSE_REGISTERS) \ - debug_printf("r300: writing 0x%08X to register 0x%04X\n", \ + DBG(cs_context_copy, DBG_CS, "r300: writing 0x%08X to register 0x%04X\n", \ value, register); \ assert(register); \ OUT_CS(CP_PACKET0(register, 0)); \ @@ -89,14 +90,14 @@ * not the actual packet0 count! 
*/ #define OUT_CS_REG_SEQ(register, count) do { \ if (VERY_VERBOSE_REGISTERS) \ - debug_printf("r300: writing register sequence of %d to 0x%04X\n", \ + DBG(cs_context_copy, DBG_CS, "r300: writing register sequence of %d to 0x%04X\n", \ count, register); \ assert(register); \ OUT_CS(CP_PACKET0(register, ((count) - 1))); \ } while (0) #define OUT_CS_RELOC(bo, offset, rd, wd, flags) do { \ - debug_printf("r300: writing relocation for buffer %p, offset %d, " \ + DBG(cs_context_copy, DBG_CS, "r300: writing relocation for buffer %p, offset %d, " \ "domains (%d, %d, %d)\n", \ bo, offset, rd, wd, flags); \ assert(bo); \ @@ -107,7 +108,7 @@ #define END_CS do { \ if (VERY_VERBOSE_CS) { \ - debug_printf("r300: END_CS in %s (%s:%d)\n", __FUNCTION__, \ + DBG(cs_context_copy, DBG_CS, "r300: END_CS in %s (%s:%d)\n", __FUNCTION__, \ __FILE__, __LINE__); \ } \ if (cs_count != 0) \ @@ -117,7 +118,7 @@ #define FLUSH_CS do { \ if (VERY_VERBOSE_CS) { \ - debug_printf("r300: FLUSH_CS in %s (%s:%d)\n\n", __FUNCTION__, \ + DBG(cs_context_copy, DBG_CS, "r300: FLUSH_CS in %s (%s:%d)\n\n", __FUNCTION__, \ __FILE__, __LINE__); \ } \ cs_winsys->flush_cs(cs_winsys); \ @@ -127,7 +128,7 @@ #define OUT_CS_ONE_REG(register, count) do { \ if (VERY_VERBOSE_REGISTERS) \ - debug_printf("r300: writing data sequence of %d to 0x%04X\n", \ + DBG(cs_context_copy, DBG_CS, "r300: writing data sequence of %d to 0x%04X\n", \ count, register); \ assert(register); \ OUT_CS(CP_PACKET0(register, ((count) - 1)) | RADEON_ONE_REG_WR); \ @@ -141,7 +142,7 @@ } while (0) #define OUT_CS_INDEX_RELOC(bo, offset, count, rd, wd, flags) do { \ - debug_printf("r300: writing relocation for index buffer %p," \ + DBG(cs_context_copy, DBG_CS, "r300: writing relocation for index buffer %p," \ "offset %d\n", bo, offset); \ assert(bo); \ OUT_CS(offset); \ diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c new file mode 100644 index 00000000000..15308dda1de --- /dev/null +++ 
b/src/gallium/drivers/r300/r300_debug.c @@ -0,0 +1,88 @@ +/* + * Copyright 2009 Nicolai Haehnle <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ + +#include "r300_context.h" + +#include <ctype.h> + + +struct debug_option { + const char * name; + unsigned flag; + const char * description; +}; + +static struct debug_option debug_options[] = { + { "help", DBG_HELP, "Helpful meta-information about the driver" }, + { "fp", DBG_FP, "Fragment program handling" }, + { "vp", DBG_VP, "Vertex program handling" }, + { "cs", DBG_CS, "Command submissions" }, + { "draw", DBG_DRAW, "Draw and emit" }, + + { "all", ~0, "Convenience option that enables all debug flags" }, + + /* must be last */ + { 0, 0, 0 } +}; + +void r300_init_debug(struct r300_context * ctx) +{ + const char * options = debug_get_option("RADEON_DEBUG", 0); + boolean printhint = false; + + if (options) { + while(*options) { + if (*options == ' ' || *options == ',') { + options++; + continue; + } + + size_t length = strcspn(options, " ,"); + struct debug_option * opt; + + for(opt = debug_options; opt->name; ++opt) { + if (!strncmp(options, opt->name, length)) { + ctx->debug |= opt->flag; + break; + } + } + + if (!opt->name) { + debug_printf("Unknown debug option: %s\n", options); + printhint = true; + } + + options += length; + } + + if (!ctx->debug) + printhint = true; + } + + if (printhint || ctx->debug & DBG_HELP) { + debug_printf("You can enable debug output by setting the RADEON_DEBUG environment variable\n" + "to a comma-separated list of debug options. 
Available options are:\n"); + for(struct debug_option * opt = debug_options; opt->name; ++opt) { + debug_printf(" %s: %s\n", opt->name, opt->description); + } + } +} diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index bd4d59e6f1a..1bc35c24867 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -25,6 +25,7 @@ #include "r300_emit.h" #include "r300_fs.h" +#include "r300_state_derived.h" #include "r300_vs.h" void r300_emit_blend_state(struct r300_context* r300, @@ -490,7 +491,7 @@ void r300_emit_vertex_buffer(struct r300_context* r300) { CS_LOCALS(r300); - debug_printf("r300: Preparing vertex buffer %p for render, " + DBG(r300, DBG_DRAW, "r300: Preparing vertex buffer %p for render, " "vertex size %d\n", r300->vbo, r300->vertex_info.vinfo.size); /* Set the pointer to our vertex buffer. The emitted values are this: diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 350691d592d..c4002b8e5d0 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -56,6 +56,11 @@ void r500_emit_fragment_program_code(struct r300_context* r300, void r300_emit_fb_state(struct r300_context* r300, struct pipe_framebuffer_state* fb); +void r300_emit_query_begin(struct r300_context* r300, + struct r300_query* query); +void r300_emit_query_end(struct r300_context* r300, + struct r300_query* query); + void r300_emit_rs_state(struct r300_context* r300, struct r300_rs_state* rs); void r300_emit_rs_block_state(struct r300_context* r300, diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index 36463b9a2eb..a0e848a59ac 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -96,7 +96,7 @@ void r300_translate_fragment_shader(struct r300_context* r300, memset(&compiler, 0, sizeof(compiler)); rc_init(&compiler.Base); - compiler.Base.Debug = 1; + compiler.Base.Debug = 
DBG_ON(r300, DBG_FP); compiler.code = &fs->code; compiler.is_r500 = r300_screen(r300->context.screen)->caps->is_r500; diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c index 1d5185b417e..2880d34877f 100644 --- a/src/gallium/drivers/r300/r300_query.c +++ b/src/gallium/drivers/r300/r300_query.c @@ -22,6 +22,8 @@ #include "r300_query.h" +#include "r300_emit.h" + static struct pipe_query* r300_create_query(struct pipe_context* pipe, unsigned query_type) { diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index cd458d019ae..d05a736dd96 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -26,6 +26,7 @@ #include "r300_cs.h" #include "r300_context.h" +#include "r300_emit.h" #include "r300_reg.h" #include "r300_state_derived.h" @@ -34,7 +35,7 @@ struct r300_render { /* Parent class */ struct vbuf_render base; - + /* Pipe context */ struct r300_context* r300; @@ -77,7 +78,7 @@ static boolean r300_render_allocate_vertices(struct vbuf_render* render, if (r300render->vbo && (size > r300render->vbo_alloc_size)) { pipe_buffer_reference(&r300render->vbo, NULL); } - + if (!r300render->vbo) { r300render->vbo = pipe_buffer_create(screen, 64, @@ -184,7 +185,7 @@ static void r300_render_draw_arrays(struct vbuf_render* render, prepare_render(r300render, count); - debug_printf("r300: Doing vbuf render, count %d\n", count); + DBG(r300, DBG_DRAW, "r300: Doing vbuf render, count %d\n", count); BEGIN_CS(2); OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0); @@ -233,7 +234,8 @@ static void r300_render_draw(struct vbuf_render* render, OUT_CS_INDEX_RELOC(index_buffer, 0, count, RADEON_GEM_DOMAIN_GTT, 0, 0); END_CS; */ - BEGIN_CS(2 + (count+1)/2); + BEGIN_CS(4 + (count+1)/2); + OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count); OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, (count+1)/2); OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | r300render->hwprim); diff --git 
a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index c16cadd0407..88cb9af6fb7 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -20,10 +20,11 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include "util/u_debug.h" #include "util/u_math.h" #include "util/u_pack_color.h" -#include "util/u_debug.h" +#include "tgsi/tgsi_parse.h" #include "pipe/p_config.h" #include "pipe/internal/p_winsys_screen.h" @@ -429,6 +430,9 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state) r300->rs_state = rs; r300->dirty_state |= R300_NEW_RASTERIZER; + r300->dirty_state |= R300_NEW_RS_BLOCK; + r300->dirty_state |= R300_NEW_SCISSOR; + r300->dirty_state |= R300_NEW_VIEWPORT; } /* Free rasterizer state. */ diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index c01e61a9b19..5f6b225d340 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -23,6 +23,7 @@ #include "r300_state_derived.h" #include "r300_fs.h" +#include "r300_state_inlines.h" #include "r300_vs.h" /* r300_state_derived: Various bits of state which are dependent upon @@ -195,13 +196,13 @@ static void r300_vertex_psc(struct r300_context* r300, * and not on attrib information. 
*/ if (r300screen->caps->has_tcl) { attrib_count = r300->vs->info.num_inputs; - debug_printf("r300: routing %d attribs in psc for vs\n", + DBG(r300, DBG_DRAW, "r300: routing %d attribs in psc for vs\n", attrib_count); } else { attrib_count = vinfo->num_attribs; - debug_printf("r300: attrib count: %d\n", attrib_count); + DBG(r300, DBG_DRAW, "r300: attrib count: %d\n", attrib_count); for (i = 0; i < attrib_count; i++) { - debug_printf("r300: attrib: offset %d, interp %d, size %d," + DBG(r300, DBG_DRAW, "r300: attrib: offset %d, interp %d, size %d," " tab %d\n", vinfo->attrib[i].src_index, vinfo->attrib[i].interp_mode, vinfo->attrib[i].emit, tab[i]); @@ -299,18 +300,18 @@ static void r300_update_fs_tab(struct r300_context* r300) } /* Now that we know where everything is... */ - debug_printf("r300: fp input count: %d\n", info->num_inputs); + DBG(r300, DBG_DRAW, "r300: fp input count: %d\n", info->num_inputs); for (i = 0; i < info->num_inputs; i++) { switch (tab[i]) { case INTERP_LINEAR: - debug_printf("r300: attrib: " + DBG(r300, DBG_DRAW, "r300: attrib: " "stack offset %d, color, tab %d\n", i, cols_emitted); tab[i] = cols_emitted; cols_emitted++; break; case INTERP_PERSPECTIVE: - debug_printf("r300: attrib: " + DBG(r300, DBG_DRAW, "r300: attrib: " "stack offset %d, texcoord, tab %d\n", i, cols + texs); tab[i] = cols + texs; diff --git a/src/gallium/drivers/r300/r300_state_derived.h b/src/gallium/drivers/r300/r300_state_derived.h index 63ae8eb8d08..71a4a47b003 100644 --- a/src/gallium/drivers/r300/r300_state_derived.h +++ b/src/gallium/drivers/r300/r300_state_derived.h @@ -23,11 +23,7 @@ #ifndef R300_STATE_DERIVED_H #define R300_STATE_DERIVED_H -#include "draw/draw_vertex.h" - -#include "r300_context.h" -#include "r300_reg.h" -#include "r300_state_inlines.h" +struct r300_context; void r300_update_derived_state(struct r300_context* r300); diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c index 
1e92374a4e9..3865730d635 100644 --- a/src/gallium/drivers/r300/r300_state_invariant.c +++ b/src/gallium/drivers/r300/r300_state_invariant.c @@ -23,6 +23,12 @@ #include "r300_state_invariant.h" + +struct pipe_viewport_state r300_viewport_identity = { + .scale = {1.0, 1.0, 1.0, 1.0}, + .translate = {0.0, 0.0, 0.0, 0.0}, +}; + /* Calculate and emit invariant state. This is data that the 3D engine * will probably want at the beginning of every CS, but it's not currently * handled by any CSO setup, and in addition it doesn't really change much. @@ -38,7 +44,9 @@ void r300_emit_invariant_state(struct r300_context* r300) /*** Graphics Backend (GB) ***/ /* Various GB enables */ - OUT_CS_REG(R300_GB_ENABLE, 0x0); + OUT_CS_REG(R300_GB_ENABLE, R300_GB_POINT_STUFF_ENABLE | + R300_GB_LINE_STUFF_ENABLE | + R300_GB_TRIANGLE_STUFF_ENABLE); /* Subpixel multisampling for AA * These are commented out because glisse's CS checker doesn't like them. * I presume these will be re-enabled later. diff --git a/src/gallium/drivers/r300/r300_surface.h b/src/gallium/drivers/r300/r300_surface.h index d01f0b143f5..f9e98b2ec9c 100644 --- a/src/gallium/drivers/r300/r300_surface.h +++ b/src/gallium/drivers/r300/r300_surface.h @@ -73,9 +73,9 @@ static struct r300_rs_state rs_clear_state = { }; static struct r300_rs_block r3xx_rs_block_clear_state = { - .ip[0] = R500_RS_SEL_S(R300_RS_SEL_K0) | - R500_RS_SEL_T(R300_RS_SEL_K0) | - R500_RS_SEL_R(R300_RS_SEL_K0) | + .ip[0] = R500_RS_SEL_S(R300_RS_SEL_C0) | + R500_RS_SEL_T(R300_RS_SEL_C0) | + R500_RS_SEL_R(R300_RS_SEL_C0) | R500_RS_SEL_Q(R300_RS_SEL_K1), .inst[0] = R300_RS_INST_COL_CN_WRITE, .count = R300_IT_COUNT(0) | R300_IC_COUNT(1) | R300_HIRES_EN, diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index 3adbb715f37..d68a1041063 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -53,7 +53,6 @@ static unsigned translate_opcode(unsigned opcode) case 
TGSI_OPCODE_SUB: return OPCODE_SUB; case TGSI_OPCODE_LRP: return OPCODE_LRP; /* case TGSI_OPCODE_CND: return OPCODE_CND; */ - /* case TGSI_OPCODE_CND0: return OPCODE_CND0; */ case TGSI_OPCODE_DP2A: return OPCODE_DP2A; /* gap */ case TGSI_OPCODE_FRC: return OPCODE_FRC; diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c index 2cb903bba2f..12a6e37be62 100644 --- a/src/gallium/drivers/r300/r300_vs.c +++ b/src/gallium/drivers/r300/r300_vs.c @@ -116,7 +116,7 @@ void r300_translate_vertex_shader(struct r300_context* r300, /* Setup the compiler */ rc_init(&compiler.Base); - compiler.Base.Debug = 1; + compiler.Base.Debug = DBG_ON(r300, DBG_VP); compiler.code = &vs->code; compiler.UserData = vs; diff --git a/src/gallium/drivers/softpipe/sp_clear.c b/src/gallium/drivers/softpipe/sp_clear.c index fa59277438c..d3af18e162b 100644 --- a/src/gallium/drivers/softpipe/sp_clear.c +++ b/src/gallium/drivers/softpipe/sp_clear.c @@ -85,5 +85,7 @@ softpipe_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba, /* non-cached surface */ pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, cv); #endif - } + } + + softpipe->dirty_render_cache = TRUE; } diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index 4ab718f233d..ae0af4d0557 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -1277,8 +1277,10 @@ trace_context_create(struct pipe_screen *_screen, tr_ctx->base.set_sampler_textures = trace_context_set_sampler_textures; tr_ctx->base.set_vertex_buffers = trace_context_set_vertex_buffers; tr_ctx->base.set_vertex_elements = trace_context_set_vertex_elements; - tr_ctx->base.surface_copy = trace_context_surface_copy; - tr_ctx->base.surface_fill = trace_context_surface_fill; + if (pipe->surface_copy) + tr_ctx->base.surface_copy = trace_context_surface_copy; + if (pipe->surface_fill) + tr_ctx->base.surface_fill = trace_context_surface_fill; 
tr_ctx->base.clear = trace_context_clear; tr_ctx->base.flush = trace_context_flush; tr_ctx->base.is_texture_referenced = trace_is_texture_referenced; diff --git a/src/gallium/include/pipe/p_compiler.h b/src/gallium/include/pipe/p_compiler.h index e6a67f8c2fd..c13cffceb0a 100644 --- a/src/gallium/include/pipe/p_compiler.h +++ b/src/gallium/include/pipe/p_compiler.h @@ -93,9 +93,11 @@ typedef int _Bool; #endif +#ifndef __HAIKU__ typedef unsigned int uint; -typedef unsigned char ubyte; typedef unsigned short ushort; +#endif +typedef unsigned char ubyte; #if 0 #define boolean bool diff --git a/src/gallium/include/pipe/p_config.h b/src/gallium/include/pipe/p_config.h index 4152d6ac36a..de99957d9d0 100644 --- a/src/gallium/include/pipe/p_config.h +++ b/src/gallium/include/pipe/p_config.h @@ -140,6 +140,9 @@ #define PIPE_OS_WINDOWS #endif +#if defined(__HAIKU__) +#define PIPE_OS_HAIKU +#endif /* * Subsystem. diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index f0ba4fb308c..5fa6c9af30b 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -184,7 +184,7 @@ union tgsi_immediate_data #define TGSI_OPCODE_SUB 17 #define TGSI_OPCODE_LRP 18 #define TGSI_OPCODE_CND 19 -#define TGSI_OPCODE_CND0 20 + /* gap */ #define TGSI_OPCODE_DP2A 21 /* gap */ #define TGSI_OPCODE_FRC 24 diff --git a/src/gallium/include/pipe/p_thread.h b/src/gallium/include/pipe/p_thread.h index 96e8e087447..b1606dc6526 100644 --- a/src/gallium/include/pipe/p_thread.h +++ b/src/gallium/include/pipe/p_thread.h @@ -39,7 +39,7 @@ #include "util/u_debug.h" /* for assert */ -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_HAIKU) #include <pthread.h> /* POSIX threads headers */ #include <stdio.h> /* for perror() */ @@ -213,7 +213,7 @@ typedef unsigned pipe_condvar; */ typedef struct { 
-#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_HAIKU) pthread_key_t key; #elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) DWORD key; @@ -228,7 +228,7 @@ typedef struct { static INLINE void pipe_tsd_init(pipe_tsd *tsd) { -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_HAIKU) if (pthread_key_create(&tsd->key, NULL/*free*/) != 0) { perror("pthread_key_create(): failed to allocate key for thread specific data"); exit(-1); @@ -245,7 +245,7 @@ pipe_tsd_get(pipe_tsd *tsd) if (tsd->initMagic != (int) PIPE_TSD_INIT_MAGIC) { pipe_tsd_init(tsd); } -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_HAIKU) return pthread_getspecific(tsd->key); #elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) assert(0); @@ -262,7 +262,7 @@ pipe_tsd_set(pipe_tsd *tsd, void *value) if (tsd->initMagic != (int) PIPE_TSD_INIT_MAGIC) { pipe_tsd_init(tsd); } -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_HAIKU) if (pthread_setspecific(tsd->key, value) != 0) { perror("pthread_set_specific() failed"); exit(-1); diff --git a/src/gallium/state_trackers/dri/dri_drawable.c b/src/gallium/state_trackers/dri/dri_drawable.c index bcfd1c06fec..5cec9e329d9 100644 --- a/src/gallium/state_trackers/dri/dri_drawable.c +++ b/src/gallium/state_trackers/dri/dri_drawable.c @@ -189,12 +189,10 @@ dri_get_buffers(__DRIdrawablePrivate * dPriv) format = drawable->color_format; break; case __DRI_BUFFER_DEPTH: - index = ST_SURFACE_DEPTH; - format = drawable->depth_format; - break; + case __DRI_BUFFER_DEPTH_STENCIL: case 
__DRI_BUFFER_STENCIL: index = ST_SURFACE_DEPTH; - format = drawable->stencil_format; + format = drawable->depth_stencil_format; break; case __DRI_BUFFER_ACCUM: default: @@ -215,6 +213,18 @@ dri_get_buffers(__DRIdrawablePrivate * dPriv) dri_drawable->w, dri_drawable->h, buffers[i].pitch); + switch (buffers[i].attachment) { + case __DRI_BUFFER_FAKE_FRONT_LEFT: + case __DRI_BUFFER_BACK_LEFT: + drawable->color_format = surface->format; + break; + case __DRI_BUFFER_DEPTH: + case __DRI_BUFFER_DEPTH_STENCIL: + case __DRI_BUFFER_STENCIL: + drawable->depth_stencil_format = surface->format; + break; + } + st_set_framebuffer_surface(drawable->stfb, index, surface); pipe_surface_reference(&surface, NULL); } @@ -241,9 +251,7 @@ void dri2_set_tex_buffer2(__DRIcontext *pDRICtx, GLint target, st_get_framebuffer_surface(drawable->stfb, ST_SURFACE_FRONT_LEFT, &ps); st_bind_texture_surface(ps, target == GL_TEXTURE_2D ? ST_TEXTURE_2D : - ST_TEXTURE_RECT, 0, - format == GLX_TEXTURE_FORMAT_RGBA_EXT ? - PIPE_FORMAT_R8G8B8A8_UNORM : PIPE_FORMAT_R8G8B8X8_UNORM); + ST_TEXTURE_RECT, 0, drawable->color_format); } void dri2_set_tex_buffer(__DRIcontext *pDRICtx, GLint target, @@ -311,43 +319,31 @@ dri_create_buffer(__DRIscreenPrivate * sPriv, switch(visual->depthBits) { default: case 0: - drawable->depth_format = PIPE_FORMAT_NONE; + drawable->depth_stencil_format = PIPE_FORMAT_NONE; break; case 16: - drawable->depth_format = PIPE_FORMAT_Z16_UNORM; + drawable->depth_stencil_format = PIPE_FORMAT_Z16_UNORM; break; case 24: if (visual->stencilBits == 0) { - drawable->depth_format = (screen->d_depth_bits_last) ? + drawable->depth_stencil_format = (screen->d_depth_bits_last) ? PIPE_FORMAT_X8Z24_UNORM: PIPE_FORMAT_Z24X8_UNORM; } else { - drawable->depth_format = (screen->sd_depth_bits_last) ? + drawable->depth_stencil_format = (screen->sd_depth_bits_last) ? 
PIPE_FORMAT_S8Z24_UNORM: PIPE_FORMAT_Z24S8_UNORM; } break; case 32: - drawable->depth_format = PIPE_FORMAT_Z32_UNORM; - break; - } - - switch(visual->stencilBits) { - default: - case 0: - drawable->stencil_format = PIPE_FORMAT_NONE; - break; - case 8: - drawable->stencil_format = (screen->sd_depth_bits_last) ? - PIPE_FORMAT_S8Z24_UNORM: - PIPE_FORMAT_Z24S8_UNORM; + drawable->depth_stencil_format = PIPE_FORMAT_Z32_UNORM; break; } drawable->stfb = st_create_framebuffer(visual, drawable->color_format, - drawable->depth_format, - drawable->stencil_format, + drawable->depth_stencil_format, + drawable->depth_stencil_format, dPriv->w, dPriv->h, (void *)drawable); if (drawable->stfb == NULL) @@ -366,9 +362,11 @@ dri_create_buffer(__DRIscreenPrivate * sPriv, drawable->attachments[i++] = __DRI_BUFFER_BACK_LEFT; else drawable->attachments[i++] = __DRI_BUFFER_FAKE_FRONT_LEFT; - if (visual->depthBits) + if (visual->depthBits && visual->stencilBits) + drawable->attachments[i++] = __DRI_BUFFER_DEPTH_STENCIL; + else if (visual->depthBits) drawable->attachments[i++] = __DRI_BUFFER_DEPTH; - if (visual->stencilBits) + else if (visual->stencilBits) drawable->attachments[i++] = __DRI_BUFFER_STENCIL; drawable->num_attachments = i; diff --git a/src/gallium/state_trackers/dri/dri_drawable.h b/src/gallium/state_trackers/dri/dri_drawable.h index 9f9cb29b974..b910930db42 100644 --- a/src/gallium/state_trackers/dri/dri_drawable.h +++ b/src/gallium/state_trackers/dri/dri_drawable.h @@ -63,8 +63,7 @@ struct dri_drawable unsigned int cur_fences; enum pipe_format color_format; - enum pipe_format depth_format; - enum pipe_format stencil_format; + enum pipe_format depth_stencil_format; }; static INLINE struct dri_drawable * diff --git a/src/gallium/state_trackers/glx/xlib/xm_api.c b/src/gallium/state_trackers/glx/xlib/xm_api.c index 651a9a9fea7..957002ddd55 100644 --- a/src/gallium/state_trackers/glx/xlib/xm_api.c +++ b/src/gallium/state_trackers/glx/xlib/xm_api.c @@ -668,7 +668,7 @@ XMesaVisual 
XMesaCreateVisual( Display *display, * at a later time. */ v->visinfo = (XVisualInfo *) MALLOC(sizeof(*visinfo)); - if(!v->visinfo) { + if (!v->visinfo) { _mesa_free(v); return NULL; } @@ -755,7 +755,7 @@ XMesaContext XMesaCreateContext( XMesaVisual v, XMesaContext share_list ) { static GLboolean firstTime = GL_TRUE; static struct pipe_screen *screen = NULL; - struct pipe_context *pipe; + struct pipe_context *pipe = NULL; XMesaContext c; GLcontext *mesaCtx; uint pf; @@ -781,8 +781,7 @@ XMesaContext XMesaCreateContext( XMesaVisual v, XMesaContext share_list ) if (screen == NULL) goto fail; - pipe = driver.create_pipe_context( screen, - (void *)c ); + pipe = driver.create_pipe_context(screen, (void *) c); if (pipe == NULL) goto fail; @@ -795,23 +794,15 @@ XMesaContext XMesaCreateContext( XMesaVisual v, XMesaContext share_list ) mesaCtx = c->st->ctx; c->st->ctx->DriverCtx = c; -#if 00 - _mesa_enable_sw_extensions(mesaCtx); - _mesa_enable_1_3_extensions(mesaCtx); - _mesa_enable_1_4_extensions(mesaCtx); - _mesa_enable_1_5_extensions(mesaCtx); - _mesa_enable_2_0_extensions(mesaCtx); -#endif - return c; - fail: +fail: if (c->st) st_destroy_context(c->st); else if (pipe) pipe->destroy(pipe); - FREE(c); + _mesa_free(c); return NULL; } @@ -1165,7 +1156,7 @@ void XMesaFlush( XMesaContext c ) XMesaBuffer XMesaFindBuffer( Display *dpy, Drawable d ) { XMesaBuffer b; - for (b=XMesaBufferList; b; b=b->Next) { + for (b = XMesaBufferList; b; b = b->Next) { if (b->drawable == d && b->xm_visual->display == dpy) { return b; } diff --git a/src/gallium/state_trackers/xorg/xorg_composite.c b/src/gallium/state_trackers/xorg/xorg_composite.c index 86402a0d130..15c955450d0 100644 --- a/src/gallium/state_trackers/xorg/xorg_composite.c +++ b/src/gallium/state_trackers/xorg/xorg_composite.c @@ -4,6 +4,7 @@ #include "cso_cache/cso_context.h" #include "util/u_draw_quad.h" +#include "util/u_math.h" #include "pipe/p_inlines.h" @@ -40,6 +41,40 @@ static const struct xorg_composite_blend 
xorg_blends[] = { PIPE_BLENDFACTOR_INV_SRC_ALPHA, PIPE_BLENDFACTOR_INV_SRC_ALPHA }, }; + +static INLINE void +pixel_to_float4(Pixel pixel, float *color) +{ + CARD32 r, g, b, a; + + a = (pixel >> 24) & 0xff; + r = (pixel >> 16) & 0xff; + g = (pixel >> 8) & 0xff; + b = (pixel >> 0) & 0xff; + color[0] = ((float)r) / 255.; + color[1] = ((float)g) / 255.; + color[2] = ((float)b) / 255.; + color[3] = ((float)a) / 255.; +} + +static INLINE void +render_pixel_to_float4(PictFormatPtr format, + CARD32 pixel, float *color) +{ + CARD32 r, g, b, a; + + debug_assert(format->type == PictTypeDirect); + + r = (pixel >> format->direct.red) & format->direct.redMask; + g = (pixel >> format->direct.green) & format->direct.greenMask; + b = (pixel >> format->direct.blue) & format->direct.blueMask; + a = (pixel >> format->direct.alpha) & format->direct.alphaMask; + color[0] = ((float)r) / ((float)format->direct.redMask); + color[1] = ((float)g) / ((float)format->direct.greenMask); + color[2] = ((float)b) / ((float)format->direct.blueMask); + color[3] = ((float)a) / ((float)format->direct.alphaMask); +} + struct acceleration_info { int op : 16; int with_mask : 1; @@ -76,80 +111,195 @@ blend_for_op(int op) return xorg_blends[BLEND_OP_OVER]; } - -static struct pipe_buffer * -setup_vertex_data_tex(struct exa_context *ctx, - float x0, float y0, float x1, float y1, - float x2, float y2, float x3, float y3, - float s0, float t0, float s1, float t1, - float z) +static INLINE int +render_repeat_to_gallium(int mode) { - ctx->vertices[0][0][0] = x0; - ctx->vertices[0][0][1] = y0; - ctx->vertices[0][0][2] = z; - ctx->vertices[0][1][0] = s0; /*s*/ - ctx->vertices[0][1][1] = t0; /*t*/ + switch(mode) { + case RepeatNone: + return PIPE_TEX_WRAP_CLAMP; + case RepeatNormal: + return PIPE_TEX_WRAP_REPEAT; + case RepeatReflect: + return PIPE_TEX_WRAP_MIRROR_REPEAT; + case RepeatPad: + return PIPE_TEX_WRAP_CLAMP_TO_EDGE; + default: + debug_printf("Unsupported repeat mode\n"); + } + return 
PIPE_TEX_WRAP_REPEAT; +} - ctx->vertices[1][0][0] = x1; - ctx->vertices[1][0][1] = y1; - ctx->vertices[1][0][2] = z; - ctx->vertices[1][1][0] = s1; /*s*/ - ctx->vertices[1][1][1] = t0; /*t*/ - ctx->vertices[2][0][0] = x2; - ctx->vertices[2][0][1] = y2; - ctx->vertices[2][0][2] = z; - ctx->vertices[2][1][0] = s1; - ctx->vertices[2][1][1] = t1; +static INLINE void +setup_vertex0(float vertex[2][4], float x, float y, + float color[4]) +{ + vertex[0][0] = x; + vertex[0][1] = y; + vertex[0][2] = 0.f; /*z*/ + vertex[0][3] = 1.f; /*w*/ + + vertex[1][0] = color[0]; /*r*/ + vertex[1][1] = color[1]; /*g*/ + vertex[1][2] = color[2]; /*b*/ + vertex[1][3] = color[3]; /*a*/ +} - ctx->vertices[3][0][0] = x3; - ctx->vertices[3][0][1] = y3; - ctx->vertices[3][0][2] = z; - ctx->vertices[3][1][0] = s0; - ctx->vertices[3][1][1] = t1; +static struct pipe_buffer * +setup_vertex_data0(struct exa_context *ctx, + int srcX, int srcY, int maskX, int maskY, + int dstX, int dstY, int width, int height) +{ + float vertices[4][2][4]; + + /* 1st vertex */ + setup_vertex0(vertices[0], dstX, dstY, + ctx->solid_color); + /* 2nd vertex */ + setup_vertex0(vertices[1], dstX + width, dstY, + ctx->solid_color); + /* 3rd vertex */ + setup_vertex0(vertices[2], dstX + width, dstY + height, + ctx->solid_color); + /* 4th vertex */ + setup_vertex0(vertices[3], dstX, dstY + height, + ctx->solid_color); + + return pipe_user_buffer_create(ctx->pipe->screen, + vertices, + sizeof(vertices)); +} - return pipe_user_buffer_create(ctx->ctx->screen, - ctx->vertices, - sizeof(ctx->vertices)); +static INLINE void +setup_vertex1(float vertex[2][4], float x, float y, float s, float t) +{ + vertex[0][0] = x; + vertex[0][1] = y; + vertex[0][2] = 0.f; /*z*/ + vertex[0][3] = 1.f; /*w*/ + + vertex[1][0] = s; /*s*/ + vertex[1][1] = t; /*t*/ + vertex[1][2] = 0.f; /*r*/ + vertex[1][3] = 1.f; /*q*/ } -static void -draw_texture(struct exa_context *exa, - struct pipe_texture *tex, - float x1offset, float y1offset, - float x2offset, 
float y2offset, - float x1, float y1, - float x2, float y2, - float x3, float y3, - float x4, float y4) -{ - struct pipe_context *pipe = exa->ctx; - struct pipe_buffer *buf; +static struct pipe_buffer * +setup_vertex_data1(struct exa_context *ctx, + int srcX, int srcY, int maskX, int maskY, + int dstX, int dstY, int width, int height) +{ + float vertices[4][2][4]; float s0, t0, s1, t1; + struct pipe_texture *src = ctx->bound_textures[0]; + + s0 = srcX / src->width[0]; + s1 = srcX + width / src->width[0]; + t0 = srcY / src->height[0]; + t1 = srcY + height / src->height[0]; + + /* 1st vertex */ + setup_vertex1(vertices[0], dstX, dstY, + s0, t0); + /* 2nd vertex */ + setup_vertex1(vertices[1], dstX + width, dstY, + s1, t0); + /* 3rd vertex */ + setup_vertex1(vertices[2], dstX + width, dstY + height, + s1, t1); + /* 4th vertex */ + setup_vertex1(vertices[3], dstX, dstY + height, + s0, t1); + + return pipe_user_buffer_create(ctx->pipe->screen, + vertices, + sizeof(vertices)); +} - assert(tex->width[0] != 0); - assert(tex->height[0] != 0); +static struct pipe_buffer * +setup_vertex_data_tex(struct exa_context *ctx, + float x0, float y0, float x1, float y1, + float s0, float t0, float s1, float t1, + float z) +{ + float vertices[4][2][4]; + + /* 1st vertex */ + setup_vertex1(vertices[0], x0, y0, + s0, t0); + /* 2nd vertex */ + setup_vertex1(vertices[1], x1, y0, + s1, t0); + /* 3rd vertex */ + setup_vertex1(vertices[2], x1, y1, + s1, t1); + /* 4th vertex */ + setup_vertex1(vertices[3], x0, y1, + s0, t1); + + return pipe_user_buffer_create(ctx->pipe->screen, + vertices, + sizeof(vertices)); +} - s0 = x1offset / tex->width[0]; - s1 = x2offset / tex->width[0]; - t0 = y1offset / tex->height[0]; - t1 = y2offset / tex->height[0]; - /* draw quad */ - buf = setup_vertex_data_tex(exa, x1, y1, x2, y2, x3, y3, x4, y4, - s0, t0, s1, t1, 0.0f); - if (buf) { - util_draw_vertex_buffer(pipe, buf, 0, - PIPE_PRIM_TRIANGLE_FAN, - 4, /* verts */ - 2); /* attribs/vert */ - - 
pipe_buffer_reference(&buf, - NULL); - } +static INLINE void +setup_vertex2(float vertex[3][4], float x, float y, + float s0, float t0, float s1, float t1) +{ + vertex[0][0] = x; + vertex[0][1] = y; + vertex[0][2] = 0.f; /*z*/ + vertex[0][3] = 1.f; /*w*/ + + vertex[1][0] = s0; /*s*/ + vertex[1][1] = t0; /*t*/ + vertex[1][2] = 0.f; /*r*/ + vertex[1][3] = 1.f; /*q*/ + + vertex[2][0] = s1; /*s*/ + vertex[2][1] = t1; /*t*/ + vertex[2][2] = 0.f; /*r*/ + vertex[2][3] = 1.f; /*q*/ +} - cso_restore_vertex_shader(exa->cso); +static struct pipe_buffer * +setup_vertex_data2(struct exa_context *ctx, + int srcX, int srcY, int maskX, int maskY, + int dstX, int dstY, int width, int height) +{ + float vertices[4][3][4]; + float st0[4], st1[4]; + struct pipe_texture *src = ctx->bound_textures[0]; + struct pipe_texture *mask = ctx->bound_textures[0]; + + st0[0] = srcX / src->width[0]; + st0[1] = srcY / src->height[0]; + st0[2] = srcX + width / src->width[0]; + st0[3] = srcY + height / src->height[0]; + + st1[0] = maskX / mask->width[0]; + st1[1] = maskY / mask->height[0]; + st1[2] = maskX + width / mask->width[0]; + st1[3] = maskY + height / mask->height[0]; + + /* 1st vertex */ + setup_vertex2(vertices[0], dstX, dstY, + st0[0], st0[1], st1[0], st1[1]); + /* 2nd vertex */ + setup_vertex2(vertices[1], dstX + width, dstY, + st0[2], st0[1], st1[2], st1[1]); + /* 3rd vertex */ + setup_vertex2(vertices[2], dstX + width, dstY + height, + st0[2], st0[3], st1[2], st1[3]); + /* 4th vertex */ + setup_vertex2(vertices[3], dstX, dstY + height, + st0[0], st0[3], st1[0], st1[3]); + + return pipe_user_buffer_create(ctx->pipe->screen, + vertices, + sizeof(vertices)); } boolean xorg_composite_accelerated(int op, @@ -185,16 +335,20 @@ boolean xorg_composite_accelerated(int op, } static void -bind_framebuffer_state(struct exa_context *exa, PicturePtr pDstPicture, - struct exa_pixmap_priv *pDst) +bind_clip_state(struct exa_context *exa) +{ +} + +static void +bind_framebuffer_state(struct exa_context 
*exa, struct exa_pixmap_priv *pDst) { unsigned i; struct pipe_framebuffer_state state; struct pipe_surface *surface = exa_gpu_surface(exa, pDst); memset(&state, 0, sizeof(struct pipe_framebuffer_state)); - state.width = pDstPicture->pDrawable->width; - state.height = pDstPicture->pDrawable->height; + state.width = pDst->tex->width[0]; + state.height = pDst->tex->height[0]; state.nr_cbufs = 1; state.cbufs[0] = surface; @@ -232,19 +386,22 @@ set_viewport(struct exa_context *exa, int width, int height, } static void -bind_viewport_state(struct exa_context *exa, PicturePtr pDstPicture) +bind_viewport_state(struct exa_context *exa, struct exa_pixmap_priv *pDst) { - int width = pDstPicture->pDrawable->width; - int height = pDstPicture->pDrawable->height; + int width = pDst->tex->width[0]; + int height = pDst->tex->height[0]; + + debug_printf("Bind viewport (%d, %d)\n", width, height); - set_viewport(exa, width, height, Y0_BOTTOM); + set_viewport(exa, width, height, Y0_TOP); } static void bind_blend_state(struct exa_context *exa, int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture) { - boolean component_alpha = pSrcPicture->componentAlpha; + boolean component_alpha = (pSrcPicture) ? 
+ pSrcPicture->componentAlpha : FALSE; struct xorg_composite_blend blend_opt; struct pipe_blend_state blend; @@ -284,9 +441,24 @@ bind_shaders(struct exa_context *exa, int op, unsigned vs_traits = 0, fs_traits = 0; struct xorg_shader shader; + exa->has_solid_color = FALSE; + if (pSrcPicture) { - vs_traits |= VS_COMPOSITE; - fs_traits |= FS_COMPOSITE; + if (pSrcPicture->pSourcePict) { + if (pSrcPicture->pSourcePict->type == SourcePictTypeSolidFill) { + fs_traits |= FS_SOLID_FILL; + vs_traits |= VS_SOLID_FILL; + render_pixel_to_float4(pSrcPicture->pFormat, + pSrcPicture->pSourcePict->solidFill.color, + exa->solid_color); + exa->has_solid_color = TRUE; + } else { + debug_assert("!gradients not supported"); + } + } else { + fs_traits |= FS_COMPOSITE; + vs_traits |= VS_COMPOSITE; + } } if (pMaskPicture) { @@ -309,35 +481,43 @@ bind_samplers(struct exa_context *exa, int op, struct exa_pixmap_priv *pDst) { struct pipe_sampler_state *samplers[PIPE_MAX_SAMPLERS]; - struct pipe_texture *textures[PIPE_MAX_SAMPLERS]; struct pipe_sampler_state src_sampler, mask_sampler; + exa->num_bound_samplers = 0; + memset(&src_sampler, 0, sizeof(struct pipe_sampler_state)); memset(&mask_sampler, 0, sizeof(struct pipe_sampler_state)); if (pSrcPicture && pSrc) { - src_sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - src_sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + unsigned src_wrap = render_repeat_to_gallium( + pSrcPicture->repeatType); + src_sampler.wrap_s = src_wrap; + src_sampler.wrap_t = src_wrap; src_sampler.min_img_filter = PIPE_TEX_MIPFILTER_NEAREST; src_sampler.mag_img_filter = PIPE_TEX_MIPFILTER_NEAREST; src_sampler.normalized_coords = 1; samplers[0] = &src_sampler; - textures[0] = pSrc->tex; + exa->bound_textures[0] = pSrc->tex; + ++exa->num_bound_samplers; } if (pMaskPicture && pMask) { - mask_sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - mask_sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + unsigned mask_wrap = render_repeat_to_gallium( + pMaskPicture->repeatType); + 
mask_sampler.wrap_s = mask_wrap; + mask_sampler.wrap_t = mask_wrap; mask_sampler.min_img_filter = PIPE_TEX_MIPFILTER_NEAREST; mask_sampler.mag_img_filter = PIPE_TEX_MIPFILTER_NEAREST; mask_sampler.normalized_coords = 1; samplers[1] = &mask_sampler; - textures[1] = pMask->tex; + exa->bound_textures[1] = pMask->tex; + ++exa->num_bound_samplers; } - cso_set_samplers(exa->cso, 3, + cso_set_samplers(exa->cso, exa->num_bound_samplers, (const struct pipe_sampler_state **)samplers); - cso_set_sampler_textures(exa->cso, 3, textures); + cso_set_sampler_textures(exa->cso, exa->num_bound_samplers, + exa->bound_textures); } static void @@ -352,15 +532,15 @@ setup_vs_constant_buffer(struct exa_context *exa, struct pipe_constant_buffer *cbuf = &exa->vs_const_buffer; pipe_buffer_reference(&cbuf->buffer, NULL); - cbuf->buffer = pipe_buffer_create(exa->ctx->screen, 16, + cbuf->buffer = pipe_buffer_create(exa->pipe->screen, 16, PIPE_BUFFER_USAGE_CONSTANT, param_bytes); if (cbuf->buffer) { - pipe_buffer_write(exa->ctx->screen, cbuf->buffer, + pipe_buffer_write(exa->pipe->screen, cbuf->buffer, 0, param_bytes, vs_consts); } - exa->ctx->set_constant_buffer(exa->ctx, PIPE_SHADER_VERTEX, 0, cbuf); + exa->pipe->set_constant_buffer(exa->pipe, PIPE_SHADER_VERTEX, 0, cbuf); } @@ -374,22 +554,22 @@ setup_fs_constant_buffer(struct exa_context *exa) struct pipe_constant_buffer *cbuf = &exa->fs_const_buffer; pipe_buffer_reference(&cbuf->buffer, NULL); - cbuf->buffer = pipe_buffer_create(exa->ctx->screen, 16, + cbuf->buffer = pipe_buffer_create(exa->pipe->screen, 16, PIPE_BUFFER_USAGE_CONSTANT, param_bytes); if (cbuf->buffer) { - pipe_buffer_write(exa->ctx->screen, cbuf->buffer, + pipe_buffer_write(exa->pipe->screen, cbuf->buffer, 0, param_bytes, fs_consts); } - exa->ctx->set_constant_buffer(exa->ctx, PIPE_SHADER_FRAGMENT, 0, cbuf); + exa->pipe->set_constant_buffer(exa->pipe, PIPE_SHADER_FRAGMENT, 0, cbuf); } static void -setup_constant_buffers(struct exa_context *exa, PicturePtr pDstPicture) 
+setup_constant_buffers(struct exa_context *exa, struct exa_pixmap_priv *pDst) { - int width = pDstPicture->pDrawable->width; - int height = pDstPicture->pDrawable->height; + int width = pDst->tex->width[0]; + int height = pDst->tex->height[0]; setup_vs_constant_buffer(exa, width, height); setup_fs_constant_buffer(exa); @@ -404,15 +584,15 @@ boolean xorg_composite_bind_state(struct exa_context *exa, struct exa_pixmap_priv *pMask, struct exa_pixmap_priv *pDst) { - bind_framebuffer_state(exa, pDstPicture, pDst); - bind_viewport_state(exa, pDstPicture); + bind_framebuffer_state(exa, pDst); + bind_viewport_state(exa, pDst); bind_blend_state(exa, op, pSrcPicture, pMaskPicture); bind_rasterizer_state(exa); bind_shaders(exa, op, pSrcPicture, pMaskPicture); - bind_samplers(exa, op, pSrcPicture, pMaskPicture, pDstPicture, - pSrc, pMask, pDst); - - setup_constant_buffers(exa, pDstPicture); + bind_samplers(exa, op, pSrcPicture, pMaskPicture, + pDstPicture, pSrc, pMask, pDst); + bind_clip_state(exa); + setup_constant_buffers(exa, pDst); return FALSE; } @@ -422,5 +602,418 @@ void xorg_composite(struct exa_context *exa, int srcX, int srcY, int maskX, int maskY, int dstX, int dstY, int width, int height) { + struct pipe_context *pipe = exa->pipe; + struct pipe_buffer *buf = 0; + + if (exa->num_bound_samplers == 0 ) { /* solid fill */ + buf = setup_vertex_data0(exa, + srcX, srcY, maskX, maskY, + dstX, dstY, width, height); + } else if (exa->num_bound_samplers == 1 ) /* src */ + buf = setup_vertex_data1(exa, + srcX, srcY, maskX, maskY, + dstX, dstY, width, height); + else if (exa->num_bound_samplers == 2) /* src + mask */ + buf = setup_vertex_data2(exa, + srcX, srcY, maskX, maskY, + dstX, dstY, width, height); + else if (exa->num_bound_samplers == 3) { /* src + mask + dst */ + debug_assert(!"src/mask/dst not handled right now"); +#if 0 + buf = setup_vertex_data2(exa, + srcX, srcY, maskX, maskY, + dstX, dstY, width, height); +#endif + } + + if (buf) { + int num_attribs = 1; /*pos*/ 
+ num_attribs += exa->num_bound_samplers; + if (exa->has_solid_color) + ++num_attribs; + + util_draw_vertex_buffer(pipe, buf, 0, + PIPE_PRIM_TRIANGLE_FAN, + 4, /* verts */ + num_attribs); /* attribs/vert */ + + pipe_buffer_reference(&buf, NULL); + } +} + +boolean xorg_solid_bind_state(struct exa_context *exa, + struct exa_pixmap_priv *pixmap, + Pixel fg) +{ + unsigned vs_traits, fs_traits; + struct xorg_shader shader; + + pixel_to_float4(fg, exa->solid_color); + exa->has_solid_color = TRUE; + + exa->solid_color[3] = 1.f; + + debug_printf("Color Pixel=(%d, %d, %d, %d), RGBA=(%f, %f, %f, %f)\n", + (fg >> 24) & 0xff, (fg >> 16) & 0xff, + (fg >> 8) & 0xff, (fg >> 0) & 0xff, + exa->solid_color[0], exa->solid_color[1], + exa->solid_color[2], exa->solid_color[3]); + +#if 0 + exa->solid_color[0] = 1.f; + exa->solid_color[1] = 0.f; + exa->solid_color[2] = 0.f; + exa->solid_color[3] = 1.f; +#endif + + vs_traits = VS_SOLID_FILL; + fs_traits = FS_SOLID_FILL; + + bind_framebuffer_state(exa, pixmap); + bind_viewport_state(exa, pixmap); + bind_rasterizer_state(exa); + bind_blend_state(exa, PictOpSrc, NULL, NULL); + setup_constant_buffers(exa, pixmap); + bind_clip_state(exa); + + shader = xorg_shaders_get(exa->shaders, vs_traits, fs_traits); + cso_set_vertex_shader_handle(exa->cso, shader.vs); + cso_set_fragment_shader_handle(exa->cso, shader.fs); + + return FALSE; +} + +void xorg_solid(struct exa_context *exa, + struct exa_pixmap_priv *pixmap, + int x0, int y0, int x1, int y1) +{ + struct pipe_context *pipe = exa->pipe; + struct pipe_buffer *buf = 0; + float vertices[4][2][4]; + + x0 = 10; y0 = 10; + x1 = 300; y1 = 300; + + /* 1st vertex */ + setup_vertex0(vertices[0], x0, y0, + exa->solid_color); + /* 2nd vertex */ + setup_vertex0(vertices[1], x1, y0, + exa->solid_color); + /* 3rd vertex */ + setup_vertex0(vertices[2], x1, y1, + exa->solid_color); + /* 4th vertex */ + setup_vertex0(vertices[3], x0, y1, + exa->solid_color); + + buf = pipe_user_buffer_create(exa->pipe->screen, + 
vertices, + sizeof(vertices)); + + + if (buf) { + debug_printf("Drawing buf is %p\n", buf); + util_draw_vertex_buffer(pipe, buf, 0, + PIPE_PRIM_TRIANGLE_FAN, + 4, /* verts */ + 2); /* attribs/vert */ + + pipe_buffer_reference(&buf, NULL); + } +} + + +static INLINE void shift_rectx(float coords[4], + const float *bounds, + const float shift) +{ + coords[0] += shift; + coords[2] -= shift; + if (bounds) { + coords[2] = MIN2(coords[2], bounds[2]); + /* bound x/y + width/height */ + if ((coords[0] + coords[2]) > (bounds[0] + bounds[2])) { + coords[2] = (bounds[0] + bounds[2]) - coords[0]; + } + } +} + +static INLINE void shift_recty(float coords[4], + const float *bounds, + const float shift) +{ + coords[1] += shift; + coords[3] -= shift; + if (bounds) { + coords[3] = MIN2(coords[3], bounds[3]); + if ((coords[1] + coords[3]) > (bounds[1] + bounds[3])) { + coords[3] = (bounds[1] + bounds[3]) - coords[1]; + } + } +} + +static INLINE void bound_rect(float coords[4], + const float bounds[4], + float shift[4]) +{ + /* if outside the bounds */ + if (coords[0] > (bounds[0] + bounds[2]) || + coords[1] > (bounds[1] + bounds[3]) || + (coords[0] + coords[2]) < bounds[0] || + (coords[1] + coords[3]) < bounds[1]) { + coords[0] = 0.f; + coords[1] = 0.f; + coords[2] = 0.f; + coords[3] = 0.f; + shift[0] = 0.f; + shift[1] = 0.f; + return; + } + + /* bound x */ + if (coords[0] < bounds[0]) { + shift[0] = bounds[0] - coords[0]; + coords[2] -= shift[0]; + coords[0] = bounds[0]; + } else + shift[0] = 0.f; + + /* bound y */ + if (coords[1] < bounds[1]) { + shift[1] = bounds[1] - coords[1]; + coords[3] -= shift[1]; + coords[1] = bounds[1]; + } else + shift[1] = 0.f; + + shift[2] = bounds[2] - coords[2]; + shift[3] = bounds[3] - coords[3]; + /* bound width/height */ + coords[2] = MIN2(coords[2], bounds[2]); + coords[3] = MIN2(coords[3], bounds[3]); + + /* bound x/y + width/height */ + if ((coords[0] + coords[2]) > (bounds[0] + bounds[2])) { + coords[2] = (bounds[0] + bounds[2]) - coords[0]; + 
} + if ((coords[1] + coords[3]) > (bounds[1] + bounds[3])) { + coords[3] = (bounds[1] + bounds[3]) - coords[1]; + } + + /* if outside the bounds */ + if ((coords[0] + coords[2]) < bounds[0] || + (coords[1] + coords[3]) < bounds[1]) { + coords[0] = 0.f; + coords[1] = 0.f; + coords[2] = 0.f; + coords[3] = 0.f; + return; + } +} + +static INLINE void sync_size(float *src_loc, float *dst_loc) +{ + src_loc[2] = MIN2(src_loc[2], dst_loc[2]); + src_loc[3] = MIN2(src_loc[3], dst_loc[3]); + dst_loc[2] = src_loc[2]; + dst_loc[3] = src_loc[3]; +} + + +static void renderer_copy_texture(struct exa_context *exa, + struct pipe_texture *src, + float sx1, float sy1, + float sx2, float sy2, + struct pipe_texture *dst, + float dx1, float dy1, + float dx2, float dy2) +{ + struct pipe_context *pipe = exa->pipe; + struct pipe_screen *screen = pipe->screen; + struct pipe_buffer *buf; + struct pipe_surface *dst_surf = screen->get_tex_surface( + screen, dst, 0, 0, 0, + PIPE_BUFFER_USAGE_GPU_WRITE); + struct pipe_framebuffer_state fb; + float s0, t0, s1, t1; + struct xorg_shader shader; + + assert(src->width[0] != 0); + assert(src->height[0] != 0); + assert(dst->width[0] != 0); + assert(dst->height[0] != 0); + +#if 0 + debug_printf("copy texture [%f, %f, %f, %f], [%f, %f, %f, %f]\n", + sx1, sy1, sx2, sy2, dx1, dy1, dx2, dy2); +#endif + +#if 1 + s0 = sx1 / src->width[0]; + s1 = sx2 / src->width[0]; + t0 = sy1 / src->height[0]; + t1 = sy2 / src->height[0]; +#else + s0 = 0; + s1 = 1; + t0 = 0; + t1 = 1; +#endif + + assert(screen->is_format_supported(screen, dst_surf->format, PIPE_TEXTURE_2D, + PIPE_TEXTURE_USAGE_RENDER_TARGET, 0)); + + /* save state (restored below) */ + cso_save_blend(exa->cso); + cso_save_samplers(exa->cso); + cso_save_sampler_textures(exa->cso); + cso_save_framebuffer(exa->cso); + cso_save_fragment_shader(exa->cso); + cso_save_vertex_shader(exa->cso); + + cso_save_viewport(exa->cso); + + + /* set misc state we care about */ + { + struct pipe_blend_state blend; + 
memset(&blend, 0, sizeof(blend)); + blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE; + blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; + blend.alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; + blend.colormask = PIPE_MASK_RGBA; + cso_set_blend(exa->cso, &blend); + } + + /* sampler */ + { + struct pipe_sampler_state sampler; + memset(&sampler, 0, sizeof(sampler)); + sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler.normalized_coords = 1; + cso_single_sampler(exa->cso, 0, &sampler); + cso_single_sampler_done(exa->cso); + } + + set_viewport(exa, dst_surf->width, dst_surf->height, Y0_TOP); + + /* texture */ + cso_set_sampler_textures(exa->cso, 1, &src); + + /* shaders */ + shader = xorg_shaders_get(exa->shaders, + VS_COMPOSITE, + FS_COMPOSITE); + cso_set_vertex_shader_handle(exa->cso, shader.vs); + cso_set_fragment_shader_handle(exa->cso, shader.fs); + + /* drawing dest */ + memset(&fb, 0, sizeof(fb)); + fb.width = dst_surf->width; + fb.height = dst_surf->height; + fb.nr_cbufs = 1; + fb.cbufs[0] = dst_surf; + { + int i; + for (i = 1; i < PIPE_MAX_COLOR_BUFS; ++i) + fb.cbufs[i] = 0; + } + cso_set_framebuffer(exa->cso, &fb); + + /* draw quad */ + buf = setup_vertex_data_tex(exa, + dx1, dy1, + dx2, dy2, + s0, t0, s1, t1, + 0.0f); + + if (buf) { + util_draw_vertex_buffer(exa->pipe, buf, 0, + PIPE_PRIM_TRIANGLE_FAN, + 4, /* verts */ + 2); /* attribs/vert */ + + pipe_buffer_reference(&buf, NULL); + } + + /* restore state we changed */ + cso_restore_blend(exa->cso); + cso_restore_samplers(exa->cso); + cso_restore_sampler_textures(exa->cso); + cso_restore_framebuffer(exa->cso); + cso_restore_vertex_shader(exa->cso); + cso_restore_fragment_shader(exa->cso); + 
cso_restore_viewport(exa->cso); + + pipe_surface_reference(&dst_surf, NULL); +} + +void xorg_copy_pixmap(struct exa_context *ctx, + struct exa_pixmap_priv *dst_priv, int dx, int dy, + struct exa_pixmap_priv *src_priv, int sx, int sy, + int width, int height) +{ + float dst_loc[4], src_loc[4]; + float dst_bounds[4], src_bounds[4]; + float src_shift[4], dst_shift[4], shift[4]; + struct pipe_texture *dst = dst_priv->tex; + struct pipe_texture *src = src_priv->tex; + + dst_loc[0] = dx; + dst_loc[1] = dy; + dst_loc[2] = width; + dst_loc[3] = height; + dst_bounds[0] = 0.f; + dst_bounds[1] = 0.f; + dst_bounds[2] = dst->width[0]; + dst_bounds[3] = dst->height[0]; + + src_loc[0] = sx; + src_loc[1] = sy; + src_loc[2] = width; + src_loc[3] = height; + src_bounds[0] = 0.f; + src_bounds[1] = 0.f; + src_bounds[2] = src->width[0]; + src_bounds[3] = src->height[0]; + + bound_rect(src_loc, src_bounds, src_shift); + bound_rect(dst_loc, dst_bounds, dst_shift); + shift[0] = src_shift[0] - dst_shift[0]; + shift[1] = src_shift[1] - dst_shift[1]; + + if (shift[0] < 0) + shift_rectx(src_loc, src_bounds, -shift[0]); + else + shift_rectx(dst_loc, dst_bounds, shift[0]); + + if (shift[1] < 0) + shift_recty(src_loc, src_bounds, -shift[1]); + else + shift_recty(dst_loc, dst_bounds, shift[1]); + + sync_size(src_loc, dst_loc); + + if (src_loc[2] >= 0 && src_loc[3] >= 0 && + dst_loc[2] >= 0 && dst_loc[3] >= 0) { + renderer_copy_texture(ctx, + src, + src_loc[0], + src_loc[1] + src_loc[3], + src_loc[0] + src_loc[2], + src_loc[1], + dst, + dst_loc[0], + dst_loc[1] + dst_loc[3], + dst_loc[0] + dst_loc[2], + dst_loc[1]); + } } diff --git a/src/gallium/state_trackers/xorg/xorg_composite.h b/src/gallium/state_trackers/xorg/xorg_composite.h index 17dfcb199ea..e73f1c704a8 100644 --- a/src/gallium/state_trackers/xorg/xorg_composite.h +++ b/src/gallium/state_trackers/xorg/xorg_composite.h @@ -22,4 +22,16 @@ void xorg_composite(struct exa_context *exa, int srcX, int srcY, int maskX, int maskY, int dstX, int 
dstY, int width, int height); +boolean xorg_solid_bind_state(struct exa_context *exa, + struct exa_pixmap_priv *pixmap, + Pixel fg); +void xorg_solid(struct exa_context *exa, + struct exa_pixmap_priv *pixmap, + int x0, int y0, int x1, int y1); + +void xorg_copy_pixmap(struct exa_context *ctx, + struct exa_pixmap_priv *dst, int dx, int dy, + struct exa_pixmap_priv *src, int sx, int sy, + int width, int height); + #endif diff --git a/src/gallium/state_trackers/xorg/xorg_dri2.c b/src/gallium/state_trackers/xorg/xorg_dri2.c index 3b90421de9f..8a362596c75 100644 --- a/src/gallium/state_trackers/xorg/xorg_dri2.c +++ b/src/gallium/state_trackers/xorg/xorg_dri2.c @@ -33,6 +33,7 @@ #include "xf86_OSproc.h" #include "xorg_tracker.h" +#include "xorg_exa.h" #include "dri2.h" @@ -47,62 +48,57 @@ typedef struct { struct pipe_fence_handle *fence; } *BufferPrivatePtr; -static DRI2BufferPtr -driCreateBuffers(DrawablePtr pDraw, unsigned int *attachments, int count) +static Bool +driDoCreateBuffer(DrawablePtr pDraw, DRI2BufferPtr buffer, unsigned int format) { - struct pipe_texture *depth, *tex; - struct pipe_buffer *buf; + struct pipe_texture *tex = NULL; ScreenPtr pScreen = pDraw->pScreen; ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; modesettingPtr ms = modesettingPTR(pScrn); - BufferPrivatePtr privates; - DRI2BufferPtr buffers; + struct exa_pixmap_priv *exa_priv; + BufferPrivatePtr private = buffer->driverPrivate; PixmapPtr pPixmap; unsigned stride, handle; - boolean have_depth = FALSE, have_stencil = FALSE; - int i; - - buffers = xcalloc(count, sizeof *buffers); - if (!buffers) - goto fail_buffers; - - privates = xcalloc(count, sizeof *privates); - if (!privates) - goto fail_privates; - - for (i = 0; i < count; i++) { - if (attachments[i] == DRI2BufferDepth) - have_depth = TRUE; - else if (attachments[i] == DRI2BufferStencil) - have_stencil = TRUE; - } - - if (have_stencil && !have_depth) - FatalError("Doesn't support only stencil yet\n"); - depth = NULL; - for (i = 0; i < 
count; i++) { - pPixmap = NULL; - tex = NULL; - buf = NULL; - if (attachments[i] == DRI2BufferFrontLeft) { - if (pDraw->type == DRAWABLE_PIXMAP) - pPixmap = (PixmapPtr) pDraw; - else - pPixmap = (*pScreen->GetWindowPixmap)((WindowPtr) pDraw); - pPixmap->refcnt++; - } else if (attachments[i] == DRI2BufferStencil) { - pipe_texture_reference(&tex, depth); - } else if (attachments[i] == DRI2BufferDepth) { + if (pDraw->type == DRAWABLE_PIXMAP) + pPixmap = (PixmapPtr) pDraw; + else + pPixmap = (*pScreen->GetWindowPixmap)((WindowPtr) pDraw); + exa_priv = exaGetPixmapDriverPrivate(pPixmap); + + switch (buffer->attachment) { + default: + if (buffer->attachment != DRI2BufferFakeFrontLeft || + pDraw->type != DRAWABLE_PIXMAP) { + private->pPixmap = (*pScreen->CreatePixmap)(pScreen, pDraw->width, + pDraw->height, + pDraw->depth, + 0); + } + break; + case DRI2BufferFrontLeft: + break; + case DRI2BufferStencil: +#if defined(DRI2INFOREC_VERSION) && DRI2INFOREC_VERSION > 2 + case DRI2BufferDepthStencil: + if (exa_priv->depth_stencil_tex && + !pf_is_depth_stencil(exa_priv->depth_stencil_tex->format)) + exa_priv->depth_stencil_tex = NULL; + /* Fall through */ +#endif + case DRI2BufferDepth: + if (exa_priv->depth_stencil_tex) + pipe_texture_reference(&tex, exa_priv->depth_stencil_tex); + else { struct pipe_texture template; memset(&template, 0, sizeof(template)); template.target = PIPE_TEXTURE_2D; - if (have_stencil) + if (buffer->attachment == DRI2BufferDepth) template.format = ms->ds_depth_bits_last ? - PIPE_FORMAT_S8Z24_UNORM : PIPE_FORMAT_Z24S8_UNORM; - else - template.format = ms->d_depth_bits_last ? PIPE_FORMAT_X8Z24_UNORM : PIPE_FORMAT_Z24X8_UNORM; + else + template.format = ms->ds_depth_bits_last ? 
+ PIPE_FORMAT_S8Z24_UNORM : PIPE_FORMAT_Z24S8_UNORM; pf_get_block(template.format, &template.block); template.width[0] = pDraw->width; template.height[0] = pDraw->height; @@ -111,41 +107,120 @@ driCreateBuffers(DrawablePtr pDraw, unsigned int *attachments, int count) template.tex_usage = PIPE_TEXTURE_USAGE_DEPTH_STENCIL | PIPE_TEXTURE_USAGE_DISPLAY_TARGET; tex = ms->screen->texture_create(ms->screen, &template); - depth = tex; - } else if (attachments[i] == DRI2BufferFakeFrontLeft && - pDraw->type == DRAWABLE_PIXMAP) { - pPixmap = (PixmapPtr) pDraw; - pPixmap->refcnt++; - } else { - pPixmap = (*pScreen->CreatePixmap)(pScreen, pDraw->width, - pDraw->height, - pDraw->depth, - 0); + pipe_texture_reference(&exa_priv->depth_stencil_tex, tex); } + break; + } - if (pPixmap) { - xorg_exa_set_shared_usage(pPixmap); - pScreen->ModifyPixmapHeader(pPixmap, 0, 0, 0, 0, 0, NULL); - tex = xorg_exa_get_texture(pPixmap); - } + if (!private->pPixmap) { + private->pPixmap = pPixmap; + pPixmap->refcnt++; + } + + if (!tex) { + exaMoveInPixmap(private->pPixmap); + xorg_exa_set_shared_usage(private->pPixmap); + pScreen->ModifyPixmapHeader(private->pPixmap, 0, 0, 0, 0, 0, NULL); + tex = xorg_exa_get_texture(private->pPixmap); + } + + if (!tex) + FatalError("NO TEXTURE IN DRI2\n"); + + ms->api->shared_handle_from_texture(ms->api, ms->screen, tex, &stride, &handle); - if (!tex) - FatalError("NO TEXTURE IN DRI2\n"); + buffer->name = handle; + buffer->pitch = stride; + buffer->cpp = 4; + buffer->driverPrivate = private; + buffer->flags = 0; /* not tiled */ + private->tex = tex; + + return TRUE; +} + +static void +driDoDestroyBuffer(DrawablePtr pDraw, DRI2BufferPtr buffer) +{ + ScreenPtr pScreen = pDraw->pScreen; + ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; + modesettingPtr ms = modesettingPTR(pScrn); + BufferPrivatePtr private = buffer->driverPrivate; + struct exa_pixmap_priv *exa_priv = exaGetPixmapDriverPrivate(private->pPixmap); + + pipe_texture_reference(&private->tex, NULL); + 
ms->screen->fence_reference(ms->screen, &private->fence, NULL); + pipe_texture_reference(&exa_priv->depth_stencil_tex, NULL); + (*pScreen->DestroyPixmap)(private->pPixmap); +} - ms->api->shared_handle_from_texture(ms->api, ms->screen, tex, &stride, &handle); +#if defined(DRI2INFOREC_VERSION) && DRI2INFOREC_VERSION > 2 - buffers[i].name = handle; +static DRI2BufferPtr +driCreateBuffer(DrawablePtr pDraw, unsigned int attachment, unsigned int format) +{ + DRI2BufferPtr buffer; + BufferPrivatePtr private; + + buffer = xcalloc(1, sizeof *buffer); + if (!buffer) + return NULL; + + private = xcalloc(1, sizeof *private); + if (!private) { + goto fail; + } + + buffer->attachment = attachment; + buffer->driverPrivate = private; + + if (driDoCreateBuffer(pDraw, buffer, format)) + return buffer; + + xfree(private); +fail: + xfree(buffer); + return NULL; +} + +static void +driDestroyBuffer(DrawablePtr pDraw, DRI2BufferPtr buffer) +{ + driDoDestroyBuffer(pDraw, buffer); + + xfree(buffer->driverPrivate); + xfree(buffer); +} + +#else /* DRI2INFOREC_VERSION <= 2 */ + +static DRI2BufferPtr +driCreateBuffers(DrawablePtr pDraw, unsigned int *attachments, int count) +{ + BufferPrivatePtr privates; + DRI2BufferPtr buffers; + int i; + + buffers = xcalloc(count, sizeof *buffers); + if (!buffers) + goto fail_buffers; + + privates = xcalloc(count, sizeof *privates); + if (!privates) + goto fail_privates; + + for (i = 0; i < count; i++) { buffers[i].attachment = attachments[i]; - buffers[i].pitch = stride; - buffers[i].cpp = 4; buffers[i].driverPrivate = &privates[i]; - buffers[i].flags = 0; /* not tiled */ - privates[i].pPixmap = pPixmap; - privates[i].tex = tex; + + if (!driDoCreateBuffer(pDraw, &buffers[i], 0)) + goto fail; } return buffers; +fail: + xfree(privates); fail_privates: xfree(buffers); fail_buffers: @@ -155,21 +230,10 @@ fail_buffers: static void driDestroyBuffers(DrawablePtr pDraw, DRI2BufferPtr buffers, int count) { - ScreenPtr pScreen = pDraw->pScreen; - ScrnInfoPtr pScrn = 
xf86Screens[pScreen->myNum]; - modesettingPtr ms = modesettingPTR(pScrn); - BufferPrivatePtr private; int i; - (void)ms; for (i = 0; i < count; i++) { - private = buffers[i].driverPrivate; - - pipe_texture_reference(&private->tex, NULL); - ms->screen->fence_reference(ms->screen, &private->fence, NULL); - - if (private->pPixmap) - (*pScreen->DestroyPixmap)(private->pPixmap); + driDoDestroyBuffer(pDraw, &buffers[i]); } if (buffers) { @@ -178,6 +242,8 @@ driDestroyBuffers(DrawablePtr pDraw, DRI2BufferPtr buffers, int count) } } +#endif /* DRI2INFOREC_VERSION */ + static void driCopyRegion(DrawablePtr pDraw, RegionPtr pRegion, DRI2BufferPtr pDestBuffer, DRI2BufferPtr pSrcBuffer) @@ -273,15 +339,25 @@ driScreenInit(ScreenPtr pScreen) modesettingPtr ms = modesettingPTR(pScrn); DRI2InfoRec dri2info; +#if defined(DRI2INFOREC_VERSION) + dri2info.version = DRI2INFOREC_VERSION; +#else dri2info.version = 1; +#endif dri2info.fd = ms->fd; dri2info.driverName = pScrn->driverName; dri2info.deviceName = "/dev/dri/card0"; /* FIXME */ +#if defined(DRI2INFOREC_VERSION) && DRI2INFOREC_VERSION > 2 + dri2info.CreateBuffer = driCreateBuffer; + dri2info.DestroyBuffer = driDestroyBuffer; +#else dri2info.CreateBuffers = driCreateBuffers; dri2info.DestroyBuffers = driDestroyBuffers; +#endif dri2info.CopyRegion = driCopyRegion; + dri2info.Wait = NULL; ms->d_depth_bits_last = ms->screen->is_format_supported(ms->screen, PIPE_FORMAT_X8Z24_UNORM, diff --git a/src/gallium/state_trackers/xorg/xorg_driver.c b/src/gallium/state_trackers/xorg/xorg_driver.c index bc3fd54bdb4..643b6b3b9e4 100644 --- a/src/gallium/state_trackers/xorg/xorg_driver.c +++ b/src/gallium/state_trackers/xorg/xorg_driver.c @@ -33,10 +33,8 @@ #include "xf86.h" #include "xf86_OSproc.h" #include "compiler.h" -#include "xf86RAC.h" #include "xf86PciInfo.h" #include "xf86Pci.h" -#include "xf86Resources.h" #include "mipointer.h" #include "micmap.h" #include <X11/extensions/randr.h> @@ -85,41 +83,9 @@ static const OptionInfoRec Options[] 
= { }; /* - * Functions that might be needed - */ - -static const char *exaSymbols[] = { - "exaGetVersion", - "exaDriverInit", - "exaDriverFini", - "exaOffscreenAlloc", - "exaOffscreenFree", - "exaWaitSync", - NULL -}; - -static const char *fbSymbols[] = { - "fbPictureInit", - "fbScreenInit", - NULL -}; - -static const char *ddcSymbols[] = { - "xf86PrintEDID", - "xf86SetDDCproperties", - NULL -}; - -/* * Exported Xorg driver functions to winsys */ -void -xorg_tracker_loader_ref_sym_lists() -{ - LoaderRefSymLists(exaSymbols, fbSymbols, ddcSymbols, NULL); -} - const OptionInfoRec * xorg_tracker_available_options(int chipid, int busid) { @@ -288,10 +254,6 @@ PreInit(ScrnInfoPtr pScrn, int flags) } else ms->entityPrivate = NULL; - if (xf86RegisterResources(ms->pEnt->index, NULL, ResNone)) { - return FALSE; - } - if (xf86IsEntityShared(pScrn->entityList[0])) { if (xf86IsPrimInitDone(pScrn->entityList[0])) { /* do something */ @@ -312,7 +274,6 @@ PreInit(ScrnInfoPtr pScrn, int flags) if (ms->fd < 0) return FALSE; - pScrn->racMemFlags = RAC_FB | RAC_COLORMAP; pScrn->monitor = pScrn->confScreen->monitor; pScrn->progClock = TRUE; pScrn->rgbBits = 8; @@ -398,8 +359,6 @@ PreInit(ScrnInfoPtr pScrn, int flags) return FALSE; } - xf86LoaderReqSymLists(fbSymbols, NULL); - xf86LoadSubModule(pScrn, "exa"); #ifdef DRI2 diff --git a/src/gallium/state_trackers/xorg/xorg_exa.c b/src/gallium/state_trackers/xorg/xorg_exa.c index 3697d26363e..312dab1544c 100644 --- a/src/gallium/state_trackers/xorg/xorg_exa.c +++ b/src/gallium/state_trackers/xorg/xorg_exa.c @@ -82,6 +82,25 @@ exa_get_pipe_format(int depth, enum pipe_format *format, int *bbp) } } +static void +xorg_exa_init_state(struct exa_context *exa) +{ + struct pipe_depth_stencil_alpha_state dsa; + + /* set common initial clip state */ + memset(&dsa, 0, sizeof(struct pipe_depth_stencil_alpha_state)); + cso_set_depth_stencil_alpha(exa->cso, &dsa); +} + +static void +xorg_exa_common_done(struct exa_context *exa) +{ + exa->copy.src = 
NULL; + exa->copy.dst = NULL; + exa->has_solid_color = FALSE; + exa->num_bound_samplers = 0; +} + /* * Static exported EXA functions */ @@ -98,6 +117,68 @@ ExaMarkSync(ScreenPtr pScreen) } static Bool +ExaDownloadFromScreen(PixmapPtr pPix, int x, int y, int w, int h, char *dst, + int dst_pitch) +{ + ScreenPtr pScreen = pPix->drawable.pScreen; + ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; + modesettingPtr ms = modesettingPTR(pScrn); + struct exa_context *exa = ms->exa; + struct exa_pixmap_priv *priv = exaGetPixmapDriverPrivate(pPix); + struct pipe_transfer *transfer; + + if (!priv || !priv->tex) + return FALSE; + + if (exa->pipe->is_texture_referenced(exa->pipe, priv->tex, 0, 0) & + PIPE_REFERENCED_FOR_WRITE) + exa->pipe->flush(exa->pipe, 0, NULL); + + transfer = exa->scrn->get_tex_transfer(exa->scrn, priv->tex, 0, 0, 0, + PIPE_TRANSFER_READ, x, y, w, h); + if (!transfer) + return FALSE; + + util_copy_rect((unsigned char*)dst, &priv->tex->block, dst_pitch, 0, 0, + w, h, exa->scrn->transfer_map(exa->scrn, transfer), + transfer->stride, 0, 0); + + exa->scrn->transfer_unmap(exa->scrn, transfer); + exa->scrn->tex_transfer_destroy(transfer); + + return TRUE; +} + +static Bool +ExaUploadToScreen(PixmapPtr pPix, int x, int y, int w, int h, char *src, + int src_pitch) +{ + ScreenPtr pScreen = pPix->drawable.pScreen; + ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; + modesettingPtr ms = modesettingPTR(pScrn); + struct exa_context *exa = ms->exa; + struct exa_pixmap_priv *priv = exaGetPixmapDriverPrivate(pPix); + struct pipe_transfer *transfer; + + if (!priv || !priv->tex) + return FALSE; + + transfer = exa->scrn->get_tex_transfer(exa->scrn, priv->tex, 0, 0, 0, + PIPE_TRANSFER_WRITE, x, y, w, h); + if (!transfer) + return FALSE; + + util_copy_rect(exa->scrn->transfer_map(exa->scrn, transfer), + &priv->tex->block, transfer->stride, 0, 0, w, h, + (unsigned char*)src, src_pitch, 0, 0); + + exa->scrn->transfer_unmap(exa->scrn, transfer); + 
exa->scrn->tex_transfer_destroy(transfer); + + return TRUE; +} + +static Bool ExaPrepareAccess(PixmapPtr pPix, int index) { ScreenPtr pScreen = pPix->drawable.pScreen; @@ -116,9 +197,9 @@ ExaPrepareAccess(PixmapPtr pPix, int index) if (priv->map_count++ == 0) { - if (exa->ctx->is_texture_referenced(exa->ctx, priv->tex, 0, 0) & + if (exa->pipe->is_texture_referenced(exa->pipe, priv->tex, 0, 0) & PIPE_REFERENCED_FOR_WRITE) - exa->ctx->flush(exa->ctx, 0, NULL); + exa->pipe->flush(exa->pipe, 0, NULL); priv->map_transfer = exa->scrn->get_tex_transfer(exa->scrn, priv->tex, 0, 0, 0, @@ -169,15 +250,22 @@ ExaDone(PixmapPtr pPixmap) if (!priv) return; - if (priv->src_surf) - exa->scrn->tex_surface_destroy(priv->src_surf); - priv->src_surf = NULL; +#if 1 + xorg_exa_flush(exa, PIPE_FLUSH_RENDER_CACHE, NULL); +#else + xorg_finish(exa); +#endif + xorg_exa_common_done(exa); } static void ExaDoneComposite(PixmapPtr pPixmap) { + ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum]; + modesettingPtr ms = modesettingPTR(pScrn); + struct exa_context *exa = ms->exa; + xorg_exa_common_done(exa); } static Bool @@ -188,6 +276,7 @@ ExaPrepareSolid(PixmapPtr pPixmap, int alu, Pixel planeMask, Pixel fg) struct exa_pixmap_priv *priv = exaGetPixmapDriverPrivate(pPixmap); struct exa_context *exa = ms->exa; + debug_printf("ExaPrepareSolid - test\n"); if (pPixmap->drawable.depth < 15) return FALSE; @@ -200,12 +289,11 @@ ExaPrepareSolid(PixmapPtr pPixmap, int alu, Pixel planeMask, Pixel fg) if (alu != GXcopy) return FALSE; - if (!exa->ctx || !exa->ctx->surface_fill) + if (!exa->pipe) return FALSE; - priv->color = fg; - - return TRUE; + debug_printf(" ExaPrepareSolid(0x%x)\n", fg); + return xorg_solid_bind_state(exa, priv, fg); } static void @@ -215,12 +303,18 @@ ExaSolid(PixmapPtr pPixmap, int x0, int y0, int x1, int y1) modesettingPtr ms = modesettingPTR(pScrn); struct exa_context *exa = ms->exa; struct exa_pixmap_priv *priv = exaGetPixmapDriverPrivate(pPixmap); - struct 
pipe_surface *surf = exa_gpu_surface(exa, priv); - exa->ctx->surface_fill(exa->ctx, surf, x0, y0, x1 - x0, y1 - y0, - priv->color); + debug_printf("\tExaSolid(%d, %d, %d, %d)\n", x0, y0, x1, y1); - exa->scrn->tex_surface_destroy(surf); +#if 0 + if (x0 == 0 && y0 == 0 && + x1 == priv->tex->width[0] && + y1 == priv->tex->height[0]) { + exa->ctx->clear(exa->ctx, PIPE_CLEAR_COLOR | PIPE_CLEAR_DEPTHSTENCIL, + exa->solid_color, 1., 0); + } else +#endif + xorg_solid(exa, priv, x0, y0, x1, y1) ; } static Bool @@ -233,6 +327,8 @@ ExaPrepareCopy(PixmapPtr pSrcPixmap, PixmapPtr pDstPixmap, int xdir, struct exa_pixmap_priv *priv = exaGetPixmapDriverPrivate(pDstPixmap); struct exa_pixmap_priv *src_priv = exaGetPixmapDriverPrivate(pSrcPixmap); + debug_printf("ExaPrepareCopy\n"); + if (alu != GXcopy) return FALSE; @@ -248,27 +344,33 @@ ExaPrepareCopy(PixmapPtr pSrcPixmap, PixmapPtr pDstPixmap, int xdir, if (!priv->tex || !src_priv->tex) return FALSE; - if (!exa->ctx || !exa->ctx->surface_copy) + if (!exa->pipe) return FALSE; - priv->src_surf = exa_gpu_surface(exa, src_priv); + exa->copy.src = src_priv; + exa->copy.dst = priv; - return TRUE; + /*XXX disabled until some issues with syncing are fixed */ + return FALSE; } static void ExaCopy(PixmapPtr pDstPixmap, int srcX, int srcY, int dstX, int dstY, int width, int height) { - ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; - modesettingPtr ms = modesettingPTR(pScrn); - struct exa_context *exa = ms->exa; - struct exa_pixmap_priv *priv = exaGetPixmapDriverPrivate(pDstPixmap); - struct pipe_surface *surf = exa_gpu_surface(exa, priv); + ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; + modesettingPtr ms = modesettingPTR(pScrn); + struct exa_context *exa = ms->exa; + struct exa_pixmap_priv *priv = exaGetPixmapDriverPrivate(pDstPixmap); + + debug_printf("\tExaCopy(srcx=%d, srcy=%d, dstX=%d, dstY=%d, w=%d, h=%d)\n", + srcX, srcY, dstX, dstY, width, height); + + debug_assert(priv == 
exa->copy.dst); - exa->ctx->surface_copy(exa->ctx, surf, dstX, dstY, priv->src_surf, - srcX, srcY, width, height); - exa->scrn->tex_surface_destroy(surf); + xorg_copy_pixmap(exa, exa->copy.dst, dstX, dstY, + exa->copy.src, srcX, srcY, + width, height); } static Bool @@ -280,6 +382,8 @@ ExaPrepareComposite(int op, PicturePtr pSrcPicture, modesettingPtr ms = modesettingPTR(pScrn); struct exa_context *exa = ms->exa; + debug_printf("ExaPrepareComposite\n"); + return xorg_composite_bind_state(exa, op, pSrcPicture, pMaskPicture, pDstPicture, exaGetPixmapDriverPrivate(pSrc), @@ -296,6 +400,8 @@ ExaComposite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY, struct exa_context *exa = ms->exa; struct exa_pixmap_priv *priv = exaGetPixmapDriverPrivate(pDst); + debug_printf("\tExaComposite\n"); + xorg_composite(exa, priv, srcX, srcY, maskX, maskY, dstX, dstY, width, height); } @@ -475,16 +581,16 @@ ExaModifyPixmapHeader(PixmapPtr pPixmap, int width, int height, if (priv->tex) { struct pipe_surface *dst_surf; + struct pipe_surface *src_surf; dst_surf = exa->scrn->get_tex_surface(exa->scrn, texture, 0, 0, 0, PIPE_BUFFER_USAGE_GPU_WRITE); - priv->src_surf = exa_gpu_surface(exa, priv); - exa->ctx->surface_copy(exa->ctx, dst_surf, 0, 0, priv->src_surf, - 0, 0, min(width, texture->width[0]), - min(height, texture->height[0])); + src_surf = exa_gpu_surface(exa, priv); + exa->pipe->surface_copy(exa->pipe, dst_surf, 0, 0, src_surf, + 0, 0, min(width, texture->width[0]), + min(height, texture->height[0])); exa->scrn->tex_surface_destroy(dst_surf); - exa->scrn->tex_surface_destroy(priv->src_surf); - priv->src_surf = NULL; + exa->scrn->tex_surface_destroy(src_surf); } else if (pPixmap->devPrivate.ptr) { struct pipe_transfer *transfer; @@ -501,6 +607,9 @@ ExaModifyPixmapHeader(PixmapPtr pPixmap, int width, int height, pPixmap->devKind, 0, 0); exa->scrn->transfer_unmap(exa->scrn, transfer); exa->scrn->tex_transfer_destroy(transfer); + + xfree(pPixmap->devPrivate.ptr); + 
pPixmap->devPrivate.ptr = NULL; } } #ifdef DRM_MODE_FEATURE_DIRTYFB @@ -549,8 +658,8 @@ xorg_exa_close(ScrnInfoPtr pScrn) cso_destroy_context(exa->cso); } - if (exa->ctx) - exa->ctx->destroy(exa->ctx); + if (exa->pipe) + exa->pipe->destroy(exa->pipe); exaDriverFini(pScrn->pScreen); xfree(exa); @@ -563,7 +672,6 @@ xorg_exa_init(ScrnInfoPtr pScrn) modesettingPtr ms = modesettingPTR(pScrn); struct exa_context *exa; ExaDriverPtr pExa; - int i; exa = xcalloc(1, sizeof(struct exa_context)); if (!exa) @@ -584,6 +692,12 @@ xorg_exa_init(ScrnInfoPtr pScrn) pExa->pixmapOffsetAlign = 0; pExa->pixmapPitchAlign = 1; pExa->flags = EXA_OFFSCREEN_PIXMAPS | EXA_HANDLES_PIXMAPS; +#ifdef EXA_SUPPORTS_PREPARE_AUX + pExa->flags |= EXA_SUPPORTS_PREPARE_AUX; +#endif +#ifdef EXA_MIXED_PIXMAPS + pExa->flags |= EXA_MIXED_PIXMAPS; +#endif pExa->maxX = 8191; /* FIXME */ pExa->maxY = 8191; /* FIXME */ @@ -600,6 +714,8 @@ xorg_exa_init(ScrnInfoPtr pScrn) pExa->Composite = ExaComposite; pExa->DoneComposite = ExaDoneComposite; pExa->PixmapIsOffscreen = ExaPixmapIsOffscreen; + pExa->DownloadFromScreen = ExaDownloadFromScreen; + pExa->UploadToScreen = ExaUploadToScreen; pExa->PrepareAccess = ExaPrepareAccess; pExa->FinishAccess = ExaFinishAccess; pExa->CreatePixmap = ExaCreatePixmap; @@ -611,20 +727,15 @@ xorg_exa_init(ScrnInfoPtr pScrn) } exa->scrn = ms->screen; - exa->ctx = ms->api->create_context(ms->api, exa->scrn); + exa->pipe = ms->api->create_context(ms->api, exa->scrn); /* Share context with DRI */ - ms->ctx = exa->ctx; - - /* common vertex data setup */ - for (i = 0; i < 4; ++i) { - exa->vertices[i][0][3] = 1.0f; /* w */ - exa->vertices[i][1][2] = 0.0f; /* r */ - exa->vertices[i][1][3] = 1.0f; /* q */ - } + ms->ctx = exa->pipe; - exa->cso = cso_create_context(exa->ctx); + exa->cso = cso_create_context(exa->pipe); exa->shaders = xorg_shaders_create(exa); + xorg_exa_init_state(exa); + return (void *)exa; out_err: @@ -642,3 +753,19 @@ exa_gpu_surface(struct exa_context *exa, struct 
exa_pixmap_priv *priv) } +void xorg_exa_flush(struct exa_context *exa, uint pipeFlushFlags, + struct pipe_fence_handle **fence) +{ + exa->pipe->flush(exa->pipe, pipeFlushFlags, fence); +} + +void xorg_exa_finish(struct exa_context *exa) +{ + struct pipe_fence_handle *fence = NULL; + + xorg_exa_flush(exa, PIPE_FLUSH_RENDER_CACHE, &fence); + + exa->pipe->screen->fence_finish(exa->pipe->screen, fence, 0); + exa->pipe->screen->fence_reference(exa->pipe->screen, &fence, NULL); +} + diff --git a/src/gallium/state_trackers/xorg/xorg_exa.h b/src/gallium/state_trackers/xorg/xorg_exa.h index 90ad8400ffd..43949b04a44 100644 --- a/src/gallium/state_trackers/xorg/xorg_exa.h +++ b/src/gallium/state_trackers/xorg/xorg_exa.h @@ -8,10 +8,13 @@ struct cso_context; struct xorg_shaders; +/* src + mask + dst */ +#define MAX_EXA_SAMPLERS 3 + struct exa_context { ExaDriverPtr pExa; - struct pipe_context *ctx; + struct pipe_context *pipe; struct pipe_screen *scrn; struct cso_context *cso; struct xorg_shaders *shaders; @@ -19,7 +22,16 @@ struct exa_context struct pipe_constant_buffer vs_const_buffer; struct pipe_constant_buffer fs_const_buffer; - float vertices[4][2][4]; + struct pipe_texture *bound_textures[MAX_EXA_SAMPLERS]; + int num_bound_samplers; + + float solid_color[4]; + boolean has_solid_color; + + struct { + struct exa_pixmap_priv *src; + struct exa_pixmap_priv *dst; + } copy; }; @@ -29,8 +41,7 @@ struct exa_pixmap_priv int tex_flags; struct pipe_texture *tex; - unsigned int color; - struct pipe_surface *src_surf; /* for copies */ + struct pipe_texture *depth_stencil_tex; struct pipe_transfer *map_transfer; unsigned map_count; @@ -39,5 +50,8 @@ struct exa_pixmap_priv struct pipe_surface * exa_gpu_surface(struct exa_context *exa, struct exa_pixmap_priv *priv); +void xorg_exa_flush(struct exa_context *exa, uint pipeFlushFlags, + struct pipe_fence_handle **fence); +void xorg_exa_finish(struct exa_context *exa); #endif diff --git a/src/gallium/state_trackers/xorg/xorg_exa_tgsi.c 
b/src/gallium/state_trackers/xorg/xorg_exa_tgsi.c index b5288bde4fb..694eded09a2 100644 --- a/src/gallium/state_trackers/xorg/xorg_exa_tgsi.c +++ b/src/gallium/state_trackers/xorg/xorg_exa_tgsi.c @@ -18,22 +18,24 @@ #include "cso_cache/cso_hash.h" /* Vertex shader: - * IN[0] = src_pos - * IN[1] = mask_pos - * IN[2] = dst_pos + * IN[0] = vertex pos + * IN[1] = src tex coord | solid fill color + * IN[2] = mask tex coord + * IN[3] = dst tex coord * CONST[0] = (2/dst_width, 2/dst_height, 1, 1) * CONST[1] = (-1, -1, 0, 0) * - * OUT[0] = src_pos - * OUT[1] = mask_pos - * OUT[2] = dst_pos + * OUT[0] = vertex pos + * OUT[1] = src tex coord | solid fill color + * OUT[2] = mask tex coord + * OUT[3] = dst tex coord */ /* Fragment shader: * SAMP[0] = src * SAMP[1] = mask * SAMP[2] = dst - * IN[0] = pos src + * IN[0] = pos src | solid fill color * IN[1] = pos mask * IN[2] = pos dst * CONST[0] = (0, 0, 0, 1) @@ -84,6 +86,150 @@ vs_normalize_coords(struct ureg_program *ureg, struct ureg_src coords, return ret; } +static void +linear_gradient(struct ureg_program *ureg, + struct ureg_dst out, + struct ureg_src pos, + struct ureg_src sampler, + struct ureg_src coords, + struct ureg_src const0124, + struct ureg_src matrow0, + struct ureg_src matrow1, + struct ureg_src matrow2) +{ + struct ureg_dst temp0 = ureg_DECL_temporary(ureg); + struct ureg_dst temp1 = ureg_DECL_temporary(ureg); + struct ureg_dst temp2 = ureg_DECL_temporary(ureg); + struct ureg_dst temp3 = ureg_DECL_temporary(ureg); + struct ureg_dst temp4 = ureg_DECL_temporary(ureg); + struct ureg_dst temp5 = ureg_DECL_temporary(ureg); + + ureg_MOV(ureg, + ureg_writemask(temp0, TGSI_WRITEMASK_XY), pos); + ureg_MOV(ureg, + ureg_writemask(temp0, TGSI_WRITEMASK_Z), + ureg_scalar(const0124, TGSI_SWIZZLE_Y)); + + ureg_DP3(ureg, temp1, matrow0, ureg_src(temp0)); + ureg_DP3(ureg, temp2, matrow1, ureg_src(temp0)); + ureg_DP3(ureg, temp3, matrow2, ureg_src(temp0)); + ureg_RCP(ureg, temp3, ureg_src(temp3)); + ureg_MUL(ureg, temp1, 
ureg_src(temp1), ureg_src(temp3)); + ureg_MUL(ureg, temp2, ureg_src(temp2), ureg_src(temp3)); + + ureg_MOV(ureg, ureg_writemask(temp4, TGSI_WRITEMASK_X), + ureg_src(temp1)); + ureg_MOV(ureg, ureg_writemask(temp4, TGSI_WRITEMASK_Y), + ureg_src(temp2)); + + ureg_MUL(ureg, temp0, + ureg_scalar(coords, TGSI_SWIZZLE_Y), + ureg_scalar(ureg_src(temp4), TGSI_SWIZZLE_Y)); + ureg_MAD(ureg, temp1, + ureg_scalar(coords, TGSI_SWIZZLE_X), + ureg_scalar(ureg_src(temp4), TGSI_SWIZZLE_X), + ureg_src(temp0)); + + ureg_MUL(ureg, temp2, + ureg_src(temp1), + ureg_scalar(coords, TGSI_SWIZZLE_Z)); + + ureg_TEX(ureg, out, + TGSI_TEXTURE_1D, ureg_src(temp2), sampler); + + ureg_release_temporary(ureg, temp0); + ureg_release_temporary(ureg, temp1); + ureg_release_temporary(ureg, temp2); + ureg_release_temporary(ureg, temp3); + ureg_release_temporary(ureg, temp4); + ureg_release_temporary(ureg, temp5); +} + + +static void +radial_gradient(struct ureg_program *ureg, + struct ureg_dst out, + struct ureg_src pos, + struct ureg_src sampler, + struct ureg_src coords, + struct ureg_src const0124, + struct ureg_src matrow0, + struct ureg_src matrow1, + struct ureg_src matrow2) +{ + struct ureg_dst temp0 = ureg_DECL_temporary(ureg); + struct ureg_dst temp1 = ureg_DECL_temporary(ureg); + struct ureg_dst temp2 = ureg_DECL_temporary(ureg); + struct ureg_dst temp3 = ureg_DECL_temporary(ureg); + struct ureg_dst temp4 = ureg_DECL_temporary(ureg); + struct ureg_dst temp5 = ureg_DECL_temporary(ureg); + + ureg_MOV(ureg, + ureg_writemask(temp0, TGSI_WRITEMASK_XY), + pos); + ureg_MOV(ureg, + ureg_writemask(temp0, TGSI_WRITEMASK_Z), + ureg_scalar(const0124, TGSI_SWIZZLE_Y)); + + ureg_DP3(ureg, temp1, matrow0, ureg_src(temp0)); + ureg_DP3(ureg, temp2, matrow1, ureg_src(temp0)); + ureg_DP3(ureg, temp3, matrow2, ureg_src(temp0)); + ureg_RCP(ureg, temp3, ureg_src(temp3)); + ureg_MUL(ureg, temp1, ureg_src(temp1), ureg_src(temp3)); + ureg_MUL(ureg, temp2, ureg_src(temp2), ureg_src(temp3)); + + ureg_MOV(ureg, 
ureg_writemask(temp5, TGSI_WRITEMASK_X), + ureg_src(temp1)); + ureg_MOV(ureg, ureg_writemask(temp5, TGSI_WRITEMASK_Y), + ureg_src(temp2)); + + ureg_MUL(ureg, temp0, ureg_scalar(coords, TGSI_SWIZZLE_Y), + ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_Y)); + ureg_MAD(ureg, temp1, + ureg_scalar(coords, TGSI_SWIZZLE_X), + ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_X), + ureg_src(temp0)); + ureg_ADD(ureg, temp1, + ureg_src(temp1), ureg_src(temp1)); + ureg_MUL(ureg, temp3, + ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_Y), + ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_Y)); + ureg_MAD(ureg, temp4, + ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_X), + ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_X), + ureg_src(temp3)); + ureg_MOV(ureg, temp4, ureg_negate(ureg_src(temp4))); + ureg_MUL(ureg, temp2, + ureg_scalar(coords, TGSI_SWIZZLE_Z), + ureg_src(temp4)); + ureg_MUL(ureg, temp0, + ureg_scalar(const0124, TGSI_SWIZZLE_W), + ureg_src(temp2)); + ureg_MUL(ureg, temp3, + ureg_src(temp1), ureg_src(temp1)); + ureg_SUB(ureg, temp2, + ureg_src(temp3), ureg_src(temp0)); + ureg_RSQ(ureg, temp2, ureg_abs(ureg_src(temp2))); + ureg_RCP(ureg, temp2, ureg_src(temp2)); + ureg_SUB(ureg, temp1, + ureg_src(temp2), ureg_src(temp1)); + ureg_ADD(ureg, temp0, + ureg_scalar(coords, TGSI_SWIZZLE_Z), + ureg_scalar(coords, TGSI_SWIZZLE_Z)); + ureg_RCP(ureg, temp0, ureg_src(temp0)); + ureg_MUL(ureg, temp2, + ureg_src(temp1), ureg_src(temp0)); + ureg_TEX(ureg, out, TGSI_TEXTURE_1D, + ureg_src(temp2), sampler); + + ureg_release_temporary(ureg, temp0); + ureg_release_temporary(ureg, temp1); + ureg_release_temporary(ureg, temp2); + ureg_release_temporary(ureg, temp3); + ureg_release_temporary(ureg, temp4); + ureg_release_temporary(ureg, temp5); +} + static void * create_vs(struct pipe_context *pipe, unsigned vs_traits) @@ -92,6 +238,9 @@ create_vs(struct pipe_context *pipe, struct ureg_src src; struct ureg_dst dst; struct ureg_src const0, const1; + boolean is_fill = vs_traits & VS_FILL; + boolean is_composite = vs_traits & 
VS_COMPOSITE; + boolean has_mask = vs_traits & VS_MASK; ureg = ureg_create(TGSI_PROCESSOR_VERTEX); if (ureg == NULL) @@ -100,18 +249,34 @@ create_vs(struct pipe_context *pipe, const0 = ureg_DECL_constant(ureg); const1 = ureg_DECL_constant(ureg); - if ((vs_traits & VS_COMPOSITE)) { + /* it has to be either a fill or a composite op */ + debug_assert(is_fill ^ is_composite); + + src = ureg_DECL_vs_input(ureg, + TGSI_SEMANTIC_POSITION, 0); + dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0); + src = vs_normalize_coords(ureg, src, + const0, const1); + ureg_MOV(ureg, dst, src); + + + if (is_composite) { + src = ureg_DECL_vs_input(ureg, + TGSI_SEMANTIC_GENERIC, 1); + dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 1); + ureg_MOV(ureg, dst, src); + } + if (is_fill) { src = ureg_DECL_vs_input(ureg, - TGSI_SEMANTIC_POSITION, 0); - dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0); - src = vs_normalize_coords(ureg, src, - const0, const1); + TGSI_SEMANTIC_COLOR, 0); + dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0); ureg_MOV(ureg, dst, src); } - if ((vs_traits & VS_MASK)) { + + if (has_mask) { src = ureg_DECL_vs_input(ureg, - TGSI_SEMANTIC_POSITION, 1); - dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 1); + TGSI_SEMANTIC_GENERIC, 2); + dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 2); ureg_MOV(ureg, dst, src); } @@ -125,27 +290,50 @@ create_fs(struct pipe_context *pipe, unsigned fs_traits) { struct ureg_program *ureg; - struct ureg_src dst_sampler, src_sampler, mask_sampler; - struct ureg_src dst_pos, src_pos, mask_pos; - struct ureg_src src, mask; + struct ureg_src /*dst_sampler,*/ src_sampler, mask_sampler; + struct ureg_src /*dst_pos,*/ src_input, mask_pos; + struct ureg_dst src, mask; struct ureg_dst out; + boolean has_mask = fs_traits & FS_MASK; + boolean is_fill = fs_traits & FS_FILL; + boolean is_composite = fs_traits & FS_COMPOSITE; + boolean is_solid = fs_traits & FS_SOLID_FILL; + boolean is_lingrad = fs_traits & FS_LINGRAD_FILL; + 
boolean is_radgrad = fs_traits & FS_RADGRAD_FILL; ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT); if (ureg == NULL) return 0; + /* it has to be either a fill or a composite op */ + debug_assert(is_fill ^ is_composite); + out = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0); - src_sampler = ureg_DECL_sampler(ureg); - src_pos = ureg_DECL_fs_input(ureg, - TGSI_SEMANTIC_POSITION, - 0, - TGSI_INTERPOLATE_PERSPECTIVE); + if (is_composite) { + src_sampler = ureg_DECL_sampler(ureg, 0); + src_input = ureg_DECL_fs_input(ureg, + TGSI_SEMANTIC_POSITION, + 0, + TGSI_INTERPOLATE_PERSPECTIVE); + } else { + debug_assert(is_fill); + if (is_solid) + src_input = ureg_DECL_fs_input(ureg, + TGSI_SEMANTIC_COLOR, + 0, + TGSI_INTERPOLATE_PERSPECTIVE); + else + src_input = ureg_DECL_fs_input(ureg, + TGSI_SEMANTIC_POSITION, + 0, + TGSI_INTERPOLATE_PERSPECTIVE); + } - if ((fs_traits & FS_MASK)) { - mask_sampler = ureg_DECL_sampler(ureg); + if (has_mask) { + mask_sampler = ureg_DECL_sampler(ureg, 1); mask_pos = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION, 1, @@ -153,21 +341,63 @@ create_fs(struct pipe_context *pipe, } #if 0 /* unused right now */ - dst_sampler = ureg_DECL_sampler(ureg); + dst_sampler = ureg_DECL_sampler(ureg, 2); dst_pos = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION, 2, TGSI_INTERPOLATE_PERSPECTIVE); #endif - if ((fs_traits & FS_MASK)) { - ureg_TEX(ureg, ureg_dst(mask), + if (is_composite) { + if (has_mask) + src = ureg_DECL_temporary(ureg); + else + src = out; + ureg_TEX(ureg, src, + TGSI_TEXTURE_2D, src_input, src_sampler); + } else if (is_fill) { + if (is_solid) { + if (has_mask) + src = ureg_dst(src_input); + else + ureg_MOV(ureg, out, src_input); + } else if (is_lingrad || is_radgrad) { + struct ureg_src coords, const0124, + matrow0, matrow1, matrow2; + + if (has_mask) + src = ureg_DECL_temporary(ureg); + else + src = out; + + coords = ureg_DECL_constant(ureg); + const0124 = ureg_DECL_constant(ureg); + matrow0 = ureg_DECL_constant(ureg); + matrow1 = 
ureg_DECL_constant(ureg); + matrow2 = ureg_DECL_constant(ureg); + + if (is_lingrad) { + linear_gradient(ureg, src, + src_input, src_sampler, + coords, const0124, + matrow0, matrow1, matrow2); + } else if (is_radgrad) { + radial_gradient(ureg, src, + src_input, src_sampler, + coords, const0124, + matrow0, matrow1, matrow2); + } + } else + debug_assert(!"Unknown fill type!"); + } + + if (has_mask) { + mask = ureg_DECL_temporary(ureg); + ureg_TEX(ureg, mask, TGSI_TEXTURE_2D, mask_pos, mask_sampler); /* src IN mask */ - src_in_mask(ureg, out, src, mask); - } else { - ureg_TEX(ureg, out, - TGSI_TEXTURE_2D, src_pos, src_sampler); + src_in_mask(ureg, out, ureg_src(src), ureg_src(mask)); + ureg_release_temporary(ureg, mask); } ureg_END(ureg); @@ -243,9 +473,9 @@ struct xorg_shader xorg_shaders_get(struct xorg_shaders *sc, struct xorg_shader shader = {0}; void *vs, *fs; - vs = shader_from_cache(sc->exa->ctx, PIPE_SHADER_VERTEX, + vs = shader_from_cache(sc->exa->pipe, PIPE_SHADER_VERTEX, sc->vs_hash, vs_traits); - fs = shader_from_cache(sc->exa->ctx, PIPE_SHADER_FRAGMENT, + fs = shader_from_cache(sc->exa->pipe, PIPE_SHADER_FRAGMENT, sc->fs_hash, fs_traits); debug_assert(vs && fs); diff --git a/src/gallium/state_trackers/xorg/xorg_exa_tgsi.h b/src/gallium/state_trackers/xorg/xorg_exa_tgsi.h index 003e5d8caf4..1535a0c8c30 100644 --- a/src/gallium/state_trackers/xorg/xorg_exa_tgsi.h +++ b/src/gallium/state_trackers/xorg/xorg_exa_tgsi.h @@ -6,16 +6,24 @@ enum xorg_vs_traits { VS_COMPOSITE = 1 << 0, VS_MASK = 1 << 1, - VS_FILL = 1 << 2 - /*VS_TRANSFORM = 1 << 3*/ + VS_SOLID_FILL = 1 << 2, + VS_LINGRAD_FILL = 1 << 3, + VS_RADGRAD_FILL = 1 << 4, + VS_FILL = (VS_SOLID_FILL | + VS_LINGRAD_FILL | + VS_RADGRAD_FILL) + /*VS_TRANSFORM = 1 << 5*/ }; enum xorg_fs_traits { FS_COMPOSITE = 1 << 0, FS_MASK = 1 << 1, - FS_FILL = 1 << 2, - FS_LINEAR_GRADIENT = 1 << 3, - FS_RADIAL_GRADIENT = 1 << 4 + FS_SOLID_FILL = 1 << 2, + FS_LINGRAD_FILL = 1 << 3, + FS_RADGRAD_FILL = 1 << 4, + FS_FILL = 
(FS_SOLID_FILL | + FS_LINGRAD_FILL | + FS_RADGRAD_FILL) }; struct xorg_shader { diff --git a/src/gallium/state_trackers/xorg/xorg_winsys.h b/src/gallium/state_trackers/xorg/xorg_winsys.h index d523080e90f..47ee4b9ffd8 100644 --- a/src/gallium/state_trackers/xorg/xorg_winsys.h +++ b/src/gallium/state_trackers/xorg/xorg_winsys.h @@ -37,7 +37,6 @@ #include "xorg-server.h" #include "xf86.h" -#include "xf86Resources.h" #include "pciaccess.h" #ifndef XSERVER_LIBPCIACCESS @@ -46,6 +45,5 @@ void xorg_tracker_set_functions(ScrnInfoPtr scrn); const OptionInfoRec * xorg_tracker_available_options(int chipid, int busid); -void xorg_tracker_loader_ref_sym_lists(void); #endif diff --git a/src/gallium/winsys/drm/intel/dri/SConscript b/src/gallium/winsys/drm/intel/dri/SConscript index 6c00861f517..f9738110723 100644 --- a/src/gallium/winsys/drm/intel/dri/SConscript +++ b/src/gallium/winsys/drm/intel/dri/SConscript @@ -12,8 +12,9 @@ drivers = [ trace, ] -env.SharedLibrary( +env.LoadableModule( target ='i915_dri.so', source = COMMON_GALLIUM_SOURCES, LIBS = drivers + mesa + auxiliaries + env['LIBS'], + SHLIBPREFIX = '', ) diff --git a/src/gallium/winsys/drm/intel/gem/intel_drm_batchbuffer.c b/src/gallium/winsys/drm/intel/gem/intel_drm_batchbuffer.c index 77b3fec17a4..ebd1b607b78 100644 --- a/src/gallium/winsys/drm/intel/gem/intel_drm_batchbuffer.c +++ b/src/gallium/winsys/drm/intel/gem/intel_drm_batchbuffer.c @@ -12,6 +12,9 @@ #define INTEL_BATCH_NO_CLIPRECTS 0x1 #define INTEL_BATCH_CLIPRECTS 0x2 +#undef INTEL_RUN_SYNC +#undef INTEL_MAP_BATCHBUFFER + struct intel_drm_batchbuffer { struct intel_batchbuffer base; @@ -38,8 +41,11 @@ intel_drm_batchbuffer_reset(struct intel_drm_batchbuffer *batch) "gallium3d_batchbuffer", batch->actual_size, 4096); + +#ifdef INTEL_MAP_BATCHBUFFER drm_intel_bo_map(batch->bo, TRUE); batch->base.map = batch->bo->virtual; +#endif memset(batch->base.map, 0, batch->actual_size); batch->base.ptr = batch->base.map; @@ -53,17 +59,21 @@ 
intel_drm_batchbuffer_create(struct intel_winsys *iws) struct intel_drm_winsys *idws = intel_drm_winsys(iws); struct intel_drm_batchbuffer *batch = CALLOC_STRUCT(intel_drm_batchbuffer); + batch->actual_size = idws->max_batch_size; + +#ifdef INTEL_MAP_BATCHBUFFER batch->base.map = NULL; +#else + batch->base.map = MALLOC(batch->actual_size); +#endif batch->base.ptr = NULL; batch->base.size = 0; batch->base.relocs = 0; - batch->base.max_relocs = 100;/*INTEL_DEFAULT_RELOCS;*/ + batch->base.max_relocs = 300;/*INTEL_DEFAULT_RELOCS;*/ batch->base.iws = iws; - batch->actual_size = idws->max_batch_size; - intel_drm_batchbuffer_reset(batch); return &batch->base; @@ -154,7 +164,11 @@ intel_drm_batchbuffer_flush(struct intel_batchbuffer *ibatch, used = batch->base.ptr - batch->base.map; +#ifdef INTEL_MAP_BATCHBUFFER drm_intel_bo_unmap(batch->bo); +#else + drm_intel_bo_subdata(batch->bo, 0, used, batch->base.map); +#endif /* Do the sending to HW */ ret = drm_intel_bo_exec(batch->bo, used, NULL, 0, 0); @@ -172,19 +186,20 @@ intel_drm_batchbuffer_flush(struct intel_batchbuffer *ibatch, drm_intel_bo_unmap(batch->bo); } else { - /* TODO figgure out why the gpu hangs if we don't run sync */ +#ifdef INTEL_RUN_SYNC drm_intel_bo_map(batch->bo, FALSE); drm_intel_bo_unmap(batch->bo); +#endif } if (fence) { ibatch->iws->fence_reference(ibatch->iws, fence, NULL); -#if 0 - (*fence) = intel_drm_fence_create(batch->bo); -#else +#ifdef INTEL_RUN_SYNC /* we run synced to GPU so just pass null */ (*fence) = intel_drm_fence_create(NULL); +#else + (*fence) = intel_drm_fence_create(batch->bo); #endif } @@ -199,7 +214,10 @@ intel_drm_batchbuffer_destroy(struct intel_batchbuffer *ibatch) if (batch->bo) drm_intel_bo_unreference(batch->bo); - free(batch); +#ifndef INTEL_MAP_BATCHBUFFER + FREE(batch->base.map); +#endif + FREE(batch); } void intel_drm_winsys_init_batchbuffer_functions(struct intel_drm_winsys *idws) diff --git a/src/gallium/winsys/drm/intel/gem/intel_drm_buffer.c 
b/src/gallium/winsys/drm/intel/gem/intel_drm_buffer.c index e017cd2e982..0030f915a36 100644 --- a/src/gallium/winsys/drm/intel/gem/intel_drm_buffer.c +++ b/src/gallium/winsys/drm/intel/gem/intel_drm_buffer.c @@ -28,6 +28,7 @@ intel_drm_buffer_create(struct intel_winsys *iws, } else if (type == INTEL_NEW_VERTEX) { name = "gallium3d_vertex"; pool = idws->pools.gem; + buf->map_gtt = TRUE; } else if (type == INTEL_NEW_SCANOUT) { name = "gallium3d_scanout"; pool = idws->pools.gem; @@ -109,6 +110,18 @@ intel_drm_buffer_unmap(struct intel_winsys *iws, drm_intel_bo_unmap(intel_bo(buffer)); } +static int +intel_drm_buffer_write(struct intel_winsys *iws, + struct intel_buffer *buffer, + const void *data, + size_t size, + size_t offset) +{ + struct intel_drm_buffer *buf = intel_drm_buffer(buffer); + + return drm_intel_bo_subdata(buf->bo, offset, size, (void*)data); +} + static void intel_drm_buffer_destroy(struct intel_winsys *iws, struct intel_buffer *buffer) @@ -130,5 +143,6 @@ intel_drm_winsys_init_buffer_functions(struct intel_drm_winsys *idws) idws->base.buffer_set_fence_reg = intel_drm_buffer_set_fence_reg; idws->base.buffer_map = intel_drm_buffer_map; idws->base.buffer_unmap = intel_drm_buffer_unmap; + idws->base.buffer_write = intel_drm_buffer_write; idws->base.buffer_destroy = intel_drm_buffer_destroy; } diff --git a/src/gallium/winsys/drm/intel/xorg/intel_xorg.c b/src/gallium/winsys/drm/intel/xorg/intel_xorg.c index 28107f4b806..369dc356cf8 100644 --- a/src/gallium/winsys/drm/intel/xorg/intel_xorg.c +++ b/src/gallium/winsys/drm/intel/xorg/intel_xorg.c @@ -47,8 +47,8 @@ static SymTabRec intel_xorg_chipsets[] = { }; static PciChipsets intel_xorg_pci_devices[] = { - {PCI_MATCH_ANY, PCI_MATCH_ANY, RES_SHARED_VGA}, - {-1, -1, RES_UNDEFINED} + {PCI_MATCH_ANY, PCI_MATCH_ANY, NULL}, + {-1, -1, NULL} }; static XF86ModuleVersionInfo intel_xorg_version = { @@ -105,12 +105,6 @@ intel_xorg_setup(pointer module, pointer opts, int *errmaj, int *errmin) xf86AddDriver(&modesetting, 
module, HaveDriverFuncs); /* - * Tell the loader about symbols from other modules that this module - * might refer to. - */ - xorg_tracker_loader_ref_sym_lists(); - - /* * The return value must be non-NULL on success even though there * is no TearDownProc. */ diff --git a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c index 07551e7cd16..7bf23cba236 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c +++ b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c @@ -32,6 +32,8 @@ #include "radeon_buffer.h" +#include "radeon_bo_gem.h" + static const char *radeon_get_name(struct pipe_winsys *ws) { return "Radeon/GEM+KMS"; @@ -99,6 +101,7 @@ static struct pipe_buffer *radeon_surface_buffer_create(struct pipe_winsys *ws, unsigned height, enum pipe_format format, unsigned usage, + unsigned tex_usage, unsigned *stride) { struct pipe_format_block block; @@ -134,8 +137,11 @@ static void *radeon_buffer_map(struct pipe_winsys *ws, (struct radeon_pipe_buffer*)buffer; int write = 0; - if (!(flags & PIPE_BUFFER_USAGE_DONTBLOCK)) { - radeon_bo_wait(radeon_buffer->bo); + if (flags & PIPE_BUFFER_USAGE_DONTBLOCK) { + uint32_t domain; + + if (radeon_bo_is_busy(radeon_buffer->bo, &domain)) + return NULL; } if (flags & PIPE_BUFFER_USAGE_CPU_WRITE) { write = 1; @@ -187,7 +193,6 @@ static void radeon_flush_frontbuffer(struct pipe_winsys *pipe_winsys, struct radeon_winsys* radeon_pipe_winsys(int fd) { struct radeon_winsys* radeon_ws; - struct radeon_bo_manager* bom; radeon_ws = CALLOC_STRUCT(radeon_winsys); if (radeon_ws == NULL) { diff --git a/src/gallium/winsys/drm/radeon/core/radeon_drm.c b/src/gallium/winsys/drm/radeon/core/radeon_drm.c index 47376a0f07b..a4011db0b87 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_drm.c +++ b/src/gallium/winsys/drm/radeon/core/radeon_drm.c @@ -98,7 +98,7 @@ struct pipe_buffer* radeon_buffer_from_handle(struct drm_api* api, return &radeon_buffer->base; } -struct pipe_texture* +static 
struct pipe_texture* radeon_texture_from_shared_handle(struct drm_api *api, struct pipe_screen *screen, struct pipe_texture *templ, @@ -116,20 +116,22 @@ radeon_texture_from_shared_handle(struct drm_api *api, return screen->texture_blanket(screen, templ, &stride, buffer); } -boolean radeon_shared_handle_from_texture(struct drm_api *api, - struct pipe_screen *screen, - struct pipe_texture *texture, - unsigned *stride, - unsigned *handle) +static boolean radeon_shared_handle_from_texture(struct drm_api *api, + struct pipe_screen *screen, + struct pipe_texture *texture, + unsigned *stride, + unsigned *handle) { int retval, fd; struct drm_gem_flink flink; struct radeon_pipe_buffer* radeon_buffer; - struct pipe_buffer* buffer = &radeon_buffer->base; - if (!radeon_buffer_from_texture(api, texture, buffer, stride)) { + struct pipe_buffer *buffer; + + if (!radeon_buffer_from_texture(api, texture, &buffer, stride)) { return FALSE; } + radeon_buffer = (struct radeon_pipe_buffer*)buffer; if (!radeon_buffer->flinked) { fd = ((struct radeon_winsys*)screen->winsys)->priv->fd; @@ -150,11 +152,11 @@ boolean radeon_shared_handle_from_texture(struct drm_api *api, return TRUE; } -boolean radeon_local_handle_from_texture(struct drm_api *api, - struct pipe_screen *screen, - struct pipe_texture *texture, - unsigned *stride, - unsigned *handle) +static boolean radeon_local_handle_from_texture(struct drm_api *api, + struct pipe_screen *screen, + struct pipe_texture *texture, + unsigned *stride, + unsigned *handle) { struct pipe_buffer *buffer; if (!radeon_buffer_from_texture(api, texture, &buffer, stride)) { diff --git a/src/gallium/winsys/drm/radeon/core/radeon_r300.c b/src/gallium/winsys/drm/radeon/core/radeon_r300.c index d7238762219..d2d84f1a8f0 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_r300.c +++ b/src/gallium/winsys/drm/radeon/core/radeon_r300.c @@ -137,7 +137,7 @@ static void do_ioctls(struct r300_winsys* winsys, int fd) int target = 0; int retval; - info.value = &target; + 
info.value = (unsigned long)&target; /* First, get the number of pixel pipes */ info.request = RADEON_INFO_NUM_GB_PIPES; diff --git a/src/gallium/winsys/drm/radeon/xorg/radeon_xorg.c b/src/gallium/winsys/drm/radeon/xorg/radeon_xorg.c index f2c6ee5f9a9..837f2aa8fec 100644 --- a/src/gallium/winsys/drm/radeon/xorg/radeon_xorg.c +++ b/src/gallium/winsys/drm/radeon/xorg/radeon_xorg.c @@ -48,8 +48,8 @@ static SymTabRec radeon_xorg_chipsets[] = { }; static PciChipsets radeon_xorg_pci_devices[] = { - {PCI_MATCH_ANY, PCI_MATCH_ANY, RES_SHARED_VGA}, - {-1, -1, RES_UNDEFINED} + {PCI_MATCH_ANY, PCI_MATCH_ANY, NULL}, + {-1, -1, NULL} }; static XF86ModuleVersionInfo radeon_xorg_version = { @@ -106,12 +106,6 @@ radeon_xorg_setup(pointer module, pointer opts, int *errmaj, int *errmin) xf86AddDriver(&modesetting, module, HaveDriverFuncs); /* - * Tell the loader about symbols from other modules that this module - * might refer to. - */ - xorg_tracker_loader_ref_sym_lists(); - - /* * The return value must be non-NULL on success even though there * is no TearDownProc. 
*/ diff --git a/src/gallium/winsys/gdi/gdi_softpipe_winsys.c b/src/gallium/winsys/gdi/gdi_softpipe_winsys.c index 33826524d7a..66120a6a983 100644 --- a/src/gallium/winsys/gdi/gdi_softpipe_winsys.c +++ b/src/gallium/winsys/gdi/gdi_softpipe_winsys.c @@ -166,6 +166,7 @@ gdi_softpipe_surface_buffer_create(struct pipe_winsys *winsys, unsigned width, unsigned height, enum pipe_format format, unsigned usage, + unsigned tex_usage, unsigned *stride) { const unsigned alignment = 64; diff --git a/src/gallium/winsys/xlib/SConscript b/src/gallium/winsys/xlib/SConscript index 518fd2b5a84..467d595d33b 100644 --- a/src/gallium/winsys/xlib/SConscript +++ b/src/gallium/winsys/xlib/SConscript @@ -30,12 +30,12 @@ if env['platform'] == 'linux' \ drivers += [softpipe] if 'llvmpipe' in env['drivers']: - env.Append(CPPDEFINES = 'GALLIUM_LLVMPIPE') - env.Tool('udis86') - env.ParseConfig('llvm-config --libs jit interpreter nativecodegen') - env['LINK'] = env['CXX'] - sources += ['xlib_llvmpipe.c'] - drivers += [llvmpipe] + env.Tool('llvm') + if 'LLVM_VERSION' in env: + env.Append(CPPDEFINES = 'GALLIUM_LLVMPIPE') + env.Tool('udis86') + sources += ['xlib_llvmpipe.c'] + drivers += [llvmpipe] if 'i965simple' in env['drivers']: env.Append(CPPDEFINES = 'GALLIUM_I965SIMPLE') diff --git a/src/gallium/winsys/xlib/xlib_brw_screen.c b/src/gallium/winsys/xlib/xlib_brw_screen.c index 6f3861e2cd6..ef545796f3c 100644 --- a/src/gallium/winsys/xlib/xlib_brw_screen.c +++ b/src/gallium/winsys/xlib/xlib_brw_screen.c @@ -365,7 +365,7 @@ void xlib_brw_buffer_subdata_typed( struct pipe_winsys *pws, unsigned data_type ) { unsigned aub_type = DW_GENERAL_STATE; - unsigned aub_sub_type; + unsigned aub_sub_type = 0; switch (data_type) { case BRW_CC_VP: |