diff options
Diffstat (limited to 'src/gallium')
92 files changed, 3841 insertions, 834 deletions
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 60ffa188df6..c79c56debd6 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -2756,19 +2756,32 @@ exec_instruction( if (mach->ExecMask) { /* do the call */ - /* push the Cond, Loop, Cont stacks */ + /* First, record the depths of the execution stacks. + * This is important for deeply nested/looped return statements. + * We have to unwind the stacks by the correct amount. For a + * real code generator, we could determine the number of entries + * to pop off each stack with simple static analysis and avoid + * implementing this data structure at run time. + */ + mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop; + mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop; + mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop; + /* note that PC was already incremented above */ + mach->CallStack[mach->CallStackTop].ReturnAddr = *pc; + + mach->CallStackTop++; + + /* Second, push the Cond, Loop, Cont, Func stacks */ assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); mach->CondStack[mach->CondStackTop++] = mach->CondMask; assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); mach->ContStack[mach->ContStackTop++] = mach->ContMask; - assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; - /* note that PC was already incremented above */ - mach->CallStack[mach->CallStackTop++] = *pc; + /* Finally, jump to the subroutine */ *pc = inst->InstructionExtLabel.Label; } break; @@ -2785,18 +2798,24 @@ exec_instruction( *pc = -1; return; } - *pc = mach->CallStack[--mach->CallStackTop]; - /* pop the Cond, Loop, Cont stacks */ - assert(mach->CondStackTop > 0); - mach->CondMask = mach->CondStack[--mach->CondStackTop]; - assert(mach->LoopStackTop > 0); - mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; - assert(mach->ContStackTop > 0); - mach->ContMask = mach->ContStack[--mach->ContStackTop]; + assert(mach->CallStackTop > 0); + mach->CallStackTop--; + + mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop; + mach->CondMask = mach->CondStack[mach->CondStackTop]; + + mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop; + mach->LoopMask = mach->LoopStack[mach->LoopStackTop]; + + mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; + mach->ContMask = mach->ContStack[mach->ContStackTop]; + assert(mach->FuncStackTop > 0); mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; + *pc = mach->CallStack[mach->CallStackTop].ReturnAddr; + UPDATE_EXEC_MASK(mach); } break; @@ -3245,7 +3264,6 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) mach->FuncMask = 0xf; mach->ExecMask = 0xf; - mach->CondStackTop = 0; /* temporarily subvert this assertion */ assert(mach->CondStackTop == 0); assert(mach->LoopStackTop == 0); assert(mach->ContStackTop == 0); diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index 3baa94dbdde..c72f76809d4 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -186,6 +186,17 @@ struct tgsi_exec_labels */ #define TGSI_EXEC_MAX_CONST_BUFFER 4096 + +/** function call/activation record */ +struct tgsi_call_record +{ + uint CondStackTop; + uint LoopStackTop; + uint ContStackTop; + uint ReturnAddr; +}; + + /** * Run-time virtual machine state for executing TGSI shader. */ @@ -249,7 +260,7 @@ struct tgsi_exec_machine int FuncStackTop; /** Function call stack for saving/restoring the program counter */ - uint CallStack[TGSI_EXEC_MAX_CALL_NESTING]; + struct tgsi_call_record CallStack[TGSI_EXEC_MAX_CALL_NESTING]; int CallStackTop; struct tgsi_full_instruction *Instructions; diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index 8a13885da9b..53e13b30e63 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -358,7 +358,7 @@ epilog( boolean tgsi_sanity_check( - struct tgsi_token *tokens ) + const struct tgsi_token *tokens ) { struct sanity_check_ctx ctx; diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.h b/src/gallium/auxiliary/tgsi/tgsi_sanity.h index ca45e94c7ad..52263ff8832 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.h +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.h @@ -40,7 +40,7 @@ extern "C" { */ boolean tgsi_sanity_check( - struct tgsi_token *tokens ); + const struct tgsi_token *tokens ); #if defined __cplusplus } diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 3cdf8b9f359..501fc05e729 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -39,8 +39,9 @@ #include "tgsi/tgsi_info.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" -#include "tgsi_exec.h" -#include "tgsi_sse2.h" +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_exec.h" +#include "tgsi/tgsi_sse2.h" #include "rtasm/rtasm_x86sse.h" @@ -1360,6 +1361,32 @@ emit_store( const struct tgsi_full_instruction *inst, unsigned chan_index ) { + switch( inst->Instruction.Saturate ) { + case TGSI_SAT_NONE: + break; + + case TGSI_SAT_ZERO_ONE: + sse_maxps( + func, + make_xmm( xmm ), + get_temp( + TGSI_EXEC_TEMP_00000000_I, + TGSI_EXEC_TEMP_00000000_C ) ); + + sse_minps( + func, + make_xmm( xmm ), + get_temp( + TGSI_EXEC_TEMP_ONE_I, + TGSI_EXEC_TEMP_ONE_C ) ); + break; + + case TGSI_SAT_MINUS_PLUS_ONE: + assert( 0 ); + break; + } + + switch( reg->DstRegister.File ) { case TGSI_FILE_OUTPUT: emit_output( @@ -1388,19 +1415,6 @@ emit_store( default: assert( 0 ); } - - switch( inst->Instruction.Saturate ) { - case TGSI_SAT_NONE: - break; - - case TGSI_SAT_ZERO_ONE: - /* assert( 0 ); */ - break; - - case TGSI_SAT_MINUS_PLUS_ONE: - assert( 0 ); - break; - } } #define STORE( FUNC, INST, XMM, INDEX, CHAN )\ @@ -1747,14 +1761,6 @@ emit_instruction( if (indirect_temp_reference(inst)) return FALSE; - /* we don't handle saturation/clamping yet */ - if (inst->Instruction.Saturate != TGSI_SAT_NONE) - return FALSE; - - /* need to use extra temps to fix SOA dependencies : */ - if (tgsi_check_soa_dependencies(inst)) - return FALSE; - switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ARL: FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { @@ -1768,8 +1774,10 @@ emit_instruction( case TGSI_OPCODE_MOV: case TGSI_OPCODE_SWZ: FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( func, *inst, 0, 0, chan_index ); - STORE( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 4 + chan_index, 0, chan_index ); + } + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 4 + chan_index, 0, chan_index ); } break; @@ -2929,6 +2937,22 @@ tgsi_emit_sse2( parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX ? "vertex shader" : "fragment shader"); } + + if (tgsi_check_soa_dependencies(&parse.FullToken.FullInstruction)) { + uint opcode = parse.FullToken.FullInstruction.Instruction.Opcode; + + /* XXX: we only handle src/dst aliasing in a few opcodes + * currently. Need to use an additional temporay to hold + * the result in the cases where the code is too opaque to + * fix. + */ + if (opcode != TGSI_OPCODE_MOV && + opcode != TGSI_OPCODE_SWZ) { + debug_printf("Warning: src/dst aliasing in instruction" + " is not handled:\n"); + tgsi_dump_instruction(&parse.FullToken.FullInstruction, 1); + } + } break; case TGSI_TOKEN_TYPE_IMMEDIATE: diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c index f7096bd8e2c..654426a903d 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -31,6 +31,7 @@ #include "tgsi/tgsi_ureg.h" #include "tgsi/tgsi_info.h" #include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_sanity.h" #include "util/u_memory.h" #include "util/u_math.h" @@ -70,6 +71,7 @@ struct ureg_tokens { #define UREG_MAX_INPUT PIPE_MAX_ATTRIBS #define UREG_MAX_OUTPUT PIPE_MAX_ATTRIBS +#define UREG_MAX_CONSTANT_RANGE 32 #define UREG_MAX_IMMEDIATE 32 #define UREG_MAX_TEMP 256 #define UREG_MAX_ADDR 2 @@ -86,8 +88,10 @@ struct ureg_program unsigned semantic_name; unsigned semantic_index; unsigned interp; - } input[UREG_MAX_INPUT]; - unsigned nr_inputs; + } fs_input[UREG_MAX_INPUT]; + unsigned nr_fs_inputs; + + unsigned vs_inputs[UREG_MAX_INPUT/32]; struct { unsigned semantic_name; @@ -107,9 +111,13 @@ struct ureg_program unsigned temps_active[UREG_MAX_TEMP / 32]; unsigned nr_temps; - unsigned nr_addrs; + struct { + unsigned first; + unsigned last; + } constant_range[UREG_MAX_CONSTANT_RANGE]; + unsigned nr_constant_ranges; - unsigned nr_constants; + unsigned nr_addrs; unsigned nr_instructions; struct ureg_tokens domain[2]; @@ -119,6 +127,9 @@ static union tgsi_any_token error_tokens[32]; static void tokens_error( struct ureg_tokens *tokens ) { + if (tokens->tokens && tokens->tokens != error_tokens) + FREE(tokens->tokens); + tokens->tokens = error_tokens; tokens->size = Elements(error_tokens); tokens->count = 0; @@ -228,25 +239,25 @@ ureg_src_register( unsigned file, -static struct ureg_src -ureg_DECL_input( struct ureg_program *ureg, - unsigned name, - unsigned index, - unsigned interp_mode ) +struct ureg_src +ureg_DECL_fs_input( struct ureg_program *ureg, + unsigned name, + unsigned index, + unsigned interp_mode ) { unsigned i; - for (i = 0; i < ureg->nr_inputs; i++) { - if (ureg->input[i].semantic_name == name && - ureg->input[i].semantic_index == index) + for (i = 0; i < ureg->nr_fs_inputs; i++) { + if (ureg->fs_input[i].semantic_name == name && + ureg->fs_input[i].semantic_index == index) goto out; } - if (ureg->nr_inputs < UREG_MAX_INPUT) { - ureg->input[i].semantic_name = name; - ureg->input[i].semantic_index = index; - ureg->input[i].interp = interp_mode; - ureg->nr_inputs++; + if (ureg->nr_fs_inputs < UREG_MAX_INPUT) { + ureg->fs_input[i].semantic_name = name; + ureg->fs_input[i].semantic_index = index; + ureg->fs_input[i].interp = interp_mode; + ureg->nr_fs_inputs++; } else { set_bad( ureg ); @@ -257,25 +268,14 @@ out: } - -struct ureg_src -ureg_DECL_fs_input( struct ureg_program *ureg, - unsigned name, - unsigned index, - unsigned interp ) -{ - assert(ureg->processor == TGSI_PROCESSOR_FRAGMENT); - return ureg_DECL_input( ureg, name, index, interp ); -} - - struct ureg_src ureg_DECL_vs_input( struct ureg_program *ureg, - unsigned name, unsigned index ) { assert(ureg->processor == TGSI_PROCESSOR_VERTEX); - return ureg_DECL_input( ureg, name, index, TGSI_INTERPOLATE_CONSTANT ); + + ureg->vs_inputs[index/32] |= 1 << (index % 32); + return ureg_src_register( TGSI_FILE_INPUT, index ); } @@ -313,9 +313,57 @@ out: * value or manage any constant_buffer contents -- that's the * resposibility of the calling code. */ -struct ureg_src ureg_DECL_constant(struct ureg_program *ureg ) +struct ureg_src ureg_DECL_constant(struct ureg_program *ureg, + unsigned index ) { - return ureg_src_register( TGSI_FILE_CONSTANT, ureg->nr_constants++ ); + unsigned minconst = index, maxconst = index; + unsigned i; + + /* Inside existing range? + */ + for (i = 0; i < ureg->nr_constant_ranges; i++) { + if (ureg->constant_range[i].first <= index && + ureg->constant_range[i].last >= index) + goto out; + } + + /* Extend existing range? + */ + for (i = 0; i < ureg->nr_constant_ranges; i++) { + if (ureg->constant_range[i].last == index - 1) { + ureg->constant_range[i].last = index; + goto out; + } + + if (ureg->constant_range[i].first == index + 1) { + ureg->constant_range[i].first = index; + goto out; + } + + minconst = MIN2(minconst, ureg->constant_range[i].first); + maxconst = MAX2(maxconst, ureg->constant_range[i].last); + } + + /* Create new range? + */ + if (ureg->nr_constant_ranges < UREG_MAX_CONSTANT_RANGE) { + i = ureg->nr_constant_ranges++; + ureg->constant_range[i].first = index; + ureg->constant_range[i].last = index; + } + + /* Collapse all ranges down to one: + */ + i = 0; + ureg->constant_range[0].first = minconst; + ureg->constant_range[0].last = maxconst; + ureg->nr_constant_ranges = 1; + +out: + assert(i < ureg->nr_constant_ranges); + assert(ureg->constant_range[i].first <= index); + assert(ureg->constant_range[i].last >= index); + return ureg_src_register( TGSI_FILE_CONSTANT, index ); } @@ -566,6 +614,19 @@ ureg_emit_dst( struct ureg_program *ureg, } +static void validate( unsigned opcode, + unsigned nr_dst, + unsigned nr_src ) +{ +#ifdef DEBUG + const struct tgsi_opcode_info *info = tgsi_get_opcode_info( opcode ); + assert(info); + if(info) { + assert(nr_dst == info->num_dst); + assert(nr_src == info->num_src); + } +#endif +} unsigned ureg_emit_insn(struct ureg_program *ureg, @@ -576,6 +637,8 @@ ureg_emit_insn(struct ureg_program *ureg, { union tgsi_any_token *out; + validate( opcode, num_dst, num_src ); + out = get_tokens( ureg, DOMAIN_INSN, 1 ); out[0].value = 0; out[0].insn.Type = TGSI_TOKEN_TYPE_INSTRUCTION; @@ -678,17 +741,6 @@ ureg_insn(struct ureg_program *ureg, unsigned insn, i; boolean saturate; -#ifdef DEBUG - { - const struct tgsi_opcode_info *info = tgsi_get_opcode_info( opcode ); - assert(info); - if(info) { - assert(nr_dst == info->num_dst); - assert(nr_src == info->num_src); - } - } -#endif - saturate = nr_dst ? dst[0].Saturate : FALSE; insn = ureg_emit_insn( ureg, opcode, saturate, nr_dst, nr_src ); @@ -702,6 +754,53 @@ ureg_insn(struct ureg_program *ureg, ureg_fixup_insn_size( ureg, insn ); } +void +ureg_tex_insn(struct ureg_program *ureg, + unsigned opcode, + const struct ureg_dst *dst, + unsigned nr_dst, + unsigned target, + const struct ureg_src *src, + unsigned nr_src ) +{ + unsigned insn, i; + boolean saturate; + + saturate = nr_dst ? dst[0].Saturate : FALSE; + + insn = ureg_emit_insn( ureg, opcode, saturate, nr_dst, nr_src ); + + ureg_emit_texture( ureg, insn, target ); \ + + for (i = 0; i < nr_dst; i++) + ureg_emit_dst( ureg, dst[i] ); + + for (i = 0; i < nr_src; i++) + ureg_emit_src( ureg, src[i] ); + + ureg_fixup_insn_size( ureg, insn ); +} + + +void +ureg_label_insn(struct ureg_program *ureg, + unsigned opcode, + const struct ureg_src *src, + unsigned nr_src, + unsigned *label_token ) +{ + unsigned insn, i; + + insn = ureg_emit_insn( ureg, opcode, FALSE, 0, nr_src ); + + ureg_emit_label( ureg, insn, label_token ); \ + + for (i = 0; i < nr_src; i++) + ureg_emit_src( ureg, src[i] ); + + ureg_fixup_insn_size( ureg, insn ); +} + static void emit_decl( struct ureg_program *ureg, @@ -777,13 +876,22 @@ static void emit_decls( struct ureg_program *ureg ) { unsigned i; - for (i = 0; i < ureg->nr_inputs; i++) { - emit_decl( ureg, - TGSI_FILE_INPUT, - i, - ureg->input[i].semantic_name, - ureg->input[i].semantic_index, - ureg->input[i].interp ); + if (ureg->processor == TGSI_PROCESSOR_VERTEX) { + for (i = 0; i < UREG_MAX_INPUT; i++) { + if (ureg->vs_inputs[i/32] & (1 << (i%32))) { + emit_decl_range( ureg, TGSI_FILE_INPUT, i, 1 ); + } + } + } + else { + for (i = 0; i < ureg->nr_fs_inputs; i++) { + emit_decl( ureg, + TGSI_FILE_INPUT, + i, + ureg->fs_input[i].semantic_name, + ureg->fs_input[i].semantic_index, + ureg->fs_input[i].interp ); + } } for (i = 0; i < ureg->nr_outputs; i++) { @@ -801,10 +909,13 @@ static void emit_decls( struct ureg_program *ureg ) ureg->sampler[i].Index, 1 ); } - if (ureg->nr_constants) { - emit_decl_range( ureg, - TGSI_FILE_CONSTANT, - 0, ureg->nr_constants ); + if (ureg->nr_constant_ranges) { + for (i = 0; i < ureg->nr_constant_ranges; i++) + emit_decl_range( ureg, + TGSI_FILE_CONSTANT, + ureg->constant_range[i].first, + (ureg->constant_range[i].last + 1 - + ureg->constant_range[i].first) ); } if (ureg->nr_temps) { @@ -890,6 +1001,15 @@ const struct tgsi_token *ureg_finalize( struct ureg_program *ureg ) ureg->domain[DOMAIN_DECL].count); tgsi_dump( tokens, 0 ); } + +#if DEBUG + if (tokens && !tgsi_sanity_check(tokens)) { + debug_printf("tgsi_ureg.c, sanity check failed on generated tokens:\n"); + tgsi_dump(tokens, 0); + assert(0); + } +#endif + return tokens; } @@ -911,6 +1031,25 @@ void *ureg_create_shader( struct ureg_program *ureg, } +const struct tgsi_token *ureg_get_tokens( struct ureg_program *ureg, + unsigned *nr_tokens ) +{ + const struct tgsi_token *tokens; + + ureg_finalize(ureg); + + tokens = &ureg->domain[DOMAIN_DECL].tokens[0].token; + + if (nr_tokens) + *nr_tokens = ureg->domain[DOMAIN_DECL].size; + + ureg->domain[DOMAIN_DECL].tokens = 0; + ureg->domain[DOMAIN_DECL].size = 0; + ureg->domain[DOMAIN_DECL].order = 0; + ureg->domain[DOMAIN_DECL].count = 0; + + return tokens; +} struct ureg_program *ureg_create( unsigned processor ) diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h index acbca59040c..f04f443b9e7 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h @@ -82,10 +82,21 @@ ureg_create( unsigned processor ); const struct tgsi_token * ureg_finalize( struct ureg_program * ); +/* Create and return a shader: + */ void * ureg_create_shader( struct ureg_program *, struct pipe_context *pipe ); + +/* Alternately, return the built token stream and hand ownership of + * that memory to the caller: + */ +const struct tgsi_token * +ureg_get_tokens( struct ureg_program *ureg, + unsigned *nr_tokens ); + + void ureg_destroy( struct ureg_program * ); @@ -116,8 +127,7 @@ ureg_DECL_fs_input( struct ureg_program *, struct ureg_src ureg_DECL_vs_input( struct ureg_program *, - unsigned semantic_name, - unsigned semantic_index ); + unsigned index ); struct ureg_dst ureg_DECL_output( struct ureg_program *, @@ -130,7 +140,8 @@ ureg_DECL_immediate( struct ureg_program *, unsigned nr ); struct ureg_src -ureg_DECL_constant( struct ureg_program * ); +ureg_DECL_constant( struct ureg_program *, + unsigned index ); struct ureg_dst ureg_DECL_temporary( struct ureg_program * ); @@ -233,6 +244,24 @@ ureg_insn(struct ureg_program *ureg, unsigned nr_src ); +void +ureg_tex_insn(struct ureg_program *ureg, + unsigned opcode, + const struct ureg_dst *dst, + unsigned nr_dst, + unsigned target, + const struct ureg_src *src, + unsigned nr_src ); + + +void +ureg_label_insn(struct ureg_program *ureg, + unsigned opcode, + const struct ureg_src *src, + unsigned nr_src, + unsigned *label); + + /*********************************************************************** * Internal instruction helpers, don't call these directly: */ diff --git a/src/gallium/auxiliary/util/u_debug.h b/src/gallium/auxiliary/util/u_debug.h index d42b65ce281..1380d98d7ee 100644 --- a/src/gallium/auxiliary/util/u_debug.h +++ b/src/gallium/auxiliary/util/u_debug.h @@ -88,6 +88,7 @@ _debug_printf(const char *format, ...) * - avoid outputing large strings (512 bytes is the current maximum length * that is guaranteed to be printed in all platforms) */ +#if !defined(PIPE_OS_HAIKU) static INLINE void debug_printf(const char *format, ...) { @@ -101,6 +102,7 @@ debug_printf(const char *format, ...) #endif } +#endif /* !PIPE_OS_HAIKU */ /* * ... isn't portable so we need to pass arguments in parentheses. diff --git a/src/gallium/auxiliary/util/u_fifo.h b/src/gallium/auxiliary/util/u_fifo.h new file mode 100644 index 00000000000..9e007de1ada --- /dev/null +++ b/src/gallium/auxiliary/util/u_fifo.h @@ -0,0 +1,94 @@ +/************************************************************************** + * + * Copyright © 2009 Jakob Bornecrantz + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef U_FIFO_H +#define U_FIFO_H + +#include "util/u_memory.h" + +struct util_fifo +{ + size_t head; + size_t tail; + size_t num; + size_t size; +}; + +static INLINE struct util_fifo * +u_fifo_create(size_t size) +{ + struct util_fifo *fifo; + fifo = MALLOC(sizeof(*fifo) + size * sizeof(void*)); + + fifo->head = 0; + fifo->tail = 0; + fifo->num = 0; + fifo->size = size; + + return fifo; +} + +static INLINE boolean +u_fifo_add(struct util_fifo *fifo, void *ptr) +{ + void **array = (void**)&fifo[1]; + if (fifo->num >= fifo->size) + return FALSE; + + if (++fifo->head >= fifo->size) + fifo->head = 0; + + array[fifo->head] = ptr; + + ++fifo->num; + + return TRUE; +} + +static INLINE boolean +u_fifo_pop(struct util_fifo *fifo, void **ptr) +{ + void **array = (void**)&fifo[1]; + + if (!fifo->num) + return FALSE; + + if (++fifo->tail >= fifo->size) + fifo->tail = 0; + + *ptr = array[fifo->tail]; + + ++fifo->num; + + return TRUE; +} + +static INLINE void +u_fifo_destroy(struct util_fifo *fifo) +{ + FREE(fifo); +} + +#endif diff --git a/src/gallium/auxiliary/util/u_format.csv b/src/gallium/auxiliary/util/u_format.csv index 00a46d0cc48..6e82983e586 100644 --- a/src/gallium/auxiliary/util/u_format.csv +++ b/src/gallium/auxiliary/util/u_format.csv @@ -14,10 +14,10 @@ PIPE_FORMAT_L16_UNORM , arith , 1, 1, un16, , , , xxx1, PIPE_FORMAT_Z16_UNORM , array , 1, 1, un16, , , , x___, zs PIPE_FORMAT_Z32_UNORM , array , 1, 1, un32, , , , x___, zs PIPE_FORMAT_Z32_FLOAT , array , 1, 1, f32 , , , , x___, zs -PIPE_FORMAT_S8Z24_UNORM , arith , 1, 1, un8 , un24, , , yx__, zs -PIPE_FORMAT_Z24S8_UNORM , arith , 1, 1, un24, un8 , , , xy__, zs -PIPE_FORMAT_X8Z24_UNORM , arith , 1, 1, un8 , un24, , , y___, zs -PIPE_FORMAT_Z24X8_UNORM , arith , 1, 1, un24, un8 , , , x___, zs +PIPE_FORMAT_S8Z24_UNORM , arith , 1, 1, un24, un8 , , , xy__, zs +PIPE_FORMAT_Z24S8_UNORM , arith , 1, 1, un8 , un24, , , yx__, zs +PIPE_FORMAT_X8Z24_UNORM , arith , 1, 1, un24, un8 , , , x___, zs +PIPE_FORMAT_Z24X8_UNORM , arith , 1, 1, un8 , un24, , , y___, zs PIPE_FORMAT_S8_UNORM , array , 1, 1, un8 , , , , _x__, zs PIPE_FORMAT_R64_FLOAT , array , 1, 1, f64 , , , , x001, rgb PIPE_FORMAT_R64G64_FLOAT , array , 1, 1, f64 , f64 , , , xy01, rgb diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index 4c6c2bc00e1..b12c97dfb4d 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -341,6 +341,16 @@ util_is_inf_or_nan(float x) /** + * Test whether x is a power of two. + */ +static INLINE boolean +util_is_pot(unsigned x) +{ + return (x & (x - 1)) == 0; +} + + +/** * Find first bit set in word. Least significant bit is 1. * Return 0 if no bits set. */ diff --git a/src/gallium/auxiliary/util/u_network.c b/src/gallium/auxiliary/util/u_network.c index 07d804ecdbf..bc4b7584067 100644 --- a/src/gallium/auxiliary/util/u_network.c +++ b/src/gallium/auxiliary/util/u_network.c @@ -6,7 +6,7 @@ #if defined(PIPE_SUBSYSTEM_WINDOWS_USER) # include <winsock2.h> # include <windows.h> -#elif defined(PIPE_OS_LINUX) +#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) # include <sys/socket.h> # include <netinet/in.h> # include <unistd.h> @@ -54,7 +54,7 @@ u_socket_close(int s) if (s < 0) return; -#if defined(PIPE_OS_LINUX) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) shutdown(s, SHUT_RDWR); close(s); #elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) @@ -169,7 +169,7 @@ u_socket_listen_on_port(uint16_t portnum) void u_socket_block(int s, boolean block) { -#if defined(PIPE_OS_LINUX) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) int old = fcntl(s, F_GETFL, 0); if (old == -1) return; diff --git a/src/gallium/auxiliary/util/u_network.h b/src/gallium/auxiliary/util/u_network.h index 14d3884427e..8c778f492ca 100644 --- a/src/gallium/auxiliary/util/u_network.h +++ b/src/gallium/auxiliary/util/u_network.h @@ -6,7 +6,7 @@ #if defined(PIPE_SUBSYSTEM_WINDOWS_USER) # define PIPE_HAVE_SOCKETS -#elif defined(PIPE_OS_LINUX) +#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) # define PIPE_HAVE_SOCKETS #endif diff --git a/src/gallium/auxiliary/util/u_simple_screen.c b/src/gallium/auxiliary/util/u_simple_screen.c index f01296b40fc..52382990155 100644 --- a/src/gallium/auxiliary/util/u_simple_screen.c +++ b/src/gallium/auxiliary/util/u_simple_screen.c @@ -52,8 +52,7 @@ pass_user_buffer_create(struct pipe_screen *screen, unsigned bytes) { struct pipe_buffer *buffer = - screen->winsys->user_buffer_create(screen->winsys, - ptr, bytes); + screen->winsys->user_buffer_create(screen->winsys, ptr, bytes); buffer->screen = screen; @@ -69,9 +68,8 @@ pass_surface_buffer_create(struct pipe_screen *screen, unsigned *stride) { struct pipe_buffer *buffer = - screen->winsys->surface_buffer_create(screen->winsys, - width, height, - format, usage, tex_usage, stride); + screen->winsys->surface_buffer_create(screen->winsys, width, height, + format, usage, tex_usage, stride); buffer->screen = screen; @@ -83,8 +81,7 @@ pass_buffer_map(struct pipe_screen *screen, struct pipe_buffer *buf, unsigned usage) { - return screen->winsys->buffer_map(screen->winsys, - buf, usage); + return screen->winsys->buffer_map(screen->winsys, buf, usage); } static void @@ -106,8 +103,7 @@ pass_flush_frontbuffer(struct pipe_screen *screen, struct pipe_surface *surf, void *context_private) { - screen->winsys->flush_frontbuffer(screen->winsys, - surf, context_private); + screen->winsys->flush_frontbuffer(screen->winsys, surf, context_private); } static void @@ -115,8 +111,7 @@ pass_fence_reference(struct pipe_screen *screen, struct pipe_fence_handle **ptr, struct pipe_fence_handle *fence) { - screen->winsys->fence_reference(screen->winsys, - ptr, fence); + screen->winsys->fence_reference(screen->winsys, ptr, fence); } static int @@ -124,8 +119,7 @@ pass_fence_signalled(struct pipe_screen *screen, struct pipe_fence_handle *fence, unsigned flag) { - return screen->winsys->fence_signalled(screen->winsys, - fence, flag); + return screen->winsys->fence_signalled(screen->winsys, fence, flag); } static int @@ -133,11 +127,11 @@ pass_fence_finish(struct pipe_screen *screen, struct pipe_fence_handle *fence, unsigned flag) { - return screen->winsys->fence_finish(screen->winsys, - fence, flag); + return screen->winsys->fence_finish(screen->winsys, fence, flag); } -void u_simple_screen_init(struct pipe_screen *screen) +void +u_simple_screen_init(struct pipe_screen *screen) { screen->buffer_create = pass_buffer_create; screen->user_buffer_create = pass_user_buffer_create; @@ -152,7 +146,8 @@ void u_simple_screen_init(struct pipe_screen *screen) screen->fence_finish = pass_fence_finish; } -const char* u_simple_screen_winsys_name(struct pipe_screen *screen) +const char * +u_simple_screen_winsys_name(struct pipe_screen *screen) { return screen->winsys->get_name(screen->winsys); } diff --git a/src/gallium/auxiliary/util/u_simple_shaders.c b/src/gallium/auxiliary/util/u_simple_shaders.c index 1b8da9b6853..0d706f9449d 100644 --- a/src/gallium/auxiliary/util/u_simple_shaders.c +++ b/src/gallium/auxiliary/util/u_simple_shaders.c @@ -34,14 +34,8 @@ #include "pipe/p_context.h" -#include "util/u_debug.h" -#include "pipe/p_defines.h" -#include "pipe/p_screen.h" #include "pipe/p_shader_tokens.h" - -#include "util/u_memory.h" #include "util/u_simple_shaders.h" - #include "tgsi/tgsi_ureg.h" @@ -67,9 +61,7 @@ util_make_vertex_passthrough_shader(struct pipe_context *pipe, struct ureg_src src; struct ureg_dst dst; - src = ureg_DECL_vs_input( ureg, - semantic_names[i], - semantic_indexes[i]); + src = ureg_DECL_vs_input( ureg, i ); dst = ureg_DECL_output( ureg, semantic_names[i], @@ -131,8 +123,6 @@ util_make_fragment_tex_shader(struct pipe_context *pipe ) - - /** * Make simple fragment color pass-through shader. */ diff --git a/src/gallium/auxiliary/util/u_stream_stdc.c b/src/gallium/auxiliary/util/u_stream_stdc.c index d8f648e5dd1..5cd05b29047 100644 --- a/src/gallium/auxiliary/util/u_stream_stdc.c +++ b/src/gallium/auxiliary/util/u_stream_stdc.c @@ -32,7 +32,7 @@ #include "pipe/p_config.h" -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_OS_SOLARIS) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_HAIKU) #include <stdio.h> diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c index 1235a67d264..0d6489c26e4 100644 --- a/src/gallium/auxiliary/util/u_tile.c +++ b/src/gallium/auxiliary/util/u_tile.c @@ -170,7 +170,7 @@ x8r8g8b8_get_tile_rgba(const unsigned *src, pRow[0] = ubyte_to_float((pixel >> 16) & 0xff); pRow[1] = ubyte_to_float((pixel >> 8) & 0xff); pRow[2] = ubyte_to_float((pixel >> 0) & 0xff); - pRow[3] = ubyte_to_float(0xff); + pRow[3] = 1.0F; } p += dst_stride; } @@ -394,6 +394,52 @@ r5g6b5_put_tile_rgba(ushort *dst, +/*** PIPE_FORMAT_R8G8B8_UNORM ***/ + +static void +r8g8b8_get_tile_rgba(const ubyte *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + pRow[0] = ubyte_to_float(src[0]); + pRow[1] = ubyte_to_float(src[1]); + pRow[2] = ubyte_to_float(src[2]); + pRow[3] = 1.0f; + src += 3; + } + p += dst_stride; + } +} + + +static void +r8g8b8_put_tile_rgba(ubyte *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + dst[0] = float_to_ubyte(pRow[0]); + dst[1] = float_to_ubyte(pRow[1]); + dst[2] = float_to_ubyte(pRow[2]); + dst += 3; + } + p += src_stride; + } +} + + + /*** PIPE_FORMAT_Z16_UNORM ***/ /** @@ -1106,6 +1152,9 @@ pipe_tile_raw_to_rgba(enum pipe_format format, case PIPE_FORMAT_R5G6B5_UNORM: r5g6b5_get_tile_rgba((ushort *) src, w, h, dst, dst_stride); break; + case PIPE_FORMAT_R8G8B8_UNORM: + r8g8b8_get_tile_rgba((ubyte *) src, w, h, dst, dst_stride); + break; case PIPE_FORMAT_L8_UNORM: l8_get_tile_rgba((ubyte *) src, w, h, dst, dst_stride); break; @@ -1222,6 +1271,9 @@ pipe_put_tile_rgba(struct pipe_transfer *pt, case PIPE_FORMAT_R5G6B5_UNORM: r5g6b5_put_tile_rgba((ushort *) packed, w, h, p, src_stride); break; + case PIPE_FORMAT_R8G8B8_UNORM: + r8g8b8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride); + break; case PIPE_FORMAT_R8G8B8A8_UNORM: assert(0); break; diff --git a/src/gallium/auxiliary/util/u_time.c b/src/gallium/auxiliary/util/u_time.c index c16cdd0b226..b958a986353 100644 --- a/src/gallium/auxiliary/util/u_time.c +++ b/src/gallium/auxiliary/util/u_time.c @@ -35,7 +35,7 @@ #include "pipe/p_config.h" -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) #include <sys/time.h> #elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) #include <windows.h> @@ -77,7 +77,7 @@ util_time_get_frequency(void) void util_time_get(struct util_time *t) { -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) gettimeofday(&t->tv, NULL); #elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) LONGLONG temp; @@ -102,7 +102,7 @@ util_time_add(const struct util_time *t1, int64_t usecs, struct util_time *t2) { -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) t2->tv.tv_sec = t1->tv.tv_sec + usecs / 1000000; t2->tv.tv_usec = t1->tv.tv_usec + usecs % 1000000; #elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) || defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) @@ -124,7 +124,7 @@ int64_t util_time_diff(const struct util_time *t1, const struct util_time *t2) { -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) return (t2->tv.tv_usec - t1->tv.tv_usec) + (t2->tv.tv_sec - t1->tv.tv_sec)*1000000; #elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) || defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) @@ -144,7 +144,7 @@ util_time_micros( void ) util_time_get(&t1); -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) return t1.tv.tv_usec + t1.tv.tv_sec*1000000LL; #elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) || defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) util_time_get_frequency(); @@ -166,7 +166,7 @@ static INLINE int util_time_compare(const struct util_time *t1, const struct util_time *t2) { -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) if (t1->tv.tv_sec < t2->tv.tv_sec) return -1; else if(t1->tv.tv_sec > t2->tv.tv_sec) diff --git a/src/gallium/auxiliary/util/u_time.h b/src/gallium/auxiliary/util/u_time.h index 7a5c54d9b23..a6189a247bb 100644 --- a/src/gallium/auxiliary/util/u_time.h +++ b/src/gallium/auxiliary/util/u_time.h @@ -43,6 +43,11 @@ #include <unistd.h> /* usleep */ #endif +#if defined(PIPE_OS_HAIKU) +#include <sys/time.h> /* timeval */ +#include <unistd.h> +#endif + #include "pipe/p_compiler.h" @@ -58,7 +63,7 @@ extern "C" { */ struct util_time { -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) struct timeval tv; #else int64_t counter; @@ -89,7 +94,7 @@ util_time_timeout(const struct util_time *start, const struct util_time *end, const struct util_time *curr); -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) #define util_time_sleep usleep #else void diff --git a/src/gallium/drivers/i915simple/i915_prim_vbuf.c b/src/gallium/drivers/i915simple/i915_prim_vbuf.c index 508f4560e48..b3a7774fd6a 100644 --- a/src/gallium/drivers/i915simple/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915simple/i915_prim_vbuf.c @@ -44,6 +44,7 @@ #include "pipe/p_inlines.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_fifo.h" #include "i915_context.h" #include "i915_reg.h" @@ -76,8 +77,13 @@ struct i915_vbuf_render { size_t vbo_size; size_t vbo_offset; void *vbo_ptr; - size_t vbo_alloc_size; size_t vbo_max_used; + + /* stuff for the pool */ + struct util_fifo *pool_fifo; + unsigned pool_used; + unsigned pool_buffer_size; + boolean pool_not_used; }; @@ -106,33 +112,72 @@ i915_vbuf_render_get_vertex_info(struct vbuf_render *render) } static boolean +i915_vbuf_render_reserve(struct i915_vbuf_render *i915_render, size_t size) +{ + struct i915_context *i915 = i915_render->i915; + + if (i915_render->vbo_size < size + i915_render->vbo_offset) + return FALSE; + + if (i915->vbo_flushed) + return FALSE; + + return TRUE; +} + +static void +i915_vbuf_render_new_buf(struct i915_vbuf_render *i915_render, size_t size) +{ + struct i915_context *i915 = i915_render->i915; + struct intel_winsys *iws = i915->iws; + + if (i915_render->vbo) { + if (i915_render->pool_not_used) + iws->buffer_destroy(iws, i915_render->vbo); + else + u_fifo_add(i915_render->pool_fifo, i915_render->vbo); + i915_render->vbo = NULL; + } + + i915->vbo_flushed = 0; + + i915_render->vbo_size = MAX2(size, i915_render->pool_buffer_size); + i915_render->vbo_offset = 0; + + if (i915_render->vbo_size != i915_render->pool_buffer_size) { + i915_render->pool_not_used = TRUE; + i915_render->vbo = iws->buffer_create(iws, i915_render->vbo_size, 64, + INTEL_NEW_VERTEX); + } else { + i915_render->pool_not_used = FALSE; + + if (i915_render->pool_used >= 2) { + FLUSH_BATCH(NULL); + i915->vbo_flushed = 0; + i915_render->pool_used = 0; + } + u_fifo_pop(i915_render->pool_fifo, (void**)&i915_render->vbo); + } +} + +static boolean i915_vbuf_render_allocate_vertices(struct vbuf_render *render, ushort vertex_size, ushort nr_vertices) { struct i915_vbuf_render *i915_render = i915_vbuf_render(render); struct i915_context *i915 = i915_render->i915; - struct intel_winsys *iws = i915->iws; size_t size = (size_t)vertex_size * (size_t)nr_vertices; /* FIXME: handle failure */ assert(!i915->vbo); - if (i915_render->vbo_size > size + i915_render->vbo_offset && !i915->vbo_flushed) { - } else { - i915->vbo_flushed = 0; - if (i915_render->vbo) { - iws->buffer_destroy(iws, i915_render->vbo); - i915_render->vbo = NULL; - } - } + if (!i915_vbuf_render_reserve(i915_render, size)) { - if (!i915_render->vbo) { - i915_render->vbo_size = MAX2(size, i915_render->vbo_alloc_size); - i915_render->vbo_offset = 0; - i915_render->vbo = iws->buffer_create(iws, i915_render->vbo_size, 64, - INTEL_NEW_VERTEX); + if (i915->vbo_flushed) + i915_render->pool_used = 0; + i915_vbuf_render_new_buf(i915_render, size); } i915_render->vertex_size = vertex_size; @@ -504,6 +549,7 @@ i915_vbuf_render_create(struct i915_context *i915) { struct i915_vbuf_render *i915_render = CALLOC_STRUCT(i915_vbuf_render); struct intel_winsys *iws = i915->iws; + int i; i915_render->i915 = i915; @@ -524,14 +570,24 @@ i915_vbuf_render_create(struct i915_context *i915) i915_render->base.release_vertices = i915_vbuf_render_release_vertices; i915_render->base.destroy = i915_vbuf_render_destroy; - i915_render->vbo_alloc_size = 128 * 4096; - i915_render->vbo_size = i915_render->vbo_alloc_size; + + i915_render->vbo = NULL; + i915_render->vbo_size = 0; i915_render->vbo_offset = 0; - i915_render->vbo = iws->buffer_create(iws, i915_render->vbo_size, 64, - INTEL_NEW_VERTEX); + + i915_render->pool_used = FALSE; + i915_render->pool_buffer_size = 128 * 4096; + i915_render->pool_fifo = u_fifo_create(6); + for (i = 0; i < 6; i++) + u_fifo_add(i915_render->pool_fifo, + iws->buffer_create(iws, i915_render->pool_buffer_size, 64, + INTEL_NEW_VERTEX)); + +#if 0 /* TODO JB: is this realy needed? */ i915_render->vbo_ptr = iws->buffer_map(iws, i915_render->vbo, TRUE); iws->buffer_unmap(iws, i915_render->vbo); +#endif return &i915_render->base; } diff --git a/src/gallium/drivers/i915simple/intel_winsys.h b/src/gallium/drivers/i915simple/intel_winsys.h index f949f52a9ce..42c5e7470ec 100644 --- a/src/gallium/drivers/i915simple/intel_winsys.h +++ b/src/gallium/drivers/i915simple/intel_winsys.h @@ -150,6 +150,17 @@ struct intel_winsys { void (*buffer_unmap)(struct intel_winsys *iws, struct intel_buffer *buffer); + /** + * Write to a buffer. + * + * Arguments follows pwrite(2) + */ + int (*buffer_write)(struct intel_winsys *iws, + struct intel_buffer *dst, + const void *src, + size_t size, + size_t offset); + void (*buffer_destroy)(struct intel_winsys *iws, struct intel_buffer *buffer); /*@}*/ diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 6e63a0c2b76..06c586e6bb9 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -15,9 +15,11 @@ C_SOURCES = \ lp_bld_depth.c \ lp_bld_flow.c \ lp_bld_format_aos.c \ + lp_bld_format_soa.c \ lp_bld_interp.c \ lp_bld_intr.c \ lp_bld_logic.c \ + lp_bld_sample_soa.c \ lp_bld_swizzle.c \ lp_bld_struct.c \ lp_bld_tgsi_soa.c \ @@ -44,7 +46,8 @@ C_SOURCES = \ lp_state_vs.c \ lp_surface.c \ lp_tex_cache.c \ - lp_tex_sample.c \ + lp_tex_sample_c.c \ + lp_tex_sample_llvm.c \ lp_texture.c \ lp_tile_cache.c \ lp_tile_soa.c diff --git a/src/gallium/drivers/llvmpipe/README b/src/gallium/drivers/llvmpipe/README index 498d21dea6c..89d08834a3c 100644 --- a/src/gallium/drivers/llvmpipe/README +++ b/src/gallium/drivers/llvmpipe/README @@ -8,13 +8,16 @@ Done so far is: - the whole fragment pipeline is code generated in a single function + - input interpolation + - depth testing + - texture sampling (not all state/formats are supported) + - fragment shader TGSI translation - same level of support as the TGSI SSE2 exec machine, with the exception we don't fallback to TGSI interpretation when an unsupported opcode is found, but just ignore it - - texture sampling via an intrinsic call - done in SoA layout - input interpolation also code generated @@ -28,16 +31,17 @@ Done so far is: any width and length - not all operations are implemented for these types yet though -Most mesa/progs/demos/* work. Speed is on par with Keith's softpipe-opt branch, -which includes hand written fast implementations for common cases. +Most mesa/progs/demos/* work. To do (probably by this order): - code generate stipple and stencil testing - - code generate texture sampling + - translate the remaining bits of texture sampling state - translate TGSI control flow instructions, and all other remaining opcodes + + - integrate with the draw module for VS code generation - code generate the triangle setup and rasterization @@ -93,7 +97,7 @@ Alternatively, you can build it with GNU make, if you prefer, by invoking it as make linux-llvm -but the rest of these instructions assume scons is used. +but the rest of these instructions assume that scons is used. Using @@ -108,6 +112,9 @@ or export LD_LIBRARY_PATH=$PWD/build/linux-x86-debug/lib:$LD_LIBRARY_PATH +For performance evaluation pass debug=no to scons, and use the corresponding +lib directory without the "-debug" suffix. + Unit testing ============ @@ -119,7 +126,7 @@ build/linux-???-debug/gallium/drivers/llvmpipe: - lp_test_conv: SIMD vector conversion - lp_test_format: pixel unpacking/packing -Some of this tests can output results and benchmarks to a tab-seperated-file +Some of this tests can output results and benchmarks to a tab-separated-file for posterior analysis, e.g.: build/linux-x86_64-debug/gallium/drivers/llvmpipe/lp_test_blend -o blend.tsv @@ -133,10 +140,10 @@ Development Notes at the top of the lp_bld_*.c functions. - All lp_bld_*.[ch] are isolated from the rest of the driver, and could/may be - put in a standalone Gallium state -> LLVM IR translation module. + put in a stand-alone Gallium state -> LLVM IR translation module. - We use LLVM-C bindings for now. They are not documented, but follow the C++ interfaces very closely, and appear to be complete enough for code generation. See http://npcontemplation.blogspot.com/2008/06/secret-of-llvm-c-bindings.html - for a standalone example. + for a stand-alone example. diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index 5c29bdac56e..dea4b703c45 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -3,7 +3,7 @@ Import('*') env = env.Clone() env.Tool('llvm') -if 'LLVM_VERSION' not in env: +if env.has_key('LLVM_VERSION') is False: print 'warning: LLVM not found: not building llvmpipe' Return() @@ -23,8 +23,10 @@ llvmpipe = env.ConvenienceLibrary( 'lp_bld_depth.c', 'lp_bld_flow.c', 'lp_bld_format_aos.c', + 'lp_bld_format_soa.c', 'lp_bld_interp.c', 'lp_bld_intr.c', + 'lp_bld_sample_soa.c', 'lp_bld_struct.c', 'lp_bld_logic.c', 'lp_bld_swizzle.c', @@ -52,7 +54,8 @@ llvmpipe = env.ConvenienceLibrary( 'lp_state_vs.c', 'lp_surface.c', 'lp_tex_cache.c', - 'lp_tex_sample.c', + 'lp_tex_sample_c.c', + 'lp_tex_sample_llvm.c', 'lp_texture.c', 'lp_tile_cache.c', 'lp_tile_soa.c', diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c index 09a57ff33d5..ce3e5f91c07 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c @@ -502,6 +502,31 @@ lp_build_div(struct lp_build_context *bld, } +LLVMValueRef +lp_build_lerp(struct lp_build_context *bld, + LLVMValueRef x, + LLVMValueRef v0, + LLVMValueRef v1) +{ + return lp_build_add(bld, v0, lp_build_mul(bld, x, lp_build_sub(bld, v1, v0))); +} + + +LLVMValueRef +lp_build_lerp_2d(struct lp_build_context *bld, + LLVMValueRef x, + LLVMValueRef y, + LLVMValueRef v00, + LLVMValueRef v01, + LLVMValueRef v10, + LLVMValueRef v11) +{ + LLVMValueRef v0 = lp_build_lerp(bld, x, v00, v01); + LLVMValueRef v1 = lp_build_lerp(bld, x, v10, v11); + return lp_build_lerp(bld, y, v0, v1); +} + + /** * Generate min(a, b) * Do checks for special cases. @@ -566,20 +591,31 @@ lp_build_abs(struct lp_build_context *bld, LLVMValueRef a) { const union lp_type type = bld->type; + LLVMTypeRef vec_type = lp_build_vec_type(type); if(!type.sign) return a; - /* XXX: is this really necessary? */ + if(type.floating) { + /* Mask out the sign bit */ + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + LLVMValueRef mask = lp_build_int_const_scalar(type, ((unsigned long long)1 << type.width) - 1); + a = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); + a = LLVMBuildAnd(bld->builder, a, mask, ""); + a = LLVMBuildBitCast(bld->builder, a, vec_type, ""); + return a; + } + #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - if(!type.floating && type.width*type.length == 128) { - LLVMTypeRef vec_type = lp_build_vec_type(type); - if(type.width == 8) + if(type.width*type.length == 128) { + switch(type.width) { + case 8: return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.b.128", vec_type, a); - if(type.width == 16) + case 16: return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.w.128", vec_type, a); - if(type.width == 32) + case 32: return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.d.128", vec_type, a); + } } #endif @@ -588,6 +624,184 @@ lp_build_abs(struct lp_build_context *bld, LLVMValueRef +lp_build_sgn(struct lp_build_context *bld, + LLVMValueRef a) +{ + const union lp_type type = bld->type; + LLVMTypeRef vec_type = lp_build_vec_type(type); + LLVMValueRef cond; + LLVMValueRef res; + + /* Handle non-zero case */ + if(!type.sign) { + /* if not zero then sign must be positive */ + res = bld->one; + } + else if(type.floating) { + /* Take the sign bit and add it to 1 constant */ + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1)); + LLVMValueRef sign; + LLVMValueRef one; + sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); + sign = LLVMBuildAnd(bld->builder, sign, mask, ""); + one = LLVMConstBitCast(bld->one, int_vec_type); + res = LLVMBuildOr(bld->builder, sign, one, ""); + res = LLVMBuildBitCast(bld->builder, res, vec_type, ""); + } + else + { + LLVMValueRef minus_one = lp_build_const_scalar(type, -1.0); + cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, bld->zero); + res = lp_build_select(bld, cond, bld->one, minus_one); + } + + /* Handle zero */ + cond = lp_build_cmp(bld, PIPE_FUNC_EQUAL, a, bld->zero); + res = lp_build_select(bld, cond, bld->zero, bld->one); + + return res; +} + + +enum lp_build_round_sse41_mode +{ + LP_BUILD_ROUND_SSE41_NEAREST = 0, + LP_BUILD_ROUND_SSE41_FLOOR = 1, + LP_BUILD_ROUND_SSE41_CEIL = 2, + LP_BUILD_ROUND_SSE41_TRUNCATE = 3 +}; + + +static INLINE LLVMValueRef +lp_build_round_sse41(struct lp_build_context *bld, + LLVMValueRef a, + enum lp_build_round_sse41_mode mode) +{ + const union lp_type type = bld->type; + LLVMTypeRef vec_type = lp_build_vec_type(type); + const char *intrinsic; + + assert(type.floating); + assert(type.width*type.length == 128); + + switch(type.width) { + case 32: + intrinsic = "llvm.x86.sse41.round.ps"; + break; + case 64: + intrinsic = "llvm.x86.sse41.round.pd"; + break; + default: + assert(0); + return bld->undef; + } + + return lp_build_intrinsic_binary(bld->builder, intrinsic, vec_type, a, + LLVMConstInt(LLVMInt32Type(), mode, 0)); +} + + +LLVMValueRef +lp_build_round(struct lp_build_context *bld, + LLVMValueRef a) +{ + const union lp_type type = bld->type; + + assert(type.floating); + +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) + return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST); +#endif + + /* FIXME */ + assert(0); + return bld->undef; +} + + +LLVMValueRef +lp_build_floor(struct lp_build_context *bld, + LLVMValueRef a) +{ + const union lp_type type = bld->type; + + assert(type.floating); + +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) + return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR); +#endif + + /* FIXME */ + assert(0); + return bld->undef; +} + + +LLVMValueRef +lp_build_ceil(struct lp_build_context *bld, + LLVMValueRef a) +{ + const union lp_type type = bld->type; + + assert(type.floating); + +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) + return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL); +#endif + + /* FIXME */ + assert(0); + return bld->undef; +} + + +LLVMValueRef +lp_build_trunc(struct lp_build_context *bld, + LLVMValueRef a) +{ + const union lp_type type = bld->type; + + assert(type.floating); + +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) + return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_TRUNCATE); +#endif + + /* FIXME */ + assert(0); + return bld->undef; +} + + +/** + * Convert to integer, through whichever rounding method that's fastest, + * typically truncating to zero. + */ +LLVMValueRef +lp_build_int(struct lp_build_context *bld, + LLVMValueRef a) +{ + const union lp_type type = bld->type; + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + + assert(type.floating); + + return LLVMBuildFPToSI(bld->builder, a, int_vec_type, ""); +} + + +LLVMValueRef +lp_build_ifloor(struct lp_build_context *bld, + LLVMValueRef a) +{ + a = lp_build_floor(bld, a); + a = lp_build_int(bld, a); + return a; +} + + +LLVMValueRef lp_build_sqrt(struct lp_build_context *bld, LLVMValueRef a) { diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.h b/src/gallium/drivers/llvmpipe/lp_bld_arit.h index fc8cb25966e..5e083b847fd 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.h @@ -72,6 +72,26 @@ lp_build_div(struct lp_build_context *bld, LLVMValueRef b); LLVMValueRef +lp_build_lerp(struct lp_build_context *bld, + LLVMValueRef x, + LLVMValueRef v0, + LLVMValueRef v1); + +/** + * Bilinear interpolation. + * + * Values indices are in v_{yx}. + */ +LLVMValueRef +lp_build_lerp_2d(struct lp_build_context *bld, + LLVMValueRef x, + LLVMValueRef y, + LLVMValueRef v00, + LLVMValueRef v01, + LLVMValueRef v10, + LLVMValueRef v11); + +LLVMValueRef lp_build_min(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b); @@ -86,6 +106,34 @@ lp_build_abs(struct lp_build_context *bld, LLVMValueRef a); LLVMValueRef +lp_build_sgn(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef +lp_build_round(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef +lp_build_floor(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef +lp_build_ceil(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef +lp_build_trunc(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef +lp_build_int(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef +lp_build_ifloor(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef lp_build_sqrt(struct lp_build_context *bld, LLVMValueRef a); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_conv.c b/src/gallium/drivers/llvmpipe/lp_bld_conv.c index c8954c8a34f..c5a71d2c726 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_conv.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_conv.c @@ -122,7 +122,7 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, int shift = dst_width - n; res = LLVMBuildShl(builder, res, lp_build_int_const_scalar(src_type, shift), ""); - /* Fill in the empty lower bits for added precision? */ + /* TODO: Fill in the empty lower bits for additional precision? */ #if 0 { LLVMValueRef msb; @@ -244,7 +244,7 @@ lp_build_const_pack_shuffle(unsigned n) * Expand the bit width. * * This will only change the number of bits the values are represented, not the - * values themselved. + * values themselves. */ static void lp_build_expand(LLVMBuilderRef builder, @@ -391,11 +391,11 @@ lp_build_pack2(LLVMBuilderRef builder, * TODO: Handle saturation consistently. */ static LLVMValueRef -lp_build_trunc(LLVMBuilderRef builder, - union lp_type src_type, - union lp_type dst_type, - boolean clamped, - const LLVMValueRef *src, unsigned num_srcs) +lp_build_pack(LLVMBuilderRef builder, + union lp_type src_type, + union lp_type dst_type, + boolean clamped, + const LLVMValueRef *src, unsigned num_srcs) { LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH]; unsigned i; @@ -565,7 +565,7 @@ lp_build_conv(LLVMBuilderRef builder, if(tmp_type.width > dst_type.width) { assert(num_dsts == 1); - tmp[0] = lp_build_trunc(builder, tmp_type, dst_type, TRUE, tmp, num_tmps); + tmp[0] = lp_build_pack(builder, tmp_type, dst_type, TRUE, tmp, num_tmps); tmp_type.width = dst_type.width; tmp_type.length = dst_type.length; num_tmps = 1; @@ -689,7 +689,7 @@ lp_build_conv_mask(LLVMBuilderRef builder, if(src_type.width > dst_type.width) { assert(num_dsts == 1); - dst[0] = lp_build_trunc(builder, src_type, dst_type, TRUE, src, num_srcs); + dst[0] = lp_build_pack(builder, src_type, dst_type, TRUE, src, num_srcs); } else if(src_type.width < dst_type.width) { assert(num_srcs == 1); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_debug.c b/src/gallium/drivers/llvmpipe/lp_bld_debug.c index 30925b5f415..59d8f492e60 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_debug.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_debug.c @@ -30,10 +30,27 @@ #include <udis86.h> #endif +#include "util/u_math.h" #include "util/u_debug.h" #include "lp_bld_debug.h" +/** + * Check alignment. + * + * It is important that this check is not implemented as a macro or inlined + * function, as the compiler assumptions in respect to alignment of global + * and stack variables would often make the check a no op, defeating the + * whole purpose of the exercise. + */ +boolean +lp_check_alignment(const void *ptr, unsigned alignment) +{ + assert(util_is_pot(alignment)); + return ((uintptr_t)ptr & (alignment - 1)) == 0; +} + + void lp_disassemble(const void* func) { diff --git a/src/gallium/drivers/llvmpipe/lp_bld_debug.h b/src/gallium/drivers/llvmpipe/lp_bld_debug.h index ecdafef76d0..583e6132b4b 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_debug.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_debug.h @@ -53,6 +53,10 @@ lp_build_name(LLVMValueRef val, const char *format, ...) } +boolean +lp_check_alignment(const void *ptr, unsigned alignment); + + void lp_disassemble(const void* func); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c index 2cd6e6b9217..e5fe81193fc 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c @@ -179,12 +179,13 @@ lp_build_depth_test(LLVMBuilderRef builder, padding_right = 0; for(chan = 0; chan < z_swizzle; ++chan) padding_right += format_desc->channel[chan].size; - padding_left = format_desc->block.bits - format_desc->channel[z_swizzle].size; + padding_left = format_desc->block.bits - + (padding_right + format_desc->channel[z_swizzle].size); if(padding_left || padding_right) { - const long long mask_left = ((long long)1 << (format_desc->block.bits - padding_left)) - 1; - const long long mask_right = ((long long)1 << (padding_right)) - 1; - z_bitmask = lp_build_int_const_scalar(type, mask_left & mask_right); + const unsigned long long mask_left = ((unsigned long long)1 << (format_desc->block.bits - padding_left)) - 1; + const unsigned long long mask_right = ((unsigned long long)1 << (padding_right)) - 1; + z_bitmask = lp_build_int_const_scalar(type, mask_left ^ mask_right); } if(padding_left) @@ -210,5 +211,6 @@ lp_build_depth_test(LLVMBuilderRef builder, LLVMBuildStore(builder, dst, dst_ptr); } + /* FIXME */ assert(!state->occlusion_count); } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.c b/src/gallium/drivers/llvmpipe/lp_bld_flow.c index 9d99e1a9d9f..69ed014ff3d 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_flow.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.c @@ -32,59 +32,261 @@ */ #include "util/u_debug.h" +#include "util/u_memory.h" #include "lp_bld_type.h" #include "lp_bld_flow.h" +#define LP_BUILD_FLOW_MAX_VARIABLES 32 +#define LP_BUILD_FLOW_MAX_DEPTH 32 + + +/** + * Enumeration of all possible flow constructs. + */ +enum lp_build_flow_construct_kind { + lP_BUILD_FLOW_SCOPE, + LP_BUILD_FLOW_SKIP, +}; + + +/** + * Variable declaration scope. + */ +struct lp_build_flow_scope +{ + /** Number of variables declared in this scope */ + unsigned num_variables; +}; + + +/** + * Early exit. Useful to skip to the end of a function or block when + * the execution mask becomes zero or when there is an error condition. + */ +struct lp_build_flow_skip +{ + /** Block to skip to */ + LLVMBasicBlockRef block; + + /** Number of variables declared at the beginning */ + unsigned num_variables; + + LLVMValueRef *phi; +}; + + +/** + * Union of all possible flow constructs' data + */ +union lp_build_flow_construct_data +{ + struct lp_build_flow_scope scope; + struct lp_build_flow_skip skip; +}; + + +/** + * Element of the flow construct stack. + */ +struct lp_build_flow_construct +{ + enum lp_build_flow_construct_kind kind; + union lp_build_flow_construct_data data; +}; + + +/** + * All necessary data to generate LLVM control flow constructs. + * + * Besides keeping track of the control flow construct themselves we also + * need to keep track of variables in order to generate SSA Phi values. + */ +struct lp_build_flow_context +{ + LLVMBuilderRef builder; + + /** + * Control flow stack. + */ + struct lp_build_flow_construct constructs[LP_BUILD_FLOW_MAX_DEPTH]; + unsigned num_constructs; + + /** + * Variable stack + */ + LLVMValueRef *variables[LP_BUILD_FLOW_MAX_VARIABLES]; + unsigned num_variables; +}; + + +struct lp_build_flow_context * +lp_build_flow_create(LLVMBuilderRef builder) +{ + struct lp_build_flow_context *flow; + + flow = CALLOC_STRUCT(lp_build_flow_context); + if(!flow) + return NULL; + + flow->builder = builder; + + return flow; +} + + void -lp_build_mask_begin(struct lp_build_mask_context *mask, - LLVMBuilderRef builder, - union lp_type type, - LLVMValueRef value) +lp_build_flow_destroy(struct lp_build_flow_context *flow) { - memset(mask, 0, sizeof *mask); + assert(flow->num_constructs == 0); + assert(flow->num_variables == 0); + FREE(flow); +} - mask->builder = builder; - mask->reg_type = LLVMIntType(type.width * type.length); - mask->value = value; + +static union lp_build_flow_construct_data * +lp_build_flow_push(struct lp_build_flow_context *flow, + enum lp_build_flow_construct_kind kind) +{ + assert(flow->num_constructs < LP_BUILD_FLOW_MAX_DEPTH); + if(flow->num_constructs >= LP_BUILD_FLOW_MAX_DEPTH) + return NULL; + + flow->constructs[flow->num_constructs].kind = kind; + return &flow->constructs[flow->num_constructs++].data; +} + + +static union lp_build_flow_construct_data * +lp_build_flow_peek(struct lp_build_flow_context *flow, + enum lp_build_flow_construct_kind kind) +{ + assert(flow->num_constructs); + if(!flow->num_constructs) + return NULL; + + assert(flow->constructs[flow->num_constructs - 1].kind == kind); + if(flow->constructs[flow->num_constructs - 1].kind != kind) + return NULL; + + return &flow->constructs[flow->num_constructs - 1].data; } +static union lp_build_flow_construct_data * +lp_build_flow_pop(struct lp_build_flow_context *flow, + enum lp_build_flow_construct_kind kind) +{ + assert(flow->num_constructs); + if(!flow->num_constructs) + return NULL; + + assert(flow->constructs[flow->num_constructs - 1].kind == kind); + if(flow->constructs[flow->num_constructs - 1].kind != kind) + return NULL; + + return &flow->constructs[--flow->num_constructs].data; +} + + +/** + * Begin a variable scope. + * + * + */ void -lp_build_mask_update(struct lp_build_mask_context *mask, - LLVMValueRef value) +lp_build_flow_scope_begin(struct lp_build_flow_context *flow) { + struct lp_build_flow_scope *scope; - LLVMValueRef cond; - LLVMBasicBlockRef current_block; - LLVMBasicBlockRef next_block; - LLVMBasicBlockRef new_block; + scope = &lp_build_flow_push(flow, lP_BUILD_FLOW_SCOPE)->scope; + if(!scope) + return; - if(mask->value) - mask->value = LLVMBuildAnd(mask->builder, mask->value, value, ""); - else - mask->value = value; + scope->num_variables = 0; +} - /* FIXME: disabled until we have proper control flow helpers */ -#if 0 - cond = LLVMBuildICmp(mask->builder, - LLVMIntEQ, - LLVMBuildBitCast(mask->builder, mask->value, mask->reg_type, ""), - LLVMConstNull(mask->reg_type), - ""); - current_block = LLVMGetInsertBlock(mask->builder); +/** + * Declare a variable. + * + * A variable is a named entity which can have different LLVMValueRef's at + * different points of the program. This is relevant for control flow because + * when there are mutiple branches to a same location we need to replace + * the variable's value with a Phi function as explained in + * http://en.wikipedia.org/wiki/Static_single_assignment_form . + * + * We keep track of variables by keeping around a pointer to where their + * current. + * + * There are a few cautions to observe: + * + * - Variable's value must not be NULL. If there is no initial value then + * LLVMGetUndef() should be used. + * + * - Variable's value must be kept up-to-date. If the variable is going to be + * modified by a function then a pointer should be passed so that its value + * is accurate. Failure to do this will cause some of the variables' + * transient values to be lost, leading to wrong results. + * + * - A program should be written from top to bottom, by always appending + * instructions to the bottom with a single LLVMBuilderRef. Inserting and/or + * modifying existing statements will most likely lead to wrong results. + * + */ +void +lp_build_flow_scope_declare(struct lp_build_flow_context *flow, + LLVMValueRef *variable) +{ + struct lp_build_flow_scope *scope; + + scope = &lp_build_flow_peek(flow, lP_BUILD_FLOW_SCOPE)->scope; + if(!scope) + return; - if(!mask->skip_block) { - LLVMValueRef function = LLVMGetBasicBlockParent(current_block); - mask->skip_block = LLVMAppendBasicBlock(function, "skip"); + assert(*variable); + if(!*variable) + return; + + assert(flow->num_variables < LP_BUILD_FLOW_MAX_VARIABLES); + if(flow->num_variables >= LP_BUILD_FLOW_MAX_VARIABLES) + return; + + flow->variables[flow->num_variables++] = variable; + ++scope->num_variables; +} + + +void +lp_build_flow_scope_end(struct lp_build_flow_context *flow) +{ + struct lp_build_flow_scope *scope; + + scope = &lp_build_flow_pop(flow, lP_BUILD_FLOW_SCOPE)->scope; + if(!scope) + return; - mask->phi = LLVMBuildPhi(mask->builder, LLVMTypeOf(mask->value), ""); + assert(flow->num_variables >= scope->num_variables); + if(flow->num_variables < scope->num_variables) { + flow->num_variables = 0; + return; } + flow->num_variables -= scope->num_variables; +} + + +static LLVMBasicBlockRef +lp_build_flow_insert_block(struct lp_build_flow_context *flow) +{ + LLVMBasicBlockRef current_block; + LLVMBasicBlockRef next_block; + LLVMBasicBlockRef new_block; + + current_block = LLVMGetInsertBlock(flow->builder); + next_block = LLVMGetNextBasicBlock(current_block); - assert(next_block); if(next_block) { new_block = LLVMInsertBasicBlock(next_block, ""); } @@ -93,30 +295,148 @@ lp_build_mask_update(struct lp_build_mask_context *mask, new_block = LLVMAppendBasicBlock(function, ""); } - LLVMAddIncoming(mask->phi, &mask->value, ¤t_block, 1); - LLVMBuildCondBr(mask->builder, cond, mask->skip_block, new_block); + return new_block; +} + +void +lp_build_flow_skip_begin(struct lp_build_flow_context *flow) +{ + struct lp_build_flow_skip *skip; + LLVMBuilderRef builder; + unsigned i; + + skip = &lp_build_flow_push(flow, LP_BUILD_FLOW_SKIP)->skip; + if(!skip) + return; + + skip->block = lp_build_flow_insert_block(flow); + skip->num_variables = flow->num_variables; + if(!skip->num_variables) { + skip->phi = NULL; + return; + } - LLVMPositionBuilderAtEnd(mask->builder, new_block); -#endif + skip->phi = MALLOC(skip->num_variables * sizeof *skip->phi); + if(!skip->phi) { + skip->num_variables = 0; + return; + } + + builder = LLVMCreateBuilder(); + LLVMPositionBuilderAtEnd(builder, skip->block); + + for(i = 0; i < skip->num_variables; ++i) + skip->phi[i] = LLVMBuildPhi(builder, LLVMTypeOf(*flow->variables[i]), ""); + + LLVMDisposeBuilder(builder); } -LLVMValueRef -lp_build_mask_end(struct lp_build_mask_context *mask) +void +lp_build_flow_skip_cond_break(struct lp_build_flow_context *flow, + LLVMValueRef cond) { - if(mask->skip_block) { - LLVMBasicBlockRef current_block = LLVMGetInsertBlock(mask->builder); + struct lp_build_flow_skip *skip; + LLVMBasicBlockRef current_block; + LLVMBasicBlockRef new_block; + unsigned i; - LLVMAddIncoming(mask->phi, &mask->value, ¤t_block, 1); - LLVMBuildBr(mask->builder, mask->skip_block); + skip = &lp_build_flow_peek(flow, LP_BUILD_FLOW_SKIP)->skip; + if(!skip) + return; - LLVMPositionBuilderAtEnd(mask->builder, mask->skip_block); + current_block = LLVMGetInsertBlock(flow->builder); - mask->value = mask->phi; - mask->phi = NULL; - mask->skip_block = NULL; + new_block = lp_build_flow_insert_block(flow); + + for(i = 0; i < skip->num_variables; ++i) { + assert(*flow->variables[i]); + LLVMAddIncoming(skip->phi[i], flow->variables[i], ¤t_block, 1); } + LLVMBuildCondBr(flow->builder, cond, skip->block, new_block); + + LLVMPositionBuilderAtEnd(flow->builder, new_block); + } + + +void +lp_build_flow_skip_end(struct lp_build_flow_context *flow) +{ + struct lp_build_flow_skip *skip; + LLVMBasicBlockRef current_block; + unsigned i; + + skip = &lp_build_flow_pop(flow, LP_BUILD_FLOW_SKIP)->skip; + if(!skip) + return; + + current_block = LLVMGetInsertBlock(flow->builder); + + for(i = 0; i < skip->num_variables; ++i) { + assert(*flow->variables[i]); + LLVMAddIncoming(skip->phi[i], flow->variables[i], ¤t_block, 1); + *flow->variables[i] = skip->phi[i]; + } + + LLVMBuildBr(flow->builder, skip->block); + LLVMPositionBuilderAtEnd(flow->builder, skip->block); + + FREE(skip->phi); +} + + +static void +lp_build_mask_check(struct lp_build_mask_context *mask) +{ + LLVMBuilderRef builder = mask->flow->builder; + LLVMValueRef cond; + + cond = LLVMBuildICmp(builder, + LLVMIntEQ, + LLVMBuildBitCast(builder, mask->value, mask->reg_type, ""), + LLVMConstNull(mask->reg_type), + ""); + + lp_build_flow_skip_cond_break(mask->flow, cond); +} + + +void +lp_build_mask_begin(struct lp_build_mask_context *mask, + struct lp_build_flow_context *flow, + union lp_type type, + LLVMValueRef value) +{ + memset(mask, 0, sizeof *mask); + + mask->flow = flow; + mask->reg_type = LLVMIntType(type.width * type.length); + mask->value = value; + + lp_build_flow_scope_begin(flow); + lp_build_flow_scope_declare(flow, &mask->value); + lp_build_flow_skip_begin(flow); + + lp_build_mask_check(mask); +} + + +void +lp_build_mask_update(struct lp_build_mask_context *mask, + LLVMValueRef value) +{ + mask->value = LLVMBuildAnd( mask->flow->builder, mask->value, value, ""); + + lp_build_mask_check(mask); +} + + +LLVMValueRef +lp_build_mask_end(struct lp_build_mask_context *mask) +{ + lp_build_flow_skip_end(mask->flow); + lp_build_flow_scope_end(mask->flow); return mask->value; } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.h b/src/gallium/drivers/llvmpipe/lp_bld_flow.h index 1b634ff038d..9d76e3064dd 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_flow.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.h @@ -41,23 +41,49 @@ union lp_type; +struct lp_build_flow_context; + + +struct lp_build_flow_context * +lp_build_flow_create(LLVMBuilderRef builder); + +void +lp_build_flow_destroy(struct lp_build_flow_context *flow); + +void +lp_build_flow_scope_begin(struct lp_build_flow_context *flow); + +void +lp_build_flow_scope_declare(struct lp_build_flow_context *flow, + LLVMValueRef *variable); + +void +lp_build_flow_scope_end(struct lp_build_flow_context *flow); + +void +lp_build_flow_skip_begin(struct lp_build_flow_context *flow); + +void +lp_build_flow_skip_cond_break(struct lp_build_flow_context *flow, + LLVMValueRef cond); + +void +lp_build_flow_skip_end(struct lp_build_flow_context *flow); + + struct lp_build_mask_context { - LLVMBuilderRef builder; + struct lp_build_flow_context *flow; LLVMTypeRef reg_type; LLVMValueRef value; - - LLVMValueRef phi; - - LLVMBasicBlockRef skip_block; }; void lp_build_mask_begin(struct lp_build_mask_context *mask, - LLVMBuilderRef builder, + struct lp_build_flow_context *flow, union lp_type type, LLVMValueRef value); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format.h b/src/gallium/drivers/llvmpipe/lp_bld_format.h index 01c8a752d18..5ee06560932 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_format.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_format.h @@ -31,20 +31,14 @@ /** * @file - * LLVM IR building helpers interfaces. - * - * We use LLVM-C bindings for now. They are not documented, but follow the C++ - * interfaces very closely, and appear to be complete enough for code - * genration. See - * http://npcontemplation.blogspot.com/2008/06/secret-of-llvm-c-bindings.html - * for a standalone example. + * Pixel format helpers. */ #include <llvm-c/Core.h> - -#include "pipe/p_format.h" +#include "pipe/p_format.h" +struct util_format_description; union lp_type; @@ -56,9 +50,9 @@ union lp_type; * @return RGBA in a 4 floats vector. */ LLVMValueRef -lp_build_unpack_rgba(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef packed); +lp_build_unpack_rgba_aos(LLVMBuilderRef builder, + enum pipe_format format, + LLVMValueRef packed); /** @@ -67,9 +61,9 @@ lp_build_unpack_rgba(LLVMBuilderRef builder, * @param rgba 4 float vector with the unpacked components. */ LLVMValueRef -lp_build_pack_rgba(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef rgba); +lp_build_pack_rgba_aos(LLVMBuilderRef builder, + enum pipe_format format, + LLVMValueRef rgba); /** @@ -81,9 +75,9 @@ lp_build_pack_rgba(LLVMBuilderRef builder, * @return RGBA in a 4 floats vector. */ LLVMValueRef -lp_build_load_rgba(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef ptr); +lp_build_load_rgba_aos(LLVMBuilderRef builder, + enum pipe_format format, + LLVMValueRef ptr); /** @@ -92,10 +86,34 @@ lp_build_load_rgba(LLVMBuilderRef builder, * @param rgba 4 float vector with the unpacked components. */ void -lp_build_store_rgba(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef ptr, - LLVMValueRef rgba); +lp_build_store_rgba_aos(LLVMBuilderRef builder, + enum pipe_format format, + LLVMValueRef ptr, + LLVMValueRef rgba); +LLVMValueRef +lp_build_gather(LLVMBuilderRef builder, + unsigned length, + unsigned src_width, + unsigned dst_width, + LLVMValueRef base_ptr, + LLVMValueRef offsets); + + +void +lp_build_unpack_rgba_soa(LLVMBuilderRef builder, + const struct util_format_description *format_desc, + union lp_type type, + LLVMValueRef packed, + LLVMValueRef *rgba); + + +void +lp_build_load_rgba_soa(LLVMBuilderRef builder, + const struct util_format_description *format_desc, + union lp_type type, + LLVMValueRef base_ptr, + LLVMValueRef offsets, + LLVMValueRef *rgba); #endif /* !LP_BLD_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c index dcbc0076c7d..b9b5d84bed5 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c @@ -32,9 +32,9 @@ LLVMValueRef -lp_build_unpack_rgba(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef packed) +lp_build_unpack_rgba_aos(LLVMBuilderRef builder, + enum pipe_format format, + LLVMValueRef packed) { const struct util_format_description *desc; LLVMTypeRef type; @@ -152,9 +152,9 @@ lp_build_unpack_rgba(LLVMBuilderRef builder, LLVMValueRef -lp_build_pack_rgba(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef rgba) +lp_build_pack_rgba_aos(LLVMBuilderRef builder, + enum pipe_format format, + LLVMValueRef rgba) { const struct util_format_description *desc; LLVMTypeRef type; @@ -250,9 +250,9 @@ lp_build_pack_rgba(LLVMBuilderRef builder, LLVMValueRef -lp_build_load_rgba(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef ptr) +lp_build_load_rgba_aos(LLVMBuilderRef builder, + enum pipe_format format, + LLVMValueRef ptr) { const struct util_format_description *desc; LLVMTypeRef type; @@ -272,15 +272,15 @@ lp_build_load_rgba(LLVMBuilderRef builder, packed = LLVMBuildLoad(builder, ptr, ""); - return lp_build_unpack_rgba(builder, format, packed); + return lp_build_unpack_rgba_aos(builder, format, packed); } void -lp_build_store_rgba(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef ptr, - LLVMValueRef rgba) +lp_build_store_rgba_aos(LLVMBuilderRef builder, + enum pipe_format format, + LLVMValueRef ptr, + LLVMValueRef rgba) { const struct util_format_description *desc; LLVMTypeRef type; @@ -294,7 +294,7 @@ lp_build_store_rgba(LLVMBuilderRef builder, type = LLVMIntType(desc->block.bits); - packed = lp_build_pack_rgba(builder, format, rgba); + packed = lp_build_pack_rgba_aos(builder, format, rgba); ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(type, 0), ""); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c new file mode 100644 index 00000000000..569e8d10a31 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c @@ -0,0 +1,208 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "util/u_format.h" + +#include "lp_bld_type.h" +#include "lp_bld_const.h" +#include "lp_bld_conv.h" +#include "lp_bld_format.h" + + +/** + * Gather elements from scatter positions in memory into a single vector. + * + * @param src_width src element width + * @param dst_width result element width (source will be expanded to fit) + * @param length length of the offsets, + * @param base_ptr base pointer, should be a i8 pointer type. + * @param offsets vector with offsets + */ +LLVMValueRef +lp_build_gather(LLVMBuilderRef builder, + unsigned length, + unsigned src_width, + unsigned dst_width, + LLVMValueRef base_ptr, + LLVMValueRef offsets) +{ + LLVMTypeRef src_type = LLVMIntType(src_width); + LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0); + LLVMTypeRef dst_elem_type = LLVMIntType(dst_width); + LLVMTypeRef dst_vec_type = LLVMVectorType(dst_elem_type, length); + LLVMValueRef res; + unsigned i; + + res = LLVMGetUndef(dst_vec_type); + for(i = 0; i < length; ++i) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef elem_offset; + LLVMValueRef elem_ptr; + LLVMValueRef elem; + + elem_offset = LLVMBuildExtractElement(builder, offsets, index, ""); + elem_ptr = LLVMBuildGEP(builder, base_ptr, &elem_offset, 1, ""); + elem_ptr = LLVMBuildBitCast(builder, elem_ptr, src_ptr_type, ""); + elem = LLVMBuildLoad(builder, elem_ptr, ""); + + assert(src_width <= dst_width); + if(src_width > dst_width) + elem = LLVMBuildTrunc(builder, elem, dst_elem_type, ""); + if(src_width < dst_width) + elem = LLVMBuildZExt(builder, elem, dst_elem_type, ""); + + res = LLVMBuildInsertElement(builder, res, elem, index, ""); + } + + return res; +} + + +static LLVMValueRef +lp_build_format_swizzle(union lp_type type, + const LLVMValueRef *inputs, + enum util_format_swizzle swizzle) +{ + switch (swizzle) { + case UTIL_FORMAT_SWIZZLE_X: + case UTIL_FORMAT_SWIZZLE_Y: + case UTIL_FORMAT_SWIZZLE_Z: + case UTIL_FORMAT_SWIZZLE_W: + return inputs[swizzle]; + case UTIL_FORMAT_SWIZZLE_0: + return lp_build_zero(type); + case UTIL_FORMAT_SWIZZLE_1: + return lp_build_one(type); + case UTIL_FORMAT_SWIZZLE_NONE: + return lp_build_undef(type); + default: + assert(0); + return lp_build_undef(type); + } +} + + +void +lp_build_unpack_rgba_soa(LLVMBuilderRef builder, + const struct util_format_description *format_desc, + union lp_type type, + LLVMValueRef packed, + LLVMValueRef *rgba) +{ + LLVMValueRef inputs[4]; + unsigned start; + unsigned chan; + + /* FIXME: Support more formats */ + assert(format_desc->layout == UTIL_FORMAT_LAYOUT_ARITH); + assert(format_desc->block.width == 1); + assert(format_desc->block.height == 1); + assert(format_desc->block.bits <= 32); + + /* Decode the input vector components */ + start = 0; + for (chan = 0; chan < 4; ++chan) { + unsigned width = format_desc->channel[chan].size; + unsigned stop = start + width; + LLVMValueRef input; + + input = packed; + + switch(format_desc->channel[chan].type) { + case UTIL_FORMAT_TYPE_VOID: + input = NULL; + break; + + case UTIL_FORMAT_TYPE_UNSIGNED: + if(type.floating) { + if(start) + input = LLVMBuildLShr(builder, input, lp_build_int_const_scalar(type, start), ""); + if(stop < format_desc->block.bits) { + unsigned mask = ((unsigned long long)1 << width) - 1; + input = LLVMBuildAnd(builder, input, lp_build_int_const_scalar(type, mask), ""); + } + + if(format_desc->channel[chan].normalized) + input = lp_build_unsigned_norm_to_float(builder, width, type, input); + else + input = LLVMBuildFPToSI(builder, input, lp_build_vec_type(type), ""); + } + else { + /* FIXME */ + assert(0); + input = lp_build_undef(type); + } + break; + + default: + /* fall through */ + input = lp_build_undef(type); + break; + } + + inputs[chan] = input; + + start = stop; + } + + if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { + enum util_format_swizzle swizzle = format_desc->swizzle[0]; + LLVMValueRef depth = lp_build_format_swizzle(type, inputs, swizzle); + rgba[2] = rgba[1] = rgba[0] = depth; + rgba[3] = lp_build_one(type); + } + else { + for (chan = 0; chan < 4; ++chan) { + enum util_format_swizzle swizzle = format_desc->swizzle[chan]; + rgba[chan] = lp_build_format_swizzle(type, inputs, swizzle); + } + } +} + + +void +lp_build_load_rgba_soa(LLVMBuilderRef builder, + const struct util_format_description *format_desc, + union lp_type type, + LLVMValueRef base_ptr, + LLVMValueRef offsets, + LLVMValueRef *rgba) +{ + LLVMValueRef packed; + + assert(format_desc->layout == UTIL_FORMAT_LAYOUT_ARITH); + assert(format_desc->block.width == 1); + assert(format_desc->block.height == 1); + assert(format_desc->block.bits <= 32); + + packed = lp_build_gather(builder, + type.length, format_desc->block.bits, type.width, + base_ptr, offsets); + + lp_build_unpack_rgba_soa(builder, format_desc, type, packed, rgba); +} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_logic.c b/src/gallium/drivers/llvmpipe/lp_bld_logic.c index 8631efd6c3e..995a69c0f4c 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_logic.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_logic.c @@ -313,8 +313,6 @@ lp_build_select(struct lp_build_context *bld, b = LLVMBuildBitCast(bld->builder, b, int_vec_type, ""); } - /* TODO: On SSE4 we could do this with a single instruction -- PBLENDVB */ - a = LLVMBuildAnd(bld->builder, a, mask, ""); /* This often gets translated to PANDN, but sometimes the NOT is @@ -339,7 +337,7 @@ LLVMValueRef lp_build_select_aos(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b, - boolean cond[4]) + const boolean cond[4]) { const union lp_type type = bld->type; const unsigned n = type.length; @@ -376,9 +374,9 @@ lp_build_select_aos(struct lp_build_context *bld, return LLVMBuildShuffleVector(bld->builder, a, b, LLVMConstVector(shuffles, n), ""); } + else { #if 0 - else if(0) { - /* FIXME: Unfortunately select of vectors do not work */ + /* XXX: Unfortunately select of vectors do not work */ /* Use a select */ LLVMTypeRef elem_type = LLVMInt1Type(); LLVMValueRef cond[LP_MAX_VECTOR_LENGTH]; @@ -388,10 +386,9 @@ lp_build_select_aos(struct lp_build_context *bld, cond[j + i] = LLVMConstInt(elem_type, cond[i] ? 1 : 0, 0); return LLVMBuildSelect(bld->builder, LLVMConstVector(cond, n), a, b, ""); - } -#endif - else { +#else LLVMValueRef mask = lp_build_const_mask_aos(type, cond); return lp_build_select(bld, mask, a, b); +#endif } } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_logic.h b/src/gallium/drivers/llvmpipe/lp_bld_logic.h index 29b9e1c45b8..9099e0fb5bf 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_logic.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_logic.h @@ -66,7 +66,7 @@ LLVMValueRef lp_build_select_aos(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b, - boolean cond[4]); + const boolean cond[4]); #endif /* !LP_BLD_LOGIC_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample.h b/src/gallium/drivers/llvmpipe/lp_bld_sample.h new file mode 100644 index 00000000000..6f565af76d1 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample.h @@ -0,0 +1,135 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Texture sampling. + * + * @author Jose Fonseca <[email protected]> + */ + +#ifndef LP_BLD_SAMPLE_H +#define LP_BLD_SAMPLE_H + + +#include <llvm-c/Core.h> + +struct pipe_texture; +struct pipe_sampler_state; +union lp_type; + + +/** + * Sampler static state. + * + * These are the bits of state from pipe_texture and pipe_sampler_state that + * are embedded in the generated code. + */ +struct lp_sampler_static_state +{ + /* pipe_texture's state */ + enum pipe_format format; + unsigned target:2; + unsigned pot_width:1; + unsigned pot_height:1; + unsigned pot_depth:1; + + /* pipe_sampler_state's state */ + unsigned wrap_s:3; + unsigned wrap_t:3; + unsigned wrap_r:3; + unsigned min_img_filter:2; + unsigned min_mip_filter:2; + unsigned mag_img_filter:2; + unsigned compare_mode:1; + unsigned compare_func:3; + unsigned normalized_coords:1; + unsigned prefilter:4; +}; + + +/** + * Sampler dynamic state. + * + * These are the bits of state from pipe_texture and pipe_sampler_state that + * are computed in runtime. + * + * There are obtained through callbacks, as we don't want to tie the texture + * sampling code generation logic to any particular texture layout or pipe + * driver. + */ +struct lp_sampler_dynamic_state +{ + + /** Obtain the base texture width. */ + LLVMValueRef + (*width)( struct lp_sampler_dynamic_state *state, + LLVMBuilderRef builder, + unsigned unit); + + /** Obtain the base texture height. */ + LLVMValueRef + (*height)( struct lp_sampler_dynamic_state *state, + LLVMBuilderRef builder, + unsigned unit); + + LLVMValueRef + (*stride)( struct lp_sampler_dynamic_state *state, + LLVMBuilderRef builder, + unsigned unit); + + LLVMValueRef + (*data_ptr)( struct lp_sampler_dynamic_state *state, + LLVMBuilderRef builder, + unsigned unit); + +}; + + +/** + * Derive the sampler static state. + */ +void +lp_sampler_static_state(struct lp_sampler_static_state *state, + const struct pipe_texture *texture, + const struct pipe_sampler_state *sampler); + + +void +lp_build_sample_soa(LLVMBuilderRef builder, + const struct lp_sampler_static_state *static_state, + struct lp_sampler_dynamic_state *dynamic_state, + union lp_type fp_type, + unsigned unit, + unsigned num_coords, + const LLVMValueRef *coords, + LLVMValueRef lodbias, + LLVMValueRef *texel); + + + +#endif /* LP_BLD_SAMPLE_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c new file mode 100644 index 00000000000..3ca25b0e76c --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c @@ -0,0 +1,411 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Texture sampling. + * + * @author Jose Fonseca <[email protected]> + */ + +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "util/u_debug.h" +#include "util/u_memory.h" +#include "util/u_math.h" +#include "util/u_format.h" +#include "lp_bld_debug.h" +#include "lp_bld_type.h" +#include "lp_bld_const.h" +#include "lp_bld_arit.h" +#include "lp_bld_logic.h" +#include "lp_bld_swizzle.h" +#include "lp_bld_format.h" +#include "lp_bld_sample.h" + + +void +lp_sampler_static_state(struct lp_sampler_static_state *state, + const struct pipe_texture *texture, + const struct pipe_sampler_state *sampler) +{ + memset(state, 0, sizeof *state); + + if(!texture) + return; + + if(!sampler) + return; + + state->format = texture->format; + state->target = texture->target; + state->pot_width = util_is_pot(texture->width[0]); + state->pot_height = util_is_pot(texture->height[0]); + state->pot_depth = util_is_pot(texture->depth[0]); + + state->wrap_s = sampler->wrap_s; + state->wrap_t = sampler->wrap_t; + state->wrap_r = sampler->wrap_r; + state->min_img_filter = sampler->min_img_filter; + state->min_mip_filter = sampler->min_mip_filter; + state->mag_img_filter = sampler->mag_img_filter; + if(sampler->compare_mode) { + state->compare_mode = sampler->compare_mode; + state->compare_func = sampler->compare_func; + } + state->normalized_coords = sampler->normalized_coords; + state->prefilter = sampler->prefilter; +} + + + +/** + * Keep all information for sampling code generation in a single place. + */ +struct lp_build_sample_context +{ + LLVMBuilderRef builder; + + const struct lp_sampler_static_state *static_state; + + struct lp_sampler_dynamic_state *dynamic_state; + + const struct util_format_description *format_desc; + + /** Incoming coordinates type and build context */ + union lp_type coord_type; + struct lp_build_context coord_bld; + + /** Integer coordinates */ + union lp_type int_coord_type; + struct lp_build_context int_coord_bld; + + /** Output texels type and build context */ + union lp_type texel_type; + struct lp_build_context texel_bld; +}; + + +static void +lp_build_sample_texel(struct lp_build_sample_context *bld, + LLVMValueRef x, + LLVMValueRef y, + LLVMValueRef y_stride, + LLVMValueRef data_ptr, + LLVMValueRef *texel) +{ + struct lp_build_context *int_coord_bld = &bld->int_coord_bld; + LLVMValueRef x_stride; + LLVMValueRef offset; + + x_stride = lp_build_const_scalar(bld->int_coord_type, bld->format_desc->block.bits/8); + + if(bld->format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { + LLVMValueRef x_lo, x_hi; + LLVMValueRef y_lo, y_hi; + LLVMValueRef x_stride_lo, x_stride_hi; + LLVMValueRef y_stride_lo, y_stride_hi; + LLVMValueRef x_offset_lo, x_offset_hi; + LLVMValueRef y_offset_lo, y_offset_hi; + LLVMValueRef offset_lo, offset_hi; + + x_lo = LLVMBuildAnd(bld->builder, x, int_coord_bld->one, ""); + y_lo = LLVMBuildAnd(bld->builder, y, int_coord_bld->one, ""); + + x_hi = LLVMBuildLShr(bld->builder, x, int_coord_bld->one, ""); + y_hi = LLVMBuildLShr(bld->builder, y, int_coord_bld->one, ""); + + x_stride_lo = x_stride; + y_stride_lo = lp_build_const_scalar(bld->int_coord_type, 2*bld->format_desc->block.bits/8); + + x_stride_hi = lp_build_const_scalar(bld->int_coord_type, 4*bld->format_desc->block.bits/8); + y_stride_hi = LLVMBuildShl(bld->builder, y_stride, int_coord_bld->one, ""); + + x_offset_lo = lp_build_mul(int_coord_bld, x_lo, x_stride_lo); + y_offset_lo = lp_build_mul(int_coord_bld, y_lo, y_stride_lo); + offset_lo = lp_build_add(int_coord_bld, x_offset_lo, y_offset_lo); + + x_offset_hi = lp_build_mul(int_coord_bld, x_hi, x_stride_hi); + y_offset_hi = lp_build_mul(int_coord_bld, y_hi, y_stride_hi); + offset_hi = lp_build_add(int_coord_bld, x_offset_hi, y_offset_hi); + + offset = lp_build_add(int_coord_bld, offset_hi, offset_lo); + } + else { + LLVMValueRef x_offset; + LLVMValueRef y_offset; + + x_offset = lp_build_mul(int_coord_bld, x, x_stride); + y_offset = lp_build_mul(int_coord_bld, y, y_stride); + + offset = lp_build_add(int_coord_bld, x_offset, y_offset); + } + + lp_build_load_rgba_soa(bld->builder, + bld->format_desc, + bld->texel_type, + data_ptr, + offset, + texel); +} + + +static LLVMValueRef +lp_build_sample_wrap(struct lp_build_sample_context *bld, + LLVMValueRef coord, + LLVMValueRef length, + boolean is_pot, + unsigned wrap_mode) +{ + struct lp_build_context *int_coord_bld = &bld->int_coord_bld; + LLVMValueRef length_minus_one; + + length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one); + + switch(wrap_mode) { + case PIPE_TEX_WRAP_REPEAT: + if(is_pot) + coord = LLVMBuildAnd(bld->builder, coord, length_minus_one, ""); + else + /* Signed remainder won't give the right results for negative + * dividends but unsigned remainder does.*/ + coord = LLVMBuildURem(bld->builder, coord, length, ""); + break; + + case PIPE_TEX_WRAP_CLAMP: + coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero); + coord = lp_build_min(int_coord_bld, coord, length_minus_one); + break; + + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + case PIPE_TEX_WRAP_MIRROR_REPEAT: + case PIPE_TEX_WRAP_MIRROR_CLAMP: + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + /* FIXME */ + default: + assert(0); + } + + return coord; +} + + +static void +lp_build_sample_2d_nearest_soa(struct lp_build_sample_context *bld, + LLVMValueRef s, + LLVMValueRef t, + LLVMValueRef width, + LLVMValueRef height, + LLVMValueRef stride, + LLVMValueRef data_ptr, + LLVMValueRef *texel) +{ + LLVMValueRef x; + LLVMValueRef y; + + x = lp_build_ifloor(&bld->coord_bld, s); + y = lp_build_ifloor(&bld->coord_bld, t); + + x = lp_build_sample_wrap(bld, x, width, bld->static_state->pot_width, bld->static_state->wrap_s); + y = lp_build_sample_wrap(bld, y, height, bld->static_state->pot_height, bld->static_state->wrap_t); + + lp_build_sample_texel(bld, x, y, stride, data_ptr, texel); +} + + +static void +lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld, + LLVMValueRef s, + LLVMValueRef t, + LLVMValueRef width, + LLVMValueRef height, + LLVMValueRef stride, + LLVMValueRef data_ptr, + LLVMValueRef *texel) +{ + LLVMValueRef half; + LLVMValueRef s_ipart; + LLVMValueRef t_ipart; + LLVMValueRef s_fpart; + LLVMValueRef t_fpart; + LLVMValueRef x0, x1; + LLVMValueRef y0, y1; + LLVMValueRef neighbors[2][2][4]; + unsigned chan; + + half = lp_build_const_scalar(bld->coord_type, 0.5); + s = lp_build_sub(&bld->coord_bld, s, half); + t = lp_build_sub(&bld->coord_bld, t, half); + + s_ipart = lp_build_floor(&bld->coord_bld, s); + t_ipart = lp_build_floor(&bld->coord_bld, t); + + s_fpart = lp_build_sub(&bld->coord_bld, s, s_ipart); + t_fpart = lp_build_sub(&bld->coord_bld, t, t_ipart); + + x0 = lp_build_int(&bld->coord_bld, s_ipart); + y0 = lp_build_int(&bld->coord_bld, t_ipart); + + x0 = lp_build_sample_wrap(bld, x0, width, bld->static_state->pot_width, bld->static_state->wrap_s); + y0 = lp_build_sample_wrap(bld, y0, height, bld->static_state->pot_height, bld->static_state->wrap_t); + + x1 = lp_build_add(&bld->int_coord_bld, x0, bld->int_coord_bld.one); + y1 = lp_build_add(&bld->int_coord_bld, y0, bld->int_coord_bld.one); + + x1 = lp_build_sample_wrap(bld, x1, width, bld->static_state->pot_width, bld->static_state->wrap_s); + y1 = lp_build_sample_wrap(bld, y1, height, bld->static_state->pot_height, bld->static_state->wrap_t); + + lp_build_sample_texel(bld, x0, y0, stride, data_ptr, neighbors[0][0]); + lp_build_sample_texel(bld, x1, y0, stride, data_ptr, neighbors[0][1]); + lp_build_sample_texel(bld, x0, y1, stride, data_ptr, neighbors[1][0]); + lp_build_sample_texel(bld, x1, y1, stride, data_ptr, neighbors[1][1]); + + /* TODO: Don't interpolate missing channels */ + for(chan = 0; chan < 4; ++chan) { + texel[chan] = lp_build_lerp_2d(&bld->texel_bld, + s_fpart, t_fpart, + neighbors[0][0][chan], + neighbors[0][1][chan], + neighbors[1][0][chan], + neighbors[1][1][chan]); + } +} + + +static void +lp_build_sample_compare(struct lp_build_sample_context *bld, + LLVMValueRef p, + LLVMValueRef *texel) +{ + struct lp_build_context *texel_bld = &bld->texel_bld; + LLVMValueRef res; + unsigned chan; + + if(!bld->static_state->compare_mode) + return; + + /* TODO: Compare before swizzling, to avoid redundant computations */ + res = NULL; + for(chan = 0; chan < 4; ++chan) { + LLVMValueRef cmp; + cmp = lp_build_cmp(texel_bld, bld->static_state->compare_func, p, texel[chan]); + cmp = lp_build_select(texel_bld, cmp, texel_bld->one, texel_bld->zero); + + if(res) + res = lp_build_add(texel_bld, res, cmp); + else + res = cmp; + } + + assert(res); + res = lp_build_mul(texel_bld, res, lp_build_const_scalar(texel_bld->type, 0.25)); + + /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */ + for(chan = 0; chan < 3; ++chan) + texel[chan] = res; + texel[3] = texel_bld->one; +} + + +void +lp_build_sample_soa(LLVMBuilderRef builder, + const struct lp_sampler_static_state *static_state, + struct lp_sampler_dynamic_state *dynamic_state, + union lp_type type, + unsigned unit, + unsigned num_coords, + const LLVMValueRef *coords, + LLVMValueRef lodbias, + LLVMValueRef *texel) +{ + struct lp_build_sample_context bld; + LLVMValueRef width; + LLVMValueRef height; + LLVMValueRef stride; + LLVMValueRef data_ptr; + LLVMValueRef s; + LLVMValueRef t; + LLVMValueRef p; + + /* Setup our build context */ + memset(&bld, 0, sizeof bld); + bld.builder = builder; + bld.static_state = static_state; + bld.dynamic_state = dynamic_state; + bld.format_desc = util_format_description(static_state->format); + bld.coord_type = type; + bld.int_coord_type = lp_int_type(type); + bld.texel_type = type; + lp_build_context_init(&bld.coord_bld, builder, bld.coord_type); + lp_build_context_init(&bld.int_coord_bld, builder, bld.int_coord_type); + lp_build_context_init(&bld.texel_bld, builder, bld.texel_type); + + /* Get the dynamic state */ + width = dynamic_state->width(dynamic_state, builder, unit); + height = dynamic_state->height(dynamic_state, builder, unit); + stride = dynamic_state->stride(dynamic_state, builder, unit); + data_ptr = dynamic_state->data_ptr(dynamic_state, builder, unit); + + s = coords[0]; + t = coords[1]; + p = coords[2]; + + width = lp_build_broadcast_scalar(&bld.int_coord_bld, width); + height = lp_build_broadcast_scalar(&bld.int_coord_bld, height); + stride = lp_build_broadcast_scalar(&bld.int_coord_bld, stride); + + if(static_state->target == PIPE_TEXTURE_1D) + t = bld.coord_bld.zero; + + if(static_state->normalized_coords) { + LLVMTypeRef coord_vec_type = lp_build_vec_type(bld.coord_type); + LLVMValueRef fp_width = LLVMBuildSIToFP(builder, width, coord_vec_type, ""); + LLVMValueRef fp_height = LLVMBuildSIToFP(builder, height, coord_vec_type, ""); + s = lp_build_mul(&bld.coord_bld, s, fp_width); + t = lp_build_mul(&bld.coord_bld, t, fp_height); + } + + switch (static_state->min_img_filter) { + case PIPE_TEX_FILTER_NEAREST: + lp_build_sample_2d_nearest_soa(&bld, s, t, width, height, stride, data_ptr, texel); + break; + case PIPE_TEX_FILTER_LINEAR: + case PIPE_TEX_FILTER_ANISO: + lp_build_sample_2d_linear_soa(&bld, s, t, width, height, stride, data_ptr, texel); + break; + default: + assert(0); + } + + /* FIXME: respect static_state->min_mip_filter */; + /* FIXME: respect static_state->mag_img_filter */; + /* FIXME: respect static_state->prefilter */; + + lp_build_sample_compare(&bld, p, texel); +} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_struct.c b/src/gallium/drivers/llvmpipe/lp_bld_struct.c index 14d2b10df9c..3998ac374fe 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_struct.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_struct.c @@ -42,17 +42,30 @@ LLVMValueRef +lp_build_struct_get_ptr(LLVMBuilderRef builder, + LLVMValueRef ptr, + unsigned member, + const char *name) +{ + LLVMValueRef indices[2]; + LLVMValueRef member_ptr; + indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); + indices[1] = LLVMConstInt(LLVMInt32Type(), member, 0); + member_ptr = LLVMBuildGEP(builder, ptr, indices, Elements(indices), ""); + lp_build_name(member_ptr, "%s.%s_ptr", LLVMGetValueName(ptr), name); + return member_ptr; +} + + +LLVMValueRef lp_build_struct_get(LLVMBuilderRef builder, LLVMValueRef ptr, unsigned member, const char *name) { - LLVMValueRef indices[2]; LLVMValueRef member_ptr; LLVMValueRef res; - indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); - indices[1] = LLVMConstInt(LLVMInt32Type(), member, 0); - member_ptr = LLVMBuildGEP(builder, ptr, indices, Elements(indices), ""); + member_ptr = lp_build_struct_get_ptr(builder, ptr, member, name); res = LLVMBuildLoad(builder, member_ptr, ""); lp_build_name(res, "%s.%s", LLVMGetValueName(ptr), name); return res; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_struct.h b/src/gallium/drivers/llvmpipe/lp_bld_struct.h index cbefdc9f815..740392f5611 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_struct.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_struct.h @@ -53,6 +53,18 @@ offsetof(_ctype, _cmember)) +/** + * Get value pointer to a structure member. + */ +LLVMValueRef +lp_build_struct_get_ptr(LLVMBuilderRef builder, + LLVMValueRef ptr, + unsigned member, + const char *name); + +/** + * Get the value of a structure member. + */ LLVMValueRef lp_build_struct_get(LLVMBuilderRef builder, LLVMValueRef ptr, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c index ac7eed9379a..f35638be447 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c @@ -161,7 +161,7 @@ lp_build_broadcast_aos(struct lp_build_context *bld, LLVMValueRef lp_build_swizzle1_aos(struct lp_build_context *bld, LLVMValueRef a, - unsigned char swizzle[4]) + const unsigned char swizzle[4]) { const unsigned n = bld->type.length; unsigned i, j; @@ -192,7 +192,7 @@ LLVMValueRef lp_build_swizzle2_aos(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b, - unsigned char swizzle[4]) + const unsigned char swizzle[4]) { const unsigned n = bld->type.length; unsigned i, j; @@ -201,11 +201,12 @@ lp_build_swizzle2_aos(struct lp_build_context *bld, return lp_build_swizzle1_aos(bld, a, swizzle); if(a == b) { - swizzle[0] %= 4; - swizzle[1] %= 4; - swizzle[2] %= 4; - swizzle[3] %= 4; - return lp_build_swizzle1_aos(bld, a, swizzle); + unsigned char swizzle1[4]; + swizzle1[0] = swizzle[0] % 4; + swizzle1[1] = swizzle[1] % 4; + swizzle1[2] = swizzle[2] % 4; + swizzle1[3] = swizzle[3] % 4; + return lp_build_swizzle1_aos(bld, a, swizzle1); } if(swizzle[0] % 4 == 0 && diff --git a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h index d7dd6a8a604..cb0b6707ec8 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h @@ -73,7 +73,7 @@ lp_build_broadcast_aos(struct lp_build_context *bld, LLVMValueRef lp_build_swizzle1_aos(struct lp_build_context *bld, LLVMValueRef a, - unsigned char swizzle[4]); + const unsigned char swizzle[4]); /** @@ -85,7 +85,7 @@ LLVMValueRef lp_build_swizzle2_aos(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b, - unsigned char swizzle[4]); + const unsigned char swizzle[4]); #endif /* !LP_BLD_SWIZZLE_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h b/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h index 912db24aecb..10c251c4162 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h @@ -44,14 +44,30 @@ struct lp_build_context; struct lp_build_mask_context; -typedef void -(*lp_emit_fetch_texel_soa_callback)( LLVMBuilderRef builder, - void *context, - unsigned unit, - unsigned num_coords, - const LLVMValueRef *coords, - LLVMValueRef lodbias, - LLVMValueRef *texel); +/** + * Sampler code generation interface. + * + * Although texture sampling is a requirement for TGSI translation, it is + * a very different problem with several different approaches to it. This + * structure establishes an interface for texture sampling code generation, so + * that we can easily use different texture sampling strategies. + */ +struct lp_build_sampler_soa +{ + void + (*destroy)( struct lp_build_sampler_soa *sampler ); + + void + (*emit_fetch_texel)( struct lp_build_sampler_soa *sampler, + LLVMBuilderRef builder, + union lp_type type, + unsigned unit, + unsigned num_coords, + const LLVMValueRef *coords, + LLVMValueRef lodbias, + LLVMValueRef *texel); +}; + void lp_build_tgsi_soa(LLVMBuilderRef builder, @@ -62,8 +78,7 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, const LLVMValueRef *pos, const LLVMValueRef (*inputs)[4], LLVMValueRef (*outputs)[4], - lp_emit_fetch_texel_soa_callback emit_fetch_texel, - void *emit_fetch_texel_context); + struct lp_build_sampler_soa *sampler); #endif /* LP_BLD_TGSI_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c index d4d18febec7..b106ce2317f 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c @@ -78,6 +78,11 @@ #define CHAN_Z 2 #define CHAN_W 3 +#define QUAD_TOP_LEFT 0 +#define QUAD_TOP_RIGHT 1 +#define QUAD_BOTTOM_LEFT 2 +#define QUAD_BOTTOM_RIGHT 3 + struct lp_build_tgsi_soa_context { @@ -88,8 +93,7 @@ struct lp_build_tgsi_soa_context const LLVMValueRef (*inputs)[NUM_CHANNELS]; LLVMValueRef (*outputs)[NUM_CHANNELS]; - lp_emit_fetch_texel_soa_callback emit_fetch_texel; - void *emit_fetch_texel_context; + struct lp_build_sampler_soa *sampler; LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS]; LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS]; @@ -98,6 +102,51 @@ struct lp_build_tgsi_soa_context }; +static const unsigned char +swizzle_left[4] = { + QUAD_TOP_LEFT, QUAD_TOP_LEFT, + QUAD_BOTTOM_LEFT, QUAD_BOTTOM_LEFT +}; + +static const unsigned char +swizzle_right[4] = { + QUAD_TOP_RIGHT, QUAD_TOP_RIGHT, + QUAD_BOTTOM_RIGHT, QUAD_BOTTOM_RIGHT +}; + +static const unsigned char +swizzle_top[4] = { + QUAD_TOP_LEFT, QUAD_TOP_RIGHT, + QUAD_TOP_LEFT, QUAD_TOP_RIGHT +}; + +static const unsigned char +swizzle_bottom[4] = { + QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT, + QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT +}; + + +static LLVMValueRef +emit_ddx(struct lp_build_tgsi_soa_context *bld, + LLVMValueRef src) +{ + LLVMValueRef src_left = lp_build_swizzle1_aos(&bld->base, src, swizzle_left); + LLVMValueRef src_right = lp_build_swizzle1_aos(&bld->base, src, swizzle_right); + return lp_build_sub(&bld->base, src_right, src_left); +} + + +static LLVMValueRef +emit_ddy(struct lp_build_tgsi_soa_context *bld, + LLVMValueRef src) +{ + LLVMValueRef src_top = lp_build_swizzle1_aos(&bld->base, src, swizzle_top); + LLVMValueRef src_bottom = lp_build_swizzle1_aos(&bld->base, src, swizzle_bottom); + return lp_build_sub(&bld->base, src_top, src_bottom); +} + + /** * Register fetch. */ @@ -168,6 +217,7 @@ emit_fetch( break; case TGSI_UTIL_SIGN_SET: + /* TODO: Use bitwese OR for floating point */ res = lp_build_abs( &bld->base, res ); res = LLVMBuildNeg( bld->base.builder, res, "" ); break; @@ -185,6 +235,36 @@ emit_fetch( /** + * Register fetch with derivatives. + */ +static void +emit_fetch_deriv( + struct lp_build_tgsi_soa_context *bld, + const struct tgsi_full_instruction *inst, + unsigned index, + const unsigned chan_index, + LLVMValueRef *res, + LLVMValueRef *ddx, + LLVMValueRef *ddy) +{ + LLVMValueRef src; + + src = emit_fetch(bld, inst, index, chan_index); + + if(res) + *res = src; + + /* TODO: use interpolation coeffs for inputs */ + + if(ddx) + *ddx = emit_ddx(bld, src); + + if(ddy) + *ddy = emit_ddy(bld, src); +} + + +/** * Register store. */ static void @@ -239,17 +319,18 @@ emit_store( * High-level instruction translators. */ + static void emit_tex( struct lp_build_tgsi_soa_context *bld, const struct tgsi_full_instruction *inst, boolean apply_lodbias, - boolean projected) + boolean projected, + LLVMValueRef *texel) { const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; LLVMValueRef lodbias; LLVMValueRef oow; LLVMValueRef coords[3]; - LLVMValueRef texel[4]; unsigned num_coords; unsigned i; @@ -289,12 +370,11 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, coords[i] = lp_build_mul(&bld->base, coords[i], oow); } - bld->emit_fetch_texel(bld->base.builder, bld->emit_fetch_texel_context, - unit, num_coords, coords, lodbias, texel); - - FOR_EACH_DST0_ENABLED_CHANNEL( inst, i ) { - emit_store( bld, inst, 0, i, texel[i] ); - } + bld->sampler->emit_fetch_texel(bld->sampler, + bld->base.builder, + bld->base.type, + unit, num_coords, coords, lodbias, + texel); } @@ -347,14 +427,6 @@ emit_kil( } -static void -emit_kilp( - struct lp_build_tgsi_soa_context *bld ) -{ - /* XXX todo / fix me */ -} - - /** * Check if inst src/dest regs use indirect addressing into temporary * register file. @@ -382,25 +454,35 @@ indirect_temp_reference(const struct tgsi_full_instruction *inst) static int emit_instruction( struct lp_build_tgsi_soa_context *bld, - struct tgsi_full_instruction *inst ) + const struct tgsi_full_instruction *inst, + const struct tgsi_opcode_info *info) { unsigned chan_index; LLVMValueRef src0, src1, src2; LLVMValueRef tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; - LLVMValueRef dst0; + LLVMValueRef res; + LLVMValueRef dst0[NUM_CHANNELS]; /* we can't handle indirect addressing into temp register file yet */ if (indirect_temp_reference(inst)) return FALSE; + assert(info->num_dst <= 1); + if(info->num_dst) { + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + dst0[chan_index] = bld->base.undef; + } + } + switch (inst->Instruction.Opcode) { #if 0 case TGSI_OPCODE_ARL: + /* FIXME */ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { tmp0 = emit_fetch( bld, inst, 0, chan_index ); emit_flr(bld, 0, 0); emit_f2it( bld, 0 ); - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = tmp0; } break; #endif @@ -408,19 +490,17 @@ emit_instruction( case TGSI_OPCODE_MOV: case TGSI_OPCODE_SWZ: FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - tmp0 = emit_fetch( bld, inst, 0, chan_index ); - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index ); } break; case TGSI_OPCODE_LIT: if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) { - emit_store( bld, inst, 0, CHAN_X, bld->base.one); + dst0[CHAN_X] = bld->base.one; } if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) { src0 = emit_fetch( bld, inst, 0, CHAN_X ); - dst0 = lp_build_max( &bld->base, src0, bld->base.zero); - emit_store( bld, inst, 0, CHAN_Y, dst0); + dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero); } if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { /* XMM[1] = SrcReg[0].yyyy */ @@ -432,20 +512,19 @@ emit_instruction( tmp1 = lp_build_pow( &bld->base, tmp1, tmp2); tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero); - dst0 = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero); - emit_store( bld, inst, 0, CHAN_Z, dst0); + dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero); } if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) { - emit_store( bld, inst, 0, CHAN_W, bld->base.one); + dst0[CHAN_W] = bld->base.one; } break; case TGSI_OPCODE_RCP: /* TGSI_OPCODE_RECIP */ src0 = emit_fetch( bld, inst, 0, CHAN_X ); - dst0 = lp_build_rcp(&bld->base, src0); + res = lp_build_rcp(&bld->base, src0); FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, dst0 ); + dst0[chan_index] = res; } break; @@ -453,9 +532,9 @@ emit_instruction( /* TGSI_OPCODE_RECIPSQRT */ src0 = emit_fetch( bld, inst, 0, CHAN_X ); src0 = lp_build_abs(&bld->base, src0); - dst0 = lp_build_rsqrt(&bld->base, src0); + res = lp_build_rsqrt(&bld->base, src0); FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, dst0 ); + dst0[chan_index] = res; } break; @@ -479,16 +558,15 @@ emit_instruction( lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2); if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) - emit_store( bld, inst, 0, CHAN_X, tmp0); + dst0[CHAN_X] = tmp0; if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) - emit_store( bld, inst, 0, CHAN_Y, tmp1); + dst0[CHAN_Y] = tmp1; if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) - emit_store( bld, inst, 0, CHAN_Z, tmp2); + dst0[CHAN_Z] = tmp2; } /* dst.w = 1.0 */ if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) { - tmp0 = bld->base.one; - emit_store( bld, inst, 0, CHAN_W, tmp0); + dst0[CHAN_W] = bld->base.one; } break; @@ -514,20 +592,18 @@ emit_instruction( /* dst.x = floor(lg2(abs(src.x))) */ if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) - emit_store( bld, inst, 0, CHAN_X, tmp0); + dst0[CHAN_X] = tmp0; /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */ if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) { - tmp1 = lp_build_div( &bld->base, src0, tmp1); - emit_store( bld, inst, 0, CHAN_Y, tmp1); + dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1); } /* dst.z = lg2(abs(src.x)) */ if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) - emit_store( bld, inst, 0, CHAN_Z, tmp2); + dst0[CHAN_Z] = tmp2; } /* dst.w = 1.0 */ if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) { - tmp0 = bld->base.one; - emit_store( bld, inst, 0, CHAN_W, tmp0); + dst0[CHAN_W] = bld->base.one; } break; @@ -535,8 +611,7 @@ emit_instruction( FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { src0 = emit_fetch( bld, inst, 0, chan_index ); src1 = emit_fetch( bld, inst, 1, chan_index ); - dst0 = lp_build_mul(&bld->base, src0, src1); - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = lp_build_mul(&bld->base, src0, src1); } break; @@ -544,8 +619,7 @@ emit_instruction( FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { src0 = emit_fetch( bld, inst, 0, chan_index ); src1 = emit_fetch( bld, inst, 1, chan_index ); - dst0 = lp_build_add(&bld->base, src0, src1); - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = lp_build_add(&bld->base, src0, src1); } break; @@ -563,7 +637,7 @@ emit_instruction( tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); tmp0 = lp_build_add( &bld->base, tmp0, tmp1); FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = tmp0; } break; @@ -585,28 +659,24 @@ emit_instruction( tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); tmp0 = lp_build_add( &bld->base, tmp0, tmp1); FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = tmp0; } break; case TGSI_OPCODE_DST: IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { - tmp0 = bld->base.one; - emit_store( bld, inst, 0, CHAN_X, tmp0); + dst0[CHAN_X] = bld->base.one; } IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { tmp0 = emit_fetch( bld, inst, 0, CHAN_Y ); tmp1 = emit_fetch( bld, inst, 1, CHAN_Y ); - tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); - emit_store( bld, inst, 0, CHAN_Y, tmp0); + dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1); } IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { - tmp0 = emit_fetch( bld, inst, 0, CHAN_Z ); - emit_store( bld, inst, 0, CHAN_Z, tmp0); + dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z ); } IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { - tmp0 = emit_fetch( bld, inst, 1, CHAN_W ); - emit_store( bld, inst, 0, CHAN_W, tmp0); + dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W ); } break; @@ -614,8 +684,7 @@ emit_instruction( FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { src0 = emit_fetch( bld, inst, 0, chan_index ); src1 = emit_fetch( bld, inst, 1, chan_index ); - dst0 = lp_build_min( &bld->base, src0, src1 ); - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = lp_build_min( &bld->base, src0, src1 ); } break; @@ -623,8 +692,7 @@ emit_instruction( FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { src0 = emit_fetch( bld, inst, 0, chan_index ); src1 = emit_fetch( bld, inst, 1, chan_index ); - dst0 = lp_build_max( &bld->base, src0, src1 ); - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = lp_build_max( &bld->base, src0, src1 ); } break; @@ -634,8 +702,7 @@ emit_instruction( src0 = emit_fetch( bld, inst, 0, chan_index ); src1 = emit_fetch( bld, inst, 1, chan_index ); tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 ); - dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); } break; @@ -645,8 +712,7 @@ emit_instruction( src0 = emit_fetch( bld, inst, 0, chan_index ); src1 = emit_fetch( bld, inst, 1, chan_index ); tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 ); - dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); } break; @@ -658,7 +724,7 @@ emit_instruction( tmp2 = emit_fetch( bld, inst, 2, chan_index ); tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); tmp0 = lp_build_add( &bld->base, tmp0, tmp2); - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = tmp0; } break; @@ -666,8 +732,7 @@ emit_instruction( FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { tmp0 = emit_fetch( bld, inst, 0, chan_index ); tmp1 = emit_fetch( bld, inst, 1, chan_index ); - tmp0 = lp_build_sub( &bld->base, tmp0, tmp1); - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1); } break; @@ -678,13 +743,19 @@ emit_instruction( src2 = emit_fetch( bld, inst, 2, chan_index ); tmp0 = lp_build_sub( &bld->base, src1, src2 ); tmp0 = lp_build_mul( &bld->base, src0, tmp0 ); - dst0 = lp_build_add( &bld->base, tmp0, src2 ); - emit_store( bld, inst, 0, chan_index, dst0 ); + dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 ); } break; case TGSI_OPCODE_CND: - return 0; + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + src0 = emit_fetch( bld, inst, 0, chan_index ); + src1 = emit_fetch( bld, inst, 1, chan_index ); + src2 = emit_fetch( bld, inst, 2, chan_index ); + tmp1 = lp_build_const_scalar(bld->base.type, 0.5); + tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1); + dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 ); + } break; case TGSI_OPCODE_DP2A: @@ -698,45 +769,49 @@ emit_instruction( tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */ tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, tmp0); /* dest[ch] = xmm0 */ + dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */ } break; -#if 0 case TGSI_OPCODE_FRC: FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - tmp0 = emit_fetch( bld, inst, 0, chan_index ); - emit_frc( bld, 0, 0 ); - emit_store( bld, inst, 0, chan_index, tmp0); + src0 = emit_fetch( bld, inst, 0, chan_index ); + tmp0 = lp_build_floor(&bld->base, src0); + tmp0 = lp_build_sub(&bld->base, tmp0, src0); + dst0[chan_index] = tmp0; } break; case TGSI_OPCODE_CLAMP: - return 0; + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + tmp0 = emit_fetch( bld, inst, 0, chan_index ); + src1 = emit_fetch( bld, inst, 1, chan_index ); + src2 = emit_fetch( bld, inst, 2, chan_index ); + tmp0 = lp_build_max(&bld->base, tmp0, src1); + tmp0 = lp_build_min(&bld->base, tmp0, src2); + dst0[chan_index] = tmp0; + } break; case TGSI_OPCODE_FLR: FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { tmp0 = emit_fetch( bld, inst, 0, chan_index ); - emit_flr( bld, 0, 0 ); - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = lp_build_floor(&bld->base, tmp0); } break; case TGSI_OPCODE_ROUND: FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { tmp0 = emit_fetch( bld, inst, 0, chan_index ); - emit_rnd( bld, 0, 0 ); - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = lp_build_round(&bld->base, tmp0); } break; -#endif case TGSI_OPCODE_EX2: { tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); tmp0 = lp_build_exp2( &bld->base, tmp0); FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = tmp0; } break; } @@ -745,16 +820,16 @@ emit_instruction( tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); tmp0 = lp_build_log2( &bld->base, tmp0); FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = tmp0; } break; case TGSI_OPCODE_POW: src0 = emit_fetch( bld, inst, 0, CHAN_X ); src1 = emit_fetch( bld, inst, 1, CHAN_X ); - dst0 = lp_build_pow( &bld->base, src0, src1 ); + res = lp_build_pow( &bld->base, src0, src1 ); FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, dst0 ); + dst0[chan_index] = res; } break; @@ -775,7 +850,7 @@ emit_instruction( tmp5 = tmp3; tmp5 = lp_build_mul( &bld->base, tmp5, tmp4); tmp2 = lp_build_sub( &bld->base, tmp2, tmp5); - emit_store( bld, inst, 0, CHAN_X, tmp2); + dst0[CHAN_X] = tmp2; } if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { @@ -786,31 +861,30 @@ emit_instruction( tmp3 = lp_build_mul( &bld->base, tmp3, tmp2); tmp1 = lp_build_mul( &bld->base, tmp1, tmp5); tmp3 = lp_build_sub( &bld->base, tmp3, tmp1); - emit_store( bld, inst, 0, CHAN_Y, tmp3); + dst0[CHAN_Y] = tmp3; } IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { tmp5 = lp_build_mul( &bld->base, tmp5, tmp4); tmp0 = lp_build_mul( &bld->base, tmp0, tmp2); tmp5 = lp_build_sub( &bld->base, tmp5, tmp0); - emit_store( bld, inst, 0, CHAN_Z, tmp5); + dst0[CHAN_Z] = tmp5; } IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { - tmp0 = bld->base.one; - emit_store( bld, inst, 0, CHAN_W, tmp0); + dst0[CHAN_W] = bld->base.one; } break; case TGSI_OPCODE_ABS: FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { tmp0 = emit_fetch( bld, inst, 0, chan_index ); - tmp0 = lp_build_abs( &bld->base, tmp0 ) ; - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = lp_build_abs( &bld->base, tmp0 ); } break; case TGSI_OPCODE_RCC: + /* deprecated? */ + assert(0); return 0; - break; case TGSI_OPCODE_DPH: tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); @@ -827,7 +901,7 @@ emit_instruction( tmp1 = emit_fetch( bld, inst, 1, CHAN_W ); tmp0 = lp_build_add( &bld->base, tmp0, tmp1); FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = tmp0; } break; @@ -835,25 +909,27 @@ emit_instruction( tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); tmp0 = lp_build_cos( &bld->base, tmp0 ); FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = tmp0; } break; case TGSI_OPCODE_DDX: - return 0; + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL); + } break; case TGSI_OPCODE_DDY: - return 0; + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]); + } break; -#if 0 case TGSI_OPCODE_KILP: /* predicated kill */ - emit_kilp( bld ); - return 0; /* XXX fix me */ + /* FIXME */ + return 0; break; -#endif case TGSI_OPCODE_KIL: /* conditional kill */ @@ -885,13 +961,14 @@ emit_instruction( src0 = emit_fetch( bld, inst, 0, chan_index ); src1 = emit_fetch( bld, inst, 1, chan_index ); tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 ); - dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); } break; case TGSI_OPCODE_SFL: - return 0; + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + dst0[chan_index] = bld->base.zero; + } break; case TGSI_OPCODE_SGT: @@ -899,8 +976,7 @@ emit_instruction( src0 = emit_fetch( bld, inst, 0, chan_index ); src1 = emit_fetch( bld, inst, 1, chan_index ); tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 ); - dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); } break; @@ -908,7 +984,7 @@ emit_instruction( tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); tmp0 = lp_build_sin( &bld->base, tmp0 ); FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = tmp0; } break; @@ -917,8 +993,7 @@ emit_instruction( src0 = emit_fetch( bld, inst, 0, chan_index ); src1 = emit_fetch( bld, inst, 1, chan_index ); tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 ); - dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); } break; @@ -927,85 +1002,99 @@ emit_instruction( src0 = emit_fetch( bld, inst, 0, chan_index ); src1 = emit_fetch( bld, inst, 1, chan_index ); tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 ); - dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); } break; case TGSI_OPCODE_STR: - return 0; + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + dst0[chan_index] = bld->base.one; + } break; case TGSI_OPCODE_TEX: - emit_tex( bld, inst, FALSE, FALSE ); + emit_tex( bld, inst, FALSE, FALSE, dst0 ); break; case TGSI_OPCODE_TXD: + /* FIXME */ return 0; break; case TGSI_OPCODE_UP2H: + /* deprecated */ + assert (0); return 0; break; case TGSI_OPCODE_UP2US: + /* deprecated */ + assert(0); return 0; break; case TGSI_OPCODE_UP4B: + /* deprecated */ + assert(0); return 0; break; case TGSI_OPCODE_UP4UB: + /* deprecated */ + assert(0); return 0; break; case TGSI_OPCODE_X2D: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_ARA: + /* deprecated */ + assert(0); return 0; break; #if 0 case TGSI_OPCODE_ARR: + /* FIXME */ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { tmp0 = emit_fetch( bld, inst, 0, chan_index ); emit_rnd( bld, 0, 0 ); emit_f2it( bld, 0 ); - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = tmp0; } break; #endif case TGSI_OPCODE_BRA: + /* deprecated */ + assert(0); return 0; break; case TGSI_OPCODE_CAL: + /* FIXME */ return 0; break; -#if 0 case TGSI_OPCODE_RET: - emit_ret( bld ); + /* FIXME */ + return 0; break; -#endif case TGSI_OPCODE_END: break; -#if 0 case TGSI_OPCODE_SSG: /* TGSI_OPCODE_SGN */ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { tmp0 = emit_fetch( bld, inst, 0, chan_index ); - emit_sgn( bld, 0, 0 ); - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 ); } break; -#endif case TGSI_OPCODE_CMP: FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { @@ -1013,34 +1102,29 @@ emit_instruction( src1 = emit_fetch( bld, inst, 1, chan_index ); src2 = emit_fetch( bld, inst, 2, chan_index ); tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero ); - dst0 = lp_build_select( &bld->base, tmp0, src1, src2); - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2); } break; case TGSI_OPCODE_SCS: IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); - tmp0 = lp_build_cos( &bld->base, tmp0 ); - emit_store( bld, inst, 0, CHAN_X, tmp0); + dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 ); } IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); - tmp0 = lp_build_sin( &bld->base, tmp0 ); - emit_store( bld, inst, 0, CHAN_Y, tmp0); + dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 ); } IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { - tmp0 = bld->base.zero; - emit_store( bld, inst, 0, CHAN_Z, tmp0); + dst0[CHAN_Z] = bld->base.zero; } IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { - tmp0 = bld->base.one; - emit_store( bld, inst, 0, CHAN_W, tmp0); + dst0[CHAN_W] = bld->base.one; } break; case TGSI_OPCODE_TXB: - emit_tex( bld, inst, TRUE, FALSE ); + emit_tex( bld, inst, TRUE, FALSE, dst0 ); break; case TGSI_OPCODE_NRM: @@ -1099,38 +1183,35 @@ emit_instruction( /* dst.x = xmm1 * src.x */ if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) { - tmp4 = lp_build_mul( &bld->base, tmp4, tmp1); - emit_store(bld, inst, 0, CHAN_X, tmp4); + dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1); } /* dst.y = xmm1 * src.y */ if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) { - tmp5 = lp_build_mul( &bld->base, tmp5, tmp1); - emit_store(bld, inst, 0, CHAN_Y, tmp5); + dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1); } /* dst.z = xmm1 * src.z */ if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) { - tmp6 = lp_build_mul( &bld->base, tmp6, tmp1); - emit_store(bld, inst, 0, CHAN_Z, tmp6); + dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1); } /* dst.w = xmm1 * src.w */ if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) { - tmp7 = lp_build_mul( &bld->base, tmp7, tmp1); - emit_store(bld, inst, 0, CHAN_W, tmp7); + dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1); } } - /* dst0.w = 1.0 */ + /* dst.w = 1.0 */ if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) { - tmp0 = bld->base.one; - emit_store(bld, inst, 0, CHAN_W, tmp0); + dst0[CHAN_W] = bld->base.one; } } break; case TGSI_OPCODE_DIV: + /* deprecated */ + assert( 0 ); return 0; break; @@ -1143,118 +1224,157 @@ emit_instruction( tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */ tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, tmp0); /* dest[ch] = xmm0 */ + dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */ } break; case TGSI_OPCODE_TXL: - emit_tex( bld, inst, TRUE, FALSE ); + emit_tex( bld, inst, TRUE, FALSE, dst0 ); break; case TGSI_OPCODE_TXP: - emit_tex( bld, inst, FALSE, TRUE ); + emit_tex( bld, inst, FALSE, TRUE, dst0 ); break; case TGSI_OPCODE_BRK: + /* FIXME */ return 0; break; case TGSI_OPCODE_IF: + /* FIXME */ return 0; break; case TGSI_OPCODE_BGNFOR: + /* deprecated */ + assert(0); return 0; break; case TGSI_OPCODE_REP: + /* deprecated */ + assert(0); return 0; break; case TGSI_OPCODE_ELSE: + /* FIXME */ return 0; break; case TGSI_OPCODE_ENDIF: + /* FIXME */ return 0; break; case TGSI_OPCODE_ENDFOR: + /* deprecated */ + assert(0); return 0; break; case TGSI_OPCODE_ENDREP: + /* deprecated */ + assert(0); return 0; break; case TGSI_OPCODE_PUSHA: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_POPA: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_CEIL: - return 0; + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + tmp0 = emit_fetch( bld, inst, 0, chan_index ); + dst0[chan_index] = lp_build_ceil(&bld->base, tmp0); + } break; case TGSI_OPCODE_I2F: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_NOT: + /* deprecated? */ + assert(0); return 0; break; -#if 0 case TGSI_OPCODE_TRUNC: FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { tmp0 = emit_fetch( bld, inst, 0, chan_index ); - emit_f2it( bld, 0 ); - emit_i2f( bld, 0 ); - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = lp_build_trunc(&bld->base, tmp0); } break; -#endif case TGSI_OPCODE_SHL: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_SHR: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_AND: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_OR: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_MOD: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_XOR: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_SAD: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_TXF: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_TXQ: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_CONT: + /* deprecated? */ + assert(0); return 0; break; @@ -1266,10 +1386,28 @@ emit_instruction( return 0; break; + case TGSI_OPCODE_NOISE1: + case TGSI_OPCODE_NOISE2: + case TGSI_OPCODE_NOISE3: + case TGSI_OPCODE_NOISE4: + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + dst0[chan_index] = bld->base.zero; + } + break; + + case TGSI_OPCODE_NOP: + break; + default: return 0; } + if(info->num_dst) { + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + emit_store( bld, inst, 0, chan_index, dst0[chan_index]); + } + } + return 1; } @@ -1283,8 +1421,7 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, const LLVMValueRef *pos, const LLVMValueRef (*inputs)[NUM_CHANNELS], LLVMValueRef (*outputs)[NUM_CHANNELS], - lp_emit_fetch_texel_soa_callback emit_fetch_texel, - void *emit_fetch_texel_context) + struct lp_build_sampler_soa *sampler) { struct lp_build_tgsi_soa_context bld; struct tgsi_parse_context parse; @@ -1299,8 +1436,7 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, bld.inputs = inputs; bld.outputs = outputs; bld.consts_ptr = consts_ptr; - bld.emit_fetch_texel = emit_fetch_texel; - bld.emit_fetch_texel_context = emit_fetch_texel_context; + bld.sampler = sampler; tgsi_parse_init( &parse, tokens ); @@ -1309,16 +1445,18 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, switch( parse.FullToken.Token.Type ) { case TGSI_TOKEN_TYPE_DECLARATION: - /* Input already interpolated */ + /* Inputs already interpolated */ break; case TGSI_TOKEN_TYPE_INSTRUCTION: - if (!emit_instruction( &bld, &parse.FullToken.FullInstruction )) { + { unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode; const struct tgsi_opcode_info *info = tgsi_get_opcode_info(opcode); - _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n", - info ? info->mnemonic : "<invalid>"); - } + if (!emit_instruction( &bld, &parse.FullToken.FullInstruction, info )) + _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n", + info ? info->mnemonic : "<invalid>"); + } + break; case TGSI_TOKEN_TYPE_IMMEDIATE: diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.c b/src/gallium/drivers/llvmpipe/lp_bld_type.c index 8e0026fd973..577644b7ab8 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_type.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_type.c @@ -157,6 +157,17 @@ lp_build_int_vec_type(union lp_type type) } +union lp_type +lp_int_type(union lp_type type) +{ + union lp_type int_type; + int_type.value = 0; + int_type.width = type.width; + int_type.length = type.length; + return int_type; +} + + void lp_build_context_init(struct lp_build_context *bld, LLVMBuilderRef builder, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.h b/src/gallium/drivers/llvmpipe/lp_bld_type.h index 3ce566be641..9933e0b45c3 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_type.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_type.h @@ -165,6 +165,10 @@ LLVMTypeRef lp_build_int_vec_type(union lp_type type); +union lp_type +lp_int_type(union lp_type type); + + void lp_build_context_init(struct lp_build_context *bld, LLVMBuilderRef builder, diff --git a/src/gallium/drivers/llvmpipe/lp_clear.c b/src/gallium/drivers/llvmpipe/lp_clear.c index 580cca5b463..bdcff94b9bf 100644 --- a/src/gallium/drivers/llvmpipe/lp_clear.c +++ b/src/gallium/drivers/llvmpipe/lp_clear.c @@ -67,6 +67,7 @@ llvmpipe_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba, util_pack_color(rgba, ps->format, &cv); lp_tile_cache_clear(llvmpipe->cbuf_cache[i], rgba, cv); } + llvmpipe->dirty_render_cache = TRUE; } if (buffers & PIPE_CLEAR_DEPTHSTENCIL) { diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index 233d1df0e10..a4b2bd8c2ad 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -141,8 +141,6 @@ llvmpipe_is_texture_referenced( struct pipe_context *pipe, return PIPE_REFERENCED_FOR_WRITE; } - /* FIXME: we also need to do the same for the texture cache */ - return PIPE_UNREFERENCED; } diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index d288460a1b8..9465f763d50 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -44,15 +44,47 @@ static void lp_jit_init_globals(struct llvmpipe_screen *screen) { - /* struct lp_jit_context */ + LLVMTypeRef texture_type; + + /* struct lp_jit_texture */ { LLVMTypeRef elem_types[4]; + + elem_types[LP_JIT_TEXTURE_WIDTH] = LLVMInt32Type(); + elem_types[LP_JIT_TEXTURE_HEIGHT] = LLVMInt32Type(); + elem_types[LP_JIT_TEXTURE_STRIDE] = LLVMInt32Type(); + elem_types[LP_JIT_TEXTURE_DATA] = LLVMPointerType(LLVMInt8Type(), 0); + + texture_type = LLVMStructType(elem_types, Elements(elem_types), 0); + + LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, width, + screen->target, texture_type, + LP_JIT_TEXTURE_WIDTH); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, height, + screen->target, texture_type, + LP_JIT_TEXTURE_HEIGHT); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, stride, + screen->target, texture_type, + LP_JIT_TEXTURE_STRIDE); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, data, + screen->target, texture_type, + LP_JIT_TEXTURE_DATA); + LP_CHECK_STRUCT_SIZE(struct lp_jit_texture, + screen->target, texture_type); + + LLVMAddTypeName(screen->module, "texture", texture_type); + } + + /* struct lp_jit_context */ + { + LLVMTypeRef elem_types[5]; LLVMTypeRef context_type; elem_types[0] = LLVMPointerType(LLVMFloatType(), 0); /* constants */ elem_types[1] = LLVMPointerType(LLVMInt8Type(), 0); /* samplers */ elem_types[2] = LLVMFloatType(); /* alpha_ref_value */ elem_types[3] = LLVMPointerType(LLVMInt8Type(), 0); /* blend_color */ + elem_types[4] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); /* textures */ context_type = LLVMStructType(elem_types, Elements(elem_types), 0); @@ -64,6 +96,9 @@ lp_jit_init_globals(struct llvmpipe_screen *screen) screen->target, context_type, 2); LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, blend_color, screen->target, context_type, 3); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, textures, + screen->target, context_type, + LP_JIT_CONTEXT_TEXTURES_INDEX); LP_CHECK_STRUCT_SIZE(struct lp_jit_context, screen->target, context_type); diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h index a7fb60f9f5c..58f716ede29 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.h +++ b/src/gallium/drivers/llvmpipe/lp_jit.h @@ -38,11 +38,31 @@ #include "lp_bld_struct.h" +#include "pipe/p_state.h" + struct tgsi_sampler; struct llvmpipe_screen; +struct lp_jit_texture +{ + uint32_t width; + uint32_t height; + uint32_t stride; + const void *data; +}; + + +enum { + LP_JIT_TEXTURE_WIDTH = 0, + LP_JIT_TEXTURE_HEIGHT, + LP_JIT_TEXTURE_STRIDE, + LP_JIT_TEXTURE_DATA +}; + + + /** * This structure is passed directly to the generated fragment shader. * @@ -60,11 +80,12 @@ struct lp_jit_context struct tgsi_sampler **samplers; - /* TODO: alpha reference value */ float alpha_ref_value; - /* TODO: blend constant color */ + /* FIXME: store (also?) in floats */ uint8_t *blend_color; + + struct lp_jit_texture textures[PIPE_MAX_SAMPLERS]; }; @@ -80,6 +101,11 @@ struct lp_jit_context #define lp_jit_context_blend_color(_builder, _ptr) \ lp_build_struct_get(_builder, _ptr, 3, "blend_color") +#define LP_JIT_CONTEXT_TEXTURES_INDEX 4 + +#define lp_jit_context_textures(_builder, _ptr) \ + lp_build_struct_get_ptr(_builder, _ptr, LP_JIT_CONTEXT_TEXTURES_INDEX, "textures") + typedef void (*lp_jit_frag_func)(struct lp_jit_context *context, diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 125035771e5..0ce1a37bd47 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -27,8 +27,6 @@ #include "util/u_memory.h" -#include "util/u_simple_screen.h" -#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_defines.h" #include "pipe/p_screen.h" @@ -196,8 +194,7 @@ static void llvmpipe_destroy_screen( struct pipe_screen *_screen ) { struct llvmpipe_screen *screen = llvmpipe_screen(_screen); - - struct pipe_winsys *winsys = _screen->winsys; + struct llvmpipe_winsys *winsys = screen->winsys; lp_jit_screen_cleanup(screen); diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index d145f6d6bbc..2d2fc19a65f 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -44,6 +44,7 @@ #include "pipe/p_thread.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "lp_bld_debug.h" #include "lp_tile_cache.h" #include "lp_tile_soa.h" @@ -162,12 +163,12 @@ shade_quads(struct llvmpipe_context *llvmpipe, else depth = NULL; - /* TODO: blend color */ + /* XXX: This will most likely fail on 32bit x86 without -mstackrealign */ + assert(lp_check_alignment(mask, 16)); - assert((((uintptr_t)mask) & 0xf) == 0); - assert((((uintptr_t)depth) & 0xf) == 0); - assert((((uintptr_t)color) & 0xf) == 0); - assert((((uintptr_t)llvmpipe->jit_context.blend_color) & 0xf) == 0); + assert(lp_check_alignment(depth, 16)); + assert(lp_check_alignment(color, 16)); + assert(lp_check_alignment(llvmpipe->jit_context.blend_color, 16)); /* run shader */ fs->current->jit_function( &llvmpipe->jit_context, diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h index fb10329887d..7b26ce61a38 100644 --- a/src/gallium/drivers/llvmpipe/lp_state.h +++ b/src/gallium/drivers/llvmpipe/lp_state.h @@ -36,6 +36,7 @@ #include "pipe/p_state.h" #include "tgsi/tgsi_scan.h" #include "lp_jit.h" +#include "lp_bld_sample.h" /* for struct lp_sampler_static_state */ #define LP_NEW_VIEWPORT 0x1 @@ -57,16 +58,20 @@ struct tgsi_sampler; struct vertex_info; - +struct pipe_context; +struct llvmpipe_context; struct lp_fragment_shader; struct lp_fragment_shader_variant_key { + enum pipe_format zsbuf_format; struct pipe_depth_state depth; struct pipe_alpha_state alpha; struct pipe_blend_state blend; + + struct lp_sampler_static_state sampler[PIPE_MAX_SAMPLERS]; }; diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index 6fbb057937e..e87976b9f36 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -250,7 +250,9 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ) if (llvmpipe->dirty & (LP_NEW_FS | LP_NEW_BLEND | - LP_NEW_DEPTH_STENCIL_ALPHA)) + LP_NEW_DEPTH_STENCIL_ALPHA | + LP_NEW_SAMPLER | + LP_NEW_TEXTURE)) llvmpipe_update_fs( llvmpipe ); diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 94170bd7161..618cf1ffb8d 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -85,6 +85,7 @@ #include "lp_context.h" #include "lp_state.h" #include "lp_quad.h" +#include "lp_tex_sample.h" static const unsigned char quad_offset_x[4] = {0, 1, 0, 1}; @@ -130,9 +131,8 @@ generate_pos0(LLVMBuilderRef builder, * Generate the depth test. */ static void -generate_depth(struct llvmpipe_context *lp, - LLVMBuilderRef builder, - const struct pipe_depth_state *state, +generate_depth(LLVMBuilderRef builder, + const struct lp_fragment_shader_variant_key *key, union lp_type src_type, struct lp_build_mask_context *mask, LLVMValueRef src, @@ -141,10 +141,10 @@ generate_depth(struct llvmpipe_context *lp, const struct util_format_description *format_desc; union lp_type dst_type; - if(!lp->framebuffer.zsbuf) + if(!key->depth.enabled) return; - format_desc = util_format_description(lp->framebuffer.zsbuf->format); + format_desc = util_format_description(key->zsbuf_format); assert(format_desc); /* Pick the depth type. */ @@ -164,7 +164,7 @@ generate_depth(struct llvmpipe_context *lp, #endif lp_build_depth_test(builder, - state, + &key->depth, dst_type, format_desc, mask, @@ -173,107 +173,6 @@ generate_depth(struct llvmpipe_context *lp, } -struct build_fetch_texel_context -{ - LLVMValueRef context_ptr; - - LLVMValueRef samplers_ptr; - - /** Coords/texels store */ - LLVMValueRef store_ptr; -}; - - -void PIPE_CDECL -lp_fetch_texel_soa( struct tgsi_sampler **samplers, - uint32_t unit, - float *store ) -{ - struct tgsi_sampler *sampler = samplers[unit]; - -#if 0 - uint j; - - debug_printf("%s sampler: %p (%p) store: %p\n", - __FUNCTION__, - sampler, *sampler, - store ); - - debug_printf("lodbias %f\n", store[12]); - - for (j = 0; j < 4; j++) - debug_printf("sample %d texcoord %f %f\n", - j, - store[0+j], - store[4+j]); -#endif - - { - float rgba[NUM_CHANNELS][QUAD_SIZE]; - sampler->get_samples(sampler, - &store[0], - &store[4], - &store[8], - 0.0f, /*store[12], lodbias */ - rgba); - memcpy(store, rgba, sizeof rgba); - } - -#if 0 - for (j = 0; j < 4; j++) - debug_printf("sample %d result %f %f %f %f\n", - j, - store[0+j], - store[4+j], - store[8+j], - store[12+j]); -#endif -} - - -static void -emit_fetch_texel( LLVMBuilderRef builder, - void *context, - unsigned unit, - unsigned num_coords, - const LLVMValueRef *coords, - LLVMValueRef lodbias, - LLVMValueRef *texel) -{ - struct build_fetch_texel_context *bld = context; - LLVMTypeRef vec_type = LLVMTypeOf(coords[0]); - LLVMValueRef args[3]; - unsigned i; - - if(!bld->samplers_ptr) - bld->samplers_ptr = lp_jit_context_samplers(builder, bld->context_ptr); - - if(!bld->store_ptr) - bld->store_ptr = LLVMBuildArrayAlloca(builder, - vec_type, - LLVMConstInt(LLVMInt32Type(), 4, 0), - "texel_store"); - - for (i = 0; i < num_coords; i++) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - LLVMValueRef coord_ptr = LLVMBuildGEP(builder, bld->store_ptr, &index, 1, ""); - LLVMBuildStore(builder, coords[i], coord_ptr); - } - - args[0] = bld->samplers_ptr; - args[1] = LLVMConstInt(LLVMInt32Type(), unit, 0); - args[2] = bld->store_ptr; - - lp_build_intrinsic(builder, "fetch_texel", LLVMVoidType(), args, 3); - - for (i = 0; i < NUM_CHANNELS; ++i) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - LLVMValueRef texel_ptr = LLVMBuildGEP(builder, bld->store_ptr, &index, 1, ""); - texel[i] = LLVMBuildLoad(builder, texel_ptr, ""); - } -} - - /** * Generate the fragment shader, depth/stencil test, and alpha tests. */ @@ -286,7 +185,7 @@ generate_fs(struct llvmpipe_context *lp, LLVMValueRef context_ptr, unsigned i, const struct lp_build_interp_soa_context *interp, - struct build_fetch_texel_context *sampler, + struct lp_build_sampler_soa *sampler, LLVMValueRef *pmask, LLVMValueRef *color, LLVMValueRef depth_ptr) @@ -298,6 +197,7 @@ generate_fs(struct llvmpipe_context *lp, LLVMValueRef consts_ptr; LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS]; LLVMValueRef z = interp->pos[2]; + struct lp_build_flow_context *flow; struct lp_build_mask_context mask; boolean early_depth_test; unsigned attrib; @@ -309,25 +209,35 @@ generate_fs(struct llvmpipe_context *lp, consts_ptr = lp_jit_context_constants(builder, context_ptr); - lp_build_mask_begin(&mask, builder, type, *pmask); + flow = lp_build_flow_create(builder); + + memset(outputs, 0, sizeof outputs); + + lp_build_flow_scope_begin(flow); + + /* Declare the color and z variables */ + for(chan = 0; chan < NUM_CHANNELS; ++chan) { + color[chan] = LLVMGetUndef(vec_type); + lp_build_flow_scope_declare(flow, &color[chan]); + } + lp_build_flow_scope_declare(flow, &z); + + lp_build_mask_begin(&mask, flow, type, *pmask); early_depth_test = - lp->depth_stencil->depth.enabled && - lp->framebuffer.zsbuf && - !lp->depth_stencil->alpha.enabled && - !lp->fs->info.uses_kill && - !lp->fs->info.writes_z; + key->depth.enabled && + !key->alpha.enabled && + !shader->info.uses_kill && + !shader->info.writes_z; if(early_depth_test) - generate_depth(lp, builder, &key->depth, + generate_depth(builder, key, type, &mask, z, depth_ptr); - memset(outputs, 0, sizeof outputs); - lp_build_tgsi_soa(builder, tokens, type, &mask, consts_ptr, interp->pos, interp->inputs, - outputs, emit_fetch_texel, sampler); + outputs, sampler); for (attrib = 0; attrib < shader->info.num_outputs; ++attrib) { for(chan = 0; chan < NUM_CHANNELS; ++chan) { @@ -368,12 +278,16 @@ generate_fs(struct llvmpipe_context *lp, } if(!early_depth_test) - generate_depth(lp, builder, &key->depth, + generate_depth(builder, key, type, &mask, z, depth_ptr); lp_build_mask_end(&mask); + lp_build_flow_scope_end(flow); + + lp_build_flow_destroy(flow); + *pmask = mask.value; } @@ -392,6 +306,8 @@ generate_blend(const struct pipe_blend_state *blend, LLVMValueRef dst_ptr) { struct lp_build_context bld; + struct lp_build_flow_context *flow; + struct lp_build_mask_context mask_ctx; LLVMTypeRef vec_type; LLVMTypeRef int_vec_type; LLVMValueRef const_ptr; @@ -400,11 +316,14 @@ generate_blend(const struct pipe_blend_state *blend, LLVMValueRef res[4]; unsigned chan; + lp_build_context_init(&bld, builder, type); + + flow = lp_build_flow_create(builder); + lp_build_mask_begin(&mask_ctx, flow, type, mask); + vec_type = lp_build_vec_type(type); int_vec_type = lp_build_int_vec_type(type); - lp_build_context_init(&bld, builder, type); - const_ptr = lp_jit_context_blend_color(builder, context_ptr); const_ptr = LLVMBuildBitCast(builder, const_ptr, LLVMPointerType(vec_type, 0), ""); @@ -422,11 +341,16 @@ generate_blend(const struct pipe_blend_state *blend, lp_build_blend_soa(builder, blend, type, src, dst, con, res); for(chan = 0; chan < 4; ++chan) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0); - lp_build_name(res[chan], "res.%c", "rgba"[chan]); - res[chan] = lp_build_select(&bld, mask, res[chan], dst[chan]); - LLVMBuildStore(builder, res[chan], LLVMBuildGEP(builder, dst_ptr, &index, 1, "")); + if(blend->colormask & (1 << chan)) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0); + lp_build_name(res[chan], "res.%c", "rgba"[chan]); + res[chan] = lp_build_select(&bld, mask, res[chan], dst[chan]); + LLVMBuildStore(builder, res[chan], LLVMBuildGEP(builder, dst_ptr, &index, 1, "")); + } } + + lp_build_mask_end(&mask_ctx); + lp_build_flow_destroy(flow); } @@ -462,7 +386,7 @@ generate_fragment(struct llvmpipe_context *lp, LLVMBuilderRef builder; LLVMValueRef x0; LLVMValueRef y0; - struct build_fetch_texel_context sampler; + struct lp_build_sampler_soa *sampler; struct lp_build_interp_soa_context interp; LLVMValueRef fs_mask[LP_MAX_VECTOR_LENGTH]; LLVMValueRef fs_out_color[NUM_CHANNELS][LP_MAX_VECTOR_LENGTH]; @@ -586,8 +510,13 @@ generate_fragment(struct llvmpipe_context *lp, a0_ptr, dadx_ptr, dady_ptr, x0, y0, 2, 0); - memset(&sampler, 0, sizeof sampler); - sampler.context_ptr = context_ptr; +#if 0 + /* C texture sampling */ + sampler = lp_c_sampler_soa_create(context_ptr); +#else + /* code generated texture sampling */ + sampler = lp_llvm_sampler_soa_create(key->sampler, context_ptr); +#endif for(i = 0; i < num_fs; ++i) { LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); @@ -606,7 +535,7 @@ generate_fragment(struct llvmpipe_context *lp, context_ptr, i, &interp, - &sampler, + sampler, &fs_mask[i], out_color, depth_ptr_i); @@ -615,6 +544,8 @@ generate_fragment(struct llvmpipe_context *lp, fs_out_color[chan][i] = out_color[chan]; } + sampler->destroy(sampler); + /* * Convert the fs's output color and mask to fit to the blending type. */ @@ -765,18 +696,45 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe, */ static void make_variant_key(struct llvmpipe_context *lp, + struct lp_fragment_shader *shader, struct lp_fragment_shader_variant_key *key) { + unsigned i; + memset(key, 0, sizeof *key); - memcpy(&key->depth, &lp->depth_stencil->depth, sizeof key->depth); + if(lp->framebuffer.zsbuf && + lp->depth_stencil->depth.enabled) { + key->zsbuf_format = lp->framebuffer.zsbuf->format; + memcpy(&key->depth, &lp->depth_stencil->depth, sizeof key->depth); + } key->alpha.enabled = lp->depth_stencil->alpha.enabled; if(key->alpha.enabled) key->alpha.func = lp->depth_stencil->alpha.func; /* alpha.ref_value is passed in jit_context */ - memcpy(&key->blend, lp->blend, sizeof key->blend); + if(lp->framebuffer.cbufs[0]) { + const struct util_format_description *format_desc; + unsigned chan; + + memcpy(&key->blend, lp->blend, sizeof key->blend); + + format_desc = util_format_description(lp->framebuffer.cbufs[0]->format); + assert(format_desc->layout == UTIL_FORMAT_COLORSPACE_RGB || + format_desc->layout == UTIL_FORMAT_COLORSPACE_SRGB); + + /* mask out color channels not present in the color buffer */ + for(chan = 0; chan < 4; ++chan) { + enum util_format_swizzle swizzle = format_desc->swizzle[chan]; + if(swizzle > 4) + key->blend.colormask &= ~(1 << chan); + } + } + + for(i = 0; i < PIPE_MAX_SAMPLERS; ++i) + if(shader->info.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) + lp_sampler_static_state(&key->sampler[i], lp->texture[i], lp->sampler[i]); } @@ -787,7 +745,7 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) struct lp_fragment_shader_variant_key key; struct lp_fragment_shader_variant *variant; - make_variant_key(lp, &key); + make_variant_key(lp, shader, &key); variant = shader->variants; while(variant) { diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/src/gallium/drivers/llvmpipe/lp_state_sampler.c index 4fef541b1e3..c69d90c723a 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c +++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c @@ -98,6 +98,16 @@ llvmpipe_set_sampler_textures(struct pipe_context *pipe, pipe_texture_reference(&llvmpipe->texture[i], tex); lp_tex_tile_cache_set_texture(llvmpipe->tex_cache[i], tex); + + if(tex) { + struct llvmpipe_texture *lp_tex = llvmpipe_texture(tex); + struct lp_jit_texture *jit_tex = &llvmpipe->jit_context.textures[i]; + jit_tex->width = tex->width[0]; + jit_tex->height = tex->height[0]; + jit_tex->stride = lp_tex->stride[0]; + if(!lp_tex->dt) + jit_tex->data = lp_tex->data; + } } llvmpipe->num_textures = num; diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c index 1d192355eed..d8455e56490 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_format.c +++ b/src/gallium/drivers/llvmpipe/lp_test_format.c @@ -119,7 +119,7 @@ add_load_rgba_test(LLVMModuleRef module, lp_build_loop_begin(builder, LLVMConstInt(LLVMInt32Type(), 1, 0), &loop); - rgba = lp_build_load_rgba(builder, format, ptr); + rgba = lp_build_load_rgba_aos(builder, format, ptr); LLVMBuildStore(builder, rgba, rgba_ptr); lp_build_loop_end(builder, LLVMConstInt(LLVMInt32Type(), 4, 0), NULL, &loop); @@ -160,7 +160,7 @@ add_store_rgba_test(LLVMModuleRef module, rgba = LLVMBuildLoad(builder, rgba_ptr, ""); - lp_build_store_rgba(builder, format, ptr, rgba); + lp_build_store_rgba_aos(builder, format, ptr, rgba); LLVMBuildRetVoid(builder); diff --git a/src/gallium/drivers/llvmpipe/lp_tex_cache.c b/src/gallium/drivers/llvmpipe/lp_tex_cache.c index 23a94b5b0d5..773e8482425 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_cache.c +++ b/src/gallium/drivers/llvmpipe/lp_tex_cache.c @@ -154,7 +154,7 @@ lp_tex_tile_cache_validate_texture(struct llvmpipe_tex_tile_cache *tc) if (lpt->timestamp != tc->timestamp) { /* texture was modified, invalidate all cached tiles */ uint i; - _debug_printf("INV %d %d\n", tc->timestamp, lpt->timestamp); + debug_printf("INV %d %d\n", tc->timestamp, lpt->timestamp); for (i = 0; i < NUM_ENTRIES; i++) { tc->entries[i].addr.bits.invalid = 1; } diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.h b/src/gallium/drivers/llvmpipe/lp_tex_sample.h index 628ec3f1efd..9ad1bde9565 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample.h +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.h @@ -29,10 +29,13 @@ #define LP_TEX_SAMPLE_H +#include <llvm-c/Core.h> + #include "tgsi/tgsi_exec.h" struct llvmpipe_tex_tile_cache; +struct lp_sampler_static_state; /** @@ -75,4 +78,24 @@ lp_get_samples(struct tgsi_sampler *tgsi_sampler, float rgba[NUM_CHANNELS][QUAD_SIZE]); +/** + * Texture sampling code generator that just calls lp_get_samples C function + * for the actual sampling computation. + * + * @param context_ptr LLVM value with the pointer to the struct lp_jit_context. + */ +struct lp_build_sampler_soa * +lp_c_sampler_soa_create(LLVMValueRef context_ptr); + + +/** + * Pure-LLVM texture sampling code generator. + * + * @param context_ptr LLVM value with the pointer to the struct lp_jit_context. + */ +struct lp_build_sampler_soa * +lp_llvm_sampler_soa_create(const struct lp_sampler_static_state *key, + LLVMValueRef context_ptr); + + #endif /* LP_TEX_SAMPLE_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c index 94eb6dad5af..9a876f404df 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample.c +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c @@ -1578,3 +1578,136 @@ out: tgsi_sampler->get_samples( tgsi_sampler, s, t, p, lodbias, rgba ); } + +void PIPE_CDECL +lp_fetch_texel_soa( struct tgsi_sampler **samplers, + uint32_t unit, + float *store ) +{ + struct tgsi_sampler *sampler = samplers[unit]; + +#if 0 + uint j; + + debug_printf("%s sampler: %p (%p) store: %p\n", + __FUNCTION__, + sampler, *sampler, + store ); + + debug_printf("lodbias %f\n", store[12]); + + for (j = 0; j < 4; j++) + debug_printf("sample %d texcoord %f %f\n", + j, + store[0+j], + store[4+j]); +#endif + + { + float rgba[NUM_CHANNELS][QUAD_SIZE]; + sampler->get_samples(sampler, + &store[0], + &store[4], + &store[8], + 0.0f, /*store[12], lodbias */ + rgba); + memcpy(store, rgba, sizeof rgba); + } + +#if 0 + for (j = 0; j < 4; j++) + debug_printf("sample %d result %f %f %f %f\n", + j, + store[0+j], + store[4+j], + store[8+j], + store[12+j]); +#endif +} + + +#include "lp_bld_type.h" +#include "lp_bld_intr.h" +#include "lp_bld_tgsi.h" + + +struct lp_c_sampler_soa +{ + struct lp_build_sampler_soa base; + + LLVMValueRef context_ptr; + + LLVMValueRef samplers_ptr; + + /** Coords/texels store */ + LLVMValueRef store_ptr; +}; + + +static void +lp_c_sampler_soa_destroy(struct lp_build_sampler_soa *sampler) +{ + FREE(sampler); +} + + +static void +lp_c_sampler_soa_emit_fetch_texel(struct lp_build_sampler_soa *_sampler, + LLVMBuilderRef builder, + union lp_type type, + unsigned unit, + unsigned num_coords, + const LLVMValueRef *coords, + LLVMValueRef lodbias, + LLVMValueRef *texel) +{ + struct lp_c_sampler_soa *sampler = (struct lp_c_sampler_soa *)_sampler; + LLVMTypeRef vec_type = LLVMTypeOf(coords[0]); + LLVMValueRef args[3]; + unsigned i; + + if(!sampler->samplers_ptr) + sampler->samplers_ptr = lp_jit_context_samplers(builder, sampler->context_ptr); + + if(!sampler->store_ptr) + sampler->store_ptr = LLVMBuildArrayAlloca(builder, + vec_type, + LLVMConstInt(LLVMInt32Type(), 4, 0), + "texel_store"); + + for (i = 0; i < num_coords; i++) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef coord_ptr = LLVMBuildGEP(builder, sampler->store_ptr, &index, 1, ""); + LLVMBuildStore(builder, coords[i], coord_ptr); + } + + args[0] = sampler->samplers_ptr; + args[1] = LLVMConstInt(LLVMInt32Type(), unit, 0); + args[2] = sampler->store_ptr; + + lp_build_intrinsic(builder, "fetch_texel", LLVMVoidType(), args, 3); + + for (i = 0; i < NUM_CHANNELS; ++i) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef texel_ptr = LLVMBuildGEP(builder, sampler->store_ptr, &index, 1, ""); + texel[i] = LLVMBuildLoad(builder, texel_ptr, ""); + } +} + + +struct lp_build_sampler_soa * +lp_c_sampler_soa_create(LLVMValueRef context_ptr) +{ + struct lp_c_sampler_soa *sampler; + + sampler = CALLOC_STRUCT(lp_c_sampler_soa); + if(!sampler) + return NULL; + + sampler->base.destroy = lp_c_sampler_soa_destroy; + sampler->base.emit_fetch_texel = lp_c_sampler_soa_emit_fetch_texel; + sampler->context_ptr = context_ptr; + + return &sampler->base; +} + diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c new file mode 100644 index 00000000000..7d31705d014 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c @@ -0,0 +1,196 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Texture sampling code generation + * + * This file is nothing more than ugly glue between three largely independent + * entities: + * - TGSI -> LLVM translation (i.e., lp_build_tgsi_soa) + * - texture sampling code generation (i.e., lp_build_sample_soa) + * - LLVM pipe driver + * + * All interesting code is in the functions mentioned above. There is really + * nothing to see here. + * + * @author Jose Fonseca <[email protected]> + */ + +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" +#include "lp_bld_debug.h" +#include "lp_bld_type.h" +#include "lp_bld_intr.h" +#include "lp_bld_sample.h" +#include "lp_bld_tgsi.h" +#include "lp_state.h" +#include "lp_tex_sample.h" + + +/** + * This provides the bridge between the sampler state store in lp_jit_context + * and lp_jit_texture and the sampler code generator. It provides the + * texture layout information required by the texture sampler code generator + * in terms of the state stored in lp_jit_context and lp_jit_texture in runtime. + */ +struct llvmpipe_sampler_dynamic_state +{ + struct lp_sampler_dynamic_state base; + + const struct lp_sampler_static_state *static_state; + + LLVMValueRef context_ptr; +}; + + +/** + * This is the bridge between our sampler and the TGSI translator. + */ +struct lp_llvm_sampler_soa +{ + struct lp_build_sampler_soa base; + + struct llvmpipe_sampler_dynamic_state dynamic_state; +}; + + +/** + * Fetch the specified member of the lp_jit_texture structure. + * + * @sa http://llvm.org/docs/GetElementPtr.html + */ +static LLVMValueRef +lp_llvm_texture_member(struct lp_sampler_dynamic_state *base, + LLVMBuilderRef builder, + unsigned unit, + unsigned member_index, + const char *member_name) +{ + struct llvmpipe_sampler_dynamic_state *state = (struct llvmpipe_sampler_dynamic_state *)base; + LLVMValueRef indices[4]; + LLVMValueRef ptr; + LLVMValueRef res; + + assert(unit < PIPE_MAX_SAMPLERS); + + /* context[0] */ + indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); + /* context[0].textures */ + indices[1] = LLVMConstInt(LLVMInt32Type(), LP_JIT_CONTEXT_TEXTURES_INDEX, 0); + /* context[0].textures[unit] */ + indices[2] = LLVMConstInt(LLVMInt32Type(), unit, 0); + /* context[0].textures[unit].member */ + indices[3] = LLVMConstInt(LLVMInt32Type(), member_index, 0); + + ptr = LLVMBuildGEP(builder, state->context_ptr, indices, Elements(indices), ""); + + res = LLVMBuildLoad(builder, ptr, ""); + + lp_build_name(res, "context.texture%u.%s", unit, member_name); + + return res; +} + + +/** + * Helper macro to instantiate the functions that generate the code to fetch + * the members of lp_jit_texture to fulfill the sampler code generator requests. + * + * This complexity is the price we have to pay to keep the texture sampler code + * generator a reusable module without dependencies to llvmpipe internals. + */ +#define LP_LLVM_TEXTURE_MEMBER(_name, _index) \ + static LLVMValueRef \ + lp_llvm_texture_##_name( struct lp_sampler_dynamic_state *base, \ + LLVMBuilderRef builder, \ + unsigned unit) \ + { \ + return lp_llvm_texture_member(base, builder, unit, _index, #_name ); \ + } + + +LP_LLVM_TEXTURE_MEMBER(width, LP_JIT_TEXTURE_WIDTH) +LP_LLVM_TEXTURE_MEMBER(height, LP_JIT_TEXTURE_HEIGHT) +LP_LLVM_TEXTURE_MEMBER(stride, LP_JIT_TEXTURE_STRIDE) +LP_LLVM_TEXTURE_MEMBER(data_ptr, LP_JIT_TEXTURE_DATA) + + +static void +lp_llvm_sampler_soa_destroy(struct lp_build_sampler_soa *sampler) +{ + FREE(sampler); +} + + +static void +lp_llvm_sampler_soa_emit_fetch_texel(struct lp_build_sampler_soa *base, + LLVMBuilderRef builder, + union lp_type type, + unsigned unit, + unsigned num_coords, + const LLVMValueRef *coords, + LLVMValueRef lodbias, + LLVMValueRef *texel) +{ + struct lp_llvm_sampler_soa *sampler = (struct lp_llvm_sampler_soa *)base; + + assert(unit < PIPE_MAX_SAMPLERS); + + lp_build_sample_soa(builder, + &sampler->dynamic_state.static_state[unit], + &sampler->dynamic_state.base, + type, + unit, + num_coords, + coords, + lodbias, + texel); +} + + +struct lp_build_sampler_soa * +lp_llvm_sampler_soa_create(const struct lp_sampler_static_state *static_state, + LLVMValueRef context_ptr) +{ + struct lp_llvm_sampler_soa *sampler; + + sampler = CALLOC_STRUCT(lp_llvm_sampler_soa); + if(!sampler) + return NULL; + + sampler->base.destroy = lp_llvm_sampler_soa_destroy; + sampler->base.emit_fetch_texel = lp_llvm_sampler_soa_emit_fetch_texel; + sampler->dynamic_state.base.width = lp_llvm_texture_width; + sampler->dynamic_state.base.height = lp_llvm_texture_height; + sampler->dynamic_state.base.stride = lp_llvm_texture_stride; + sampler->dynamic_state.base.data_ptr = lp_llvm_texture_data_ptr; + sampler->dynamic_state.static_state = static_state; + sampler->dynamic_state.context_ptr = context_ptr; + + return &sampler->base; +} + diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c index 03b9243b828..93479a0314a 100644 --- a/src/gallium/drivers/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nv50/nv50_miptree.c @@ -148,6 +148,7 @@ nv50_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, mt->image_nr = 1; mt->level[0].pitch = *stride; mt->level[0].image_offset = CALLOC(1, sizeof(unsigned)); + mt->level[0].tile_mode = bo->tile_mode; nouveau_bo_ref(bo, &mt->base.bo); return &mt->base.base; diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index 344c2cf6dde..d294356f75d 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -66,7 +66,8 @@ nv50_state_validate_fb(struct nv50_context *nv50) so_data(so, NV50TCL_RT_FORMAT_X8R8G8B8_UNORM); break; } - so_data(so, bo->tile_mode << 4); + so_data(so, nv50_miptree(pt)-> + level[fb->cbufs[i]->level].tile_mode << 4); so_data(so, 0x00000000); so_method(so, tesla, 0x1224, 1); @@ -110,7 +111,8 @@ nv50_state_validate_fb(struct nv50_context *nv50) so_data(so, NV50TCL_ZETA_FORMAT_S8Z24_UNORM); break; } - so_data(so, bo->tile_mode << 4); + so_data(so, nv50_miptree(pt)-> + level[fb->zsbuf->level].tile_mode << 4); so_data(so, 0x00000000); so_method(so, tesla, 0x1538, 1); diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c index b266324f58d..6bf6f773b0c 100644 --- a/src/gallium/drivers/nv50/nv50_surface.c +++ b/src/gallium/drivers/nv50/nv50_surface.c @@ -60,13 +60,13 @@ nv50_surface_set(struct nv50_screen *screen, struct pipe_surface *ps, int dst) format = nv50_format(ps->format); if (format < 0) return 1; - + if (!bo->tile_flags) { BEGIN_RING(chan, eng2d, mthd, 2); OUT_RING (chan, format); OUT_RING (chan, 1); BEGIN_RING(chan, eng2d, mthd + 0x14, 5); - OUT_RING (chan, mt->level[0].pitch); + OUT_RING (chan, mt->level[ps->level].pitch); OUT_RING (chan, ps->width); OUT_RING (chan, ps->height); OUT_RELOCh(chan, bo, ps->offset, flags); @@ -75,7 +75,7 @@ nv50_surface_set(struct nv50_screen *screen, struct pipe_surface *ps, int dst) BEGIN_RING(chan, eng2d, mthd, 5); OUT_RING (chan, format); OUT_RING (chan, 0); - OUT_RING (chan, bo->tile_mode << 4); + OUT_RING (chan, mt->level[ps->level].tile_mode << 4); OUT_RING (chan, 1); OUT_RING (chan, 0); BEGIN_RING(chan, eng2d, mthd + 0x18, 4); diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 6c5914baa35..52b1c9a6b26 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -184,8 +184,15 @@ struct r300_texture { /* Offsets into the buffer. */ unsigned offset[PIPE_MAX_TEXTURE_LEVELS]; - /* Stride (pitch?) of this texture in bytes */ - unsigned stride; + /** + * If non-zero, override the natural texture layout with + * a custom stride (in bytes). + * + * \note Mipmapping fails for textures with a non-natural layout! + * + * \sa r300_texture_get_stride + */ + unsigned stride_override; /* Total size of this texture, in bytes. */ unsigned size; diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 1bc35c24867..a1b36ba2ed1 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -283,7 +283,7 @@ void r300_emit_fb_state(struct r300_context* r300, for (i = 0; i < fb->nr_cbufs; i++) { tex = (struct r300_texture*)fb->cbufs[i]->texture; assert(tex && tex->buffer && "cbuf is marked, but NULL!"); - pixpitch = tex->stride / tex->tex.block.size; + pixpitch = r300_texture_get_stride(tex, 0) / tex->tex.block.size; OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0 + (4 * i), 1); OUT_CS_RELOC(tex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); @@ -300,7 +300,7 @@ void r300_emit_fb_state(struct r300_context* r300, if (fb->zsbuf) { tex = (struct r300_texture*)fb->zsbuf->texture; assert(tex && tex->buffer && "zsbuf is marked, but NULL!"); - pixpitch = tex->stride / tex->tex.block.size; + pixpitch = r300_texture_get_stride(tex, 0) / tex->tex.block.size; OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1); OUT_CS_RELOC(tex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index d05a736dd96..737396d8d97 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -31,6 +31,7 @@ #include "r300_state_derived.h" /* r300_render: Vertex and index buffer primitive emission. */ +#define R300_MAX_VBO_SIZE (1024 * 1024) struct r300_render { /* Parent class */ @@ -46,7 +47,10 @@ struct r300_render { /* VBO */ struct pipe_buffer* vbo; - size_t vbo_alloc_size; + size_t vbo_size; + size_t vbo_offset; + size_t vbo_max_used; + void * vbo_ptr; }; static INLINE struct r300_render* @@ -75,19 +79,18 @@ static boolean r300_render_allocate_vertices(struct vbuf_render* render, struct pipe_screen* screen = r300->context.screen; size_t size = (size_t)vertex_size * (size_t)count; - if (r300render->vbo && (size > r300render->vbo_alloc_size)) { - pipe_buffer_reference(&r300render->vbo, NULL); - } - - if (!r300render->vbo) { + if (size + r300render->vbo_offset > r300render->vbo_size) + { r300render->vbo = pipe_buffer_create(screen, 64, PIPE_BUFFER_USAGE_VERTEX, - size); + R300_MAX_VBO_SIZE); + r300render->vbo_size = R300_MAX_VBO_SIZE; } - r300render->vbo_alloc_size = MAX2(size, r300render->vbo_alloc_size); r300render->vertex_size = vertex_size; + r300->vbo = r300render->vbo; + r300->vbo_offset = r300render->vbo_offset; return (r300render->vbo) ? TRUE : FALSE; } @@ -97,8 +100,10 @@ static void* r300_render_map_vertices(struct vbuf_render* render) struct r300_render* r300render = r300_render(render); struct pipe_screen* screen = r300render->r300->context.screen; - return (unsigned char*)pipe_buffer_map(screen, r300render->vbo, - PIPE_BUFFER_USAGE_CPU_WRITE); + r300render->vbo_ptr = pipe_buffer_map(screen, r300render->vbo, + PIPE_BUFFER_USAGE_CPU_WRITE); + + return (r300render->vbo_ptr + r300render->vbo_offset); } static void r300_render_unmap_vertices(struct vbuf_render* render, @@ -107,15 +112,24 @@ static void r300_render_unmap_vertices(struct vbuf_render* render, { struct r300_render* r300render = r300_render(render); struct pipe_screen* screen = r300render->r300->context.screen; + CS_LOCALS(r300render->r300); + BEGIN_CS(2); + OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, max); + END_CS; + r300render->vbo_max_used = MAX2(r300render->vbo_max_used, + r300render->vertex_size * (max + 1)); pipe_buffer_unmap(screen, r300render->vbo); } static void r300_render_release_vertices(struct vbuf_render* render) { struct r300_render* r300render = r300_render(render); + struct r300_context* r300 = r300render->r300; - pipe_buffer_reference(&r300render->vbo, NULL); + r300render->vbo_offset += r300render->vbo_max_used; + r300render->vbo_max_used = 0; + r300->vbo = NULL; } static boolean r300_render_set_primitive(struct vbuf_render* render, @@ -163,14 +177,12 @@ static boolean r300_render_set_primitive(struct vbuf_render* render, return TRUE; } -static void prepare_render(struct r300_render* render, unsigned count) +static void r300_prepare_render(struct r300_render* render, unsigned count) { struct r300_context* r300 = render->r300; CS_LOCALS(r300); - r300->vbo = render->vbo; - r300_emit_dirty_state(r300); } @@ -183,7 +195,7 @@ static void r300_render_draw_arrays(struct vbuf_render* render, CS_LOCALS(r300); - prepare_render(r300render, count); + r300_prepare_render(r300render, count); DBG(r300, DBG_DRAW, "r300: Doing vbuf render, count %d\n", count); @@ -208,7 +220,7 @@ static void r300_render_draw(struct vbuf_render* render, CS_LOCALS(r300); - prepare_render(r300render, count); + r300_prepare_render(r300render, count); /* Send our indices into an index buffer. */ index_buffer = pipe_buffer_create(screen, 64, PIPE_BUFFER_USAGE_VERTEX, @@ -217,25 +229,7 @@ static void r300_render_draw(struct vbuf_render* render, return; } -/* - index_map = pipe_buffer_map(screen, index_buffer, - PIPE_BUFFER_USAGE_CPU_WRITE); - memcpy(index_map, indices, count); - pipe_buffer_unmap(screen, index_buffer); - - debug_printf("r300: Doing indexbuf render, count %d\n", count); - - BEGIN_CS(8); - OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 0); - OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | - r300render->hwprim); - OUT_CS_PKT3(R300_PACKET3_INDX_BUFFER, 2); - OUT_CS(R300_INDX_BUFFER_ONE_REG_WR | (R300_VAP_PORT_IDX0 >> 2)); - OUT_CS_INDEX_RELOC(index_buffer, 0, count, RADEON_GEM_DOMAIN_GTT, 0, 0); - END_CS; */ - - BEGIN_CS(4 + (count+1)/2); - OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count); + BEGIN_CS(2 + (count+1)/2); OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, (count+1)/2); OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | r300render->hwprim); @@ -273,6 +267,10 @@ static struct vbuf_render* r300_render_create(struct r300_context* r300) r300render->base.release_vertices = r300_render_release_vertices; r300render->base.destroy = r300_render_destroy; + r300render->vbo = NULL; + r300render->vbo_size = 0; + r300render->vbo_offset = 0; + return &r300render->base; } diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 15740f61252..593178c50be 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -323,13 +323,14 @@ r300_get_tex_transfer(struct pipe_screen *screen, if (trans) { pipe_texture_reference(&trans->transfer.texture, texture); trans->transfer.format = texture->format; + trans->transfer.x = x; + trans->transfer.y = y; trans->transfer.width = w; trans->transfer.height = h; trans->transfer.block = texture->block; trans->transfer.nblocksx = texture->nblocksx[level]; trans->transfer.nblocksy = texture->nblocksy[level]; - trans->transfer.stride = align(pf_get_stride(&trans->transfer.block, - texture->width[level]), 32); + trans->transfer.stride = r300_texture_get_stride(tex, level); trans->transfer.usage = usage; trans->offset = offset; } @@ -356,7 +357,7 @@ static void* r300_transfer_map(struct pipe_screen* screen, if (transfer->usage != PIPE_TRANSFER_READ) { flags |= PIPE_BUFFER_USAGE_CPU_WRITE; } - + map = pipe_buffer_map(screen, tex->buffer, flags); if (!map) { diff --git a/src/gallium/drivers/r300/r300_surface.c b/src/gallium/drivers/r300/r300_surface.c index 96e6e4a77d4..cc6288cb519 100644 --- a/src/gallium/drivers/r300/r300_surface.c +++ b/src/gallium/drivers/r300/r300_surface.c @@ -29,7 +29,7 @@ static void r300_surface_setup(struct r300_context* r300, unsigned w, unsigned h) { struct r300_capabilities* caps = r300_screen(r300->context.screen)->caps; - unsigned pixpitch = dest->stride / dest->tex.block.size; + unsigned pixpitch = r300_texture_get_stride(dest, 0) / dest->tex.block.size; CS_LOCALS(r300); r300_emit_blend_state(r300, &blend_clear_state); @@ -100,7 +100,7 @@ static void r300_surface_fill(struct pipe_context* pipe, struct r300_context* r300 = r300_context(pipe); struct r300_capabilities* caps = r300_screen(pipe->screen)->caps; struct r300_texture* tex = (struct r300_texture*)dest->texture; - unsigned pixpitch = tex->stride / tex->tex.block.size; + unsigned pixpitch = r300_texture_get_stride(tex, 0) / tex->tex.block.size; boolean invalid = FALSE; CS_LOCALS(r300); @@ -233,7 +233,7 @@ static void r300_surface_copy(struct pipe_context* pipe, struct r300_capabilities* caps = r300_screen(pipe->screen)->caps; struct r300_texture* srctex = (struct r300_texture*)src->texture; struct r300_texture* desttex = (struct r300_texture*)dest->texture; - unsigned pixpitch = srctex->stride / srctex->tex.block.size; + unsigned pixpitch = r300_texture_get_stride(srctex, 0) / srctex->tex.block.size; boolean invalid = FALSE; float fsrcx = srcx, fsrcy = srcy, fdestx = destx, fdesty = desty; CS_LOCALS(r300); diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 590052509cc..6e8c3683200 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -25,7 +25,6 @@ static void r300_setup_texture_state(struct r300_texture* tex, unsigned width, unsigned height, - unsigned pitch, unsigned levels) { struct r300_texture_state* state = &tex->state; @@ -38,7 +37,7 @@ static void r300_setup_texture_state(struct r300_texture* tex, /* XXX */ state->format1 = r300_translate_texformat(tex->tex.format); - state->format2 = pitch - 1; + state->format2 = r300_texture_get_stride(tex, 0); /* Assume (somewhat foolishly) that oversized textures will * not be permitted by the state tracker. */ @@ -50,13 +49,30 @@ static void r300_setup_texture_state(struct r300_texture* tex, } debug_printf("r300: Set texture state (%dx%d, pitch %d, %d levels)\n", - width, height, pitch, levels); + width, height, levels); +} + +/** + * Return the stride, in bytes, of the texture images of the given texture + * at the given level. + */ +unsigned r300_texture_get_stride(struct r300_texture* tex, unsigned level) +{ + if (tex->stride_override) + return tex->stride_override; + + if (level > tex->tex.last_level) { + debug_printf("%s: level (%u) > last_level (%u)\n", level, tex->tex.last_level); + return 0; + } + + return align(pf_get_stride(&tex->tex.block, tex->tex.width[level]), 32); } static void r300_setup_miptree(struct r300_texture* tex) { struct pipe_texture* base = &tex->tex; - int stride, size, offset; + int stride, size; int i; for (i = 0; i <= base->last_level; i++) { @@ -74,7 +90,7 @@ static void r300_setup_miptree(struct r300_texture* tex) * XXX * POT, uncompressed, unmippmapped textures can be aligned to 32, * instead of 64. */ - stride = align(pf_get_stride(&base->block, base->width[i]), 32); + stride = r300_texture_get_stride(tex, i); size = stride * base->nblocksy[i] * base->depth[i]; tex->offset[i] = align(tex->size, 32); @@ -84,10 +100,6 @@ static void r300_setup_miptree(struct r300_texture* tex) "(%dx%dx%d px, pitch %d bytes)\n", i, base->width[i], base->height[i], base->depth[i], stride); - /* Save stride of first level to the texture. */ - if (i == 0) { - tex->stride = stride; - } } } @@ -109,7 +121,7 @@ static struct pipe_texture* r300_setup_miptree(tex); r300_setup_texture_state(tex, template->width[0], template->height[0], - template->width[0], template->last_level); + template->last_level); tex->buffer = screen->buffer_create(screen, 1024, PIPE_BUFFER_USAGE_PIXEL, @@ -189,11 +201,10 @@ static struct pipe_texture* pipe_reference_init(&tex->tex.reference, 1); tex->tex.screen = screen; - tex->stride = *stride; + tex->stride_override = *stride; /* XXX */ - r300_setup_texture_state(tex, tex->tex.width[0], tex->tex.height[0], - tex->stride, 0); + r300_setup_texture_state(tex, tex->tex.width[0], tex->tex.height[0], 0); pipe_buffer_reference(&tex->buffer, buffer); @@ -221,7 +232,7 @@ boolean r300_get_texture_buffer(struct pipe_texture* texture, pipe_buffer_reference(buffer, tex->buffer); if (stride) { - *stride = tex->stride; + *stride = r300_texture_get_stride(tex, 0); } return TRUE; diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h index 3b56f0307c0..3109af5baca 100644 --- a/src/gallium/drivers/r300/r300_texture.h +++ b/src/gallium/drivers/r300/r300_texture.h @@ -30,8 +30,12 @@ #include "r300_context.h" #include "r300_reg.h" +struct r300_texture; + void r300_init_screen_texture_functions(struct pipe_screen* screen); +unsigned r300_texture_get_stride(struct r300_texture* tex, unsigned level); + /* Note the signature of R300_EASY_TX_FORMAT(A, R, G, B, FORMAT)... */ static INLINE uint32_t r300_translate_texformat(enum pipe_format format) { diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index d68a1041063..0913ca1bd5b 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -158,7 +158,6 @@ static unsigned translate_saturate(unsigned saturate) switch(saturate) { case TGSI_SAT_NONE: return SATURATE_OFF; case TGSI_SAT_ZERO_ONE: return SATURATE_ZERO_ONE; - case TGSI_SAT_MINUS_PLUS_ONE: return SATURATE_PLUS_MINUS_ONE; } fprintf(stderr, "Unknown saturate mode: %i\n", saturate); diff --git a/src/gallium/drivers/softpipe/sp_clear.c b/src/gallium/drivers/softpipe/sp_clear.c index fa59277438c..8fac8e6e05f 100644 --- a/src/gallium/drivers/softpipe/sp_clear.c +++ b/src/gallium/drivers/softpipe/sp_clear.c @@ -36,8 +36,6 @@ #include "util/u_pack_color.h" #include "sp_clear.h" #include "sp_context.h" -#include "sp_surface.h" -#include "sp_state.h" #include "sp_tile_cache.h" @@ -85,5 +83,7 @@ softpipe_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba, /* non-cached surface */ pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, cv); #endif - } + } + + softpipe->dirty_render_cache = TRUE; } diff --git a/src/gallium/drivers/softpipe/sp_clear.h b/src/gallium/drivers/softpipe/sp_clear.h index 2e450672f58..9be3b86fe9f 100644 --- a/src/gallium/drivers/softpipe/sp_clear.h +++ b/src/gallium/drivers/softpipe/sp_clear.h @@ -32,7 +32,6 @@ #ifndef SP_CLEAR_H #define SP_CLEAR_H -#include "pipe/p_state.h" struct pipe_context; extern void diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 86df320ea8a..c699c433e9c 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -83,7 +83,8 @@ softpipe_unmap_transfers(struct softpipe_context *sp) } -static void softpipe_destroy( struct pipe_context *pipe ) +static void +softpipe_destroy( struct pipe_context *pipe ) { struct softpipe_context *softpipe = softpipe_context( pipe ); uint i; @@ -121,6 +122,15 @@ static void softpipe_destroy( struct pipe_context *pipe ) FREE( softpipe ); } + +/** + * if (the texture is being used as a framebuffer surface) + * return PIPE_REFERENCED_FOR_WRITE + * else if (the texture is a bound texture source) + * return PIPE_REFERENCED_FOR_READ XXX not done yet + * else + * return PIPE_UNREFERENCED + */ static unsigned int softpipe_is_texture_referenced( struct pipe_context *pipe, struct pipe_texture *texture, @@ -129,15 +139,17 @@ softpipe_is_texture_referenced( struct pipe_context *pipe, struct softpipe_context *softpipe = softpipe_context( pipe ); unsigned i; - if(softpipe->dirty_render_cache) { + if (softpipe->dirty_render_cache) { for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++) { - if(softpipe->framebuffer.cbufs[i] && - softpipe->framebuffer.cbufs[i]->texture == texture) + if (softpipe->framebuffer.cbufs[i] && + softpipe->framebuffer.cbufs[i]->texture == texture) { return PIPE_REFERENCED_FOR_WRITE; + } } - if(softpipe->framebuffer.zsbuf && - softpipe->framebuffer.zsbuf->texture == texture) + if (softpipe->framebuffer.zsbuf && + softpipe->framebuffer.zsbuf->texture == texture) { return PIPE_REFERENCED_FOR_WRITE; + } } /* FIXME: we also need to do the same for the texture cache */ @@ -145,6 +157,7 @@ softpipe_is_texture_referenced( struct pipe_context *pipe, return PIPE_UNREFERENCED; } + static unsigned int softpipe_is_buffer_referenced( struct pipe_context *pipe, struct pipe_buffer *buf) @@ -152,6 +165,7 @@ softpipe_is_buffer_referenced( struct pipe_context *pipe, return PIPE_UNREFERENCED; } + struct pipe_context * softpipe_create( struct pipe_screen *screen ) { @@ -222,7 +236,6 @@ softpipe_create( struct pipe_screen *screen ) softpipe->pipe.is_buffer_referenced = softpipe_is_buffer_referenced; softpipe_init_query_funcs( softpipe ); - softpipe_init_texture_funcs( softpipe ); /* * Alloc caches for accessing drawing surfaces and textures. diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index 70f09324311..548e40c4a8c 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -30,26 +30,21 @@ * Michel Dänzer <[email protected]> */ -#include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/internal/p_winsys_screen.h" #include "util/u_math.h" #include "util/u_memory.h" #include "sp_context.h" #include "sp_state.h" #include "sp_texture.h" -#include "sp_tile_cache.h" #include "sp_screen.h" #include "sp_winsys.h" -/* Simple, maximally packed layout. - */ - - -/* Conventional allocation path for non-display textures: +/** + * Conventional allocation path for non-display textures: + * Use a simple, maximally packed layout. */ static boolean softpipe_texture_layout(struct pipe_screen *screen, @@ -89,6 +84,10 @@ softpipe_texture_layout(struct pipe_screen *screen, return spt->buffer != NULL; } + +/** + * Texture layout for simple color buffers. + */ static boolean softpipe_displaytarget_layout(struct pipe_screen *screen, struct softpipe_texture * spt) @@ -112,9 +111,6 @@ softpipe_displaytarget_layout(struct pipe_screen *screen, } - - - static struct pipe_texture * softpipe_texture_create(struct pipe_screen *screen, const struct pipe_texture *templat) @@ -342,14 +338,13 @@ softpipe_transfer_map( struct pipe_screen *screen, /* May want to different things here depending on read/write nature * of the map: */ - if (transfer->texture && transfer->usage != PIPE_TRANSFER_READ) - { + if (transfer->texture && transfer->usage != PIPE_TRANSFER_READ) { /* Do something to notify sharing contexts of a texture change. * In softpipe, that would mean flushing the texture cache. */ softpipe_screen(screen)->timestamp++; } - + xfer_map = map + softpipe_transfer(transfer)->offset + transfer->y / transfer->block.height * transfer->stride + transfer->x / transfer->block.width * transfer->block.size; @@ -360,7 +355,7 @@ softpipe_transfer_map( struct pipe_screen *screen, static void softpipe_transfer_unmap(struct pipe_screen *screen, - struct pipe_transfer *transfer) + struct pipe_transfer *transfer) { struct softpipe_texture *spt; @@ -377,12 +372,6 @@ softpipe_transfer_unmap(struct pipe_screen *screen, void -softpipe_init_texture_funcs(struct softpipe_context *sp) -{ -} - - -void softpipe_init_screen_texture_funcs(struct pipe_screen *screen) { screen->texture_create = softpipe_texture_create; @@ -404,7 +393,7 @@ softpipe_get_texture_buffer( struct pipe_texture *texture, struct pipe_buffer **buf, unsigned *stride ) { - struct softpipe_texture *tex = (struct softpipe_texture *)texture; + struct softpipe_texture *tex = (struct softpipe_texture *) texture; if (!tex) return FALSE; diff --git a/src/gallium/drivers/softpipe/sp_texture.h b/src/gallium/drivers/softpipe/sp_texture.h index 893aa7d11d8..b1b6130b22c 100644 --- a/src/gallium/drivers/softpipe/sp_texture.h +++ b/src/gallium/drivers/softpipe/sp_texture.h @@ -74,9 +74,6 @@ softpipe_transfer(struct pipe_transfer *pt) extern void -softpipe_init_texture_funcs( struct softpipe_context *softpipe ); - -extern void softpipe_init_screen_texture_funcs(struct pipe_screen *screen); diff --git a/src/gallium/include/pipe/p_compiler.h b/src/gallium/include/pipe/p_compiler.h index e6a67f8c2fd..c13cffceb0a 100644 --- a/src/gallium/include/pipe/p_compiler.h +++ b/src/gallium/include/pipe/p_compiler.h @@ -93,9 +93,11 @@ typedef int _Bool; #endif +#ifndef __HAIKU__ typedef unsigned int uint; -typedef unsigned char ubyte; typedef unsigned short ushort; +#endif +typedef unsigned char ubyte; #if 0 #define boolean bool diff --git a/src/gallium/include/pipe/p_config.h b/src/gallium/include/pipe/p_config.h index 4152d6ac36a..de99957d9d0 100644 --- a/src/gallium/include/pipe/p_config.h +++ b/src/gallium/include/pipe/p_config.h @@ -140,6 +140,9 @@ #define PIPE_OS_WINDOWS #endif +#if defined(__HAIKU__) +#define PIPE_OS_HAIKU +#endif /* * Subsystem. diff --git a/src/gallium/include/pipe/p_thread.h b/src/gallium/include/pipe/p_thread.h index 96e8e087447..b1606dc6526 100644 --- a/src/gallium/include/pipe/p_thread.h +++ b/src/gallium/include/pipe/p_thread.h @@ -39,7 +39,7 @@ #include "util/u_debug.h" /* for assert */ -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_HAIKU) #include <pthread.h> /* POSIX threads headers */ #include <stdio.h> /* for perror() */ @@ -213,7 +213,7 @@ typedef unsigned pipe_condvar; */ typedef struct { -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_HAIKU) pthread_key_t key; #elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) DWORD key; @@ -228,7 +228,7 @@ typedef struct { static INLINE void pipe_tsd_init(pipe_tsd *tsd) { -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_HAIKU) if (pthread_key_create(&tsd->key, NULL/*free*/) != 0) { perror("pthread_key_create(): failed to allocate key for thread specific data"); exit(-1); @@ -245,7 +245,7 @@ pipe_tsd_get(pipe_tsd *tsd) if (tsd->initMagic != (int) PIPE_TSD_INIT_MAGIC) { pipe_tsd_init(tsd); } -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_HAIKU) return pthread_getspecific(tsd->key); #elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) assert(0); @@ -262,7 +262,7 @@ pipe_tsd_set(pipe_tsd *tsd, void *value) if (tsd->initMagic != (int) PIPE_TSD_INIT_MAGIC) { pipe_tsd_init(tsd); } -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_HAIKU) if (pthread_setspecific(tsd->key, value) != 0) { perror("pthread_set_specific() failed"); exit(-1); diff --git a/src/gallium/state_trackers/g3dvl/vl_basic_csc.c b/src/gallium/state_trackers/g3dvl/vl_basic_csc.c index 20d682de3fb..7c8e545824f 100644 --- a/src/gallium/state_trackers/g3dvl/vl_basic_csc.c +++ b/src/gallium/state_trackers/g3dvl/vl_basic_csc.c @@ -425,7 +425,7 @@ static int vlCreateVertexShader */ for (i = 0; i < 2; ++i) { - inst = vl_inst4(TGSI_OPCODE_MADD, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i, TGSI_FILE_CONSTANT, i * 2, TGSI_FILE_CONSTANT, i * 2 + 1); + inst = vl_inst4(TGSI_OPCODE_MAD, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i, TGSI_FILE_CONSTANT, i * 2, TGSI_FILE_CONSTANT, i * 2 + 1); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf_shaders.inc b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf_shaders.inc index ef4a4b2add9..34d93e1df0a 100644 --- a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf_shaders.inc +++ b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf_shaders.inc @@ -615,7 +615,7 @@ static int vlCreateFragmentShaderFieldPMB ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); /* floor t3, t3 ; Get rid of fractional part */ - inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3); + inst = vl_inst2(TGSI_OPCODE_FLR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); /* mul t3, t3, c1.y ; Multiply by 2 */ @@ -632,7 +632,7 @@ static int vlCreateFragmentShaderFieldPMB /* TODO: Move to conditional tex fetch on t3 instead of lerp */ /* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */ - inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); + inst = vl_inst4(TGSI_OPCODE_LRP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); /* add o0, t0, t1 ; Add ref and differential to form final output */ @@ -969,7 +969,7 @@ static int vlCreateFragmentShaderFrameBMB } /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */ - inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); + inst = vl_inst4(TGSI_OPCODE_LRP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; @@ -1116,7 +1116,7 @@ static int vlCreateFragmentShaderFieldBMB ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); /* floor t3, t3 ; Get rid of fractional part */ - inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3); + inst = vl_inst2(TGSI_OPCODE_FLR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); /* mul t3, t3, c1.y ; Multiply by 2 */ @@ -1143,7 +1143,7 @@ static int vlCreateFragmentShaderFieldBMB /* TODO: Move to conditional tex fetch on t3 instead of lerp */ /* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */ - inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); + inst = vl_inst4(TGSI_OPCODE_LRP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); /* @@ -1158,11 +1158,11 @@ static int vlCreateFragmentShaderFieldBMB /* TODO: Move to conditional tex fetch on t3 instead of lerp */ /* lerp t2, t3, t4, t5 ; Choose between top and bottom fields based on Y % 2 */ - inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 2, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 5); + inst = vl_inst4(TGSI_OPCODE_LRP, TGSI_FILE_TEMPORARY, 2, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 5); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */ - inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); + inst = vl_inst4(TGSI_OPCODE_LRP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; diff --git a/src/gallium/state_trackers/xorg/xorg_composite.c b/src/gallium/state_trackers/xorg/xorg_composite.c index c708ac31702..15c955450d0 100644 --- a/src/gallium/state_trackers/xorg/xorg_composite.c +++ b/src/gallium/state_trackers/xorg/xorg_composite.c @@ -4,6 +4,7 @@ #include "cso_cache/cso_context.h" #include "util/u_draw_quad.h" +#include "util/u_math.h" #include "pipe/p_inlines.h" @@ -40,9 +41,25 @@ static const struct xorg_composite_blend xorg_blends[] = { PIPE_BLENDFACTOR_INV_SRC_ALPHA, PIPE_BLENDFACTOR_INV_SRC_ALPHA }, }; + static INLINE void -pixel_to_float4(PictFormatPtr format, - CARD32 pixel, float *color) +pixel_to_float4(Pixel pixel, float *color) +{ + CARD32 r, g, b, a; + + a = (pixel >> 24) & 0xff; + r = (pixel >> 16) & 0xff; + g = (pixel >> 8) & 0xff; + b = (pixel >> 0) & 0xff; + color[0] = ((float)r) / 255.; + color[1] = ((float)g) / 255.; + color[2] = ((float)b) / 255.; + color[3] = ((float)a) / 255.; +} + +static INLINE void +render_pixel_to_float4(PictFormatPtr format, + CARD32 pixel, float *color) { CARD32 r, g, b, a; @@ -148,7 +165,7 @@ setup_vertex_data0(struct exa_context *ctx, setup_vertex0(vertices[3], dstX, dstY + height, ctx->solid_color); - return pipe_user_buffer_create(ctx->ctx->screen, + return pipe_user_buffer_create(ctx->pipe->screen, vertices, sizeof(vertices)); } @@ -194,12 +211,39 @@ setup_vertex_data1(struct exa_context *ctx, setup_vertex1(vertices[3], dstX, dstY + height, s0, t1); - return pipe_user_buffer_create(ctx->ctx->screen, + return pipe_user_buffer_create(ctx->pipe->screen, + vertices, + sizeof(vertices)); +} + +static struct pipe_buffer * +setup_vertex_data_tex(struct exa_context *ctx, + float x0, float y0, float x1, float y1, + float s0, float t0, float s1, float t1, + float z) +{ + float vertices[4][2][4]; + + /* 1st vertex */ + setup_vertex1(vertices[0], x0, y0, + s0, t0); + /* 2nd vertex */ + setup_vertex1(vertices[1], x1, y0, + s1, t0); + /* 3rd vertex */ + setup_vertex1(vertices[2], x1, y1, + s1, t1); + /* 4th vertex */ + setup_vertex1(vertices[3], x0, y1, + s0, t1); + + return pipe_user_buffer_create(ctx->pipe->screen, vertices, sizeof(vertices)); } + static INLINE void setup_vertex2(float vertex[3][4], float x, float y, float s0, float t0, float s1, float t1) @@ -253,7 +297,7 @@ setup_vertex_data2(struct exa_context *ctx, setup_vertex2(vertices[3], dstX, dstY + height, st0[0], st0[3], st1[0], st1[3]); - return pipe_user_buffer_create(ctx->ctx->screen, + return pipe_user_buffer_create(ctx->pipe->screen, vertices, sizeof(vertices)); } @@ -291,16 +335,20 @@ boolean xorg_composite_accelerated(int op, } static void -bind_framebuffer_state(struct exa_context *exa, PicturePtr pDstPicture, - struct exa_pixmap_priv *pDst) +bind_clip_state(struct exa_context *exa) +{ +} + +static void +bind_framebuffer_state(struct exa_context *exa, struct exa_pixmap_priv *pDst) { unsigned i; struct pipe_framebuffer_state state; struct pipe_surface *surface = exa_gpu_surface(exa, pDst); memset(&state, 0, sizeof(struct pipe_framebuffer_state)); - state.width = pDstPicture->pDrawable->width; - state.height = pDstPicture->pDrawable->height; + state.width = pDst->tex->width[0]; + state.height = pDst->tex->height[0]; state.nr_cbufs = 1; state.cbufs[0] = surface; @@ -338,10 +386,12 @@ set_viewport(struct exa_context *exa, int width, int height, } static void -bind_viewport_state(struct exa_context *exa, PicturePtr pDstPicture) +bind_viewport_state(struct exa_context *exa, struct exa_pixmap_priv *pDst) { - int width = pDstPicture->pDrawable->width; - int height = pDstPicture->pDrawable->height; + int width = pDst->tex->width[0]; + int height = pDst->tex->height[0]; + + debug_printf("Bind viewport (%d, %d)\n", width, height); set_viewport(exa, width, height, Y0_TOP); } @@ -350,7 +400,8 @@ static void bind_blend_state(struct exa_context *exa, int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture) { - boolean component_alpha = pSrcPicture->componentAlpha; + boolean component_alpha = (pSrcPicture) ? + pSrcPicture->componentAlpha : FALSE; struct xorg_composite_blend blend_opt; struct pipe_blend_state blend; @@ -390,14 +441,17 @@ bind_shaders(struct exa_context *exa, int op, unsigned vs_traits = 0, fs_traits = 0; struct xorg_shader shader; + exa->has_solid_color = FALSE; + if (pSrcPicture) { if (pSrcPicture->pSourcePict) { if (pSrcPicture->pSourcePict->type == SourcePictTypeSolidFill) { fs_traits |= FS_SOLID_FILL; vs_traits |= VS_SOLID_FILL; - pixel_to_float4(pSrcPicture->pFormat, - pSrcPicture->pSourcePict->solidFill.color, - exa->solid_color); + render_pixel_to_float4(pSrcPicture->pFormat, + pSrcPicture->pSourcePict->solidFill.color, + exa->solid_color); + exa->has_solid_color = TRUE; } else { debug_assert("!gradients not supported"); } @@ -478,15 +532,15 @@ setup_vs_constant_buffer(struct exa_context *exa, struct pipe_constant_buffer *cbuf = &exa->vs_const_buffer; pipe_buffer_reference(&cbuf->buffer, NULL); - cbuf->buffer = pipe_buffer_create(exa->ctx->screen, 16, + cbuf->buffer = pipe_buffer_create(exa->pipe->screen, 16, PIPE_BUFFER_USAGE_CONSTANT, param_bytes); if (cbuf->buffer) { - pipe_buffer_write(exa->ctx->screen, cbuf->buffer, + pipe_buffer_write(exa->pipe->screen, cbuf->buffer, 0, param_bytes, vs_consts); } - exa->ctx->set_constant_buffer(exa->ctx, PIPE_SHADER_VERTEX, 0, cbuf); + exa->pipe->set_constant_buffer(exa->pipe, PIPE_SHADER_VERTEX, 0, cbuf); } @@ -500,22 +554,22 @@ setup_fs_constant_buffer(struct exa_context *exa) struct pipe_constant_buffer *cbuf = &exa->fs_const_buffer; pipe_buffer_reference(&cbuf->buffer, NULL); - cbuf->buffer = pipe_buffer_create(exa->ctx->screen, 16, + cbuf->buffer = pipe_buffer_create(exa->pipe->screen, 16, PIPE_BUFFER_USAGE_CONSTANT, param_bytes); if (cbuf->buffer) { - pipe_buffer_write(exa->ctx->screen, cbuf->buffer, + pipe_buffer_write(exa->pipe->screen, cbuf->buffer, 0, param_bytes, fs_consts); } - exa->ctx->set_constant_buffer(exa->ctx, PIPE_SHADER_FRAGMENT, 0, cbuf); + exa->pipe->set_constant_buffer(exa->pipe, PIPE_SHADER_FRAGMENT, 0, cbuf); } static void -setup_constant_buffers(struct exa_context *exa, PicturePtr pDstPicture) +setup_constant_buffers(struct exa_context *exa, struct exa_pixmap_priv *pDst) { - int width = pDstPicture->pDrawable->width; - int height = pDstPicture->pDrawable->height; + int width = pDst->tex->width[0]; + int height = pDst->tex->height[0]; setup_vs_constant_buffer(exa, width, height); setup_fs_constant_buffer(exa); @@ -530,15 +584,15 @@ boolean xorg_composite_bind_state(struct exa_context *exa, struct exa_pixmap_priv *pMask, struct exa_pixmap_priv *pDst) { - bind_framebuffer_state(exa, pDstPicture, pDst); - bind_viewport_state(exa, pDstPicture); + bind_framebuffer_state(exa, pDst); + bind_viewport_state(exa, pDst); bind_blend_state(exa, op, pSrcPicture, pMaskPicture); bind_rasterizer_state(exa); bind_shaders(exa, op, pSrcPicture, pMaskPicture); bind_samplers(exa, op, pSrcPicture, pMaskPicture, pDstPicture, pSrc, pMask, pDst); - - setup_constant_buffers(exa, pDstPicture); + bind_clip_state(exa); + setup_constant_buffers(exa, pDst); return FALSE; } @@ -548,7 +602,7 @@ void xorg_composite(struct exa_context *exa, int srcX, int srcY, int maskX, int maskY, int dstX, int dstY, int width, int height) { - struct pipe_context *pipe = exa->ctx; + struct pipe_context *pipe = exa->pipe; struct pipe_buffer *buf = 0; if (exa->num_bound_samplers == 0 ) { /* solid fill */ @@ -573,12 +627,393 @@ void xorg_composite(struct exa_context *exa, } if (buf) { + int num_attribs = 1; /*pos*/ + num_attribs += exa->num_bound_samplers; + if (exa->has_solid_color) + ++num_attribs; + util_draw_vertex_buffer(pipe, buf, 0, PIPE_PRIM_TRIANGLE_FAN, 4, /* verts */ - 1 + exa->num_bound_samplers); /* attribs/vert */ + num_attribs); /* attribs/vert */ pipe_buffer_reference(&buf, NULL); } } +boolean xorg_solid_bind_state(struct exa_context *exa, + struct exa_pixmap_priv *pixmap, + Pixel fg) +{ + unsigned vs_traits, fs_traits; + struct xorg_shader shader; + + pixel_to_float4(fg, exa->solid_color); + exa->has_solid_color = TRUE; + + exa->solid_color[3] = 1.f; + + debug_printf("Color Pixel=(%d, %d, %d, %d), RGBA=(%f, %f, %f, %f)\n", + (fg >> 24) & 0xff, (fg >> 16) & 0xff, + (fg >> 8) & 0xff, (fg >> 0) & 0xff, + exa->solid_color[0], exa->solid_color[1], + exa->solid_color[2], exa->solid_color[3]); + +#if 0 + exa->solid_color[0] = 1.f; + exa->solid_color[1] = 0.f; + exa->solid_color[2] = 0.f; + exa->solid_color[3] = 1.f; +#endif + + vs_traits = VS_SOLID_FILL; + fs_traits = FS_SOLID_FILL; + + bind_framebuffer_state(exa, pixmap); + bind_viewport_state(exa, pixmap); + bind_rasterizer_state(exa); + bind_blend_state(exa, PictOpSrc, NULL, NULL); + setup_constant_buffers(exa, pixmap); + bind_clip_state(exa); + + shader = xorg_shaders_get(exa->shaders, vs_traits, fs_traits); + cso_set_vertex_shader_handle(exa->cso, shader.vs); + cso_set_fragment_shader_handle(exa->cso, shader.fs); + + return FALSE; +} + +void xorg_solid(struct exa_context *exa, + struct exa_pixmap_priv *pixmap, + int x0, int y0, int x1, int y1) +{ + struct pipe_context *pipe = exa->pipe; + struct pipe_buffer *buf = 0; + float vertices[4][2][4]; + + x0 = 10; y0 = 10; + x1 = 300; y1 = 300; + + /* 1st vertex */ + setup_vertex0(vertices[0], x0, y0, + exa->solid_color); + /* 2nd vertex */ + setup_vertex0(vertices[1], x1, y0, + exa->solid_color); + /* 3rd vertex */ + setup_vertex0(vertices[2], x1, y1, + exa->solid_color); + /* 4th vertex */ + setup_vertex0(vertices[3], x0, y1, + exa->solid_color); + + buf = pipe_user_buffer_create(exa->pipe->screen, + vertices, + sizeof(vertices)); + + + if (buf) { + debug_printf("Drawing buf is %p\n", buf); + util_draw_vertex_buffer(pipe, buf, 0, + PIPE_PRIM_TRIANGLE_FAN, + 4, /* verts */ + 2); /* attribs/vert */ + + pipe_buffer_reference(&buf, NULL); + } +} + + +static INLINE void shift_rectx(float coords[4], + const float *bounds, + const float shift) +{ + coords[0] += shift; + coords[2] -= shift; + if (bounds) { + coords[2] = MIN2(coords[2], bounds[2]); + /* bound x/y + width/height */ + if ((coords[0] + coords[2]) > (bounds[0] + bounds[2])) { + coords[2] = (bounds[0] + bounds[2]) - coords[0]; + } + } +} + +static INLINE void shift_recty(float coords[4], + const float *bounds, + const float shift) +{ + coords[1] += shift; + coords[3] -= shift; + if (bounds) { + coords[3] = MIN2(coords[3], bounds[3]); + if ((coords[1] + coords[3]) > (bounds[1] + bounds[3])) { + coords[3] = (bounds[1] + bounds[3]) - coords[1]; + } + } +} + +static INLINE void bound_rect(float coords[4], + const float bounds[4], + float shift[4]) +{ + /* if outside the bounds */ + if (coords[0] > (bounds[0] + bounds[2]) || + coords[1] > (bounds[1] + bounds[3]) || + (coords[0] + coords[2]) < bounds[0] || + (coords[1] + coords[3]) < bounds[1]) { + coords[0] = 0.f; + coords[1] = 0.f; + coords[2] = 0.f; + coords[3] = 0.f; + shift[0] = 0.f; + shift[1] = 0.f; + return; + } + + /* bound x */ + if (coords[0] < bounds[0]) { + shift[0] = bounds[0] - coords[0]; + coords[2] -= shift[0]; + coords[0] = bounds[0]; + } else + shift[0] = 0.f; + + /* bound y */ + if (coords[1] < bounds[1]) { + shift[1] = bounds[1] - coords[1]; + coords[3] -= shift[1]; + coords[1] = bounds[1]; + } else + shift[1] = 0.f; + + shift[2] = bounds[2] - coords[2]; + shift[3] = bounds[3] - coords[3]; + /* bound width/height */ + coords[2] = MIN2(coords[2], bounds[2]); + coords[3] = MIN2(coords[3], bounds[3]); + + /* bound x/y + width/height */ + if ((coords[0] + coords[2]) > (bounds[0] + bounds[2])) { + coords[2] = (bounds[0] + bounds[2]) - coords[0]; + } + if ((coords[1] + coords[3]) > (bounds[1] + bounds[3])) { + coords[3] = (bounds[1] + bounds[3]) - coords[1]; + } + + /* if outside the bounds */ + if ((coords[0] + coords[2]) < bounds[0] || + (coords[1] + coords[3]) < bounds[1]) { + coords[0] = 0.f; + coords[1] = 0.f; + coords[2] = 0.f; + coords[3] = 0.f; + return; + } +} + +static INLINE void sync_size(float *src_loc, float *dst_loc) +{ + src_loc[2] = MIN2(src_loc[2], dst_loc[2]); + src_loc[3] = MIN2(src_loc[3], dst_loc[3]); + dst_loc[2] = src_loc[2]; + dst_loc[3] = src_loc[3]; +} + + +static void renderer_copy_texture(struct exa_context *exa, + struct pipe_texture *src, + float sx1, float sy1, + float sx2, float sy2, + struct pipe_texture *dst, + float dx1, float dy1, + float dx2, float dy2) +{ + struct pipe_context *pipe = exa->pipe; + struct pipe_screen *screen = pipe->screen; + struct pipe_buffer *buf; + struct pipe_surface *dst_surf = screen->get_tex_surface( + screen, dst, 0, 0, 0, + PIPE_BUFFER_USAGE_GPU_WRITE); + struct pipe_framebuffer_state fb; + float s0, t0, s1, t1; + struct xorg_shader shader; + + assert(src->width[0] != 0); + assert(src->height[0] != 0); + assert(dst->width[0] != 0); + assert(dst->height[0] != 0); + +#if 0 + debug_printf("copy texture [%f, %f, %f, %f], [%f, %f, %f, %f]\n", + sx1, sy1, sx2, sy2, dx1, dy1, dx2, dy2); +#endif + +#if 1 + s0 = sx1 / src->width[0]; + s1 = sx2 / src->width[0]; + t0 = sy1 / src->height[0]; + t1 = sy2 / src->height[0]; +#else + s0 = 0; + s1 = 1; + t0 = 0; + t1 = 1; +#endif + + assert(screen->is_format_supported(screen, dst_surf->format, PIPE_TEXTURE_2D, + PIPE_TEXTURE_USAGE_RENDER_TARGET, 0)); + + /* save state (restored below) */ + cso_save_blend(exa->cso); + cso_save_samplers(exa->cso); + cso_save_sampler_textures(exa->cso); + cso_save_framebuffer(exa->cso); + cso_save_fragment_shader(exa->cso); + cso_save_vertex_shader(exa->cso); + + cso_save_viewport(exa->cso); + + + /* set misc state we care about */ + { + struct pipe_blend_state blend; + memset(&blend, 0, sizeof(blend)); + blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE; + blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; + blend.alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; + blend.colormask = PIPE_MASK_RGBA; + cso_set_blend(exa->cso, &blend); + } + + /* sampler */ + { + struct pipe_sampler_state sampler; + memset(&sampler, 0, sizeof(sampler)); + sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler.normalized_coords = 1; + cso_single_sampler(exa->cso, 0, &sampler); + cso_single_sampler_done(exa->cso); + } + + set_viewport(exa, dst_surf->width, dst_surf->height, Y0_TOP); + + /* texture */ + cso_set_sampler_textures(exa->cso, 1, &src); + + /* shaders */ + shader = xorg_shaders_get(exa->shaders, + VS_COMPOSITE, + FS_COMPOSITE); + cso_set_vertex_shader_handle(exa->cso, shader.vs); + cso_set_fragment_shader_handle(exa->cso, shader.fs); + + /* drawing dest */ + memset(&fb, 0, sizeof(fb)); + fb.width = dst_surf->width; + fb.height = dst_surf->height; + fb.nr_cbufs = 1; + fb.cbufs[0] = dst_surf; + { + int i; + for (i = 1; i < PIPE_MAX_COLOR_BUFS; ++i) + fb.cbufs[i] = 0; + } + cso_set_framebuffer(exa->cso, &fb); + + /* draw quad */ + buf = setup_vertex_data_tex(exa, + dx1, dy1, + dx2, dy2, + s0, t0, s1, t1, + 0.0f); + + if (buf) { + util_draw_vertex_buffer(exa->pipe, buf, 0, + PIPE_PRIM_TRIANGLE_FAN, + 4, /* verts */ + 2); /* attribs/vert */ + + pipe_buffer_reference(&buf, NULL); + } + + /* restore state we changed */ + cso_restore_blend(exa->cso); + cso_restore_samplers(exa->cso); + cso_restore_sampler_textures(exa->cso); + cso_restore_framebuffer(exa->cso); + cso_restore_vertex_shader(exa->cso); + cso_restore_fragment_shader(exa->cso); + cso_restore_viewport(exa->cso); + + pipe_surface_reference(&dst_surf, NULL); +} + +void xorg_copy_pixmap(struct exa_context *ctx, + struct exa_pixmap_priv *dst_priv, int dx, int dy, + struct exa_pixmap_priv *src_priv, int sx, int sy, + int width, int height) +{ + float dst_loc[4], src_loc[4]; + float dst_bounds[4], src_bounds[4]; + float src_shift[4], dst_shift[4], shift[4]; + struct pipe_texture *dst = dst_priv->tex; + struct pipe_texture *src = src_priv->tex; + + dst_loc[0] = dx; + dst_loc[1] = dy; + dst_loc[2] = width; + dst_loc[3] = height; + dst_bounds[0] = 0.f; + dst_bounds[1] = 0.f; + dst_bounds[2] = dst->width[0]; + dst_bounds[3] = dst->height[0]; + + src_loc[0] = sx; + src_loc[1] = sy; + src_loc[2] = width; + src_loc[3] = height; + src_bounds[0] = 0.f; + src_bounds[1] = 0.f; + src_bounds[2] = src->width[0]; + src_bounds[3] = src->height[0]; + + bound_rect(src_loc, src_bounds, src_shift); + bound_rect(dst_loc, dst_bounds, dst_shift); + shift[0] = src_shift[0] - dst_shift[0]; + shift[1] = src_shift[1] - dst_shift[1]; + + if (shift[0] < 0) + shift_rectx(src_loc, src_bounds, -shift[0]); + else + shift_rectx(dst_loc, dst_bounds, shift[0]); + + if (shift[1] < 0) + shift_recty(src_loc, src_bounds, -shift[1]); + else + shift_recty(dst_loc, dst_bounds, shift[1]); + + sync_size(src_loc, dst_loc); + + if (src_loc[2] >= 0 && src_loc[3] >= 0 && + dst_loc[2] >= 0 && dst_loc[3] >= 0) { + renderer_copy_texture(ctx, + src, + src_loc[0], + src_loc[1] + src_loc[3], + src_loc[0] + src_loc[2], + src_loc[1], + dst, + dst_loc[0], + dst_loc[1] + dst_loc[3], + dst_loc[0] + dst_loc[2], + dst_loc[1]); + } +} + diff --git a/src/gallium/state_trackers/xorg/xorg_composite.h b/src/gallium/state_trackers/xorg/xorg_composite.h index 17dfcb199ea..e73f1c704a8 100644 --- a/src/gallium/state_trackers/xorg/xorg_composite.h +++ b/src/gallium/state_trackers/xorg/xorg_composite.h @@ -22,4 +22,16 @@ void xorg_composite(struct exa_context *exa, int srcX, int srcY, int maskX, int maskY, int dstX, int dstY, int width, int height); +boolean xorg_solid_bind_state(struct exa_context *exa, + struct exa_pixmap_priv *pixmap, + Pixel fg); +void xorg_solid(struct exa_context *exa, + struct exa_pixmap_priv *pixmap, + int x0, int y0, int x1, int y1); + +void xorg_copy_pixmap(struct exa_context *ctx, + struct exa_pixmap_priv *dst, int dx, int dy, + struct exa_pixmap_priv *src, int sx, int sy, + int width, int height); + #endif diff --git a/src/gallium/state_trackers/xorg/xorg_dri2.c b/src/gallium/state_trackers/xorg/xorg_dri2.c index 6431a0fe254..8a362596c75 100644 --- a/src/gallium/state_trackers/xorg/xorg_dri2.c +++ b/src/gallium/state_trackers/xorg/xorg_dri2.c @@ -118,6 +118,7 @@ driDoCreateBuffer(DrawablePtr pDraw, DRI2BufferPtr buffer, unsigned int format) } if (!tex) { + exaMoveInPixmap(private->pPixmap); xorg_exa_set_shared_usage(private->pPixmap); pScreen->ModifyPixmapHeader(private->pPixmap, 0, 0, 0, 0, 0, NULL); tex = xorg_exa_get_texture(private->pPixmap); diff --git a/src/gallium/state_trackers/xorg/xorg_exa.c b/src/gallium/state_trackers/xorg/xorg_exa.c index a17a71f23a8..312dab1544c 100644 --- a/src/gallium/state_trackers/xorg/xorg_exa.c +++ b/src/gallium/state_trackers/xorg/xorg_exa.c @@ -82,6 +82,25 @@ exa_get_pipe_format(int depth, enum pipe_format *format, int *bbp) } } +static void +xorg_exa_init_state(struct exa_context *exa) +{ + struct pipe_depth_stencil_alpha_state dsa; + + /* set common initial clip state */ + memset(&dsa, 0, sizeof(struct pipe_depth_stencil_alpha_state)); + cso_set_depth_stencil_alpha(exa->cso, &dsa); +} + +static void +xorg_exa_common_done(struct exa_context *exa) +{ + exa->copy.src = NULL; + exa->copy.dst = NULL; + exa->has_solid_color = FALSE; + exa->num_bound_samplers = 0; +} + /* * Static exported EXA functions */ @@ -111,9 +130,9 @@ ExaDownloadFromScreen(PixmapPtr pPix, int x, int y, int w, int h, char *dst, if (!priv || !priv->tex) return FALSE; - if (exa->ctx->is_texture_referenced(exa->ctx, priv->tex, 0, 0) & + if (exa->pipe->is_texture_referenced(exa->pipe, priv->tex, 0, 0) & PIPE_REFERENCED_FOR_WRITE) - exa->ctx->flush(exa->ctx, 0, NULL); + exa->pipe->flush(exa->pipe, 0, NULL); transfer = exa->scrn->get_tex_transfer(exa->scrn, priv->tex, 0, 0, 0, PIPE_TRANSFER_READ, x, y, w, h); @@ -178,9 +197,9 @@ ExaPrepareAccess(PixmapPtr pPix, int index) if (priv->map_count++ == 0) { - if (exa->ctx->is_texture_referenced(exa->ctx, priv->tex, 0, 0) & + if (exa->pipe->is_texture_referenced(exa->pipe, priv->tex, 0, 0) & PIPE_REFERENCED_FOR_WRITE) - exa->ctx->flush(exa->ctx, 0, NULL); + exa->pipe->flush(exa->pipe, 0, NULL); priv->map_transfer = exa->scrn->get_tex_transfer(exa->scrn, priv->tex, 0, 0, 0, @@ -231,15 +250,22 @@ ExaDone(PixmapPtr pPixmap) if (!priv) return; - if (priv->src_surf) - exa->scrn->tex_surface_destroy(priv->src_surf); - priv->src_surf = NULL; +#if 1 + xorg_exa_flush(exa, PIPE_FLUSH_RENDER_CACHE, NULL); +#else + xorg_finish(exa); +#endif + xorg_exa_common_done(exa); } static void ExaDoneComposite(PixmapPtr pPixmap) { + ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum]; + modesettingPtr ms = modesettingPTR(pScrn); + struct exa_context *exa = ms->exa; + xorg_exa_common_done(exa); } static Bool @@ -250,6 +276,7 @@ ExaPrepareSolid(PixmapPtr pPixmap, int alu, Pixel planeMask, Pixel fg) struct exa_pixmap_priv *priv = exaGetPixmapDriverPrivate(pPixmap); struct exa_context *exa = ms->exa; + debug_printf("ExaPrepareSolid - test\n"); if (pPixmap->drawable.depth < 15) return FALSE; @@ -262,12 +289,11 @@ ExaPrepareSolid(PixmapPtr pPixmap, int alu, Pixel planeMask, Pixel fg) if (alu != GXcopy) return FALSE; - if (!exa->ctx || !exa->ctx->surface_fill) + if (!exa->pipe) return FALSE; - priv->color = fg; - - return TRUE; + debug_printf(" ExaPrepareSolid(0x%x)\n", fg); + return xorg_solid_bind_state(exa, priv, fg); } static void @@ -277,12 +303,18 @@ ExaSolid(PixmapPtr pPixmap, int x0, int y0, int x1, int y1) modesettingPtr ms = modesettingPTR(pScrn); struct exa_context *exa = ms->exa; struct exa_pixmap_priv *priv = exaGetPixmapDriverPrivate(pPixmap); - struct pipe_surface *surf = exa_gpu_surface(exa, priv); - exa->ctx->surface_fill(exa->ctx, surf, x0, y0, x1 - x0, y1 - y0, - priv->color); + debug_printf("\tExaSolid(%d, %d, %d, %d)\n", x0, y0, x1, y1); - exa->scrn->tex_surface_destroy(surf); +#if 0 + if (x0 == 0 && y0 == 0 && + x1 == priv->tex->width[0] && + y1 == priv->tex->height[0]) { + exa->ctx->clear(exa->ctx, PIPE_CLEAR_COLOR | PIPE_CLEAR_DEPTHSTENCIL, + exa->solid_color, 1., 0); + } else +#endif + xorg_solid(exa, priv, x0, y0, x1, y1) ; } static Bool @@ -295,6 +327,8 @@ ExaPrepareCopy(PixmapPtr pSrcPixmap, PixmapPtr pDstPixmap, int xdir, struct exa_pixmap_priv *priv = exaGetPixmapDriverPrivate(pDstPixmap); struct exa_pixmap_priv *src_priv = exaGetPixmapDriverPrivate(pSrcPixmap); + debug_printf("ExaPrepareCopy\n"); + if (alu != GXcopy) return FALSE; @@ -310,27 +344,33 @@ ExaPrepareCopy(PixmapPtr pSrcPixmap, PixmapPtr pDstPixmap, int xdir, if (!priv->tex || !src_priv->tex) return FALSE; - if (!exa->ctx || !exa->ctx->surface_copy) + if (!exa->pipe) return FALSE; - priv->src_surf = exa_gpu_surface(exa, src_priv); + exa->copy.src = src_priv; + exa->copy.dst = priv; - return TRUE; + /*XXX disabled until some issues with syncing are fixed */ + return FALSE; } static void ExaCopy(PixmapPtr pDstPixmap, int srcX, int srcY, int dstX, int dstY, int width, int height) { - ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; - modesettingPtr ms = modesettingPTR(pScrn); - struct exa_context *exa = ms->exa; - struct exa_pixmap_priv *priv = exaGetPixmapDriverPrivate(pDstPixmap); - struct pipe_surface *surf = exa_gpu_surface(exa, priv); + ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; + modesettingPtr ms = modesettingPTR(pScrn); + struct exa_context *exa = ms->exa; + struct exa_pixmap_priv *priv = exaGetPixmapDriverPrivate(pDstPixmap); + + debug_printf("\tExaCopy(srcx=%d, srcy=%d, dstX=%d, dstY=%d, w=%d, h=%d)\n", + srcX, srcY, dstX, dstY, width, height); - exa->ctx->surface_copy(exa->ctx, surf, dstX, dstY, priv->src_surf, - srcX, srcY, width, height); - exa->scrn->tex_surface_destroy(surf); + debug_assert(priv == exa->copy.dst); + + xorg_copy_pixmap(exa, exa->copy.dst, dstX, dstY, + exa->copy.src, srcX, srcY, + width, height); } static Bool @@ -342,6 +382,8 @@ ExaPrepareComposite(int op, PicturePtr pSrcPicture, modesettingPtr ms = modesettingPTR(pScrn); struct exa_context *exa = ms->exa; + debug_printf("ExaPrepareComposite\n"); + return xorg_composite_bind_state(exa, op, pSrcPicture, pMaskPicture, pDstPicture, exaGetPixmapDriverPrivate(pSrc), @@ -358,6 +400,8 @@ ExaComposite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY, struct exa_context *exa = ms->exa; struct exa_pixmap_priv *priv = exaGetPixmapDriverPrivate(pDst); + debug_printf("\tExaComposite\n"); + xorg_composite(exa, priv, srcX, srcY, maskX, maskY, dstX, dstY, width, height); } @@ -537,16 +581,16 @@ ExaModifyPixmapHeader(PixmapPtr pPixmap, int width, int height, if (priv->tex) { struct pipe_surface *dst_surf; + struct pipe_surface *src_surf; dst_surf = exa->scrn->get_tex_surface(exa->scrn, texture, 0, 0, 0, PIPE_BUFFER_USAGE_GPU_WRITE); - priv->src_surf = exa_gpu_surface(exa, priv); - exa->ctx->surface_copy(exa->ctx, dst_surf, 0, 0, priv->src_surf, - 0, 0, min(width, texture->width[0]), - min(height, texture->height[0])); + src_surf = exa_gpu_surface(exa, priv); + exa->pipe->surface_copy(exa->pipe, dst_surf, 0, 0, src_surf, + 0, 0, min(width, texture->width[0]), + min(height, texture->height[0])); exa->scrn->tex_surface_destroy(dst_surf); - exa->scrn->tex_surface_destroy(priv->src_surf); - priv->src_surf = NULL; + exa->scrn->tex_surface_destroy(src_surf); } else if (pPixmap->devPrivate.ptr) { struct pipe_transfer *transfer; @@ -563,6 +607,9 @@ ExaModifyPixmapHeader(PixmapPtr pPixmap, int width, int height, pPixmap->devKind, 0, 0); exa->scrn->transfer_unmap(exa->scrn, transfer); exa->scrn->tex_transfer_destroy(transfer); + + xfree(pPixmap->devPrivate.ptr); + pPixmap->devPrivate.ptr = NULL; } } #ifdef DRM_MODE_FEATURE_DIRTYFB @@ -611,8 +658,8 @@ xorg_exa_close(ScrnInfoPtr pScrn) cso_destroy_context(exa->cso); } - if (exa->ctx) - exa->ctx->destroy(exa->ctx); + if (exa->pipe) + exa->pipe->destroy(exa->pipe); exaDriverFini(pScrn->pScreen); xfree(exa); @@ -645,6 +692,12 @@ xorg_exa_init(ScrnInfoPtr pScrn) pExa->pixmapOffsetAlign = 0; pExa->pixmapPitchAlign = 1; pExa->flags = EXA_OFFSCREEN_PIXMAPS | EXA_HANDLES_PIXMAPS; +#ifdef EXA_SUPPORTS_PREPARE_AUX + pExa->flags |= EXA_SUPPORTS_PREPARE_AUX; +#endif +#ifdef EXA_MIXED_PIXMAPS + pExa->flags |= EXA_MIXED_PIXMAPS; +#endif pExa->maxX = 8191; /* FIXME */ pExa->maxY = 8191; /* FIXME */ @@ -674,13 +727,15 @@ xorg_exa_init(ScrnInfoPtr pScrn) } exa->scrn = ms->screen; - exa->ctx = ms->api->create_context(ms->api, exa->scrn); + exa->pipe = ms->api->create_context(ms->api, exa->scrn); /* Share context with DRI */ - ms->ctx = exa->ctx; + ms->ctx = exa->pipe; - exa->cso = cso_create_context(exa->ctx); + exa->cso = cso_create_context(exa->pipe); exa->shaders = xorg_shaders_create(exa); + xorg_exa_init_state(exa); + return (void *)exa; out_err: @@ -698,3 +753,19 @@ exa_gpu_surface(struct exa_context *exa, struct exa_pixmap_priv *priv) } +void xorg_exa_flush(struct exa_context *exa, uint pipeFlushFlags, + struct pipe_fence_handle **fence) +{ + exa->pipe->flush(exa->pipe, pipeFlushFlags, fence); +} + +void xorg_exa_finish(struct exa_context *exa) +{ + struct pipe_fence_handle *fence = NULL; + + xorg_exa_flush(exa, PIPE_FLUSH_RENDER_CACHE, &fence); + + exa->pipe->screen->fence_finish(exa->pipe->screen, fence, 0); + exa->pipe->screen->fence_reference(exa->pipe->screen, &fence, NULL); +} + diff --git a/src/gallium/state_trackers/xorg/xorg_exa.h b/src/gallium/state_trackers/xorg/xorg_exa.h index 5b515be1397..43949b04a44 100644 --- a/src/gallium/state_trackers/xorg/xorg_exa.h +++ b/src/gallium/state_trackers/xorg/xorg_exa.h @@ -14,7 +14,7 @@ struct xorg_shaders; struct exa_context { ExaDriverPtr pExa; - struct pipe_context *ctx; + struct pipe_context *pipe; struct pipe_screen *scrn; struct cso_context *cso; struct xorg_shaders *shaders; @@ -26,6 +26,12 @@ struct exa_context int num_bound_samplers; float solid_color[4]; + boolean has_solid_color; + + struct { + struct exa_pixmap_priv *src; + struct exa_pixmap_priv *dst; + } copy; }; @@ -36,8 +42,6 @@ struct exa_pixmap_priv struct pipe_texture *tex; struct pipe_texture *depth_stencil_tex; - unsigned int color; - struct pipe_surface *src_surf; /* for copies */ struct pipe_transfer *map_transfer; unsigned map_count; @@ -46,5 +50,8 @@ struct exa_pixmap_priv struct pipe_surface * exa_gpu_surface(struct exa_context *exa, struct exa_pixmap_priv *priv); +void xorg_exa_flush(struct exa_context *exa, uint pipeFlushFlags, + struct pipe_fence_handle **fence); +void xorg_exa_finish(struct exa_context *exa); #endif diff --git a/src/gallium/state_trackers/xorg/xorg_exa_tgsi.c b/src/gallium/state_trackers/xorg/xorg_exa_tgsi.c index cfee10c3b30..b30cbff4793 100644 --- a/src/gallium/state_trackers/xorg/xorg_exa_tgsi.c +++ b/src/gallium/state_trackers/xorg/xorg_exa_tgsi.c @@ -241,41 +241,38 @@ create_vs(struct pipe_context *pipe, boolean is_fill = vs_traits & VS_FILL; boolean is_composite = vs_traits & VS_COMPOSITE; boolean has_mask = vs_traits & VS_MASK; + unsigned input_slot = 0; ureg = ureg_create(TGSI_PROCESSOR_VERTEX); if (ureg == NULL) return 0; - const0 = ureg_DECL_constant(ureg); - const1 = ureg_DECL_constant(ureg); + const0 = ureg_DECL_constant(ureg, 0); + const1 = ureg_DECL_constant(ureg, 1); /* it has to be either a fill or a composite op */ debug_assert(is_fill ^ is_composite); - src = ureg_DECL_vs_input(ureg, - TGSI_SEMANTIC_POSITION, 0); + src = ureg_DECL_vs_input(ureg, input_slot++); dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0); src = vs_normalize_coords(ureg, src, const0, const1); ureg_MOV(ureg, dst, src); - if (is_composite) { - src = ureg_DECL_vs_input(ureg, - TGSI_SEMANTIC_GENERIC, 1); + src = ureg_DECL_vs_input(ureg, input_slot++); dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 1); ureg_MOV(ureg, dst, src); } + if (is_fill) { - src = ureg_DECL_vs_input(ureg, - TGSI_SEMANTIC_COLOR, 1); - dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 1); + src = ureg_DECL_vs_input(ureg, input_slot++); + dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0); ureg_MOV(ureg, dst, src); } if (has_mask) { - src = ureg_DECL_vs_input(ureg, - TGSI_SEMANTIC_GENERIC, 2); + src = ureg_DECL_vs_input(ureg, input_slot++); dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 2); ureg_MOV(ureg, dst, src); } @@ -318,8 +315,8 @@ create_fs(struct pipe_context *pipe, TGSI_SEMANTIC_POSITION, 0, TGSI_INTERPOLATE_PERSPECTIVE); - } - if (is_fill) { + } else { + debug_assert(is_fill); if (is_solid) src_input = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, @@ -473,9 +470,9 @@ struct xorg_shader xorg_shaders_get(struct xorg_shaders *sc, struct xorg_shader shader = {0}; void *vs, *fs; - vs = shader_from_cache(sc->exa->ctx, PIPE_SHADER_VERTEX, + vs = shader_from_cache(sc->exa->pipe, PIPE_SHADER_VERTEX, sc->vs_hash, vs_traits); - fs = shader_from_cache(sc->exa->ctx, PIPE_SHADER_FRAGMENT, + fs = shader_from_cache(sc->exa->pipe, PIPE_SHADER_FRAGMENT, sc->fs_hash, fs_traits); debug_assert(vs && fs); diff --git a/src/gallium/winsys/drm/intel/dri/SConscript b/src/gallium/winsys/drm/intel/dri/SConscript index 6c00861f517..f9738110723 100644 --- a/src/gallium/winsys/drm/intel/dri/SConscript +++ b/src/gallium/winsys/drm/intel/dri/SConscript @@ -12,8 +12,9 @@ drivers = [ trace, ] -env.SharedLibrary( +env.LoadableModule( target ='i915_dri.so', source = COMMON_GALLIUM_SOURCES, LIBS = drivers + mesa + auxiliaries + env['LIBS'], + SHLIBPREFIX = '', ) diff --git a/src/gallium/winsys/drm/intel/gem/intel_drm_batchbuffer.c b/src/gallium/winsys/drm/intel/gem/intel_drm_batchbuffer.c index 5ca3ad9762d..ebd1b607b78 100644 --- a/src/gallium/winsys/drm/intel/gem/intel_drm_batchbuffer.c +++ b/src/gallium/winsys/drm/intel/gem/intel_drm_batchbuffer.c @@ -13,6 +13,7 @@ #define INTEL_BATCH_CLIPRECTS 0x2 #undef INTEL_RUN_SYNC +#undef INTEL_MAP_BATCHBUFFER struct intel_drm_batchbuffer { @@ -40,8 +41,11 @@ intel_drm_batchbuffer_reset(struct intel_drm_batchbuffer *batch) "gallium3d_batchbuffer", batch->actual_size, 4096); + +#ifdef INTEL_MAP_BATCHBUFFER drm_intel_bo_map(batch->bo, TRUE); batch->base.map = batch->bo->virtual; +#endif memset(batch->base.map, 0, batch->actual_size); batch->base.ptr = batch->base.map; @@ -55,7 +59,13 @@ intel_drm_batchbuffer_create(struct intel_winsys *iws) struct intel_drm_winsys *idws = intel_drm_winsys(iws); struct intel_drm_batchbuffer *batch = CALLOC_STRUCT(intel_drm_batchbuffer); + batch->actual_size = idws->max_batch_size; + +#ifdef INTEL_MAP_BATCHBUFFER batch->base.map = NULL; +#else + batch->base.map = MALLOC(batch->actual_size); +#endif batch->base.ptr = NULL; batch->base.size = 0; @@ -64,8 +74,6 @@ intel_drm_batchbuffer_create(struct intel_winsys *iws) batch->base.iws = iws; - batch->actual_size = idws->max_batch_size; - intel_drm_batchbuffer_reset(batch); return &batch->base; @@ -156,7 +164,11 @@ intel_drm_batchbuffer_flush(struct intel_batchbuffer *ibatch, used = batch->base.ptr - batch->base.map; +#ifdef INTEL_MAP_BATCHBUFFER drm_intel_bo_unmap(batch->bo); +#else + drm_intel_bo_subdata(batch->bo, 0, used, batch->base.map); +#endif /* Do the sending to HW */ ret = drm_intel_bo_exec(batch->bo, used, NULL, 0, 0); @@ -202,7 +214,10 @@ intel_drm_batchbuffer_destroy(struct intel_batchbuffer *ibatch) if (batch->bo) drm_intel_bo_unreference(batch->bo); - free(batch); +#ifndef INTEL_MAP_BATCHBUFFER + FREE(batch->base.map); +#endif + FREE(batch); } void intel_drm_winsys_init_batchbuffer_functions(struct intel_drm_winsys *idws) diff --git a/src/gallium/winsys/drm/intel/gem/intel_drm_buffer.c b/src/gallium/winsys/drm/intel/gem/intel_drm_buffer.c index e017cd2e982..0030f915a36 100644 --- a/src/gallium/winsys/drm/intel/gem/intel_drm_buffer.c +++ b/src/gallium/winsys/drm/intel/gem/intel_drm_buffer.c @@ -28,6 +28,7 @@ intel_drm_buffer_create(struct intel_winsys *iws, } else if (type == INTEL_NEW_VERTEX) { name = "gallium3d_vertex"; pool = idws->pools.gem; + buf->map_gtt = TRUE; } else if (type == INTEL_NEW_SCANOUT) { name = "gallium3d_scanout"; pool = idws->pools.gem; @@ -109,6 +110,18 @@ intel_drm_buffer_unmap(struct intel_winsys *iws, drm_intel_bo_unmap(intel_bo(buffer)); } +static int +intel_drm_buffer_write(struct intel_winsys *iws, + struct intel_buffer *buffer, + const void *data, + size_t size, + size_t offset) +{ + struct intel_drm_buffer *buf = intel_drm_buffer(buffer); + + return drm_intel_bo_subdata(buf->bo, offset, size, (void*)data); +} + static void intel_drm_buffer_destroy(struct intel_winsys *iws, struct intel_buffer *buffer) @@ -130,5 +143,6 @@ intel_drm_winsys_init_buffer_functions(struct intel_drm_winsys *idws) idws->base.buffer_set_fence_reg = intel_drm_buffer_set_fence_reg; idws->base.buffer_map = intel_drm_buffer_map; idws->base.buffer_unmap = intel_drm_buffer_unmap; + idws->base.buffer_write = intel_drm_buffer_write; idws->base.buffer_destroy = intel_drm_buffer_destroy; } |