diff options
Diffstat (limited to 'src/gallium')
121 files changed, 5224 insertions, 1631 deletions
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index 8a13885da9b..53e13b30e63 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -358,7 +358,7 @@ epilog( boolean tgsi_sanity_check( - struct tgsi_token *tokens ) + const struct tgsi_token *tokens ) { struct sanity_check_ctx ctx; diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.h b/src/gallium/auxiliary/tgsi/tgsi_sanity.h index ca45e94c7ad..52263ff8832 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.h +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.h @@ -40,7 +40,7 @@ extern "C" { */ boolean tgsi_sanity_check( - struct tgsi_token *tokens ); + const struct tgsi_token *tokens ); #if defined __cplusplus } diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 3cdf8b9f359..501fc05e729 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -39,8 +39,9 @@ #include "tgsi/tgsi_info.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" -#include "tgsi_exec.h" -#include "tgsi_sse2.h" +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_exec.h" +#include "tgsi/tgsi_sse2.h" #include "rtasm/rtasm_x86sse.h" @@ -1360,6 +1361,32 @@ emit_store( const struct tgsi_full_instruction *inst, unsigned chan_index ) { + switch( inst->Instruction.Saturate ) { + case TGSI_SAT_NONE: + break; + + case TGSI_SAT_ZERO_ONE: + sse_maxps( + func, + make_xmm( xmm ), + get_temp( + TGSI_EXEC_TEMP_00000000_I, + TGSI_EXEC_TEMP_00000000_C ) ); + + sse_minps( + func, + make_xmm( xmm ), + get_temp( + TGSI_EXEC_TEMP_ONE_I, + TGSI_EXEC_TEMP_ONE_C ) ); + break; + + case TGSI_SAT_MINUS_PLUS_ONE: + assert( 0 ); + break; + } + + switch( reg->DstRegister.File ) { case TGSI_FILE_OUTPUT: emit_output( @@ -1388,19 +1415,6 @@ emit_store( default: assert( 0 ); } - - switch( inst->Instruction.Saturate ) { - case TGSI_SAT_NONE: - break; - - case TGSI_SAT_ZERO_ONE: - /* assert( 0 ); */ - break; - - case TGSI_SAT_MINUS_PLUS_ONE: - assert( 0 ); - break; - } } #define STORE( FUNC, INST, XMM, INDEX, CHAN )\ @@ -1747,14 +1761,6 @@ emit_instruction( if (indirect_temp_reference(inst)) return FALSE; - /* we don't handle saturation/clamping yet */ - if (inst->Instruction.Saturate != TGSI_SAT_NONE) - return FALSE; - - /* need to use extra temps to fix SOA dependencies : */ - if (tgsi_check_soa_dependencies(inst)) - return FALSE; - switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ARL: FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { @@ -1768,8 +1774,10 @@ emit_instruction( case TGSI_OPCODE_MOV: case TGSI_OPCODE_SWZ: FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( func, *inst, 0, 0, chan_index ); - STORE( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 4 + chan_index, 0, chan_index ); + } + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 4 + chan_index, 0, chan_index ); } break; @@ -2929,6 +2937,22 @@ tgsi_emit_sse2( parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX ? "vertex shader" : "fragment shader"); } + + if (tgsi_check_soa_dependencies(&parse.FullToken.FullInstruction)) { + uint opcode = parse.FullToken.FullInstruction.Instruction.Opcode; + + /* XXX: we only handle src/dst aliasing in a few opcodes + * currently. Need to use an additional temporay to hold + * the result in the cases where the code is too opaque to + * fix. + */ + if (opcode != TGSI_OPCODE_MOV && + opcode != TGSI_OPCODE_SWZ) { + debug_printf("Warning: src/dst aliasing in instruction" + " is not handled:\n"); + tgsi_dump_instruction(&parse.FullToken.FullInstruction, 1); + } + } break; case TGSI_TOKEN_TYPE_IMMEDIATE: diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c index f7096bd8e2c..654426a903d 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -31,6 +31,7 @@ #include "tgsi/tgsi_ureg.h" #include "tgsi/tgsi_info.h" #include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_sanity.h" #include "util/u_memory.h" #include "util/u_math.h" @@ -70,6 +71,7 @@ struct ureg_tokens { #define UREG_MAX_INPUT PIPE_MAX_ATTRIBS #define UREG_MAX_OUTPUT PIPE_MAX_ATTRIBS +#define UREG_MAX_CONSTANT_RANGE 32 #define UREG_MAX_IMMEDIATE 32 #define UREG_MAX_TEMP 256 #define UREG_MAX_ADDR 2 @@ -86,8 +88,10 @@ struct ureg_program unsigned semantic_name; unsigned semantic_index; unsigned interp; - } input[UREG_MAX_INPUT]; - unsigned nr_inputs; + } fs_input[UREG_MAX_INPUT]; + unsigned nr_fs_inputs; + + unsigned vs_inputs[UREG_MAX_INPUT/32]; struct { unsigned semantic_name; @@ -107,9 +111,13 @@ struct ureg_program unsigned temps_active[UREG_MAX_TEMP / 32]; unsigned nr_temps; - unsigned nr_addrs; + struct { + unsigned first; + unsigned last; + } constant_range[UREG_MAX_CONSTANT_RANGE]; + unsigned nr_constant_ranges; - unsigned nr_constants; + unsigned nr_addrs; unsigned nr_instructions; struct ureg_tokens domain[2]; @@ -119,6 +127,9 @@ static union tgsi_any_token error_tokens[32]; static void tokens_error( struct ureg_tokens *tokens ) { + if (tokens->tokens && tokens->tokens != error_tokens) + FREE(tokens->tokens); + tokens->tokens = error_tokens; tokens->size = Elements(error_tokens); tokens->count = 0; @@ -228,25 +239,25 @@ ureg_src_register( unsigned file, -static struct ureg_src -ureg_DECL_input( struct ureg_program *ureg, - unsigned name, - unsigned index, - unsigned interp_mode ) +struct ureg_src +ureg_DECL_fs_input( struct ureg_program *ureg, + unsigned name, + unsigned index, + unsigned interp_mode ) { unsigned i; - for (i = 0; i < ureg->nr_inputs; i++) { - if (ureg->input[i].semantic_name == name && - ureg->input[i].semantic_index == index) + for (i = 0; i < ureg->nr_fs_inputs; i++) { + if (ureg->fs_input[i].semantic_name == name && + ureg->fs_input[i].semantic_index == index) goto out; } - if (ureg->nr_inputs < UREG_MAX_INPUT) { - ureg->input[i].semantic_name = name; - ureg->input[i].semantic_index = index; - ureg->input[i].interp = interp_mode; - ureg->nr_inputs++; + if (ureg->nr_fs_inputs < UREG_MAX_INPUT) { + ureg->fs_input[i].semantic_name = name; + ureg->fs_input[i].semantic_index = index; + ureg->fs_input[i].interp = interp_mode; + ureg->nr_fs_inputs++; } else { set_bad( ureg ); @@ -257,25 +268,14 @@ out: } - -struct ureg_src -ureg_DECL_fs_input( struct ureg_program *ureg, - unsigned name, - unsigned index, - unsigned interp ) -{ - assert(ureg->processor == TGSI_PROCESSOR_FRAGMENT); - return ureg_DECL_input( ureg, name, index, interp ); -} - - struct ureg_src ureg_DECL_vs_input( struct ureg_program *ureg, - unsigned name, unsigned index ) { assert(ureg->processor == TGSI_PROCESSOR_VERTEX); - return ureg_DECL_input( ureg, name, index, TGSI_INTERPOLATE_CONSTANT ); + + ureg->vs_inputs[index/32] |= 1 << (index % 32); + return ureg_src_register( TGSI_FILE_INPUT, index ); } @@ -313,9 +313,57 @@ out: * value or manage any constant_buffer contents -- that's the * resposibility of the calling code. */ -struct ureg_src ureg_DECL_constant(struct ureg_program *ureg ) +struct ureg_src ureg_DECL_constant(struct ureg_program *ureg, + unsigned index ) { - return ureg_src_register( TGSI_FILE_CONSTANT, ureg->nr_constants++ ); + unsigned minconst = index, maxconst = index; + unsigned i; + + /* Inside existing range? + */ + for (i = 0; i < ureg->nr_constant_ranges; i++) { + if (ureg->constant_range[i].first <= index && + ureg->constant_range[i].last >= index) + goto out; + } + + /* Extend existing range? + */ + for (i = 0; i < ureg->nr_constant_ranges; i++) { + if (ureg->constant_range[i].last == index - 1) { + ureg->constant_range[i].last = index; + goto out; + } + + if (ureg->constant_range[i].first == index + 1) { + ureg->constant_range[i].first = index; + goto out; + } + + minconst = MIN2(minconst, ureg->constant_range[i].first); + maxconst = MAX2(maxconst, ureg->constant_range[i].last); + } + + /* Create new range? + */ + if (ureg->nr_constant_ranges < UREG_MAX_CONSTANT_RANGE) { + i = ureg->nr_constant_ranges++; + ureg->constant_range[i].first = index; + ureg->constant_range[i].last = index; + } + + /* Collapse all ranges down to one: + */ + i = 0; + ureg->constant_range[0].first = minconst; + ureg->constant_range[0].last = maxconst; + ureg->nr_constant_ranges = 1; + +out: + assert(i < ureg->nr_constant_ranges); + assert(ureg->constant_range[i].first <= index); + assert(ureg->constant_range[i].last >= index); + return ureg_src_register( TGSI_FILE_CONSTANT, index ); } @@ -566,6 +614,19 @@ ureg_emit_dst( struct ureg_program *ureg, } +static void validate( unsigned opcode, + unsigned nr_dst, + unsigned nr_src ) +{ +#ifdef DEBUG + const struct tgsi_opcode_info *info = tgsi_get_opcode_info( opcode ); + assert(info); + if(info) { + assert(nr_dst == info->num_dst); + assert(nr_src == info->num_src); + } +#endif +} unsigned ureg_emit_insn(struct ureg_program *ureg, @@ -576,6 +637,8 @@ ureg_emit_insn(struct ureg_program *ureg, { union tgsi_any_token *out; + validate( opcode, num_dst, num_src ); + out = get_tokens( ureg, DOMAIN_INSN, 1 ); out[0].value = 0; out[0].insn.Type = TGSI_TOKEN_TYPE_INSTRUCTION; @@ -678,17 +741,6 @@ ureg_insn(struct ureg_program *ureg, unsigned insn, i; boolean saturate; -#ifdef DEBUG - { - const struct tgsi_opcode_info *info = tgsi_get_opcode_info( opcode ); - assert(info); - if(info) { - assert(nr_dst == info->num_dst); - assert(nr_src == info->num_src); - } - } -#endif - saturate = nr_dst ? dst[0].Saturate : FALSE; insn = ureg_emit_insn( ureg, opcode, saturate, nr_dst, nr_src ); @@ -702,6 +754,53 @@ ureg_insn(struct ureg_program *ureg, ureg_fixup_insn_size( ureg, insn ); } +void +ureg_tex_insn(struct ureg_program *ureg, + unsigned opcode, + const struct ureg_dst *dst, + unsigned nr_dst, + unsigned target, + const struct ureg_src *src, + unsigned nr_src ) +{ + unsigned insn, i; + boolean saturate; + + saturate = nr_dst ? dst[0].Saturate : FALSE; + + insn = ureg_emit_insn( ureg, opcode, saturate, nr_dst, nr_src ); + + ureg_emit_texture( ureg, insn, target ); \ + + for (i = 0; i < nr_dst; i++) + ureg_emit_dst( ureg, dst[i] ); + + for (i = 0; i < nr_src; i++) + ureg_emit_src( ureg, src[i] ); + + ureg_fixup_insn_size( ureg, insn ); +} + + +void +ureg_label_insn(struct ureg_program *ureg, + unsigned opcode, + const struct ureg_src *src, + unsigned nr_src, + unsigned *label_token ) +{ + unsigned insn, i; + + insn = ureg_emit_insn( ureg, opcode, FALSE, 0, nr_src ); + + ureg_emit_label( ureg, insn, label_token ); \ + + for (i = 0; i < nr_src; i++) + ureg_emit_src( ureg, src[i] ); + + ureg_fixup_insn_size( ureg, insn ); +} + static void emit_decl( struct ureg_program *ureg, @@ -777,13 +876,22 @@ static void emit_decls( struct ureg_program *ureg ) { unsigned i; - for (i = 0; i < ureg->nr_inputs; i++) { - emit_decl( ureg, - TGSI_FILE_INPUT, - i, - ureg->input[i].semantic_name, - ureg->input[i].semantic_index, - ureg->input[i].interp ); + if (ureg->processor == TGSI_PROCESSOR_VERTEX) { + for (i = 0; i < UREG_MAX_INPUT; i++) { + if (ureg->vs_inputs[i/32] & (1 << (i%32))) { + emit_decl_range( ureg, TGSI_FILE_INPUT, i, 1 ); + } + } + } + else { + for (i = 0; i < ureg->nr_fs_inputs; i++) { + emit_decl( ureg, + TGSI_FILE_INPUT, + i, + ureg->fs_input[i].semantic_name, + ureg->fs_input[i].semantic_index, + ureg->fs_input[i].interp ); + } } for (i = 0; i < ureg->nr_outputs; i++) { @@ -801,10 +909,13 @@ static void emit_decls( struct ureg_program *ureg ) ureg->sampler[i].Index, 1 ); } - if (ureg->nr_constants) { - emit_decl_range( ureg, - TGSI_FILE_CONSTANT, - 0, ureg->nr_constants ); + if (ureg->nr_constant_ranges) { + for (i = 0; i < ureg->nr_constant_ranges; i++) + emit_decl_range( ureg, + TGSI_FILE_CONSTANT, + ureg->constant_range[i].first, + (ureg->constant_range[i].last + 1 - + ureg->constant_range[i].first) ); } if (ureg->nr_temps) { @@ -890,6 +1001,15 @@ const struct tgsi_token *ureg_finalize( struct ureg_program *ureg ) ureg->domain[DOMAIN_DECL].count); tgsi_dump( tokens, 0 ); } + +#if DEBUG + if (tokens && !tgsi_sanity_check(tokens)) { + debug_printf("tgsi_ureg.c, sanity check failed on generated tokens:\n"); + tgsi_dump(tokens, 0); + assert(0); + } +#endif + return tokens; } @@ -911,6 +1031,25 @@ void *ureg_create_shader( struct ureg_program *ureg, } +const struct tgsi_token *ureg_get_tokens( struct ureg_program *ureg, + unsigned *nr_tokens ) +{ + const struct tgsi_token *tokens; + + ureg_finalize(ureg); + + tokens = &ureg->domain[DOMAIN_DECL].tokens[0].token; + + if (nr_tokens) + *nr_tokens = ureg->domain[DOMAIN_DECL].size; + + ureg->domain[DOMAIN_DECL].tokens = 0; + ureg->domain[DOMAIN_DECL].size = 0; + ureg->domain[DOMAIN_DECL].order = 0; + ureg->domain[DOMAIN_DECL].count = 0; + + return tokens; +} struct ureg_program *ureg_create( unsigned processor ) diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h index acbca59040c..f04f443b9e7 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h @@ -82,10 +82,21 @@ ureg_create( unsigned processor ); const struct tgsi_token * ureg_finalize( struct ureg_program * ); +/* Create and return a shader: + */ void * ureg_create_shader( struct ureg_program *, struct pipe_context *pipe ); + +/* Alternately, return the built token stream and hand ownership of + * that memory to the caller: + */ +const struct tgsi_token * +ureg_get_tokens( struct ureg_program *ureg, + unsigned *nr_tokens ); + + void ureg_destroy( struct ureg_program * ); @@ -116,8 +127,7 @@ ureg_DECL_fs_input( struct ureg_program *, struct ureg_src ureg_DECL_vs_input( struct ureg_program *, - unsigned semantic_name, - unsigned semantic_index ); + unsigned index ); struct ureg_dst ureg_DECL_output( struct ureg_program *, @@ -130,7 +140,8 @@ ureg_DECL_immediate( struct ureg_program *, unsigned nr ); struct ureg_src -ureg_DECL_constant( struct ureg_program * ); +ureg_DECL_constant( struct ureg_program *, + unsigned index ); struct ureg_dst ureg_DECL_temporary( struct ureg_program * ); @@ -233,6 +244,24 @@ ureg_insn(struct ureg_program *ureg, unsigned nr_src ); +void +ureg_tex_insn(struct ureg_program *ureg, + unsigned opcode, + const struct ureg_dst *dst, + unsigned nr_dst, + unsigned target, + const struct ureg_src *src, + unsigned nr_src ); + + +void +ureg_label_insn(struct ureg_program *ureg, + unsigned opcode, + const struct ureg_src *src, + unsigned nr_src, + unsigned *label); + + /*********************************************************************** * Internal instruction helpers, don't call these directly: */ diff --git a/src/gallium/auxiliary/util/u_fifo.h b/src/gallium/auxiliary/util/u_fifo.h new file mode 100644 index 00000000000..9e007de1ada --- /dev/null +++ b/src/gallium/auxiliary/util/u_fifo.h @@ -0,0 +1,94 @@ +/************************************************************************** + * + * Copyright © 2009 Jakob Bornecrantz + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef U_FIFO_H +#define U_FIFO_H + +#include "util/u_memory.h" + +struct util_fifo +{ + size_t head; + size_t tail; + size_t num; + size_t size; +}; + +static INLINE struct util_fifo * +u_fifo_create(size_t size) +{ + struct util_fifo *fifo; + fifo = MALLOC(sizeof(*fifo) + size * sizeof(void*)); + + fifo->head = 0; + fifo->tail = 0; + fifo->num = 0; + fifo->size = size; + + return fifo; +} + +static INLINE boolean +u_fifo_add(struct util_fifo *fifo, void *ptr) +{ + void **array = (void**)&fifo[1]; + if (fifo->num >= fifo->size) + return FALSE; + + if (++fifo->head >= fifo->size) + fifo->head = 0; + + array[fifo->head] = ptr; + + ++fifo->num; + + return TRUE; +} + +static INLINE boolean +u_fifo_pop(struct util_fifo *fifo, void **ptr) +{ + void **array = (void**)&fifo[1]; + + if (!fifo->num) + return FALSE; + + if (++fifo->tail >= fifo->size) + fifo->tail = 0; + + *ptr = array[fifo->tail]; + + ++fifo->num; + + return TRUE; +} + +static INLINE void +u_fifo_destroy(struct util_fifo *fifo) +{ + FREE(fifo); +} + +#endif diff --git a/src/gallium/auxiliary/util/u_format.csv b/src/gallium/auxiliary/util/u_format.csv index 00a46d0cc48..f1bf94f17dd 100644 --- a/src/gallium/auxiliary/util/u_format.csv +++ b/src/gallium/auxiliary/util/u_format.csv @@ -2,7 +2,7 @@ PIPE_FORMAT_A8R8G8B8_UNORM , arith , 1, 1, un8 , un8 , un8 , un8 , zyxw, PIPE_FORMAT_X8R8G8B8_UNORM , arith , 1, 1, un8 , un8 , un8 , un8 , zyx1, rgb PIPE_FORMAT_B8G8R8A8_UNORM , arith , 1, 1, un8 , un8 , un8 , un8 , yzwx, rgb PIPE_FORMAT_B8G8R8X8_UNORM , arith , 1, 1, un8 , un8 , un8 , un8 , yzw1, rgb -PIPE_FORMAT_A1R5G5B5_UNORM , arith , 1, 1, un1 , un5 , un5 , un5 , zyxw, rgb +PIPE_FORMAT_A1R5G5B5_UNORM , arith , 1, 1, un5 , un5 , un5 , un1 , zyxw, rgb PIPE_FORMAT_A4R4G4B4_UNORM , arith , 1, 1, un4 , un4 , un4 , un4 , zyxw, rgb PIPE_FORMAT_R5G6B5_UNORM , arith , 1, 1, un5 , un6 , un5 , , zyx1, rgb PIPE_FORMAT_A2B10G10R10_UNORM , arith , 1, 1, un10, un10, un10, un2 , xyzw, rgb @@ -14,10 +14,10 @@ PIPE_FORMAT_L16_UNORM , arith , 1, 1, un16, , , , xxx1, PIPE_FORMAT_Z16_UNORM , array , 1, 1, un16, , , , x___, zs PIPE_FORMAT_Z32_UNORM , array , 1, 1, un32, , , , x___, zs PIPE_FORMAT_Z32_FLOAT , array , 1, 1, f32 , , , , x___, zs -PIPE_FORMAT_S8Z24_UNORM , arith , 1, 1, un8 , un24, , , yx__, zs -PIPE_FORMAT_Z24S8_UNORM , arith , 1, 1, un24, un8 , , , xy__, zs -PIPE_FORMAT_X8Z24_UNORM , arith , 1, 1, un8 , un24, , , y___, zs -PIPE_FORMAT_Z24X8_UNORM , arith , 1, 1, un24, un8 , , , x___, zs +PIPE_FORMAT_S8Z24_UNORM , arith , 1, 1, un24, un8 , , , xy__, zs +PIPE_FORMAT_Z24S8_UNORM , arith , 1, 1, un8 , un24, , , yx__, zs +PIPE_FORMAT_X8Z24_UNORM , arith , 1, 1, un24, un8 , , , x___, zs +PIPE_FORMAT_Z24X8_UNORM , arith , 1, 1, un8 , un24, , , y___, zs PIPE_FORMAT_S8_UNORM , array , 1, 1, un8 , , , , _x__, zs PIPE_FORMAT_R64_FLOAT , array , 1, 1, f64 , , , , x001, rgb PIPE_FORMAT_R64G64_FLOAT , array , 1, 1, f64 , f64 , , , xy01, rgb diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index 4c6c2bc00e1..b428dc544c3 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -341,10 +341,22 @@ util_is_inf_or_nan(float x) /** + * Test whether x is a power of two. + */ +static INLINE boolean +util_is_pot(unsigned x) +{ + return (x & (x - 1)) == 0; +} + + +/** * Find first bit set in word. Least significant bit is 1. * Return 0 if no bits set. */ -#if defined(_MSC_VER) && _MSC_VER >= 1300 +#if defined(_MSC_VER) && _MSC_VER >= 1300 && (_M_IX86 || _M_AMD64 || _M_IA64) +unsigned char _BitScanForward(unsigned long* Index, unsigned long Mask); +#pragma intrinsic(_BitScanForward) static INLINE unsigned long ffs( unsigned long u ) { diff --git a/src/gallium/auxiliary/util/u_simple_screen.c b/src/gallium/auxiliary/util/u_simple_screen.c index f01296b40fc..52382990155 100644 --- a/src/gallium/auxiliary/util/u_simple_screen.c +++ b/src/gallium/auxiliary/util/u_simple_screen.c @@ -52,8 +52,7 @@ pass_user_buffer_create(struct pipe_screen *screen, unsigned bytes) { struct pipe_buffer *buffer = - screen->winsys->user_buffer_create(screen->winsys, - ptr, bytes); + screen->winsys->user_buffer_create(screen->winsys, ptr, bytes); buffer->screen = screen; @@ -69,9 +68,8 @@ pass_surface_buffer_create(struct pipe_screen *screen, unsigned *stride) { struct pipe_buffer *buffer = - screen->winsys->surface_buffer_create(screen->winsys, - width, height, - format, usage, tex_usage, stride); + screen->winsys->surface_buffer_create(screen->winsys, width, height, + format, usage, tex_usage, stride); buffer->screen = screen; @@ -83,8 +81,7 @@ pass_buffer_map(struct pipe_screen *screen, struct pipe_buffer *buf, unsigned usage) { - return screen->winsys->buffer_map(screen->winsys, - buf, usage); + return screen->winsys->buffer_map(screen->winsys, buf, usage); } static void @@ -106,8 +103,7 @@ pass_flush_frontbuffer(struct pipe_screen *screen, struct pipe_surface *surf, void *context_private) { - screen->winsys->flush_frontbuffer(screen->winsys, - surf, context_private); + screen->winsys->flush_frontbuffer(screen->winsys, surf, context_private); } static void @@ -115,8 +111,7 @@ pass_fence_reference(struct pipe_screen *screen, struct pipe_fence_handle **ptr, struct pipe_fence_handle *fence) { - screen->winsys->fence_reference(screen->winsys, - ptr, fence); + screen->winsys->fence_reference(screen->winsys, ptr, fence); } static int @@ -124,8 +119,7 @@ pass_fence_signalled(struct pipe_screen *screen, struct pipe_fence_handle *fence, unsigned flag) { - return screen->winsys->fence_signalled(screen->winsys, - fence, flag); + return screen->winsys->fence_signalled(screen->winsys, fence, flag); } static int @@ -133,11 +127,11 @@ pass_fence_finish(struct pipe_screen *screen, struct pipe_fence_handle *fence, unsigned flag) { - return screen->winsys->fence_finish(screen->winsys, - fence, flag); + return screen->winsys->fence_finish(screen->winsys, fence, flag); } -void u_simple_screen_init(struct pipe_screen *screen) +void +u_simple_screen_init(struct pipe_screen *screen) { screen->buffer_create = pass_buffer_create; screen->user_buffer_create = pass_user_buffer_create; @@ -152,7 +146,8 @@ void u_simple_screen_init(struct pipe_screen *screen) screen->fence_finish = pass_fence_finish; } -const char* u_simple_screen_winsys_name(struct pipe_screen *screen) +const char * +u_simple_screen_winsys_name(struct pipe_screen *screen) { return screen->winsys->get_name(screen->winsys); } diff --git a/src/gallium/auxiliary/util/u_simple_shaders.c b/src/gallium/auxiliary/util/u_simple_shaders.c index ab754296fa8..0d706f9449d 100644 --- a/src/gallium/auxiliary/util/u_simple_shaders.c +++ b/src/gallium/auxiliary/util/u_simple_shaders.c @@ -34,14 +34,8 @@ #include "pipe/p_context.h" -#include "util/u_debug.h" -#include "pipe/p_defines.h" -#include "pipe/p_screen.h" #include "pipe/p_shader_tokens.h" - -#include "util/u_memory.h" #include "util/u_simple_shaders.h" - #include "tgsi/tgsi_ureg.h" @@ -67,9 +61,7 @@ util_make_vertex_passthrough_shader(struct pipe_context *pipe, struct ureg_src src; struct ureg_dst dst; - src = ureg_DECL_vs_input( ureg, - semantic_names[i], - semantic_indexes[i]); + src = ureg_DECL_vs_input( ureg, i ); dst = ureg_DECL_output( ureg, semantic_names[i], diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c index 1235a67d264..0d6489c26e4 100644 --- a/src/gallium/auxiliary/util/u_tile.c +++ b/src/gallium/auxiliary/util/u_tile.c @@ -170,7 +170,7 @@ x8r8g8b8_get_tile_rgba(const unsigned *src, pRow[0] = ubyte_to_float((pixel >> 16) & 0xff); pRow[1] = ubyte_to_float((pixel >> 8) & 0xff); pRow[2] = ubyte_to_float((pixel >> 0) & 0xff); - pRow[3] = ubyte_to_float(0xff); + pRow[3] = 1.0F; } p += dst_stride; } @@ -394,6 +394,52 @@ r5g6b5_put_tile_rgba(ushort *dst, +/*** PIPE_FORMAT_R8G8B8_UNORM ***/ + +static void +r8g8b8_get_tile_rgba(const ubyte *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + pRow[0] = ubyte_to_float(src[0]); + pRow[1] = ubyte_to_float(src[1]); + pRow[2] = ubyte_to_float(src[2]); + pRow[3] = 1.0f; + src += 3; + } + p += dst_stride; + } +} + + +static void +r8g8b8_put_tile_rgba(ubyte *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + dst[0] = float_to_ubyte(pRow[0]); + dst[1] = float_to_ubyte(pRow[1]); + dst[2] = float_to_ubyte(pRow[2]); + dst += 3; + } + p += src_stride; + } +} + + + /*** PIPE_FORMAT_Z16_UNORM ***/ /** @@ -1106,6 +1152,9 @@ pipe_tile_raw_to_rgba(enum pipe_format format, case PIPE_FORMAT_R5G6B5_UNORM: r5g6b5_get_tile_rgba((ushort *) src, w, h, dst, dst_stride); break; + case PIPE_FORMAT_R8G8B8_UNORM: + r8g8b8_get_tile_rgba((ubyte *) src, w, h, dst, dst_stride); + break; case PIPE_FORMAT_L8_UNORM: l8_get_tile_rgba((ubyte *) src, w, h, dst, dst_stride); break; @@ -1222,6 +1271,9 @@ pipe_put_tile_rgba(struct pipe_transfer *pt, case PIPE_FORMAT_R5G6B5_UNORM: r5g6b5_put_tile_rgba((ushort *) packed, w, h, p, src_stride); break; + case PIPE_FORMAT_R8G8B8_UNORM: + r8g8b8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride); + break; case PIPE_FORMAT_R8G8B8A8_UNORM: assert(0); break; diff --git a/src/gallium/drivers/i915simple/i915_prim_vbuf.c b/src/gallium/drivers/i915simple/i915_prim_vbuf.c index 508f4560e48..b3a7774fd6a 100644 --- a/src/gallium/drivers/i915simple/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915simple/i915_prim_vbuf.c @@ -44,6 +44,7 @@ #include "pipe/p_inlines.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_fifo.h" #include "i915_context.h" #include "i915_reg.h" @@ -76,8 +77,13 @@ struct i915_vbuf_render { size_t vbo_size; size_t vbo_offset; void *vbo_ptr; - size_t vbo_alloc_size; size_t vbo_max_used; + + /* stuff for the pool */ + struct util_fifo *pool_fifo; + unsigned pool_used; + unsigned pool_buffer_size; + boolean pool_not_used; }; @@ -106,33 +112,72 @@ i915_vbuf_render_get_vertex_info(struct vbuf_render *render) } static boolean +i915_vbuf_render_reserve(struct i915_vbuf_render *i915_render, size_t size) +{ + struct i915_context *i915 = i915_render->i915; + + if (i915_render->vbo_size < size + i915_render->vbo_offset) + return FALSE; + + if (i915->vbo_flushed) + return FALSE; + + return TRUE; +} + +static void +i915_vbuf_render_new_buf(struct i915_vbuf_render *i915_render, size_t size) +{ + struct i915_context *i915 = i915_render->i915; + struct intel_winsys *iws = i915->iws; + + if (i915_render->vbo) { + if (i915_render->pool_not_used) + iws->buffer_destroy(iws, i915_render->vbo); + else + u_fifo_add(i915_render->pool_fifo, i915_render->vbo); + i915_render->vbo = NULL; + } + + i915->vbo_flushed = 0; + + i915_render->vbo_size = MAX2(size, i915_render->pool_buffer_size); + i915_render->vbo_offset = 0; + + if (i915_render->vbo_size != i915_render->pool_buffer_size) { + i915_render->pool_not_used = TRUE; + i915_render->vbo = iws->buffer_create(iws, i915_render->vbo_size, 64, + INTEL_NEW_VERTEX); + } else { + i915_render->pool_not_used = FALSE; + + if (i915_render->pool_used >= 2) { + FLUSH_BATCH(NULL); + i915->vbo_flushed = 0; + i915_render->pool_used = 0; + } + u_fifo_pop(i915_render->pool_fifo, (void**)&i915_render->vbo); + } +} + +static boolean i915_vbuf_render_allocate_vertices(struct vbuf_render *render, ushort vertex_size, ushort nr_vertices) { struct i915_vbuf_render *i915_render = i915_vbuf_render(render); struct i915_context *i915 = i915_render->i915; - struct intel_winsys *iws = i915->iws; size_t size = (size_t)vertex_size * (size_t)nr_vertices; /* FIXME: handle failure */ assert(!i915->vbo); - if (i915_render->vbo_size > size + i915_render->vbo_offset && !i915->vbo_flushed) { - } else { - i915->vbo_flushed = 0; - if (i915_render->vbo) { - iws->buffer_destroy(iws, i915_render->vbo); - i915_render->vbo = NULL; - } - } + if (!i915_vbuf_render_reserve(i915_render, size)) { - if (!i915_render->vbo) { - i915_render->vbo_size = MAX2(size, i915_render->vbo_alloc_size); - i915_render->vbo_offset = 0; - i915_render->vbo = iws->buffer_create(iws, i915_render->vbo_size, 64, - INTEL_NEW_VERTEX); + if (i915->vbo_flushed) + i915_render->pool_used = 0; + i915_vbuf_render_new_buf(i915_render, size); } i915_render->vertex_size = vertex_size; @@ -504,6 +549,7 @@ i915_vbuf_render_create(struct i915_context *i915) { struct i915_vbuf_render *i915_render = CALLOC_STRUCT(i915_vbuf_render); struct intel_winsys *iws = i915->iws; + int i; i915_render->i915 = i915; @@ -524,14 +570,24 @@ i915_vbuf_render_create(struct i915_context *i915) i915_render->base.release_vertices = i915_vbuf_render_release_vertices; i915_render->base.destroy = i915_vbuf_render_destroy; - i915_render->vbo_alloc_size = 128 * 4096; - i915_render->vbo_size = i915_render->vbo_alloc_size; + + i915_render->vbo = NULL; + i915_render->vbo_size = 0; i915_render->vbo_offset = 0; - i915_render->vbo = iws->buffer_create(iws, i915_render->vbo_size, 64, - INTEL_NEW_VERTEX); + + i915_render->pool_used = FALSE; + i915_render->pool_buffer_size = 128 * 4096; + i915_render->pool_fifo = u_fifo_create(6); + for (i = 0; i < 6; i++) + u_fifo_add(i915_render->pool_fifo, + iws->buffer_create(iws, i915_render->pool_buffer_size, 64, + INTEL_NEW_VERTEX)); + +#if 0 /* TODO JB: is this realy needed? */ i915_render->vbo_ptr = iws->buffer_map(iws, i915_render->vbo, TRUE); iws->buffer_unmap(iws, i915_render->vbo); +#endif return &i915_render->base; } diff --git a/src/gallium/drivers/i915simple/intel_winsys.h b/src/gallium/drivers/i915simple/intel_winsys.h index f949f52a9ce..42c5e7470ec 100644 --- a/src/gallium/drivers/i915simple/intel_winsys.h +++ b/src/gallium/drivers/i915simple/intel_winsys.h @@ -150,6 +150,17 @@ struct intel_winsys { void (*buffer_unmap)(struct intel_winsys *iws, struct intel_buffer *buffer); + /** + * Write to a buffer. + * + * Arguments follows pwrite(2) + */ + int (*buffer_write)(struct intel_winsys *iws, + struct intel_buffer *dst, + const void *src, + size_t size, + size_t offset); + void (*buffer_destroy)(struct intel_winsys *iws, struct intel_buffer *buffer); /*@}*/ diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 5ac09de79ea..cd7b6356d28 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -17,9 +17,11 @@ C_SOURCES = \ lp_bld_depth.c \ lp_bld_flow.c \ lp_bld_format_aos.c \ + lp_bld_format_soa.c \ lp_bld_interp.c \ lp_bld_intr.c \ lp_bld_logic.c \ + lp_bld_sample_soa.c \ lp_bld_swizzle.c \ lp_bld_struct.c \ lp_bld_tgsi_soa.c \ @@ -46,7 +48,8 @@ C_SOURCES = \ lp_state_vs.c \ lp_surface.c \ lp_tex_cache.c \ - lp_tex_sample.c \ + lp_tex_sample_c.c \ + lp_tex_sample_llvm.c \ lp_texture.c \ lp_tile_cache.c \ lp_tile_soa.c diff --git a/src/gallium/drivers/llvmpipe/README b/src/gallium/drivers/llvmpipe/README index 498d21dea6c..89d08834a3c 100644 --- a/src/gallium/drivers/llvmpipe/README +++ b/src/gallium/drivers/llvmpipe/README @@ -8,13 +8,16 @@ Done so far is: - the whole fragment pipeline is code generated in a single function + - input interpolation + - depth testing + - texture sampling (not all state/formats are supported) + - fragment shader TGSI translation - same level of support as the TGSI SSE2 exec machine, with the exception we don't fallback to TGSI interpretation when an unsupported opcode is found, but just ignore it - - texture sampling via an intrinsic call - done in SoA layout - input interpolation also code generated @@ -28,16 +31,17 @@ Done so far is: any width and length - not all operations are implemented for these types yet though -Most mesa/progs/demos/* work. Speed is on par with Keith's softpipe-opt branch, -which includes hand written fast implementations for common cases. +Most mesa/progs/demos/* work. To do (probably by this order): - code generate stipple and stencil testing - - code generate texture sampling + - translate the remaining bits of texture sampling state - translate TGSI control flow instructions, and all other remaining opcodes + + - integrate with the draw module for VS code generation - code generate the triangle setup and rasterization @@ -93,7 +97,7 @@ Alternatively, you can build it with GNU make, if you prefer, by invoking it as make linux-llvm -but the rest of these instructions assume scons is used. +but the rest of these instructions assume that scons is used. Using @@ -108,6 +112,9 @@ or export LD_LIBRARY_PATH=$PWD/build/linux-x86-debug/lib:$LD_LIBRARY_PATH +For performance evaluation pass debug=no to scons, and use the corresponding +lib directory without the "-debug" suffix. + Unit testing ============ @@ -119,7 +126,7 @@ build/linux-???-debug/gallium/drivers/llvmpipe: - lp_test_conv: SIMD vector conversion - lp_test_format: pixel unpacking/packing -Some of this tests can output results and benchmarks to a tab-seperated-file +Some of this tests can output results and benchmarks to a tab-separated-file for posterior analysis, e.g.: build/linux-x86_64-debug/gallium/drivers/llvmpipe/lp_test_blend -o blend.tsv @@ -133,10 +140,10 @@ Development Notes at the top of the lp_bld_*.c functions. - All lp_bld_*.[ch] are isolated from the rest of the driver, and could/may be - put in a standalone Gallium state -> LLVM IR translation module. + put in a stand-alone Gallium state -> LLVM IR translation module. - We use LLVM-C bindings for now. They are not documented, but follow the C++ interfaces very closely, and appear to be complete enough for code generation. See http://npcontemplation.blogspot.com/2008/06/secret-of-llvm-c-bindings.html - for a standalone example. + for a stand-alone example. diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index 5c29bdac56e..dea4b703c45 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -3,7 +3,7 @@ Import('*') env = env.Clone() env.Tool('llvm') -if 'LLVM_VERSION' not in env: +if env.has_key('LLVM_VERSION') is False: print 'warning: LLVM not found: not building llvmpipe' Return() @@ -23,8 +23,10 @@ llvmpipe = env.ConvenienceLibrary( 'lp_bld_depth.c', 'lp_bld_flow.c', 'lp_bld_format_aos.c', + 'lp_bld_format_soa.c', 'lp_bld_interp.c', 'lp_bld_intr.c', + 'lp_bld_sample_soa.c', 'lp_bld_struct.c', 'lp_bld_logic.c', 'lp_bld_swizzle.c', @@ -52,7 +54,8 @@ llvmpipe = env.ConvenienceLibrary( 'lp_state_vs.c', 'lp_surface.c', 'lp_tex_cache.c', - 'lp_tex_sample.c', + 'lp_tex_sample_c.c', + 'lp_tex_sample_llvm.c', 'lp_texture.c', 'lp_tile_cache.c', 'lp_tile_soa.c', diff --git a/src/gallium/drivers/llvmpipe/lp_bld_alpha.c b/src/gallium/drivers/llvmpipe/lp_bld_alpha.c index 49c2f911af7..2b4bc5c819d 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_alpha.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_alpha.c @@ -45,7 +45,7 @@ void lp_build_alpha_test(LLVMBuilderRef builder, const struct pipe_alpha_state *state, - union lp_type type, + struct lp_type type, struct lp_build_mask_context *mask, LLVMValueRef alpha, LLVMValueRef ref) diff --git a/src/gallium/drivers/llvmpipe/lp_bld_alpha.h b/src/gallium/drivers/llvmpipe/lp_bld_alpha.h index 9dbcdb4daab..634575670db 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_alpha.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_alpha.h @@ -38,14 +38,14 @@ #include <llvm-c/Core.h> struct pipe_alpha_state; -union lp_type; +struct lp_type; struct lp_build_mask_context; void lp_build_alpha_test(LLVMBuilderRef builder, const struct pipe_alpha_state *state, - union lp_type type, + struct lp_type type, struct lp_build_mask_context *mask, LLVMValueRef alpha, LLVMValueRef ref); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c index 09a57ff33d5..0b115fc9b07 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c @@ -65,7 +65,7 @@ lp_build_min_simple(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; const char *intrinsic = NULL; LLVMValueRef cond; @@ -113,7 +113,7 @@ lp_build_max_simple(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; const char *intrinsic = NULL; LLVMValueRef cond; @@ -159,7 +159,7 @@ LLVMValueRef lp_build_comp(struct lp_build_context *bld, LLVMValueRef a) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; if(a == bld->one) return bld->zero; @@ -188,7 +188,7 @@ lp_build_add(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; LLVMValueRef res; if(a == bld->zero) @@ -241,7 +241,7 @@ lp_build_sub(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; LLVMValueRef res; if(b == bld->zero) @@ -405,7 +405,7 @@ lp_build_mul(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; if(a == bld->zero) return bld->zero; @@ -477,7 +477,7 @@ lp_build_div(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; if(a == bld->zero) return bld->zero; @@ -502,6 +502,31 @@ lp_build_div(struct lp_build_context *bld, } +LLVMValueRef +lp_build_lerp(struct lp_build_context *bld, + LLVMValueRef x, + LLVMValueRef v0, + LLVMValueRef v1) +{ + return lp_build_add(bld, v0, lp_build_mul(bld, x, lp_build_sub(bld, v1, v0))); +} + + +LLVMValueRef +lp_build_lerp_2d(struct lp_build_context *bld, + LLVMValueRef x, + LLVMValueRef y, + LLVMValueRef v00, + LLVMValueRef v01, + LLVMValueRef v10, + LLVMValueRef v11) +{ + LLVMValueRef v0 = lp_build_lerp(bld, x, v00, v01); + LLVMValueRef v1 = lp_build_lerp(bld, x, v10, v11); + return lp_build_lerp(bld, y, v0, v1); +} + + /** * Generate min(a, b) * Do checks for special cases. @@ -565,21 +590,32 @@ LLVMValueRef lp_build_abs(struct lp_build_context *bld, LLVMValueRef a) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; + LLVMTypeRef vec_type = lp_build_vec_type(type); if(!type.sign) return a; - /* XXX: is this really necessary? */ + if(type.floating) { + /* Mask out the sign bit */ + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + LLVMValueRef mask = lp_build_int_const_scalar(type, ((unsigned long long)1 << type.width) - 1); + a = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); + a = LLVMBuildAnd(bld->builder, a, mask, ""); + a = LLVMBuildBitCast(bld->builder, a, vec_type, ""); + return a; + } + #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - if(!type.floating && type.width*type.length == 128) { - LLVMTypeRef vec_type = lp_build_vec_type(type); - if(type.width == 8) + if(type.width*type.length == 128) { + switch(type.width) { + case 8: return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.b.128", vec_type, a); - if(type.width == 16) + case 16: return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.w.128", vec_type, a); - if(type.width == 32) + case 32: return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.d.128", vec_type, a); + } } #endif @@ -588,10 +624,188 @@ lp_build_abs(struct lp_build_context *bld, LLVMValueRef +lp_build_sgn(struct lp_build_context *bld, + LLVMValueRef a) +{ + const struct lp_type type = bld->type; + LLVMTypeRef vec_type = lp_build_vec_type(type); + LLVMValueRef cond; + LLVMValueRef res; + + /* Handle non-zero case */ + if(!type.sign) { + /* if not zero then sign must be positive */ + res = bld->one; + } + else if(type.floating) { + /* Take the sign bit and add it to 1 constant */ + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1)); + LLVMValueRef sign; + LLVMValueRef one; + sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); + sign = LLVMBuildAnd(bld->builder, sign, mask, ""); + one = LLVMConstBitCast(bld->one, int_vec_type); + res = LLVMBuildOr(bld->builder, sign, one, ""); + res = LLVMBuildBitCast(bld->builder, res, vec_type, ""); + } + else + { + LLVMValueRef minus_one = lp_build_const_scalar(type, -1.0); + cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, bld->zero); + res = lp_build_select(bld, cond, bld->one, minus_one); + } + + /* Handle zero */ + cond = lp_build_cmp(bld, PIPE_FUNC_EQUAL, a, bld->zero); + res = lp_build_select(bld, cond, bld->zero, bld->one); + + return res; +} + + +enum lp_build_round_sse41_mode +{ + LP_BUILD_ROUND_SSE41_NEAREST = 0, + LP_BUILD_ROUND_SSE41_FLOOR = 1, + LP_BUILD_ROUND_SSE41_CEIL = 2, + LP_BUILD_ROUND_SSE41_TRUNCATE = 3 +}; + + +static INLINE LLVMValueRef +lp_build_round_sse41(struct lp_build_context *bld, + LLVMValueRef a, + enum lp_build_round_sse41_mode mode) +{ + const struct lp_type type = bld->type; + LLVMTypeRef vec_type = lp_build_vec_type(type); + const char *intrinsic; + + assert(type.floating); + assert(type.width*type.length == 128); + + switch(type.width) { + case 32: + intrinsic = "llvm.x86.sse41.round.ps"; + break; + case 64: + intrinsic = "llvm.x86.sse41.round.pd"; + break; + default: + assert(0); + return bld->undef; + } + + return lp_build_intrinsic_binary(bld->builder, intrinsic, vec_type, a, + LLVMConstInt(LLVMInt32Type(), mode, 0)); +} + + +LLVMValueRef +lp_build_round(struct lp_build_context *bld, + LLVMValueRef a) +{ + const struct lp_type type = bld->type; + + assert(type.floating); + +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) + return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST); +#endif + + /* FIXME */ + assert(0); + return bld->undef; +} + + +LLVMValueRef +lp_build_floor(struct lp_build_context *bld, + LLVMValueRef a) +{ + const struct lp_type type = bld->type; + + assert(type.floating); + +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) + return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR); +#endif + + /* FIXME */ + assert(0); + return bld->undef; +} + + +LLVMValueRef +lp_build_ceil(struct lp_build_context *bld, + LLVMValueRef a) +{ + const struct lp_type type = bld->type; + + assert(type.floating); + +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) + return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL); +#endif + + /* FIXME */ + assert(0); + return bld->undef; +} + + +LLVMValueRef +lp_build_trunc(struct lp_build_context *bld, + LLVMValueRef a) +{ + const struct lp_type type = bld->type; + + assert(type.floating); + +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) + return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_TRUNCATE); +#endif + + /* FIXME */ + assert(0); + return bld->undef; +} + + +/** + * Convert to integer, through whichever rounding method that's fastest, + * typically truncating to zero. + */ +LLVMValueRef +lp_build_int(struct lp_build_context *bld, + LLVMValueRef a) +{ + const struct lp_type type = bld->type; + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + + assert(type.floating); + + return LLVMBuildFPToSI(bld->builder, a, int_vec_type, ""); +} + + +LLVMValueRef +lp_build_ifloor(struct lp_build_context *bld, + LLVMValueRef a) +{ + a = lp_build_floor(bld, a); + a = lp_build_int(bld, a); + return a; +} + + +LLVMValueRef lp_build_sqrt(struct lp_build_context *bld, LLVMValueRef a) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; LLVMTypeRef vec_type = lp_build_vec_type(type); char intrinsic[32]; @@ -609,7 +823,7 @@ LLVMValueRef lp_build_rcp(struct lp_build_context *bld, LLVMValueRef a) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; if(a == bld->zero) return bld->undef; @@ -640,7 +854,7 @@ LLVMValueRef lp_build_rsqrt(struct lp_build_context *bld, LLVMValueRef a) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; assert(type.floating); @@ -661,7 +875,7 @@ LLVMValueRef lp_build_cos(struct lp_build_context *bld, LLVMValueRef a) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; LLVMTypeRef vec_type = lp_build_vec_type(type); char intrinsic[32]; @@ -681,7 +895,7 @@ LLVMValueRef lp_build_sin(struct lp_build_context *bld, LLVMValueRef a) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; LLVMTypeRef vec_type = lp_build_vec_type(type); char intrinsic[32]; @@ -752,7 +966,7 @@ lp_build_polynomial(struct lp_build_context *bld, const double *coeffs, unsigned num_coeffs) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; LLVMValueRef res = NULL; unsigned i; @@ -800,7 +1014,7 @@ lp_build_exp2_approx(struct lp_build_context *bld, LLVMValueRef *p_frac_part, LLVMValueRef *p_exp2) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; LLVMTypeRef vec_type = lp_build_vec_type(type); LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); LLVMValueRef ipart = NULL; @@ -893,7 +1107,7 @@ lp_build_log2_approx(struct lp_build_context *bld, LLVMValueRef *p_floor_log2, LLVMValueRef *p_log2) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; LLVMTypeRef vec_type = lp_build_vec_type(type); LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.h b/src/gallium/drivers/llvmpipe/lp_bld_arit.h index fc8cb25966e..d68a97c4b87 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.h @@ -40,7 +40,7 @@ #include <llvm-c/Core.h> -union lp_type type; +struct lp_type type; struct lp_build_context; @@ -72,6 +72,26 @@ lp_build_div(struct lp_build_context *bld, LLVMValueRef b); LLVMValueRef +lp_build_lerp(struct lp_build_context *bld, + LLVMValueRef x, + LLVMValueRef v0, + LLVMValueRef v1); + +/** + * Bilinear interpolation. + * + * Values indices are in v_{yx}. + */ +LLVMValueRef +lp_build_lerp_2d(struct lp_build_context *bld, + LLVMValueRef x, + LLVMValueRef y, + LLVMValueRef v00, + LLVMValueRef v01, + LLVMValueRef v10, + LLVMValueRef v11); + +LLVMValueRef lp_build_min(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b); @@ -86,6 +106,34 @@ lp_build_abs(struct lp_build_context *bld, LLVMValueRef a); LLVMValueRef +lp_build_sgn(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef +lp_build_round(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef +lp_build_floor(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef +lp_build_ceil(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef +lp_build_trunc(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef +lp_build_int(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef +lp_build_ifloor(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef lp_build_sqrt(struct lp_build_context *bld, LLVMValueRef a); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend.h b/src/gallium/drivers/llvmpipe/lp_bld_blend.h index d19e18846c2..da272e549f3 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend.h @@ -46,7 +46,7 @@ struct pipe_blend_state; -union lp_type; +struct lp_type; struct lp_build_context; @@ -74,7 +74,7 @@ lp_build_blend_func(struct lp_build_context *bld, LLVMValueRef lp_build_blend_aos(LLVMBuilderRef builder, const struct pipe_blend_state *blend, - union lp_type type, + struct lp_type type, LLVMValueRef src, LLVMValueRef dst, LLVMValueRef const_, @@ -84,7 +84,7 @@ lp_build_blend_aos(LLVMBuilderRef builder, void lp_build_blend_soa(LLVMBuilderRef builder, const struct pipe_blend_state *blend, - union lp_type type, + struct lp_type type, LLVMValueRef src[4], LLVMValueRef dst[4], LLVMValueRef const_[4], diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c index c11a9398f87..d14f468ba93 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c @@ -303,7 +303,7 @@ lp_build_blend_func(struct lp_build_context *bld, LLVMValueRef lp_build_blend_aos(LLVMBuilderRef builder, const struct pipe_blend_state *blend, - union lp_type type, + struct lp_type type, LLVMValueRef src, LLVMValueRef dst, LLVMValueRef const_, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c index b92254a7d6f..9511299d558 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c @@ -199,7 +199,7 @@ lp_build_blend_soa_factor(struct lp_build_blend_soa_context *bld, void lp_build_blend_soa(LLVMBuilderRef builder, const struct pipe_blend_state *blend, - union lp_type type, + struct lp_type type, LLVMValueRef src[4], LLVMValueRef dst[4], LLVMValueRef con[4], diff --git a/src/gallium/drivers/llvmpipe/lp_bld_const.c b/src/gallium/drivers/llvmpipe/lp_bld_const.c index 21487365eae..c8eaa8c3940 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_const.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_const.c @@ -42,7 +42,7 @@ unsigned -lp_mantissa(union lp_type type) +lp_mantissa(struct lp_type type) { assert(type.floating); @@ -72,7 +72,7 @@ lp_mantissa(union lp_type type) * Same as lp_const_scale(), but in terms of shifts. */ unsigned -lp_const_shift(union lp_type type) +lp_const_shift(struct lp_type type) { if(type.floating) return 0; @@ -86,7 +86,7 @@ lp_const_shift(union lp_type type) unsigned -lp_const_offset(union lp_type type) +lp_const_offset(struct lp_type type) { if(type.floating || type.fixed) return 0; @@ -104,7 +104,7 @@ lp_const_offset(union lp_type type) * else for the fixed points types and normalized integers. */ double -lp_const_scale(union lp_type type) +lp_const_scale(struct lp_type type) { unsigned long long llscale; double dscale; @@ -122,7 +122,7 @@ lp_const_scale(union lp_type type) * Minimum value representable by the type. */ double -lp_const_min(union lp_type type) +lp_const_min(struct lp_type type) { unsigned bits; @@ -158,7 +158,7 @@ lp_const_min(union lp_type type) * Maximum value representable by the type. */ double -lp_const_max(union lp_type type) +lp_const_max(struct lp_type type) { unsigned bits; @@ -190,7 +190,7 @@ lp_const_max(union lp_type type) double -lp_const_eps(union lp_type type) +lp_const_eps(struct lp_type type) { if (type.floating) { switch(type.width) { @@ -211,7 +211,7 @@ lp_const_eps(union lp_type type) LLVMValueRef -lp_build_undef(union lp_type type) +lp_build_undef(struct lp_type type) { LLVMTypeRef vec_type = lp_build_vec_type(type); return LLVMGetUndef(vec_type); @@ -219,7 +219,7 @@ lp_build_undef(union lp_type type) LLVMValueRef -lp_build_zero(union lp_type type) +lp_build_zero(struct lp_type type) { LLVMTypeRef vec_type = lp_build_vec_type(type); return LLVMConstNull(vec_type); @@ -227,7 +227,7 @@ lp_build_zero(union lp_type type) LLVMValueRef -lp_build_one(union lp_type type) +lp_build_one(struct lp_type type) { LLVMTypeRef elem_type; LLVMValueRef elems[LP_MAX_VECTOR_LENGTH]; @@ -269,7 +269,7 @@ lp_build_one(union lp_type type) LLVMValueRef -lp_build_const_scalar(union lp_type type, +lp_build_const_scalar(struct lp_type type, double val) { LLVMTypeRef elem_type = lp_build_elem_type(type); @@ -295,7 +295,7 @@ lp_build_const_scalar(union lp_type type, LLVMValueRef -lp_build_int_const_scalar(union lp_type type, +lp_build_int_const_scalar(struct lp_type type, long long val) { LLVMTypeRef elem_type = lp_build_int_elem_type(type); @@ -312,7 +312,7 @@ lp_build_int_const_scalar(union lp_type type, LLVMValueRef -lp_build_const_aos(union lp_type type, +lp_build_const_aos(struct lp_type type, double r, double g, double b, double a, const unsigned char *swizzle) { @@ -352,8 +352,8 @@ lp_build_const_aos(union lp_type type, LLVMValueRef -lp_build_const_mask_aos(union lp_type type, - boolean cond[4]) +lp_build_const_mask_aos(struct lp_type type, + const boolean cond[4]) { LLVMTypeRef elem_type = LLVMIntType(type.width); LLVMValueRef masks[LP_MAX_VECTOR_LENGTH]; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_const.h b/src/gallium/drivers/llvmpipe/lp_bld_const.h index 1934530ea3c..ffb302f7366 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_const.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_const.h @@ -42,67 +42,67 @@ #include <pipe/p_compiler.h> -union lp_type type; +struct lp_type type; unsigned -lp_mantissa(union lp_type type); +lp_mantissa(struct lp_type type); unsigned -lp_const_shift(union lp_type type); +lp_const_shift(struct lp_type type); unsigned -lp_const_offset(union lp_type type); +lp_const_offset(struct lp_type type); double -lp_const_scale(union lp_type type); +lp_const_scale(struct lp_type type); double -lp_const_min(union lp_type type); +lp_const_min(struct lp_type type); double -lp_const_max(union lp_type type); +lp_const_max(struct lp_type type); double -lp_const_eps(union lp_type type); +lp_const_eps(struct lp_type type); LLVMValueRef -lp_build_undef(union lp_type type); +lp_build_undef(struct lp_type type); LLVMValueRef -lp_build_zero(union lp_type type); +lp_build_zero(struct lp_type type); LLVMValueRef -lp_build_one(union lp_type type); +lp_build_one(struct lp_type type); LLVMValueRef -lp_build_const_scalar(union lp_type type, +lp_build_const_scalar(struct lp_type type, double val); LLVMValueRef -lp_build_int_const_scalar(union lp_type type, +lp_build_int_const_scalar(struct lp_type type, long long val); LLVMValueRef -lp_build_const_aos(union lp_type type, +lp_build_const_aos(struct lp_type type, double r, double g, double b, double a, const unsigned char *swizzle); LLVMValueRef -lp_build_const_mask_aos(union lp_type type, - boolean cond[4]); +lp_build_const_mask_aos(struct lp_type type, + const boolean cond[4]); #endif /* !LP_BLD_CONST_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_conv.c b/src/gallium/drivers/llvmpipe/lp_bld_conv.c index c8954c8a34f..186cac70f62 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_conv.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_conv.c @@ -86,7 +86,7 @@ */ LLVMValueRef lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, - union lp_type src_type, + struct lp_type src_type, unsigned dst_width, LLVMValueRef src) { @@ -122,7 +122,7 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, int shift = dst_width - n; res = LLVMBuildShl(builder, res, lp_build_int_const_scalar(src_type, shift), ""); - /* Fill in the empty lower bits for added precision? */ + /* TODO: Fill in the empty lower bits for additional precision? */ #if 0 { LLVMValueRef msb; @@ -152,7 +152,7 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, LLVMValueRef lp_build_unsigned_norm_to_float(LLVMBuilderRef builder, unsigned src_width, - union lp_type dst_type, + struct lp_type dst_type, LLVMValueRef src) { LLVMTypeRef vec_type = lp_build_vec_type(dst_type); @@ -244,12 +244,12 @@ lp_build_const_pack_shuffle(unsigned n) * Expand the bit width. * * This will only change the number of bits the values are represented, not the - * values themselved. + * values themselves. */ static void lp_build_expand(LLVMBuilderRef builder, - union lp_type src_type, - union lp_type dst_type, + struct lp_type src_type, + struct lp_type dst_type, LLVMValueRef src, LLVMValueRef *dst, unsigned num_dsts) { @@ -266,7 +266,7 @@ lp_build_expand(LLVMBuilderRef builder, dst[0] = src; while(src_type.width < dst_type.width) { - union lp_type new_type = src_type; + struct lp_type new_type = src_type; LLVMTypeRef new_vec_type; new_type.width *= 2; @@ -314,8 +314,8 @@ lp_build_expand(LLVMBuilderRef builder, */ static LLVMValueRef lp_build_pack2(LLVMBuilderRef builder, - union lp_type src_type, - union lp_type dst_type, + struct lp_type src_type, + struct lp_type dst_type, boolean clamped, LLVMValueRef lo, LLVMValueRef hi) @@ -391,11 +391,11 @@ lp_build_pack2(LLVMBuilderRef builder, * TODO: Handle saturation consistently. */ static LLVMValueRef -lp_build_trunc(LLVMBuilderRef builder, - union lp_type src_type, - union lp_type dst_type, - boolean clamped, - const LLVMValueRef *src, unsigned num_srcs) +lp_build_pack(LLVMBuilderRef builder, + struct lp_type src_type, + struct lp_type dst_type, + boolean clamped, + const LLVMValueRef *src, unsigned num_srcs) { LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH]; unsigned i; @@ -410,7 +410,7 @@ lp_build_trunc(LLVMBuilderRef builder, tmp[i] = src[i]; while(src_type.width > dst_type.width) { - union lp_type new_type = src_type; + struct lp_type new_type = src_type; new_type.width /= 2; new_type.length *= 2; @@ -442,12 +442,12 @@ lp_build_trunc(LLVMBuilderRef builder, */ void lp_build_conv(LLVMBuilderRef builder, - union lp_type src_type, - union lp_type dst_type, + struct lp_type src_type, + struct lp_type dst_type, const LLVMValueRef *src, unsigned num_srcs, LLVMValueRef *dst, unsigned num_dsts) { - union lp_type tmp_type; + struct lp_type tmp_type; LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH]; unsigned num_tmps; unsigned i; @@ -470,7 +470,7 @@ lp_build_conv(LLVMBuilderRef builder, * Clamp if necessary */ - if(src_type.value != dst_type.value) { + if(memcmp(&src_type, &dst_type, sizeof src_type) != 0) { struct lp_build_context bld; double src_min = lp_const_min(src_type); double dst_min = lp_const_min(dst_type); @@ -565,7 +565,7 @@ lp_build_conv(LLVMBuilderRef builder, if(tmp_type.width > dst_type.width) { assert(num_dsts == 1); - tmp[0] = lp_build_trunc(builder, tmp_type, dst_type, TRUE, tmp, num_tmps); + tmp[0] = lp_build_pack(builder, tmp_type, dst_type, TRUE, tmp, num_tmps); tmp_type.width = dst_type.width; tmp_type.length = dst_type.length; num_tmps = 1; @@ -656,8 +656,8 @@ lp_build_conv(LLVMBuilderRef builder, */ void lp_build_conv_mask(LLVMBuilderRef builder, - union lp_type src_type, - union lp_type dst_type, + struct lp_type src_type, + struct lp_type dst_type, const LLVMValueRef *src, unsigned num_srcs, LLVMValueRef *dst, unsigned num_dsts) { @@ -689,7 +689,7 @@ lp_build_conv_mask(LLVMBuilderRef builder, if(src_type.width > dst_type.width) { assert(num_dsts == 1); - dst[0] = lp_build_trunc(builder, src_type, dst_type, TRUE, src, num_srcs); + dst[0] = lp_build_pack(builder, src_type, dst_type, TRUE, src, num_srcs); } else if(src_type.width < dst_type.width) { assert(num_srcs == 1); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_conv.h b/src/gallium/drivers/llvmpipe/lp_bld_conv.h index 05c1ef2a100..ca378804d2a 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_conv.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_conv.h @@ -40,33 +40,33 @@ #include <llvm-c/Core.h> -union lp_type type; +struct lp_type type; LLVMValueRef lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, - union lp_type src_type, + struct lp_type src_type, unsigned dst_width, LLVMValueRef src); LLVMValueRef lp_build_unsigned_norm_to_float(LLVMBuilderRef builder, unsigned src_width, - union lp_type dst_type, + struct lp_type dst_type, LLVMValueRef src); void lp_build_conv(LLVMBuilderRef builder, - union lp_type src_type, - union lp_type dst_type, + struct lp_type src_type, + struct lp_type dst_type, const LLVMValueRef *srcs, unsigned num_srcs, LLVMValueRef *dsts, unsigned num_dsts); void lp_build_conv_mask(LLVMBuilderRef builder, - union lp_type src_type, - union lp_type dst_type, + struct lp_type src_type, + struct lp_type dst_type, const LLVMValueRef *src, unsigned num_srcs, LLVMValueRef *dst, unsigned num_dsts); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_debug.c b/src/gallium/drivers/llvmpipe/lp_bld_debug.c index 30925b5f415..59d8f492e60 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_debug.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_debug.c @@ -30,10 +30,27 @@ #include <udis86.h> #endif +#include "util/u_math.h" #include "util/u_debug.h" #include "lp_bld_debug.h" +/** + * Check alignment. + * + * It is important that this check is not implemented as a macro or inlined + * function, as the compiler assumptions in respect to alignment of global + * and stack variables would often make the check a no op, defeating the + * whole purpose of the exercise. + */ +boolean +lp_check_alignment(const void *ptr, unsigned alignment) +{ + assert(util_is_pot(alignment)); + return ((uintptr_t)ptr & (alignment - 1)) == 0; +} + + void lp_disassemble(const void* func) { diff --git a/src/gallium/drivers/llvmpipe/lp_bld_debug.h b/src/gallium/drivers/llvmpipe/lp_bld_debug.h index ecdafef76d0..583e6132b4b 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_debug.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_debug.h @@ -53,6 +53,10 @@ lp_build_name(LLVMValueRef val, const char *format, ...) } +boolean +lp_check_alignment(const void *ptr, unsigned alignment); + + void lp_disassemble(const void* func); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c index 2cd6e6b9217..21c665c4d4c 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c @@ -71,11 +71,11 @@ /** * Return a type appropriate for depth/stencil testing. */ -union lp_type +struct lp_type lp_depth_type(const struct util_format_description *format_desc, unsigned length) { - union lp_type type; + struct lp_type type; unsigned swizzle; assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS); @@ -85,7 +85,7 @@ lp_depth_type(const struct util_format_description *format_desc, swizzle = format_desc->swizzle[0]; assert(swizzle < 4); - type.value = 0; + memset(&type, 0, sizeof type); type.width = format_desc->block.bits; if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_FLOAT) { @@ -114,7 +114,7 @@ lp_depth_type(const struct util_format_description *format_desc, void lp_build_depth_test(LLVMBuilderRef builder, const struct pipe_depth_state *state, - union lp_type type, + struct lp_type type, const struct util_format_description *format_desc, struct lp_build_mask_context *mask, LLVMValueRef src, @@ -179,12 +179,13 @@ lp_build_depth_test(LLVMBuilderRef builder, padding_right = 0; for(chan = 0; chan < z_swizzle; ++chan) padding_right += format_desc->channel[chan].size; - padding_left = format_desc->block.bits - format_desc->channel[z_swizzle].size; + padding_left = format_desc->block.bits - + (padding_right + format_desc->channel[z_swizzle].size); if(padding_left || padding_right) { - const long long mask_left = ((long long)1 << (format_desc->block.bits - padding_left)) - 1; - const long long mask_right = ((long long)1 << (padding_right)) - 1; - z_bitmask = lp_build_int_const_scalar(type, mask_left & mask_right); + const unsigned long long mask_left = ((unsigned long long)1 << (format_desc->block.bits - padding_left)) - 1; + const unsigned long long mask_right = ((unsigned long long)1 << (padding_right)) - 1; + z_bitmask = lp_build_int_const_scalar(type, mask_left ^ mask_right); } if(padding_left) @@ -210,5 +211,6 @@ lp_build_depth_test(LLVMBuilderRef builder, LLVMBuildStore(builder, dst, dst_ptr); } + /* FIXME */ assert(!state->occlusion_count); } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.h b/src/gallium/drivers/llvmpipe/lp_bld_depth.h index 5d2e042fcc5..79d6981bb51 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.h @@ -41,11 +41,11 @@ struct pipe_depth_state; struct util_format_description; -union lp_type; +struct lp_type; struct lp_build_mask_context; -union lp_type +struct lp_type lp_depth_type(const struct util_format_description *format_desc, unsigned length); @@ -53,7 +53,7 @@ lp_depth_type(const struct util_format_description *format_desc, void lp_build_depth_test(LLVMBuilderRef builder, const struct pipe_depth_state *state, - union lp_type type, + struct lp_type type, const struct util_format_description *format_desc, struct lp_build_mask_context *mask, LLVMValueRef src, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.c b/src/gallium/drivers/llvmpipe/lp_bld_flow.c index 9d99e1a9d9f..dcc25fbff86 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_flow.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.c @@ -32,59 +32,261 @@ */ #include "util/u_debug.h" +#include "util/u_memory.h" #include "lp_bld_type.h" #include "lp_bld_flow.h" +#define LP_BUILD_FLOW_MAX_VARIABLES 32 +#define LP_BUILD_FLOW_MAX_DEPTH 32 + + +/** + * Enumeration of all possible flow constructs. + */ +enum lp_build_flow_construct_kind { + lP_BUILD_FLOW_SCOPE, + LP_BUILD_FLOW_SKIP, +}; + + +/** + * Variable declaration scope. + */ +struct lp_build_flow_scope +{ + /** Number of variables declared in this scope */ + unsigned num_variables; +}; + + +/** + * Early exit. Useful to skip to the end of a function or block when + * the execution mask becomes zero or when there is an error condition. + */ +struct lp_build_flow_skip +{ + /** Block to skip to */ + LLVMBasicBlockRef block; + + /** Number of variables declared at the beginning */ + unsigned num_variables; + + LLVMValueRef *phi; +}; + + +/** + * Union of all possible flow constructs' data + */ +union lp_build_flow_construct_data +{ + struct lp_build_flow_scope scope; + struct lp_build_flow_skip skip; +}; + + +/** + * Element of the flow construct stack. + */ +struct lp_build_flow_construct +{ + enum lp_build_flow_construct_kind kind; + union lp_build_flow_construct_data data; +}; + + +/** + * All necessary data to generate LLVM control flow constructs. + * + * Besides keeping track of the control flow construct themselves we also + * need to keep track of variables in order to generate SSA Phi values. + */ +struct lp_build_flow_context +{ + LLVMBuilderRef builder; + + /** + * Control flow stack. + */ + struct lp_build_flow_construct constructs[LP_BUILD_FLOW_MAX_DEPTH]; + unsigned num_constructs; + + /** + * Variable stack + */ + LLVMValueRef *variables[LP_BUILD_FLOW_MAX_VARIABLES]; + unsigned num_variables; +}; + + +struct lp_build_flow_context * +lp_build_flow_create(LLVMBuilderRef builder) +{ + struct lp_build_flow_context *flow; + + flow = CALLOC_STRUCT(lp_build_flow_context); + if(!flow) + return NULL; + + flow->builder = builder; + + return flow; +} + + void -lp_build_mask_begin(struct lp_build_mask_context *mask, - LLVMBuilderRef builder, - union lp_type type, - LLVMValueRef value) +lp_build_flow_destroy(struct lp_build_flow_context *flow) { - memset(mask, 0, sizeof *mask); + assert(flow->num_constructs == 0); + assert(flow->num_variables == 0); + FREE(flow); +} - mask->builder = builder; - mask->reg_type = LLVMIntType(type.width * type.length); - mask->value = value; + +static union lp_build_flow_construct_data * +lp_build_flow_push(struct lp_build_flow_context *flow, + enum lp_build_flow_construct_kind kind) +{ + assert(flow->num_constructs < LP_BUILD_FLOW_MAX_DEPTH); + if(flow->num_constructs >= LP_BUILD_FLOW_MAX_DEPTH) + return NULL; + + flow->constructs[flow->num_constructs].kind = kind; + return &flow->constructs[flow->num_constructs++].data; +} + + +static union lp_build_flow_construct_data * +lp_build_flow_peek(struct lp_build_flow_context *flow, + enum lp_build_flow_construct_kind kind) +{ + assert(flow->num_constructs); + if(!flow->num_constructs) + return NULL; + + assert(flow->constructs[flow->num_constructs - 1].kind == kind); + if(flow->constructs[flow->num_constructs - 1].kind != kind) + return NULL; + + return &flow->constructs[flow->num_constructs - 1].data; } +static union lp_build_flow_construct_data * +lp_build_flow_pop(struct lp_build_flow_context *flow, + enum lp_build_flow_construct_kind kind) +{ + assert(flow->num_constructs); + if(!flow->num_constructs) + return NULL; + + assert(flow->constructs[flow->num_constructs - 1].kind == kind); + if(flow->constructs[flow->num_constructs - 1].kind != kind) + return NULL; + + return &flow->constructs[--flow->num_constructs].data; +} + + +/** + * Begin a variable scope. + * + * + */ void -lp_build_mask_update(struct lp_build_mask_context *mask, - LLVMValueRef value) +lp_build_flow_scope_begin(struct lp_build_flow_context *flow) { + struct lp_build_flow_scope *scope; - LLVMValueRef cond; - LLVMBasicBlockRef current_block; - LLVMBasicBlockRef next_block; - LLVMBasicBlockRef new_block; + scope = &lp_build_flow_push(flow, lP_BUILD_FLOW_SCOPE)->scope; + if(!scope) + return; - if(mask->value) - mask->value = LLVMBuildAnd(mask->builder, mask->value, value, ""); - else - mask->value = value; + scope->num_variables = 0; +} - /* FIXME: disabled until we have proper control flow helpers */ -#if 0 - cond = LLVMBuildICmp(mask->builder, - LLVMIntEQ, - LLVMBuildBitCast(mask->builder, mask->value, mask->reg_type, ""), - LLVMConstNull(mask->reg_type), - ""); - current_block = LLVMGetInsertBlock(mask->builder); +/** + * Declare a variable. + * + * A variable is a named entity which can have different LLVMValueRef's at + * different points of the program. This is relevant for control flow because + * when there are mutiple branches to a same location we need to replace + * the variable's value with a Phi function as explained in + * http://en.wikipedia.org/wiki/Static_single_assignment_form . + * + * We keep track of variables by keeping around a pointer to where their + * current. + * + * There are a few cautions to observe: + * + * - Variable's value must not be NULL. If there is no initial value then + * LLVMGetUndef() should be used. + * + * - Variable's value must be kept up-to-date. If the variable is going to be + * modified by a function then a pointer should be passed so that its value + * is accurate. Failure to do this will cause some of the variables' + * transient values to be lost, leading to wrong results. + * + * - A program should be written from top to bottom, by always appending + * instructions to the bottom with a single LLVMBuilderRef. Inserting and/or + * modifying existing statements will most likely lead to wrong results. + * + */ +void +lp_build_flow_scope_declare(struct lp_build_flow_context *flow, + LLVMValueRef *variable) +{ + struct lp_build_flow_scope *scope; + + scope = &lp_build_flow_peek(flow, lP_BUILD_FLOW_SCOPE)->scope; + if(!scope) + return; - if(!mask->skip_block) { - LLVMValueRef function = LLVMGetBasicBlockParent(current_block); - mask->skip_block = LLVMAppendBasicBlock(function, "skip"); + assert(*variable); + if(!*variable) + return; + + assert(flow->num_variables < LP_BUILD_FLOW_MAX_VARIABLES); + if(flow->num_variables >= LP_BUILD_FLOW_MAX_VARIABLES) + return; + + flow->variables[flow->num_variables++] = variable; + ++scope->num_variables; +} + + +void +lp_build_flow_scope_end(struct lp_build_flow_context *flow) +{ + struct lp_build_flow_scope *scope; + + scope = &lp_build_flow_pop(flow, lP_BUILD_FLOW_SCOPE)->scope; + if(!scope) + return; - mask->phi = LLVMBuildPhi(mask->builder, LLVMTypeOf(mask->value), ""); + assert(flow->num_variables >= scope->num_variables); + if(flow->num_variables < scope->num_variables) { + flow->num_variables = 0; + return; } + flow->num_variables -= scope->num_variables; +} + + +static LLVMBasicBlockRef +lp_build_flow_insert_block(struct lp_build_flow_context *flow) +{ + LLVMBasicBlockRef current_block; + LLVMBasicBlockRef next_block; + LLVMBasicBlockRef new_block; + + current_block = LLVMGetInsertBlock(flow->builder); + next_block = LLVMGetNextBasicBlock(current_block); - assert(next_block); if(next_block) { new_block = LLVMInsertBasicBlock(next_block, ""); } @@ -93,30 +295,148 @@ lp_build_mask_update(struct lp_build_mask_context *mask, new_block = LLVMAppendBasicBlock(function, ""); } - LLVMAddIncoming(mask->phi, &mask->value, ¤t_block, 1); - LLVMBuildCondBr(mask->builder, cond, mask->skip_block, new_block); + return new_block; +} + +void +lp_build_flow_skip_begin(struct lp_build_flow_context *flow) +{ + struct lp_build_flow_skip *skip; + LLVMBuilderRef builder; + unsigned i; + + skip = &lp_build_flow_push(flow, LP_BUILD_FLOW_SKIP)->skip; + if(!skip) + return; + + skip->block = lp_build_flow_insert_block(flow); + skip->num_variables = flow->num_variables; + if(!skip->num_variables) { + skip->phi = NULL; + return; + } - LLVMPositionBuilderAtEnd(mask->builder, new_block); -#endif + skip->phi = MALLOC(skip->num_variables * sizeof *skip->phi); + if(!skip->phi) { + skip->num_variables = 0; + return; + } + + builder = LLVMCreateBuilder(); + LLVMPositionBuilderAtEnd(builder, skip->block); + + for(i = 0; i < skip->num_variables; ++i) + skip->phi[i] = LLVMBuildPhi(builder, LLVMTypeOf(*flow->variables[i]), ""); + + LLVMDisposeBuilder(builder); } -LLVMValueRef -lp_build_mask_end(struct lp_build_mask_context *mask) +void +lp_build_flow_skip_cond_break(struct lp_build_flow_context *flow, + LLVMValueRef cond) +{ + struct lp_build_flow_skip *skip; + LLVMBasicBlockRef current_block; + LLVMBasicBlockRef new_block; + unsigned i; + + skip = &lp_build_flow_peek(flow, LP_BUILD_FLOW_SKIP)->skip; + if(!skip) + return; + + current_block = LLVMGetInsertBlock(flow->builder); + + new_block = lp_build_flow_insert_block(flow); + + for(i = 0; i < skip->num_variables; ++i) { + assert(*flow->variables[i]); + LLVMAddIncoming(skip->phi[i], flow->variables[i], ¤t_block, 1); + } + + LLVMBuildCondBr(flow->builder, cond, skip->block, new_block); + + LLVMPositionBuilderAtEnd(flow->builder, new_block); + } + + +void +lp_build_flow_skip_end(struct lp_build_flow_context *flow) { - if(mask->skip_block) { - LLVMBasicBlockRef current_block = LLVMGetInsertBlock(mask->builder); + struct lp_build_flow_skip *skip; + LLVMBasicBlockRef current_block; + unsigned i; - LLVMAddIncoming(mask->phi, &mask->value, ¤t_block, 1); - LLVMBuildBr(mask->builder, mask->skip_block); + skip = &lp_build_flow_pop(flow, LP_BUILD_FLOW_SKIP)->skip; + if(!skip) + return; - LLVMPositionBuilderAtEnd(mask->builder, mask->skip_block); + current_block = LLVMGetInsertBlock(flow->builder); - mask->value = mask->phi; - mask->phi = NULL; - mask->skip_block = NULL; + for(i = 0; i < skip->num_variables; ++i) { + assert(*flow->variables[i]); + LLVMAddIncoming(skip->phi[i], flow->variables[i], ¤t_block, 1); + *flow->variables[i] = skip->phi[i]; } + LLVMBuildBr(flow->builder, skip->block); + LLVMPositionBuilderAtEnd(flow->builder, skip->block); + + FREE(skip->phi); +} + + +static void +lp_build_mask_check(struct lp_build_mask_context *mask) +{ + LLVMBuilderRef builder = mask->flow->builder; + LLVMValueRef cond; + + cond = LLVMBuildICmp(builder, + LLVMIntEQ, + LLVMBuildBitCast(builder, mask->value, mask->reg_type, ""), + LLVMConstNull(mask->reg_type), + ""); + + lp_build_flow_skip_cond_break(mask->flow, cond); +} + + +void +lp_build_mask_begin(struct lp_build_mask_context *mask, + struct lp_build_flow_context *flow, + struct lp_type type, + LLVMValueRef value) +{ + memset(mask, 0, sizeof *mask); + + mask->flow = flow; + mask->reg_type = LLVMIntType(type.width * type.length); + mask->value = value; + + lp_build_flow_scope_begin(flow); + lp_build_flow_scope_declare(flow, &mask->value); + lp_build_flow_skip_begin(flow); + + lp_build_mask_check(mask); +} + + +void +lp_build_mask_update(struct lp_build_mask_context *mask, + LLVMValueRef value) +{ + mask->value = LLVMBuildAnd( mask->flow->builder, mask->value, value, ""); + + lp_build_mask_check(mask); +} + + +LLVMValueRef +lp_build_mask_end(struct lp_build_mask_context *mask) +{ + lp_build_flow_skip_end(mask->flow); + lp_build_flow_scope_end(mask->flow); return mask->value; } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.h b/src/gallium/drivers/llvmpipe/lp_bld_flow.h index 1b634ff038d..e61999ff06b 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_flow.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.h @@ -38,27 +38,53 @@ #include <llvm-c/Core.h> -union lp_type; +struct lp_type; + + +struct lp_build_flow_context; + + +struct lp_build_flow_context * +lp_build_flow_create(LLVMBuilderRef builder); + +void +lp_build_flow_destroy(struct lp_build_flow_context *flow); + +void +lp_build_flow_scope_begin(struct lp_build_flow_context *flow); + +void +lp_build_flow_scope_declare(struct lp_build_flow_context *flow, + LLVMValueRef *variable); + +void +lp_build_flow_scope_end(struct lp_build_flow_context *flow); + +void +lp_build_flow_skip_begin(struct lp_build_flow_context *flow); + +void +lp_build_flow_skip_cond_break(struct lp_build_flow_context *flow, + LLVMValueRef cond); + +void +lp_build_flow_skip_end(struct lp_build_flow_context *flow); struct lp_build_mask_context { - LLVMBuilderRef builder; + struct lp_build_flow_context *flow; LLVMTypeRef reg_type; LLVMValueRef value; - - LLVMValueRef phi; - - LLVMBasicBlockRef skip_block; }; void lp_build_mask_begin(struct lp_build_mask_context *mask, - LLVMBuilderRef builder, - union lp_type type, + struct lp_build_flow_context *flow, + struct lp_type type, LLVMValueRef value); /** diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format.h b/src/gallium/drivers/llvmpipe/lp_bld_format.h index 01c8a752d18..6d3f6926193 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_format.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_format.h @@ -31,21 +31,15 @@ /** * @file - * LLVM IR building helpers interfaces. - * - * We use LLVM-C bindings for now. They are not documented, but follow the C++ - * interfaces very closely, and appear to be complete enough for code - * genration. See - * http://npcontemplation.blogspot.com/2008/06/secret-of-llvm-c-bindings.html - * for a standalone example. + * Pixel format helpers. */ #include <llvm-c/Core.h> - -#include "pipe/p_format.h" +#include "pipe/p_format.h" -union lp_type; +struct util_format_description; +struct lp_type; /** @@ -56,9 +50,9 @@ union lp_type; * @return RGBA in a 4 floats vector. */ LLVMValueRef -lp_build_unpack_rgba(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef packed); +lp_build_unpack_rgba_aos(LLVMBuilderRef builder, + enum pipe_format format, + LLVMValueRef packed); /** @@ -67,9 +61,9 @@ lp_build_unpack_rgba(LLVMBuilderRef builder, * @param rgba 4 float vector with the unpacked components. */ LLVMValueRef -lp_build_pack_rgba(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef rgba); +lp_build_pack_rgba_aos(LLVMBuilderRef builder, + enum pipe_format format, + LLVMValueRef rgba); /** @@ -81,9 +75,9 @@ lp_build_pack_rgba(LLVMBuilderRef builder, * @return RGBA in a 4 floats vector. */ LLVMValueRef -lp_build_load_rgba(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef ptr); +lp_build_load_rgba_aos(LLVMBuilderRef builder, + enum pipe_format format, + LLVMValueRef ptr); /** @@ -92,10 +86,34 @@ lp_build_load_rgba(LLVMBuilderRef builder, * @param rgba 4 float vector with the unpacked components. */ void -lp_build_store_rgba(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef ptr, - LLVMValueRef rgba); +lp_build_store_rgba_aos(LLVMBuilderRef builder, + enum pipe_format format, + LLVMValueRef ptr, + LLVMValueRef rgba); +LLVMValueRef +lp_build_gather(LLVMBuilderRef builder, + unsigned length, + unsigned src_width, + unsigned dst_width, + LLVMValueRef base_ptr, + LLVMValueRef offsets); + + +void +lp_build_unpack_rgba_soa(LLVMBuilderRef builder, + const struct util_format_description *format_desc, + struct lp_type type, + LLVMValueRef packed, + LLVMValueRef *rgba); + + +void +lp_build_load_rgba_soa(LLVMBuilderRef builder, + const struct util_format_description *format_desc, + struct lp_type type, + LLVMValueRef base_ptr, + LLVMValueRef offsets, + LLVMValueRef *rgba); #endif /* !LP_BLD_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c index dcbc0076c7d..b9b5d84bed5 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c @@ -32,9 +32,9 @@ LLVMValueRef -lp_build_unpack_rgba(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef packed) +lp_build_unpack_rgba_aos(LLVMBuilderRef builder, + enum pipe_format format, + LLVMValueRef packed) { const struct util_format_description *desc; LLVMTypeRef type; @@ -152,9 +152,9 @@ lp_build_unpack_rgba(LLVMBuilderRef builder, LLVMValueRef -lp_build_pack_rgba(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef rgba) +lp_build_pack_rgba_aos(LLVMBuilderRef builder, + enum pipe_format format, + LLVMValueRef rgba) { const struct util_format_description *desc; LLVMTypeRef type; @@ -250,9 +250,9 @@ lp_build_pack_rgba(LLVMBuilderRef builder, LLVMValueRef -lp_build_load_rgba(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef ptr) +lp_build_load_rgba_aos(LLVMBuilderRef builder, + enum pipe_format format, + LLVMValueRef ptr) { const struct util_format_description *desc; LLVMTypeRef type; @@ -272,15 +272,15 @@ lp_build_load_rgba(LLVMBuilderRef builder, packed = LLVMBuildLoad(builder, ptr, ""); - return lp_build_unpack_rgba(builder, format, packed); + return lp_build_unpack_rgba_aos(builder, format, packed); } void -lp_build_store_rgba(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef ptr, - LLVMValueRef rgba) +lp_build_store_rgba_aos(LLVMBuilderRef builder, + enum pipe_format format, + LLVMValueRef ptr, + LLVMValueRef rgba) { const struct util_format_description *desc; LLVMTypeRef type; @@ -294,7 +294,7 @@ lp_build_store_rgba(LLVMBuilderRef builder, type = LLVMIntType(desc->block.bits); - packed = lp_build_pack_rgba(builder, format, rgba); + packed = lp_build_pack_rgba_aos(builder, format, rgba); ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(type, 0), ""); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c new file mode 100644 index 00000000000..b5ff434e1ae --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c @@ -0,0 +1,208 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "util/u_format.h" + +#include "lp_bld_type.h" +#include "lp_bld_const.h" +#include "lp_bld_conv.h" +#include "lp_bld_format.h" + + +/** + * Gather elements from scatter positions in memory into a single vector. + * + * @param src_width src element width + * @param dst_width result element width (source will be expanded to fit) + * @param length length of the offsets, + * @param base_ptr base pointer, should be a i8 pointer type. + * @param offsets vector with offsets + */ +LLVMValueRef +lp_build_gather(LLVMBuilderRef builder, + unsigned length, + unsigned src_width, + unsigned dst_width, + LLVMValueRef base_ptr, + LLVMValueRef offsets) +{ + LLVMTypeRef src_type = LLVMIntType(src_width); + LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0); + LLVMTypeRef dst_elem_type = LLVMIntType(dst_width); + LLVMTypeRef dst_vec_type = LLVMVectorType(dst_elem_type, length); + LLVMValueRef res; + unsigned i; + + res = LLVMGetUndef(dst_vec_type); + for(i = 0; i < length; ++i) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef elem_offset; + LLVMValueRef elem_ptr; + LLVMValueRef elem; + + elem_offset = LLVMBuildExtractElement(builder, offsets, index, ""); + elem_ptr = LLVMBuildGEP(builder, base_ptr, &elem_offset, 1, ""); + elem_ptr = LLVMBuildBitCast(builder, elem_ptr, src_ptr_type, ""); + elem = LLVMBuildLoad(builder, elem_ptr, ""); + + assert(src_width <= dst_width); + if(src_width > dst_width) + elem = LLVMBuildTrunc(builder, elem, dst_elem_type, ""); + if(src_width < dst_width) + elem = LLVMBuildZExt(builder, elem, dst_elem_type, ""); + + res = LLVMBuildInsertElement(builder, res, elem, index, ""); + } + + return res; +} + + +static LLVMValueRef +lp_build_format_swizzle(struct lp_type type, + const LLVMValueRef *inputs, + enum util_format_swizzle swizzle) +{ + switch (swizzle) { + case UTIL_FORMAT_SWIZZLE_X: + case UTIL_FORMAT_SWIZZLE_Y: + case UTIL_FORMAT_SWIZZLE_Z: + case UTIL_FORMAT_SWIZZLE_W: + return inputs[swizzle]; + case UTIL_FORMAT_SWIZZLE_0: + return lp_build_zero(type); + case UTIL_FORMAT_SWIZZLE_1: + return lp_build_one(type); + case UTIL_FORMAT_SWIZZLE_NONE: + return lp_build_undef(type); + default: + assert(0); + return lp_build_undef(type); + } +} + + +void +lp_build_unpack_rgba_soa(LLVMBuilderRef builder, + const struct util_format_description *format_desc, + struct lp_type type, + LLVMValueRef packed, + LLVMValueRef *rgba) +{ + LLVMValueRef inputs[4]; + unsigned start; + unsigned chan; + + /* FIXME: Support more formats */ + assert(format_desc->layout == UTIL_FORMAT_LAYOUT_ARITH); + assert(format_desc->block.width == 1); + assert(format_desc->block.height == 1); + assert(format_desc->block.bits <= 32); + + /* Decode the input vector components */ + start = 0; + for (chan = 0; chan < 4; ++chan) { + unsigned width = format_desc->channel[chan].size; + unsigned stop = start + width; + LLVMValueRef input; + + input = packed; + + switch(format_desc->channel[chan].type) { + case UTIL_FORMAT_TYPE_VOID: + input = NULL; + break; + + case UTIL_FORMAT_TYPE_UNSIGNED: + if(type.floating) { + if(start) + input = LLVMBuildLShr(builder, input, lp_build_int_const_scalar(type, start), ""); + if(stop < format_desc->block.bits) { + unsigned mask = ((unsigned long long)1 << width) - 1; + input = LLVMBuildAnd(builder, input, lp_build_int_const_scalar(type, mask), ""); + } + + if(format_desc->channel[chan].normalized) + input = lp_build_unsigned_norm_to_float(builder, width, type, input); + else + input = LLVMBuildFPToSI(builder, input, lp_build_vec_type(type), ""); + } + else { + /* FIXME */ + assert(0); + input = lp_build_undef(type); + } + break; + + default: + /* fall through */ + input = lp_build_undef(type); + break; + } + + inputs[chan] = input; + + start = stop; + } + + if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { + enum util_format_swizzle swizzle = format_desc->swizzle[0]; + LLVMValueRef depth = lp_build_format_swizzle(type, inputs, swizzle); + rgba[2] = rgba[1] = rgba[0] = depth; + rgba[3] = lp_build_one(type); + } + else { + for (chan = 0; chan < 4; ++chan) { + enum util_format_swizzle swizzle = format_desc->swizzle[chan]; + rgba[chan] = lp_build_format_swizzle(type, inputs, swizzle); + } + } +} + + +void +lp_build_load_rgba_soa(LLVMBuilderRef builder, + const struct util_format_description *format_desc, + struct lp_type type, + LLVMValueRef base_ptr, + LLVMValueRef offsets, + LLVMValueRef *rgba) +{ + LLVMValueRef packed; + + assert(format_desc->layout == UTIL_FORMAT_LAYOUT_ARITH); + assert(format_desc->block.width == 1); + assert(format_desc->block.height == 1); + assert(format_desc->block.bits <= 32); + + packed = lp_build_gather(builder, + type.length, format_desc->block.bits, type.width, + base_ptr, offsets); + + lp_build_unpack_rgba_soa(builder, format_desc, type, packed, rgba); +} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c index cfe20a0d75b..338dbca6d1e 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c @@ -292,7 +292,7 @@ void lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, const struct tgsi_token *tokens, LLVMBuilderRef builder, - union lp_type type, + struct lp_type type, LLVMValueRef a0_ptr, LLVMValueRef dadx_ptr, LLVMValueRef dady_ptr, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.h b/src/gallium/drivers/llvmpipe/lp_bld_interp.h index 9194f6233a7..9c57a10879b 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.h @@ -83,7 +83,7 @@ void lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, const struct tgsi_token *tokens, LLVMBuilderRef builder, - union lp_type type, + struct lp_type type, LLVMValueRef a0_ptr, LLVMValueRef dadx_ptr, LLVMValueRef dady_ptr, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_logic.c b/src/gallium/drivers/llvmpipe/lp_bld_logic.c index 8631efd6c3e..6b6f8207697 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_logic.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_logic.c @@ -45,7 +45,7 @@ lp_build_cmp(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; LLVMTypeRef vec_type = lp_build_vec_type(type); LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); LLVMValueRef zeros = LLVMConstNull(int_vec_type); @@ -301,7 +301,7 @@ lp_build_select(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { - union lp_type type = bld->type; + struct lp_type type = bld->type; LLVMValueRef res; if(a == b) @@ -313,8 +313,6 @@ lp_build_select(struct lp_build_context *bld, b = LLVMBuildBitCast(bld->builder, b, int_vec_type, ""); } - /* TODO: On SSE4 we could do this with a single instruction -- PBLENDVB */ - a = LLVMBuildAnd(bld->builder, a, mask, ""); /* This often gets translated to PANDN, but sometimes the NOT is @@ -339,9 +337,9 @@ LLVMValueRef lp_build_select_aos(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b, - boolean cond[4]) + const boolean cond[4]) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; const unsigned n = type.length; unsigned i, j; @@ -376,9 +374,9 @@ lp_build_select_aos(struct lp_build_context *bld, return LLVMBuildShuffleVector(bld->builder, a, b, LLVMConstVector(shuffles, n), ""); } + else { #if 0 - else if(0) { - /* FIXME: Unfortunately select of vectors do not work */ + /* XXX: Unfortunately select of vectors do not work */ /* Use a select */ LLVMTypeRef elem_type = LLVMInt1Type(); LLVMValueRef cond[LP_MAX_VECTOR_LENGTH]; @@ -388,10 +386,9 @@ lp_build_select_aos(struct lp_build_context *bld, cond[j + i] = LLVMConstInt(elem_type, cond[i] ? 1 : 0, 0); return LLVMBuildSelect(bld->builder, LLVMConstVector(cond, n), a, b, ""); - } -#endif - else { +#else LLVMValueRef mask = lp_build_const_mask_aos(type, cond); return lp_build_select(bld, mask, a, b); +#endif } } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_logic.h b/src/gallium/drivers/llvmpipe/lp_bld_logic.h index 29b9e1c45b8..a4ee7723b5f 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_logic.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_logic.h @@ -42,7 +42,7 @@ #include "pipe/p_defines.h" /* For PIPE_FUNC_xxx */ -union lp_type type; +struct lp_type type; struct lp_build_context; @@ -66,7 +66,7 @@ LLVMValueRef lp_build_select_aos(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b, - boolean cond[4]); + const boolean cond[4]); #endif /* !LP_BLD_LOGIC_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample.h b/src/gallium/drivers/llvmpipe/lp_bld_sample.h new file mode 100644 index 00000000000..403d0e48367 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample.h @@ -0,0 +1,135 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Texture sampling. + * + * @author Jose Fonseca <[email protected]> + */ + +#ifndef LP_BLD_SAMPLE_H +#define LP_BLD_SAMPLE_H + + +#include <llvm-c/Core.h> + +struct pipe_texture; +struct pipe_sampler_state; +struct lp_type; + + +/** + * Sampler static state. + * + * These are the bits of state from pipe_texture and pipe_sampler_state that + * are embedded in the generated code. + */ +struct lp_sampler_static_state +{ + /* pipe_texture's state */ + enum pipe_format format; + unsigned target:2; + unsigned pot_width:1; + unsigned pot_height:1; + unsigned pot_depth:1; + + /* pipe_sampler_state's state */ + unsigned wrap_s:3; + unsigned wrap_t:3; + unsigned wrap_r:3; + unsigned min_img_filter:2; + unsigned min_mip_filter:2; + unsigned mag_img_filter:2; + unsigned compare_mode:1; + unsigned compare_func:3; + unsigned normalized_coords:1; + unsigned prefilter:4; +}; + + +/** + * Sampler dynamic state. + * + * These are the bits of state from pipe_texture and pipe_sampler_state that + * are computed in runtime. + * + * There are obtained through callbacks, as we don't want to tie the texture + * sampling code generation logic to any particular texture layout or pipe + * driver. + */ +struct lp_sampler_dynamic_state +{ + + /** Obtain the base texture width. */ + LLVMValueRef + (*width)( struct lp_sampler_dynamic_state *state, + LLVMBuilderRef builder, + unsigned unit); + + /** Obtain the base texture height. */ + LLVMValueRef + (*height)( struct lp_sampler_dynamic_state *state, + LLVMBuilderRef builder, + unsigned unit); + + LLVMValueRef + (*stride)( struct lp_sampler_dynamic_state *state, + LLVMBuilderRef builder, + unsigned unit); + + LLVMValueRef + (*data_ptr)( struct lp_sampler_dynamic_state *state, + LLVMBuilderRef builder, + unsigned unit); + +}; + + +/** + * Derive the sampler static state. + */ +void +lp_sampler_static_state(struct lp_sampler_static_state *state, + const struct pipe_texture *texture, + const struct pipe_sampler_state *sampler); + + +void +lp_build_sample_soa(LLVMBuilderRef builder, + const struct lp_sampler_static_state *static_state, + struct lp_sampler_dynamic_state *dynamic_state, + struct lp_type fp_type, + unsigned unit, + unsigned num_coords, + const LLVMValueRef *coords, + LLVMValueRef lodbias, + LLVMValueRef *texel); + + + +#endif /* LP_BLD_SAMPLE_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c new file mode 100644 index 00000000000..08b1dc10f36 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c @@ -0,0 +1,411 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Texture sampling. + * + * @author Jose Fonseca <[email protected]> + */ + +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "util/u_debug.h" +#include "util/u_memory.h" +#include "util/u_math.h" +#include "util/u_format.h" +#include "lp_bld_debug.h" +#include "lp_bld_type.h" +#include "lp_bld_const.h" +#include "lp_bld_arit.h" +#include "lp_bld_logic.h" +#include "lp_bld_swizzle.h" +#include "lp_bld_format.h" +#include "lp_bld_sample.h" + + +void +lp_sampler_static_state(struct lp_sampler_static_state *state, + const struct pipe_texture *texture, + const struct pipe_sampler_state *sampler) +{ + memset(state, 0, sizeof *state); + + if(!texture) + return; + + if(!sampler) + return; + + state->format = texture->format; + state->target = texture->target; + state->pot_width = util_is_pot(texture->width[0]); + state->pot_height = util_is_pot(texture->height[0]); + state->pot_depth = util_is_pot(texture->depth[0]); + + state->wrap_s = sampler->wrap_s; + state->wrap_t = sampler->wrap_t; + state->wrap_r = sampler->wrap_r; + state->min_img_filter = sampler->min_img_filter; + state->min_mip_filter = sampler->min_mip_filter; + state->mag_img_filter = sampler->mag_img_filter; + if(sampler->compare_mode) { + state->compare_mode = sampler->compare_mode; + state->compare_func = sampler->compare_func; + } + state->normalized_coords = sampler->normalized_coords; + state->prefilter = sampler->prefilter; +} + + + +/** + * Keep all information for sampling code generation in a single place. + */ +struct lp_build_sample_context +{ + LLVMBuilderRef builder; + + const struct lp_sampler_static_state *static_state; + + struct lp_sampler_dynamic_state *dynamic_state; + + const struct util_format_description *format_desc; + + /** Incoming coordinates type and build context */ + struct lp_type coord_type; + struct lp_build_context coord_bld; + + /** Integer coordinates */ + struct lp_type int_coord_type; + struct lp_build_context int_coord_bld; + + /** Output texels type and build context */ + struct lp_type texel_type; + struct lp_build_context texel_bld; +}; + + +static void +lp_build_sample_texel(struct lp_build_sample_context *bld, + LLVMValueRef x, + LLVMValueRef y, + LLVMValueRef y_stride, + LLVMValueRef data_ptr, + LLVMValueRef *texel) +{ + struct lp_build_context *int_coord_bld = &bld->int_coord_bld; + LLVMValueRef x_stride; + LLVMValueRef offset; + + x_stride = lp_build_const_scalar(bld->int_coord_type, bld->format_desc->block.bits/8); + + if(bld->format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { + LLVMValueRef x_lo, x_hi; + LLVMValueRef y_lo, y_hi; + LLVMValueRef x_stride_lo, x_stride_hi; + LLVMValueRef y_stride_lo, y_stride_hi; + LLVMValueRef x_offset_lo, x_offset_hi; + LLVMValueRef y_offset_lo, y_offset_hi; + LLVMValueRef offset_lo, offset_hi; + + x_lo = LLVMBuildAnd(bld->builder, x, int_coord_bld->one, ""); + y_lo = LLVMBuildAnd(bld->builder, y, int_coord_bld->one, ""); + + x_hi = LLVMBuildLShr(bld->builder, x, int_coord_bld->one, ""); + y_hi = LLVMBuildLShr(bld->builder, y, int_coord_bld->one, ""); + + x_stride_lo = x_stride; + y_stride_lo = lp_build_const_scalar(bld->int_coord_type, 2*bld->format_desc->block.bits/8); + + x_stride_hi = lp_build_const_scalar(bld->int_coord_type, 4*bld->format_desc->block.bits/8); + y_stride_hi = LLVMBuildShl(bld->builder, y_stride, int_coord_bld->one, ""); + + x_offset_lo = lp_build_mul(int_coord_bld, x_lo, x_stride_lo); + y_offset_lo = lp_build_mul(int_coord_bld, y_lo, y_stride_lo); + offset_lo = lp_build_add(int_coord_bld, x_offset_lo, y_offset_lo); + + x_offset_hi = lp_build_mul(int_coord_bld, x_hi, x_stride_hi); + y_offset_hi = lp_build_mul(int_coord_bld, y_hi, y_stride_hi); + offset_hi = lp_build_add(int_coord_bld, x_offset_hi, y_offset_hi); + + offset = lp_build_add(int_coord_bld, offset_hi, offset_lo); + } + else { + LLVMValueRef x_offset; + LLVMValueRef y_offset; + + x_offset = lp_build_mul(int_coord_bld, x, x_stride); + y_offset = lp_build_mul(int_coord_bld, y, y_stride); + + offset = lp_build_add(int_coord_bld, x_offset, y_offset); + } + + lp_build_load_rgba_soa(bld->builder, + bld->format_desc, + bld->texel_type, + data_ptr, + offset, + texel); +} + + +static LLVMValueRef +lp_build_sample_wrap(struct lp_build_sample_context *bld, + LLVMValueRef coord, + LLVMValueRef length, + boolean is_pot, + unsigned wrap_mode) +{ + struct lp_build_context *int_coord_bld = &bld->int_coord_bld; + LLVMValueRef length_minus_one; + + length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one); + + switch(wrap_mode) { + case PIPE_TEX_WRAP_REPEAT: + if(is_pot) + coord = LLVMBuildAnd(bld->builder, coord, length_minus_one, ""); + else + /* Signed remainder won't give the right results for negative + * dividends but unsigned remainder does.*/ + coord = LLVMBuildURem(bld->builder, coord, length, ""); + break; + + case PIPE_TEX_WRAP_CLAMP: + coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero); + coord = lp_build_min(int_coord_bld, coord, length_minus_one); + break; + + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + case PIPE_TEX_WRAP_MIRROR_REPEAT: + case PIPE_TEX_WRAP_MIRROR_CLAMP: + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + /* FIXME */ + default: + assert(0); + } + + return coord; +} + + +static void +lp_build_sample_2d_nearest_soa(struct lp_build_sample_context *bld, + LLVMValueRef s, + LLVMValueRef t, + LLVMValueRef width, + LLVMValueRef height, + LLVMValueRef stride, + LLVMValueRef data_ptr, + LLVMValueRef *texel) +{ + LLVMValueRef x; + LLVMValueRef y; + + x = lp_build_ifloor(&bld->coord_bld, s); + y = lp_build_ifloor(&bld->coord_bld, t); + + x = lp_build_sample_wrap(bld, x, width, bld->static_state->pot_width, bld->static_state->wrap_s); + y = lp_build_sample_wrap(bld, y, height, bld->static_state->pot_height, bld->static_state->wrap_t); + + lp_build_sample_texel(bld, x, y, stride, data_ptr, texel); +} + + +static void +lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld, + LLVMValueRef s, + LLVMValueRef t, + LLVMValueRef width, + LLVMValueRef height, + LLVMValueRef stride, + LLVMValueRef data_ptr, + LLVMValueRef *texel) +{ + LLVMValueRef half; + LLVMValueRef s_ipart; + LLVMValueRef t_ipart; + LLVMValueRef s_fpart; + LLVMValueRef t_fpart; + LLVMValueRef x0, x1; + LLVMValueRef y0, y1; + LLVMValueRef neighbors[2][2][4]; + unsigned chan; + + half = lp_build_const_scalar(bld->coord_type, 0.5); + s = lp_build_sub(&bld->coord_bld, s, half); + t = lp_build_sub(&bld->coord_bld, t, half); + + s_ipart = lp_build_floor(&bld->coord_bld, s); + t_ipart = lp_build_floor(&bld->coord_bld, t); + + s_fpart = lp_build_sub(&bld->coord_bld, s, s_ipart); + t_fpart = lp_build_sub(&bld->coord_bld, t, t_ipart); + + x0 = lp_build_int(&bld->coord_bld, s_ipart); + y0 = lp_build_int(&bld->coord_bld, t_ipart); + + x0 = lp_build_sample_wrap(bld, x0, width, bld->static_state->pot_width, bld->static_state->wrap_s); + y0 = lp_build_sample_wrap(bld, y0, height, bld->static_state->pot_height, bld->static_state->wrap_t); + + x1 = lp_build_add(&bld->int_coord_bld, x0, bld->int_coord_bld.one); + y1 = lp_build_add(&bld->int_coord_bld, y0, bld->int_coord_bld.one); + + x1 = lp_build_sample_wrap(bld, x1, width, bld->static_state->pot_width, bld->static_state->wrap_s); + y1 = lp_build_sample_wrap(bld, y1, height, bld->static_state->pot_height, bld->static_state->wrap_t); + + lp_build_sample_texel(bld, x0, y0, stride, data_ptr, neighbors[0][0]); + lp_build_sample_texel(bld, x1, y0, stride, data_ptr, neighbors[0][1]); + lp_build_sample_texel(bld, x0, y1, stride, data_ptr, neighbors[1][0]); + lp_build_sample_texel(bld, x1, y1, stride, data_ptr, neighbors[1][1]); + + /* TODO: Don't interpolate missing channels */ + for(chan = 0; chan < 4; ++chan) { + texel[chan] = lp_build_lerp_2d(&bld->texel_bld, + s_fpart, t_fpart, + neighbors[0][0][chan], + neighbors[0][1][chan], + neighbors[1][0][chan], + neighbors[1][1][chan]); + } +} + + +static void +lp_build_sample_compare(struct lp_build_sample_context *bld, + LLVMValueRef p, + LLVMValueRef *texel) +{ + struct lp_build_context *texel_bld = &bld->texel_bld; + LLVMValueRef res; + unsigned chan; + + if(!bld->static_state->compare_mode) + return; + + /* TODO: Compare before swizzling, to avoid redundant computations */ + res = NULL; + for(chan = 0; chan < 4; ++chan) { + LLVMValueRef cmp; + cmp = lp_build_cmp(texel_bld, bld->static_state->compare_func, p, texel[chan]); + cmp = lp_build_select(texel_bld, cmp, texel_bld->one, texel_bld->zero); + + if(res) + res = lp_build_add(texel_bld, res, cmp); + else + res = cmp; + } + + assert(res); + res = lp_build_mul(texel_bld, res, lp_build_const_scalar(texel_bld->type, 0.25)); + + /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */ + for(chan = 0; chan < 3; ++chan) + texel[chan] = res; + texel[3] = texel_bld->one; +} + + +void +lp_build_sample_soa(LLVMBuilderRef builder, + const struct lp_sampler_static_state *static_state, + struct lp_sampler_dynamic_state *dynamic_state, + struct lp_type type, + unsigned unit, + unsigned num_coords, + const LLVMValueRef *coords, + LLVMValueRef lodbias, + LLVMValueRef *texel) +{ + struct lp_build_sample_context bld; + LLVMValueRef width; + LLVMValueRef height; + LLVMValueRef stride; + LLVMValueRef data_ptr; + LLVMValueRef s; + LLVMValueRef t; + LLVMValueRef p; + + /* Setup our build context */ + memset(&bld, 0, sizeof bld); + bld.builder = builder; + bld.static_state = static_state; + bld.dynamic_state = dynamic_state; + bld.format_desc = util_format_description(static_state->format); + bld.coord_type = type; + bld.int_coord_type = lp_int_type(type); + bld.texel_type = type; + lp_build_context_init(&bld.coord_bld, builder, bld.coord_type); + lp_build_context_init(&bld.int_coord_bld, builder, bld.int_coord_type); + lp_build_context_init(&bld.texel_bld, builder, bld.texel_type); + + /* Get the dynamic state */ + width = dynamic_state->width(dynamic_state, builder, unit); + height = dynamic_state->height(dynamic_state, builder, unit); + stride = dynamic_state->stride(dynamic_state, builder, unit); + data_ptr = dynamic_state->data_ptr(dynamic_state, builder, unit); + + s = coords[0]; + t = coords[1]; + p = coords[2]; + + width = lp_build_broadcast_scalar(&bld.int_coord_bld, width); + height = lp_build_broadcast_scalar(&bld.int_coord_bld, height); + stride = lp_build_broadcast_scalar(&bld.int_coord_bld, stride); + + if(static_state->target == PIPE_TEXTURE_1D) + t = bld.coord_bld.zero; + + if(static_state->normalized_coords) { + LLVMTypeRef coord_vec_type = lp_build_vec_type(bld.coord_type); + LLVMValueRef fp_width = LLVMBuildSIToFP(builder, width, coord_vec_type, ""); + LLVMValueRef fp_height = LLVMBuildSIToFP(builder, height, coord_vec_type, ""); + s = lp_build_mul(&bld.coord_bld, s, fp_width); + t = lp_build_mul(&bld.coord_bld, t, fp_height); + } + + switch (static_state->min_img_filter) { + case PIPE_TEX_FILTER_NEAREST: + lp_build_sample_2d_nearest_soa(&bld, s, t, width, height, stride, data_ptr, texel); + break; + case PIPE_TEX_FILTER_LINEAR: + case PIPE_TEX_FILTER_ANISO: + lp_build_sample_2d_linear_soa(&bld, s, t, width, height, stride, data_ptr, texel); + break; + default: + assert(0); + } + + /* FIXME: respect static_state->min_mip_filter */; + /* FIXME: respect static_state->mag_img_filter */; + /* FIXME: respect static_state->prefilter */; + + lp_build_sample_compare(&bld, p, texel); +} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_struct.c b/src/gallium/drivers/llvmpipe/lp_bld_struct.c index 14d2b10df9c..3998ac374fe 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_struct.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_struct.c @@ -42,17 +42,30 @@ LLVMValueRef +lp_build_struct_get_ptr(LLVMBuilderRef builder, + LLVMValueRef ptr, + unsigned member, + const char *name) +{ + LLVMValueRef indices[2]; + LLVMValueRef member_ptr; + indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); + indices[1] = LLVMConstInt(LLVMInt32Type(), member, 0); + member_ptr = LLVMBuildGEP(builder, ptr, indices, Elements(indices), ""); + lp_build_name(member_ptr, "%s.%s_ptr", LLVMGetValueName(ptr), name); + return member_ptr; +} + + +LLVMValueRef lp_build_struct_get(LLVMBuilderRef builder, LLVMValueRef ptr, unsigned member, const char *name) { - LLVMValueRef indices[2]; LLVMValueRef member_ptr; LLVMValueRef res; - indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); - indices[1] = LLVMConstInt(LLVMInt32Type(), member, 0); - member_ptr = LLVMBuildGEP(builder, ptr, indices, Elements(indices), ""); + member_ptr = lp_build_struct_get_ptr(builder, ptr, member, name); res = LLVMBuildLoad(builder, member_ptr, ""); lp_build_name(res, "%s.%s", LLVMGetValueName(ptr), name); return res; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_struct.h b/src/gallium/drivers/llvmpipe/lp_bld_struct.h index cbefdc9f815..740392f5611 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_struct.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_struct.h @@ -53,6 +53,18 @@ offsetof(_ctype, _cmember)) +/** + * Get value pointer to a structure member. + */ +LLVMValueRef +lp_build_struct_get_ptr(LLVMBuilderRef builder, + LLVMValueRef ptr, + unsigned member, + const char *name); + +/** + * Get the value of a structure member. + */ LLVMValueRef lp_build_struct_get(LLVMBuilderRef builder, LLVMValueRef ptr, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c index ac7eed9379a..64e81f7b1fe 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c @@ -64,7 +64,7 @@ LLVMValueRef lp_build_broadcast_scalar(struct lp_build_context *bld, LLVMValueRef scalar) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; LLVMValueRef res; unsigned i; @@ -83,7 +83,7 @@ lp_build_broadcast_aos(struct lp_build_context *bld, LLVMValueRef a, unsigned channel) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; const unsigned n = type.length; unsigned i, j; @@ -115,7 +115,7 @@ lp_build_broadcast_aos(struct lp_build_context *bld, * YY00 YY00 .... YY00 * YYYY YYYY .... YYYY <= output */ - union lp_type type4 = type; + struct lp_type type4 = type; const char shifts[4][2] = { { 1, 2}, {-1, 2}, @@ -161,7 +161,7 @@ lp_build_broadcast_aos(struct lp_build_context *bld, LLVMValueRef lp_build_swizzle1_aos(struct lp_build_context *bld, LLVMValueRef a, - unsigned char swizzle[4]) + const unsigned char swizzle[4]) { const unsigned n = bld->type.length; unsigned i, j; @@ -192,7 +192,7 @@ LLVMValueRef lp_build_swizzle2_aos(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b, - unsigned char swizzle[4]) + const unsigned char swizzle[4]) { const unsigned n = bld->type.length; unsigned i, j; @@ -201,11 +201,12 @@ lp_build_swizzle2_aos(struct lp_build_context *bld, return lp_build_swizzle1_aos(bld, a, swizzle); if(a == b) { - swizzle[0] %= 4; - swizzle[1] %= 4; - swizzle[2] %= 4; - swizzle[3] %= 4; - return lp_build_swizzle1_aos(bld, a, swizzle); + unsigned char swizzle1[4]; + swizzle1[0] = swizzle[0] % 4; + swizzle1[1] = swizzle[1] % 4; + swizzle1[2] = swizzle[2] % 4; + swizzle1[3] = swizzle[3] % 4; + return lp_build_swizzle1_aos(bld, a, swizzle1); } if(swizzle[0] % 4 == 0 && diff --git a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h index d7dd6a8a604..1f6da804488 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h @@ -40,7 +40,7 @@ #include <llvm-c/Core.h> -union lp_type type; +struct lp_type type; struct lp_build_context; @@ -73,7 +73,7 @@ lp_build_broadcast_aos(struct lp_build_context *bld, LLVMValueRef lp_build_swizzle1_aos(struct lp_build_context *bld, LLVMValueRef a, - unsigned char swizzle[4]); + const unsigned char swizzle[4]); /** @@ -85,7 +85,7 @@ LLVMValueRef lp_build_swizzle2_aos(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b, - unsigned char swizzle[4]); + const unsigned char swizzle[4]); #endif /* !LP_BLD_SWIZZLE_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h b/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h index 912db24aecb..eddb7a83fa2 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h @@ -39,31 +39,46 @@ struct tgsi_token; -union lp_type; +struct lp_type; struct lp_build_context; struct lp_build_mask_context; -typedef void -(*lp_emit_fetch_texel_soa_callback)( LLVMBuilderRef builder, - void *context, - unsigned unit, - unsigned num_coords, - const LLVMValueRef *coords, - LLVMValueRef lodbias, - LLVMValueRef *texel); +/** + * Sampler code generation interface. + * + * Although texture sampling is a requirement for TGSI translation, it is + * a very different problem with several different approaches to it. This + * structure establishes an interface for texture sampling code generation, so + * that we can easily use different texture sampling strategies. + */ +struct lp_build_sampler_soa +{ + void + (*destroy)( struct lp_build_sampler_soa *sampler ); + + void + (*emit_fetch_texel)( struct lp_build_sampler_soa *sampler, + LLVMBuilderRef builder, + struct lp_type type, + unsigned unit, + unsigned num_coords, + const LLVMValueRef *coords, + LLVMValueRef lodbias, + LLVMValueRef *texel); +}; + void lp_build_tgsi_soa(LLVMBuilderRef builder, const struct tgsi_token *tokens, - union lp_type type, + struct lp_type type, struct lp_build_mask_context *mask, LLVMValueRef consts_ptr, const LLVMValueRef *pos, const LLVMValueRef (*inputs)[4], LLVMValueRef (*outputs)[4], - lp_emit_fetch_texel_soa_callback emit_fetch_texel, - void *emit_fetch_texel_context); + struct lp_build_sampler_soa *sampler); #endif /* LP_BLD_TGSI_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c index d4d18febec7..adc81569ed5 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c @@ -78,6 +78,11 @@ #define CHAN_Z 2 #define CHAN_W 3 +#define QUAD_TOP_LEFT 0 +#define QUAD_TOP_RIGHT 1 +#define QUAD_BOTTOM_LEFT 2 +#define QUAD_BOTTOM_RIGHT 3 + struct lp_build_tgsi_soa_context { @@ -88,8 +93,7 @@ struct lp_build_tgsi_soa_context const LLVMValueRef (*inputs)[NUM_CHANNELS]; LLVMValueRef (*outputs)[NUM_CHANNELS]; - lp_emit_fetch_texel_soa_callback emit_fetch_texel; - void *emit_fetch_texel_context; + struct lp_build_sampler_soa *sampler; LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS]; LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS]; @@ -98,6 +102,51 @@ struct lp_build_tgsi_soa_context }; +static const unsigned char +swizzle_left[4] = { + QUAD_TOP_LEFT, QUAD_TOP_LEFT, + QUAD_BOTTOM_LEFT, QUAD_BOTTOM_LEFT +}; + +static const unsigned char +swizzle_right[4] = { + QUAD_TOP_RIGHT, QUAD_TOP_RIGHT, + QUAD_BOTTOM_RIGHT, QUAD_BOTTOM_RIGHT +}; + +static const unsigned char +swizzle_top[4] = { + QUAD_TOP_LEFT, QUAD_TOP_RIGHT, + QUAD_TOP_LEFT, QUAD_TOP_RIGHT +}; + +static const unsigned char +swizzle_bottom[4] = { + QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT, + QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT +}; + + +static LLVMValueRef +emit_ddx(struct lp_build_tgsi_soa_context *bld, + LLVMValueRef src) +{ + LLVMValueRef src_left = lp_build_swizzle1_aos(&bld->base, src, swizzle_left); + LLVMValueRef src_right = lp_build_swizzle1_aos(&bld->base, src, swizzle_right); + return lp_build_sub(&bld->base, src_right, src_left); +} + + +static LLVMValueRef +emit_ddy(struct lp_build_tgsi_soa_context *bld, + LLVMValueRef src) +{ + LLVMValueRef src_top = lp_build_swizzle1_aos(&bld->base, src, swizzle_top); + LLVMValueRef src_bottom = lp_build_swizzle1_aos(&bld->base, src, swizzle_bottom); + return lp_build_sub(&bld->base, src_top, src_bottom); +} + + /** * Register fetch. */ @@ -168,6 +217,7 @@ emit_fetch( break; case TGSI_UTIL_SIGN_SET: + /* TODO: Use bitwese OR for floating point */ res = lp_build_abs( &bld->base, res ); res = LLVMBuildNeg( bld->base.builder, res, "" ); break; @@ -185,6 +235,36 @@ emit_fetch( /** + * Register fetch with derivatives. + */ +static void +emit_fetch_deriv( + struct lp_build_tgsi_soa_context *bld, + const struct tgsi_full_instruction *inst, + unsigned index, + const unsigned chan_index, + LLVMValueRef *res, + LLVMValueRef *ddx, + LLVMValueRef *ddy) +{ + LLVMValueRef src; + + src = emit_fetch(bld, inst, index, chan_index); + + if(res) + *res = src; + + /* TODO: use interpolation coeffs for inputs */ + + if(ddx) + *ddx = emit_ddx(bld, src); + + if(ddy) + *ddy = emit_ddy(bld, src); +} + + +/** * Register store. */ static void @@ -239,17 +319,18 @@ emit_store( * High-level instruction translators. */ + static void emit_tex( struct lp_build_tgsi_soa_context *bld, const struct tgsi_full_instruction *inst, boolean apply_lodbias, - boolean projected) + boolean projected, + LLVMValueRef *texel) { const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; LLVMValueRef lodbias; LLVMValueRef oow; LLVMValueRef coords[3]; - LLVMValueRef texel[4]; unsigned num_coords; unsigned i; @@ -289,12 +370,11 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, coords[i] = lp_build_mul(&bld->base, coords[i], oow); } - bld->emit_fetch_texel(bld->base.builder, bld->emit_fetch_texel_context, - unit, num_coords, coords, lodbias, texel); - - FOR_EACH_DST0_ENABLED_CHANNEL( inst, i ) { - emit_store( bld, inst, 0, i, texel[i] ); - } + bld->sampler->emit_fetch_texel(bld->sampler, + bld->base.builder, + bld->base.type, + unit, num_coords, coords, lodbias, + texel); } @@ -347,14 +427,6 @@ emit_kil( } -static void -emit_kilp( - struct lp_build_tgsi_soa_context *bld ) -{ - /* XXX todo / fix me */ -} - - /** * Check if inst src/dest regs use indirect addressing into temporary * register file. @@ -382,25 +454,35 @@ indirect_temp_reference(const struct tgsi_full_instruction *inst) static int emit_instruction( struct lp_build_tgsi_soa_context *bld, - struct tgsi_full_instruction *inst ) + const struct tgsi_full_instruction *inst, + const struct tgsi_opcode_info *info) { unsigned chan_index; LLVMValueRef src0, src1, src2; LLVMValueRef tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; - LLVMValueRef dst0; + LLVMValueRef res; + LLVMValueRef dst0[NUM_CHANNELS]; /* we can't handle indirect addressing into temp register file yet */ if (indirect_temp_reference(inst)) return FALSE; + assert(info->num_dst <= 1); + if(info->num_dst) { + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + dst0[chan_index] = bld->base.undef; + } + } + switch (inst->Instruction.Opcode) { #if 0 case TGSI_OPCODE_ARL: + /* FIXME */ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { tmp0 = emit_fetch( bld, inst, 0, chan_index ); emit_flr(bld, 0, 0); emit_f2it( bld, 0 ); - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = tmp0; } break; #endif @@ -408,19 +490,17 @@ emit_instruction( case TGSI_OPCODE_MOV: case TGSI_OPCODE_SWZ: FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - tmp0 = emit_fetch( bld, inst, 0, chan_index ); - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index ); } break; case TGSI_OPCODE_LIT: if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) { - emit_store( bld, inst, 0, CHAN_X, bld->base.one); + dst0[CHAN_X] = bld->base.one; } if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) { src0 = emit_fetch( bld, inst, 0, CHAN_X ); - dst0 = lp_build_max( &bld->base, src0, bld->base.zero); - emit_store( bld, inst, 0, CHAN_Y, dst0); + dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero); } if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { /* XMM[1] = SrcReg[0].yyyy */ @@ -432,20 +512,19 @@ emit_instruction( tmp1 = lp_build_pow( &bld->base, tmp1, tmp2); tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero); - dst0 = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero); - emit_store( bld, inst, 0, CHAN_Z, dst0); + dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero); } if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) { - emit_store( bld, inst, 0, CHAN_W, bld->base.one); + dst0[CHAN_W] = bld->base.one; } break; case TGSI_OPCODE_RCP: /* TGSI_OPCODE_RECIP */ src0 = emit_fetch( bld, inst, 0, CHAN_X ); - dst0 = lp_build_rcp(&bld->base, src0); + res = lp_build_rcp(&bld->base, src0); FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, dst0 ); + dst0[chan_index] = res; } break; @@ -453,9 +532,9 @@ emit_instruction( /* TGSI_OPCODE_RECIPSQRT */ src0 = emit_fetch( bld, inst, 0, CHAN_X ); src0 = lp_build_abs(&bld->base, src0); - dst0 = lp_build_rsqrt(&bld->base, src0); + res = lp_build_rsqrt(&bld->base, src0); FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, dst0 ); + dst0[chan_index] = res; } break; @@ -479,16 +558,15 @@ emit_instruction( lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2); if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) - emit_store( bld, inst, 0, CHAN_X, tmp0); + dst0[CHAN_X] = tmp0; if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) - emit_store( bld, inst, 0, CHAN_Y, tmp1); + dst0[CHAN_Y] = tmp1; if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) - emit_store( bld, inst, 0, CHAN_Z, tmp2); + dst0[CHAN_Z] = tmp2; } /* dst.w = 1.0 */ if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) { - tmp0 = bld->base.one; - emit_store( bld, inst, 0, CHAN_W, tmp0); + dst0[CHAN_W] = bld->base.one; } break; @@ -514,20 +592,18 @@ emit_instruction( /* dst.x = floor(lg2(abs(src.x))) */ if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) - emit_store( bld, inst, 0, CHAN_X, tmp0); + dst0[CHAN_X] = tmp0; /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */ if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) { - tmp1 = lp_build_div( &bld->base, src0, tmp1); - emit_store( bld, inst, 0, CHAN_Y, tmp1); + dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1); } /* dst.z = lg2(abs(src.x)) */ if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) - emit_store( bld, inst, 0, CHAN_Z, tmp2); + dst0[CHAN_Z] = tmp2; } /* dst.w = 1.0 */ if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) { - tmp0 = bld->base.one; - emit_store( bld, inst, 0, CHAN_W, tmp0); + dst0[CHAN_W] = bld->base.one; } break; @@ -535,8 +611,7 @@ emit_instruction( FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { src0 = emit_fetch( bld, inst, 0, chan_index ); src1 = emit_fetch( bld, inst, 1, chan_index ); - dst0 = lp_build_mul(&bld->base, src0, src1); - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = lp_build_mul(&bld->base, src0, src1); } break; @@ -544,8 +619,7 @@ emit_instruction( FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { src0 = emit_fetch( bld, inst, 0, chan_index ); src1 = emit_fetch( bld, inst, 1, chan_index ); - dst0 = lp_build_add(&bld->base, src0, src1); - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = lp_build_add(&bld->base, src0, src1); } break; @@ -563,7 +637,7 @@ emit_instruction( tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); tmp0 = lp_build_add( &bld->base, tmp0, tmp1); FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = tmp0; } break; @@ -585,28 +659,24 @@ emit_instruction( tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); tmp0 = lp_build_add( &bld->base, tmp0, tmp1); FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = tmp0; } break; case TGSI_OPCODE_DST: IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { - tmp0 = bld->base.one; - emit_store( bld, inst, 0, CHAN_X, tmp0); + dst0[CHAN_X] = bld->base.one; } IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { tmp0 = emit_fetch( bld, inst, 0, CHAN_Y ); tmp1 = emit_fetch( bld, inst, 1, CHAN_Y ); - tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); - emit_store( bld, inst, 0, CHAN_Y, tmp0); + dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1); } IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { - tmp0 = emit_fetch( bld, inst, 0, CHAN_Z ); - emit_store( bld, inst, 0, CHAN_Z, tmp0); + dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z ); } IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { - tmp0 = emit_fetch( bld, inst, 1, CHAN_W ); - emit_store( bld, inst, 0, CHAN_W, tmp0); + dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W ); } break; @@ -614,8 +684,7 @@ emit_instruction( FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { src0 = emit_fetch( bld, inst, 0, chan_index ); src1 = emit_fetch( bld, inst, 1, chan_index ); - dst0 = lp_build_min( &bld->base, src0, src1 ); - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = lp_build_min( &bld->base, src0, src1 ); } break; @@ -623,8 +692,7 @@ emit_instruction( FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { src0 = emit_fetch( bld, inst, 0, chan_index ); src1 = emit_fetch( bld, inst, 1, chan_index ); - dst0 = lp_build_max( &bld->base, src0, src1 ); - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = lp_build_max( &bld->base, src0, src1 ); } break; @@ -634,8 +702,7 @@ emit_instruction( src0 = emit_fetch( bld, inst, 0, chan_index ); src1 = emit_fetch( bld, inst, 1, chan_index ); tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 ); - dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); } break; @@ -645,8 +712,7 @@ emit_instruction( src0 = emit_fetch( bld, inst, 0, chan_index ); src1 = emit_fetch( bld, inst, 1, chan_index ); tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 ); - dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); } break; @@ -658,7 +724,7 @@ emit_instruction( tmp2 = emit_fetch( bld, inst, 2, chan_index ); tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); tmp0 = lp_build_add( &bld->base, tmp0, tmp2); - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = tmp0; } break; @@ -666,8 +732,7 @@ emit_instruction( FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { tmp0 = emit_fetch( bld, inst, 0, chan_index ); tmp1 = emit_fetch( bld, inst, 1, chan_index ); - tmp0 = lp_build_sub( &bld->base, tmp0, tmp1); - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1); } break; @@ -678,13 +743,19 @@ emit_instruction( src2 = emit_fetch( bld, inst, 2, chan_index ); tmp0 = lp_build_sub( &bld->base, src1, src2 ); tmp0 = lp_build_mul( &bld->base, src0, tmp0 ); - dst0 = lp_build_add( &bld->base, tmp0, src2 ); - emit_store( bld, inst, 0, chan_index, dst0 ); + dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 ); } break; case TGSI_OPCODE_CND: - return 0; + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + src0 = emit_fetch( bld, inst, 0, chan_index ); + src1 = emit_fetch( bld, inst, 1, chan_index ); + src2 = emit_fetch( bld, inst, 2, chan_index ); + tmp1 = lp_build_const_scalar(bld->base.type, 0.5); + tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1); + dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 ); + } break; case TGSI_OPCODE_DP2A: @@ -698,45 +769,49 @@ emit_instruction( tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */ tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, tmp0); /* dest[ch] = xmm0 */ + dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */ } break; -#if 0 case TGSI_OPCODE_FRC: FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - tmp0 = emit_fetch( bld, inst, 0, chan_index ); - emit_frc( bld, 0, 0 ); - emit_store( bld, inst, 0, chan_index, tmp0); + src0 = emit_fetch( bld, inst, 0, chan_index ); + tmp0 = lp_build_floor(&bld->base, src0); + tmp0 = lp_build_sub(&bld->base, tmp0, src0); + dst0[chan_index] = tmp0; } break; case TGSI_OPCODE_CLAMP: - return 0; + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + tmp0 = emit_fetch( bld, inst, 0, chan_index ); + src1 = emit_fetch( bld, inst, 1, chan_index ); + src2 = emit_fetch( bld, inst, 2, chan_index ); + tmp0 = lp_build_max(&bld->base, tmp0, src1); + tmp0 = lp_build_min(&bld->base, tmp0, src2); + dst0[chan_index] = tmp0; + } break; case TGSI_OPCODE_FLR: FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { tmp0 = emit_fetch( bld, inst, 0, chan_index ); - emit_flr( bld, 0, 0 ); - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = lp_build_floor(&bld->base, tmp0); } break; case TGSI_OPCODE_ROUND: FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { tmp0 = emit_fetch( bld, inst, 0, chan_index ); - emit_rnd( bld, 0, 0 ); - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = lp_build_round(&bld->base, tmp0); } break; -#endif case TGSI_OPCODE_EX2: { tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); tmp0 = lp_build_exp2( &bld->base, tmp0); FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = tmp0; } break; } @@ -745,16 +820,16 @@ emit_instruction( tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); tmp0 = lp_build_log2( &bld->base, tmp0); FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = tmp0; } break; case TGSI_OPCODE_POW: src0 = emit_fetch( bld, inst, 0, CHAN_X ); src1 = emit_fetch( bld, inst, 1, CHAN_X ); - dst0 = lp_build_pow( &bld->base, src0, src1 ); + res = lp_build_pow( &bld->base, src0, src1 ); FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, dst0 ); + dst0[chan_index] = res; } break; @@ -775,7 +850,7 @@ emit_instruction( tmp5 = tmp3; tmp5 = lp_build_mul( &bld->base, tmp5, tmp4); tmp2 = lp_build_sub( &bld->base, tmp2, tmp5); - emit_store( bld, inst, 0, CHAN_X, tmp2); + dst0[CHAN_X] = tmp2; } if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { @@ -786,31 +861,30 @@ emit_instruction( tmp3 = lp_build_mul( &bld->base, tmp3, tmp2); tmp1 = lp_build_mul( &bld->base, tmp1, tmp5); tmp3 = lp_build_sub( &bld->base, tmp3, tmp1); - emit_store( bld, inst, 0, CHAN_Y, tmp3); + dst0[CHAN_Y] = tmp3; } IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { tmp5 = lp_build_mul( &bld->base, tmp5, tmp4); tmp0 = lp_build_mul( &bld->base, tmp0, tmp2); tmp5 = lp_build_sub( &bld->base, tmp5, tmp0); - emit_store( bld, inst, 0, CHAN_Z, tmp5); + dst0[CHAN_Z] = tmp5; } IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { - tmp0 = bld->base.one; - emit_store( bld, inst, 0, CHAN_W, tmp0); + dst0[CHAN_W] = bld->base.one; } break; case TGSI_OPCODE_ABS: FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { tmp0 = emit_fetch( bld, inst, 0, chan_index ); - tmp0 = lp_build_abs( &bld->base, tmp0 ) ; - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = lp_build_abs( &bld->base, tmp0 ); } break; case TGSI_OPCODE_RCC: + /* deprecated? */ + assert(0); return 0; - break; case TGSI_OPCODE_DPH: tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); @@ -827,7 +901,7 @@ emit_instruction( tmp1 = emit_fetch( bld, inst, 1, CHAN_W ); tmp0 = lp_build_add( &bld->base, tmp0, tmp1); FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = tmp0; } break; @@ -835,25 +909,27 @@ emit_instruction( tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); tmp0 = lp_build_cos( &bld->base, tmp0 ); FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = tmp0; } break; case TGSI_OPCODE_DDX: - return 0; + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL); + } break; case TGSI_OPCODE_DDY: - return 0; + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]); + } break; -#if 0 case TGSI_OPCODE_KILP: /* predicated kill */ - emit_kilp( bld ); - return 0; /* XXX fix me */ + /* FIXME */ + return 0; break; -#endif case TGSI_OPCODE_KIL: /* conditional kill */ @@ -885,13 +961,14 @@ emit_instruction( src0 = emit_fetch( bld, inst, 0, chan_index ); src1 = emit_fetch( bld, inst, 1, chan_index ); tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 ); - dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); } break; case TGSI_OPCODE_SFL: - return 0; + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + dst0[chan_index] = bld->base.zero; + } break; case TGSI_OPCODE_SGT: @@ -899,8 +976,7 @@ emit_instruction( src0 = emit_fetch( bld, inst, 0, chan_index ); src1 = emit_fetch( bld, inst, 1, chan_index ); tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 ); - dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); } break; @@ -908,7 +984,7 @@ emit_instruction( tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); tmp0 = lp_build_sin( &bld->base, tmp0 ); FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = tmp0; } break; @@ -917,8 +993,7 @@ emit_instruction( src0 = emit_fetch( bld, inst, 0, chan_index ); src1 = emit_fetch( bld, inst, 1, chan_index ); tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 ); - dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); } break; @@ -927,85 +1002,99 @@ emit_instruction( src0 = emit_fetch( bld, inst, 0, chan_index ); src1 = emit_fetch( bld, inst, 1, chan_index ); tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 ); - dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); } break; case TGSI_OPCODE_STR: - return 0; + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + dst0[chan_index] = bld->base.one; + } break; case TGSI_OPCODE_TEX: - emit_tex( bld, inst, FALSE, FALSE ); + emit_tex( bld, inst, FALSE, FALSE, dst0 ); break; case TGSI_OPCODE_TXD: + /* FIXME */ return 0; break; case TGSI_OPCODE_UP2H: + /* deprecated */ + assert (0); return 0; break; case TGSI_OPCODE_UP2US: + /* deprecated */ + assert(0); return 0; break; case TGSI_OPCODE_UP4B: + /* deprecated */ + assert(0); return 0; break; case TGSI_OPCODE_UP4UB: + /* deprecated */ + assert(0); return 0; break; case TGSI_OPCODE_X2D: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_ARA: + /* deprecated */ + assert(0); return 0; break; #if 0 case TGSI_OPCODE_ARR: + /* FIXME */ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { tmp0 = emit_fetch( bld, inst, 0, chan_index ); emit_rnd( bld, 0, 0 ); emit_f2it( bld, 0 ); - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = tmp0; } break; #endif case TGSI_OPCODE_BRA: + /* deprecated */ + assert(0); return 0; break; case TGSI_OPCODE_CAL: + /* FIXME */ return 0; break; -#if 0 case TGSI_OPCODE_RET: - emit_ret( bld ); + /* FIXME */ + return 0; break; -#endif case TGSI_OPCODE_END: break; -#if 0 case TGSI_OPCODE_SSG: /* TGSI_OPCODE_SGN */ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { tmp0 = emit_fetch( bld, inst, 0, chan_index ); - emit_sgn( bld, 0, 0 ); - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 ); } break; -#endif case TGSI_OPCODE_CMP: FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { @@ -1013,34 +1102,29 @@ emit_instruction( src1 = emit_fetch( bld, inst, 1, chan_index ); src2 = emit_fetch( bld, inst, 2, chan_index ); tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero ); - dst0 = lp_build_select( &bld->base, tmp0, src1, src2); - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2); } break; case TGSI_OPCODE_SCS: IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); - tmp0 = lp_build_cos( &bld->base, tmp0 ); - emit_store( bld, inst, 0, CHAN_X, tmp0); + dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 ); } IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); - tmp0 = lp_build_sin( &bld->base, tmp0 ); - emit_store( bld, inst, 0, CHAN_Y, tmp0); + dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 ); } IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { - tmp0 = bld->base.zero; - emit_store( bld, inst, 0, CHAN_Z, tmp0); + dst0[CHAN_Z] = bld->base.zero; } IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { - tmp0 = bld->base.one; - emit_store( bld, inst, 0, CHAN_W, tmp0); + dst0[CHAN_W] = bld->base.one; } break; case TGSI_OPCODE_TXB: - emit_tex( bld, inst, TRUE, FALSE ); + emit_tex( bld, inst, TRUE, FALSE, dst0 ); break; case TGSI_OPCODE_NRM: @@ -1099,38 +1183,35 @@ emit_instruction( /* dst.x = xmm1 * src.x */ if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) { - tmp4 = lp_build_mul( &bld->base, tmp4, tmp1); - emit_store(bld, inst, 0, CHAN_X, tmp4); + dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1); } /* dst.y = xmm1 * src.y */ if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) { - tmp5 = lp_build_mul( &bld->base, tmp5, tmp1); - emit_store(bld, inst, 0, CHAN_Y, tmp5); + dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1); } /* dst.z = xmm1 * src.z */ if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) { - tmp6 = lp_build_mul( &bld->base, tmp6, tmp1); - emit_store(bld, inst, 0, CHAN_Z, tmp6); + dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1); } /* dst.w = xmm1 * src.w */ if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) { - tmp7 = lp_build_mul( &bld->base, tmp7, tmp1); - emit_store(bld, inst, 0, CHAN_W, tmp7); + dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1); } } - /* dst0.w = 1.0 */ + /* dst.w = 1.0 */ if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) { - tmp0 = bld->base.one; - emit_store(bld, inst, 0, CHAN_W, tmp0); + dst0[CHAN_W] = bld->base.one; } } break; case TGSI_OPCODE_DIV: + /* deprecated */ + assert( 0 ); return 0; break; @@ -1143,118 +1224,157 @@ emit_instruction( tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */ tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, tmp0); /* dest[ch] = xmm0 */ + dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */ } break; case TGSI_OPCODE_TXL: - emit_tex( bld, inst, TRUE, FALSE ); + emit_tex( bld, inst, TRUE, FALSE, dst0 ); break; case TGSI_OPCODE_TXP: - emit_tex( bld, inst, FALSE, TRUE ); + emit_tex( bld, inst, FALSE, TRUE, dst0 ); break; case TGSI_OPCODE_BRK: + /* FIXME */ return 0; break; case TGSI_OPCODE_IF: + /* FIXME */ return 0; break; case TGSI_OPCODE_BGNFOR: + /* deprecated */ + assert(0); return 0; break; case TGSI_OPCODE_REP: + /* deprecated */ + assert(0); return 0; break; case TGSI_OPCODE_ELSE: + /* FIXME */ return 0; break; case TGSI_OPCODE_ENDIF: + /* FIXME */ return 0; break; case TGSI_OPCODE_ENDFOR: + /* deprecated */ + assert(0); return 0; break; case TGSI_OPCODE_ENDREP: + /* deprecated */ + assert(0); return 0; break; case TGSI_OPCODE_PUSHA: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_POPA: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_CEIL: - return 0; + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + tmp0 = emit_fetch( bld, inst, 0, chan_index ); + dst0[chan_index] = lp_build_ceil(&bld->base, tmp0); + } break; case TGSI_OPCODE_I2F: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_NOT: + /* deprecated? */ + assert(0); return 0; break; -#if 0 case TGSI_OPCODE_TRUNC: FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { tmp0 = emit_fetch( bld, inst, 0, chan_index ); - emit_f2it( bld, 0 ); - emit_i2f( bld, 0 ); - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = lp_build_trunc(&bld->base, tmp0); } break; -#endif case TGSI_OPCODE_SHL: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_SHR: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_AND: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_OR: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_MOD: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_XOR: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_SAD: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_TXF: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_TXQ: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_CONT: + /* deprecated? */ + assert(0); return 0; break; @@ -1266,10 +1386,28 @@ emit_instruction( return 0; break; + case TGSI_OPCODE_NOISE1: + case TGSI_OPCODE_NOISE2: + case TGSI_OPCODE_NOISE3: + case TGSI_OPCODE_NOISE4: + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + dst0[chan_index] = bld->base.zero; + } + break; + + case TGSI_OPCODE_NOP: + break; + default: return 0; } + if(info->num_dst) { + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + emit_store( bld, inst, 0, chan_index, dst0[chan_index]); + } + } + return 1; } @@ -1277,14 +1415,13 @@ emit_instruction( void lp_build_tgsi_soa(LLVMBuilderRef builder, const struct tgsi_token *tokens, - union lp_type type, + struct lp_type type, struct lp_build_mask_context *mask, LLVMValueRef consts_ptr, const LLVMValueRef *pos, const LLVMValueRef (*inputs)[NUM_CHANNELS], LLVMValueRef (*outputs)[NUM_CHANNELS], - lp_emit_fetch_texel_soa_callback emit_fetch_texel, - void *emit_fetch_texel_context) + struct lp_build_sampler_soa *sampler) { struct lp_build_tgsi_soa_context bld; struct tgsi_parse_context parse; @@ -1299,8 +1436,7 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, bld.inputs = inputs; bld.outputs = outputs; bld.consts_ptr = consts_ptr; - bld.emit_fetch_texel = emit_fetch_texel; - bld.emit_fetch_texel_context = emit_fetch_texel_context; + bld.sampler = sampler; tgsi_parse_init( &parse, tokens ); @@ -1309,16 +1445,18 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, switch( parse.FullToken.Token.Type ) { case TGSI_TOKEN_TYPE_DECLARATION: - /* Input already interpolated */ + /* Inputs already interpolated */ break; case TGSI_TOKEN_TYPE_INSTRUCTION: - if (!emit_instruction( &bld, &parse.FullToken.FullInstruction )) { + { unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode; const struct tgsi_opcode_info *info = tgsi_get_opcode_info(opcode); - _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n", - info ? info->mnemonic : "<invalid>"); - } + if (!emit_instruction( &bld, &parse.FullToken.FullInstruction, info )) + _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n", + info ? info->mnemonic : "<invalid>"); + } + break; case TGSI_TOKEN_TYPE_IMMEDIATE: diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.c b/src/gallium/drivers/llvmpipe/lp_bld_type.c index 8e0026fd973..606243d6c5a 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_type.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_type.c @@ -33,7 +33,7 @@ LLVMTypeRef -lp_build_elem_type(union lp_type type) +lp_build_elem_type(struct lp_type type) { if (type.floating) { switch(type.width) { @@ -55,7 +55,7 @@ lp_build_elem_type(union lp_type type) LLVMTypeRef -lp_build_vec_type(union lp_type type) +lp_build_vec_type(struct lp_type type) { LLVMTypeRef elem_type = lp_build_elem_type(type); return LLVMVectorType(elem_type, type.length); @@ -69,7 +69,7 @@ lp_build_vec_type(union lp_type type) * type and check for identity. */ boolean -lp_check_elem_type(union lp_type type, LLVMTypeRef elem_type) +lp_check_elem_type(struct lp_type type, LLVMTypeRef elem_type) { LLVMTypeKind elem_kind; @@ -107,7 +107,7 @@ lp_check_elem_type(union lp_type type, LLVMTypeRef elem_type) boolean -lp_check_vec_type(union lp_type type, LLVMTypeRef vec_type) +lp_check_vec_type(struct lp_type type, LLVMTypeRef vec_type) { LLVMTypeRef elem_type; @@ -128,7 +128,7 @@ lp_check_vec_type(union lp_type type, LLVMTypeRef vec_type) boolean -lp_check_value(union lp_type type, LLVMValueRef val) +lp_check_value(struct lp_type type, LLVMValueRef val) { LLVMTypeRef vec_type; @@ -143,24 +143,36 @@ lp_check_value(union lp_type type, LLVMValueRef val) LLVMTypeRef -lp_build_int_elem_type(union lp_type type) +lp_build_int_elem_type(struct lp_type type) { return LLVMIntType(type.width); } LLVMTypeRef -lp_build_int_vec_type(union lp_type type) +lp_build_int_vec_type(struct lp_type type) { LLVMTypeRef elem_type = lp_build_int_elem_type(type); return LLVMVectorType(elem_type, type.length); } +struct lp_type +lp_int_type(struct lp_type type) +{ + struct lp_type int_type; + + memset(&int_type, 0, sizeof int_type); + int_type.width = type.width; + int_type.length = type.length; + return int_type; +} + + void lp_build_context_init(struct lp_build_context *bld, LLVMBuilderRef builder, - union lp_type type) + struct lp_type type) { bld->builder = builder; bld->type = type; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.h b/src/gallium/drivers/llvmpipe/lp_bld_type.h index 3ce566be641..ee5ca3483c1 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_type.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_type.h @@ -56,58 +56,55 @@ * on the types used for intermediate computations, such as signed vs unsigned, * normalized values, or fixed point. */ -union lp_type { - struct { - /** - * Floating-point. Cannot be used with fixed. Integer numbers are - * represented by this zero. - */ - unsigned floating:1; - - /** - * Fixed-point. Cannot be used with floating. Integer numbers are - * represented by this zero. - */ - unsigned fixed:1; - - /** - * Whether it can represent negative values or not. - * - * If this is not set for floating point, it means that all values are - * assumed to be positive. - */ - unsigned sign:1; - - /** - * Whether values are normalized to fit [0, 1] interval, or [-1, 1] - * interval for signed types. - * - * For integer types it means the representable integer range should be - * interpreted as the interval above. - * - * For floating and fixed point formats it means the values should be - * clamped to the interval above. - */ - unsigned norm:1; - - /** - * Element width. - * - * For fixed point values, the fixed point is assumed to be at half the - * width. - */ - unsigned width:14; - - /** - * Vector length. - * - * width*length should be a power of two greater or equal to eight. - * - * @sa LP_MAX_VECTOR_LENGTH - */ - unsigned length:14; - }; - uint32_t value; +struct lp_type { + /** + * Floating-point. Cannot be used with fixed. Integer numbers are + * represented by this zero. + */ + unsigned floating:1; + + /** + * Fixed-point. Cannot be used with floating. Integer numbers are + * represented by this zero. + */ + unsigned fixed:1; + + /** + * Whether it can represent negative values or not. + * + * If this is not set for floating point, it means that all values are + * assumed to be positive. + */ + unsigned sign:1; + + /** + * Whether values are normalized to fit [0, 1] interval, or [-1, 1] + * interval for signed types. + * + * For integer types it means the representable integer range should be + * interpreted as the interval above. + * + * For floating and fixed point formats it means the values should be + * clamped to the interval above. + */ + unsigned norm:1; + + /** + * Element width. + * + * For fixed point values, the fixed point is assumed to be at half the + * width. + */ + unsigned width:14; + + /** + * Vector length. + * + * width*length should be a power of two greater or equal to eight. + * + * @sa LP_MAX_VECTOR_LENGTH + */ + unsigned length:14; }; @@ -124,7 +121,7 @@ struct lp_build_context * This not only describes the input/output LLVM types, but also whether * to normalize/clamp the results. */ - union lp_type type; + struct lp_type type; /** Same as lp_build_undef(type) */ LLVMValueRef undef; @@ -138,37 +135,41 @@ struct lp_build_context LLVMTypeRef -lp_build_elem_type(union lp_type type); +lp_build_elem_type(struct lp_type type); LLVMTypeRef -lp_build_vec_type(union lp_type type); +lp_build_vec_type(struct lp_type type); boolean -lp_check_elem_type(union lp_type type, LLVMTypeRef elem_type); +lp_check_elem_type(struct lp_type type, LLVMTypeRef elem_type); boolean -lp_check_vec_type(union lp_type type, LLVMTypeRef vec_type); +lp_check_vec_type(struct lp_type type, LLVMTypeRef vec_type); boolean -lp_check_value(union lp_type type, LLVMValueRef val); +lp_check_value(struct lp_type type, LLVMValueRef val); LLVMTypeRef -lp_build_int_elem_type(union lp_type type); +lp_build_int_elem_type(struct lp_type type); LLVMTypeRef -lp_build_int_vec_type(union lp_type type); +lp_build_int_vec_type(struct lp_type type); + + +struct lp_type +lp_int_type(struct lp_type type); void lp_build_context_init(struct lp_build_context *bld, LLVMBuilderRef builder, - union lp_type type); + struct lp_type type); #endif /* !LP_BLD_TYPE_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_clear.c b/src/gallium/drivers/llvmpipe/lp_clear.c index 580cca5b463..bdcff94b9bf 100644 --- a/src/gallium/drivers/llvmpipe/lp_clear.c +++ b/src/gallium/drivers/llvmpipe/lp_clear.c @@ -67,6 +67,7 @@ llvmpipe_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba, util_pack_color(rgba, ps->format, &cv); lp_tile_cache_clear(llvmpipe->cbuf_cache[i], rgba, cv); } + llvmpipe->dirty_render_cache = TRUE; } if (buffers & PIPE_CLEAR_DEPTHSTENCIL) { diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index 233d1df0e10..a4b2bd8c2ad 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -141,8 +141,6 @@ llvmpipe_is_texture_referenced( struct pipe_context *pipe, return PIPE_REFERENCED_FOR_WRITE; } - /* FIXME: we also need to do the same for the texture cache */ - return PIPE_UNREFERENCED; } diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index d288460a1b8..9465f763d50 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -44,15 +44,47 @@ static void lp_jit_init_globals(struct llvmpipe_screen *screen) { - /* struct lp_jit_context */ + LLVMTypeRef texture_type; + + /* struct lp_jit_texture */ { LLVMTypeRef elem_types[4]; + + elem_types[LP_JIT_TEXTURE_WIDTH] = LLVMInt32Type(); + elem_types[LP_JIT_TEXTURE_HEIGHT] = LLVMInt32Type(); + elem_types[LP_JIT_TEXTURE_STRIDE] = LLVMInt32Type(); + elem_types[LP_JIT_TEXTURE_DATA] = LLVMPointerType(LLVMInt8Type(), 0); + + texture_type = LLVMStructType(elem_types, Elements(elem_types), 0); + + LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, width, + screen->target, texture_type, + LP_JIT_TEXTURE_WIDTH); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, height, + screen->target, texture_type, + LP_JIT_TEXTURE_HEIGHT); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, stride, + screen->target, texture_type, + LP_JIT_TEXTURE_STRIDE); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, data, + screen->target, texture_type, + LP_JIT_TEXTURE_DATA); + LP_CHECK_STRUCT_SIZE(struct lp_jit_texture, + screen->target, texture_type); + + LLVMAddTypeName(screen->module, "texture", texture_type); + } + + /* struct lp_jit_context */ + { + LLVMTypeRef elem_types[5]; LLVMTypeRef context_type; elem_types[0] = LLVMPointerType(LLVMFloatType(), 0); /* constants */ elem_types[1] = LLVMPointerType(LLVMInt8Type(), 0); /* samplers */ elem_types[2] = LLVMFloatType(); /* alpha_ref_value */ elem_types[3] = LLVMPointerType(LLVMInt8Type(), 0); /* blend_color */ + elem_types[4] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); /* textures */ context_type = LLVMStructType(elem_types, Elements(elem_types), 0); @@ -64,6 +96,9 @@ lp_jit_init_globals(struct llvmpipe_screen *screen) screen->target, context_type, 2); LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, blend_color, screen->target, context_type, 3); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, textures, + screen->target, context_type, + LP_JIT_CONTEXT_TEXTURES_INDEX); LP_CHECK_STRUCT_SIZE(struct lp_jit_context, screen->target, context_type); diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h index a7fb60f9f5c..58f716ede29 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.h +++ b/src/gallium/drivers/llvmpipe/lp_jit.h @@ -38,11 +38,31 @@ #include "lp_bld_struct.h" +#include "pipe/p_state.h" + struct tgsi_sampler; struct llvmpipe_screen; +struct lp_jit_texture +{ + uint32_t width; + uint32_t height; + uint32_t stride; + const void *data; +}; + + +enum { + LP_JIT_TEXTURE_WIDTH = 0, + LP_JIT_TEXTURE_HEIGHT, + LP_JIT_TEXTURE_STRIDE, + LP_JIT_TEXTURE_DATA +}; + + + /** * This structure is passed directly to the generated fragment shader. * @@ -60,11 +80,12 @@ struct lp_jit_context struct tgsi_sampler **samplers; - /* TODO: alpha reference value */ float alpha_ref_value; - /* TODO: blend constant color */ + /* FIXME: store (also?) in floats */ uint8_t *blend_color; + + struct lp_jit_texture textures[PIPE_MAX_SAMPLERS]; }; @@ -80,6 +101,11 @@ struct lp_jit_context #define lp_jit_context_blend_color(_builder, _ptr) \ lp_build_struct_get(_builder, _ptr, 3, "blend_color") +#define LP_JIT_CONTEXT_TEXTURES_INDEX 4 + +#define lp_jit_context_textures(_builder, _ptr) \ + lp_build_struct_get_ptr(_builder, _ptr, LP_JIT_CONTEXT_TEXTURES_INDEX, "textures") + typedef void (*lp_jit_frag_func)(struct lp_jit_context *context, diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 125035771e5..0ce1a37bd47 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -27,8 +27,6 @@ #include "util/u_memory.h" -#include "util/u_simple_screen.h" -#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_defines.h" #include "pipe/p_screen.h" @@ -196,8 +194,7 @@ static void llvmpipe_destroy_screen( struct pipe_screen *_screen ) { struct llvmpipe_screen *screen = llvmpipe_screen(_screen); - - struct pipe_winsys *winsys = _screen->winsys; + struct llvmpipe_winsys *winsys = screen->winsys; lp_jit_screen_cleanup(screen); diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index d145f6d6bbc..2d2fc19a65f 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -44,6 +44,7 @@ #include "pipe/p_thread.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "lp_bld_debug.h" #include "lp_tile_cache.h" #include "lp_tile_soa.h" @@ -162,12 +163,12 @@ shade_quads(struct llvmpipe_context *llvmpipe, else depth = NULL; - /* TODO: blend color */ + /* XXX: This will most likely fail on 32bit x86 without -mstackrealign */ + assert(lp_check_alignment(mask, 16)); - assert((((uintptr_t)mask) & 0xf) == 0); - assert((((uintptr_t)depth) & 0xf) == 0); - assert((((uintptr_t)color) & 0xf) == 0); - assert((((uintptr_t)llvmpipe->jit_context.blend_color) & 0xf) == 0); + assert(lp_check_alignment(depth, 16)); + assert(lp_check_alignment(color, 16)); + assert(lp_check_alignment(llvmpipe->jit_context.blend_color, 16)); /* run shader */ fs->current->jit_function( &llvmpipe->jit_context, diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h index fb10329887d..7b26ce61a38 100644 --- a/src/gallium/drivers/llvmpipe/lp_state.h +++ b/src/gallium/drivers/llvmpipe/lp_state.h @@ -36,6 +36,7 @@ #include "pipe/p_state.h" #include "tgsi/tgsi_scan.h" #include "lp_jit.h" +#include "lp_bld_sample.h" /* for struct lp_sampler_static_state */ #define LP_NEW_VIEWPORT 0x1 @@ -57,16 +58,20 @@ struct tgsi_sampler; struct vertex_info; - +struct pipe_context; +struct llvmpipe_context; struct lp_fragment_shader; struct lp_fragment_shader_variant_key { + enum pipe_format zsbuf_format; struct pipe_depth_state depth; struct pipe_alpha_state alpha; struct pipe_blend_state blend; + + struct lp_sampler_static_state sampler[PIPE_MAX_SAMPLERS]; }; diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index 6fbb057937e..e87976b9f36 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -250,7 +250,9 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ) if (llvmpipe->dirty & (LP_NEW_FS | LP_NEW_BLEND | - LP_NEW_DEPTH_STENCIL_ALPHA)) + LP_NEW_DEPTH_STENCIL_ALPHA | + LP_NEW_SAMPLER | + LP_NEW_TEXTURE)) llvmpipe_update_fs( llvmpipe ); diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 94170bd7161..9faed5a0b18 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -85,6 +85,7 @@ #include "lp_context.h" #include "lp_state.h" #include "lp_quad.h" +#include "lp_tex_sample.h" static const unsigned char quad_offset_x[4] = {0, 1, 0, 1}; @@ -130,21 +131,20 @@ generate_pos0(LLVMBuilderRef builder, * Generate the depth test. */ static void -generate_depth(struct llvmpipe_context *lp, - LLVMBuilderRef builder, - const struct pipe_depth_state *state, - union lp_type src_type, +generate_depth(LLVMBuilderRef builder, + const struct lp_fragment_shader_variant_key *key, + struct lp_type src_type, struct lp_build_mask_context *mask, LLVMValueRef src, LLVMValueRef dst_ptr) { const struct util_format_description *format_desc; - union lp_type dst_type; + struct lp_type dst_type; - if(!lp->framebuffer.zsbuf) + if(!key->depth.enabled) return; - format_desc = util_format_description(lp->framebuffer.zsbuf->format); + format_desc = util_format_description(key->zsbuf_format); assert(format_desc); /* Pick the depth type. */ @@ -164,7 +164,7 @@ generate_depth(struct llvmpipe_context *lp, #endif lp_build_depth_test(builder, - state, + &key->depth, dst_type, format_desc, mask, @@ -173,107 +173,6 @@ generate_depth(struct llvmpipe_context *lp, } -struct build_fetch_texel_context -{ - LLVMValueRef context_ptr; - - LLVMValueRef samplers_ptr; - - /** Coords/texels store */ - LLVMValueRef store_ptr; -}; - - -void PIPE_CDECL -lp_fetch_texel_soa( struct tgsi_sampler **samplers, - uint32_t unit, - float *store ) -{ - struct tgsi_sampler *sampler = samplers[unit]; - -#if 0 - uint j; - - debug_printf("%s sampler: %p (%p) store: %p\n", - __FUNCTION__, - sampler, *sampler, - store ); - - debug_printf("lodbias %f\n", store[12]); - - for (j = 0; j < 4; j++) - debug_printf("sample %d texcoord %f %f\n", - j, - store[0+j], - store[4+j]); -#endif - - { - float rgba[NUM_CHANNELS][QUAD_SIZE]; - sampler->get_samples(sampler, - &store[0], - &store[4], - &store[8], - 0.0f, /*store[12], lodbias */ - rgba); - memcpy(store, rgba, sizeof rgba); - } - -#if 0 - for (j = 0; j < 4; j++) - debug_printf("sample %d result %f %f %f %f\n", - j, - store[0+j], - store[4+j], - store[8+j], - store[12+j]); -#endif -} - - -static void -emit_fetch_texel( LLVMBuilderRef builder, - void *context, - unsigned unit, - unsigned num_coords, - const LLVMValueRef *coords, - LLVMValueRef lodbias, - LLVMValueRef *texel) -{ - struct build_fetch_texel_context *bld = context; - LLVMTypeRef vec_type = LLVMTypeOf(coords[0]); - LLVMValueRef args[3]; - unsigned i; - - if(!bld->samplers_ptr) - bld->samplers_ptr = lp_jit_context_samplers(builder, bld->context_ptr); - - if(!bld->store_ptr) - bld->store_ptr = LLVMBuildArrayAlloca(builder, - vec_type, - LLVMConstInt(LLVMInt32Type(), 4, 0), - "texel_store"); - - for (i = 0; i < num_coords; i++) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - LLVMValueRef coord_ptr = LLVMBuildGEP(builder, bld->store_ptr, &index, 1, ""); - LLVMBuildStore(builder, coords[i], coord_ptr); - } - - args[0] = bld->samplers_ptr; - args[1] = LLVMConstInt(LLVMInt32Type(), unit, 0); - args[2] = bld->store_ptr; - - lp_build_intrinsic(builder, "fetch_texel", LLVMVoidType(), args, 3); - - for (i = 0; i < NUM_CHANNELS; ++i) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - LLVMValueRef texel_ptr = LLVMBuildGEP(builder, bld->store_ptr, &index, 1, ""); - texel[i] = LLVMBuildLoad(builder, texel_ptr, ""); - } -} - - /** * Generate the fragment shader, depth/stencil test, and alpha tests. */ @@ -282,11 +181,11 @@ generate_fs(struct llvmpipe_context *lp, struct lp_fragment_shader *shader, const struct lp_fragment_shader_variant_key *key, LLVMBuilderRef builder, - union lp_type type, + struct lp_type type, LLVMValueRef context_ptr, unsigned i, const struct lp_build_interp_soa_context *interp, - struct build_fetch_texel_context *sampler, + struct lp_build_sampler_soa *sampler, LLVMValueRef *pmask, LLVMValueRef *color, LLVMValueRef depth_ptr) @@ -298,6 +197,7 @@ generate_fs(struct llvmpipe_context *lp, LLVMValueRef consts_ptr; LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS]; LLVMValueRef z = interp->pos[2]; + struct lp_build_flow_context *flow; struct lp_build_mask_context mask; boolean early_depth_test; unsigned attrib; @@ -309,25 +209,35 @@ generate_fs(struct llvmpipe_context *lp, consts_ptr = lp_jit_context_constants(builder, context_ptr); - lp_build_mask_begin(&mask, builder, type, *pmask); + flow = lp_build_flow_create(builder); + + memset(outputs, 0, sizeof outputs); + + lp_build_flow_scope_begin(flow); + + /* Declare the color and z variables */ + for(chan = 0; chan < NUM_CHANNELS; ++chan) { + color[chan] = LLVMGetUndef(vec_type); + lp_build_flow_scope_declare(flow, &color[chan]); + } + lp_build_flow_scope_declare(flow, &z); + + lp_build_mask_begin(&mask, flow, type, *pmask); early_depth_test = - lp->depth_stencil->depth.enabled && - lp->framebuffer.zsbuf && - !lp->depth_stencil->alpha.enabled && - !lp->fs->info.uses_kill && - !lp->fs->info.writes_z; + key->depth.enabled && + !key->alpha.enabled && + !shader->info.uses_kill && + !shader->info.writes_z; if(early_depth_test) - generate_depth(lp, builder, &key->depth, + generate_depth(builder, key, type, &mask, z, depth_ptr); - memset(outputs, 0, sizeof outputs); - lp_build_tgsi_soa(builder, tokens, type, &mask, consts_ptr, interp->pos, interp->inputs, - outputs, emit_fetch_texel, sampler); + outputs, sampler); for (attrib = 0; attrib < shader->info.num_outputs; ++attrib) { for(chan = 0; chan < NUM_CHANNELS; ++chan) { @@ -368,12 +278,16 @@ generate_fs(struct llvmpipe_context *lp, } if(!early_depth_test) - generate_depth(lp, builder, &key->depth, + generate_depth(builder, key, type, &mask, z, depth_ptr); lp_build_mask_end(&mask); + lp_build_flow_scope_end(flow); + + lp_build_flow_destroy(flow); + *pmask = mask.value; } @@ -385,13 +299,15 @@ generate_fs(struct llvmpipe_context *lp, static void generate_blend(const struct pipe_blend_state *blend, LLVMBuilderRef builder, - union lp_type type, + struct lp_type type, LLVMValueRef context_ptr, LLVMValueRef mask, LLVMValueRef *src, LLVMValueRef dst_ptr) { struct lp_build_context bld; + struct lp_build_flow_context *flow; + struct lp_build_mask_context mask_ctx; LLVMTypeRef vec_type; LLVMTypeRef int_vec_type; LLVMValueRef const_ptr; @@ -400,11 +316,14 @@ generate_blend(const struct pipe_blend_state *blend, LLVMValueRef res[4]; unsigned chan; + lp_build_context_init(&bld, builder, type); + + flow = lp_build_flow_create(builder); + lp_build_mask_begin(&mask_ctx, flow, type, mask); + vec_type = lp_build_vec_type(type); int_vec_type = lp_build_int_vec_type(type); - lp_build_context_init(&bld, builder, type); - const_ptr = lp_jit_context_blend_color(builder, context_ptr); const_ptr = LLVMBuildBitCast(builder, const_ptr, LLVMPointerType(vec_type, 0), ""); @@ -422,11 +341,16 @@ generate_blend(const struct pipe_blend_state *blend, lp_build_blend_soa(builder, blend, type, src, dst, con, res); for(chan = 0; chan < 4; ++chan) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0); - lp_build_name(res[chan], "res.%c", "rgba"[chan]); - res[chan] = lp_build_select(&bld, mask, res[chan], dst[chan]); - LLVMBuildStore(builder, res[chan], LLVMBuildGEP(builder, dst_ptr, &index, 1, "")); + if(blend->colormask & (1 << chan)) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0); + lp_build_name(res[chan], "res.%c", "rgba"[chan]); + res[chan] = lp_build_select(&bld, mask, res[chan], dst[chan]); + LLVMBuildStore(builder, res[chan], LLVMBuildGEP(builder, dst_ptr, &index, 1, "")); + } } + + lp_build_mask_end(&mask_ctx); + lp_build_flow_destroy(flow); } @@ -440,8 +364,8 @@ generate_fragment(struct llvmpipe_context *lp, { struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen); struct lp_fragment_shader_variant *variant; - union lp_type fs_type; - union lp_type blend_type; + struct lp_type fs_type; + struct lp_type blend_type; LLVMTypeRef fs_elem_type; LLVMTypeRef fs_vec_type; LLVMTypeRef fs_int_vec_type; @@ -462,7 +386,7 @@ generate_fragment(struct llvmpipe_context *lp, LLVMBuilderRef builder; LLVMValueRef x0; LLVMValueRef y0; - struct build_fetch_texel_context sampler; + struct lp_build_sampler_soa *sampler; struct lp_build_interp_soa_context interp; LLVMValueRef fs_mask[LP_MAX_VECTOR_LENGTH]; LLVMValueRef fs_out_color[NUM_CHANNELS][LP_MAX_VECTOR_LENGTH]; @@ -507,7 +431,7 @@ generate_fragment(struct llvmpipe_context *lp, /* TODO: actually pick these based on the fs and color buffer * characteristics. */ - fs_type.value = 0; + memset(&fs_type, 0, sizeof fs_type); fs_type.floating = TRUE; /* floating point values */ fs_type.sign = TRUE; /* values are signed */ fs_type.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */ @@ -515,7 +439,7 @@ generate_fragment(struct llvmpipe_context *lp, fs_type.length = 4; /* 4 element per vector */ num_fs = 4; - blend_type.value = 0; + memset(&blend_type, 0, sizeof blend_type); blend_type.floating = FALSE; /* values are integers */ blend_type.sign = FALSE; /* values are unsigned */ blend_type.norm = TRUE; /* values are in [0,1] or [-1,1] */ @@ -586,8 +510,13 @@ generate_fragment(struct llvmpipe_context *lp, a0_ptr, dadx_ptr, dady_ptr, x0, y0, 2, 0); - memset(&sampler, 0, sizeof sampler); - sampler.context_ptr = context_ptr; +#if 0 + /* C texture sampling */ + sampler = lp_c_sampler_soa_create(context_ptr); +#else + /* code generated texture sampling */ + sampler = lp_llvm_sampler_soa_create(key->sampler, context_ptr); +#endif for(i = 0; i < num_fs; ++i) { LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); @@ -606,7 +535,7 @@ generate_fragment(struct llvmpipe_context *lp, context_ptr, i, &interp, - &sampler, + sampler, &fs_mask[i], out_color, depth_ptr_i); @@ -615,6 +544,8 @@ generate_fragment(struct llvmpipe_context *lp, fs_out_color[chan][i] = out_color[chan]; } + sampler->destroy(sampler); + /* * Convert the fs's output color and mask to fit to the blending type. */ @@ -765,18 +696,45 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe, */ static void make_variant_key(struct llvmpipe_context *lp, + struct lp_fragment_shader *shader, struct lp_fragment_shader_variant_key *key) { + unsigned i; + memset(key, 0, sizeof *key); - memcpy(&key->depth, &lp->depth_stencil->depth, sizeof key->depth); + if(lp->framebuffer.zsbuf && + lp->depth_stencil->depth.enabled) { + key->zsbuf_format = lp->framebuffer.zsbuf->format; + memcpy(&key->depth, &lp->depth_stencil->depth, sizeof key->depth); + } key->alpha.enabled = lp->depth_stencil->alpha.enabled; if(key->alpha.enabled) key->alpha.func = lp->depth_stencil->alpha.func; /* alpha.ref_value is passed in jit_context */ - memcpy(&key->blend, lp->blend, sizeof key->blend); + if(lp->framebuffer.cbufs[0]) { + const struct util_format_description *format_desc; + unsigned chan; + + memcpy(&key->blend, lp->blend, sizeof key->blend); + + format_desc = util_format_description(lp->framebuffer.cbufs[0]->format); + assert(format_desc->layout == UTIL_FORMAT_COLORSPACE_RGB || + format_desc->layout == UTIL_FORMAT_COLORSPACE_SRGB); + + /* mask out color channels not present in the color buffer */ + for(chan = 0; chan < 4; ++chan) { + enum util_format_swizzle swizzle = format_desc->swizzle[chan]; + if(swizzle > 4) + key->blend.colormask &= ~(1 << chan); + } + } + + for(i = 0; i < PIPE_MAX_SAMPLERS; ++i) + if(shader->info.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) + lp_sampler_static_state(&key->sampler[i], lp->texture[i], lp->sampler[i]); } @@ -787,7 +745,7 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) struct lp_fragment_shader_variant_key key; struct lp_fragment_shader_variant *variant; - make_variant_key(lp, &key); + make_variant_key(lp, shader, &key); variant = shader->variants; while(variant) { diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/src/gallium/drivers/llvmpipe/lp_state_sampler.c index 4fef541b1e3..c69d90c723a 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c +++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c @@ -98,6 +98,16 @@ llvmpipe_set_sampler_textures(struct pipe_context *pipe, pipe_texture_reference(&llvmpipe->texture[i], tex); lp_tex_tile_cache_set_texture(llvmpipe->tex_cache[i], tex); + + if(tex) { + struct llvmpipe_texture *lp_tex = llvmpipe_texture(tex); + struct lp_jit_texture *jit_tex = &llvmpipe->jit_context.textures[i]; + jit_tex->width = tex->width[0]; + jit_tex->height = tex->height[0]; + jit_tex->stride = lp_tex->stride[0]; + if(!lp_tex->dt) + jit_tex->data = lp_tex->data; + } } llvmpipe->num_textures = num; diff --git a/src/gallium/drivers/llvmpipe/lp_test.h b/src/gallium/drivers/llvmpipe/lp_test.h index 69aaae26e0a..a88e110c663 100644 --- a/src/gallium/drivers/llvmpipe/lp_test.h +++ b/src/gallium/drivers/llvmpipe/lp_test.h @@ -86,43 +86,43 @@ random_float(void); void -dump_type(FILE *fp, union lp_type type); +dump_type(FILE *fp, struct lp_type type); double -read_elem(union lp_type type, const void *src, unsigned index); +read_elem(struct lp_type type, const void *src, unsigned index); void -write_elem(union lp_type type, void *dst, unsigned index, double src); +write_elem(struct lp_type type, void *dst, unsigned index, double src); void -random_elem(union lp_type type, void *dst, unsigned index); +random_elem(struct lp_type type, void *dst, unsigned index); void -read_vec(union lp_type type, const void *src, double *dst); +read_vec(struct lp_type type, const void *src, double *dst); void -write_vec(union lp_type type, void *dst, const double *src); +write_vec(struct lp_type type, void *dst, const double *src); void -random_vec(union lp_type type, void *dst); +random_vec(struct lp_type type, void *dst); boolean -compare_vec_with_eps(union lp_type type, const void *res, const void *ref, double eps); +compare_vec_with_eps(struct lp_type type, const void *res, const void *ref, double eps); boolean -compare_vec(union lp_type type, const void *res, const void *ref); +compare_vec(struct lp_type type, const void *res, const void *ref); void -dump_vec(FILE *fp, union lp_type type, const void *src); +dump_vec(FILE *fp, struct lp_type type, const void *src); #endif /* !LP_TEST_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c index 8dfad468e3c..1b57ea2a5c2 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c @@ -80,7 +80,7 @@ static void write_tsv_row(FILE *fp, const struct pipe_blend_state *blend, enum vector_mode mode, - union lp_type type, + struct lp_type type, double cycles, boolean success) { @@ -125,7 +125,7 @@ static void dump_blend_type(FILE *fp, const struct pipe_blend_state *blend, enum vector_mode mode, - union lp_type type) + struct lp_type type) { fprintf(fp, "%s", mode ? "soa" : "aos"); @@ -153,7 +153,7 @@ static LLVMValueRef add_blend_test(LLVMModuleRef module, const struct pipe_blend_state *blend, enum vector_mode mode, - union lp_type type) + struct lp_type type) { LLVMTypeRef ret_type; LLVMTypeRef vec_type; @@ -467,7 +467,7 @@ test_one(unsigned verbose, FILE *fp, const struct pipe_blend_state *blend, enum vector_mode mode, - union lp_type type) + struct lp_type type) { LLVMModuleRef module = NULL; LLVMValueRef func = NULL; @@ -765,10 +765,10 @@ blend_funcs[] = { }; -const union lp_type blend_types[] = { +const struct lp_type blend_types[] = { /* float, fixed, sign, norm, width, len */ - {{ TRUE, FALSE, FALSE, TRUE, 32, 4 }}, /* f32 x 4 */ - {{ FALSE, FALSE, FALSE, TRUE, 8, 16 }}, /* u8n x 16 */ + { TRUE, FALSE, FALSE, TRUE, 32, 4 }, /* f32 x 4 */ + { FALSE, FALSE, FALSE, TRUE, 8, 16 }, /* u8n x 16 */ }; @@ -788,7 +788,7 @@ test_all(unsigned verbose, FILE *fp) const unsigned *alpha_dst_factor; struct pipe_blend_state blend; enum vector_mode mode; - const union lp_type *type; + const struct lp_type *type; bool success = TRUE; for(rgb_func = blend_funcs; rgb_func < &blend_funcs[num_funcs]; ++rgb_func) { @@ -841,7 +841,7 @@ test_some(unsigned verbose, FILE *fp, unsigned long n) const unsigned *alpha_dst_factor; struct pipe_blend_state blend; enum vector_mode mode; - const union lp_type *type; + const struct lp_type *type; unsigned long i; bool success = TRUE; diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c index e6489834af5..ae2697143f4 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_conv.c +++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c @@ -59,8 +59,8 @@ write_tsv_header(FILE *fp) static void write_tsv_row(FILE *fp, - union lp_type src_type, - union lp_type dst_type, + struct lp_type src_type, + struct lp_type dst_type, double cycles, boolean success) { @@ -80,8 +80,8 @@ write_tsv_row(FILE *fp, static void dump_conv_types(FILE *fp, - union lp_type src_type, - union lp_type dst_type) + struct lp_type src_type, + struct lp_type dst_type) { fprintf(fp, "src_type="); dump_type(fp, src_type); @@ -96,8 +96,8 @@ dump_conv_types(FILE *fp, static LLVMValueRef add_conv_test(LLVMModuleRef module, - union lp_type src_type, unsigned num_srcs, - union lp_type dst_type, unsigned num_dsts) + struct lp_type src_type, unsigned num_srcs, + struct lp_type dst_type, unsigned num_dsts) { LLVMTypeRef args[2]; LLVMValueRef func; @@ -145,8 +145,8 @@ add_conv_test(LLVMModuleRef module, static boolean test_one(unsigned verbose, FILE *fp, - union lp_type src_type, - union lp_type dst_type) + struct lp_type src_type, + struct lp_type dst_type) { LLVMModuleRef module = NULL; LLVMValueRef func = NULL; @@ -343,35 +343,35 @@ test_one(unsigned verbose, } -const union lp_type conv_types[] = { +const struct lp_type conv_types[] = { /* float, fixed, sign, norm, width, len */ - {{ TRUE, FALSE, TRUE, TRUE, 32, 4 }}, - {{ TRUE, FALSE, TRUE, FALSE, 32, 4 }}, - {{ TRUE, FALSE, FALSE, TRUE, 32, 4 }}, - {{ TRUE, FALSE, FALSE, FALSE, 32, 4 }}, + { TRUE, FALSE, TRUE, TRUE, 32, 4 }, + { TRUE, FALSE, TRUE, FALSE, 32, 4 }, + { TRUE, FALSE, FALSE, TRUE, 32, 4 }, + { TRUE, FALSE, FALSE, FALSE, 32, 4 }, /* TODO: test fixed formats too */ - {{ FALSE, FALSE, TRUE, TRUE, 16, 8 }}, - {{ FALSE, FALSE, TRUE, FALSE, 16, 8 }}, - {{ FALSE, FALSE, FALSE, TRUE, 16, 8 }}, - {{ FALSE, FALSE, FALSE, FALSE, 16, 8 }}, - - {{ FALSE, FALSE, TRUE, TRUE, 32, 4 }}, - {{ FALSE, FALSE, TRUE, FALSE, 32, 4 }}, - {{ FALSE, FALSE, FALSE, TRUE, 32, 4 }}, - {{ FALSE, FALSE, FALSE, FALSE, 32, 4 }}, - - {{ FALSE, FALSE, TRUE, TRUE, 16, 8 }}, - {{ FALSE, FALSE, TRUE, FALSE, 16, 8 }}, - {{ FALSE, FALSE, FALSE, TRUE, 16, 8 }}, - {{ FALSE, FALSE, FALSE, FALSE, 16, 8 }}, - - {{ FALSE, FALSE, TRUE, TRUE, 8, 16 }}, - {{ FALSE, FALSE, TRUE, FALSE, 8, 16 }}, - {{ FALSE, FALSE, FALSE, TRUE, 8, 16 }}, - {{ FALSE, FALSE, FALSE, FALSE, 8, 16 }}, + { FALSE, FALSE, TRUE, TRUE, 16, 8 }, + { FALSE, FALSE, TRUE, FALSE, 16, 8 }, + { FALSE, FALSE, FALSE, TRUE, 16, 8 }, + { FALSE, FALSE, FALSE, FALSE, 16, 8 }, + + { FALSE, FALSE, TRUE, TRUE, 32, 4 }, + { FALSE, FALSE, TRUE, FALSE, 32, 4 }, + { FALSE, FALSE, FALSE, TRUE, 32, 4 }, + { FALSE, FALSE, FALSE, FALSE, 32, 4 }, + + { FALSE, FALSE, TRUE, TRUE, 16, 8 }, + { FALSE, FALSE, TRUE, FALSE, 16, 8 }, + { FALSE, FALSE, FALSE, TRUE, 16, 8 }, + { FALSE, FALSE, FALSE, FALSE, 16, 8 }, + + { FALSE, FALSE, TRUE, TRUE, 8, 16 }, + { FALSE, FALSE, TRUE, FALSE, 8, 16 }, + { FALSE, FALSE, FALSE, TRUE, 8, 16 }, + { FALSE, FALSE, FALSE, FALSE, 8, 16 }, }; @@ -381,8 +381,8 @@ const unsigned num_types = sizeof(conv_types)/sizeof(conv_types[0]); boolean test_all(unsigned verbose, FILE *fp) { - const union lp_type *src_type; - const union lp_type *dst_type; + const struct lp_type *src_type; + const struct lp_type *dst_type; bool success = TRUE; for(src_type = conv_types; src_type < &conv_types[num_types]; ++src_type) { @@ -407,8 +407,8 @@ test_all(unsigned verbose, FILE *fp) boolean test_some(unsigned verbose, FILE *fp, unsigned long n) { - const union lp_type *src_type; - const union lp_type *dst_type; + const struct lp_type *src_type; + const struct lp_type *dst_type; unsigned long i; bool success = TRUE; diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c index 1d192355eed..d8455e56490 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_format.c +++ b/src/gallium/drivers/llvmpipe/lp_test_format.c @@ -119,7 +119,7 @@ add_load_rgba_test(LLVMModuleRef module, lp_build_loop_begin(builder, LLVMConstInt(LLVMInt32Type(), 1, 0), &loop); - rgba = lp_build_load_rgba(builder, format, ptr); + rgba = lp_build_load_rgba_aos(builder, format, ptr); LLVMBuildStore(builder, rgba, rgba_ptr); lp_build_loop_end(builder, LLVMConstInt(LLVMInt32Type(), 4, 0), NULL, &loop); @@ -160,7 +160,7 @@ add_store_rgba_test(LLVMModuleRef module, rgba = LLVMBuildLoad(builder, rgba_ptr, ""); - lp_build_store_rgba(builder, format, ptr, rgba); + lp_build_store_rgba_aos(builder, format, ptr, rgba); LLVMBuildRetVoid(builder); diff --git a/src/gallium/drivers/llvmpipe/lp_test_main.c b/src/gallium/drivers/llvmpipe/lp_test_main.c index 49213fb4f0b..c3bb8fadf70 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_main.c +++ b/src/gallium/drivers/llvmpipe/lp_test_main.c @@ -40,7 +40,7 @@ void dump_type(FILE *fp, - union lp_type type) + struct lp_type type) { fprintf(fp, "%s%s%u%sx%u", type.sign ? (type.floating || type.fixed ? "" : "s") : "u", @@ -52,7 +52,7 @@ dump_type(FILE *fp, double -read_elem(union lp_type type, const void *src, unsigned index) +read_elem(struct lp_type type, const void *src, unsigned index) { double scale = lp_const_scale(type); double value; @@ -115,7 +115,7 @@ read_elem(union lp_type type, const void *src, unsigned index) void -write_elem(union lp_type type, void *dst, unsigned index, double value) +write_elem(struct lp_type type, void *dst, unsigned index, double value) { assert(index < type.length); if(!type.sign && value < 0.0) @@ -184,7 +184,7 @@ write_elem(union lp_type type, void *dst, unsigned index, double value) void -random_elem(union lp_type type, void *dst, unsigned index) +random_elem(struct lp_type type, void *dst, unsigned index) { double value; assert(index < type.length); @@ -209,7 +209,7 @@ random_elem(union lp_type type, void *dst, unsigned index) void -read_vec(union lp_type type, const void *src, double *dst) +read_vec(struct lp_type type, const void *src, double *dst) { unsigned i; for (i = 0; i < type.length; ++i) @@ -218,7 +218,7 @@ read_vec(union lp_type type, const void *src, double *dst) void -write_vec(union lp_type type, void *dst, const double *src) +write_vec(struct lp_type type, void *dst, const double *src) { unsigned i; for (i = 0; i < type.length; ++i) @@ -234,7 +234,7 @@ random_float(void) void -random_vec(union lp_type type, void *dst) +random_vec(struct lp_type type, void *dst) { unsigned i; for (i = 0; i < type.length; ++i) @@ -243,7 +243,7 @@ random_vec(union lp_type type, void *dst) boolean -compare_vec_with_eps(union lp_type type, const void *res, const void *ref, double eps) +compare_vec_with_eps(struct lp_type type, const void *res, const void *ref, double eps) { unsigned i; for (i = 0; i < type.length; ++i) { @@ -259,7 +259,7 @@ compare_vec_with_eps(union lp_type type, const void *res, const void *ref, doubl boolean -compare_vec(union lp_type type, const void *res, const void *ref) +compare_vec(struct lp_type type, const void *res, const void *ref) { double eps = lp_const_eps(type); return compare_vec_with_eps(type, res, ref, eps); @@ -267,7 +267,7 @@ compare_vec(union lp_type type, const void *res, const void *ref) void -dump_vec(FILE *fp, union lp_type type, const void *src) +dump_vec(FILE *fp, struct lp_type type, const void *src) { unsigned i; for (i = 0; i < type.length; ++i) { diff --git a/src/gallium/drivers/llvmpipe/lp_tex_cache.c b/src/gallium/drivers/llvmpipe/lp_tex_cache.c index 23a94b5b0d5..773e8482425 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_cache.c +++ b/src/gallium/drivers/llvmpipe/lp_tex_cache.c @@ -154,7 +154,7 @@ lp_tex_tile_cache_validate_texture(struct llvmpipe_tex_tile_cache *tc) if (lpt->timestamp != tc->timestamp) { /* texture was modified, invalidate all cached tiles */ uint i; - _debug_printf("INV %d %d\n", tc->timestamp, lpt->timestamp); + debug_printf("INV %d %d\n", tc->timestamp, lpt->timestamp); for (i = 0; i < NUM_ENTRIES; i++) { tc->entries[i].addr.bits.invalid = 1; } diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.h b/src/gallium/drivers/llvmpipe/lp_tex_sample.h index 628ec3f1efd..9ad1bde9565 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample.h +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.h @@ -29,10 +29,13 @@ #define LP_TEX_SAMPLE_H +#include <llvm-c/Core.h> + #include "tgsi/tgsi_exec.h" struct llvmpipe_tex_tile_cache; +struct lp_sampler_static_state; /** @@ -75,4 +78,24 @@ lp_get_samples(struct tgsi_sampler *tgsi_sampler, float rgba[NUM_CHANNELS][QUAD_SIZE]); +/** + * Texture sampling code generator that just calls lp_get_samples C function + * for the actual sampling computation. + * + * @param context_ptr LLVM value with the pointer to the struct lp_jit_context. + */ +struct lp_build_sampler_soa * +lp_c_sampler_soa_create(LLVMValueRef context_ptr); + + +/** + * Pure-LLVM texture sampling code generator. + * + * @param context_ptr LLVM value with the pointer to the struct lp_jit_context. + */ +struct lp_build_sampler_soa * +lp_llvm_sampler_soa_create(const struct lp_sampler_static_state *key, + LLVMValueRef context_ptr); + + #endif /* LP_TEX_SAMPLE_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c index 94eb6dad5af..a1365a045f1 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample.c +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c @@ -1578,3 +1578,136 @@ out: tgsi_sampler->get_samples( tgsi_sampler, s, t, p, lodbias, rgba ); } + +void PIPE_CDECL +lp_fetch_texel_soa( struct tgsi_sampler **samplers, + uint32_t unit, + float *store ) +{ + struct tgsi_sampler *sampler = samplers[unit]; + +#if 0 + uint j; + + debug_printf("%s sampler: %p (%p) store: %p\n", + __FUNCTION__, + sampler, *sampler, + store ); + + debug_printf("lodbias %f\n", store[12]); + + for (j = 0; j < 4; j++) + debug_printf("sample %d texcoord %f %f\n", + j, + store[0+j], + store[4+j]); +#endif + + { + float rgba[NUM_CHANNELS][QUAD_SIZE]; + sampler->get_samples(sampler, + &store[0], + &store[4], + &store[8], + 0.0f, /*store[12], lodbias */ + rgba); + memcpy(store, rgba, sizeof rgba); + } + +#if 0 + for (j = 0; j < 4; j++) + debug_printf("sample %d result %f %f %f %f\n", + j, + store[0+j], + store[4+j], + store[8+j], + store[12+j]); +#endif +} + + +#include "lp_bld_type.h" +#include "lp_bld_intr.h" +#include "lp_bld_tgsi.h" + + +struct lp_c_sampler_soa +{ + struct lp_build_sampler_soa base; + + LLVMValueRef context_ptr; + + LLVMValueRef samplers_ptr; + + /** Coords/texels store */ + LLVMValueRef store_ptr; +}; + + +static void +lp_c_sampler_soa_destroy(struct lp_build_sampler_soa *sampler) +{ + FREE(sampler); +} + + +static void +lp_c_sampler_soa_emit_fetch_texel(struct lp_build_sampler_soa *_sampler, + LLVMBuilderRef builder, + struct lp_type type, + unsigned unit, + unsigned num_coords, + const LLVMValueRef *coords, + LLVMValueRef lodbias, + LLVMValueRef *texel) +{ + struct lp_c_sampler_soa *sampler = (struct lp_c_sampler_soa *)_sampler; + LLVMTypeRef vec_type = LLVMTypeOf(coords[0]); + LLVMValueRef args[3]; + unsigned i; + + if(!sampler->samplers_ptr) + sampler->samplers_ptr = lp_jit_context_samplers(builder, sampler->context_ptr); + + if(!sampler->store_ptr) + sampler->store_ptr = LLVMBuildArrayAlloca(builder, + vec_type, + LLVMConstInt(LLVMInt32Type(), 4, 0), + "texel_store"); + + for (i = 0; i < num_coords; i++) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef coord_ptr = LLVMBuildGEP(builder, sampler->store_ptr, &index, 1, ""); + LLVMBuildStore(builder, coords[i], coord_ptr); + } + + args[0] = sampler->samplers_ptr; + args[1] = LLVMConstInt(LLVMInt32Type(), unit, 0); + args[2] = sampler->store_ptr; + + lp_build_intrinsic(builder, "fetch_texel", LLVMVoidType(), args, 3); + + for (i = 0; i < NUM_CHANNELS; ++i) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef texel_ptr = LLVMBuildGEP(builder, sampler->store_ptr, &index, 1, ""); + texel[i] = LLVMBuildLoad(builder, texel_ptr, ""); + } +} + + +struct lp_build_sampler_soa * +lp_c_sampler_soa_create(LLVMValueRef context_ptr) +{ + struct lp_c_sampler_soa *sampler; + + sampler = CALLOC_STRUCT(lp_c_sampler_soa); + if(!sampler) + return NULL; + + sampler->base.destroy = lp_c_sampler_soa_destroy; + sampler->base.emit_fetch_texel = lp_c_sampler_soa_emit_fetch_texel; + sampler->context_ptr = context_ptr; + + return &sampler->base; +} + diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c new file mode 100644 index 00000000000..d2a6ae21f57 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c @@ -0,0 +1,196 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Texture sampling code generation + * + * This file is nothing more than ugly glue between three largely independent + * entities: + * - TGSI -> LLVM translation (i.e., lp_build_tgsi_soa) + * - texture sampling code generation (i.e., lp_build_sample_soa) + * - LLVM pipe driver + * + * All interesting code is in the functions mentioned above. There is really + * nothing to see here. + * + * @author Jose Fonseca <[email protected]> + */ + +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" +#include "lp_bld_debug.h" +#include "lp_bld_type.h" +#include "lp_bld_intr.h" +#include "lp_bld_sample.h" +#include "lp_bld_tgsi.h" +#include "lp_state.h" +#include "lp_tex_sample.h" + + +/** + * This provides the bridge between the sampler state store in lp_jit_context + * and lp_jit_texture and the sampler code generator. It provides the + * texture layout information required by the texture sampler code generator + * in terms of the state stored in lp_jit_context and lp_jit_texture in runtime. + */ +struct llvmpipe_sampler_dynamic_state +{ + struct lp_sampler_dynamic_state base; + + const struct lp_sampler_static_state *static_state; + + LLVMValueRef context_ptr; +}; + + +/** + * This is the bridge between our sampler and the TGSI translator. + */ +struct lp_llvm_sampler_soa +{ + struct lp_build_sampler_soa base; + + struct llvmpipe_sampler_dynamic_state dynamic_state; +}; + + +/** + * Fetch the specified member of the lp_jit_texture structure. + * + * @sa http://llvm.org/docs/GetElementPtr.html + */ +static LLVMValueRef +lp_llvm_texture_member(struct lp_sampler_dynamic_state *base, + LLVMBuilderRef builder, + unsigned unit, + unsigned member_index, + const char *member_name) +{ + struct llvmpipe_sampler_dynamic_state *state = (struct llvmpipe_sampler_dynamic_state *)base; + LLVMValueRef indices[4]; + LLVMValueRef ptr; + LLVMValueRef res; + + assert(unit < PIPE_MAX_SAMPLERS); + + /* context[0] */ + indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); + /* context[0].textures */ + indices[1] = LLVMConstInt(LLVMInt32Type(), LP_JIT_CONTEXT_TEXTURES_INDEX, 0); + /* context[0].textures[unit] */ + indices[2] = LLVMConstInt(LLVMInt32Type(), unit, 0); + /* context[0].textures[unit].member */ + indices[3] = LLVMConstInt(LLVMInt32Type(), member_index, 0); + + ptr = LLVMBuildGEP(builder, state->context_ptr, indices, Elements(indices), ""); + + res = LLVMBuildLoad(builder, ptr, ""); + + lp_build_name(res, "context.texture%u.%s", unit, member_name); + + return res; +} + + +/** + * Helper macro to instantiate the functions that generate the code to fetch + * the members of lp_jit_texture to fulfill the sampler code generator requests. + * + * This complexity is the price we have to pay to keep the texture sampler code + * generator a reusable module without dependencies to llvmpipe internals. + */ +#define LP_LLVM_TEXTURE_MEMBER(_name, _index) \ + static LLVMValueRef \ + lp_llvm_texture_##_name( struct lp_sampler_dynamic_state *base, \ + LLVMBuilderRef builder, \ + unsigned unit) \ + { \ + return lp_llvm_texture_member(base, builder, unit, _index, #_name ); \ + } + + +LP_LLVM_TEXTURE_MEMBER(width, LP_JIT_TEXTURE_WIDTH) +LP_LLVM_TEXTURE_MEMBER(height, LP_JIT_TEXTURE_HEIGHT) +LP_LLVM_TEXTURE_MEMBER(stride, LP_JIT_TEXTURE_STRIDE) +LP_LLVM_TEXTURE_MEMBER(data_ptr, LP_JIT_TEXTURE_DATA) + + +static void +lp_llvm_sampler_soa_destroy(struct lp_build_sampler_soa *sampler) +{ + FREE(sampler); +} + + +static void +lp_llvm_sampler_soa_emit_fetch_texel(struct lp_build_sampler_soa *base, + LLVMBuilderRef builder, + struct lp_type type, + unsigned unit, + unsigned num_coords, + const LLVMValueRef *coords, + LLVMValueRef lodbias, + LLVMValueRef *texel) +{ + struct lp_llvm_sampler_soa *sampler = (struct lp_llvm_sampler_soa *)base; + + assert(unit < PIPE_MAX_SAMPLERS); + + lp_build_sample_soa(builder, + &sampler->dynamic_state.static_state[unit], + &sampler->dynamic_state.base, + type, + unit, + num_coords, + coords, + lodbias, + texel); +} + + +struct lp_build_sampler_soa * +lp_llvm_sampler_soa_create(const struct lp_sampler_static_state *static_state, + LLVMValueRef context_ptr) +{ + struct lp_llvm_sampler_soa *sampler; + + sampler = CALLOC_STRUCT(lp_llvm_sampler_soa); + if(!sampler) + return NULL; + + sampler->base.destroy = lp_llvm_sampler_soa_destroy; + sampler->base.emit_fetch_texel = lp_llvm_sampler_soa_emit_fetch_texel; + sampler->dynamic_state.base.width = lp_llvm_texture_width; + sampler->dynamic_state.base.height = lp_llvm_texture_height; + sampler->dynamic_state.base.stride = lp_llvm_texture_stride; + sampler->dynamic_state.base.data_ptr = lp_llvm_texture_data_ptr; + sampler->dynamic_state.static_state = static_state; + sampler->dynamic_state.context_ptr = context_ptr; + + return &sampler->base; +} + diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index 1e9e8e49bfb..3a5f990e97e 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -116,6 +116,7 @@ struct nv50_state { unsigned miptree_nr; struct nouveau_stateobj *vertprog; struct nouveau_stateobj *fragprog; + struct nouveau_stateobj *programs; struct nouveau_stateobj *vtxfmt; struct nouveau_stateobj *vtxbuf; struct nouveau_stateobj *vtxattr; @@ -190,6 +191,7 @@ extern void nv50_clear(struct pipe_context *pipe, unsigned buffers, /* nv50_program.c */ extern void nv50_vertprog_validate(struct nv50_context *nv50); extern void nv50_fragprog_validate(struct nv50_context *nv50); +extern void nv50_linkage_validate(struct nv50_context *nv50); extern void nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p); /* nv50_state_validate.c */ diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c index 03b9243b828..93479a0314a 100644 --- a/src/gallium/drivers/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nv50/nv50_miptree.c @@ -148,6 +148,7 @@ nv50_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, mt->image_nr = 1; mt->level[0].pitch = *stride; mt->level[0].image_offset = CALLOC(1, sizeof(unsigned)); + mt->level[0].tile_mode = bo->tile_mode; nouveau_bo_ref(bo, &mt->base.bo); return &mt->base.base; diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 4a838529de7..eb90d5e66f9 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -112,6 +112,10 @@ struct nv50_pc { struct nv50_reg *temp_temp[16]; unsigned temp_temp_nr; + /* broadcast and destination replacement regs */ + struct nv50_reg *r_brdc; + struct nv50_reg *r_dst[4]; + unsigned interp_mode[32]; /* perspective interpolation registers */ struct nv50_reg *iv_p; @@ -124,6 +128,25 @@ struct nv50_pc { boolean allow32; }; +static INLINE void +ctor_reg(struct nv50_reg *reg, unsigned type, int index, int hw) +{ + reg->type = type; + reg->index = index; + reg->hw = hw; + reg->neg = 0; + reg->rhw = -1; + reg->acc = 0; +} + +static INLINE unsigned +popcnt4(uint32_t val) +{ + static const unsigned cnt[16] + = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 }; + return cnt[val & 0xf]; +} + static void alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg) { @@ -184,11 +207,8 @@ alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst) for (i = 0; i < NV50_SU_MAX_TEMP; i++) { if (!pc->r_temp[i]) { - r = CALLOC_STRUCT(nv50_reg); - r->type = P_TEMP; - r->index = -1; - r->hw = i; - r->rhw = -1; + r = MALLOC_STRUCT(nv50_reg); + ctor_reg(r, P_TEMP, -1, i); pc->r_temp[i] = r; return r; } @@ -254,10 +274,8 @@ alloc_temp4(struct nv50_pc *pc, struct nv50_reg *dst[4], int idx) return alloc_temp4(pc, dst, idx + 4); for (i = 0; i < 4; i++) { - dst[i] = CALLOC_STRUCT(nv50_reg); - dst[i]->type = P_TEMP; - dst[i]->index = -1; - dst[i]->hw = idx + i; + dst[i] = MALLOC_STRUCT(nv50_reg); + ctor_reg(dst[i], P_TEMP, -1, idx + i); pc->r_temp[idx + i] = dst[i]; } @@ -309,7 +327,7 @@ ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w) static struct nv50_reg * alloc_immd(struct nv50_pc *pc, float f) { - struct nv50_reg *r = CALLOC_STRUCT(nv50_reg); + struct nv50_reg *r = MALLOC_STRUCT(nv50_reg); unsigned hw; for (hw = 0; hw < pc->immd_nr * 4; hw++) @@ -319,9 +337,7 @@ alloc_immd(struct nv50_pc *pc, float f) if (hw == pc->immd_nr * 4) hw = ctor_immd(pc, f, -f, 0.5 * f, 0) * 4; - r->type = P_IMMD; - r->hw = hw; - r->index = -1; + ctor_reg(r, P_IMMD, -1, hw); return r; } @@ -786,6 +802,9 @@ emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) #define CVTOP_SAT 0x08 #define CVTOP_ABS 0x10 +/* 0x04 == 32 bit */ +/* 0x40 == dst is float */ +/* 0x80 == src is float */ #define CVT_F32_F32 0xc4 #define CVT_F32_S32 0x44 #define CVT_F32_U32 0x64 @@ -795,7 +814,7 @@ emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) static void emit_cvt(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src, - int wp, unsigned cop, unsigned fmt) + int wp, unsigned cvn, unsigned fmt) { struct nv50_program_exec *e; @@ -804,7 +823,7 @@ emit_cvt(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src, e->inst[0] |= 0xa0000000; e->inst[1] |= 0x00004000; - e->inst[1] |= (cop << 16); + e->inst[1] |= (cvn << 16); e->inst[1] |= (fmt << 24); set_src_0(pc, src, e); @@ -821,49 +840,80 @@ emit_cvt(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src, emit(pc, e); } +/* nv50 Condition codes: + * 0x1 = LT + * 0x2 = EQ + * 0x3 = LE + * 0x4 = GT + * 0x5 = NE + * 0x6 = GE + * 0x7 = set condition code ? (used before bra.lt/le/gt/ge) + * 0x8 = unordered bit (allows NaN) + */ static void -emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst, +emit_set(struct nv50_pc *pc, unsigned ccode, struct nv50_reg *dst, int wp, struct nv50_reg *src0, struct nv50_reg *src1) { + static const unsigned cc_swapped[8] = { 0, 4, 2, 6, 1, 5, 3, 7 }; + struct nv50_program_exec *e = exec(pc); - unsigned inv_cop[8] = { 0, 4, 2, 6, 1, 5, 3, 7 }; struct nv50_reg *rdst; - assert(c_op <= 7); + assert(ccode < 16); if (check_swap_src_0_1(pc, &src0, &src1)) - c_op = inv_cop[c_op]; + ccode = cc_swapped[ccode & 7] | (ccode & 8); rdst = dst; - if (dst->type != P_TEMP) + if (dst && dst->type != P_TEMP) dst = alloc_temp(pc, NULL); /* set.u32 */ set_long(pc, e); e->inst[0] |= 0xb0000000; - e->inst[1] |= (3 << 29); - e->inst[1] |= (c_op << 14); - /*XXX: breaks things, .u32 by default? - * decuda will disasm as .u16 and use .lo/.hi regs, but this - * doesn't seem to match what the hw actually does. - inst[1] |= 0x04000000; << breaks things.. .u32 by default? + e->inst[1] |= 0x60000000 | (ccode << 14); + + /* XXX: decuda will disasm as .u16 and use .lo/.hi regs, but + * that doesn't seem to match what the hw actually does + e->inst[1] |= 0x04000000; << breaks things, u32 by default ? */ - set_dst(pc, dst, e); + + if (wp >= 0) + set_pred_wr(pc, 1, wp, e); + if (dst) + set_dst(pc, dst, e); + else { + e->inst[0] |= 0x000001fc; + e->inst[1] |= 0x00000008; + } + set_src_0(pc, src0, e); set_src_1(pc, src1, e); - emit(pc, e); - /* cvt.f32.u32 */ - e = exec(pc); - e->inst[0] = 0xa0000001; - e->inst[1] = 0x64014780; - set_dst(pc, rdst, e); - set_src_0(pc, dst, e); emit(pc, e); - if (dst != rdst) + /* cvt.f32.u32/s32 (?) if we didn't only write the predicate */ + if (rdst) + emit_cvt(pc, rdst, dst, -1, CVTOP_ABS | CVTOP_RN, CVT_F32_S32); + if (rdst && rdst != dst) free_temp(pc, dst); } +static INLINE unsigned +map_tgsi_setop_cc(unsigned op) +{ + switch (op) { + case TGSI_OPCODE_SLT: return 0x1; + case TGSI_OPCODE_SGE: return 0x6; + case TGSI_OPCODE_SEQ: return 0x2; + case TGSI_OPCODE_SGT: return 0x4; + case TGSI_OPCODE_SLE: return 0x3; + case TGSI_OPCODE_SNE: return 0xd; + default: + assert(0); + return 0; + } +} + static INLINE void emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) { @@ -890,6 +940,12 @@ emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) emit_cvt(pc, dst, src, -1, CVTOP_ABS, CVT_F32_F32); } +static INLINE void +emit_sat(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ + emit_cvt(pc, dst, src, -1, CVTOP_SAT, CVT_F32_F32); +} + static void emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask, struct nv50_reg **src) @@ -1159,6 +1215,70 @@ negate_supported(const struct tgsi_full_instruction *insn, int i) } } +/* Return a read mask for source registers deduced from opcode & write mask. */ +static unsigned +nv50_tgsi_src_mask(const struct tgsi_full_instruction *insn, int c) +{ + unsigned x, mask = insn->FullDstRegisters[0].DstRegister.WriteMask; + + switch (insn->Instruction.Opcode) { + case TGSI_OPCODE_COS: + case TGSI_OPCODE_SIN: + return (mask & 0x8) | ((mask & 0x7) ? 0x1 : 0x0); + case TGSI_OPCODE_DP3: + return 0x7; + case TGSI_OPCODE_DP4: + case TGSI_OPCODE_DPH: + case TGSI_OPCODE_KIL: /* WriteMask ignored */ + return 0xf; + case TGSI_OPCODE_DST: + return mask & (c ? 0xa : 0x6); + case TGSI_OPCODE_EX2: + case TGSI_OPCODE_LG2: + case TGSI_OPCODE_POW: + case TGSI_OPCODE_RCP: + case TGSI_OPCODE_RSQ: + case TGSI_OPCODE_SCS: + return 0x1; + case TGSI_OPCODE_LIT: + return 0xb; + case TGSI_OPCODE_TEX: + case TGSI_OPCODE_TXP: + { + const struct tgsi_instruction_ext_texture *tex; + + assert(insn->Instruction.Extended); + tex = &insn->InstructionExtTexture; + + mask = 0x7; + if (insn->Instruction.Opcode == TGSI_OPCODE_TXP) + mask |= 0x8; + + switch (tex->Texture) { + case TGSI_TEXTURE_1D: + mask &= 0x9; + break; + case TGSI_TEXTURE_2D: + mask &= 0xb; + break; + default: + break; + } + } + return mask; + case TGSI_OPCODE_XPD: + x = 0; + if (mask & 1) x |= 0x6; + if (mask & 2) x |= 0x5; + if (mask & 4) x |= 0x3; + return x; + default: + break; + } + + return mask; +} + static struct nv50_reg * tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst) { @@ -1258,93 +1378,126 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, return r; } -/* returns TRUE if instruction can overwrite sources before they're read */ +/* return TRUE for ops that produce only a single result */ static boolean -direct2dest_op(const struct tgsi_full_instruction *insn) +is_scalar_op(unsigned op) { - if (insn->Instruction.Saturate) - return FALSE; - - switch (insn->Instruction.Opcode) { + switch (op) { case TGSI_OPCODE_COS: + case TGSI_OPCODE_DP2: case TGSI_OPCODE_DP3: case TGSI_OPCODE_DP4: case TGSI_OPCODE_DPH: - case TGSI_OPCODE_KIL: - case TGSI_OPCODE_LIT: + case TGSI_OPCODE_EX2: + case TGSI_OPCODE_LG2: case TGSI_OPCODE_POW: case TGSI_OPCODE_RCP: case TGSI_OPCODE_RSQ: - case TGSI_OPCODE_SCS: case TGSI_OPCODE_SIN: + /* + case TGSI_OPCODE_KIL: + case TGSI_OPCODE_LIT: + case TGSI_OPCODE_SCS: + */ + return TRUE; + default: + return FALSE; + } +} + +/* Returns a bitmask indicating which dst components depend + * on source s, component c (reverse of nv50_tgsi_src_mask). + */ +static unsigned +nv50_tgsi_dst_revdep(unsigned op, int s, int c) +{ + if (is_scalar_op(op)) + return 0x1; + + switch (op) { + case TGSI_OPCODE_DST: + return (1 << c) & (s ? 0xa : 0x6); + case TGSI_OPCODE_XPD: + switch (c) { + case 0: return 0x6; + case 1: return 0x5; + case 2: return 0x3; + case 3: return 0x0; + default: + assert(0); + return 0x0; + } + case TGSI_OPCODE_LIT: + case TGSI_OPCODE_SCS: case TGSI_OPCODE_TEX: case TGSI_OPCODE_TXP: - return FALSE; + /* these take care of dangerous swizzles themselves */ + return 0x0; + case TGSI_OPCODE_IF: + case TGSI_OPCODE_KIL: + /* don't call this function for these ops */ + assert(0); + return 0; default: - return TRUE; + /* linear vector instruction */ + return (1 << c); } } static boolean -nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) +nv50_program_tx_insn(struct nv50_pc *pc, + const struct tgsi_full_instruction *inst) { - const struct tgsi_full_instruction *inst = &tok->FullInstruction; - struct nv50_reg *rdst[4], *dst[4], *src[3][4], *temp; + struct nv50_reg *rdst[4], *dst[4], *brdc, *src[3][4], *temp; unsigned mask, sat, unit; - boolean assimilate = FALSE; int i, c; mask = inst->FullDstRegisters[0].DstRegister.WriteMask; sat = inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE; + memset(src, 0, sizeof(src)); + for (c = 0; c < 4; c++) { - if (mask & (1 << c)) + if ((mask & (1 << c)) && !pc->r_dst[c]) dst[c] = tgsi_dst(pc, c, &inst->FullDstRegisters[0]); else - dst[c] = NULL; - rdst[c] = NULL; - src[0][c] = NULL; - src[1][c] = NULL; - src[2][c] = NULL; + dst[c] = pc->r_dst[c]; + rdst[c] = dst[c]; } for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { const struct tgsi_full_src_register *fs = &inst->FullSrcRegisters[i]; + unsigned src_mask; + boolean neg_supp; + + src_mask = nv50_tgsi_src_mask(inst, i); + neg_supp = negate_supported(inst, i); if (fs->SrcRegister.File == TGSI_FILE_SAMPLER) unit = fs->SrcRegister.Index; for (c = 0; c < 4; c++) - src[i][c] = tgsi_src(pc, c, fs, - negate_supported(inst, i)); + if (src_mask & (1 << c)) + src[i][c] = tgsi_src(pc, c, fs, neg_supp); } - if (sat) { - for (c = 0; c < 4; c++) { - rdst[c] = dst[c]; - dst[c] = temp_temp(pc); - } + brdc = temp = pc->r_brdc; + if (brdc && brdc->type != P_TEMP) { + temp = temp_temp(pc); + if (sat) + brdc = temp; } else - if (direct2dest_op(inst)) { + if (sat) { for (c = 0; c < 4; c++) { - if (!dst[c] || dst[c]->type != P_TEMP) + if (!(mask & (1 << c)) || dst[c]->type == P_TEMP) continue; - - for (i = c + 1; i < 4; i++) { - if (dst[c] == src[0][i] || - dst[c] == src[1][i] || - dst[c] == src[2][i]) - break; - } - if (i == 4) - continue; - - assimilate = TRUE; rdst[c] = dst[c]; - dst[c] = alloc_temp(pc, NULL); + dst[c] = temp_temp(pc); } } + assert(brdc || !is_scalar_op(inst->Instruction.Opcode)); + switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ABS: for (c = 0; c < 4; c++) { @@ -1360,74 +1513,56 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) emit_add(pc, dst[c], src[0][c], src[1][c]); } break; - case TGSI_OPCODE_COS: - temp = temp_temp(pc); - emit_precossin(pc, temp, src[0][0]); - emit_flop(pc, 5, temp, temp); + case TGSI_OPCODE_CEIL: for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - emit_mov(pc, dst[c], temp); + emit_cvt(pc, dst[c], src[0][c], -1, + CVTOP_CEIL, CVT_F32_F32); + } + break; + case TGSI_OPCODE_COS: + if (mask & 8) { + emit_precossin(pc, temp, src[0][3]); + emit_flop(pc, 5, dst[3], temp); + if (!(mask &= 7)) + break; + if (temp == dst[3]) + temp = brdc = temp_temp(pc); } + emit_precossin(pc, temp, src[0][0]); + emit_flop(pc, 5, brdc, temp); break; case TGSI_OPCODE_DP3: - temp = temp_temp(pc); emit_mul(pc, temp, src[0][0], src[1][0]); emit_mad(pc, temp, src[0][1], src[1][1], temp); - emit_mad(pc, temp, src[0][2], src[1][2], temp); - for (c = 0; c < 4; c++) { - if (!(mask & (1 << c))) - continue; - emit_mov(pc, dst[c], temp); - } + emit_mad(pc, brdc, src[0][2], src[1][2], temp); break; case TGSI_OPCODE_DP4: - temp = temp_temp(pc); emit_mul(pc, temp, src[0][0], src[1][0]); emit_mad(pc, temp, src[0][1], src[1][1], temp); emit_mad(pc, temp, src[0][2], src[1][2], temp); - emit_mad(pc, temp, src[0][3], src[1][3], temp); - for (c = 0; c < 4; c++) { - if (!(mask & (1 << c))) - continue; - emit_mov(pc, dst[c], temp); - } + emit_mad(pc, brdc, src[0][3], src[1][3], temp); break; case TGSI_OPCODE_DPH: - temp = temp_temp(pc); emit_mul(pc, temp, src[0][0], src[1][0]); emit_mad(pc, temp, src[0][1], src[1][1], temp); emit_mad(pc, temp, src[0][2], src[1][2], temp); - emit_add(pc, temp, src[1][3], temp); - for (c = 0; c < 4; c++) { - if (!(mask & (1 << c))) - continue; - emit_mov(pc, dst[c], temp); - } + emit_add(pc, brdc, src[1][3], temp); break; case TGSI_OPCODE_DST: - { - struct nv50_reg *one = alloc_immd(pc, 1.0); - if (mask & (1 << 0)) - emit_mov(pc, dst[0], one); if (mask & (1 << 1)) emit_mul(pc, dst[1], src[0][1], src[1][1]); if (mask & (1 << 2)) emit_mov(pc, dst[2], src[0][2]); if (mask & (1 << 3)) emit_mov(pc, dst[3], src[1][3]); - FREE(one); - } + if (mask & (1 << 0)) + emit_mov_immdval(pc, dst[0], 1.0f); break; case TGSI_OPCODE_EX2: - temp = temp_temp(pc); emit_preex2(pc, temp, src[0][0]); - emit_flop(pc, 6, temp, temp); - for (c = 0; c < 4; c++) { - if (!(mask & (1 << c))) - continue; - emit_mov(pc, dst[c], temp); - } + emit_flop(pc, 6, brdc, temp); break; case TGSI_OPCODE_FLR: for (c = 0; c < 4; c++) { @@ -1450,19 +1585,12 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) emit_kil(pc, src[0][1]); emit_kil(pc, src[0][2]); emit_kil(pc, src[0][3]); - pc->p->cfg.fp.regs[2] |= 0x00100000; break; case TGSI_OPCODE_LIT: emit_lit(pc, &dst[0], mask, &src[0][0]); break; case TGSI_OPCODE_LG2: - temp = temp_temp(pc); - emit_flop(pc, 3, temp, src[0][0]); - for (c = 0; c < 4; c++) { - if (!(mask & (1 << c))) - continue; - emit_mov(pc, dst[c], temp); - } + emit_flop(pc, 3, brdc, src[0][0]); break; case TGSI_OPCODE_LRP: temp = temp_temp(pc); @@ -1510,31 +1638,18 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) } break; case TGSI_OPCODE_POW: - temp = temp_temp(pc); - emit_pow(pc, temp, src[0][0], src[1][0]); - for (c = 0; c < 4; c++) { - if (!(mask & (1 << c))) - continue; - emit_mov(pc, dst[c], temp); - } + emit_pow(pc, brdc, src[0][0], src[1][0]); break; case TGSI_OPCODE_RCP: - for (c = 3; c >= 0; c--) { - if (!(mask & (1 << c))) - continue; - emit_flop(pc, 0, dst[c], src[0][0]); - } + emit_flop(pc, 0, brdc, src[0][0]); break; case TGSI_OPCODE_RSQ: - for (c = 3; c >= 0; c--) { - if (!(mask & (1 << c))) - continue; - emit_flop(pc, 2, dst[c], src[0][0]); - } + emit_flop(pc, 2, brdc, src[0][0]); break; case TGSI_OPCODE_SCS: temp = temp_temp(pc); - emit_precossin(pc, temp, src[0][0]); + if (mask & 3) + emit_precossin(pc, temp, src[0][0]); if (mask & (1 << 0)) emit_flop(pc, 5, dst[0], temp); if (mask & (1 << 1)) @@ -1544,28 +1659,29 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) if (mask & (1 << 3)) emit_mov_immdval(pc, dst[3], 1.0); break; - case TGSI_OPCODE_SGE: - for (c = 0; c < 4; c++) { - if (!(mask & (1 << c))) - continue; - emit_set(pc, 6, dst[c], src[0][c], src[1][c]); - } - break; case TGSI_OPCODE_SIN: - temp = temp_temp(pc); - emit_precossin(pc, temp, src[0][0]); - emit_flop(pc, 4, temp, temp); - for (c = 0; c < 4; c++) { - if (!(mask & (1 << c))) - continue; - emit_mov(pc, dst[c], temp); + if (mask & 8) { + emit_precossin(pc, temp, src[0][3]); + emit_flop(pc, 4, dst[3], temp); + if (!(mask &= 7)) + break; + if (temp == dst[3]) + temp = brdc = temp_temp(pc); } + emit_precossin(pc, temp, src[0][0]); + emit_flop(pc, 4, brdc, temp); break; case TGSI_OPCODE_SLT: + case TGSI_OPCODE_SGE: + case TGSI_OPCODE_SEQ: + case TGSI_OPCODE_SGT: + case TGSI_OPCODE_SLE: + case TGSI_OPCODE_SNE: + i = map_tgsi_setop_cc(inst->Instruction.Opcode); for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - emit_set(pc, 1, dst[c], src[0][c], src[1][c]); + emit_set(pc, i, dst[c], -1, src[0][c], src[1][c]); } break; case TGSI_OPCODE_SUB: @@ -1583,6 +1699,14 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) emit_tex(pc, dst, mask, src[0], unit, inst->InstructionExtTexture.Texture, TRUE); break; + case TGSI_OPCODE_TRUNC: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_cvt(pc, dst[c], src[0][c], -1, + CVTOP_TRUNC, CVT_F32_F32); + } + break; case TGSI_OPCODE_XPD: temp = temp_temp(pc); if (mask & (1 << 0)) { @@ -1607,17 +1731,22 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) return FALSE; } + if (brdc) { + if (sat) + emit_sat(pc, brdc, brdc); + for (c = 0; c < 4; c++) + if ((mask & (1 << c)) && dst[c] != brdc) + emit_mov(pc, dst[c], brdc); + } else if (sat) { for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - emit_cvt(pc, rdst[c], dst[c], -1, CVTOP_SAT, - CVT_F32_F32); + /* in this case we saturate later */ + if (dst[c]->type == P_TEMP && dst[c]->index < 0) + continue; + emit_sat(pc, rdst[c], dst[c]); } - } else if (assimilate) { - for (c = 0; c < 4; c++) - if (rdst[c]) - assimilate_temp(pc, rdst[c], dst[c]); } for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { @@ -1626,9 +1755,6 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) continue; if (src[i][c]->index == -1 && src[i][c]->type == P_IMMD) FREE(src[i][c]); - else - if (src[i][c]->acc == pc->insn_cur) - release_hw(pc, src[i][c]); } } @@ -1636,180 +1762,271 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) return TRUE; } -/* Adjust a bitmask that indicates what components of a source are used, - * we use this in tx_prep so we only load interpolants that are needed. - */ static void -insn_adjust_mask(const struct tgsi_full_instruction *insn, unsigned *mask) +prep_inspect_insn(struct nv50_pc *pc, const struct tgsi_full_instruction *insn) { - const struct tgsi_instruction_ext_texture *tex; - - switch (insn->Instruction.Opcode) { - case TGSI_OPCODE_DP3: - *mask = 0x7; - break; - case TGSI_OPCODE_DP4: - case TGSI_OPCODE_DPH: - *mask = 0xF; - break; - case TGSI_OPCODE_LIT: - *mask = 0xB; - break; - case TGSI_OPCODE_RCP: - case TGSI_OPCODE_RSQ: - *mask = 0x1; - break; - case TGSI_OPCODE_TEX: - case TGSI_OPCODE_TXP: - assert(insn->Instruction.Extended); - tex = &insn->InstructionExtTexture; - - *mask = 0x7; - if (tex->Texture == TGSI_TEXTURE_1D) - *mask = 0x1; - else - if (tex->Texture == TGSI_TEXTURE_2D) - *mask = 0x3; - - if (insn->Instruction.Opcode == TGSI_OPCODE_TXP) - *mask |= 0x8; - break; - default: - break; - } -} - -static void -prep_inspect_insn(struct nv50_pc *pc, const union tgsi_full_token *tok, - unsigned *r_usage[2]) -{ - const struct tgsi_full_instruction *insn; + struct nv50_reg *reg = NULL; const struct tgsi_full_src_register *src; const struct tgsi_dst_register *dst; + unsigned i, c, k, mask; - unsigned i, c, k, n, mask, *acc_p; - - insn = &tok->FullInstruction; dst = &insn->FullDstRegisters[0].DstRegister; mask = dst->WriteMask; - if (!r_usage[0]) - r_usage[0] = CALLOC(pc->temp_nr * 4, sizeof(unsigned)); - if (!r_usage[1]) - r_usage[1] = CALLOC(pc->attr_nr * 4, sizeof(unsigned)); + if (dst->File == TGSI_FILE_TEMPORARY) + reg = pc->temp; + else + if (dst->File == TGSI_FILE_OUTPUT) + reg = pc->result; - if (dst->File == TGSI_FILE_TEMPORARY) { + if (reg) { for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - r_usage[0][dst->Index * 4 + c] = pc->insn_nr; + reg[dst->Index * 4 + c].acc = pc->insn_nr; } } for (i = 0; i < insn->Instruction.NumSrcRegs; i++) { src = &insn->FullSrcRegisters[i]; - switch (src->SrcRegister.File) { - case TGSI_FILE_TEMPORARY: - acc_p = r_usage[0]; - break; - case TGSI_FILE_INPUT: - acc_p = r_usage[1]; - break; - default: + if (src->SrcRegister.File == TGSI_FILE_TEMPORARY) + reg = pc->temp; + else + if (src->SrcRegister.File == TGSI_FILE_INPUT) + reg = pc->attr; + else continue; - } - insn_adjust_mask(insn, &mask); + mask = nv50_tgsi_src_mask(insn, i); for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - k = tgsi_util_get_full_src_register_extswizzle(src, c); - switch (k) { - case TGSI_EXTSWIZZLE_X: - case TGSI_EXTSWIZZLE_Y: - case TGSI_EXTSWIZZLE_Z: - case TGSI_EXTSWIZZLE_W: - n = src->SrcRegister.Index * 4 + k; - acc_p[n] = pc->insn_nr; - break; - default: - break; - } + + if (k > TGSI_EXTSWIZZLE_W) + continue; + + reg[src->SrcRegister.Index * 4 + k].acc = pc->insn_nr; } } } +/* Returns a bitmask indicating which dst components need to be + * written to temporaries first to avoid 'corrupting' sources. + * + * m[i] (out) indicate component to write in the i-th position + * rdep[c] (in) bitmasks of dst[i] that require dst[c] as source + */ static unsigned -load_fp_attrib(struct nv50_pc *pc, int i, unsigned *acc, int *mid, - int *aid, int *p_oid) +nv50_revdep_reorder(unsigned m[4], unsigned rdep[4]) { - struct nv50_reg *iv; - int oid, c, n; - unsigned mask = 0; + unsigned i, c, x, unsafe; - iv = (pc->interp_mode[i] & INTERP_CENTROID) ? pc->iv_c : pc->iv_p; + for (c = 0; c < 4; c++) + m[c] = c; - for (c = 0, n = i * 4; c < 4; c++, n++) { - oid = (*p_oid)++; - pc->attr[n].type = P_TEMP; - pc->attr[n].index = i; + /* Swap as long as a dst component written earlier is depended on + * by one written later, but the next one isn't depended on by it. + */ + for (c = 0; c < 3; c++) { + if (rdep[m[c + 1]] & (1 << m[c])) + continue; /* if next one is depended on by us */ + for (i = c + 1; i < 4; i++) + /* if we are depended on by a later one */ + if (rdep[m[c]] & (1 << m[i])) + break; + if (i == 4) + continue; + /* now, swap */ + x = m[c]; + m[c] = m[c + 1]; + m[c + 1] = x; + + /* restart */ + c = 0; + } + + /* mark dependencies that could not be resolved by reordering */ + for (i = 0; i < 3; ++i) + for (c = i + 1; c < 4; ++c) + if (rdep[m[i]] & (1 << m[c])) + unsafe |= (1 << i); + + /* NOTE: $unsafe is with respect to order, not component */ + return unsafe; +} - if (pc->attr[n].acc == acc[n]) +/* Select a suitable dst register for broadcasting scalar results, + * or return NULL if we have to allocate an extra TEMP. + * + * If e.g. only 1 component is written, we may also emit the final + * result to a write-only register. + */ +static struct nv50_reg * +tgsi_broadcast_dst(struct nv50_pc *pc, + const struct tgsi_full_dst_register *fd, unsigned mask) +{ + if (fd->DstRegister.File == TGSI_FILE_TEMPORARY) { + int c = ffs(~mask & fd->DstRegister.WriteMask); + if (c) + return tgsi_dst(pc, c - 1, fd); + } else { + int c = ffs(fd->DstRegister.WriteMask) - 1; + if ((1 << c) == fd->DstRegister.WriteMask) + return tgsi_dst(pc, c, fd); + } + + return NULL; +} + +/* Scan source swizzles and return a bitmask indicating dst regs that + * also occur among the src regs, and fill rdep for nv50_revdep_reoder. + */ +static unsigned +nv50_tgsi_scan_swizzle(const struct tgsi_full_instruction *insn, + unsigned rdep[4]) +{ + const struct tgsi_full_dst_register *fd = &insn->FullDstRegisters[0]; + const struct tgsi_full_src_register *fs; + unsigned i, deqs = 0; + + for (i = 0; i < 4; ++i) + rdep[i] = 0; + + for (i = 0; i < insn->Instruction.NumSrcRegs; i++) { + unsigned chn, mask = nv50_tgsi_src_mask(insn, i); + boolean neg_supp = negate_supported(insn, i); + + fs = &insn->FullSrcRegisters[i]; + if (fs->SrcRegister.File != fd->DstRegister.File || + fs->SrcRegister.Index != fd->DstRegister.Index) continue; - mask |= (1 << c); - pc->attr[n].acc = acc[n]; - pc->attr[n].rhw = pc->attr[n].hw = -1; - alloc_reg(pc, &pc->attr[n]); + for (chn = 0; chn < 4; ++chn) { + unsigned s, c; + + if (!(mask & (1 << chn))) /* src is not read */ + continue; + c = tgsi_util_get_full_src_register_extswizzle(fs, chn); + s = tgsi_util_get_full_src_register_sign_mode(fs, chn); - pc->attr[n].rhw = (*aid)++; - emit_interp(pc, &pc->attr[n], iv, pc->interp_mode[i]); + if (c > TGSI_EXTSWIZZLE_W || + !(fd->DstRegister.WriteMask & (1 << c))) + continue; - pc->p->cfg.fp.map[(*mid) / 4] |= oid << (8 * ((*mid) % 4)); - (*mid)++; - pc->p->cfg.fp.regs[1] += 0x00010001; + /* no danger if src is copied to TEMP first */ + if ((s != TGSI_UTIL_SIGN_KEEP) && + (s != TGSI_UTIL_SIGN_TOGGLE || !neg_supp)) + continue; + + rdep[c] |= nv50_tgsi_dst_revdep( + insn->Instruction.Opcode, i, chn); + deqs |= (1 << c); + } } - return mask; + return deqs; } static boolean -nv50_program_tx_prep(struct nv50_pc *pc) +nv50_tgsi_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) { - struct tgsi_parse_context p; - boolean ret = FALSE; - unsigned i, c; - unsigned fcol, bcol, fcrd, depr; + struct tgsi_full_instruction insn = tok->FullInstruction; + const struct tgsi_full_dst_register *fd; + unsigned i, deqs, rdep[4], m[4]; + + fd = &tok->FullInstruction.FullDstRegisters[0]; + deqs = nv50_tgsi_scan_swizzle(&insn, rdep); + + if (is_scalar_op(insn.Instruction.Opcode)) { + pc->r_brdc = tgsi_broadcast_dst(pc, fd, deqs); + if (!pc->r_brdc) + pc->r_brdc = temp_temp(pc); + return nv50_program_tx_insn(pc, &insn); + } + pc->r_brdc = NULL; + + if (!deqs) + return nv50_program_tx_insn(pc, &insn); + + deqs = nv50_revdep_reorder(m, rdep); - /* count (centroid) perspective interpolations */ - unsigned centroid_loads = 0; - unsigned perspect_loads = 0; + for (i = 0; i < 4; ++i) { + assert(pc->r_dst[m[i]] == NULL); - /* track register access for temps and attrs */ - unsigned *r_usage[2]; - r_usage[0] = NULL; - r_usage[1] = NULL; + insn.FullDstRegisters[0].DstRegister.WriteMask = + fd->DstRegister.WriteMask & (1 << m[i]); - depr = fcol = bcol = fcrd = 0xffff; + if (!insn.FullDstRegisters[0].DstRegister.WriteMask) + continue; + + if (deqs & (1 << i)) + pc->r_dst[m[i]] = alloc_temp(pc, NULL); - if (pc->p->type == PIPE_SHADER_FRAGMENT) { - pc->p->cfg.fp.regs[0] = 0x01000404; - pc->p->cfg.fp.regs[1] = 0x00000400; + if (!nv50_program_tx_insn(pc, &insn)) + return FALSE; } - tgsi_parse_init(&p, pc->p->pipe.tokens); - while (!tgsi_parse_end_of_tokens(&p)) { - const union tgsi_full_token *tok = &p.FullToken; + for (i = 0; i < 4; i++) { + struct nv50_reg *reg = pc->r_dst[i]; + if (!reg) + continue; + pc->r_dst[i] = NULL; + + if (insn.Instruction.Saturate == TGSI_SAT_ZERO_ONE) + emit_sat(pc, tgsi_dst(pc, i, fd), reg); + else + emit_mov(pc, tgsi_dst(pc, i, fd), reg); + free_temp(pc, reg); + } - tgsi_parse_token(&p); + return TRUE; +} + +static void +load_interpolant(struct nv50_pc *pc, struct nv50_reg *reg) +{ + struct nv50_reg *iv, **ppiv; + unsigned mode = pc->interp_mode[reg->index]; + + ppiv = (mode & INTERP_CENTROID) ? &pc->iv_c : &pc->iv_p; + iv = *ppiv; + + if ((mode & INTERP_PERSPECTIVE) && !iv) { + iv = *ppiv = alloc_temp(pc, NULL); + iv->rhw = popcnt4(pc->p->cfg.regs[1] >> 24) - 1; + + emit_interp(pc, iv, NULL, mode & INTERP_CENTROID); + emit_flop(pc, 0, iv, iv); + + /* XXX: when loading interpolants dynamically, move these + * to the program head, or make sure it can't be skipped. + */ + } + + emit_interp(pc, reg, iv, mode); +} + +static boolean +nv50_program_tx_prep(struct nv50_pc *pc) +{ + struct tgsi_parse_context tp; + struct nv50_program *p = pc->p; + boolean ret = FALSE; + unsigned i, c, flat_nr = 0; + + tgsi_parse_init(&tp, pc->p->pipe.tokens); + while (!tgsi_parse_end_of_tokens(&tp)) { + const union tgsi_full_token *tok = &tp.FullToken; + + tgsi_parse_token(&tp); switch (tok->Token.Type) { case TGSI_TOKEN_TYPE_IMMEDIATE: { const struct tgsi_full_immediate *imm = - &p.FullToken.FullImmediate; + &tp.FullToken.FullImmediate; ctor_immd(pc, imm->u[0].Float, imm->u[1].Float, @@ -1820,78 +2037,61 @@ nv50_program_tx_prep(struct nv50_pc *pc) case TGSI_TOKEN_TYPE_DECLARATION: { const struct tgsi_full_declaration *d; - unsigned last, first, mode; + unsigned si, last, first, mode; - d = &p.FullToken.FullDeclaration; + d = &tp.FullToken.FullDeclaration; first = d->DeclarationRange.First; last = d->DeclarationRange.Last; switch (d->Declaration.File) { case TGSI_FILE_TEMPORARY: - if (pc->temp_nr < (last + 1)) - pc->temp_nr = last + 1; break; case TGSI_FILE_OUTPUT: - if (pc->result_nr < (last + 1)) - pc->result_nr = last + 1; - - if (!d->Declaration.Semantic) + if (!d->Declaration.Semantic || + p->type == PIPE_SHADER_FRAGMENT) break; + si = d->Semantic.SemanticIndex; switch (d->Semantic.SemanticName) { - case TGSI_SEMANTIC_POSITION: - depr = first; - pc->p->cfg.fp.regs[2] |= 0x00000100; - pc->p->cfg.fp.regs[3] |= 0x00000011; + case TGSI_SEMANTIC_BCOLOR: + p->cfg.two_side[si].hw = first; + if (p->cfg.io_nr > first) + p->cfg.io_nr = first; + break; + case TGSI_SEMANTIC_PSIZE: + p->cfg.psiz = first; + if (p->cfg.io_nr > first) + p->cfg.io_nr = first; + break; + /* + case TGSI_SEMANTIC_CLIP_DISTANCE: + p->cfg.clpd = MIN2(p->cfg.clpd, first); break; + */ default: break; } - break; case TGSI_FILE_INPUT: { - if (pc->attr_nr < (last + 1)) - pc->attr_nr = last + 1; - - if (pc->p->type != PIPE_SHADER_FRAGMENT) + if (p->type != PIPE_SHADER_FRAGMENT) break; switch (d->Declaration.Interpolate) { case TGSI_INTERPOLATE_CONSTANT: mode = INTERP_FLAT; + flat_nr++; break; case TGSI_INTERPOLATE_PERSPECTIVE: mode = INTERP_PERSPECTIVE; + p->cfg.regs[1] |= 0x08 << 24; break; default: mode = INTERP_LINEAR; break; } - - if (d->Declaration.Semantic) { - switch (d->Semantic.SemanticName) { - case TGSI_SEMANTIC_POSITION: - fcrd = first; - break; - case TGSI_SEMANTIC_COLOR: - fcol = first; - mode = INTERP_PERSPECTIVE; - break; - case TGSI_SEMANTIC_BCOLOR: - bcol = first; - mode = INTERP_PERSPECTIVE; - break; - } - } - - if (d->Declaration.Centroid) { + if (d->Declaration.Centroid) mode |= INTERP_CENTROID; - if (mode & INTERP_PERSPECTIVE) - centroid_loads++; - } else - if (mode & INTERP_PERSPECTIVE) - perspect_loads++; assert(last < 32); for (i = first; i <= last; i++) @@ -1899,8 +2099,6 @@ nv50_program_tx_prep(struct nv50_pc *pc) } break; case TGSI_FILE_CONSTANT: - if (pc->param_nr < (last + 1)) - pc->param_nr = last + 1; break; case TGSI_FILE_SAMPLER: break; @@ -1913,182 +2111,155 @@ nv50_program_tx_prep(struct nv50_pc *pc) break; case TGSI_TOKEN_TYPE_INSTRUCTION: pc->insn_nr++; - prep_inspect_insn(pc, tok, r_usage); + prep_inspect_insn(pc, &tok->FullInstruction); break; default: break; } } - if (pc->temp_nr) { - pc->temp = CALLOC(pc->temp_nr * 4, sizeof(struct nv50_reg)); - if (!pc->temp) - goto out_err; + if (p->type == PIPE_SHADER_VERTEX) { + int rid = 0; - for (i = 0; i < pc->temp_nr; i++) { - for (c = 0; c < 4; c++) { - pc->temp[i*4+c].type = P_TEMP; - pc->temp[i*4+c].hw = -1; - pc->temp[i*4+c].rhw = -1; - pc->temp[i*4+c].index = i; - pc->temp[i*4+c].acc = r_usage[0][i*4+c]; + for (i = 0; i < pc->attr_nr * 4; ++i) { + if (pc->attr[i].acc) { + pc->attr[i].hw = rid++; + p->cfg.attr[i / 32] |= 1 << (i % 32); } } - } - - if (pc->attr_nr) { - int oid = 4, mid = 4, aid = 0; - /* oid = VP output id - * aid = FP attribute/interpolant id - * mid = VP output mapping field ID - */ - pc->attr = CALLOC(pc->attr_nr * 4, sizeof(struct nv50_reg)); - if (!pc->attr) - goto out_err; + for (i = 0, rid = 0; i < pc->result_nr; ++i) { + p->cfg.io[i].hw = rid; + p->cfg.io[i].id_vp = i; - if (pc->p->type == PIPE_SHADER_FRAGMENT) { - /* position should be loaded first */ - if (fcrd != 0xffff) { - unsigned mask; - mid = 0; - mask = load_fp_attrib(pc, fcrd, r_usage[1], - &mid, &aid, &oid); - oid = 0; - pc->p->cfg.fp.regs[1] |= (mask << 24); - pc->p->cfg.fp.map[0] = 0x04040404 * fcrd; - } - pc->p->cfg.fp.map[0] += 0x03020100; - - /* should do MAD fcrd.xy, fcrd, SOME_CONST, fcrd */ - - if (perspect_loads) { - pc->iv_p = alloc_temp(pc, NULL); - - if (!(pc->p->cfg.fp.regs[1] & 0x08000000)) { - pc->p->cfg.fp.regs[1] |= 0x08000000; - pc->iv_p->rhw = aid++; - emit_interp(pc, pc->iv_p, NULL, - INTERP_LINEAR); - emit_flop(pc, 0, pc->iv_p, pc->iv_p); - } else { - pc->iv_p->rhw = aid - 1; - emit_flop(pc, 0, pc->iv_p, - &pc->attr[fcrd * 4 + 3]); - } + for (c = 0; c < 4; ++c) { + int n = i * 4 + c; + if (!pc->result[n].acc) + continue; + pc->result[n].hw = rid++; + p->cfg.io[i].mask |= 1 << c; } + } - if (centroid_loads) { - pc->iv_c = alloc_temp(pc, NULL); - pc->iv_c->rhw = pc->iv_p ? aid - 1 : aid++; - emit_interp(pc, pc->iv_c, NULL, - INTERP_CENTROID); - emit_flop(pc, 0, pc->iv_c, pc->iv_c); - pc->p->cfg.fp.regs[1] |= 0x08000000; - } + for (c = 0; c < 2; ++c) + if (p->cfg.two_side[c].hw < 0x40) + p->cfg.two_side[c] = p->cfg.io[ + p->cfg.two_side[c].hw]; - for (c = 0; c < 4; c++) { - /* I don't know what these values do, but - * let's set them like the blob does: - */ - if (fcol != 0xffff && r_usage[1][fcol * 4 + c]) - pc->p->cfg.fp.regs[0] += 0x00010000; - if (bcol != 0xffff && r_usage[1][bcol * 4 + c]) - pc->p->cfg.fp.regs[0] += 0x00010000; - } - - for (i = 0; i < pc->attr_nr; i++) - load_fp_attrib(pc, i, r_usage[1], - &mid, &aid, &oid); + if (p->cfg.psiz < 0x40) + p->cfg.psiz = p->cfg.io[p->cfg.psiz].hw; + } else + if (p->type == PIPE_SHADER_FRAGMENT) { + int rid, aid; + unsigned n = 0, m = pc->attr_nr - flat_nr; - if (pc->iv_p) - free_temp(pc, pc->iv_p); - if (pc->iv_c) - free_temp(pc, pc->iv_c); + int base = (TGSI_SEMANTIC_POSITION == + p->info.input_semantic_name[0]) ? 0 : 1; - pc->p->cfg.fp.high_map = (mid / 4); - pc->p->cfg.fp.high_map += ((mid % 4) ? 1 : 0); - } else { - /* vertex program */ - for (i = 0; i < pc->attr_nr * 4; i++) { - pc->p->cfg.vp.attr[aid / 32] |= - (1 << (aid % 32)); - pc->attr[i].type = P_ATTR; - pc->attr[i].hw = aid++; - pc->attr[i].index = i / 4; + /* non-flat interpolants have to be mapped to + * the lower hardware IDs, so sort them: + */ + for (i = 0; i < pc->attr_nr; i++) { + if (pc->interp_mode[i] == INTERP_FLAT) { + p->cfg.io[m].id_vp = i + base; + p->cfg.io[m++].id_fp = i; + } else { + if (!(pc->interp_mode[i] & INTERP_PERSPECTIVE)) + p->cfg.io[n].linear = TRUE; + p->cfg.io[n].id_vp = i + base; + p->cfg.io[n++].id_fp = i; } } - } - if (pc->result_nr) { - int rid = 0; + if (!base) /* set w-coordinate mask from perspective interp */ + p->cfg.io[0].mask |= p->cfg.regs[1] >> 24; - pc->result = CALLOC(pc->result_nr * 4, sizeof(struct nv50_reg)); - if (!pc->result) - goto out_err; + aid = popcnt4( /* if fcrd isn't contained in cfg.io */ + base ? (p->cfg.regs[1] >> 24) : p->cfg.io[0].mask); - for (i = 0; i < pc->result_nr; i++) { - for (c = 0; c < 4; c++) { - if (pc->p->type == PIPE_SHADER_FRAGMENT) { - pc->result[i*4+c].type = P_TEMP; - pc->result[i*4+c].hw = -1; - pc->result[i*4+c].rhw = (i == depr) ? - -1 : rid++; - } else { - pc->result[i*4+c].type = P_RESULT; - pc->result[i*4+c].hw = rid++; - } - pc->result[i*4+c].index = i; - } + for (n = 0; n < pc->attr_nr; ++n) { + p->cfg.io[n].hw = rid = aid; + i = p->cfg.io[n].id_fp; + + for (c = 0; c < 4; ++c) { + if (!pc->attr[i * 4 + c].acc) + continue; + pc->attr[i * 4 + c].rhw = rid++; + p->cfg.io[n].mask |= 1 << c; - if (pc->p->type == PIPE_SHADER_FRAGMENT && - depr != 0xffff) { - pc->result[depr * 4 + 2].rhw = - (pc->result_nr - 1) * 4; + load_interpolant(pc, &pc->attr[i * 4 + c]); } + aid += popcnt4(p->cfg.io[n].mask); } - } - if (pc->param_nr) { - int rid = 0; + if (!base) + p->cfg.regs[1] |= p->cfg.io[0].mask << 24; - pc->param = CALLOC(pc->param_nr * 4, sizeof(struct nv50_reg)); - if (!pc->param) - goto out_err; + m = popcnt4(p->cfg.regs[1] >> 24); - for (i = 0; i < pc->param_nr; i++) { - for (c = 0; c < 4; c++) { - pc->param[i*4+c].type = P_CONST; - pc->param[i*4+c].hw = rid++; - pc->param[i*4+c].index = i; + /* set count of non-position inputs and of non-flat + * non-position inputs for FP_INTERPOLANT_CTRL + */ + p->cfg.regs[1] |= aid - m; + + if (flat_nr) { + i = p->cfg.io[pc->attr_nr - flat_nr].hw; + p->cfg.regs[1] |= (i - m) << 16; + } else + p->cfg.regs[1] |= p->cfg.regs[1] << 16; + + /* mark color semantic for light-twoside */ + n = 0x40; + for (i = 0; i < pc->attr_nr; i++) { + ubyte si, sn; + + sn = p->info.input_semantic_name[p->cfg.io[i].id_fp]; + si = p->info.input_semantic_index[p->cfg.io[i].id_fp]; + + if (sn == TGSI_SEMANTIC_COLOR) { + p->cfg.two_side[si] = p->cfg.io[i]; + + /* increase colour count */ + p->cfg.regs[0] += popcnt4( + p->cfg.two_side[si].mask) << 16; + + n = MIN2(n, p->cfg.io[i].hw - m); } } + if (n < 0x40) + p->cfg.regs[0] += n; + + /* Initialize FP results: + * FragDepth is always first TGSI and last hw output + */ + i = p->info.writes_z ? 4 : 0; + for (rid = 0; i < pc->result_nr * 4; i++) + pc->result[i].rhw = rid++; + if (p->info.writes_z) + pc->result[2].rhw = rid; } if (pc->immd_nr) { int rid = 0; - pc->immd = CALLOC(pc->immd_nr * 4, sizeof(struct nv50_reg)); + pc->immd = MALLOC(pc->immd_nr * 4 * sizeof(struct nv50_reg)); if (!pc->immd) goto out_err; for (i = 0; i < pc->immd_nr; i++) { - for (c = 0; c < 4; c++) { - pc->immd[i*4+c].type = P_IMMD; - pc->immd[i*4+c].hw = rid++; - pc->immd[i*4+c].index = i; - } + for (c = 0; c < 4; c++, rid++) + ctor_reg(&pc->immd[rid], P_IMMD, i, rid); } } ret = TRUE; out_err: - if (r_usage[0]) - FREE(r_usage[0]); - if (r_usage[1]) - FREE(r_usage[1]); + if (pc->iv_p) + free_temp(pc, pc->iv_p); + if (pc->iv_c) + free_temp(pc, pc->iv_c); - tgsi_parse_free(&p); + tgsi_parse_free(&tp); return ret; } @@ -2110,6 +2281,88 @@ free_nv50_pc(struct nv50_pc *pc) } static boolean +ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p) +{ + int i, c; + unsigned rtype[2] = { P_ATTR, P_RESULT }; + + pc->p = p; + pc->temp_nr = p->info.file_max[TGSI_FILE_TEMPORARY] + 1; + pc->attr_nr = p->info.file_max[TGSI_FILE_INPUT] + 1; + pc->result_nr = p->info.file_max[TGSI_FILE_OUTPUT] + 1; + pc->param_nr = p->info.file_max[TGSI_FILE_CONSTANT] + 1; + + p->cfg.high_temp = 4; + + p->cfg.two_side[0].hw = 0x40; + p->cfg.two_side[1].hw = 0x40; + + switch (p->type) { + case PIPE_SHADER_VERTEX: + p->cfg.psiz = 0x40; + p->cfg.clpd = 0x40; + p->cfg.io_nr = pc->result_nr; + break; + case PIPE_SHADER_FRAGMENT: + rtype[0] = rtype[1] = P_TEMP; + + p->cfg.regs[0] = 0x01000004; + p->cfg.io_nr = pc->attr_nr; + + if (p->info.writes_z) { + p->cfg.regs[2] |= 0x00000100; + p->cfg.regs[3] |= 0x00000011; + } + if (p->info.uses_kill) + p->cfg.regs[2] |= 0x00100000; + break; + } + + if (pc->temp_nr) { + pc->temp = MALLOC(pc->temp_nr * 4 * sizeof(struct nv50_reg)); + if (!pc->temp) + return FALSE; + + for (i = 0; i < pc->temp_nr * 4; ++i) + ctor_reg(&pc->temp[i], P_TEMP, i / 4, -1); + } + + if (pc->attr_nr) { + pc->attr = MALLOC(pc->attr_nr * 4 * sizeof(struct nv50_reg)); + if (!pc->attr) + return FALSE; + + for (i = 0; i < pc->attr_nr * 4; ++i) + ctor_reg(&pc->attr[i], rtype[0], i / 4, -1); + } + + if (pc->result_nr) { + unsigned nr = pc->result_nr * 4; + + pc->result = MALLOC(nr * sizeof(struct nv50_reg)); + if (!pc->result) + return FALSE; + + for (i = 0; i < nr; ++i) + ctor_reg(&pc->result[i], rtype[1], i / 4, -1); + } + + if (pc->param_nr) { + int rid = 0; + + pc->param = MALLOC(pc->param_nr * 4 * sizeof(struct nv50_reg)); + if (!pc->param) + return FALSE; + + for (i = 0; i < pc->param_nr; ++i) + for (c = 0; c < 4; ++c, ++rid) + ctor_reg(&pc->param[rid], P_CONST, i, rid); + } + + return TRUE; +} + +static boolean nv50_program_tx(struct nv50_program *p) { struct tgsi_parse_context parse; @@ -2120,8 +2373,10 @@ nv50_program_tx(struct nv50_program *p) pc = CALLOC_STRUCT(nv50_pc); if (!pc) return FALSE; - pc->p = p; - pc->p->cfg.high_temp = 4; + + ret = ctor_nv50_pc(pc, p); + if (ret == FALSE) + goto out_cleanup; ret = nv50_program_tx_prep(pc); if (ret == FALSE) @@ -2141,7 +2396,7 @@ nv50_program_tx(struct nv50_program *p) switch (tok->Token.Type) { case TGSI_TOKEN_TYPE_INSTRUCTION: ++pc->insn_cur; - ret = nv50_program_tx_insn(pc, tok); + ret = nv50_tgsi_insn(pc, tok); if (ret == FALSE) goto out_err; break; @@ -2152,8 +2407,8 @@ nv50_program_tx(struct nv50_program *p) if (p->type == PIPE_SHADER_FRAGMENT) { struct nv50_reg out; + ctor_reg(&out, P_TEMP, -1, -1); - out.type = P_TEMP; for (k = 0; k < pc->result_nr * 4; k++) { if (pc->result[k].rhw == -1) continue; @@ -2258,30 +2513,19 @@ nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) p->immd_nr, NV50_CB_PMISC); } - if (!p->data[1] && p->param_nr) { - struct nouveau_resource *heap = - nv50->screen->parm_heap[p->type]; - - if (nouveau_resource_alloc(heap, p->param_nr, p, &p->data[1])) { - while (heap->next && heap->size < p->param_nr) { - struct nv50_program *evict = heap->next->priv; - nouveau_resource_free(&evict->data[1]); - } - - if (nouveau_resource_alloc(heap, p->param_nr, p, - &p->data[1])) - assert(0); - } - } + assert(p->param_nr <= 128); if (p->param_nr) { - unsigned cbuf = NV50_CB_PVP; + unsigned cb; float *map = pipe_buffer_map(pscreen, nv50->constbuf[p->type], PIPE_BUFFER_USAGE_CPU_READ); - if (p->type == PIPE_SHADER_FRAGMENT) - cbuf = NV50_CB_PFP; - nv50_program_upload_data(nv50, map, p->data[1]->start, - p->param_nr, cbuf); + + if (p->type == PIPE_SHADER_VERTEX) + cb = NV50_CB_PVP; + else + cb = NV50_CB_PFP; + + nv50_program_upload_data(nv50, map, 0, p->param_nr, cb); pipe_buffer_unmap(pscreen, nv50->constbuf[p->type]); } } @@ -2303,32 +2547,30 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) upload = TRUE; } - if ((p->data[0] && p->data[0]->start != p->data_start[0]) || - (p->data[1] && p->data[1]->start != p->data_start[1])) { - for (e = p->exec_head; e; e = e->next) { - unsigned ei, ci, bs; - - if (e->param.index < 0) - continue; - bs = (e->inst[1] >> 22) & 0x07; - assert(bs < 2); - ei = e->param.shift >> 5; - ci = e->param.index + p->data[bs]->start; + if (p->data[0] && p->data[0]->start != p->data_start[0]) + upload = TRUE; - e->inst[ei] &= ~e->param.mask; - e->inst[ei] |= (ci << e->param.shift); - } + if (!upload) + return; - if (p->data[0]) - p->data_start[0] = p->data[0]->start; - if (p->data[1]) - p->data_start[1] = p->data[1]->start; + for (e = p->exec_head; e; e = e->next) { + unsigned ei, ci, bs; - upload = TRUE; + if (e->param.index < 0) + continue; + bs = (e->inst[1] >> 22) & 0x07; + assert(bs < 2); + ei = e->param.shift >> 5; + ci = e->param.index; + if (bs == 0) + ci += p->data[bs]->start; + + e->inst[ei] &= ~e->param.mask; + e->inst[ei] |= (ci << e->param.shift); } - if (!upload) - return; + if (p->data[0]) + p->data_start[0] = p->data[0]->start; #ifdef NV50_PROGRAM_DUMP NOUVEAU_ERR("-------\n"); @@ -2402,8 +2644,8 @@ nv50_vertprog_validate(struct nv50_context *nv50) so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); so_method(so, tesla, NV50TCL_VP_ATTR_EN_0, 2); - so_data (so, p->cfg.vp.attr[0]); - so_data (so, p->cfg.vp.attr[1]); + so_data (so, p->cfg.attr[0]); + so_data (so, p->cfg.attr[1]); so_method(so, tesla, NV50TCL_VP_REG_ALLOC_RESULT, 1); so_data (so, p->cfg.high_result); so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 2); @@ -2421,7 +2663,6 @@ nv50_fragprog_validate(struct nv50_context *nv50) struct nouveau_grobj *tesla = nv50->screen->tesla; struct nv50_program *p = nv50->fragprog; struct nouveau_stateobj *so; - unsigned i; if (!p->translated) { nv50_program_validate(nv50, p); @@ -2438,29 +2679,186 @@ nv50_fragprog_validate(struct nv50_context *nv50) NOUVEAU_BO_HIGH, 0, 0); so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); - so_method(so, tesla, NV50TCL_MAP_SEMANTIC_0, 4); - so_data (so, p->cfg.fp.regs[0]); /* 0x01000404 / 0x00040404 */ - so_data (so, 0x00000004); - so_data (so, 0x00000000); - so_data (so, 0x00000000); - so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), p->cfg.fp.high_map); - for (i = 0; i < p->cfg.fp.high_map; i++) - so_data(so, p->cfg.fp.map[i]); - so_method(so, tesla, NV50TCL_FP_INTERPOLANT_CTRL, 2); - so_data (so, p->cfg.fp.regs[1]); /* 0x08040404 / 0x0f000401 */ + so_method(so, tesla, NV50TCL_FP_REG_ALLOC_TEMP, 1); so_data (so, p->cfg.high_temp); so_method(so, tesla, NV50TCL_FP_RESULT_COUNT, 1); so_data (so, p->cfg.high_result); so_method(so, tesla, NV50TCL_FP_CTRL_UNK19A8, 1); - so_data (so, p->cfg.fp.regs[2]); + so_data (so, p->cfg.regs[2]); so_method(so, tesla, NV50TCL_FP_CTRL_UNK196C, 1); - so_data (so, p->cfg.fp.regs[3]); + so_data (so, p->cfg.regs[3]); so_method(so, tesla, NV50TCL_FP_START_ID, 1); so_data (so, 0); /* program start offset */ so_ref(so, &nv50->state.fragprog); so_ref(NULL, &so); } +static void +nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned base) +{ + struct nv50_program *fp = nv50->fragprog; + struct nv50_program *vp = nv50->vertprog; + unsigned i, c, m = base; + + /* XXX: This can't work correctly in all cases yet, we either + * have to create TGSI_SEMANTIC_PNTC or sprite_coord_mode has + * to be per FP input instead of per VP output + */ + memset(pntc, 0, 8 * sizeof(uint32_t)); + + for (i = 0; i < fp->cfg.io_nr; i++) { + uint8_t sn, si; + uint8_t j = fp->cfg.io[i].id_vp, k = fp->cfg.io[i].id_fp; + unsigned n = popcnt4(fp->cfg.io[i].mask); + + if (fp->info.input_semantic_name[k] != TGSI_SEMANTIC_GENERIC) { + m += n; + continue; + } + + sn = vp->info.input_semantic_name[j]; + si = vp->info.input_semantic_index[j]; + + if (j < fp->cfg.io_nr && sn == TGSI_SEMANTIC_GENERIC) { + ubyte mode = + nv50->rasterizer->pipe.sprite_coord_mode[si]; + + if (mode == PIPE_SPRITE_COORD_NONE) { + m += n; + continue; + } + } + + /* this is either PointCoord or replaced by sprite coords */ + for (c = 0; c < 4; c++) { + if (!(fp->cfg.io[i].mask & (1 << c))) + continue; + pntc[m / 8] |= (c + 1) << ((m % 8) * 4); + ++m; + } + } +} + +static int +nv50_sreg4_map(uint32_t *p_map, int mid, uint32_t lin[4], + struct nv50_sreg4 *fpi, struct nv50_sreg4 *vpo) +{ + int c; + uint8_t mv = vpo->mask, mf = fpi->mask, oid = vpo->hw; + uint8_t *map = (uint8_t *)p_map; + + for (c = 0; c < 4; ++c) { + if (mf & 1) { + if (fpi->linear == TRUE) + lin[mid / 32] |= 1 << (mid % 32); + map[mid++] = (mv & 1) ? oid : ((c == 3) ? 0x41 : 0x40); + } + + oid += mv & 1; + mf >>= 1; + mv >>= 1; + } + + return mid; +} + +void +nv50_linkage_validate(struct nv50_context *nv50) +{ + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nv50_program *vp = nv50->vertprog; + struct nv50_program *fp = nv50->fragprog; + struct nouveau_stateobj *so; + struct nv50_sreg4 dummy, *vpo; + int i, n, c, m = 0; + uint32_t map[16], lin[4], reg[5], pcrd[8]; + + memset(map, 0, sizeof(map)); + memset(lin, 0, sizeof(lin)); + + reg[1] = 0x00000004; /* low and high clip distance map ids */ + reg[2] = 0x00000000; /* layer index map id (disabled, GP only) */ + reg[3] = 0x00000000; /* point size map id & enable */ + reg[0] = fp->cfg.regs[0]; /* colour semantic reg */ + reg[4] = fp->cfg.regs[1]; /* interpolant info */ + + dummy.linear = FALSE; + dummy.mask = 0xf; /* map all components of HPOS */ + m = nv50_sreg4_map(map, m, lin, &dummy, &vp->cfg.io[0]); + + dummy.mask = 0x0; + + if (vp->cfg.clpd < 0x40) { + for (c = 0; c < vp->cfg.clpd_nr; ++c) + map[m++] = vp->cfg.clpd + c; + reg[1] = (m << 8); + } + + reg[0] |= m << 8; /* adjust BFC0 id */ + + /* if light_twoside is active, it seems FFC0_ID == BFC0_ID is bad */ + if (nv50->rasterizer->pipe.light_twoside) { + vpo = &vp->cfg.two_side[0]; + + m = nv50_sreg4_map(map, m, lin, &fp->cfg.two_side[0], &vpo[0]); + m = nv50_sreg4_map(map, m, lin, &fp->cfg.two_side[1], &vpo[1]); + } + + reg[0] += m - 4; /* adjust FFC0 id */ + reg[4] |= m << 8; /* set mid where 'normal' FP inputs start */ + + i = 0; + if (fp->info.input_semantic_name[0] == TGSI_SEMANTIC_POSITION) + i = 1; + for (; i < fp->cfg.io_nr; i++) { + ubyte sn = fp->info.input_semantic_name[fp->cfg.io[i].id_fp]; + ubyte si = fp->info.input_semantic_index[fp->cfg.io[i].id_fp]; + + n = fp->cfg.io[i].id_vp; + if (n >= vp->cfg.io_nr || + vp->info.output_semantic_name[n] != sn || + vp->info.output_semantic_index[n] != si) + vpo = &dummy; + else + vpo = &vp->cfg.io[n]; + + m = nv50_sreg4_map(map, m, lin, &fp->cfg.io[i], vpo); + } + + if (nv50->rasterizer->pipe.point_size_per_vertex) { + map[m / 4] |= vp->cfg.psiz << ((m % 4) * 8); + reg[3] = (m++ << 4) | 1; + } + + /* now fill the stateobj */ + so = so_new(64, 0); + + n = (m + 3) / 4; + so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 1); + so_data (so, m); + so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), n); + so_datap (so, map, n); + + so_method(so, tesla, NV50TCL_MAP_SEMANTIC_0, 4); + so_datap (so, reg, 4); + + so_method(so, tesla, NV50TCL_FP_INTERPOLANT_CTRL, 1); + so_data (so, reg[4]); + + so_method(so, tesla, 0x1540, 4); + so_datap (so, lin, 4); + + if (nv50->rasterizer->pipe.point_sprite) { + nv50_pntc_replace(nv50, pcrd, (reg[4] >> 8) & 0xff); + + so_method(so, tesla, NV50TCL_POINT_COORD_REPLACE_MAP(0), 8); + so_datap (so, pcrd, 8); + } + + so_ref(so, &nv50->state.programs); + so_ref(NULL, &so); +} + void nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p) { @@ -2476,7 +2874,6 @@ nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p) nouveau_bo_ref(NULL, &p->bo); nouveau_resource_free(&p->data[0]); - nouveau_resource_free(&p->data[1]); p->translated = 0; } diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h index 096e0476aab..d78dee083f1 100644 --- a/src/gallium/drivers/nv50/nv50_program.h +++ b/src/gallium/drivers/nv50/nv50_program.h @@ -15,6 +15,15 @@ struct nv50_program_exec { } param; }; +struct nv50_sreg4 { + uint8_t hw; + uint8_t id_vp; + uint8_t id_fp; + + uint8_t mask; + boolean linear; +}; + struct nv50_program { struct pipe_shader_state pipe; struct tgsi_shader_info info; @@ -24,8 +33,8 @@ struct nv50_program { struct nv50_program_exec *exec_head; struct nv50_program_exec *exec_tail; unsigned exec_size; - struct nouveau_resource *data[2]; - unsigned data_start[2]; + struct nouveau_resource *data[1]; + unsigned data_start[1]; struct nouveau_bo *bo; @@ -36,14 +45,20 @@ struct nv50_program { struct { unsigned high_temp; unsigned high_result; - struct { - unsigned attr[2]; - } vp; - struct { - unsigned regs[4]; - unsigned map[5]; - unsigned high_map; - } fp; + + uint32_t attr[2]; + uint32_t regs[4]; + + /* for VPs, io_nr doesn't count 'private' results (PSIZ etc.) */ + unsigned io_nr; + struct nv50_sreg4 io[PIPE_MAX_SHADER_OUTPUTS]; + + /* FP colour inputs, VP/GP back colour outputs */ + struct nv50_sreg4 two_side[2]; + + /* VP only */ + uint8_t clpd, clpd_nr; + uint8_t psiz; } cfg; }; diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index c7f80a22037..7adaaaa135b 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -92,7 +92,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, int param) case PIPE_CAP_ANISOTROPIC_FILTER: return 1; case PIPE_CAP_POINT_SPRITE: - return 0; + return 1; case PIPE_CAP_MAX_RENDER_TARGETS: return 8; case PIPE_CAP_OCCLUSION_QUERY: diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index 4283808ed93..81fa3e34c59 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -276,6 +276,9 @@ nv50_rasterizer_state_create(struct pipe_context *pipe, so_method(so, tesla, 0x1684, 1); so_data (so, cso->flatshade_first ? 0 : 1); + so_method(so, tesla, NV50TCL_VERTEX_TWO_SIDE_ENABLE, 1); + so_data (so, cso->light_twoside); + so_method(so, tesla, NV50TCL_LINE_WIDTH, 1); so_data (so, fui(cso->line_width)); so_method(so, tesla, NV50TCL_LINE_SMOOTH_ENABLE, 1); @@ -294,6 +297,9 @@ nv50_rasterizer_state_create(struct pipe_context *pipe, so_method(so, tesla, NV50TCL_POINT_SIZE, 1); so_data (so, fui(cso->point_size)); + so_method(so, tesla, NV50TCL_POINT_SPRITE_ENABLE, 1); + so_data (so, cso->point_sprite); + so_method(so, tesla, NV50TCL_POLYGON_MODE_FRONT, 3); if (cso->front_winding == PIPE_WINDING_CCW) { so_data(so, nvgl_polygon_mode(cso->fill_ccw)); diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index 344c2cf6dde..d307a987453 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -66,7 +66,8 @@ nv50_state_validate_fb(struct nv50_context *nv50) so_data(so, NV50TCL_RT_FORMAT_X8R8G8B8_UNORM); break; } - so_data(so, bo->tile_mode << 4); + so_data(so, nv50_miptree(pt)-> + level[fb->cbufs[i]->level].tile_mode << 4); so_data(so, 0x00000000); so_method(so, tesla, 0x1224, 1); @@ -110,7 +111,8 @@ nv50_state_validate_fb(struct nv50_context *nv50) so_data(so, NV50TCL_ZETA_FORMAT_S8Z24_UNORM); break; } - so_data(so, bo->tile_mode << 4); + so_data(so, nv50_miptree(pt)-> + level[fb->zsbuf->level].tile_mode << 4); so_data(so, 0x00000000); so_method(so, tesla, 0x1538, 1); @@ -187,6 +189,8 @@ nv50_state_emit(struct nv50_context *nv50) so_emit(chan, nv50->state.vertprog); if (nv50->state.dirty & NV50_NEW_FRAGPROG) so_emit(chan, nv50->state.fragprog); + if (nv50->state.dirty & (NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG)) + so_emit(chan, nv50->state.programs); if (nv50->state.dirty & NV50_NEW_RASTERIZER) so_emit(chan, nv50->state.rast); if (nv50->state.dirty & NV50_NEW_BLEND_COLOUR) @@ -238,6 +242,9 @@ nv50_state_validate(struct nv50_context *nv50) if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_FRAGPROG_CB)) nv50_fragprog_validate(nv50); + if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG)) + nv50_linkage_validate(nv50); + if (nv50->dirty & NV50_NEW_RASTERIZER) so_ref(nv50->rasterizer->so, &nv50->state.rast); diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c index b266324f58d..6bf6f773b0c 100644 --- a/src/gallium/drivers/nv50/nv50_surface.c +++ b/src/gallium/drivers/nv50/nv50_surface.c @@ -60,13 +60,13 @@ nv50_surface_set(struct nv50_screen *screen, struct pipe_surface *ps, int dst) format = nv50_format(ps->format); if (format < 0) return 1; - + if (!bo->tile_flags) { BEGIN_RING(chan, eng2d, mthd, 2); OUT_RING (chan, format); OUT_RING (chan, 1); BEGIN_RING(chan, eng2d, mthd + 0x14, 5); - OUT_RING (chan, mt->level[0].pitch); + OUT_RING (chan, mt->level[ps->level].pitch); OUT_RING (chan, ps->width); OUT_RING (chan, ps->height); OUT_RELOCh(chan, bo, ps->offset, flags); @@ -75,7 +75,7 @@ nv50_surface_set(struct nv50_screen *screen, struct pipe_surface *ps, int dst) BEGIN_RING(chan, eng2d, mthd, 5); OUT_RING (chan, format); OUT_RING (chan, 0); - OUT_RING (chan, bo->tile_mode << 4); + OUT_RING (chan, mt->level[ps->level].tile_mode << 4); OUT_RING (chan, 1); OUT_RING (chan, 0); BEGIN_RING(chan, eng2d, mthd + 0x18, 4); diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c index e9c3562194b..bb7731855cd 100644 --- a/src/gallium/drivers/nv50/nv50_transfer.c +++ b/src/gallium/drivers/nv50/nv50_transfer.c @@ -89,14 +89,14 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, if (src_bo->tile_flags) { BEGIN_RING(chan, m2mf, NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_IN, 1); - OUT_RING (chan, (sy << 16) | sx); + OUT_RING (chan, (sy << 16) | (sx * cpp)); } else { src_offset += (line_count * src_pitch); } if (dst_bo->tile_flags) { BEGIN_RING(chan, m2mf, NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_OUT, 1); - OUT_RING (chan, (dy << 16) | dx); + OUT_RING (chan, (dy << 16) | (dx * cpp)); } else { dst_offset += (line_count * dst_pitch); } diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile index d7a2c8c462c..93c2152edce 100644 --- a/src/gallium/drivers/r300/Makefile +++ b/src/gallium/drivers/r300/Makefile @@ -9,6 +9,7 @@ C_SOURCES = \ r300_chipset.c \ r300_clear.c \ r300_context.c \ + r300_debug.c \ r300_emit.c \ r300_flush.c \ r300_fs.c \ diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index da67bc29b89..9cc455135db 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -22,6 +22,9 @@ #include "r300_context.h" +#include "r300_flush.h" +#include "r300_state_invariant.h" + static boolean r300_draw_range_elements(struct pipe_context* pipe, struct pipe_buffer* indexBuffer, unsigned indexSize, @@ -146,6 +149,8 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300->context.winsys = (struct pipe_winsys*)r300_winsys; r300->context.screen = r300_screen(screen); + r300_init_debug(r300); + r300->context.destroy = r300_destroy_context; r300->context.clear = r300_clear; diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index f78492d4aa9..52b1c9a6b26 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -184,8 +184,15 @@ struct r300_texture { /* Offsets into the buffer. */ unsigned offset[PIPE_MAX_TEXTURE_LEVELS]; - /* Stride (pitch?) of this texture in bytes */ - unsigned stride; + /** + * If non-zero, override the natural texture layout with + * a custom stride (in bytes). + * + * \note Mipmapping fails for textures with a non-natural layout! + * + * \sa r300_texture_get_stride + */ + unsigned stride_override; /* Total size of this texture, in bytes. */ unsigned size; @@ -211,10 +218,7 @@ struct r300_vertex_format { int fs_tab[16]; }; -static struct pipe_viewport_state r300_viewport_identity = { - .scale = {1.0, 1.0, 1.0, 1.0}, - .translate = {0.0, 0.0, 0.0, 0.0}, -}; +extern struct pipe_viewport_state r300_viewport_identity; struct r300_context { /* Parent class */ @@ -275,6 +279,9 @@ struct r300_context { uint32_t dirty_state; /* Flag indicating whether or not the HW is dirty. */ uint32_t dirty_hw; + + /** Combination of DBG_xxx flags */ + unsigned debug; }; /* Convenience cast wrapper. */ @@ -288,4 +295,40 @@ struct draw_stage* r300_draw_stage(struct r300_context* r300); void r300_init_state_functions(struct r300_context* r300); void r300_init_surface_functions(struct r300_context* r300); +/* Debug functionality. */ + +/** + * Debug flags to disable/enable certain groups of debugging outputs. + * + * \note These may be rather coarse, and the grouping may be impractical. + * If you find, while debugging the driver, that a different grouping + * of these flags would be beneficial, just feel free to change them + * but make sure to update the documentation in r300_debug.c to reflect + * those changes. + */ +/*@{*/ +#define DBG_HELP 0x0000001 +#define DBG_FP 0x0000002 +#define DBG_VP 0x0000004 +#define DBG_CS 0x0000008 +#define DBG_DRAW 0x0000010 +/*@}*/ + +static INLINE boolean DBG_ON(struct r300_context * ctx, unsigned flags) +{ + return (ctx->debug & flags) ? true : false; +} + +static INLINE void DBG(struct r300_context * ctx, unsigned flags, const char * fmt, ...) +{ + if (DBG_ON(ctx, flags)) { + va_list va; + va_start(va, fmt); + debug_vprintf(fmt, va); + va_end(va); + } +} + +void r300_init_debug(struct r300_context * ctx); + #endif /* R300_CONTEXT_H */ diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h index 71b142c0dbf..0a7e4703636 100644 --- a/src/gallium/drivers/r300/r300_cs.h +++ b/src/gallium/drivers/r300/r300_cs.h @@ -49,7 +49,8 @@ (RADEON_CP_PACKET0 | ((count) << 16) | ((register) >> 2)) #define CS_LOCALS(context) \ - struct r300_winsys* cs_winsys = context->winsys; \ + struct r300_context* const cs_context_copy = (context); \ + struct r300_winsys* cs_winsys = cs_context_copy->winsys; \ int cs_count = 0; #define CHECK_CS(size) \ @@ -58,7 +59,7 @@ #define BEGIN_CS(size) do { \ CHECK_CS(size); \ if (VERY_VERBOSE_CS) { \ - debug_printf("r300: BEGIN_CS, count %d, in %s (%s:%d)\n", \ + DBG(cs_context_copy, DBG_CS, "r300: BEGIN_CS, count %d, in %s (%s:%d)\n", \ size, __FUNCTION__, __FILE__, __LINE__); \ } \ cs_winsys->begin_cs(cs_winsys, (size), \ @@ -78,7 +79,7 @@ #define OUT_CS_REG(register, value) do { \ if (VERY_VERBOSE_REGISTERS) \ - debug_printf("r300: writing 0x%08X to register 0x%04X\n", \ + DBG(cs_context_copy, DBG_CS, "r300: writing 0x%08X to register 0x%04X\n", \ value, register); \ assert(register); \ OUT_CS(CP_PACKET0(register, 0)); \ @@ -89,14 +90,14 @@ * not the actual packet0 count! */ #define OUT_CS_REG_SEQ(register, count) do { \ if (VERY_VERBOSE_REGISTERS) \ - debug_printf("r300: writing register sequence of %d to 0x%04X\n", \ + DBG(cs_context_copy, DBG_CS, "r300: writing register sequence of %d to 0x%04X\n", \ count, register); \ assert(register); \ OUT_CS(CP_PACKET0(register, ((count) - 1))); \ } while (0) #define OUT_CS_RELOC(bo, offset, rd, wd, flags) do { \ - debug_printf("r300: writing relocation for buffer %p, offset %d, " \ + DBG(cs_context_copy, DBG_CS, "r300: writing relocation for buffer %p, offset %d, " \ "domains (%d, %d, %d)\n", \ bo, offset, rd, wd, flags); \ assert(bo); \ @@ -107,7 +108,7 @@ #define END_CS do { \ if (VERY_VERBOSE_CS) { \ - debug_printf("r300: END_CS in %s (%s:%d)\n", __FUNCTION__, \ + DBG(cs_context_copy, DBG_CS, "r300: END_CS in %s (%s:%d)\n", __FUNCTION__, \ __FILE__, __LINE__); \ } \ if (cs_count != 0) \ @@ -117,7 +118,7 @@ #define FLUSH_CS do { \ if (VERY_VERBOSE_CS) { \ - debug_printf("r300: FLUSH_CS in %s (%s:%d)\n\n", __FUNCTION__, \ + DBG(cs_context_copy, DBG_CS, "r300: FLUSH_CS in %s (%s:%d)\n\n", __FUNCTION__, \ __FILE__, __LINE__); \ } \ cs_winsys->flush_cs(cs_winsys); \ @@ -127,7 +128,7 @@ #define OUT_CS_ONE_REG(register, count) do { \ if (VERY_VERBOSE_REGISTERS) \ - debug_printf("r300: writing data sequence of %d to 0x%04X\n", \ + DBG(cs_context_copy, DBG_CS, "r300: writing data sequence of %d to 0x%04X\n", \ count, register); \ assert(register); \ OUT_CS(CP_PACKET0(register, ((count) - 1)) | RADEON_ONE_REG_WR); \ @@ -141,7 +142,7 @@ } while (0) #define OUT_CS_INDEX_RELOC(bo, offset, count, rd, wd, flags) do { \ - debug_printf("r300: writing relocation for index buffer %p," \ + DBG(cs_context_copy, DBG_CS, "r300: writing relocation for index buffer %p," \ "offset %d\n", bo, offset); \ assert(bo); \ OUT_CS(offset); \ diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c new file mode 100644 index 00000000000..15308dda1de --- /dev/null +++ b/src/gallium/drivers/r300/r300_debug.c @@ -0,0 +1,88 @@ +/* + * Copyright 2009 Nicolai Haehnle <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "r300_context.h" + +#include <ctype.h> + + +struct debug_option { + const char * name; + unsigned flag; + const char * description; +}; + +static struct debug_option debug_options[] = { + { "help", DBG_HELP, "Helpful meta-information about the driver" }, + { "fp", DBG_FP, "Fragment program handling" }, + { "vp", DBG_VP, "Vertex program handling" }, + { "cs", DBG_CS, "Command submissions" }, + { "draw", DBG_DRAW, "Draw and emit" }, + + { "all", ~0, "Convenience option that enables all debug flags" }, + + /* must be last */ + { 0, 0, 0 } +}; + +void r300_init_debug(struct r300_context * ctx) +{ + const char * options = debug_get_option("RADEON_DEBUG", 0); + boolean printhint = false; + + if (options) { + while(*options) { + if (*options == ' ' || *options == ',') { + options++; + continue; + } + + size_t length = strcspn(options, " ,"); + struct debug_option * opt; + + for(opt = debug_options; opt->name; ++opt) { + if (!strncmp(options, opt->name, length)) { + ctx->debug |= opt->flag; + break; + } + } + + if (!opt->name) { + debug_printf("Unknown debug option: %s\n", options); + printhint = true; + } + + options += length; + } + + if (!ctx->debug) + printhint = true; + } + + if (printhint || ctx->debug & DBG_HELP) { + debug_printf("You can enable debug output by setting the RADEON_DEBUG environment variable\n" + "to a comma-separated list of debug options. Available options are:\n"); + for(struct debug_option * opt = debug_options; opt->name; ++opt) { + debug_printf(" %s: %s\n", opt->name, opt->description); + } + } +} diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index bd4d59e6f1a..a1b36ba2ed1 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -25,6 +25,7 @@ #include "r300_emit.h" #include "r300_fs.h" +#include "r300_state_derived.h" #include "r300_vs.h" void r300_emit_blend_state(struct r300_context* r300, @@ -282,7 +283,7 @@ void r300_emit_fb_state(struct r300_context* r300, for (i = 0; i < fb->nr_cbufs; i++) { tex = (struct r300_texture*)fb->cbufs[i]->texture; assert(tex && tex->buffer && "cbuf is marked, but NULL!"); - pixpitch = tex->stride / tex->tex.block.size; + pixpitch = r300_texture_get_stride(tex, 0) / tex->tex.block.size; OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0 + (4 * i), 1); OUT_CS_RELOC(tex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); @@ -299,7 +300,7 @@ void r300_emit_fb_state(struct r300_context* r300, if (fb->zsbuf) { tex = (struct r300_texture*)fb->zsbuf->texture; assert(tex && tex->buffer && "zsbuf is marked, but NULL!"); - pixpitch = tex->stride / tex->tex.block.size; + pixpitch = r300_texture_get_stride(tex, 0) / tex->tex.block.size; OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1); OUT_CS_RELOC(tex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); @@ -490,7 +491,7 @@ void r300_emit_vertex_buffer(struct r300_context* r300) { CS_LOCALS(r300); - debug_printf("r300: Preparing vertex buffer %p for render, " + DBG(r300, DBG_DRAW, "r300: Preparing vertex buffer %p for render, " "vertex size %d\n", r300->vbo, r300->vertex_info.vinfo.size); /* Set the pointer to our vertex buffer. The emitted values are this: diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 350691d592d..c4002b8e5d0 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -56,6 +56,11 @@ void r500_emit_fragment_program_code(struct r300_context* r300, void r300_emit_fb_state(struct r300_context* r300, struct pipe_framebuffer_state* fb); +void r300_emit_query_begin(struct r300_context* r300, + struct r300_query* query); +void r300_emit_query_end(struct r300_context* r300, + struct r300_query* query); + void r300_emit_rs_state(struct r300_context* r300, struct r300_rs_state* rs); void r300_emit_rs_block_state(struct r300_context* r300, diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index 36463b9a2eb..a0e848a59ac 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -96,7 +96,7 @@ void r300_translate_fragment_shader(struct r300_context* r300, memset(&compiler, 0, sizeof(compiler)); rc_init(&compiler.Base); - compiler.Base.Debug = 1; + compiler.Base.Debug = DBG_ON(r300, DBG_FP); compiler.code = &fs->code; compiler.is_r500 = r300_screen(r300->context.screen)->caps->is_r500; diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c index 1d5185b417e..2880d34877f 100644 --- a/src/gallium/drivers/r300/r300_query.c +++ b/src/gallium/drivers/r300/r300_query.c @@ -22,6 +22,8 @@ #include "r300_query.h" +#include "r300_emit.h" + static struct pipe_query* r300_create_query(struct pipe_context* pipe, unsigned query_type) { diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index cd458d019ae..737396d8d97 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -26,15 +26,17 @@ #include "r300_cs.h" #include "r300_context.h" +#include "r300_emit.h" #include "r300_reg.h" #include "r300_state_derived.h" /* r300_render: Vertex and index buffer primitive emission. */ +#define R300_MAX_VBO_SIZE (1024 * 1024) struct r300_render { /* Parent class */ struct vbuf_render base; - + /* Pipe context */ struct r300_context* r300; @@ -45,7 +47,10 @@ struct r300_render { /* VBO */ struct pipe_buffer* vbo; - size_t vbo_alloc_size; + size_t vbo_size; + size_t vbo_offset; + size_t vbo_max_used; + void * vbo_ptr; }; static INLINE struct r300_render* @@ -74,19 +79,18 @@ static boolean r300_render_allocate_vertices(struct vbuf_render* render, struct pipe_screen* screen = r300->context.screen; size_t size = (size_t)vertex_size * (size_t)count; - if (r300render->vbo && (size > r300render->vbo_alloc_size)) { - pipe_buffer_reference(&r300render->vbo, NULL); - } - - if (!r300render->vbo) { + if (size + r300render->vbo_offset > r300render->vbo_size) + { r300render->vbo = pipe_buffer_create(screen, 64, PIPE_BUFFER_USAGE_VERTEX, - size); + R300_MAX_VBO_SIZE); + r300render->vbo_size = R300_MAX_VBO_SIZE; } - r300render->vbo_alloc_size = MAX2(size, r300render->vbo_alloc_size); r300render->vertex_size = vertex_size; + r300->vbo = r300render->vbo; + r300->vbo_offset = r300render->vbo_offset; return (r300render->vbo) ? TRUE : FALSE; } @@ -96,8 +100,10 @@ static void* r300_render_map_vertices(struct vbuf_render* render) struct r300_render* r300render = r300_render(render); struct pipe_screen* screen = r300render->r300->context.screen; - return (unsigned char*)pipe_buffer_map(screen, r300render->vbo, - PIPE_BUFFER_USAGE_CPU_WRITE); + r300render->vbo_ptr = pipe_buffer_map(screen, r300render->vbo, + PIPE_BUFFER_USAGE_CPU_WRITE); + + return (r300render->vbo_ptr + r300render->vbo_offset); } static void r300_render_unmap_vertices(struct vbuf_render* render, @@ -106,15 +112,24 @@ static void r300_render_unmap_vertices(struct vbuf_render* render, { struct r300_render* r300render = r300_render(render); struct pipe_screen* screen = r300render->r300->context.screen; + CS_LOCALS(r300render->r300); + BEGIN_CS(2); + OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, max); + END_CS; + r300render->vbo_max_used = MAX2(r300render->vbo_max_used, + r300render->vertex_size * (max + 1)); pipe_buffer_unmap(screen, r300render->vbo); } static void r300_render_release_vertices(struct vbuf_render* render) { struct r300_render* r300render = r300_render(render); + struct r300_context* r300 = r300render->r300; - pipe_buffer_reference(&r300render->vbo, NULL); + r300render->vbo_offset += r300render->vbo_max_used; + r300render->vbo_max_used = 0; + r300->vbo = NULL; } static boolean r300_render_set_primitive(struct vbuf_render* render, @@ -162,14 +177,12 @@ static boolean r300_render_set_primitive(struct vbuf_render* render, return TRUE; } -static void prepare_render(struct r300_render* render, unsigned count) +static void r300_prepare_render(struct r300_render* render, unsigned count) { struct r300_context* r300 = render->r300; CS_LOCALS(r300); - r300->vbo = render->vbo; - r300_emit_dirty_state(r300); } @@ -182,9 +195,9 @@ static void r300_render_draw_arrays(struct vbuf_render* render, CS_LOCALS(r300); - prepare_render(r300render, count); + r300_prepare_render(r300render, count); - debug_printf("r300: Doing vbuf render, count %d\n", count); + DBG(r300, DBG_DRAW, "r300: Doing vbuf render, count %d\n", count); BEGIN_CS(2); OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0); @@ -207,7 +220,7 @@ static void r300_render_draw(struct vbuf_render* render, CS_LOCALS(r300); - prepare_render(r300render, count); + r300_prepare_render(r300render, count); /* Send our indices into an index buffer. */ index_buffer = pipe_buffer_create(screen, 64, PIPE_BUFFER_USAGE_VERTEX, @@ -216,23 +229,6 @@ static void r300_render_draw(struct vbuf_render* render, return; } -/* - index_map = pipe_buffer_map(screen, index_buffer, - PIPE_BUFFER_USAGE_CPU_WRITE); - memcpy(index_map, indices, count); - pipe_buffer_unmap(screen, index_buffer); - - debug_printf("r300: Doing indexbuf render, count %d\n", count); - - BEGIN_CS(8); - OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 0); - OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | - r300render->hwprim); - OUT_CS_PKT3(R300_PACKET3_INDX_BUFFER, 2); - OUT_CS(R300_INDX_BUFFER_ONE_REG_WR | (R300_VAP_PORT_IDX0 >> 2)); - OUT_CS_INDEX_RELOC(index_buffer, 0, count, RADEON_GEM_DOMAIN_GTT, 0, 0); - END_CS; */ - BEGIN_CS(2 + (count+1)/2); OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, (count+1)/2); OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | @@ -271,6 +267,10 @@ static struct vbuf_render* r300_render_create(struct r300_context* r300) r300render->base.release_vertices = r300_render_release_vertices; r300render->base.destroy = r300_render_destroy; + r300render->vbo = NULL; + r300render->vbo_size = 0; + r300render->vbo_offset = 0; + return &r300render->base; } diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 15740f61252..593178c50be 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -323,13 +323,14 @@ r300_get_tex_transfer(struct pipe_screen *screen, if (trans) { pipe_texture_reference(&trans->transfer.texture, texture); trans->transfer.format = texture->format; + trans->transfer.x = x; + trans->transfer.y = y; trans->transfer.width = w; trans->transfer.height = h; trans->transfer.block = texture->block; trans->transfer.nblocksx = texture->nblocksx[level]; trans->transfer.nblocksy = texture->nblocksy[level]; - trans->transfer.stride = align(pf_get_stride(&trans->transfer.block, - texture->width[level]), 32); + trans->transfer.stride = r300_texture_get_stride(tex, level); trans->transfer.usage = usage; trans->offset = offset; } @@ -356,7 +357,7 @@ static void* r300_transfer_map(struct pipe_screen* screen, if (transfer->usage != PIPE_TRANSFER_READ) { flags |= PIPE_BUFFER_USAGE_CPU_WRITE; } - + map = pipe_buffer_map(screen, tex->buffer, flags); if (!map) { diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index c16cadd0407..88cb9af6fb7 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -20,10 +20,11 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include "util/u_debug.h" #include "util/u_math.h" #include "util/u_pack_color.h" -#include "util/u_debug.h" +#include "tgsi/tgsi_parse.h" #include "pipe/p_config.h" #include "pipe/internal/p_winsys_screen.h" @@ -429,6 +430,9 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state) r300->rs_state = rs; r300->dirty_state |= R300_NEW_RASTERIZER; + r300->dirty_state |= R300_NEW_RS_BLOCK; + r300->dirty_state |= R300_NEW_SCISSOR; + r300->dirty_state |= R300_NEW_VIEWPORT; } /* Free rasterizer state. */ diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index c01e61a9b19..5f6b225d340 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -23,6 +23,7 @@ #include "r300_state_derived.h" #include "r300_fs.h" +#include "r300_state_inlines.h" #include "r300_vs.h" /* r300_state_derived: Various bits of state which are dependent upon @@ -195,13 +196,13 @@ static void r300_vertex_psc(struct r300_context* r300, * and not on attrib information. */ if (r300screen->caps->has_tcl) { attrib_count = r300->vs->info.num_inputs; - debug_printf("r300: routing %d attribs in psc for vs\n", + DBG(r300, DBG_DRAW, "r300: routing %d attribs in psc for vs\n", attrib_count); } else { attrib_count = vinfo->num_attribs; - debug_printf("r300: attrib count: %d\n", attrib_count); + DBG(r300, DBG_DRAW, "r300: attrib count: %d\n", attrib_count); for (i = 0; i < attrib_count; i++) { - debug_printf("r300: attrib: offset %d, interp %d, size %d," + DBG(r300, DBG_DRAW, "r300: attrib: offset %d, interp %d, size %d," " tab %d\n", vinfo->attrib[i].src_index, vinfo->attrib[i].interp_mode, vinfo->attrib[i].emit, tab[i]); @@ -299,18 +300,18 @@ static void r300_update_fs_tab(struct r300_context* r300) } /* Now that we know where everything is... */ - debug_printf("r300: fp input count: %d\n", info->num_inputs); + DBG(r300, DBG_DRAW, "r300: fp input count: %d\n", info->num_inputs); for (i = 0; i < info->num_inputs; i++) { switch (tab[i]) { case INTERP_LINEAR: - debug_printf("r300: attrib: " + DBG(r300, DBG_DRAW, "r300: attrib: " "stack offset %d, color, tab %d\n", i, cols_emitted); tab[i] = cols_emitted; cols_emitted++; break; case INTERP_PERSPECTIVE: - debug_printf("r300: attrib: " + DBG(r300, DBG_DRAW, "r300: attrib: " "stack offset %d, texcoord, tab %d\n", i, cols + texs); tab[i] = cols + texs; diff --git a/src/gallium/drivers/r300/r300_state_derived.h b/src/gallium/drivers/r300/r300_state_derived.h index 63ae8eb8d08..71a4a47b003 100644 --- a/src/gallium/drivers/r300/r300_state_derived.h +++ b/src/gallium/drivers/r300/r300_state_derived.h @@ -23,11 +23,7 @@ #ifndef R300_STATE_DERIVED_H #define R300_STATE_DERIVED_H -#include "draw/draw_vertex.h" - -#include "r300_context.h" -#include "r300_reg.h" -#include "r300_state_inlines.h" +struct r300_context; void r300_update_derived_state(struct r300_context* r300); diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c index 7d822fec483..3865730d635 100644 --- a/src/gallium/drivers/r300/r300_state_invariant.c +++ b/src/gallium/drivers/r300/r300_state_invariant.c @@ -23,6 +23,12 @@ #include "r300_state_invariant.h" + +struct pipe_viewport_state r300_viewport_identity = { + .scale = {1.0, 1.0, 1.0, 1.0}, + .translate = {0.0, 0.0, 0.0, 0.0}, +}; + /* Calculate and emit invariant state. This is data that the 3D engine * will probably want at the beginning of every CS, but it's not currently * handled by any CSO setup, and in addition it doesn't really change much. diff --git a/src/gallium/drivers/r300/r300_surface.c b/src/gallium/drivers/r300/r300_surface.c index 96e6e4a77d4..cc6288cb519 100644 --- a/src/gallium/drivers/r300/r300_surface.c +++ b/src/gallium/drivers/r300/r300_surface.c @@ -29,7 +29,7 @@ static void r300_surface_setup(struct r300_context* r300, unsigned w, unsigned h) { struct r300_capabilities* caps = r300_screen(r300->context.screen)->caps; - unsigned pixpitch = dest->stride / dest->tex.block.size; + unsigned pixpitch = r300_texture_get_stride(dest, 0) / dest->tex.block.size; CS_LOCALS(r300); r300_emit_blend_state(r300, &blend_clear_state); @@ -100,7 +100,7 @@ static void r300_surface_fill(struct pipe_context* pipe, struct r300_context* r300 = r300_context(pipe); struct r300_capabilities* caps = r300_screen(pipe->screen)->caps; struct r300_texture* tex = (struct r300_texture*)dest->texture; - unsigned pixpitch = tex->stride / tex->tex.block.size; + unsigned pixpitch = r300_texture_get_stride(tex, 0) / tex->tex.block.size; boolean invalid = FALSE; CS_LOCALS(r300); @@ -233,7 +233,7 @@ static void r300_surface_copy(struct pipe_context* pipe, struct r300_capabilities* caps = r300_screen(pipe->screen)->caps; struct r300_texture* srctex = (struct r300_texture*)src->texture; struct r300_texture* desttex = (struct r300_texture*)dest->texture; - unsigned pixpitch = srctex->stride / srctex->tex.block.size; + unsigned pixpitch = r300_texture_get_stride(srctex, 0) / srctex->tex.block.size; boolean invalid = FALSE; float fsrcx = srcx, fsrcy = srcy, fdestx = destx, fdesty = desty; CS_LOCALS(r300); diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 590052509cc..6e8c3683200 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -25,7 +25,6 @@ static void r300_setup_texture_state(struct r300_texture* tex, unsigned width, unsigned height, - unsigned pitch, unsigned levels) { struct r300_texture_state* state = &tex->state; @@ -38,7 +37,7 @@ static void r300_setup_texture_state(struct r300_texture* tex, /* XXX */ state->format1 = r300_translate_texformat(tex->tex.format); - state->format2 = pitch - 1; + state->format2 = r300_texture_get_stride(tex, 0); /* Assume (somewhat foolishly) that oversized textures will * not be permitted by the state tracker. */ @@ -50,13 +49,30 @@ static void r300_setup_texture_state(struct r300_texture* tex, } debug_printf("r300: Set texture state (%dx%d, pitch %d, %d levels)\n", - width, height, pitch, levels); + width, height, levels); +} + +/** + * Return the stride, in bytes, of the texture images of the given texture + * at the given level. + */ +unsigned r300_texture_get_stride(struct r300_texture* tex, unsigned level) +{ + if (tex->stride_override) + return tex->stride_override; + + if (level > tex->tex.last_level) { + debug_printf("%s: level (%u) > last_level (%u)\n", level, tex->tex.last_level); + return 0; + } + + return align(pf_get_stride(&tex->tex.block, tex->tex.width[level]), 32); } static void r300_setup_miptree(struct r300_texture* tex) { struct pipe_texture* base = &tex->tex; - int stride, size, offset; + int stride, size; int i; for (i = 0; i <= base->last_level; i++) { @@ -74,7 +90,7 @@ static void r300_setup_miptree(struct r300_texture* tex) * XXX * POT, uncompressed, unmippmapped textures can be aligned to 32, * instead of 64. */ - stride = align(pf_get_stride(&base->block, base->width[i]), 32); + stride = r300_texture_get_stride(tex, i); size = stride * base->nblocksy[i] * base->depth[i]; tex->offset[i] = align(tex->size, 32); @@ -84,10 +100,6 @@ static void r300_setup_miptree(struct r300_texture* tex) "(%dx%dx%d px, pitch %d bytes)\n", i, base->width[i], base->height[i], base->depth[i], stride); - /* Save stride of first level to the texture. */ - if (i == 0) { - tex->stride = stride; - } } } @@ -109,7 +121,7 @@ static struct pipe_texture* r300_setup_miptree(tex); r300_setup_texture_state(tex, template->width[0], template->height[0], - template->width[0], template->last_level); + template->last_level); tex->buffer = screen->buffer_create(screen, 1024, PIPE_BUFFER_USAGE_PIXEL, @@ -189,11 +201,10 @@ static struct pipe_texture* pipe_reference_init(&tex->tex.reference, 1); tex->tex.screen = screen; - tex->stride = *stride; + tex->stride_override = *stride; /* XXX */ - r300_setup_texture_state(tex, tex->tex.width[0], tex->tex.height[0], - tex->stride, 0); + r300_setup_texture_state(tex, tex->tex.width[0], tex->tex.height[0], 0); pipe_buffer_reference(&tex->buffer, buffer); @@ -221,7 +232,7 @@ boolean r300_get_texture_buffer(struct pipe_texture* texture, pipe_buffer_reference(buffer, tex->buffer); if (stride) { - *stride = tex->stride; + *stride = r300_texture_get_stride(tex, 0); } return TRUE; diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h index 3b56f0307c0..3109af5baca 100644 --- a/src/gallium/drivers/r300/r300_texture.h +++ b/src/gallium/drivers/r300/r300_texture.h @@ -30,8 +30,12 @@ #include "r300_context.h" #include "r300_reg.h" +struct r300_texture; + void r300_init_screen_texture_functions(struct pipe_screen* screen); +unsigned r300_texture_get_stride(struct r300_texture* tex, unsigned level); + /* Note the signature of R300_EASY_TX_FORMAT(A, R, G, B, FORMAT)... */ static INLINE uint32_t r300_translate_texformat(enum pipe_format format) { diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index d68a1041063..0913ca1bd5b 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -158,7 +158,6 @@ static unsigned translate_saturate(unsigned saturate) switch(saturate) { case TGSI_SAT_NONE: return SATURATE_OFF; case TGSI_SAT_ZERO_ONE: return SATURATE_ZERO_ONE; - case TGSI_SAT_MINUS_PLUS_ONE: return SATURATE_PLUS_MINUS_ONE; } fprintf(stderr, "Unknown saturate mode: %i\n", saturate); diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c index 2cb903bba2f..12a6e37be62 100644 --- a/src/gallium/drivers/r300/r300_vs.c +++ b/src/gallium/drivers/r300/r300_vs.c @@ -116,7 +116,7 @@ void r300_translate_vertex_shader(struct r300_context* r300, /* Setup the compiler */ rc_init(&compiler.Base); - compiler.Base.Debug = 1; + compiler.Base.Debug = DBG_ON(r300, DBG_VP); compiler.code = &vs->code; compiler.UserData = vs; diff --git a/src/gallium/drivers/softpipe/sp_clear.c b/src/gallium/drivers/softpipe/sp_clear.c index d3af18e162b..8fac8e6e05f 100644 --- a/src/gallium/drivers/softpipe/sp_clear.c +++ b/src/gallium/drivers/softpipe/sp_clear.c @@ -36,8 +36,6 @@ #include "util/u_pack_color.h" #include "sp_clear.h" #include "sp_context.h" -#include "sp_surface.h" -#include "sp_state.h" #include "sp_tile_cache.h" diff --git a/src/gallium/drivers/softpipe/sp_clear.h b/src/gallium/drivers/softpipe/sp_clear.h index 2e450672f58..9be3b86fe9f 100644 --- a/src/gallium/drivers/softpipe/sp_clear.h +++ b/src/gallium/drivers/softpipe/sp_clear.h @@ -32,7 +32,6 @@ #ifndef SP_CLEAR_H #define SP_CLEAR_H -#include "pipe/p_state.h" struct pipe_context; extern void diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 86df320ea8a..c699c433e9c 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -83,7 +83,8 @@ softpipe_unmap_transfers(struct softpipe_context *sp) } -static void softpipe_destroy( struct pipe_context *pipe ) +static void +softpipe_destroy( struct pipe_context *pipe ) { struct softpipe_context *softpipe = softpipe_context( pipe ); uint i; @@ -121,6 +122,15 @@ static void softpipe_destroy( struct pipe_context *pipe ) FREE( softpipe ); } + +/** + * if (the texture is being used as a framebuffer surface) + * return PIPE_REFERENCED_FOR_WRITE + * else if (the texture is a bound texture source) + * return PIPE_REFERENCED_FOR_READ XXX not done yet + * else + * return PIPE_UNREFERENCED + */ static unsigned int softpipe_is_texture_referenced( struct pipe_context *pipe, struct pipe_texture *texture, @@ -129,15 +139,17 @@ softpipe_is_texture_referenced( struct pipe_context *pipe, struct softpipe_context *softpipe = softpipe_context( pipe ); unsigned i; - if(softpipe->dirty_render_cache) { + if (softpipe->dirty_render_cache) { for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++) { - if(softpipe->framebuffer.cbufs[i] && - softpipe->framebuffer.cbufs[i]->texture == texture) + if (softpipe->framebuffer.cbufs[i] && + softpipe->framebuffer.cbufs[i]->texture == texture) { return PIPE_REFERENCED_FOR_WRITE; + } } - if(softpipe->framebuffer.zsbuf && - softpipe->framebuffer.zsbuf->texture == texture) + if (softpipe->framebuffer.zsbuf && + softpipe->framebuffer.zsbuf->texture == texture) { return PIPE_REFERENCED_FOR_WRITE; + } } /* FIXME: we also need to do the same for the texture cache */ @@ -145,6 +157,7 @@ softpipe_is_texture_referenced( struct pipe_context *pipe, return PIPE_UNREFERENCED; } + static unsigned int softpipe_is_buffer_referenced( struct pipe_context *pipe, struct pipe_buffer *buf) @@ -152,6 +165,7 @@ softpipe_is_buffer_referenced( struct pipe_context *pipe, return PIPE_UNREFERENCED; } + struct pipe_context * softpipe_create( struct pipe_screen *screen ) { @@ -222,7 +236,6 @@ softpipe_create( struct pipe_screen *screen ) softpipe->pipe.is_buffer_referenced = softpipe_is_buffer_referenced; softpipe_init_query_funcs( softpipe ); - softpipe_init_texture_funcs( softpipe ); /* * Alloc caches for accessing drawing surfaces and textures. diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index 70f09324311..548e40c4a8c 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -30,26 +30,21 @@ * Michel Dänzer <[email protected]> */ -#include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/internal/p_winsys_screen.h" #include "util/u_math.h" #include "util/u_memory.h" #include "sp_context.h" #include "sp_state.h" #include "sp_texture.h" -#include "sp_tile_cache.h" #include "sp_screen.h" #include "sp_winsys.h" -/* Simple, maximally packed layout. - */ - - -/* Conventional allocation path for non-display textures: +/** + * Conventional allocation path for non-display textures: + * Use a simple, maximally packed layout. */ static boolean softpipe_texture_layout(struct pipe_screen *screen, @@ -89,6 +84,10 @@ softpipe_texture_layout(struct pipe_screen *screen, return spt->buffer != NULL; } + +/** + * Texture layout for simple color buffers. + */ static boolean softpipe_displaytarget_layout(struct pipe_screen *screen, struct softpipe_texture * spt) @@ -112,9 +111,6 @@ softpipe_displaytarget_layout(struct pipe_screen *screen, } - - - static struct pipe_texture * softpipe_texture_create(struct pipe_screen *screen, const struct pipe_texture *templat) @@ -342,14 +338,13 @@ softpipe_transfer_map( struct pipe_screen *screen, /* May want to different things here depending on read/write nature * of the map: */ - if (transfer->texture && transfer->usage != PIPE_TRANSFER_READ) - { + if (transfer->texture && transfer->usage != PIPE_TRANSFER_READ) { /* Do something to notify sharing contexts of a texture change. * In softpipe, that would mean flushing the texture cache. */ softpipe_screen(screen)->timestamp++; } - + xfer_map = map + softpipe_transfer(transfer)->offset + transfer->y / transfer->block.height * transfer->stride + transfer->x / transfer->block.width * transfer->block.size; @@ -360,7 +355,7 @@ softpipe_transfer_map( struct pipe_screen *screen, static void softpipe_transfer_unmap(struct pipe_screen *screen, - struct pipe_transfer *transfer) + struct pipe_transfer *transfer) { struct softpipe_texture *spt; @@ -377,12 +372,6 @@ softpipe_transfer_unmap(struct pipe_screen *screen, void -softpipe_init_texture_funcs(struct softpipe_context *sp) -{ -} - - -void softpipe_init_screen_texture_funcs(struct pipe_screen *screen) { screen->texture_create = softpipe_texture_create; @@ -404,7 +393,7 @@ softpipe_get_texture_buffer( struct pipe_texture *texture, struct pipe_buffer **buf, unsigned *stride ) { - struct softpipe_texture *tex = (struct softpipe_texture *)texture; + struct softpipe_texture *tex = (struct softpipe_texture *) texture; if (!tex) return FALSE; diff --git a/src/gallium/drivers/softpipe/sp_texture.h b/src/gallium/drivers/softpipe/sp_texture.h index 893aa7d11d8..b1b6130b22c 100644 --- a/src/gallium/drivers/softpipe/sp_texture.h +++ b/src/gallium/drivers/softpipe/sp_texture.h @@ -74,9 +74,6 @@ softpipe_transfer(struct pipe_transfer *pt) extern void -softpipe_init_texture_funcs( struct softpipe_context *softpipe ); - -extern void softpipe_init_screen_texture_funcs(struct pipe_screen *screen); diff --git a/src/gallium/include/pipe/p_inlines.h b/src/gallium/include/pipe/p_inlines.h index a5c1e8270a3..25257bdf3f4 100644 --- a/src/gallium/include/pipe/p_inlines.h +++ b/src/gallium/include/pipe/p_inlines.h @@ -118,7 +118,7 @@ pipe_buffer_write(struct pipe_screen *screen, unsigned offset, unsigned size, const void *data) { - uint8_t *map; + void *map; assert(offset < buf->size); assert(offset + size <= buf->size); @@ -129,7 +129,7 @@ pipe_buffer_write(struct pipe_screen *screen, PIPE_BUFFER_USAGE_FLUSH_EXPLICIT); assert(map); if(map) { - memcpy(map + offset, data, size); + memcpy((uint8_t *)map + offset, data, size); pipe_buffer_flush_mapped_range(screen, buf, offset, size); pipe_buffer_unmap(screen, buf); } @@ -141,7 +141,7 @@ pipe_buffer_read(struct pipe_screen *screen, unsigned offset, unsigned size, void *data) { - uint8_t *map; + void *map; assert(offset < buf->size); assert(offset + size <= buf->size); @@ -150,11 +150,31 @@ pipe_buffer_read(struct pipe_screen *screen, map = pipe_buffer_map_range(screen, buf, offset, size, PIPE_BUFFER_USAGE_CPU_READ); assert(map); if(map) { - memcpy(data, map + offset, size); + memcpy(data, (const uint8_t *)map + offset, size); pipe_buffer_unmap(screen, buf); } } +static INLINE void * +pipe_transfer_map( struct pipe_transfer *transf ) +{ + struct pipe_screen *screen = transf->texture->screen; + return screen->transfer_map(screen, transf); +} + +static INLINE void +pipe_transfer_unmap( struct pipe_transfer *transf ) +{ + struct pipe_screen *screen = transf->texture->screen; + screen->transfer_unmap(screen, transf); +} + +static INLINE void +pipe_transfer_destroy( struct pipe_transfer *transf ) +{ + struct pipe_screen *screen = transf->texture->screen; + return screen->tex_transfer_destroy(transf); +} #ifdef __cplusplus } diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index 626bedb35a8..2187f5b3677 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -307,7 +307,7 @@ struct pipe_transfer unsigned nblocksx; /**< allocated width in blocks */ unsigned nblocksy; /**< allocated height in blocks */ unsigned stride; /**< stride in bytes between rows of blocks */ - unsigned usage; /**< PIPE_TRANSFER_* */ + enum pipe_transfer_usage usage; /**< PIPE_TRANSFER_* */ struct pipe_texture *texture; /**< texture to transfer to/from */ unsigned face; diff --git a/src/gallium/state_trackers/g3dvl/vl_basic_csc.c b/src/gallium/state_trackers/g3dvl/vl_basic_csc.c index 20d682de3fb..b1683b891be 100644 --- a/src/gallium/state_trackers/g3dvl/vl_basic_csc.c +++ b/src/gallium/state_trackers/g3dvl/vl_basic_csc.c @@ -51,6 +51,7 @@ static int vlResizeFrameBuffer struct vlBasicCSC *basic_csc; struct pipe_context *pipe; struct pipe_texture template; + float clear_color[4]; assert(csc); @@ -68,7 +69,12 @@ static int vlResizeFrameBuffer basic_csc->viewport.translate[1] = 0; basic_csc->viewport.translate[2] = 0; basic_csc->viewport.translate[3] = 0; - + + clear_color[0] = 0.0f; + clear_color[1] = 0.0f; + clear_color[2] = 0.0f; + clear_color[3] = 0.0f; + if (basic_csc->framebuffer_tex) { pipe_surface_reference(&basic_csc->framebuffer.cbufs[0], NULL); @@ -98,7 +104,7 @@ static int vlResizeFrameBuffer /* Clear to black, in case video doesn't fill the entire window */ pipe->set_framebuffer_state(pipe, &basic_csc->framebuffer); - pipe->clear(pipe, PIPE_CLEAR_COLOR, 0, 0.0f, 0); + pipe->clear(pipe, PIPE_CLEAR_COLOR, clear_color, 0.0f, 0); return 0; } @@ -425,7 +431,7 @@ static int vlCreateVertexShader */ for (i = 0; i < 2; ++i) { - inst = vl_inst4(TGSI_OPCODE_MADD, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i, TGSI_FILE_CONSTANT, i * 2, TGSI_FILE_CONSTANT, i * 2 + 1); + inst = vl_inst4(TGSI_OPCODE_MAD, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i, TGSI_FILE_CONSTANT, i * 2, TGSI_FILE_CONSTANT, i * 2 + 1); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf_shaders.inc b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf_shaders.inc index ef4a4b2add9..34d93e1df0a 100644 --- a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf_shaders.inc +++ b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf_shaders.inc @@ -615,7 +615,7 @@ static int vlCreateFragmentShaderFieldPMB ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); /* floor t3, t3 ; Get rid of fractional part */ - inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3); + inst = vl_inst2(TGSI_OPCODE_FLR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); /* mul t3, t3, c1.y ; Multiply by 2 */ @@ -632,7 +632,7 @@ static int vlCreateFragmentShaderFieldPMB /* TODO: Move to conditional tex fetch on t3 instead of lerp */ /* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */ - inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); + inst = vl_inst4(TGSI_OPCODE_LRP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); /* add o0, t0, t1 ; Add ref and differential to form final output */ @@ -969,7 +969,7 @@ static int vlCreateFragmentShaderFrameBMB } /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */ - inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); + inst = vl_inst4(TGSI_OPCODE_LRP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; @@ -1116,7 +1116,7 @@ static int vlCreateFragmentShaderFieldBMB ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); /* floor t3, t3 ; Get rid of fractional part */ - inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3); + inst = vl_inst2(TGSI_OPCODE_FLR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); /* mul t3, t3, c1.y ; Multiply by 2 */ @@ -1143,7 +1143,7 @@ static int vlCreateFragmentShaderFieldBMB /* TODO: Move to conditional tex fetch on t3 instead of lerp */ /* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */ - inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); + inst = vl_inst4(TGSI_OPCODE_LRP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); /* @@ -1158,11 +1158,11 @@ static int vlCreateFragmentShaderFieldBMB /* TODO: Move to conditional tex fetch on t3 instead of lerp */ /* lerp t2, t3, t4, t5 ; Choose between top and bottom fields based on Y % 2 */ - inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 2, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 5); + inst = vl_inst4(TGSI_OPCODE_LRP, TGSI_FILE_TEMPORARY, 2, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 5); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */ - inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); + inst = vl_inst4(TGSI_OPCODE_LRP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; diff --git a/src/gallium/state_trackers/xorg/xorg_composite.c b/src/gallium/state_trackers/xorg/xorg_composite.c index c708ac31702..e45a82944b8 100644 --- a/src/gallium/state_trackers/xorg/xorg_composite.c +++ b/src/gallium/state_trackers/xorg/xorg_composite.c @@ -4,6 +4,7 @@ #include "cso_cache/cso_context.h" #include "util/u_draw_quad.h" +#include "util/u_math.h" #include "pipe/p_inlines.h" @@ -40,9 +41,25 @@ static const struct xorg_composite_blend xorg_blends[] = { PIPE_BLENDFACTOR_INV_SRC_ALPHA, PIPE_BLENDFACTOR_INV_SRC_ALPHA }, }; + static INLINE void -pixel_to_float4(PictFormatPtr format, - CARD32 pixel, float *color) +pixel_to_float4(Pixel pixel, float *color) +{ + CARD32 r, g, b, a; + + a = (pixel >> 24) & 0xff; + r = (pixel >> 16) & 0xff; + g = (pixel >> 8) & 0xff; + b = (pixel >> 0) & 0xff; + color[0] = ((float)r) / 255.; + color[1] = ((float)g) / 255.; + color[2] = ((float)b) / 255.; + color[3] = ((float)a) / 255.; +} + +static INLINE void +render_pixel_to_float4(PictFormatPtr format, + CARD32 pixel, float *color) { CARD32 r, g, b, a; @@ -148,7 +165,7 @@ setup_vertex_data0(struct exa_context *ctx, setup_vertex0(vertices[3], dstX, dstY + height, ctx->solid_color); - return pipe_user_buffer_create(ctx->ctx->screen, + return pipe_user_buffer_create(ctx->pipe->screen, vertices, sizeof(vertices)); } @@ -194,11 +211,38 @@ setup_vertex_data1(struct exa_context *ctx, setup_vertex1(vertices[3], dstX, dstY + height, s0, t1); - return pipe_user_buffer_create(ctx->ctx->screen, + return pipe_user_buffer_create(ctx->pipe->screen, vertices, sizeof(vertices)); } +static struct pipe_buffer * +setup_vertex_data_tex(struct exa_context *ctx, + float x0, float y0, float x1, float y1, + float s0, float t0, float s1, float t1, + float z) +{ + float vertices[4][2][4]; + + /* 1st vertex */ + setup_vertex1(vertices[0], x0, y0, + s0, t0); + /* 2nd vertex */ + setup_vertex1(vertices[1], x1, y0, + s1, t0); + /* 3rd vertex */ + setup_vertex1(vertices[2], x1, y1, + s1, t1); + /* 4th vertex */ + setup_vertex1(vertices[3], x0, y1, + s0, t1); + + return pipe_user_buffer_create(ctx->pipe->screen, + vertices, + sizeof(vertices)); +} + + static INLINE void setup_vertex2(float vertex[3][4], float x, float y, @@ -253,7 +297,7 @@ setup_vertex_data2(struct exa_context *ctx, setup_vertex2(vertices[3], dstX, dstY + height, st0[0], st0[3], st1[0], st1[3]); - return pipe_user_buffer_create(ctx->ctx->screen, + return pipe_user_buffer_create(ctx->pipe->screen, vertices, sizeof(vertices)); } @@ -291,16 +335,20 @@ boolean xorg_composite_accelerated(int op, } static void -bind_framebuffer_state(struct exa_context *exa, PicturePtr pDstPicture, - struct exa_pixmap_priv *pDst) +bind_clip_state(struct exa_context *exa) +{ +} + +static void +bind_framebuffer_state(struct exa_context *exa, struct exa_pixmap_priv *pDst) { unsigned i; struct pipe_framebuffer_state state; struct pipe_surface *surface = exa_gpu_surface(exa, pDst); memset(&state, 0, sizeof(struct pipe_framebuffer_state)); - state.width = pDstPicture->pDrawable->width; - state.height = pDstPicture->pDrawable->height; + state.width = pDst->tex->width[0]; + state.height = pDst->tex->height[0]; state.nr_cbufs = 1; state.cbufs[0] = surface; @@ -338,10 +386,12 @@ set_viewport(struct exa_context *exa, int width, int height, } static void -bind_viewport_state(struct exa_context *exa, PicturePtr pDstPicture) +bind_viewport_state(struct exa_context *exa, struct exa_pixmap_priv *pDst) { - int width = pDstPicture->pDrawable->width; - int height = pDstPicture->pDrawable->height; + int width = pDst->tex->width[0]; + int height = pDst->tex->height[0]; + + debug_printf("Bind viewport (%d, %d)\n", width, height); set_viewport(exa, width, height, Y0_TOP); } @@ -350,7 +400,8 @@ static void bind_blend_state(struct exa_context *exa, int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture) { - boolean component_alpha = pSrcPicture->componentAlpha; + boolean component_alpha = (pSrcPicture) ? + pSrcPicture->componentAlpha : FALSE; struct xorg_composite_blend blend_opt; struct pipe_blend_state blend; @@ -390,14 +441,17 @@ bind_shaders(struct exa_context *exa, int op, unsigned vs_traits = 0, fs_traits = 0; struct xorg_shader shader; + exa->has_solid_color = FALSE; + if (pSrcPicture) { if (pSrcPicture->pSourcePict) { if (pSrcPicture->pSourcePict->type == SourcePictTypeSolidFill) { fs_traits |= FS_SOLID_FILL; vs_traits |= VS_SOLID_FILL; - pixel_to_float4(pSrcPicture->pFormat, - pSrcPicture->pSourcePict->solidFill.color, - exa->solid_color); + render_pixel_to_float4(pSrcPicture->pFormat, + pSrcPicture->pSourcePict->solidFill.color, + exa->solid_color); + exa->has_solid_color = TRUE; } else { debug_assert("!gradients not supported"); } @@ -478,15 +532,15 @@ setup_vs_constant_buffer(struct exa_context *exa, struct pipe_constant_buffer *cbuf = &exa->vs_const_buffer; pipe_buffer_reference(&cbuf->buffer, NULL); - cbuf->buffer = pipe_buffer_create(exa->ctx->screen, 16, + cbuf->buffer = pipe_buffer_create(exa->pipe->screen, 16, PIPE_BUFFER_USAGE_CONSTANT, param_bytes); if (cbuf->buffer) { - pipe_buffer_write(exa->ctx->screen, cbuf->buffer, + pipe_buffer_write(exa->pipe->screen, cbuf->buffer, 0, param_bytes, vs_consts); } - exa->ctx->set_constant_buffer(exa->ctx, PIPE_SHADER_VERTEX, 0, cbuf); + exa->pipe->set_constant_buffer(exa->pipe, PIPE_SHADER_VERTEX, 0, cbuf); } @@ -500,22 +554,22 @@ setup_fs_constant_buffer(struct exa_context *exa) struct pipe_constant_buffer *cbuf = &exa->fs_const_buffer; pipe_buffer_reference(&cbuf->buffer, NULL); - cbuf->buffer = pipe_buffer_create(exa->ctx->screen, 16, + cbuf->buffer = pipe_buffer_create(exa->pipe->screen, 16, PIPE_BUFFER_USAGE_CONSTANT, param_bytes); if (cbuf->buffer) { - pipe_buffer_write(exa->ctx->screen, cbuf->buffer, + pipe_buffer_write(exa->pipe->screen, cbuf->buffer, 0, param_bytes, fs_consts); } - exa->ctx->set_constant_buffer(exa->ctx, PIPE_SHADER_FRAGMENT, 0, cbuf); + exa->pipe->set_constant_buffer(exa->pipe, PIPE_SHADER_FRAGMENT, 0, cbuf); } static void -setup_constant_buffers(struct exa_context *exa, PicturePtr pDstPicture) +setup_constant_buffers(struct exa_context *exa, struct exa_pixmap_priv *pDst) { - int width = pDstPicture->pDrawable->width; - int height = pDstPicture->pDrawable->height; + int width = pDst->tex->width[0]; + int height = pDst->tex->height[0]; setup_vs_constant_buffer(exa, width, height); setup_fs_constant_buffer(exa); @@ -530,15 +584,15 @@ boolean xorg_composite_bind_state(struct exa_context *exa, struct exa_pixmap_priv *pMask, struct exa_pixmap_priv *pDst) { - bind_framebuffer_state(exa, pDstPicture, pDst); - bind_viewport_state(exa, pDstPicture); + bind_framebuffer_state(exa, pDst); + bind_viewport_state(exa, pDst); bind_blend_state(exa, op, pSrcPicture, pMaskPicture); bind_rasterizer_state(exa); bind_shaders(exa, op, pSrcPicture, pMaskPicture); bind_samplers(exa, op, pSrcPicture, pMaskPicture, pDstPicture, pSrc, pMask, pDst); - - setup_constant_buffers(exa, pDstPicture); + bind_clip_state(exa); + setup_constant_buffers(exa, pDst); return FALSE; } @@ -548,7 +602,7 @@ void xorg_composite(struct exa_context *exa, int srcX, int srcY, int maskX, int maskY, int dstX, int dstY, int width, int height) { - struct pipe_context *pipe = exa->ctx; + struct pipe_context *pipe = exa->pipe; struct pipe_buffer *buf = 0; if (exa->num_bound_samplers == 0 ) { /* solid fill */ @@ -573,12 +627,389 @@ void xorg_composite(struct exa_context *exa, } if (buf) { + int num_attribs = 1; /*pos*/ + num_attribs += exa->num_bound_samplers; + if (exa->has_solid_color) + ++num_attribs; + util_draw_vertex_buffer(pipe, buf, 0, PIPE_PRIM_TRIANGLE_FAN, 4, /* verts */ - 1 + exa->num_bound_samplers); /* attribs/vert */ + num_attribs); /* attribs/vert */ pipe_buffer_reference(&buf, NULL); } } +boolean xorg_solid_bind_state(struct exa_context *exa, + struct exa_pixmap_priv *pixmap, + Pixel fg) +{ + unsigned vs_traits, fs_traits; + struct xorg_shader shader; + + pixel_to_float4(fg, exa->solid_color); + exa->has_solid_color = TRUE; + + exa->solid_color[3] = 1.f; + +#if 0 + debug_printf("Color Pixel=(%d, %d, %d, %d), RGBA=(%f, %f, %f, %f)\n", + (fg >> 24) & 0xff, (fg >> 16) & 0xff, + (fg >> 8) & 0xff, (fg >> 0) & 0xff, + exa->solid_color[0], exa->solid_color[1], + exa->solid_color[2], exa->solid_color[3]); +#endif + + vs_traits = VS_SOLID_FILL; + fs_traits = FS_SOLID_FILL; + + bind_framebuffer_state(exa, pixmap); + bind_viewport_state(exa, pixmap); + bind_rasterizer_state(exa); + bind_blend_state(exa, PictOpSrc, NULL, NULL); + setup_constant_buffers(exa, pixmap); + bind_clip_state(exa); + + shader = xorg_shaders_get(exa->shaders, vs_traits, fs_traits); + cso_set_vertex_shader_handle(exa->cso, shader.vs); + cso_set_fragment_shader_handle(exa->cso, shader.fs); + + return FALSE; +} + +void xorg_solid(struct exa_context *exa, + struct exa_pixmap_priv *pixmap, + int x0, int y0, int x1, int y1) +{ + struct pipe_context *pipe = exa->pipe; + struct pipe_buffer *buf = 0; + float vertices[4][2][4]; + + /* 1st vertex */ + setup_vertex0(vertices[0], x0, y0, + exa->solid_color); + /* 2nd vertex */ + setup_vertex0(vertices[1], x1, y0, + exa->solid_color); + /* 3rd vertex */ + setup_vertex0(vertices[2], x1, y1, + exa->solid_color); + /* 4th vertex */ + setup_vertex0(vertices[3], x0, y1, + exa->solid_color); + + buf = pipe_user_buffer_create(exa->pipe->screen, + vertices, + sizeof(vertices)); + + + if (buf) { + debug_printf("Drawing buf is %p\n", buf); + util_draw_vertex_buffer(pipe, buf, 0, + PIPE_PRIM_TRIANGLE_FAN, + 4, /* verts */ + 2); /* attribs/vert */ + + pipe_buffer_reference(&buf, NULL); + } +} + + +static INLINE void shift_rectx(float coords[4], + const float *bounds, + const float shift) +{ + coords[0] += shift; + coords[2] -= shift; + if (bounds) { + coords[2] = MIN2(coords[2], bounds[2]); + /* bound x/y + width/height */ + if ((coords[0] + coords[2]) > (bounds[0] + bounds[2])) { + coords[2] = (bounds[0] + bounds[2]) - coords[0]; + } + } +} + +static INLINE void shift_recty(float coords[4], + const float *bounds, + const float shift) +{ + coords[1] += shift; + coords[3] -= shift; + if (bounds) { + coords[3] = MIN2(coords[3], bounds[3]); + if ((coords[1] + coords[3]) > (bounds[1] + bounds[3])) { + coords[3] = (bounds[1] + bounds[3]) - coords[1]; + } + } +} + +static INLINE void bound_rect(float coords[4], + const float bounds[4], + float shift[4]) +{ + /* if outside the bounds */ + if (coords[0] > (bounds[0] + bounds[2]) || + coords[1] > (bounds[1] + bounds[3]) || + (coords[0] + coords[2]) < bounds[0] || + (coords[1] + coords[3]) < bounds[1]) { + coords[0] = 0.f; + coords[1] = 0.f; + coords[2] = 0.f; + coords[3] = 0.f; + shift[0] = 0.f; + shift[1] = 0.f; + return; + } + + /* bound x */ + if (coords[0] < bounds[0]) { + shift[0] = bounds[0] - coords[0]; + coords[2] -= shift[0]; + coords[0] = bounds[0]; + } else + shift[0] = 0.f; + + /* bound y */ + if (coords[1] < bounds[1]) { + shift[1] = bounds[1] - coords[1]; + coords[3] -= shift[1]; + coords[1] = bounds[1]; + } else + shift[1] = 0.f; + + shift[2] = bounds[2] - coords[2]; + shift[3] = bounds[3] - coords[3]; + /* bound width/height */ + coords[2] = MIN2(coords[2], bounds[2]); + coords[3] = MIN2(coords[3], bounds[3]); + + /* bound x/y + width/height */ + if ((coords[0] + coords[2]) > (bounds[0] + bounds[2])) { + coords[2] = (bounds[0] + bounds[2]) - coords[0]; + } + if ((coords[1] + coords[3]) > (bounds[1] + bounds[3])) { + coords[3] = (bounds[1] + bounds[3]) - coords[1]; + } + + /* if outside the bounds */ + if ((coords[0] + coords[2]) < bounds[0] || + (coords[1] + coords[3]) < bounds[1]) { + coords[0] = 0.f; + coords[1] = 0.f; + coords[2] = 0.f; + coords[3] = 0.f; + return; + } +} + +static INLINE void sync_size(float *src_loc, float *dst_loc) +{ + src_loc[2] = MIN2(src_loc[2], dst_loc[2]); + src_loc[3] = MIN2(src_loc[3], dst_loc[3]); + dst_loc[2] = src_loc[2]; + dst_loc[3] = src_loc[3]; +} + + +static void renderer_copy_texture(struct exa_context *exa, + struct pipe_texture *src, + float sx1, float sy1, + float sx2, float sy2, + struct pipe_texture *dst, + float dx1, float dy1, + float dx2, float dy2) +{ + struct pipe_context *pipe = exa->pipe; + struct pipe_screen *screen = pipe->screen; + struct pipe_buffer *buf; + struct pipe_surface *dst_surf = screen->get_tex_surface( + screen, dst, 0, 0, 0, + PIPE_BUFFER_USAGE_GPU_WRITE); + struct pipe_framebuffer_state fb; + float s0, t0, s1, t1; + struct xorg_shader shader; + + assert(src->width[0] != 0); + assert(src->height[0] != 0); + assert(dst->width[0] != 0); + assert(dst->height[0] != 0); + +#if 0 + debug_printf("copy texture [%f, %f, %f, %f], [%f, %f, %f, %f]\n", + sx1, sy1, sx2, sy2, dx1, dy1, dx2, dy2); +#endif + +#if 1 + s0 = sx1 / src->width[0]; + s1 = sx2 / src->width[0]; + t0 = sy1 / src->height[0]; + t1 = sy2 / src->height[0]; +#else + s0 = 0; + s1 = 1; + t0 = 0; + t1 = 1; +#endif + + assert(screen->is_format_supported(screen, dst_surf->format, + PIPE_TEXTURE_2D, + PIPE_TEXTURE_USAGE_RENDER_TARGET, + 0)); + + /* save state (restored below) */ + cso_save_blend(exa->cso); + cso_save_samplers(exa->cso); + cso_save_sampler_textures(exa->cso); + cso_save_framebuffer(exa->cso); + cso_save_fragment_shader(exa->cso); + cso_save_vertex_shader(exa->cso); + + cso_save_viewport(exa->cso); + + + /* set misc state we care about */ + { + struct pipe_blend_state blend; + memset(&blend, 0, sizeof(blend)); + blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE; + blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; + blend.alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; + blend.colormask = PIPE_MASK_RGBA; + cso_set_blend(exa->cso, &blend); + } + + /* sampler */ + { + struct pipe_sampler_state sampler; + memset(&sampler, 0, sizeof(sampler)); + sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler.normalized_coords = 1; + cso_single_sampler(exa->cso, 0, &sampler); + cso_single_sampler_done(exa->cso); + } + + set_viewport(exa, dst_surf->width, dst_surf->height, Y0_TOP); + + /* texture */ + cso_set_sampler_textures(exa->cso, 1, &src); + + /* shaders */ + shader = xorg_shaders_get(exa->shaders, + VS_COMPOSITE, + FS_COMPOSITE); + cso_set_vertex_shader_handle(exa->cso, shader.vs); + cso_set_fragment_shader_handle(exa->cso, shader.fs); + + /* drawing dest */ + memset(&fb, 0, sizeof(fb)); + fb.width = dst_surf->width; + fb.height = dst_surf->height; + fb.nr_cbufs = 1; + fb.cbufs[0] = dst_surf; + { + int i; + for (i = 1; i < PIPE_MAX_COLOR_BUFS; ++i) + fb.cbufs[i] = 0; + } + cso_set_framebuffer(exa->cso, &fb); + + /* draw quad */ + buf = setup_vertex_data_tex(exa, + dx1, dy1, + dx2, dy2, + s0, t0, s1, t1, + 0.0f); + + if (buf) { + util_draw_vertex_buffer(exa->pipe, buf, 0, + PIPE_PRIM_TRIANGLE_FAN, + 4, /* verts */ + 2); /* attribs/vert */ + + pipe_buffer_reference(&buf, NULL); + } + + /* restore state we changed */ + cso_restore_blend(exa->cso); + cso_restore_samplers(exa->cso); + cso_restore_sampler_textures(exa->cso); + cso_restore_framebuffer(exa->cso); + cso_restore_vertex_shader(exa->cso); + cso_restore_fragment_shader(exa->cso); + cso_restore_viewport(exa->cso); + + pipe_surface_reference(&dst_surf, NULL); +} + +void xorg_copy_pixmap(struct exa_context *ctx, + struct exa_pixmap_priv *dst_priv, int dx, int dy, + struct exa_pixmap_priv *src_priv, int sx, int sy, + int width, int height) +{ + float dst_loc[4], src_loc[4]; + float dst_bounds[4], src_bounds[4]; + float src_shift[4], dst_shift[4], shift[4]; + struct pipe_texture *dst = dst_priv->tex; + struct pipe_texture *src = src_priv->tex; + + xorg_exa_finish(ctx); + + dst_loc[0] = dx; + dst_loc[1] = dy; + dst_loc[2] = width; + dst_loc[3] = height; + dst_bounds[0] = 0.f; + dst_bounds[1] = 0.f; + dst_bounds[2] = dst->width[0]; + dst_bounds[3] = dst->height[0]; + + src_loc[0] = sx; + src_loc[1] = sy; + src_loc[2] = width; + src_loc[3] = height; + src_bounds[0] = 0.f; + src_bounds[1] = 0.f; + src_bounds[2] = src->width[0]; + src_bounds[3] = src->height[0]; + + bound_rect(src_loc, src_bounds, src_shift); + bound_rect(dst_loc, dst_bounds, dst_shift); + shift[0] = src_shift[0] - dst_shift[0]; + shift[1] = src_shift[1] - dst_shift[1]; + + if (shift[0] < 0) + shift_rectx(src_loc, src_bounds, -shift[0]); + else + shift_rectx(dst_loc, dst_bounds, shift[0]); + + if (shift[1] < 0) + shift_recty(src_loc, src_bounds, -shift[1]); + else + shift_recty(dst_loc, dst_bounds, shift[1]); + + sync_size(src_loc, dst_loc); + + if (src_loc[2] >= 0 && src_loc[3] >= 0 && + dst_loc[2] >= 0 && dst_loc[3] >= 0) { + renderer_copy_texture(ctx, + src, + src_loc[0], + src_loc[1] + src_loc[3], + src_loc[0] + src_loc[2], + src_loc[1], + dst, + dst_loc[0], + dst_loc[1] + dst_loc[3], + dst_loc[0] + dst_loc[2], + dst_loc[1]); + } +} + diff --git a/src/gallium/state_trackers/xorg/xorg_composite.h b/src/gallium/state_trackers/xorg/xorg_composite.h index 17dfcb199ea..e73f1c704a8 100644 --- a/src/gallium/state_trackers/xorg/xorg_composite.h +++ b/src/gallium/state_trackers/xorg/xorg_composite.h @@ -22,4 +22,16 @@ void xorg_composite(struct exa_context *exa, int srcX, int srcY, int maskX, int maskY, int dstX, int dstY, int width, int height); +boolean xorg_solid_bind_state(struct exa_context *exa, + struct exa_pixmap_priv *pixmap, + Pixel fg); +void xorg_solid(struct exa_context *exa, + struct exa_pixmap_priv *pixmap, + int x0, int y0, int x1, int y1); + +void xorg_copy_pixmap(struct exa_context *ctx, + struct exa_pixmap_priv *dst, int dx, int dy, + struct exa_pixmap_priv *src, int sx, int sy, + int width, int height); + #endif diff --git a/src/gallium/state_trackers/xorg/xorg_dri2.c b/src/gallium/state_trackers/xorg/xorg_dri2.c index 6431a0fe254..8a362596c75 100644 --- a/src/gallium/state_trackers/xorg/xorg_dri2.c +++ b/src/gallium/state_trackers/xorg/xorg_dri2.c @@ -118,6 +118,7 @@ driDoCreateBuffer(DrawablePtr pDraw, DRI2BufferPtr buffer, unsigned int format) } if (!tex) { + exaMoveInPixmap(private->pPixmap); xorg_exa_set_shared_usage(private->pPixmap); pScreen->ModifyPixmapHeader(private->pPixmap, 0, 0, 0, 0, 0, NULL); tex = xorg_exa_get_texture(private->pPixmap); diff --git a/src/gallium/state_trackers/xorg/xorg_exa.c b/src/gallium/state_trackers/xorg/xorg_exa.c index a17a71f23a8..0d60b963bf3 100644 --- a/src/gallium/state_trackers/xorg/xorg_exa.c +++ b/src/gallium/state_trackers/xorg/xorg_exa.c @@ -47,6 +47,8 @@ #include "util/u_rect.h" +#define DEBUG_SOLID 0 + /* * Helper functions */ @@ -82,6 +84,25 @@ exa_get_pipe_format(int depth, enum pipe_format *format, int *bbp) } } +static void +xorg_exa_init_state(struct exa_context *exa) +{ + struct pipe_depth_stencil_alpha_state dsa; + + /* set common initial clip state */ + memset(&dsa, 0, sizeof(struct pipe_depth_stencil_alpha_state)); + cso_set_depth_stencil_alpha(exa->cso, &dsa); +} + +static void +xorg_exa_common_done(struct exa_context *exa) +{ + exa->copy.src = NULL; + exa->copy.dst = NULL; + exa->has_solid_color = FALSE; + exa->num_bound_samplers = 0; +} + /* * Static exported EXA functions */ @@ -111,9 +132,9 @@ ExaDownloadFromScreen(PixmapPtr pPix, int x, int y, int w, int h, char *dst, if (!priv || !priv->tex) return FALSE; - if (exa->ctx->is_texture_referenced(exa->ctx, priv->tex, 0, 0) & + if (exa->pipe->is_texture_referenced(exa->pipe, priv->tex, 0, 0) & PIPE_REFERENCED_FOR_WRITE) - exa->ctx->flush(exa->ctx, 0, NULL); + exa->pipe->flush(exa->pipe, 0, NULL); transfer = exa->scrn->get_tex_transfer(exa->scrn, priv->tex, 0, 0, 0, PIPE_TRANSFER_READ, x, y, w, h); @@ -178,9 +199,9 @@ ExaPrepareAccess(PixmapPtr pPix, int index) if (priv->map_count++ == 0) { - if (exa->ctx->is_texture_referenced(exa->ctx, priv->tex, 0, 0) & + if (exa->pipe->is_texture_referenced(exa->pipe, priv->tex, 0, 0) & PIPE_REFERENCED_FOR_WRITE) - exa->ctx->flush(exa->ctx, 0, NULL); + exa->pipe->flush(exa->pipe, 0, NULL); priv->map_transfer = exa->scrn->get_tex_transfer(exa->scrn, priv->tex, 0, 0, 0, @@ -231,15 +252,22 @@ ExaDone(PixmapPtr pPixmap) if (!priv) return; - if (priv->src_surf) - exa->scrn->tex_surface_destroy(priv->src_surf); - priv->src_surf = NULL; +#if 1 + xorg_exa_flush(exa, PIPE_FLUSH_RENDER_CACHE, NULL); +#else + xorg_exa_finish(exa); +#endif + xorg_exa_common_done(exa); } static void ExaDoneComposite(PixmapPtr pPixmap) { + ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum]; + modesettingPtr ms = modesettingPTR(pScrn); + struct exa_context *exa = ms->exa; + xorg_exa_common_done(exa); } static Bool @@ -250,6 +278,9 @@ ExaPrepareSolid(PixmapPtr pPixmap, int alu, Pixel planeMask, Pixel fg) struct exa_pixmap_priv *priv = exaGetPixmapDriverPrivate(pPixmap); struct exa_context *exa = ms->exa; +#if 0 + debug_printf("ExaPrepareSolid - test\n"); +#endif if (pPixmap->drawable.depth < 15) return FALSE; @@ -262,12 +293,18 @@ ExaPrepareSolid(PixmapPtr pPixmap, int alu, Pixel planeMask, Pixel fg) if (alu != GXcopy) return FALSE; - if (!exa->ctx || !exa->ctx->surface_fill) + if (!exa->pipe) return FALSE; - priv->color = fg; - return TRUE; +#if DEBUG_SOLID + fg = 0xffff0000; +#endif + +#if 1 + debug_printf(" ExaPrepareSolid(0x%x)\n", fg); +#endif + return xorg_solid_bind_state(exa, priv, fg); } static void @@ -277,12 +314,30 @@ ExaSolid(PixmapPtr pPixmap, int x0, int y0, int x1, int y1) modesettingPtr ms = modesettingPTR(pScrn); struct exa_context *exa = ms->exa; struct exa_pixmap_priv *priv = exaGetPixmapDriverPrivate(pPixmap); - struct pipe_surface *surf = exa_gpu_surface(exa, priv); - exa->ctx->surface_fill(exa->ctx, surf, x0, y0, x1 - x0, y1 - y0, - priv->color); + debug_printf("\tExaSolid(%d, %d, %d, %d)\n", x0, y0, x1, y1); - exa->scrn->tex_surface_destroy(surf); +#if 0 + if (x0 == 0 && y0 == 0 && + x1 == priv->tex->width[0] && + y1 == priv->tex->height[0]) { + exa->ctx->clear(exa->pipe, PIPE_CLEAR_COLOR, + exa->solid_color, 1., 0); + } else +#endif + +#if DEBUG_SOLID + xorg_solid(exa, priv, 0, 0, 300, 300); + xorg_solid(exa, priv, 300, 300, 350, 350); + xorg_solid(exa, priv, 350, 350, 500, 500); + xorg_solid(exa, priv, + priv->tex->width[0] - 10, + priv->tex->height[0] - 10, + priv->tex->width[0], + priv->tex->height[0]); +#else + xorg_solid(exa, priv, x0, y0, x1, y1) ; +#endif } static Bool @@ -295,6 +350,8 @@ ExaPrepareCopy(PixmapPtr pSrcPixmap, PixmapPtr pDstPixmap, int xdir, struct exa_pixmap_priv *priv = exaGetPixmapDriverPrivate(pDstPixmap); struct exa_pixmap_priv *src_priv = exaGetPixmapDriverPrivate(pSrcPixmap); + debug_printf("ExaPrepareCopy\n"); + if (alu != GXcopy) return FALSE; @@ -310,27 +367,33 @@ ExaPrepareCopy(PixmapPtr pSrcPixmap, PixmapPtr pDstPixmap, int xdir, if (!priv->tex || !src_priv->tex) return FALSE; - if (!exa->ctx || !exa->ctx->surface_copy) + if (!exa->pipe) return FALSE; - priv->src_surf = exa_gpu_surface(exa, src_priv); + exa->copy.src = src_priv; + exa->copy.dst = priv; - return TRUE; + /*XXX disabled until some issues with syncing are fixed */ + return FALSE; } static void ExaCopy(PixmapPtr pDstPixmap, int srcX, int srcY, int dstX, int dstY, int width, int height) { - ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; - modesettingPtr ms = modesettingPTR(pScrn); - struct exa_context *exa = ms->exa; - struct exa_pixmap_priv *priv = exaGetPixmapDriverPrivate(pDstPixmap); - struct pipe_surface *surf = exa_gpu_surface(exa, priv); + ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; + modesettingPtr ms = modesettingPTR(pScrn); + struct exa_context *exa = ms->exa; + struct exa_pixmap_priv *priv = exaGetPixmapDriverPrivate(pDstPixmap); + + debug_printf("\tExaCopy(srcx=%d, srcy=%d, dstX=%d, dstY=%d, w=%d, h=%d)\n", + srcX, srcY, dstX, dstY, width, height); + + debug_assert(priv == exa->copy.dst); - exa->ctx->surface_copy(exa->ctx, surf, dstX, dstY, priv->src_surf, - srcX, srcY, width, height); - exa->scrn->tex_surface_destroy(surf); + xorg_copy_pixmap(exa, exa->copy.dst, dstX, dstY, + exa->copy.src, srcX, srcY, + width, height); } static Bool @@ -342,6 +405,8 @@ ExaPrepareComposite(int op, PicturePtr pSrcPicture, modesettingPtr ms = modesettingPTR(pScrn); struct exa_context *exa = ms->exa; + debug_printf("ExaPrepareComposite\n"); + return xorg_composite_bind_state(exa, op, pSrcPicture, pMaskPicture, pDstPicture, exaGetPixmapDriverPrivate(pSrc), @@ -358,6 +423,8 @@ ExaComposite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY, struct exa_context *exa = ms->exa; struct exa_pixmap_priv *priv = exaGetPixmapDriverPrivate(pDst); + debug_printf("\tExaComposite\n"); + xorg_composite(exa, priv, srcX, srcY, maskX, maskY, dstX, dstY, width, height); } @@ -537,16 +604,16 @@ ExaModifyPixmapHeader(PixmapPtr pPixmap, int width, int height, if (priv->tex) { struct pipe_surface *dst_surf; + struct pipe_surface *src_surf; dst_surf = exa->scrn->get_tex_surface(exa->scrn, texture, 0, 0, 0, PIPE_BUFFER_USAGE_GPU_WRITE); - priv->src_surf = exa_gpu_surface(exa, priv); - exa->ctx->surface_copy(exa->ctx, dst_surf, 0, 0, priv->src_surf, - 0, 0, min(width, texture->width[0]), - min(height, texture->height[0])); + src_surf = exa_gpu_surface(exa, priv); + exa->pipe->surface_copy(exa->pipe, dst_surf, 0, 0, src_surf, + 0, 0, min(width, texture->width[0]), + min(height, texture->height[0])); exa->scrn->tex_surface_destroy(dst_surf); - exa->scrn->tex_surface_destroy(priv->src_surf); - priv->src_surf = NULL; + exa->scrn->tex_surface_destroy(src_surf); } else if (pPixmap->devPrivate.ptr) { struct pipe_transfer *transfer; @@ -563,6 +630,9 @@ ExaModifyPixmapHeader(PixmapPtr pPixmap, int width, int height, pPixmap->devKind, 0, 0); exa->scrn->transfer_unmap(exa->scrn, transfer); exa->scrn->tex_transfer_destroy(transfer); + + xfree(pPixmap->devPrivate.ptr); + pPixmap->devPrivate.ptr = NULL; } } #ifdef DRM_MODE_FEATURE_DIRTYFB @@ -611,8 +681,8 @@ xorg_exa_close(ScrnInfoPtr pScrn) cso_destroy_context(exa->cso); } - if (exa->ctx) - exa->ctx->destroy(exa->ctx); + if (exa->pipe) + exa->pipe->destroy(exa->pipe); exaDriverFini(pScrn->pScreen); xfree(exa); @@ -645,6 +715,12 @@ xorg_exa_init(ScrnInfoPtr pScrn) pExa->pixmapOffsetAlign = 0; pExa->pixmapPitchAlign = 1; pExa->flags = EXA_OFFSCREEN_PIXMAPS | EXA_HANDLES_PIXMAPS; +#ifdef EXA_SUPPORTS_PREPARE_AUX + pExa->flags |= EXA_SUPPORTS_PREPARE_AUX; +#endif +#ifdef EXA_MIXED_PIXMAPS + pExa->flags |= EXA_MIXED_PIXMAPS; +#endif pExa->maxX = 8191; /* FIXME */ pExa->maxY = 8191; /* FIXME */ @@ -674,13 +750,15 @@ xorg_exa_init(ScrnInfoPtr pScrn) } exa->scrn = ms->screen; - exa->ctx = ms->api->create_context(ms->api, exa->scrn); + exa->pipe = ms->api->create_context(ms->api, exa->scrn); /* Share context with DRI */ - ms->ctx = exa->ctx; + ms->ctx = exa->pipe; - exa->cso = cso_create_context(exa->ctx); + exa->cso = cso_create_context(exa->pipe); exa->shaders = xorg_shaders_create(exa); + xorg_exa_init_state(exa); + return (void *)exa; out_err: @@ -698,3 +776,19 @@ exa_gpu_surface(struct exa_context *exa, struct exa_pixmap_priv *priv) } +void xorg_exa_flush(struct exa_context *exa, uint pipeFlushFlags, + struct pipe_fence_handle **fence) +{ + exa->pipe->flush(exa->pipe, pipeFlushFlags, fence); +} + +void xorg_exa_finish(struct exa_context *exa) +{ + struct pipe_fence_handle *fence = NULL; + + xorg_exa_flush(exa, PIPE_FLUSH_RENDER_CACHE, &fence); + + exa->pipe->screen->fence_finish(exa->pipe->screen, fence, 0); + exa->pipe->screen->fence_reference(exa->pipe->screen, &fence, NULL); +} + diff --git a/src/gallium/state_trackers/xorg/xorg_exa.h b/src/gallium/state_trackers/xorg/xorg_exa.h index 5b515be1397..43949b04a44 100644 --- a/src/gallium/state_trackers/xorg/xorg_exa.h +++ b/src/gallium/state_trackers/xorg/xorg_exa.h @@ -14,7 +14,7 @@ struct xorg_shaders; struct exa_context { ExaDriverPtr pExa; - struct pipe_context *ctx; + struct pipe_context *pipe; struct pipe_screen *scrn; struct cso_context *cso; struct xorg_shaders *shaders; @@ -26,6 +26,12 @@ struct exa_context int num_bound_samplers; float solid_color[4]; + boolean has_solid_color; + + struct { + struct exa_pixmap_priv *src; + struct exa_pixmap_priv *dst; + } copy; }; @@ -36,8 +42,6 @@ struct exa_pixmap_priv struct pipe_texture *tex; struct pipe_texture *depth_stencil_tex; - unsigned int color; - struct pipe_surface *src_surf; /* for copies */ struct pipe_transfer *map_transfer; unsigned map_count; @@ -46,5 +50,8 @@ struct exa_pixmap_priv struct pipe_surface * exa_gpu_surface(struct exa_context *exa, struct exa_pixmap_priv *priv); +void xorg_exa_flush(struct exa_context *exa, uint pipeFlushFlags, + struct pipe_fence_handle **fence); +void xorg_exa_finish(struct exa_context *exa); #endif diff --git a/src/gallium/state_trackers/xorg/xorg_exa_tgsi.c b/src/gallium/state_trackers/xorg/xorg_exa_tgsi.c index cfee10c3b30..801d0d8df4d 100644 --- a/src/gallium/state_trackers/xorg/xorg_exa_tgsi.c +++ b/src/gallium/state_trackers/xorg/xorg_exa_tgsi.c @@ -241,41 +241,38 @@ create_vs(struct pipe_context *pipe, boolean is_fill = vs_traits & VS_FILL; boolean is_composite = vs_traits & VS_COMPOSITE; boolean has_mask = vs_traits & VS_MASK; + unsigned input_slot = 0; ureg = ureg_create(TGSI_PROCESSOR_VERTEX); if (ureg == NULL) return 0; - const0 = ureg_DECL_constant(ureg); - const1 = ureg_DECL_constant(ureg); + const0 = ureg_DECL_constant(ureg, 0); + const1 = ureg_DECL_constant(ureg, 1); /* it has to be either a fill or a composite op */ debug_assert(is_fill ^ is_composite); - src = ureg_DECL_vs_input(ureg, - TGSI_SEMANTIC_POSITION, 0); + src = ureg_DECL_vs_input(ureg, input_slot++); dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0); src = vs_normalize_coords(ureg, src, const0, const1); ureg_MOV(ureg, dst, src); - if (is_composite) { - src = ureg_DECL_vs_input(ureg, - TGSI_SEMANTIC_GENERIC, 1); + src = ureg_DECL_vs_input(ureg, input_slot++); dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 1); ureg_MOV(ureg, dst, src); } + if (is_fill) { - src = ureg_DECL_vs_input(ureg, - TGSI_SEMANTIC_COLOR, 1); - dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 1); + src = ureg_DECL_vs_input(ureg, input_slot++); + dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0); ureg_MOV(ureg, dst, src); } if (has_mask) { - src = ureg_DECL_vs_input(ureg, - TGSI_SEMANTIC_GENERIC, 2); + src = ureg_DECL_vs_input(ureg, input_slot++); dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 2); ureg_MOV(ureg, dst, src); } @@ -318,8 +315,8 @@ create_fs(struct pipe_context *pipe, TGSI_SEMANTIC_POSITION, 0, TGSI_INTERPOLATE_PERSPECTIVE); - } - if (is_fill) { + } else { + debug_assert(is_fill); if (is_solid) src_input = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, @@ -370,11 +367,11 @@ create_fs(struct pipe_context *pipe, else src = out; - coords = ureg_DECL_constant(ureg); - const0124 = ureg_DECL_constant(ureg); - matrow0 = ureg_DECL_constant(ureg); - matrow1 = ureg_DECL_constant(ureg); - matrow2 = ureg_DECL_constant(ureg); + coords = ureg_DECL_constant(ureg, 0); + const0124 = ureg_DECL_constant(ureg, 1); + matrow0 = ureg_DECL_constant(ureg, 2); + matrow1 = ureg_DECL_constant(ureg, 3); + matrow2 = ureg_DECL_constant(ureg, 4); if (is_lingrad) { linear_gradient(ureg, src, @@ -473,9 +470,9 @@ struct xorg_shader xorg_shaders_get(struct xorg_shaders *sc, struct xorg_shader shader = {0}; void *vs, *fs; - vs = shader_from_cache(sc->exa->ctx, PIPE_SHADER_VERTEX, + vs = shader_from_cache(sc->exa->pipe, PIPE_SHADER_VERTEX, sc->vs_hash, vs_traits); - fs = shader_from_cache(sc->exa->ctx, PIPE_SHADER_FRAGMENT, + fs = shader_from_cache(sc->exa->pipe, PIPE_SHADER_FRAGMENT, sc->fs_hash, fs_traits); debug_assert(vs && fs); diff --git a/src/gallium/winsys/drm/intel/dri/SConscript b/src/gallium/winsys/drm/intel/dri/SConscript index 6c00861f517..f9738110723 100644 --- a/src/gallium/winsys/drm/intel/dri/SConscript +++ b/src/gallium/winsys/drm/intel/dri/SConscript @@ -12,8 +12,9 @@ drivers = [ trace, ] -env.SharedLibrary( +env.LoadableModule( target ='i915_dri.so', source = COMMON_GALLIUM_SOURCES, LIBS = drivers + mesa + auxiliaries + env['LIBS'], + SHLIBPREFIX = '', ) diff --git a/src/gallium/winsys/drm/intel/gem/intel_drm_batchbuffer.c b/src/gallium/winsys/drm/intel/gem/intel_drm_batchbuffer.c index 5ca3ad9762d..ebd1b607b78 100644 --- a/src/gallium/winsys/drm/intel/gem/intel_drm_batchbuffer.c +++ b/src/gallium/winsys/drm/intel/gem/intel_drm_batchbuffer.c @@ -13,6 +13,7 @@ #define INTEL_BATCH_CLIPRECTS 0x2 #undef INTEL_RUN_SYNC +#undef INTEL_MAP_BATCHBUFFER struct intel_drm_batchbuffer { @@ -40,8 +41,11 @@ intel_drm_batchbuffer_reset(struct intel_drm_batchbuffer *batch) "gallium3d_batchbuffer", batch->actual_size, 4096); + +#ifdef INTEL_MAP_BATCHBUFFER drm_intel_bo_map(batch->bo, TRUE); batch->base.map = batch->bo->virtual; +#endif memset(batch->base.map, 0, batch->actual_size); batch->base.ptr = batch->base.map; @@ -55,7 +59,13 @@ intel_drm_batchbuffer_create(struct intel_winsys *iws) struct intel_drm_winsys *idws = intel_drm_winsys(iws); struct intel_drm_batchbuffer *batch = CALLOC_STRUCT(intel_drm_batchbuffer); + batch->actual_size = idws->max_batch_size; + +#ifdef INTEL_MAP_BATCHBUFFER batch->base.map = NULL; +#else + batch->base.map = MALLOC(batch->actual_size); +#endif batch->base.ptr = NULL; batch->base.size = 0; @@ -64,8 +74,6 @@ intel_drm_batchbuffer_create(struct intel_winsys *iws) batch->base.iws = iws; - batch->actual_size = idws->max_batch_size; - intel_drm_batchbuffer_reset(batch); return &batch->base; @@ -156,7 +164,11 @@ intel_drm_batchbuffer_flush(struct intel_batchbuffer *ibatch, used = batch->base.ptr - batch->base.map; +#ifdef INTEL_MAP_BATCHBUFFER drm_intel_bo_unmap(batch->bo); +#else + drm_intel_bo_subdata(batch->bo, 0, used, batch->base.map); +#endif /* Do the sending to HW */ ret = drm_intel_bo_exec(batch->bo, used, NULL, 0, 0); @@ -202,7 +214,10 @@ intel_drm_batchbuffer_destroy(struct intel_batchbuffer *ibatch) if (batch->bo) drm_intel_bo_unreference(batch->bo); - free(batch); +#ifndef INTEL_MAP_BATCHBUFFER + FREE(batch->base.map); +#endif + FREE(batch); } void intel_drm_winsys_init_batchbuffer_functions(struct intel_drm_winsys *idws) diff --git a/src/gallium/winsys/drm/intel/gem/intel_drm_buffer.c b/src/gallium/winsys/drm/intel/gem/intel_drm_buffer.c index e017cd2e982..0030f915a36 100644 --- a/src/gallium/winsys/drm/intel/gem/intel_drm_buffer.c +++ b/src/gallium/winsys/drm/intel/gem/intel_drm_buffer.c @@ -28,6 +28,7 @@ intel_drm_buffer_create(struct intel_winsys *iws, } else if (type == INTEL_NEW_VERTEX) { name = "gallium3d_vertex"; pool = idws->pools.gem; + buf->map_gtt = TRUE; } else if (type == INTEL_NEW_SCANOUT) { name = "gallium3d_scanout"; pool = idws->pools.gem; @@ -109,6 +110,18 @@ intel_drm_buffer_unmap(struct intel_winsys *iws, drm_intel_bo_unmap(intel_bo(buffer)); } +static int +intel_drm_buffer_write(struct intel_winsys *iws, + struct intel_buffer *buffer, + const void *data, + size_t size, + size_t offset) +{ + struct intel_drm_buffer *buf = intel_drm_buffer(buffer); + + return drm_intel_bo_subdata(buf->bo, offset, size, (void*)data); +} + static void intel_drm_buffer_destroy(struct intel_winsys *iws, struct intel_buffer *buffer) @@ -130,5 +143,6 @@ intel_drm_winsys_init_buffer_functions(struct intel_drm_winsys *idws) idws->base.buffer_set_fence_reg = intel_drm_buffer_set_fence_reg; idws->base.buffer_map = intel_drm_buffer_map; idws->base.buffer_unmap = intel_drm_buffer_unmap; + idws->base.buffer_write = intel_drm_buffer_write; idws->base.buffer_destroy = intel_drm_buffer_destroy; } diff --git a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c index 07551e7cd16..7bf23cba236 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c +++ b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c @@ -32,6 +32,8 @@ #include "radeon_buffer.h" +#include "radeon_bo_gem.h" + static const char *radeon_get_name(struct pipe_winsys *ws) { return "Radeon/GEM+KMS"; @@ -99,6 +101,7 @@ static struct pipe_buffer *radeon_surface_buffer_create(struct pipe_winsys *ws, unsigned height, enum pipe_format format, unsigned usage, + unsigned tex_usage, unsigned *stride) { struct pipe_format_block block; @@ -134,8 +137,11 @@ static void *radeon_buffer_map(struct pipe_winsys *ws, (struct radeon_pipe_buffer*)buffer; int write = 0; - if (!(flags & PIPE_BUFFER_USAGE_DONTBLOCK)) { - radeon_bo_wait(radeon_buffer->bo); + if (flags & PIPE_BUFFER_USAGE_DONTBLOCK) { + uint32_t domain; + + if (radeon_bo_is_busy(radeon_buffer->bo, &domain)) + return NULL; } if (flags & PIPE_BUFFER_USAGE_CPU_WRITE) { write = 1; @@ -187,7 +193,6 @@ static void radeon_flush_frontbuffer(struct pipe_winsys *pipe_winsys, struct radeon_winsys* radeon_pipe_winsys(int fd) { struct radeon_winsys* radeon_ws; - struct radeon_bo_manager* bom; radeon_ws = CALLOC_STRUCT(radeon_winsys); if (radeon_ws == NULL) { diff --git a/src/gallium/winsys/drm/radeon/core/radeon_drm.c b/src/gallium/winsys/drm/radeon/core/radeon_drm.c index 47376a0f07b..a4011db0b87 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_drm.c +++ b/src/gallium/winsys/drm/radeon/core/radeon_drm.c @@ -98,7 +98,7 @@ struct pipe_buffer* radeon_buffer_from_handle(struct drm_api* api, return &radeon_buffer->base; } -struct pipe_texture* +static struct pipe_texture* radeon_texture_from_shared_handle(struct drm_api *api, struct pipe_screen *screen, struct pipe_texture *templ, @@ -116,20 +116,22 @@ radeon_texture_from_shared_handle(struct drm_api *api, return screen->texture_blanket(screen, templ, &stride, buffer); } -boolean radeon_shared_handle_from_texture(struct drm_api *api, - struct pipe_screen *screen, - struct pipe_texture *texture, - unsigned *stride, - unsigned *handle) +static boolean radeon_shared_handle_from_texture(struct drm_api *api, + struct pipe_screen *screen, + struct pipe_texture *texture, + unsigned *stride, + unsigned *handle) { int retval, fd; struct drm_gem_flink flink; struct radeon_pipe_buffer* radeon_buffer; - struct pipe_buffer* buffer = &radeon_buffer->base; - if (!radeon_buffer_from_texture(api, texture, buffer, stride)) { + struct pipe_buffer *buffer; + + if (!radeon_buffer_from_texture(api, texture, &buffer, stride)) { return FALSE; } + radeon_buffer = (struct radeon_pipe_buffer*)buffer; if (!radeon_buffer->flinked) { fd = ((struct radeon_winsys*)screen->winsys)->priv->fd; @@ -150,11 +152,11 @@ boolean radeon_shared_handle_from_texture(struct drm_api *api, return TRUE; } -boolean radeon_local_handle_from_texture(struct drm_api *api, - struct pipe_screen *screen, - struct pipe_texture *texture, - unsigned *stride, - unsigned *handle) +static boolean radeon_local_handle_from_texture(struct drm_api *api, + struct pipe_screen *screen, + struct pipe_texture *texture, + unsigned *stride, + unsigned *handle) { struct pipe_buffer *buffer; if (!radeon_buffer_from_texture(api, texture, &buffer, stride)) { diff --git a/src/gallium/winsys/drm/radeon/core/radeon_r300.c b/src/gallium/winsys/drm/radeon/core/radeon_r300.c index d7238762219..d2d84f1a8f0 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_r300.c +++ b/src/gallium/winsys/drm/radeon/core/radeon_r300.c @@ -137,7 +137,7 @@ static void do_ioctls(struct r300_winsys* winsys, int fd) int target = 0; int retval; - info.value = ⌖ + info.value = (unsigned long)⌖ /* First, get the number of pixel pipes */ info.request = RADEON_INFO_NUM_GB_PIPES; diff --git a/src/gallium/winsys/g3dvl/xsp_winsys.c b/src/gallium/winsys/g3dvl/xsp_winsys.c index 698c2856a4f..37d60ce5406 100644 --- a/src/gallium/winsys/g3dvl/xsp_winsys.c +++ b/src/gallium/winsys/g3dvl/xsp_winsys.c @@ -105,6 +105,7 @@ static struct pipe_buffer* xsp_surface_buffer_create unsigned height, enum pipe_format format, unsigned usage, + unsigned tex_usage, unsigned *stride ) { diff --git a/src/gallium/winsys/gdi/gdi_softpipe_winsys.c b/src/gallium/winsys/gdi/gdi_softpipe_winsys.c index 33826524d7a..66120a6a983 100644 --- a/src/gallium/winsys/gdi/gdi_softpipe_winsys.c +++ b/src/gallium/winsys/gdi/gdi_softpipe_winsys.c @@ -166,6 +166,7 @@ gdi_softpipe_surface_buffer_create(struct pipe_winsys *winsys, unsigned width, unsigned height, enum pipe_format format, unsigned usage, + unsigned tex_usage, unsigned *stride) { const unsigned alignment = 64; |