diff options
author | Keith Whitwell <[email protected]> | 2009-07-16 11:01:19 +0100 |
---|---|---|
committer | Keith Whitwell <[email protected]> | 2009-07-16 11:37:52 +0100 |
commit | 3858f47994dd39b725533cf3cf1a495fbc0ee6e3 (patch) | |
tree | ed1be59fffc226b7674eee95349923a07c81f625 /src | |
parent | a01383ff6ac87295a2c2bda45ddfac35530b0055 (diff) |
tgsi: initial texturing support on sse path
The most obvious problem is that drawpixels comes out blocky, but this may be
an existing issue with KIL on the SSE path.
Diffstat (limited to 'src')
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_sse2.c | 202 | ||||
-rw-r--r-- | src/gallium/drivers/softpipe/sp_fs_sse.c | 1 |
2 files changed, 184 insertions, 19 deletions
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 5928f874a97..e43bee00a98 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -101,7 +101,7 @@ get_const_base( void ) { return x86_make_reg( file_REG32, - reg_CX ); + reg_AX ); } static struct x86_reg @@ -109,7 +109,7 @@ get_machine_base( void ) { return x86_make_reg( file_REG32, - reg_AX ); + reg_CX ); } static struct x86_reg @@ -145,6 +145,14 @@ get_coef_base( void ) } static struct x86_reg +get_sampler_base( void ) +{ + return x86_make_reg( + file_REG32, + reg_DI ); +} + +static struct x86_reg get_immediate_base( void ) { return x86_make_reg( @@ -179,6 +187,15 @@ get_const( } static struct x86_reg +get_sampler_ptr( + unsigned unit ) +{ + return x86_make_disp( + get_sampler_base(), + unit * sizeof( struct tgsi_sampler * ) ); +} + +static struct x86_reg get_input( unsigned vec, unsigned chan ) @@ -1222,6 +1239,12 @@ emit_sub( make_xmm( xmm_src ) ); } + + + + + + /** * Register fetch. */ @@ -1380,11 +1403,156 @@ emit_store( #define STORE( FUNC, INST, XMM, INDEX, CHAN )\ emit_store( FUNC, XMM, &(INST).FullDstRegisters[INDEX], &(INST), CHAN ) + +static void PIPE_CDECL +fetch_texel( struct tgsi_sampler **sampler, + float *store ) +{ +#if 0 + uint j; + + debug_printf("%s sampler: %p (%p) store: %p\n", + __FUNCTION__, + sampler, *sampler, + store ); + + debug_printf("lodbias %f\n", store[12]); + + for (j = 0; j < 4; j++) + debug_printf("sample %d texcoord %f %f\n", + j, + store[0+j], + store[4+j]); +#endif + + { + float rgba[NUM_CHANNELS][QUAD_SIZE]; + (*sampler)->get_samples(*sampler, + &store[0], + &store[4], + &store[8], + 0.0f, /*store[12], lodbias */ + rgba); + + memcpy( store, rgba, 16 * sizeof(float)); + } + +#if 0 + for (j = 0; j < 4; j++) + debug_printf("sample %d result %f %f %f %f\n", + j, + store[0+j], + store[4+j], + store[8+j], + store[12+j]); +#endif +} + /** * High-level instruction translators. 
*/ static void +emit_tex( struct x86_function *func, + const struct tgsi_full_instruction *inst, + boolean lodbias, + boolean projected) +{ + const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; + struct x86_reg args[2]; + unsigned count; + unsigned i; + + switch (inst->InstructionExtTexture.Texture) { + case TGSI_TEXTURE_1D: + case TGSI_TEXTURE_SHADOW1D: + count = 1; + break; + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_RECT: + case TGSI_TEXTURE_SHADOW2D: + case TGSI_TEXTURE_SHADOWRECT: + count = 2; + break; + case TGSI_TEXTURE_3D: + case TGSI_TEXTURE_CUBE: + count = 3; + break; + default: + assert(0); + return; + } + + if (lodbias) { + FETCH( func, *inst, 3, 0, 3 ); + } + else { + emit_tempf( + func, + 3, + TGSI_EXEC_TEMP_00000000_I, + TGSI_EXEC_TEMP_00000000_C ); + + } + + /* store lodbias whether enabled or not -- fetch_texel currently + * respects it always. + */ + sse_movaps( func, + get_temp( TEMP_R0, 3 ), + make_xmm( 3 ) ); + + + if (projected) { + FETCH( func, *inst, 3, 0, 3 ); + + emit_rcp( func, 3, 3 ); + } + + for (i = 0; i < count; i++) { + FETCH( func, *inst, i, 0, i ); + + if (projected) { + sse_mulps( + func, + make_xmm( i ), + make_xmm( 3 ) ); + } + + /* Store in the argument buffer: + */ + sse_movaps( + func, + get_temp( TEMP_R0, i ), + make_xmm( i ) ); + } + + args[0] = get_temp( TEMP_R0, 0 ); + args[1] = get_sampler_ptr( unit ); + + + emit_func_call( func, + 0, + args, + Elements(args), + fetch_texel ); + + /* If all four channels are enabled, could use a pointer to + * dst[0].x instead of TEMP_R0 for store? 
+ */ + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, i ) { + + sse_movaps( + func, + make_xmm( 0 ), + get_temp( TEMP_R0, i ) ); + + STORE( func, *inst, 0, 0, i ); + } +} + + +static void emit_kil( struct x86_function *func, const struct tgsi_full_src_register *reg ) @@ -2168,21 +2336,7 @@ emit_instruction( break; case TGSI_OPCODE_TEX: - if (0) { - /* Disable dummy texture code: - */ - emit_tempf( - func, - 0, - TEMP_ONE_I, - TEMP_ONE_C ); - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( func, *inst, 0, 0, chan_index ); - } - } - else { - return 0; - } + emit_tex( func, inst, FALSE, FALSE ); break; case TGSI_OPCODE_TXD: @@ -2280,7 +2434,7 @@ emit_instruction( break; case TGSI_OPCODE_TXB: - return 0; + emit_tex( func, inst, TRUE, FALSE ); break; case TGSI_OPCODE_NRM: @@ -2388,9 +2542,13 @@ emit_instruction( break; case TGSI_OPCODE_TXL: - return 0; + emit_tex( func, inst, TRUE, FALSE ); break; + case TGSI_OPCODE_TXP: + emit_tex( func, inst, FALSE, TRUE ); + break; + case TGSI_OPCODE_BRK: return 0; break; @@ -2758,6 +2916,12 @@ tgsi_emit_sse2( func, get_coef_base(), x86_fn_arg( func, 4 ) ); + + x86_mov( + func, + get_sampler_base(), + x86_make_disp( get_machine_base(), + Offset( struct tgsi_exec_machine, Samplers ) ) ); } diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c index f9362efcb71..f4fa0905d74 100644 --- a/src/gallium/drivers/softpipe/sp_fs_sse.c +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c @@ -72,6 +72,7 @@ fs_sse_prepare( const struct sp_fragment_shader *base, struct tgsi_exec_machine *machine, struct tgsi_sampler **samplers ) { + machine->Samplers = samplers; } |