From 5a0299875c7a4a9a0cb2cf55777c92c1b17d528b Mon Sep 17 00:00:00 2001 From: michal Date: Wed, 5 Nov 2008 11:58:11 +0100 Subject: draw: Implement TGSI_OPCODE_TRUNC. --- src/gallium/auxiliary/draw/draw_vs_aos.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'src/gallium/auxiliary/draw') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 87232865e23..a6880685db8 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -1632,6 +1632,17 @@ static boolean emit_SUB( struct aos_compilation *cp, const struct tgsi_full_inst return TRUE; } +static boolean emit_TRUNC( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg tmp0 = aos_get_xmm_reg(cp); + + sse2_cvttps2dq(cp->func, tmp0, arg0); + sse2_cvtdq2ps(cp->func, tmp0, tmp0); + + store_dest(cp, &op->FullDstRegisters[0], tmp0); + return TRUE; +} static boolean emit_XPD( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { @@ -1770,6 +1781,9 @@ emit_instruction( struct aos_compilation *cp, case TGSI_OPCODE_SIN: return emit_SIN(cp, inst); + case TGSI_OPCODE_TRUNC: + return emit_TRUNC(cp, inst); + case TGSI_OPCODE_END: return TRUE; -- cgit v1.2.3 From fc3b361191c35d2b0b072c08e39b1e5b26d7e2a6 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 5 Nov 2008 08:57:11 -0700 Subject: gallium: disable some debug output --- src/gallium/auxiliary/draw/draw_vs_aos.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary/draw') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index a6880685db8..6141ba9cbf7 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -2190,7 +2190,8 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs, if (!vaos->buffer) goto fail; - debug_printf("nr_vb: %d const: %x\n", vaos->nr_vb, vaos->base.key.const_vbuffers); + if (0) + debug_printf("nr_vb: %d const: %x\n", vaos->nr_vb, vaos->base.key.const_vbuffers); #if 0 tgsi_dump(vs->state.tokens, 0); -- cgit v1.2.3 From 05a17f83b0a6549fde41540f9075505e81ab08d3 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 5 Nov 2008 08:58:40 -0700 Subject: gallium: added some debug code (disabled) --- src/gallium/auxiliary/draw/draw_pt.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'src/gallium/auxiliary/draw') diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 87ec6ae20c2..b98c0a0ecf4 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -33,6 +33,8 @@ #include "draw/draw_context.h" #include "draw/draw_private.h" #include "draw/draw_pt.h" +#include "draw/draw_vs.h" +#include "tgsi/tgsi_dump.h" static unsigned trim( unsigned count, unsigned first, unsigned incr ) { @@ -195,6 +197,28 @@ draw_arrays(struct draw_context *draw, unsigned prim, draw->reduced_prim = reduced_prim; } +#if 0 + { + int i; + debug_printf("draw_arrays(prim=%u start=%u count=%u):\n", + prim, start, count); + tgsi_dump(draw->vs.vertex_shader->state.tokens, 0); + debug_printf("Elements:\n"); + for (i = 0; i < draw->pt.nr_vertex_elements; i++) { + debug_printf(" format=%s comps=%u\n", + pf_name(draw->pt.vertex_element[i].src_format), + draw->pt.vertex_element[i].nr_components); + } + debug_printf("Buffers:\n"); + for (i = 0; i < draw->pt.nr_vertex_buffers; i++) { + debug_printf(" pitch=%u offset=%u ptr=%p\n", + draw->pt.vertex_buffer[i].pitch, + draw->pt.vertex_buffer[i].buffer_offset, + draw->pt.user.vbuffer[i]); + } + } +#endif + /* drawing done here: */ draw_pt_arrays(draw, prim, start, count); } -- cgit v1.2.3 From f0debbb0bb951bfc6dc0ae467564b3b1230324cf Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 5 Nov 2008 14:02:07 -0700 Subject: gallium: call tgsi_set_exec_mask() and use exec mask in SSE ARL code This prevents vertex shaders from referencing invalid memory locations when the shader is operating on less than four vertices or fragments. --- src/gallium/auxiliary/draw/draw_vs_exec.c | 6 ++++++ src/gallium/auxiliary/draw/draw_vs_sse.c | 14 +++++++++++++ src/gallium/auxiliary/tgsi/tgsi_sse2.c | 35 ++++++++++++++++++++++++++++--- src/gallium/drivers/softpipe/sp_fs_sse.c | 3 ++- 4 files changed, 54 insertions(+), 4 deletions(-) (limited to 'src/gallium/auxiliary/draw') diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index 44563803f90..82d27d44934 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -120,6 +120,12 @@ vs_exec_run_linear( struct draw_vertex_shader *shader, input = (const float (*)[4])((const char *)input + input_stride); } + tgsi_set_exec_mask(machine, + 1, + max_vertices > 1, + max_vertices > 2, + max_vertices > 3); + /* run interpreter */ tgsi_exec_machine_run( machine ); diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index 0efabd9de8b..77ba5152f9f 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -99,9 +99,23 @@ vs_sse_run_linear( struct draw_vertex_shader *base, struct tgsi_exec_machine *machine = shader->machine; unsigned int i; + /* By default, execute all channels. XXX move this inside the loop + * below when we support shader conditionals/loops. + */ + tgsi_set_exec_mask(machine, 1, 1, 1, 1); + for (i = 0; i < count; i += MAX_TGSI_VERTICES) { unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i); + if (max_vertices < 4) { + /* disable the unused execution channels */ + tgsi_set_exec_mask(machine, + 1, + max_vertices > 1, + max_vertices > 2, + 0); + } + /* run compiled shader */ shader->func(machine->Inputs, diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index c115956c5d7..4d59106dbfa 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -69,6 +69,9 @@ #define TEMP_R0 TGSI_EXEC_TEMP_R0 #define TEMP_ADDR TGSI_EXEC_TEMP_ADDR +#define TEMP_EXEC_MASK_I TGSI_EXEC_MASK_I +#define TEMP_EXEC_MASK_C TGSI_EXEC_MASK_C + /** * X86 utility functions. @@ -230,6 +233,9 @@ emit_const( int indirectIndex ) { if (indirect) { + /* 'vec' is the offset from the address register's value. + * We're loading CONST[ADDR+vec] into an xmm register. + */ struct x86_reg r0 = get_input_base(); struct x86_reg r1 = get_output_base(); uint i; @@ -240,18 +246,40 @@ emit_const( x86_push( func, r0 ); x86_push( func, r1 ); + /* + * Loop over the four pixels or vertices in the quad. + * Get the value of the address (offset) register for pixel/vertex[i], + * add it to the src offset and index into the constant buffer. + * Note that we're working on SOA data. + * If any of the pixel/vertex execution channels are unused their + * values will be garbage. It's very important that we don't use + * those garbage values as indexes into the constant buffer since + * that'll cause segfaults. + * The solution is to bitwise-AND the offset with the execution mask + * register whose values are either 0 or ~0. + * The caller must setup the execution mask register to indicate + * which channels are valid/alive before running the shader. + * The execution mask will also figure into loops and conditionals + * someday. + */ for (i = 0; i < QUAD_SIZE; i++) { - x86_lea( func, r0, get_const( vec, chan ) ); + /* r1 = address register[i] */ x86_mov( func, r1, x86_make_disp( get_temp( TEMP_ADDR, CHAN_X ), i * 4 ) ); + /* r0 = execution mask[i] */ + x86_mov( func, r0, x86_make_disp( get_temp( TEMP_EXEC_MASK_I, TEMP_EXEC_MASK_C ), i * 4 ) ); + /* r1 = r1 & r0 */ + x86_and( func, r1, r0 ); + /* r0 = 'vec', the offset */ + x86_lea( func, r0, get_const( vec, chan ) ); - /* Quick hack to multiply by 16 -- need to add SHL to rtasm. + /* Quick hack to multiply r1 by 16 -- need to add SHL to rtasm. */ x86_add( func, r1, r1 ); x86_add( func, r1, r1 ); x86_add( func, r1, r1 ); x86_add( func, r1, r1 ); - x86_add( func, r0, r1 ); + x86_add( func, r0, r1 ); /* r0 = r0 + r1 */ x86_mov( func, r1, x86_deref( r0 ) ); x86_mov( func, x86_make_disp( get_temp( TEMP_R0, CHAN_X ), i * 4 ), r1 ); } @@ -265,6 +293,7 @@ emit_const( get_temp( TEMP_R0, CHAN_X ) ); } else { + /* 'vec' is the index into the src register file, such as TEMP[vec] */ assert( vec >= 0 ); sse_movss( diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c index 496ed43df26..50eb2c07bcb 100644 --- a/src/gallium/drivers/softpipe/sp_fs_sse.c +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c @@ -92,7 +92,8 @@ fs_sse_run( const struct sp_fragment_shader *base, machine->Temps); /* init kill mask */ - machine->Temps[TGSI_EXEC_TEMP_KILMASK_I].xyzw[TGSI_EXEC_TEMP_KILMASK_C].u[0] = 0x0; + tgsi_set_kill_mask(machine, 0x0); + tgsi_set_exec_mask(machine, 1, 1, 1, 1); shader->func( machine->Inputs, machine->Outputs, -- cgit v1.2.3