diff options
author | Brian Paul <[email protected]> | 2008-11-05 15:58:09 -0700 |
---|---|---|
committer | Brian Paul <[email protected]> | 2008-11-05 15:58:09 -0700 |
commit | 80a718a63bf2fa817e346f0f5731ee9ef2e0e68b (patch) | |
tree | 4a1bdb03ae8b8482b9a15e56c9604fcc34e95b0e /src/gallium/auxiliary/tgsi | |
parent | 7115b79b77e541f3eb81db00f6f0c34a0f224feb (diff) | |
parent | de14fdd63f26a2e6fc55fad92c08966f269601a6 (diff) |
Merge commit 'origin/gallium-0.1' into gallium-0.2
Conflicts:
src/gallium/auxiliary/rtasm/rtasm_execmem.c
src/mesa/shader/slang/slang_emit.c
src/mesa/shader/slang/slang_log.c
src/mesa/state_tracker/st_atom_framebuffer.c
Diffstat (limited to 'src/gallium/auxiliary/tgsi')
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_exec.c | 4 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_exec.h | 25 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_sse2.c | 35 |
3 files changed, 61 insertions, 3 deletions
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 1a5294eabc3..e32779123f6 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -958,6 +958,10 @@ fetch_src_file_channel( switch( file ) { case TGSI_FILE_CONSTANT: assert(mach->Consts); + assert(index->i[0] >= 0); + assert(index->i[1] >= 0); + assert(index->i[2] >= 0); + assert(index->i[3] >= 0); chan->f[0] = mach->Consts[index->i[0]][swizzle]; chan->f[1] = mach->Consts[index->i[1]][swizzle]; chan->f[2] = mach->Consts[index->i[2]][swizzle]; diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index c4e649e69c4..fc40a25e09f 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -165,6 +165,10 @@ struct tgsi_exec_labels #define TGSI_EXEC_TEMP_HALF_I (TGSI_EXEC_NUM_TEMPS + 3) #define TGSI_EXEC_TEMP_HALF_C 1 +/* execution mask, each value is either 0 or ~0 */ +#define TGSI_EXEC_MASK_I (TGSI_EXEC_NUM_TEMPS + 3) +#define TGSI_EXEC_MASK_C 2 + #define TGSI_EXEC_TEMP_R0 (TGSI_EXEC_NUM_TEMPS + 4) #define TGSI_EXEC_TEMP_ADDR (TGSI_EXEC_NUM_TEMPS + 5) @@ -265,6 +269,27 @@ void tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach); +static INLINE void +tgsi_set_kill_mask(struct tgsi_exec_machine *mach, unsigned mask) +{ + mach->Temps[TGSI_EXEC_TEMP_KILMASK_I].xyzw[TGSI_EXEC_TEMP_KILMASK_C].u[0] = + mask; +} + + +/** Set execution mask values prior to executing the shader */ +static INLINE void +tgsi_set_exec_mask(struct tgsi_exec_machine *mach, + boolean ch0, boolean ch1, boolean ch2, boolean ch3) +{ + int *mask = mach->Temps[TGSI_EXEC_MASK_I].xyzw[TGSI_EXEC_MASK_C].i; + mask[0] = ch0 ? ~0 : 0; + mask[1] = ch1 ? ~0 : 0; + mask[2] = ch2 ? ~0 : 0; + mask[3] = ch3 ? ~0 : 0; +} + + #if defined __cplusplus } /* extern "C" */ #endif diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 47e52c84247..3df0c5db3fa 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -72,6 +72,9 @@ #define TEMP_R0 TGSI_EXEC_TEMP_R0 #define TEMP_ADDR TGSI_EXEC_TEMP_ADDR +#define TEMP_EXEC_MASK_I TGSI_EXEC_MASK_I +#define TEMP_EXEC_MASK_C TGSI_EXEC_MASK_C + /** * X86 utility functions. @@ -233,6 +236,9 @@ emit_const( int indirectIndex ) { if (indirect) { + /* 'vec' is the offset from the address register's value. + * We're loading CONST[ADDR+vec] into an xmm register. + */ struct x86_reg r0 = get_input_base(); struct x86_reg r1 = get_output_base(); uint i; @@ -243,18 +249,40 @@ emit_const( x86_push( func, r0 ); x86_push( func, r1 ); + /* + * Loop over the four pixels or vertices in the quad. + * Get the value of the address (offset) register for pixel/vertex[i], + * add it to the src offset and index into the constant buffer. + * Note that we're working on SOA data. + * If any of the pixel/vertex execution channels are unused their + * values will be garbage. It's very important that we don't use + * those garbage values as indexes into the constant buffer since + * that'll cause segfaults. + * The solution is to bitwise-AND the offset with the execution mask + * register whose values are either 0 or ~0. + * The caller must setup the execution mask register to indicate + * which channels are valid/alive before running the shader. + * The execution mask will also figure into loops and conditionals + * someday. + */ for (i = 0; i < QUAD_SIZE; i++) { - x86_lea( func, r0, get_const( vec, chan ) ); + /* r1 = address register[i] */ x86_mov( func, r1, x86_make_disp( get_temp( TEMP_ADDR, CHAN_X ), i * 4 ) ); + /* r0 = execution mask[i] */ + x86_mov( func, r0, x86_make_disp( get_temp( TEMP_EXEC_MASK_I, TEMP_EXEC_MASK_C ), i * 4 ) ); + /* r1 = r1 & r0 */ + x86_and( func, r1, r0 ); + /* r0 = 'vec', the offset */ + x86_lea( func, r0, get_const( vec, chan ) ); - /* Quick hack to multiply by 16 -- need to add SHL to rtasm. + /* Quick hack to multiply r1 by 16 -- need to add SHL to rtasm. */ x86_add( func, r1, r1 ); x86_add( func, r1, r1 ); x86_add( func, r1, r1 ); x86_add( func, r1, r1 ); - x86_add( func, r0, r1 ); + x86_add( func, r0, r1 ); /* r0 = r0 + r1 */ x86_mov( func, r1, x86_deref( r0 ) ); x86_mov( func, x86_make_disp( get_temp( TEMP_R0, CHAN_X ), i * 4 ), r1 ); } @@ -268,6 +296,7 @@ emit_const( get_temp( TEMP_R0, CHAN_X ) ); } else { + /* 'vec' is the index into the src register file, such as TEMP[vec] */ assert( vec >= 0 ); sse_movss( |