summaryrefslogtreecommitdiffstats
path: root/src/gallium/auxiliary/tgsi
diff options
context:
space:
mode:
authorBrian Paul <[email protected]>2008-11-05 15:58:09 -0700
committerBrian Paul <[email protected]>2008-11-05 15:58:09 -0700
commit80a718a63bf2fa817e346f0f5731ee9ef2e0e68b (patch)
tree4a1bdb03ae8b8482b9a15e56c9604fcc34e95b0e /src/gallium/auxiliary/tgsi
parent7115b79b77e541f3eb81db00f6f0c34a0f224feb (diff)
parentde14fdd63f26a2e6fc55fad92c08966f269601a6 (diff)
Merge commit 'origin/gallium-0.1' into gallium-0.2
Conflicts: src/gallium/auxiliary/rtasm/rtasm_execmem.c src/mesa/shader/slang/slang_emit.c src/mesa/shader/slang/slang_log.c src/mesa/state_tracker/st_atom_framebuffer.c
Diffstat (limited to 'src/gallium/auxiliary/tgsi')
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_exec.c4
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_exec.h25
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_sse2.c35
3 files changed, 61 insertions, 3 deletions
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 1a5294eabc3..e32779123f6 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -958,6 +958,10 @@ fetch_src_file_channel(
switch( file ) {
case TGSI_FILE_CONSTANT:
assert(mach->Consts);
+ assert(index->i[0] >= 0);
+ assert(index->i[1] >= 0);
+ assert(index->i[2] >= 0);
+ assert(index->i[3] >= 0);
chan->f[0] = mach->Consts[index->i[0]][swizzle];
chan->f[1] = mach->Consts[index->i[1]][swizzle];
chan->f[2] = mach->Consts[index->i[2]][swizzle];
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index c4e649e69c4..fc40a25e09f 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -165,6 +165,10 @@ struct tgsi_exec_labels
#define TGSI_EXEC_TEMP_HALF_I (TGSI_EXEC_NUM_TEMPS + 3)
#define TGSI_EXEC_TEMP_HALF_C 1
+/* execution mask, each value is either 0 or ~0 */
+#define TGSI_EXEC_MASK_I (TGSI_EXEC_NUM_TEMPS + 3)
+#define TGSI_EXEC_MASK_C 2
+
#define TGSI_EXEC_TEMP_R0 (TGSI_EXEC_NUM_TEMPS + 4)
#define TGSI_EXEC_TEMP_ADDR (TGSI_EXEC_NUM_TEMPS + 5)
@@ -265,6 +269,27 @@ void
tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach);
+static INLINE void
+tgsi_set_kill_mask(struct tgsi_exec_machine *mach, unsigned mask)
+{
+ mach->Temps[TGSI_EXEC_TEMP_KILMASK_I].xyzw[TGSI_EXEC_TEMP_KILMASK_C].u[0] =
+ mask;
+}
+
+
+/** Set execution mask values prior to executing the shader */
+static INLINE void
+tgsi_set_exec_mask(struct tgsi_exec_machine *mach,
+ boolean ch0, boolean ch1, boolean ch2, boolean ch3)
+{
+ int *mask = mach->Temps[TGSI_EXEC_MASK_I].xyzw[TGSI_EXEC_MASK_C].i;
+ mask[0] = ch0 ? ~0 : 0;
+ mask[1] = ch1 ? ~0 : 0;
+ mask[2] = ch2 ? ~0 : 0;
+ mask[3] = ch3 ? ~0 : 0;
+}
+
+
#if defined __cplusplus
} /* extern "C" */
#endif
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index 47e52c84247..3df0c5db3fa 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -72,6 +72,9 @@
#define TEMP_R0 TGSI_EXEC_TEMP_R0
#define TEMP_ADDR TGSI_EXEC_TEMP_ADDR
+#define TEMP_EXEC_MASK_I TGSI_EXEC_MASK_I
+#define TEMP_EXEC_MASK_C TGSI_EXEC_MASK_C
+
/**
* X86 utility functions.
@@ -233,6 +236,9 @@ emit_const(
int indirectIndex )
{
if (indirect) {
+ /* 'vec' is the offset from the address register's value.
+ * We're loading CONST[ADDR+vec] into an xmm register.
+ */
struct x86_reg r0 = get_input_base();
struct x86_reg r1 = get_output_base();
uint i;
@@ -243,18 +249,40 @@ emit_const(
x86_push( func, r0 );
x86_push( func, r1 );
+ /*
+ * Loop over the four pixels or vertices in the quad.
+ * Get the value of the address (offset) register for pixel/vertex[i],
+ * add it to the src offset and index into the constant buffer.
+ * Note that we're working on SOA data.
+ * If any of the pixel/vertex execution channels are unused their
+ * values will be garbage. It's very important that we don't use
+ * those garbage values as indexes into the constant buffer since
+ * that'll cause segfaults.
+ * The solution is to bitwise-AND the offset with the execution mask
+ * register whose values are either 0 or ~0.
+ * The caller must setup the execution mask register to indicate
+ * which channels are valid/alive before running the shader.
+ * The execution mask will also figure into loops and conditionals
+ * someday.
+ */
for (i = 0; i < QUAD_SIZE; i++) {
- x86_lea( func, r0, get_const( vec, chan ) );
+ /* r1 = address register[i] */
x86_mov( func, r1, x86_make_disp( get_temp( TEMP_ADDR, CHAN_X ), i * 4 ) );
+ /* r0 = execution mask[i] */
+ x86_mov( func, r0, x86_make_disp( get_temp( TEMP_EXEC_MASK_I, TEMP_EXEC_MASK_C ), i * 4 ) );
+ /* r1 = r1 & r0 */
+ x86_and( func, r1, r0 );
+ /* r0 = 'vec', the offset */
+ x86_lea( func, r0, get_const( vec, chan ) );
- /* Quick hack to multiply by 16 -- need to add SHL to rtasm.
+ /* Quick hack to multiply r1 by 16 -- need to add SHL to rtasm.
*/
x86_add( func, r1, r1 );
x86_add( func, r1, r1 );
x86_add( func, r1, r1 );
x86_add( func, r1, r1 );
- x86_add( func, r0, r1 );
+ x86_add( func, r0, r1 ); /* r0 = r0 + r1 */
x86_mov( func, r1, x86_deref( r0 ) );
x86_mov( func, x86_make_disp( get_temp( TEMP_R0, CHAN_X ), i * 4 ), r1 );
}
@@ -268,6 +296,7 @@ emit_const(
get_temp( TEMP_R0, CHAN_X ) );
}
else {
+ /* 'vec' is the index into the src register file, such as TEMP[vec] */
assert( vec >= 0 );
sse_movss(