summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/i915
diff options
context:
space:
mode:
authorStéphane Marchesin <[email protected]>2011-09-22 19:24:07 -0700
committerStéphane Marchesin <[email protected]>2011-09-22 19:26:33 -0700
commite3c94fac4eb159f8c35798d1ad7515a40f5a2eca (patch)
treee330a298fae61b1dbb39f403394c372d42f5b204 /src/gallium/drivers/i915
parent79a0499369470a2a9b2cb5cfc83b1790283f4556 (diff)
i915g: Don't generate useless swizzles before texture accesses.
That helps reduce the number of texture indirections, which are very limited on i915.
Diffstat (limited to 'src/gallium/drivers/i915')
-rw-r--r--src/gallium/drivers/i915/i915_fpc.h5
-rw-r--r--src/gallium/drivers/i915/i915_fpc_emit.c34
-rw-r--r--src/gallium/drivers/i915/i915_fpc_translate.c33
3 files changed, 62 insertions, 10 deletions
diff --git a/src/gallium/drivers/i915/i915_fpc.h b/src/gallium/drivers/i915/i915_fpc.h
index 41bf5161b0b..26cf2fbbe10 100644
--- a/src/gallium/drivers/i915/i915_fpc.h
+++ b/src/gallium/drivers/i915/i915_fpc.h
@@ -169,7 +169,10 @@ extern void i915_release_utemps(struct i915_fp_compile *p);
extern uint i915_emit_texld(struct i915_fp_compile *p,
uint dest,
uint destmask,
- uint sampler, uint coord, uint op);
+ uint sampler,
+ uint coord,
+ uint op,
+ uint num_coord);
extern uint i915_emit_arith(struct i915_fp_compile *p,
uint op,
diff --git a/src/gallium/drivers/i915/i915_fpc_emit.c b/src/gallium/drivers/i915/i915_fpc_emit.c
index c4a42df7882..d2953962329 100644
--- a/src/gallium/drivers/i915/i915_fpc_emit.c
+++ b/src/gallium/drivers/i915/i915_fpc_emit.c
@@ -216,16 +216,36 @@ i915_emit_arith(struct i915_fp_compile * p,
* \param opcode the instruction opcode
*/
uint i915_emit_texld( struct i915_fp_compile *p,
- uint dest,
- uint destmask,
- uint sampler,
- uint coord,
- uint opcode )
+ uint dest,
+ uint destmask,
+ uint sampler,
+ uint coord,
+ uint opcode,
+ uint num_coord )
{
const uint k = UREG(GET_UREG_TYPE(coord), GET_UREG_NR(coord));
+
int temp = -1;
+ uint ignore = 0;
+
+ /* Eliminate the useless texture coordinates. Otherwise we end up generating
+ * a swizzle for no reason below. */
+ switch(num_coord) {
+ case 0:
+ /* Ignore x */
+ ignore |= (0xf << UREG_CHANNEL_X_SHIFT);
+ case 1:
+ /* Ignore y */
+ ignore |= (0xf << UREG_CHANNEL_Y_SHIFT);
+ case 2:
+ /* Ignore z */
+ ignore |= (0xf << UREG_CHANNEL_Z_SHIFT);
+ case 3:
+ /* Ignore w */
+ ignore |= (0xf << UREG_CHANNEL_W_SHIFT);
+ }
- if (coord != k) {
+ if ( (coord &~ignore ) != (k & ~ignore) ) {
/* texcoord is swizzled or negated. Need to allocate a new temporary
* register (a utemp / unpreserved temp) won't do.
*/
@@ -248,7 +268,7 @@ uint i915_emit_texld( struct i915_fp_compile *p,
if (destmask != A0_DEST_CHANNEL_ALL) {
/* if not writing to XYZW... */
uint tmp = i915_get_utemp(p);
- i915_emit_texld( p, tmp, A0_DEST_CHANNEL_ALL, sampler, coord, opcode );
+ i915_emit_texld( p, tmp, A0_DEST_CHANNEL_ALL, sampler, coord, opcode, num_coord );
i915_emit_arith( p, A0_MOV, dest, destmask, 0, tmp, 0, 0 );
/* XXX release utemp here? */
}
diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c
index 641ab3c8107..b383a7476b8 100644
--- a/src/gallium/drivers/i915/i915_fpc_translate.c
+++ b/src/gallium/drivers/i915/i915_fpc_translate.c
@@ -372,6 +372,33 @@ translate_tex_src_target(struct i915_fp_compile *p, uint tex)
}
}
+/**
+ * Return the number of texture coordinates associated with a
+ * TGSI_TEXTURE_x target.
+ *
+ * 1D and SHADOW1D targets yield 1; 2D, SHADOW2D, RECT and SHADOWRECT
+ * targets yield 2; 3D and CUBE targets yield 3.  Any other target is
+ * reported through i915_program_error() and 2 is returned as a fallback.
+ *
+ * \param p    the fragment program compile state, used for error reporting
+ * \param tex  the TGSI_TEXTURE_x target
+ * \return the coordinate count (1, 2 or 3)
+ */
+static uint
+texture_num_coords(struct i915_fp_compile *p, uint tex)
+{
+ switch (tex) {
+ case TGSI_TEXTURE_SHADOW1D:
+ case TGSI_TEXTURE_1D:
+ return 1;
+
+ case TGSI_TEXTURE_SHADOW2D:
+ case TGSI_TEXTURE_2D:
+ case TGSI_TEXTURE_SHADOWRECT:
+ case TGSI_TEXTURE_RECT:
+ return 2;
+
+ case TGSI_TEXTURE_3D:
+ case TGSI_TEXTURE_CUBE:
+ return 3;
+
+ default:
+ i915_program_error(p, "Num coords");
+ return 2;
+ }
+}
+
/**
* Generate texel lookup instruction.
@@ -393,7 +420,8 @@ emit_tex(struct i915_fp_compile *p,
get_result_flags( inst ),
sampler,
coord,
- opcode);
+ opcode,
+ texture_num_coords(p, texture) );
}
@@ -622,7 +650,8 @@ i915_translate_instruction(struct i915_fp_compile *p,
A0_DEST_CHANNEL_ALL, /* dest writemask */
0, /* sampler */
src0, /* coord*/
- T0_TEXKILL); /* opcode */
+ T0_TEXKILL, /* opcode */
+ 1); /* num_coord */
break;
case TGSI_OPCODE_KILP: