diff options
author | Stéphane Marchesin <[email protected]> | 2011-09-22 19:24:07 -0700 |
---|---|---|
committer | Stéphane Marchesin <[email protected]> | 2011-09-22 19:26:33 -0700 |
commit | e3c94fac4eb159f8c35798d1ad7515a40f5a2eca (patch) | |
tree | e330a298fae61b1dbb39f403394c372d42f5b204 /src/gallium/drivers | |
parent | 79a0499369470a2a9b2cb5cfc83b1790283f4556 (diff) |
i915g: Don't generate useless swizzles before texture accesses.
This helps reduce the number of texture indirections, which are very limited on i915 hardware.
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r-- | src/gallium/drivers/i915/i915_fpc.h | 5 | ||||
-rw-r--r-- | src/gallium/drivers/i915/i915_fpc_emit.c | 34 | ||||
-rw-r--r-- | src/gallium/drivers/i915/i915_fpc_translate.c | 33 |
3 files changed, 62 insertions, 10 deletions
diff --git a/src/gallium/drivers/i915/i915_fpc.h b/src/gallium/drivers/i915/i915_fpc.h index 41bf5161b0b..26cf2fbbe10 100644 --- a/src/gallium/drivers/i915/i915_fpc.h +++ b/src/gallium/drivers/i915/i915_fpc.h @@ -169,7 +169,10 @@ extern void i915_release_utemps(struct i915_fp_compile *p); extern uint i915_emit_texld(struct i915_fp_compile *p, uint dest, uint destmask, - uint sampler, uint coord, uint op); + uint sampler, + uint coord, + uint op, + uint num_coord); extern uint i915_emit_arith(struct i915_fp_compile *p, uint op, diff --git a/src/gallium/drivers/i915/i915_fpc_emit.c b/src/gallium/drivers/i915/i915_fpc_emit.c index c4a42df7882..d2953962329 100644 --- a/src/gallium/drivers/i915/i915_fpc_emit.c +++ b/src/gallium/drivers/i915/i915_fpc_emit.c @@ -216,16 +216,36 @@ i915_emit_arith(struct i915_fp_compile * p, * \param opcode the instruction opcode */ uint i915_emit_texld( struct i915_fp_compile *p, - uint dest, - uint destmask, - uint sampler, - uint coord, - uint opcode ) + uint dest, + uint destmask, + uint sampler, + uint coord, + uint opcode, + uint num_coord ) { const uint k = UREG(GET_UREG_TYPE(coord), GET_UREG_NR(coord)); + int temp = -1; + uint ignore = 0; + + /* Eliminate the useless texture coordinates. Otherwise we end up generating + * a swizzle for no reason below. */ + switch(num_coord) { + case 0: + /* Ignore x */ + ignore |= (0xf << UREG_CHANNEL_X_SHIFT); + case 1: + /* Ignore y */ + ignore |= (0xf << UREG_CHANNEL_Y_SHIFT); + case 2: + /* Ignore z */ + ignore |= (0xf << UREG_CHANNEL_Z_SHIFT); + case 3: + /* Ignore w */ + ignore |= (0xf << UREG_CHANNEL_W_SHIFT); + } - if (coord != k) { + if ( (coord &~ignore ) != (k & ~ignore) ) { /* texcoord is swizzled or negated. Need to allocate a new temporary * register (a utemp / unpreserved temp) won't do. */ @@ -248,7 +268,7 @@ uint i915_emit_texld( struct i915_fp_compile *p, if (destmask != A0_DEST_CHANNEL_ALL) { /* if not writing to XYZW... 
*/ uint tmp = i915_get_utemp(p); - i915_emit_texld( p, tmp, A0_DEST_CHANNEL_ALL, sampler, coord, opcode ); + i915_emit_texld( p, tmp, A0_DEST_CHANNEL_ALL, sampler, coord, opcode, num_coord ); i915_emit_arith( p, A0_MOV, dest, destmask, 0, tmp, 0, 0 ); /* XXX release utemp here? */ } diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c index 641ab3c8107..b383a7476b8 100644 --- a/src/gallium/drivers/i915/i915_fpc_translate.c +++ b/src/gallium/drivers/i915/i915_fpc_translate.c @@ -372,6 +372,33 @@ translate_tex_src_target(struct i915_fp_compile *p, uint tex) } } +/** + * Convert TGSI_TEXTURE_x token to DO_SAMPLE_TYPE_x token + */ +static uint +texture_num_coords(struct i915_fp_compile *p, uint tex) +{ + switch (tex) { + case TGSI_TEXTURE_SHADOW1D: + case TGSI_TEXTURE_1D: + return 1; + + case TGSI_TEXTURE_SHADOW2D: + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_SHADOWRECT: + case TGSI_TEXTURE_RECT: + return 2; + + case TGSI_TEXTURE_3D: + case TGSI_TEXTURE_CUBE: + return 3; + + default: + i915_program_error(p, "Num coords"); + return 2; + } +} + /** * Generate texel lookup instruction. @@ -393,7 +420,8 @@ emit_tex(struct i915_fp_compile *p, get_result_flags( inst ), sampler, coord, - opcode); + opcode, + texture_num_coords(p, texture) ); } @@ -622,7 +650,8 @@ i915_translate_instruction(struct i915_fp_compile *p, A0_DEST_CHANNEL_ALL, /* dest writemask */ 0, /* sampler */ src0, /* coord*/ - T0_TEXKILL); /* opcode */ + T0_TEXKILL, /* opcode */ + 1); /* num_coord */ break; case TGSI_OPCODE_KILP: |