diff options
Diffstat (limited to 'src/gallium/auxiliary/tgsi')
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_exec.c | 342 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_exec.h | 11 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_ppc.c | 3 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_sanity.c | 15 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_sse2.c | 64 |
5 files changed, 245 insertions, 190 deletions
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index f43233bdb49..2bcb33392a8 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -1509,7 +1509,8 @@ fetch_texel( struct tgsi_sampler *sampler, const union tgsi_exec_channel *s, const union tgsi_exec_channel *t, const union tgsi_exec_channel *p, - float lodbias, /* XXX should be float[4] */ + const union tgsi_exec_channel *c0, + enum tgsi_sampler_control control, union tgsi_exec_channel *r, union tgsi_exec_channel *g, union tgsi_exec_channel *b, @@ -1518,7 +1519,7 @@ fetch_texel( struct tgsi_sampler *sampler, uint j; float rgba[NUM_CHANNELS][QUAD_SIZE]; - sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba); + sampler->get_samples(sampler, s->f, t->f, p->f, c0->f, control, rgba); for (j = 0; j < 4; j++) { r->f[j] = rgba[0][j]; @@ -1529,102 +1530,95 @@ fetch_texel( struct tgsi_sampler *sampler, } +#define TEX_MODIFIER_NONE 0 +#define TEX_MODIFIER_PROJECTED 1 +#define TEX_MODIFIER_LOD_BIAS 2 +#define TEX_MODIFIER_EXPLICIT_LOD 3 + + static void exec_tex(struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst, - boolean biasLod, - boolean projected) + uint modifier) { const uint unit = inst->Src[1].Register.Index; union tgsi_exec_channel r[4]; + const union tgsi_exec_channel *lod = &ZeroVec; + enum tgsi_sampler_control control; uint chan_index; - float lodBias; - /* debug_printf("Sampler %u unit %u\n", sampler, unit); */ + if (modifier != TEX_MODIFIER_NONE) { + FETCH(&r[3], 0, CHAN_W); + if (modifier != TEX_MODIFIER_PROJECTED) { + lod = &r[3]; + } + } + + if (modifier == TEX_MODIFIER_EXPLICIT_LOD) { + control = tgsi_sampler_lod_explicit; + } else { + control = tgsi_sampler_lod_bias; + } switch (inst->Texture.Texture) { case TGSI_TEXTURE_1D: case TGSI_TEXTURE_SHADOW1D: - FETCH(&r[0], 0, CHAN_X); - if (projected) { - FETCH(&r[1], 0, CHAN_W); - micro_div( &r[0], &r[0], &r[1] ); + if (modifier == TEX_MODIFIER_PROJECTED) { + micro_div(&r[0], &r[0], &r[3]); } - if (biasLod) { - FETCH(&r[1], 0, CHAN_W); - lodBias = r[2].f[0]; - } - else - lodBias = 0.0; - fetch_texel(mach->Samplers[unit], - &r[0], &ZeroVec, &ZeroVec, lodBias, /* S, T, P, BIAS */ - &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ + &r[0], &ZeroVec, &ZeroVec, lod, /* S, T, P, LOD */ + control, + &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ break; case TGSI_TEXTURE_2D: case TGSI_TEXTURE_RECT: case TGSI_TEXTURE_SHADOW2D: case TGSI_TEXTURE_SHADOWRECT: - FETCH(&r[0], 0, CHAN_X); FETCH(&r[1], 0, CHAN_Y); FETCH(&r[2], 0, CHAN_Z); - if (projected) { - FETCH(&r[3], 0, CHAN_W); - micro_div( &r[0], &r[0], &r[3] ); - micro_div( &r[1], &r[1], &r[3] ); - micro_div( &r[2], &r[2], &r[3] ); - } - - if (biasLod) { - FETCH(&r[3], 0, CHAN_W); - lodBias = r[3].f[0]; + if (modifier == TEX_MODIFIER_PROJECTED) { + micro_div(&r[0], &r[0], &r[3]); + micro_div(&r[1], &r[1], &r[3]); + micro_div(&r[2], &r[2], &r[3]); } - else - lodBias = 0.0; fetch_texel(mach->Samplers[unit], - &r[0], &r[1], &r[2], lodBias, /* inputs */ + &r[0], &r[1], &r[2], lod, /* S, T, P, LOD */ + control, &r[0], &r[1], &r[2], &r[3]); /* outputs */ break; case TGSI_TEXTURE_3D: case TGSI_TEXTURE_CUBE: - FETCH(&r[0], 0, CHAN_X); FETCH(&r[1], 0, CHAN_Y); FETCH(&r[2], 0, CHAN_Z); - if (projected) { - FETCH(&r[3], 0, CHAN_W); - micro_div( &r[0], &r[0], &r[3] ); - micro_div( &r[1], &r[1], &r[3] ); - micro_div( &r[2], &r[2], &r[3] ); - } - - if (biasLod) { - FETCH(&r[3], 0, CHAN_W); - lodBias = r[3].f[0]; + if (modifier == TEX_MODIFIER_PROJECTED) { + micro_div(&r[0], &r[0], &r[3]); + micro_div(&r[1], &r[1], &r[3]); + micro_div(&r[2], &r[2], &r[3]); } - else - lodBias = 0.0; fetch_texel(mach->Samplers[unit], - &r[0], &r[1], &r[2], lodBias, + &r[0], &r[1], &r[2], lod, + control, &r[0], &r[1], &r[2], &r[3]); break; default: - assert (0); + assert(0); } - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[chan_index], 0, chan_index ); + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&r[chan_index], 0, chan_index); } } @@ -1647,8 +1641,9 @@ exec_txd(struct tgsi_exec_machine *mach, FETCH(&r[0], 0, CHAN_X); fetch_texel(mach->Samplers[unit], - &r[0], &ZeroVec, &ZeroVec, 0.0f, /* S, T, P, BIAS */ - &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ + &r[0], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, BIAS */ + tgsi_sampler_lod_bias, + &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ break; case TGSI_TEXTURE_2D: @@ -1661,8 +1656,9 @@ exec_txd(struct tgsi_exec_machine *mach, FETCH(&r[2], 0, CHAN_Z); fetch_texel(mach->Samplers[unit], - &r[0], &r[1], &r[2], 0.0f, /* inputs */ - &r[0], &r[1], &r[2], &r[3]); /* outputs */ + &r[0], &r[1], &r[2], &ZeroVec, /* inputs */ + tgsi_sampler_lod_bias, + &r[0], &r[1], &r[2], &r[3]); /* outputs */ break; case TGSI_TEXTURE_3D: @@ -1673,7 +1669,8 @@ exec_txd(struct tgsi_exec_machine *mach, FETCH(&r[2], 0, CHAN_Z); fetch_texel(mach->Samplers[unit], - &r[0], &r[1], &r[2], 0.0f, + &r[0], &r[1], &r[2], &ZeroVec, + tgsi_sampler_lod_bias, &r[0], &r[1], &r[2], &r[3]); break; @@ -1918,6 +1915,130 @@ exec_vector_trinary(struct tgsi_exec_machine *mach, } static void +exec_dp3(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + unsigned int chan; + union tgsi_exec_channel arg[3]; + + fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT); + micro_mul(&arg[2], &arg[0], &arg[1]); + + for (chan = CHAN_Y; chan <= CHAN_Z; chan++) { + fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT); + micro_mad(&arg[2], arg); + } + + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); + } + } +} + +static void +exec_dp4(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + unsigned int chan; + union tgsi_exec_channel arg[3]; + + fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT); + micro_mul(&arg[2], &arg[0], &arg[1]); + + for (chan = CHAN_Y; chan <= CHAN_W; chan++) { + fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT); + micro_mad(&arg[2], arg); + } + + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); + } + } +} + +static void +exec_dp2a(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + unsigned int chan; + union tgsi_exec_channel arg[3]; + + fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT); + micro_mul(&arg[2], &arg[0], &arg[1]); + + fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT); + micro_mad(&arg[0], arg); + + fetch_source(mach, &arg[1], &inst->Src[2], CHAN_X, TGSI_EXEC_DATA_FLOAT); + micro_add(&arg[0], &arg[0], &arg[1]); + + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); + } + } +} + +static void +exec_dph(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + unsigned int chan; + union tgsi_exec_channel arg[3]; + + fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT); + micro_mul(&arg[2], &arg[0], &arg[1]); + + fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT); + micro_mad(&arg[2], arg); + + fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Z, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Z, TGSI_EXEC_DATA_FLOAT); + micro_mad(&arg[0], arg); + + fetch_source(mach, &arg[1], &inst->Src[1], CHAN_W, TGSI_EXEC_DATA_FLOAT); + micro_add(&arg[0], &arg[0], &arg[1]); + + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); + } + } +} + +static void +exec_dp2(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + unsigned int chan; + union tgsi_exec_channel arg[3]; + + fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT); + micro_mul(&arg[2], &arg[0], &arg[1]); + + fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT); + micro_mad(&arg[2], arg); + + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); + } + } +} + +static void exec_break(struct tgsi_exec_machine *mach) { if (mach->BreakType == TGSI_EXEC_BREAK_INSIDE_LOOP) { @@ -2397,54 +2518,11 @@ exec_instruction( break; case TGSI_OPCODE_DP3: - /* TGSI_OPCODE_DOT3 */ - FETCH( &r[0], 0, CHAN_X ); - FETCH( &r[1], 1, CHAN_X ); - micro_mul( &r[0], &r[0], &r[1] ); - - FETCH( &r[1], 0, CHAN_Y ); - FETCH( &r[2], 1, CHAN_Y ); - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FETCH( &r[1], 0, CHAN_Z ); - FETCH( &r[2], 1, CHAN_Z ); - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_dp3(mach, inst); break; - case TGSI_OPCODE_DP4: - /* TGSI_OPCODE_DOT4 */ - FETCH(&r[0], 0, CHAN_X); - FETCH(&r[1], 1, CHAN_X); - - micro_mul( &r[0], &r[0], &r[1] ); - - FETCH(&r[1], 0, CHAN_Y); - FETCH(&r[2], 1, CHAN_Y); - - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FETCH(&r[1], 0, CHAN_Z); - FETCH(&r[2], 1, CHAN_Z); - - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FETCH(&r[1], 0, CHAN_W); - FETCH(&r[2], 1, CHAN_W); - - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + case TGSI_OPCODE_DP4: + exec_dp4(mach, inst); break; case TGSI_OPCODE_DST: @@ -2540,21 +2618,7 @@ exec_instruction( break; case TGSI_OPCODE_DP2A: - FETCH( &r[0], 0, CHAN_X ); - FETCH( &r[1], 1, CHAN_X ); - micro_mul( &r[0], &r[0], &r[1] ); - - FETCH( &r[1], 0, CHAN_Y ); - FETCH( &r[2], 1, CHAN_Y ); - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FETCH( &r[2], 2, CHAN_X ); - micro_add( &r[0], &r[0], &r[2] ); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_dp2a(mach, inst); break; case TGSI_OPCODE_FRC: @@ -2654,30 +2718,7 @@ exec_instruction( break; case TGSI_OPCODE_DPH: - FETCH(&r[0], 0, CHAN_X); - FETCH(&r[1], 1, CHAN_X); - - micro_mul( &r[0], &r[0], &r[1] ); - - FETCH(&r[1], 0, CHAN_Y); - FETCH(&r[2], 1, CHAN_Y); - - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FETCH(&r[1], 0, CHAN_Z); - FETCH(&r[2], 1, CHAN_Z); - - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FETCH(&r[1], 1, CHAN_W); - - micro_add( &r[0], &r[0], &r[1] ); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_dph(mach, inst); break; case TGSI_OPCODE_COS: @@ -2801,14 +2842,14 @@ exec_instruction( /* simple texture lookup */ /* src[0] = texcoord */ /* src[1] = sampler unit */ - exec_tex(mach, inst, FALSE, FALSE); + exec_tex(mach, inst, TEX_MODIFIER_NONE); break; case TGSI_OPCODE_TXB: /* Texture lookup with lod bias */ /* src[0] = texcoord (src[0].w = LOD bias) */ /* src[1] = sampler unit */ - exec_tex(mach, inst, TRUE, FALSE); + exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS); break; case TGSI_OPCODE_TXD: @@ -2824,14 +2865,14 @@ exec_instruction( /* Texture lookup with explit LOD */ /* src[0] = texcoord (src[0].w = LOD) */ /* src[1] = sampler unit */ - exec_tex(mach, inst, TRUE, FALSE); + exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD); break; case TGSI_OPCODE_TXP: /* Texture lookup with projection */ /* src[0] = texcoord (src[0].w = projection) */ /* src[1] = sampler unit */ - exec_tex(mach, inst, FALSE, TRUE); + exec_tex(mach, inst, TEX_MODIFIER_PROJECTED); break; case TGSI_OPCODE_UP2H: @@ -3091,18 +3132,7 @@ exec_instruction( break; case TGSI_OPCODE_DP2: - FETCH( &r[0], 0, CHAN_X ); - FETCH( &r[1], 1, CHAN_X ); - micro_mul( &r[0], &r[0], &r[1] ); - - FETCH( &r[1], 0, CHAN_Y ); - FETCH( &r[2], 1, CHAN_Y ); - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_dp2(mach, inst); break; case TGSI_OPCODE_IF: diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index aa3a98d7f18..59e3b445cc3 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -2,6 +2,7 @@ * * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. + * Copyright 2009-2010 VMware, Inc. All rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -35,11 +36,13 @@ extern "C" { #endif + #define MAX_LABELS (4 * 1024) /**< basically, max instructions */ #define NUM_CHANNELS 4 /* R,G,B,A */ #define QUAD_SIZE 4 /* 4 pixel/quad */ + /** * Registers may be treated as float, signed int or unsigned int. */ @@ -69,6 +72,11 @@ struct tgsi_interp_coef float dady[NUM_CHANNELS]; }; +enum tgsi_sampler_control { + tgsi_sampler_lod_bias, + tgsi_sampler_lod_explicit +}; + /** * Information for sampling textures, which must be implemented * by code outside the TGSI executor. @@ -80,7 +88,8 @@ struct tgsi_sampler const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]); }; diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c index 138d2d095bb..ad553c71a57 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c @@ -51,7 +51,8 @@ * Since it's pretty much impossible to form PPC vector immediates, load * them from memory here: */ -const float ppc_builtin_constants[] ALIGN16_ATTRIB = { +PIPE_ALIGN_VAR(16) const float +ppc_builtin_constants[] = { 1.0f, -128.0f, 128.0, 0.0 }; diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index 9b0644465af..7f1c8e5dd68 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -265,13 +265,15 @@ check_register_usage( } else { if (!is_register_declared( ctx, reg )) { - if (reg->dimensions == 2) + if (reg->dimensions == 2) { report_error( ctx, "%s[%d][%d]: Undeclared %s register", file_names[reg->file], reg->indices[0], reg->indices[1], name ); - else + } + else { report_error( ctx, "%s[%d]: Undeclared %s register", file_names[reg->file], reg->indices[0], name ); } + } if (!is_register_used( ctx, reg )) cso_hash_insert(ctx->regs_used, scan_register_key(reg), reg); else @@ -333,15 +335,15 @@ iter_instruction( fill_scan_register1d(ind_reg, inst->Src[i].Indirect.File, inst->Src[i].Indirect.Index); + if (!(reg->file == TGSI_FILE_ADDRESS || reg->file == TGSI_FILE_LOOP) || + reg->indices[0] != 0) { + report_warning(ctx, "Indirect register neither ADDR[0] nor LOOP[0]"); + } check_register_usage( ctx, reg, "indirect", FALSE ); - if (!(reg->file == TGSI_FILE_ADDRESS || reg->file == TGSI_FILE_LOOP) || - reg->indices[0] != 0) { - report_warning(ctx, "Indirect register neither ADDR[0] nor LOOP[0]"); - } } } @@ -513,6 +515,7 @@ regs_hash_destroy(struct cso_hash *hash) while (!cso_hash_iter_is_null(iter)) { scan_register *reg = (scan_register *)cso_hash_iter_data(iter); iter = cso_hash_erase(hash, iter); + assert(reg->file < TGSI_FILE_COUNT); FREE(reg); } cso_hash_delete(hash); diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 118059ace9c..a85cc4659e0 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -2,6 +2,7 @@ * * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. + * Copyright 2009-2010 VMware, Inc. All rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -1418,13 +1419,13 @@ fetch_texel( struct tgsi_sampler **sampler, sampler, *sampler, store ); - debug_printf("lodbias %f\n", store[12]); - for (j = 0; j < 4; j++) - debug_printf("sample %d texcoord %f %f\n", + debug_printf("sample %d texcoord %f %f %f lodbias %f\n", j, store[0+j], - store[4+j]); + store[4+j], + store[8 + j], + store[12 + j]); #endif { @@ -1433,7 +1434,8 @@ fetch_texel( struct tgsi_sampler **sampler, &store[0], /* s */ &store[4], /* t */ &store[8], /* r */ - store[12], /* lodbias */ + &store[12], /* lodbias */ + tgsi_sampler_lod_bias, rgba); /* results */ memcpy( store, rgba, 16 * sizeof(float)); @@ -2144,40 +2146,50 @@ emit_instruction( break; case TGSI_OPCODE_XPD: + /* Note: we do all stores after all operands have been fetched + * to avoid src/dst register aliasing issues for an instruction + * such as: XPD TEMP[2].xyz, TEMP[0], TEMP[2]; + */ if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { - FETCH( func, *inst, 1, 1, CHAN_Z ); - FETCH( func, *inst, 3, 0, CHAN_Z ); + FETCH( func, *inst, 1, 1, CHAN_Z ); /* xmm[1] = src[1].z */ + FETCH( func, *inst, 3, 0, CHAN_Z ); /* xmm[3] = src[0].z */ } if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { - FETCH( func, *inst, 0, 0, CHAN_Y ); - FETCH( func, *inst, 4, 1, CHAN_Y ); + FETCH( func, *inst, 0, 0, CHAN_Y ); /* xmm[0] = src[0].y */ + FETCH( func, *inst, 4, 1, CHAN_Y ); /* xmm[4] = src[1].y */ } IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) { - emit_MOV( func, 2, 0 ); - emit_mul( func, 2, 1 ); - emit_MOV( func, 5, 3 ); - emit_mul( func, 5, 4 ); - emit_sub( func, 2, 5 ); - STORE( func, *inst, 2, 0, CHAN_X ); + emit_MOV( func, 7, 0 ); /* xmm[7] = xmm[0] */ + emit_mul( func, 7, 1 ); /* xmm[7] = xmm[2] * xmm[1] */ + emit_MOV( func, 5, 3 ); /* xmm[5] = xmm[3] */ + emit_mul( func, 5, 4 ); /* xmm[5] = xmm[5] * xmm[4] */ + emit_sub( func, 7, 5 ); /* xmm[7] = xmm[2] - xmm[5] */ + /* store xmm[7] in dst.x below */ } if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { - FETCH( func, *inst, 2, 1, CHAN_X ); - FETCH( func, *inst, 5, 0, CHAN_X ); + FETCH( func, *inst, 2, 1, CHAN_X ); /* xmm[2] = src[1].x */ + FETCH( func, *inst, 5, 0, CHAN_X ); /* xmm[5] = src[0].x */ } IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) { - emit_mul( func, 3, 2 ); - emit_mul( func, 1, 5 ); - emit_sub( func, 3, 1 ); - STORE( func, *inst, 3, 0, CHAN_Y ); + emit_mul( func, 3, 2 ); /* xmm[3] = xmm[3] * xmm[2] */ + emit_mul( func, 1, 5 ); /* xmm[1] = xmm[1] * xmm[5] */ + emit_sub( func, 3, 1 ); /* xmm[3] = xmm[3] - xmm[1] */ + /* store xmm[3] in dst.y below */ } IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) { - emit_mul( func, 5, 4 ); - emit_mul( func, 0, 2 ); - emit_sub( func, 5, 0 ); - STORE( func, *inst, 5, 0, CHAN_Z ); + emit_mul( func, 5, 4 ); /* xmm[5] = xmm[5] * xmm[4] */ + emit_mul( func, 0, 2 ); /* xmm[0] = xmm[0] * xmm[2] */ + emit_sub( func, 5, 0 ); /* xmm[5] = xmm[5] - xmm[0] */ + STORE( func, *inst, 5, 0, CHAN_Z ); /* dst.z = xmm[5] */ + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) { + STORE( func, *inst, 7, 0, CHAN_X ); /* dst.x = xmm[7] */ + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) { + STORE( func, *inst, 3, 0, CHAN_Y ); /* dst.y = xmm[3] */ } IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) { emit_tempf( @@ -2506,7 +2518,7 @@ emit_instruction( break; case TGSI_OPCODE_TXL: - emit_tex( func, inst, TRUE, FALSE ); + return 0; break; case TGSI_OPCODE_TXP: |