From abe4f3d1aa68aec70d329447abc890b3eaaba9cb Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 1 Jun 2010 16:27:36 +0100 Subject: tgsi: Determine which shader input channels are effectively used. TGSI's UsageMask flag is never set. We can move this logic into tgsi_ureg, but there are still cases where it's not used, so this seems a better place for now. --- src/gallium/auxiliary/tgsi/tgsi_scan.c | 32 ++++++-- src/gallium/auxiliary/tgsi/tgsi_scan.h | 1 + src/gallium/auxiliary/tgsi/tgsi_util.c | 146 +++++++++++++++++++++++++++++++++ src/gallium/auxiliary/tgsi/tgsi_util.h | 5 ++ 4 files changed, 177 insertions(+), 7 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index e0c5d3d3d61..7ee272b9498 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -36,6 +36,7 @@ #include "util/u_math.h" #include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" #include "tgsi/tgsi_scan.h" @@ -84,19 +85,21 @@ tgsi_scan_shader(const struct tgsi_token *tokens, { const struct tgsi_full_instruction *fullinst = &parse.FullToken.FullInstruction; + uint i; assert(fullinst->Instruction.Opcode < TGSI_OPCODE_LAST); info->opcode_count[fullinst->Instruction.Opcode]++; - /* check if we read the frag shader FOG or FACE inputs */ - if (procType == TGSI_PROCESSOR_FRAGMENT) { - uint i; - for (i = 0; i < fullinst->Instruction.NumSrcRegs; i++) { - const struct tgsi_full_src_register *src = - &fullinst->Src[i]; + for (i = 0; i < fullinst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register *src = + &fullinst->Src[i]; + int ind = src->Register.Index; + + /* check if we read the frag shader FOG or FACE inputs */ + if (procType == TGSI_PROCESSOR_FRAGMENT) { if (src->Register.File == TGSI_FILE_INPUT || src->Register.File == TGSI_FILE_SYSTEM_VALUE) { - const int ind = src->Register.Index; + assert(ind >= 0); if (info->input_semantic_name[ind] == TGSI_SEMANTIC_FOG) { info->uses_fogcoord = 
TRUE; } @@ -105,6 +108,21 @@ tgsi_scan_shader(const struct tgsi_token *tokens, } } } + + /* Mark which inputs are effectively used */ + if (src->Register.File == TGSI_FILE_INPUT) { + unsigned usage_mask; + usage_mask = tgsi_util_get_inst_usage_mask(fullinst, i); + if (src->Register.Indirect) { + for (ind = 0; ind < info->num_inputs; ++ind) { + info->input_usage_mask[ind] |= usage_mask; + } + } else { + assert(ind >= 0); + assert(ind < PIPE_MAX_SHADER_INPUTS); + info->input_usage_mask[ind] |= usage_mask; + } + } } info->num_instructions++; diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h b/src/gallium/auxiliary/tgsi/tgsi_scan.h index 27de33f7990..f43541429d7 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.h +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h @@ -45,6 +45,7 @@ struct tgsi_shader_info ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS]; /**< TGSI_SEMANTIC_x */ ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS]; ubyte input_interpolate[PIPE_MAX_SHADER_INPUTS]; + ubyte input_usage_mask[PIPE_MAX_SHADER_INPUTS]; ubyte input_cylindrical_wrap[PIPE_MAX_SHADER_INPUTS]; ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; /**< TGSI_SEMANTIC_x */ ubyte output_semantic_index[PIPE_MAX_SHADER_OUTPUTS]; diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c b/src/gallium/auxiliary/tgsi/tgsi_util.c index 0a7e4105a80..3ec54964169 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_util.c +++ b/src/gallium/auxiliary/tgsi/tgsi_util.c @@ -163,3 +163,149 @@ tgsi_util_set_full_src_register_sign_mode( assert( 0 ); } } + +/** + * Determine which channels of the specified src register are effectively + * used by this instruction. 
+ */ +unsigned +tgsi_util_get_inst_usage_mask(const struct tgsi_full_instruction *inst, + unsigned src_idx) +{ + const struct tgsi_full_src_register *src = &inst->Src[src_idx]; + unsigned write_mask = inst->Dst[0].Register.WriteMask; + unsigned read_mask; + unsigned usage_mask; + unsigned chan; + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_MOV: + case TGSI_OPCODE_ARL: + case TGSI_OPCODE_ARR: + case TGSI_OPCODE_RCP: + case TGSI_OPCODE_MUL: + case TGSI_OPCODE_DIV: + case TGSI_OPCODE_ADD: + case TGSI_OPCODE_MIN: + case TGSI_OPCODE_MAX: + case TGSI_OPCODE_SLT: + case TGSI_OPCODE_SGE: + case TGSI_OPCODE_MAD: + case TGSI_OPCODE_SUB: + case TGSI_OPCODE_LRP: + case TGSI_OPCODE_CND: + case TGSI_OPCODE_FRC: + case TGSI_OPCODE_CEIL: + case TGSI_OPCODE_CLAMP: + case TGSI_OPCODE_FLR: + case TGSI_OPCODE_ROUND: + case TGSI_OPCODE_POW: + case TGSI_OPCODE_ABS: + case TGSI_OPCODE_COS: + case TGSI_OPCODE_SIN: + case TGSI_OPCODE_DDX: + case TGSI_OPCODE_DDY: + case TGSI_OPCODE_SEQ: + case TGSI_OPCODE_SGT: + case TGSI_OPCODE_SLE: + case TGSI_OPCODE_SNE: + case TGSI_OPCODE_SSG: + case TGSI_OPCODE_CMP: + case TGSI_OPCODE_TRUNC: + case TGSI_OPCODE_NOT: + case TGSI_OPCODE_AND: + case TGSI_OPCODE_OR: + case TGSI_OPCODE_XOR: + case TGSI_OPCODE_SAD: + /* Channel-wise operations */ + read_mask = write_mask; + break; + + case TGSI_OPCODE_EX2: + case TGSI_OPCODE_LG2: + case TGSI_OPCODE_RCC: + read_mask = TGSI_WRITEMASK_X; + break; + + case TGSI_OPCODE_SCS: + read_mask = write_mask & TGSI_WRITEMASK_XY ? TGSI_WRITEMASK_X : 0; + break; + + case TGSI_OPCODE_EXP: + case TGSI_OPCODE_LOG: + read_mask = write_mask & TGSI_WRITEMASK_XYZ ? TGSI_WRITEMASK_X : 0; + break; + + case TGSI_OPCODE_DP2A: + read_mask = src_idx == 2 ? 
TGSI_WRITEMASK_X : TGSI_WRITEMASK_XY; + break; + + case TGSI_OPCODE_DP2: + read_mask = TGSI_WRITEMASK_XY; + break; + + case TGSI_OPCODE_DP3: + read_mask = TGSI_WRITEMASK_XYZ; + break; + + case TGSI_OPCODE_DP4: + read_mask = TGSI_WRITEMASK_XYZW; + break; + + case TGSI_OPCODE_DPH: + read_mask = src_idx == 0 ? TGSI_WRITEMASK_XYZ : TGSI_WRITEMASK_XYZW; + break; + + case TGSI_OPCODE_TEX: + case TGSI_OPCODE_TXD: + case TGSI_OPCODE_TXB: + case TGSI_OPCODE_TXL: + case TGSI_OPCODE_TXP: + if (src_idx == 0) { + switch (inst->Texture.Texture) { + case TGSI_TEXTURE_1D: + case TGSI_TEXTURE_SHADOW1D: + read_mask = TGSI_WRITEMASK_X; + break; + + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_RECT: + case TGSI_TEXTURE_SHADOW2D: + case TGSI_TEXTURE_SHADOWRECT: + read_mask = TGSI_WRITEMASK_XY; + break; + + case TGSI_TEXTURE_3D: + case TGSI_TEXTURE_CUBE: + read_mask = TGSI_WRITEMASK_XYZ; + break; + + default: + assert(0); + read_mask = 0; + } + + if (inst->Instruction.Opcode != TGSI_OPCODE_TEX) { + read_mask |= TGSI_WRITEMASK_W; + } + } else { + /* A safe approximation */ + read_mask = TGSI_WRITEMASK_XYZW; + } + break; + + default: + /* Assume all channels are read */ + read_mask = TGSI_WRITEMASK_XYZW; + break; + } + + usage_mask = 0; + for (chan = 0; chan < 4; ++chan) { + if (read_mask & (1 << chan)) { + usage_mask |= 1 << tgsi_util_get_full_src_register_swizzle(src, chan); + } + } + + return usage_mask; +} diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.h b/src/gallium/auxiliary/tgsi/tgsi_util.h index 19ee2e7cf2a..04702ba9826 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_util.h +++ b/src/gallium/auxiliary/tgsi/tgsi_util.h @@ -34,6 +34,7 @@ extern "C" { struct tgsi_src_register; struct tgsi_full_src_register; +struct tgsi_full_instruction; void * tgsi_align_128bit( @@ -71,6 +72,10 @@ tgsi_util_set_full_src_register_sign_mode( struct tgsi_full_src_register *reg, unsigned sign_mode ); +unsigned +tgsi_util_get_inst_usage_mask(const struct tgsi_full_instruction *inst, + unsigned src_idx); + 
#if defined __cplusplus } #endif -- cgit v1.2.3