diff options
-rw-r--r--  src/mesa/drivers/dri/r300/r500_fragprog.c       | 89
-rw-r--r--  src/mesa/drivers/dri/r300/radeon_program_pair.c | 16
2 files changed, 79 insertions, 26 deletions
diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c
index c78deab2ace..3fbdb30acff 100644
--- a/src/mesa/drivers/dri/r300/r500_fragprog.c
+++ b/src/mesa/drivers/dri/r300/r500_fragprog.c
@@ -269,44 +269,87 @@ static GLboolean is_native_swizzle(GLuint opcode, struct prog_src_register reg)
 	GLuint relevant;
 	int i;
 
-	if (reg.Abs)
+	if (opcode == OPCODE_TEX ||
+	    opcode == OPCODE_TXB ||
+	    opcode == OPCODE_TXP ||
+	    opcode == OPCODE_KIL) {
+		if (reg.Abs)
+			return GL_FALSE;
+
+		if (reg.NegateAbs)
+			reg.NegateBase ^= 15;
+
+		if (opcode == OPCODE_KIL) {
+			if (reg.Swizzle != SWIZZLE_NOOP)
+				return GL_FALSE;
+		} else {
+			for(i = 0; i < 4; ++i) {
+				GLuint swz = GET_SWZ(reg.Swizzle, i);
+				if (swz == SWIZZLE_NIL) {
+					reg.NegateBase &= ~(1 << i);
+					continue;
+				}
+				if (swz >= 4)
+					return GL_FALSE;
+			}
+		}
+
+		if (reg.NegateBase)
+			return GL_FALSE;
+
 		return GL_TRUE;
+	} else {
+		/* ALU instructions support almost everything */
+		if (reg.Abs)
+			return GL_TRUE;
 
-	relevant = 0;
-	for(i = 0; i < 3; ++i) {
-		GLuint swz = GET_SWZ(reg.Swizzle, i);
-		if (swz != SWIZZLE_NIL && swz != SWIZZLE_ZERO)
-			relevant |= 1 << i;
-	}
-	if ((reg.NegateBase & relevant) && ((reg.NegateBase & relevant) != relevant))
-		return GL_FALSE;
+		relevant = 0;
+		for(i = 0; i < 3; ++i) {
+			GLuint swz = GET_SWZ(reg.Swizzle, i);
+			if (swz != SWIZZLE_NIL && swz != SWIZZLE_ZERO)
+				relevant |= 1 << i;
+		}
+		if ((reg.NegateBase & relevant) && ((reg.NegateBase & relevant) != relevant))
+			return GL_FALSE;
 
-	return GL_TRUE;
+		return GL_TRUE;
+	}
 }
 
 /**
- * Implement a non-native swizzle. This function assumes that
- * is_native_swizzle returned true.
+ * Implement a MOV with a potentially non-native swizzle.
+ *
+ * The only thing we *cannot* do in an ALU instruction is per-component
+ * negation. Therefore, we split the MOV into two instructions when necessary.
  */
 static void nqssadce_build_swizzle(struct nqssadce_state *s,
 	struct prog_dst_register dst, struct prog_src_register src)
 {
 	struct prog_instruction *inst;
+	GLuint negatebase[2] = { 0, 0 };
+	int i;
 
-	_mesa_insert_instructions(s->Program, s->IP, 2);
-	inst = s->Program->Instructions + s->IP;
+	for(i = 0; i < 4; ++i) {
+		GLuint swz = GET_SWZ(src.Swizzle, i);
+		if (swz == SWIZZLE_NIL)
+			continue;
+		negatebase[GET_BIT(src.NegateBase, i)] |= 1 << i;
+	}
 
-	inst[0].Opcode = OPCODE_MOV;
-	inst[0].DstReg = dst;
-	inst[0].DstReg.WriteMask &= src.NegateBase;
-	inst[0].SrcReg[0] = src;
+	_mesa_insert_instructions(s->Program, s->IP, (negatebase[0] ? 1 : 0) + (negatebase[1] ? 1 : 0));
+	inst = s->Program->Instructions + s->IP;
 
-	inst[1].Opcode = OPCODE_MOV;
-	inst[1].DstReg = dst;
-	inst[1].DstReg.WriteMask &= ~src.NegateBase;
-	inst[1].SrcReg[0] = src;
+	for(i = 0; i <= 1; ++i) {
+		if (!negatebase[i])
+			continue;
 
-	s->IP += 2;
+		inst->Opcode = OPCODE_MOV;
+		inst->DstReg = dst;
+		inst->DstReg.WriteMask = negatebase[i];
+		inst->SrcReg[0] = src;
+		inst++;
+		s->IP++;
+	}
 }
 
 static GLuint build_dtm(GLuint depthmode)
diff --git a/src/mesa/drivers/dri/r300/radeon_program_pair.c b/src/mesa/drivers/dri/r300/radeon_program_pair.c
index 8762422801e..4307994d74c 100644
--- a/src/mesa/drivers/dri/r300/radeon_program_pair.c
+++ b/src/mesa/drivers/dri/r300/radeon_program_pair.c
@@ -265,11 +265,21 @@ static void final_rewrite(struct pair_state *s, struct prog_instruction *inst)
 		inst->SrcReg[0] = tmp;
 		break;
 	case OPCODE_MOV:
-		inst->SrcReg[1] = inst->SrcReg[0];
+		/* AMD say we should use CMP.
+		 * However, when we transform
+		 *   KIL -r0;
+		 * into
+		 *   CMP tmp, -r0, -r0, 0;
+		 *   KIL tmp;
+		 * we get incorrect behaviour on R500 when r0 == 0.0.
+		 * It appears that the R500 KIL hardware treats -0.0 as less
+		 * than zero.
+		 */
+		inst->SrcReg[1].File = PROGRAM_BUILTIN;
+		inst->SrcReg[1].Swizzle = SWIZZLE_1111;
 		inst->SrcReg[2].File = PROGRAM_BUILTIN;
 		inst->SrcReg[2].Swizzle = SWIZZLE_0000;
-		inst->Opcode = OPCODE_CMP;
-		// TODO: disable output modifiers on R500
+		inst->Opcode = OPCODE_MAD;
 		break;
 	case OPCODE_MUL:
 		inst->SrcReg[2].File = PROGRAM_BUILTIN;