diff options
Diffstat (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c')
-rw-r--r-- | src/mesa/drivers/dri/r600/r700_assembler.c | 149 |
1 files changed, 138 insertions, 11 deletions
diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 99a33df4fcb..9c954cbf70c 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -275,7 +275,10 @@ GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size) case 2: format = FMT_8_8; break; case 3: - format = FMT_8_8_8; break; + /* for some (small/unaligned) strides using 4 comps works + * better, probably same as GL_SHORT below + * test piglit/draw-vertices */ + format = FMT_8_8_8_8; break; case 4: format = FMT_8_8_8_8; break; default: @@ -2872,25 +2875,92 @@ GLboolean assemble_CMP(r700_AssemblerBase *pAsm) GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode) { + /* + * r600 - trunc to -PI..PI range + * r700 - normalize by dividing by 2PI + * see fdo bug 27901 + */ + int tmp; checkop1(pAsm); tmp = gethelpr(pAsm); - pAsm->D.dst.opcode = SQ_OP2_INST_MUL; + pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + pAsm->D.dst.op3 = 1; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); pAsm->D.dst.rtype = DST_REG_TEMPORARY; pAsm->D.dst.reg = tmp; - pAsm->D.dst.writex = 1; assemble_src(pAsm, 0, -1); pAsm->S[1].src.rtype = SRC_REC_LITERAL; setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X); + + pAsm->S[2].src.rtype = SRC_REC_LITERAL; + setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y); + pAsm->D2.dst2.literal_slots = 1; pAsm->C[0].f = 1/(3.1415926535 * 2); - pAsm->C[1].f = 0.0F; - next_ins(pAsm); + pAsm->C[1].f = 0.5f; + + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_FRACT; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writex = 1; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + + if(( GL_FALSE == next_ins(pAsm) )) + { + return GL_FALSE; + } + pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + pAsm->D.dst.op3 = 1; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + + pAsm->S[1].src.rtype = SRC_REC_LITERAL; + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X); + + pAsm->S[2].src.rtype = SRC_REC_LITERAL; + setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y); + + pAsm->D2.dst2.literal_slots = 1; + + if (pAsm->bR6xx) + { + pAsm->C[0].f = 3.1415926535897f * 2.0f; + pAsm->C[1].f = -3.1415926535897f; + } + else + { + pAsm->C[0].f = 1.0f; + pAsm->C[1].f = -0.5f; + } + + if(( GL_FALSE == next_ins(pAsm) )) + { + return GL_FALSE; + } pAsm->D.dst.opcode = opcode; pAsm->D.dst.math = 1; @@ -4030,22 +4100,79 @@ GLboolean assemble_SCS(r700_AssemblerBase *pAsm) checkop1(pAsm); tmp = gethelpr(pAsm); - /* tmp.x = src /2*PI */ - pAsm->D.dst.opcode = SQ_OP2_INST_MUL; + + pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + pAsm->D.dst.op3 = 1; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); pAsm->D.dst.rtype = DST_REG_TEMPORARY; pAsm->D.dst.reg = tmp; - pAsm->D.dst.writex = 1; assemble_src(pAsm, 0, -1); pAsm->S[1].src.rtype = SRC_REC_LITERAL; setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X); + + pAsm->S[2].src.rtype = SRC_REC_LITERAL; + setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y); + pAsm->D2.dst2.literal_slots = 1; pAsm->C[0].f = 1/(3.1415926535 * 2); - pAsm->C[1].f = 0.0F; + pAsm->C[1].f = 0.5F; - next_ins(pAsm); + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_FRACT; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writex = 1; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + + if(( GL_FALSE == next_ins(pAsm) )) + { + return GL_FALSE; + } + pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + pAsm->D.dst.op3 = 1; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + + pAsm->S[1].src.rtype = SRC_REC_LITERAL; + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X); + + pAsm->S[2].src.rtype = SRC_REC_LITERAL; + setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y); + + pAsm->D2.dst2.literal_slots = 1; + + if(pAsm->bR6xx) { + pAsm->C[0].f = 3.1415926535897f * 2.0f; + pAsm->C[1].f = -3.1415926535897f; + } else { + pAsm->C[0].f = 1.0f; + pAsm->C[1].f = -0.5f; + } + + if(( GL_FALSE == next_ins(pAsm) )) + { + return GL_FALSE; + } // COS dst.x, a.x pAsm->D.dst.opcode = SQ_OP2_INST_COS; @@ -6473,7 +6600,7 @@ GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode, * results are undefined anyway */ if(export_count == 0) { - Process_Export(pR700AsmCode, SQ_EXPORT_PIXEL, 0, 1, 0, GL_FALSE); + Process_Export(pR700AsmCode, SQ_EXPORT_PIXEL, 0, 1, pR700AsmCode->starting_export_register_number, GL_FALSE); } if(pR700AsmCode->cf_last_export_ptr != NULL) |