summaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers
diff options
context:
space:
mode:
authorAndre Maasikas <[email protected]>2010-08-02 15:11:22 +0300
committerAndre Maasikas <[email protected]>2010-08-02 15:11:22 +0300
commitd6a5f94ea4d03b05c434fcad125d1f9c50c638e8 (patch)
treeeb060fcf3977562f725cb918c5fbfeb599554e3a /src/mesa/drivers
parentc1f33097f4a6cd33df57dc601ba1733985979a4f (diff)
r600: fix sin,cos functions on r600
r600 doesnt need the same normalization as r700 - instead it requires range to be truncated to -pi..pi I left the range trunc also effective on r700 althouch according the docs it has sufficent range (-512*PI, +512*PI). The instructions seem to be used not too often to cause perf loss because of this Based on patches and testing by Conn Clark and Alain Perrot
Diffstat (limited to 'src/mesa/drivers')
-rw-r--r--src/mesa/drivers/dri/r600/r700_assembler.c142
1 files changed, 133 insertions, 9 deletions
diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c
index 8f6cc1d8753..b555ea683c2 100644
--- a/src/mesa/drivers/dri/r600/r700_assembler.c
+++ b/src/mesa/drivers/dri/r600/r700_assembler.c
@@ -2872,25 +2872,92 @@ GLboolean assemble_CMP(r700_AssemblerBase *pAsm)
GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode)
{
+ /*
+ * r600 - trunc to -PI..PI range
+ * r700 - normalize by dividing by 2PI
+ * see fdo bug 27901
+ */
+
int tmp;
checkop1(pAsm);
tmp = gethelpr(pAsm);
- pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
+ pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+ pAsm->D.dst.op3 = 1;
+
setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
pAsm->D.dst.rtype = DST_REG_TEMPORARY;
pAsm->D.dst.reg = tmp;
- pAsm->D.dst.writex = 1;
assemble_src(pAsm, 0, -1);
pAsm->S[1].src.rtype = SRC_REC_LITERAL;
setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
+
+ pAsm->S[2].src.rtype = SRC_REC_LITERAL;
+ setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y);
+
pAsm->D2.dst2.literal_slots = 1;
pAsm->C[0].f = 1/(3.1415926535 * 2);
- pAsm->C[1].f = 0.0F;
- next_ins(pAsm);
+ pAsm->C[1].f = 0.5f;
+
+ if ( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
+
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp;
+ pAsm->D.dst.writex = 1;
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp;
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+
+ if(( GL_FALSE == next_ins(pAsm) ))
+ {
+ return GL_FALSE;
+ }
+ pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+ pAsm->D.dst.op3 = 1;
+
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp;
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp;
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+
+ pAsm->S[1].src.rtype = SRC_REC_LITERAL;
+ setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
+
+ pAsm->S[2].src.rtype = SRC_REC_LITERAL;
+ setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y);
+
+ pAsm->D2.dst2.literal_slots = 1;
+
+ if (pAsm->bR6xx)
+ {
+ pAsm->C[0].f = 3.1415926535897f * 2.0f;
+ pAsm->C[1].f = -3.1415926535897f;
+ }
+ else
+ {
+ pAsm->C[0].f = 1.0f;
+ pAsm->C[1].f = -0.5f;
+ }
+
+ if(( GL_FALSE == next_ins(pAsm) ))
+ {
+ return GL_FALSE;
+ }
pAsm->D.dst.opcode = opcode;
pAsm->D.dst.math = 1;
@@ -4030,22 +4097,79 @@ GLboolean assemble_SCS(r700_AssemblerBase *pAsm)
checkop1(pAsm);
tmp = gethelpr(pAsm);
- /* tmp.x = src /2*PI */
- pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
+
+ pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+ pAsm->D.dst.op3 = 1;
+
setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
pAsm->D.dst.rtype = DST_REG_TEMPORARY;
pAsm->D.dst.reg = tmp;
- pAsm->D.dst.writex = 1;
assemble_src(pAsm, 0, -1);
pAsm->S[1].src.rtype = SRC_REC_LITERAL;
setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
+
+ pAsm->S[2].src.rtype = SRC_REC_LITERAL;
+ setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y);
+
pAsm->D2.dst2.literal_slots = 1;
pAsm->C[0].f = 1/(3.1415926535 * 2);
- pAsm->C[1].f = 0.0F;
+ pAsm->C[1].f = 0.5F;
- next_ins(pAsm);
+ if ( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
+
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp;
+ pAsm->D.dst.writex = 1;
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp;
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+
+ if(( GL_FALSE == next_ins(pAsm) ))
+ {
+ return GL_FALSE;
+ }
+ pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+ pAsm->D.dst.op3 = 1;
+
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp;
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp;
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+
+ pAsm->S[1].src.rtype = SRC_REC_LITERAL;
+ setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
+
+ pAsm->S[2].src.rtype = SRC_REC_LITERAL;
+ setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y);
+
+ pAsm->D2.dst2.literal_slots = 1;
+
+ if(pAsm->bR6xx) {
+ pAsm->C[0].f = 3.1415926535897f * 2.0f;
+ pAsm->C[1].f = -3.1415926535897f;
+ } else {
+ pAsm->C[0].f = 1.0f;
+ pAsm->C[1].f = -0.5f;
+ }
+
+ if(( GL_FALSE == next_ins(pAsm) ))
+ {
+ return GL_FALSE;
+ }
// COS dst.x, a.x
pAsm->D.dst.opcode = SQ_OP2_INST_COS;