aboutsummaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers/dri/r600/r700_assembler.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c')
-rw-r--r--src/mesa/drivers/dri/r600/r700_assembler.c149
1 files changed, 138 insertions, 11 deletions
diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c
index 99a33df4fcb..9c954cbf70c 100644
--- a/src/mesa/drivers/dri/r600/r700_assembler.c
+++ b/src/mesa/drivers/dri/r600/r700_assembler.c
@@ -275,7 +275,10 @@ GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size)
case 2:
format = FMT_8_8; break;
case 3:
- format = FMT_8_8_8; break;
+ /* for some (small/unaligned) strides using 4 comps works
+ * better, probably same as GL_SHORT below
+ * test piglit/draw-vertices */
+ format = FMT_8_8_8_8; break;
case 4:
format = FMT_8_8_8_8; break;
default:
@@ -2872,25 +2875,92 @@ GLboolean assemble_CMP(r700_AssemblerBase *pAsm)
GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode)
{
+ /*
+ * r600 - trunc to -PI..PI range
+ * r700 - normalize by dividing by 2PI
+ * see fdo bug 27901
+ */
+
int tmp;
checkop1(pAsm);
tmp = gethelpr(pAsm);
- pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
+ pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+ pAsm->D.dst.op3 = 1;
+
setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
pAsm->D.dst.rtype = DST_REG_TEMPORARY;
pAsm->D.dst.reg = tmp;
- pAsm->D.dst.writex = 1;
assemble_src(pAsm, 0, -1);
pAsm->S[1].src.rtype = SRC_REC_LITERAL;
setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
+
+ pAsm->S[2].src.rtype = SRC_REC_LITERAL;
+ setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y);
+
pAsm->D2.dst2.literal_slots = 1;
pAsm->C[0].f = 1/(3.1415926535 * 2);
- pAsm->C[1].f = 0.0F;
- next_ins(pAsm);
+ pAsm->C[1].f = 0.5f;
+
+ if ( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
+
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp;
+ pAsm->D.dst.writex = 1;
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp;
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+
+ if(( GL_FALSE == next_ins(pAsm) ))
+ {
+ return GL_FALSE;
+ }
+ pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+ pAsm->D.dst.op3 = 1;
+
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp;
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp;
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+
+ pAsm->S[1].src.rtype = SRC_REC_LITERAL;
+ setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
+
+ pAsm->S[2].src.rtype = SRC_REC_LITERAL;
+ setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y);
+
+ pAsm->D2.dst2.literal_slots = 1;
+
+ if (pAsm->bR6xx)
+ {
+ pAsm->C[0].f = 3.1415926535897f * 2.0f;
+ pAsm->C[1].f = -3.1415926535897f;
+ }
+ else
+ {
+ pAsm->C[0].f = 1.0f;
+ pAsm->C[1].f = -0.5f;
+ }
+
+ if(( GL_FALSE == next_ins(pAsm) ))
+ {
+ return GL_FALSE;
+ }
pAsm->D.dst.opcode = opcode;
pAsm->D.dst.math = 1;
@@ -4030,22 +4100,79 @@ GLboolean assemble_SCS(r700_AssemblerBase *pAsm)
checkop1(pAsm);
tmp = gethelpr(pAsm);
- /* tmp.x = src /2*PI */
- pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
+
+ pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+ pAsm->D.dst.op3 = 1;
+
setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
pAsm->D.dst.rtype = DST_REG_TEMPORARY;
pAsm->D.dst.reg = tmp;
- pAsm->D.dst.writex = 1;
assemble_src(pAsm, 0, -1);
pAsm->S[1].src.rtype = SRC_REC_LITERAL;
setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
+
+ pAsm->S[2].src.rtype = SRC_REC_LITERAL;
+ setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y);
+
pAsm->D2.dst2.literal_slots = 1;
pAsm->C[0].f = 1/(3.1415926535 * 2);
- pAsm->C[1].f = 0.0F;
+ pAsm->C[1].f = 0.5F;
- next_ins(pAsm);
+ if ( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
+
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp;
+ pAsm->D.dst.writex = 1;
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp;
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+
+ if(( GL_FALSE == next_ins(pAsm) ))
+ {
+ return GL_FALSE;
+ }
+ pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+ pAsm->D.dst.op3 = 1;
+
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp;
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp;
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+
+ pAsm->S[1].src.rtype = SRC_REC_LITERAL;
+ setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
+
+ pAsm->S[2].src.rtype = SRC_REC_LITERAL;
+ setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y);
+
+ pAsm->D2.dst2.literal_slots = 1;
+
+ if(pAsm->bR6xx) {
+ pAsm->C[0].f = 3.1415926535897f * 2.0f;
+ pAsm->C[1].f = -3.1415926535897f;
+ } else {
+ pAsm->C[0].f = 1.0f;
+ pAsm->C[1].f = -0.5f;
+ }
+
+ if(( GL_FALSE == next_ins(pAsm) ))
+ {
+ return GL_FALSE;
+ }
// COS dst.x, a.x
pAsm->D.dst.opcode = SQ_OP2_INST_COS;
@@ -6473,7 +6600,7 @@ GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode,
* results are undefined anyway */
if(export_count == 0)
{
- Process_Export(pR700AsmCode, SQ_EXPORT_PIXEL, 0, 1, 0, GL_FALSE);
+ Process_Export(pR700AsmCode, SQ_EXPORT_PIXEL, 0, 1, pR700AsmCode->starting_export_register_number, GL_FALSE);
}
if(pR700AsmCode->cf_last_export_ptr != NULL)