summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/mesa/drivers/dri/r300/r300_reg.h | 6
-rw-r--r-- src/mesa/drivers/dri/r300/r500_fragprog.c | 9
-rw-r--r-- src/mesa/drivers/dri/r300/r500_fragprog_emit.c | 1218
3 files changed, 458 insertions, 775 deletions
diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h
index 58a19554c71..cd232c5b7b2 100644
--- a/src/mesa/drivers/dri/r300/r300_reg.h
+++ b/src/mesa/drivers/dri/r300/r300_reg.h
@@ -2705,6 +2705,7 @@ enum {
# define R500_ALPHA_OP_MDV 15
# define R500_ALPHA_ADDRD(x) (x << 4)
# define R500_ALPHA_ADDRD_REL (1 << 11)
+# define R500_ALPHA_SEL_A_SHIFT 12
# define R500_ALPHA_SEL_A_SRC0 (0 << 12)
# define R500_ALPHA_SEL_A_SRC1 (1 << 12)
# define R500_ALPHA_SEL_A_SRC2 (2 << 12)
@@ -2721,6 +2722,7 @@ enum {
# define R500_ALPHA_MOD_A_NEG (1 << 17)
# define R500_ALPHA_MOD_A_ABS (2 << 17)
# define R500_ALPHA_MOD_A_NAB (3 << 17)
+# define R500_ALPHA_SEL_B_SHIFT 19
# define R500_ALPHA_SEL_B_SRC0 (0 << 19)
# define R500_ALPHA_SEL_B_SRC1 (1 << 19)
# define R500_ALPHA_SEL_B_SRC2 (2 << 19)
@@ -2777,6 +2779,7 @@ enum {
# define R500_ALU_RGBA_OP_MDV (12 << 0)
# define R500_ALU_RGBA_ADDRD(x) (x << 4)
# define R500_ALU_RGBA_ADDRD_REL (1 << 11)
+# define R500_ALU_RGBA_SEL_C_SHIFT 12
# define R500_ALU_RGBA_SEL_C_SRC0 (0 << 12)
# define R500_ALU_RGBA_SEL_C_SRC1 (1 << 12)
# define R500_ALU_RGBA_SEL_C_SRC2 (2 << 12)
@@ -2809,6 +2812,7 @@ enum {
# define R500_ALU_RGBA_MOD_C_NEG (1 << 23)
# define R500_ALU_RGBA_MOD_C_ABS (2 << 23)
# define R500_ALU_RGBA_MOD_C_NAB (3 << 23)
+# define R500_ALU_RGBA_ALPHA_SEL_C_SHIFT 25
# define R500_ALU_RGBA_ALPHA_SEL_C_SRC0 (0 << 25)
# define R500_ALU_RGBA_ALPHA_SEL_C_SRC1 (1 << 25)
# define R500_ALU_RGBA_ALPHA_SEL_C_SRC2 (2 << 25)
@@ -2826,6 +2830,7 @@ enum {
# define R500_ALU_RGBA_ALPHA_MOD_C_ABS (2 << 30)
# define R500_ALU_RGBA_ALPHA_MOD_C_NAB (3 << 30)
#define R500_US_ALU_RGB_INST_0 0xa000
+# define R500_ALU_RGB_SEL_A_SHIFT 0
# define R500_ALU_RGB_SEL_A_SRC0 (0 << 0)
# define R500_ALU_RGB_SEL_A_SRC1 (1 << 0)
# define R500_ALU_RGB_SEL_A_SRC2 (2 << 0)
@@ -2858,6 +2863,7 @@ enum {
# define R500_ALU_RGB_MOD_A_NEG (1 << 11)
# define R500_ALU_RGB_MOD_A_ABS (2 << 11)
# define R500_ALU_RGB_MOD_A_NAB (3 << 11)
+# define R500_ALU_RGB_SEL_B_SHIFT 13
# define R500_ALU_RGB_SEL_B_SRC0 (0 << 13)
# define R500_ALU_RGB_SEL_B_SRC1 (1 << 13)
# define R500_ALU_RGB_SEL_B_SRC2 (2 << 13)
diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c
index 62e06ea52c0..b46e924ac7b 100644
--- a/src/mesa/drivers/dri/r300/r500_fragprog.c
+++ b/src/mesa/drivers/dri/r300/r500_fragprog.c
@@ -27,6 +27,8 @@
#include "r500_fragprog.h"
+#include "radeon_program_alu.h"
+
/**
* Transform TEX, TXP, TXB, and KIL instructions in the following way:
@@ -316,11 +318,12 @@ void r500TranslateFragmentShader(r300ContextPtr r300,
insert_WPOS_trailer(&compiler);
- struct radeon_program_transformation transformations[1] = {
- { &transform_TEX, &compiler }
+ struct radeon_program_transformation transformations[2] = {
+ { &transform_TEX, &compiler },
+ { &radeonTransformALU, 0 }
};
radeonLocalTransform(r300->radeon.glCtx, compiler.program,
- 1, transformations);
+ 2, transformations);
if (RADEON_DEBUG & DEBUG_PIXEL) {
_mesa_printf("Compiler: after all transformations:\n");
diff --git a/src/mesa/drivers/dri/r300/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/r500_fragprog_emit.c
index 67545cbb4f6..0e95c81e482 100644
--- a/src/mesa/drivers/dri/r300/r500_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/r500_fragprog_emit.c
@@ -168,6 +168,12 @@ static const GLfloat LIT[] = {127.999999,
127.999999,
-127.999999};
+static const struct prog_dst_register dstreg_template = {
+ .File = PROGRAM_TEMPORARY,
+ .Index = 0,
+ .WriteMask = WRITEMASK_XYZW
+};
+
static INLINE GLuint make_rgb_swizzle(struct prog_src_register src) {
GLuint swiz = 0x0;
GLuint temp;
@@ -179,8 +185,14 @@ static INLINE GLuint make_rgb_swizzle(struct prog_src_register src) {
if (temp == 5) temp++;
swiz |= temp << i*3;
}
- if (src.NegateBase)
- swiz |= (R500_SWIZ_MOD_NEG << 9);
+ if (src.Abs) {
+ swiz |= R500_SWIZ_MOD_ABS << 9;
+ } else if (src.NegateBase & 7) {
+ ASSERT((src.NegateBase & 7) == 7);
+ swiz |= R500_SWIZ_MOD_NEG << 9;
+ }
+ if (src.NegateAbs)
+ swiz ^= R500_SWIZ_MOD_NEG << 9;
return swiz;
}
@@ -202,8 +214,13 @@ static INLINE GLuint make_alpha_swizzle(struct prog_src_register src) {
if (swiz == 5) swiz++;
- if (src.NegateBase)
- swiz |= (R500_SWIZ_MOD_NEG << 3);
+ if (src.Abs) {
+ swiz |= R500_SWIZ_MOD_ABS << 3;
+ } else if (src.NegateBase & 8) {
+ swiz |= R500_SWIZ_MOD_NEG << 3;
+ }
+ if (src.NegateAbs)
+ swiz ^= R500_SWIZ_MOD_NEG << 3;
return swiz;
}
@@ -212,6 +229,15 @@ static INLINE GLuint make_sop_swizzle(struct prog_src_register src) {
GLuint swiz = GET_SWZ(src.Swizzle, 0);
if (swiz == 5) swiz++;
+
+ if (src.Abs) {
+ swiz |= R500_SWIZ_MOD_ABS << 3;
+ } else if (src.NegateBase & 1) {
+ swiz |= R500_SWIZ_MOD_NEG << 3;
+ }
+ if (src.NegateAbs)
+ swiz ^= R500_SWIZ_MOD_NEG << 3;
+
return swiz;
}
@@ -324,12 +350,23 @@ static GLuint make_dest(struct r500_pfs_compile_state *cs, struct prog_dst_regis
return reg;
}
-static void emit_tex(struct r500_pfs_compile_state *cs,
- struct prog_instruction *fpi, int dest, int counter)
+static int emit_slot(struct r500_pfs_compile_state *cs) /* Reserve the next instruction slot and return its index. */
+{
+ if (cs->nrslots >= 512) { /* no free slot left under the 512-slot limit checked here */
+ ERROR("Too many instructions");
+ cs->nrslots = 1; /* restart the count so that slot 0 is considered taken */
+ return 0; /* the overflowing instruction is written to slot 0 */
+ }
+ return cs->nrslots++; /* hand out the current count as the index, then advance it */
+}
+
+static int emit_tex(struct r500_pfs_compile_state *cs,
+ struct prog_instruction *fpi, int dest)
{
PROG_CODE;
int hwsrc, hwdest;
GLuint mask;
+ int counter = emit_slot(cs);
mask = fpi->DstReg.WriteMask << 11;
hwsrc = make_src(cs, fpi->SrcReg[0]);
@@ -399,844 +436,490 @@ static void emit_tex(struct r500_pfs_compile_state *cs,
| MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
| MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
}
+
+ return counter;
}
-static void emit_alu(struct r500_pfs_compile_state *cs, int counter, struct prog_instruction *fpi) {
+/* Do not call directly */
+static int _helper_emit_alu(struct r500_pfs_compile_state *cs, GLuint rgbop, GLuint alphaop,
+ int File, int Index, int WriteMask)
+{
PROG_CODE;
- /* Ideally, we shouldn't have to explicitly clear memory here! */
- code->inst[counter].inst0 = 0x0;
- code->inst[counter].inst1 = 0x0;
- code->inst[counter].inst2 = 0x0;
- code->inst[counter].inst3 = 0x0;
- code->inst[counter].inst4 = 0x0;
- code->inst[counter].inst5 = 0x0;
+ int counter = emit_slot(cs);
- if (fpi->DstReg.File == PROGRAM_OUTPUT) {
- code->inst[counter].inst0 = R500_INST_TYPE_OUT;
+ code->inst[counter].inst4 = alphaop;
+ code->inst[counter].inst5 = rgbop;
- if (fpi->DstReg.Index == FRAG_RESULT_COLR)
- code->inst[counter].inst0 |= (fpi->DstReg.WriteMask << 15);
+ if (File == PROGRAM_OUTPUT) {
+ code->inst[counter].inst0 = R500_INST_TYPE_OUT;
- if (fpi->DstReg.Index == FRAG_RESULT_DEPR) {
+ if (Index == FRAG_RESULT_COLR) {
+ code->inst[counter].inst0 |= WriteMask << 15;
+ } else if (Index == FRAG_RESULT_DEPR) {
code->inst[counter].inst4 |= R500_ALPHA_W_OMASK;
- /* Notify the state emission! */
cs->compiler->fp->writes_depth = GL_TRUE;
}
} else {
+ int dest = Index + code->temp_reg_offset;
+
code->inst[counter].inst0 = R500_INST_TYPE_ALU
- /* pixel_mask */
- | (fpi->DstReg.WriteMask << 11);
+ | (WriteMask << 11);
+ code->inst[counter].inst4 |= R500_ALPHA_ADDRD(dest);
+ code->inst[counter].inst5 |= R500_ALU_RGBA_ADDRD(dest);
}
code->inst[counter].inst0 |= R500_INST_TEX_SEM_WAIT;
+
+ return counter;
}
-static void emit_mov(struct r500_pfs_compile_state *cs, int counter, struct prog_instruction *fpi, GLuint src_reg, GLuint swizzle, GLuint dest) {
+/**
+ * Prepare an ALU slot with the given RGB operation, ALPHA operation, and
+ * destination register.
+ */
+static int emit_alu(struct r500_pfs_compile_state *cs, GLuint rgbop, GLuint alphaop, struct prog_dst_register dst)
+{
+ return _helper_emit_alu(cs, rgbop, alphaop, dst.File, dst.Index, dst.WriteMask);
+}
+
+static int emit_alu_temp(struct r500_pfs_compile_state *cs, GLuint rgbop, GLuint alphaop, int dst, int writemask)
+{
+ return _helper_emit_alu(cs, rgbop, alphaop,
+ PROGRAM_TEMPORARY, dst - cs->compiler->code->temp_reg_offset, writemask);
+}
+
+/**
+ * Set an instruction's source 0 (both RGB and ALPHA) to the given hardware index.
+ */
+static void set_src0_direct(struct r500_pfs_compile_state *cs, int ip, GLuint src)
+{
PROG_CODE;
- /* The r3xx shader uses MAD to implement MOV. We are using CMP, since
- * it is technically more accurate and recommended by ATI/AMD. */
- emit_alu(cs, counter, fpi);
- code->inst[counter].inst1 = R500_RGB_ADDR0(src_reg);
- code->inst[counter].inst2 = R500_ALPHA_ADDR0(src_reg);
- /* (De)mangle the swizzle from Mesa to R500. */
- swizzle = make_rgba_swizzle(swizzle);
- /* 0x1FF is 9 bits, size of an RGB swizzle. */
- code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
- | MAKE_SWIZ_RGB_A((swizzle & 0x1ff))
- | R500_ALU_RGB_SEL_B_SRC0
- | MAKE_SWIZ_RGB_B((swizzle & 0x1ff))
- | R500_ALU_RGB_OMOD_DISABLE;
- code->inst[counter].inst4 |= R500_ALPHA_OP_CMP
- | R500_ALPHA_ADDRD(dest)
- | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(GET_SWZ(swizzle, 3))
- | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(GET_SWZ(swizzle, 3))
- | R500_ALPHA_OMOD_DISABLE;
- code->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
- | R500_ALU_RGBA_ADDRD(dest)
- | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
- | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
+ code->inst[ip].inst1 |= R500_RGB_ADDR0(src);
+ code->inst[ip].inst2 |= R500_ALPHA_ADDR0(src);
}
-static void emit_mad(struct r500_pfs_compile_state *cs, int counter, struct prog_instruction *fpi, int one, int two, int three) {
+/**
+ * Set an instruction's source 1 (both RGB and ALPHA) to the given hardware index.
+ */
+static void set_src1_direct(struct r500_pfs_compile_state *cs, int ip, GLuint src)
+{
PROG_CODE;
- /* Note: This code was all Corbin's. Corbin is a rather hackish coder.
- * If you can make it pretty or fast, please do so! */
- emit_alu(cs, counter, fpi);
- /* Common MAD stuff */
- code->inst[counter].inst4 |= R500_ALPHA_OP_MAD
- | R500_ALPHA_ADDRD(make_dest(cs, fpi->DstReg));
- code->inst[counter].inst5 |= R500_ALU_RGBA_OP_MAD
- | R500_ALU_RGBA_ADDRD(make_dest(cs, fpi->DstReg));
- switch (one) {
- case 0:
- case 1:
- case 2:
- code->inst[counter].inst1 |= R500_RGB_ADDR0(make_src(cs, fpi->SrcReg[one]));
- code->inst[counter].inst2 |= R500_ALPHA_ADDR0(make_src(cs, fpi->SrcReg[one]));
- code->inst[counter].inst3 |= R500_ALU_RGB_SEL_A_SRC0
- | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[one]));
- code->inst[counter].inst4 |= R500_ALPHA_SEL_A_SRC0
- | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[one]));
- break;
- case R500_SWIZZLE_ZERO:
- code->inst[counter].inst3 |= MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO);
- code->inst[counter].inst4 |= MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ZERO);
- break;
- case R500_SWIZZLE_ONE:
- code->inst[counter].inst3 |= MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE);
- code->inst[counter].inst4 |= MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE);
- break;
- default:
- ERROR("Bad src index in emit_mad: %d\n", one);
- break;
- }
- switch (two) {
- case 0:
- case 1:
- case 2:
- code->inst[counter].inst1 |= R500_RGB_ADDR1(make_src(cs, fpi->SrcReg[two]));
- code->inst[counter].inst2 |= R500_ALPHA_ADDR1(make_src(cs, fpi->SrcReg[two]));
- code->inst[counter].inst3 |= R500_ALU_RGB_SEL_B_SRC1
- | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[two]));
- code->inst[counter].inst4 |= R500_ALPHA_SEL_B_SRC1
- | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[two]));
- break;
- case R500_SWIZZLE_ZERO:
- code->inst[counter].inst3 |= MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO);
- code->inst[counter].inst4 |= MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO);
- break;
- case R500_SWIZZLE_ONE:
- code->inst[counter].inst3 |= MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE);
- code->inst[counter].inst4 |= MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ONE);
- break;
- default:
- ERROR("Bad src index in emit_mad: %d\n", two);
- break;
- }
- switch (three) {
- case 0:
- case 1:
- case 2:
- code->inst[counter].inst1 |= R500_RGB_ADDR2(make_src(cs, fpi->SrcReg[three]));
- code->inst[counter].inst2 |= R500_ALPHA_ADDR2(make_src(cs, fpi->SrcReg[three]));
- code->inst[counter].inst5 |= R500_ALU_RGBA_SEL_C_SRC2
- | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[three]))
- | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
- | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[three]));
- break;
- case R500_SWIZZLE_ZERO:
- code->inst[counter].inst5 |= MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
- | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
- break;
- case R500_SWIZZLE_ONE:
- code->inst[counter].inst5 |= MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ONE)
- | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ONE);
- break;
- default:
- ERROR("Bad src index in emit_mad: %d\n", three);
- break;
- }
+ code->inst[ip].inst1 |= R500_RGB_ADDR1(src);
+ code->inst[ip].inst2 |= R500_ALPHA_ADDR1(src);
}
-static void emit_sop(struct r500_pfs_compile_state *cs, int counter, struct prog_instruction *fpi, int opcode, GLuint src, GLuint swiz, GLuint dest) {
+/**
+ * Set an instruction's source 2 (both RGB and ALPHA) to the given hardware index.
+ */
+static void set_src2_direct(struct r500_pfs_compile_state *cs, int ip, GLuint src)
+{
PROG_CODE;
- emit_alu(cs, counter, fpi);
- code->inst[counter].inst1 = R500_RGB_ADDR0(src);
- code->inst[counter].inst2 = R500_ALPHA_ADDR0(src);
- code->inst[counter].inst4 |= R500_ALPHA_ADDRD(dest)
- | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(swiz);
- code->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
- | R500_ALU_RGBA_ADDRD(dest);
- switch (opcode) {
- case OPCODE_COS:
- code->inst[counter].inst4 |= R500_ALPHA_OP_COS;
- break;
- case OPCODE_EX2:
- code->inst[counter].inst4 |= R500_ALPHA_OP_EX2;
- break;
- case OPCODE_LG2:
- code->inst[counter].inst4 |= R500_ALPHA_OP_LN2;
- break;
- case OPCODE_RCP:
- code->inst[counter].inst4 |= R500_ALPHA_OP_RCP;
- break;
- case OPCODE_RSQ:
- code->inst[counter].inst4 |= R500_ALPHA_OP_RSQ;
- break;
- case OPCODE_SIN:
- code->inst[counter].inst4 |= R500_ALPHA_OP_SIN;
- break;
- default:
- ERROR("Bad opcode in emit_sop: %d\n", opcode);
- break;
+ code->inst[ip].inst1 |= R500_RGB_ADDR2(src);
+ code->inst[ip].inst2 |= R500_ALPHA_ADDR2(src);
+}
+
+/**
+ * Set an instruction's source 0 (both RGB and ALPHA) according to the given source register.
+ */
+static void set_src0(struct r500_pfs_compile_state *cs, int ip, struct prog_src_register srcreg)
+{
+ set_src0_direct(cs, ip, make_src(cs, srcreg));
+}
+
+/**
+ * Set an instruction's source 1 (both RGB and ALPHA) according to the given source register.
+ */
+static void set_src1(struct r500_pfs_compile_state *cs, int ip, struct prog_src_register srcreg)
+{
+ set_src1_direct(cs, ip, make_src(cs, srcreg));
+}
+
+/**
+ * Set an instruction's source 2 (both RGB and ALPHA) according to the given source register.
+ */
+static void set_src2(struct r500_pfs_compile_state *cs, int ip, struct prog_src_register srcreg)
+{
+ set_src2_direct(cs, ip, make_src(cs, srcreg));
+}
+
+/**
+ * Set an instruction's argument A (both RGB and ALPHA) from the given source,
+ * taking swizzles+neg+abs as specified (see also _reg version below).
+ */
+static void set_argA(struct r500_pfs_compile_state *cs, int ip, int source, GLuint swizRGB, GLuint swizA)
+{
+ PROG_CODE;
+ code->inst[ip].inst3 |= (source << R500_ALU_RGB_SEL_A_SHIFT) | MAKE_SWIZ_RGB_A(swizRGB);
+ code->inst[ip].inst4 |= (source << R500_ALPHA_SEL_A_SHIFT) | MAKE_SWIZ_ALPHA_A(swizA);
+}
+
+/**
+ * Set an instruction's argument B (both RGB and ALPHA) from the given source,
+ * taking swizzles+neg+abs as specified (see also _reg version below).
+ */
+static void set_argB(struct r500_pfs_compile_state *cs, int ip, int source, GLuint swizRGB, GLuint swizA)
+{
+ PROG_CODE;
+ code->inst[ip].inst3 |= (source << R500_ALU_RGB_SEL_B_SHIFT) | MAKE_SWIZ_RGB_B(swizRGB);
+ code->inst[ip].inst4 |= (source << R500_ALPHA_SEL_B_SHIFT) | MAKE_SWIZ_ALPHA_B(swizA);
+}
+
+/**
+ * Set an instruction's argument C (both RGB and ALPHA) from the given source,
+ * taking swizzles+neg+abs as specified (see also _reg version below).
+ */
+static void set_argC(struct r500_pfs_compile_state *cs, int ip, int source, GLuint swizRGB, GLuint swizA)
+{
+ PROG_CODE;
+ code->inst[ip].inst5 |=
+ (source << R500_ALU_RGBA_SEL_C_SHIFT) |
+ MAKE_SWIZ_RGBA_C(swizRGB) |
+ (source << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT) |
+ MAKE_SWIZ_ALPHA_C(swizA);
+}
+
+/**
+ * Set an instruction's argument A (both RGB and ALPHA) from the given source,
+ * taking swizzles, negation and absolute value from the given source register.
+ */
+static void set_argA_reg(struct r500_pfs_compile_state *cs, int ip, int source, struct prog_src_register srcreg)
+{
+ set_argA(cs, ip, source, make_rgb_swizzle(srcreg), make_alpha_swizzle(srcreg));
+}
+
+/**
+ * Set an instruction's argument B (both RGB and ALPHA) from the given source,
+ * taking swizzles, negation and absolute value from the given source register.
+ */
+static void set_argB_reg(struct r500_pfs_compile_state *cs, int ip, int source, struct prog_src_register srcreg)
+{
+ set_argB(cs, ip, source, make_rgb_swizzle(srcreg), make_alpha_swizzle(srcreg));
+}
+
+/**
+ * Set an instruction's argument C (both RGB and ALPHA) from the given source,
+ * taking swizzles, negation and absolute value from the given source register.
+ */
+static void set_argC_reg(struct r500_pfs_compile_state *cs, int ip, int source, struct prog_src_register srcreg)
+{
+ set_argC(cs, ip, source, make_rgb_swizzle(srcreg), make_alpha_swizzle(srcreg));
+}
+
+/**
+ * Emit a special scalar operation.
+ *
+ * Reserves an ALU slot through emit_alu() with R500_ALU_RGBA_OP_SOP as the
+ * RGB operation and `opcode` as the ALPHA operation (the callers visible in
+ * this file pass R500_ALPHA_OP_* values), targeting `dstreg`.
+ *
+ * `src` is handed to set_src0_direct() unchanged, so it is used as a
+ * hardware source index rather than a prog_src_register.
+ * `swiz` is the ALPHA swizzle of argument A; the RGB swizzle of argument A
+ * is fixed to R500_SWIZ_RGB_ZERO.
+ *
+ * Returns the slot index obtained from emit_alu().
+ */
+static int emit_sop(struct r500_pfs_compile_state *cs,
+ int opcode, struct prog_dst_register dstreg, GLuint src, GLuint swiz)
+{
+ int ip = emit_alu(cs, R500_ALU_RGBA_OP_SOP, opcode, dstreg);
+ set_src0_direct(cs, ip, src);
+ set_argA(cs, ip, 0, R500_SWIZ_RGB_ZERO, swiz);
+ return ip;
+}
+
+
+/**
+ * Emit trigonometric function COS, SIN, SCS.
+ *
+ * The scalar input (component selected by make_sop_swizzle()) is first
+ * multiplied by the RCP_2PI constant into temp.w, then reduced with FRC,
+ * and finally fed to one special scalar operation per requested result.
+ *
+ * For SCS, COS is written to the X component and SIN to the Y component of
+ * the destination, each only when the instruction's write mask selects it.
+ * Nothing is emitted for the final step if the opcode is none of the three.
+ */
+static void emit_trig(struct r500_pfs_compile_state *cs, struct prog_instruction *fpi)
+{
+	int ip;
+	struct prog_dst_register temp = dstreg_template;
+	temp.Index = get_temp(cs, 0);
+	temp.WriteMask = WRITEMASK_W;
+	/* NOTE(review): emit_lit passes get_temp() results to emit_alu_temp(),
+	 * which subtracts temp_reg_offset, whereas emit_alu() below adds the
+	 * offset to temp.Index while the reads use temp.Index unchanged.
+	 * Confirm that both paths address the same register. */
+
+	/* temp = Input*(1/2pi) */
+	ip = emit_alu(cs, R500_ALU_RGBA_OP_MAD, R500_ALPHA_OP_MAD, temp);
+	set_src0(cs, ip, fpi->SrcReg[0]);
+	set_src1_direct(cs, ip, emit_const4fv(cs, RCP_2PI));
+	set_argA(cs, ip, 0, R500_SWIZ_RGB_ZERO, make_sop_swizzle(fpi->SrcReg[0]));
+	set_argB(cs, ip, 1, R500_SWIZ_RGB_ZERO, SWIZZLE_W);
+	set_argC(cs, ip, 0, R500_SWIZ_RGB_ZERO, R500_SWIZZLE_ZERO);
+
+	/* temp = frac(temp) */
+	ip = emit_alu(cs, R500_ALU_RGBA_OP_FRC, R500_ALPHA_OP_FRC, temp);
+	set_src0_direct(cs, ip, temp.Index);
+	set_argA(cs, ip, 0, R500_SWIZ_RGB_RGB, SWIZZLE_W);
+
+	/* Dest = trig(temp) */
+	if (fpi->Opcode == OPCODE_COS) {
+		emit_sop(cs, R500_ALPHA_OP_COS, fpi->DstReg, temp.Index, SWIZZLE_W);
+	} else if (fpi->Opcode == OPCODE_SIN) {
+		emit_sop(cs, R500_ALPHA_OP_SIN, fpi->DstReg, temp.Index, SWIZZLE_W);
+	} else if (fpi->Opcode == OPCODE_SCS) {
+		/* Copy of the destination whose write mask is narrowed to a
+		 * single component per emitted operation, so that SIN does
+		 * not overwrite the COS result (and vice versa). */
+		struct prog_dst_register moddst = fpi->DstReg;
+
+		if (fpi->DstReg.WriteMask & WRITEMASK_X) {
+			moddst.WriteMask = WRITEMASK_X;
+			emit_sop(cs, R500_ALPHA_OP_COS, moddst, temp.Index, SWIZZLE_W);
+		}
+		if (fpi->DstReg.WriteMask & WRITEMASK_Y) {
+			moddst.WriteMask = WRITEMASK_Y;
+			emit_sop(cs, R500_ALPHA_OP_SIN, moddst, temp.Index, SWIZZLE_W);
+		}
+	}
+}
-static int do_inst(struct r500_pfs_compile_state *cs, struct prog_instruction *fpi, int counter) {
+/**
+ * Emit a LIT instruction.
+ *
+ * Definition of LIT (from ARB_fragment_program):
+ * tmp = VectorLoad(op0);
+ * if (tmp.x < 0) tmp.x = 0;
+ * if (tmp.y < 0) tmp.y = 0;
+ * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
+ * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
+ * result.x = 1.0;
+ * result.y = tmp.x;
+ * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
+ * result.w = 1.0;
+ */
+static void emit_lit(struct r500_pfs_compile_state *cs, struct prog_instruction *fpi)
+{
+ GLuint cnst;
+ int needTemporary;
+ GLuint temp;
+ int ip;
+
+ cnst = emit_const4fv(cs, LIT);
+
+ needTemporary = 0;
+ if (fpi->DstReg.WriteMask != WRITEMASK_XYZW || fpi->DstReg.File == PROGRAM_OUTPUT)
+ needTemporary = 1;
+
+ if (needTemporary) {
+ temp = get_temp(cs, 0);
+ } else {
+ temp = fpi->DstReg.Index;
+ }
+
+ // MAX tmp.xyw, op0, { 0, 0, 0, -128+eps }
+ ip = emit_alu_temp(cs, R500_ALU_RGBA_OP_MAX, R500_ALPHA_OP_MAX, temp, WRITEMASK_XYW);
+ set_src0(cs, ip, fpi->SrcReg[0]);
+ set_src1_direct(cs, ip, cnst);
+ set_argA_reg(cs, ip, 0, fpi->SrcReg[0]);
+ set_argB(cs, ip, 1, R500_SWIZ_RGB_ZERO, SWIZZLE_W);
+
+ // MIN tmp.z, tmp.w, { 128-eps }
+ // LG2 tmp.w, tmp.y
+ ip = emit_alu_temp(cs, R500_ALU_RGBA_OP_MIN, R500_ALPHA_OP_LN2, temp, WRITEMASK_ZW);
+ set_src0_direct(cs, ip, temp);
+ set_src1_direct(cs, ip, cnst);
+ set_argA(cs, ip, 0, SWIZZLE_W | (SWIZZLE_W<<3) | (SWIZZLE_W<<6), SWIZZLE_Y);
+ set_argB(cs, ip, 1, SWIZZLE_X | (SWIZZLE_X<<3) | (SWIZZLE_X<<6), SWIZZLE_X);
+
+ // MOV tmp.y, tmp.x
+ // MUL tmp.w, tmp.z, tmp.w
+ ip = emit_alu_temp(cs, R500_ALU_RGBA_OP_MAD, R500_ALPHA_OP_MAD, temp, WRITEMASK_YW);
+ set_src0_direct(cs, ip, temp);
+ set_argA(cs, ip, 0, SWIZZLE_X | (SWIZZLE_X<<3) | (SWIZZLE_X<<6), SWIZZLE_Z);
+ set_argB(cs, ip, 0, R500_SWIZ_RGB_ONE, SWIZZLE_W);
+ set_argC(cs, ip, 0, R500_SWIZ_RGB_ZERO, R500_SWIZZLE_ZERO);
+
+ // MOV tmp.x, 1.0
+ // EX2 tmp.w, tmp.w
+ ip = emit_alu_temp(cs, R500_ALU_RGBA_OP_CMP, R500_ALPHA_OP_EX2, temp, WRITEMASK_XW);
+ set_src0_direct(cs, ip, temp);
+ set_argA(cs, ip, 0, R500_SWIZ_RGB_ONE, SWIZZLE_W);
+ set_argB(cs, ip, 0, R500_SWIZ_RGB_ONE, R500_SWIZZLE_ZERO);
+ set_argC(cs, ip, 0, R500_SWIZ_RGB_ZERO, R500_SWIZZLE_ZERO);
+
+ // tmp.z := (-tmp.x >= 0) ? tmp.y : 0.0
+ // MOV tmp.w, 1.0
+ ip = emit_alu_temp(cs, R500_ALU_RGBA_OP_CMP, R500_ALPHA_OP_CMP, temp, WRITEMASK_ZW);
+ set_src0_direct(cs, ip, temp);
+ set_argA(cs, ip, 0, R500_SWIZZLE_ZERO, R500_SWIZZLE_ONE);
+ set_argB(cs, ip, 0, SWIZZLE_W | (SWIZZLE_W<<3) | (SWIZZLE_W<<6), R500_SWIZZLE_ONE);
+ set_argC(cs, ip, 0, SWIZZLE_Y | (SWIZZLE_Y<<3) | (SWIZZLE_Y<<6) | (R500_SWIZ_MOD_NEG<<9), R500_SWIZZLE_ZERO);
+
+ if (needTemporary) {
+ ip = emit_alu(cs, R500_ALU_RGBA_OP_CMP, R500_ALPHA_OP_CMP, fpi->DstReg);
+ set_src0_direct(cs, ip, temp);
+ set_argA(cs, ip, 0, R500_SWIZ_RGB_RGB, SWIZZLE_W);
+ set_argB(cs, ip, 1, R500_SWIZ_RGB_RGB, SWIZZLE_W);
+ set_argC(cs, ip, 0, R500_SWIZ_RGB_ZERO, R500_SWIZZLE_ZERO);
+ }
+}
+
+static void do_inst(struct r500_pfs_compile_state *cs, struct prog_instruction *fpi) {
PROG_CODE;
GLuint src[3], dest = 0;
- int temp_swiz = 0;
+ int ip;
if (fpi->Opcode != OPCODE_KIL) {
dest = make_dest(cs, fpi->DstReg);
}
switch (fpi->Opcode) {
- case OPCODE_ABS:
- emit_mov(cs, counter, fpi, make_src(cs, fpi->SrcReg[0]), fpi->SrcReg[0].Swizzle, dest);
- code->inst[counter].inst3 |= R500_ALU_RGB_MOD_A_ABS
- | R500_ALU_RGB_MOD_B_ABS;
- code->inst[counter].inst4 |= R500_ALPHA_MOD_A_ABS
- | R500_ALPHA_MOD_B_ABS;
- break;
case OPCODE_ADD:
/* Variation on MAD: 1*src0+src1 */
- emit_mad(cs, counter, fpi, R500_SWIZZLE_ONE, 0, 1);
+ ip = emit_alu(cs, R500_ALU_RGBA_OP_MAD, R500_ALPHA_OP_MAD, fpi->DstReg);
+ set_src0(cs, ip, fpi->SrcReg[0]);
+ set_src1(cs, ip, fpi->SrcReg[1]);
+ set_argA(cs, ip, 0, R500_SWIZ_RGB_ONE, R500_SWIZZLE_ONE);
+ set_argB_reg(cs, ip, 0, fpi->SrcReg[0]);
+ set_argC_reg(cs, ip, 1, fpi->SrcReg[1]);
break;
case OPCODE_CMP:
/* This inst's selects need to be swapped as follows:
* 0 -> C ; 1 -> B ; 2 -> A */
- src[0] = make_src(cs, fpi->SrcReg[0]);
- src[1] = make_src(cs, fpi->SrcReg[1]);
- src[2] = make_src(cs, fpi->SrcReg[2]);
- emit_alu(cs, counter, fpi);
- code->inst[counter].inst1 = R500_RGB_ADDR0(src[2])
- | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[0]);
- code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[2])
- | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(src[0]);
- code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
- | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[2]))
- | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
- code->inst[counter].inst4 |= R500_ALPHA_OP_CMP
- | R500_ALPHA_ADDRD(dest)
- | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[2]))
- | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
- code->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
- | R500_ALU_RGBA_ADDRD(dest)
- | R500_ALU_RGBA_SEL_C_SRC2
- | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[0]))
- | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
- | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[0]));
+ ip = emit_alu(cs, R500_ALU_RGBA_OP_CMP, R500_ALPHA_OP_CMP, fpi->DstReg);
+ set_src0(cs, ip, fpi->SrcReg[0]);
+ set_src1(cs, ip, fpi->SrcReg[1]);
+ set_src2(cs, ip, fpi->SrcReg[2]);
+ set_argA_reg(cs, ip, 2, fpi->SrcReg[2]);
+ set_argB_reg(cs, ip, 1, fpi->SrcReg[1]);
+ set_argC_reg(cs, ip, 0, fpi->SrcReg[0]);
break;
case OPCODE_COS:
- src[0] = make_src(cs, fpi->SrcReg[0]);
- src[1] = emit_const4fv(cs, RCP_2PI);
- code->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
- | (R500_WRITEMASK_ARGB << 11);
- code->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
- | R500_RGB_ADDR1(src[1]);
- code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
- | R500_ALPHA_ADDR1(src[1]);
- code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
- | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB)
- | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB);
- code->inst[counter].inst4 = R500_ALPHA_OP_MAD
- | R500_ALPHA_ADDRD(get_temp(cs, 0))
- | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A
- | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A;
- code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
- | R500_ALU_RGBA_ADDRD(get_temp(cs, 0))
- | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
- | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
- counter++;
- code->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11);
- code->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(cs, 0));
- code->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(cs, 0));
- code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
- | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB);
- code->inst[counter].inst4 = R500_ALPHA_OP_FRC
- | R500_ALPHA_ADDRD(get_temp(cs, 1))
- | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A;
- code->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC
- | R500_ALU_RGBA_ADDRD(get_temp(cs, 1));
- counter++;
- emit_sop(cs, counter, fpi, OPCODE_COS, get_temp(cs, 1), make_sop_swizzle(fpi->SrcReg[0]), dest);
+ emit_trig(cs, fpi);
break;
case OPCODE_DP3:
- src[0] = make_src(cs, fpi->SrcReg[0]);
- src[1] = make_src(cs, fpi->SrcReg[1]);
- emit_alu(cs, counter, fpi);
- code->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
- | R500_RGB_ADDR1(src[1]);
- code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
- | R500_ALPHA_ADDR1(src[1]);
- code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
- | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
- | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
- code->inst[counter].inst4 |= R500_ALPHA_OP_DP
- | R500_ALPHA_ADDRD(dest)
- | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
- | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
- code->inst[counter].inst5 = R500_ALU_RGBA_OP_DP3
- | R500_ALU_RGBA_ADDRD(dest);
+ ip = emit_alu(cs, R500_ALU_RGBA_OP_DP3, R500_ALPHA_OP_DP, fpi->DstReg);
+ set_src0(cs, ip, fpi->SrcReg[0]);
+ set_src1(cs, ip, fpi->SrcReg[1]);
+ set_argA_reg(cs, ip, 0, fpi->SrcReg[0]);
+ set_argB_reg(cs, ip, 1, fpi->SrcReg[1]);
break;
case OPCODE_DP4:
- src[0] = make_src(cs, fpi->SrcReg[0]);
- src[1] = make_src(cs, fpi->SrcReg[1]);
- /* Based on DP3 */
- emit_alu(cs, counter, fpi);
- code->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
- | R500_RGB_ADDR1(src[1]);
- code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
- | R500_ALPHA_ADDR1(src[1]);
- code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
- | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
- | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
- code->inst[counter].inst4 |= R500_ALPHA_OP_DP
- | R500_ALPHA_ADDRD(dest)
- | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
- | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
- code->inst[counter].inst5 = R500_ALU_RGBA_OP_DP4
- | R500_ALU_RGBA_ADDRD(dest);
- break;
- case OPCODE_DPH:
- src[0] = make_src(cs, fpi->SrcReg[0]);
- src[1] = make_src(cs, fpi->SrcReg[1]);
- /* Based on DP3 */
- emit_alu(cs, counter, fpi);
- code->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
- | R500_RGB_ADDR1(src[1]);
- code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
- | R500_ALPHA_ADDR1(src[1]);
- code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
- | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
- | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
- code->inst[counter].inst4 |= R500_ALPHA_OP_DP
- | R500_ALPHA_ADDRD(dest)
- | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
- | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
- code->inst[counter].inst5 = R500_ALU_RGBA_OP_DP4
- | R500_ALU_RGBA_ADDRD(dest);
+ ip = emit_alu(cs, R500_ALU_RGBA_OP_DP4, R500_ALPHA_OP_DP, fpi->DstReg);
+ set_src0(cs, ip, fpi->SrcReg[0]);
+ set_src1(cs, ip, fpi->SrcReg[1]);
+ set_argA_reg(cs, ip, 0, fpi->SrcReg[0]);
+ set_argB_reg(cs, ip, 1, fpi->SrcReg[1]);
break;
case OPCODE_DST:
- src[0] = make_src(cs, fpi->SrcReg[0]);
- src[1] = make_src(cs, fpi->SrcReg[1]);
/* [1, src0.y*src1.y, src0.z, src1.w]
- * So basically MUL with lotsa swizzling. */
- emit_alu(cs, counter, fpi);
- code->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
- | R500_RGB_ADDR1(src[1]);
- code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
- | R500_ALPHA_ADDR1(src[1]);
- code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
- | R500_ALU_RGB_SEL_B_SRC1;
- /* Select [1, y, z, 1] */
- temp_swiz = (make_rgb_swizzle(fpi->SrcReg[0]) & ~0x7) | R500_SWIZZLE_ONE;
- code->inst[counter].inst3 |= MAKE_SWIZ_RGB_A(temp_swiz);
- /* Select [1, y, 1, w] */
- temp_swiz = (make_rgb_swizzle(fpi->SrcReg[0]) & ~0x1c7) | R500_SWIZZLE_ONE | (R500_SWIZZLE_ONE << 6);
- code->inst[counter].inst3 |= MAKE_SWIZ_RGB_B(temp_swiz);
- code->inst[counter].inst4 |= R500_ALPHA_OP_MAD
- | R500_ALPHA_ADDRD(dest)
- | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
- | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
- code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
- | R500_ALU_RGBA_ADDRD(dest)
- | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
- | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
+ * So basically MUL with lotsa swizzling. */
+ ip = emit_alu(cs, R500_ALU_RGBA_OP_MAD, R500_ALPHA_OP_MAD, fpi->DstReg);
+ set_src0(cs, ip, fpi->SrcReg[0]);
+ set_src1(cs, ip, fpi->SrcReg[1]);
+ set_argA(cs, ip, 0,
+ (make_rgb_swizzle(fpi->SrcReg[0]) & ~0x7) | R500_SWIZZLE_ONE,
+ R500_SWIZZLE_ONE);
+ set_argB(cs, ip, 1,
+ (make_rgb_swizzle(fpi->SrcReg[0]) & ~0x1c7) | R500_SWIZZLE_ONE | (R500_SWIZZLE_ONE << 6),
+ make_alpha_swizzle(fpi->SrcReg[1]));
+ set_argC(cs, ip, 0, R500_SWIZ_RGB_ZERO, R500_SWIZZLE_ZERO);
break;
case OPCODE_EX2:
src[0] = make_src(cs, fpi->SrcReg[0]);
- emit_sop(cs, counter, fpi, OPCODE_EX2, src[0], make_sop_swizzle(fpi->SrcReg[0]), dest);
+ emit_sop(cs, R500_ALPHA_OP_EX2, fpi->DstReg, src[0], make_sop_swizzle(fpi->SrcReg[0]));
break;
case OPCODE_FLR:
- src[0] = make_src(cs, fpi->SrcReg[0]);
- code->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11);
- code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
- code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
- code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
- | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
- code->inst[counter].inst4 |= R500_ALPHA_OP_FRC
- | R500_ALPHA_ADDRD(get_temp(cs, 0))
- | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
- code->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC
- | R500_ALU_RGBA_ADDRD(get_temp(cs, 0));
- counter++;
- emit_alu(cs, counter, fpi);
- code->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
- | R500_RGB_ADDR1(get_temp(cs, 0));
- code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
- | R500_ALPHA_ADDR1(get_temp(cs, 0));
- code->inst[counter].inst3 = MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
- | R500_ALU_RGB_SEL_B_SRC0 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0]));
- code->inst[counter].inst4 = R500_ALPHA_OP_MAD
- | R500_ALPHA_ADDRD(dest)
- | R500_ALPHA_SWIZ_A_A
- | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0]));
- code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
- | R500_ALU_RGBA_ADDRD(dest)
- | R500_ALU_RGBA_SEL_C_SRC1
- | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[0]))
- | R500_ALU_RGBA_ALPHA_SEL_C_SRC1
- | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[0]))
- | R500_ALU_RGBA_MOD_C_NEG;
+ dest = get_temp(cs, 0); /* FLR(x) = x - FRC(x); first instruction: temp = FRC(src0) */
+ ip = emit_alu_temp(cs, R500_ALU_RGBA_OP_FRC, R500_ALPHA_OP_FRC, dest, WRITEMASK_XYZW);
+ set_src0(cs, ip, fpi->SrcReg[0]);
+ set_argA_reg(cs, ip, 0, fpi->SrcReg[0]);
+ /* Second instruction: MAD dst = src0 * 1 + (-temp) */
+ ip = emit_alu(cs, R500_ALU_RGBA_OP_MAD, R500_ALPHA_OP_MAD, fpi->DstReg);
+ set_src0(cs, ip, fpi->SrcReg[0]);
+ set_src1_direct(cs, ip, dest);
+ set_argA_reg(cs, ip, 0, fpi->SrcReg[0]); /* FLR is unary: swizzle must come from SrcReg[0], the register loaded into source slot 0 */
+ set_argB(cs, ip, 0, R500_SWIZ_RGB_ONE, R500_SWIZZLE_ONE);
+ set_argC(cs, ip, 1,
+ R500_SWIZ_RGB_RGB|(R500_SWIZ_MOD_NEG<<9),
+ SWIZZLE_W|(R500_SWIZ_MOD_NEG<<3));
break;
case OPCODE_FRC:
- src[0] = make_src(cs, fpi->SrcReg[0]);
- emit_alu(cs, counter, fpi);
- code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
- code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
- code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
- | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
- code->inst[counter].inst4 |= R500_ALPHA_OP_FRC
- | R500_ALPHA_ADDRD(dest)
- | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
- code->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC
- | R500_ALU_RGBA_ADDRD(dest);
+ ip = emit_alu(cs, R500_ALU_RGBA_OP_FRC, R500_ALPHA_OP_FRC, fpi->DstReg);
+ set_src0(cs, ip, fpi->SrcReg[0]);
+ set_argA_reg(cs, ip, 0, fpi->SrcReg[0]);
break;
case OPCODE_LG2:
src[0] = make_src(cs, fpi->SrcReg[0]);
- emit_sop(cs, counter, fpi, OPCODE_LG2, src[0], make_sop_swizzle(fpi->SrcReg[0]), dest);
+ emit_sop(cs, R500_ALPHA_OP_LN2, fpi->DstReg, src[0], make_sop_swizzle(fpi->SrcReg[0]));
break;
case OPCODE_LIT:
- src[0] = make_src(cs, fpi->SrcReg[0]);
- src[1] = emit_const4fv(cs, LIT);
- /* First inst: MAX temp, input, [0, 0, 0, -128]
- * Write: RG, A */
- code->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
- | (R500_WRITEMASK_ARG << 11);
- code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]);
- code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]);
- code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
- | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
- | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO);
- code->inst[counter].inst4 = R500_ALPHA_OP_MAX
- | R500_ALPHA_ADDRD(get_temp(cs, 0))
- | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
- | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A;
- code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX
- | R500_ALU_RGBA_ADDRD(get_temp(cs, 0));
- counter++;
- /* Second inst: MIN temp, temp, [x, x, x, 128]
- * Write: A */
- code->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_A << 11);
- code->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(cs, 0)) | R500_RGB_ADDR1(src[1]);
- code->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(cs, 0)) | R500_ALPHA_ADDR1(src[1]);
- /* code->inst[counter].inst3; */
- code->inst[counter].inst4 = R500_ALPHA_OP_MAX
- | R500_ALPHA_ADDRD(dest)
- | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A
- | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A;
- code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX
- | R500_ALU_RGBA_ADDRD(dest);
- counter++;
- /* Third-fifth insts: POW temp, temp.y, temp.w
- * Write: B */
- emit_sop(cs, counter, fpi, OPCODE_LG2, get_temp(cs, 0), SWIZZLE_Y, get_temp(cs, 1));
- code->inst[counter].inst0 |= (R500_WRITEMASK_ARGB << 11);
- counter++;
- code->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11);
- code->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(cs, 1))
- | R500_RGB_ADDR1(get_temp(cs, 0));
- code->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(cs, 1))
- | R500_ALPHA_ADDR1(get_temp(cs, 0));
- code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
- | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB)
- | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB);
- code->inst[counter].inst4 = R500_ALPHA_OP_MAD
- | R500_ALPHA_ADDRD(get_temp(cs, 1))
- | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A
- | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A;
- code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
- | R500_ALU_RGBA_ADDRD(get_temp(cs, 1))
- | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
- | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
- counter++;
- emit_sop(cs, counter, fpi, OPCODE_EX2, get_temp(cs, 1), SWIZZLE_W, get_temp(cs, 0));
- code->inst[counter].inst0 |= (R500_WRITEMASK_B << 11);
- counter++;
- /* Sixth inst: CMP dest, temp.xxxx, temp.[1, x, z, 1], temp.[1, x, 0, 1];
- * Write: ARGB
- * This inst's selects need to be swapped as follows:
- * 0 -> C ; 1 -> B ; 2 -> A */
- emit_alu(cs, counter, fpi);
- code->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(cs, 0));
- code->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(cs, 0));
- code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
- | R500_ALU_RGB_R_SWIZ_A_1
- | R500_ALU_RGB_G_SWIZ_A_R
- | R500_ALU_RGB_B_SWIZ_A_B
- | R500_ALU_RGB_SEL_B_SRC0
- | R500_ALU_RGB_R_SWIZ_B_1
- | R500_ALU_RGB_G_SWIZ_B_R
- | R500_ALU_RGB_B_SWIZ_B_0;
- code->inst[counter].inst4 |= R500_ALPHA_OP_CMP
- | R500_ALPHA_ADDRD(dest)
- | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_1
- | R500_ALPHA_SEL_B_SRC0 | R500_ALPHA_SWIZ_B_1;
- code->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
- | R500_ALU_RGBA_ADDRD(dest)
- | R500_ALU_RGBA_SEL_C_SRC0
- | R500_ALU_RGBA_ALPHA_SEL_C_SRC0
- | R500_ALU_RGBA_R_SWIZ_R
- | R500_ALU_RGBA_G_SWIZ_R
- | R500_ALU_RGBA_B_SWIZ_R
- | R500_ALU_RGBA_A_SWIZ_R;
+ emit_lit(cs, fpi);
break;
case OPCODE_LRP:
- /* src0 * src1 + INV(src0) * src2
- * 1) MUL src0, src1, temp
- * 2) PRE 1-src0; MAD srcp, src2, temp */
- src[0] = make_src(cs, fpi->SrcReg[0]);
- src[1] = make_src(cs, fpi->SrcReg[1]);
- src[2] = make_src(cs, fpi->SrcReg[2]);
- code->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
- | R500_INST_NOP | (R500_WRITEMASK_ARGB << 11);
- code->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
- | R500_RGB_ADDR1(src[1]);
- code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
- | R500_ALPHA_ADDR1(src[1]);
- code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
- | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
- | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
- code->inst[counter].inst4 = R500_ALPHA_OP_MAD
- | R500_ALPHA_ADDRD(get_temp(cs, 0))
- | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
- | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
- code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
- | R500_ALU_RGBA_ADDRD(get_temp(cs, 0))
- | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
- | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
- counter++;
- emit_alu(cs, counter, fpi);
- code->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
- | R500_RGB_ADDR1(src[2])
- | R500_RGB_ADDR2(get_temp(cs, 0))
- | R500_RGB_SRCP_OP_1_MINUS_RGB0;
- code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
- | R500_ALPHA_ADDR1(src[2])
- | R500_ALPHA_ADDR2(get_temp(cs, 0))
- | R500_ALPHA_SRCP_OP_1_MINUS_A0;
- code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRCP
- | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
- | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[2]));
- code->inst[counter].inst4 |= R500_ALPHA_OP_MAD
- | R500_ALPHA_ADDRD(dest)
- | R500_ALPHA_SEL_A_SRCP | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
- | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[2]));
- code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
- | R500_ALU_RGBA_ADDRD(dest)
- | R500_ALU_RGBA_SEL_C_SRC2 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_RGB)
- | R500_ALU_RGBA_ALPHA_SEL_C_SRC2 | R500_ALU_RGBA_A_SWIZ_A;
+ /* result = src0*src1 + (1-src0)*src2
+ * = src0*src1 + src2 + (-src0)*src2
+ *
+ * Note: LRP without swizzling (or with only limited
+ * swizzling) could be done more efficiently using the
+ * presubtract hardware.
+ */
+ dest = get_temp(cs, 0);
+ ip = emit_alu_temp(cs, R500_ALU_RGBA_OP_MAD, R500_ALPHA_OP_MAD, dest, WRITEMASK_XYZW);
+ set_src0(cs, ip, fpi->SrcReg[0]);
+ set_src1(cs, ip, fpi->SrcReg[1]);
+ set_src2(cs, ip, fpi->SrcReg[2]);
+ set_argA_reg(cs, ip, 0, fpi->SrcReg[0]);
+ set_argB_reg(cs, ip, 1, fpi->SrcReg[1]);
+ set_argC_reg(cs, ip, 2, fpi->SrcReg[2]);
+
+ ip = emit_alu(cs, R500_ALU_RGBA_OP_MAD, R500_ALPHA_OP_MAD, fpi->DstReg);
+ set_src0(cs, ip, fpi->SrcReg[0]);
+ set_src1(cs, ip, fpi->SrcReg[2]);
+ set_src2_direct(cs, ip, dest);
+ set_argA(cs, ip, 0,
+ make_rgb_swizzle(fpi->SrcReg[0]) ^ (R500_SWIZ_MOD_NEG<<9),
+ make_alpha_swizzle(fpi->SrcReg[0]) ^ (R500_SWIZ_MOD_NEG<<3));
+ set_argB_reg(cs, ip, 1, fpi->SrcReg[2]);
+ set_argC(cs, ip, 2, R500_SWIZ_RGB_RGB, SWIZZLE_W);
break;
case OPCODE_MAD:
- emit_mad(cs, counter, fpi, 0, 1, 2);
+ ip = emit_alu(cs, R500_ALU_RGBA_OP_MAD, R500_ALPHA_OP_MAD, fpi->DstReg);
+ set_src0(cs, ip, fpi->SrcReg[0]);
+ set_src1(cs, ip, fpi->SrcReg[1]);
+ set_src2(cs, ip, fpi->SrcReg[2]);
+ set_argA_reg(cs, ip, 0, fpi->SrcReg[0]);
+ set_argB_reg(cs, ip, 1, fpi->SrcReg[1]);
+ set_argC_reg(cs, ip, 2, fpi->SrcReg[2]);
break;
case OPCODE_MAX:
- src[0] = make_src(cs, fpi->SrcReg[0]);
- src[1] = make_src(cs, fpi->SrcReg[1]);
- emit_alu(cs, counter, fpi);
- code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]);
- code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]);
- code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
- | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
- | R500_ALU_RGB_SEL_B_SRC1
- | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
- code->inst[counter].inst4 |= R500_ALPHA_OP_MAX
- | R500_ALPHA_ADDRD(dest)
- | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
- | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
- code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX
- | R500_ALU_RGBA_ADDRD(dest);
+ ip = emit_alu(cs, R500_ALU_RGBA_OP_MAX, R500_ALPHA_OP_MAX, fpi->DstReg);
+ set_src0(cs, ip, fpi->SrcReg[0]);
+ set_src1(cs, ip, fpi->SrcReg[1]);
+ set_argA_reg(cs, ip, 0, fpi->SrcReg[0]);
+ set_argB_reg(cs, ip, 1, fpi->SrcReg[1]);
break;
case OPCODE_MIN:
- src[0] = make_src(cs, fpi->SrcReg[0]);
- src[1] = make_src(cs, fpi->SrcReg[1]);
- emit_alu(cs, counter, fpi);
- code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]);
- code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]);
- code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
- | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
- | R500_ALU_RGB_SEL_B_SRC1
- | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
- code->inst[counter].inst4 |= R500_ALPHA_OP_MIN
- | R500_ALPHA_ADDRD(dest)
- | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
- | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
- code->inst[counter].inst5 = R500_ALU_RGBA_OP_MIN
- | R500_ALU_RGBA_ADDRD(dest);
+ ip = emit_alu(cs, R500_ALU_RGBA_OP_MIN, R500_ALPHA_OP_MIN, fpi->DstReg);
+ set_src0(cs, ip, fpi->SrcReg[0]);
+ set_src1(cs, ip, fpi->SrcReg[1]);
+ set_argA_reg(cs, ip, 0, fpi->SrcReg[0]);
+ set_argB_reg(cs, ip, 1, fpi->SrcReg[1]);
break;
case OPCODE_MOV:
- emit_mov(cs, counter, fpi, make_src(cs, fpi->SrcReg[0]), fpi->SrcReg[0].Swizzle, dest);
+ ip = emit_alu(cs, R500_ALU_RGBA_OP_CMP, R500_ALPHA_OP_CMP, fpi->DstReg);
+ set_src0(cs, ip, fpi->SrcReg[0]);
+ set_argA_reg(cs, ip, 0, fpi->SrcReg[0]);
+ set_argB_reg(cs, ip, 0, fpi->SrcReg[0]);
+ set_argC(cs, ip, 0, R500_SWIZ_RGB_ZERO, R500_SWIZZLE_ZERO);
+ code->inst[ip].inst3 |= R500_ALU_RGB_OMOD_DISABLE;
+ code->inst[ip].inst4 |= R500_ALPHA_OMOD_DISABLE;
break;
case OPCODE_MUL:
/* Variation on MAD: src0*src1+0 */
- emit_mad(cs, counter, fpi, 0, 1, R500_SWIZZLE_ZERO);
- break;
- case OPCODE_POW:
- /* POW(a,b) = EX2(LN2(a)*b) */
- src[0] = make_src(cs, fpi->SrcReg[0]);
- src[1] = make_src(cs, fpi->SrcReg[1]);
- emit_sop(cs, counter, fpi, OPCODE_LG2, src[0], make_sop_swizzle(fpi->SrcReg[0]), get_temp(cs, 0));
- code->inst[counter].inst0 |= (R500_WRITEMASK_ARGB << 11);
- counter++;
- code->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11);
- code->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(cs, 0))
- | R500_RGB_ADDR1(src[1]);
- code->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(cs, 0))
- | R500_ALPHA_ADDR1(src[1]);
- code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
- | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
- | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
- code->inst[counter].inst4 = R500_ALPHA_OP_MAD
- | R500_ALPHA_ADDRD(get_temp(cs, 1))
- | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
- | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
- code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
- | R500_ALU_RGBA_ADDRD(get_temp(cs, 1))
- | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
- | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
- counter++;
- emit_sop(cs, counter, fpi, OPCODE_EX2, get_temp(cs, 1), make_sop_swizzle(fpi->SrcReg[0]), dest);
+ ip = emit_alu(cs, R500_ALU_RGBA_OP_MAD, R500_ALPHA_OP_MAD, fpi->DstReg);
+ set_src0(cs, ip, fpi->SrcReg[0]);
+ set_src1(cs, ip, fpi->SrcReg[1]);
+ set_argA_reg(cs, ip, 0, fpi->SrcReg[0]);
+ set_argB_reg(cs, ip, 1, fpi->SrcReg[1]);
+ set_argC(cs, ip, 0, R500_SWIZ_RGB_ZERO, R500_SWIZZLE_ZERO);
break;
case OPCODE_RCP:
src[0] = make_src(cs, fpi->SrcReg[0]);
- emit_sop(cs, counter, fpi, OPCODE_RCP, src[0], make_sop_swizzle(fpi->SrcReg[0]), dest);
+ emit_sop(cs, R500_ALPHA_OP_RCP, fpi->DstReg, src[0], make_sop_swizzle(fpi->SrcReg[0]));
break;
case OPCODE_RSQ:
src[0] = make_src(cs, fpi->SrcReg[0]);
- emit_sop(cs, counter, fpi, OPCODE_RSQ, src[0], make_sop_swizzle(fpi->SrcReg[0]), dest);
+ emit_sop(cs, R500_ALPHA_OP_RSQ, fpi->DstReg, src[0], make_sop_swizzle(fpi->SrcReg[0]));
break;
case OPCODE_SCS:
- src[0] = make_src(cs, fpi->SrcReg[0]);
- src[1] = emit_const4fv(cs, RCP_2PI);
- code->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
- | (R500_WRITEMASK_ARGB << 11);
- code->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
- | R500_RGB_ADDR1(src[1]);
- code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
- | R500_ALPHA_ADDR1(src[1]);
- code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
- | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB)
- | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB);
- code->inst[counter].inst4 = R500_ALPHA_OP_MAD
- | R500_ALPHA_ADDRD(get_temp(cs, 0))
- | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A
- | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A;
- code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
- | R500_ALU_RGBA_ADDRD(get_temp(cs, 0))
- | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
- | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
- counter++;
- code->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11);
- code->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(cs, 0));
- code->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(cs, 0));
- code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
- | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB);
- code->inst[counter].inst4 = R500_ALPHA_OP_FRC
- | R500_ALPHA_ADDRD(get_temp(cs, 1))
- | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A;
- code->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC
- | R500_ALU_RGBA_ADDRD(get_temp(cs, 1));
- counter++;
- /* Do a cosine, then a sine, masking out the channels we want to protect. */
- /* Cosine only goes in R (x) channel. */
- fpi->DstReg.WriteMask = 0x1;
- emit_sop(cs, counter, fpi, OPCODE_COS, get_temp(cs, 1), make_sop_swizzle(fpi->SrcReg[0]), dest);
- counter++;
- /* Sine only goes in G (y) channel. */
- fpi->DstReg.WriteMask = 0x2;
- emit_sop(cs, counter, fpi, OPCODE_SIN, get_temp(cs, 1), make_sop_swizzle(fpi->SrcReg[0]), dest);
- break;
- case OPCODE_SGE:
- src[0] = make_src(cs, fpi->SrcReg[0]);
- src[1] = make_src(cs, fpi->SrcReg[1]);
- code->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
- | (R500_WRITEMASK_ARGB << 11);
- code->inst[counter].inst1 = R500_RGB_ADDR1(src[0])
- | R500_RGB_ADDR2(src[1]);
- code->inst[counter].inst2 = R500_ALPHA_ADDR1(src[0])
- | R500_ALPHA_ADDR2(src[1]);
- code->inst[counter].inst3 = /* 1 */
- MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
- | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0]));
- code->inst[counter].inst4 = R500_ALPHA_OP_MAD
- | R500_ALPHA_ADDRD(get_temp(cs, 0))
- | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
- | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0]));
- code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
- | R500_ALU_RGBA_ADDRD(get_temp(cs, 0))
- | R500_ALU_RGBA_SEL_C_SRC2
- | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1]))
- | R500_ALU_RGBA_MOD_C_NEG
- | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
- | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1]))
- | R500_ALU_RGBA_ALPHA_MOD_C_NEG;
- counter++;
- /* This inst's selects need to be swapped as follows:
- * 0 -> C ; 1 -> B ; 2 -> A */
- emit_alu(cs, counter, fpi);
- code->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(cs, 0));
- code->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(cs, 0));
- code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
- | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
- | R500_ALU_RGB_SEL_B_SRC0
- | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO);
- code->inst[counter].inst4 |= R500_ALPHA_OP_CMP
- | R500_ALPHA_ADDRD(dest)
- | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
- | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO);
- code->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
- | R500_ALU_RGBA_ADDRD(dest)
- | R500_ALU_RGBA_SEL_C_SRC0
- | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_RGB)
- | R500_ALU_RGBA_ALPHA_SEL_C_SRC0
- | R500_ALU_RGBA_A_SWIZ_A;
+ emit_trig(cs, fpi);
break;
case OPCODE_SIN:
- src[0] = make_src(cs, fpi->SrcReg[0]);
- src[1] = emit_const4fv(cs, RCP_2PI);
- code->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
- | (R500_WRITEMASK_ARGB << 11);
- code->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
- | R500_RGB_ADDR1(src[1]);
- code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
- | R500_ALPHA_ADDR1(src[1]);
- code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
- | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB)
- | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB);
- code->inst[counter].inst4 = R500_ALPHA_OP_MAD
- | R500_ALPHA_ADDRD(get_temp(cs, 0))
- | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A
- | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A;
- code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
- | R500_ALU_RGBA_ADDRD(get_temp(cs, 0))
- | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
- | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
- counter++;
- code->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11);
- code->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(cs, 0));
- code->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(cs, 0));
- code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
- | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB);
- code->inst[counter].inst4 = R500_ALPHA_OP_FRC
- | R500_ALPHA_ADDRD(get_temp(cs, 1))
- | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A;
- code->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC
- | R500_ALU_RGBA_ADDRD(get_temp(cs, 1));
- counter++;
- emit_sop(cs, counter, fpi, OPCODE_SIN, get_temp(cs, 1), make_sop_swizzle(fpi->SrcReg[0]), dest);
- break;
- case OPCODE_SLT:
- src[0] = make_src(cs, fpi->SrcReg[0]);
- src[1] = make_src(cs, fpi->SrcReg[1]);
- code->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
- | (R500_WRITEMASK_ARGB << 11);
- code->inst[counter].inst1 = R500_RGB_ADDR1(src[0])
- | R500_RGB_ADDR2(src[1]);
- code->inst[counter].inst2 = R500_ALPHA_ADDR1(src[0])
- | R500_ALPHA_ADDR2(src[1]);
- code->inst[counter].inst3 = /* 1 */
- MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
- | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0]));
- code->inst[counter].inst4 = R500_ALPHA_OP_MAD
- | R500_ALPHA_ADDRD(get_temp(cs, 0))
- | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
- | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0]));
- code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
- | R500_ALU_RGBA_ADDRD(get_temp(cs, 0))
- | R500_ALU_RGBA_SEL_C_SRC2
- | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1]))
- | R500_ALU_RGBA_MOD_C_NEG
- | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
- | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1]))
- | R500_ALU_RGBA_ALPHA_MOD_C_NEG;
- counter++;
- /* This inst's selects need to be swapped as follows:
- * 0 -> C ; 1 -> B ; 2 -> A */
- emit_alu(cs, counter, fpi);
- code->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(cs, 0));
- code->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(cs, 0));
- code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
- | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO)
- | R500_ALU_RGB_SEL_B_SRC0
- | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE);
- code->inst[counter].inst4 |= R500_ALPHA_OP_CMP
- | R500_ALPHA_ADDRD(dest)
- | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ZERO)
- | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ONE);
- code->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
- | R500_ALU_RGBA_ADDRD(dest)
- | R500_ALU_RGBA_SEL_C_SRC0
- | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_RGB)
- | R500_ALU_RGBA_ALPHA_SEL_C_SRC0
- | R500_ALU_RGBA_A_SWIZ_A;
- break;
- case OPCODE_SUB:
- /* Variation on MAD: 1*src0-src1 */
- fpi->SrcReg[1].NegateBase = 0xF; /* NEG_XYZW */
- emit_mad(cs, counter, fpi, R500_SWIZZLE_ONE, 0, 1);
- break;
- case OPCODE_SWZ:
- /* TODO: The rarer negation masks! */
- emit_mov(cs, counter, fpi, make_src(cs, fpi->SrcReg[0]), fpi->SrcReg[0].Swizzle, dest);
- break;
- case OPCODE_XPD:
- /* src0 * src1 - src1 * src0
- * 1) MUL temp.xyz, src0.yzx, src1.zxy
- * 2) MAD src0.zxy, src1.yzx, -temp.xyz */
- src[0] = make_src(cs, fpi->SrcReg[0]);
- src[1] = make_src(cs, fpi->SrcReg[1]);
- code->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
- | (R500_WRITEMASK_RGB << 11);
- code->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
- | R500_RGB_ADDR1(src[1]);
- code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
- | R500_ALPHA_ADDR1(src[1]);
- /* Select [y, z, x] */
- temp_swiz = make_rgb_swizzle(fpi->SrcReg[0]);
- temp_swiz = (GET_SWZ(temp_swiz, 1) << 0) | (GET_SWZ(temp_swiz, 2) << 3) | (GET_SWZ(temp_swiz, 0) << 6);
- code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
- | MAKE_SWIZ_RGB_A(temp_swiz);
- /* Select [z, x, y] */
- temp_swiz = make_rgb_swizzle(fpi->SrcReg[1]);
- temp_swiz = (GET_SWZ(temp_swiz, 2) << 0) | (GET_SWZ(temp_swiz, 0) << 3) | (GET_SWZ(temp_swiz, 1) << 6);
- code->inst[counter].inst3 |= R500_ALU_RGB_SEL_B_SRC1
- | MAKE_SWIZ_RGB_B(temp_swiz);
- code->inst[counter].inst4 = R500_ALPHA_OP_MAD
- | R500_ALPHA_ADDRD(get_temp(cs, 0))
- | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
- | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
- code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
- | R500_ALU_RGBA_ADDRD(get_temp(cs, 0))
- | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
- | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
- counter++;
- emit_alu(cs, counter, fpi);
- code->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
- | R500_RGB_ADDR1(src[1])
- | R500_RGB_ADDR2(get_temp(cs, 0));
- code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
- | R500_ALPHA_ADDR1(src[1])
- | R500_ALPHA_ADDR2(get_temp(cs, 0));
- /* Select [z, x, y] */
- temp_swiz = make_rgb_swizzle(fpi->SrcReg[0]);
- temp_swiz = (GET_SWZ(temp_swiz, 2) << 0) | (GET_SWZ(temp_swiz, 0) << 3) | (GET_SWZ(temp_swiz, 1) << 6);
- code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
- | MAKE_SWIZ_RGB_A(temp_swiz);
- /* Select [y, z, x] */
- temp_swiz = make_rgb_swizzle(fpi->SrcReg[1]);
- temp_swiz = (GET_SWZ(temp_swiz, 1) << 0) | (GET_SWZ(temp_swiz, 2) << 3) | (GET_SWZ(temp_swiz, 0) << 6);
- code->inst[counter].inst3 |= R500_ALU_RGB_SEL_B_SRC1
- | MAKE_SWIZ_RGB_B(temp_swiz);
- code->inst[counter].inst4 |= R500_ALPHA_OP_MAD
- | R500_ALPHA_ADDRD(dest)
- | R500_ALPHA_SWIZ_A_1
- | R500_ALPHA_SWIZ_B_1;
- code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
- | R500_ALU_RGBA_ADDRD(dest)
- | R500_ALU_RGBA_SEL_C_SRC2
- | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_RGB)
- | R500_ALU_RGBA_MOD_C_NEG
- | R500_ALU_RGBA_A_SWIZ_0;
+ emit_trig(cs, fpi);
break;
case OPCODE_KIL:
case OPCODE_TEX:
case OPCODE_TXB:
case OPCODE_TXP:
- emit_tex(cs, fpi, dest, counter);
- if (fpi->DstReg.File == PROGRAM_OUTPUT)
- counter++;
+ emit_tex(cs, fpi, dest);
break;
default:
ERROR("unknown fpi->Opcode %s\n", _mesa_opcode_string(fpi->Opcode));
@@ -1245,37 +928,30 @@ static int do_inst(struct r500_pfs_compile_state *cs, struct prog_instruction *f
/* Finishing touches */
if (fpi->SaturateMode == SATURATE_ZERO_ONE) {
- code->inst[counter].inst0 |= R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP;
+ code->inst[cs->nrslots-1].inst0 |= R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP;
}
-
- counter++;
-
- return counter;
}
static GLboolean parse_program(struct r500_pfs_compile_state *cs)
{
PROG_CODE;
- int counter = 0;
struct prog_instruction* fpi;
for(fpi = cs->compiler->program->Instructions; fpi->Opcode != OPCODE_END; ++fpi) {
- counter = do_inst(cs, fpi, counter);
+ do_inst(cs, fpi); /* no counter threaded through any more; presumably the emit helpers advance cs->nrslots - confirm */
if (cs->compiler->fp->error)
return GL_FALSE;
}
/* Finish him! (If it's an ALU/OUT instruction...) */
- if ((code->inst[counter-1].inst0 & 0x3) == 1) {
- code->inst[counter-1].inst0 |= R500_INST_LAST;
+ if (cs->nrslots > 0 && (code->inst[cs->nrslots-1].inst0 & 0x3) == 1) { /* guard: with no emitted slots there is no last instruction to inspect */
+ code->inst[cs->nrslots-1].inst0 |= R500_INST_LAST;
} else {
/* We still need to put an output inst, right? */
WARN_ONCE("Final FP instruction is not an OUT.\n");
}
- cs->nrslots = counter;
-
code->max_temp_idx++;
return GL_TRUE;
@@ -1295,12 +971,10 @@ static void init_program(struct r500_pfs_compile_state *cs)
driQueryOptioni(&cs->compiler->r300->radeon.optionCache, "fp_optimization");
cs->compiler->fp->translated = GL_FALSE;
cs->compiler->fp->error = GL_FALSE;
- code->const_nr = 0;
- /* Size of pixel stack, plus 1. */
- code->max_temp_idx = 1;
- /* Temp register offset. */
- code->temp_reg_offset = 0;
- /* Whether or not we perform any depth writing. */
+
+ _mesa_bzero(code, sizeof(*code));
+ code->max_temp_idx = 1; /* Size of pixel stack, plus 1. */
+ cs->nrslots = 0;
cs->compiler->fp->writes_depth = GL_FALSE;
/* Work out what temps the Mesa inputs correspond to, this must match