diff options
author | Nicolai Hähnle <[email protected]> | 2009-08-26 23:31:37 +0200 |
---|---|---|
committer | Nicolai Hähnle <[email protected]> | 2009-08-27 01:46:50 +0200 |
commit | c024f1047ff8f14fd6deaaffbb1a9aa567988549 (patch) | |
tree | 58f009fc634e08d2e7aa71ec564e46c6e202833a /src/mesa/drivers/dri/r300 | |
parent | 2114acb044b1d8860350a9f14890fce1e2f315f0 (diff) |
r300/compiler: Fix vertex program MAD emit
Only use the macro variant of MAD when absolutely necessary.
Apparently it cannot deal with relative addressing.
Signed-off-by: Nicolai Hähnle <[email protected]>
Diffstat (limited to 'src/mesa/drivers/dri/r300')
-rw-r--r-- | src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c | 52 |
1 files changed, 46 insertions, 6 deletions
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index fc9c8f805ae..6c1a0386891 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -284,12 +284,52 @@ static void ei_mad(struct r300_vertex_program_code *vp, struct prog_instruction *vpi, GLuint * inst) { - inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD, - GL_FALSE, - GL_TRUE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); + /* Remarks about hardware limitations of MAD + * (please preserve this comment, as this information is _NOT_ + * in the documentation provided by AMD). + * + * As described in the documentation, MAD with three unique temporary + * source registers requires the use of the macro version. + * + * However (and this is not mentioned in the documentation), apparently + * the macro version is _NOT_ a full superset of the normal version. + * In particular, the macro version does not always work when relative + * addressing is used in the source operands. + * + * This limitation caused incorrect rendering in Sauerbraten's OpenGL + * assembly shader path when using medium quality animations + * (i.e. animations with matrix blending instead of quaternion blending). + * + * Unfortunately, I (nha) have been unable to extract a Piglit regression + * test for this issue - for some reason, it is possible to have vertex + * programs whose prefix is *exactly* the same as the prefix of the + * offending program in Sauerbraten up to the offending instruction + * without causing any trouble. + * + * Bottom line: Only use the macro version only when really necessary; + * according to AMD docs, this should improve performance by one clock + * as a nice side bonus. + */ + if (vpi->SrcReg[0].File == PROGRAM_TEMPORARY && + vpi->SrcReg[1].File == PROGRAM_TEMPORARY && + vpi->SrcReg[2].File == PROGRAM_TEMPORARY && + vpi->SrcReg[0].Index != vpi->SrcReg[1].Index && + vpi->SrcReg[0].Index != vpi->SrcReg[2].Index && + vpi->SrcReg[1].Index != vpi->SrcReg[2].Index) { + inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD, + GL_FALSE, + GL_TRUE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + } else { + inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + } inst[1] = t_src(vp, &vpi->SrcReg[0]); inst[2] = t_src(vp, &vpi->SrcReg[1]); inst[3] = t_src(vp, &vpi->SrcReg[2]); |