summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNicolai Hähnle <[email protected]>2017-01-19 14:38:54 +0100
committerNicolai Hähnle <[email protected]>2017-01-23 16:15:45 +0100
commit76b02d2fe1df5351f67f53d07b37952043f0a84c (patch)
tree7090991b2d6856fc3398c8fa67f257a615c174b7
parentf3f9207786c2862b9b3fed1b4348ecb6a85cf02e (diff)
r600: double multiply can handle only one multiply at a time
It seems clear that trying to multiply two pairs of doubles would result in the temporary register getting overwritten by the second pair. So make the code more explicit. Tested-by: Glenn Kennard <[email protected]> Tested-by: James Harvey <[email protected]> Cc: 17.0 <[email protected]>
-rw-r--r--src/gallium/drivers/r600/r600_shader.c36
1 files changed, 19 insertions, 17 deletions
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index ebe27445486..7d1452add34 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -4326,25 +4326,27 @@ static int cayman_mul_double_instr(struct r600_shader_ctx *ctx)
int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
int t1 = ctx->temp_reg;
- for (k = 0; k < 2; k++) {
- if (!(inst->Dst[0].Register.WriteMask & (0x3 << (k * 2))))
- continue;
+ /* t1 would get overwritten below if we actually tried to
+ * multiply two pairs of doubles at a time. */
+ assert(inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY ||
+ inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_ZW);
- for (i = 0; i < 4; i++) {
- memset(&alu, 0, sizeof(struct r600_bytecode_alu));
- alu.op = ctx->inst_info->op;
- for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
- r600_bytecode_src(&alu.src[j], &ctx->src[j], k * 2 + ((i == 3) ? 0 : 1));
- }
- alu.dst.sel = t1;
- alu.dst.chan = i;
- alu.dst.write = 1;
- if (i == 3)
- alu.last = 1;
- r = r600_bytecode_add_alu(ctx->bc, &alu);
- if (r)
- return r;
+ k = inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY ? 0 : 1;
+
+ for (i = 0; i < 4; i++) {
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ctx->inst_info->op;
+ for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
+ r600_bytecode_src(&alu.src[j], &ctx->src[j], k * 2 + ((i == 3) ? 0 : 1));
}
+ alu.dst.sel = t1;
+ alu.dst.chan = i;
+ alu.dst.write = 1;
+ if (i == 3)
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
}
for (i = 0; i <= lasti; i++) {