diff options
author | Martin Andersson <[email protected]> | 2013-04-02 22:43:33 +0200 |
---|---|---|
committer | Marek Olšák <[email protected]> | 2013-04-09 03:09:37 +0200 |
commit | a8246927e35a49097f70cffb7fa8dd05ec1365e1 (patch) | |
tree | 050abd29a3fd9247784f194d7f908619060a639d /src | |
parent | b76539aabea6a910e15d2c626901c7b208868f0c (diff) |
r600g: Fix UMAD on Cayman
The multiplication part of tgsi_umad did not work on Cayman, because it did
not populate the correct vector slots.
This fixed hardlocks in the EXT_transform_feedback/order tests.
NOTE: This is a candidate for the stable branches.
(might not be easy to cherry-pick though)
Signed-off-by: Marek Olšák <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- | src/gallium/drivers/r600/r600_shader.c | 45 |
1 files changed, 32 insertions, 13 deletions
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index a5d224f5e1f..f8017072228 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -5838,7 +5838,7 @@ static int tgsi_umad(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bytecode_alu alu; - int i, j, r; + int i, j, k, r; int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); /* src0 * src1 */ @@ -5846,21 +5846,40 @@ static int tgsi_umad(struct r600_shader_ctx *ctx) if (!(inst->Dst[0].Register.WriteMask & (1 << i))) continue; - memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + if (ctx->bc->chip_class == CAYMAN) { + for (j = 0 ; j < 4; j++) { + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.dst.chan = i; - alu.dst.sel = ctx->temp_reg; - alu.dst.write = 1; + alu.op = ALU_OP2_MULLO_UINT; + for (k = 0; k < inst->Instruction.NumSrcRegs; k++) { + r600_bytecode_src(&alu.src[k], &ctx->src[k], i); + } + tgsi_dst(ctx, &inst->Dst[0], j, &alu.dst); + alu.dst.sel = ctx->temp_reg; + alu.dst.write = (j == i); + if (j == 3) + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + } + } else { + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.op = ALU_OP2_MULLO_UINT; - for (j = 0; j < 2; j++) { - r600_bytecode_src(&alu.src[j], &ctx->src[j], i); - } + alu.dst.chan = i; + alu.dst.sel = ctx->temp_reg; + alu.dst.write = 1; - alu.last = 1; - r = r600_bytecode_add_alu(ctx->bc, &alu); - if (r) - return r; + alu.op = ALU_OP2_MULLO_UINT; + for (j = 0; j < 2; j++) { + r600_bytecode_src(&alu.src[j], &ctx->src[j], i); + } + + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + } } |