summary | refs | log | tree | commit | diff | stats
path: root/src/gallium/drivers/r600
diff options
context:
space:
mode:
author Martin Andersson <[email protected]> 2013-04-02 22:43:33 +0200
committer Marek Olšák <[email protected]> 2013-04-09 03:09:37 +0200
commit a8246927e35a49097f70cffb7fa8dd05ec1365e1 (patch)
tree 050abd29a3fd9247784f194d7f908619060a639d /src/gallium/drivers/r600
parent b76539aabea6a910e15d2c626901c7b208868f0c (diff)
r600g: Fix UMAD on Cayman
The multiplication part of tgsi_umad did not work on Cayman, because it did not populate the correct vector slots.

This fixes hard lockups in the EXT_transform_feedback/order tests.

NOTE: This is a candidate for the stable branches (although it might not be easy to cherry-pick).

Signed-off-by: Marek Olšák <[email protected]>
Diffstat (limited to 'src/gallium/drivers/r600')
-rw-r--r-- src/gallium/drivers/r600/r600_shader.c 45
1 files changed, 32 insertions, 13 deletions
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index a5d224f5e1f..f8017072228 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -5838,7 +5838,7 @@ static int tgsi_umad(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bytecode_alu alu;
- int i, j, r;
+ int i, j, k, r;
int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
/* src0 * src1 */
@@ -5846,21 +5846,40 @@ static int tgsi_umad(struct r600_shader_ctx *ctx)
if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
continue;
- memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ if (ctx->bc->chip_class == CAYMAN) {
+ for (j = 0 ; j < 4; j++) {
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
- alu.dst.chan = i;
- alu.dst.sel = ctx->temp_reg;
- alu.dst.write = 1;
+ alu.op = ALU_OP2_MULLO_UINT;
+ for (k = 0; k < inst->Instruction.NumSrcRegs; k++) {
+ r600_bytecode_src(&alu.src[k], &ctx->src[k], i);
+ }
+ tgsi_dst(ctx, &inst->Dst[0], j, &alu.dst);
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.write = (j == i);
+ if (j == 3)
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ } else {
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
- alu.op = ALU_OP2_MULLO_UINT;
- for (j = 0; j < 2; j++) {
- r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
- }
+ alu.dst.chan = i;
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.write = 1;
- alu.last = 1;
- r = r600_bytecode_add_alu(ctx->bc, &alu);
- if (r)
- return r;
+ alu.op = ALU_OP2_MULLO_UINT;
+ for (j = 0; j < 2; j++) {
+ r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
+ }
+
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
}