summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Nicolai Hähnle <[email protected]> 2017-01-19 14:44:57 +0100
committer Nicolai Hähnle <[email protected]> 2017-01-23 16:17:15 +0100
commit e4f8f9a638c1ffb9b76840b088290f11f0f91813 (patch)
tree 85e32c08608b6e477d1fc9c418b53fae2e976639
parent 488560cfe6ee2206f7a7f894694ebc43b419be61 (diff)
r600: implement DDIV
Tested-by: Glenn Kennard <[email protected]>
Tested-by: James Harvey <[email protected]>
Cc: 17.0 <[email protected]>
-rw-r--r-- src/gallium/drivers/r600/r600_shader.c 59
1 files changed, 59 insertions, 0 deletions
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 5c4bc91b498..eaabb042f97 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -4391,6 +4391,63 @@ static int cayman_mul_double_instr(struct r600_shader_ctx *ctx)
}
/*
+ * Emit RECIP_64 + MUL_64 to implement division: RECIP_64 is emitted on src[1] into a temp, then MUL_64 of src[0] by that temp, then the temp's first two channels are copied to the destination.
+ * Handles one double (XY or ZW write mask) per instruction. Returns 0 on success, or the first non-zero code returned by an emit helper. */
+static int cayman_ddiv_instr(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ int r; /* status of the most recent emit call; non-zero is returned to the caller */
+ struct r600_bytecode_alu alu; /* scratch ALU descriptor, zeroed and refilled for each emitted instruction */
+ int t1 = ctx->temp_reg; /* temp register: RECIP_64 destination, then MUL_64 source and destination */
+ int k; /* which double of the destination is written: 0 for XY, 1 otherwise */
+
+ /* Only support one double at a time. This is the same constraint as
+ * in DMUL lowering. If the assert is compiled out, any mask other than XY is handled as ZW (k = 1) below. */
+ assert(inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY ||
+ inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_ZW);
+
+ k = inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY ? 0 : 1;
+
+ r = cayman_emit_unary_double_raw(ctx->bc, ALU_OP2_RECIP_64, t1, &ctx->src[1], false); /* presumably leaves 1/src[1] in t1 -- helper is defined outside this hunk, confirm */
+ if (r)
+ return r;
+
+ for (int i = 0; i < 4; i++) { /* emit four MUL_64 ALU instructions, one per destination channel of t1 */
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP2_MUL_64;
+
+ r600_bytecode_src(&alu.src[0], &ctx->src[0], k * 2 + ((i == 3) ? 0 : 1)); /* i = 0..2 read channel k*2+1 of src[0]; i = 3 reads channel k*2 */
+
+ alu.src[1].sel = t1;
+ alu.src[1].chan = (i == 3) ? 0 : 1; /* same 1,1,1,0 channel pattern on t1; NOTE(review): presumably the operand layout MUL_64 expects -- confirm against the ISA docs */
+
+ alu.dst.sel = t1;
+ alu.dst.chan = i;
+ alu.dst.write = 1;
+ if (i == 3)
+ alu.last = 1; /* set only on the fourth instruction of this loop */
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+
+ for (int i = 0; i < 2; i++) { /* MOV channels 0 and 1 of t1 to the destination */
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP1_MOV;
+ alu.src[0].sel = t1;
+ alu.src[0].chan = i;
+ tgsi_dst(ctx, &inst->Dst[0], k * 2 + i, &alu.dst); /* presumably fills alu.dst for destination channel k*2+i -- confirm against tgsi_dst */
+ alu.dst.write = 1;
+ if (i == 1)
+ alu.last = 1; /* set only on the second MOV */
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ return 0;
+}
+
+/*
* r600 - trunc to -PI..PI range
* r700 - normalize by dividing by 2PI
* see fdo bug 27901
@@ -9400,6 +9457,7 @@ static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] =
[TGSI_OPCODE_DNEG] = { ALU_OP2_ADD_64, tgsi_dneg},
[TGSI_OPCODE_DADD] = { ALU_OP2_ADD_64, tgsi_op2_64},
[TGSI_OPCODE_DMUL] = { ALU_OP2_MUL_64, cayman_mul_double_instr},
+ [TGSI_OPCODE_DDIV] = { 0, cayman_ddiv_instr },
[TGSI_OPCODE_DMAX] = { ALU_OP2_MAX_64, tgsi_op2_64},
[TGSI_OPCODE_DMIN] = { ALU_OP2_MIN_64, tgsi_op2_64},
[TGSI_OPCODE_DSLT] = { ALU_OP2_SETGT_64, tgsi_op2_64_single_dest_s},
@@ -9622,6 +9680,7 @@ static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] =
[TGSI_OPCODE_DNEG] = { ALU_OP2_ADD_64, tgsi_dneg},
[TGSI_OPCODE_DADD] = { ALU_OP2_ADD_64, tgsi_op2_64},
[TGSI_OPCODE_DMUL] = { ALU_OP2_MUL_64, cayman_mul_double_instr},
+ [TGSI_OPCODE_DDIV] = { 0, cayman_ddiv_instr },
[TGSI_OPCODE_DMAX] = { ALU_OP2_MAX_64, tgsi_op2_64},
[TGSI_OPCODE_DMIN] = { ALU_OP2_MIN_64, tgsi_op2_64},
[TGSI_OPCODE_DSLT] = { ALU_OP2_SETGT_64, tgsi_op2_64_single_dest_s},