From 86e6f7a73bdbced24e10fb80fdcba591e0568120 Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Sat, 19 Aug 2017 21:41:57 +0200
Subject: gallium: remove TGSI opcode DP2A

use DP3 instead.

Reviewed-by: Roland Scheidegger <sroland@vmware.com>
---
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c | 29 ---------------------
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c    |  3 ---
 src/gallium/auxiliary/nir/tgsi_to_nir.c            | 14 ----------
 src/gallium/auxiliary/tgsi/tgsi_exec.c             | 29 ---------------------
 src/gallium/auxiliary/tgsi/tgsi_info.c             |  2 +-
 src/gallium/auxiliary/tgsi/tgsi_lowering.c         | 30 +---------------------
 src/gallium/auxiliary/tgsi/tgsi_lowering.h         |  1 -
 src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h       |  1 -
 src/gallium/auxiliary/tgsi/tgsi_util.c             |  4 ---
 9 files changed, 2 insertions(+), 111 deletions(-)

(limited to 'src/gallium/auxiliary')

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
index dc6568a2d4b..0319b8885f3 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
@@ -159,34 +159,6 @@ static struct lp_build_tgsi_action dp2_action = {
    dp2_emit	 /* emit */
 };
 
-/* TGSI_OPCODE_DP2A */
-static void
-dp2a_fetch_args(
-   struct lp_build_tgsi_context * bld_base,
-   struct lp_build_emit_data * emit_data)
-{
-   dp_fetch_args(bld_base, emit_data, 2);
-   emit_data->args[5] = lp_build_emit_fetch(bld_base, emit_data->inst,
-                                            2, TGSI_CHAN_X);
-}
-
-static void
-dp2a_emit(
-   const struct lp_build_tgsi_action * action,
-   struct lp_build_tgsi_context * bld_base,
-   struct lp_build_emit_data * emit_data)
-{
-   LLVMValueRef tmp;
-   tmp = lp_build_emit_llvm(bld_base, TGSI_OPCODE_DP2, emit_data);
-   emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD,
-                                    emit_data->args[5], tmp);
-}
-
-static struct lp_build_tgsi_action dp2a_action = {
-   dp2a_fetch_args,	 /* fetch_args */
-   dp2a_emit	 /* emit */
-};
-
 /* TGSI_OPCODE_DP3 */
 static void
 dp3_fetch_args(
@@ -1286,7 +1258,6 @@ lp_set_default_actions(struct lp_build_tgsi_context * bld_base)
    bld_base->op_actions[TGSI_OPCODE_DP2] = dp2_action;
    bld_base->op_actions[TGSI_OPCODE_DP3] = dp3_action;
    bld_base->op_actions[TGSI_OPCODE_DP4] = dp4_action;
-   bld_base->op_actions[TGSI_OPCODE_DP2A] = dp2a_action;
    bld_base->op_actions[TGSI_OPCODE_DPH] = dph_action;
    bld_base->op_actions[TGSI_OPCODE_DST] = dst_action;
    bld_base->op_actions[TGSI_OPCODE_EXP] = exp_action;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
index 92ecb43a2b0..3cc50796bcd 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
@@ -554,9 +554,6 @@ lp_emit_instruction_aos(
       dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
       break;
 
-   case TGSI_OPCODE_DP2A:
-      return FALSE;
-
    case TGSI_OPCODE_FRC:
       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
       tmp0 = lp_build_floor(&bld->bld_base.base, src0);
diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c
index 46238a10255..3daa896062f 100644
--- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
+++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
@@ -1000,15 +1000,6 @@ ttn_xpd(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
    ttn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), TGSI_WRITEMASK_W);
 }
 
-static void
-ttn_dp2a(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
-{
-   ttn_move_dest(b, dest,
-                 ttn_channel(b, nir_fadd(b, nir_fdot2(b, src[0], src[1]),
-                                         src[2]),
-                             X));
-}
-
 static void
 ttn_dp2(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
 {
@@ -1536,7 +1527,6 @@ static const nir_op op_trans[TGSI_OPCODE_LAST] = {
    [TGSI_OPCODE_MAD] = nir_op_ffma,
    [TGSI_OPCODE_LRP] = 0,
    [TGSI_OPCODE_SQRT] = nir_op_fsqrt,
-   [TGSI_OPCODE_DP2A] = 0,
    [TGSI_OPCODE_FRC] = nir_op_ffract,
    [TGSI_OPCODE_FLR] = nir_op_ffloor,
    [TGSI_OPCODE_ROUND] = nir_op_fround_even,
@@ -1773,10 +1763,6 @@ ttn_emit_instruction(struct ttn_compile *c)
       ttn_dp4(b, op_trans[tgsi_op], dest, src);
       break;
 
-   case TGSI_OPCODE_DP2A:
-      ttn_dp2a(b, op_trans[tgsi_op], dest, src);
-      break;
-
    case TGSI_OPCODE_DPH:
       ttn_dph(b, op_trans[tgsi_op], dest, src);
       break;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index cc3e232d388..34a4af6f55b 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -3184,31 +3184,6 @@ exec_dp4(struct tgsi_exec_machine *mach,
    }
 }
 
-static void
-exec_dp2a(struct tgsi_exec_machine *mach,
-          const struct tgsi_full_instruction *inst)
-{
-   unsigned int chan;
-   union tgsi_exec_channel arg[3];
-
-   fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
-   fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
-   micro_mul(&arg[2], &arg[0], &arg[1]);
-
-   fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
-   fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
-   micro_mad(&arg[0], &arg[0], &arg[1], &arg[2]);
-
-   fetch_source(mach, &arg[1], &inst->Src[2], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
-   micro_add(&arg[0], &arg[0], &arg[1]);
-
-   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
-      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
-         store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
-      }
-   }
-}
-
 static void
 exec_dph(struct tgsi_exec_machine *mach,
          const struct tgsi_full_instruction *inst)
@@ -5183,10 +5158,6 @@ exec_instruction(
       exec_scalar_unary(mach, inst, micro_sqrt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
       break;
 
-   case TGSI_OPCODE_DP2A:
-      exec_dp2a(mach, inst);
-      break;
-
    case TGSI_OPCODE_FRC:
       exec_vector_unary(mach, inst, micro_frc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
       break;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c
index 8450cd7a075..c31705a0ee9 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -58,7 +58,7 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
    { 1, 3, 0, 0, 0, 0, 0, COMP, "LRP", TGSI_OPCODE_LRP },
    { 1, 3, 0, 0, 0, 0, 0, COMP, "FMA", TGSI_OPCODE_FMA },
    { 1, 1, 0, 0, 0, 0, 0, REPL, "SQRT", TGSI_OPCODE_SQRT },
-   { 1, 3, 0, 0, 0, 0, 0, REPL, "DP2A", TGSI_OPCODE_DP2A },
+   { 1, 3, 0, 0, 0, 0, 0, REPL, "", 21 }, /* removed */
    { 1, 1, 0, 0, 0, 0, 0, COMP, "F2U64", TGSI_OPCODE_F2U64 },
    { 1, 1, 0, 0, 0, 0, 0, COMP, "F2I64", TGSI_OPCODE_F2I64 },
    { 1, 1, 0, 0, 0, 0, 0, COMP, "FRC", TGSI_OPCODE_FRC },
diff --git a/src/gallium/auxiliary/tgsi/tgsi_lowering.c b/src/gallium/auxiliary/tgsi/tgsi_lowering.c
index c26c13b5e58..3013a41a8ef 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_lowering.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_lowering.c
@@ -920,14 +920,11 @@ transform_log(struct tgsi_transform_context *tctx,
  * DP2 - 2-component Dot Product
  *   dst = src0.x \times src1.x + src0.y \times src1.y
  *
- * DP2A - 2-component Dot Product And Add
- *   dst = src0.x \times src1.x + src0.y \times src1.y + src2.x
- *
  * NOTE: these are translated into sequence of MUL/MAD(/ADD) scalar
  * operations, which is what you'd prefer for a ISA that is natively
  * scalar.  Probably a native vector ISA would at least already have
  * DP4/DP3 instructions, but perhaps there is room for an alternative
- * translation for DPH/DP2/DP2A using vector instructions.
+ * translation for DPH/DP2 using vector instructions.
  *
  * ; needs: 1 tmp
  * MUL tmpA.x, src0.x, src1.x
@@ -939,8 +936,6 @@ transform_log(struct tgsi_transform_context *tctx,
  *   } else if (DP4) {
  *     MAD tmpA.x, src0.w, src1.w, tmpA.x
  *   }
- * } else if (DP2A) {
- *   ADD tmpA.x, src2.x, tmpA.x
  * }
  * ; fixup last instruction to replicate into dst
  */
@@ -948,7 +943,6 @@ transform_log(struct tgsi_transform_context *tctx,
 #define DP3_GROW  (NINST(2) + NINST(3) + NINST(3) - OINST(2))
 #define DPH_GROW  (NINST(2) + NINST(3) + NINST(3) + NINST(2) - OINST(2))
 #define DP2_GROW  (NINST(2) + NINST(3) - OINST(2))
-#define DP2A_GROW (NINST(2) + NINST(3) + NINST(2) - OINST(3))
 #define DOTP_TMP  1
 static void
 transform_dotp(struct tgsi_transform_context *tctx,
@@ -958,7 +952,6 @@ transform_dotp(struct tgsi_transform_context *tctx,
    struct tgsi_full_dst_register *dst  = &inst->Dst[0];
    struct tgsi_full_src_register *src0 = &inst->Src[0];
    struct tgsi_full_src_register *src1 = &inst->Src[1];
-   struct tgsi_full_src_register *src2 = &inst->Src[2]; /* only DP2A */
    struct tgsi_full_instruction new_inst;
    unsigned opcode = inst->Instruction.Opcode;
 
@@ -1026,17 +1019,6 @@ transform_dotp(struct tgsi_transform_context *tctx,
             reg_src(&new_inst.Src[1], src1, SWIZ(W, W, W, W));
             reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
          }
-      } else if (opcode == TGSI_OPCODE_DP2A) {
-         tctx->emit_instruction(tctx, &new_inst);
-
-         /* ADD tmpA.x, src2.x, tmpA.x */
-         new_inst = tgsi_default_full_instruction();
-         new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
-         new_inst.Instruction.NumDstRegs = 1;
-         reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
-         new_inst.Instruction.NumSrcRegs = 2;
-         reg_src(&new_inst.Src[0], src2, SWIZ(X, X, X, X));
-         reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, X, X, X));
       }
 
       /* fixup last instruction to write to dst: */
@@ -1562,11 +1544,6 @@ transform_instr(struct tgsi_transform_context *tctx,
          goto skip;
       transform_dotp(tctx, inst);
       break;
-   case TGSI_OPCODE_DP2A:
-      if (!ctx->config->lower_DP2A)
-         goto skip;
-      transform_dotp(tctx, inst);
-      break;
    case TGSI_OPCODE_FLR:
       if (!ctx->config->lower_FLR)
          goto skip;
@@ -1657,7 +1634,6 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config,
          OPCS(DP3) ||
          OPCS(DPH) ||
          OPCS(DP2) ||
-         OPCS(DP2A) ||
          OPCS(FLR) ||
          OPCS(CEIL) ||
          OPCS(TRUNC) ||
@@ -1725,10 +1701,6 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config,
       newlen += DP2_GROW * OPCS(DP2);
       numtmp = MAX2(numtmp, DOTP_TMP);
    }
-   if (OPCS(DP2A)) {
-      newlen += DP2A_GROW * OPCS(DP2A);
-      numtmp = MAX2(numtmp, DOTP_TMP);
-   }
    if (OPCS(FLR)) {
       newlen += FLR_GROW * OPCS(FLR);
       numtmp = MAX2(numtmp, FLR_TMP);
diff --git a/src/gallium/auxiliary/tgsi/tgsi_lowering.h b/src/gallium/auxiliary/tgsi/tgsi_lowering.h
index 20e4f843a92..85e4b8e9833 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_lowering.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_lowering.h
@@ -67,7 +67,6 @@ struct tgsi_lowering_config
    unsigned lower_DP3:1;
    unsigned lower_DPH:1;
    unsigned lower_DP2:1;
-   unsigned lower_DP2A:1;
    unsigned lower_FLR:1;
    unsigned lower_CEIL:1;
    unsigned lower_TRUNC:1;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
index f244db61023..e4cbdae2718 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
@@ -56,7 +56,6 @@ OP13(MAD)
 OP12_TEX(TEX_LZ)
 OP13(LRP)
 OP11(SQRT)
-OP13(DP2A)
 OP11(FRC)
 OP12_TEX(TXF_LZ)
 OP11(FLR)
diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c b/src/gallium/auxiliary/tgsi/tgsi_util.c
index 932545cb897..fc61351f6ed 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_util.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_util.c
@@ -262,10 +262,6 @@ tgsi_util_get_inst_usage_mask(const struct tgsi_full_instruction *inst,
       read_mask = write_mask & TGSI_WRITEMASK_XYZ ? TGSI_WRITEMASK_X : 0;
       break;
 
-   case TGSI_OPCODE_DP2A:
-      read_mask = src_idx == 2 ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_XY;
-      break;
-
    case TGSI_OPCODE_DP2:
       read_mask = TGSI_WRITEMASK_XY;
       break;
-- 
cgit v1.2.3