summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Christoph Bumiller <[email protected]> 2012-04-28 17:06:59 +0200
committer Christoph Bumiller <[email protected]> 2012-04-29 18:03:11 +0200
commit 1f4c154f0253ed8fb448402532cfa670f74e69cd (patch)
tree 10c485cfad921e6d962c2146efc468ff0aec63a8
parent d6ab3106cf7475cdaddf788a3e650bdd5833f73c (diff)
nv50/ir/opt: try to convert ABS(SUB) to SAD
-rw-r--r-- src/gallium/drivers/nv50/codegen/nv50_ir.cpp 25
-rw-r--r-- src/gallium/drivers/nv50/codegen/nv50_ir.h 1
-rw-r--r-- src/gallium/drivers/nv50/codegen/nv50_ir_emit_nv50.cpp 33
-rw-r--r-- src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h 11
-rw-r--r-- src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp 107
-rw-r--r-- src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp 16
-rw-r--r-- src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp 2
7 files changed, 179 insertions, 16 deletions
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir.cpp
index 1006985ab93..335e9e01e61 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir.cpp
@@ -658,6 +658,31 @@ Instruction::swapSources(int a, int b)
srcs[b].mod = m;
}
+// Shift every source with index >= s up by delta slots. TODO: extend for delta < 0
+void
+Instruction::moveSources(int s, int delta) // s: first source index to move; delta: number of slots to shift by
+{
+ if (delta == 0)
+ return; // nothing to move
+ assert(delta > 0); // negative shifts are not implemented (see TODO above)
+ // Pass 1: bump every stored source index that points at or beyond s.
+ int k;
+ for (k = 0; srcExists(k); ++k) { // on exit k == number of existing sources
+ for (int i = 0; i < 2; ++i) {
+ if (src(k).indirect[i] >= s) // indirect[] holds indices into this instruction's sources
+ src(k).indirect[i] += delta;
+ }
+ }
+ if (predSrc >= s)
+ predSrc += delta;
+ if (flagsSrc >= s)
+ flagsSrc += delta;
+ // Pass 2: copy from the last source downwards so nothing is overwritten before it has been moved.
+ --k; // index of the last existing source
+ for (int p = k + delta; k >= s; --k, --p)
+ setSrc(p, src(k)); // slots s .. s+delta-1 are not cleared here; the caller is expected to fill them
+}
+
void
Instruction::takeExtraSources(int s, Value *values[3])
{
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir.h b/src/gallium/drivers/nv50/codegen/nv50_ir.h
index e544d071b52..9b47e3e13c1 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir.h
@@ -603,6 +603,7 @@ public:
void setSrc(int s, Value *);
void setSrc(int s, const ValueRef&);
void swapSources(int a, int b);
+ void moveSources(int s, int delta); // NOTE: only delta > 0 implemented
bool setIndirect(int s, int dim, Value *);
inline ValueRef& src(int s) { return srcs[s]; }
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_emit_nv50.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_emit_nv50.cpp
index c534d4a0c5e..7542b84d17a 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_emit_nv50.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_emit_nv50.cpp
@@ -99,6 +99,7 @@ private:
void emitFMUL(const Instruction *);
void emitFMAD(const Instruction *);
void emitIMAD(const Instruction *);
+ void emitISAD(const Instruction *);
void emitMINMAX(const Instruction *);
@@ -1023,6 +1024,35 @@ CodeEmitterNV50::emitIMAD(const Instruction *i)
}
void // Emit the machine encoding for an OP_SAD instruction (dispatched from emitInstruction).
+CodeEmitterNV50::emitISAD(const Instruction *i)
+{
+ if (i->encSize == 8) { // long form: source type is selected through code[1]
+ code[0] = 0x50000000;
+ switch (i->sType) {
+ case TYPE_U32: code[1] = 0x04000000; break;
+ case TYPE_S32: code[1] = 0x0c000000; break;
+ case TYPE_U16: code[1] = 0x00000000; break;
+ case TYPE_S16: code[1] = 0x08000000; break;
+ default:
+ assert(0); // only 16/32-bit integer source types are handled
+ break;
+ }
+ emitForm_MAD(i); // operands are encoded with the MAD form
+ } else { // any other encSize: source type is folded into code[0]
+ switch (i->sType) {
+ case TYPE_U32: code[0] = 0x50008000; break;
+ case TYPE_S32: code[0] = 0x50008100; break;
+ case TYPE_U16: code[0] = 0x50000000; break;
+ case TYPE_S16: code[0] = 0x50000100; break;
+ default:
+ assert(0); // only 16/32-bit integer source types are handled
+ break;
+ }
+ emitForm_MUL(i); // operands are encoded with the MUL form
+ }
+}
+
+void
CodeEmitterNV50::emitSET(const Instruction *i)
{
code[0] = 0x30000000;
@@ -1543,6 +1573,9 @@ CodeEmitterNV50::emitInstruction(Instruction *insn)
else
emitIMAD(insn);
break;
+ case OP_SAD:
+ emitISAD(insn);
+ break;
case OP_NOT:
emitNOT(insn);
break;
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h b/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h
index 93e502ea609..b62431f1e31 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h
@@ -114,6 +114,17 @@ static inline bool isSignedType(DataType ty)
}
}
+static inline DataType intTypeToSigned(DataType ty) // Map an unsigned 8/16/32-bit integer type to its signed counterpart.
+{
+ switch (ty) {
+ case TYPE_U32: return TYPE_S32;
+ case TYPE_U16: return TYPE_S16;
+ case TYPE_U8: return TYPE_S8;
+ default:
+ return ty; // every other type is returned unchanged
+ }
+}
+
const ValueRef *ValueRef::getIndirect(int dim) const
{
return isIndirect(dim) ? &insn->src(indirect[dim]) : NULL;
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp
index 5bc3a450779..8613d7f2efd 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp
@@ -915,57 +915,129 @@ class AlgebraicOpt : public Pass
private:
virtual bool visit(BasicBlock *);
- void handleADD(Instruction *);
+ void handleABS(Instruction *);
+ bool handleADD(Instruction *);
+ bool tryADDToMADOrSAD(Instruction *, operation toOp);
void handleMINMAX(Instruction *);
void handleRCP(Instruction *);
void handleSLCT(Instruction *);
void handleLOGOP(Instruction *);
void handleCVT(Instruction *);
+
+ BuildUtil bld;
};
void
+AlgebraicOpt::handleABS(Instruction *abs)
+{
+ // Fold ABS(SUB(a, b)) or ABS(ADD(a, NEG(b))) into SAD(a, b, 0) when the
+ // target supports OP_SAD for abs->dType; abs is left untouched otherwise.
+ Instruction *sub = abs->getSrc(0)->getInsn();
+ DataType ty;
+ if (!sub ||
+ !prog->getTarget()->isOpSupported(OP_SAD, abs->dType))
+ return;
+ // hidden conversion ?
+ ty = intTypeToSigned(sub->dType);
+ if (abs->dType != abs->sType || ty != abs->sType)
+ return;
+
+ // The opcode test has to come first: src(s) indexes srcs[] directly, so
+ // src(1) may only be inspected once sub is known to be a two-source
+ // ADD/SUB. Both operands must be plain GPRs without modifiers.
+ if ((sub->op != OP_ADD && sub->op != OP_SUB) ||
+ sub->src(0).getFile() != FILE_GPR || sub->src(0).mod ||
+ sub->src(1).getFile() != FILE_GPR || sub->src(1).mod)
+ return;
+
+ Value *src0 = sub->getSrc(0);
+ Value *src1 = sub->getSrc(1);
+
+ if (sub->op == OP_ADD) {
+ // An ADD only qualifies when one operand is produced by a NEG of the
+ // same (signed) type; the negated value then becomes the subtrahend.
+ Instruction *neg = sub->getSrc(1)->getInsn();
+ if (neg && neg->op != OP_NEG) {
+ // src1 is not a NEG, so try the commuted form ADD(NEG(b), a)
+ neg = sub->getSrc(0)->getInsn();
+ src0 = sub->getSrc(1);
+ }
+ if (!neg || neg->op != OP_NEG ||
+ neg->dType != neg->sType || neg->sType != ty)
+ return;
+ src1 = neg->getSrc(0);
+ }
+
+ // found ABS(SUB)
+ abs->moveSources(1, 2); // move sources >=1 up by 2
+ abs->op = OP_SAD;
+ abs->setType(sub->dType);
+ abs->setSrc(0, src0);
+ abs->setSrc(1, src1);
+ // SAD takes an accumulator as third source; load an immediate 0 for it
+ // right before the instruction.
+ bld.setPosition(abs, false);
+ abs->setSrc(2, bld.loadImm(bld.getSSA(typeSizeof(ty)), 0));
+}
+
+bool // Returns true if the ADD was rewritten into a MAD or SAD.
+AlgebraicOpt::handleADD(Instruction *add)
+{
+ Value *src0 = add->getSrc(0);
+ Value *src1 = add->getSrc(1);
+ // Fusion is only attempted when both operands live in the GPR file.
+ if (src0->reg.file != FILE_GPR || src1->reg.file != FILE_GPR)
+ return false;
+ // Try MAD first, then SAD; each only if the target supports the op for add->dType.
+ bool changed = false;
+ if (!changed && prog->getTarget()->isOpSupported(OP_MAD, add->dType)) // !changed is always true here
+ changed = tryADDToMADOrSAD(add, OP_MAD);
+ if (!changed && prog->getTarget()->isOpSupported(OP_SAD, add->dType))
+ changed = tryADDToMADOrSAD(add, OP_SAD);
+ return changed;
+}
+
+// ADD(SAD(a,b,0), c) -> SAD(a,b,c)
+// ADD(MUL(a,b), c) -> MAD(a,b,c)
+bool
+AlgebraicOpt::tryADDToMADOrSAD(Instruction *add, operation toOp)
+{
+ Value *src0 = add->getSrc(0);
+ Value *src1 = add->getSrc(1);
Value *src;
int s;
+ const operation srcOp = toOp == OP_SAD ? OP_SAD : OP_MUL;
+ const Modifier modBad = Modifier(~((toOp == OP_MAD) ? NV50_IR_MOD_NEG : 0));
Modifier mod[4];
- if (!prog->getTarget()->isOpSupported(OP_MAD, add->dType))
- return;
-
- if (src0->reg.file != FILE_GPR || src1->reg.file != FILE_GPR)
- return;
-
if (src0->refCount() == 1 &&
- src0->getUniqueInsn() && src0->getUniqueInsn()->op == OP_MUL)
+ src0->getUniqueInsn() && src0->getUniqueInsn()->op == srcOp)
s = 0;
else
if (src1->refCount() == 1 &&
- src1->getUniqueInsn() && src1->getUniqueInsn()->op == OP_MUL)
+ src1->getUniqueInsn() && src1->getUniqueInsn()->op == srcOp)
s = 1;
else
- return;
+ return false;
if ((src0->getUniqueInsn() && src0->getUniqueInsn()->bb != add->bb) ||
(src1->getUniqueInsn() && src1->getUniqueInsn()->bb != add->bb))
- return;
+ return false;
src = add->getSrc(s);
if (src->getInsn()->postFactor)
- return;
+ return false;
+ if (toOp == OP_SAD) {
+ ImmediateValue imm;
+ if (!src->getInsn()->src(2).getImmediate(imm))
+ return false;
+ if (!imm.isInteger(0))
+ return false;
+ }
mod[0] = add->src(0).mod;
mod[1] = add->src(1).mod;
mod[2] = src->getUniqueInsn()->src(0).mod;
mod[3] = src->getUniqueInsn()->src(1).mod;
- if (((mod[0] | mod[1]) | (mod[2] | mod[3])) & Modifier(~NV50_IR_MOD_NEG))
- return;
+ if (((mod[0] | mod[1]) | (mod[2] | mod[3])) & modBad)
+ return false;
- add->op = OP_MAD;
+ add->op = toOp;
add->subOp = src->getInsn()->subOp; // potentially mul-high
add->setSrc(2, add->src(s ? 0 : 1));
@@ -974,6 +1046,8 @@ AlgebraicOpt::handleADD(Instruction *add)
add->src(0).mod = mod[2] ^ mod[s];
add->setSrc(1, src->getInsn()->getSrc(1));
add->src(1).mod = mod[3];
+
+ return true;
}
void
@@ -1140,6 +1214,9 @@ AlgebraicOpt::visit(BasicBlock *bb)
for (Instruction *i = bb->getEntry(); i; i = next) {
next = i->next;
switch (i->op) {
+ case OP_ABS:
+ handleABS(i);
+ break;
case OP_ADD:
handleADD(i);
break;
diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp
index 026a6a0ee99..2ca4979dc74 100644
--- a/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp
+++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp
@@ -87,6 +87,7 @@ private:
void emitUMUL(const Instruction *);
void emitFMUL(const Instruction *);
void emitIMAD(const Instruction *);
+ void emitISAD(const Instruction *);
void emitFMAD(const Instruction *);
void emitNOT(Instruction *);
@@ -621,6 +622,18 @@ CodeEmitterNVC0::emitIMAD(const Instruction *i)
}
void // Emit the machine encoding for an OP_SAD instruction (dispatched from emitInstruction).
+CodeEmitterNVC0::emitISAD(const Instruction *i)
+{
+ assert(i->dType == TYPE_S32 || i->dType == TYPE_U32); // only 32-bit integer SAD is expected here
+ assert(i->encSize == 8); // only the 8-byte encoding is emitted
+
+ emitForm_A(i, HEX64(38000000, 00000003)); // the constant is the base opcode passed to form A
+
+ if (i->dType == TYPE_S32)
+ code[0] |= 1 << 5; // bit 5 of the first word is set for the signed variant
+}
+
+void
CodeEmitterNVC0::emitNOT(Instruction *i)
{
assert(i->encSize == 8);
@@ -1608,6 +1621,9 @@ CodeEmitterNVC0::emitInstruction(Instruction *insn)
else
emitIMAD(insn);
break;
+ case OP_SAD:
+ emitISAD(insn);
+ break;
case OP_NOT:
emitNOT(insn);
break;
diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp
index ffa40dd93fc..10c2d09d657 100644
--- a/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp
+++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp
@@ -456,7 +456,7 @@ TargetNVC0::isOpSupported(operation op, DataType ty) const
{
if ((op == OP_MAD || op == OP_FMA) && (ty != TYPE_F32))
return false;
- if (op == OP_SAD && ty != TYPE_S32)
+ if (op == OP_SAD && ty != TYPE_S32 && ty != TYPE_U32)
return false;
if (op == OP_POW || op == OP_SQRT || op == OP_DIV || op == OP_MOD)
return false;