summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChristoph Bumiller <[email protected]>2013-03-02 14:59:06 +0100
committerChristoph Bumiller <[email protected]>2013-03-12 12:55:36 +0100
commit99e4eba669f13a0dc80880f4f91e2338377c1667 (patch)
tree43e1feb96d009a1b17b21a6d5aa51b62d0153447
parentac9f19e485a15b2c58680d5c884597f60d6f1b1b (diff)
nv50/ir: implement splitting of 64 bit ops after RA
-rw-r--r--src/gallium/drivers/nv50/codegen/nv50_ir.h3
-rw-r--r--src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp70
-rw-r--r--src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h4
-rw-r--r--src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp27
-rw-r--r--src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp2
-rw-r--r--src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp31
6 files changed, 98 insertions, 39 deletions
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir.h b/src/gallium/drivers/nv50/codegen/nv50_ir.h
index dd7ff90de96..548125901e2 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir.h
@@ -48,7 +48,7 @@ enum operation
OP_MOV, // simple copy, no modifiers allowed
OP_LOAD,
OP_STORE,
- OP_ADD,
+ OP_ADD, // NOTE: add u64 + u32 is legal for targets w/o 64-bit integer adds
OP_SUB,
OP_MUL,
OP_DIV,
@@ -707,6 +707,7 @@ public:
inline void setFlagsSrc(int s, Value *);
inline void setFlagsDef(int d, Value *);
+ inline bool usesFlags() const { return flagsSrc >= 0; }
unsigned int defCount() const { return defs.size(); };
unsigned int defCount(unsigned int mask, bool singleFile = false) const;
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp
index 0af2c61b3e5..935af32f1ed 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp
@@ -541,4 +541,74 @@ BuildUtil::DataArray::mkSymbol(int i, int c)
return sym;
}
+
+// Split a 64-bit integer MOV/ADD/SUB into a pair of 32-bit instructions.
+//
+// On success the original instruction @i is rewritten in place to become the
+// low half, and a clone of it is inserted directly after @i in the same basic
+// block to act as the high half.
+//
+// @fn     function passed through to cloneShallow()/cloneForward()
+// @i      instruction to split; only dType U64/S64 with op MOV, ADD or SUB
+//         is handled
+// @zero   value used as the high half's source wherever the corresponding
+//         source of @i is narrower than 8 bytes
+// @carry  value set as def 1 of the low half and as flags source of the high
+//         half for ADD/SUB; must be non-NULL for those ops
+//
+// Returns the high-half instruction, or NULL if the instruction is not
+// handled (unsupported type or op, or ADD/SUB without @carry). All NULL
+// returns happen before @i is modified.
+//
+// NOTE(review): TYPE_F64 is rejected here, whereas the split64BitOp() helpers
+// removed by this change acted only on TYPE_F64 -- confirm that dropping the
+// F64 handling is intended.
+Instruction *
+BuildUtil::split64BitOpPostRA(Function *fn, Instruction *i,
+ Value *zero,
+ Value *carry)
+{
+ DataType hTy;
+ int srcNr;
+
+ // Pick the 32-bit type of matching signedness for the two halves.
+ switch (i->dType) {
+ case TYPE_U64: hTy = TYPE_U32; break;
+ case TYPE_S64: hTy = TYPE_S32; break;
+ default:
+ return NULL;
+ }
+
+ // srcNr is the number of sources that get split below.
+ switch (i->op) {
+ case OP_MOV: srcNr = 1; break;
+ case OP_ADD:
+ case OP_SUB:
+ // The halves of an add/sub are linked through @carry, so it is required.
+ if (!carry)
+ return NULL;
+ srcNr = 2;
+ break;
+ default:
+ // TODO when needed
+ return NULL;
+ }
+
+ // Retype the instruction and replace its def with a shallow clone whose
+ // size is 4 bytes (presumably so the original 64-bit value object is left
+ // untouched -- TODO confirm cloneShallow semantics).
+ i->setType(hTy);
+ i->setDef(0, cloneShallow(fn, i->getDef(0)));
+ i->getDef(0)->reg.size = 4;
+ Instruction *lo = i;
+ Instruction *hi = cloneForward(fn, i);
+ lo->bb->insertAfter(lo, hi);
+
+ // The high half's def gets the register id following the low half's.
+ hi->getDef(0)->reg.data.id++;
+
+ for (int s = 0; s < srcNr; ++s) {
+ if (lo->getSrc(s)->reg.size < 8) {
+ // Source narrower than 64 bits: the high half reads @zero instead.
+ hi->setSrc(s, zero);
+ } else {
+ // Give lo its own copy of a value that is referenced more than once
+ // before halving its size (presumably to avoid resizing it for the
+ // other references).
+ if (lo->getSrc(s)->refCount() > 1)
+ lo->setSrc(s, cloneShallow(fn, lo->getSrc(s)));
+ lo->getSrc(s)->reg.size /= 2;
+ hi->setSrc(s, cloneShallow(fn, lo->getSrc(s)));
+
+ // Make the high half's source refer to the upper 32 bits.
+ switch (hi->src(s).getFile()) {
+ case FILE_IMMEDIATE:
+ // Upper 32 bits of the 64-bit immediate.
+ hi->getSrc(s)->reg.data.u64 >>= 32;
+ break;
+ case FILE_MEMORY_CONST:
+ case FILE_MEMORY_SHARED:
+ case FILE_SHADER_INPUT:
+ // Advance the offset by 4 (one 32-bit word).
+ hi->getSrc(s)->reg.data.offset += 4;
+ break;
+ default:
+ // Only GPRs are expected here; use the next register id.
+ assert(hi->src(s).getFile() == FILE_GPR);
+ hi->getSrc(s)->reg.data.id++;
+ break;
+ }
+ }
+ }
+ if (srcNr == 2) {
+ // ADD/SUB: the low half writes @carry as its second def and the high half
+ // takes it as its flags source, placed at index srcCount().
+ lo->setDef(1, carry);
+ hi->setFlagsSrc(hi->srcCount(), carry);
+ }
+ return hi;
+}
+
} // namespace nv50_ir
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h
index f48dbc21168..dbb9c03e3a8 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h
@@ -101,6 +101,10 @@ public:
Value *loadImm(Value *dst, int i) { return loadImm(dst, (uint32_t)i); }
+ // returns high part of the operation
+ static Instruction *split64BitOpPostRA(Function *, Instruction *,
+ Value *zero, Value *carry);
+
struct Location
{
Location(unsigned array, unsigned arrayIdx, unsigned i, unsigned c)
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp
index 9c3f8f64fa7..20f76f81ded 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp
@@ -122,7 +122,6 @@ private:
void handlePRERET(FlowInstruction *);
void replaceZero(Instruction *);
- void split64BitOp(Instruction *);
LValue *r63;
};
@@ -160,22 +159,6 @@ NV50LegalizePostRA::replaceZero(Instruction *i)
}
}
-void
-NV50LegalizePostRA::split64BitOp(Instruction *i)
-{
- if (i->dType == TYPE_F64) {
- if (i->op == OP_MAD)
- i->op = OP_FMA;
- if (i->op == OP_ADD || i->op == OP_MUL || i->op == OP_FMA ||
- i->op == OP_CVT || i->op == OP_MIN || i->op == OP_MAX ||
- i->op == OP_SET)
- return;
- i->dType = i->sType = TYPE_U32;
-
- i->bb->insertAfter(i, cloneForward(func, i));
- }
-}
-
// Emulate PRERET: jump to the target and call to the origin from there
//
// WARNING: atm only works if BBs are affected by at most a single PRERET
@@ -229,12 +212,18 @@ NV50LegalizePostRA::visit(BasicBlock *bb)
if (i->op == OP_PRERET && prog->getTarget()->getChipset() < 0xa0) {
handlePRERET(i->asFlow());
} else {
+ // TODO: We will want to do this before register allocation,
+ // since have to use a $c register for the carry flag.
+ if (typeSizeof(i->dType) == 8) {
+ Instruction *hi = BuildUtil::split64BitOpPostRA(func, i, r63, NULL);
+ if (hi)
+ next = hi;
+ }
+
if (i->op != OP_MOV && i->op != OP_PFETCH &&
i->op != OP_BAR &&
(!i->defExists(0) || i->def(0).getFile() != FILE_ADDRESS))
replaceZero(i);
- if (typeSizeof(i->dType) == 8)
- split64BitOp(i);
}
}
if (!bb->getEntry())
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp
index 0af44baa8fc..2926907cdf1 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp
@@ -667,6 +667,8 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
}
break;
case OP_ADD:
+ if (i->usesFlags())
+ break;
if (imm0.isInteger(0)) {
if (s == 0) {
i->setSrc(0, i->getSrc(1));
diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp
index 94d3cea0112..414a503c87d 100644
--- a/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp
@@ -128,7 +128,6 @@ private:
virtual bool visit(BasicBlock *);
void replaceZero(Instruction *);
- void split64BitOp(Instruction *);
bool tryReplaceContWithBra(BasicBlock *);
void propagateJoin(BasicBlock *);
@@ -158,6 +157,7 @@ private:
private:
LValue *rZero;
+ LValue *carry;
const bool needTexBar;
};
@@ -468,8 +468,10 @@ NVC0LegalizePostRA::visit(Function *fn)
insertTextureBarriers(fn);
rZero = new_LValue(fn, FILE_GPR);
+ carry = new_LValue(fn, FILE_FLAGS);
rZero->reg.data.id = prog->getTarget()->getFileSize(FILE_GPR);
+ carry->reg.data.id = 0;
return true;
}
@@ -486,22 +488,6 @@ NVC0LegalizePostRA::replaceZero(Instruction *i)
}
}
-void
-NVC0LegalizePostRA::split64BitOp(Instruction *i)
-{
- if (i->dType == TYPE_F64) {
- if (i->op == OP_MAD)
- i->op = OP_FMA;
- if (i->op == OP_ADD || i->op == OP_MUL || i->op == OP_FMA ||
- i->op == OP_CVT || i->op == OP_MIN || i->op == OP_MAX ||
- i->op == OP_SET)
- return;
- i->dType = i->sType = TYPE_U32;
-
- i->bb->insertAfter(i, cloneForward(func, i));
- }
-}
-
// replace CONT with BRA for single unconditional continue
bool
NVC0LegalizePostRA::tryReplaceContWithBra(BasicBlock *bb)
@@ -565,10 +551,17 @@ NVC0LegalizePostRA::visit(BasicBlock *bb)
if (i->isNop()) {
bb->remove(i);
} else {
+ // TODO: Move this to before register allocation for operations that
+ // need the $c register !
+ if (typeSizeof(i->dType) == 8) {
+ Instruction *hi;
+ hi = BuildUtil::split64BitOpPostRA(func, i, rZero, carry);
+ if (hi)
+ next = hi;
+ }
+
if (i->op != OP_MOV && i->op != OP_PFETCH)
replaceZero(i);
- if (typeSizeof(i->dType) == 8)
- split64BitOp(i);
}
}
if (!bb->getEntry())