summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author Christoph Bumiller <[email protected]> 2012-05-04 18:00:40 +0200
committer Christoph Bumiller <[email protected]> 2012-05-04 18:00:40 +0200
commit dffc2fb4e659f41b30e80e23ceb833d53417fb8e (patch)
tree 4c7efd20c8646fec578b5cc6600895b317e44125 /src
parent 02fac2930581b9bea9f6d221eb6d6b471fc3b9c6 (diff)
nv50/ir: move expansion of IMUL to later stage and handle memory operands
Diffstat (limited to 'src')
-rw-r--r-- src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp 25
-rw-r--r-- src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h 2
-rw-r--r-- src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp 24
-rw-r--r-- src/gallium/drivers/nv50/codegen/nv50_ir_target_nv50.cpp 17
4 files changed, 51 insertions, 17 deletions
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp
index f7dac25c116..f713e6391c6 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp
@@ -278,6 +278,31 @@ BuildUtil::mkSelect(Value *pred, Value *dst, Value *trSrc, Value *flSrc)
return mkOp2(OP_UNION, typeOfSize(dst->reg.size), dst, def0, def1);
}
+Instruction *
+BuildUtil::mkSplit(Value *h[2], uint8_t halfSize, Value *val)
+{
+ Instruction *insn = NULL; // stays NULL when no OP_SPLIT has to be emitted
+ // Produce two halfSize-byte values h[0], h[1] covering the (2 * halfSize)-byte val.
+ const DataType fTy = typeOfSize(halfSize * 2); // type of the full-width value
+
+ if (val->reg.file == FILE_IMMEDIATE) // immediates are first moved into a fresh full-width SSA value
+ val = mkMov(getSSA(halfSize * 2), val, fTy)->getDef(0);
+ // Memory operands are split by re-addressing clones of val; other files get an OP_SPLIT.
+ if (isMemoryFile(val->reg.file)) {
+ h[0] = cloneShallow(getFunction(), val);
+ h[1] = cloneShallow(getFunction(), val);
+ h[0]->reg.size = halfSize;
+ h[1]->reg.size = halfSize;
+ h[1]->reg.data.offset += halfSize; // h[1] addresses the halfSize bytes following h[0]
+ } else {
+ h[0] = getSSA(halfSize, val->reg.file); // new halves live in the same file as val
+ h[1] = getSSA(halfSize, val->reg.file);
+ insn = mkOp1(OP_SPLIT, fTy, h[0], val);
+ insn->setDef(1, h[1]); // OP_SPLIT gets h[1] as its second definition
+ }
+ return insn; // the OP_SPLIT instruction, or NULL for the memory-operand path
+}
+
FlowInstruction *
BuildUtil::mkFlow(operation op, void *targ, CondCode cc, Value *pred)
{
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h
index 9ee04dbcd12..dd7e491cb5c 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h
@@ -81,6 +81,8 @@ public:
Instruction *mkSelect(Value *pred, Value *dst, Value *trSrc, Value *flSrc);
+ Instruction *mkSplit(Value *half[2], uint8_t halfSize, Value *);
+
void mkClobber(DataFile file, uint32_t regMask, int regUnitLog2);
ImmediateValue *mkImm(float);
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp
index 011014eb5ba..16bba0e1723 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp
@@ -57,15 +57,17 @@ expandIntegerMUL(BuildUtil *bld, Instruction *mul)
Instruction *i[9];
- Value *a[2] = { bld->getSSA(halfSize), bld->getSSA(halfSize) };
- Value *b[2] = { bld->getSSA(halfSize), bld->getSSA(halfSize) };
+ bld->setPosition(mul, true);
+
+ Value *a[2], *b[2];
Value *c[2];
Value *t[4];
for (int j = 0; j < 4; ++j)
t[j] = bld->getSSA(fullSize);
- (i[0] = bld->mkOp1(OP_SPLIT, fTy, a[0], mul->getSrc(0)))->setDef(1, a[1]);
- (i[1] = bld->mkOp1(OP_SPLIT, fTy, b[0], mul->getSrc(1)))->setDef(1, b[1]);
+ // split sources into halves
+ i[0] = bld->mkSplit(a, halfSize, mul->getSrc(0));
+ i[1] = bld->mkSplit(b, halfSize, mul->getSrc(1));
i[2] = bld->mkOp2(OP_MUL, fTy, t[0], a[0], b[1]);
i[3] = bld->mkOp3(OP_MAD, fTy, t[1], a[1], b[0], t[0]);
@@ -96,7 +98,8 @@ expandIntegerMUL(BuildUtil *bld, Instruction *mul)
delete_Instruction(bld->getProgram(), mul);
for (int j = 2; j <= (highResult ? 5 : 4); ++j)
- i[j]->sType = hTy;
+ if (i[j])
+ i[j]->sType = hTy;
return true;
}
@@ -518,7 +521,6 @@ private:
bool handleEXPORT(Instruction *);
- bool handleMUL(Instruction *);
bool handleDIV(Instruction *);
bool handleSQRT(Instruction *);
bool handlePOW(Instruction *);
@@ -942,14 +944,6 @@ NV50LoweringPreSSA::handleRDSV(Instruction *i)
}
bool
-NV50LoweringPreSSA::handleMUL(Instruction *i)
-{
- if (!isFloatType(i->dType) && typeSizeof(i->sType) > 2)
- return expandIntegerMUL(&bld, i);
- return true;
-}
-
-bool
NV50LoweringPreSSA::handleDIV(Instruction *i)
{
if (!isFloatType(i->dType))
@@ -1069,8 +1063,6 @@ NV50LoweringPreSSA::visit(Instruction *i)
return handleSELP(i);
case OP_POW:
return handlePOW(i);
- case OP_MUL:
- return handleMUL(i);
case OP_DIV:
return handleDIV(i);
case OP_SQRT:
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_target_nv50.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_target_nv50.cpp
index 5e541e514cb..8b11c6a2fdd 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_target_nv50.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_target_nv50.cpp
@@ -310,7 +310,22 @@ TargetNV50::insnCanLoad(const Instruction *i, int s,
return false;
}
- if (ld->getSrc(0)->reg.data.offset > (int32_t)(127 * typeSizeof(ld->dType)))
+ uint8_t ldSize;
+
+ if ((i->op == OP_MUL || i->op == OP_MAD) && !isFloatType(i->dType)) {
+ // 32-bit MUL will be split into 16-bit MULs
+ if (ld->src(0).isIndirect(0))
+ return false;
+ if (sf == FILE_IMMEDIATE)
+ return false;
+ ldSize = 2;
+ } else {
+ ldSize = typeSizeof(ld->dType);
+ }
+
+ if (ldSize < 4 && sf == FILE_SHADER_INPUT) // no < 4-byte aligned a[] access
+ return false;
+ if (ld->getSrc(0)->reg.data.offset > (int32_t)(127 * ldSize))
return false;
if (ld->src(0).isIndirect(0)) {