aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Karol Herbst <[email protected]> 2018-08-04 03:13:11 +0200
committer: Karol Herbst <[email protected]> 2018-08-04 15:24:08 +0200
commit: 6f98a3065bce873652e24e3591c3d57192514473 (patch)
tree: c05ae4112f57b79cba856053d4fabe91d42641ac
parent: 4334196ab325c6a19d618a392cddcc9f03adeb18 (diff)
gm200/ir: add native OP_SQRT support
./GpuTest /test=pixmark_piano 1024x640 30sec: 301 -> 327 points

shader-db:
total instructions in shared programs : 5472103 -> 5456166 (-0.29%)
total gprs used in shared programs    : 647530 -> 647522 (-0.00%)
total shared used in shared programs  : 389120 -> 389120 (0.00%)
total local used in shared programs   : 21064 -> 21064 (0.00%)
total bytes used in shared programs   : 58459304 -> 58288696 (-0.29%)

                local     shared        gpr       inst      bytes
    helped           0          0         27       8281       8281
      hurt           0          0         21        431        431

v2: use NVISA_GM200_CHIPSET

Reviewed-by: Ilia Mirkin <[email protected]>
Signed-off-by: Karol Herbst <[email protected]>
-rw-r--r-- src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp | 4
-rw-r--r-- src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 3
-rw-r--r-- src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp | 8
-rw-r--r-- src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp | 1
4 files changed, 14 insertions, 2 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
index 1d31f181e44..5e8c22cd54b 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
@@ -1409,6 +1409,7 @@ CodeEmitterGM107::emitMUFU()
case OP_LG2: mufu = 3; break;
case OP_RCP: mufu = 4 + 2 * insn->subOp; break;
case OP_RSQ: mufu = 5 + 2 * insn->subOp; break;
+ case OP_SQRT: mufu = 8; break;
default:
assert(!"invalid mufu");
break;
@@ -1418,7 +1419,7 @@ CodeEmitterGM107::emitMUFU()
emitSAT (0x32);
emitNEG (0x30, insn->src(0));
emitABS (0x2e, insn->src(0));
- emitField(0x14, 3, mufu);
+ emitField(0x14, 4, mufu);
emitGPR (0x08, insn->src(0));
emitGPR (0x00, insn->def(0));
}
@@ -3342,6 +3343,7 @@ CodeEmitterGM107::emitInstruction(Instruction *i)
case OP_LG2:
case OP_RCP:
case OP_RSQ:
+ case OP_SQRT:
emitMUFU();
break;
case OP_AND:
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 1410cf26c87..c47d10896ce 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -2752,6 +2752,9 @@ NVC0LoweringPass::handleMOD(Instruction *i)
bool
NVC0LoweringPass::handleSQRT(Instruction *i)
{
+ if (targ->isOpSupported(OP_SQRT, i->dType))
+ return true;
+
if (i->dType == TYPE_F64) {
Value *pred = bld.getSSA(1, FILE_PREDICATE);
Value *zero = bld.loadImm(NULL, 0.0);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
index adbfcc3cfec..c25e6da024d 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
@@ -57,10 +57,13 @@ TargetGM107::isOpSupported(operation op, DataType ty) const
switch (op) {
case OP_SAD:
case OP_POW:
- case OP_SQRT:
case OP_DIV:
case OP_MOD:
return false;
+ case OP_SQRT:
+ if (ty == TYPE_F64)
+ return false;
+ return chipset >= NVISA_GM200_CHIPSET;
default:
break;
}
@@ -125,6 +128,7 @@ TargetGM107::isBarrierRequired(const Instruction *insn) const
case OP_RCP:
case OP_RSQ:
case OP_SIN:
+ case OP_SQRT:
return true;
default:
break;
@@ -256,6 +260,7 @@ TargetGM107::getLatency(const Instruction *insn) const
case OP_RCP:
case OP_RSQ:
case OP_SIN:
+ case OP_SQRT:
return 13;
default:
break;
@@ -284,6 +289,7 @@ TargetGM107::getReadLatency(const Instruction *insn) const
case OP_RSQ:
case OP_SAT:
case OP_SIN:
+ case OP_SQRT:
case OP_SULDB:
case OP_SULDP:
case OP_SUREDB:
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
index 7e059235f4c..9304e392361 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
@@ -129,6 +129,7 @@ static const struct opProperties _initProps[] =
{ OP_LG2, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
{ OP_RCP, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
{ OP_RSQ, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
+ { OP_SQRT, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
{ OP_DFDX, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0 },
{ OP_DFDY, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0 },
{ OP_CALL, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0 },