summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Boyan Ding <[email protected]> 2017-04-10 22:55:57 +0800
committer: Ilia Mirkin <[email protected]> 2017-04-13 02:24:30 -0400
commit: 854554c314e556c158b0e2aa3911a43f58fd6d34 (patch)
tree: 80cb798a1875810140ec922878b045678cadb578
parent: a981e68c26dc4079a335101da0033185030207f6 (diff)
gm107/ir: Emit third src 'bound' and optional predicate output of SHFL
v2: Emit the original hard-coded 0x1c03 when OP_SHFL is used in gm107's
lowering (Samuel Pitoiset)

Signed-off-by: Boyan Ding <[email protected]>
Reviewed-by: Ilia Mirkin <[email protected]>
-rw-r--r-- src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp | 23
-rw-r--r-- src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp | 15
2 files changed, 29 insertions, 9 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
index c3c0dcd9fc1..944563c93cf 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
@@ -967,11 +967,26 @@ CodeEmitterGM107::emitSHFL()
break;
}
- /*XXX: what is this arg? hardcode immediate for now */
- emitField(0x22, 13, 0x1c03);
- type |= 2;
+ switch (insn->src(2).getFile()) {
+ case FILE_GPR:
+ emitGPR(0x27, insn->src(2));
+ break;
+ case FILE_IMMEDIATE:
+ emitIMMD(0x22, 13, insn->src(2));
+ type |= 2;
+ break;
+ default:
+ assert(!"invalid src2 file");
+ break;
+ }
+
+ if (!insn->defExists(1))
+ emitPRED(0x30);
+ else {
+ assert(insn->def(1).getFile() == FILE_PREDICATE);
+ emitPRED(0x30, insn->def(1));
+ }
- emitPRED (0x30);
emitField(0x1e, 2, insn->subOp);
emitField(0x1c, 2, type);
emitGPR (0x08, insn->src(0));
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
index 371ebae40c1..6b9edd48645 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
@@ -41,6 +41,8 @@ namespace nv50_ir {
((QOP_##q << 6) | (QOP_##r << 4) | \
(QOP_##s << 2) | (QOP_##t << 0))
+#define SHFL_BOUND_QUAD 0x1c03
+
void
GM107LegalizeSSA::handlePFETCH(Instruction *i)
{
@@ -120,7 +122,8 @@ GM107LoweringPass::handleManualTXD(TexInstruction *i)
// mov coordinates from lane l to all lanes
bld.mkOp(OP_QUADON, TYPE_NONE, NULL);
for (c = 0; c < dim; ++c) {
- bld.mkOp2(OP_SHFL, TYPE_F32, crd[c], i->getSrc(c + array), bld.mkImm(l));
+ bld.mkOp3(OP_SHFL, TYPE_F32, crd[c], i->getSrc(c + array),
+ bld.mkImm(l), bld.mkImm(SHFL_BOUND_QUAD));
add = bld.mkOp2(OP_QUADOP, TYPE_F32, crd[c], crd[c], zero);
add->subOp = 0x00;
add->lanes = 1; /* abused for .ndv */
@@ -128,7 +131,8 @@ GM107LoweringPass::handleManualTXD(TexInstruction *i)
// add dPdx from lane l to lanes dx
for (c = 0; c < dim; ++c) {
- bld.mkOp2(OP_SHFL, TYPE_F32, tmp, i->dPdx[c].get(), bld.mkImm(l));
+ bld.mkOp3(OP_SHFL, TYPE_F32, tmp, i->dPdx[c].get(), bld.mkImm(l),
+ bld.mkImm(SHFL_BOUND_QUAD));
add = bld.mkOp2(OP_QUADOP, TYPE_F32, crd[c], tmp, crd[c]);
add->subOp = qOps[l][0];
add->lanes = 1; /* abused for .ndv */
@@ -136,7 +140,8 @@ GM107LoweringPass::handleManualTXD(TexInstruction *i)
// add dPdy from lane l to lanes dy
for (c = 0; c < dim; ++c) {
- bld.mkOp2(OP_SHFL, TYPE_F32, tmp, i->dPdy[c].get(), bld.mkImm(l));
+ bld.mkOp3(OP_SHFL, TYPE_F32, tmp, i->dPdy[c].get(), bld.mkImm(l),
+ bld.mkImm(SHFL_BOUND_QUAD));
add = bld.mkOp2(OP_QUADOP, TYPE_F32, crd[c], tmp, crd[c]);
add->subOp = qOps[l][1];
add->lanes = 1; /* abused for .ndv */
@@ -203,8 +208,8 @@ GM107LoweringPass::handleDFDX(Instruction *insn)
break;
}
- shfl = bld.mkOp2(OP_SHFL, TYPE_F32, bld.getScratch(),
- insn->getSrc(0), bld.mkImm(xid));
+ shfl = bld.mkOp3(OP_SHFL, TYPE_F32, bld.getScratch(), insn->getSrc(0),
+ bld.mkImm(xid), bld.mkImm(SHFL_BOUND_QUAD));
shfl->subOp = NV50_IR_SUBOP_SHFL_BFLY;
insn->op = OP_QUADOP;
insn->subOp = qop;