summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp 115
-rw-r--r-- src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h 2
2 files changed, 117 insertions, 0 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 43a6e5f0fb1..dc0667fb6a9 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -2012,6 +2012,119 @@ NVC0LoweringPass::handleSurfaceOpNVE4(TexInstruction *su)
su->sType = (su->tex.target == TEX_TARGET_BUFFER) ? TYPE_U32 : TYPE_U8;
}
+void
+NVC0LoweringPass::processSurfaceCoordsNVC0(TexInstruction *su)
+{ // Scales su's coordinate sources and predicates su on its surface info.
+ const int idx = su->tex.r;
+ const int dim = su->tex.target.getDim();
+ const int arg = dim + (su->tex.target.isArray() || su->tex.target.isCube());
+ const uint16_t base = idx * NVE4_SU_INFO__STRIDE; // base offset for loadSuInfo32
+ int c;
+ Value *zero = bld.mkImm(0);
+ Value *src[3];
+ Value *v;
+ Value *ind = NULL; // stays NULL unless su has an indirect source
+
+ if (su->tex.rIndirectSrc >= 0) {
+ // FIXME: out of bounds (the indirect index is used without clamping)
+ assert(su->tex.r == 0);
+ ind = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(),
+ su->getIndirectR(), bld.mkImm(6)); // presumably log2(stride); confirm
+ }
+
+ // get surface coordinates: the first `arg` sources, zero-padded up to 3
+ for (c = 0; c < arg; ++c)
+ src[c] = su->getSrc(c);
+ for (; c < 3; ++c)
+ src[c] = zero;
+
+ // calculate pixel offset: source 0 times the NVE4_SU_INFO_BSIZE value
+ if (su->op == OP_SULDP || su->op == OP_SUREDP) {
+ v = loadSuInfo32(ind, base + NVE4_SU_INFO_BSIZE);
+ su->setSrc(0, bld.mkOp2v(OP_MUL, TYPE_U32, bld.getSSA(), src[0], v));
+ }
+
+ // array layer offset: source 2 times the NVE4_SU_INFO_ARRAY value
+ if (su->tex.target.isArray() || su->tex.target.isCube()) {
+ v = loadSuInfo32(ind, base + NVE4_SU_INFO_ARRAY);
+ assert(dim > 1);
+ su->setSrc(2, bld.mkOp2v(OP_MUL, TYPE_U32, bld.getSSA(), src[2], v));
+ }
+
+ // prevent read fault when the image is not actually bound (ADDR == 0)
+ CmpInstruction *pred =
+ bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE),
+ TYPE_U32, bld.mkImm(0),
+ loadSuInfo32(ind, base + NVE4_SU_INFO_ADDR));
+ if (su->op != OP_SUSTP && su->tex.format) {
+ const TexInstruction::ImgFormatDesc *format = su->tex.format;
+ int blockwidth = format->bits[0] + format->bits[1] +
+ format->bits[2] + format->bits[3]; // sum of the four bits[] entries
+
+ assert(format->components != 0);
+ // make sure that the format doesn't mismatch when it's not FMT_NONE
+ bld.mkCmp(OP_SET_OR, CC_NE, TYPE_U32, pred->getDef(0),
+ TYPE_U32, bld.loadImm(NULL, blockwidth / 8),
+ loadSuInfo32(ind, base + NVE4_SU_INFO_BSIZE),
+ pred->getDef(0)); // (blockwidth / 8 != BSIZE) combined with old pred
+ }
+ su->setPredicate(CC_NOT_P, pred->getDef(0)); // su is predicated on !pred
+}
+
+void
+NVC0LoweringPass::handleSurfaceOpNVC0(TexInstruction *su)
+{ // Lowers a surface op on chipsets below NVISA_GK104_CHIPSET (see visit()).
+ if (su->tex.target == TEX_TARGET_1D_ARRAY) {
+ /* As 1d arrays also need 3 coordinates, switching to TEX_TARGET_2D_ARRAY
+ * will simplify the lowering pass and the texture constraints. */
+ su->moveSources(1, 1);
+ su->setSrc(1, bld.loadImm(NULL, 0)); // source 1 becomes a constant 0
+ su->tex.target = TEX_TARGET_2D_ARRAY;
+ }
+
+ processSurfaceCoordsNVC0(su);
+
+ if (su->op == OP_SULDP)
+ convertSurfaceFormat(su);
+
+ if (su->op == OP_SUREDB || su->op == OP_SUREDP) {
+ const int dim = su->tex.target.getDim();
+ const int arg = dim + (su->tex.target.isArray() || su->tex.target.isCube());
+ LValue *addr = bld.getSSA(8); // presumably 8 bytes, for the TYPE_U64 def
+ Value *def = su->getDef(0); // original result; redefined by the OP_UNION
+
+ su->op = OP_SULEA; // su itself no longer performs the reduction
+
+ // Set the destination to the address; def 1 is su's current predicate
+ su->dType = TYPE_U64;
+ su->setDef(0, addr);
+ su->setDef(1, su->getPredicate());
+
+ bld.setPosition(su, true); // presumably: emit what follows after su
+
+ // Perform the atomic op on global memory, indirectly addressed by addr
+ Instruction *red = bld.mkOp(OP_ATOM, su->sType, bld.getSSA());
+ red->subOp = su->subOp;
+ red->setSrc(0, bld.mkSymbol(FILE_MEMORY_GLOBAL, 0, su->sType, 0));
+ red->setSrc(1, su->getSrc(arg)); // first source after the coordinates
+ if (red->subOp == NV50_IR_SUBOP_ATOM_CAS)
+ red->setSrc(2, su->getSrc(arg + 1)); // extra operand for CAS only
+ red->setIndirect(0, 0, addr);
+
+ // make sure to initialize dst value when the atomic operation is not
+ // performed: mov yields 0 and is predicated opposite to red
+ Instruction *mov = bld.mkMov(bld.getSSA(), bld.loadImm(NULL, 0));
+
+ assert(su->cc == CC_NOT_P);
+ red->setPredicate(su->cc, su->getPredicate());
+ mov->setPredicate(CC_P, su->getPredicate());
+
+ bld.mkOp2(OP_UNION, TYPE_U32, def, red->getDef(0), mov->getDef(0));
+
+ handleCasExch(red, false);
+ }
+}
+
bool
NVC0LoweringPass::handleWRSV(Instruction *i)
{
@@ -2491,6 +2604,8 @@ NVC0LoweringPass::visit(Instruction *i)
case OP_SUREDP:
if (targ->getChipset() >= NVISA_GK104_CHIPSET)
handleSurfaceOpNVE4(i->asTex());
+ else
+ handleSurfaceOpNVC0(i->asTex());
break;
case OP_SUQ:
handleSUQ(i->asTex());
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
index c007e09439e..2321956322e 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
@@ -106,6 +106,7 @@ protected:
bool handleATOM(Instruction *);
bool handleCasExch(Instruction *, bool needCctl);
void handleSurfaceOpNVE4(TexInstruction *);
+ void handleSurfaceOpNVC0(TexInstruction *);
void handleSharedATOM(Instruction *);
void handleSharedATOMNVE4(Instruction *);
void handleLDST(Instruction *);
@@ -138,6 +139,7 @@ private:
void adjustCoordinatesMS(TexInstruction *);
void processSurfaceCoordsNVE4(TexInstruction *);
+ void processSurfaceCoordsNVC0(TexInstruction *);
void convertSurfaceFormat(TexInstruction *);
protected: