summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/nouveau/codegen
diff options
context:
space:
mode:
authorRhys Perry <[email protected]>2018-06-14 19:56:28 -0600
committerBrian Paul <[email protected]>2018-06-14 20:09:45 -0600
commit66ca7e400b8cf736943feddafef7f76adabf9120 (patch)
treea338e1c9fd3ced8b144bedbbebcba8fd52c91135 /src/gallium/drivers/nouveau/codegen
parent9f217facbde04dd005b3f6b53bc97480b856d246 (diff)
nvc0: add support for programmable sample locations
Signed-off-by: Rhys Perry <[email protected]>
Diffstat (limited to 'src/gallium/drivers/nouveau/codegen')
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h2
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp7
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp102
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h2
4 files changed, 103 insertions, 10 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
index 3d0782f86b5..7c835ceab8d 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
@@ -73,6 +73,7 @@ struct nv50_ir_prog_symbol
#define NVISA_GK104_CHIPSET 0xe0
#define NVISA_GK20A_CHIPSET 0xea
#define NVISA_GM107_CHIPSET 0x110
+#define NVISA_GM200_CHIPSET 0x120
struct nv50_ir_prog_info
{
@@ -145,6 +146,7 @@ struct nv50_ir_prog_info
bool persampleInvocation;
bool usesSampleMaskIn;
bool readsFramebuffer;
+ bool readsSampleLocations;
} fp;
struct {
uint32_t inputOffset; /* base address for user args */
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index 3c5bad05fe7..d7844d73816 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -1520,6 +1520,10 @@ void Source::scanInstructionSrc(const Instruction& insn,
info->out[src.getIndex(0)].oread = 1;
}
}
+ if (src.getFile() == TGSI_FILE_SYSTEM_VALUE) {
+ if (info->sv[src.getIndex(0)].sn == TGSI_SEMANTIC_SAMPLEPOS)
+ info->prop.fp.readsSampleLocations = true;
+ }
if (src.getFile() != TGSI_FILE_INPUT)
return;
@@ -1560,6 +1564,9 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
if (insn.getOpcode() == TGSI_OPCODE_FBFETCH)
info->prop.fp.readsFramebuffer = true;
+ if (insn.getOpcode() == TGSI_OPCODE_INTERP_SAMPLE)
+ info->prop.fp.readsSampleLocations = true;
+
if (insn.dstCount()) {
Instruction::DstRegister dst = insn.getDst(0);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 29f674b4514..5723847234e 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -2662,17 +2662,33 @@ NVC0LoweringPass::handleRDSV(Instruction *i)
ld->subOp = NV50_IR_SUBOP_PIXLD_SAMPLEID;
break;
case SV_SAMPLE_POS: {
- Value *off = new_LValue(func, FILE_GPR);
- ld = bld.mkOp1(OP_PIXLD, TYPE_U32, i->getDef(0), bld.mkImm(0));
+ Value *sampleID = bld.getScratch();
+ ld = bld.mkOp1(OP_PIXLD, TYPE_U32, sampleID, bld.mkImm(0));
ld->subOp = NV50_IR_SUBOP_PIXLD_SAMPLEID;
- bld.mkOp2(OP_SHL, TYPE_U32, off, i->getDef(0), bld.mkImm(3));
- bld.mkLoad(TYPE_F32,
- i->getDef(0),
- bld.mkSymbol(
- FILE_MEMORY_CONST, prog->driver->io.auxCBSlot,
- TYPE_U32, prog->driver->io.sampleInfoBase +
- 4 * sym->reg.data.sv.index),
- off);
+ Value *offset = calculateSampleOffset(sampleID);
+
+ assert(prog->driver->prop.fp.readsSampleLocations);
+
+ if (targ->getChipset() >= NVISA_GM200_CHIPSET) {
+ bld.mkLoad(TYPE_F32,
+ i->getDef(0),
+ bld.mkSymbol(
+ FILE_MEMORY_CONST, prog->driver->io.auxCBSlot,
+ TYPE_U32, prog->driver->io.sampleInfoBase),
+ offset);
+ bld.mkOp2(OP_EXTBF, TYPE_U32, i->getDef(0), i->getDef(0),
+ bld.mkImm(0x040c + sym->reg.data.sv.index * 16));
+ bld.mkCvt(OP_CVT, TYPE_F32, i->getDef(0), TYPE_U32, i->getDef(0));
+ bld.mkOp2(OP_MUL, TYPE_F32, i->getDef(0), i->getDef(0), bld.mkImm(1.0f / 16.0f));
+ } else {
+ bld.mkLoad(TYPE_F32,
+ i->getDef(0),
+ bld.mkSymbol(
+ FILE_MEMORY_CONST, prog->driver->io.auxCBSlot,
+ TYPE_U32, prog->driver->io.sampleInfoBase +
+ 4 * sym->reg.data.sv.index),
+ offset);
+ }
break;
}
case SV_SAMPLE_MASK: {
@@ -2832,6 +2848,69 @@ NVC0LoweringPass::handleOUT(Instruction *i)
return true;
}
+Value *
+NVC0LoweringPass::calculateSampleOffset(Value *sampleID)
+{
+ Value *offset = bld.getScratch();
+ if (targ->getChipset() >= NVISA_GM200_CHIPSET) {
+ // On GM200+, sample location offsets (in bytes) are calculated like so:
+ // offset = (SV_POSITION.y % 4 * 2) + (SV_POSITION.x % 2);
+ // offset = offset * 32 + sampleID % 8 * 4;
+ // which is equivalent to:
+ // offset = ((SV_POSITION.y & 0x3) << 6) + ((SV_POSITION.x & 0x1) << 5);
+ // offset += (sampleID & 0x7) << 2;
+
+ // The second operand (src1) of each INSBF instruction is encoded like so:
+ // 0xssll, where ss is the field size in bits and ll is the bit offset.
+ // so: dest = src2 | ((src0 & ((1 << ss) - 1)) << ll)
+
+ // Add sample ID (offset = (sampleID & 0x7) << 2)
+ bld.mkOp3(OP_INSBF, TYPE_U32, offset, sampleID, bld.mkImm(0x0302), bld.mkImm(0x0));
+
+ Symbol *xSym = bld.mkSysVal(SV_POSITION, 0);
+ Symbol *ySym = bld.mkSysVal(SV_POSITION, 1);
+ Value *coord = bld.getScratch();
+
+ // Add X coordinate (offset |= (SV_POSITION.x & 0x1) << 5)
+ bld.mkInterp(NV50_IR_INTERP_LINEAR, coord,
+ targ->getSVAddress(FILE_SHADER_INPUT, xSym), NULL);
+ bld.mkCvt(OP_CVT, TYPE_U32, coord, TYPE_F32, coord)
+ ->rnd = ROUND_ZI;
+ bld.mkOp3(OP_INSBF, TYPE_U32, offset, coord, bld.mkImm(0x0105), offset);
+
+ // Add Y coordinate (offset |= (SV_POSITION.y & 0x3) << 6)
+ bld.mkInterp(NV50_IR_INTERP_LINEAR, coord,
+ targ->getSVAddress(FILE_SHADER_INPUT, ySym), NULL);
+ bld.mkCvt(OP_CVT, TYPE_U32, coord, TYPE_F32, coord)
+ ->rnd = ROUND_ZI;
+ bld.mkOp3(OP_INSBF, TYPE_U32, offset, coord, bld.mkImm(0x0206), offset);
+ } else {
+ bld.mkOp2(OP_SHL, TYPE_U32, offset, sampleID, bld.mkImm(3));
+ }
+ return offset;
+}
+
+// Handle programmable sample locations for GM20x+: a PIXLD with the OFFSET sub-op is replaced by an aux constant buffer load at sampleInfoBase.
+void
+NVC0LoweringPass::handlePIXLD(Instruction *i)
+{
+ if (i->subOp != NV50_IR_SUBOP_PIXLD_OFFSET)
+ return;
+ if (targ->getChipset() < NVISA_GM200_CHIPSET)
+ return;
+
+ assert(prog->driver->prop.fp.readsSampleLocations);
+
+ bld.mkLoad(TYPE_F32,
+ i->getDef(0),
+ bld.mkSymbol(
+ FILE_MEMORY_CONST, prog->driver->io.auxCBSlot,
+ TYPE_U32, prog->driver->io.sampleInfoBase),
+ calculateSampleOffset(i->getSrc(0)));
+
+ bld.getBB()->remove(i);
+}
+
// Generate a binary predicate if an instruction is predicated by
// e.g. an f32 value.
void
@@ -2931,6 +3010,9 @@ NVC0LoweringPass::visit(Instruction *i)
case OP_BUFQ:
handleBUFQ(i);
break;
+ case OP_PIXLD:
+ handlePIXLD(i);
+ break;
default:
break;
}
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
index 1b2b36d3cc9..91771fbf7e9 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
@@ -116,6 +116,7 @@ protected:
void handleSharedATOMNVE4(Instruction *);
void handleLDST(Instruction *);
bool handleBUFQ(Instruction *);
+ void handlePIXLD(Instruction *);
void checkPredicate(Instruction *);
@@ -142,6 +143,7 @@ private:
void processSurfaceCoordsNVE4(TexInstruction *);
void processSurfaceCoordsNVC0(TexInstruction *);
void convertSurfaceFormat(TexInstruction *);
+ Value *calculateSampleOffset(Value *sampleID);
protected:
Value *loadTexHandle(Value *ptr, unsigned int slot);