diff options
author | Rhys Perry <[email protected]> | 2018-06-14 19:56:28 -0600 |
---|---|---|
committer | Brian Paul <[email protected]> | 2018-06-14 20:09:45 -0600 |
commit | 66ca7e400b8cf736943feddafef7f76adabf9120 (patch) | |
tree | a338e1c9fd3ced8b144bedbbebcba8fd52c91135 /src/gallium/drivers/nouveau/codegen | |
parent | 9f217facbde04dd005b3f6b53bc97480b856d246 (diff) |
nvc0: add support for programmable sample locations
Signed-off-by: Rhys Perry <[email protected]>
Diffstat (limited to 'src/gallium/drivers/nouveau/codegen')
4 files changed, 103 insertions, 10 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h index 3d0782f86b5..7c835ceab8d 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h @@ -73,6 +73,7 @@ struct nv50_ir_prog_symbol #define NVISA_GK104_CHIPSET 0xe0 #define NVISA_GK20A_CHIPSET 0xea #define NVISA_GM107_CHIPSET 0x110 +#define NVISA_GM200_CHIPSET 0x120 struct nv50_ir_prog_info { @@ -145,6 +146,7 @@ struct nv50_ir_prog_info bool persampleInvocation; bool usesSampleMaskIn; bool readsFramebuffer; + bool readsSampleLocations; } fp; struct { uint32_t inputOffset; /* base address for user args */ diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp index 3c5bad05fe7..d7844d73816 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp @@ -1520,6 +1520,10 @@ void Source::scanInstructionSrc(const Instruction& insn, info->out[src.getIndex(0)].oread = 1; } } + if (src.getFile() == TGSI_FILE_SYSTEM_VALUE) { + if (info->sv[src.getIndex(0)].sn == TGSI_SEMANTIC_SAMPLEPOS) + info->prop.fp.readsSampleLocations = true; + } if (src.getFile() != TGSI_FILE_INPUT) return; @@ -1560,6 +1564,9 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst) if (insn.getOpcode() == TGSI_OPCODE_FBFETCH) info->prop.fp.readsFramebuffer = true; + if (insn.getOpcode() == TGSI_OPCODE_INTERP_SAMPLE) + info->prop.fp.readsSampleLocations = true; + if (insn.dstCount()) { Instruction::DstRegister dst = insn.getDst(0); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index 29f674b4514..5723847234e 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -2662,17 
+2662,33 @@ NVC0LoweringPass::handleRDSV(Instruction *i) ld->subOp = NV50_IR_SUBOP_PIXLD_SAMPLEID; break; case SV_SAMPLE_POS: { - Value *off = new_LValue(func, FILE_GPR); - ld = bld.mkOp1(OP_PIXLD, TYPE_U32, i->getDef(0), bld.mkImm(0)); + Value *sampleID = bld.getScratch(); + ld = bld.mkOp1(OP_PIXLD, TYPE_U32, sampleID, bld.mkImm(0)); ld->subOp = NV50_IR_SUBOP_PIXLD_SAMPLEID; - bld.mkOp2(OP_SHL, TYPE_U32, off, i->getDef(0), bld.mkImm(3)); - bld.mkLoad(TYPE_F32, - i->getDef(0), - bld.mkSymbol( - FILE_MEMORY_CONST, prog->driver->io.auxCBSlot, - TYPE_U32, prog->driver->io.sampleInfoBase + - 4 * sym->reg.data.sv.index), - off); + Value *offset = calculateSampleOffset(sampleID); + + assert(prog->driver->prop.fp.readsSampleLocations); + + if (targ->getChipset() >= NVISA_GM200_CHIPSET) { + bld.mkLoad(TYPE_F32, + i->getDef(0), + bld.mkSymbol( + FILE_MEMORY_CONST, prog->driver->io.auxCBSlot, + TYPE_U32, prog->driver->io.sampleInfoBase), + offset); + bld.mkOp2(OP_EXTBF, TYPE_U32, i->getDef(0), i->getDef(0), + bld.mkImm(0x040c + sym->reg.data.sv.index * 16)); + bld.mkCvt(OP_CVT, TYPE_F32, i->getDef(0), TYPE_U32, i->getDef(0)); + bld.mkOp2(OP_MUL, TYPE_F32, i->getDef(0), i->getDef(0), bld.mkImm(1.0f / 16.0f)); + } else { + bld.mkLoad(TYPE_F32, + i->getDef(0), + bld.mkSymbol( + FILE_MEMORY_CONST, prog->driver->io.auxCBSlot, + TYPE_U32, prog->driver->io.sampleInfoBase + + 4 * sym->reg.data.sv.index), + offset); + } break; } case SV_SAMPLE_MASK: { @@ -2832,6 +2848,69 @@ NVC0LoweringPass::handleOUT(Instruction *i) return true; } +Value * +NVC0LoweringPass::calculateSampleOffset(Value *sampleID) +{ + Value *offset = bld.getScratch(); + if (targ->getChipset() >= NVISA_GM200_CHIPSET) { + // Sample location offsets (in bytes) are calculated like so: + // offset = (SV_POSITION.y % 4 * 2) + (SV_POSITION.x % 2) + // offset = offset * 32 + sampleID % 8 * 4; + // which is equivalent to: + // offset = ((SV_POSITION.y & 0x3) << 6) + ((SV_POSITION.x & 0x1) << 5); + // offset += (sampleID & 0x7) << 2 + + 
// The second operand (src1) of an INSBF instruction is encoded like so: + // 0xssll where ss is the size and ll is the offset. + // so: dest = src2 | ((src0 & ((1 << ss) - 1)) << ll) + + // Add sample ID (offset = (sampleID & 0x7) << 2) + bld.mkOp3(OP_INSBF, TYPE_U32, offset, sampleID, bld.mkImm(0x0302), bld.mkImm(0x0)); + + Symbol *xSym = bld.mkSysVal(SV_POSITION, 0); + Symbol *ySym = bld.mkSysVal(SV_POSITION, 1); + Value *coord = bld.getScratch(); + + // Add X coordinate (offset |= (SV_POSITION.x & 0x1) << 5) + bld.mkInterp(NV50_IR_INTERP_LINEAR, coord, + targ->getSVAddress(FILE_SHADER_INPUT, xSym), NULL); + bld.mkCvt(OP_CVT, TYPE_U32, coord, TYPE_F32, coord) + ->rnd = ROUND_ZI; + bld.mkOp3(OP_INSBF, TYPE_U32, offset, coord, bld.mkImm(0x0105), offset); + + // Add Y coordinate (offset |= (SV_POSITION.y & 0x3) << 6) + bld.mkInterp(NV50_IR_INTERP_LINEAR, coord, + targ->getSVAddress(FILE_SHADER_INPUT, ySym), NULL); + bld.mkCvt(OP_CVT, TYPE_U32, coord, TYPE_F32, coord) + ->rnd = ROUND_ZI; + bld.mkOp3(OP_INSBF, TYPE_U32, offset, coord, bld.mkImm(0x0206), offset); + } else { + bld.mkOp2(OP_SHL, TYPE_U32, offset, sampleID, bld.mkImm(3)); + } + return offset; +} + +// Handle programmable sample locations for GM20x+ +void +NVC0LoweringPass::handlePIXLD(Instruction *i) +{ + if (i->subOp != NV50_IR_SUBOP_PIXLD_OFFSET) + return; + if (targ->getChipset() < NVISA_GM200_CHIPSET) + return; + + assert(prog->driver->prop.fp.readsSampleLocations); + + bld.mkLoad(TYPE_F32, + i->getDef(0), + bld.mkSymbol( + FILE_MEMORY_CONST, prog->driver->io.auxCBSlot, + TYPE_U32, prog->driver->io.sampleInfoBase), + calculateSampleOffset(i->getSrc(0))); + + bld.getBB()->remove(i); +} + // Generate a binary predicate if an instruction is predicated by // e.g. an f32 value. 
void @@ -2931,6 +3010,9 @@ NVC0LoweringPass::visit(Instruction *i) case OP_BUFQ: handleBUFQ(i); break; + case OP_PIXLD: + handlePIXLD(i); + break; default: break; } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h index 1b2b36d3cc9..91771fbf7e9 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h @@ -116,6 +116,7 @@ protected: void handleSharedATOMNVE4(Instruction *); void handleLDST(Instruction *); bool handleBUFQ(Instruction *); + void handlePIXLD(Instruction *); void checkPredicate(Instruction *); @@ -142,6 +143,7 @@ private: void processSurfaceCoordsNVE4(TexInstruction *); void processSurfaceCoordsNVC0(TexInstruction *); void convertSurfaceFormat(TexInstruction *); + Value *calculateSampleOffset(Value *sampleID); protected: Value *loadTexHandle(Value *ptr, unsigned int slot); |