diff options
Diffstat (limited to 'src/gallium')
3 files changed, 136 insertions, 13 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp index 49a45f8394b..3c790cfe77f 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp @@ -1126,6 +1126,7 @@ private: ValueMap values; }; + Value *shiftAddress(Value *); Value *getVertexBase(int s); DataArray *getArrayForFile(unsigned file, int idx); Value *fetchSrc(int s, int c); @@ -1344,7 +1345,8 @@ Converter::getVertexBase(int s) if (tgsi.getSrc(s).isIndirect(1)) rel = fetchSrc(tgsi.getSrc(s).getIndirect(1), 0, NULL); vtxBaseValid |= 1 << s; - vtxBase[s] = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(), mkImm(index), rel); + vtxBase[s] = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(4, FILE_ADDRESS), + mkImm(index), rel); } return vtxBase[s]; } @@ -1403,6 +1405,14 @@ Converter::getArrayForFile(unsigned file, int idx) } Value * +Converter::shiftAddress(Value *index) +{ + if (!index) + return NULL; + return mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), index, mkImm(4)); +} + +Value * Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr) { const int idx2d = src.is2D() ? src.getIndex(1) : 0; @@ -1414,7 +1424,7 @@ Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr) assert(!ptr); return loadImm(NULL, info->immd.data[idx * 4 + swz]); case TGSI_FILE_CONSTANT: - return mkLoadv(TYPE_U32, srcToSym(src, c), ptr); + return mkLoadv(TYPE_U32, srcToSym(src, c), shiftAddress(ptr)); case TGSI_FILE_INPUT: if (prog->getType() == Program::TYPE_FRAGMENT) { // don't load masked inputs, won't be assigned a slot @@ -1422,9 +1432,17 @@ Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr) return loadImm(NULL, swz == TGSI_SWIZZLE_W ? 1.0f : 0.0f); if (!ptr && info->in[idx].sn == TGSI_SEMANTIC_FACE) return mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_FACE, 0)); - return interpolate(src, c, ptr); + return interpolate(src, c, shiftAddress(ptr)); + } else + if (ptr && prog->getType() == Program::TYPE_GEOMETRY) { + // XXX: This is going to be a problem with scalar arrays, i.e. when + // we cannot assume that the address is given in units of vec4. + // + // nv50 and nvc0 need different things here, so let the lowering + // passes decide what to do with the address + return mkLoadv(TYPE_U32, srcToSym(src, c), ptr); } - return mkLoadv(TYPE_U32, srcToSym(src, c), ptr); + return mkLoadv(TYPE_U32, srcToSym(src, c), shiftAddress(ptr)); case TGSI_FILE_OUTPUT: assert(!"load from output file"); return NULL; @@ -1433,7 +1451,7 @@ Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr) return mkOp1v(OP_RDSV, TYPE_U32, getSSA(), srcToSym(src, c)); default: return getArrayForFile(src.getFile(), idx2d)->load( - sub.cur->values, idx, swz, ptr); + sub.cur->values, idx, swz, shiftAddress(ptr)); } } @@ -1476,8 +1494,9 @@ Converter::storeDst(int d, int c, Value *val) break; } - Value *ptr = dst.isIndirect(0) ? - fetchSrc(dst.getIndirect(0), 0, NULL) : NULL; + Value *ptr = NULL; + if (dst.isIndirect(0)) + ptr = shiftAddress(fetchSrc(dst.getIndirect(0), 0, NULL)); if (info->io.genUserClip > 0 && dst.getFile() == TGSI_FILE_OUTPUT && @@ -2179,12 +2198,11 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { src0 = fetchSrc(0, c); mkCvt(OP_CVT, TYPE_S32, dst0[c], TYPE_F32, src0)->rnd = ROUND_M; - mkOp2(OP_SHL, TYPE_U32, dst0[c], dst0[c], mkImm(4)); } break; case TGSI_OPCODE_UARL: FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) - mkOp2(OP_SHL, TYPE_U32, dst0[c], fetchSrc(0, c), mkImm(4)); + mkOp1(OP_MOV, TYPE_U32, dst0[c], fetchSrc(0, c)); break; case TGSI_OPCODE_EX2: case TGSI_OPCODE_LG2: @@ -2721,7 +2739,7 @@ Converter::Converter(Program *ir, const tgsi::Source *code) : BuildUtil(ir), tData.setup(TGSI_FILE_TEMPORARY, 0, 0, tSize, 4, 4, tFile, 0); pData.setup(TGSI_FILE_PREDICATE, 0, 0, pSize, 4, 4, FILE_PREDICATE, 0); - aData.setup(TGSI_FILE_ADDRESS, 0, 0, aSize, 4, 4, FILE_ADDRESS, 0); + aData.setup(TGSI_FILE_ADDRESS, 0, 0, aSize, 4, 4, FILE_GPR, 0); oData.setup(TGSI_FILE_OUTPUT, 0, 0, oSize, 4, 4, FILE_GPR, 0); zero = mkImm((uint32_t)0); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp index 07f3a217e59..1d13aea98b1 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp @@ -278,10 +278,24 @@ NV50LegalizeSSA::propagateWriteToOutput(Instruction *st) // TODO: move exports (if beneficial) in common opt pass if (di->isPseudo() || isTextureOp(di->op) || di->defCount(0xff, true) > 1) return; + for (int s = 0; di->srcExists(s); ++s) if (di->src(s).getFile() == FILE_IMMEDIATE) return; + if (prog->getType() == Program::TYPE_GEOMETRY) { + // Only propagate output writes in geometry shaders when we can be sure + // that we are propagating to the same output vertex. + if (di->bb != st->bb) + return; + Instruction *i; + for (i = di; i != st; i = i->next) { + if (i->op == OP_EMIT || i->op == OP_RESTART) + return; + } + assert(i); // st after di + } + // We cannot set defs to non-lvalues before register allocation, so // save & remove (to save registers) the exports and replace later. outWrites->push_back(st); @@ -307,6 +321,9 @@ NV50LegalizeSSA::handleAddrDef(Instruction *i) i->getDef(0)->reg.size = 2; // $aX are only 16 bit + // PFETCH can always write to $a + if (i->op == OP_PFETCH) + return; // only ADDR <- SHL(GPR, IMM) and ADDR <- ADD(ADDR, IMM) are valid if (i->srcExists(1) && i->src(1).getFile() == FILE_IMMEDIATE) { if (i->op == OP_SHL && i->src(0).getFile() == FILE_GPR) @@ -473,6 +490,9 @@ NV50LegalizeSSA::visit(BasicBlock *bb) for (insn = bb->getEntry(); insn; insn = next) { next = insn->next; + if (insn->defExists(0) && insn->getDef(0)->reg.file == FILE_ADDRESS) + handleAddrDef(insn); + switch (insn->op) { case OP_EXPORT: if (outWrites) @@ -491,9 +511,6 @@ NV50LegalizeSSA::visit(BasicBlock *bb) default: break; } - - if (insn->defExists(0) && insn->getDef(0)->reg.file == FILE_ADDRESS) - handleAddrDef(insn); } return true; } @@ -510,7 +527,9 @@ private: bool handleRDSV(Instruction *); bool handleWRSV(Instruction *); + bool handlePFETCH(Instruction *); bool handleEXPORT(Instruction *); + bool handleLOAD(Instruction *); bool handleDIV(Instruction *); bool handleSQRT(Instruction *); @@ -1002,6 +1021,81 @@ NV50LoweringPreSSA::handleEXPORT(Instruction *i) return true; } +// Handle indirect addressing in geometry shaders: +// +// ld $r0 a[$a1][$a2+k] -> +// ld $r0 a[($a1 + $a2 * $vstride) + k], where k *= $vstride is implicit +// +bool +NV50LoweringPreSSA::handleLOAD(Instruction *i) +{ + ValueRef src = i->src(0); + + if (src.isIndirect(1)) { + assert(prog->getType() == Program::TYPE_GEOMETRY); + Value *addr = i->getIndirect(0, 1); + + if (src.isIndirect(0)) { + // base address is in an address register, so move to a GPR + Value *base = bld.getScratch(); + bld.mkMov(base, addr); + + Symbol *sv = bld.mkSysVal(SV_VERTEX_STRIDE, 0); + Value *vstride = bld.mkOp1v(OP_RDSV, TYPE_U32, bld.getSSA(), sv); + Value *attrib = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(), + i->getIndirect(0, 0), bld.mkImm(2)); + + // Calculate final address: addr = base + attr*vstride; use 16-bit + // multiplication since 32-bit would be lowered to multiple + // instructions, and we only need the low 16 bits of the result + Value *a[2], *b[2]; + bld.mkSplit(a, 2, attrib); + bld.mkSplit(b, 2, vstride); + Value *sum = bld.mkOp3v(OP_MAD, TYPE_U16, bld.getSSA(), a[0], b[0], + base); + + // move address from GPR into an address register + addr = bld.getSSA(2, FILE_ADDRESS); + bld.mkMov(addr, sum); + } + + i->setIndirect(0, 1, NULL); + i->setIndirect(0, 0, addr); + } + + return true; +} + +bool +NV50LoweringPreSSA::handlePFETCH(Instruction *i) +{ + assert(prog->getType() == Program::TYPE_GEOMETRY); + + // NOTE: cannot use getImmediate here, not in SSA form yet, move to + // later phase if that assertion ever triggers: + + ImmediateValue *imm = i->getSrc(0)->asImm(); + assert(imm); + + assert(imm->reg.data.u32 <= 127); // TODO: use address reg if that happens + + if (i->srcExists(1)) { + // indirect addressing of vertex in primitive space + + LValue *val = bld.getScratch(); + Value *ptr = bld.getSSA(2, FILE_ADDRESS); + bld.mkOp2v(OP_SHL, TYPE_U32, ptr, i->getSrc(1), bld.mkImm(2)); + bld.mkOp2v(OP_PFETCH, TYPE_U32, val, imm, ptr); + + // NOTE: PFETCH directly to an $aX only works with direct addressing + i->op = OP_SHL; + i->setSrc(0, val); + i->setSrc(1, bld.mkImm(0)); + } + + return true; +} + // Set flags according to predicate and make the instruction read $cX. void NV50LoweringPreSSA::checkPredicate(Instruction *insn) @@ -1060,6 +1154,8 @@ NV50LoweringPreSSA::visit(Instruction *i) return handleSQRT(i); case OP_EXPORT: return handleEXPORT(i); + case OP_LOAD: + return handleLOAD(i); case OP_RDSV: return handleRDSV(i); case OP_WRSV: @@ -1070,6 +1166,8 @@ NV50LoweringPreSSA::visit(Instruction *i) return handlePRECONT(i); case OP_CONT: return handleCONT(i); + case OP_PFETCH: + return handlePFETCH(i); default: break; } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index a8380044914..3840f75a280 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -1548,6 +1548,13 @@ NVC0LoweringPass::visit(Instruction *i) if (prog->getType() == Program::TYPE_COMPUTE) { i->getSrc(0)->reg.file = FILE_MEMORY_CONST; i->getSrc(0)->reg.fileIndex = 0; + } else + if (prog->getType() == Program::TYPE_GEOMETRY && + i->src(0).isIndirect(0)) { + // XXX: this assumes vec4 units + Value *ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(), + i->getIndirect(0, 0), bld.mkImm(4)); + i->setIndirect(0, 0, ptr); } else { i->op = OP_VFETCH; assert(prog->getType() != Program::TYPE_FRAGMENT); // INTERP |