diff options
author | Ilia Mirkin <[email protected]> | 2016-01-14 01:09:25 -0500 |
---|---|---|
committer | Ilia Mirkin <[email protected]> | 2016-01-14 20:14:01 -0500 |
commit | fffb559129dd1ae978ec7f9ba30b4ae97a5ebbcc (patch) | |
tree | fbc89ccd5b54a3de3a0217324822da51a7a5e0b4 | |
parent | e231f59b6d5b12035a8041305c3a732d39a39c19 (diff) |
nv50/ir: rebase indirect temp arrays to 0, so that we use less lmem space
Reduces local memory usage in a lot of Metro 2033 Redux and a few KSP
shaders:
total local used in shared programs : 54116 -> 30372 (-43.88%)
Probably modest advantage to execution, but it's an imporant
prerequisite to dropping some of the TGSI optimizations done by the
state tracker.
Signed-off-by: Ilia Mirkin <[email protected]>
-rw-r--r-- | src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 58 |
1 files changed, 44 insertions, 14 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp index 5454f042078..9c4a38f291b 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp @@ -824,6 +824,8 @@ public: std::set<Location> locals; std::set<int> indirectTempArrays; + std::map<int, int> indirectTempOffsets; + std::map<int, std::pair<int, int> > tempArrayInfo; std::vector<int> tempArrayId; int clipVertexOutput; @@ -931,9 +933,16 @@ bool Source::scanSource() } tgsi_parse_free(&parse); - // TODO: Compute based on relevant array sizes - if (indirectTempArrays.size()) - info->bin.tlsSpace += (scan.file_max[TGSI_FILE_TEMPORARY] + 1) * 16; + if (indirectTempArrays.size()) { + int tempBase = 0; + for (std::set<int>::const_iterator it = indirectTempArrays.begin(); + it != indirectTempArrays.end(); ++it) { + std::pair<int, int>& info = tempArrayInfo[*it]; + indirectTempOffsets.insert(std::make_pair(*it, tempBase - info.first)); + tempBase += info.second; + } + info->bin.tlsSpace += tempBase * 16; + } if (info->io.genUserClip > 0) { info->io.clipDistances = info->io.genUserClip; @@ -1191,6 +1200,9 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl) case TGSI_FILE_TEMPORARY: for (i = first; i <= last; ++i) tempArrayId[i] = arrayId; + if (arrayId) + tempArrayInfo.insert(std::make_pair(arrayId, std::make_pair( + first, last - first + 1))); break; case TGSI_FILE_NULL: case TGSI_FILE_ADDRESS: @@ -1356,6 +1368,7 @@ private: void storeDst(const tgsi::Instruction::DstRegister dst, int c, Value *val, Value *ptr); + void adjustTempIndex(int arrayId, int &idx, int &idx2d) const; Value *applySrcMod(Value *, int s, int c); Symbol *makeSym(uint file, int fileIndex, int idx, int c, uint32_t addr); @@ -1661,11 +1674,23 @@ Converter::shiftAddress(Value *index) return mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), index, mkImm(4)); } +void +Converter::adjustTempIndex(int arrayId, int &idx, int &idx2d) const +{ + std::map<int, int>::const_iterator it = + code->indirectTempOffsets.find(arrayId); + if (it == code->indirectTempOffsets.end()) + return; + + idx2d = 1; + idx += it->second; +} + Value * Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr) { int idx2d = src.is2D() ? src.getIndex(1) : 0; - const int idx = src.getIndex(0); + int idx = src.getIndex(0); const int swz = src.getSwizzle(c); Instruction *ld; @@ -1710,8 +1735,7 @@ Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr) int arrayid = src.getArrayId(); if (!arrayid) arrayid = code->tempArrayId[idx]; - idx2d = (code->indirectTempArrays.find(arrayid) != - code->indirectTempArrays.end()); + adjustTempIndex(arrayid, idx, idx2d); } /* fallthrough */ default: @@ -1725,7 +1749,7 @@ Converter::acquireDst(int d, int c) { const tgsi::Instruction::DstRegister dst = tgsi.getDst(d); const unsigned f = dst.getFile(); - const int idx = dst.getIndex(0); + int idx = dst.getIndex(0); int idx2d = dst.is2D() ? dst.getIndex(1) : 0; if (dst.isMasked(c)/* || f == TGSI_FILE_RESOURCE*/) @@ -1736,9 +1760,12 @@ Converter::acquireDst(int d, int c) (f == TGSI_FILE_OUTPUT && prog->getType() != Program::TYPE_FRAGMENT)) return getScratch(); - if (f == TGSI_FILE_TEMPORARY) - idx2d = code->indirectTempArrays.find(code->tempArrayId[idx]) != - code->indirectTempArrays.end(); + if (f == TGSI_FILE_TEMPORARY) { + int arrayid = dst.getArrayId(); + if (!arrayid) + arrayid = code->tempArrayId[idx]; + adjustTempIndex(arrayid, idx, idx2d); + } return getArrayForFile(f, idx2d)-> acquire(sub.cur->values, idx, c); } @@ -1771,7 +1798,7 @@ Converter::storeDst(const tgsi::Instruction::DstRegister dst, int c, Value *val, Value *ptr) { const unsigned f = dst.getFile(); - const int idx = dst.getIndex(0); + int idx = dst.getIndex(0); int idx2d = dst.is2D() ? dst.getIndex(1) : 0; if (f == TGSI_FILE_SYSTEM_VALUE) { @@ -1795,9 +1822,12 @@ Converter::storeDst(const tgsi::Instruction::DstRegister dst, int c, f == TGSI_FILE_PREDICATE || f == TGSI_FILE_ADDRESS || f == TGSI_FILE_OUTPUT) { - if (f == TGSI_FILE_TEMPORARY) - idx2d = code->indirectTempArrays.find(code->tempArrayId[idx]) != - code->indirectTempArrays.end(); + if (f == TGSI_FILE_TEMPORARY) { + int arrayid = dst.getArrayId(); + if (!arrayid) + arrayid = code->tempArrayId[idx]; + adjustTempIndex(arrayid, idx, idx2d); + } getArrayForFile(f, idx2d)->store(sub.cur->values, idx, c, ptr, val); } else { |