summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIlia Mirkin <[email protected]>2016-01-14 01:09:25 -0500
committerIlia Mirkin <[email protected]>2016-01-14 20:14:01 -0500
commitfffb559129dd1ae978ec7f9ba30b4ae97a5ebbcc (patch)
treefbc89ccd5b54a3de3a0217324822da51a7a5e0b4
parente231f59b6d5b12035a8041305c3a732d39a39c19 (diff)
nv50/ir: rebase indirect temp arrays to 0, so that we use less lmem space
Reduces local memory usage in a lot of Metro 2033 Redux and a few KSP shaders: total local used in shared programs : 54116 -> 30372 (-43.88%) Probably modest advantage to execution, but it's an important prerequisite to dropping some of the TGSI optimizations done by the state tracker. Signed-off-by: Ilia Mirkin <[email protected]>
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp58
1 files changed, 44 insertions, 14 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index 5454f042078..9c4a38f291b 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -824,6 +824,8 @@ public:
std::set<Location> locals;
std::set<int> indirectTempArrays;
+ std::map<int, int> indirectTempOffsets;
+ std::map<int, std::pair<int, int> > tempArrayInfo;
std::vector<int> tempArrayId;
int clipVertexOutput;
@@ -931,9 +933,16 @@ bool Source::scanSource()
}
tgsi_parse_free(&parse);
- // TODO: Compute based on relevant array sizes
- if (indirectTempArrays.size())
- info->bin.tlsSpace += (scan.file_max[TGSI_FILE_TEMPORARY] + 1) * 16;
+ if (indirectTempArrays.size()) {
+ int tempBase = 0;
+ for (std::set<int>::const_iterator it = indirectTempArrays.begin();
+ it != indirectTempArrays.end(); ++it) {
+ std::pair<int, int>& info = tempArrayInfo[*it];
+ indirectTempOffsets.insert(std::make_pair(*it, tempBase - info.first));
+ tempBase += info.second;
+ }
+ info->bin.tlsSpace += tempBase * 16;
+ }
if (info->io.genUserClip > 0) {
info->io.clipDistances = info->io.genUserClip;
@@ -1191,6 +1200,9 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
case TGSI_FILE_TEMPORARY:
for (i = first; i <= last; ++i)
tempArrayId[i] = arrayId;
+ if (arrayId)
+ tempArrayInfo.insert(std::make_pair(arrayId, std::make_pair(
+ first, last - first + 1)));
break;
case TGSI_FILE_NULL:
case TGSI_FILE_ADDRESS:
@@ -1356,6 +1368,7 @@ private:
void storeDst(const tgsi::Instruction::DstRegister dst, int c,
Value *val, Value *ptr);
+ void adjustTempIndex(int arrayId, int &idx, int &idx2d) const;
Value *applySrcMod(Value *, int s, int c);
Symbol *makeSym(uint file, int fileIndex, int idx, int c, uint32_t addr);
@@ -1661,11 +1674,23 @@ Converter::shiftAddress(Value *index)
return mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), index, mkImm(4));
}
+void
+Converter::adjustTempIndex(int arrayId, int &idx, int &idx2d) const
+{
+ std::map<int, int>::const_iterator it =
+ code->indirectTempOffsets.find(arrayId);
+ if (it == code->indirectTempOffsets.end())
+ return;
+
+ idx2d = 1;
+ idx += it->second;
+}
+
Value *
Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
{
int idx2d = src.is2D() ? src.getIndex(1) : 0;
- const int idx = src.getIndex(0);
+ int idx = src.getIndex(0);
const int swz = src.getSwizzle(c);
Instruction *ld;
@@ -1710,8 +1735,7 @@ Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
int arrayid = src.getArrayId();
if (!arrayid)
arrayid = code->tempArrayId[idx];
- idx2d = (code->indirectTempArrays.find(arrayid) !=
- code->indirectTempArrays.end());
+ adjustTempIndex(arrayid, idx, idx2d);
}
/* fallthrough */
default:
@@ -1725,7 +1749,7 @@ Converter::acquireDst(int d, int c)
{
const tgsi::Instruction::DstRegister dst = tgsi.getDst(d);
const unsigned f = dst.getFile();
- const int idx = dst.getIndex(0);
+ int idx = dst.getIndex(0);
int idx2d = dst.is2D() ? dst.getIndex(1) : 0;
if (dst.isMasked(c)/* || f == TGSI_FILE_RESOURCE*/)
@@ -1736,9 +1760,12 @@ Converter::acquireDst(int d, int c)
(f == TGSI_FILE_OUTPUT && prog->getType() != Program::TYPE_FRAGMENT))
return getScratch();
- if (f == TGSI_FILE_TEMPORARY)
- idx2d = code->indirectTempArrays.find(code->tempArrayId[idx]) !=
- code->indirectTempArrays.end();
+ if (f == TGSI_FILE_TEMPORARY) {
+ int arrayid = dst.getArrayId();
+ if (!arrayid)
+ arrayid = code->tempArrayId[idx];
+ adjustTempIndex(arrayid, idx, idx2d);
+ }
return getArrayForFile(f, idx2d)-> acquire(sub.cur->values, idx, c);
}
@@ -1771,7 +1798,7 @@ Converter::storeDst(const tgsi::Instruction::DstRegister dst, int c,
Value *val, Value *ptr)
{
const unsigned f = dst.getFile();
- const int idx = dst.getIndex(0);
+ int idx = dst.getIndex(0);
int idx2d = dst.is2D() ? dst.getIndex(1) : 0;
if (f == TGSI_FILE_SYSTEM_VALUE) {
@@ -1795,9 +1822,12 @@ Converter::storeDst(const tgsi::Instruction::DstRegister dst, int c,
f == TGSI_FILE_PREDICATE ||
f == TGSI_FILE_ADDRESS ||
f == TGSI_FILE_OUTPUT) {
- if (f == TGSI_FILE_TEMPORARY)
- idx2d = code->indirectTempArrays.find(code->tempArrayId[idx]) !=
- code->indirectTempArrays.end();
+ if (f == TGSI_FILE_TEMPORARY) {
+ int arrayid = dst.getArrayId();
+ if (!arrayid)
+ arrayid = code->tempArrayId[idx];
+ adjustTempIndex(arrayid, idx, idx2d);
+ }
getArrayForFile(f, idx2d)->store(sub.cur->values, idx, c, ptr, val);
} else {