summaryrefslogtreecommitdiffstats
path: root/src/gallium
diff options
context:
space:
mode:
authorIlia Mirkin <[email protected]>2019-02-02 02:56:48 -0500
committerDylan Baker <[email protected]>2019-02-07 09:51:39 -0800
commit36d99d9ad0e13ca12e94d9dfaa510e2f3b0782c1 (patch)
tree490dd66433a22e82f5bbbc53ddbae1663759c123 /src/gallium
parent94f0908216db0aa06fe49a53ecbb35840d855d8d (diff)
nvc0/ir: fix second tex argument after levelZero optimization
We used to pre-set a bunch of extra arguments to a texture instruction in order to force the RA to allocate a register at the boundary of 4. However with the levelZero optimization, which removes a LOD argument when it's uniformly equal to zero, we undid that logic by removing an extra argument. As a result, we could end up with insufficient alignment on the second wide texture argument. Instead we switch to a different method of achieving the same result. The logic runs during the constraint analysis of the RA, and adds unset sources as necessary right before being merged into a wide argument. Fixes MISALIGNED_REG errors in Hitman when run with bindless textures enabled on a GK208. Fixes: 9145873b152 ("nvc0/ir: use levelZero flag when the lod is set to 0") Signed-off-by: Ilia Mirkin <[email protected]> Cc: 19.0 <[email protected]> (cherry picked from commit 5de5beedf21306b01730085f8e03d8f424729016)
Diffstat (limited to 'src/gallium')
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp16
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp33
2 files changed, 24 insertions, 25 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 295497be2f9..80a71ee8524 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -1063,22 +1063,6 @@ NVC0LoweringPass::handleTEX(TexInstruction *i)
}
}
- if (chipset >= NVISA_GK104_CHIPSET) {
- //
- // If TEX requires more than 4 sources, the 2nd register tuple must be
- // aligned to 4, even if it consists of just a single 4-byte register.
- //
- // XXX HACK: We insert 0 sources to avoid the 5 or 6 regs case.
- //
- int s = i->srcCount(0xff, true);
- if (s > 4 && s < 7) {
- if (i->srcExists(s)) // move potential predicate out of the way
- i->moveSources(s, 7 - s);
- while (s < 7)
- i->setSrc(s++, bld.loadImm(NULL, 0));
- }
- }
-
return true;
}
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
index f4379c137c5..f25bce00884 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
@@ -2341,9 +2341,19 @@ RegAlloc::InsertConstraintsPass::texConstraintGM107(TexInstruction *tex)
if (!tex->tex.target.isArray() && tex->tex.useOffsets)
s++;
}
- n = tex->srcCount(0xff) - s;
+ n = tex->srcCount(0xff, true) - s;
+ // TODO: Is this necessary? Perhaps just has to be aligned to the
+ // level that the first arg is, not necessarily to 4. This
+ // requirement has not been rigorously verified, as it has been on
+ // Kepler.
+ if (n > 0 && n < 3) {
+ if (tex->srcExists(n + s)) // move potential predicate out of the way
+ tex->moveSources(n + s, 3 - n);
+ while (n < 3)
+ tex->setSrc(s + n++, new_LValue(func, FILE_GPR));
+ }
} else {
- s = tex->srcCount(0xff);
+ s = tex->srcCount(0xff, true);
n = 0;
}
@@ -2366,14 +2376,18 @@ RegAlloc::InsertConstraintsPass::texConstraintNVE0(TexInstruction *tex)
} else
if (isTextureOp(tex->op)) {
int n = tex->srcCount(0xff, true);
- if (n > 4) {
- condenseSrcs(tex, 0, 3);
- if (n > 5) // NOTE: first call modified positions already
- condenseSrcs(tex, 4 - (4 - 1), n - 1 - (4 - 1));
- } else
- if (n > 1) {
- condenseSrcs(tex, 0, n - 1);
+ int s = n > 4 ? 4 : n;
+ if (n > 4 && n < 7) {
+ if (tex->srcExists(n)) // move potential predicate out of the way
+ tex->moveSources(n, 7 - n);
+
+ while (n < 7)
+ tex->setSrc(n++, new_LValue(func, FILE_GPR));
}
+ if (s > 1)
+ condenseSrcs(tex, 0, s - 1);
+ if (n > 4)
+ condenseSrcs(tex, 1, n - s);
}
}
@@ -2510,6 +2524,7 @@ RegAlloc::InsertConstraintsPass::insertConstraintMove(Instruction *cst, int s)
assert(cst->getSrc(s)->defs.size() == 1); // still SSA
Instruction *defi = cst->getSrc(s)->defs.front()->getInsn();
+
bool imm = defi->op == OP_MOV &&
defi->src(0).getFile() == FILE_IMMEDIATE;
bool load = defi->op == OP_LOAD &&