diff options
author | Ilia Mirkin <[email protected]> | 2019-02-02 02:56:48 -0500 |
---|---|---|
committer | Dylan Baker <[email protected]> | 2019-02-07 09:51:39 -0800 |
commit | 36d99d9ad0e13ca12e94d9dfaa510e2f3b0782c1 (patch) | |
tree | 490dd66433a22e82f5bbbc53ddbae1663759c123 /src/gallium | |
parent | 94f0908216db0aa06fe49a53ecbb35840d855d8d (diff) |
nvc0/ir: fix second tex argument after levelZero optimization
We used to pre-set a bunch of extra arguments to a texture instruction
in order to force the register allocator (RA) to place the second
register tuple on a 4-register boundary. However, the levelZero
optimization, which removes the LOD argument when it's uniformly equal
to zero, undid that logic by removing an argument. As a result, we could
end up with insufficient alignment on the second wide texture argument.
Instead, we switch to a different method of achieving the same result.
The new logic runs during the RA's constraint analysis and adds unset
sources as necessary, right before the sources are merged into a wide
argument.
Fixes MISALIGNED_REG errors in Hitman when run with bindless textures
enabled on a GK208.
Fixes: 9145873b152 ("nvc0/ir: use levelZero flag when the lod is set to 0")
Signed-off-by: Ilia Mirkin <[email protected]>
Cc: 19.0 <[email protected]>
(cherry picked from commit 5de5beedf21306b01730085f8e03d8f424729016)
Diffstat (limited to 'src/gallium')
-rw-r--r-- | src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 16 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp | 33 |
2 files changed, 24 insertions, 25 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index 295497be2f9..80a71ee8524 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -1063,22 +1063,6 @@ NVC0LoweringPass::handleTEX(TexInstruction *i) } } - if (chipset >= NVISA_GK104_CHIPSET) { - // - // If TEX requires more than 4 sources, the 2nd register tuple must be - // aligned to 4, even if it consists of just a single 4-byte register. - // - // XXX HACK: We insert 0 sources to avoid the 5 or 6 regs case. - // - int s = i->srcCount(0xff, true); - if (s > 4 && s < 7) { - if (i->srcExists(s)) // move potential predicate out of the way - i->moveSources(s, 7 - s); - while (s < 7) - i->setSrc(s++, bld.loadImm(NULL, 0)); - } - } - return true; } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp index f4379c137c5..f25bce00884 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp @@ -2341,9 +2341,19 @@ RegAlloc::InsertConstraintsPass::texConstraintGM107(TexInstruction *tex) if (!tex->tex.target.isArray() && tex->tex.useOffsets) s++; } - n = tex->srcCount(0xff) - s; + n = tex->srcCount(0xff, true) - s; + // TODO: Is this necessary? Perhaps just has to be aligned to the + // level that the first arg is, not necessarily to 4. This + // requirement has not been rigorously verified, as it has been on + // Kepler. 
+ if (n > 0 && n < 3) { + if (tex->srcExists(n + s)) // move potential predicate out of the way + tex->moveSources(n + s, 3 - n); + while (n < 3) + tex->setSrc(s + n++, new_LValue(func, FILE_GPR)); + } } else { - s = tex->srcCount(0xff); + s = tex->srcCount(0xff, true); n = 0; } @@ -2366,14 +2376,18 @@ RegAlloc::InsertConstraintsPass::texConstraintNVE0(TexInstruction *tex) } else if (isTextureOp(tex->op)) { int n = tex->srcCount(0xff, true); - if (n > 4) { - condenseSrcs(tex, 0, 3); - if (n > 5) // NOTE: first call modified positions already - condenseSrcs(tex, 4 - (4 - 1), n - 1 - (4 - 1)); - } else - if (n > 1) { - condenseSrcs(tex, 0, n - 1); + int s = n > 4 ? 4 : n; + if (n > 4 && n < 7) { + if (tex->srcExists(n)) // move potential predicate out of the way + tex->moveSources(n, 7 - n); + + while (n < 7) + tex->setSrc(n++, new_LValue(func, FILE_GPR)); } + if (s > 1) + condenseSrcs(tex, 0, s - 1); + if (n > 4) + condenseSrcs(tex, 1, n - s); } } @@ -2510,6 +2524,7 @@ RegAlloc::InsertConstraintsPass::insertConstraintMove(Instruction *cst, int s) assert(cst->getSrc(s)->defs.size() == 1); // still SSA Instruction *defi = cst->getSrc(s)->defs.front()->getInsn(); + bool imm = defi->op == OP_MOV && defi->src(0).getFile() == FILE_IMMEDIATE; bool load = defi->op == OP_LOAD && |