diff options
author | Ilia Mirkin <[email protected]> | 2019-02-02 02:56:48 -0500 |
---|---|---|
committer | Ilia Mirkin <[email protected]> | 2019-02-06 19:35:57 -0500 |
commit | 5de5beedf21306b01730085f8e03d8f424729016 (patch) | |
tree | d15d9f6592b14508f7a5141dade370524e9548f4 /src/gallium/drivers/nouveau/codegen | |
parent | 4443b6ddf2e08d06f3d0457cf20a2e04244cde37 (diff) |
nvc0/ir: fix second tex argument after levelZero optimization
We used to pre-set a bunch of extra arguments to a texture instruction
in order to force the RA to allocate a register aligned to a multiple of 4.
However, with the levelZero optimization, which removes a LOD argument
when it's uniformly equal to zero, we undid that logic by removing an
extra argument. As a result, we could end up with insufficient alignment
on the second wide texture argument.
Instead, we switch to a different method of achieving the same result.
The logic runs during the constraint analysis of the RA, and adds unset
sources as necessary right before the sources are merged into a wide argument.
Fixes MISALIGNED_REG errors in Hitman when run with bindless textures
enabled on a GK208.
Fixes: 9145873b152 ("nvc0/ir: use levelZero flag when the lod is set to 0")
Signed-off-by: Ilia Mirkin <[email protected]>
Cc: 19.0 <[email protected]>
Diffstat (limited to 'src/gallium/drivers/nouveau/codegen')
-rw-r--r-- | src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 16 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp | 33 |
2 files changed, 24 insertions, 25 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index 0d9df71d0d3..45474f07547 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -1126,22 +1126,6 @@ NVC0LoweringPass::handleTEX(TexInstruction *i) } } - if (chipset >= NVISA_GK104_CHIPSET) { - // - // If TEX requires more than 4 sources, the 2nd register tuple must be - // aligned to 4, even if it consists of just a single 4-byte register. - // - // XXX HACK: We insert 0 sources to avoid the 5 or 6 regs case. - // - int s = i->srcCount(0xff, true); - if (s > 4 && s < 7) { - if (i->srcExists(s)) // move potential predicate out of the way - i->moveSources(s, 7 - s); - while (s < 7) - i->setSrc(s++, bld.loadImm(NULL, 0)); - } - } - return true; } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp index f4379c137c5..f25bce00884 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp @@ -2341,9 +2341,19 @@ RegAlloc::InsertConstraintsPass::texConstraintGM107(TexInstruction *tex) if (!tex->tex.target.isArray() && tex->tex.useOffsets) s++; } - n = tex->srcCount(0xff) - s; + n = tex->srcCount(0xff, true) - s; + // TODO: Is this necessary? Perhaps just has to be aligned to the + // level that the first arg is, not necessarily to 4. This + // requirement has not been rigorously verified, as it has been on + // Kepler. 
+ if (n > 0 && n < 3) { + if (tex->srcExists(n + s)) // move potential predicate out of the way + tex->moveSources(n + s, 3 - n); + while (n < 3) + tex->setSrc(s + n++, new_LValue(func, FILE_GPR)); + } } else { - s = tex->srcCount(0xff); + s = tex->srcCount(0xff, true); n = 0; } @@ -2366,14 +2376,18 @@ RegAlloc::InsertConstraintsPass::texConstraintNVE0(TexInstruction *tex) } else if (isTextureOp(tex->op)) { int n = tex->srcCount(0xff, true); - if (n > 4) { - condenseSrcs(tex, 0, 3); - if (n > 5) // NOTE: first call modified positions already - condenseSrcs(tex, 4 - (4 - 1), n - 1 - (4 - 1)); - } else - if (n > 1) { - condenseSrcs(tex, 0, n - 1); + int s = n > 4 ? 4 : n; + if (n > 4 && n < 7) { + if (tex->srcExists(n)) // move potential predicate out of the way + tex->moveSources(n, 7 - n); + + while (n < 7) + tex->setSrc(n++, new_LValue(func, FILE_GPR)); } + if (s > 1) + condenseSrcs(tex, 0, s - 1); + if (n > 4) + condenseSrcs(tex, 1, n - s); } } @@ -2510,6 +2524,7 @@ RegAlloc::InsertConstraintsPass::insertConstraintMove(Instruction *cst, int s) assert(cst->getSrc(s)->defs.size() == 1); // still SSA Instruction *defi = cst->getSrc(s)->defs.front()->getInsn(); + bool imm = defi->op == OP_MOV && defi->src(0).getFile() == FILE_IMMEDIATE; bool load = defi->op == OP_LOAD && |