diff options
author | Ilia Mirkin <[email protected]> | 2017-08-16 00:34:43 -0400 |
---|---|---|
committer | Ilia Mirkin <[email protected]> | 2017-12-19 23:09:19 -0500 |
commit | 0cf6320eb5eca1ea20906624ad5a46ca386e0aa6 (patch) | |
tree | 7d3e1af82e6069c4c9ac1ae9c195af94da415200 /src/gallium/drivers | |
parent | 22ceb1f99b5ae62d23200f4fecea8b62ab745318 (diff) |
nvc0/ir: change textureGrad to always use lane 0 as the tex origin
Thanks to Karol Herbst for the debugging / tracing work that led to this
change.
Move to using lane 0 as the "work" lane for the texture operation. It is
unclear why this helps, as the computation should be identical to doing it
in the "correct" lane with the properly adjusted quadops.
In order to be able to use the lane 0 result, we also have to ensure
that lane 0 contains the proper array/indirect/shadow values.
This applies to Fermi and Kepler. Maxwell+ may or may not need fixing,
but that lowering logic is separate.
Fixes KHR-GL45.texture_cube_map_array.sampling
Signed-off-by: Ilia Mirkin <[email protected]>
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r-- | src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 60 |
1 file changed, 46 insertions, 14 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index 6b51b7607cb..51f6fae2c1f 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -1081,15 +1081,20 @@ NVC0LoweringPass::handleTEX(TexInstruction *i) bool NVC0LoweringPass::handleManualTXD(TexInstruction *i) { - static const uint8_t qOps[4][2] = - { - { QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(MOV2, MOV2, ADD, ADD) }, // l0 - { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(MOV2, MOV2, ADD, ADD) }, // l1 - { QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l2 - { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l3 - }; + // Always done from the l0 perspective. This is the way that NVIDIA's + // driver does it, and doing it from the "current" lane's perpsective + // doesn't seem to always work for reasons that aren't altogether clear, + // even in frag shaders. + // + // Note that we must move not only the coordinates into lane0, but also all + // ancillary arguments, like array indices and depth compare as they may + // differ between lanes. Offsets for TXD are supposed to be uniform, so we + // leave them alone. + static const uint8_t qOps[2] = + { QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(MOV2, MOV2, ADD, ADD) }; + Value *def[4][4]; - Value *crd[3]; + Value *crd[3], *arr[2], *shadow; Instruction *tex; Value *zero = bld.loadImm(bld.getSSA(), 0); int l, c; @@ -1100,7 +1105,7 @@ NVC0LoweringPass::handleManualTXD(TexInstruction *i) // indirect are both in the leading arg, while for Kepler, array and // indirect are separate (and both precede the coordinates). Maxwell is // handled in a separate function. 
- unsigned array; + int array; if (targ->getChipset() < NVISA_GK104_CHIPSET) array = i->tex.target.isArray() || i->tex.rIndirectSrc >= 0; else @@ -1110,19 +1115,34 @@ NVC0LoweringPass::handleManualTXD(TexInstruction *i) for (c = 0; c < dim; ++c) crd[c] = bld.getScratch(); + for (c = 0; c < array; ++c) + arr[c] = bld.getScratch(); + shadow = bld.getScratch(); - bld.mkOp(OP_QUADON, TYPE_NONE, NULL); for (l = 0; l < 4; ++l) { Value *src[3], *val; - // mov coordinates from lane l to all lanes + + bld.mkOp(OP_QUADON, TYPE_NONE, NULL); + // we're using the texture result from lane 0 in all cases, so make sure + // that lane 0 is pointing at the proper array index, indirect value, + // and depth compare. + if (l != 0) { + for (c = 0; c < array; ++c) + bld.mkQuadop(0x00, arr[c], l, i->getSrc(c), zero); + if (i->tex.target.isShadow()) { + // The next argument after coords is the depth compare + bld.mkQuadop(0x00, shadow, l, i->getSrc(array + dim), zero); + } + } + // mov position coordinates from lane l to all lanes for (c = 0; c < dim; ++c) bld.mkQuadop(0x00, crd[c], l, i->getSrc(c + array), zero); // add dPdx from lane l to lanes dx for (c = 0; c < dim; ++c) - bld.mkQuadop(qOps[l][0], crd[c], l, i->dPdx[c].get(), crd[c]); + bld.mkQuadop(qOps[0], crd[c], l, i->dPdx[c].get(), crd[c]); // add dPdy from lane l to lanes dy for (c = 0; c < dim; ++c) - bld.mkQuadop(qOps[l][1], crd[c], l, i->dPdy[c].get(), crd[c]); + bld.mkQuadop(qOps[1], crd[c], l, i->dPdy[c].get(), crd[c]); // normalize cube coordinates if (i->tex.target.isCube()) { for (c = 0; c < 3; ++c) @@ -1139,8 +1159,21 @@ NVC0LoweringPass::handleManualTXD(TexInstruction *i) } // texture bld.insert(tex = cloneForward(func, i)); + if (l != 0) { + for (c = 0; c < array; ++c) + tex->setSrc(c, arr[c]); + if (i->tex.target.isShadow()) + tex->setSrc(array + dim, shadow); + } for (c = 0; c < dim; ++c) tex->setSrc(c + array, src[c]); + // broadcast results from lane 0 to all lanes so that the moves *into* + // the target lane 
pick up the proper value. + if (l != 0) + for (c = 0; i->defExists(c); ++c) + bld.mkQuadop(0x00, tex->getDef(c), 0, tex->getDef(c), zero); + bld.mkOp(OP_QUADPOP, TYPE_NONE, NULL); + // save results for (c = 0; i->defExists(c); ++c) { Instruction *mov; @@ -1150,7 +1183,6 @@ NVC0LoweringPass::handleManualTXD(TexInstruction *i) mov->lanes = 1 << l; } } - bld.mkOp(OP_QUADPOP, TYPE_NONE, NULL); for (c = 0; i->defExists(c); ++c) { Instruction *u = bld.mkOp(OP_UNION, TYPE_U32, i->getDef(c)); |