diff options
author | Ilia Mirkin <[email protected]> | 2016-06-19 16:57:50 -0400 |
---|---|---|
committer | Ilia Mirkin <[email protected]> | 2016-07-16 11:45:30 -0400 |
commit | 062c6b8e54c14adcc1ec603fad524f38fe058e67 (patch) | |
tree | bcb4774e8d9225d7f8bf557f299061f310f6f85f /src/gallium/drivers/nouveau/codegen | |
parent | cc46fc3c0921c86baa0fbe25ba6a9c4858f04ab3 (diff) |
nv50: fix alphatest for non-blendable formats
The hardware can only do alphatest when using a blendable format. This
means that the various *16 norm formats didn't work with alphatest. It
appears that Talos Principle uses such formats, as well as alpha tests,
for some internal renders, which made those renders incorrect. This
does not appear to affect the final renders, but in a different game it
easily could.
The approach we take is that when alphatests are enabled and a
non-blendable format is used (which we anticipate is a small minority of
the time), we insert code into the shader to perform the comparison and
discard.
Once inserted, that code lives in the shader forever, and we re-upload
it each time the alpha-test function changes, with a fixed-up compare. To avoid
re-uploading too often, if we switch back to a blendable format, the
test is (effectively) disabled and the hw alphatest functionality is
used.
Signed-off-by: Ilia Mirkin <[email protected]>
Diffstat (limited to 'src/gallium/drivers/nouveau/codegen')
6 files changed, 63 insertions, 6 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h index b611519ade3..58a5d38a82c 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h @@ -166,6 +166,7 @@ struct nv50_ir_prog_info uint8_t auxCBSlot; /* driver constant buffer slot */ uint16_t ucpBase; /* base address for UCPs */ uint16_t drawInfoBase; /* base address for draw parameters */ + uint16_t alphaRefBase; /* base address for alpha test values */ uint8_t pointSize; /* output index for PointSize */ uint8_t instanceId; /* system value index of InstanceID */ uint8_t vertexId; /* system value index of VertexID */ @@ -206,7 +207,8 @@ extern void nv50_ir_relocate_code(void *relocData, uint32_t *code, extern void nv50_ir_apply_fixups(void *fixupData, uint32_t *code, - bool force_per_sample, bool flatshade); + bool force_per_sample, bool flatshade, + uint8_t alphatest); /* obtain code that will be shared among programs */ extern void nv50_ir_get_target_library(uint32_t chipset, diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp index 0fe399bdd8e..7878f2fae48 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp @@ -1265,6 +1265,28 @@ CodeEmitterNV50::emitISAD(const Instruction *i) } } +static void +alphatestSet(const FixupEntry *entry, uint32_t *code, const FixupData& data) +{ + int loc = entry->loc; + int enc; + + switch (data.alphatest) { + case PIPE_FUNC_NEVER: enc = 0x0; break; + case PIPE_FUNC_LESS: enc = 0x1; break; + case PIPE_FUNC_EQUAL: enc = 0x2; break; + case PIPE_FUNC_LEQUAL: enc = 0x3; break; + case PIPE_FUNC_GREATER: enc = 0x4; break; + case PIPE_FUNC_NOTEQUAL: enc = 0x5; break; + case PIPE_FUNC_GEQUAL: enc = 0x6; break; + default: + case PIPE_FUNC_ALWAYS: enc = 0xf; break; + } + + code[loc + 1] &= ~(0x1f 
<< 14); + code[loc + 1] |= enc << 14; +} + void CodeEmitterNV50::emitSET(const Instruction *i) { @@ -1294,6 +1316,10 @@ CodeEmitterNV50::emitSET(const Instruction *i) if (i->src(1).mod.abs()) code[1] |= 0x00080000; emitForm_MAD(i); + + if (i->subOp == 1) { + addInterp(0, 0, alphatestSet); + } } void diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp index 825a44f35fb..61eb7f5c256 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp @@ -1006,7 +1006,7 @@ bool Source::scanSource() if (info->type == PIPE_SHADER_FRAGMENT) { info->prop.fp.writesDepth = scan.writes_z; - info->prop.fp.usesDiscard = scan.uses_kill; + info->prop.fp.usesDiscard = scan.uses_kill || info->io.alphaRefBase; } else if (info->type == PIPE_SHADER_GEOMETRY) { info->prop.gp.instanceCount = 1; // default value @@ -3818,6 +3818,28 @@ Converter::handleUserClipPlanes() void Converter::exportOutputs() { + if (info->io.alphaRefBase) { + for (unsigned int i = 0; i < info->numOutputs; ++i) { + if (info->out[i].sn != TGSI_SEMANTIC_COLOR || + info->out[i].si != 0) + continue; + const unsigned int c = 3; + if (!oData.exists(sub.cur->values, i, c)) + continue; + Value *val = oData.load(sub.cur->values, i, c, NULL); + if (!val) + continue; + + Symbol *ref = mkSymbol(FILE_MEMORY_CONST, info->io.auxCBSlot, + TYPE_U32, info->io.alphaRefBase); + Value *pred = new_LValue(func, FILE_PREDICATE); + mkCmp(OP_SET, CC_TR, TYPE_U32, pred, TYPE_F32, val, + mkLoadv(TYPE_U32, ref, NULL)) + ->subOp = 1; + mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_NOT_P, pred); + } + } + for (unsigned int i = 0; i < info->numOutputs; ++i) { for (unsigned int c = 0; c < 4; ++c) { if (!oData.exists(sub.cur->values, i, c)) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index 32131884c5f..3fc1abf18cd 
100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -196,6 +196,10 @@ LoadPropagation::checkSwapSrc01(Instruction *insn) return; if (insn->src(1).getFile() != FILE_GPR) return; + // This is the special OP_SET used for alphatesting, we can't reverse its + // arguments as that will confuse the fixup code. + if (insn->op == OP_SET && insn->subOp) + return; Instruction *i0 = insn->getSrc(0)->getInsn(); Instruction *i1 = insn->getSrc(1)->getInsn(); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp index 08aac00e536..7d7b3158951 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp @@ -506,14 +506,16 @@ nv50_ir_relocate_code(void *relocData, uint32_t *code, void nv50_ir_apply_fixups(void *fixupData, uint32_t *code, - bool force_persample_interp, bool flatshade) + bool force_persample_interp, bool flatshade, + uint8_t alphatest) { nv50_ir::FixupInfo *info = reinterpret_cast<nv50_ir::FixupInfo *>( fixupData); // force_persample_interp: all non-flat -> per-sample // flatshade: all color -> flat - nv50_ir::FixupData data(force_persample_interp, flatshade); + // alphatest: PIPE_FUNC_* to use with alphatest + nv50_ir::FixupData data(force_persample_interp, flatshade, alphatest); for (unsigned i = 0; i < info->count; ++i) info->entry[i].apply(&info->entry[i], code, data); } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h index 6bf1a5cff23..4a701f7cb9d 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h @@ -59,10 +59,11 @@ struct RelocInfo }; struct FixupData { - FixupData(bool force, bool flat) : - force_persample_interp(force), flatshade(flat) {} + FixupData(bool force, bool flat, uint8_t alphatest) : + 
force_persample_interp(force), flatshade(flat), alphatest(alphatest) {} bool force_persample_interp; bool flatshade; + uint8_t alphatest; }; struct FixupEntry; |