diff options
author | Ilia Mirkin <[email protected]> | 2016-06-19 16:57:50 -0400 |
---|---|---|
committer | Ilia Mirkin <[email protected]> | 2016-07-16 11:45:30 -0400 |
commit | 062c6b8e54c14adcc1ec603fad524f38fe058e67 (patch) | |
tree | bcb4774e8d9225d7f8bf557f299061f310f6f85f /src/gallium/drivers/nouveau/nv50 | |
parent | cc46fc3c0921c86baa0fbe25ba6a9c4858f04ab3 (diff) |
nv50: fix alphatest for non-blendable formats
The hardware can only do alphatest when using a blendable format. This
means that the various *16 norm formats didn't work with alphatest. It
appears that Talos Principle uses such formats, as well as alpha tests,
for some internal renders, which made them incorrect. However, this
does not appear to affect the final renders, but in a different game it
easily could.
The approach we take is that when alphatests are enabled and a non-blendable
format is used (which we anticipate is a small minority of the time),
we insert code into the shader to perform the comparison and discard.
Once inserted, that code lives in the shader forever, and we re-upload
it each time the function changes with a fixed-up compare. To avoid
re-uploading too often, if we switch back to a blendable format, the
test is (effectively) disabled and the hw alphatest functionality is
used.
Signed-off-by: Ilia Mirkin <[email protected]>
Diffstat (limited to 'src/gallium/drivers/nouveau/nv50')
7 files changed, 53 insertions, 4 deletions
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.h b/src/gallium/drivers/nouveau/nv50/nv50_context.h index cb94c8edc54..cca44f5bb21 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_context.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h @@ -97,7 +97,10 @@ /* Sample position pairs for the current output MS level */ #define NV50_CB_AUX_SAMPLE_OFFSET 0x300 #define NV50_CB_AUX_SAMPLE_OFFSET_SIZE (4 * 8 * 2) -/* next spot: 0x340 */ +/* Alpha test ref value */ +#define NV50_CB_AUX_ALPHATEST_OFFSET 0x340 +#define NV50_CB_AUX_ALPHATEST_SIZE (4) +/* next spot: 0x344 */ /* 4 32-bit floats for the vertex runout, put at the end */ #define NV50_CB_AUX_RUNOUT_OFFSET (NV50_CB_AUX_SIZE - 0x10) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c index c764f5c5728..2b66877c679 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_program.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c @@ -334,6 +334,8 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset, info->io.auxCBSlot = 15; info->io.ucpBase = NV50_CB_AUX_UCP_OFFSET; info->io.genUserClip = prog->vp.clpd_nr; + if (prog->fp.alphatest) + info->io.alphaRefBase = NV50_CB_AUX_ALPHATEST_OFFSET; info->io.suInfoBase = NV50_CB_AUX_TEX_MS_OFFSET; info->io.sampleInfoBase = NV50_CB_AUX_SAMPLE_OFFSET; @@ -488,7 +490,8 @@ nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog) if (prog->interps) nv50_ir_apply_fixups(prog->interps, prog->code, prog->fp.force_persample_interp, - false /* flatshade */); + false /* flatshade */, + prog->fp.alphatest - 1); nv50_sifc_linear_u8(&nv50->base, nv50->screen->code, (prog_type << NV50_CODE_BO_SIZE_LOG2) + prog->code_base, diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.h b/src/gallium/drivers/nouveau/nv50/nv50_program.h index 0a22e5bbbcf..fc9ada43624 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_program.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_program.h @@ -90,6 
+90,7 @@ struct nv50_program { uint32_t colors; /* 0x1904 */ uint8_t has_samplemask; uint8_t force_persample_interp; + uint8_t alphatest; } fp; struct { diff --git a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c index 23263945bdc..d234748a0a0 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c @@ -174,6 +174,42 @@ nv50_fragprog_validate(struct nv50_context *nv50) struct nv50_program *fp = nv50->fragprog; struct pipe_rasterizer_state *rast = &nv50->rast->pipe; + if (nv50->zsa && nv50->zsa->pipe.alpha.enabled) { + struct pipe_framebuffer_state *fb = &nv50->framebuffer; + bool blendable = fb->nr_cbufs == 0 || !fb->cbufs[0] || + nv50->screen->base.base.is_format_supported( + &nv50->screen->base.base, + fb->cbufs[0]->format, + fb->cbufs[0]->texture->target, + fb->cbufs[0]->texture->nr_samples, + PIPE_BIND_BLENDABLE); + /* If we already have alphatest code, we have to keep updating + * it. However we only have to have different code if the current RT0 is + * non-blendable. Otherwise we just set it to always pass and use the + * hardware alpha test. + */ + if (fp->fp.alphatest || !blendable) { + uint8_t alphatest = PIPE_FUNC_ALWAYS + 1; + if (!blendable) + alphatest = nv50->zsa->pipe.alpha.func + 1; + if (!fp->fp.alphatest) + nv50_program_destroy(nv50, fp); + else if (fp->mem && fp->fp.alphatest != alphatest) + nouveau_heap_free(&fp->mem); + + fp->fp.alphatest = alphatest; + } + } else if (fp->fp.alphatest && fp->fp.alphatest != PIPE_FUNC_ALWAYS + 1) { + /* Alpha test is disabled but we have a shader where it's filled + * in. Make sure to reset the function to 'always', otherwise it'll end + * up discarding fragments incorrectly. 
+ */ + if (fp->mem) + nouveau_heap_free(&fp->mem); + + fp->fp.alphatest = PIPE_FUNC_ALWAYS + 1; + } + if (fp->fp.force_persample_interp != rast->force_persample_interp) { /* Force the program to be reuploaded, which will trigger interp fixups * to get applied diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c b/src/gallium/drivers/nouveau/nv50/nv50_state.c index a84c9e27992..b6741140e50 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_state.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c @@ -416,6 +416,11 @@ nv50_zsa_state_create(struct pipe_context *pipe, SB_DATA (so, 0); } + SB_BEGIN_3D(so, CB_ADDR, 1); + SB_DATA (so, NV50_CB_AUX_ALPHATEST_OFFSET << (8 - 2) | NV50_CB_AUX); + SB_BEGIN_3D(so, CB_DATA(0), 1); + SB_DATA (so, fui(cso->alpha.ref_value)); + assert(so->size <= ARRAY_SIZE(so->state)); return (void *)so; } diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c index 3a374a24867..19181a9f496 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c @@ -522,7 +522,8 @@ validate_list_3d[] = { { nv50_vertprog_validate, NV50_NEW_3D_VERTPROG }, { nv50_gmtyprog_validate, NV50_NEW_3D_GMTYPROG }, { nv50_fragprog_validate, NV50_NEW_3D_FRAGPROG | NV50_NEW_3D_RASTERIZER | - NV50_NEW_3D_MIN_SAMPLES }, + NV50_NEW_3D_MIN_SAMPLES | NV50_NEW_3D_ZSA | + NV50_NEW_3D_FRAMEBUFFER}, { nv50_fp_linkage_validate, NV50_NEW_3D_FRAGPROG | NV50_NEW_3D_VERTPROG | NV50_NEW_3D_GMTYPROG | NV50_NEW_3D_RASTERIZER }, { nv50_gp_linkage_validate, NV50_NEW_3D_GMTYPROG | NV50_NEW_3D_VERTPROG }, diff --git a/src/gallium/drivers/nouveau/nv50/nv50_stateobj.h b/src/gallium/drivers/nouveau/nv50/nv50_stateobj.h index b8fa0f623f8..9598b04e0f4 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_stateobj.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_stateobj.h @@ -31,7 +31,7 @@ struct nv50_rasterizer_stateobj { struct nv50_zsa_stateobj { struct 
pipe_depth_stencil_alpha_state pipe; int size; - uint32_t state[34]; + uint32_t state[38]; }; struct nv50_constbuf { |