summary | refs | log | tree | commit | diff | stats
path: root/src/mesa
diff options
context:
space:
mode:
author Francisco Jerez <[email protected]> 2010-10-10 01:39:13 +0200
committer Francisco Jerez <[email protected]> 2010-10-10 04:14:34 +0200
commit e2acc7be2683fd3c295480724b02f5a497309cfd (patch)
tree 7707be4e255377633aa7bb79db6dec77a9658f04 /src/mesa
parent 35a1893fd1993932a428f5f83051383d51c8135e (diff)
dri/nv10: Fake fast Z clears for pre-nv17 cards.
Diffstat (limited to 'src/mesa')
-rw-r--r-- src/mesa/drivers/dri/nouveau/nv10_context.c 130
-rw-r--r-- src/mesa/drivers/dri/nouveau/nv10_driver.h 6
-rw-r--r-- src/mesa/drivers/dri/nouveau/nv10_state_fb.c 8
-rw-r--r-- src/mesa/drivers/dri/nouveau/nv10_state_tnl.c 3
4 files changed, 127 insertions, 20 deletions
diff --git a/src/mesa/drivers/dri/nouveau/nv10_context.c b/src/mesa/drivers/dri/nouveau/nv10_context.c
index f0e27441947..3d898fd94d9 100644
--- a/src/mesa/drivers/dri/nouveau/nv10_context.c
+++ b/src/mesa/drivers/dri/nouveau/nv10_context.c
@@ -61,39 +61,129 @@ use_fast_zclear(GLcontext *ctx, GLbitfield buffers)
fb->_Ymax == fb->Height && fb->_Ymin == 0;
}
+GLboolean
+nv10_use_viewport_zclear(GLcontext *ctx)
+{
+ struct nouveau_context *nctx = to_nouveau_context(ctx);
+ struct gl_framebuffer *fb = ctx->DrawBuffer;
+
+ return context_chipset(ctx) < 0x17 &&
+ !nctx->hierz.clear_blocked && fb->_DepthBuffer &&
+ (_mesa_get_format_bits(fb->_DepthBuffer->Format,
+ GL_DEPTH_BITS) >= 24);
+}
+
+float
+nv10_transform_depth(GLcontext *ctx, float z)
+{
+ struct nouveau_context *nctx = to_nouveau_context(ctx);
+
+ if (nv10_use_viewport_zclear(ctx))
+ return 2097152.0 * (z + (nctx->hierz.clear_seq & 7));
+ else
+ return ctx->DrawBuffer->_DepthMaxF * z;
+}
+
static void
-nv10_clear(GLcontext *ctx, GLbitfield buffers)
+nv10_zclear(GLcontext *ctx, GLbitfield *buffers)
+{
+ /*
+ * Pre-nv17 cards don't have native support for fast Z clears,
+ * but in some cases we can still "clear" the Z buffer without
+ * actually blitting to it if we're willing to sacrifice a few
+ * bits of depth precision.
+ *
+ * Each time a clear is requested we modify the viewport
+ * transform in such a way that the old contents of the depth
+ * buffer are clamped to the requested clear value when
+ * they're read by the GPU.
+ */
+ struct nouveau_context *nctx = to_nouveau_context(ctx);
+ struct gl_framebuffer *fb = ctx->DrawBuffer;
+ struct nouveau_framebuffer *nfb = to_nouveau_framebuffer(fb);
+ struct nouveau_surface *s = &to_nouveau_renderbuffer(
+ fb->_DepthBuffer->Wrapped)->surface;
+
+ if (nv10_use_viewport_zclear(ctx)) {
+ int x, y, w, h;
+ float z = ctx->Depth.Clear;
+ uint32_t value = pack_zs_f(s->format, z, 0);
+
+ get_scissors(fb, &x, &y, &w, &h);
+ *buffers &= ~BUFFER_BIT_DEPTH;
+
+ if (use_fast_zclear(ctx, *buffers)) {
+ if (nfb->hierz.clear_value != value) {
+ /* Don't fast clear if we're changing
+ * the depth value. */
+ nfb->hierz.clear_value = value;
+
+ } else if (z == 0.0) {
+ nctx->hierz.clear_seq++;
+ context_dirty(ctx, ZCLEAR);
+
+ if ((nctx->hierz.clear_seq & 7) != 0 &&
+ nctx->hierz.clear_seq != 1)
+ /* We didn't wrap around -- no need to
+ * clear the depth buffer for real. */
+ return;
+
+ } else if (z == 1.0) {
+ nctx->hierz.clear_seq--;
+ context_dirty(ctx, ZCLEAR);
+
+ if ((nctx->hierz.clear_seq & 7) != 7)
+ /* No wrap around */
+ return;
+ }
+ }
+
+ value = pack_zs_f(s->format,
+ (z + (nctx->hierz.clear_seq & 7)) / 8, 0);
+ context_drv(ctx)->surface_fill(ctx, s, ~0, value, x, y, w, h);
+ }
+}
+
+static void
+nv17_zclear(GLcontext *ctx, GLbitfield *buffers)
{
struct nouveau_context *nctx = to_nouveau_context(ctx);
struct nouveau_channel *chan = context_chan(ctx);
struct nouveau_grobj *celsius = context_eng3d(ctx);
struct nouveau_framebuffer *nfb = to_nouveau_framebuffer(
ctx->DrawBuffer);
+ struct nouveau_surface *s = &to_nouveau_renderbuffer(
+ nfb->base._DepthBuffer->Wrapped)->surface;
- nouveau_validate_framebuffer(ctx);
+ /* Clear the hierarchical depth buffer */
+ BEGIN_RING(chan, celsius, NV17TCL_LMA_DEPTH_FILL_VALUE, 1);
+ OUT_RING(chan, pack_zs_f(s->format, ctx->Depth.Clear, 0));
+ BEGIN_RING(chan, celsius, NV17TCL_LMA_DEPTH_BUFFER_CLEAR, 1);
+ OUT_RING(chan, 1);
- if ((buffers & BUFFER_BIT_DEPTH) &&
- ctx->Depth.Mask && nfb->hierz.bo) {
- struct nouveau_surface *s = &to_nouveau_renderbuffer(
- nfb->base._DepthBuffer->Wrapped)->surface;
+ /* Mark the depth buffer as cleared */
+ if (use_fast_zclear(ctx, *buffers)) {
+ if (nctx->hierz.clear_seq)
+ *buffers &= ~BUFFER_BIT_DEPTH;
- /* Clear the hierarchical depth buffer */
- BEGIN_RING(chan, celsius, NV17TCL_LMA_DEPTH_FILL_VALUE, 1);
- OUT_RING(chan, pack_zs_f(s->format, ctx->Depth.Clear, 0));
- BEGIN_RING(chan, celsius, NV17TCL_LMA_DEPTH_BUFFER_CLEAR, 1);
- OUT_RING(chan, 1);
+ nfb->hierz.clear_value =
+ pack_zs_f(s->format, ctx->Depth.Clear, 0);
+ nctx->hierz.clear_seq++;
- /* Mark the depth buffer as cleared */
- if (use_fast_zclear(ctx, buffers)) {
- if (nctx->hierz.clear_seq)
- buffers &= ~BUFFER_BIT_DEPTH;
+ context_dirty(ctx, ZCLEAR);
+ }
+}
- nfb->hierz.clear_value =
- pack_zs_f(s->format, ctx->Depth.Clear, 0);
- nctx->hierz.clear_seq++;
+static void
+nv10_clear(GLcontext *ctx, GLbitfield buffers)
+{
+ nouveau_validate_framebuffer(ctx);
- context_dirty(ctx, ZCLEAR);
- }
+ if ((buffers & BUFFER_BIT_DEPTH) && ctx->Depth.Mask) {
+ if (context_chipset(ctx) >= 0x17)
+ nv17_zclear(ctx, &buffers);
+ else
+ nv10_zclear(ctx, &buffers);
}
nouveau_clear(ctx, buffers);
diff --git a/src/mesa/drivers/dri/nouveau/nv10_driver.h b/src/mesa/drivers/dri/nouveau/nv10_driver.h
index 340ba05adee..61dceab7b61 100644
--- a/src/mesa/drivers/dri/nouveau/nv10_driver.h
+++ b/src/mesa/drivers/dri/nouveau/nv10_driver.h
@@ -37,6 +37,12 @@ enum {
/* nv10_context.c */
extern const struct nouveau_driver nv10_driver;
+GLboolean
+nv10_use_viewport_zclear(GLcontext *ctx);
+
+float
+nv10_transform_depth(GLcontext *ctx, float z);
+
/* nv10_render.c */
void
nv10_render_init(GLcontext *ctx);
diff --git a/src/mesa/drivers/dri/nouveau/nv10_state_fb.c b/src/mesa/drivers/dri/nouveau/nv10_state_fb.c
index 98eb0e8eceb..f9f3ebaa8d0 100644
--- a/src/mesa/drivers/dri/nouveau/nv10_state_fb.c
+++ b/src/mesa/drivers/dri/nouveau/nv10_state_fb.c
@@ -172,12 +172,15 @@ nv10_emit_viewport(GLcontext *ctx, int emit)
{
struct nouveau_channel *chan = context_chan(ctx);
struct nouveau_grobj *celsius = context_eng3d(ctx);
+ struct gl_viewport_attrib *vp = &ctx->Viewport;
struct gl_framebuffer *fb = ctx->DrawBuffer;
float a[4] = {};
get_viewport_translate(ctx, a);
a[0] -= 2048;
a[1] -= 2048;
+ if (nv10_use_viewport_zclear(ctx))
+ a[2] = nv10_transform_depth(ctx, (vp->Far + vp->Near) / 2);
BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_TRANSLATE_X, 4);
OUT_RINGp(chan, a, 4);
@@ -204,5 +207,10 @@ nv10_emit_zclear(GLcontext *ctx, int emit)
OUT_RING(chan, nctx->hierz.clear_blocked ? 0 : 1);
OUT_RING(chan, nfb->hierz.clear_value |
(nctx->hierz.clear_seq & 0xff));
+ } else {
+ BEGIN_RING(chan, celsius, NV10TCL_DEPTH_RANGE_NEAR, 2);
+ OUT_RINGf(chan, nv10_transform_depth(ctx, 0));
+ OUT_RINGf(chan, nv10_transform_depth(ctx, 1));
+ context_dirty(ctx, VIEWPORT);
}
}
diff --git a/src/mesa/drivers/dri/nouveau/nv10_state_tnl.c b/src/mesa/drivers/dri/nouveau/nv10_state_tnl.c
index 0e592a16292..6b2ede88e67 100644
--- a/src/mesa/drivers/dri/nouveau/nv10_state_tnl.c
+++ b/src/mesa/drivers/dri/nouveau/nv10_state_tnl.c
@@ -479,6 +479,9 @@ nv10_emit_projection(GLcontext *ctx, int emit)
_math_matrix_ctr(&m);
get_viewport_scale(ctx, m.m);
+ if (nv10_use_viewport_zclear(ctx))
+ m.m[MAT_SZ] /= 8;
+
if (nctx->fallback == HWTNL)
_math_matrix_mul_matrix(&m, &m, &ctx->_ModelProjectMatrix);