diff options
author | Ilia Mirkin <[email protected]> | 2016-01-30 10:02:43 -0500 |
---|---|---|
committer | Ilia Mirkin <[email protected]> | 2016-01-30 16:01:41 -0500 |
commit | 3ca2001b537a2709e7ef60410e7dfad5d38663f4 (patch) | |
tree | a8466aadc9a44c52456ccbaac207bc3644884640 /src/gallium/drivers/nouveau/nv50 | |
parent | f15447e7c9dc1e00973b02098637da0aa74de7d5 (diff) |
nv50,nvc0: fix buffer clearing to respect engine alignment requirements
It appears that the NVIDIA render engine is quite picky when it comes to
linear surfaces. It doesn't like offsets that are not 256-byte aligned, and
apparently doesn't even support strides that are not a multiple of 256 bytes.
This makes arb_clear_buffer_object-unaligned pass on both nv50 and nvc0.
As a side effect, this also allows RGB32 clears to work via GPU data
upload instead of synchronizing the buffer to the CPU (nvc0 only).
Signed-off-by: Ilia Mirkin <[email protected]> # tested on GF108, GT215
Tested-by: Nick Sarnie <[email protected]> # GK208
Cc: [email protected]
Diffstat (limited to 'src/gallium/drivers/nouveau/nv50')
-rw-r--r-- | src/gallium/drivers/nouveau/nv50/nv50_surface.c | 120 |
1 files changed, 102 insertions, 18 deletions
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_surface.c b/src/gallium/drivers/nouveau/nv50/nv50_surface.c index 86be1b4c4ed..ec5cf376227 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_surface.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c @@ -595,6 +595,82 @@ nv50_clear(struct pipe_context *pipe, unsigned buffers, } static void +nv50_clear_buffer_push(struct pipe_context *pipe, + struct pipe_resource *res, + unsigned offset, unsigned size, + const void *data, int data_size) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct nouveau_pushbuf *push = nv50->base.pushbuf; + struct nv04_resource *buf = nv04_resource(res); + unsigned count = (size + 3) / 4; + unsigned xcoord = offset & 0xff; + unsigned tmp, i; + + if (data_size == 1) { + tmp = *(unsigned char *)data; + tmp = (tmp << 24) | (tmp << 16) | (tmp << 8) | tmp; + data = &tmp; + data_size = 4; + } else if (data_size == 2) { + tmp = *(unsigned short *)data; + tmp = (tmp << 16) | tmp; + data = &tmp; + data_size = 4; + } + + unsigned data_words = data_size / 4; + + nouveau_bufctx_refn(nv50->bufctx, 0, buf->bo, buf->domain | NOUVEAU_BO_WR); + nouveau_pushbuf_bufctx(push, nv50->bufctx); + nouveau_pushbuf_validate(push); + + offset &= ~0xff; + + BEGIN_NV04(push, NV50_2D(DST_FORMAT), 2); + PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM); + PUSH_DATA (push, 1); + BEGIN_NV04(push, NV50_2D(DST_PITCH), 5); + PUSH_DATA (push, 262144); + PUSH_DATA (push, 65536); + PUSH_DATA (push, 1); + PUSH_DATAh(push, buf->address + offset); + PUSH_DATA (push, buf->address + offset); + BEGIN_NV04(push, NV50_2D(SIFC_BITMAP_ENABLE), 2); + PUSH_DATA (push, 0); + PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM); + BEGIN_NV04(push, NV50_2D(SIFC_WIDTH), 10); + PUSH_DATA (push, size); + PUSH_DATA (push, 1); + PUSH_DATA (push, 0); + PUSH_DATA (push, 1); + PUSH_DATA (push, 0); + PUSH_DATA (push, 1); + PUSH_DATA (push, 0); + PUSH_DATA (push, xcoord); + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); + + while (count) { + unsigned 
nr_data = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN) / data_words; + unsigned nr = nr_data * data_words; + + BEGIN_NI04(push, NV50_2D(SIFC_DATA), nr); + for (i = 0; i < nr_data; i++) + PUSH_DATAp(push, data, data_words); + + count -= nr; + } + + if (buf->mm) { + nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence); + nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence_wr); + } + + nouveau_bufctx_reset(nv50->bufctx, 0); +} + +static void nv50_clear_buffer(struct pipe_context *pipe, struct pipe_resource *res, unsigned offset, unsigned size, @@ -643,9 +719,22 @@ nv50_clear_buffer(struct pipe_context *pipe, assert(size % data_size == 0); + if (offset & 0xff) { + unsigned fixup_size = MIN2(size, align(offset, 0x100) - offset); + assert(fixup_size % data_size == 0); + nv50_clear_buffer_push(pipe, res, offset, fixup_size, data, data_size); + offset += fixup_size; + size -= fixup_size; + if (!size) + return; + } + elements = size / data_size; height = (elements + 8191) / 8192; width = elements / height; + if (height > 1) + width &= ~0xff; + assert(width > 0); BEGIN_NV04(push, NV50_3D(CLEAR_COLOR(0)), 4); PUSH_DATAf(push, color.f[0]); @@ -669,13 +758,13 @@ nv50_clear_buffer(struct pipe_context *pipe, BEGIN_NV04(push, NV50_3D(RT_CONTROL), 1); PUSH_DATA (push, 1); BEGIN_NV04(push, NV50_3D(RT_ADDRESS_HIGH(0)), 5); - PUSH_DATAh(push, buf->bo->offset + buf->offset + offset); - PUSH_DATA (push, buf->bo->offset + buf->offset + offset); + PUSH_DATAh(push, buf->address + offset); + PUSH_DATA (push, buf->address + offset); PUSH_DATA (push, nv50_format_table[dst_fmt].rt); PUSH_DATA (push, 0); PUSH_DATA (push, 0); BEGIN_NV04(push, NV50_3D(RT_HORIZ(0)), 2); - PUSH_DATA (push, NV50_3D_RT_HORIZ_LINEAR | (width * data_size)); + PUSH_DATA (push, NV50_3D_RT_HORIZ_LINEAR | align(width * data_size, 0x100)); PUSH_DATA (push, height); BEGIN_NV04(push, NV50_3D(ZETA_ENABLE), 1); PUSH_DATA (push, 0); @@ -694,26 +783,21 @@ nv50_clear_buffer(struct pipe_context *pipe, 
BEGIN_NI04(push, NV50_3D(CLEAR_BUFFERS), 1); PUSH_DATA (push, 0x3c); + BEGIN_NV04(push, NV50_3D(COND_MODE), 1); + PUSH_DATA (push, nv50->cond_condmode); + + if (buf->mm) { + nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence); + nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence_wr); + } + if (width * height != elements) { offset += width * height * data_size; width = elements - width * height; - height = 1; - BEGIN_NV04(push, NV50_3D(RT_ADDRESS_HIGH(0)), 2); - PUSH_DATAh(push, buf->bo->offset + buf->offset + offset); - PUSH_DATA (push, buf->bo->offset + buf->offset + offset); - BEGIN_NV04(push, NV50_3D(RT_HORIZ(0)), 2); - PUSH_DATA (push, NV50_3D_RT_HORIZ_LINEAR | (width * data_size)); - PUSH_DATA (push, height); - BEGIN_NI04(push, NV50_3D(CLEAR_BUFFERS), 1); - PUSH_DATA (push, 0x3c); + nv50_clear_buffer_push(pipe, res, offset, width * data_size, + data, data_size); } - BEGIN_NV04(push, NV50_3D(COND_MODE), 1); - PUSH_DATA (push, nv50->cond_condmode); - - nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence); - nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence_wr); - nv50->dirty |= NV50_NEW_FRAMEBUFFER | NV50_NEW_SCISSOR; } |