diff options
author | Christoph Bumiller <[email protected]> | 2012-04-14 23:56:56 +0200 |
---|---|---|
committer | Christoph Bumiller <[email protected]> | 2012-04-15 00:08:51 +0200 |
commit | e44089b2f79aa2dcaacf348911433d1e21235c0c (patch) | |
tree | 955d621392f0068ef8e3c98dc46195ff3916525e /src/gallium/drivers/nvc0/nvc0_transfer.c | |
parent | 69a921892d2303f1400576aa73980c28880f8654 (diff) |
nvc0: add initial support for nve4+ (Kepler) chipsets
Most things that work on Fermi should work on Kepler too.
There are a few performance optimizations left to do, like better
placement of texture barriers and adding scheduling data to the
shader instructions (without that scheduling data, a thread group will be
masked for 32 cycles after each single instruction issue).
Diffstat (limited to 'src/gallium/drivers/nvc0/nvc0_transfer.c')
-rw-r--r-- | src/gallium/drivers/nvc0/nvc0_transfer.c | 159 |
1 file changed, 154 insertions, 5 deletions
diff --git a/src/gallium/drivers/nvc0/nvc0_transfer.c b/src/gallium/drivers/nvc0/nvc0_transfer.c index 774793d8d02..fb44190574e 100644 --- a/src/gallium/drivers/nvc0/nvc0_transfer.c +++ b/src/gallium/drivers/nvc0/nvc0_transfer.c @@ -13,7 +13,7 @@ struct nvc0_transfer { uint16_t nlayers; }; -void +static void nvc0_m2mf_transfer_rect(struct nvc0_context *nvc0, const struct nv50_m2mf_rect *dst, const struct nv50_m2mf_rect *src, @@ -108,6 +108,71 @@ nvc0_m2mf_transfer_rect(struct nvc0_context *nvc0, nouveau_bufctx_reset(bctx, 0); } +static void +nve4_m2mf_transfer_rect(struct nvc0_context *nvc0, + const struct nv50_m2mf_rect *dst, + const struct nv50_m2mf_rect *src, + uint32_t nblocksx, uint32_t nblocksy) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nouveau_bufctx *bctx = nvc0->bufctx; + uint32_t exec; + uint32_t src_base = src->base; + uint32_t dst_base = dst->base; + const int cpp = dst->cpp; + + assert(dst->cpp == src->cpp); + + nouveau_bufctx_refn(bctx, 0, dst->bo, dst->domain | NOUVEAU_BO_WR); + nouveau_bufctx_refn(bctx, 0, src->bo, src->domain | NOUVEAU_BO_RD); + nouveau_pushbuf_bufctx(push, bctx); + nouveau_pushbuf_validate(push); + + exec = 0x200 /* 2D_ENABLE */ | 0x6 /* UNK */; + + if (!nouveau_bo_memtype(dst->bo)) { + assert(!dst->z); + dst_base += dst->y * dst->pitch + dst->x * cpp; + exec |= 0x100; /* DST_MODE_2D_LINEAR */ + } + if (!nouveau_bo_memtype(src->bo)) { + assert(!src->z); + src_base += src->y * src->pitch + src->x * cpp; + exec |= 0x080; /* SRC_MODE_2D_LINEAR */ + } + + BEGIN_NVC0(push, SUBC_COPY(0x070c), 6); + PUSH_DATA (push, 0x1000 | dst->tile_mode); + PUSH_DATA (push, dst->pitch); + PUSH_DATA (push, dst->height); + PUSH_DATA (push, dst->depth); + PUSH_DATA (push, dst->z); + PUSH_DATA (push, (dst->y << 16) | (dst->x * cpp)); + + BEGIN_NVC0(push, SUBC_COPY(0x0728), 6); + PUSH_DATA (push, 0x1000 | src->tile_mode); + PUSH_DATA (push, src->pitch); + PUSH_DATA (push, src->height); + PUSH_DATA (push, src->depth); + PUSH_DATA 
(push, src->z); + PUSH_DATA (push, (src->y << 16) | (src->x * cpp)); + + BEGIN_NVC0(push, SUBC_COPY(0x0400), 8); + PUSH_DATAh(push, src->bo->offset + src_base); + PUSH_DATA (push, src->bo->offset + src_base); + PUSH_DATAh(push, dst->bo->offset + dst_base); + PUSH_DATA (push, dst->bo->offset + dst_base); + PUSH_DATA (push, src->pitch); + PUSH_DATA (push, dst->pitch); + PUSH_DATA (push, nblocksx * cpp); + PUSH_DATA (push, nblocksy); + + BEGIN_NVC0(push, SUBC_COPY(0x0300), 1); + PUSH_DATA (push, exec); + + nouveau_bufctx_reset(bctx, 0); +} + void nvc0_m2mf_push_linear(struct nouveau_context *nv, struct nouveau_bo *dst, unsigned offset, unsigned domain, @@ -154,6 +219,49 @@ nvc0_m2mf_push_linear(struct nouveau_context *nv, } void +nve4_p2mf_push_linear(struct nouveau_context *nv, + struct nouveau_bo *dst, unsigned offset, unsigned domain, + unsigned size, const void *data) +{ + struct nvc0_context *nvc0 = nvc0_context(&nv->pipe); + struct nouveau_pushbuf *push = nv->pushbuf; + uint32_t *src = (uint32_t *)data; + unsigned count = (size + 3) / 4; + + nouveau_bufctx_refn(nvc0->bufctx, 0, dst, domain | NOUVEAU_BO_WR); + nouveau_pushbuf_bufctx(push, nvc0->bufctx); + nouveau_pushbuf_validate(push); + + while (count) { + unsigned nr; + + if (!PUSH_SPACE(push, 16)) + break; + nr = PUSH_AVAIL(push); + assert(nr >= 16); + nr = MIN2(count, nr - 8); + nr = MIN2(nr, (NV04_PFIFO_MAX_PACKET_LEN - 1)); + + BEGIN_NVC0(push, NVE4_P2MF(DST_ADDRESS_HIGH), 2); + PUSH_DATAh(push, dst->offset + offset); + PUSH_DATA (push, dst->offset + offset); + BEGIN_NVC0(push, NVE4_P2MF(LINE_LENGTH_IN), 2); + PUSH_DATA (push, nr * 4); + PUSH_DATA (push, 1); + /* must not be interrupted (trap on QUERY fence, 0x50 works however) */ + BEGIN_1IC0(push, NVE4_P2MF(EXEC), nr + 1); + PUSH_DATA (push, 0x1001); + PUSH_DATAp(push, src, nr); + + count -= nr; + src += nr; + offset += nr * 4; + } + + nouveau_bufctx_reset(nvc0->bufctx, 0); +} + +static void nvc0_m2mf_copy_linear(struct nouveau_context *nv, struct 
nouveau_bo *dst, unsigned dstoff, unsigned dstdom, struct nouveau_bo *src, unsigned srcoff, unsigned srcdom, @@ -191,6 +299,32 @@ nvc0_m2mf_copy_linear(struct nouveau_context *nv, nouveau_bufctx_reset(bctx, 0); } +static void +nve4_m2mf_copy_linear(struct nouveau_context *nv, + struct nouveau_bo *dst, unsigned dstoff, unsigned dstdom, + struct nouveau_bo *src, unsigned srcoff, unsigned srcdom, + unsigned size) +{ + struct nouveau_pushbuf *push = nv->pushbuf; + struct nouveau_bufctx *bctx = nvc0_context(&nv->pipe)->bufctx; + + nouveau_bufctx_refn(bctx, 0, src, srcdom | NOUVEAU_BO_RD); + nouveau_bufctx_refn(bctx, 0, dst, dstdom | NOUVEAU_BO_WR); + nouveau_pushbuf_bufctx(push, bctx); + nouveau_pushbuf_validate(push); + + BEGIN_NVC0(push, SUBC_COPY(0x0400), 4); + PUSH_DATAh(push, src->offset + srcoff); + PUSH_DATA (push, src->offset + srcoff); + PUSH_DATAh(push, dst->offset + dstoff); + PUSH_DATA (push, dst->offset + dstoff); + BEGIN_NVC0(push, SUBC_COPY(0x0418), 1); + PUSH_DATA (push, size); + IMMED_NVC0(push, SUBC_COPY(0x0300), 0x6); + + nouveau_bufctx_reset(bctx, 0); +} + struct pipe_transfer * nvc0_miptree_transfer_new(struct pipe_context *pctx, struct pipe_resource *res, @@ -253,8 +387,8 @@ nvc0_miptree_transfer_new(struct pipe_context *pctx, unsigned z = tx->rect[0].z; unsigned i; for (i = 0; i < tx->nlayers; ++i) { - nvc0_m2mf_transfer_rect(nvc0, &tx->rect[1], &tx->rect[0], - tx->nblocksx, tx->nblocksy); + nvc0->m2mf_copy_rect(nvc0, &tx->rect[1], &tx->rect[0], + tx->nblocksx, tx->nblocksy); if (mt->layout_3d) tx->rect[0].z++; else @@ -280,8 +414,8 @@ nvc0_miptree_transfer_del(struct pipe_context *pctx, if (tx->base.usage & PIPE_TRANSFER_WRITE) { for (i = 0; i < tx->nlayers; ++i) { - nvc0_m2mf_transfer_rect(nvc0, &tx->rect[0], &tx->rect[1], - tx->nblocksx, tx->nblocksy); + nvc0->m2mf_copy_rect(nvc0, &tx->rect[0], &tx->rect[1], + tx->nblocksx, tx->nblocksy); if (mt->layout_3d) tx->rect[0].z++; else @@ -362,3 +496,18 @@ nvc0_cb_push(struct nouveau_context *nv, 
nouveau_bufctx_reset(bctx, 0); } + +void +nvc0_init_transfer_functions(struct nvc0_context *nvc0) +{ + if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) { + nvc0->m2mf_copy_rect = nve4_m2mf_transfer_rect; + nvc0->base.copy_data = nve4_m2mf_copy_linear; + nvc0->base.push_data = nve4_p2mf_push_linear; + } else { + nvc0->m2mf_copy_rect = nvc0_m2mf_transfer_rect; + nvc0->base.copy_data = nvc0_m2mf_copy_linear; + nvc0->base.push_data = nvc0_m2mf_push_linear; + } + nvc0->base.push_cb = nvc0_cb_push; +} |