diff options
Diffstat (limited to 'src/gallium/drivers/nvfx/nvfx_vbo.c')
-rw-r--r-- | src/gallium/drivers/nvfx/nvfx_vbo.c | 628 |
1 files changed, 628 insertions, 0 deletions
diff --git a/src/gallium/drivers/nvfx/nvfx_vbo.c b/src/gallium/drivers/nvfx/nvfx_vbo.c new file mode 100644 index 00000000000..520bae5aed2 --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_vbo.c @@ -0,0 +1,628 @@ +#include "pipe/p_context.h" +#include "pipe/p_state.h" +#include "util/u_inlines.h" +#include "util/u_format.h" + +#include "nvfx_context.h" +#include "nvfx_state.h" +#include "nvfx_resource.h" + +#include "nouveau/nouveau_channel.h" +#include "nouveau/nouveau_class.h" +#include "nouveau/nouveau_pushbuf.h" +#include "nouveau/nouveau_util.h" + +static INLINE int +nvfx_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp) +{ + switch (pipe) { + case PIPE_FORMAT_R32_FLOAT: + case PIPE_FORMAT_R32G32_FLOAT: + case PIPE_FORMAT_R32G32B32_FLOAT: + case PIPE_FORMAT_R32G32B32A32_FLOAT: + *fmt = NV34TCL_VTXFMT_TYPE_FLOAT; + break; + case PIPE_FORMAT_R16_FLOAT: + case PIPE_FORMAT_R16G16_FLOAT: + case PIPE_FORMAT_R16G16B16_FLOAT: + case PIPE_FORMAT_R16G16B16A16_FLOAT: + *fmt = NV34TCL_VTXFMT_TYPE_HALF; + break; + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_R8G8_UNORM: + case PIPE_FORMAT_R8G8B8_UNORM: + case PIPE_FORMAT_R8G8B8A8_UNORM: + *fmt = NV34TCL_VTXFMT_TYPE_UBYTE; + break; + case PIPE_FORMAT_R16_SSCALED: + case PIPE_FORMAT_R16G16_SSCALED: + case PIPE_FORMAT_R16G16B16_SSCALED: + case PIPE_FORMAT_R16G16B16A16_SSCALED: + *fmt = NV34TCL_VTXFMT_TYPE_USHORT; + break; + default: + NOUVEAU_ERR("Unknown format %s\n", util_format_name(pipe)); + return 1; + } + + switch (pipe) { + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_R32_FLOAT: + case PIPE_FORMAT_R16_FLOAT: + case PIPE_FORMAT_R16_SSCALED: + *ncomp = 1; + break; + case PIPE_FORMAT_R8G8_UNORM: + case PIPE_FORMAT_R32G32_FLOAT: + case PIPE_FORMAT_R16G16_FLOAT: + case PIPE_FORMAT_R16G16_SSCALED: + *ncomp = 2; + break; + case PIPE_FORMAT_R8G8B8_UNORM: + case PIPE_FORMAT_R32G32B32_FLOAT: + case PIPE_FORMAT_R16G16B16_FLOAT: + case PIPE_FORMAT_R16G16B16_SSCALED: + *ncomp = 3; + break; + case PIPE_FORMAT_R8G8B8A8_UNORM: + case PIPE_FORMAT_R32G32B32A32_FLOAT: + case PIPE_FORMAT_R16G16B16A16_FLOAT: + case PIPE_FORMAT_R16G16B16A16_SSCALED: + *ncomp = 4; + break; + default: + NOUVEAU_ERR("Unknown format %s\n", util_format_name(pipe)); + return 1; + } + + return 0; +} + +static boolean +nvfx_vbo_set_idxbuf(struct nvfx_context *nvfx, struct pipe_resource *ib, + unsigned ib_size) +{ + unsigned type; + + if (!ib) { + nvfx->idxbuf = NULL; + nvfx->idxbuf_format = 0xdeadbeef; + return FALSE; + } + + if (!nvfx->screen->index_buffer_reloc_flags || ib_size == 1) + return FALSE; + + switch (ib_size) { + case 2: + type = NV34TCL_IDXBUF_FORMAT_TYPE_U16; + break; + case 4: + type = NV34TCL_IDXBUF_FORMAT_TYPE_U32; + break; + default: + return FALSE; + } + + if (ib != nvfx->idxbuf || + type != nvfx->idxbuf_format) { + nvfx->dirty |= NVFX_NEW_ARRAYS; + nvfx->idxbuf = ib; + nvfx->idxbuf_format = type; + } + + return TRUE; +} + +// type must be floating point +static inline void +nvfx_vbo_static_attrib(struct nvfx_context *nvfx, + int attrib, struct pipe_vertex_element *ve, + struct pipe_vertex_buffer *vb, unsigned ncomp) +{ + struct pipe_transfer *transfer; + struct nouveau_channel* chan = nvfx->screen->base.channel; + void *map; + float *v; + + map = pipe_buffer_map(&nvfx->pipe, vb->buffer, PIPE_TRANSFER_READ, &transfer); + map = (uint8_t *) map + vb->buffer_offset + ve->src_offset; + + v = map; + + switch (ncomp) { + case 4: + OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_4F_X(attrib), 4)); + OUT_RING(chan, fui(v[0])); + OUT_RING(chan, fui(v[1])); + OUT_RING(chan, fui(v[2])); + OUT_RING(chan, fui(v[3])); + break; + case 3: + OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_3F_X(attrib), 3)); + OUT_RING(chan, fui(v[0])); + OUT_RING(chan, fui(v[1])); + OUT_RING(chan, fui(v[2])); + break; + case 2: + OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_2F_X(attrib), 2)); + OUT_RING(chan, fui(v[0])); + OUT_RING(chan, fui(v[1])); + break; + case 1: + OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_1F(attrib), 1)); + OUT_RING(chan, fui(v[0])); + break; + } + + pipe_buffer_unmap(&nvfx->pipe, vb->buffer, transfer); +} + +void +nvfx_draw_arrays(struct pipe_context *pipe, + unsigned mode, unsigned start, unsigned count) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nvfx_screen *screen = nvfx->screen; + struct nouveau_channel *chan = screen->base.channel; + unsigned restart = 0; + + nvfx_vbo_set_idxbuf(nvfx, NULL, 0); + if (nvfx->screen->force_swtnl || !nvfx_state_validate(nvfx)) { + nvfx_draw_elements_swtnl(pipe, NULL, 0, 0, + mode, start, count); + return; + } + + while (count) { + unsigned vc, nr, avail; + + nvfx_state_emit(nvfx); + + avail = AVAIL_RING(chan); + avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */ + + vc = nouveau_vbuf_split(avail, 6, 256, + mode, start, count, &restart); + if (!vc) { + FIRE_RING(chan); + continue; + } + + OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1)); + OUT_RING (chan, nvgl_primitive(mode)); + + nr = (vc & 0xff); + if (nr) { + OUT_RING(chan, RING_3D(NV34TCL_VB_VERTEX_BATCH, 1)); + OUT_RING (chan, ((nr - 1) << 24) | start); + start += nr; + } + + nr = vc >> 8; + while (nr) { + unsigned push = nr > 2047 ? 2047 : nr; + + nr -= push; + + OUT_RING(chan, RING_3D_NI(NV34TCL_VB_VERTEX_BATCH, push)); + while (push--) { + OUT_RING(chan, ((0x100 - 1) << 24) | start); + start += 0x100; + } + } + + OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1)); + OUT_RING (chan, 0); + + count -= vc; + start = restart; + } + + pipe->flush(pipe, 0, NULL); +} + +static INLINE void +nvfx_draw_elements_u08(struct nvfx_context *nvfx, void *ib, + unsigned mode, unsigned start, unsigned count) +{ + struct nvfx_screen *screen = nvfx->screen; + struct nouveau_channel *chan = screen->base.channel; + + while (count) { + uint8_t *elts = (uint8_t *)ib + start; + unsigned vc, push, restart = 0, avail; + + nvfx_state_emit(nvfx); + + avail = AVAIL_RING(chan); + avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */ + + vc = nouveau_vbuf_split(avail, 6, 2, + mode, start, count, &restart); + if (vc == 0) { + FIRE_RING(chan); + continue; + } + count -= vc; + + OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1)); + OUT_RING (chan, nvgl_primitive(mode)); + + if (vc & 1) { + OUT_RING(chan, RING_3D(NV34TCL_VB_ELEMENT_U32, 1)); + OUT_RING (chan, elts[0]); + elts++; vc--; + } + + while (vc) { + unsigned i; + + push = MIN2(vc, 2047 * 2); + + OUT_RING(chan, RING_3D_NI(NV34TCL_VB_ELEMENT_U16, push >> 1)); + for (i = 0; i < push; i+=2) + OUT_RING(chan, (elts[i+1] << 16) | elts[i]); + + vc -= push; + elts += push; + } + + OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1)); + OUT_RING (chan, 0); + + start = restart; + } +} + +static INLINE void +nvfx_draw_elements_u16(struct nvfx_context *nvfx, void *ib, + unsigned mode, unsigned start, unsigned count) +{ + struct nvfx_screen *screen = nvfx->screen; + struct nouveau_channel *chan = screen->base.channel; + + while (count) { + uint16_t *elts = (uint16_t *)ib + start; + unsigned vc, push, restart = 0, avail; + + nvfx_state_emit(nvfx); + + avail = AVAIL_RING(chan); + avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */ + + vc = nouveau_vbuf_split(avail, 6, 2, + mode, start, count, &restart); + if (vc == 0) { + FIRE_RING(chan); + continue; + } + count -= vc; + + OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1)); + OUT_RING (chan, nvgl_primitive(mode)); + + if (vc & 1) { + OUT_RING(chan, RING_3D(NV34TCL_VB_ELEMENT_U32, 1)); + OUT_RING (chan, elts[0]); + elts++; vc--; + } + + while (vc) { + unsigned i; + + push = MIN2(vc, 2047 * 2); + + OUT_RING(chan, RING_3D_NI(NV34TCL_VB_ELEMENT_U16, push >> 1)); + for (i = 0; i < push; i+=2) + OUT_RING(chan, (elts[i+1] << 16) | elts[i]); + + vc -= push; + elts += push; + } + + OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1)); + OUT_RING (chan, 0); + + start = restart; + } +} + +static INLINE void +nvfx_draw_elements_u32(struct nvfx_context *nvfx, void *ib, + unsigned mode, unsigned start, unsigned count) +{ + struct nvfx_screen *screen = nvfx->screen; + struct nouveau_channel *chan = screen->base.channel; + + while (count) { + uint32_t *elts = (uint32_t *)ib + start; + unsigned vc, push, restart = 0, avail; + + nvfx_state_emit(nvfx); + + avail = AVAIL_RING(chan); + avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */ + + vc = nouveau_vbuf_split(avail, 5, 1, + mode, start, count, &restart); + if (vc == 0) { + FIRE_RING(chan); + continue; + } + count -= vc; + + OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1)); + OUT_RING (chan, nvgl_primitive(mode)); + + while (vc) { + push = MIN2(vc, 2047); + + OUT_RING(chan, RING_3D_NI(NV34TCL_VB_ELEMENT_U32, push)); + OUT_RINGp (chan, elts, push); + + vc -= push; + elts += push; + } + + OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1)); + OUT_RING (chan, 0); + + start = restart; + } +} + +static void +nvfx_draw_elements_inline(struct pipe_context *pipe, + struct pipe_resource *ib, + unsigned ib_size, int ib_bias, + unsigned mode, unsigned start, unsigned count) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct pipe_transfer *transfer; + void *map; + + map = pipe_buffer_map(pipe, ib, PIPE_TRANSFER_READ, &transfer); + if (!ib) { + NOUVEAU_ERR("failed mapping ib\n"); + return; + } + + assert(ib_bias == 0); + + switch (ib_size) { + case 1: + nvfx_draw_elements_u08(nvfx, map, mode, start, count); + break; + case 2: + nvfx_draw_elements_u16(nvfx, map, mode, start, count); + break; + case 4: + nvfx_draw_elements_u32(nvfx, map, mode, start, count); + break; + default: + NOUVEAU_ERR("invalid idxbuf fmt %d\n", ib_size); + break; + } + + pipe_buffer_unmap(pipe, ib, transfer); +} + +static void +nvfx_draw_elements_vbo(struct pipe_context *pipe, + unsigned mode, unsigned start, unsigned count) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nvfx_screen *screen = nvfx->screen; + struct nouveau_channel *chan = screen->base.channel; + unsigned restart = 0; + + while (count) { + unsigned nr, vc, avail; + + nvfx_state_emit(nvfx); + + avail = AVAIL_RING(chan); + avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */ + + vc = nouveau_vbuf_split(avail, 6, 256, + mode, start, count, &restart); + if (!vc) { + FIRE_RING(chan); + continue; + } + + OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1)); + OUT_RING (chan, nvgl_primitive(mode)); + + nr = (vc & 0xff); + if (nr) { + OUT_RING(chan, RING_3D(NV34TCL_VB_INDEX_BATCH, 1)); + OUT_RING (chan, ((nr - 1) << 24) | start); + start += nr; + } + + nr = vc >> 8; + while (nr) { + unsigned push = nr > 2047 ? 2047 : nr; + + nr -= push; + + OUT_RING(chan, RING_3D_NI(NV34TCL_VB_INDEX_BATCH, push)); + while (push--) { + OUT_RING(chan, ((0x100 - 1) << 24) | start); + start += 0x100; + } + } + + OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1)); + OUT_RING (chan, 0); + + count -= vc; + start = restart; + } +} + +void +nvfx_draw_elements(struct pipe_context *pipe, + struct pipe_resource *indexBuffer, + unsigned indexSize, int indexBias, + unsigned mode, unsigned start, unsigned count) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + boolean idxbuf; + + idxbuf = nvfx_vbo_set_idxbuf(nvfx, indexBuffer, indexSize); + if (nvfx->screen->force_swtnl || !nvfx_state_validate(nvfx)) { + nvfx_draw_elements_swtnl(pipe, + indexBuffer, indexSize, indexBias, + mode, start, count); + return; + } + + if (idxbuf) { + nvfx_draw_elements_vbo(pipe, mode, start, count); + } else { + nvfx_draw_elements_inline(pipe, + indexBuffer, indexSize, indexBias, + mode, start, count); + } + + pipe->flush(pipe, 0, NULL); +} + +boolean +nvfx_vbo_validate(struct nvfx_context *nvfx) +{ + struct nouveau_channel* chan = nvfx->screen->base.channel; + struct pipe_resource *ib = nvfx->idxbuf; + unsigned ib_format = nvfx->idxbuf_format; + int i; + int elements = MAX2(nvfx->vtxelt->num_elements, nvfx->hw_vtxelt_nr); + uint32_t vtxfmt[16]; + unsigned vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD; + + if (!elements) + return TRUE; + + nvfx->vbo_bo = 0; + + MARK_RING(chan, (5 + 2) * 16 + 2 + 11, 16 + 2); + for (i = 0; i < nvfx->vtxelt->num_elements; i++) { + struct pipe_vertex_element *ve; + struct pipe_vertex_buffer *vb; + unsigned type, ncomp; + + ve = &nvfx->vtxelt->pipe[i]; + vb = &nvfx->vtxbuf[ve->vertex_buffer_index]; + + if (nvfx_vbo_format_to_hw(ve->src_format, &type, &ncomp)) { + MARK_UNDO(chan); + nvfx->fallback_swtnl |= NVFX_NEW_ARRAYS; + return FALSE; + } + + if (!vb->stride && type == NV34TCL_VTXFMT_TYPE_FLOAT) { + nvfx_vbo_static_attrib(nvfx, i, ve, vb, ncomp); + vtxfmt[i] = type; + } else { + vtxfmt[i] = ((vb->stride << NV34TCL_VTXFMT_STRIDE_SHIFT) | + (ncomp << NV34TCL_VTXFMT_SIZE_SHIFT) | type); + nvfx->vbo_bo |= (1 << i); + } + } + + for(; i < elements; ++i) + vtxfmt[i] = NV34TCL_VTXFMT_TYPE_FLOAT; + + OUT_RING(chan, RING_3D(NV34TCL_VTXFMT(0), elements)); + OUT_RINGp(chan, vtxfmt, elements); + + if(nvfx->is_nv4x) { + unsigned i; + /* seems to be some kind of cache flushing */ + for(i = 0; i < 3; ++i) { + OUT_RING(chan, RING_3D(0x1718, 1)); + OUT_RING(chan, 0); + } + } + + OUT_RING(chan, RING_3D(NV34TCL_VTXBUF_ADDRESS(0), elements)); + for (i = 0; i < nvfx->vtxelt->num_elements; i++) { + struct pipe_vertex_element *ve; + struct pipe_vertex_buffer *vb; + + ve = &nvfx->vtxelt->pipe[i]; + vb = &nvfx->vtxbuf[ve->vertex_buffer_index]; + + if (!(nvfx->vbo_bo & (1 << i))) + OUT_RING(chan, 0); + else + { + struct nouveau_bo* bo = nvfx_resource(vb->buffer)->bo; + OUT_RELOC(chan, bo, + vb->buffer_offset + ve->src_offset, + vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, + 0, NV34TCL_VTXBUF_ADDRESS_DMA1); + } + } + + for (; i < elements; i++) + OUT_RING(chan, 0); + + OUT_RING(chan, RING_3D(0x1710, 1)); + OUT_RING(chan, 0); + + if (ib) { + unsigned ib_flags = nvfx->screen->index_buffer_reloc_flags | NOUVEAU_BO_RD; + struct nouveau_bo* bo = nvfx_resource(ib)->bo; + + assert(nvfx->screen->index_buffer_reloc_flags); + + OUT_RING(chan, RING_3D(NV34TCL_IDXBUF_ADDRESS, 2)); + OUT_RELOC(chan, bo, 0, ib_flags | NOUVEAU_BO_LOW, 0, 0); + OUT_RELOC(chan, bo, ib_format, ib_flags | NOUVEAU_BO_OR, + 0, NV34TCL_IDXBUF_FORMAT_DMA1); + } + + nvfx->hw_vtxelt_nr = nvfx->vtxelt->num_elements; + return TRUE; +} + +void +nvfx_vbo_relocate(struct nvfx_context *nvfx) +{ + struct nouveau_channel* chan = nvfx->screen->base.channel; + unsigned vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD | NOUVEAU_BO_DUMMY; + int i; + + MARK_RING(chan, 2 * 16 + 3, 2 * 16 + 3); + for(i = 0; i < nvfx->vtxelt->num_elements; ++i) { + if(nvfx->vbo_bo & (1 << i)) { + struct pipe_vertex_element *ve = &nvfx->vtxelt->pipe[i]; + struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index]; + struct nouveau_bo* bo = nvfx_resource(vb->buffer)->bo; + OUT_RELOC(chan, bo, RING_3D(NV34TCL_VTXBUF_ADDRESS(i), 1), + vb_flags, 0, 0); + OUT_RELOC(chan, bo, vb->buffer_offset + ve->src_offset, + vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, + 0, NV34TCL_VTXBUF_ADDRESS_DMA1); + } + } + + if(nvfx->idxbuf) + { + unsigned ib_flags = nvfx->screen->index_buffer_reloc_flags | NOUVEAU_BO_RD | NOUVEAU_BO_DUMMY; + struct nouveau_bo* bo = nvfx_resource(nvfx->idxbuf)->bo; + + assert(nvfx->screen->index_buffer_reloc_flags); + + OUT_RELOC(chan, bo, RING_3D(NV34TCL_IDXBUF_ADDRESS, 2), + ib_flags, 0, 0); + OUT_RELOC(chan, bo, 0, + ib_flags | NOUVEAU_BO_LOW, 0, 0); + OUT_RELOC(chan, bo, nvfx->idxbuf_format, + ib_flags | NOUVEAU_BO_OR, + 0, NV34TCL_IDXBUF_FORMAT_DMA1); + } +} |