diff options
Diffstat (limited to 'src/gallium/drivers/nvfx/nvfx_vbo.c')
-rw-r--r-- | src/gallium/drivers/nvfx/nvfx_vbo.c | 1022 |
1 files changed, 486 insertions, 536 deletions
diff --git a/src/gallium/drivers/nvfx/nvfx_vbo.c b/src/gallium/drivers/nvfx/nvfx_vbo.c index 4aa37938425..e6e9a8f2e40 100644 --- a/src/gallium/drivers/nvfx/nvfx_vbo.c +++ b/src/gallium/drivers/nvfx/nvfx_vbo.c @@ -2,6 +2,7 @@ #include "pipe/p_state.h" #include "util/u_inlines.h" #include "util/u_format.h" +#include "translate/translate.h" #include "nvfx_context.h" #include "nvfx_state.h" @@ -10,646 +11,595 @@ #include "nouveau/nouveau_channel.h" #include "nouveau/nouveau_class.h" #include "nouveau/nouveau_pushbuf.h" -#include "nouveau/nouveau_util.h" -static INLINE int -nvfx_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp) +static inline unsigned +util_guess_unique_indices_count(unsigned mode, unsigned indices) { - switch (pipe) { - case PIPE_FORMAT_R32_FLOAT: - case PIPE_FORMAT_R32G32_FLOAT: - case PIPE_FORMAT_R32G32B32_FLOAT: - case PIPE_FORMAT_R32G32B32A32_FLOAT: - *fmt = NV34TCL_VTXFMT_TYPE_FLOAT; - break; - case PIPE_FORMAT_R16_FLOAT: - case PIPE_FORMAT_R16G16_FLOAT: - case PIPE_FORMAT_R16G16B16_FLOAT: - case PIPE_FORMAT_R16G16B16A16_FLOAT: - *fmt = NV34TCL_VTXFMT_TYPE_HALF; - break; - case PIPE_FORMAT_R8_UNORM: - case PIPE_FORMAT_R8G8_UNORM: - case PIPE_FORMAT_R8G8B8_UNORM: - case PIPE_FORMAT_R8G8B8A8_UNORM: - *fmt = NV34TCL_VTXFMT_TYPE_UBYTE; - break; - case PIPE_FORMAT_R16_SSCALED: - case PIPE_FORMAT_R16G16_SSCALED: - case PIPE_FORMAT_R16G16B16_SSCALED: - case PIPE_FORMAT_R16G16B16A16_SSCALED: - *fmt = NV34TCL_VTXFMT_TYPE_USHORT; - break; - default: - NOUVEAU_ERR("Unknown format %s\n", util_format_name(pipe)); - return 1; + /* Euler's formula gives V = + * = E - F + 2 = + * = F * (polygon_edges / 2 - 1) + 2 = + * = F * (polygon_edges - 2) / 2 + 2 = + * = indices * (polygon_edges - 2) / (2 * indices_per_face) + 2 + * = indices * (1 / 2 - 1 / polygon_edges) + 2 + */ + switch(mode) + { + case PIPE_PRIM_LINES: + return indices >> 1; + case PIPE_PRIM_TRIANGLES: + { + // avoid an expensive division by 3 using the multiplicative inverse mod 2^32 + unsigned q; + unsigned inv3 = 2863311531; + indices >>= 1; + q = indices * inv3; + if(unlikely(q >= indices)) + { + q += inv3; + if(q >= indices) + q += inv3; + } + return indices + 2; + //return indices / 6 + 2; } - - switch (pipe) { - case PIPE_FORMAT_R8_UNORM: - case PIPE_FORMAT_R32_FLOAT: - case PIPE_FORMAT_R16_FLOAT: - case PIPE_FORMAT_R16_SSCALED: - *ncomp = 1; - break; - case PIPE_FORMAT_R8G8_UNORM: - case PIPE_FORMAT_R32G32_FLOAT: - case PIPE_FORMAT_R16G16_FLOAT: - case PIPE_FORMAT_R16G16_SSCALED: - *ncomp = 2; - break; - case PIPE_FORMAT_R8G8B8_UNORM: - case PIPE_FORMAT_R32G32B32_FLOAT: - case PIPE_FORMAT_R16G16B16_FLOAT: - case PIPE_FORMAT_R16G16B16_SSCALED: - *ncomp = 3; - break; - case PIPE_FORMAT_R8G8B8A8_UNORM: - case PIPE_FORMAT_R32G32B32A32_FLOAT: - case PIPE_FORMAT_R16G16B16A16_FLOAT: - case PIPE_FORMAT_R16G16B16A16_SSCALED: - *ncomp = 4; - break; + // guess that indexed quads are created by successive connections, since a closed mesh seems unlikely + case PIPE_PRIM_QUADS: + return (indices >> 1) + 2; + // return (indices >> 2) + 2; // if it is a closed mesh default: - NOUVEAU_ERR("Unknown format %s\n", util_format_name(pipe)); - return 1; + return indices; } - - return 0; } -static boolean -nvfx_vbo_set_idxbuf(struct nvfx_context *nvfx, struct pipe_resource *ib, - unsigned ib_size) +static unsigned nvfx_decide_upload_mode(struct pipe_context *pipe, const struct pipe_draw_info *info) { - unsigned type; - - if (!ib) { - nvfx->idxbuf_buffer = NULL; - nvfx->idxbuf_format = 0xdeadbeef; - return FALSE; + struct nvfx_context* nvfx = nvfx_context(pipe); + unsigned hardware_cost = 0; + unsigned inline_cost = 0; + unsigned unique_vertices; + unsigned upload_mode; + float best_index_cost_for_hardware_vertices_as_inline_cost; + boolean prefer_hardware_indices; + unsigned index_inline_cost; + unsigned index_hardware_cost; + if (info->indexed) + unique_vertices = util_guess_unique_indices_count(info->mode, info->count); + else + unique_vertices = info->count; + + /* Here we try to figure out if we are better off writing vertex data directly on the FIFO, + * or create hardware buffer objects and pointing the hardware to them. + * + * This is done by computing the total memcpy cost of each option, ignoring uploads + * if we think that the buffer is static and thus the upload cost will be amortized over + * future draw calls. + * + * For instance, if everything looks static, we will always create buffer objects, while if + * everything is a user buffer and we are not doing indexed drawing, we never do. + * + * Other interesting cases are where a small user vertex buffer, but a huge user index buffer, + * where we will upload the vertex buffer, so that we can use hardware index lookup, and + * the opposite case, where we instead do index lookup in software to avoid uploading + * a huge amount of vertex data that is not going to be used. + * + * Otherwise, we generally move to the GPU the after it has been pushed + * NVFX_STATIC_BUFFER_MIN_REUSE_TIMES times to the GPU without having + * been updated with a transfer (or just the buffer having been destroyed). + * + * There is no special handling for user buffers, since applications can use + * OpenGL VBOs in a one-shot fashion. OpenGL 3/4 core profile forces this + * by the way. + * + * Note that currently we don't support only putting some data on the FIFO, and + * some on vertex buffers (constant and instanced data is independent from this). + * + * nVidia doesn't seem to do this either, even though it should be at least + * doable with VTX_ATTR and possibly with VERTEX_DATA too if not indexed. + */ + + for (unsigned i = 0; i < nvfx->vtxelt->num_per_vertex_buffer_infos; i++) + { + struct nvfx_per_vertex_buffer_info* vbi = &nvfx->vtxelt->per_vertex_buffer_info[i]; + struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[vbi->vertex_buffer_index]; + struct nvfx_buffer* buffer = nvfx_buffer(vb->buffer); + buffer->bytes_to_draw_until_static -= vbi->per_vertex_size * unique_vertices; + if (!nvfx_buffer_seems_static(buffer)) + { + hardware_cost += buffer->dirty_end - buffer->dirty_begin; + if (!buffer->base.bo) + hardware_cost += nvfx->screen->buffer_allocation_cost; + } + inline_cost += vbi->per_vertex_size * info->count; } - if (!nvfx->screen->index_buffer_reloc_flags || ib_size == 1) - return FALSE; + best_index_cost_for_hardware_vertices_as_inline_cost = 0.0f; + prefer_hardware_indices = FALSE; + index_inline_cost = 0; + index_hardware_cost = 0; - switch (ib_size) { - case 2: - type = NV34TCL_IDXBUF_FORMAT_TYPE_U16; - break; - case 4: - type = NV34TCL_IDXBUF_FORMAT_TYPE_U32; - break; - default: - return FALSE; - } + if (info->indexed) + { + index_inline_cost = nvfx->idxbuf.index_size * info->count; + if (nvfx->screen->index_buffer_reloc_flags + && (nvfx->idxbuf.index_size == 2 || nvfx->idxbuf.index_size == 4) + && !(nvfx->idxbuf.offset & (nvfx->idxbuf.index_size - 1))) + { + struct nvfx_buffer* buffer = nvfx_buffer(nvfx->idxbuf.buffer); + buffer->bytes_to_draw_until_static -= index_inline_cost; - if (ib != nvfx->idxbuf_buffer || - type != nvfx->idxbuf_format) { - nvfx->dirty |= NVFX_NEW_ARRAYS; - nvfx->idxbuf_buffer = ib; - nvfx->idxbuf_format = type; - } + prefer_hardware_indices = TRUE; - return TRUE; -} + if (!nvfx_buffer_seems_static(buffer)) + { + index_hardware_cost = buffer->dirty_end - buffer->dirty_begin; + if (!buffer->base.bo) + index_hardware_cost += nvfx->screen->buffer_allocation_cost; + } -// type must be floating point -static inline void -nvfx_vbo_static_attrib(struct nvfx_context *nvfx, - int attrib, struct pipe_vertex_element *ve, - struct pipe_vertex_buffer *vb, unsigned ncomp) -{ - struct pipe_transfer *transfer; - struct nouveau_channel* chan = nvfx->screen->base.channel; - void *map; - float *v; - - map = pipe_buffer_map(&nvfx->pipe, vb->buffer, PIPE_TRANSFER_READ, &transfer); - map = (uint8_t *) map + vb->buffer_offset + ve->src_offset; - - v = map; - - switch (ncomp) { - case 4: - OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_4F_X(attrib), 4)); - OUT_RING(chan, fui(v[0])); - OUT_RING(chan, fui(v[1])); - OUT_RING(chan, fui(v[2])); - OUT_RING(chan, fui(v[3])); - break; - case 3: - OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_3F_X(attrib), 3)); - OUT_RING(chan, fui(v[0])); - OUT_RING(chan, fui(v[1])); - OUT_RING(chan, fui(v[2])); - break; - case 2: - OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_2F_X(attrib), 2)); - OUT_RING(chan, fui(v[0])); - OUT_RING(chan, fui(v[1])); - break; - case 1: - OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_1F(attrib), 1)); - OUT_RING(chan, fui(v[0])); - break; + if ((float) index_inline_cost < (float) index_hardware_cost * nvfx->screen->inline_cost_per_hardware_cost) + { + best_index_cost_for_hardware_vertices_as_inline_cost = (float) index_inline_cost; + } + else + { + best_index_cost_for_hardware_vertices_as_inline_cost = (float) index_hardware_cost * nvfx->screen->inline_cost_per_hardware_cost; + prefer_hardware_indices = TRUE; + } + } } - pipe_buffer_unmap(&nvfx->pipe, vb->buffer, transfer); + /* let's finally figure out which of the 3 paths we want to take */ + if ((float) (inline_cost + index_inline_cost) > ((float) hardware_cost * nvfx->screen->inline_cost_per_hardware_cost + best_index_cost_for_hardware_vertices_as_inline_cost)) + upload_mode = 1 + prefer_hardware_indices; + else + upload_mode = 0; + +#ifdef DEBUG + if (unlikely(nvfx->screen->trace_draw)) + { + fprintf(stderr, "DRAW"); + if (info->indexed) + { + fprintf(stderr, "_IDX%u", nvfx->idxbuf.index_size); + if (info->index_bias) + fprintf(stderr, " biased %u", info->index_bias); + fprintf(stderr, " idxrange %u -> %u", info->min_index, info->max_index); + } + if (info->instance_count > 1) + fprintf(stderr, " %u instances from %u", info->instance_count, info->indexed); + fprintf(stderr, " start %u count %u prim %u", info->start, info->count, info->mode); + if (!upload_mode) + fprintf(stderr, " -> inline vertex data"); + else if (upload_mode == 2 || !info->indexed) + fprintf(stderr, " -> buffer range"); + else + fprintf(stderr, " -> inline indices"); + fprintf(stderr, " [ivtx %u hvtx %u iidx %u hidx %u bidx %f] <", inline_cost, hardware_cost, index_inline_cost, index_hardware_cost, best_index_cost_for_hardware_vertices_as_inline_cost); + for (unsigned i = 0; i < nvfx->vtxelt->num_per_vertex_buffer_infos; ++i) + { + struct nvfx_per_vertex_buffer_info* vbi = &nvfx->vtxelt->per_vertex_buffer_info[i]; + struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[vbi->vertex_buffer_index]; + struct nvfx_buffer* buffer = nvfx_buffer(vb->buffer); + if (i) + fprintf(stderr, ", "); + fprintf(stderr, "%p%s left %Li", buffer, buffer->last_update_static ? " static" : "", buffer->bytes_to_draw_until_static); + } + fprintf(stderr, ">\n"); + } +#endif + + return upload_mode; } -static void -nvfx_draw_arrays(struct pipe_context *pipe, - unsigned mode, unsigned start, unsigned count) +void nvfx_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) { struct nvfx_context *nvfx = nvfx_context(pipe); - struct nvfx_screen *screen = nvfx->screen; - struct nouveau_channel *chan = screen->base.channel; - unsigned restart = 0; - - nvfx_vbo_set_idxbuf(nvfx, NULL, 0); - if (nvfx->screen->force_swtnl || !nvfx_state_validate(nvfx)) { - nvfx_draw_elements_swtnl(pipe, NULL, 0, 0, - mode, start, count); - return; - } + unsigned upload_mode = 0; - while (count) { - unsigned vc, nr, avail; + if (!nvfx->vtxelt->needs_translate) + upload_mode = nvfx_decide_upload_mode(pipe, info); - nvfx_state_emit(nvfx); + nvfx->use_index_buffer = upload_mode > 1; - avail = AVAIL_RING(chan); - avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */ + if ((upload_mode > 0) != nvfx->use_vertex_buffers) + { + nvfx->use_vertex_buffers = (upload_mode > 0); + nvfx->dirty |= NVFX_NEW_ARRAYS; + nvfx->draw_dirty |= NVFX_NEW_ARRAYS; + } - vc = nouveau_vbuf_split(avail, 6, 256, - mode, start, count, &restart); - if (!vc) { - FIRE_RING(chan); - continue; + if (upload_mode > 0) + { + for (unsigned i = 0; i < nvfx->vtxelt->num_per_vertex_buffer_infos; i++) + { + struct nvfx_per_vertex_buffer_info* vbi = &nvfx->vtxelt->per_vertex_buffer_info[i]; + struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[vbi->vertex_buffer_index]; + nvfx_buffer_upload(nvfx_buffer(vb->buffer)); } - OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1)); - OUT_RING (chan, nvgl_primitive(mode)); + if (upload_mode > 1) + { + nvfx_buffer_upload(nvfx_buffer(nvfx->idxbuf.buffer)); - nr = (vc & 0xff); - if (nr) { - OUT_RING(chan, RING_3D(NV34TCL_VB_VERTEX_BATCH, 1)); - OUT_RING (chan, ((nr - 1) << 24) | start); - start += nr; + if (unlikely(info->index_bias != nvfx->base_vertex)) + { + nvfx->base_vertex = info->index_bias; + nvfx->dirty |= NVFX_NEW_ARRAYS; + } } - - nr = vc >> 8; - while (nr) { - unsigned push = nr > 2047 ? 2047 : nr; - - nr -= push; - - OUT_RING(chan, RING_3D_NI(NV34TCL_VB_VERTEX_BATCH, push)); - while (push--) { - OUT_RING(chan, ((0x100 - 1) << 24) | start); - start += 0x100; + else + { + if (unlikely(info->start < nvfx->base_vertex && nvfx->base_vertex)) + { + nvfx->base_vertex = 0; + nvfx->dirty |= NVFX_NEW_ARRAYS; } } - - OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1)); - OUT_RING (chan, 0); - - count -= vc; - start = restart; } - pipe->flush(pipe, 0, NULL); + if (nvfx->screen->force_swtnl || !nvfx_state_validate(nvfx)) + nvfx_draw_vbo_swtnl(pipe, info); + else + nvfx_push_vbo(pipe, info); } -static INLINE void -nvfx_draw_elements_u08(struct nvfx_context *nvfx, void *ib, - unsigned mode, unsigned start, unsigned count) +boolean +nvfx_vbo_validate(struct nvfx_context *nvfx) { - struct nvfx_screen *screen = nvfx->screen; - struct nouveau_channel *chan = screen->base.channel; + struct nouveau_channel* chan = nvfx->screen->base.channel; + int i; + int elements = MAX2(nvfx->vtxelt->num_elements, nvfx->hw_vtxelt_nr); + unsigned vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD; - while (count) { - uint8_t *elts = (uint8_t *)ib + start; - unsigned vc, push, restart = 0, avail; + if (!elements) + return TRUE; - nvfx_state_emit(nvfx); + MARK_RING(chan, (5 + 2) * 16 + 2 + 11, 16 + 2); + for(unsigned i = 0; i < nvfx->vtxelt->num_constant; ++i) + { + struct nvfx_low_frequency_element *ve = &nvfx->vtxelt->constant[i]; + struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index]; + struct nvfx_buffer* buffer = nvfx_buffer(vb->buffer); + float v[4]; + ve->fetch_rgba_float(v, buffer->data + vb->buffer_offset + ve->src_offset, 0, 0); + nvfx_emit_vtx_attr(chan, ve->idx, v, ve->ncomp); + } - avail = AVAIL_RING(chan); - avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */ - vc = nouveau_vbuf_split(avail, 6, 2, - mode, start, count, &restart); - if (vc == 0) { - FIRE_RING(chan); - continue; - } - count -= vc; + OUT_RING(chan, RING_3D(NV34TCL_VTXFMT(0), elements)); + if(nvfx->use_vertex_buffers) + { + unsigned idx = 0; + for (i = 0; i < nvfx->vtxelt->num_per_vertex; i++) { + struct nvfx_per_vertex_element *ve = &nvfx->vtxelt->per_vertex[i]; + struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index]; - OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1)); - OUT_RING (chan, nvgl_primitive(mode)); + if(idx != ve->idx) + { + assert(idx < ve->idx); + OUT_RINGp(chan, &nvfx->vtxelt->vtxfmt[idx], ve->idx - idx); + idx = ve->idx; + } - if (vc & 1) { - OUT_RING(chan, RING_3D(NV34TCL_VB_ELEMENT_U32, 1)); - OUT_RING (chan, elts[0]); - elts++; vc--; + OUT_RING(chan, nvfx->vtxelt->vtxfmt[idx] | (vb->stride << NV34TCL_VTXFMT_STRIDE_SHIFT)); + ++idx; } + if(idx != nvfx->vtxelt->num_elements) + OUT_RINGp(chan, &nvfx->vtxelt->vtxfmt[idx], nvfx->vtxelt->num_elements - idx); + } + else + OUT_RINGp(chan, nvfx->vtxelt->vtxfmt, nvfx->vtxelt->num_elements); - while (vc) { - unsigned i; - - push = MIN2(vc, 2047 * 2); - - OUT_RING(chan, RING_3D_NI(NV34TCL_VB_ELEMENT_U16, push >> 1)); - for (i = 0; i < push; i+=2) - OUT_RING(chan, (elts[i+1] << 16) | elts[i]); + for(i = nvfx->vtxelt->num_elements; i < elements; ++i) + OUT_RING(chan, NV34TCL_VTXFMT_TYPE_32_FLOAT); - vc -= push; - elts += push; + if(nvfx->is_nv4x) { + unsigned i; + /* seems to be some kind of cache flushing */ + for(i = 0; i < 3; ++i) { + OUT_RING(chan, RING_3D(0x1718, 1)); + OUT_RING(chan, 0); } - - OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1)); - OUT_RING (chan, 0); - - start = restart; } -} - -static INLINE void -nvfx_draw_elements_u16(struct nvfx_context *nvfx, void *ib, - unsigned mode, unsigned start, unsigned count) -{ - struct nvfx_screen *screen = nvfx->screen; - struct nouveau_channel *chan = screen->base.channel; - - while (count) { - uint16_t *elts = (uint16_t *)ib + start; - unsigned vc, push, restart = 0, avail; - - nvfx_state_emit(nvfx); - - avail = AVAIL_RING(chan); - avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */ - vc = nouveau_vbuf_split(avail, 6, 2, - mode, start, count, &restart); - if (vc == 0) { - FIRE_RING(chan); - continue; - } - count -= vc; + OUT_RING(chan, RING_3D(NV34TCL_VTXBUF_ADDRESS(0), elements)); + if(nvfx->use_vertex_buffers) + { + unsigned idx = 0; + for (i = 0; i < nvfx->vtxelt->num_per_vertex; i++) { + struct nvfx_per_vertex_element *ve = &nvfx->vtxelt->per_vertex[i]; + struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index]; + struct nouveau_bo* bo = nvfx_resource(vb->buffer)->bo; - OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1)); - OUT_RING (chan, nvgl_primitive(mode)); + for(; idx < ve->idx; ++idx) + OUT_RING(chan, 0); - if (vc & 1) { - OUT_RING(chan, RING_3D(NV34TCL_VB_ELEMENT_U32, 1)); - OUT_RING (chan, elts[0]); - elts++; vc--; + OUT_RELOC(chan, bo, + vb->buffer_offset + ve->src_offset + nvfx->base_vertex * vb->stride, + vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, + 0, NV34TCL_VTXBUF_ADDRESS_DMA1); + ++idx; } - while (vc) { - unsigned i; - - push = MIN2(vc, 2047 * 2); - - OUT_RING(chan, RING_3D_NI(NV34TCL_VB_ELEMENT_U16, push >> 1)); - for (i = 0; i < push; i+=2) - OUT_RING(chan, (elts[i+1] << 16) | elts[i]); - - vc -= push; - elts += push; - } + for(; idx < elements; ++idx) + OUT_RING(chan, 0); + } + else + { + for (i = 0; i < elements; i++) + OUT_RING(chan, 0); + } - OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1)); - OUT_RING (chan, 0); + OUT_RING(chan, RING_3D(0x1710, 1)); + OUT_RING(chan, 0); - start = restart; - } + nvfx->hw_vtxelt_nr = nvfx->vtxelt->num_elements; + nvfx->relocs_needed &=~ NVFX_RELOCATE_VTXBUF; + return TRUE; } -static INLINE void -nvfx_draw_elements_u32(struct nvfx_context *nvfx, void *ib, - unsigned mode, unsigned start, unsigned count) +void +nvfx_vbo_relocate(struct nvfx_context *nvfx) { - struct nvfx_screen *screen = nvfx->screen; - struct nouveau_channel *chan = screen->base.channel; - - while (count) { - uint32_t *elts = (uint32_t *)ib + start; - unsigned vc, push, restart = 0, avail; - - nvfx_state_emit(nvfx); - - avail = AVAIL_RING(chan); - avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */ - - vc = nouveau_vbuf_split(avail, 5, 1, - mode, start, count, &restart); - if (vc == 0) { - FIRE_RING(chan); - continue; - } - count -= vc; - - OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1)); - OUT_RING (chan, nvgl_primitive(mode)); - - while (vc) { - push = MIN2(vc, 2047); - - OUT_RING(chan, RING_3D_NI(NV34TCL_VB_ELEMENT_U32, push)); - OUT_RINGp (chan, elts, push); + struct nouveau_channel* chan; + unsigned vb_flags; + int i; - vc -= push; - elts += push; - } + if(!nvfx->use_vertex_buffers) + return; - OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1)); - OUT_RING (chan, 0); + chan = nvfx->screen->base.channel; + vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD | NOUVEAU_BO_DUMMY; - start = restart; + MARK_RING(chan, 2 * 16 + 3, 2 * 16 + 3); + for (i = 0; i < nvfx->vtxelt->num_per_vertex; i++) { + struct nvfx_per_vertex_element *ve = &nvfx->vtxelt->per_vertex[i]; + struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index]; + struct nouveau_bo* bo = nvfx_resource(vb->buffer)->bo; + + OUT_RELOC(chan, bo, RING_3D(NV34TCL_VTXBUF_ADDRESS(ve->idx), 1), + vb_flags, 0, 0); + OUT_RELOC(chan, bo, vb->buffer_offset + ve->src_offset + nvfx->base_vertex * vb->stride, + vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, + 0, NV34TCL_VTXBUF_ADDRESS_DMA1); } + nvfx->relocs_needed &=~ NVFX_RELOCATE_VTXBUF; } static void -nvfx_draw_elements_inline(struct pipe_context *pipe, - struct pipe_resource *ib, - unsigned ib_size, int ib_bias, - unsigned mode, unsigned start, unsigned count) +nvfx_idxbuf_emit(struct nvfx_context* nvfx, unsigned ib_flags) { - struct nvfx_context *nvfx = nvfx_context(pipe); - struct pipe_transfer *transfer; - void *map; - - map = pipe_buffer_map(pipe, ib, PIPE_TRANSFER_READ, &transfer); - if (!ib) { - NOUVEAU_ERR("failed mapping ib\n"); - return; - } + struct nouveau_channel* chan = nvfx->screen->base.channel; + unsigned ib_format = (nvfx->idxbuf.index_size == 2) ? NV34TCL_IDXBUF_FORMAT_TYPE_U16 : NV34TCL_IDXBUF_FORMAT_TYPE_U32; + struct nouveau_bo* bo = nvfx_resource(nvfx->idxbuf.buffer)->bo; + ib_flags |= nvfx->screen->index_buffer_reloc_flags | NOUVEAU_BO_RD; - assert(ib_bias == 0); - - switch (ib_size) { - case 1: - nvfx_draw_elements_u08(nvfx, map, mode, start, count); - break; - case 2: - nvfx_draw_elements_u16(nvfx, map, mode, start, count); - break; - case 4: - nvfx_draw_elements_u32(nvfx, map, mode, start, count); - break; - default: - NOUVEAU_ERR("invalid idxbuf fmt %d\n", ib_size); - break; - } + assert(nvfx->screen->index_buffer_reloc_flags); - pipe_buffer_unmap(pipe, ib, transfer); + MARK_RING(chan, 3, 3); + if(ib_flags & NOUVEAU_BO_DUMMY) + OUT_RELOC(chan, bo, RING_3D(NV34TCL_IDXBUF_ADDRESS, 2), ib_flags, 0, 0); + else + OUT_RING(chan, RING_3D(NV34TCL_IDXBUF_ADDRESS, 2)); + OUT_RELOC(chan, bo, nvfx->idxbuf.offset + 1, ib_flags | NOUVEAU_BO_LOW, 0, 0); + OUT_RELOC(chan, bo, ib_format, ib_flags | NOUVEAU_BO_OR, + 0, NV34TCL_IDXBUF_FORMAT_DMA1); + nvfx->relocs_needed &=~ NVFX_RELOCATE_IDXBUF; } -static void -nvfx_draw_elements_vbo(struct pipe_context *pipe, - unsigned mode, unsigned start, unsigned count) +void +nvfx_idxbuf_validate(struct nvfx_context* nvfx) { - struct nvfx_context *nvfx = nvfx_context(pipe); - struct nvfx_screen *screen = nvfx->screen; - struct nouveau_channel *chan = screen->base.channel; - unsigned restart = 0; + nvfx_idxbuf_emit(nvfx, 0); +} - while (count) { - unsigned nr, vc, avail; +void +nvfx_idxbuf_relocate(struct nvfx_context* nvfx) +{ + nvfx_idxbuf_emit(nvfx, NOUVEAU_BO_DUMMY); +} - nvfx_state_emit(nvfx); +unsigned nvfx_vertex_formats[PIPE_FORMAT_COUNT] = +{ + [PIPE_FORMAT_R32_FLOAT] = NV34TCL_VTXFMT_TYPE_32_FLOAT, + [PIPE_FORMAT_R32G32_FLOAT] = NV34TCL_VTXFMT_TYPE_32_FLOAT, + [PIPE_FORMAT_R32G32B32A32_FLOAT] = NV34TCL_VTXFMT_TYPE_32_FLOAT, + [PIPE_FORMAT_R32G32B32_FLOAT] = NV34TCL_VTXFMT_TYPE_32_FLOAT, + [PIPE_FORMAT_R16_FLOAT] = NV34TCL_VTXFMT_TYPE_16_FLOAT, + [PIPE_FORMAT_R16G16_FLOAT] = NV34TCL_VTXFMT_TYPE_16_FLOAT, + [PIPE_FORMAT_R16G16B16_FLOAT] = NV34TCL_VTXFMT_TYPE_16_FLOAT, + [PIPE_FORMAT_R16G16B16A16_FLOAT] = NV34TCL_VTXFMT_TYPE_16_FLOAT, + [PIPE_FORMAT_R8_UNORM] = NV34TCL_VTXFMT_TYPE_8_UNORM, + [PIPE_FORMAT_R8G8_UNORM] = NV34TCL_VTXFMT_TYPE_8_UNORM, + [PIPE_FORMAT_R8G8B8_UNORM] = NV34TCL_VTXFMT_TYPE_8_UNORM, + [PIPE_FORMAT_R8G8B8A8_UNORM] = NV34TCL_VTXFMT_TYPE_8_UNORM, + [PIPE_FORMAT_R8G8B8A8_USCALED] = NV34TCL_VTXFMT_TYPE_8_USCALED, + [PIPE_FORMAT_R16_SNORM] = NV34TCL_VTXFMT_TYPE_16_SNORM, + [PIPE_FORMAT_R16G16_SNORM] = NV34TCL_VTXFMT_TYPE_16_SNORM, + [PIPE_FORMAT_R16G16B16_SNORM] = NV34TCL_VTXFMT_TYPE_16_SNORM, + [PIPE_FORMAT_R16G16B16A16_SNORM] = NV34TCL_VTXFMT_TYPE_16_SNORM, + [PIPE_FORMAT_R16_SSCALED] = NV34TCL_VTXFMT_TYPE_16_SSCALED, + [PIPE_FORMAT_R16G16_SSCALED] = NV34TCL_VTXFMT_TYPE_16_SSCALED, + [PIPE_FORMAT_R16G16B16_SSCALED] = NV34TCL_VTXFMT_TYPE_16_SSCALED, + [PIPE_FORMAT_R16G16B16A16_SSCALED] = NV34TCL_VTXFMT_TYPE_16_SSCALED, +}; + +static void * +nvfx_vtxelts_state_create(struct pipe_context *pipe, + unsigned num_elements, + const struct pipe_vertex_element *elements) +{ + struct nvfx_vtxelt_state *cso = CALLOC_STRUCT(nvfx_vtxelt_state); + struct translate_key transkey; + unsigned per_vertex_size[16]; + unsigned vb_compacted_index[16]; - avail = AVAIL_RING(chan); - avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */ + if(num_elements > 16) + { + _debug_printf("Error: application attempted to use %u vertex elements, but only 16 are supported: ignoring the rest\n", num_elements); + num_elements = 16; + } - vc = nouveau_vbuf_split(avail, 6, 256, - mode, start, count, &restart); - if (!vc) { - FIRE_RING(chan); - continue; - } + memset(per_vertex_size, 0, sizeof(per_vertex_size)); + memcpy(cso->pipe, elements, num_elements * sizeof(elements[0])); + cso->num_elements = num_elements; + cso->needs_translate = FALSE; + + transkey.nr_elements = 0; + transkey.output_stride = 0; + + for(unsigned i = 0; i < num_elements; ++i) + { + const struct pipe_vertex_element* ve = &elements[i]; + if(!ve->instance_divisor) + per_vertex_size[ve->vertex_buffer_index] += util_format_get_stride(ve->src_format, 1); + } + + for(unsigned i = 0; i < 16; ++i) + { + if(per_vertex_size[i]) + { + unsigned idx = cso->num_per_vertex_buffer_infos++; + cso->per_vertex_buffer_info[idx].vertex_buffer_index = i; + cso->per_vertex_buffer_info[idx].per_vertex_size = per_vertex_size[i]; + vb_compacted_index[i] = idx; + } + } + + for(unsigned i = 0; i < num_elements; ++i) + { + const struct pipe_vertex_element* ve = &elements[i]; + unsigned type = nvfx_vertex_formats[ve->src_format]; + unsigned ncomp = util_format_get_nr_components(ve->src_format); - OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1)); - OUT_RING (chan, nvgl_primitive(mode)); + //if(ve->frequency != PIPE_ELEMENT_FREQUENCY_PER_VERTEX) + if(ve->instance_divisor) + { + struct nvfx_low_frequency_element* lfve; + cso->vtxfmt[i] = NV34TCL_VTXFMT_TYPE_32_FLOAT; + + //if(ve->frequency == PIPE_ELEMENT_FREQUENCY_CONSTANT) + if(0) + lfve = &cso->constant[cso->num_constant++]; + else + { + lfve = &cso->per_instance[cso->num_per_instance++].base; + ((struct nvfx_per_instance_element*)lfve)->instance_divisor = ve->instance_divisor; + } - nr = (vc & 0xff); - if (nr) { - OUT_RING(chan, RING_3D(NV34TCL_VB_INDEX_BATCH, 1)); - OUT_RING (chan, ((nr - 1) << 24) | start); - start += nr; + lfve->idx = i; + lfve->vertex_buffer_index = ve->vertex_buffer_index; + lfve->src_offset = ve->src_offset; + lfve->fetch_rgba_float = util_format_description(ve->src_format)->fetch_rgba_float; + lfve->ncomp = ncomp; } - - nr = vc >> 8; - while (nr) { - unsigned push = nr > 2047 ? 2047 : nr; - - nr -= push; - - OUT_RING(chan, RING_3D_NI(NV34TCL_VB_INDEX_BATCH, push)); - while (push--) { - OUT_RING(chan, ((0x100 - 1) << 24) | start); - start += 0x100; + else + { + unsigned idx; + + idx = cso->num_per_vertex++; + cso->per_vertex[idx].idx = i; + cso->per_vertex[idx].vertex_buffer_index = ve->vertex_buffer_index; + cso->per_vertex[idx].src_offset = ve->src_offset; + + idx = transkey.nr_elements++; + transkey.element[idx].input_format = ve->src_format; + transkey.element[idx].input_buffer = vb_compacted_index[ve->vertex_buffer_index]; + transkey.element[idx].input_offset = ve->src_offset; + transkey.element[idx].instance_divisor = 0; + transkey.element[idx].type = TRANSLATE_ELEMENT_NORMAL; + if(type) + { + transkey.element[idx].output_format = ve->src_format; + cso->vtxfmt[i] = (ncomp << NV34TCL_VTXFMT_SIZE_SHIFT) | type; + } + else + { + unsigned float32[4] = {PIPE_FORMAT_R32_FLOAT, PIPE_FORMAT_R32G32_FLOAT, PIPE_FORMAT_R32G32B32_FLOAT, PIPE_FORMAT_R32G32B32A32_FLOAT}; + transkey.element[idx].output_format = float32[ncomp - 1]; + cso->needs_translate = TRUE; + cso->vtxfmt[i] = (ncomp << NV34TCL_VTXFMT_SIZE_SHIFT) | NV34TCL_VTXFMT_TYPE_32_FLOAT; } + transkey.element[idx].output_offset = transkey.output_stride; + transkey.output_stride += (util_format_get_stride(transkey.element[idx].output_format, 1) + 3) & ~3; } + } - OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1)); - OUT_RING (chan, 0); + cso->translate = translate_create(&transkey); + cso->vertex_length = transkey.output_stride >> 2; + cso->max_vertices_per_packet = 2047 / cso->vertex_length; - count -= vc; - start = restart; - } + return (void *)cso; } static void -nvfx_draw_elements(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, int indexBias, - unsigned mode, unsigned start, unsigned count) +nvfx_vtxelts_state_delete(struct pipe_context *pipe, void *hwcso) { - struct nvfx_context *nvfx = nvfx_context(pipe); - boolean idxbuf; - - idxbuf = nvfx_vbo_set_idxbuf(nvfx, indexBuffer, indexSize); - if (nvfx->screen->force_swtnl || !nvfx_state_validate(nvfx)) { - nvfx_draw_elements_swtnl(pipe, - indexBuffer, indexSize, indexBias, - mode, start, count); - return; - } - - if (idxbuf) { - nvfx_draw_elements_vbo(pipe, mode, start, count); - } else { - nvfx_draw_elements_inline(pipe, - indexBuffer, indexSize, indexBias, - mode, start, count); - } - - pipe->flush(pipe, 0, NULL); + FREE(hwcso); } -void -nvfx_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) +static void +nvfx_vtxelts_state_bind(struct pipe_context *pipe, void *hwcso) { struct nvfx_context *nvfx = nvfx_context(pipe); - if (info->indexed && nvfx->idxbuf.buffer) { - unsigned offset; - - assert(nvfx->idxbuf.offset % nvfx->idxbuf.index_size == 0); - offset = nvfx->idxbuf.offset / nvfx->idxbuf.index_size; - - nvfx_draw_elements(pipe, - nvfx->idxbuf.buffer, - nvfx->idxbuf.index_size, - info->index_bias, - info->mode, - info->start + offset, - info->count); - } - else { - nvfx_draw_arrays(pipe, - info->mode, - info->start, - info->count); - } + nvfx->vtxelt = hwcso; + nvfx->use_vertex_buffers = -1; + nvfx->draw_dirty |= NVFX_NEW_ARRAYS; } -boolean -nvfx_vbo_validate(struct nvfx_context *nvfx) +static void +nvfx_set_vertex_buffers(struct pipe_context *pipe, unsigned count, + const struct pipe_vertex_buffer *vb) { - struct nouveau_channel* chan = nvfx->screen->base.channel; - struct pipe_resource *ib = nvfx->idxbuf_buffer; - unsigned ib_format = nvfx->idxbuf_format; - int i; - int elements = MAX2(nvfx->vtxelt->num_elements, nvfx->hw_vtxelt_nr); - uint32_t vtxfmt[16]; - unsigned vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD; - - if (!elements) - return TRUE; - - nvfx->vbo_bo = 0; - - MARK_RING(chan, (5 + 2) * 16 + 2 + 11, 16 + 2); - for (i = 0; i < nvfx->vtxelt->num_elements; i++) { - struct pipe_vertex_element *ve; - struct pipe_vertex_buffer *vb; - unsigned type, ncomp; - - ve = &nvfx->vtxelt->pipe[i]; - vb = &nvfx->vtxbuf[ve->vertex_buffer_index]; - - if (nvfx_vbo_format_to_hw(ve->src_format, &type, &ncomp)) { - MARK_UNDO(chan); - nvfx->fallback_swtnl |= NVFX_NEW_ARRAYS; - return FALSE; - } + struct nvfx_context *nvfx = nvfx_context(pipe); - if (!vb->stride && type == NV34TCL_VTXFMT_TYPE_FLOAT) { - nvfx_vbo_static_attrib(nvfx, i, ve, vb, ncomp); - vtxfmt[i] = type; - } else { - vtxfmt[i] = ((vb->stride << NV34TCL_VTXFMT_STRIDE_SHIFT) | - (ncomp << NV34TCL_VTXFMT_SIZE_SHIFT) | type); - nvfx->vbo_bo |= (1 << i); - } + for(unsigned i = 0; i < count; ++i) + { + pipe_resource_reference(&nvfx->vtxbuf[i].buffer, vb[i].buffer); + nvfx->vtxbuf[i].buffer_offset = vb[i].buffer_offset; + nvfx->vtxbuf[i].max_index = vb[i].max_index; + nvfx->vtxbuf[i].stride = vb[i].stride; } - for(; i < elements; ++i) - vtxfmt[i] = NV34TCL_VTXFMT_TYPE_FLOAT; + for(unsigned i = count; i < nvfx->vtxbuf_nr; ++i) + pipe_resource_reference(&nvfx->vtxbuf[i].buffer, 0); - OUT_RING(chan, RING_3D(NV34TCL_VTXFMT(0), elements)); - OUT_RINGp(chan, vtxfmt, elements); - - if(nvfx->is_nv4x) { - unsigned i; - /* seems to be some kind of cache flushing */ - for(i = 0; i < 3; ++i) { - OUT_RING(chan, RING_3D(0x1718, 1)); - OUT_RING(chan, 0); - } - } - - OUT_RING(chan, RING_3D(NV34TCL_VTXBUF_ADDRESS(0), elements)); - for (i = 0; i < nvfx->vtxelt->num_elements; i++) { - struct pipe_vertex_element *ve; - struct pipe_vertex_buffer *vb; + nvfx->vtxbuf_nr = count; + nvfx->use_vertex_buffers = -1; + nvfx->draw_dirty |= NVFX_NEW_ARRAYS; +} - ve = &nvfx->vtxelt->pipe[i]; - vb = &nvfx->vtxbuf[ve->vertex_buffer_index]; +static void +nvfx_set_index_buffer(struct pipe_context *pipe, + const struct pipe_index_buffer *ib) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); - if (!(nvfx->vbo_bo & (1 << i))) - OUT_RING(chan, 0); - else - { - struct nouveau_bo* bo = nvfx_resource(vb->buffer)->bo; - OUT_RELOC(chan, bo, - vb->buffer_offset + ve->src_offset, - vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, - 0, NV34TCL_VTXBUF_ADDRESS_DMA1); - } + if(ib) + { + pipe_resource_reference(&nvfx->idxbuf.buffer, ib->buffer); + nvfx->idxbuf.index_size = ib->index_size; + nvfx->idxbuf.offset = ib->offset; } - - for (; i < elements; i++) - OUT_RING(chan, 0); - - OUT_RING(chan, RING_3D(0x1710, 1)); - OUT_RING(chan, 0); - - if (ib) { - unsigned ib_flags = nvfx->screen->index_buffer_reloc_flags | NOUVEAU_BO_RD; - struct nouveau_bo* bo = nvfx_resource(ib)->bo; - - assert(nvfx->screen->index_buffer_reloc_flags); - - OUT_RING(chan, RING_3D(NV34TCL_IDXBUF_ADDRESS, 2)); - OUT_RELOC(chan, bo, 0, ib_flags | NOUVEAU_BO_LOW, 0, 0); - OUT_RELOC(chan, bo, ib_format, ib_flags | NOUVEAU_BO_OR, - 0, NV34TCL_IDXBUF_FORMAT_DMA1); + else + { + pipe_resource_reference(&nvfx->idxbuf.buffer, 0); + nvfx->idxbuf.index_size = 0; + nvfx->idxbuf.offset = 0; } - nvfx->hw_vtxelt_nr = nvfx->vtxelt->num_elements; - return TRUE; + nvfx->dirty |= NVFX_NEW_INDEX; + nvfx->draw_dirty |= NVFX_NEW_INDEX; } void -nvfx_vbo_relocate(struct nvfx_context *nvfx) +nvfx_init_vbo_functions(struct nvfx_context *nvfx) { - struct nouveau_channel* chan = nvfx->screen->base.channel; - unsigned vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD | NOUVEAU_BO_DUMMY; - int i; + nvfx->pipe.set_vertex_buffers = nvfx_set_vertex_buffers; + nvfx->pipe.set_index_buffer = nvfx_set_index_buffer; - MARK_RING(chan, 2 * 16 + 3, 2 * 16 + 3); - for(i = 0; i < nvfx->vtxelt->num_elements; ++i) { - if(nvfx->vbo_bo & (1 << i)) { - struct pipe_vertex_element *ve = &nvfx->vtxelt->pipe[i]; - struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index]; - struct nouveau_bo* bo = nvfx_resource(vb->buffer)->bo; - OUT_RELOC(chan, bo, RING_3D(NV34TCL_VTXBUF_ADDRESS(i), 1), - vb_flags, 0, 0); - OUT_RELOC(chan, bo, vb->buffer_offset + ve->src_offset, - vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, - 0, NV34TCL_VTXBUF_ADDRESS_DMA1); - } - } - - if(nvfx->idxbuf_buffer) - { - unsigned ib_flags = nvfx->screen->index_buffer_reloc_flags | NOUVEAU_BO_RD | NOUVEAU_BO_DUMMY; - struct nouveau_bo* bo = nvfx_resource(nvfx->idxbuf_buffer)->bo; - - assert(nvfx->screen->index_buffer_reloc_flags); - - OUT_RELOC(chan, bo, RING_3D(NV34TCL_IDXBUF_ADDRESS, 2), - ib_flags, 0, 0); - OUT_RELOC(chan, bo, 0, - ib_flags | NOUVEAU_BO_LOW, 0, 0); - OUT_RELOC(chan, bo, nvfx->idxbuf_format, - ib_flags | NOUVEAU_BO_OR, - 0, NV34TCL_IDXBUF_FORMAT_DMA1); - } + nvfx->pipe.create_vertex_elements_state = nvfx_vtxelts_state_create; + nvfx->pipe.delete_vertex_elements_state = nvfx_vtxelts_state_delete; + nvfx->pipe.bind_vertex_elements_state = nvfx_vtxelts_state_bind; } |